def get_DER(all_reference, all_hypothesis):
    """
    calculates DER, CER, FA and MISS

    Args:
        all_reference (list[Annotation]): reference annotations for score calculation
        all_hypothesis (list[Annotation]): hypothesis annotations for score calculation

    Returns:
        DER (float): Diarization Error Rate
        CER (float): Confusion Error Rate
        FA (float): False Alarm
        MISS (float): Miss Detection
    """
    metric = DiarizationErrorRate(collar=0.25, skip_overlap=True)
    DER = 0

    for reference, hypothesis in zip(all_reference, all_hypothesis):
        metric(reference, hypothesis, detailed=True)

    DER = abs(metric)
    CER = metric['confusion'] / metric['total']
    FA = metric['false alarm'] / metric['total']
    MISS = metric['missed detection'] / metric['total']
    metric.reset()

    return DER, CER, FA, MISS
def get_DER(all_reference, all_hypothesis):
    """
    calculates DER, CER, FA and MISS

    Args:
        all_reference (list[Annotation]): reference annotations for score calculation
        all_hypothesis (list[Annotation]): hypothesis annotations for score calculation

    Returns:
        DER (float): Diarization Error Rate
        CER (float): Confusion Error Rate
        FA (float): False Alarm
        MISS (float): Miss Detection

    < Caveat >
    Unlike md-eval.pl, "no score" collar in pyannote.metrics is the maximum length of
    "no score" collar from left to right. Therefore, if 0.25s is applied for "no score"
    collar in md-eval.pl, 0.5s should be applied for pyannote.metrics.
    """
    metric = DiarizationErrorRate(collar=0.5, skip_overlap=True)

    for reference, hypothesis in zip(all_reference, all_hypothesis):
        metric(reference, hypothesis, detailed=True)

    DER = abs(metric)
    CER = metric['confusion'] / metric['total']
    FA = metric['false alarm'] / metric['total']
    MISS = metric['missed detection'] / metric['total']
    metric.reset()

    return DER, CER, FA, MISS
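# A minimal usage sketch for get_DER above (not from the original source): the toy
# Annotation objects and speaker labels are assumptions, chosen only to illustrate the
# caveat in the docstring that a 0.5 s pyannote.metrics collar corresponds to a
# 0.25 s md-eval.pl-style collar on each side of a boundary.
from pyannote.core import Annotation, Segment
from pyannote.metrics.diarization import DiarizationErrorRate

reference = Annotation()
reference[Segment(0.0, 10.0)] = 'speaker_1'
reference[Segment(10.0, 20.0)] = 'speaker_2'

hypothesis = Annotation()
hypothesis[Segment(0.0, 9.0)] = 'A'
hypothesis[Segment(9.0, 20.0)] = 'B'

der, cer, fa, miss = get_DER([reference], [hypothesis])
print('DER={:.4f} CER={:.4f} FA={:.4f} MISS={:.4f}'.format(der, cer, fa, miss))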
def get_mapping(reference, system):
    """ get speaker mapping between system and reference"""
    metric = DiarizationErrorRate()
    mapping = metric.optimal_mapping(reference, system)
    return mapping
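# Illustrative call for get_mapping (not part of the original source; the toy
# annotations are assumptions). optimal_mapping returns a dict keyed by hypothesis
# labels, which can then be applied with Annotation.rename_labels, as the gecko
# example further down does.
from pyannote.core import Annotation, Segment

reference = Annotation()
reference[Segment(0, 5)] = 'alice'
reference[Segment(5, 10)] = 'bob'

system = Annotation()
system[Segment(0, 5)] = 'spk0'
system[Segment(5, 10)] = 'spk1'

mapping = get_mapping(reference, system)      # {'spk0': 'alice', 'spk1': 'bob'}
system = system.rename_labels(mapping=mapping)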
def score_labels(AUDIO_RTTM_MAP, all_reference, all_hypothesis, collar=0.25, ignore_overlap=True):
    """
    calculates DER, CER, FA and MISS

    Args:
        AUDIO_RTTM_MAP : dictionary containing information provided from the manifest path
        all_reference (list[uniq_name, Annotation]): reference annotations for score calculation
        all_hypothesis (list[uniq_name, Annotation]): hypothesis annotations for score calculation

    Returns:
        metric (pyannote.DiarizationErrorRate): pyannote Diarization Error Rate metric object.
            This object contains detailed scores for each audio file.
        mapping (dict): mapping dict containing the mapped speaker labels for each audio input

    < Caveat >
    Unlike md-eval.pl, "no score" collar in pyannote.metrics is the maximum length of
    "no score" collar from left to right. Therefore, if 0.25s is applied for "no score"
    collar in md-eval.pl, 0.5s should be applied for pyannote.metrics.
    """
    metric = None
    if len(all_reference) == len(all_hypothesis):
        metric = DiarizationErrorRate(collar=2 * collar, skip_overlap=ignore_overlap)

        mapping_dict = {}
        for (reference, hypothesis) in zip(all_reference, all_hypothesis):
            ref_key, ref_labels = reference
            _, hyp_labels = hypothesis
            uem = AUDIO_RTTM_MAP[ref_key].get('uem_filepath', None)
            if uem is not None:
                uem = uem_timeline_from_file(uem_file=uem, uniq_name=ref_key)
            metric(ref_labels, hyp_labels, uem=uem, detailed=True)
            mapping_dict[ref_key] = metric.optimal_mapping(ref_labels, hyp_labels)

        DER = abs(metric)
        CER = metric['confusion'] / metric['total']
        FA = metric['false alarm'] / metric['total']
        MISS = metric['missed detection'] / metric['total']

        logging.info(
            "Cumulative Results for collar {} sec and ignore_overlap {}: \n"
            " FA: {:.4f}\t MISS {:.4f}\t Diarization ER: {:.4f}\t Confusion ER: {:.4f}".format(
                collar, ignore_overlap, FA, MISS, DER, CER
            )
        )

        return metric, mapping_dict
    else:
        logging.warning(
            "Check that each ground truth RTTM is present in the provided manifest file. "
            "Skipping calculation of Diarization Error Rate"
        )
        return None
def test_bug_16():
    reference = Annotation()
    reference[Segment(0, 10)] = 'A'
    hypothesis = Annotation()

    metric = DiarizationErrorRate(collar=1)
    total = metric(reference, hypothesis, detailed=True)['total']
    npt.assert_almost_equal(total, 9, decimal=3)

    metric = DiarizationErrorRate(collar=0)
    total = metric(reference, hypothesis, detailed=True)['total']
    npt.assert_almost_equal(total, 10, decimal=3)
def gecko(args):
    hypotheses_path = args['<hypotheses_path>']
    uri = args['<uri>']
    colors = get_colors(uri)
    distances = {}

    if Path(hypotheses_path).exists():
        hypotheses = load_rttm(hypotheses_path)
        hypothesis = hypotheses[uri]
    else:
        # protocol
        protocol = get_protocol(args['<hypotheses_path>'])
        reference = get_file(protocol, uri)
        hypothesis = reference['annotation']
        annotated = get_annotated(reference)

    hypotheses_path = Path(hypotheses_path)
    protocol = args['--database.task.protocol']
    features = None
    if protocol:
        protocol = get_protocol(protocol)
        embeddings = args['--embeddings']
        reference, features = get_file(protocol, uri, embeddings=embeddings)
        if args['--map']:
            print(f"mapping {uri} with {protocol}")
            diarizationErrorRate = DiarizationErrorRate()
            annotated = get_annotated(reference)
            optimal_mapping = diarizationErrorRate.optimal_mapping(
                reference['annotation'], hypothesis, annotated)
            hypothesis = hypothesis.rename_labels(mapping=optimal_mapping)

    hypothesis = update_labels(hypothesis, distances)  # tag unsure clusters
    distances_per_speaker = get_distances_per_speaker(features, hypothesis) if features else {}

    if args['--tag_na']:
        whole_file = Segment(0., annotated.segments_boundaries_[-1])
        not_annotated = annotated.gaps(whole_file).to_annotation(na())
        hypothesis = hypothesis.crop(annotated).update(not_annotated)

    gecko_json = annotation_to_GeckoJSON(hypothesis, distances_per_speaker, colors)

    if hypotheses_path.exists():
        dir_path = hypotheses_path.parent
    else:
        dir_path = Path(".")
    json_path = os.path.join(dir_path, f'{uri}.json')
    with open(json_path, 'w') as file:
        json.dump(gecko_json, file)
    print(f"successfully dumped {json_path}")
def DER(outfile, AudioDataSet, annotationlist, audioLength):
    reference = Annotation()

    if not AudioDataSet == 'DiaExample':
        # one annotation XML per speaker, in the order A, B, C, D
        for xml_file, speaker in zip(annotationlist, ['A', 'B', 'C', 'D']):
            root = ET.parse(xml_file).getroot()
            for child in root.findall('segment'):
                start = float(child.get('transcriber_start'))
                end = float(child.get('transcriber_end'))
                reference[Segment(start, end)] = speaker
    else:
        reference = Annotation()
        reference[Segment(0.15, 3.41)] = 'A'
        reference[Segment(3.83, 5.82)] = 'A'
        reference[Segment(6.75, 11.10)] = 'B'
        reference[Segment(11.32, 15.8)] = 'C'
        reference[Segment(15.9, 18.8)] = 'B'
        reference[Segment(18.8, 27.8)] = 'C'
        reference[Segment(27.8, 34.4)] = 'B'
        reference[Segment(34.4, 42)] = 'D'

    hypothesis = Annotation()
    with open(outfile, 'r') as f:
        for line in f.readlines():
            start = float(line.split(' ')[3])
            end = start + float(line.split(' ')[4])
            annotation = line.split(' ')[5][0:-1]
            hypothesis[Segment(start, end)] = annotation

    metric = DiarizationErrorRate()
    metricPurity = DiarizationPurity()
    uem = Timeline([Segment(0, audioLength)])

    print('DER: %.2f %%' % (metric(reference, hypothesis, uem=uem) * 100))
    print('Cluster Purity: %.2f %%' % (metricPurity(reference, hypothesis, uem=uem) * 100))

    return metric, reference, hypothesis
def performance_metrics(df_labels, df_embeddings_verification, track_embedding, cfg, frame_list, iteration):
    speaker_list = df_labels.columns.tolist()
    df_precision = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_roc = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_recall = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_far = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_frr = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    der = []

    metric = DiarizationErrorRate(skip_overlap=True, collar=cfg.audio.collar)
    groundtruth = load_rttm(cfg.audio.rttm_path)[cfg.audio.uri[iteration]]

    for threshold in cfg.audio.threshold:
        df_output = multi_speaker_verification(
            track_embedding=track_embedding,
            df_labels=df_labels,
            df_embeddings_verification=df_embeddings_verification,
            threshold=threshold)

        for speaker in speaker_list:
            try:
                df_precision.loc[threshold, speaker] = precision_score(
                    df_labels[speaker], df_output[speaker], average='binary')
            except:
                df_precision.loc[threshold, speaker] = 0
            try:
                df_recall.loc[threshold, speaker] = recall_score(
                    df_labels[speaker], df_output[speaker], average='binary')
            except:
                df_recall.loc[threshold, speaker] = 0
            try:
                df_roc.loc[threshold, speaker] = roc_auc_score(
                    df_labels[speaker], df_output[speaker], average=None)
            except:
                df_roc.loc[threshold, speaker] = 0
            try:
                far, frr = FAR_FRR(y_true=df_labels[speaker], y_pred=df_output[speaker])
                df_far.loc[threshold, speaker] = far
                df_frr.loc[threshold, speaker] = frr
            except:
                df_far.loc[threshold, speaker] = 0
                df_frr.loc[threshold, speaker] = 0

        components = metric(
            groundtruth,
            merge_frames(df_outputs=df_output,
                         frame_list=frame_list,
                         filename=cfg.audio.uri[iteration] + '_' + str(threshold)),
            detailed=True)
        components = metric[:]
        der.append(components)

    return df_precision, df_recall, df_roc, df_far, df_frr, der
def get_der(self, ref_file, scores):
    """ Compute Diarization Error Rate from reference and scores.

        :param ref_file: path to file with diarization reference
        :type ref_file: str
        :param scores: input scores from PLDA model
        :type scores: numpy.array
    """
    ref, hyp = self.init_annotations()
    with open(ref_file, 'r') as f:
        for line in f:
            _, name, _, start, duration, _, _, speaker, _ = line.split()
            ref[name][Segment(float(start), float(start) + float(duration))] = speaker

    for ivecset in self.ivecs:
        if ivecset.size() > 0:
            name, reg_name = ivecset.name, ivecset.name
            # dirty trick, will be removed, watch out
            if 'beamformed' in name:
                reg_name = re.sub('beamformed/', '', name)
            # # # # # # # # # # # # # # # # # # # # #
            reg_name = re.sub('/.*', '', reg_name)
            for i, ivec in enumerate(ivecset.ivecs):
                start, end = ivec.window_start / 1000.0, ivec.window_end / 1000.0
                hyp[reg_name][Segment(start, end)] = np.argmax(scores[name].T[i])
        else:
            logwarning('[Diarization.get_der] No i-vectors to dump in {}.'.format(ivecset.name))

    der = DiarizationErrorRate()
    der.collar = 0.25

    names, values, summ = [], [], 0.0
    for name in ref.keys():
        names.append(name)
        der_num = der(ref[name], hyp[name]) * 100
        values.append(der_num)
        summ += der_num
        loginfo('[Diarization.get_der] {} DER = {}'.format(name, '{0:.3f}'.format(der_num)))

    loginfo('[Diarization.get_der] Average DER = {}'.format(
        '{0:.3f}'.format(summ / float(len(ref.keys())))))
    Diarization.plot_der(names, values)
def diarization(protocol, subset, hypotheses, greedy=False, collar=0.0, skip_overlap=False):
    options = {
        'collar': collar,
        'skip_overlap': skip_overlap,
        'parallel': True
    }

    metrics = {
        'purity': DiarizationPurity(**options),
        'coverage': DiarizationCoverage(**options)
    }

    if greedy:
        metrics['error'] = GreedyDiarizationErrorRate(**options)
    else:
        metrics['error'] = DiarizationErrorRate(**options)

    reports = get_reports(protocol, subset, hypotheses, metrics)

    report = metrics['error'].report(display=False)
    purity = metrics['purity'].report(display=False)
    coverage = metrics['coverage'].report(display=False)

    report['purity', '%'] = purity[metrics['purity'].name, '%']
    report['coverage', '%'] = coverage[metrics['coverage'].name, '%']

    columns = list(report.columns)
    report = report[[columns[0]] + columns[-2:] + columns[1:-2]]
    report = reindex(report)

    summary = 'Diarization ({0:s}collar = {1:g} ms{2})'.format(
        'greedy, ' if greedy else '',
        1000 * collar,
        ', no overlap' if skip_overlap else '')

    headers = [summary] + \
              [report.columns[i][0] for i in range(3)] + \
              ['%' if c[1] == '%' else c[0] for c in report.columns[3:]]

    print(tabulate(report,
                   headers=headers,
                   tablefmt="simple",
                   floatfmt=".2f",
                   numalign="decimal",
                   stralign="left",
                   missingval="",
                   showindex="default",
                   disable_numparse=False))
def get_der(true_annotation, pred_annotation):
    """Calculate Diarization Error Rate - only the confusion. """
    metric = DiarizationErrorRate(collar=0.5)
    start = true_annotation.get_timeline().extent().start
    end = true_annotation.get_timeline().extent().end
    components = metric(true_annotation, pred_annotation, detailed=True, uem=Segment(start, end))
    der_rate = components['confusion'] / components['total']  # Only consider confusion.
    print("DER = {0:.3f}".format(der_rate))
    return der_rate
def diarization_error_rate(y_true, y_pred, times):
    '''
    :param y_true: Ground truth speakers per utterance
    :param y_pred: Predicted speakers per utterance
    :param times: time per utterance in seconds (the seconds can be a float,
        they will be converted and rounded to integer milliseconds)
    :return: The Diarization Error Rate (DER)
    '''
    metric = DiarizationErrorRate()
    reference = _generate_annotations(y_true, times)
    hypothesis = _generate_annotations(y_pred, times)
    value = metric(reference, hypothesis)
    return value
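# _generate_annotations is not shown in this snippet. The sketch below is one plausible
# implementation (an assumption, not the original helper): utterances are laid back to
# back on the timeline, with durations rounded to integer milliseconds as the docstring
# describes.
from pyannote.core import Annotation, Segment

def _generate_annotations(speakers, times):
    annotation = Annotation()
    cursor_ms = 0  # running position on the timeline, in milliseconds
    for speaker, seconds in zip(speakers, times):
        duration_ms = int(round(seconds * 1000))
        annotation[Segment(cursor_ms / 1000.0, (cursor_ms + duration_ms) / 1000.0)] = speaker
        cursor_ms += duration_ms
    return annotation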
def calculate_der(reference_filename, hypothesis_filename):
    lbls = Util.read_audacity_labels(reference_filename)
    reference = Annotation()
    for lbl in lbls:
        reference[Segment(lbl.start_seconds, lbl.end_seconds)] = lbl.label

    predicted_lbls = Util.read_audacity_labels(hypothesis_filename)
    hypothesis = Annotation()
    for lbl in predicted_lbls:
        if lbl.label != 'non_speech':
            hypothesis[Segment(lbl.start_seconds, lbl.end_seconds)] = lbl.label

    metric = DiarizationErrorRate()
    der = metric(reference, hypothesis)
    return der
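# Hypothetical invocation of calculate_der (the file names are placeholders;
# Util.read_audacity_labels is assumed to yield objects with start_seconds,
# end_seconds and label attributes, as used above):
der = calculate_der('reference_labels.txt', 'hypothesis_labels.txt')
print('DER: {:.3f}'.format(der))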
def test_detailed(reference, hypothesis):
    diarizationErrorRate = DiarizationErrorRate()
    details = diarizationErrorRate(reference, hypothesis, detailed=True)

    confusion = details['confusion']
    npt.assert_almost_equal(confusion, 7.0, decimal=7)

    correct = details['correct']
    npt.assert_almost_equal(correct, 22.0, decimal=7)

    rate = details['diarization error rate']
    npt.assert_almost_equal(rate, 0.5161290322580645, decimal=7)

    false_alarm = details['false alarm']
    npt.assert_almost_equal(false_alarm, 7.0, decimal=7)

    missed_detection = details['missed detection']
    npt.assert_almost_equal(missed_detection, 2.0, decimal=7)

    total = details['total']
    npt.assert_almost_equal(total, 31.0, decimal=7)
def test_optimal_mapping(reference, hypothesis):
    diarizationErrorRate = DiarizationErrorRate()
    mapping = diarizationErrorRate.optimal_mapping(reference, hypothesis)
    assert mapping == {'a': 'A', 'b': 'B', 'c': 'C'}
def test_keep_overlap(reference_with_overlap, hypothesis):
    metric = DiarizationErrorRate(skip_overlap=False)
    total = metric(reference_with_overlap, hypothesis, detailed=True)['total']
    npt.assert_almost_equal(total, 34, decimal=3)
def test_error_rate(reference, hypothesis):
    diarizationErrorRate = DiarizationErrorRate()
    error_rate = diarizationErrorRate(reference, hypothesis)
    npt.assert_almost_equal(error_rate, 0.5161290322580645, decimal=7)
def main(reference_dir, hypothesis_dir, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    flist = os.listdir(reference_dir)
    total_references = len(flist)
    total_hypotheses = len(os.listdir(hypothesis_dir))

    if total_references == 0:
        # no references available
        score_f = os.path.join(output_dir, 'score.seconds')
        score = open(score_f, 'w')
        score.write('No references available.\n')
        score.write('references {0}\n'.format(total_references))
        score.write('hypotheses {0}\n'.format(total_hypotheses))
        sys.exit(0)

    collar = 0.1  # collar in seconds
    der_eval = DiarizationErrorRate(collar=collar)
    ier_eval = IdentificationErrorRate(collar=collar)
    prec_eval = IdentificationPrecision(collar=collar)
    rec_eval = IdentificationRecall(collar=collar)

    skip_tokens = ['OVERLAP', 'SPN']
    skip_tokens_child = ['OVERLAP', 'SPN', 'SLT']

    missing_hypotheses = 0
    missing_hypotheses_seconds = 0
    utt_scores = []

    for f in flist:
        ref_f = os.path.join(reference_dir, f)
        hyp_f = os.path.join(hypothesis_dir, f)

        reference = read_annotation(ref_f,
                                    annotation_type='reference', skip_tokens=skip_tokens)
        reference_child = read_annotation(ref_f,
                                          annotation_type='reference', skip_tokens=skip_tokens_child)

        if not os.path.isfile(hyp_f):
            missing_hypotheses += 1
            missed_sum = sum([i.end - i.start for i in reference.itersegments()])
            missing_hypotheses_seconds += missed_sum

        # read_annotation can handle non-existing files
        hypothesis = read_annotation(hyp_f,
                                     annotation_type='hypothesis', skip_tokens=skip_tokens)
        hypothesis_child = read_annotation(hyp_f,
                                           annotation_type='hypothesis', skip_tokens=skip_tokens_child)

        # find global min and max
        time_ref = [[i.start, i.end] for i in reference.itersegments()]
        time_hyp = [[i.start, i.end] for i in hypothesis.itersegments()]
        min_f = min([i for i, e in time_hyp] + [i for i, e in time_ref])
        max_f = max([e for i, e in time_hyp] + [e for i, e in time_ref])

        # evaluate DER
        der = der_eval(reference, hypothesis,
                       uem=Segment(min_f, max_f), detailed=True)

        # find global min and max
        time_ref = [[i.start, i.end] for i in reference_child.itersegments()]
        time_hyp = [[i.start, i.end] for i in hypothesis_child.itersegments()]
        min_f = min([i for i, e in time_hyp] + [i for i, e in time_ref])
        max_f = max([e for i, e in time_hyp] + [e for i, e in time_ref])

        # evaluate IER
        ier = ier_eval(reference_child, hypothesis_child,
                       uem=Segment(min_f, max_f), detailed=True)
        prec = prec_eval(reference_child, hypothesis_child,
                         uem=Segment(min_f, max_f))
        rec = rec_eval(reference_child, hypothesis_child,
                       uem=Segment(min_f, max_f))
        f1 = 0 if prec == 0 or rec == 0 else 2 * (prec * rec) / (prec + rec)

        ref_labs = ' '.join(reference.labels())
        hyp_labs = ' '.join(hypothesis.labels())
        ref_labs = ' '.join(
            [label for _, _, label in reference.itertracks(yield_label=True)])
        hyp_labs = ' '.join(
            [label for _, _, label in hypothesis.itertracks(yield_label=True)])
        if not hyp_labs:
            hyp_labs = 'no_alignment'

        utt_scores.append([f, prec, rec, f1, der, ier, ref_labs, hyp_labs])

    # global scores
    ier = abs(ier_eval)
    der = abs(der_eval)
    precision = abs(prec_eval)
    recall = abs(rec_eval)
    f1 = 0 if precision == 0 or recall == 0 else 2 * (precision * recall) / (precision + recall)

    # keys to intermediate metrics
    keys = ['correct', 'missed detection', 'false alarm',
            'confusion', 'total', 'diarization error rate']
    aggregate = {k: 0 for k in keys}

    ## global correct, missed, false alarm, confusion
    for item in utt_scores:
        der_errors = item[4]
        for key in keys:
            aggregate[key] += der_errors[key]
        ier_errors = item[5]
        item_ier = ier_errors['identification error rate']
        aggregate['der'] = item_ier

    if aggregate['total'] == 0:
        aggregate['total'] = 1

    # write global scores to file
    score_f = os.path.join(output_dir, 'score.seconds')
    score = open(score_f, 'w')
    score.write('precision {0:.3f}\n'.format(precision))
    score.write('recall {0:.3f}\n'.format(recall))
    score.write('f1 score {0:.3f}\n\n'.format(f1))
    score.write('IER {0:.3f}\n\n'.format(ier))
    score.write('DER {0:.3f}\n'.format(der))
    score.write(' missed {0:.3f}\n'.format(aggregate['missed detection'] / aggregate['total']))
    score.write(' false alarm {0:.3f}\n'.format(aggregate['false alarm'] / aggregate['total']))
    score.write(' confusion {0:.3f}\n'.format(aggregate['confusion'] / aggregate['total']))
    score.write(' correct {0:.3f}\n'.format(aggregate['correct'] / aggregate['total']))
    score.write('\n')
    score.write('total files {0}\n'.format(total_references))
    score.write('alignment failures\n')
    score.write(' total utterances: {0}\n'.format(missing_hypotheses))
    score.write(' total seconds in failed utterances: {0}\n\n'.format(missing_hypotheses_seconds))
    score.write('precision details\n')
    for i in prec_eval[:]:
        score.write(' {0} {1}\n'.format(i, prec_eval[:][i]))
    score.write('\n')
    score.write('recall details\n')
    for i in rec_eval[:]:
        score.write(' {0} {1}\n'.format(i, rec_eval[:][i]))
    score.close()

    # write detailed scores to file sorted by DER
    # columns: filename, precision, recall, f1, reference_words, hypothesis_words
    report_f = os.path.join(output_dir, 'report.seconds')
    report = open(report_f, 'w')
    header = ['filename', 'precision', 'recall', 'f1', 'correct', 'missed',
              'false_alarm', 'confusion', 'total', 'der', 'ier',
              'reference_words', 'hypothesis_words']
    report.write('\t'.join(header) + '\n')

    for item in sorted(utt_scores, key=lambda x: x[4]['diarization error rate']):
        data = []
        # filename
        data.append(item[0])
        # precision, recall, f1
        for i in range(1, 3 + 1):
            data.append('{0:.3f}'.format(item[i]))
        # DER related scores
        errors = item[4]
        for key in keys:
            value = '{0:.3f}'.format(errors[key])
            data.append(value)
        # IER score
        ier = item[5]['identification error rate']
        data.append('{0:.3f}'.format(ier))
        data.append(item[-2])
        data.append(item[-1])
        report.write('\t'.join(data) + '\n')
    report.close()
import time

import torch
from pyannote.database import FileFinder, get_protocol
from pyannote.metrics.diarization import DiarizationErrorRate, JaccardErrorRate

preprocessors = {'audio': FileFinder()}
protocol = get_protocol('VOXCON.SpeakerDiarization.Challenge', preprocessors=preprocessors)

# torch expects 'cuda' (or 'cpu') as the device string, not 'gpu'
diarization_pipeline = torch.hub.load('pyannote/pyannote-audio', 'dia_dihard', device='cuda')

ders = []
jers = []
hypotheses = []

derMetric = DiarizationErrorRate(collar=0.25)
jerMetric = JaccardErrorRate(collar=0.25)

for file in protocol.test():
    hypothesis = diarization_pipeline(file)
    hypotheses.append(hypothesis)

    reference = file["annotation"]
    # uem = file['annotated']

    der = derMetric(reference, hypothesis)
    jer = jerMetric(reference, hypothesis)
    ders.append(der)
    jers.append(jer)

    uri = file['uri']
    print(f'{uri} DER = {100 * der:.1f}% JER = {100 * jer:.1f}% {time.strftime("%H:%M:%S")}')
def performance_metrics(df_labels, df_embeddings_verification, track_embedding, cfg, frame_list, iteration):
    speaker_list = df_labels.columns.tolist()
    df_precision = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_roc = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_recall = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_far = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_frr = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    der = []

    metric = DiarizationErrorRate(skip_overlap=True, collar=cfg.audio.collar)
    groundtruth = load_rttm(cfg.audio.rttm_path)[cfg.audio.uri[iteration]]

    for threshold in cfg.audio.threshold:
        df_output = speaker_verification(
            track_embedding=track_embedding,
            df_labels=df_labels,
            df_embeddings_verification=df_embeddings_verification,
            threshold=threshold)

        for speaker in speaker_list:
            try:
                df_precision.loc[threshold, speaker] = precision_score(
                    df_labels[speaker], df_output[speaker], average='binary')
            except:
                df_precision.loc[threshold, speaker] = 0
            try:
                df_recall.loc[threshold, speaker] = recall_score(
                    df_labels[speaker], df_output[speaker], average='binary')
            except:
                df_recall.loc[threshold, speaker] = 0
            try:
                df_roc.loc[threshold, speaker] = roc_auc_score(
                    df_labels[speaker], df_output[speaker], average=None)
            except:
                df_roc.loc[threshold, speaker] = 0
            try:
                far, frr = FAR_FRR(y_true=df_labels[speaker], y_pred=df_output[speaker])
                df_far.loc[threshold, speaker] = far
                df_frr.loc[threshold, speaker] = frr
            except:
                df_far.loc[threshold, speaker] = 0
                df_frr.loc[threshold, speaker] = 0

        # der.append(metric(groundtruth, merge_frames(df_outputs=df_output, frame_list=frame_list, filename='try1_' + str(threshold))))
        components = metric(
            groundtruth,
            merge_frames(df_outputs=df_output,
                         frame_list=frame_list,
                         filename=cfg.audio.uri[iteration] + '_' + str(threshold)),
            detailed=True)
        components = metric[:]
        # print('False alarm: {}, Missed_Detection: {}, Confusion {}, Total {}'.format(DER['false alarm'], DER['missed detection'], DER['confusion'], DER['total']))
        # if DER <= 1:
        der.append(components)
        # else:
        #     der.append(1.0)

    return df_precision, df_recall, df_roc, df_far, df_frr, der
def test(custom=True, prefix=''):
    der = DiarizationErrorRate(collar=0.5)
    prec = DetectionPrecision(collar=0.5)
    recall = DetectionRecall(collar=0.5)
    coverage = SegmentationCoverage()
    purity = SegmentationPurity()

    result = {}
    if os.path.exists('results.json'):
        with open('results.json') as json_file:
            result = json.load(json_file)

    base_test = prefix + 'audio/'
    test_files = os.listdir(base_test)
    test_path = base_test + test_files[0] + '/new_data/'
    test_types = [name for name in os.listdir(test_path)
                  if os.path.isdir(os.path.join(test_path, name))]

    result_data = []
    if (custom):
        for _ in clusterings:
            result_data.append([])

    for test in test_types:
        avg_der = 0
        avg_prec = 0
        avg_rec = 0
        avg_cov = 0
        avg_pur = 0
        counter = 0
        speaker_results = {}
        cluster_results = []
        speaker_results_cluster = []
        for _ in clusterings:
            cluster_results.append({'der': 0, 'prec': 0, 'rec': 0, 'cov': 0, 'pur': 0})
            speaker_results_cluster.append({})

        for f in test_files:
            test_file = base_test + f
            data_file = test_file + '/new_data/' + f + '.json'
            with open(data_file) as f:
                data = json.load(f)

            for sub_f in data:
                counter += 1
                sub_f_data = data[sub_f]
                true_labels = sub_f_data['labels']
                true_speakers = sub_f_data['no_speakers']

                speakers_int = OrderedSet(map(lambda x: x['speaker'], true_labels))
                for i, s in enumerate(speakers_int):
                    for datadict in true_labels:
                        if datadict['speaker'] == s:
                            datadict['speaker'] = i + 1

                true_annotation = convert_to_annotation(true_labels)

                pred_path = test_file + '/new_data/' + test + '/'
                pred_file = sub_f if test == 'default' else sub_f.split('.')[0] + '_' + test + '.wav'
                audio = {'uri': pred_file, 'audio': pred_path + pred_file}

                if (custom):
                    long_turns, _, _, embeddings = predict(audio)

                index = 0
                for algorithm in clusterings:
                    if (custom):
                        pred_annotation = cluster_annotation(
                            long_turns, embeddings, true_speakers, algorithm)
                        if (type(pred_annotation) is tuple
                                or pred_annotation == Annotation()):
                            continue
                        pred_annotation = pred_annotation.rename_labels(generator='int')

                        der_res = der(true_annotation, pred_annotation)
                        prec_res = prec(true_annotation, pred_annotation)
                        rec_res = recall(true_annotation, pred_annotation)
                        cov_res = coverage(true_annotation, pred_annotation)
                        pur_res = purity(true_annotation, pred_annotation)

                        cluster_results[index]['der'] += der_res
                        cluster_results[index]['prec'] += prec_res
                        cluster_results[index]['rec'] += rec_res
                        cluster_results[index]['cov'] += cov_res
                        cluster_results[index]['pur'] += pur_res

                        if not true_speakers in speaker_results_cluster[index]:
                            speaker_results_cluster[index][true_speakers] = {
                                'der': 0, 'prec': 0, 'rec': 0, 'cov': 0, 'pur': 0, 'counter': 0
                            }
                        speaker_results_cluster[index][true_speakers]['der'] += der_res
                        speaker_results_cluster[index][true_speakers]['prec'] += prec_res
                        speaker_results_cluster[index][true_speakers]['rec'] += rec_res
                        speaker_results_cluster[index][true_speakers]['cov'] += cov_res
                        speaker_results_cluster[index][true_speakers]['pur'] += pur_res
                        speaker_results_cluster[index][true_speakers]['counter'] += 1
                        index += 1
                    else:
                        pred_annotation = pipeline({'audio': pred_path + pred_file})

                        der_res = der(true_annotation, pred_annotation)
                        prec_res = prec(true_annotation, pred_annotation)
                        rec_res = recall(true_annotation, pred_annotation)
                        cov_res = coverage(true_annotation, pred_annotation)
                        pur_res = purity(true_annotation, pred_annotation)

                        avg_der += der_res
                        avg_prec += prec_res
                        avg_rec += rec_res
                        avg_cov += cov_res
                        avg_pur += pur_res

                        if not true_speakers in speaker_results:
                            speaker_results[true_speakers] = {
                                'der': 0, 'prec': 0, 'rec': 0, 'cov': 0, 'pur': 0, 'counter': 0
                            }
                        speaker_results[true_speakers]['der'] += der_res
                        speaker_results[true_speakers]['prec'] += prec_res
                        speaker_results[true_speakers]['rec'] += rec_res
                        speaker_results[true_speakers]['cov'] += cov_res
                        speaker_results[true_speakers]['pur'] += pur_res
                        speaker_results[true_speakers]['counter'] += 1

        if custom:
            index = 0
            for algorithm in clusterings:
                cluster_data = cluster_results[index]
                sub_data = {'type': test}
                sub_data['DER'] = cluster_data['der'] / counter
                sub_data['Precision'] = cluster_data['prec'] / counter
                sub_data['Recall'] = cluster_data['rec'] / counter
                sub_data['Coverage'] = cluster_data['cov'] / counter
                sub_data['Purity'] = cluster_data['pur'] / counter
                for s in speaker_results_cluster[index]:
                    speaker_results_cluster[index][s]['der'] = speaker_results_cluster[index][s]['der'] / \
                        speaker_results_cluster[index][s]['counter']
                    speaker_results_cluster[index][s]['prec'] = speaker_results_cluster[index][s]['prec'] / \
                        speaker_results_cluster[index][s]['counter']
                    speaker_results_cluster[index][s]['rec'] = speaker_results_cluster[index][s]['rec'] / \
                        speaker_results_cluster[index][s]['counter']
                    speaker_results_cluster[index][s]['cov'] = speaker_results_cluster[index][s]['cov'] / \
                        speaker_results_cluster[index][s]['counter']
                    speaker_results_cluster[index][s]['pur'] = speaker_results_cluster[index][s]['pur'] / \
                        speaker_results_cluster[index][s]['counter']
                sub_data['Speaker_data'] = speaker_results_cluster[index]
                result_data[index].append(sub_data)
                result[prefix + 'custom' + algorithm] = result_data[index]
                index += 1
        else:
            sub_data = {'type': test}
            sub_data['DER'] = avg_der / counter
            sub_data['Precision'] = avg_prec / counter
            sub_data['Recall'] = avg_rec / counter
            sub_data['Coverage'] = avg_cov / counter
            sub_data['Purity'] = avg_pur / counter
            for s in speaker_results:
                speaker_results[s]['der'] = speaker_results[s]['der'] / speaker_results[s]['counter']
                speaker_results[s]['prec'] = speaker_results[s]['prec'] / speaker_results[s]['counter']
                speaker_results[s]['rec'] = speaker_results[s]['rec'] / speaker_results[s]['counter']
                speaker_results[s]['cov'] = speaker_results[s]['cov'] / speaker_results[s]['counter']
                speaker_results[s]['pur'] = speaker_results[s]['pur'] / speaker_results[s]['counter']
            sub_data['Speaker_data'] = speaker_results
            result_data.append(sub_data)
            result[prefix + 'auto'] = result_data

    save_file = 'results.json'
    with open(save_file, 'w') as outfile:
        json.dump(result, outfile)

    return result_data
def get_diarization_metrics(reference, hypothesis, uem=None):
    metric_dict = {}
    diarization_metrics = [
        DiarizationErrorRate(),
        DiarizationCompleteness(),
        DiarizationCoverage(),
        DiarizationPurity(),
        DiarizationHomogeneity(),
    ]
    for metric in diarization_metrics:
        met = metric(reference, hypothesis, uem=uem)
        metric_dict[metric.metric_name()] = met
    return metric_dict
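# Possible usage of get_diarization_metrics (not from the original source; the toy
# annotations are assumptions). Each entry maps a pyannote metric name to its value.
from pyannote.core import Annotation, Segment

ref = Annotation()
ref[Segment(0, 10)] = 'A'
hyp = Annotation()
hyp[Segment(0, 10)] = 'spk0'

for name, value in get_diarization_metrics(ref, hyp).items():
    print('{}: {:.3f}'.format(name, value))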
from pyannote.database.util import load_rttm
from pyannote.metrics.diarization import DiarizationErrorRate
from param import *

with open(set_path) as json_file:
    set_dict = json.load(json_file)

# Build RAL (assumes C set was processed)
cases = [item.split('.')[0] for item in set_dict['r']]  # RAL needs case name only
scotus_ral = RefAudioLibrary(cases,
                             inf_lab_path + 'r' + str(encoder_rate) + '/',
                             rttm_path,
                             sd_path,
                             min_audio_len=mal)

metric = DiarizationErrorRate(collar=der_collar, skip_overlap=True)

print('T Set Encoding (no labels)')
der = []
size = []
for wav in set_dict['t']:
    case = wav.split('.')[0]
    print('Encoding Case:', case)
    embed, info, sz = case_to_dvec(audio_path + wav,
                                   device=device,
                                   verbose=verbose,
                                   rate=encoder_rate)
    if save_test_emb:
        np.save(inf_path + '{}_embeds.npy'.format(case), embed)
        np.save(inf_path + '{}_embeds_times.npy'.format(case), info[0])
    timelst = Diarize(scotus_ral,
def get_metric(self) -> Union[DetectionErrorRate, DiarizationErrorRate]:
    if self.only_sad:
        return DetectionErrorRate(collar=0.0)
    else:
        return DiarizationErrorRate(collar=0.0, skip_overlap=False)
def get_der(cfg, rttm, output_annotations):
    metric = DiarizationErrorRate(skip_overlap=True, collar=cfg.audio.collar)
    groundtruth = load_rttm(rttm)[rttm[rttm.rfind('/') + 1:rttm.find('.')]]
    der = metric(groundtruth, output_annotations, detailed=False)
    return der
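# Sketch of how get_der above derives the RTTM key (the path is a placeholder, not
# from the original source). The slice rttm[rttm.rfind('/') + 1:rttm.find('.')]
# extracts the file stem, assuming the first '.' in the path belongs to the file
# extension; pathlib's stem is a roughly equivalent, more robust alternative.
from pathlib import Path

rttm = '/data/rttm/session1.rttm'
print(rttm[rttm.rfind('/') + 1:rttm.find('.')])  # 'session1'
print(Path(rttm).stem)                           # 'session1'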
from pyannote.database.util import load_rttm
from pyannote.core import Segment, notebook
from pyannote.audio.features import RawAudio
#from IPython.display import Audio
import torch
from pyannote.metrics.diarization import DiarizationErrorRate

Audio_File = {
    'uri': 'ES2011a.Mix-Headset',
    'audio': '/home/lucas/PycharmProjects/Data/pyannote/amicorpus/ES2011a/audio/ES2011a.Mix-Headset.wav'
}

groundtruth = load_rttm(
    '/home/lucas/PycharmProjects/Data/pyannote/AMI/MixHeadset.development.rttm'
)[Audio_File['uri']]

for segment in groundtruth.get_timeline():
    print(list(groundtruth.get_labels(segment))[0])

pipeline = torch.hub.load('pyannote/pyannote-audio', 'dia_ami')
diarization = pipeline(Audio_File)
#print(diarization)

metric = DiarizationErrorRate(collar=0.25, skip_overlap=True)
der = metric(groundtruth, diarization)
print(der)
#print('done')