Code Example #1
def get_DER(all_reference, all_hypothesis):
    """
    calculates DER, CER, FA and MISS

    Args:
    all_reference (list[Annotation]): reference annotations for score calculation
    all_hypothesis (list[Annotation]): hypothesis annotations for score calculation

    Returns:
    DER (float): Diarization Error Rate
    CER (float): Confusion Error Rate
    FA (float): False Alarm
    Miss (float): Miss Detection 

    """
    metric = DiarizationErrorRate(collar=0.25, skip_overlap=True)
    DER = 0

    for reference, hypothesis in zip(all_reference, all_hypothesis):
        metric(reference, hypothesis, detailed=True)

    DER = abs(metric)
    CER = metric['confusion'] / metric['total']
    FA = metric['false alarm'] / metric['total']
    MISS = metric['missed detection'] / metric['total']

    metric.reset()

    return DER, CER, FA, MISS
Code Example #2
File: speaker_utils.py  Project: tiborkiss/NeMo
def get_DER(all_reference, all_hypothesis):
    """
    calculates DER, CER, FA and MISS

    Args:
    all_reference (list[Annotation]): reference annotations for score calculation
    all_hypothesis (list[Annotation]): hypothesis annotations for score calculation

    Returns:
    DER (float): Diarization Error Rate
    CER (float): Confusion Error Rate
    FA (float): False Alarm
    Miss (float): Miss Detection 

    < Caveat >
    Unlike md-eval.pl, "no score" collar in pyannote.metrics is the maximum length of
    "no score" collar from left to right. Therefore, if 0.25s is applied for "no score"
    collar in md-eval.pl, 0.5s should be applied for pyannote.metrics.

    """
    metric = DiarizationErrorRate(collar=0.5, skip_overlap=True)

    for reference, hypothesis in zip(all_reference, all_hypothesis):
        metric(reference, hypothesis, detailed=True)

    DER = abs(metric)
    CER = metric['confusion'] / metric['total']
    FA = metric['false alarm'] / metric['total']
    MISS = metric['missed detection'] / metric['total']

    metric.reset()

    return DER, CER, FA, MISS
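A minimal usage sketch for the get_DER helper above (the toy annotations and speaker labels are illustrative assumptions; Annotation and Segment come from pyannote.core, which the snippets on this page already rely on):

from pyannote.core import Annotation, Segment

# one reference/hypothesis pair per audio file
reference = Annotation()
reference[Segment(0.0, 10.0)] = 'speaker_A'
reference[Segment(10.0, 15.0)] = 'speaker_B'

hypothesis = Annotation()
hypothesis[Segment(0.0, 10.0)] = 'spk1'
hypothesis[Segment(10.0, 14.0)] = 'spk2'

DER, CER, FA, MISS = get_DER([reference], [hypothesis])
print(DER, CER, FA, MISS)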
Code Example #3
def get_mapping(reference, system):
    """ get speaker mapping between system and reference"""

    metric = DiarizationErrorRate()
    mapping = metric.optimal_mapping(reference, system)

    return mapping
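A short sketch of what get_mapping returns (toy labels are assumptions): optimal_mapping maps each hypothesis label to the reference label it best matches, as the tests further down also show.

from pyannote.core import Annotation, Segment

reference = Annotation()
reference[Segment(0, 5)] = 'Alice'
reference[Segment(5, 10)] = 'Bob'

system = Annotation()
system[Segment(0, 5)] = 'spk0'
system[Segment(5, 10)] = 'spk1'

print(get_mapping(reference, system))  # {'spk0': 'Alice', 'spk1': 'Bob'}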
Code Example #4
def score_labels(AUDIO_RTTM_MAP,
                 all_reference,
                 all_hypothesis,
                 collar=0.25,
                 ignore_overlap=True):
    """
    calculates DER, CER, FA and MISS

    Args:
    AUDIO_RTTM_MAP : Dictionary containing information provided from manifestpath
    all_reference (list[uniq_name,Annotation]): reference annotations for score calculation
    all_hypothesis (list[uniq_name,Annotation]): hypothesis annotations for score calculation

    Returns:
    metric (pyannote.DiarizationErrorRate): Pyannote Diarization Error Rate metric object. This object contains detailed scores of each audiofile.
    mapping (dict): Mapping dict containing the mapping speaker label for each audio input

    < Caveat >
    Unlike md-eval.pl, "no score" collar in pyannote.metrics is the maximum length of
    "no score" collar from left to right. Therefore, if 0.25s is applied for "no score"
    collar in md-eval.pl, 0.5s should be applied for pyannote.metrics.

    """
    metric = None
    if len(all_reference) == len(all_hypothesis):
        metric = DiarizationErrorRate(collar=2 * collar,
                                      skip_overlap=ignore_overlap)

        mapping_dict = {}
        for (reference, hypothesis) in zip(all_reference, all_hypothesis):
            ref_key, ref_labels = reference
            _, hyp_labels = hypothesis
            uem = AUDIO_RTTM_MAP[ref_key].get('uem_filepath', None)
            if uem is not None:
                uem = uem_timeline_from_file(uem_file=uem, uniq_name=ref_key)
            metric(ref_labels, hyp_labels, uem=uem, detailed=True)
            mapping_dict[ref_key] = metric.optimal_mapping(
                ref_labels, hyp_labels)

        DER = abs(metric)
        CER = metric['confusion'] / metric['total']
        FA = metric['false alarm'] / metric['total']
        MISS = metric['missed detection'] / metric['total']

        logging.info(
            "Cumulative Results for collar {} sec and ignore_overlap {}: \n FA: {:.4f}\t MISS {:.4f}\t \
                Diarization ER: {:.4f}\t, Confusion ER:{:.4f}".format(
                collar, ignore_overlap, FA, MISS, DER, CER))

        return metric, mapping_dict
    else:
        logging.warning(
            "check if each ground truth RTTMs were present in provided manifest file. Skipping calculation of Diariazation Error Rate"
        )

        return None
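A hedged usage sketch for score_labels (the manifest entry, unique name, and labels below are made-up placeholders; in NeMo the AUDIO_RTTM_MAP is built from the manifest file):

import logging
from pyannote.core import Annotation, Segment

logging.basicConfig(level=logging.INFO)

ref = Annotation()
ref[Segment(0.0, 5.0)] = 'spk_a'
hyp = Annotation()
hyp[Segment(0.0, 5.0)] = 'speaker_0'

# hypothetical manifest entry keyed by the unique file name; no UEM file attached
audio_rttm_map = {'session_0': {}}

metric, mapping = score_labels(audio_rttm_map,
                               all_reference=[('session_0', ref)],
                               all_hypothesis=[('session_0', hyp)],
                               collar=0.25,
                               ignore_overlap=True)
print(mapping)  # e.g. {'session_0': {'speaker_0': 'spk_a'}}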
Code Example #5
def test_bug_16():
    reference = Annotation()
    reference[Segment(0, 10)] = 'A'
    hypothesis = Annotation()

    metric = DiarizationErrorRate(collar=1)
    total = metric(reference, hypothesis, detailed=True)['total']
    npt.assert_almost_equal(total, 9, decimal=3)

    metric = DiarizationErrorRate(collar=0)
    total = metric(reference, hypothesis, detailed=True)['total']
    npt.assert_almost_equal(total, 10, decimal=3)
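The test above pins down how the collar behaves: half of it is trimmed on each side of every reference boundary. A small sketch (same toy annotations) showing the scored total shrinking as the collar grows:

from pyannote.core import Annotation, Segment
from pyannote.metrics.diarization import DiarizationErrorRate

reference = Annotation()
reference[Segment(0, 10)] = 'A'
hypothesis = Annotation()

for collar in (0.0, 0.5, 1.0):
    metric = DiarizationErrorRate(collar=collar)
    total = metric(reference, hypothesis, detailed=True)['total']
    print(collar, total)  # 0.0 -> 10, 0.5 -> 9.5, 1.0 -> 9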
Code Example #6
File: visualize.py  Project: PaulLerner/Prune
def gecko(args):
    hypotheses_path = args['<hypotheses_path>']
    uri = args['<uri>']
    colors = get_colors(uri)
    distances = {}
    if Path(hypotheses_path).exists():
        hypotheses = load_rttm(hypotheses_path)
        hypothesis = hypotheses[uri]
    else:  # protocol
        protocol = get_protocol(args['<hypotheses_path>'])
        reference = get_file(protocol, uri)
        hypothesis = reference['annotation']
        annotated = get_annotated(reference)
    hypotheses_path = Path(hypotheses_path)
    protocol = args['--database.task.protocol']
    features = None
    if protocol:
        protocol = get_protocol(protocol)
        embeddings = args['--embeddings']
        reference, features = get_file(protocol, uri, embeddings=embeddings)
        if args['--map']:
            print(f"mapping {uri} with {protocol}")
            diarizationErrorRate = DiarizationErrorRate()
            annotated = get_annotated(reference)
            optimal_mapping = diarizationErrorRate.optimal_mapping(
                reference['annotation'], hypothesis, annotated)
            hypothesis = hypothesis.rename_labels(mapping=optimal_mapping)

    hypothesis = update_labels(hypothesis, distances)  # tag unsure clusters

    distances_per_speaker = get_distances_per_speaker(
        features, hypothesis) if features else {}

    if args['--tag_na']:
        whole_file = Segment(0., annotated.segments_boundaries_[-1])
        not_annotated = annotated.gaps(whole_file).to_annotation(na())
        hypothesis = hypothesis.crop(annotated).update(not_annotated)

    gecko_json = annotation_to_GeckoJSON(hypothesis, distances_per_speaker,
                                         colors)

    if hypotheses_path.exists():
        dir_path = hypotheses_path.parent
    else:
        dir_path = Path(".")

    json_path = os.path.join(dir_path, f'{uri}.json')
    with open(json_path, 'w') as file:
        json.dump(gecko_json, file)
    print(f"succefully dumped {json_path}")
Code Example #7
def DER(outfile, AudioDataSet, annotationlist, audioLength):
    reference = Annotation()

    if not AudioDataSet == 'DiaExample':
        treeA = ET.parse(annotationlist[0])
        rootA = treeA.getroot()
        for child in rootA.findall('segment'):
            start, end = float(child.get('transcriber_start')), float(
                child.get('transcriber_end'))
            reference[Segment(start, end)] = 'A'

        treeB = ET.parse(annotationlist[1])
        rootB = treeB.getroot()
        for child in rootB.findall('segment'):
            start, end = float(child.get('transcriber_start')), float(
                child.get('transcriber_end'))
            reference[Segment(start, end)] = 'B'

        treeC = ET.parse(annotationlist[2])
        rootC = treeC.getroot()
        for child in rootC.findall('segment'):
            start, end = float(child.get('transcriber_start')), float(
                child.get('transcriber_end'))
            reference[Segment(start, end)] = 'C'

        treeD = ET.parse(annotationlist[3])
        rootD = treeD.getroot()
        for child in rootD.findall('segment'):
            start, end = float(child.get('transcriber_start')), float(
                child.get('transcriber_end'))
            reference[Segment(start, end)] = 'D'
    else:
        reference = Annotation()
        reference[Segment(0.15, 3.41)] = 'A'
        reference[Segment(3.83, 5.82)] = 'A'
        reference[Segment(6.75, 11.10)] = 'B'
        reference[Segment(11.32, 15.8)] = 'C'
        reference[Segment(15.9, 18.8)] = 'B'
        reference[Segment(18.8, 27.8)] = 'C'
        reference[Segment(27.8, 34.4)] = 'B'
        reference[Segment(34.4, 42)] = 'D'

    hypothesis = Annotation()
    f = open(outfile, 'r')
    for line in f.readlines():
        start = float(line.split(' ')[3])
        end = start + float(line.split(' ')[4])
        annotation = line.split(' ')[5][0:-1]
        hypothesis[Segment(start, end)] = annotation
    f.close()
    metric = DiarizationErrorRate()
    metricPurity = DiarizationPurity()
    uem = Timeline([Segment(0, audioLength)])

    print('DER: %.2f %%' % (metric(reference, hypothesis, uem=uem) * 100))
    print('Cluster Purity: %.2f %%' %
          (metricPurity(reference, hypothesis, uem=uem) * 100))

    return metric, reference, hypothesis
Code Example #8
def performance_metrics(df_labels, df_embeddings_verification, track_embedding,
                        cfg, frame_list, iteration):
    speaker_list = df_labels.columns.tolist()
    df_precision = pd.DataFrame(columns=speaker_list,
                                index=cfg.audio.threshold)
    df_roc = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_recall = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_far = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_frr = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    der = []
    metric = DiarizationErrorRate(skip_overlap=True, collar=cfg.audio.collar)
    groundtruth = load_rttm(cfg.audio.rttm_path)[cfg.audio.uri[iteration]]
    for threshold in cfg.audio.threshold:
        df_output = multi_speaker_verification(
            track_embedding=track_embedding,
            df_labels=df_labels,
            df_embeddings_verification=df_embeddings_verification,
            threshold=threshold)
        for speaker in speaker_list:
            try:
                df_precision.loc[threshold,
                                 speaker] = precision_score(df_labels[speaker],
                                                            df_output[speaker],
                                                            average='binary')
            except:
                df_precision.loc[threshold, speaker] = 0
            try:
                df_recall.loc[threshold,
                              speaker] = recall_score(df_labels[speaker],
                                                      df_output[speaker],
                                                      average='binary')
            except:
                df_recall.loc[threshold, speaker] = 0
            try:
                df_roc.loc[threshold,
                           speaker] = roc_auc_score(df_labels[speaker],
                                                    df_output[speaker],
                                                    average=None)
            except:
                df_roc.loc[threshold, speaker] = 0
            try:
                far, frr = FAR_FRR(y_true=df_labels[speaker],
                                   y_pred=df_output[speaker])
                df_far.loc[threshold, speaker] = far
                df_frr.loc[threshold, speaker] = frr
            except:
                df_far.loc[threshold, speaker] = 0
                df_frr.loc[threshold, speaker] = 0

        components = metric(groundtruth,
                            merge_frames(df_outputs=df_output,
                                         frame_list=frame_list,
                                         filename=cfg.audio.uri[iteration] +
                                         '_' + str(threshold)),
                            detailed=True)
        components = metric[:]
        der.append(components)
    return df_precision, df_recall, df_roc, df_far, df_frr, der
Code Example #9
    def get_der(self, ref_file, scores):
        """ Compute Diarization Error Rate from reference and scores.

            :param ref_file: path to file with diarization reference
            :type ref_file: str
            :param scores: input scores from PLDA model
            :type scores: numpy.array
        """
        ref, hyp = self.init_annotations()
        with open(ref_file, 'r') as f:
            for line in f:
                _, name, _, start, duration, _, _, speaker, _ = line.split()
                ref[name][Segment(float(start),
                                  float(start) + float(duration))] = speaker
        for ivecset in self.ivecs:
            if ivecset.size() > 0:
                name, reg_name = ivecset.name, ivecset.name
                # dirty trick, will be removed, watch out
                if 'beamformed' in name:
                    reg_name = re.sub('beamformed/', '', name)
                # # # # # # # # # # # # # # # # # # # # #
                reg_name = re.sub('/.*', '', reg_name)
                for i, ivec in enumerate(ivecset.ivecs):
                    start, end = ivec.window_start / 1000.0, ivec.window_end / 1000.0
                    hyp[reg_name][Segment(start,
                                          end)] = np.argmax(scores[name].T[i])
            else:
                logwarning(
                    '[Diarization.get_der] No i-vectors to dump in {}.'.format(
                        ivecset.name))
        der = DiarizationErrorRate()
        der.collar = 0.25
        names, values, summ = [], [], 0.0
        for name in ref.keys():
            names.append(name)
            der_num = der(ref[name], hyp[name]) * 100
            values.append(der_num)
            summ += der_num
            loginfo('[Diarization.get_der] {} DER = {}'.format(
                name, '{0:.3f}'.format(der_num)))
        loginfo('[Diarization.get_der] Average DER = {}'.format(
            '{0:.3f}'.format(summ / float(len(ref.keys())))))
        Diarization.plot_der(names, values)
Code Example #10
def diarization(protocol,
                subset,
                hypotheses,
                greedy=False,
                collar=0.0,
                skip_overlap=False):

    options = {
        'collar': collar,
        'skip_overlap': skip_overlap,
        'parallel': True
    }

    metrics = {
        'purity': DiarizationPurity(**options),
        'coverage': DiarizationCoverage(**options)
    }

    if greedy:
        metrics['error'] = GreedyDiarizationErrorRate(**options)
    else:
        metrics['error'] = DiarizationErrorRate(**options)

    reports = get_reports(protocol, subset, hypotheses, metrics)

    report = metrics['error'].report(display=False)
    purity = metrics['purity'].report(display=False)
    coverage = metrics['coverage'].report(display=False)

    report['purity', '%'] = purity[metrics['purity'].name, '%']
    report['coverage', '%'] = coverage[metrics['coverage'].name, '%']

    columns = list(report.columns)
    report = report[[columns[0]] + columns[-2:] + columns[1:-2]]

    report = reindex(report)

    summary = 'Diarization ({0:s}collar = {1:g} ms{2})'.format(
        'greedy, ' if greedy else '', 1000 * collar,
        ', no overlap' if skip_overlap else '')

    headers = [summary] + \
              [report.columns[i][0] for i in range(3)] + \
              ['%' if c[1] == '%' else c[0] for c in report.columns[3:]]

    print(
        tabulate(report,
                 headers=headers,
                 tablefmt="simple",
                 floatfmt=".2f",
                 numalign="decimal",
                 stralign="left",
                 missingval="",
                 showindex="default",
                 disable_numparse=False))
Code Example #11
def get_der(true_annotation, pred_annotation):
    """Calculate Diarization Error Rate - only the confusion. """

    metric = DiarizationErrorRate(collar=0.5)
    start = true_annotation.get_timeline().extent().start
    end = true_annotation.get_timeline().extent().end
    components = metric(true_annotation, pred_annotation, detailed=True, uem=Segment(start, end))
    der_rate = components['confusion'] / components['total']  # Only consider confusion.
    print("DER = {0:.3f}".format(der_rate))

    return der_rate
Code Example #12
File: der.py  Project: neurudan/ZHAW_deep_voice
def diarization_error_rate(y_true, y_pred, times):
    '''
    :param y_true: Ground truth speakers per utterance
    :param y_pred: Predicted speakers per utterance
    :param times: time per utterance in seconds (the seconds can be a float, they will be converted and rounded to
        integer milliseconds)
    :return: The Diarization Error Rate (DER)
    '''
    metric = DiarizationErrorRate()
    reference = _generate_annotations(y_true, times)
    hypothesis = _generate_annotations(y_pred, times)
    value = metric(reference, hypothesis)
    return value
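The _generate_annotations helper used here is not shown on this page. As a rough, hypothetical stand-in, per-utterance labels and durations could be laid out back-to-back on the timeline like this:

from pyannote.core import Annotation, Segment

def _generate_annotations_sketch(labels, times):
    # Hypothetical stand-in only: assumes utterances are contiguous on the timeline.
    annotation = Annotation()
    offset = 0.0
    for label, duration in zip(labels, times):
        annotation[Segment(offset, offset + duration)] = str(label)
        offset += duration
    return annotation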
Code Example #13
def calculate_der(reference_filename, hypothesis_filename):
    lbls = Util.read_audacity_labels(reference_filename)
    reference = Annotation()
    for lbl in lbls:
        reference[Segment(lbl.start_seconds, lbl.end_seconds)] = lbl.label

    predicted_lbls = Util.read_audacity_labels(hypothesis_filename)
    hypothesis = Annotation()
    for lbl in predicted_lbls:
        if lbl.label != 'non_speech':
            hypothesis[Segment(lbl.start_seconds, lbl.end_seconds)] = lbl.label

    metric = DiarizationErrorRate()
    der = metric(reference, hypothesis)
    return der
Code Example #14
def test_detailed(reference, hypothesis):
    diarizationErrorRate = DiarizationErrorRate()
    details = diarizationErrorRate(reference, hypothesis, detailed=True)

    confusion = details['confusion']
    npt.assert_almost_equal(confusion, 7.0, decimal=7)

    correct = details['correct']
    npt.assert_almost_equal(correct, 22.0, decimal=7)

    rate = details['diarization error rate']
    npt.assert_almost_equal(rate, 0.5161290322580645, decimal=7)

    false_alarm = details['false alarm']
    npt.assert_almost_equal(false_alarm, 7.0, decimal=7)

    missed_detection = details['missed detection']
    npt.assert_almost_equal(missed_detection, 2.0, decimal=7)

    total = details['total']
    npt.assert_almost_equal(total, 31.0, decimal=7)
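The asserted components also recombine into the headline rate; a quick arithmetic check using the values from the assertions above:

false_alarm, missed_detection, confusion, total = 7.0, 2.0, 7.0, 31.0
der = (false_alarm + missed_detection + confusion) / total
print(der)  # 0.5161290322580645, matching the 'diarization error rate' detail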
Code Example #15
def test_optimal_mapping(reference, hypothesis):
    diarizationErrorRate = DiarizationErrorRate()
    mapping = diarizationErrorRate.optimal_mapping(reference, hypothesis)
    assert mapping == {'a': 'A', 'b': 'B', 'c': 'C'}
Code Example #16
def test_leep_overlap(reference_with_overlap, hypothesis):
    metric = DiarizationErrorRate(skip_overlap=False)
    total = metric(reference_with_overlap, hypothesis, detailed=True)['total']
    npt.assert_almost_equal(total, 34, decimal=3)
Code Example #17
def test_optimal_mapping(reference, hypothesis):
    diarizationErrorRate = DiarizationErrorRate()
    mapping = diarizationErrorRate.optimal_mapping(reference, hypothesis)
    assert mapping == {'a': 'A', 'b': 'B', 'c': 'C'}
Code Example #18
def test_error_rate(reference, hypothesis):
    diarizationErrorRate = DiarizationErrorRate()
    error_rate = diarizationErrorRate(reference, hypothesis)
    npt.assert_almost_equal(error_rate, 0.5161290322580645, decimal=7)
Code Example #19
def main(reference_dir, hypothesis_dir, output_dir):

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    flist = os.listdir(reference_dir)
    total_references = len(flist)
    total_hypotheses = len(os.listdir(hypothesis_dir))

    if total_references == 0:  # no references available
        score_f = os.path.join(output_dir, 'score.seconds')
        score = open(score_f, 'w')
        score.write('No references available.\n')
        score.write('references {0}\n'.format(total_references))
        score.write('hypotheses {0}\n'.format(total_hypotheses))
        sys.exit(0)

    collar = 0.1  # collar in seconds

    der_eval = DiarizationErrorRate(collar=collar)
    ier_eval = IdentificationErrorRate(collar=collar)
    prec_eval = IdentificationPrecision(collar=collar)
    rec_eval = IdentificationRecall(collar=collar)

    skip_tokens = ['OVERLAP', 'SPN']
    skip_tokens_child = ['OVERLAP', 'SPN', 'SLT']

    missing_hypotheses = 0
    missing_hypotheses_seconds = 0
    utt_scores = []

    for f in flist:
        ref_f = os.path.join(reference_dir, f)
        hyp_f = os.path.join(hypothesis_dir, f)

        reference       = read_annotation(ref_f, \
            annotation_type='reference', skip_tokens=skip_tokens)
        reference_child = read_annotation(ref_f, \
            annotation_type='reference', skip_tokens=skip_tokens_child)

        if not os.path.isfile(hyp_f):
            missing_hypotheses += 1
            missed_sum = sum(
                [i.end - i.start for i in reference.itersegments()])
            missing_hypotheses_seconds += missed_sum

        # read_annotation can handle non-existing files
        hypothesis       = read_annotation(hyp_f, \
            annotation_type='hypothesis', skip_tokens=skip_tokens)
        hypothesis_child = read_annotation(hyp_f, \
            annotation_type='hypothesis', skip_tokens=skip_tokens_child)

        # find global min and max
        time_ref = [[i.start, i.end] for i in reference.itersegments()]
        time_hyp = [[i.start, i.end] for i in hypothesis.itersegments()]
        min_f = min([i for i, e in time_hyp] + [i for i, e in time_ref])
        max_f = max([e for i, e in time_hyp] + [e for i, e in time_ref])

        # evaluate DER
        der = der_eval(reference, hypothesis, \
            uem=Segment(min_f, max_f), detailed=True)

        # find global min and max
        time_ref = [[i.start, i.end] for i in reference_child.itersegments()]
        time_hyp = [[i.start, i.end] for i in hypothesis_child.itersegments()]
        min_f = min([i for i, e in time_hyp] + [i for i, e in time_ref])
        max_f = max([e for i, e in time_hyp] + [e for i, e in time_ref])

        # evaluate IER
        ier = ier_eval(reference_child, hypothesis_child, \
            uem=Segment(min_f, max_f), detailed=True)
        prec = prec_eval(reference_child, hypothesis_child, \
            uem=Segment(min_f, max_f))
        rec  = rec_eval(reference_child, hypothesis_child, \
            uem=Segment(min_f, max_f))
        f1 = 0 if prec == 0 or rec == 0 else 2 * (prec * rec) / (prec + rec)

        ref_labs = ' '.join(reference.labels())
        hyp_labs = ' '.join(hypothesis.labels())

        ref_labs = ' '.join(
            [label for _, _, label in reference.itertracks(yield_label=True)])
        hyp_labs = ' '.join(
            [label for _, _, label in hypothesis.itertracks(yield_label=True)])

        if not hyp_labs: hyp_labs = 'no_alignment'
        utt_scores.append([f, prec, rec, f1, der, ier, ref_labs, hyp_labs])

    # global scores
    ier = abs(ier_eval)
    der = abs(der_eval)
    precision = abs(prec_eval)
    recall = abs(rec_eval)
    f1 = 0 if precision == 0 or recall == 0 else 2 * (precision * recall) / (
        precision + recall)

    # keys to intermediate metrics
    keys = ['correct', 'missed detection', 'false alarm', \
        'confusion', 'total', 'diarization error rate']
    aggregate = {k: 0 for k in keys}

    ## global correct, missed, false alarm, confusion
    for item in utt_scores:
        der_errors = item[4]
        for key in keys:
            aggregate[key] += der_errors[key]
        ier_errors = item[5]
        item_ier = ier_errors['identification error rate']
        aggregate['der'] = item_ier

    if aggregate['total'] == 0: aggregate['total'] = 1
    # write global scores to file
    score_f = os.path.join(output_dir, 'score.seconds')
    score = open(score_f, 'w')

    score.write('precision {0:.3f}\n'.format(precision))
    score.write('recall {0:.3f}\n'.format(recall))
    score.write('f1 score {0:.3f}\n\n'.format(f1))

    score.write('IER {0:.3f}\n\n'.format(ier))

    score.write('DER {0:.3f}\n'.format(der))
    score.write('  missed {0:.3f}\n'.format(aggregate['missed detection'] /
                                            aggregate['total']))
    score.write('  false alarm {0:.3f}\n'.format(aggregate['false alarm'] /
                                                 aggregate['total']))
    score.write('  confusion {0:.3f}\n'.format(aggregate['confusion'] /
                                               aggregate['total']))
    score.write('  correct {0:.3f}\n'.format(aggregate['correct'] /
                                             aggregate['total']))
    score.write('\n')

    score.write('total files {0}\n'.format(total_references))
    score.write('alignment failures\n')
    score.write('  total utterances: {0}\n'.format(missing_hypotheses))
    score.write('  total seconds in failed utterances: {0}\n\n'.format(
        missing_hypotheses_seconds))

    score.write('precision details\n')
    for i in prec_eval[:]:
        score.write('  {0} {1}\n'.format(i, prec_eval[:][i]))

    score.write('\n')
    score.write('recall details\n')
    for i in rec_eval[:]:
        score.write('  {0} {1}\n'.format(i, rec_eval[:][i]))

    score.close()

    # write detailed scores to file sorted by DER
    # columns: filename, precision, recall, f1, reference_words, hypothesis_words
    report_f = os.path.join(output_dir, 'report.seconds')
    report = open(report_f, 'w')

    header = [
        'filename', 'precision', 'recall', 'f1', 'correct', 'missed',
        'false_alarm', 'confusion', 'total', 'der', 'ier', 'reference_words',
        'hypothesis_words'
    ]
    report.write('\t'.join(header) + '\n')

    for item in sorted(utt_scores,
                       key=lambda x: x[4]['diarization error rate']):
        data = []
        # filename
        data.append(item[0])
        # precision, recall, f1
        for i in range(1, 3 + 1):
            data.append('{0:.3f}'.format(item[i]))

        # DER related scores
        errors = item[4]
        for key in keys:
            value = '{0:.3f}'.format(errors[key])
            data.append(value)

        # IER score
        ier = item[5]['identification error rate']
        data.append('{0:.3f}'.format(ier))

        data.append(item[-2])
        data.append(item[-1])

        report.write('\t'.join(data) + '\n')
    report.close()
Code Example #20
import time
import torch

from pyannote.database import FileFinder, get_protocol
from pyannote.metrics.diarization import DiarizationErrorRate, JaccardErrorRate

preprocessors = {'audio': FileFinder()}
protocol = get_protocol('VOXCON.SpeakerDiarization.Challenge', preprocessors=preprocessors)

diarization_pipeline = torch.hub.load('pyannote/pyannote-audio', 'dia_dihard', device = 'gpu')

ders = []
jers = []
hypotheses = []

derMetric = DiarizationErrorRate(collar=0.25)
jerMetric = JaccardErrorRate(collar=0.25)

for file in protocol.test():
    hypothesis = diarization_pipeline(file)
    hypotheses.append(hypothesis)

    reference = file["annotation"]
    # uem = file['annotated']
    der = derMetric(reference, hypothesis)
    jer = jerMetric(reference, hypothesis)
    ders.append(der)
    jers.append(jer)

    uri = file['uri']
    print(f'{uri} DER = {100 * der:.1f}% JER = {100 * jer:.1f}% {time.strftime("%H:%M:%S")}')
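This loop keeps one DER value per file. Note that the mean of those values is not the corpus-level rate: DiarizationErrorRate accumulates components across calls, and abs(metric) (used in the other examples) weights each file by its reference duration. A toy two-file sketch with made-up data:

from pyannote.core import Annotation, Segment
from pyannote.metrics.diarization import DiarizationErrorRate

ref_a = Annotation()
ref_a[Segment(0, 10)] = 'A'
hyp_a = Annotation()
hyp_a[Segment(0, 10)] = 'spk1'   # 10 s, fully correct

ref_b = Annotation()
ref_b[Segment(0, 2)] = 'A'
hyp_b = Annotation()             # 2 s, fully missed

metric = DiarizationErrorRate()
per_file = [metric(r, h) for r, h in [(ref_a, hyp_a), (ref_b, hyp_b)]]
print(sum(per_file) / len(per_file))  # 0.5: unweighted mean of per-file DER
print(abs(metric))                    # 2 / 12 ≈ 0.167: duration-weighted cumulative DER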
Code Example #21
def performance_metrics(df_labels, df_embeddings_verification, track_embedding,
                        cfg, frame_list, iteration):
    speaker_list = df_labels.columns.tolist()
    df_precision = pd.DataFrame(columns=speaker_list,
                                index=cfg.audio.threshold)
    df_roc = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_recall = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_far = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_frr = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    der = []
    metric = DiarizationErrorRate(skip_overlap=True, collar=cfg.audio.collar)
    groundtruth = load_rttm(cfg.audio.rttm_path)[cfg.audio.uri[iteration]]
    for threshold in cfg.audio.threshold:
        df_output = speaker_verification(
            track_embedding=track_embedding,
            df_labels=df_labels,
            df_embeddings_verification=df_embeddings_verification,
            threshold=threshold)
        for speaker in speaker_list:
            try:
                df_precision.loc[threshold,
                                 speaker] = precision_score(df_labels[speaker],
                                                            df_output[speaker],
                                                            average='binary')
            except:
                df_precision.loc[threshold, speaker] = 0
            try:
                df_recall.loc[threshold,
                              speaker] = recall_score(df_labels[speaker],
                                                      df_output[speaker],
                                                      average='binary')
            except:
                df_recall.loc[threshold, speaker] = 0
            try:
                df_roc.loc[threshold,
                           speaker] = roc_auc_score(df_labels[speaker],
                                                    df_output[speaker],
                                                    average=None)
            except:
                df_roc.loc[threshold, speaker] = 0
            try:
                far, frr = FAR_FRR(y_true=df_labels[speaker],
                                   y_pred=df_output[speaker])
                df_far.loc[threshold, speaker] = far
                df_frr.loc[threshold, speaker] = frr
            except:
                df_far.loc[threshold, speaker] = 0
                df_frr.loc[threshold, speaker] = 0

        #der.append(metric(groundtruth, merge_frames(df_outputs=df_output, frame_list=frame_list, filename='try1_'+str(threshold))))
        components = metric(groundtruth,
                            merge_frames(df_outputs=df_output,
                                         frame_list=frame_list,
                                         filename=cfg.audio.uri[iteration] +
                                         '_' + str(threshold)),
                            detailed=True)
        components = metric[:]
        # print('False alarm: {}, Missed_Detection: {}, Confusion{}, Total {}'.format(DER['false alarm'], DER['missed detection'], DER['confusion'], DER['total']))
        #if DER <= 1:
        der.append(components)
        #else:
        #    der.append(1.0)
    return df_precision, df_recall, df_roc, df_far, df_frr, der
Code Example #22
def test(custom=True, prefix=''):
    der = DiarizationErrorRate(collar=0.5)
    prec = DetectionPrecision(collar=0.5)
    recall = DetectionRecall(collar=0.5)
    coverage = SegmentationCoverage()
    purity = SegmentationPurity()

    result = {}
    if os.path.exists('results.json'):
        with open('results.json') as json_file:
            result = json.load(json_file)
    base_test = prefix + 'audio/'
    test_files = os.listdir(base_test)

    test_path = base_test + test_files[0] + '/new_data/'
    test_types = [
        name for name in os.listdir(test_path)
        if os.path.isdir(os.path.join(test_path, name))
    ]

    result_data = []
    if (custom):
        for _ in clusterings:
            result_data.append([])

    for test in test_types:
        avg_der = 0
        avg_prec = 0
        avg_rec = 0
        avg_cov = 0
        avg_pur = 0
        counter = 0

        speaker_results = {}
        cluster_results = []
        speaker_results_cluster = []

        for _ in clusterings:
            cluster_results.append({
                'der': 0,
                'prec': 0,
                'rec': 0,
                'cov': 0,
                'pur': 0
            })

            speaker_results_cluster.append({})

        for f in test_files:
            test_file = base_test + f
            data_file = test_file + '/new_data/' + f + '.json'
            with open(data_file) as f:
                data = json.load(f)
                for sub_f in data:
                    counter += 1
                    sub_f_data = data[sub_f]
                    true_labels = sub_f_data['labels']
                    true_speakers = sub_f_data['no_speakers']
                    speakers_int = OrderedSet(
                        map(lambda x: x['speaker'], true_labels))

                    for i, s in enumerate(speakers_int):
                        for datadict in true_labels:
                            if datadict['speaker'] == s:
                                datadict['speaker'] = i + 1

                    true_annotation = convert_to_annotation(true_labels)

                    pred_path = test_file + '/new_data/' + test + '/'
                    pred_file = sub_f if test == 'default' else sub_f.split(
                        '.')[0] + '_' + test + '.wav'
                    audio = {'uri': pred_file, 'audio': pred_path + pred_file}

                    if (custom):
                        long_turns, _, _, embeddings = predict(audio)
                        index = 0
                        for algorithm in clusterings:
                            if (custom):
                                pred_annotation = cluster_annotation(
                                    long_turns, embeddings, true_speakers,
                                    algorithm)
                            if (type(pred_annotation) is tuple
                                    or pred_annotation == Annotation()):
                                continue
                            pred_annotation = pred_annotation.rename_labels(
                                generator='int')

                            der_res = der(true_annotation, pred_annotation)
                            prec_res = prec(true_annotation, pred_annotation)
                            rec_res = recall(true_annotation, pred_annotation)
                            cov_res = coverage(true_annotation,
                                               pred_annotation)
                            pur_res = purity(true_annotation, pred_annotation)

                            cluster_results[index]['der'] += der_res
                            cluster_results[index]['prec'] += prec_res
                            cluster_results[index]['rec'] += rec_res
                            cluster_results[index]['cov'] += cov_res
                            cluster_results[index]['pur'] += pur_res

                            if not true_speakers in speaker_results_cluster[
                                    index]:
                                speaker_results_cluster[index][
                                    true_speakers] = {
                                        'der': 0,
                                        'prec': 0,
                                        'rec': 0,
                                        'cov': 0,
                                        'pur': 0,
                                        'counter': 0
                                    }

                            speaker_results_cluster[index][true_speakers][
                                'der'] += der_res
                            speaker_results_cluster[index][true_speakers][
                                'prec'] += prec_res
                            speaker_results_cluster[index][true_speakers][
                                'rec'] += rec_res
                            speaker_results_cluster[index][true_speakers][
                                'cov'] += cov_res
                            speaker_results_cluster[index][true_speakers][
                                'pur'] += pur_res
                            speaker_results_cluster[index][true_speakers][
                                'counter'] += 1

                            index += 1
                    else:
                        pred_annotation = pipeline(
                            {'audio': pred_path + pred_file})

                        der_res = der(true_annotation, pred_annotation)
                        prec_res = prec(true_annotation, pred_annotation)
                        rec_res = recall(true_annotation, pred_annotation)
                        cov_res = coverage(true_annotation, pred_annotation)
                        pur_res = purity(true_annotation, pred_annotation)

                        avg_der += der_res
                        avg_prec += prec_res
                        avg_rec += rec_res
                        avg_cov += cov_res
                        avg_pur += pur_res

                        if not true_speakers in speaker_results:
                            speaker_results[true_speakers] = {
                                'der': 0,
                                'prec': 0,
                                'rec': 0,
                                'cov': 0,
                                'pur': 0,
                                'counter': 0
                            }

                        speaker_results[true_speakers]['der'] += der_res
                        speaker_results[true_speakers]['prec'] += prec_res
                        speaker_results[true_speakers]['rec'] += rec_res
                        speaker_results[true_speakers]['cov'] += cov_res
                        speaker_results[true_speakers]['pur'] += pur_res
                        speaker_results[true_speakers]['counter'] += 1
        if custom:
            index = 0
            for algorithm in clusterings:
                cluster_data = cluster_results[index]
                sub_data = {'type': test}
                sub_data['DER'] = cluster_data['der'] / counter
                sub_data['Precision'] = cluster_data['prec'] / counter
                sub_data['Recall'] = cluster_data['rec'] / counter
                sub_data['Coverage'] = cluster_data['cov'] / counter
                sub_data['Purity'] = cluster_data['pur'] / counter

                for s in speaker_results_cluster[index]:
                    speaker_results_cluster[index][s]['der'] = speaker_results_cluster[index][s]['der'] / \
                        speaker_results_cluster[index][s]['counter']
                    speaker_results_cluster[index][s]['prec'] = speaker_results_cluster[index][s]['prec'] / \
                        speaker_results_cluster[index][s]['counter']
                    speaker_results_cluster[index][s]['rec'] = speaker_results_cluster[index][s]['rec'] / \
                        speaker_results_cluster[index][s]['counter']
                    speaker_results_cluster[index][s]['cov'] = speaker_results_cluster[index][s]['cov'] / \
                        speaker_results_cluster[index][s]['counter']
                    speaker_results_cluster[index][s]['pur'] = speaker_results_cluster[index][s]['pur'] / \
                        speaker_results_cluster[index][s]['counter']

                sub_data['Speaker_data'] = speaker_results_cluster[index]

                result_data[index].append(sub_data)
                result[prefix + 'custom' + algorithm] = result_data[index]
                index += 1
        else:
            sub_data = {'type': test}
            sub_data['DER'] = avg_der / counter
            sub_data['Precision'] = avg_prec / counter
            sub_data['Recall'] = avg_rec / counter
            sub_data['Coverage'] = avg_cov / counter
            sub_data['Purity'] = avg_pur / counter

            for s in speaker_results:
                speaker_results[s]['der'] = speaker_results[s]['der'] / \
                    speaker_results[s]['counter']
                speaker_results[s]['prec'] = speaker_results[s]['prec'] / \
                    speaker_results[s]['counter']
                speaker_results[s]['rec'] = speaker_results[s]['rec'] / \
                    speaker_results[s]['counter']
                speaker_results[s]['cov'] = speaker_results[s]['cov'] / \
                    speaker_results[s]['counter']
                speaker_results[s]['pur'] = speaker_results[s]['pur'] / \
                    speaker_results[s]['counter']

            sub_data['Speaker_data'] = speaker_results

            result_data.append(sub_data)
            result[prefix + 'auto'] = result_data

        save_file = 'results.json'

        with open(save_file, 'w') as outfile:
            json.dump(result, outfile)
    return result_data
Code Example #23
def get_diarization_metrics(reference, hypothesis, uem=None):
    metric_dict = {}
    metric = DiarizationErrorRate()
    met = metric(reference, hypothesis, uem=uem)
    metric_dict[metric.metric_name()] = met
    metric = DiarizationCompleteness()
    met = metric(reference, hypothesis, uem=uem)
    metric_dict[metric.metric_name()] = met
    metric = DiarizationCoverage()
    met = metric(reference, hypothesis, uem=uem)
    metric_dict[metric.metric_name()] = met
    metric = DiarizationPurity()
    met = metric(reference, hypothesis, uem=uem)
    metric_dict[metric.metric_name()] = met
    metric = DiarizationHomogeneity()
    met = metric(reference, hypothesis, uem=uem)
    metric_dict[metric.metric_name()] = met

    return metric_dict
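A minimal usage sketch for get_diarization_metrics (the toy annotations are assumptions; the function above uses each metric's metric_name() as the dictionary key):

from pyannote.core import Annotation, Segment

reference = Annotation()
reference[Segment(0, 5)] = 'A'
reference[Segment(5, 10)] = 'B'

hypothesis = Annotation()
hypothesis[Segment(0, 6)] = 'spk1'
hypothesis[Segment(6, 10)] = 'spk2'

for name, value in get_diarization_metrics(reference, hypothesis).items():
    print(name, value)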
Code Example #24
from pyannote.database.util import load_rttm
from pyannote.metrics.diarization import DiarizationErrorRate
from param import *

with open(set_path) as json_file:
    set_dict = json.load(json_file)

# Build RAL (assumes C set was processed)
cases = [item.split('.')[0]
         for item in set_dict['r']]  #RAL needs case name only
scotus_ral = RefAudioLibrary(cases,
                             inf_lab_path + 'r' + str(encoder_rate) + '/',
                             rttm_path,
                             sd_path,
                             min_audio_len=mal)
metric = DiarizationErrorRate(collar=der_collar, skip_overlap=True)

print('T Set Encoding (no labels)')
der = []
size = []
for wav in set_dict['t']:
    case = wav.split('.')[0]
    print('Encoding Case:', case)
    embed, info, sz = case_to_dvec(audio_path + wav,
                                   device=device,
                                   verbose=verbose,
                                   rate=encoder_rate)
    if save_test_emb:
        np.save(inf_path + '{}_embeds.npy'.format(case), embed)
        np.save(inf_path + '{}_embeds_times.npy'.format(case), info[0])
    timelst = Diarize(scotus_ral,
Code Example #25
    def get_metric(self) -> Union[DetectionErrorRate, DiarizationErrorRate]:
        if self.only_sad:
            return DetectionErrorRate(collar=0.0)
        else:
            return DiarizationErrorRate(collar=0.0, skip_overlap=False)
Code Example #26
def get_der(cfg, rttm, output_annotations):
    metric = DiarizationErrorRate(skip_overlap=True, collar=cfg.audio.collar)
    groundtruth = load_rttm(rttm)[rttm[rttm.rfind('/')+1:rttm.find('.')]]
    der = metric(groundtruth, output_annotations, detailed=False)
    return der
Code Example #27
from pyannote.database.util import load_rttm
from pyannote.core import Segment, notebook
from pyannote.audio.features import RawAudio
#from IPython.display import Audio
import torch
from pyannote.metrics.diarization import DiarizationErrorRate

Audio_File = {
    'uri':
    'ES2011a.Mix-Headset',
    'audio':
    '/home/lucas/PycharmProjects/Data/pyannote/amicorpus/ES2011a/audio/ES2011a.Mix-Headset.wav'
}
groundtruth = load_rttm(
    '/home/lucas/PycharmProjects/Data/pyannote/AMI/MixHeadset.development.rttm'
)[Audio_File['uri']]
for segment in groundtruth.get_timeline():
    print(list(groundtruth.get_labels(segment))[0])

pipeline = torch.hub.load('pyannote/pyannote-audio', 'dia_ami')
diarization = pipeline(Audio_File)

#print(diarization)

metric = DiarizationErrorRate(collar=0.25, skip_overlap=True)
der = metric(groundtruth, diarization)

print(der)
#print('done')