Example #1
from pyannote.metrics.diarization import DiarizationErrorRate


def get_mapping(reference, system):
    """Get the optimal speaker-label mapping between system and reference."""

    metric = DiarizationErrorRate()
    mapping = metric.optimal_mapping(reference, system)

    return mapping
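A minimal usage sketch, assuming pyannote.core is installed; the segments and labels below are invented. optimal_mapping returns a dict keyed by system labels, mapped to their best-matching reference labels:

from pyannote.core import Annotation, Segment

# hypothetical reference and system annotations with two speakers each
reference = Annotation()
reference[Segment(0.0, 10.0)] = 'alice'
reference[Segment(12.0, 20.0)] = 'bob'

system = Annotation()
system[Segment(0.0, 10.0)] = 'spk1'
system[Segment(12.0, 20.0)] = 'spk2'

print(get_mapping(reference, system))  # {'spk1': 'alice', 'spk2': 'bob'}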
Example #2
import logging

from pyannote.metrics.diarization import DiarizationErrorRate


def score_labels(AUDIO_RTTM_MAP,
                 all_reference,
                 all_hypothesis,
                 collar=0.25,
                 ignore_overlap=True):
    """
    calculates DER, CER, FA and MISS

    Args:
    AUDIO_RTTM_MAP : Dictionary containing information provided from manifestpath
    all_reference (list[uniq_name,Annotation]): reference annotations for score calculation
    all_hypothesis (list[uniq_name,Annotation]): hypothesis annotations for score calculation

    Returns:
    metric (pyannote.DiarizationErrorRate): Pyannote Diarization Error Rate metric object. This object contains detailed scores of each audiofile.
    mapping (dict): Mapping dict containing the mapping speaker label for each audio input

    < Caveat >
    Unlike md-eval.pl, "no score" collar in pyannote.metrics is the maximum length of
    "no score" collar from left to right. Therefore, if 0.25s is applied for "no score"
    collar in md-eval.pl, 0.5s should be applied for pyannote.metrics.

    """
    metric = None
    if len(all_reference) == len(all_hypothesis):
        metric = DiarizationErrorRate(collar=2 * collar,
                                      skip_overlap=ignore_overlap)

        mapping_dict = {}
        for (reference, hypothesis) in zip(all_reference, all_hypothesis):
            ref_key, ref_labels = reference
            _, hyp_labels = hypothesis
            # 'uem_filepath' optionally restricts scoring to annotated regions;
            # uem_timeline_from_file is a project-local helper that parses it.
            uem = AUDIO_RTTM_MAP[ref_key].get('uem_filepath', None)
            if uem is not None:
                uem = uem_timeline_from_file(uem_file=uem, uniq_name=ref_key)
            metric(ref_labels, hyp_labels, uem=uem, detailed=True)
            mapping_dict[ref_key] = metric.optimal_mapping(
                ref_labels, hyp_labels)

        DER = abs(metric)
        CER = metric['confusion'] / metric['total']
        FA = metric['false alarm'] / metric['total']
        MISS = metric['missed detection'] / metric['total']

        logging.info(
            "Cumulative results for collar {} sec and ignore_overlap {}:\n"
            "FA: {:.4f}\tMISS: {:.4f}\tDiarization ER: {:.4f}\tConfusion ER: {:.4f}".format(
                collar, ignore_overlap, FA, MISS, DER, CER))

        return metric, mapping_dict
    else:
        logging.warning(
            "Check that each ground-truth RTTM is present in the provided manifest file. "
            "Skipping calculation of Diarization Error Rate."
        )

        return None, None
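A minimal sketch of the expected input shapes, with an invented session name: each element of all_reference and all_hypothesis is a (uniq_name, Annotation) pair, and AUDIO_RTTM_MAP holds one dict per audio file (here with no UEM):

from pyannote.core import Annotation, Segment

ref = Annotation()
ref[Segment(0.0, 5.0)] = 'speaker_0'

hyp = Annotation()
hyp[Segment(0.0, 5.0)] = 'A'

AUDIO_RTTM_MAP = {'session_0': {'uem_filepath': None}}  # hypothetical manifest entry
metric, mapping = score_labels(AUDIO_RTTM_MAP, [('session_0', ref)], [('session_0', hyp)])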
Example #3
import json
import os
from pathlib import Path

from pyannote.core import Segment
from pyannote.metrics.diarization import DiarizationErrorRate

# get_colors, load_rttm, get_protocol, get_file, get_annotated, update_labels,
# get_distances_per_speaker, na and annotation_to_GeckoJSON are project-local helpers.


def gecko(args):
    hypotheses_path = args['<hypotheses_path>']
    uri = args['<uri>']
    colors = get_colors(uri)
    distances = {}
    if Path(hypotheses_path).exists():
        hypotheses = load_rttm(hypotheses_path)
        hypothesis = hypotheses[uri]
    else:  # protocol
        protocol = get_protocol(args['<hypotheses_path>'])
        reference = get_file(protocol, uri)
        hypothesis = reference['annotation']
        annotated = get_annotated(reference)
    hypotheses_path = Path(hypotheses_path)
    protocol = args['--database.task.protocol']
    features = None
    if protocol:
        protocol = get_protocol(protocol)
        embeddings = args['--embeddings']
        reference, features = get_file(protocol, uri, embeddings=embeddings)
        if args['--map']:
            print(f"mapping {uri} with {protocol}")
            diarizationErrorRate = DiarizationErrorRate()
            annotated = get_annotated(reference)
            optimal_mapping = diarizationErrorRate.optimal_mapping(
                reference['annotation'], hypothesis, annotated)
            hypothesis = hypothesis.rename_labels(mapping=optimal_mapping)

    hypothesis = update_labels(hypothesis, distances)  # tag unsure clusters

    distances_per_speaker = get_distances_per_speaker(
        features, hypothesis) if features else {}

    if args['--tag_na']:
        # assumes 'annotated' was set above (protocol reference or --map branch)
        whole_file = Segment(0., annotated.segments_boundaries_[-1])
        not_annotated = annotated.gaps(whole_file).to_annotation(na())
        hypothesis = hypothesis.crop(annotated).update(not_annotated)

    gecko_json = annotation_to_GeckoJSON(hypothesis, distances_per_speaker,
                                         colors)

    if hypotheses_path.exists():
        dir_path = hypotheses_path.parent
    else:
        dir_path = Path(".")

    json_path = os.path.join(dir_path, f'{uri}.json')
    with open(json_path, 'w') as file:
        json.dump(gecko_json, file)
    print(f"succefully dumped {json_path}")
Example #4
from pyannote.metrics.diarization import DiarizationErrorRate


def get_DER(all_reference, all_hypothesis, collar=0.5, skip_overlap=True):
    """
    calculates DER, CER, FA and MISS

    Args:
    all_reference (list[Annotation]): reference annotations for score calculation
    all_hypothesis (list[Annotation]): hypothesis annotations for score calculation

    Returns:
    DER (float): Diarization Error Rate
    CER (float): Confusion Error Rate
    FA (float): False Alarm
    Miss (float): Miss Detection 

    < Caveat >
    Unlike md-eval.pl, "no score" collar in pyannote.metrics is the maximum length of
    "no score" collar from left to right. Therefore, if 0.25s is applied for "no score"
    collar in md-eval.pl, 0.5s should be applied for pyannote.metrics.

    """
    metric = DiarizationErrorRate(collar=collar,
                                  skip_overlap=skip_overlap,
                                  uem=None)

    mapping_dict = {}
    for k, (reference,
            hypothesis) in enumerate(zip(all_reference, all_hypothesis)):
        metric(reference, hypothesis, detailed=True)
        mapping_dict[k] = metric.optimal_mapping(reference, hypothesis)

    DER = abs(metric)
    CER = metric['confusion'] / metric['total']
    FA = metric['false alarm'] / metric['total']
    MISS = metric['missed detection'] / metric['total']

    metric.reset()

    return DER, CER, FA, MISS, mapping_dict
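A minimal sketch assuming pyannote.core; segments and labels are invented. With an identical segmentation the mapped hypothesis matches the reference exactly, so DER should be 0:

from pyannote.core import Annotation, Segment

reference = Annotation()
reference[Segment(0.0, 10.0)] = 'A'

hypothesis = Annotation()
hypothesis[Segment(0.0, 10.0)] = 'a'

DER, CER, FA, MISS, mapping = get_DER([reference], [hypothesis])
print(DER, mapping)  # 0.0 {0: {'a': 'A'}}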
Example #5
from pyannote.metrics.diarization import DiarizationErrorRate


def test_optimal_mapping(reference, hypothesis):
    diarizationErrorRate = DiarizationErrorRate()
    mapping = diarizationErrorRate.optimal_mapping(reference, hypothesis)
    assert mapping == {'a': 'A', 'b': 'B', 'c': 'C'}
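The reference and hypothesis arguments are pytest fixtures that are not shown here; a sketch of fixtures consistent with the asserted mapping (the segment boundaries are invented):

import pytest
from pyannote.core import Annotation, Segment

@pytest.fixture
def reference():
    annotation = Annotation()
    annotation[Segment(0, 10)] = 'A'
    annotation[Segment(12, 20)] = 'B'
    annotation[Segment(24, 27)] = 'C'
    return annotation

@pytest.fixture
def hypothesis():
    annotation = Annotation()
    annotation[Segment(0, 10)] = 'a'
    annotation[Segment(12, 20)] = 'b'
    annotation[Segment(24, 27)] = 'c'
    return annotation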