def performance_metrics(df_labels, df_embeddings_verification, track_embedding,
                        cfg, frame_list, iteration):
    speaker_list = df_labels.columns.tolist()
    df_precision = pd.DataFrame(columns=speaker_list,
                                index=cfg.audio.threshold)
    df_roc = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_recall = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_far = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_frr = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    der = []
    metric = DiarizationErrorRate(skip_overlap=True, collar=cfg.audio.collar)
    groundtruth = load_rttm(cfg.audio.rttm_path)[cfg.audio.uri[iteration]]
    for threshold in cfg.audio.threshold:
        df_output = multi_speaker_verification(
            track_embedding=track_embedding,
            df_labels=df_labels,
            df_embeddings_verification=df_embeddings_verification,
            threshold=threshold)
        for speaker in speaker_list:
            # scikit-learn raises ValueError when a metric is undefined
            # (e.g. only one class is present); fall back to 0 in that case
            try:
                df_precision.loc[threshold, speaker] = precision_score(
                    df_labels[speaker], df_output[speaker], average='binary')
            except ValueError:
                df_precision.loc[threshold, speaker] = 0
            try:
                df_recall.loc[threshold, speaker] = recall_score(
                    df_labels[speaker], df_output[speaker], average='binary')
            except ValueError:
                df_recall.loc[threshold, speaker] = 0
            try:
                df_roc.loc[threshold, speaker] = roc_auc_score(
                    df_labels[speaker], df_output[speaker], average=None)
            except ValueError:
                df_roc.loc[threshold, speaker] = 0
            try:
                far, frr = FAR_FRR(y_true=df_labels[speaker],
                                   y_pred=df_output[speaker])
                df_far.loc[threshold, speaker] = far
                df_frr.loc[threshold, speaker] = frr
            except ValueError:
                df_far.loc[threshold, speaker] = 0
                df_frr.loc[threshold, speaker] = 0

        # detailed=True returns the DER components for this threshold
        components = metric(groundtruth,
                            merge_frames(df_outputs=df_output,
                                         frame_list=frame_list,
                                         filename=cfg.audio.uri[iteration] +
                                         '_' + str(threshold)),
                            detailed=True)
        der.append(components)
    return df_precision, df_recall, df_roc, df_far, df_frr, der
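
FAR_FRR is not defined in this example; a minimal sketch consistent with how it is called above (binary ground truth vs. binary decisions for one speaker) could be:

from sklearn.metrics import confusion_matrix

def FAR_FRR(y_true, y_pred):
    # false acceptance rate = FP / (FP + TN); false rejection rate = FN / (FN + TP)
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    far = fp / (fp + tn) if (fp + tn) else 0.0
    frr = fn / (fn + tp) if (fn + tp) else 0.0
    return far, frr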
Example no. 2
def DER(df_labels, df_outputs, frame_list, cfg, collar):
    speaker_list = df_labels.columns.tolist()
    rttm_segment = load_rttm(cfg.audio.rttm_path)[cfg.audio.uri[0]]
    E_MISS = 0.0   # missed speech
    E_FA = 0.0     # false alarm speech
    E_Spk = 0.0    # speaker confusion
    reference_length = 0.0
    for i, frame in enumerate(frame_list):
        frame_start, frame_end = float(frame[0]), float(frame[1])
        segments = []

        for segment in rttm_segment.get_timeline():
            if list(rttm_segment.get_labels(segment))[0] in speaker_list:
                intersection = max(
                    0,
                    min(frame_end, segment.end) -
                    max(frame_start, segment.start))
                if intersection > collar:
                    segments.append(segment)
                    reference_length = reference_length + intersection
        if len(segments) == 0:
            # no reference speech in this frame: any active output is a false alarm
            if 1 in df_outputs.iloc[i, :].to_numpy():
                E_FA = E_FA + (frame_end - frame_start)
        else:
            if 1 not in df_outputs.iloc[i, :].to_numpy():
                # reference speech but no active output: missed speech
                E_MISS = E_MISS + (frame_end - frame_start)
            else:
                active_speakers = [
                    list(rttm_segment.get_labels(interval))[0]
                    for interval in segments
                ]
                for idx, active_spk in enumerate(active_speakers):
                    if df_outputs.loc[i, active_spk] == 0:
                        E_Spk = E_Spk + (segments[idx].end -
                                         segments[idx].start)
                inactive_speakers = set(speaker_list) - set(active_speakers)
                for spk in inactive_speakers:
                    if df_outputs.loc[i, spk] == 1:
                        E_Spk = E_Spk + (frame_end - frame_start)

    print('reference length:', reference_length)
    print('missed speech:', E_MISS)
    print('false alarm:', E_FA)
    print('speaker confusion:', E_Spk)
    return (E_MISS + E_Spk + E_FA) / reference_length
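
A self-contained toy call of DER, assuming a cfg object exposing audio.rttm_path and audio.uri as above (all names and values are illustrative):

import os
import tempfile
from types import SimpleNamespace
import pandas as pd

tmpdir = tempfile.mkdtemp()
rttm_path = os.path.join(tmpdir, 'demo.rttm')
with open(rttm_path, 'w') as f:
    # one reference speaker, active from 0s to 1s
    f.write("SPEAKER demo 1 0.00 1.00 <NA> <NA> spk_A <NA> <NA>\n")

cfg = SimpleNamespace(audio=SimpleNamespace(rttm_path=rttm_path, uri=['demo']))
frame_list = [(0.0, 0.5), (0.5, 1.0)]
df_labels = pd.DataFrame({'spk_A': [1, 1]})
df_outputs = pd.DataFrame({'spk_A': [1, 0]})  # the second frame is missed
print(DER(df_labels, df_outputs, frame_list, cfg, collar=0.0))  # 0.5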
Example no. 3
def gecko(args):
    hypotheses_path = args['<hypotheses_path>']
    uri = args['<uri>']
    colors = get_colors(uri)
    distances = {}
    if Path(hypotheses_path).exists():
        hypotheses = load_rttm(hypotheses_path)
        hypothesis = hypotheses[uri]
    else:  # protocol
        protocol = get_protocol(args['<hypotheses_path>'])
        reference = get_file(protocol, uri)
        hypothesis = reference['annotation']
        annotated = get_annotated(reference)
    hypotheses_path = Path(hypotheses_path)
    protocol = args['--database.task.protocol']
    features = None
    if protocol:
        protocol = get_protocol(protocol)
        embeddings = args['--embeddings']
        reference, features = get_file(protocol, uri, embeddings=embeddings)
        if args['--map']:
            print(f"mapping {uri} with {protocol}")
            diarizationErrorRate = DiarizationErrorRate()
            annotated = get_annotated(reference)
            optimal_mapping = diarizationErrorRate.optimal_mapping(
                reference['annotation'], hypothesis, annotated)
            hypothesis = hypothesis.rename_labels(mapping=optimal_mapping)

    hypothesis = update_labels(hypothesis, distances)  # tag unsure clusters

    distances_per_speaker = get_distances_per_speaker(
        features, hypothesis) if features else {}

    if args['--tag_na']:
        # note: `annotated` is only set on the protocol code paths above
        whole_file = Segment(0., annotated.segments_boundaries_[-1])
        not_annotated = annotated.gaps(whole_file).to_annotation(na())
        hypothesis = hypothesis.crop(annotated).update(not_annotated)

    gecko_json = annotation_to_GeckoJSON(hypothesis, distances_per_speaker,
                                         colors)

    if hypotheses_path.exists():
        dir_path = hypotheses_path.parent
    else:
        dir_path = Path(".")

    json_path = os.path.join(dir_path, f'{uri}.json')
    with open(json_path, 'w') as file:
        json.dump(gecko_json, file)
    print(f"succefully dumped {json_path}")
Example no. 4
def speakers(args):
    hypotheses_path = args['<hypotheses_path>']
    uri = args['<uri>']
    if Path(hypotheses_path).exists():
        hypotheses = load_rttm(hypotheses_path)
        hypothesis = hypotheses[uri]
    else:  # protocol
        protocol = get_protocol(args['<hypotheses_path>'])
        reference = get_file(protocol, uri)
        hypothesis = reference['annotation']
        annotated = get_annotated(reference)
    print(uri)
    print(f"Number of speakers: {len(hypothesis.labels())}")
    print(f"Chart:\n{hypothesis.chart()}")
Example no. 5
    def from_rttm(cls, path: Union[str, Path]) -> 'Continuum':
        """
        Load annotations from a RTTM file. The file name field will be used
        as an annotation's annotator

        Parameters
        ----------
        path: Path or str
            Path to the CSV file storing annotations

        Returns
        -------
        continuum : Continuum
            New continuum object loaded from the RTTM file
        """
        annotations = load_rttm(str(path))
        continuum = cls()
        for uri, annot in annotations.items():
            continuum.add_annotation(uri, annot)
        return continuum
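
A hedged usage sketch (the path is hypothetical); every uri in the RTTM file becomes one annotator of the continuum:

continuum = Continuum.from_rttm('annotations.rttm')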
Example no. 6
def get_friends_per_speaker(rttm_files, BABYTRAIN):
    """
    Given a list of .rttm files, return the dictionnary
    whose :
        - key is a speaker.
        - value is a list of speakers that appear in the same file than the key.

    If BABYTRAIN = True, skip the speakers whose name does not start by !
    """
    friends_per_speaker = {}
    for rttm in rttm_files:
        basename = os.path.splitext(os.path.basename(rttm))[0]
        data = pyda.load_rttm(rttm)

        if data != {}:
            annotation = data[basename]

            # Get the list of speakers participating in this file
            speakers = annotation.labels()

            if BABYTRAIN:
                speakers = [s for s in speakers if s.startswith("!")]

            # For each speaker, record everyone who shares a file with them.
            for speaker in speakers:
                if speaker not in friends_per_speaker:
                    friends_per_speaker[speaker] = set(speakers)
                else:
                    friends_per_speaker[speaker] |= set(speakers)

    # Replace friends of investigators with an empty set
    if BABYTRAIN:
        for k, v in friends_per_speaker.items():
            if k.startswith("!INV"):
                friends_per_speaker[k] = set()

    return friends_per_speaker
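
A hedged usage sketch with hypothetical file paths:

rttm_files = ['data/fileA.rttm', 'data/fileB.rttm']  # hypothetical paths
friends = get_friends_per_speaker(rttm_files, BABYTRAIN=False)
for speaker, friend_set in sorted(friends.items()):
    print(speaker, sorted(friend_set))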
Example no. 7
def get_der(cfg, rttm, output_annotations):
    metric = DiarizationErrorRate(skip_overlap=True, collar=cfg.audio.collar)
    # derive the uri from the file name (text before the first '.')
    uri = os.path.basename(rttm).split('.')[0]
    groundtruth = load_rttm(rttm)[uri]
    der = metric(groundtruth, output_annotations, detailed=False)
    return der
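
A self-contained sketch of calling get_der on a toy one-speaker file; all names and values are illustrative:

import os
import tempfile
from types import SimpleNamespace
from pyannote.core import Annotation, Segment

tmpdir = tempfile.mkdtemp()
rttm = os.path.join(tmpdir, 'demo.rttm')
with open(rttm, 'w') as f:
    f.write("SPEAKER demo 1 0.00 1.00 <NA> <NA> spk_A <NA> <NA>\n")

hypothesis = Annotation(uri='demo')
hypothesis[Segment(0.0, 1.0)] = 'spk_A'

cfg = SimpleNamespace(audio=SimpleNamespace(collar=0.25))
print(get_der(cfg, rttm, hypothesis))  # 0.0 for a perfect hypothesis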
Example no. 8
def main():
    arguments = docopt(__doc__, version='Evaluation')

    collar = float(arguments['--collar'])
    skip_overlap = arguments['--skip-overlap']
    tolerance = float(arguments['--tolerance'])

    # protocol
    protocol_name = arguments['<database.task.protocol>']

    preprocessors = dict()
    if arguments['overlap']:
        if skip_overlap:
            msg = ('Option --skip-overlap is not supported '
                   'when evaluating overlapped speech detection.')
            sys.exit(msg)
        preprocessors = {'annotation': to_overlap}

    protocol = get_protocol(protocol_name,
                            progress=True,
                            preprocessors=preprocessors)

    # subset (train, development, or test)
    subset = arguments['--subset']

    if arguments['spotting']:

        hypothesis_json = arguments['<hypothesis.json>']
        with open(hypothesis_json, mode='r') as fp:
            hypotheses = json.load(fp)

        output_prefix = hypothesis_json[:-5]

        latencies = [float(latency) for latency in arguments['--latency']]

        filters = arguments['--filter']
        if filters:
            from sympy import sympify, lambdify, symbols
            speech = symbols('speech')
            filter_funcs = [
                lambdify([speech], sympify(expression))
                for expression in filters
            ]
            filter_func = lambda speech: \
                any(~func(speech) for func in filter_funcs)
        else:
            filter_func = None

        spotting(protocol,
                 subset,
                 latencies,
                 hypotheses,
                 output_prefix,
                 filter_func=filter_func)

        sys.exit(0)

    hypothesis_rttm = arguments['<hypothesis.rttm>']

    try:
        hypotheses = load_rttm(hypothesis_rttm)

    except FileNotFoundError:
        msg = f'Could not find file {hypothesis_rttm}.'
        sys.exit(msg)

    except Exception:
        msg = (f'Failed to load {hypothesis_rttm}, please check its format '
               f'(only RTTM files are supported).')
        sys.exit(msg)

    if arguments['detection']:
        detection(protocol,
                  subset,
                  hypotheses,
                  collar=collar,
                  skip_overlap=skip_overlap)

    if arguments['overlap']:
        detection(protocol,
                  subset,
                  hypotheses,
                  collar=collar,
                  skip_overlap=skip_overlap)

    if arguments['segmentation']:
        segmentation(protocol, subset, hypotheses, tolerance=tolerance)

    if arguments['diarization']:
        greedy = arguments['--greedy']
        diarization(protocol,
                    subset,
                    hypotheses,
                    greedy=greedy,
                    collar=collar,
                    skip_overlap=skip_overlap)

    if arguments['identification']:
        identification(protocol,
                       subset,
                       hypotheses,
                       collar=collar,
                       skip_overlap=skip_overlap)
Example no. 10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "path",
        type=str,
        help="Relative path to the database folder, containing "
        "train, dev, and test sub-folders.")  # positional argument
    parser.add_argument("-d",
                        "--duration",
                        type=int,
                        default=60,
                        help="Duration of a trial. (Default to 60 seconds)")

    parser.add_argument(
        '--bbt',
        action='store_true',
        help="Indicates whether the corpus is BabyTrain. If true, "
        "skip all speakers whose name doesn't start with '!'.")
    args = parser.parse_args()

    # Parameters
    DURATION_TRIAL = args.duration
    DATABASE_PATH = os.path.join(
        os.getcwd(), args.path)  # needs to loop through dev and test
    BABYTRAIN = args.bbt

    # Header
    trials_txt = "target_speaker\tfile_basename\tbeginning_time\tend_time\tduration_total_speech\tduration_overlapping_speech\n"

    # Extract target and non-target trials
    rttm_files = utils.get_dev_test_rttm(DATABASE_PATH)

    # First, get the dictionary of friends for every speaker.
    friends_per_speaker = get_friends_per_speaker(rttm_files, BABYTRAIN)

    for rttm in rttm_files:
        basename = os.path.splitext(os.path.basename(rttm))[0]
        data = pyda.load_rttm(rttm)

        if data != {}:
            annotation = data[basename]

            participants = annotation.labels()
            if BABYTRAIN:
                participants = [p for p in participants if p.startswith("!")]
            all_friends = get_friends_of_participants(friends_per_speaker,
                                                      participants)
            last_offset = annotation.get_timeline()[-1][1]

            for end in range(DURATION_TRIAL, int(last_offset), DURATION_TRIAL):
                beg = end - DURATION_TRIAL
                chunk = annotation.crop(Segment(beg, end))
                targets = chunk.labels()

                if BABYTRAIN:
                    targets = [t for t in targets if t.startswith("!")]
                overlapping_chunk = utils.overlapping_annotation(chunk)
                # A speaker is defined as a target speaker for a chunk c,
                # when he/she is speaking in c.
                for target in targets:
                    tot_speech = chunk.label_duration(target)
                    overlapping_speech = overlapping_chunk.label_duration(
                        target)
                    trials_txt += "%s\t%s\t%d\t%d\t%.3f\t%.3f\n" % (
                        target, basename, beg, end, tot_speech,
                        overlapping_speech)

                non_targets = list(set(all_friends) - set(targets))
                # A speaker is a non-target speaker for a chunk c when he/she
                # does not speak in c but speaks in some file in which one of
                # the target speakers also participates.
                for non_target in non_targets:
                    trials_txt += "%s\t%s\t%d\t%d\t%.1f\t%.1f\n" % (
                        non_target, basename, beg, end, 0.0, 0.0)

    with open(os.path.join(DATABASE_PATH, "trials_%d.txt" % DURATION_TRIAL),
              "w") as f:
        f.write(trials_txt[:-1])

    print("trials.txt generated in %s" % DATABASE_PATH)
Example no. 11
                                   device=device,
                                   verbose=verbose,
                                   rate=encoder_rate)
    if save_test_emb:
        np.save(inf_path + '{}_embeds.npy'.format(case), embed)
        np.save(inf_path + '{}_embeds_times.npy'.format(case), info[0])
    timelst = Diarize(scotus_ral,
                      embed,
                      info[0],
                      sim_thresh=diar,
                      score_thresh=score)
    diar_to_rttm(timelst, case, di_path)
    rttmto_RALrttm(case, scotus_ral, rttm_path, di_path)
    predict = case + '_rdsv.rttm'
    ral_label = case + '_ral.rttm'
    predictions = load_rttm(di_path + predict)[case]
    groundtruths = load_rttm(di_path + ral_label)[case]
    der.append(
        metric(groundtruths, predictions,
               detailed=True)['diarization error rate'])
    size.append(sz)

bycase = list(zip([item.split('.')[0] for item in set_dict['t']], der, size))
desc = stats.describe(der)
settings = ['Param:', encoder_rate, '|', diar, '-', score]
with open(test_eval_path, 'w') as f:
    write = csv.writer(f)
    write.writerow(settings)
    write.writerows(bycase)
    write.writerow(desc)
Example no. 12
from pyannote.database.util import load_rttm
from pyannote.core import Segment, notebook
from pyannote.audio.features import RawAudio
#from IPython.display import Audio
import torch
from pyannote.metrics.diarization import DiarizationErrorRate

Audio_File = {
    'uri': 'ES2011a.Mix-Headset',
    'audio': '/home/lucas/PycharmProjects/Data/pyannote/amicorpus/ES2011a/audio/ES2011a.Mix-Headset.wav'
}
groundtruth = load_rttm(
    '/home/lucas/PycharmProjects/Data/pyannote/AMI/MixHeadset.development.rttm'
)[Audio_File['uri']]
for segment in groundtruth.get_timeline():
    print(list(groundtruth.get_labels(segment))[0])

pipeline = torch.hub.load('pyannote/pyannote-audio', 'dia_ami')
diarization = pipeline(Audio_File)

#print(diarization)

metric = DiarizationErrorRate(collar=0.25, skip_overlap=True)
der = metric(groundtruth, diarization)

print(der)
#print('done')
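
The same metric object can also report its components; a small hedged extension of the script above:

components = metric(groundtruth, diarization, detailed=True)
print(components['false alarm'], components['missed detection'],
      components['confusion'])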
Example no. 13
def performance_metrics(df_labels, df_embeddings_verification, track_embedding,
                        cfg, frame_list, iteration):
    speaker_list = df_labels.columns.tolist()
    df_precision = pd.DataFrame(columns=speaker_list,
                                index=cfg.audio.threshold)
    df_roc = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_recall = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_far = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    df_frr = pd.DataFrame(columns=speaker_list, index=cfg.audio.threshold)
    der = []
    metric = DiarizationErrorRate(skip_overlap=True, collar=cfg.audio.collar)
    groundtruth = load_rttm(cfg.audio.rttm_path)[cfg.audio.uri[iteration]]
    for threshold in cfg.audio.threshold:
        df_output = speaker_verification(
            track_embedding=track_embedding,
            df_labels=df_labels,
            df_embeddings_verification=df_embeddings_verification,
            threshold=threshold)
        for speaker in speaker_list:
            # scikit-learn raises ValueError when a metric is undefined;
            # fall back to 0 in that case
            try:
                df_precision.loc[threshold, speaker] = precision_score(
                    df_labels[speaker], df_output[speaker], average='binary')
            except ValueError:
                df_precision.loc[threshold, speaker] = 0
            try:
                df_recall.loc[threshold, speaker] = recall_score(
                    df_labels[speaker], df_output[speaker], average='binary')
            except ValueError:
                df_recall.loc[threshold, speaker] = 0
            try:
                df_roc.loc[threshold, speaker] = roc_auc_score(
                    df_labels[speaker], df_output[speaker], average=None)
            except ValueError:
                df_roc.loc[threshold, speaker] = 0
            try:
                far, frr = FAR_FRR(y_true=df_labels[speaker],
                                   y_pred=df_output[speaker])
                df_far.loc[threshold, speaker] = far
                df_frr.loc[threshold, speaker] = frr
            except ValueError:
                df_far.loc[threshold, speaker] = 0
                df_frr.loc[threshold, speaker] = 0

        # detailed=True returns the DER components for this threshold
        components = metric(groundtruth,
                            merge_frames(df_outputs=df_output,
                                         frame_list=frame_list,
                                         filename=cfg.audio.uri[iteration] +
                                         '_' + str(threshold)),
                            detailed=True)
        der.append(components)
    return df_precision, df_recall, df_roc, df_far, df_frr, der
Example no. 14
def main():
    arguments = docopt(__doc__, version="Evaluation")

    collar = float(arguments["--collar"])
    skip_overlap = arguments["--skip-overlap"]
    tolerance = float(arguments["--tolerance"])

    # protocol
    protocol_name = arguments["<database.task.protocol>"]

    preprocessors = dict()
    if arguments["overlap"]:
        if skip_overlap:
            msg = ("Option --skip-overlap is not supported "
                   "when evaluating overlapped speech detection.")
            sys.exit(msg)
        preprocessors = {"annotation": to_overlap}

    protocol = get_protocol(protocol_name, preprocessors=preprocessors)

    # subset (train, development, or test)
    subset = arguments["--subset"]

    if arguments["spotting"]:

        hypothesis_json = arguments["<hypothesis.json>"]
        with open(hypothesis_json, mode="r") as fp:
            hypotheses = json.load(fp)

        output_prefix = hypothesis_json[:-5]

        latencies = [float(latency) for latency in arguments["--latency"]]

        filters = arguments["--filter"]
        if filters:
            from sympy import sympify, lambdify, symbols

            speech = symbols("speech")
            filter_funcs = [
                lambdify([speech], sympify(expression))
                for expression in filters
            ]
            filter_func = lambda speech: any(~func(speech)
                                             for func in filter_funcs)
        else:
            filter_func = None

        spotting(
            protocol,
            subset,
            latencies,
            hypotheses,
            output_prefix,
            filter_func=filter_func,
        )

        sys.exit(0)

    hypothesis_rttm = arguments["<hypothesis.rttm>"]

    try:
        hypotheses = load_rttm(hypothesis_rttm)

    except FileNotFoundError:
        msg = f"Could not find file {hypothesis_rttm}."
        sys.exit(msg)

    except Exception:
        msg = (f"Failed to load {hypothesis_rttm}, please check its format "
               f"(only RTTM files are supported).")
        sys.exit(msg)

    if arguments["detection"]:
        detection(protocol,
                  subset,
                  hypotheses,
                  collar=collar,
                  skip_overlap=skip_overlap)

    if arguments["overlap"]:
        detection(protocol,
                  subset,
                  hypotheses,
                  collar=collar,
                  skip_overlap=skip_overlap)

    if arguments["segmentation"]:
        segmentation(protocol, subset, hypotheses, tolerance=tolerance)

    if arguments["diarization"]:
        greedy = arguments["--greedy"]
        diarization(
            protocol,
            subset,
            hypotheses,
            greedy=greedy,
            collar=collar,
            skip_overlap=skip_overlap,
        )

    if arguments["identification"]:
        identification(protocol,
                       subset,
                       hypotheses,
                       collar=collar,
                       skip_overlap=skip_overlap)
Example no. 15
    def __init__(self, sad_rttm: Path = None):
        self.sad_rttm = sad_rttm
        self.sad_ = load_rttm(self.sad_rttm)
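
A hedged sketch of a wrapper class around the initializer above, assuming self.sad_ maps each uri to a pyannote Annotation (the class and method names are hypothetical):

from pathlib import Path
from pyannote.database.util import load_rttm

class SpeechActivityMask:  # hypothetical name
    def __init__(self, sad_rttm: Path = None):
        self.sad_rttm = sad_rttm
        self.sad_ = load_rttm(self.sad_rttm)

    def speech_timeline(self, uri: str):
        # speech regions of one file, as a pyannote Timeline
        return self.sad_[uri].get_timeline()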
Example no. 16
def main():
    argparser = argparse.ArgumentParser()
    argparser.add_argument('system',
                           type=str,
                           help='Path to the system\'s output')
    argparser.add_argument('protocol',
                           type=str,
                           help='The protocol on which you want to evaluate '
                           'your system')
    argparser.add_argument('subset',
                           type=str,
                           help='The subset of the database on which you want '
                           'to evaluate your system.\n'
                           'Choose between [train, test, development].\n'
                           'Default is test.')
    # note: store_false means args.vad defaults to True and becomes False
    # when --vad is passed
    argparser.add_argument('--vad',
                           action='store_false',
                           help='(OPTIONAL) Enable when evaluating a VAD '
                           'system; this way only speech/non-speech metrics '
                           'will be reported.')

    args = argparser.parse_args()

    # Create timeline for both reference & system
    system = load_rttm(args.system)
    #system_sils = system.get_timeline().gaps()
    #system_spch = system.get_timeline()

    # get Reference using Pyannote Protocol
    protocol = get_protocol(args.protocol)

    items = list(getattr(protocol, args.subset)())
    reference = {item['uri']: item['annotation'] for item in items}

    results = dict()
    for uri in reference:
        # prefix r: reference
        # prefix s: system
        r_annot = reference[uri]
        # in case the uri was not evaluated by the system, skip it
        try:
            s_annot = system[uri]
        except KeyError:
            continue

        r_labels = {
            lab: r_annot.label_timeline(lab)
            for lab in r_annot.labels()
        }
        s_labels = {
            lab: s_annot.label_timeline(lab)
            for lab in s_annot.labels()
        }

        if not args.vad:
            mapping = get_mapping(r_annot, s_annot)
        else:
            mapping = None

        # accumulate results, reference side
        dur = get_speech_duration(r_annot, uri)
        print(uri)
        print(dur)
        correct, miss_spk, miss_speech = accumulate_reference(
            r_labels, s_labels, mapping, dur)

        # Both "correct" should be the same
        _, FA_spk, FA_speech = accumulate_system(r_labels, s_labels, mapping,
                                                 dur)

        results[uri] = (correct, FA_spk, FA_speech, miss_spk, miss_speech)
    # Evaluate each wav referenced in the system output. If not VAD-only:
    # for each label (FEM, MAL, CHI, KCHI), measure the time spent in
    # correct / false alarm speaker / false alarm speech /
    # missed speaker / missed speech.
    write_evaluation(results, args.vad)
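
get_speech_duration and the accumulate_* helpers are not shown in this example; a minimal sketch of the former, consistent with its call above, could be:

def get_speech_duration(annotation, uri):
    # uri is kept to match the call site; total speech time is the sum of
    # segment durations in the annotation's support
    return annotation.get_timeline().support().duration()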