def test_load(sample_list, sample_annotation, sample_scores):
    """Check that CoParser yields synchronized (uri, annotation, scores)."""

    parser = CoParser(uris=sample_list,
                      annotation=sample_annotation,
                      scores=sample_scores)

    # only inspect the first synchronized triple
    for uri, annotation, scores in parser.iter('uris', 'annotation', 'scores'):
        break

    assert uri == 'uri1'

    expected_tracks = [
        (Segment(1, 3.5), 0, 'alice'),
        (Segment(3, 7.5), 1, 'barbara'),
        (Segment(6, 9), 2, 'chris'),
    ]
    assert list(annotation.itertracks(label=True)) == expected_tracks

    expected_scores = [
        (Segment(1, 3.5), 0, 'alice', 0.8),
        (Segment(1, 3.5), 1, 'barbara', 0.1),
        (Segment(1, 3.5), 2, 'chris', 0.1),
        (Segment(3, 7.5), 3, 'barbara', 0.5),
        (Segment(3, 7.5), 4, 'chris', 0.4),
        (Segment(6, 9), 5, 'alice', 0.1),
        (Segment(6, 9), 6, 'barbara', 0.2),
        (Segment(6, 9), 7, 'chris', 0.7),
    ]
    assert list(scores.itervalues()) == expected_scores
def do_identification(
    references_mdtm, hypothesis_mdtm, uris_lst=None, eval_uem=None
):
    """Print per-uri and accumulated identification error rates."""

    ier = IdentificationErrorRate()

    # assemble the synchronized sources; default the uri list to the
    # uris found in the reference file when none is provided
    sources = {
        'reference': references_mdtm,
        'hypothesis': hypothesis_mdtm,
        'uris': uris_lst if uris_lst else 'reference',
    }
    if eval_uem:
        sources['uem'] = eval_uem

    parser = CoParser(**sources)

    for uri, reference, hypothesis, uem in parser.iter(
            'uris', 'reference', 'hypothesis', 'uem'):
        rate = ier(reference, hypothesis, uem=uem)
        sys.stdout.write('{uri:s}: {ier:3.2f}%\n'.format(uri=uri, ier=100 * rate))
        sys.stdout.flush()

    # abs(ier) yields the rate accumulated over all processed uris
    sys.stdout.write('Total: {ier:3.2f}%'.format(ier=100 * abs(ier)))
def test_load(sample_list, sample_annotation, sample_scores):
    """Check that CoParser yields synchronized (uri, annotation, scores)."""

    parser = CoParser(uris=sample_list,
                      annotation=sample_annotation,
                      scores=sample_scores)

    # stop right after the first synchronized triple
    for uri, annotation, scores in parser.iter('uris', 'annotation', 'scores'):
        break

    assert uri == 'uri1'

    tracks = list(annotation.itertracks(label=True))
    assert tracks == [
        (Segment(1, 3.5), 0, 'alice'),
        (Segment(3, 7.5), 1, 'barbara'),
        (Segment(6, 9), 2, 'chris'),
    ]

    values = list(scores.itervalues())
    assert values == [
        (Segment(1, 3.5), 0, 'alice', 0.8),
        (Segment(1, 3.5), 1, 'barbara', 0.1),
        (Segment(1, 3.5), 2, 'chris', 0.1),
        (Segment(3, 7.5), 3, 'barbara', 0.5),
        (Segment(3, 7.5), 4, 'chris', 0.4),
        (Segment(6, 9), 5, 'alice', 0.1),
        (Segment(6, 9), 6, 'barbara', 0.2),
        (Segment(6, 9), 7, 'chris', 0.7),
    ]
def do_train(
    uris_lst,
    references_mdtm,
    features_pkl,
    model_pkl,
    n_components=16,
    covariance_type='diag',
    min_duration=0.250,
):
    """Train a Viterbi HMM on reference annotations + features and pickle it."""

    hmm = ViterbiHMM(
        n_components=n_components,
        covariance_type=covariance_type,
        random_state=None,
        thresh=1e-2,
        min_covar=1e-3,
        n_iter=10,
        disturb=0.05,
        sampling=1000,
        min_duration=min_duration,
    )

    # synchronized generators over all uris
    parser = CoParser(uris=uris_lst,
                      reference=references_mdtm,
                      features=features_pkl)
    references, features = parser.generators('reference', 'features')

    hmm.fit(references, features)

    # persist the trained model to disk
    with open(model_pkl, 'wb') as f:
        pickle.dump(hmm, f)
def do_stats(reference_repere, hypothesis_repere,
             uris_lst=None, eval_uem=None,
             collar=0., unknown=False):
    """Print per-speaker identification error statistics for each uri."""

    analyzer = IdentificationErrorAnalysis(collar=collar, unknown=unknown)

    # assemble synchronized sources; fall back to the reference file
    # for the list of uris when none is given
    sources = {
        'reference': reference_repere,
        'hypothesis': hypothesis_repere,
        'uris': uris_lst if uris_lst else 'reference',
    }
    if eval_uem:
        sources['uem'] = eval_uem

    parser = CoParser(**sources)

    for uri, reference, hypothesis, uem in parser.iter(
            'uris', 'reference', 'hypothesis', 'uem'):

        # per-speaker error analysis matrix for this uri
        analysis = analyzer.matrix(reference, hypothesis, uem=uem)

        for spk in list(analysis.get_rows()):

            # skip 'false alarm' pseudo-speaker rows
            if isinstance(spk, tuple) and spk[0] == 'false alarm':
                continue

            ref = analysis[spk, 'reference']
            hyp = analysis[spk, 'hypothesis']
            cor = analysis[spk, 'correct']
            conf = analysis[spk, 'confusion']
            miss = analysis[spk, 'missed detection']
            fa = analysis[spk, 'false alarm']
            ier = (conf + miss + fa) / ref

            sys.stdout.write(
                TEMPLATE.format(uri=uri, spk=spk, ier=ier, ref=ref, hyp=hyp,
                                cor=cor, conf=conf, miss=miss, fa=fa))
            sys.stdout.flush()
def do_train(
    uris_lst, references_mdtm, features_pkl, model_pkl,
    n_components=16, covariance_type='diag', min_duration=0.250,
):
    """Train a Viterbi HMM on reference annotations + features and pickle it."""

    hmm = ViterbiHMM(n_components=n_components,
                     covariance_type=covariance_type,
                     random_state=None,
                     thresh=1e-2,
                     min_covar=1e-3,
                     n_iter=10,
                     disturb=0.05,
                     sampling=1000,
                     min_duration=min_duration)

    # synchronized generators over all uris
    parser = CoParser(uris=uris_lst,
                      reference=references_mdtm,
                      features=features_pkl)
    references, features = parser.generators('reference', 'features')

    hmm.fit(references, features)

    # persist the trained model to disk
    with open(model_pkl, 'wb') as fp:
        pickle.dump(hmm, fp)
def do_stats(reference_repere, hypothesis_repere,
             uris_lst=None, eval_uem=None,
             unknown=False):
    """Print per-segment identification error statistics for each uri.

    Overlapped reference speech is split evenly between 'confusion' and
    'missed detection' when reporting per-segment figures.
    """

    iter_over = {
        'reference': reference_repere,
        'hypothesis': hypothesis_repere
    }

    # default to iterating over the uris found in the reference file
    if uris_lst:
        iter_over['uris'] = uris_lst
    else:
        iter_over['uris'] = 'reference'

    if eval_uem:
        iter_over['uem'] = eval_uem

    coParser = CoParser(**iter_over)

    identificationErrorRate = IdentificationErrorRate(unknown=unknown)

    for uri, ref, hyp, uem in coParser.iter(
        'uris', 'reference', 'hypothesis', 'uem'
    ):

        # restrict both annotations to the evaluation map
        ref = ref.crop(uem, mode='intersection')
        hyp = hyp.crop(uem, mode='intersection')

        # (segment, track) --> total duration of overlap with other tracks
        overlap = {}

        # find overlap regions in reference by comparing it with itself;
        # co_iter yields all pairs of intersecting (segment, track)
        for (segment, track), (segment_, track_) in ref.co_iter(ref):

            # BUGFIX: compare against (segment_, track_); the original
            # compared with (segment, track_), which ignored segment order
            # and mis-counted overlaps between *different* segments.
            # Keeping only strictly-ordered pairs also skips self-pairs
            # and counts each unordered pair exactly once.
            if (segment, track) >= (segment_, track_):
                continue

            # overlap duration
            intersection = (segment & segment_).duration

            # increment overlap count on both sides of the pair
            overlap[segment, track] = \
                overlap.get((segment, track), 0.) + intersection
            overlap[segment_, track_] = \
                overlap.get((segment_, track_), 0.) + intersection

        # BUGFIX: keep the track name (it was discarded as `_`, leaving
        # `track` bound to a stale value from the co_iter loop above, so
        # the overlap lookup below used the wrong key)
        for segment, track, spk in ref.itertracks(label=True):

            if isinstance(spk, Unknown):
                continue

            # single-segment reference annotation for this speaker
            r = Annotation(uri=ref.uri, modality=ref.modality)
            r[segment] = spk

            h = hyp.crop(segment)

            details = identificationErrorRate(r, h, detailed=True)
            # overlapped duration is split evenly: half to confusion,
            # half to missed detection
            ovr = overlap.get((segment, track), 0)
            ier = details[IER_NAME]
            cor = details[IER_CORRECT]
            conf = details[IER_CONFUSION] - 0.5 * ovr
            miss = details[IER_MISS] + .5 * ovr
            fa = details[IER_FALSE_ALARM]

            sys.stdout.write(
                TEMPLATE.format(uri=uri, start=segment.start, end=segment.end,
                                spk=spk, ier=ier, cor=cor, conf=conf, miss=miss,
                                fa=fa))

            sys.stdout.flush()