Ejemplo n.º 1
0
    def test_false_rejection_rate_with_no_occurences_returns_zero(self):
        """The false rejection rate is 0.0 when the first label list given to evaluate() is empty."""
        kws_evaluator = evaluator.KWSEvaluator()
        empty_ll = annotations.LabelList(labels=[])
        single_keyword_ll = annotations.LabelList(
            labels=[annotations.Label('four', 2.5, 3.0)])

        outcome = kws_evaluator.evaluate(empty_ll, single_keyword_ll)

        assert outcome.false_rejection_rate() == 0.0
Ejemplo n.º 2
0
    def test_evaluate_with_multiple_labels(self):
        """Evaluate a reference made of three labels against a single-label hypothesis.

        Both outcomes hold one utterance ``'a'``; the test pins the exact
        list of label pairs returned for it.
        """
        ref_labels = [
            annotations.Label('a b', start=0, end=3),
            annotations.Label('a d', start=3, end=5),
            annotations.Label('f a b', start=5, end=6),
        ]
        ref = evaluator.Outcome(
            label_lists={'a': annotations.LabelList(labels=ref_labels)})

        hyp_labels = [annotations.Label('a b d f i b')]
        hyp = evaluator.Outcome(
            label_lists={'a': annotations.LabelList(labels=hyp_labels)})

        result = evaluator.ASREvaluator().do_evaluate(ref, hyp)

        assert len(result.utt_to_label_pairs) == 1

        # (first, second) value of every expected LabelPair; None leaves the
        # second slot of the pair empty.
        token_pairs = [
            ('a', 'a'),
            ('b', 'b'),
            ('a', None),
            ('d', 'd'),
            ('f', 'f'),
            ('a', 'i'),
            ('b', 'b'),
        ]
        expected_pairs = [
            alignment.LabelPair(
                annotations.Label(first),
                None if second is None else annotations.Label(second))
            for first, second in token_pairs
        ]
        assert result.utt_to_label_pairs['a'] == expected_pairs
Ejemplo n.º 3
0
    def test_validate(self):
        """LabelCoverageValidator fails and reports the uncovered segments of utt-4 and utt-6."""
        ds = resources.create_single_label_corpus()

        # (utterance id, [(start, end), ...]) of the 'a' labels to install.
        label_spans = (
            ('utt-4', [(0.0, 1.44), (1.89, 10.0)]),
            ('utt-6', [(1.33, 5.9), (5.9, 14.7)]),
        )
        for utt_idx, spans in label_spans:
            labels = [
                annotations.Label('a', start=begin, end=finish)
                for begin, finish in spans
            ]
            replacement = annotations.LabelList(idx='default', labels=labels)
            ds.utterances[utt_idx].set_label_list(replacement)

        validator = validation.LabelCoverageValidator('default')
        report = validator.validate(ds)

        assert not report.passed
        assert set(report.uncovered_segments.keys()) == {'utt-4', 'utt-6'}

        assert report.uncovered_segments['utt-4'] == [(1.44, 1.89)]
        assert report.uncovered_segments['utt-6'] == [(0.0, 1.33),
                                                      (14.7, 15.0)]
Ejemplo n.º 4
0
    def test_evaluate(self):
        """Evaluate one reference label against one hypothesis label for utterance ``'a'``.

        The test pins the exact list of label pairs returned by ``do_evaluate``.
        """
        ref_ll = annotations.LabelList(
            labels=[annotations.Label('a b a d f a b')])
        ref = evaluator.Outcome(label_lists={'a': ref_ll})

        hyp_ll = annotations.LabelList(
            labels=[annotations.Label('a b d f i b')])
        hyp = evaluator.Outcome(label_lists={'a': hyp_ll})

        result = evaluator.ASREvaluator().do_evaluate(ref, hyp)

        assert len(result.utt_to_label_pairs) == 1

        # (first, second) value of every expected LabelPair; None leaves the
        # second slot of the pair empty.
        token_pairs = [
            ('a', 'a'),
            ('b', 'b'),
            ('a', None),
            ('d', 'd'),
            ('f', 'f'),
            ('a', 'i'),
            ('b', 'b'),
        ]
        expected_pairs = [
            alignment.LabelPair(
                annotations.Label(first),
                None if second is None else annotations.Label(second))
            for first, second in token_pairs
        ]
        assert result.utt_to_label_pairs['a'] == expected_pairs
Ejemplo n.º 5
0
    def test_split(self):
        """Splitting at two cut points yields three parts that each keep both label lists."""
        phones = annotations.LabelList(
            'phones',
            labels=[annotations.Label('alpha', start=0.0, end=30.0)])
        words = annotations.LabelList(
            'words',
            labels=[annotations.Label('b', start=0.0, end=30.0)])
        utt = tracks.Utterance('utt-1', 'track-x', start=0.0, end=40.0,
                               label_lists=[phones, words])

        parts = utt.split([14.0, 29.5])

        assert len(parts) == 3

        # Expected (start, end) of every part, in order.
        expected_bounds = [(0.0, 14.0), (14.0, 29.5), (29.5, 40.0)]
        for position, (start, end) in enumerate(expected_bounds):
            part = parts[position]
            assert part.start == start
            assert part.end == end
            assert 'phones' in part.label_lists.keys()
            assert 'words' in part.label_lists.keys()
Ejemplo n.º 6
0
    def test_align_different_start(self):
        """Reference (4-9) and hypothesis (2-8) that overlap partially align into three segments."""
        make = annotations.Label
        ref = annotations.LabelList(labels=[make('b', 4, 9)])
        hyp = annotations.LabelList(labels=[make('b', 2, 8)])

        segments = alignment.InvariantSegmentAligner().align(ref, hyp)

        assert len(segments) == 3

        # (start, end, reference labels, hypothesis labels) per segment.
        expected = [
            (2, 4, [], [make('b', 2, 8)]),
            (4, 8, [make('b', 4, 9)], [make('b', 2, 8)]),
            (8, 9, [make('b', 4, 9)], []),
        ]
        for position, (start, end, ref_labels, hyp_labels) in enumerate(expected):
            segment = segments[position]
            assert segment.start == start
            assert segment.end == end
            assert segment.ref == ref_labels
            assert segment.hyp == hyp_labels
Ejemplo n.º 7
0
    def test_create_event_list(self):
        """create_event_list() returns the start/end events of both label lists grouped by time.

        Every expected entry is ``(time, [(time, 'S' or 'E', source, label), ...])``
        where the reference labels appear with source 0 and the hypothesis
        labels with source 1.
        """
        def start_event(source, value, begin, finish):
            return (begin, 'S', source, annotations.Label(value, begin, finish))

        def end_event(source, value, begin, finish):
            return (finish, 'E', source, annotations.Label(value, begin, finish))

        ll_ref = annotations.LabelList(labels=[
            annotations.Label('a', 0.89, 13.73),
            annotations.Label('a', 13.73, 17.49),
            annotations.Label('b', 17.49, 22.75),
        ])
        ll_hyp = annotations.LabelList(labels=[
            annotations.Label('b', 0.1, 1.656),
            annotations.Label('a', 1.656, 1.976),
            annotations.Label('b', 1.976, 3.896),
            annotations.Label('a', 3.896, 3.957),
        ])

        events = alignment.InvariantSegmentAligner.create_event_list(
            ll_ref, ll_hyp, time_threshold=0.01)

        expected_events = [
            (0.1, [start_event(1, 'b', 0.1, 1.656)]),
            (0.89, [start_event(0, 'a', 0.89, 13.73)]),
            (1.656, [end_event(1, 'b', 0.1, 1.656),
                     start_event(1, 'a', 1.656, 1.976)]),
            (1.976, [end_event(1, 'a', 1.656, 1.976),
                     start_event(1, 'b', 1.976, 3.896)]),
            (3.896, [end_event(1, 'b', 1.976, 3.896),
                     start_event(1, 'a', 3.896, 3.957)]),
            (3.957, [end_event(1, 'a', 3.896, 3.957)]),
            (13.73, [end_event(0, 'a', 0.89, 13.73),
                     start_event(0, 'a', 13.73, 17.49)]),
            (17.49, [end_event(0, 'a', 13.73, 17.49),
                     start_event(0, 'b', 17.49, 22.75)]),
            (22.75, [end_event(0, 'b', 17.49, 22.75)]),
        ]
        for position, expected_event in enumerate(expected_events):
            assert events[position] == expected_event
Ejemplo n.º 8
0
    def test_validate_returns_part_of_overlapping_label(self):
        """LabelOverflowValidator fails and reports one overflow segment each for utt-4 and utt-6."""
        ds = resources.create_single_label_corpus()

        # (utterance id, [(value, start, end), ...]) of the labels to install.
        label_specs = (
            ('utt-4', [('a', 0.0, 9.0), ('b', 9.0, 13.0)]),
            ('utt-6', [('a', -2.0, 5.9), ('b', 5.9, 14.7)]),
        )
        for utt_idx, specs in label_specs:
            labels = [
                annotations.Label(value, start=begin, end=finish)
                for value, begin, finish in specs
            ]
            replacement = annotations.LabelList(idx='default', labels=labels)
            ds.utterances[utt_idx].set_label_list(replacement)

        validator = validation.LabelOverflowValidator('default')
        report = validator.validate(ds)

        assert not report.passed
        assert set(report.overflow_segments.keys()) == {'utt-4', 'utt-6'}

        assert report.overflow_segments['utt-4'] == [(10.0, 13.0, 'b')]
        assert report.overflow_segments['utt-6'] == [(-2.0, 0.0, 'a')]
Ejemplo n.º 9
0
    def test_eq_ignores_label_list_relation(self):
        """Two labels compare equal even though they belong to different label lists."""
        first = annotations.Label('some label A', 1.0, 2.0)
        second = annotations.Label('some label a', 1.0, 2.0)

        first_owner = annotations.LabelList(idx='one', labels=[first])
        second_owner = annotations.LabelList(idx='another', labels=[second])

        # Each label points back to the list it was added to ...
        assert first.label_list == first_owner
        assert second.label_list == second_owner
        # ... and that relation plays no role in the comparison.
        assert first == second
Ejemplo n.º 10
0
    def test_align(self):
        """Aligning three reference labels with three hypothesis labels yields six segments."""
        make = annotations.Label
        ref = annotations.LabelList(labels=[
            make('a', 0, 3),
            make('b', 3, 6),
            make('c', 7, 10),
        ])
        hyp = annotations.LabelList(labels=[
            make('a', 0, 3),
            make('b', 4, 8),
            make('c', 8, 10),
        ])

        segments = alignment.InvariantSegmentAligner().align(ref, hyp)

        assert len(segments) == 6

        # (start, end, reference labels, hypothesis labels) per segment.
        expected = [
            (0, 3, [make('a', 0, 3)], [make('a', 0, 3)]),
            (3, 4, [make('b', 3, 6)], []),
            (4, 6, [make('b', 3, 6)], [make('b', 4, 8)]),
            (6, 7, [], [make('b', 4, 8)]),
            (7, 8, [make('c', 7, 10)], [make('b', 4, 8)]),
            (8, 10, [make('c', 7, 10)], [make('c', 8, 10)]),
        ]
        for position, (start, end, ref_labels, hyp_labels) in enumerate(expected):
            segment = segments[position]
            assert segment.start == start
            assert segment.end == end
            assert segment.ref == ref_labels
            assert segment.hyp == hyp_labels
Ejemplo n.º 11
0
def corpus_with_more_labels():
    """Return the single-label sample corpus with an extra 'radio' label-list on utt-1 to utt-8."""
    corpus = resources.create_single_label_corpus()

    # Value of the single 'radio' label for utt-1, utt-2, ..., utt-8.
    radio_values = ['alpha', 'alpha'] + ['beta'] * 6

    for number, value in enumerate(radio_values, start=1):
        utterance = corpus.utterances['utt-{}'.format(number)]
        radio_ll = annotations.LabelList(idx='radio',
                                         labels=[annotations.Label(value)])
        utterance.set_label_list(radio_ll)

    return corpus
Ejemplo n.º 12
0
    def read_labels(path, corpus):
        """Load every ``<LABEL_FILE_PREFIX>_<key>.txt`` file in ``path`` into ``corpus``.

        Each line of a label file is split on spaces into at most four
        columns: utterance id, start, end and label value. An end of ``-1``
        is stored as infinity. When the value matches ``META_PATTERN``,
        group 1 becomes the label value and group 2 is parsed as JSON into
        the label's meta data. For every utterance that occurs in a file, a
        label-list with idx ``<key>`` is set on that utterance.

        Args:
            path (str): Directory containing the label files.
            corpus: Corpus whose utterances receive the label-lists.
        """
        key_prefix = '{}_'.format(LABEL_FILE_PREFIX)
        file_pattern = os.path.join(path,
                                    '{}_*.txt'.format(LABEL_FILE_PREFIX))

        for label_file in glob.glob(file_pattern):
            file_name = os.path.basename(label_file)
            # The label-list idx is the file name without prefix and '.txt'.
            key = file_name[len(key_prefix):len(file_name) - len('.txt')]

            records = textfile.read_separated_lines_generator(label_file,
                                                              separator=' ',
                                                              max_columns=4)

            labels_per_utterance = collections.defaultdict(list)

            for record in records:
                value = record[3]
                start = float(record[1])
                end = float(record[2])

                meta = None
                match = META_PATTERN.match(value)

                if end == -1:
                    end = float('inf')

                if match is not None:
                    meta = json.loads(match.group(2))
                    value = match.group(1)

                new_label = annotations.Label(value, start, end, meta=meta)
                labels_per_utterance[record[0]].append(new_label)

            for utterance_idx, utt_labels in labels_per_utterance.items():
                label_list = annotations.LabelList(idx=key, labels=utt_labels)
                corpus.utterances[utterance_idx].set_label_list(label_list)
Ejemplo n.º 13
0
    def test_encode_label_ends_at_utterance_end(self):
        """Frame-hot encode an utterance whose last label has an infinite end."""
        wav_path = resources.sample_wav_file('med_len.wav')
        track = tracks.FileTrack('file1', wav_path)
        utt = tracks.Utterance('utt1', track, start=3, end=14)
        utt.set_label_list(annotations.LabelList(labels=[
            annotations.Label('speech', 0, 4),
            annotations.Label('music', 4, 9),
            annotations.Label('speech', 9, float('inf')),
        ]))

        frame_settings = units.FrameSettings(32000, 16000)
        enc = encoding.FrameHotEncoder(['music', 'speech', 'noise'],
                                       'default',
                                       frame_settings=frame_settings,
                                       sr=16000)

        actual = enc.encode_utterance(utt)

        # Row patterns of the expected matrix; the column order presumably
        # follows the label order given to the encoder (music, speech, noise).
        speech_only = [0, 1, 0]
        music_only = [1, 0, 0]
        music_and_speech = [1, 1, 0]
        expected_rows = (
            [speech_only] * 3
            + [music_and_speech]
            + [music_only] * 4
            + [music_and_speech, speech_only]
        )
        expected = np.array(expected_rows, dtype=np.float32)

        assert np.array_equal(expected, actual)
Ejemplo n.º 14
0
    def read_labels(path, corpus):
        """Load the label files referenced by ``LABEL_FILE`` in ``path`` into ``corpus``.

        Every line of the reference file is split on spaces into at most
        three columns: utterance id, label-file path relative to ``path``
        and, optionally, the idx of the label-list. Each referenced file is
        read with ``audacity.read_label_file``; a negative end is stored as
        infinity. ``extract_meta_from_label_value`` is applied to the
        label-list before it is set on the utterance.

        Args:
            path (str): Directory containing the reference file.
            corpus: Corpus whose utterances receive the label-lists.
        """
        reference_path = os.path.join(path, LABEL_FILE)
        references = textfile.read_separated_lines(reference_path,
                                                   separator=' ',
                                                   max_columns=3)

        for record in references:
            utt_idx = record[0]
            label_path = os.path.join(path, record[1])
            # The third column is optional.
            label_idx = record[2] if len(record) > 2 else None

            label_list = annotations.LabelList(idx=label_idx)

            for entry in audacity.read_label_file(label_path):
                start, end, value = entry[0], entry[1], entry[2]
                label_list.addl(value, start,
                                float('inf') if end < 0 else end)

            label_list.apply(extract_meta_from_label_value)
            corpus.utterances[utt_idx].set_label_list(label_list)
Ejemplo n.º 15
0
    def test_read_samples(self):
        """Label.read_samples() returns the same samples as loading the matching slice of the file.

        The utterance starts at 1.0 in the file, so the expected offsets
        are the label starts shifted by 1.0.
        """
        wav_path = resources.sample_wav_file('wav_1.wav')
        utt = tracks.Utterance('t',
                               tracks.FileTrack('wav', wav_path),
                               issuer=issuers.Issuer('toni'),
                               start=1.0,
                               end=2.30)

        first = annotations.Label('a', 0.15, 0.448)
        second = annotations.Label('a', 0.5, 0.73)
        utt.set_label_list(annotations.LabelList(labels=[first, second]))

        expected_first = librosa.core.load(wav_path,
                                           sr=None,
                                           offset=1.15,
                                           duration=0.298)[0]
        assert np.array_equal(first.read_samples(), expected_first)

        expected_second = librosa.core.load(wav_path,
                                            sr=None,
                                            offset=1.5,
                                            duration=1.73 - 1.5)[0]

        print(expected_second.shape)
        print(second.read_samples().shape)
        assert np.array_equal(second.read_samples(), expected_second)
Ejemplo n.º 16
0
    def test_read_label_list_de(self):
        """Reading 'audacity_labels_de.txt' yields the two expected labels."""
        here = os.path.dirname(__file__)
        loaded = audacity.read_label_list(
            os.path.join(here, 'audacity_labels_de.txt'))

        expected = annotations.LabelList(labels=[
            annotations.Label('music', 43352.824046, 43525.837661),
            annotations.Label('speech_male', 43512.446969, 43531.343483),
        ])
        assert loaded == expected
Ejemplo n.º 17
0
    def test_align_empty_hypothesis(self):
        """With an empty hypothesis the single reference label forms the only segment."""
        ref = annotations.LabelList(labels=[annotations.Label('b', 4, 8)])
        hyp = annotations.LabelList(labels=[])

        segments = alignment.InvariantSegmentAligner().align(ref, hyp)

        assert len(segments) == 1

        only_segment = segments[0]
        assert only_segment.start == 4
        assert only_segment.end == 8
        assert only_segment.ref == [annotations.Label('b', 4, 8)]
        assert only_segment.hyp == []
Ejemplo n.º 18
0
    def setup_method(self):
        """Create the shared fixtures: four label-lists, a file track, an issuer and an utterance.

        ``ll_duplicate_idx`` and ``ll_3`` deliberately share the idx
        ``'charlie'``; the utterance is created with all four lists.
        """
        new_label = annotations.Label

        alpha_labels = [
            new_label('a', 3.2, 4.5),
            new_label('b', 5.1, 8.9),
            new_label('c', 7.2, 10.5),
            new_label('d', 10.5, 14),
            new_label('d', 15, 18),
        ]
        self.ll_1 = annotations.LabelList(idx='alpha', labels=alpha_labels)

        bravo_labels = [
            new_label('a', 1.0, 4.2),
            new_label('e', 4.2, 7.9),
            new_label('c', 7.2, 10.5),
            new_label('f', 10.5, 14),
            new_label('d', 15, 17.3),
        ]
        self.ll_2 = annotations.LabelList(idx='bravo', labels=bravo_labels)

        duplicate_labels = [
            new_label('t', 1.0, 4.2),
            new_label('h', 4.2, 7.9),
        ]
        self.ll_duplicate_idx = annotations.LabelList(idx='charlie',
                                                      labels=duplicate_labels)

        charlie_labels = [
            new_label('a', 1.0, 4.2),
            new_label('g', 4.2, 7.9),
        ]
        self.ll_3 = annotations.LabelList(idx='charlie',
                                          labels=charlie_labels)

        wav_path = resources.sample_wav_file('wav_1.wav')
        self.track = tracks.FileTrack('wav', wav_path)
        self.issuer = issuers.Issuer('toni')

        all_label_lists = [
            self.ll_1, self.ll_2, self.ll_duplicate_idx, self.ll_3
        ]
        self.utt = tracks.Utterance('test',
                                    self.track,
                                    issuer=self.issuer,
                                    start=1.25,
                                    end=1.30,
                                    label_lists=all_label_lists)
Ejemplo n.º 19
0
    def test_encode_utterance_with_single_label(self):
        """The label 'a c b' is encoded as [0, 2, 1] for the token list ['a', 'b', 'c']."""
        go_ll = annotations.LabelList(idx='go',
                                      labels=[annotations.Label('a c b')])
        utt = tracks.Utterance('utt-1', None, label_lists=go_ll)

        token_encoder = encoding.TokenOrdinalEncoder('go', ['a', 'b', 'c'])
        ordinals = token_encoder.encode_utterance(utt)

        assert np.array_equal(ordinals, [0, 2, 1])
Ejemplo n.º 20
0
    def test_encode_utterance_with_non_existing_label_list_raises_error(self):
        """Encoding with a label-list idx the utterance does not have raises ValueError."""
        go_ll = annotations.LabelList(
            idx='go', labels=[annotations.Label('a c b unknown')])
        utt = tracks.Utterance('utt-1', None, label_lists=go_ll)

        # The utterance only carries 'go', the encoder asks for 'not_existing'.
        token_encoder = encoding.TokenOrdinalEncoder('not_existing',
                                                     ['a', 'b', 'c'])

        with pytest.raises(ValueError):
            token_encoder.encode_utterance(utt)
Ejemplo n.º 21
0
    def test_read_label_list_with_empty_value(self):
        """Reading 'audacity_labels_empty_value.txt' yields a label with an empty value between two others."""
        here = os.path.dirname(__file__)
        loaded = audacity.read_label_list(
            os.path.join(here, 'audacity_labels_empty_value.txt'))

        expected = annotations.LabelList(labels=[
            annotations.Label('music', 1, 4),
            annotations.Label('', 4, 7),
            annotations.Label('speech_male', 7, 9),
        ])
        assert loaded == expected
Ejemplo n.º 22
0
def ll_ref():
    """Return a label-list with six 'up'/'down'/'right' labels between 5.28 and 40.0."""
    # (value, start, end) of every label, in order.
    spans = [
        ('up', 5.28, 5.99),
        ('down', 10.35, 11.12),
        ('right', 20.87, 22.01),
        ('up', 33.00, 33.4),
        ('up', 33.4, 33.8),
        ('down', 39.28, 40.0),
    ]
    labels = [
        annotations.Label(value, start=begin, end=finish)
        for value, begin, finish in spans
    ]
    return annotations.LabelList(labels=labels)
Ejemplo n.º 23
0
def ll_hyp():
    """Return a label-list with six 'up'/'right'/'left'/'down' labels between 5.20 and 40.01."""
    # (value, start, end) of every label, in order.
    spans = [
        ('up', 5.20, 5.88),
        ('right', 10.30, 11.08),
        ('up', 32.00, 32.5),
        ('up', 34.2, 34.8),
        ('left', 39.3, 39.9),
        ('down', 39.27, 40.01),
    ]
    labels = [
        annotations.Label(value, start=begin, end=finish)
        for value, begin, finish in spans
    ]
    return annotations.LabelList(labels=labels)
Ejemplo n.º 24
0
    def test_align_with_empty_segments(self):
        """The gap between 3 and 4, where neither list has a label, is returned as an empty segment."""
        make = annotations.Label
        ref = annotations.LabelList(labels=[
            make('a', 0, 3),
            make('b', 4, 6),
        ])
        hyp = annotations.LabelList(labels=[
            make('a', 0, 3),
            make('c', 5, 8),
        ])

        segments = alignment.InvariantSegmentAligner().align(ref, hyp)

        assert len(segments) == 5

        # (start, end, reference labels, hypothesis labels) per segment.
        expected = [
            (0, 3, [make('a', 0, 3)], [make('a', 0, 3)]),
            (3, 4, [], []),
            (4, 5, [make('b', 4, 6)], []),
            (5, 6, [make('b', 4, 6)], [make('c', 5, 8)]),
            (6, 8, [], [make('c', 5, 8)]),
        ]
        for position, (start, end, ref_labels, hyp_labels) in enumerate(expected):
            segment = segments[position]
            assert segment.start == start
            assert segment.end == end
            assert segment.ref == ref_labels
            assert segment.hyp == hyp_labels
Ejemplo n.º 25
0
def relabel(label_list, projections):
    """
    Build a new :py:class:`~audiomate.annotations.LabelList` by mapping label combinations to new labels.

    The label list is processed range by range (see ``label_list.ranges()``). For every range the values of
    the labels that apply to it are sorted and used, as a tuple, as the key into ``projections``. If that
    combination has no entry of its own, the wildcard projection ``('**',)`` is used instead, so it is not
    required to define a projection for every single combination. A range that is mapped to the empty
    string produces no label, which makes it possible to remove labels.

    Args:
        label_list (audiomate.annotations.LabelList): The label list to relabel
        projections (dict): A dictionary that maps tuples of label combinations to string
                            labels.
    Returns:
        audiomate.annotations.LabelList: New label list with remapped labels

    Raises:
        UnmappedLabelsException: If a projection for one or more combinations of labels is not defined.

    Example:
        >>> projections = {
        ...     ('a',): 'a',
        ...     ('b',): 'b',
        ...     ('c',): 'c',
        ...     ('a', 'b',): 'a_b',
        ...     ('a', 'b', 'c',): 'a_b_c',
        ...     ('**',): 'b_c',
        ... }
        >>> label_list = annotations.LabelList(labels=[
        ...     annotations.Label('a', 3.2, 4.5),
        ...     annotations.Label('b', 4.0, 4.9),
        ...     annotations.Label('c', 4.2, 5.1)
        ... ])
        >>> ll = relabel(label_list, projections)
        >>> [l.value for l in ll]
        ['a', 'a_b', 'a_b_c', 'b_c', 'c']
    """
    missing = find_missing_projections(label_list, projections)
    if len(missing) > 0:
        raise UnmappedLabelsException('Unmapped combinations: {}'.format(missing))

    relabeled = []

    for segment in label_list.ranges():
        segment_start = segment[0]
        segment_end = segment[1]
        combination = tuple(sorted([label.value for label in segment[2]]))

        # Prefer an explicit projection, otherwise fall back to the wildcard.
        if combination in projections:
            target_value = projections[combination]
        else:
            target_value = projections[WILDCARD_COMBINATION]

        # An empty target value drops the segment.
        if target_value != '':
            relabeled.append(annotations.Label(target_value, segment_start, segment_end))

    return annotations.LabelList(idx=label_list.idx, labels=relabeled)
Ejemplo n.º 26
0
    def test_all_tokens_with_custom_delimiter(self):
        """all_tokens() with delimiter ',' returns {'a', 'b', 'c'} for the label 'a, b, a, c'."""
        corpus = resources.create_dataset()
        test_ll = annotations.LabelList(
            idx='test', labels=[annotations.Label('a, b, a, c')])
        corpus.utterances['utt-1'].set_label_list(test_ll)

        tokens = corpus.all_tokens(delimiter=',', label_list_ids=['test'])

        assert tokens == {'a', 'b', 'c'}
Ejemplo n.º 27
0
    def test_all_tokens_returns_only_from_selected_label_lists(self):
        """Tokens of the extra 'test' label-list are not returned when only LL_WORD_TRANSCRIPT is selected."""
        corpus = resources.create_dataset()
        unselected_ll = annotations.LabelList(
            idx='test', labels=[annotations.Label('what can he do')])
        corpus.utterances['utt-1'].set_label_list(unselected_ll)

        selected_ids = [audiomate.corpus.LL_WORD_TRANSCRIPT]
        tokens = corpus.all_tokens(label_list_ids=selected_ids)

        assert tokens == {
            'who', 'am', 'i', 'are', 'is', 'he', 'you', 'she', 'they'
        }
Ejemplo n.º 28
0
def utt_with_noise():
    """Return utterance 'utt-2' carrying the label-lists 'alpha' (music, speech, noise) and 'bravo' (music, speech)."""
    utt = tracks.Utterance('utt-2', 'file-2')

    # (label-list idx, [(value, start, end), ...]), installed in this order.
    label_list_specs = (
        ('alpha', [('music', 0, 5), ('speech', 5, 12), ('noise', 13, 15)]),
        ('bravo', [('music', 0, 1), ('speech', 2, 6)]),
    )

    for idx, specs in label_list_specs:
        labels = [
            annotations.Label(value, begin, finish)
            for value, begin, finish in specs
        ]
        utt.set_label_list(annotations.LabelList(idx=idx, labels=labels))

    return utt
Ejemplo n.º 29
0
    def test_split_with_overlap(self):
        """Split at 12.0 and 24.0 with ``track_relative=False`` and ``overlap=2.0``.

        The test pins start, end and both label lists of each of the three
        resulting parts.
        """
        def expected_ll(idx, *spans):
            # Build the label-list to compare against from (value, start, end) tuples.
            return annotations.LabelList(
                idx=idx, labels=[annotations.Label(*span) for span in spans])

        phones = annotations.LabelList('phones',
                                       labels=[
                                           annotations.Label('alpha',
                                                             start=0.0,
                                                             end=30.0),
                                           annotations.Label('bravo',
                                                             start=20.0,
                                                             end=42.0),
                                       ])
        words = annotations.LabelList(
            'words', labels=[annotations.Label('b', start=8.0, end=30.0)])
        utt = tracks.Utterance('utt-1',
                               'file-x',
                               start=10.0,
                               end=55.0,
                               label_lists=[phones, words])

        parts = utt.split([12.0, 24.0], track_relative=False, overlap=2.0)

        assert len(parts) == 3

        head = parts[0]
        assert head.start == 10.0
        assert head.end == 24.0
        assert head.label_lists['phones'] == expected_ll(
            'phones', ('alpha', 0.0, 14.0))
        assert head.label_lists['words'] == expected_ll(
            'words', ('b', 8.0, 14.0))

        print(parts[1].label_lists['phones'].labels)

        middle = parts[1]
        assert middle.start == 20.0
        assert middle.end == 36.0
        assert middle.label_lists['phones'] == expected_ll(
            'phones', ('alpha', 0.0, 16.0), ('bravo', 10.0, 16.0))
        assert middle.label_lists['words'] == expected_ll(
            'words', ('b', 0.0, 16.0))

        tail = parts[2]
        assert tail.start == 32.0
        assert tail.end == 55.0
        assert tail.label_lists['phones'] == expected_ll(
            'phones', ('alpha', 0.0, 8.0), ('bravo', 0.0, 20.0))
        assert tail.label_lists['words'] == expected_ll(
            'words', ('b', 0.0, 8.0))
Ejemplo n.º 30
0
def classification_ref_and_hyp_label_list():
    """
    Return a ``(reference, hypothesis)`` pair of label-lists as sample output of a classification system.
    """
    def build(spans):
        # Turn (value, start, end) tuples into a label-list.
        return annotations.LabelList(labels=[
            annotations.Label(value, start=begin, end=finish)
            for value, begin, finish in spans
        ])

    ll_ref = build([
        ('music', 0, 5),
        ('speech', 5, 11),
        ('mix', 11, 14),
        ('speech', 14, 19),
    ])

    ll_hyp = build([
        ('music', 0, 4),
        ('speech', 4, 6),
        ('mix', 8, 16),
        ('speech', 16, 21),
    ])

    return ll_ref, ll_hyp