Esempio n. 1
0
    def test_read_samples(self):
        """Samples read through a label equal the matching slice of the wav file.

        The reference audio is loaded at offsets 1.15 and 1.5, i.e. the
        utterance start (1.0) plus the start of the respective label.
        """
        wav_path = resources.sample_wav_file('wav_1.wav')
        utterance = tracks.Utterance('t',
                                     tracks.FileTrack('wav', wav_path),
                                     issuer=issuers.Issuer('toni'),
                                     start=1.0,
                                     end=2.30)

        first = annotations.Label('a', 0.15, 0.448)
        second = annotations.Label('a', 0.5, 0.73)
        utterance.set_label_list(annotations.LabelList(labels=[first, second]))

        # First label: compare against the reference slice loaded with librosa.
        reference, __ = librosa.core.load(wav_path,
                                          sr=None,
                                          offset=1.15,
                                          duration=0.298)
        assert np.array_equal(first.read_samples(), reference)

        # Second label: the duration is kept as the float expression 1.73 - 1.5.
        reference, __ = librosa.core.load(wav_path,
                                          sr=None,
                                          offset=1.5,
                                          duration=1.73 - 1.5)

        # Shapes are printed to ease debugging when the comparison fails.
        print(reference.shape)
        print(second.read_samples().shape)
        assert np.array_equal(second.read_samples(), reference)
Esempio n. 2
0
    def test_validate_returns_part_of_overlapping_label(self):
        """The overflow validator reports (start, end, value) segments per utterance.

        Expected: (10.0, 13.0, 'b') for utt-4 and (-2.0, 0.0, 'a') for utt-6.
        """
        corpus = resources.create_single_label_corpus()

        # (utterance idx, [(value, start, end), ...]) of the replaced label-lists.
        replacements = (
            ('utt-4', (('a', 0.0, 9.0), ('b', 9.0, 13.0))),
            ('utt-6', (('a', -2.0, 5.9), ('b', 5.9, 14.7))),
        )

        for utt_idx, spans in replacements:
            label_list = annotations.LabelList(
                idx='default',
                labels=[
                    annotations.Label(value, start=start, end=end)
                    for value, start, end in spans
                ])
            corpus.utterances[utt_idx].set_label_list(label_list)

        validator = validation.LabelOverflowValidator('default')
        outcome = validator.validate(corpus)

        assert not outcome.passed
        assert set(outcome.overflow_segments.keys()) == {'utt-4', 'utt-6'}

        assert outcome.overflow_segments['utt-4'] == [(10.0, 13.0, 'b')]
        assert outcome.overflow_segments['utt-6'] == [(-2.0, 0.0, 'a')]
Esempio n. 3
0
    def test_validate(self):
        """The coverage validator reports the (start, end) ranges without a label.

        Expected: (1.44, 1.89) for utt-4; (0.0, 1.33) and (14.7, 15.0) for utt-6.
        """
        corpus = resources.create_single_label_corpus()

        # (utterance idx, [(start, end), ...]); every label has the value 'a'.
        replacements = (
            ('utt-4', ((0.0, 1.44), (1.89, 10.0))),
            ('utt-6', ((1.33, 5.9), (5.9, 14.7))),
        )

        for utt_idx, spans in replacements:
            label_list = annotations.LabelList(
                idx='default',
                labels=[
                    annotations.Label('a', start=start, end=end)
                    for start, end in spans
                ])
            corpus.utterances[utt_idx].set_label_list(label_list)

        validator = validation.LabelCoverageValidator('default')
        outcome = validator.validate(corpus)

        assert not outcome.passed
        assert set(outcome.uncovered_segments.keys()) == {'utt-4', 'utt-6'}

        assert outcome.uncovered_segments['utt-4'] == [(1.44, 1.89)]
        assert outcome.uncovered_segments['utt-6'] == [(0.0, 1.33),
                                                       (14.7, 15.0)]
Esempio n. 4
0
    def test_split(self):
        """Splitting at 14.0 and 29.5 gives three parts that keep both label-lists."""
        phones = annotations.LabelList('phones', labels=[
            annotations.Label('alpha', start=0.0, end=30.0)
        ])
        words = annotations.LabelList('words', labels=[
            annotations.Label('b', start=0.0, end=30.0)
        ])
        utterance = tracks.Utterance('utt-1', 'track-x', start=0.0, end=40.0,
                                     label_lists=[phones, words])

        parts = utterance.split([14.0, 29.5])

        assert len(parts) == 3

        # Expected (start, end) of every part, in order.
        boundaries = [(0.0, 14.0), (14.0, 29.5), (29.5, 40.0)]

        for part, (expected_start, expected_end) in zip(parts, boundaries):
            assert part.start == expected_start
            assert part.end == expected_end
            assert 'phones' in part.label_lists.keys()
            assert 'words' in part.label_lists.keys()
Esempio n. 5
0
    def test_encode_label_ends_at_utterance_end(self):
        """Encode an utterance whose last label has an infinite end.

        The utterance spans 3 to 14 on the track; the encoding is expected to
        consist of exactly ten frames.
        """
        wav_track = tracks.FileTrack('file1',
                                     resources.sample_wav_file('med_len.wav'))
        utterance = tracks.Utterance('utt1', wav_track, start=3, end=14)
        utterance.set_label_list(annotations.LabelList(labels=[
            annotations.Label('speech', 0, 4),
            annotations.Label('music', 4, 9),
            annotations.Label('speech', 9, float('inf')),
        ]))

        frame_settings = units.FrameSettings(32000, 16000)
        encoder = encoding.FrameHotEncoder(['music', 'speech', 'noise'],
                                           'default',
                                           frame_settings=frame_settings,
                                           sr=16000)

        actual = encoder.encode_utterance(utterance)

        # Row patterns; columns presumably follow the label order passed to the
        # encoder (music, speech, noise) - TODO confirm against FrameHotEncoder.
        speech_only = [0, 1, 0]
        music_only = [1, 0, 0]
        music_and_speech = [1, 1, 0]

        rows = ([speech_only] * 3 + [music_and_speech] + [music_only] * 4 +
                [music_and_speech] + [speech_only])
        expected = np.array(rows).astype(np.float32)

        assert np.array_equal(expected, actual)
Esempio n. 6
0
    def test_ordering_both_ref_none(self):
        """With no reference label in either pair, the earlier hypothesis start sorts first."""
        hyp_a = annotations.Label('a2', start=1.66, end=1.92)
        pair_a = alignment.LabelPair(None, hyp_a)

        hyp_b = annotations.Label('b2', start=1.65, end=1.92)
        pair_b = alignment.LabelPair(None, hyp_b)

        assert pair_b < pair_a
Esempio n. 7
0
    def test_read_label_list_de(self):
        """Read 'audacity_labels_de.txt' and compare with the expected two labels."""
        test_dir = os.path.dirname(__file__)
        label_file = os.path.join(test_dir, 'audacity_labels_de.txt')

        loaded = audacity.read_label_list(label_file)

        expected = annotations.LabelList(labels=[
            annotations.Label('music', 43352.824046, 43525.837661),
            annotations.Label('speech_male', 43512.446969, 43531.343483),
        ])
        assert loaded == expected
Esempio n. 8
0
    def test_equals(self):
        """Two segments built from equal times and equal reference labels are equal."""

        def build_segment():
            # Fresh label objects per call, so the segments share no instances.
            return alignment.Segment(
                0.0, 0.9, ref=[annotations.Label('a'),
                               annotations.Label('a')])

        first = build_segment()
        second = build_segment()

        assert first == second
Esempio n. 9
0
    def test_compare_multi_labels_returns_smaller_start_time(self):
        """With identical reference labels, the segment starting earlier compares as smaller."""
        earlier_refs = [annotations.Label('a'), annotations.Label('a')]
        earlier = alignment.Segment(0.0, 0.9, ref=earlier_refs)

        later_refs = [annotations.Label('a'), annotations.Label('a')]
        later = alignment.Segment(0.2, 0.8, ref=later_refs)

        assert earlier < later
Esempio n. 10
0
    def test_read_label_list_with_empty_value(self):
        """Read 'audacity_labels_empty_value.txt'; the middle label has the value ''."""
        test_dir = os.path.dirname(__file__)
        label_file = os.path.join(test_dir, 'audacity_labels_empty_value.txt')

        loaded = audacity.read_label_list(label_file)

        expected = annotations.LabelList(labels=[
            annotations.Label('music', 1, 4),
            annotations.Label('', 4, 7),
            annotations.Label('speech_male', 7, 9),
        ])
        assert loaded == expected
Esempio n. 11
0
    def test_ordering_only_end_differs(self):
        """When only the reference end differs, the pair with the earlier end sorts first."""
        ref_a = annotations.Label('a1', start=1.55, end=1.88)
        hyp_a = annotations.Label('a2', start=1.66, end=1.92)
        pair_a = alignment.LabelPair(ref_a, hyp_a)

        ref_b = annotations.Label('b1', start=1.55, end=1.87)
        hyp_b = annotations.Label('b2', start=1.66, end=1.92)
        pair_b = alignment.LabelPair(ref_b, hyp_b)

        assert pair_b < pair_a
Esempio n. 12
0
    def test_align_insertion(self):
        """An empty reference yields one pair holding only the hypothesis label."""
        reference = []
        hypothesis = [annotations.Label('y', 7.6, 15.2)]

        aligner = alignment.FullMatchingAligner(0.1)
        pairs = aligner.align(reference, hypothesis)

        expected = [
            alignment.LabelPair(None, annotations.Label('y', 7.6, 15.2))
        ]
        assert pairs == expected
Esempio n. 13
0
    def test_align_deletion(self):
        """An empty hypothesis yields one pair holding only the reference label."""
        reference = [annotations.Label('a', 4.2, 8.5)]
        hypothesis = []

        aligner = alignment.FullMatchingAligner(0.1)
        pairs = aligner.align(reference, hypothesis)

        expected = [
            alignment.LabelPair(annotations.Label('a', 4.2, 8.5), None)
        ]
        assert pairs == expected
Esempio n. 14
0
    def test_compare_multi_labels_with_same_times_returns_smaller_label_value(
            self):
        """With identical times, the segment with the smaller label values compares as smaller."""
        smaller_refs = [annotations.Label('a'), annotations.Label('a')]
        smaller = alignment.Segment(0.0, 1.0, ref=smaller_refs)

        larger_refs = [annotations.Label('a'), annotations.Label('b')]
        larger = alignment.Segment(0.0, 1.0, ref=larger_refs)

        assert smaller < larger
Esempio n. 15
0
    def test_eq_ignores_label_list_relation(self):
        """Labels compare equal although they belong to different label-lists.

        Note that the two values differ only in the case of the last letter.
        """
        first = annotations.Label('some label A', 1.0, 2.0)
        second = annotations.Label('some label a', 1.0, 2.0)

        first_list = annotations.LabelList(idx='one', labels=[first])
        second_list = annotations.LabelList(idx='another', labels=[second])

        # Each label references the list it was added to ...
        assert first.label_list == first_list
        assert second.label_list == second_list
        # ... and the labels are still considered equal.
        assert first == second
Esempio n. 16
0
    def test_ordering(self):
        """The pair whose reference label starts earlier sorts first."""
        ref_a = annotations.Label('a1', start=1.55, end=1.88)
        hyp_a = annotations.Label('a2', start=1.66, end=1.92)
        pair_a = alignment.LabelPair(ref_a, hyp_a)

        ref_b = annotations.Label('b1', start=1.59, end=1.88)
        hyp_b = annotations.Label('b2', start=1.66, end=1.92)
        pair_b = alignment.LabelPair(ref_b, hyp_b)

        assert pair_a < pair_b
Esempio n. 17
0
    def test_label_set_for_value(self, sample_outcome):
        """All labels with the value 'down' are returned, compared regardless of order."""
        label_set = sample_outcome.label_set_for_value('down')

        # (start, end) of every expected 'down' label; one range occurs twice.
        spans = [
            (10.35, 11.12),
            (39.28, 40.0),
            (31.20, 33.4),
            (39.28, 40.0),
            (28.20, 33.4),
        ]
        expected = [
            annotations.Label('down', start=start, end=end)
            for start, end in spans
        ]

        assert sorted(expected) == sorted(label_set.labels)
Esempio n. 18
0
    def test_align_deletion(self):
        """A reference label missing in the hypothesis is paired with None."""
        reference = ll_with_values(['a', 'b', 'c'])
        hypothesis = ll_with_values(['a', 'c'])

        aligner = alignment.LevenshteinAligner()
        pairs = aligner.align(reference, hypothesis)

        expected = [
            alignment.LabelPair(annotations.Label('a'), annotations.Label('a')),
            alignment.LabelPair(annotations.Label('b'), None),
            alignment.LabelPair(annotations.Label('c'), annotations.Label('c')),
        ]
        assert pairs == expected
Esempio n. 19
0
    def test_align_empty_ref_returns_all_none(self):
        """With an empty reference, every hypothesis label is paired with None."""
        reference = ll_with_values([])
        hypothesis = ll_with_values(['a', 'b', 'c'])

        aligner = alignment.LevenshteinAligner()
        pairs = aligner.align(reference, hypothesis)

        expected = [
            alignment.LabelPair(None, annotations.Label(value))
            for value in ('a', 'b', 'c')
        ]
        assert pairs == expected
Esempio n. 20
0
def corpus_with_more_labels():
    """
    Return the single-label corpus extended by a 'radio' label-list.

    Every utterance from 'utt-1' to 'utt-8' receives a label-list with the
    idx 'radio' holding one label: 'alpha' for utt-1 and utt-2, 'beta' for
    utt-3 up to utt-8.
    """
    corpus = resources.create_single_label_corpus()

    # Label value per utterance, in the order utt-1 ... utt-8.
    radio_values = ['alpha'] * 2 + ['beta'] * 6

    for number, value in enumerate(radio_values, start=1):
        corpus.utterances['utt-{}'.format(number)].set_label_list(
            annotations.LabelList(idx='radio',
                                  labels=[annotations.Label(value)]))

    return corpus
Esempio n. 21
0
    def test_align_empty_ref_returns_insertions(self):
        """An empty reference yields one pair holding only the hypothesis label."""
        reference = []
        hypothesis = [annotations.Label('greasy', 1.4, 1.9)]

        matcher = alignment.BipartiteMatchingAligner(
            substitution_penalty=2,
            non_overlap_penalty_weight=1
        )
        pairs = matcher.align(reference, hypothesis)

        expected = [
            alignment.LabelPair(None, annotations.Label('greasy', 1.4, 1.9))
        ]
        assert pairs == expected
Esempio n. 22
0
def ll_with_values(values):
    """Return a list with one ``annotations.Label`` per entry of ``values``, in order."""
    return [annotations.Label(value) for value in values]
Esempio n. 23
0
    def test_false_rejection_rate_with_no_occurences_returns_zero(self):
        """The false rejection rate is 0.0 when the first label-list passed is empty."""
        empty = annotations.LabelList(labels=[])
        single = annotations.LabelList(
            labels=[annotations.Label('four', 2.5, 3.0)])

        outcome = evaluator.KWSEvaluator().evaluate(empty, single)

        assert outcome.false_rejection_rate() == 0.0
Esempio n. 24
0
    def test_label_creation(self):
        """Positional arguments end up in value, start and end; meta is empty."""
        label = annotations.Label('value', 6.2, 8.9)

        observed = (label.value, label.start, label.end)

        assert observed == ('value', 6.2, 8.9)
        assert len(label.meta) == 0
Esempio n. 25
0
    def read_labels(path, corpus):
        """
        Read all label files in ``path`` and attach their labels to ``corpus``.

        Files matching ``<LABEL_FILE_PREFIX>_*.txt`` are processed. The part of
        the file name between that prefix and the ``.txt`` extension becomes
        the idx of the created label-lists. Records are read with
        ``textfile.read_separated_lines_generator`` (separator ``' '``,
        ``max_columns=4``) and interpreted as: utterance idx, start, end, value.

        An end of ``-1`` is replaced by infinity. If ``META_PATTERN`` matches
        the value, group 1 is used as label value and group 2 is parsed as
        JSON and stored as the meta of the label.

        Args:
            path: Directory searched for label files.
            corpus: Object whose ``utterances`` mapping provides the utterances
                that receive the label-lists via ``set_label_list``.
        """
        name_prefix = '{}_'.format(LABEL_FILE_PREFIX)
        extension = '.txt'
        pattern = os.path.join(path, '{}_*.txt'.format(LABEL_FILE_PREFIX))

        for label_path in glob.glob(pattern):
            file_name = os.path.basename(label_path)

            # Strip prefix and extension to obtain the label-list idx.
            key_end = len(file_name) - len(extension)
            label_list_idx = file_name[len(name_prefix):key_end]

            labels_by_utterance = collections.defaultdict(list)

            records = textfile.read_separated_lines_generator(label_path,
                                                              separator=' ',
                                                              max_columns=4)

            for record in records:
                value = record[3]
                start = float(record[1])
                end = float(record[2])

                # -1 marks an open end in the file.
                if end == -1:
                    end = float('inf')

                meta = None
                matched = META_PATTERN.match(value)

                if matched is not None:
                    meta = json.loads(matched.group(2))
                    value = matched.group(1)

                entry = annotations.Label(value, start, end, meta=meta)
                labels_by_utterance[record[0]].append(entry)

            for utterance_idx, utterance_labels in labels_by_utterance.items():
                label_list = annotations.LabelList(idx=label_list_idx,
                                                   labels=utterance_labels)
                corpus.utterances[utterance_idx].set_label_list(label_list)
Esempio n. 26
0
def generate_labels(n):
    """Return ``n`` labels with the values 'label-0' up to 'label-<n-1>'."""
    return [
        annotations.Label('label-{}'.format(index))
        for index in range(n)
    ]
Esempio n. 27
0
    def test_align_empty_hypothesis(self):
        """With an empty hypothesis, one segment spanning the reference label is returned."""
        reference = annotations.LabelList(labels=[
            annotations.Label('b', 4, 8)
        ])
        hypothesis = annotations.LabelList(labels=[])

        aligner = alignment.InvariantSegmentAligner()
        segments = aligner.align(reference, hypothesis)

        assert len(segments) == 1

        only_segment = segments[0]
        assert only_segment.start == 4
        assert only_segment.end == 8
        assert only_segment.ref == [annotations.Label('b', 4, 8)]
        assert only_segment.hyp == []
Esempio n. 28
0
    def test_label_creation_with_info(self):
        """The meta dictionary passed on creation is available on the label."""
        label = annotations.Label('value', 6.2, 8.9, meta={'something': 2})

        observed = (label.value, label.start, label.end)

        assert observed == ('value', 6.2, 8.9)
        assert len(label.meta) == 1
        assert label.meta['something'] == 2
Esempio n. 29
0
    def test_merge_corpus_label_lists(self):
        """After merging, utterance 'utt-2_1' holds the expected 'default' label-list."""
        target = resources.create_dataset()
        source = resources.create_multi_label_corpus()

        target.merge_corpus(source)

        merged_lists = target.utterances['utt-2_1'].label_lists
        assert set(merged_lists.keys()) == {
            'default'
        }

        default_list = target.utterances['utt-2_1'].label_lists['default']
        expected = annotations.LabelList(labels=[
            annotations.Label('music', 0, 5),
            annotations.Label('speech', 5, 12),
            annotations.Label('music', 13, 15)
        ])

        assert default_list == expected
Esempio n. 30
0
    def test_set_label_list(self):
        """Setting a fourth label-list keeps the existing ones and links it to the utterance."""
        delta = annotations.LabelList(idx='delta', labels=[
            annotations.Label('y', 0.0, 3.3),
            annotations.Label('t', 3.8, 7.9)
        ])

        self.utt.set_label_list(delta)

        assert len(self.utt.label_lists) == 4

        # (idx, label-list) expected on the utterance after the call.
        expected = (
            ('alpha', self.ll_1),
            ('bravo', self.ll_2),
            ('charlie', self.ll_3),
            ('delta', delta),
        )

        for idx, label_list in expected:
            assert self.utt.label_lists[idx] == label_list

        # Every label-list has to point back to the utterance.
        for __, label_list in expected:
            assert label_list.utterance == self.utt