예제 #1
0
    def test_ranges_zero_to_end(self):
        """
        Walk through the ranges of a list where one label runs from 0 to
        infinity and a second, finite label lies inside of it.

        Three ranges are expected, after which the generator is exhausted.
        """
        labels = [
            Label('a', 0, float('inf')),
            Label('b', 5.1, 8.9),
        ]
        label_list = LabelList(labels=labels)

        range_iter = label_list.ranges()

        # (expected start, expected end, labels that must be in the range)
        expectations = [
            (0, 5.1, [labels[0]]),
            (5.1, 8.9, [labels[0], labels[1]]),
            (8.9, float('inf'), [labels[0]]),
        ]

        for exp_start, exp_end, exp_labels in expectations:
            current = next(range_iter)
            assert exp_start == current[0]
            assert exp_end == current[1]
            for exp_label in exp_labels:
                assert exp_label in current[2]

        # No further range may be produced.
        with pytest.raises(StopIteration):
            next(range_iter)
예제 #2
0
    def test_split_without_cutting_points_raises_error(self):
        """Splitting with an empty list of cutting points raises ``ValueError``."""
        adjacent_labels = [Label('a', 0.0, 9.0), Label('c', 9.0, 12.0)]
        label_list = LabelList(idx='test', labels=adjacent_labels)
        no_cutting_points = []

        with pytest.raises(ValueError):
            label_list.split(no_cutting_points)
예제 #3
0
    def test_tokenized(self):
        """``tokenized`` returns the tokens of all label values as one flat list."""
        first = Label('a u t', 0.0, 4.0)
        second = Label('b x', 4.0, 8.0)
        third = Label('c', 9.0, 12.0)
        label_list = LabelList(idx='test', labels=[first, second, third])

        tokens = label_list.tokenized()

        assert tokens == ['a', 'u', 't', 'b', 'x', 'c']
예제 #4
0
    def test_join_with_custom_delimiter(self):
        """``join`` places the given delimiter between the label values."""
        first = Label('a', 0.0, 4.0)
        second = Label('b', 4.0, 8.0)
        third = Label('c', 9.0, 12.0)
        label_list = LabelList(idx='test', labels=[first, second, third])

        joined = label_list.join(delimiter=' - ')

        assert joined == 'a - b - c'
예제 #5
0
    def test_join(self):
        """``join`` called without arguments yields the values separated by spaces."""
        first = Label('a', 0.0, 4.0)
        second = Label('b', 4.0, 8.0)
        third = Label('c', 9.0, 12.0)
        label_list = LabelList(idx='test', labels=[first, second, third])

        joined = label_list.join()

        assert joined == 'a b c'
예제 #6
0
    def test_relabel_maps_a_onto_b(self):
        """A projection from ``('a',)`` to ``'b'`` renames the label, keeping its times."""
        projections = {('a', ): 'b'}
        source = LabelList(labels=[Label('a', 3.2, 4.5)])
        expected = LabelList(labels=[Label('b', 3.2, 4.5)])

        actual = relabeling.relabel(source, projections)

        assert actual == expected
예제 #7
0
    def test_tokenized_raises_error_if_overlap_is_higher_than_threshold(self):
        """
        ``tokenized`` with default arguments raises ``ValueError`` when the
        first label (ending at 4.0) overlaps the second (starting at 3.85).
        """
        overlapping_labels = [
            Label('a u t', 0.0, 4.0),
            Label('b x', 3.85, 8.0),
            Label('c', 9.0, 12.0),
        ]
        label_list = LabelList(idx='test', labels=overlapping_labels)

        with pytest.raises(ValueError):
            label_list.tokenized()
예제 #8
0
    def test_join_raises_error_if_overlap_is_higher_than_threshold(self):
        """
        ``join`` raises ``ValueError`` when labels overlap (4.0 vs. 3.8)
        while ``overlap_threshold`` is set to 0.1.
        """
        overlapping_labels = [
            Label('a', 0.0, 4.0),
            Label('b', 3.8, 8.0),
            Label('c', 9.0, 12.0),
        ]
        label_list = LabelList(idx='test', labels=overlapping_labels)

        with pytest.raises(ValueError):
            label_list.join(overlap_threshold=0.1)
예제 #9
0
    def test_tokenized_raises_error_if_overlap_is_higher_than_threshold_given_an_endless_label(self):
        """
        ``tokenized`` raises ``ValueError`` when a label without a finite end
        (4.5 to infinity) covers a later label (9.0 to 12.0).
        """
        endless = Label('b x', 4.5, float('inf'))
        labels = [
            Label('a u t', 0.0, 4.0),
            endless,
            Label('c', 9.0, 12.0),
        ]
        label_list = LabelList(idx='test', labels=labels)

        with pytest.raises(ValueError):
            label_list.tokenized()
예제 #10
0
def create_sample_dataset(temp_dir):
    """
    Build a small corpus fixture located at ``temp_dir``.

    The corpus is populated with:

    * three tracks (``wav_1``, ``wav_2``, ``wav_3``; the last one is a flac file),
    * three issuers (two speakers and one plain issuer),
    * four utterances, each with a single word-transcript label,
    * two subviews: ``train`` (utt-1, utt-2, utt-3) and ``dev`` (utt-4).

    Args:
        temp_dir: Location of the corpus; converted with ``str`` before
            being handed to ``audiomate.Corpus``.

    Returns:
        The populated corpus.
    """
    corpus = audiomate.Corpus(str(temp_dir))

    # --- tracks ---------------------------------------------------------
    wav_1_path = resources.sample_wav_file('wav_1.wav')
    wav_2_path = resources.sample_wav_file('wav_2.wav')
    flac_path = resources.get_resource_path(
        ['audio_formats', 'flac_1_16k_16b.flac'])

    track_1 = corpus.new_file(wav_1_path, track_idx='wav_1')
    track_2 = corpus.new_file(wav_2_path, track_idx='wav_2')
    track_3 = corpus.new_file(flac_path, track_idx='wav_3')

    # --- issuers --------------------------------------------------------
    male_speaker = Speaker('spk-1', gender=Gender.MALE)
    female_speaker = Speaker('spk-2', gender=Gender.FEMALE)
    plain_issuer = Issuer('spk-3')

    corpus.import_issuers([male_speaker, female_speaker, plain_issuer])

    # --- utterances -----------------------------------------------------
    # NOTE(review): 2.5951875 is presumably the duration (seconds) of the
    # track behind utt-1 -- confirm against the resource file.
    utt_1 = corpus.new_utterance(
        'utt-1', track_1.idx, issuer_idx=male_speaker.idx)
    # The second track is shared by two consecutive utterances.
    utt_2 = corpus.new_utterance(
        'utt-2', track_2.idx, issuer_idx=female_speaker.idx,
        start=0, end=1.5)
    utt_3 = corpus.new_utterance(
        'utt-3', track_2.idx, issuer_idx=female_speaker.idx,
        start=1.5, end=2.5)
    # NOTE(review): 5.0416875 is presumably the duration (seconds) of the
    # track behind utt-4 -- confirm against the resource file.
    utt_4 = corpus.new_utterance(
        'utt-4', track_3.idx, issuer_idx=plain_issuer.idx)

    # --- transcripts ----------------------------------------------------
    transcripts = (
        (utt_1, 'who am i'),
        (utt_2, 'who are you'),
        (utt_3, 'who is he'),
        (utt_4, 'who are they'),
    )
    for utterance, text in transcripts:
        transcript = LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                               labels=[Label(text)])
        utterance.set_label_list(transcript)

    # --- subviews -------------------------------------------------------
    train_ids = {'utt-1', 'utt-2', 'utt-3'}
    train_criterion = subview.MatchingUtteranceIdxFilter(
        utterance_idxs=train_ids)
    train_view = subview.Subview(corpus, filter_criteria=[train_criterion])

    dev_ids = {'utt-4'}
    dev_criterion = subview.MatchingUtteranceIdxFilter(utterance_idxs=dev_ids)
    dev_view = subview.Subview(corpus, filter_criteria=[dev_criterion])

    corpus.import_subview('train', train_view)
    corpus.import_subview('dev', dev_view)

    return corpus
예제 #11
0
    def test_addl(self):
        """``addl`` on an empty list yields a list equal to one built with that label."""
        label_list = LabelList()
        expected = LabelList(labels=[
            Label('a', 12.3, 19.3),
        ])

        # Starts out empty.
        assert len(label_list) == 0

        label_list.addl('a', 12.3, 19.3)

        assert label_list == expected
예제 #12
0
    def test_split_with_cutting_point_after_last_label(self):
        """
        A cutting point beyond the end of the last label gives two parts:
        the first holds both labels, the second is empty.
        """
        labels = [Label('a', 0.0, 4.0), Label('c', 4.0, 8.9)]
        label_list = LabelList(idx='test', labels=labels)

        parts = label_list.split([10.5])

        assert len(parts) == 2
        before_cut = parts[0]
        after_cut = parts[1]
        assert len(before_cut) == 2
        assert len(after_cut) == 0
예제 #13
0
    def test_label_values(self):
        """``label_values`` reports each distinct value once, here as ``a, b, c``."""
        labels = [
            Label('a', 3.2, 4.5),
            Label('b', 5.1, 8.9),
            Label('c', 7.2, 10.5),
            Label('a', 10.5, 14),
            Label('c', 13, 14),
        ]
        label_list = LabelList(labels=labels)

        values = label_list.label_values()

        assert values == ['a', 'b', 'c']
예제 #14
0
    def test_split_cutting_point_on_boundary_doesnot_split_label(self):
        """
        A cutting point exactly on the boundary between two labels (9.0)
        gives two parts with one label each.
        """
        labels = [Label('a', 0.0, 9.0), Label('c', 9.0, 12.0)]
        label_list = LabelList(idx='test', labels=labels)

        parts = label_list.split([9.0])

        assert len(parts) == 2

        left_part = parts[0]
        right_part = parts[1]
        assert len(left_part) == 1
        assert len(right_part) == 1
예제 #15
0
    def test_add(self):
        """``add`` stores the label and sets the label's ``label_list`` back-reference."""
        label_list = LabelList()

        # Starts out empty.
        assert len(label_list) == 0

        new_label = Label('some text')
        label_list.add(new_label)

        assert len(label_list) == 1
        assert new_label.label_list == label_list
        ordered = sorted(label_list)
        assert ordered[0] == new_label
예제 #16
0
    def test_split_single_label_that_doesnt_start_at_zero(self):
        """
        Split one label (8.0 to 12.0) at 11.2 with ``shift_times=True``.

        The first part keeps the original start and ends at the cut; the
        second part is expected to run from 0.0 to approximately 0.8.
        """
        label_list = LabelList(idx='test', labels=[Label('c', 8.0, 12.0)])

        parts = label_list.split([11.2], shift_times=True)

        assert len(parts) == 2

        head = parts[0]
        assert len(head) == 1
        head_labels = sorted(head)
        assert head_labels[0] == Label('c', 8.0, 11.2)

        tail = parts[1]
        assert len(tail) == 1
        tail_labels = sorted(tail)
        # approx: 12.0 - 11.2 is not exactly 0.8 in floating point.
        assert tail_labels[0] == Label('c', 0.0, pytest.approx(0.8))
예제 #17
0
    def test_with_label_values_sets_correct_idx(self):
        """
        ``with_label_values`` given ``idx='letters'`` equals a list built by
        hand with the same idx and one label per value.
        """
        expected = LabelList(
            idx='letters',
            labels=[Label('a'), Label('b'), Label('c')],
        )

        actual = LabelList.with_label_values(['a', 'b', 'c'], idx='letters')

        assert actual == expected
예제 #18
0
    def test_with_label_values(self):
        """
        ``with_label_values`` equals a list built by hand with one label
        per given value.
        """
        expected = LabelList(labels=[Label('a'), Label('b'), Label('c')])

        actual = LabelList.with_label_values(['a', 'b', 'c'])

        assert actual == expected
예제 #19
0
    def test_ranges_include_labels(self):
        """
        With ``include_labels=['a']`` only the range of label ``a`` is
        produced, after which the generator is exhausted.
        """
        labels = [
            Label('a', 3.2, 4.5),
            Label('b', 5.1, 8.9),
        ]
        label_list = LabelList(labels=labels)

        range_iter = label_list.ranges(include_labels=['a'])

        only_range = next(range_iter)
        assert 3.2 == only_range[0]
        assert 4.5 == only_range[1]
        assert labels[0] in only_range[2]

        # Label 'b' is filtered out, so nothing else follows.
        with pytest.raises(StopIteration):
            next(range_iter)
예제 #20
0
    def test_merge_overlaps_with_threshold(self):
        """
        Two equally named labels separated by a 0.05 gap are merged into
        one label when ``threshold=0.1`` is used.
        """
        nearly_adjacent = [
            Label('c', 9.0, 12.0),
            Label('c', 12.05, 14.0),
        ]
        label_list = LabelList(idx='test', labels=nearly_adjacent)
        expected = LabelList(idx='test', labels=[
            Label('c', 9.0, 14.0),
        ])

        # Operates in place; the return value is not inspected.
        label_list.merge_overlaps(threshold=0.1)

        assert label_list == expected
예제 #21
0
    def test_split_label_within_cutting_points_is_included(self):
        """
        A label lying entirely between two cutting points shows up
        unchanged in the middle part of the split result.
        """
        labels = [
            Label('a', 0.0, 4.0),
            Label('b', 4.0, 8.0),
            Label('c', 9.0, 12.0),
        ]
        label_list = LabelList(idx='test', labels=labels)

        parts = label_list.split([1.9, 10.5])
        middle_part = parts[1]

        assert len(middle_part) == 3

        # Second label (in sort order) of the middle part is the untouched 'b'.
        untouched = sorted(middle_part)[1]
        assert untouched.value == 'b'
        assert untouched.start == 4.0
        assert untouched.end == 8.0
예제 #22
0
    def test_all_tokens(self):
        """``all_tokens`` yields every distinct token once across all labels."""
        label_list = LabelList(labels=[
            Label(text)
            for text in ('some text', 'more text', 'text again')
        ])

        tokens = sorted(label_list.all_tokens())

        assert tokens == ['again', 'more', 'some', 'text']
예제 #23
0
    def test_label_count(self):
        """``label_count`` maps each label value to its number of occurrences."""
        labels = [
            Label('a', 3.2, 4.5),
            Label('b', 5.1, 8.9),
            Label('c', 7.2, 10.5),
            Label('a', 10.5, 14),
            Label('c', 13, 14),
        ]
        label_list = LabelList(labels=labels)

        counts = label_list.label_count()

        expected_counts = (('a', 2), ('b', 1), ('c', 2))
        for value, expected in expected_counts:
            assert expected == counts[value]
예제 #24
0
    def test_label_total_duration(self):
        """
        ``label_total_duration`` maps each label value to the summed
        duration of its labels (compared approximately).
        """
        labels = [
            Label('a', 3.2, 4.5),
            Label('b', 5.1, 8.9),
            Label('c', 7.2, 10.5),
            Label('a', 10.5, 14),
            Label('c', 13, 14),
        ]
        label_list = LabelList(labels=labels)

        durations = label_list.label_total_duration()

        # a: 1.3 + 3.5, b: 3.8, c: 3.3 + 1.0
        expected_durations = (('a', 4.8), ('b', 3.8), ('c', 4.3))
        for value, expected in expected_durations:
            assert durations[value] == pytest.approx(expected)
예제 #25
0
    def test_update(self):
        """
        ``update`` adds all given labels and sets the ``label_list``
        back-reference on each of them.
        """
        label_list = LabelList()

        # Starts out empty.
        assert len(label_list) == 0

        new_labels = [
            Label('some text'),
            Label('more text'),
            Label('text again'),
        ]
        label_list.update(new_labels)

        assert len(label_list) == 3
        for added in new_labels:
            assert added.label_list == label_list
예제 #26
0
    def test_relabel_removes_unwanted_labels(self):
        """
        A projection onto the empty string drops the label; labels mapped
        onto themselves are kept.
        """
        kept = Label('b', 4.4, 5.1)
        source = LabelList(labels=[Label('a', 3.2, 4.4), kept])
        expected = LabelList(labels=[Label('b', 4.4, 5.1)])

        projections = {
            ('a', ): '',
            ('b', ): 'b',
        }
        actual = relabeling.relabel(source, projections)

        assert actual == expected
예제 #27
0
    def test_split_first_label_not_splitted(self):
        """
        Split two adjacent labels at 11.2 with ``shift_times=True``.

        The first label stays whole in the first part; only the second
        label is cut, and its remainder in the second part is expected to
        run from 0.0 to approximately 0.8.
        """
        labels = [Label('a', 0.0, 9.0), Label('c', 9.0, 12.0)]
        label_list = LabelList(idx='test', labels=labels)

        parts = label_list.split([11.2], shift_times=True)

        assert len(parts) == 2

        head = parts[0]
        assert len(head) == 2
        assert sorted(head)[0] == Label('a', 0.0, 9.0)
        assert sorted(head)[1] == Label('c', 9.0, 11.2)

        tail = parts[1]
        assert len(tail) == 1
        # approx: 12.0 - 11.2 is not exactly 0.8 in floating point.
        assert sorted(tail)[0] == Label('c', 0.0, pytest.approx(0.8))
예제 #28
0
    def test_labels_in_range_returns_only_fully_included(self):
        """
        With ``fully_included=True`` only labels lying completely inside
        the queried range (7.2 to 14.99) are returned.
        """
        labels = [
            Label('a', 3.2, 4.5),
            Label('b', 5.1, 8.9),
            Label('c', 7.2, 10.5),
            Label('a', 10.5, 14),
            Label('c', 13, 15),
        ]
        label_list = LabelList(labels=labels)

        found = label_list.labels_in_range(7.2, 14.99, fully_included=True)

        expected = [
            Label('c', 7.2, 10.5),
            Label('a', 10.5, 14),
        ]
        assert sorted(found) == expected
예제 #29
0
    def test_apply(self):
        """``apply`` runs the given function on every label of the list."""
        label_list = LabelList(labels=[
            Label('some text'),
            Label('more text'),
            Label('text again'),
        ])

        def add_prefix(label):
            # Mutates the label in place.
            label.value = 'app {}'.format(label.value)

        label_list.apply(add_prefix)

        ordered = sorted(label_list)
        expected_values = (
            'app more text',
            'app some text',
            'app text again',
        )
        for position, expected in enumerate(expected_values):
            assert ordered[position].value == expected
예제 #30
0
    def test_labels_in_range(self):
        """
        ``labels_in_range`` with default arguments returns every label
        that overlaps the queried range (8.2 to 12.5), even partially.
        """
        labels = [
            Label('a', 3.2, 4.5),
            Label('b', 5.1, 8.9),
            Label('c', 7.2, 10.5),
            Label('a', 10.5, 14),
            Label('c', 13, 14),
        ]
        label_list = LabelList(labels=labels)

        found = label_list.labels_in_range(8.2, 12.5)

        expected = [
            Label('b', 5.1, 8.9),
            Label('c', 7.2, 10.5),
            Label('a', 10.5, 14),
        ]
        assert sorted(found) == expected