Beispiel #1
0
    def test_ranges_zero_to_end(self):
        """Check the ranges produced when one label runs from 0 to infinity.

        Label 'a' covers 0..inf and label 'b' covers 5.1..8.9, so exactly
        three consecutive ranges are expected before the iterator ends.
        """
        source_labels = [
            Label('a', 0, float('inf')),
            Label('b', 5.1, 8.9),
        ]
        label_list = LabelList(labels=source_labels)

        range_iter = label_list.ranges()

        # First range: up to the start of 'b'; 'a' must be listed.
        segment = next(range_iter)
        assert segment[0] == 0
        assert segment[1] == 5.1
        assert source_labels[0] in segment[2]

        # Second range: the span of 'b'; both labels must be listed.
        segment = next(range_iter)
        assert segment[0] == 5.1
        assert segment[1] == 8.9
        assert source_labels[0] in segment[2]
        assert source_labels[1] in segment[2]

        # Third range: from the end of 'b' to infinity; 'a' must be listed.
        segment = next(range_iter)
        assert segment[0] == 8.9
        assert segment[1] == float('inf')
        assert source_labels[0] in segment[2]

        # Nothing may follow the open-ended range.
        with pytest.raises(StopIteration):
            next(range_iter)
Beispiel #2
0
    def test_split_without_cutting_points_raises_error(self):
        """Expect a ValueError when split() receives no cutting points."""
        label_list = LabelList(
            idx='test',
            labels=[
                Label('a', 0.0, 9.0),
                Label('c', 9.0, 12.0),
            ],
        )
        no_cutting_points = []

        with pytest.raises(ValueError):
            label_list.split(no_cutting_points)
Beispiel #3
0
    def test_no_missing_projections_if_projection_complete(self):
        """Expect no missing projections when five label combinations are mapped.

        The mapping holds ('b',), ('a', 'b'), ('a', 'b', 'c'), ('a', 'c')
        and ('c',) for three overlapping labels 'b', 'a' and 'c'.
        """
        complete_mapping = {
            ('b',): 'foo',
            ('a', 'b'): 'foo',
            ('a', 'b', 'c'): 'foo',
            ('a', 'c'): 'foo',
            ('c',): 'bar',
        }

        overlapping = LabelList(
            labels=[
                Label('b', 3.2, 4.5),
                Label('a', 4.0, 4.9),
                Label('c', 4.2, 5.1),
            ],
        )

        missing = relabeling.find_missing_projections(
            overlapping,
            complete_mapping,
        )

        assert len(missing) == 0
Beispiel #4
0
    def test_all_projections_missing_if_no_projections_defined(self):
        """Expect all five label combinations to be reported for an empty mapping."""
        overlapping = LabelList(
            labels=[
                Label('b', 3.2, 4.5),
                Label('a', 4.0, 4.9),
                Label('c', 4.2, 5.1),
            ],
        )

        missing = relabeling.find_missing_projections(overlapping, {})

        expected_combinations = [
            ('b',),
            ('a', 'b'),
            ('a', 'b', 'c'),
            ('a', 'c'),
            ('c',),
        ]

        assert len(missing) == 5
        for combination in expected_combinations:
            assert combination in missing
Beispiel #5
0
    def test_tokenized(self):
        """Expect tokenized() to return the tokens of all labels as one flat list."""
        label_list = LabelList(
            idx='test',
            labels=[
                Label('a u t', 0.0, 4.0),
                Label('b x', 4.0, 8.0),
                Label('c', 9.0, 12.0),
            ],
        )

        tokens = label_list.tokenized()

        assert tokens == ['a', 'u', 't', 'b', 'x', 'c']
Beispiel #6
0
    def test_join_with_custom_delimiter(self):
        """Expect join() to place the given delimiter between label values."""
        label_list = LabelList(
            idx='test',
            labels=[
                Label('a', 0.0, 4.0),
                Label('b', 4.0, 8.0),
                Label('c', 9.0, 12.0),
            ],
        )

        joined = label_list.join(delimiter=' - ')

        assert joined == 'a - b - c'
Beispiel #7
0
    def test_join(self):
        """Expect join() without arguments to yield 'a b c' for these labels."""
        label_list = LabelList(
            idx='test',
            labels=[
                Label('a', 0.0, 4.0),
                Label('b', 4.0, 8.0),
                Label('c', 9.0, 12.0),
            ],
        )

        joined = label_list.join()

        assert joined == 'a b c'
Beispiel #8
0
    def test_len(self):
        """Expect len() of a list built from three labels to be three."""
        texts = ('some text', 'more text', 'text again')
        label_list = LabelList(labels=[Label(text) for text in texts])

        assert len(label_list) == 3
Beispiel #9
0
    def test_relabel_maps_a_onto_b(self):
        """Expect label 'a' to become 'b' with its start and end unchanged."""
        source = LabelList(labels=[Label('a', 3.2, 4.5)])
        mapping = {('a',): 'b'}

        relabeled = relabeling.relabel(source, mapping)

        assert relabeled == LabelList(labels=[Label('b', 3.2, 4.5)])
Beispiel #10
0
def create_sample_dataset(temp_dir):
    """Build a small sample corpus for tests.

    The corpus holds three tracks, three issuers, four utterances with one
    word-transcript label each, and the subviews ``train`` (utt-1 to utt-3)
    and ``dev`` (utt-4).

    Args:
        temp_dir: Location passed, converted to ``str``, to
            ``audiomate.Corpus``.

    Returns:
        The populated corpus.
    """
    corpus = audiomate.Corpus(str(temp_dir))

    # Resolve the audio fixtures, then register each one as a track.
    wav_1_path = resources.sample_wav_file('wav_1.wav')
    wav_2_path = resources.sample_wav_file('wav_2.wav')
    flac_path = resources.get_resource_path(
        ['audio_formats', 'flac_1_16k_16b.flac'],
    )

    track_1 = corpus.new_file(wav_1_path, track_idx='wav_1')
    track_2 = corpus.new_file(wav_2_path, track_idx='wav_2')
    track_3 = corpus.new_file(flac_path, track_idx='wav_3')

    male_speaker = Speaker('spk-1', gender=Gender.MALE)
    female_speaker = Speaker('spk-2', gender=Gender.FEMALE)
    plain_issuer = Issuer('spk-3')

    corpus.import_issuers([male_speaker, female_speaker, plain_issuer])

    # 2.5951875 (presumably the duration of wav_1 in seconds -- TODO confirm)
    whole_wav_1 = corpus.new_utterance(
        'utt-1', track_1.idx, issuer_idx=male_speaker.idx,
    )
    # The second track is cut into two consecutive utterances.
    wav_2_head = corpus.new_utterance(
        'utt-2', track_2.idx, issuer_idx=female_speaker.idx,
        start=0, end=1.5,
    )
    wav_2_tail = corpus.new_utterance(
        'utt-3', track_2.idx, issuer_idx=female_speaker.idx,
        start=1.5, end=2.5,
    )
    # 5.0416875 (presumably the duration of the flac file -- TODO confirm)
    whole_flac = corpus.new_utterance(
        'utt-4', track_3.idx, issuer_idx=plain_issuer.idx,
    )

    # Attach one word-transcript label list to every utterance.
    transcripts = (
        (whole_wav_1, 'who am i'),
        (wav_2_head, 'who are you'),
        (wav_2_tail, 'who is he'),
        (whole_flac, 'who are they'),
    )
    for utterance, text in transcripts:
        utterance.set_label_list(
            LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                      labels=[Label(text)]))

    # Both subviews are built before either one is imported.
    train_view = subview.Subview(
        corpus,
        filter_criteria=[
            subview.MatchingUtteranceIdxFilter(
                utterance_idxs={'utt-1', 'utt-2', 'utt-3'}),
        ],
    )
    dev_view = subview.Subview(
        corpus,
        filter_criteria=[
            subview.MatchingUtteranceIdxFilter(utterance_idxs={'utt-4'}),
        ],
    )

    corpus.import_subview('train', train_view)
    corpus.import_subview('dev', dev_view)

    return corpus
Beispiel #11
0
    def test_tokenized_raises_error_if_overlap_is_higher_than_threshold_given_an_endless_label(self):
        """Expect a ValueError when an endless label overlaps a later label.

        Label 'b x' starts at 4.5 and ends at infinity, so it overlaps
        label 'c' (9.0 to 12.0).
        """
        endless_end = float('inf')
        label_list = LabelList(
            idx='test',
            labels=[
                Label('a u t', 0.0, 4.0),
                Label('b x', 4.5, endless_end),
                Label('c', 9.0, 12.0),
            ],
        )

        with pytest.raises(ValueError):
            label_list.tokenized()
Beispiel #12
0
    def test_tokenized_raises_error_if_overlap_is_higher_than_threshold(self):
        """Expect a ValueError when two labels overlap by 0.15.

        Label 'a u t' ends at 4.0 while 'b x' already starts at 3.85;
        tokenized() is called without arguments.
        """
        label_list = LabelList(
            idx='test',
            labels=[
                Label('a u t', 0.0, 4.0),
                Label('b x', 3.85, 8.0),
                Label('c', 9.0, 12.0),
            ],
        )

        with pytest.raises(ValueError):
            label_list.tokenized()
Beispiel #13
0
    def test_join_raises_error_if_overlap_is_higher_than_threshold(self):
        """Expect a ValueError when labels overlap by 0.2 and the threshold is 0.1."""
        label_list = LabelList(
            idx='test',
            labels=[
                Label('a', 0.0, 4.0),
                Label('b', 3.8, 8.0),
                Label('c', 9.0, 12.0),
            ],
        )
        threshold = 0.1

        with pytest.raises(ValueError):
            label_list.join(overlap_threshold=threshold)
Beispiel #14
0
    def test_end(self):
        """Expect ``end`` to be the largest end time among the labels (14.0)."""
        unordered_labels = [
            Label('c', 9.0, 12.0),
            Label('a', 0.2, 4.0),
            Label('b', 4.0, 8.0),
            Label('c', 12.05, 14.0),
            Label('b', 7.9, 9.3),
        ]
        label_list = LabelList(idx='test', labels=unordered_labels)

        assert label_list.end == 14.0
Beispiel #15
0
    def test_split_with_cutting_point_after_last_label(self):
        """Expect an empty second part when the cut lies after every label.

        Both labels end before 10.5, so the first part keeps both of them.
        """
        label_list = LabelList(
            idx='test',
            labels=[
                Label('a', 0.0, 4.0),
                Label('c', 4.0, 8.9),
            ],
        )

        parts = label_list.split([10.5])

        assert len(parts) == 2
        assert len(parts[0]) == 2
        assert len(parts[1]) == 0
Beispiel #16
0
    def test_label_values(self):
        """Expect label_values() to return ['a', 'b', 'c'] despite repeated values."""
        labels_with_repeats = [
            Label('a', 3.2, 4.5),
            Label('b', 5.1, 8.9),
            Label('c', 7.2, 10.5),
            Label('a', 10.5, 14),
            Label('c', 13, 14),
        ]
        label_list = LabelList(labels=labels_with_repeats)

        values = label_list.label_values()

        assert values == ['a', 'b', 'c']
Beispiel #17
0
    def test_total_length(self):
        """Expect ``total_length`` to be 15 for these five labels.

        The label values have 3, 2, 4, 4 and 2 characters, which also sum
        to 15.
        """
        label_list = LabelList(
            labels=[
                Label('abc', 3.2, 4.5),
                Label('bg', 5.1, 8.9),
                Label('caaf', 7.2, 10.5),
                Label('yxva', 10.5, 14),
                Label('cy', 13, 14),
            ],
        )

        assert label_list.total_length == 15
Beispiel #18
0
    def test_end_returns_inf(self):
        """Expect ``end`` to be infinite when one label has an infinite end."""
        infinity = float('inf')
        label_list = LabelList(
            idx='test',
            labels=[
                Label('c', 9.0, 12.0),
                Label('a', 0.2, 4.0),
                Label('b', 4.0, infinity),
                Label('c', 12.05, 14.0),
                Label('b', 7.9, 9.3),
            ],
        )

        assert label_list.end == float('inf')
Beispiel #19
0
    def test_split_unsorted_label_list(self):
        """Expect time-ordered parts even when the labels are given out of order.

        Three cutting points produce four parts, and labels crossing a
        cutting point are divided at it.
        """
        label_list = LabelList(
            idx='test',
            labels=[
                Label('a', 0.0, 4.0),
                Label('c', 9.0, 12.0),
                Label('b', 4.0, 8.0),
            ],
        )

        parts = label_list.split([1.9, 6.2, 10.5])

        # One list of labels per expected part, in time order.
        expected_labels_per_part = [
            [Label('a', 0.0, 1.9)],
            [Label('a', 1.9, 4.0), Label('b', 4.0, 6.2)],
            [Label('b', 6.2, 8.0), Label('c', 9.0, 10.5)],
            [Label('c', 10.5, 12.0)],
        ]
        expected = [
            LabelList(idx='test', labels=part_labels)
            for part_labels in expected_labels_per_part
        ]

        assert parts == expected
Beispiel #20
0
    def test_split_with_overlap(self):
        """Expect each part to reach 2.0 beyond its cutting points.

        With ``overlap=2.0`` the expected labels extend up to 2.0 past each
        cut, limited by the labels' own boundaries.
        """
        label_list = LabelList(
            idx='test',
            labels=[
                Label('a', 0.0, 4.0),
                Label('b', 4.0, 8.0),
                Label('c', 9.0, 12.0),
            ],
        )

        parts = label_list.split([1.9, 6.2, 10.5], overlap=2.0)

        first = LabelList(idx='test', labels=[Label('a', 0.0, 3.9)])
        second = LabelList(
            idx='test',
            labels=[Label('a', 0, 4.0), Label('b', 4.0, 8.0)],
        )
        third = LabelList(
            idx='test',
            labels=[Label('b', 4.2, 8.0), Label('c', 9.0, 12.0)],
        )
        fourth = LabelList(idx='test', labels=[Label('c', 9.0, 12.0)])

        assert parts == [first, second, third, fourth]
Beispiel #21
0
    def test_split_cutting_point_on_boundary_doesnot_split_label(self):
        """Expect one label per part when the cut sits on a label boundary.

        The cut at 9.0 is where 'a' ends and 'c' starts.
        """
        label_list = LabelList(
            idx='test',
            labels=[
                Label('a', 0.0, 9.0),
                Label('c', 9.0, 12.0),
            ],
        )

        parts = label_list.split([9.0])

        assert len(parts) == 2

        # Neither label is duplicated into the neighbouring part.
        assert len(parts[0]) == 1
        assert len(parts[1]) == 1
Beispiel #22
0
    def test_with_label_values(self):
        """Expect with_label_values() to equal a list built from one Label per value."""
        values = ['a', 'b', 'c']

        built = LabelList.with_label_values(values)

        expected = LabelList(labels=[Label('a'), Label('b'), Label('c')])
        assert built == expected
Beispiel #23
0
    def test_split_single_label_that_doesnt_start_at_zero(self):
        """Check split() with ``shift_times=True`` on a label starting at 8.0.

        The first part is expected to keep the label as 8.0..11.2, while
        the second part is expected to hold it as 0.0..~0.8.
        """
        label_list = LabelList(idx='test', labels=[Label('c', 8.0, 12.0)])

        parts = label_list.split([11.2], shift_times=True)

        assert len(parts) == 2

        head = parts[0]
        assert len(head) == 1
        assert sorted(head)[0] == Label('c', 8.0, 11.2)

        tail = parts[1]
        assert len(tail) == 1
        # The end is compared approximately (floating point difference).
        assert sorted(tail)[0] == Label('c', 0.0, pytest.approx(0.8))
Beispiel #24
0
    def test_is(self):
        """Check that a second name bound to a LabelList is the same object.

        NOTE(review): this only exercises Python's identity semantics and
        does not call any LabelList behaviour -- confirm the intent.
        """
        original = LabelList(
            idx='test',
            labels=[
                Label('a', 0.0, 4.0),
                Label('b', 4.0, 8.0),
                Label('b', 7.9, 9.3),
                Label('c', 9.0, 12.0),
                Label('c', 12.05, 14.0),
            ],
        )

        alias = original

        assert original is alias
Beispiel #25
0
    def test_iter(self):
        """Expect the sorted labels of an iterated LabelList in a fixed order."""
        label_list = LabelList(
            labels=[
                Label('some text'),
                Label('more text'),
                Label('text again'),
            ],
        )

        ordered = sorted(label_list)

        expected_values = ('more text', 'some text', 'text again')
        for position, expected_value in enumerate(expected_values):
            assert ordered[position].value == expected_value
Beispiel #26
0
    def test_with_label_values_sets_correct_idx(self):
        """Expect with_label_values() to pass the given ``idx`` to the new list."""
        values = ['a', 'b', 'c']

        built = LabelList.with_label_values(values, idx='letters')

        expected = LabelList(
            idx='letters',
            labels=[Label('a'), Label('b'), Label('c')],
        )
        assert built == expected
Beispiel #27
0
    def test_merge_overlaps_with_threshold(self):
        """Expect two 'c' labels 0.05 apart to merge under a threshold of 0.1.

        merge_overlaps() is called for its effect on the list itself, which
        is then compared with a single label spanning 9.0 to 14.0.
        """
        label_list = LabelList(
            idx='test',
            labels=[
                Label('c', 9.0, 12.0),
                Label('c', 12.05, 14.0),
            ],
        )

        label_list.merge_overlaps(threshold=0.1)

        merged = LabelList(idx='test', labels=[Label('c', 9.0, 14.0)])
        assert label_list == merged
Beispiel #28
0
    def test_ranges_include_labels(self):
        """Expect ranges() with ``include_labels=['a']`` to yield only the 'a' range."""
        source_labels = [
            Label('a', 3.2, 4.5),
            Label('b', 5.1, 8.9),
        ]
        label_list = LabelList(labels=source_labels)

        range_iter = label_list.ranges(include_labels=['a'])

        # The only range is the span of label 'a'.
        segment = next(range_iter)
        assert segment[0] == 3.2
        assert segment[1] == 4.5
        assert source_labels[0] in segment[2]

        # No range for 'b' may follow.
        with pytest.raises(StopIteration):
            next(range_iter)
Beispiel #29
0
    def test_all_tokens(self):
        """Expect all_tokens() to return each distinct token once.

        'text' occurs in all three labels but is expected a single time.
        """
        texts = ('some text', 'more text', 'text again')
        label_list = LabelList(labels=[Label(text) for text in texts])

        tokens = sorted(label_list.all_tokens())

        assert tokens == ['again', 'more', 'some', 'text']
Beispiel #30
0
    def test_split_label_within_cutting_points_is_included(self):
        """Expect a label lying fully between two cuts to be kept unchanged.

        Label 'b' (4.0 to 8.0) lies between the cuts at 1.9 and 10.5; the
        middle part is expected to hold three labels with 'b' second.
        """
        label_list = LabelList(
            idx='test',
            labels=[
                Label('a', 0.0, 4.0),
                Label('b', 4.0, 8.0),
                Label('c', 9.0, 12.0),
            ],
        )

        parts = label_list.split([1.9, 10.5])

        middle_part = parts[1]
        assert len(middle_part) == 3

        # The second label of the sorted middle part must be the untouched 'b'.
        enclosed = sorted(middle_part)[1]
        assert enclosed.value == 'b'
        assert enclosed.start == 4.0
        assert enclosed.end == 8.0