def test_ranges_zero_to_end(self):
    """Check ``ranges()`` when one label runs from 0 to infinity.

    Three consecutive ranges are expected (0-5.1, 5.1-8.9, 8.9-inf). The
    endless label ``a`` must be part of each one, the bounded label ``b``
    only of the middle one. Afterwards the iterator must be exhausted.
    """
    endless = Label('a', 0, float('inf'))
    bounded = Label('b', 5.1, 8.9)
    labels = [endless, bounded]

    ll = LabelList(labels=labels)
    range_iter = ll.ranges()

    # Range before the bounded label starts.
    current = next(range_iter)
    assert current[0] == 0
    assert current[1] == 5.1
    assert endless in current[2]

    # Range in which both labels are active.
    current = next(range_iter)
    assert current[0] == 5.1
    assert current[1] == 8.9
    assert endless in current[2]
    assert bounded in current[2]

    # Open-ended range after the bounded label.
    current = next(range_iter)
    assert current[0] == 8.9
    assert current[1] == float('inf')
    assert endless in current[2]

    with pytest.raises(StopIteration):
        next(range_iter)
def test_split_without_cutting_points_raises_error(self):
    """Expect ``ValueError`` when ``split`` gets an empty cutting-point list."""
    labels = [Label('a', 0.0, 9.0), Label('c', 9.0, 12.0)]
    ll = LabelList(idx='test', labels=labels)
    no_cutting_points = []

    with pytest.raises(ValueError):
        ll.split(no_cutting_points)
def test_tokenized(self):
    """Check that ``tokenized()`` returns the tokens of all labels in order."""
    labels = [
        Label('a u t', 0.0, 4.0),
        Label('b x', 4.0, 8.0),
        Label('c', 9.0, 12.0),
    ]
    ll = LabelList(idx='test', labels=labels)

    expected_tokens = ['a', 'u', 't', 'b', 'x', 'c']
    assert ll.tokenized() == expected_tokens
def test_join_with_custom_delimiter(self):
    """Check that ``join`` places the given delimiter between label values."""
    labels = [
        Label('a', 0.0, 4.0),
        Label('b', 4.0, 8.0),
        Label('c', 9.0, 12.0),
    ]
    ll = LabelList(idx='test', labels=labels)

    joined = ll.join(delimiter=' - ')
    assert joined == 'a - b - c'
def test_join(self):
    """Check that ``join()`` without arguments yields ``'a b c'``."""
    labels = [
        Label('a', 0.0, 4.0),
        Label('b', 4.0, 8.0),
        Label('c', 9.0, 12.0),
    ]
    ll = LabelList(idx='test', labels=labels)

    joined = ll.join()
    assert joined == 'a b c'
def test_relabel_maps_a_onto_b(self):
    """Check that the projection ``('a',) -> 'b'`` renames the label.

    Start and end of the label are expected to stay as they were.
    """
    projections = {('a', ): 'b'}
    source = LabelList(labels=[Label('a', 3.2, 4.5)])
    expected = LabelList(labels=[Label('b', 3.2, 4.5)])

    actual = relabeling.relabel(source, projections)

    assert actual == expected
def test_tokenized_raises_error_if_overlap_is_higher_than_threshold(self):
    """Expect ``ValueError`` from ``tokenized()`` for overlapping labels.

    The first two labels overlap between 3.85 and 4.0; the test expects
    this to be more than ``tokenized()`` tolerates when called without
    arguments.
    """
    labels = [
        Label('a u t', 0.0, 4.0),
        Label('b x', 3.85, 8.0),
        Label('c', 9.0, 12.0),
    ]
    ll = LabelList(idx='test', labels=labels)

    with pytest.raises(ValueError):
        ll.tokenized()
def test_join_raises_error_if_overlap_is_higher_than_threshold(self):
    """Expect ``ValueError`` from ``join`` when the overlap is too large.

    Labels ``a`` and ``b`` overlap between 3.8 and 4.0, which is more than
    the ``overlap_threshold`` of 0.1 passed to ``join``.
    """
    labels = [
        Label('a', 0.0, 4.0),
        Label('b', 3.8, 8.0),
        Label('c', 9.0, 12.0),
    ]
    ll = LabelList(idx='test', labels=labels)

    with pytest.raises(ValueError):
        ll.join(overlap_threshold=0.1)
def test_tokenized_raises_error_if_overlap_is_higher_than_threshold_given_an_endless_label(self):
    """Expect ``ValueError`` from ``tokenized()`` with an endless label.

    The label ``'b x'`` starts at 4.5 and never ends, so it covers the
    whole span of label ``'c'`` (9.0 to 12.0).
    """
    labels = [
        Label('a u t', 0.0, 4.0),
        Label('b x', 4.5, float('inf')),
        Label('c', 9.0, 12.0),
    ]
    ll = LabelList(idx='test', labels=labels)

    with pytest.raises(ValueError):
        ll.tokenized()
def create_sample_dataset(temp_dir):
    """Build a small sample corpus for tests and return it.

    The corpus is created from ``str(temp_dir)`` and is filled with:

    * three tracks (``wav_1``, ``wav_2``, ``wav_3``),
    * three issuers (two speakers and one plain issuer),
    * four utterances, each with a word-transcript label list,
    * the subviews ``train`` (utt-1 to utt-3) and ``dev`` (utt-4).

    Args:
        temp_dir: Location handed to ``audiomate.Corpus`` (converted via
            ``str``).

    Returns:
        The populated ``audiomate.Corpus`` instance.
    """
    corpus = audiomate.Corpus(str(temp_dir))

    # Tracks
    wav_1_path = resources.sample_wav_file('wav_1.wav')
    wav_2_path = resources.sample_wav_file('wav_2.wav')
    flac_path = resources.get_resource_path(
        ['audio_formats', 'flac_1_16k_16b.flac'])

    track_1 = corpus.new_file(wav_1_path, track_idx='wav_1')
    track_2 = corpus.new_file(wav_2_path, track_idx='wav_2')
    track_3 = corpus.new_file(flac_path, track_idx='wav_3')

    # Issuers
    male_speaker = Speaker('spk-1', gender=Gender.MALE)
    female_speaker = Speaker('spk-2', gender=Gender.FEMALE)
    plain_issuer = Issuer('spk-3')
    corpus.import_issuers([male_speaker, female_speaker, plain_issuer])

    # Utterances
    # 2.5951875 (presumably the length of the track in seconds -- TODO confirm)
    utt_1 = corpus.new_utterance(
        'utt-1', track_1.idx, issuer_idx=male_speaker.idx)
    utt_2 = corpus.new_utterance(
        'utt-2', track_2.idx, issuer_idx=female_speaker.idx,
        start=0, end=1.5)
    utt_3 = corpus.new_utterance(
        'utt-3', track_2.idx, issuer_idx=female_speaker.idx,
        start=1.5, end=2.5)
    # 5.0416875 (presumably the length of the track in seconds -- TODO confirm)
    utt_4 = corpus.new_utterance(
        'utt-4', track_3.idx, issuer_idx=plain_issuer.idx)

    # One word-transcript label list per utterance, assigned in order.
    transcripts = (
        (utt_1, 'who am i'),
        (utt_2, 'who are you'),
        (utt_3, 'who is he'),
        (utt_4, 'who are they'),
    )
    for utterance, text in transcripts:
        utterance.set_label_list(
            LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                      labels=[Label(text)]))

    # Subviews
    train_filter = subview.MatchingUtteranceIdxFilter(
        utterance_idxs={'utt-1', 'utt-2', 'utt-3'})
    train_view = subview.Subview(corpus, filter_criteria=[train_filter])

    dev_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs={'utt-4'})
    dev_view = subview.Subview(corpus, filter_criteria=[dev_filter])

    corpus.import_subview('train', train_view)
    corpus.import_subview('dev', dev_view)

    return corpus
def test_addl(self):
    """Check that ``addl`` adds a label built from value, start and end."""
    ll = LabelList()
    assert len(ll) == 0

    ll.addl('a', 12.3, 19.3)

    expected = LabelList(labels=[Label('a', 12.3, 19.3)])
    assert ll == expected
def test_split_with_cutting_point_after_last_label(self):
    """Check ``split`` with a cutting point behind the end of all labels.

    Two parts are expected: the first holds both labels, the second is
    empty.
    """
    labels = [Label('a', 0.0, 4.0), Label('c', 4.0, 8.9)]
    ll = LabelList(idx='test', labels=labels)

    parts = ll.split([10.5])

    assert len(parts) == 2
    assert len(parts[0]) == 2
    assert len(parts[1]) == 0
def test_label_values(self):
    """Check that ``label_values()`` returns ``['a', 'b', 'c']``.

    The values ``a`` and ``c`` occur twice in the input but only once in
    the expected result.
    """
    labels = [
        Label('a', 3.2, 4.5),
        Label('b', 5.1, 8.9),
        Label('c', 7.2, 10.5),
        Label('a', 10.5, 14),
        Label('c', 13, 14),
    ]
    ll = LabelList(labels=labels)

    expected_values = ['a', 'b', 'c']
    assert ll.label_values() == expected_values
def test_split_cutting_point_on_boundary_doesnot_split_label(self):
    """Check ``split`` with a cutting point exactly between two labels.

    The cutting point 9.0 is the end of ``a`` and the start of ``c``; each
    of the two resulting parts is expected to hold exactly one label.
    """
    labels = [Label('a', 0.0, 9.0), Label('c', 9.0, 12.0)]
    ll = LabelList(idx='test', labels=labels)

    parts = ll.split([9.0])

    assert len(parts) == 2
    assert len(parts[0]) == 1
    assert len(parts[1]) == 1
def test_add(self):
    """Check that ``add`` stores the label and links it to the list."""
    ll = LabelList()
    assert len(ll) == 0

    new_label = Label('some text')
    ll.add(new_label)

    assert len(ll) == 1
    # The label is expected to point back to the list it was added to.
    assert new_label.label_list == ll
    assert sorted(ll)[0] == new_label
def test_split_single_label_that_doesnt_start_at_zero(self):
    """Check ``split`` with ``shift_times=True`` on a label starting at 8.0.

    The first part is expected to keep the original start (8.0) and end at
    the cutting point. The second part is expected to start at 0.0 and end
    at roughly 0.8.
    """
    ll = LabelList(idx='test', labels=[Label('c', 8.0, 12.0)])

    parts = ll.split([11.2], shift_times=True)
    assert len(parts) == 2

    first_part = parts[0]
    assert len(first_part) == 1
    assert sorted(first_part)[0] == Label('c', 8.0, 11.2)

    second_part = parts[1]
    assert len(second_part) == 1
    assert sorted(second_part)[0] == Label('c', 0.0, pytest.approx(0.8))
def test_with_label_values_sets_correct_idx(self):
    """Check that ``with_label_values`` passes on the given ``idx``."""
    values = ['a', 'b', 'c']

    ll = LabelList.with_label_values(values, idx='letters')

    expected = LabelList(
        idx='letters',
        labels=[Label('a'), Label('b'), Label('c')],
    )
    assert ll == expected
def test_with_label_values(self):
    """Check that ``with_label_values`` creates one label per value."""
    values = ['a', 'b', 'c']

    ll = LabelList.with_label_values(values)

    expected = LabelList(labels=[Label('a'), Label('b'), Label('c')])
    assert ll == expected
def test_ranges_include_labels(self):
    """Check ``ranges`` restricted to label value ``a``.

    Only one range (3.2 to 4.5) containing label ``a`` is expected;
    afterwards the iterator must be exhausted.
    """
    label_a = Label('a', 3.2, 4.5)
    label_b = Label('b', 5.1, 8.9)
    labels = [label_a, label_b]

    ll = LabelList(labels=labels)
    range_iter = ll.ranges(include_labels=['a'])

    only_range = next(range_iter)
    assert only_range[0] == 3.2
    assert only_range[1] == 4.5
    assert label_a in only_range[2]

    with pytest.raises(StopIteration):
        next(range_iter)
def test_merge_overlaps_with_threshold(self):
    """Check ``merge_overlaps`` with a threshold of 0.1.

    The two ``c`` labels are 0.05 apart (12.0 to 12.05) and are expected
    to end up as a single label from 9.0 to 14.0.
    """
    labels = [
        Label('c', 9.0, 12.0),
        Label('c', 12.05, 14.0),
    ]
    ll = LabelList(idx='test', labels=labels)

    ll.merge_overlaps(threshold=0.1)

    expected = LabelList(idx='test', labels=[Label('c', 9.0, 14.0)])
    assert ll == expected
def test_split_label_within_cutting_points_is_included(self):
    """Check that a label lying between two cutting points is kept as is.

    With cutting points 1.9 and 10.5 the middle part is expected to hold
    three labels, the second of which (in sorted order) is the untouched
    label ``b`` from 4.0 to 8.0.
    """
    labels = [
        Label('a', 0.0, 4.0),
        Label('b', 4.0, 8.0),
        Label('c', 9.0, 12.0),
    ]
    ll = LabelList(idx='test', labels=labels)

    parts = ll.split([1.9, 10.5])
    middle_part = parts[1]
    assert len(middle_part) == 3

    inner_label = sorted(middle_part)[1]
    assert inner_label.value == 'b'
    assert inner_label.start == 4.0
    assert inner_label.end == 8.0
def test_all_tokens(self):
    """Check that ``all_tokens()`` yields every distinct token once."""
    labels = [
        Label('some text'),
        Label('more text'),
        Label('text again'),
    ]
    ll = LabelList(labels=labels)

    expected_tokens = ['again', 'more', 'some', 'text']
    assert sorted(ll.all_tokens()) == expected_tokens
def test_label_count(self):
    """Check the per-value occurrence counts returned by ``label_count()``."""
    labels = [
        Label('a', 3.2, 4.5),
        Label('b', 5.1, 8.9),
        Label('c', 7.2, 10.5),
        Label('a', 10.5, 14),
        Label('c', 13, 14),
    ]
    ll = LabelList(labels=labels)

    counts = ll.label_count()

    assert counts['a'] == 2
    assert counts['b'] == 1
    assert counts['c'] == 2
def test_label_total_duration(self):
    """Check the summed duration per label value.

    Expected totals: ``a`` = 1.3 + 3.5, ``b`` = 3.8, ``c`` = 3.3 + 1.0.
    """
    labels = [
        Label('a', 3.2, 4.5),
        Label('b', 5.1, 8.9),
        Label('c', 7.2, 10.5),
        Label('a', 10.5, 14),
        Label('c', 13, 14),
    ]
    ll = LabelList(labels=labels)

    durations = ll.label_total_duration()

    # Float sums, hence the approximate comparison.
    assert durations['a'] == pytest.approx(4.8)
    assert durations['b'] == pytest.approx(3.8)
    assert durations['c'] == pytest.approx(4.3)
def test_update(self):
    """Check that ``update`` adds all labels and links each to the list."""
    ll = LabelList()
    assert len(ll) == 0

    label_a = Label('some text')
    label_b = Label('more text')
    label_c = Label('text again')
    new_labels = [label_a, label_b, label_c]

    ll.update(new_labels)

    assert len(ll) == 3
    # Every added label is expected to point back to the list.
    for added in (label_a, label_b, label_c):
        assert added.label_list == ll
def test_relabel_removes_unwanted_labels(self):
    """Check that projecting a label onto ``''`` drops it from the result.

    Label ``a`` is mapped to the empty string and is expected to vanish;
    label ``b`` is mapped onto itself and is expected to remain.
    """
    projections = {
        ('a', ): '',
        ('b', ): 'b',
    }
    source = LabelList(
        labels=[Label('a', 3.2, 4.4), Label('b', 4.4, 5.1)])
    expected = LabelList(labels=[Label('b', 4.4, 5.1)])

    actual = relabeling.relabel(source, projections)

    assert actual == expected
def test_split_first_label_not_splitted(self):
    """Check ``split`` with ``shift_times=True`` cutting only the last label.

    The first part is expected to hold the untouched label ``a`` and label
    ``c`` cut at 11.2. The second part is expected to hold the remainder
    of ``c``, from 0.0 to roughly 0.8.
    """
    labels = [Label('a', 0.0, 9.0), Label('c', 9.0, 12.0)]
    ll = LabelList(idx='test', labels=labels)

    parts = ll.split([11.2], shift_times=True)
    assert len(parts) == 2

    first_part = parts[0]
    assert len(first_part) == 2
    assert sorted(first_part)[0] == Label('a', 0.0, 9.0)
    assert sorted(first_part)[1] == Label('c', 9.0, 11.2)

    second_part = parts[1]
    assert len(second_part) == 1
    assert sorted(second_part)[0] == Label('c', 0.0, pytest.approx(0.8))
def test_labels_in_range_returns_only_fully_included(self):
    """Check ``labels_in_range`` with ``fully_included=True``.

    For the range 7.2 to 14.99 only the labels lying completely inside are
    expected: ``c`` (7.2-10.5) and ``a`` (10.5-14).
    """
    labels = [
        Label('a', 3.2, 4.5),
        Label('b', 5.1, 8.9),
        Label('c', 7.2, 10.5),
        Label('a', 10.5, 14),
        Label('c', 13, 15),
    ]
    ll = LabelList(labels=labels)

    found = ll.labels_in_range(7.2, 14.99, fully_included=True)

    expected = [
        Label('c', 7.2, 10.5),
        Label('a', 10.5, 14),
    ]
    assert sorted(found) == expected
def test_apply(self):
    """Check that ``apply`` runs the given function on every label."""
    ll = LabelList(labels=[
        Label('some text'),
        Label('more text'),
        Label('text again'),
    ])

    def prefix_value(label):
        # Mutates the label in place by prepending 'app '.
        label.value = 'app {}'.format(label.value)

    ll.apply(prefix_value)

    ordered = sorted(ll)
    expected_values = [
        'app more text',
        'app some text',
        'app text again',
    ]
    assert ordered[0].value == expected_values[0]
    assert ordered[1].value == expected_values[1]
    assert ordered[2].value == expected_values[2]
def test_labels_in_range(self):
    """Check ``labels_in_range`` for the range 8.2 to 12.5.

    The three labels touching that range are expected: ``b`` (5.1-8.9),
    ``c`` (7.2-10.5) and ``a`` (10.5-14).
    """
    labels = [
        Label('a', 3.2, 4.5),
        Label('b', 5.1, 8.9),
        Label('c', 7.2, 10.5),
        Label('a', 10.5, 14),
        Label('c', 13, 14),
    ]
    ll = LabelList(labels=labels)

    found = ll.labels_in_range(8.2, 12.5)

    expected = [
        Label('b', 5.1, 8.9),
        Label('c', 7.2, 10.5),
        Label('a', 10.5, 14),
    ]
    assert sorted(found) == expected