def test_match(self):
    utt_filter = subview.MatchingUtteranceIdxFilter(
        utterance_idxs={'a', 'b', 'd'})

    assert utt_filter.match(tracks.Utterance('a', 'x'), None)
    assert utt_filter.match(tracks.Utterance('b', 'x'), None)
    assert utt_filter.match(tracks.Utterance('d', 'x'), None)

    assert not utt_filter.match(tracks.Utterance('c', 'x'), None)
    assert not utt_filter.match(tracks.Utterance('e', 'x'), None)
def test_match_inverse(self):
    utt_filter = subview.MatchingUtteranceIdxFilter(
        utterance_idxs={'a', 'b', 'd'}, inverse=True)

    assert not utt_filter.match(tracks.Utterance('a', 'x'), None)
    assert not utt_filter.match(tracks.Utterance('b', 'x'), None)
    assert not utt_filter.match(tracks.Utterance('d', 'x'), None)

    assert utt_filter.match(tracks.Utterance('c', 'x'), None)
    assert utt_filter.match(tracks.Utterance('e', 'x'), None)
def test_read_samples(self):
    path = resources.sample_wav_file('wav_1.wav')
    track = tracks.FileTrack('wav', path)
    issuer = issuers.Issuer('toni')
    utt = tracks.Utterance('t', track, issuer=issuer, start=1.0, end=2.30)

    l1 = annotations.Label('a', 0.15, 0.448)
    l2 = annotations.Label('a', 0.5, 0.73)
    ll = annotations.LabelList(labels=[l1, l2])

    utt.set_label_list(ll)

    # Label times are relative to the utterance, so the expected samples start
    # at utt.start + label.start within the track.
    expected, __ = librosa.core.load(path, sr=None, offset=1.15, duration=0.298)
    assert np.array_equal(l1.read_samples(), expected)

    expected, __ = librosa.core.load(path, sr=None, offset=1.5, duration=1.73 - 1.5)
    assert np.array_equal(l2.read_samples(), expected)
def generate_utterances(track, issuer, n, n_ll_range, n_label_range, rand=None):
    if rand is None:
        rand = random.Random()

    items = []

    for i in range(n):
        utt_idx = '{}-utt-{}'.format(track.idx, i)
        start = rand.random() * 3
        end = 3 + rand.random() * 8

        utt = tracks.Utterance(
            utt_idx, track,
            issuer=issuer,
            start=start,
            end=end
        )

        n_ll = rand.randint(*n_ll_range)

        for ll in generate_label_lists(n_ll, n_label_range, rand=rand):
            utt.set_label_list(ll)

        items.append(utt)

    return items
def test_split_with_cutting_point_after_end_returns_one_utt(self):
    utt = tracks.Utterance('utt-1', None, start=4.0, end=20.0)

    res = utt.split([24.5])

    assert len(res) == 1
    assert res[0].start == 4.0
    assert res[0].end == 20.0
def test_does_utt_match_target_format_with_invalid_format_returns_false(self):
    file_path = resources.get_resource_path(('audio_formats', 'mp3_2_44_1k_16b.mp3'))
    track = tracks.FileTrack('t', file_path)
    utt = tracks.Utterance('u', track)

    c = conversion.WavAudioFileConverter()

    assert not c._does_utt_match_target_format(utt)
def test_does_utt_match_target_format_returns_true(self):
    file_path = resources.sample_wav_file('wav_1.wav')
    track = tracks.FileTrack('t', file_path)
    utt = tracks.Utterance('u', track)

    c = conversion.WavAudioFileConverter()

    assert c._does_utt_match_target_format(utt)
def test_split_utt_relative_with_labels(self):
    ll_1 = annotations.LabelList('phones', labels=[
        annotations.Label('alpha', start=0.0, end=30.0)
    ])
    ll_2 = annotations.LabelList('words', labels=[
        annotations.Label('b', start=8.0, end=30.0)
    ])
    utt = tracks.Utterance('utt-1', 'file-x', start=10.0, end=40.0,
                           label_lists=[ll_1, ll_2])

    res = utt.split([14.0], track_relative=False)

    assert len(res) == 2

    assert res[0].start == 10.0
    assert res[0].end == 24.0
    assert res[0].label_lists['phones'] == annotations.LabelList(idx='phones', labels=[
        annotations.Label('alpha', 0.0, 14.0)
    ])
    assert res[0].label_lists['words'] == annotations.LabelList(idx='words', labels=[
        annotations.Label('b', 8.0, 14.0)
    ])

    assert res[1].start == 24.0
    assert res[1].end == 40.0
    assert res[1].label_lists['phones'] == annotations.LabelList(idx='phones', labels=[
        annotations.Label('alpha', 0.0, 16.0)
    ])
    assert res[1].label_lists['words'] == annotations.LabelList(idx='words', labels=[
        annotations.Label('b', 0.0, 16.0)
    ])
def test_split(self):
    ll_1 = annotations.LabelList('phones', labels=[
        annotations.Label('alpha', start=0.0, end=30.0)
    ])
    ll_2 = annotations.LabelList('words', labels=[
        annotations.Label('b', start=0.0, end=30.0)
    ])
    utt = tracks.Utterance('utt-1', 'track-x', start=0.0, end=40.0,
                           label_lists=[ll_1, ll_2])

    res = utt.split([14.0, 29.5])

    assert len(res) == 3

    assert res[0].start == 0.0
    assert res[0].end == 14.0
    assert 'phones' in res[0].label_lists.keys()
    assert 'words' in res[0].label_lists.keys()

    assert res[1].start == 14.0
    assert res[1].end == 29.5
    assert 'phones' in res[1].label_lists.keys()
    assert 'words' in res[1].label_lists.keys()

    assert res[2].start == 29.5
    assert res[2].end == 40.0
    assert 'phones' in res[2].label_lists.keys()
    assert 'words' in res[2].label_lists.keys()
def test_encode_label_ends_at_utterance_end(self):
    track = tracks.FileTrack('file1', resources.sample_wav_file('med_len.wav'))
    utt = tracks.Utterance('utt1', track, start=3, end=14)
    ll = annotations.LabelList(labels=[
        annotations.Label('speech', 0, 4),
        annotations.Label('music', 4, 9),
        annotations.Label('speech', 9, float('inf')),
    ])

    utt.set_label_list(ll)

    enc = encoding.FrameHotEncoder(['music', 'speech', 'noise'], 'default',
                                   frame_settings=units.FrameSettings(32000, 16000),
                                   sr=16000)

    actual = enc.encode_utterance(utt)
    expected = np.array([
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [1, 1, 0],
        [1, 0, 0],
        [1, 0, 0],
        [1, 0, 0],
        [1, 0, 0],
        [1, 1, 0],
        [0, 1, 0],
    ]).astype(np.float32)

    assert np.array_equal(expected, actual)
def test_import_utterance_no_track(self, corpus):
    importing_utterances = [
        tracks.Utterance('a', tracks.FileTrack('notexist', 'notexist'),
                         corpus.issuers['existing_issuer'], 0, 10)
    ]

    with pytest.raises(ValueError):
        corpus.import_utterances(importing_utterances)
def test_split_sets_track(self):
    file = tracks.FileTrack('file-1', '/some/path')
    utt = tracks.Utterance('utt-1', file, start=0.0, end=10.0)

    res = utt.split([5.2])

    assert len(res) == 2
    assert res[0].track == file
    assert res[1].track == file
def test_import_utterance_no_issuer(self, corpus):
    importing_utterances = [
        tracks.Utterance('a', corpus.tracks['existing_file'],
                         issuers.Issuer('notexist'), 0, 10)
    ]

    with pytest.raises(ValueError):
        corpus.import_utterances(importing_utterances)
def test_split_sets_issuer(self):
    issuer = issuers.Speaker('spk-1')
    utt = tracks.Utterance('utt-1', None, issuer=issuer, start=0.0, end=10.0)

    res = utt.split([5.2])

    assert len(res) == 2
    assert res[0].issuer == issuer
    assert res[1].issuer == issuer
def test_encode_utterance_with_single_label(self):
    ll = annotations.LabelList(idx='go', labels=[annotations.Label('a c b')])
    utt = tracks.Utterance('utt-1', None, label_lists=ll)

    encoder = encoding.TokenOrdinalEncoder('go', ['a', 'b', 'c'])
    encoded = encoder.encode_utterance(utt)

    assert np.array_equal(encoded, [0, 2, 1])
def test_split_when_utt_start_is_not_zero(self):
    utt = tracks.Utterance('utt-1', None, start=6.0, end=20.0)

    res = utt.split([3.0])

    assert len(res) == 2
    assert res[0].start == 6.0
    assert res[0].end == 9.0
    assert res[1].start == 9.0
    assert res[1].end == 20.0
def test_split_utt_relative(self):
    utt = tracks.Utterance('utt-1', None, start=6.0, end=20.0)

    res = utt.split([8.0], track_relative=False)

    assert len(res) == 2
    assert res[0].start == 6.0
    assert res[0].end == 14.0
    assert res[1].start == 14.0
    assert res[1].end == 20.0
def test_split_endless(self):
    utt = tracks.Utterance('utt-1', None, start=0.0)

    res = utt.split([24.5])

    assert len(res) == 2
    assert res[0].start == 0.0
    assert res[0].end == 24.5
    assert res[1].start == 24.5
    assert res[1].end == float('inf')
def test_encode_utterance_with_non_existing_label_list_raises_error(self):
    ll = annotations.LabelList(idx='go', labels=[annotations.Label('a c b unknown')])
    utt = tracks.Utterance('utt-1', None, label_lists=ll)

    encoder = encoding.TokenOrdinalEncoder('not_existing', ['a', 'b', 'c'])

    with pytest.raises(ValueError):
        encoder.encode_utterance(utt)
def new_utterance(self, utterance_idx, track_idx, issuer_idx=None, start=0, end=float('inf')):
    """
    Add a new utterance to the corpus with the given data.

    Parameters:
        utterance_idx (str): The id to associate with the utterance.
                             If None or already existing, one is generated.
        track_idx (str): The track id the utterance is in.
        issuer_idx (str): The issuer id to associate with the utterance.
        start (float): Start of the utterance within the track [seconds].
        end (float): End of the utterance within the track [seconds].
                     ``inf`` equals the end of the track.

    Returns:
        Utterance: The newly added utterance.
    """
    new_utt_idx = utterance_idx

    # Check if there is a track with the given idx
    if track_idx not in self._tracks.keys():
        raise ValueError(
            'Track with id {} does not exist!'.format(track_idx))

    # Check if the issuer exists
    issuer = None

    if issuer_idx is not None:
        if issuer_idx not in self._issuers.keys():
            raise ValueError(
                'Issuer with id {} does not exist!'.format(issuer_idx))
        else:
            issuer = self._issuers[issuer_idx]

    # Append an index to the idx if it already exists
    if new_utt_idx in self._utterances.keys():
        new_utt_idx = naming.index_name_if_in_list(new_utt_idx,
                                                   self._utterances.keys())

    new_utt = tracks.Utterance(new_utt_idx,
                               self.tracks[track_idx],
                               issuer=issuer,
                               start=start,
                               end=end)
    self._utterances[new_utt_idx] = new_utt

    return new_utt
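# Illustrative usage sketch for new_utterance, not part of the original suite: it
# assumes the ``corpus`` fixture defined further below and reuses its 'existing_file'
# and 'existing_issuer' ids; the test name itself is hypothetical.
def test_new_utterance_usage_sketch(self, corpus):
    utt = corpus.new_utterance('new_utt', 'existing_file',
                               issuer_idx='existing_issuer',
                               start=0.0, end=5.0)

    # The utterance is registered on the corpus and wired to the given track and issuer.
    assert corpus.num_utterances == 2
    assert utt.idx == 'new_utt'
    assert utt.track == corpus.tracks['existing_file']
    assert utt.issuer == corpus.issuers['existing_issuer']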
def test_import_utterances(self, corpus):
    importing_utterances = [
        tracks.Utterance('a', corpus.tracks['existing_file'],
                         corpus.issuers['existing_issuer'], 0, 10),
        tracks.Utterance('b', corpus.tracks['existing_file'],
                         corpus.issuers['existing_issuer'], 10, 20),
        tracks.Utterance('existing_utt', corpus.tracks['existing_file'],
                         corpus.issuers['existing_issuer'], 20, 30)
    ]

    mapping = corpus.import_utterances(importing_utterances)

    assert corpus.num_utterances == 4
    assert 'a' in corpus.utterances.keys()
    assert 'b' in corpus.utterances.keys()
    assert 'existing_utt_1' in corpus.utterances.keys()

    assert len(mapping) == 3
    assert mapping['a'].idx == 'a'
    assert mapping['b'].idx == 'b'
    assert mapping['existing_utt'].idx == 'existing_utt_1'
# Assumed to be a pytest fixture, since the tests above request it by the name ``corpus``.
@pytest.fixture()
def corpus():
    corpus = audiomate.Corpus()

    ex_file = tracks.FileTrack('existing_file', '../any/path.wav')
    ex_issuer = issuers.Issuer('existing_issuer')
    ex_utterance = tracks.Utterance('existing_utt', ex_file, issuer=ex_issuer)

    corpus.tracks['existing_file'] = ex_file
    corpus.issuers['existing_issuer'] = ex_issuer
    corpus.utterances['existing_utt'] = ex_utterance

    return corpus
def test_split_with_overlap(self):
    ll_1 = annotations.LabelList('phones', labels=[
        annotations.Label('alpha', start=0.0, end=30.0),
        annotations.Label('bravo', start=20.0, end=42.0)
    ])
    ll_2 = annotations.LabelList('words', labels=[
        annotations.Label('b', start=8.0, end=30.0)
    ])
    utt = tracks.Utterance('utt-1', 'file-x', start=10.0, end=55.0,
                           label_lists=[ll_1, ll_2])

    res = utt.split([12.0, 24.0], track_relative=False, overlap=2.0)

    assert len(res) == 3

    assert res[0].start == 10.0
    assert res[0].end == 24.0
    assert res[0].label_lists['phones'] == annotations.LabelList(idx='phones', labels=[
        annotations.Label('alpha', 0.0, 14.0)
    ])
    assert res[0].label_lists['words'] == annotations.LabelList(idx='words', labels=[
        annotations.Label('b', 8.0, 14.0)
    ])

    assert res[1].start == 20.0
    assert res[1].end == 36.0
    assert res[1].label_lists['phones'] == annotations.LabelList(idx='phones', labels=[
        annotations.Label('alpha', 0.0, 16.0),
        annotations.Label('bravo', 10.0, 16.0),
    ])
    assert res[1].label_lists['words'] == annotations.LabelList(idx='words', labels=[
        annotations.Label('b', 0.0, 16.0)
    ])

    assert res[2].start == 32.0
    assert res[2].end == 55.0
    assert res[2].label_lists['phones'] == annotations.LabelList(idx='phones', labels=[
        annotations.Label('alpha', 0.0, 8.0),
        annotations.Label('bravo', 0.0, 20.0),
    ])
    assert res[2].label_lists['words'] == annotations.LabelList(idx='words', labels=[
        annotations.Label('b', 0.0, 8.0)
    ])
def setup_method(self):
    self.ll_1 = annotations.LabelList(idx='alpha', labels=[
        annotations.Label('a', 3.2, 4.5),
        annotations.Label('b', 5.1, 8.9),
        annotations.Label('c', 7.2, 10.5),
        annotations.Label('d', 10.5, 14),
        annotations.Label('d', 15, 18)
    ])

    self.ll_2 = annotations.LabelList(idx='bravo', labels=[
        annotations.Label('a', 1.0, 4.2),
        annotations.Label('e', 4.2, 7.9),
        annotations.Label('c', 7.2, 10.5),
        annotations.Label('f', 10.5, 14),
        annotations.Label('d', 15, 17.3)
    ])

    self.ll_duplicate_idx = annotations.LabelList(idx='charlie', labels=[
        annotations.Label('t', 1.0, 4.2),
        annotations.Label('h', 4.2, 7.9)
    ])

    self.ll_3 = annotations.LabelList(idx='charlie', labels=[
        annotations.Label('a', 1.0, 4.2),
        annotations.Label('g', 4.2, 7.9)
    ])

    self.track = tracks.FileTrack('wav', resources.sample_wav_file('wav_1.wav'))
    self.issuer = issuers.Issuer('toni')

    self.utt = tracks.Utterance('test', self.track,
                                issuer=self.issuer,
                                start=1.25, end=1.30,
                                label_lists=[
                                    self.ll_1,
                                    self.ll_2,
                                    self.ll_duplicate_idx,
                                    self.ll_3
                                ])
def test_utt_read_samples(benchmark):
    utts = []

    wav_path = resources.get_test_resource_path(('wav_files', 'med_len.wav'))
    track = tracks.FileTrack('idx', wav_path)

    utts.append(tracks.Utterance('uidx', track))
    utts.append(tracks.Utterance('uidx', track, start=2.8))
    utts.append(tracks.Utterance('uidx', track, end=10.2))
    utts.append(tracks.Utterance('uidx', track, start=2.4, end=9.8))

    mp3_path = resources.get_test_resource_path(('audio_formats', 'mp3_2_44_1k_16b.mp3'))
    track = tracks.FileTrack('idx', mp3_path)

    utts.append(tracks.Utterance('uidx', track))
    utts.append(tracks.Utterance('uidx', track, start=2.8))
    utts.append(tracks.Utterance('uidx', track, end=4.9))
    utts.append(tracks.Utterance('uidx', track, start=0.4, end=4.8))

    benchmark(run, utts)
def test_encode_utterance_with_multiple_non_overlapping_labels(self):
    ll = annotations.LabelList(idx='go', labels=[
        annotations.Label('a c b', start=0, end=5),
        annotations.Label('c b b', start=5, end=9.4),
        annotations.Label('a a a', start=9.5, end=10.2)
    ])
    utt = tracks.Utterance('utt-1', None, label_lists=ll)

    encoder = encoding.TokenOrdinalEncoder('go', ['a', 'b', 'c'])
    encoded = encoder.encode_utterance(utt)

    assert np.array_equal(encoded, [0, 2, 1, 2, 1, 1, 0, 0, 0])
def utt_with_noise():
    utt = tracks.Utterance('utt-2', 'file-2')

    utt.set_label_list(annotations.LabelList(idx='alpha', labels=[
        annotations.Label('music', 0, 5),
        annotations.Label('speech', 5, 12),
        annotations.Label('noise', 13, 15)
    ]))

    utt.set_label_list(annotations.LabelList(idx='bravo', labels=[
        annotations.Label('music', 0, 1),
        annotations.Label('speech', 2, 6)
    ]))

    return utt
def test_encode_utterance_with_overlapping_labels_raises_error(self):
    ll = annotations.LabelList(idx='go', labels=[
        annotations.Label('a c b', start=0, end=5),
        annotations.Label('c b b', start=2, end=9.4),
        annotations.Label('a a a', start=9.5, end=10.2)
    ])
    utt = tracks.Utterance('utt-1', None, label_lists=ll)

    encoder = encoding.TokenOrdinalEncoder('go', ['a', 'b', 'c'])

    with pytest.raises(ValueError):
        encoder.encode_utterance(utt)
def test_encode_utterance_takes_lower_index_first(self):
    file = tracks.FileTrack('file-idx', resources.sample_wav_file('wav_1.wav'))
    utt = tracks.Utterance('utt-idx', file, start=0, end=5)
    ll = annotations.LabelList(labels=[
        annotations.Label('music', 0, 3),
        annotations.Label('speech', 3, 5)
    ])

    utt.set_label_list(ll)

    enc = encoding.FrameOrdinalEncoder(['speech', 'music', 'noise'], 'default',
                                       frame_settings=units.FrameSettings(32000, 16000),
                                       sr=16000)

    actual = enc.encode_utterance(utt)
    # np.int was removed in recent NumPy releases; the builtin int behaves the same here.
    expected = np.array([1, 1, 0, 0]).astype(int)

    assert np.array_equal(expected, actual)
def test_validate_utterance_returns_completely_outlying_label(self):
    utt = tracks.Utterance('utt-idx', None, start=10.0, end=17.9)
    ll = annotations.LabelList(idx='default', labels=[
        annotations.Label('a', start=-4.0, end=-2.0),
        annotations.Label('b', start=19.0, end=22.0),
    ])
    utt.set_label_list(ll)

    val = validation.LabelOverflowValidator('default')
    result = val.validate_utterance(utt)
    result = sorted(result, key=lambda x: x[0])

    assert len(result) == 2
    assert result[0] == (-4.0, -2.0, 'a')
    assert result[1] == (19.0, 22.0, 'b')