def test_write_segments(self, writer, tmpdir):
    """The segments file lists every utterance with track id, start and end."""
    ds = resources.create_dataset()
    out_path = tmpdir.strpath
    writer.save(ds, out_path)

    rows = textfile.read_separated_lines(
        os.path.join(out_path, 'segments'), separator=' ', max_columns=4)

    # (utterance-id, track-id, start, end); -1 marks "until end of track"
    expected = [
        ('utt-1', 'wav-1', 0.0, -1.0),
        ('utt-2', 'wav_2', 0.0, -1.0),
        ('utt-3', 'wav_3', 0.0, 1.5),
        ('utt-4', 'wav_3', 1.5, 2.5),
        ('utt-5', 'wav_4', 0.0, -1.0),
    ]

    for row, (utt_idx, track_idx, start, end) in zip(rows, expected):
        assert row[0] == utt_idx
        assert row[1] == track_idx
        assert float(row[2]) == pytest.approx(start)
        assert float(row[3]) == pytest.approx(end)
def test_exports_wavs_from_container_tracks(self, writer, tmpdir):
    """Container-backed tracks are exported as individual wav files.

    Fix: removed a leftover debug ``print(os.listdir(out_path))`` that
    cluttered the test output.
    """
    path = tmpdir.strpath
    container_ds_path = os.path.join(path, 'container_ds')
    out_path = os.path.join(path, 'export')

    ds = resources.create_dataset()
    ds.relocate_audio_to_single_container(container_ds_path)
    writer.save(ds, out_path)

    # Every track is written to <out>/audio/<id>.wav with its full duration.
    for track_idx in ['wav-1', 'wav_2', 'wav_3', 'wav_4']:
        track_path = os.path.join(out_path, 'audio', '{}.wav'.format(track_idx))
        assert os.path.isfile(track_path)

        track = tracks.FileTrack(None, track_path)
        assert track.duration == pytest.approx(2.5951875)

    # The samples of the first track survive the container round-trip.
    track = tracks.FileTrack(None, os.path.join(out_path, 'audio', 'wav-1.wav'))
    assert np.allclose(
        track.read_samples(),
        ds.tracks['wav-1'].read_samples(),
        atol=1e-05)
def test_split_utterances_to_max_time(self):
    """Splitting with max_time=1.5/overlap=0.5 splits the long utterances in two."""
    corpus = resources.create_dataset()
    result = corpus.split_utterances_to_max_time(max_time=1.5, overlap=0.5)

    assert result.num_tracks == corpus.num_tracks
    assert result.num_issuers == corpus.num_issuers
    assert result.num_utterances == 8

    half = 2.5951875 / 2.0
    inf = float('inf')

    # utterance-id -> (start, end) after splitting
    expected = {
        'utt-1_0': (0.0, half + 0.5),
        'utt-1_1': (half - 0.5, inf),
        'utt-2_0': (0.0, half + 0.5),
        'utt-2_1': (half - 0.5, inf),
        'utt-3': (0.0, 1.5),
        'utt-4': (1.5, 2.5),
        'utt-5_0': (0.0, half + 0.5),
        'utt-5_1': (half - 0.5, inf),
    }

    for utt_idx, (start, end) in expected.items():
        assert result.utterances[utt_idx].start == start
        assert result.utterances[utt_idx].end == end
def test_write_segments_absolute_times(self, writer, tmpdir):
    """With use_absolute_times the real end times are written instead of -1."""
    writer = io.KaldiWriter(use_absolute_times=True)
    ds = resources.create_dataset()
    out_path = tmpdir.strpath
    writer.save(ds, out_path)

    rows = textfile.read_separated_lines(
        os.path.join(out_path, 'segments'), separator=' ', max_columns=4)

    full = 2.5951875
    expected = [
        ('spk-1-utt-1', 'wav-1', 0.0, full),
        ('spk-1-utt-2', 'wav_2', 0.0, full),
        ('spk-2-utt-3', 'wav_3', 0.0, 1.5),
        ('spk-2-utt-4', 'wav_3', 1.5, 2.5),
        ('spk-3-utt-5', 'wav_4', 0.0, full),
    ]

    for row, (utt_idx, track_idx, start, end) in zip(rows, expected):
        assert row[0] == utt_idx
        assert row[1] == track_idx
        assert float(row[2]) == pytest.approx(start)
        assert float(row[3]) == pytest.approx(end)
def test_process_features_online_with_frame_hop_size_change_stores_correct(self, processor, tmpdir):
    """Frame/hop scale factors of the processor are applied to the output container."""
    ds = resources.create_dataset()
    src_path = os.path.join(tmpdir.strpath, 'in_feats')
    dst_path = os.path.join(tmpdir.strpath, 'out_feats')

    src = containers.FeatureContainer(src_path)
    feats = np.arange(30).reshape(5, 6)

    with src:
        src.sampling_rate = 16000
        src.frame_size = 400
        src.hop_size = 160
        for utt_idx in ds.utterances.keys():
            src.set(utt_idx, feats)

    processor.mock_frame_size_scale = 2.0
    processor.mock_hop_size_scale = 2.0
    processor.process_features_online(ds, src, dst_path)

    dst = containers.FeatureContainer(dst_path)
    with dst:
        # 400 * 2.0 and 160 * 2.0
        assert dst.frame_size == 800
        assert dst.hop_size == 320
def test_process_features(self, processor, tmpdir):
    """Offline processing stores every utterance's features unchanged."""
    ds = resources.create_dataset()
    src_path = os.path.join(tmpdir.strpath, 'in_feats')
    dst_path = os.path.join(tmpdir.strpath, 'out_feats')

    src = containers.FeatureContainer(src_path)
    feats = np.arange(30).reshape(5, 6)

    with src:
        src.sampling_rate = 16000
        src.frame_size = 400
        src.hop_size = 160
        for utt_idx in ds.utterances.keys():
            src.set(utt_idx, feats)

    processor.process_features(ds, src, dst_path)

    dst = containers.FeatureContainer(dst_path)
    with dst:
        assert len(dst.keys()) == 5
        for utt_idx in ['utt-1', 'utt-2', 'utt-3', 'utt-4', 'utt-5']:
            assert np.array_equal(dst.get(utt_idx, mem_map=False), feats)
def sample_multi_frame_dataset(tmpdir):
    """Build a MultiFrameDataset from inputs (dim 4) and targets (dim 2) containers."""
    inputs_path = os.path.join(tmpdir.strpath, 'inputs.hdf5')
    targets_path = os.path.join(tmpdir.strpath, 'targets.hdf5')

    corpus = resources.create_dataset()

    container_inputs = containers.Container(inputs_path)
    container_targets = containers.Container(targets_path)
    container_inputs.open()
    container_targets.open()

    # number of frames per utterance
    num_frames = {
        'utt-1': 15,
        'utt-2': 20,
        'utt-3': 11,
        'utt-4': 3,
        'utt-5': 4,
    }

    for utt_idx, count in num_frames.items():
        container_inputs.set(utt_idx, np.arange(count * 4).reshape(count, 4))
        container_targets.set(utt_idx, np.arange(count * 2).reshape(count, 2))

    return feeding.MultiFrameDataset(corpus, [container_inputs, container_targets], 4)
def test_process_features_online_with_given_chunk_size(self, processor, tmpdir):
    """chunk_size=4 splits 15 frames into chunks of 4, 4, 4 and 3 per utterance."""
    ds = resources.create_dataset()
    src_path = os.path.join(tmpdir.strpath, 'in_feats')
    dst_path = os.path.join(tmpdir.strpath, 'out_feats')

    src = containers.FeatureContainer(src_path)
    feats = np.arange(90).reshape(15, 6)

    with src:
        src.sampling_rate = 16000
        src.frame_size = 400
        src.hop_size = 160
        for utt_idx in ds.utterances.keys():
            src.set(utt_idx, feats)

    processor.process_features_online(ds, src, dst_path, chunk_size=4)

    # 4 chunks per utterance, 5 utterances
    assert len(processor.called_with_data) == 4 * 5
    assert processor.called_with_data[0].shape == (4, 6)
    assert processor.called_with_data[3].shape == (3, 6)

    dst = containers.FeatureContainer(dst_path)
    with dst:
        assert len(dst.keys()) == 5
        for utt_idx in ['utt-1', 'utt-2', 'utt-3', 'utt-4', 'utt-5']:
            assert np.array_equal(dst.get(utt_idx, mem_map=False), feats)
def test_write_segments_no_speaker_prefix(self, tmpdir):
    """Without speaker prefixing the plain utterance ids are written."""
    writer = io.KaldiWriter(prefix_utterances_with_speaker=False)
    ds = resources.create_dataset()
    out_path = tmpdir.strpath
    writer.save(ds, out_path)

    rows = textfile.read_separated_lines(
        os.path.join(out_path, 'segments'), separator=' ', max_columns=4)

    # (utterance-id, track-id, start, end); -1 marks "until end of track"
    expected = [
        ('utt-1', 'wav-1', 0.0, -1.0),
        ('utt-2', 'wav_2', 0.0, -1.0),
        ('utt-3', 'wav_3', 0.0, 1.5),
        ('utt-4', 'wav_3', 1.5, 2.5),
        ('utt-5', 'wav_4', 0.0, -1.0),
    ]

    for row, (utt_idx, track_idx, start, end) in zip(rows, expected):
        assert row[0] == utt_idx
        assert row[1] == track_idx
        assert float(row[2]) == pytest.approx(start)
        assert float(row[3]) == pytest.approx(end)
def test_write_utt2spk(self, writer, tmpdir):
    """utt2spk maps utterances to speakers; issuer-less utts map to themselves."""
    ds = resources.create_dataset()

    # Add utt without issuer
    # so in utt2spk it ends up with "utt-idx utt-idx"
    ds.new_file('/random/path', 'wav-33')
    ds.new_utterance('utt-23', 'wav-33')

    out_path = tmpdir.strpath
    writer.save(ds, out_path)

    rows = textfile.read_separated_lines(
        os.path.join(out_path, 'utt2spk'), separator=' ', max_columns=2)

    expected = [
        ('spk-1-utt-1', 'spk-1'),
        ('spk-1-utt-2', 'spk-1'),
        ('spk-2-utt-3', 'spk-2'),
        ('spk-2-utt-4', 'spk-2'),
        ('spk-3-utt-5', 'spk-3'),
        ('utt-23', 'utt-23'),
    ]

    for row, (utt_idx, spk_idx) in zip(rows, expected):
        assert row[0] == utt_idx
        assert row[1] == spk_idx
def test_parse(self):
    """Parsing a serialized subview restores its filter criteria."""
    corpus = resources.create_dataset()
    sv = subview.Subview.parse(
        'matching_utterance_ids\ninclude,utt-1,utt-3', corpus=corpus)

    criteria = sv.filter_criteria
    assert len(criteria) == 1
    assert criteria[0].utterance_idxs == {'utt-1', 'utt-3'}
def test_process_features_online_ignores_none(self, processor, tmpdir):
    """Online processing does not fail when process_frames yields None."""
    ds = resources.create_dataset()
    src_path = os.path.join(tmpdir.strpath, 'in_feats')
    dst_path = os.path.join(tmpdir.strpath, 'out_feats')

    src = containers.FeatureContainer(src_path)
    feats = np.arange(90).reshape(15, 6)

    with src:
        src.sampling_rate = 16000
        src.frame_size = 400
        src.hop_size = 160
        for utt_idx in ds.utterances:
            src.set(utt_idx, feats)

    # Force the processor to produce no output for every chunk.
    processor.process_frames = lambda *args, **kwargs: None
    processor.process_features_online(ds, src, dst_path, chunk_size=4)

    # reaching this point without an exception is the actual assertion
    assert True
def test_save_subset_dev(self, writer, tmpdir):
    """dev.csv holds a header plus the two dev utterances with size and transcript."""
    ds = resources.create_dataset()
    writer.save(ds, tmpdir.strpath)

    csv_path = os.path.join(tmpdir.strpath, 'dev.csv')
    assert os.path.isfile(csv_path)

    records = textfile.read_separated_lines(csv_path, separator=',')
    assert len(records) == 3

    # HEADER
    header = records[0]
    assert len(header) == 3
    assert header[1] == 'wav_filesize'
    assert header[2] == 'transcript'

    # DATA RECORDS
    utts = {r[0]: (r[1], r[2]) for r in records[1:]}

    # (wav path in csv, expected file size, utterance id)
    expected = [
        (os.path.join(tmpdir.strpath, 'audio', 'utt-4.wav'), '32044', 'utt-4'),
        (ds.utterances['utt-5'].track.path, '83090', 'utt-5'),
    ]

    for wav_path, size, utt_idx in expected:
        transcript = ds.utterances[utt_idx].label_lists[
            corpus.LL_WORD_TRANSCRIPT].labels[0].value
        assert len(utts[wav_path]) == 2
        assert utts[wav_path][0] == size
        assert utts[wav_path][1] == transcript
def test_save(self, writer, tmpdir):
    """Saving writes the four standard Kaldi files."""
    ds = resources.create_dataset()
    out_path = tmpdir.strpath
    writer.save(ds, out_path)

    written = os.listdir(out_path)
    for name in ['segments', 'text', 'utt2spk', 'wav.scp']:
        assert name in written
def test_process_corpus_online_sets_container_attributes(self, processor, tmpdir):
    """Frame size, hop size and sampling rate are stored on the returned container."""
    ds = resources.create_dataset()
    feat_path = os.path.join(tmpdir.strpath, 'feats')

    fc = processor.process_corpus_online(
        ds, feat_path, frame_size=4096, hop_size=2048)

    with fc:
        assert fc.frame_size == 4096
        assert fc.hop_size == 2048
        assert fc.sampling_rate == 16000
def test_all_tokens_returns_only_from_selected_label_lists(self):
    """Tokens from label-lists outside the requested ids are excluded."""
    corpus = resources.create_dataset()

    # Extra label-list that must NOT contribute tokens.
    extra = annotations.LabelList(idx='test', labels=[
        annotations.Label('what can he do')
    ])
    corpus.utterances['utt-1'].set_label_list(extra)

    tokens = corpus.all_tokens(
        label_list_ids=[audiomate.corpus.LL_WORD_TRANSCRIPT])

    assert tokens == {'who', 'am', 'i', 'are', 'is', 'he', 'you', 'she', 'they'}
def test_all_tokens_with_custom_delimiter(self):
    """Tokens are split on the given delimiter and de-duplicated."""
    corpus = resources.create_dataset()

    ll = annotations.LabelList(idx='test', labels=[
        annotations.Label('a, b, a, c')
    ])
    corpus.utterances['utt-1'].set_label_list(ll)

    tokens = corpus.all_tokens(delimiter=',', label_list_ids=['test'])
    assert tokens == {'a', 'b', 'c'}
def test_process_corpus_online_ignore_returning_none(self, processor, tmpdir):
    """Online corpus processing completes even if process_frames returns None."""
    ds = resources.create_dataset()
    feat_path = os.path.join(tmpdir.strpath, 'feats')

    # Force the processor to produce no output for every chunk.
    processor.process_frames = lambda *args, **kwargs: None
    processor.process_corpus_online(ds, feat_path, frame_size=4096, hop_size=2048)

    # reaching this point without an exception is the actual assertion
    assert True
def test_validate(self):
    """Only the utterance whose transcription exceeds the ratio limit is flagged."""
    ds = resources.create_dataset()

    # 'utt-3' is expected to stay valid, 'utt-4' to be flagged.
    ds.utterances['utt-3'].label_lists[corpus.LL_WORD_TRANSCRIPT][0].value = 'max length here 11'
    ds.utterances['utt-4'].label_lists[corpus.LL_WORD_TRANSCRIPT][0].value = 'too long here'

    val = validation.UtteranceTranscriptionRatioValidator(10, corpus.LL_WORD_TRANSCRIPT)
    result = val.validate(ds)

    assert not result.passed
    assert len(result.invalid_utterances) == 1
    assert 'utt-4' in result.invalid_utterances.keys()
def test_init_with_corpus_view(self):
    """DataIterator over a subview only yields the filtered utterance ids."""
    corpus = resources.create_dataset()

    # Note: avoid naming the local after the subview module.
    filtered = subset.Subview(
        corpus,
        filter_criteria=[
            subset.MatchingUtteranceIdxFilter(
                utterance_idxs={'utt-1', 'utt-2', 'utt-4'})
        ])

    it = feeding.DataIterator(filtered, [containers.Container('blub')])
    assert set(it.utt_ids) == set(filtered.utterances.keys())
def test_save(self):
    """Saving writes the five expected Kaldi files.

    Fix: cleanup of the temporary directory previously ran only when all
    asserts passed; a failing assert leaked the directory. The removal now
    happens in a ``finally`` block.
    """
    ds = resources.create_dataset()
    path = tempfile.mkdtemp()

    try:
        self.writer.save(ds, path)

        written = os.listdir(path)
        assert 'segments' in written
        assert 'text' in written
        assert 'utt2spk' in written
        assert 'spk2gender' in written
        assert 'wav.scp' in written
    finally:
        # Always remove the temp directory, even if an assert above failed.
        shutil.rmtree(path, ignore_errors=True)
def test_init_with_corpus(self, tmpdir):
    """Dataset over a full corpus exposes all utterance ids in order."""
    utt_ids = ['utt-1', 'utt-2', 'utt-3', 'utt-4', 'utt-5']

    c = containers.Container(os.path.join(tmpdir.strpath, 'test.h5'))
    c.open()
    for utt_idx in utt_ids:
        c.set(utt_idx, data=np.arange(20))

    corpus = resources.create_dataset()
    it = feeding.Dataset(corpus, [c])

    assert it.utt_ids == utt_ids
def test_validate(self):
    """Utterances with too few labels or a missing label-list fail validation."""
    ds = resources.create_dataset()

    # 'utt-3' keeps the label-list but empties it; 'utt-4' loses it entirely.
    ds.utterances['utt-3'].label_lists[corpus.LL_WORD_TRANSCRIPT].labels = []
    del ds.utterances['utt-4'].label_lists[corpus.LL_WORD_TRANSCRIPT]

    val = validation.LabelCountValidator(1, corpus.LL_WORD_TRANSCRIPT)
    result = val.validate(ds)

    assert not result.passed
    assert len(result.invalid_utterances) == 2
    assert result.invalid_utterances['utt-3'] == 'Only {} labels'.format(0)
    assert result.invalid_utterances['utt-4'] == 'No label-list {}'.format(corpus.LL_WORD_TRANSCRIPT)
def test_process_corpus_online_with_frame_hop_size_change_stores_correct(self, processor, tmpdir):
    """Frame/hop scale factors of the processor are reflected in the container."""
    ds = resources.create_dataset()
    feat_path = os.path.join(tmpdir.strpath, 'feats')

    processor.mock_frame_size_scale = 0.5
    processor.mock_hop_size_scale = 0.25
    processor.process_corpus_online(ds, feat_path, frame_size=4096, hop_size=2048)

    fc = containers.FeatureContainer(feat_path)
    with fc:
        # 4096 * 0.5 and 2048 * 0.25
        assert fc.frame_size == 2048
        assert fc.hop_size == 512
def ds():
    """Dataset whose tracks point at real audio files in several formats."""
    dataset = resources.create_dataset()

    # track-id -> on-disk audio file (wav, mp3, flac, wav)
    paths = {
        'wav-1': resources.sample_wav_file('wav_1.wav'),
        'wav_2': resources.get_resource_path(
            ('audio_formats', 'mp3_2_44_1k_16b.mp3')),
        'wav_3': resources.get_resource_path(
            ('audio_formats', 'flac_1_16k_16b.flac')),
        'wav_4': resources.sample_wav_file('wav_4.wav'),
    }

    for track_idx, file_path in paths.items():
        dataset.tracks[track_idx].path = file_path

    return dataset
def test_process_corpus_online(self, processor, tmpdir):
    """Online processing writes one dataset per utterance with the expected shape."""
    ds = resources.create_dataset()
    feat_path = os.path.join(tmpdir.strpath, 'feats')

    processor.process_corpus_online(ds, feat_path, frame_size=4096, hop_size=2048)

    # utterance-id -> (num frames, frame size)
    expected_shapes = {
        'utt-1': (20, 4096),
        'utt-2': (20, 4096),
        'utt-3': (11, 4096),
        'utt-4': (7, 4096),
        'utt-5': (20, 4096),
    }

    with h5py.File(feat_path, 'r') as f:
        assert set(f.keys()) == set(ds.utterances.keys())
        for utt_idx, shape in expected_shapes.items():
            assert f[utt_idx].shape == shape
def test_validate(self):
    """Only the utterance whose transcription exceeds the ratio limit is flagged."""
    ds = resources.create_dataset()

    # 'utt-3' is expected to stay valid, 'utt-4' to be flagged.
    ds.utterances['utt-3'].set_label_list(
        annotations.LabelList.create_single(
            'max length here 11', idx=corpus.LL_WORD_TRANSCRIPT))
    ds.utterances['utt-4'].set_label_list(
        annotations.LabelList.create_single(
            'too long here', idx=corpus.LL_WORD_TRANSCRIPT))

    val = validation.UtteranceTranscriptionRatioValidator(
        10, corpus.LL_WORD_TRANSCRIPT)
    result = val.validate(ds)

    assert not result.passed
    assert len(result.invalid_utterances) == 1
    assert 'utt-4' in result.invalid_utterances.keys()
def test_init_with_corpus_view(self, tmpdir):
    """Dataset over a subview only exposes the filtered utterance ids."""
    c = containers.Container(os.path.join(tmpdir.strpath, 'test.h5'))
    c.open()
    for utt_idx in ['utt-1', 'utt-2', 'utt-3', 'utt-4', 'utt-5']:
        c.set(utt_idx, data=np.arange(20))

    corpus = resources.create_dataset()

    # Note: avoid naming the local after the subview module.
    filtered = subset.Subview(
        corpus,
        filter_criteria=[
            subset.MatchingUtteranceIdxFilter(
                utterance_idxs={'utt-1', 'utt-2', 'utt-4'})
        ])

    it = feeding.Dataset(filtered, [c])
    assert it.utt_ids == ['utt-1', 'utt-2', 'utt-4']
def test_write_wav_scp(self, writer, tmpdir):
    """wav.scp maps track ids to absolute wav file paths."""
    ds = resources.create_dataset()
    out_path = tmpdir.strpath
    writer.save(ds, out_path)

    rows = textfile.read_separated_lines(
        os.path.join(out_path, 'wav.scp'), separator=' ', max_columns=2)

    wav_base = os.path.abspath(resources.get_resource_path(['wav_files']))

    expected = [
        ('wav-1', 'wav_1.wav'),
        ('wav_2', 'wav_2.wav'),
        ('wav_3', 'wav_3.wav'),
        ('wav_4', 'wav_4.wav'),
    ]

    for row, (track_idx, file_name) in zip(rows, expected):
        assert row[0] == track_idx
        assert row[1] == os.path.join(wav_base, file_name)
def test_save_spk2gender(self, writer, tmpdir):
    """With create_spk2gender a speaker-to-gender mapping file is written."""
    writer = io.KaldiWriter(create_spk2gender=True)
    ds = resources.create_dataset()
    out_path = tmpdir.strpath
    writer.save(ds, out_path)

    assert 'spk2gender' in os.listdir(out_path)

    rows = textfile.read_separated_lines(
        os.path.join(out_path, 'spk2gender'), separator=' ', max_columns=2)

    expected = [
        ('spk-1', 'm'),
        ('spk-2', 'f'),
        ('spk-3', 'm'),
    ]

    for row, (spk_idx, gender) in zip(rows, expected):
        assert row[0] == spk_idx
        assert row[1] == gender