def test_wav_augment_with_executor(self, exec_type):
    """Smoke-test feature extraction with SoX speed augmentation under an executor.

    Only verifies that the pipeline completes (does not hang or raise);
    the produced feature values are not inspected.
    """
    base_cut = self.with_cut(sampling_rate=16000, num_samples=16000)
    with TemporaryDirectory() as tmp_dir, \
            LilcomFilesWriter(storage_path=tmp_dir) as storage, \
            exec_type(max_workers=4) as executor:
        # 100 copies of the same one-second cut, distinguished only by ID.
        cuts = CutSet.from_cuts(base_cut.with_id(str(idx)) for idx in range(100))
        # Just test that it runs and does not hang.
        cuts.compute_and_store_features(
            extractor=Fbank(),
            storage=storage,
            augment_fn=SoxEffectTransform(speed(16000)),
            executor=executor,
        )
def main():
    """Prepare LibriSpeech manifests and extract Fbank features per partition.

    Training partitions additionally get 0.9x/1.1x speed perturbation before
    feature extraction. Partitions whose cuts manifest already exists on disk
    are skipped.
    """
    dataset_parts = ('dev-clean', 'test-clean', 'train-clean-100')
    print("Parts we will prepare: ", dataset_parts)

    candidate_dirs = [
        Path('/export/corpora5/LibriSpeech'),
        Path(
            '/home/storage04/zhuangweiji/data/open-source-data/librispeech/LibriSpeech'
        )
    ]
    # Keep the LAST candidate that exists on this machine (mirrors the
    # original loop, where later matches overwrote earlier ones).
    existing = [d for d in candidate_dirs if os.path.exists(d)]
    corpus_dir = existing[-1] if existing else None
    if corpus_dir is None:
        print(
            "Please create a place on your system to put the downloaded Librispeech data "
            "and add it to `corpus_dirs`")
        sys.exit(1)
    output_dir = Path('exp/data')

    print('Manifest preparation:')
    librispeech_manifests = prepare_librispeech(
        corpus_dir=corpus_dir,
        dataset_parts=dataset_parts,
        output_dir=output_dir,
        num_jobs=num_jobs)  # NOTE(review): `num_jobs` is a module-level setting — confirm it is defined.

    print('Feature extraction:')
    with get_executor() as ex:  # Initialize the executor only once.
        for partition, manifests in librispeech_manifests.items():
            cuts_path = output_dir / f'cuts_{partition}.json.gz'
            if cuts_path.is_file():
                print(f'{partition} already exists - skipping.')
                continue
            print('Processing', partition)
            cut_set = CutSet.from_manifests(
                recordings=manifests['recordings'],
                supervisions=manifests['supervisions'])
            if 'train' in partition:
                # Triple the training data with speed perturbation.
                cut_set = (cut_set
                           + cut_set.perturb_speed(0.9)
                           + cut_set.perturb_speed(1.1))
            cut_set = cut_set.compute_and_store_features(
                extractor=Fbank(),
                executor=ex,
                storage=LilcomFilesWriter(f'{output_dir}/feats_{partition}'))
            librispeech_manifests[partition]['cuts'] = cut_set
            cut_set.to_json(cuts_path)
def test_padded_cut_num_frames_and_samples_are_consistent(
        sampling_rate, num_samples, padded_duration):
    """After padding a cut, its metadata must agree with the loaded arrays.

    Checks that num_frames/num_features match the feature matrix shape and
    that num_samples matches the loaded (single-channel) audio shape.
    """
    with make_cut(sampling_rate, num_samples) as raw_cut, \
            TemporaryDirectory() as tmp_dir, \
            LilcomFilesWriter(tmp_dir) as storage:
        padded = raw_cut.compute_and_store_features(
            extractor=Fbank(), storage=storage).pad(padded_duration)

        feats = padded.load_features()
        audio = padded.load_audio()

        # Feature matrix shape must agree with the cut's declared counts.
        assert padded.has_features
        assert feats.shape == (padded.num_frames, padded.num_features)

        # Audio must be mono and carry the declared number of samples.
        assert padded.has_recording
        assert audio.shape == (1, padded.num_samples)
def test_mixed_cut_num_frames_example_1():
    """Two non-overlapping tracks plus padding: frame counts must be consistent."""
    fbank = Fbank()
    with make_cut(sampling_rate=16000, num_samples=237920) as cut1, \
            make_cut(sampling_rate=16000, num_samples=219600) as cut2, \
            TemporaryDirectory() as tmp_dir, \
            LilcomFilesWriter(tmp_dir) as storage:
        # Two cuts of similar duration, concatenated together with 1 second of
        # silence in between, and padded to a total duration of 31.445s.
        first = cut1.compute_and_store_features(fbank, storage)
        second = cut2.compute_and_store_features(fbank, storage)
        mixed: MixedCut = (first
                           .pad(duration=cut1.duration + 1.0)
                           .append(second)
                           .pad(duration=31.445))

        # Padded correctly.
        assert mixed.duration == 31.445
        # Round last 5 up.
        assert mixed.num_frames == 3145
        # The tracks do not overlap in this example, so the sum of individual
        # cut num_frames should equal the total num_frames.
        assert sum(track.cut.num_frames for track in mixed.tracks) == 3145
        # Loaded features' num frames matches the metadata.
        features = mixed.load_features()
        assert features.shape[0] == 3145
def test_mixed_cut_num_frames_example_2():
    """Three tracks appended with silence gaps: total frame count must be consistent."""
    fbank = Fbank()
    with make_cut(sampling_rate=16000, num_samples=252879) as cut1, \
            make_cut(sampling_rate=16000, num_samples=185280) as cut2, \
            make_cut(sampling_rate=16000, num_samples=204161) as cut3, \
            TemporaryDirectory() as tmp_dir, \
            LilcomFilesWriter(tmp_dir) as storage:
        # Cuts of similar duration, concatenated together with 1 second of
        # silence between each pair.
        first = cut1.compute_and_store_features(fbank, storage)
        second = cut2.compute_and_store_features(fbank, storage)
        third = cut3.compute_and_store_features(fbank, storage)
        mixed: MixedCut = first.pad(duration=cut1.duration + 1.0).append(second)
        mixed = mixed.pad(duration=mixed.duration + 1.0).append(third)

        # Padded correctly.
        assert mixed.duration == 42.145
        # Round last 5 up.
        assert mixed.num_frames == 4215

        # TODO(pzelasko): This assertion would not pass for now, as we're adding an extra frame during load_features.
        # assert sum(t.cut.num_frames for t in mixed.tracks) == 4215  # Since the tracks do not overlap in this example,
        # The sum of individual cut num_frames should be equal to the total num_frames

        # Loaded features' num frames matches the metadata.
        features = mixed.load_features()
        assert features.shape[0] == 4215
def main():
    """Prepare Aishell manifests and extract Fbank features per partition.

    Training partitions additionally get 0.9x/1.1x speed perturbation before
    feature extraction. Partitions whose cuts manifest already exists on disk
    are skipped.
    """
    candidate_dirs = [Path('/mnt/cfs2/asr/database/AM/aishell')]
    # Keep the LAST candidate that exists on this machine (mirrors the
    # original loop, where later matches overwrote earlier ones).
    existing = [d for d in candidate_dirs if os.path.exists(d)]
    corpus_dir = existing[-1] if existing else None
    if corpus_dir is None:
        print(
            "Please create a place on your system to put the downloaded Aishell data "
            "and add it to `corpus_dirs`")
        sys.exit(1)
    output_dir = Path('exp/data')

    print('Manifest preparation:')
    aishell_manifests = prepare_aishell(corpus_dir=corpus_dir,
                                        output_dir=output_dir)

    print('Feature extraction:')
    with get_executor() as ex:  # Initialize the executor only once.
        for partition, manifests in aishell_manifests.items():
            cuts_path = output_dir / f'cuts_{partition}.json.gz'
            if cuts_path.is_file():
                print(f'{partition} already exists - skipping.')
                continue
            print('Processing', partition)
            cut_set = CutSet.from_manifests(
                recordings=manifests['recordings'],
                supervisions=manifests['supervisions'])
            if 'train' in partition:
                # Triple the training data with speed perturbation.
                cut_set = (cut_set
                           + cut_set.perturb_speed(0.9)
                           + cut_set.perturb_speed(1.1))
            cut_set = cut_set.compute_and_store_features(
                extractor=Fbank(),
                executor=ex,
                storage=LilcomFilesWriter(f'{output_dir}/feats_{partition}'))
            aishell_manifests[partition]['cuts'] = cut_set
            cut_set.to_json(cuts_path)
# Build the augmenter once; None disables augmentation downstream.
augmenter = WavAugmenter.create_predefined(
    'pitch_reverb_tdrop',
    sampling_rate=16000) if use_data_augmentation else None

# It seems that when spawning multiple Python subprocesses with the same sox
# handle, it raises "std::runtime_error: Couldn't close file". The issue seems
# to happen only in a Jupyter notebook on Mac OS X, hence the workaround below.
#
# NOTE(review): the original code first set num_jobs conditionally
# (1 when augmenting, os.cpu_count() otherwise) and then unconditionally
# overwrote it with 1, making that conditional dead code. The effective
# behavior — always a single job — is preserved here.
torch.set_num_threads(1)
torch.set_num_interop_threads(1)
num_jobs = 1

for partition, manifests in librispeech_manifests.items():
    print(partition)
    with LilcomFilesWriter(f'{output_dir}/feats_{partition}') as storage, \
            ProcessPoolExecutor(num_jobs) as ex:
        cut_set = CutSet.from_manifests(
            recordings=manifests['recordings'],
            supervisions=manifests['supervisions']).compute_and_store_features(
                extractor=Fbank(),
                storage=storage,
                # Augment only the training partitions.
                # NOTE(review): other call sites in this codebase pass this as
                # `augment_fn=`; confirm `augmenter=` matches the API version in use.
                augmenter=augmenter if 'train' in partition else None,
                executor=ex)
        librispeech_manifests[partition]['cuts'] = cut_set
        # NOTE(review): `output_dir + ...` requires output_dir to be a str
        # (a pathlib.Path would raise TypeError here) — confirm its definition.
        cut_set.to_json(output_dir + f'/cuts_{partition}.json.gz')

# Wrap the train/test cuts into datasets for the downstream recipe.
cuts_train = SpeechRecognitionDataset(
    librispeech_manifests['train-clean-100']['cuts'])
cuts_test = SpeechRecognitionDataset(
    librispeech_manifests['test-clean']['cuts'])
def _with_features(self, cut: Cut, frame_shift: Seconds) -> Cut:
    """Return `cut` with fbank features computed at the given frame shift.

    The backing TemporaryDirectory is appended to self.dirs so the stored
    features stay readable after this method returns.
    """
    tmp_dir = TemporaryDirectory()
    self.dirs.append(tmp_dir)
    fbank = Fbank(config=FbankConfig(frame_shift=frame_shift))
    with LilcomFilesWriter(tmp_dir.name) as storage:
        return cut.compute_and_store_features(fbank, storage=storage)
def _with_features(self, cut: Cut) -> Cut:
    """Return `cut` with default-config fbank features attached.

    The backing TemporaryDirectory is appended to self.dirs so the stored
    features stay readable after this method returns.
    """
    tmp_dir = TemporaryDirectory()
    self.dirs.append(tmp_dir)
    with LilcomFilesWriter(tmp_dir.name) as storage:
        return cut.compute_and_store_features(Fbank(), storage=storage)