def get_mock_dataset(root_dir):
    """Build a mocked LJSpeech-1.1 layout under ``root_dir``.

    Writes ``metadata.csv`` plus one deterministic white-noise wav per
    transcript pair, and returns the normalized waveforms together with
    the transcript constants.

    Args:
        root_dir: path to the mocked dataset
    """
    samples = []
    dataset_dir = os.path.join(root_dir, "LJSpeech-1.1")
    wav_dir = os.path.join(dataset_dir, "wavs")
    os.makedirs(wav_dir, exist_ok=True)
    sample_rate = 22050

    metadata_path = os.path.join(dataset_dir, "metadata.csv")
    with open(metadata_path, mode="w", newline='') as metadata_file:
        writer = csv.writer(metadata_file, delimiter="|", quoting=csv.QUOTE_NONE)
        pairs = zip(_TRANSCRIPTS, _NORMALIZED_TRANSCRIPT)
        for i, (transcript, normalized) in enumerate(pairs):
            fileid = f'LJ001-{i:04d}'
            writer.writerow([fileid, transcript, normalized])
            # Per-row seed keeps the generated audio reproducible.
            data = get_whitenoise(sample_rate=sample_rate, duration=1, n_channels=1, dtype="int16", seed=i)
            save_wav(os.path.join(wav_dir, fileid + ".wav"), data, sample_rate)
            samples.append(normalize_wav(data))
    return samples, _TRANSCRIPTS, _NORMALIZED_TRANSCRIPT
def get_mock_dataset(root_dir):
    """Build a mocked LibriTTS ``train-clean-100`` tree under ``root_dir``.

    For each utterance id, writes a white-noise wav plus matching
    ``.original.txt`` / ``.normalized.txt`` transcript files.

    Args:
        root_dir: directory to the mocked dataset
    """
    samples = []
    split_dir = os.path.join(root_dir, 'LibriTTS', 'train-clean-100')
    for i, utterance_id in enumerate(_UTTERANCE_IDS):
        # File stem shared by the wav and both transcript files.
        stem = "_".join(str(u) for u in utterance_id)
        utterance_dir = os.path.join(split_dir, str(utterance_id[0]), str(utterance_id[1]))
        os.makedirs(utterance_dir, exist_ok=True)

        data = get_whitenoise(sample_rate=24000, duration=2, n_channels=1, dtype='int16', seed=i)
        save_wav(os.path.join(utterance_dir, f'{stem}.wav'), data, 24000)
        samples.append(normalize_wav(data))

        with open(os.path.join(utterance_dir, f'{stem}.original.txt'), 'w') as file_:
            file_.write(_ORIGINAL_TEXT)
        with open(os.path.join(utterance_dir, f'{stem}.normalized.txt'), 'w') as file_:
            file_.write(_NORMALIZED_TEXT)
    return samples, _UTTERANCE_IDS, _ORIGINAL_TEXT, _NORMALIZED_TEXT
def setUpClass(cls):
    """Create a mocked LJSpeech-1.1 dataset in a temp dir and record
    the expected normalized waveforms on ``cls.data``."""
    cls.root_dir = cls.get_base_temp_dir()
    dataset_dir = os.path.join(cls.root_dir, "LJSpeech-1.1")
    wav_dir = os.path.join(dataset_dir, "wavs")
    os.makedirs(wav_dir, exist_ok=True)
    sample_rate = 22050

    metadata_path = os.path.join(dataset_dir, "metadata.csv")
    with open(metadata_path, mode="w", newline='') as metadata_file:
        writer = csv.writer(metadata_file, delimiter="|", quoting=csv.QUOTE_NONE)
        pairs = zip(cls.transcripts, cls.normalized_transcripts)
        for i, (transcript, normalized) in enumerate(pairs):
            fileid = f'LJ001-{i:04d}'
            writer.writerow([fileid, transcript, normalized])
            # Per-row seed keeps the generated audio reproducible.
            data = get_whitenoise(sample_rate=sample_rate, duration=1, n_channels=1, dtype="int16", seed=i)
            save_wav(os.path.join(wav_dir, fileid + ".wav"), data, sample_rate)
            cls.data.append(normalize_wav(data))
def get_mock_dataset(root_dir):
    """Build a mocked GTZAN ``genres`` tree under ``root_dir``.

    Args:
        root_dir: directory to the mocked dataset

    Returns:
        Tuple of four lists of ``(waveform, sample_rate, genre)`` samples:
        all samples, then the training / validation / testing subsets as
        selected by ``gtzan.filtered_train`` / ``filtered_valid`` /
        ``filtered_test``.
    """
    mocked_samples = []
    mocked_training = []
    mocked_validation = []
    mocked_testing = []
    sample_rate = 22050
    seed = 0
    for genre in gtzan.gtzan_genres:
        base_dir = os.path.join(root_dir, 'genres', genre)
        os.makedirs(base_dir, exist_ok=True)
        for i in range(100):
            filename = f'{genre}.{i:05d}'
            # BUG FIX: save each clip under its real name. The previous
            # literal '(unknown).wav' path made every clip in a genre
            # overwrite the same file.
            path = os.path.join(base_dir, f'{filename}.wav')
            data = get_whitenoise(sample_rate=sample_rate, duration=0.01, n_channels=1, dtype='int16', seed=seed)
            save_wav(path, data, sample_rate)
            sample = (normalize_wav(data), sample_rate, genre)
            mocked_samples.append(sample)
            # Split membership is driven by the filtered file lists.
            if filename in gtzan.filtered_test:
                mocked_testing.append(sample)
            if filename in gtzan.filtered_train:
                mocked_training.append(sample)
            if filename in gtzan.filtered_valid:
                mocked_validation.append(sample)
            seed += 1
    return (mocked_samples, mocked_training, mocked_validation, mocked_testing)
def assert_sphere(
        self,
        dtype,
        sample_rate,
        num_channels,
        channels_first=True,
        duration=1,
):
    """`soundfile_backend.load` can load SPHERE format correctly."""
    path = self.get_temp_path("reference.sph")
    num_frames = duration * sample_rate
    # Reference data is generated frames-first (channels_first=False),
    # matching what soundfile.write expects.
    raw = get_wav_data(
        dtype,
        num_channels,
        num_frames=num_frames,
        normalize=False,
        channels_first=False,
    )
    soundfile.write(path, raw, sample_rate, subtype=dtype2subtype(dtype), format="NIST")

    if channels_first:
        # Loader is expected to return (channels, frames); transpose the reference.
        reference = raw.t()
    else:
        reference = raw
    expected = normalize_wav(reference)

    data, sr = soundfile_backend.load(path, channels_first=channels_first)
    assert sr == sample_rate
    self.assertEqual(data, expected, atol=1e-4, rtol=1e-8)
def setUp(cls):
    """Create a mocked SpeechCommands tree (2 speakers x 3 utterances per
    label) and record expected ``(waveform, sr, label, speaker, utterance)``
    tuples on ``cls.samples``."""
    cls.root_dir = cls.get_base_temp_dir()
    dataset_dir = os.path.join(cls.root_dir, speechcommands.FOLDER_IN_ARCHIVE, speechcommands.URL)
    os.makedirs(dataset_dir, exist_ok=True)
    sample_rate = 16000  # 16kHz sample rate
    seed = 0
    for label in LABELS:
        label_dir = os.path.join(dataset_dir, label)
        os.makedirs(label_dir, exist_ok=True)
        for j in range(2):
            # generate hash ID for speaker
            speaker = f"{j:08x}"
            for utterance in range(3):
                # Bump the seed first so each clip is distinct noise.
                seed += 1
                filename = f"{speaker}{speechcommands.HASH_DIVIDER}{utterance}.wav"
                data = get_whitenoise(
                    sample_rate=sample_rate,
                    duration=0.01,
                    n_channels=1,
                    dtype="int16",
                    seed=seed,
                )
                save_wav(os.path.join(label_dir, filename), data, sample_rate)
                cls.samples.append(
                    (normalize_wav(data), sample_rate, label, speaker, utterance)
                )
def _mock_dataset(root_dir, num_speaker):
    """Create a mocked source-separation dataset (``mix`` + ``s1..sN`` dirs).

    Args:
        root_dir: directory to write the mocked dataset into
        num_speaker: number of source directories (``s1`` .. ``s{num_speaker}``)

    Returns:
        List of ``(sample_rate, mix_waveform, [source_waveforms])`` per file
        in ``_FILENAMES``.
    """
    dirnames = ["mix"] + [f"s{i+1}" for i in range(num_speaker)]
    for dirname in dirnames:
        os.makedirs(os.path.join(root_dir, dirname), exist_ok=True)
    seed = 0
    sample_rate = 8000
    expected = []
    for filename in _FILENAMES:
        mix = None
        src = []
        for dirname in dirnames:
            # Consistency fix: use the `sample_rate` variable instead of a
            # second hard-coded 8000, so the rate has a single source of truth.
            waveform = get_whitenoise(
                sample_rate=sample_rate, duration=1, n_channels=1, dtype="int16", seed=seed
            )
            seed += 1
            path = os.path.join(root_dir, dirname, filename)
            save_wav(path, waveform, sample_rate)
            waveform = normalize_wav(waveform)
            if dirname == "mix":
                mix = waveform
            else:
                src.append(waveform)
        expected.append((sample_rate, mix, src))
    return expected
def setUpClass(cls):
    """Create a mocked GTZAN ``genres`` tree in a temp dir and record the
    expected ``(waveform, sample_rate, genre)`` samples (plus the
    train/valid/test subsets) on the class."""
    cls.root_dir = cls.get_base_temp_dir()
    sample_rate = 22050
    seed = 0
    for genre in gtzan.gtzan_genres:
        base_dir = os.path.join(cls.root_dir, 'genres', genre)
        os.makedirs(base_dir, exist_ok=True)
        for i in range(100):
            filename = f'{genre}.{i:05d}'
            # BUG FIX: save each clip under its real name. The previous
            # literal '(unknown).wav' path made every clip in a genre
            # overwrite the same file.
            path = os.path.join(base_dir, f'{filename}.wav')
            data = get_whitenoise(sample_rate=sample_rate, duration=0.01, n_channels=1, dtype='int16', seed=seed)
            save_wav(path, data, sample_rate)
            sample = (normalize_wav(data), sample_rate, genre)
            cls.samples.append(sample)
            # Split membership is driven by the filtered file lists.
            if filename in gtzan.filtered_test:
                cls.testing.append(sample)
            if filename in gtzan.filtered_train:
                cls.training.append(sample)
            if filename in gtzan.filtered_valid:
                cls.validation.append(sample)
            seed += 1
def get_mock_dataset(root_dir, train_csv_contents, ext_audio) -> Tuple[Tensor, int, Dict[str, str]]:
    """ prepares mocked dataset """
    mocked_data = []
    # Note: extension is changed to wav for the sake of test
    # Note: the first content is missing values for `age`, `gender` and `accent` as in the original data.
    # Tsv file name difference does not mean different subset, testing as a whole dataset here
    tsv_path = os.path.join(root_dir, "train.tsv")
    clips_dir = os.path.join(root_dir, "clips")
    os.makedirs(clips_dir, exist_ok=True)
    with open(tsv_path, "w", newline='') as tsv:
        writer = csv.writer(tsv, delimiter='\t')
        writer.writerow(_HEADERS)
        for i, content in enumerate(train_csv_contents):
            # Replace the sentence with its utf-8 bytes repr (mutates the
            # caller's row in place, same as the original mock).
            content[2] = str(content[2].encode("utf-8"))
            writer.writerow(content)
            # Append the audio extension only when the row doesn't carry one.
            if content[1].endswith(ext_audio):
                clip_name = content[1]
            else:
                clip_name = content[1] + ext_audio
            data = get_whitenoise(sample_rate=_SAMPLE_RATE, duration=1, n_channels=1, seed=i, dtype='float32')
            save_wav(os.path.join(clips_dir, clip_name), data, _SAMPLE_RATE)
            # Append data entry
            mocked_data.append((normalize_wav(data), _SAMPLE_RATE, dict(zip(_HEADERS, content))))
    return mocked_data
def setUpClass(cls):
    """Create a mocked CommonVoice dataset in a temp dir and record the
    expected ``(waveform, sample_rate, metadata_dict)`` entries on
    ``cls.data``."""
    cls.root_dir = cls.get_base_temp_dir()
    # The path convention commonvoice uses
    base_dir = os.path.join(cls.root_dir, commonvoice.FOLDER_IN_ARCHIVE, commonvoice.VERSION, "en")
    audio_base_path = os.path.join(base_dir, cls._folder_audio)
    # Create directories once, up front (also creates base_dir as a parent);
    # previously the clips dir was re-created on every loop iteration.
    os.makedirs(audio_base_path, exist_ok=True)
    # Tsv file name difference does not mean different subset, testing as a whole dataset here
    tsv_filename = os.path.join(base_dir, commonvoice.TSV)
    with open(tsv_filename, "w", newline='') as tsv:
        writer = csv.writer(tsv, delimiter='\t')
        writer.writerow(cls._headers)
        for i, content in enumerate(cls._train_csv_contents):
            # BUG FIX: removed the duplicated `audio_filename = audio_filename =`
            # double assignment.
            audio_filename = content[1]
            writer.writerow(content)
            # Generate and store audio
            audio_path = os.path.join(audio_base_path, audio_filename)
            data = get_whitenoise(sample_rate=cls.sample_rate, duration=1, n_channels=1, seed=i, dtype='float32')
            save_wav(audio_path, data, cls.sample_rate)
            # Append data entry
            cls.data.append((normalize_wav(data), cls.sample_rate, dict(zip(cls._headers, content))))
def setUpClass(cls):
    """Create a mocked CMU ARCTIC (``cmu_us_aew_arctic``) dataset and record
    the expected samples on ``cls.samples``."""
    cls.root_dir = cls.get_base_temp_dir()
    sample_rate = 16000
    utterance = "This is a test utterance."
    base_dir = os.path.join(cls.root_dir, "ARCTIC", "cmu_us_aew_arctic")
    txt_dir = os.path.join(base_dir, "etc")
    audio_dir = os.path.join(base_dir, "wav")
    os.makedirs(txt_dir, exist_ok=True)
    os.makedirs(audio_dir, exist_ok=True)
    seed = 42
    with open(os.path.join(txt_dir, "txt.done.data"), "w") as txt:
        for chapter in ("a", "b"):
            for idx in range(5):
                utterance_id = f"arctic_{chapter}{idx:04d}"
                data = get_whitenoise(
                    sample_rate=sample_rate,
                    duration=3,
                    n_channels=1,
                    dtype="int16",
                    seed=seed,
                )
                save_wav(os.path.join(audio_dir, f"{utterance_id}.wav"), data, sample_rate)
                # Sample carries the id suffix (e.g. 'a0000'), not the full id.
                cls.samples.append(
                    (
                        normalize_wav(data),
                        sample_rate,
                        utterance,
                        utterance_id.split("_")[1],
                    )
                )
                txt.write(f'( {utterance_id} "{utterance}" )\n')
                seed += 1
def setUpClass(cls):
    """Create a mocked LibriTTS ``train-clean-100`` tree in a temp dir and
    record the expected normalized waveforms on ``cls.data``."""
    cls.root_dir = cls.get_base_temp_dir()
    split_dir = os.path.join(cls.root_dir, 'LibriTTS', 'train-clean-100')
    for i, utterance_id in enumerate(cls.utterance_ids):
        # File stem shared by the wav and both transcript files.
        stem = "_".join(str(u) for u in utterance_id)
        utterance_dir = os.path.join(split_dir, str(utterance_id[0]), str(utterance_id[1]))
        os.makedirs(utterance_dir, exist_ok=True)

        data = get_whitenoise(sample_rate=24000, duration=2, n_channels=1, dtype='int16', seed=i)
        save_wav(os.path.join(utterance_dir, f'{stem}.wav'), data, 24000)
        cls.data.append(normalize_wav(data))

        with open(os.path.join(utterance_dir, f'{stem}.original.txt'), 'w') as file_:
            file_.write(cls.original_text)
        with open(os.path.join(utterance_dir, f'{stem}.normalized.txt'), 'w') as file_:
            file_.write(cls.normalized_text)
def get_mock_dataset(root_dir):
    """Build a mocked LibriSpeech tree under ``root_dir``.

    Creates 5 speakers x 3 chapters x 10 utterances, each with a wav and a
    per-chapter ``.trans.txt`` transcript file.

    Args:
        root_dir: directory to the mocked dataset

    Returns:
        List of ``(waveform, sample_rate, transcript, speaker_id,
        chapter_id, utterance_id)`` tuples.
    """
    mocked_data = []
    dataset_dir = os.path.join(root_dir, librispeech.FOLDER_IN_ARCHIVE, librispeech.URL)
    os.makedirs(dataset_dir, exist_ok=True)
    sample_rate = 16000  # 16kHz
    seed = 0
    for speaker_id in range(5):
        speaker_path = os.path.join(dataset_dir, str(speaker_id))
        os.makedirs(speaker_path, exist_ok=True)
        for chapter_id in range(3):
            chapter_path = os.path.join(speaker_path, str(chapter_id))
            os.makedirs(chapter_path, exist_ok=True)
            trans_content = []
            for utterance_id in range(10):
                tag = f'{speaker_id}-{chapter_id}-{utterance_id:04d}'
                # Transcript spells out the three ids as words.
                transcript = ' '.join(
                    _NUMBERS[x] for x in (speaker_id, chapter_id, utterance_id)
                )
                trans_content.append(f'{tag} {transcript}')
                data = get_whitenoise(
                    sample_rate=sample_rate, duration=0.01, n_channels=1, dtype='float32', seed=seed
                )
                save_wav(os.path.join(chapter_path, f'{tag}.wav'), data, sample_rate)
                mocked_data.append(
                    (normalize_wav(data), sample_rate, transcript, speaker_id, chapter_id, utterance_id)
                )
                seed += 1
            trans_path = os.path.join(chapter_path, f'{speaker_id}-{chapter_id}.trans.txt')
            with open(trans_path, 'w') as f:
                f.write('\n'.join(trans_content))
    return mocked_data
def get_mock_dataset(dataset_dir):
    """Build a mocked SpeechCommands dataset under ``dataset_dir``.

    Creates 6 speakers x 3 utterances per label; speakers 0-1 go to the
    training split, 2-3 to validation, 4-5 to testing, with the validation
    and testing memberships written to ``validation_list.txt`` /
    ``testing_list.txt``.

    Args:
        dataset_dir: directory to the mocked dataset

    Returns:
        Tuple ``(all, train, valid, test)`` of sample lists, each sample
        being ``(waveform, sample_rate, label, speaker, utterance)``.
    """
    mocked_samples = []
    mocked_train_samples = []
    mocked_valid_samples = []
    mocked_test_samples = []
    os.makedirs(dataset_dir, exist_ok=True)
    sample_rate = 16000  # 16kHz sample rate
    seed = 0
    valid_file = os.path.join(dataset_dir, "validation_list.txt")
    test_file = os.path.join(dataset_dir, "testing_list.txt")
    with open(valid_file, "w") as valid, open(test_file, "w") as test:
        for label in _LABELS:
            path = os.path.join(dataset_dir, label)
            os.makedirs(path, exist_ok=True)
            for j in range(6):
                # generate hash ID for speaker
                speaker = "{:08x}".format(j)
                for utterance in range(3):
                    filename = f"{speaker}{speechcommands.HASH_DIVIDER}{utterance}.wav"
                    file_path = os.path.join(path, filename)
                    seed += 1
                    data = get_whitenoise(
                        sample_rate=sample_rate,
                        duration=0.01,
                        n_channels=1,
                        dtype="int16",
                        seed=seed,
                    )
                    save_wav(file_path, data, sample_rate)
                    sample = (
                        normalize_wav(data),
                        sample_rate,
                        label,
                        speaker,
                        utterance,
                    )
                    mocked_samples.append(sample)
                    if j < 2:
                        mocked_train_samples.append(sample)
                    elif j < 4:
                        # BUG FIX: the split lists must name the actual wav
                        # ('label/filename'), not a '(unknown)' placeholder,
                        # otherwise split lookups against the list never match.
                        valid.write(f'{label}/{filename}\n')
                        mocked_valid_samples.append(sample)
                    elif j < 6:
                        test.write(f'{label}/{filename}\n')
                        mocked_test_samples.append(sample)
    return mocked_samples, mocked_train_samples, mocked_valid_samples, mocked_test_samples
def setUpClass(cls):
    """Create a mocked ``waves_yesno`` dataset and record the expected
    normalized waveforms on ``cls.data``."""
    cls.root_dir = cls.get_base_temp_dir()
    dataset_dir = os.path.join(cls.root_dir, 'waves_yesno')
    os.makedirs(dataset_dir, exist_ok=True)
    for i, label in enumerate(cls.labels):
        # File name encodes the label sequence, e.g. '0_1_0_...'.wav.
        filename = "_".join(str(v) for v in label) + '.wav'
        data = get_whitenoise(sample_rate=8000, duration=6, n_channels=1, dtype='int16', seed=i)
        save_wav(os.path.join(dataset_dir, filename), data, 8000)
        cls.data.append(normalize_wav(data))
def setUpClass(cls):
    """Create a mocked SpeechCommands dataset (6 speakers x 3 utterances per
    label) and record the expected samples and splits on the class.
    Speakers 0-1 are training, 2-3 validation, 4-5 testing."""
    cls.root_dir = cls.get_base_temp_dir()
    dataset_dir = os.path.join(cls.root_dir, speechcommands.FOLDER_IN_ARCHIVE, speechcommands.URL)
    os.makedirs(dataset_dir, exist_ok=True)
    sample_rate = 16000  # 16kHz sample rate
    seed = 0
    valid_file = os.path.join(dataset_dir, "validation_list.txt")
    test_file = os.path.join(dataset_dir, "testing_list.txt")
    with open(valid_file, "w") as valid, open(test_file, "w") as test:
        for label in LABELS:
            path = os.path.join(dataset_dir, label)
            os.makedirs(path, exist_ok=True)
            for j in range(6):
                # generate hash ID for speaker
                speaker = "{:08x}".format(j)
                for utterance in range(3):
                    filename = f"{speaker}{speechcommands.HASH_DIVIDER}{utterance}.wav"
                    file_path = os.path.join(path, filename)
                    seed += 1
                    data = get_whitenoise(
                        sample_rate=sample_rate,
                        duration=0.01,
                        n_channels=1,
                        dtype="int16",
                        seed=seed,
                    )
                    save_wav(file_path, data, sample_rate)
                    sample = (
                        normalize_wav(data),
                        sample_rate,
                        label,
                        speaker,
                        utterance,
                    )
                    cls.samples.append(sample)
                    if j < 2:
                        cls.train_samples.append(sample)
                    elif j < 4:
                        # BUG FIX: the split lists must name the actual wav
                        # ('label/filename'), not a '(unknown)' placeholder,
                        # otherwise split lookups against the list never match.
                        valid.write(f'{label}/{filename}\n')
                        cls.valid_samples.append(sample)
                    elif j < 6:
                        test.write(f'{label}/{filename}\n')
                        cls.test_samples.append(sample)
def setUpClass(cls):
    """Create a mocked LibriSpeech tree (5 speakers x 3 chapters x 10
    utterances) in a temp dir and record the expected samples on
    ``cls.samples``."""
    cls.root_dir = cls.get_base_temp_dir()
    dataset_dir = os.path.join(cls.root_dir, librispeech.FOLDER_IN_ARCHIVE, librispeech.URL)
    os.makedirs(dataset_dir, exist_ok=True)
    sample_rate = 16000  # 16kHz
    seed = 0
    for speaker_id in range(5):
        speaker_path = os.path.join(dataset_dir, str(speaker_id))
        os.makedirs(speaker_path, exist_ok=True)
        for chapter_id in range(3):
            chapter_path = os.path.join(speaker_path, str(chapter_id))
            os.makedirs(chapter_path, exist_ok=True)
            trans_content = []
            for utterance_id in range(10):
                tag = f'{speaker_id}-{chapter_id}-{utterance_id:04d}'
                # Transcript spells out the three ids as words.
                utterance = ' '.join(
                    NUMBERS[x] for x in (speaker_id, chapter_id, utterance_id)
                )
                trans_content.append(f'{tag} {utterance}')
                data = get_whitenoise(sample_rate=sample_rate, duration=0.01, n_channels=1,
                                      dtype='float32', seed=seed)
                save_wav(os.path.join(chapter_path, f'{tag}.wav'), data, sample_rate)
                cls.samples.append(
                    (normalize_wav(data), sample_rate, utterance, speaker_id, chapter_id, utterance_id)
                )
                seed += 1
            trans_path = os.path.join(chapter_path, f'{speaker_id}-{chapter_id}.trans.txt')
            with open(trans_path, 'w') as f:
                f.write('\n'.join(trans_content))
def setUpClass(cls):
    """Create a mocked flat CommonVoice-style dataset (``train.tsv`` +
    ``clips/``) and record the expected entries on ``cls.data``."""
    cls.root_dir = cls.get_base_temp_dir()
    # Tsv file name difference does not mean different subset, testing as a whole dataset here
    tsv_path = os.path.join(cls.root_dir, "train.tsv")
    clips_dir = os.path.join(cls.root_dir, "clips")
    os.makedirs(clips_dir, exist_ok=True)
    with open(tsv_path, "w", newline='') as tsv:
        writer = csv.writer(tsv, delimiter='\t')
        writer.writerow(cls._headers)
        for i, row in enumerate(cls._train_csv_contents):
            writer.writerow(row)
            # Generate and store audio; row[1] holds the clip file name.
            data = get_whitenoise(sample_rate=cls.sample_rate, duration=1, n_channels=1, seed=i, dtype='float32')
            save_wav(os.path.join(clips_dir, row[1]), data, cls.sample_rate)
            # Append data entry
            cls.data.append((normalize_wav(data), cls.sample_rate, dict(zip(cls._headers, row))))
def get_mock_dataset(root_dir):
    """Build a mocked VCTK-Corpus-0.92 layout under ``root_dir``.

    Creates speakers p225-p229 with 10 utterances each: a mic2 wav under
    ``wav48_silence_trimmed`` and a transcript under ``txt``.

    Args:
        root_dir: root directory of the mocked data

    Returns:
        List of ``(waveform, sample_rate, transcript, speaker_id,
        utterance_id)`` tuples.
    """
    mocked_samples = []
    dataset_dir = os.path.join(root_dir, 'VCTK-Corpus-0.92')
    os.makedirs(dataset_dir, exist_ok=True)
    sample_rate = 48000
    seed = 0
    for speaker in range(225, 230):
        speaker_id = 'p' + str(speaker)
        audio_dir = os.path.join(dataset_dir, 'wav48_silence_trimmed', speaker_id)
        txt_dir = os.path.join(dataset_dir, 'txt', speaker_id)
        os.makedirs(audio_dir, exist_ok=True)
        os.makedirs(txt_dir, exist_ok=True)
        for utterance_id in range(1, 11):
            filename = f'{speaker_id}_{utterance_id:03d}_mic2'
            data = get_whitenoise(sample_rate=sample_rate, duration=0.01, n_channels=1,
                                  dtype='float32', seed=seed)
            save_wav(os.path.join(audio_dir, filename + '.wav'), data, sample_rate)
            transcript = _TRANSCRIPT[utterance_id - 1]
            # Transcript file name drops the trailing '_mic2' suffix.
            with open(os.path.join(txt_dir, filename[:-5] + '.txt'), 'w') as f:
                f.write(transcript)
            mocked_samples.append(
                (normalize_wav(data), sample_rate, transcript, speaker_id, utterance_id)
            )
            seed += 1
    return mocked_samples
def get_mock_dataset(root_dir):
    """Build a mocked CMU ARCTIC (``cmu_us_aew_arctic``) dataset.

    Args:
        root_dir: directory to the mocked dataset

    Returns:
        List of ``(waveform, sample_rate, transcript, utterance_suffix)``
        tuples, where the suffix is e.g. ``'a0000'``.
    """
    mocked_data = []
    sample_rate = 16000
    transcript = "This is a test transcript."
    base_dir = os.path.join(root_dir, "ARCTIC", "cmu_us_aew_arctic")
    txt_dir = os.path.join(base_dir, "etc")
    audio_dir = os.path.join(base_dir, "wav")
    os.makedirs(txt_dir, exist_ok=True)
    os.makedirs(audio_dir, exist_ok=True)
    seed = 42
    with open(os.path.join(txt_dir, "txt.done.data"), "w") as txt:
        for chapter in ("a", "b"):
            for idx in range(5):
                utterance_id = f"arctic_{chapter}{idx:04d}"
                data = get_whitenoise(
                    sample_rate=sample_rate,
                    duration=3,
                    n_channels=1,
                    dtype="int16",
                    seed=seed,
                )
                save_wav(os.path.join(audio_dir, f"{utterance_id}.wav"), data, sample_rate)
                mocked_data.append(
                    (
                        normalize_wav(data),
                        sample_rate,
                        transcript,
                        utterance_id.split("_")[1],
                    )
                )
                txt.write(f'( {utterance_id} "{transcript}" )\n')
                seed += 1
    return mocked_data
def setUpClass(cls):
    """Create a mocked VCTK-Corpus-0.92 dataset (speakers p225-p229, 10
    utterances each) and record the expected samples on ``cls.samples``."""
    cls.root_dir = cls.get_base_temp_dir()
    dataset_dir = os.path.join(cls.root_dir, 'VCTK-Corpus-0.92')
    os.makedirs(dataset_dir, exist_ok=True)
    sample_rate = 48000
    seed = 0
    for speaker in range(225, 230):
        speaker_id = 'p' + str(speaker)
        audio_dir = os.path.join(dataset_dir, 'wav48_silence_trimmed', speaker_id)
        txt_dir = os.path.join(dataset_dir, 'txt', speaker_id)
        os.makedirs(audio_dir, exist_ok=True)
        os.makedirs(txt_dir, exist_ok=True)
        for utterance_id in range(1, 11):
            filename = f'{speaker_id}_{utterance_id:03d}_mic2'
            data = get_whitenoise(sample_rate=sample_rate, duration=0.01, n_channels=1,
                                  dtype='float32', seed=seed)
            save_wav(os.path.join(audio_dir, filename + '.wav'), data, sample_rate)
            utterance = UTTERANCE[utterance_id - 1]
            # Transcript file name drops the trailing '_mic2' suffix.
            with open(os.path.join(txt_dir, filename[:-5] + '.txt'), 'w') as f:
                f.write(utterance)
            cls.samples.append(
                (normalize_wav(data), sample_rate, utterance, speaker_id, utterance_id)
            )
            seed += 1