def test_fade(self, fade_shape):
    """Gradients must flow through T.Fade for the given fade shape."""
    noise = get_whitenoise(sample_rate=8000, duration=0.05, n_channels=2)
    fade = T.Fade(fade_shape=fade_shape)
    # Autograd numerics of the fade envelope can be slightly nondeterministic.
    self.assert_grad(fade, [noise], nondet_tol=1e-10)
def test_dither_noise_shaping(self):
    """TorchScript consistency of F.dither with noise shaping enabled."""
    waveform = common_utils.get_whitenoise(n_channels=2)

    def apply_dither(signal):
        return F.dither(signal, noise_shaping=True)

    self._assert_consistency(apply_dither, waveform)
def test_resample(self, orig_freq, new_freq):
    """Gradients must flow through T.Resample for the given rate pair."""
    noise = get_whitenoise(sample_rate=8000, duration=0.05, n_channels=2)
    self.assert_grad(T.Resample(orig_freq=orig_freq, new_freq=new_freq), [noise])
def test_Fade(self):
    """TorchScript consistency of T.Fade with symmetric fade lengths."""
    fade_len = 3000  # same length for fade-in and fade-out
    waveform = common_utils.get_whitenoise()
    self._assert_consistency(T.Fade(fade_len, fade_len), waveform)
def test_dither_GPDF(self):
    """TorchScript consistency of F.dither with a GPDF density (shape only)."""
    waveform = common_utils.get_whitenoise(n_channels=2)

    def apply_gpdf(signal):
        return F.dither(signal, 'GPDF')

    # GPDF dithering is stochastic, so only output shape is compared.
    self._assert_consistency(apply_gpdf, waveform, shape_only=True)
def get_mock_dataset(dataset_dir):
    """Build a fake TEDLIUM directory tree and return the expected samples.

    dataset_dir: directory of the mocked dataset

    Returns a dict mapping each release name to the list of sample tuples
    ``(waveform_slice, sample_rate, transcript, talk_id, speaker_id, identifier)``
    that the dataset loader is expected to yield.
    """
    os.makedirs(dataset_dir, exist_ok=True)
    sample_rate = 16000  # 16kHz
    mocked_samples = {}
    # One distinct noise signal per release (seed 0, 1, 2).
    for seed, release in enumerate(["release1", "release2", "release3"]):
        data = get_whitenoise(sample_rate=sample_rate, duration=10.00, n_channels=1, dtype="float32", seed=seed)
        config = tedlium._RELEASE_CONFIGS[release]
        # release3 stores audio under data_path; the older releases under subset.
        if release == "release3":
            release_dir = os.path.join(dataset_dir, config["folder_in_archive"], config["data_path"])
        else:
            release_dir = os.path.join(dataset_dir, config["folder_in_archive"], config["subset"])
        stm_dir = os.path.join(release_dir, "stm")  # Subfolder for transcripts
        sph_dir = os.path.join(release_dir, "sph")  # Subfolder for audio files
        os.makedirs(stm_dir, exist_ok=True)
        os.makedirs(sph_dir, exist_ok=True)
        save_wav(os.path.join(sph_dir, f"{release}.sph"), data, sample_rate)
        with open(os.path.join(stm_dir, f"{release}.stm"), "w") as f:
            f.write("".join(_UTTERANCES))
        with open(os.path.join(release_dir, f"{release}.dic"), "w") as f:
            f.write("\n".join(_PHONEME))
        # Create a samples list to compare with.
        samples = []
        for utterance in _UTTERANCES:
            talk_id, _, speaker_id, start_time, end_time, identifier, transcript = utterance.split(" ", 6)
            # STM times are seconds; convert to sample offsets.
            frame_start = int(float(start_time)) * sample_rate
            frame_end = int(float(end_time)) * sample_rate
            samples.append((
                data[:, frame_start:frame_end],
                sample_rate,
                transcript,
                talk_id,
                speaker_id,
                identifier,
            ))
        mocked_samples[release] = samples
    return mocked_samples
def test_mfcc(self, log_mels):
    """Gradients must flow through T.MFCC with the given log_mels flag."""
    rate = 8000
    noise = get_whitenoise(sample_rate=rate, duration=0.05, n_channels=2)
    self.assert_grad(T.MFCC(sample_rate=rate, log_mels=log_mels), [noise])
def test_Vol(self):
    """TorchScript consistency of T.Vol with a fixed gain."""
    self._assert_consistency(T.Vol(1.1), common_utils.get_whitenoise())
def test_SpectralCentroid(self):
    """TorchScript consistency of T.SpectralCentroid."""
    rate = 44100
    self._assert_consistency(
        T.SpectralCentroid(sample_rate=rate),
        common_utils.get_whitenoise(sample_rate=rate),
    )
def test_resample_identity(self, resampling_method, sample_rate):
    """Resampling to the same rate must leave the waveform unchanged."""
    original = get_whitenoise(sample_rate=sample_rate, duration=1)
    self.assertEqual(original, F.resample(original, sample_rate, sample_rate))
def get_mock_dataset(root_dir):
    """
    root_dir: root directory of the mocked data
    """
    # subset name -> list of expected sample tuples, built alongside the files on disk.
    mocked_samples = {}
    dataset_dir = Path(root_dir) / "DR-VCTK" / "DR-VCTK"
    dataset_dir.mkdir(parents=True, exist_ok=True)
    config_dir = dataset_dir / "configurations"
    config_dir.mkdir(parents=True, exist_ok=True)
    sample_rate = 16000
    # Incremented once per saved file so every generated waveform is distinct.
    seed = 0
    for subset in _SUBSETS:
        mocked_samples[subset] = []
        # Pre-create one audio directory per condition for this subset.
        for condition in _CONDITIONS:
            audio_dir = dataset_dir / f"{condition}_{subset}set_wav_16k"
            audio_dir.mkdir(parents=True, exist_ok=True)
        config_filepath = config_dir / f"{subset}_ch_log.txt"
        with open(config_filepath, "w") as f:
            # The train log starts with a blank line; both logs get a header row.
            if subset == "train":
                f.write("\n")
            f.write("File Name\tMain Source\tChannel Idx\n")
            for speaker_id in _SPEAKER_IDS:
                utterance_id = 1
                for source in _SOURCES:
                    for channel_id in _CHANNEL_IDS:
                        filename = f"p{speaker_id}_{utterance_id:03d}.wav"
                        # One log row per (source, channel) combination.
                        f.write(f"(unknown)\t{source}\t{channel_id}\n")
                        # Save one waveform per condition for this utterance.
                        data = {}
                        for condition in _CONDITIONS:
                            data[condition] = get_whitenoise(
                                sample_rate=sample_rate, duration=0.01, n_channels=1, dtype='float32', seed=seed)
                            audio_dir = dataset_dir / f"{condition}_{subset}set_wav_16k"
                            audio_file_path = audio_dir / filename
                            save_wav(audio_file_path, data[condition], sample_rate)
                            seed += 1
                        # Expected tuple: both condition waveforms, their rates, and metadata.
                        # NOTE(review): `source` and `channel_id` here are the current
                        # inner-loop values, and data[_CONDITIONS[0]]/[1] assumes exactly
                        # two conditions — confirm this matches the loader's output order.
                        sample = (
                            data[_CONDITIONS[0]],
                            sample_rate,
                            data[_CONDITIONS[1]],
                            sample_rate,
                            "p" + str(speaker_id),
                            f"{utterance_id:03d}",
                            source,
                            channel_id,
                        )
                        mocked_samples[subset].append(sample)
                        utterance_id += 1
    return mocked_samples
def test_vol(self, gain, gain_type):
    """Gradients must flow through T.Vol for the given gain settings."""
    rate = 8000
    noise = get_whitenoise(sample_rate=rate, duration=0.05, n_channels=2)
    self.assert_grad(T.Vol(gain=gain, gain_type=gain_type), [noise])
def test_amplitude_to_db(self):
    """Gradients must flow through T.AmplitudeToDB."""
    rate = 8000
    noise = get_whitenoise(sample_rate=rate, duration=0.05, n_channels=2)
    self.assert_grad(T.AmplitudeToDB(), [noise])
def test_spectral_centroid(self):
    """Gradients must flow through T.SpectralCentroid."""
    rate = 8000
    noise = get_whitenoise(sample_rate=rate, duration=0.05, n_channels=2)
    # Spectral ops can be slightly nondeterministic under autograd checks.
    self.assert_grad(T.SpectralCentroid(sample_rate=rate), [noise], nondet_tol=1e-10)
def test_lfilter_all_inputs(self):
    """Gradients must flow through F.lfilter w.r.t. signal and both coefficient sets."""
    torch.random.manual_seed(2434)
    waveform = get_whitenoise(sample_rate=22050, duration=0.01, n_channels=2)
    a_coeffs = torch.tensor([0.7, 0.2, 0.6])
    b_coeffs = torch.tensor([0.4, 0.2, 0.9])
    self.assert_grad(F.lfilter, (waveform, a_coeffs, b_coeffs))
def test_PitchShift(self):
    """TorchScript consistency of T.PitchShift."""
    rate = 8000
    steps = 4
    waveform = common_utils.get_whitenoise(sample_rate=rate)
    self._assert_consistency(T.PitchShift(sample_rate=rate, n_steps=steps), waveform)
def test_biquad(self):
    """Gradients must flow through F.biquad w.r.t. signal and all six coefficients."""
    torch.random.manual_seed(2434)
    waveform = get_whitenoise(sample_rate=22050, duration=0.01, n_channels=1)
    a = torch.tensor([0.7, 0.2, 0.6])
    b = torch.tensor([0.4, 0.2, 0.9])
    # biquad takes the coefficients as scalars: (x, b0, b1, b2, a0, a1, a2).
    self.assert_grad(F.biquad, (waveform, *b, *a))
def test_MuLawEncoding(self):
    """TorchScript consistency of T.MuLawEncoding."""
    self._assert_consistency(T.MuLawEncoding(), common_utils.get_whitenoise())
def test_Resample(self):
    """TorchScript consistency of T.Resample for a 16k -> 8k downsample."""
    src_rate, dst_rate = 16000, 8000
    waveform = common_utils.get_whitenoise(sample_rate=src_rate)
    self._assert_consistency(T.Resample(float(src_rate), float(dst_rate)), waveform)
def test_compute_kaldi_pitch(self):
    """Batched and unbatched F.compute_kaldi_pitch must agree."""
    rate = 44100
    waveform = common_utils.get_whitenoise(sample_rate=rate)
    self.assert_batch_consistencies(F.compute_kaldi_pitch, waveform, sample_rate=rate)