def test_phase_vocoder_shape(self, rate, test_pseudo_complex):
    """Check that ``F.phase_vocoder`` keeps the rank and yields ``ceil(num_frames / rate)`` frames.

    Args:
        rate: Stretch rate forwarded to ``F.phase_vocoder``.
        test_pseudo_complex: When true, the complex input is converted with
            ``torch.view_as_real`` before the call and the result is converted
            back with ``torch.view_as_complex`` before its shape is compared.
    """
    batch_size, num_freq, num_frames = 2, 1025, 400
    hop_length = 256

    # Seed before drawing so the random spectrogram is reproducible.
    torch.random.manual_seed(42)
    spec = torch.randn(
        batch_size, num_freq, num_frames,
        dtype=self.complex_dtype, device=self.device)
    if test_pseudo_complex:
        spec = torch.view_as_real(spec)

    # One phase-advance value per frequency bin, with a trailing singleton dim.
    phase_advance = torch.linspace(
        0, np.pi * hop_length, num_freq,
        dtype=self.real_dtype, device=self.device,
    ).unsqueeze(-1)

    spec_stretch = F.phase_vocoder(spec, rate=rate, phase_advance=phase_advance)

    # The output must have the same number of dimensions as the input.
    assert spec.dim() == spec_stretch.dim()

    if test_pseudo_complex:
        output_shape = torch.view_as_complex(spec_stretch).shape
    else:
        output_shape = spec_stretch.shape
    stretched_frames = int(np.ceil(num_frames / rate))
    expected_shape = torch.Size([batch_size, num_freq, stretched_frames])
    assert output_shape == expected_shape
def forward(self, complex_specgrams: Tensor, overriding_rate: Optional[float] = None) -> Tensor:
    r"""Stretch a complex spectrogram in time by the given rate.

    Args:
        complex_specgrams (Tensor): complex spectrogram (..., freq, time, complex=2).
        overriding_rate (float or None, optional): speed up to apply to this batch.
            When omitted, ``self.fixed_rate`` is used instead. (Default: ``None``)

    Returns:
        Tensor: Stretched complex spectrogram of dimension
        (..., freq, ceil(time/rate), complex=2). The input itself is returned
        unchanged when the effective rate equals ``1.0``.

    Raises:
        ValueError: If neither ``overriding_rate`` nor ``self.fixed_rate`` is set.
    """
    # The trailing dimension must hold the (real, imaginary) pair.
    last_dim = complex_specgrams.size(-1)
    assert last_dim == 2, "complex_specgrams should be a complex tensor, shape (..., complex=2)"

    if overriding_rate is not None:
        rate = overriding_rate
    else:
        fixed = self.fixed_rate
        if fixed is None:
            raise ValueError(
                "If no fixed_rate is specified"
                ", must pass a valid rate to the forward method.")
        rate = fixed

    # A rate of exactly 1.0 is a no-op, so skip the vocoder entirely.
    if rate == 1.0:
        return complex_specgrams

    return F.phase_vocoder(complex_specgrams, rate, self.phase_advance)
def test_phase_vocoder(complex_specgrams, rate, hop_length):
    """Compare ``F.phase_vocoder`` against ``librosa.phase_vocoder``.

    Args:
        complex_specgrams: Real tensor whose last dimension holds the
            (real, imaginary) pair; dimension ``-3`` is used as the number of
            frequency bins and dimension ``-2`` as the number of frames.
        rate: Stretch rate passed to both implementations.
        hop_length: Hop length used to build the phase advance and passed to librosa.
    """
    # Due to cumulative sum, numerical error in using torch.float32 will
    # result in bottom right values of the stretched spectrogram to not
    # match with librosa.
    complex_specgrams = complex_specgrams.type(torch.float64)
    phase_advance = torch.linspace(
        0, np.pi * hop_length, complex_specgrams.shape[-3],
        dtype=torch.float64)[..., None]

    complex_specgrams_stretch = F.phase_vocoder(
        complex_specgrams, rate=rate, phase_advance=phase_advance)

    # == Test shape
    expected_size = list(complex_specgrams.size())
    expected_size[-2] = int(np.ceil(expected_size[-2] / rate))

    assert complex_specgrams.dim() == complex_specgrams_stretch.dim()
    assert complex_specgrams_stretch.size() == torch.Size(expected_size)

    # == Test values
    # Select the first item along every leading dimension and keep the last
    # three dimensions whole. The index must be a tuple: indexing with a list
    # that contains slices is deprecated multidimensional-indexing behaviour.
    index = tuple([0] * (complex_specgrams.dim() - 3) + [slice(None)] * 3)

    mono_complex_specgram = complex_specgrams[index].numpy()
    mono_complex_specgram = (
        mono_complex_specgram[..., 0] + mono_complex_specgram[..., 1] * 1j)
    expected_complex_stretch = librosa.phase_vocoder(
        mono_complex_specgram, rate=rate, hop_length=hop_length)

    complex_stretch = complex_specgrams_stretch[index].numpy()
    complex_stretch = complex_stretch[..., 0] + 1j * complex_stretch[..., 1]

    assert np.allclose(complex_stretch, expected_complex_stretch, atol=1e-5)
def forward(self, complex_specgrams: Tensor, overriding_rate: Optional[float] = None) -> Tensor:
    r"""Stretch a spectrogram in time by the given rate.

    Args:
        complex_specgrams (Tensor):
            Either a real tensor of dimension of ``(..., freq, num_frame, complex=2)``
            or a tensor of dimension ``(..., freq, num_frame)`` with complex dtype.
        overriding_rate (float or None, optional): speed up to apply to this batch.
            When omitted, ``self.fixed_rate`` is used instead. (Default: ``None``)

    Returns:
        Tensor:
            Stretched spectrogram. The resulting tensor is of the same dtype as the input
            spectrogram, but the number of frames is changed to ``ceil(num_frame / rate)``.

    Raises:
        ValueError: If neither ``overriding_rate`` nor ``self.fixed_rate`` is set.
    """
    if overriding_rate is not None:
        rate = overriding_rate
    else:
        # Fall back to the rate configured on the module, which must exist.
        fixed = self.fixed_rate
        if fixed is None:
            raise ValueError(
                "If no fixed_rate is specified, must pass a valid rate to the forward method."
            )
        rate = fixed

    return F.phase_vocoder(complex_specgrams, rate, self.phase_advance)
def test_phase_vocoder(self, rate, test_pseudo_complex):
    """Check ``F.phase_vocoder`` values against ``librosa.phase_vocoder``.

    Args:
        rate: Stretch rate passed to both implementations.
        test_pseudo_complex: When true, the input is passed through
            ``torch.view_as_real`` and the output through
            ``torch.view_as_complex`` before comparison.
    """
    num_freq, num_frames = 1025, 400
    hop_length = 256

    torch.random.manual_seed(42)

    # Double precision is used on purpose: the cumulative sum accumulates
    # enough float32 error that the bottom-right values of the stretched
    # spectrogram would no longer match librosa.
    spec = torch.randn(
        num_freq, num_frames, device=self.device, dtype=torch.complex128)
    phase_advance = torch.linspace(
        0, np.pi * hop_length, num_freq,
        device=self.device, dtype=torch.float64,
    ).unsqueeze(-1)

    if test_pseudo_complex:
        vocoder_input = torch.view_as_real(spec)
    else:
        vocoder_input = spec
    stretched = F.phase_vocoder(
        vocoder_input, rate=rate, phase_advance=phase_advance)

    # The reference result is computed by librosa on the CPU copy of the data.
    expected_stretched = librosa.phase_vocoder(
        spec.cpu().numpy(), rate=rate, hop_length=hop_length)

    if test_pseudo_complex:
        actual = torch.view_as_complex(stretched)
    else:
        actual = stretched
    self.assertEqual(actual, torch.from_numpy(expected_stretched))
def func(tensor, device: torch.device = self.device):
    """Run ``F.phase_vocoder`` on ``tensor`` with a fixed rate of 0.5.

    The phase advance has one float64 entry per element of ``tensor.shape[-3]``,
    is moved to ``device`` and is given a trailing singleton dimension.
    """
    hop_length = 256
    rate = 0.5
    n_freq = tensor.shape[-3]
    advance = torch.linspace(0, 3.14 * hop_length, n_freq, dtype=torch.float64)
    # Move to the target device, then add the trailing singleton dimension.
    phase_advance = advance.to(device).unsqueeze(-1)
    return F.phase_vocoder(tensor, rate, phase_advance)
def func(tensor):
    """Run ``F.phase_vocoder`` on ``tensor`` with a fixed rate of 0.5.

    The phase advance is created on ``tensor``'s device, uses the dtype of
    ``torch.real(tensor)``, has ``tensor.size(-2)`` entries and carries a
    trailing singleton dimension.
    """
    hop_length = 256
    rate = 0.5
    real_dtype = torch.real(tensor).dtype
    advance = torch.linspace(
        0,
        3.14 * hop_length,
        tensor.size(-2),
        dtype=real_dtype,
        device=tensor.device,
    )
    phase_advance = advance.unsqueeze(-1)
    return F.phase_vocoder(tensor, rate, phase_advance)
def func(tensor):
    """Run ``F.phase_vocoder`` on ``tensor`` with a fixed rate of 0.5.

    For complex-dtype input the size of dimension ``-2`` is used as the number
    of phase-advance entries and the dtype comes from ``torch.real(tensor)``;
    otherwise dimension ``-3`` and the tensor's own dtype are used.
    """
    hop_length = 256
    rate = 0.5

    if tensor.is_complex():
        n_freq = tensor.size(-2)
        real_dtype = torch.real(tensor).dtype
    else:
        n_freq = tensor.size(-3)
        real_dtype = tensor.dtype

    advance = torch.linspace(
        0,
        3.14 * hop_length,
        n_freq,
        dtype=real_dtype,
        device=tensor.device,
    )
    phase_advance = advance.unsqueeze(-1)
    return F.phase_vocoder(tensor, rate, phase_advance)
def time_stretch(self, batch, speedup_rate, device="cuda"):
    """Time-stretch ``batch`` with an STFT -> phase vocoder -> inverse STFT round trip.

    Args:
        batch: Input passed to ``torch.stft``.
            NOTE(review): dimension 1 of the STFT is used as the number of
            frequency bins, which presumably means ``batch`` is
            ``(batch, time)`` -- confirm against callers.
        speedup_rate: Rate forwarded to ``AF.phase_vocoder``. A value equal to
            ``1`` returns ``batch`` untouched.
        device: Device on which the phase-advance tensor is created.

    Returns:
        ``batch`` itself when ``speedup_rate == 1``; otherwise the output of
        ``AF.istft`` with all size-1 dimensions squeezed away.
    """
    if speedup_rate == 1:
        return batch

    # Plain integers: these are scalar settings, so there is no reason to
    # allocate tensors and call ``.item()`` to get them back.
    n_fft = 2048  # window size
    hop_length = n_fft // 4

    stft = torch.stft(batch, n_fft, hop_length=hop_length)

    # Build the phase advance directly on the target device rather than
    # creating it on the CPU and copying it over afterwards.
    phase_advance = torch.linspace(
        0, math.pi * hop_length, stft.shape[1], device=device)[..., None]

    # time stretch via phase_vocoder (not differentiable):
    vocoded = AF.phase_vocoder(
        stft, rate=speedup_rate, phase_advance=phase_advance)
    return AF.istft(vocoded, n_fft, hop_length=hop_length).squeeze()
def test_phase_vocoder(complex_specgrams, rate, hop_length):
    """Compare ``F.phase_vocoder`` against ``librosa.phase_vocoder``.

    Args:
        complex_specgrams: Real tensor whose last dimension holds the
            (real, imaginary) pair; dimension ``-3`` is used as the number of
            frequency bins and dimension ``-2`` as the number of frames.
        rate: Stretch rate passed to both implementations.
        hop_length: Hop length used to build the phase advance and passed to librosa.

    Raises:
        unittest.SkipTest: If ``IMPORT_LIBROSA`` is false.
    """
    # Using a decorator here causes parametrize to fail on Python 2
    if not IMPORT_LIBROSA:
        raise unittest.SkipTest('Librosa is not available')

    # Due to cumulative sum, numerical error in using torch.float32 will
    # result in bottom right values of the stretched spectrogram to not
    # match with librosa.
    complex_specgrams = complex_specgrams.type(torch.float64)
    phase_advance = torch.linspace(
        0, np.pi * hop_length, complex_specgrams.shape[-3],
        dtype=torch.float64)[..., None]

    complex_specgrams_stretch = F.phase_vocoder(
        complex_specgrams, rate=rate, phase_advance=phase_advance)

    # == Test shape
    expected_size = list(complex_specgrams.size())
    expected_size[-2] = int(np.ceil(expected_size[-2] / rate))

    assert complex_specgrams.dim() == complex_specgrams_stretch.dim()
    assert complex_specgrams_stretch.size() == torch.Size(expected_size)

    # == Test values
    # Select the first item along every leading dimension and keep the last
    # three dimensions whole. The index must be a tuple: indexing with a list
    # that contains slices is deprecated multidimensional-indexing behaviour.
    index = tuple([0] * (complex_specgrams.dim() - 3) + [slice(None)] * 3)

    mono_complex_specgram = complex_specgrams[index].numpy()
    mono_complex_specgram = (
        mono_complex_specgram[..., 0] + mono_complex_specgram[..., 1] * 1j)
    expected_complex_stretch = librosa.phase_vocoder(
        mono_complex_specgram, rate=rate, hop_length=hop_length)

    complex_stretch = complex_specgrams_stretch[index].numpy()
    complex_stretch = complex_stretch[..., 0] + 1j * complex_stretch[..., 1]

    assert np.allclose(complex_stretch, expected_complex_stretch, atol=1e-5)


# NOTE(review): the tests below take ``self`` but no enclosing class header is
# visible in this chunk -- confirm they are placed inside the intended
# TestCase so that they are actually collected and run.
def test_torchscript_create_fb_matrix(self):
    """Run ``F.create_fb_matrix`` through ``_test_torchscript_functional``."""
    n_stft = 100
    f_min = 0.0
    f_max = 20.0
    n_mels = 10
    sample_rate = 16000

    _test_torchscript_functional(
        F.create_fb_matrix, n_stft, f_min, f_max, n_mels, sample_rate)


def test_torchscript_amplitude_to_DB(self):
    """Run ``F.amplitude_to_DB`` through ``_test_torchscript_functional``."""
    spec = torch.rand((6, 201))
    multiplier = 10.0
    amin = 1e-10
    db_multiplier = 0.0
    top_db = 80.0

    _test_torchscript_functional(
        F.amplitude_to_DB, spec, multiplier, amin, db_multiplier, top_db)


def test_torchscript_create_dct(self):
    """Run ``F.create_dct`` through ``_test_torchscript_functional``."""
    n_mfcc = 40
    n_mels = 128
    norm = "ortho"

    _test_torchscript_functional(F.create_dct, n_mfcc, n_mels, norm)


def test_torchscript_mu_law_encoding(self):
    """Run ``F.mu_law_encoding`` through ``_test_torchscript_functional``."""
    tensor = torch.rand((1, 10))
    qc = 256

    _test_torchscript_functional(F.mu_law_encoding, tensor, qc)


def test_torchscript_mu_law_decoding(self):
    """Run ``F.mu_law_decoding`` through ``_test_torchscript_functional``."""
    tensor = torch.rand((1, 10))
    qc = 256

    _test_torchscript_functional(F.mu_law_decoding, tensor, qc)


def test_torchscript_complex_norm(self):
    """Run ``F.complex_norm`` through ``_test_torchscript_functional``."""
    # No trailing comma here: it would wrap the tensor in a 1-tuple.
    complex_tensor = torch.randn(1, 2, 1025, 400, 2)
    # Float literal: presumably the scripted function declares ``power`` as a
    # float and would reject an int -- TODO confirm against F.complex_norm.
    power = 2.0

    _test_torchscript_functional(F.complex_norm, complex_tensor, power)


def test_mask_along_axis(self):
    """Run ``F.mask_along_axis`` through ``_test_torchscript_functional``."""
    # No trailing comma here: it would wrap the tensor in a 1-tuple.
    specgram = torch.randn(2, 1025, 400)
    mask_param = 100
    mask_value = 30.
    axis = 2

    _test_torchscript_functional(
        F.mask_along_axis, specgram, mask_param, mask_value, axis)


def test_mask_along_axis_iid(self):
    """Run ``F.mask_along_axis_iid`` through ``_test_torchscript_functional``."""
    # No trailing comma here: it would wrap the tensor in a 1-tuple.
    specgrams = torch.randn(4, 2, 1025, 400)
    mask_param = 100
    mask_value = 30.
    axis = 2

    _test_torchscript_functional(
        F.mask_along_axis_iid, specgrams, mask_param, mask_value, axis)


def test_torchscript_gain(self):
    """Run ``F.gain`` through ``_test_torchscript_functional``."""
    tensor = torch.rand((1, 1000))
    gainDB = 2.0

    _test_torchscript_functional(F.gain, tensor, gainDB)


def test_torchscript_dither(self):
    """Run ``F.dither`` through ``_test_torchscript_functional`` with the default and two named density functions."""
    tensor = torch.rand((1, 1000))

    _test_torchscript_functional(F.dither, tensor)
    _test_torchscript_functional(F.dither, tensor, "RPDF")
    _test_torchscript_functional(F.dither, tensor, "GPDF")