Python phase_vocoderの例、torchaudio.functional.phase_vocoder Pythonの例

コード例 #1

0

ファイルを表示

ファイル: functional_impl.py プロジェクト: twistedmove/audio

    def test_phase_vocoder_shape(self, rate, test_pseudo_complex):
        """Check that ``F.phase_vocoder`` produces the expected output shape.

        Args:
            rate: Stretch rate forwarded to ``F.phase_vocoder``.
            test_pseudo_complex: When true, the complex input is passed through
                ``torch.view_as_real`` before the call and the output is
                converted back with ``torch.view_as_complex`` for the shape
                comparison.
        """
        batch_size, num_freq, num_frames = 2, 1025, 400
        hop_length = 256

        torch.random.manual_seed(42)
        spec = torch.randn(
            batch_size, num_freq, num_frames,
            dtype=self.complex_dtype, device=self.device,
        )
        if test_pseudo_complex:
            spec = torch.view_as_real(spec)

        # One phase-advance value per frequency bin, with a trailing
        # singleton dimension appended.
        ramp = torch.linspace(
            0, np.pi * hop_length, num_freq,
            dtype=self.real_dtype, device=self.device,
        )
        phase_advance = ramp[..., None]

        spec_stretch = F.phase_vocoder(
            spec, rate=rate, phase_advance=phase_advance)

        # The number of dimensions must be preserved by the stretch.
        assert spec.dim() == spec_stretch.dim()

        expected_frames = int(np.ceil(num_frames / rate))
        expected_shape = torch.Size([batch_size, num_freq, expected_frames])
        if test_pseudo_complex:
            output_shape = torch.view_as_complex(spec_stretch).shape
        else:
            output_shape = spec_stretch.shape
        assert output_shape == expected_shape

コード例 #2

0

ファイルを表示

    def forward(self,
                complex_specgrams: Tensor,
                overriding_rate: Optional[float] = None) -> Tensor:
        r"""Stretch a complex spectrogram in time with ``F.phase_vocoder``.

        Args:
            complex_specgrams (Tensor): complex spectrogram (..., freq, time, complex=2).
            overriding_rate (float or None, optional): speed up to apply to this batch.
                If no rate is passed, use ``self.fixed_rate``. (Default: ``None``)

        Returns:
            Tensor: Stretched complex spectrogram of dimension (..., freq, ceil(time/rate), complex=2).
            When the effective rate equals ``1.0`` the input is returned as is.

        Raises:
            ValueError: If neither ``overriding_rate`` nor ``self.fixed_rate``
                provides a rate.
        """
        last_dim = complex_specgrams.size(-1)
        assert last_dim == 2, "complex_specgrams should be a complex tensor, shape (..., complex=2)"

        # An explicit per-call rate takes precedence over the fixed one.
        if overriding_rate is not None:
            rate = overriding_rate
        else:
            rate = self.fixed_rate
            if rate is None:
                raise ValueError(
                    "If no fixed_rate is specified, must pass a valid rate to the forward method.")

        # A rate of exactly 1.0 needs no processing.
        if rate == 1.0:
            return complex_specgrams

        return F.phase_vocoder(complex_specgrams, rate, self.phase_advance)

コード例 #3

0

ファイルを表示

def test_phase_vocoder(complex_specgrams, rate, hop_length):
    """Compare ``F.phase_vocoder`` with ``librosa.phase_vocoder``.

    The stretched output is first checked for shape, then the values of the
    item at index 0 along every leading dimension are compared with librosa's
    result for the same input.

    Args:
        complex_specgrams: Tensor whose last three dimensions are used below
            as (freq, time, complex=2); any leading dimensions are reduced to
            index 0 for the value comparison.
        rate: Stretch rate passed to both implementations.
        hop_length: Hop length used to build ``phase_advance`` and passed to
            librosa.
    """
    # Due to the cumulative sum, numerical error when using torch.float32
    # causes the bottom-right values of the stretched spectrogram to not
    # match librosa, so everything is computed in float64.

    complex_specgrams = complex_specgrams.type(torch.float64)
    phase_advance = torch.linspace(0,
                                   np.pi * hop_length,
                                   complex_specgrams.shape[-3],
                                   dtype=torch.float64)[..., None]

    complex_specgrams_stretch = F.phase_vocoder(complex_specgrams,
                                                rate=rate,
                                                phase_advance=phase_advance)

    # == Test shape
    expected_size = list(complex_specgrams.size())
    expected_size[-2] = int(np.ceil(expected_size[-2] / rate))

    assert complex_specgrams.dim() == complex_specgrams_stretch.dim()
    assert complex_specgrams_stretch.size() == torch.Size(expected_size)

    # == Test values
    # Build the index as a tuple: a *list* mixing ints and slices only works
    # through legacy sequence-as-tuple indexing, which is not reliable.
    index = (0,) * (complex_specgrams.dim() - 3) + (slice(None),) * 3
    mono_complex_specgram = complex_specgrams[index].numpy()
    # Recombine the trailing (real, imag) pair into a complex array.
    mono_complex_specgram = mono_complex_specgram[..., 0] + \
        mono_complex_specgram[..., 1] * 1j
    expected_complex_stretch = librosa.phase_vocoder(mono_complex_specgram,
                                                     rate=rate,
                                                     hop_length=hop_length)

    complex_stretch = complex_specgrams_stretch[index].numpy()
    complex_stretch = complex_stretch[..., 0] + 1j * complex_stretch[..., 1]

    assert np.allclose(complex_stretch, expected_complex_stretch, atol=1e-5)

コード例 #4

0

ファイルを表示

    def forward(self,
                complex_specgrams: Tensor,
                overriding_rate: Optional[float] = None) -> Tensor:
        r"""Stretch a spectrogram in time with ``F.phase_vocoder``.

        Args:
            complex_specgrams (Tensor):
                Either a real tensor of dimension of ``(..., freq, num_frame, complex=2)``
                or a tensor of dimension ``(..., freq, num_frame)`` with complex dtype.
            overriding_rate (float or None, optional): speed up to apply to this batch.
                If no rate is passed, use ``self.fixed_rate``. (Default: ``None``)

        Returns:
            Tensor:
                Stretched spectrogram. The resulting tensor is of the same dtype as the input
                spectrogram, but the number of frames is changed to ``ceil(num_frame / rate)``.

        Raises:
            ValueError: If ``overriding_rate`` is ``None`` and
                ``self.fixed_rate`` is ``None`` as well.
        """
        # An explicit per-call rate takes precedence over the fixed one.
        if overriding_rate is not None:
            rate = overriding_rate
        else:
            if self.fixed_rate is None:
                raise ValueError(
                    "If no fixed_rate is specified, must pass a valid rate to the forward method."
                )
            rate = self.fixed_rate
        return F.phase_vocoder(complex_specgrams, rate, self.phase_advance)

コード例 #5

0

ファイルを表示

ファイル: librosa_compatibility_test_impl.py プロジェクト: zkneupper/audio

    def test_phase_vocoder(self, rate, test_pseudo_complex):
        """Compare ``F.phase_vocoder`` with ``librosa.phase_vocoder``.

        Args:
            rate: Stretch rate passed to both implementations.
            test_pseudo_complex: When true, the complex input is passed through
                ``torch.view_as_real`` before the call and the output is
                converted back with ``torch.view_as_complex`` before comparing.
        """
        num_freq, num_frames = 1025, 400
        hop_length = 256
        torch.random.manual_seed(42)

        # Due to the cumulative sum, numerical error when using torch.float32
        # causes the bottom-right values of the stretched spectrogram to not
        # match librosa, hence complex128 / float64 here.
        spec = torch.randn(
            num_freq, num_frames,
            device=self.device, dtype=torch.complex128,
        )
        ramp = torch.linspace(
            0, np.pi * hop_length, num_freq,
            device=self.device, dtype=torch.float64,
        )
        phase_advance = ramp[..., None]

        vocoder_input = torch.view_as_real(spec) if test_pseudo_complex else spec
        stretched = F.phase_vocoder(
            vocoder_input, rate=rate, phase_advance=phase_advance)

        # librosa works on NumPy arrays, so hand it a CPU copy of the input.
        expected_stretched = librosa.phase_vocoder(
            spec.cpu().numpy(), rate=rate, hop_length=hop_length)

        if test_pseudo_complex:
            stretched = torch.view_as_complex(stretched)
        self.assertEqual(stretched, torch.from_numpy(expected_stretched))

コード例 #6

0

ファイルを表示

 def func(tensor, device: torch.device = self.device):
     """Call ``F.phase_vocoder`` on ``tensor`` with a fixed rate of 0.5.

     ``phase_advance`` is a float64 ramp with ``tensor.shape[-3]`` points,
     moved to ``device`` and given a trailing singleton dimension.
     """
     hop_length = 256
     rate = 0.5
     # 3.14 (not math.pi) is the constant used as written.
     ramp = torch.linspace(
         0, 3.14 * hop_length, tensor.shape[-3], dtype=torch.float64)
     phase_advance = ramp.to(device)[..., None]
     return F.phase_vocoder(tensor, rate, phase_advance)

コード例 #7

0

ファイルを表示

ファイル: torchscript_consistency_impl.py プロジェクト: pytorch/audio

 def func(tensor):
     """Call ``F.phase_vocoder`` on ``tensor`` with a fixed rate of 0.5.

     ``phase_advance`` is a ramp with ``tensor.size(-2)`` points, created on
     the tensor's device with the dtype of ``torch.real(tensor)``, and given
     a trailing singleton dimension.
     """
     hop_length = 256
     rate = 0.5
     n_freq = tensor.size(-2)
     real_dtype = torch.real(tensor).dtype
     # 3.14 (not math.pi) is the constant used as written.
     ramp = torch.linspace(
         0, 3.14 * hop_length, n_freq, dtype=real_dtype, device=tensor.device)
     phase_advance = ramp[..., None]
     return F.phase_vocoder(tensor, rate, phase_advance)

コード例 #8

0

ファイルを表示

ファイル: torchscript_consistency_impl.py プロジェクト: peterjc123/audio

        def func(tensor):
            """Call ``F.phase_vocoder`` on ``tensor`` with a fixed rate of 0.5.

            Complex-dtype inputs take the frequency size from dimension -2 and
            the ramp dtype from ``torch.real(tensor)``; other inputs take them
            from dimension -3 and the tensor's own dtype.
            """
            hop_length = 256
            rate = 0.5

            if tensor.is_complex():
                n_freq = tensor.size(-2)
                real_dtype = torch.real(tensor).dtype
            else:
                n_freq = tensor.size(-3)
                real_dtype = tensor.dtype

            # 3.14 (not math.pi) is the constant used as written.
            ramp = torch.linspace(
                0, 3.14 * hop_length, n_freq,
                dtype=real_dtype, device=tensor.device)
            phase_advance = ramp[..., None]
            return F.phase_vocoder(tensor, rate, phase_advance)

コード例 #9

0

ファイルを表示

    def time_stretch(self, batch, speedup_rate, device="cuda"):
        """Time-stretch ``batch`` via STFT, phase vocoder and inverse STFT.

        Args:
            batch: Input passed to ``torch.stft``.
            speedup_rate: Rate given to ``AF.phase_vocoder``; when it equals 1
                the input is returned unchanged.
            device: Device the phase-advance tensor is moved to.
                NOTE(review): ``batch`` itself is not moved, so it presumably
                must already live on ``device`` -- confirm with callers.

        Returns:
            The squeezed output of ``AF.istft``, or ``batch`` itself when
            ``speedup_rate == 1``.
        """
        if speedup_rate == 1:
            return batch

        window_size = 2048
        hop_length = window_size // 4  # a quarter of the window

        spectrogram = torch.stft(batch, window_size, hop_length=hop_length)

        ramp = torch.linspace(0, math.pi * hop_length, spectrogram.shape[1])
        phase_advance = ramp[..., None].to(device)

        # time stretch via phase_vocoder (not differentiable):
        stretched = AF.phase_vocoder(
            spectrogram, rate=speedup_rate, phase_advance=phase_advance)
        return AF.istft(stretched, window_size, hop_length=hop_length).squeeze()

コード例 #10

0

ファイルを表示

ファイル: test_functional.py プロジェクト: seemethere/audio

def test_phase_vocoder(complex_specgrams, rate, hop_length):
    """Compare ``F.phase_vocoder`` with ``librosa.phase_vocoder``.

    The stretched output is first checked for shape, then the values of the
    item at index 0 along every leading dimension are compared with librosa's
    result for the same input.

    Args:
        complex_specgrams: Tensor whose last three dimensions are used below
            as (freq, time, complex=2); any leading dimensions are reduced to
            index 0 for the value comparison.
        rate: Stretch rate passed to both implementations.
        hop_length: Hop length used to build ``phase_advance`` and passed to
            librosa.

    Raises:
        unittest.SkipTest: If ``IMPORT_LIBROSA`` is false.
    """

    # Using a decorator here causes parametrize to fail on Python 2
    if not IMPORT_LIBROSA:
        raise unittest.SkipTest('Librosa is not available')

    # Due to the cumulative sum, numerical error when using torch.float32
    # causes the bottom-right values of the stretched spectrogram to not
    # match librosa, so everything is computed in float64.

    complex_specgrams = complex_specgrams.type(torch.float64)
    phase_advance = torch.linspace(0,
                                   np.pi * hop_length,
                                   complex_specgrams.shape[-3],
                                   dtype=torch.float64)[..., None]

    complex_specgrams_stretch = F.phase_vocoder(complex_specgrams,
                                                rate=rate,
                                                phase_advance=phase_advance)

    # == Test shape
    expected_size = list(complex_specgrams.size())
    expected_size[-2] = int(np.ceil(expected_size[-2] / rate))

    assert complex_specgrams.dim() == complex_specgrams_stretch.dim()
    assert complex_specgrams_stretch.size() == torch.Size(expected_size)

    # == Test values
    # Build the index as a tuple: a *list* mixing ints and slices only works
    # through legacy sequence-as-tuple indexing, which is not reliable.
    index = (0,) * (complex_specgrams.dim() - 3) + (slice(None),) * 3
    mono_complex_specgram = complex_specgrams[index].numpy()
    # Recombine the trailing (real, imag) pair into a complex array.
    mono_complex_specgram = mono_complex_specgram[..., 0] + \
        mono_complex_specgram[..., 1] * 1j
    expected_complex_stretch = librosa.phase_vocoder(mono_complex_specgram,
                                                     rate=rate,
                                                     hop_length=hop_length)

    complex_stretch = complex_specgrams_stretch[index].numpy()
    complex_stretch = complex_stretch[..., 0] + 1j * complex_stretch[..., 1]

    assert np.allclose(complex_stretch, expected_complex_stretch, atol=1e-5)

    def test_torchscript_create_fb_matrix(self):
        """Run ``F.create_fb_matrix`` through ``_test_torchscript_functional``."""
        # Positional order: n_stft, f_min, f_max, n_mels, sample_rate.
        fb_args = (100, 0.0, 20.0, 10, 16000)

        _test_torchscript_functional(F.create_fb_matrix, *fb_args)

    def test_torchscript_amplitude_to_DB(self):
        """Run ``F.amplitude_to_DB`` through ``_test_torchscript_functional``."""
        spec = torch.rand((6, 201))
        # Positional order: multiplier, amin, db_multiplier, top_db.
        scalar_args = (10.0, 1e-10, 0.0, 80.0)

        _test_torchscript_functional(F.amplitude_to_DB, spec, *scalar_args)

    def test_torchscript_create_dct(self):
        """Run ``F.create_dct`` through ``_test_torchscript_functional``."""
        # Positional order: n_mfcc, n_mels, norm.
        dct_args = (40, 128, "ortho")

        _test_torchscript_functional(F.create_dct, *dct_args)

    def test_torchscript_mu_law_encoding(self):
        """Run ``F.mu_law_encoding`` through ``_test_torchscript_functional``."""
        inputs = torch.rand((1, 10))

        # 256 is the second positional argument given to the function.
        _test_torchscript_functional(F.mu_law_encoding, inputs, 256)

    def test_torchscript_mu_law_decoding(self):
        """Run ``F.mu_law_decoding`` through ``_test_torchscript_functional``."""
        inputs = torch.rand((1, 10))

        # 256 is the second positional argument given to the function.
        _test_torchscript_functional(F.mu_law_decoding, inputs, 256)

    def test_torchscript_complex_norm(self):
        """Run ``F.complex_norm`` through ``_test_torchscript_functional``."""
        # No trailing comma after the call: ``torch.randn(...),`` would bind a
        # 1-tuple holding the tensor rather than the tensor itself.
        complex_tensor = torch.randn(1, 2, 1025, 400, 2)
        power = 2

        _test_torchscript_functional(F.complex_norm, complex_tensor, power)

    def test_mask_along_axis(self):
        """Run ``F.mask_along_axis`` through ``_test_torchscript_functional``."""
        # No trailing comma after the call: ``torch.randn(...),`` would bind a
        # 1-tuple holding the tensor rather than the tensor itself.
        specgram = torch.randn(2, 1025, 400)
        mask_param = 100
        mask_value = 30.
        axis = 2

        _test_torchscript_functional(F.mask_along_axis, specgram, mask_param,
                                     mask_value, axis)

    def test_mask_along_axis_iid(self):
        """Run ``F.mask_along_axis_iid`` through ``_test_torchscript_functional``."""
        # No trailing comma after the call: ``torch.randn(...),`` would bind a
        # 1-tuple holding the tensor rather than the tensor itself.
        specgrams = torch.randn(4, 2, 1025, 400)
        mask_param = 100
        mask_value = 30.
        axis = 2

        _test_torchscript_functional(F.mask_along_axis_iid, specgrams,
                                     mask_param, mask_value, axis)

    def test_torchscript_gain(self):
        """Run ``F.gain`` through ``_test_torchscript_functional``."""
        inputs = torch.rand((1, 1000))

        # 2.0 is passed as the second positional argument (``gainDB`` in the
        # original test).
        _test_torchscript_functional(F.gain, inputs, 2.0)

    def test_torchscript_dither(self):
        """Run ``F.dither`` through ``_test_torchscript_functional``.

        The function is exercised once with only the tensor and then once per
        extra string argument ("RPDF", "GPDF").
        """
        tensor = torch.rand((1, 1000))

        _test_torchscript_functional(F.dither, tensor)
        for density in ("RPDF", "GPDF"):
            _test_torchscript_functional(F.dither, tensor, density)