Ejemplo n.º 1
0
    def forward(self, specgram: Tensor) -> Tensor:
        r"""Project a spectrogram onto the mel filter bank stored in ``self.fb``.

        Args:
            specgram (Tensor): A spectrogram STFT of dimension (..., freq, time).

        Returns:
            Tensor: Mel frequency spectrogram of size (..., ``n_mels``, time).
        """
        # Fold every leading dimension into one batch axis so a single
        # batched matmul handles any input rank.
        leading_dims = specgram.shape[:-2]
        packed = specgram.reshape(-1, specgram.shape[-2], specgram.shape[-1])

        # An empty buffer means no filter bank was built at construction time;
        # build it now from the number of frequency bins actually received.
        if self.fb.numel() == 0:
            lazy_fb = F.create_fb_matrix(packed.size(1), self.f_min,
                                         self.f_max, self.n_mels,
                                         self.sample_rate, self.norm,
                                         self.mel_scale)
            # Attributes cannot be reassigned outside __init__, so the
            # existing buffer is resized and filled in place instead.
            self.fb.resize_(lazy_fb.size())
            self.fb.copy_(lazy_fb)

        # (batch, time, freq) @ (freq, n_mels) -> (batch, time, n_mels)
        time_major = torch.matmul(packed.transpose(1, 2), self.fb)
        # back to (batch, n_mels, time)
        mel_specgram = time_major.transpose(1, 2)

        # Restore the caller's leading dimensions.
        return mel_specgram.reshape(leading_dims + mel_specgram.shape[-2:])
Ejemplo n.º 2
0
    def __init__(self,
                 n_stft: int,
                 n_mels: int = 128,
                 sample_rate: int = 16000,
                 f_min: float = 0.,
                 f_max: Optional[float] = None,
                 max_iter: int = 100000,
                 tolerance_loss: float = 1e-5,
                 tolerance_change: float = 1e-8,
                 sgdargs: Optional[dict] = None) -> None:
        r"""Store the configuration and register the mel filter bank buffer.

        Args:
            n_stft (int): First argument handed to ``F.create_fb_matrix``
                (the number of frequency bins of the filter bank).
            n_mels (int): Number of mel filter banks.
            sample_rate (int): Sample rate; also used to derive the default
                ``f_max``.
            f_min (float): Minimum frequency.
            f_max (float or None): Maximum frequency. ``None`` selects
                ``float(sample_rate // 2)``.
            max_iter (int): Stored as ``self.max_iter``; not used here
                (presumably an iteration cap for the inversion -- confirm
                against ``forward``).
            tolerance_loss (float): Stored as ``self.tolerance_loss``; not
                used here.
            tolerance_change (float): Stored as ``self.tolerance_change``;
                not used here.
            sgdargs (dict or None): Stored as ``self.sgdargs``. A falsy value
                is replaced by ``{'lr': 0.1, 'momentum': 0.9}``.

        Raises:
            AssertionError: If ``f_min`` is greater than the resolved
                ``f_max``.
        """
        super(InverseMelScale, self).__init__()
        self.n_mels = n_mels
        self.sample_rate = sample_rate
        # Compare against None explicitly: ``f_max or default`` would silently
        # discard an explicit ``f_max=0.`` and disagree with MelScale, which
        # resolves the default with the same ``is not None`` test.
        self.f_max = f_max if f_max is not None else float(sample_rate // 2)
        self.f_min = f_min
        self.max_iter = max_iter
        self.tolerance_loss = tolerance_loss
        self.tolerance_change = tolerance_change
        self.sgdargs = sgdargs or {'lr': 0.1, 'momentum': 0.9}

        assert f_min <= self.f_max, 'Require f_min: %f < f_max: %f' % (
            f_min, self.f_max)

        fb = F.create_fb_matrix(n_stft, self.f_min, self.f_max, self.n_mels,
                                self.sample_rate)
        self.register_buffer('fb', fb)
Ejemplo n.º 3
0
 def func(_):
     # The single argument is ignored; the filter bank is built from fixed
     # settings so that repeated calls are directly comparable.
     return F.create_fb_matrix(
         100,       # n_stft
         0.0,       # f_min
         20.0,      # f_max
         10,        # n_mels
         16000,     # sample_rate
         "slaney",  # norm
     )
Ejemplo n.º 4
0
 def __init__(self,
              sample_rate=22050,
              n_fft=2048,
              n_mels=256,
              f_min=0.,
              f_max=None):
     """Build the mel filter bank and keep it as ``self.fb``.

     ``f_max`` falls back to ``float(sample_rate // 2)`` when it is ``None``.
     The filter bank is created for ``n_fft // 2 + 1`` frequency bins.

     Raises:
         AssertionError: If ``f_min`` is greater than the resolved ``f_max``.
     """
     if f_max is None:
         f_max = float(sample_rate // 2)
     assert f_min <= f_max
     n_freqs = n_fft // 2 + 1
     self.fb = create_fb_matrix(n_freqs, f_min, f_max, n_mels)
Ejemplo n.º 5
0
    def test_torchscript_create_fb_matrix(self):
        """Check that scripted and eager ``F.create_fb_matrix`` agree."""
        @torch.jit.script
        def jit_method(n_stft, f_min, f_max, n_mels):
            # type: (int, float, float, int) -> Tensor
            return F.create_fb_matrix(n_stft, f_min, f_max, n_mels)

        # (n_stft, f_min, f_max, n_mels), shared by both code paths
        fb_args = (100, 0., 20., 10)

        scripted = jit_method(*fb_args)
        eager = F.create_fb_matrix(*fb_args)

        self.assertTrue(torch.allclose(scripted, eager))
Ejemplo n.º 6
0
 def __init__(self,
              n_mels=128,
              sample_rate=16000,
              f_min=0.,
              f_max=None,
              n_stft=None):
     """Store the mel-scale settings and, when possible, the filter bank.

     ``f_max`` falls back to ``float(sample_rate // 2)`` when it is ``None``.
     When ``n_stft`` is ``None`` an empty tensor is stored in ``self.fb``;
     otherwise the filter bank is created immediately.

     Raises:
         AssertionError: If ``f_min`` is greater than the resolved ``f_max``.
     """
     super(MelScale, self).__init__()
     self.n_mels = n_mels
     self.sample_rate = sample_rate
     if f_max is None:
         f_max = float(sample_rate // 2)
     self.f_max = f_max
     assert f_min <= self.f_max, 'Require f_min: %f < f_max: %f' % (
         f_min, self.f_max)
     self.f_min = f_min
     if n_stft is None:
         # Placeholder: the number of frequency bins is not known yet.
         self.fb = torch.empty(0)
     else:
         self.fb = F.create_fb_matrix(
             n_stft, self.f_min, self.f_max, self.n_mels)
Ejemplo n.º 7
0
    def _test_create_fb(self, n_mels=40, sample_rate=22050, n_fft=2048, fmin=0.0, fmax=8000.0):
        """Compare ``F.create_fb_matrix`` with ``librosa.filters.mel`` band by band.

        librosa is called with ``htk=True`` and ``norm=None``. Each librosa
        row is compared with the matching column of the torchaudio filter
        bank using an absolute tolerance of ``1e-4``.
        """
        expected = librosa.filters.mel(
            sr=sample_rate,
            n_fft=n_fft,
            n_mels=n_mels,
            fmax=fmax,
            fmin=fmin,
            htk=True,
            norm=None,
        )
        n_freqs = n_fft // 2 + 1
        actual = F.create_fb_matrix(
            sample_rate=sample_rate,
            n_mels=n_mels,
            f_max=fmax,
            f_min=fmin,
            n_freqs=n_freqs,
        )

        for i_mel_bank in range(n_mels):
            reference_band = torch.tensor(expected[i_mel_bank])
            assert torch.allclose(actual[:, i_mel_bank], reference_band, atol=1e-4)
Ejemplo n.º 8
0
    def __init__(self,
                 n_mels: int = 128,
                 sample_rate: int = 16000,
                 f_min: float = 0.,
                 f_max: Optional[float] = None,
                 n_stft: Optional[int] = None) -> None:
        r"""Store the mel-scale settings and register the ``fb`` buffer.

        ``f_max`` falls back to ``float(sample_rate // 2)`` when it is
        ``None``. When ``n_stft`` is ``None`` the registered buffer is an
        empty tensor; otherwise the filter bank is created immediately.

        Raises:
            AssertionError: If ``f_min`` is greater than the resolved
                ``f_max``.
        """
        super(MelScale, self).__init__()
        self.n_mels = n_mels
        self.sample_rate = sample_rate
        if f_max is None:
            f_max = float(sample_rate // 2)
        self.f_max = f_max
        self.f_min = f_min

        assert f_min <= self.f_max, 'Require f_min: %f < f_max: %f' % (f_min, self.f_max)

        if n_stft is None:
            # Placeholder: the number of frequency bins is not known yet.
            fb = torch.empty(0)
        else:
            fb = F.create_fb_matrix(
                n_stft, self.f_min, self.f_max, self.n_mels, self.sample_rate)
        self.register_buffer('fb', fb)
Ejemplo n.º 9
0
    def forward(self, specgram):
        r"""Project a spectrogram onto the mel filter bank stored in ``self.fb``.

        Args:
            specgram (torch.Tensor): A spectrogram STFT of dimension (channel, freq, time)

        Returns:
            torch.Tensor: Mel frequency spectrogram of size (channel, ``n_mels``, time)
        """
        # An empty ``fb`` means no filter bank exists yet; build it from the
        # number of frequency bins of this input.
        if self.fb.numel() == 0:
            lazy_fb = F.create_fb_matrix(
                specgram.size(1), self.f_min, self.f_max, self.n_mels)
            # Attributes cannot be reassigned outside __init__, so the
            # existing tensor is resized and filled in place instead.
            self.fb.resize_(lazy_fb.size())
            self.fb.copy_(lazy_fb)

        # (channel, time, freq) @ (freq, n_mels) -> (channel, time, n_mels)
        time_major = torch.matmul(specgram.transpose(1, 2), self.fb)
        # back to (channel, n_mels, time)
        return time_major.transpose(1, 2)
Ejemplo n.º 10
0
    def _test_create_fb(self, n_mels=40, sample_rate=22050, n_fft=2048, fmin=0.0, fmax=8000.0):
        """Compare ``F.create_fb_matrix`` with ``librosa.filters.mel`` band by band.

        The test is skipped when librosa could not be imported. librosa is
        called with ``htk=True`` and ``norm=None``. Each librosa row is
        compared with the matching column of the torchaudio filter bank using
        an absolute tolerance of ``1e-4``.
        """
        # Using a decorator here causes parametrize to fail on Python 2
        if not IMPORT_LIBROSA:
            raise unittest.SkipTest('Librosa is not available')

        expected = librosa.filters.mel(
            sr=sample_rate,
            n_fft=n_fft,
            n_mels=n_mels,
            fmax=fmax,
            fmin=fmin,
            htk=True,
            norm=None,
        )
        n_freqs = n_fft // 2 + 1
        actual = F.create_fb_matrix(
            sample_rate=sample_rate,
            n_mels=n_mels,
            f_max=fmax,
            f_min=fmin,
            n_freqs=n_freqs,
        )

        for i_mel_bank in range(n_mels):
            reference_band = torch.tensor(expected[i_mel_bank])
            assert torch.allclose(actual[:, i_mel_bank], reference_band, atol=1e-4)
Ejemplo n.º 11
0
    def __init__(self,
                 n_mels: int = 128,
                 sample_rate: int = 16000,
                 f_min: float = 0.,
                 f_max: Optional[float] = None,
                 n_stft: Optional[int] = None,
                 norm: Optional[str] = None,
                 mel_scale: str = "htk") -> None:
        r"""Store the mel-scale settings and register the ``fb`` buffer.

        ``f_max`` falls back to ``float(sample_rate // 2)`` when it is
        ``None``. ``norm`` and ``mel_scale`` are stored and forwarded
        unchanged to ``F.create_fb_matrix``. When ``n_stft`` is ``None`` the
        registered buffer is an empty tensor; otherwise the filter bank is
        created immediately.

        Raises:
            AssertionError: If ``f_min`` is greater than the resolved
                ``f_max``.
        """
        super(MelScale, self).__init__()
        self.n_mels = n_mels
        self.sample_rate = sample_rate
        if f_max is None:
            f_max = float(sample_rate // 2)
        self.f_max = f_max
        self.f_min = f_min
        self.norm = norm
        self.mel_scale = mel_scale

        assert f_min <= self.f_max, 'Require f_min: {} < f_max: {}'.format(f_min, self.f_max)

        if n_stft is None:
            # Placeholder: the number of frequency bins is not known yet.
            fb = torch.empty(0)
        else:
            fb = F.create_fb_matrix(
                n_stft, self.f_min, self.f_max, self.n_mels,
                self.sample_rate, self.norm, self.mel_scale)
        self.register_buffer('fb', fb)
Ejemplo n.º 12
0
 def jit_method(n_stft, f_min, f_max, n_mels):
     # type: (int, float, float, int) -> Tensor
     # Thin wrapper that forwards its arguments to F.create_fb_matrix; the
     # type comment above supplies the argument and return types.
     fbank = F.create_fb_matrix(n_stft, f_min, f_max, n_mels)
     return fbank
Ejemplo n.º 13
0
 def test_warning(self):
     """Expect exactly one warning for 128 mel bins over 201 frequency bins."""
     # ``pytest.warns(None)`` is deprecated since pytest 7 and rejected by
     # pytest 8, so the warnings are recorded with the standard library.
     import warnings

     with warnings.catch_warnings(record=True) as w:
         # Record every warning, including ones already shown earlier in the
         # process that the default filter would otherwise suppress.
         warnings.simplefilter("always")
         F.create_fb_matrix(201, 0, 8000, 128, 16000)
     assert len(w) == 1
Ejemplo n.º 14
0
 def test_no_warning_low_n_mels(self):
     """Expect no warning for 89 mel bins over 201 frequency bins."""
     # ``pytest.warns(None)`` is deprecated since pytest 7 and rejected by
     # pytest 8, so the warnings are recorded with the standard library.
     import warnings

     with warnings.catch_warnings(record=True) as w:
         # Record every warning, including ones already shown earlier in the
         # process that the default filter would otherwise suppress.
         warnings.simplefilter("always")
         F.create_fb_matrix(201, 0, 8000, 89, 16000)
     assert len(w) == 0
Ejemplo n.º 15
0
 def test_warning(self):
     """Expect exactly one warning for 128 mel bins over 201 frequency bins."""
     with warnings.catch_warnings(record=True) as caught:
         # "always" disables de-duplication so every warning is recorded.
         warnings.simplefilter("always")
         F.create_fb_matrix(201, 0, 8000, 128, 16000)
     n_caught = len(caught)
     assert n_caught == 1
Ejemplo n.º 16
0
 def test_no_warning_high_n_freq(self):
     """Expect no warning for 128 mel bins over 288 frequency bins."""
     with warnings.catch_warnings(record=True) as caught:
         # "always" disables de-duplication so every warning is recorded.
         warnings.simplefilter("always")
         F.create_fb_matrix(288, 0, 8000, 128, 16000)
     n_caught = len(caught)
     assert n_caught == 0
Ejemplo n.º 17
0
 def test_create_fb_matrix_no_warning_low_n_mels(self):
     """Expect no warning for 89 mel bins over 201 frequency bins."""
     with warnings.catch_warnings(record=True) as caught:
         # "always" disables de-duplication so every warning is recorded.
         warnings.simplefilter("always")
         F.create_fb_matrix(201, 0, 8000, 89, 16000)
     n_caught = len(caught)
     assert n_caught == 0
# ---------------
#
# ``torchaudio.functional.create_fb_matrix`` generates the filter bank
# for converting frequency bins to mel-scale bins.
#
# Since this function does not require input audio/features, there is no
# equivalent transform in ``torchaudio.transforms``.
#

n_fft = 256
n_mels = 64
sample_rate = 6000

# The filter bank spans n_fft // 2 + 1 frequency bins, from 0 Hz up to half
# the sample rate.
mel_filters = F.create_fb_matrix(
    n_fft // 2 + 1,
    f_min=0.,
    f_max=sample_rate / 2.,
    n_mels=n_mels,
    sample_rate=sample_rate,
    norm='slaney',
)
plot_mel_fbank(mel_filters, "Mel Filter Bank - torchaudio")

######################################################################
# Comparison against librosa
# ~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# For reference, here is the equivalent way to get the mel filter bank
# with ``librosa``.
#

mel_filters_librosa = librosa.filters.mel(
    sample_rate,
    n_fft,
Ejemplo n.º 19
0
 def test_no_warning_high_n_freq(self):
     """Expect no warning for 128 mel bins over 288 frequency bins."""
     # ``pytest.warns(None)`` is deprecated since pytest 7 and rejected by
     # pytest 8, so the warnings are recorded with the standard library.
     import warnings

     with warnings.catch_warnings(record=True) as w:
         # Record every warning, including ones already shown earlier in the
         # process that the default filter would otherwise suppress.
         warnings.simplefilter("always")
         F.create_fb_matrix(288, 0, 8000, 128, 16000)
     assert len(w) == 0