def test_definition_ortho(self):
    """Check that an orthonormal DCT-III undoes an orthonormal DCT-II.

    Every entry of the module-level ``X`` is converted to ``self.rdt``,
    transformed forward and back, and the round trip must keep the dtype
    and reproduce the input to ``self.dec`` decimals.
    """
    for sample in X:
        original = np.array(sample, dtype=self.rdt)
        forward = dct(original, norm="ortho", type=2)
        restored = dct(forward, norm="ortho", type=3)
        dtype_msg = "Output dtype is %s, expected %s" % (restored.dtype, self.rdt)
        self.assertTrue(restored.dtype == self.rdt, dtype_msg)
        assert_array_almost_equal(restored, original, decimal=self.dec)
Esempio n. 2
0
def dct_2d_ref(x, **kwargs):
    """Compute a reference 2-D DCT for testing dct2.

    The 1-D ``dct`` is applied to every row and then to every column of a
    private copy of ``x``.

    Parameters
    ----------
    x : array_like
        Two-dimensional input. It is copied and never modified.
    **kwargs
        Forwarded unchanged to every ``dct`` call.

    Returns
    -------
    ndarray
        The transformed copy of ``x``.
    """
    x = np.array(x, copy=True)
    # Integer (or bool) input would silently truncate the DCT output when it
    # is written back into the working copy, so promote it to floating point.
    x = x.astype(np.result_type(x.dtype, np.float32), copy=False)
    for row in range(x.shape[0]):
        x[row, :] = dct(x[row, :], **kwargs)
    for col in range(x.shape[1]):
        x[:, col] = dct(x[:, col], **kwargs)
    return x
Esempio n. 3
0
 def test_definition_ortho(self):
     """Round-trip each entry of ``X`` through orthonormal DCT-II and DCT-III.

     The result must have dtype ``self.rdt`` and match the input to
     ``self.dec`` decimals.
     """
     for sample in X:
         original = np.array(sample, dtype=self.rdt)
         restored = dct(dct(original, norm='ortho', type=2),
                        norm="ortho", type=3)
         assert_equal(restored.dtype, self.rdt)
         assert_array_almost_equal(restored, original, decimal=self.dec)
Esempio n. 4
0
def dct_2d_ref(x, **kwargs):
    """Calculate reference values for testing dct2.

    Applies the 1-D ``dct`` along each row, then along each column, of a
    copy of ``x``; the caller's array is left untouched.

    Parameters
    ----------
    x : array_like
        Two-dimensional input data.
    **kwargs
        Passed through to each ``dct`` call.

    Returns
    -------
    ndarray
        Copy of ``x`` holding the row-then-column DCT.
    """
    # Work in a floating (or complex) dtype: with an integer copy, assigning
    # the DCT output back into the array would truncate it.
    work_dtype = np.result_type(np.asarray(x).dtype, np.float32)
    x = np.array(x, dtype=work_dtype, copy=True)
    for row in range(x.shape[0]):
        x[row, :] = dct(x[row, :], **kwargs)
    for col in range(x.shape[1]):
        x[:, col] = dct(x[:, col], **kwargs)
    return x
Esempio n. 5
0
 def test_definition_ortho(self):
     """Round-trip each entry of ``X`` through orthonormal DCT-II and DCT-III.

     The output dtype must equal ``np.result_type(np.float32, self.rdt)``
     and the values must match the input to ``self.dec`` decimals.
     """
     expected_dtype = np.result_type(np.float32, self.rdt)
     for sample in X:
         original = np.array(sample, dtype=self.rdt)
         forward = dct(original, norm='ortho', type=2)
         restored = dct(forward, norm="ortho", type=3)
         assert_equal(restored.dtype, expected_dtype)
         assert_array_almost_equal(restored, original, decimal=self.dec)
Esempio n. 6
0
 def test_definition_ortho(self):
     """Check that orthonormal DCT-II followed by DCT-III is the identity.

     Runs over every entry of the module-level ``X``; dtype ``self.rdt``
     must be preserved.
     """
     for sample in X:
         original = np.array(sample, dtype=self.rdt)
         forward = dct(original, norm='ortho', type=2)
         restored = dct(forward, norm="ortho", type=3)
         same_dtype = restored.dtype == self.rdt
         self.assertTrue(
             same_dtype,
             "Output dtype is %s, expected %s" % (restored.dtype, self.rdt))
         assert_array_almost_equal(restored, original, decimal=self.dec)
    def test_axis(self):
        """Check that ``dct`` on a 2-D array matches per-vector 1-D transforms.

        Random batches of two vectors are transformed as a whole, first with
        the default axis and then (transposed) with ``axis=0``; each slice
        must equal the 1-D transform of that slice to ``self.dec`` decimals.
        """
        n_vectors = 2
        for length in [7, 8, 9, 16, 32, 64]:
            batch = np.random.randn(n_vectors, length)
            by_rows = dct(batch, type=self.type)
            for k in range(n_vectors):
                expected = dct(batch[k], type=self.type)
                assert_array_almost_equal(by_rows[k], expected, decimal=self.dec)

            batch = batch.T
            by_cols = dct(batch, axis=0, type=self.type)
            for k in range(n_vectors):
                expected = dct(batch[:, k], type=self.type)
                assert_array_almost_equal(by_cols[:, k], expected, decimal=self.dec)
    def test_axis(self):
        """Verify the ``axis`` handling of ``dct`` on random 2-D input.

        For several vector lengths, a two-vector batch is transformed at once
        and every row (default axis) or column (``axis=0`` on the transpose)
        is compared with the 1-D transform of the same slice.
        """
        count = 2
        for size in [7, 8, 9, 16, 32, 64]:
            data = np.random.randn(count, size)

            whole = dct(data, type=self.type)
            for idx in range(count):
                single = dct(data[idx], type=self.type)
                assert_array_almost_equal(whole[idx], single,
                                          decimal=self.dec)

            data = data.T
            whole = dct(data, axis=0, type=self.type)
            for idx in range(count):
                single = dct(data[:, idx], type=self.type)
                assert_array_almost_equal(whole[:, idx], single,
                                          decimal=self.dec)
Esempio n. 9
0
def gene_mfcc(s, fs, nperseg, filterbank):
    """Compute the STFT, a dB-scaled filter-bank spectrogram and cepstral rows.

    Returns the tuple ``(spec, mspec_db, mfcc)``: the STFT from
    ``signal.stft``, the filter-bank output converted with
    ``librosa.amplitude_to_db``, and rows 1..12 of the DCT taken along
    axis 0 of ``mspec_db``.
    """
    _, _, spec = signal.stft(s, fs=fs, nperseg=nperseg)
    # The last STFT row is dropped before the filter bank is applied.
    magnitude = np.abs(spec[:-1])
    mspec_db = librosa.amplitude_to_db(np.dot(filterbank, magnitude))
    cepstrum = dct(mspec_db, axis=0)
    return spec, mspec_db, cepstrum[1:13]
def st_mfcc(cur_pos_signal, fbank, nceps):
    """
    Short-term MFCC.

    Projects ``cur_pos_signal`` onto ``fbank``, takes ``log10`` (offset by the
    module-level ``EPS``) and returns the first ``nceps`` entries of the
    orthonormal DCT-II.
    """
    filtered = numpy.dot(cur_pos_signal, fbank.T)
    log_spectrum = numpy.log10(filtered + EPS)
    cepstrum = dct(log_spectrum, type=2, norm='ortho', axis=-1)
    return cepstrum[:nceps]
Esempio n. 11
0
 def create_mfcc(self):
     """Compute MFCC, delta-MFCC and delta-delta-MFCC and store them on self.

     Writes ``self.mfcc``, ``self.d_mfcc`` and ``self.dd_mfcc``.
     """
     # Inner product of the spectrum [dB] and the mel filter bank
     mel_spectrum = np.dot(self.mel_filter_bank, self.spec)
     # Discrete cosine transform; only the first MFCC_DIM rows are kept
     cepstrum = dct(mel_spectrum, type=2, norm="ortho", axis=0)
     self.mfcc = cepstrum[:MFCC_DIM]
     # Delta MFCC
     self.d_mfcc = self.create_delta(self.mfcc, DELTA_LENGTH)
     # Delta-delta MFCC
     self.dd_mfcc = self.create_delta(self.d_mfcc, DELTA_LENGTH)
Esempio n. 12
0
def calc_mfcc(wav, hop, win_length, filterbank):
    """
    Calculate a mel-scale spectrogram and its per-frame cepstrum (MFCC).

    Parameters:
        wav : ndarray, real-valued
            Time series of measurement values.
        hop : float
            Hop (overlap) size, forwarded to ``utils.stft``.
        win_length : int
            Window size, forwarded to ``utils.stft``.
        filterbank : ndarray
            Mel filter bank applied to the magnitude spectrum.

    Returns:
        mel_spec : ndarray (n_channels, n_frames)
            Mel scale spectrogram.
        mfcc : ndarray (n_channels, n_frames)
            Orthonormal DCT-II of every column of ``mel_spec``.
    """
    emphasized = utils.pre_emphasis(wav, p=0.97)
    spectrum = utils.stft(emphasized, hop=hop, win_length=win_length)
    # The last row of the spectrum is dropped before the filter bank is applied.
    magnitude = np.abs(spectrum[:-1])
    mel_spec = np.dot(filterbank, magnitude)

    mfcc = np.zeros_like(mel_spec)
    n_frames = mel_spec.shape[1]
    for frame_idx in range(n_frames):
        column = mel_spec[:, frame_idx]
        mfcc[:, frame_idx] = dct(column, type=2, norm="ortho", axis=-1)

    return mel_spec, mfcc
Esempio n. 13
0
def stMFCC(X, fbank, nceps):
    """
    Computes the MFCCs of a frame, given the fft mag

    ARGUMENTS:
        X:        fft magnitude abs(FFT)
        fbank:    filter bank (see mfccInitFilterBanks)
        nceps:    number of leading DCT entries kept before filtering
    RETURN
        ceps:     1-D array with the non-zero entries of the first
                  cepstral row

    Note:    MFCC calculation is, in general, taken from the scikits.talkbox library (MIT Licence),
    #    with a small number of modifications to make it more compact and suitable for the pyAudioAnalysis Lib
    """

    # Number of leading filter-bank entries to drop so that the bank holds as
    # many values as X.
    qtdDeleted = fbank.T.size - X.size

    if qtdDeleted > 0:
        # Same result as deleting element 0 of the flattened transpose
        # qtdDeleted times, but in a single O(n) slice and without the float
        # index that numpy.delete rejects.
        fbank = fbank.T.ravel()[qtdDeleted:]

    # Floor division: reshape() does not accept the float that ``/`` yields
    # on Python 3.
    fbank = fbank.reshape(fbank.size // 2, 2)
    mspec = numpy.log10(numpy.dot(X, fbank.T) + eps)
    ceps = dct(mspec, type=2, norm='ortho', axis=-1)[:nceps]

    # Keep only the non-zero coefficients of the first row.
    return numpy.asarray([value for value in ceps[0] if value != 0.0])
Esempio n. 14
0
    def __dct(self, mspec, nceps):
        """Return the first ``nceps`` entries of the orthonormal DCT-II of ``mspec``.

        The transform runs along the last axis; the slice is taken along
        axis 0 of the result.
        """
        cepstrum = realtransforms.dct(mspec, type=2, norm="ortho", axis=-1)
        lowest = cepstrum[:nceps]
        return lowest


#end of class MFCC
Esempio n. 15
0
def specPS(input_wav, pitch):
    """Compute the first 50 cepstral coefficients of an averaged periodogram.

    ``input_wav`` is cut into ``len(input_wav) // pitch`` consecutive chunks
    (at least one); the first component of ``periodogram(chunk, nfft=4096)``
    is averaged over the chunks, combined with the second component into a
    log magnitude, passed through ``log10`` and the orthonormal DCT-II.

    Parameters:
        input_wav: sequence of samples (must support slicing and ``len``).
        pitch: chunk divisor; ``len(input_wav) // pitch`` chunks are used.

    Returns:
        The first 50 DCT coefficients.

    NOTE(review): ``periodogram`` and ``epsilon`` are not defined in this
    block and are presumably provided at module level - confirm.
    """
    N = len(input_wav)
    # Floor division keeps the chunk count and the slice bounds integral;
    # true division would produce floats that cannot be used for slicing.
    samps = int(N // pitch)
    if samps == 0:
        samps = 1
    frames = N // samps
    data = input_wav[0:frames]
    specs = periodogram(data, nfft=4096)
    # Accumulate the first periodogram component over the remaining chunks.
    for i in range(1, samps):
        data = input_wav[frames * i:frames * (i + 1)]
        peri = periodogram(data, nfft=4096)
        for sp in range(len(peri[0])):
            specs[0][sp] += peri[0][sp]
    for s in range(len(specs[0])):
        specs[0][s] /= float(samps)
    peri = []
    for k, l in zip(specs[0], specs[1]):
        if k == 0 and l == 0:
            peri.append(epsilon)
        else:
            peri.append(math.log(math.sqrt((k**2) + (l**2))))
    # Fix values<=0 to prevent nan/-inf. The guard now matches the
    # replacement condition, so exact zeros are fixed even when no value is
    # strictly negative.
    if any(p <= 0 for p in peri):
        eps = np.finfo(float).eps
        peri = [eps if p <= 0 else p for p in peri]
    mspec = np.log10(peri)
    # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
    ceps = dct(mspec, type=2, norm='ortho', axis=-1)
    return ceps[:50]
Esempio n. 16
0
 def smoothData(self,x,y,weight,nMiss=0):
   '''
   Smooth ``y`` by minimising the ``gcv`` score over one smoothing parameter.

   x       : sequence; only its length is used
   y       : data to smooth
   weight  : per-sample values; they are inverted and scaled so that the
             largest inverse equals 1 before being handed to ``gcv``
   nMiss   : forwarded to ``gcv``

   Returns ``(y_hat, solvedGamma)`` where ``solvedGamma`` is ``exp`` of the
   optimiser result. ``y_hat`` is a zero-initialised buffer passed to
   ``gcv`` as its last argument (presumably filled in there - confirm).
   '''
   # fmin_l_bfgs_b is public in scipy.optimize; the scipy.optimize.lbfgsb
   # module is a private/deprecated location.
   from scipy.optimize import fmin_l_bfgs_b
   n = len(x)
   weight = 1./weight
   # scale 0 to 1
   weight = weight/np.max(weight)
   eigenvalues2 = (-2. + 2.*np.cos(np.arange(n)*np.pi/n))**2
   x0 = np.atleast_1d(1.)
   y_hat = np.zeros_like(y)
   xpost,f,d = fmin_l_bfgs_b(gcv,x0,fprime=None,factr=10.,
          approx_grad=True,args=(y,weight,eigenvalues2,n,nMiss,y_hat))
   solvedGamma = np.exp(xpost)[0]
   return y_hat,solvedGamma
Esempio n. 17
0
def mfcc(input, nceps=13):
    """Compute Mel Frequency Cepstral Coefficients.

    Parameters
    ----------
    input: object
        Input spectrogram. The code reads ``input.data`` and
        ``input.metadata.sampling_configuration`` (``dft_length`` and ``fs``).
    nceps: int
        Number of cepstral coefficients kept per row.

    Returns
    -------
    ceps: ndarray
        Mel-cepstrum coefficients (first ``nceps`` columns of the DCT).

    Notes
    -----
    MFCC are computed as follows:
        * Filter the spectrogram with a triangular filter-bank, whose
        filters are approximatively linearly spaced on the mel scale, and
        have equal bandwith in the mel scale
        * Take ``log10`` of the result, floored at 1e-7
        * Compute the DCT of the log-spectrum

    This is based on the talkbox module:
    http://pydoc.net/Python/scikits.talkbox/0.2.4.dev/scikits.talkbox.features.mfcc/

    References
    ----------
    .. [1] S.B. Davis and P. Mermelstein, "Comparison of parametric
           representations for monosyllabic word recognition in continuously
           spoken sentences", IEEE Trans. Acoustics. Speech, Signal Proc.
           ASSP-28 (4): 357-366, August 1980."""

    sampling = input.metadata.sampling_configuration
    nfft = sampling.dft_length
    fs = sampling.fs

    # lowfreq = 400 / 3.
    lowfreq = 133.33
    linsc = 200/3.
    logsc = 1.0711703

    nlinfil = 13
    nlogfil = 27

    fbank = trfbank(fs, nfft, lowfreq, linsc, logsc, nlinfil, nlogfil)[0]
    # Keep only as many filter-bank columns as the spectrogram has rows.
    fbank = fbank.T[0:input.data.shape[0], :]

    # Floor the filtered spectrum so log10 never sees zero.
    mspec = np.log10(np.maximum(np.dot(fbank.T, input.data), 0.0000001)).T

    # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
    ceps = dct(mspec, type=2, norm='ortho', axis=-1)[:, :nceps]

    return ceps
Esempio n. 18
0
def mfcc(f, fs, frameLength, nceps=13):
    """Compute MFCC, delta and delta-delta features for a sequence of frames.

    ``f`` is iterated and the ``.data`` of every frame is stacked; the
    magnitude FFT of length ``2 * frameLength`` is filtered with the bank
    from ``trfbank``, log10-compressed and transformed with an orthonormal
    DCT-II. Columns ``1:nceps`` are kept and concatenated (along axis 1) with
    their first- and second-order ``delta`` features.
    """
    nfft = frameLength * 2

    # Parameters of the triangular filter bank
    lowfreq = 133.33
    # highfreq = 6855.4976
    linsc = 200 / 3.
    logsc = 1.0711703

    # Number of filters
    nlinfil = 13
    nlogfil = 27

    fbank = trfbank(fs, nfft, lowfreq, linsc, logsc, nlinfil, nlogfil)
    # Contents of all frames
    data = np.array([frame.data for frame in f])
    # Compute the spectrum magnitude
    spec = np.abs(fft(data, nfft, axis=-1))
    # Filter the spectrum through the triangle filterbank.
    # Silent frames were removed beforehand by short-time energy screening,
    # so in theory no zero coefficient appears here. If that step is dropped,
    # zeros may occur and break the computation; in that case use instead:
    #   epsilon = 1e-6
    #   mspec = np.log10(np.dot(np.maximum(spec, epsilon), fbank.T))
    mspec = np.log10(np.dot(spec, fbank.T))
    # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain).
    # Usually the 2nd through 13th DCT coefficients are taken as the MFCCs.
    static = dct(mspec, type=2, norm='ortho', axis=-1)[:, 1:nceps]

    # First-order delta MFCC, reflecting the dynamics of the audio
    first_delta = delta(static)
    # Second-order delta-delta MFCC
    second_delta = delta(first_delta)

    # Append both sets of differential features after the static ones
    return np.concatenate((static, first_delta, second_delta), axis=1)
Esempio n. 19
0
def FFTcoefficient(sig,
                   samplerate=16000,
                   win_length=0.025,
                   win_step=0.01,
                   pre_emphasis_coeff=0.97,
                   NFFT=512):
    '''
    Compute the orthonormal DCT-II of the log power spectrum of each frame.

    :param sig: input signal
    :param samplerate: sampling rate; multiplied with the window sizes
    :param win_length: window length (multiplied by ``samplerate``)
    :param win_step: window step (multiplied by ``samplerate``)
    :param pre_emphasis_coeff: coefficient passed to ``pre_emphasis``
    :param NFFT: FFT size passed to ``spectrum_power``
    :return: DCT (along axis 1) of the log of the framed power spectrum
    '''

    # Pre-processing
    emphasized = pre_emphasis(sig, pre_emphasis_coeff)
    # Framing: obtain the array of frames
    frames = audio2frame(emphasized, win_length * samplerate,
                         win_step * samplerate)
    # Windowing
    frames *= np.hamming(int(round(win_length * samplerate)))
    # FFT: power spectrum of every frame
    fftfeat = spectrum_power(frames, NFFT)
    # Replace exact zeros so the logarithm below stays finite.
    feat = np.where(fftfeat == 0, np.finfo(float).eps, fftfeat)

    # TODO: filtering

    # Take the log of the zero-guarded values; using the raw ``fftfeat`` here
    # would throw the guard away and yield -inf for empty bins.
    feat = np.log(feat)

    feat = dct(feat, type=2, axis=1, norm='ortho')

    return feat
Esempio n. 20
0
def st_mfcc(cur_pos_signal, fbank, nceps):
    """
    Short-term MFCC.

    Returns the first ``nceps`` entries of the orthonormal DCT-II of
    ``log10(cur_pos_signal . fbank.T + EPS)``.
    """
    band_energy = numpy.dot(cur_pos_signal, fbank.T) + EPS
    mspec = numpy.log10(band_energy)
    full_cepstrum = dct(mspec, type=2, norm='ortho', axis=-1)
    return full_cepstrum[:nceps]
Esempio n. 21
0
def get_mfcc(path):
    """Finds the MFCCs of the frames of a WAVE file.

  Args:
    path: The path to a WAVE file.

  Returns:
    A numpy array built from the per-frame MFCC vectors. Each vector holds
    the leading ``SIGNIFICANT_MFCC`` fraction of the frame's cepstrum.
  """
    # Read the file, and determine its length in frames
    (sample, data) = utils.read_wave_from_file(path)
    total_frames = (data.size / sample) / COMP_FRAME_SIZE

    step = COMP_FRAME_SIZE * sample
    # The window length and the slice bounds below must be integers even when
    # COMP_FRAME_SIZE or FRAME_OVERLAP_FACTOR are fractional.
    frame_length = int(step)
    window = hamming(frame_length)

    mfcc_out = []
    filterbank_cache = {}
    frame_index = 0

    while frame_index + (1 - FRAME_OVERLAP_FACTOR) < total_frames:
        # Obtain the frame_indexth frame from the data
        start = int(frame_index * step)
        frame = data[start:start + frame_length]

        # Generate the FFT of the frame windowed by the hamming window
        frame_fft = numpy.fft.rfft(frame * window, n=256)
        # Avoid exact zeros, which would give -inf under log10 below
        frame_fft[frame_fft == 0] = 0.000003
        nfft = len(frame_fft)

        # Compute the mel triangular filterbank or get a cached version
        fb_key = (sample, nfft)
        if fb_key in filterbank_cache:
            filterbank = filterbank_cache[fb_key]
        else:
            filterbank = triangular_filters(sample, nfft).T
            filterbank[filterbank == 0] = 0.00003
            filterbank_cache[fb_key] = filterbank

        # The magnitude spectrum of the frame
        power_spectrum = numpy.abs(frame_fft)
        # Filtered by the mel filterbank
        mel_power_spectrum = numpy.log10(numpy.dot(power_spectrum, filterbank))
        # With the discrete cosine transform to find the cepstrum
        cepstrum = dct(mel_power_spectrum, type=2, norm="ortho", axis=-1)

        mfcc_out.append(cepstrum[: int(len(cepstrum) * SIGNIFICANT_MFCC)])
        frame_index = frame_index + FRAME_OVERLAP_FACTOR

    return numpy.array(mfcc_out)
Esempio n. 22
0
def mfcc(input, nceps=13):
    """Compute Mel Frequency Cepstral Coefficients.

    Parameters
    ----------
    input: object
        Input spectrogram. The code reads ``input.data`` and
        ``input.metadata.sampling_configuration`` (``dft_length`` and ``fs``).
    nceps: int
        Number of cepstral coefficients kept per row.

    Returns
    -------
    ceps: ndarray
        Mel-cepstrum coefficients (first ``nceps`` columns of the DCT).

    Notes
    -----
    MFCC are computed as follows:
        * Filter the spectrogram with a triangular filter-bank, whose
        filters are approximatively linearly spaced on the mel scale, and
        have equal bandwith in the mel scale
        * Take ``log10`` of the result, floored at 1e-7
        * Compute the DCT of the log-spectrum

    This is based on the talkbox module:
    http://pydoc.net/Python/scikits.talkbox/0.2.4.dev/scikits.talkbox.features.mfcc/

    References
    ----------
    .. [1] S.B. Davis and P. Mermelstein, "Comparison of parametric
           representations for monosyllabic word recognition in continuously
           spoken sentences", IEEE Trans. Acoustics. Speech, Signal Proc.
           ASSP-28 (4): 357-366, August 1980."""

    config = input.metadata.sampling_configuration
    nfft = config.dft_length
    fs = config.fs

    # lowfreq = 400 / 3.
    lowfreq = 133.33
    linsc = 200 / 3.
    logsc = 1.0711703

    nlinfil = 13
    nlogfil = 27

    fbank = trfbank(fs, nfft, lowfreq, linsc, logsc, nlinfil, nlogfil)[0]
    # Keep only as many filter-bank columns as the spectrogram has rows.
    fbank = fbank.T[0:input.data.shape[0], :]

    # Floor the filtered spectrum so log10 never sees zero.
    mspec = np.log10(np.maximum(np.dot(fbank.T, input.data), 0.0000001)).T

    # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
    ceps = dct(mspec, type=2, norm='ortho', axis=-1)[:, :nceps]

    return ceps
Esempio n. 23
0
 def test_definition_matlab(self):
     """Test correspondence with MATLAB (orthonormal mode).

     Each entry of ``X`` is transformed with an orthonormal DCT-II and
     compared with the entry of ``Y`` at the same index.
     """
     for idx, sample in enumerate(X):
         data = np.array(sample, dtype=self.rdt)
         reference = Y[idx]
         result = dct(data, norm="ortho", type=2)
         assert_equal(result.dtype, self.rdt)
         assert_array_almost_equal(result, reference, decimal=self.dec)
Esempio n. 24
0
 def test_definition_matlab(self):
     """Compare the orthonormal DCT-II of every ``X[i]`` with ``Y[i]``.

     ``Y`` holds the MATLAB reference values; the output dtype must be
     ``self.rdt``.
     """
     for position, raw in enumerate(X):
         expected = Y[position]
         actual = dct(np.array(raw, dtype=self.rdt), norm="ortho", type=2)
         assert_equal(actual.dtype, self.rdt)
         assert_array_almost_equal(actual, expected, decimal=self.dec)
Esempio n. 25
0
 def test_definition_matlab(self):
     """Test correspondence with MATLAB (orthonormal mode).

     Inputs are converted to ``np.result_type(np.float32, self.rdt)`` and
     the orthonormal DCT-II must keep that dtype and match ``Y``.
     """
     work_dtype = np.result_type(np.float32, self.rdt)
     for sample, reference in zip(X, Y):
         data = np.array(sample, dtype=work_dtype)
         result = dct(data, norm="ortho", type=2)
         assert_equal(result.dtype, work_dtype)
         assert_array_almost_equal(result, reference, decimal=self.dec)
 def test_definition_matlab(self):
     """Test correspondence with MATLAB (orthonormal mode).

     The orthonormal DCT-II of each ``X[i]`` must have dtype ``self.rdt``
     and agree with ``Y[i]`` to ``self.dec`` decimals.
     """
     for idx, sample in enumerate(X):
         data = np.array(sample, dtype=self.rdt)
         reference = Y[idx]
         result = dct(data, norm="ortho", type=2)
         dtype_msg = "Output dtype is %s, expected %s" % (result.dtype, self.rdt)
         self.assertTrue(result.dtype == self.rdt, dtype_msg)
         assert_array_almost_equal(result, reference, decimal=self.dec)
Esempio n. 27
0
 def test_definition_ortho(self):
     """Compare the orthonormal DCT-I with ``naive_dct1`` for every entry of ``X``.

     Both results are divided by the maximum of the ``dct`` output before
     the comparison.
     """
     expected_dtype = np.result_type(np.float32, self.rdt)
     for sample in X:
         data = np.array(sample, dtype=self.rdt)
         fast = dct(data, norm='ortho', type=1)
         naive = naive_dct1(data, norm='ortho')
         assert_equal(fast.dtype, expected_dtype)
         # Both sides are scaled by the same factor: the peak of the fast result.
         scale = np.max(fast)
         assert_array_almost_equal(fast / scale, naive / scale, decimal=self.dec)
Esempio n. 28
0
 def test_definition_ortho(self):
     """Compare the orthonormal DCT-IV with ``naive_dct4`` for every entry of ``X``.

     Both results are divided by the maximum of the ``dct`` output before
     the comparison.
     """
     expected_dtype = np.result_type(np.float32, self.rdt)
     for sample in X:
         data = np.array(sample, dtype=self.rdt)
         fast = dct(data, norm='ortho', type=4)
         naive = naive_dct4(data, norm='ortho')
         assert_equal(fast.dtype, expected_dtype)
         # Both sides are scaled by the same factor: the peak of the fast result.
         scale = np.max(fast)
         assert_array_almost_equal(fast / scale, naive / scale, decimal=self.dec)
 def test_definition_matlab(self):
     """Test correspondence with MATLAB (orthonormal mode).

     Walks ``X`` and ``Y`` by index; the DCT-II output must keep dtype
     ``self.rdt`` and match the reference values.
     """
     for position, raw in enumerate(X):
         data = np.array(raw, dtype=self.rdt)
         expected = Y[position]
         actual = dct(data, norm="ortho", type=2)
         self.assertTrue(
             actual.dtype == self.rdt,
             "Output dtype is %s, expected %s" % (actual.dtype, self.rdt))
         assert_array_almost_equal(actual, expected, decimal=self.dec)
 def test_definition_ortho(self):
     """Check the orthonormal DCT-IV against the ``naive_dct4`` reference.

     The comparison is made after dividing both results by the maximum of
     the ``dct`` output.
     """
     result_dtype = np.result_type(np.float32, self.rdt)
     for raw in X:
         data = np.array(raw, dtype=self.rdt)
         computed = dct(data, norm='ortho', type=4)
         reference = naive_dct4(data, norm='ortho')
         assert_equal(computed.dtype, result_dtype)
         peak = np.max(computed)
         assert_array_almost_equal(computed / peak, reference / peak,
                                   decimal=self.dec)
 def test_definition(self):
     """Compare ``dct`` with the reference data from ``fftw_ref`` for every size in FFTWDATA_SIZES."""
     for size in FFTWDATA_SIZES:
         x, yr = fftw_ref(self.type, size, self.rdt)
         y = dct(x, type=self.type)
         dtype_msg = "Output dtype is %s, expected %s" % (y.dtype, self.rdt)
         self.assertTrue(y.dtype == self.rdt, dtype_msg)
         # XXX: we divide by np.max(y) because the tests fail otherwise. We
         # should really use something like assert_array_approx_equal. The
         # difference is due to fftw using a better algorithm w.r.t error
         # propagation compared to the ones from fftpack.
         peak = np.max(y)
         assert_array_almost_equal(y / peak, yr / peak, decimal=self.dec,
                                   err_msg="Size %d failed" % size)
Esempio n. 32
0
    def __compute_mfcc_for_window(self, s, window_index):
        """Compute the cepstrum of one window and cache the intermediate results.

        Stores, at ``window_index``, the FFT magnitude of ``s``, the spectrum
        weighted by every filter bank, the per-filter sums and their
        ``log10``; returns the orthonormal DCT of the log sums.
        """
        magnitude = np.abs(np.fft.fft(s, n=self.__fft_window_length))
        self.__fft_mag[window_index, :] = magnitude

        for band in range(self.__nfilters):
            # Weight the stored magnitude spectrum by this filter ...
            weighted = self.__filter_banks[band, :] * self.__fft_mag[window_index, :]
            self.__filtered_spectra[window_index, band, :] = weighted
            # ... and keep the total of the stored, filtered values.
            band_total = np.sum(self.__filtered_spectra[window_index, band, :])
            self.__filtered_spectra_sums[window_index, band] = band_total

        log_sums = np.log10(self.__filtered_spectra_sums[window_index, :])
        self.__filtered_spectra_sums_log[window_index, :] = log_sums

        return dct(self.__filtered_spectra_sums_log[window_index, :], norm='ortho')
    def test_definition_matlab(self):
        """Test correspondence with MATLAB (orthonormal mode).

        Inputs are converted to ``np.result_type(np.float32, self.rdt)``;
        the DCT-II of ``X[i]`` must keep that dtype and match ``Y[i]``.
        """
        work_dtype = np.result_type(np.float32, self.rdt)
        for idx, sample in enumerate(X):
            data = np.array(sample, dtype=work_dtype)
            reference = Y[idx]

            result = dct(data, norm="ortho", type=2)
            assert_equal(result.dtype, work_dtype)
            assert_array_almost_equal(result, reference, decimal=self.dec)
Esempio n. 34
0
def mfcc_framed(framed, nfft=512, fs=16000, nceps=13):
    """Compute Mel Frequency Cepstral Coefficients of already framed data.

    Parameters
    ----------
    framed: ndarray
        Framed input; the FFT is taken along its last axis.
    nfft: int
        FFT length.
    fs: int
        Sampling rate handed to ``trfbank``.
    nceps: int
        Number of cepstral coefficients kept per frame.

    Returns
    -------
    ceps: ndarray
        Mel-cepstrum coefficients.
    mspec: ndarray
        Log-spectrum in the mel-domain.
    spec: ndarray
        Magnitude spectrum of the frames.

    Notes
    -----
    MFCC are computed as follows:
        * Compute the spectrum amplitude of the frames
        * Filter the signal in the spectral domain with a triangular
        filter-bank, whose filters are approximatively linearly spaced on the
        mel scale, and have equal bandwith in the mel scale
        * Compute the DCT of the log-spectrum

    References
    ----------
    .. [1] S.B. Davis and P. Mermelstein, "Comparison of parametric
           representations for monosyllabic word recognition in continuously
           spoken sentences", IEEE Trans. Acoustics. Speech, Signal Proc.
           ASSP-28 (4): 357-366, August 1980."""

    # lowfreq = 400 / 3.
    lowfreq = 133.33
    linsc = 200 / 3.
    logsc = 1.0711703

    nlinfil = 13
    nlogfil = 27

    fbank = trfbank(fs, nfft, lowfreq, linsc, logsc, nlinfil, nlogfil)[0]

    # Compute the spectrum magnitude
    spec = np.abs(fft(framed, nfft, axis=-1))
    # Filter the spectrum through the triangle filterbank
    mspec = np.log10(np.dot(spec, fbank.T))
    # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
    ceps = dct(mspec, type=2, norm='ortho', axis=-1)[:, :nceps]

    return ceps, mspec, spec
Esempio n. 35
0
 def test_definition(self):
     """Compare ``dct`` with the reference data from ``fftw_dct_ref`` for every size in FFTWDATA_SIZES."""
     for size in FFTWDATA_SIZES:
         x, yr, dt = fftw_dct_ref(self.type, size, self.rdt)
         y = dct(x, type=self.type)
         assert_equal(y.dtype, dt)
         # XXX: we divide by np.max(y) because the tests fail otherwise. We
         # should really use something like assert_array_approx_equal. The
         # difference is due to fftw using a better algorithm w.r.t error
         # propagation compared to the ones from fftpack.
         peak = np.max(y)
         failure_note = "Size %d failed" % size
         assert_array_almost_equal(y / peak, yr / peak, decimal=self.dec,
                                   err_msg=failure_note)
def cepstrum(input, nceps):
    """
    Calculate cepstral coefficients from a mel spectrum with the Discrete
    Cosine Transform.

    Args:
        input: array of log outputs of Mel scale filterbank [N x nmelfilters]
               where N is the number of frames and nmelfilters the length of
               the filterbank
        nceps: number of output cepstral coefficients
    Output:
        array of Cepstral coefficients [N x nceps]
    """
    # Unnormalised DCT-II along the filter axis (the defaults of ``dct``).
    coefficients = dct(input, type=2, axis=-1)
    return coefficients[:, :nceps]
Esempio n. 37
0
    def __init_mfcc(self,
                    num_mel_bands=DEFAULT_MFCC_BANDS,
                    num_mfcc=DEFAULT_NUM_MFCC_COEFFICIENTS,
                    delta_N=DEFAULT_MFCC_DELTA_N):
        """Compute MFCCs plus their delta and delta-delta features.

        ``self.specgram`` is projected onto the mel binning matrix,
        thresholded, converted to dB (negative values clamped to 0) and
        transformed with an orthonormal DCT-II.

        Parameters:
            num_mel_bands: number of bands for ``get_mel_binning_matrix``.
            num_mfcc: number of DCT columns kept.
            delta_N: half-width (in frames) of the regression window.

        Returns:
            ``(ceps, deltas, delta_deltas)``, each transposed, with the
            first coefficient row dropped and flipped left-to-right.
        """

        def _regression_deltas(frames):
            # Per-frame regression slope over a window that shrinks towards
            # both ends of ``frames``; frames with an empty window stay zero.
            out = np.zeros(frames.shape)
            n_frames = len(frames)
            for i in range(n_frames):
                # Clamp the half-width so that i - n and i + n are always
                # valid indices, also for inputs shorter than the full window.
                span = min(delta_N, i, n_frames - i - 1)
                if span <= 0:
                    continue
                weighted = sum([
                    n * (frames[i + n] - frames[i - n])
                    for n in range(1, span + 1)
                ])
                out[i] = weighted / (2.0 * sum([n**2 for n in range(1, span + 1)]))
            return out

        mel_bin_matrix, freqs = self.get_mel_binning_matrix(num_mel_bands)
        Pxx2 = np.dot(self.specgram.T, mel_bin_matrix)

        # Unlike the mlab implementation, we threshold and log our FFT magnitudes
        # before returning
        Pxx2[Pxx2 < 1e-10] = 1e-10
        Pxx2 = 10. * np.log10(Pxx2)
        Pxx2[Pxx2 <= 0.0] = 0.0

        # http://pydoc.net/Python/scikits.talkbox/0.2.4.dev/scikits.talkbox.features.mfcc/
        ceps = dct(Pxx2, type=2, norm='ortho', axis=-1)[:, :num_mfcc]
        ceps = np.flipud(ceps)

        deltas = _regression_deltas(ceps)
        delta_deltas = _regression_deltas(deltas)

        ceps = np.fliplr(ceps.T[1:])
        deltas = np.fliplr(deltas.T[1:])
        delta_deltas = np.fliplr(delta_deltas.T[1:])
        return ceps, deltas, delta_deltas
Esempio n. 38
0
def stMFCC(X, fbank, nceps):
    """
    Computes the MFCCs of a frame, given the fft mag
    ARGUMENTS:
        X:        fft magnitude abs(FFT)
        fbank:    filter bank (see mfccInitFilterBanks)
        nceps:    number of leading cepstral entries to return
    RETURN
        ceps:     the first ``nceps`` entries of the orthonormal DCT-II
    Note:    MFCC calculation is, in general, taken from the scikits.talkbox library (MIT Licence),
    #    with a small number of modifications to make it more compact and suitable for the pyAudioAnalysis Lib
    """

    # Filter-bank energies, offset by the module-level ``eps`` before the log.
    band_energy = numpy.dot(X, fbank.T) + eps
    log_energy = numpy.log10(band_energy)
    cepstrum = dct(log_energy, type=2, norm='ortho', axis=-1)
    return cepstrum[:nceps]
def short_term_MFCC(X, fbank, nceps):
    """
    Calculating the MFCCs of a frame, given the fft mag
    ARGUMENTS:
        X:        fft magnitude abs(FFT)
        fbank:    filter bank (see mfcc_init_filter_banks)
        nceps:    number of leading cepstral entries to return
    RETURN
        ceps:     the first ``nceps`` entries of the orthonormal DCT-II
    Note:    MFCC calculation is, in general, taken from the scikits.talkbox library (MIT Licence),
    #    with a small number of modifications to make it more compact and suitable for the pyAudioAnalysis Lib
    """

    filtered = numpy.dot(X, fbank.T)
    # ``eps`` (module level) is added before taking the logarithm.
    mspec = numpy.log10(filtered + eps)
    all_ceps = dct(mspec, type=2, norm='ortho', axis=-1)
    return all_ceps[:nceps]
Esempio n. 40
0
def mfcc(fft_magnitude, fbank, num_mfcc_feats):
    """
    Computes the MFCCs of a frame, given the fft mag
    ARGUMENTS:
        fft_magnitude:  fft magnitude abs(FFT)
        fbank:          filter bank (see mfccInitFilterBanks)
        num_mfcc_feats: number of leading cepstral entries to return
    RETURN
        ceps:           the first ``num_mfcc_feats`` entries of the
                        orthonormal DCT-II
    Note:    MFCC calculation is, in general, taken from the
             scikits.talkbox library (MIT Licence),
    #    with a small number of modifications to make it more
         compact and suitable for the pyAudioAnalysis Lib
    """
    band_energy = np.dot(fft_magnitude, fbank.T) + eps
    log_energy = np.log10(band_energy)
    cepstrum = dct(log_energy, type=2, norm='ortho', axis=-1)
    return cepstrum[:num_mfcc_feats]
Esempio n. 41
0
def mfcc(x,
         framesize=1024,
         hopsize=512,
         fs=44100,
         window="hamming",
         min_freq=0,
         max_freq=22050,
         n_mel_bands=40,
         n_ceps=13,
         preemp=False):
    """ Calculate MFCC
    @param x input signal
    @param framesize STFT frame size
    @param hopsize STFT hop size
    @param fs sampling rate
    @param window type of window function
    @param min_freq minimum frequency of mel filterbank
    @param max_freq maximum frequency of mel filterbank
    @param n_mel_bands number of channels of mel filterbank
    @param n_ceps number of coefficients
    @param preemp flag for using pre-emphasis
    @return (MFCC coefficients, center frequencies)
    """

    # Pre-emphasis (optional, fixed coefficient 0.97)
    emphasized = _pre_emphasis(x, 0.97) if preemp else x

    # mel-scale spectrogram
    mel_spe, center_freqs = mel_spectrogram(emphasized, framesize, hopsize,
                                            fs, window, min_freq, max_freq,
                                            n_mel_bands)
    log_mel = sp.log10(mel_spe + 1e-10)

    # DCT (conversion to the cepstrum = MFCC)
    ceps = dct(log_mel, type=2, norm="ortho", axis=-1)[:, :n_ceps]

    # nan check & inf check
    ceps = feature.check_inf_2d(feature.check_nan_2d(ceps))

    return ceps, center_freqs
Esempio n. 42
0
def gcv(gamma_,y,weight,eigenvalues2,n,nMiss,y_hat_final):
  """GCV score of the DCT-based smoother for log-smoothing parameter ``gamma_``.

  The smoothed signal is obtained by a fixed-point iteration that stops once
  the mean squared change drops to 1e-10 or below; it is written in place
  into ``y_hat_final``. Returns ``numerator/denominator`` where the
  numerator is the weighted squared residual divided by ``n - nMiss`` and
  the denominator is ``(1 - trace/n)**2``.
  """
  from scipy.fftpack import dct,idct
  gamma = np.exp(gamma_)
  # Per-coefficient shrinkage applied in the DCT domain
  shrink = 1./(1+gamma*eigenvalues2)
  squared_weight = weight*weight
  previous = y.copy()
  change = 1e20
  while change > 1e-10:
    blended = squared_weight*(y-previous)+previous
    smoothed = idct(shrink*dct(blended,norm='ortho',type=2),norm='ortho',type=2)
    step = smoothed - previous
    change = np.mean(step*step)
    previous = smoothed
  y_hat_final[:] = smoothed
  residual = weight*(smoothed-y)
  numerator = np.dot(residual,residual)/(n-nMiss)
  denominator = (1 - shrink.sum()/n)**2
  return numerator/denominator
Esempio n. 43
0
def arspecs(input_wav, order, Atal=False):
    """Return AR-based cepstral features of ``input_wav``.

    With ``Atal`` set, the result of ``atal(input_wav, order, 30)`` is
    returned directly. Otherwise the two components of
    ``arspec(input_wav, order, 4096)`` are combined into a log magnitude,
    exact zeros are replaced by 1e-10, and the first 30 coefficients of the
    orthonormal DCT-II of ``log10`` of those values are returned.
    """
    epsilon = 0.0000000001
    if Atal:
        return atal(input_wav, order, 30)

    ars = arspec(input_wav, order, 4096)
    log_magnitude = [
        math.log(math.sqrt((k**2) + (l**2))) for k, l in zip(ars[0], ars[1])
    ]
    # Exact zeros are replaced before log10 is applied.
    log_magnitude = [epsilon if value == 0.0 else value
                     for value in log_magnitude]
    mspec1 = np.log10(log_magnitude)
    # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
    cepstrum = dct(mspec1, type=2, norm='ortho', axis=-1)
    return cepstrum[:30]
def mfcc(input, nwin=256, nfft=512, fs=16000, nceps=13):
    """Compute mel-frequency cepstral coefficients of a signal.

    Parameters
    ----------
    input : ndarray
        Signal from which the coefficients are computed.
    nwin : int
        Frame length in samples (also the Hamming window length).
    nfft : int
        FFT length used for each frame.
    fs : int
        Sampling rate handed to ``trfbank``.
    nceps : int
        Number of cepstral coefficients kept per frame.

    Returns
    -------
    ceps : ndarray
        The first ``nceps`` orthonormal DCT-II coefficients of the log
        filterbank energies, one row per frame.
    """
    import numpy
    from scipy.fftpack import fft, dct
    try:
        from scipy.signal.windows import hamming
    except ImportError:
        # Older SciPy releases expose the window at scipy.signal only.
        from scipy.signal import hamming

    # Third argument to segment_axis (presumably the overlap in samples,
    # i.e. a hop of 160 samples -- verify against segment_axis).
    over = nwin - 160
    prefac = 0.97

    #lowfreq = 400 / 3.
    lowfreq = 133.33

    linsc = 200/3.
    logsc = 1.0711703

    nlinfil = 13
    nlogfil = 27

    # Replacement for non-positive filterbank energies so that log10 never
    # sees zero.
    log_floor = 0.00001

    w = hamming(nwin, sym=0)

    fbank = trfbank(fs, nfft, lowfreq, linsc, logsc, nlinfil, nlogfil)[0]

    #------------------
    # Compute the MFCC
    #------------------
    extract = preemp(input, prefac)
    framed = segment_axis(extract, nwin, over) * w

    # Compute the spectrum magnitude
    spec = numpy.abs(fft(framed, nfft, axis=-1))

    # Filter the spectrum through the triangle filterbank
    arr = numpy.dot(spec, fbank.T)
    # Same output as the former print statement, valid on Python 2 and 3.
    print("LOG ARRAy = %s" % (arr,))

    # The previous guards (`if fbank<=0`, `if spec<=0`) applied `if` to whole
    # arrays, which raises ValueError for multi-element arrays.  Guard the
    # log argument element-wise instead.
    arr = numpy.where(arr <= 0, log_floor, arr)
    mspec = numpy.log10(arr)

    # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
    ceps = dct(mspec, type=2, norm='ortho', axis=-1)[:, :nceps]

    return ceps
Esempio n. 45
0
  def __init_mfcc(self,
      num_mel_bands = DEFAULT_MFCC_BANDS,
      num_mfcc = DEFAULT_NUM_MFCC_COEFFICIENTS,
      delta_N = DEFAULT_MFCC_DELTA_N):
    """Compute MFCCs plus delta and delta-delta features from self.specgram.

    Parameters
    ----------
    num_mel_bands : number of bands requested from
        ``self.get_mel_binning_matrix``.
    num_mfcc : number of DCT coefficients kept per frame (coefficient 0 is
        dropped from the returned arrays).
    delta_N : maximum half-width, in frames, of the regression window used
        for the deltas; it shrinks near both ends of the sequence.

    Returns
    -------
    (ceps, deltas, delta_deltas) : three arrays of identical shape, each
        transposed so that rows are coefficients 1..num_mfcc-1.
    """
    def regression_deltas(frames):
      # Regression-based differences along axis 0 of `frames`.
      n_frames = len(frames)
      out = np.zeros(frames.shape)
      for i in range(n_frames):
        # Largest symmetric window that fits on both sides.  The previous
        # if/elif chain only checked one side at a time and raised
        # IndexError when there were fewer than 2*delta_N - 1 frames.
        half = min(delta_N, i, n_frames - i - 1)
        if half <= 0:
          continue
        steps = range(1, half + 1)
        weighted = sum([n*(frames[i + n] - frames[i - n]) for n in steps])
        out[i] = weighted / (2.0*sum([n**2 for n in steps]))
      return out

    mel_bin_matrix, _ = self.get_mel_binning_matrix(num_mel_bands)
    Pxx2 = np.dot(self.specgram.T, mel_bin_matrix)

    # Unlike the mlab implementation, we threshold and log our FFT magnitudes
    # before returning
    Pxx2[Pxx2 < 1e-10] = 1e-10
    Pxx2 = 10. * np.log10(Pxx2)
    Pxx2[Pxx2 <= 0.0] = 0.0

    # http://pydoc.net/Python/scikits.talkbox/0.2.4.dev/scikits.talkbox.features.mfcc/
    ceps = dct(Pxx2, type=2, norm='ortho', axis=-1)[:, :num_mfcc]
    ceps = np.flipud(ceps)

    # Deltas are computed on the flipped row order, as before.
    # NOTE(review): confirm this orientation is intended.
    deltas = regression_deltas(ceps)
    delta_deltas = regression_deltas(deltas)

    # Drop coefficient 0, then reverse the columns, which undoes the flipud
    # applied to the rows above.
    ceps = np.fliplr(ceps.T[1:])
    deltas = np.fliplr(deltas.T[1:])
    delta_deltas = np.fliplr(delta_deltas.T[1:])
    return ceps, deltas, delta_deltas
Esempio n. 46
0
 def test_definition(self):
     """Check idct against the reference data for every tabulated size."""
     for i in FFTWDATA_SIZES:
         xr, yr = fftw_ref(self.type, i, self.rdt)
         x = idct(yr, type=self.type)
         # The unnormalized transform pair is an identity only up to a
         # factor of 2(N-1) for type 1 and 2N otherwise; divide it out.
         scale = 2 * (i - 1) if self.type == 1 else 2 * i
         x /= scale
         self.assertTrue(
             x.dtype == self.rdt,
             "Output dtype is %s, expected %s" % (x.dtype, self.rdt))
         # XXX: we divide by np.max(x) because the tests fail otherwise. We
         # should really use something like assert_array_approx_equal. The
         # difference is due to fftw using a better algorithm w.r.t error
         # propagation compared to the ones from fftpack.
         peak = np.max(x)
         assert_array_almost_equal(x / peak,
                                   xr / peak,
                                   decimal=self.dec,
                                   err_msg="Size %d failed" % i)
Esempio n. 47
0
def mfcc(input, nwin=256, nfft=512, fs=16000, nceps=13):
    """Compute Mel Frequency Cepstral Coefficients (MFCC).

    Parameters
    ----------
    input: ndarray
        Signal from which the coefficients are computed.
    nwin: int
        Frame length in samples (also the Hamming window length).
    nfft: int
        FFT length used for every frame.
    fs: int
        Sampling rate handed to the filterbank constructor.
    nceps: int
        Number of cepstral coefficients kept per frame.

    Returns
    -------
    ceps: ndarray
        Mel-cepstrum coefficients (first ``nceps`` per frame).
    mspec: ndarray
        Log-spectrum in the mel domain.
    spec: ndarray
        Magnitude spectrum of the windowed frames.

    Notes
    -----
    Processing steps:
        * Pre-emphasis in the time domain
        * Framing and Hamming windowing, then the FFT magnitude
        * Triangular filterbank applied in the spectral domain
        * log10 followed by an orthonormal DCT-II

    References
    ----------
    .. [1] S.B. Davis and P. Mermelstein, "Comparison of parametric
           representations for monosyllabic word recognition in continuously
           spoken sentences", IEEE Trans. Acoustics. Speech, Signal Proc.
           ASSP-28 (4): 357-366, August 1980."""

    # Analysis constants, taken from the auditory toolbox.
    # Third argument to segment_axis (presumably the frame overlap --
    # verify against segment_axis).
    overlap = nwin - 160
    # Pre-emphasis compensates the -6dB/octave rolloff of the radiation at
    # the lips.
    preemphasis = 0.97
    lowest_freq = 133.33   # roughly 400 / 3.
    linear_step = 200/3.
    log_step = 1.0711703
    n_linear = 13
    n_log = 27

    window = hamming(nwin, sym=0)
    filterbank = trfbank(fs, nfft, lowest_freq, linear_step, log_step,
                         n_linear, n_log)[0]

    # Time domain: pre-emphasize, cut into frames, apply the window.
    emphasized = preemp(input, preemphasis)
    frames = segment_axis(emphasized, nwin, overlap)
    frames = frames * window

    # Spectral domain: magnitude spectrum -> mel filterbank -> log.
    spec = np.abs(fft(frames, nfft, axis=-1))
    mel_energy = np.dot(spec, filterbank.T)
    mspec = np.log10(mel_energy)

    # Cepstral domain: keep only the leading DCT coefficients.
    cepstrum = dct(mspec, type=2, norm='ortho', axis=-1)
    ceps = cepstrum[:, :nceps]

    return ceps, mspec, spec
Esempio n. 48
0
def mfcc(input, nwin=256, nfft=512, fs=16000, nceps=13):
    """Compute Mel Frequency Cepstral Coefficients.

    Parameters
    ----------
    input: ndarray
        input from which the coefficients are computed
    nwin: int
        frame length in samples (also the Hamming window length)
    nfft: int
        FFT length used for every frame
    fs: int
        sampling rate; also sets the value passed to ``segment_axis`` as
        its third argument (10 ms worth of samples)
    nceps: int
        number of cepstral coefficients kept per frame

    Returns
    -------
    ceps: ndarray
        Mel-cepstrum coefficients
    mspec: ndarray
        Log-spectrum in the mel-domain.
    spec: ndarray
        Magnitude spectrum of the windowed frames.

    Notes
    -----
    MFCC are computed as follows:
        * Pre-processing in time-domain (pre-emphasizing)
        * Compute the spectrum amplitude by windowing with a Hamming window
        * Filter the signal in the spectral domain with a triangular
        filter-bank, whose filters are approximatively linearly spaced on the
        mel scale, and have equal bandwith in the mel scale
        * Compute the DCT of the log-spectrum

    The number of frames is printed to stdout.  Cepstral mean subtraction
    is not applied.

    References
    ----------
    .. [1] S.B. Davis and P. Mermelstein, "Comparison of parametric
           representations for monosyllabic word recognition in continuously
           spoken sentences", IEEE Trans. Acoustics. Speech, Signal Proc.
           ASSP-28 (4): 357-366, August 1980."""

    # Number of overlapping samples in each frame
    t_overlap = 10*10**(-3)  # Time in seconds of overlapping between frames
    over = int(t_overlap*fs)

    # Pre-emphasis factor (to take into account the -6dB/octave rolloff of the
    # radiation at the lips level)
    prefac = 0.97

    #lowfreq = 400 / 3.
    lowfreq = 133.33
    #highfreq = 6855.4976
    linsc = 200/3.
    logsc = 1.0711703

    nlinfil = 13
    nlogfil = 27

    w = hamming(nwin, sym=0)

    # "fbank" is a nfil-by-nfft Numpy 2D array; the second value returned
    # by trfbank is not needed here.
    fbank, _freqs = trfbank(fs, nfft, lowfreq, linsc, logsc, nlinfil, nlogfil)

    #------------------
    # Compute the MFCC
    #------------------
    extract = preemp(input, prefac)
    framed = segment_axis(extract, nwin, over) * w

    # Compute the spectrum magnitude
    spec = np.abs(fft(framed, nfft, axis=-1))
    # Filter the spectrum through the triangle filterbank
    mspec = np.log10(np.dot(spec, fbank.T))
    # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
    ceps = dct(mspec, type=2, norm='ortho', axis=-1)[:, :nceps]
    nframes = ceps.shape[0]
    # Same output as the former print statement, valid on Python 2 and 3.
    print('nframes:  %s' % nframes)

    return ceps, mspec, spec
Esempio n. 49
0
 def test_dct_complex64(self):
     y = dct(1j * np.arange(5, dtype=np.complex64))
     x = 1j * dct(np.arange(5))
     assert_array_almost_equal(x, y)
Esempio n. 50
0
def MFCC(X, fbank, nceps):
    """Return the first ``nceps`` cepstral coefficients of a spectrum.

    Parameters
    ----------
    X : ndarray
        Spectrum of one frame (1-D) or of several frames stacked along the
        first axis (2-D); presumably a magnitude spectrum -- verify against
        the caller.
    fbank : ndarray
        Filterbank matrix; ``X`` is projected onto ``fbank.T``.
    nceps : int
        Number of coefficients to keep.

    Returns
    -------
    ndarray
        Leading ``nceps`` orthonormal DCT-II coefficients along the last
        axis.
    """
    # The module-level eps keeps log10 away from zero.
    mspec = np.log10(np.dot(X, fbank.T) + eps)
    # The DCT runs along the last axis, so truncate along that same axis.
    # The former `[:nceps]` cut the first axis, which for a 2-D input
    # dropped frames instead of coefficients; for 1-D input the result is
    # unchanged.
    ceps = dct(mspec, type=2, norm='ortho', axis=-1)[..., :nceps]
    return ceps
Esempio n. 51
0
 def test_dct_complex(self):
     y = dct(np.arange(5) * 1j)
     x = 1j * dct(np.arange(5))
     assert_array_almost_equal(x, y)