コード例 #1
0
def get_mfcc(rate, sig):
    """Return a slice of scaled MFCC features from a shelf-filtered signal.

    The signal is analysed once (liftered log filterbank features) to pick a
    shelf frequency, filtered with an ``AudioEffectsChain`` (low-shelf boost,
    high-shelf cut, limiter), and the filtered signal is converted to
    16 cepstral coefficients per frame and passed through
    ``preprocessing.scale``.

    :param rate: sample rate of ``sig``.
    :param sig: the audio samples.
    :returns: rows 1 through 4 of the scaled feature matrix
        (``features[1:5, :]``).
    """
    # Analyse the signal at its real sample rate; without ``rate`` the
    # library default sample rate would be assumed, which is inconsistent
    # with the final mfcc.mfcc call below.
    analysis = np.asarray(mfcc.lifter(mfcc.logfbank(sig, rate)))

    # Squaring makes positive and negative values comparable; the frame with
    # the largest sum of squares is treated as the strongest one.
    frame_power = np.sum(np.square(analysis), axis=1)
    strongest_frame = int(np.argmax(frame_power))
    hz = mfcc.mel2hz(analysis[strongest_frame])

    min_hz = min(hz)

    speech_booster = (
        AudioEffectsChain()
        .lowshelf(frequency=min_hz * (-1), gain=12.0, slope=0.5)
        .highshelf(frequency=min_hz * (-1) * 1.2, gain=-12.0, slope=0.5)
        .limiter(gain=8.0)
    )
    y_speech_boosted = speech_booster(sig)

    features = mfcc.mfcc(y_speech_boosted, rate, 0.025, 0.01, 16, nfilt=40,
                         nfft=512, appendEnergy=False, winfunc=np.hamming)

    # NOTE(review): presumably sklearn's preprocessing.scale, which
    # standardises each column (zero mean, unit variance) rather than
    # mapping values into [0, 1] -- confirm against the file's imports.
    features = preprocessing.scale(features)

    return features[1:5, :]
コード例 #2
0
def mfcc_(signal,
          samplerate=16000,
          winlen=0.08,
          winstep=0.04,
          numcep=39,
          nfilt=39,
          nfft=2048,
          lowfreq=12.5,
          highfreq=None,
          preemph=0.97,
          ceplifter=39,
          appendEnergy=True,
          winfunc=lambda x: numpy.ones((x, ))):
    """Compute liftered cepstral coefficients from filterbank energies.

    ``fbank`` is called with the signal and framing/filterbank arguments and
    returns per-frame filterbank energies plus a per-frame energy. The log of
    the filterbank energies is transformed with a type-2 orthonormal DCT
    along axis 1, whose length is ``max(numcep, number of filterbank
    columns)``; the first ``numcep`` columns are kept and passed to
    ``lifter`` with ``ceplifter``.

    :param signal: audio signal handed to ``fbank``.
    :param numcep: number of cepstral columns to keep.
    :param ceplifter: lifter argument forwarded to ``lifter``.
    :param appendEnergy: when true, column 0 is overwritten with the log of
        the frame energy.
    :param winfunc: window function forwarded to ``fbank``; the default
        returns an all-ones window.
    :returns: the cepstral feature matrix (one row per frame).

    The remaining parameters are forwarded to ``fbank`` unchanged.
    """
    fbank_energies, frame_energy = fbank(signal, samplerate, winlen, winstep,
                                         nfilt, nfft, lowfreq, highfreq,
                                         preemph, winfunc)
    log_energies = numpy.log(fbank_energies)

    dct_length = max(numcep, log_energies.shape[1])
    cepstra = dct(log_energies, n=dct_length, type=2, axis=1, norm='ortho')
    cepstra = lifter(cepstra[:, :numcep], ceplifter)

    if appendEnergy:
        # the first cepstral coefficient is replaced by the log frame energy
        cepstra[:, 0] = numpy.log(frame_energy)
    return cepstra
コード例 #3
0
def get_MFCC(sr, audio):
    """Return scaled MFCC features computed from a low-shelf-filtered signal.

    Step 1 (shelf selection): liftered log filterbank features of the raw
    audio are computed, the frame with the largest sum of squares is
    selected, its values are passed through ``mfcc.mel2hz`` and the minimum
    of the result drives the shelf frequency.

    Step 2 (final features): the audio is filtered with an
    ``AudioEffectsChain`` low-shelf, 16 cepstral coefficients per frame are
    computed from the filtered audio, and the matrix is passed through
    ``preprocessing.scale``.

    :param sr: sample rate of ``audio``.
    :param audio: the audio samples.
    :returns: the scaled feature matrix.
    """
    # ---- shelf selection -------------------------------------------------
    # Analyse the signal at its real sample rate; without ``sr`` the library
    # default sample rate would be assumed, which is inconsistent with the
    # final mfcc.mfcc call below.
    analysis = np.asarray(mfcc.lifter(mfcc.logfbank(audio, sr)))

    # Values can be positive or negative, so compare frames by their sum of
    # squares.
    frame_power = np.sum(np.square(analysis), axis=1)
    strongest_frame = int(np.argmax(frame_power))
    hz = mfcc.mel2hz(analysis[strongest_frame])

    min_hz = min(hz)

    # low-shelf filter applied to the original audio
    speech_booster = AudioEffectsChain().lowshelf(
        frequency=min_hz * (-1), gain=20.0, slope=0.5)
    y_speech_boosted = speech_booster(audio)

    # ---- final MFCC calculation -------------------------------------------
    features = mfcc.mfcc(y_speech_boosted,
                         sr,
                         0.025,
                         0.01,
                         16,
                         nfilt=40,
                         nfft=512,
                         appendEnergy=False,
                         winfunc=np.hamming)

    # NOTE(review): presumably sklearn's preprocessing.scale, which
    # standardises each column (zero mean, unit variance) rather than
    # mapping values into [0, 1] -- confirm against the file's imports.
    features = preprocessing.scale(features)

    return features
コード例 #4
0
    def _get_MFCC_features(self, index, winstep, nfft=512):
        """Build the per-frame feature matrix for the recording at ``index``.

        The waveform comes from ``self._get_wav_data(index)``. Depending on
        ``self.USE_LOG_MELS`` the result holds either log filterbank energies
        with the log frame energy appended as a last column, or liftered DCT
        coefficients whose column 0 is replaced by the log frame energy.
        When ``self.USE_MFCC_DELTAS`` is set, ``delta(features, N=2)`` is
        appended column-wise.

        :param index: identifier forwarded to ``self._get_wav_data``.
        :param winstep: frame step forwarded to ``fbank``.
        :param nfft: FFT size forwarded to ``fbank``.
        :returns: a 2-D array with one row per frame; empty in one dimension
            when there is no audio or no features were requested.
        """
        # load the .wav data first
        sample_rate, waveform = self._get_wav_data(index)

        if waveform is None:
            # No warning here: _get_wav_data is expected to have issued one.
            # The result is a *length-zero* sequence of vectors, each of
            # length num_MFCC_features.  Via self.get() such sequences are
            # padded out to self.max_samples anyway, but a direct call to the
            # generator really does return zero-length sequences.
            return np.zeros((0, self.num_MFCC_features))

        if self.num_MFCC_features == 0:
            print('WARNING: no MFCCs requested')
            # In theory this lets a caller ask for no MFCCs to be packaged
            # with the other data; in practice the encoder-target penalty is
            # set to 0 instead, so this path is rarely exercised.
            n_frames = int(waveform.shape[0] / sample_rate / winstep)
            return np.zeros((n_frames, 0))

        # The log-mel computation is unpacked here (rather than calling a
        # one-shot MFCC routine) because the log-mels may be used directly.
        lowfreq, highfreq, preemph, ceplifter = 0, None, 0.97, 22
        fbank_energies, frame_energy = fbank(
            waveform, sample_rate, self.mfcc_winlen, winstep,
            self.num_mel_features, nfft, lowfreq, highfreq, preemph,
            lambda x: np.ones((x, )))
        log_mels = np.log(fbank_energies)

        if self.USE_LOG_MELS:
            # log-mels plus the log frame energy as an extra column
            features = np.concatenate(
                (log_mels, np.log(frame_energy)[:, None]), axis=1)
        else:
            # MFCCs: DCT, truncate, lifter, then energy in column 0
            cepstra = dct(log_mels, type=2, axis=1, norm='ortho')
            cepstra = lifter(cepstra[:, :self.num_cepstral_coeffs], ceplifter)
            cepstra[:, 0] = np.log(frame_energy)
            features = cepstra

        # optionally append the deltas
        if not self.USE_MFCC_DELTAS:
            return features
        return np.concatenate((features, delta(features, N=2)), axis=1)
コード例 #5
0
def local_mfcc(signal,
               samplerate=16000,
               winlen=0.025,
               winstep=0.01,
               numcep=13,
               nfilt=26,
               nfft=512,
               lowfreq=0,
               highfreq=None,
               preemph=0.97,
               ceplifter=22,
               filtertype='mel',
               appendEnergy=True,
               winfunc=np.hamming):
    """Compute MFCC features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param filtertype: filterbank type, forwarded unchanged to ``local_fbank``. Default is 'mel'.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param winfunc: the analysis window to apply to each frame; called with the frame length. Default is ``np.hamming``. Any numpy window function can be used, e.g. winfunc=np.hanning
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    # NOTE: the default window used to be ``lambda x: np.hamming((x, ))``,
    # which handed np.hamming a tuple instead of an integer length and so
    # failed as soon as the window was evaluated.
    feat, energy = local_fbank(signal=signal,
                               samplerate=samplerate,
                               winlen=winlen,
                               winstep=winstep,
                               nfilt=nfilt,
                               nfft=nfft,
                               lowfreq=lowfreq,
                               highfreq=highfreq,
                               preemph=preemph,
                               winfunc=winfunc,
                               filtertype=filtertype)
    feat = np.log(feat)
    feat = dct(feat, type=2, axis=1, norm='ortho')[:, :numcep]
    feat = lifter(feat, ceplifter)
    if appendEnergy:
        # replace first cepstral coefficient with log of frame energy
        feat[:, 0] = np.log(energy)
    return feat
コード例 #6
0
ファイル: tasks.py プロジェクト: kingsleydon/tuna_server
def mfcc(frames,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
        nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True):
    """Compute cepstral features from ``frames``.

    ``frames`` is handed to ``sigproc.powspec`` to obtain a power spectrum,
    which is projected onto the filterbank returned by
    ``python_speech_features.get_filterbanks``; the log of that is passed
    through a type-2 orthonormal DCT, truncated to ``numcep`` columns and
    liftered.

    ``winlen``, ``winstep`` and ``preemph`` are accepted but not used by
    this function.

    :returns: feature matrix with ``numcep`` columns; column 0 holds the log
        frame energy when ``appendEnergy`` is true.
    """
    tiny = numpy.finfo(float).eps

    power_spectrum = sigproc.powspec(frames, nfft)

    # total energy per frame; zeros are replaced so the log stays finite
    frame_energy = numpy.sum(power_spectrum, 1)
    frame_energy = numpy.where(frame_energy == 0, tiny, frame_energy)

    filterbank = python_speech_features.get_filterbanks(
        nfilt, nfft, samplerate, lowfreq, highfreq)

    # filterbank energies, again guarding against log(0)
    fbank_energies = numpy.dot(power_spectrum, filterbank.T)
    fbank_energies = numpy.where(fbank_energies == 0, tiny, fbank_energies)

    cepstra = dct(numpy.log(fbank_energies), type=2, axis=1, norm='ortho')
    cepstra = python_speech_features.lifter(cepstra[:, :numcep], ceplifter)

    if appendEnergy:
        # first cepstral coefficient is replaced by the log frame energy
        cepstra[:, 0] = numpy.log(frame_energy)
    return cepstra
コード例 #7
0
def mfcc_energy(signal,
                samplerate=16000,
                winlen=0.025,
                winstep=0.01,
                numcep=13,
                nfilt=40,
                nfft=512,
                lowfreq=0,
                highfreq=None,
                preemph=0.97,
                ceplifter=22,
                appendEnergy=True,
                winfunc=np.hamming):
    """Return liftered cepstra (without coefficient 0) and log frame energy.

    ``fbank`` supplies filterbank energies and a per-frame energy. The log
    filterbank energies go through a type-2 orthonormal DCT along axis 1;
    columns ``1 .. numcep-1`` are kept and passed to ``lifter``.

    ``appendEnergy`` is accepted but not used by this function.

    :returns: ``(mfcc, energy)`` where ``energy`` is the log frame energy
        reshaped to a single column.
    """
    fbank_energies, frame_energy = fbank(signal, samplerate, winlen, winstep,
                                         nfilt, nfft, lowfreq, highfreq,
                                         preemph, winfunc)

    cepstra = dct(np.log(fbank_energies), type=2, axis=1, norm='ortho')
    # column 0 (the 0-th DCT coefficient) is dropped before liftering
    cepstra = lifter(cepstra[:, 1:numcep], ceplifter)

    n_frames = frame_energy.shape[0]
    log_energy = np.reshape(np.log(frame_energy), (n_frames, 1))
    return cepstra, log_energy
コード例 #8
0
def lift(signal,
         samplerate=16000,
         winlen=0.08,
         winstep=0.04,
         numcep=39,
         nfilt=39,
         nfft=2048,
         lowfreq=12.5,
         highfreq=None,
         preemph=0.97,
         ceplifter=39,
         winfunc=lambda x: numpy.ones((x, ))):
    """Compute liftered cepstral coefficients; frame energy is not used.

    The log of the filterbank energies returned by ``fbank`` is transformed
    with a type-2 orthonormal DCT along axis 1 (transform length
    ``max(numcep, number of filterbank columns)``), truncated to ``numcep``
    columns and passed to ``lifter`` with ``ceplifter``.

    :returns: the liftered cepstral matrix.
    """
    # the second value returned by fbank (frame energy) is ignored here
    fbank_energies, _ = fbank(signal, samplerate, winlen, winstep, nfilt,
                              nfft, lowfreq, highfreq, preemph, winfunc)
    log_energies = numpy.log(fbank_energies)

    dct_length = max(numcep, log_energies.shape[1])
    cepstra = dct(log_energies, n=dct_length, type=2, axis=1, norm='ortho')
    return lifter(cepstra[:, :numcep], ceplifter)
コード例 #9
0
def get_features(filename, numcep, numfilt, winlen, winstep, grad):
    """Read an audio file and return normalised cepstral features.

    The cepstra are centred per column and divided by half the per-column
    standard deviation; the log frame energy is appended as a last column.

    :param filename: path of the audio file opened with ``Sndfile``.
    :param numcep: number of cepstral columns to keep after the DCT.
    :param numfilt: number of filters, forwarded to ``sf.fbank`` as ``nfilt``.
    :param winlen: window length forwarded to ``sf.fbank``.
    :param winstep: window step forwarded to ``sf.fbank``.
    :param grad: derivative order. ``grad >= 1`` appends
        ``np.gradient`` along axis 0 of the feature matrix; ``grad == 2``
        additionally appends the gradient of that gradient. Values above 2
        only get the first-order columns.
    :returns: ``(mat, frames, samplerate)`` -- the feature matrix, the number
        of frames reported by the file, and its sample rate.
    """
    f = Sndfile(filename, 'r')
    try:
        frames = f.nframes
        samplerate = f.samplerate
        data = np.asarray(f.read_frames(frames))
    finally:
        # always release the file handle, even if reading fails
        f.close()

    # cepstral coefficients
    feat_raw, energy = sf.fbank(data,
                                samplerate,
                                winlen,
                                winstep,
                                nfilt=numfilt)
    feat = np.log(feat_raw)
    feat = sf.dct(feat, type=2, axis=1, norm='ortho')[:, :numcep]
    feat = sf.lifter(feat, L=22)
    feat = np.asarray(feat)

    # log energy as a column vector
    log_energy = np.log(energy)
    log_energy = log_energy.reshape([log_energy.shape[0], 1])

    mat = (feat - np.mean(feat, axis=0)) / (0.5 * np.std(feat, axis=0))
    mat = np.concatenate((mat, log_energy), axis=1)

    # first order derivatives
    if grad >= 1:
        gradf = np.gradient(mat)[0]
        mat = np.concatenate((mat, gradf), axis=1)

    # second order derivatives
    if grad == 2:
        grad2f = np.gradient(gradf)[0]
        mat = np.concatenate((mat, grad2f), axis=1)

    return mat, frames, samplerate
コード例 #10
0
def extract_mfcc(signal,
                 samplerate=16000,
                 winlen=0.025,
                 winstep=0.01,
                 numcep=13,
                 nfilt=26,
                 nfft=512,
                 lowfreq=0,
                 highfreq=None,
                 preemph=0.97,
                 ceplifter=22,
                 appendEnergy=True,
                 winfunc=lambda x: numpy.ones((x, ))):
    """Return liftered cepstra and the log frame energy.

    The log filterbank energies from ``fbank`` are transformed with a type-2
    orthonormal DCT along axis 1, truncated to ``numcep`` columns and passed
    to ``lifter``.

    :param appendEnergy: when true, the log frame energy is appended to the
        cepstra as an extra trailing column (it does not replace column 0).
    :returns: ``(feat, log_energy)``.
    """
    fbank_energies, frame_energy = fbank(signal, samplerate, winlen, winstep,
                                         nfilt, nfft, lowfreq, highfreq,
                                         preemph, winfunc)

    cepstra = dct(numpy.log(fbank_energies), type=2, axis=1, norm='ortho')
    cepstra = lifter(cepstra[:, :numcep], ceplifter)

    if not appendEnergy:
        return cepstra, numpy.log(frame_energy)

    # append the log frame energy as an additional column
    with_energy = numpy.c_[cepstra, numpy.log(frame_energy)]
    return with_energy, numpy.log(frame_energy)
コード例 #11
0
def logfbank_features(signal,
                      samplerate=44100,
                      fps=24,
                      num_filt=40,
                      num_cepstra=40,
                      nfft=8192,
                      **kwargs):
    """Compute normalised cepstral features with a trailing log-energy column.

    The frame step is ``2 / fps`` and the window length is twice the step.
    When more than one frame is produced, the cepstra are centred per column
    and divided by half the per-column standard deviation.

    Extra keyword arguments are accepted but not used by this function.

    :returns: feature matrix, one row per frame.
    :raises AssertionError: if the number of rows exceeds
        ``fps * duration`` by more than one.
    """
    winstep = 2 / fps
    winlen = 2 * winstep

    fbank_energies, frame_energy = psf.fbank(signal=signal,
                                             samplerate=samplerate,
                                             winlen=winlen,
                                             winstep=winstep,
                                             nfilt=num_filt,
                                             nfft=nfft)

    cepstra = psf.dct(np.log(fbank_energies), type=2, axis=1, norm='ortho')
    cepstra = np.asarray(psf.lifter(cepstra[:, :num_cepstra], L=22))

    log_energy = np.log(frame_energy)
    log_energy = log_energy.reshape([log_energy.shape[0], 1])

    if cepstra.shape[0] > 1:
        half_std = 0.5 * np.std(cepstra, axis=0)
        normalised = (cepstra - np.mean(cepstra, axis=0)) / half_std
    else:
        # a single frame is left unnormalised
        normalised = cepstra

    mat = np.concatenate((normalised, log_energy), axis=1)

    # sanity check on the number of produced frames
    expected_frames = fps * (signal.shape[0] / samplerate)
    assert mat.shape[0] - expected_frames <= 1, \
        "Producted feature number does not match framerate"
    return mat
コード例 #12
0
def get_features(filename,
                 numcep,
                 numfilt,
                 winlen,
                 winstep,
                 method=1,
                 quaternion=False):
    """Read a wav file and return cepstral features with stacked derivatives.

    Base features: liftered cepstra, centred per column and divided by half
    the per-column standard deviation, with the log frame energy appended as
    a last column.

    :param filename: path handed to ``wav.read``.
    :param numcep: number of cepstral columns kept after the DCT.
    :param numfilt: forwarded to ``sf.fbank`` as ``nfilt``.
    :param winlen: window length forwarded to ``sf.fbank``.
    :param winstep: window step forwarded to ``sf.fbank``.
    :param method: truthy -> base features followed by their 1st, 2nd and
        3rd ``np.gradient`` along axis 0; falsy -> base features, an equally
        shaped block of zeros, then the 1st and 2nd gradient of that
        combined matrix.
    :param quaternion: when true, the result is reshaped to
        ``(rows, 4, columns // 4)``.
    :returns: ``(mat, data, samplerate)``.
    """
    samplerate, data = wav.read(filename)

    # cepstral coefficients
    fbank_energies, frame_energy = sf.fbank(data,
                                            samplerate,
                                            winlen,
                                            winstep,
                                            nfilt=numfilt)
    cepstra = sf.dct(np.log(fbank_energies), type=2, axis=1, norm='ortho')
    cepstra = np.asarray(sf.lifter(cepstra[:, :numcep], L=22))

    # log energy as a column vector
    log_energy = np.log(frame_energy)
    log_energy = log_energy.reshape([log_energy.shape[0], 1])

    normalised = (cepstra - np.mean(cepstra, axis=0)) / (
        0.5 * np.std(cepstra, axis=0))
    mat = np.concatenate((normalised, log_energy), axis=1)

    if method:
        # base features followed by 1st, 2nd and 3rd order derivatives
        d1 = np.gradient(mat)[0]
        d2 = np.gradient(d1)[0]
        d3 = np.gradient(d2)[0]
        mat = np.concatenate((mat, d1, d2, d3), axis=1)
    else:
        # NOTE(review): the derivatives below are taken of the zero-padded
        # matrix, so the result is six times the base width -- confirm this
        # is intended.
        mat = np.concatenate((mat, np.zeros(shape=mat.shape)), axis=1)
        d1 = np.gradient(mat)[0]
        d2 = np.gradient(d1)[0]
        mat = np.concatenate((mat, d1, d2), axis=1)

    if quaternion:
        n_rows, n_cols = mat.shape[0], mat.shape[1]
        mat = np.reshape(mat, (n_rows, 4, n_cols // 4))

    return mat, data, samplerate
コード例 #13
0
# NOTE(review): psf is presumably the reference implementation and csf the
# implementation under test -- confirm against the imports at the top of the
# file. The first check previously compared csf.mel2hz(5190) with itself and
# therefore could never fail; it now compares the two implementations.
assert (get_error(psf.mel2hz(5190), csf.mel2hz(5190)) <= acceptable_error)
# round trip: hz2mel(mel2hz(x)) should give x back
assert (get_error(csf.hz2mel(csf.mel2hz(2595)), 2595) <= acceptable_error)
# Single-argument print(...) behaves the same under Python 2 and Python 3.
print(' ✓')

print('')
print('get_filterbanks')
print('===============')
psf_filterbanks = psf.get_filterbanks()
csf_filterbanks = csf.get_filterbanks()
assert (np.shape(psf_filterbanks) == np.shape(csf_filterbanks))
error2d(psf_filterbanks, csf_filterbanks)

print('')
print('lifter')
print('======')
psf_lifter = psf.lifter(psf_feat)
# csf is fed a float32 copy of the same features
csf_lifter = csf.lifter(np.array(psf_feat, dtype=np.float32))
assert (np.shape(psf_lifter) == np.shape(csf_lifter))
error2d(psf_lifter, csf_lifter)

print('')
print('delta')
print('=====')
psf_delta = psf.delta(psf_mfcc, 3)
csf_delta = csf.delta(np.array(psf_mfcc, dtype=np.float32), 3)
assert (np.shape(psf_delta) == np.shape(csf_delta))
error2d(psf_delta, csf_delta)

print('')
print('Testing sigproc')