Ejemplo n.º 1
0
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True):
    """Compute MFCC features, the magnitude spectrum and the log mel spectrum of a signal.

    :param signal: the audio signal from which to compute features.
    :param samplerate: the samplerate of the signal, default 16000.
    :param winlen: the length of the analysis window in seconds, default 0.025.
    :param winstep: the step between successive windows in seconds, default 0.01.
    :param numcep: the number of cepstral coefficients to keep, default 13.
    :param nfilt: the number of filters requested from get_filterbanks, default 26.
    :param nfft: the FFT size handed to sigproc.powspec / sigproc.magspec, default 512.
    :param lowfreq: lowest band edge handed to get_filterbanks, default 0.
    :param highfreq: highest band edge handed to get_filterbanks. Any falsy value
        (None, 0) is replaced with samplerate/2.
    :param preemph: coefficient handed to sigproc.preemphasis, default 0.97.
    :param ceplifter: lifter parameter handed to lifter(), default 22.
    :param appendEnergy: if true, column 0 of the cepstral matrix is overwritten
        with the log of the total frame energy.
    :returns: a tuple ``(feat, mspec, logmelspec)``. ``feat`` is the liftered
        cepstral matrix (one row per frame, at most numcep columns), ``mspec`` is
        the result of sigproc.magspec on the windowed frames, and ``logmelspec``
        is the natural log of the filterbank energies before the DCT.
    """
    highfreq = highfreq or samplerate/2
    # Smallest positive float spacing; substituted for exact zeros so log() stays finite.
    tiny = numpy.finfo(float).eps

    emphasized = sigproc.preemphasis(signal, preemph)
    # 'hamm' presumably selects a Hamming window in this project's sigproc -- TODO confirm.
    windowed = sigproc.framesig(emphasized, winlen*samplerate, winstep*samplerate, 'hamm')
    power_spectrum = sigproc.powspec(windowed, nfft)
    mspec = sigproc.magspec(windowed, nfft)

    # Total energy per frame: sum of the power spectrum along axis 1.
    frame_energy = numpy.sum(power_spectrum, 1)
    frame_energy = numpy.where(frame_energy == 0, tiny, frame_energy)

    # NOTE(review): the original author states the filterbank is nfilt by nfft/2 + 1;
    # the dot product below relies on its columns matching the power spectrum's.
    filterbank = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)
    band_energies = numpy.dot(power_spectrum, filterbank.T)
    band_energies = numpy.where(band_energies == 0, tiny, band_energies)

    logmelspec = numpy.log(band_energies)

    # Orthonormal DCT-II along each row, truncated to the first numcep coefficients.
    cepstra = dct(logmelspec, type=2, axis=1, norm='ortho')[:, :numcep]
    cepstra = lifter(cepstra, ceplifter)
    if appendEnergy:
        # Replace the first cepstral coefficient with the log of the frame energy.
        cepstra[:, 0] = numpy.log(frame_energy)
    return cepstra, mspec, logmelspec
Ejemplo n.º 2
0

from features import mfcc
from features import logfbank

import scipy.io.wavfile as wav

import matplotlib.pyplot as plt
from features.sigproc import preemphasis, framesig, magspec, powspec

# Load the recording; wav.read returns the sample rate followed by the sample data.
# The path is relative, so the script must be run from the directory it expects.
rate, sig = wav.read('../Data/roycer/roycer.wav')

# Feature extraction on the raw signal.
mfcc_feat = mfcc(sig, rate)
fbank_feat = logfbank(sig, rate)

# NOTE(review): magspec/powspec are applied here to the MFCC output with an
# FFT size of 1, not to framed audio -- confirm this is what was intended.
magspec_result = magspec(mfcc_feat, 1)
powspec_result = powspec(mfcc_feat, 1)
sig2 = preemphasis(sig, 0.95)

# print(...) with a single argument produces the same output on Python 2 and
# also runs on Python 3, unlike the bare print statement.
print(mfcc_feat)

# Builds the histogram only; no plt.show() is issued in this part of the script.
plt.hist(mfcc_feat)
# print(mfcc_feat[0:12, 0:12])
# print(fbank_feat[0:12, 0:12])
# print(magspec_result)
print(powspec_result)

# Presumably tallies for "angry" (enojado) and "happy" (feliz); their use is
# not visible in this part of the script -- verify against the code that follows.
enojado = 0
feliz = 0