Ejemplo n.º 1
0
def ssc(signal,samplerate=16000,winlen=0.025,winstep=0.01,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
    """Compute Spectral Subband Centroid features from an audio signal.

    For every frame and every filter, the result is the filterbank-weighted
    sum of (power * frequency) divided by the filterbank-weighted sum of power.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
    """
    # A falsy highfreq (None or 0) falls back to samplerate/2.
    highfreq = highfreq or samplerate/2
    signal = sigproc.preemphasis(signal,preemph)
    frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate)
    pspec = sigproc.powspec(frames,nfft)
    # Replace exact zeros so an all-zero frame does not yield 0/0 in the final division.
    pspec = numpy.where(pspec == 0,numpy.finfo(float).eps,pspec)

    # Forward the band edges; previously lowfreq/highfreq were accepted but never used.
    fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
    feat = numpy.dot(pspec,fb.T) # compute the filterbank energies
    # One row of bin frequencies (1 .. samplerate/2) repeated for every frame.
    R = numpy.tile(numpy.linspace(1,samplerate/2,numpy.size(pspec,1)),(numpy.size(pspec,0),1))

    return numpy.dot(pspec*R,fb.T) / feat
Ejemplo n.º 2
0
def ssc(signal,samplerate=16000,winlen=0.025,winstep=0.01,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
    """Compute Spectral Subband Centroid features from an audio signal.

    For every frame and every filter, the result is the filterbank-weighted
    sum of (power * frequency) divided by the filterbank-weighted sum of power.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
    """
    # A falsy highfreq (None or 0) falls back to samplerate/2.
    highfreq = highfreq or samplerate/2
    signal = sigproc.preemphasis(signal,preemph)
    frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate)
    pspec = sigproc.powspec(frames,nfft)
    pspec = numpy.where(pspec == 0,numpy.finfo(float).eps,pspec) # if things are all zeros we get problems

    # Forward the band edges; previously lowfreq/highfreq were accepted but never used.
    fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
    feat = numpy.dot(pspec,fb.T) # compute the filterbank energies
    # One row of bin frequencies (1 .. samplerate/2) repeated for every frame.
    R = numpy.tile(numpy.linspace(1,samplerate/2,numpy.size(pspec,1)),(numpy.size(pspec,0),1))

    return numpy.dot(pspec*R,fb.T) / feat
Ejemplo n.º 3
0
def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
    """Compute Mel-filterbank energy features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The
        second return value is the energy in each frame (total energy, unwindowed). Exact zeros in either
        value are replaced by machine epsilon so callers can safely take logarithms.
    """
    # A falsy highfreq (None or 0) falls back to samplerate/2.
    highfreq = highfreq or samplerate/2
    signal = sigproc.preemphasis(signal,preemph)
    frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate)
    pspec = sigproc.powspec(frames,nfft)
    energy = numpy.sum(pspec,1) # this stores the total energy in each frame
    energy = numpy.where(energy == 0,numpy.finfo(float).eps,energy) # if energy is zero, we get problems with log

    # Forward the band edges; previously lowfreq/highfreq were accepted but never used.
    fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
    feat = numpy.dot(pspec,fb.T) # compute the filterbank energies
    feat = numpy.where(feat == 0,numpy.finfo(float).eps,feat) # if feat is zero, we get problems with log

    return feat,energy
Ejemplo n.º 4
0
def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
    """Compute Mel-filterbank energy features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The
        second return value is the energy in each frame (total energy, unwindowed). Exact zeros in either
        value are replaced by machine epsilon so callers can safely take logarithms.
    """
    # A falsy highfreq (None or 0) falls back to samplerate/2.
    highfreq = highfreq or samplerate/2
    signal = sigproc.preemphasis(signal,preemph)
    frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate)
    pspec = sigproc.powspec(frames,nfft)
    energy = numpy.sum(pspec,1) # this stores the total energy in each frame
    energy = numpy.where(energy == 0,numpy.finfo(float).eps,energy) # if energy is zero, we get problems with log

    # Forward the band edges; previously lowfreq/highfreq were accepted but never used.
    fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
    feat = numpy.dot(pspec,fb.T) # compute the filterbank energies
    feat = numpy.where(feat == 0,numpy.finfo(float).eps,feat) # if feat is zero, we get problems with log

    return feat,energy
Ejemplo n.º 5
0
def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,
          winfunc=lambda x:numpy.ones((x,))):
    """Compute Mel-filterbank energy features from an audio signal.

    :param signal: the audio signal from which to compute features.
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s.
    :param winstep: the step between successive windows in seconds. Default is 0.01s.
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of the filters, forwarded to get_filterbanks. Default is 0.
    :param highfreq: highest band edge of the filters, forwarded to get_filterbanks.
        A falsy value (None or 0) is replaced by samplerate/2.
    :param preemph: preemphasis coefficient handed to sigproc.preemphasis. Default is 0.97.
    :param winfunc: window callable handed to sigproc.framesig. The default returns
        numpy.ones((x,)) for a length x, i.e. a rectangular window.
    :returns: 2 values: the filterbank energies (pspec dotted with the transposed
        filterbank) and the per-frame total energy. Exact zeros in either value are
        replaced by machine epsilon.
    """
    tiny = numpy.finfo(float).eps
    if not highfreq:
        highfreq = samplerate/2

    # Pre-emphasise, cut into frames, then take the power spectrum of each frame.
    emphasized = sigproc.preemphasis(signal,preemph)
    frame_len = winlen*samplerate
    frame_step = winstep*samplerate
    windowed = sigproc.framesig(emphasized, frame_len, frame_step, winfunc)
    power = sigproc.powspec(windowed,nfft)

    # Total energy per frame; zeros are floored so a later log() stays finite.
    total = numpy.sum(power,1)
    total = numpy.where(total == 0,tiny,total)

    # Filterbank energies, floored in the same way.
    bank = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
    banded = numpy.dot(power,bank.T)
    banded = numpy.where(banded == 0,tiny,banded)

    return banded,total
Ejemplo n.º 6
0
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True):
    """Compute MFCC features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :returns: 3 values. The first is the cepstral feature array (the first numcep DCT
        coefficients per frame, liftered). The second is the magnitude spectrum returned by
        sigproc.magspec for the frames. The third is the log filterbank energies that were
        fed to the DCT.
    """
    # NOTE: nfft should be even, ideally the next power of two at or above the
    # window length in samples, e.g. 2**ceil(log2(winpts)).
    # NOTE(review): the string 'hamm' is passed where a window argument goes; this
    # presumably relies on a locally modified sigproc.framesig -- confirm.
    tiny = numpy.finfo(float).eps
    if not highfreq:
        highfreq = samplerate/2

    emphasized = sigproc.preemphasis(signal,preemph)
    frames = sigproc.framesig(emphasized, winlen*samplerate, winstep*samplerate,'hamm')
    pspec = sigproc.powspec(frames,nfft)
    mspec = sigproc.magspec(frames,nfft)

    # Total energy per frame; zeros are floored so the log below stays finite.
    energy = numpy.sum(pspec,1)
    energy = numpy.where(energy == 0,tiny,energy)

    # Filterbank energies, floored in the same way before taking the log.
    bank = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
    band_energy = numpy.dot(pspec,bank.T)
    band_energy = numpy.where(band_energy == 0,tiny,band_energy)
    logmelspec = numpy.log(band_energy)

    # DCT-II along the filter axis, keep the first numcep coefficients, then lifter.
    cepstra = dct(logmelspec, type=2, axis=1, norm='ortho')[:,:numcep]
    cepstra = lifter(cepstra,ceplifter)
    if appendEnergy:
        # replace first cepstral coefficient with log of frame energy
        cepstra[:,0] = numpy.log(energy)
    return cepstra,mspec,logmelspec
Ejemplo n.º 7
0
from features import mfcc
from features import logfbank

import scipy.io.wavfile as wav

import matplotlib.pyplot as plt
from features.sigproc import preemphasis, framesig, magspec, powspec

# Load the recording: wav.read returns the sample rate and the sample data.
(rate,sig) = wav.read('../Data/roycer/roycer.wav')

# MFCC and log filterbank features for the whole recording.
mfcc_feat = mfcc(sig,rate)
fbank_feat = logfbank(sig, rate)

# NOTE(review): magspec/powspec are applied to the MFCC matrix with an FFT size
# of 1 rather than to framed audio -- confirm this is intended.
magspec_result = magspec(mfcc_feat,1)
powspec_result = powspec(mfcc_feat, 1)
sig2 = preemphasis(sig,0.95)



# Call form of print: same output on Python 2 for a single argument, valid on Python 3.
print(mfcc_feat)

plt.hist(mfcc_feat)
#print(mfcc_feat[0:12,0:12])
#print(fbank_feat[0:12,0:12])
#print magspec_result
print(powspec_result)

# Module-level counters, all starting at zero. The names are Spanish for
# angry / happy / sad / normal; presumably emotion-class tallies -- confirm.
enojado = 0
feliz = 0
tristes = 0
normal = 0
def logFilterbankFeatures(signal,
                          samplerate=16000,
                          winlen=0.0255,
                          winstep=0.01,
                          nfilt=40,
                          nfft=512,
                          lowfreq=133.3333,
                          highfreq=6855.4976,
                          preemph=0.97,
                          winSzForDelta=2):
    '''
    Compute log filterbank energies plus log total energy, with deltas and
    accelerations appended.

    The body follows features.fbank from the 'python_speech_features' package
    (https://github.com/jameslyons/python_speech_features), which does not
    accept a window function; here `hamming` is passed to the framing step.

    :parameters:
        - signal : np.ndarray, dtype=float
            input vector of the speech signal
        - samplerate : int
        - winlen: float
            length of analysis window in seconds
        - winstep: float
            step size between successive windows in seconds
        - nfilt: int
            number of filter energies to compute (total energy not included).
            e.g. 40 --> Output dim = (40+1)*3
        - nfft: int
            FFT size
        - lowfreq: float
            lower band edge handed to features.get_filterbanks
        - highfreq: float
            upper band edge handed to features.get_filterbanks; a falsy value
            is replaced by samplerate / 2
        - preemph: float
            pre-emphasis coefficient
        - winSzForDelta: int
            window size handed to delta(). E.g. 2 --> t-2, t-1, t+1 and t+2
            are used for calculating the deltas
    :returns:
        - features: numpy.array: float
            feature-matrix. 1st dimension: time steps of 'winstep',
            2nd dim: feature dimension: (nfilt + 1)*3,
            +1 for energy, *3 because of deltas
    '''
    floor = np.finfo(float).eps
    if not highfreq:
        highfreq = samplerate / 2

    # Pre-emphasis, framing with the hamming window, then per-frame power spectrum.
    emphasized = sigproc.preemphasis(signal, preemph)
    frame_len = winlen * samplerate
    frame_step = winstep * samplerate
    frames = sigproc.framesig(emphasized, frame_len, frame_step, winfunc=hamming)
    power = sigproc.powspec(frames, nfft)

    # Total energy per frame; exact zeros are floored so log() stays finite.
    frame_energy = np.sum(power, 1)
    frame_energy = np.where(frame_energy == 0, floor, frame_energy)

    # Filterbank energies, floored the same way.
    bank = features.get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)
    band_energy = np.dot(power, bank.T)
    band_energy = np.where(band_energy == 0, floor, band_energy)

    # Column 0 is log energy, the remaining columns are the log filterbank energies.
    static = np.column_stack((np.log(frame_energy), np.log(band_energy)))
    velocity = delta(static, winSzForDelta)
    acceleration = delta(velocity, winSzForDelta)
    return np.concatenate((static, velocity, acceleration), axis=1)
def logFilterbankFeatures(signal,samplerate=16000,winlen=0.0255,winstep=0.01,
          nfilt=40,nfft=512,lowfreq=133.3333,highfreq=6855.4976,preemph=0.97,
          winSzForDelta=2):
    '''
    Compute log filterbank energies plus log total energy, followed by their
    deltas and accelerations.

    The computation is adapted from features.fbank in the
    'python_speech_features' package
    (http://python-speech-features.readthedocs.org/en/latest/), which does not
    take a window function; this version passes `hamming` when framing.

    :parameters:
        - signal : np.ndarray, dtype=float
            input vector of the speech signal
        - samplerate : int
        - winlen: float
            length of analysis window in seconds
        - winstep: float
            step size between successive windows in seconds
        - nfilt: int
            number of filter energies to compute (total energy not included).
            e.g. 40 --> Output dim = (40+1)*3
        - nfft: int
            FFT size
        - lowfreq: float
            lower band edge passed to features.get_filterbanks
        - highfreq: float
            upper band edge passed to features.get_filterbanks; a falsy value
            is replaced by samplerate/2
        - preemph: float
            pre-emphasis coefficient
        - winSzForDelta: int
            window size passed to delta(). E.g. 2 --> t-2, t-1, t+1 and t+2
            are used for calculating the deltas
    :returns:
        - features: numpy.array: float
            feature-matrix. 1st dimension: time steps of 'winstep',
            2nd dim: feature dimension: (nfilt + 1)*3,
            +1 for energy, *3 because of deltas
    '''
    def _floor_zeros(values):
        # Exact zeros would make the later log() return -inf.
        return np.where(values == 0,np.finfo(float).eps,values)

    highfreq = highfreq if highfreq else samplerate/2

    # Per-frame power spectrum of the pre-emphasised, hamming-windowed signal.
    pspec = sigproc.powspec(
        sigproc.framesig(sigproc.preemphasis(signal,preemph),
                         winlen*samplerate, winstep*samplerate, winfunc=hamming),
        nfft)

    # Total energy in each frame.
    energy = _floor_zeros(np.sum(pspec,1))

    # Filterbank energies.
    fb = features.get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
    fbEnergy = _floor_zeros(np.dot(pspec,fb.T))

    # Log energy goes first, then the log filterbank energies.
    base = np.column_stack((np.log(energy),np.log(fbEnergy)))
    # Deltas of the base features, then deltas of those deltas.
    d1 = delta(base, winSzForDelta)
    d2 = delta(d1, winSzForDelta)
    return np.concatenate((base,d1,d2),axis=1)