예제 #1
0
def ssc(signal, samplerate=16000, winlen=0.025, winstep=0.01,
        nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97,
        winfunc=lambda x: np.ones((x,))):
    """Compute Spectral Subband Centroid (SSC) features from an audio signal.

    Each centroid is the filterbank-weighted sum of a frequency ramp (running
    from 1 to samplerate/2 across the power-spectrum bins) divided by the
    corresponding filterbank energy.

    :param signal: the audio signal to analyse. Should be an N*1 array.
    :param samplerate: the sample rate of the signal, in Hz.
    :param winlen: analysis window length in seconds. Default is 0.025s.
    :param winstep: step between successive windows in seconds. Default is 0.01s.
    :param nfilt: number of filters in the filterbank. Default is 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of the mel filters, in Hz. Default is 0.
    :param highfreq: highest band edge of the mel filters, in Hz. Any falsy
        value (including the default None) selects samplerate/2.
    :param preemph: pre-emphasis coefficient; 0 means no filter. Default is 0.97.
    :param winfunc: callable forwarded to ``sigproc.framesig`` to build the
        analysis window. The default returns all ones, e.g. use
        winfunc=numpy.hamming for a Hamming window.
    :returns: A numpy array of size (NUMFRAMES by nfilt); each row holds one
        feature vector.
    """
    if not highfreq:
        highfreq = samplerate / 2

    emphasized = sigproc.preemphasis(signal, preemph)
    frames = sigproc.framesig(emphasized, winlen * samplerate,
                              winstep * samplerate, winfunc)

    # Exact zeros in the power spectrum would make the division below
    # ill-defined, so they are swapped for machine epsilon.
    power = sigproc.powspec(frames, nfft)
    power = np.where(power == 0, np.finfo(float).eps, power)

    filters = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)
    band_energy = np.dot(power, filters.T)  # filterbank energies

    # One copy of the frequency ramp per frame.
    n_frames = np.size(power, 0)
    n_bins = np.size(power, 1)
    freq_grid = np.tile(np.linspace(1, samplerate / 2, n_bins), (n_frames, 1))

    return np.dot(power * freq_grid, filters.T) / band_energy
예제 #2
0
def ssc(signal,samplerate=16000,winlen=0.025,winstep=0.01,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
    """Compute Spectral Subband Centroid features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
        (also used for any other falsy value).
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
    """
    highfreq = highfreq or samplerate/2
    signal = sigproc.preemphasis(signal,preemph)
    frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate)
    pspec = sigproc.powspec(frames,nfft)
    # Zero power bins (e.g. digital silence) would make the centroid 0/0 = NaN,
    # so replace them with machine epsilon before dividing.
    pspec = numpy.where(pspec == 0, numpy.finfo(float).eps, pspec)

    # Forward the band edges: they were previously accepted by this function
    # but never handed to the filterbank, so callers' limits were ignored.
    fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
    feat = numpy.dot(pspec,fb.T) # compute the filterbank energies
    # Frequency ramp from 1 to samplerate/2, repeated once per frame.
    R = numpy.tile(numpy.linspace(1,samplerate/2,numpy.size(pspec,1)),(numpy.size(pspec,0),1))

    return numpy.dot(pspec*R,fb.T) / feat
예제 #3
0
    def ssc(signal,
            samplerate=16000,
            winlen=0.025,
            winstep=0.01,
            nfilt=26,
            nfft=512,
            lowfreq=0,
            highfreq=None,
            preemph=0.97,
            winfunc=lambda x: np.ones((x, ))):
        """Compute Spectral Subband Centroid features from an audio signal.

        The signal is pre-emphasised, framed and converted to a power spectrum
        through ``sigproc``; the result is the filterbank-weighted sum of a
        frequency ramp (1 .. samplerate/2) divided by the filterbank energy.

        :param signal: audio signal handed to ``sigproc.preemphasis``.
        :param samplerate: sample rate of the signal.
        :param winlen: window length; multiplied by samplerate before framing.
        :param winstep: window step; multiplied by samplerate before framing.
        :param nfilt: number of filters requested from ``get_filterbanks``.
        :param nfft: FFT size used for the power spectrum and the filterbank.
        :param lowfreq: lower band edge passed to ``get_filterbanks``.
        :param highfreq: upper band edge; any falsy value selects samplerate/2.
        :param preemph: pre-emphasis coefficient.
        :param winfunc: window callable passed to ``sigproc.framesig``; the
            default produces all ones.
        :returns: array with one row per frame and one column per filter.
        """
        if not highfreq:
            highfreq = samplerate / 2

        emphasized = sigproc.preemphasis(signal, preemph)
        frame_len = winlen * samplerate
        frame_step = winstep * samplerate
        frames = sigproc.framesig(emphasized, frame_len, frame_step, winfunc)

        # Zeros would break the final division, so swap them for epsilon.
        power = sigproc.powspec(frames, nfft)
        power = np.where(power == 0, np.finfo(float).eps, power)

        bank = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)
        band_energy = np.dot(power, bank.T)  # filterbank energies

        # The 1-D ramp broadcasts across every frame (row) of the spectrum.
        ramp = np.linspace(1, samplerate / 2, np.size(power, 1))
        weighted = np.dot(power * ramp, bank.T)

        return weighted / band_energy
예제 #4
0
def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
    """Compute Mel-filterbank energy features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
        (also used for any other falsy value).
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The
        second return value is the energy in each frame (sum of the power spectrum over each frame).
        Neither value is clamped, so both may contain exact zeros; guard before taking a logarithm.
    """
    highfreq = highfreq or samplerate/2
    signal = sigproc.preemphasis(signal,preemph)
    frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate)
    pspec = sigproc.powspec(frames,nfft)
    energy = numpy.sum(pspec,1) # this stores the total energy in each frame

    # Forward the band edges: they were previously accepted by this function
    # but never handed to the filterbank, so callers' limits were ignored.
    fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
    feat = numpy.dot(pspec,fb.T) # compute the filterbank energies
    return feat,energy
예제 #5
0
    def fbank(signal,
              samplerate=16000,
              winlen=0.025,
              winstep=0.01,
              nfilt=26,
              nfft=512,
              lowfreq=0,
              highfreq=None,
              preemph=0.97,
              winfunc=lambda x: np.ones((x, ))):
        """Compute filterbank energies and per-frame energy of an audio signal.

        The signal is pre-emphasised, framed and converted to a power spectrum
        through ``sigproc``; the spectrum is then projected onto the filters
        returned by ``get_filterbanks``.

        :param signal: audio signal handed to ``sigproc.preemphasis``.
        :param samplerate: sample rate of the signal.
        :param winlen: window length; multiplied by samplerate before framing.
        :param winstep: window step; multiplied by samplerate before framing.
        :param nfilt: number of filters requested from ``get_filterbanks``.
        :param nfft: FFT size used for the power spectrum and the filterbank.
        :param lowfreq: lower band edge passed to ``get_filterbanks``.
        :param highfreq: upper band edge; any falsy value selects samplerate/2.
        :param preemph: pre-emphasis coefficient.
        :param winfunc: window callable passed to ``sigproc.framesig``; the
            default produces all ones.
        :returns: tuple ``(feat, energy)``: the filterbank energies (one row
            per frame) and the summed power of each frame. Exact zeros in
            either are replaced by machine epsilon.
        """
        if not highfreq:
            highfreq = samplerate / 2

        tiny = np.finfo(float).eps

        emphasized = sigproc.preemphasis(signal, preemph)
        frame_len = winlen * samplerate
        frame_step = winstep * samplerate
        frames = sigproc.framesig(emphasized, frame_len, frame_step, winfunc)
        power = sigproc.powspec(frames, nfft)

        # Total energy per frame; zeros are replaced so a later log is finite.
        frame_energy = np.sum(power, 1)
        frame_energy = np.where(frame_energy == 0, tiny, frame_energy)

        bank = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)

        # Filterbank energies, with the same zero replacement.
        band_energy = np.dot(power, bank.T)
        band_energy = np.where(band_energy == 0, tiny, band_energy)

        return band_energy, frame_energy
예제 #6
0
def fbank(signal, samplerate=16000, winlen=0.025, winstep=0.01,
          nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97):
    """Compute Mel-filterbank energy features from an audio signal.

    :param signal: the audio signal to analyse. Should be an N*1 array.
    :param samplerate: the sample rate of the signal.
    :param winlen: analysis window length in seconds. Default is 0.025s.
    :param winstep: step between successive windows in seconds. Default is 0.01s.
    :param nfilt: number of filters in the filterbank. Default is 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of the mel filters, in Hz. Default is 0.
    :param highfreq: highest band edge of the mel filters, in Hz. Any falsy
        value (including the default None) selects samplerate/2.
    :param preemph: pre-emphasis coefficient; 0 means no filter. Default is 0.97.
    :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt)
        with one feature vector per row; the second is the energy of each
        frame (sum of its power spectrum). Exact zeros in either are replaced
        by machine epsilon.
    """
    if not highfreq:
        highfreq = samplerate/2

    emphasized = sigproc.preemphasis(signal, preemph)
    frames = sigproc.framesig(emphasized, winlen*samplerate, winstep*samplerate)
    power = sigproc.powspec(frames, nfft)

    # Per-frame total energy; zero would cause problems with a later log.
    frame_energy = numpy.sum(power, 1)
    frame_energy = numpy.where(frame_energy == 0, numpy.finfo(float).eps,
                               frame_energy)

    bank = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)

    # Filterbank energies, protected against zero in the same way.
    band_energy = numpy.dot(power, bank.T)
    band_energy = numpy.where(band_energy == 0, numpy.finfo(float).eps,
                              band_energy)

    return band_energy, frame_energy
예제 #7
0
def ssc(signal, samplerate=16000, winlen=0.025, winstep=0.01,
        nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97):
    """Compute Spectral Subband Centroid features from an audio signal.

    :param signal: the audio signal to analyse. Should be an N*1 array.
    :param samplerate: the sample rate of the signal.
    :param winlen: analysis window length in seconds. Default is 0.025s.
    :param winstep: step between successive windows in seconds. Default is 0.01s.
    :param nfilt: number of filters in the filterbank. Default is 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of the mel filters, in Hz. Default is 0.
    :param highfreq: highest band edge of the mel filters, in Hz. Any falsy
        value (including the default None) selects samplerate/2.
    :param preemph: pre-emphasis coefficient; 0 means no filter. Default is 0.97.
    :returns: A numpy array of size (NUMFRAMES by nfilt); each row holds one
        feature vector.
    """
    if not highfreq:
        highfreq = samplerate/2

    emphasized = sigproc.preemphasis(signal, preemph)
    frames = sigproc.framesig(emphasized, winlen*samplerate, winstep*samplerate)

    # All-zero bins would make the final division ill-defined.
    power = sigproc.powspec(frames, nfft)
    power = numpy.where(power == 0, numpy.finfo(float).eps, power)

    bank = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)
    band_energy = numpy.dot(power, bank.T)  # filterbank energies

    # Frequency ramp over the spectrum bins; broadcasting applies it to every
    # frame (row) of the power spectrum.
    ramp = numpy.linspace(1, samplerate/2, numpy.size(power, 1))
    weighted = numpy.dot(power * ramp, bank.T)

    return weighted / band_energy
예제 #8
0
def powspec(signal, samplerate, conf):
    """
    Compute power spectrum features from an audio signal.

    The signal is pre-emphasised, split into windowed frames and passed to
    ``sigproc.powspec``.

    Args:
        signal: the audio signal from which to compute features. Should be an
            N*1 array
        samplerate: the samplerate of the signal we are working with.
        conf: feature configuration; the keys 'preemph', 'winfunc', 'winlen',
            'winstep' and 'nfft' are read

    Returns:
        The result of ``sigproc.powspec`` for the framed signal: a numpy array
        of size (NUMFRAMES by numfreq) in which each row holds the feature
        vector of the corresponding frame
    """
    emphasized = sigproc.preemphasis(signal, float(conf['preemph']))

    # resolve the configured window description into a window callable
    window = _get_winfunc(conf['winfunc'])

    # window length and step are multiplied by the sample rate before framing
    frame_len = float(conf['winlen'])*samplerate
    frame_step = float(conf['winstep'])*samplerate
    frames = sigproc.framesig(emphasized, frame_len, frame_step, window)

    return sigproc.powspec(frames, int(conf['nfft']))
예제 #9
0
def fbank(signal, samplerate=16000, winlen=0.025, winstep=0.01,
          nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97,
          winfunc=lambda x: np.ones((x,))):
    """Compute Mel-filterbank energy features from an audio signal.

    :param signal: the audio signal to analyse. Should be an N*1 array.
    :param samplerate: the sample rate of the signal, in Hz.
    :param winlen: analysis window length in seconds. Default is 0.025s.
    :param winstep: step between successive windows in seconds. Default is 0.01s.
    :param nfilt: number of filters in the filterbank. Default is 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of the mel filters, in Hz. Default is 0.
    :param highfreq: highest band edge of the mel filters, in Hz. Any falsy
        value (including the default None) selects samplerate/2.
    :param preemph: pre-emphasis coefficient; 0 means no filter. Default is 0.97.
    :param winfunc: callable forwarded to ``sigproc.framesig`` to build the
        analysis window. The default returns all ones, e.g. use
        winfunc=numpy.hamming for a Hamming window.
    :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt)
        with one feature vector per row; the second is the energy of each
        frame (sum of its power spectrum). Exact zeros in either are replaced
        by machine epsilon.
    """
    if not highfreq:
        highfreq = samplerate / 2

    def _floor_zeros(values):
        # Exact zeros would cause problems with a later log.
        return np.where(values == 0, np.finfo(float).eps, values)

    emphasized = sigproc.preemphasis(signal, preemph)
    frames = sigproc.framesig(emphasized, winlen * samplerate,
                              winstep * samplerate, winfunc)
    power = sigproc.powspec(frames, nfft)

    # Total energy in each frame.
    frame_energy = _floor_zeros(np.sum(power, 1))

    bank = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)

    # Filterbank energies.
    band_energy = _floor_zeros(np.dot(power, bank.T))

    return band_energy, frame_energy
예제 #10
0
 def do_mfcc(self, fft_n, mffc_ch):
     """Project the power spectrum of ``self.samples`` onto a filterbank.

     Stores the FFT size and filter count on the instance, computes the
     magnitude spectrum, its square and the power spectrum of
     ``self.samples`` via ``sigproc``, builds a filterbank for ``self.hz``
     and returns the power spectrum multiplied by the transposed bank.

     Side effects: sets ``nfft``, ``nmffc``, ``mag``, ``powspec2``,
     ``powspec1``, ``bank`` and ``mfc`` on ``self``.
     """
     self.nfft, self.nmffc = fft_n, mffc_ch

     # Magnitude spectrum and its element-wise square.
     self.mag = sigproc.magspec(self.samples, self.nfft)
     self.powspec2 = np.square(self.mag)

     # Power spectrum as computed by sigproc itself; this is the one used
     # for the filterbank projection below.
     self.powspec1 = sigproc.powspec(self.samples, self.nfft)

     self.bank = get_filterbanks(nfilt=self.nmffc,
                                 nfft=self.nfft,
                                 samplerate=self.hz)
     self.mfc = np.dot(self.powspec1, self.bank.T)
     return self.mfc
예제 #11
0
def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
    """Compute Mel-filterbank energy features from an audio signal.

    This is a debugging variant: besides computing the features it prints the
    pre-emphasis coefficient and filter count, calls ``matchframes`` on the
    first two frames, plots every filter with pylab and calls ``pylab.show()``.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The
        second return value is the energy in each frame (sum of the power spectrum over each frame).
        Exact zeros in either are replaced by machine epsilon.
    """
    highfreq = highfreq or samplerate/2
    # print(...) with a single pre-formatted string behaves the same under
    # Python 2 and Python 3.
    print("preemph %s" % (preemph,))
    signal = sigproc.preemphasis(signal,preemph)
    frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate)
    # NOTE(review): matchframes is defined outside this block and needs at
    # least two frames here - confirm what it is expected to do.
    matchframes(frames[0], frames[1])
    pspec = sigproc.powspec(frames,nfft)
    energy = pylab.sum(pspec,1) # this stores the total energy in each frame
    energy = pylab.where(energy == 0, pylab.finfo(float).eps, energy) # if energy is zero, we get problems with log
    fb = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)
    print("len(fb) %s" % (len(fb),))

    # Draw each filter as line segments, alternating red and black so that
    # neighbouring filters can be told apart.
    colour = "k-"
    for weights in fb:
        colour = "r-" if colour == "k-" else "k-"
        # Index of the first non-zero weight. None (rather than False) is the
        # "not started" marker because a start index of 0 compares equal to
        # False and would be mistaken for "not started".
        startedplot = None
        midpoint = 0
        for j, weight in enumerate(weights):
            if weight > 0:
                if startedplot is None:
                    startedplot = j
                if j > 0:
                    pylab.plot([j-1, j], [weights[j-1], weight], colour)
                    if weight == 1.0:
                        midpoint = j
            elif startedplot is not None:
                # First zero after the filter's support: close the triangle
                # and report the slopes on both sides of the peak.
                pylab.plot([j-1, j], [weights[j-1], 0], colour)
                try:
                    print("slope to midpoint %.3f, slope from midpoint %.3f"%(1.0/float(midpoint-startedplot), 1.0/float(midpoint-j+1)))
                except ZeroDivisionError:
                    # No bin reached exactly 1.0 (or the peak coincides with
                    # an edge); there is no meaningful slope to report.
                    pass
                break
    pylab.show()
    feat = pylab.dot(pspec, fb.T) # compute the filterbank energies
    feat = pylab.where(feat == 0, pylab.finfo(float).eps, feat) # if feat is zero, we get problems with log
    return feat, energy
예제 #12
0
파일: mfcc.py 프로젝트: bongtrop/NongDinsor
def ssc(signal, samplerate=16000, winlen=0.025, winstep=0.01,
        nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97):
    """Compute Spectral Subband Centroid features from an audio signal.

    The signal is pre-emphasised, framed and converted to a power spectrum
    through ``sigproc``; the result is the filterbank-weighted sum of a
    frequency ramp (1 .. samplerate/2) divided by the filterbank energy.

    :param signal: audio signal handed to ``sigproc.preemphasis``.
    :param samplerate: sample rate of the signal.
    :param winlen: window length; multiplied by samplerate before framing.
    :param winstep: window step; multiplied by samplerate before framing.
    :param nfilt: number of filters requested from ``get_filterbanks``.
    :param nfft: FFT size used for the power spectrum and the filterbank.
    :param lowfreq: lower band edge passed to ``get_filterbanks``.
    :param highfreq: upper band edge; any falsy value selects samplerate/2.
    :param preemph: pre-emphasis coefficient.
    :returns: array with one row per frame and one column per filter.
    """
    if not highfreq:
        highfreq = samplerate/2

    emphasized = sigproc.preemphasis(signal, preemph)
    frames = sigproc.framesig(emphasized, winlen*samplerate, winstep*samplerate)

    # All-zero bins would make the final division ill-defined.
    tiny = numpy.finfo(float).eps
    power = sigproc.powspec(frames, nfft)
    power = numpy.where(power == 0, tiny, power)

    bank = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)
    band_energy = numpy.dot(power, bank.T)  # filterbank energies

    # Frequency ramp over the spectrum bins, broadcast across all frames.
    ramp = numpy.linspace(1, samplerate/2, numpy.size(power, 1))
    numerator = numpy.dot(power * ramp, bank.T)

    return numerator / band_energy
예제 #13
0
파일: mfcc.py 프로젝트: bongtrop/NongDinsor
def fbank(signal, samplerate=16000, winlen=0.025, winstep=0.01,
          nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97):
    """Compute filterbank energies and per-frame energy of an audio signal.

    The signal is pre-emphasised, framed and converted to a power spectrum
    through ``sigproc``; the spectrum is then projected onto the filters
    returned by ``get_filterbanks``.

    :param signal: audio signal handed to ``sigproc.preemphasis``.
    :param samplerate: sample rate of the signal.
    :param winlen: window length; multiplied by samplerate before framing.
    :param winstep: window step; multiplied by samplerate before framing.
    :param nfilt: number of filters requested from ``get_filterbanks``.
    :param nfft: FFT size used for the power spectrum and the filterbank.
    :param lowfreq: lower band edge passed to ``get_filterbanks``.
    :param highfreq: upper band edge; any falsy value selects samplerate/2.
    :param preemph: pre-emphasis coefficient.
    :returns: tuple ``(feat, energy)``: the filterbank energies (one row per
        frame) and the summed power of each frame. Exact zeros in either are
        replaced by machine epsilon.
    """
    if not highfreq:
        highfreq = samplerate/2

    tiny = numpy.finfo(float).eps

    emphasized = sigproc.preemphasis(signal, preemph)
    frames = sigproc.framesig(emphasized, winlen*samplerate, winstep*samplerate)
    power = sigproc.powspec(frames, nfft)

    # Total energy per frame; zero would cause problems with a later log.
    frame_energy = numpy.sum(power, 1)
    frame_energy = numpy.where(frame_energy == 0, tiny, frame_energy)

    bank = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)

    # Filterbank energies, protected against zero in the same way.
    band_energy = numpy.dot(power, bank.T)
    band_energy = numpy.where(band_energy == 0, tiny, band_energy)

    return band_energy, frame_energy
예제 #14
0
def fbank(signal,
          samplerate=16000,
          winlen=0.025,
          winstep=0.01,
          nfilt=26,
          nfft=512,
          lowfreq=0,
          highfreq=None,
          preemph=0.95):
    """
    Compute Mel-filterbank energy features from an audio signal.

    Frames are windowed with ``hamming_window`` before the power spectrum is
    taken.

    :param signal: the audio signal to analyse. Should be an N*1 array.
    :param samplerate: the sample rate of the signal.
    :param winlen: analysis window length in seconds. Default is 0.025s.
    :param winstep: step between successive windows in seconds. Default is 0.01s.
    :param nfilt: number of filters in the filterbank. Default is 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of the mel filters, in Hz. Default is 0.
    :param highfreq: highest band edge of the mel filters, in Hz. Any falsy
        value (including the default None) selects samplerate/2.
    :param preemph: pre-emphasis coefficient; 0 means no filter. Default is 0.95.
    :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt)
        with one feature vector per row; the second is the energy of each
        frame (sum of its power spectrum). Exact zeros in either are replaced
        by machine epsilon.
    """
    if not highfreq:
        highfreq = samplerate / 2

    tiny = numpy.finfo(float).eps

    emphasized = sigproc.preemphasis(signal, preemph)
    frame_len = winlen * samplerate
    frame_step = winstep * samplerate
    frames = sigproc.framesig(emphasized, frame_len, frame_step,
                              winfunc=hamming_window)
    spectrum = sigproc.powspec(frames, nfft)

    # Total energy in each frame; a zero would cause problems with a later
    # log, so it is replaced by machine epsilon.
    frame_energy = numpy.sum(spectrum, 1)
    frame_energy = numpy.where(frame_energy == 0, tiny, frame_energy)

    bank = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)

    # Filterbank energies, with the same zero replacement.
    band_energy = numpy.dot(spectrum, bank.T)
    band_energy = numpy.where(band_energy == 0, tiny, band_energy)

    return band_energy, frame_energy
예제 #15
0
def fbank(signal, samplerate, conf):
    """
    Compute fbank features from an audio signal.

    Args:
        signal: the audio signal from which to compute features. Should be an
            N*1 array
        samplerate: the samplerate of the signal we are working with.
        conf: feature configuration; the keys 'highfreq', 'preemph', 'winlen',
            'winstep', 'nfft', 'nfilt' and 'lowfreq' are read. A negative
            'highfreq' selects samplerate/2.

    Returns:
        A numpy array of size (NUMFRAMES by nfilt) containing features, a numpy
        vector containing the signal energy. Exact zeros in either are
        replaced by machine epsilon.
    """

    # a negative configured upper band edge means "use samplerate/2"
    highfreq = int(conf['highfreq'])
    highfreq = samplerate/2 if highfreq < 0 else highfreq

    emphasized = sigproc.preemphasis(signal, float(conf['preemph']))
    frame_len = float(conf['winlen'])*samplerate
    frame_step = float(conf['winstep'])*samplerate
    frames = sigproc.framesig(emphasized, frame_len, frame_step)
    power = sigproc.powspec(frames, int(conf['nfft']))

    tiny = numpy.finfo(float).eps

    # total energy in each frame; zero would cause problems with a later log
    frame_energy = numpy.sum(power, 1)
    frame_energy = numpy.where(frame_energy == 0, tiny, frame_energy)

    bank = get_filterbanks(int(conf['nfilt']), int(conf['nfft']),
                           samplerate, int(conf['lowfreq']), highfreq)

    # filterbank energies, with the same zero replacement
    band_energy = numpy.dot(power, bank.T)
    band_energy = numpy.where(band_energy == 0, tiny, band_energy)

    return band_energy, frame_energy
예제 #16
0
def fbankVTLP(signal,
              samplerate=16000,
              winlen=0.025,
              winstep=0.01,
              nfilt=26,
              nfft=512,
              lowfreq=0,
              highfreq=None,
              preemph=0.97,
              appendEnergy=False,
              alpha=1.0):
    """Compute filterbank energy features using ``get_filterbanksVTLP``.

    :param signal: the audio signal to analyse. Should be an N*1 array.
    :param samplerate: the sample rate of the signal.
    :param winlen: analysis window length in seconds. Default is 0.025s.
    :param winstep: step between successive windows in seconds. Default is 0.01s.
    :param nfilt: number of filters in the filterbank. Default is 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of the mel filters, in Hz. Default is 0.
    :param highfreq: highest band edge of the mel filters, in Hz. Any falsy
        value (including the default None) selects samplerate/2.
    :param preemph: pre-emphasis coefficient; 0 means no filter. Default is 0.97.
    :param appendEnergy: if true, the log of the frame energy is appended to
        the features as an extra last column. Default is False.
    :param alpha: forwarded unchanged to ``get_filterbanksVTLP`` (presumably
        the VTLP warping factor; confirm against that helper). Default is 1.0.
    :returns: 2 values. The first is a numpy array with one feature vector per
        row (nfilt columns, plus one if appendEnergy is set); the second is
        the energy of each frame (sum of its power spectrum). Exact zeros in
        the energies are replaced by machine epsilon.
    """
    if not highfreq:
        highfreq = samplerate / 2

    tiny = numpy.finfo(float).eps

    emphasized = sigproc.preemphasis(signal, preemph)
    frame_len = winlen * samplerate
    frame_step = winstep * samplerate
    frames = sigproc.framesig(emphasized, frame_len, frame_step)
    power = sigproc.powspec(frames, nfft)

    # Total energy per frame; zero would cause problems with the log below.
    frame_energy = numpy.sum(power, 1)
    frame_energy = numpy.where(frame_energy == 0, tiny, frame_energy)

    bank = get_filterbanksVTLP(nfilt, nfft, samplerate, lowfreq, highfreq,
                               alpha)

    # Filterbank energies, with the same zero replacement.
    band_energy = numpy.dot(power, bank.T)
    band_energy = numpy.where(band_energy == 0, tiny, band_energy)

    if not appendEnergy:
        return band_energy, frame_energy

    # Append the log frame energy as an additional last column.
    log_energy = numpy.log(frame_energy)
    return numpy.c_[band_energy, log_energy], frame_energy
예제 #17
0
def fbank(signal, samplerate, conf):
    """
    Placeholder for computing fbank features from an audio signal.

    The computation is not implemented yet; calling this function always
    raises.

    Args:
        signal: the audio signal from which to compute features. Should be an
            N*1 array (currently unused)
        samplerate: the samplerate of the signal we are working with
            (currently unused)
        conf: feature configuration (currently unused)

    Returns:
        Never returns. The intended result is a numpy array of size
        (NUMFRAMES by nfilt) containing features and a numpy vector containing
        the signal energy.

    Raises:
        NotImplementedError: always.
    """

    # NotImplementedError replaces the bare BaseException that used to be
    # raised here: it carries the same message and is still caught by
    # ``except BaseException``, but ordinary ``except Exception`` handlers can
    # now deal with it too. The draft implementation that followed the raise
    # could never run and has been removed.
    raise NotImplementedError('Not yet implemented')
예제 #18
0
def ssc(signal, samplerate, conf):
    """
    Compute ssc features from an audio signal.

    Args:
        signal: the audio signal from which to compute features. Should be an
            N*1 array
        samplerate: the samplerate of the signal we are working with.
        conf: feature configuration; the keys 'highfreq', 'preemph', 'winlen',
            'winstep', 'nfft', 'nfilt' and 'lowfreq' are read. A negative
            'highfreq' selects samplerate/2.

    Returns:
        A numpy array of size (NUMFRAMES by nfilt) containing features, a numpy
        vector containing the signal log-energy
    """

    highfreq = int(conf['highfreq'])
    if highfreq < 0:
        highfreq = samplerate / 2
    signal = sigproc.preemphasis(signal, float(conf['preemph']))
    frames = sigproc.framesig(signal,
                              float(conf['winlen']) * samplerate,
                              float(conf['winstep']) * samplerate)
    pspec = sigproc.powspec(frames, int(conf['nfft']))

    # this stores the total energy in each frame (taken from the unmodified
    # power spectrum, so the returned log-energy is unaffected by the guard
    # applied to pspec below)
    energy = numpy.sum(pspec, 1)

    # if energy is zero, we get problems with log
    energy = numpy.where(energy == 0, numpy.finfo(float).eps, energy)

    # zero power bins (e.g. digital silence) would make the centroid
    # 0/0 = NaN, so replace them with machine epsilon before dividing
    pspec = numpy.where(pspec == 0, numpy.finfo(float).eps, pspec)

    filterbank = get_filterbanks(int(conf['nfilt']), int(conf['nfft']),
                                 samplerate, int(conf['lowfreq']), highfreq)

    # compute the filterbank energies
    feat = numpy.dot(pspec, filterbank.T)

    # frequency ramp from 1 to samplerate/2, repeated once per frame
    tiles = numpy.tile(numpy.linspace(1, samplerate / 2, numpy.size(pspec, 1)),
                       (numpy.size(pspec, 0), 1))

    return numpy.dot(pspec * tiles, filterbank.T) / feat, numpy.log(energy)