Example #1
0
def fbank(signal,
          samplerate=16000,
          winlen=0.025,
          winstep=0.01,
          nfilt=26,
          nfft=512,
          lowfreq=0,
          highfreq=None,
          preemph=0.97):
    """Compute Mel-filterbank energy features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)    
    :param winstep: the step between seccessive windows in seconds. Default is 0.01s (10 milliseconds)    
    :param nfilt: the number of filters in the filterbank, default 20.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. 
    :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The
        second return value is the energy in each frame (total energy, unwindowed)
    """
    highfreq = highfreq or samplerate / 2
    signal = sp.preemphasis(signal, preemph)
    frames = sp.framesig(signal, winlen * samplerate, winstep * samplerate)
    pspec = sp.powspec(frames, nfft)
    energy = np.sum(pspec, 1)  # this stores the total energy in each frame

    fb = get_filterbanks(nfilt, nfft, samplerate)
    feat = np.dot(pspec, fb.T)  # compute the filterbank energies
    return feat, energy
Example #2
0
def ssc(signal,samplerate=16000,winlen=0.025,winstep=0.01,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
    """Compute Spectral Subband Centroid features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)    
    :param winstep: the step between seccessive windows in seconds. Default is 0.01s (10 milliseconds)    
    :param nfilt: the number of filters in the filterbank, default 20.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. 
    :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. 
    """          
    highfreq= highfreq or samplerate/2
    signal = sp.preemphasis(signal,preemph)
    frames = sp.framesig(signal, winlen*samplerate, winstep*samplerate)
    pspec = sp.powspec(frames,nfft)
    
    fb = get_filterbanks(nfilt,nfft,samplerate)
    feat = np.dot(pspec,fb.T) # compute the filterbank energies
    R = np.tile(np.linspace(1,samplerate/2,np.size(pspec,1)),(np.size(pspec,0),1))
    
    return np.dot(pspec*R,fb.T) / feat
Example #3
0
def ssc(signal,
        samplerate=16000,
        winlen=0.025,
        winstep=0.01,
        nfilt=26,
        nfft=512,
        lowfreq=0,
        highfreq=None,
        preemph=0.97):
    """Compute Spectral Subband Centroid features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)    
    :param winstep: the step between seccessive windows in seconds. Default is 0.01s (10 milliseconds)    
    :param nfilt: the number of filters in the filterbank, default 20.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. 
    :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. 
    """
    highfreq = highfreq or samplerate / 2
    signal = sp.preemphasis(signal, preemph)
    frames = sp.framesig(signal, winlen * samplerate, winstep * samplerate)
    pspec = sp.powspec(frames, nfft)

    fb = get_filterbanks(nfilt, nfft, samplerate)
    feat = np.dot(pspec, fb.T)  # compute the filterbank energies
    R = np.tile(np.linspace(1, samplerate / 2, np.size(pspec, 1)),
                (np.size(pspec, 0), 1))

    return np.dot(pspec * R, fb.T) / feat
Example #4
0
def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
    """Compute Mel-filterbank energy features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)    
    :param winstep: the step between seccessive windows in seconds. Default is 0.01s (10 milliseconds)    
    :param nfilt: the number of filters in the filterbank, default 20.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. 
    :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The
        second return value is the energy in each frame (total energy, unwindowed)
    """          
    highfreq= highfreq or samplerate/2
    signal = sp.preemphasis(signal,preemph)
    frames = sp.framesig(signal, winlen*samplerate, winstep*samplerate)
    pspec = sp.powspec(frames,nfft)
    energy = np.sum(pspec,1) # this stores the total energy in each frame
    
    fb = get_filterbanks(nfilt,nfft,samplerate)
    feat = np.dot(pspec,fb.T) # compute the filterbank energies
    return feat,energy