Beispiel #1
0
def ssc(signal,
        samplerate=16000,
        win_length=0.025,
        win_step=0.01,
        filters_num=26,
        NFFT=512,
        low_freq=0,
        high_freq=None,
        pre_emphasis_coeff=0.97):
    """Return a power-weighted mean frequency for every frame and filter.

    The signal is pre-emphasized, split into frames and turned into a power
    spectrum. For each frame and each filter the result is the ratio

        sum(power * freq * filter) / sum(power * filter)

    where ``freq`` is an evenly spaced grid from 1 to ``samplerate / 2``.
    NOTE(review): the name suggests "spectral subband centroids" -- confirm.

    Args:
        signal: Input audio, passed unchanged to ``pre_emphasis``
            (presumably a 1-D numpy array -- TODO confirm).
        samplerate: Sampling rate. Defaulted to 16000.
        win_length: Window length; multiplied by ``samplerate`` before being
            passed to ``audio2frame``. Defaulted to 0.025.
        win_step: Step between frames; multiplied by ``samplerate`` before
            being passed to ``audio2frame``. Defaulted to 0.01.
        filters_num: Number of filters requested from ``get_filter_banks``.
            Defaulted to 26.
        NFFT: FFT size handed to ``spectrum_power`` and
            ``get_filter_banks``. Defaulted to 512.
        low_freq: Lowest frequency handed to ``get_filter_banks``.
        high_freq: Highest frequency handed to ``get_filter_banks``. Any
            falsy value (``None`` or 0) is replaced by ``samplerate / 2``.
        pre_emphasis_coeff: Coefficient handed to ``pre_emphasis``.
            Defaulted to 0.97.

    Returns:
        Array of weighted mean frequencies, one row per frame of the power
        spectrum and one column per row of the filter-bank matrix.
    """
    high_freq = high_freq or samplerate / 2

    emphasized = pre_emphasis(signal, pre_emphasis_coeff)
    frame_length = win_length * samplerate
    frame_step = win_step * samplerate
    frames = audio2frame(emphasized, frame_length, frame_step)

    # Power spectrum; exact zeros become eps so the final division is safe.
    power = spectrum_power(frames, NFFT)
    eps = numpy.finfo(float).eps
    power = numpy.where(power == 0, eps, power)

    filter_banks = get_filter_banks(filters_num, NFFT, samplerate, low_freq,
                                    high_freq)

    # Per-filter energy of every frame (the denominator of the ratio).
    band_energy = numpy.dot(power, filter_banks.T)

    # Frequency grid repeated for every frame so it lines up with `power`.
    frame_count = numpy.size(power, 0)
    bin_count = numpy.size(power, 1)
    freq_axis = numpy.linspace(1, samplerate / 2, bin_count)
    freq_grid = numpy.tile(freq_axis, (frame_count, 1))

    weighted_energy = numpy.dot(power * freq_grid, filter_banks.T)
    return weighted_energy / band_energy
Beispiel #2
0
def fbank(signal,
          samplerate=16000,
          win_length=0.025,
          win_step=0.01,
          filters_num=26,
          NFFT=512,
          low_freq=0,
          high_freq=None,
          pre_emphasis_coeff=0.97):
    """Compute filter-bank features and per-frame energy of a signal.

    Pipeline: pre-emphasis -> framing -> power spectrum -> projection onto
    the filter banks returned by ``get_filter_banks``.

    Args:
        signal: 1-D numpy array.
        samplerate: Sampling rate. Defaulted to 16KHz.
        win_length: Window length. Defaulted to 0.025, which is 25ms/frame.
        win_step: Interval between the start points of adjacent frames.
            Defaulted to 0.01, which is 10ms.
        filters_num: Number of filters. Defaulted to 26.
        NFFT: Size of FFT. Defaulted to 512.
        low_freq: Lowest frequency.
        high_freq: Highest frequency. Any falsy value (``None`` or 0) is
            replaced by ``samplerate / 2``.
        pre_emphasis_coeff: Coefficient for pre-emphasis. Defaulted to 0.97.

    Returns:
        feat: Power spectrum multiplied by the transposed filter-bank
            matrix, with exact zeros replaced by machine epsilon.
        energy: Sum of the power spectrum along axis 1 (one value per
            frame), with exact zeros replaced by machine epsilon.
    """
    # Fall back to half the sampling rate when no upper bound is supplied.
    if not high_freq:
        high_freq = samplerate / 2

    emphasized = pre_emphasis(signal, pre_emphasis_coeff)

    # frames: presumably 2-D, shape (frame_num, frame_length) -- TODO confirm.
    frame_length = win_length * samplerate
    frame_step = win_step * samplerate
    frames = audio2frame(emphasized, frame_length, frame_step)

    power = spectrum_power(frames, NFFT)
    eps = numpy.finfo(float).eps

    # Total energy per frame; zeros become eps (e.g. so a later log is safe).
    frame_energy = numpy.sum(power, 1)
    energy = numpy.where(frame_energy == 0, eps, frame_energy)

    # Filter-bank outputs per frame; zeros become eps for the same reason.
    filter_banks = get_filter_banks(filters_num, NFFT, samplerate, low_freq,
                                    high_freq)
    band_energy = numpy.dot(power, filter_banks.T)
    feat = numpy.where(band_energy == 0, eps, band_energy)

    return feat, energy