Example #1
0
def fbank(signal,
          samplerate=16000,
          win_length=0.025,
          win_step=0.01,
          filters_num=26,
          NFFT=512,
          low_freq=0,
          high_freq=None,
          pre_emphasis_coeff=0.97):
    """Compute filter-bank features and per-frame energy of an audio signal.

    The signal is pre-emphasised, cut into frames, converted to a per-frame
    power spectrum and projected onto the filter bank returned by
    ``get_filter_banks``.

    Args:
        signal: Audio samples, forwarded to ``pre_emphasis``.
        samplerate: Sampling rate of ``signal``.
        win_length: Window length; multiplied by ``samplerate`` before being
            handed to ``audio2frame``.
        win_step: Step between windows; multiplied by ``samplerate`` before
            being handed to ``audio2frame``.
        filters_num: Number of filters requested from ``get_filter_banks``.
        NFFT: FFT size passed to ``spectrum_power`` and ``get_filter_banks``.
        low_freq: Lowest frequency of the filter bank.
        high_freq: Highest frequency of the filter bank. Any falsy value
            (``None``, ``0``) selects ``samplerate / 2``.
        pre_emphasis_coeff: Coefficient forwarded to ``pre_emphasis``.

    Returns:
        A ``(feat, energy)`` tuple: the filter-bank projection of the power
        spectrum and the summed power of each frame. Exact zeros in either
        array are replaced by machine epsilon so a later logarithm is safe.
    """
    if not high_freq:
        high_freq = samplerate / 2

    # Smallest positive float increment, used as a stand-in for exact zeros.
    eps = numpy.finfo(float).eps

    emphasized = pre_emphasis(signal, pre_emphasis_coeff)
    frame_len = win_length * samplerate
    frame_step = win_step * samplerate
    frames = audio2frame(emphasized, frame_len, frame_step)

    power = spectrum_power(frames, NFFT)

    # Total power per frame (sum over axis 1).
    frame_energy = numpy.sum(power, axis=1)
    frame_energy = numpy.where(frame_energy == 0, eps, frame_energy)

    bank = get_filter_banks(filters_num, NFFT, samplerate, low_freq,
                            high_freq)
    filtered = numpy.dot(power, bank.T)
    filtered = numpy.where(filtered == 0, eps, filtered)

    return filtered, frame_energy
Example #2
0
def ssc(signal,
        samplerate=16000,
        win_length=0.025,
        win_step=0.01,
        filters_num=26,
        NFFT=512,
        low_freq=0,
        high_freq=None,
        pre_emphasis_coeff=0.97):
    """Compute a frequency-weighted centroid for every filter and frame.

    For each frame the power spectrum is weighted by a linear frequency axis
    running from 1 to ``samplerate / 2``, projected onto the filter bank, and
    divided by the unweighted filter-bank projection.

    Args:
        signal: Audio samples, forwarded to ``pre_emphasis``.
        samplerate: Sampling rate of ``signal``.
        win_length: Window length; multiplied by ``samplerate`` before being
            handed to ``audio2frame``.
        win_step: Step between windows; multiplied by ``samplerate`` before
            being handed to ``audio2frame``.
        filters_num: Number of filters requested from ``get_filter_banks``.
        NFFT: FFT size passed to ``spectrum_power`` and ``get_filter_banks``.
        low_freq: Lowest frequency of the filter bank.
        high_freq: Highest frequency of the filter bank. Any falsy value
            selects ``samplerate / 2``.
        pre_emphasis_coeff: Coefficient forwarded to ``pre_emphasis``.

    Returns:
        The ratio of the frequency-weighted filter-bank output to the plain
        filter-bank output.
    """
    if not high_freq:
        high_freq = samplerate / 2

    emphasized = pre_emphasis(signal, pre_emphasis_coeff)
    frame_len = win_length * samplerate
    frame_step = win_step * samplerate
    frames = audio2frame(emphasized, frame_len, frame_step)

    # Power spectrum with exact zeros replaced by machine epsilon.
    power = spectrum_power(frames, NFFT)
    power = numpy.where(power == 0, numpy.finfo(float).eps, power)

    bank = get_filter_banks(filters_num, NFFT, samplerate, low_freq,
                            high_freq)
    band_energy = numpy.dot(power, bank.T)

    # One copy of the frequency axis per frame, matching the shape of power.
    n_frames = numpy.size(power, 0)
    n_bins = numpy.size(power, 1)
    freq_axis = numpy.linspace(1, samplerate / 2, n_bins)
    freq_weights = numpy.tile(freq_axis, (n_frames, 1))

    weighted_energy = numpy.dot(power * freq_weights, bank.T)
    return weighted_energy / band_energy
Example #3
0
def FFTcoefficient(sig,
                   samplerate=16000,
                   win_length=0.025,
                   win_step=0.01,
                   pre_emphasis_coeff=0.97,
                   NFFT=512):
    """Compute DCT coefficients of the log power spectrum of each frame.

    Pipeline: pre-emphasis -> framing -> Hamming window -> power spectrum
    -> natural log -> type-2 orthonormal DCT along axis 1.

    :param sig: Audio samples, forwarded to ``pre_emphasis``.
    :param samplerate: Sampling rate of ``sig``.
    :param win_length: Window length; multiplied by ``samplerate`` to get
        the frame length handed to ``audio2frame`` and the Hamming window
        size.
    :param win_step: Step between windows; multiplied by ``samplerate``
        before being handed to ``audio2frame``.
    :param pre_emphasis_coeff: Coefficient forwarded to ``pre_emphasis``.
    :param NFFT: FFT size passed to ``spectrum_power``.
    :return: Array of DCT coefficients, transformed along axis 1.
    """
    # Pre-emphasis.
    signal = pre_emphasis(sig, pre_emphasis_coeff)

    # Framing.
    frames = audio2frame(signal, win_length * samplerate,
                         win_step * samplerate)

    # Windowing: the window length must match the frame length used above.
    frames *= np.hamming(int(round(win_length * samplerate)))

    # Per-frame spectrum from the project's ``spectrum_power`` helper.
    fftfeat = spectrum_power(frames, NFFT)

    # Replace exact zeros with machine epsilon so the logarithm stays finite.
    feat = np.where(fftfeat == 0, np.finfo(float).eps, fftfeat)

    # TODO: filtering

    # Take the log of the zero-guarded array. The previous code logged the
    # raw ``fftfeat``, which discarded the guard and produced -inf for
    # zero-power bins.
    feat = np.log(feat)

    feat = dct(feat, type=2, axis=1, norm='ortho')

    return feat
Example #4
0
 def f_spectrum_power(self, wavefile):
     """Return the per-frame power spectrum of a wave file.

     The file is read with ``self.__readwav``, pre-emphasised with
     ``self.p_pre_emphasis_coeff``, framed using ``self.p_win_len`` and
     ``self.p_win_step`` (each multiplied by the file's sample rate),
     Hamming-windowed, and passed to ``spectrum_power`` with
     ``self.p_nfft``.

     Args:
         wavefile: Path of the wave file to analyse.

     Returns:
         The value returned by ``spectrum_power`` for the windowed frames.

     Raises:
         FileNotFoundError: If ``wavefile`` is not an existing regular file.
     """
     # Explicit check instead of ``assert``: asserts are stripped under
     # ``python -O``, which would let a bad path reach the reader.
     if not os.path.isfile(wavefile):
         raise FileNotFoundError(
             "wave file not found: {!r}".format(wavefile))

     (rate, sig) = self.__readwav(wavefile)

     # Pre-emphasis of the raw samples.
     signal = pre_emphasis(sig, self.p_pre_emphasis_coeff)

     # Split into frames.
     frames = audio2frame(signal, self.p_win_len * rate,
                          self.p_win_step * rate)

     # Windowing: the window length must match the frame length used above.
     frames *= np.hamming(int(round(self.p_win_len * rate)))

     # Power spectrum of every frame.
     spec_power = spectrum_power(frames, self.p_nfft)
     return spec_power
Example #5
0
def log_spectrum_power(frames,NFFT,norm=1):
    """Return the power spectrum of each frame on a decibel scale.

    Args:
        frames: Frame matrix, forwarded to ``spectrum_power``.
        NFFT: FFT size, forwarded to ``spectrum_power``.
        norm: Truthy flag. When set, the global maximum is subtracted so
            the largest value in the result is 0.

    Returns:
        ``10 * log10`` of the power spectrum, optionally shifted so that its
        maximum is 0.
    """
    power = spectrum_power(frames, NFFT)

    # Clamp tiny values in place so log10 never sees zero.
    floor = 1e-30
    below_floor = power < floor
    power[below_floor] = floor

    power_db = 10 * numpy.log10(power)
    if not norm:
        return power_db
    return power_db - numpy.max(power_db)
Example #6
0
def fbank(signal,
          samplerate=16000,
          win_length=0.025,
          win_step=0.01,
          filters_num=26,
          NFFT=512,
          low_freq=0,
          high_freq=None,
          pre_emphasis_coeff=0.97):
    """Run pre-emphasis -> framing -> power spectrum -> filter bank.

    Args:
        signal: Audio samples, forwarded to ``pre_emphasis``.
        samplerate: Sampling rate. Defaults to 16000.
        win_length: Window length. Defaults to 0.025; it is multiplied by
            ``samplerate`` before framing.
        win_step: Interval between the start points of adjacent frames.
            Defaults to 0.01; it is multiplied by ``samplerate`` before
            framing.
        filters_num: Number of filters. Defaults to 26.
        NFFT: Size of the FFT. Defaults to 512.
        low_freq: Lowest frequency of the filter bank.
        high_freq: Highest frequency of the filter bank. A falsy value
            selects ``samplerate / 2``.
        pre_emphasis_coeff: Coefficient for pre-emphasis. Defaults to 0.97.

    Returns:
        feat: Power spectrum projected onto the filter bank, with exact
            zeros replaced by machine epsilon.
        energy: Power summed over axis 1 of the spectrum, with exact zeros
            replaced by machine epsilon.
    """

    def _replace_zeros(values):
        # Swap exact zeros for machine epsilon so a later log is defined.
        return numpy.where(values == 0, numpy.finfo(float).eps, values)

    # Fall back to half the sampling rate when no upper bound is given.
    high_freq = high_freq if high_freq else samplerate / 2

    # Pre-emphasis, then framing.
    emphasized = pre_emphasis(signal, pre_emphasis_coeff)
    frames = audio2frame(emphasized,
                         win_length * samplerate,
                         win_step * samplerate)

    # Power spectrum and total energy of every frame.
    power = spectrum_power(frames, NFFT)
    energy = _replace_zeros(numpy.sum(power, 1))

    # Project the power spectrum onto the filter bank.
    filter_bank = get_filter_banks(filters_num, NFFT, samplerate, low_freq,
                                   high_freq)
    feat = _replace_zeros(numpy.dot(power, filter_bank.T))

    return feat, energy
Example #7
0
def fbank(signal,samplerate=16000,win_length=0.025,win_step=0.01,filters_num=26,NFFT=512,low_freq=0,high_freq=None,pre_emphasis_coeff=0.97):
    """Compute filter-bank features and per-frame energy of an audio signal.

    Parameters:
        signal: audio samples, forwarded to ``pre_emphasis``
        samplerate: sampling rate
        win_length: window length (multiplied by ``samplerate`` for framing)
        win_step: window step (multiplied by ``samplerate`` for framing)
        filters_num: number of filters in the bank
        NFFT: FFT size
        low_freq: lowest frequency of the filter bank
        high_freq: highest frequency of the filter bank; a falsy value
            selects ``samplerate / 2``
        pre_emphasis_coeff: pre-emphasis coefficient

    Returns:
        ``(feat, energy)``: the filter-bank projection of the power spectrum
        and the summed power per frame, both with exact zeros replaced by
        machine epsilon.
    """
    # Default upper band edge: half the sampling rate.
    if not high_freq:
        high_freq = samplerate / 2

    # Pre-emphasise, frame, and take the power spectrum in one pipeline.
    power = spectrum_power(
        audio2frame(
            pre_emphasis(signal, pre_emphasis_coeff),
            win_length * samplerate,
            win_step * samplerate,
        ),
        NFFT,
    )

    # Exact zeros are replaced by this value so a later log is defined.
    tiny = numpy.finfo(float).eps

    # Summed power of each frame.
    totals = numpy.sum(power, 1)
    energy = numpy.where(totals == 0, tiny, totals)

    # Project the power spectrum onto the filter bank.
    bank = get_filter_banks(filters_num, NFFT, samplerate, low_freq, high_freq)
    projected = numpy.dot(power, bank.T)
    feat = numpy.where(projected == 0, tiny, projected)

    return feat, energy