def fbank(signal, samplerate=16000, win_length=0.025, win_step=0.01, filters_num=26,
          NFFT=512, low_freq=0, high_freq=None, pre_emphasis_coeff=0.97):
    '''Compute filter-bank features and the per-frame energy of an audio signal.

    Only the filter-bank stage is performed here; no logarithm or DCT is
    applied to the result.

    Parameters:
    signal: input audio samples
    samplerate: sampling frequency
    win_length: window length (multiplied by samplerate before framing;
        presumably given in seconds)
    win_step: step between windows (multiplied by samplerate before framing)
    filters_num: number of Mel filters
    NFFT: FFT size
    low_freq: lowest frequency
    high_freq: highest frequency; any falsy value (None or 0) selects
        samplerate / 2
    pre_emphasis_coeff: pre-emphasis coefficient

    Returns:
    (feat, energy) where feat is the spectrum of each frame multiplied by the
    transposed filter banks and energy is the spectrum summed along axis 1.
    Exact zeros in both are replaced by the float machine epsilon.
    '''
    tiny = numpy.finfo(float).eps  # stand-in for exact zeros, so a later log is safe

    # Fall back to half the sampling rate when no upper frequency is given.
    if not high_freq:
        high_freq = samplerate / 2

    # Pre-emphasize the raw signal, then cut it into frames.
    emphasized = pre_emphasis(signal, pre_emphasis_coeff)
    frame_len = win_length * samplerate
    frame_hop = win_step * samplerate
    frames = audio2frame(emphasized, frame_len, frame_hop)

    # Spectrum of every frame after the FFT.
    power = spectrum_power(frames, NFFT)

    # Apply the filter banks to the spectrum.
    banks = get_filter_banks(filters_num, NFFT, samplerate, low_freq, high_freq)
    feat = numpy.dot(power, banks.T)
    feat = numpy.where(feat == 0, tiny, feat)

    # Total energy of each frame.
    energy = numpy.sum(power, axis=1)
    energy = numpy.where(energy == 0, tiny, energy)

    return feat, energy
def ssc(signal, samplerate=16000, win_length=0.025, win_step=0.01, filters_num=26,
        NFFT=512, low_freq=0, high_freq=None, pre_emphasis_coeff=0.97):
    '''Compute, per frame and per filter, the frequency-weighted filter-bank
    energy divided by the plain filter-bank energy.

    Parameters:
    signal: input audio samples
    samplerate: sampling frequency
    win_length: window length (multiplied by samplerate before framing)
    win_step: step between windows (multiplied by samplerate before framing)
    filters_num: number of filters
    NFFT: FFT size
    low_freq: lowest frequency
    high_freq: highest frequency; any falsy value (None or 0) selects
        samplerate / 2
    pre_emphasis_coeff: pre-emphasis coefficient

    Returns:
    The ratio dot(spectrum * R, fb.T) / dot(spectrum, fb.T), where R assigns
    each spectrum column a value evenly spaced from 1 to samplerate / 2.
    '''
    if not high_freq:
        high_freq = samplerate / 2

    emphasized = pre_emphasis(signal, pre_emphasis_coeff)
    frames = audio2frame(emphasized, win_length * samplerate, win_step * samplerate)

    # Spectrum of each frame, with exact zeros lifted to eps.
    power = spectrum_power(frames, NFFT)
    power = numpy.where(power == 0, numpy.finfo(float).eps, power)

    banks = get_filter_banks(filters_num, NFFT, samplerate, low_freq, high_freq)

    # One weight per spectrum column, repeated for every frame.
    n_frames = numpy.size(power, 0)
    n_bins = numpy.size(power, 1)
    bin_weights = numpy.linspace(1, samplerate / 2, n_bins)
    weights = numpy.tile(bin_weights, (n_frames, 1))

    band_energy = numpy.dot(power, banks.T)  # plain filter-bank energy
    weighted_energy = numpy.dot(power * weights, banks.T)
    return weighted_energy / band_energy
def FFTcoefficient(sig, samplerate=16000, win_length=0.025, win_step=0.01,
                   pre_emphasis_coeff=0.97, NFFT=512):
    '''
    Compute the orthonormal DCT-II of the log spectrum of each
    Hamming-windowed frame of a signal.

    :param sig: input audio samples
    :param samplerate: sampling frequency
    :param win_length: window length; win_length * samplerate is passed to
        audio2frame as the frame length and, rounded to an int, is the length
        of the Hamming window
    :param win_step: step between windows; win_step * samplerate is passed to
        audio2frame as the frame step
    :param pre_emphasis_coeff: pre-emphasis coefficient
    :param NFFT: FFT size passed to spectrum_power
    :return: type-2 DCT (norm='ortho', along axis 1) of the natural log of the
        zero-guarded spectrum
    '''
    # Pre-processing
    signal = pre_emphasis(sig, pre_emphasis_coeff)

    # Framing
    frames = audio2frame(signal, win_length * samplerate, win_step * samplerate)

    # Windowing
    # NOTE(review): assumes audio2frame rounds the frame length the same way,
    # so the window broadcasts against each frame -- confirm.
    frames *= np.hamming(int(round(win_length * samplerate)))

    # FFT
    fftfeat = spectrum_power(frames, NFFT)

    # Replace exact zeros with eps so the logarithm below cannot yield -inf.
    feat = np.where(fftfeat == 0, np.finfo(float).eps, fftfeat)

    # TODO: filtering

    # Take the log of the guarded array. The original took the log of the
    # unguarded `fftfeat`, which silently discarded the zero guard above.
    feat = np.log(feat)
    feat = dct(feat, type=2, axis=1, norm='ortho')
    return feat
def f_spectrum_power(self, wavefile):
    """Read a wav file and return the spectrum of its Hamming-windowed frames.

    Args:
        wavefile: path to the wav file; must name an existing regular file.

    Returns:
        The result of spectrum_power() on the pre-emphasized, framed and
        Hamming-windowed signal, using self.p_nfft as the FFT size.

    Raises:
        AssertionError: if wavefile is not an existing file.
    """
    # Keep AssertionError for callers, but say which path failed.
    assert os.path.isfile(wavefile), "wav file not found: %r" % (wavefile,)
    (rate, sig) = self.__readwav(wavefile)

    # Pre-emphasize the raw signal.
    signal = pre_emphasis(sig, self.p_pre_emphasis_coeff)
    # Cut the signal into frames.
    frames = audio2frame(signal, self.p_win_len * rate, self.p_win_step * rate)
    # Apply a Hamming window to every frame.
    frames *= np.hamming(int(round(self.p_win_len * rate)))
    # Spectrum of every frame after the FFT.
    spec_power = spectrum_power(frames, self.p_nfft)
    return spec_power
def log_spectrum_power(frames, NFFT, norm=1):
    '''Compute the spectrum of each frame in decibel form (10 * log10).

    Parameters:
    frames: frame matrix, i.e. the matrix returned by audio2frame
    NFFT: FFT size
    norm: flag; when truthy, the global maximum of the log spectrum is
        subtracted so the largest value becomes 0

    Returns:
    The log spectrum, optionally shifted by its maximum.
    '''
    floor = 1e-30
    power = spectrum_power(frames, NFFT)

    # Clamp tiny values, since the logarithm of 0 is undefined.
    too_small = power < floor
    power[too_small] = floor

    log_power = 10 * numpy.log10(power)
    if not norm:
        return log_power
    return log_power - numpy.max(log_power)
def fbank(signal,
          samplerate=16000,
          win_length=0.025,
          win_step=0.01,
          filters_num=26,
          NFFT=512,
          low_freq=0,
          high_freq=None,
          pre_emphasis_coeff=0.97):
    """Perform pre-emphasis -> framing -> spectrum -> Mel filtering.

    Args:
        signal: 1-D numpy array.
        samplerate: Sampling rate. Defaults to 16 kHz.
        win_length: Window length. Defaults to 0.025, i.e. 25 ms per frame.
        win_step: Interval between the start points of adjacent frames.
            Defaults to 0.01, i.e. 10 ms.
        filters_num: Number of filters. Defaults to 26.
        NFFT: Size of the FFT. Defaults to 512.
        low_freq: Lowest frequency.
        high_freq: Highest frequency. Any falsy value (None or 0) selects
            samplerate / 2.
        pre_emphasis_coeff: Coefficient for pre-emphasis. Defaults to 0.97.

    Returns:
        feat: Spectrum of each frame multiplied by the transposed filter
            banks, with exact zeros replaced by eps.
        energy: Spectrum of each frame summed along axis 1, with exact zeros
            replaced by eps.
    """
    # Resolve the highest frequency.
    if high_freq:
        top_freq = high_freq
    else:
        top_freq = samplerate / 2

    # Pre-emphasis.
    boosted = pre_emphasis(signal, pre_emphasis_coeff)

    # frames: 2-D numpy array with shape (frame_num, frame_length).
    frames = audio2frame(boosted, win_length * samplerate, win_step * samplerate)

    # Spectrum of each frame, and its per-frame sum with zeros lifted to eps.
    spectrum = spectrum_power(frames, NFFT)
    eps = numpy.finfo(float).eps
    energy = numpy.sum(spectrum, 1)
    silent = energy == 0
    energy = numpy.where(silent, eps, energy)

    # Get the Mel filter banks.
    mel_banks = get_filter_banks(filters_num, NFFT, samplerate, low_freq, top_freq)

    # Filter-bank features, again with zeros lifted to eps.
    feat = numpy.dot(spectrum, mel_banks.T)
    empty = feat == 0
    feat = numpy.where(empty, eps, feat)

    return feat, energy
def fbank(signal, samplerate=16000, win_length=0.025, win_step=0.01, filters_num=26,
          NFFT=512, low_freq=0, high_freq=None, pre_emphasis_coeff=0.97):
    '''Compute filter-bank features and frame energies of an audio signal.

    Parameters:
    signal: input audio samples
    samplerate: sampling frequency
    win_length: window length (multiplied by samplerate before framing)
    win_step: window step (multiplied by samplerate before framing)
    filters_num: number of Mel filters
    NFFT: FFT size
    low_freq: lowest frequency
    high_freq: highest frequency; a falsy value (None or 0) means
        samplerate / 2
    pre_emphasis_coeff: pre-emphasis coefficient

    Returns:
    A (feat, energy) tuple: the per-frame spectrum multiplied by the
    transposed filter banks, and the per-frame spectrum summed along axis 1.
    Neither contains an exact zero.
    '''
    def _lift_zeros(values):
        # Swap exact zeros for eps so a later logarithm stays finite.
        return numpy.where(values == 0, numpy.finfo(float).eps, values)

    # Upper frequency bound: the caller's value, or half the sampling rate.
    upper = high_freq if high_freq else samplerate / 2

    # Pre-emphasize the raw signal and split it into frames.
    frames = audio2frame(
        pre_emphasis(signal, pre_emphasis_coeff),
        win_length * samplerate,
        win_step * samplerate,
    )

    # Spectrum of each frame after the FFT, and its per-frame sum.
    spectrum = spectrum_power(frames, NFFT)
    frame_energy = _lift_zeros(numpy.sum(spectrum, 1))

    # Multiply the spectrum by the filter banks.
    filters = get_filter_banks(filters_num, NFFT, samplerate, low_freq, upper)
    return _lift_zeros(numpy.dot(spectrum, filters.T)), frame_energy