コード例 #1
0
def ssc(signal,
        samplerate=16000,
        win_length=0.025,
        win_step=0.01,
        filters_num=26,
        NFFT=512,
        low_freq=0,
        high_freq=None,
        pre_emphasis_coeff=0.97):
    """Return a frequency-weighted average of filter-bank power per frame.

    For every frame and every filter the result is the filtered power
    spectrum weighted by a linear frequency axis, divided by the plain
    filtered power (presumably the "spectral subband centroid" the name
    refers to -- confirm against the callers).

    Args:
        signal: Input audio samples, forwarded to ``pre_emphasis``.
        samplerate: Sampling rate; scales the window sizes and the
            frequency axis.
        win_length: Window length, multiplied by ``samplerate`` before
            being passed to ``audio2frame``.
        win_step: Window step, multiplied by ``samplerate`` before being
            passed to ``audio2frame``.
        filters_num: Number of filters requested from ``get_filter_banks``.
        NFFT: FFT size passed to ``spectrum_power`` and
            ``get_filter_banks``.
        low_freq: Lower edge passed to ``get_filter_banks``.
        high_freq: Upper edge passed to ``get_filter_banks``; any falsy
            value (``None`` or ``0``) selects ``samplerate / 2``.
        pre_emphasis_coeff: Coefficient passed to ``pre_emphasis``.

    Returns:
        Array with one row per frame and one column per filter.
    """
    if not high_freq:
        high_freq = samplerate / 2

    emphasized = pre_emphasis(signal, pre_emphasis_coeff)
    frame_len = win_length * samplerate
    frame_step = win_step * samplerate
    frames = audio2frame(emphasized, frame_len, frame_step)

    # Power spectrum; zeros are replaced by eps so the final division
    # never sees an all-zero numerator/denominator pair.
    power = spectrum_power(frames, NFFT)
    tiny = numpy.finfo(float).eps
    power = numpy.where(power == 0, tiny, power)

    banks = get_filter_banks(filters_num, NFFT, samplerate, low_freq,
                             high_freq)
    band_energy = numpy.dot(power, banks.T)

    # One linear frequency axis (1 .. samplerate / 2), repeated per frame.
    frame_count = numpy.size(power, 0)
    bin_count = numpy.size(power, 1)
    freq_axis = numpy.linspace(1, samplerate / 2, bin_count)
    freq_grid = numpy.tile(freq_axis, (frame_count, 1))

    weighted = numpy.dot(power * freq_grid, banks.T)
    return weighted / band_energy
コード例 #2
0
def fbank(signal,
          samplerate=16000,
          win_length=0.025,
          win_step=0.01,
          filters_num=26,
          NFFT=512,
          low_freq=0,
          high_freq=None,
          pre_emphasis_coeff=0.97):
    """Compute filter-bank energies and the total energy of each frame.

    Args:
        signal: Input audio samples.
        samplerate: Sampling rate.
        win_length: Window length (multiplied by ``samplerate`` before
            framing).
        win_step: Window step (multiplied by ``samplerate`` before
            framing).
        filters_num: Number of filters requested from ``get_filter_banks``.
        NFFT: FFT size.
        low_freq: Lowest filter-bank frequency.
        high_freq: Highest filter-bank frequency; a falsy value selects
            ``samplerate / 2``.
        pre_emphasis_coeff: Pre-emphasis coefficient.

    Returns:
        A ``(feat, energy)`` tuple: ``feat`` is the power spectrum
        projected onto the filter bank, ``energy`` is the per-frame sum of
        the power spectrum. Zeros in both are replaced by machine epsilon
        so a later logarithm stays finite.
    """
    if not high_freq:
        high_freq = samplerate / 2
    tiny = numpy.finfo(float).eps

    # Pre-emphasis followed by framing.
    emphasized = pre_emphasis(signal, pre_emphasis_coeff)
    frame_len = win_length * samplerate
    frame_step = win_step * samplerate
    frames = audio2frame(emphasized, frame_len, frame_step)

    # Per-frame power spectrum and its total energy.
    power = spectrum_power(frames, NFFT)
    frame_energy = numpy.sum(power, 1)
    frame_energy = numpy.where(frame_energy == 0, tiny, frame_energy)

    # Project the power spectrum onto the filter bank.
    banks = get_filter_banks(filters_num, NFFT, samplerate, low_freq,
                             high_freq)
    band_energy = numpy.dot(power, banks.T)
    band_energy = numpy.where(band_energy == 0, tiny, band_energy)

    return band_energy, frame_energy
コード例 #3
0
def getDataSet(wav_files,
               label,
               img_rows=864,
               FrameLen=256,
               FrameInc=64,
               NFFT=800,
               winfunc=lambda x: np.ones((x, ))):
    """Build fixed-height log power spectrum samples from WAV files.

    Each file is read, framed and converted with ``log_spectrum_power``.
    The resulting (frames x bins) matrix is then normalised to exactly
    ``img_rows`` rows:

    * If it has at least ``img_rows`` rows it is cut into as many complete
      ``img_rows``-row chunks as fit (the remainder is dropped); every
      chunk becomes one sample carrying the file's label.
    * Otherwise it is repeated whole as often as fits and then topped up
      with its leading rows, giving a single sample.

    Args:
        wav_files: Iterable of WAV file paths.
        label: Sequence of labels, indexed in parallel with ``wav_files``.
        img_rows: Number of rows (frames) per output sample.
        FrameLen: Frame length passed to ``audio2frame``.
        FrameInc: Frame step passed to ``audio2frame``.
        NFFT: FFT size passed to ``log_spectrum_power``.
        winfunc: Window function passed to ``audio2frame``; the default
            produces an all-ones (rectangular) window.

    Returns:
        ``(X, Y)``: a list of 2-D arrays with ``img_rows`` rows each and
        the list of matching labels.

    Raises:
        ZeroDivisionError: If a file yields zero frames while ``img_rows``
            is positive (nothing to repeat).
    """
    X = []
    Y = []
    for i, singleWav in enumerate(wav_files):
        # The sample rate is not needed; framing is specified in samples.
        _rate, sig = wav.read(singleWav)
        frames = audio2frame(sig, FrameLen, FrameInc, winfunc)
        logPowSpec = log_spectrum_power(frames, NFFT, norm=1)
        dim1, dim2 = logPowSpec.shape
        if dim1 >= img_rows:
            # Floor division keeps the count an int: it is used as a slice
            # bound, a reshape dimension and a list repeat count.
            divNum = dim1 // img_rows
            logPowSpec = logPowSpec[0:divNum * img_rows, :]
            logPowSpec = logPowSpec.reshape(divNum, img_rows, -1)
            X.extend(logPowSpec)
            Y.extend([label[i]] * divNum)
        else:
            divNum = img_rows // dim1
            mod = img_rows % dim1
            # Stack divNum full copies, then pad with the first `mod` rows
            # so the sample has exactly img_rows rows.
            logPowSpec = np.array([logPowSpec] * divNum).reshape(
                (divNum * dim1, dim2))
            logPowSpec = np.concatenate((logPowSpec, logPowSpec[0:mod, :]))
            X.append(logPowSpec)
            Y.append(label[i])
    return X, Y
コード例 #4
0
def FFTcoefficient(sig,
                   samplerate=16000,
                   win_length=0.025,
                   win_step=0.01,
                   pre_emphasis_coeff=0.97,
                   NFFT=512):
    """Compute the DCT of the log power spectrum of each frame.

    Pipeline: pre-emphasis -> framing -> Hamming window -> power spectrum
    -> natural log -> orthonormal type-2 DCT along the frequency axis.

    :param sig: input audio samples
    :param samplerate: sampling rate used to convert the window sizes
    :param win_length: window length (multiplied by ``samplerate``)
    :param win_step: window step (multiplied by ``samplerate``)
    :param pre_emphasis_coeff: coefficient passed to ``pre_emphasis``
    :param NFFT: FFT size passed to ``spectrum_power``
    :return: array with one row of DCT coefficients per frame
    """
    # Pre-emphasis.
    signal = pre_emphasis(sig, pre_emphasis_coeff)
    # Framing.
    frames = audio2frame(signal, win_length * samplerate,
                         win_step * samplerate)
    # Apply a Hamming window to every frame (in place).
    frames *= np.hamming(int(round(win_length * samplerate)))
    # Per-frame power spectrum.
    fftfeat = spectrum_power(frames, NFFT)
    # Replace exact zeros by eps so the logarithm below stays finite.
    feat = np.where(fftfeat == 0, np.finfo(float).eps, fftfeat)

    # TODO: filtering

    # Take the log of the zero-guarded spectrum (not of the raw one,
    # which would yield -inf for empty bins).
    feat = np.log(feat)

    feat = dct(feat, type=2, axis=1, norm='ortho')

    return feat
コード例 #5
0
 def f_spectrum_power(self, wavefile):
     """Return the per-frame power spectrum of a WAV file.

     The file is read, pre-emphasised, framed, Hamming-windowed and passed
     to ``spectrum_power``. Window length/step, pre-emphasis coefficient
     and FFT size come from the instance attributes ``p_win_len``,
     ``p_win_step``, ``p_pre_emphasis_coeff`` and ``p_nfft``.

     Args:
         wavefile: Path to an existing WAV file.

     Returns:
         Whatever ``spectrum_power`` returns for the windowed frames.

     Raises:
         AssertionError: If ``wavefile`` is not an existing file.
     """
     # Kept as an assert so callers see the same exception type as before.
     assert os.path.isfile(wavefile)
     (rate, sig) = self.__readwav(wavefile)
     # Pre-emphasis of the raw signal.
     signal = pre_emphasis(sig, self.p_pre_emphasis_coeff)
     # Framing; window length and step are scaled by the file's rate.
     frames = audio2frame(signal, self.p_win_len * rate,
                          self.p_win_step * rate)
     # Apply a Hamming window to every frame (in place).
     frames *= np.hamming(int(round(self.p_win_len * rate)))
     # Per-frame power spectrum.
     spec_power = spectrum_power(frames, self.p_nfft)
     return spec_power
コード例 #6
0
def fbank(signal,
          samplerate=16000,
          win_length=0.025,
          win_step=0.01,
          filters_num=26,
          NFFT=512,
          low_freq=0,
          high_freq=None,
          pre_emphasis_coeff=0.97):
    """Run pre-emphasis -> framing -> power spectrum -> filter bank.

    Args:
        signal: Input audio samples.
        samplerate: Sampling rate. Defaults to 16000.
        win_length: Window length. Defaults to 0.025; it is multiplied by
            ``samplerate`` before framing.
        win_step: Interval between the start points of adjacent frames.
            Defaults to 0.01; it is multiplied by ``samplerate`` before
            framing.
        filters_num: Number of filters. Defaults to 26.
        NFFT: Size of the FFT. Defaults to 512.
        low_freq: Lowest filter-bank frequency.
        high_freq: Highest filter-bank frequency. A falsy value selects
            ``samplerate / 2``.
        pre_emphasis_coeff: Coefficient for pre-emphasis. Defaults to 0.97.

    Returns:
        feat: Filter-bank energies, one row per frame, zeros replaced by
            machine epsilon.
        energy: Sum of the power spectrum of each frame, zeros replaced by
            machine epsilon.
    """
    # Fall back to half the sampling rate when no upper edge is given.
    high_freq = high_freq if high_freq else samplerate / 2

    # Pre-emphasis, then split into frames.
    preemphasized = pre_emphasis(signal, pre_emphasis_coeff)
    frames = audio2frame(
        preemphasized,
        win_length * samplerate,
        win_step * samplerate,
    )

    # Power spectrum of every frame and its total energy.
    power_spec = spectrum_power(frames, NFFT)
    eps = numpy.finfo(float).eps
    total = numpy.sum(power_spec, 1)
    energy = numpy.where(total == 0, eps, total)

    # Filter-bank energies; zeros become eps so a later log is finite.
    filter_bank = get_filter_banks(filters_num, NFFT, samplerate, low_freq,
                                   high_freq)
    projected = numpy.dot(power_spec, filter_bank.T)
    feat = numpy.where(projected == 0, eps, projected)

    return feat, energy
コード例 #7
0
ファイル: mfcc.py プロジェクト: dusong7/Python_Exc
def fbank(signal, samplerate=16000, win_length=0.025, win_step=0.01,
          filters_num=26, NFFT=512, low_freq=0, high_freq=None,
          pre_emphasis_coeff=0.97):
    """Compute filter-bank energies and per-frame energy of a signal.

    Parameters:
        signal: input audio samples
        samplerate: sampling rate
        win_length: window length (scaled by ``samplerate`` for framing)
        win_step: window step (scaled by ``samplerate`` for framing)
        filters_num: number of filters in the bank
        NFFT: FFT size
        low_freq: lowest frequency of the filter bank
        high_freq: highest frequency of the filter bank; a falsy value
            selects ``samplerate / 2``
        pre_emphasis_coeff: pre-emphasis coefficient

    Returns:
        ``(feat, energy)`` -- the power spectrum multiplied by the
        transposed filter bank, and the row-wise sum of the power
        spectrum. Exact zeros in either are replaced by machine epsilon.
    """

    def _no_zeros(values):
        # Swap exact zeros for eps so a later logarithm is well defined.
        return numpy.where(values == 0, numpy.finfo(float).eps, values)

    if not high_freq:
        high_freq = samplerate / 2

    emphasized = pre_emphasis(signal, pre_emphasis_coeff)
    frames = audio2frame(emphasized, win_length * samplerate,
                         win_step * samplerate)
    spectrum = spectrum_power(frames, NFFT)

    energy = _no_zeros(numpy.sum(spectrum, 1))

    banks = get_filter_banks(filters_num, NFFT, samplerate, low_freq,
                             high_freq)
    feat = _no_zeros(numpy.dot(spectrum, banks.T))

    return feat, energy
コード例 #8
0
def IDCTcoefficient(sig,
                    samplerate=16000,
                    win_length=0.025,
                    win_step=0.01,
                    pre_emphasis_coeff=0.97):
    """Compute the IDCT of the log-magnitude DCT of each frame.

    Pipeline: pre-emphasis -> framing -> Hamming window -> orthonormal
    type-2 DCT -> log of the absolute value -> orthonormal type-2 IDCT.

    :param sig: input audio samples
    :param samplerate: sampling rate used to convert the window sizes
    :param win_length: window length (multiplied by ``samplerate``)
    :param win_step: window step (multiplied by ``samplerate``)
    :param pre_emphasis_coeff: coefficient passed to ``pre_emphasis``
    :return: array with one row of IDCT output per frame
    """
    frame_len = win_length * samplerate

    # Pre-emphasis and framing.
    emphasized = pre_emphasis(sig, pre_emphasis_coeff)
    frames = audio2frame(emphasized, frame_len, win_step * samplerate)

    # Hamming window, applied to the frame array in place.
    window = np.hamming(int(round(frame_len)))
    frames *= window

    # Forward DCT along each frame.
    coeffs = dct(frames, type=2, axis=1, norm='ortho')

    # Exact zeros become eps, then the magnitude is taken, so the
    # logarithm below is always defined.
    magnitude = np.absolute(
        np.where(coeffs == 0, np.finfo(float).eps, coeffs))

    # Log magnitude followed by the inverse DCT.
    return idct(np.log(magnitude), type=2, axis=1, norm='ortho')