def extract(self, file_name):
    """Compute MFCC features and their derivative cube for one WAV file.

    Args:
        file_name: Path of the WAV file to read.

    Returns:
        A ``(mfcc, mfcc_feature_cube)`` tuple: the array returned by
        ``speechpy.mfcc`` and the result of passing that array through
        ``speechpy.extract_derivative_feature``.
    """
    fs, signal = wav.read(file_name)

    # Multi-channel audio is laid out as (samples, channels); only the first
    # channel is analysed.  A mono file is already 1-D, so indexing a channel
    # axis there would raise IndexError.
    if signal.ndim > 1:
        signal = signal[:, 0]

    mfcc = speechpy.mfcc(
        signal,
        sampling_frequency=fs,
        frame_length=0.020,
        frame_stride=0.01,
        num_filters=40,
        fft_length=512,
        low_frequency=0,
        high_frequency=None,
    )
    mfcc_feature_cube = speechpy.extract_derivative_feature(mfcc)
    return mfcc, mfcc_feature_cube
lib_path = os.path.abspath(os.path.join('..')) print(lib_path) sys.path.append(lib_path) import speechpy import os file_name = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'Alesis-Sanctuary-QCard-AcoustcBas-C2.wav') fs, signal = wav.read(file_name) signal = signal[:, 0] ############# Extract MFCC features ############# mfcc = speechpy.mfcc(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, num_filters=40, fft_length=512, low_frequency=0, high_frequency=None) mfcc_cmvn = speechpy.cmvnw(mfcc, win_size=301, variance_normalization=True) print('mfcc(mean + variance normalized) feature shape=', mfcc_cmvn.shape) mfcc_feature_cube = speechpy.extract_derivative_feature(mfcc) print('mfcc feature cube shape=', mfcc_feature_cube.shape) ############# Extract logenergy features ############# logenergy = speechpy.lmfe(signal, sampling_frequency=fs, frame_length=0.020, frame_stride=0.01, num_filters=40,
import time
from speechpy import mfcc
from speechpy import delta
from speechpy import log_filter_bank
import numpy as np
import scipy.io.wavfile as wav
from scipy import signal as sig
import matplotlib.pyplot as plt
from random import shuffle

# Demo entry point: load one WAV file, compute MFCC, delta and log
# filter-bank features, and print each value followed by its shape.
if __name__ == '__main__':
    filename = './sample_24414.wav'
    rate, signal = wav.read(filename)

    mfcc_feature = mfcc(signal, rate)
    d_mfcc_feature = delta(mfcc_feature, 2)
    filter_bank_feature = log_filter_bank(signal, rate)
    # Keep only rows 1 and 2 of the filter-bank output.
    feature = filter_bank_feature[1:3, :]

    print('signal:', signal)
    print('rate:', rate)
    print('np.shape(signal):', np.shape(signal))

    print('mfcc_feature:', mfcc_feature)
    print('np.shape(mfcc_feature):', np.shape(mfcc_feature))

    print('d_mfcc_feature:', d_mfcc_feature)
    print('np.shape(d_mfcc_feature):', np.shape(d_mfcc_feature))

    print('feature:', feature)
    print('np.shape(feature):', np.shape(feature))
def get_mfcc(filename, downsample=0, delta=False, noisereduction=True,
             normalizemean=False, numcoeff=13, verbose=False):
    """Return the MFCC of a given WAV file as a numpy array.

    Args:
        filename: Path of the WAV file to analyse.
        downsample: When greater than 0, ``resample(filename, downsample)``
            is called first and the features are computed from
            "resampled.wav" instead of ``filename``.
        delta: Append delta (velocity) features to the MFCC; doubles the
            number of features per frame.
        noisereduction: Load the audio through ``reduce_noise`` instead of
            reading it directly with ``wav.read``.
        normalizemean: Return the mean-normalized MFCC (``speechpy.cmvn``
            with variance normalization off).  Ignored when ``delta`` is
            set, because the delta branch returns first.
        numcoeff: Number of cepstral coefficients; usually scaled linearly
            with the sampling rate.
        verbose: Enable detailed print statements.

    Returns:
        The MFCC array; with ``delta`` the MFCC and its delta features
        joined side by side per frame; with ``normalizemean`` (and no
        ``delta``) the mean-normalized MFCC.
    """

    def report(message):
        # Single gate for all diagnostic output.
        if verbose:
            print(message)

    # Downsampling writes a second WAV file, which then replaces the input.
    if downsample > 0:
        report('Downsampling On by factor of ' + str(downsample))
        resample(filename, downsample)
        filename = "resampled.wav"
    else:
        report('Downsampling Off')

    # Perform noise reduction before calculating coefficients.
    if noisereduction:
        report('Noise Reduction On')
        fs, signal = reduce_noise(filename)
    else:
        report('Noise Reduction Off')
        fs, signal = wav.read(filename)

    report('\nMFCC (Mel Frequency Cepstral Coefficients)\n' + div)
    report('File sampling frequency: ' + str(fs) + '\n')

    mfcc = speechpy.mfcc(
        signal,
        sampling_frequency=fs,
        frame_length=0.020,
        frame_stride=0.020,  # Equal to frame_length, so frames do not overlap
        num_cepstral=numcoeff,  # Default 13; scale with sample rate
        num_filters=40,
        fft_length=512,
        low_frequency=0,
        high_frequency=None,
        dc_elimination=True)

    # The derivative cubes are only needed for the shape lines in the verbose
    # report, so they are not computed on the quiet path.
    if verbose:
        report('MFCC dimension: '
               + str(speechpy.extract_derivative_feature(mfcc).shape))
        report(mfcc)

    if delta:
        delta_features = get_delta(mfcc)
        # Join each frame's coefficients with its delta features in one
        # vectorized call instead of filling a preallocated array row by row.
        mfcc_delta = np.concatenate([mfcc, delta_features], axis=1)
        if verbose:
            report('\nMFCC with Delta dimension: '
                   + str(speechpy.extract_derivative_feature(mfcc_delta).shape))
            report(mfcc_delta)
        return mfcc_delta

    # Option to return normalized cepstral mean (for each set of coeffs,
    # subtracts mean from each coeff).
    if normalizemean:
        mfcc_normalizedmean = speechpy.cmvn(mfcc, variance_normalization=False)
        if verbose:
            report('\nMFCC with normalized mean dimension: '
                   + str(speechpy.extract_derivative_feature(
                       mfcc_normalizedmean).shape))
            report(mfcc_normalizedmean)
        return mfcc_normalizedmean

    return mfcc