def mfcc(Frames, fs, mfccNumber):
    """Summarise the Mel-Frequency Cepstral Coefficients of a framed signal.

    The coefficients are computed frame by frame through the ``afe`` module
    ("audioFeatureExtraction" from the pyAudioAnalysis package), then reduced
    over all frames.

    ARGUMENTS:
        Frames:     List of signal frames; the length of the first frame is
                    taken as the window size for every frame.
        fs:         Sampling frequency of the signal.
        mfccNumber: Number of MFC coefficients to be computed.

    RETURNS:
        A single array holding the per-coefficient mean, then maximum, then
        minimum values taken across the frames.
    """
    frame_len = len(Frames[0])
    half_len = int(frame_len / 2)
    filter_bank, _ = afe.mfccInitFilterBanks(fs, half_len)

    # Per frame: FFT magnitude, first half of the bins, scaled by the frame
    # length, then handed to the MFCC routine.
    per_frame = [
        afe.stMFCC(abs(fft(frame))[0:half_len] / frame_len,
                   filter_bank, mfccNumber)
        for frame in Frames
    ]

    summary = (
        np.mean(per_frame, 0),
        np.amax(per_frame, 0),
        np.amin(per_frame, 0),
    )
    return np.concatenate(summary)
def stFeatureExtraction(signal, fs, win, step, feats):
    """
    This function implements the short-term windowing process. For each
    short-term window a set of features is extracted. This results in a
    sequence of feature vectors, stored in a numpy matrix.

    The signal is first normalized (scaled by 2**15, mean removed, divided by
    its peak absolute value). Only complete windows are processed.

    ARGUMENTS
        signal:       the input signal samples
        fs:           the sampling freq (in Hz)
        win:          the short-term window size (in samples)
        step:         the short-term window step (in samples)
        feats:        collection of feature groups to compute; the recognised
                      entries are "spectral", "mfcc", "gfcc" and "chroma"
    RETURNS
        st_features:   a numpy array (n_feats x numOfShortTermWindows); it
                       has zero columns when the signal is shorter than one
                       window
        feature_names: list of names for the "spectral", "mfcc", "gfcc" and
                       "chroma" rows, in that order
    RAISES
        ValueError:    if win or step is below one sample once converted
                       to int
    """
    win = int(win)
    step = int(step)
    if win < 1 or step < 1:
        # A non-positive step never advances the window, so the loop below
        # would otherwise run forever.
        raise ValueError("win and step must be at least one sample")

    if "gfcc" in feats:
        ngfcc = 22
        gfcc = getGfcc.GFCCFeature(fs)
    else:
        ngfcc = 0

    n_mfcc_feats = 13 if "mfcc" in feats else 0

    # Signal normalization (skipped for an empty signal, where the peak
    # value is undefined)
    signal = numpy.double(signal)
    N = len(signal)  # total number of samples
    if N > 0:
        signal = signal / (2.0**15)
        DC = signal.mean()
        MAX = (numpy.abs(signal)).max()
        signal = (signal - DC) / (MAX + 0.0000000001)

    nFFT = int(win / 2)

    feature_names = []

    if "spectral" in feats:
        n_time_spectral_feats = 8
        feature_names += [
            "zcr",
            "energy",
            "energy_entropy",
            "spectral_centroid",
            "spectral_spread",
            "spectral_entropy",
            "spectral_flux",
            "spectral_rolloff",
        ]
    else:
        n_time_spectral_feats = 0

    if "mfcc" in feats:
        # The triangular filter banks are only needed for the MFCCs
        fbank, _ = mfccInitFilterBanks(fs, nFFT)
        feature_names += [
            "mfcc_{0:d}".format(mfcc_i)
            for mfcc_i in range(1, n_mfcc_feats + 1)
        ]

    if "gfcc" in feats:
        feature_names += [
            "gfcc_{0:d}".format(gfcc_i) for gfcc_i in range(1, ngfcc + 1)
        ]

    if "chroma" in feats:
        nChroma, nFreqsPerChroma = stChromaFeaturesInit(nFFT, fs)
        n_chroma_feats = 13
        feature_names += [
            "chroma_{0:d}".format(chroma_i)
            for chroma_i in range(1, n_chroma_feats)
        ]
        feature_names.append("chroma_std")
    else:
        n_chroma_feats = 0

    # Row layout of each feature vector: spectral | mfcc | gfcc | chroma
    mfcc_start = n_time_spectral_feats
    gfcc_start = mfcc_start + n_mfcc_feats
    chroma_start = gfcc_start + ngfcc
    n_total_feats = chroma_start + n_chroma_feats

    st_features = []
    X_prev = None
    cur_p = 0
    while cur_p + win - 1 < N:  # for each complete short-term window
        x = signal[cur_p:cur_p + win]  # get current window
        cur_p += step  # update window position
        X = abs(fft(x))  # get fft magnitude
        X = X[0:nFFT]  # keep the first nFFT bins
        X = X / len(X)  # normalize fft
        if X_prev is None:
            # first window: no previous spectrum yet, so use the current one
            X_prev = X.copy()

        curFV = numpy.zeros((n_total_feats, 1))

        if "spectral" in feats:
            curFV[0] = stZCR(x)  # zero crossing rate
            curFV[1] = stEnergy(x)  # short-term energy
            curFV[2] = stEnergyEntropy(x)  # short-term entropy of energy
            # spectral centroid and spread
            [curFV[3], curFV[4]] = stSpectralCentroidAndSpread(X, fs)
            curFV[5] = stSpectralEntropy(X)  # spectral entropy
            curFV[6] = stSpectralFlux(X, X_prev)  # spectral flux
            curFV[7] = stSpectralRollOff(X, 0.90, fs)  # spectral rolloff

        if "mfcc" in feats:
            curFV[mfcc_start:gfcc_start, 0] = \
                stMFCC(X, fbank, n_mfcc_feats).copy()  # MFCCs

        if "gfcc" in feats:
            curFV[gfcc_start:chroma_start, 0] = gfcc.get_gfcc(x)

        if "chroma" in feats:
            chromaNames, chromaF = stChromaFeatures(X, fs, nChroma,
                                                    nFreqsPerChroma)
            # the last chroma row holds the standard deviation of the others
            curFV[chroma_start:n_total_feats - 1] = chromaF
            curFV[n_total_feats - 1] = chromaF.std()

        st_features.append(curFV)
        X_prev = X.copy()  # keep previous fft mag (used in spectral flux)

    if not st_features:
        # Signal shorter than one window: numpy.concatenate would raise on an
        # empty list, so return a matrix with zero columns instead.
        return numpy.zeros((n_total_feats, 0)), feature_names

    st_features = numpy.concatenate(st_features, 1)
    return st_features, feature_names
def mfcc_coeffs(an_wndw, sample_rate):
    """Return the first five MFCC coefficients of an analysis window.

    The filter bank is built through ``audioFE.mfccInitFilterBanks`` using
    the sample rate and the length of the window's first axis.

    NOTE(review): ``an_wndw`` is passed to ``audioFE.stMFCC`` unchanged, so it
    is presumably already an FFT magnitude -- confirm against the caller.
    """
    n_coeffs = 5
    window_len = an_wndw.shape[0]
    bank_and_freqs = audioFE.mfccInitFilterBanks(sample_rate, window_len)
    filter_bank, _ = bank_and_freqs
    return audioFE.stMFCC(an_wndw, filter_bank, n_coeffs)