def get_mfcc(rate, sig):
    """Boost the dominant band of ``sig`` and return a slice of its scaled MFCCs.

    The signal is first analysed with liftered log filterbank energies to find
    the frame with the largest sum of squares.  That frame is passed through
    ``mfcc.mel2hz`` and the minimum of the result (negated) sets the corner
    frequencies of a low-shelf / high-shelf / limiter effects chain, which is
    applied to the original signal.  MFCCs are then computed on the processed
    signal and scaled.

    :param rate: sample rate of ``sig``; used for the final MFCC computation.
    :param sig: the audio signal.
    :returns: rows 1 to 4 (inclusive) of the scaled MFCC matrix.
    """
    # Liftered log filterbank energies; only used to pick the strongest frame.
    # NOTE(review): ``logfbank`` is called without ``rate`` and therefore runs
    # with its default sample rate -- confirm this is intended.
    features = mfcc.lifter(mfcc.logfbank(sig))

    # Squaring makes positive and negative values comparable by magnitude;
    # argmax returns the first frame on ties, like list.index(max(...)).
    strongest_frame = int(np.argmax(np.sum(np.square(features), axis=1)))

    hz = mfcc.mel2hz(features[strongest_frame])
    min_hz = min(hz)

    # The shelf frequencies are derived from the negated minimum.
    # NOTE(review): presumably min_hz is negative here so the negation yields a
    # positive frequency -- confirm.
    speech_booster = (
        AudioEffectsChain()
        .lowshelf(frequency=min_hz * (-1), gain=12.0, slope=0.5)
        .highshelf(frequency=min_hz * (-1) * 1.2, gain=-12.0, slope=0.5)
        .limiter(gain=8.0)
    )
    y_speech_boosted = speech_booster(sig)

    features = mfcc.mfcc(y_speech_boosted, rate, 0.025, 0.01, 16,
                         nfilt=40, nfft=512, appendEnergy=False,
                         winfunc=np.hamming)
    # NOTE(review): assuming ``preprocessing`` is sklearn.preprocessing, scale()
    # standardises each column (zero mean, unit variance); it does not clamp
    # values into [0, 1].
    features = preprocessing.scale(features)
    return features[1:5, :]
def mfcc_(signal, samplerate=16000, winlen=0.08, winstep=0.04, numcep=39,
          nfilt=39, nfft=2048, lowfreq=12.5, highfreq=None, preemph=0.97,
          ceplifter=39, appendEnergy=True,
          winfunc=lambda x: numpy.ones((x, ))):
    """Compute liftered cepstral coefficients from ``signal``.

    All framing and filterbank parameters are forwarded positionally to
    ``fbank``.  The log filterbank output is transformed with an orthonormal
    type-2 DCT along axis 1, truncated to ``numcep`` columns and passed through
    ``lifter``.

    :param appendEnergy: when true, column 0 of the result is overwritten with
        the log of the per-frame energy returned by ``fbank``.
    :param winfunc: window function handed to ``fbank``; the default returns an
        all-ones window of the requested length.
    :returns: the cepstral feature matrix, one row per frame.
    """
    fb_energies, frame_energy = fbank(signal, samplerate, winlen, winstep,
                                      nfilt, nfft, lowfreq, highfreq, preemph,
                                      winfunc)
    log_fb = numpy.log(fb_energies)

    # Ask the DCT for at least ``numcep`` outputs so that requesting more
    # coefficients than there are filterbank columns still fills the slice.
    dct_length = max(numcep, log_fb.shape[1])
    cepstra = dct(log_fb, n=dct_length, type=2, axis=1, norm='ortho')
    cepstra = lifter(cepstra[:, :numcep], ceplifter)

    if appendEnergy:
        # Replace the first cepstral coefficient with the log frame energy.
        cepstra[:, 0] = numpy.log(frame_energy)
    return cepstra
def get_MFCC(sr, audio):
    """Apply a low-shelf filter to ``audio`` and return its scaled MFCCs.

    Two stages:

    1. Noise handling -- liftered log filterbank energies locate the frame with
       the largest sum of squares; that frame is passed through ``mfcc.mel2hz``
       and the negated minimum of the result becomes the corner frequency of a
       low-shelf effect applied to the original audio.
    2. Final MFCC calculation on the processed audio, followed by scaling.

    :param sr: sample rate of ``audio``; used for the final MFCC computation.
    :param audio: the audio signal.
    :returns: the scaled MFCC matrix for the processed audio.
    """
    # ---- Noise handling -------------------------------------------------
    # Liftered log filterbank energies; only used to pick the strongest frame.
    # NOTE(review): ``logfbank`` is called without ``sr`` and therefore runs
    # with its default sample rate -- confirm this is intended.
    features = mfcc.lifter(mfcc.logfbank(audio))

    # Values can be positive or negative, so compare frames by the sum of
    # squared values; argmax returns the first frame on ties.
    strongest_frame = int(np.argmax(np.sum(np.square(features), axis=1)))

    # Note: the strongest frame holds liftered log filterbank energies, not
    # MFCCs, when it is handed to mel2hz.
    hz = mfcc.mel2hz(features[strongest_frame])
    min_hz = min(hz)

    # NOTE(review): a low-shelf with positive gain normally boosts content
    # below its corner frequency rather than removing it -- confirm the
    # intended effect against the AudioEffectsChain documentation.
    speech_booster = AudioEffectsChain().lowshelf(
        frequency=min_hz * (-1), gain=20.0, slope=0.5)
    y_speech_boosted = speech_booster(audio)  # applied to the original audio

    # ---- Final MFCC calculation ----------------------------------------
    features = mfcc.mfcc(y_speech_boosted, sr, 0.025, 0.01, 16,
                         nfilt=40, nfft=512, appendEnergy=False,
                         winfunc=np.hamming)
    # NOTE(review): assuming ``preprocessing`` is sklearn.preprocessing, scale()
    # standardises each column (zero mean, unit variance); it does not clamp
    # values into [0, 1].
    features = preprocessing.scale(features)
    return features
def _get_MFCC_features(self, index, winstep, nfft=512):
    """Load the wav data for ``index`` and convert it to MFCC/log-mel features.

    :param index: identifier forwarded to ``self._get_wav_data``.
    :param winstep: step between analysis windows, forwarded to ``fbank``.
    :param nfft: FFT size forwarded to ``fbank``.
    :returns: a 2-D array with one row per frame.  It has zero rows when no
        audio could be loaded and zero columns when
        ``self.num_MFCC_features`` is 0.
    """
    audio_sampling_rate, audio_signal = self._get_wav_data(index)

    if audio_signal is None:
        # No warning here: per the original author's note, _get_wav_data has
        # already issued one.  The result is a *length-zero* sequence of
        # vectors of length num_MFCC_features.  Original note: self.get() pads
        # sequences out to self.max_samples anyway, but a direct generator
        # call really does receive the zero-length sequence.
        return np.zeros((0, self.num_MFCC_features))

    if self.num_MFCC_features == 0:
        print('WARNING: no MFCCs requested')
        # Original note: in theory this lets a caller request that no MFCCs be
        # packaged with the other data; in practice it is unused, and encoder
        # targets are disabled by setting their penalty to 0 instead.
        num_frames = int(
            audio_signal.shape[0] / audio_sampling_rate / winstep)
        return np.zeros((num_frames, 0))

    # The log-mel computation is unpacked here (rather than hidden behind a
    # single MFCC call) because the log-mels themselves may be the output.
    lowfreq = 0
    highfreq = None
    preemph = 0.97
    ceplifter = 22
    fb_energies, frame_energy = fbank(
        audio_signal, audio_sampling_rate, self.mfcc_winlen, winstep,
        self.num_mel_features, nfft, lowfreq, highfreq, preemph,
        lambda x: np.ones((x, )))
    log_mels = np.log(fb_energies)

    if self.USE_LOG_MELS:
        # Log-mels with the log frame energy appended as an extra column.
        features = np.concatenate(
            (log_mels, np.log(frame_energy)[:, None]), axis=1)
    else:
        # MFCCs: DCT, truncate, lifter, then put log energy in column 0.
        cepstra = dct(log_mels, type=2, axis=1, norm='ortho')
        cepstra = cepstra[:, :self.num_cepstral_coeffs]
        features = lifter(cepstra, ceplifter)
        features[:, 0] = np.log(frame_energy)

    if self.USE_MFCC_DELTAS:
        # Append delta features alongside the static ones.
        return np.concatenate((features, delta(features, N=2)), axis=1)
    return features
def local_mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01,
               numcep=13, nfilt=26, nfft=512, lowfreq=0, highfreq=None,
               preemph=0.97, ceplifter=22, filtertype='mel',
               appendEnergy=True, winfunc=np.hamming):
    """Compute MFCC features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param filtertype: filterbank type, passed through unchanged to ``local_fbank``. Default is 'mel'.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param winfunc: the analysis window to apply to each frame; a callable taking the window length.
        Default is ``numpy.hamming``. (The previous default wrapped the length in a tuple before
        calling ``numpy.hamming``, which raises as soon as the window is built.)
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    feat, energy = local_fbank(signal=signal, samplerate=samplerate,
                               winlen=winlen, winstep=winstep, nfilt=nfilt,
                               nfft=nfft, lowfreq=lowfreq, highfreq=highfreq,
                               preemph=preemph, winfunc=winfunc,
                               filtertype=filtertype)
    feat = np.log(feat)
    # Orthonormal type-2 DCT along the filter axis, keeping numcep columns.
    feat = dct(feat, type=2, axis=1, norm='ortho')[:, :numcep]
    feat = lifter(feat, ceplifter)
    if appendEnergy:
        # replace first cepstral coefficient with log of frame energy
        feat[:, 0] = np.log(energy)
    return feat
def mfcc(frames, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13,
         nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97,
         ceplifter=22, appendEnergy=True):
    """Compute MFCCs from already-framed audio.

    Unlike a signal-level MFCC routine, this takes ``frames`` directly and
    hands them to ``sigproc.powspec``; ``winlen``, ``winstep`` and ``preemph``
    are accepted but not used in the body.

    :param frames: framed signal passed to ``sigproc.powspec``.
    :param samplerate, nfilt, nfft, lowfreq, highfreq: forwarded to
        ``python_speech_features.get_filterbanks``.
    :param numcep: number of DCT columns kept.
    :param ceplifter: lifter parameter forwarded to
        ``python_speech_features.lifter``.
    :param appendEnergy: when true, column 0 is replaced with the log of the
        total frame energy.
    :returns: the cepstral feature matrix, one row per frame.
    """
    # Smallest positive float increment; substituted for exact zeros so that
    # the logarithms below stay finite.
    eps = numpy.finfo(float).eps

    power_spectrum = sigproc.powspec(frames, nfft)

    # Total energy in each frame.
    frame_energy = numpy.sum(power_spectrum, 1)
    frame_energy = numpy.where(frame_energy == 0, eps, frame_energy)

    # Filterbank energies.
    filterbank = python_speech_features.get_filterbanks(
        nfilt, nfft, samplerate, lowfreq, highfreq)
    fb_energies = numpy.dot(power_spectrum, filterbank.T)
    fb_energies = numpy.where(fb_energies == 0, eps, fb_energies)

    cepstra = dct(numpy.log(fb_energies), type=2, axis=1,
                  norm='ortho')[:, :numcep]
    cepstra = python_speech_features.lifter(cepstra, ceplifter)

    if appendEnergy:
        # Replace the first cepstral coefficient with the log frame energy.
        cepstra[:, 0] = numpy.log(frame_energy)
    return cepstra
def mfcc_energy(signal, samplerate=16000, winlen=0.025, winstep=0.01,
                numcep=13, nfilt=40, nfft=512, lowfreq=0, highfreq=None,
                preemph=0.97, ceplifter=22, appendEnergy=True,
                winfunc=np.hamming):
    """Return liftered cepstra (without coefficient 0) and log frame energy.

    Framing and filterbank parameters are forwarded positionally to ``fbank``.
    ``appendEnergy`` is accepted but not used in the body: the energy is always
    returned separately.

    :returns: a tuple ``(mfcc, energy)`` where ``mfcc`` holds DCT columns
        ``1 .. numcep-1`` after liftering and ``energy`` is the log frame
        energy reshaped to a single column.
    """
    fb_energies, frame_energy = fbank(signal, samplerate, winlen, winstep,
                                      nfilt, nfft, lowfreq, highfreq, preemph,
                                      winfunc)
    cepstra = dct(np.log(fb_energies), type=2, axis=1, norm='ortho')

    # Discard the 0-th DCT coefficient, then lifter what remains.
    # NOTE(review): the lifter sees these columns re-indexed from 0; if it
    # weights by coefficient index the weights are shifted by one -- confirm
    # this is intended.
    liftered = lifter(cepstra[:, 1:numcep], ceplifter)

    log_energy = np.log(frame_energy).reshape((frame_energy.shape[0], 1))
    return liftered, log_energy
def lift(signal, samplerate=16000, winlen=0.08, winstep=0.04, numcep=39,
         nfilt=39, nfft=2048, lowfreq=12.5, highfreq=None, preemph=0.97,
         ceplifter=39, winfunc=lambda x: numpy.ones((x, ))):
    """Compute liftered cepstral coefficients, without energy replacement.

    Framing and filterbank parameters are forwarded positionally to ``fbank``;
    the frame energy it returns is ignored.

    :param numcep: number of DCT columns kept.
    :param ceplifter: lifter parameter forwarded to ``lifter``.
    :param winfunc: window function handed to ``fbank``; the default returns an
        all-ones window of the requested length.
    :returns: the liftered cepstral matrix, one row per frame.
    """
    fb_energies, _ = fbank(signal, samplerate, winlen, winstep, nfilt, nfft,
                           lowfreq, highfreq, preemph, winfunc)
    log_fb = numpy.log(fb_energies)

    # Ask the DCT for at least ``numcep`` outputs so that requesting more
    # coefficients than there are filterbank columns still fills the slice.
    dct_length = max(numcep, log_fb.shape[1])
    cepstra = dct(log_fb, n=dct_length, type=2, axis=1, norm='ortho')
    return lifter(cepstra[:, :numcep], ceplifter)
def get_features(filename, numcep, numfilt, winlen, winstep, grad):
    """Read an audio file and build a normalised MFCC + log-energy matrix.

    :param filename: path of the audio file, opened with ``Sndfile``.
    :param numcep: number of cepstral coefficients kept after the DCT.
    :param numfilt: number of filterbank filters (``nfilt`` of ``sf.fbank``).
    :param winlen: analysis window length forwarded to ``sf.fbank``.
    :param winstep: analysis window step forwarded to ``sf.fbank``.
    :param grad: ``>= 1`` appends first-order gradients along axis 0; exactly
        ``2`` additionally appends the gradient of those gradients.
    :returns: ``(mat, frames, samplerate)`` -- the feature matrix, the number
        of frames reported by the file, and its sample rate.
    """
    f = Sndfile(filename, 'r')
    try:
        frames = f.nframes
        samplerate = f.samplerate
        data = np.asarray(f.read_frames(frames))
    finally:
        # Release the file handle even if reading fails.
        f.close()

    # MFCCs: log filterbank energies -> DCT -> lifter.
    feat_raw, energy = sf.fbank(data, samplerate, winlen, winstep,
                                nfilt=numfilt)
    feat = np.log(feat_raw)
    feat = sf.dct(feat, type=2, axis=1, norm='ortho')[:, :numcep]
    feat = np.asarray(sf.lifter(feat, L=22))

    # Log energy as a single column.
    log_energy = np.log(energy)
    log_energy = log_energy.reshape([log_energy.shape[0], 1])

    # Centre each coefficient and divide by half its standard deviation; the
    # log-energy column is appended un-normalised.
    mat = (feat - np.mean(feat, axis=0)) / (0.5 * np.std(feat, axis=0))
    mat = np.concatenate((mat, log_energy), axis=1)

    if grad >= 1:
        # First-order derivative: gradient along axis 0 (the row axis).
        gradf = np.gradient(mat)[0]
        mat = np.concatenate((mat, gradf), axis=1)
        if grad == 2:
            # Second-order derivative: gradient of the first-order gradients.
            grad2f = np.gradient(gradf)[0]
            mat = np.concatenate((mat, grad2f), axis=1)

    return mat, frames, samplerate
def extract_mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01,
                 numcep=13, nfilt=26, nfft=512, lowfreq=0, highfreq=None,
                 preemph=0.97, ceplifter=22, appendEnergy=True,
                 winfunc=lambda x: numpy.ones((x, ))):
    """Compute liftered cepstra and the log frame energy of ``signal``.

    Framing and filterbank parameters are forwarded positionally to ``fbank``.

    :param appendEnergy: when true, the log frame energy is appended to the
        cepstra as an extra last column (it does not replace coefficient 0).
    :param winfunc: window function handed to ``fbank``; the default returns an
        all-ones window of the requested length.
    :returns: ``(feat, log_energy)`` -- the feature matrix and the log of the
        per-frame energy.
    """
    fb_energies, frame_energy = fbank(signal, samplerate, winlen, winstep,
                                      nfilt, nfft, lowfreq, highfreq, preemph,
                                      winfunc)
    cepstra = dct(numpy.log(fb_energies), type=2, axis=1,
                  norm='ortho')[:, :numcep]
    cepstra = lifter(cepstra, ceplifter)

    log_energy = numpy.log(frame_energy)
    if appendEnergy:
        # Append the log frame energy as a trailing column.
        cepstra = numpy.column_stack((cepstra, log_energy))
    return cepstra, log_energy
def logfbank_features(signal, samplerate=44100, fps=24, num_filt=40,
                      num_cepstra=40, nfft=8192, **kwargs):
    """Compute normalised cepstral features plus log energy at a frame rate.

    The window step is ``2 / fps`` and the window length is twice that step.
    Extra keyword arguments are accepted and ignored.

    :param signal: audio samples; ``signal.shape[0]`` is taken as its length.
    :param samplerate: sample rate of ``signal``.
    :param fps: frame rate used to derive the window step and length.
    :param num_filt: number of filterbank filters.
    :param num_cepstra: number of DCT columns kept.
    :param nfft: FFT size.
    :returns: matrix of normalised cepstra with the log frame energy appended
        as the last column.
    :raises AssertionError: if more than one frame beyond ``fps * duration``
        is produced.
    """
    hop = 2 / fps
    fb_energies, frame_energy = psf.fbank(signal=signal,
                                          samplerate=samplerate,
                                          winlen=hop * 2, winstep=hop,
                                          nfilt=num_filt, nfft=nfft)

    cepstra = psf.dct(np.log(fb_energies), type=2, axis=1,
                      norm='ortho')[:, :num_cepstra]
    cepstra = np.asarray(psf.lifter(cepstra, L=22))

    log_energy = np.log(frame_energy)
    log_energy = log_energy.reshape([log_energy.shape[0], 1])

    if cepstra.shape[0] > 1:
        # Centre each coefficient and divide by half its standard deviation.
        half_std = 0.5 * np.std(cepstra, axis=0)
        normalised = (cepstra - np.mean(cepstra, axis=0)) / half_std
    else:
        # A single frame has no spread to normalise against.
        normalised = cepstra

    combined = np.concatenate((normalised, log_energy), axis=1)

    # Sanity check on the frame count.
    # NOTE(review): the check is one-sided (too few frames always passes), and
    # a step of 2 / fps yields roughly half of fps * duration frames --
    # confirm both are intended.
    duration = signal.shape[0] / samplerate
    expected_frames = fps * duration
    assert combined.shape[0] - expected_frames <= 1, \
        "Producted feature number does not match framerate"
    return combined
def get_features(filename, numcep, numfilt, winlen, winstep, method=1,
                 quaternion=False):
    """Read a wav file and build MFCC + log-energy features with derivatives.

    :param filename: path handed to ``wav.read``.
    :param numcep: number of cepstral coefficients kept after the DCT.
    :param numfilt: number of filterbank filters (``nfilt`` of ``sf.fbank``).
    :param winlen: analysis window length forwarded to ``sf.fbank``.
    :param winstep: analysis window step forwarded to ``sf.fbank``.
    :param method: truthy -> ``[features, 1st, 2nd, 3rd gradient]``;
        falsy -> ``[features, zeros, gradient, gradient-of-gradient]`` where
        the gradients are taken over the zero-padded matrix.
    :param quaternion: when true, reshape each row into ``4`` groups.
    :returns: ``(mat, data, samplerate)`` -- the feature array, the raw samples
        and the sample rate.
    """
    samplerate, data = wav.read(filename)

    # MFCCs: log filterbank energies -> DCT -> lifter.
    fb_energies, frame_energy = sf.fbank(data, samplerate, winlen, winstep,
                                         nfilt=numfilt)
    cepstra = sf.dct(np.log(fb_energies), type=2, axis=1,
                     norm='ortho')[:, :numcep]
    cepstra = np.asarray(sf.lifter(cepstra, L=22))

    # Log energy as a single column.
    log_energy = np.log(frame_energy)
    log_energy = log_energy.reshape([log_energy.shape[0], 1])

    # Centre each coefficient and divide by half its standard deviation; the
    # log-energy column is appended un-normalised.
    normalised = (cepstra - np.mean(cepstra, axis=0)) / (
        0.5 * np.std(cepstra, axis=0))
    mat = np.concatenate((normalised, log_energy), axis=1)

    if method:
        # Base features followed by their 1st, 2nd and 3rd order gradients
        # along axis 0, each derived from the previous one.
        pieces = [mat]
        current = mat
        for _ in range(3):
            current = np.gradient(current)[0]
            pieces.append(current)
        mat = np.concatenate(pieces, axis=1)
    else:
        # Pad with a zero block of equal shape, then append the gradient of
        # the padded matrix and the gradient of that gradient.
        padded = np.concatenate((mat, np.zeros(shape=mat.shape)), axis=1)
        first_order = np.gradient(padded)[0]
        second_order = np.gradient(first_order)[0]
        mat = np.concatenate((padded, first_order, second_order), axis=1)

    if quaternion:
        # NOTE(review): this reshape only succeeds when the column count is a
        # multiple of 4 -- confirm that holds for the ``method`` falsy layout.
        mat = mat.reshape((mat.shape[0], 4, mat.shape[1] // 4))

    return mat, data, samplerate
# Tail of the mel/Hz conversion checks (the start of this section is outside
# the visible chunk).
# NOTE(review): the first assertion compares csf.mel2hz(5190) with itself, so
# it can only fail if get_error misbehaves -- possibly one side was meant to be
# the psf implementation; confirm.
assert (get_error(csf.mel2hz(5190), csf.mel2hz(5190)) <= acceptable_error)
# Round trip: mel -> Hz -> mel should give back the starting value.
assert (get_error(csf.hz2mel(csf.mel2hz(2595)), 2595) <= acceptable_error)
print ' ✓'
print ''

# get_filterbanks: both implementations, called with their defaults, must
# return arrays of the same shape; error2d then compares them.
print 'get_filterbanks'
print '==============='
psf_filterbanks = psf.get_filterbanks()
csf_filterbanks = csf.get_filterbanks()
assert (np.shape(psf_filterbanks) == np.shape(csf_filterbanks))
error2d(psf_filterbanks, csf_filterbanks)
print ''

# lifter: the csf implementation is fed a float32 copy of the psf features.
print 'lifter'
print '======'
psf_lifter = psf.lifter(psf_feat)
csf_lifter = csf.lifter(np.array(psf_feat, dtype=np.float32))
assert (np.shape(psf_lifter) == np.shape(csf_lifter))
error2d(psf_lifter, csf_lifter)
print ''

# delta: both implementations are called with second argument 3; csf again
# receives a float32 copy of the psf MFCCs.
print 'delta'
print '====='
psf_delta = psf.delta(psf_mfcc, 3)
csf_delta = csf.delta(np.array(psf_mfcc, dtype=np.float32), 3)
assert (np.shape(psf_delta) == np.shape(csf_delta))
error2d(psf_delta, csf_delta)
print ''

# Header for the next section (its body is outside the visible chunk).
print 'Testing sigproc'