def sparkFeatureExt(line):
    """Extract spectral, energy and MFCC features for one audio file.

    Prints ``line`` and then works on the name ``file``.

    NOTE(review): the body never uses ``line`` after printing it; it reads
    the name ``file`` instead. Confirm whether ``file`` is a module global
    or whether ``line`` was intended.

    Returns:
        An 8-tuple ``(centroid, rolloff, flux, rms, zcr, energy,
        mfcc_means, class_id)`` where ``mfcc_means`` is a list holding the
        per-column average of the first 13 MFCC columns.
    """
    print(line)

    # Characters 72 .. -10 of the path are handed to ClassToInt.
    class_part = file[72:][:-10]
    intClass = ClassToInt(class_part)

    samplerate, wavedata = wavfile.read(file)

    s1, _ = spectral_centroid(wavedata, 512, samplerate)
    sr1, _ = spectral_rolloff(wavedata, 512, samplerate)
    sf1, _ = spectral_flux(wavedata, 512, samplerate)

    rms, _ = root_mean_square(wavedata, 512, samplerate)
    # The RMS array contains NaN values; keep only the real numbers.
    rms = rms[np.logical_not(np.isnan(rms))]

    zcr, _ = zero_crossing_rate(wavedata, 512, samplerate)

    MFCCs, mspec, spec = mfcc(wavedata)
    frame_count = MFCCs.shape[0]
    # Average every one of the 13 coefficient columns over all frames.
    MFCC_coef = [
        sum(MFCCs[row, col] for row in range(frame_count)) / frame_count
        for col in range(13)
    ]

    eng = stEnergy(wavedata)
    return s1, sr1, sf1, rms, zcr, eng, MFCC_coef, intClass
def apply(self, data, meta=None):
    """Return the flattened cepstra of every channel in ``data``.

    ``meta`` is accepted but not used. Each channel is passed to ``mfcc``;
    its first result is flattened with ``ravel`` and the collected vectors
    are converted by ``to_np_array``.
    """
    flattened = [mfcc(channel)[0].ravel() for channel in data]
    return to_np_array(flattened)
def extract_mfcc(full_audio_path):
    """Read a WAV file and return its MFCC matrix (12 coefficients).

    The analysis window is ``int(sample_rate * 0.03)`` samples long.
    """
    sample_rate, wave = wavfile.read(full_audio_path)
    window = int(sample_rate * 0.03)
    result = mfcc(wave, nwin=window, fs=sample_rate, nceps=12)
    # Only the first element of the mfcc result is returned.
    return result[0]
def single():
    """Train a random forest on mean MFCC vectors and save it to disk.

    Every file returned by ``import_wav_data_in_dir(datapath)`` is read,
    clipped to ``[1e-10, 1]``, converted to MFCCs and averaged over axis 0.
    The label is taken from the file name (``-hu.``, ``-ti.``, otherwise
    ``dc``). The fitted model is written to
    ``trained_models/clf_rf2.pkl.cmp``.
    """
    X = []
    y = []
    for f in import_wav_data_in_dir(datapath):
        x, sample_rate = sf.read(datapath + f)
        x = np.clip(x, 1e-10, 1)
        ceps, mspec, spec = mfcc(x, nwin=256, nfft=512, fs=8000, nceps=13)
        X.append(np.mean(ceps, axis=0))

        # The file name carries the class marker.
        if '-hu.' in f:
            label_key = 'hu'
        elif '-ti.' in f:
            label_key = 'ti'
        else:
            label_key = 'dc'
        y.append(label_dict[label_key])

    X = np.array(X)
    y = np.array(y)

    clf = RandomForestClassifier(n_estimators=498, random_state=random_state)
    clf.fit(X, y)

    joblib.dump(clf, 'trained_models/clf_rf2.pkl.cmp', compress=True)
def BED_extract(path, nfft):
    """Collect normalised mean spectra and label vectors for all WAV files.

    Walks ``path`` recursively. For every ``.wav`` file the spectrum
    returned by ``mfcc`` is averaged over axis 0, truncated to ``nfft``
    values and divided by the spectrum maximum. The label vector is looked
    up from the sixth character of the file name.

    Args:
        path: Directory to walk (absolute or relative).
        nfft: Number of spectrum values kept per file; ``mfcc`` is called
            with ``nfft * 2``.

    Returns:
        ``(list_data, list_label)``: float arrays reshaped to
        ``(-1, nfft)`` and ``(-1, label_length)`` respectively
        (``label_length`` is a module-level name).

    Raises:
        KeyError: If the sixth character of a file name is not a known
            label code.
    """
    dic = {'W': [0, 1], 'L': [0, 1], 'E': [0, 1], 'A': [0, 1],
           'F': [1, 0], 'T': [0, 1], 'N': [0.5, 0.5]}

    spectra = []
    labels = []
    for root, _dirs, files in os.walk(path):
        for name in files:
            if os.path.splitext(name)[1].lower() != '.wav':
                continue
            # os.walk already yields ``root`` prefixed with ``path``;
            # joining it onto abspath(path) again doubled relative paths.
            filepath = os.path.join(root, name)
            SR, X = wavfile.read(filepath)
            _, _, spec = mfcc(X, fs=SR, nfft=(nfft * 2))
            spectra.append(numpy.mean(spec, axis=0)[:nfft] / numpy.max(spec))
            labels.extend(dic[name[5]])

    # Build the arrays once instead of re-allocating with numpy.append.
    flat = numpy.concatenate(spectra) if spectra else []
    list_data = numpy.asarray(flat, dtype=float)
    list_label = numpy.asarray(labels, dtype=float)

    # Floor division keeps the reshape dimensions integral on Python 3.
    list_data = numpy.reshape(list_data, (len(list_data) // nfft, nfft))
    list_label = numpy.reshape(
        list_label, (len(list_label) // label_length, label_length))
    return list_data, list_label
def apply(self, data):
    """Return an array with the flattened cepstra of every channel.

    Each element of ``data`` is passed to ``mfcc``; the first result is
    flattened with ``ravel`` and the vectors are stacked via ``np.array``.
    """
    return np.array([mfcc(channel)[0].ravel() for channel in data])
def compute_features(source, features):
    """Compute the requested features for tracks 0..99 of every label.

    For each label in ``source`` and each index 0-99, the features named in
    ``features`` ('zcr', 'rms', 'sc', 'sr', 'sf', 'mfcc') are computed in
    that fixed order and written with ``write_features`` to
    ``FT_DIR/<label>_<index>``.

    NOTE(review): the centroid helper is referenced as
    ``spectual_centroid``; confirm that spelling matches its definition.
    """
    for label in source:
        for i in range(100):
            base_path = os.path.join(FT_DIR, "%s_%d" % (label, i))

            def track(field):
                # Looked up lazily so nothing is read when no feature is
                # requested.
                return source[label][i][field]

            ft = []
            if 'zcr' in features:
                zcr, _ = zero_crossing_rate(
                    track('wavedata'), 512, track('sample_rate'))
                ft.append(zcr)
            if 'rms' in features:
                rms, _ = root_mean_square(
                    track('wavedata'), 512, track('sample_rate'))
                ft.append(rms)
            if 'sc' in features:
                sc, _ = spectual_centroid(
                    track('wavedata'), 2048, track('sample_rate'))
                ft.append(sc)
            if 'sr' in features:
                sr, _ = spectral_rolloff(
                    track('wavedata'), 2048, track('sample_rate'))
                ft.append(sr)
            if 'sf' in features:
                flux, _ = spectral_flux(
                    track('wavedata'), 2048, track('sample_rate'))
                ft.append(flux)
            if 'mfcc' in features:
                ceps, mspec, spec = mfcc(track('wavedata'))
                ft.append(ceps)

            write_features(ft, base_path)
def calc_mfcc(data, fs):
    """Return the mean MFCC vector of ``data``.

    ``mfcc`` is called with ``nwin=256, nfft=512, nceps=13`` and the given
    sampling rate; the rows of the cepstral result are averaged.

    The previous implementation summed all rows but divided by the length
    of one row (the coefficient count) rather than by the number of rows,
    so the result was only a mean when both happened to be equal.

    Args:
        data: Signal passed straight to ``mfcc``.
        fs: Sampling rate forwarded to ``mfcc``.

    Returns:
        The row-wise mean of the cepstral matrix.
    """
    mfcc_data, _mspec, _spec = mfcc(data, nwin=256, nfft=512, fs=fs, nceps=13)
    return np.mean(mfcc_data, axis=0)
def extractFeature(self):
    """Compute MFCC features of ``self.sound`` and store them on ``self``.

    Uses a window of ``int(self.soundSamplerate * 0.03)`` samples and 13
    coefficients; the first element of the ``mfcc`` result is stored in
    ``self.features``.
    """
    rate = self.soundSamplerate
    window = int(rate * 0.03)
    result = mfcc(self.sound, nwin=window, fs=rate, nceps=13)
    self.features = result[0]
def create_test_ceps():
    """
    Creates the MFCC features from the test files and saves them to disk.

    Walks TEST_DATASET_DIR; for every directory whose last path component
    is in genre_list, the trimmed mean cepstrum of each wav file is stored
    in one row of a (30, 13) array, which is then passed to write_ceps.
    This function has no return statement.

    NOTE(review): the source was collapsed onto one line, so the statement
    nesting below is reconstructed -- confirm against the original file.
    """
    for subdir, dirs, files in os.walk(TEST_DATASET_DIR):
        # Everything after the last '/' is treated as the genre name.
        genre = subdir[subdir.rfind('/', 0) + 1:]
        print(genre)
        if genre in genre_list:
            count = 0
            # One row per file; the buffer holds at most 30 files.
            genre_ceps = np.zeros((30, 13), dtype=float)
            print(subdir)
            for file in files:
                path = subdir + '/' + file
                if path.endswith("wav"):
                    sample_rate, X = scipy.io.wavfile.read(path)
                    ceps, mspec, spec = mfcc(X)
                    num_ceps = len(ceps)
                    # Average only the middle 80% of the frames.
                    ceps = np.mean(ceps[int(num_ceps / 10):int(num_ceps * 9 / 10)], axis=0)
                    genre_ceps[count] = ceps
                    count = count + 1
                    print(count)
            print(genre_ceps.shape)
            # ``path`` is whatever file was visited last in this directory.
            write_ceps(genre_ceps, path)
def set_mfcc_matrix(self):
    """Compute the MFCC matrix of ``self.signal`` and drop rows with NaN.

    Uses a window of ``int(self.sample_rate * 0.03)`` samples and 13
    coefficients. The result is stored in ``self.mfcc_matrix``.
    """
    window = int(self.sample_rate * 0.03)
    self.mfcc_matrix = mfcc(
        self.signal, nwin=window, fs=self.sample_rate, nceps=13)[0]
    # A row is discarded as soon as any of its entries is NaN.
    rows_with_nan = np.isnan(self.mfcc_matrix).any(axis=1)
    self.mfcc_matrix = self.mfcc_matrix[~rows_with_nan]
def load_validation_set():
    """Extract three feature sets from the validation files.

    For every id in ``validation_ids`` the file
    ``./validation/validation.<id>.wav`` is read and three features are
    computed: the magnitude of the first 1000 FFT values, the mean cepstrum
    over the middle 80% of frames, and the mean/std of every short-term
    feature row.

    NOTE(review): ``sp.fft(X)`` calls ``fft`` as a function on ``sp``;
    if ``sp`` is SciPy, newer releases expose ``scipy.fft`` as a module --
    confirm.

    Returns:
        Tuple ``(ffts, mfccs, mean_stds)`` of dicts keyed by validation id.
    """
    ffts = {}
    mfccs = {}
    mean_stds = {}

    for i in validation_ids:
        path = './validation/validation.{i}.wav'.format(i=i)
        _, X = read_wav(path)

        # FFT magnitude
        ffts[i] = np.array(abs(sp.fft(X)[:1000]))

        # MFCC, averaged over the central frames
        ceps, mspec, spec = mfcc(X)
        num_ceps = len(ceps)
        mfccs[i] = np.mean(
            ceps[int(num_ceps*1/10):int(num_ceps*9/10)], axis=0)

        # Mean and standard deviation of each short-term feature
        Fs, x = audioBasicIO.readAudioFile(path)
        F = audioFeatureExtraction.stFeatureExtraction(
            x, Fs, 0.050*Fs, 0.025*Fs)
        mean_std = []
        for row in F:
            mean_std.extend([row.mean(), row.std()])
        mean_stds[i] = np.array(mean_std)

    return (ffts, mfccs, mean_stds)
def mfccnceps(filenam):
    """Compute the cepstra of a WAV file and cache them next to it.

    The output name is the input name with its extension replaced by
    ``.ceps``; the array is written with ``np.save``.
    """
    print("creating ceps")
    sample_rate, X = scipy.io.wavfile.read(filenam)
    ceps = mfcc(X)[0]
    stem = os.path.splitext(filenam)[0]
    # Cached on disk so later learning steps do not recompute it.
    np.save(stem + ".ceps", ceps)
def classify(file_name):
    """Classify the accent of a WAV file with the last saved model.

    The model files matching ``models/model*.xml`` are sorted and the last
    one is loaded. The network is activated with the mean cepstrum over the
    middle 80% of frames, and the accent code with the largest activation
    is returned. If no model file exists a message is printed and
    ``exit()`` is called.
    """
    accents = ["CH", "EN", "IN", "IR", "IT", "JA", "KO"]

    models = sorted(glob("models/model*.xml"))
    if not models:
        print("no models found")
        exit()

    chosen = models[-1]
    print("using model: {}".format(chosen))
    net = NetworkReader.readFrom(chosen)

    sample_rate, X = scipy.io.wavfile.read(file_name)
    ceps, mspec, spec = mfcc(X)
    num_ceps = len(ceps)
    central_mean = np.mean(
        ceps[int(num_ceps / 10):int(num_ceps * 9 / 10)], axis=0)

    vx = np.array([central_mean])
    result = net.activate(vx[0].tolist()).tolist()
    return accents[result.index(max(result))]
def BED_extract(path, nfft=784):
    """Collect normalised mean spectra and emotion ids for all WAV files.

    Walks ``path`` recursively. For every ``.wav`` file the spectrum
    returned by ``mfcc`` is averaged over axis 0, truncated to ``nfft``
    values and divided by the spectrum maximum. The emotion id is looked up
    from the sixth character of the file name.

    Args:
        path: Directory to walk (absolute or relative).
        nfft: Number of spectrum values kept per file; ``mfcc`` is called
            with ``nfft * 2``.

    Returns:
        ``(list_data, list_label)``: a float array of shape ``(-1, nfft)``
        and a flat float array with one id per file. Both are empty when no
        WAV file is found.

    Raises:
        KeyError: If the sixth character of a file name is not a known
            emotion code.
    """
    # W:anger:0  N:neutral:1  F:happiness:2  T:sadness:3
    dic = {'W': 0, 'L': 1, 'E': 3, 'A': 0, 'F': 2, 'T': 3, 'N': 1}

    spectra = []
    labels = []
    for root, _dirs, files in os.walk(path):
        for name in files:
            if os.path.splitext(name)[1].lower() != '.wav':
                continue
            # os.walk already yields ``root`` prefixed with ``path``;
            # joining it onto abspath(path) again doubled relative paths.
            filepath = os.path.join(root, name)
            SR, X = wavfile.read(filepath)
            _, _, spec = mfcc(X, fs=SR, nfft=(nfft * 2))
            print(filepath)
            spectra.append(numpy.mean(spec, axis=0)[:nfft] / numpy.max(spec))
            labels.append(int(dic[name[5]]))

    # Build the arrays once instead of re-allocating with numpy.append.
    flat = numpy.concatenate(spectra) if spectra else []
    list_data = numpy.asarray(flat, dtype=float)
    list_label = numpy.asarray(labels, dtype=float)

    # Floor division keeps the reshape dimension integral on Python 3.
    list_data = numpy.reshape(list_data, (len(list_data) // nfft, nfft))
    return list_data, list_label
def mfccIFY(dict_read):
    """Map every key of ``dict_read`` to the cepstra of its signal.

    Each value of ``dict_read`` is unpacked as ``(sample_rate, signal)``;
    the signal is passed to ``mfcc`` and the first result is stored under
    the same key in a new dictionary.
    """
    ceps_by_key = {}
    for key in dict_read:
        sample_rate, signal = dict_read.get(key)
        ceps_by_key[key] = mfcc(signal)[0]
    return ceps_by_key
def emsenble():
    """Train a hard-voting ensemble on mean MFCC vectors and save it.

    Every file returned by ``import_wav_data_in_dir(datapath)`` is read,
    clipped to ``[1e-10, 1]``, converted to MFCCs and averaged over axis 0.
    The label is taken from the file name (``-hu.``, ``-ti.``, otherwise
    ``dc``). A random forest, a 3-nearest-neighbour classifier and a QDA
    model vote; the fitted ensemble is written to
    ``trained_models/clf_rf_knn_qda.pkl.cmp``.
    """
    X = []
    y = []
    for f in import_wav_data_in_dir(datapath):
        x, sample_rate = sf.read(datapath + f)
        x = np.clip(x, 1e-10, 1)
        ceps, mspec, spec = mfcc(x, nwin=256, nfft=512, fs=8000, nceps=13)
        X.append(np.mean(ceps, axis=0))

        # The file name carries the class marker.
        if '-hu.' in f:
            label_key = 'hu'
        elif '-ti.' in f:
            label_key = 'ti'
        else:
            label_key = 'dc'
        y.append(label_dict[label_key])

    X = np.array(X)
    y = np.array(y)

    clf1 = RandomForestClassifier(n_estimators=498, random_state=random_state)
    clf2 = KNeighborsClassifier(n_neighbors=3, weights='uniform', p=1)
    clf3 = QuadraticDiscriminantAnalysis()
    members = [('rf', clf1), ('knn', clf2), ('qda', clf3)]
    eclf = VotingClassifier(estimators=members, voting='hard')
    eclf.fit(X, y)

    joblib.dump(eclf, 'trained_models/clf_rf_knn_qda.pkl.cmp', compress=True)
def getmfccdata(path):
    """Extract the MFCC data from the wav files below ``path``.

    Parameters:
    -----------
    path - directory holding one sub-directory per genre, e.g.
           "E:/UNM/CS 529 - Intro to Machine Learning/Assignment 3/opihi.cs.uvic.ca/sound/genres"
           (written with forward slashes: a backslash-U sequence inside a
           normal docstring is a syntax error on Python 3).

    Returns:
    --------
    mfccdata - matrix of shape (no_of_docs, no_of_mfcc_features); the
               original author documents it as (600, 13).

    Side effect: writes the genre id of every processed song, one per line,
    to ``classesmatrix.txt`` in the current directory.
    """
    # Row k holds the genre id of the k-th processed song.
    classesmatrix = np.zeros((no_of_docs, 1))
    mfccdata = np.zeros((no_of_docs, no_of_mfcc_features))
    fileindex = 0  # row of the song currently being processed

    for subdir, dirs, files in os.walk(path):
        genre = os.path.basename(subdir)
        if genre not in genres:
            continue
        for f in files:
            if not f.endswith('.wav'):
                continue
            print("Processing file : " + f)
            sample_rate, X = scipy.io.wavfile.read(os.path.join(subdir, f))
            ceps, mspec, spec = mfcc(X)
            num_ceps = ceps.shape[0]
            # Average only the middle 80% of the frames.
            mfcc_features = np.mean(
                ceps[int(num_ceps * 1 / 10):int(num_ceps * 9 / 10)], axis=0)
            mfccdata[fileindex, :len(mfcc_features)] = mfcc_features
            classesmatrix[fileindex] = genres[genre]
            fileindex += 1

    np.savetxt('classesmatrix.txt', classesmatrix, '%d')
    return mfccdata
def create_ceps(fn):
    """Compute 13 cepstral coefficients for ``fn`` and store them.

    The WAV file is read, passed to ``mfcc`` with ``nceps=13`` and the
    cepstra are handed to ``write_ceps`` together with the file name.
    """
    sample_rate, samples = scipy.io.wavfile.read(fn)
    cepstra = mfcc(samples, nceps=13)[0]
    write_ceps(cepstra, fn)
def sparkFeatureExt(line):
    """Extract spectral, energy and MFCC features for one audio file.

    Prints ``line`` and then works on the name ``file``.

    NOTE(review): the body never uses ``line`` after printing it; it reads
    the name ``file`` instead. Confirm whether ``file`` is a module global
    or whether ``line`` was intended.

    Returns:
        An 8-tuple ``(centroid, rolloff, flux, rms, zcr, energy,
        mfcc_means, class_id)`` where ``mfcc_means`` is a list holding the
        per-column average of the first 13 MFCC columns.
    """
    print(line)

    # Characters 72 .. -10 of the path are handed to ClassToInt.
    class_part = file[72:][:-10]
    intClass = ClassToInt(class_part)

    samplerate, wavedata = wavfile.read(file)

    s1, _ = spectral_centroid(wavedata, 512, samplerate)
    sr1, _ = spectral_rolloff(wavedata, 512, samplerate)
    sf1, _ = spectral_flux(wavedata, 512, samplerate)

    rms, _ = root_mean_square(wavedata, 512, samplerate)
    # The RMS array contains NaN values; keep only the real numbers.
    rms = rms[np.logical_not(np.isnan(rms))]

    zcr, _ = zero_crossing_rate(wavedata, 512, samplerate)

    MFCCs, mspec, spec = mfcc(wavedata)
    frame_count = MFCCs.shape[0]
    MFCC_coef = []
    for col in range(13):
        # Column average over all frames.
        column_total = sum(MFCCs[row, col] for row in range(frame_count))
        MFCC_coef.append(column_total / frame_count)

    eng = stEnergy(wavedata)
    return s1, sr1, sf1, rms, zcr, eng, MFCC_coef, intClass
def create_ceps(fn):
    """Create the MFCC features of ``fn`` and store them via write_ceps.

    Samples equal to 0 are replaced by 1 before ``mfcc`` is called.
    """
    sample_rate, samples = scipy.io.wavfile.read(fn)
    zero_mask = samples == 0
    samples[zero_mask] = 1
    cepstra = mfcc(samples)[0]
    write_ceps(cepstra, fn)
def mfccnceps(filenam):
    """Compute the cepstra of a WAV file, save them and return the stem.

    The cepstra are written with ``np.save`` to ``<stem>.ceps`` where
    ``<stem>`` is ``filenam`` without its extension.

    Returns:
        ``filenam`` without its extension.
    """
    print("creating ceps")
    sample_rate, X = scipy.io.wavfile.read(filenam)
    ceps = mfcc(X)[0]
    stem = os.path.splitext(filenam)[0]
    np.save(stem + ".ceps", ceps)
    return stem
def create_ceps(fn):
    """Create the MFCC features of ``fn`` and store them via write_ceps.

    Samples equal to 0 are replaced by 1 before ``mfcc`` is called.
    """
    sample_rate, signal = scipy.io.wavfile.read(fn)
    signal[signal == 0] = 1
    result = mfcc(signal)
    write_ceps(result[0], fn)
def generateMfcc(wavFile):
    """Filter ``wavFile`` and return the cepstra of the filtered audio.

    The file is first passed through ``filtering(wavFile, 2800, 3400)``.
    The frame size reported by ``getFrameSize`` is used both as window
    length and FFT size; 13 coefficients are requested.
    """
    filteredFile = filtering(wavFile, 2800, 3400)
    audio, fs, enc = wavread(filteredFile)
    frame = getFrameSize(filteredFile)
    return mfcc(audio, nwin=frame, nfft=frame, fs=fs, nceps=13)[0]
def mfcc_sound_features(self):
    """Return the cepstra of ``self.audio`` averaged over axis 0."""
    ceps, _mspec, _spec = mfcc(self.audio)
    return np.mean(ceps, axis=0)
def coeff(arr):
    """Return the first 13 cepstral columns of ``arr`` as one flat vector.

    ``mfcc`` is called with ``fs=11025``. The result is laid out row by
    row, i.e. element ``13 * i + j`` is coefficient ``j`` of row ``i``.
    The hand-written double loop was replaced by a NumPy slice and ravel.

    Returns:
        A new 1-D float array of length ``13 * len(ceps)``.
    """
    ceps, mspec, spec = mfcc(arr, fs=11025)
    # np.array copies, so the caller never receives a view into ``ceps``.
    return np.array(ceps, dtype=float)[:, :13].ravel()
def convert(path):
    """Turn a WAV file into a one-row input vector.

    The row is the mean cepstrum over the middle 80% of the frames.

    Returns:
        A NumPy array built from a one-element list holding that mean.
    """
    sample_rate, X = scipy.io.wavfile.read(path)
    ceps, mspec, spec = mfcc(X)
    cep_count = len(ceps)
    central = ceps[int(cep_count / 10):int(cep_count * 9 / 10)]
    return np.array([np.mean(central, axis=0)])
def create_ceps(fn):
    """Return the mean cepstrum of ``fn`` wrapped in a one-element list.

    Prints a progress line, reads the file with ``w.read`` and averages the
    cepstra over the middle 80% of the frames.

    The Python 2 ``print`` statement was replaced by a call that emits the
    same text and is valid on Python 3 as well.
    """
    print("creating : %s" % (fn,))
    sample_rate, X = w.read(fn)
    ceps, mspec, spec = mfcc(X)
    num_ceps = len(ceps)
    central = ceps[int(num_ceps * 1 / 10):int(num_ceps * 9 / 10)]
    return [np.mean(central, axis=0)]
def show(self, example):
    """Plot the first 100 MFCC frames of one example.

    The sound file ``self.base + example.file`` is read completely and
    scaled by 0.8; every second frame between ``example.start`` and
    ``example.stop`` is passed to ``features.mfcc`` with ``fs=41000``.
    The shape of the cepstra is printed and their transpose is drawn on a
    new 20x20 inch figure.
    """
    sound = audiolab.sndfile(self.base + example.file)
    frames = sound.read_frames(sound.get_nframes()) * 0.8
    segment = frames[example.start:example.stop:2]
    cepstra = features.mfcc(segment, fs=41000)[0]
    print(cepstra.shape)

    fig = plt.figure()
    fig.set_size_inches(20, 20)
    axes = fig.add_subplot(111)
    axes.imshow(cepstra.transpose()[:, :100])
def MFCC(package):
    """Return flattened cepstra for every channel of an EEG package.

    Returns ``False`` when ``package`` is not exactly of type
    ``eT.EEGpackage``; otherwise an array with one flattened cepstral
    vector per entry of ``package.packet``.
    """
    if type(package) is not eT.EEGpackage:
        return False
    # NOTE: the original author remarked that ``mfcc`` was not defined here.
    return np.array([mfcc(ch)[0].ravel() for ch in package.packet])
def create_ceps(wavfile):
    """Compute the MFCCs of ``wavfile`` and store them via write_ceps.

    ``mfcc`` returns three values, described by the original author as:
    ceps  - ndarray of MFCC
    mspec - ndarray of log-spectrum in the mel-domain
    spec  - spectrum magnitude
    Only ``ceps`` is written.
    """
    sampling_rate, song_array = scipy.io.wavfile.read(wavfile)
    cepstra = mfcc(song_array)[0]
    write_ceps(cepstra, wavfile)
def create_ceps(fn):
    """Write the cepstra of ``fn`` unless any frame has a NaN at index 1.

    Every row of the cepstral result is checked at position 1; if one of
    them is NaN nothing is written.
    """
    sample_rate, X = io.wavfile.read(fn)
    ceps, mspec, spec = mfcc(X)
    if any(np.isnan(frame[1]) for frame in ceps):
        return
    write_ceps(ceps, fn)
def performMFCC(self):
    """Compute, store and plot the cepstra of ``self.filename``.

    Samples equal to 0 are replaced by 1 before ``mfcc`` is called. The
    cepstra go to ``self.write_ceps`` and a plot titled with the file name
    is saved next to the input with a ``.png`` extension.
    """
    sample_rate, samples = scipy.io.wavfile.read(self.filename)
    samples[samples == 0] = 1
    cepstra = mfcc(samples)[0]
    self.write_ceps(cepstra)

    plt.plot(cepstra)
    plt.title(self.filename)
    stem = os.path.splitext(self.filename)[0]
    plt.savefig(stem + ".png")
def show(self, example):
    """Plot the first 100 MFCC frames of one example.

    The sound file ``self.base + example.file`` is read completely and
    scaled by 0.8; every second frame between ``example.start`` and
    ``example.stop`` is passed to ``features.mfcc`` with ``fs=41000``.
    The shape of the cepstra is printed and their transpose is drawn on a
    new 20x20 inch figure.
    """
    handle = audiolab.sndfile(self.base + example.file)
    scaled = handle.read_frames(handle.get_nframes()) * 0.8
    cepstra = features.mfcc(
        scaled[example.start:example.stop:2], fs=41000)[0]
    print(cepstra.shape)

    figure = plt.figure()
    figure.set_size_inches(20, 20)
    figure.add_subplot(111).imshow(cepstra.transpose()[:, :100])
def get_ceps(filepath):
    """
    :param filepath: path of a wav audio file
    :return: the cepstra of the file averaged over axis 0
    """
    sample_rate, samples = scipy.io.wavfile.read(filepath)
    cepstra = mfcc(samples)[0]
    return np.mean(cepstra, axis=0)
def generate_and_save_ceps(wave_file):
    """Save the mean cepstrum of ``wave_file`` next to the input file.

    The first and last tenth of the frames are left out of the average.
    The output name replaces the last three characters of ``wave_file``
    with ``ceps`` and is written with ``np.save``.
    """
    sample_rate, X = sp.io.wavfile.read(wave_file)
    ceps = mfcc(X)[0]  # frames x coefficients
    num_frames = len(ceps)
    first = int(num_frames * 0.1)
    last = int(num_frames * 0.9)
    averaged = np.mean(ceps[first:last], axis=0)
    np.save(wave_file[:-3] + "ceps", averaged)
def create_ceps(fn, op='plain'):
    """Compute the cepstra of ``fn`` and store them via write_ceps.

    When ``op`` is ``'norm'`` the samples are first passed through
    ``StandardScaler().fit_transform``. ``op`` is also forwarded to
    ``write_ceps``.
    """
    sample_rate, X = wavfile.read(fn)
    if op == 'norm':
        X = StandardScaler().fit_transform(X)
    cepstra = mfcc(X)[0]
    write_ceps(cepstra, fn, op)
def get_features(filename, feature_type):
    """Return the feature matrix of ``filename`` for the given type.

    Args:
        filename: Path of the audio file.
        feature_type: ``'mfcc'`` or ``'nlse'``.

    Returns:
        For ``'mfcc'``: the first element of the ``mfcc`` result with its
        first entry along axis 0 removed. For ``'nlse'``: the result of
        ``timbrespace(filename)``.

    Raises:
        ValueError: If ``feature_type`` is not one of the supported values
            (previously this surfaced as an unbound local variable).

    NOTE(review): the original comment says the slice drops the energy
    coefficient, but ``[0][1:]`` slices along the first axis of the
    cepstral matrix -- confirm which axis was intended.
    """
    if feature_type == 'mfcc':
        test_audio_file = wave.open(filename, 'r')
        try:
            fileContents = get_audio(test_audio_file)
            coeffs = mfcc(fileContents, fs=44100)[0][1:]
        finally:
            # Release the handle even when feature extraction fails.
            test_audio_file.close()
    elif feature_type == 'nlse':
        coeffs = timbrespace(filename)
    else:
        raise ValueError("unsupported feature_type: %r" % (feature_type,))
    return coeffs
def extractfeatures():
    """Save the cepstra of every WAV file below ``GTZAN_PATH``.

    The entries of ``GTZAN_PATH`` and the ``*.wav`` files inside each entry
    are processed in sorted order. The cepstra of each file are saved with
    ``numpy.save`` under ``FEAT_DATA_PATH/<entry>/<file name>.ceps``.
    """
    for dirname in sorted(os.listdir(GTZAN_PATH)):
        pattern = GTZAN_PATH + '/' + dirname + '/' + '*.wav'
        for filename in sorted(glob.glob(pattern)):
            _, data = scipy.io.wavfile.read(filename)
            ceps, _, _ = mfcc(data)
            target = (FEAT_DATA_PATH + '/' + dirname + '/'
                      + os.path.basename(filename) + '.ceps')
            numpy.save(target, ceps)
def get_mfcc2(filename):
    """Compute the cepstra of ``filename`` and save them as ``<stem>.ceps``.

    The array is written with ``np.save`` and a confirmation line is
    printed.

    ``scipy.io.wavfile`` is imported explicitly: a bare ``import scipy``
    does not guarantee that the ``io.wavfile`` submodule has been loaded.
    """
    import os

    import scipy.io.wavfile
    from scikits.talkbox.features import mfcc

    rate, signal = scipy.io.wavfile.read(filename)
    ceps, mspec, spec = mfcc(signal)
    base_filename, ext = os.path.splitext(filename)
    data_filename = base_filename + ".ceps"
    np.save(data_filename, ceps)
    print(" Written %s" % data_filename)
def update(self, step):
    """Read the next ``step`` frames and refresh ``self.mfcc``.

    Resets ``self.end_of_sample``, reads frames from the current state and,
    if the state returned ``None``, advances to ``self.state.next()`` and
    reads again. ``self.end_of_sample`` is set when the exhausted state was
    a ``PlaySample``.

    NOTE(review): the source was collapsed onto one line, so the statement
    nesting below is reconstructed -- confirm against the original file.
    """
    self.end_of_sample = False
    self.frames = self.state.read_frames(step)
    if self.frames is None:
        # The current state has no more frames.
        if isinstance(self.state, PlaySample):
            self.end_of_sample = True
        self.state = self.state.next()
        self.frames = self.state.read_frames(step)
    assert self.frames is not None, "There are must be some frames"
    # Keep only element [0][0] of the mfcc result; fs is derived from the
    # number of frames read per step.
    self.mfcc = mfcc(self.frames, fs=len(self.frames)/step, nceps=13)[0][0]
def create_ceps():
    """Create missing cepstra files for every genre directory.

    For each genre in ``GENRES`` the ``*.wav`` files below
    ``BASE_DIR/<genre>`` are listed. A file is processed only when no
    ``*.npy`` file with the same stem (name minus its last 9 characters,
    the length of ``.ceps.npy``) exists in that directory.

    Changes from the earlier version: the existing stems are kept in a set
    for constant-time lookup, and the Python 2 ``print`` statement was
    replaced by a call that emits the same text on Python 2 and 3.
    """
    for genre in GENRES:
        wav_list = glob.glob(os.path.join(BASE_DIR, genre, '*.' + 'wav'))
        npy_list = glob.glob(os.path.join(BASE_DIR, genre, '*.' + 'npy'))
        done = set(name[:-9] for name in npy_list)
        for fn in wav_list:
            stem = fn[:-4]
            if stem in done:
                continue
            sample_rate, X = scipy.io.wavfile.read(fn)
            ceps, mspec, spec = mfcc(X)
            write_ceps(ceps, fn)
            print('%s %s' % ('created ', stem + '.ceps.npy'))
def generate_mfcc(path, test=False):
    """Compute the cepstra of ``path`` and store them via write_mfcc.

    When ``test`` equals ``True`` a message is printed and ``True`` is
    passed to ``write_mfcc`` as third argument; otherwise ``write_mfcc`` is
    called with two arguments.
    """
    sample_rate, X = wavfile.read(path)
    ceps, mspec, spec = mfcc(X)
    if test == True:
        print("writing test data")
        write_mfcc(path, ceps, True)
        return
    write_mfcc(path, ceps)
def create_fft(fn, myclass):
    """Append the mean cepstrum of ``fn`` and its class to XA / ya.

    The mean is taken over the middle 80% of the frames. Files that
    ``scipy.io.wavfile.read`` rejects with ``ValueError`` are skipped.

    Args:
        fn: Path of the WAV file.
        myclass: Class label; converted with the builtin ``int``
            (``np.int`` was only an alias for it and no longer exists in
            current NumPy).

    Returns:
        None. Results are appended to the module-level lists ``XA`` and
        ``ya``.
    """
    try:
        sample_rate, X = scipy.io.wavfile.read(fn)
    except ValueError:
        # Unreadable file: leave XA / ya untouched.
        return
    ceps, mspec, spec = mfcc(X)
    num_ceps = len(ceps)
    x = np.mean(ceps[int(num_ceps * 1 / 10):int(num_ceps * 9 / 10)], axis=0)
    y = int(myclass)
    XA.append(x)
    ya.append(y)
def show_specgram(speech):
    """Display the MFCCs of the sound file ``speech``.

    All frames are read, passed to ``talkfeat.mfcc`` and the transposed
    first result is shown with ``plt.imshow``.

    Changes from the earlier version: the title now names the ``speech``
    argument instead of ``sys.argv[1]`` (which is wrong or missing when the
    function is not driven from the command line), and the sound handle is
    closed as soon as reading finishes, even on error.
    """
    sound = audiolab.sndfile(speech, 'read')
    try:
        sound_info = sound.read_frames(sound.get_nframes())
    finally:
        sound.close()

    cepstra = talkfeat.mfcc(sound_info)[0]
    plt.imshow(cepstra.transpose())
    plt.title('Spectrogram of %s' % (speech,))
    plt.show()
def readwav(trainfolder, testfolder):
    """Write an ``.mfc`` text file for every WAV in both folders.

    The working directory is changed first to ``trainfolder`` and then to
    ``testfolder``. In each, every ``*.wav`` file whose stem has the form
    ``a-b-c`` is converted to cepstra with a window of ``int(fs * 0.025)``
    samples and saved with ``np.savetxt`` using ``%.10f``.
    """
    for folder in (trainfolder, testfolder):
        os.chdir(folder)
        for f in glob.glob("*.wav"):
            stem = f.split('.')[0]
            # Unpacking enforces the three-part, dash-separated name.
            speaker, letter, _ = stem.split('-')
            data, fs = wavread(f)[:2]
            cep = mfcc(data, fs=fs, nwin=int(fs*0.025))[0]
            np.savetxt(stem + ".mfc", cep, fmt='%.10f')
def create_ceps_test(fn):
    """
    Creates the MFCC features from the test file, saves them to disk, and
    returns the name passed to ``np.save``.

    Samples equal to 0 are replaced by 1 and non-finite samples are
    replaced via ``np.nan_to_num`` before ``mfcc`` is called. The result of
    ``np.nan_to_num`` used to be discarded, so that call had no effect; it
    is now assigned back.

    The Python 2 ``print`` statement was replaced by a call that emits the
    same text on Python 2 and 3.

    NOTE(review): the returned name ends in ``.ceps``; ``np.save`` is
    expected to append ``.npy`` to the file it writes -- confirm callers
    account for that.
    """
    sample_rate, X = scipy.io.wavfile.read(fn)
    X[X == 0] = 1
    X = np.nan_to_num(X)
    ceps, mspec, spec = mfcc(X)
    base_fn, ext = os.path.splitext(fn)
    data_fn = base_fn + ".ceps"
    np.save(data_fn, ceps)
    print("%s %s" % ("Written ", data_fn))
    return data_fn
def generate_mfcc(voice, word, rate, path):
    """Synthesise ``word`` with the ``say`` tool and save its MFCCs.

    The audio is written to ``<path>/ogg/<word>_<voice>_<rate>.ogg``,
    loaded with ``librosa.load(..., mono=True)`` and converted to 26
    cepstral coefficients. The matrix is transposed (axes 0 and 1 swapped)
    and saved to ``<path>/mfcc/<word>_<voice>_<rate>.npy``.

    The command used to be assembled as a shell string, which broke (and
    allowed command injection) whenever ``word`` or ``path`` contained a
    quote character. It is now passed as an argument list without a shell.

    Args:
        voice: Voice name appended to ``-v``.
        word: Text to speak.
        rate: Speech rate appended to ``-r``; formatted with ``%d`` in the
            output name.
        path: Base directory containing the ``ogg`` and ``mfcc`` folders.

    Raises:
        OSError: If the ``say`` executable cannot be started.
    """
    import subprocess

    filename = path + "/ogg/{0}_{1}_{2}.ogg".format(word, voice, rate)
    # Same argument order as the earlier shell command line.
    subprocess.call([
        "say", word,
        "-v{0}".format(voice),
        "-r{0}".format(rate),
        "-o", filename,
    ])

    signal, sample_rate = librosa.load(filename, mono=True)
    mel_features, mspec, spec = mfcc(signal, fs=sample_rate, nceps=26)
    # timesteps x nFeatures -> nFeatures x timesteps
    mel_features = np.swapaxes(mel_features, 0, 1)
    np.save(path + "/mfcc/%s_%s_%d.npy" % (word, voice, rate), mel_features)
def audio_stream(path, nfft):
    """Return the normalised mean spectrum of one WAV file.

    Multi-channel audio is averaged over axis 1 first. The spectrum
    returned by ``mfcc`` (called with ``nfft * 2``) is averaged over
    axis 0, truncated to ``nfft`` values and divided by the spectrum
    maximum.

    Changes from the earlier version: the bare ``except: pass`` around the
    channel average is replaced by an explicit dimensionality check, and
    the reshape uses floor division so the dimension stays an integer on
    Python 3.

    Returns:
        Array reshaped to ``(-1, nfft)``.
    """
    SR, X = wavfile.read(path)
    if X.ndim > 1:
        # Down-mix to a single channel.
        X = numpy.mean(X, axis=1)
    _, _, spec = mfcc(X, fs=SR, nfft=(nfft * 2))
    list_data = numpy.array(numpy.mean(spec, axis=0)[:nfft] / numpy.max(spec))
    list_data = numpy.reshape(list_data, (len(list_data) // nfft, nfft))
    return list_data
def extract_features(path):
    """Return FFT magnitudes, mean MFCCs and a spectral centroid.

    Changes from the earlier version:
    * ``scipy.fft`` is a module in current SciPy and cannot be called, so
      the transform is taken with ``np.fft.fft``.
    * The frequency axis is built as ``np.arange(n) * timestep``; a float
      ``arange`` with a computed stop value can come out one element longer
      than ``n``.
    * A duplicated ``centroid = centroid = ...`` assignment was removed.

    Returns:
        ``(fft_features, mfcc_features, centroid)`` where ``fft_features``
        is the magnitude of the first 1000 FFT values, ``mfcc_features`` is
        the mean cepstrum over the middle 80% of the frames and
        ``centroid`` is the magnitude-weighted mean of the frequency axis.
    """
    sample_rate, X = scipy.io.wavfile.read(path)

    fft_features = abs(np.fft.fft(X)[:1000])
    n = fft_features.size
    timestep = (sample_rate / 2.) / 1000
    freq = np.arange(n) * timestep
    centroid = np.sum(fft_features * freq) / np.sum(fft_features)

    ceps, mspec, spec = mfcc(X)
    num_ceps = ceps.shape[0]
    mfcc_features = np.mean(
        ceps[int(num_ceps*1/10):int(num_ceps*9/10)], axis=0)
    return fft_features, mfcc_features, centroid
def convert_to_mfcc(voice_path):
    """Return the average cepstral row of a WAV file, ignoring NaN rows.

    Rows whose entry at index 1 is NaN are skipped.

    Returns:
        A ``(1, 13)`` array with the average of the remaining rows, or
        ``None`` when every row was skipped.
    """
    sample_rate, X = scipy.io.wavfile.read(voice_path)
    ceps, mspec, spec = mfcc(X)

    total = np.zeros((1, 13))
    used = 0
    for row in ceps:
        if not np.isnan(row[1]):
            total += row
            used += 1

    if used == 0:
        return None
    total /= used
    return total
def record_plot_(self):
    """Plot the waveform and mel spectrum of ``output.wav``.

    Subplot 312 shows the channel-averaged waveform; subplot 313 shows the
    transposed mel spectrum returned by ``mfcc``. Uniform noise from
    ``np.random.rand`` is added to the signal before the transform.

    ``Axes.hold`` no longer exists in current Matplotlib, so each subplot
    is cleared explicitly with ``Axes.clear`` before drawing, which gives
    the same replace-on-redraw result.
    """
    ax = self.figure.add_subplot(312)
    ax.clear()
    SR, X = wavfile.read('output.wav')
    X = np.mean(X, axis=1)
    plt.plot(X)
    plt.xlim(0, len(X))

    y = X + np.random.rand(X.shape[0])
    ceps, mspec, spec = mfcc(y, fs=SR, nfft=nFFT)

    ax = self.figure.add_subplot(313)
    ax.clear()
    plt.pcolormesh(mspec.T)
    plt.xlim(0, len(mspec))
    self.canvas.draw()
def add_mfcc(X, label_id, features, labels):
    """Append the mean MFCC vector of ``X`` and its label.

    Input
        X: song data
        label_id: label (genre) id
        features: list that receives the feature vector
        labels: list that receives ``label_id``

    The mean is taken over the middle 80% of the frames. Raises
    ``ValueError`` when the sum of that vector is not finite; in that case
    neither list is modified.
    """
    ceps, mspec, spec = mfcc(X)
    num_ceps = len(ceps)
    x = np.mean(ceps[int(num_ceps*1/10):int(num_ceps*9/10)], axis=0)
    if not np.isfinite(np.array(x).sum()):
        raise ValueError('fft non-finite data')
    features.append(x)
    labels.append(label_id)