def test(file_name):
    """Predict the speaker of a wav file under data/test/ and print the label with confidence."""
    sample = data.load_wav_file("data/test/" + file_name)
    prediction = model.predict([sample])
    # Confidence is the top softmax probability, scaled to a percentage.
    confidence = numpy.amax(prediction) * 100
    speaker_label = data.one_hot_to_item(prediction, speakers)
    print("predicted speaker for %s : result = %s confidence = %.2f" % (file_name, speaker_label, confidence))
def handle_speaker_rec_test_intent(self, message):
    """Rebuild the speaker-ID network, load pretrained weights, and speak whether the demo wav matches Vicki."""
    known_speakers = data.get_speakers()
    n_classes = len(known_speakers)

    # Single 64-unit hidden layer with dropout over 8192-sample wave chunks.
    network = tflearn.input_data(shape=[None, 8192])  # Two wave chunks
    network = tflearn.fully_connected(network, 64)
    network = tflearn.dropout(network, 0.5)
    network = tflearn.fully_connected(network, n_classes, activation='softmax')
    network = tflearn.regression(network, optimizer='adam', loss='categorical_crossentropy')
    dnn = tflearn.DNN(network)

    # Weights are loaded from a checkpoint shipped next to this module; no training here.
    module_dir = os.path.dirname(__file__)
    model_path = os.path.join(module_dir, 'model', 'model.tfl')
    dnn.load(model_path)

    sample_name = "8_Vicki_260.wav"
    sample = data.load_wav_file(data.path + sample_name)
    predicted = dnn.predict([sample])
    predicted = data.one_hot_to_item(predicted, known_speakers)

    if predicted == "Vicki":
        self.speak("I am confident I'm speaking to %s" % (predicted))  # ~ 97% correct
    else:
        self.speak("I'm sorry I don't recognize your voice")
number_classes = len(speakers) ##print("speakers",speakers) WORD_WAVs = "spoken_words" batch = data.wave_batch_generator(batch_size=1000, source=WORD_WAVs, target=data.Target.speaker) X, Y = next(batch) # Classification tflearn.init_graph(num_cores=8, gpu_memory_fraction=0.5) net = tflearn.input_data(shape=[None, 8192]) #Two wave chunks net = tflearn.fully_connected(net, 64) net = tflearn.dropout(net, 0.5) net = tflearn.fully_connected(net, number_classes, activation='softmax') net = tflearn.regression(net, optimizer='adam', loss='categorical_crossentropy') model = tflearn.DNN(net) ##model.fit(X, Y, n_epoch=100, show_metric=True, snapshot_step=100) model.load("tflearn.dnn.model") # demo_file = "8_Vicki_260.wav" demo_file = "8_Bruce_260.wav" demo = data.load_wav_file(data.path + demo_file) result = model.predict([demo]) result = data.one_hot_to_item(result, speakers) model.save("tflearn.lstm.model") print("predicted speaker for %s : result = %s " % (demo_file, result))
mfccs = []
Y = []
# Build MFCC features (13 coefficients per frame) and one-hot speaker labels
# for every training file.
for f in audio_files:
    Y.append(speech_data.one_hot_from_item(speech_data.speaker(f), speakers))
    y, sr = librosa.load(data + f)
    mfccs.append(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13))

# Network over 13 MFCC coefficients x 44 frames per utterance.
net = tflearn.input_data(shape=[None, 13, 44])
net = tflearn.fully_connected(net, 64)
net = tflearn.dropout(net, 0.5)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, len(speakers), activation='softmax')
net = tflearn.regression(net, optimizer='adam', loss='categorical_crossentropy')
model = tflearn.DNN(net, tensorboard_dir='/home/cc/working/tboard/', tensorboard_verbose=3)
model.fit(mfccs, Y, n_epoch=2000, show_metric=True, snapshot_step=100)

# Evaluate on the held-out test directory.
os.chdir('/home/cc/working/data/devclean_test/')
# List the directory ONCE so features and filenames stay paired — the original
# called os.listdir() twice, and listdir order is not guaranteed stable across
# calls, which could pair predictions with the wrong file names.
test_files = os.listdir(os.getcwd())
test = []
for f1 in test_files:
    y, sr = librosa.load(f1)
    test.append(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13))

result = model.predict(test)
# A prediction counts as correct when the predicted speaker label appears in the file name.
c = 0
for f, r in zip(test_files, result):
    res = speech_data.one_hot_to_item(r, speakers)
    if res in f:
        c = c + 1
print('correct: %s ; total: %s' % (str(c), str(len(test))))
def test(model, speakers, buffer):
    """Predict the speaker for an audio buffer and print the label plus confidence."""
    features = data.wave_mfcc(buffer)
    prediction = model.predict([features])
    # Top softmax probability, scaled to a percentage.
    confidence = numpy.amax(prediction) * 100
    label = data.one_hot_to_item(prediction, speakers)
    print("predicted : result = %s confidence = %.2f" % (label, confidence))
def predict(model, speakers, buffer):
    """Predict the speaker for an audio buffer.

    Returns a (label, confidence) pair, where confidence is the top softmax
    probability scaled to a percentage.
    """
    features = data.wave_mfcc(buffer)
    prediction = model.predict([features])
    confidence = numpy.amax(prediction) * 100
    label = data.one_hot_to_item(prediction, speakers)
    return label, confidence
# | Adam | epoch: 030 | loss: 0.05330 - acc: 0.9966 -- iter: 0000/1000 # 'predicted speaker for 9_Vicki_260 : result = ', 'Vicki' speakers = data.get_speakers() number_classes=len(speakers) print("speakers",speakers) WORD_WAVs="spoken_words" batch=data.wave_batch_generator(batch_size=1000,source=WORD_WAVs,target=data.Target.speaker) X,Y=next(batch) # Classification tflearn.init_graph(num_cores=8, gpu_memory_fraction=0.5) net = tflearn.input_data(shape=[None, 8192]) #Two wave chunks net = tflearn.fully_connected(net, 64) net = tflearn.dropout(net, 0.5) net = tflearn.fully_connected(net, number_classes, activation='softmax') net = tflearn.regression(net, optimizer='adam', loss='categorical_crossentropy') model = tflearn.DNN(net) model.fit(X, Y, n_epoch=100, show_metric=True, snapshot_step=100) # demo_file = "8_Vicki_260.wav" demo_file = "8_Bruce_260.wav" demo=data.load_wav_file(data.path + demo_file) result=model.predict([demo]) result=data.one_hot_to_item(result,speakers) print("predicted speaker for %s : result = %s "%(demo_file,result))
# NOTE(review): this chunk begins mid-loop-body — the `for f in ...:` header that
# binds `f` (and the definitions of X, Y, train_data, test_data, number_classes,
# speakers, data) lies outside the visible source, so the code is kept
# byte-identical rather than reformatted. Also note os.listdir(test_data) is
# called twice (once to build `test`, once in the zip); listdir order is not
# guaranteed stable across calls — consider listing once. TODO confirm with the
# surrounding file before restructuring.
y, sr = librosa.load(train_data + f) X.append(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)) # define the network and the model tflearn.init_graph(num_cores=8, gpu_memory_fraction=0.5) net = tflearn.input_data(shape=[None, 13, 44]) net = tflearn.fully_connected(net, 64) net = tflearn.dropout(net, 0.5) net = tflearn.fully_connected(net, number_classes, activation='softmax') net = tflearn.regression(net, optimizer='adam', loss='categorical_crossentropy') model = tflearn.DNN(net) model.fit(X, Y, n_epoch=2000, show_metric=True, snapshot_step=100) # test the model using the testing directory test = [] for f1 in os.listdir(test_data): y, sr = librosa.load(test_data + f1) test.append(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)) result = model.predict(test) c = 0 for f, r in zip(os.listdir(test_data), result): res = data.one_hot_to_item(r, speakers) if res in f: c = c + 1 acc = float(c) / float(len(test)) print('Test set accuracy: %s' % str(acc))