def test(file_name):
    demo_file = file_name
    # demo=data.load_wav_file(data.path + demo_file)
    demo = data.load_wav_file("data/test/" + demo_file)
    result = model.predict([demo])
    conf = numpy.amax(result) * 100
    result = data.one_hot_to_item(result, speakers)
    print("predicted speaker for %s : result = %s  confidence = %.2f" %
          (demo_file, result, conf))
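A minimal usage sketch, not part of the original example: it assumes `model` and `speakers` are module-level globals built the same way as in the training snippets further down, and that trained weights were saved under the assumed name "tflearn.dnn.model".

# Hypothetical setup for calling test(); the network mirrors the one defined
# in the later examples, and the weights file name is an assumption.
speakers = data.get_speakers()
net = tflearn.input_data(shape=[None, 8192])
net = tflearn.fully_connected(net, 64)
net = tflearn.dropout(net, 0.5)
net = tflearn.fully_connected(net, len(speakers), activation='softmax')
net = tflearn.regression(net, optimizer='adam', loss='categorical_crossentropy')
model = tflearn.DNN(net)
model.load("tflearn.dnn.model")

test("8_Vicki_260.wav")  # file is expected under data/test/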
Example #2
    def handle_speaker_rec_test_intent(self, message):
        speakers = data.get_speakers()
        number_classes = len(speakers)
        #print("speakers",speakers)

        #batch=data.wave_batch_generator(batch_size=1000, source=data.Source.DIGIT_WAVES, target=data.Target.speaker)
        #X,Y=next(batch)

        # Classification
        #tflearn.init_graph(num_cores=8, gpu_memory_fraction=0.5)

        net = tflearn.input_data(shape=[None, 8192])  #Two wave chunks
        net = tflearn.fully_connected(net, 64)
        net = tflearn.dropout(net, 0.5)
        net = tflearn.fully_connected(net,
                                      number_classes,
                                      activation='softmax')
        net = tflearn.regression(net,
                                 optimizer='adam',
                                 loss='categorical_crossentropy')

        model = tflearn.DNN(net)
        #model.fit(X, Y, n_epoch=100, show_metric=True, snapshot_step=100)

        CWD_PATH = os.path.dirname(__file__)
        path_to_model = os.path.join(CWD_PATH, 'model', 'model.tfl')
        model.load(path_to_model)

        demo_file = "8_Vicki_260.wav"
        #demo_file = "8_Bruce_260.wav"
        demo = data.load_wav_file(data.path + demo_file)
        result = model.predict([demo])
        result = data.one_hot_to_item(result, speakers)
        if result == "Vicki":
            self.speak("I am confident I'm speaking to %s" %
                       (result))  # ~ 97% correct
        else:
            self.speak("I'm sorry I don't recognize your voice")
speakers = data.get_speakers()
number_classes = len(speakers)
# print("speakers", speakers)

WORD_WAVs = "spoken_words"
batch = data.wave_batch_generator(batch_size=1000,
                                  source=WORD_WAVs,
                                  target=data.Target.speaker)
X, Y = next(batch)

# Classification
tflearn.init_graph(num_cores=8, gpu_memory_fraction=0.5)

net = tflearn.input_data(shape=[None, 8192])  #Two wave chunks
net = tflearn.fully_connected(net, 64)
net = tflearn.dropout(net, 0.5)
net = tflearn.fully_connected(net, number_classes, activation='softmax')
net = tflearn.regression(net,
                         optimizer='adam',
                         loss='categorical_crossentropy')

model = tflearn.DNN(net)
##model.fit(X, Y, n_epoch=100, show_metric=True, snapshot_step=100)
model.load("tflearn.dnn.model")
# demo_file = "8_Vicki_260.wav"
demo_file = "8_Bruce_260.wav"
demo = data.load_wav_file(data.path + demo_file)
result = model.predict([demo])
result = data.one_hot_to_item(result, speakers)
model.save("tflearn.lstm.model")
print("predicted speaker for %s : result = %s " % (demo_file, result))
mfccs = []
Y = []
for f in audio_files:
  Y.append(speech_data.one_hot_from_item(speech_data.speaker(f), speakers))
  y, sr = librosa.load(data + f)
  mfccs.append(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13))

net = tflearn.input_data(shape=[None, 13, 44]) 
net = tflearn.fully_connected(net, 64)
net = tflearn.dropout(net, 0.5)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, len(speakers), activation='softmax')
net = tflearn.regression(net, optimizer='adam', loss='categorical_crossentropy')

model = tflearn.DNN(net,
                    tensorboard_dir='/home/cc/working/tboard/',
                    tensorboard_verbose=3)
model.fit(mfccs, Y, n_epoch=2000, show_metric=True, snapshot_step=100)

os.chdir('/home/cc/working/data/devclean_test/')

test = []
for f1 in os.listdir(os.getcwd()):
  y, sr = librosa.load(f1)
  test.append(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13))
result = model.predict(test)
c = 0
for f, r in zip(os.listdir(os.getcwd()), result):
  res = speech_data.one_hot_to_item(r, speakers)
  if res in f:
    c = c + 1
print('correct: %s ; total: %s' % (str(c), str(len(test))))
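Note that `input_data(shape=[None, 13, 44])` assumes every MFCC matrix has exactly 44 frames, while `librosa.feature.mfcc` returns as many frames as the clip length allows. A hedged helper sketch (the function name and the pad-or-truncate strategy are mine, not from the original code) that forces a fixed time axis:

import numpy as np
import librosa

def mfcc_fixed(path, n_mfcc=13, n_frames=44):
    # Compute MFCCs and pad or truncate the time axis to n_frames so the
    # result always matches the [13, 44] shape the network expects.
    y, sr = librosa.load(path)
    m = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    if m.shape[1] < n_frames:
        m = np.pad(m, ((0, 0), (0, n_frames - m.shape[1])), mode='constant')
    return m[:, :n_frames]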
Example #5
def test(model, speakers, buffer):
    demo = data.wave_mfcc(buffer)
    result = model.predict([demo])
    conf = numpy.amax(result) * 100
    result = data.one_hot_to_item(result, speakers)
    print("predicted : result = %s  confidence = %.2f" % (result, conf))
Example #6
def predict(model, speakers, buffer):
    demo = data.wave_mfcc(buffer)
    result = model.predict([demo])
    conf = numpy.amax(result) * 100
    result = data.one_hot_to_item(result, speakers)
    return result, conf
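A possible caller for predict(), shown only as a sketch: the 80% cutoff is an arbitrary assumption, and `model`, `speakers`, and `buffer` are assumed to be prepared as in the other examples.

speaker, confidence = predict(model, speakers, buffer)
if confidence >= 80.0:
    print("recognized %s (%.2f%% confidence)" % (speaker, confidence))
else:
    print("no confident match (%.2f%% confidence)" % confidence)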
# | Adam | epoch: 030 | loss: 0.05330 - acc: 0.9966 -- iter: 0000/1000
# 'predicted speaker for 9_Vicki_260 : result = ', 'Vicki'

speakers = data.get_speakers()
number_classes = len(speakers)
print("speakers", speakers)

WORD_WAVs = "spoken_words"
batch = data.wave_batch_generator(batch_size=1000,
                                  source=WORD_WAVs,
                                  target=data.Target.speaker)
X, Y = next(batch)


# Classification
tflearn.init_graph(num_cores=8, gpu_memory_fraction=0.5)

net = tflearn.input_data(shape=[None, 8192]) #Two wave chunks
net = tflearn.fully_connected(net, 64)
net = tflearn.dropout(net, 0.5)
net = tflearn.fully_connected(net, number_classes, activation='softmax')
net = tflearn.regression(net, optimizer='adam', loss='categorical_crossentropy')

model = tflearn.DNN(net)
model.fit(X, Y, n_epoch=100, show_metric=True, snapshot_step=100)

# demo_file = "8_Vicki_260.wav"
demo_file = "8_Bruce_260.wav"
demo = data.load_wav_file(data.path + demo_file)
result = model.predict([demo])
result = data.one_hot_to_item(result, speakers)
print("predicted speaker for %s : result = %s " % (demo_file, result))
X = []
for f in os.listdir(train_data):
    y, sr = librosa.load(train_data + f)
    X.append(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13))

# define the network and the model
tflearn.init_graph(num_cores=8, gpu_memory_fraction=0.5)

net = tflearn.input_data(shape=[None, 13, 44])
net = tflearn.fully_connected(net, 64)
net = tflearn.dropout(net, 0.5)
net = tflearn.fully_connected(net, number_classes, activation='softmax')
net = tflearn.regression(net,
                         optimizer='adam',
                         loss='categorical_crossentropy')

model = tflearn.DNN(net)
model.fit(X, Y, n_epoch=2000, show_metric=True, snapshot_step=100)

# test the model using the testing directory
test = []
for f1 in os.listdir(test_data):
    y, sr = librosa.load(test_data + f1)
    test.append(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13))
result = model.predict(test)
c = 0
for f, r in zip(os.listdir(test_data), result):
    res = data.one_hot_to_item(r, speakers)
    if res in f:
        c = c + 1
acc = float(c) / float(len(test))
print('Test set accuracy: %s' % str(acc))