def process_wav_file(stream):
    """Embed one audio stream with VGGish, score it for laughter, and print the score.

    Args:
        stream: audio input accepted by VGGishEmbedder.convert_audio_to_embedding
            (presumably a wav path or raw stream — confirm against callers).

    Returns:
        The laugh prediction from predict_laugh (batch of one).
    """
    embedder = vggish_embeddings.VGGishEmbedder(None)
    embedding = embedder.convert_audio_to_embedding(stream)
    # predict_laugh expects a batch dimension, so wrap the single embedding.
    score = predict_laugh(np.expand_dims(embedding, axis=0))
    print('Laugh Score: {}'.format(score))
    return score
# NOTE(review): this chunk arrived with its newlines collapsed onto one line;
# the line breaks below are reconstructed from the embedded `#` comments and
# statement boundaries — tokens are unchanged. Verify against the original file.
@eel.expose
def saveEel(param):
    # Exposed to the Eel JS frontend. `param` is received but unused here.
    print("Saving file...")
    # writer.close()
    # writer = open("l_" + str(datetime.now()), 'w')
    # `writer` is a module-level log file handle (opened in __main__ below);
    # presumably this marks a session boundary rather than rotating the file.
    writer.write("NEW SESSION\n")
    print("New file started")
    return


# Launch the Eel UI without blocking so the capture loop below keeps running.
eel.start('index.html', block=False)

if __name__ == '__main__':
    # Laugh-detection model path comes from command-line flags.
    model = keras.models.load_model(FLAGS.keras_model)
    audio_embed = vggish_embeddings.VGGishEmbedder()
    # ZMQ reply socket — peers connect on port 5555. What is served is not
    # visible in this chunk; confirm the recv/send logic downstream.
    context = zmq.Context()
    socket = context.socket(zmq.REP)
    socket.bind("tcp://*:5555")
    # Per-session CSV log, named by start timestamp.
    writer = open("l_" + str(datetime.now()) + ".csv", 'w')
    # Sliding window of recent scores, seeded at 0.5 (neutral).
    window = [0.5] * FLAGS.avg_window
    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        # vol = np
        for chunk in audio_generator:
            try:
                # Interpret the raw chunk as 16-bit PCM samples.
                arr = np.frombuffer(chunk, dtype=np.int16)
                # RMS volume of the chunk.
                vol = np.sqrt(np.mean(arr**2))
                # NOTE(review): the source chunk is TRUNCATED here — the rest of
                # this try body (and its except clause) is outside the visible
                # range, so this block is not syntactically complete as shown.
# Hard-coded, machine-specific absolute path to the trained laugh LSTM.
# NOTE(review): consider promoting this to a flag/env var — it will not exist
# on other machines.
MODEL_PATH = '/Users/kaushikandra/laughter-detection/LaughDetection/Models/LSTM_SingleLayer_100Epochs.h5'

# Lazily-loaded model cache: the original reloaded the .h5 from disk on every
# predict_laugh() call, which is very expensive for repeated predictions
# (e.g. per-stream calls via process_wav_file).
_laugh_model = None


def predict_laugh(processed_embedding):
    """Run the laugh-detection model on a batch of VGGish embeddings.

    Args:
        processed_embedding: batched embeddings — presumably shaped
            (batch, time, 128) to match the zero-padding below; confirm.

    Returns:
        The model's prediction array (one row per batch element).
    """
    global _laugh_model
    if _laugh_model is None:
        # Load once and reuse across calls.
        _laugh_model = keras.models.load_model(MODEL_PATH)
    return _laugh_model.predict(processed_embedding)


def process_wav_file(stream):
    """Embed a single audio input with VGGish and print/return its laugh score."""
    audio_embedder = vggish_embeddings.VGGishEmbedder(None)
    processed_embedding = audio_embedder.convert_audio_to_embedding(stream)
    # Add the batch dimension expected by predict_laugh.
    p = predict_laugh(np.expand_dims(processed_embedding, axis=0))
    print('Laugh Score: {}'.format(p))
    return p


if __name__ == '__main__':
    audio_embedder = vggish_embeddings.VGGishEmbedder(FLAGS.tfrecord_file)
    if FLAGS.wav_directory:
        # Batch mode: score every .wav file in the directory at once.
        files = glob.glob(FLAGS.wav_directory + '/*.wav')
        if not files:
            # Guard: the original crashed with an opaque ValueError from
            # np.max([]) when the directory had no .wav files.
            raise SystemExit('No .wav files found in {}'.format(FLAGS.wav_directory))
        embeddings = [audio_embedder.convert_audio_to_embedding(f) for f in files]
        # Zero-pad every embedding along the time axis to the longest one so
        # they stack into a single (batch, max_len, 128) array.
        max_len = np.max([e.shape[0] for e in embeddings])
        embeddings = np.array([
            np.append(e, np.zeros([(max_len - e.shape[0]), 128], np.float32), axis=0)
            for e in embeddings
        ])
        scores = predict_laugh(embeddings)
        for name, score in zip(files, scores[:, 0]):
            print('{:>12}: {:0.6f}'.format(name, score))
    else:
        # Single-file mode.
        processed_embedding = audio_embedder.convert_audio_to_embedding(FLAGS.wav_file)
        p = predict_laugh(np.expand_dims(processed_embedding, axis=0))
        print('Laugh Score: {}'.format(p))