Example #1
import numpy as np
import vggish_embeddings  # project-local wrapper around the VGGish audio model


def process_wav_file(stream):
    # Convert the raw audio into a (num_frames, 128) VGGish embedding.
    audio_embedder = vggish_embeddings.VGGishEmbedder(None)
    processed_embedding = audio_embedder.convert_audio_to_embedding(stream)
    # Add a batch dimension and score with the LSTM (predict_laugh, Example #3).
    p = predict_laugh(np.expand_dims(processed_embedding, axis=0))
    print('Laugh Score: {}'.format(p))
    return p
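For orientation, a one-line usage sketch (hypothetical file name; assumes, as Example #3 suggests, that convert_audio_to_embedding accepts a WAV file path as well as a stream):

score = process_wav_file('recording.wav')  # prints something like 'Laugh Score: [[...]]'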
Example #2
import eel
import keras
import numpy as np
import zmq
from datetime import datetime

import vggish_embeddings
# FLAGS, RATE, CHUNK and MicrophoneStream come from the surrounding project.


@eel.expose
def saveEel(param):
    # Called from the Eel front end; appends a session marker to the open log.
    print("Saving file...")
    # writer.close()
    # writer = open("l_" + str(datetime.now()), 'w')
    writer.write("NEW SESSION\n")
    print("New file started")
    return
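# If the commented-out rotation above were re-enabled, `writer` would become a
# local name and `writer.close()` would raise UnboundLocalError. A working
# variant (a sketch, not the original code) declares it global first:
#
#     global writer
#     writer.close()
#     writer = open("l_" + str(datetime.now()) + ".csv", 'w')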


# Start the Eel web front end without blocking (the script continues below).
eel.start('index.html', block=False)

if __name__ == '__main__':
    model = keras.models.load_model(FLAGS.keras_model)
    audio_embed = vggish_embeddings.VGGishEmbedder()
    # REP socket: clients connect with a matching REQ socket on port 5555.
    context = zmq.Context()
    socket = context.socket(zmq.REP)
    socket.bind("tcp://*:5555")

    # Timestamped CSV log for this session.
    writer = open("l_" + str(datetime.now()) + ".csv", 'w')

    # Moving-average window for smoothing scores, initialised to a neutral 0.5.
    window = [0.5] * FLAGS.avg_window

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        for chunk in audio_generator:
            try:
                arr = np.frombuffer(chunk, dtype=np.int16)
                # RMS volume; cast before squaring so int16 samples cannot overflow.
                vol = np.sqrt(np.mean(arr.astype(np.float64) ** 2))
            except ValueError:
                # Assumed minimal handler: the original excerpt ends inside the
                # try block, so a clause is added to keep it syntactically valid.
                continue
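The excerpt stops before the socket is used, but since the server binds a ZeroMQ REP socket on port 5555, the counterpart is a REQ client along these lines (hypothetical message contents; only the REQ/REP pairing is given by the code above):

import zmq

context = zmq.Context()
socket = context.socket(zmq.REQ)  # REQ pairs with the server's REP socket
socket.connect("tcp://localhost:5555")

socket.send_string("ping")    # hypothetical request payload
reply = socket.recv_string()  # blocks until the server replies
print(reply)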
Example #3
import glob

import keras
import numpy as np

import vggish_embeddings
# FLAGS (wav_directory, wav_file, tfrecord_file) comes from the surrounding project.

MODEL_PATH = '/Users/kaushikandra/laughter-detection/LaughDetection/Models/LSTM_SingleLayer_100Epochs.h5'

def predict_laugh(processed_embedding):
    model = keras.models.load_model(MODEL_PATH)
    return model.predict(processed_embedding)
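# Note: keras.models.load_model re-reads the HDF5 file on every predict_laugh
# call. A cached variant (a sketch, not part of the original example) would be:
#
#     _model = None
#     def predict_laugh(processed_embedding):
#         global _model
#         if _model is None:
#             _model = keras.models.load_model(MODEL_PATH)  # load once, reuse
#         return _model.predict(processed_embedding)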

def process_wav_file(stream):
    audio_embedder = vggish_embeddings.VGGishEmbedder(None)
    processed_embedding = audio_embedder.convert_audio_to_embedding(stream)
    p = predict_laugh(np.expand_dims(processed_embedding, axis=0))
    print('Laugh Score: {}'.format(p))
    return p


if __name__ == '__main__':
    audio_embedder = vggish_embeddings.VGGishEmbedder(FLAGS.tfrecord_file)

    if FLAGS.wav_directory:
        # Batch mode: embed every WAV in the directory, then zero-pad each
        # (num_frames, 128) embedding to the longest clip so the list stacks
        # into a single batch for the model.
        files = glob.glob(FLAGS.wav_directory + '/*.wav')
        embeddings = [audio_embedder.convert_audio_to_embedding(f) for f in files]
        max_len = np.max([e.shape[0] for e in embeddings])
        embeddings = np.array([np.append(e, np.zeros([max_len - e.shape[0], 128], np.float32), axis=0)
                               for e in embeddings])
        scores = predict_laugh(embeddings)
        for name, score in zip(files, scores[:, 0]):
            print('{:>12}:  {:0.6f}'.format(name, score))

    else:
        # Single-file mode: score the one WAV given by FLAGS.wav_file.
        processed_embedding = audio_embedder.convert_audio_to_embedding(FLAGS.wav_file)
        p = predict_laugh(np.expand_dims(processed_embedding, axis=0))
        print('Laugh Score: {}'.format(p))
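As an aside, the manual zero-padding in the batch branch matches what Keras's pad_sequences utility does (an equivalent sketch, not the example's own code; assumes float32 VGGish frames of width 128):

from keras.preprocessing.sequence import pad_sequences

# Pad each (num_frames, 128) embedding with zero rows at the end ('post'),
# matching the np.append version above, giving a (batch, max_len, 128) array.
embeddings = pad_sequences(embeddings, maxlen=max_len, dtype='float32', padding='post')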