def read_data_to_array(path, max_len=21, rescale=False):
    """Load every visible ``.wav`` file in *path* into feature/label arrays.

    Directory entries whose name starts with ``'.'`` or does not end with
    ``'.wav'`` are ignored.  For each remaining file the character at index 5
    of the file name is looked up in ``LABEL_MAPPING`` to obtain its label,
    and ``wav2mfcc`` is called on the full path to obtain its features.

    The per-label counts are printed, and labels are then replaced by their
    rank in ``Counter.most_common()`` order (0 for the most frequent label).

    Args:
        path: Directory to scan.
        max_len: Forwarded unchanged to ``wav2mfcc``.
        rescale: Forwarded unchanged to ``wav2mfcc``.

    Returns:
        A tuple ``(X, y, label_to_int_dict)`` where ``X`` is the stacked
        ``wav2mfcc`` outputs, ``y`` is ``to_categorical`` applied to the
        stacked integer labels (presumably a one-hot matrix -- confirm against
        the ``to_categorical`` in use), and ``label_to_int_dict`` maps each
        label to its integer code.
    """
    features = []
    labels = []
    for entry in os.listdir(path):
        if not entry.startswith('.') and entry.endswith('.wav'):
            # Label lookup happens before feature extraction, so an unknown
            # label character fails before any audio is processed.
            labels.append(LABEL_MAPPING[entry[5]])
            features.append(wav2mfcc(os.path.join(path, entry), max_len, rescale))

    ranked = Counter(labels).most_common()
    print(ranked)

    # Integer code of a label == its position in the frequency ranking.
    label_to_int_dict = {}
    for rank, (label, _count) in enumerate(ranked):
        label_to_int_dict[label] = rank

    encoded = [label_to_int_dict[label] for label in labels]

    X = np.stack(features)
    y = np.stack(encoded)
    y = to_categorical(y)
    return X, y, label_to_int_dict
def predict(name):
    """Classify the WAV file called *name* and return the result as JSON.

    Reads the current Flask request, loads
    ``C://Users//Stage//Downloads//<name>.wav`` through
    ``preprocess.wav2mfcc``, reshapes the result to ``(1, 40, 47, 1)`` and
    feeds it to the module-level ``model`` inside ``graph.as_default()``.

    Args:
        name: Base name (without the ``.wav`` extension) of the file to
            classify.

    Returns:
        The ``flask.jsonify`` response for a dict that always contains
        ``"path"`` and, when a prediction was made, ``"prediction"`` and
        ``"success"``.
    """
    data = {"path": name}

    # get_json(silent=True) returns None instead of aborting the request when
    # the body is missing, not JSON, or malformed, which keeps the
    # query-string fallback below reachable.
    params = flask.request.get_json(silent=True)
    if params is None:
        params = flask.request.args

    # if parameters are found, return a prediction
    # NOTE(review): request.args is normally a (possibly empty) mapping rather
    # than None, so this guard is likely always true -- confirm.
    if params is not None:
        with graph.as_default():
            # NOTE(review): `name` is concatenated into a filesystem path
            # without validation; if it comes from the URL it should be
            # restricted to a safe file name -- confirm against the route.
            sample = preprocess.wav2mfcc('C://Users//Stage//Downloads//' + name + '.wav')
            print(name)
            sample_reshaped = sample.reshape(1, 40, 47, 1)
            data["prediction"] = preprocess.get_labels()[0][np.argmax(
                model.predict(sample_reshaped))]
            data["success"] = True

    # return a response in json format
    return flask.jsonify(data)
def predictTest(name):
    """Classify a randomly chosen file from the test folder called *name*.

    Picks one entry at random from
    ``C://Users//Stage//final project//test//<name>``, loads it through
    ``preprocess.wav2mfcc``, reshapes the result to ``(1, 40, 47, 1)`` and
    feeds it to the module-level ``model`` inside ``graph.as_default()``.

    Args:
        name: Name of the sub-directory of the test folder to sample from.

    Returns:
        The ``flask.jsonify`` response for a dict that always contains
        ``"path"`` and, when a prediction was made, ``"prediction"`` and
        ``"success"``.
    """
    data = {"path": name}

    # get_json(silent=True) returns None instead of aborting the request when
    # the body is missing, not JSON, or malformed, which keeps the
    # query-string fallback below reachable.
    params = flask.request.get_json(silent=True)
    if params is None:
        params = flask.request.args

    # if parameters are found, return a prediction
    # NOTE(review): request.args is normally a (possibly empty) mapping rather
    # than None, so this guard is likely always true -- confirm.
    if params is not None:
        with graph.as_default():
            # NOTE(review): `name` is concatenated into a filesystem path
            # without validation -- confirm it cannot escape the test folder.
            test_dir = "C://Users//Stage//final project//test//" + name
            # random.choice raises IndexError when the directory is empty.
            filename = random.choice(os.listdir(test_dir))
            print(filename)
            sample = preprocess.wav2mfcc(test_dir + "//" + filename)
            print(name)
            sample_reshaped = sample.reshape(1, 40, 47, 1)
            data["prediction"] = preprocess.get_labels()[0][np.argmax(
                model.predict(sample_reshaped))]
            data["success"] = True

    # return a response in json format
    return flask.jsonify(data)
'''
This script is used to make inferences from a trained Convolutional Neural
Network on new data. The data is given as arguments to the program.

The Preprocess module is used to generate the MFCCs of the audio data and load
them into memory.

It requires keras to be installed.
'''

from preprocess import wav2mfcc
from keras.models import load_model
import numpy as np
import sys
import time


def predict(model, mfcc):
    """Run *model* on a single MFCC array and return its output row.

    Args:
        model: Object exposing ``predict``; here the model returned by
            ``load_model``.
        mfcc: Array that can be reshaped to ``(1, 20, 11, 1)``.

    Returns:
        The first element of ``model.predict`` for the reshaped input.
    """
    reshaped = mfcc.reshape(1, 20, 11, 1)
    return model.predict(reshaped)[0]


PATH_TO_MODEL = 'trained.h5'
# Indexed by the argmax of the model output.
LABELS = ['car_horn', 'dog_bark']

# Load the model once and report how long that took.
initial = time.time()
model = load_model(PATH_TO_MODEL)
print(f'Model took {time.time() - initial} seconds to load.')

# Every command-line argument is treated as the path of one audio file.
for path in sys.argv[1:]:
    initial = time.time()
    prediction = predict(model, wav2mfcc(path))
    print(
        f'Prediction for {path}: {LABELS[np.argmax(prediction)]}; {prediction}; prediction took {time.time() - initial} seconds.'
    )
def predict(filepath, model=None):
    """Predict the English word spoken in the audio file, using a CNN.

    The file is converted with ``wav2mfcc``, reshaped to ``(1, 20, 11, 1)``
    and passed to ``model.predict``; the index of the highest score selects
    an entry from ``get_labels()[0]``.

    Args:
        filepath: Path of the audio file to classify.
        model: Object exposing ``predict``.  The parameter keeps its ``None``
            default for signature compatibility, but a model is required.

    Returns:
        The entry of ``get_labels()[0]`` at the arg-max of the model output
        (presumably the label name -- confirm against ``get_labels``).

    Raises:
        ValueError: If *model* is ``None``.
    """
    # Fail fast with a clear message; previously a missing model only
    # surfaced as an AttributeError after the audio had been processed.
    if model is None:
        raise ValueError("predict() requires a trained model; got model=None")

    sample = wav2mfcc(filepath)
    feature_dim_1, feature_dim_2, channel = 20, 11, 1
    sample_reshaped = sample.reshape(1, feature_dim_1, feature_dim_2, channel)
    return get_labels()[0][np.argmax(model.predict(sample_reshaped))]
def predict(filepath, model):
    """Classify one audio file with *model* and return the chosen label entry.

    Args:
        filepath: Path handed to ``wav2mfcc``.
        model: Object exposing ``predict``.

    Returns:
        The entry of ``get_labels()[0]`` at the arg-max of the model output.
    """
    mfcc = wav2mfcc(filepath)
    # Input dimensions come from the module-level feature_dim_1,
    # feature_dim_2 and channel settings.
    model_input = mfcc.reshape(1, feature_dim_1, feature_dim_2, channel)
    # Labels are fetched before the model runs, as in the original expression.
    candidates = get_labels()[0]
    best_index = np.argmax(model.predict(model_input))
    return candidates[best_index]