def select_device_and_run(event):
    # Show the chosen device and swap the selection widgets for the prediction label
    myLabel = tk.Label(window, text="Device: " + variable.get())
    myLabel.pack()
    w.destroy()
    device_label.destroy()
    prediction_label.place(relx=.5, rely=.5, anchor="center")

    # Set device
    device = get_host_id(variable.get())
    device_info = sd.query_devices(device, 'input')
    sample_rate = device_info['default_samplerate']
    channels = 1

    # Init system
    init_pre_processing()
    init_activity_detection()
    init_feature_extraction(func_type="all",
                            n_mfcc_arg=10,
                            norm_file=normalization_values)
    init_classificator(knn_model=knn_model)
    init(sample_rate, buffer_size)

    stream = sd.InputStream(device=device,
                            channels=channels,
                            dtype='float32',
                            latency='high',
                            samplerate=sample_rate,
                            callback=audio_callback,
                            blocksize=buffer_size)

    window.protocol("WM_DELETE_WINDOW", on_closing)

    with stream:
        print("Recording***")
        window.mainloop()
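The callback above relies on a get_host_id helper defined elsewhere in the project. As a rough sketch of what it plausibly does (mapping the device name chosen in the dropdown back to a sounddevice input-device index), something like the following would work; it is named with a _sketch suffix because the project's real helper may differ:

import sounddevice as sd

def get_host_id_sketch(device_name):
    """Return the index of the first input device whose name matches device_name."""
    for idx, dev in enumerate(sd.query_devices()):
        if dev['name'] == device_name and dev['max_input_channels'] > 0:
            return idx
    raise ValueError("No input device named %r" % device_name)

if __name__ == '__main__':
    # List every input device together with the index the sketch would return
    for idx, dev in enumerate(sd.query_devices()):
        if dev['max_input_channels'] > 0:
            print(idx, dev['name'], dev['default_samplerate'])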
def run_test(wav_dir, csv_dir, buffer_size, log_file):
    '''
    Run test function (activity detection only):
    input:
        - wav_dir: Location of the audio
        - csv_dir: Location of the csv annotation
        - buffer_size: Default is 512, but it can be modified to test the system on different buffer sizes
        - log_file: Location of the file where all results are logged
    '''
    # Load audio and its annotation
    print(wav_dir)
    audio = Waveform(path=wav_dir)
    groundtruth_annotation = load_annotation(csv_dir)

    # Init system simulation (feature extraction and classification bypassed)
    init_pre_processing()
    init_activity_detection(func_type=1)
    init_feature_extraction(by_pass=True)
    init_classificator(by_pass=True)

    # Run simulation
    result = main(audio, buffer_size)

    # Init groundtruth activity array
    groundtruth_activity = np.zeros(len(result['ONSET_LOCATIONS']))
    sample_rate = audio.sample_rate

    # Transform the annotation into the desired format (1 = activity, 0 = non-activity)
    for i in range(0, len(groundtruth_annotation), 2):
        sample_instant_1 = int(float(groundtruth_annotation[i][0]) * sample_rate)
        sample_instant_2 = int(float(groundtruth_annotation[i + 1][0]) * sample_rate)
        groundtruth_activity[sample_instant_1:sample_instant_2] = 1

    # Evaluate activity detection
    precision, recall, f1_score, accuracy = evaluate_activity_detection(
        groundtruth_activity, result['ONSET_LOCATIONS'])

    row = [wav_dir, precision, recall, f1_score, accuracy]
    with open(log_file, 'a+', newline='') as file:
        w = csv.writer(file)
        w.writerow(row)
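evaluate_activity_detection itself is not shown in this listing. Assuming it compares the two binary, sample-level activity arrays element by element, a minimal sketch of the metrics it returns could look like this (the project's own implementation may differ in details):

import numpy as np

def evaluate_activity_detection_sketch(groundtruth, detected):
    """Sample-wise precision, recall, F1 and accuracy between two binary activity arrays."""
    groundtruth = np.asarray(groundtruth, dtype=bool)
    detected = np.asarray(detected, dtype=bool)

    tp = np.sum(groundtruth & detected)      # active in both
    fp = np.sum(~groundtruth & detected)     # detected but not annotated
    fn = np.sum(groundtruth & ~detected)     # annotated but missed
    tn = np.sum(~groundtruth & ~detected)    # silent in both

    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    accuracy = (tp + tn) / groundtruth.size

    return precision, recall, f1, accuracy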
def run_test(wav_dir, csv_dir, buffer_size, log_file, proposal):
    '''
    Run test function (full system):
    input:
        - wav_dir: Location of the audio
        - csv_dir: Location of the csv annotation
        - buffer_size: Default is 512, but it can be modified to test the system on different buffer sizes
        - log_file: Location of the file where all results are logged
        - proposal: Feature-extraction proposal to evaluate (forwarded to init_feature_extraction as func_type)
    '''
    # Load audio and its annotation
    audio = Waveform(path=wav_dir)
    groundtruth = read_csv(csv_dir)
    print(wav_dir)

    # Init system simulation
    init_pre_processing()
    init_activity_detection()
    init_feature_extraction(func_type=proposal,
                            n_mfcc_arg=20,
                            norm_file=normalization_values)
    init_classificator(knn_model=knn_model)

    # Run simulation
    result = main(audio, buffer_size)
    prediction = get_prediction_time_instants(result['ONSET_LOCATIONS'],
                                              result['PREDICTION'],
                                              audio.sample_rate)

    # Evaluate the whole system (activity detection + classification)
    precision, recall, fscore = evaluate_system(groundtruth, prediction)

    row = [wav_dir, precision, recall, fscore]
    with open(log_file, 'a+', newline='') as file:
        w = csv.writer(file)
        w.writerow(row)
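As a usage illustration (not part of the original test harness), this system-level run_test could be driven over a folder of paired .wav/.csv files; the directory, log file name and proposal value below are assumptions:

import glob
import os

dataset_dir = '../../TestDataset'         # hypothetical dataset location
results_log = 'system_evaluation.csv'     # hypothetical log file

for wav_path in sorted(glob.glob(os.path.join(dataset_dir, '*.wav'))):
    csv_path = os.path.splitext(wav_path)[0] + '.csv'   # annotation shares the audio basename
    if os.path.exists(csv_path):
        run_test(wav_path, csv_path, buffer_size=512,
                 log_file=results_log, proposal="mfcc")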
def classify_and_plot(data):
    # MFCC feature header: 20 means followed by 20 standard deviations
    header = (['mfcc_mean_%d' % i for i in range(1, 21)] +
              ['mfcc_std_%d' % i for i in range(1, 21)])

    # Create the feature log file with its header row
    with open(log_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['audio_class'] + header)

    # Create the normalization file with its header row
    with open(model_normalization, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)

    X = []
    Y = []

    for audio in data:
        buffer_len = 512

        # Init system (classification bypassed: only features are extracted)
        init_pre_processing()
        init_activity_detection()
        init_feature_extraction(n_mfcc_arg=mfcc)
        init_classificator(by_pass=True)

        # Call system
        response = main(audio, buffer_len)
        features = response['FEATURES']

        if len(features) != 0:
            # plot_audio(audio.waveform, response['SIGNAL_PROCESSED'], audio.sample_rate)
            # plot_odf(audio.filename, audio.waveform, response['SIGNAL_PROCESSED'], audio.sample_rate,
            #          response['ONSET_LOCATIONS'], response['HFC'], response['THRESHOLD'])
            features = features[:40]
            row = [audio.class_type]
            row.extend(features)

            # Store feature values for evaluation
            with open(log_file, 'a+', newline='') as file:
                w = csv.writer(file)
                w.writerow(row)

            X.append(features)
            Y.append(audio.class_type)
        else:
            print(len(features))

    X = np.array(normalize(X))
    Y = np.array(Y)
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1)

    # Plot settings
    n_neighbors = 1
    h = .1  # step size in the mesh (kept from the original decision-boundary plot)
    cmap_light = ListedColormap(['#FFAAAA', '#AAAAFF', '#AAFFAA'])
    cmap_bold = ['darkorange', 'c', 'darkblue']
    rcParams['figure.figsize'] = 5, 5

    for weights in ['uniform', 'distance']:
        # Create an instance of the neighbours classifier and fit the data
        knn_classifier = KNeighborsClassifier(n_neighbors, weights=weights)
        knn_classifier.fit(X_train, y_train)

        # Evaluate
        y_expected = y_test
        y_predicted = knn_classifier.predict(X_test)

        # Print results
        print('----------------------------------------------------------------------')
        print('Classification report')
        print('----------------------------------------------------------------------')
        print('\n', classification_report(y_expected, y_predicted))
        print('----------------------------------------------------------------------')

    # Accuracy of the model for different values of k
    k_range = range(1, 20)
    scores = []
    for k in k_range:
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train, y_train)
        scores.append(knn.score(X_test, y_test))

    plt.figure()
    plt.title("Accuracy of the model according to k value")
    plt.xlabel('k')
    plt.ylabel('accuracy')
    plt.scatter(k_range, scores)
    plt.xticks([0, 5, 10, 15, 20])
    plt.show()

    return knn_classifier
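classify_and_plot returns the fitted classifier, while the other scripts expect a ready knn_model. One plausible way to bridge the two, sketched here with pickle and a hypothetical file name, is:

import pickle

def save_knn_model(model, path='knn_model.pkl'):
    """Serialize a fitted KNeighborsClassifier to disk."""
    with open(path, 'wb') as f:
        pickle.dump(model, f)

def load_knn_model(path='knn_model.pkl'):
    """Load a previously saved KNeighborsClassifier."""
    with open(path, 'rb') as f:
        return pickle.load(f)

With something like this in place, knn_model = load_knn_model() could precede the init_classificator(knn_model=knn_model) calls used in the scripts above and below.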
normalization_values = pd.read_csv(model_normalization)

path = '../../TrainDataset/LOD_1624992635178/Snare_LOD'
audio = Waveform(path=path + ".wav")
groundtruth = load_annotation(path + ".csv")

# Init system
init_pre_processing()
init_activity_detection(func_type=1)
init_feature_extraction(func_type="mfcc",
                        by_pass=False,
                        n_mfcc_arg=20,
                        norm_file=normalization_values)
init_classificator(knn_model=knn_model, by_pass=False)
buffer_len = 512

# Call system
result = main(audio, buffer_len)
prediction = get_prediction_time_instants(result['ONSET_LOCATIONS'],
                                          result['PREDICTION'],
                                          audio.sample_rate)

# Plot results
plot_audio(audio.waveform, result['SIGNAL_PROCESSED'], audio.sample_rate)
plot_odf(audio.filename, audio.waveform, result['SIGNAL_PROCESSED'],
         audio.sample_rate, result['ONSET_LOCATIONS'], result['HFC'],
         result['THRESHOLD'])

groundtruth_activity = np.zeros(len(result['ONSET_LOCATIONS']))