import csv

import numpy as np
import pwctvdrobust  # robust piecewise-constant denoising (project dependency)
from sklearn.ensemble import (AdaBoostClassifier, ExtraTreesClassifier,
                              GradientBoostingClassifier,
                              RandomForestClassifier)
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Project-local helpers assumed to be in scope: AccelerometerDatasetLoader,
# MajorityVoteSmoother, MODE_TO_INT, INT_TO_MODE, and (for the second variant
# below) prepare_features.


def leave_one_user_out_training(training_data_file_path):
    for window_size in [15, 30, 45, 60, 90, 120, 150, 180, 210, 240, 270, 300]:
        print()
        print('###############################################################')
        print(f'Running evaluation for window size {window_size}')
        print()

        step_size = 10
        majority_smoother = MajorityVoteSmoother(10, 15)
        data_loader = AccelerometerDatasetLoader(training_data_file_path,
                                                 window_size, step_size, True)
        results_file_name = \
            f'w{window_size}_s{step_size}_unibo_magnitude_fft_' \
            f'w_maj_vote_w_interpolation.csv'

        with open(results_file_name, 'w') as res_file:
            csv_writer = csv.writer(res_file)
            csv_header = [
                'test_user', 'num_samples', 'algorithm', 'matched', 'missed',
                'acc', 'smoothed_matched', 'smoothed_missed', 'smoothed_acc',
                'maj_vote_smoothed_acc'
            ]
            csv_writer.writerow(csv_header)

            for user in data_loader.users:
                # Fresh, untrained classifiers for every held-out user.
                classifiers = {
                    'decision_tree': DecisionTreeClassifier(),
                    'random_forest': RandomForestClassifier(),
                    'svm': SVC(gamma='scale',
                               decision_function_shape='ovo',
                               probability=True),
                    'adaboost': AdaBoostClassifier(),
                    'gradient_boosting': GradientBoostingClassifier(),
                    'extra_trees': ExtraTreesClassifier()
                }

                train_users = data_loader.users.copy()
                train_users.remove(user)
                test_user = user

                train_windows = []
                train_labels = []
                for train_user in train_users:
                    print(f'Preparing training data for user {train_user}')
                    for window, label in \
                            data_loader.get_user_data_windows(train_user):
                        # Feature vector: real part of the FFT of the
                        # acceleration magnitude.
                        magnitude = \
                            np.sqrt(window.x**2 + window.y**2 + window.z**2)
                        frequency_window = np.fft.fft(magnitude).real
                        train_windows.append(frequency_window)
                        train_labels.append(label)
                print('Finished train data preparation')

                for classifier_name, classifier in classifiers.items():
                    print(f'Training classifier {classifier_name}')
                    classifier.fit(train_windows, train_labels)
                    print('Finished training')

                test_windows = []
                test_labels = []
                print(f'Preparing test data for user {test_user}')
                for window, label in \
                        data_loader.get_user_data_windows(test_user):
                    magnitude = np.sqrt(window.x**2 + window.y**2 + window.z**2)
                    frequency_window = np.fft.fft(magnitude).real
                    test_windows.append(frequency_window)
                    test_labels.append(label)
                print('Finished test data preparation')

                if len(test_windows) == 0:
                    print(f'No data for test user {test_user}. Skipping...')
                    continue

                for classifier_name, classifier in classifiers.items():
                    print(f'Running prediction for classifier '
                          f'{classifier_name}')
                    predictions = classifier.predict(test_windows)

                    same = sum(predictions == test_labels)
                    not_same = sum(predictions != test_labels)
                    acc = same / (same + not_same) * 100
                    print(f'o_len: {len(predictions)}')
                    print(f'o_==: {same}')
                    print(f'o_!=: {not_same}')
                    print(f'o_acc: {acc}')

                    # Smooth the prediction sequence two ways: robust
                    # total-variation denoising on the integer-encoded labels,
                    # and a majority-vote smoother on the raw predictions.
                    prediction_ints = [MODE_TO_INT[m] for m in predictions]
                    print('Running smoothing...')
                    smoothed_ints = pwctvdrobust.pwc_tvdrobust(prediction_ints)
                    majority_smoothed = \
                        np.array(majority_smoother.smooth(predictions))
                    print('Smoothing done...')
                    smoothed_predictions = np.array(
                        [INT_TO_MODE[int(round(f))] for f in smoothed_ints])

                    smoothed_same = sum(smoothed_predictions == test_labels)
                    smoothed_not_same = sum(
                        smoothed_predictions != test_labels)
                    smoothed_acc = smoothed_same / len(test_labels) * 100
                    print(f's_len: {len(predictions)}')
                    print(f's_==: {smoothed_same}')
                    print(f's_!=: {smoothed_not_same}')
                    print(f's_acc: {smoothed_acc}')

                    majority_smoothed_same = \
                        sum(majority_smoothed == test_labels)
                    majority_smoothed_not_same = \
                        sum(majority_smoothed != test_labels)
                    majority_smoothed_acc = \
                        majority_smoothed_same / len(test_labels) * 100
                    print(f'm_len: {len(predictions)}')
                    print(f'm_==: {majority_smoothed_same}')
                    print(f'm_!=: {majority_smoothed_not_same}')
                    print(f'm_acc: {majority_smoothed_acc}')

                    # test_user, num_samples, algorithm, matched, missed, acc,
                    # smoothed_matched, smoothed_missed, smoothed_acc,
                    # maj_vote_smoothed_acc
                    csv_writer.writerow([
                        test_user, len(predictions), classifier_name, same,
                        not_same, acc, smoothed_same, smoothed_not_same,
                        smoothed_acc, majority_smoothed_acc
                    ])
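# --- Illustration (not part of the pipeline above): the first variant reduces
# each accelerometer window to the real part of the FFT of the acceleration
# magnitude. A minimal, self-contained sketch on a synthetic window; the
# window object with x/y/z sample arrays is an assumption mirroring how the
# loader's windows are used above, not the loader's actual type.
def _demo_magnitude_fft_feature():
    from types import SimpleNamespace

    # Hypothetical window of 100 samples per axis.
    rng = np.random.default_rng(0)
    window = SimpleNamespace(x=rng.normal(size=100),
                             y=rng.normal(size=100),
                             z=rng.normal(size=100))

    # Same computation as in the training/test loops above.
    magnitude = np.sqrt(window.x**2 + window.y**2 + window.z**2)
    frequency_window = np.fft.fft(magnitude).real

    # One feature vector per window, same length as the window itself.
    assert frequency_window.shape == magnitude.shape
    return frequency_window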
# Variant of the evaluation above: feature extraction is delegated to
# prepare_features(...), training data is shuffled, and windows containing
# NaN/inf feature values are dropped before fitting and prediction.
def leave_one_user_out_training(training_data_file_path):
    for window_size in [
            1, 2, 5, 10, 15, 20, 30, 45, 60, 90, 120, 150, 180, 210, 240, 270,
            300
    ]:
        print()
        print('###############################################################')
        print(f'Running evaluation for window size {window_size}')
        print()

        step_size = 10
        majority_smoother = MajorityVoteSmoother(10, 15)
        data_loader = AccelerometerDatasetLoader(training_data_file_path,
                                                 window_size, step_size, True)
        results_file_name = \
            f'w{window_size}_s{step_size}__all_features__w_interpolation_' \
            f'w_maj_vote__w_pvcrobust__shuffled__1st_2nd_3rd.csv'

        with open(results_file_name, 'w') as res_file:
            csv_writer = csv.writer(res_file)
            csv_header = [
                'test_user', 'num_samples', 'algorithm', 'matched', 'missed',
                'acc', 'smoothed_matched', 'smoothed_missed', 'smoothed_acc',
                'maj_vote_smoothed_acc'
            ]
            csv_writer.writerow(csv_header)

            for user in data_loader.users:
                # Fresh, untrained classifiers for every held-out user.
                classifiers = {
                    'decision_tree': DecisionTreeClassifier(),
                    'random_forest': RandomForestClassifier(),
                    'svm': SVC(gamma='scale',
                               decision_function_shape='ovo',
                               probability=True),
                    'adaboost': AdaBoostClassifier(),
                    'gradient_boosting': GradientBoostingClassifier(),
                    'extra_trees': ExtraTreesClassifier()
                }

                train_users = data_loader.users.copy()
                train_users.remove(user)
                test_user = user

                train_windows = []
                train_labels = []
                for train_user in train_users:
                    print(f'Preparing training data for user {train_user}')
                    for window_data, label in \
                            data_loader.get_user_data_windows(train_user):
                        features = prepare_features(window_data)
                        train_windows.append(features)
                        train_labels.append(label)
                print('Finished train data preparation')

                train_data = list(zip(train_windows, train_labels))
                shuffle(train_data)

                # Clean up: keep only windows whose feature vectors are
                # entirely finite (no NaN or inf values).
                cleaned_train_windows = []
                cleaned_train_labels = []
                for train_window, train_label in train_data:
                    if np.all(np.isfinite(train_window)):
                        cleaned_train_windows.append(train_window)
                        cleaned_train_labels.append(train_label)

                if len(cleaned_train_windows) == 0:
                    print('No train data left after cleaning...')
                    import pdb  # debugging aid kept from the original
                    pdb.set_trace()
                    continue

                for classifier_name, classifier in classifiers.items():
                    print(f'Training classifier {classifier_name}')
                    classifier.fit(cleaned_train_windows,
                                   cleaned_train_labels)
                    print('Finished training')

                test_windows = []
                test_labels = []
                print(f'Preparing test data for user {test_user}')
                for window_data, label in \
                        data_loader.get_user_data_windows(test_user):
                    features = prepare_features(window_data)
                    test_windows.append(features)
                    test_labels.append(label)
                print('Finished test data preparation')

                if len(test_windows) == 0:
                    print(f'No data for test user {test_user}. Skipping...')
                    continue

                # Clean up: same finiteness filter for the test windows.
                cleaned_test_windows = []
                cleaned_test_labels = []
                for test_window, test_label in zip(test_windows, test_labels):
                    if np.all(np.isfinite(test_window)):
                        cleaned_test_windows.append(test_window)
                        cleaned_test_labels.append(test_label)

                if len(cleaned_test_windows) == 0:
                    print('No test data left after cleaning...')
                    continue

                for classifier_name, classifier in classifiers.items():
                    print(f'Running prediction for classifier '
                          f'{classifier_name}')
                    predictions = classifier.predict(cleaned_test_windows)

                    same = sum(predictions == cleaned_test_labels)
                    not_same = sum(predictions != cleaned_test_labels)
                    acc = same / (same + not_same) * 100
                    print(f'o_len: {len(predictions)}')
                    print(f'o_==: {same}')
                    print(f'o_!=: {not_same}')
                    print(f'o_acc: {acc}')

                    # Smooth the prediction sequence two ways: robust
                    # total-variation denoising on the integer-encoded labels,
                    # and a majority-vote smoother on the raw predictions.
                    prediction_ints = [MODE_TO_INT[m] for m in predictions]
                    print('Running smoothing...')
                    smoothed_ints = pwctvdrobust.pwc_tvdrobust(prediction_ints)
                    majority_smoothed = \
                        np.array(majority_smoother.smooth(predictions))
                    print('Smoothing done...')
                    smoothed_predictions = np.array(
                        [INT_TO_MODE[int(round(f))] for f in smoothed_ints])

                    smoothed_same = \
                        sum(smoothed_predictions == cleaned_test_labels)
                    smoothed_not_same = \
                        sum(smoothed_predictions != cleaned_test_labels)
                    smoothed_acc = \
                        smoothed_same / len(cleaned_test_labels) * 100
                    print(f's_len: {len(predictions)}')
                    print(f's_==: {smoothed_same}')
                    print(f's_!=: {smoothed_not_same}')
                    print(f's_acc: {smoothed_acc}')

                    majority_smoothed_same = \
                        sum(majority_smoothed == cleaned_test_labels)
                    majority_smoothed_not_same = \
                        sum(majority_smoothed != cleaned_test_labels)
                    majority_smoothed_acc = \
                        majority_smoothed_same / len(cleaned_test_labels) * 100
                    print(f'm_len: {len(predictions)}')
                    print(f'm_==: {majority_smoothed_same}')
                    print(f'm_!=: {majority_smoothed_not_same}')
                    print(f'm_acc: {majority_smoothed_acc}')

                    # test_user, num_samples, algorithm, matched, missed, acc,
                    # smoothed_matched, smoothed_missed, smoothed_acc,
                    # maj_vote_smoothed_acc
                    csv_writer.writerow([
                        test_user, len(predictions), classifier_name, same,
                        not_same, acc, smoothed_same, smoothed_not_same,
                        smoothed_acc, majority_smoothed_acc
                    ])
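# --- Illustration: the second variant delegates feature extraction to the
# project-local prepare_features(window_data), whose implementation is not
# shown in this file. The sketch below is purely hypothetical (the real
# feature set is assumed, not confirmed): a flat vector of time- and
# frequency-domain statistics over the acceleration magnitude, which is the
# kind of "all features" vector the results file name suggests.
def _demo_prepare_features(window_data):
    # window_data is assumed to expose x/y/z sample arrays, as in the loops
    # above.
    magnitude = np.sqrt(window_data.x**2 + window_data.y**2 +
                        window_data.z**2)
    spectrum = np.abs(np.fft.rfft(magnitude))
    return np.array([
        magnitude.mean(), magnitude.std(),  # time-domain statistics
        magnitude.min(), magnitude.max(),
        spectrum.mean(), spectrum.std(),    # frequency-domain statistics
        spectrum.argmax(),                  # dominant frequency bin
    ])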