def example_of_aggregating_sim_matrix(raw_data, labels, num_subjects,
                                      num_epochs_per_subj):
    """Train on all subjects but the first and classify the first one.

    The data and labels are rotated so that the first
    ``num_epochs_per_subj`` entries end up last, and the whole rotated set
    is handed to ``Classifier.fit`` together with ``num_training_samples``.
    Prediction, decision values and score are then requested without passing
    any test data.  The predictions and decision values are printed, the
    accuracy is logged and the score is printed.

    Parameters
    ----------
    raw_data : list
        Per-epoch data.  It is sliced and concatenated with ``+``, so a
        list is expected.  Assumed to be grouped subject by subject --
        confirm with the caller.
    labels : list
        One label per entry of ``raw_data``, in the same order.
    num_subjects : int
        Total number of subjects.
    num_epochs_per_subj : int
        Number of consecutive epochs that make up one subject.
    """
    # aggregate the kernel matrix to save memory
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1,
                      gamma='auto')
    clf = Classifier(svm_clf, num_processed_voxels=1000,
                     epochs_per_subj=num_epochs_per_subj)
    # rotate so the first subject's epochs come last
    rearranged_data = (raw_data[num_epochs_per_subj:]
                       + raw_data[0:num_epochs_per_subj])
    rearranged_labels = (labels[num_epochs_per_subj:]
                         + labels[0:num_epochs_per_subj])
    clf.fit(list(zip(rearranged_data, rearranged_data)), rearranged_labels,
            num_training_samples=num_epochs_per_subj * (num_subjects - 1))
    predict = clf.predict()
    print(predict)
    print(clf.decision_function())
    test_labels = labels[0:num_epochs_per_subj]
    # hamming() returns a fraction; scaling it back up can land a hair off
    # the integer, and %d truncates, so round to a whole count first
    num_incorrect = int(round(
        hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj))
    num_correct = num_epochs_per_subj - num_incorrect
    logger.info('when aggregating the similarity matrix to save memory, '
                'the accuracy is %d / %d = %.2f',
                num_correct, num_epochs_per_subj,
                float(num_correct) / num_epochs_per_subj)
    # when the kernel matrix is computed in portion, the test data is
    # already in
    print(clf.score(None, test_labels))
def example_of_cross_validation_with_detailed_info(raw_data, labels,
                                                   num_subjects,
                                                   num_epochs_per_subj):
    """Run leave-one-subject-out cross validation and report every fold.

    For each subject index, the ``num_epochs_per_subj`` consecutive epochs
    at that index are held out, the classifier is fitted on the remaining
    epochs, and the predictions, decision values and score for the held-out
    epochs are printed while the accuracy is logged.

    Parameters
    ----------
    raw_data : list
        Per-epoch data.  It is sliced and concatenated with ``+``, so a
        list is expected.  Assumed to be grouped subject by subject --
        confirm with the caller.
    labels : list
        One label per entry of ``raw_data``, in the same order.
    num_subjects : int
        Number of folds (one per subject).
    num_epochs_per_subj : int
        Number of consecutive epochs that make up one subject.
    """
    # no shrinking, set C=1
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    # doing leave-one-subject-out cross validation
    for i in range(num_subjects):
        leave_start = i * num_epochs_per_subj
        leave_end = (i + 1) * num_epochs_per_subj
        training_data = raw_data[0:leave_start] + raw_data[leave_end:]
        test_data = raw_data[leave_start:leave_end]
        training_labels = labels[0:leave_start] + labels[leave_end:]
        test_labels = labels[leave_start:leave_end]
        clf.fit(list(zip(training_data, training_data)), training_labels)
        # joblib (dump / load) can be used for saving and loading the
        # fitted model at this point
        predict = clf.predict(list(zip(test_data, test_data)))
        print(predict)
        print(clf.decision_function(list(zip(test_data, test_data))))
        # hamming() returns a fraction; scaling it back up can land a hair
        # off the integer, and %d truncates, so round to a whole count first
        num_incorrect = int(round(
            hamming(predict, np.asanyarray(test_labels))
            * num_epochs_per_subj))
        num_correct = num_epochs_per_subj - num_incorrect
        logger.info('when leaving subject %d out for testing, '
                    'the accuracy is %d / %d = %.2f',
                    i, num_correct, num_epochs_per_subj,
                    float(num_correct) / num_epochs_per_subj)
        print(clf.score(list(zip(test_data, test_data)), test_labels))
def example_of_correlating_two_components(raw_data, raw_data2, labels,
                                          num_subjects, num_epochs_per_subj):
    """Classify epochs built from pairs taken from two data sets.

    Entries of ``raw_data`` and ``raw_data2`` are zipped into pairs.  The
    first ``num_epochs_per_subj * (num_subjects - 1)`` pairs are used for
    fitting and the remaining ones for testing.  Predictions, decision
    values and score are printed; the accuracy is logged.

    Parameters
    ----------
    raw_data : sequence
        Per-epoch data of the first component.
    raw_data2 : sequence
        Per-epoch data of the second component, aligned with ``raw_data``.
    labels : sequence
        One label per epoch, in the same order as the data.
    num_subjects : int
        Total number of subjects.
    num_epochs_per_subj : int
        Number of epochs per subject; the test portion is assumed to hold
        exactly this many epochs -- confirm with the caller.
    """
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1,
                      gamma='auto')
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj * (num_subjects - 1)
    clf.fit(
        list(zip(raw_data[0:num_training_samples],
                 raw_data2[0:num_training_samples])),
        labels[0:num_training_samples])
    X = list(zip(raw_data[num_training_samples:],
                 raw_data2[num_training_samples:]))
    predict = clf.predict(X)
    print(predict)
    print(clf.decision_function(X))
    test_labels = labels[num_training_samples:]
    # hamming() returns a fraction; scaling it back up can land a hair off
    # the integer, and %d truncates, so round to a whole count first
    num_incorrect = int(round(
        hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj))
    num_correct = num_epochs_per_subj - num_incorrect
    # NOTE(review): the message wording mentions similarity-matrix
    # aggregation; confirm it is the intended text for this example
    logger.info('when aggregating the similarity matrix to save memory, '
                'the accuracy is %d / %d = %.2f',
                num_correct, num_epochs_per_subj,
                float(num_correct) / num_epochs_per_subj)
    # the test pairs are passed explicitly here
    print(clf.score(X, test_labels))
def example_of_aggregating_sim_matrix(raw_data, labels, num_subjects,
                                      num_epochs_per_subj):
    """Train on all subjects but the first and classify the first one.

    The data and labels are rotated so that the first
    ``num_epochs_per_subj`` entries end up last, and the whole rotated set
    is handed to ``Classifier.fit`` together with ``num_training_samples``.
    Prediction, decision values and score are then requested without passing
    any test data.  The predictions and decision values are printed, the
    accuracy is logged and the score is printed.

    Parameters
    ----------
    raw_data : list
        Per-epoch data.  It is sliced and concatenated with ``+``, so a
        list is expected.  Assumed to be grouped subject by subject --
        confirm with the caller.
    labels : list
        One label per entry of ``raw_data``, in the same order.
    num_subjects : int
        Total number of subjects.
    num_epochs_per_subj : int
        Number of consecutive epochs that make up one subject.
    """
    # aggregate the kernel matrix to save memory
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, num_processed_voxels=1000,
                     epochs_per_subj=num_epochs_per_subj)
    # rotate so the first subject's epochs come last
    rearranged_data = (raw_data[num_epochs_per_subj:]
                       + raw_data[0:num_epochs_per_subj])
    rearranged_labels = (labels[num_epochs_per_subj:]
                         + labels[0:num_epochs_per_subj])
    clf.fit(list(zip(rearranged_data, rearranged_data)), rearranged_labels,
            num_training_samples=num_epochs_per_subj * (num_subjects - 1))
    predict = clf.predict()
    print(predict)
    print(clf.decision_function())
    test_labels = labels[0:num_epochs_per_subj]
    # hamming() returns a fraction; scaling it back up can land a hair off
    # the integer, and %d truncates, so round to a whole count first
    num_incorrect = int(round(
        hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj))
    num_correct = num_epochs_per_subj - num_incorrect
    logger.info('when aggregating the similarity matrix to save memory, '
                'the accuracy is %d / %d = %.2f',
                num_correct, num_epochs_per_subj,
                float(num_correct) / num_epochs_per_subj)
    # when the kernel matrix is computed in portion, the test data is
    # already in
    print(clf.score(None, test_labels))
def example_of_correlating_two_components(raw_data, raw_data2, labels,
                                          num_subjects, num_epochs_per_subj):
    """Classify epochs built from pairs taken from two data sets.

    Entries of ``raw_data`` and ``raw_data2`` are zipped into pairs.  The
    first ``num_epochs_per_subj * (num_subjects - 1)`` pairs are used for
    fitting and the remaining ones for testing.  Predictions, decision
    values and score are printed; the accuracy is logged.

    Parameters
    ----------
    raw_data : sequence
        Per-epoch data of the first component.
    raw_data2 : sequence
        Per-epoch data of the second component, aligned with ``raw_data``.
    labels : sequence
        One label per epoch, in the same order as the data.
    num_subjects : int
        Total number of subjects.
    num_epochs_per_subj : int
        Number of epochs per subject; the test portion is assumed to hold
        exactly this many epochs -- confirm with the caller.
    """
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj * (num_subjects - 1)
    clf.fit(
        list(zip(raw_data[0:num_training_samples],
                 raw_data2[0:num_training_samples])),
        labels[0:num_training_samples])
    X = list(zip(raw_data[num_training_samples:],
                 raw_data2[num_training_samples:]))
    predict = clf.predict(X)
    print(predict)
    print(clf.decision_function(X))
    test_labels = labels[num_training_samples:]
    # hamming() returns a fraction; scaling it back up can land a hair off
    # the integer, and %d truncates, so round to a whole count first
    num_incorrect = int(round(
        hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj))
    num_correct = num_epochs_per_subj - num_incorrect
    # NOTE(review): the message wording mentions similarity-matrix
    # aggregation; confirm it is the intended text for this example
    logger.info('when aggregating the similarity matrix to save memory, '
                'the accuracy is %d / %d = %.2f',
                num_correct, num_epochs_per_subj,
                float(num_correct) / num_epochs_per_subj)
    # the test pairs are passed explicitly here
    print(clf.score(X, test_labels))
def test_classification():
    """Check Classifier predictions and decision values on fake epochs.

    Twenty epochs are produced by ``create_epoch``; the first 12 are used
    for fitting and the last 8 for evaluation.  Three set-ups are run in
    turn: an SVM with a precomputed kernel, the same SVM with
    ``num_processed_voxels=2`` and ``num_training_samples=12``, and logistic
    regression.  Every comparison against the recorded expectations
    tolerates at most one mismatching entry.
    """
    fake_raw_data = [create_epoch(i, 5) for i in range(20)]
    labels = [0, 1] * 10
    # 5 subjects, 4 epochs per subject
    epochs_per_subj = 4
    split = 12
    training_data = fake_raw_data[0:split]
    expected_output = [0, 0, 0, 1, 0, 1, 0, 1]

    def held_out():
        # built afresh on every call so that no list object is shared
        # between classifier calls
        return list(zip(fake_raw_data[split:], fake_raw_data[split:]))

    def sign_mismatches(reference, observed, count):
        # scaled Hamming distance between the signs of two vectors
        return hamming(np.sign(reference), np.sign(observed)) * count

    def label_mismatches(predicted, wanted):
        # scaled Hamming distance between two label vectors
        return hamming(predicted, wanted) * len(predicted)

    def require_at_most_one(mismatches, message):
        assert mismatches <= 1, message

    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1,
                      gamma='auto')
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(training_data, training_data)), labels[0:split])
    expected_confidence = np.array([-1.18234421, 0.97403604, -1.04005679,
                                    0.92403019, -0.95567738, 1.11746593,
                                    -0.83275891, 0.9486868])
    recomputed_confidence = clf.decision_function(held_out())
    require_at_most_one(
        sign_mismatches(expected_confidence, recomputed_confidence,
                        expected_confidence.size),
        'decision function of SVM with recomputation '
        'does not provide correct results')
    y_pred = clf.predict(held_out())
    require_at_most_one(
        label_mismatches(y_pred, expected_output),
        'classification via SVM does not provide correct results')
    confidence = clf.decision_function(held_out())
    require_at_most_one(
        sign_mismatches(expected_confidence, confidence, confidence.size),
        'decision function of SVM without recomputation '
        'does not provide correct results')
    y = [0, 1, 0, 1, 0, 1, 0, 1]
    score = clf.score(held_out(), y)
    assert np.isclose(hamming(y_pred, y), 1 - score), \
        'the prediction score is incorrect'

    # svm with partial similarity matrix computation
    clf = Classifier(svm_clf, num_processed_voxels=2,
                     epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(fake_raw_data, fake_raw_data)), labels,
            num_training_samples=split)
    y_pred = clf.predict()
    require_at_most_one(
        label_mismatches(y_pred, expected_output),
        'classification via SVM (partial sim) does not '
        'provide correct results')
    confidence = clf.decision_function()
    require_at_most_one(
        sign_mismatches(expected_confidence, confidence, confidence.size),
        'decision function of SVM (partial sim) without recomputation '
        'does not provide correct results')

    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(training_data, training_data)), labels[0:split])
    expected_confidence = np.array([-4.49666484, 3.73025553, -4.04181695,
                                    3.73027436, -3.77043872, 4.42613412,
                                    -3.35616616, 3.77716609])
    recomputed_confidence = clf.decision_function(held_out())
    require_at_most_one(
        sign_mismatches(expected_confidence, recomputed_confidence,
                        expected_confidence.size),
        'decision function of logistic regression with recomputation '
        'does not provide correct results')
    y_pred = clf.predict(held_out())
    require_at_most_one(
        label_mismatches(y_pred, expected_output),
        'classification via logistic regression '
        'does not provide correct results')
    confidence = clf.decision_function(held_out())
    require_at_most_one(
        sign_mismatches(expected_confidence, confidence, confidence.size),
        'decision function of logistic regression without precomputation '
        'does not provide correct results')
def test_classification_with_two_components():
    """Check Classifier on epochs paired from two fake data sets.

    Two sets of twenty epochs are produced by ``create_epoch`` and zipped
    into pairs; the first 12 pairs are used for fitting and the last 8 for
    evaluation.  Three set-ups are run in turn: an SVM with a precomputed
    kernel, the same SVM with ``num_processed_voxels=2`` and
    ``num_training_samples=12``, and logistic regression.  Every comparison
    against the recorded expectations tolerates at most one mismatching
    entry.
    """
    fake_raw_data = [create_epoch(i, 5) for i in range(20)]
    fake_raw_data2 = [create_epoch(i, 6) for i in range(20)]
    labels = [0, 1] * 10
    # 5 subjects, 4 epochs per subject
    epochs_per_subj = 4
    split = 12
    training_data = fake_raw_data[0:split]
    training_data2 = fake_raw_data2[0:split]
    expected_output = [0, 1, 0, 1, 0, 1, 0, 1]

    def held_out():
        # built afresh on every call so that no list object is shared
        # between classifier calls
        return list(zip(fake_raw_data[split:], fake_raw_data2[split:]))

    def sign_mismatches(reference, observed, count):
        # scaled Hamming distance between the signs of two vectors
        return hamming(np.sign(reference), np.sign(observed)) * count

    def label_mismatches(predicted, wanted):
        # scaled Hamming distance between two label vectors
        return hamming(predicted, wanted) * len(predicted)

    def require_at_most_one(mismatches, message):
        assert mismatches <= 1, message

    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1,
                      gamma='auto')
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(training_data, training_data2)), labels[0:split])
    expected_confidence = np.array([-1.23311606, 1.02440964, -0.93898336,
                                    1.07028798, -1.04420007, 0.97647772,
                                    -1.0498268, 1.04970111])
    recomputed_confidence = clf.decision_function(held_out())
    require_at_most_one(
        sign_mismatches(expected_confidence, recomputed_confidence,
                        expected_confidence.size),
        'decision function of SVM with recomputation '
        'does not provide correct results')
    y_pred = clf.predict(held_out())
    require_at_most_one(
        label_mismatches(y_pred, expected_output),
        'classification via SVM does not provide correct results')
    confidence = clf.decision_function(held_out())
    require_at_most_one(
        sign_mismatches(expected_confidence, confidence, confidence.size),
        'decision function of SVM without recomputation '
        'does not provide correct results')
    y = [0, 1, 0, 1, 0, 1, 0, 1]
    score = clf.score(held_out(), y)
    assert np.isclose(hamming(y_pred, y), 1 - score), \
        'the prediction score is incorrect'

    # svm with partial similarity matrix computation
    clf = Classifier(svm_clf, num_processed_voxels=2,
                     epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(fake_raw_data, fake_raw_data2)), labels,
            num_training_samples=split)
    y_pred = clf.predict()
    require_at_most_one(
        label_mismatches(y_pred, expected_output),
        'classification via SVM (partial sim) does not '
        'provide correct results')
    confidence = clf.decision_function()
    require_at_most_one(
        sign_mismatches(expected_confidence, confidence, confidence.size),
        'decision function of SVM (partial sim) without recomputation '
        'does not provide correct results')

    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    # specifying num_training_samples is for coverage
    clf.fit(list(zip(training_data, training_data2)), labels[0:split],
            num_training_samples=split)
    expected_confidence = np.array([-4.90819848, 4.22548132, -3.76255726,
                                    4.46505975, -4.19933099, 4.08313584,
                                    -4.23070437, 4.31779758])
    recomputed_confidence = clf.decision_function(held_out())
    require_at_most_one(
        sign_mismatches(expected_confidence, recomputed_confidence,
                        expected_confidence.size),
        'decision function of logistic regression with recomputation '
        'does not provide correct results')
    y_pred = clf.predict(held_out())
    require_at_most_one(
        label_mismatches(y_pred, expected_output),
        'classification via logistic regression '
        'does not provide correct results')
    confidence = clf.decision_function(held_out())
    require_at_most_one(
        sign_mismatches(expected_confidence, confidence, confidence.size),
        'decision function of logistic regression without precomputation '
        'does not provide correct results')
def test_classification():
    """Check Classifier predictions and decision values on fake epochs.

    Twenty epochs are produced by ``create_epoch``; the first 12 are used
    for fitting and the last 8 for evaluation.  Three set-ups are run in
    turn: an SVM with a precomputed kernel, the same SVM with
    ``num_processed_voxels=2`` and ``num_training_samples=12``, and logistic
    regression.  Every comparison against the recorded expectations
    tolerates at most one mismatching entry.
    """
    fake_raw_data = [create_epoch(i, 5) for i in range(20)]
    labels = [0, 1] * 10
    # 5 subjects, 4 epochs per subject
    epochs_per_subj = 4
    split = 12
    training_data = fake_raw_data[0:split]
    expected_output = [0, 0, 0, 1, 0, 1, 0, 1]

    def held_out():
        # built afresh on every call so that no list object is shared
        # between classifier calls
        return list(zip(fake_raw_data[split:], fake_raw_data[split:]))

    def sign_mismatches(reference, observed, count):
        # scaled Hamming distance between the signs of two vectors
        return hamming(np.sign(reference), np.sign(observed)) * count

    def label_mismatches(predicted, wanted):
        # scaled Hamming distance between two label vectors
        return hamming(predicted, wanted) * len(predicted)

    def require_at_most_one(mismatches, message):
        assert mismatches <= 1, message

    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(training_data, training_data)), labels[0:split])
    expected_confidence = np.array([-1.18234421, 0.97403604, -1.04005679,
                                    0.92403019, -0.95567738, 1.11746593,
                                    -0.83275891, 0.9486868])
    recomputed_confidence = clf.decision_function(held_out())
    require_at_most_one(
        sign_mismatches(expected_confidence, recomputed_confidence,
                        expected_confidence.size),
        'decision function of SVM with recomputation '
        'does not provide correct results')
    y_pred = clf.predict(held_out())
    require_at_most_one(
        label_mismatches(y_pred, expected_output),
        'classification via SVM does not provide correct results')
    confidence = clf.decision_function(held_out())
    require_at_most_one(
        sign_mismatches(expected_confidence, confidence, confidence.size),
        'decision function of SVM without recomputation '
        'does not provide correct results')
    y = [0, 1, 0, 1, 0, 1, 0, 1]
    score = clf.score(held_out(), y)
    assert np.isclose(hamming(y_pred, y), 1 - score), \
        'the prediction score is incorrect'

    # svm with partial similarity matrix computation
    clf = Classifier(svm_clf, num_processed_voxels=2,
                     epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(fake_raw_data, fake_raw_data)), labels,
            num_training_samples=split)
    y_pred = clf.predict()
    require_at_most_one(
        label_mismatches(y_pred, expected_output),
        'classification via SVM (partial sim) does not '
        'provide correct results')
    confidence = clf.decision_function()
    require_at_most_one(
        sign_mismatches(expected_confidence, confidence, confidence.size),
        'decision function of SVM (partial sim) without recomputation '
        'does not provide correct results')

    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(training_data, training_data)), labels[0:split])
    expected_confidence = np.array([-4.49666484, 3.73025553, -4.04181695,
                                    3.73027436, -3.77043872, 4.42613412,
                                    -3.35616616, 3.77716609])
    recomputed_confidence = clf.decision_function(held_out())
    require_at_most_one(
        sign_mismatches(expected_confidence, recomputed_confidence,
                        expected_confidence.size),
        'decision function of logistic regression with recomputation '
        'does not provide correct results')
    y_pred = clf.predict(held_out())
    require_at_most_one(
        label_mismatches(y_pred, expected_output),
        'classification via logistic regression '
        'does not provide correct results')
    confidence = clf.decision_function(held_out())
    require_at_most_one(
        sign_mismatches(expected_confidence, confidence, confidence.size),
        'decision function of logistic regression without precomputation '
        'does not provide correct results')
def test_classification_with_two_components():
    """Check Classifier on epochs paired from two fake data sets.

    Two sets of twenty epochs are produced by ``create_epoch`` and zipped
    into pairs; the first 12 pairs are used for fitting and the last 8 for
    evaluation.  Three set-ups are run in turn: an SVM with a precomputed
    kernel, the same SVM with ``num_processed_voxels=2`` and
    ``num_training_samples=12``, and logistic regression.  Every comparison
    against the recorded expectations tolerates at most one mismatching
    entry.
    """
    fake_raw_data = [create_epoch(i, 5) for i in range(20)]
    fake_raw_data2 = [create_epoch(i, 6) for i in range(20)]
    labels = [0, 1] * 10
    # 5 subjects, 4 epochs per subject
    epochs_per_subj = 4
    split = 12
    training_data = fake_raw_data[0:split]
    training_data2 = fake_raw_data2[0:split]
    expected_output = [0, 1, 0, 1, 0, 1, 0, 1]

    def held_out():
        # built afresh on every call so that no list object is shared
        # between classifier calls
        return list(zip(fake_raw_data[split:], fake_raw_data2[split:]))

    def sign_mismatches(reference, observed, count):
        # scaled Hamming distance between the signs of two vectors
        return hamming(np.sign(reference), np.sign(observed)) * count

    def label_mismatches(predicted, wanted):
        # scaled Hamming distance between two label vectors
        return hamming(predicted, wanted) * len(predicted)

    def require_at_most_one(mismatches, message):
        assert mismatches <= 1, message

    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(training_data, training_data2)), labels[0:split])
    expected_confidence = np.array([-1.23311606, 1.02440964, -0.93898336,
                                    1.07028798, -1.04420007, 0.97647772,
                                    -1.0498268, 1.04970111])
    recomputed_confidence = clf.decision_function(held_out())
    require_at_most_one(
        sign_mismatches(expected_confidence, recomputed_confidence,
                        expected_confidence.size),
        'decision function of SVM with recomputation '
        'does not provide correct results')
    y_pred = clf.predict(held_out())
    require_at_most_one(
        label_mismatches(y_pred, expected_output),
        'classification via SVM does not provide correct results')
    confidence = clf.decision_function(held_out())
    require_at_most_one(
        sign_mismatches(expected_confidence, confidence, confidence.size),
        'decision function of SVM without recomputation '
        'does not provide correct results')
    y = [0, 1, 0, 1, 0, 1, 0, 1]
    score = clf.score(held_out(), y)
    assert np.isclose(hamming(y_pred, y), 1 - score), \
        'the prediction score is incorrect'

    # svm with partial similarity matrix computation
    clf = Classifier(svm_clf, num_processed_voxels=2,
                     epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(fake_raw_data, fake_raw_data2)), labels,
            num_training_samples=split)
    y_pred = clf.predict()
    require_at_most_one(
        label_mismatches(y_pred, expected_output),
        'classification via SVM (partial sim) does not '
        'provide correct results')
    confidence = clf.decision_function()
    require_at_most_one(
        sign_mismatches(expected_confidence, confidence, confidence.size),
        'decision function of SVM (partial sim) without recomputation '
        'does not provide correct results')

    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    # specifying num_training_samples is for coverage
    clf.fit(list(zip(training_data, training_data2)), labels[0:split],
            num_training_samples=split)
    expected_confidence = np.array([-4.90819848, 4.22548132, -3.76255726,
                                    4.46505975, -4.19933099, 4.08313584,
                                    -4.23070437, 4.31779758])
    recomputed_confidence = clf.decision_function(held_out())
    require_at_most_one(
        sign_mismatches(expected_confidence, recomputed_confidence,
                        expected_confidence.size),
        'decision function of logistic regression with recomputation '
        'does not provide correct results')
    y_pred = clf.predict(held_out())
    require_at_most_one(
        label_mismatches(y_pred, expected_output),
        'classification via logistic regression '
        'does not provide correct results')
    confidence = clf.decision_function(held_out())
    require_at_most_one(
        sign_mismatches(expected_confidence, confidence, confidence.size),
        'decision function of logistic regression without precomputation '
        'does not provide correct results')