def example_of_aggregating_sim_matrix(raw_data, labels, num_subjects, num_epochs_per_subj):
    """Fit and evaluate a classifier with the kernel matrix built in portions.

    The leading ``num_epochs_per_subj`` entries of ``raw_data`` and ``labels``
    are rotated to the end, and the whole sequence is passed to ``fit``
    together with the number of training samples.  ``predict``,
    ``decision_function`` and ``score`` are then called without data, since
    the test samples were already supplied to ``fit``.

    Predictions, decision values and the score are printed; the accuracy is
    written to the module ``logger``.  Nothing is returned.
    """
    estimator = svm.SVC(kernel='precomputed', shrinking=False, C=1,
                        gamma='auto')
    clf = Classifier(estimator,
                     num_processed_voxels=1000,
                     epochs_per_subj=num_epochs_per_subj)

    # rotate the first block of epochs (and its labels) to the tail so that
    # it ends up behind the training samples
    rotated_data = raw_data[num_epochs_per_subj:] + raw_data[:num_epochs_per_subj]
    rotated_labels = labels[num_epochs_per_subj:] + labels[:num_epochs_per_subj]
    paired = list(zip(rotated_data, rotated_data))
    n_train = num_epochs_per_subj * (num_subjects - 1)
    clf.fit(paired, rotated_labels, num_training_samples=n_train)

    predict = clf.predict()
    print(predict)
    print(clf.decision_function())

    test_labels = labels[:num_epochs_per_subj]
    # hamming() is a fraction of mismatches; scale it to a count
    num_wrong = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    num_right = num_epochs_per_subj - num_wrong
    logger.info('when aggregating the similarity matrix to save memory, '
                'the accuracy is %d / %d = %.2f' %
                (num_right,
                 num_epochs_per_subj,
                 num_right * 1.0 / num_epochs_per_subj))
    # the test data is already inside the classifier, hence None
    print(clf.score(None, test_labels))
def example_of_cross_validation_with_detailed_info(raw_data, labels, num_subjects, num_epochs_per_subj):
    """Run leave-one-subject-out cross validation and report every fold.

    For each of the ``num_subjects`` folds, one contiguous block of
    ``num_epochs_per_subj`` entries is held out, the classifier is fitted on
    the rest, and the predictions, decision values and score for the held-out
    block are printed.  The per-fold accuracy is written to ``logger``.
    """

    def self_pairs(epochs):
        # pair every entry with itself; a fresh list is built on each call
        return list(zip(epochs, epochs))

    # no shrinking, set C=1
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    # logit_clf = LogisticRegression()
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)

    for subj in range(num_subjects):
        first = subj * num_epochs_per_subj
        last = first + num_epochs_per_subj
        training_data = raw_data[:first] + raw_data[last:]
        test_data = raw_data[first:last]
        training_labels = labels[:first] + labels[last:]
        test_labels = labels[first:last]

        clf.fit(self_pairs(training_data), training_labels)
        # joblib can be used for saving and loading models
        # joblib.dump(clf, 'model/logistic.pkl')
        # clf = joblib.load('model/svm.pkl')
        predict = clf.predict(self_pairs(test_data))
        print(predict)
        print(clf.decision_function(self_pairs(test_data)))

        # hamming() is a fraction of mismatches; scale it to a count
        num_wrong = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
        num_right = num_epochs_per_subj - num_wrong
        logger.info(
            'when leaving subject %d out for testing, '
            'the accuracy is %d / %d = %.2f' %
            (subj, num_right, num_epochs_per_subj,
             num_right * 1.0 / num_epochs_per_subj)
        )
        print(clf.score(self_pairs(test_data), test_labels))
def example_of_correlating_two_components(raw_data, raw_data2, labels, num_subjects, num_epochs_per_subj):
    """Classify with pairs formed from two separate data sequences.

    ``raw_data`` and ``raw_data2`` are paired position by position.  The
    first ``num_epochs_per_subj * (num_subjects - 1)`` pairs are used for
    fitting and the remaining pairs for testing.  Predictions, decision
    values and the score are printed; the accuracy is written to ``logger``.
    """
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1,
                      gamma='auto')
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)

    split = num_epochs_per_subj * (num_subjects - 1)
    train_pairs = list(zip(raw_data[:split], raw_data2[:split]))
    clf.fit(train_pairs, labels[:split])

    # the same test list is reused for predict, decision_function and score
    X = list(zip(raw_data[split:], raw_data2[split:]))
    predict = clf.predict(X)
    print(predict)
    print(clf.decision_function(X))

    test_labels = labels[split:]
    # hamming() is a fraction of mismatches; scale it to a count
    num_wrong = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    num_right = num_epochs_per_subj - num_wrong
    # NOTE(review): the message text mentions "aggregating" although no
    # num_processed_voxels is set here; the text is left unchanged.
    logger.info('when aggregating the similarity matrix to save memory, '
                'the accuracy is %d / %d = %.2f' %
                (num_right,
                 num_epochs_per_subj,
                 num_right * 1.0 / num_epochs_per_subj))
    print(clf.score(X, test_labels))
def test_classification():
    """Check Classifier predictions with an SVM and a logistic regression.

    Twenty synthetic epochs are created; the first 12 are used for fitting
    and the remaining 8 for prediction.  Each prediction may differ from the
    expected labels in at most one position.
    """
    epochs = [create_epoch(n) for n in range(20)]
    train_labels = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
    epochs_per_subj = 4
    train_epochs = epochs[:12]
    wanted = [0, 0, 0, 1, 0, 1, 0, 1]

    def mismatch_count(predicted):
        # hamming() is a fraction; scale it to a count of differing labels
        return hamming(predicted, wanted) * len(predicted)

    # svm
    svm_model = Classifier(svm.SVC(kernel='precomputed', shrinking=False, C=1),
                           epochs_per_subj=epochs_per_subj)
    svm_model.fit(train_epochs, train_labels)
    svm_pred = svm_model.predict(epochs[12:])
    assert mismatch_count(svm_pred) <= 1, \
        'classification via SVM does not provide correct results'

    # logistic regression
    lr_model = Classifier(LogisticRegression(),
                          epochs_per_subj=epochs_per_subj)
    lr_model.fit(train_epochs, train_labels[0:12])
    lr_pred = lr_model.predict(epochs[12:])
    assert mismatch_count(lr_pred) <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
def example_of_cross_validation_using_model_selection(raw_data, labels, num_subjects, num_epochs_per_subj):
    """Cross-validate the classifier through ``model_selection`` helpers.

    NOTE: this method does not work for sklearn.svm.SVC with precomputed
    kernel when the kernel matrix is computed in portions; also, this method
    only works for self-correlation, i.e. correlation between the same data
    matrix.

    The per-fold scores are printed and their mean is written to ``logger``.
    """
    # no shrinking, set C=1
    base_estimator = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    # logit_clf = LogisticRegression()
    clf = Classifier(base_estimator, epochs_per_subj=num_epochs_per_subj)

    # one fold per subject, no shuffling
    # NOTE(review): the folds are stratified by label; whether they coincide
    # with subjects depends on how the labels are ordered - confirm.
    folds = model_selection.StratifiedKFold(n_splits=num_subjects,
                                            shuffle=False)
    self_pairs = list(zip(raw_data, raw_data))
    scores = model_selection.cross_val_score(clf, self_pairs,
                                             y=labels, cv=folds)
    print(scores)
    mean_score = np.mean(scores)
    logger.info('the overall cross validation accuracy is %.2f' % mean_score)
def example_of_correlating_two_components(raw_data, raw_data2, labels, num_subjects, num_epochs_per_subj):
    """Train and test a classifier on pairs taken from two data sequences.

    Entry ``k`` of ``raw_data`` is paired with entry ``k`` of ``raw_data2``.
    All pairs before index ``num_epochs_per_subj * (num_subjects - 1)`` are
    used for fitting; the pairs from that index on are used for testing.
    Results are printed and the accuracy is written to ``logger``.
    """
    estimator = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(estimator, epochs_per_subj=num_epochs_per_subj)

    n_train = num_epochs_per_subj * (num_subjects - 1)
    fit_pairs = list(zip(raw_data[:n_train], raw_data2[:n_train]))
    fit_labels = labels[:n_train]
    clf.fit(fit_pairs, fit_labels)

    # one test list shared by predict, decision_function and score
    X = list(zip(raw_data[n_train:], raw_data2[n_train:]))
    predict = clf.predict(X)
    print(predict)
    print(clf.decision_function(X))

    test_labels = labels[n_train:]
    # hamming() is a fraction of mismatches; scale it to a count
    wrong = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    correct = num_epochs_per_subj - wrong
    # NOTE(review): the message text mentions "aggregating" although no
    # num_processed_voxels is set here; the text is left unchanged.
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (correct, num_epochs_per_subj,
         correct * 1.0 / num_epochs_per_subj)
    )
    print(clf.score(X, test_labels))
def example_of_aggregating_sim_matrix(raw_data, labels, num_subjects, num_epochs_per_subj):
    """Evaluate a classifier whose kernel matrix is aggregated in portions.

    The first ``num_epochs_per_subj`` entries of the data and label sequences
    are moved behind the rest, and everything is given to ``fit`` along with
    ``num_training_samples``.  The trailing entries form the test set, so
    ``predict``, ``decision_function`` and ``score`` are called without data.

    Output goes to stdout and to the module ``logger``; nothing is returned.
    """
    # aggregate the kernel matrix to save memory
    estimator = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(estimator, num_processed_voxels=1000,
                     epochs_per_subj=num_epochs_per_subj)

    block = num_epochs_per_subj
    rearranged_data = raw_data[block:] + raw_data[:block]
    rearranged_labels = labels[block:] + labels[:block]
    clf.fit(list(zip(rearranged_data, rearranged_data)),
            rearranged_labels,
            num_training_samples=num_epochs_per_subj * (num_subjects - 1))

    predict = clf.predict()
    print(predict)
    print(clf.decision_function())

    test_labels = labels[:block]
    # hamming() is a fraction of mismatches; scale it to a count
    wrong = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    correct = num_epochs_per_subj - wrong
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (correct, num_epochs_per_subj,
         correct * 1.0 / num_epochs_per_subj)
    )
    # when the kernel matrix is computed in portion, the test data is
    # already in, so no data is passed to score
    print(clf.score(None, test_labels))
def test_classification():
    """Check SVM and logistic-regression predictions and decision values.

    Twenty synthetic epochs are created; the first 12 are used for fitting
    and the last 8 for testing.  For each estimator the decision function is
    queried before and after ``predict`` and its signs are compared with
    reference values; the predicted labels are compared with the expected
    labels.  At most one mismatch is tolerated in every comparison.

    ``hamming`` is treated as returning the *fraction* of mismatching
    entries (the label checks scale it by the length), so the sign
    comparisons are scaled by the vector size as well.  Without the scaling
    the value can never exceed 1 and the assertion could not fail.
    """
    fake_raw_data = [create_epoch(i) for i in range(20)]
    labels = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
    # 4 epochs per subject
    epochs_per_subj = 4
    training_data = fake_raw_data[0:12]
    expected_output = [0, 0, 0, 1, 0, 1, 0, 1]

    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(training_data, labels)
    expected_confidence = np.array([
        -1.18234421, 0.97403604, -1.04005679, 0.92403019,
        -0.95567738, 1.11746593, -0.83275891, 0.9486868
    ])
    recomputed_confidence = clf.decision_function(fake_raw_data[12:])
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(recomputed_confidence)
                               ) * expected_confidence.size
    assert hamming_distance <= 1, \
        'decision function of SVM with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(fake_raw_data[12:])
    hamming_distance = hamming(y_pred, expected_output) * len(y_pred)
    assert hamming_distance <= 1, \
        'classification via SVM does not provide correct results'
    confidence = clf.decision_function(fake_raw_data[12:])
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(confidence)
                               ) * confidence.size
    assert hamming_distance <= 1, \
        'decision function of SVM without recomputation ' \
        'does not provide correct results'

    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(training_data, labels[0:12])
    expected_confidence = np.array([
        -4.49666484, 3.73025553, -4.04181695, 3.73027436,
        -3.77043872, 4.42613412, -3.35616616, 3.77716609
    ])
    recomputed_confidence = clf.decision_function(fake_raw_data[12:])
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(recomputed_confidence)
                               ) * expected_confidence.size
    assert hamming_distance <= 1, \
        'decision function of logistic regression with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(fake_raw_data[12:])
    hamming_distance = hamming(y_pred, expected_output) * len(y_pred)
    assert hamming_distance <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
    confidence = clf.decision_function(fake_raw_data[12:])
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(confidence)
                               ) * confidence.size
    assert hamming_distance <= 1, \
        'decision function of logistic regression without precomputation ' \
        'does not provide correct results'
# Positional command-line arguments, in order: data directory, file
# extension, mask file, epoch file.
data_dir = sys.argv[1]
extension = sys.argv[2]
mask_file = sys.argv[3]
epoch_file = sys.argv[4]

# The epoch file gives the number of subjects (its length) and the number of
# epochs per subject (second dimension of its first entry).
epoch_list = np.load(epoch_file)
num_subjects = len(epoch_list)
num_epochs_per_subj = epoch_list[0].shape[1]
raw_data, labels = prepare_fcma_data(data_dir, extension,
                                     mask_file, epoch_file)

# no shrinking, set C=1
use_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
# use_clf = LogisticRegression()
clf = Classifier(use_clf, epochs_per_subj=num_epochs_per_subj)

# leave-one-subject-out cross validation: each pass holds out one contiguous
# block of num_epochs_per_subj entries and fits on everything else
for i in range(num_subjects):
    leave_start = i * num_epochs_per_subj
    leave_end = leave_start + num_epochs_per_subj
    test_data = raw_data[leave_start:leave_end]
    test_labels = labels[leave_start:leave_end]
    training_data = raw_data[:leave_start] + raw_data[leave_end:]
    training_labels = labels[:leave_start] + labels[leave_end:]
    clf.fit(training_data, training_labels)
    # joblib can be used for saving and loading models
    # joblib.dump(clf, 'model/logistic.pkl')
    # clf = joblib.load('model/svm.pkl')
    print(clf.predict(test_data))
    print(clf.decision_function(test_data))
def test_classification():
    """Check classification when every epoch is paired with itself.

    Twenty synthetic epochs are created (5 subjects, 4 epochs per subject);
    the first 12 are used for fitting and the last 8 for testing.  Three
    set-ups are exercised: an SVM, the same SVM with the similarity matrix
    computed in portions, and a logistic regression.  Every comparison
    tolerates at most one mismatch.
    """
    fake_raw_data = [create_epoch(idx, 5) for idx in range(20)]
    labels = [0, 1] * 10
    # 5 subjects, 4 epochs per subject
    epochs_per_subj = 4
    training_data = fake_raw_data[0:12]
    expected_output = [0, 0, 0, 1, 0, 1, 0, 1]

    def held_out():
        # a fresh list of self-pairs for the last 8 epochs on every call
        return list(zip(fake_raw_data[12:], fake_raw_data[12:]))

    def sign_mismatches(reference, values, scale):
        # hamming() is a fraction; `scale` converts it to a count
        return hamming(np.sign(reference), np.sign(values)) * scale

    def label_mismatches(predicted):
        return hamming(predicted, expected_output) * len(predicted)

    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1,
                      gamma='auto')
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(training_data, training_data)), labels[0:12])
    expected_confidence = np.array([-1.18234421, 0.97403604, -1.04005679,
                                    0.92403019, -0.95567738, 1.11746593,
                                    -0.83275891, 0.9486868])
    recomputed_confidence = clf.decision_function(held_out())
    assert sign_mismatches(expected_confidence, recomputed_confidence,
                           expected_confidence.size) <= 1, \
        'decision function of SVM with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(held_out())
    assert label_mismatches(y_pred) <= 1, \
        'classification via SVM does not provide correct results'
    confidence = clf.decision_function(held_out())
    assert sign_mismatches(expected_confidence, confidence,
                           confidence.size) <= 1, \
        'decision function of SVM without recomputation ' \
        'does not provide correct results'
    y = [0, 1, 0, 1, 0, 1, 0, 1]
    score = clf.score(held_out(), y)
    assert np.isclose([hamming(y_pred, y)], [1-score])[0], \
        'the prediction score is incorrect'

    # svm with partial similarity matrix computation; the test samples are
    # handed to fit, so predict/decision_function take no data
    clf = Classifier(svm_clf, num_processed_voxels=2,
                     epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(fake_raw_data, fake_raw_data)), labels,
            num_training_samples=12)
    y_pred = clf.predict()
    assert label_mismatches(y_pred) <= 1, \
        'classification via SVM (partial sim) does not ' \
        'provide correct results'
    confidence = clf.decision_function()
    assert sign_mismatches(expected_confidence, confidence,
                           confidence.size) <= 1, \
        'decision function of SVM (partial sim) without recomputation ' \
        'does not provide correct results'

    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(training_data, training_data)), labels[0:12])
    expected_confidence = np.array([-4.49666484, 3.73025553, -4.04181695,
                                    3.73027436, -3.77043872, 4.42613412,
                                    -3.35616616, 3.77716609])
    recomputed_confidence = clf.decision_function(held_out())
    assert sign_mismatches(expected_confidence, recomputed_confidence,
                           expected_confidence.size) <= 1, \
        'decision function of logistic regression with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(held_out())
    assert label_mismatches(y_pred) <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
    confidence = clf.decision_function(held_out())
    assert sign_mismatches(expected_confidence, confidence,
                           confidence.size) <= 1, \
        'decision function of logistic regression without precomputation ' \
        'does not provide correct results'
# Set up data so that the internal mask is correlated with the extrinsic mask rearranged_ext_data = ext_data_training + ext_data_testing corr_obj = list(zip(rearranged_ext_data, rearranged_int_data)) else: # Set up data so that the internal mask is correlated with the internal mask if is_memory_efficient == 1: corr_obj = list(zip(rearranged_int_data, rearranged_int_data)) else: training_obj = list(zip(int_data_training, int_data_training)) testing_obj = list(zip(int_data_testing, int_data_testing)) # no shrinking, set C=1 svm_clf = SVC(kernel='precomputed', shrinking=False, C=1) clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj) # Train the model on the training data if is_memory_efficient == 1: clf.fit(corr_obj, rearranged_labels, num_training_samples) else: clf.fit(training_obj, labels_training) # What is the cv accuracy? if is_memory_efficient == 0: cv_prediction = clf.predict(training_obj) # Test on the testing data if is_memory_efficient == 1: predict = clf.predict() else:
if __name__ == '__main__':
    # Positional command-line arguments, in order: data directory, file
    # extension, mask file, epoch file.
    data_dir = sys.argv[1]
    extension = sys.argv[2]
    mask_file = sys.argv[3]
    epoch_file = sys.argv[4]

    # The epoch file gives the number of subjects (its length) and the
    # number of epochs per subject (second dimension of its first entry).
    epoch_list = np.load(epoch_file)
    num_subjects = len(epoch_list)
    num_epochs_per_subj = epoch_list[0].shape[1]
    raw_data, labels = prepare_fcma_data(data_dir, extension,
                                         mask_file, epoch_file)

    # no shrinking, set C=1
    use_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    # use_clf = LogisticRegression()
    clf = Classifier(use_clf, epochs_per_subj=num_epochs_per_subj)

    # leave-one-subject-out cross validation: each pass holds out one
    # contiguous block of num_epochs_per_subj entries
    for i in range(num_subjects):
        leave_start = i * num_epochs_per_subj
        leave_end = leave_start + num_epochs_per_subj
        test_data = raw_data[leave_start:leave_end]
        test_labels = labels[leave_start:leave_end]
        training_data = raw_data[:leave_start] + raw_data[leave_end:]
        training_labels = labels[:leave_start] + labels[leave_end:]
        clf.fit(training_data, training_labels)
        # joblib can be used for saving and loading models
        # joblib.dump(clf, 'model/logistic.pkl')
        # clf = joblib.load('model/svm.pkl')
        print(clf.predict(test_data))
        print(clf.decision_function(test_data))
def test_classification_with_two_components():
    """Check classification when epochs from two data sets are paired.

    Two sets of twenty synthetic epochs are created (5 subjects, 4 epochs
    per subject).  Pairs built from the first 12 positions are used for
    fitting and pairs from the last 8 positions for testing.  An SVM, the
    same SVM with the similarity matrix computed in portions, and a
    logistic regression are exercised; every comparison tolerates at most
    one mismatch.
    """
    fake_raw_data = [create_epoch(idx, 5) for idx in range(20)]
    fake_raw_data2 = [create_epoch(idx, 6) for idx in range(20)]
    labels = [0, 1] * 10
    # 5 subjects, 4 epochs per subject
    epochs_per_subj = 4
    training_data = fake_raw_data[0:12]
    training_data2 = fake_raw_data2[0:12]
    expected_output = [0, 1, 0, 1, 0, 1, 0, 1]

    def held_out():
        # a fresh list of cross-pairs for the last 8 positions on every call
        return list(zip(fake_raw_data[12:], fake_raw_data2[12:]))

    def sign_mismatches(reference, values, scale):
        # hamming() is a fraction; `scale` converts it to a count
        return hamming(np.sign(reference), np.sign(values)) * scale

    def label_mismatches(predicted):
        return hamming(predicted, expected_output) * len(predicted)

    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1,
                      gamma='auto')
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(training_data, training_data2)), labels[0:12])
    expected_confidence = np.array([-1.23311606, 1.02440964, -0.93898336,
                                    1.07028798, -1.04420007, 0.97647772,
                                    -1.0498268, 1.04970111])
    recomputed_confidence = clf.decision_function(held_out())
    assert sign_mismatches(expected_confidence, recomputed_confidence,
                           expected_confidence.size) <= 1, \
        'decision function of SVM with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(held_out())
    assert label_mismatches(y_pred) <= 1, \
        'classification via SVM does not provide correct results'
    confidence = clf.decision_function(held_out())
    assert sign_mismatches(expected_confidence, confidence,
                           confidence.size) <= 1, \
        'decision function of SVM without recomputation ' \
        'does not provide correct results'
    y = [0, 1, 0, 1, 0, 1, 0, 1]
    score = clf.score(held_out(), y)
    assert np.isclose([hamming(y_pred, y)], [1-score])[0], \
        'the prediction score is incorrect'

    # svm with partial similarity matrix computation; the test samples are
    # handed to fit, so predict/decision_function take no data
    clf = Classifier(svm_clf, num_processed_voxels=2,
                     epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(fake_raw_data, fake_raw_data2)), labels,
            num_training_samples=12)
    y_pred = clf.predict()
    assert label_mismatches(y_pred) <= 1, \
        'classification via SVM (partial sim) does not ' \
        'provide correct results'
    confidence = clf.decision_function()
    assert sign_mismatches(expected_confidence, confidence,
                           confidence.size) <= 1, \
        'decision function of SVM (partial sim) without recomputation ' \
        'does not provide correct results'

    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    # specifying num_training_samples is for coverage
    clf.fit(list(zip(training_data, training_data2)), labels[0:12],
            num_training_samples=12)
    expected_confidence = np.array([-4.90819848, 4.22548132, -3.76255726,
                                    4.46505975, -4.19933099, 4.08313584,
                                    -4.23070437, 4.31779758])
    recomputed_confidence = clf.decision_function(held_out())
    assert sign_mismatches(expected_confidence, recomputed_confidence,
                           expected_confidence.size) <= 1, \
        'decision function of logistic regression with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(held_out())
    assert label_mismatches(y_pred) <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
    confidence = clf.decision_function(held_out())
    assert sign_mismatches(expected_confidence, confidence,
                           confidence.size) <= 1, \
        'decision function of logistic regression without precomputation ' \
        'does not provide correct results'
import numpy as np
# from sklearn.externals import joblib

# Named log_format rather than format so that the builtin format() is not
# shadowed for the rest of the module.
log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
# if want to output log to a file instead of outputting log to the console,
# replace "stream=sys.stdout" with "filename='fcma.log'"
logging.basicConfig(level=logging.INFO, format=log_format, stream=sys.stdout)
logger = logging.getLogger(__name__)

# python classification.py /Users/yidawang/data/face_scene/raw nii.gz
# /Users/yidawang/data/face_scene/prefrontal_top_mask.nii.gz data/fs_epoch_labels.npy 12
if __name__ == '__main__':
    # Positional command-line arguments, in order: data directory, file
    # extension, mask file, epoch file, epochs per subject.
    data_dir = sys.argv[1]
    extension = sys.argv[2]
    mask_file = sys.argv[3]
    epoch_file = sys.argv[4]
    raw_data, labels = prepare_data(data_dir, extension, mask_file, epoch_file)
    epochs_per_subj = int(sys.argv[5])
    # no shrinking, set C=1
    use_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    # use_clf = LogisticRegression()
    clf = Classifier(use_clf, epochs_per_subj=epochs_per_subj)
    # the train/test split point (204) is hard-coded for the example data
    training_data = raw_data[0:204]
    test_data = raw_data[204:]
    clf.fit(training_data, labels[0:204])
    # joblib can be used for saving and loading models
    # joblib.dump(clf, 'model/logistic.pkl')
    # clf = joblib.load('model/svm.pkl')
    # print the predictions followed by the true labels of the test samples
    print(clf.predict(test_data))
    print(np.asanyarray(labels[204:]))
def test_classification():
    """Check SVM and logistic-regression predictions and decision values.

    Twenty synthetic epochs are created; the first 12 are used for fitting
    and the last 8 for testing.  For each estimator the signs of the
    decision function (queried before and after ``predict``) are compared
    with reference values, and the predicted labels are compared with the
    expected labels.  At most one mismatch is tolerated per comparison.

    ``hamming`` is treated as returning the *fraction* of mismatching
    entries (the label checks scale it by the length).  The sign
    comparisons are therefore scaled by the vector size too; an unscaled
    fraction is always <= 1, which would make those assertions vacuous.
    """
    fake_raw_data = [create_epoch(i) for i in range(20)]
    labels = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
    # 4 epochs per subject
    epochs_per_subj = 4
    training_data = fake_raw_data[0: 12]
    test_data = fake_raw_data[12:]
    expected_output = [0, 0, 0, 1, 0, 1, 0, 1]

    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(training_data, labels)
    expected_confidence = np.array([-1.18234421, 0.97403604, -1.04005679,
                                    0.92403019, -0.95567738, 1.11746593,
                                    -0.83275891, 0.9486868])
    recomputed_confidence = clf.decision_function(test_data)
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(recomputed_confidence)
                               ) * expected_confidence.size
    assert hamming_distance <= 1, \
        'decision function of SVM with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(test_data)
    hamming_distance = hamming(y_pred, expected_output) * len(y_pred)
    assert hamming_distance <= 1, \
        'classification via SVM does not provide correct results'
    confidence = clf.decision_function(test_data)
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(confidence)
                               ) * confidence.size
    assert hamming_distance <= 1, \
        'decision function of SVM without recomputation ' \
        'does not provide correct results'

    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(training_data, labels[0:12])
    expected_confidence = np.array([-4.49666484, 3.73025553, -4.04181695,
                                    3.73027436, -3.77043872, 4.42613412,
                                    -3.35616616, 3.77716609])
    recomputed_confidence = clf.decision_function(test_data)
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(recomputed_confidence)
                               ) * expected_confidence.size
    assert hamming_distance <= 1, \
        'decision function of logistic regression with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(test_data)
    hamming_distance = hamming(y_pred, expected_output) * len(y_pred)
    assert hamming_distance <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
    confidence = clf.decision_function(test_data)
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(confidence)
                               ) * confidence.size
    assert hamming_distance <= 1, \
        'decision function of logistic regression without precomputation ' \
        'does not provide correct results'
def test_classification():
    """Verify Classifier results on self-paired synthetic epochs.

    Twenty epochs are generated (5 subjects, 4 epochs per subject).  The
    first 12 are the training set and the last 8 the test set.  The test
    covers an SVM, the same SVM with the similarity matrix computed in
    portions, and a logistic regression; each check allows at most one
    mismatch against the reference values.
    """
    fake_raw_data = [create_epoch(n, 5) for n in range(20)]
    labels = [0, 1] * 10
    # 5 subjects, 4 epochs per subject
    epochs_per_subj = 4
    training_data = fake_raw_data[0:12]
    reference_labels = [0, 0, 0, 1, 0, 1, 0, 1]

    def test_pairs():
        # build a new list of (epoch, epoch) tuples for the last 8 epochs
        return list(zip(fake_raw_data[12:], fake_raw_data[12:]))

    def count_sign_diffs(reference, values, length):
        # hamming() is a fraction; multiply by `length` for a count
        return hamming(np.sign(reference), np.sign(values)) * length

    def count_label_diffs(predicted):
        return hamming(predicted, reference_labels) * len(predicted)

    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(training_data, training_data)), labels[0:12])
    expected_confidence = np.array([-1.18234421, 0.97403604, -1.04005679,
                                    0.92403019, -0.95567738, 1.11746593,
                                    -0.83275891, 0.9486868])
    recomputed_confidence = clf.decision_function(test_pairs())
    assert count_sign_diffs(expected_confidence, recomputed_confidence,
                            expected_confidence.size) <= 1, \
        'decision function of SVM with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(test_pairs())
    assert count_label_diffs(y_pred) <= 1, \
        'classification via SVM does not provide correct results'
    confidence = clf.decision_function(test_pairs())
    assert count_sign_diffs(expected_confidence, confidence,
                            confidence.size) <= 1, \
        'decision function of SVM without recomputation ' \
        'does not provide correct results'
    y = [0, 1, 0, 1, 0, 1, 0, 1]
    score = clf.score(test_pairs(), y)
    assert np.isclose([hamming(y_pred, y)], [1-score])[0], \
        'the prediction score is incorrect'

    # svm with partial similarity matrix computation; all 20 pairs go to
    # fit, and predict/decision_function are called without data
    clf = Classifier(svm_clf, num_processed_voxels=2,
                     epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(fake_raw_data, fake_raw_data)), labels,
            num_training_samples=12)
    y_pred = clf.predict()
    assert count_label_diffs(y_pred) <= 1, \
        'classification via SVM (partial sim) does not ' \
        'provide correct results'
    confidence = clf.decision_function()
    assert count_sign_diffs(expected_confidence, confidence,
                            confidence.size) <= 1, \
        'decision function of SVM (partial sim) without recomputation ' \
        'does not provide correct results'

    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(training_data, training_data)), labels[0:12])
    expected_confidence = np.array([-4.49666484, 3.73025553, -4.04181695,
                                    3.73027436, -3.77043872, 4.42613412,
                                    -3.35616616, 3.77716609])
    recomputed_confidence = clf.decision_function(test_pairs())
    assert count_sign_diffs(expected_confidence, recomputed_confidence,
                            expected_confidence.size) <= 1, \
        'decision function of logistic regression with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(test_pairs())
    assert count_label_diffs(y_pred) <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
    confidence = clf.decision_function(test_pairs())
    assert count_sign_diffs(expected_confidence, confidence,
                            confidence.size) <= 1, \
        'decision function of logistic regression without precomputation ' \
        'does not provide correct results'
def test_classification_with_two_components():
    """Verify Classifier results on pairs built from two epoch sets.

    Two sets of twenty epochs are generated (5 subjects, 4 epochs per
    subject).  Pairs from the first 12 positions are the training set and
    pairs from the last 8 positions the test set.  The test covers an SVM,
    the same SVM with the similarity matrix computed in portions, and a
    logistic regression; each check allows at most one mismatch against the
    reference values.
    """
    fake_raw_data = [create_epoch(n, 5) for n in range(20)]
    fake_raw_data2 = [create_epoch(n, 6) for n in range(20)]
    labels = [0, 1] * 10
    # 5 subjects, 4 epochs per subject
    epochs_per_subj = 4
    training_data = fake_raw_data[0: 12]
    training_data2 = fake_raw_data2[0: 12]
    reference_labels = [0, 1, 0, 1, 0, 1, 0, 1]

    def test_pairs():
        # build a new list of cross-set tuples for the last 8 positions
        return list(zip(fake_raw_data[12:], fake_raw_data2[12:]))

    def count_sign_diffs(reference, values, length):
        # hamming() is a fraction; multiply by `length` for a count
        return hamming(np.sign(reference), np.sign(values)) * length

    def count_label_diffs(predicted):
        return hamming(predicted, reference_labels) * len(predicted)

    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(training_data, training_data2)), labels[0:12])
    expected_confidence = np.array([-1.23311606, 1.02440964, -0.93898336,
                                    1.07028798, -1.04420007, 0.97647772,
                                    -1.0498268, 1.04970111])
    recomputed_confidence = clf.decision_function(test_pairs())
    assert count_sign_diffs(expected_confidence, recomputed_confidence,
                            expected_confidence.size) <= 1, \
        'decision function of SVM with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(test_pairs())
    assert count_label_diffs(y_pred) <= 1, \
        'classification via SVM does not provide correct results'
    confidence = clf.decision_function(test_pairs())
    assert count_sign_diffs(expected_confidence, confidence,
                            confidence.size) <= 1, \
        'decision function of SVM without recomputation ' \
        'does not provide correct results'
    y = [0, 1, 0, 1, 0, 1, 0, 1]
    score = clf.score(test_pairs(), y)
    assert np.isclose([hamming(y_pred, y)], [1-score])[0], \
        'the prediction score is incorrect'

    # svm with partial similarity matrix computation; all 20 pairs go to
    # fit, and predict/decision_function are called without data
    clf = Classifier(svm_clf, num_processed_voxels=2,
                     epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(fake_raw_data, fake_raw_data2)), labels,
            num_training_samples=12)
    y_pred = clf.predict()
    assert count_label_diffs(y_pred) <= 1, \
        'classification via SVM (partial sim) does not ' \
        'provide correct results'
    confidence = clf.decision_function()
    assert count_sign_diffs(expected_confidence, confidence,
                            confidence.size) <= 1, \
        'decision function of SVM (partial sim) without recomputation ' \
        'does not provide correct results'

    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    # specifying num_training_samples is for coverage
    clf.fit(list(zip(training_data, training_data2)), labels[0:12],
            num_training_samples=12)
    expected_confidence = np.array([-4.90819848, 4.22548132, -3.76255726,
                                    4.46505975, -4.19933099, 4.08313584,
                                    -4.23070437, 4.31779758])
    recomputed_confidence = clf.decision_function(test_pairs())
    assert count_sign_diffs(expected_confidence, recomputed_confidence,
                            expected_confidence.size) <= 1, \
        'decision function of logistic regression with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(test_pairs())
    assert count_label_diffs(y_pred) <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
    confidence = clf.decision_function(test_pairs())
    assert count_sign_diffs(expected_confidence, confidence,
                            confidence.size) <= 1, \
        'decision function of logistic regression without precomputation ' \
        'does not provide correct results'
# from sklearn.externals import joblib

# Named log_format rather than format so that the builtin format() is not
# shadowed for the rest of the module.
log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
# if want to output log to a file instead of outputting log to the console,
# replace "stream=sys.stdout" with "filename='fcma.log'"
logging.basicConfig(level=logging.INFO, format=log_format, stream=sys.stdout)
logger = logging.getLogger(__name__)

# python classification.py face_scene bet.nii.gz face_scene/prefrontal_top_mask.nii.gz face_scene/fs_epoch_labels.npy 12
if __name__ == '__main__':
    # Positional command-line arguments, in order: data directory, file
    # extension, mask file, epoch file, epochs per subject.
    data_dir = sys.argv[1]
    extension = sys.argv[2]
    mask_file = sys.argv[3]
    epoch_file = sys.argv[4]
    raw_data, labels = prepare_fcma_data(data_dir, extension, mask_file, epoch_file)
    epochs_per_subj = int(sys.argv[5])
    # no shrinking, set C=1
    use_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    # use_clf = LogisticRegression()
    clf = Classifier(use_clf, epochs_per_subj=epochs_per_subj)
    # the train/test split point (204) is hard-coded for the example data
    training_data = raw_data[0:204]
    test_data = raw_data[204:]
    clf.fit(training_data, labels[0:204])
    # joblib can be used for saving and loading models
    # joblib.dump(clf, 'model/logistic.pkl')
    # clf = joblib.load('model/svm.pkl')
    # print predictions, decision values, then the true test labels
    print(clf.predict(test_data))
    print(clf.decision_function(test_data))
    print(np.asanyarray(labels[204:]))