Example #1
0
def example_of_aggregating_sim_matrix(raw_data, labels, num_subjects,
                                      num_epochs_per_subj):
    """Classify the first subject's epochs with a portion-wise kernel matrix.

    The first ``num_epochs_per_subj`` samples are moved to the end of the
    sample list so that they serve as the test set. The classifier is fit on
    the whole rearranged list, with ``num_training_samples`` marking where
    the training portion ends. Predictions, decision values and the score
    are printed, and the accuracy is logged.

    Parameters
    ----------
    raw_data : list
        Per-epoch data; must support slicing and ``+`` concatenation.
        Assumed to be grouped subject by subject (the slicing relies on it).
    labels : list
        One label per entry of ``raw_data``, in the same order.
    num_subjects : int
        Number of subjects represented in ``raw_data``.
    num_epochs_per_subj : int
        Number of epochs per subject; also the size of the test set.
    """
    # aggregate the kernel matrix to save memory
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1, gamma='auto')
    clf = Classifier(svm_clf,
                     num_processed_voxels=1000,
                     epochs_per_subj=num_epochs_per_subj)
    rearranged_data = raw_data[num_epochs_per_subj:] + raw_data[
        0:num_epochs_per_subj]
    rearranged_labels = labels[num_epochs_per_subj:] + labels[
        0:num_epochs_per_subj]
    clf.fit(list(zip(rearranged_data, rearranged_data)),
            rearranged_labels,
            num_training_samples=num_epochs_per_subj * (num_subjects - 1))
    predict = clf.predict()
    print(predict)
    print(clf.decision_function())
    test_labels = labels[0:num_epochs_per_subj]
    # hamming() returns the *fraction* of mismatches. Scaling it back up is
    # not guaranteed to give an exact integer in floating point, and '%d'
    # truncates, so round to the nearest whole count before reporting.
    incorrect_predict = int(round(
        hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj))
    num_correct = num_epochs_per_subj - incorrect_predict
    # pass the values as logging arguments so formatting stays lazy
    logger.info('when aggregating the similarity matrix to save memory, '
                'the accuracy is %d / %d = %.2f',
                num_correct, num_epochs_per_subj,
                num_correct * 1.0 / num_epochs_per_subj)
    # the test samples were already handed to fit(), so score() gets no data
    print(clf.score(None, test_labels))
Example #2
0
def example_of_cross_validation_with_detailed_info(raw_data, labels, num_subjects, num_epochs_per_subj):
    """Run leave-one-subject-out cross validation and report every fold.

    For each subject index, the block of ``num_epochs_per_subj`` consecutive
    samples belonging to it is held out, the classifier is fit on the rest,
    and the predictions, decision values and score for the held-out block
    are printed. The per-fold accuracy is logged.

    Parameters
    ----------
    raw_data : list
        Per-epoch data; must support slicing and ``+`` concatenation.
        Assumed to be grouped subject by subject (the slicing relies on it).
    labels : list
        One label per entry of ``raw_data``, in the same order.
    num_subjects : int
        Number of folds, one per subject.
    num_epochs_per_subj : int
        Number of epochs per subject, i.e. the size of each held-out block.
    """
    # no shrinking, set C=1
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    #logit_clf = LogisticRegression()
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    # doing leave-one-subject-out cross validation
    for i in range(num_subjects):
        leave_start = i * num_epochs_per_subj
        leave_end = (i+1) * num_epochs_per_subj
        training_data = raw_data[0:leave_start] + raw_data[leave_end:]
        test_data = raw_data[leave_start:leave_end]
        training_labels = labels[0:leave_start] + labels[leave_end:]
        test_labels = labels[leave_start:leave_end]
        clf.fit(list(zip(training_data, training_data)), training_labels)
        # joblib can be used for saving and loading models
        #joblib.dump(clf, 'model/logistic.pkl')
        #clf = joblib.load('model/svm.pkl')
        predict = clf.predict(list(zip(test_data, test_data)))
        print(predict)
        print(clf.decision_function(list(zip(test_data, test_data))))
        # hamming() returns the *fraction* of mismatches. Scaling it back up
        # is not guaranteed to give an exact integer in floating point, and
        # '%d' truncates, so round to the nearest whole count first.
        incorrect_predict = int(round(
            hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj))
        num_correct = num_epochs_per_subj - incorrect_predict
        # pass the values as logging arguments so formatting stays lazy
        logger.info(
            'when leaving subject %d out for testing, the accuracy is %d / %d = %.2f',
            i, num_correct, num_epochs_per_subj,
            num_correct * 1.0 / num_epochs_per_subj
        )
        print(clf.score(list(zip(test_data, test_data)), test_labels))
Example #3
0
def example_of_correlating_two_components(raw_data, raw_data2, labels,
                                          num_subjects, num_epochs_per_subj):
    """Classify epochs built from pairing two separate data components.

    Entries of ``raw_data`` and ``raw_data2`` are paired index by index. The
    first ``num_epochs_per_subj * (num_subjects - 1)`` pairs train the
    classifier and the remaining pairs are used for testing. Predictions,
    decision values and the score are printed, and the accuracy is logged.

    Parameters
    ----------
    raw_data : list
        Per-epoch data of the first component; must support slicing.
    raw_data2 : list
        Per-epoch data of the second component, aligned with ``raw_data``.
    labels : list
        One label per epoch, in the same order as the data lists.
    num_subjects : int
        Number of subjects; all but the last one's worth of samples train.
    num_epochs_per_subj : int
        Number of epochs per subject; used as the test-set size when
        reporting accuracy.
    """
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1, gamma='auto')
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj * (num_subjects - 1)
    clf.fit(
        list(
            zip(raw_data[0:num_training_samples],
                raw_data2[0:num_training_samples])),
        labels[0:num_training_samples])
    # everything after the training portion is the test set
    X = list(
        zip(raw_data[num_training_samples:], raw_data2[num_training_samples:]))
    predict = clf.predict(X)
    print(predict)
    print(clf.decision_function(X))
    test_labels = labels[num_training_samples:]
    # hamming() returns the *fraction* of mismatches. Scaling it back up is
    # not guaranteed to give an exact integer in floating point, and '%d'
    # truncates, so round to the nearest whole count before reporting.
    incorrect_predict = int(round(
        hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj))
    num_correct = num_epochs_per_subj - incorrect_predict
    # pass the values as logging arguments so formatting stays lazy
    logger.info('when aggregating the similarity matrix to save memory, '
                'the accuracy is %d / %d = %.2f',
                num_correct, num_epochs_per_subj,
                num_correct * 1.0 / num_epochs_per_subj)
    # the test samples were not part of fit(), so score() needs them explicitly
    print(clf.score(X, test_labels))
Example #4
0
def test_classification():
    """Check SVM and logistic-regression predictions on synthetic epochs.

    Twenty synthetic epochs are created; the first 12 train each model and
    the remaining 8 are predicted. At most one prediction may differ from
    the expected output.
    """
    # 20 epochs at 4 epochs per subject, i.e. 5 subjects
    epochs_per_subj = 4
    fake_raw_data = [create_epoch(idx) for idx in range(20)]
    labels = [0, 1] * 6
    expected_output = [0, 0, 0, 1, 0, 1, 0, 1]

    def count_mismatches(estimator, train_labels):
        # fit a fresh Classifier around `estimator` and count how many
        # held-out predictions disagree with the expected output
        model = Classifier(estimator, epochs_per_subj=epochs_per_subj)
        model.fit(training_data, train_labels)
        y_pred = model.predict(fake_raw_data[12:])
        return hamming(y_pred, expected_output) * len(y_pred)

    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    training_data = fake_raw_data[0:12]
    assert count_mismatches(svm_clf, labels) <= 1, \
        'classification via SVM does not provide correct results'

    # logistic regression
    lr_clf = LogisticRegression()
    assert count_mismatches(lr_clf, labels[0:12]) <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
Example #5
0
def example_of_cross_validation_using_model_selection(raw_data, labels, num_subjects, num_epochs_per_subj):
    """Cross-validate the classifier through sklearn's model_selection.

    Every epoch is paired with itself, the paired list is split into
    ``num_subjects`` stratified folds without shuffling, and the per-fold
    scores are printed. The mean score is logged.

    NOTE: this method does not work for sklearn.svm.SVC with precomputed
    kernel when the kernel matrix is computed in portions; also, this method
    only works for self-correlation, i.e. correlation between the same data
    matrix.
    """
    # SVM without shrinking and with C=1; a LogisticRegression() could be
    # wrapped in its place
    estimator = Classifier(
        svm.SVC(kernel='precomputed', shrinking=False, C=1),
        epochs_per_subj=num_epochs_per_subj,
    )
    # one unshuffled fold per subject, intended as leave-one-subject-out
    # NOTE(review): stratification may not keep folds aligned with subjects
    folds = model_selection.StratifiedKFold(n_splits=num_subjects,
                                            shuffle=False)
    self_pairs = list(zip(raw_data, raw_data))
    scores = model_selection.cross_val_score(estimator, self_pairs,
                                             y=labels, cv=folds)
    print(scores)
    mean_accuracy = np.mean(scores)
    logger.info('the overall cross validation accuracy is %.2f' % mean_accuracy)
Example #6
0
def example_of_correlating_two_components(raw_data, raw_data2, labels, num_subjects, num_epochs_per_subj):
    """Classify epochs built from pairing two separate data components.

    Entries of ``raw_data`` and ``raw_data2`` are paired index by index. The
    first ``num_epochs_per_subj * (num_subjects - 1)`` pairs train the
    classifier and the remaining pairs are used for testing. Predictions,
    decision values and the score are printed, and the accuracy is logged.

    Parameters
    ----------
    raw_data : list
        Per-epoch data of the first component; must support slicing.
    raw_data2 : list
        Per-epoch data of the second component, aligned with ``raw_data``.
    labels : list
        One label per epoch, in the same order as the data lists.
    num_subjects : int
        Number of subjects; all but the last one's worth of samples train.
    num_epochs_per_subj : int
        Number of epochs per subject; used as the test-set size when
        reporting accuracy.
    """
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj * (num_subjects - 1)
    clf.fit(list(zip(raw_data[0:num_training_samples], raw_data2[0:num_training_samples])),
            labels[0:num_training_samples])
    # everything after the training portion is the test set
    X = list(zip(raw_data[num_training_samples:], raw_data2[num_training_samples:]))
    predict = clf.predict(X)
    print(predict)
    print(clf.decision_function(X))
    test_labels = labels[num_training_samples:]
    # hamming() returns the *fraction* of mismatches. Scaling it back up is
    # not guaranteed to give an exact integer in floating point, and '%d'
    # truncates, so round to the nearest whole count before reporting.
    incorrect_predict = int(round(
        hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj))
    num_correct = num_epochs_per_subj - incorrect_predict
    # pass the values as logging arguments so formatting stays lazy
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f',
        num_correct, num_epochs_per_subj,
        num_correct * 1.0 / num_epochs_per_subj
    )
    # the test samples were not part of fit(), so score() needs them explicitly
    print(clf.score(X, test_labels))
Example #7
0
def example_of_aggregating_sim_matrix(raw_data, labels, num_subjects, num_epochs_per_subj):
    """Classify the first subject's epochs with a portion-wise kernel matrix.

    The first ``num_epochs_per_subj`` samples are moved to the end of the
    sample list so that they serve as the test set. The classifier is fit on
    the whole rearranged list, with ``num_training_samples`` marking where
    the training portion ends. Predictions, decision values and the score
    are printed, and the accuracy is logged.

    Parameters
    ----------
    raw_data : list
        Per-epoch data; must support slicing and ``+`` concatenation.
        Assumed to be grouped subject by subject (the slicing relies on it).
    labels : list
        One label per entry of ``raw_data``, in the same order.
    num_subjects : int
        Number of subjects represented in ``raw_data``.
    num_epochs_per_subj : int
        Number of epochs per subject; also the size of the test set.
    """
    # aggregate the kernel matrix to save memory
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, num_processed_voxels=1000, epochs_per_subj=num_epochs_per_subj)
    rearranged_data = raw_data[num_epochs_per_subj:] + raw_data[0:num_epochs_per_subj]
    rearranged_labels = labels[num_epochs_per_subj:] + labels[0:num_epochs_per_subj]
    clf.fit(list(zip(rearranged_data, rearranged_data)), rearranged_labels,
            num_training_samples=num_epochs_per_subj*(num_subjects-1))
    predict = clf.predict()
    print(predict)
    print(clf.decision_function())
    test_labels = labels[0:num_epochs_per_subj]
    # hamming() returns the *fraction* of mismatches. Scaling it back up is
    # not guaranteed to give an exact integer in floating point, and '%d'
    # truncates, so round to the nearest whole count before reporting.
    incorrect_predict = int(round(
        hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj))
    num_correct = num_epochs_per_subj - incorrect_predict
    # pass the values as logging arguments so formatting stays lazy
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f',
        num_correct, num_epochs_per_subj,
        num_correct * 1.0 / num_epochs_per_subj
    )
    # the test samples were already handed to fit(), so score() gets no data
    print(clf.score(None, test_labels))
def test_classification():
    """Check predictions and decision values for SVM and logistic regression.

    Twenty synthetic epochs are created; the first 12 train each model and
    the remaining 8 are evaluated. For each model the sign of the decision
    values and the predicted labels may differ from the expected ones in at
    most one position.
    """
    fake_raw_data = [create_epoch(i) for i in range(20)]
    labels = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
    # 20 epochs at 4 epochs per subject, i.e. 5 subjects
    epochs_per_subj = 4
    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    training_data = fake_raw_data[0:12]
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(training_data, labels)
    expected_confidence = np.array([
        -1.18234421, 0.97403604, -1.04005679, 0.92403019, -0.95567738,
        1.11746593, -0.83275891, 0.9486868
    ])
    recomputed_confidence = clf.decision_function(fake_raw_data[12:])
    # hamming() yields a fraction in [0, 1]; scale it to a mismatch count,
    # otherwise the "<= 1" check below could never fail
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(recomputed_confidence)
                               ) * expected_confidence.size
    assert hamming_distance <= 1, \
        'decision function of SVM with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(fake_raw_data[12:])
    expected_output = [0, 0, 0, 1, 0, 1, 0, 1]
    hamming_distance = hamming(y_pred, expected_output) * len(y_pred)
    assert hamming_distance <= 1, \
        'classification via SVM does not provide correct results'
    confidence = clf.decision_function(fake_raw_data[12:])
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(confidence)
                               ) * confidence.size
    assert hamming_distance <= 1, \
        'decision function of SVM without recomputation ' \
        'does not provide correct results'
    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(training_data, labels[0:12])
    expected_confidence = np.array([
        -4.49666484, 3.73025553, -4.04181695, 3.73027436, -3.77043872,
        4.42613412, -3.35616616, 3.77716609
    ])
    recomputed_confidence = clf.decision_function(fake_raw_data[12:])
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(recomputed_confidence)
                               ) * expected_confidence.size
    assert hamming_distance <= 1, \
        'decision function of logistic regression with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(fake_raw_data[12:])
    expected_output = [0, 0, 0, 1, 0, 1, 0, 1]
    hamming_distance = hamming(y_pred, expected_output) * len(y_pred)
    assert hamming_distance <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
    confidence = clf.decision_function(fake_raw_data[12:])
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(confidence)
                               ) * confidence.size
    assert hamming_distance <= 1, \
        'decision function of logistic regression without precomputation ' \
        'does not provide correct results'
Example #9
0
    data_dir = sys.argv[1]
    extension = sys.argv[2]
    mask_file = sys.argv[3]
    epoch_file = sys.argv[4]

    epoch_list = np.load(epoch_file)
    num_subjects = len(epoch_list)
    num_epochs_per_subj = epoch_list[0].shape[1]

    raw_data, labels = prepare_fcma_data(data_dir, extension, mask_file,
                                         epoch_file)

    # no shrinking, set C=1
    use_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    #use_clf = LogisticRegression()
    clf = Classifier(use_clf, epochs_per_subj=num_epochs_per_subj)

    # doing leave-one-subject-out cross validation
    for i in range(num_subjects):
        leave_start = i * num_epochs_per_subj
        leave_end = (i + 1) * num_epochs_per_subj
        training_data = raw_data[0:leave_start] + raw_data[leave_end:]
        test_data = raw_data[leave_start:leave_end]
        training_labels = labels[0:leave_start] + labels[leave_end:]
        test_labels = labels[leave_start:leave_end]
        clf.fit(training_data, training_labels)
        # joblib can be used for saving and loading models
        #joblib.dump(clf, 'model/logistic.pkl')
        #clf = joblib.load('model/svm.pkl')
        print(clf.predict(test_data))
        print(clf.decision_function(test_data))
Example #10
0
def test_classification():
    """Exercise Classifier on synthetic self-correlation data.

    Covers an SVM on a fully computed kernel, an SVM with the similarity
    matrix computed in portions, and logistic regression. Twenty synthetic
    epochs are created, each paired with itself; the first 12 pairs train
    and the remaining 8 are evaluated. Every check tolerates at most one
    disagreement with the expected values.
    """
    # 20 epochs: 5 subjects, 4 epochs per subject
    epochs_per_subj = 4
    fake_raw_data = [create_epoch(idx, 5) for idx in range(20)]
    labels = [0, 1] * 10
    expected_output = [0, 0, 0, 1, 0, 1, 0, 1]

    def held_out_pairs():
        # a fresh list on every call: each held-out epoch paired with itself
        return list(zip(fake_raw_data[12:], fake_raw_data[12:]))

    def sign_mismatches(reference, observed, sized):
        # number of positions whose signs differ, scaled by `sized.size`
        return hamming(np.sign(reference), np.sign(observed)) * sized.size

    def label_mismatches(predicted):
        return hamming(predicted, expected_output) * len(predicted)

    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1, gamma='auto')
    training_data = fake_raw_data[0:12]
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(training_data, training_data)), labels[0:12])
    expected_confidence = np.array([-1.18234421, 0.97403604, -1.04005679,
                                    0.92403019, -0.95567738, 1.11746593,
                                    -0.83275891, 0.9486868])
    recomputed_confidence = clf.decision_function(held_out_pairs())
    assert sign_mismatches(expected_confidence, recomputed_confidence,
                           expected_confidence) <= 1, \
        'decision function of SVM with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(held_out_pairs())
    assert label_mismatches(y_pred) <= 1, \
        'classification via SVM does not provide correct results'
    confidence = clf.decision_function(held_out_pairs())
    assert sign_mismatches(expected_confidence, confidence,
                           confidence) <= 1, \
        'decision function of SVM without recomputation ' \
        'does not provide correct results'
    y = [0, 1, 0, 1, 0, 1, 0, 1]
    score = clf.score(held_out_pairs(), y)
    assert np.isclose([hamming(y_pred, y)], [1-score])[0], \
        'the prediction score is incorrect'

    # svm with partial similarity matrix computation
    clf = Classifier(svm_clf, num_processed_voxels=2,
                     epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(fake_raw_data, fake_raw_data)),
            labels,
            num_training_samples=12)
    y_pred = clf.predict()
    assert label_mismatches(y_pred) <= 1, \
        'classification via SVM (partial sim) does not ' \
        'provide correct results'
    confidence = clf.decision_function()
    assert sign_mismatches(expected_confidence, confidence,
                           confidence) <= 1, \
        'decision function of SVM (partial sim) without recomputation ' \
        'does not provide correct results'

    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(training_data, training_data)), labels[0:12])
    expected_confidence = np.array([-4.49666484, 3.73025553, -4.04181695,
                                    3.73027436, -3.77043872, 4.42613412,
                                    -3.35616616, 3.77716609])
    recomputed_confidence = clf.decision_function(held_out_pairs())
    assert sign_mismatches(expected_confidence, recomputed_confidence,
                           expected_confidence) <= 1, \
        'decision function of logistic regression with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(held_out_pairs())
    assert label_mismatches(y_pred) <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
    confidence = clf.decision_function(held_out_pairs())
    assert sign_mismatches(expected_confidence, confidence,
                           confidence) <= 1, \
        'decision function of logistic regression without precomputation ' \
        'does not provide correct results'
Example #11
0
     # Set up data so that the internal mask is correlated with the extrinsic mask
     rearranged_ext_data = ext_data_training + ext_data_testing
     corr_obj = list(zip(rearranged_ext_data, rearranged_int_data))
 else:
     
     # Set up data so that the internal mask is correlated with the internal mask
     if is_memory_efficient == 1:
         corr_obj = list(zip(rearranged_int_data, rearranged_int_data))
     else:
         training_obj = list(zip(int_data_training, int_data_training))
         testing_obj = list(zip(int_data_testing, int_data_testing))
 
 # no shrinking, set C=1
 svm_clf = SVC(kernel='precomputed', shrinking=False, C=1)
 
 clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
 
 # Train the model on the training data
 if is_memory_efficient == 1:
     clf.fit(corr_obj, rearranged_labels, num_training_samples)
 else:
     clf.fit(training_obj, labels_training)
 
 # What is the cv accuracy?
 if is_memory_efficient == 0:
     cv_prediction = clf.predict(training_obj)
 
 # Test on the testing data
 if is_memory_efficient == 1:
     predict = clf.predict()
 else:
Example #12
0
if __name__ == '__main__':
    # Leave-one-subject-out classification over the data named on the
    # command line: data_dir extension mask_file epoch_file
    if len(sys.argv) < 5:
        # fail with a usage hint rather than an IndexError on sys.argv
        sys.exit('usage: %s data_dir extension mask_file epoch_file'
                 % sys.argv[0])
    data_dir, extension, mask_file, epoch_file = sys.argv[1:5]

    epoch_list = np.load(epoch_file)
    num_subjects = len(epoch_list)
    # NOTE(review): assumes axis 1 of each entry indexes epochs — confirm
    num_epochs_per_subj = epoch_list[0].shape[1]

    raw_data, labels = prepare_fcma_data(data_dir, extension, mask_file, epoch_file)

    # no shrinking, set C=1
    use_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    #use_clf = LogisticRegression()
    clf = Classifier(use_clf, epochs_per_subj=num_epochs_per_subj)

    # doing leave-one-subject-out cross validation
    for i in range(num_subjects):
        # samples [leave_start, leave_end) are held out in this fold
        leave_start = i * num_epochs_per_subj
        leave_end = (i+1) * num_epochs_per_subj
        training_data = raw_data[0:leave_start] + raw_data[leave_end:]
        test_data = raw_data[leave_start:leave_end]
        training_labels = labels[0:leave_start] + labels[leave_end:]
        clf.fit(training_data, training_labels)
        # joblib can be used for saving and loading models
        #joblib.dump(clf, 'model/logistic.pkl')
        #clf = joblib.load('model/svm.pkl')
        print(clf.predict(test_data))
        print(clf.decision_function(test_data))
Example #13
0
def test_classification_with_two_components():
    """Exercise Classifier on pairs drawn from two synthetic data sets.

    Covers an SVM on a fully computed kernel, an SVM with the similarity
    matrix computed in portions, and logistic regression. Two lists of 20
    synthetic epochs are paired index by index; the first 12 pairs train
    and the remaining 8 are evaluated. Every check tolerates at most one
    disagreement with the expected values.
    """
    # 20 epochs per component: 5 subjects, 4 epochs per subject
    epochs_per_subj = 4
    fake_raw_data = [create_epoch(idx, 5) for idx in range(20)]
    fake_raw_data2 = [create_epoch(idx, 6) for idx in range(20)]
    labels = [0, 1] * 10
    expected_output = [0, 1, 0, 1, 0, 1, 0, 1]

    def held_out_pairs():
        # a fresh list on every call: held-out epochs of both components
        return list(zip(fake_raw_data[12:], fake_raw_data2[12:]))

    def sign_mismatches(reference, observed, sized):
        # number of positions whose signs differ, scaled by `sized.size`
        return hamming(np.sign(reference), np.sign(observed)) * sized.size

    def label_mismatches(predicted):
        return hamming(predicted, expected_output) * len(predicted)

    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1, gamma='auto')
    training_data = fake_raw_data[0:12]
    training_data2 = fake_raw_data2[0:12]
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(training_data, training_data2)), labels[0:12])
    expected_confidence = np.array([-1.23311606, 1.02440964, -0.93898336,
                                    1.07028798, -1.04420007, 0.97647772,
                                    -1.0498268, 1.04970111])
    recomputed_confidence = clf.decision_function(held_out_pairs())
    assert sign_mismatches(expected_confidence, recomputed_confidence,
                           expected_confidence) <= 1, \
        'decision function of SVM with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(held_out_pairs())
    assert label_mismatches(y_pred) <= 1, \
        'classification via SVM does not provide correct results'
    confidence = clf.decision_function(held_out_pairs())
    assert sign_mismatches(expected_confidence, confidence,
                           confidence) <= 1, \
        'decision function of SVM without recomputation ' \
        'does not provide correct results'
    y = [0, 1, 0, 1, 0, 1, 0, 1]
    score = clf.score(held_out_pairs(), y)
    assert np.isclose([hamming(y_pred, y)], [1-score])[0], \
        'the prediction score is incorrect'

    # svm with partial similarity matrix computation
    clf = Classifier(svm_clf, num_processed_voxels=2,
                     epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(fake_raw_data, fake_raw_data2)),
            labels,
            num_training_samples=12)
    y_pred = clf.predict()
    assert label_mismatches(y_pred) <= 1, \
        'classification via SVM (partial sim) does not ' \
        'provide correct results'
    confidence = clf.decision_function()
    assert sign_mismatches(expected_confidence, confidence,
                           confidence) <= 1, \
        'decision function of SVM (partial sim) without recomputation ' \
        'does not provide correct results'

    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    # specifying num_training_samples is for coverage
    clf.fit(list(zip(training_data, training_data2)),
            labels[0:12],
            num_training_samples=12)
    expected_confidence = np.array([-4.90819848, 4.22548132, -3.76255726,
                                    4.46505975, -4.19933099, 4.08313584,
                                    -4.23070437, 4.31779758])
    recomputed_confidence = clf.decision_function(held_out_pairs())
    assert sign_mismatches(expected_confidence, recomputed_confidence,
                           expected_confidence) <= 1, \
        'decision function of logistic regression with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(held_out_pairs())
    assert label_mismatches(y_pred) <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
    confidence = clf.decision_function(held_out_pairs())
    assert sign_mismatches(expected_confidence, confidence,
                           confidence) <= 1, \
        'decision function of logistic regression without precomputation ' \
        'does not provide correct results'
Example #14
0
import numpy as np
#from sklearn.externals import joblib

# named log_format so the builtin format() is not shadowed
log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
# if want to output log to a file instead of outputting log to the console,
# replace "stream=sys.stdout" with "filename='fcma.log'"
logging.basicConfig(level=logging.INFO, format=log_format, stream=sys.stdout)
logger = logging.getLogger(__name__)

# python classification.py /Users/yidawang/data/face_scene/raw nii.gz
#     /Users/yidawang/data/face_scene/prefrontal_top_mask.nii.gz data/fs_epoch_labels.npy 12
if __name__ == '__main__':
    # Train on a fixed leading portion of the data and print the predictions
    # for the remainder next to the true labels.
    if len(sys.argv) < 6:
        # fail with a usage hint rather than an IndexError on sys.argv
        sys.exit('usage: %s data_dir extension mask_file epoch_file '
                 'epochs_per_subj' % sys.argv[0])
    data_dir, extension, mask_file, epoch_file = sys.argv[1:5]
    # parse the numeric argument before the data is loaded so that a bad
    # value is reported immediately
    epochs_per_subj = int(sys.argv[5])
    raw_data, labels = prepare_data(data_dir, extension, mask_file, epoch_file)
    # no shrinking, set C=1
    use_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    #use_clf = LogisticRegression()
    clf = Classifier(use_clf, epochs_per_subj=epochs_per_subj)
    # NOTE(review): hard-coded split point; presumably specific to the data
    # set named in the usage comment above — confirm before reusing
    num_training_samples = 204
    training_data = raw_data[0:num_training_samples]
    test_data = raw_data[num_training_samples:]
    clf.fit(training_data, labels[0:num_training_samples])
    # joblib can be used for saving and loading models
    #joblib.dump(clf, 'model/logistic.pkl')
    #clf = joblib.load('model/svm.pkl')
    print(clf.predict(test_data))
    print(np.asanyarray(labels[num_training_samples:]))
Example #15
0
def test_classification():
    """Check predictions and decision values for SVM and logistic regression.

    Twenty synthetic epochs are created; the first 12 train each model and
    the remaining 8 are evaluated. For each model the sign of the decision
    values and the predicted labels may differ from the expected ones in at
    most one position.
    """
    fake_raw_data = [create_epoch(i) for i in range(20)]
    labels = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
    # 20 epochs at 4 epochs per subject, i.e. 5 subjects
    epochs_per_subj = 4
    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    training_data = fake_raw_data[0: 12]
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(training_data, labels)
    expected_confidence = np.array([-1.18234421, 0.97403604, -1.04005679,
                                    0.92403019, -0.95567738, 1.11746593,
                                    -0.83275891, 0.9486868])
    recomputed_confidence = clf.decision_function(fake_raw_data[12:])
    # hamming() yields a fraction in [0, 1]; scale it to a mismatch count,
    # otherwise the "<= 1" check below could never fail
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(recomputed_confidence)
                               ) * expected_confidence.size
    assert hamming_distance <= 1, \
        'decision function of SVM with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(fake_raw_data[12:])
    expected_output = [0, 0, 0, 1, 0, 1, 0, 1]
    hamming_distance = hamming(y_pred, expected_output) * len(y_pred)
    assert hamming_distance <= 1, \
        'classification via SVM does not provide correct results'
    confidence = clf.decision_function(fake_raw_data[12:])
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(confidence)
                               ) * confidence.size
    assert hamming_distance <= 1, \
        'decision function of SVM without recomputation ' \
        'does not provide correct results'
    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(training_data, labels[0:12])
    expected_confidence = np.array([-4.49666484, 3.73025553, -4.04181695,
                                    3.73027436, -3.77043872, 4.42613412,
                                    -3.35616616, 3.77716609])
    recomputed_confidence = clf.decision_function(fake_raw_data[12:])
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(recomputed_confidence)
                               ) * expected_confidence.size
    assert hamming_distance <= 1, \
        'decision function of logistic regression with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(fake_raw_data[12:])
    expected_output = [0, 0, 0, 1, 0, 1, 0, 1]
    hamming_distance = hamming(y_pred, expected_output) * len(y_pred)
    assert hamming_distance <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
    confidence = clf.decision_function(fake_raw_data[12:])
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(confidence)
                               ) * confidence.size
    assert hamming_distance <= 1, \
        'decision function of logistic regression without precomputation ' \
        'does not provide correct results'
def test_classification():
    """Exercise Classifier on synthetic self-correlation data.

    Covers an SVM on a fully computed kernel, an SVM with the similarity
    matrix computed in portions, and logistic regression. Twenty synthetic
    epochs are created, each paired with itself; the first 12 pairs train
    and the remaining 8 are evaluated. Every check tolerates at most one
    disagreement with the expected values.
    """
    # 20 epochs: 5 subjects, 4 epochs per subject
    epochs_per_subj = 4
    fake_raw_data = [create_epoch(idx, 5) for idx in range(20)]
    labels = [0, 1] * 10
    expected_output = [0, 0, 0, 1, 0, 1, 0, 1]

    def held_out_pairs():
        # a fresh list on every call: each held-out epoch paired with itself
        return list(zip(fake_raw_data[12:], fake_raw_data[12:]))

    def sign_mismatches(reference, observed, sized):
        # number of positions whose signs differ, scaled by `sized.size`
        return hamming(np.sign(reference), np.sign(observed)) * sized.size

    def label_mismatches(predicted):
        return hamming(predicted, expected_output) * len(predicted)

    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    training_data = fake_raw_data[0:12]
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(training_data, training_data)), labels[0:12])
    expected_confidence = np.array([-1.18234421, 0.97403604, -1.04005679,
                                    0.92403019, -0.95567738, 1.11746593,
                                    -0.83275891, 0.9486868])
    recomputed_confidence = clf.decision_function(held_out_pairs())
    assert sign_mismatches(expected_confidence, recomputed_confidence,
                           expected_confidence) <= 1, \
        'decision function of SVM with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(held_out_pairs())
    assert label_mismatches(y_pred) <= 1, \
        'classification via SVM does not provide correct results'
    confidence = clf.decision_function(held_out_pairs())
    assert sign_mismatches(expected_confidence, confidence,
                           confidence) <= 1, \
        'decision function of SVM without recomputation ' \
        'does not provide correct results'
    y = [0, 1, 0, 1, 0, 1, 0, 1]
    score = clf.score(held_out_pairs(), y)
    assert np.isclose([hamming(y_pred, y)], [1-score])[0], \
        'the prediction score is incorrect'

    # svm with partial similarity matrix computation
    clf = Classifier(svm_clf, num_processed_voxels=2,
                     epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(fake_raw_data, fake_raw_data)),
            labels,
            num_training_samples=12)
    y_pred = clf.predict()
    assert label_mismatches(y_pred) <= 1, \
        'classification via SVM (partial sim) does not ' \
        'provide correct results'
    confidence = clf.decision_function()
    assert sign_mismatches(expected_confidence, confidence,
                           confidence) <= 1, \
        'decision function of SVM (partial sim) without recomputation ' \
        'does not provide correct results'

    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(list(zip(training_data, training_data)), labels[0:12])
    expected_confidence = np.array([-4.49666484, 3.73025553, -4.04181695,
                                    3.73027436, -3.77043872, 4.42613412,
                                    -3.35616616, 3.77716609])
    recomputed_confidence = clf.decision_function(held_out_pairs())
    assert sign_mismatches(expected_confidence, recomputed_confidence,
                           expected_confidence) <= 1, \
        'decision function of logistic regression with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(held_out_pairs())
    assert label_mismatches(y_pred) <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
    confidence = clf.decision_function(held_out_pairs())
    assert sign_mismatches(expected_confidence, confidence,
                           confidence) <= 1, \
        'decision function of logistic regression without precomputation ' \
        'does not provide correct results'
def test_classification_with_two_components():
    """Check Classifier on samples that pair epochs from two data sets.

    Twenty epochs are generated per component with ``create_epoch``; the
    first 12 pairs are used for fitting and the remaining 8 for evaluation.
    An SVM with a precomputed kernel (with and without partial similarity
    matrix computation) and a logistic regression are each checked on their
    predictions and on the signs of their decision values.  Every check
    tolerates at most one mismatching entry.
    """
    n_train = 12
    component_a = [create_epoch(idx, 5) for idx in range(20)]
    component_b = [create_epoch(idx, 6) for idx in range(20)]
    # 5 subjects, 4 epochs per subject
    epochs_per_subj = 4
    labels = [0, 1] * 10

    def training_pairs():
        # a new list of (first, second) pairs for the fitting epochs
        return list(zip(component_a[:n_train], component_b[:n_train]))

    def held_out_pairs():
        # a new list of (first, second) pairs for the evaluation epochs
        return list(zip(component_a[n_train:], component_b[n_train:]))

    def sign_disagreements(reference, observed, count):
        # compare signs only, then scale the hamming() result by count
        return hamming(np.sign(reference), np.sign(observed)) * count

    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(training_pairs(), labels[:n_train])
    expected_confidence = np.array([
        -1.23311606, 1.02440964, -0.93898336, 1.07028798,
        -1.04420007, 0.97647772, -1.0498268, 1.04970111,
    ])
    recomputed_confidence = clf.decision_function(held_out_pairs())
    mismatches = sign_disagreements(expected_confidence,
                                    recomputed_confidence,
                                    expected_confidence.size)
    assert mismatches <= 1, (
        'decision function of SVM with recomputation '
        'does not provide correct results')
    y_pred = clf.predict(held_out_pairs())
    expected_output = [0, 1] * 4
    mismatches = hamming(y_pred, expected_output) * len(y_pred)
    assert mismatches <= 1, (
        'classification via SVM does not provide correct results')
    confidence = clf.decision_function(held_out_pairs())
    mismatches = sign_disagreements(expected_confidence,
                                    confidence,
                                    confidence.size)
    assert mismatches <= 1, (
        'decision function of SVM without recomputation '
        'does not provide correct results')
    truth = [0, 1] * 4
    score = clf.score(held_out_pairs(), truth)
    # the mismatch fraction and the reported score must sum to one
    assert np.isclose(hamming(y_pred, truth), 1 - score), (
        'the prediction score is incorrect')

    # svm with partial similarity matrix computation
    clf = Classifier(svm_clf,
                     num_processed_voxels=2,
                     epochs_per_subj=epochs_per_subj)
    # all 20 pairs are handed over at once; the first n_train are training
    clf.fit(list(zip(component_a, component_b)),
            labels,
            num_training_samples=n_train)
    y_pred = clf.predict()
    expected_output = [0, 1] * 4
    mismatches = hamming(y_pred, expected_output) * len(y_pred)
    assert mismatches <= 1, (
        'classification via SVM (partial sim) does not '
        'provide correct results')
    confidence = clf.decision_function()
    # still compared against the SVM reference values defined above
    mismatches = sign_disagreements(expected_confidence,
                                    confidence,
                                    confidence.size)
    assert mismatches <= 1, (
        'decision function of SVM (partial sim) without recomputation '
        'does not provide correct results')

    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    # specifying num_training_samples is for coverage
    clf.fit(training_pairs(),
            labels[:n_train],
            num_training_samples=n_train)
    expected_confidence = np.array([
        -4.90819848, 4.22548132, -3.76255726, 4.46505975,
        -4.19933099, 4.08313584, -4.23070437, 4.31779758,
    ])
    recomputed_confidence = clf.decision_function(held_out_pairs())
    mismatches = sign_disagreements(expected_confidence,
                                    recomputed_confidence,
                                    expected_confidence.size)
    assert mismatches <= 1, (
        'decision function of logistic regression with recomputation '
        'does not provide correct results')
    y_pred = clf.predict(held_out_pairs())
    expected_output = [0, 1] * 4
    mismatches = hamming(y_pred, expected_output) * len(y_pred)
    assert mismatches <= 1, (
        'classification via logistic regression '
        'does not provide correct results')
    confidence = clf.decision_function(held_out_pairs())
    mismatches = sign_disagreements(expected_confidence,
                                    confidence,
                                    confidence.size)
    assert mismatches <= 1, (
        'decision function of logistic regression without precomputation '
        'does not provide correct results')
Example #18
0
#from sklearn.externals import joblib

# Layout of each log record: timestamp, logger name, level name, message.
# NOTE(review): the module-level name `format` shadows the Python builtin
# of the same name inside this module.
format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
# To write the log to a file instead of the console,
# replace "stream=sys.stdout" with "filename='fcma.log'".
logging.basicConfig(level=logging.INFO, format=format, stream=sys.stdout)
# module-level logger named after this module
logger = logging.getLogger(__name__)


# Command-line arguments, in order:
#   data_dir extension mask_file epoch_file epochs_per_subj
# Example invocation:
# python classification.py face_scene bet.nii.gz face_scene/prefrontal_top_mask.nii.gz face_scene/fs_epoch_labels.npy 12
if __name__ == '__main__':
    # the first four arguments are forwarded unchanged to prepare_fcma_data
    data_dir = sys.argv[1]
    extension = sys.argv[2]
    mask_file = sys.argv[3]
    epoch_file = sys.argv[4]
    raw_data, labels = prepare_fcma_data(data_dir, extension, mask_file, epoch_file)
    # the fifth argument is read only after the data has been prepared
    epochs_per_subj = int(sys.argv[5])
    # no shrinking, set C=1
    use_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    # alternative estimator, currently disabled:
    #use_clf = LogisticRegression()
    clf = Classifier(use_clf, epochs_per_subj=epochs_per_subj)
    # The first 204 entries are used for training and the rest for testing.
    # NOTE(review): 204 is hard-coded; presumably it matches the example
    # data set (204 = 17 * 12 with the sample argument above) -- confirm
    # before running on other data.
    training_data = raw_data[0:204]
    test_data = raw_data[204:]
    clf.fit(training_data, labels[0:204])
    # joblib can be used for saving and loading models
    #joblib.dump(clf, 'model/logistic.pkl')
    #clf = joblib.load('model/svm.pkl')
    # print the predictions, the decision values, and the true test labels
    print(clf.predict(test_data))
    print(clf.decision_function(test_data))
    print(np.asanyarray(labels[204:]))