def test_classification():
    """Verify SVM and logistic-regression classification on synthetic epochs.

    A ``Classifier`` is fitted on the first 12 of 20 fake epochs and then
    evaluated on the remaining 8.  For each back end the test checks that
    the predicted labels, and the signs of the decision function, disagree
    with the reference values in at most one position.
    """
    fake_raw_data = [create_epoch(i) for i in range(20)]
    labels = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
    # 20 epochs at 4 epochs per subject, i.e. 5 subjects
    epochs_per_subj = 4
    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    training_data = fake_raw_data[0:12]
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(training_data, labels)
    expected_confidence = np.array([
        -1.18234421, 0.97403604, -1.04005679, 0.92403019, -0.95567738,
        1.11746593, -0.83275891, 0.9486868
    ])
    recomputed_confidence = clf.decision_function(fake_raw_data[12:])
    # hamming() returns the *fraction* of mismatching positions, which can
    # never exceed 1; scale it by the vector length to obtain a mismatch
    # count so that the "<= 1" tolerance is an actual check
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(recomputed_confidence)
                               ) * expected_confidence.size
    assert hamming_distance <= 1, \
        'decision function of SVM with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(fake_raw_data[12:])
    expected_output = [0, 0, 0, 1, 0, 1, 0, 1]
    hamming_distance = hamming(y_pred, expected_output) * len(y_pred)
    assert hamming_distance <= 1, \
        'classification via SVM does not provide correct results'
    # second call to decision_function, made after predict()
    confidence = clf.decision_function(fake_raw_data[12:])
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(confidence)
                               ) * expected_confidence.size
    assert hamming_distance <= 1, \
        'decision function of SVM without recomputation ' \
        'does not provide correct results'
    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(training_data, labels[0:12])
    expected_confidence = np.array([
        -4.49666484, 3.73025553, -4.04181695, 3.73027436, -3.77043872,
        4.42613412, -3.35616616, 3.77716609
    ])
    recomputed_confidence = clf.decision_function(fake_raw_data[12:])
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(recomputed_confidence)
                               ) * expected_confidence.size
    assert hamming_distance <= 1, \
        'decision function of logistic regression with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(fake_raw_data[12:])
    expected_output = [0, 0, 0, 1, 0, 1, 0, 1]
    hamming_distance = hamming(y_pred, expected_output) * len(y_pred)
    assert hamming_distance <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
    confidence = clf.decision_function(fake_raw_data[12:])
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(confidence)
                               ) * expected_confidence.size
    assert hamming_distance <= 1, \
        'decision function of logistic regression without precomputation ' \
        'does not provide correct results'
Example #2
0
def test_classification():
    """Verify SVM and logistic-regression classification on synthetic epochs.

    The first 12 of 20 fake epochs train a ``Classifier``; the remaining 8
    are used for evaluation.  For both back ends, the predicted labels and
    the signs of the decision function may differ from the reference
    values in at most one position.
    """
    fake_raw_data = [create_epoch(i) for i in range(20)]
    labels = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
    # 20 epochs at 4 epochs per subject, i.e. 5 subjects
    epochs_per_subj = 4
    # svm
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    training_data = fake_raw_data[0:12]
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(training_data, labels)
    expected_confidence = np.array([-1.18234421, 0.97403604, -1.04005679,
                                    0.92403019, -0.95567738, 1.11746593,
                                    -0.83275891, 0.9486868])
    recomputed_confidence = clf.decision_function(fake_raw_data[12:])
    # hamming() gives the fraction of mismatches (always <= 1), so it is
    # multiplied by the number of elements to turn it into a count before
    # applying the "at most one mismatch" tolerance
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(recomputed_confidence)
                               ) * expected_confidence.size
    assert hamming_distance <= 1, \
        'decision function of SVM with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(fake_raw_data[12:])
    expected_output = [0, 0, 0, 1, 0, 1, 0, 1]
    hamming_distance = hamming(y_pred, expected_output) * len(y_pred)
    assert hamming_distance <= 1, \
        'classification via SVM does not provide correct results'
    # second call to decision_function, made after predict()
    confidence = clf.decision_function(fake_raw_data[12:])
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(confidence)
                               ) * expected_confidence.size
    assert hamming_distance <= 1, \
        'decision function of SVM without recomputation ' \
        'does not provide correct results'
    # logistic regression
    lr_clf = LogisticRegression()
    clf = Classifier(lr_clf, epochs_per_subj=epochs_per_subj)
    clf.fit(training_data, labels[0:12])
    expected_confidence = np.array([-4.49666484, 3.73025553, -4.04181695,
                                    3.73027436, -3.77043872, 4.42613412,
                                    -3.35616616, 3.77716609])
    recomputed_confidence = clf.decision_function(fake_raw_data[12:])
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(recomputed_confidence)
                               ) * expected_confidence.size
    assert hamming_distance <= 1, \
        'decision function of logistic regression with recomputation ' \
        'does not provide correct results'
    y_pred = clf.predict(fake_raw_data[12:])
    expected_output = [0, 0, 0, 1, 0, 1, 0, 1]
    hamming_distance = hamming(y_pred, expected_output) * len(y_pred)
    assert hamming_distance <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
    confidence = clf.decision_function(fake_raw_data[12:])
    hamming_distance = hamming(np.sign(expected_confidence),
                               np.sign(confidence)
                               ) * expected_confidence.size
    assert hamming_distance <= 1, \
        'decision function of logistic regression without precomputation ' \
        'does not provide correct results'
Example #3
0
def example_of_cross_validation_with_detailed_info(raw_data, labels, num_subjects, num_epochs_per_subj):
    """Run leave-one-subject-out cross validation, reporting every fold.

    For each subject, the block of ``num_epochs_per_subj`` consecutive
    epochs belonging to it is held out, the classifier is fitted on all
    remaining epochs, and the predictions, decision values, logged
    accuracy and score for the held-out block are emitted.

    ``raw_data`` and ``labels`` must support slicing and ``+``
    concatenation (e.g. plain lists), ordered subject by subject.
    """
    def self_paired(epochs):
        # every epoch is paired with itself before being handed over
        return list(zip(epochs, epochs))

    # no shrinking, set C=1
    kernel_svm = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    model = Classifier(kernel_svm, epochs_per_subj=num_epochs_per_subj)
    # doing leave-one-subject-out cross validation
    for subj in range(num_subjects):
        first = subj * num_epochs_per_subj
        last = (subj + 1) * num_epochs_per_subj
        held_out = raw_data[first:last]
        held_out_labels = labels[first:last]
        kept = raw_data[:first] + raw_data[last:]
        kept_labels = labels[:first] + labels[last:]
        model.fit(self_paired(kept), kept_labels)
        # joblib can be used for saving and loading fitted models here
        predict = model.predict(self_paired(held_out))
        print(predict)
        print(model.decision_function(self_paired(held_out)))
        # hamming() is a mismatch fraction; scale it to a mismatch count
        num_wrong = hamming(predict, np.asanyarray(held_out_labels)) \
            * num_epochs_per_subj
        num_right = num_epochs_per_subj - num_wrong
        logger.info(
            'when leaving subject %d out for testing, the accuracy is %d / %d = %.2f' %
            (subj, num_right, num_epochs_per_subj,
             num_right * 1.0 / num_epochs_per_subj)
        )
        print(model.score(self_paired(held_out), held_out_labels))
Example #4
0
def example_of_aggregating_sim_matrix(raw_data, labels, num_subjects,
                                      num_epochs_per_subj):
    """Classify while building the kernel matrix in portions.

    The first subject's epochs are rotated to the end of the data, and the
    whole rotated set is passed to ``fit`` together with
    ``num_training_samples`` covering all but the last
    ``num_epochs_per_subj`` samples.  ``predict``, ``decision_function``
    and ``score`` are then called without test data, and the accuracy
    against the first subject's labels is logged.
    """
    n_test = num_epochs_per_subj
    # aggregate the kernel matrix to save memory
    kernel_svm = svm.SVC(kernel='precomputed', shrinking=False, C=1,
                         gamma='auto')
    model = Classifier(kernel_svm,
                       num_processed_voxels=1000,
                       epochs_per_subj=n_test)
    # move the first subject's block behind everything else
    rotated_data = raw_data[n_test:] + raw_data[:n_test]
    rotated_labels = labels[n_test:] + labels[:n_test]
    paired = list(zip(rotated_data, rotated_data))
    model.fit(paired,
              rotated_labels,
              num_training_samples=n_test * (num_subjects - 1))
    predict = model.predict()
    print(predict)
    print(model.decision_function())
    held_out_labels = labels[:n_test]
    # hamming() is a mismatch fraction; scale it to a mismatch count
    num_wrong = hamming(predict, np.asanyarray(held_out_labels)) * n_test
    num_right = n_test - num_wrong
    logger.info('when aggregating the similarity matrix to save memory, '
                'the accuracy is %d / %d = %.2f' %
                (num_right, n_test, num_right * 1.0 / n_test))
    # the test samples were already supplied to fit(), so no data is
    # passed to score() here
    print(model.score(None, held_out_labels))
Example #5
0
def example_of_correlating_two_components(raw_data, raw_data2, labels,
                                          num_subjects, num_epochs_per_subj):
    """Classify using epochs paired across two data components.

    Each epoch of ``raw_data`` is paired with the epoch at the same index
    in ``raw_data2``.  The first ``num_epochs_per_subj * (num_subjects - 1)``
    pairs train the classifier; the remaining pairs form the test set.
    Predictions, decision values and the score are printed, and the
    accuracy is logged.

    Parameters
    ----------
    raw_data, raw_data2 : sliceable sequences of epochs, aligned by index
    labels : sliceable sequence with one label per epoch
    num_subjects : int
    num_epochs_per_subj : int
    """
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1, gamma='auto')
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj * (num_subjects - 1)
    clf.fit(
        list(
            zip(raw_data[0:num_training_samples],
                raw_data2[0:num_training_samples])),
        labels[0:num_training_samples])
    X = list(
        zip(raw_data[num_training_samples:], raw_data2[num_training_samples:]))
    predict = clf.predict(X)
    print(predict)
    print(clf.decision_function(X))
    test_labels = labels[num_training_samples:]
    # hamming() is the fraction of mismatches; scale by the real test-set
    # size so the count stays right even if the data holds more (or fewer)
    # than num_subjects * num_epochs_per_subj epochs
    num_test_samples = len(test_labels)
    incorrect_predict = hamming(
        predict, np.asanyarray(test_labels)) * num_test_samples
    # the message used to be a copy of the similarity-matrix example's text
    logger.info('when correlating two components, '
                'the accuracy is %d / %d = %.2f' %
                (num_test_samples - incorrect_predict, num_test_samples,
                 (num_test_samples - incorrect_predict) * 1.0 /
                 num_test_samples))
    print(clf.score(X, test_labels))
Example #6
0
def test_classification():
    """Check predicted labels for the SVM and logistic-regression back ends.

    Both classifiers are fitted on the first 12 of 20 fake epochs and must
    reproduce the reference labels for the other 8 epochs with at most
    one mismatch.
    """
    epochs = [create_epoch(idx) for idx in range(20)]
    train_labels = [0, 1] * 6
    reference = [0, 0, 0, 1, 0, 1, 0, 1]
    # 4 epochs per subject
    epochs_per_subj = 4
    train_epochs = epochs[:12]

    def mismatches(estimator):
        # fit on the training epochs, then count label disagreements
        # (hamming() is a fraction, hence the scaling by the length)
        model = Classifier(estimator, epochs_per_subj=epochs_per_subj)
        model.fit(train_epochs, train_labels)
        predicted = model.predict(epochs[12:])
        return hamming(predicted, reference) * len(predicted)

    # svm
    svm_errors = mismatches(
        svm.SVC(kernel='precomputed', shrinking=False, C=1))
    assert svm_errors <= 1, \
        'classification via SVM does not provide correct results'
    # logistic regression
    lr_errors = mismatches(LogisticRegression())
    assert lr_errors <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
Example #7
0
def example_of_aggregating_sim_matrix(raw_data, labels, num_subjects, num_epochs_per_subj):
    """Classify while building the kernel matrix in portions.

    The epochs of the first subject are rotated to the end, the complete
    rotated data set is handed to ``fit`` with ``num_training_samples``
    covering every sample except the last ``num_epochs_per_subj``, and
    ``predict``/``decision_function``/``score`` are called without test
    data.  The accuracy against the first subject's labels is logged.
    """
    block = num_epochs_per_subj
    # aggregate the kernel matrix to save memory
    kernel_svm = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    model = Classifier(kernel_svm, num_processed_voxels=1000,
                       epochs_per_subj=block)
    # move the first subject's block behind everything else
    rotated_data = raw_data[block:] + raw_data[:block]
    rotated_labels = labels[block:] + labels[:block]
    model.fit(list(zip(rotated_data, rotated_data)),
              rotated_labels,
              num_training_samples=block * (num_subjects - 1))
    predict = model.predict()
    print(predict)
    print(model.decision_function())
    held_out_labels = labels[:block]
    # hamming() is a mismatch fraction; scale it to a mismatch count
    num_wrong = hamming(predict, np.asanyarray(held_out_labels)) * block
    num_right = block - num_wrong
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (num_right, block, num_right * 1.0 / block)
    )
    # the test samples were already supplied to fit(), so no data is
    # passed to score() here
    print(model.score(None, held_out_labels))
Example #8
0
def example_of_correlating_two_components(raw_data, raw_data2, labels, num_subjects, num_epochs_per_subj):
    """Classify using epochs paired across two data components.

    Each epoch of ``raw_data`` is paired with the epoch at the same index
    in ``raw_data2``.  The first ``num_epochs_per_subj * (num_subjects - 1)``
    pairs train the classifier; the remaining pairs form the test set.
    Predictions, decision values and the score are printed, and the
    accuracy is logged.

    Parameters
    ----------
    raw_data, raw_data2 : sliceable sequences of epochs, aligned by index
    labels : sliceable sequence with one label per epoch
    num_subjects : int
    num_epochs_per_subj : int
    """
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj * (num_subjects - 1)
    clf.fit(list(zip(raw_data[0:num_training_samples],
                     raw_data2[0:num_training_samples])),
            labels[0:num_training_samples])
    X = list(zip(raw_data[num_training_samples:],
                 raw_data2[num_training_samples:]))
    predict = clf.predict(X)
    print(predict)
    print(clf.decision_function(X))
    test_labels = labels[num_training_samples:]
    # hamming() is the fraction of mismatches; scale by the real test-set
    # size so the count stays right even if the data holds more (or fewer)
    # than num_subjects * num_epochs_per_subj epochs
    num_test_samples = len(test_labels)
    incorrect_predict = hamming(
        predict, np.asanyarray(test_labels)) * num_test_samples
    # the message used to be a copy of the similarity-matrix example's text
    logger.info(
        'when correlating two components, '
        'the accuracy is %d / %d = %.2f' %
        (num_test_samples - incorrect_predict, num_test_samples,
         (num_test_samples - incorrect_predict) * 1.0 / num_test_samples)
    )
    print(clf.score(X, test_labels))
Example #9
0
            testing_obj = list(zip(int_data_testing, int_data_testing))
    
    # no shrinking, set C=1
    svm_clf = SVC(kernel='precomputed', shrinking=False, C=1)
    
    clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj)
    
    # Train the model on the training data
    if is_memory_efficient == 1:
        clf.fit(corr_obj, rearranged_labels, num_training_samples)
    else:
        clf.fit(training_obj, labels_training)
    
    # What is the cv accuracy?
    if is_memory_efficient == 0:
        cv_prediction = clf.predict(training_obj)
    
    # Test on the testing data
    if is_memory_efficient == 1:
        predict = clf.predict()
    else:
        predict = clf.predict(testing_obj)

    # Report results on the first rank core
    if MPI.COMM_WORLD.Get_rank()==0:
        print('--RESULTS--')
        print(clf.decision_function())
        print(clf.predict())
        
        # How often does the prediction match the target
        num_correct = (np.asanyarray(predict) == np.asanyarray(labels_testing)).sum()
Example #10
0
def test_classification():
    """Exercise ``Classifier`` with SVM and logistic-regression back ends.

    Every epoch is paired with itself.  Three configurations are checked
    against reference labels and decision-function signs, each tolerating
    one mismatch: an SVM fitted on 12 epochs and tested on the other 8, an
    SVM fitted with ``num_processed_voxels=2`` on all 20 epochs with
    ``num_training_samples=12``, and a logistic regression fitted on 12
    epochs.  The SVM ``score`` is also cross-checked against its
    predictions.
    """
    def pair_up(epochs):
        return list(zip(epochs, epochs))

    def sign_flips(reference, observed, length):
        # hamming() is a mismatch fraction; scale it to a count
        return hamming(np.sign(reference), np.sign(observed)) * length

    def label_flips(predicted, reference):
        return hamming(predicted, reference) * len(predicted)

    all_epochs = [create_epoch(idx, 5) for idx in range(20)]
    all_labels = [0, 1] * 10
    # 5 subjects, 4 epochs per subject
    epochs_per_subj = 4
    train_epochs = all_epochs[:12]
    train_labels = all_labels[:12]
    held_out = all_epochs[12:]
    reference_labels = [0, 0, 0, 1, 0, 1, 0, 1]

    # svm
    kernel_svm = svm.SVC(kernel='precomputed', shrinking=False, C=1,
                         gamma='auto')
    model = Classifier(kernel_svm, epochs_per_subj=epochs_per_subj)
    model.fit(pair_up(train_epochs), train_labels)
    svm_reference = np.array([-1.18234421, 0.97403604, -1.04005679,
                              0.92403019, -0.95567738, 1.11746593,
                              -0.83275891, 0.9486868])
    scores = model.decision_function(pair_up(held_out))
    assert sign_flips(svm_reference, scores, svm_reference.size) <= 1, \
        'decision function of SVM with recomputation ' \
        'does not provide correct results'
    predicted = model.predict(pair_up(held_out))
    assert label_flips(predicted, reference_labels) <= 1, \
        'classification via SVM does not provide correct results'
    scores = model.decision_function(pair_up(held_out))
    assert sign_flips(svm_reference, scores, scores.size) <= 1, \
        'decision function of SVM without recomputation ' \
        'does not provide correct results'
    truth = [0, 1] * 4
    accuracy = model.score(pair_up(held_out), truth)
    # the mismatch fraction must be the complement of the reported score
    assert np.isclose(hamming(predicted, truth), 1 - accuracy), \
        'the prediction score is incorrect'

    # svm with partial similarity matrix computation
    model = Classifier(kernel_svm, num_processed_voxels=2,
                       epochs_per_subj=epochs_per_subj)
    model.fit(pair_up(all_epochs), all_labels, num_training_samples=12)
    predicted = model.predict()
    assert label_flips(predicted, reference_labels) <= 1, \
        'classification via SVM (partial sim) does not ' \
        'provide correct results'
    scores = model.decision_function()
    assert sign_flips(svm_reference, scores, scores.size) <= 1, \
        'decision function of SVM (partial sim) without recomputation ' \
        'does not provide correct results'

    # logistic regression
    model = Classifier(LogisticRegression(),
                       epochs_per_subj=epochs_per_subj)
    model.fit(pair_up(train_epochs), train_labels)
    lr_reference = np.array([-4.49666484, 3.73025553, -4.04181695,
                             3.73027436, -3.77043872, 4.42613412,
                             -3.35616616, 3.77716609])
    scores = model.decision_function(pair_up(held_out))
    assert sign_flips(lr_reference, scores, lr_reference.size) <= 1, \
        'decision function of logistic regression with recomputation ' \
        'does not provide correct results'
    predicted = model.predict(pair_up(held_out))
    assert label_flips(predicted, reference_labels) <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
    scores = model.decision_function(pair_up(held_out))
    assert sign_flips(lr_reference, scores, scores.size) <= 1, \
        'decision function of logistic regression without precomputation ' \
        'does not provide correct results'
Example #11
0
def test_classification_with_two_components():
    """Exercise ``Classifier`` on epochs paired across two fake data sets.

    Epochs from two independently generated sets are paired by index.
    Three configurations are checked against reference labels and
    decision-function signs, each tolerating one mismatch: an SVM fitted
    on 12 pairs and tested on the other 8, an SVM fitted with
    ``num_processed_voxels=2`` on all 20 pairs with
    ``num_training_samples=12``, and a logistic regression fitted on 12
    pairs.  The SVM ``score`` is also cross-checked against its
    predictions.
    """
    def pair_up(first, second):
        return list(zip(first, second))

    def sign_flips(reference, observed, length):
        # hamming() is a mismatch fraction; scale it to a count
        return hamming(np.sign(reference), np.sign(observed)) * length

    def label_flips(predicted, reference):
        return hamming(predicted, reference) * len(predicted)

    epochs_a = [create_epoch(idx, 5) for idx in range(20)]
    epochs_b = [create_epoch(idx, 6) for idx in range(20)]
    all_labels = [0, 1] * 10
    # 5 subjects, 4 epochs per subject
    epochs_per_subj = 4
    train_a, train_b = epochs_a[:12], epochs_b[:12]
    test_a, test_b = epochs_a[12:], epochs_b[12:]
    train_labels = all_labels[:12]
    reference_labels = [0, 1, 0, 1, 0, 1, 0, 1]

    # svm
    kernel_svm = svm.SVC(kernel='precomputed', shrinking=False, C=1,
                         gamma='auto')
    model = Classifier(kernel_svm, epochs_per_subj=epochs_per_subj)
    model.fit(pair_up(train_a, train_b), train_labels)
    svm_reference = np.array([-1.23311606, 1.02440964, -0.93898336,
                              1.07028798, -1.04420007, 0.97647772,
                              -1.0498268, 1.04970111])
    scores = model.decision_function(pair_up(test_a, test_b))
    assert sign_flips(svm_reference, scores, svm_reference.size) <= 1, \
        'decision function of SVM with recomputation ' \
        'does not provide correct results'
    predicted = model.predict(pair_up(test_a, test_b))
    assert label_flips(predicted, reference_labels) <= 1, \
        'classification via SVM does not provide correct results'
    scores = model.decision_function(pair_up(test_a, test_b))
    assert sign_flips(svm_reference, scores, scores.size) <= 1, \
        'decision function of SVM without recomputation ' \
        'does not provide correct results'
    truth = [0, 1] * 4
    accuracy = model.score(pair_up(test_a, test_b), truth)
    # the mismatch fraction must be the complement of the reported score
    assert np.isclose(hamming(predicted, truth), 1 - accuracy), \
        'the prediction score is incorrect'

    # svm with partial similarity matrix computation
    model = Classifier(kernel_svm, num_processed_voxels=2,
                       epochs_per_subj=epochs_per_subj)
    model.fit(pair_up(epochs_a, epochs_b), all_labels,
              num_training_samples=12)
    predicted = model.predict()
    assert label_flips(predicted, reference_labels) <= 1, \
        'classification via SVM (partial sim) does not ' \
        'provide correct results'
    scores = model.decision_function()
    assert sign_flips(svm_reference, scores, scores.size) <= 1, \
        'decision function of SVM (partial sim) without recomputation ' \
        'does not provide correct results'

    # logistic regression
    model = Classifier(LogisticRegression(),
                       epochs_per_subj=epochs_per_subj)
    # specifying num_training_samples is for coverage
    model.fit(pair_up(train_a, train_b), train_labels,
              num_training_samples=12)
    lr_reference = np.array([-4.90819848, 4.22548132, -3.76255726,
                             4.46505975, -4.19933099, 4.08313584,
                             -4.23070437, 4.31779758])
    scores = model.decision_function(pair_up(test_a, test_b))
    assert sign_flips(lr_reference, scores, lr_reference.size) <= 1, \
        'decision function of logistic regression with recomputation ' \
        'does not provide correct results'
    predicted = model.predict(pair_up(test_a, test_b))
    assert label_flips(predicted, reference_labels) <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
    scores = model.decision_function(pair_up(test_a, test_b))
    assert sign_flips(lr_reference, scores, scores.size) <= 1, \
        'decision function of logistic regression without precomputation ' \
        'does not provide correct results'
def test_classification():
    """Exercise ``Classifier`` with SVM and logistic-regression back ends.

    Every epoch is paired with itself.  The test covers an SVM fitted on
    12 epochs, an SVM fitted with ``num_processed_voxels=2`` on all 20
    epochs (``num_training_samples=12``), and a logistic regression fitted
    on 12 epochs.  Predicted labels and decision-function signs for the
    last 8 epochs may each deviate from the reference in one position at
    most; the SVM ``score`` must agree with its predictions.
    """
    def with_itself(epochs):
        return list(zip(epochs, epochs))

    def count_sign_diffs(reference, observed, length):
        # hamming() is a mismatch fraction; scale it to a count
        return hamming(np.sign(reference), np.sign(observed)) * length

    def count_label_diffs(predicted, reference):
        return hamming(predicted, reference) * len(predicted)

    epochs = [create_epoch(idx, 5) for idx in range(20)]
    targets = [0, 1] * 10
    # 5 subjects, 4 epochs per subject
    epochs_per_subj = 4
    fit_epochs, eval_epochs = epochs[:12], epochs[12:]
    fit_targets = targets[:12]
    wanted_labels = [0, 0, 0, 1, 0, 1, 0, 1]

    # svm
    svc = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    wrapped = Classifier(svc, epochs_per_subj=epochs_per_subj)
    wrapped.fit(with_itself(fit_epochs), fit_targets)
    wanted_svm = np.array([-1.18234421, 0.97403604, -1.04005679,
                           0.92403019, -0.95567738, 1.11746593,
                           -0.83275891, 0.9486868])
    decision = wrapped.decision_function(with_itself(eval_epochs))
    assert count_sign_diffs(wanted_svm, decision, wanted_svm.size) <= 1, \
        'decision function of SVM with recomputation ' \
        'does not provide correct results'
    guessed = wrapped.predict(with_itself(eval_epochs))
    assert count_label_diffs(guessed, wanted_labels) <= 1, \
        'classification via SVM does not provide correct results'
    decision = wrapped.decision_function(with_itself(eval_epochs))
    assert count_sign_diffs(wanted_svm, decision, decision.size) <= 1, \
        'decision function of SVM without recomputation ' \
        'does not provide correct results'
    truth = [0, 1] * 4
    reported = wrapped.score(with_itself(eval_epochs), truth)
    # the mismatch fraction must be the complement of the reported score
    assert np.isclose(hamming(guessed, truth), 1 - reported), \
        'the prediction score is incorrect'

    # svm with partial similarity matrix computation
    wrapped = Classifier(svc, num_processed_voxels=2,
                         epochs_per_subj=epochs_per_subj)
    wrapped.fit(with_itself(epochs), targets, num_training_samples=12)
    guessed = wrapped.predict()
    assert count_label_diffs(guessed, wanted_labels) <= 1, \
        'classification via SVM (partial sim) does not ' \
        'provide correct results'
    decision = wrapped.decision_function()
    assert count_sign_diffs(wanted_svm, decision, decision.size) <= 1, \
        'decision function of SVM (partial sim) without recomputation ' \
        'does not provide correct results'

    # logistic regression
    wrapped = Classifier(LogisticRegression(),
                         epochs_per_subj=epochs_per_subj)
    wrapped.fit(with_itself(fit_epochs), fit_targets)
    wanted_lr = np.array([-4.49666484, 3.73025553, -4.04181695,
                          3.73027436, -3.77043872, 4.42613412,
                          -3.35616616, 3.77716609])
    decision = wrapped.decision_function(with_itself(eval_epochs))
    assert count_sign_diffs(wanted_lr, decision, wanted_lr.size) <= 1, \
        'decision function of logistic regression with recomputation ' \
        'does not provide correct results'
    guessed = wrapped.predict(with_itself(eval_epochs))
    assert count_label_diffs(guessed, wanted_labels) <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
    decision = wrapped.decision_function(with_itself(eval_epochs))
    assert count_sign_diffs(wanted_lr, decision, decision.size) <= 1, \
        'decision function of logistic regression without precomputation ' \
        'does not provide correct results'
def test_classification_with_two_components():
    """Exercise ``Classifier`` on epochs paired across two fake data sets.

    Epochs of two independently generated sets are paired by index.  The
    test covers an SVM fitted on 12 pairs, an SVM fitted with
    ``num_processed_voxels=2`` on all 20 pairs
    (``num_training_samples=12``), and a logistic regression fitted on 12
    pairs.  Predicted labels and decision-function signs for the last 8
    pairs may each deviate from the reference in one position at most;
    the SVM ``score`` must agree with its predictions.
    """
    def zipped(first, second):
        return list(zip(first, second))

    def count_sign_diffs(reference, observed, length):
        # hamming() is a mismatch fraction; scale it to a count
        return hamming(np.sign(reference), np.sign(observed)) * length

    def count_label_diffs(predicted, reference):
        return hamming(predicted, reference) * len(predicted)

    first_set = [create_epoch(idx, 5) for idx in range(20)]
    second_set = [create_epoch(idx, 6) for idx in range(20)]
    targets = [0, 1] * 10
    # 5 subjects, 4 epochs per subject
    epochs_per_subj = 4
    fit_first, fit_second = first_set[:12], second_set[:12]
    eval_first, eval_second = first_set[12:], second_set[12:]
    fit_targets = targets[:12]
    wanted_labels = [0, 1, 0, 1, 0, 1, 0, 1]

    # svm
    svc = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    wrapped = Classifier(svc, epochs_per_subj=epochs_per_subj)
    wrapped.fit(zipped(fit_first, fit_second), fit_targets)
    wanted_svm = np.array([-1.23311606, 1.02440964, -0.93898336,
                           1.07028798, -1.04420007, 0.97647772,
                           -1.0498268, 1.04970111])
    decision = wrapped.decision_function(zipped(eval_first, eval_second))
    assert count_sign_diffs(wanted_svm, decision, wanted_svm.size) <= 1, \
        'decision function of SVM with recomputation ' \
        'does not provide correct results'
    guessed = wrapped.predict(zipped(eval_first, eval_second))
    assert count_label_diffs(guessed, wanted_labels) <= 1, \
        'classification via SVM does not provide correct results'
    decision = wrapped.decision_function(zipped(eval_first, eval_second))
    assert count_sign_diffs(wanted_svm, decision, decision.size) <= 1, \
        'decision function of SVM without recomputation ' \
        'does not provide correct results'
    truth = [0, 1] * 4
    reported = wrapped.score(zipped(eval_first, eval_second), truth)
    # the mismatch fraction must be the complement of the reported score
    assert np.isclose(hamming(guessed, truth), 1 - reported), \
        'the prediction score is incorrect'

    # svm with partial similarity matrix computation
    wrapped = Classifier(svc, num_processed_voxels=2,
                         epochs_per_subj=epochs_per_subj)
    wrapped.fit(zipped(first_set, second_set), targets,
                num_training_samples=12)
    guessed = wrapped.predict()
    assert count_label_diffs(guessed, wanted_labels) <= 1, \
        'classification via SVM (partial sim) does not ' \
        'provide correct results'
    decision = wrapped.decision_function()
    assert count_sign_diffs(wanted_svm, decision, decision.size) <= 1, \
        'decision function of SVM (partial sim) without recomputation ' \
        'does not provide correct results'

    # logistic regression
    wrapped = Classifier(LogisticRegression(),
                         epochs_per_subj=epochs_per_subj)
    # specifying num_training_samples is for coverage
    wrapped.fit(zipped(fit_first, fit_second), fit_targets,
                num_training_samples=12)
    wanted_lr = np.array([-4.90819848, 4.22548132, -3.76255726,
                          4.46505975, -4.19933099, 4.08313584,
                          -4.23070437, 4.31779758])
    decision = wrapped.decision_function(zipped(eval_first, eval_second))
    assert count_sign_diffs(wanted_lr, decision, wanted_lr.size) <= 1, \
        'decision function of logistic regression with recomputation ' \
        'does not provide correct results'
    guessed = wrapped.predict(zipped(eval_first, eval_second))
    assert count_label_diffs(guessed, wanted_labels) <= 1, \
        'classification via logistic regression ' \
        'does not provide correct results'
    decision = wrapped.decision_function(zipped(eval_first, eval_second))
    assert count_sign_diffs(wanted_lr, decision, decision.size) <= 1, \
        'decision function of logistic regression without precomputation ' \
        'does not provide correct results'
Example #14
0
#from sklearn.externals import joblib

format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
# To write the log to a file instead of the console, replace
# "stream=sys.stdout" with "filename='fcma.log'".
logging.basicConfig(level=logging.INFO, format=format, stream=sys.stdout)
logger = logging.getLogger(__name__)


# python classification.py face_scene bet.nii.gz face_scene/prefrontal_top_mask.nii.gz face_scene/fs_epoch_labels.npy 12
if __name__ == '__main__':
    # Fail with a readable usage message instead of a bare IndexError
    # traceback when positional arguments are missing.
    if len(sys.argv) < 6:
        sys.exit('usage: python %s data_dir extension mask_file '
                 'epoch_file epochs_per_subj' % sys.argv[0])
    data_dir = sys.argv[1]
    extension = sys.argv[2]
    mask_file = sys.argv[3]
    epoch_file = sys.argv[4]
    raw_data, labels = prepare_fcma_data(data_dir, extension, mask_file, epoch_file)
    epochs_per_subj = int(sys.argv[5])
    # Index separating training epochs from test epochs.
    # NOTE(review): 204 presumably matches the face_scene data set named in
    # the usage line above -- confirm before running on other data.
    num_training_samples = 204
    # no shrinking, set C=1
    use_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    # LogisticRegression() can be substituted for the SVM here
    clf = Classifier(use_clf, epochs_per_subj=epochs_per_subj)
    training_data = raw_data[0:num_training_samples]
    test_data = raw_data[num_training_samples:]
    clf.fit(training_data, labels[0:num_training_samples])
    # joblib can be used for saving and loading fitted models
    print(clf.predict(test_data))
    print(clf.decision_function(test_data))
    # ground-truth labels of the test epochs, for visual comparison
    print(np.asanyarray(labels[num_training_samples:]))
Example #15
0
    mask_file = sys.argv[3]
    epoch_file = sys.argv[4]

    epoch_list = np.load(epoch_file)
    num_subjects = len(epoch_list)
    num_epochs_per_subj = epoch_list[0].shape[1]

    raw_data, labels = prepare_fcma_data(data_dir, extension, mask_file,
                                         epoch_file)

    # no shrinking, set C=1
    use_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    #use_clf = LogisticRegression()
    clf = Classifier(use_clf, epochs_per_subj=num_epochs_per_subj)

    # doing leave-one-subject-out cross validation
    for i in range(num_subjects):
        leave_start = i * num_epochs_per_subj
        leave_end = (i + 1) * num_epochs_per_subj
        training_data = raw_data[0:leave_start] + raw_data[leave_end:]
        test_data = raw_data[leave_start:leave_end]
        training_labels = labels[0:leave_start] + labels[leave_end:]
        test_labels = labels[leave_start:leave_end]
        clf.fit(training_data, training_labels)
        # joblib can be used for saving and loading models
        #joblib.dump(clf, 'model/logistic.pkl')
        #clf = joblib.load('model/svm.pkl')
        print(clf.predict(test_data))
        print(clf.decision_function(test_data))
        print(np.asanyarray(test_labels))