Example #1
0
def build_and_run_rvc(X_train_scaled, y_train, X_test_scaled, y_test):
    """Fit a default RVC, predict the test set, and print a short report.

    Prints the fit and predict durations, the model's parameters as
    returned by ``get_params()``, the result of
    ``helpers.confusion_matrix`` and the ``classification_report`` for the
    test predictions.

    Args:
        X_train_scaled: training features passed to ``fit``.
        y_train: training labels passed to ``fit``.
        X_test_scaled: test features passed to ``predict``.
        y_test: true test labels used for the printed evaluation.

    Returns:
        Tuple ``(fit_seconds, predict_seconds)`` measured with
        ``time.time()``.
    """

    def _timed(operation, *args):
        # Run operation(*args) and return (result, elapsed seconds).
        began = time.time()
        outcome = operation(*args)
        return outcome, time.time() - began

    classifier = RVC()

    print("fitting RVM:")
    _, fit_seconds = _timed(classifier.fit, X_train_scaled, y_train)
    print("time to fit RVM: ", fit_seconds)

    predictions, predict_seconds = _timed(classifier.predict, X_test_scaled)
    print("time to predict with RVM: ", predict_seconds)

    # Report the parameters the model was built with.
    print("RVM hyperparameters:")
    print(classifier.get_params())

    # Evaluate the test-set predictions.
    print(helpers.confusion_matrix(y_test, predictions))
    print(classification_report(y_test, predictions))

    return fit_seconds, predict_seconds
def rvc_analysis(random_seed, save_path):
    """Train a linear RVC on a pickled data split and save confusion matrices.

    Loads ``splitted_dataset_<dataset>.pickle`` from the
    ``random_seed_<NNN>`` sub-directory of ``save_path``, fits
    ``RVC(kernel='linear')`` on ``Xtrain_scaled`` / ``Ytrain``, predicts the
    test and validation sets, and saves one normalised confusion-matrix
    figure per set into that same sub-directory.

    Relies on the module-level names ``debug`` and ``dataset``.

    Args:
        random_seed: integer seed id; formatted as ``%03d`` to select the
            sub-directory.
        save_path: base directory containing the per-seed sub-directories.

    Returns:
        Tuple ``(cm_test, cm_validation, accuracy_test, accuracy_val)``.
    """
    # TODO: change the path
    seed_dir = os.path.join(save_path, 'random_seed_%03d' % random_seed)
    print('Random seed: %03d' % random_seed)

    # The returned paths are not used here; the call (and its three-way
    # unpacking) is kept in case get_paths has side effects or validates
    # its arguments.
    _, _, _ = get_paths(debug, dataset)

    # Load the saved split.
    # NOTE: pickle.load can execute arbitrary code embedded in the file;
    # only load splits that were produced by this project.
    pickle_path = os.path.join(seed_dir,
                               'splitted_dataset_%s.pickle' % dataset)
    with open(pickle_path, 'rb') as handle:
        splitted_dataset = pickle.load(handle)

    # Train the model
    model = RVC(kernel='linear')
    model.fit(splitted_dataset['Xtrain_scaled'], splitted_dataset['Ytrain'])

    # Predict the held-out test and validation sets
    print('Perform prediction in test data')
    y_prediction_test = model.predict(splitted_dataset['Xtest_scaled'])
    y_prediction_validation = model.predict(
        splitted_dataset['Xvalidate_scaled'])

    # -------------------------------------------------------------------------
    # Statistics: confusion matrix and accuracy for each set
    class_name = np.array(['young', 'old', 'adult'], dtype='U10')

    # Test dataset
    _, cm_test = plot_confusion_matrix(splitted_dataset['Ytest'],
                                       y_prediction_test,
                                       classes=class_name,
                                       normalize=True)
    accuracy_test = accuracy_score(splitted_dataset['Ytest'],
                                   y_prediction_test)
    plt.savefig(os.path.join(seed_dir, 'confusion_matrix_test_rvc.eps'))

    # Validation dataset
    _, cm_validation = plot_confusion_matrix(splitted_dataset['Yvalidate'],
                                             y_prediction_validation,
                                             classes=class_name,
                                             normalize=True)
    accuracy_val = accuracy_score(splitted_dataset['Yvalidate'],
                                  y_prediction_validation)
    # Save exactly once per set: a further savefig to the *test* file name at
    # this point would overwrite the test figure saved above.
    plt.savefig(os.path.join(seed_dir, 'confusion_matrix_validation_rvc.eps'))

    return cm_test, cm_validation, accuracy_test, accuracy_val
Example #3
0
    def test_predict_three_classes(self):
        """Fit a linear RVC on three labelled points and check a prediction."""
        samples = np.array([[5, 5], [5, -5], [-5, 0]])
        labels = np.array(['A', 'B', 'C'])

        model = RVC(kernel='linear')
        model.fit(samples, labels)

        # The query point (10, 10) is expected to be assigned class 'A'.
        observed = model.predict(np.array([[10, 10]]))
        expected = np.array(['A'])
        np.testing.assert_array_equal(observed, expected)
Example #4
0
def RVM(X_hyper, Y_hyper, X_train, Y_train, X_validate, Y_validate, params):
    """Train (or reload) an RVC and predict the leading validation rows.

    When ``params['train']`` is truthy, a new ``RVC(n_iter=100, tol=0.1)`` is
    fitted on the first ``params['train_size']`` training rows and written to
    ``'rvm_model.pkl'`` via ``writeObj``. Otherwise the model is read back
    from ``'rvm_model.pkl'`` via ``readObj``. In both cases the first
    ``params['test_size']`` rows of ``X_validate`` are predicted.

    Args:
        X_hyper, Y_hyper: unused; kept for interface compatibility.
        X_train, Y_train: training features and labels.
        X_validate: validation features.
        Y_validate: unused; kept for interface compatibility.
        params: mapping with keys ``'train_size'``, ``'test_size'`` and
            ``'train'``.

    Returns:
        Tuple ``(Y_pred, clf)``: the predictions and the model used.
    """
    # time.clock() was removed in Python 3.8; use perf_counter when the
    # interpreter provides it and fall back to clock on old interpreters.
    timer = getattr(time, 'perf_counter', None) or time.clock
    start = timer()

    train_size = params['train_size']
    test_size = params['test_size']

    if params['train']:
        clf = RVC(n_iter=100, tol=0.1)
        clf.fit(X_train[:train_size, :], Y_train[:train_size])
        writeObj('rvm_model.pkl', clf)
    else:
        clf = readObj('rvm_model.pkl')

    Y_pred = clf.predict(X_validate[:test_size])

    # Report the elapsed time *before* returning; placed after the return
    # statements this line could never execute. The single-argument call
    # form prints the same text under Python 2 and Python 3.
    print("training took  %s s" % (timer() - start))
    return Y_pred, clf
Example #5
0
    def test_predict_two_classes(self):
        """Fit a linear RVC on two labelled points and check a prediction."""
        samples = np.array([[2, 1], [1, 2]])
        labels = np.array(['A', 'B'])

        model = RVC(kernel='linear')
        model.fit(samples, labels)

        # The query point (0, 3) is expected to be assigned class 'A'.
        observed = model.predict(np.array([[0, 3]]))
        expected = np.array(['A'])
        np.testing.assert_array_equal(observed, expected)
Example #6
0
def TrainMyRVM(XEstimate,
               XValidate,
               ClassLabelsEstimate,
               ClassLabelsValidate,
               Parameters=None):
    """Fit an RBF-kernel RVC on a random subsample and predict a validation subsample.

    Each row of ``ClassLabelsEstimate`` is converted to an integer class
    index (the position of the entry equal to 1). Up to 4000 training rows
    and up to 1000 validation rows are drawn without replacement, the model
    is fitted on the training draw and used to predict the validation draw.

    Args:
        XEstimate: training features, indexable by an integer index array.
        XValidate: validation features, indexable by an integer index array.
        ClassLabelsEstimate: training labels; each row must contain an entry
            equal to 1 marking its class.
        ClassLabelsValidate: validation labels, one row per validation
            sample.
        Parameters: unused; kept for interface compatibility.

    Returns:
        Tuple ``(Yvalidate, EstParameters, Rvectors,
        ClassLabelsValidate_sampled, idx_validate)`` where ``Yvalidate`` is
        the predicted class per sampled validation row, ``EstParameters`` is
        the fitted model, ``Rvectors`` is the placeholder constant 1,
        ``ClassLabelsValidate_sampled`` holds the label rows of the sampled
        validation data, and ``idx_validate`` holds the indices that were
        drawn from the validation set.
    """
    max_training_samples = 4000
    max_validate_samples = 1000

    # Convert each label row to the index of its entry equal to 1. The
    # trailing [0] extracts a scalar so that a length-1 array is not
    # assigned to a single int8 slot.
    n_training_rows = ClassLabelsEstimate.shape[0]
    training_labels = np.zeros(n_training_rows, dtype=np.int8)
    for i in range(n_training_rows):
        training_labels[i] = np.where(ClassLabelsEstimate[i] == 1)[0][0]

    # Draw the training subsample; the same indices select both the
    # features and the labels. The size is capped at the number of rows
    # available so that small inputs do not make np.random.choice raise.
    idx_training = np.random.choice(
        np.arange(n_training_rows),
        min(max_training_samples, n_training_rows),
        replace=False)
    training_labels_sampled = training_labels[idx_training]
    XEstimate_sampled = XEstimate[idx_training]

    # Draw the validation subsample the same way.
    n_validate_rows = ClassLabelsValidate.shape[0]
    idx_validate = np.random.choice(
        np.arange(n_validate_rows),
        min(max_validate_samples, n_validate_rows),
        replace=False)
    XValidate_sampled = XValidate[idx_validate]
    ClassLabelsValidate_sampled = ClassLabelsValidate[idx_validate]

    # Initialise the RVM classifier (RVC class) and fit it.
    rvm = RVC(kernel='rbf', n_iter=1, alpha=1.e-6, beta=1.e-6, verbose=True)
    rvm.fit(XEstimate_sampled, training_labels_sampled)

    # Predict a class for every sampled validation row.
    Yvalidate = rvm.predict(XValidate_sampled)
    EstParameters = rvm
    # Placeholder constant; it is not derived from the fitted model.
    Rvectors = 1

    # The last element used to be the undefined name `idx2`, which raised
    # NameError on every call; the validation indices are returned instead.
    return (Yvalidate, EstParameters, Rvectors, ClassLabelsValidate_sampled,
            idx_validate)
Example #7
0
    SumD = 0
    for RVNum in range(1,NumRVs):
        d1 = RVs[RVNum-1,]
        d2 = sum(numpy.ndarray.flatten(
                RVs[numpy.int8(
                        numpy.setdiff1d(numpy.linspace(0,NumRVs-1,NumRVs),RVNum))]))
        SumD = SumD + (d1/d2)
    SumD = SumD/NumRVs
    return SumD


# Evaluate the fitted classifier RVCMod on the resampled HC-vs-MCI test set.

# relevance_ is read from the fitted model and passed to RVMFeatImp.
# NOTE(review): presumably these are the model's relevance vectors and
# DVals is a per-feature importance score -- confirm against RVMFeatImp.
RVs = RVCMod.relevance_
DVals = RVMFeatImp(RVs)

# Class probabilities feed the ROC plot; hard labels feed the confusion matrix.
RVCPred1 = RVCMod.predict_proba(XTestResHvM)
RVCPred2 = RVCMod.predict(XTestResHvM)

# Plot Receiver Operating Characteristic (ROC) Curve
scikitplot.metrics.plot_roc(YTestResHvM,RVCPred1, title = 'HCvMCI: RVC')
# Plot the Confusion Matrix for additional insight
scikitplot.metrics.plot_confusion_matrix(YTestResHvM,RVCPred2)

#%%
# Running RLR - HCvMCI

# Testing for multicollinearity

# rowvar=False tells np.corrcoef that the columns of HCvMCI are the variables.
coef1 = np.corrcoef(HCvMCI, rowvar = False)
# coef1 is a 2-D matrix, so hist receives a 2-D input.
plt.hist(coef1)

#resampling w/ SMOTE to account for uneven sampling
Example #8
0
# Labels for the first 690 entries of full_class_array.
p1_classes = full_class_array[0:690]

# Normalise the features before PCA.
# NOTE(review): assumes `preprocessing` is sklearn.preprocessing -- confirm.
p1_normal_data = preprocessing.scale(p1_data)

# PCA: fit on the normalised data, then project that same data.
# NOTE(review): assumes sklearn's PCA, where the first positional argument
# is the number of components (10 here) -- confirm.
pca = PCA(10, svd_solver='auto')
pca.fit(p1_normal_data)
p1_pca_data = pca.transform(p1_normal_data)

p1_pca_data  # bare expression: displays in an interactive session only
#classes_numbered, class_numbers_names = class2numbers(p1_classes)

## RVM classification
clf1 = RVC()
clf1.fit(p1_pca_data, p1_classes)

# Predictions are made on the same data the model was fitted on, so the
# count below measures training-set agreement, not held-out performance.
pred = clf1.predict(p1_pca_data)

# Number of predictions that match the true class.
correct = 0
for i in range(np.size(pred, 0)):
    if pred[i] == p1_classes[i]:
        correct += 1

clf1  # bare expression: displays in an interactive session only

# get_params has to be *called* to obtain the parameters; without the
# parentheses `params` was the bound method itself and `params.alpha_`
# raised AttributeError. The fitted attribute is read from the estimator.
# NOTE(review): assumes RVC exposes `alpha_` after fit -- confirm.
params = clf1.get_params()
clf1.alpha_

# Bare reference to the method (not a call); it has no effect in a script.
clf1.predict_proba
    SumD = 0
    for RVNum in range(1, NumRVs):
        d1 = RVs[RVNum - 1, ]
        d2 = sum(
            np.ndarray.flatten(RVs[np.int8(
                np.setdiff1d(np.linspace(0, NumRVs - 1, NumRVs), RVNum))]))
        SumD = SumD + (d1 / d2)
    SumD = SumD / NumRVs
    return SumD


# Evaluate the fitted classifier RVCMod on the HC-vs-MCI test set.

# relevance_ is read from the fitted model and passed to RVMFeatImp.
# NOTE(review): presumably these are the model's relevance vectors and
# DVals is a per-feature importance score -- confirm against RVMFeatImp.
RVs = RVCMod.relevance_
DVals = RVMFeatImp(RVs)

# Class probabilities feed the ROC plot; hard labels feed the confusion matrix.
RVCPred1 = RVCMod.predict_proba(RXTestHvM)
RVCPred2 = RVCMod.predict(RXTestHvM)
# Evaluate Performance (DON'T RELY ON ACCURACY!!!)
# Plot Receiver Operating Characteristic (ROC) Curve
scikitplot.metrics.plot_roc(RYTestHvM, RVCPred1, title='HCvMCI: RVC')
# Plot the Confusion Matrix for additional insight
scikitplot.metrics.plot_confusion_matrix(RYTestHvM, RVCPred2)

#%%
# Running RLR - HCvMCI

# Testing for multicollinearity

# rowvar=False tells np.corrcoef that the columns of the input are the variables.
coef1 = np.corrcoef(HCvMCI, rowvar=False)
# coef1 is a 2-D matrix, so hist receives a 2-D input.
plt.hist(coef1)

# Same correlation matrix for the MCIvAD data.
coef2 = np.corrcoef(MCIvAD, rowvar=False)
Example #10
0
          full_isAnimal_array[train_indicies])
# Score the previously fitted clf on the test split.
# NOTE: the returned score is not stored or printed here.
clf.score(full_normPCA123_array[test_indicies],
          full_isAnimal_array[test_indicies])

#predictions = clf.predict(full_normPCA128_array[test_indicies])
#
#corr=0
#for i in range(np.size(predictions)):
#    if predictions[i] == full_isAnimal_array[test_indicies][i]:
#        corr += 1

############ BINARY CASE
# Refit a fresh RBF-kernel RVC on the training split and predict the test split.
clf = RVC(kernel="rbf", coef1=63e-6)  # coef1:  1=46 0.1same
clf.fit(full_normPCA123_array[train_indicies],
        full_isAnimal_array[train_indicies])
pred = clf.predict(full_normPCA123_array[test_indicies])
corr = is_correct(pred, full_isAnimal_array[test_indicies])
# N is the number of test labels.
N = np.size(full_isAnimal_array[test_indicies])
# NOTE(review): despite its name this is not a plain correct/total ratio;
# confirm what is_correct returns and what this quantity is meant to be.
accuracy = np.sqrt(corr * (100 - corr) / (100 * N))
################# Creating the most beautiful graphs
# Per-subclass tally of the binary predictions: row 0 counts test samples
# whose prediction matches full_isAnimal_array, row 1 counts mismatches;
# one column per unique value of full_subClass_array.
classes = np.unique(full_subClass_array)
confu_matrix = np.zeros((2, np.size(classes)))
for i in range(np.size(classes)):
    for j in range(np.size(pred)):
        if full_subClass_array[test_indicies[j]] == classes[i]:
            if pred[j] == full_isAnimal_array[test_indicies[j]]:  #TRUE
                confu_matrix[0, i] += 1
            else:  #FALSE
                confu_matrix[1, i] += 1

# Same shape as confu_matrix, initialised to zeros; filled in below this excerpt.
confu_matrix_percent = np.zeros(np.shape(confu_matrix))