def build_and_run_rvc(X_train_scaled, y_train, X_test_scaled, y_test):
    """Fit a default RVC, predict on the test data and print a report.

    Args:
        X_train_scaled: training features handed to ``fit``.
        y_train: training labels handed to ``fit``.
        X_test_scaled: test features handed to ``predict``.
        y_test: reference labels used for the printed evaluation.

    Returns:
        Tuple ``(fit_seconds, predict_seconds)``: the ``time.time()``
        differences measured around ``fit`` and ``predict``.
    """
    classifier = RVC()

    # Time the fit on its own.
    print("fitting RVM:")
    fit_started = time.time()
    classifier.fit(X_train_scaled, y_train)
    fit_seconds = time.time() - fit_started
    print("time to fit RVM: ", fit_seconds)

    # Time the prediction on its own.
    predict_started = time.time()
    predictions = classifier.predict(X_test_scaled)
    predict_seconds = time.time() - predict_started
    print("time to predict with RVM: ", predict_seconds)

    # Show the estimator's parameters.
    print("RVM hyperparameters:")
    print(classifier.get_params())

    # Evaluation: confusion matrix first, then the classification report.
    confusion = helpers.confusion_matrix(y_test, predictions)
    print(confusion)
    report = classification_report(y_test, predictions)
    print(report)

    return fit_seconds, predict_seconds
def rvc_analysis(random_seed, save_path):
    """Train a linear-kernel RVC on a pickled data split and evaluate it.

    The split is read from
    ``<save_path>/random_seed_XXX/splitted_dataset_<dataset>.pickle`` and the
    confusion-matrix figures are written into the same directory.

    Args:
        random_seed: integer seed; selects the ``random_seed_%03d``
            sub-directory of ``save_path``.
        save_path: base directory that holds the per-seed sub-directories.

    Returns:
        Tuple ``(cm_test, cm_validation, accuracy_test, accuracy_val)``: the
        confusion matrices returned by ``plot_confusion_matrix`` and the
        ``accuracy_score`` values for the test and validation splits.
    """
    # TODO: change the path
    save_path = os.path.join(save_path, 'random_seed_%03d' % random_seed)
    print('Random seed: %03d' % random_seed)

    # NOTE(review): `debug` and `dataset` are not parameters of this function;
    # they are read from the enclosing module scope. The paths returned here
    # are not used below.
    project_ukbio_wd, project_data_ukbio, _ = get_paths(debug, dataset)

    # Load the saved split.
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    split_file = os.path.join(save_path, 'splitted_dataset_%s.pickle' % dataset)
    with open(split_file, 'rb') as handle:
        splitted_dataset = pickle.load(handle)

    # Train the model
    model = RVC(kernel='linear')
    model.fit(splitted_dataset['Xtrain_scaled'], splitted_dataset['Ytrain'])

    # Predict on the held-out test and validation splits.
    print('Perform prediction in test data')
    y_prediction_test = model.predict(splitted_dataset['Xtest_scaled'])
    y_prediction_validation = model.predict(splitted_dataset['Xvalidate_scaled'])

    class_name = np.array(['young', 'old', 'adult'], dtype='U10')

    # Test split: confusion matrix, accuracy, figure.
    ax, cm_test = plot_confusion_matrix(splitted_dataset['Ytest'],
                                        y_prediction_test,
                                        classes=class_name,
                                        normalize=True)
    accuracy_test = accuracy_score(splitted_dataset['Ytest'], y_prediction_test)
    plt.savefig(os.path.join(save_path, 'confusion_matrix_test_rvc.eps'))

    # Validation split: confusion matrix, figure, accuracy.
    ax, cm_validation = plot_confusion_matrix(splitted_dataset['Yvalidate'],
                                              y_prediction_validation,
                                              classes=class_name,
                                              normalize=True)
    plt.savefig(os.path.join(save_path, 'confusion_matrix_validation_rvc.eps'))
    accuracy_val = accuracy_score(splitted_dataset['Yvalidate'],
                                  y_prediction_validation)

    # The test figure is deliberately NOT saved a second time here: doing so
    # after the validation plot would overwrite the test figure saved above.
    return cm_test, cm_validation, accuracy_test, accuracy_val
def test_predict_three_classes(self):
    """Predict returns the expected label after fitting three classes."""
    # One training sample per class.
    samples = np.array([[5, 5], [5, -5], [-5, 0]])
    labels = np.array(['A', 'B', 'C'])

    model = RVC(kernel='linear')
    model.fit(samples, labels)

    query = np.array([[10, 10]])
    np.testing.assert_array_equal(model.predict(query), np.array(['A']))
def RVM(X_hyper, Y_hyper, X_train, Y_train, X_validate, Y_validate, params):
    """Train (or reload) an RVC model and predict on the validation data.

    Args:
        X_hyper, Y_hyper: accepted for interface compatibility; not used.
        X_train: training features; only the first ``params['train_size']``
            rows are used for fitting.
        Y_train: training labels, truncated the same way as ``X_train``.
        X_validate: validation features; only the first
            ``params['test_size']`` rows are predicted.
        Y_validate: accepted for interface compatibility; not used.
        params: mapping with the keys ``'train_size'``, ``'test_size'`` and
            ``'train'``. When ``params['train']`` is truthy a new model is
            fitted and stored via ``writeObj('rvm_model.pkl', ...)``;
            otherwise the model is restored via ``readObj('rvm_model.pkl')``.

    Returns:
        Tuple ``(Y_pred, clf)``: the predictions and the classifier used.
    """
    # time.clock() was removed in Python 3.8; fall back to time.time() on
    # interpreters that do not provide perf_counter.
    timer = getattr(time, 'perf_counter', time.time)
    start = timer()

    train_size = params['train_size']
    test_size = params['test_size']
    train = params['train']

    if train:
        clf = RVC(n_iter=100, tol=0.1)
        clf.fit(X_train[:train_size, :], Y_train[:train_size])
        writeObj('rvm_model.pkl', clf)
    else:
        clf = readObj('rvm_model.pkl')

    Y_pred = clf.predict(X_validate[:test_size])

    # Previously this report sat after two returning branches and never ran.
    # A single pre-formatted string prints the same on Python 2 and 3.
    print("training took %s s" % (timer() - start))
    return Y_pred, clf
def test_predict_two_classes(self):
    """Predict returns the expected label after fitting two classes."""
    # One training sample per class.
    samples = np.array([
        [2, 1],
        [1, 2],
    ])
    labels = np.array(['A', 'B'])

    model = RVC(kernel='linear')
    model.fit(samples, labels)

    query = np.array([[0, 3]])
    np.testing.assert_array_equal(model.predict(query), np.array(['A']))
def TrainMyRVM(XEstimate, XValidate, ClassLabelsEstimate, ClassLabelsValidate, Parameters=None):
    """Fit an RBF-kernel RVC on a random subsample and predict a validation subsample.

    Args:
        XEstimate: training features, indexable by an integer index array.
        XValidate: validation features, indexable by an integer index array.
        ClassLabelsEstimate: 2-D training label array; each row is decoded to
            the column index whose value equals 1.
            # assumes exactly one such column per row (one-hot) -- TODO confirm
        ClassLabelsValidate: validation label array with one row per sample;
            returned (subsampled) in its original encoding.
        Parameters: accepted for interface compatibility; not used.

    Returns:
        Tuple ``(Yvalidate, EstParameters, Rvectors,
        ClassLabelsValidate_sampled, idx_validate)``:
        predictions for the validation subsample, the fitted RVC, the
        constant ``1`` (placeholder kept from the original code), the
        validation labels of the subsample, and the indices that were drawn
        from the validation set.
    """
    # Decode each label row to the index of the column equal to 1.
    training_labels = np.argmax(ClassLabelsEstimate == 1, axis=1).astype(np.int8)

    # Draw up to 4000 training samples without replacement; the same indices
    # select both the features and the labels. The cap avoids a ValueError
    # when fewer samples are available.
    n_training = min(4000, len(training_labels))
    idx_training = np.random.choice(len(training_labels), n_training, replace=False)
    training_labels_sampled = training_labels[idx_training]
    XEstimate_sampled = XEstimate[idx_training]

    # Draw up to 1000 validation samples the same way.
    n_available = ClassLabelsValidate.shape[0]
    n_validate = min(1000, n_available)
    idx_validate = np.random.choice(n_available, n_validate, replace=False)
    XValidate_sampled = XValidate[idx_validate]
    ClassLabelsValidate_sampled = ClassLabelsValidate[idx_validate]

    # initialize RVM with classification (RVC class)
    rvm = RVC(kernel='rbf', n_iter=1, alpha=1.e-6, beta=1.e-6, verbose=True)
    rvm.fit(XEstimate_sampled, training_labels_sampled)

    # predict and return an array of classes for each input
    Yvalidate = rvm.predict(XValidate_sampled)
    EstParameters = rvm
    Rvectors = 1

    # The original returned the undefined name `idx2` (NameError); the
    # validation indices are the ones computed above.
    return Yvalidate, EstParameters, Rvectors, ClassLabelsValidate_sampled, idx_validate
SumD = 0 for RVNum in range(1,NumRVs): d1 = RVs[RVNum-1,] d2 = sum(numpy.ndarray.flatten( RVs[numpy.int8( numpy.setdiff1d(numpy.linspace(0,NumRVs-1,NumRVs),RVNum))])) SumD = SumD + (d1/d2) SumD = SumD/NumRVs return SumD RVs = RVCMod.relevance_ DVals = RVMFeatImp(RVs) RVCPred1 = RVCMod.predict_proba(XTestResHvM) RVCPred2 = RVCMod.predict(XTestResHvM) # Plot Receiver Operating Characteristic (ROC) Curve scikitplot.metrics.plot_roc(YTestResHvM,RVCPred1, title = 'HCvMCI: RVC') # Plot the Confusion Matrix for additional insight scikitplot.metrics.plot_confusion_matrix(YTestResHvM,RVCPred2) #%% # Running RLR - HCvMCI #Testing for multicollinearity coef1 = np.corrcoef(HCvMCI, rowvar = False) plt.hist(coef1) #resampling w/ SMOTE to account for uneven sampling
# Labels for the first 690 entries of the full label array.
p1_classes = full_class_array[0:690]

# normalize
p1_normal_data = preprocessing.scale(p1_data)

# PCA: keep 10 components
pca = PCA(10, svd_solver='auto')
pca.fit(p1_normal_data)
p1_pca_data = pca.transform(p1_normal_data)  # transform data to 10 components
p1_pca_data
#classes_numbered, class_numbers_names = class2numbers(p1_classes)

## RVM classification
clf1 = RVC()
clf1.fit(p1_pca_data, p1_classes)

# NOTE(review): predictions are made on the same data the model was fitted
# on, so `correct` counts hits on the training data.
pred = clf1.predict(p1_pca_data)
correct = sum(1 for predicted, actual in zip(pred, p1_classes)
              if predicted == actual)

clf1
# get_params must be called; it was previously bound without parentheses, so
# the attribute access on the next line raised AttributeError.
params = clf1.get_params()
# NOTE(review): this reads the `alpha` constructor parameter; if a fitted
# attribute was intended instead, inspect it on clf1 -- confirm.
params['alpha']
clf1.predict_proba
SumD = 0 for RVNum in range(1, NumRVs): d1 = RVs[RVNum - 1, ] d2 = sum( np.ndarray.flatten(RVs[np.int8( np.setdiff1d(np.linspace(0, NumRVs - 1, NumRVs), RVNum))])) SumD = SumD + (d1 / d2) SumD = SumD / NumRVs return SumD RVs = RVCMod.relevance_ DVals = RVMFeatImp(RVs) RVCPred1 = RVCMod.predict_proba(RXTestHvM) RVCPred2 = RVCMod.predict(RXTestHvM) # Evaluate Performance (DON'T RELY ON ACCURACY!!!) # Plot Receiver Operating Characteristic (ROC) Curve scikitplot.metrics.plot_roc(RYTestHvM, RVCPred1, title='HCvMCI: RVC') # Plot the Confusion Matrix for additional insight scikitplot.metrics.plot_confusion_matrix(RYTestHvM, RVCPred2) #%% # Running RLR - HCvMCI #Testing for multicollinearity coef1 = np.corrcoef(HCvMCI, rowvar=False) plt.hist(coef1) coef2 = np.corrcoef(MCIvAD, rowvar=False)
full_isAnimal_array[train_indicies]) clf.score(full_normPCA123_array[test_indicies], full_isAnimal_array[test_indicies]) #predictions = clf.predict(full_normPCA128_array[test_indicies]) # #corr=0 #for i in range(np.size(predictions)): # if predictions[i] == full_isAnimal_array[test_indicies][i]: # corr += 1 ############BINARY CASE clf = RVC(kernel="rbf", coef1=63e-6) # coef1: 1=46 0.1same clf.fit(full_normPCA123_array[train_indicies], full_isAnimal_array[train_indicies]) pred = clf.predict(full_normPCA123_array[test_indicies]) corr = is_correct(pred, full_isAnimal_array[test_indicies]) N = np.size(full_isAnimal_array[test_indicies]) accuracy = np.sqrt(corr * (100 - corr) / (100 * N)) #################Creating the most beatiful graphs classes = np.unique(full_subClass_array) confu_matrix = np.zeros((2, np.size(classes))) for i in range(np.size(classes)): for j in range(np.size(pred)): if full_subClass_array[test_indicies[j]] == classes[i]: if pred[j] == full_isAnimal_array[test_indicies[j]]: #TRUE confu_matrix[0, i] += 1 else: #FALSE confu_matrix[1, i] += 1 confu_matrix_percent = np.zeros(np.shape(confu_matrix))