def build_and_run_rvc(X_train_scaled, y_train, X_test_scaled, y_test):
    """
    Takes: scaled training and testing data.
    Returns: time to fit and time to predict; also prints the RVM
    hyperparameters, confusion matrix and classification report.
    """
    # build RVC
    rvc_model = RVC()

    print("fitting RVM:")
    start = time.time()
    rvc_model.fit(X_train_scaled, y_train)
    delta0 = time.time() - start
    print("time to fit RVM: ", delta0)

    start = time.time()
    rvc_predict = rvc_model.predict(X_test_scaled)
    delta1 = time.time() - start
    print("time to predict with RVM: ", delta1)

    # print parameters
    print("RVM hyperparameters:")
    print(rvc_model.get_params())

    # evaluate RVC
    print(helpers.confusion_matrix(y_test, rvc_predict))
    print(classification_report(y_test, rvc_predict))

    return delta0, delta1
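# --- Added usage sketch (not part of the original snippet) ---
# A minimal example of how build_and_run_rvc above might be called; it assumes
# the function's own dependencies (time, skrvm.RVC, sklearn's
# classification_report and the local `helpers` module) are importable, and
# uses the iris data purely for illustration.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                     random_state=0)
scaler = StandardScaler()
fit_time, predict_time = build_and_run_rvc(scaler.fit_transform(X_train),
                                           y_train,
                                           scaler.transform(X_test),
                                           y_test)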
def test_fit_two_classes(self):
    """Check that fitting with two classes works directly."""
    clf = RVC()
    X = np.array([[1, 2], [2, 1]])
    y = np.array(['A', 'B'])
    clf.fit(X, y)
    np.testing.assert_array_equal(clf.classes_, np.array(['A', 'B']))
def test_fit_three_classes(self):
    """Check that fitting with three classes uses OneVsOneClassifier."""
    clf = RVC()
    X = np.array([[1, 2], [2, 1], [2, 2]])
    y = np.array(['A', 'B', 'C'])
    clf.fit(X, y)
    self.assertIsInstance(clf.multi_, OneVsOneClassifier)
    np.testing.assert_array_equal(clf.classes_, np.array(['A', 'B', 'C']))
def test_classification_three_classes(self):
    """Check classification works with three classes."""
    iris = load_iris()
    X = iris.data
    y = iris.target
    clf = RVC()
    clf.fit(X, y)
    self.assertGreater(clf.score(X, y), 0.95)
def test_predict_three_classes(self):
    """Check predict works with three classes."""
    clf = RVC(kernel='linear')
    X = np.array([[5, 5], [5, -5], [-5, 0]])
    y = np.array(['A', 'B', 'C'])
    clf.fit(X, y)
    prediction = clf.predict(np.array([[10, 10]]))
    np.testing.assert_array_equal(prediction, np.array(['A']))
def test_fit_one_class(self):
    """Check that fitting with only one class raises an exception."""
    clf = RVC()
    X = np.array([[1, 2], [2, 1]])
    y = np.array(['A', 'A'])
    try:
        clf.fit(X, y)
    except ValueError as error:
        self.assertEqual(str(error), "Need 2 or more classes.")
    else:
        self.fail()
def test_predict_two_classes(self):
    """Check that predict works with two classes."""
    clf = RVC(kernel='linear')
    X = np.array([
        [2, 1],
        [1, 2],
    ])
    y = np.array(['A', 'B'])
    clf.fit(X, y)
    prediction = clf.predict(np.array([[0, 3]]))
    np.testing.assert_array_equal(prediction, np.array(['A']))
def test_fit_two_classes_imbalanced(self):
    """Check that fitting with two classes works with unequal samples."""
    clf = RVC()
    X = np.array([
        [1, 2],
        [1, 4],
        [4, 2],
        [2, 1],
        [3, 1.5],
    ])
    y = np.array(['A', 'A', 'B', 'B', 'B'])
    clf.fit(X, y)
    np.testing.assert_array_equal(clf.classes_, np.array(['A', 'B']))
def rvc_analysis(random_seed, save_path):
    # Load the data
    # TODO: change the path
    save_path = os.path.join(save_path, 'random_seed_%03d' % random_seed)
    print('Random seed: %03d' % random_seed)

    # Load the saved validation dataset
    project_ukbio_wd, project_data_ukbio, _ = get_paths(debug, dataset)
    with open(os.path.join(save_path, 'splitted_dataset_%s.pickle' % dataset),
              'rb') as handle:
        splitted_dataset = pickle.load(handle)

    # Train the model
    model = RVC(kernel='linear')
    model.fit(splitted_dataset['Xtrain_scaled'], splitted_dataset['Ytrain'])

    # Make predictions
    print('Perform prediction in test data')
    y_prediction_test = model.predict(splitted_dataset['Xtest_scaled'])
    y_prediction_validation = model.predict(splitted_dataset['Xvalidate_scaled'])

    # -------------------------------------------------------------------------
    # Do some statistics. Calculate the confusion matrices.
    # Test dataset
    class_name = np.array(['young', 'old', 'adult'], dtype='U10')
    ax, cm_test = plot_confusion_matrix(splitted_dataset['Ytest'],
                                        y_prediction_test,
                                        classes=class_name,
                                        normalize=True)
    # Look at accuracy
    accuracy_test = accuracy_score(splitted_dataset['Ytest'], y_prediction_test)
    plt.savefig(os.path.join(save_path, 'confusion_matrix_test_rvc.eps'))

    # Validation dataset
    ax, cm_validation = plot_confusion_matrix(splitted_dataset['Yvalidate'],
                                              y_prediction_validation,
                                              classes=class_name,
                                              normalize=True)
    plt.savefig(os.path.join(save_path, 'confusion_matrix_validation_rvc.eps'))
    # Look at accuracy
    # (the original re-saved 'confusion_matrix_test_rvc.eps' here, which
    # overwrote the test figure with the validation figure; removed)
    accuracy_val = accuracy_score(splitted_dataset['Yvalidate'],
                                  y_prediction_validation)

    return cm_test, cm_validation, accuracy_test, accuracy_val
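# --- Added usage sketch (not part of the original snippet) ---
# One way rvc_analysis might be driven over several random seeds, collecting
# the accuracies it returns. It assumes the pickled split files already exist
# under save_path/random_seed_XXX/, that `save_path` is defined, and that the
# globals used inside rvc_analysis (debug, dataset) are set, as in the
# original script.
test_accuracies, val_accuracies = [], []
for seed in range(10):
    cm_test, cm_val, acc_test, acc_val = rvc_analysis(seed, save_path)
    test_accuracies.append(acc_test)
    val_accuracies.append(acc_val)
print('Mean test accuracy: %.3f' % np.mean(test_accuracies))
print('Mean validation accuracy: %.3f' % np.mean(val_accuracies))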
def test_classification_two_classes(self):
    """Check classification works with two classes."""
    iris = load_iris()
    X = iris.data[:, 1:]
    y = iris.target

    # Only 2 classes needed
    X = X[y != 0]
    y = y[y != 0]

    clf = RVC()
    clf.fit(X, y)
    self.assertGreater(clf.score(X, y), 0.95)

    # reshape the single sample to 2-D, as predict_proba expects
    prob = clf.predict_proba(X[0, :].reshape(1, -1))
    p_target = np.array([[0.999, 5.538e-4]])
    np.testing.assert_allclose(prob, p_target, rtol=1e-2, atol=1e-2)
def TrainMyRVM(XEstimate, XValidate, ClassLabelsEstimate, ClassLabelsValidate,
               Parameters=None):
    # convert one-hot class labels to integer labels
    training_labels = np.int8(np.zeros(ClassLabelsEstimate.shape[0]))
    validate_labels = np.int8(np.zeros(ClassLabelsValidate.shape[0]))
    for i in range(ClassLabelsEstimate.shape[0]):
        training_labels[i] = np.where(ClassLabelsEstimate[i] == 1)[0]
    for i in range(ClassLabelsValidate.shape[0]):
        validate_labels[i] = np.where(ClassLabelsValidate[i] == 1)[0]

    # get 4000 samples of training data
    # this will get the indices and will give the data and labels the same indices
    idx_training = np.random.choice(np.arange(len(training_labels)), 4000,
                                    replace=False)
    training_labels_sampled = training_labels[idx_training]
    XEstimate_sampled = XEstimate[idx_training]

    # get 1000 samples of validation data
    # this will get the indices and will give the data and labels the same indices
    idx_validate = np.random.choice(np.arange(len(validate_labels)), 1000,
                                    replace=False)
    XValidate_sampled = XValidate[idx_validate]
    ClassLabelsValidate_sampled = ClassLabelsValidate[idx_validate]

    # initialize RVM with classification (RVC class)
    rvm = RVC(kernel='rbf', n_iter=1, alpha=1.e-6, beta=1.e-6, verbose=True)
    # fit RVM
    rvm.fit(XEstimate_sampled, training_labels_sampled)
    # predict and return an array of classes for each input
    Yvalidate = rvm.predict(XValidate_sampled)

    EstParameters = rvm
    Rvectors = 1

    # return the sampled validation indices (the original returned an
    # undefined name `idx2`)
    return Yvalidate, EstParameters, Rvectors, ClassLabelsValidate_sampled, idx_validate
def RVM(X_hyper, Y_hyper, X_train, Y_train, X_validate, Y_validate, params):
    clf = RVC(n_iter=100, tol=0.1)
    start = time.time()  # time.clock() was removed in Python 3.8
    X_train_reduced = X_train
    X_validate_reduced = X_validate
    train_size = params['train_size']
    test_size = params['test_size']
    train = params['train']
    if train:
        clf.fit(X_train_reduced[:train_size, :], Y_train[:train_size])
        # the timing print was unreachable dead code after the returns;
        # report it here instead
        print("training took ", time.time() - start, "s")
        writeObj('rvm_model.pkl', clf)
        Y_pred = clf.predict(X_validate_reduced[:test_size])
        return Y_pred, clf
    else:
        clf = readObj('rvm_model.pkl')
        Y_pred = clf.predict(X_validate_reduced[:test_size])
        return Y_pred, clf
""" Authors: Mrunmayee Deshpande, Lu Gan, Bruce Huang, Abhishek Venkataraman """ import timeit from skrvm import RVC import numpy as np import os.path import scipy.io from import_data import import_data ## Set data path parsed_data_path = 'parsed_data/' [X, Y, valX, valY, testX, testY] = import_data(parsed_data_path) scipy.io.savemat('train.mat', dict(X=X, Y=Y)) scipy.io.savemat('val.mat', dict(valX=valX, valY=valY)) scipy.io.savemat('test.mat', dict(testX=testX, testY=testY)) ## Train a RVM clf = RVC(verbose=True) print(clf) clf.fit(valX, valY) clf.score(testX, testY)
# Reintegrate the validation set into the train/test sets for RVC
RXTrainHvM = HCvMCI[train_inds, ]
RYTrainHvM = YHvM[train_inds]
RXTestHvM = HCvMCI[test_inds, ]
RYTestHvM = YHvM[test_inds]

# resampling w/ SMOTE to account for uneven sampling
[XTrainResHvM, YTrainResHvM] = SMOTE(random_state=100,
                                     k_neighbors=3).fit_resample(RXTrainHvM, RYTrainHvM)
[XTestResHvM, YTestResHvM] = SMOTE(random_state=100,
                                   k_neighbors=3).fit_resample(RXTestHvM, RYTestHvM)

from skrvm import RVC
RVCMod = RVC(kernel='linear', verbose=True)
RVCMod.fit(XTrainResHvM, YTrainResHvM)  # fixed case: was `XTrainResHvm`

# create feature importance evaluation function
def RVMFeatImp(RVs):
    NumRVs = RVs.shape[0]
    SumD = 0
    for RVNum in range(1, NumRVs):
        d1 = RVs[RVNum - 1, ]
        d2 = sum(numpy.ndarray.flatten(
            RVs[numpy.int8(
                numpy.setdiff1d(numpy.linspace(0, NumRVs - 1, NumRVs), RVNum))]))
        SumD = SumD + (d1 / d2)
    SumD = SumD / NumRVs
    return SumD
def TrainMyClassifier(XEstimate, ClassLabels, XValidate, Parameters):
    # RVM
    if Parameters['algorithm'] == 'RVM':
        Parameters = Parameters['parameters']
        clf = RVC(alpha=Parameters.get('alpha'),
                  beta=Parameters.get('beta'),
                  n_iter=Parameters.get('n_iter'))
        clf.fit(XEstimate, ClassLabels)
        if np.shape(clf.classes_)[0] == 2:
            Yvalidate = clf.predict_proba(XValidate)
        else:
            # module-level helper (defined elsewhere) for the multi-class case
            Yvalidate = predict_proba(clf, XValidate)
        EstParameters = get_params(clf)
        return Yvalidate, EstParameters

    # SVM
    elif Parameters['algorithm'] == 'SVM':
        svc = get_svc(Parameters)
        svc_train(svc, XEstimate, ClassLabels)
        prob = svc_probability(svc, XValidate)
        EstParameters = svc_get_para(svc)
        prob_std = np.ndarray.std(prob, axis=1)[:, np.newaxis]
        sigmoid = 1 - expit(prob_std)
        Yvalidate = np.concatenate([prob, sigmoid], axis=1)
        Yvalidate = Yvalidate / np.repeat(
            (sigmoid + 1), axis=1, repeats=len(svc.classes_) + 1)
        return Yvalidate, EstParameters

    # GPR
    elif Parameters["algorithm"] == "GPR":
        # get the classes from the labels
        classes = np.unique(ClassLabels, axis=0)
        # keep the sorted result (the original discarded sorted()'s return value)
        classes = sorted(classes, reverse=True)
        num_class = len(classes)

        # get data and label based on classes
        data = []
        for cla in classes:
            data.append(XEstimate[ClassLabels == cla])
        target = []
        for cla in classes:
            target.append(ClassLabels[ClassLabels == cla])

        # put data and label into a matrix, so that we can do an easier
        # calculation for the probability
        # the following calculation is all based on the matrix
        data_matrix = []
        for i in range(num_class - 1):
            data_matrix.append([])
            for j in range(num_class - 1):
                data_matrix[i].append(None)
        target_matrix = []
        for i in range(num_class - 1):
            target_matrix.append([])
            for j in range(num_class - 1):
                target_matrix[i].append(None)
        for i in range(num_class - 1):
            for j in range(i, num_class - 1):
                data_matrix[i][j] = np.concatenate([data[i], data[j + 1]], axis=0)
                target_matrix[i][j] = np.concatenate(
                    [target[i], target[j + 1]], axis=0)

        classifier_matrix = []
        for i in range(num_class - 1):
            classifier_matrix.append([])
            for j in range(num_class - 1):
                classifier_matrix[i].append(None)
        for i in range(num_class - 1):
            for j in range(i, num_class - 1):
                gpc_classifier = GaussianProcessClassifier(
                    kernel=Parameters["parameters"]["kernel"],
                    optimizer=Parameters["parameters"]["optimizer"],
                    n_restarts_optimizer=Parameters["parameters"]
                    ["n_restarts_optimizer"],
                    max_iter_predict=Parameters["parameters"]
                    ["max_iter_predict"],
                    warm_start=Parameters["parameters"]["warm_start"],
                    copy_X_train=Parameters["parameters"]["copy_X_train"],
                    random_state=Parameters["parameters"]["random_state"],
                    multi_class="one_vs_rest",
                    n_jobs=Parameters["parameters"]["n_jobs"])
                gpc_classifier.fit(data_matrix[i][j], target_matrix[i][j])
                classifier_matrix[i][j] = gpc_classifier

        out_matrix = []
        for i in range(num_class - 1):
            out_matrix.append([])
            for j in range(num_class - 1):
                out_matrix[i].append(None)
        for i in range(num_class - 1):
            for j in range(i, num_class - 1):
                out_matrix[i][j] = classifier_matrix[i][j].predict_proba(
                    XValidate)

        # calculate the whole prediction prob
        val_shape = XValidate.shape[0]
        predict_prob_list = []
        for i in range(num_class):
            predict_prob_list.append(np.zeros(shape=[val_shape, 1]))
        for i in range(num_class - 1):
            for j in range(i, num_class - 1):
                predict_prob_list[i] += out_matrix[i][j][:, 0][:, np.newaxis] / (
                    num_class * 2)
                predict_prob_list[
                    j + 1] += out_matrix[i][j][:, 1][:, np.newaxis] / (num_class * 2)

        # get the result of num_class probability
        result = np.concatenate(predict_prob_list, axis=1)

        # calculate the probability for the one more class
        std = np.std(result, axis=1)[:, np.newaxis]
        other_prob = np.exp(-std) / (1 + np.exp(std * 5))
        result = np.concatenate([result, other_prob], axis=1)
        result = result / np.repeat(
            (other_prob + 1), axis=1, repeats=num_class + 1)

        # put all the parameters into a dict
        estParameters = {}
        estParameters["class_num"] = num_class
        estParameters["parameters"] = []
        for i in range(num_class - 1):
            for j in range(i, num_class - 1):
                estParameters["parameters"].append({
                    "log_marginal_likelihood_value_":
                    classifier_matrix[i][j].log_marginal_likelihood_value_,
                    "classes_": classifier_matrix[i][j].classes_,
                    "n_classes_": classifier_matrix[i][j].n_classes_,
                    "base_estimator_": classifier_matrix[i][j].base_estimator_
                })
        return result, estParameters
from skrvm import RVC
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split


def load_data():
    f = open('F:/importantfilecopy/tandomrepeat/2019.121test/5results', 'r')
    # f = open('F:/importantfilecopy/tandomrepeat/3/1x/total.txt', 'r')
    data_set = []
    label_set = []
    for line in f:
        line = line.strip().split("\t")
        line_list = line[0].strip().split(" ")
        # convert the space-separated feature fields to floats
        data_set.append([float(v) for v in line_list])
        label_set.append(line[1])
    return data_set, label_set


data, label = load_data()
X_train, X_test, y_train, y_test = train_test_split(data, label,
                                                     test_size=0.3,
                                                     random_state=0)
clf = RVC()
# clf.fit(rvm_data, rvm_target)
# print(clf.score(rvm_data, rvm_target))
clf.fit(X_train, y_train)
scoring = 'accuracy'
scores = cross_val_score(clf, X_test, y_test, cv=7)
print(scores.mean())
p1_classes = full_class_array[0:690]

# normalize
p1_normal_data = preprocessing.scale(p1_data)  # normalize

# PCA
pca = PCA(10, svd_solver='auto')
pca.fit(p1_normal_data)
p1_pca_data = pca.transform(p1_normal_data)  # transform data to xx components
p1_pca_data

# classes_numbered, class_numbers_names = class2numbers(p1_classes)

## RVM classification
clf1 = RVC()
clf1.fit(p1_pca_data, p1_classes)
pred = clf1.predict(p1_pca_data)

correct = 0
for i in range(np.size(pred, 0)):
    if pred[i] == p1_classes[i]:
        correct += 1

clf1
params = clf1.get_params()        # was `clf1.get_params` (missing the call)
clf1.alpha_                       # fitted alpha values (was `params.alpha_`)
clf1.predict_proba(p1_pca_data)   # class membership probabilities
from skrvm import RVC
from sklearn.datasets import load_iris

if __name__ == "__main__":
    clf = RVC()
    clf.verbose = True  # Print iteration, alpha, beta, gamma, m, Relevance vectors
    data = load_iris()
    trainData = data.data
    trainTargets = data.target
    print(clf.fit(trainData, trainTargets))
    # print(clf.score(trainData, trainTargets))
YTrainHvM = YHvM[TrainInds]
XValHvM = HCvMCI[ValInds, ]
YValHvM = YHvM[ValInds]
XTestHvM = HCvMCI[TestInds, ]
YTestHvM = YHvM[TestInds]

# Running RVC - HCvMCI
RXTrainHvM = HCvMCI[train_inds, ]
RYTrainHvM = YHvM[train_inds]
RXTestHvM = HCvMCI[test_inds, ]
RYTestHvM = YHvM[test_inds]

from skrvm import RVC
RVCMod = RVC(kernel='linear', verbose=True)
RVCMod.fit(RXTrainHvM, RYTrainHvM)


def RVMFeatImp(RVs):
    NumRVs = RVs.shape[0]
    SumD = 0
    for RVNum in range(1, NumRVs):
        d1 = RVs[RVNum - 1, ]
        d2 = sum(
            np.ndarray.flatten(RVs[np.int8(
                np.setdiff1d(np.linspace(0, NumRVs - 1, NumRVs), RVNum))]))
        SumD = SumD + (d1 / d2)
    SumD = SumD / NumRVs
    return SumD
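# --- Added usage sketch (not part of the original snippet) ---
# RVMFeatImp is defined above but never called here. A minimal, hedged
# example of applying it: this assumes a two-class fit, where scikit-rvm
# exposes the retained relevance vectors on the fitted model as `relevance_`
# (for more than two classes they live on the underlying one-vs-one
# estimators instead).
FeatImp = RVMFeatImp(RVCMod.relevance_)
print('Per-feature importance scores:', FeatImp)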
New[i, :] = n.params

# mdl = smt.AR(d_tot[100,:]).fit(maxlag=30, ic='aic', trend='nc')
# est_order = smt.AR(d_tot[1,:]).select_order(maxlag=30, ic='aic', trend='nc')
# print(est_order)
# p_orders = np.zeros([1380, 1])
# for i in range(1380):
#     X = AR(d_tot[i, :])
#     n = X.fit(maxlag=4, ic='aic')
#     p_orders[i, 0] = len(n.params)
# np.mean(p_orders)
# plt.scatter(New[:, 1], New[:, 3])

An_lab = Animal_label(tot)

# shuffle data to take them in random order
indx = random.sample(range(10350), 10350)
a = New[indx, :]
b = An_lab[indx]

# define training and test set
x_train = a[0:8000, :]
y_train = b[0:8000]
x_test = a[8001:, :]   # note: starts at 8001, so sample index 8000 is unused
y_test = b[8001:]

# RVM with 8000 of the 10350 samples as training data
from skrvm import RVC
clf1 = RVC(kernel='rbf')
clf1.fit(x_train, y_train)
clf1.score(x_test, y_test)
feature_scaler = StandardScaler()
X_train = feature_scaler.fit_transform(X_train)
X_test = feature_scaler.transform(X_test)

####################################################################################################################
# CROSS VALIDATION SPLIT IN K-folds
####################################################################################################################
kf = KFold(n_splits=5)
kf.get_n_splits(X_train, X_test)
print(kf)

# CREATE WIDTHS FOR GRID SEARCH
width1 = np.linspace(-5, 4, 10)
width = 10 ** width1

# create matrix to input the scores and widths of each iteration
score_width = np.ones([len(width), 2])

##################
# FIRST for loop gives the values for different widths, SECOND for loop does Kfold_validation
for i in range(len(width)):
    score = 0
    for train_index, test_index in kf.split(X_train):
        print("TRAIN:", train_index, "TEST:", test_index)
        X_train1, X_test1 = X_train[train_index], X_train[test_index]
        y_train1, y_test1 = y_train[train_index], y_train[test_index]
        clf1 = RVC(kernel='rbf', coef1=width[i])
        clf1.fit(X_train1, y_train1)
        score = score + clf1.score(X_test1, y_test1)
    score_width[i, 0] = score
    score_width[i, 1] = width[i]

####################################################################################################################
idx = np.argmax(score_width[:, 0])
best_width = score_width[idx, 1]
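# --- Added usage sketch (not part of the original snippet) ---
# A minimal follow-up to the width search above: refit on the full training
# set with the cross-validated best width and report held-out accuracy.
# It assumes X_test/y_test from the earlier split are available, as in the
# original script.
clf_final = RVC(kernel='rbf', coef1=best_width)
clf_final.fit(X_train, y_train)
print("best width:", best_width)
print("test accuracy:", clf_final.score(X_test, y_test))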
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 20 21:26:07 2018

@author: pan
"""
from skrvm import RVC
from sklearn.datasets import load_iris

clf = RVC()
iris = load_iris()
clf.fit(iris.data, iris.target)
# RVC(alpha=1e-06, beta=1e-06, beta_fixed=False, bias_used=True, coef0=0.0,
#     coef1=None, degree=3, kernel='rbf', n_iter=3000, n_iter_posterior=50,
#     threshold_alpha=1000000000.0, tol=0.001, verbose=False)
clf.score(iris.data, iris.target)
X = full_normPCA123_array[train_indicies]
Y = full_isAnimal_array[train_indicies]
params = rvc_param_selection(X, Y, 5)
# params = rvc_param_selection2(X, Y, 5)

#################### TO TEST
test_err = np.zeros((15))
train_err = np.zeros((15))
coef = np.multiply(
    [1000, 500, 200, 150, 100, 70, 60, 50, 40, 30, 20, 10, 5, 2, 1], 1e-6)
for i in range(15):
    # coef[i] = 5e-5 - (i + 1) * 1e-5
    clf = RVC(kernel="rbf", coef1=coef[i])  # coef1: 1=46 0.1same
    clf.fit(full_normPCA123_array[train_indicies],
            full_subClass_array[train_indicies])
    train_err[i] = clf.score(full_normPCA123_array[train_indicies],
                             full_subClass_array[train_indicies])
    test_err[i] = clf.score(full_normPCA123_array[test_indicies],
                            full_subClass_array[test_indicies])
    print(coef[i])
    print(train_err[i])
    print(test_err[i])
    print("\n\n")

#####################################################################################################################
rvm_tested_coef1 = np.load("rvm_tested_coef1.npy")
rvm_tested_coef1_again = np.load("rvm_tested_coef1_again.npy")
train1 = np.load("rvm_tested_coef1_trainerr.npy")
train2 = np.load("rvm_tested_coef1_trainerr_again.npy")
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)

import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")

# RVM
from skrvm import RVC
clf = RVC()

### read data from single csv file and transform to a list
dir = '/Users/weng/Downloads/'
# raw = pd.read_csv(os.path.join(dir, 'test.txt'), header=0, delimiter=';')
raw_list = pd.read_csv(os.path.join(dir, 'test.txt'), sep="\t", header=None)

# array format is needed for further processing (df -> list -> matrix(array))
raw_X = raw_list[0].values.tolist()
raw_X = np.asarray(raw_X)
raw_y = raw_list[1].values.tolist()
raw_y = np.asarray(raw_y)

# was a Python 2 print referring to the commented-out `raw`
print("Read %d rows of data\n" % len(raw_list))

# note: fit expects a 2-D feature array, so a single feature column may need
# to be reshaped to (-1, 1) before this call
clf.fit(raw_X, raw_y)

from sklearn.datasets import load_iris
iris = load_iris()
iris.data
iris.target
clf.fit(iris.data, iris.target)
clf.score(iris.data, iris.target)