def test_regression_sinc(self):
    """Fit a default RVR to a noisy sinc curve and check the learned model.

    The training set is 101 evenly spaced points on [0, 10]; Gaussian noise
    scaled by 0.1 is added after seeding NumPy's global RNG with 1 so the
    expected numbers below are reproducible.
    """
    grid = np.linspace(0, 10, 101)
    clean = np.sinc(grid)
    np.random.seed(1)
    noisy = clean + 0.1 * np.random.randn(clean.shape[0])
    features = grid[:, np.newaxis]

    model = RVR()
    model.fit(features, noisy)
    r2 = model.score(features, noisy)

    expected_m = [
        1.117655e+00, -6.334513e-01, 5.868671e-01, -4.370936e-01,
        2.320311e-01, -4.638864e-05, -7.505325e-02, 6.133291e-02,
    ]

    # Fit quality, posterior mean weights, and the retained relevance vectors.
    self.assertGreater(r2, 0.85)
    np.testing.assert_allclose(model.m_, expected_m, rtol=1e-3)
    self.assertEqual(model.relevance_.shape, (8, 1))

    # Point prediction together with its predictive MSE at x = 0.5.
    query = np.array([[0.5]])
    prediction, mse = model.predict(query, eval_MSE=True)
    self.assertAlmostEqual(prediction[0], 0.611, places=3)
    self.assertAlmostEqual(mse[0], 0.00930, places=5)
def test_regression_linear_noise(self):
    """Fit a linear-kernel RVR to y = x + 5 with added Gaussian noise.

    Inputs are the integers 1..100; noise scaled by 0.1 is drawn after
    seeding NumPy's global RNG with 1, which pins the expected values.
    """
    inputs = np.arange(1, 101)
    targets = inputs + 5
    np.random.seed(1)
    targets = targets + 0.1 * np.random.randn(targets.shape[0])
    features = inputs[:, np.newaxis]

    model = RVR(kernel='linear', alpha=1e11)
    model.fit(features, targets)
    r2 = model.score(features, targets)

    expected_m = np.array([1, 5])
    expected_relevance = np.array([[1]])

    self.assertGreater(r2, 0.99)
    np.testing.assert_allclose(model.m_, expected_m, rtol=1e-2)
    np.testing.assert_allclose(model.relevance_, expected_relevance)
    self.assertAlmostEqual(model.beta_, 126.583, places=3)

    # Point prediction together with its predictive MSE at x = 50.
    query = np.array([[50]])
    prediction, mse = model.predict(query, eval_MSE=True)
    self.assertAlmostEqual(prediction[0], 55.006, places=3)
    self.assertAlmostEqual(mse[0], 0.00798, places=5)
def test_fit(self):
    """Check ``fit`` on a three-sample problem with a linear kernel.

    With ``threshold_alpha=1e3`` the test expects every training sample to
    be kept in ``relevance_`` and ``m_`` to match the reference weights.
    """
    samples = np.array([[1], [2], [3]])
    targets = np.array([1, 2, 3])
    np.random.seed(1)  # makes the noise, and so the reference weights, reproducible
    targets = targets + 0.1 * np.random.randn(targets.shape[0])

    model = RVR(kernel='linear', threshold_alpha=1e3, verbose=True)
    model.fit(samples, targets)

    expected_m = np.array([0.065906, 0.131813, 0.197719, 0.159155])

    np.testing.assert_array_equal(model.relevance_, samples)
    np.testing.assert_allclose(model.m_, expected_m, rtol=1e-3)
def test_regression_linear(self):
    """Fit a linear-kernel RVR to the noise-free line y = x + 5."""
    inputs = np.arange(1, 100)
    targets = inputs + 5
    features = inputs[:, np.newaxis]

    model = RVR(kernel='linear', alpha=1e11)
    model.fit(features, targets)
    r2 = model.score(features, targets)

    expected_m = np.array([1, 5])

    self.assertGreater(r2, 0.99)
    np.testing.assert_allclose(model.m_, expected_m)

    # Point prediction together with its predictive MSE at x = 50.
    query = np.array([[50]])
    prediction, mse = model.predict(query, eval_MSE=True)
    self.assertAlmostEqual(prediction[0], 55, places=3)
    self.assertAlmostEqual(mse[0], 6.18e-6, places=3)
def rvr_pipeline(x, y, pca, kernel, x_p=0, y_p=0, fold=10, seed=2019, predict_data=False):
    """Cross-validate a PCA -> scaler -> RVR chain and optionally score held-out data.

    For each shuffled K-fold split, ``pca`` and the module-level ``scaler``
    are fitted on the training part only, the RVR is trained on the
    transformed training data, and the fold's mean absolute error is stored.
    Per-fold wall time and the mean MAE over all folds are printed.

    When ``predict_data`` is true, the whole chain is refitted on all of
    ``x``/``y`` and evaluated on ``x_p``/``y_p``, printing the test MAE.

    Args:
        x: Feature array, indexable by an array of row indices.
        y: Target array, indexable the same way.
        pca: Object exposing ``fit`` and ``transform``.
        kernel: Kernel name passed to ``RVR``.
        x_p: Features of the extra evaluation set (read only if ``predict_data``).
        y_p: Targets of the extra evaluation set (read only if ``predict_data``).
        fold: Number of K-fold splits.
        seed: ``random_state`` for the shuffled K-fold.
        predict_data: Whether to run the final fit/evaluate step.

    Returns:
        The result of the most recent ``predict`` call: predictions for
        ``x_p`` when ``predict_data`` is true.
        NOTE(review): the original source was collapsed onto one line, so it
        is unclear whether ``return pred`` sat inside the ``predict_data``
        branch. It is kept at function level here, which also returns the
        last fold's predictions when ``predict_data`` is false -- confirm.
    """
    model = RVR(kernel=kernel)
    splitter = KFold(n_splits=fold, shuffle=True, random_state=seed)
    fold_mae = np.zeros((fold,))

    for fold_no, (train_idx, test_idx) in enumerate(splitter.split(x, y), start=1):
        started = time.time()
        x_train, y_train = x[train_idx], y[train_idx]
        x_test, y_test = x[test_idx], y[test_idx]

        # Fit the transforms on the training part only, then apply to both.
        pca.fit(x_train)
        train_feats = pca.transform(x_train)
        test_feats = pca.transform(x_test)
        scaler.fit(train_feats)
        train_feats = scaler.transform(train_feats)
        test_feats = scaler.transform(test_feats)

        model.fit(train_feats, y_train)
        pred = model.predict(test_feats)
        abs_err = abs(pred - y_test)
        fold_mae[fold_no - 1] = sum(abs_err) / abs_err.shape[0]

        finished = time.time()
        print('fold ' + str(fold_no) + ':', finished - started, 'sec')

    print('=' * 40)
    print('MAE:', np.mean(fold_mae))

    if predict_data:
        # Refit everything on the full training data before scoring x_p / y_p.
        pca.fit(x)
        train_feats = pca.transform(x)
        test_feats = pca.transform(x_p)
        scaler.fit(train_feats)
        train_feats = scaler.transform(train_feats)
        test_feats = scaler.transform(test_feats)
        model.fit(train_feats, y)
        pred = model.predict(test_feats)
        error = abs(pred - y_p)
        print('Test MAE:', sum(error) / error.shape[0])

    return pred
class RVRDegradationModel:
    """Linear-kernel RVR fitted on a health-indicator array against its row index."""

    def __init__(self, HI):
        """Build the 0..len(HI)-1 index column and fit the model on it.

        Args:
            HI: Array with a ``shape`` attribute. When its first dimension
                is 1 it is reshaped to ``(1, 1)``; otherwise it must have a
                second dimension, which is used as the column count of the
                index array.
        """
        single_row = HI.shape[0] == 1
        if single_row:
            HI = HI.reshape(1, 1)

        n_steps = len(HI)
        # After the reshape above, HI.shape[0] is 1 in the single-row case.
        n_cols = HI.shape[0] if single_row else HI.shape[1]
        timesteps = np.array(list(range(n_steps))).reshape(n_steps, n_cols)

        self.rvrmodel = RVR(kernel='linear')
        self.optimize(timesteps, HI)

    def optimize(self, X, Y):
        """Fit the wrapped RVR on ``X`` and ``Y``."""
        self.rvrmodel.fit(X, Y)

    def update(self, X, Y):
        """Refit the wrapped RVR on ``X`` and ``Y`` (delegates to ``optimize``)."""
        self.optimize(X, Y)

    def predict(self, X):
        """Print and return the wrapped RVR's prediction for ``X``."""
        predicted = self.rvrmodel.predict(X)
        print(predicted)
        return predicted
def rvr_analysis(random_seed, save_path, n_folds, analysis):
    """Cross-validate a linear RVR on the saved split, then fit and plot a hold-out model.

    The function loads ``splitted_dataset_<dataset>.pickle`` from the
    ``random_seed_XXX`` sub-directory of ``save_path``, runs ``n_folds``-fold
    cross-validation on its ``'Xtest_scaled'`` / ``'Ytest'`` entries
    (recording MAE, Pearson correlation and CPU time per fold), prints a
    summary, then trains one more model on a train/test split of the same
    data and plots predicted versus true values for the held-out part.

    Relies on the module-level names ``debug`` and ``dataset``.

    Args:
        random_seed: Integer selecting the ``random_seed_XXX`` sub-directory
            and seeding the final train/test split.
        save_path: Base directory; must support the ``/`` operator.
        n_folds: Number of cross-validation folds.
        analysis: ``'vanilla_combi'`` (85% of the data held out) or
            ``'uniform_dist'`` (20% held out).

    Returns:
        Tuple ``(mae_cv, r_test, t_time_train, t_time_test)`` where
        ``mae_cv`` is an ``(n_folds, 1)`` array of per-fold MAE, ``r_test``
        is the Pearson correlation of the last fold, and the two lists hold
        per-fold training and prediction CPU times in seconds.

    Raises:
        ValueError: If ``analysis`` is not one of the supported modes.
    """
    # Fail fast: previously an unknown mode only surfaced after the whole
    # cross-validation had run, as an unbound-local error.
    if analysis not in ('vanilla_combi', 'uniform_dist'):
        raise ValueError(
            "analysis must be 'vanilla_combi' or 'uniform_dist', got %r"
            % (analysis,))

    save_path = save_path / ('random_seed_%03d' % random_seed)
    print('Random seed: %03d' % random_seed)

    # Load the saved validation dataset
    # The returned paths are not used below; the call is kept unchanged.
    project_ukbio_wd, project_data_ukbio, _ = get_paths(debug, dataset)
    # NOTE: pickle.load executes arbitrary code from the file; only use it on
    # files produced by this project.
    with open(save_path / ('splitted_dataset_%s.pickle' % dataset), 'rb') as handle:
        splitted_dataset = pickle.load(handle)

    # No random_state here: without shuffle=True it has no effect on the
    # folds, and recent scikit-learn raises ValueError for that combination.
    kf = KFold(n_splits=n_folds)
    mae_cv = np.zeros((n_folds, 1))
    pearsons_corr = np.zeros((n_folds, 1))
    pearsons_pval = np.zeros((n_folds, 1))

    # Set target and features
    x = splitted_dataset['Xtest_scaled']
    y = splitted_dataset['Ytest']

    t_time_train = []
    t_time_test = []

    for i_fold, (train_idx, test_idx) in enumerate(kf.split(x, y)):
        x_train, x_test = x[train_idx, :], x[test_idx, :]
        y_train, y_test = y[train_idx], y[test_idx]

        print('CV iteration: %d' % (i_fold + 1))
        print('Shape of the trainig and test dataset')
        print(y_train.shape, y_test.shape)

        # train the model
        model = RVR(kernel='linear')
        cv_time_train = time.process_time()
        model.fit(x_train, y_train)
        elapsed_time = time.process_time() - cv_time_train
        print('CV - Elapased time in seconds to train:')
        t_time_train.append(elapsed_time)
        print('%.03f' % elapsed_time)

        # test the model
        cv_time_test = time.process_time()
        y_predicted = model.predict(x_test)
        elapsed_time = time.process_time() - cv_time_test
        t_time_test.append(elapsed_time)
        print('CV - Elapased time in seconds to test:')
        print('%.03f' % elapsed_time)

        mae_kfold = mean_absolute_error(y_test, y_predicted)
        mae_cv[i_fold, :] = mae_kfold

        # now look at the pearson's correlation
        r_test, r_p_value_test = pearsonr(y_test, y_predicted)
        pearsons_corr[i_fold, :] = r_test
        pearsons_pval[i_fold, :] = r_p_value_test

    print('CV results')
    print('MAE: Mean(SD) = %.3f(%.3f)' % (mae_cv.mean(), mae_cv.std()))
    # Summarise over all folds; the last fold's scalar would always give SD 0.
    print('Pearson\'s Correlation: Mean(SD) = %.3f(%.3f)' %
          (pearsons_corr.mean(), pearsons_corr.std()))
    print('Mean CV time: %.3f s ' % np.mean(t_time_train))
    print('SD CV time: %.3f s' % np.std(t_time_train))
    print('Mean CV time: %.3f s ' % np.mean(t_time_test))
    print('SD CV time: %.3f s' % np.std(t_time_test))
    print('')

    if analysis == 'vanilla_combi':
        # Train the entire dataset
        x_train_all, x_test_all, y_train_all, y_test_all = \
            train_test_split(x, y, test_size=.85, random_state=random_seed)
        print('All: Shape of the trainig and test dataset')
        print(y_train_all.shape, y_test_all.shape)
    elif analysis == 'uniform_dist':
        # Train the entire dataset
        x_train_all, x_test_all, y_train_all, y_test_all = \
            train_test_split(x, y, test_size=.20, random_state=random_seed)
        print('ALL: Shape of the trainig and test dataset')
        print(y_train_all.shape, y_test_all.shape)

    print('Training RVR model:')
    model_all = RVR(kernel='linear')
    model_all.fit(x_train_all, y_train_all)

    # plot predicted vs true for the test (Entire sample)
    print('Plotting Predicted Vs True Age for all the sample')
    # Predict with the model trained just above, not the last CV-fold model.
    y_predicted_test = model_all.predict(x_test_all)
    output_path_test = save_path / (
        'rvr_test_predicted_true_age_rnd_seed%d.eps' % random_seed)
    plot_predicted_vs_true(y_test_all, y_predicted_test, output_path_test, 'Age')

    # NOTE(review): r_test is only the last fold's correlation; kept so the
    # return shape callers see does not change. pearsons_corr holds all folds.
    return mae_cv, r_test, t_time_train, t_time_test
y, test_size=0.2, random_state=2 ) sc = StandardScaler() X_train_std = sc.fit_transform(X_train) X_test_std = sc.transform(X_test) model = LinearRegression() model2 = SVR(kernel="rbf", C=1000.0, epsilon=6.5) model3 = RVR(kernel="rbf") model.fit(X_train_std, y_train) model2.fit(X_train_std, y_train) model3.fit(X_train_std, y_train) y_train_pred = model2.predict(X_train_std) y_test_pred = model2.predict(X_test_std) print("SVR MSE train: {0}, test: {1}".format( mean_squared_error(y_train, y_train_pred), mean_squared_error(y_test, y_test_pred) )) y_train_pred = model3.predict(X_train_std) y_test_pred = model3.predict(X_test_std) print("RVR MSE train: {0}, test: {1}".format( mean_squared_error(y_train, y_train_pred), mean_squared_error(y_test, y_test_pred)
def benchmark(num_figures=2, num_samples=100, noise_level=0.1, training_data_range=10):
    """Fit GPR, skrvm's RVR and sklearn_rvm's EMRVR on generated data and plot comparisons.

    Args:
        num_figures: Number of comparison subplots to show; must be 1, 2 or 3.
            Subplot 1 compares sklearn_rvm with GPR, subplot 2 sklearn_rvm
            with skrvm, subplot 3 skrvm with GPR.
        num_samples: Passed to ``generate_training_data``.
        noise_level: Passed to ``generate_training_data``.
        training_data_range: Passed to ``generate_training_data`` and to
            ``plot_results``.

    Raises:
        ValueError: If ``num_figures`` is not 1, 2 or 3.
    """
    if num_figures not in (1, 2, 3):
        raise ValueError(
            'num_figures must be 1, 2 or 3, got {!r}'.format(num_figures))

    # Training data
    X, y = generate_training_data(num_samples, noise_level, training_data_range)

    # Fit
    gpr = GaussianProcessRegressor(kernel=RBF() + WhiteKernel())
    gpr.fit(X, y)

    ## Implementation of RVR by skrvm
    rvr = RVR(kernel='rbf')
    rvr.fit(X, y)

    ## Implementation of RVR by sklearn_rvm
    # Caveat: Since sklearn v.0.22, the default value of gamma changed from 'auto' to 'scale'.
    # Reference: https://github.com/Mind-the-Pineapple/sklearn-rvm/issues/9
    emrvr = EMRVR(kernel='rbf', gamma='auto')
    emrvr.fit(X, y)

    # Predict
    plot_params = get_plot_params()
    X_plot = np.linspace(plot_params['x_low'], plot_params['x_high'], 10000)[:, None]

    # Caveat:
    # generating the variance of the predictive distribution takes considerably longer than just predicting the mean.
    # Reference:
    # https://scikit-learn.org/stable/auto_examples/gaussian_process/plot_compare_gpr_krr.html
    y_gpr, y_gpr_std = gpr.predict(X_plot, return_std=True)

    ## Implementation of RVR by skrvm
    y_rvr = rvr.predict(X_plot)
    y_rvr_std = None  # no standard deviation is requested from skrvm here

    ## Implementation of RVR by sklearn_rvm
    y_emrvr, y_emrvr_std = emrvr.predict(X_plot, return_std=True)

    # Plot
    _, axs = plt.subplots(num_figures, 1, figsize=(15, 7))
    if num_figures == 1:
        # A single subplot comes back as a bare Axes; wrap it so indexing works.
        axs = [axs]
    num_sub_plots = len(axs)
    print('Plotting {} subplots.'.format(num_sub_plots))

    # One row per comparison, in display order:
    # (model_a, model_b, y_a, y_b, label_a, label_b, std_a, std_b, color_a, color_b)
    comparisons = (
        (emrvr, gpr, y_emrvr, y_gpr, "sklearn_rvm", "GPR",
         y_emrvr_std, y_gpr_std, 'navy', 'darkorange'),
        (emrvr, rvr, y_emrvr, y_rvr, "sklearn_rvm", "skrvm",
         y_emrvr_std, y_rvr_std, 'navy', 'purple'),
        (rvr, gpr, y_rvr, y_gpr, "skrvm", "GPR",
         y_rvr_std, y_gpr_std, 'purple', 'darkorange'),
    )
    # zip stops at the shorter sequence, so only the first num_figures
    # comparisons are drawn.
    for ax, (model_a, model_b, y_a, y_b, label_a, label_b,
             std_a, std_b, color_a, color_b) in zip(axs, comparisons):
        plot_results(X, y, model_a, model_b, X_plot, y_a, y_b,
                     label_a, label_b, std_a, std_b,
                     rvr_color=color_a, gpr_color=color_b,
                     training_data_range=training_data_range, ax=ax)

    plt.show()
from skrvm import RVR
from skrvm import RVC
from sklearn.datasets import load_iris

# Minimal regression demo: fit an RVR on two 2-D points and predict the midpoint.
X = [[0, 0], [2, 2]]
y = [0.5, 2.5]

# Alternative kernels that can be swapped in: 'rbf', 'poly'.
clf = RVR(kernel='linear')
clf.fit(X, y)
# A bare ``RVR(alpha=..., verbose=True)`` expression used to follow the fit;
# it built an estimator and discarded it, so it has been removed.
print(clf.predict([[1, 1]]))

# Classification counterpart, left disabled:
# clf = RVC()
# clf.fit(load_iris().data, load_iris().target)
def train(x, y):
    """Fit an RBF-kernel RVR on ``x`` and ``y`` and return the fitted estimator."""
    regressor = RVR(kernel='rbf')
    regressor.fit(x, y)
    return regressor
# Scale the full feature matrix, then reduce it to 10 PCA components.
normal_data_all = preprocessing.scale(full_data_matrix)
pca = PCA(10, svd_solver='auto')
pca.fit(normal_data_all)
normal_data_pca = pca.transform(normal_data_all)

#####################################################################################
# Only the first n_subject_to_use * 690 rows are used. Row indices are split
# alongside the features so the matching rows of full_semantics_matrix can be
# looked up for training and testing.
n_subject_to_use = 1
n_observations = n_subject_to_use * 690
X_train, X_test, y_train_index, y_test_index = train_test_split(
    normal_data_pca[range(n_observations), :],
    range(n_observations),
    test_size=0.2,
)

# Train one RBF-kernel RVR per semantic column and record its mean signed
# test error.
mean_err = np.zeros((2048, 1))
for i in range(2048):
    n_semantic_as_y = i  # column of full_semantics_matrix used as the target
    train_target = full_semantics_matrix[y_train_index, n_semantic_as_y]
    clf1 = RVR(kernel='rbf')
    clf1.fit(X_train, train_target)
    predicted_out = clf1.predict(X_test)

    # Signed error on the held-out rows; positive and negative errors cancel
    # in the mean.
    err = predicted_out - full_semantics_matrix[y_test_index, n_semantic_as_y]
    mean_err[i] = np.mean(err)
    print(i)

plt.figure()
plt.plot(mean_err)
plt.show()