Example #1
    def test_regression_sinc(self):
        """Check regression works with y=sinc(x)."""
        clf = RVR()
        x = np.linspace(0, 10, 101)
        y = np.sinc(x)

        np.random.seed(1)
        y = y + 0.1 * np.random.randn(y.shape[0])

        X = x[:, np.newaxis]

        clf.fit(X, y)
        score = clf.score(X, y)

        m_target = [
            1.117655e+00, -6.334513e-01, 5.868671e-01, -4.370936e-01,
            2.320311e-01, -4.638864e-05, -7.505325e-02, 6.133291e-02
        ]

        self.assertGreater(score, 0.85)
        np.testing.assert_allclose(clf.m_, m_target, rtol=1e-3)
        self.assertEqual(clf.relevance_.shape, (8, 1))

        prediction, mse = clf.predict(np.array([[0.5]]), eval_MSE=True)
        self.assertAlmostEqual(prediction[0], 0.611, places=3)
        self.assertAlmostEqual(mse[0], 0.00930, places=5)
Example #2
    def test_regression_linear_noise(self):
        """Check regression works with a linear function with added noise."""
        clf = RVR(kernel='linear', alpha=1e11)

        x = np.arange(1, 101)
        y = x + 5

        np.random.seed(1)
        y = y + 0.1 * np.random.randn(y.shape[0])

        X = x[:, np.newaxis]

        clf.fit(X, y)
        score = clf.score(X, y)

        m_target = np.array([1, 5])
        rel_target = np.array([[1]])

        self.assertGreater(score, 0.99)
        np.testing.assert_allclose(clf.m_, m_target, rtol=1e-2)
        np.testing.assert_allclose(clf.relevance_, rel_target)
        self.assertAlmostEqual(clf.beta_, 126.583, places=3)

        prediction, mse = clf.predict(np.array([[50]]), eval_MSE=True)
        self.assertAlmostEqual(prediction[0], 55.006, places=3)
        self.assertAlmostEqual(mse[0], 0.00798, places=5)
Example #3
    def test_predict(self):
        """Check the predict function works with pre-set values."""
        clf = RVR(kernel='linear', bias_used=False)

        clf.relevance_ = np.array([[1, 1]])
        clf.m_ = np.array([1])

        y = clf.predict(np.array([1, 1]))
        self.assertEqual(y, 2)
def rvr_pipeline(x, y, pca, kernel, x_p=0, y_p=0, fold=10, seed=2019, predict_data=False):
    """Cross-validate an RVR model on PCA-reduced, standardised features and report MAE."""
    # The original snippet relied on a globally defined `scaler`; a StandardScaler is assumed here.
    scaler = StandardScaler()
    rvr = RVR(kernel=kernel)
    kf = KFold(n_splits=fold, shuffle=True, random_state=seed)
    score = np.zeros((fold,))
    i = 0
    for train, test in kf.split(x, y):
        t1 = time.time()
        x_train, y_train = x[train], y[train]
        x_test, y_test = x[test], y[test]
        # Fit PCA and the scaler on the training fold only, then transform both folds.
        pca.fit(x_train)
        new_train = pca.transform(x_train)
        new_test = pca.transform(x_test)
        scaler.fit(new_train)
        new_train = scaler.transform(new_train)
        new_test = scaler.transform(new_test)
        rvr.fit(new_train, y_train)
        pred = rvr.predict(new_test)
        # Mean absolute error for this fold (the original named this `mse`, but it is an absolute error).
        abs_err = abs(pred - y_test)
        score[i] = sum(abs_err) / abs_err.shape[0]
        i += 1
        t2 = time.time()
        print('fold ' + str(i) + ':', t2 - t1, 'sec')
    print('=' * 40)
    print('MAE:', np.mean(score))

    if predict_data:
        # Refit on the full training set and evaluate on the held-out data (x_p, y_p).
        pca.fit(x)
        new_train = pca.transform(x)
        new_test = pca.transform(x_p)
        scaler.fit(new_train)
        new_train = scaler.transform(new_train)
        new_test = scaler.transform(new_test)
        rvr.fit(new_train, y)
        pred = rvr.predict(new_test)
        error = abs(pred - y_p)
        print('Test MAE:', sum(error) / error.shape[0])

    # Note: if predict_data is False, `pred` holds the predictions of the last CV fold.
    return pred
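A hedged usage sketch for rvr_pipeline follows. The imports cover the names the function relies on (np, time, KFold, RVR, StandardScaler), and the data, PCA settings, and split below are illustrative placeholders, not anything from the original script.

import time
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import StandardScaler
from skrvm import RVR

# Synthetic regression data standing in for the real feature matrix.
rng = np.random.RandomState(0)
x_all = rng.randn(300, 50)
y_all = 3.0 * x_all[:, 0] + 0.1 * rng.randn(300)

# Hold out a small set to exercise the predict_data branch.
x, x_p, y, y_p = train_test_split(x_all, y_all, test_size=0.2, random_state=0)

pca = PCA(n_components=10)
pred = rvr_pipeline(x, y, pca, kernel='rbf',
                    x_p=x_p, y_p=y_p, fold=5, seed=2019, predict_data=True)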
Example #5
    def test_regression_linear(self):
        """Check regression works with a linear function."""
        clf = RVR(kernel='linear', alpha=1e11)

        x = np.arange(1, 100)
        y = x + 5

        X = x[:, np.newaxis]

        clf.fit(X, y)

        score = clf.score(X, y)

        m_target = np.array([1, 5])

        self.assertGreater(score, 0.99)
        np.testing.assert_allclose(clf.m_, m_target)

        prediction, mse = clf.predict(np.array([[50]]), eval_MSE=True)
        self.assertAlmostEqual(prediction[0], 55, places=3)
        self.assertAlmostEqual(mse[0], 6.18e-6, places=3)
Example #6
class RVRDegradationModel:
    """Linear-kernel RVR wrapper that models a health indicator (HI) over time."""

    def __init__(self, HI):
        # Build a timestep feature matrix shaped to match the HI array
        # (the single-sample case reshapes HI to (1, 1)).
        if HI.shape[0] == 1:
            HI = HI.reshape(1, 1)
            timesteps = np.arange(len(HI)).reshape(len(HI), HI.shape[0])
        else:
            timesteps = np.arange(len(HI)).reshape(len(HI), HI.shape[1])
        self.rvrmodel = RVR(kernel='linear')
        self.optimize(timesteps, HI)

    def optimize(self, X, Y):
        # Fit the RVR model to the (timestep, HI) pairs.
        self.rvrmodel.fit(X, Y)

    def update(self, X, Y):
        # Refit with new observations.
        self.optimize(X, Y)

    def predict(self, X):
        # self.rvrmodel.fit(X, X)
        Yp = self.rvrmodel.predict(X)
        print(Yp)
        return Yp
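A hedged usage sketch for RVRDegradationModel: the health-indicator series below is synthetic, and the column-vector shape is an assumption that matches the reshape logic in __init__; depending on the skrvm version, a column-vector target may be raveled internally with a warning.

import numpy as np
from skrvm import RVR

# Synthetic, slowly degrading health indicator with a little noise (illustrative only).
rng = np.random.RandomState(0)
hi = np.linspace(1.0, 0.2, 50).reshape(50, 1) + 0.02 * rng.randn(50, 1)

model = RVRDegradationModel(hi)                  # fits RVR on (timestep, HI) pairs at construction
future_steps = np.arange(50, 60).reshape(-1, 1)  # extrapolate 10 steps ahead
forecast = model.predict(future_steps)           # also prints the prediction, as in the class above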
def rvr_analysis(random_seed, save_path, n_folds, analysis):
    save_path = save_path / ('random_seed_%03d' % random_seed)
    print('Random seed: %03d' % random_seed)
    # Load the saved validation dataset
    project_ukbio_wd, project_data_ukbio, _ = get_paths(debug, dataset)
    with open(save_path / ('splitted_dataset_%s.pickle' % dataset),
              'rb') as handle:
        splitted_dataset = pickle.load(handle)

    # Recent scikit-learn versions require shuffle=True when random_state is set.
    kf = KFold(n_splits=n_folds, shuffle=True, random_state=random_seed)
    mae_cv = np.zeros((n_folds, 1))
    pearsons_corr = np.zeros((n_folds, 1))
    pearsons_pval = np.zeros((n_folds, 1))

    # Set target and features
    x = splitted_dataset['Xtest_scaled']
    y = splitted_dataset['Ytest']

    t_time_train = []
    t_time_test = []

    for i_fold, (train_idx, test_idx) in enumerate(kf.split(x, y)):
        x_train, x_test = x[train_idx, :], x[test_idx, :]
        y_train, y_test = y[train_idx], y[test_idx]

        print('CV iteration: %d' % (i_fold + 1))
        print('Shape of the training and test dataset')
        print(y_train.shape, y_test.shape)

        # train the model
        model = RVR(kernel='linear')
        cv_time_train = time.process_time()
        model.fit(x_train, y_train)
        elapsed_time = time.process_time() - cv_time_train
        print('CV - Elapsed time in seconds to train:')
        t_time_train.append(elapsed_time)
        print('%.03f' % elapsed_time)

        # test the model
        cv_time_test = time.process_time()
        y_predicted = model.predict(x_test)
        elapsed_time = time.process_time() - cv_time_test
        t_time_test.append(elapsed_time)
        print('CV - Elapsed time in seconds to test:')
        print('%.03f' % elapsed_time)

        mae_kfold = mean_absolute_error(y_test, y_predicted)
        mae_cv[i_fold, :] = mae_kfold
        # now look at the pearson's correlation
        r_test, r_p_value_test = pearsonr(y_test, y_predicted)
        pearsons_corr[i_fold, :] = r_test
        pearsons_pval[i_fold, :] = r_p_value_test

    print('CV results')
    print('MAE: Mean(SD) = %.3f(%.3f)' % (mae_cv.mean(), mae_cv.std()))
    print('Pearson\'s Correlation: Mean(SD) = %.3f(%.3f)' %
          (pearsons_corr.mean(), pearsons_corr.std()))
    print('Mean CV train time: %.3f s' % np.mean(t_time_train))
    print('SD CV train time: %.3f s' % np.std(t_time_train))
    print('Mean CV test time: %.3f s' % np.mean(t_time_test))
    print('SD CV test time: %.3f s' % np.std(t_time_test))
    print('')

    if analysis == 'vanilla_combi':
        # Train the entire dataset
        x_train_all, x_test_all, y_train_all, y_test_all = \
                train_test_split(x, y, test_size=.85, random_state=random_seed)
        print('All: Shape of the training and test dataset')
        print(y_train_all.shape, y_test_all.shape)
    elif analysis == 'uniform_dist':
        # Train the entire dataset
        x_train_all, x_test_all, y_train_all, y_test_all = \
                train_test_split(x, y, test_size=.20,  random_state=random_seed)
        print('ALL: Shape of the training and test dataset')
        print(y_train_all.shape, y_test_all.shape)
    print('Training RVR model:')
    model_all = RVR(kernel='linear')
    model_all.fit(x_train_all, y_train_all)
    # plot predicted vs true for the test (Entire sample)
    print('Plotting Predicted Vs True Age for all the sample')
    y_predicted_test = model_all.predict(x_test_all)
    output_path_test = save_path / (
        'rvr_test_predicted_true_age_rnd_seed%d.eps' % random_seed)
    plot_predicted_vs_true(y_test_all, y_predicted_test, output_path_test,
                           'Age')

    return mae_cv, r_test, t_time_train, t_time_test
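The comparison excerpt that follows (and the later RandomForest/RVR/Lasso one) assumes X_train_std, X_test_std, y_train and y_test already exist. A minimal setup sketch under that assumption, with synthetic one-dimensional data and scikit-learn's StandardScaler standing in for whatever the original script used:

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from skrvm import RVR
import matplotlib.pyplot as plt

# Synthetic 1-D regression problem standing in for the original data.
rng = np.random.RandomState(0)
X = np.sort(10 * rng.rand(200, 1), axis=0)
y = np.sinc(X).ravel() + 0.1 * rng.randn(200)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Standardise features, fitting the scaler on the training split only.
scaler = StandardScaler().fit(X_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)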
# `model` is assumed to be a regressor defined earlier in the original script;
# only model2 (SVR) and model3 (RVR) are constructed in this excerpt.
model2 = SVR(kernel="rbf", C=1000.0, epsilon=6.5)
model3 = RVR(kernel="rbf")

model.fit(X_train_std, y_train)
model2.fit(X_train_std, y_train)
model3.fit(X_train_std, y_train)

y_train_pred = model2.predict(X_train_std)
y_test_pred = model2.predict(X_test_std)

print("SVR MSE train: {0}, test: {1}".format(
    mean_squared_error(y_train, y_train_pred),
    mean_squared_error(y_test, y_test_pred)
))

y_train_pred = model3.predict(X_train_std)
y_test_pred = model3.predict(X_test_std)

print("RVR MSE train: {0}, test: {1}".format(
    mean_squared_error(y_train, y_train_pred),
    mean_squared_error(y_test, y_test_pred)
))


plt.figure(figsize=(8, 4))

X_plt = np.arange(X_train_std.min(), X_train_std.max(), 0.1)[:, None]
y_plt_pred = model.predict(X_plt)
y_plt_pred2 = model2.predict(X_plt)
y_plt_pred3 = model3.predict(X_plt)
Example #9
def benchmark():
    # Any integer value between 1 and 3 to select the number of subplots to show:
    num_figures = 2

    # Parameters to generate training data
    num_samples = 100
    noise_level = 0.1
    training_data_range = 10

    # Training data
    X, y = generate_training_data(num_samples, noise_level, training_data_range)

    # Fit
    gpr = GaussianProcessRegressor(kernel=RBF() + WhiteKernel())
    gpr.fit(X, y)

    ## Implementation of RVR by skrvm
    rvr = RVR(kernel='rbf')
    rvr.fit(X, y)

    ## Implementation of RVR by sklearn_rvm
    # Caveat: Since sklearn v.0.22, the default value of gamma changed from ‘auto’ to ‘scale’.
    # Reference: https://github.com/Mind-the-Pineapple/sklearn-rvm/issues/9
    emrvr = EMRVR(kernel='rbf',
                  gamma='auto')
    emrvr.fit(X, y)

    # Predict
    plot_params = get_plot_params()
    X_plot = np.linspace(plot_params['x_low'], plot_params['x_high'], 10000)[:, None]

    # Caveat:
    # generating the variance of the predictive distribution takes considerably longer than just predicting the mean.
    # Reference:
    # https://scikit-learn.org/stable/auto_examples/gaussian_process/plot_compare_gpr_krr.html
    y_gpr, y_gpr_std = gpr.predict(X_plot, return_std=True)

    ## Implementation of RVR by skrvm
    y_rvr = rvr.predict(X_plot)
    y_rvr_std = None

    ## Implementation of RVR by sklearn_rvm
    y_emrvr, y_emrvr_std = emrvr.predict(X_plot, return_std=True)

    # Plot
    fig, axs = plt.subplots(num_figures, 1, figsize=(15, 7))

    try:
        # In case there is strictly more than 1 subplot, there is no issue.
        num_sub_plots = len(axs)
    except TypeError:
        # In case there is exactly 1 subplot, we have to ensure that axs is a list, for code compatibility.
        axs = [axs]
        num_sub_plots = len(axs)

    print('Plotting {} subplots.'.format(num_sub_plots))

    plot_results(X, y, emrvr, gpr, X_plot, y_emrvr, y_gpr,
                 "sklearn_rvm", "GPR", y_emrvr_std, y_gpr_std,
                 rvr_color='navy', gpr_color='darkorange',
                 training_data_range=training_data_range,
                 ax=axs[0])

    if len(axs) > 1:
        plot_results(X, y, emrvr, rvr, X_plot, y_emrvr, y_rvr,
                     "sklearn_rvm", "skrvm", y_emrvr_std, y_rvr_std,
                     rvr_color='navy', gpr_color='purple',
                     training_data_range=training_data_range,
                     ax=axs[1])

    if len(axs) > 2:
        plot_results(X, y, rvr, gpr, X_plot, y_rvr, y_gpr,
                     "skrvm", "GPR", y_rvr_std, y_gpr_std,
                     rvr_color='purple', gpr_color='darkorange',
                     training_data_range=training_data_range,
                     ax=axs[2])

    plt.show()

    return
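The benchmark above calls generate_training_data, get_plot_params and plot_results, which are helpers from its original module and are not shown here. A hedged stand-in for generate_training_data only, assuming sinc-shaped data in the spirit of the sinc regression test at the top of this page:

import numpy as np

def generate_training_data(num_samples, noise_level, training_data_range):
    # Random inputs over [0, training_data_range) and a noisy sinc target (assumed shape).
    rng = np.random.RandomState(0)
    X = training_data_range * rng.rand(num_samples, 1)
    y = np.sinc(X).ravel() + noise_level * rng.randn(num_samples)
    return X, y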
Example #10
X = [[0, 0], [2, 2]]
y = [0.5, 2.5]
clf = RVR(kernel='linear')
# clf = RVR(kernel='rbf')
# clf = RVR(kernel='poly')
clf.fit(X, y)

# In an interactive session, clf.fit(X, y) echoes the estimator's repr:
# RVR(alpha=1e-06, beta=1e-06, beta_fixed=False, bias_used=True, coef0=0.0,
#     coef1=None, degree=3, kernel='linear', n_iter=3000,
#     threshold_alpha=1000000000.0, tol=0.001, verbose=True)

print(clf.predict([[1, 1]]))
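
# As in the test examples near the top of this page, skrvm's predict also accepts
# eval_MSE=True to return the predictive variance alongside the mean
# (a short illustrative follow-up, reusing the clf fitted above):
prediction, pred_mse = clf.predict([[1, 1]], eval_MSE=True)
print(prediction, pred_mse)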

# clf = RVC()
# clf.fit(load_iris().data, load_iris().target)
# RVC(alpha=1e-06, beta=1e-06, beta_fixed=False, bias_used=True, coef0=0.0,
#     coef1=None, degree=3, kernel='rbf', n_iter=3000, n_iter_posterior=50,
#     threshold_alpha=1000000000.0, tol=0.001, verbose=False)
# score = clf.score(load_iris().data, load_iris().target)
# print(score)
model1 = RandomForestRegressor(bootstrap=True, criterion="mse")  # newer scikit-learn uses criterion="squared_error"
model2 = RVR(kernel="rbf")
model3 = Lasso(alpha=0.1)

model1.fit(X_train_std, y_train)
model2.fit(X_train_std, y_train)
model3.fit(X_train_std, y_train)

y_train_pred = model1.predict(X_train_std)
y_test_pred = model1.predict(X_test_std)

print("Random Forest MSE train: {0}, test: {1}".format(
    mean_squared_error(y_train, y_train_pred),
    mean_squared_error(y_test, y_test_pred)))

y_train_pred = model2.predict(X_train_std)
y_test_pred = model2.predict(X_test_std)

print("RVR MSE train: {0}, test: {1}".format(
    mean_squared_error(y_train, y_train_pred),
    mean_squared_error(y_test, y_test_pred)))

y_train_pred = model3.predict(X_train_std)
y_test_pred = model3.predict(X_test_std)

print("Lasso MSE train: {0}, test: {1}".format(
    mean_squared_error(y_train, y_train_pred),
    mean_squared_error(y_test, y_test_pred)))

# Check the correlation coefficients
print(data.corr())

normal_data_all = preprocessing.scale(full_data_matrix)  # normalize
pca = PCA(10, svd_solver='auto')
pca.fit(normal_data_all)
normal_data_pca = pca.transform(normal_data_all)  # project the data onto 10 principal components


#####################################################################################
## We use the image semantics from each subject for training below
n_subject_to_use = 1
n_observations = n_subject_to_use * 690
X_train, X_test, y_train_index, y_test_index = train_test_split(
    normal_data_pca[range(n_observations), :], range(n_observations), test_size=0.2)

mean_err = np.zeros((2048, 1))
for i in range(2048):
    n_semantic_as_y = i  # the i-th semantic dimension is used as the output
    clf1 = RVR(kernel='rbf')
    clf1.fit(X_train, full_semantics_matrix[y_train_index, n_semantic_as_y])
    predicted_out = clf1.predict(X_test)

    # calc error against the test output full_semantics_matrix[y_test_index, n_semantic_as_y]
    err = predicted_out - full_semantics_matrix[y_test_index, n_semantic_as_y]
    mean_err[i] = np.mean(err)  # mean signed error (bias) for this semantic dimension
    print(i)



plt.figure()
plt.plot(mean_err)
plt.show()