Beispiel #1
0
 def test_modelrecreation(self):
     """Check that a rebuilt copy and a pickled copy of the toy model match it.

     Part 1 builds a new ``GPRegression`` from copies of the toy model's
     ``X``, ``Y`` and kernel, and asserts that parameters, gradients and
     the string form agree while the underlying arrays are distinct
     objects. Both models are then optimised with ``'bfgs'`` and must end
     up with the same parameters (``atol=1e-6``).

     Part 2 randomises the original, pickles it to a temporary file, loads
     it back and repeats the comparison.
     """
     par = toy_model()
     pcopy = GPRegression(par.X.copy(),
                          par.Y.copy(),
                          kernel=par.kern.copy())

     # Same values ...
     np.testing.assert_allclose(par.param_array, pcopy.param_array)
     np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
     self.assertSequenceEqual(str(par), str(pcopy))
     # ... but not the same array objects.
     self.assertIsNot(par.param_array, pcopy.param_array)
     self.assertIsNot(par.gradient_full, pcopy.gradient_full)

     self.assertTrue(pcopy.checkgrad())
     # `assert_` is a deprecated unittest alias (removed in Python 3.12);
     # `assertTrue` is the supported spelling.
     self.assertTrue(np.any(pcopy.gradient != 0.0))

     pcopy.optimize('bfgs')
     par.optimize('bfgs')
     np.testing.assert_allclose(pcopy.param_array,
                                par.param_array,
                                atol=1e-6)

     # Round-trip through pickle after moving the parameters elsewhere.
     par.randomize()
     with tempfile.TemporaryFile('w+b') as f:
         par.pickle(f)
         f.seek(0)
         # The file was written by this test just above, so loading it is safe.
         pcopy = pickle.load(f)
     np.testing.assert_allclose(par.param_array, pcopy.param_array)
     np.testing.assert_allclose(par.gradient_full,
                                pcopy.gradient_full,
                                atol=1e-6)
     self.assertSequenceEqual(str(par), str(pcopy))
     self.assertTrue(pcopy.checkgrad())
Beispiel #2
0
def fit_single_GP_model(X, Y, parameter_list, ard=False):
    """Fit a GP regression with an RBF kernel and return the optimised model.

    Parameters
    ----------
    X, Y
        Training inputs and targets, passed straight to ``GPRegression``.
        ``X.shape[1]`` is used as the kernel's input dimension.
    parameter_list
        Positional settings: ``[0]`` kernel lengthscale, ``[1]`` kernel
        variance, ``[2]`` ``noise_var`` for ``GPRegression`` and, optionally,
        ``[3]`` the kernel's ``ARD`` flag.
    ard
        ``ARD`` flag used only when ``parameter_list`` has no fourth entry.
        (Previously this argument was accepted but never read, and a
        three-element ``parameter_list`` raised ``IndexError``.)

    Returns
    -------
    The ``GPRegression`` model after ``optimize()``.
    """
    # A fourth entry in parameter_list keeps precedence, so existing callers
    # that pass four values see exactly the old behaviour.
    use_ard = parameter_list[3] if len(parameter_list) > 3 else ard

    kernel = RBF(X.shape[1],
                 ARD=use_ard,
                 lengthscale=parameter_list[0],
                 variance=parameter_list[1])
    gp = GPRegression(X=X, Y=Y, kernel=kernel, noise_var=parameter_list[2])
    # NOTE(review): fixing the likelihood variance at 1e-2 appears to override
    # the noise_var passed just above -- confirm this is intended.
    gp.likelihood.variance.fix(1e-2)
    gp.optimize()
    return gp
def compare_against_mmd_test():
    """Return two goodness-of-fit p-values for a GP fitted to the solar data.

    The data is read from ``../data/02-solar.mat`` and split with
    ``prepare_dataset``. A ``GPRegression`` model with an RBF kernel is
    optimised on the training part and used to predict the test part.

    * ``p_value_ours``: a ``GaussianQuadraticTest`` statistic is computed from
      the test targets and the gradients returned by
      ``compute_gp_regression_gradients``; the p-value is one minus the
      fraction of ``bootstrap_null`` samples that are ``<=`` the statistic.
    * ``p_value_mmd``: a ``QuadraticTimeMMD`` statistic between
      ``(X_test, y_test)`` and ``(X_test, y_rep)``, where ``y_rep`` is one
      synthetic draw from the predictions. Its null is built by replacing
      ``y_test`` with fresh synthetic draws 10000 times while keeping
      ``y_rep`` fixed.

    Returns
    -------
    tuple
        ``(p_value_ours, p_value_mmd)``
    """

    def draw_replicate():
        # One synthetic target column: standard-normal noise scaled by
        # pred_std and shifted by pred_mean, returned as a column vector.
        flat = np.random.randn(len(X_test)) * pred_std.flatten() + pred_mean.flatten()
        return np.atleast_2d(flat).T

    def mmd_statistic(p_features, q_features):
        # A fresh kernel and test object per call, as in the original loop.
        width = 1
        gaussian = GaussianKernel(10, width)
        two_sample = QuadraticTimeMMD()
        two_sample.set_kernel(gaussian)
        two_sample.set_p(p_features)
        two_sample.set_q(q_features)
        return two_sample.compute_statistic()

    solar = loadmat("../data/02-solar.mat")
    inputs = solar["X"]
    targets = solar["y"]

    # The last two return values (the sizes) are not needed here.
    X_train, y_train, X_test, y_test, _, _ = prepare_dataset(inputs, targets)

    rbf = RBF(input_dim=1, variance=0.608, lengthscale=0.207)
    model = GPRegression(X_train, y_train, rbf, noise_var=0.283)
    model.optimize()
    # NOTE(review): GPy documents the second return value of predict() as the
    # predictive variance, while it is used as `pred_std` here -- confirm.
    pred_mean, pred_std = model.predict(X_test)

    # --- quadratic-form test with custom gradients -------------------------
    quadratic_test = GaussianQuadraticTest(None)
    gradients = compute_gp_regression_gradients(y_test, pred_mean, pred_std)
    U_matrix, stat = quadratic_test.get_statistic_multiple_custom_gradient(
        y_test[:, 0], gradients[:, 0])
    num_test_samples = 10000
    null_samples = bootstrap_null(U_matrix, num_bootstrap=num_test_samples)
    # Alternative (disabled):
    #     null_samples = sample_null_simulated_gp(s, pred_mean, pred_std, num_test_samples)
    p_value_ours = 1.0 - np.mean(null_samples <= stat)

    # --- MMD two-sample test ----------------------------------------------
    y_rep = draw_replicate()
    observed_features = RealFeatures(np.hstack((X_test, y_test)).T)
    replicate_features = RealFeatures(np.hstack((X_test, y_rep)).T)
    mmd_stat = mmd_statistic(observed_features, replicate_features)

    # Null distribution: y_rep stays fixed, the side that held y_test is
    # replaced by a new synthetic draw each round.
    num_null_samples = 10000
    mmd_null_samples = np.zeros(num_null_samples)
    for idx in range(num_null_samples):
        resampled = np.hstack((X_test, draw_replicate()))
        mmd_null_samples[idx] = mmd_statistic(RealFeatures(resampled.T),
                                              replicate_features)

    p_value_mmd = 1.0 - np.mean(mmd_null_samples <= mmd_stat)

    return p_value_ours, p_value_mmd
    def fit_all_models(self):
        """Fit the GP regressions Z -> Y and X -> Z on the stored data.

        Both models use a non-ARD RBF kernel initialised with unit
        lengthscale and variance. The Z -> Y model is created with
        ``noise_var=1.``; the X -> Z model uses the ``GPRegression`` default.

        Returns
        -------
        OrderedDict
            Keys ``'Y'``, ``'Z'``, ``'X'`` in that order, holding the
            optimised Z -> Y model, the optimised X -> Z model and an empty
            list respectively.
        """
        # Y as a function of Z.
        kernel_for_y = RBF(self.Z.shape[1], ARD=False, lengthscale=1., variance=1.)
        model_y = GPRegression(X=self.Z, Y=self.Y, kernel=kernel_for_y, noise_var=1.)
        model_y.optimize()

        # Z as a function of X.
        kernel_for_z = RBF(self.X.shape[1], ARD=False, lengthscale=1., variance=1.)
        model_z = GPRegression(X=self.X, Y=self.Z, kernel=kernel_for_z)
        model_z.optimize()

        fitted = OrderedDict()
        fitted['Y'] = model_y
        fitted['Z'] = model_z
        fitted['X'] = []
        return fitted
    def refit_models(self, observational_samples):
        """Fit the GP regressions Z -> Y and X -> Z on the given samples.

        Parameters
        ----------
        observational_samples
            Mapping with entries ``'X'``, ``'Z'`` and ``'Y'``. Each entry is
            converted with ``np.asarray`` and given a trailing axis before
            fitting.

        Returns
        -------
        OrderedDict
            Keys ``'Y'``, ``'Z'``, ``'X'`` in that order, holding the
            optimised Z -> Y model, the optimised X -> Z model and an empty
            list respectively.
        """
        # Looked up in the order X, Z, Y, matching the original statements.
        X, Z, Y = (np.asarray(observational_samples[name])[:, np.newaxis]
                   for name in ('X', 'Z', 'Y'))

        # Y as a function of Z.
        kernel_for_y = RBF(Z.shape[1], ARD=False, lengthscale=1., variance=1.)
        model_y = GPRegression(X=Z, Y=Y, kernel=kernel_for_y, noise_var=1.)
        model_y.optimize()

        # Z as a function of X.
        kernel_for_z = RBF(X.shape[1], ARD=False, lengthscale=1., variance=1.)
        model_z = GPRegression(X=X, Y=Z, kernel=kernel_for_z)
        model_z.optimize()

        fitted = OrderedDict()
        fitted['Y'] = model_y
        fitted['Z'] = model_z
        fitted['X'] = []
        return fitted
Beispiel #6
0
 def test_modelrecreation(self):
     """Verify that re-creating and pickling the toy model preserve its state.

     A second ``GPRegression`` is built from copies of the toy model's data
     and kernel; parameters, full gradients and ``str()`` output must match
     while the arrays themselves are separate objects. After optimising
     both with ``'bfgs'`` the parameters must still agree (``atol=1e-6``).
     The original is then randomised, pickled to a temporary file and
     reloaded, and the same comparisons are made on the reloaded model.
     """
     par = toy_model()
     pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())

     # Equal in value, distinct in identity.
     np.testing.assert_allclose(par.param_array, pcopy.param_array)
     np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
     self.assertSequenceEqual(str(par), str(pcopy))
     self.assertIsNot(par.param_array, pcopy.param_array)
     self.assertIsNot(par.gradient_full, pcopy.gradient_full)

     self.assertTrue(pcopy.checkgrad())
     # `assert_` is a deprecated unittest alias that no longer exists in
     # Python 3.12+; use `assertTrue` instead.
     self.assertTrue(np.any(pcopy.gradient != 0.0))

     pcopy.optimize('bfgs')
     par.optimize('bfgs')
     np.testing.assert_allclose(pcopy.param_array, par.param_array, atol=1e-6)

     # Pickle round-trip from a freshly randomised state.
     par.randomize()
     with tempfile.TemporaryFile('w+b') as f:
         par.pickle(f)
         f.seek(0)
         # Trusted input: the file was produced by this test itself.
         pcopy = pickle.load(f)
     np.testing.assert_allclose(par.param_array, pcopy.param_array)
     np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full, atol=1e-6)
     self.assertSequenceEqual(str(par), str(pcopy))
     self.assertTrue(pcopy.checkgrad())
    
    return samples

if __name__ == '__main__':
    data = loadmat("../data/02-solar.mat")
    X = data['X']
    y = data['y']
    
    X_train, y_train, X_test, y_test, N, N_test = prepare_dataset(X, y)
    
    print "num_train:", len(X_train)
    print "num_test:", len(X_test)
    
    kernel = RBF(input_dim=1, variance=1., lengthscale=1.)
    m = GPRegression(X_train, y_train, kernel)
    m.optimize()
    
    res = 100
    pred_mean, pred_std = m.predict(X_test)
    plt.plot(X_test, pred_mean, 'b-')
    plt.plot(X_test, pred_mean + 2 * pred_std, 'b--')
    plt.plot(X_test, pred_mean - 2 * pred_std, 'b--')
    plt.plot(X_train, y_train, 'b.', markersize=3)
    plt.plot(X_test, y_test, 'r.', markersize=5)
    plt.grid(True)
    plt.xlabel(r"$X$")
    plt.ylabel(r"$y$")
    plt.savefig("gp_regression_data_fit.eps", bbox_inches='tight')
    plt.show()
    
    s = GaussianQuadraticTest(None)