def test_2layer_vs_nat_grad(self):
    Ns, N, M = 5, 1, 50
    D_X, D_Y = 1, 1
    lik_var = 0.1

    X = np.random.uniform(size=(N, D_X))
    Y = np.random.uniform(size=(N, D_Y))
    Z = np.random.uniform(size=(M, D_X))  # inducing inputs live in input space
    Xs = np.random.uniform(size=(Ns, D_X))
    Z[:N, :] = X[:M, :]

    def kerns():
        return [RBF(D_X, lengthscales=0.1), RBF(D_X, lengthscales=0.5)]

    layers_col = init_layers_linear(X, Y, Z, kerns())
    layers_ng = init_layers_linear(X, Y, Z, kerns())

    def lik():
        l = Gaussian()
        l.variance = lik_var
        return l

    # Replace the final layer with an analytically collapsed SGPR layer.
    last_layer = SGPR_Layer(layers_col[-1].kern,
                            layers_col[-1].feature.Z.read_value(),
                            D_Y,
                            layers_col[-1].mean_function)
    layers_col = layers_col[:-1] + [last_layer]

    m_col = DGP_Collapsed(X, Y, lik(), layers_col)
    m_ng = DGP_Quad(X, Y, lik(), layers_ng, H=200)

    # Use the same first-layer variational distribution in both models.
    q_mu1 = np.random.randn(M, D_X)
    q_sqrt1 = np.tril(np.random.randn(M, M))[None, :, :]
    for m in m_col, m_ng:
        m.layers[0].q_mu = q_mu1
        m.layers[0].q_sqrt = q_sqrt1

    # With a Gaussian likelihood the final layer is conjugate, so a single
    # natural gradient step with gamma=1 sets its q(u) to the optimum and the
    # uncollapsed bound should match the collapsed one.
    p = [[m_ng.layers[-1].q_mu, m_ng.layers[-1].q_sqrt]]
    NatGradOptimizer(gamma=1.).minimize(m_ng, var_list=p, maxiter=1)

    assert_allclose(m_col.compute_log_likelihood(),
                    m_ng.compute_log_likelihood())
def __init__(self, X, Y, Z, kernels, likelihood,
             num_outputs=None,
             mean_function=Zero(),  # mean function of the final layer
             white=False,
             **kwargs):
    """Convenience constructor: builds the layers with init_layers_linear,
    then delegates to DGP_Base."""
    layers = init_layers_linear(X, Y, Z, kernels,
                                num_outputs=num_outputs,
                                mean_function=mean_function,
                                white=white)
    DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)
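# Example usage (illustrative sketch, not from the original file): building a
# two-layer DGP with the convenience constructor above (referred to as DGP here;
# the enclosing class name is assumed). Data shapes and hyperparameters are
# hypothetical; RBF, Gaussian and num_samples follow their use elsewhere in
# these tests.
#
#     X = np.random.uniform(size=(100, 2))
#     Y = np.sin(X[:, :1]) + 0.05 * np.random.randn(100, 1)
#     Z = X[:20].copy()  # inducing inputs
#     model = DGP(X, Y, Z, [RBF(2), RBF(2)], Gaussian(), num_samples=10)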
def test_single_layer(self):
    # A single collapsed SGPR layer with Z = X and a Gaussian likelihood
    # should reproduce exact GP regression.
    kern = RBF(1, lengthscales=0.1)
    layers = init_layers_linear(self.X, self.Y, self.X, [kern])

    lik = Gaussian()
    lik.variance = self.lik_var

    last_layer = SGPR_Layer(layers[-1].kern,
                            layers[-1].feature.Z.read_value(),
                            self.D_Y,
                            layers[-1].mean_function)
    layers = layers[:-1] + [last_layer]

    m_dgp = DGP_Collapsed(self.X, self.Y, lik, layers)
    L_dgp = m_dgp.compute_log_likelihood()
    mean_dgp, var_dgp = m_dgp.predict_f_full_cov(self.Xs, 1)

    m_exact = GPR(self.X, self.Y, kern)
    m_exact.likelihood.variance = self.lik_var
    L_exact = m_exact.compute_log_likelihood()
    mean_exact, var_exact = m_exact.predict_f_full_cov(self.Xs)

    assert_allclose(L_dgp, L_exact, atol=1e-5, rtol=1e-5)
    assert_allclose(mean_dgp[0], mean_exact, atol=1e-5, rtol=1e-5)
    assert_allclose(var_dgp[0], var_exact, atol=1e-5, rtol=1e-5)
def test_quadrature(self):
    N = 2
    np.random.seed(0)
    X = np.random.uniform(size=(N, 1))
    Y = np.sin(20 * X) + np.random.randn(*X.shape) * 0.001

    kernels = lambda: [RBF(1, lengthscales=0.1), RBF(1, lengthscales=0.1)]
    layers = lambda: init_layers_linear(X, Y, X, kernels())

    def lik():
        l = Gaussian()
        l.variance = 0.01
        return l

    m_stochastic = DGP_Base(X, Y, lik(), layers(), num_samples=100)
    # 300 quadrature points seem to be necessary here, which suggests the
    # integrand is not easy to approximate.
    m_quad = DGP_Quad(X, Y, lik(), layers(), H=300)

    for model in m_quad, m_stochastic:
        model.set_trainable(False)
        for layer in model.layers:
            layer.q_mu.set_trainable(True)
            layer.q_sqrt.set_trainable(True)

    # Optimise the quadrature model, then copy its variational parameters into
    # both models so the two bounds are evaluated at the same point.
    ScipyOptimizer().minimize(m_quad, maxiter=500)

    q_mu_0 = m_quad.layers[0].q_mu.read_value()
    q_sqrt_0 = m_quad.layers[0].q_sqrt.read_value()
    q_mu_1 = m_quad.layers[1].q_mu.read_value()
    q_sqrt_1 = m_quad.layers[1].q_sqrt.read_value()

    for model in m_stochastic, m_quad:
        model.layers[0].q_mu = q_mu_0
        model.layers[0].q_sqrt = q_sqrt_0
        model.layers[1].q_mu = q_mu_1
        model.layers[1].q_sqrt = q_sqrt_1

    Ls_quad = [m_quad.compute_log_likelihood() for _ in range(2)]
    Ls_stochastic = [m_stochastic.compute_log_likelihood() for _ in range(1000)]

    assert_allclose(Ls_quad[0], Ls_quad[1])  # quadrature should be deterministic

    m = np.average(Ls_stochastic)
    std_err = np.std(Ls_stochastic) / (float(len(Ls_stochastic)) ** 0.5)

    print('sampling average {}'.format(m))
    print('sampling std err {}'.format(std_err))
    print('quad val {}'.format(Ls_quad[0]))

    assert np.abs(Ls_quad[0] - m) < std_err * 3  # ~99.7% confidence interval
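# Illustrative helper (a sketch, not part of the original tests): the test above
# compares a Gauss-Hermite bound (DGP_Quad) with a Monte Carlo bound (DGP_Base).
# The function below shows the same idea for E[sin(20 x)] with x ~ N(mu, var),
# using numpy only; its name and default values are hypothetical.
def _demo_quadrature_vs_sampling(mu=0.3, var=0.5, H=50, S=100000, seed=0):
    rng = np.random.RandomState(seed)
    # Gauss-Hermite: deterministic; E[f(x)] = sum_i w_i f(mu + sqrt(2 var) x_i) / sqrt(pi)
    gh_x, gh_w = np.polynomial.hermite.hermgauss(H)
    quad = np.sum(gh_w * np.sin(20 * (mu + np.sqrt(2 * var) * gh_x))) / np.sqrt(np.pi)
    # Monte Carlo: unbiased but noisy; agreement within a few standard errors is expected.
    samples = np.sin(20 * rng.normal(mu, np.sqrt(var), size=S))
    return quad, samples.mean(), samples.std() / np.sqrt(S)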