import numpy as np
import numpy.random as npr

# The kernel and transformation classes used below (Matern52, SumKernel,
# Subset, Scale, Noise, BetaWarp, Normalization, Linear, Transformer,
# TransformKernel) are assumed importable from this package's kernel and
# transformation modules.


def test_sum_kernel_grad():
    npr.seed(1)

    eps = 1e-5
    N = 10
    M = 5
    D = 3

    kernel1 = Matern52(D)
    kernel2 = Matern52(D)
    kernel3 = Matern52(D)
    kernel = SumKernel(kernel1, kernel2, kernel3)

    data1 = npr.randn(N, D)
    data2 = npr.randn(M, D)

    loss = np.sum(kernel.cross_cov(data1, data2))
    dloss = kernel.cross_cov_grad_data(data1, data2).sum(0)

    # Estimate the gradient by central finite differences: perturb each
    # entry of data2 by +/- eps and difference the summed cross-covariance.
    dloss_est = np.zeros(dloss.shape)
    for i in range(M):
        for j in range(D):
            data2[i, j] += eps
            loss_1 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] -= 2 * eps
            loss_2 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] += eps
            dloss_est[i, j] = (loss_1 - loss_2) / (2 * eps)

    assert np.linalg.norm(dloss - dloss_est) < 1e-6
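# Each test below repeats this central-difference loop verbatim. Here is a
# minimal sketch of a shared checker, assuming only numpy and the cross_cov
# interface exercised above; the helper name is hypothetical and not part of
# this test suite.
def finite_diff_cross_cov_grad(kernel, data1, data2, eps=1e-5):
    # Central-difference estimate of the gradient of
    # sum(kernel.cross_cov(data1, data2)) with respect to data2.
    est = np.zeros(data2.shape)
    for i in range(data2.shape[0]):
        for j in range(data2.shape[1]):
            data2[i, j] += eps
            loss_1 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] -= 2 * eps
            loss_2 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] += eps
            est[i, j] = (loss_1 - loss_2) / (2 * eps)
    return est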
def test_grad():
    npr.seed(1)

    eps = 1e-5
    N = 10
    M = 5
    D = 5
    inds = [0, 2, 4]

    # Subset applies the Matern52 only to the dimensions in inds, so the
    # gradient with respect to the excluded dimensions (1 and 3) should be zero.
    kernel = Subset(D, Matern52(len(inds)), inds)

    data1 = npr.randn(N, D)
    data2 = npr.randn(M, D)

    loss = np.sum(kernel.cross_cov(data1, data2))
    dloss = kernel.cross_cov_grad_data(data1, data2).sum(0)

    dloss_est = np.zeros(dloss.shape)
    for i in range(M):
        for j in range(D):
            data2[i, j] += eps
            loss_1 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] -= 2 * eps
            loss_2 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] += eps
            dloss_est[i, j] = (loss_1 - loss_2) / (2 * eps)

    print('Subset kernel grad using indices %s:' % inds)
    print(dloss)

    assert np.linalg.norm(dloss - dloss_est) < 1e-6
def test_grad():
    npr.seed(1)

    eps = 1e-5
    N = 10
    M = 5
    D = 3

    # Scale wraps the Matern52 with an amplitude hyperparameter amp2,
    # multiplying the covariance (and hence its gradient) by amp2.
    kernel = Scale(Matern52(D))
    kernel.amp2.value = 5.75

    data1 = npr.randn(N, D)
    data2 = npr.randn(M, D)

    loss = np.sum(kernel.cross_cov(data1, data2))
    dloss = kernel.cross_cov_grad_data(data1, data2).sum(0)

    dloss_est = np.zeros(dloss.shape)
    for i in range(M):
        for j in range(D):
            data2[i, j] += eps
            loss_1 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] -= 2 * eps
            loss_2 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] += eps
            dloss_est[i, j] = (loss_1 - loss_2) / (2 * eps)

    assert np.linalg.norm(dloss - dloss_est) < 1e-6
def _build(self):
    self.params = {}
    self.latent_values = None

    # Build the transformer
    beta_warp = BetaWarp(self.num_dims)
    beta_alpha, beta_beta = beta_warp.hypers
    self.params['beta_alpha'] = beta_alpha
    self.params['beta_beta'] = beta_beta

    transformer = Transformer(self.num_dims)
    transformer.add_layer(beta_warp)

    # Build the component kernels
    input_kernel = Matern52(self.num_dims)
    ls = input_kernel.hypers
    self.params['ls'] = ls

    # Now apply the transformation.
    transform_kernel = TransformKernel(input_kernel, transformer)

    # Add some perturbation for stability
    stability_noise = Noise(self.num_dims)

    # Finally, make a noisy version if necessary.
    # In a classifier GP the notion of "noise" is really just the scale.
    if self.noiseless:
        self._kernel = SumKernel(transform_kernel, stability_noise)
    else:
        scaled_kernel = Scale(transform_kernel)
        self._kernel = SumKernel(scaled_kernel, stability_noise)
        amp2 = scaled_kernel.hypers
        self.params['amp2'] = amp2

    # Build the mean function (just a constant mean for now)
    self.mean = Hyperparameter(initial_value=0.0,
                               prior=priors.Gaussian(0.0, 1.0),
                               name='mean')
    self.params['mean'] = self.mean

    # Build the latent values. Empty for now until the GP gets data.
    self.latent_values = Hyperparameter(initial_value=np.array([]),
                                        name='latent values')

    # Build the samplers. amp2 is only sampled in the noisy case, where it
    # is defined above.
    to_sample = [self.mean] if self.noiseless else [self.mean, amp2]
    self._samplers.append(
        SliceSampler(*to_sample, compwise=False, thinning=self.thinning))
    self._samplers.append(
        WhitenedPriorSliceSampler(ls, beta_alpha, beta_beta,
                                  compwise=True, thinning=self.thinning))
    self.latent_values_sampler = EllipticalSliceSampler(
        self.latent_values, thinning=self.ess_thinning)
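# For reference, the kernel assembled by _build has this structure in the
# noisy case (the noiseless case drops the Scale wrapper):
#
#   SumKernel(Scale(TransformKernel(Matern52(D), Transformer[BetaWarp(D)])),
#             Noise(D))
#
# A minimal standalone sketch of the same composition, assuming num_dims = D
# and the constructors used above; build_classifier_kernel is a hypothetical
# name, not part of this module.
def build_classifier_kernel(D, noiseless=False):
    transformer = Transformer(D)
    transformer.add_layer(BetaWarp(D))
    transform_kernel = TransformKernel(Matern52(D), transformer)
    if noiseless:
        return SumKernel(transform_kernel, Noise(D))
    return SumKernel(Scale(transform_kernel), Noise(D))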
def test_grad():
    npr.seed(1)

    eps = 1e-5
    N = 10
    M = 5
    D = 5

    beta_warp = BetaWarp(2)
    norm = Normalization(2)
    lin = Linear(D)

    transformer = Transformer(D)
    # Each entry is a tuple, (transformation, indices_it_acts_on).
    transformer.add_layer(
        (beta_warp, [0, 2]),
        (norm, [1, 4]))  # This is crazy. We would never do this.
    # One transformation means apply to all dimensions.
    transformer.add_layer(lin)

    kernel = TransformKernel(Matern52(lin.num_factors), transformer)

    data1 = npr.rand(N, D)
    data2 = npr.rand(M, D)

    loss = np.sum(kernel.cross_cov(data1, data2))
    dloss = kernel.cross_cov_grad_data(data1, data2).sum(0)

    dloss_est = np.zeros(dloss.shape)
    for i in range(M):
        for j in range(D):
            data2[i, j] += eps
            loss_1 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] -= 2 * eps
            loss_2 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] += eps
            dloss_est[i, j] = (loss_1 - loss_2) / (2 * eps)

    assert np.linalg.norm(dloss - dloss_est) < 1e-6
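# With the finite_diff_cross_cov_grad sketch above, each of these gradient
# tests would reduce to the same three-line comparison, e.g.:
#
#   dloss = kernel.cross_cov_grad_data(data1, data2).sum(0)
#   dloss_est = finite_diff_cross_cov_grad(kernel, data1, data2)
#   assert np.linalg.norm(dloss - dloss_est) < 1e-6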