def _build(self) -> None:
    """Construct the model pieces for this GP: kernel, mean, latent values,
    and the MCMC samplers over the hyperparameters.

    Populates ``self.params`` (name -> Hyperparameter), sets ``self._kernel``,
    ``self.mean``, ``self.latent_values``, appends to ``self._samplers``, and
    creates ``self.latent_values_sampler``.

    NOTE(review): relies on ``self.num_dims``, ``self.noiseless``,
    ``self.thinning``, ``self.ess_thinning`` and an existing ``self._samplers``
    list being set before this is called — presumably by ``__init__``; confirm.
    """
    self.params = {}
    self.latent_values = None

    # Build the transformer: a beta CDF warping of each input dimension,
    # whose (alpha, beta) shape parameters are sampled hyperparameters.
    beta_warp = BetaWarp(self.num_dims)
    beta_alpha, beta_beta = beta_warp.hypers
    self.params['beta_alpha'] = beta_alpha
    self.params['beta_beta'] = beta_beta
    transformer = Transformer(self.num_dims)
    transformer.add_layer(beta_warp)

    # Build the component kernels: Matern 5/2 with per-dimension
    # lengthscale hyperparameters.
    input_kernel = Matern52(self.num_dims)
    ls = input_kernel.hypers
    self.params['ls'] = ls

    # Now apply the transformation: the Matern kernel operates on the
    # beta-warped inputs.
    transform_kernel = TransformKernel(input_kernel, transformer)

    # Add some perturbation for numerical stability of the Cholesky.
    stability_noise = Noise(self.num_dims)

    # Finally make a noisy (scaled) version if necessary.
    # In a classifier GP the notion of "noise" is really just the scale,
    # so the "noiseless" model simply omits the amplitude hyperparameter.
    if self.noiseless:
        self._kernel = SumKernel(transform_kernel, stability_noise)
    else:
        scaled_kernel = Scale(transform_kernel)
        self._kernel = SumKernel(scaled_kernel, stability_noise)
        amp2 = scaled_kernel.hypers
        self.params['amp2'] = amp2

    # Build the mean function (just a constant mean for now).
    self.mean = Hyperparameter(initial_value=0.0, prior=priors.Gaussian(0.0, 1.0), name='mean')
    self.params['mean'] = self.mean

    # Build the latent values. Empty for now until the GP gets data.
    self.latent_values = Hyperparameter(initial_value=np.array([]), name='latent values')

    # Build the samplers. The amplitude (amp2) only exists — and is only
    # sampled — in the non-noiseless case above.
    to_sample = [self.mean] if self.noiseless else [self.mean, amp2]
    self._samplers.append(SliceSampler(*to_sample, compwise=False, thinning=self.thinning))
    self._samplers.append(WhitenedPriorSliceSampler(ls, beta_alpha, beta_beta, compwise=True, thinning=self.thinning))
    # Latent function values get their own elliptical slice sampler.
    self.latent_values_sampler = EllipticalSliceSampler(self.latent_values, thinning=self.ess_thinning)
def test_add_layer():
    """add_layer bookkeeping: output dims, index groups, leftover indices."""
    trans = Transformer(10)
    first = (SimpleTransformation(3), [0, 2, 4])
    second = (SimpleTransformation(4), [1, 3, 5, 7])

    returned_inds = trans.add_layer(first, second)

    # Output dimensionality of the first layer is preserved.
    assert trans.layer_output_dims[0] == 10
    # Transformed outputs are packed to the front, in layer order.
    assert returned_inds == [[0, 1, 2], [3, 4, 5, 6]]
    assert trans.layer_transformations[0][0] == first[0]
    assert trans.layer_transformations[0][1] == second[0]
    assert trans.layer_inds[0][0] == [0, 2, 4]
    assert trans.layer_inds[0][1] == [1, 3, 5, 7]
    # Untouched dimensions trail behind.
    assert trans.layer_remaining_inds[0] == [6, 8, 9]
    # Passing a bare transformation (not a (transformation, inds) tuple)
    # into a multi-entry layer call must be rejected.
    assert_raises(AssertionError, trans.add_layer, first[0])

    # A single transformation spanning all dimensions forms its own layer.
    trans = Transformer(10)
    trans.add_layer((SimpleTransformation(3), [0, 2, 4]),
                    (SimpleTransformation(4), [1, 3, 5, 7]))
    returned_inds = trans.add_layer(SimpleTransformation(10))
    assert len(trans.layer_transformations) == 2
    assert returned_inds == list(range(10))
def test_construction():
    """Smoke test: a Transformer accepts one layer of mixed transformations."""
    npr.seed(1)
    num_dims = 10
    layer = [
        (Normalization(3), [1, 3, 5]),
        (BetaWarp(2), [0, 2]),
        (Linear(3), [6, 8, 9]),
    ]
    trans = Transformer(num_dims)
    trans.add_layer(*layer)
def test_forward_pass_empty_transformer():
    """forward_pass must refuse to run before any layer has been added."""
    num_points, num_dims = 10, 10
    trans = Transformer(num_dims)
    inputs = 0.5 * npr.rand(num_points, num_dims)
    assert_raises(AssertionError, trans.forward_pass, inputs)
def test_forward_pass():
    """Forward pass through a mixed layer: shapes and basic invariants."""
    npr.seed(1)
    num_points, num_dims = 15, 10
    inputs = 0.5 * npr.rand(num_points, num_dims)

    trans = Transformer(num_dims)
    trans.add_layer(
        (Normalization(3), [1, 3, 5]),
        (BetaWarp(2), [0, 2]),
        (Linear(3), [6, 8, 9]),
    )
    outputs = trans.forward_pass(inputs)

    # Linear(3) maps 3 dims to 1 factor: 10 dims in -> 9 dims out.
    assert outputs.shape[1] == 9
    # The untouched dimensions (4 and 7) pass through unchanged at the end.
    assert np.all(outputs[:, 7:] == inputs[:, [4, 7]])
    # Normalized block sums to one.
    assert np.linalg.norm(outputs[:, 0:3].sum(1) - 1) < 1e-10

    # A full-width transformation can be stacked as a second layer.
    trans.add_layer(BetaWarp(9))
def test_validate_layer():
    """validate_layer accepts disjoint in-range groups and rejects the rest."""
    trans = Transformer(10)

    # Disjoint groups, every index in [0, 9]: fine.
    trans.validate_layer([[1, 2, 3], [0, 5, 6, 7]])

    # Index 10 is out of range for a 10-dimensional transformer.
    assert_raises(AssertionError, trans.validate_layer,
                  [[1, 2, 3], [10, 5, 6, 7]])

    # Index 1 appears in both groups: duplicate across groups.
    assert_raises(AssertionError, trans.validate_layer,
                  [[1, 2, 3], [0, 1, 6, 7]])

    # Index 1 repeated inside a single group: duplicate within a group.
    assert_raises(AssertionError, trans.validate_layer,
                  [[1, 2, 1], [0, 5, 6, 7]])
def test_add_layer():
    """add_layer bookkeeping: output dims, index groups, leftover indices.

    Fix: compare against ``list(range(10))`` rather than ``range(10)`` —
    under Python 3 a range object never equals a list, so the final assert
    could not pass; this also matches the other copy of this test in the file.
    """
    t = Transformer(10)
    st1 = (SimpleTransformation(3), [0, 2, 4])
    st2 = (SimpleTransformation(4), [1, 3, 5, 7])
    output_inds = t.add_layer(st1, st2)
    # The layer preserves the total dimensionality.
    assert t.layer_output_dims[0] == 10
    # Transformed outputs are packed to the front, in layer order.
    assert output_inds == [[0, 1, 2], [3, 4, 5, 6]]
    assert t.layer_transformations[0][0] == st1[0] and t.layer_transformations[0][1] == st2[0]
    assert t.layer_inds[0][0] == [0, 2, 4] and t.layer_inds[0][1] == [1, 3, 5, 7]
    # Untouched dimensions trail behind.
    assert t.layer_remaining_inds[0] == [6, 8, 9]
    # A bare transformation in a multi-entry call must be rejected.
    assert_raises(AssertionError, t.add_layer, st1[0])

    # A single full-width transformation forms its own second layer.
    t = Transformer(10)
    st1 = (SimpleTransformation(3), [0, 2, 4])
    st2 = (SimpleTransformation(4), [1, 3, 5, 7])
    st3 = SimpleTransformation(10)
    t.add_layer(st1, st2)
    output_inds = t.add_layer(st3)
    assert len(t.layer_transformations) == 2
    # Py2/Py3-safe: range(10) is not a list under Python 3.
    assert output_inds == list(range(10))
def test_grad():
    """Finite-difference check of TransformKernel.cross_cov_grad_data.

    Fixes: ``xrange`` (Python-2-only; the file already uses Python-3-style
    ``list(range(...))`` elsewhere, and ``range`` iterates fine on both);
    removed the unused local ``loss``.
    """
    npr.seed(1)
    eps = 1e-5
    N = 10
    M = 5
    D = 5

    beta_warp = BetaWarp(2)
    norm = Normalization(2)
    lin = Linear(D)

    transformer = Transformer(D)
    # Each entry is a tuple, (transformation, indices_it_acts_on)
    transformer.add_layer((beta_warp, [0, 2]), (norm, [1, 4]))
    # This is crazy. We would never do this.
    # One transformation means apply to all dimensions.
    transformer.add_layer(lin)

    kernel = TransformKernel(Matern52(lin.num_factors), transformer)

    data1 = npr.rand(N, D)
    data2 = npr.rand(M, D)

    # Analytic gradient of sum(cross_cov) w.r.t. data2.
    dloss = kernel.cross_cov_grad_data(data1, data2).sum(0)

    # Central finite differences, one entry of data2 at a time.
    dloss_est = np.zeros(dloss.shape)
    for i in range(M):
        for j in range(D):
            data2[i, j] += eps
            loss_1 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] -= 2 * eps
            loss_2 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] += eps
            dloss_est[i, j] = (loss_1 - loss_2) / (2 * eps)

    assert np.linalg.norm(dloss - dloss_est) < 1e-6
def test_backward_pass_2():
    """Backward pass through two stacked layers gives known exact gradients."""
    trans = Transformer(10)
    trans.add_layer((SimpleTransformation(3), [0, 2, 4]),
                    (SimpleTransformation(4), [1, 3, 5, 7]))
    trans.add_layer(SimpleTransformation(10))

    # Row 0 is all ones, row 1 all twos.
    inputs = np.ones((2, 10))
    inputs[1, :] *= 2
    # Forward pass caches the intermediates needed by backward_pass.
    trans.forward_pass(inputs)

    upstream = np.ones((2, 10))
    upstream[1, :] *= 2
    grad = trans.backward_pass(upstream)

    expected = np.array([[8, 8, 8, 8, 8, 8, 2, 8, 2, 2],
                         [64, 64, 64, 64, 64, 64, 8, 64, 8, 8]])
    assert np.all(grad == expected)
def test_forward_pass_2():
    """Forward pass through two stacked layers gives known exact outputs."""
    trans = Transformer(10)
    trans.add_layer((SimpleTransformation(3), [0, 2, 4]),
                    (SimpleTransformation(4), [1, 3, 5, 7]))
    trans.add_layer(SimpleTransformation(10))

    # Row 0 is all ones, row 1 all twos.
    inputs = np.ones((2, 10))
    inputs[1, :] *= 2
    outputs = trans.forward_pass(inputs)

    expected = np.array([[4, 4, 4, 4, 4, 4, 4, 2, 2, 2],
                         [8, 8, 8, 8, 8, 8, 8, 4, 4, 4]])
    assert np.all(outputs == expected)
def test_grad():
    """Finite-difference check of TransformKernel.cross_cov_grad_data.

    Fixes: ``xrange`` is Python-2-only (``range`` iterates identically on
    both versions, matching the Python-3-style code elsewhere in this file);
    removed the unused local ``loss``.
    """
    npr.seed(1)
    eps = 1e-5
    N = 10
    M = 5
    D = 5

    beta_warp = BetaWarp(2)
    norm = Normalization(2)
    lin = Linear(D)

    transformer = Transformer(D)
    # Each entry is a tuple, (transformation, indices_it_acts_on)
    transformer.add_layer((beta_warp, [0, 2]), (norm, [1, 4]))
    # This is crazy. We would never do this.
    # One transformation means apply to all dimensions.
    transformer.add_layer(lin)

    kernel = TransformKernel(Matern52(lin.num_factors), transformer)

    data1 = npr.rand(N, D)
    data2 = npr.rand(M, D)

    # Analytic gradient of sum(cross_cov) w.r.t. data2.
    dloss = kernel.cross_cov_grad_data(data1, data2).sum(0)

    # Central finite differences, one entry of data2 at a time.
    dloss_est = np.zeros(dloss.shape)
    for i in range(M):
        for j in range(D):
            data2[i, j] += eps
            loss_1 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] -= 2 * eps
            loss_2 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] += eps
            dloss_est[i, j] = (loss_1 - loss_2) / (2 * eps)

    assert np.linalg.norm(dloss - dloss_est) < 1e-6
def test_backward_pass():
    """Finite-difference check of Transformer.backward_pass, one and two layers.

    Fixes: ``xrange`` is Python-2-only (``range`` works on both versions,
    matching the Python-3-style code elsewhere in this file); the duplicated
    finite-difference loop is factored into a local helper.
    """
    npr.seed(1)
    eps = 1e-5
    N = 15
    D = 10
    data = 0.5 * npr.rand(N, D)

    def fd_grad_of_squared_loss(t, x):
        # Central finite differences of sum(forward_pass(x)**2) w.r.t. x.
        est = np.zeros(x.shape)
        for i in range(N):
            for j in range(D):
                x[i, j] += eps
                loss_1 = np.sum(t.forward_pass(x) ** 2)
                x[i, j] -= 2 * eps
                loss_2 = np.sum(t.forward_pass(x) ** 2)
                x[i, j] += eps
                est[i, j] = (loss_1 - loss_2) / (2 * eps)
        return est

    norm = Normalization(3)
    norm_inds = [1, 3, 5]
    bw = BetaWarp(2)
    bw_inds = [0, 2]
    lin = Linear(3)
    lin_inds = [6, 8, 9]

    t = Transformer(D)

    # Add a layer and test the gradient.
    t.add_layer((norm, norm_inds), (bw, bw_inds), (lin, lin_inds))
    new_data = t.forward_pass(data)
    # d(sum(y^2))/dy = 2y, propagated back through the transformer.
    dloss = t.backward_pass(2 * new_data)
    assert np.linalg.norm(dloss - fd_grad_of_squared_loss(t, data)) < 1e-6

    # Add a second layer and test the gradient again.
    t.add_layer(Linear(9))
    new_data = t.forward_pass(data)
    dloss = t.backward_pass(2 * new_data)
    assert np.linalg.norm(dloss - fd_grad_of_squared_loss(t, data)) < 1e-6