def test_take_along_axis_grad(self, shape, axis, samples):
    if axis < 0:
        _axis = len(shape) + axis
    else:
        _axis = axis
    # Setup the theano function
    t_arr, t_indices = self.get_input_tensors(shape)
    t_out2 = theano.grad(
        tt.sum(self._output_tensor(t_arr**2, t_indices, axis)),
        t_arr,
    )
    func = theano.function([t_arr, t_indices], [t_out2])

    # Test that the gradient gives the same output as what is expected
    arr, indices = self.get_input_values(shape, axis, samples)
    expected_grad = np.zeros_like(arr)
    slicer = [slice(None)] * len(shape)
    for i in range(indices.shape[axis]):
        slicer[axis] = i
        # Count how many times each entry of `arr` is gathered along `axis`
        inds = indices[tuple(slicer)].reshape(
            shape[:_axis] + (1,) + shape[_axis + 1:]
        )
        inds = _make_along_axis_idx(shape, inds, _axis)
        expected_grad[inds] += 1
    # d/d_arr sum(take_along_axis(arr**2, ...)) = (selection count) * 2 * arr
    expected_grad *= 2 * arr
    out = func(arr, indices)[0]
    assert np.allclose(out, expected_grad)
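# Hedged sketch (not from the source): the expected gradient above, computed directly
# with NumPy for a concrete axis=1 case. The gradient of
# sum(take_along_axis(arr**2, idx, 1)) w.r.t. arr is 2*arr times the number of times
# each entry is gathered, which np.add.at accumulates without the explicit loop.
import numpy as np

arr = np.random.randn(3, 4)
idx = np.random.randint(0, 4, size=(3, 2))

counts = np.zeros_like(arr)
np.add.at(counts, (np.arange(3)[:, None], idx), 1.0)  # per-entry selection counts
expected_grad = counts * 2 * arr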
def apply(self, f):
    # f: kernel function for KSD, f(histogram) -> (k(x, .), \nabla_x k(x, .))
    X = self.approx.histogram
    t = self.approx.normalizing_constant
    dlogpdx = theano.scan(
        fn=lambda zg: theano.grad(self.logp_norm(zg), zg),
        sequences=[X]
    )[0]  # bottleneck
    Kxy, dxkxy = f(X)
    # scaling factor: not needed for Kxy as we already scaled dlogpdx
    dxkxy /= t
    n = X.shape[0].astype('float32') / t
    svgd_grad = (tt.dot(Kxy, dlogpdx) + dxkxy) / n
    return -1 * svgd_grad  # gradient
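# Hedged NumPy sketch (not from the source) of the quantity apply() assembles above:
# the Stein variational gradient
#   phi(x_i) = (1/n) * sum_j [ k(x_j, x_i) * dlogp(x_j) + grad_{x_j} k(x_j, x_i) ],
# using an illustrative RBF kernel; `dlogp` below is a placeholder for the score function.
import numpy as np

def rbf_kernel_and_grad(X, h=1.0):
    diff = X[:, None, :] - X[None, :, :]                # (n, n, d): x_j - x_i pairs
    Kxy = np.exp(-(diff ** 2).sum(-1) / (2.0 * h))      # (n, n) kernel matrix
    dxkxy = -(Kxy[:, :, None] * diff).sum(axis=0) / h   # (n, d): sum_j grad_{x_j} k(x_j, x_i)
    return Kxy, dxkxy

def svgd_direction(X, dlogp, h=1.0):
    Kxy, dxkxy = rbf_kernel_and_grad(X, h)
    return (Kxy.dot(dlogp(X)) + dxkxy) / X.shape[0]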
def dlogp(self):
    # Row-wise gradient of the normalized log-density over the particle matrix;
    # theano.scan returns (outputs, updates), so [0] keeps only the outputs.
    return theano.scan(
        fn=lambda zg: theano.grad(self.approx.logp_norm(zg), zg),
        sequences=[self.input_matrix]
    )[0]
def train_model(learning_rate=0.1, n_epochs=1000, batch_size=20, n_hidden=500,
                L1_reg=0.0, L2_reg=0.0001):
    # L1_reg / L2_reg are used below but were missing from the signature; the defaults
    # here are assumed placeholders. The *_set_x / *_set_y Theano shared variables are
    # assumed to be defined elsewhere (e.g. by a data-loading helper).

    ###############
    # BUILD MODEL #
    ###############
    print("...building model")

    # symbolic minibatch index and input/label variables
    # (assumed types: float matrix for features, int vector for labels)
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    rng = numpy.random.RandomState(1234)

    classifier = DeepVS(
        rng=rng,
        input=x,
        n_in=42,
        n_hidden=n_hidden,
        n_out=3
    )

    # cost to minimise: negative log-likelihood plus L1/L2 regularisation
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients are collected in the list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # plain SGD update rule: param <- param - learning_rate * gradient
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compile a Theano function `train_model` that returns the cost and, at the
    # same time, updates the model parameters according to the rules in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
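    # Hedged sketch (not in the original): a minimal minibatch training loop that would
    # typically follow; it relies on the same assumed shared dataset variables as above.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size

    print("...training")
    for epoch in range(n_epochs):
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
        validation_losses = [validate_model(i) for i in range(n_valid_batches)]
        print("epoch %i, validation error %f %%"
              % (epoch + 1, numpy.mean(validation_losses) * 100.0))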