def test_grad(self):
    input1_value = np.arange(9).reshape(3, 3).astype("float32")
    input2_value = 10.0

    grads = [
        tt.grad(self.monitored_input1.sum(), self.input1),
        tt.grad(self.monitored_input2.sum(), self.input2),
    ]

    # Add self.monitored_input1 as an output to the Aesara function to
    # prevent Aesara from optimizing the PdbBreakpoint op out of the
    # function graph
    fct = aesara.function(
        [self.input1, self.input2], grads + [self.monitored_input1]
    )

    gradients = fct(input1_value, input2_value)[:-1]

    expected_gradients = [
        np.ones((3, 3), dtype="float32"),
        np.array(1.0, dtype="float32"),
    ]

    for i in range(len(gradients)):
        np.testing.assert_allclose(gradients[i], expected_gradients[i])
def test_batch_normalization_broadcastable():
    # check if the broadcastable pattern is preserved by the optimizations
    x, dy, scale, bias, mean, var = (
        scalar(n).dimshuffle(["x"] * 5)
        for n in ("x", "dy", "scale", "bias", "mean", "var")
    )

    # forward pass
    out_train, x_mean, x_invstd = batchnorm.batch_normalization_train(
        x, scale, bias, "spatial"
    )
    out_test = batchnorm.batch_normalization_test(x, scale, bias, mean, var, "spatial")

    # backward pass
    grads_train = aet.grad(None, wrt=[x, scale, bias], known_grads={out_train: dy})
    grads_test = aet.grad(None, wrt=[x, scale, bias], known_grads={out_test: dy})

    # compile
    f = aesara.function(
        [x, scale, bias, mean, var, dy],
        [out_train, x_mean, x_invstd, out_test] + grads_train + grads_test,
    )

    assert not any(
        isinstance(
            n.op,
            (
                batchnorm.AbstractBatchNormTrain,
                batchnorm.AbstractBatchNormInference,
                batchnorm.AbstractBatchNormTrainGrad,
            ),
        )
        for n in f.maker.fgraph.toposort()
    )
def test_lop_override(self, cls_ofg):
    x = tt.vector()
    y = 1.0 / (1.0 + tt.exp(-x))

    def lop_ov(inps, outs, grads):
        (y_,) = outs
        (dedy_,) = grads
        return [2.0 * y_ * (1.0 - y_) * dedy_]

    y_, dedy = tt.vector(), tt.vector()
    op_lop_ov = cls_ofg([x, y_, dedy], [2.0 * y_ * (1.0 - y_) * dedy])

    xx = tt.vector()
    yy1 = tt.sum(tt.nnet.sigmoid(xx))
    gyy1 = 2.0 * tt.grad(yy1, xx)

    for ov in [lop_ov, op_lop_ov]:
        op = cls_ofg([x], [y], lop_overrides=ov)
        yy2 = tt.sum(op(xx))
        gyy2 = tt.grad(yy2, xx)
        fn = function([xx], [gyy1, gyy2])

        xval = np.random.rand(32).astype(config.floatX)
        y1val, y2val = fn(xval)
        assert np.allclose(y1val, y2val)
def test_hessian(self):
    x = np.linspace(0, 1, 100)
    y = x * x
    spline = SplineWrapper(interpolate.InterpolatedUnivariateSpline(x, y, k=1))

    x_var = at.dscalar("x")
    (g_x,) = at.grad(spline(x_var), [x_var])
    with pytest.raises(NotImplementedError):
        at.grad(g_x, [x_var])
def test_grad(self):
    x = tt.vector("x")
    a = np.random.random(50).astype(config.floatX)

    aesara.function([x], tt.grad(tt.sum(diff(x)), x))
    utt.verify_grad(self.op, [a])

    for k in range(TestDiffOp.nb):
        aesara.function([x], tt.grad(tt.sum(diff(x, n=k)), x))
        utt.verify_grad(DiffOp(n=k), [a], eps=7e-3)
def test_conv(self):
    for conv_op in [conv.conv2d, conv2d]:
        for border_mode in ["valid", "full"]:
            image_shape = (2, 2, 4, 5)
            filter_shape = (2, 2, 2, 3)
            image_dim = len(image_shape)
            filter_dim = len(filter_shape)

            input = tensor.TensorType(aesara.config.floatX, [False] * image_dim)(
                name="input"
            )
            filters = tensor.TensorType(aesara.config.floatX, [False] * filter_dim)(
                name="filter"
            )
            ev_input = tensor.TensorType(aesara.config.floatX, [False] * image_dim)(
                name="ev_input"
            )
            ev_filters = tensor.TensorType(
                aesara.config.floatX, [False] * filter_dim
            )(name="ev_filters")

            def sym_conv2d(input, filters):
                return conv_op(input, filters, border_mode=border_mode)

            output = sym_conv2d(input, filters).flatten()
            yv = tensor.Rop(output, [input, filters], [ev_input, ev_filters])
            mode = None
            if aesara.config.mode == "FAST_COMPILE":
                mode = "FAST_RUN"
            rop_f = function(
                [input, filters, ev_input, ev_filters],
                yv,
                on_unused_input="ignore",
                mode=mode,
            )
            sy, _ = aesara.scan(
                lambda i, y, x1, x2, v1, v2: (tensor.grad(y[i], x1) * v1).sum()
                + (tensor.grad(y[i], x2) * v2).sum(),
                sequences=tensor.arange(output.shape[0]),
                non_sequences=[output, input, filters, ev_input, ev_filters],
                mode=mode,
            )
            scan_f = function(
                [input, filters, ev_input, ev_filters],
                sy,
                on_unused_input="ignore",
                mode=mode,
            )

            dtype = aesara.config.floatX
            image_data = np.random.random(image_shape).astype(dtype)
            filter_data = np.random.random(filter_shape).astype(dtype)
            ev_image_data = np.random.random(image_shape).astype(dtype)
            ev_filter_data = np.random.random(filter_shape).astype(dtype)

            v1 = rop_f(image_data, filter_data, ev_image_data, ev_filter_data)
            v2 = scan_f(image_data, filter_data, ev_image_data, ev_filter_data)
            assert np.allclose(v1, v2), "Rop mismatch: {} {}".format(v1, v2)
def __call__(self, v, cost, parameters, damp):
    # compute Gauss-Newton matrix right-multiplied by `v`
    Jv = tt.Rop(self._s, parameters, v)
    HJv = tt.grad(
        tt.sum(tt.grad(cost, self._s) * Jv), self._s, consider_constant=[Jv]
    )
    JHJv = tt.grad(
        tt.sum(HJv * self._s), parameters, consider_constant=[HJv, Jv]
    )

    # apply Tikhonov damping
    JHJv = [JHJvi + damp * vi for JHJvi, vi in zip(JHJv, v)]
    return JHJv
def test_grad_test_values(self):
    # Regression test for test values of `ifelse` gradient.
    backup = aesara.config.compute_test_value
    aesara.config.compute_test_value = "raise"
    try:
        x = tensor.scalar("x")
        x.tag.test_value = 1
        # Used to crash due to undefined test value.
        tensor.grad(ifelse(0, x, x), x)
    finally:
        aesara.config.compute_test_value = backup
def test_grad_grad(self, cls_ofg):
    x, y, z = tt.matrices("xyz")
    e = x + y * z
    op = cls_ofg([x, y, z], [e])
    f = op(x, y, z)
    f = f - tt.grad(tt.sum(f), y)
    f = f - tt.grad(tt.sum(f), y)
    fn = function([x, y, z], f)
    xv = np.ones((2, 2), dtype=config.floatX)
    yv = np.ones((2, 2), dtype=config.floatX) * 3
    zv = np.ones((2, 2), dtype=config.floatX) * 5
    assert np.allclose(6.0, fn(xv, yv, zv))
def test_hessian(self):
    chol_vec = at.vector("chol_vec")
    chol_vec.tag.test_value = np.array([0.1, 2, 3])
    chol = at.stack(
        [
            at.stack([at.exp(0.1 * chol_vec[0]), 0]),
            at.stack([chol_vec[1], 2 * at.exp(chol_vec[2])]),
        ]
    )
    cov = at.dot(chol, chol.T)
    delta = at.matrix("delta")
    delta.tag.test_value = np.ones((5, 2))
    logp = MvNormalLogp()(cov, delta)
    g_cov, g_delta = at.grad(logp, [cov, delta])
    at.grad(g_delta.sum() + g_cov.sum(), [delta, cov])
def check_rop_lop(self, y, out_shape):
    """
    As `check_mat_rop_lop`, except the input is `self.x`, which is a
    vector. The output is still a vector.
    """
    # TEST ROP
    vx = np.asarray(self.rng.uniform(size=self.in_shape), aesara.config.floatX)
    vv = np.asarray(self.rng.uniform(size=self.in_shape), aesara.config.floatX)

    yv = tensor.Rop(y, self.x, self.v)
    rop_f = function([self.x, self.v], yv, on_unused_input="ignore")
    J, _ = aesara.scan(
        lambda i, y, x: tensor.grad(y[i], x),
        sequences=tensor.arange(y.shape[0]),
        non_sequences=[y, self.x],
    )
    sy = tensor.dot(J, self.v)

    scan_f = function([self.x, self.v], sy, on_unused_input="ignore")

    v1 = rop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert np.allclose(v1, v2), "ROP mismatch: {} {}".format(v1, v2)

    try:
        tensor.Rop(
            aesara.clone(y, replace={self.x: break_op(self.x)}), self.x, self.v
        )
    except ValueError:
        pytest.skip(
            "Rop does not handle non-differentiable inputs "
            "correctly. Bug exposed by fixing Add.grad method."
        )

    # TEST LOP
    vx = np.asarray(self.rng.uniform(size=self.in_shape), aesara.config.floatX)
    vv = np.asarray(self.rng.uniform(size=out_shape), aesara.config.floatX)

    yv = tensor.Lop(y, self.x, self.v)
    lop_f = function([self.x, self.v], yv, on_unused_input="ignore")
    J, _ = aesara.scan(
        lambda i, y, x: tensor.grad(y[i], x),
        sequences=tensor.arange(y.shape[0]),
        non_sequences=[y, self.x],
    )
    sy = tensor.dot(self.v, J)

    scan_f = function([self.x, self.v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert np.allclose(v1, v2), "LOP mismatch: {} {}".format(v1, v2)
def test_hessian(self):
    chol_vec = at.vector("chol_vec")
    chol_vec.tag.test_value = floatX(np.array([0.1, 2, 3]))
    chol = at.stack(
        [
            at.stack([at.exp(0.1 * chol_vec[0]), 0]),
            at.stack([chol_vec[1], 2 * at.exp(chol_vec[2])]),
        ]
    )
    cov = at.dot(chol, chol.T)
    delta = at.matrix("delta")
    delta.tag.test_value = floatX(np.ones((5, 2)))
    logp = MvNormalLogp()(cov, delta)
    g_cov, g_delta = at.grad(logp, [cov, delta])
    # TODO: What's the test? Something needs to be asserted.
    at.grad(g_delta.sum() + g_cov.sum(), [delta, cov])
def check_mat_rop_lop(self, y, out_shape):
    """
    Test the Rop/Lop when the input is a matrix and the output is a vector.

    :param y: the output variable of the op applied to self.mx
    :param out_shape: used to generate a random tensor corresponding to the
        evaluation point of the Rop (i.e. the tensor with which you multiply
        the Jacobian). It should be a tuple of ints.

    If the op has more than one input, one of them must be `mx`, while the
    others must be shared variables / constants. We test only against the
    input `self.mx`, so you must call `check_mat_rop_lop`/`check_rop_lop`
    for the other inputs.

    We expect all inputs/outputs to have dtype floatX.

    If you want to test an op with an output matrix, add a sum after the op
    you want to test.
    """
    vx = np.asarray(self.rng.uniform(size=self.mat_in_shape), aesara.config.floatX)
    vv = np.asarray(self.rng.uniform(size=self.mat_in_shape), aesara.config.floatX)

    yv = tensor.Rop(y, self.mx, self.mv)
    rop_f = function([self.mx, self.mv], yv, on_unused_input="ignore")
    sy, _ = aesara.scan(
        lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
        sequences=tensor.arange(y.shape[0]),
        non_sequences=[y, self.mx, self.mv],
    )
    scan_f = function([self.mx, self.mv], sy, on_unused_input="ignore")

    v1 = rop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert np.allclose(v1, v2), "ROP mismatch: {} {}".format(v1, v2)

    self.check_nondiff_rop(aesara.clone(y, replace={self.mx: break_op(self.mx)}))

    vv = np.asarray(self.rng.uniform(size=out_shape), aesara.config.floatX)
    yv = tensor.Lop(y, self.mx, self.v)
    lop_f = function([self.mx, self.v], yv)

    sy = tensor.grad((self.v * y).sum(), self.mx)
    scan_f = function([self.mx, self.v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert np.allclose(v1, v2), "LOP mismatch: {} {}".format(v1, v2)
def test_multiple_out_grad(self):
    # Tests that we can compute the gradients through lazy if
    x1 = tensor.vector("x1")
    x2 = tensor.vector("x2")
    y1 = tensor.vector("y1")
    y2 = tensor.vector("y2")
    c = tensor.iscalar("c")
    z = ifelse(c, (x1, x2), (y1, y2))
    grads = tensor.grad(z[0].sum() + z[1].sum(), [x1, x2, y1, y2])

    f = aesara.function([c, x1, x2, y1, y2], grads)

    rng = np.random.RandomState(utt.fetch_seed())
    lens = [rng.randint(200) for i in range(4)]
    values = [
        np.asarray(rng.uniform(size=(l,)), aesara.config.floatX) for l in lens
    ]

    outs_1 = f(1, *values)
    assert all([x.shape[0] == y for x, y in zip(outs_1, lens)])
    assert np.all(outs_1[0] == 1.0)
    assert np.all(outs_1[1] == 1.0)
    assert np.all(outs_1[2] == 0.0)
    assert np.all(outs_1[3] == 0.0)

    outs_0 = f(0, *values)
    assert all([x.shape[0] == y for x, y in zip(outs_0, lens)])
    assert np.all(outs_0[0] == 0.0)
    assert np.all(outs_0[1] == 0.0)
    assert np.all(outs_0[2] == 1.0)
    assert np.all(outs_0[3] == 1.0)
def test_grad_lazy_if(self):
    # Tests that we can compute the gradients through lazy if
    x = tensor.vector("x", dtype=self.dtype)
    y = tensor.vector("y", dtype=self.dtype)
    c = tensor.iscalar("c")
    z = ifelse(c, x, y)
    gx, gy = tensor.grad(z.sum(), [x, y])

    f = aesara.function(
        [c, x, y], [self.cast_output(gx), self.cast_output(gy)], mode=self.mode
    )
    # Only 2 of the 3 ifelse ops are moved to the GPU; the one that stays
    # on the CPU is for the shape.
    self.assertFunctionContains(f, self.get_ifelse(1), min=2, max=3)

    rng = np.random.RandomState(utt.fetch_seed())

    xlen = rng.randint(200)
    ylen = rng.randint(200)

    vx = np.asarray(rng.uniform(size=(xlen,)), self.dtype)
    vy = np.asarray(rng.uniform(size=(ylen,)), self.dtype)

    gx0, gy0 = f(1, vx, vy)
    assert np.allclose(gx0.shape, vx.shape)
    assert np.allclose(gy0.shape, vy.shape)
    assert np.all(np.asarray(gx0) == 1.0)
    assert np.all(np.asarray(gy0) == 0.0)

    gx0, gy0 = f(0, vx, vy)
    assert np.allclose(gx0.shape, vx.shape)
    assert np.allclose(gy0.shape, vy.shape)
    assert np.all(np.asarray(gx0) == 0.0)
    assert np.all(np.asarray(gy0) == 1.0)
def test_stan_grad_partial(self):
    # This test combines the following STAN tests:
    # https://github.com/stan-dev/math/blob/master/test/unit/math/prim/fun/inc_beta_dda_test.cpp
    # https://github.com/stan-dev/math/blob/master/test/unit/math/prim/fun/inc_beta_ddb_test.cpp
    # https://github.com/stan-dev/math/blob/master/test/unit/math/prim/fun/inc_beta_ddz_test.cpp
    a, b, z = aet.scalars("a", "b", "z")
    betainc_out = aet.betainc(a, b, z)
    betainc_grad = aet.grad(betainc_out, [a, b, z])
    f_grad = function([a, b, z], betainc_grad)

    decimal_precision = 7 if config.floatX == "float64" else 3

    for test_a, test_b, test_z, expected_dda, expected_ddb, expected_ddz in (
        (1.5, 1.25, 0.001, -0.00028665637, 4.41357328e-05, 0.063300692),
        (1.5, 1.25, 0.5, -0.26038693947, 0.29301795, 1.1905416),
        (1.5, 1.25, 0.6, -0.23806757, 0.32279575, 1.23341068),
        (1.5, 1.25, 0.999, -0.00022264493, 0.0018969609, 0.35587692),
        (15000, 1.25, 0.001, 0, 0, 0),
        (15000, 1.25, 0.5, 0, 0, 0),
        (15000, 1.25, 0.6, 0, 0, 0),
        (15000, 1.25, 0.999, -6.59543226e-10, 2.00849793e-06, 0.009898182),
        (1.5, 12500, 0.001, -3.93756641e-05, 1.47821755e-09, 0.1848717),
        (1.5, 12500, 0.5, 0, 0, 0),
        (1.5, 12500, 0.6, 0, 0, 0),
        (1.5, 12500, 0.999, 0, 0, 0),
        (15000, 12500, 0.001, 0, 0, 0),
        (15000, 12500, 0.5, -8.72102443e-53, 9.55282792e-53, 5.01131256e-48),
        (15000, 12500, 0.6, -4.085621e-14, -5.5067062e-14, 1.15135267e-71),
        (15000, 12500, 0.999, 0, 0, 0),
    ):
        np.testing.assert_almost_equal(
            f_grad(test_a, test_b, test_z),
            [expected_dda, expected_ddb, expected_ddz],
            decimal=decimal_precision,
        )
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    # This is a basic test for GpuCrossentropySoftmaxArgmax1HotWithBias.
    # We check that we loop when there are too many threads.

    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, aesara.compile.DebugMode):
        n_in = 4098
        n_out = 4099

    y = tt.lvector("y")

    b = tt.fvector("b")

    # We precompute the dot product with a big shape beforehand so that the
    # test of GpuCrossentropySoftmax1HotWithBiasDx does not fail with the
    # error "the launch timed out and was terminated" on GPU cards that are
    # not powerful enough. We need the big shape to check the corner case.
    dot_result = tt.fmatrix("dot_result")

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    xx = np.asarray(np.random.rand(batch_size, n_in), dtype=np.float32)
    yy = np.ones((batch_size,), dtype="int32")
    b_values = np.zeros((n_out,), dtype="float32")
    W_values = np.asarray(np.random.rand(n_in, n_out), dtype="float32")

    dot_value = np.asarray(np.dot(xx, W_values), dtype="float32")
    del W_values
    p_y_given_x = tt.nnet.softmax(dot_result + b)
    y_pred = tt.argmax(p_y_given_x, axis=-1)
    loss = -tt.mean(tt.log(p_y_given_x)[tt.arange(y.shape[0]), y])
    dW = tt.grad(loss, dot_result)
    classify = aesara.function(
        inputs=[y, b, dot_result], outputs=[loss, y_pred, dW], mode=mode_without_gpu
    )
    classify_gpu = aesara.function(
        inputs=[y, b, dot_result], outputs=[loss, y_pred, dW], mode=mode_with_gpu
    )

    assert any(
        isinstance(node.op, tt.nnet.CrossentropySoftmaxArgmax1HotWithBias)
        for node in classify.maker.fgraph.toposort()
    )
    assert any(
        isinstance(node.op, GpuCrossentropySoftmaxArgmax1HotWithBias)
        for node in classify_gpu.maker.fgraph.toposort()
    )

    out = classify(yy, b_values, dot_value)
    gout = classify_gpu(yy, b_values, dot_value)

    assert len(out) == len(gout) == 3
    utt.assert_allclose(out[0], gout[0])
    utt.assert_allclose(out[2], gout[2], atol=3e-6)
    utt.assert_allclose(out[1], gout[1])
def dlogp(self):
    grad = at.grad(self.logp_norm.sum(), self.approx_symbolic_matrices)

    def flatten2(tensor):
        return tensor.flatten(2)

    return at.concatenate(list(map(flatten2, grad)), -1)
def test_gradient_with_scaling(self):
    with pm.Model() as model1:
        genvar = generator(gen1())
        m = Normal("m")
        Normal("n", observed=genvar, total_size=1000)
        grad1 = aesara.function([m], aet.grad(model1.logpt, m))
    with pm.Model() as model2:
        m = Normal("m")
        shavar = aesara.shared(np.ones((1000, 100)))
        Normal("n", observed=shavar)
        grad2 = aesara.function([m], aet.grad(model2.logpt, m))

    for i in range(10):
        shavar.set_value(np.ones((100, 100)) * i)
        g1 = grad1(1)
        g2 = grad2(1)
        np.testing.assert_almost_equal(g1, g2)
def test_compute_test_value(self):
    x = tt.scalar("x")
    x.tag.test_value = np.array(1.0, dtype=config.floatX)
    op = OpFromGraph([x], [x**3])
    y = tt.scalar("y")
    y.tag.test_value = np.array(1.0, dtype=config.floatX)
    f = op(y)
    grad_f = tt.grad(f, y)
    assert grad_f.tag.test_value is not None
def test_grad_cast_input(self):
    # Tests the gradient when both inputs are on the GPU.
    x = tensor.vector("x", dtype=self.dtype)
    y = tensor.vector("y", dtype=self.dtype)
    c = tensor.iscalar("c")
    z = ifelse(c, self.cast_output(x), self.cast_output(y))
    gx, gy = tensor.grad(z.sum(), [x, y])

    aesara.function([c, x, y], [gx, gy], mode=self.mode)
def test_Rop_dot_bug_18Oct2013_Jeremiah(self):
    # This test refers to a bug reported by Jeremiah Lowin on 18th Oct
    # 2013. The bug occurs when, through a dot operation, there is only
    # one differentiable path (i.e. there is no gradient wrt one of
    # the inputs).
    x = tensor.arange(20.0).reshape([1, 20])
    v = aesara.shared(np.ones([20]))
    d = tensor.dot(x, v).sum()
    tensor.Rop(tensor.grad(d, v), v, v)
def test_shared_grad(self, cls_ofg):
    x, y, z = tt.matrices("xyz")
    s = shared(np.random.rand(2, 2).astype(config.floatX))
    e = x + y * z + s
    op = cls_ofg([x, y, z], [e])
    f = op(x, y, z)
    f = f - tt.grad(tt.sum(f), y)
    fn = function([x, y, z], f)
    xv = np.ones((2, 2), dtype=config.floatX)
    yv = np.ones((2, 2), dtype=config.floatX) * 3
    zv = np.ones((2, 2), dtype=config.floatX) * 5
    assert np.allclose(11.0 + s.get_value(), fn(xv, yv, zv))

    # grad again, this time with respect to the shared variable
    f = op(x, y, z)
    f = f - tt.grad(tt.sum(f), s)
    fn = function([x, y, z], f)
    assert np.allclose(15.0 + s.get_value(), fn(xv, yv, zv))
def test_rop_lop():
    mx = tensor.matrix("mx")
    mv = tensor.matrix("mv")
    v = tensor.vector("v")
    y = matrix_inverse(mx).sum(axis=0)

    yv = tensor.Rop(y, mx, mv)
    rop_f = function([mx, mv], yv)
    sy, _ = aesara.scan(
        lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
        sequences=tensor.arange(y.shape[0]),
        non_sequences=[y, mx, mv],
    )
    scan_f = function([mx, mv], sy)

    rng = np.random.RandomState(utt.fetch_seed())
    vx = np.asarray(rng.randn(4, 4), aesara.config.floatX)
    vv = np.asarray(rng.randn(4, 4), aesara.config.floatX)

    v1 = rop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert _allclose(v1, v2), "ROP mismatch: {} {}".format(v1, v2)

    raised = False
    try:
        tensor.Rop(aesara.clone(y, replace={mx: break_op(mx)}), mx, mv)
    except ValueError:
        raised = True
    if not raised:
        raise Exception(
            "Op did not raise an error even though the function is not differentiable"
        )

    vv = np.asarray(rng.uniform(size=(4,)), aesara.config.floatX)
    yv = tensor.Lop(y, mx, v)
    lop_f = function([mx, v], yv)

    sy = tensor.grad((v * y).sum(), mx)
    scan_f = function([mx, v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert _allclose(v1, v2), "LOP mismatch: {} {}".format(v1, v2)
def test_downsample(self):
    rng = np.random.RandomState(utt.fetch_seed())
    # ws, shp
    examples = (
        ((2,), (16,)),
        ((2,), (4, 16)),
        ((2,), (4, 2, 16)),
        ((1, 1), (4, 2, 16, 16)),
        ((2, 2), (4, 2, 16, 16)),
        ((3, 3), (4, 2, 16, 16)),
        ((3, 2), (4, 2, 16, 16)),
        ((3, 2, 2), (3, 2, 16, 16, 16)),
        ((2, 3, 2), (3, 2, 16, 16, 16)),
        ((2, 2, 3), (3, 2, 16, 16, 16)),
        ((2, 2, 3, 2), (3, 2, 6, 6, 6, 5)),
    )

    for example, ignore_border in itertools.product(examples, [True, False]):
        (ws, shp) = example
        vx = rng.rand(*shp)
        vex = rng.rand(*shp)

        x = aesara.shared(vx)
        ex = aesara.shared(vex)

        maxpool_op = Pool(ignore_border, ndim=len(ws))
        a_pooled = maxpool_op(x, ws).flatten()
        yv = tensor.Rop(a_pooled, x, ex)
        mode = None
        if aesara.config.mode == "FAST_COMPILE":
            mode = "FAST_RUN"
        rop_f = function([], yv, on_unused_input="ignore", mode=mode)
        sy, _ = aesara.scan(
            lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
            sequences=tensor.arange(a_pooled.shape[0]),
            non_sequences=[a_pooled, x, ex],
            mode=mode,
        )
        scan_f = function([], sy, on_unused_input="ignore", mode=mode)

        v1 = rop_f()
        v2 = scan_f()
        assert np.allclose(v1, v2), "Rop mismatch: {} {}".format(v1, v2)
def __init__(
    self,
    input=None,
    target=None,
    n_input=1,
    n_hidden=1,
    n_output=1,
    lr=1e-3,
    **kw,
):
    super().__init__(**kw)

    if input is None:
        input = tensor.dvector("input")
    if target is None:
        target = tensor.dvector("target")

    self.input = input
    self.target = target
    self.lr = shared(lr, "learning_rate")
    self.w1 = shared(np.zeros((n_hidden, n_input)), "w1")
    self.w2 = shared(np.zeros((n_output, n_hidden)), "w2")

    self.hidden = sigmoid(tensor.dot(self.w1, self.input))
    self.output = tensor.dot(self.w2, self.hidden)
    self.cost = tensor.sum((self.output - self.target) ** 2)

    self.sgd_updates = {
        self.w1: self.w1 - self.lr * tensor.grad(self.cost, self.w1),
        self.w2: self.w2 - self.lr * tensor.grad(self.cost, self.w2),
    }

    self.sgd_step = pfunc(
        params=[self.input, self.target],
        outputs=[self.output, self.cost],
        updates=self.sgd_updates,
    )

    self.compute_output = pfunc([self.input], self.output)
    self.output_from_hidden = pfunc([self.hidden], self.output)
def setup_gpu_op(self, activations, labels, input_length, compute_grad=True):
    gpu_ctc_cost = gpu_ctc(activations, labels, input_length)
    outputs = [gpu_ctc_cost]
    if compute_grad:
        # Symbolic gradient of CTC cost
        gpu_ctc_grad = tt.grad(tt.mean(gpu_ctc_cost), activations)
        outputs += [gpu_ctc_grad]
    return aesara.function([], outputs, mode=mode_with_gpu)
def test_grad_int_value(self):
    w = aesara.shared(np.random.rand(10))
    b = aesara.shared(np.random.rand())
    params = [w, b]

    x = tensor.vector()
    y = tensor.scalar()

    score = w.dot(x) + b
    correct = score * y > 0

    loss = ifelse(correct, 0, 1)
    [(param, param - 0.5 * tensor.grad(cost=loss, wrt=param)) for param in params]
def test_beta_inc_stan_grad_combined(self):
    # This test replicates the following STAN test:
    # https://github.com/stan-dev/math/blob/master/test/unit/math/prim/fun/grad_reg_inc_beta_test.cpp
    a, b, z = at.scalars("a", "b", "z")
    betainc_out = at.betainc(a, b, z)
    betainc_grad = at.grad(betainc_out, [a, b])
    f_grad = function([a, b, z], betainc_grad)

    for test_a, test_b, test_z, expected_dda, expected_ddb in (
        (1.0, 1.0, 1.0, 0, np.nan),
        (1.0, 1.0, 0.4, -0.36651629, 0.30649537),
    ):
        np.testing.assert_allclose(
            f_grad(test_a, test_b, test_z), [expected_dda, expected_ddb]
        )
def __call__(self, nmc, **kwargs):
    op = self.op  # type: KSD
    grad = op.apply(self.tf)
    if self.approx.all_histograms:
        z = self.approx.joint_histogram
    else:
        z = self.approx.symbolic_random
    if "more_obj_params" in kwargs:
        params = self.obj_params + kwargs["more_obj_params"]
    else:
        params = self.test_params + kwargs["more_tf_params"]
    grad *= pm.floatX(-1)
    grads = at.grad(None, params, known_grads={z: grad})
    return self.approx.set_size_and_deterministic(
        grads, nmc, 0, kwargs.get("more_replacements")
    )