def test_fill_grad(self):
    # Fix bug reported at
    # https://groups.google.com/d/topic/theano-users/nQshB8gUA6k/discussion
    x = TensorType(config.floatX, (False, True, False))("x")
    y = TensorType(config.floatX, (False, True, False))("y")
    e = second(x, y)
    aesara.grad(e.sum(), y)
def test_grad_test_values(self):
    # Regression test for test values of `ifelse` gradient.
    with aesara.config.change_flags(compute_test_value="raise"):
        x = scalar("x")
        x.tag.test_value = 1
        # Used to crash due to undefined test value.
        aesara.grad(ifelse(0, x, x), x)
def test_mean(mode):
    a = iscalar("a")
    b = iscalar("b")
    z = mean(a, b)
    z_fn = aesara.function([a, b], z, mode=mode)
    res = z_fn(1, 1)
    assert np.allclose(res, 1.0)

    a = fscalar("a")
    b = fscalar("b")
    c = fscalar("c")
    z = mean(a, b, c)

    z_fn = aesara.function([a, b, c], aesara.grad(z, [a]), mode=mode)
    res = z_fn(3, 4, 5)
    assert np.allclose(res, 1 / 3)

    z_fn = aesara.function([a, b, c], aesara.grad(z, [b]), mode=mode)
    res = z_fn(3, 4, 5)
    assert np.allclose(res, 1 / 3)

    z = mean()
    z_fn = aesara.function([], z, mode=mode)
    assert z_fn() == 0
def test_cholesky_grad_indef():
    x = matrix()
    mat = np.array([[1, 0.2], [0.2, -2]]).astype(config.floatX)

    cholesky = Cholesky(lower=True, on_error="raise")
    chol_f = function([x], grad(cholesky(x).sum(), [x]))
    with pytest.raises(scipy.linalg.LinAlgError):
        chol_f(mat)

    cholesky = Cholesky(lower=True, on_error="nan")
    chol_f = function([x], grad(cholesky(x).sum(), [x]))
    assert np.all(np.isnan(chol_f(mat)))
def test_grad_lazy_if(self):
    # Tests that we can compute the gradients through a lazy `ifelse`.
    x = vector("x", dtype=self.dtype)
    y = vector("y", dtype=self.dtype)
    c = iscalar("c")
    z = ifelse(c, x, y)
    gx, gy = aesara.grad(z.sum(), [x, y])

    f = function(
        [c, x, y], [self.cast_output(gx), self.cast_output(gy)], mode=self.mode
    )
    # Only 2 of the 3 `ifelse` are moved to the GPU.
    # The one that stays on the CPU is for the shape.
    self.assertFunctionContains(f, self.get_ifelse(1), min=2, max=3)

    rng = np.random.RandomState(utt.fetch_seed())
    xlen = rng.randint(200)
    ylen = rng.randint(200)
    vx = np.asarray(rng.uniform(size=(xlen,)), self.dtype)
    vy = np.asarray(rng.uniform(size=(ylen,)), self.dtype)

    gx0, gy0 = f(1, vx, vy)
    assert np.allclose(gx0.shape, vx.shape)
    assert np.allclose(gy0.shape, vy.shape)
    assert np.all(np.asarray(gx0) == 1.0)
    assert np.all(np.asarray(gy0) == 0.0)

    gx0, gy0 = f(0, vx, vy)
    assert np.allclose(gx0.shape, vx.shape)
    assert np.allclose(gy0.shape, vy.shape)
    assert np.all(np.asarray(gx0) == 0.0)
    assert np.all(np.asarray(gy0) == 1.0)
def test_take_along_axis_grad(self, shape, axis, samples):
    if axis < 0:
        _axis = len(shape) + axis
    else:
        _axis = axis

    # Set up the Aesara function
    t_arr, t_indices = self.get_input_tensors(shape)
    t_out2 = aesara.grad(
        aet.sum(self._output_tensor(t_arr ** 2, t_indices, axis)),
        t_arr,
    )
    func = aesara.function([t_arr, t_indices], [t_out2])

    # Test that the gradient gives the same output as what is expected
    arr, indices = self.get_input_values(shape, axis, samples)
    expected_grad = np.zeros_like(arr)
    slicer = [slice(None)] * len(shape)
    for i in range(indices.shape[axis]):
        slicer[axis] = i
        inds = indices[tuple(slicer)].reshape(
            shape[:_axis] + (1,) + shape[_axis + 1 :]
        )
        inds = _make_along_axis_idx(shape, inds, _axis)
        expected_grad[inds] += 1
    expected_grad *= 2 * arr
    out = func(arr, indices)[0]
    assert np.allclose(out, expected_grad)
def test_connection_pattern_override(self, cls_ofg):
    x, y = vectors("xy")

    def f1(x, y):
        del x
        # but we know how to backpropagate for x for some reason,
        # and we don't care about the gradient wrt y
        return y + tt_round(y)

    def f1_back(inputs, output_gradients):
        return [output_gradients[0], aesara.gradient.disconnected_type()]

    op = cls_ofg(
        inputs=[x, y],
        outputs=[f1(x, y)],
        grad_overrides=f1_back,
        connection_pattern=[[True], [False]],  # This is new
        on_unused_input="ignore",  # This is new
    )

    c = op(x, y)
    g1 = aesara.grad(c.sum(), x)

    out = g1.eval(
        {
            x: np.ones((5,), dtype=np.float32),
            y: np.ones((5,), dtype=np.float32),
        }
    )
    assert np.allclose(out, [1.0] * 5)
def prior_dlogp(vars, model, flat_view):
    """Returns the gradient of the prior on the parameters as a vector of size D x 1."""
    terms = at.concatenate(
        [aesara.grad(var.logpt, var).flatten() for var in vars], axis=0
    )
    dlogp = aesara.clone_replace(terms, flat_view.replacements, strict=False)
    return dlogp
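# A minimal, self-contained sketch of the flatten-and-concatenate pattern used by
# `prior_dlogp` above, written in plain Aesara with hypothetical variables `mu` and
# `sigma` and a toy log-prior: the gradient w.r.t. each parameter is flattened and
# stacked into one vector of length D. It does not use the PyMC-specific
# `logpt`/`flat_view` machinery.
import aesara
import aesara.tensor as at

mu = at.vector("mu")
sigma = at.vector("sigma")
logp = -0.5 * at.sum((mu / sigma) ** 2) - at.sum(at.log(sigma))  # toy scalar log-prior
dlogp = at.concatenate(
    [aesara.grad(logp, v).flatten() for v in (mu, sigma)], axis=0
)
dlogp_fn = aesara.function([mu, sigma], dlogp)  # evaluates to a flat vector of size D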
def test_grad_lazy_if(self):
    # Tests that we can compute the gradients through a lazy `ifelse`.
    x = vector("x", dtype=self.dtype)
    y = vector("y", dtype=self.dtype)
    c = iscalar("c")
    z = ifelse(c, x, y)
    gx, gy = aesara.grad(z.sum(), [x, y])

    f = function(
        [c, x, y], [self.cast_output(gx), self.cast_output(gy)], mode=self.mode
    )
    self.assertFunctionContains(f, self.get_ifelse(1), min=2, max=3)

    rng = np.random.default_rng(utt.fetch_seed())
    xlen = rng.integers(200)
    ylen = rng.integers(200)
    vx = np.asarray(rng.uniform(size=(xlen,)), self.dtype)
    vy = np.asarray(rng.uniform(size=(ylen,)), self.dtype)

    gx0, gy0 = f(1, vx, vy)
    assert np.allclose(gx0.shape, vx.shape)
    assert np.allclose(gy0.shape, vy.shape)
    assert np.all(np.asarray(gx0) == 1.0)
    assert np.all(np.asarray(gy0) == 0.0)

    gx0, gy0 = f(0, vx, vy)
    assert np.allclose(gx0.shape, vx.shape)
    assert np.allclose(gy0.shape, vy.shape)
    assert np.all(np.asarray(gx0) == 0.0)
    assert np.all(np.asarray(gy0) == 1.0)
def test_CheckAndRaise_basic_c(linker):
    exc_msg = "this is the exception"
    check_and_raise = CheckAndRaise(CustomException, exc_msg)

    conds = at.scalar()
    y = check_and_raise(at.as_tensor(1), conds)
    y_fn = aesara.function([conds], y, mode=Mode(linker))

    with pytest.raises(CustomException, match=exc_msg):
        y_fn(0)

    x = at.vector()
    y = check_and_raise(x, conds)
    y_fn = aesara.function([conds, x], y.shape, mode=Mode(linker, OPT_FAST_RUN))

    x_val = np.array([1.0], dtype=aesara.config.floatX)
    assert np.array_equal(y_fn(0, x_val), x_val)

    y = check_and_raise(x, at.as_tensor(0))
    y_grad = aesara.grad(y.sum(), [x])
    y_fn = aesara.function([x], y_grad, mode=Mode(linker, OPT_FAST_RUN))

    assert np.array_equal(y_fn(x_val), [x_val])
def test_multiple_out_grad(self):
    # Tests that we can compute the gradients through a lazy `ifelse`
    # with multiple outputs.
    x1 = vector("x1")
    x2 = vector("x2")
    y1 = vector("y1")
    y2 = vector("y2")
    c = iscalar("c")
    z = ifelse(c, (x1, x2), (y1, y2))
    grads = aesara.grad(z[0].sum() + z[1].sum(), [x1, x2, y1, y2])

    f = function([c, x1, x2, y1, y2], grads)

    rng = np.random.RandomState(utt.fetch_seed())
    lens = [rng.randint(200) for i in range(4)]
    values = [
        np.asarray(rng.uniform(size=(l,)), aesara.config.floatX) for l in lens
    ]

    outs_1 = f(1, *values)
    assert all([x.shape[0] == y for x, y in zip(outs_1, lens)])
    assert np.all(outs_1[0] == 1.0)
    assert np.all(outs_1[1] == 1.0)
    assert np.all(outs_1[2] == 0.0)
    assert np.all(outs_1[3] == 0.0)

    outs_0 = f(0, *values)
    assert all([x.shape[0] == y for x, y in zip(outs_0, lens)])
    assert np.all(outs_0[0] == 0.0)
    assert np.all(outs_0[1] == 0.0)
    assert np.all(outs_0[2] == 1.0)
    assert np.all(outs_0[3] == 1.0)
def test_grad_cast_input(self):
    x = vector("x", dtype=self.dtype)
    y = vector("y", dtype=self.dtype)
    c = iscalar("c")
    z = ifelse(c, self.cast_output(x), self.cast_output(y))
    gx, gy = aesara.grad(z.sum(), [x, y])

    function([c, x, y], [gx, gy], mode=self.mode)
def test_broadcast_grad():
    # rng = numpy.random.RandomState(utt.fetch_seed())
    x1 = tensor4("x")
    # x1_data = rng.randn(1, 1, 300, 300)
    sigma = scalar("sigma")
    # sigma_data = 20
    window_radius = 3

    filter_1d = aet.arange(-window_radius, window_radius + 1)
    filter_1d = filter_1d.astype(aesara.config.floatX)
    filter_1d = exp(-0.5 * filter_1d ** 2 / sigma ** 2)
    filter_1d = filter_1d / filter_1d.sum()

    filter_W = filter_1d.dimshuffle(["x", "x", 0, "x"])

    y = conv2d(x1, filter_W, border_mode="full", filter_shape=[1, 1, None, None])
    aesara.grad(y.sum(), sigma)
def test_grad_abs():
    a = fscalar("a")
    b = aesara.tensor.nnet.relu(a)
    c = aesara.grad(b, a)
    f = aesara.function([a], c, mode=Mode(optimizer=None))
    # Currently Aesara returns 0.5 for the gradient at 0, but there is no
    # guarantee that this won't change in the future.
    ret = f(0.0)
    assert ret == 0.5, ret
def test_grad_keep_type(self):
    # Tests that the aesara grad method returns a list if it is passed a list
    # and a single variable if it is passed a single variable.
    # pylearn2 depends on aesara behaving this way. This functionality has been
    # added three times and erroneously removed twice. If you do anything that
    # requires changing this test or making it fail, you are almost certainly
    # making a common mistake, NOT fixing something.
    X = matrix()
    y = X.sum()

    G = aesara.grad(y, [X])
    assert isinstance(G, list)

    G = aesara.grad(y, X)
    assert not isinstance(G, list)
def test_broadcast_grad():
    x1 = tensor4("x")
    sigma = scalar("sigma")
    window_radius = 3

    filter_1d = at.arange(-window_radius, window_radius + 1)
    filter_1d = filter_1d.astype(aesara.config.floatX)
    filter_1d = exp(-0.5 * filter_1d**2 / sigma**2)
    filter_1d = filter_1d / filter_1d.sum()

    filter_W = filter_1d.dimshuffle(["x", "x", 0, "x"])

    y = conv2d(x1, filter_W, border_mode="full", filter_shape=[1, 1, None, None])

    # TODO FIXME: Make this a real test and `assert` something
    aesara.grad(y.sum(), sigma)
def test_grad_cast_input(self):
    # Tests the gradient when both inputs are on the GPU.
    x = vector("x", dtype=self.dtype)
    y = vector("y", dtype=self.dtype)
    c = iscalar("c")
    z = ifelse(c, self.cast_output(x), self.cast_output(y))
    gx, gy = aesara.grad(z.sum(), [x, y])

    function([c, x, y], [gx, gy], mode=self.mode)
def test_any_grad(self):
    x = bmatrix("x")
    x_all = x.any()
    gx = aesara.grad(x_all, x)
    f = aesara.function([x], gx)

    x_random = self.rng.binomial(n=1, p=0.5, size=(5, 7)).astype("int8")
    for x_val in (x_random, np.zeros_like(x_random), np.ones_like(x_random)):
        gx_val = f(x_val)
        assert gx_val.shape == x_val.shape
        assert np.all(gx_val == 0)
def setup_method(self):
    self.k = iscalar("k")
    self.A = vector("A")
    result, _ = scan(
        fn=lambda prior_result, A: prior_result * A,
        outputs_info=aet.ones_like(self.A),
        non_sequences=self.A,
        n_steps=self.k,
    )
    result_check, _ = scan_checkpoints(
        fn=lambda prior_result, A: prior_result * A,
        outputs_info=aet.ones_like(self.A),
        non_sequences=self.A,
        n_steps=self.k,
        save_every_N=100,
    )
    self.result = result[-1]
    self.result_check = result_check[-1]
    self.grad_A = aesara.grad(self.result.sum(), self.A)
    self.grad_A_check = aesara.grad(self.result_check.sum(), self.A)
def test_grad_int_value(self):
    w = aesara.shared(np.random.rand(10))
    b = aesara.shared(np.random.rand())
    params = [w, b]

    x = vector()
    y = scalar()

    score = w.dot(x) + b
    correct = score * y > 0

    loss = ifelse(correct, 0, 1)
    [(param, param - 0.5 * aesara.grad(cost=loss, wrt=param)) for param in params]
def test_gradient_scan():
    # Test for a crash when using MRG inside scan and taking the gradient
    # See https://groups.google.com/d/msg/aesara-dev/UbcYyU5m-M8/UO9UgXqnQP0J
    aesara_rng = MRG_RandomStreams(10)
    w = aesara.shared(np.ones(1, dtype="float32"))

    def one_step(x):
        return x + aesara_rng.uniform((1,), dtype="float32") * w

    x = tensor.vector(dtype="float32")
    values, updates = aesara.scan(one_step, outputs_info=x, n_steps=10)
    gw = aesara.grad(tensor.sum(values[-1]), w)
    f = aesara.function([x], gw)
    f(np.arange(1, dtype="float32"))
def test_binary_crossentropy_reshape():
    # Reported as https://github.com/Theano/Theano/issues/4086
    a = tensor4("a")
    for c in (
        binary_crossentropy(sigmoid(a.reshape((-1, 1))), 1).sum(),
        binary_crossentropy(sigmoid(a).reshape((-1, 1)), 1).sum(),
    ):
        ga = aesara.grad(c, a)
        # This only works when "specialize" options are included
        mode = aesara.compile.get_default_mode().including("fast_run")
        fga = aesara.function([a], ga, mode=mode)
        utt.assert_allclose(
            fga(np.array([[[[30.0]]]], dtype=config.floatX)),
            np.zeros((1, 1, 1, 1), dtype=config.floatX),
        )
def test_grad_log1msigm(self):
    # At some point, this returned NaN, because (1 - sigm(x)) appeared in
    # both the numerator and the denominator of a fraction, but the two
    # nodes in question had not been merged.
    x = matrix("x")
    lr = scalar("lr")
    s = sigmoid(x)
    l = log(1 - s)
    c = l.mean()
    ux = x - lr * aesara.grad(c, x)

    # Before the optimization, inf and NaN will be produced in the graph,
    # and DebugMode will complain. Everything is fine afterwards.
    mode = self.get_mode()
    if not isinstance(mode, aesara.compile.debugmode.DebugMode):
        f = aesara.function([x, lr], ux, mode=mode)
        ux_v = f([[50]], 0.1)
        assert not np.isnan(ux_v)
def __init__(
    self,
    objective: ObjectiveBase,
    aet_x: TensorVariable,
    aet_fun: TensorVariable,
    coeff: Optional[float] = 1.0,
    x_names: Sequence[str] = None,
):
    if not isinstance(objective, ObjectiveBase):
        raise TypeError('objective must be an ObjectiveBase instance')
    if not objective.check_mode(MODE_FUN):
        raise NotImplementedError(f'objective must support mode={MODE_FUN}')
    super().__init__(x_names)
    self.base_objective = objective

    self.aet_x = aet_x
    self.aet_fun = aet_fun
    self._coeff = coeff

    self.obj_op = AesaraObjectiveOp(self, self._coeff)

    # compiled function
    if objective.has_fun:
        self.afun = aesara.function([aet_x], self.obj_op(aet_fun))

    # compiled gradient
    if objective.has_grad:
        self.agrad = aesara.function(
            [aet_x], aesara.grad(self.obj_op(aet_fun), [aet_x])
        )

    # compiled hessian
    if objective.has_hess:
        self.ahess = aesara.function(
            [aet_x], aesara.gradient.hessian(self.obj_op(aet_fun), [aet_x])
        )

    # compiled input mapping
    self.infun = aesara.function([aet_x], aet_fun)

    # temporary storage for evaluation results of objective
    self.inner_ret: ResultDict = {}
def test_downcast_dtype(self):
    # Test that the gradient of a cost wrt a float32 variable does not
    # get upcasted to float64.
    # x has dtype float32, regardless of the value of floatX
    x = aesara.tensor.fscalar("x")
    y = x * 2
    z = aesara.tensor.lscalar("z")

    c = y + z
    dc_dx, dc_dy, dc_dz, dc_dc = aesara.grad(c, [x, y, z, c])

    # The dtype of dc_dy and dc_dz can be either float32 or float64,
    # that might depend on floatX, but is not specified.
    assert dc_dc.dtype in ("float32", "float64")
    assert dc_dz.dtype in ("float32", "float64")
    assert dc_dy.dtype in ("float32", "float64")

    # When the output gradient of y is passed to op.grad, it should
    # be downcasted to float32, so dc_dx should also be float32
    assert dc_dx.dtype == "float32"
def test_subgraph_grad():
    # Tests that the grad method with no known_grads
    # matches what happens if you use successive subgraph_grads
    x = aesara.tensor.fvector("x")
    t = aesara.tensor.fvector("t")
    w1 = aesara.shared(np.random.randn(3, 4))
    w2 = aesara.shared(np.random.randn(4, 2))
    a1 = aesara.tensor.tanh(aesara.tensor.dot(x, w1))
    a2 = aesara.tensor.tanh(aesara.tensor.dot(a1, w2))
    cost2 = aesara.tensor.sqr(a2 - t).sum()
    cost2 += aesara.tensor.sqr(w2.sum())
    cost1 = aesara.tensor.sqr(w1.sum())

    params = [[w2], [w1]]
    costs = [cost2, cost1]
    grad_ends = [[a1], [x]]

    inputs = [t, x]
    rng = np.random.RandomState([2012, 11, 15])
    values = [rng.randn(2), rng.randn(3)]
    values = [np.cast[ipt.dtype](value) for ipt, value in zip(inputs, values)]

    wrt = [w2, w1]
    cost = cost2 + cost1

    true_grads = aesara.grad(cost, wrt)
    true_grads = aesara.function(inputs, true_grads)
    true_grads = true_grads(*values)

    next_grad = None
    param_grads = []
    for i in range(2):
        param_grad, next_grad = aesara.subgraph_grad(
            wrt=params[i], end=grad_ends[i], start=next_grad, cost=costs[i]
        )
        next_grad = OrderedDict(zip(grad_ends[i], next_grad))
        param_grads.extend(param_grad)

    pgrads = aesara.function(inputs, param_grads)
    pgrads = pgrads(*values)

    for true_grad, pgrad in zip(true_grads, pgrads):
        assert np.sum(np.abs(true_grad - pgrad)) < 0.00001
def test_sparseblockgemv_grad_shape(self):
    b = fmatrix()
    W = ftensor4()
    h = ftensor3()
    iIdx = imatrix()
    oIdx = imatrix()

    o = self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
    go = aesara.grad(o.sum(), [b, W, h])

    f = aesara.function([W, h, iIdx, b, oIdx], go, mode=self.mode)

    W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()

    # just make sure that it runs correctly and all the shapes are ok.
    b_g, W_g, h_g = f(W_val, h_val, iIdx_val, b_val, oIdx_val)

    assert b_g.shape == b_val.shape
    assert h_g.shape == h_val.shape
    assert W_g.shape == W_val.shape
def elemwise_dlogL(vars, model, flat_view):
    """
    Returns the Jacobian of the log likelihood for each training datum wrt vars
    as a matrix of size N x D.
    """
    # select one observed random variable
    obs_var = model.observed_RVs[0]
    # tensor of shape (batch_size,)
    logL = obs_var.logp_elemwiset.sum(
        axis=tuple(range(1, obs_var.logp_elemwiset.ndim))
    )
    # calculate fisher information
    terms = []
    for var in vars:
        output, _ = aesara.scan(
            lambda i, logX=logL, v=var: aesara.grad(logX[i], v).flatten(),
            sequences=[at.arange(logL.shape[0])],
        )
        terms.append(output)
    dlogL = aesara.clone_replace(
        at.concatenate(terms, axis=1), flat_view.replacements, strict=False
    )
    return dlogL
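# A minimal, self-contained sketch (hypothetical names `w`, `X`) of the per-datum
# gradient pattern used in `elemwise_dlogL` above: scan over the rows of an
# elementwise log-likelihood and take the gradient of each row w.r.t. the parameter
# vector, yielding an (N, D) Jacobian. Illustration only; it does not use the
# PyMC-specific `logp_elemwiset`/`flat_view` machinery.
import aesara
import aesara.tensor as at

w = at.vector("w")
X = at.matrix("X")
row_logp = -0.5 * (X.dot(w)) ** 2  # one toy log-likelihood term per datum, shape (N,)
jac, _ = aesara.scan(
    lambda i, logp, wrt: aesara.grad(logp[i], wrt),
    sequences=[at.arange(row_logp.shape[0])],
    non_sequences=[row_logp, w],
)
jac_fn = aesara.function([w, X], jac)  # evaluates to an (N, D) matrix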
def test_blocksparse_grad_merge(self):
    b = fmatrix()
    h = ftensor3()
    iIdx = lmatrix()
    oIdx = lmatrix()

    W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()
    W = gpuarray_shared_constructor(W_val, context=test_ctx_name)

    o = gpu_sparse_block_gemv(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
    gW = aesara.grad(o.sum(), W)

    lr = np.asarray(0.05, dtype="float32")

    upd = W - lr * gW

    f1 = aesara.function([h, iIdx, b, oIdx], updates=[(W, upd)], mode=mode_with_gpu)

    # Make sure the lr update was merged.
    assert isinstance(f1.maker.fgraph.outputs[0].owner.op, GpuSparseBlockOuter)

    # Exclude the merge optimizations.
    mode = mode_with_gpu.excluding("local_merge_blocksparse_alpha")
    mode = mode.excluding("local_merge_blocksparse_output")

    f2 = aesara.function([h, iIdx, b, oIdx], updates=[(W, upd)], mode=mode)

    # Make sure the lr update is not merged.
    assert not isinstance(f2.maker.fgraph.outputs[0].owner.op, GpuSparseBlockOuter)

    f2(h_val, iIdx_val, b_val, oIdx_val)
    W_ref = W.get_value()

    # reset the var
    W.set_value(W_val)
    f1(h_val, iIdx_val, b_val, oIdx_val)
    W_opt = W.get_value()

    utt.assert_allclose(W_ref, W_opt)
def test_local_csm_grad_c():
    data = vector()
    indices, indptr, shape = (ivector(), ivector(), ivector())
    mode = get_default_mode()

    if aesara.config.mode == "FAST_COMPILE":
        mode = Mode(linker="c|py", optimizer="fast_compile")

    mode = mode.including("specialize", "local_csm_grad_c")
    for CS, cast in [
        (sparse.CSC, sp.sparse.csc_matrix),
        (sparse.CSR, sp.sparse.csr_matrix),
    ]:
        cost = aet_sum(sparse.DenseFromSparse()(CS(data, indices, indptr, shape)))
        f = aesara.function(
            [data, indices, indptr, shape], aesara.grad(cost, data), mode=mode
        )
        assert not any(
            isinstance(node.op, sparse.CSMGrad) for node in f.maker.fgraph.toposort()
        )
        v = cast(random_lil((10, 40), config.floatX, 3))
        f(v.data, v.indices, v.indptr, v.shape)