def test_subgraph_grad():
    """Check that a single ``grad`` call with no ``known_grads`` matches
    chaining successive ``subgraph_grad`` calls across layer boundaries.

    A two-layer tanh network is built; the reference gradients of the
    combined cost are compared against gradients assembled layer-by-layer,
    where each ``subgraph_grad`` call receives the previous call's output
    gradients as its ``start``.
    """
    x = fvector("x")
    t = fvector("t")

    # One seeded generator for both weight init and input values.
    # (np.random.randn / RandomState are the legacy API; default_rng is
    # the recommended replacement and matches test_known_grads.)
    rng = np.random.default_rng([2012, 11, 15])
    w1 = aesara.shared(rng.standard_normal((3, 4)))
    w2 = aesara.shared(rng.standard_normal((4, 2)))

    a1 = tanh(dot(x, w1))
    a2 = tanh(dot(a1, w2))
    cost2 = sqr(a2 - t).sum()
    cost2 += sqr(w2.sum())
    cost1 = sqr(w1.sum())

    params = [[w2], [w1]]
    costs = [cost2, cost1]
    grad_ends = [[a1], [x]]

    inputs = [t, x]
    values = [rng.standard_normal((2,)), rng.standard_normal((3,))]
    # np.cast was removed in NumPy 2.0; np.asarray(..., dtype=...) is the
    # documented replacement for np.cast[dtype](value).
    values = [
        np.asarray(value, dtype=ipt.dtype) for ipt, value in zip(inputs, values)
    ]

    wrt = [w2, w1]
    cost = cost2 + cost1

    # Reference: gradient of the combined cost computed in one call.
    true_grads = grad(cost, wrt)
    true_grads = aesara.function(inputs, true_grads)
    true_grads = true_grads(*values)

    # Chain subgraph_grad layer by layer: each iteration's output gradients
    # (keyed by the layer's "end" variables) seed the next iteration.
    next_grad = None
    param_grads = []
    for i in range(2):
        param_grad, next_grad = subgraph_grad(
            wrt=params[i], end=grad_ends[i], start=next_grad, cost=costs[i]
        )
        next_grad = OrderedDict(zip(grad_ends[i], next_grad))
        param_grads.extend(param_grad)

    pgrads = aesara.function(inputs, param_grads)
    pgrads = pgrads(*values)

    # Both routes must agree up to floating-point error.
    for true_grad, pgrad in zip(true_grads, pgrads):
        assert np.sum(np.abs(true_grad - pgrad)) < 0.00001
def test_known_grads():
    """Check that ``grad`` with no ``known_grads`` matches re-deriving the
    same gradients by feeding each intermediate layer's own gradients back
    in through ``known_grads``.

    For every "cut" through the graph (``layers``), the gradient of the
    cost w.r.t. that cut is computed first, then supplied as
    ``known_grads`` to a second ``grad`` call that continues back to the
    inputs; the result must equal the direct end-to-end gradient.
    """
    full_range = aet.arange(10)
    x = scalar("x")
    t = iscalar("t")
    ft = full_range[t]
    ft.name = "ft"
    coeffs = vector("c")
    ct = coeffs[t]
    ct.name = "ct"
    p = x**ft
    p.name = "p"
    y = ct * p
    y.name = "y"
    cost = sqr(y)
    cost.name = "cost"

    # Successive cuts through the graph, from the cost back to the inputs.
    layers = [[cost], [y], [ct, p], [ct, x, ft], [coeffs, t, full_range, x]]

    inputs = [coeffs, t, x]
    rng = np.random.default_rng([2012, 11, 15])
    values = [
        rng.standard_normal((10,)),
        rng.integers(10),
        rng.standard_normal(),
    ]
    # np.cast was removed in NumPy 2.0; np.asarray(..., dtype=...) is the
    # documented replacement for np.cast[dtype](value).
    values = [
        np.asarray(value, dtype=ipt.dtype) for ipt, value in zip(inputs, values)
    ]

    # Reference: direct end-to-end gradients.
    true_grads = grad(cost, inputs, disconnected_inputs="ignore")
    true_grads = aesara.function(inputs, true_grads)
    true_grads = true_grads(*values)

    for layer in layers:
        # Gradients of the cost w.r.t. this cut ...
        first = grad(cost, layer, disconnected_inputs="ignore")
        known = OrderedDict(zip(layer, first))
        # ... fed back in as known_grads must reproduce the reference.
        full = grad(
            cost=None, known_grads=known, wrt=inputs, disconnected_inputs="ignore"
        )
        full = aesara.function(inputs, full)
        full = full(*values)

        assert len(true_grads) == len(full)
        for a, b in zip(true_grads, full):
            assert np.allclose(a, b)