def test_both_assert_merge_2_reverse(self):
    # Test case "test_both_assert_merge_2" but in reverse order
    x1 = matrix("x1")
    x2 = matrix("x2")
    x3 = matrix("x3")
    e = dot(x1, assert_op(x2, (x2 > x3).all())) + dot(
        assert_op(x1, (x1 > x3).all()), x2
    )
    g = FunctionGraph([x1, x2, x3], [e])
    MergeOptimizer().optimize(g)
    strg = aesara.printing.debugprint(g, file="str")
    strref = """Elemwise{add,no_inplace} [id A] ''   7
 |dot [id B] ''   6
 | |Assert{msg='Aesara Assert failed!'} [id C] ''   5
 | | |x1 [id D]
 | | |All [id E] ''   3
 | |   |Elemwise{gt,no_inplace} [id F] ''   1
 | |     |x1 [id D]
 | |     |x3 [id G]
 | |Assert{msg='Aesara Assert failed!'} [id H] ''   4
 |   |x2 [id I]
 |   |All [id J] ''   2
 |     |Elemwise{gt,no_inplace} [id K] ''   0
 |       |x2 [id I]
 |       |x3 [id G]
 |dot [id B] ''   6
"""
    print(strg)
    assert strg == strref, (strg, strref)

def test_NoOutputFromInplace():
    x = matrix()
    y = matrix()
    a = dot(x, y)
    b = tanh(a)
    c = tanh(dot(2 * x, y))

    # Ensure that the elemwise op that produces the output is inplace when
    # using a mode that does not include the optimization
    fct_no_opt = function([x, y], [b, c], mode="FAST_RUN")
    op = fct_no_opt.maker.fgraph.outputs[0].owner.op
    assert op.destroy_map and 0 in op.destroy_map
    op = fct_no_opt.maker.fgraph.outputs[1].owner.op
    assert op.destroy_map and 0 in op.destroy_map

    # Ensure that the elemwise op that produces the output is not inplace when
    # using a mode that includes the optimization
    opt = AddFeatureOptimizer(NoOutputFromInplace([1]))
    mode_opt = Mode(linker="py", optimizer="fast_run").register((opt, 49.9))

    fct_opt = function([x, y], [b, c], mode=mode_opt)
    op = fct_opt.maker.fgraph.outputs[0].owner.op
    assert op.destroy_map and 0 in op.destroy_map
    op = fct_opt.maker.fgraph.outputs[1].owner.op
    assert not op.destroy_map or 0 not in op.destroy_map

def L_op(self, inputs, outputs, g_outputs):
    r"""The gradient function should return

        .. math:: V\frac{\partial X^+}{\partial X},

    where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
    ``inputs``.  According to `Wikipedia
    <https://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_pseudoinverse#Derivative>`_,
    this corresponds to

        .. math:: (-X^+ V^T X^+ + X^+ X^{+T} V (I - X X^+) + (I - X^+ X) V X^{+T} X^+)^T.

    """
    (x,) = inputs
    (z,) = outputs
    (gz,) = g_outputs

    x_dot_z = tm.dot(x, z)
    z_dot_x = tm.dot(z, x)

    grad = (
        -matrix_dot(z, gz.T, z)
        + matrix_dot(z, z.T, gz, (aet.identity_like(x_dot_z) - x_dot_z))
        + matrix_dot((aet.identity_like(z_dot_x) - z_dot_x), gz, z.T, z)
    ).T
    return [grad]

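# A NumPy-only sanity check (illustrative, not part of Aesara) that the
# closed-form gradient above matches central finite differences of
# f(X) = sum(V * pinv(X)).  The helper name `_check_pinv_grad_formula` is
# hypothetical, and the tolerances are deliberately loose.
def _check_pinv_grad_formula():
    import numpy as np

    rng = np.random.default_rng(42)
    X = rng.standard_normal((4, 3))
    V = rng.standard_normal((3, 4))  # same shape as pinv(X)

    def f(X):
        # f(X) = <V, X^+>, so df/dX is the docstring formula above.
        return np.sum(V * np.linalg.pinv(X))

    # The docstring formula, written with NumPy.
    Z = np.linalg.pinv(X)
    XZ, ZX = X @ Z, Z @ X
    analytic = (
        -Z @ V.T @ Z
        + Z @ Z.T @ V @ (np.eye(XZ.shape[0]) - XZ)
        + (np.eye(ZX.shape[0]) - ZX) @ V @ Z.T @ Z
    ).T

    # Central finite differences, entry by entry.
    eps = 1e-6
    numeric = np.zeros_like(X)
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            E = np.zeros_like(X)
            E[i, j] = eps
            numeric[i, j] = (f(X + E) - f(X - E)) / (2 * eps)

    assert np.allclose(analytic, numeric, atol=1e-4)
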
def test_grad_name(self):
    A = matrix("A")
    x = vector("x")
    f = dot(x, dot(A, x))
    f.name = "f"
    g = grad(f, x)
    assert g.name == "(df/dx)"

def check_rop_lop(self, y, out_shape):
    """
    As check_mat_rop_lop, except the input is self.x which is a
    vector. The output is still a vector.
    """
    # TEST ROP
    vx = np.asarray(self.rng.uniform(size=self.in_shape), aesara.config.floatX)
    vv = np.asarray(self.rng.uniform(size=self.in_shape), aesara.config.floatX)

    yv = Rop(y, self.x, self.v)
    rop_f = function([self.x, self.v], yv, on_unused_input="ignore")
    J, _ = aesara.scan(
        lambda i, y, x: grad(y[i], x),
        sequences=aet.arange(y.shape[0]),
        non_sequences=[y, self.x],
    )
    sy = dot(J, self.v)

    scan_f = function([self.x, self.v], sy, on_unused_input="ignore")

    v1 = rop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert np.allclose(v1, v2), f"ROP mismatch: {v1} {v2}"

    try:
        Rop(
            aesara.clone_replace(y, replace={self.x: break_op(self.x)}),
            self.x,
            self.v,
        )
    except ValueError:
        pytest.skip(
            "Rop does not handle non-differentiable inputs "
            "correctly. Bug exposed by fixing Add.grad method."
        )

    vx = np.asarray(self.rng.uniform(size=self.in_shape), aesara.config.floatX)
    vv = np.asarray(self.rng.uniform(size=out_shape), aesara.config.floatX)

    yv = Lop(y, self.x, self.v)
    lop_f = function([self.x, self.v], yv, on_unused_input="ignore")
    J, _ = aesara.scan(
        lambda i, y, x: grad(y[i], x),
        sequences=aet.arange(y.shape[0]),
        non_sequences=[y, self.x],
    )
    sy = dot(self.v, J)

    scan_f = function([self.x, self.v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert np.allclose(v1, v2), f"LOP mismatch: {v1} {v2}"

def test_constant(self):
    x = at.constant(np.random.random((2, 3)), dtype=config.floatX)
    y = aesara.shared(np.random.random((3, 6)).astype(config.floatX), "y")

    # should work
    z = dot(x, y)
    assert hasattr(z.tag, "test_value")
    f = aesara.function([], z)
    assert _allclose(f(), z.tag.test_value)

    # this test should fail
    x = at.constant(np.random.random((2, 4)), dtype=config.floatX)
    with pytest.raises(ValueError):
        dot(x, y)

def test_pregreedy_optimizer(self):
    W = at.zeros((5, 4))
    bv = at.zeros((5,))
    bh = at.zeros((4,))
    v = matrix("v")
    (bv_t, bh_t), _ = scan(
        lambda _: [bv, bh], sequences=v, outputs_info=[None, None]
    )
    chain, _ = scan(
        lambda x: dot(dot(x, W) + bh_t, W.T) + bv_t,
        outputs_info=v,
        n_steps=2,
    )
    # TODO FIXME: Make this a real test and assert something.
    function([v], chain)(np.zeros((3, 5), dtype=config.floatX))

def test_shared(self):
    x = matrix("x")
    x.tag.test_value = np.random.random((3, 4)).astype(config.floatX)
    y = aesara.shared(np.random.random((4, 6)).astype(config.floatX), "y")

    # should work
    z = dot(x, y)
    assert hasattr(z.tag, "test_value")
    f = aesara.function([x], z)
    assert _allclose(f(x.tag.test_value), z.tag.test_value)

    # this test should fail
    y.set_value(np.random.random((5, 6)).astype(config.floatX))
    with pytest.raises(ValueError):
        dot(x, y)

def spectral_radius_bound(X, log2_exponent):
    """
    Returns an upper bound on the largest eigenvalue of a square symmetric
    matrix X.

    log2_exponent must be a positive-valued integer. The larger it is, the
    slower and tighter the bound. Values up to 5 should usually suffice. The
    algorithm works by multiplying X by itself this many times.

    From V. Pan, 1990. "Estimating the Extremal Eigenvalues of a Symmetric
    Matrix", Computers Math Applic. Vol 20 n. 2 pp 17-22.

    Note: an efficient algorithm, not used here, is defined in this paper.
    """
    if X.type.ndim != 2:
        raise TypeError("spectral_radius_bound requires a matrix argument", X)
    if not isinstance(log2_exponent, int):
        raise TypeError(
            "spectral_radius_bound requires an integer exponent", log2_exponent
        )
    if log2_exponent <= 0:
        raise ValueError(
            "spectral_radius_bound requires a strictly positive exponent",
            log2_exponent,
        )

    XX = X
    for i in range(log2_exponent):
        XX = dot(XX, XX)
    return aet_pow(trace(XX), 2 ** (-log2_exponent))

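# A NumPy-only illustration (not part of Aesara) of why the bound holds for
# symmetric X: trace(X^(2^k)) = sum_i lambda_i^(2^k) >= max_i |lambda_i|^(2^k)
# because 2^k is even, so trace(X^(2^k))^(2^-k) >= spectral radius, and the
# bound tightens as k grows.  The name `_demo_spectral_radius_bound` is
# hypothetical.
def _demo_spectral_radius_bound():
    import numpy as np

    rng = np.random.default_rng(0)
    A = rng.standard_normal((6, 6))
    X = (A + A.T) / 2  # symmetric test matrix
    true_radius = np.max(np.abs(np.linalg.eigvalsh(X)))

    for k in range(1, 6):
        XX = X
        for _ in range(k):
            XX = XX @ XX  # after k squarings, XX == X^(2^k)
        bound = np.trace(XX) ** (2.0 ** -k)
        assert bound >= true_radius - 1e-12
        print(f"k={k}: bound={bound:.6f} >= true={true_radius:.6f}")
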
def test_inner_replace_dot():
    """
    This tests that rewrites are applied to the inner-graph.

    In particular, BLAS-based rewrites that remove the original dot product.

    This was previously a test with a name that implied it was testing the
    `Scan` push-out rewrites, but it wasn't testing that at all, because the
    rewrites were never being applied.
    """
    W = matrix("W")
    h = matrix("h")

    mode = get_default_mode().including("scan")  # .excluding("BlasOpt")

    o, _ = scan(
        lambda hi, him1, W: (hi, dot(hi + him1, W)),
        outputs_info=[at.zeros([h.shape[1]]), None],
        sequences=[h],
        non_sequences=[W],
        mode=mode,
    )

    f = function([W, h], o, mode=mode)

    scan_nodes = [x for x in f.maker.fgraph.toposort() if isinstance(x.op, Scan)]
    assert len(scan_nodes) == 1
    scan_op = scan_nodes[0].op
    assert not any(
        isinstance(n.op, Dot) for n in scan_op.fn.maker.fgraph.apply_nodes
    )

def test_scipy_paper_example2(self):
    """
    This just sees if things compile well and if they run
    """
    rng = np.random

    x = matrix()
    y = vector()
    w = shared(rng.randn(100))
    b = shared(np.zeros(()))

    # Construct Aesara expression graph
    p_1 = 1 / (1 + exp(-dot(x, w) - b))
    xent = -y * log(p_1) - (1 - y) * log(1 - p_1)
    prediction = p_1 > 0.5
    cost = xent.mean() + 0.01 * (w ** 2).sum()
    gw, gb = grad(cost, [w, b])

    # Compile expressions to functions
    train = function(
        inputs=[x, y],
        outputs=[prediction, xent],
        updates=[(w, w - 0.1 * gw), (b, b - 0.1 * gb)],
    )
    function(inputs=[x], outputs=prediction)

    N = 4
    feats = 100
    D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
    training_steps = 10
    for i in range(training_steps):
        pred, err = train(D[0], D[1])

def __init__(self, input, n_in, n_out, name_prefix=""): """Initialize the parameters of the logistic regression :type input: TensorType :param input: symbolic variable that describes the input of the architecture (one minibatch) :type n_in: int :param n_in: number of input units, the dimension of the space in which the datapoints lie :type n_out: int :param n_out: number of output units, the dimension of the space in which the labels lie """ # initialize with 0 the weights W as a matrix of shape (n_in, n_out) self.W = aesara.shared( value=np.zeros((n_in, n_out), dtype=aesara.config.floatX), name=name_prefix + "W", ) # compute vector of class-membership probabilities in symbolic form self.p_y_given_x = softmax(dot(input, self.W)) # compute prediction as class whose probability is maximal in # symbolic form self.y_pred = argmax(self.p_y_given_x, axis=1) # parameters of the model self.params = [self.W]
def test_f_contiguous(self):
    a = fmatrix("a")
    b = fmatrix("b")
    z = BrokenCImplementationAdd()(a, b)
    # In this test, we do not want z to be an output of the graph.
    out = dot(z, np.eye(7))

    a_val = self.rng.randn(7, 7).astype("float32")
    b_val = self.rng.randn(7, 7).astype("float32")

    # Should work
    mode = DebugMode(check_preallocated_output=["c_contiguous"])

    f = aesara.function([a, b], out, mode=mode)
    f(a_val, b_val)
    # print 'out_val =', out_val
    # print out_val.strides

    # Should raise an Exception, since the output buffer is
    # used incorrectly.
    mode = DebugMode(check_preallocated_output=["f_contiguous"])

    f = aesara.function([a, b], out, mode=mode)

    if config.cxx:
        with pytest.raises(BadThunkOutput):
            f(a_val, b_val)
    else:
        # The Python code of this op is good.
        f(a_val, b_val)

def test_one_assert_merge(self):
    # Merge two nodes, one has assert, the other not.
    x1 = matrix("x1")
    x2 = matrix("x2")
    e = dot(x1, x2) + dot(assert_op(x1, (x1 > x2).all()), x2)
    g = FunctionGraph([x1, x2], [e], clone=False)
    MergeOptimizer().optimize(g)

    assert g.outputs[0].owner.op == add
    add_inputs = g.outputs[0].owner.inputs
    assert isinstance(add_inputs[0].owner.op, Dot)

    # Confirm that the `Assert`s are correct
    assert_var = add_inputs[0].owner.inputs[0]
    assert_ref = assert_op(x1, (x1 > x2).all())
    assert equal_computations([assert_var], [assert_ref])

    # Confirm the merge
    assert add_inputs[0] is add_inputs[1]

def test_variable_only(self):
    x = matrix("x")
    x.tag.test_value = np.random.random((3, 4)).astype(config.floatX)
    y = matrix("y")
    y.tag.test_value = np.random.random((4, 5)).astype(config.floatX)

    # should work
    z = dot(x, y)
    assert hasattr(z.tag, "test_value")
    f = aesara.function([x, y], z)
    assert _allclose(f(x.tag.test_value, y.tag.test_value), z.tag.test_value)

    # this test should fail
    y.tag.test_value = np.random.random((6, 5)).astype(config.floatX)
    with pytest.raises(ValueError):
        dot(x, y)

def test_NanGuardMode():
    # Tests if NanGuardMode is working by feeding in np.inf and np.nan
    # intentionally. A working implementation should be able to capture all
    # the abnormalities.
    rng = np.random.default_rng(2482)
    x = matrix()
    w = shared(rng.standard_normal((5, 7)).astype(config.floatX))
    y = dot(x, w)

    fun = function([x], y, mode=NanGuardMode(nan_is_error=True, inf_is_error=True))
    a = rng.standard_normal((3, 5)).astype(config.floatX)
    with pytest.warns(RuntimeWarning):
        infa = np.tile((np.asarray(100.0) ** 1000000).astype(config.floatX), (3, 5))
    nana = np.tile(np.asarray(np.nan).astype(config.floatX), (3, 5))
    biga = np.tile(np.asarray(1e20).astype(config.floatX), (3, 5))

    fun(a)  # normal values

    # Temporarily silence the logger
    _logger = logging.getLogger("aesara.compile.nanguardmode")
    try:
        _logger.propagate = False
        with pytest.raises(AssertionError):
            fun(infa)  # INFs
        with pytest.raises(AssertionError), pytest.warns(RuntimeWarning):
            fun(nana)  # NaNs
        with pytest.raises(AssertionError):
            fun(biga)  # big values
    finally:
        _logger.propagate = True

    # slices
    a = rng.standard_normal((3, 4, 5)).astype(config.floatX)
    with pytest.warns(RuntimeWarning):
        infa = np.tile(
            (np.asarray(100.0) ** 1000000).astype(config.floatX), (3, 4, 5)
        )
    nana = np.tile(np.asarray(np.nan).astype(config.floatX), (3, 4, 5))
    biga = np.tile(np.asarray(1e20).astype(config.floatX), (3, 4, 5))

    x = tensor3()
    y = x[:, at.arange(2), at.arange(2), None]
    fun = function([x], y, mode=NanGuardMode(nan_is_error=True, inf_is_error=True))
    fun(a)  # normal values
    try:
        _logger.propagate = False
        with pytest.raises(AssertionError):
            fun(infa)  # INFs
        with pytest.raises(AssertionError), pytest.warns(RuntimeWarning):
            fun(nana)  # NaNs
        with pytest.raises(AssertionError):
            fun(biga)  # big values
    finally:
        _logger.propagate = True

def test_both_assert_merge_identical(self):
    """Merge two nodes, both have `Assert`s on the same node with the same conditions."""
    x1 = matrix("x1")
    x2 = matrix("x2")
    e = dot(assert_op(x1, (x1 > x2).all()), x2) + dot(
        assert_op(x1, (x1 > x2).all()), x2
    )
    g = FunctionGraph([x1, x2], [e], clone=False)
    MergeOptimizer().optimize(g)

    assert g.outputs[0].owner.op == add
    add_inputs = g.outputs[0].owner.inputs
    assert isinstance(add_inputs[0].owner.op, Dot)

    # Confirm that the `Assert`s are correct
    assert_var = add_inputs[0].owner.inputs[0]
    assert_ref = assert_op(x1, (x1 > x2).all())
    assert equal_computations([assert_var], [assert_ref])

    # Confirm the merge
    assert add_inputs[0] is add_inputs[1]

def test_gemv_dot_strides():
    # Reported in https://github.com/Theano/Theano/issues/6142
    xv = rand(5)
    yv = rand(5, 1)
    x = gpuarray_shared_constructor(xv)
    y = gpuarray_shared_constructor(yv, broadcastable=(False, True))
    f = aesara.function([], dot(x, y[::-1]), mode=mode_with_gpu)
    out = f()
    utt.assert_allclose(out, np.dot(xv, yv[::-1]))

def test_dependence():
    dependence = make_dependence_cmp()

    x = matrix("x")
    y = dot(x * 2, x + 1)
    nodes = io_toposort([x], [y])

    for a, b in zip(nodes[:-1], nodes[1:]):
        assert dependence(a, b) <= 0

def test_Rop_dot_bug_18Oct2013_Jeremiah(self):
    # This test refers to a bug reported by Jeremiah Lowin on 18th Oct
    # 2013. The bug occurs when a dot operation has only one
    # differentiable path (i.e. there is no gradient with respect to one
    # of the inputs).
    x = aet.arange(20.0).reshape([1, 20])
    v = aesara.shared(np.ones([20]))
    d = dot(x, v).sum()
    Rop(grad(d, v), v, v)

def test_subgraph_grad():
    # Tests that the grad method with no known_grads
    # matches what happens if you use successive subgraph_grads

    x = fvector("x")
    t = fvector("t")
    w1 = aesara.shared(np.random.randn(3, 4))
    w2 = aesara.shared(np.random.randn(4, 2))
    a1 = tanh(dot(x, w1))
    a2 = tanh(dot(a1, w2))
    cost2 = sqr(a2 - t).sum()
    cost2 += sqr(w2.sum())
    cost1 = sqr(w1.sum())

    params = [[w2], [w1]]
    costs = [cost2, cost1]
    grad_ends = [[a1], [x]]

    inputs = [t, x]
    rng = np.random.RandomState([2012, 11, 15])
    values = [rng.randn(2), rng.randn(3)]
    values = [np.cast[ipt.dtype](value) for ipt, value in zip(inputs, values)]

    wrt = [w2, w1]
    cost = cost2 + cost1
    true_grads = grad(cost, wrt)
    true_grads = aesara.function(inputs, true_grads)
    true_grads = true_grads(*values)

    next_grad = None
    param_grads = []
    for i in range(2):
        param_grad, next_grad = subgraph_grad(
            wrt=params[i], end=grad_ends[i], start=next_grad, cost=costs[i]
        )
        next_grad = OrderedDict(zip(grad_ends[i], next_grad))
        param_grads.extend(param_grad)

    pgrads = aesara.function(inputs, param_grads)
    pgrads = pgrads(*values)

    for true_grad, pgrad in zip(true_grads, pgrads):
        assert np.sum(np.abs(true_grad - pgrad)) < 0.00001

def test_both_assert_merge_1(self):
    # Merge two nodes, both have assert on the same node
    # with different conditions.
    x1 = matrix("x1")
    x2 = matrix("x2")
    x3 = matrix("x3")
    e = dot(assert_op(x1, (x1 > x3).all()), x2) + dot(
        assert_op(x1, (x1 > x2).all()), x2
    )
    g = FunctionGraph([x1, x2, x3], [e])
    MergeOptimizer().optimize(g)
    strg = aesara.printing.debugprint(g, file="str")
    strref1 = """Elemwise{add,no_inplace} [id A] ''   6
 |dot [id B] ''   5
 | |Assert{msg='Aesara Assert failed!'} [id C] ''   4
 | | |x1 [id D]
 | | |All [id E] ''   3
 | | | |Elemwise{gt,no_inplace} [id F] ''   1
 | | |   |x1 [id D]
 | | |   |x3 [id G]
 | | |All [id H] ''   2
 | |   |Elemwise{gt,no_inplace} [id I] ''   0
 | |     |x1 [id D]
 | |     |x2 [id J]
 | |x2 [id J]
 |dot [id B] ''   5
"""
    strref2 = """Elemwise{add,no_inplace} [id A] ''   6
 |dot [id B] ''   5
 | |Assert{msg='Aesara Assert failed!'} [id C] ''   4
 | | |x1 [id D]
 | | |All [id E] ''   3
 | | | |Elemwise{gt,no_inplace} [id F] ''   1
 | | |   |x1 [id D]
 | | |   |x2 [id G]
 | | |All [id H] ''   2
 | |   |Elemwise{gt,no_inplace} [id I] ''   0
 | |     |x1 [id D]
 | |     |x3 [id J]
 | |x2 [id G]
 |dot [id B] ''   5
"""
    # print(strg)
    assert strg == strref1 or strg == strref2, (strg, strref1, strref2)

def test_compute_flag(self):
    x = matrix("x")
    y = matrix("y")
    y.tag.test_value = np.random.random((4, 5)).astype(config.floatX)

    # should skip computation of test value
    with config.change_flags(compute_test_value="off"):
        z = dot(x, y)
        assert not hasattr(z.tag, "test_value")

    # should fail when asked by user
    with pytest.raises(ValueError), config.change_flags(
        compute_test_value="raise"
    ):
        dot(x, y)

    # test that a warning is raised if required
    with pytest.warns(UserWarning), config.change_flags(
        compute_test_value="warn"
    ):
        dot(x, y)

def __init__(
    self,
    input=None,
    target=None,
    n_input=1,
    n_hidden=1,
    n_output=1,
    lr=1e-3,
    **kw,
):
    super().__init__(**kw)

    if input is None:
        input = dvector("input")
    if target is None:
        target = dvector("target")

    self.input = input
    self.target = target
    self.lr = shared(lr, "learning_rate")
    self.w1 = shared(np.zeros((n_hidden, n_input)), "w1")
    self.w2 = shared(np.zeros((n_output, n_hidden)), "w2")
    # print self.lr.type

    self.hidden = sigmoid(dot(self.w1, self.input))
    self.output = dot(self.w2, self.hidden)
    self.cost = aet_sum((self.output - self.target) ** 2)

    self.sgd_updates = {
        self.w1: self.w1 - self.lr * grad(self.cost, self.w1),
        self.w2: self.w2 - self.lr * grad(self.cost, self.w2),
    }

    self.sgd_step = pfunc(
        params=[self.input, self.target],
        outputs=[self.output, self.cost],
        updates=self.sgd_updates,
    )

    self.compute_output = pfunc([self.input], self.output)

    self.output_from_hidden = pfunc([self.hidden], self.output)

def test_one_assert_merge(self):
    # Merge two nodes, one has assert, the other not.
    x1 = matrix("x1")
    x2 = matrix("x2")
    e = dot(x1, x2) + dot(assert_op(x1, (x1 > x2).all()), x2)
    g = FunctionGraph([x1, x2], [e])
    MergeOptimizer().optimize(g)
    strg = aesara.printing.debugprint(g, file="str")
    strref = """Elemwise{add,no_inplace} [id A] ''   4
 |dot [id B] ''   3
 | |Assert{msg='Aesara Assert failed!'} [id C] ''   2
 | | |x1 [id D]
 | | |All [id E] ''   1
 | |   |Elemwise{gt,no_inplace} [id F] ''   0
 | |     |x1 [id D]
 | |     |x2 [id G]
 | |x2 [id G]
 |dot [id B] ''   3
"""
    assert strg == strref, (strg, strref)

def test_sort_apply_nodes():
    x = matrix("x")
    y = dot(x * 2, x + 1)

    def str_cmp(a, b):
        # Python 3 removed the builtin `cmp`; compare the string
        # representations directly for a lexicographical sort.
        sa, sb = str(a), str(b)
        return (sa > sb) - (sa < sb)

    nodes = sort_apply_nodes([x], [y], cmps=[str_cmp])

    for a, b in zip(nodes[:-1], nodes[1:]):
        assert str(a) <= str(b)

def test_infix_dot_method():
    X = dmatrix("X")
    y = dvector("y")

    res = X @ y
    exp_res = X.dot(y)
    assert equal_computations([res], [exp_res])

    X_val = np.arange(2 * 3).reshape((2, 3))
    res = X_val @ y
    exp_res = dot(X_val, y)
    assert equal_computations([res], [exp_res])

def matrix_dot(*args):
    r"""Shorthand for the product of several dot products.

    Given :math:`N` matrices :math:`A_0, A_1, \dots, A_N`, ``matrix_dot``
    will generate the matrix product of all of them in the given order,
    namely :math:`A_0 \cdot A_1 \cdot A_2 \cdot \dots \cdot A_N`.
    """
    rval = args[0]
    for a in args[1:]:
        rval = tm.dot(rval, a)
    return rval

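# Illustrative usage (a sketch, assuming the surrounding module's imports):
# the fold above makes `matrix_dot(A, B, C)` build exactly the same graph as
# `tm.dot(tm.dot(A, B), C)`.  The name `_demo_matrix_dot` is hypothetical.
def _demo_matrix_dot():
    from aesara.graph.basic import equal_computations
    from aesara.tensor import matrix

    A, B, C = matrix("A"), matrix("B"), matrix("C")
    assert equal_computations([matrix_dot(A, B, C)], [tm.dot(tm.dot(A, B), C)])
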
def test_both_assert_merge_2_reverse(self):
    # Test case "test_both_assert_merge_2" but in reverse order
    x1 = matrix("x1")
    x2 = matrix("x2")
    x3 = matrix("x3")
    e = dot(x1, assert_op(x2, (x2 > x3).all())) + dot(
        assert_op(x1, (x1 > x3).all()), x2
    )
    g = FunctionGraph([x1, x2, x3], [e], clone=False)
    MergeOptimizer().optimize(g)

    assert g.outputs[0].owner.op == add
    add_inputs = g.outputs[0].owner.inputs
    assert isinstance(add_inputs[0].owner.op, Dot)

    # Confirm that the `Assert`s are correct
    assert_var_1, assert_var_2 = add_inputs[0].owner.inputs
    assert_ref_1 = assert_op(x2, (x2 > x3).all())
    assert equal_computations([assert_var_1], [assert_ref_1])

    assert_ref_2 = assert_op(x1, (x1 > x3).all())
    assert equal_computations([assert_var_2], [assert_ref_2])

    # Confirm the merge
    assert add_inputs[0] is add_inputs[1]

def matrix_power(M, n):
    r"""Raise a square matrix, ``M``, to the (integer) power ``n``.

    This implementation uses exponentiation by squaring, which is
    significantly faster than the naive implementation.
    The time complexity of exponentiation by squaring is
    :math:`\mathcal{O}((n \log M)^k)`.

    Parameters
    ----------
    M: TensorVariable
    n: int
    """
    if n < 0:
        M = pinv(M)
        n = abs(n)

    # Shortcuts when 0 <= n <= 3
    if n == 0:
        return at.eye(M.shape[-2])
    elif n == 1:
        return M
    elif n == 2:
        return tm.dot(M, M)
    elif n == 3:
        return tm.dot(tm.dot(M, M), M)

    result = z = None
    while n > 0:
        z = M if z is None else tm.dot(z, z)
        n, bit = divmod(n, 2)
        if bit:
            result = z if result is None else tm.dot(result, z)

    return result

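# A small agreement check (illustrative, not from the library's test suite):
# the squaring-based `matrix_power` above should match NumPy's
# `np.linalg.matrix_power`.  The name `_demo_matrix_power` is hypothetical,
# and the tolerance is loose to accommodate float32 builds.
def _demo_matrix_power():
    import numpy as np

    import aesara
    from aesara.tensor import matrix

    M = matrix("M")
    f = aesara.function([M], matrix_power(M, 5))

    # For n=5 the loop runs: result = M, then z = M^2, then result = M * M^4.
    M_val = np.random.default_rng(1).standard_normal((3, 3)).astype(M.dtype)
    assert np.allclose(f(M_val), np.linalg.matrix_power(M_val, 5), rtol=1e-4)
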