def init_predictive_output(inputs, targets, hyp, x_star, s_star):
    E = hyp.shape[0]

    def init_K(i, X, Y):
        XX = X.sum(1).reshape((X.shape[0], 1))
        K = XX + XX.T
        return K.sum()

    beta, K_updts = scan(
        init_K, sequences=at.arange(E), non_sequences=[inputs, targets]
    )

    # mean
    def predict_mean_i(i, x_star, s_star, X, beta, h):
        n, D = shape(X)
        # rescale every dimension by the corresponding inverse lengthscale
        iL = at.diag(h[i, :D])
        inp = (X - x_star).dot(iL)

        B = iL.dot(s_star).dot(iL)
        t = inp.dot(B)

        lb = (inp * t).sum() + beta.sum()

        Mi = at.sum(lb) * h[i, D]
        return Mi

    M, M_updts = scan(
        predict_mean_i,
        sequences=at.arange(E),
        non_sequences=[x_star, s_star, inputs, beta, hyp],
    )
    return M
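# A hedged usage sketch for `init_predictive_output` above. The shapes are
# assumptions inferred from the indexing (`h[i, :D]` inverse lengthscales plus
# an `h[i, D]` scale per row), not stated by the original; the imports mirror
# the names the function itself relies on (`scan`, `at`, `shape`).
import aesara
import aesara.tensor as at
from aesara import scan
from aesara.tensor import shape

inputs = at.matrix("inputs")    # (n, D) training inputs
targets = at.matrix("targets")  # (n, E) training targets
hyp = at.matrix("hyp")          # (E, D + 1) hyper-parameters
x_star = at.vector("x_star")    # (D,) test-point mean
s_star = at.matrix("s_star")    # (D, D) test-point covariance

M = init_predictive_output(inputs, targets, hyp, x_star, s_star)
predict = aesara.function([inputs, targets, hyp, x_star, s_star], M)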
def test_three_scans(self):
    r"""
    This test checks a case where we have three `Scan`\s; two of them
    cannot be merged together, but the third one can be merged with either.
    """
    x = vector()
    y = vector()

    def sum(s):
        return s + 1

    sx, upx = scan(sum, sequences=[x], n_steps=4, name="X")
    # We need to use an expression of `y` rather than `y` itself so that
    # the toposort comes up with the 'Y' scan last.
    sy, upy = scan(sum, sequences=[2 * y + 2], n_steps=4, name="Y")
    sz, upz = scan(sum, sequences=[sx], n_steps=4, name="Z")

    f = function(
        [x, y], [sy, sz], mode=self.mode.excluding("scan_pushout_seqs_ops")
    )
    topo = f.maker.fgraph.toposort()
    scans = [n for n in topo if isinstance(n.op, Scan)]
    assert len(scans) == 2

    rng = np.random.default_rng(utt.fetch_seed())
    x_val = rng.uniform(size=(4,)).astype(config.floatX)
    y_val = rng.uniform(size=(4,)).astype(config.floatX)
    # Run it so DebugMode can detect optimization problems.
    f(x_val, y_val)
def test_non_zero_init(self):
    # Test the case where the initial value for the nitsot output is non-zero
    input1 = tensor3()
    input2 = tensor3()
    input3 = tensor3()

    W = aesara.shared(np.random.normal(size=(4, 5))).astype(config.floatX)
    U = aesara.shared(np.random.normal(size=(6, 7))).astype(config.floatX)

    def inner_fct(seq1, seq2, seq3, previous_output):
        temp1 = dot(seq1, W) + seq3
        temp2 = dot(seq2, U)
        dot_output = dot(temp1, temp2)
        return previous_output + dot_output

    init = aet.as_tensor_variable(np.random.normal(size=(3, 7)))

    # Compile the function twice, once with the optimization and once without
    opt_mode = mode.including("scan")
    h, _ = aesara.scan(
        inner_fct,
        sequences=[input1, input2, input3],
        outputs_info=init,
        mode=opt_mode,
    )
    output = h[-1]
    f_opt = aesara.function([input1, input2, input3], output, mode=opt_mode)

    no_opt_mode = mode.excluding("scanOp_pushout_output")
    h, _ = aesara.scan(
        inner_fct,
        sequences=[input1, input2, input3],
        outputs_info=init,
        mode=no_opt_mode,
    )
    output = h[-1]
    f_no_opt = aesara.function([input1, input2, input3], output, mode=no_opt_mode)

    # Ensure that the optimization has been applied for f_opt
    # TODO

    # Compare the outputs of the 2 functions
    input1_value = np.random.random((2, 3, 4)).astype(config.floatX)
    input2_value = np.random.random((2, 5, 6)).astype(config.floatX)
    input3_value = np.random.random((2, 3, 5)).astype(config.floatX)

    output_opt = f_opt(input1_value, input2_value, input3_value)
    output_no_opt = f_no_opt(input1_value, input2_value, input3_value)

    utt.assert_allclose(output_opt, output_no_opt)
def check_rop_lop(self, y, out_shape):
    """
    As `check_mat_rop_lop`, except the input is `self.x`, which is a
    vector. The output is still a vector.
    """
    # TEST ROP
    vx = np.asarray(self.rng.uniform(size=self.in_shape), aesara.config.floatX)
    vv = np.asarray(self.rng.uniform(size=self.in_shape), aesara.config.floatX)

    yv = Rop(y, self.x, self.v)
    rop_f = function([self.x, self.v], yv, on_unused_input="ignore")
    J, _ = aesara.scan(
        lambda i, y, x: grad(y[i], x),
        sequences=aet.arange(y.shape[0]),
        non_sequences=[y, self.x],
    )
    sy = dot(J, self.v)

    scan_f = function([self.x, self.v], sy, on_unused_input="ignore")

    v1 = rop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert np.allclose(v1, v2), f"ROP mismatch: {v1} {v2}"

    try:
        Rop(
            aesara.clone_replace(y, replace={self.x: break_op(self.x)}),
            self.x,
            self.v,
        )
    except ValueError:
        pytest.skip(
            "Rop does not handle non-differentiable inputs "
            "correctly. Bug exposed by fixing Add.grad method."
        )

    # TEST LOP
    vx = np.asarray(self.rng.uniform(size=self.in_shape), aesara.config.floatX)
    vv = np.asarray(self.rng.uniform(size=out_shape), aesara.config.floatX)

    yv = Lop(y, self.x, self.v)
    lop_f = function([self.x, self.v], yv, on_unused_input="ignore")
    J, _ = aesara.scan(
        lambda i, y, x: grad(y[i], x),
        sequences=aet.arange(y.shape[0]),
        non_sequences=[y, self.x],
    )
    sy = dot(self.v, J)

    scan_f = function([self.x, self.v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert np.allclose(v1, v2), f"LOP mismatch: {v1} {v2}"
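# The identity exercised by `check_rop_lop`: `Rop(y, x, v)` computes the
# Jacobian-vector product J @ v, so building J row by row with `scan` and
# multiplying must agree. A minimal standalone sketch of the same check,
# using `tanh` as a stand-in for `y` (an assumption, not the test's input):
import numpy as np
import aesara
import aesara.tensor as aet
from aesara import function, grad
from aesara.gradient import Rop

x = aet.vector("x")
v = aet.vector("v")
y = aet.tanh(x)

yv = Rop(y, x, v)
J, _ = aesara.scan(
    lambda i, y, x: grad(y[i], x),
    sequences=aet.arange(y.shape[0]),
    non_sequences=[y, x],
)
rop_f = function([x, v], yv)
scan_f = function([x, v], J.dot(v))

vx = np.random.uniform(size=(3,)).astype(aesara.config.floatX)
vv = np.random.uniform(size=(3,)).astype(aesara.config.floatX)
assert np.allclose(rop_f(vx, vv), scan_f(vx, vv))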
def test_pregreedy_optimizer(self):
    W = at.zeros((5, 4))
    bv = at.zeros((5,))
    bh = at.zeros((4,))
    v = matrix("v")
    (bv_t, bh_t), _ = scan(
        lambda _: [bv, bh], sequences=v, outputs_info=[None, None]
    )
    chain, _ = scan(
        lambda x: dot(dot(x, W) + bh_t, W.T) + bv_t,
        outputs_info=v,
        n_steps=2,
    )
    # TODO FIXME: Make this a real test and assert something.
    function([v], chain)(np.zeros((3, 5), dtype=config.floatX))
def test_alloc_inputs3():
    _W1 = matrix()
    _W2 = matrix()
    _h0 = vector()

    W1 = specify_shape(_W1, (3, 3))
    W2 = specify_shape(_W2, (3, 3))
    h0 = specify_shape(_h0, (3,))

    def lambda_fn(W1, h, W2):
        return W1 * dot(h, W2)

    o, _ = scan(
        lambda_fn,
        sequences=at.zeros_like(W1),
        outputs_info=h0,
        non_sequences=[at.zeros_like(W2)],
        n_steps=5,
    )

    # TODO FIXME: This result depends on unrelated rewrites in the "fast" mode.
    f = function([_h0, _W1, _W2], o, mode="FAST_RUN")

    scan_node = [x for x in f.maker.fgraph.toposort() if isinstance(x.op, Scan)][0]

    assert len(scan_node.op.inner_inputs) == 1
def test_alloc_inputs2():
    W1 = matrix()
    W2 = matrix()
    h0 = vector()

    def lambda_fn(W1, h, W2):
        return W1 * dot(h, W2)

    o, _ = scan(
        lambda_fn,
        sequences=at.zeros_like(W1),
        outputs_info=h0,
        non_sequences=[at.zeros_like(W2)],
        n_steps=5,
    )

    f = function([h0, W1, W2], o, mode=get_default_mode().including("scan"))

    scan_node = [x for x in f.maker.fgraph.toposort() if isinstance(x.op, Scan)][0]
    assert not any(
        isinstance(x.op, Elemwise)
        for x in scan_node.op.fn.maker.fgraph.toposort()
    )
def test_inner_replace_dot():
    """
    This tests that rewrites are applied to the inner-graph, in particular
    BLAS-based rewrites that remove the original dot product.

    This was previously a test with a name that implied it was testing the
    `Scan` push-out rewrites, but it wasn't testing that at all, because the
    rewrites were never being applied.
    """
    W = matrix("W")
    h = matrix("h")

    mode = get_default_mode().including("scan")  # .excluding("BlasOpt")

    o, _ = scan(
        lambda hi, him1, W: (hi, dot(hi + him1, W)),
        outputs_info=[at.zeros([h.shape[1]]), None],
        sequences=[h],
        non_sequences=[W],
        mode=mode,
    )

    f = function([W, h], o, mode=mode)

    scan_nodes = [x for x in f.maker.fgraph.toposort() if isinstance(x.op, Scan)]
    assert len(scan_nodes) == 1
    scan_op = scan_nodes[0].op
    assert not any(isinstance(n.op, Dot) for n in scan_op.fn.maker.fgraph.apply_nodes)
def test_subtensor_multiple_slices(self):
    r"""
    This addresses a bug that happens when you have multiple subtensors
    on the output of `Scan`. The bug requires the reshape to be produced,
    and it has something to do with how the `Subtensor`\s overlap.
    """

    def f_pow2(x_tm1):
        return 2 * x_tm1

    state = vector("state")
    n_steps = iscalar("nsteps")
    output, updates = scan(
        f_pow2,
        [],
        state,
        [],
        n_steps=n_steps,
        truncate_gradient=-1,
        go_backwards=False,
    )
    nw_shape = ivector("nw_shape")
    # Note that the output is reshaped to a 3-dimensional tensor, and that
    # both the reshaped output and the raw output are sliced.
    my_f = function(
        [state, n_steps, nw_shape],
        [reshape(output, nw_shape, ndim=3)[:-2], output[:-4]],
        updates=updates,
        allow_input_downcast=True,
    )
    nodes = [x for x in my_f.maker.fgraph.toposort() if isinstance(x.op, Scan)]
    # This assertion fails if the save-mem optimization failed on scan
    if config.mode != "FAST_COMPILE":
        assert nodes[0].op._scan_savemem_visited
    rng = np.random.default_rng(utt.fetch_seed())
    my_f(rng.uniform(size=(3,)), 4, np.int64([2, 2, 3]))
def test_save_mem(self):
    rng = np.random.default_rng(utt.fetch_seed())
    vW_in2 = asarrayX(rng.uniform(-0.5, 0.5, size=(2,)))
    vW = asarrayX(rng.uniform(-0.5, 0.5, size=(2, 2)))
    vWout = asarrayX(rng.uniform(-0.5, 0.5, size=(2,)))
    vW_in1 = asarrayX(rng.uniform(-0.5, 0.5, size=(2, 2)))
    v_u1 = asarrayX(rng.uniform(-0.5, 0.5, size=(8, 2)))
    v_u2 = asarrayX(rng.uniform(-0.5, 0.5, size=(8,)))
    v_x0 = asarrayX(rng.uniform(-0.5, 0.5, size=(2,)))
    v_y0 = asarrayX(rng.uniform(size=(3,)))

    W_in2 = shared(vW_in2, name="win2")
    W = shared(vW, name="w")
    W_out = shared(vWout, name="wout")
    W_in1 = matrix("win")
    u1 = matrix("u1")
    u2 = vector("u2")
    x0 = vector("x0")
    y0 = vector("y0")

    def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, y_tm3, W_in1):
        return [
            y_tm3 + 1,
            dot(u1_t, W_in1) + u2_t * W_in2 + dot(x_tm1, W),
            y_tm1 + dot(x_tm1, W_out),
        ]

    _outputs, updates = scan(
        f_rnn_cmpl,
        [u1, u2],
        [None, dict(initial=x0), dict(initial=y0, taps=[-1, -3])],
        W_in1,
        n_steps=None,
        truncate_gradient=-1,
        go_backwards=False,
    )
    outputs = [_outputs[0][-1], _outputs[1][-1], _outputs[2][-1]]
    f4 = function(
        [u1, u2, x0, y0, W_in1],
        outputs,
        updates=updates,
        allow_input_downcast=True,
        mode=self.mode,
    )

    # compute the values in numpy
    v_x = np.zeros((8, 2), dtype=config.floatX)
    v_y = np.zeros((8,), dtype=config.floatX)
    v_x[0] = np.dot(v_u1[0], vW_in1) + v_u2[0] * vW_in2 + np.dot(v_x0, vW)
    v_y[0] = np.dot(v_x0, vWout) + v_y0[2]

    for i in range(1, 8):
        v_x[i] = np.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 + np.dot(v_x[i - 1], vW)
        v_y[i] = np.dot(v_x[i - 1], vWout) + v_y[i - 1]

    (aesara_dump, aesara_x, aesara_y) = f4(v_u1, v_u2, v_x0, v_y0, vW_in1)

    utt.assert_allclose(aesara_x, v_x[-1:])
    utt.assert_allclose(aesara_y, v_y[-1:])
def jacobian_diag(f, x):
    idx = at.arange(f.shape[0], dtype="int32")

    def grad_ii(i):
        return grad(f[i], x)[i]

    return aesara.scan(
        grad_ii, sequences=[idx], n_steps=f.shape[0], name="jacobian_diag"
    )[0]
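# Quick sanity check for `jacobian_diag` (the variable names here are
# illustrative): for f(x) = x**2 the Jacobian is diagonal with entries 2*x,
# so the scan should return exactly that.
import numpy as np
import aesara
import aesara.tensor as at
from aesara import grad

x = at.vector("x")
f = x ** 2
diag_fn = aesara.function([x], jacobian_diag(f, x))
assert np.allclose(
    diag_fn(np.array([1.0, 2.0, 3.0], dtype=x.dtype)), [2.0, 4.0, 6.0]
)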
def test_scan_debugprint2():
    coefficients = vector("coefficients")
    x = scalar("x")

    max_coefficients_supported = 10000

    # Generate the components of the polynomial
    components, updates = aesara.scan(
        fn=lambda coefficient, power, free_variable: coefficient
        * (free_variable ** power),
        outputs_info=None,
        sequences=[coefficients, aet.arange(max_coefficients_supported)],
        non_sequences=x,
    )
    # Sum them up
    polynomial = components.sum()

    output_str = debugprint(polynomial, file="str")
    lines = output_str.split("\n")

    expected_output = """Sum{acc_dtype=float64} [id A] ''
|for{cpu,scan_fn} [id B] ''
|Elemwise{scalar_minimum,no_inplace} [id C] ''
| |Subtensor{int64} [id D] ''
| | |Shape [id E] ''
| | | |Subtensor{int64::} [id F] 'coefficients[0:]'
| | | |coefficients [id G]
| | | |ScalarConstant{0} [id H]
| | |ScalarConstant{0} [id I]
| |Subtensor{int64} [id J] ''
| |Shape [id K] ''
| | |Subtensor{int64::} [id L] ''
| | |ARange{dtype='int64'} [id M] ''
| | | |TensorConstant{0} [id N]
| | | |TensorConstant{10000} [id O]
| | | |TensorConstant{1} [id P]
| | |ScalarConstant{0} [id Q]
| |ScalarConstant{0} [id R]
|Subtensor{:int64:} [id S] ''
| |Subtensor{int64::} [id F] 'coefficients[0:]'
| |ScalarFromTensor [id T] ''
| |Elemwise{scalar_minimum,no_inplace} [id C] ''
|Subtensor{:int64:} [id U] ''
| |Subtensor{int64::} [id L] ''
| |ScalarFromTensor [id V] ''
| |Elemwise{scalar_minimum,no_inplace} [id C] ''
|Elemwise{scalar_minimum,no_inplace} [id C] ''
|x [id W]

Inner graphs of the scan ops:

for{cpu,scan_fn} [id B] ''
>Elemwise{mul,no_inplace} [id X] ''
> |coefficients[t] [id Y] -> [id S]
> |Elemwise{pow,no_inplace} [id Z] ''
> |x_copy [id BA] -> [id W]
> |<TensorType(int64, scalar)> [id BB] -> [id U]"""

    for truth, out in zip(expected_output.split("\n"), lines):
        assert truth.strip() == out.strip()
def create_test_hmm():
    srng = at.random.RandomStream()

    N_tt = at.iscalar("N")
    N_tt.tag.test_value = 10
    M_tt = at.iscalar("M")
    M_tt.tag.test_value = 2

    mus_tt = at.matrix("mus")
    mus_tt.tag.test_value = np.stack(
        [np.arange(0.0, 10), np.arange(0.0, -10, -1)], axis=-1
    ).astype(aesara.config.floatX)

    sigmas_tt = at.ones((N_tt,))
    sigmas_tt.name = "sigmas"

    pi_0_rv = srng.dirichlet(at.ones((M_tt,)), name="pi_0")
    Gamma_rv = srng.dirichlet(at.ones((M_tt, M_tt)), name="Gamma")

    S_0_rv = srng.categorical(pi_0_rv, name="S_0")

    def scan_fn(mus_t, sigma_t, S_tm1, Gamma_t):
        S_t = srng.categorical(Gamma_t[S_tm1], name="S_t")
        Y_t = srng.normal(mus_t[S_t], sigma_t, name="Y_t")
        return S_t, Y_t

    (S_rv, Y_rv), scan_updates = aesara.scan(
        fn=scan_fn,
        sequences=[mus_tt, sigmas_tt],
        non_sequences=[Gamma_rv],
        outputs_info=[{"initial": S_0_rv, "taps": [-1]}, {}],
        strict=True,
        name="scan_rv",
    )
    Y_rv.name = "Y_rv"

    scan_op = Y_rv.owner.op
    scan_args = ScanArgs.from_node(Y_rv.owner)

    Gamma_in = scan_args.inner_in_non_seqs[0]
    Y_t = scan_args.inner_out_nit_sot[0]
    mus_t = scan_args.inner_in_seqs[0]
    sigmas_t = scan_args.inner_in_seqs[1]
    S_t = scan_args.inner_out_sit_sot[0]
    rng_in = scan_args.inner_out_shared[0]

    mus_in = Y_rv.owner.inputs[1]
    mus_in.name = "mus_in"
    sigmas_in = Y_rv.owner.inputs[2]
    sigmas_in.name = "sigmas_in"

    # The output `S_rv` is really `S_rv[1:]`, so we have to extract the
    # actual `Scan` output: `S_in`.
    S_in = S_rv.owner.inputs[0]
    S_in.name = "S_in"

    return locals()
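# `create_test_hmm` returns its `locals()`, so callers pull out what they
# need by name; a typical usage pattern looks like this:
hmm_env = create_test_hmm()
Y_rv = hmm_env["Y_rv"]
S_in = hmm_env["S_in"]
scan_args = hmm_env["scan_args"]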
def test_pushout_all(self):
    W1 = matrix("W1")
    W2 = matrix("W2")
    h0 = vector("h0")

    def lambda_fn(h, W1, W2):
        return dot(h, W1 + W2)

    o, _ = scan(lambda_fn, non_sequences=[h0, W1, W2], n_steps=5)

    f = function([h0, W1, W2], o, mode=self.mode)

    scan_nodes = [x for x in f.maker.fgraph.toposort() if isinstance(x.op, Scan)]
    assert len(scan_nodes) == 0

    seed = utt.fetch_seed()
    rng = np.random.default_rng(seed)
    floatX = config.floatX
    v_h = np.array(rng.uniform(size=(2,)), dtype=floatX)
    v_W1 = np.array(rng.uniform(size=(2, 2)), dtype=floatX)
    v_W2 = np.array(rng.uniform(size=(2, 2)), dtype=floatX)

    v_out = np.dot(v_h, v_W1 + v_W2)
    sol = np.zeros((5, 2))
    # This line is here to make `sol` have the same shape as the output of
    # aesara. Note that what we ask aesara to do is to repeat the 2-element
    # vector `v_out` 5 times.
    sol[:, :] = v_out
    utt.assert_allclose(sol, f(v_h, v_W1, v_W2))
def incomplete_beta_ps(a, b, value):
    """Power series for the incomplete beta function.

    Use when `b * value` is small and `value` not too close to 1.

    Based on the Cephes library by Steve Moshier (incbet.c).
    """
    one = aet.constant(1, dtype="float64")
    ai = one / a
    u = (one - b) * value
    t1 = u / (a + one)
    t = u
    threshold = np.MachAr().eps * ai
    s = aet.constant(0, dtype="float64")

    def _step(i, t, s):
        t *= (i - b) * value / i
        step = t / (a + i)
        s += step
        return ((t, s), until(aet.abs_(step) < threshold))

    (t, s), _ = scan(
        _step,
        sequences=[aet.arange(2, 302)],
        outputs_info=[e for e in aet.cast((t, s), "float64")],
    )

    s = s[-1] + t1 + ai

    t = (
        gammaln(a + b)
        - gammaln(a)
        - gammaln(b)
        + a * aet.log(value)
        + aet.log(s)
    )
    return aet.exp(t)
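# A usage sketch for `incomplete_beta_ps` (assuming the same `aesara`, `aet`,
# `gammaln`, `until`, and `scan` imports the function itself relies on). In
# the regime where the series converges (b * value small, value away from 1),
# the result should agree with scipy.special.betainc.
import aesara
import aesara.tensor as aet

a = aet.dscalar("a")
b = aet.dscalar("b")
value = aet.dscalar("value")
ib = aesara.function([a, b, value], incomplete_beta_ps(a, b, value))
# ib(2.0, 2.0, 0.1) should be close to scipy.special.betainc(2.0, 2.0, 0.1)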
def test_scan(self):
    x = vector("x")

    # We will insert a subgraph involving these variables into the inner
    # graph of scan. Since they were not previously in the inner graph,
    # they are like non_sequences to scan(). scan() infers these and
    # imports them into the inner graph properly, and map_variables()
    # should do this as well.
    outer = scalar("outer")
    shared = aesara.shared(np.array(1.0, dtype=aesara.config.floatX), name="shared")
    constant = at.constant(1, name="constant")

    # z will equal 1 so multiplying by it doesn't change any values
    z = outer * (shared + constant)

    def step(x, a):
        r = a + x
        r.tag.replacement = z * (a - x)
        return r

    s, _ = aesara.scan(step, sequences=x, outputs_info=[np.array(0.0)])
    # Ensure `z` is owned by the outer graph so map_variables() will need to
    # jump through additional hoops to placate FunctionGraph.
    t = z * s
    (s2,) = map_variables(self.replacer, [t])
    t2 = z * s2

    f = aesara.function([x, outer], [t, t2])
    rval = f(x=np.array([1, 2, 3], dtype=np.float32), outer=0.5)
    assert np.array_equal(rval, [[1, 3, 6], [-1, -3, -6]])
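# The `self.replacer` used above is defined elsewhere in the test class; the
# pattern it follows (reproduced here as an assumption) is to swap in the
# `replacement` tag when one was attached and leave the variable alone
# otherwise:
def replacer(graph):
    return getattr(graph.tag, "replacement", graph)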
def outer_step(*args):
    # Separate the received arguments into their respective categories
    # (sequences, outputs from previous iterations, non-sequences)
    i_sequences = list(args[: len(o_sequences)])
    i_prev_outputs = list(args[len(o_sequences) : -len(o_nonsequences)])
    i_non_sequences = list(args[-len(o_nonsequences) :])
    i_outputs_infos = i_prev_outputs + [None] * len(new_nitsots)

    # Call the user-provided function with the proper arguments
    results, updates = aesara.scan(
        fn=fn,
        sequences=i_sequences[:-1],
        outputs_info=i_outputs_infos,
        non_sequences=i_non_sequences,
        name=name + "_inner",
        n_steps=i_sequences[-1],
    )
    if not isinstance(results, list):
        results = [results]

    # Keep only the last timestep of every output but keep all the updates
    return [r[-1] for r in results], updates
def rv_op(cls, rhos, sigma, init_dist, steps, ar_order, constant_term, size=None):
    # Init dist should have shape (*size, ar_order)
    if size is not None:
        batch_size = size
    else:
        # In this case the size of the init_dist depends on the parameters shape
        # The last dimension of rho and init_dist does not matter
        batch_size = at.broadcast_shape(sigma, rhos[..., 0], init_dist[..., 0])
    if init_dist.owner.op.ndim_supp == 0:
        init_dist_size = (*batch_size, ar_order)
    else:
        # In this case the support dimension must cover for ar_order
        init_dist_size = batch_size
    init_dist = change_rv_size(init_dist, init_dist_size)

    # Create OpFromGraph representing random draws from AR process
    # Variables with underscore suffix are dummy inputs into the OpFromGraph
    init_ = init_dist.type()
    rhos_ = rhos.type()
    sigma_ = sigma.type()
    steps_ = steps.type()

    rhos_bcast_shape_ = init_.shape
    if constant_term:
        # In this case init shape is one unit smaller than rhos in the last dimension
        rhos_bcast_shape_ = (*rhos_bcast_shape_[:-1], rhos_bcast_shape_[-1] + 1)
    rhos_bcast_ = at.broadcast_to(rhos_, rhos_bcast_shape_)

    noise_rng = aesara.shared(np.random.default_rng())

    def step(*args):
        *prev_xs, reversed_rhos, sigma, rng = args
        if constant_term:
            mu = reversed_rhos[-1] + at.sum(prev_xs * reversed_rhos[:-1], axis=0)
        else:
            mu = at.sum(prev_xs * reversed_rhos, axis=0)
        next_rng, new_x = Normal.dist(mu=mu, sigma=sigma, rng=rng).owner.outputs
        return new_x, {rng: next_rng}

    # We transpose inputs as scan iterates over the first dimension
    innov_, innov_updates_ = aesara.scan(
        fn=step,
        outputs_info=[{"initial": init_.T, "taps": range(-ar_order, 0)}],
        non_sequences=[rhos_bcast_.T[::-1], sigma_.T, noise_rng],
        n_steps=steps_,
        strict=True,
    )
    (noise_next_rng,) = tuple(innov_updates_.values())
    ar_ = at.concatenate([init_, innov_.T], axis=-1)

    ar_op = AutoRegressiveRV(
        inputs=[rhos_, sigma_, init_, steps_],
        outputs=[noise_next_rng, ar_],
        ar_order=ar_order,
        constant_term=constant_term,
        inline=True,
    )

    ar = ar_op(rhos, sigma, init_dist, steps)
    return ar
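# A minimal standalone sketch of the `taps` mechanism the `step` above relies
# on (a hypothetical example, not part of the AR op): with taps=[-2, -1] the
# inner function receives the previous values oldest-first.
import aesara
import aesara.tensor as at

x0 = at.vector("x0")  # length-2 initial window
out, _ = aesara.scan(
    lambda x_tm2, x_tm1: x_tm2 + x_tm1,  # Fibonacci-style recurrence
    outputs_info=[{"initial": x0, "taps": [-2, -1]}],
    n_steps=5,
)
fib = aesara.function([x0], out)
# fib([0.0, 1.0]) -> [1., 2., 3., 5., 8.]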
def test_gen_cloning_with_shape_change(self, datagen):
    gen = generator(datagen)
    gen_r = at_rng().normal(size=gen.shape).T
    X = gen.dot(gen_r)
    res, _ = aesara.scan(lambda x: x.sum(), X, n_steps=X.shape[0])
    assert res.eval().shape == (50,)
    shared = aesara.shared(datagen.data.astype(gen.dtype))
    res2 = aesara.clone_replace(res, {gen: shared**2})
    assert res2.eval().shape == (1000,)
def test_savemem_opt(self):
    y0 = shared(np.ones((2, 10)))
    [y1, y2], updates = scan(
        lambda y: [y, y],
        outputs_info=[dict(initial=y0, taps=[-2]), None],
        n_steps=5,
    )
    # TODO FIXME: Make this a real test and assert something.
    function([], y2.sum(), mode=self.mode)()
def test_downsample(self):
    rng = np.random.RandomState(utt.fetch_seed())

    # ws, shp
    examples = (
        ((2,), (16,)),
        ((2,), (4, 16)),
        ((2,), (4, 2, 16)),
        ((1, 1), (4, 2, 16, 16)),
        ((2, 2), (4, 2, 16, 16)),
        ((3, 3), (4, 2, 16, 16)),
        ((3, 2), (4, 2, 16, 16)),
        ((3, 2, 2), (3, 2, 16, 16, 16)),
        ((2, 3, 2), (3, 2, 16, 16, 16)),
        ((2, 2, 3), (3, 2, 16, 16, 16)),
        ((2, 2, 3, 2), (3, 2, 6, 6, 6, 5)),
    )

    for example, ignore_border in itertools.product(examples, [True, False]):
        (ws, shp) = example
        vx = rng.rand(*shp)
        vex = rng.rand(*shp)

        x = aesara.shared(vx)
        ex = aesara.shared(vex)

        maxpool_op = Pool(ignore_border, ndim=len(ws))
        a_pooled = maxpool_op(x, ws).flatten()
        yv = Rop(a_pooled, x, ex)
        mode = None
        if aesara.config.mode == "FAST_COMPILE":
            mode = "FAST_RUN"
        rop_f = function([], yv, on_unused_input="ignore", mode=mode)
        sy, _ = aesara.scan(
            lambda i, y, x, v: (grad(y[i], x) * v).sum(),
            sequences=aet.arange(a_pooled.shape[0]),
            non_sequences=[a_pooled, x, ex],
            mode=mode,
        )
        scan_f = function([], sy, on_unused_input="ignore", mode=mode)
        v1 = rop_f()
        v2 = scan_f()
        assert np.allclose(v1, v2), f"Rop mismatch: {v1} {v2}"
def test_debugprint_compiled_fn():
    M = at.tensor(np.float64, shape=(20000, 2, 2))
    one = at.as_tensor(1, dtype=np.int64)
    zero = at.as_tensor(0, dtype=np.int64)

    def no_shared_fn(n, x_tm1, M):
        p = M[n, x_tm1]
        return at.switch(at.lt(zero, p[0]), one, zero)

    out, updates = aesara.scan(
        no_shared_fn,
        outputs_info=[{"initial": zero, "taps": [-1]}],
        sequences=[at.arange(M.shape[0])],
        non_sequences=[M],
        allow_gc=False,
        mode="FAST_RUN",
    )

    # In this case, `debugprint` should print the compiled inner-graph
    # (i.e. from `Scan._fn`)
    out = aesara.function([M], out, updates=updates, mode="FAST_RUN")

    expected_output = """forall_inplace,cpu,scan_fn} [id A] 2 (outer_out_sit_sot-0)
|TensorConstant{20000} [id B] (n_steps)
|TensorConstant{[ 0 ..998 19999]} [id C] (outer_in_seqs-0)
|IncSubtensor{InplaceSet;:int64:} [id D] 1 (outer_in_sit_sot-0)
| |AllocEmpty{dtype='int64'} [id E] 0
| | |TensorConstant{20000} [id B]
| |TensorConstant{(1,) of 0} [id F]
| |ScalarConstant{1} [id G]
|<TensorType(float64, (20000, 2, 2))> [id H] (outer_in_non_seqs-0)

Inner graphs:

forall_inplace,cpu,scan_fn} [id A] (outer_out_sit_sot-0)
>Elemwise{Composite{Switch(LT(i0, i1), i2, i0)}} [id I] (inner_out_sit_sot-0)
> |TensorConstant{0} [id J]
> |Subtensor{int64, int64, int64} [id K]
> | |*2-<TensorType(float64, (20000, 2, 2))> [id L] -> [id H] (inner_in_non_seqs-0)
> | |ScalarFromTensor [id M]
> | | |*0-<TensorType(int64, ())> [id N] -> [id C] (inner_in_seqs-0)
> | |ScalarFromTensor [id O]
> | | |*1-<TensorType(int64, ())> [id P] -> [id D] (inner_in_sit_sot-0)
> | |ScalarConstant{0} [id Q]
> |TensorConstant{1} [id R]
"""

    output_str = debugprint(out, file="str", print_op_info=True)
    lines = output_str.split("\n")

    for truth, out in zip(expected_output.split("\n"), lines):
        assert truth.strip() == out.strip()
def test_scan_err1(self):
    # This test should fail when building fx for the first time
    k = tt.iscalar("k")
    A = tt.matrix("A")
    k.tag.test_value = 3
    A.tag.test_value = np.random.rand(5, 3).astype(config.floatX)

    def fx(prior_result, A):
        return tt.dot(prior_result, A)

    with pytest.raises(ValueError) as e:
        aesara.scan(fn=fx, outputs_info=tt.ones_like(A), non_sequences=A, n_steps=k)

    assert str(e.traceback[0].path).endswith("test_compute_test_value.py")
    # We should be in the "fx" function defined above
    assert e.traceback[2].name == "fx"
def outer_scan_step(x_t, w):
    h, _ = scan(
        inner_scan_step,
        sequences=[x_t[1:]],
        outputs_info=[x_t[0]],
        non_sequences=[w],
        strict=True,
        name="the_inner_scan",
    )
    return h
def test_conv(self):
    for conv_op in [conv.conv2d, conv2d]:
        for border_mode in ["valid", "full"]:
            image_shape = (2, 2, 4, 5)
            filter_shape = (2, 2, 2, 3)
            image_dim = len(image_shape)
            filter_dim = len(filter_shape)

            input = TensorType(aesara.config.floatX, [False] * image_dim)(
                name="input"
            )
            filters = TensorType(aesara.config.floatX, [False] * filter_dim)(
                name="filter"
            )
            ev_input = TensorType(aesara.config.floatX, [False] * image_dim)(
                name="ev_input"
            )
            ev_filters = TensorType(aesara.config.floatX, [False] * filter_dim)(
                name="ev_filters"
            )

            def sym_conv2d(input, filters):
                return conv_op(input, filters, border_mode=border_mode)

            output = sym_conv2d(input, filters).flatten()
            yv = Rop(output, [input, filters], [ev_input, ev_filters])
            mode = None
            if aesara.config.mode == "FAST_COMPILE":
                mode = "FAST_RUN"
            rop_f = function(
                [input, filters, ev_input, ev_filters],
                yv,
                on_unused_input="ignore",
                mode=mode,
            )
            sy, _ = aesara.scan(
                lambda i, y, x1, x2, v1, v2: (grad(y[i], x1) * v1).sum()
                + (grad(y[i], x2) * v2).sum(),
                sequences=aet.arange(output.shape[0]),
                non_sequences=[output, input, filters, ev_input, ev_filters],
                mode=mode,
            )
            scan_f = function(
                [input, filters, ev_input, ev_filters],
                sy,
                on_unused_input="ignore",
                mode=mode,
            )
            dtype = aesara.config.floatX
            image_data = np.random.random(image_shape).astype(dtype)
            filter_data = np.random.random(filter_shape).astype(dtype)
            ev_image_data = np.random.random(image_shape).astype(dtype)
            ev_filter_data = np.random.random(filter_shape).astype(dtype)

            v1 = rop_f(image_data, filter_data, ev_image_data, ev_filter_data)
            v2 = scan_f(image_data, filter_data, ev_image_data, ev_filter_data)
            assert np.allclose(v1, v2), f"Rop mismatch: {v1} {v2}"
def test_pushout_while(self):
    """
    Ensure that the optimizations for `Scan` that push computation out of
    the `Scan` don't alter the result for 'as_while' scans.
    """
    W1 = matrix("W1")
    W2 = matrix("W2")
    step_indices = vector("step_indices")

    def lambda_fn(step_idx, W1, W2):
        until_condition = until(step_idx > 2)
        return dot(W1, W2), until_condition

    # Compile a function with the optimization
    o, _ = scan(
        lambda_fn, sequences=[step_indices, W1], non_sequences=[W2], n_steps=5
    )
    f = function([W1, W2, step_indices], o, mode=self.mode)

    # Compile an aesara function without the optimization
    o, _ = scan(
        lambda_fn,
        sequences=[step_indices, W1],
        non_sequences=[W2],
        n_steps=5,
        mode="FAST_COMPILE",
    )
    f_ref = function([W1, W2, step_indices], o, mode=self.mode)

    # Compare the results of the two implementations
    input_values = [
        np.random.default_rng(utt.fetch_seed()).random((5, 5)).astype("float32"),
        np.random.default_rng(utt.fetch_seed()).random((5, 5)).astype("float32"),
        np.arange(5).astype("float32"),
    ]

    out = f(*input_values)
    out_ref = f_ref(*input_values)
    utt.assert_allclose(out, out_ref)
def test_save_mem_store_steps(self):
    def f_rnn(u_t, x1_tm1, x1_tm3, x2_tm1, x3tm2, x3_tm1, x4_tm1):
        return (
            u_t + 1.0,
            u_t + 2.0,
            u_t + 3.0,
            u_t + 4.0,
            u_t + 5.0,
            u_t + 6.0,
            u_t + 7.0,
        )

    u = vector("u")
    x10 = vector("x10")
    x20 = scalar("x20")
    x30 = vector("x30")
    x40 = scalar("x40")
    [x1, x2, x3, x4, x5, x6, x7], updates = scan(
        f_rnn,
        u,
        [
            None,
            None,
            None,
            dict(initial=x10, taps=[-1, -2]),
            x20,
            dict(initial=x30, taps=[-1, -2]),
            x40,
        ],
        n_steps=None,
        truncate_gradient=-1,
        go_backwards=False,
    )

    f2 = function(
        [u, x10, x20, x30, x40],
        [x1[-7], x2[-3:-1], x3[-6:], x4[-1], x5[-1]],
        updates=updates,
        allow_input_downcast=True,
        mode=self.mode,
    )

    # get random initial values
    rng = np.random.default_rng(utt.fetch_seed())
    v_u = rng.uniform(-5.0, 5.0, size=(20,))

    # compute the outputs of the compiled function
    tx1, tx2, tx3, tx4, tx5 = f2(v_u, [0, 0], 0, [0, 0], 0)

    utt.assert_allclose(tx1, v_u[-7] + 1.0)
    utt.assert_allclose(tx2, v_u[-3:-1] + 2.0)
    utt.assert_allclose(tx3, v_u[-6:] + 3.0)
    utt.assert_allclose(tx4, v_u[-1] + 4.0)
    utt.assert_allclose(tx5, v_u[-1] + 5.0)
def get_outputs(x, w):
    features, _ = scan(
        outer_scan_step,
        sequences=[x],
        non_sequences=[w],
        strict=True,
        name="the_outer_scan",
    )

    return_val = grad(features.sum(), w)
    return return_val
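# Tying `outer_scan_step` and `get_outputs` together: `inner_scan_step` is
# not shown in this snippet, so this sketch assumes a simple elementwise one.
import aesara.tensor as at
from aesara import function

def inner_scan_step(x_t_t, h_tm1, w):
    # one inner-scan step: combine the current element with the carried state
    return (x_t_t + h_tm1) * w

x = at.matrix("x")  # each row is one sequence for the inner scan
w = at.scalar("w")
g = function([x, w], get_outputs(x, w))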
def compute_A_k(A, k):
    result, updates = aesara.scan(
        fn=lambda prior_result, A: prior_result * A,
        outputs_info=at.ones_like(A),
        non_sequences=A,
        n_steps=k,
    )

    A_k = result[-1]

    return A_k
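# `compute_A_k` multiplies elementwise at every step, so the result is the
# elementwise power A**k rather than a matrix power; a small check:
import numpy as np
import aesara
import aesara.tensor as at

A = at.vector("A")
k = at.iscalar("k")
power = aesara.function([A, k], compute_A_k(A, k))
assert np.allclose(power(np.array([2.0, 3.0], dtype=A.dtype), 3), [8.0, 27.0])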
def test_scan_debugprint1():
    k = iscalar("k")
    A = dvector("A")

    # Symbolic description of the result
    result, updates = aesara.scan(
        fn=lambda prior_result, A: prior_result * A,
        outputs_info=aet.ones_like(A),
        non_sequences=A,
        n_steps=k,
    )

    final_result = result[-1]

    output_str = debugprint(final_result, file="str")
    lines = output_str.split("\n")

    expected_output = """Subtensor{int64} [id A] ''
|Subtensor{int64::} [id B] ''
| |for{cpu,scan_fn} [id C] ''
| | |k [id D]
| | |IncSubtensor{Set;:int64:} [id E] ''
| | | |AllocEmpty{dtype='float64'} [id F] ''
| | | | |Elemwise{add,no_inplace} [id G] ''
| | | | | |k [id D]
| | | | | |Subtensor{int64} [id H] ''
| | | | | |Shape [id I] ''
| | | | | | |Rebroadcast{(0, False)} [id J] ''
| | | | | | |InplaceDimShuffle{x,0} [id K] ''
| | | | | | |Elemwise{second,no_inplace} [id L] ''
| | | | | | |A [id M]
| | | | | | |InplaceDimShuffle{x} [id N] ''
| | | | | | |TensorConstant{1.0} [id O]
| | | | | |ScalarConstant{0} [id P]
| | | | |Subtensor{int64} [id Q] ''
| | | | |Shape [id R] ''
| | | | | |Rebroadcast{(0, False)} [id J] ''
| | | | |ScalarConstant{1} [id S]
| | | |Rebroadcast{(0, False)} [id J] ''
| | | |ScalarFromTensor [id T] ''
| | | |Subtensor{int64} [id H] ''
| | |A [id M]
| |ScalarConstant{1} [id U]
|ScalarConstant{-1} [id V]

Inner graphs:

for{cpu,scan_fn} [id C] ''
>Elemwise{mul,no_inplace} [id W] ''
> |<TensorType(float64, vector)> [id X] -> [id E]
> |A_copy [id Y] -> [id M]"""

    for truth, out in zip(expected_output.split("\n"), lines):
        assert truth.strip() == out.strip()