def test_alloc(self):
    # Alloc of the sum of x into a vector
    out1d = aet.alloc(self.x.sum(), self.in_shape[0])
    self.check_rop_lop(out1d, self.in_shape[0])

    # Alloc of x into a 3-D tensor, flattened
    out3d = aet.alloc(
        self.x, self.mat_in_shape[0], self.mat_in_shape[1], self.in_shape[0]
    )
    self.check_rop_lop(
        out3d.flatten(),
        self.mat_in_shape[0] * self.mat_in_shape[1] * self.in_shape[0],
    )
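For reference, `alloc` broadcasts its first argument up to the requested shape. A minimal, self-contained sketch (assuming the `aet` alias above refers to `aesara.tensor`; the shape and input values here are illustrative only):

import numpy as np
import aesara
import aesara.tensor as aet

x = aet.vector("x")
# Broadcast the length-3 vector across a new leading dimension of size 2,
# producing a (2, 3) tensor; alloc only requires the value to be
# broadcastable to the requested shape.
out = aet.alloc(x, 2, 3)
f = aesara.function([x], out)
print(f(np.arange(3, dtype=aesara.config.floatX)).shape)  # (2, 3)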
def test_covexp_aesara(self):
    X = np.linspace(0, 1, 10)[:, None]
    with pm.Model() as model:
        a = at.alloc(2.0, 1, 1)
        cov = pm.gp.cov.ExpQuad(1, 0.1) ** a
    K = cov(X).eval()
    npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3)
    # check diagonal
    Kd = cov(X, diag=True).eval()
    npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
def logp(self, obs):
    """Return the Aesara log-likelihood at a point."""
    obs_tt = at.as_tensor_variable(obs)

    logp_val = at.alloc(-np.inf, *obs.shape)

    for i, dist in enumerate(self.comp_dists):
        i_mask = at.eq(self.states, i)
        obs_i = obs_tt[i_mask]
        subset_dist = dist.dist(*distribution_subset_args(dist, obs.shape, i_mask))
        logp_val = at.set_subtensor(logp_val[i_mask], subset_dist.logp(obs_i))

    return logp_val
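The per-state assignment above relies on `set_subtensor`, which returns a new tensor with the indexed entries replaced. A small stand-alone sketch of that pattern (assuming `at` is `aesara.tensor`; the states and values are illustrative):

import numpy as np
import aesara.tensor as at

obs = at.as_tensor_variable(np.array([1.0, 2.0, 3.0, 4.0]))
states = at.as_tensor_variable(np.array([0, 1, 0, 1]))

# Start from -inf everywhere, then fill in only the entries belonging to state 1.
logp_val = at.alloc(-np.inf, obs.shape[0])
mask = at.eq(states, 1).nonzero()
logp_val = at.set_subtensor(logp_val[mask], obs[mask])

print(logp_val.eval())  # -> [-inf, 2., -inf, 4.]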
def test_local_alloc_dimshuffle():
    alloc_dimshuffle = out2in(local_alloc_dimshuffle)

    x = vector("x")
    m = iscalar("m")

    y = x.dimshuffle("x", 0)
    out = aet.alloc(y, m, 1, x.shape[0])

    g = FunctionGraph([x, m], [out])
    alloc_dimshuffle(g)

    topo = g.toposort()
    assert any(not isinstance(node.op, DimShuffle) for node in topo)
def test_local_dimshuffle_alloc():
    reshape_dimshuffle = out2in(local_dimshuffle_alloc)

    x = vector("x")
    out = aet.alloc(x, 3, 2).dimshuffle("x", "x", 0, 1)

    g = FunctionGraph([x], [out])
    reshape_dimshuffle(g)

    l = PerformLinker()
    l.accept(g)
    f = l.make_function()

    assert f([3, 4]).ndim == 4

    topo = g.toposort()
    assert any(not isinstance(node.op, DimShuffle) for node in topo)
def diag(self, X):
    return at.alloc(1.0, X.shape[0])
def full(self, X, Xs=None):
    if Xs is None:
        return at.diag(self.diag(X))
    else:
        return at.alloc(0.0, X.shape[0], Xs.shape[0])
def diag(self, X):
    return at.alloc(at.square(self.sigma), X.shape[0])
def full(self, X, Xs=None):
    if Xs is None:
        return at.alloc(self.c, X.shape[0], X.shape[0])
    else:
        return at.alloc(self.c, X.shape[0], Xs.shape[0])
def diag(self, X):
    return at.alloc(self.c, X.shape[0])
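A hedged usage sketch for the constant covariance methods above, assuming they are the `full`/`diag` methods of PyMC's `pm.gp.cov.Constant` (the inputs and the value of `c` are illustrative):

import numpy as np
import pymc3 as pm

X = np.linspace(0, 1, 5)[:, None]
cov = pm.gp.cov.Constant(c=2.0)

K = cov(X).eval()              # full(X): a 5 x 5 matrix filled with 2.0
Kd = cov(X, diag=True).eval()  # diag(X): a length-5 vector of 2.0
np.testing.assert_allclose(np.diag(K), Kd)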
def test_gpu_memory_usage(self):
    # This test validates that the memory usage of the defined Aesara
    # function is reasonable when executed on the GPU. It checks for
    # a bug in which one of Scan's optimizations was not applied, which
    # made the scan node compute large and unnecessary outputs and
    # brought memory usage on the GPU to ~12G.

    # Dimensionality of input and output data (not one-hot coded)
    n_in = 100
    n_out = 100
    # Number of neurons in hidden layer
    n_hid = 4000
    # Number of minibatches
    mb_size = 2
    # Time steps in minibatch
    mb_length = 200

    # Define input variables
    xin = ftensor3(name="xin")
    yout = ftensor3(name="yout")

    # Initialize the network parameters
    U = aesara.shared(np.zeros((n_in, n_hid), dtype="float32"), name="W_xin_to_l1")
    V = aesara.shared(np.zeros((n_hid, n_hid), dtype="float32"), name="W_l1_to_l1")
    W = aesara.shared(np.zeros((n_hid, n_out), dtype="float32"), name="W_l1_to_l2")
    nparams = [U, V, W]

    # Build the forward pass
    l1_base = dot(xin, U)

    def scan_l(baseline, last_step):
        return baseline + dot(last_step, V)

    zero_output = aet.alloc(np.asarray(0.0, dtype="float32"), mb_size, n_hid)

    l1_out, _ = scan(
        scan_l,
        sequences=[l1_base],
        outputs_info=[zero_output],
        mode=self.mode_with_gpu_nodebug,
    )

    l2_out = dot(l1_out, W)

    # Compute the cost and take the gradient wrt the params
    cost = tt_sum((l2_out - yout) ** 2)
    grads = aesara.grad(cost, nparams)
    updates = list(zip(nparams, (n - g for n, g in zip(nparams, grads))))

    # Compile the Aesara function
    feval_backprop = aesara.function(
        [xin, yout], cost, updates=updates, mode=self.mode_with_gpu_nodebug
    )

    # Validate that the PushOutScanOutput optimization has been applied
    # by checking the number of outputs of the grad Scan node in the
    # compiled function.
    nodes = feval_backprop.maker.fgraph.toposort()
    scan_nodes = [n for n in nodes if isinstance(n.op, Scan)]

    # The grad scan is always the 2nd one according to toposort. If the
    # optimization has been applied, it has 2 outputs, otherwise 3.
    grad_scan_node = scan_nodes[1]
    assert len(grad_scan_node.outputs) == 2, len(grad_scan_node.outputs)

    # Call the Aesara function to ensure the absence of a memory error
    feval_backprop(
        np.zeros((mb_length, mb_size, n_in), dtype="float32"),
        np.zeros((mb_length, mb_size, n_out), dtype="float32"),
    )
def __call__(self, X):
    return at.alloc(1.0, X.shape[0]) * self.c
def test_machine_translation(self):
    # This test case comes from https://github.com/rizar/scan-grad-speed and
    # is an example of actual computation done with scan in the context of
    # machine translation.
    #
    # 'dim' has been reduced from 1000 to 5 to make the test run faster.

    # Parameters from an actual machine translation run
    batch_size = 80
    seq_len = 50
    dim = 5

    # Weight matrices
    U = aesara.shared(
        np.random.normal(size=(dim, dim), scale=0.0001).astype(config.floatX)
    )
    U.name = "U"
    V = aesara.shared(U.get_value())
    V.name = "V"
    W = aesara.shared(U.get_value())
    W.name = "W"

    # Variables and their values
    x = tt.tensor3("x")
    x_value = np.random.normal(size=(seq_len, batch_size, dim), scale=0.0001).astype(
        config.floatX
    )

    ri = tt.tensor3("ri")
    ri_value = x_value

    zi = tt.tensor3("zi")
    zi_value = x_value

    init = tt.alloc(np.cast[config.floatX](0), batch_size, dim)

    def rnn_step1(
        # sequences
        x,
        ri,
        zi,
        # outputs_info
        h,
    ):
        pre_r = ri + h.dot(U)
        pre_z = zi + h.dot(V)
        r = tt.nnet.sigmoid(pre_r)
        z = tt.nnet.sigmoid(pre_z)

        after_r = r * h
        pre_h = x + after_r.dot(W)
        new_h = tt.tanh(pre_h)

        res_h = z * new_h + (1 - z) * h
        return res_h

    # Compile the function twice, once with the optimization and once
    # without
    opt_mode = mode.including("scan")
    h, _ = aesara.scan(
        rnn_step1,
        sequences=[x, ri, zi],
        n_steps=seq_len,
        outputs_info=init,
        name="fpass1",
        mode=opt_mode,
    )
    cost = h[-1].sum()
    grad1 = tt.grad(cost, [U, V, W])
    f_opt = aesara.function(inputs=[x, ri, zi], outputs=grad1, mode=opt_mode)

    no_opt_mode = mode.excluding("scanOp_pushout_output")
    h, _ = aesara.scan(
        rnn_step1,
        sequences=[x, ri, zi],
        n_steps=seq_len,
        outputs_info=init,
        name="fpass1",
        mode=no_opt_mode,
    )
    cost = h[-1].sum()
    grad1 = tt.grad(cost, [U, V, W])
    f_no_opt = aesara.function(inputs=[x, ri, zi], outputs=grad1, mode=no_opt_mode)

    # Validate that the optimization has been applied
    scan_node_grad = [
        node for node in f_opt.maker.fgraph.toposort() if isinstance(node.op, Scan)
    ][1]

    for output in scan_node_grad.op.outputs:
        assert not (
            isinstance(output.owner.op, tt.elemwise.Elemwise)
            and any([isinstance(i, tt.Dot) for i in output.owner.inputs])
        )

    # Compare the outputs of the two functions on the same input data.
    f_opt_output = f_opt(x_value, ri_value, zi_value)
    f_no_opt_output = f_no_opt(x_value, ri_value, zi_value)
    utt.assert_allclose(f_opt_output, f_no_opt_output)
def _run(self, num_features, num_timesteps, batch_size, mode):
    # determine shapes of inputs and targets depending on the batch size
    if batch_size == 1:
        inputs_size = (num_timesteps, num_features)
        targets_size = (num_timesteps, 1)
    else:
        inputs_size = (num_timesteps, batch_size, num_features)
        targets_size = (num_timesteps, batch_size, 1)

    # make inputs and targets shared variables
    inputs = aesara.shared(
        self.rng.uniform(size=inputs_size).astype(config.floatX), borrow=True
    )
    targets = aesara.shared(
        self.rng.uniform(size=targets_size).astype(config.floatX), borrow=True
    )

    # create symbolic inputs and targets variables
    if batch_size == 1:
        x = tt.matrix("inputs")
        t = tt.matrix("targets")
    else:
        x = tt.tensor3("inputs")
        t = tt.tensor3("targets")
    x.tag.test_value = inputs.get_value(borrow=True)
    t.tag.test_value = targets.get_value(borrow=True)

    # create a set of parameters for a simple RNN
    W_xh = aesara.shared(
        (0.01 * self.rng.uniform(size=(num_features, 10))).astype(config.floatX),
        borrow=True,
    )
    W_hh = aesara.shared(
        (0.01 * self.rng.uniform(size=(10, 10))).astype(config.floatX), borrow=True
    )
    W_hy = aesara.shared(
        (0.01 * self.rng.uniform(size=(10, 1))).astype(config.floatX), borrow=True
    )
    b_h = aesara.shared(np.zeros(10).astype(config.floatX), borrow=True)
    b_y = aesara.shared(np.zeros(1).astype(config.floatX), borrow=True)
    params = [W_xh, W_hh, W_hy, b_h, b_y]

    # recurrent function
    def step(x_t, h_tm1):
        h = tt.tanh(tt.dot(h_tm1, W_hh) + tt.dot(x_t, W_xh) + b_h)
        return h

    # build recurrent graph
    if batch_size == 1:
        h_0 = tt.alloc(0.0, 10).astype(config.floatX)
    else:
        h_0 = tt.alloc(0.0, batch_size, 10).astype(config.floatX)
    h, updates = aesara.scan(step, sequences=[x], outputs_info=[h_0])

    # network output
    y = tt.dot(h, W_hy) + b_y

    # Create Gauss-Newton-Matrix object. Not really of any use here, but I
    # need it for Hessian-Free optimization.
    gn = GaussNewtonMatrix(y)

    # compute MSE
    cost = ((t - y) ** 2).sum(axis=1).mean()

    # Compute the cost at some other point in the parameter
    # space. Not really of any use here, but this is how I do it
    # during certain iterations of CG in the HF algorithm. There,
    # it's in fact `pi + current update proposal`. For simplicity,
    # I just multiply by 2 here.
    cost_ = aesara.clone(cost, replace={pi: 2 * pi for pi in params})

    # Compute Gauss-Newton-Matrix times some vector `v` which is `p` in CG,
    # but for simplicity, I just take the parameters vector because it's
    # already there.
    Gv = gn(v=params, cost=cost, parameters=params, damp=tt.constant(1.0))

    # compile Aesara function
    f = aesara.function([], [cost_] + Gv, givens={x: inputs, t: targets}, mode=mode)

    # execute
    f()