def test_jax_Alloc():
    x = aet.alloc(0.0, 2, 3)
    x_fg = FunctionGraph([], [x])

    (jax_res,) = compare_jax_and_py(x_fg, [])

    assert jax_res.shape == (2, 3)

    x = aet.alloc(1.1, 2, 3)
    x_fg = FunctionGraph([], [x])

    compare_jax_and_py(x_fg, [])

    x = aet.AllocEmpty("float32")(2, 3)
    x_fg = FunctionGraph([], [x])

    def compare_shape_dtype(x, y):
        (x,) = x
        (y,) = y
        return x.shape == y.shape and x.dtype == y.dtype

    compare_jax_and_py(x_fg, [], assert_fn=compare_shape_dtype)

    a = scalar("a")
    x = aet.alloc(a, 20)
    x_fg = FunctionGraph([a], [x])

    compare_jax_and_py(x_fg, [10.0])

    a = vector("a")
    x = aet.alloc(a, 20, 10)
    x_fg = FunctionGraph([a], [x])

    compare_jax_and_py(x_fg, [np.ones(10, dtype=config.floatX)])
def local_alloc_dimshuffle(node):
    """
    If a DimShuffle is inside an Alloc and only adds dimensions to the left,
    remove it.

    Alloc(DimShuffle(x), ...) -> Alloc(x, ...)
    """
    if isinstance(node.op, tt.Alloc):
        input_ = node.inputs[0]
        if input_.owner and isinstance(input_.owner.op, DimShuffle):
            # check that it only adds dimensions to the left
            new_order = input_.owner.op.new_order
            expected_new_order = ("x",) * (
                input_.ndim - input_.owner.inputs[0].ndim
            ) + tuple(range(input_.owner.inputs[0].ndim))
            if new_order != expected_new_order:
                return False
            return [tt.alloc(input_.owner.inputs[0], *node.inputs[1:])]
    return False
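A minimal sketch (assuming a working Aesara install, imported as below) of the graph pattern this rewrite targets: an `Alloc` whose value is a `DimShuffle` that only prepends broadcastable dimensions. Such a `DimShuffle` is redundant because `alloc` already broadcasts its value against the requested shape.

import numpy as np
import aesara
import aesara.tensor as aet

v = aet.vector("v")

# Explicitly left-pad `v` with a broadcastable dimension before the alloc ...
with_ds = aet.alloc(v.dimshuffle("x", 0), 3, 4)
# ... which is equivalent to allocating from `v` directly, the form the
# rewrite produces.
without_ds = aet.alloc(v, 3, 4)

f = aesara.function([v], [with_ds, without_ds])
a, b = f(np.arange(4, dtype=aesara.config.floatX))
assert np.array_equal(a, b)  # both are 3x4 tilings of `v`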
def test_grad_disconnected(self):
    # tests corner cases of gradient for shape and alloc

    x = vector(name="x")
    total = x.sum()
    total.name = "total"
    num_elements = x.shape[0]
    num_elements.name = "num_elements"
    silly_vector = aet.alloc(total / num_elements, num_elements)
    silly_vector.name = "silly_vector"
    cost = silly_vector.sum()
    cost.name = "cost"
    # note that cost simplifies to be the same as "total"
    g = grad(cost, x, add_names=False)
    # we still need to pass in x because it determines the shape of
    # the output
    f = aesara.function([x], g)
    rng = np.random.RandomState([2012, 9, 5])
    x = np.cast[x.dtype](rng.randn(3))
    g = f(x)
    assert np.allclose(g, np.ones(x.shape, dtype=x.dtype))
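A short numeric sketch (plain NumPy, with hypothetical values) of why the gradient above is all ones: filling a length-n vector with `total / n` and summing it back up just reproduces `total = x.sum()`, so `d(cost)/dx_i = 1`. The shape argument only enters through `x.shape[0]`, which is why it is treated as a disconnected input.

import numpy as np

x = np.array([0.3, -1.2, 2.5])
n = x.shape[0]
total = x.sum()
cost = np.full(n, total / n).sum()   # the "silly_vector" summed back up
assert np.isclose(cost, total)       # cost == total, whose gradient wrt x is ones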
def local_dimshuffle_alloc(node):
    """
    If an Alloc is inside a DimShuffle that only adds dimensions to the left,
    remove the DimShuffle and add the corresponding 1s to the Alloc shape.

    DimShuffle{x, 0, 1}(Alloc([3 4], 3, 2)) -> Alloc([3 4], 1, 3, 2)
    """
    if isinstance(node.op, DimShuffle) and node.inputs[0].owner:
        input_ = node.inputs[0]
        if isinstance(input_.owner.op, tt.Alloc):
            # check that it only adds dimensions to the left
            new_order = node.op.new_order
            expected_new_order = ("x",) * (
                len(new_order) - input_.ndim
            ) + tuple(range(input_.ndim))
            if new_order != expected_new_order:
                return False

            # count the number of 'x' (newly added broadcastable dimensions)
            nb_new_dims = len(new_order) - input_.ndim
            new_shape_input = (1,) * nb_new_dims + tuple(input_.owner.inputs[1:])

            return [tt.alloc(input_.owner.inputs[0], *new_shape_input)]
    return False
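A minimal sketch (again assuming Aesara imported as below) of the equivalence this rewrite exploits: adding a leading broadcastable axis after the alloc is the same as asking `alloc` for a leading dimension of size 1 directly.

import numpy as np
import aesara
import aesara.tensor as aet

v = aet.vector("v")

shuffled = aet.alloc(v, 3, 2).dimshuffle("x", 0, 1)  # DimShuffle around Alloc
direct = aet.alloc(v, 1, 3, 2)                       # the rewritten form

f = aesara.function([v], [shuffled, direct])
a, b = f(np.array([3, 4], dtype=aesara.config.floatX))
assert a.shape == b.shape == (1, 3, 2)
assert np.array_equal(a, b)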
def test_machine_translation(self):
    # This test case comes from https://github.com/rizar/scan-grad-speed and
    # is an example of actual computation done with scan in the context of
    # machine translation
    #
    # 'dim' has been reduced from 1000 to 5 to make the test run faster

    # Parameters from an actual machine translation run
    batch_size = 80
    seq_len = 50
    dim = 5

    # Weight matrices
    U = aesara.shared(
        np.random.normal(size=(dim, dim), scale=0.0001).astype(config.floatX)
    )
    U.name = "U"
    V = aesara.shared(U.get_value())
    V.name = "V"
    W = aesara.shared(U.get_value())
    W.name = "W"

    # Variables and their values
    x = tensor3("x")
    x_value = np.random.normal(
        size=(seq_len, batch_size, dim), scale=0.0001
    ).astype(config.floatX)

    ri = tensor3("ri")
    ri_value = x_value

    zi = tensor3("zi")
    zi_value = x_value

    init = aet.alloc(np.cast[config.floatX](0), batch_size, dim)

    def rnn_step1(
        # sequences
        x,
        ri,
        zi,
        # outputs_info
        h,
    ):
        pre_r = ri + h.dot(U)
        pre_z = zi + h.dot(V)
        r = nnet.sigmoid(pre_r)
        z = nnet.sigmoid(pre_z)

        after_r = r * h
        pre_h = x + after_r.dot(W)
        new_h = tanh(pre_h)

        res_h = z * new_h + (1 - z) * h
        return res_h

    # Compile the function twice, once with the optimization and once
    # without
    opt_mode = mode.including("scan")
    h, _ = aesara.scan(
        rnn_step1,
        sequences=[x, ri, zi],
        n_steps=seq_len,
        outputs_info=init,
        name="fpass1",
        mode=opt_mode,
    )
    cost = h[-1].sum()
    grad1 = grad(cost, [U, V, W])
    f_opt = aesara.function(inputs=[x, ri, zi], outputs=grad1, mode=opt_mode)

    no_opt_mode = mode.excluding("scanOp_pushout_output")
    h, _ = aesara.scan(
        rnn_step1,
        sequences=[x, ri, zi],
        n_steps=seq_len,
        outputs_info=init,
        name="fpass1",
        mode=no_opt_mode,
    )
    cost = h[-1].sum()
    grad1 = grad(cost, [U, V, W])
    f_no_opt = aesara.function(inputs=[x, ri, zi], outputs=grad1, mode=no_opt_mode)

    # Validate that the optimization has been applied
    scan_node_grad = [
        node
        for node in f_opt.maker.fgraph.toposort()
        if isinstance(node.op, Scan)
    ][1]

    for output in scan_node_grad.op.outputs:
        assert not (
            isinstance(output.owner.op, Elemwise)
            and any([isinstance(i, Dot) for i in output.owner.inputs])
        )

    # Compare the outputs of the two functions on the same input data.
    f_opt_output = f_opt(x_value, ri_value, zi_value)
    f_no_opt_output = f_no_opt(x_value, ri_value, zi_value)
    utt.assert_allclose(f_opt_output, f_no_opt_output)
def _run(self, num_features, num_timesteps, batch_size, mode):
    # determine shapes of inputs and targets depending on the batch size
    if batch_size == 1:
        inputs_size = (num_timesteps, num_features)
        targets_size = (num_timesteps, 1)
    else:
        inputs_size = (num_timesteps, batch_size, num_features)
        targets_size = (num_timesteps, batch_size, 1)

    # make inputs and targets shared variables
    inputs = aesara.shared(
        self.rng.uniform(size=inputs_size).astype(config.floatX), borrow=True
    )
    targets = aesara.shared(
        self.rng.uniform(size=targets_size).astype(config.floatX), borrow=True
    )

    # create symbolic inputs and targets variables
    if batch_size == 1:
        x = matrix("inputs")
        t = matrix("targets")
    else:
        x = tensor3("inputs")
        t = tensor3("targets")
    x.tag.test_value = inputs.get_value(borrow=True)
    t.tag.test_value = targets.get_value(borrow=True)

    # create a set of parameters for a simple RNN
    W_xh = aesara.shared(
        (0.01 * self.rng.uniform(size=(num_features, 10))).astype(config.floatX),
        borrow=True,
    )
    W_hh = aesara.shared(
        (0.01 * self.rng.uniform(size=(10, 10))).astype(config.floatX), borrow=True
    )
    W_hy = aesara.shared(
        (0.01 * self.rng.uniform(size=(10, 1))).astype(config.floatX), borrow=True
    )
    b_h = aesara.shared(np.zeros(10).astype(config.floatX), borrow=True)
    b_y = aesara.shared(np.zeros(1).astype(config.floatX), borrow=True)

    params = [W_xh, W_hh, W_hy, b_h, b_y]

    # recurrent function
    def step(x_t, h_tm1):
        h = tanh(dot(h_tm1, W_hh) + dot(x_t, W_xh) + b_h)
        return h

    # build recurrent graph
    if batch_size == 1:
        h_0 = aet.alloc(0.0, 10).astype(config.floatX)
    else:
        h_0 = aet.alloc(0.0, batch_size, 10).astype(config.floatX)
    h, updates = aesara.scan(step, sequences=[x], outputs_info=[h_0])

    # network output
    y = dot(h, W_hy) + b_y

    # Create Gauss-Newton-Matrix object. Not really of any use here, but I
    # need it for Hessian-Free optimization.
    gn = GaussNewtonMatrix(y)

    # compute MSE
    cost = ((t - y) ** 2).sum(axis=1).mean()

    # Compute the cost at some other point in the parameter
    # space. Not really of any use here, but this is how I do it
    # during certain iterations of CG in the HF algorithm. There,
    # it's in fact `pi + current update proposal`. For simplicity,
    # I just multiply by 2 here.
    cost_ = aesara.clone_replace(cost, replace={pi: 2 * pi for pi in params})

    # Compute Gauss-Newton-Matrix times some vector `v` which is `p` in CG,
    # but for simplicity, I just take the parameters vector because it's
    # already there.
    Gv = gn(v=params, cost=cost, parameters=params, damp=aet.constant(1.0))

    # compile Aesara function
    f = aesara.function(
        [], [cost_] + Gv, givens={x: inputs, t: targets}, mode=mode
    )

    # execute
    f()
def repeat(x, repeats, axis=None):
    """Repeat elements of an array.

    It returns an array which has the same shape as `x`, except along the
    given axis. The `axis` parameter specifies the axis along which to repeat
    values. By default, the flattened input array is used and a flat output
    array is returned.

    The number of repetitions for each element is `repeats`. `repeats` is
    broadcasted to fit the length of the given `axis`.

    Parameters
    ----------
    x
        Input data, tensor variable.
    repeats
        int, scalar or tensor variable
    axis : int, optional

    See Also
    --------
    tensor.tile

    .. versionadded:: 0.6

    """
    repeats = aet.as_tensor_variable(repeats)

    if repeats.ndim > 1:
        raise ValueError("The dimension of repeats should not exceed 1.")

    if repeats.ndim == 1 and not repeats.broadcastable[0]:
        return RepeatOp(axis=axis)(x, repeats)
    else:
        if repeats.ndim == 1:
            repeats = repeats[0]

        if x.dtype == "uint64":
            raise TypeError("repeat doesn't support dtype uint64")

        if axis is None:
            axis = 0
            x = x.flatten()
        else:
            if axis >= x.ndim:
                raise ValueError("Axis should not exceed x.ndim-1.")
            if axis < 0:
                axis = x.ndim + axis

        shape = [x.shape[i] for i in range(x.ndim)]

        # shape_ is the shape of the intermediate tensor which has
        # an additional dimension comparing to x. We use alloc to
        # allocate space for this intermediate tensor to replicate x
        # along that additional dimension.
        shape_ = shape[:]
        shape_.insert(axis + 1, repeats)

        # shape is now the shape of output, where shape[axis] becomes
        # shape[axis]*repeats.
        shape[axis] = shape[axis] * repeats

        # dims_ is the dimension of that intermediate tensor.
        dims_ = list(np.arange(x.ndim))
        dims_.insert(axis + 1, "x")

        # After the original tensor is duplicated along the additional
        # dimension, we reshape it to the expected output shape, and
        # return the output z.
        z = aet.alloc(x.dimshuffle(*dims_), *shape_).reshape(shape)
        return z
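A short usage sketch, assuming this is the `repeat` exposed from `aesara.tensor.extra_ops`. With a scalar `repeats` it takes the `alloc`/`dimshuffle`/`reshape` path shown above and mirrors `np.repeat` semantics.

import numpy as np
import aesara
import aesara.tensor as aet
from aesara.tensor.extra_ops import repeat

x = aet.matrix("x")
y = repeat(x, 2, axis=1)  # scalar repeats -> alloc/dimshuffle/reshape branch
f = aesara.function([x], y)

x_val = np.arange(6, dtype=aesara.config.floatX).reshape(2, 3)
assert np.array_equal(f(x_val), np.repeat(x_val, 2, axis=1))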
f = aesara.function([g], host_from_gpu(g))
fv = f(gv)
assert np.all(fv == av)


def gpu_alloc_expected(x, *shp):
    g = gpuarray.empty(shp, dtype=x.dtype, context=get_context(test_ctx_name))
    g[:] = x
    return g


TestGpuAlloc = makeTester(
    name="GpuAllocTester",
    # The +1 is there to allow the lift to the GPU.
    op=lambda *args: alloc(*args) + 1,
    gpu_op=GpuAlloc(test_ctx_name),
    cases=dict(
        correct01=(rand(), np.int32(7)),
        # just gives a DeepCopyOp with possibly wrong results on the CPU
        # correct01_bcast=(rand(1), np.int32(7)),
        correct02=(rand(), np.int32(4), np.int32(7)),
        correct12=(rand(7), np.int32(4), np.int32(7)),
        correct13=(rand(7), np.int32(2), np.int32(4), np.int32(7)),
        correct23=(rand(4, 7), np.int32(2), np.int32(4), np.int32(7)),
        bad_shape12=(rand(7), np.int32(7), np.int32(5)),
    ),
)


class TestGPUAlloc(TestAlloc):