def test_composite_elemwise_float16(self):
    w = bvector()
    x = vector(dtype="float16")
    y = fvector()

    cz = tanh(x + aet.cast(y, "float16"))
    o = (
        cz
        - cz**2
        + aet.cast(x, "int16")
        + aet.cast(x, "float32")
        + aet.cast(w, "float16")
        - aet.constant(np.float16(1.0))
    )

    aesara.function([w, x, y], o, mode=mode_with_gpu)

    v = vector(dtype="uint8")
    w = vector(dtype="float16")
    x = vector(dtype="float16")
    y = vector(dtype="float16")
    z = vector(dtype="float16")

    o = aet.switch(v, mul(w, x, y), z)
    aesara.function([v, w, x, y, z], o, mode=mode_with_gpu)
def full(self, X, Xs=None):
    X, Xs = self._slice(X, Xs)
    index = at.cast(X, "int32")
    if Xs is None:
        index2 = index.T
    else:
        index2 = at.cast(Xs, "int32").T
    return self.B[index, index2]
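# A minimal NumPy sketch (not part of the library) of what `full` above
# computes for a discrete-input covariance: the inputs are cast to integer
# indices and the covariances are looked up in the table `B`. The helper
# name `full_lookup` is hypothetical.
import numpy as np

def full_lookup(B, X, Xs=None):
    index = X.astype("int32")
    index2 = index.T if Xs is None else Xs.astype("int32").T
    # integer fancy indexing broadcasts (n, 1) against (1, m) to give (n, m)
    return B[index, index2]

B = np.array([[1.0, 0.2], [0.2, 1.0]])
X = np.array([[0], [1], [1]])
assert full_lookup(B, X).shape == (3, 3)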
def test_xent_thing_int32(self):
    x = matrix("x")
    y = lvector("y")
    yi = aet.cast(y, "int32")
    expressions = [
        aet_sum(-log(softmax(x)[aet.arange(yi.shape[0]), yi])),
        -aet_sum(log(softmax(x)[aet.arange(yi.shape[0]), yi])),
        -aet_sum(log(softmax(x))[aet.arange(yi.shape[0]), yi]),
        aet_sum(-log(softmax(x))[aet.arange(yi.shape[0]), yi]),
    ]

    for expr in expressions:
        fgraph = FunctionGraph([x, y], [expr])
        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        ops = [node.op for node in fgraph.toposort()]
        assert len(ops) == 5
        assert crossentropy_softmax_argmax_1hot_with_bias in ops
        assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)]

        # Also verify the gradient wrt x
        fgraph = FunctionGraph([x, y], [grad(expr, x)])
        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        ops = [node.op for node in fgraph.toposort()]
        assert len(ops) == 3
        assert crossentropy_softmax_1hot_with_bias_dx in ops
        assert softmax_legacy in ops
        assert softmax_grad_legacy not in ops
def incomplete_beta_ps(a, b, value):
    """Power series for the incomplete beta function.

    Use when b*x is small and value not too close to 1.

    Based on the Cephes library by Steve Moshier (incbet.c).
    """
    one = aet.constant(1, dtype="float64")
    ai = one / a
    u = (one - b) * value
    t1 = u / (a + one)
    t = u
    threshold = np.MachAr().eps * ai
    s = aet.constant(0, dtype="float64")

    def _step(i, t, s):
        t *= (i - b) * value / i
        step = t / (a + i)
        s += step
        return ((t, s), until(aet.abs_(step) < threshold))

    (t, s), _ = scan(
        _step,
        sequences=[aet.arange(2, 302)],
        outputs_info=[e for e in aet.cast((t, s), "float64")],
    )

    s = s[-1] + t1 + ai

    t = gammaln(a + b) - gammaln(a) - gammaln(b) + a * aet.log(value) + aet.log(s)
    return aet.exp(t)
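# A plain-NumPy sketch (hypothetical, for checking the symbolic power series
# above): it iterates the same recurrence t *= (i - b) * value / i,
# accumulating s until the step falls below eps / a, and applies the same
# log-space prefactor. For small b*x it should agree with scipy's
# regularized incomplete beta.
import numpy as np
from scipy import special

def incomplete_beta_ps_ref(a, b, value, max_iter=300):
    ai = 1.0 / a
    u = (1.0 - b) * value
    t1 = u / (a + 1.0)
    t, s = u, 0.0
    threshold = np.finfo(np.float64).eps * ai
    for i in range(2, 2 + max_iter):
        t *= (i - b) * value / i
        step = t / (a + i)
        s += step
        if abs(step) < threshold:
            break
    s = s + t1 + ai
    log_t = (
        special.gammaln(a + b) - special.gammaln(a) - special.gammaln(b)
        + a * np.log(value) + np.log(s)
    )
    return np.exp(log_t)

assert np.isclose(
    incomplete_beta_ps_ref(2.0, 3.0, 0.05), special.betainc(2.0, 3.0, 0.05)
)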
def test_illegal(self):
    try:
        x = zmatrix()
        function([x], cast(x, "float64"))(np.ones((2, 3), dtype="complex128"))
    except TypeError:
        return
    assert 0
def test_observed_with_column_vector(self):
    """This test is related to https://github.com/pymc-devs/aesara/issues/390
    which breaks the broadcastability of column-vector RVs. This unexpected
    change in type can lead to incompatibilities during graph rewriting for
    model.logp evaluation.
    """
    with pm.Model() as model:
        # The `observed` is a broadcastable column vector
        obs = at.as_tensor_variable(np.ones((3, 1), dtype=aesara.config.floatX))
        assert obs.broadcastable == (False, True)

        # Both shapes describe broadcastable column vectors
        size64 = at.constant([3, 1], dtype="int64")
        # But the second shape is upcast from an int32 vector
        cast64 = at.cast(at.constant([3, 1], dtype="int32"), dtype="int64")

        pm.Normal("size64", mu=0, sigma=1, size=size64, observed=obs)
        pm.Normal("shape64", mu=0, sigma=1, shape=size64, observed=obs)
        model.logp()

        pm.Normal("size_cast64", mu=0, sigma=1, size=cast64, observed=obs)
        pm.Normal("shape_cast64", mu=0, sigma=1, shape=cast64, observed=obs)
        model.logp()
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor.

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of dimensions of the input to
    exactly `leftdims`, by adding extra dimensions on the left or by
    combining some existing dimensions on the left of the input.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).
    """
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left
        dummy_dims = tensor.as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = tensor.join(0, dummy_dims, input.shape[:non_pool_ndim], img_shape)
    else:
        # too many dimensions, combine the leading dimensions
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = tensor.prod(input.shape[:batched_ndim])
        # convert to a vector for tensor.join
        batch_size = tensor.shape_padright(batch_size, 1)
        new_shape = tensor.join(
            0, batch_size, input.shape[batched_ndim:non_pool_ndim], img_shape
        )

    # store in the required shape
    new_shape = tensor.cast(new_shape, "int64")
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND
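# A sketch of the inverse helper referenced in the docstring above. This is
# how `unpad_dims` plausibly restores the original dimensions: if `pad_dims`
# changed the shape, the leading dimensions of the original input are joined
# back onto the pooled trailing dimensions. Treat this as an illustration,
# not the library's exact implementation.
def unpad_dims(output, input, leftdims, rightdims):
    """Reshapes the output of `pad_dims` back to the original dimensions."""
    if output.ndim == input.ndim:
        return output
    # restore the leading dimensions of the original input
    outshp = tensor.join(0, input.shape[:-rightdims], output.shape[-rightdims:])
    return GpuReshape(input.ndim)(output, outshp)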
def test_composite_elemwise_float16(self):
    w = aesara.tensor.bvector()
    x = aesara.tensor.vector(dtype="float16")
    y = aesara.tensor.fvector()

    cz = tensor.tanh(x + tensor.cast(y, "float16"))
    o = (
        cz
        - cz**2
        + tensor.cast(x, "int16")
        + tensor.cast(x, "float32")
        + tensor.cast(w, "float16")
        - tensor.constant(np.float16(1.0))
    )

    aesara.function([w, x, y], o, mode=mode_with_gpu)

    v = aesara.tensor.vector(dtype="uint8")
    w = aesara.tensor.vector(dtype="float16")
    x = aesara.tensor.vector(dtype="float16")
    y = aesara.tensor.vector(dtype="float16")
    z = aesara.tensor.vector(dtype="float16")

    o = tensor.switch(v, tensor.mul(w, x, y), z)
    aesara.function([v, w, x, y, z], o, mode=mode_with_gpu)
def test_extract_obs_data():

    with pytest.raises(TypeError):
        extract_obs_data(at.matrix())

    data = np.random.normal(size=(2, 3))
    data_at = at.as_tensor(data)
    mask = np.random.binomial(1, 0.5, size=(2, 3)).astype(bool)

    for val_at in (data_at, aesara.shared(data)):
        res = extract_obs_data(val_at)

        assert isinstance(res, np.ndarray)
        assert np.array_equal(res, data)

    # AdvancedIncSubtensor check
    data_m = np.ma.MaskedArray(data, mask)
    missing_values = data_at.type()[mask]
    constant = at.as_tensor(data_m.filled())
    z_at = at.set_subtensor(constant[mask.nonzero()], missing_values)

    assert isinstance(z_at.owner.op, (AdvancedIncSubtensor, AdvancedIncSubtensor1))

    res = extract_obs_data(z_at)

    assert isinstance(res, np.ndarray)
    assert np.ma.allequal(res, data_m)

    # AdvancedIncSubtensor1 check
    data = np.random.normal(size=(3,))
    data_at = at.as_tensor(data)
    mask = np.random.binomial(1, 0.5, size=(3,)).astype(bool)

    data_m = np.ma.MaskedArray(data, mask)
    missing_values = data_at.type()[mask]
    constant = at.as_tensor(data_m.filled())
    z_at = at.set_subtensor(constant[mask.nonzero()], missing_values)

    assert isinstance(z_at.owner.op, (AdvancedIncSubtensor, AdvancedIncSubtensor1))

    res = extract_obs_data(z_at)

    assert isinstance(res, np.ndarray)
    assert np.ma.allequal(res, data_m)

    # Cast check
    data = np.array(5)
    t = at.cast(at.as_tensor(5.0), np.int64)
    res = extract_obs_data(t)

    assert isinstance(res, np.ndarray)
    assert np.array_equal(res, data)
def test_dtype_mismatch(self):
    rng = np.random.RandomState(utt.fetch_seed())
    data = rng.rand(5).astype(self.dtype)
    x = self.shared(data)
    y = tensor.cast(x * 10, "int8")
    cond = aesara.tensor.iscalar("cond")

    with pytest.raises(TypeError):
        ifelse(cond, x, y)
    with pytest.raises(TypeError):
        ifelse(cond, y, x)
def local_check_parameter_to_ninf_switch(fgraph, node):
    if isinstance(node.op, CheckParameterValue):
        logp_expr, *logp_conds = node.inputs
        if len(logp_conds) > 1:
            logp_cond = at.all(logp_conds)
        else:
            (logp_cond,) = logp_conds
        out = at.switch(logp_cond, logp_expr, -np.inf)
        out.name = node.op.msg

        if out.dtype != node.outputs[0].dtype:
            out = at.cast(out, node.outputs[0].dtype)

        return [out]
def binomial(
    self, size=None, n=1, p=0.5, ndim=None, dtype="int64", nstreams=None, **kwargs
):
    """Sample Bernoulli (``n == 1``) trials with success probability ``p``.

    Each sample is a uniform draw thresholded at ``p`` and cast to
    ``dtype``; ``n > 1`` is not implemented.
    """
    if n == 1:
        p = undefined_grad(as_tensor_variable(p))
        x = self.uniform(size=size, nstreams=nstreams, **kwargs)
        return cast(x < p, dtype)
    else:
        raise NotImplementedError("MRG_RandomStream.binomial with n > 1")
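# A hedged plain-NumPy sketch of the n == 1 trick used above: a Bernoulli(p)
# sample is just a uniform draw thresholded at p, cast to the target dtype.
import numpy as np

rng = np.random.default_rng(42)
p = 0.3
samples = (rng.uniform(size=100_000) < p).astype("int64")
# The sample mean should be close to p.
assert abs(samples.mean() - p) < 0.01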
def shared_dataset(data_xy):
    """Function that loads the dataset into shared variables.

    The reason we store our dataset in shared variables is to allow Aesara
    to copy it into GPU memory (when code is run on a GPU). Since copying
    data into the GPU is slow, copying a minibatch every time one is needed
    (the default behaviour if the data is not in a shared variable) would
    lead to a large decrease in performance.
    """
    data_x, data_y = data_xy
    shared_x = aesara.shared(np.asarray(data_x, dtype=aesara.config.floatX))
    shared_y = aesara.shared(np.asarray(data_y, dtype=aesara.config.floatX))
    # When storing data on the GPU it has to be stored as floats, so we
    # store the labels as ``floatX`` as well (``shared_y`` does exactly
    # that). But during our computations we need them as ints (we use
    # labels as indices, and as floats they don't make sense), so instead
    # of returning ``shared_y`` we cast it to int. This little hack lets
    # us get around the issue.
    return shared_x, aet.cast(shared_y, "int32")
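# Minimal usage sketch with hypothetical random data: the features stay as
# floatX in a shared variable (so they can live on the GPU), while the
# returned label variable is a symbolic int32 cast of a float shared
# variable rather than a shared variable itself.
train_x = np.random.rand(100, 784).astype(aesara.config.floatX)
train_y = np.random.randint(0, 10, size=100)
shared_x, shared_y = shared_dataset((train_x, train_y))
assert shared_y.dtype == "int32"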
def binomial(random_state, size=None, n=1, p=0.5, ndim=None, dtype="int64", prob=None):
    """
    Sample n times with probability of success p for each trial and
    return the number of successes.

    If the size argument is ambiguous on the number of dimensions, ndim
    may be a plain integer to supplement the missing information.

    If size is None, the output shape will be determined by the shapes
    of n and p.
    """
    if prob is not None:
        p = prob
        print(
            "DEPRECATION WARNING: the parameter prob to the binomial function "
            "has been renamed to p to match the name used by np.",
            file=sys.stderr,
        )
    n = tensor.as_tensor_variable(n)
    p = tensor.as_tensor_variable(p)
    ndim, size, bcast = _infer_ndim_bcast(ndim, size, n, p)
    if n.dtype == "int64":
        try:
            np.random.binomial(
                n=np.asarray([2, 3, 4], dtype="int64"),
                p=np.asarray([0.1, 0.2, 0.3], dtype="float64"),
            )
        except TypeError:
            # THIS WORKS AROUND A NUMPY BUG on 32bit machines
            n = tensor.cast(n, "int32")
    op = RandomFunction(
        "binomial", tensor.TensorType(dtype=dtype, broadcastable=(False,) * ndim)
    )
    return op(random_state, size, n, p)
def incomplete_beta_cfe(a, b, x, small):
    """Incomplete beta continued fraction expansions
    based on the Cephes library by Steve Moshier (incbet.c).

    small: Choose element-wise which continued fraction expansion to use.
    """
    BIG = aet.constant(4.503599627370496e15, dtype="float64")
    BIGINV = aet.constant(2.22044604925031308085e-16, dtype="float64")
    THRESH = aet.constant(3.0 * np.MachAr().eps, dtype="float64")

    zero = aet.constant(0.0, dtype="float64")
    one = aet.constant(1.0, dtype="float64")
    two = aet.constant(2.0, dtype="float64")

    r = one
    k1 = a
    k3 = a
    k4 = a + one
    k5 = one
    k8 = a + two

    k2 = aet.switch(small, a + b, b - one)
    k6 = aet.switch(small, b - one, a + b)
    k7 = aet.switch(small, k4, a + one)
    k26update = aet.switch(small, one, -one)
    x = aet.switch(small, x, x / (one - x))

    pkm2 = zero
    qkm2 = one
    pkm1 = one
    qkm1 = one
    r = one

    def _step(i, pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r):
        xk = -(x * k1 * k2) / (k3 * k4)
        pk = pkm1 + pkm2 * xk
        qk = qkm1 + qkm2 * xk
        pkm2 = pkm1
        pkm1 = pk
        qkm2 = qkm1
        qkm1 = qk

        xk = (x * k5 * k6) / (k7 * k8)
        pk = pkm1 + pkm2 * xk
        qk = qkm1 + qkm2 * xk
        pkm2 = pkm1
        pkm1 = pk
        qkm2 = qkm1
        qkm1 = qk

        old_r = r
        r = aet.switch(aet.eq(qk, zero), r, pk / qk)

        k1 += one
        k2 += k26update
        k3 += two
        k4 += two
        k5 += one
        k6 -= k26update
        k7 += two
        k8 += two

        big_cond = aet.gt(aet.abs_(qk) + aet.abs_(pk), BIG)
        biginv_cond = aet.or_(
            aet.lt(aet.abs_(qk), BIGINV), aet.lt(aet.abs_(pk), BIGINV)
        )

        pkm2 = aet.switch(big_cond, pkm2 * BIGINV, pkm2)
        pkm1 = aet.switch(big_cond, pkm1 * BIGINV, pkm1)
        qkm2 = aet.switch(big_cond, qkm2 * BIGINV, qkm2)
        qkm1 = aet.switch(big_cond, qkm1 * BIGINV, qkm1)

        pkm2 = aet.switch(biginv_cond, pkm2 * BIG, pkm2)
        pkm1 = aet.switch(biginv_cond, pkm1 * BIG, pkm1)
        qkm2 = aet.switch(biginv_cond, qkm2 * BIG, qkm2)
        qkm1 = aet.switch(biginv_cond, qkm1 * BIG, qkm1)

        return (
            (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r),
            until(aet.abs_(old_r - r) < (THRESH * aet.abs_(r))),
        )

    (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), _ = scan(
        _step,
        sequences=[aet.arange(0, 300)],
        outputs_info=[
            e
            for e in aet.cast(
                (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r),
                "float64",
            )
        ],
    )

    return r[-1]
def uniform(self, size, low=0.0, high=1.0, ndim=None, dtype=None, nstreams=None, **kwargs):
    """
    Sample a tensor of the given size whose elements are drawn from a
    uniform distribution between low and high.

    If the size argument is ambiguous on the number of dimensions, ndim
    may be a plain integer to supplement the missing information.

    Parameters
    ----------
    low
        Lower bound of the interval on which values are sampled.
        If the ``dtype`` arg is provided, ``low`` will be cast into
        dtype. This bound is excluded.
    high
        Higher bound of the interval on which values are sampled.
        If the ``dtype`` arg is provided, ``high`` will be cast into
        dtype. This bound is excluded.
    size
        Can be a list of integers or an Aesara variable (e.g. the shape
        of another Aesara Variable).
    ndim
        The number of dimensions for the output; only needed when ``size``
        is ambiguous about it.
    nstreams
        The number of MRG streams to use; if None, it is inferred from
        ``size``.
    dtype
        The output data type. If dtype is not specified, it will be
        inferred from the dtype of low and high, but will be at least as
        precise as floatX.
    """
    low = as_tensor_variable(low)
    high = as_tensor_variable(high)

    if dtype is None:
        dtype = aes.upcast(config.floatX, low.dtype, high.dtype)

    low = cast(low, dtype=dtype)
    high = cast(high, dtype=dtype)

    low = undefined_grad(low)
    high = undefined_grad(high)

    if isinstance(size, tuple):
        msg = "size must be a tuple of int or an Aesara variable"
        assert all(isinstance(i, (np.integer, int, Variable)) for i in size), msg
        if any(isinstance(i, (np.integer, int)) and i <= 0 for i in size):
            raise ValueError(
                "The specified size contains a dimension with value <= 0", size
            )
    else:
        if not (isinstance(size, Variable) and size.ndim == 1):
            raise TypeError(
                "size must be a tuple of int or an Aesara "
                "Variable with 1 dimension, got "
                + str(size)
                + " of type "
                + str(type(size))
            )
    orig_nstreams = nstreams
    if nstreams is None:
        nstreams = self.n_streams(size)
    rstates = self.get_substream_rstates(nstreams, dtype)

    d = {}
    if "target" in kwargs:
        d = dict(target=kwargs.pop("target"))
    if len(kwargs) > 0:
        raise TypeError(
            f"uniform() got unexpected keyword arguments {kwargs.keys()}"
        )
    node_rstate = shared(rstates, **d)
    u = self.pretty_return(
        node_rstate,
        *mrg_uniform.new(node_rstate, ndim, dtype, size),
        size=size,
        nstreams=orig_nstreams,
    )
    # Add a reference to distinguish from other shared variables
    node_rstate.tag.is_rng = True
    r = u * (high - low) + low

    if u.type.broadcastable != r.type.broadcastable:
        raise NotImplementedError(
            "Increase the size to match the broadcasting pattern of "
            "`low` and `high` arguments"
        )

    assert r.dtype == dtype
    return r
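# Minimal usage sketch for `uniform` above, assuming the method lives on
# MRG_RandomStream (as in aesara.sandbox.rng_mrg):
import aesara
from aesara.sandbox.rng_mrg import MRG_RandomStream

srng = MRG_RandomStream(seed=1234)
u = srng.uniform(size=(2, 3), low=-1.0, high=1.0)
f = aesara.function([], u)
samples = f()
assert samples.shape == (2, 3)
assert ((samples > -1.0) & (samples < 1.0)).all()  # both bounds excluded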
def __call__(self, y0, theta, return_sens=False, **kwargs):
    if isinstance(y0, (list, tuple)) and not len(y0) == self.n_states:
        raise ShapeError(
            "Length of y0 is wrong.", actual=(len(y0),), expected=(self.n_states,)
        )
    if isinstance(theta, (list, tuple)) and not len(theta) == self.n_theta:
        raise ShapeError(
            "Length of theta is wrong.", actual=(len(theta),), expected=(self.n_theta,)
        )

    # convert inputs to tensors (and check their types)
    y0 = at.cast(at.unbroadcast(at.as_tensor_variable(y0), 0), floatX)
    theta = at.cast(at.unbroadcast(at.as_tensor_variable(theta), 0), floatX)
    inputs = [y0, theta]
    for i, (input_val, itype) in enumerate(zip(inputs, self._itypes)):
        if not input_val.type.in_same_class(itype):
            raise ValueError(
                f"Input {i} of type {input_val.type} does not have the expected type of {itype}"
            )

    # use default implementation to prepare symbolic outputs (via make_node)
    states, sens = super().__call__(y0, theta, **kwargs)

    if aesara.config.compute_test_value != "off":
        # compute test values from input test values
        test_states, test_sens = self._simulate(
            y0=get_test_value(y0), theta=get_test_value(theta)
        )

        # check types of simulation result
        if not test_states.dtype == self._otypes[0].dtype:
            raise DtypeError(
                "Simulated states have the wrong type.",
                actual=test_states.dtype,
                expected=self._otypes[0].dtype,
            )
        if not test_sens.dtype == self._otypes[1].dtype:
            raise DtypeError(
                "Simulated sensitivities have the wrong type.",
                actual=test_sens.dtype,
                expected=self._otypes[1].dtype,
            )

        # check shapes of simulation result
        expected_states_shape = (self.n_times, self.n_states)
        expected_sens_shape = (self.n_times, self.n_states, self.n_p)
        if not test_states.shape == expected_states_shape:
            raise ShapeError(
                "Simulated states have the wrong shape.",
                test_states.shape,
                expected_states_shape,
            )
        if not test_sens.shape == expected_sens_shape:
            raise ShapeError(
                "Simulated sensitivities have the wrong shape.",
                test_sens.shape,
                expected_sens_shape,
            )

        # attach results as test values to the outputs
        states.tag.test_value = test_states
        sens.tag.test_value = test_sens

    if return_sens:
        return states, sens
    return states
# Declare Aesara symbolic variables
x = aesara.shared(D[0], name="x")
y = aesara.shared(D[1], name="y")
w = aesara.shared(rng.randn(feats).astype(aesara.config.floatX), name="w")
b = aesara.shared(np.asarray(0.0, dtype=aesara.config.floatX), name="b")
x.tag.test_value = D[0]
y.tag.test_value = D[1]
# print("Initial model:")
# print(w.get_value(), b.get_value())

# Construct Aesara expression graph
p_1 = 1 / (1 + tt.exp(-tt.dot(x, w) - b))  # Probability of having a one
prediction = p_1 > 0.5  # The prediction that is done: 0 or 1
xent = -y * tt.log(p_1) - (1 - y) * tt.log(1 - p_1)  # Cross-entropy
cost = tt.cast(xent.mean(), "float32") + 0.01 * (w**2).sum()  # The cost to optimize
gw, gb = tt.grad(cost, [w, b])

# Compile expressions to functions
train = aesara.function(
    inputs=[],
    outputs=[prediction, xent],
    updates=[(w, w - 0.01 * gw), (b, b - 0.01 * gb)],
    name="train",
)
predict = aesara.function(inputs=[], outputs=prediction, name="predict")

if any([
    n.op.__class__.__name__ in ["Gemv", "CGemv", "Gemm", "CGemm"]
    for n in train.maker.fgraph.toposort()
]):
def normal(
    self,
    size,
    avg=0.0,
    std=1.0,
    ndim=None,
    dtype=None,
    nstreams=None,
    truncate=False,
    **kwargs,
):
    """
    Sample a tensor of values from a normal distribution.

    Parameters
    ----------
    size : int_vector_like
        Array dimensions for the output tensor.
    avg : float_like, optional
        The mean value for the truncated normal to sample from (defaults
        to 0.0).
    std : float_like, optional
        The standard deviation for the truncated normal to sample from
        (defaults to 1.0).
    truncate : bool, optional
        Truncates the normal distribution at 2 standard deviations if
        True (defaults to False). When this flag is set, the standard
        deviation of the result will be less than the one specified.
    ndim : int, optional
        The number of dimensions for the output tensor (defaults to
        None). This argument is necessary if the size argument is
        ambiguous on the number of dimensions.
    dtype : str, optional
        The data-type for the output tensor. If not specified, the dtype
        is inferred from avg and std, but it is at least as precise as
        floatX.
    kwargs
        Other keyword arguments for random number generation (see
        uniform).

    Returns
    -------
    samples : TensorVariable
        An Aesara tensor of samples randomly drawn from a normal
        distribution.

    """
    size = _check_size(size)
    avg = undefined_grad(as_tensor_variable(avg))
    std = undefined_grad(as_tensor_variable(std))

    if dtype is None:
        dtype = aes.upcast(config.floatX, avg.dtype, std.dtype)

    avg = at.cast(avg, dtype=dtype)
    std = at.cast(std, dtype=dtype)

    # generate even number of uniform samples
    # Do manual constant folding to lower optimizer work.
    if isinstance(size, Constant):
        n_odd_samples = size.prod(dtype="int64")
    else:
        n_odd_samples = prod(size, dtype="int64")
    n_even_samples = n_odd_samples + n_odd_samples % 2
    uniform = self.uniform(
        (n_even_samples,),
        low=0.0,
        high=1.0,
        ndim=1,
        dtype=dtype,
        nstreams=nstreams,
        **kwargs,
    )

    # box-muller transform
    u1 = uniform[: n_even_samples // 2]
    u2 = uniform[n_even_samples // 2 :]
    r = sqrt(-2.0 * log(u1))
    theta = np.array(2.0 * np.pi, dtype=dtype) * u2
    cos_theta, sin_theta = cos(theta), sin(theta)
    z0 = r * cos_theta
    z1 = r * sin_theta

    if truncate:
        # use valid samples
        to_fix0 = (z0 < -2.0) | (z0 > 2.0)
        to_fix1 = (z1 < -2.0) | (z1 > 2.0)
        z0_valid = z0[at.nonzero(~to_fix0)]
        z1_valid = z1[at.nonzero(~to_fix1)]

        # re-sample invalid samples
        to_fix0 = at.nonzero(to_fix0)[0]
        to_fix1 = at.nonzero(to_fix1)[0]
        n_fix_samples = to_fix0.size + to_fix1.size
        lower = at.constant(1.0 / np.e**2, dtype=dtype)
        u_fix = self.uniform(
            (n_fix_samples,),
            low=lower,
            high=1.0,
            ndim=1,
            dtype=dtype,
            nstreams=nstreams,
            **kwargs,
        )
        r_fix = sqrt(-2.0 * log(u_fix))
        z0_fixed = r_fix[: to_fix0.size] * cos_theta[to_fix0]
        z1_fixed = r_fix[to_fix0.size :] * sin_theta[to_fix1]

        # pack everything together to a useful result
        norm_samples = at.join(0, z0_valid, z0_fixed, z1_valid, z1_fixed)
    else:
        norm_samples = at.join(0, z0, z1)
    if isinstance(n_odd_samples, Variable):
        samples = norm_samples[:n_odd_samples]
    elif n_odd_samples % 2 == 1:
        samples = norm_samples[:-1]
    else:
        samples = norm_samples
    samples = reshape(samples, newshape=size, ndim=ndim)
    samples *= std
    samples += avg

    return samples
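# A plain-NumPy sketch (not the library code) of the Box-Muller transform
# used above: a pair of uniforms maps to a pair of independent standard
# normals via r = sqrt(-2 log u1) and theta = 2 pi u2.
import numpy as np

rng = np.random.default_rng(0)
u1 = rng.uniform(size=50_000)
u2 = rng.uniform(size=50_000)
r = np.sqrt(-2.0 * np.log(u1))
theta = 2.0 * np.pi * u2
z = np.concatenate([r * np.cos(theta), r * np.sin(theta)])
# The pooled samples should be approximately standard normal.
assert abs(z.mean()) < 0.02 and abs(z.std() - 1.0) < 0.02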
# Declare Aesara symbolic variables
x = aet.matrix("x")
y = aet.vector("y")
w = aesara.shared(rng.randn(feats).astype(aesara.config.floatX), name="w")
b = aesara.shared(np.asarray(0.0, dtype=aesara.config.floatX), name="b")
x.tag.test_value = D[0]
y.tag.test_value = D[1]
# print("Initial model:")
# print(w.get_value(), b.get_value())

# Construct Aesara expression graph
p_1 = 1 / (1 + aet.exp(-aet.dot(x, w) - b))  # Probability of having a one
prediction = p_1 > 0.5  # The prediction that is done: 0 or 1
xent = -y * aet.log(p_1) - (1 - y) * aet.log(1 - p_1)  # Cross-entropy
cost = aet.cast(xent.mean(), 'float32') + 0.01 * (w ** 2).sum()  # The cost to optimize
gw, gb = aet.grad(cost, [w, b])

# Compile expressions to functions
train = aesara.function(
    inputs=[x, y],
    outputs=[prediction, xent],
    updates={w: w - 0.01 * gw, b: b - 0.01 * gb},
    name="train",
)
predict = aesara.function(inputs=[x], outputs=prediction, name="predict")

if any([
    x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm']
def test_batch_normalization_train():
    for axes in ("per-activation", "spatial", (1, 2, 3, 4)):
        for vartype in (tensor5, tensor3, vector):
            x, scale, bias, running_mean, running_var = (
                vartype(n)
                for n in ("x", "scale", "bias", "running_mean", "running_var")
            )
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used
            running_average_factor = 0.3

            # remove non-existing axes
            if isinstance(axes, tuple):
                axes = tuple(i for i in axes if i < ndim)
            if len(axes) == 0:
                continue

            # forward pass
            (
                out,
                x_mean,
                x_invstd,
                out_running_mean,
                out_running_var,
            ) = batchnorm.batch_normalization_train(
                x,
                scale,
                bias,
                axes,
                eps,
                running_average_factor,
                running_mean,
                running_var,
            )
            # reference forward pass
            if axes == "per-activation":
                axes2 = (0,)
            elif axes == "spatial":
                axes2 = (0,) + tuple(range(2, ndim))
            else:
                axes2 = axes
            x_mean2 = x.mean(axis=axes2, keepdims=True)
            x_var2 = x.var(axis=axes2, keepdims=True)
            x_invstd2 = aet.reciprocal(aet.sqrt(x_var2 + eps))
            scale2 = aet.addbroadcast(scale, *axes2)
            bias2 = aet.addbroadcast(bias, *axes2)
            out2 = (x - x_mean2) * (scale2 * x_invstd2) + bias2
            m = aet.cast(
                aet.prod(x.shape) / aet.prod(scale.shape), aesara.config.floatX
            )
            out_running_mean2 = (
                running_mean * (1 - running_average_factor)
                + x_mean2 * running_average_factor
            )
            out_running_var2 = (
                running_var * (1 - running_average_factor)
                + (m / (m - 1)) * x_var2 * running_average_factor
            )
            # backward pass
            dy = vartype("dy")
            grads = aet.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
            # reference backward pass
            grads2 = aet.grad(None, wrt=[x, scale, bias], known_grads={out2: dy})
            # second-order backward pass
            dx = vartype("dinputs")
            dscale = vartype("dscale")
            dbias = vartype("dbias")
            grad_grads = aet.grad(
                None,
                wrt=[x, dy, scale],
                known_grads=OrderedDict(
                    {grads[0]: dx, grads[1]: dscale, grads[2]: dbias}
                ),
                consider_constant=[
                    x,
                    dy,
                    scale,
                    bias,
                    x_mean,
                    x_invstd,
                    running_mean,
                    running_var,
                ],
                return_disconnected="zero",
            )
            # reference second-order backward pass
            grad_grads2 = aet.grad(
                None,
                wrt=[x, dy, scale],
                known_grads=OrderedDict(
                    {grads2[0]: dx, grads2[1]: dscale, grads2[2]: dbias}
                ),
                consider_constant=[
                    x,
                    dy,
                    scale,
                    bias,
                    x_mean2,
                    x_var2,
                    running_mean,
                    running_var,
                ],
                return_disconnected="zero",
            )
            # compile
            f = aesara.function(
                [x, scale, bias, running_mean, running_var, dy, dx, dscale, dbias],
                [
                    out,
                    x_mean,
                    x_invstd,
                    out_running_mean,
                    out_running_var,
                    out2,
                    x_mean2,
                    x_invstd2,
                    out_running_mean2,
                    out_running_var2,
                ]
                + grads
                + grads2
                + grad_grads
                + grad_grads2,
            )
            # check if the abstract Ops have been replaced
            assert not any([
                isinstance(
                    n.op,
                    (
                        batchnorm.AbstractBatchNormTrain,
                        batchnorm.AbstractBatchNormInference,
                        batchnorm.AbstractBatchNormTrainGrad,
                    ),
                )
                for n in f.maker.fgraph.toposort()
            ])
            # run
            for data_shape in ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1), (2, 3, 5, 5, 5)):
                data_shape = data_shape[:ndim]
                param_shape = tuple(
                    1 if d in axes2 else s for d, s in enumerate(data_shape)
                )
                rng = np.random.default_rng(1234)

                X = 4 + 3 * rng.random(data_shape).astype(aesara.config.floatX)
                Dy = -1 + 2 * rng.random(data_shape).astype(aesara.config.floatX)
                Scale = rng.random(param_shape).astype(aesara.config.floatX)
                Bias = rng.random(param_shape).astype(aesara.config.floatX)
                Running_mean = rng.random(param_shape).astype(aesara.config.floatX)
                Running_var = rng.random(param_shape).astype(aesara.config.floatX)
                Dx = 4 + 3 * rng.random(data_shape).astype(aesara.config.floatX)
                Dscale = -1 + 2 * rng.random(param_shape).astype(aesara.config.floatX)
                Dbias = rng.random(param_shape).astype(aesara.config.floatX)

                outputs = f(
                    X, Scale, Bias, Running_mean, Running_var, Dy, Dx, Dscale, Dbias
                )
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[0 + 5])  # out
                utt.assert_allclose(outputs[1], outputs[1 + 5])  # mean
                utt.assert_allclose(outputs[2], outputs[2 + 5])  # invstd
                utt.assert_allclose(outputs[3], outputs[3 + 5])  # running_mean
                utt.assert_allclose(
                    np.nan_to_num(outputs[4]), np.nan_to_num(outputs[4 + 5])
                )  # running_var
                # compare gradients
                utt.assert_allclose(outputs[10], outputs[10 + 3], atol=1e-4)  # dx
                utt.assert_allclose(
                    outputs[11], outputs[11 + 3], rtol=2e-4, atol=1e-4
                )  # dscale
                utt.assert_allclose(outputs[12], outputs[12 + 3])  # dbias
                # compare second-order gradients
                utt.assert_allclose(outputs[16], outputs[16 + 3], atol=1e-4)  # ddx
                utt.assert_allclose(outputs[17], outputs[17 + 3])  # ddy
                utt.assert_allclose(
                    outputs[18], outputs[18 + 3], rtol=3e-4, atol=1e-4
                )  # ddscale
def test_elemwise1(self):
    self.check_rop_lop(self.x + aet.cast(self.x, "int32"), self.in_shape)
def f_rnn(u_t, x_tm1, W_in, W):
    return (u_t * W_in + x_tm1 * W, aet.cast(u_t + x_tm1, "int64"))
def test_cast(self):
    x = zvector()
    with pytest.raises(TypeError):
        cast(x, "int32")
def diag(self, X):
    X, _ = self._slice(X, None)
    index = at.cast(X, "int32")
    return at.diag(self.B)[index.ravel()]
def _infer_ndim_bcast(ndim, shape, *args):
    """
    Infer the number of dimensions from the shape or the other arguments.

    Returns
    -------
    (int, variable, tuple) triple, where the variable is an integer vector,
    and the tuple contains Booleans
        The first element returned is the inferred number of dimensions.
        The second element is the shape inferred (combining symbolic and
        constant information from shape and args).
        The third element is a broadcasting pattern corresponding to that
        shape.

    """
    # Find the minimum value of ndim required by the *args
    if args:
        args_ndim = max(arg.ndim for arg in args)
    else:
        args_ndim = 0

    if isinstance(shape, (tuple, list)):
        # there is a convention that -1 means the corresponding shape of a
        # potentially-broadcasted symbolic arg
        #
        # This case combines together symbolic and non-symbolic shape
        # information
        shape_ndim = len(shape)
        if ndim is None:
            ndim = shape_ndim
        else:
            if shape_ndim != ndim:
                raise ValueError(
                    "ndim should be equal to len(shape), but\n",
                    "ndim = %s, len(shape) = %s, shape = %s"
                    % (ndim, shape_ndim, shape),
                )

        bcast = []
        pre_v_shape = []
        for i, s in enumerate(shape):
            if hasattr(s, "type"):  # s is symbolic
                bcast.append(False)  # todo - introspect further
                pre_v_shape.append(s)
            else:
                if s >= 0:
                    pre_v_shape.append(tensor.as_tensor_variable(s))
                    bcast.append(s == 1)
                elif s == -1:
                    n_a_i = 0
                    for a in args:
                        # ndim: _   _   _   _   _   _
                        # ashp:         s0  s1  s2  s3
                        #                       i
                        if i >= ndim - a.ndim:
                            n_a_i += 1
                            a_i = i + a.ndim - ndim
                            if not a.broadcastable[a_i]:
                                pre_v_shape.append(a.shape[a_i])
                                bcast.append(False)
                                break
                    else:
                        if n_a_i == 0:
                            raise ValueError(
                                "Auto-shape of -1 must overlap "
                                "with the shape of one of the broadcastable "
                                "inputs"
                            )
                        else:
                            pre_v_shape.append(tensor.as_tensor_variable(1))
                            bcast.append(True)
                else:
                    raise ValueError("negative shape", s)
        # post-condition: shape may still contain both symbolic and
        # non-symbolic things
        if len(pre_v_shape) == 0:
            v_shape = tensor.constant([], dtype="int64")
        else:
            v_shape = tensor.stack(pre_v_shape)

    elif shape is None:
        # The number of drawn samples will be determined automatically,
        # but we need to know ndim
        if not args:
            raise TypeError(
                "_infer_ndim_bcast cannot infer shape without"
                " either shape or args"
            )
        template = reduce(lambda a, b: a + b, args)
        v_shape = template.shape
        bcast = template.broadcastable
        ndim = template.ndim
    else:
        v_shape = tensor.as_tensor_variable(shape)
        if v_shape.ndim != 1:
            raise TypeError(
                "shape must be a vector or list of scalar, got '%s'" % v_shape
            )

        if ndim is None:
            ndim = tensor.get_vector_length(v_shape)
        bcast = [False] * ndim

    if v_shape.ndim != 1:
        raise TypeError("shape must be a vector or list of scalar, got '%s'" % v_shape)

    if v_shape.dtype not in aesara.tensor.integer_dtypes:
        raise TypeError("shape must be an integer vector or list", v_shape.dtype)

    if args_ndim > ndim:
        raise ValueError(
            "ndim should be at least as big as required by args value",
            (ndim, args_ndim),
            args,
        )

    assert ndim == len(bcast)
    return ndim, tensor.cast(v_shape, "int64"), tuple(bcast)
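# Hypothetical usage sketch of the helper above (assuming this module's
# imports are available): an explicit shape determines ndim and the
# broadcasting pattern directly, with size-1 dimensions marked broadcastable,
# and the returned shape vector is always cast to int64.
ndim_out, shape_var, bcast = _infer_ndim_bcast(None, (3, 1))
assert ndim_out == 2
assert bcast == (False, True)
assert shape_var.dtype == "int64"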