def test_str(self):
    op = Elemwise(aes.add, inplace_pattern=None, name=None)
    assert str(op) == "Elemwise{add}"
    op = Elemwise(aes.add, inplace_pattern={0: 0}, name=None)
    assert str(op) == "Elemwise{add}[(0, 0)]"
    op = Elemwise(aes.add, inplace_pattern=None, name="my_op")
    assert str(op) == "my_op"
def test_infer_shape(self):
    for s_left, s_right in [
        ((5, 6), (5, 6)),
        ((5, 6), (5, 1)),
        ((5, 6), (1, 6)),
        ((5, 1), (5, 6)),
        ((1, 6), (5, 6)),
        ((2, 3, 4, 5), (2, 3, 4, 5)),
        ((2, 3, 4, 5), (2, 3, 1, 5)),
        ((2, 3, 4, 5), (1, 3, 4, 5)),
        ((2, 1, 4, 5), (2, 3, 4, 5)),
        ((2, 3, 4, 1), (2, 3, 4, 5)),
    ]:
        dtype = aesara.config.floatX
        t_left = TensorType(dtype, [(entry == 1) for entry in s_left])()
        t_right = TensorType(dtype, [(entry == 1) for entry in s_right])()
        t_left_val = np.zeros(s_left, dtype=dtype)
        t_right_val = np.zeros(s_right, dtype=dtype)
        self._compile_and_check(
            [t_left, t_right],
            [Elemwise(aes.add)(t_left, t_right)],
            [t_left_val, t_right_val],
            Elemwise,
        )
def test_not_implemented_elemwise_grad():
    # Regression test for unimplemented gradient in an Elemwise Op.

    class TestOp(aes.ScalarOp):
        def __init__(self):
            self.output_types_preference = aes.upgrade_to_float

        def impl(self, n, x):
            return x * n

        def grad(self, inputs, gout):
            (n, x) = inputs
            (gz,) = gout
            dy_dx = n
            return [aesara.gradient.grad_not_implemented(self, 0, n), gz * dy_dx]

    test_op = Elemwise(TestOp())
    x = scalar()
    assert isinstance(aesara.gradient.grad(test_op(2, x), x), Variable)

    # Verify that trying to use the not-implemented gradient fails.
    with pytest.raises(aesara.gradient.NullTypeGradError):
        aesara.gradient.grad(test_op(x, 2), x)
def test_numba_Composite(inputs, input_values):
    x_s = aes.float64("x")
    y_s = aes.float64("y")
    comp_op = Elemwise(Composite([x_s, y_s], [x_s + y_s * 2 + aes.exp(x_s - y_s)]))
    out_fg = FunctionGraph(inputs, [comp_op(*inputs)])
    compare_numba_and_py(out_fg, input_values)
def test_jax_Composite(x, y, x_val, y_val):
    x_s = aes.float64("x")
    y_s = aes.float64("y")
    comp_op = Elemwise(Composite([x_s, y_s], [x_s + y_s * 2 + aes.exp(x_s - y_s)]))
    out = comp_op(x, y)
    out_fg = FunctionGraph([x, y], [out])
    test_input_vals = [
        x_val.astype(config.floatX),
        y_val.astype(config.floatX),
    ]
    _ = compare_jax_and_py(out_fg, test_input_vals)
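# The same fused scalar graph used in the Numba/JAX tests above can also be
# evaluated with the default Aesara backend. This is an illustrative sketch,
# not part of the test suite; the `aesara.scalar.basic` import path for
# `Composite` is an assumption.
import aesara
import aesara.scalar as aes
import aesara.tensor as at
from aesara.scalar.basic import Composite
from aesara.tensor.elemwise import Elemwise

x_s = aes.float64("x")
y_s = aes.float64("y")
# One scalar `Composite` wrapped in a single `Elemwise`, applied elementwise
comp_op = Elemwise(Composite([x_s, y_s], [x_s + y_s * 2 + aes.exp(x_s - y_s)]))

xt = at.dvector("x")
yt = at.dvector("y")
f = aesara.function([xt, yt], comp_op(xt, yt))
print(f([1.0, 2.0], [3.0, 4.0]))  # x + 2*y + exp(x - y), elementwise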
def batch_normalization(inputs, gamma, beta, mean, std, mode="low_mem"):
    """
    This function will build the symbolic graph for applying batch
    normalization to a set of activations.

    Also works on GPUs, but is not optimized using cuDNN.

    .. versionadded:: 0.7.1

    Parameters
    ----------
    inputs : symbolic tensor
        Mini-batch of activations
    gamma: symbolic tensor
        BN scale parameter, must be of same dimensionality as inputs and
        broadcastable against it
    beta: symbolic tensor
        BN shift parameter, must be of same dimensionality as inputs and
        broadcastable against it
    mean: symbolic tensor
        inputs means, must be of same dimensionality as inputs and
        broadcastable against it
    std: symbolic tensor
        inputs standard deviation, must be of same dimensionality as inputs
        and broadcastable against it
    mode: 'low_mem' or 'high_mem'
        Specify which batch_normalization implementation will be used. Because
        no intermediate representations are stored for back-propagation, the
        'low_mem' implementation lowers memory usage; however, it is 5-10%
        slower than the 'high_mem' implementation. Note that this 5-10%
        difference applies to the batch_normalization operation alone; the
        difference is likely to matter less over a full model fprop/bprop.

    """
    if mode == "low_mem":
        elm_bn = Elemwise(scalar_op=BNComposite(dtype=inputs.dtype))
        rval = elm_bn(inputs, mean, std, gamma, beta)
    elif mode == "high_mem":
        rval = (inputs - mean) * (gamma / std) + beta
    else:
        raise ValueError('mode must be either "low_mem" or "high_mem"')
    return rval
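# Minimal usage sketch for the function above. Shapes and parameter values
# are illustrative assumptions, not taken from the original code; the
# "low_mem" mode works the same way via the module's BNComposite Op.
import aesara
import aesara.tensor as at
import numpy as np

x = at.matrix("x")
gamma = at.matrix("gamma")
beta = at.matrix("beta")
mean = at.matrix("mean")
std = at.matrix("std")

bn = batch_normalization(x, gamma, beta, mean, std, mode="high_mem")
f = aesara.function([x, gamma, beta, mean, std], bn)

xv = np.random.rand(4, 3).astype(aesara.config.floatX)
ones = np.ones_like(xv)
out = f(
    xv,
    ones,                                   # gamma: scale
    np.zeros_like(xv),                      # beta: shift
    xv.mean(axis=0, keepdims=True) * ones,  # per-feature mean
    xv.std(axis=0, keepdims=True) * ones,   # per-feature std
)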
        return [x_grad * self.grad_op(x)]


class I1e(UnaryScalarOp):
    """
    Modified Bessel function of the first kind of order 1, exponentially scaled.
    """

    nfunc_spec = ("scipy.special.i1e", 1, 1)

    def impl(self, x):
        return scipy.special.i1e(x)


i1e_scalar = I1e(upgrade_to_float_no_complex, name="i1e")
i1e = Elemwise(i1e_scalar, name="Elemwise{i1e,no_inplace}")


class I0e(UnaryScalarOp):
    """
    Modified Bessel function of the first kind of order 0, exponentially scaled.
    """

    nfunc_spec = ("scipy.special.i0e", 1, 1)

    def impl(self, x):
        return scipy.special.i0e(x)

    def grad(self, inp, grads):
        (x,) = inp
        (gz,) = grads
def with_mode(
    self,
    mode,
    scalar_op=aes.add,
    dtype="floatX",
    pre_scalar_op=None,
    test_nan=False,
    tensor_op=None,
):
    for xsh, tosum in self.cases:
        if dtype == "floatX":
            dtype = aesara.config.floatX
        x = self.type(dtype, [(entry == 1) for entry in xsh])("x")
        d = {}
        if pre_scalar_op is not None:
            d = {"pre_scalar_op": pre_scalar_op}
        if tensor_op is None:
            e = as_tensor_variable(self.op(scalar_op, axis=tosum, **d)(x))
        else:
            e = as_tensor_variable(tensor_op(x, axis=tosum, **d))

        if tosum is None:
            tosum = list(range(len(xsh)))

        f = aesara.function([x], e, mode=mode, on_unused_input="ignore")
        xv = np.asarray(np.random.random(xsh))

        if dtype not in discrete_dtypes:
            xv = np.asarray(xv, dtype=dtype)
        else:
            xv = np.asarray(xv < 0.5, dtype=dtype)

        if test_nan and xv.size > 0:
            if len(xsh) > 0:
                xv = xv.flatten()
                xv[0] = np.nan
                xv = xv.reshape(*xsh)
            else:
                xv = np.asarray(np.nan, dtype=dtype)

        zv = xv
        if pre_scalar_op is not None:
            zv = Elemwise(scalar_op=pre_scalar_op)(x).eval({x: xv})

        if len(tosum) > 1 and any(a < 0 for a in tosum):
            # In that case, we need to use the proper axis order
            # in the reduction.
            axis2 = []
            for a in tosum:
                if a < 0:
                    axis2.append(a + len(xsh))
                else:
                    axis2.append(a)
            assert len(axis2) == len(tosum)
            tosum = tuple(axis2)

        if tensor_op == at_all:
            for axis in reversed(sorted(tosum)):
                zv = np.all(zv, axis)
            if len(tosum) == 0:
                zv = zv != 0
        elif tensor_op == at_any:
            for axis in reversed(sorted(tosum)):
                zv = np.any(zv, axis)
            if len(tosum) == 0:
                zv = zv != 0
        elif scalar_op == aes.add:
            for axis in reversed(sorted(tosum)):
                zv = np.add.reduce(zv, axis)
            if dtype == "bool":
                # np.add upcasts bools, while CAReduce does not
                zv = zv.astype(dtype)
        elif scalar_op == aes.mul:
            for axis in reversed(sorted(tosum)):
                zv = np.multiply.reduce(zv, axis)
        elif scalar_op == aes.scalar_maximum:
            # There is no identity value for the maximum function,
            # so we can't support shapes with dimensions of size 0.
            if np.prod(zv.shape) == 0:
                continue
            for axis in reversed(sorted(tosum)):
                zv = np.maximum.reduce(zv, axis)
        elif scalar_op == aes.scalar_minimum:
            # There is no identity value for the minimum function,
            # so we can't support shapes with dimensions of size 0.
            if np.prod(zv.shape) == 0:
                continue
            for axis in reversed(sorted(tosum)):
                zv = np.minimum.reduce(zv, axis)
        elif scalar_op == aes.or_:
            for axis in reversed(sorted(tosum)):
                zv = np.bitwise_or.reduce(zv, axis)
        elif scalar_op == aes.and_:
            for axis in reversed(sorted(tosum)):
                zv = reduce_bitwise_and(zv, axis, dtype=dtype)
        elif scalar_op == aes.xor:
            # There is no identity value for the xor function,
            # so we can't support shapes with dimensions of size 0.
            if np.prod(zv.shape) == 0:
                continue
            for axis in reversed(sorted(tosum)):
                zv = np.bitwise_xor.reduce(zv, axis)
        else:
            raise Exception(
                f"Test for CAReduce with scalar_op {scalar_op} not implemented"
            )

        if test_nan:
            try:
                assert self.type.values_eq(f(xv), zv), (f(xv), zv)
            except NotImplementedError:
                # GpuCAReduce does not implement all cases when size is 0
                assert xv.size == 0
        else:
            try:
                f_xv = f(xv)
                assert f_xv.shape == zv.shape, (f_xv, zv)
                utt.assert_allclose(zv, f_xv)
            except NotImplementedError:
                # GpuCAReduce does not implement all cases when size is 0
                assert xv.size == 0

        x = self.type(dtype, [(entry == 1) for entry in xsh])("x")
        if tensor_op is None:
            e = self.op(scalar_op, axis=tosum)(x)
        else:
            e = tensor_op(x, axis=tosum)
        if tosum is None:
            tosum = list(range(len(xsh)))
        f = aesara.function([x], e.shape, mode=mode, on_unused_input="ignore")
        if not (
            scalar_op in [aes.scalar_maximum, aes.scalar_minimum]
            and (xsh == () or np.prod(xsh) == 0)
        ):
            try:
                assert all(f(xv) == zv.shape)
            except NotImplementedError:
                # GpuCAReduce does not implement all cases when size is 0
                assert xv.size == 0
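# Stand-alone illustration of the pattern exercised above (assumed imports;
# not part of the test class): a `CAReduce` over `aes.add` should match
# `np.add.reduce` over the same axis.
import aesara
import aesara.scalar as aes
import aesara.tensor as at
import numpy as np
from aesara.tensor.elemwise import CAReduce

x = at.matrix("x")
f = aesara.function([x], CAReduce(aes.add, axis=1)(x))
xv = np.random.random((3, 4)).astype(aesara.config.floatX)
assert np.allclose(f(xv), np.add.reduce(xv, axis=1))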
    def __init__(self, tensor):
        self.tensor = tensor

    def __call__(self, input):
        """Replace the single input of the symbolic variable with the passed argument.

        Parameters
        ----------
        input: TensorVariable
        """
        (oldinput,) = inputvars(self.tensor)
        return aesara.clone_replace(self.tensor, {oldinput: input}, strict=False)


scalar_identity = IdentityOp(scalar.upgrade_to_float, name="scalar_identity")
identity = Elemwise(scalar_identity, name="identity")


class GeneratorOp(Op):
    """
    Generator Op is designed for storing Python generators inside an aesara graph.

    __call__ creates TensorVariable
        It has 2 new methods
        - var.set_gen(gen): sets new generator
        - var.set_default(value): sets new default value (None erases default value)

    If the generator is exhausted, the variable will produce the default value if it
    is not None; otherwise it raises a `StopIteration` exception that can be caught
    at runtime.

    Parameters
def _set_row_mappings(self, Gamma, dir_priors, model):
    """Create maps from Dirichlet prior parameters to rows and slices in the
    transition matrix.

    These maps are needed when a transition matrix isn't simply comprised of
    Dirichlet prior rows, but--instead--slices of Dirichlet priors.

    Consider the following:

    .. code-block:: python

        with pm.Model():
            d_0_rv = pm.Dirichlet("p_0", np.r_[1, 1])
            d_1_rv = pm.Dirichlet("p_1", np.r_[1, 1])

            p_0_rv = tt.as_tensor([0, 0, 1])
            p_1_rv = tt.zeros(3)
            p_1_rv = tt.set_subtensor(p_0_rv[[0, 2]], d_0_rv)
            p_2_rv = tt.zeros(3)
            p_2_rv = tt.set_subtensor(p_1_rv[[1, 2]], d_1_rv)

            P_tt = tt.stack([p_0_rv, p_1_rv, p_2_rv])

    The transition matrix `P_tt` has Dirichlet priors in only two of its
    three rows, and--even then--they're only present in parts of two rows.

    In this example, we need to know that Dirichlet prior 0, i.e. `d_0_rv`,
    is mapped to row 1, and prior 1 is mapped to row 2.  Furthermore, we need
    to know that prior 0 fills columns 0 and 2 in row 1, and prior 1 fills
    columns 1 and 2 in row 2.

    These mappings allow one to embed Dirichlet priors in larger transition
    matrices with--for instance--fixed transition behavior.

    """  # noqa: E501

    # Remove unimportant `Op`s from the transition matrix graph
    Gamma = pre_greedy_local_optimizer(
        FunctionGraph([], []),
        [
            OpRemove(Elemwise(aes.Cast(aes.float32))),
            OpRemove(Elemwise(aes.Cast(aes.float64))),
            OpRemove(Elemwise(aes.identity)),
        ],
        Gamma,
    )

    # Canonicalize the transition matrix graph
    fg = FunctionGraph(
        list(graph_inputs([Gamma] + self.dir_priors_untrans)),
        [Gamma] + self.dir_priors_untrans,
        clone=True,
    )
    canonicalize_opt = optdb.query(Query(include=["canonicalize"]))
    canonicalize_opt.optimize(fg)
    Gamma = fg.outputs[0]
    dir_priors_untrans = fg.outputs[1:]
    fg.disown()

    Gamma_DimShuffle = Gamma.owner

    if not (isinstance(Gamma_DimShuffle.op, DimShuffle)):
        raise TypeError("The transition matrix should be non-time-varying")

    Gamma_Join = Gamma_DimShuffle.inputs[0].owner

    if not (isinstance(Gamma_Join.op, at.basic.Join)):
        raise TypeError(
            "The transition matrix should be comprised of stacked row vectors"
        )

    Gamma_rows = Gamma_Join.inputs[1:]

    self.n_rows = len(Gamma_rows)

    # Loop through the rows in the transition matrix's graph and determine
    # how our transformed Dirichlet RVs map to this transition matrix.
    self.row_remaps = {}
    self.row_slices = {}
    for i, dim_row in enumerate(Gamma_rows):
        if not dim_row.owner:
            continue

        # By-pass the `DimShuffle`s applied to the `AdvancedIncSubtensor1`
        # `Op`s in which we're actually interested
        gamma_row = dim_row.owner.inputs[0]

        if gamma_row in dir_priors_untrans:
            # This is a row that's simply a `Dirichlet`
            j = dir_priors_untrans.index(gamma_row)
            self.row_remaps[j] = i
            self.row_slices[j] = slice(None)
            continue

        if gamma_row.owner.inputs[1] not in dir_priors_untrans:
            continue

        # Parts of a row set by a `*Subtensor*` `Op` using a full
        # `Dirichlet` e.g. `P_row[idx] = dir_rv`
        j = dir_priors_untrans.index(gamma_row.owner.inputs[1])
        untrans_dirich = dir_priors_untrans[j]

        if (
            gamma_row.owner
            and isinstance(gamma_row.owner.op, AdvancedIncSubtensor1)
            and gamma_row.owner.inputs[1] == untrans_dirich
        ):
            self.row_remaps[j] = i

            rhand_val = gamma_row.owner.inputs[2]

            if not isinstance(rhand_val, TensorConstant):
                # TODO: We could allow more types of `idx` (e.g. slices)
                # Currently, `idx` can't be something like `2:5`
                raise TypeError(
                    "Only array indexing allowed for mixed"
                    " Dirichlet/non-Dirichlet rows"
                )
            self.row_slices[j] = rhand_val.data
            (xx<3 ? (0.935409070603099 + 0.0458812946797165*(xx-1.7)):
            0.99505475368673));
        }
        //%(z)s = 0.5*(ultrafasttanh(0.5*x)+1.);
        %(z)s = 0.5*(%(z)s+1.);
        }""" % locals())

    @staticmethod
    def c_code_cache_version():
        return (5,)


ultra_fast_scalar_sigmoid = UltraFastScalarSigmoid(
    aes.upgrade_to_float, name="ultra_fast_scalar_sigmoid"
)
ultra_fast_sigmoid = Elemwise(ultra_fast_scalar_sigmoid, name="ultra_fast_sigmoid")
ultra_fast_sigmoid_inplace = Elemwise(
    UltraFastScalarSigmoid(aes.transfer_type(0)),
    inplace_pattern={0: 0},
    name="ultra_fast_sigmoid_inplace",
)

pprint.assign(ultra_fast_sigmoid, printing.FunctionPrinter(["ultra_fast_sigmoid"]))


# @opt.register_uncanonicalize
@local_optimizer(None)
def local_ultra_fast_sigmoid(fgraph, node):
    """
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import aesara.tensor as at
import numpy as np

from aesara import scalar
from aesara.scalar.basic_scipy import GammaLn, Psi
from aesara.tensor.elemwise import Elemwise

__all__ = ["gammaln", "multigammaln", "psi", "log_i0"]

scalar_gammaln = GammaLn(scalar.upgrade_to_float, name="scalar_gammaln")
gammaln = Elemwise(scalar_gammaln, name="gammaln")


def multigammaln(a, p):
    """Multivariate Log Gamma

    Parameters
    ----------
    a: tensor like
    p: int
        degrees of freedom. p > 0
    """
    i = at.arange(1, p + 1)
    return p * (p - 1) * at.log(np.pi) / 4.0 + at.sum(
        gammaln(a + (1.0 - i) / 2.0), axis=0
    )
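# Illustrative check (assumed imports; not part of the original module): for a
# scalar argument, the symbolic `multigammaln` above should agree with SciPy's
# implementation of the multivariate log-gamma function.
import aesara
import numpy as np
import scipy.special

a = at.dscalar("a")
f = aesara.function([a], multigammaln(a, 3))
assert np.allclose(f(4.5), scipy.special.multigammaln(4.5, 3))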
        (gz,) = grads
        return [gz * (1 + scalar.log(x))]

    def c_code(self, node, name, inputs, outputs, sub):
        (x,) = inputs
        (z,) = outputs
        if node.inputs[0].type in [scalar.float32, scalar.float64]:
            return (
                """%(z)s = %(x)s == 0.0 ? 0.0 : %(x)s * log(%(x)s);""" % locals()
            )
        raise NotImplementedError("only floating point is implemented")


scalar_xlogx = XlogX(scalar.upgrade_to_float, name="scalar_xlogx")
xlogx = Elemwise(scalar_xlogx, name="xlogx")


class XlogY0(scalar.BinaryScalarOp):
    """
    Compute X * log(Y), with special case 0 log(0) = 0.
    """

    @staticmethod
    def st_impl(x, y):
        if x == 0.0:
            return 0.0
        return x * np.log(y)

    def impl(self, x, y):
        return XlogY0.st_impl(x, y)
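# Illustrative usage sketch (assumed imports; not part of the original
# module): `xlogx` evaluates x * log(x) elementwise, with the 0 * log(0) = 0
# special case handled by the scalar Op's implementation above.
import aesara
import aesara.tensor as at
import numpy as np

x = at.dvector("x")
f = aesara.function([x], xlogx(x))
print(f(np.array([0.0, 1.0, 2.0])))  # [0., 0., 2 * log(2)]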
    sympy.gamma: aet.gamma,
    sympy.loggamma: aet.gammaln,
    sympy.Pow: aet.pow,
    sympy.Eq: aet.eq,
    sympy.StrictGreaterThan: aet.gt,
    sympy.StrictLessThan: aet.lt,
    sympy.LessThan: aet.le,
    sympy.GreaterThan: aet.ge,
    sympy.And: aet.and_,
    sympy.Or: aet.or_,
    sympy.Max: aet.maximum,  # SymPy accepts >2 inputs, Aesara only 2
    sympy.Min: aet.minimum,  # SymPy accepts >2 inputs, Aesara only 2
    sympy.conjugate: aet.conj,
    sympy.core.numbers.ImaginaryUnit: lambda: aet.complex(0, 1),
    # Matrices
    sympy.MatAdd: Elemwise(aes.add),
    sympy.HadamardProduct: Elemwise(aes.mul),
    sympy.Trace: nlinalg.trace,
    sympy.Determinant: nlinalg.det,
    sympy.Inverse: nlinalg.matrix_inverse,
    sympy.Transpose: DimShuffle((False, False), [1, 0]),
}


class AesaraPrinter(Printer):
    """
    Code printer which creates Aesara symbolic expression graphs.

    Parameters
    ==========

    cache : dict