Example #1
    def test_str(self):
        op = Elemwise(aes.add, inplace_pattern=None, name=None)
        assert str(op) == "Elemwise{add}"
        op = Elemwise(aes.add, inplace_pattern={0: 0}, name=None)
        assert str(op) == "Elemwise{add}[(0, 0)]"
        op = Elemwise(aes.add, inplace_pattern=None, name="my_op")
        assert str(op) == "my_op"
Example #2
    def test_infer_shape(self):
        for s_left, s_right in [
            ((5, 6), (5, 6)),
            ((5, 6), (5, 1)),
            ((5, 6), (1, 6)),
            ((5, 1), (5, 6)),
            ((1, 6), (5, 6)),
            ((2, 3, 4, 5), (2, 3, 4, 5)),
            ((2, 3, 4, 5), (2, 3, 1, 5)),
            ((2, 3, 4, 5), (1, 3, 4, 5)),
            ((2, 1, 4, 5), (2, 3, 4, 5)),
            ((2, 3, 4, 1), (2, 3, 4, 5)),
        ]:
            dtype = aesara.config.floatX
            t_left = TensorType(dtype, [(entry == 1) for entry in s_left])()
            t_right = TensorType(dtype, [(entry == 1) for entry in s_right])()
            t_left_val = np.zeros(s_left, dtype=dtype)
            t_right_val = np.zeros(s_right, dtype=dtype)
            self._compile_and_check(
                [t_left, t_right],
                [Elemwise(aes.add)(t_left, t_right)],
                [t_left_val, t_right_val],
                Elemwise,
            )
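As a rough sketch of the broadcasting rule exercised above (outside the test class, and assuming `TensorType` is imported from `aesara.tensor.type`): an output dimension is broadcastable only if it is broadcastable in every input.

import aesara.scalar as aes
from aesara.tensor.elemwise import Elemwise
from aesara.tensor.type import TensorType

# (n, 1) added to (1, m) broadcasts to (n, m)
t_left = TensorType("float64", [False, True])()   # broadcastable pattern (False, True)
t_right = TensorType("float64", [True, False])()  # broadcastable pattern (True, False)

out = Elemwise(aes.add)(t_left, t_right)
print(out.type.broadcastable)  # -> (False, False)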
Example #3
def test_not_implemented_elemwise_grad():
    # Regression test for unimplemented gradient in an Elemwise Op.

    class TestOp(aes.ScalarOp):
        def __init__(self):
            self.output_types_preference = aes.upgrade_to_float

        def impl(self, n, x):
            return x * n

        def grad(self, inputs, gout):
            (n, x) = inputs
            (gz, ) = gout
            dy_dx = n
            return [
                aesara.gradient.grad_not_implemented(self, 0, n), gz * dy_dx
            ]

    test_op = Elemwise(TestOp())
    x = scalar()
    assert isinstance(aesara.gradient.grad(test_op(2, x), x), Variable)

    # Verify that trying to use the not implemented gradient fails.
    with pytest.raises(aesara.gradient.NullTypeGradError):
        aesara.gradient.grad(test_op(x, 2), x)
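By contrast, a sketch of a scalar op whose gradient is fully implemented (the `Square` op below is hypothetical, not part of the test suite) and therefore differentiates cleanly through `Elemwise`:

import aesara
import aesara.scalar as aes
import aesara.tensor as at
from aesara.tensor.elemwise import Elemwise


class Square(aes.ScalarOp):
    def __init__(self):
        self.output_types_preference = aes.upgrade_to_float

    def impl(self, x):
        return x * x

    def grad(self, inputs, gout):
        (x,) = inputs
        (gz,) = gout
        return [gz * 2 * x]


square = Elemwise(Square())
x = at.scalar("x")
g = aesara.grad(square(x), x)  # d(x**2)/dx = 2*x
print(g.eval({x: 3.0}))        # -> 6.0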
Example #4
def test_numba_Composite(inputs, input_values):
    x_s = aes.float64("x")
    y_s = aes.float64("y")
    comp_op = Elemwise(
        Composite([x_s, y_s], [x_s + y_s * 2 + aes.exp(x_s - y_s)]))
    out_fg = FunctionGraph(inputs, [comp_op(*inputs)])
    compare_numba_and_py(out_fg, input_values)
Example #5
def test_jax_Composite(x, y, x_val, y_val):
    x_s = aes.float64("x")
    y_s = aes.float64("y")

    comp_op = Elemwise(Composite([x_s, y_s], [x_s + y_s * 2 + aes.exp(x_s - y_s)]))

    out = comp_op(x, y)

    out_fg = FunctionGraph([x, y], [out])

    test_input_vals = [
        x_val.astype(config.floatX),
        y_val.astype(config.floatX),
    ]
    _ = compare_jax_and_py(out_fg, test_input_vals)
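The same fused `Composite` can also be evaluated with the default backend; a sketch (assuming `Composite` is importable from `aesara.scalar.basic`, as in the examples above):

import numpy as np
import aesara
import aesara.scalar as aes
import aesara.tensor as at
from aesara.scalar.basic import Composite
from aesara.tensor.elemwise import Elemwise

x_s = aes.float64("x")
y_s = aes.float64("y")
comp_op = Elemwise(Composite([x_s, y_s], [x_s + y_s * 2 + aes.exp(x_s - y_s)]))

x = at.dvector("x")
y = at.dvector("y")
f = aesara.function([x, y], comp_op(x, y))

xv, yv = np.array([1.0, 2.0]), np.array([0.5, 1.5])
assert np.allclose(f(xv, yv), xv + yv * 2 + np.exp(xv - yv))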
Example #6
def batch_normalization(inputs, gamma, beta, mean, std, mode="low_mem"):
    """
    This function will build the symbolic graph for applying batch normalization
    to a set of activations.
    Also works on GPUs, but is not optimized using cuDNN.

    .. versionadded:: 0.7.1

    Parameters
    ----------
    inputs : symbolic tensor
        Mini-batch of activations
    gamma: symbolic tensor
        BN scale parameter, must be of same dimensionality as
        inputs and broadcastable against it
    beta: symbolic tensor
        BN shift parameter, must be of same dimensionality as
        inputs and broadcastable against it
    mean: symbolic tensor
        inputs means, must be of same dimensionality as
        inputs and broadcastable against it
    std: symbolic tensor
        inputs standard deviation, must be of same dimensionality as
        inputs and broadcastable against it
    mode: 'low_mem' or 'high_mem'
        Specify which batch_normalization implementation will be used.
        Because no intermediate representations are stored for the
        back-propagation, the 'low_mem' implementation lowers memory usage;
        however, it is 5-10% slower than the 'high_mem' implementation. Note
        that this 5-10% difference applies to the batch_normalization
        operation only; the relative difference is likely to be smaller over
        the full model's fprop/bprop.
    """
    if mode == "low_mem":
        elm_bn = Elemwise(scalar_op=BNComposite(dtype=inputs.dtype))
        rval = elm_bn(inputs, mean, std, gamma, beta)
    elif mode == "high_mem":
        rval = (inputs - mean) * (gamma / std) + beta
    else:
        raise ValueError('mode must be either "low_mem" or "high_mem"')
    return rval
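A usage sketch for the function above (illustrative variable names; statistics here are taken per feature over the batch axis):

import aesara
import aesara.tensor as at

x = at.matrix("x")       # (batch, features)
gamma = at.row("gamma")  # (1, features), broadcastable against x
beta = at.row("beta")
mean = x.mean(axis=0, keepdims=True)
std = x.std(axis=0, keepdims=True) + 1e-8  # avoid division by zero

out_low = batch_normalization(x, gamma, beta, mean, std, mode="low_mem")
out_high = batch_normalization(x, gamma, beta, mean, std, mode="high_mem")

# Both modes compute the same value; they differ only in memory/speed trade-offs
f = aesara.function([x, gamma, beta], [out_low, out_high])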
Example #7
        return [x_grad * self.grad_op(x)]


class I1e(UnaryScalarOp):
    """
    Modified Bessel function of the first kind of order 1, exponentially scaled.
    """

    nfunc_spec = ("scipy.special.i1e", 1, 1)

    def impl(self, x):
        return scipy.special.i1e(x)


i1e_scalar = I1e(upgrade_to_float_no_complex, name="i1e")
i1e = Elemwise(i1e_scalar, name="Elemwise{i1e,no_inplace}")


class I0e(UnaryScalarOp):
    """
    Modified Bessel function of the first kind of order 0, exponentially scaled.
    """

    nfunc_spec = ("scipy.special.i0e", 1, 1)

    def impl(self, x):
        return scipy.special.i0e(x)

    def grad(self, inp, grads):
        (x, ) = inp
        (gz, ) = grads
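A quick numerical check (a sketch, not part of the module above) that the `i1e` Elemwise wrapper defined above matches SciPy:

import numpy as np
import scipy.special
import aesara
import aesara.tensor as at

x = at.dvector("x")
f = aesara.function([x], i1e(x))  # `i1e` as defined above

xv = np.linspace(-3.0, 3.0, 7)
assert np.allclose(f(xv), scipy.special.i1e(xv))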
Example #8
    def with_mode(
        self,
        mode,
        scalar_op=aes.add,
        dtype="floatX",
        pre_scalar_op=None,
        test_nan=False,
        tensor_op=None,
    ):
        for xsh, tosum in self.cases:
            if dtype == "floatX":
                dtype = aesara.config.floatX
            x = self.type(dtype, [(entry == 1) for entry in xsh])("x")
            d = {}
            if pre_scalar_op is not None:
                d = {"pre_scalar_op": pre_scalar_op}
            if tensor_op is None:
                e = as_tensor_variable(self.op(scalar_op, axis=tosum, **d)(x))
            else:
                e = as_tensor_variable(tensor_op(x, axis=tosum, **d))

            if tosum is None:
                tosum = list(range(len(xsh)))

            f = aesara.function([x], e, mode=mode, on_unused_input="ignore")
            xv = np.asarray(np.random.random(xsh))

            if dtype not in discrete_dtypes:
                xv = np.asarray(xv, dtype=dtype)
            else:
                xv = np.asarray(xv < 0.5, dtype=dtype)

            if test_nan and xv.size > 0:
                if len(xsh) > 0:
                    xv = xv.flatten()
                    xv[0] = np.nan
                    xv = xv.reshape(*xsh)
                else:
                    xv = np.asarray(np.nan, dtype=dtype)
            zv = xv
            if pre_scalar_op is not None:
                zv = Elemwise(scalar_op=pre_scalar_op)(x).eval({x: xv})

            if len(tosum) > 1 and any(a < 0 for a in tosum):
                # In that case, convert the negative axes to their
                # non-negative equivalents so the reduction uses the right axes.
                axis2 = []
                for a in tosum:
                    if a < 0:
                        axis2.append(a + len(xsh))
                    else:
                        axis2.append(a)
                assert len(axis2) == len(tosum)
                tosum = tuple(axis2)
            if tensor_op == at_all:
                for axis in reversed(sorted(tosum)):
                    zv = np.all(zv, axis)
                if len(tosum) == 0:
                    zv = zv != 0
            elif tensor_op == at_any:
                for axis in reversed(sorted(tosum)):
                    zv = np.any(zv, axis)
                if len(tosum) == 0:
                    zv = zv != 0
            elif scalar_op == aes.add:
                for axis in reversed(sorted(tosum)):
                    zv = np.add.reduce(zv, axis)
                if dtype == "bool":
                    # np.add on a bool upcasts, while CAReduce doesn't
                    zv = zv.astype(dtype)
            elif scalar_op == aes.mul:
                for axis in reversed(sorted(tosum)):
                    zv = np.multiply.reduce(zv, axis)
            elif scalar_op == aes.scalar_maximum:
                # There is no identity value for the maximum function,
                # so we can't support dimensions of size 0.
                if np.prod(zv.shape) == 0:
                    continue
                for axis in reversed(sorted(tosum)):
                    zv = np.maximum.reduce(zv, axis)
            elif scalar_op == aes.scalar_minimum:
                # There is no identity value for the minimum function,
                # so we can't support dimensions of size 0.
                if np.prod(zv.shape) == 0:
                    continue
                for axis in reversed(sorted(tosum)):
                    zv = np.minimum.reduce(zv, axis)
            elif scalar_op == aes.or_:
                for axis in reversed(sorted(tosum)):
                    zv = np.bitwise_or.reduce(zv, axis)
            elif scalar_op == aes.and_:
                for axis in reversed(sorted(tosum)):
                    zv = reduce_bitwise_and(zv, axis, dtype=dtype)
            elif scalar_op == aes.xor:
                # There is no identity value for the xor function,
                # so we can't support dimensions of size 0.
                if np.prod(zv.shape) == 0:
                    continue
                for axis in reversed(sorted(tosum)):
                    zv = np.bitwise_xor.reduce(zv, axis)
            else:
                raise Exception(
                    f"Test for CAReduce with scalar_op {scalar_op} not implemented"
                )

            if test_nan:
                try:
                    assert self.type.values_eq(f(xv), zv), (f(xv), zv)
                except NotImplementedError:
                    # GpuCAReduce doesn't implement all cases when the size is 0
                    assert xv.size == 0
            else:
                try:
                    f_xv = f(xv)
                    assert f_xv.shape == zv.shape, (f_xv, zv)
                    utt.assert_allclose(zv, f_xv)
                except NotImplementedError:
                    # GpuCAReduce doesn't implement all cases when the size is 0
                    assert xv.size == 0

            x = self.type(dtype, [(entry == 1) for entry in xsh])("x")
            if tensor_op is None:
                e = self.op(scalar_op, axis=tosum)(x)
            else:
                e = tensor_op(x, axis=tosum)
            if tosum is None:
                tosum = list(range(len(xsh)))
            f = aesara.function([x], e.shape, mode=mode, on_unused_input="ignore")
            if not (
                scalar_op in [aes.scalar_maximum, aes.scalar_minimum]
                and (xsh == () or np.prod(xsh) == 0)
            ):
                try:
                    assert all(f(xv) == zv.shape)
                except NotImplementedError:
                    # GpuCAReduce doesn't implement all cases when the size is 0
                    assert xv.size == 0
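For reference, a minimal sketch (outside the test class) of the kind of op this harness exercises: a `CAReduce` over a commutative/associative scalar op, which should agree with the corresponding NumPy reduction:

import numpy as np
import aesara
import aesara.scalar as aes
import aesara.tensor as at
from aesara.tensor.elemwise import CAReduce

x = at.dmatrix("x")
row_sums = CAReduce(aes.add, axis=1)(x)  # reduce with scalar `add` along axis 1

f = aesara.function([x], row_sums)
xv = np.arange(6.0).reshape(2, 3)
assert np.allclose(f(xv), np.add.reduce(xv, axis=1))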
Example #9
    def __init__(self, tensor):
        self.tensor = tensor

    def __call__(self, input):
        """Replaces the single input of symbolic variable to be the passed argument.

        Parameters
        ----------
        input: TensorVariable
        """
        (oldinput,) = inputvars(self.tensor)
        return aesara.clone_replace(self.tensor, {oldinput: input}, strict=False)


scalar_identity = IdentityOp(scalar.upgrade_to_float, name="scalar_identity")
identity = Elemwise(scalar_identity, name="identity")
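A small sketch of what the `__call__` above does with `aesara.clone_replace` (hypothetical variables, shown outside the class):

import aesara
import aesara.tensor as at

a = at.vector("a")
b = at.vector("b")
expr = a * 2 + 1

# Swap the original input `a` for `b` in a clone of the graph
expr_on_b = aesara.clone_replace(expr, {a: b}, strict=False)
print(expr_on_b.eval({b: [1.0, 2.0]}))  # -> [3. 5.]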


class GeneratorOp(Op):
    """
    Generator Op is designed for storing python generators inside aesara graph.

    __call__ creates a TensorVariable
        It has 2 new methods:
        - var.set_gen(gen): sets a new generator
        - var.set_default(value): sets a new default value (None erases the default value)

    If the generator is exhausted, the variable will produce the default value if it
    is not None; otherwise it raises a `StopIteration` exception that can be caught
    at runtime.

    Parameters
Example #10
    def _set_row_mappings(self, Gamma, dir_priors, model):
        """Create maps from Dirichlet priors parameters to rows and slices in the transition matrix.

        These maps are needed when a transition matrix isn't composed simply
        of Dirichlet prior rows, but--instead--of slices of Dirichlet priors.

        Consider the following:

        .. code-block:: python

            with pm.Model():
                d_0_rv = pm.Dirichlet("p_0", np.r_[1, 1])
                d_1_rv = pm.Dirichlet("p_1", np.r_[1, 1])

                p_0_rv = tt.as_tensor([0, 0, 1])
                p_1_rv = tt.zeros(3)
                p_1_rv = tt.set_subtensor(p_0_rv[[0, 2]], d_0_rv)
                p_2_rv = tt.zeros(3)
                p_2_rv = tt.set_subtensor(p_1_rv[[1, 2]], d_1_rv)

                P_tt = tt.stack([p_0_rv, p_1_rv, p_2_rv])

        The transition matrix `P_tt` has Dirichlet priors in only two of its
        three rows, and--even then--they're only present in parts of two rows.

        In this example, we need to know that Dirichlet prior 0, i.e. `d_0_rv`,
        is mapped to row 1, and prior 1 is mapped to row 2.  Furthermore, we
        need to know that prior 0 fills columns 0 and 2 in row 1, and prior 1
        fills columns 1 and 2 in row 2.

        These mappings allow one to embed Dirichlet priors in larger transition
        matrices with--for instance--fixed transition behavior.

        """  # noqa: E501

        # Remove unimportant `Op`s from the transition matrix graph
        Gamma = pre_greedy_local_optimizer(
            FunctionGraph([], []),
            [
                OpRemove(Elemwise(aes.Cast(aes.float32))),
                OpRemove(Elemwise(aes.Cast(aes.float64))),
                OpRemove(Elemwise(aes.identity)),
            ],
            Gamma,
        )

        # Canonicalize the transition matrix graph
        fg = FunctionGraph(
            list(graph_inputs([Gamma] + self.dir_priors_untrans)),
            [Gamma] + self.dir_priors_untrans,
            clone=True,
        )
        canonicalize_opt = optdb.query(Query(include=["canonicalize"]))
        canonicalize_opt.optimize(fg)
        Gamma = fg.outputs[0]
        dir_priors_untrans = fg.outputs[1:]
        fg.disown()

        Gamma_DimShuffle = Gamma.owner

        if not (isinstance(Gamma_DimShuffle.op, DimShuffle)):
            raise TypeError("The transition matrix should be non-time-varying")

        Gamma_Join = Gamma_DimShuffle.inputs[0].owner

        if not (isinstance(Gamma_Join.op, at.basic.Join)):
            raise TypeError(
                "The transition matrix should be comprised of stacked row vectors"
            )

        Gamma_rows = Gamma_Join.inputs[1:]

        self.n_rows = len(Gamma_rows)

        # Loop through the rows in the transition matrix's graph and determine
        # how our transformed Dirichlet RVs map to this transition matrix.
        self.row_remaps = {}
        self.row_slices = {}
        for i, dim_row in enumerate(Gamma_rows):
            if not dim_row.owner:
                continue

            # By-pass the `DimShuffle`s applied to the `AdvancedIncSubtensor1`
            # `Op`s in which we're actually interested
            gamma_row = dim_row.owner.inputs[0]

            if gamma_row in dir_priors_untrans:
                # This is a row that's simply a `Dirichlet`
                j = dir_priors_untrans.index(gamma_row)
                self.row_remaps[j] = i
                self.row_slices[j] = slice(None)
                continue

            if not gamma_row.owner or gamma_row.owner.inputs[1] not in dir_priors_untrans:
                continue

            # Parts of a row set by a `*Subtensor*` `Op` using a full
            # `Dirichlet` e.g. `P_row[idx] = dir_rv`
            j = dir_priors_untrans.index(gamma_row.owner.inputs[1])
            untrans_dirich = dir_priors_untrans[j]

            if (gamma_row.owner
                    and isinstance(gamma_row.owner.op, AdvancedIncSubtensor1)
                    and gamma_row.owner.inputs[1] == untrans_dirich):
                self.row_remaps[j] = i

                rhand_val = gamma_row.owner.inputs[2]
                if not isinstance(rhand_val, TensorConstant):
                    # TODO: We could allow more types of `idx` (e.g. slices)
                    # Currently, `idx` can't be something like `2:5`
                    raise TypeError("Only array indexing allowed for mixed"
                                    " Dirichlet/non-Dirichlet rows")
                self.row_slices[j] = rhand_val.data
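A reduced sketch of the `OpRemove`-based cleanup used at the top of this method (assuming the same Aesara version as the snippet, where `OpRemove` and `pre_greedy_local_optimizer` live in `aesara.graph.opt`):

import aesara.scalar as aes
import aesara.tensor as at
from aesara.graph.fg import FunctionGraph
from aesara.graph.opt import OpRemove, pre_greedy_local_optimizer
from aesara.tensor.elemwise import Elemwise

x = at.dvector("x")
y = at.cast(x, "float32")  # introduces an Elemwise{Cast{float32}} node

# Strip the cast wrapper so the underlying graph structure is easier to match
y_clean = pre_greedy_local_optimizer(
    FunctionGraph([], []),
    [OpRemove(Elemwise(aes.Cast(aes.float32)))],
    y,
)
# `y_clean` should now be (equivalent to) the bare input `x`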
Example #11
                           (xx<3 ? (0.935409070603099 + 0.0458812946797165*(xx-1.7)):
                                   0.99505475368673));
    }

        //%(z)s = 0.5*(ultrafasttanh(0.5*x)+1.);
        %(z)s = 0.5*(%(z)s+1.);
        }""" % locals())

    @staticmethod
    def c_code_cache_version():
        return (5, )


ultra_fast_scalar_sigmoid = UltraFastScalarSigmoid(
    aes.upgrade_to_float, name="ultra_fast_scalar_sigmoid")
ultra_fast_sigmoid = Elemwise(ultra_fast_scalar_sigmoid,
                              name="ultra_fast_sigmoid")

ultra_fast_sigmoid_inplace = Elemwise(
    UltraFastScalarSigmoid(aes.transfer_type(0)),
    inplace_pattern={0: 0},
    name="ultra_fast_sigmoid_inplace",
)

pprint.assign(ultra_fast_sigmoid,
              printing.FunctionPrinter(["ultra_fast_sigmoid"]))


# @opt.register_uncanonicalize
@local_optimizer(None)
def local_ultra_fast_sigmoid(fgraph, node):
    """
Example #12
import aesara.tensor as at
import numpy as np

from aesara import scalar
from aesara.scalar.basic_scipy import GammaLn, Psi
from aesara.tensor.elemwise import Elemwise

__all__ = ["gammaln", "multigammaln", "psi", "log_i0"]

scalar_gammaln = GammaLn(scalar.upgrade_to_float, name="scalar_gammaln")
gammaln = Elemwise(scalar_gammaln, name="gammaln")


def multigammaln(a, p):
    """Multivariate Log Gamma

    Parameters
    ----------
    a: tensor-like
    p: int
       Degrees of freedom; p > 0.
    """
    i = at.arange(1, p + 1)
    return p * (p - 1) * at.log(np.pi) / 4.0 + at.sum(
        gammaln(a + (1.0 - i) / 2.0), axis=0)
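A quick sanity check (a sketch, not part of the module): the symbolic result should agree with `scipy.special.multigammaln`:

import numpy as np
import scipy.special
import aesara
import aesara.tensor as at

a = at.dscalar("a")
p = 3
f = aesara.function([a], multigammaln(a, p))

assert np.allclose(f(2.5), scipy.special.multigammaln(2.5, p))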
Example #13
        (gz, ) = grads
        return [gz * (1 + scalar.log(x))]

    def c_code(self, node, name, inputs, outputs, sub):
        (x, ) = inputs
        (z, ) = outputs
        if node.inputs[0].type in [scalar.float32, scalar.float64]:
            return ("""%(z)s =
                %(x)s == 0.0
                ? 0.0
                : %(x)s * log(%(x)s);""" % locals())
        raise NotImplementedError("only floatingpoint is implemented")


scalar_xlogx = XlogX(scalar.upgrade_to_float, name="scalar_xlogx")
xlogx = Elemwise(scalar_xlogx, name="xlogx")


class XlogY0(scalar.BinaryScalarOp):
    """
    Compute X * log(Y), with special case 0 log(0) = 0.

    """
    @staticmethod
    def st_impl(x, y):
        if x == 0.0:
            return 0.0
        return x * np.log(y)

    def impl(self, x, y):
        return XlogY0.st_impl(x, y)
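Following the same pattern as `xlogx` above, the element-wise wrapper for `XlogY0` would presumably be built like this (a sketch):

scalar_xlogy0 = XlogY0(scalar.upgrade_to_float, name="scalar_xlogy0")
xlogy0 = Elemwise(scalar_xlogy0, name="xlogy0")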
Example #14
            sympy.gamma: aet.gamma,
            sympy.loggamma: aet.gammaln,
            sympy.Pow: aet.pow,
            sympy.Eq: aet.eq,
            sympy.StrictGreaterThan: aet.gt,
            sympy.StrictLessThan: aet.lt,
            sympy.LessThan: aet.le,
            sympy.GreaterThan: aet.ge,
            sympy.And: aet.and_,
            sympy.Or: aet.or_,
            sympy.Max: aet.maximum,  # SymPy accepts >2 inputs, Aesara only 2
            sympy.Min: aet.minimum,  # SymPy accepts >2 inputs, Aesara only 2
            sympy.conjugate: aet.conj,
            sympy.core.numbers.ImaginaryUnit: lambda: aet.complex(0, 1),
            # Matrices
            sympy.MatAdd: Elemwise(aes.add),
            sympy.HadamardProduct: Elemwise(aes.mul),
            sympy.Trace: nlinalg.trace,
            sympy.Determinant: nlinalg.det,
            sympy.Inverse: nlinalg.matrix_inverse,
            sympy.Transpose: DimShuffle((False, False), [1, 0]),
    }
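Note that `Elemwise(aes.add)` and `Elemwise(aes.mul)` act element-wise on whole tensors, which is why they serve here as the targets for `sympy.MatAdd` and `sympy.HadamardProduct`; a small sketch:

import numpy as np
import aesara
import aesara.scalar as aes
import aesara.tensor as at
from aesara.tensor.elemwise import Elemwise

A = at.dmatrix("A")
B = at.dmatrix("B")
hadamard = aesara.function([A, B], Elemwise(aes.mul)(A, B))

a = np.arange(4.0).reshape(2, 2)
b = np.full((2, 2), 2.0)
assert np.allclose(hadamard(a, b), a * b)  # element-wise (Hadamard) product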


class AesaraPrinter(Printer):
    """ Code printer which creates Aesara symbolic expression graphs.

    Parameters
    ==========

    cache : dict