Example #1
def test_merge_with_weird_eq():
    # numpy arrays don't compare equal like other python objects

    # SCALAR CASE
    x = tt.constant(np.asarray(1), name="x")
    y = tt.constant(np.asarray(1), name="y")
    g = FunctionGraph([x, y], [x + y])
    MergeOptimizer().optimize(g)

    assert len(g.apply_nodes) == 1
    node = list(g.apply_nodes)[0]
    assert len(node.inputs) == 2
    assert node.inputs[0] is node.inputs[1]

    # NONSCALAR CASE
    # This was created to test TensorConstantSignature
    x = tt.constant(np.ones(5), name="x")
    y = tt.constant(np.ones(5), name="y")
    g = FunctionGraph([x, y], [x + y])
    MergeOptimizer().optimize(g)

    assert len(g.apply_nodes) == 1
    node = list(g.apply_nodes)[0]
    assert len(node.inputs) == 2
    assert node.inputs[0] is node.inputs[1]
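
The comment above is the crux of this test: NumPy's ``==`` is elementwise, so constants wrapping arrays need a dedicated signature (``TensorConstantSignature``) to decide mergeability. A minimal NumPy-only sketch of that pitfall, independent of Aesara:

import numpy as np

a = np.ones(5)
b = np.ones(5)

print(a == b)                # [ True  True  True  True  True] -- an array, not a bool
print(np.array_equal(a, b))  # True -- the scalar answer a merge pass actually needs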
Example #2
    def make_node(self, rng, size, dtype, *dist_params):
        """Create a random variable node.

        XXX: Unnamed/non-keyword arguments are considered distribution
        parameters!  If you want to set `size`, `rng`, and/or `name`, use their
        keywords.

        Parameters
        ----------
        rng: RandomStateType
            Existing Aesara `RandomState` object to be used.  Creates a
            new one, if `None`.
        size: int or Sequence
            NumPy-like size of the output (i.e. replications).
        dtype: str
            The dtype of the sampled output.  If the value ``"floatX"`` is
            given, then ``dtype`` is set to ``aesara.config.floatX``.  This
            value is only used when `self.dtype` isn't set.
        dist_params: list
            Distribution parameters.

        Returns
        -------
        out: `Apply`
            A node with inputs `(rng, size, dtype) + dist_params` and outputs
            `(rng_var, out_var)`.

        """
        size = normalize_size_param(size)

        dist_params = tuple(
            as_tensor_variable(p) if not isinstance(p, Variable) else p
            for p in dist_params
        )

        if rng is None:
            rng = aesara.shared(np.random.RandomState())
        elif not isinstance(rng.type, RandomStateType):
            raise TypeError("The type of rng should be an instance of RandomStateType")

        bcast = self.compute_bcast(dist_params, size)
        dtype = self.dtype or dtype

        if dtype == "floatX":
            dtype = config.floatX
        elif dtype is None or (isinstance(dtype, str) and dtype not in all_dtypes):
            raise TypeError("dtype is unspecified")

        if isinstance(dtype, str):
            dtype_idx = constant(all_dtypes.index(dtype), dtype="int64")
        else:
            dtype_idx = constant(dtype, dtype="int64")
            dtype = all_dtypes[dtype_idx.data]

        outtype = TensorType(dtype=dtype, broadcastable=bcast)
        out_var = outtype()
        inputs = (rng, size, dtype_idx) + dist_params
        outputs = (rng.type(), out_var)

        return Apply(self, inputs, outputs)
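
The dtype handling above resolves ``"floatX"`` to a concrete dtype string and then stores the dtype in the graph as an integer index into ``all_dtypes``. A standalone sketch of that mapping, with ``ALL_DTYPES`` and ``FLOATX`` as illustrative stand-ins for Aesara's real dtype list and ``config.floatX``:

ALL_DTYPES = ["float32", "float64", "int32", "int64"]  # stand-in, not Aesara's list
FLOATX = "float64"                                     # stand-in for config.floatX

def resolve_dtype(dtype):
    # Mirror the branching above: resolve "floatX", then map string <-> index.
    if dtype == "floatX":
        dtype = FLOATX
    if isinstance(dtype, str):
        return dtype, ALL_DTYPES.index(dtype)
    return ALL_DTYPES[dtype], dtype

print(resolve_dtype("floatX"))  # ('float64', 1)
print(resolve_dtype(2))         # ('int32', 2)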
Example #3
File: op.py Project: mgorny/aesara
    def make_node(self, rng, size, dtype, *dist_params):
        """Create a random variable node.

        Parameters
        ----------
        rng: RandomGeneratorType or RandomStateType
            Existing Aesara `Generator` or `RandomState` object to be used.  Creates a
            new one, if `None`.
        size: int or Sequence
            NumPy-like size parameter.
        dtype: str
            The dtype of the sampled output.  If the value ``"floatX"`` is
            given, then `dtype` is set to ``aesara.config.floatX``.  This value is
            only used when ``self.dtype`` isn't set.
        dist_params: list
            Distribution parameters.

        Returns
        -------
        out: Apply
            A node with inputs ``(rng, size, dtype) + dist_params`` and outputs
            ``(rng_var, out_var)``.

        """
        size = normalize_size_param(size)

        dist_params = tuple(
            as_tensor_variable(p) if not isinstance(p, Variable) else p
            for p in dist_params)

        if rng is None:
            rng = aesara.shared(np.random.default_rng())
        elif not isinstance(rng.type, RandomType):
            raise TypeError(
                "The type of rng should be an instance of either RandomGeneratorType or RandomStateType"
            )

        shape = self._infer_shape(size, dist_params)
        _, bcast = infer_broadcastable(shape)
        dtype = self.dtype or dtype

        if dtype == "floatX":
            dtype = config.floatX
        elif dtype is None or (isinstance(dtype, str)
                               and dtype not in all_dtypes):
            raise TypeError("dtype is unspecified")

        if isinstance(dtype, str):
            dtype_idx = constant(all_dtypes.index(dtype), dtype="int64")
        else:
            dtype_idx = constant(dtype, dtype="int64")
            dtype = all_dtypes[dtype_idx.data]

        outtype = TensorType(dtype=dtype, shape=bcast)
        out_var = outtype()
        inputs = (rng, size, dtype_idx) + dist_params
        outputs = (rng.type(), out_var)

        return Apply(self, inputs, outputs)
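
In user code this `make_node` is normally reached indirectly, through a `RandomVariable` call. A hedged usage sketch, assuming a recent Aesara where `RandomStream` lives in ``aesara.tensor.random.utils`` (the exact module path may differ by version):

import aesara
from aesara.tensor.random.utils import RandomStream

srng = RandomStream(seed=123)
x = srng.normal(0.0, 1.0, size=(2, 3))  # builds an Apply node like the one above
f = aesara.function([], x)
print(f().shape)                        # (2, 3)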
Example #4
    def grad(self, inputs, grads):
        x, scale, bias, est_mean, est_var, epsilon = inputs
        dy = grads[0]
        axes = self.axes
        if min(axes) < 0 or max(axes) >= x.ndim:
            raise ValueError(
                f"axes should be less than ndim (<{x.ndim}), but {axes} given"
            )

        scale, bias, est_mean, est_var = (
            aet.addbroadcast(t, *axes) for t in (scale, bias, est_mean, est_var)
        )

        # define helper expressions
        est_var_eps = est_var + epsilon
        est_std = sqrt(est_var_eps)
        two = aet.constant(2.0)

        # define and return gradients
        dx = dy * (scale / est_std)
        dscale = (dy * (x - est_mean)).sum(axes, keepdims=True) / est_std
        dbias = dy.sum(axes, keepdims=True)
        dmean = -dy.sum(axes, keepdims=True) * (scale / est_std)
        dvar = -(dy * (x - est_mean)).sum(axes, keepdims=True) * (
            scale / (two * est_var_eps * est_std)
        )
        return [dx, dscale, dbias, dmean, dvar, aesara.gradient.DisconnectedType()()]
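
These are the inference-mode batch-normalization gradients: the forward expression is ``(x - est_mean) / sqrt(est_var + epsilon) * scale + bias``, with the running statistics treated as plain inputs rather than functions of `x`. A small NumPy sketch of that forward pass and the corresponding `dx` term, assuming unit upstream gradients:

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(4, 3))
scale = np.full((1, 3), 2.0)
bias = np.zeros((1, 3))
est_mean = x.mean(axis=0, keepdims=True)
est_var = x.var(axis=0, keepdims=True)
epsilon = 1e-5

est_std = np.sqrt(est_var + epsilon)
out = (x - est_mean) / est_std * scale + bias

dy = np.ones_like(out)       # pretend upstream gradient of ones
dx = dy * (scale / est_std)  # matches the `dx` expression above
print(out.shape, dx.shape)   # (4, 3) (4, 3)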
Example #5
    def test_reshape(self):
        new_shape = constant(
            np.asarray([self.mat_in_shape[0] * self.mat_in_shape[1]],
                       dtype="int64"))

        self.check_mat_rop_lop(self.mx.reshape(new_shape),
                               (self.mat_in_shape[0] * self.mat_in_shape[1], ))
Example #6
def test_dense_types():

    x = matrix()
    assert isinstance(x, DenseTensorVariable)
    assert not isinstance(x, DenseTensorConstant)

    x = constant(1)
    assert not isinstance(x, DenseTensorVariable)
    assert isinstance(x, DenseTensorConstant)
Example #7
def hard_sigmoid(x):
    """
    An approximation of sigmoid.

    More approximate and faster than ultra_fast_sigmoid.

    Approx in 3 parts: 0, scaled linear, 1.

    Removing the slope and shift does not make it faster.

    """
    # Use the same dtype as determined by "upgrade_to_float",
    # and perform computation in that dtype.
    out_dtype = aes.upgrade_to_float(aes.Scalar(dtype=x.dtype))[0].dtype
    slope = constant(0.2, dtype=out_dtype)
    shift = constant(0.5, dtype=out_dtype)
    x = (x * slope) + shift
    x = clip(x, 0, 1)
    return x
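
The three pieces mentioned in the docstring are just ``clip(0.2 * x + 0.5, 0, 1)``. A NumPy-only sketch (not the Aesara graph above) comparing it against the exact sigmoid:

import numpy as np

def hard_sigmoid_np(x):
    # 0 below -2.5, linear (slope 0.2, shift 0.5) in between, 1 above 2.5
    return np.clip(0.2 * x + 0.5, 0.0, 1.0)

x = np.array([-5.0, -1.0, 0.0, 1.0, 5.0])
print(hard_sigmoid_np(x))        # [0.  0.3 0.5 0.7 1. ]
print(1.0 / (1.0 + np.exp(-x)))  # exact sigmoid, for comparison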
Example #8
        def slice_ind_dims(p, ps, n):
            shape = tuple(ps)

            if n == 0:
                return (p, shape)

            ind_slice = (slice(None), ) * (p.ndim - n) + (0, ) * n
            ind_shape = [
                s if b is False else constant(1, "int64")
                for s, b in zip(shape[:-n], p.broadcastable[:-n])
            ]
            return (
                p[ind_slice],
                ind_shape,
            )
Example #9
def normalize_size_param(size):
    """Create an Aesara value for a ``RandomVariable`` ``size`` parameter."""
    if size is None:
        size = constant([], dtype="int64")
    elif isinstance(size, int):
        size = as_tensor_variable([size], ndim=1)
    elif not isinstance(size, (np.ndarray, Variable, Sequence)):
        raise TypeError(
            "Parameter size must be None, an integer, or a sequence with integers."
        )
    else:
        size = cast(as_tensor_variable(size, ndim=1), "int64")

    assert size.dtype in int_dtypes

    return size
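
The branching above normalizes every accepted `size` form to a 1-d int64 vector. A NumPy-only sketch of the same rules, returning plain arrays instead of Aesara variables (the helper name is ours, purely illustrative):

import numpy as np
from collections.abc import Sequence

def normalize_size_np(size):
    if size is None:
        return np.array([], dtype="int64")      # no size -> empty vector
    if isinstance(size, int):
        return np.array([size], dtype="int64")  # scalar -> length-1 vector
    if not isinstance(size, (np.ndarray, Sequence)):
        raise TypeError("size must be None, an int, or a sequence of ints")
    return np.asarray(size, dtype="int64").reshape(-1)

print(normalize_size_np(None))    # []
print(normalize_size_np(4))       # [4]
print(normalize_size_np((2, 3)))  # [2 3]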
Example #10
    def make_node(self, x, shape):
        if not isinstance(x, Variable):
            x = aet.as_tensor_variable(x)
        if shape == () or shape == []:
            tshape = aet.constant([], dtype="int64")
        else:
            tshape = aet.as_tensor_variable(shape, ndim=1)
            if tshape.dtype not in aesara.tensor.type.integer_dtypes:
                raise AssertionError(
                    f"The `shape` must be an integer type. Got {tshape.dtype} instead."
                )
        if isinstance(tshape, TensorConstant) and tshape.data.size != x.ndim:
            ndim = len(tshape.data)
            raise AssertionError(
                f"Input `x` is {x.ndim}-dimensional and will never match a {ndim}-dimensional shape."
            )
        return Apply(self, [x, tshape], [x.type()])
Example #11
    def test_grad(self):
        a = np.asarray(self.rng.randn(5, 5), dtype=config.floatX)

        x = matrix("x")

        expressions_gradients = [
            (x * zero_grad(x), x),
            (x * zero_grad(exp(x)), exp(x)),
            (zero_grad(x), aet.constant(0.0)),
            (x**2 * zero_grad(x), 2 * x**2),
        ]

        for expr, expr_grad in expressions_gradients:
            g = grad(expr.sum(), x)
            # gradient according to aesara
            f = aesara.function([x], g, on_unused_input="ignore")
            # desired gradient
            f2 = aesara.function([x], expr_grad, on_unused_input="ignore")

            assert np.allclose(f(a), f2(a))
Example #12
    def test_multiple_out_crash(self):
        # This test failed up to commit 2faeb62c38
        p0 = self.shared(np.asarray(np.random.random([4, 8]),
                                    dtype=self.dtype))
        p1 = self.shared(np.asarray(np.random.random(8), dtype=self.dtype))
        p2 = self.shared(np.asarray(np.random.random([8, 3]),
                                    dtype=self.dtype))
        p3 = self.shared(np.asarray(np.random.random(3), dtype=self.dtype))
        p = [p0, p1, p2, p3]

        # in my code these vars are the result of applying scan
        ften0 = tensor3("ft0", dtype=self.dtype)
        fmat1 = matrix("fm1", dtype=self.dtype)
        ften2 = tensor3("ft2", dtype=self.dtype)
        fmat3 = matrix("fm3", dtype=self.dtype)

        # then I keep only the last iteration
        fsub0 = ften0[-1]
        fsub1 = fmat1[-1]
        fsub2 = ften2[-1]
        fsub3 = fmat3[-1]

        fsub = [fsub0, fsub1, fsub2, fsub3]

        acc = at.constant(1, "int8") >= 0

        new_positions = ifelse(acc, fsub, p)

        new_updates = [(p[0], new_positions[0])]

        f = function([ften0, fmat1, ften2, fmat3], [],
                     updates=new_updates,
                     mode=self.mode)
        self.assertFunctionContains1(f, self.get_ifelse(4))

        i1 = np.asarray(np.random.random([19, 4, 8]), dtype=self.dtype)
        i2 = np.asarray(np.random.random([19, 8]), dtype=self.dtype)
        i3 = np.asarray(np.random.random([19, 8, 3]), dtype=self.dtype)
        i4 = np.asarray(np.random.random([19, 3]), dtype=self.dtype)

        f(i1, i2, i3, i4)
Example #13
def normalize_size_param(size):
    """Create an Aesara value for a ``RandomVariable`` ``size`` parameter."""
    if size is None:
        size = constant([], dtype="int64")
    elif isinstance(size, int):
        size = as_tensor_variable([size], ndim=1)
    elif not isinstance(size, (np.ndarray, Variable, Sequence)):
        raise TypeError(
            "Parameter size must be None, an integer, or a sequence with integers."
        )
    else:
        size = cast(as_tensor_variable(size, ndim=1), "int64")

        if not isinstance(size, Constant):
            # This should help ensure that the length of non-constant `size`s
            # will be available after certain types of cloning (e.g. the kind
            # `Scan` performs)
            size = specify_shape(size, (get_vector_length(size), ))

    assert size.dtype in int_dtypes

    return size
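
The only difference from the earlier version is the `specify_shape` call, which records the static length of a non-constant `size` vector so it survives cloning. A hedged usage sketch of `specify_shape` itself, assuming ``aesara.tensor.specify_shape`` in a recent Aesara:

import aesara
import aesara.tensor as at

size = at.lvector("size")
size_fixed = at.specify_shape(size, (2,))  # pin the (static) length to 2
f = aesara.function([size], size_fixed)
print(f([3, 4]))                           # [3 4]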
Example #14
def local_dimshuffle_rv_lift(fgraph, node):
    """Lift `DimShuffle`s through `RandomVariable` `Op`s.

    For example, ``normal(mu, std).T == normal(mu.T, std.T)``.

    The basic idea behind this optimization is that we need to separate the
    `DimShuffle`ing into independent `DimShuffle`s that each occur in two
    distinct sub-spaces: the parameters and ``size`` (i.e. replications)
    sub-spaces.

    If a `DimShuffle` exchanges dimensions across those two sub-spaces, then we
    don't do anything.

    Otherwise, if the `DimShuffle` only exchanges dimensions within each of
    those sub-spaces, we can break it apart and apply the parameter-space
    `DimShuffle` to the `RandomVariable`'s distribution parameters, and then
    apply the replications-space `DimShuffle` to the `RandomVariable`'s ``size``
    tuple.  The latter is a particularly simple rearranging of a tuple, but the
    former requires a little more work.
    """

    ds_op = node.op

    if not isinstance(ds_op, DimShuffle):
        return False

    base_rv = node.inputs[0]
    rv_node = base_rv.owner

    if not (
        rv_node and isinstance(rv_node.op, RandomVariable) and rv_node.op.ndim_supp == 0
    ):
        return False

    # If no one else is using the underlying `RandomVariable`, then we can
    # do this; otherwise, the graph would be internally inconsistent.
    if not all(
        (n == node or isinstance(n.op, Shape)) for n, i in fgraph.clients[base_rv]
    ):
        return False

    rv_op = rv_node.op
    rng, size, dtype, *dist_params = rv_node.inputs

    # We need to know the dimensions that were *not* added by the `size`
    # parameter (i.e. the dimensions corresponding to independent variates with
    # different parameter values)
    num_ind_dims = None
    if len(dist_params) == 1:
        num_ind_dims = dist_params[0].ndim
    else:
        # When there is more than one distribution parameter, assume that all
        # of them will broadcast to the maximum number of dimensions
        num_ind_dims = max(d.ndim for d in dist_params)

    # If the indices in `ds_new_order` are entirely within the replication
    # indices group or the independent variates indices group, then we can apply
    # this optimization.

    ds_new_order = ds_op.new_order
    # Create a map from old index order to new/`DimShuffled` index order
    dim_orders = [(n, d) for n, d in enumerate(ds_new_order) if isinstance(d, int)]

    # Find the index at which the replications/independents split occurs
    reps_ind_split_idx = len(dim_orders) - (num_ind_dims + rv_op.ndim_supp)

    ds_reps_new_dims = dim_orders[:reps_ind_split_idx]
    ds_ind_new_dims = dim_orders[reps_ind_split_idx:]
    ds_only_in_ind = ds_ind_new_dims and all(
        d >= reps_ind_split_idx for n, d in ds_ind_new_dims
    )

    if ds_only_in_ind:

        # Update the `size` array to reflect the `DimShuffle`d dimensions,
        # since the trailing dimensions in `size` represent the independent
        # variates dimensions (for univariate distributions, at least)
        new_size = (
            [constant(1, dtype="int64") if o == "x" else size[o] for o in ds_new_order]
            if get_vector_length(size) > 0
            else size
        )

        # Compute the new axes parameter(s) for the `DimShuffle` that will be
        # applied to the `RandomVariable` parameters (they need to be offset)
        rv_params_new_order = [
            d - reps_ind_split_idx if isinstance(d, int) else d
            for d in ds_new_order[ds_ind_new_dims[0][0] :]
        ]

        # Lift the `DimShuffle`s into the parameters
        # NOTE: The parameters might not be broadcasted against each other, so
        # we can only apply the parts of the `DimShuffle` that are relevant.
        new_dist_params = []
        for d in dist_params:
            if d.ndim < len(ds_ind_new_dims):
                _rv_params_new_order = [
                    o
                    for o in rv_params_new_order
                    if (isinstance(o, int) and o < d.ndim) or o == "x"
                ]
            else:
                _rv_params_new_order = rv_params_new_order

            new_dist_params.append(
                type(ds_op)(d.type.broadcastable, _rv_params_new_order)(d)
            )
        new_node = rv_op.make_node(rng, new_size, dtype, *new_dist_params)

        if config.compute_test_value != "off":
            compute_test_value(new_node)

        return [new_node.outputs[1]]

    ds_only_in_reps = ds_reps_new_dims and all(
        d < reps_ind_split_idx for n, d in ds_reps_new_dims
    )

    if ds_only_in_reps:
        # Update the `size` array to reflect the `DimShuffle`d dimensions.
        # There should be no need to `DimShuffle` now.
        new_size = [
            constant(1, dtype="int64") if o == "x" else size[o] for o in ds_new_order
        ]

        new_node = rv_op.make_node(rng, new_size, dtype, *dist_params)

        if config.compute_test_value != "off":
            compute_test_value(new_node)

        return [new_node.outputs[1]]

    return False
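
The identity the docstring states for elementwise (``ndim_supp == 0``) distributions can be checked shape-wise with plain NumPy; the draws themselves differ only because the RNG is consumed in a different order:

import numpy as np

mu = np.arange(6.0).reshape(2, 3)
std = np.ones((2, 3))

lhs = np.random.normal(mu, std).T    # transpose the draw:   shape (3, 2)
rhs = np.random.normal(mu.T, std.T)  # transpose the params: shape (3, 2)
assert lhs.shape == rhs.shape == (3, 2)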
Example #15
    def _infer_shape(
        self,
        size: Tuple[TensorVariable],
        dist_params: List[TensorVariable],
        param_shapes: Optional[List[Tuple[TensorVariable]]] = None,
    ) -> Tuple[ScalarVariable]:
        """Compute the output shape given the size and distribution parameters.

        Parameters
        ----------
        size
            The size parameter specified for this `RandomVariable`.
        dist_params
            The symbolic parameters for this `RandomVariable`'s distribution.
        param_shapes
            The shapes of the `dist_params` as given by a `ShapeFeature`
            via `Op.infer_shape`'s `input_shapes` argument.  This parameter's
            values are essentially more accurate versions of ``[d.shape for d
            in dist_params]``.

        """

        size_len = get_vector_length(size)

        if self.ndim_supp == 0 and size_len > 0:
            # In this case, we have a univariate distribution with a non-empty
            # `size` parameter, which means that the `size` parameter
            # completely determines the shape of the random variable.  More
            # importantly, the `size` parameter may be the only correct source
            # of information for the output shape, in that we would be misled
            # by the `dist_params` if we tried to infer the relevant parts of
            # the output shape from those.
            return size

        # Broadcast the parameters
        param_shapes = params_broadcast_shapes(
            param_shapes or [shape_tuple(p) for p in dist_params],
            self.ndims_params)

        def slice_ind_dims(p, ps, n):
            shape = tuple(ps)

            if n == 0:
                return (p, shape)

            ind_slice = (slice(None), ) * (p.ndim - n) + (0, ) * n
            ind_shape = [
                s if b is False else constant(1, "int64")
                for s, b in zip(shape[:-n], p.broadcastable[:-n])
            ]
            return (
                p[ind_slice],
                ind_shape,
            )

        # These are versions of our actual parameters with the anticipated
        # dimensions (i.e. support dimensions) removed so that only the
        # independent variate dimensions are left.
        params_ind_slice = tuple(
            slice_ind_dims(p, ps, n)
            for p, ps, n in zip(dist_params, param_shapes, self.ndims_params))

        if len(params_ind_slice) == 1:
            ind_param, ind_shape = params_ind_slice[0]
            ndim_ind = len(ind_shape)
            shape_ind = ind_shape
        elif len(params_ind_slice) > 1:
            # If there are multiple parameters, the dimensions of their
            # independent variates should broadcast together.
            p_slices, p_shapes = zip(*params_ind_slice)

            shape_ind = aesara.tensor.extra_ops.broadcast_shape_iter(
                p_shapes, arrays_are_shapes=True)

            ndim_ind = len(shape_ind)
        else:
            ndim_ind = 0

        if self.ndim_supp == 0:
            shape_supp = tuple()
            shape_reps = tuple(size)

            if ndim_ind > 0:
                shape_reps = shape_reps[:-ndim_ind]

            ndim_reps = len(shape_reps)
        else:
            shape_supp = self._shape_from_params(
                dist_params,
                param_shapes=param_shapes,
            )

            ndim_reps = size_len
            shape_reps = size

        ndim_shape = self.ndim_supp + ndim_ind + ndim_reps

        if ndim_shape == 0:
            shape = constant([], dtype="int64")
        else:
            shape = tuple(shape_reps) + tuple(shape_ind) + tuple(shape_supp)

        # if shape is None:
        #     raise ShapeError()

        return shape
Example #16
File: op.py Project: mgorny/aesara
    def _infer_shape(
        self,
        size: TensorVariable,
        dist_params: Sequence[TensorVariable],
        param_shapes: Optional[Sequence[Tuple[Variable, ...]]] = None,
    ) -> Union[TensorVariable, Tuple[ScalarVariable, ...]]:
        """Compute the output shape given the size and distribution parameters.

        Parameters
        ----------
        size
            The size parameter specified for this `RandomVariable`.
        dist_params
            The symbolic parameters for this `RandomVariable`'s distribution.
        param_shapes
            The shapes of the `dist_params` as given by a `ShapeFeature`
            via `Op.infer_shape`'s `input_shapes` argument.  This parameter's
            values are essentially more accurate versions of ``[d.shape for d
            in dist_params]``.

        """

        size_len = get_vector_length(size)

        if size_len > 0:
            if self.ndim_supp == 0:
                return size
            else:
                supp_shape = self._supp_shape_from_params(
                    dist_params, param_shapes=param_shapes)
                return tuple(size) + tuple(supp_shape)

        # Broadcast the parameters
        param_shapes = params_broadcast_shapes(
            param_shapes or [shape_tuple(p) for p in dist_params],
            self.ndims_params)

        def slice_ind_dims(p, ps, n):
            shape = tuple(ps)

            if n == 0:
                return (p, shape)

            ind_slice = (slice(None), ) * (p.ndim - n) + (0, ) * n
            ind_shape = [
                s if b is False else constant(1, "int64")
                for s, b in zip(shape[:-n], p.broadcastable[:-n])
            ]
            return (
                p[ind_slice],
                ind_shape,
            )

        # These are versions of our actual parameters with the anticipated
        # dimensions (i.e. support dimensions) removed so that only the
        # independent variate dimensions are left.
        params_ind_slice = tuple(
            slice_ind_dims(p, ps, n)
            for p, ps, n in zip(dist_params, param_shapes, self.ndims_params))

        if len(params_ind_slice) == 1:
            _, shape_ind = params_ind_slice[0]
        elif len(params_ind_slice) > 1:
            # If there are multiple parameters, the dimensions of their
            # independent variates should broadcast together.
            p_slices, p_shapes = zip(*params_ind_slice)

            shape_ind = aesara.tensor.extra_ops.broadcast_shape_iter(
                p_shapes, arrays_are_shapes=True)

        else:
            # Distribution has no parameters
            shape_ind = ()

        if self.ndim_supp == 0:
            shape_supp = ()
        else:
            shape_supp = self._supp_shape_from_params(
                dist_params,
                param_shapes=param_shapes,
            )

        shape = tuple(shape_ind) + tuple(shape_supp)
        if not shape:
            shape = constant([], dtype="int64")

        return shape
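
For a univariate (``ndim_supp == 0``) distribution, both `_infer_shape` variants reduce to a simple rule: a non-empty `size` wins outright; otherwise the output shape is the broadcast of the parameter shapes. A NumPy illustration of that rule (``np.broadcast_shapes`` requires NumPy >= 1.20):

import numpy as np

mu = np.zeros((3, 1))
sigma = np.ones((1, 4))

# No size given: the output shape is the broadcast of (3, 1) and (1, 4).
print(np.broadcast_shapes(mu.shape, sigma.shape))     # (3, 4)
print(np.random.normal(mu, sigma).shape)              # (3, 4)

# Explicit size: it determines the output shape directly.
print(np.random.normal(0.0, 1.0, size=(2, 5)).shape)  # (2, 5)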
Example #17
    def _run(self, num_features, num_timesteps, batch_size, mode):
        # determine shapes of inputs and targets depending on the batch size
        if batch_size == 1:
            inputs_size = (num_timesteps, num_features)
            targets_size = (num_timesteps, 1)
        else:
            inputs_size = (num_timesteps, batch_size, num_features)
            targets_size = (num_timesteps, batch_size, 1)

        # make inputs and targets shared variables
        inputs = aesara.shared(self.rng.uniform(size=inputs_size).astype(
            config.floatX),
                               borrow=True)
        targets = aesara.shared(self.rng.uniform(size=targets_size).astype(
            config.floatX),
                                borrow=True)

        # create symbolic inputs and targets variables
        if batch_size == 1:
            x = matrix("inputs")
            t = matrix("targets")
        else:
            x = tensor3("inputs")
            t = tensor3("targets")
        x.tag.test_value = inputs.get_value(borrow=True)
        t.tag.test_value = targets.get_value(borrow=True)

        # create a set of parameters for a simple RNN
        W_xh = aesara.shared(
            (0.01 * self.rng.uniform(size=(num_features, 10))).astype(
                config.floatX),
            borrow=True,
        )
        W_hh = aesara.shared(
            (0.01 * self.rng.uniform(size=(10, 10))).astype(config.floatX),
            borrow=True)
        W_hy = aesara.shared(
            (0.01 * self.rng.uniform(size=(10, 1))).astype(config.floatX),
            borrow=True)
        b_h = aesara.shared(np.zeros(10).astype(config.floatX), borrow=True)
        b_y = aesara.shared(np.zeros(1).astype(config.floatX), borrow=True)

        params = [W_xh, W_hh, W_hy, b_h, b_y]

        # recurrent function
        def step(x_t, h_tm1):
            h = tanh(dot(h_tm1, W_hh) + dot(x_t, W_xh) + b_h)
            return h

        # build recurrent graph
        if batch_size == 1:
            h_0 = aet.alloc(0.0, 10).astype(config.floatX)
        else:
            h_0 = aet.alloc(0.0, batch_size, 10).astype(config.floatX)
        h, updates = aesara.scan(step, sequences=[x], outputs_info=[h_0])
        # network output
        y = dot(h, W_hy) + b_y

        # Create Gauss-Newton-Matrix object. Not really of any use here, but I
        # need it for Hessian-Free optimization.
        gn = GaussNewtonMatrix(y)

        # compute MSE
        cost = ((t - y)**2).sum(axis=1).mean()

        # Compute the cost at some other point in the parameter
        # space. Not really of any use here, but this is how I do it
        # during certain iterations of CG in the HF algorithm. There,
        # it's in fact `pi + current update proposal`.  For simplicity,
        # I just multiply by 2 here.
        cost_ = aesara.clone_replace(cost,
                                     replace={pi: 2 * pi
                                              for pi in params})

        # Compute Gauss-Newton-Matrix times some vector `v` which is `p` in CG,
        # but for simplicity, I just take the parameters vector because it's
        # already there.
        Gv = gn(v=params, cost=cost, parameters=params, damp=aet.constant(1.0))

        # compile Aesara function
        f = aesara.function([], [cost_] + Gv,
                            givens={
                                x: inputs,
                                t: targets
                            },
                            mode=mode)
        # execute
        f()
Example #18
    def test_empty_shp(self):
        const = constant([1]).reshape(())
        f = function([], const)
        assert f().shape == ()
Example #19
def local_dimshuffle_subtensor(node):
    """If a subtensor is inside a dimshuffle which only drop
    broadcastable dimensions, scrap the dimshuffle and index the
    subtensor with 0

    x[i:j, :, k:l].dimshuffle(0, 2) =>
        x[i:j, 0, k:l] if x.broadcastable == (False, True, False)

    """
    if isinstance(node.op, DimShuffle) and node.inputs[0].owner:
        # the dimshuffle can only drop dimensions (cannot reshape nor add 'x')
        if "x" in node.op.new_order:
            return False
        new_order = node.op.new_order
        # new order could be empty
        # Verify that we don't change the order of dimensions.
        if len(new_order) > 1:
            past_dim = new_order[0]
            for dim in new_order[1:]:
                if not dim > past_dim:
                    return False
                else:
                    past_dim = dim

        input_ = node.inputs[0]
        if isinstance(input_.owner.op, Subtensor):
            # the arguments missing from the dimshuffles must be dims
            # that are broadcastable
            broadcastable = input_.broadcastable

            missing_dims = list(range(input_.ndim))
            for dim in new_order:
                missing_dims.remove(dim)

            if not all([broadcastable[i] for i in missing_dims]):
                return False

            # create a new idx_list for a new Subtensor object
            # have to loop on idx_list and inputs
            # inputs has the length of sum of non None elements of idx_list
            # (check in slice!).
            # len(missing_dims) can be < len(idx_list), this happens if
            # tensor was indexed such as x[scalar, :, :], check that as well
            new_idx_list = list(input_.owner.op.idx_list)
            new_inputs = [input_.owner.inputs[0]]
            zero = tt.constant(0)
            slice_attr_list = ["start", "stop", "step"]
            j = 0
            slice_i = -1
            subtensor_removed_dims = 0
            for i, idx in enumerate(input_.owner.op.idx_list):
                if isinstance(idx, slice):
                    past_j = j
                    slice_i += 1
                    for slice_attr in slice_attr_list:
                        if getattr(idx, slice_attr) is not None:
                            new_inputs += [input_.owner.inputs[1 + j]]
                            j += 1
                    # if past_j == j indicates a slice(None, None, None),
                    # that's where we want to index with 0 if it is also at
                    # the same spot of a missing dim
                    if past_j == j and slice_i in missing_dims:
                        new_idx_list[i] = zero
                        new_inputs += [zero]
                else:
                    new_inputs += [input_.owner.inputs[1 + j]]
                    j += 1
                    subtensor_removed_dims += 1
            # Verify the trailing dimensions the subtensor didn't look at.
            for idx in range(len(input_.owner.op.idx_list),
                             new_inputs[0].ndim):
                if (idx - subtensor_removed_dims) in missing_dims:
                    while len(new_idx_list) < idx:
                        new_idx_list.append(slice(None))

                    new_idx_list.append(zero)
                    new_inputs.append(zero)
            return [Subtensor(new_idx_list)(*new_inputs)]
    return False
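
The rewrite described in the docstring has a direct NumPy analogue: dropping a length-1 axis after slicing is the same as indexing that axis with 0 inside the slice:

import numpy as np

x = np.arange(2 * 1 * 5).reshape(2, 1, 5)  # middle axis has length 1 ("broadcastable")
lhs = x[0:2, :, 1:4].squeeze(axis=1)       # slice, then drop the unit axis
rhs = x[0:2, 0, 1:4]                       # index the unit axis with 0 directly
assert np.array_equal(lhs, rhs)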