Example 1
def test_merge_with_weird_eq():
    """numpy arrays don't compare equal like other python objects"""

    #SCALAR CASE
    x = T.constant(numpy.asarray(1), name='x')
    y = T.constant(numpy.asarray(1), name='y')
    g = Env([x, y], [x+y])
    MergeOptimizer().optimize(g)

    assert len(g.nodes) == 1
    node = list(g.nodes)[0]
    assert len(node.inputs) == 2
    assert node.inputs[0] is node.inputs[1]

    #NONSCALAR CASE
    # This was created to test TensorConstantSignature
    x = T.constant(numpy.ones(5), name='x')
    y = T.constant(numpy.ones(5), name='y')
    g = Env([x, y], [x+y])
    MergeOptimizer().optimize(g)

    assert len(g.nodes) == 1
    node = list(g.nodes)[0]
    assert len(node.inputs) == 2
    assert node.inputs[0] is node.inputs[1]
Example 2
def test_merge_with_weird_eq():
    """numpy arrays don't compare equal like other python objects"""

    # SCALAR CASE
    x = T.constant(np.asarray(1), name='x')
    y = T.constant(np.asarray(1), name='y')
    g = Env([x, y], [x+y])
    MergeOptimizer().optimize(g)

    assert len(g.apply_nodes) == 1
    node = list(g.apply_nodes)[0]
    assert len(node.inputs) == 2
    assert node.inputs[0] is node.inputs[1]

    # NONSCALAR CASE
    # This was created to test TensorConstantSignature
    x = T.constant(np.ones(5), name='x')
    y = T.constant(np.ones(5), name='y')
    g = Env([x, y], [x+y])
    MergeOptimizer().optimize(g)

    assert len(g.apply_nodes) == 1
    node = list(g.apply_nodes)[0]
    assert len(node.inputs) == 2
    assert node.inputs[0] is node.inputs[1]
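Both versions of this test exercise the same NumPy quirk: `==` on arrays is elementwise, so equal-valued constants cannot be deduplicated with ordinary Python equality and a value signature is needed instead. A minimal NumPy-only sketch of the problem (independent of Theano's actual `TensorConstantSignature` implementation):

import numpy as np

a = np.ones(5)
b = np.ones(5)

print(a is b)     # False: two distinct objects
print(a == b)     # elementwise: array([ True,  True,  True,  True,  True])
try:
    bool(a == b)  # cannot be used directly in a plain `==`-based merge
except ValueError as err:
    print(err)    # "The truth value of an array ... is ambiguous"

def sig(arr):
    # illustrative value signature: dtype, shape, and raw bytes
    return (arr.dtype, arr.shape, arr.tobytes())

print(sig(a) == sig(b))  # True: safe to merge the two constants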
Example 3
    def grad(self, inputs, grads):
        x, scale, bias, est_mean, est_var, epsilon = inputs
        dy = grads[0]
        axes = self.axes
        if min(axes) < 0 or max(axes) >= x.ndim:
            raise ValueError(
                f"axes should be less than ndim (<{x.ndim}), but {axes} given"
            )

        scale, bias, est_mean, est_var = (
            tt.addbroadcast(t, *axes) for t in (scale, bias, est_mean, est_var)
        )

        # define helper expressions
        est_var_eps = est_var + epsilon
        est_std = tt.sqrt(est_var_eps)
        two = tt.constant(2.0)

        # define and return gradients
        dx = dy * (scale / est_std)
        dscale = (dy * (x - est_mean)).sum(axes, keepdims=True) / est_std
        dbias = dy.sum(axes, keepdims=True)
        dmean = -dy.sum(axes, keepdims=True) * (scale / est_std)
        dvar = -(dy * (x - est_mean)).sum(axes, keepdims=True) * (
            scale / (two * est_var_eps * est_std)
        )
        return [dx, dscale, dbias, dmean, dvar, theano.gradient.DisconnectedType()()]
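For reference, these are the gradients of the inference-mode batch-normalization output (x - est_mean) / sqrt(est_var + epsilon) * scale + bias. A minimal NumPy sketch of that forward pass (the helper name is hypothetical, not part of the Op):

import numpy as np

def bn_inference_forward(x, scale, bias, est_mean, est_var, epsilon=1e-4):
    # out = (x - mean) / sqrt(var + eps) * scale + bias; e.g. d(out)/d(bias) = 1,
    # which, multiplied by dy and summed over `axes`, is the `dbias` above.
    est_std = np.sqrt(est_var + epsilon)
    return (x - est_mean) / est_std * scale + bias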
Example 4
def hard_sigmoid(x):
    """An approximation of sigmoid.

    More approximate and faster than ultra_fast_sigmoid.

    Approx in 3 parts: 0, scaled linear, 1

    Removing the slope and shift does not make it faster.

    """
    # Use the same dtype as determined by "upgrade_to_float",
    # and perform computation in that dtype.
    out_dtype = scalar.upgrade_to_float(scalar.Scalar(dtype=x.dtype))[0].dtype
    slope = tensor.constant(0.2, dtype=out_dtype)
    shift = tensor.constant(0.5, dtype=out_dtype)
    x = (x * slope) + shift
    x = tensor.clip(x, 0, 1)
    return x
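The whole approximation is clip(0.2 * x + 0.5, 0, 1): exactly 0 below -2.5, exactly 1 above 2.5, and linear in between. A small NumPy sketch comparing it with the exact sigmoid (illustration only, not Theano code):

import numpy as np

def hard_sigmoid_np(x):
    # piecewise-linear: 0 for x <= -2.5, 1 for x >= 2.5, slope 0.2 / shift 0.5 in between
    return np.clip(x * 0.2 + 0.5, 0.0, 1.0)

x = np.linspace(-6.0, 6.0, 1001)
exact = 1.0 / (1.0 + np.exp(-x))
print(np.max(np.abs(hard_sigmoid_np(x) - exact)))  # worst-case deviation on this grid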
Example 5
        def slice_ind_dims(p, ps, n):
            shape = tuple(ps)

            if n == 0:
                return (p, shape)

            ind_slice = (slice(None),) * (p.ndim - n) + (0,) * n
            ind_shape = [
                s if b is False else constant(1, "int64")
                for s, b in zip(shape[:-n], p.broadcastable[:-n])
            ]
            return (
                p[ind_slice],
                ind_shape,
            )
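This helper indexes the trailing n support dimensions at 0, keeping only the independent (batch/replication) dimensions of the parameter. The indexing pattern, illustrated with plain NumPy and made-up shapes:

import numpy as np

p = np.zeros((3, 4, 2))  # e.g. a 3x4 batch of parameters whose support dimension has length 2
n = 1                    # number of trailing support dimensions to drop

ind_slice = (slice(None),) * (p.ndim - n) + (0,) * n
print(p[ind_slice].shape)  # (3, 4): only the independent dimensions remain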
Example 6
def local_0_dot_x(node):
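    """Rewrite ``dot(x, y)`` as an all-zeros tensor of the corresponding
    output shape when either ``x`` or ``y`` is a constant 0.
    """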
    if not isinstance(node.op, T.Dot):
        return False

    x = node.inputs[0]
    y = node.inputs[1]
    replace = False
    try:
        if get_scalar_constant_value(x) == 0:
            replace = True
    except NotScalarConstantError:
        pass

    try:
        if get_scalar_constant_value(y) == 0:
            replace = True
    except NotScalarConstantError:
        pass

    if replace:
        constant_zero = T.constant(0, dtype=node.outputs[0].type.dtype)
        if x.ndim == 2 and y.ndim == 2:
            constant_zero = assert_(constant_zero,
                                    T.eq(x.shape[1], y.shape[0]))
            return [T.alloc(constant_zero, x.shape[0], y.shape[1])]
        elif x.ndim == 1 and y.ndim == 2:
            constant_zero = assert_(constant_zero,
                                    T.eq(x.shape[0], y.shape[0]))
            return [T.alloc(constant_zero, y.shape[1])]
        elif x.ndim == 2 and y.ndim == 1:
            constant_zero = assert_(constant_zero,
                                    T.eq(x.shape[1], y.shape[0]))
            return [T.alloc(constant_zero, x.shape[0])]
        elif x.ndim == 1 and y.ndim == 1:
            constant_zero = assert_(constant_zero,
                                    T.eq(x.shape[0], y.shape[0]))
            return [constant_zero]
        else:
            _logger.warning("Optimization Warning: "
                            "Optimization theano/opt.py:local_0_dot_x Found "
                            "that it could apply, but was not implemented "
                            "for dot product with these input types:\n"
                            "(%s, %s)",
                            x.type, y.type)
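The rewrite replaces the dot product with an all-zeros tensor shaped like the dot's output, while the assert_ keeps the original shape-compatibility check alive. A NumPy sketch of the shape logic only (not the actual graph rewrite):

import numpy as np

def zero_dot_shape(x, y):
    # shape that np.dot(x, y) would produce, for the four cases handled above
    if x.ndim == 2 and y.ndim == 2:
        return (x.shape[0], y.shape[1])
    if x.ndim == 1 and y.ndim == 2:
        return (y.shape[1],)
    if x.ndim == 2 and y.ndim == 1:
        return (x.shape[0],)
    if x.ndim == 1 and y.ndim == 1:
        return ()
    raise NotImplementedError

x = np.zeros((3, 4))
y = np.ones((4, 5))
assert np.array_equal(np.dot(x, y), np.zeros(zero_dot_shape(x, y)))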
Example 7
    def make_node(self, rng, size, dtype, *dist_params):
        """Create a random variable node.

        XXX: Unnamed/non-keyword arguments are considered distribution
        parameters!  If you want to set `size`, `rng`, and/or `name`, use their
        keywords.

        Parameters
        ----------
        rng: RandomStateType
            Existing Theano `RandomState` object to be used.  A new one is
            created if `None`.
        size: int or Sequence
            Numpy-like size of the output (i.e. replications).
        dtype: Theano dtype
            The dtype of the sampled output.  This value is only used when
            `self.dtype` isn't set.
        dist_params: list
            Distribution parameters.

        Returns
        -------
        out: `Apply`
            A node with inputs `(rng, size, dtype) + dist_args` and outputs
            `(rng_var, out_var)`.

        """
        if size is None:
            size = constant([], dtype="int64")
        elif isinstance(size, int):
            size = as_tensor_variable([size], ndim=1)
        elif not isinstance(size, (np.ndarray, Variable, Sequence)):
            raise TypeError(
                "Parameter size must be None, an integer, or a sequence with integers."
            )
        else:
            size = cast(as_tensor_variable(size, ndim=1), "int64")

        assert size.dtype in int_dtypes

        dist_params = tuple(
            as_tensor_variable(p) if not isinstance(p, Variable) else p
            for p in dist_params
        )

        if rng is None:
            rng = theano.shared(np.random.RandomState())
        elif not isinstance(rng.type, RandomStateType):
            raise TypeError("The type of rng should be an instance of RandomStateType")

        bcast = self.compute_bcast(dist_params, size)
        dtype = self.dtype or dtype

        if dtype is None or (isinstance(dtype, str) and dtype not in all_dtypes):
            # dtype = tt.scal.upcast(self.dtype, *[p.dtype for p in dist_params])
            raise TypeError("dtype is unspecified")

        if isinstance(dtype, str):
            dtype_idx = constant(all_dtypes.index(dtype), dtype="int64")
        else:
            dtype_idx = constant(dtype, dtype="int64")
            dtype = all_dtypes[dtype_idx.data]

        outtype = TensorType(dtype=dtype, broadcastable=bcast)
        out_var = outtype()
        inputs = (rng, size, dtype_idx) + dist_params
        outputs = (rng.type(), out_var)

        return Apply(self, inputs, outputs)
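The first branch normalizes size into a 1-d int64 vector no matter how the caller passed it (None, int, or sequence). A plain-NumPy stand-in for that normalization (simplified, not the Theano conversions used above):

from collections.abc import Sequence

import numpy as np

def normalize_size(size):
    # None -> empty vector, int -> length-1 vector, sequence/array -> int64 vector
    if size is None:
        return np.array([], dtype="int64")
    if isinstance(size, int):
        return np.array([size], dtype="int64")
    if not isinstance(size, (np.ndarray, Sequence)):
        raise TypeError(
            "Parameter size must be None, an integer, or a sequence with integers."
        )
    return np.asarray(size, dtype="int64")

print(normalize_size(None))    # []
print(normalize_size(3))       # [3]
print(normalize_size((2, 5)))  # [2 5]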
Example 8
    def _infer_shape(self, size, dist_params, param_shapes=None):
        """Compute the output shape given the size and distribution parameters.

        Parameters
        ----------
        size : TensorVariable
            The size parameter specified for this `RandomVariable`.
        dist_params : list of TensorVariable
            The symbolic parameters for this `RandomVariable`'s distribution.
        param_shapes : list of tuples of TensorVariable (optional)
            The shapes of the `dist_params`, as given by a `ShapeFeature`
            via `Op.infer_shape`'s `input_shapes` argument.  These values
            are essentially more accurate versions of ``[d.shape for d
            in dist_params]``.

        Returns
        -------
        shape : tuple of `ScalarVariable`

        """

        size_len = get_vector_length(size)

        if self.ndim_supp == 0 and size_len > 0:
            # In this case, we have a univariate distribution with a non-empty
            # `size` parameter, which means that the `size` parameter
            # completely determines the shape of the random variable.  More
            # importantly, the `size` parameter may be the only correct source
            # of information for the output shape, in that we would be misled
            # by the `dist_params` if we tried to infer the relevant parts of
            # the output shape from those.
            return size

        # Broadcast the parameters
        param_shapes = params_broadcast_shapes(
            param_shapes or [p.shape for p in dist_params], self.ndims_params
        )

        def slice_ind_dims(p, ps, n):
            shape = tuple(ps)

            if n == 0:
                return (p, shape)

            ind_slice = (slice(None),) * (p.ndim - n) + (0,) * n
            ind_shape = [
                s if b is False else constant(1, "int64")
                for s, b in zip(shape[:-n], p.broadcastable[:-n])
            ]
            return (
                p[ind_slice],
                ind_shape,
            )

        # These are versions of our actual parameters with the anticipated
        # dimensions (i.e. support dimensions) removed so that only the
        # independent variate dimensions are left.
        params_ind_slice = tuple(
            slice_ind_dims(p, ps, n)
            for p, ps, n in zip(dist_params, param_shapes, self.ndims_params)
        )

        if len(params_ind_slice) == 1:
            ind_param, ind_shape = params_ind_slice[0]
            ndim_ind = len(ind_shape)
            shape_ind = ind_shape
        elif len(params_ind_slice) > 1:
            # If there are multiple parameters, the dimensions of their
            # independent variates should broadcast together.
            p_slices, p_shapes = zip(*params_ind_slice)

            shape_ind = theano.tensor.extra_ops.broadcast_shape_iter(
                p_shapes, arrays_are_shapes=True
            )

            ndim_ind = len(shape_ind)
        else:
            ndim_ind = 0

        if self.ndim_supp == 0:
            shape_supp = tuple()
            shape_reps = tuple(size)

            if ndim_ind > 0:
                shape_reps = shape_reps[:-ndim_ind]

            ndim_reps = len(shape_reps)
        else:
            shape_supp = self._shape_from_params(
                dist_params,
                param_shapes=param_shapes,
            )

            ndim_reps = size_len
            shape_reps = size

        ndim_shape = self.ndim_supp + ndim_ind + ndim_reps

        if ndim_shape == 0:
            shape = constant([], dtype="int64")
        else:
            shape = tuple(shape_reps) + tuple(shape_ind) + tuple(shape_supp)

        # if shape is None:
        #     raise ShapeError()

        return shape
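The resulting shape is the concatenation shape_reps + shape_ind + shape_supp: replications requested through size, the broadcast independent (batch) dimensions of the parameters, then the support dimensions of one draw. A concrete NumPy sketch, assuming a hypothetical multivariate distribution with ndim_supp == 1 and two parameters with ndims_params == (1, 2):

import numpy as np

size = (10,)           # replications requested via `size`
mean_shape = (5, 3)    # a batch of 5 mean vectors of length 3 (made-up shapes)
cov_shape = (5, 3, 3)  # a matching batch of 3x3 covariance matrices
ndims_params = (1, 2)  # trailing support dims consumed by each parameter

# independent (batch) part of each parameter's shape, broadcast together
ind_shapes = [mean_shape[: len(mean_shape) - ndims_params[0]],
              cov_shape[: len(cov_shape) - ndims_params[1]]]
shape_ind = np.broadcast_shapes(*ind_shapes)  # (5,); requires NumPy >= 1.20

shape_supp = (3,)  # support shape of a single draw (a length-3 vector)
shape = tuple(size) + tuple(shape_ind) + tuple(shape_supp)
print(shape)  # (10, 5, 3)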
Example 9
def local_dimshuffle_subtensor(node):
    """If a subtensor is inside a dimshuffle which only drop
    broadcastable dimensions, scrap the dimshuffle and index the
    subtensor with 0

    x[i:j, :, k:l].dimshuffle(0, 2) =>
        x[i:j, 0, k:l] if x.broadcastable == (False, True, False)

    """
    if isinstance(node.op, DimShuffle) and node.inputs[0].owner:
        # the dimshuffle can only drop dimensions (cannot reshape nor add 'x')
        if 'x' in node.op.new_order:
            return False
        new_order = node.op.new_order
        # new order could be empty
        if len(new_order) > 1:
            past_dim = new_order[0]
            for dim in new_order[1:]:
                if not dim > past_dim:
                    return False
                else:
                    past_dim = dim

        input_ = node.inputs[0]
        if isinstance(input_.owner.op, Subtensor):
            # the dimensions missing from the dimshuffle's new_order must
            # be broadcastable
            broadcastable = input_.broadcastable

            missing_dims = list(range(input_.ndim))
            for dim in new_order:
                missing_dims.remove(dim)

            if not all([broadcastable[i] for i in missing_dims]):
                return False

            # create a new idx_list for a new Subtensor object
            # have to loop on idx_list and inputs
            # inputs has the length of sum of non None elements of idx_list
            # (check in slice!).
            # len(missing_dims) can be < len(idx_list), this happens if
            # tensor was indexed such as x[scalar, :, :], check that as well
            new_idx_list = list(input_.owner.op.idx_list)
            new_inputs = [input_.owner.inputs[0]]
            zero = T.constant(0)
            slice_attr_list = ['start', 'stop', 'step']
            j = 0
            slice_i = -1
            subtensor_removed_dims = 0
            for idx in input_.owner.op.idx_list:
                if isinstance(idx, slice):
                    past_j = j
                    slice_i += 1
                    for slice_attr in slice_attr_list:
                        if getattr(idx, slice_attr) is not None:
                            new_inputs += [input_.owner.inputs[1 + j]]
                            j += 1
                    # if past_j == j indicates a slice(None, None, None),
                    # that's where we want to index with 0 if it is also at
                    # the same spot of a missing dim
                    if past_j == j and slice_i in missing_dims:
                        new_idx_list[j] = zero
                        new_inputs += [zero]
                else:
                    new_inputs += [input_.owner.inputs[1 + j]]
                    j += 1
                    subtensor_removed_dims += 1
            # Verify the trailing dimensions the subtensor didn't look at.
            for idx in range(len(input_.owner.op.idx_list),
                             new_inputs[0].ndim):
                if (idx - subtensor_removed_dims) in missing_dims:
                    while len(new_idx_list) < idx:
                        new_idx_list.append(slice(None))

                    new_idx_list.append(zero)
                    new_inputs.append(zero)
            return [Subtensor(new_idx_list)(*new_inputs)]
    return False
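The equivalence this rewrite relies on can be checked directly in NumPy: dropping a broadcastable (length-1) dimension after slicing is the same as indexing that dimension with 0 inside the subtensor itself:

import numpy as np

x = np.arange(24).reshape(4, 1, 6)    # middle dimension has length 1 (broadcastable)
i, j, k, l = 1, 3, 2, 5

lhs = x[i:j, :, k:l].squeeze(axis=1)  # what dimshuffle(0, 2) does: drop the length-1 dim
rhs = x[i:j, 0, k:l]                  # what the rewritten Subtensor does instead
assert np.array_equal(lhs, rhs)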