Example 1
def broadcast_like(value, template, fgraph, dtype=None):
    """
    Return a Variable with the same shape and dtype as the template,
    filled by broadcasting value through it. `value` will be cast as
    necessary.

    """
    value = T.as_tensor_variable(value)
    if value.type == template.type:
        return value
    if template not in fgraph.variables:
        raise NotImplementedError('broadcast_like currently requires the '
                                  'template Variable to be in the fgraph already')
    if hasattr(fgraph, 'shape_feature'):
        new_shape = fgraph.shape_feature.shape_of[template]
    else:
        new_shape = template.shape
    if dtype is None:
        dtype = template.dtype
    rval = T.alloc(T.cast(value, dtype), *new_shape)
    # the template may have 1s in its shape without being broadcastable
    if rval.broadcastable != template.broadcastable:
        rval = T.unbroadcast(rval, *[i for i in range(rval.ndim)
                                     if rval.broadcastable[i] and
                                     not template.broadcastable[i]])
    assert rval.type.dtype == dtype

    if rval.type.broadcastable != template.broadcastable:
        raise AssertionError("rval.type.broadcastable is " +
                             str(rval.type.broadcastable) +
                             " but template.broadcastable is " +
                             str(template.broadcastable))

    return rval
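A minimal usage sketch for the helper above (assuming the standard Theano imports, that `broadcast_like` as defined above is in scope, and that `ShapeFeature` from `theano.tensor.opt` is attached to the graph; variable names are illustrative):

import theano.tensor as T
from theano.gof import FunctionGraph
from theano.tensor.opt import ShapeFeature

x = T.matrix('x')                               # template variable
y = x * 2                                       # any output, so that x belongs to a graph
fgraph = FunctionGraph([x], [y], clone=False)   # keep x itself in the graph
fgraph.attach_feature(ShapeFeature())           # lets broadcast_like reuse cached shapes

# Broadcast the scalar 1.0 to the symbolic shape and dtype of x.
ones_like_x = broadcast_like(1.0, x, fgraph)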
Example 2
    def grad(self, inp, cost_grad):
        """
        Notes
        -----
        The gradient is currently implemented for matrices only.
        """
        a, val, offset = inp
        grad = cost_grad[0]
        height, width = grad.shape

        if a.dtype.startswith("complex"):
            return [None, None]

        # only valid for matrices
        wr_a = fill_diagonal_offset(grad, 0, offset)

        offset_abs = basic.abs_(offset)
        pos_offset_flag = basic.ge(offset, 0)
        neg_offset_flag = basic.lt(offset, 0)
        min_wh = basic.minimum(width, height)

        start = offset * pos_offset_flag + offset_abs * width * neg_offset_flag
        num_of_step = basic.minimum(
            min_wh,
            width * pos_offset_flag + height * neg_offset_flag - offset_abs)

        step = a.shape[1] + 1
        end = start + step * num_of_step

        # slice indices must be integers
        start = basic.cast(start, "int32")
        step = basic.cast(step, "int32")
        end = basic.cast(end, "int32")

        wr_val = grad.flatten()[start:end:step].sum()

        wr_offset = grad_undefined(
            self,
            2,
            offset,
            "offset is not defined for non-integer offset so"
            " fill_diagonal_offset(a,val,offset+eps) is undefined",
        )

        return [wr_a, wr_val, wr_offset]
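The slice arithmetic above can be checked with plain NumPy: in a row-major matrix of width w, consecutive diagonal elements are w + 1 positions apart in the flattened array, and a negative offset moves the start down by |offset| * w elements. A small sketch (NumPy only; the values are illustrative):

import numpy as np

a = np.arange(12.0).reshape(3, 4)    # height=3, width=4
offset = -1                          # a sub-diagonal

height, width = a.shape
start = offset if offset >= 0 else abs(offset) * width
step = width + 1
num_of_step = min(min(height, width),
                  (width if offset >= 0 else height) - abs(offset))
end = start + step * num_of_step

# Same elements that np.diagonal extracts, taken from the flattened array.
assert np.allclose(a.flatten()[start:end:step], np.diagonal(a, offset=offset))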
Example 3
def local_abstract_batch_norm_train(fgraph, node):
    if not isinstance(node.op, AbstractBatchNormTrain):
        return None

    x, scale, bias, epsilon, running_average_factor = node.inputs[:5]
    axes = node.op.axes
    if min(axes) < 0 or max(axes) > x.ndim:
        return None
    if (
        not isinstance(x.type, TensorType)
        or not isinstance(scale.type, TensorType)
        or not isinstance(bias.type, TensorType)
        or not isinstance(epsilon.type, TensorType)
        or not isinstance(running_average_factor.type, TensorType)
    ):
        return None
    # optional running_mean and running_var
    if len(node.inputs) > 5 and not isinstance(node.inputs[5].type, TensorType):
        return None
    if len(node.inputs) > 6 and not isinstance(node.inputs[6].type, TensorType):
        return None

    mean = x.mean(axes, keepdims=True)
    var = x.var(axes, keepdims=True)
    # The epsilon should not upcast the dtype.
    if var.dtype == "float32" and epsilon.dtype == "float64":
        epsilon = epsilon.astype("float32")
    invstd = tt.inv(tt.sqrt(var + epsilon))
    out = (x - mean) * (scale * invstd) + bias
    results = [out, mean, invstd]

    if len(node.inputs) > 5:
        running_mean = node.inputs[5]
        running_mean = (
            running_mean * (1.0 - running_average_factor)
            + mean * running_average_factor
        )
        results.append(running_mean)
    if len(node.inputs) > 6:
        m = tt.cast(tt.prod(x.shape) / tt.prod(scale.shape), theano.config.floatX)
        running_var = node.inputs[6]
        running_var = (
            running_var * (1.0 - running_average_factor)
            + (m / (m - 1)) * var * running_average_factor
        )
        results.append(running_var)

    results = [
        tt.patternbroadcast(r, r_orig.broadcastable)
        for (r, r_orig) in zip(results, node.outputs)
    ]

    for var in theano.gof.graph.variables(node.inputs, results):
        if var not in node.inputs:
            copy_stack_trace(node.outputs[0], var)
    return results
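The rewrite above simply spells out the training-mode batch-normalisation formulas. A NumPy sketch of the same arithmetic, assuming normalisation over axis 0 of a (batch, features) input (all names illustrative):

import numpy as np

x = np.random.randn(8, 5).astype("float32")
scale = np.ones((1, 5), dtype="float32")
bias = np.zeros((1, 5), dtype="float32")
epsilon = np.float32(1e-4)

mean = x.mean(axis=0, keepdims=True)
var = x.var(axis=0, keepdims=True)
invstd = 1.0 / np.sqrt(var + epsilon)
out = (x - mean) * (scale * invstd) + bias

# Per feature, the normalised output has (approximately) zero mean.
assert np.allclose(out.mean(axis=0), 0.0, atol=1e-5)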
Example 4
    def grad(self, inp, cost_grad):
        """
        Notes
        -----
        The gradient is currently implemented for matrices only.
        """
        a, val, offset = inp
        grad = cost_grad[0]
        height, width = grad.shape

        if a.dtype.startswith("complex"):
            return [None, None]

        # only valid for matrices
        wr_a = fill_diagonal_offset(grad, 0, offset)

        offset_abs = basic.abs_(offset)
        pos_offset_flag = basic.ge(offset, 0)
        neg_offset_flag = basic.lt(offset, 0)
        min_wh = basic.minimum(width, height)

        start = offset * pos_offset_flag + offset_abs * width * neg_offset_flag
        num_of_step = basic.minimum(min_wh, width * pos_offset_flag + height * neg_offset_flag - offset_abs)

        step = a.shape[1] + 1
        end = start + step * num_of_step

        # slice indices must be integers
        start = basic.cast(start, "int32")
        step = basic.cast(step, "int32")
        end = basic.cast(end, "int32")

        wr_val = grad.flatten()[start:end:step].sum()

        wr_offset = theano.gradient.grad_undefined(
            self,
            2,
            offset,
            "offset is not defined for non-integer offset so"
            " fill_diagonal_offset(a,val,offset+eps) is undefined",
        )

        return [wr_a, wr_val, wr_offset]
Example 5
def sparse_mean(x, axis=None):
    """Mean of a tensor, along the specified axis."""
    # bool is available since theano v0.9dev
    if 'int' in x.dtype or x.dtype == 'bool':
        dtype = floatx()
    else:
        dtype = x.dtype

    if isinstance(axis, (integer_types, np.integer)):
        if axis == -1:
            axis = max(x.ndim - 1, 0)
    s = th_sparse_module.sp_sum(x, axis, True)
    shp = shape(x)

    if s.dtype in ('float16', 'float32', 'complex64'):
        shp = cast(shp, 'float32')
    else:
        shp = cast(shp, 'float64')

    if axis is None:
        axis = list(range(len(x.data.shape)))
    elif isinstance(axis, (integer_types, np.integer)):
        axis = [axis]
    elif isinstance(axis, np.ndarray) and axis.ndim == 0:
        axis = [int(axis)]
    else:
        axis = [int(a) for a in axis]

    for i in axis:
        s = true_div(s, shp[i])

    if s.dtype != shp.dtype and s.dtype in discrete_dtypes:
        s = cast(s, shp.dtype)

    if dtype == 'float16' or (dtype is None and x.dtype == 'float16'):
        s = cast(s, 'float16')
    s.name = 'mean'
    return s
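The division loop at the end is equivalent to dividing the axis-wise sum by the size of every reduced axis. A quick check with SciPy sparse matrices (illustrative only; the backend above operates on Theano sparse variables, not SciPy objects):

import numpy as np
import scipy.sparse as sp

x = sp.random(4, 6, density=0.3, format="csr", dtype="float32")

s = np.asarray(x.sum(axis=0))   # sum over axis 0 -> shape (1, 6)
mean = s / x.shape[0]           # divide by the length of the reduced axis
assert np.allclose(mean, x.toarray().mean(axis=0, keepdims=True))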
Example 6
    def make_node(self, a, val):
        a = basic.as_tensor_variable(a)
        val = basic.as_tensor_variable(val)
        if a.ndim < 2:
            raise TypeError("%s: first parameter must have at least"
                            " two dimensions" % self.__class__.__name__)
        elif val.ndim != 0:
            raise TypeError("%s: second parameter must be a scalar" %
                            self.__class__.__name__)
        val = basic.cast(val, dtype=upcast(a.dtype, val.dtype))
        if val.dtype != a.dtype:
            raise TypeError("%s: type of second parameter must be the same as"
                            " the first's" % self.__class__.__name__)
        return Apply(self, [a, val], [a.type()])
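The cast-then-check pattern above relies on Theano's dtype upcasting rules: `val` is first cast to the common dtype of `a` and `val`, and the check then rejects any combination whose common dtype is wider than `a`'s. A quick illustration, assuming `upcast` is the helper imported from `theano.scalar` in this module:

from theano.scalar import upcast

print(upcast("float64", "float32"))  # 'float64' -> accepted for a float64 matrix
print(upcast("float32", "float64"))  # 'float64' -> rejected for a float32 matrix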
Example 7
def local_abstract_batch_norm_train(node):
    if not isinstance(node.op, AbstractBatchNormTrain):
        return None

    x, scale, bias, epsilon, running_average_factor = node.inputs[:5]
    axes = node.op.axes
    if min(axes) < 0 or max(axes) > x.ndim:
        return None
    if not isinstance(x.type, TensorType) or \
       not isinstance(scale.type, TensorType) or \
       not isinstance(bias.type, TensorType) or \
       not isinstance(epsilon.type, TensorType) or \
       not isinstance(running_average_factor.type, TensorType):
        return None
    # optional running_mean and running_var
    if len(node.inputs) > 5 and not isinstance(node.inputs[5].type, TensorType):
        return None
    if len(node.inputs) > 6 and not isinstance(node.inputs[6].type, TensorType):
        return None

    mean = x.mean(axes, keepdims=True)
    var = x.var(axes, keepdims=True)
    # The epsilon should not upcast the dtype.
    if var.dtype == 'float32' and epsilon.dtype == 'float64':
        epsilon = epsilon.astype('float32')
    invstd = T.inv(T.sqrt(var + epsilon))
    out = (x - mean) * (scale * invstd) + bias
    results = [out, mean, invstd]

    if len(node.inputs) > 5:
        running_mean = node.inputs[5]
        running_mean = running_mean * (1.0 - running_average_factor) + \
            mean * running_average_factor
        results.append(running_mean)
    if len(node.inputs) > 6:
        m = T.cast(T.prod(x.shape) / T.prod(scale.shape), theano.config.floatX)
        running_var = node.inputs[6]
        running_var = running_var * (1.0 - running_average_factor) + \
            (m / (m - 1)) * var * running_average_factor
        results.append(running_var)

    results = [T.patternbroadcast(r, r_orig.broadcastable)
               for (r, r_orig) in zip(results, node.outputs)]

    for var in theano.gof.graph.variables(node.inputs, results):
        if var not in node.inputs:
            copy_stack_trace(node.outputs[0], var)
    return results
Example 8
    def make_node(self, a, val, offset):
        a = basic.as_tensor_variable(a)
        val = basic.as_tensor_variable(val)
        offset = basic.as_tensor_variable(offset)
        if a.ndim != 2:
            raise TypeError("%s: first parameter must have exactly"
                            " two dimensions" % self.__class__.__name__)
        elif val.ndim != 0:
            raise TypeError("%s: second parameter must be a scalar" %
                            self.__class__.__name__)
        elif offset.ndim != 0:
            raise TypeError("%s: third parameter must be a scalar" %
                            self.__class__.__name__)
        val = basic.cast(val, dtype=upcast(a.dtype, val.dtype))
        if val.dtype != a.dtype:
            raise TypeError("%s: type of second parameter must be the same"
                            " as the first's" % self.__class__.__name__)
        elif offset.dtype not in theano.tensor.integer_dtypes:
            raise TypeError("%s: type of third parameter must be an integer;"
                            " use theano.tensor.cast(input, 'int32'/'int64')" %
                            self.__class__.__name__)

        return Apply(self, [a, val, offset], [a.type()])
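A usage sketch for the op this `make_node` belongs to, assuming it is exposed as `fill_diagonal_offset` in `theano.tensor.extra_ops` (note the explicit integer dtype required for the offset):

import numpy as np
import theano
import theano.tensor as tt
from theano.tensor.extra_ops import fill_diagonal_offset

a = tt.matrix("a")
val = tt.scalar("val")
offset = tt.iscalar("offset")   # third parameter must have an integer dtype

f = theano.function([a, val, offset], fill_diagonal_offset(a, val, offset))

a_val = np.zeros((3, 4), dtype=theano.config.floatX)
print(f(a_val, 7.0, 1))         # 7.0 written along the first super-diagonal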
Example 9
    def make_node(self, rng, size, dtype, *dist_params):
        """Create a random variable node.

        XXX: Unnamed/non-keyword arguments are considered distribution
        parameters!  If you want to set `size`, `rng`, and/or `name`, use their
        keywords.

        Parameters
        ----------
        rng: RandomStateType
            Existing Theano `RandomState` object to be used.  Creates a
            new one, if `None`.
        size: int or Sequence
            Numpy-like size of the output (i.e. replications).
        dtype: Theano dtype
            The dtype of the sampled output.  This value is only used when
            `self.dtype` isn't set.
        dist_params: list
            Distribution parameters.

        Returns
        -------
        out: `Apply`
            A node with inputs `(rng, size, dtype) + dist_params` and outputs
            `(rng_var, out_var)`.

        """
        if size is None:
            size = constant([], dtype="int64")
        elif isinstance(size, int):
            size = as_tensor_variable([size], ndim=1)
        elif not isinstance(size, (np.ndarray, Variable, Sequence)):
            raise TypeError(
                "Parameter size must be None, an integer, or a sequence with integers."
            )
        else:
            size = cast(as_tensor_variable(size, ndim=1), "int64")

        assert size.dtype in int_dtypes

        dist_params = tuple(
            as_tensor_variable(p) if not isinstance(p, Variable) else p
            for p in dist_params
        )

        if rng is None:
            rng = theano.shared(np.random.RandomState())
        elif not isinstance(rng.type, RandomStateType):
            raise TypeError("The type of rng should be an instance of RandomStateType")

        bcast = self.compute_bcast(dist_params, size)
        dtype = self.dtype or dtype

        if dtype is None or (isinstance(dtype, str) and dtype not in all_dtypes):
            # dtype = tt.scal.upcast(self.dtype, *[p.dtype for p in dist_params])
            raise TypeError("dtype is unspecified")

        if isinstance(dtype, str):
            dtype_idx = constant(all_dtypes.index(dtype), dtype="int64")
        else:
            dtype_idx = constant(dtype, dtype="int64")
            dtype = all_dtypes[dtype_idx.data]

        outtype = TensorType(dtype=dtype, broadcastable=bcast)
        out_var = outtype()
        inputs = (rng, size, dtype_idx) + dist_params
        outputs = (rng.type(), out_var)

        return Apply(self, inputs, outputs)
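The `size` handling above normalises `None`, plain integers, and sequences into a 1-d int64 vector. A plain-Python/NumPy mirror of that logic (illustrative only, independent of Theano):

import numpy as np

def normalize_size(size):
    # Mirrors the branches above: None -> empty vector, int -> length-1 vector,
    # any sequence/array -> 1-d int64 vector.
    if size is None:
        return np.array([], dtype="int64")
    if isinstance(size, int):
        return np.array([size], dtype="int64")
    return np.asarray(size, dtype="int64")

assert normalize_size(None).shape == (0,)
assert (normalize_size(3) == [3]).all()
assert (normalize_size((2, 4)) == [2, 4]).all()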
Example 10
    def infer_shape(self, fgraph, node, in_shapes):
        temp = node.inputs[0]
        M = basic.switch(basic.lt(temp, 0), basic.cast(0, temp.dtype), temp)
        return [[M]]