Example #1
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of non-pooling (leading) dimensions
    of the input to exactly `leftdims`, either by adding singleton
    dimensions on the left or by combining several existing leading
    dimensions into one.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).
    """
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left
        dummy_dims = tensor.as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = tensor.join(0, dummy_dims, input.shape[:non_pool_ndim], img_shape)
    else:
        # too many dimensions, combine the leading dimensions
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = tensor.prod(input.shape[:batched_ndim])
        # convert to a vector for tensor.join
        batch_size = tensor.shape_padright(batch_size, 1)
        new_shape = tensor.join(
            0, batch_size, input.shape[batched_ndim:non_pool_ndim], img_shape
        )

    # store in the required shape
    new_shape = tensor.cast(new_shape, "int64")
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND
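For reference, a NumPy-only sketch of the shape rule this helper implements (shape arithmetic only, not the GpuReshape op itself); pad_dims_shape is a hypothetical name used for illustration:

import numpy as np

def pad_dims_shape(shape, leftdims, rightdims):
    # keep the trailing `rightdims` axes; either pad the leading axes with
    # singleton dimensions or collapse them into a single batch dimension
    if len(shape) == leftdims + rightdims:
        return shape
    non_pool = len(shape) - rightdims
    img = shape[-rightdims:]
    if non_pool < leftdims:
        return (1,) * (leftdims - non_pool) + shape[:non_pool] + img
    batched = non_pool - leftdims + 1
    return (int(np.prod(shape[:batched])),) + shape[batched:non_pool] + img

assert pad_dims_shape((3, 5, 7), 2, 2) == (1, 3, 5, 7)
assert pad_dims_shape((3, 5, 7, 9), 2, 2) == (3, 5, 7, 9)
assert pad_dims_shape((3, 5, 7, 9, 11), 2, 2) == (15, 7, 9, 11)
assert pad_dims_shape((3, 5, 7, 9), 2, 3) == (1, 3, 5, 7, 9)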
Example #2
def local_det_chol(node):
    """
    If we have det(X) and there is already an L = cholesky(X)
    floating around, then we can use prod(diag(L))**2 to get the determinant.

    """
    if node.op == det:
        (x, ) = node.inputs
        for (cl, xpos) in x.clients:
            if isinstance(cl.op, Cholesky):
                L = cl.outputs[0]
                return [tensor.prod(extract_diag(L)**2)]
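The identity this rewrite relies on: for X = L @ L.T with L = cholesky(X), det(X) = prod(diag(L)) ** 2, which is exactly the prod(diag(L)**2) returned above. A quick NumPy check (illustration only, not part of the rewrite):

import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((4, 4))
X = A @ A.T + 4 * np.eye(4)   # symmetric positive definite
L = np.linalg.cholesky(X)     # X == L @ L.T, L lower triangular
assert np.isclose(np.linalg.det(X), np.prod(np.diag(L)) ** 2)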
Example #3
def _get_scaling(total_size, shape, ndim):
    """
    Gets the scaling constant for logp.

    Parameters
    ----------
    total_size: int or list[int]
    shape: shape
        shape to scale
    ndim: int
        ndim hint

    Returns
    -------
    scalar
    """
    if total_size is None:
        coef = floatX(1)
    elif isinstance(total_size, int):
        if ndim >= 1:
            denom = shape[0]
        else:
            denom = 1
        coef = floatX(total_size) / floatX(denom)
    elif isinstance(total_size, (list, tuple)):
        if not all(
                isinstance(i, int)
                for i in total_size if (i is not Ellipsis and i is not None)):
            raise TypeError("Unrecognized `total_size` type, expected "
                            "int or list of ints, got %r" % total_size)
        if Ellipsis in total_size:
            sep = total_size.index(Ellipsis)
            begin = total_size[:sep]
            end = total_size[sep + 1:]
            if Ellipsis in end:
                raise ValueError(
                    "Double Ellipsis in `total_size` is restricted, got %r" %
                    total_size)
        else:
            begin = total_size
            end = []
        if (len(begin) + len(end)) > ndim:
            raise ValueError("Length of `total_size` is too big, "
                             "number of scalings is bigger that ndim, got %r" %
                             total_size)
        elif (len(begin) + len(end)) == 0:
            return floatX(1)
        if len(end) > 0:
            shp_end = shape[-len(end):]
        else:
            shp_end = np.asarray([])
        shp_begin = shape[:len(begin)]
        begin_coef = [
            floatX(t) / shp_begin[i] for i, t in enumerate(begin)
            if t is not None
        ]
        end_coef = [
            floatX(t) / shp_end[i] for i, t in enumerate(end) if t is not None
        ]
        coefs = begin_coef + end_coef
        coef = at.prod(coefs)
    else:
        raise TypeError(
            "Unrecognized `total_size` type, expected int or list of ints, got %r"
            % total_size)
    return at.as_tensor(floatX(coef))
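A plain-Python sketch of the list/Ellipsis form (scaling_list is a hypothetical helper for illustration, not the PyMC API): entries before the Ellipsis scale the leading axes, entries after it scale the trailing axes, and the returned coefficient is their product.

import numpy as np

def scaling_list(total_size, shape):
    sep = total_size.index(Ellipsis)
    begin, end = total_size[:sep], total_size[sep + 1:]
    begin_coef = [t / s for t, s in zip(begin, shape[:len(begin)]) if t is not None]
    end_coef = [t / s for t, s in zip(end, shape[-len(end):]) if t is not None]
    return float(np.prod(begin_coef + end_coef))

# full data: 1000 along the first axis, 20 along the last; minibatch shape (100, 7, 20)
# -> coefficient (1000 / 100) * (20 / 20) == 10.0
assert scaling_list([1000, Ellipsis, 20], (100, 7, 20)) == 10.0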
Example #4
def _get_scaling(total_size: Optional[Union[int, Sequence[int]]], shape,
                 ndim: int):
    """
    Gets the scaling constant for logp.

    Parameters
    ----------
    total_size: Optional[int|List[int]]
        size of the fully observed data without minibatching;
        `None` means the data is fully observed
    shape: shape
        shape of the observed data
    ndim: int
        ndim hint

    Returns
    -------
    scalar
    """
    if total_size is None:
        coef = 1.0
    elif isinstance(total_size, int):
        if ndim >= 1:
            denom = shape[0]
        else:
            denom = 1
        coef = floatX(total_size) / floatX(denom)
    elif isinstance(total_size, (list, tuple)):
        if not all(
                isinstance(i, int)
                for i in total_size if (i is not Ellipsis and i is not None)):
            raise TypeError("Unrecognized `total_size` type, expected "
                            "int or list of ints, got %r" % total_size)
        if Ellipsis in total_size:
            sep = total_size.index(Ellipsis)
            begin = total_size[:sep]
            end = total_size[sep + 1:]
            if Ellipsis in end:
                raise ValueError(
                    "Double Ellipsis in `total_size` is restricted, got %r" %
                    total_size)
        else:
            begin = total_size
            end = []
        if (len(begin) + len(end)) > ndim:
            raise ValueError("Length of `total_size` is too big, "
                             "number of scalings is bigger that ndim, got %r" %
                             total_size)
        elif (len(begin) + len(end)) == 0:
            coef = 1.0
        if len(end) > 0:
            shp_end = shape[-len(end):]
        else:
            shp_end = np.asarray([])
        shp_begin = shape[:len(begin)]
        begin_coef = [
            floatX(t) / floatX(shp_begin[i]) for i, t in enumerate(begin)
            if t is not None
        ]
        end_coef = [
            floatX(t) / floatX(shp_end[i]) for i, t in enumerate(end)
            if t is not None
        ]
        coefs = begin_coef + end_coef
        coef = at.prod(coefs)
    else:
        raise TypeError(
            "Unrecognized `total_size` type, expected int or list of ints, got %r"
            % total_size)
    return at.as_tensor(coef, dtype=aesara.config.floatX)
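And the scalar case in plain Python (scaling_int is a hypothetical helper for illustration, not the PyMC API): a single integer total_size scales the logp by total_size / shape[0].

def scaling_int(total_size, shape, ndim):
    # with an integer total_size, only the leading axis is scaled
    denom = shape[0] if ndim >= 1 else 1
    return float(total_size) / float(denom)

# a minibatch of 100 rows drawn from 1000 observations is scaled by 10
assert scaling_int(1000, (100, 10), ndim=2) == 10.0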
Example #5
def test_batch_normalization_train():

    for axes in ("per-activation", "spatial", (1, 2, 3, 4)):
        for vartype in (tensor5, tensor3, vector):
            x, scale, bias, running_mean, running_var = (vartype(n) for n in (
                "x", "scale", "bias", "running_mean", "running_var"))
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used
            running_average_factor = 0.3

            # remove non-existing axes
            if isinstance(axes, tuple):
                axes = tuple(i for i in axes if i < ndim)
            if len(axes) == 0:
                continue

            # forward pass
            (
                out,
                x_mean,
                x_invstd,
                out_running_mean,
                out_running_var,
            ) = batchnorm.batch_normalization_train(
                x,
                scale,
                bias,
                axes,
                eps,
                running_average_factor,
                running_mean,
                running_var,
            )
            # reference forward pass
            if axes == "per-activation":
                axes2 = (0, )
            elif axes == "spatial":
                axes2 = (0, ) + tuple(range(2, ndim))
            else:
                axes2 = axes
            x_mean2 = x.mean(axis=axes2, keepdims=True)
            x_var2 = x.var(axis=axes2, keepdims=True)
            x_invstd2 = aet.reciprocal(aet.sqrt(x_var2 + eps))
            scale2 = aet.addbroadcast(scale, *axes2)
            bias2 = aet.addbroadcast(bias, *axes2)
            out2 = (x - x_mean2) * (scale2 * x_invstd2) + bias2
            m = aet.cast(
                aet.prod(x.shape) / aet.prod(scale.shape),
                aesara.config.floatX)
            out_running_mean2 = (running_mean * (1 - running_average_factor) +
                                 x_mean2 * running_average_factor)
            out_running_var2 = (running_var * (1 - running_average_factor) +
                                (m /
                                 (m - 1)) * x_var2 * running_average_factor)
            # backward pass
            dy = vartype("dy")
            grads = aet.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
            # reference backward pass
            grads2 = aet.grad(None,
                              wrt=[x, scale, bias],
                              known_grads={out2: dy})
            # second-order backward pass
            dx = vartype("dinputs")
            dscale = vartype("dscale")
            dbias = vartype("dbias")
            grad_grads = aet.grad(
                None,
                wrt=[x, dy, scale],
                known_grads=OrderedDict({
                    grads[0]: dx,
                    grads[1]: dscale,
                    grads[2]: dbias
                }),
                consider_constant=[
                    x,
                    dy,
                    scale,
                    bias,
                    x_mean,
                    x_invstd,
                    running_mean,
                    running_var,
                ],
                return_disconnected="zero",
            )
            # reference second-order backward pass
            grad_grads2 = aet.grad(
                None,
                wrt=[x, dy, scale],
                known_grads=OrderedDict({
                    grads2[0]: dx,
                    grads2[1]: dscale,
                    grads2[2]: dbias
                }),
                consider_constant=[
                    x,
                    dy,
                    scale,
                    bias,
                    x_mean2,
                    x_var2,
                    running_mean,
                    running_var,
                ],
                return_disconnected="zero",
            )
            # compile
            f = aesara.function(
                [
                    x, scale, bias, running_mean, running_var, dy, dx, dscale,
                    dbias
                ],
                [
                    out,
                    x_mean,
                    x_invstd,
                    out_running_mean,
                    out_running_var,
                    out2,
                    x_mean2,
                    x_invstd2,
                    out_running_mean2,
                    out_running_var2,
                ] + grads + grads2 + grad_grads + grad_grads2,
            )
            # check if the abstract Ops have been replaced
            assert not any([
                isinstance(
                    n.op,
                    (
                        batchnorm.AbstractBatchNormTrain,
                        batchnorm.AbstractBatchNormInference,
                        batchnorm.AbstractBatchNormTrainGrad,
                    ),
                ) for n in f.maker.fgraph.toposort()
            ])
            # run
            for data_shape in (
                (5, 10, 30, 40, 10),
                (4, 3, 1, 1, 1),
                (2, 3, 5, 5, 5),
            ):
                data_shape = data_shape[:ndim]
                param_shape = tuple(1 if d in axes2 else s
                                    for d, s in enumerate(data_shape))

                rng = np.random.default_rng(1234)

                X = 4 + 3 * rng.random(data_shape).astype(aesara.config.floatX)
                Dy = -1 + 2 * rng.random(data_shape).astype(
                    aesara.config.floatX)
                Scale = rng.random(param_shape).astype(aesara.config.floatX)
                Bias = rng.random(param_shape).astype(aesara.config.floatX)
                Running_mean = rng.random(param_shape).astype(
                    aesara.config.floatX)
                Running_var = rng.random(param_shape).astype(
                    aesara.config.floatX)
                Dx = 4 + 3 * rng.random(data_shape).astype(
                    aesara.config.floatX)
                Dscale = -1 + 2 * rng.random(param_shape).astype(
                    aesara.config.floatX)
                Dbias = rng.random(param_shape).astype(aesara.config.floatX)

                outputs = f(X, Scale, Bias, Running_mean, Running_var, Dy, Dx,
                            Dscale, Dbias)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[0 + 5])  # out
                utt.assert_allclose(outputs[1], outputs[1 + 5])  # mean
                utt.assert_allclose(outputs[2], outputs[2 + 5])  # invstd
                utt.assert_allclose(outputs[3], outputs[3 + 5])  # running_mean
                utt.assert_allclose(
                    np.nan_to_num(outputs[4]), np.nan_to_num(outputs[4 + 5])
                )  # running_var
                # compare gradients
                utt.assert_allclose(outputs[10], outputs[10 + 3],
                                    atol=1e-4)  # dx
                utt.assert_allclose(outputs[11],
                                    outputs[11 + 3],
                                    rtol=2e-4,
                                    atol=1e-4)  # dscale
                utt.assert_allclose(outputs[12], outputs[12 + 3])  # dbias
                # compare second-order gradients
                utt.assert_allclose(outputs[16], outputs[16 + 3],
                                    atol=1e-4)  # ddx
                utt.assert_allclose(outputs[17], outputs[17 + 3])  # ddy
                utt.assert_allclose(outputs[18],
                                    outputs[18 + 3],
                                    rtol=3e-4,
                                    atol=1e-4)  # ddscale
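A NumPy-only sketch of the reference forward pass the test checks against, for the "spatial" mode (normalize over every axis except the channel axis 1); the shapes below are illustrative, not taken from the test:

import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((2, 3, 4, 4)).astype("float32")
scale = rng.standard_normal((1, 3, 1, 1)).astype("float32")
bias = rng.standard_normal((1, 3, 1, 1)).astype("float32")
eps = 5e-3

axes = (0, 2, 3)  # "spatial": all axes except the channel axis
mean = x.mean(axis=axes, keepdims=True)
invstd = 1.0 / np.sqrt(x.var(axis=axes, keepdims=True) + eps)
out = (x - mean) * (scale * invstd) + bias

# per channel, the normalized output averages to the bias
assert np.allclose(out.mean(axis=axes), bias.squeeze(), atol=1e-5)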
Example #6
    def normal(
        self,
        size,
        avg=0.0,
        std=1.0,
        ndim=None,
        dtype=None,
        nstreams=None,
        truncate=False,
        **kwargs,
    ):
        """
        Sample a tensor of values from a normal distribution.

        Parameters
        ----------
        size : int_vector_like
            Array dimensions for the output tensor.
        avg : float_like, optional
            The mean of the normal distribution to sample from (defaults to 0.0).
        std : float_like, optional
            The standard deviation of the normal distribution to sample from (defaults to 1.0).
        truncate : bool, optional
            Truncates the normal distribution at 2 standard deviations if True (defaults to False).
            When this flag is set, the standard deviation of the result will be less than the one specified.
        ndim : int, optional
            The number of dimensions for the output tensor (defaults to None).
            This argument is necessary if the size argument is ambiguous about the number of dimensions.
        dtype : str, optional
            The data-type for the output tensor. If not specified,
            the dtype is inferred from avg and std, but it is at least as precise as floatX.
        kwargs
            Other keyword arguments for random number generation (see uniform).

        Returns
        -------
        samples : TensorVariable
            An Aesara tensor of samples randomly drawn from a normal distribution.

        """
        size = _check_size(size)
        avg = undefined_grad(as_tensor_variable(avg))
        std = undefined_grad(as_tensor_variable(std))

        if dtype is None:
            dtype = scal.upcast(config.floatX, avg.dtype, std.dtype)

        avg = tensor.cast(avg, dtype=dtype)
        std = tensor.cast(std, dtype=dtype)

        # generate an even number of uniform samples
        # Do manual constant folding to reduce the optimizer's work.
        if isinstance(size, aesara.Constant):
            n_odd_samples = size.prod(dtype="int64")
        else:
            n_odd_samples = tensor.prod(size, dtype="int64")
        n_even_samples = n_odd_samples + n_odd_samples % 2
        uniform = self.uniform(
            (n_even_samples, ),
            low=0.0,
            high=1.0,
            ndim=1,
            dtype=dtype,
            nstreams=nstreams,
            **kwargs,
        )

        # Box-Muller transform
        u1 = uniform[:n_even_samples // 2]
        u2 = uniform[n_even_samples // 2:]
        r = tensor.sqrt(-2.0 * tensor.log(u1))
        theta = np.array(2.0 * np.pi, dtype=dtype) * u2
        cos_theta, sin_theta = tensor.cos(theta), tensor.sin(theta)
        z0 = r * cos_theta
        z1 = r * sin_theta

        if truncate:
            # use valid samples
            to_fix0 = (z0 < -2.0) | (z0 > 2.0)
            to_fix1 = (z1 < -2.0) | (z1 > 2.0)
            z0_valid = z0[tensor.nonzero(~to_fix0)]
            z1_valid = z1[tensor.nonzero(~to_fix1)]

            # re-sample invalid samples
            to_fix0 = tensor.nonzero(to_fix0)[0]
            to_fix1 = tensor.nonzero(to_fix1)[0]
            n_fix_samples = to_fix0.size + to_fix1.size
            lower = tensor.constant(1.0 / np.e**2, dtype=dtype)
            u_fix = self.uniform(
                (n_fix_samples, ),
                low=lower,
                high=1.0,
                ndim=1,
                dtype=dtype,
                nstreams=nstreams,
                **kwargs,
            )
            r_fix = tensor.sqrt(-2.0 * tensor.log(u_fix))
            z0_fixed = r_fix[:to_fix0.size] * cos_theta[to_fix0]
            z1_fixed = r_fix[to_fix0.size:] * sin_theta[to_fix1]

            # pack everything together to a useful result
            norm_samples = tensor.join(0, z0_valid, z0_fixed, z1_valid,
                                       z1_fixed)
        else:
            norm_samples = tensor.join(0, z0, z1)
        if isinstance(n_odd_samples, aesara.Variable):
            samples = norm_samples[:n_odd_samples]
        elif n_odd_samples % 2 == 1:
            samples = norm_samples[:-1]
        else:
            samples = norm_samples
        samples = tensor.reshape(samples, newshape=size, ndim=ndim)
        samples *= std
        samples += avg

        return samples
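A NumPy sketch of the Box-Muller step used above, mapping pairs of uniform samples to pairs of independent standard normals (illustration only, not the random-stream implementation in this method):

import numpy as np

rng = np.random.default_rng(0)
u1 = 1.0 - rng.random(50000)   # in (0, 1] so the log is finite
u2 = rng.random(50000)
r = np.sqrt(-2.0 * np.log(u1))
theta = 2.0 * np.pi * u2
z = np.concatenate([r * np.cos(theta), r * np.sin(theta)])

# the 100000 samples should look standard normal
assert abs(z.mean()) < 0.02
assert abs(z.std() - 1.0) < 0.02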