Example #1
    def test_composite_elemwise_float16(self):
        w = bvector()
        x = vector(dtype="float16")
        y = fvector()

        cz = tanh(x + aet.cast(y, "float16"))
        o = (
            cz
            - cz ** 2
            + aet.cast(x, "int16")
            + aet.cast(x, "float32")
            + aet.cast(w, "float16")
            - aet.constant(np.float16(1.0))
        )

        aesara.function([w, x, y], o, mode=mode_with_gpu)

        v = vector(dtype="uint8")
        w = vector(dtype="float16")
        x = vector(dtype="float16")
        y = vector(dtype="float16")
        z = vector(dtype="float16")

        o = aet.switch(v, mul(w, x, y), z)
        aesara.function([v, w, x, y, z], o, mode=mode_with_gpu)
Example #2
File: cov.py Project: t-triobox/pymc3
 def full(self, X, Xs=None):
     X, Xs = self._slice(X, Xs)
     index = at.cast(X, "int32")
     if Xs is None:
         index2 = index.T
     else:
         index2 = at.cast(Xs, "int32").T
     return self.B[index, index2]
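
The `full` method treats each input entry as an integer category index into the coefficient matrix `B`, so the covariance between two points is a table lookup. A hedged NumPy illustration of the same fancy indexing (all values below are made up):

import numpy as np

B = np.array([[1.0, 0.2], [0.2, 1.0]])  # hypothetical 2x2 coefficient matrix
X = np.array([[0], [1], [0]])           # three points with categories 0, 1, 0
index = X.astype("int32")               # shape (3, 1)
index2 = index.T                        # shape (1, 3)
K = B[index, index2]                    # shape (3, 3); K[i, j] == B[X[i], X[j]]
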
Example #3
    def test_xent_thing_int32(self):
        x = matrix("x")
        y = lvector("y")
        yi = aet.cast(y, "int32")
        expressions = [
            aet_sum(-log(softmax(x)[aet.arange(yi.shape[0]), yi])),
            -aet_sum(log(softmax(x)[aet.arange(yi.shape[0]), yi])),
            -aet_sum(log(softmax(x))[aet.arange(yi.shape[0]), yi]),
            aet_sum(-log(softmax(x))[aet.arange(yi.shape[0]), yi]),
        ]

        for expr in expressions:
            fgraph = FunctionGraph([x, y], [expr])
            optdb.query(OPT_FAST_RUN).optimize(fgraph)

            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 5
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)]

            # Also verify the gradient wrt x
            fgraph = FunctionGraph([x, y], [grad(expr, x)])
            optdb.query(OPT_FAST_RUN).optimize(fgraph)

            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 3
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_legacy in ops
            assert softmax_grad_legacy not in ops
Example #4
def incomplete_beta_ps(a, b, value):
    """Power series for incomplete beta
    Use when b*x is small and value not too close to 1.
    Based on Cephes library by Steve Moshier (incbet.c)
    """
    one = aet.constant(1, dtype="float64")
    ai = one / a
    u = (one - b) * value
    t1 = u / (a + one)
    t = u
    threshold = np.finfo(np.float64).eps * ai
    s = aet.constant(0, dtype="float64")

    def _step(i, t, s):
        t *= (i - b) * value / i
        step = t / (a + i)
        s += step
        return ((t, s), until(aet.abs_(step) < threshold))

    (t, s), _ = scan(_step,
                     sequences=[aet.arange(2, 302)],
                     outputs_info=[e for e in aet.cast((t, s), "float64")])

    s = s[-1] + t1 + ai

    t = (
        gammaln(a + b) - gammaln(a) - gammaln(b)
        + a * aet.log(value) + aet.log(s)
    )
    return aet.exp(t)
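
For reference, the same Cephes-style power series can be written as a plain NumPy loop. This is a hedged sketch, not library code: the helper name `incomplete_beta_ps_np` is ours, and it simply mirrors the `scan` body above step for step.

import numpy as np
from scipy.special import gammaln

def incomplete_beta_ps_np(a, b, value, max_iter=300):
    # Hypothetical NumPy mirror of the symbolic scan loop above.
    ai = 1.0 / a
    u = (1.0 - b) * value
    t1 = u / (a + 1.0)
    t = u
    threshold = np.finfo(np.float64).eps * ai
    s = 0.0
    for i in range(2, max_iter + 2):
        t *= (i - b) * value / i
        step = t / (a + i)
        s += step
        if abs(step) < threshold:  # same stopping rule as the `until` above
            break
    s = s + t1 + ai
    return np.exp(
        gammaln(a + b) - gammaln(a) - gammaln(b) + a * np.log(value) + np.log(s)
    )

For small ``b * value`` this should agree with ``scipy.special.betainc(a, b, value)``.
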
Example #5
 def test_illegal(self):
     x = zmatrix()
     with pytest.raises(TypeError):
         function([x], cast(x, "float64"))(np.ones((2, 3), dtype="complex128"))
Example #6
    def test_observed_with_column_vector(self):
        """This test is related to https://github.com/pymc-devs/aesara/issues/390 which breaks
        broadcastability of column-vector RVs. This unexpected change in type can lead to
        incompatibilities during graph rewriting for model.logp evaluation.
        """
        with pm.Model() as model:
            # The `observed` is a broadcastable column vector
            obs = at.as_tensor_variable(
                np.ones((3, 1), dtype=aesara.config.floatX))
            assert obs.broadcastable == (False, True)

            # Both shapes describe broadcastable column vectors
            size64 = at.constant([3, 1], dtype="int64")
            # But the second shape is upcast from an int32 vector
            cast64 = at.cast(at.constant([3, 1], dtype="int32"), dtype="int64")

            pm.Normal("size64", mu=0, sigma=1, size=size64, observed=obs)
            pm.Normal("shape64", mu=0, sigma=1, shape=size64, observed=obs)
            model.logp()

            pm.Normal("size_cast64", mu=0, sigma=1, size=cast64, observed=obs)
            pm.Normal("shape_cast64",
                      mu=0,
                      sigma=1,
                      shape=cast64,
                      observed=obs)
            model.logp()
Example #7
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of non-pooling (leading) dimensions
    of the input to exactly `leftdims`, either by adding extra dimensions
    on the left or by combining some existing leading dimensions.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).
    """
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left
        dummy_dims = tensor.as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = tensor.join(0, dummy_dims, input.shape[:non_pool_ndim], img_shape)
    else:
        # too many dimensions, combine the leading dimensions
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = tensor.prod(input.shape[:batched_ndim])
        # convert to a vector for tensor.join
        batch_size = tensor.shape_padright(batch_size, 1)
        new_shape = tensor.join(
            0, batch_size, input.shape[batched_ndim:non_pool_ndim], img_shape
        )

    # store in the required shape
    new_shape = tensor.cast(new_shape, "int64")
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND
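
The docstring refers to `unpad_dims` for the inverse reshape. Below is a hedged sketch of what that inverse looks like (a reconstruction for illustration, not necessarily the library's exact code): keep the trailing `rightdims` dimensions of the pooled output and restore the original leading dimensions from the saved input.

def unpad_dims(output, input, leftdims, rightdims):
    """Reshape `output` back to the leading dimensions of `input` (sketch)."""
    if output.ndim == input.ndim:
        # pad_dims was a no-op, so there is nothing to undo
        return output
    # keep the (possibly pooled) rightmost dims, restore the leading ones
    outshp = tensor.join(0, input.shape[:-rightdims], output.shape[-rightdims:])
    return GpuReshape(input.ndim)(output, outshp)
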
Example #8
    def test_composite_elemwise_float16(self):
        w = aesara.tensor.bvector()
        x = aesara.tensor.vector(dtype="float16")
        y = aesara.tensor.fvector()

        cz = tensor.tanh(x + tensor.cast(y, "float16"))
        o = (cz - cz**2 + tensor.cast(x, "int16") + tensor.cast(x, "float32") +
             tensor.cast(w, "float16") - tensor.constant(np.float16(1.0)))

        aesara.function([w, x, y], o, mode=mode_with_gpu)

        v = aesara.tensor.vector(dtype="uint8")
        w = aesara.tensor.vector(dtype="float16")
        x = aesara.tensor.vector(dtype="float16")
        y = aesara.tensor.vector(dtype="float16")
        z = aesara.tensor.vector(dtype="float16")

        o = tensor.switch(v, tensor.mul(w, x, y), z)
        aesara.function([v, w, x, y, z], o, mode=mode_with_gpu)
Example #9
def test_extract_obs_data():

    with pytest.raises(TypeError):
        extract_obs_data(at.matrix())

    data = np.random.normal(size=(2, 3))
    data_at = at.as_tensor(data)
    mask = np.random.binomial(1, 0.5, size=(2, 3)).astype(bool)

    for val_at in (data_at, aesara.shared(data)):
        res = extract_obs_data(val_at)

        assert isinstance(res, np.ndarray)
        assert np.array_equal(res, data)

    # AdvancedIncSubtensor check
    data_m = np.ma.MaskedArray(data, mask)
    missing_values = data_at.type()[mask]
    constant = at.as_tensor(data_m.filled())
    z_at = at.set_subtensor(constant[mask.nonzero()], missing_values)

    assert isinstance(z_at.owner.op,
                      (AdvancedIncSubtensor, AdvancedIncSubtensor1))

    res = extract_obs_data(z_at)

    assert isinstance(res, np.ndarray)
    assert np.ma.allequal(res, data_m)

    # AdvancedIncSubtensor1 check
    data = np.random.normal(size=(3, ))
    data_at = at.as_tensor(data)
    mask = np.random.binomial(1, 0.5, size=(3, )).astype(bool)

    data_m = np.ma.MaskedArray(data, mask)
    missing_values = data_at.type()[mask]
    constant = at.as_tensor(data_m.filled())
    z_at = at.set_subtensor(constant[mask.nonzero()], missing_values)

    assert isinstance(z_at.owner.op,
                      (AdvancedIncSubtensor, AdvancedIncSubtensor1))

    res = extract_obs_data(z_at)

    assert isinstance(res, np.ndarray)
    assert np.ma.allequal(res, data_m)

    # Cast check
    data = np.array(5)
    t = at.cast(at.as_tensor(5.0), np.int64)
    res = extract_obs_data(t)

    assert isinstance(res, np.ndarray)
    assert np.array_equal(res, data)
Example #10
    def test_dtype_mismatch(self):
        rng = np.random.RandomState(utt.fetch_seed())
        data = rng.rand(5).astype(self.dtype)
        x = self.shared(data)
        y = tensor.cast(x * 10, "int8")
        cond = aesara.tensor.iscalar("cond")

        with pytest.raises(TypeError):
            ifelse(cond, x, y)
        with pytest.raises(TypeError):
            ifelse(cond, y, x)
Example #11
File: aesaraf.py Project: ricardoV94/pymc3
def local_check_parameter_to_ninf_switch(fgraph, node):
    if isinstance(node.op, CheckParameterValue):
        logp_expr, *logp_conds = node.inputs
        if len(logp_conds) > 1:
            logp_cond = at.all(logp_conds)
        else:
            (logp_cond,) = logp_conds
        out = at.switch(logp_cond, logp_expr, -np.inf)
        out.name = node.op.msg

        if out.dtype != node.outputs[0].dtype:
            out = at.cast(out, node.outputs[0].dtype)

        return [out]
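
The rewrite replaces a hard `CheckParameterValue` failure with a soft one: wherever the condition fails, the log-probability becomes `-inf` instead of raising. A minimal NumPy analogue of that `at.switch` (values made up):

import numpy as np

logp = np.array([-1.3, -0.7, -2.1])
cond = np.array([True, False, True])
out = np.where(cond, logp, -np.inf)  # array([-1.3, -inf, -2.1])
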
Example #12
 def binomial(self,
              size=None,
              n=1,
              p=0.5,
              ndim=None,
              dtype="int64",
              nstreams=None,
              **kwargs):
     # TODO : need description for method, parameter and return
     if n == 1:
         p = undefined_grad(as_tensor_variable(p))
         x = self.uniform(size=size, nstreams=nstreams, **kwargs)
         return cast(x < p, dtype)
     else:
         raise NotImplementedError("MRG_RandomStream.binomial with n > 1")
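
The `n == 1` branch draws Bernoulli samples by thresholding uniforms: a uniform `u` on the unit interval satisfies `u < p` with probability exactly `p`. A minimal NumPy sketch of the same trick:

import numpy as np

rng = np.random.default_rng(42)
u = rng.uniform(size=5)
samples = (u < 0.5).astype("int64")  # plays the role of cast(x < p, dtype) above
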
Example #13
    def shared_dataset(data_xy):
        """Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Aesara to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch every
        time one is needed (the default behaviour if the data is not in a
        shared variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = aesara.shared(np.asarray(data_x, dtype=aesara.config.floatX))
        shared_y = aesara.shared(np.asarray(data_y, dtype=aesara.config.floatX))
        # When storing data on the GPU it has to be stored as floats,
        # so we store the labels as ``floatX`` as well (``shared_y``
        # does exactly that). But during our computations we need them
        # as ints (we use the labels as indices, and float indices make
        # no sense), so instead of returning ``shared_y`` we cast it to
        # int32. This little hack lets us get around the issue.
        return shared_x, aet.cast(shared_y, "int32")
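
A hypothetical usage sketch (the array shapes and names below are made up): `shared_dataset` takes an `(inputs, labels)` pair and returns a floatX shared variable plus an int32 view of the labels, so the storage can live on the GPU while indexing code still sees integers.

import numpy as np

train_x = np.random.rand(100, 784)            # e.g. 100 flattened 28x28 images
train_y = np.random.randint(0, 10, size=100)  # integer class labels
shared_train_x, shared_train_y = shared_dataset((train_x, train_y))
# shared_train_x holds floatX data; shared_train_y is a symbolic int32 cast
# over floatX storage.
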
Example #14
def binomial(random_state,
             size=None,
             n=1,
             p=0.5,
             ndim=None,
             dtype="int64",
             prob=None):
    """
    Sample `n` times with probability of success `p` for each trial and
    return the number of successes.

    If the `size` argument is ambiguous on the number of dimensions, `ndim`
    may be a plain integer to supplement the missing information.

    If `size` is None, the output shape will be determined by the shapes
    of `n` and `p`.

    """
    if prob is not None:
        p = prob
        print(
            "DEPRECATION WARNING: the parameter prob to the binomal fct have been renamed to p to have the same name as np.",
            file=sys.stderr,
        )
    n = tensor.as_tensor_variable(n)
    p = tensor.as_tensor_variable(p)
    ndim, size, bcast = _infer_ndim_bcast(ndim, size, n, p)
    if n.dtype == "int64":
        try:
            np.random.binomial(
                n=np.asarray([2, 3, 4], dtype="int64"),
                p=np.asarray([0.1, 0.2, 0.3], dtype="float64"),
            )
        except TypeError:
            # This works around a NumPy bug on 32-bit machines.
            n = tensor.cast(n, "int32")
    op = RandomFunction(
        "binomial",
        tensor.TensorType(dtype=dtype, broadcastable=(False, ) * ndim))
    return op(random_state, size, n, p)
Example #15
def incomplete_beta_cfe(a, b, x, small):
    """Incomplete beta continued fraction expansions
    based on Cephes library by Steve Moshier (incbet.c).
    small: Choose element-wise which continued fraction expansion to use.
    """
    BIG = aet.constant(4.503599627370496e15, dtype="float64")
    BIGINV = aet.constant(2.22044604925031308085e-16, dtype="float64")
    THRESH = aet.constant(3.0 * np.finfo(np.float64).eps, dtype="float64")

    zero = aet.constant(0.0, dtype="float64")
    one = aet.constant(1.0, dtype="float64")
    two = aet.constant(2.0, dtype="float64")

    r = one
    k1 = a
    k3 = a
    k4 = a + one
    k5 = one
    k8 = a + two

    k2 = aet.switch(small, a + b, b - one)
    k6 = aet.switch(small, b - one, a + b)
    k7 = aet.switch(small, k4, a + one)
    k26update = aet.switch(small, one, -one)
    x = aet.switch(small, x, x / (one - x))

    pkm2 = zero
    qkm2 = one
    pkm1 = one
    qkm1 = one
    r = one

    def _step(i, pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r):
        xk = -(x * k1 * k2) / (k3 * k4)
        pk = pkm1 + pkm2 * xk
        qk = qkm1 + qkm2 * xk
        pkm2 = pkm1
        pkm1 = pk
        qkm2 = qkm1
        qkm1 = qk

        xk = (x * k5 * k6) / (k7 * k8)
        pk = pkm1 + pkm2 * xk
        qk = qkm1 + qkm2 * xk
        pkm2 = pkm1
        pkm1 = pk
        qkm2 = qkm1
        qkm1 = qk

        old_r = r
        r = aet.switch(aet.eq(qk, zero), r, pk / qk)

        k1 += one
        k2 += k26update
        k3 += two
        k4 += two
        k5 += one
        k6 -= k26update
        k7 += two
        k8 += two

        big_cond = aet.gt(aet.abs_(qk) + aet.abs_(pk), BIG)
        biginv_cond = aet.or_(aet.lt(aet.abs_(qk), BIGINV),
                              aet.lt(aet.abs_(pk), BIGINV))

        pkm2 = aet.switch(big_cond, pkm2 * BIGINV, pkm2)
        pkm1 = aet.switch(big_cond, pkm1 * BIGINV, pkm1)
        qkm2 = aet.switch(big_cond, qkm2 * BIGINV, qkm2)
        qkm1 = aet.switch(big_cond, qkm1 * BIGINV, qkm1)

        pkm2 = aet.switch(biginv_cond, pkm2 * BIG, pkm2)
        pkm1 = aet.switch(biginv_cond, pkm1 * BIG, pkm1)
        qkm2 = aet.switch(biginv_cond, qkm2 * BIG, qkm2)
        qkm1 = aet.switch(biginv_cond, qkm1 * BIG, qkm1)

        return (
            (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r),
            until(aet.abs_(old_r - r) < (THRESH * aet.abs_(r))),
        )

    (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), _ = scan(
        _step,
        sequences=[aet.arange(0, 300)],
        outputs_info=[
            e for e in aet.cast((pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5,
                                 k6, k7, k8, r), "float64")
        ],
    )

    return r[-1]
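
The `BIG`/`BIGINV` switches guard against overflow and underflow of the partial numerators and denominators. The key fact is that only the ratio `pk / qk` is kept, and rescaling both by a common factor leaves it unchanged; a one-line NumPy check (values made up):

import numpy as np

BIG, BIGINV = 4.503599627370496e15, 2.22044604925031308085e-16
pk, qk = 3.0e16, 7.0e16  # hypothetical partials that have grown past BIG
assert np.isclose(pk / qk, (pk * BIGINV) / (qk * BIGINV))
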
Example #16
    def uniform(self,
                size,
                low=0.0,
                high=1.0,
                ndim=None,
                dtype=None,
                nstreams=None,
                **kwargs):
        # TODO : need description for parameter 'size', 'ndim', 'nstreams'
        """
        Sample a tensor of the given size whose elements are drawn from a
        uniform distribution between ``low`` and ``high``.

        If the size argument is ambiguous on the number of dimensions,
        ndim may be a plain integer to supplement the missing information.

        Parameters
        ----------
        low
            Lower bound of the interval on which values are sampled.
            If the ``dtype`` arg is provided, ``low`` will be cast into
            dtype. This bound is excluded.
        high
            Upper bound of the interval on which values are sampled.
            If the ``dtype`` arg is provided, ``high`` will be cast into
            dtype. This bound is excluded.
        size
            Can be a list of integers or an Aesara variable (e.g. the shape
            of another Aesara variable).
        dtype
            The output data type. If dtype is not specified, it will be
            inferred from the dtype of low and high, but will be at
            least as precise as floatX.

        """
        low = as_tensor_variable(low)
        high = as_tensor_variable(high)

        if dtype is None:
            dtype = aes.upcast(config.floatX, low.dtype, high.dtype)

        low = cast(low, dtype=dtype)
        high = cast(high, dtype=dtype)

        low = undefined_grad(low)
        high = undefined_grad(high)

        if isinstance(size, tuple):
            msg = "size must be a tuple of int or an Aesara variable"
            assert all(
                isinstance(i, (np.integer, int, Variable)) for i in size), msg
            if any(isinstance(i, (np.integer, int)) and i <= 0 for i in size):
                raise ValueError(
                    "The specified size contains a dimension with value <= 0",
                    size)

        else:
            if not (isinstance(size, Variable) and size.ndim == 1):
                raise TypeError("size must be a tuple of int or an Aesara "
                                "Variable with 1 dimension, got " + str(size) +
                                " of type " + str(type(size)))
        orig_nstreams = nstreams
        if nstreams is None:
            nstreams = self.n_streams(size)
        rstates = self.get_substream_rstates(nstreams, dtype)

        d = {}
        if "target" in kwargs:
            d = dict(target=kwargs.pop("target"))
        if len(kwargs) > 0:
            raise TypeError(
                f"uniform() got unexpected keyword arguments {kwargs.keys()}")
        node_rstate = shared(rstates, **d)
        u = self.pretty_return(
            node_rstate,
            *mrg_uniform.new(node_rstate, ndim, dtype, size),
            size=size,
            nstreams=orig_nstreams,
        )
        # Add a reference to distinguish from other shared variables
        node_rstate.tag.is_rng = True
        r = u * (high - low) + low

        if u.type.broadcastable != r.type.broadcastable:
            raise NotImplementedError(
                "Increase the size to match the broadcasting pattern of "
                "`low` and `high` arguments")

        assert r.dtype == dtype
        return r
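
The tail of `uniform` maps raw draws on the unit interval to the requested interval with the affine transform `u * (high - low) + low`. A minimal NumPy sketch:

import numpy as np

u = np.random.default_rng(0).uniform(size=4)  # raw draws in the unit interval
low, high = -2.0, 3.0
r = u * (high - low) + low                    # rescaled draws in (-2.0, 3.0)
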
Example #17
File: ode.py Project: bwengals/pymc3
    def __call__(self, y0, theta, return_sens=False, **kwargs):
        if isinstance(y0, (list, tuple)) and not len(y0) == self.n_states:
            raise ShapeError("Length of y0 is wrong.",
                             actual=(len(y0), ),
                             expected=(self.n_states, ))
        if isinstance(theta, (list, tuple)) and not len(theta) == self.n_theta:
            raise ShapeError("Length of theta is wrong.",
                             actual=(len(theta), ),
                             expected=(self.n_theta, ))

        # convert inputs to tensors (and check their types)
        y0 = at.cast(at.unbroadcast(at.as_tensor_variable(y0), 0), floatX)
        theta = at.cast(at.unbroadcast(at.as_tensor_variable(theta), 0),
                        floatX)
        inputs = [y0, theta]
        for i, (input_val, itype) in enumerate(zip(inputs, self._itypes)):
            if not input_val.type.in_same_class(itype):
                raise ValueError(
                    f"Input {i} of type {input_val.type} does not have the expected type of {itype}"
                )

        # use default implementation to prepare symbolic outputs (via make_node)
        states, sens = super().__call__(y0, theta, **kwargs)

        if aesara.config.compute_test_value != "off":
            # compute test values from input test values
            test_states, test_sens = self._simulate(
                y0=get_test_value(y0), theta=get_test_value(theta))

            # check types of simulation result
            if not test_states.dtype == self._otypes[0].dtype:
                raise DtypeError(
                    "Simulated states have the wrong type.",
                    actual=test_states.dtype,
                    expected=self._otypes[0].dtype,
                )
            if not test_sens.dtype == self._otypes[1].dtype:
                raise DtypeError(
                    "Simulated sensitivities have the wrong type.",
                    actual=test_sens.dtype,
                    expected=self._otypes[1].dtype,
                )

            # check shapes of simulation result
            expected_states_shape = (self.n_times, self.n_states)
            expected_sens_shape = (self.n_times, self.n_states, self.n_p)
            if not test_states.shape == expected_states_shape:
                raise ShapeError(
                    "Simulated states have the wrong shape.",
                    test_states.shape,
                    expected_states_shape,
                )
            if not test_sens.shape == expected_sens_shape:
                raise ShapeError(
                    "Simulated sensitivities have the wrong shape.",
                    test_sens.shape,
                    expected_sens_shape,
                )

            # attach results as test values to the outputs
            states.tag.test_value = test_states
            sens.tag.test_value = test_sens

        if return_sens:
            return states, sens
        return states
Example #18
# Declare Aesara symbolic variables
x = aesara.shared(D[0], name="x")
y = aesara.shared(D[1], name="y")
w = aesara.shared(rng.randn(feats).astype(aesara.config.floatX), name="w")
b = aesara.shared(np.asarray(0.0, dtype=aesara.config.floatX), name="b")
x.tag.test_value = D[0]
y.tag.test_value = D[1]
# print "Initial model:"
# print w.get_value(), b.get_value()

# Construct Aesara expression graph
p_1 = 1 / (1 + tt.exp(-tt.dot(x, w) - b))  # Probability of having a one
prediction = p_1 > 0.5  # The prediction that is done: 0 or 1
xent = -y * tt.log(p_1) - (1 - y) * tt.log(1 - p_1)  # Cross-entropy
cost = tt.cast(xent.mean(), "float32") + 0.01 * (w**2).sum()  # The cost to optimize
gw, gb = tt.grad(cost, [w, b])

# Compile expressions to functions
train = aesara.function(
    inputs=[],
    outputs=[prediction, xent],
    updates=[(w, w - 0.01 * gw), (b, b - 0.01 * gb)],
    name="train",
)
predict = aesara.function(inputs=[], outputs=prediction, name="predict")

if any([
        n.op.__class__.__name__ in ["Gemv", "CGemv", "Gemm", "CGemm"]
        for n in train.maker.fgraph.toposort()
]):
Example #19
    def normal(
        self,
        size,
        avg=0.0,
        std=1.0,
        ndim=None,
        dtype=None,
        nstreams=None,
        truncate=False,
        **kwargs,
    ):
        """
        Sample a tensor of values from a normal distribution.

        Parameters
        ----------
        size : int_vector_like
            Array dimensions for the output tensor.
        avg : float_like, optional
            The mean value for the truncated normal to sample from (defaults to 0.0).
        std : float_like, optional
            The standard deviation for the truncated normal to sample from (defaults to 1.0).
        truncate : bool, optional
            Truncates the normal distribution at 2 standard deviations if True (defaults to False).
            When this flag is set, the standard deviation of the result will be less than the one specified.
        ndim : int, optional
            The number of dimensions for the output tensor (defaults to None).
            This argument is necessary if the size argument is ambiguous on the number of dimensions.
        dtype : str, optional
            The data-type for the output tensor. If not specified,
            the dtype is inferred from avg and std, but it is at least as precise as floatX.
        kwargs
            Other keyword arguments for random number generation (see uniform).

        Returns
        -------
        samples : TensorVariable
            An Aesara tensor of samples randomly drawn from a normal distribution.

        """
        size = _check_size(size)
        avg = undefined_grad(as_tensor_variable(avg))
        std = undefined_grad(as_tensor_variable(std))

        if dtype is None:
            dtype = aes.upcast(config.floatX, avg.dtype, std.dtype)

        avg = at.cast(avg, dtype=dtype)
        std = at.cast(std, dtype=dtype)

        # Generate an even number of uniform samples.
        # Do manual constant folding to reduce the optimizer's work.
        if isinstance(size, Constant):
            n_odd_samples = size.prod(dtype="int64")
        else:
            n_odd_samples = prod(size, dtype="int64")
        n_even_samples = n_odd_samples + n_odd_samples % 2
        uniform = self.uniform(
            (n_even_samples, ),
            low=0.0,
            high=1.0,
            ndim=1,
            dtype=dtype,
            nstreams=nstreams,
            **kwargs,
        )

        # box-muller transform
        u1 = uniform[:n_even_samples // 2]
        u2 = uniform[n_even_samples // 2:]
        r = sqrt(-2.0 * log(u1))
        theta = np.array(2.0 * np.pi, dtype=dtype) * u2
        cos_theta, sin_theta = cos(theta), sin(theta)
        z0 = r * cos_theta
        z1 = r * sin_theta

        if truncate:
            # use valid samples
            to_fix0 = (z0 < -2.0) | (z0 > 2.0)
            to_fix1 = (z1 < -2.0) | (z1 > 2.0)
            z0_valid = z0[at.nonzero(~to_fix0)]
            z1_valid = z1[at.nonzero(~to_fix1)]

            # re-sample invalid samples
            to_fix0 = at.nonzero(to_fix0)[0]
            to_fix1 = at.nonzero(to_fix1)[0]
            n_fix_samples = to_fix0.size + to_fix1.size
            lower = at.constant(1.0 / np.e**2, dtype=dtype)
            u_fix = self.uniform(
                (n_fix_samples, ),
                low=lower,
                high=1.0,
                ndim=1,
                dtype=dtype,
                nstreams=nstreams,
                **kwargs,
            )
            r_fix = sqrt(-2.0 * log(u_fix))
            z0_fixed = r_fix[:to_fix0.size] * cos_theta[to_fix0]
            z1_fixed = r_fix[to_fix0.size:] * sin_theta[to_fix1]

            # pack everything together to a useful result
            norm_samples = at.join(0, z0_valid, z0_fixed, z1_valid, z1_fixed)
        else:
            norm_samples = at.join(0, z0, z1)
        if isinstance(n_odd_samples, Variable):
            samples = norm_samples[:n_odd_samples]
        elif n_odd_samples % 2 == 1:
            samples = norm_samples[:-1]
        else:
            samples = norm_samples
        samples = reshape(samples, newshape=size, ndim=ndim)
        samples *= std
        samples += avg

        return samples
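
The Box-Muller step above turns pairs of independent uniforms into pairs of independent standard normals. A hedged NumPy sketch of the same transform (the helper name is ours, not part of the library):

import numpy as np

def box_muller_np(n, rng):
    # Generate an even number of uniforms, as `normal` does above.
    n_even = n + n % 2
    u = rng.uniform(size=n_even)
    u1, u2 = u[: n_even // 2], u[n_even // 2 :]
    r = np.sqrt(-2.0 * np.log(u1))  # radius; assumes u1 > 0
    theta = 2.0 * np.pi * u2        # angle
    z = np.concatenate([r * np.cos(theta), r * np.sin(theta)])
    return z[:n]                    # drop the padding sample when n is odd
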
Example #20
# Declare Aesara symbolic variables
x = aet.matrix("x")
y = aet.vector("y")
w = aesara.shared(rng.randn(feats).astype(aesara.config.floatX), name="w")
b = aesara.shared(np.asarray(0., dtype=aesara.config.floatX), name="b")
x.tag.test_value = D[0]
y.tag.test_value = D[1]
#print "Initial model:"
#print w.get_value(), b.get_value()

# Construct Aesara expression graph
p_1 = 1 / (1 + aet.exp(-aet.dot(x, w) - b))  # Probability of having a one
prediction = p_1 > 0.5  # The prediction that is done: 0 or 1
xent = -y * aet.log(p_1) - (1 - y) * aet.log(1 - p_1)  # Cross-entropy
cost = aet.cast(xent.mean(), 'float32') + \
       0.01 * (w ** 2).sum()  # The cost to optimize
gw, gb = aet.grad(cost, [w, b])

# Compile expressions to functions
train = aesara.function(inputs=[x, y],
                        outputs=[prediction, xent],
                        updates={
                            w: w - 0.01 * gw,
                            b: b - 0.01 * gb
                        },
                        name="train")
predict = aesara.function(inputs=[x], outputs=prediction, name="predict")

if any([
        x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm']
        for x in train.maker.fgraph.toposort()
]):
Example #21
def test_batch_normalization_train():

    for axes in ("per-activation", "spatial", (1, 2, 3, 4)):
        for vartype in (tensor5, tensor3, vector):
            x, scale, bias, running_mean, running_var = (vartype(n) for n in (
                "x", "scale", "bias", "running_mean", "running_var"))
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used
            running_average_factor = 0.3

            # remove non-existing axes
            if isinstance(axes, tuple):
                axes = tuple(i for i in axes if i < ndim)
            if len(axes) == 0:
                continue

            # forward pass
            (
                out,
                x_mean,
                x_invstd,
                out_running_mean,
                out_running_var,
            ) = batchnorm.batch_normalization_train(
                x,
                scale,
                bias,
                axes,
                eps,
                running_average_factor,
                running_mean,
                running_var,
            )
            # reference forward pass
            if axes == "per-activation":
                axes2 = (0, )
            elif axes == "spatial":
                axes2 = (0, ) + tuple(range(2, ndim))
            else:
                axes2 = axes
            x_mean2 = x.mean(axis=axes2, keepdims=True)
            x_var2 = x.var(axis=axes2, keepdims=True)
            x_invstd2 = aet.reciprocal(aet.sqrt(x_var2 + eps))
            scale2 = aet.addbroadcast(scale, *axes2)
            bias2 = aet.addbroadcast(bias, *axes2)
            out2 = (x - x_mean2) * (scale2 * x_invstd2) + bias2
            m = aet.cast(
                aet.prod(x.shape) / aet.prod(scale.shape),
                aesara.config.floatX)
            out_running_mean2 = (
                running_mean * (1 - running_average_factor)
                + x_mean2 * running_average_factor
            )
            out_running_var2 = (
                running_var * (1 - running_average_factor)
                + (m / (m - 1)) * x_var2 * running_average_factor
            )
            # backward pass
            dy = vartype("dy")
            grads = aet.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
            # reference backward pass
            grads2 = aet.grad(None,
                              wrt=[x, scale, bias],
                              known_grads={out2: dy})
            # second-order backward pass
            dx = vartype("dinputs")
            dscale = vartype("dscale")
            dbias = vartype("dbias")
            grad_grads = aet.grad(
                None,
                wrt=[x, dy, scale],
                known_grads=OrderedDict({
                    grads[0]: dx,
                    grads[1]: dscale,
                    grads[2]: dbias
                }),
                consider_constant=[
                    x,
                    dy,
                    scale,
                    bias,
                    x_mean,
                    x_invstd,
                    running_mean,
                    running_var,
                ],
                return_disconnected="zero",
            )
            # reference second-order backward pass
            grad_grads2 = aet.grad(
                None,
                wrt=[x, dy, scale],
                known_grads=OrderedDict({
                    grads2[0]: dx,
                    grads2[1]: dscale,
                    grads2[2]: dbias
                }),
                consider_constant=[
                    x,
                    dy,
                    scale,
                    bias,
                    x_mean2,
                    x_var2,
                    running_mean,
                    running_var,
                ],
                return_disconnected="zero",
            )
            # compile
            f = aesara.function(
                [
                    x, scale, bias, running_mean, running_var, dy, dx, dscale,
                    dbias
                ],
                [
                    out,
                    x_mean,
                    x_invstd,
                    out_running_mean,
                    out_running_var,
                    out2,
                    x_mean2,
                    x_invstd2,
                    out_running_mean2,
                    out_running_var2,
                ] + grads + grads2 + grad_grads + grad_grads2,
            )
            # check if the abstract Ops have been replaced
            assert not any([
                isinstance(
                    n.op,
                    (
                        batchnorm.AbstractBatchNormTrain,
                        batchnorm.AbstractBatchNormInference,
                        batchnorm.AbstractBatchNormTrainGrad,
                    ),
                ) for n in f.maker.fgraph.toposort()
            ])
            # run
            for data_shape in (
                (5, 10, 30, 40, 10),
                (4, 3, 1, 1, 1),
                (2, 3, 5, 5, 5),
            ):
                data_shape = data_shape[:ndim]
                param_shape = tuple(1 if d in axes2 else s
                                    for d, s in enumerate(data_shape))

                rng = np.random.default_rng(1234)

                X = 4 + 3 * rng.random(data_shape).astype(aesara.config.floatX)
                Dy = -1 + 2 * rng.random(data_shape).astype(
                    aesara.config.floatX)
                Scale = rng.random(param_shape).astype(aesara.config.floatX)
                Bias = rng.random(param_shape).astype(aesara.config.floatX)
                Running_mean = rng.random(param_shape).astype(
                    aesara.config.floatX)
                Running_var = rng.random(param_shape).astype(
                    aesara.config.floatX)
                Dx = 4 + 3 * rng.random(data_shape).astype(
                    aesara.config.floatX)
                Dscale = -1 + 2 * rng.random(param_shape).astype(
                    aesara.config.floatX)
                Dbias = rng.random(param_shape).astype(aesara.config.floatX)

                outputs = f(X, Scale, Bias, Running_mean, Running_var, Dy, Dx,
                            Dscale, Dbias)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[0 + 5])  # out
                utt.assert_allclose(outputs[1], outputs[1 + 5])  # mean
                utt.assert_allclose(outputs[2], outputs[2 + 5])  # invstd
                utt.assert_allclose(outputs[3], outputs[3 + 5])  # running_mean
                utt.assert_allclose(
                    np.nan_to_num(outputs[4]),
                    np.nan_to_num(outputs[4 + 5]),
                )  # running_var
                # compare gradients
                utt.assert_allclose(outputs[10], outputs[10 + 3],
                                    atol=1e-4)  # dx
                utt.assert_allclose(outputs[11],
                                    outputs[11 + 3],
                                    rtol=2e-4,
                                    atol=1e-4)  # dscale
                utt.assert_allclose(outputs[12], outputs[12 + 3])  # dbias
                # compare second-order gradients
                utt.assert_allclose(outputs[16], outputs[16 + 3],
                                    atol=1e-4)  # ddx
                utt.assert_allclose(outputs[17], outputs[17 + 3])  # ddy
                utt.assert_allclose(outputs[18],
                                    outputs[18 + 3],
                                    rtol=3e-4,
                                    atol=1e-4)  # ddscale
Example #22
 def test_elemwise1(self):
     self.check_rop_lop(self.x + aet.cast(self.x, "int32"), self.in_shape)
Example #23
 def f_rnn(u_t, x_tm1, W_in, W):
     return (u_t * W_in + x_tm1 * W, aet.cast(u_t + x_tm1, "int64"))
Example #24
 def test_cast(self):
     x = zvector()
     with pytest.raises(TypeError):
         cast(x, "int32")
Example #25
File: cov.py Project: t-triobox/pymc3
 def diag(self, X):
     X, _ = self._slice(X, None)
     index = at.cast(X, "int32")
     return at.diag(self.B)[index.ravel()]
Example #26
def _infer_ndim_bcast(ndim, shape, *args):
    """
    Infer the number of dimensions from the shape or the other arguments.

    Returns
    -------
    (int, variable, tuple) triple, where the variable is an integer vector,
    and the tuple contains Booleans
        The first element returned is the inferred number of dimensions.
        The second element is the inferred shape (combining symbolic and
        constant information from `shape` and `args`).
        The third element is a broadcasting pattern corresponding to that shape.

    """

    # Find the minimum value of ndim required by the *args
    if args:
        args_ndim = max(arg.ndim for arg in args)
    else:
        args_ndim = 0

    if isinstance(shape, (tuple, list)):
        # there is a convention that -1 means the corresponding shape of a
        # potentially-broadcasted symbolic arg
        #
        # This case combines together symbolic and non-symbolic shape
        # information
        shape_ndim = len(shape)
        if ndim is None:
            ndim = shape_ndim
        else:
            if shape_ndim != ndim:
                raise ValueError(
                    "ndim should be equal to len(shape), but\n",
                    "ndim = %s, len(shape) = %s, shape = %s" %
                    (ndim, shape_ndim, shape),
                )

        bcast = []
        pre_v_shape = []
        for i, s in enumerate(shape):
            if hasattr(s, "type"):  # s is symbolic
                bcast.append(False)  # todo - introspect further
                pre_v_shape.append(s)
            else:
                if s >= 0:
                    pre_v_shape.append(tensor.as_tensor_variable(s))
                    bcast.append(s == 1)
                elif s == -1:
                    n_a_i = 0
                    for a in args:
                        # ndim: _   _   _   _   _   _
                        # ashp:         s0  s1  s2  s3
                        #           i
                        if i >= ndim - a.ndim:
                            n_a_i += 1
                            a_i = i + a.ndim - ndim
                            if not a.broadcastable[a_i]:
                                pre_v_shape.append(a.shape[a_i])
                                bcast.append(False)
                                break
                    else:
                        if n_a_i == 0:
                            raise ValueError(
                                "Auto-shape of -1 must overlap"
                                "with the shape of one of the broadcastable"
                                "inputs")
                        else:
                            pre_v_shape.append(tensor.as_tensor_variable(1))
                            bcast.append(True)
                else:
                    ValueError("negative shape", s)
        # post-condition: shape may still contain both symbolic and
        # non-symbolic things
        if len(pre_v_shape) == 0:
            v_shape = tensor.constant([], dtype="int64")
        else:
            v_shape = tensor.stack(pre_v_shape)

    elif shape is None:
        # The number of drawn samples will be determined automatically,
        # but we need to know ndim
        if not args:
            raise TypeError("_infer_ndim_bcast cannot infer shape without"
                            " either shape or args")
        template = reduce(lambda a, b: a + b, args)
        v_shape = template.shape
        bcast = template.broadcastable
        ndim = template.ndim
    else:
        v_shape = tensor.as_tensor_variable(shape)
        if v_shape.ndim != 1:
            raise TypeError(
                "shape must be a vector or list of scalar, got '%s'" % v_shape)

        if ndim is None:
            ndim = tensor.get_vector_length(v_shape)
        bcast = [False] * ndim

    if v_shape.ndim != 1:
        raise TypeError("shape must be a vector or list of scalar, got '%s'" %
                        v_shape)

    if v_shape.dtype not in aesara.tensor.integer_dtypes:
        raise TypeError("shape must be an integer vector or list",
                        v_shape.dtype)

    if args_ndim > ndim:
        raise ValueError(
            "ndim should be at least as big as required by args value",
            (ndim, args_ndim),
            args,
        )

    assert ndim == len(bcast)
    return ndim, tensor.cast(v_shape, "int64"), tuple(bcast)
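
A hedged usage sketch of the `-1` convention described in the docstring (`_infer_ndim_bcast` is module-internal, so having it in scope alongside `aesara.tensor` is an assumption): a `-1` entry in `shape` is filled in from the matching dimension of one of the args.

import aesara.tensor as tensor

p = tensor.matrix("p")  # two non-broadcastable dimensions
ndim, v_shape, bcast = _infer_ndim_bcast(None, (-1, 3), p)
# ndim == 2; v_shape is the int64 vector (p.shape[0], 3);
# bcast == (False, False)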