Example #1
 def func(chol_vec, delta):
     chol = at.stack([
         at.stack([at.exp(0.1 * chol_vec[0]), 0]),
         at.stack([chol_vec[1], 2 * at.exp(chol_vec[2])]),
     ])
     cov = at.dot(chol, chol.T)
     return MvNormalLogp()(cov, delta)
Example #2
 def two_gaussians(x):
     """
     Mixture of Gaussians likelihood
     """
     log_like1 = (-0.5 * n * at.log(2 * np.pi) - 0.5 * at.log(dsigma) -
                  0.5 * (x - mu1).T.dot(isigma).dot(x - mu1))
     log_like2 = (-0.5 * n * at.log(2 * np.pi) - 0.5 * at.log(dsigma) -
                  0.5 * (x - mu2).T.dot(isigma).dot(x - mu2))
     return at.log(w1 * at.exp(log_like1) + w2 * at.exp(log_like2))
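The weighted sum of exponentials in the return line can underflow when both log-likelihoods are very negative. A minimal NumPy/SciPy sketch (the log-likelihood values and weights are invented for illustration) of the failure mode and of a log-space alternative:

import numpy as np
from scipy.special import logsumexp

# Hypothetical tail values for the two component log-likelihoods.
log_like1, log_like2 = -800.0, -805.0
w1, w2 = 0.1, 0.9

# Naive form used above: exp() underflows to 0, so the log is -inf.
print(np.log(w1 * np.exp(log_like1) + w2 * np.exp(log_like2)))

# Weighted log-sum-exp stays finite (about -802.24).
print(logsumexp([log_like1, log_like2], b=[w1, w2]))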
Example #3
def test_get_jaxified_logp():
    with pm.Model() as m:
        x = pm.Flat("x")
        y = pm.Flat("y")
        pm.Potential("pot", at.log(at.exp(x) + at.exp(y)))

    jax_fn = get_jaxified_logp(m)
    # This would underflow if not optimized
    assert not np.isinf(jax_fn((np.array(5000.0), np.array(5000.0))))
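A plain NumPy illustration (not part of the test) of why the unoptimized expression is not finite at x = y = 5000, while the log-sum-exp rewrite the JAX backend relies on is:

import numpy as np

# Direct evaluation: exp(5000) is not representable, so the log is inf.
print(np.log(np.exp(5000.0) + np.exp(5000.0)))

# Rewritten as a log-sum-exp, the result is finite: 5000 + log(2).
print(np.logaddexp(5000.0, 5000.0))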
Example #4
    def backward(self, rv_var, rv_value):
        a, b = self.param_extract_fn(rv_var)

        if a is not None and b is not None:
            sigmoid_x = at.sigmoid(rv_value)
            return sigmoid_x * b + (1 - sigmoid_x) * a
        elif a is not None:
            return at.exp(rv_value) + a
        elif b is not None:
            return b - at.exp(rv_value)
        else:
            return rv_value
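A small NumPy sketch (bounds a and b chosen arbitrarily) showing that the two-sided branch maps any unconstrained value into the open interval (a, b), and the lower-bounded branch stays above a:

import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

a, b = -1.0, 4.0                      # hypothetical interval bounds
v = np.linspace(-10.0, 10.0, 5)       # unconstrained values

x = sigmoid(v) * b + (1 - sigmoid(v)) * a
print(x)                              # all entries lie strictly inside (-1, 4)

print(np.exp(v) + a)                  # lower-bounded branch: all entries > a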
Example #5
 def two_gaussians(x):
     log_like1 = (
         -0.5 * n * aet.log(2 * np.pi)
         - 0.5 * aet.log(dsigma)
         - 0.5 * (x - mu1).T.dot(isigma).dot(x - mu1)
     )
     log_like2 = (
         -0.5 * n * aet.log(2 * np.pi)
         - 0.5 * aet.log(dsigma)
         - 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2)
     )
     return aet.log(w1 * aet.exp(log_like1) + w2 * aet.exp(log_like2))
Example #6
def test_HSStep_NegativeBinomial():
    np.random.seed(2032)
    M = 5
    N = 50
    X = np.random.normal(size=N * M).reshape((N, M))
    beta_true = np.array([1, 1, 2, 2, 0])
    y_nb = pm.NegativeBinomial.dist(np.exp(X.dot(beta_true)), 1).random()

    N_draws = 500
    with pm.Model():
        beta = HorseShoe("beta", tau=1, shape=M)
        pm.NegativeBinomial("y",
                            mu=at.exp(beta.dot(X.T)),
                            alpha=1,
                            observed=y_nb)
        hsstep = HSStep([beta])
        trace = pm.sample(
            draws=N_draws,
            step=hsstep,
            chains=1,
            return_inferencedata=True,
            compute_convergence_checks=False,
        )

    beta_samples = trace.posterior["beta"][0].values
    assert beta_samples.shape == (N_draws, M)
    np.testing.assert_allclose(beta_samples.mean(0), beta_true, atol=0.5)

    with pm.Model():
        beta = HorseShoe("beta", tau=1, shape=M, testval=beta_true * 0.1)
        pm.NegativeBinomial("y",
                            mu=beta.dot(np.abs(X.T)),
                            alpha=1,
                            observed=y_nb)
        hsstep = HSStep([beta])
        trace = pm.sample(
            draws=N_draws,
            step=hsstep,
            chains=1,
            return_inferencedata=True,
            compute_convergence_checks=False,
        )

    beta_samples = trace.posterior["beta"][0].values
    assert beta_samples.shape == (N_draws, M)

    with pm.Model():
        beta = HorseShoe("beta", tau=1, shape=M, testval=beta_true * 0.1)
        eta = pm.NegativeBinomial("eta", mu=beta.dot(X.T), alpha=1, shape=N)
        pm.Normal("y", mu=at.exp(eta), sigma=1, observed=y_nb)

        with pytest.raises(NotImplementedError):
            HSStep([beta])
Example #7
 def test_hessian(self):
     chol_vec = at.vector("chol_vec")
     chol_vec.tag.test_value = np.array([0.1, 2, 3])
     chol = at.stack([
         at.stack([at.exp(0.1 * chol_vec[0]), 0]),
         at.stack([chol_vec[1], 2 * at.exp(chol_vec[2])]),
     ])
     cov = at.dot(chol, chol.T)
     delta = at.matrix("delta")
     delta.tag.test_value = np.ones((5, 2))
     logp = MvNormalLogp()(cov, delta)
     g_cov, g_delta = at.grad(logp, [cov, delta])
     at.grad(g_delta.sum() + g_cov.sum(), [delta, cov])
Example #8
 def test_hessian(self):
     chol_vec = at.vector("chol_vec")
     chol_vec.tag.test_value = floatX(np.array([0.1, 2, 3]))
     chol = at.stack([
         at.stack([at.exp(0.1 * chol_vec[0]), 0]),
         at.stack([chol_vec[1], 2 * at.exp(chol_vec[2])]),
     ])
     cov = at.dot(chol, chol.T)
     delta = at.matrix("delta")
     delta.tag.test_value = floatX(np.ones((5, 2)))
     logp = MvNormalLogp()(cov, delta)
     g_cov, g_delta = at.grad(logp, [cov, delta])
     # TODO: What's the test?  Something needs to be asserted.
     at.grad(g_delta.sum() + g_cov.sum(), [delta, cov])
Example #9
    def forward(self, rv_var, rv_value):
        """Inverse operation of softplus.

        y = Log(Exp(x) - 1)
          = Log(1 - Exp(-x)) + x
        """
        return at.log(1.0 - at.exp(-rv_value)) + rv_value
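The rewritten form in the docstring matters numerically: log(exp(x) - 1) overflows for large x, while log1p(-exp(-x)) + x does not. A quick NumPy check (values chosen for illustration):

import numpy as np

y = 800.0
print(np.log(np.expm1(y)))            # direct form overflows to inf
print(np.log1p(-np.exp(-y)) + y)      # rewritten form: 800.0

y = 3.0                               # for moderate values the two forms agree
print(np.log(np.expm1(y)), np.log1p(-np.exp(-y)) + y)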
Example #10
    def __call__(self, X):
        XY = X.dot(X.T)
        x2 = at.sum(X**2, axis=1).dimshuffle(0, "x")
        X2e = at.repeat(x2, X.shape[0], axis=1)
        H = X2e + X2e.T - 2.0 * XY

        V = at.sort(H.flatten())
        length = V.shape[0]
        # median distance
        m = at.switch(
            at.eq((length % 2), 0),
            # if even vector
            at.mean(V[((length // 2) - 1):((length // 2) + 1)]),
            # if odd vector
            V[length // 2],
        )

        h = 0.5 * m / at.log(floatX(H.shape[0]) + floatX(1))

        #  RBF
        Kxy = at.exp(-H / h / 2.0)

        # Derivative
        dxkxy = -at.dot(Kxy, X)
        sumkxy = at.sum(Kxy, axis=-1, keepdims=True)
        dxkxy = at.add(dxkxy, at.mul(X, sumkxy)) / h

        return Kxy, dxkxy
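The bandwidth h above is the median heuristic: half the median pairwise squared distance divided by log(n + 1). A NumPy sketch of the same computation on random data (the example's floatX cast is omitted; np.median handles the even/odd cases the same way as the switch above):

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))

# Pairwise squared distances, built exactly as X2e + X2e.T - 2*XY above.
sq_norms = np.sum(X**2, axis=1, keepdims=True)
H = sq_norms + sq_norms.T - 2.0 * X @ X.T

m = np.median(H)
h = 0.5 * m / np.log(H.shape[0] + 1.0)
Kxy = np.exp(-H / h / 2.0)
print(h, Kxy.shape)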
Example #11
    def test_lop_override(self, cls_ofg):
        x = tt.vector()
        y = 1.0 / (1.0 + tt.exp(-x))

        def lop_ov(inps, outs, grads):
            (y_, ) = outs
            (dedy_, ) = grads
            return [2.0 * y_ * (1.0 - y_) * dedy_]

        y_, dedy = tt.vector(), tt.vector()
        op_lop_ov = cls_ofg([x, y_, dedy], [2.0 * y_ * (1.0 - y_) * dedy])

        xx = tt.vector()
        yy1 = tt.sum(tt.nnet.sigmoid(xx))
        gyy1 = 2.0 * tt.grad(yy1, xx)

        for ov in [lop_ov, op_lop_ov]:
            op = cls_ofg([x], [y], lop_overrides=ov)
            yy2 = tt.sum(op(xx))
            gyy2 = tt.grad(yy2, xx)
            fn = function([xx], [gyy1, gyy2])

            xval = np.random.rand(32).astype(config.floatX)
            y1val, y2val = fn(xval)
            assert np.allclose(y1val, y2val)
Example #12
def logsumexp(x, axis=None, keepdims=True):
    # Adapted from https://github.com/Theano/Theano/issues/1563
    x_max = aet.max(x, axis=axis, keepdims=True)
    x_max = aet.switch(aet.isinf(x_max), 0, x_max)
    res = aet.log(aet.sum(aet.exp(x - x_max), axis=axis,
                          keepdims=True)) + x_max
    return res if keepdims else res.squeeze()
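Subtracting the maximum before exponentiating is what keeps the sum representable; SciPy's logsumexp uses the same trick, so it makes a convenient cross-check (values invented):

import numpy as np
from scipy.special import logsumexp as sp_logsumexp

x = np.array([-1000.0, -1001.0, -1002.0])

print(np.log(np.sum(np.exp(x))))                     # naive form: -inf

x_max = np.max(x)
print(np.log(np.sum(np.exp(x - x_max))) + x_max)     # max-shifted form
print(sp_logsumexp(x))                               # agrees with SciPy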
Example #13
def test_HSStep_NegativeBinomial_sparse():
    np.random.seed(2032)
    M = 5
    N = 50
    X = np.random.normal(size=N * M).reshape((N, M))
    beta_true = np.array([1, 1, 2, 2, 0])
    y_nb = pm.NegativeBinomial.dist(np.exp(X.dot(beta_true)), 1).random()

    X = sp.sparse.csr_matrix(X)

    N_draws = 500
    with pm.Model():
        beta = HorseShoe("beta", tau=1, shape=M)
        pm.NegativeBinomial("y",
                            mu=at.exp(sp_dot(X, at.shape_padright(beta))),
                            alpha=1,
                            observed=y_nb)
        hsstep = HSStep([beta])
        trace = pm.sample(
            draws=N_draws,
            step=hsstep,
            chains=1,
            return_inferencedata=True,
            compute_convergence_checks=False,
        )

    beta_samples = trace.posterior["beta"][0].values
    assert beta_samples.shape == (N_draws, M)
    np.testing.assert_allclose(beta_samples.mean(0), beta_true, atol=0.5)
Example #14
def incomplete_beta_ps(a, b, value):
    """Power series for incomplete beta
    Use when b*x is small and value not too close to 1.
    Based on Cephes library by Steve Moshier (incbet.c)
    """
    one = aet.constant(1, dtype="float64")
    ai = one / a
    u = (one - b) * value
    t1 = u / (a + one)
    t = u
    threshold = np.MachAr().eps * ai
    s = aet.constant(0, dtype="float64")

    def _step(i, t, s):
        t *= (i - b) * value / i
        step = t / (a + i)
        s += step
        return ((t, s), until(aet.abs_(step) < threshold))

    (t, s), _ = scan(_step,
                     sequences=[aet.arange(2, 302)],
                     outputs_info=[e for e in aet.cast((t, s), "float64")])

    s = s[-1] + t1 + ai

    t = gammaln(a +
                b) - gammaln(a) - gammaln(b) + a * aet.log(value) + aet.log(s)
    return aet.exp(t)
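The series computes the regularized incomplete beta function when b*value is small. A plain-Python transcription of the same recurrence (a sketch, not the library routine), cross-checked against scipy.special.betainc with parameters picked so that b*value is small:

import numpy as np
from scipy.special import betainc, gammaln

def incomplete_beta_ps_np(a, b, value, max_iter=300):
    s = 1.0 / a
    t = (1.0 - b) * value
    s += t / (a + 1.0)
    for i in range(2, max_iter):
        t *= (i - b) * value / i
        step = t / (a + i)
        s += step
        if abs(step) < np.finfo(float).eps / a:
            break
    logt = gammaln(a + b) - gammaln(a) - gammaln(b) + a * np.log(value) + np.log(s)
    return np.exp(logt)

print(incomplete_beta_ps_np(2.0, 3.0, 0.05))   # ~0.01401875
print(betainc(2.0, 3.0, 0.05))                 # same value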
Example #15
    def forward(self, value, *inputs):
        """Inverse operation of softplus.

        y = Log(Exp(x) - 1)
          = Log(1 - Exp(-x)) + x
        """
        return at.log(1.0 - at.exp(-value)) + value
Example #16
def test_logp_helper_exceptions():
    with pytest.raises(TypeError, match="When RV is not a pure distribution"):
        logp(at.exp(Normal.dist()), [1, 2])

    with pytest.raises(NotImplementedError,
                       match="PyMC could not infer logp of input variable"):
        logp(at.cos(Normal.dist()), 1)
Example #17
 def backward(self, y_):
     y = y_.T
     y = aet.concatenate([y, -aet.sum(y, 0, keepdims=True)])
     # "softmax" with vector support and no deprication warning:
     e_y = aet.exp(y - aet.max(y, 0, keepdims=True))
     x = e_y / aet.sum(e_y, 0, keepdims=True)
     return floatX(x.T)
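The backward transform pads the unconstrained (K-1)-vector with the negative of its sum and applies a max-shifted softmax, so every output row lands on the simplex. A NumPy sketch with made-up inputs:

import numpy as np

y_ = np.array([[0.3, -1.2],
               [2.0, 0.5]])                      # two unconstrained (K-1)-vectors

y = y_.T
y = np.concatenate([y, -np.sum(y, 0, keepdims=True)])
e_y = np.exp(y - np.max(y, 0, keepdims=True))
x = (e_y / np.sum(e_y, 0, keepdims=True)).T

print(x)
print(x.sum(axis=1))                             # each row sums to 1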
Example #18
    def forward(self, x):
        """Inverse operation of softplus.

        y = Log(Exp(x) - 1)
          = Log(1 - Exp(-x)) + x
        """
        return aet.log(1.0 - aet.exp(-x)) + x
Example #19
 def full(self, X, Xs=None):
     X, Xs = self._slice(X, Xs)
     if Xs is None:
         Xs = X
     f1 = X.dimshuffle(0, "x", 1)
     f2 = Xs.dimshuffle("x", 0, 1)
     r = np.pi * (f1 - f2) / self.period
     r = at.sum(at.square(at.sin(r) / self.ls), 2)
     return at.exp(-0.5 * r)
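This is the exponentiated-sine-squared (periodic) kernel, k(x, x') = exp(-0.5 * sum((sin(pi (x - x') / period) / ls)^2)). A scalar NumPy check with arbitrary hyperparameters, showing that points a whole number of periods apart get covariance ~1:

import numpy as np

period, ls = 2.0, 0.7                 # arbitrary hyperparameters
x1, x2 = 0.3, 0.3 + 3 * period        # separated by exactly three periods

r = np.pi * (x1 - x2) / period
k = np.exp(-0.5 * (np.sin(r) / ls) ** 2)
print(k)                              # ~1.0 up to floating-point error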
Example #20
def test_get_jaxified_graph():
    # Check that jaxifying a graph does not emit the Supervisor Warning. This test can
    # be removed once https://github.com/aesara-devs/aesara/issues/637 is sorted.
    x = at.scalar("x")
    y = at.exp(x)
    with pytest.warns(None) as record:
        fn = get_jaxified_graph(inputs=[x], outputs=[y])
    assert not record
    assert np.isclose(fn(0), 1)
Example #21
def log_diff_normal_cdf(mu, sigma, x, y):
    """
    Compute :math:`\\log(\\Phi(\\frac{x - \\mu}{\\sigma}) - \\Phi(\\frac{y - \\mu}{\\sigma}))` safely in log space.

    Parameters
    ----------
    mu: float
        mean
    sigma: float
        std

    x: float

    y: float
        must be strictly less than x.

    Returns
    -------
    log (\\Phi(x) - \\Phi(y))

    """
    x = (x - mu) / sigma / aet.sqrt(2.0)
    y = (y - mu) / sigma / aet.sqrt(2.0)

    # To stabilize the computation, consider these three regions:
    # 1) x > y > 0 => Use erf(x) = 1 - e^{-x^2} erfcx(x) and erf(y) = 1 - e^{-y^2} erfcx(y)
    # 2) 0 > x > y => Use erf(x) = e^{-x^2} erfcx(-x) and erf(y) = e^{-y^2} erfcx(-y)
    # 3) x > 0 > y => Naive formula log( (erf(x) - erf(y)) / 2 ) works fine.
    return aet.log(0.5) + aet.switch(
        aet.gt(y, 0),
        -aet.square(y) + aet.log(
            aet.erfcx(y) -
            aet.exp(aet.square(y) - aet.square(x)) * aet.erfcx(x)),
        aet.switch(
            aet.lt(x, 0),  # 0 > x > y
            -aet.square(x) + aet.log(
                aet.erfcx(-x) -
                aet.exp(aet.square(x) - aet.square(y)) * aet.erfcx(-y)),
            aet.log(aet.erf(x) - aet.erf(y)),  # x > 0 > y
        ),
    )
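For moderate tails the erfcx-based branch agrees with the naive log of the CDF difference, and it keeps working where the naive difference rounds to zero. A NumPy/SciPy sketch of branch 1 only (x > y > 0 after standardization; the helper name is invented):

import numpy as np
from scipy.special import erfcx
from scipy.stats import norm

def log_diff_normal_cdf_np(mu, sigma, x, y):
    # Branch 1 of the switch above: x > y > 0 after standardization.
    xs = (x - mu) / sigma / np.sqrt(2.0)
    ys = (y - mu) / sigma / np.sqrt(2.0)
    return np.log(0.5) - ys**2 + np.log(erfcx(ys) - np.exp(ys**2 - xs**2) * erfcx(xs))

# Moderate tail: both approaches agree (~ -15.07).
print(log_diff_normal_cdf_np(0.0, 1.0, 6.0, 5.0))
print(np.log(norm.cdf(6.0) - norm.cdf(5.0)))

# Far tail: the naive difference rounds to 0 (log = -inf); the stable form stays finite.
print(log_diff_normal_cdf_np(0.0, 1.0, 21.0, 20.0))
print(np.log(norm.cdf(21.0) - norm.cdf(20.0)))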
Example #22
 def full(self, X, Xs=None):
     X, Xs = self._slice(X, Xs)
     rx = self.lfunc(at.as_tensor_variable(X), self.args)
     if Xs is None:
         rz = self.lfunc(at.as_tensor_variable(X), self.args)
         r2 = self.square_dist(X, X)
     else:
         rz = self.lfunc(at.as_tensor_variable(Xs), self.args)
         r2 = self.square_dist(X, Xs)
     rx2 = at.reshape(at.square(rx), (-1, 1))
     rz2 = at.reshape(at.square(rz), (1, -1))
     return at.sqrt((2.0 * at.outer(rx, rz)) / (rx2 + rz2)) * at.exp(-1.0 * r2 / (rx2 + rz2))
Example #23
    def backward(self, rv_var, rv_value):
        if rv_var.broadcastable[-1]:
            # If this variable is just a bunch of scalars/degenerate
            # Dirichlets, we can't transform it
            return rv_value

        y = rv_value.T
        y = at.concatenate([y, -at.sum(y, 0, keepdims=True)])
        # "softmax" with vector support and no deprication warning:
        e_y = at.exp(y - at.max(y, 0, keepdims=True))
        x = e_y / at.sum(e_y, 0, keepdims=True)
        return floatX(x.T)
Example #24
    def test_local_sigm_times_exp(self):
        # Test the `local_sigm_times_exp` optimization.
        # exp(x) * sigm(-x) -> sigm(x)
        # exp(-x) * sigm(x) -> sigm(-x)

        def match(func, ops):
            # print [node.op.scalar_op for node in func.maker.fgraph.toposort()]
            assert [node.op for node in func.maker.fgraph.toposort()] == ops

        m = self.get_mode(excluding=["local_elemwise_fusion", "inplace"])
        x, y = tt.vectors("x", "y")

        f = aesara.function([x], sigmoid(-x) * tt.exp(x), mode=m)
        match(f, [sigmoid])
        assert check_stack_trace(f, ops_to_check=sigmoid)

        f = aesara.function([x], sigmoid(x) * tt.exp(-x), mode=m)
        match(f, [tt.neg, sigmoid])
        assert check_stack_trace(f, ops_to_check=sigmoid)

        f = aesara.function([x], -(-(-(sigmoid(x)))) * tt.exp(-x), mode=m)
        match(f, [tt.neg, sigmoid, tt.neg])
        # assert check_stack_trace(f, ops_to_check=sigmoid)

        f = aesara.function(
            [x, y],
            (sigmoid(x) * sigmoid(-y) * -tt.exp(-x) * tt.exp(x * y) *
             tt.exp(y)),
            mode=m,
        )
        topo = f.maker.fgraph.toposort()
        for op, nb in [(sigmoid, 2), (tt.mul, 2), (tt.neg, 1), (tt.exp, 1)]:
            assert sum([n.op == op for n in topo]) == nb
Example #25
    def grad(self, inputs, cost_grad):
        """
        In defining the gradient, the Finite Fourier Transform is viewed as
        a complex-differentiable function of a complex variable
        """
        a = inputs[0]
        n = inputs[1]
        axis = inputs[2]
        grad = cost_grad[0]
        if not isinstance(axis, tensor.TensorConstant):
            raise NotImplementedError(
                "%s: gradient is currently implemented"
                " only for axis being a Aesara constant" %
                self.__class__.__name__)
        axis = int(axis.data)
        # notice that the number of actual elements in wrto is independent of
        # possible padding or truncation:
        elem = tensor.arange(0, tensor.shape(a)[axis], 1)
        # accounts for padding:
        freq = tensor.arange(0, n, 1)
        outer = tensor.outer(freq, elem)
        pow_outer = tensor.exp(((-2 * math.pi * 1j) * outer) / (1.0 * n))
        res = tensor.tensordot(grad, pow_outer, (axis, 0))

        # This would be simpler but not implemented by aesara:
        # res = tensor.switch(tensor.lt(n, tensor.shape(a)[axis]),
        # tensor.set_subtensor(res[...,n::], 0, False, False), res)

        # Instead we resort to that to account for truncation:
        flip_shape = list(np.arange(0, a.ndim)[::-1])
        res = res.dimshuffle(flip_shape)
        res = tensor.switch(
            tensor.lt(n,
                      tensor.shape(a)[axis]),
            tensor.set_subtensor(
                res[n::, ],
                0,
                False,
                False,
            ),
            res,
        )
        res = res.dimshuffle(flip_shape)

        # ensures that the gradient shape conforms to the input shape:
        out_shape = (list(np.arange(0, axis)) + [a.ndim - 1] +
                     list(np.arange(axis, a.ndim - 1)))
        res = res.dimshuffle(*out_shape)
        return [res, None, None]
Example #26
    def test_1msigmoid(self):
        if not register_local_1msigmoid:
            return

        m = self.get_mode()
        x = tt.fmatrix()

        # tests exp_over_1_plus_exp
        f = aesara.function([x], 1 - tt.exp(x) / (1 + tt.exp(x)), mode=m)
        assert check_stack_trace(f, ops_to_check=[tt.neg, sigmoid_inplace])
        assert [node.op for node in f.maker.fgraph.toposort()] == [
            tt.neg,
            sigmoid_inplace,
        ]

        # tests inv_1_plus_exp
        f = aesara.function([x],
                            1 - tt.fill(x, 1.0) / (1 + tt.exp(-x)),
                            mode=m)
        assert check_stack_trace(f, ops_to_check=[tt.neg, sigmoid_inplace])
        assert [node.op for node in f.maker.fgraph.toposort()] == [
            tt.neg,
            sigmoid_inplace,
        ]
Example #27
    def test_DownsampleFactorMax_hessian(self):
        # Example provided by Frans Cronje, see
        # https://groups.google.com/d/msg/theano-users/qpqUy_3glhw/JMwIvlN5wX4J
        x_vec = vector("x")
        z = aet.dot(x_vec.dimshuffle(0, "x"), x_vec.dimshuffle("x", 0))
        y = pool_2d(input=z, ws=(2, 2), ignore_border=True)
        C = aet.exp(aet_sum(y))

        grad_hess = aesara.gradient.hessian(cost=C, wrt=x_vec)
        fn_hess = function(inputs=[x_vec], outputs=grad_hess)

        # The value has been manually computed from the theoretical gradient,
        # and confirmed by the implementation.

        assert np.allclose(fn_hess([1, 2]), [[0.0, 0.0], [0.0, 982.7667]])
Example #28
    def test_log1pexp_to_softplus(self):
        m = aesara.config.mode
        if m == "FAST_COMPILE":
            m = "FAST_RUN"

        x = tt.vector()

        out = tt.log(1 + tt.exp(x))
        f = aesara.function([x], out, mode=self.m)

        # Fix ticket #4581 first
        # assert check_stack_trace(f, ops_to_check='all')
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 1
        assert isinstance(topo[0].op.scalar_op, ScalarSoftplus)
        f(np.random.rand(54).astype(config.floatX))
Example #29
def log1mexp(x):
    r"""Return log(1 - exp(-x)).

    This function is numerically more stable than the naive approach.

    For details, see
    https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf

    References
    ----------
    .. [Machler2012] Martin Mächler (2012).
        "Accurately computing `\log(1-\exp(- \mid a \mid))` Assessed by the Rmpfr
        package"

    """
    return at.switch(at.lt(x, 0.6931471805599453), at.log(-at.expm1(-x)),
                     at.log1p(-at.exp(-x)))
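The threshold 0.6931... is log(2); below it log(-expm1(-x)) is the accurate branch, above it log1p(-exp(-x)) is. A NumPy illustration at the two extremes (values invented):

import numpy as np

x = 1e-20
print(np.log(1.0 - np.exp(-x)))       # naive form: exp(-x) rounds to 1, log(0) = -inf
print(np.log(-np.expm1(-x)))          # branch used for x < log(2): ~ -46.05

x = 50.0
print(np.log1p(-np.exp(-x)))          # branch used for x >= log(2): ~ -1.9e-22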
Example #30
def log_normal(x, mean, **kwargs):
    """
    Calculate logarithm of normal distribution at point `x`
    with given `mean` and `std`

    Parameters
    ----------
    x: Tensor
        point of evaluation
    mean: Tensor
        mean of normal distribution
    kwargs: one of parameters `{sigma, tau, w, rho}`

    Notes
    -----
    There are four variants for density parametrization.
    They are:
        1) standard deviation - `std`
        2) `w`, logarithm of `std` :math:`w = log(std)`
        3) `rho` that follows this equation :math:`rho = log(exp(std) - 1)`
        4) `tau` that follows this equation :math:`tau = std^{-1}`
    ----
    """
    sigma = kwargs.get("sigma")
    w = kwargs.get("w")
    rho = kwargs.get("rho")
    tau = kwargs.get("tau")
    eps = kwargs.get("eps", 0.0)
    check = sum(map(lambda a: a is not None, [sigma, w, rho, tau]))
    if check > 1:
        raise ValueError("more than one required kwarg is passed")
    if check == 0:
        raise ValueError("none of required kwarg is passed")
    if sigma is not None:
        std = sigma
    elif w is not None:
        std = aet.exp(w)
    elif rho is not None:
        std = rho2sigma(rho)
    else:
        std = tau**(-1)
    std += f(eps)
    return f(c) - aet.log(aet.abs_(std)) - (x - mean)**2 / (2.0 * std**2)
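With sigma given directly and eps = 0, the returned expression is the ordinary Gaussian log-density; the sketch below assumes the module-level constant c is the usual -0.5*log(2*pi) normalizer (as the formula implies) and skips the floatX cast f. A NumPy/SciPy cross-check with arbitrary numbers:

import numpy as np
from scipy.stats import norm

x, mean, sigma = 1.3, 0.5, 2.0
c = -0.5 * np.log(2 * np.pi)          # assumed value of the module-level constant

logp = c - np.log(np.abs(sigma)) - (x - mean) ** 2 / (2.0 * sigma ** 2)
print(logp)
print(norm.logpdf(x, loc=mean, scale=sigma))   # agrees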