Example #1
def logpow(x, m):
    """
    Calculates log(x**m), since m * log(x) fails when m = x = 0.
    """
    # return m * log(x)
    return aet.switch(aet.eq(x, 0), aet.switch(aet.eq(m, 0), 0.0, -np.inf),
                      m * aet.log(x))
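
A quick sanity check of the edge cases (a minimal sketch, assuming `aesara` and the `aet` alias for `aesara.tensor` used above):

import aesara
import aesara.tensor as aet
import numpy as np

x, m = aet.dscalars("x", "m")
f = aesara.function([x, m], logpow(x, m))
print(f(0.0, 0.0))  # 0.0, since log(0**0) = log(1)
print(f(0.0, 2.0))  # -inf, since log(0**2) = log(0)
print(f(2.0, 3.0))  # 3 * log(2) ≈ 2.079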
Example #2
def mixture_model(random_seed=1234):
    """Sample mixture model to use in benchmarks"""
    np.random.seed(random_seed)
    size = 1000
    w_true = np.array([0.35, 0.4, 0.25])
    mu_true = np.array([0.0, 2.0, 5.0])
    sigma = np.array([0.5, 0.5, 1.0])
    component = np.random.choice(mu_true.size, size=size, p=w_true)
    x = np.random.normal(mu_true[component], sigma[component], size=size)

    with pm.Model() as model:
        w = pm.Dirichlet("w", a=np.ones_like(w_true))
        mu = pm.Normal("mu", mu=0.0, sd=10.0, shape=w_true.shape)
        enforce_order = pm.Potential(
            "enforce_order",
            aet.switch(mu[0] - mu[1] <= 0, 0.0, -np.inf) +
            aet.switch(mu[1] - mu[2] <= 0, 0.0, -np.inf),
        )
        tau = pm.Gamma("tau", alpha=1.0, beta=1.0, shape=w_true.shape)
        pm.NormalMixture("x_obs", w=w, mu=mu, tau=tau, observed=x)

    # Initialization can be poorly specified; this is a hack to make it work.
    start = {
        "mu": mu_true.copy(),
        "tau_log__": np.log(1.0 / sigma**2),
        "w_stickbreaking__": np.array([-0.03, 0.44]),
    }
    return model, start
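
A hedged usage sketch: the returned `start` dict seeds the sampler near the true parameters (assuming the PyMC3-era `pm.sample` API matching the snippet above):

model, start = mixture_model()
with model:
    trace = pm.sample(1000, start=start, random_seed=1234)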
Example #3
    def dlogp(inputs, gradients):
        (g_logp, ) = gradients
        cov, delta = inputs

        g_logp.tag.test_value = floatX(1.0)
        n, k = delta.shape

        chol_cov = cholesky(cov)
        diag = aet.nlinalg.diag(chol_cov)
        ok = aet.all(diag > 0)

        chol_cov = aet.switch(ok, chol_cov, aet.fill(chol_cov, 1))
        delta_trans = solve_lower(chol_cov, delta.T).T

        inner = n * aet.eye(k) - aet.dot(delta_trans.T, delta_trans)
        g_cov = solve_upper(chol_cov.T, inner)
        g_cov = solve_upper(chol_cov.T, g_cov.T)

        tau_delta = solve_upper(chol_cov.T, delta_trans.T)
        g_delta = tau_delta.T

        g_cov = aet.switch(ok, g_cov, -np.nan)
        g_delta = aet.switch(ok, g_delta, -np.nan)

        return [-0.5 * g_cov * g_logp, -g_delta * g_logp]
Example #4
def build_disaster_model(masked=False):
    # fmt: off
    disasters_data = np.array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6,
                               3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5,
                               2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0,
                               1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1,
                               0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
                               3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
                               0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])
    # fmt: on
    if masked:
        disasters_data[[23, 68]] = -1
        disasters_data = np.ma.masked_values(disasters_data, value=-1)
    years = len(disasters_data)

    with pm.Model() as model:
        # Prior for distribution of switchpoint location
        switchpoint = pm.DiscreteUniform("switchpoint", lower=0, upper=years)
        # Priors for pre- and post-switch mean number of disasters
        early_mean = pm.Exponential("early_mean", lam=1.0)
        late_mean = pm.Exponential("late_mean", lam=1.0)
        # Allocate appropriate Poisson rates to years before and after current
        # switchpoint location
        idx = np.arange(years)
        rate = at.switch(switchpoint >= idx, early_mean, late_mean)
        # Data likelihood
        pm.Poisson("disasters", rate, observed=disasters_data)
    return model
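
A minimal usage sketch under the same `pm` import; PyMC assigns a discrete sampler (e.g. Metropolis) to `switchpoint` and a gradient-based one to the continuous rates automatically:

model = build_disaster_model()
with model:
    trace = pm.sample(1000, tune=1000)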
Example #5
def normal_lccdf(mu, sigma, x):
    z = (x - mu) / sigma
    return aet.switch(
        aet.gt(z, 1.0),
        aet.log(aet.erfcx(z / aet.sqrt(2.0)) / 2.0) - aet.sqr(z) / 2.0,
        aet.log1p(-aet.erfc(-z / aet.sqrt(2.0)) / 2.0),
    )
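
A numerical sanity check against SciPy's survival function (a sketch assuming `aet` is `aesara.tensor`):

import aesara
import aesara.tensor as aet
import numpy as np
from scipy import stats

mu, sigma, x = aet.dscalars("mu", "sigma", "x")
f = aesara.function([mu, sigma, x], normal_lccdf(mu, sigma, x))
print(f(0.0, 1.0, 3.0))            # ≈ -6.6077
print(np.log(stats.norm.sf(3.0)))  # ≈ -6.6077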
Example #6
    def test_composite_elemwise_float16(self):
        w = bvector()
        x = vector(dtype="float16")
        y = fvector()

        cz = tanh(x + aet.cast(y, "float16"))
        o = (
            cz
            - cz ** 2
            + aet.cast(x, "int16")
            + aet.cast(x, "float32")
            + aet.cast(w, "float16")
            - aet.constant(np.float16(1.0))
        )

        aesara.function([w, x, y], o, mode=mode_with_gpu)

        v = vector(dtype="uint8")
        w = vector(dtype="float16")
        x = vector(dtype="float16")
        y = vector(dtype="float16")
        z = vector(dtype="float16")

        o = aet.switch(v, mul(w, x, y), z)
        aesara.function([v, w, x, y, z], o, mode=mode_with_gpu)
Example #7
    def __call__(self, X):
        XY = X.dot(X.T)
        x2 = at.sum(X**2, axis=1).dimshuffle(0, "x")
        X2e = at.repeat(x2, X.shape[0], axis=1)
        H = X2e + X2e.T - 2.0 * XY

        V = at.sort(H.flatten())
        length = V.shape[0]
        # median distance
        m = at.switch(
            at.eq((length % 2), 0),
            # if even vector
            at.mean(V[((length // 2) - 1):((length // 2) + 1)]),
            # if odd vector
            V[length // 2],
        )

        h = 0.5 * m / at.log(floatX(H.shape[0]) + floatX(1))

        #  RBF
        Kxy = at.exp(-H / h / 2.0)

        # Derivative
        dxkxy = -at.dot(Kxy, X)
        sumkxy = at.sum(Kxy, axis=-1, keepdims=True)
        dxkxy = at.add(dxkxy, at.mul(X, sumkxy)) / h

        return Kxy, dxkxy
Example #8
def logsumexp(x, axis=None, keepdims=True):
    # Adapted from https://github.com/Theano/Theano/issues/1563
    x_max = aet.max(x, axis=axis, keepdims=True)
    x_max = aet.switch(aet.isinf(x_max), 0, x_max)
    res = aet.log(aet.sum(aet.exp(x - x_max), axis=axis,
                          keepdims=True)) + x_max
    return res if keepdims else res.squeeze()
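
A quick check against SciPy (assuming `aet` is `aesara.tensor`):

import aesara
import aesara.tensor as aet
import numpy as np
from scipy.special import logsumexp as sp_logsumexp

v = aet.dvector("v")
f = aesara.function([v], logsumexp(v, axis=0, keepdims=False))
arr = np.array([1.0, 2.0, 3.0])
print(f(arr), sp_logsumexp(arr))  # both ≈ 3.4076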
Example #9
    def logp(value, distribution, lower, upper):
        """
        Calculate log-probability of Bounded distribution at specified value.

        Parameters
        ----------
        value: numeric
            Value for which log-probability is calculated.
        distribution: TensorVariable
            Distribution which is being bounded
        lower: numeric
            Lower bound for the distribution being bounded.
        upper: numeric
            Upper bound for the distribution being bounded.

        Returns
        -------
        TensorVariable
        """
        res = at.switch(
            at.or_(at.lt(value, lower), at.gt(value, upper)),
            -np.inf,
            logp(distribution, value),
        )

        return check_parameters(
            res,
            lower <= upper,
            msg="lower <= upper",
        )
Example #10
def normal_lcdf(mu, sigma, x):
    """Compute the log of the cumulative density function of the normal."""
    z = (x - mu) / sigma
    return aet.switch(
        aet.lt(z, -1.0),
        aet.log(aet.erfcx(-z / aet.sqrt(2.0)) / 2.0) - aet.sqr(z) / 2.0,
        aet.log1p(-aet.erfc(z / aet.sqrt(2.0)) / 2.0),
    )
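
The mirror-image check for the log-CDF, reusing the symbolic `mu`, `sigma`, `x` from the `normal_lccdf` sketch above (same assumptions):

f = aesara.function([mu, sigma, x], normal_lcdf(mu, sigma, x))
print(f(0.0, 1.0, -3.0))        # ≈ -6.6077
print(stats.norm.logcdf(-3.0))  # ≈ -6.6077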
Example #11
    def _step(i, pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r):
        xk = -(x * k1 * k2) / (k3 * k4)
        pk = pkm1 + pkm2 * xk
        qk = qkm1 + qkm2 * xk
        pkm2 = pkm1
        pkm1 = pk
        qkm2 = qkm1
        qkm1 = qk

        xk = (x * k5 * k6) / (k7 * k8)
        pk = pkm1 + pkm2 * xk
        qk = qkm1 + qkm2 * xk
        pkm2 = pkm1
        pkm1 = pk
        qkm2 = qkm1
        qkm1 = qk

        old_r = r
        r = aet.switch(aet.eq(qk, zero), r, pk / qk)

        k1 += one
        k2 += k26update
        k3 += two
        k4 += two
        k5 += one
        k6 -= k26update
        k7 += two
        k8 += two

        big_cond = aet.gt(aet.abs_(qk) + aet.abs_(pk), BIG)
        biginv_cond = aet.or_(aet.lt(aet.abs_(qk), BIGINV),
                              aet.lt(aet.abs_(pk), BIGINV))

        pkm2 = aet.switch(big_cond, pkm2 * BIGINV, pkm2)
        pkm1 = aet.switch(big_cond, pkm1 * BIGINV, pkm1)
        qkm2 = aet.switch(big_cond, qkm2 * BIGINV, qkm2)
        qkm1 = aet.switch(big_cond, qkm1 * BIGINV, qkm1)

        pkm2 = aet.switch(biginv_cond, pkm2 * BIG, pkm2)
        pkm1 = aet.switch(biginv_cond, pkm1 * BIG, pkm1)
        qkm2 = aet.switch(biginv_cond, qkm2 * BIG, qkm2)
        qkm1 = aet.switch(biginv_cond, qkm1 * BIG, qkm1)

        return (
            (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r),
            until(aet.abs_(old_r - r) < (THRESH * aet.abs_(r))),
        )
Example #12
def moment_censored(op, rv, dist, lower, upper):
    moment = at.switch(
        at.eq(lower, -np.inf),
        at.switch(
            at.isinf(upper),
            # lower = -inf, upper = inf
            0,
            # lower = -inf, upper = x
            upper - 1,
        ),
        at.switch(
            at.eq(upper, np.inf),
            # lower = x, upper = inf
            lower + 1,
            # lower = x, upper = x
            (lower + upper) / 2,
        ),
    )
    moment = at.full_like(dist, moment)
    return moment
Example #13
def log_diff_normal_cdf(mu, sigma, x, y):
    """
    Compute :math:`\\log(\\Phi(\\frac{x - \\mu}{\\sigma}) - \\Phi(\\frac{y - \\mu}{\\sigma}))` safely in log space.

    Parameters
    ----------
    mu: float
        mean
    sigma: float
        std

    x: float

    y: float
        must be strictly less than x.

    Returns
    -------
    log(\\Phi(x) - \\Phi(y))

    """
    x = (x - mu) / sigma / aet.sqrt(2.0)
    y = (y - mu) / sigma / aet.sqrt(2.0)

    # To stabilize the computation, consider these three regions:
    # 1) x > y > 0 => Use erf(x) = 1 - e^{-x^2} erfcx(x) and erf(y) = 1 - e^{-y^2} erfcx(y)
    # 2) 0 > x > y => Use erf(x) = e^{-x^2} erfcx(-x) and erf(y) = e^{-y^2} erfcx(-y)
    # 3) x > 0 > y => Naive formula log( (erf(x) - erf(y)) / 2 ) works fine.
    return aet.log(0.5) + aet.switch(
        aet.gt(y, 0),
        -aet.square(y) + aet.log(
            aet.erfcx(y) -
            aet.exp(aet.square(y) - aet.square(x)) * aet.erfcx(x)),
        aet.switch(
            aet.lt(x, 0),  # 0 > x > y
            -aet.square(x) + aet.log(
                aet.erfcx(-x) -
                aet.exp(aet.square(x) - aet.square(y)) * aet.erfcx(-y)),
            aet.log(aet.erf(x) - aet.erf(y)),  # x > 0 > y
        ),
    )
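
A spot check of the branch for x > 0 > y (a sketch under the same `aet`/`aesara` assumptions as above); P(-1 < Z < 1) ≈ 0.6827, so its log is ≈ -0.3817:

mu, sigma, x, y = aet.dscalars("mu", "sigma", "x", "y")
f = aesara.function([mu, sigma, x, y], log_diff_normal_cdf(mu, sigma, x, y))
print(f(0.0, 1.0, 1.0, -1.0))  # ≈ -0.3817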
Example #14
def log_i0(x):
    """
    Calculates the logarithm of the zeroth-order modified Bessel function of the first kind.
    """
    return at.switch(
        at.lt(x, 5),
        at.log1p(x**2.0 / 4.0 + x**4.0 / 64.0 + x**6.0 / 2304.0 +
                 x**8.0 / 147456.0 + x**10.0 / 14745600.0 +
                 x**12.0 / 2123366400.0),
        x - 0.5 * at.log(2.0 * np.pi * x) +
        at.log1p(1.0 / (8.0 * x) + 9.0 / (128.0 * x**2.0) + 225.0 /
                 (3072.0 * x**3.0) + 11025.0 / (98304.0 * x**4.0)),
    )
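
A check of both branches against SciPy's Bessel function (assuming `at` is `aesara.tensor`):

import aesara
import aesara.tensor as at
import numpy as np
from scipy.special import i0

xv = at.dscalar("xv")
f = aesara.function([xv], log_i0(xv))
print(f(1.0), np.log(i0(1.0)))    # series branch, both ≈ 0.2359
print(f(10.0), np.log(i0(10.0)))  # asymptotic branch, both ≈ 7.9430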
Example #15
def incomplete_beta(a, b, value):
    """Incomplete beta implementation
    Power series and continued fraction expansions chosen for best numerical
    convergence across the board based on inputs.
    """
    machep = aet.constant(np.finfo(np.float64).eps, dtype="float64")
    one = aet.constant(1, dtype="float64")
    w = one - value

    ps = incomplete_beta_ps(a, b, value)

    flip = aet.gt(value, (a / (a + b)))
    aa, bb = a, b
    a = aet.switch(flip, bb, aa)
    b = aet.switch(flip, aa, bb)
    xc = aet.switch(flip, value, w)
    x = aet.switch(flip, w, value)

    tps = incomplete_beta_ps(a, b, x)
    tps = aet.switch(aet.le(tps, machep), one - machep, one - tps)

    # Choose which continued fraction expansion for best convergence.
    small = aet.lt(x * (a + b - 2.0) - (a - one), 0.0)
    cfe = incomplete_beta_cfe(a, b, x, small)
    w = aet.switch(small, cfe, cfe / xc)

    # Direct incomplete beta accounting for flipped a, b.
    t = aet.exp(a * aet.log(x) + b * aet.log(xc) + gammaln(a + b) -
                gammaln(a) - gammaln(b) + aet.log(w / a))

    t = aet.switch(flip, aet.switch(aet.le(t, machep), one - machep, one - t),
                   t)
    return aet.switch(
        aet.and_(flip, aet.and_(aet.le((b * x), one), aet.le(x, 0.95))),
        tps,
        aet.switch(aet.and_(aet.le(b * value, one), aet.le(value, 0.95)), ps,
                   t),
    )
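
A hedged check against SciPy's regularized incomplete beta; it assumes the helpers `incomplete_beta_ps`, `incomplete_beta_cfe` (Example #26) and `gammaln` are in scope, since `incomplete_beta` depends on them:

from scipy.special import betainc

a, b, v = aet.dscalars("a", "b", "v")
f = aesara.function([a, b, v], incomplete_beta(a, b, v))
print(f(2.0, 3.0, 0.5), betainc(2.0, 3.0, 0.5))  # both ≈ 0.6875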
Example #16
def adagrad_window(loss_or_grads=None,
                   params=None,
                   learning_rate=0.001,
                   epsilon=0.1,
                   n_win=10):
    """Returns a function that returns parameter updates.
    Instead of accumulated estimate, uses running window

    Parameters
    ----------
    loss_or_grads: symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params: list of shared variables
        The variables to generate update expressions for
    learning_rate: float
        Learning rate.
    epsilon: float
        Offset to avoid zero-division in the normalizer of adagrad.
    n_win: int
        Number of past steps to calculate scales of parameter gradients.

    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression
    """
    if loss_or_grads is None and params is None:
        return partial(adagrad_window, **_get_call_kwargs(locals()))
    elif loss_or_grads is None or params is None:
        raise ValueError(
            "Please provide both `loss_or_grads` and `params` to get updates")
    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()
    for param, grad in zip(params, grads):
        i = aesara.shared(pm.floatX(0))
        i_int = i.astype("int32")
        value = param.get_value(borrow=True)
        accu = aesara.shared(
            np.zeros(value.shape + (n_win, ), dtype=value.dtype))

        # Write the current squared gradient into slot i of the window
        accu_new = aet.set_subtensor(accu[..., i_int], grad**2)
        i_new = aet.switch((i + 1) < n_win, i + 1, 0)
        updates[accu] = accu_new
        updates[i] = i_new

        accu_sum = accu_new.sum(axis=-1)
        updates[param] = param - (learning_rate * grad /
                                  aet.sqrt(accu_sum + epsilon))
    return updates
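
A hedged usage sketch: minimizing a one-parameter quadratic with the windowed updates (assumes `pm.floatX` and the helpers referenced above are importable; `learning_rate` is raised so convergence is visible within a few hundred steps):

w = aesara.shared(pm.floatX(10.0))
loss = (w - 3.0) ** 2
step = aesara.function([], loss,
                       updates=adagrad_window(loss, [w], learning_rate=0.1))
for _ in range(1000):
    step()
print(w.get_value())  # ≈ 3.0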
Example #17
def local_check_parameter_to_ninf_switch(fgraph, node):
    if isinstance(node.op, CheckParameterValue):
        logp_expr, *logp_conds = node.inputs
        if len(logp_conds) > 1:
            logp_cond = at.all(logp_conds)
        else:
            (logp_cond,) = logp_conds
        out = at.switch(logp_cond, logp_expr, -np.inf)
        out.name = node.op.msg

        if out.dtype != node.outputs[0].dtype:
            out = at.cast(out, node.outputs[0].dtype)

        return [out]
Example #18
    def grad(self, inputs, cost_grad):
        """
        In defining the gradient, the Finite Fourier Transform is viewed as
        a complex-differentiable function of a complex variable
        """
        a = inputs[0]
        n = inputs[1]
        axis = inputs[2]
        grad = cost_grad[0]
        if not isinstance(axis, tensor.TensorConstant):
            raise NotImplementedError(
                "%s: gradient is currently implemented"
                " only for axis being a Aesara constant" %
                self.__class__.__name__)
        axis = int(axis.data)
        # note that the number of elements we differentiate with respect to
        # is independent of possible padding or truncation:
        elem = tensor.arange(0, tensor.shape(a)[axis], 1)
        # accounts for padding:
        freq = tensor.arange(0, n, 1)
        outer = tensor.outer(freq, elem)
        pow_outer = tensor.exp(((-2 * math.pi * 1j) * outer) / (1.0 * n))
        res = tensor.tensordot(grad, pow_outer, (axis, 0))

        # This would be simpler but not implemented by aesara:
        # res = tensor.switch(tensor.lt(n, tensor.shape(a)[axis]),
        # tensor.set_subtensor(res[...,n::], 0, False, False), res)

        # Instead we resort to that to account for truncation:
        flip_shape = list(np.arange(0, a.ndim)[::-1])
        res = res.dimshuffle(flip_shape)
        res = tensor.switch(
            tensor.lt(n,
                      tensor.shape(a)[axis]),
            tensor.set_subtensor(
                res[n::, ],
                0,
                False,
                False,
            ),
            res,
        )
        res = res.dimshuffle(flip_shape)

        # ensures that the gradient shape conforms to the input shape:
        out_shape = (list(np.arange(0, axis)) + [a.ndim - 1] +
                     list(np.arange(axis, a.ndim - 1)))
        res = res.dimshuffle(*out_shape)
        return [res, None, None]
Example #19
    def _new_initial(self, size, deterministic, more_replacements=None):
        aesara_condition_is_here = isinstance(deterministic, Variable)
        if aesara_condition_is_here:
            return at.switch(
                deterministic,
                at.repeat(self.mean.dimshuffle("x", 0),
                          size if size is not None else 1, -1),
                self.histogram[self.randidx(size)],
            )
        else:
            if deterministic:
                return at.repeat(self.mean.dimshuffle("x", 0),
                                 size if size is not None else 1, -1)
            else:
                return self.histogram[self.randidx(size)]
Example #20
def log1mexp(x):
    r"""Return log(1 - exp(-x)).

    This function is numerically more stable than the naive approach.

    For details, see
    https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf

    References
    ----------
    .. [Machler2012] Martin Mächler (2012).
        "Accurately computing `\log(1-\exp(- \mid a \mid))` Assessed by
        the Rmpfr package"

    """
    return at.switch(at.lt(x, 0.6931471805599453), at.log(-at.expm1(-x)),
                     at.log1p(-at.exp(-x)))
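
A quick numerical check (assuming `at` is `aesara.tensor`); the constant 0.6931... in the switch is log(2), the crossover point recommended by Mächler:

import aesara
import aesara.tensor as at
import numpy as np

xv = at.dscalar("xv")
f = aesara.function([xv], log1mexp(xv))
print(f(1.0), np.log(1 - np.exp(-1.0)))  # both ≈ -0.4587
print(f(1e-10))                          # ≈ -23.03, still stable for tiny x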
Example #21
    def _new_initial(self, size, deterministic, more_replacements=None):
        aesara_condition_is_here = isinstance(deterministic, Variable)
        if size is None:
            size = 1
        size = at.as_tensor(size)
        if aesara_condition_is_here:
            return at.switch(
                deterministic,
                at.repeat(self.mean.reshape((1, -1)), size, -1),
                self.histogram[self.randidx(size)],
            )
        else:
            if deterministic:
                raise NotImplementedInference(
                    "Deterministic sampling from a Histogram is broken in v4")
                # unreachable; kept from the pre-v4 implementation:
                return at.repeat(self.mean.reshape((1, -1)), size, -1)
            else:
                return self.histogram[self.randidx(size)]
Example #22
def bound(logp, *conditions, **kwargs):
    """
    Bounds a log probability density with several conditions.
    When conditions are not met, the logp values are replaced by -inf.

    Note that bound should not be used to enforce the logic of the logp under the normal
    support, as it can be disabled by the user via check_bounds = False in pm.Model().

    Parameters
    ----------
    logp: float
    *conditions: booleans
    broadcast_conditions: bool (optional, default=True)
        If True, conditions are broadcasted and applied element-wise to each value in logp.
        If False, conditions are collapsed via at.all(). As a consequence the entire logp
        array is either replaced by -inf or unchanged.

        Setting broadcast_conditions to False is necessary for most (all?) multivariate
        distributions, where the dimensions of the conditions do not unambiguously match
        that of the logp.

    Returns
    -------
    logp with elements set to -inf where any condition is False
    """

    # If called inside a model context, see if bounds check is disabled
    try:
        from pymc3.model import modelcontext

        model = modelcontext(kwargs.get("model"))
        if not model.check_bounds:
            return logp
    except TypeError:  # No model found
        pass

    broadcast_conditions = kwargs.get("broadcast_conditions", True)

    if broadcast_conditions:
        alltrue = alltrue_elemwise
    else:
        alltrue = alltrue_scalar

    return at.switch(alltrue(conditions), logp, -np.inf)
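
A minimal sketch of the element-wise behaviour (assuming `at` is `aesara.tensor` and that pymc3 and the `alltrue_*` helpers are importable):

import aesara
import aesara.tensor as at
import numpy as np

value = at.dvector("value")
f = aesara.function([value], bound(-value ** 2, value > 0, value < 1))
print(f(np.array([0.5, 2.0])))  # [-0.25, -inf]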
Example #23
    def test_composite_elemwise_float16(self):
        w = aesara.tensor.bvector()
        x = aesara.tensor.vector(dtype="float16")
        y = aesara.tensor.fvector()

        cz = tensor.tanh(x + tensor.cast(y, "float16"))
        o = (cz - cz**2 + tensor.cast(x, "int16") + tensor.cast(x, "float32") +
             tensor.cast(w, "float16") - tensor.constant(np.float16(1.0)))

        aesara.function([w, x, y], o, mode=mode_with_gpu)

        v = aesara.tensor.vector(dtype="uint8")
        w = aesara.tensor.vector(dtype="float16")
        x = aesara.tensor.vector(dtype="float16")
        y = aesara.tensor.vector(dtype="float16")
        z = aesara.tensor.vector(dtype="float16")

        o = tensor.switch(v, tensor.mul(w, x, y), z)
        aesara.function([v, w, x, y, z], o, mode=mode_with_gpu)
Example #24
def bound(logp, *conditions, **kwargs):
    """
    Bounds a log probability density with several conditions.

    Parameters
    ----------
    logp: float
    *conditions: booleans
    broadcast_conditions: bool (optional, default=True)
        If True, broadcasts logp to match the largest shape of the conditions.
        This is used e.g. in DiscreteUniform where logp is a scalar constant and the shape
        is specified via the conditions.
        If False, will return the same shape as logp.
        This is used e.g. in Multinomial where broadcasting can lead to differences in the logp.

    Returns
    -------
    logp with elements set to -inf where any condition is False
    """

    # If called inside a model context, see if bounds check is disabled
    try:
        model = modelcontext(kwargs.get("model"))
        if not model.check_bounds:
            return logp
    except TypeError:  # No model found
        pass

    broadcast_conditions = kwargs.get("broadcast_conditions", True)

    if broadcast_conditions:
        alltrue = alltrue_elemwise
    else:
        alltrue = alltrue_scalar

    return aet.switch(alltrue(conditions), logp, -np.inf)
Example #25
    def logp(self, states):
        r"""Create a Theano graph that computes the log-likelihood for a discrete Markov chain.

        This is the log-likelihood for the joint distribution of states, :math:`S_t`, conditional
        on state samples, :math:`s_t`, given by the following:

        .. math::

            \int_{S_0} P(S_1 = s_1 \mid S_0) dP(S_0) \prod^{T}_{t=2} P(S_t = s_t \mid S_{t-1} = s_{t-1})

        The first term (i.e. the integral) simply computes the marginal :math:`P(S_1 = s_1)`, so
        another way to express this result is as follows:

        .. math::

            P(S_1 = s_1) \prod^{T}_{t=2} P(S_t = s_t \mid S_{t-1} = s_{t-1})

        """  # noqa: E501

        Gammas = at.shape_padleft(self.Gammas, states.ndim - (self.Gammas.ndim - 2))

        # Multiply the initial state probabilities by the first transition
        # matrix to get the marginal probability for state `S_1`.
        # The integral that produces the marginal is essentially
        # `gamma_0.dot(Gammas[0])`
        Gamma_1 = Gammas[..., 0:1, :, :]
        gamma_0 = tt_expand_dims(self.gamma_0, (-3, -1))
        P_S_1 = at.sum(gamma_0 * Gamma_1, axis=-2)

        # The `tt.switch`s allow us to broadcast the indexing operation when
        # the replication dimensions of `states` and `Gammas` don't match
        # (e.g. `states.shape[0] > Gammas.shape[0]`)
        S_1_slices = tuple(
            slice(
                at.switch(at.eq(P_S_1.shape[i], 1), 0, 0),
                at.switch(at.eq(P_S_1.shape[i], 1), 1, d),
            )
            for i, d in enumerate(states.shape)
        )
        S_1_slices = (tuple(at.ogrid[S_1_slices]) if S_1_slices else tuple()) + (
            states[..., 0:1],
        )
        logp_S_1 = at.log(P_S_1[S_1_slices]).sum(axis=-1)

        # These are slices for the extra dimensions--including the state
        # sequence dimension (e.g. "time")--along which we need to index
        # the transition matrix rows using the "observed" `states`.
        trans_slices = tuple(
            slice(
                at.switch(
                    at.eq(Gammas.shape[i], 1), 0, 1 if i == states.ndim - 1 else 0
                ),
                at.switch(at.eq(Gammas.shape[i], 1), 1, d),
            )
            for i, d in enumerate(states.shape)
        )
        trans_slices = (tuple(at.ogrid[trans_slices]) if trans_slices else tuple()) + (
            states[..., :-1],
        )

        # Select the transition matrix row of each observed state; this yields
        # `P(S_t | S_{t-1} = s_{t-1})`
        P_S_2T = Gammas[trans_slices]

        obs_slices = tuple(slice(None, d) for d in P_S_2T.shape[:-1])
        obs_slices = (tuple(at.ogrid[obs_slices]) if obs_slices else tuple()) + (
            states[..., 1:],
        )
        logp_S_1T = at.log(P_S_2T[obs_slices])

        res = logp_S_1 + at.sum(logp_S_1T, axis=-1)
        res.name = "DiscreteMarkovChain_logp"

        return res
Example #26
def incomplete_beta_cfe(a, b, x, small):
    """Incomplete beta continued fraction expansions
    based on Cephes library by Steve Moshier (incbet.c).
    small: Choose element-wise which continued fraction expansion to use.
    """
    BIG = aet.constant(4.503599627370496e15, dtype="float64")
    BIGINV = aet.constant(2.22044604925031308085e-16, dtype="float64")
    THRESH = aet.constant(3.0 * np.finfo(np.float64).eps, dtype="float64")

    zero = aet.constant(0.0, dtype="float64")
    one = aet.constant(1.0, dtype="float64")
    two = aet.constant(2.0, dtype="float64")

    r = one
    k1 = a
    k3 = a
    k4 = a + one
    k5 = one
    k8 = a + two

    k2 = aet.switch(small, a + b, b - one)
    k6 = aet.switch(small, b - one, a + b)
    k7 = aet.switch(small, k4, a + one)
    k26update = aet.switch(small, one, -one)
    x = aet.switch(small, x, x / (one - x))

    pkm2 = zero
    qkm2 = one
    pkm1 = one
    qkm1 = one
    r = one

    def _step(i, pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r):
        xk = -(x * k1 * k2) / (k3 * k4)
        pk = pkm1 + pkm2 * xk
        qk = qkm1 + qkm2 * xk
        pkm2 = pkm1
        pkm1 = pk
        qkm2 = qkm1
        qkm1 = qk

        xk = (x * k5 * k6) / (k7 * k8)
        pk = pkm1 + pkm2 * xk
        qk = qkm1 + qkm2 * xk
        pkm2 = pkm1
        pkm1 = pk
        qkm2 = qkm1
        qkm1 = qk

        old_r = r
        r = aet.switch(aet.eq(qk, zero), r, pk / qk)

        k1 += one
        k2 += k26update
        k3 += two
        k4 += two
        k5 += one
        k6 -= k26update
        k7 += two
        k8 += two

        big_cond = aet.gt(aet.abs_(qk) + aet.abs_(pk), BIG)
        biginv_cond = aet.or_(aet.lt(aet.abs_(qk), BIGINV),
                              aet.lt(aet.abs_(pk), BIGINV))

        pkm2 = aet.switch(big_cond, pkm2 * BIGINV, pkm2)
        pkm1 = aet.switch(big_cond, pkm1 * BIGINV, pkm1)
        qkm2 = aet.switch(big_cond, qkm2 * BIGINV, qkm2)
        qkm1 = aet.switch(big_cond, qkm1 * BIGINV, qkm1)

        pkm2 = aet.switch(biginv_cond, pkm2 * BIG, pkm2)
        pkm1 = aet.switch(biginv_cond, pkm1 * BIG, pkm1)
        qkm2 = aet.switch(biginv_cond, qkm2 * BIG, qkm2)
        qkm1 = aet.switch(biginv_cond, qkm1 * BIG, qkm1)

        return (
            (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r),
            until(aet.abs_(old_r - r) < (THRESH * aet.abs_(r))),
        )

    (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), _ = scan(
        _step,
        sequences=[aet.arange(0, 300)],
        outputs_info=[
            e for e in aet.cast((pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5,
                                 k6, k7, k8, r), "float64")
        ],
    )

    return r[-1]
Example #27
def MvNormalLogp():
    """Compute the log pdf of a multivariate normal distribution.

    This should be used in MvNormal.logp once Theano#5908 is released.

    Parameters
    ----------
    cov: aet.matrix
        The covariance matrix.
    delta: aet.matrix
        Array of deviations from the mean.
    """
    cov = aet.matrix("cov")
    cov.tag.test_value = floatX(np.eye(3))
    delta = aet.matrix("delta")
    delta.tag.test_value = floatX(np.zeros((2, 3)))

    solve_lower = Solve(A_structure="lower_triangular")
    solve_upper = Solve(A_structure="upper_triangular")
    cholesky = Cholesky(lower=True, on_error="nan")

    n, k = delta.shape
    n, k = f(n), f(k)
    chol_cov = cholesky(cov)
    diag = aet.nlinalg.diag(chol_cov)
    ok = aet.all(diag > 0)

    chol_cov = aet.switch(ok, chol_cov, aet.fill(chol_cov, 1))
    delta_trans = solve_lower(chol_cov, delta.T).T

    result = n * k * aet.log(f(2) * np.pi)
    result += f(2) * n * aet.sum(aet.log(diag))
    result += (delta_trans**f(2)).sum()
    result = f(-0.5) * result
    logp = aet.switch(ok, result, -np.inf)

    def dlogp(inputs, gradients):
        (g_logp, ) = gradients
        cov, delta = inputs

        g_logp.tag.test_value = floatX(1.0)
        n, k = delta.shape

        chol_cov = cholesky(cov)
        diag = aet.nlinalg.diag(chol_cov)
        ok = aet.all(diag > 0)

        chol_cov = aet.switch(ok, chol_cov, aet.fill(chol_cov, 1))
        delta_trans = solve_lower(chol_cov, delta.T).T

        inner = n * aet.eye(k) - aet.dot(delta_trans.T, delta_trans)
        g_cov = solve_upper(chol_cov.T, inner)
        g_cov = solve_upper(chol_cov.T, g_cov.T)

        tau_delta = solve_upper(chol_cov.T, delta_trans.T)
        g_delta = tau_delta.T

        g_cov = aet.switch(ok, g_cov, -np.nan)
        g_delta = aet.switch(ok, g_delta, -np.nan)

        return [-0.5 * g_cov * g_logp, -g_delta * g_logp]

    return OpFromGraph([cov, delta], [logp], grad_overrides=dlogp, inline=True)
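
A hedged usage sketch (under the same `aet`/`floatX` assumptions as above); for an identity covariance and a single zero deviation in two dimensions the log-density is -log(2*pi):

logp_op = MvNormalLogp()
cov_in = aet.matrix("cov_in")
delta_in = aet.matrix("delta_in")
f_logp = aesara.function([cov_in, delta_in], logp_op(cov_in, delta_in))
print(f_logp(np.eye(2), np.zeros((1, 2))))  # ≈ -1.8379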
Example #28
    def logp(self, value):
        return at.switch(at.eq(value, self.c), 0.0, -np.inf)
Example #29
import time

import numpy as np

import aesara
from aesara import tensor as tt
from aesara.ifelse import ifelse


a, b = tt.scalars("a", "b")
x, y = tt.matrices("x", "y")

z_switch = tt.switch(tt.lt(a, b), tt.mean(x), tt.mean(y))
z_lazy = ifelse(tt.lt(a, b), tt.mean(x), tt.mean(y))

f_switch = aesara.function([a, b, x, y], z_switch)
f_lazyifelse = aesara.function([a, b, x, y], z_lazy)

val1 = 0.0
val2 = 1.0
big_mat1 = np.ones((10000, 1000))
big_mat2 = np.ones((10000, 1000))

n_times = 10

tic = time.perf_counter()  # time.clock() was removed in Python 3.8
for i in range(n_times):
    f_switch(val1, val2, big_mat1, big_mat2)
print("time spent evaluating both values %f sec" % (time.perf_counter() - tic))

tic = time.perf_counter()
for i in range(n_times):
    f_lazyifelse(val1, val2, big_mat1, big_mat2)
print("time spent evaluating one value %f sec" % (time.perf_counter() - tic))
Example #30
def logaddexp(a, b):
    diff = b - a
    return at.switch(diff > 0, b + at.log1p(at.exp(-diff)),
                     a + at.log1p(at.exp(diff)))
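
A quick check (assuming `at` is `aesara.tensor`): adding probabilities 0.25 and 0.75 in log space gives log(1) = 0.

import aesara
import aesara.tensor as at
import numpy as np

a, b = at.dscalars("a", "b")
f = aesara.function([a, b], logaddexp(a, b))
print(f(np.log(0.25), np.log(0.75)))  # ≈ 0.0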