def logpow(x, m):
    """Calculates log(x**m) since m * log(x) will fail when m = x = 0."""
    # return m * log(x)
    return aet.switch(
        aet.eq(x, 0),
        aet.switch(aet.eq(m, 0), 0.0, -np.inf),
        m * aet.log(x),
    )

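# Hypothetical usage sketch (assumes `import aesara`, `import aesara.tensor as aet`,
# and `import numpy as np`): compiling logpow shows that the 0**0 corner case yields
# log(1) = 0 instead of the nan produced by evaluating m * log(x) directly.
x_ = aet.dscalar("x")
m_ = aet.dscalar("m")
f = aesara.function([x_, m_], logpow(x_, m_))
print(f(0.0, 0.0))  # 0.0 rather than nan
print(f(2.0, 3.0))  # log(8) ~ 2.0794
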
def mixture_model(random_seed=1234):
    """Sample mixture model to use in benchmarks"""
    np.random.seed(random_seed)
    size = 1000
    w_true = np.array([0.35, 0.4, 0.25])
    mu_true = np.array([0.0, 2.0, 5.0])
    sigma = np.array([0.5, 0.5, 1.0])
    component = np.random.choice(mu_true.size, size=size, p=w_true)
    x = np.random.normal(mu_true[component], sigma[component], size=size)

    with pm.Model() as model:
        w = pm.Dirichlet("w", a=np.ones_like(w_true))
        mu = pm.Normal("mu", mu=0.0, sd=10.0, shape=w_true.shape)
        enforce_order = pm.Potential(
            "enforce_order",
            aet.switch(mu[0] - mu[1] <= 0, 0.0, -np.inf)
            + aet.switch(mu[1] - mu[2] <= 0, 0.0, -np.inf),
        )
        tau = pm.Gamma("tau", alpha=1.0, beta=1.0, shape=w_true.shape)
        pm.NormalMixture("x_obs", w=w, mu=mu, tau=tau, observed=x)

    # Initialization can be poorly specified; this is a hack to make it work
    start = {
        "mu": mu_true.copy(),
        "tau_log__": np.log(1.0 / sigma**2),
        "w_stickbreaking__": np.array([-0.03, 0.44]),
    }
    return model, start

def dlogp(inputs, gradients):
    (g_logp,) = gradients
    cov, delta = inputs

    g_logp.tag.test_value = floatX(1.0)
    n, k = delta.shape

    chol_cov = cholesky(cov)
    diag = aet.nlinalg.diag(chol_cov)
    ok = aet.all(diag > 0)

    chol_cov = aet.switch(ok, chol_cov, aet.fill(chol_cov, 1))
    delta_trans = solve_lower(chol_cov, delta.T).T

    inner = n * aet.eye(k) - aet.dot(delta_trans.T, delta_trans)
    g_cov = solve_upper(chol_cov.T, inner)
    g_cov = solve_upper(chol_cov.T, g_cov.T)

    tau_delta = solve_upper(chol_cov.T, delta_trans.T)
    g_delta = tau_delta.T

    g_cov = aet.switch(ok, g_cov, -np.nan)
    g_delta = aet.switch(ok, g_delta, -np.nan)

    return [-0.5 * g_cov * g_logp, -g_delta * g_logp]

def build_disaster_model(masked=False):
    # fmt: off
    disasters_data = np.array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6,
                               3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5,
                               2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0,
                               1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1,
                               0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
                               3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
                               0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])
    # fmt: on
    if masked:
        disasters_data[[23, 68]] = -1
        disasters_data = np.ma.masked_values(disasters_data, value=-1)
    years = len(disasters_data)

    with pm.Model() as model:
        # Prior for distribution of switchpoint location
        switchpoint = pm.DiscreteUniform("switchpoint", lower=0, upper=years)
        # Priors for pre- and post-switch mean number of disasters
        early_mean = pm.Exponential("early_mean", lam=1.0)
        late_mean = pm.Exponential("late_mean", lam=1.0)
        # Allocate appropriate Poisson rates to years before and after current
        # switchpoint location
        idx = np.arange(years)
        rate = at.switch(switchpoint >= idx, early_mean, late_mean)
        # Data likelihood
        pm.Poisson("disasters", rate, observed=disasters_data)

    return model

def normal_lccdf(mu, sigma, x):
    z = (x - mu) / sigma
    return aet.switch(
        aet.gt(z, 1.0),
        aet.log(aet.erfcx(z / aet.sqrt(2.0)) / 2.0) - aet.sqr(z) / 2.0,
        aet.log1p(-aet.erfc(-z / aet.sqrt(2.0)) / 2.0),
    )

def test_composite_elemwise_float16(self):
    w = bvector()
    x = vector(dtype="float16")
    y = fvector()

    cz = tanh(x + aet.cast(y, "float16"))
    o = (
        cz
        - cz ** 2
        + aet.cast(x, "int16")
        + aet.cast(x, "float32")
        + aet.cast(w, "float16")
        - aet.constant(np.float16(1.0))
    )

    aesara.function([w, x, y], o, mode=mode_with_gpu)

    v = vector(dtype="uint8")
    w = vector(dtype="float16")
    x = vector(dtype="float16")
    y = vector(dtype="float16")
    z = vector(dtype="float16")

    o = aet.switch(v, mul(w, x, y), z)
    aesara.function([v, w, x, y, z], o, mode=mode_with_gpu)

def __call__(self, X):
    XY = X.dot(X.T)
    x2 = at.sum(X**2, axis=1).dimshuffle(0, "x")
    X2e = at.repeat(x2, X.shape[0], axis=1)
    H = X2e + X2e.T - 2.0 * XY

    V = at.sort(H.flatten())
    length = V.shape[0]
    # median distance
    m = at.switch(
        at.eq((length % 2), 0),
        # if even vector
        at.mean(V[((length // 2) - 1):((length // 2) + 1)]),
        # if odd vector
        V[length // 2],
    )
    h = 0.5 * m / at.log(floatX(H.shape[0]) + floatX(1))

    # RBF
    Kxy = at.exp(-H / h / 2.0)

    # Derivative
    dxkxy = -at.dot(Kxy, X)
    sumkxy = at.sum(Kxy, axis=-1, keepdims=True)
    dxkxy = at.add(dxkxy, at.mul(X, sumkxy)) / h

    return Kxy, dxkxy

def logsumexp(x, axis=None, keepdims=True):
    # Adapted from https://github.com/Theano/Theano/issues/1563
    x_max = aet.max(x, axis=axis, keepdims=True)
    x_max = aet.switch(aet.isinf(x_max), 0, x_max)
    res = aet.log(aet.sum(aet.exp(x - x_max), axis=axis, keepdims=True)) + x_max
    return res if keepdims else res.squeeze()

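# Hypothetical usage sketch (assumes `import aesara`, `import aesara.tensor as aet`,
# `import numpy as np`, and `from scipy.special import logsumexp as sp_logsumexp`):
# the compiled graph should agree with SciPy's reference implementation, even for
# inputs whose exponentials would underflow to zero.
x_ = aet.dvector("x")
f = aesara.function([x_], logsumexp(x_, axis=0))
vals = np.array([-1000.0, -1001.0, -1002.0])
print(f(vals))             # stable result, no underflow
print(sp_logsumexp(vals))  # reference value from SciPy
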
def logp(value, distribution, lower, upper):
    """
    Calculate log-probability of Bounded distribution at specified value.

    Parameters
    ----------
    value: numeric
        Value for which log-probability is calculated.
    distribution: TensorVariable
        Distribution which is being bounded
    lower: numeric
        Lower bound for the distribution being bounded.
    upper: numeric
        Upper bound for the distribution being bounded.

    Returns
    -------
    TensorVariable
    """
    res = at.switch(
        at.or_(at.lt(value, lower), at.gt(value, upper)),
        -np.inf,
        logp(distribution, value),
    )

    return check_parameters(
        res,
        lower <= upper,
        msg="lower <= upper",
    )

def normal_lcdf(mu, sigma, x):
    """Compute the log of the cumulative density function of the normal."""
    z = (x - mu) / sigma
    return aet.switch(
        aet.lt(z, -1.0),
        aet.log(aet.erfcx(-z / aet.sqrt(2.0)) / 2.0) - aet.sqr(z) / 2.0,
        aet.log1p(-aet.erfc(z / aet.sqrt(2.0)) / 2.0),
    )

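# Hypothetical usage sketch (assumes `import aesara`, `import aesara.tensor as aet`,
# `import numpy as np`, and `from scipy import stats`): the erfcx branch keeps the
# far left tail finite where a naive log(Phi(x)) would underflow to -inf.
mu_, sigma_, x_ = aet.dscalars("mu", "sigma", "x")
f = aesara.function([mu_, sigma_, x_], normal_lcdf(mu_, sigma_, x_))
print(f(0.0, 1.0, -40.0))                       # large negative but finite
print(stats.norm(0.0, 1.0).logcdf(-40.0))       # SciPy reference
print(np.log(stats.norm(0.0, 1.0).cdf(-40.0)))  # naive formula underflows to -inf
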
def _step(i, pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r):
    xk = -(x * k1 * k2) / (k3 * k4)
    pk = pkm1 + pkm2 * xk
    qk = qkm1 + qkm2 * xk
    pkm2 = pkm1
    pkm1 = pk
    qkm2 = qkm1
    qkm1 = qk

    xk = (x * k5 * k6) / (k7 * k8)
    pk = pkm1 + pkm2 * xk
    qk = qkm1 + qkm2 * xk
    pkm2 = pkm1
    pkm1 = pk
    qkm2 = qkm1
    qkm1 = qk

    old_r = r
    r = aet.switch(aet.eq(qk, zero), r, pk / qk)

    k1 += one
    k2 += k26update
    k3 += two
    k4 += two
    k5 += one
    k6 -= k26update
    k7 += two
    k8 += two

    big_cond = aet.gt(aet.abs_(qk) + aet.abs_(pk), BIG)
    biginv_cond = aet.or_(aet.lt(aet.abs_(qk), BIGINV), aet.lt(aet.abs_(pk), BIGINV))

    pkm2 = aet.switch(big_cond, pkm2 * BIGINV, pkm2)
    pkm1 = aet.switch(big_cond, pkm1 * BIGINV, pkm1)
    qkm2 = aet.switch(big_cond, qkm2 * BIGINV, qkm2)
    qkm1 = aet.switch(big_cond, qkm1 * BIGINV, qkm1)

    pkm2 = aet.switch(biginv_cond, pkm2 * BIG, pkm2)
    pkm1 = aet.switch(biginv_cond, pkm1 * BIG, pkm1)
    qkm2 = aet.switch(biginv_cond, qkm2 * BIG, qkm2)
    qkm1 = aet.switch(biginv_cond, qkm1 * BIG, qkm1)

    return (
        (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r),
        until(aet.abs_(old_r - r) < (THRESH * aet.abs_(r))),
    )

def moment_censored(op, rv, dist, lower, upper):
    moment = at.switch(
        at.eq(lower, -np.inf),
        at.switch(
            at.isinf(upper),
            # lower = -inf, upper = inf
            0,
            # lower = -inf, upper = x
            upper - 1,
        ),
        at.switch(
            at.eq(upper, np.inf),
            # lower = x, upper = inf
            lower + 1,
            # lower = x, upper = x
            (lower + upper) / 2,
        ),
    )
    moment = at.full_like(dist, moment)
    return moment

def log_diff_normal_cdf(mu, sigma, x, y):
    """
    Compute :math:`\\log(\\Phi(\\frac{x - \\mu}{\\sigma}) - \\Phi(\\frac{y - \\mu}{\\sigma}))` safely in log space.

    Parameters
    ----------
    mu: float
        mean
    sigma: float
        std
    x: float
    y: float
        must be strictly less than x.

    Returns
    -------
    log (\\Phi(x) - \\Phi(y))
    """
    x = (x - mu) / sigma / aet.sqrt(2.0)
    y = (y - mu) / sigma / aet.sqrt(2.0)

    # To stabilize the computation, consider these three regions:
    # 1) x > y > 0 => Use erf(x) = 1 - e^{-x^2} erfcx(x) and erf(y) = 1 - e^{-y^2} erfcx(y)
    # 2) 0 > x > y => Use erf(x) = e^{-x^2} erfcx(-x) and erf(y) = e^{-y^2} erfcx(-y)
    # 3) x > 0 > y => Naive formula log( (erf(x) - erf(y)) / 2 ) works fine.
    return aet.log(0.5) + aet.switch(
        aet.gt(y, 0),
        -aet.square(y)
        + aet.log(aet.erfcx(y) - aet.exp(aet.square(y) - aet.square(x)) * aet.erfcx(x)),
        aet.switch(
            aet.lt(x, 0),
            # 0 > x > y
            -aet.square(x)
            + aet.log(aet.erfcx(-x) - aet.exp(aet.square(x) - aet.square(y)) * aet.erfcx(-y)),
            # x > 0 > y
            aet.log(aet.erf(x) - aet.erf(y)),
        ),
    )

def log_i0(x):
    """
    Calculates the logarithm of the zeroth-order modified Bessel function of the first kind.
    """
    return at.switch(
        at.lt(x, 5),
        at.log1p(
            x**2.0 / 4.0
            + x**4.0 / 64.0
            + x**6.0 / 2304.0
            + x**8.0 / 147456.0
            + x**10.0 / 14745600.0
            + x**12.0 / 2123366400.0
        ),
        x
        - 0.5 * at.log(2.0 * np.pi * x)
        + at.log1p(
            1.0 / (8.0 * x)
            + 9.0 / (128.0 * x**2.0)
            + 225.0 / (3072.0 * x**3.0)
            + 11025.0 / (98304.0 * x**4.0)
        ),
    )

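# Hypothetical usage sketch (assumes `import aesara`, `import aesara.tensor as at`,
# `import numpy as np`, and `from scipy import special`): both branches of the switch
# (small-x series, large-x asymptote) can be checked against SciPy via the identity
# log(I0(x)) = x + log(i0e(x)) for x >= 0.
x_ = at.dscalar("x")
f = aesara.function([x_], log_i0(x_))
for v in (0.5, 2.0, 20.0):  # values covering both branches of the switch
    print(f(v), v + np.log(special.i0e(v)))
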
def incomplete_beta(a, b, value):
    """Incomplete beta implementation
    Power series and continued fraction expansions chosen for best numerical
    convergence across the board based on inputs.
    """
    machep = aet.constant(np.MachAr().eps, dtype="float64")
    one = aet.constant(1, dtype="float64")
    w = one - value

    ps = incomplete_beta_ps(a, b, value)

    flip = aet.gt(value, (a / (a + b)))
    aa, bb = a, b
    a = aet.switch(flip, bb, aa)
    b = aet.switch(flip, aa, bb)
    xc = aet.switch(flip, value, w)
    x = aet.switch(flip, w, value)

    tps = incomplete_beta_ps(a, b, x)
    tps = aet.switch(aet.le(tps, machep), one - machep, one - tps)

    # Choose which continued fraction expansion for best convergence.
    small = aet.lt(x * (a + b - 2.0) - (a - one), 0.0)
    cfe = incomplete_beta_cfe(a, b, x, small)
    w = aet.switch(small, cfe, cfe / xc)

    # Direct incomplete beta accounting for flipped a, b.
    t = aet.exp(
        a * aet.log(x)
        + b * aet.log(xc)
        + gammaln(a + b)
        - gammaln(a)
        - gammaln(b)
        + aet.log(w / a)
    )

    t = aet.switch(flip, aet.switch(aet.le(t, machep), one - machep, one - t), t)

    return aet.switch(
        aet.and_(flip, aet.and_(aet.le((b * x), one), aet.le(x, 0.95))),
        tps,
        aet.switch(aet.and_(aet.le(b * value, one), aet.le(value, 0.95)), ps, t),
    )

def adagrad_window(loss_or_grads=None, params=None, learning_rate=0.001, epsilon=0.1, n_win=10):
    """Returns a function that returns parameter updates.
    Instead of accumulated estimate, uses running window

    Parameters
    ----------
    loss_or_grads: symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params: list of shared variables
        The variables to generate update expressions for
    learning_rate: float
        Learning rate.
    epsilon: float
        Offset to avoid zero-division in the normalizer of adagrad.
    n_win: int
        Number of past steps to calculate scales of parameter gradients.

    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression
    """
    if loss_or_grads is None and params is None:
        return partial(adagrad_window, **_get_call_kwargs(locals()))
    elif loss_or_grads is None or params is None:
        raise ValueError("Please provide both `loss_or_grads` and `params` to get updates")
    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()
    for param, grad in zip(params, grads):
        i = aesara.shared(pm.floatX(0))
        i_int = i.astype("int32")
        value = param.get_value(borrow=True)
        accu = aesara.shared(np.zeros(value.shape + (n_win,), dtype=value.dtype))

        # Append squared gradient vector to accu_new
        accu_new = aet.set_subtensor(accu[..., i_int], grad**2)
        i_new = aet.switch((i + 1) < n_win, i + 1, 0)
        updates[accu] = accu_new
        updates[i] = i_new

        accu_sum = accu_new.sum(axis=-1)
        updates[param] = param - (learning_rate * grad / aet.sqrt(accu_sum + epsilon))
    return updates

def local_check_parameter_to_ninf_switch(fgraph, node):
    if isinstance(node.op, CheckParameterValue):
        logp_expr, *logp_conds = node.inputs
        if len(logp_conds) > 1:
            logp_cond = at.all(logp_conds)
        else:
            (logp_cond,) = logp_conds
        out = at.switch(logp_cond, logp_expr, -np.inf)
        out.name = node.op.msg

        if out.dtype != node.outputs[0].dtype:
            out = at.cast(out, node.outputs[0].dtype)

        return [out]

def grad(self, inputs, cost_grad):
    """
    In defining the gradient, the Finite Fourier Transform is viewed as
    a complex-differentiable function of a complex variable
    """
    a = inputs[0]
    n = inputs[1]
    axis = inputs[2]
    grad = cost_grad[0]
    if not isinstance(axis, tensor.TensorConstant):
        raise NotImplementedError(
            "%s: gradient is currently implemented"
            " only for axis being an Aesara constant" % self.__class__.__name__
        )
    axis = int(axis.data)
    # notice that the number of actual elements in wrto is independent of
    # possible padding or truncation:
    elem = tensor.arange(0, tensor.shape(a)[axis], 1)
    # accounts for padding:
    freq = tensor.arange(0, n, 1)
    outer = tensor.outer(freq, elem)
    pow_outer = tensor.exp(((-2 * math.pi * 1j) * outer) / (1.0 * n))
    res = tensor.tensordot(grad, pow_outer, (axis, 0))

    # This would be simpler but not implemented by aesara:
    # res = tensor.switch(tensor.lt(n, tensor.shape(a)[axis]),
    #     tensor.set_subtensor(res[...,n::], 0, False, False), res)

    # Instead we resort to that to account for truncation:
    flip_shape = list(np.arange(0, a.ndim)[::-1])
    res = res.dimshuffle(flip_shape)
    res = tensor.switch(
        tensor.lt(n, tensor.shape(a)[axis]),
        tensor.set_subtensor(
            res[n::,],
            0,
            False,
            False,
        ),
        res,
    )
    res = res.dimshuffle(flip_shape)

    # ensures that gradient shape conforms to input shape:
    out_shape = (
        list(np.arange(0, axis)) + [a.ndim - 1] + list(np.arange(axis, a.ndim - 1))
    )
    res = res.dimshuffle(*out_shape)
    return [res, None, None]

def _new_initial(self, size, deterministic, more_replacements=None):
    aesara_condition_is_here = isinstance(deterministic, Variable)
    if aesara_condition_is_here:
        return at.switch(
            deterministic,
            at.repeat(self.mean.dimshuffle("x", 0), size if size is not None else 1, -1),
            self.histogram[self.randidx(size)],
        )
    else:
        if deterministic:
            return at.repeat(self.mean.dimshuffle("x", 0), size if size is not None else 1, -1)
        else:
            return self.histogram[self.randidx(size)]

def log1mexp(x):
    r"""Return log(1 - exp(-x)).

    This function is numerically more stable than the naive approach.

    For details, see
    https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf

    References
    ----------
    .. [Machler2012] Martin Mächler (2012).
        "Accurately computing `\log(1-\exp(- \mid a \mid))` Assessed by the Rmpfr package"
    """
    return at.switch(at.lt(x, 0.6931471805599453), at.log(-at.expm1(-x)), at.log1p(-at.exp(-x)))

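# Hypothetical usage sketch (assumes `import aesara`, `import aesara.tensor as at`,
# `import numpy as np`): the switch point 0.6931... is log(2), following Mächler (2012).
# For tiny x the expm1 branch avoids the catastrophic cancellation of the naive formula.
x_ = at.dscalar("x")
f = aesara.function([x_], log1mexp(x_))
tiny = 1e-20
print(f(tiny))                      # close to log(tiny) = -46.05...
print(np.log(1.0 - np.exp(-tiny)))  # naive formula returns -inf
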
def _new_initial(self, size, deterministic, more_replacements=None):
    aesara_condition_is_here = isinstance(deterministic, Variable)
    if size is None:
        size = 1
    size = at.as_tensor(size)
    if aesara_condition_is_here:
        return at.switch(
            deterministic,
            at.repeat(self.mean.reshape((1, -1)), size, -1),
            self.histogram[self.randidx(size)],
        )
    else:
        if deterministic:
            raise NotImplementedInference(
                "Deterministic sampling from a Histogram is broken in v4"
            )
            return at.repeat(self.mean.reshape((1, -1)), size, -1)
        else:
            return self.histogram[self.randidx(size)]

def bound(logp, *conditions, **kwargs):
    """
    Bounds a log probability density with several conditions.
    When conditions are not met, the logp values are replaced by -inf.

    Note that bound should not be used to enforce the logic of the logp under the normal
    support as it can be disabled by the user via check_bounds = False in pm.Model()

    Parameters
    ----------
    logp: float
    *conditions: booleans
    broadcast_conditions: bool (optional, default=True)
        If True, conditions are broadcasted and applied element-wise to each value in logp.
        If False, conditions are collapsed via at.all(). As a consequence the entire logp
        array is either replaced by -inf or unchanged.

        Setting broadcast_conditions to False is necessary for most (all?) multivariate
        distributions where the dimensions of the conditions do not unambiguously match
        that of the logp.

    Returns
    -------
    logp with elements set to -inf where any condition is False
    """
    # If called inside a model context, see if bounds check is disabled
    try:
        from pymc3.model import modelcontext

        model = modelcontext(kwargs.get("model"))
        if not model.check_bounds:
            return logp
    except TypeError:  # No model found
        pass

    broadcast_conditions = kwargs.get("broadcast_conditions", True)

    if broadcast_conditions:
        alltrue = alltrue_elemwise
    else:
        alltrue = alltrue_scalar

    return at.switch(alltrue(conditions), logp, -np.inf)

def test_composite_elemwise_float16(self):
    w = aesara.tensor.bvector()
    x = aesara.tensor.vector(dtype="float16")
    y = aesara.tensor.fvector()

    cz = tensor.tanh(x + tensor.cast(y, "float16"))
    o = (
        cz
        - cz**2
        + tensor.cast(x, "int16")
        + tensor.cast(x, "float32")
        + tensor.cast(w, "float16")
        - tensor.constant(np.float16(1.0))
    )

    aesara.function([w, x, y], o, mode=mode_with_gpu)

    v = aesara.tensor.vector(dtype="uint8")
    w = aesara.tensor.vector(dtype="float16")
    x = aesara.tensor.vector(dtype="float16")
    y = aesara.tensor.vector(dtype="float16")
    z = aesara.tensor.vector(dtype="float16")

    o = tensor.switch(v, tensor.mul(w, x, y), z)
    aesara.function([v, w, x, y, z], o, mode=mode_with_gpu)

def bound(logp, *conditions, **kwargs):
    """
    Bounds a log probability density with several conditions.

    Parameters
    ----------
    logp: float
    *conditions: booleans
    broadcast_conditions: bool (optional, default=True)
        If True, broadcasts logp to match the largest shape of the conditions.
        This is used e.g. in DiscreteUniform where logp is a scalar constant and the shape
        is specified via the conditions.
        If False, will return the same shape as logp.
        This is used e.g. in Multinomial where broadcasting can lead to differences in the logp.

    Returns
    -------
    logp with elements set to -inf where any condition is False
    """
    # If called inside a model context, see if bounds check is disabled
    try:
        model = modelcontext(kwargs.get("model"))
        if not model.check_bounds:
            return logp
    except TypeError:  # No model found
        pass

    broadcast_conditions = kwargs.get("broadcast_conditions", True)

    if broadcast_conditions:
        alltrue = alltrue_elemwise
    else:
        alltrue = alltrue_scalar

    return aet.switch(alltrue(conditions), logp, -np.inf)

def logp(self, states):
    r"""Create a Theano graph that computes the log-likelihood for a discrete Markov chain.

    This is the log-likelihood for the joint distribution of states, :math:`S_t`, conditional
    on state samples, :math:`s_t`, given by the following:

    .. math::

        \int_{S_0} P(S_1 = s_1 \mid S_0) dP(S_0) \prod^{T}_{t=2} P(S_t = s_t \mid S_{t-1} = s_{t-1})

    The first term (i.e. the integral) simply computes the marginal :math:`P(S_1 = s_1)`, so
    another way to express this result is as follows:

    .. math::

        P(S_1 = s_1) \prod^{T}_{t=2} P(S_t = s_t \mid S_{t-1} = s_{t-1})

    """  # noqa: E501

    Gammas = at.shape_padleft(self.Gammas, states.ndim - (self.Gammas.ndim - 2))

    # Multiply the initial state probabilities by the first transition
    # matrix to get the marginal probability for state `S_1`.
    # The integral that produces the marginal is essentially
    # `gamma_0.dot(Gammas[0])`
    Gamma_1 = Gammas[..., 0:1, :, :]
    gamma_0 = tt_expand_dims(self.gamma_0, (-3, -1))
    P_S_1 = at.sum(gamma_0 * Gamma_1, axis=-2)

    # The `tt.switch`s allow us to broadcast the indexing operation when
    # the replication dimensions of `states` and `Gammas` don't match
    # (e.g. `states.shape[0] > Gammas.shape[0]`)
    S_1_slices = tuple(
        slice(
            at.switch(at.eq(P_S_1.shape[i], 1), 0, 0),
            at.switch(at.eq(P_S_1.shape[i], 1), 1, d),
        )
        for i, d in enumerate(states.shape)
    )
    S_1_slices = (tuple(at.ogrid[S_1_slices]) if S_1_slices else tuple()) + (
        states[..., 0:1],
    )
    logp_S_1 = at.log(P_S_1[S_1_slices]).sum(axis=-1)

    # These are slices for the extra dimensions--including the state
    # sequence dimension (e.g. "time")--along which we need to index
    # the transition matrix rows using the "observed" `states`.
    trans_slices = tuple(
        slice(
            at.switch(at.eq(Gammas.shape[i], 1), 0, 1 if i == states.ndim - 1 else 0),
            at.switch(at.eq(Gammas.shape[i], 1), 1, d),
        )
        for i, d in enumerate(states.shape)
    )
    trans_slices = (tuple(at.ogrid[trans_slices]) if trans_slices else tuple()) + (
        states[..., :-1],
    )

    # Select the transition matrix row of each observed state; this yields
    # `P(S_t | S_{t-1} = s_{t-1})`
    P_S_2T = Gammas[trans_slices]

    obs_slices = tuple(slice(None, d) for d in P_S_2T.shape[:-1])
    obs_slices = (tuple(at.ogrid[obs_slices]) if obs_slices else tuple()) + (
        states[..., 1:],
    )
    logp_S_1T = at.log(P_S_2T[obs_slices])

    res = logp_S_1 + at.sum(logp_S_1T, axis=-1)
    res.name = "DiscreteMarkovChain_logp"

    return res

def incomplete_beta_cfe(a, b, x, small):
    """Incomplete beta continued fraction expansions
    based on Cephes library by Steve Moshier (incbet.c).
    small: Choose element-wise which continued fraction expansion to use.
    """
    BIG = aet.constant(4.503599627370496e15, dtype="float64")
    BIGINV = aet.constant(2.22044604925031308085e-16, dtype="float64")
    THRESH = aet.constant(3.0 * np.MachAr().eps, dtype="float64")

    zero = aet.constant(0.0, dtype="float64")
    one = aet.constant(1.0, dtype="float64")
    two = aet.constant(2.0, dtype="float64")

    r = one
    k1 = a
    k3 = a
    k4 = a + one
    k5 = one
    k8 = a + two

    k2 = aet.switch(small, a + b, b - one)
    k6 = aet.switch(small, b - one, a + b)
    k7 = aet.switch(small, k4, a + one)
    k26update = aet.switch(small, one, -one)
    x = aet.switch(small, x, x / (one - x))

    pkm2 = zero
    qkm2 = one
    pkm1 = one
    qkm1 = one
    r = one

    def _step(i, pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r):
        xk = -(x * k1 * k2) / (k3 * k4)
        pk = pkm1 + pkm2 * xk
        qk = qkm1 + qkm2 * xk
        pkm2 = pkm1
        pkm1 = pk
        qkm2 = qkm1
        qkm1 = qk

        xk = (x * k5 * k6) / (k7 * k8)
        pk = pkm1 + pkm2 * xk
        qk = qkm1 + qkm2 * xk
        pkm2 = pkm1
        pkm1 = pk
        qkm2 = qkm1
        qkm1 = qk

        old_r = r
        r = aet.switch(aet.eq(qk, zero), r, pk / qk)

        k1 += one
        k2 += k26update
        k3 += two
        k4 += two
        k5 += one
        k6 -= k26update
        k7 += two
        k8 += two

        big_cond = aet.gt(aet.abs_(qk) + aet.abs_(pk), BIG)
        biginv_cond = aet.or_(aet.lt(aet.abs_(qk), BIGINV), aet.lt(aet.abs_(pk), BIGINV))

        pkm2 = aet.switch(big_cond, pkm2 * BIGINV, pkm2)
        pkm1 = aet.switch(big_cond, pkm1 * BIGINV, pkm1)
        qkm2 = aet.switch(big_cond, qkm2 * BIGINV, qkm2)
        qkm1 = aet.switch(big_cond, qkm1 * BIGINV, qkm1)

        pkm2 = aet.switch(biginv_cond, pkm2 * BIG, pkm2)
        pkm1 = aet.switch(biginv_cond, pkm1 * BIG, pkm1)
        qkm2 = aet.switch(biginv_cond, qkm2 * BIG, qkm2)
        qkm1 = aet.switch(biginv_cond, qkm1 * BIG, qkm1)

        return (
            (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r),
            until(aet.abs_(old_r - r) < (THRESH * aet.abs_(r))),
        )

    (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), _ = scan(
        _step,
        sequences=[aet.arange(0, 300)],
        outputs_info=[
            e
            for e in aet.cast(
                (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r),
                "float64",
            )
        ],
    )

    return r[-1]

def MvNormalLogp():
    """Compute the log pdf of a multivariate normal distribution.

    This should be used in MvNormal.logp once Theano#5908 is released.

    Parameters
    ----------
    cov: aet.matrix
        The covariance matrix.
    delta: aet.matrix
        Array of deviations from the mean.
    """
    cov = aet.matrix("cov")
    cov.tag.test_value = floatX(np.eye(3))
    delta = aet.matrix("delta")
    delta.tag.test_value = floatX(np.zeros((2, 3)))

    solve_lower = Solve(A_structure="lower_triangular")
    solve_upper = Solve(A_structure="upper_triangular")
    cholesky = Cholesky(lower=True, on_error="nan")

    n, k = delta.shape
    n, k = f(n), f(k)
    chol_cov = cholesky(cov)
    diag = aet.nlinalg.diag(chol_cov)
    ok = aet.all(diag > 0)

    chol_cov = aet.switch(ok, chol_cov, aet.fill(chol_cov, 1))
    delta_trans = solve_lower(chol_cov, delta.T).T

    result = n * k * aet.log(f(2) * np.pi)
    result += f(2) * n * aet.sum(aet.log(diag))
    result += (delta_trans**f(2)).sum()
    result = f(-0.5) * result
    logp = aet.switch(ok, result, -np.inf)

    def dlogp(inputs, gradients):
        (g_logp,) = gradients
        cov, delta = inputs

        g_logp.tag.test_value = floatX(1.0)
        n, k = delta.shape

        chol_cov = cholesky(cov)
        diag = aet.nlinalg.diag(chol_cov)
        ok = aet.all(diag > 0)

        chol_cov = aet.switch(ok, chol_cov, aet.fill(chol_cov, 1))
        delta_trans = solve_lower(chol_cov, delta.T).T

        inner = n * aet.eye(k) - aet.dot(delta_trans.T, delta_trans)
        g_cov = solve_upper(chol_cov.T, inner)
        g_cov = solve_upper(chol_cov.T, g_cov.T)

        tau_delta = solve_upper(chol_cov.T, delta_trans.T)
        g_delta = tau_delta.T

        g_cov = aet.switch(ok, g_cov, -np.nan)
        g_delta = aet.switch(ok, g_delta, -np.nan)

        return [-0.5 * g_cov * g_logp, -g_delta * g_logp]

    return OpFromGraph([cov, delta], [logp], grad_overrides=dlogp, inline=True)

def logp(self, value):
    return at.switch(at.eq(value, self.c), 0.0, -np.inf)

import time

import numpy as np

import aesara
from aesara import tensor as tt
from aesara.ifelse import ifelse

a, b = tt.scalars("a", "b")
x, y = tt.matrices("x", "y")

z_switch = tt.switch(tt.lt(a, b), tt.mean(x), tt.mean(y))
z_lazy = ifelse(tt.lt(a, b), tt.mean(x), tt.mean(y))

f_switch = aesara.function([a, b, x, y], z_switch)
f_lazyifelse = aesara.function([a, b, x, y], z_lazy)

val1 = 0.0
val2 = 1.0
big_mat1 = np.ones((10000, 1000))
big_mat2 = np.ones((10000, 1000))

n_times = 10

tic = time.perf_counter()
for i in range(n_times):
    f_switch(val1, val2, big_mat1, big_mat2)
print("time spent evaluating both values %f sec" % (time.perf_counter() - tic))

tic = time.perf_counter()
for i in range(n_times):
    f_lazyifelse(val1, val2, big_mat1, big_mat2)
print("time spent evaluating one value %f sec" % (time.perf_counter() - tic))

def logaddexp(a, b):
    diff = b - a
    return at.switch(diff > 0, b + at.log1p(at.exp(-diff)), a + at.log1p(at.exp(diff)))

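# Hypothetical usage sketch (assumes `import aesara`, `import aesara.tensor as at`,
# `import numpy as np`): the larger argument is factored out by the switch, so
# log(exp(a) + exp(b)) stays finite even when both exponentials would underflow.
a_, b_ = at.dscalars("a", "b")
f = aesara.function([a_, b_], logaddexp(a_, b_))
print(f(-1000.0, -1001.0))             # ~ -999.687, no underflow
print(np.logaddexp(-1000.0, -1001.0))  # NumPy reference
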