def _set_values(cls, lower, upper, size, shape, initval):
    if size is None:
        size = shape

    # Element-wise comparison with None marks unbounded entries
    lower = np.asarray(lower)
    lower = floatX(np.where(lower == None, -np.inf, lower))
    upper = np.asarray(upper)
    upper = floatX(np.where(upper == None, np.inf, upper))

    if initval is None:
        _size = np.broadcast_shapes(to_tuple(size), np.shape(lower), np.shape(upper))
        _lower = np.broadcast_to(lower, _size)
        _upper = np.broadcast_to(upper, _size)
        initval = np.where(
            (_lower == -np.inf) & (_upper == np.inf),
            0,
            np.where(
                _lower == -np.inf,
                _upper - 1,
                np.where(_upper == np.inf, _lower + 1, (_lower + _upper) / 2),
            ),
        )

    lower = as_tensor_variable(floatX(lower))
    upper = as_tensor_variable(floatX(upper))
    return lower, upper, initval
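
# Illustrative sketch (not part of the source above): the default initval rule in
# `_set_values` picks 0 when both bounds are infinite, one unit inside a single finite
# bound, and the midpoint when both bounds are finite. Standalone NumPy check:
import numpy as np

lower = np.array([-np.inf, -np.inf, 0.0, 2.0])
upper = np.array([np.inf, 1.0, np.inf, 4.0])

initval = np.where(
    (lower == -np.inf) & (upper == np.inf),
    0,
    np.where(
        lower == -np.inf,
        upper - 1,
        np.where(upper == np.inf, lower + 1, (lower + upper) / 2),
    ),
)
print(initval)  # [0. 0. 1. 3.]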
def dist(
    cls, mu=0.0, sigma=1.0, *, init=None, steps=None, size=None, **kwargs
) -> at.TensorVariable:
    mu = at.as_tensor_variable(floatX(mu))
    sigma = at.as_tensor_variable(floatX(sigma))

    steps = get_steps(
        steps=steps,
        shape=kwargs.get("shape", None),
        step_shape_offset=1,
    )
    if steps is None:
        raise ValueError("Must specify steps or shape parameter")
    steps = at.as_tensor_variable(intX(steps))

    # If no scalar distribution is passed then initialize with a standard Normal
    if init is None:
        init = Normal.dist(0, 1)
    else:
        if not (
            isinstance(init, at.TensorVariable)
            and init.owner is not None
            and isinstance(init.owner.op, RandomVariable)
            and init.owner.op.ndim_supp == 0
        ):
            raise TypeError("init must be a univariate distribution variable")
        check_dist_not_registered(init)

    # Ignores logprob of init var because that's accounted for in the logp method
    init = ignore_logprob(init)

    return super().dist([mu, sigma, init, steps], size=size, **kwargs)
def __call__(self, X):
    XY = X.dot(X.T)
    x2 = at.sum(X**2, axis=1).dimshuffle(0, "x")
    X2e = at.repeat(x2, X.shape[0], axis=1)
    H = X2e + X2e.T - 2.0 * XY

    V = at.sort(H.flatten())
    length = V.shape[0]
    # median distance
    m = at.switch(
        at.eq((length % 2), 0),
        # if even vector
        at.mean(V[((length // 2) - 1) : ((length // 2) + 1)]),
        # if odd vector
        V[length // 2],
    )
    h = 0.5 * m / at.log(floatX(H.shape[0]) + floatX(1))

    # RBF
    Kxy = at.exp(-H / h / 2.0)

    # Derivative
    dxkxy = -at.dot(Kxy, X)
    sumkxy = at.sum(Kxy, axis=-1, keepdims=True)
    dxkxy = at.add(dxkxy, at.mul(X, sumkxy)) / h

    return Kxy, dxkxy
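
# Illustrative sketch (standalone NumPy version of the bandwidth rule above, not the
# Aesara kernel itself): the bandwidth follows the median heuristic
# h = 0.5 * median(d^2) / log(n + 1) over pairwise squared distances.
import numpy as np


def rbf_median_kernel(X):
    # Pairwise squared Euclidean distances
    sq_dists = np.sum((X[:, None, :] - X[None, :, :]) ** 2, axis=-1)
    # Median heuristic for the bandwidth
    h = 0.5 * np.median(sq_dists) / np.log(X.shape[0] + 1.0)
    Kxy = np.exp(-sq_dists / h / 2.0)
    return Kxy, h


X = np.random.randn(50, 2)
Kxy, h = rbf_median_kernel(X)
print(Kxy.shape, h)  # (50, 50) and a positive bandwidth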
def test_leapfrog_reversible():
    n = 3
    np.random.seed(42)
    start, model, _ = models.non_normal(n)
    size = sum(start[n.name].size for n in model.value_vars)
    scaling = floatX(np.random.rand(size))

    class HMC(BaseHMC):
        def _hamiltonian_step(self, *args, **kwargs):
            pass

    step = HMC(vars=model.value_vars, model=model, scaling=scaling)
    step.integrator._logp_dlogp_func.set_extra_values({})
    astart = DictToArrayBijection.map(start)
    p = RaveledVars(floatX(step.potential.random()), astart.point_map_info)
    q = RaveledVars(floatX(np.random.randn(size)), astart.point_map_info)
    start = step.integrator.compute_state(p, q)
    for epsilon in [0.01, 0.1]:
        for n_steps in [1, 2, 3, 4, 20]:
            state = start
            for _ in range(n_steps):
                state = step.integrator.step(epsilon, state)
            for _ in range(n_steps):
                state = step.integrator.step(-epsilon, state)
            npt.assert_allclose(state.q.data, start.q.data, rtol=1e-5)
            npt.assert_allclose(state.p.data, start.p.data, rtol=1e-5)
def extend(self, direction):
    """Double the tree size by extending the tree in the given direction.

    If direction is larger than 0, extend it to the right, otherwise
    extend it to the left.

    Return a tuple `(diverging, turning)` of type (DivergenceInfo, bool).

    `diverging` indicates that the tree extension was aborted because
    the energy change exceeded `self.Emax`. `turning` indicates that
    the tree extension was stopped because the termination criterion
    was reached (the trajectory is turning back).
    """
    if direction > 0:
        tree, diverging, turning = self._build_subtree(
            self.right, self.depth, floatX(np.asarray(self.step_size))
        )
        leftmost_begin, leftmost_end = self.left, self.right
        rightmost_begin, rightmost_end = tree.left, tree.right
        leftmost_p_sum = self.p_sum
        rightmost_p_sum = tree.p_sum
        self.right = tree.right
    else:
        tree, diverging, turning = self._build_subtree(
            self.left, self.depth, floatX(np.asarray(-self.step_size))
        )
        leftmost_begin, leftmost_end = tree.right, tree.left
        rightmost_begin, rightmost_end = self.left, self.right
        leftmost_p_sum = tree.p_sum
        rightmost_p_sum = self.p_sum
        self.left = tree.right

    self.depth += 1
    self.n_proposals += tree.n_proposals

    if diverging or turning:
        return diverging, turning

    size1, size2 = self.log_size, tree.log_size
    if logbern(size2 - size1):
        self.proposal = tree.proposal

    self.log_size = np.logaddexp(self.log_size, tree.log_size)
    self.log_weighted_accept_sum = np.logaddexp(
        self.log_weighted_accept_sum, tree.log_weighted_accept_sum
    )
    self.p_sum[:] += tree.p_sum

    # Additional turning check only when tree depth > 0 to avoid redundant work
    if self.depth > 0:
        left, right = self.left, self.right
        p_sum = self.p_sum
        turning = (p_sum.dot(left.v) <= 0) or (p_sum.dot(right.v) <= 0)
        p_sum1 = leftmost_p_sum + rightmost_begin.p.data
        turning1 = (p_sum1.dot(leftmost_begin.v) <= 0) or (p_sum1.dot(rightmost_begin.v) <= 0)
        p_sum2 = leftmost_end.p.data + rightmost_p_sum
        turning2 = (p_sum2.dot(leftmost_end.v) <= 0) or (p_sum2.dot(rightmost_end.v) <= 0)
        turning = turning | turning1 | turning2

    return diverging, turning
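
# Illustrative sketch of the generalized U-turn criterion checked above: extension
# stops once the accumulated momentum no longer points along the velocity at either
# endpoint. Standalone NumPy version (hypothetical helper, not part of the tree code):
import numpy as np


def is_turning(p_sum, v_left, v_right):
    # Turning when p_sum . v <= 0 at either end of the trajectory
    return (p_sum @ v_left <= 0) or (p_sum @ v_right <= 0)


# Momenta roughly aligned with both endpoint velocities -> keep extending
print(is_turning(np.array([1.0, 0.5]), np.array([1.0, 0.0]), np.array([0.8, 0.6])))  # False
# One endpoint velocity opposes the summed momentum -> stop
print(is_turning(np.array([1.0, 0.5]), np.array([-1.0, 0.2]), np.array([0.8, 0.6])))  # True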
def dist(
    cls,
    rho,
    sigma=None,
    tau=None,
    *,
    init_dist=None,
    steps=None,
    constant=False,
    ar_order=None,
    **kwargs,
):
    _, sigma = get_tau_sigma(tau=tau, sigma=sigma)
    sigma = at.as_tensor_variable(floatX(sigma))
    rhos = at.atleast_1d(at.as_tensor_variable(floatX(rho)))

    if "init" in kwargs:
        warnings.warn(
            "init parameter is now called init_dist. Using init will raise an error "
            "in a future release.",
            FutureWarning,
        )
        init_dist = kwargs.pop("init")

    ar_order = cls._get_ar_order(rhos=rhos, constant=constant, ar_order=ar_order)
    steps = get_steps(steps=steps, shape=kwargs.get("shape", None), step_shape_offset=ar_order)
    if steps is None:
        raise ValueError("Must specify steps or shape parameter")
    steps = at.as_tensor_variable(intX(steps), ndim=0)

    if init_dist is not None:
        if not isinstance(init_dist, TensorVariable) or not isinstance(
            init_dist.owner.op, RandomVariable
        ):
            raise ValueError(
                f"Init dist must be a distribution created via the `.dist()` API, "
                f"got {type(init_dist)}"
            )
        check_dist_not_registered(init_dist)
        if init_dist.owner.op.ndim_supp > 1:
            raise ValueError(
                "Init distribution must have a scalar or vector support dimension, "
                f"got ndim_supp={init_dist.owner.op.ndim_supp}."
            )
    else:
        warnings.warn(
            "Initial distribution not specified, defaulting to "
            "`Normal.dist(0, 100, shape=...)`. You can specify an init_dist "
            "manually to suppress this warning.",
            UserWarning,
        )
        init_dist = Normal.dist(0, 100, shape=(*sigma.shape, ar_order))

    # Tell Aeppl to ignore init_dist, as it will be accounted for in the logp term
    init_dist = ignore_logprob(init_dist)

    return super().dist([rhos, sigma, init_dist, steps, ar_order, constant], **kwargs)
def astep(self, q0: RaveledVars) -> Tuple[RaveledVars, List[Dict[str, Any]]]:
    point_map_info = q0.point_map_info
    q0 = q0.data

    # same tuning scheme as DEMetropolis
    if not self.steps_until_tune and self.tune:
        if self.tune_target == "scaling":
            self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
        elif self.tune_target == "lambda":
            self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval))
        # Reset counter
        self.steps_until_tune = self.tune_interval
        self.accepted = 0

    epsilon = self.proposal_dist() * self.scaling

    it = len(self._history)
    # use the DE-MCMC-Z proposal scheme as soon as the history has 2 entries
    if it > 1:
        # differential evolution proposal:
        # select two random past draws from the history
        iz1 = np.random.randint(it)
        iz2 = np.random.randint(it)
        while iz2 == iz1:
            iz2 = np.random.randint(it)
        z1 = self._history[iz1]
        z2 = self._history[iz2]
        # propose a jump
        q = floatX(q0 + self.lamb * (z1 - z2) + epsilon)
    else:
        # propose just with noise in the first 2 iterations
        q = floatX(q0 + epsilon)

    accept = self.delta_logp(q, q0)
    q_new, accepted = metrop_select(accept, q, q0)
    self.accepted += accepted
    self._history.append(q_new)

    self.steps_until_tune -= 1

    stats = {
        "tune": self.tune,
        "scaling": self.scaling,
        "lambda": self.lamb,
        "accept": np.exp(accept),
        "accepted": accepted,
    }

    q_new = RaveledVars(q_new, point_map_info)

    return q_new, [stats]
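
# Illustrative sketch of the DE-MCMC-Z jump built above (standalone NumPy with
# hypothetical stand-ins for the sampler state; `lamb = 2.38 / sqrt(2 * d)` is a common
# differential-evolution default, used here only as an example value):
import numpy as np

rng = np.random.default_rng(1)

history = [rng.normal(size=3) for _ in range(10)]  # past draws of this chain
q0 = rng.normal(size=3)                            # current point
lamb = 2.38 / np.sqrt(2 * 3)
scaling = 0.001

# Jump = scaled difference of two distinct past draws plus a small noise term
iz1, iz2 = rng.choice(len(history), size=2, replace=False)
epsilon = rng.normal(size=3) * scaling
q = q0 + lamb * (history[iz1] - history[iz2]) + epsilon
print(q)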
def test_hessian(self):
    chol_vec = at.vector("chol_vec")
    chol_vec.tag.test_value = floatX(np.array([0.1, 2, 3]))
    chol = at.stack(
        [
            at.stack([at.exp(0.1 * chol_vec[0]), 0]),
            at.stack([chol_vec[1], 2 * at.exp(chol_vec[2])]),
        ]
    )
    cov = at.dot(chol, chol.T)
    delta = at.matrix("delta")
    delta.tag.test_value = floatX(np.ones((5, 2)))
    logp = MvNormalLogp()(cov, delta)
    g_cov, g_delta = at.grad(logp, [cov, delta])
    # TODO: What's the test? Something needs to be asserted.
    at.grad(g_delta.sum() + g_cov.sum(), [delta, cov])
def test_iterator():
    with pm.Model() as model:
        a = pm.Normal("a", shape=1)
        b = pm.HalfNormal("b")
        step1 = pm.NUTS([model.rvs_to_values[a]])
        step2 = pm.Metropolis([model.rvs_to_values[b]])

    step = pm.CompoundStep([step1, step2])

    start = {"a": floatX(np.array([1.0])), "b_log__": floatX(np.array(2.0))}
    sampler = ps.ParallelSampler(10, 10, 3, 2, [2, 3, 4], [start] * 3, step, 0, False)
    with sampler:
        for draw in sampler:
            pass
def test_multinomial_check_parameters():
    x = np.array([1, 5])
    n = x.sum()

    with pm.Model() as modelA:
        p_a = pm.Dirichlet("p", floatX(np.ones(2)))
        MultinomialA("x", n, p_a, observed=x)

    with pm.Model() as modelB:
        p_b = pm.Dirichlet("p", floatX(np.ones(2)))
        MultinomialB("x", n, p_b, observed=x)

    assert np.isclose(
        modelA.logp({"p_simplex__": [0]}), modelB.logp({"p_simplex__": [0]})
    )
def __new__(cls, name, rho, *args, steps=None, constant=False, ar_order=None, **kwargs):
    rhos = at.atleast_1d(at.as_tensor_variable(floatX(rho)))
    ar_order = cls._get_ar_order(rhos=rhos, constant=constant, ar_order=ar_order)
    steps = get_steps(
        steps=steps,
        shape=None,  # Shape will be checked in `cls.dist`
        dims=kwargs.get("dims", None),
        observed=kwargs.get("observed", None),
        step_shape_offset=ar_order,
    )
    return super().__new__(
        cls, name, rhos, *args, steps=steps, constant=constant, ar_order=ar_order, **kwargs
    )
def test_mixture_list_of_poissons(self):
    with Model() as model:
        w = Dirichlet("w", floatX(np.ones_like(self.pois_w)), shape=self.pois_w.shape)
        mu = Gamma("mu", 1.0, 1.0, shape=self.pois_w.size)
        Mixture("x_obs", w, [Poisson.dist(mu[0]), Poisson.dist(mu[1])], observed=self.pois_x)
        step = Metropolis()
        trace = sample(5000, step, random_seed=self.random_seed, progressbar=False, chains=1)

    assert_allclose(np.sort(trace["w"].mean(axis=0)), np.sort(self.pois_w), rtol=0.1, atol=0.1)
    assert_allclose(
        np.sort(trace["mu"].mean(axis=0)), np.sort(self.pois_mu), rtol=0.1, atol=0.1
    )
def mutate(self): """Independent Metropolis-Hastings perturbation.""" ac_ = np.empty((self.n_steps, self.draws)) cov = self.proposal_dist.cov log_R = np.log(np.random.rand(self.n_steps, self.draws)) for n_step in range(self.n_steps): # The proposal is independent from the current point. # We have to take that into account to compute the Metropolis-Hastings acceptance proposal = floatX(self.proposal_dist.rvs(size=self.draws)) proposal = proposal.reshape(len(proposal), -1) # To do that we compute the logp of moving to a new point forward = self.proposal_dist.logpdf(proposal) # And to going back from that new point backward = multivariate_normal(proposal.mean(axis=0), cov).logpdf(self.tempered_posterior) ll = np.array( [self.likelihood_logp_func(prop) for prop in proposal]) pl = np.array([self.prior_logp_func(prop) for prop in proposal]) proposal_logp = pl + ll * self.beta accepted = log_R[n_step] < ( (proposal_logp + backward) - (self.tempered_posterior_logp + forward)) ac_[n_step] = accepted self.tempered_posterior[accepted] = proposal[accepted] self.tempered_posterior_logp[accepted] = proposal_logp[accepted] self.prior_logp[accepted] = pl[accepted] self.likelihood_logp[accepted] = ll[accepted] self.acc_rate = np.mean(ac_)
def mutate(self): """Metropolis-Hastings perturbation.""" ac_ = np.empty((self.n_steps, self.draws)) log_R = np.log(self.rng.random((self.n_steps, self.draws))) for n_step in range(self.n_steps): proposal = floatX( self.tempered_posterior + self.proposal_dist(num_draws=self.draws, rng=self.rng) * self.proposal_scales[:, None]) ll = np.array( [self.likelihood_logp_func(prop) for prop in proposal]) pl = np.array([self.prior_logp_func(prop) for prop in proposal]) proposal_logp = pl + ll * self.beta accepted = log_R[n_step] < (proposal_logp - self.tempered_posterior_logp) ac_[n_step] = accepted self.tempered_posterior[accepted] = proposal[accepted] self.prior_logp[accepted] = pl[accepted] self.likelihood_logp[accepted] = ll[accepted] self.tempered_posterior_logp[accepted] = proposal_logp[accepted] self.chain_acc_rate = np.mean(ac_, axis=0)
def mutate(self): """Independent Metropolis-Hastings perturbation.""" ac_ = np.empty((self.n_steps, self.draws)) log_R = np.log(self.rng.random((self.n_steps, self.draws))) # The proposal is independent from the current point. # We have to take that into account to compute the Metropolis-Hastings acceptance # We first compute the logp of proposing a transition to the current points. # This variable is updated at the end of the loop with the entries from the accepted # transitions, which is equivalent to recomputing it in every iteration of the loop. backward_logp = self.proposal_dist.logpdf(self.tempered_posterior) for n_step in range(self.n_steps): proposal = floatX( self.proposal_dist.rvs(size=self.draws, random_state=self.rng)) proposal = proposal.reshape(len(proposal), -1) # We then compute the logp of proposing a transition to the new points forward_logp = self.proposal_dist.logpdf(proposal) ll = np.array( [self.likelihood_logp_func(prop) for prop in proposal]) pl = np.array([self.prior_logp_func(prop) for prop in proposal]) proposal_logp = pl + ll * self.beta accepted = log_R[n_step] < ( (proposal_logp + backward_logp) - (self.tempered_posterior_logp + forward_logp)) ac_[n_step] = accepted self.tempered_posterior[accepted] = proposal[accepted] self.tempered_posterior_logp[accepted] = proposal_logp[accepted] self.prior_logp[accepted] = pl[accepted] self.likelihood_logp[accepted] = ll[accepted] backward_logp[accepted] = forward_logp[accepted] self.acc_rate = np.mean(ac_)
def random(self):
    """Draw random value from QuadPotential."""
    n = floatX(normal(size=self.size))
    n /= self.d_sqrt
    n = self.factor.solve_Lt(n)
    n = self.factor.apply_Pt(n)
    return n
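
# Illustrative sketch (dense NumPy analogue, not the sparse CHOLMOD path above; it
# assumes the factor plays the role of a Cholesky factor of the mass matrix): if
# A = L @ L.T, then x = solve(L.T, z) with z ~ N(0, I) has covariance
# inv(L.T) @ inv(L) = inv(A), which is the covariance identity behind solving
# against a transposed triangular factor.
import numpy as np

rng = np.random.default_rng(3)

A = np.array([[2.0, 0.3], [0.3, 1.0]])
L = np.linalg.cholesky(A)

z = rng.standard_normal((2, 100_000))
x = np.linalg.solve(L.T, z)

print(np.cov(x))            # approximately inv(A)
print(np.linalg.inv(A))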
def dist(
    cls,
    distribution,
    lower=None,
    upper=None,
    size=None,
    shape=None,
    **kwargs,
):
    cls._argument_checks(distribution, **kwargs)
    lower, upper, initval = cls._set_values(lower, upper, size, shape, initval=None)
    distribution.tag.ignore_logprob = True

    if isinstance(distribution.owner.op, Continuous):
        res = _ContinuousBounded.dist(
            [distribution, lower, upper],
            size=size,
            shape=shape,
            **kwargs,
        )
        res.tag.test_value = floatX(initval)
    else:
        res = _DiscreteBounded.dist(
            [distribution, lower, upper],
            size=size,
            shape=shape,
            **kwargs,
        )
        res.tag.test_value = intX(initval)
    return res
def dlogp(inputs, gradients):
    (g_logp,) = gradients
    cov, delta = inputs

    g_logp.tag.test_value = floatX(1.0)
    n, k = delta.shape

    chol_cov = cholesky(cov)
    diag = at.diag(chol_cov)
    ok = at.all(diag > 0)

    chol_cov = at.switch(ok, chol_cov, at.fill(chol_cov, 1))
    delta_trans = solve_lower(chol_cov, delta.T).T

    inner = n * at.eye(k) - at.dot(delta_trans.T, delta_trans)
    g_cov = solve_upper(chol_cov.T, inner)
    g_cov = solve_upper(chol_cov.T, g_cov.T)

    tau_delta = solve_upper(chol_cov.T, delta_trans.T)
    g_delta = tau_delta.T

    g_cov = at.switch(ok, g_cov, -np.nan)
    g_delta = at.switch(ok, g_delta, -np.nan)

    return [-0.5 * g_cov * g_logp, -g_delta * g_logp]
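
# Illustrative check (standalone NumPy/SciPy, not the Aesara Op): the triangular solves
# above correspond to the closed-form gradients of sum_i log N(delta_i; 0, cov):
#   d logp / d cov   = -0.5 * (n * inv(cov) - inv(cov) @ delta.T @ delta @ inv(cov))
#   d logp / d delta = -delta @ inv(cov)
# Finite-difference check of the delta gradient against SciPy's logpdf:
import numpy as np
from scipy import stats

rng = np.random.default_rng(42)

cov = np.array([[2.0, 0.5], [0.5, 1.5]])
delta = rng.normal(size=(5, 2))
n, k = delta.shape

cov_inv = np.linalg.inv(cov)
g_delta = -delta @ cov_inv


def logp(d):
    return stats.multivariate_normal(np.zeros(k), cov).logpdf(d.reshape(n, k)).sum()


eps = 1e-6
num = np.array(
    [(logp(delta.ravel() + eps * e) - logp(delta.ravel() - eps * e)) / (2 * eps)
     for e in np.eye(n * k)]
).reshape(n, k)
print(np.allclose(num, g_delta, atol=1e-5))  # True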
def check_vectortransform_elementwise_logp(self, model):
    x = model.free_RVs[0]
    x_val_transf = x.tag.value_var

    pt = model.initial_point(0)
    test_array_transf = floatX(np.random.randn(*pt[x_val_transf.name].shape))
    transform = x_val_transf.tag.transform
    test_array_untransf = transform.backward(test_array_transf, *x.owner.inputs).eval()

    # Create input variable with same dimensionality as untransformed test_array
    x_val_untransf = at.constant(test_array_untransf).type()

    jacob_det = transform.log_jac_det(test_array_transf, *x.owner.inputs)
    # Original distribution is univariate
    if x.owner.op.ndim_supp == 0:
        assert joint_logpt(x, sum=False)[0].ndim == x.ndim == (jacob_det.ndim + 1)
    # Original distribution is multivariate
    else:
        assert joint_logpt(x, sum=False)[0].ndim == (x.ndim - 1) == jacob_det.ndim

    a = joint_logpt(x, x_val_transf, jacobian=False).eval({x_val_transf: test_array_transf})
    b = joint_logpt(x, x_val_untransf, transformed=False).eval(
        {x_val_untransf: test_array_untransf}
    )
    # Hack to get relative tolerance
    close_to(a, b, np.abs(0.5 * (a + b) * tol))
def test_logp(self):
    np.random.seed(42)

    chol_val = floatX(np.array([[1, 0.9], [0, 2]]))
    cov_val = floatX(np.dot(chol_val, chol_val.T))
    cov = at.matrix("cov")
    cov.tag.test_value = cov_val
    delta_val = floatX(np.random.randn(5, 2))
    delta = at.matrix("delta")
    delta.tag.test_value = delta_val
    expect = stats.multivariate_normal(mean=np.zeros(2), cov=cov_val)
    expect = expect.logpdf(delta_val).sum()
    logp = MvNormalLogp()(cov, delta)
    logp_f = aesara.function([cov, delta], logp)
    logp = logp_f(cov_val, delta_val)
    npt.assert_allclose(logp, expect)
def mutate(self): """Metropolis-Hastings perturbation.""" self.n_steps = 1 old_corr = 2 corr = Pearson(self.tempered_posterior) ac_ = [] while True: log_R = np.log(self.rng.random(self.draws)) proposal = floatX( self.tempered_posterior + self.proposal_dist(num_draws=self.draws, rng=self.rng) * self.proposal_scales[:, None]) ll = np.array( [self.likelihood_logp_func(prop) for prop in proposal]) pl = np.array([self.prior_logp_func(prop) for prop in proposal]) proposal_logp = pl + ll * self.beta accepted = log_R < (proposal_logp - self.tempered_posterior_logp) self.tempered_posterior[accepted] = proposal[accepted] self.prior_logp[accepted] = pl[accepted] self.likelihood_logp[accepted] = ll[accepted] self.tempered_posterior_logp[accepted] = proposal_logp[accepted] ac_.append(accepted) self.n_steps += 1 pearson_r = corr.get(self.tempered_posterior) if np.mean( (old_corr - pearson_r) > self.correlation_threshold) > 0.9: old_corr = pearson_r else: break self.chain_acc_rate = np.mean(ac_, axis=0)
def test_logpt_incsubtensor(indices, size):
    """Make sure we can compute a log-likelihood for ``Y[idx] = data`` where ``Y`` is univariate."""

    mu = floatX(np.power(10, np.arange(np.prod(size)))).reshape(size)
    data = mu[indices]
    sigma = 0.001
    rng = np.random.RandomState(232)
    a_val = rng.normal(mu, sigma, size=size).astype(aesara.config.floatX)
    rng = aesara.shared(rng, borrow=False)

    a = Normal.dist(mu, sigma, size=size, rng=rng)
    a_value_var = a.type()
    a.name = "a"

    a_idx = at.set_subtensor(a[indices], data)

    assert isinstance(a_idx.owner.op, (IncSubtensor, AdvancedIncSubtensor, AdvancedIncSubtensor1))

    a_idx_value_var = a_idx.type()
    a_idx_value_var.name = "a_idx_value"

    a_idx_logp = logpt(a_idx, {a_idx: a_value_var}, sum=False)

    logp_vals = a_idx_logp.eval({a_value_var: a_val})

    # The indices that were set should all have the same log-likelihood values,
    # because the values they were set to correspond to the unique means along
    # that dimension. This helps us confirm that the log-likelihood is
    # associating the assigned values with their correct parameters.
    a_val_idx = a_val.copy()
    a_val_idx[indices] = data
    exp_obs_logps = sp.norm.logpdf(a_val_idx, mu, sigma)
    np.testing.assert_almost_equal(logp_vals, exp_obs_logps)
def test_elemwise_velocity():
    scaling = np.array([1, 2, 3])
    x = floatX(np.ones_like(scaling))
    pot = quadpotential.quad_potential(scaling, True)
    v = pot.velocity(x)
    npt.assert_allclose(v, scaling)
    assert v.dtype == pot.dtype
def test_list_normals_sampling(self):
    norm_w = np.array([0.75, 0.25])
    norm_mu = np.array([0.0, 5.0])
    norm_sigma = np.ones_like(norm_mu)
    norm_x = generate_normal_mixture_data(norm_w, norm_mu, norm_sigma, size=1000)

    with Model() as model:
        w = Dirichlet("w", floatX(np.ones_like(norm_w)), shape=norm_w.size)
        mu = Normal("mu", 0.0, 10.0, shape=norm_w.size)
        tau = Gamma("tau", 1.0, 1.0, shape=norm_w.size)
        Mixture(
            "x_obs",
            w,
            [Normal.dist(mu[0], tau=tau[0]), Normal.dist(mu[1], tau=tau[1])],
            observed=norm_x,
        )
        trace = sample(
            5000,
            chains=1,
            step=Metropolis(),
            random_seed=self.random_seed,
            progressbar=False,
            return_inferencedata=False,
        )

    assert_allclose(np.sort(trace["w"].mean(axis=0)), np.sort(norm_w), rtol=0.1, atol=0.1)
    assert_allclose(np.sort(trace["mu"].mean(axis=0)), np.sort(norm_mu), rtol=0.1, atol=0.1)
def test_list_mvnormals_logp(self):
    mu1 = np.asarray([0.0, 1.0])
    cov1 = np.diag([1.5, 2.5])
    mu2 = np.asarray([1.0, 0.0])
    cov2 = np.diag([2.5, 3.5])
    obs = np.asarray([[0.5, 0.5], mu1, mu2])
    with Model() as model:
        w = Dirichlet("w", floatX(np.ones(2)), transform=None, shape=(2,))
        mvncomp1 = MvNormal.dist(mu=mu1, cov=cov1)
        mvncomp2 = MvNormal.dist(mu=mu2, cov=cov2)
        y = Mixture("x_obs", w, [mvncomp1, mvncomp2], observed=obs)

    # check logp of each component
    complogp_st = np.vstack(
        (
            st.multivariate_normal.logpdf(obs, mu1, cov1),
            st.multivariate_normal.logpdf(obs, mu2, cov2),
        )
    ).T

    # check logp of mixture
    testpoint = model.compute_initial_point()
    mixlogp_st = logsumexp(np.log(testpoint["w"]) + complogp_st, axis=-1, keepdims=False)
    assert_allclose(model.compile_logp(y, sum=False)(testpoint)[0], mixlogp_st)

    # check logp of model
    priorlogp = st.dirichlet.logpdf(
        x=testpoint["w"],
        alpha=np.ones(2),
    )
    assert_allclose(model.compile_logp()(testpoint), mixlogp_st.sum() + priorlogp)
def test_joint_logp_subtensor():
    """Make sure we can compute a log-likelihood for ``Y[I]`` where ``Y`` and ``I`` are random variables."""

    size = 5

    mu_base = floatX(np.power(10, np.arange(np.prod(size)))).reshape(size)
    mu = np.stack([mu_base, -mu_base])
    sigma = 0.001
    rng = aesara.shared(np.random.RandomState(232), borrow=True)

    A_rv = Normal.dist(mu, sigma, rng=rng)
    A_rv.name = "A"

    p = 0.5

    I_rv = Bernoulli.dist(p, size=size, rng=rng)
    I_rv.name = "I"

    A_idx = A_rv[I_rv, at.ogrid[A_rv.shape[-1] :]]

    assert isinstance(A_idx.owner.op, (Subtensor, AdvancedSubtensor, AdvancedSubtensor1))

    A_idx_value_var = A_idx.type()
    A_idx_value_var.name = "A_idx_value"

    I_value_var = I_rv.type()
    I_value_var.name = "I_value"

    A_idx_logps = joint_logp(A_idx, {A_idx: A_idx_value_var, I_rv: I_value_var}, sum=False)
    A_idx_logp = at.add(*A_idx_logps)

    logp_vals_fn = aesara.function([A_idx_value_var, I_value_var], A_idx_logp)

    # The compiled graph should not contain any `RandomVariables`
    assert_no_rvs(logp_vals_fn.maker.fgraph.outputs[0])

    decimals = select_by_precision(float64=6, float32=4)

    for i in range(10):
        bern_sp = sp.bernoulli(p)
        I_value = bern_sp.rvs(size=size).astype(I_rv.dtype)

        norm_sp = sp.norm(mu[I_value, np.ogrid[mu.shape[1] :]], sigma)
        A_idx_value = norm_sp.rvs().astype(A_idx.dtype)

        exp_obs_logps = norm_sp.logpdf(A_idx_value)
        exp_obs_logps += bern_sp.logpmf(I_value)

        logp_vals = logp_vals_fn(A_idx_value, I_value)

        np.testing.assert_almost_equal(logp_vals, exp_obs_logps, decimal=decimals)
def test_expand_packed_triangular():
    with pytest.raises(ValueError):
        x = at.matrix("x")
        x.tag.test_value = np.array([[1.0]], dtype=aesara.config.floatX)
        expand_packed_triangular(5, x)

    N = 5
    packed = at.vector("packed")
    packed.tag.test_value = floatX(np.zeros(N * (N + 1) // 2))
    with pytest.raises(TypeError):
        expand_packed_triangular(packed.shape[0], packed)

    np.random.seed(42)
    vals = np.random.randn(N, N)
    lower = floatX(np.tril(vals))
    lower_packed = floatX(vals[lower != 0])
    upper = floatX(np.triu(vals))
    upper_packed = floatX(vals[upper != 0])

    expand_lower = expand_packed_triangular(N, packed, lower=True)
    expand_upper = expand_packed_triangular(N, packed, lower=False)
    expand_diag_lower = expand_packed_triangular(N, packed, lower=True, diagonal_only=True)
    expand_diag_upper = expand_packed_triangular(N, packed, lower=False, diagonal_only=True)

    assert np.all(expand_lower.eval({packed: lower_packed}) == lower)
    assert np.all(expand_upper.eval({packed: upper_packed}) == upper)
    assert np.all(expand_diag_lower.eval({packed: lower_packed}) == floatX(np.diag(vals)))
    assert np.all(expand_diag_upper.eval({packed: upper_packed}) == floatX(np.diag(vals)))
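
# Illustrative sketch of the packing convention exercised above: the lower triangle is
# read off in row-major order into a vector of length N*(N+1)//2, and unpacking simply
# reverses the operation (standalone NumPy, not the Aesara helper):
import numpy as np

N = 3
vals = np.arange(1.0, N * N + 1).reshape(N, N)
lower = np.tril(vals)
packed = lower[np.tril_indices(N)]          # [1, 4, 5, 7, 8, 9]

unpacked = np.zeros((N, N))
unpacked[np.tril_indices(N)] = packed
print(np.array_equal(unpacked, lower))      # True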
def test_grad(self):
    np.random.seed(42)

    def func(chol_vec, delta):
        chol = at.stack(
            [
                at.stack([at.exp(0.1 * chol_vec[0]), 0]),
                at.stack([chol_vec[1], 2 * at.exp(chol_vec[2])]),
            ]
        )
        cov = at.dot(chol, chol.T)
        return MvNormalLogp()(cov, delta)

    chol_vec_val = floatX(np.array([0.5, 1.0, -0.1]))

    delta_val = floatX(np.random.randn(1, 2))
    verify_grad(func, [chol_vec_val, delta_val])

    delta_val = floatX(np.random.randn(5, 2))
    verify_grad(func, [chol_vec_val, delta_val])
def astep(self, q0: RaveledVars) -> Tuple[RaveledVars, List[Dict[str, Any]]]:
    point_map_info = q0.point_map_info
    q0 = q0.data

    if not self.steps_until_tune and self.tune:
        # Tune scaling parameter
        self.scaling = tune(self.scaling, self.accepted_sum / float(self.tune_interval))
        # Reset counter
        self.steps_until_tune = self.tune_interval
        self.accepted_sum[:] = 0

    delta = self.proposal_dist() * self.scaling

    if self.any_discrete:
        if self.all_discrete:
            delta = np.round(delta, 0).astype("int64")
            q0 = q0.astype("int64")
            q = (q0 + delta).astype("int64")
        else:
            delta[self.discrete] = np.round(delta[self.discrete], 0)
            q = q0 + delta
    else:
        q = floatX(q0 + delta)

    if self.elemwise_update:
        q_temp = q0.copy()
        # Shuffle order of updates (probably we don't need to do this in every step)
        np.random.shuffle(self.enum_dims)
        for i in self.enum_dims:
            q_temp[i] = q[i]
            accept_rate_i = self.delta_logp(q_temp, q0)
            q_temp_, accepted_i = metrop_select(accept_rate_i, q_temp, q0)
            q_temp[i] = q_temp_[i]
            self.accept_rate_iter[i] = accept_rate_i
            self.accepted_iter[i] = accepted_i
            self.accepted_sum[i] += accepted_i
        q = q_temp
    else:
        accept_rate = self.delta_logp(q, q0)
        q, accepted = metrop_select(accept_rate, q, q0)
        self.accept_rate_iter = accept_rate
        self.accepted_iter = accepted
        self.accepted_sum += accepted

    self.steps_until_tune -= 1

    stats = {
        "tune": self.tune,
        "scaling": np.mean(self.scaling),
        "accept": np.mean(np.exp(self.accept_rate_iter)),
        "accepted": np.mean(self.accepted_iter),
    }

    return RaveledVars(q, point_map_info), [stats]
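
# Illustrative sketch of the accept/reject decision applied (per dimension) above: a
# generic Metropolis rule as a standalone function, not PyMC's `metrop_select` itself.
import numpy as np


def metropolis_accept(log_accept_rate, q_proposed, q_current, rng):
    # Accept when log(U) < log acceptance rate, otherwise keep the current point
    if np.isfinite(log_accept_rate) and np.log(rng.random()) < log_accept_rate:
        return q_proposed, True
    return q_current, False


rng = np.random.default_rng(7)
q_new, accepted = metropolis_accept(np.log(0.3), np.array([1.0]), np.array([0.0]), rng)
print(q_new, accepted)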
def dist(cls, mu=0.0, sigma=1.0, *, init_dist=None, steps=None, **kwargs) -> at.TensorVariable:
    mu = at.as_tensor_variable(floatX(mu))
    sigma = at.as_tensor_variable(floatX(sigma))

    steps = get_steps(
        steps=steps,
        shape=kwargs.get("shape"),
        step_shape_offset=1,
    )
    if steps is None:
        raise ValueError("Must specify steps or shape parameter")
    steps = at.as_tensor_variable(intX(steps))

    if "init" in kwargs:
        warnings.warn(
            "init parameter is now called init_dist. Using init will raise an error "
            "in a future release.",
            FutureWarning,
        )
        init_dist = kwargs.pop("init")

    # If no init distribution is passed, default to a wide Normal
    if init_dist is None:
        warnings.warn(
            "Initial distribution not specified, defaulting to `Normal.dist(0, 100)`. "
            "You can specify an init_dist manually to suppress this warning.",
            UserWarning,
        )
        init_dist = Normal.dist(0, 100)
    else:
        if not (
            isinstance(init_dist, at.TensorVariable)
            and init_dist.owner is not None
            and isinstance(init_dist.owner.op, RandomVariable)
            and init_dist.owner.op.ndim_supp == 0
        ):
            raise TypeError("init_dist must be a univariate distribution variable")
        check_dist_not_registered(init_dist)

    # Ignores logprob of init var because that's accounted for in the logp method
    init_dist = ignore_logprob(init_dist)

    return super().dist([mu, sigma, init_dist, steps], **kwargs)
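
# Illustrative sketch of the generative process this parametrization implies (assumed:
# the first entry is the init draw and each subsequent entry adds a Normal(mu, sigma)
# increment, giving `steps + 1` values in total; standalone NumPy, not the PyMC RV):
import numpy as np

rng = np.random.default_rng(123)


def grw_draw(mu, sigma, steps, init_draw, rng):
    # Cumulative sum of Gaussian increments starting from the init draw
    innovations = rng.normal(mu, sigma, size=steps)
    return init_draw + np.concatenate([[0.0], np.cumsum(innovations)])


x = grw_draw(mu=0.0, sigma=1.0, steps=10, init_draw=rng.normal(0, 100), rng=rng)
print(x.shape)  # (11,) -> init plus `steps` increments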