def __call__(self, X):
    XY = X.dot(X.T)
    x2 = tt.sum(X ** 2, axis=1).dimshuffle(0, 'x')
    X2e = tt.repeat(x2, X.shape[0], axis=1)
    H = X2e + X2e.T - 2. * XY

    V = tt.sort(H.flatten())
    length = V.shape[0]
    # median distance
    m = tt.switch(tt.eq((length % 2), 0),
                  # if even vector
                  tt.mean(V[((length // 2) - 1):((length // 2) + 1)]),
                  # if odd vector
                  V[length // 2])

    h = .5 * m / tt.log(floatX(H.shape[0]) + floatX(1))

    # RBF
    Kxy = tt.exp(-H / h / 2.0)

    # Derivative
    dxkxy = -tt.dot(Kxy, X)
    sumkxy = tt.sum(Kxy, axis=1).dimshuffle(0, 'x')
    dxkxy = tt.add(dxkxy, tt.mul(X, sumkxy)) / h

    return Kxy, dxkxy
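% Illustrative note (my reading of the kernel above, not stated in the source): the code
% computes pairwise squared distances H, a bandwidth h via the SVGD median heuristic,
% the RBF kernel matrix K, and the summed kernel gradients used by SVGD.
\[
H_{ij} = \lVert x_i - x_j \rVert^2, \qquad
h = \frac{\operatorname{median}(H)}{2 \log(n + 1)}, \qquad
K_{ij} = \exp\!\left(-\frac{H_{ij}}{2h}\right)
\]
\[
\text{dxkxy}_i = \frac{1}{h}\sum_j K_{ij}\,(x_i - x_j) = \sum_j \nabla_{x_j} K(x_j, x_i)
\]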
def test_advi_minibatch(self):
    n = 1000
    sd0 = 2.
    mu0 = 4.
    sd = 3.
    mu = -5.

    data = floatX(sd * np.random.randn(n) + mu)
    d = n / sd**2 + 1 / sd0**2
    mu_post = (n * np.mean(data) / sd**2 + mu0 / sd0**2) / d

    data_t = tt.vector()
    data_t.tag.test_value = floatX(np.zeros(1,))

    def create_minibatch(data):
        while True:
            data = np.roll(data, 100, axis=0)
            yield (data[:100],)

    minibatches = create_minibatch(data)

    with Model():
        mu_ = Normal('mu', mu=mu0, sd=sd0, testval=0)
        x = Normal('x', mu=mu_, sd=sd, observed=data_t)
        advi_fit = advi_minibatch(
            n=1000, minibatch_tensors=[data_t], minibatch_RVs=[x],
            minibatches=minibatches, total_size=n, learning_rate=1e-1)
        np.testing.assert_allclose(advi_fit.means['mu'], mu_post, rtol=0.1)
        trace = sample_vp(advi_fit, 10000)

    np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.4)
    np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4)

    # Test for n < 10
    with Model():
        mu_ = Normal('mu', mu=mu0, sd=sd0, testval=0)
        x = Normal('x', mu=mu_, sd=sd, observed=data_t)
        advi_fit = advi_minibatch(
            n=5, minibatch_tensors=[data_t], minibatch_RVs=[x],
            minibatches=minibatches, total_size=n, learning_rate=1e-1)

    # Check to raise NaN with a large learning coefficient
    with pytest.raises(FloatingPointError):
        with Model():
            mu_ = Normal('mu', mu=mu0, sd=sd0, testval=0)
            x = Normal('x', mu=mu_, sd=sd, observed=data_t)
            advi_fit = advi_minibatch(
                n=1000, minibatch_tensors=[data_t], minibatch_RVs=[x],
                minibatches=minibatches, total_size=n, learning_rate=1e10)
def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
             model=None, blocked=True, potential=None,
             integrator="leapfrog", dtype=None, **theano_kwargs):
    """Set up Hamiltonian samplers with common structures.

    Parameters
    ----------
    vars : list of theano variables
    scaling : array_like, ndim = {1,2}
        Scaling for momentum distribution. 1d arrays are interpreted as a
        matrix diagonal.
    step_scale : float, default=0.25
        Size of steps to take, automatically scaled down by 1/n**(1/4)
    is_cov : bool, default=False
        Treat scaling as a covariance matrix/vector if True, else treat
        it as a precision matrix/vector
    model : pymc3 Model instance
    blocked : bool, default=True
    potential : Potential, optional
        An object that represents the Hamiltonian with methods `velocity`,
        `energy`, and `random`.
    **theano_kwargs : passed to theano functions
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)

    super(BaseHMC, self).__init__(vars, blocked=blocked, model=model,
                                  dtype=dtype, **theano_kwargs)

    size = self._logp_dlogp_func.size

    if scaling is None and potential is None:
        mean = floatX(np.zeros(size))
        var = floatX(np.ones(size))
        potential = QuadPotentialDiagAdapt(size, mean, var, 10)

    if isinstance(scaling, dict):
        point = Point(scaling, model=model)
        scaling = guess_scaling(point, model=model, vars=vars)

    if scaling is not None and potential is not None:
        raise ValueError("Can not specify both potential and scaling.")

    self.step_size = step_scale / (size ** 0.25)

    if potential is not None:
        self.potential = potential
    else:
        self.potential = quad_potential(scaling, is_cov)

    self.integrator = integration.CpuLeapfrogIntegrator(
        self.potential, self._logp_dlogp_func)
def build_model(self):
    data = pd.read_csv(pm.get_data('wells.dat'),
                       delimiter=u' ', index_col=u'id',
                       dtype={u'switch': np.int8})
    data.dist /= 100
    data.educ /= 4
    col = data.columns
    P = data[col[1:]]
    P -= P.mean()
    P['1'] = 1

    with pm.Model() as model:
        effects = pm.Normal('effects', mu=0, tau=100. ** -2,
                            shape=len(P.columns))
        p = tt.nnet.sigmoid(tt.dot(floatX(np.array(P)), effects))
        pm.Bernoulli('s', p, observed=floatX(np.array(data.switch)))
    return model
def random(self): """Draw random value from QuadPotential.""" n = floatX(normal(size=self.size)) n /= self.d_sqrt n = self.factor.solve_Lt(n) n = self.factor.apply_Pt(n) return n
def astep(self, q0):
    if not self.steps_until_tune and self.tune:
        # Tune scaling parameter
        self.scaling = tune(
            self.scaling, self.accepted / float(self.tune_interval))
        # Reset counter
        self.steps_until_tune = self.tune_interval
        self.accepted = 0

    delta = self.proposal_dist() * self.scaling

    if self.any_discrete:
        if self.all_discrete:
            delta = np.round(delta, 0).astype('int64')
            q0 = q0.astype('int64')
            q = (q0 + delta).astype('int64')
        else:
            delta[self.discrete] = np.round(delta[self.discrete], 0)
            q = (q0 + delta)
    else:
        q = floatX(q0 + delta)

    accept = self.delta_logp(q, q0)
    q_new, accepted = metrop_select(accept, q, q0)
    self.accepted += accepted

    self.steps_until_tune -= 1

    stats = {
        'tune': self.tune,
        'accept': np.exp(accept),
    }

    return q_new, [stats]
def __init__(self, eta, n, sd_dist, *args, **kwargs):
    self.n = n
    self.eta = eta

    if 'transform' in kwargs:
        raise ValueError('Invalid parameter: transform.')
    if 'shape' in kwargs:
        raise ValueError('Invalid parameter: shape.')

    shape = n * (n + 1) // 2

    if sd_dist.shape.ndim not in [0, 1]:
        raise ValueError('Invalid shape for sd_dist.')

    transform = transforms.CholeskyCovPacked(n)

    kwargs['shape'] = shape
    kwargs['transform'] = transform
    super(LKJCholeskyCov, self).__init__(*args, **kwargs)

    self.sd_dist = sd_dist
    self.diag_idxs = transform.diag_idxs

    self.mode = floatX(np.zeros(shape))
    self.mode[self.diag_idxs] = 1
def astep(self, q0):
    if not self.steps_until_tune and self.tune:
        # Tune scaling parameter
        self.scaling = tune(
            self.scaling, self.accepted / float(self.tune_interval))
        # Reset counter
        self.steps_until_tune = self.tune_interval
        self.accepted = 0

    epsilon = self.proposal_dist() * self.scaling

    # differential evolution proposal
    # select two other chains
    ir1, ir2 = np.random.choice(self.other_chains, 2, replace=False)
    r1 = self.bij.map(self.population[ir1])
    r2 = self.bij.map(self.population[ir2])
    # propose a jump
    q = floatX(q0 + self.lamb * (r1 - r2) + epsilon)

    accept = self.delta_logp(q, q0)
    q_new, accepted = metrop_select(accept, q, q0)
    self.accepted += accepted

    self.steps_until_tune -= 1

    stats = {
        'tune': self.tune,
        'accept': np.exp(accept),
    }

    return q_new, [stats]
def test_mixture_of_mvn(self):
    mu1 = np.asarray([0., 1.])
    cov1 = np.diag([1.5, 2.5])
    mu2 = np.asarray([1., 0.])
    cov2 = np.diag([2.5, 3.5])
    obs = np.asarray([[.5, .5], mu1, mu2])

    with Model() as model:
        w = Dirichlet('w', floatX(np.ones(2)), transform=None)
        mvncomp1 = MvNormal.dist(mu=mu1, cov=cov1)
        mvncomp2 = MvNormal.dist(mu=mu2, cov=cov2)
        y = Mixture('x_obs', w, [mvncomp1, mvncomp2], observed=obs)

    # check logp of each component
    complogp_st = np.vstack((st.multivariate_normal.logpdf(obs, mu1, cov1),
                             st.multivariate_normal.logpdf(obs, mu2, cov2))
                            ).T
    complogp = y.distribution._comp_logp(theano.shared(obs)).eval()
    assert_allclose(complogp, complogp_st)

    # check logp of mixture
    testpoint = model.test_point
    mixlogp_st = logsumexp(np.log(testpoint['w']) + complogp_st,
                           axis=-1, keepdims=True)
    assert_allclose(y.logp_elemwise(testpoint), mixlogp_st)

    # check logp of model
    priorlogp = st.dirichlet.logpdf(x=testpoint['w'],
                                    alpha=np.ones(2),
                                    )
    assert_allclose(model.logp(testpoint), mixlogp_st.sum() + priorlogp)
def test_elemwise_velocity():
    scaling = np.array([1, 2, 3])
    x = floatX(np.ones_like(scaling))
    pot = quadpotential.quad_potential(scaling, True)
    v = pot.velocity(x)
    npt.assert_allclose(v, scaling)
    assert v.dtype == pot.dtype
def _theano_single_twostage(H, q, p, q_grad, **theano_kwargs):
    """Perform a single step of a second order symplectic integration scheme.

    References
    ----------
    Blanes, Sergio, Fernando Casas, and J. M. Sanz-Serna. "Numerical
    Integrators for the Hybrid Monte Carlo Method." SIAM Journal on
    Scientific Computing 36, no. 4 (January 2014): A1556-80.
    doi:10.1137/130932740.

    Mannseth, Janne, Tore Selland Kleppe, and Hans J. Skaug. "On the
    Application of Higher Order Symplectic Integrators in Hamiltonian Monte
    Carlo." arXiv:1608.07048 [Stat], August 25, 2016.
    http://arxiv.org/abs/1608.07048.
    """
    epsilon = tt.scalar('epsilon')
    epsilon.tag.test_value = 1.

    a = floatX((3 - np.sqrt(3)) / 6)

    p_ae = p + a * epsilon * q_grad
    q_e2 = q + epsilon / 2 * H.pot.velocity(p_ae)
    p_1ae = p_ae + (1 - 2 * a) * epsilon * H.dlogp(q_e2)
    q_e = q_e2 + epsilon / 2 * H.pot.velocity(p_1ae)
    grad_e = H.dlogp(q_e)
    p_e = p_1ae + a * epsilon * grad_e
    v_e = H.pot.velocity(p_e)

    new_energy = energy(H, q_e, p_e)

    f = theano.function(inputs=[q, p, q_grad, epsilon],
                        outputs=[q_e, p_e, v_e, grad_e, new_energy],
                        **theano_kwargs)
    f.trust_input = True
    return f
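% Sketch of the update this function builds, as I read the code above (with
% a = (3 - sqrt(3))/6, velocity v(p) = M^{-1} p, and dlogp the log-posterior gradient):
\[
\begin{aligned}
p_a     &= p + a\,\epsilon\,\nabla\log\pi(q), &
q_{1/2} &= q + \tfrac{\epsilon}{2} M^{-1} p_a, \\
p_{1-a} &= p_a + (1-2a)\,\epsilon\,\nabla\log\pi(q_{1/2}), &
q'      &= q_{1/2} + \tfrac{\epsilon}{2} M^{-1} p_{1-a}, \\
p'      &= p_{1-a} + a\,\epsilon\,\nabla\log\pi(q'). &&
\end{aligned}
\]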
def __init__(self, v):
    v = floatX(v)
    s = v ** .5
    self.s = s
    self.inv_s = 1. / s
    self.v = v
def build_model(self):
    data = pd.read_csv(pm.get_data('wells.dat'),
                       delimiter=' ', index_col='id',
                       dtype={'switch': np.int8})
    data.dist /= 100
    data.educ /= 4
    col = data.columns
    P = data[col[1:]]
    P -= P.mean()
    P['1'] = 1

    with pm.Model() as model:
        effects = pm.Normal('effects', mu=0, sigma=100, shape=len(P.columns))
        logit_p = tt.dot(floatX(np.array(P)), effects)
        pm.Bernoulli('s', logit_p=logit_p, observed=floatX(data.switch.values))
    return model
def _get_rvss(
        minibatch_RVs, local_RVs, observed_RVs, minibatch_tensors, total_size):
    """Returns local_RVs and observed_RVs.

    This function is used for backward compatibility of how input arguments
    are given.
    """
    if minibatch_RVs is not None:
        _value_error(isinstance(minibatch_RVs, list),
                     'minibatch_RVs must be a list.')

        _value_error((local_RVs is None) and (observed_RVs is None),
                     'When minibatch_RVs is given, local_RVs and ' +
                     'observed_RVs must be None.')

        s = floatX(total_size / minibatch_tensors[0].shape[0])
        local_RVs = OrderedDict()
        observed_RVs = OrderedDict([(v, s) for v in minibatch_RVs])

    else:
        _value_error((isinstance(local_RVs, OrderedDict) and
                      isinstance(observed_RVs, OrderedDict)),
                     'local_RVs and observed_RVs must be OrderedDict.')

    return local_RVs, observed_RVs
def __init__(self, approx, kernel=rbf, input_matrix=None, temperature=1):
    self.approx = approx
    self.temperature = floatX(temperature)
    self._kernel_f = kernel
    if input_matrix is None:
        input_matrix = tt.matrix('stein_input_matrix')
    self.input_matrix = input_matrix
def __init__(self, n, initial_mean, initial_diag=None, initial_weight=0,
             adaptation_window=100, dtype=None):
    """Set up a diagonal mass matrix."""
    if initial_diag is not None and initial_diag.ndim != 1:
        raise ValueError('Initial diagonal must be one-dimensional.')
    if initial_mean.ndim != 1:
        raise ValueError('Initial mean must be one-dimensional.')
    if initial_diag is not None and len(initial_diag) != n:
        raise ValueError('Wrong shape for initial_diag: expected %s got %s'
                         % (n, len(initial_diag)))
    if len(initial_mean) != n:
        raise ValueError('Wrong shape for initial_mean: expected %s got %s'
                         % (n, len(initial_mean)))

    if initial_diag is None:
        initial_diag = np.ones(n, dtype=theano.config.floatX)
        initial_weight = 1

    if dtype is None:
        dtype = theano.config.floatX
    self.dtype = dtype
    self._n = n
    self._var = np.array(initial_diag, dtype=self.dtype, copy=True)
    self._var_theano = theano.shared(self._var)
    self._stds = np.sqrt(initial_diag)
    self._inv_stds = floatX(1.) / self._stds
    self._foreground_var = _WeightedVariance(
        self._n, initial_mean, initial_diag, initial_weight, self.dtype)
    self._background_var = _WeightedVariance(self._n, dtype=self.dtype)
    self._n_samples = 0
    self.adaptation_window = adaptation_window
def __init__(self, eta=None, n=None, p=None, transform='interval',
             *args, **kwargs):
    if (p is not None) and (n is not None) and (eta is None):
        warnings.warn('Parameters to LKJCorr have changed: shape parameter n -> eta '
                      'dimension parameter p -> n. Please update your code. '
                      'Automatically re-assigning parameters for backwards compatibility.',
                      DeprecationWarning)
        self.n = p
        self.eta = n
        eta = self.eta
        n = self.n
    elif (n is not None) and (eta is not None) and (p is None):
        self.n = n
        self.eta = eta
    else:
        raise ValueError('Invalid parameter: please use eta as the shape parameter and '
                         'n as the dimension parameter.')

    shape = n * (n - 1) // 2
    self.mean = floatX(np.zeros(shape))

    if transform == 'interval':
        transform = transforms.interval(-1, 1)

    super(LKJCorr, self).__init__(shape=shape, transform=transform,
                                  *args, **kwargs)
    warnings.warn('Parameters in LKJCorr have been renamed: shape parameter n -> eta '
                  'dimension parameter p -> n. Please double check your initialization.',
                  DeprecationWarning)

    self.tri_index = np.zeros([n, n], dtype='int32')
    self.tri_index[np.triu_indices(n, k=1)] = np.arange(shape)
    self.tri_index[np.triu_indices(n, k=1)[::-1]] = np.arange(shape)
def dlogp(inputs, gradients):
    g_logp, = gradients
    cov, delta = inputs

    g_logp.tag.test_value = floatX(1.)
    n, k = delta.shape

    chol_cov = cholesky(cov)
    diag = tt.nlinalg.diag(chol_cov)
    ok = tt.all(diag > 0)

    chol_cov = tt.switch(ok, chol_cov, tt.fill(chol_cov, 1))
    delta_trans = solve_lower(chol_cov, delta.T).T

    inner = n * tt.eye(k) - tt.dot(delta_trans.T, delta_trans)
    g_cov = solve_upper(chol_cov.T, inner)
    g_cov = solve_upper(chol_cov.T, g_cov.T)

    tau_delta = solve_upper(chol_cov.T, delta_trans.T)
    g_delta = tau_delta.T

    g_cov = tt.switch(ok, g_cov, -np.nan)
    g_delta = tt.switch(ok, g_delta, -np.nan)

    return [-0.5 * g_cov * g_logp, -g_delta * g_logp]
def astep(self, q0):
    e = floatX(self.step_rand(self.step_size))
    n_steps = np.array(self.path_length / e, dtype='int32')
    q = q0
    p = self.H.pot.random()  # initialize momentum

    initial_energy = self.compute_energy(q, p)
    q, p, current_energy = self.leapfrog(q, p, e, n_steps)
    energy_change = initial_energy - current_energy

    return metrop_select(energy_change, q, q0)
def logp(self, value):
    quaddist, logdet, ok = self._quaddist(value)
    k = value.shape[-1].astype(theano.config.floatX)

    norm = (gammaln((self.nu + k) / 2.)
            - gammaln(self.nu / 2.)
            - 0.5 * k * floatX(np.log(self.nu * np.pi)))
    inner = - (self.nu + k) / 2. * tt.log1p(quaddist / self.nu)
    return bound(norm + inner - logdet, ok)
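% For reference, this appears to be the multivariate Student-t log density, reading
% quaddist as the Mahalanobis form Q and logdet as (1/2) log|Sigma| (my assumption
% based on the code above, not stated in the source):
\[
\log p(x) = \log\Gamma\!\Bigl(\tfrac{\nu+k}{2}\Bigr) - \log\Gamma\!\Bigl(\tfrac{\nu}{2}\Bigr)
 - \tfrac{k}{2}\log(\nu\pi) - \tfrac{1}{2}\log\lvert\Sigma\rvert
 - \tfrac{\nu+k}{2}\log\!\Bigl(1 + \tfrac{Q}{\nu}\Bigr),
 \qquad Q = (x-\mu)^\top \Sigma^{-1} (x-\mu)
\]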
def _init_uw_global_shared(start, global_RVs):
    global_order = pm.ArrayOrdering([v for v in global_RVs])
    start = {v.name: start[v.name] for v in global_RVs}
    bij = pm.DictToArrayBijection(global_order, start)
    u_start = bij.map(start)
    w_start = np.zeros_like(u_start)
    uw_start = floatX(np.concatenate([u_start, w_start]))
    uw_global_shared = theano.shared(uw_start, 'uw_global_shared')

    return uw_global_shared, bij
def scipy_exponweib_sucks(value, alpha, beta):
    """
    This function is required because SciPy's implementation of
    the Weibull PDF fails for some valid combinations of parameters,
    while the log-PDF fails for others.
    """
    pdf = np.log(sp.exponweib.pdf(value, 1, alpha, scale=beta))
    if np.isinf(pdf):
        return sp.exponweib.logpdf(value, 1, alpha, scale=beta)
    return floatX(pdf)
def grad(self, inputs, gradients):
    """
    Cholesky decomposition reverse-mode gradient update.

    Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

    References
    ----------
    .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
       http://arxiv.org/abs/1602.07527
    """
    x = inputs[0]
    dz = gradients[0]
    chol_x = self(x)
    ok = tt.all(tt.nlinalg.diag(chol_x) > 0)
    chol_x = tt.switch(ok, chol_x, tt.fill_diagonal(chol_x, 1))
    dz = tt.switch(ok, dz, floatX(1))

    # deal with upper triangular by converting to lower triangular
    if not self.lower:
        chol_x = chol_x.T
        dz = dz.T

    def tril_and_halve_diagonal(mtx):
        """Extracts lower triangle of square matrix and halves diagonal."""
        return tt.tril(mtx) - tt.diag(tt.diagonal(mtx) / 2.)

    def conjugate_solve_triangular(outer, inner):
        """Computes L^{-T} P L^{-1} for lower-triangular L."""
        solve = tt.slinalg.Solve(A_structure="upper_triangular")
        return solve(outer.T, solve(outer.T, inner.T).T)

    s = conjugate_solve_triangular(
        chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

    if self.lower:
        grad = tt.tril(s + s.T) - tt.diag(tt.diagonal(s))
    else:
        grad = tt.triu(s + s.T) - tt.diag(tt.diagonal(s))
    return [tt.switch(ok, grad, floatX(np.nan))]
def test_gen_cloning_with_shape_change(self):
    data = floatX(np.random.uniform(size=(1000, 10)))
    minibatches = DataSampler(data, batchsize=50)
    gen = generator(minibatches)
    gen_r = tt_rng().normal(size=gen.shape).T
    X = gen.dot(gen_r)
    res, _ = theano.scan(lambda x: x.sum(), X, n_steps=X.shape[0])
    assert res.eval().shape == (50,)
    shared = theano.shared(data)
    res2 = theano.clone(res, {gen: shared**2})
    assert res2.eval().shape == (1000,)
def test_leapfrog_reversible():
    n = 3
    np.random.seed(42)
    start, model, _ = models.non_normal(n)
    size = model.ndim
    scaling = floatX(np.random.rand(size))
    step = BaseHMC(vars=model.vars, model=model, scaling=scaling)
    step.integrator._logp_dlogp_func.set_extra_values({})
    p = floatX(step.potential.random())
    q = floatX(np.random.randn(size))
    start = step.integrator.compute_state(p, q)
    for epsilon in [.01, .1]:
        for n_steps in [1, 2, 3, 4, 20]:
            state = start
            for _ in range(n_steps):
                state = step.integrator.step(epsilon, state)
            for _ in range(n_steps):
                state = step.integrator.step(-epsilon, state)
            npt.assert_allclose(state.q, start.q, rtol=1e-5)
            npt.assert_allclose(state.p, start.p, rtol=1e-5)
def logp_vals_point(pt):
    if len(model.observed_RVs) == 0:
        return floatX(np.array([], dtype='d'))

    logp_vals = []
    for var, logp in cached:
        logp = logp(pt)
        if var.missing_values:
            logp = logp[~var.observations.mask]
        logp_vals.append(logp.ravel())

    return np.concatenate(logp_vals)
def __init__(self, A, dtype=None):
    """Compute the lower cholesky decomposition of the potential.

    Parameters
    ----------
    A : matrix, ndim = 2
        Inverse of covariance matrix for the potential vector
    """
    if dtype is None:
        dtype = theano.config.floatX
    self.dtype = dtype
    self.L = floatX(scipy.linalg.cholesky(A, lower=True))
def extend(self, direction):
    """Double the tree size by extending the tree in the given direction.

    If direction is larger than 0, extend it to the right, otherwise
    extend it to the left.

    Return a tuple `(diverging, turning)` of type (DivergenceInfo, bool).
    `diverging` indicates that the tree extension was aborted because
    the energy change exceeded `self.Emax`. `turning` indicates that
    the tree extension was stopped because the termination criterion
    was reached (the trajectory is turning back).
    """
    if direction > 0:
        tree, diverging, turning = self._build_subtree(
            self.right, self.depth, floatX(np.asarray(self.step_size)))
        self.right = tree.right
    else:
        tree, diverging, turning = self._build_subtree(
            self.left, self.depth, floatX(np.asarray(-self.step_size)))
        self.left = tree.right

    self.depth += 1
    self.accept_sum += tree.accept_sum
    self.n_proposals += tree.n_proposals

    if diverging or turning:
        return diverging, turning

    size1, size2 = self.log_size, tree.log_size
    if logbern(size2 - size1):
        self.proposal = tree.proposal

    self.log_size = np.logaddexp(self.log_size, tree.log_size)
    self.p_sum[:] += tree.p_sum

    left, right = self.left, self.right
    p_sum = self.p_sum
    turning = (p_sum.dot(left.v) <= 0) or (p_sum.dot(right.v) <= 0)

    return diverging, turning
def _get_scaling(total_size, shape, ndim):
    """
    Gets scaling constant for logp

    Parameters
    ----------
    total_size : int or list[int]
    shape : shape
        shape to scale
    ndim : int
        ndim hint

    Returns
    -------
    scalar
    """
    if total_size is None:
        coef = floatX(1)
    elif isinstance(total_size, int):
        if ndim >= 1:
            denom = shape[0]
        else:
            denom = 1
        coef = floatX(total_size) / floatX(denom)
    elif isinstance(total_size, (list, tuple)):
        if not all(isinstance(i, int) for i in total_size
                   if (i is not Ellipsis and i is not None)):
            raise TypeError('Unrecognized `total_size` type, expected '
                            'int or list of ints, got %r' % total_size)
        if Ellipsis in total_size:
            sep = total_size.index(Ellipsis)
            begin = total_size[:sep]
            end = total_size[sep+1:]
            if Ellipsis in end:
                raise ValueError('Double Ellipsis in `total_size` is restricted, '
                                 'got %r' % total_size)
        else:
            begin = total_size
            end = []
        if (len(begin) + len(end)) > ndim:
            raise ValueError('Length of `total_size` is too big, '
                             'number of scalings is bigger than ndim, got %r'
                             % total_size)
        elif (len(begin) + len(end)) == 0:
            return floatX(1)
        if len(end) > 0:
            shp_end = shape[-len(end):]
        else:
            shp_end = np.asarray([])
        shp_begin = shape[:len(begin)]
        begin_coef = [floatX(t) / shp_begin[i]
                      for i, t in enumerate(begin) if t is not None]
        end_coef = [floatX(t) / shp_end[i]
                    for i, t in enumerate(end) if t is not None]
        coefs = begin_coef + end_coef
        coef = tt.prod(coefs)
    else:
        raise TypeError('Unrecognized `total_size` type, expected '
                        'int or list of ints, got %r' % total_size)
    return tt.as_tensor(floatX(coef))
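# Illustrative sketch (numbers are hypothetical, not from the source): for an int
# `total_size` with ndim >= 1, the function above scales logp by total_size / shape[0],
# so a minibatch of 100 rows drawn from 1000 observations has its likelihood weighted by 10.
import numpy as np

total_size = 1000        # assumed full dataset size
batch_shape = (100, 5)   # assumed minibatch shape
coef = total_size / batch_shape[0]
assert np.isclose(coef, 10.0)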
def get_tau_sd(tau=None, sd=None):
    r"""
    Find precision and standard deviation

    .. math::
        \tau = \frac{1}{\sigma^2}

    Parameters
    ----------
    tau : array-like, optional
    sd : array-like, optional

    Returns
    -------
    Returns tuple (tau, sd)

    Notes
    -----
    If neither tau nor sd is provided, returns (1., 1.)
    """
    if tau is None:
        if sd is None:
            sd = 1.
            tau = 1.
        else:
            tau = sd**-2.
    else:
        if sd is not None:
            raise ValueError("Can't pass both tau and sd")
        else:
            sd = tau**-.5

    # cast tau and sd to float in a way that works for both np.arrays
    # and pure python
    tau = 1. * tau
    sd = 1. * sd

    return (floatX(tau), floatX(sd))
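# Quick illustrative check of the conversion above (assuming get_tau_sd and its floatX
# helper are importable; the numbers are mine, not from the source).
import numpy as np

tau, sd = get_tau_sd(sd=2.)
# tau = 1 / sd**2
assert np.isclose(float(tau), 0.25) and np.isclose(float(sd), 2.0)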
def forward_val(self, x_):
    x = x_.T
    lx = np.log(x)
    y = np.dot(self.A, lx)
    return floatX(y.T)
def random(self):
    n = floatX(normal(size=self.size))
    n /= self.d_sqrt
    n = self.factor.solve_Lt(n)
    n = self.factor.apply_Pt(n)
    return n
def _make_model(self):
    pca = self.pca
    mCounts = np.int_(self.counts * self.seq_depth_factor)
    n_dim = pca.n_components_
    n_modes = self.n_modes
    n_samp = mCounts.shape[1]
    n_features = mCounts.shape[0]
    if self.kmeansInit:
        sd_factor = 2 / n_modes
    else:
        sd_factor = 2

    print("Defining model constants...")
    if pca.whiten:
        rot = np.sqrt(pca.explained_variance_[:, None]) * pca.components_
        rot = theano.shared(floatX(rot))
        cSd = floatX(1)
        tcov = np.eye(n_dim)[np.tril_indices(n_dim)] * sd_factor
    else:
        rot = theano.shared(floatX(pca.components_))
        cSd = floatX(np.sqrt(pca.explained_variance_))
        tcov = (np.diag(pca.explained_variance_)[np.tril_indices(n_dim)]
                * sd_factor)
    shift = theano.shared(floatX(pca.mean_[None, :]),
                          broadcastable=(True, False))
    multiNn = np.sum(mCounts, axis=0)
    print("Counts shape:")
    print(mCounts.shape)
    lcounts = floatX(self.pca.transform(self.tau_log_E_p))
    print("Latent counts shape:")
    print(lcounts.shape)

    high_tumor = self.pheno["tcEst"] > 0.8
    low_tumor = self.pheno["tcEst"] < 0.2
    if self.kmeansInit:
        km = KMeans(n_clusters=n_modes, random_state=0, tol=1e-10, max_iter=100)
        mus_tumor = km.fit(lcounts[high_tumor, :]).cluster_centers_
        mus_free = km.fit(lcounts[low_tumor, :]).cluster_centers_
    else:
        mus_tumor = np.repeat(np.mean(lcounts[high_tumor, :], axis=0)[None, :],
                              10, axis=0)
        mus_free = np.repeat(np.mean(lcounts[low_tumor, :], axis=0)[None, :],
                             10, axis=0)
    mus_tumor = floatX(mus_tumor)
    mus_free = floatX(mus_free)
    try:
        chol_tumor = floatX(
            np.linalg.cholesky(np.cov(lcounts[high_tumor, :].T)))
        chol_tumor = chol_tumor[np.tril_indices(n_dim)] * sd_factor
    except np.linalg.LinAlgError:
        print(
            "Seems we have too few HIGH tumor content samples to infer a starting covariance."
        )
        chol_tumor = tcov
    try:
        chol_free = floatX(
            np.linalg.cholesky(np.cov(lcounts[low_tumor, :].T)))
        chol_free = chol_free[np.tril_indices(n_dim)] * sd_factor
    except np.linalg.LinAlgError:
        print(
            "Seems we have too few LOW tumor content samples to infer a starting covariance."
        )
        chol_free = tcov

    md = self.tau_log_E_p - pca.mean_[None, :]
    dev = md - np.dot(np.dot(md, pca.components_.T), pca.components_)
    dev_std = np.std(dev, axis=0)
    dev_mean = np.mean(dev, axis=0)
    if self.no_deviations is True:
        dev_f = dev_t = None
    else:
        dev_f = dev_t = theano.shared(floatX(dev))
    p_f = floatX(self.p_f)
    p_t = floatX(self.p_t)
    sparsity = floatX(1)
    n = floatX(self.pheno["tcRes"].values[:, None] * self.res_scale)
    tc = floatX(self.pheno["tcEst"].values[:, None])
    lb = floatX(1 - p_f)
    ub = floatX(p_t)
    padding = 1e-1 * (ub - lb)
    pa_start = ((n * tc) + 1) / (n + 2)
    pa_start = np.where(pa_start < lb, lb + padding, pa_start)
    pa_start = np.where(pa_start > ub, ub - padding, pa_start)
    pa_start = floatX(pa_start)

    def inverse_pca(X):
        return pm.math.dot(X, rot) + shift

    def pa2alpha(p_a):
        return (p_a + p_f - 1) / (p_t + p_f - 1)

    def alpha2pa(alpha):
        return (alpha * (p_t + p_f - 1)) - p_f + 1

    def mixSep(x_f, x_t, alpha, dev_f, dev_t):
        exp_f = inverse_pca(x_f)
        exp_t = inverse_pca(x_t)
        if dev_f is not None:
            exp_f += dev_f
        if dev_t is not None:
            exp_t += dev_t
        exp_f = tt.nnet.softmax(exp_f)
        exp_t = tt.nnet.softmax(exp_t)
        result = ((1 - alpha) * exp_f) + (alpha * exp_t)
        return result

    print("Making model...")
    with pm.Model() as model:
        # bounds with numerical padding
        p_a = pm.Beta(
            "p_a",
            alpha=floatX((n * tc) + 1),
            beta=floatX((n * (1 - tc)) + 1),
            transform=pm.distributions.transforms.Interval(lb, ub),
            shape=(n_samp, 1),
            testval=pa_start,
        )
        alpha = pm.Deterministic("alpha", pa2alpha(p_a))
        sdd = pm.HalfNormal.dist(sd=cSd * self.relax_prior)
        x_f_comps = list()
        for i in range(n_modes):
            mus_f = pm.Normal(
                "mus_f_{}".format(i),
                mu=0,
                sd=cSd * self.relax_prior,
                shape=n_dim,
                testval=mus_free[i, :],
            )
            packed_L_f = pm.LKJCholeskyCov(
                "packed_L_f_{}".format(i),
                n=n_dim,
                eta=sparsity,
                sd_dist=sdd,
                testval=chol_free,
            )
            chol_f = pm.expand_packed_triangular(n_dim, packed_L_f, lower=True)
            x_f_comps.append(
                pm.MvNormal.dist(mu=mus_f, chol=chol_f, shape=(n_samp, n_dim)))
        if n_modes > 1:
            w_f = pm.Dirichlet("w_f", a=np.ones(n_modes) * self.dirichlet_prior)
            x_f = pm.Mixture(
                "x_f",
                w=w_f,
                comp_dists=x_f_comps,
                shape=(n_samp, n_dim),
                testval=lcounts,
            )
        else:
            x_f = pm.MvNormal("x_f", mu=mus_f, chol=chol_f, shape=(n_samp, n_dim))
        if self.same_kernels:
            x_t_comps = x_f_comps
        else:
            x_t_comps = list()
            for i in range(n_modes):
                mus_t = pm.Normal(
                    "mus_t_{}".format(i),
                    mu=0,
                    sd=cSd * self.relax_prior,
                    shape=n_dim,
                    testval=mus_tumor[i, :],
                )
                packed_L_t = pm.LKJCholeskyCov(
                    "packed_L_t_{}".format(i),
                    n=n_dim,
                    eta=sparsity,
                    sd_dist=sdd,
                    testval=chol_tumor,
                )
                chol_t = pm.expand_packed_triangular(n_dim, packed_L_t, lower=True)
                x_t_comps.append(
                    pm.MvNormal.dist(mu=mus_t, chol=chol_t, shape=(n_samp, n_dim)))
        if n_modes > 1:
            w_t = pm.Dirichlet("w_t", a=np.ones(n_modes) * self.dirichlet_prior)
            x_t = pm.Mixture(
                "x_t",
                w=w_t,
                comp_dists=x_t_comps,
                shape=(n_samp, n_dim),
                testval=lcounts,
            )
        else:
            x_t = pm.MvNormal("x_t", mu=mus_t, chol=chol_t, shape=(n_samp, n_dim))
        if self.sample_deviation is True:
            dev_f = pm.Normal(
                "dev_f",
                mu=dev_mean,
                sigma=dev_std,
                shape=(n_samp, n_features),
                testval=dev,
            )
            dev_t = pm.Normal(
                "dev_t",
                mu=dev_mean,
                sigma=dev_std,
                shape=(n_samp, n_features),
                testval=dev,
            )
        if self.hazard_model == "cox":
            b = pm.Normal("logHR", mu=0, sigma=1, shape=(2 * n_dim, 1))
            for ev in self.events:
                ind = ev["mask"].values
                obs = np.array(ev["index_among"])
                expressions = tt.concatenate([x_t[ind, :], x_f[ind, :]], axis=1)
                hazard = tt.exp(tt.dot(expressions, b)).T
                evp = pm.Categorical("event_{}".format(ev["sample"]), hazard,
                                     observed=obs)
        elif self.hazard_model == "mk":
            # This is not implemented and aims to model hazard with a Gaussian mixture
            b = pm.Normal("kernel_weights", mu=0, sigma=1, shape=(10,))
            pass
        x = pm.Deterministic("x", mixSep(x_f, x_t, alpha, dev_f, dev_t))
        if self.use_multinomial:
            obs = pm.Multinomial("obs", n=multiNn, p=x, observed=mCounts.T,
                                 dtype="int64")
        else:
            dist = pm.Dirichlet.dist(mCounts.T + 1)
            pot = pm.Potential("obs", dist.logp(x))
    return model
def __init__(self, approx, kernel=rbf, use_histogram=True, temperature=1):
    self.approx = approx
    self.temperature = floatX(temperature)
    self._kernel_f = kernel
    self.use_histogram = use_histogram
def astep(self, q0):
    p0 = self.potential.random()
    start_energy = self.compute_energy(q0, p0)

    if not self.adapt_step_size:
        step_size = self.step_size
    elif self.tune:
        step_size = np.exp(self.log_step_size)
    else:
        step_size = np.exp(self.log_step_size_bar)

    u = floatX(nr.uniform())
    q = qn = qp = q0
    qn_grad = qp_grad = self.dlogp(q)
    pn = pp = p0
    tree_size, depth = 1., 0
    keep_sampling = True

    while keep_sampling:
        direction = bern(0.5) * 2 - 1
        q_edge, p_edge, q_edge_grad = {
            -1: (qn, pn, qn_grad),
            1: (qp, pp, qp_grad)
        }[direction]

        tree = buildtree(self.leapfrog, q_edge, p_edge, q_edge_grad, u,
                         direction, depth, step_size, self.Emax, start_energy)

        if direction == -1:
            qn, pn, qn_grad = tree.q, tree.p, tree.q_grad
        else:
            qp, pp, qp_grad = tree.q, tree.p, tree.q_grad

        if tree.is_valid_sample and bern(min(1, tree.leaf_size / tree_size)):
            q = tree.proposal

        tree_size += tree.leaf_size

        span = qp - qn
        keep_sampling = tree.is_valid_sample and (span.dot(pn) >= 0) and (
            span.dot(pp) >= 0)
        depth += 1

    w = 1. / (self.m + self.t0)
    self.h_bar = (
        (1 - w) * self.h_bar +
        w * (self.target_accept - tree.p_accept * 1. / tree.n_proposals))

    if self.tune:
        self.log_step_size = self.mu - self.h_bar * np.sqrt(self.m) / self.gamma
        mk = self.m**-self.k
        self.log_step_size_bar = mk * self.log_step_size + (
            1 - mk) * self.log_step_size_bar

    self.m += 1

    stats = {
        'depth': depth,
        'step_size': step_size,
        'tune': self.tune,
        'accept': tree.p_accept * 1. / tree.n_proposals,
        'h_bar': self.h_bar,
        'step_size_bar': np.exp(self.log_step_size_bar),
        'tree_size': tree_size,
    }

    return q, [stats]
def test_lognormal(self):
    self.pymc3_matches_scipy(
        Lognormal, Rplus, {'mu': R, 'tau': Rplusbig},
        lambda value, mu, tau: floatX(
            sp.lognorm.logpdf(value, tau**-.5, 0, np.exp(mu))))
def dirichlet_logpdf(value, a):
    return floatX((-betafn(a) + logpow(value, a - 1).sum(-1)).sum())
def betafn(a):
    return floatX(scipy.special.gammaln(a).sum(-1)
                  - scipy.special.gammaln(a.sum(-1)))
def backward_val(self, y_):
    y = y_.T
    y = np.concatenate([y, -np.sum(y, 0, keepdims=True)])
    x = np.exp(y) / np.sum(np.exp(y), 0, keepdims=True)
    return floatX(x.T)
def random(self): """Draw random value from QuadPotential.""" return floatX(normal(size=self.s.shape)) * self.inv_s
def random(self): """Draw random value from QuadPotential.""" n = floatX(normal(size=self.L.shape[0])) return scipy.linalg.solve_triangular(self.L.T, n)
def __init__(self, A):
    self.A = floatX(A)
    self.L = scipy.linalg.cholesky(A, lower=True)
def backward_val(self, y_):
    y = np.dot(y_, self.A)
    x = np.exp(y) / np.sum(np.exp(y), 0, keepdims=True)
    return floatX(x.T)
def backward(self, y_):
    y = tt.dot(y_, self.tA)
    # "softmax" with vector support and no deprecation warning:
    e_y = tt.exp(y - tt.max(y, 0, keepdims=True))
    x = e_y / tt.sum(e_y, 0, keepdims=True)
    return floatX(x.T)
def discrete_weibull_logpmf(value, q, beta):
    return floatX(np.log(np.power(q, np.power(value, beta))
                         - np.power(q, np.power(value + 1, beta))))
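% The helper above matches the discrete Weibull probability mass function:
\[
P(X = x) = q^{x^{\beta}} - q^{(x+1)^{\beta}}, \qquad x = 0, 1, 2, \dots
\]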
def test_mixture_of_mixture(self):
    if theano.config.floatX == 'float32':
        rtol = 1e-4
    else:
        rtol = 1e-7
    nbr = 4
    with Model() as model:
        # mixtures components
        g_comp = Normal.dist(
            mu=Exponential('mu_g', lam=1.0, shape=nbr, transform=None),
            sigma=1,
            shape=nbr)
        l_comp = Lognormal.dist(
            mu=Exponential('mu_l', lam=1.0, shape=nbr, transform=None),
            sigma=1,
            shape=nbr)
        # weight vector for the mixtures
        g_w = Dirichlet('g_w', a=floatX(np.ones(nbr)*0.0000001), transform=None)
        l_w = Dirichlet('l_w', a=floatX(np.ones(nbr)*0.0000001), transform=None)
        # mixture components
        g_mix = Mixture.dist(w=g_w, comp_dists=g_comp)
        l_mix = Mixture.dist(w=l_w, comp_dists=l_comp)
        # mixture of mixtures
        mix_w = Dirichlet('mix_w', a=floatX(np.ones(2)), transform=None)
        mix = Mixture('mix', w=mix_w, comp_dists=[g_mix, l_mix],
                      observed=np.exp(self.norm_x))

    test_point = model.test_point

    def mixmixlogp(value, point):
        floatX = theano.config.floatX
        priorlogp = st.dirichlet.logpdf(x=point['g_w'],
                                        alpha=np.ones(nbr)*0.0000001,
                                        ).astype(floatX) + \
                    st.expon.logpdf(x=point['mu_g']).sum(dtype=floatX) + \
                    st.dirichlet.logpdf(x=point['l_w'],
                                        alpha=np.ones(nbr)*0.0000001,
                                        ).astype(floatX) + \
                    st.expon.logpdf(x=point['mu_l']).sum(dtype=floatX) + \
                    st.dirichlet.logpdf(x=point['mix_w'],
                                        alpha=np.ones(2),
                                        ).astype(floatX)
        complogp1 = st.norm.logpdf(x=value, loc=point['mu_g']).astype(floatX)
        mixlogp1 = logsumexp(np.log(point['g_w']).astype(floatX) + complogp1,
                             axis=-1, keepdims=True)
        complogp2 = st.lognorm.logpdf(value, 1., 0.,
                                      np.exp(point['mu_l'])).astype(floatX)
        mixlogp2 = logsumexp(np.log(point['l_w']).astype(floatX) + complogp2,
                             axis=-1, keepdims=True)
        complogp_mix = np.concatenate((mixlogp1, mixlogp2), axis=1)
        mixmixlogpg = logsumexp(np.log(point['mix_w']).astype(floatX) + complogp_mix,
                                axis=-1, keepdims=True)
        return priorlogp, mixmixlogpg

    value = np.exp(self.norm_x)[:, None]
    priorlogp, mixmixlogpg = mixmixlogp(value, test_point)

    # check logp of mixture
    assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point), rtol=rtol)

    # check model logp
    assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point), rtol=rtol)

    # check input and check logp again
    test_point['g_w'] = np.asarray([.1, .1, .2, .6])
    test_point['mu_g'] = np.exp(np.random.randn(nbr))
    priorlogp, mixmixlogpg = mixmixlogp(value, test_point)
    assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point), rtol=rtol)
    assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point), rtol=rtol)
def categorical_logpdf(value, p):
    if value >= 0 and value <= len(p):
        return floatX(np.log(p[value]))
    else:
        return -inf
def gumbel(value, mu, beta):
    return floatX(sp.gumbel_r.logpdf(value, loc=mu, scale=beta))
def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
             model=None, blocked=True, potential=None,
             dtype=None, Emax=1000, target_accept=0.8, gamma=0.05,
             k=0.75, t0=10, adapt_step_size=True, step_rand=None,
             **theano_kwargs):
    """Set up Hamiltonian samplers with common structures.

    Parameters
    ----------
    vars: list of theano variables
    scaling: array_like, ndim = {1,2}
        Scaling for momentum distribution. 1d arrays are interpreted as a
        matrix diagonal.
    step_scale: float, default=0.25
        Size of steps to take, automatically scaled down by 1/n**(1/4)
    is_cov: bool, default=False
        Treat scaling as a covariance matrix/vector if True, else treat
        it as a precision matrix/vector
    model: pymc3 Model instance
    blocked: bool, default=True
    potential: Potential, optional
        An object that represents the Hamiltonian with methods `velocity`,
        `energy`, and `random`.
    **theano_kwargs: passed to theano functions
    """
    self._model = modelcontext(model)

    if vars is None:
        vars = self._model.cont_vars
    vars = inputvars(vars)

    super().__init__(vars, blocked=blocked, model=model, dtype=dtype,
                     **theano_kwargs)

    self.adapt_step_size = adapt_step_size
    self.Emax = Emax
    self.iter_count = 0
    size = self._logp_dlogp_func.size

    self.step_size = step_scale / (size**0.25)
    self.step_adapt = step_sizes.DualAverageAdaptation(
        self.step_size, target_accept, gamma, k, t0)
    self.target_accept = target_accept
    self.tune = True

    if scaling is None and potential is None:
        mean = floatX(np.zeros(size))
        var = floatX(np.ones(size))
        potential = QuadPotentialDiagAdapt(size, mean, var, 10)

    if isinstance(scaling, dict):
        point = Point(scaling, model=model)
        scaling = guess_scaling(point, model=model, vars=vars)

    if scaling is not None and potential is not None:
        raise ValueError("Can not specify both potential and scaling.")

    if potential is not None:
        self.potential = potential
    else:
        self.potential = quad_potential(scaling, is_cov)

    self.integrator = integration.CpuLeapfrogIntegrator(
        self.potential, self._logp_dlogp_func)

    self._step_rand = step_rand
    self._warnings = []
    self._samples_after_tune = 0
    self._num_divs_sample = 0
def test_elemwise_energy():
    scaling = np.array([1, 2, 3])
    x = floatX(np.ones_like(scaling))
    pot = quadpotential.quad_potential(scaling, True)
    energy = pot.energy(x)
    npt.assert_allclose(energy, 0.5 * scaling.sum())
def _elbo_t(logp, uw_g, uw_l, inarray_g, inarray_l, c_g, c_l,
            n_mcsamples, random_seed):
    """Return expression of approximate ELBO based on Monte Carlo sampling.
    """
    if random_seed is None:
        r = MRG_RandomStreams(gen_random_state())
    else:
        r = MRG_RandomStreams(seed=random_seed)

    normal_const = floatX(1 + np.log(2.0 * np.pi))

    elbo = 0

    # Sampling local variational parameters
    if uw_l is not None:
        l_l = (uw_l.size / 2).astype('int32')
        u_l = uw_l[:l_l]
        w_l = uw_l[l_l:]
        ns_l = r.normal(size=(n_mcsamples, inarray_l.tag.test_value.shape[0]))
        zs_l = ns_l * tt.exp(w_l) + u_l
        elbo += tt.sum(c_l * (w_l + 0.5 * normal_const))
    else:
        zs_l = None

    # Sampling global variational parameters
    if uw_g is not None:
        l_g = (uw_g.size / 2).astype('int32')
        u_g = uw_g[:l_g]
        w_g = uw_g[l_g:]
        ns_g = r.normal(size=(n_mcsamples, inarray_g.tag.test_value.shape[0]))
        zs_g = ns_g * tt.exp(w_g) + u_g
        elbo += tt.sum(c_g * (w_g + 0.5 * normal_const))
    else:
        zs_g = None

    if (zs_l is not None) and (zs_g is not None):
        def logp_(z_g, z_l):
            return theano.clone(logp, OrderedDict({
                inarray_g: z_g,
                inarray_l: z_l
            }), strict=False)
        sequences = [zs_g, zs_l]

    elif zs_l is not None:
        def logp_(z_l):
            return theano.clone(logp, OrderedDict({inarray_l: z_l}),
                                strict=False)
        sequences = [zs_l]

    else:
        def logp_(z_g):
            return theano.clone(logp, OrderedDict({inarray_g: z_g}),
                                strict=False)
        sequences = [zs_g]

    logps, _ = theano.scan(fn=logp_, outputs_info=None, sequences=sequences)

    elbo += tt.mean(logps)

    return elbo
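% My reading of the estimator above (an interpretation, not stated in the source): with a
% mean-field Gaussian q(z) = N(u, diag(e^{2w})) and per-variable weights c, the ELBO is the
% Monte Carlo average of log p at reparameterized samples plus the weighted Gaussian
% entropy term, where normal_const = 1 + log(2*pi).
\[
\mathrm{ELBO} \approx \frac{1}{S}\sum_{s=1}^{S}
  \log p\!\bigl(u + e^{w}\odot \varepsilon^{(s)}\bigr)
  + \sum_i c_i\Bigl(w_i + \tfrac{1}{2}\bigl(1 + \log 2\pi\bigr)\Bigr),
\qquad \varepsilon^{(s)} \sim \mathcal{N}(0, I)
\]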
def test_vonmises(self):
    self.pymc3_matches_scipy(
        VonMises, R, {'mu': Circ, 'kappa': Rplus},
        lambda value, mu, kappa: floatX(sp.vonmises.logpdf(value, kappa, loc=mu)))
def grad(self):
    n = floatX(self.input_matrix.shape[0])
    temperature = self.temperature
    svgd_grad = (self.density_part_grad / temperature +
                 self.repulsive_part_grad)
    return svgd_grad / n
def test_advi_minibatch(self):
    n = 1000
    sd0 = 2.
    mu0 = 4.
    sd = 3.
    mu = -5.

    data = floatX(sd * np.random.randn(n) + mu)
    d = n / sd**2 + 1 / sd0**2
    mu_post = (n * np.mean(data) / sd**2 + mu0 / sd0**2) / d

    data_t = tt.vector()
    data_t.tag.test_value = floatX(np.zeros(1,))

    def create_minibatch(data):
        while True:
            data = np.roll(data, 100, axis=0)
            yield (data[:100],)

    minibatches = create_minibatch(data)

    with Model():
        mu_ = Normal('mu', mu=mu0, sd=sd0, testval=0)
        x = Normal('x', mu=mu_, sd=sd, observed=data_t)
        advi_fit = advi_minibatch(
            n=1000, minibatch_tensors=[data_t], minibatch_RVs=[x],
            minibatches=minibatches, total_size=n, learning_rate=1e-1)
        np.testing.assert_allclose(advi_fit.means['mu'], mu_post, rtol=0.1)
        trace = sample_vp(advi_fit, 10000)

    np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.4)
    np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4)

    # Test for n < 10
    with Model():
        mu_ = Normal('mu', mu=mu0, sd=sd0, testval=0)
        x = Normal('x', mu=mu_, sd=sd, observed=data_t)
        advi_fit = advi_minibatch(
            n=5, minibatch_tensors=[data_t], minibatch_RVs=[x],
            minibatches=minibatches, total_size=n, learning_rate=1e-1)

    # Check to raise NaN with a large learning coefficient
    with self.assertRaises(FloatingPointError):
        with Model():
            mu_ = Normal('mu', mu=mu0, sd=sd0, testval=0)
            x = Normal('x', mu=mu_, sd=sd, observed=data_t)
            advi_fit = advi_minibatch(
                n=1000, minibatch_tensors=[data_t], minibatch_RVs=[x],
                minibatches=minibatches, total_size=n, learning_rate=1e10)
def test_gumbel(self):
    self.pymc3_matches_scipy(
        Gumbel, R, {'mu': R, 'beta': Rplusbig},
        lambda value, mu, beta: floatX(sp.gumbel_r.logpdf(value, loc=mu, scale=beta)))
def MvNormalLogp():
    """Compute the log pdf of a multivariate normal distribution.

    This should be used in MvNormal.logp once Theano#5908 is released.

    Parameters
    ----------
    cov : tt.matrix
        The covariance matrix.
    delta : tt.matrix
        Array of deviations from the mean.
    """
    cov = tt.matrix('cov')
    cov.tag.test_value = floatX(np.eye(3))
    delta = tt.matrix('delta')
    delta.tag.test_value = floatX(np.zeros((2, 3)))

    solve_lower = tt.slinalg.Solve(A_structure='lower_triangular')
    solve_upper = tt.slinalg.Solve(A_structure='upper_triangular')
    cholesky = Cholesky(nofail=True, lower=True)

    n, k = delta.shape
    n, k = f(n), f(k)  # `f` is presumably the module-level cast to floatX
    chol_cov = cholesky(cov)
    diag = tt.nlinalg.diag(chol_cov)
    ok = tt.all(diag > 0)

    chol_cov = tt.switch(ok, chol_cov, tt.fill(chol_cov, 1))
    delta_trans = solve_lower(chol_cov, delta.T).T

    result = n * k * tt.log(f(2) * np.pi)
    result += f(2) * n * tt.sum(tt.log(diag))
    result += (delta_trans ** f(2)).sum()
    result = f(-.5) * result
    logp = tt.switch(ok, result, -np.inf)

    def dlogp(inputs, gradients):
        g_logp, = gradients
        cov, delta = inputs

        g_logp.tag.test_value = floatX(1.)
        n, k = delta.shape

        chol_cov = cholesky(cov)
        diag = tt.nlinalg.diag(chol_cov)
        ok = tt.all(diag > 0)

        chol_cov = tt.switch(ok, chol_cov, tt.fill(chol_cov, 1))
        delta_trans = solve_lower(chol_cov, delta.T).T

        inner = n * tt.eye(k) - tt.dot(delta_trans.T, delta_trans)
        g_cov = solve_upper(chol_cov.T, inner)
        g_cov = solve_upper(chol_cov.T, g_cov.T)

        tau_delta = solve_upper(chol_cov.T, delta_trans.T)
        g_delta = tau_delta.T

        g_cov = tt.switch(ok, g_cov, -np.nan)
        g_delta = tt.switch(ok, g_delta, -np.nan)

        return [-0.5 * g_cov * g_logp, -g_delta * g_logp]

    return theano.OpFromGraph(
        [cov, delta], [logp], grad_overrides=dlogp, inline=True)
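% For reference, the graph above evaluates the Gaussian log density through the Cholesky
% factor L of the covariance (delta is the n x k matrix of deviations; my transcription
% of the code, not stated in the source):
\[
\log p(\Delta \mid \Sigma) = -\tfrac{1}{2}\Bigl(
  nk\log 2\pi + 2n\sum_i \log L_{ii}
  + \bigl\lVert L^{-1}\Delta^\top \bigr\rVert_F^2
\Bigr), \qquad \Sigma = LL^\top
\]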
def random(self):
    n = floatX(normal(size=self.L.shape[0]))
    return scipy.linalg.solve_triangular(self.L.T, n)
def logit(p):
    return tt.log(p / (floatX(1) - p))
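% The helper above is the standard logit; its inverse is the logistic sigmoid:
\[
\operatorname{logit}(p) = \log\frac{p}{1-p}, \qquad
\operatorname{logit}^{-1}(x) = \frac{1}{1 + e^{-x}}
\]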
def random(self): """Draw random value from QuadPotential.""" n = floatX(normal(size=self.L.shape[0])) return dot(self.L, n)
def forward(self, x_):
    x = x_.T
    lx = tt.log(x)
    y = tt.dot(self.tA, lx)
    return floatX(y.T)