def _hlm(self, model, gamma):
    with model:
        logger.info("Using tau_b_alpha: {}".format(self.tau_b_alpha))
        tau_b = pm.InverseGamma("tau_b", alpha=self.tau_b_alpha, beta=1., shape=1)
        beta = pm.Normal("beta", 0, sd=tau_b, shape=self.n_gene_condition)

        logger.info("Using tau_iota_alpha: {}".format(self.tau_iota_alpha))
        l_tau = pm.InverseGamma("tau_iota", alpha=self.tau_iota_alpha, beta=1., shape=1)
        l = pm.Normal("iota", mu=0, sd=l_tau, shape=self.n_interventions)

        mu = (gamma[self._gene_data_idx]
              + beta[self._gene_cond_data_idx]
              + l[self._intervention_data_idx])

        if self.family == Family.gaussian:
            logger.info("Using sd_alpha: {}".format(self.sd_alpha))
            sd = pm.InverseGamma("sd", alpha=self.sd_alpha, beta=1., shape=1)
            pm.Normal("x", mu=mu, sd=sd,
                      observed=np.squeeze(self.data[READOUT].values))
        else:
            raise NotImplementedError("Only gaussian family so far")

    return tau_b, beta, l_tau, l, sd

def _hlm(self, model, gamma):
    with model:
        logger.info("Using tau_b_alpha: {}".format(self.tau_b_alpha))
        tau_b = pm.InverseGamma("tau_b", alpha=self.tau_b_alpha, beta=1., shape=1)
        beta = pm.Normal("beta", 0, sd=tau_b, shape=self.n_gene_condition)

        logger.info("Using tau_iota_alpha: {}".format(self.tau_iota_alpha))
        l_tau = pm.InverseGamma("tau_iota", alpha=self.tau_iota_alpha, beta=1., shape=1)
        l = pm.Normal("iota", mu=0, sd=l_tau, shape=self.n_interventions)

        logger.info("Using kappa_sd: {}".format(self.kappa_sd))
        c = pm.Normal("kappa", 0, self.kappa_sd, shape=1)

        if self._affinity == "data":
            logger.info("Using affinity from data")
            q = self.data[AFFINITY].values
        elif self._affinity == "leaveout":
            logger.info("Using no affinity")
            q = 1
        elif self._affinity == "estimate":
            logger.info("Estimating affinity from data")
            q = pm.Uniform("aff", lower=0, upper=1, shape=self.n_interventions)
        else:
            raise ValueError("Wrong affinity")

        mu = l[self._intervention_data_idx]
        ll = (gamma[self._gene_data_idx]
              + beta[self._gene_cond_data_idx]
              + c * self.data[COPYNUMBER].values)
        if self._affinity == "estimate":
            mu += q[self._intervention_data_idx] * ll
        else:
            mu += q * ll

        if self.family == Family.gaussian:
            logger.info("Using sd_alpha: {}".format(self.sd_alpha))
            sd = pm.InverseGamma("sd", alpha=self.sd_alpha, beta=1., shape=1)
            pm.Normal("x", mu=mu, sd=sd,
                      observed=np.squeeze(self.data[READOUT].values))
        else:
            raise NotImplementedError("Only gaussian family so far")

    if self._affinity == "estimate":
        return tau_b, beta, l_tau, l, sd, q, c
    return tau_b, beta, l_tau, l, sd, c

def garch_baseline_model(data):
    with pm.Model() as model:
        omega = pm.InverseGamma("omega", alpha=2.5, beta=0.05)
        alpha1 = pm.Uniform("alpha1", 0, 1)
        beta1 = pm.Uniform("beta1", 0, 1)
        # The initial volatility needs its own name; reusing "omega" would
        # raise a duplicate-variable error in PyMC3.
        vol = pm.InverseGamma("initial_vol", alpha=2.5, beta=0.05)
        # pm.GARCH11 takes the coefficients as alpha_1 and beta_1.
        returns = pm.GARCH11('returns', omega=omega, alpha_1=alpha1,
                             beta_1=beta1, initial_vol=vol,
                             shape=len(data), observed=data['returns'])
    return model

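# A minimal usage sketch (an assumption, not from the original source):
# `df` is a DataFrame with a 'returns' column, and `import pymc3 as pm`
# is in scope as in the snippets above.
model = garch_baseline_model(df)
with model:
    trace = pm.sample(1000, tune=1000)
print(pm.summary(trace, var_names=['omega', 'alpha1', 'beta1']))
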
def update_bayesian_modeling(mean_upd, var_upd, alpha_upd, beta_upd,
                             inv_a_upd, inv_b_upd, iv_upd, strategy,
                             stock_price, strike_price, risk_free, time):
    with pm.Model() as update_model:
        prior = pm.InverseGamma('bv', inv_a_upd, inv_b_upd)
        likelihood = pm.InverseGamma('like', inv_a_upd, inv_b_upd, observed=iv_upd)

    with update_model:
        # step = pm.Metropolis()
        v_trace_update = pm.sample(10000, tune=1000)

    # print(v_trace['bv'][:])
    trace_update = v_trace_update['bv'][:]
    # print(trace)

    pm.traceplot(v_trace_update)
    plt.show()
    pm.autocorrplot(v_trace_update)
    plt.show()
    pm.plot_posterior(v_trace_update[100:], color='#87ceeb', point_estimate='mean')
    plt.show()

    s = pm.summary(v_trace_update).round(2)
    print("\n Summary")
    print(s)

    a = np.random.choice(trace_update, 10000, replace=True)
    ar = [a[i] / 100 for i in range(9999)]
    # print("Bayesian Volatility Values", ar)

    op = []
    for i in range(9999):
        temp = BS_price(strategy, stock_price, strike_price, risk_free, ar[i], time)
        op.append(temp)
    # print("Bayesian Option Prices", op)

    plt.hist(ar, bins=50)
    plt.title("Volatility")
    plt.ylabel("Frequency")
    plt.show()

    plt.hist(op, bins=50)
    plt.title("Option Price")
    plt.ylabel("Frequency")
    plt.show()

    return trace_update

def run_model(self, **kwargs):
    """Run Bayesian model using prefit Y's for each Gene and Dataset distribution"""
    # Importing here since Theano base_compiledir needs to be set prior to import
    import pymc3 as pm

    click.echo("Building model")
    with pm.Model() as self.model:
        # Constants
        N = len(self.backgrounds)
        M = len(self.training_genes)
        MN = M * N

        # Prior constants
        mu_exp = self.df[self.training_genes].mean().mean()
        sd_exp = self.df[self.training_genes].std().mean()

        # Gene Model Priors
        gm_sd = pm.InverseGamma("gm_sd", 1, 1, shape=MN)
        gm_mu = pm.Normal("gm_mu", mu_exp, sd_exp, shape=MN)

        # Gene model
        pm.Normal(
            "x_hat",
            mu=gm_mu[self.x_ix],
            sd=gm_sd[self.x_ix],
            shape=MN,
            observed=self.index_df.value,
        )
        x = pm.Normal("x", mu=gm_mu, sd=gm_sd, shape=MN)

        # Likelihood priors
        eps = pm.InverseGamma("eps", 1, 1)
        if N == 1:
            beta = [1]
        else:
            beta = pm.Dirichlet("beta", a=np.ones(N))

        # Likelihood
        norm = np.zeros(M)
        gm_sd_2d = gm_sd.reshape((M, N))
        for i in range(N):
            norm += beta[i] / gm_sd_2d[:, i]
        norm = pm.Deterministic("norm", norm)
        y = pm.Deterministic(
            "y", pm.math.dot((x / gm_sd).reshape((M, N)), beta))
        norm_eps = pm.Deterministic("norm_eps", eps / norm)

        sample_genes = self.sample[self.training_genes].values
        pm.Laplace("y_hat", mu=(y / norm)[self.s_ix], b=norm_eps,
                   observed=sample_genes)

        trace = pm.sample(**kwargs)

    self.trace = trace
    click.echo("Calculating posterior predictive samples")
    self.ppc = pm.sample_posterior_predictive(trace, model=self.model)

def run_pymc3(model_error):
    np.random.seed(182152)

    my_loglike = LogLike(model_error)
    logl = LogLikeWithGrad(my_loglike)

    with pm.Model():
        # Define priors. Make sure that this list corresponds to the right
        # extraction for latent_parameters.
        b = pm.Normal("b", 3.0, sd=1.0)

        # For InverseGamma(alpha, beta): mean = beta / (alpha - 1) and
        # var = beta**2 / ((alpha - 1)**2 * (alpha - 2)), which requires alpha > 2.
        # alpha = 4 is usually a good choice; then beta = mean * (alpha - 1) = mean * 3.
        noise_function_prior_mean = 0.2
        sigma_std_function = pm.InverseGamma(
            "sigma_std_function", alpha=4.,
            beta=noise_function_prior_mean * 3)

        noise_derivative_prior_mean = 0.2
        sigma_std_derivative = pm.InverseGamma(
            "sigma_std_derivative", alpha=4.,
            beta=noise_derivative_prior_mean * 3)

        theta = tt.as_tensor_variable(
            [b, sigma_std_function, sigma_std_derivative])

        # pm.DensityDist("likelihood", lambda v: logl(v), observed={"v": theta})
        pm.Potential("likelihood", logl(theta))

        # Inference!
        trace = pm.sample(
            draws=2000,
            step=pm.Metropolis(),
            chains=4,
            tune=100,
            discard_tuned_samples=True,
        )

    print(trace)
    s = pm.summary(trace)
    print(s)
    means = s["mean"]
    sds = s["sd"]
    print(
        f"PM: Posterior for 'b' = {means['b']:6.3f} with sd {sds['b']:6.3f}.")
    print(
        f"PM: Posterior for 'sigma_std_function' = {means['sigma_std_function']:6.3f} with sd "
        f"{sds['sigma_std_function']:6.3f}.")
    print(
        f"PM: Posterior for 'sigma_std_derivative' = {means['sigma_std_derivative']:6.3f} with sd "
        f"{sds['sigma_std_derivative']:6.3f}.")

    # print(trace.stat_names)
    # accept = trace.get_sampler_stats('accept')
    # print("accept", accept)
    # pm.traceplot(trace, priors=[b.distribution, sigma_std_function.distribution,
    #                             sigma_std_derivative.distribution]); plt.show()

    return trace

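# A small standalone sketch (not from the original source) of the
# inverse-gamma moment matching described in the comments above.
# `invgamma_params` is a hypothetical helper name.
def invgamma_params(mean, var):
    """Return (alpha, beta) for an InverseGamma with the given mean and variance."""
    alpha = mean ** 2 / var + 2   # from var = beta**2 / ((alpha-1)**2 * (alpha-2))
    beta = mean * (alpha - 1)     # from mean = beta / (alpha - 1)
    return alpha, beta

# Example: invgamma_params(0.2, 0.02) gives alpha = 4.0, beta = 0.6,
# i.e. the alpha=4, beta=mean*3 choice used in run_pymc3.
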
def build_model(self, name='normal_model'):
    # Define Stochastic variables
    with pm.Model(name=name) as self.model:
        # Global mean pitch angle
        self.mu_phi = pm.Uniform('mu_phi', lower=0, upper=90)
        self.sigma_phi = pm.InverseGamma('sigma_phi', alpha=2, beta=15, testval=8)
        self.sigma_gal = pm.InverseGamma('sigma_gal', alpha=2, beta=15, testval=8)

        # define a mean galaxy pitch angle
        self.phi_gal = pm.TruncatedNormal(
            'phi_gal',
            mu=self.mu_phi,
            sd=self.sigma_phi,
            lower=0,
            upper=90,
            shape=len(self.galaxies),
        )

        # draw arm pitch angles centred around this mean
        self.phi_arm = pm.TruncatedNormal(
            'phi_arm',
            mu=self.phi_gal[self.gal_arm_map],
            sd=self.sigma_gal,
            lower=0,
            upper=90,
            shape=len(self.gal_arm_map),
        )

        # convert to a gradient for a linear fit
        self.b = tt.tan(np.pi / 180 * self.phi_arm)

        # arm offset parameter
        self.c = pm.Cauchy('c', alpha=0, beta=10, shape=self.n_arms,
                           testval=np.tile(0, self.n_arms))

        # radial noise
        self.sigma_r = pm.InverseGamma('sigma_r', alpha=2, beta=0.5)

        r = pm.Deterministic(
            'r',
            tt.exp(self.b[self.point_arm_map] * self.data['theta']
                   + self.c[self.point_arm_map]))

        # likelihood function
        self.likelihood = pm.Normal(
            'Likelihood',
            mu=r,
            sigma=self.sigma_r,
            observed=self.data['r'],
        )

def build_model(self, name=''):
    # Define Stochastic variables
    with pm.Model(name=name) as self.model:
        # Global mean pitch angle
        self.phi_gal = pm.Uniform('phi_gal', lower=0, upper=90,
                                  shape=len(self.galaxies))

        # note we don't model inter-galaxy dispersion here
        # intra-galaxy dispersion
        self.sigma_gal = pm.InverseGamma('sigma_gal', alpha=2, beta=20, testval=5)

        # arm offset parameter
        self.c = pm.Cauchy('c', alpha=0, beta=10, shape=self.n_arms,
                           testval=np.tile(0, self.n_arms))

        # radial noise
        self.sigma_r = pm.InverseGamma('sigma_r', alpha=2, beta=0.5)

        # define prior for Student T degrees of freedom
        # self.nu = pm.Uniform('nu', lower=1, upper=100)

        # Define Dependent variables
        self.phi_arm = pm.TruncatedNormal(
            'phi_arm',
            mu=self.phi_gal[self.gal_arm_map],
            sd=self.sigma_gal,
            lower=0,
            upper=90,
            shape=self.n_arms)

        # convert to a gradient for a linear fit
        self.b = tt.tan(np.pi / 180 * self.phi_arm)

        r = pm.Deterministic(
            'r',
            tt.exp(self.b[self.data['arm_index'].values] * self.data['theta']
                   + self.c[self.data['arm_index'].values]))

        # likelihood function
        self.likelihood = pm.StudentT(
            'Likelihood',
            mu=r,
            sigma=self.sigma_r,
            nu=1,  # self.nu
            observed=self.data['r'],
        )

def build_model(self, name=''):
    # Define Stochastic variables
    with pm.Model(name=name) as self.model:
        # Global mean pitch angle
        self.phi_gal = pm.Uniform('phi_gal', lower=0, upper=90,
                                  shape=len(self.galaxies))

        # note we don't model inter-galaxy dispersion here
        # intra-galaxy dispersion
        self.sigma_gal = pm.InverseGamma('sigma_gal', alpha=2, beta=20, testval=5)

        # arm offset parameter
        self.c = pm.Cauchy('c', alpha=0, beta=10, shape=self.n_arms,
                           testval=np.tile(0, self.n_arms))

        # radial noise
        self.sigma_r = pm.InverseGamma('sigma_r', alpha=2, beta=0.5)

        # ----- Define Dependent variables -----
        # Phi arm is drawn from a truncated normal centred on phi_gal with
        # spread sigma_gal
        gal_idx = self.gal_arm_map.astype('int32')
        self.phi_arm = pm.TruncatedNormal('phi_arm',
                                          mu=self.phi_gal[gal_idx],
                                          sd=self.sigma_gal,
                                          lower=0, upper=90,
                                          shape=self.n_arms)

        # transform to gradient for fitting
        self.b = tt.tan(np.pi / 180 * self.phi_arm)

        # r = exp(theta * tan(phi) + c)
        # do not track this as it uses a lot of memory
        arm_idx = self.data['arm_index'].values.astype('int32')
        r = tt.exp(self.b[arm_idx] * self.data['theta'] + self.c[arm_idx])

        # likelihood function (assuming a Normal likelihood here)
        self.likelihood = pm.Normal(
            'Likelihood',
            mu=r,
            sigma=self.sigma_r,
            observed=self.data['r'],
        )

def reg_hs_regression(X, y_obs, ylabel='likelihood', tau_0=None, **kwargs):
    """See Piironen & Vehtari, 2017 (DOI: 10.1214/17-EJS1337SI)"""
    # X is expected to be a Theano tensor/shared variable, hence .eval()
    n_features = X.eval().shape[1]
    if tau_0 is None:
        m0 = n_features / 2
        n_obs = X.eval().shape[0]
        tau_0 = m0 / ((n_features - m0) * np.sqrt(n_obs))

    with pm.Model() as model:
        tau = pm.HalfCauchy('tau', tau_0)
        sd_bias = pm.HalfCauchy('sd_bias', beta=2.5)
        lamb_m = pm.HalfCauchy('lambda_m', beta=1)

        slab_scale = kwargs.pop('slab_scale', 3)
        slab_scale_sq = slab_scale ** 2
        slab_df = kwargs.pop('slab_df', 8)
        half_slab_df = slab_df / 2

        # Regularization bit
        c_sq = pm.InverseGamma('c_sq', alpha=half_slab_df,
                               beta=half_slab_df * slab_scale_sq)
        lamb_m_bar = tt.sqrt(c_sq) * lamb_m / (
            tt.sqrt(c_sq + tt.pow(tau, 2) * tt.pow(lamb_m, 2)))

        w = pm.Normal('w', mu=0, sd=tau * lamb_m_bar, shape=n_features)
        bias = pm.Laplace('bias', mu=0, b=sd_bias)
        mu_ = tt.dot(X, w) + bias

        sig = pm.HalfCauchy('sigma', beta=5)
        y = pm.Normal(ylabel, mu=mu_, sd=sig, observed=y_obs)

    model.name = "regularized_hshoe_reg"
    return model  # assumed intent: hand the built model back, as in hs_regression

def make_state_model_AR1(data, observe):
    '''
    Model for Two-State StoVol.

    :param data: observation data
    :param observe: column name of y
    :return: PyMC model
    '''
    # Prepare data
    nstate = data['covid_state_US'].nunique()
    log_returns = data[observe].to_numpy()
    state_idx = data["covid_state_US"].to_numpy()

    with pm.Model() as model:
        # Data
        _returns = pm.Data("_returns", log_returns)
        _state_idx = pm.intX(pm.Data("state_idx", state_idx))

        # Prior
        scale = pm.InverseGamma("scale", alpha=2.5, beta=0.05, shape=nstate)
        log_vol = pm.GaussianRandomWalk('log_vol', mu=0,
                                        sigma=scale[_state_idx],
                                        shape=len(data))
        nu = pm.Exponential("nu", 0.1)

        # Likelihood (use Theano's exp rather than np.exp on a tensor)
        returns = pm.StudentT("returns", nu=nu,
                              lam=pm.math.exp(-2 * log_vol),
                              observed=_returns)
    return model

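# A minimal usage sketch (hypothetical: `df` holds a 'covid_state_US' state
# column and a 'log_returns' column, matching the columns the factory expects):
model = make_state_model_AR1(df, 'log_returns')
with model:
    trace = pm.sample(1000, tune=1000, target_accept=0.9)
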
def hs_regression(X, y_obs, ylabel='y', tau_0=None, regularized=False, **kwargs):
    """See Piironen & Vehtari, 2017 (DOI: 10.1214/17-EJS1337SI)"""
    if tau_0 is None:
        M = X.shape[1]
        m0 = M / 2
        N = X.shape[0]
        tau_0 = m0 / ((M - m0) * np.sqrt(N))

    if regularized:
        # slab parameters come in via **kwargs (missing from the original signature)
        slab_scale = kwargs.pop('slab_scale', 3)
        slab_scale_sq = slab_scale**2
        slab_df = kwargs.pop('slab_df', 8)
        half_slab_df = slab_df / 2

        with pm.Model() as mhsr:
            tau = pm.HalfCauchy('tau', tau_0)
            c_sq = pm.InverseGamma('c_sq', alpha=half_slab_df,
                                   beta=half_slab_df * slab_scale_sq)
            lamb_m = pm.HalfCauchy('lambda_m', beta=1)
            lamb_m_bar = tt.sqrt(c_sq) * lamb_m / (
                tt.sqrt(c_sq + tt.pow(tau, 2) * tt.pow(lamb_m, 2)))
            w = pm.Normal('w', mu=0, sd=tau * lamb_m_bar, shape=X.shape[1])
            mu_ = pm.Deterministic('mu', tt.dot(X, w))
            sig = pm.HalfCauchy('sigma', beta=10)
            y = pm.Normal('y', mu=mu_, sd=sig, observed=y_obs.squeeze())
        return mhsr
    else:
        with pm.Model() as mhs:
            tau = pm.HalfCauchy('tau', tau_0)
            lamb_m = pm.HalfCauchy('lambda_m', beta=1)
            w = pm.Normal('w', mu=0, sd=tau * lamb_m, shape=X.shape[1])
            mu_ = pm.Deterministic('mu', tt.dot(X, w))
            sig = pm.HalfCauchy('sigma', beta=10)
            y = pm.Normal('y', mu=mu_, sd=sig, observed=y_obs.squeeze())
        return mhs

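# A minimal usage sketch (synthetic data, not from the original source):
# fit the regularized horseshoe to a sparse ground truth.
import numpy as np

rng = np.random.default_rng(0)
X_demo = rng.normal(size=(100, 20))
w_true = np.zeros(20)
w_true[:3] = [2.0, -1.5, 1.0]   # only three active features
y_demo = X_demo @ w_true + rng.normal(scale=0.5, size=100)

mhsr = hs_regression(X_demo, y_demo, regularized=True, slab_scale=3, slab_df=8)
with mhsr:
    trace = pm.sample(1000, tune=1000, target_accept=0.95)
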
def analyze_data(X, y, if_scale=True):
    """
    Function to analyze data.

    :param X: input features
    :param y: output
    :param if_scale: if True, normalize X and y
    :return: trace, result, yticks
    """
    epa_cols = X.columns.tolist()
    if if_scale:
        X = scale(X, axis=0)
        y = scale(y, axis=0)

    with pm.Model() as Model_Linthipe_SOC:
        alpha = pm.Normal('alpha', mu=0, sd=1)
        beta = pm.Normal('beta', mu=0, sd=1, shape=X.shape[1])
        sigma = pm.InverseGamma('sigma', alpha=2, beta=1)
        y_fit = pm.Normal('y_fit', mu=alpha + pm.math.dot(X, beta),
                          sd=sigma**(1.0 / 2), observed=y)

    with Model_Linthipe_SOC:
        trace = pm.sample(1000, step=pm.Metropolis(), chains=2)

    result = pm.summary(trace)
    ind = result.index.tolist()
    ind[1:len(epa_cols) + 1] = epa_cols
    result.index = ind
    yticks = ['alpha'] + epa_cols + ["sigma"]
    return trace, result, yticks

def group_model(self):
    with pm.Model() as gmodel:
        # uniform prior on h
        m = pm.DiscreteUniform('h', 0, 20)
        std = pm.InverseGamma('s', 3., 0.5)
        mean = 2 * m + 1
        alphas = np.arange(1., 101., 5.)
        p = self.discreteNormal(alphas, mean, std)

        for i in range(self.nruns):
            hab_ten = pm.Categorical('h_{}'.format(i), p)
            alpha = tt.as_tensor_variable([hab_ten])
            probs_a, probs_r = self.inferrer(alpha)
            # use a DensityDist
            pm.Categorical('actions_{}'.format(i), probs_a,
                           observed=self.actions[i])
            pm.Categorical('rewards_{}'.format(i), probs_r,
                           observed=self.rewards[i])
    return gmodel

def pm_horseshoe(X, y, b):
    m = 10
    ss = 3
    dof = 25

    horseshoe = pm.Model()
    with horseshoe:
        sigma = pm.HalfNormal('sigma', 2)
        tau_0 = m / (X.shape[1] - m) * sigma / tt.sqrt(X.shape[0])
        tau = pm.HalfCauchy('tau', tau_0)
        c2 = pm.InverseGamma('c2', dof / 2, dof / 2 * ss**2)
        lam = pm.HalfCauchy('lam', 1, shape=X.shape[1])

        l1 = lam * tt.sqrt(c2)
        l2 = tt.sqrt(c2 + tau * tau * lam * lam)
        lam_d = l1 / l2

        beta = pm.Normal('beta', 0, tau * lam_d, shape=X.shape[1])
        y_hat = tt.dot(X, beta)
        likelihood = pm.Normal('likelihood', y_hat, observed=y)

        trace = pm.sample(1000)

    b_hat = trace.get_values('beta').mean(0)
    b_sig = trace.get_values('beta').std(0)
    plot_beta(b, b_hat, std=b_sig)

def _gamma_mix(self, model, z):
    with model:
        logger.info("Using tau_g_alpha: {}".format(self.tau_g_alpha))
        tau_g = pm.InverseGamma("tau_g", alpha=self.tau_g_alpha, beta=1.,
                                shape=self.n_states)

        logger.info("Using mean_g: {}".format(self.gamma_means))
        if self.n_states == 2:
            logger.info("Building two-state model")
            mean_g = pm.Normal("mu_g", mu=self.gamma_means, sd=1,
                               shape=self.n_states)
            pm.Potential("m_opot",
                         var=tt.switch(mean_g[1] - mean_g[0] < 0., -np.inf, 0.))
        else:
            logger.info("Building three-state model")
            mean_g = pm.Normal("mu_g", mu=self.gamma_means, sd=1,
                               shape=self.n_states)
            pm.Potential(
                'm_opot',
                tt.switch(mean_g[1] - mean_g[0] < 0, -np.inf, 0)
                + tt.switch(mean_g[2] - mean_g[1] < 0, -np.inf, 0))

        gamma = pm.Normal("gamma", mean_g[z], tau_g[z], shape=self.n_genes)

    return tau_g, mean_g, gamma

def test_pymc3_convert_dists():
    """Just a basic check that all PyMC3 RVs will convert to and from Theano RVs."""
    tt.config.compute_test_value = "ignore"
    theano.config.cxx = ""

    with pm.Model() as model:
        norm_rv = pm.Normal("norm_rv", 0.0, 1.0, observed=1.0)
        mvnorm_rv = pm.MvNormal("mvnorm_rv", np.r_[0.0], np.c_[1.0],
                                shape=1, observed=np.r_[1.0])
        cauchy_rv = pm.Cauchy("cauchy_rv", 0.0, 1.0, observed=1.0)
        halfcauchy_rv = pm.HalfCauchy("halfcauchy_rv", 1.0, observed=1.0)
        uniform_rv = pm.Uniform("uniform_rv", observed=1.0)
        gamma_rv = pm.Gamma("gamma_rv", 1.0, 1.0, observed=1.0)
        invgamma_rv = pm.InverseGamma("invgamma_rv", 1.0, 1.0, observed=1.0)
        exp_rv = pm.Exponential("exp_rv", 1.0, observed=1.0)
        halfnormal_rv = pm.HalfNormal("halfnormal_rv", 1.0, observed=1.0)
        beta_rv = pm.Beta("beta_rv", 2.0, 2.0, observed=1.0)
        binomial_rv = pm.Binomial("binomial_rv", 10, 0.5, observed=5)
        dirichlet_rv = pm.Dirichlet("dirichlet_rv", np.r_[0.1, 0.1],
                                    observed=np.r_[0.1, 0.1])
        poisson_rv = pm.Poisson("poisson_rv", 10, observed=5)
        bernoulli_rv = pm.Bernoulli("bernoulli_rv", 0.5, observed=0)
        betabinomial_rv = pm.BetaBinomial("betabinomial_rv", 0.1, 0.1, 10,
                                          observed=5)
        categorical_rv = pm.Categorical("categorical_rv", np.r_[0.5, 0.5],
                                        observed=1)
        multinomial_rv = pm.Multinomial("multinomial_rv", 5, np.r_[0.5, 0.5],
                                        observed=np.r_[2])

    # Convert to a Theano `FunctionGraph`
    fgraph = model_graph(model)

    rvs_by_name = {n.owner.inputs[1].name: n.owner.inputs[1]
                   for n in fgraph.outputs}

    pymc_rv_names = {n.name for n in model.observed_RVs}
    assert all(
        isinstance(rvs_by_name[n].owner.op, RandomVariable)
        for n in pymc_rv_names)

    # Now, convert back to a PyMC3 model
    pymc_model = graph_model(fgraph)

    new_pymc_rv_names = {n.name for n in pymc_model.observed_RVs}
    # The round trip should preserve the observed RV names (the original
    # comparison was missing its assert).
    assert pymc_rv_names == new_pymc_rv_names

def run_model(sample: pd.Series,
              df: pd.DataFrame,
              training_genes: List[str],
              group: str = 'tissue',
              **kwargs):
    """
    Run Bayesian model using prefit Y's for each Gene and Dataset distribution

    Args:
        sample: N-of-1 sample to run
        df: Background dataframe to use in comparison
        training_genes: Genes to use during training
        group: Column to use to distinguish different groups
        **kwargs: Additional arguments passed to pm.sample

    Returns:
        Model and Trace from PyMC3
    """
    # Importing here since Theano base_compiledir needs to be set prior to import
    import pymc3 as pm

    classes = sorted(df[group].unique())
    df = df[[group] + training_genes]

    # Collect fits
    ys = {}
    for gene in training_genes:
        for i, dataset in enumerate(classes):
            cat_mu, cat_sd = st.norm.fit(df[df[group] == dataset][gene])
            # Standard deviation can't be initialized to 0, so set to 0.1
            cat_sd = 0.1 if cat_sd == 0 else cat_sd
            ys[f'{gene}={dataset}'] = (cat_mu, cat_sd)

    click.echo('Building model')
    with pm.Model() as model:
        # Linear model priors
        a = pm.Normal('a', mu=0, sd=1)
        b = [1] if len(classes) == 1 else pm.Dirichlet('b', a=np.ones(len(classes)))

        # Model error
        eps = pm.InverseGamma('eps', 2.1, 1)

        # Linear model declaration
        for gene in tqdm(training_genes):
            mu = a
            for i, dataset in enumerate(classes):
                name = f'{gene}={dataset}'
                y = pm.Normal(name, *ys[name])
                mu += b[i] * y

            # Embed mu in laplacian distribution
            pm.Laplace(gene, mu=mu, b=eps, observed=sample[gene])

        # Sample
        trace = pm.sample(**kwargs)

    return model, trace

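# A minimal usage sketch (hypothetical names: `sample_series` is a pd.Series
# indexed by gene, `background_df` has a 'tissue' column plus gene columns,
# and `genes` is a list of gene names). Extra kwargs are forwarded to pm.sample.
model, trace = run_model(sample_series, background_df, training_genes=genes,
                         group='tissue', draws=1000, tune=1000)
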
def sample(self, y, locations, X=None, **kwargs):
    self.X = X
    self.y = y
    self.locations = locations

    param_dicts_and_new_names = zip(
        (self.error_scale_parameter, self.scale_distribution_params),
        ('(sigma): noise scale', '(eta): kernel scale multiplier'))
    for param_dicts, new_name in param_dicts_and_new_names:
        if 'name' not in param_dicts:
            param_dicts.update({'name': new_name})

    with self.model:
        if self.kernel_type is None:
            self.kernel_type = Matern52
            self.kernel_parameter = {
                'ls': pm.InverseGamma(name='(rho): spatial correlation',
                                      alpha=1, beta=1),
                'input_dim': self.locations.shape[1]
            }
        gp_kernel = self.kernel_type(**self.kernel_parameter)

        if self.scale_distribution_for_kernel is None:
            self.scale_distribution_for_kernel = InverseGamma
            self.scale_distribution_params = {'alpha': 1, 'beta': 1}
        scale_for_kernel = pm.math.sqr(
            self.scale_distribution_for_kernel(**self.scale_distribution_params))

        if self.error_scale_distribution is None:
            self.error_scale_distribution = HalfCauchy
            self.error_scale_parameter = {'beta': 5}

        self.gp = pm.gp.MarginalSparse(cov_func=scale_for_kernel * gp_kernel,
                                       approx="FITC")
        inducing_points = pm.gp.util.kmeans_inducing_points(20, self.locations)
        error_variable = self.error_scale_distribution(**self.error_scale_parameter)
        y_ = self.gp.marginal_likelihood("y",
                                         X=self.locations,
                                         Xu=inducing_points,
                                         y=self.y,
                                         noise=error_variable)
        self.trace = pm.sample(**kwargs)

def build_model(self, n=None, name='archimedian_model'):
    with pm.Model(name=name) as self.model:
        if n is None:
            # one n per galaxy, or per arm?
            self.n_choice = pm.Categorical('n_choice', [1, 1, 0, 1, 1],
                                           testval=1,
                                           shape=len(self.galaxies))
            self.n = pm.Deterministic('n', self.n_choice - 2)
            self.chirality_correction = tt.switch(self.n < 0, -1, 1)
        else:
            msg = 'Parameter $n$ must be a nonzero float'
            try:
                n = float(n)
            except ValueError:
                pass
            finally:
                assert isinstance(n, float) and n != 0, msg
            self.n_choice = None
            self.n = pm.Deterministic('n', np.repeat(n, len(self.galaxies)))
            self.chirality_correction = tt.switch(self.n < 0, -1, 1)

        self.a = pm.HalfCauchy('a', beta=1, testval=1, shape=self.n_arms)
        self.psi = pm.Normal(
            'psi',
            mu=0,
            sigma=1,
            testval=0.1,
            shape=self.n_arms,
        )
        self.sigma_r = pm.InverseGamma('sigma_r', alpha=2, beta=0.5)

        # Unfortunately, as we need to reverse the theta points for arms
        # with n < 1, and rotate all arms to start at theta = 0,
        # we need to do some model-mangling
        self.t_mins = Series({
            i: self.data.query('arm_index == @i')['theta'].min()
            for i in np.unique(self.data['arm_index'])
        })
        r_stack = [
            self.a[i] * tt.power(
                (self.data.query('arm_index == @i')['theta'].values
                 - self.t_mins[i] + self.psi[i]),
                1 / self.n[int(self.gal_arm_map[i])]
            )[::self.chirality_correction[int(self.gal_arm_map[i])]]
            for i in np.unique(self.data['arm_index'])
        ]
        r = pm.Deterministic('r', tt.concatenate(r_stack))

        self.likelihood = pm.StudentT(
            'Likelihood',
            nu=1,  # pm.StudentT requires nu; assuming nu=1 as in the sibling models above
            mu=r,
            sigma=self.sigma_r,
            observed=self.data['r'].values,
        )

def fixture_model():
    with pm.Model() as model:
        n = 5
        dim = 4
        with pm.Model():
            cov = pm.InverseGamma("cov", alpha=1, beta=1)
            x = pm.Normal("x", mu=np.ones((dim,)),
                          sigma=pm.math.sqrt(cov), shape=(n, dim))
            eps = pm.HalfNormal("eps", np.ones((n, 1)), shape=(n, dim))
            mu = pm.Deterministic("mu", at.sum(x + eps, axis=-1))
            y = pm.Normal("y", mu=mu, sigma=1, shape=(n,))
    return model, [cov, x, eps, y]

def run_model(sample, df, training_genes, weights, group: str = 'tissue', **kwargs):
    """
    Run Bayesian model using prefit Y's for each Gene and Dataset distribution

    Args:
        sample: N-of-1 sample to run
        df: Background dataframe to use in comparison
        training_genes: Genes to use during training
        weights: Per-group weights for each group's contribution to mu
        group: Column to use to distinguish different groups
        **kwargs: Additional arguments passed to pm.sample

    Returns:
        Model and Trace from PyMC3
    """
    classes = sorted(df[group].unique())
    df = df[[group] + training_genes]

    # Collect fits
    ys = {}
    for gene in training_genes:
        for i, dataset in enumerate(classes):
            cat_mu, cat_sd = st.norm.fit(df[df[group] == dataset][gene])
            # Standard deviation can't be initialized to 0, so set to 0.1
            cat_sd = 0.1 if cat_sd == 0 else cat_sd
            ys[f'{gene}={dataset}'] = (cat_mu, cat_sd)

    print('Building model')
    with pm.Model() as model:
        # Linear model priors
        a = pm.Normal('a', mu=0, sd=1)

        # Model error
        eps = pm.InverseGamma('eps', 2.1, 1)

        # TODO: Try tt.stack to declare mu more intelligently via b * y
        # Linear model declaration
        for gene in tqdm(training_genes):
            mu = a
            for i, dataset in enumerate(classes):
                name = f'{gene}={dataset}'
                y = pm.Normal(name, *ys[name])
                mu += weights[i] * y

            # Embed mu in laplacian distribution
            pm.Laplace(gene, mu=mu, b=eps, observed=sample[gene])

        # Sample
        trace = pm.sample(**kwargs)

    return model, trace

def create_model(self, x=None, aD=None, bD=None, aA=None, bA=None,
                 aN=None, bN=None, delta_t=None, N=None):
    with pm.Model() as model:
        D = pm.InverseGamma('D', alpha=aD, beta=bD)
        A = pm.Gamma('A', alpha=aA, beta=bA)
        sN = pm.InverseGamma('sN', alpha=aN, beta=bN)
        B = pm.Deterministic('B', pm.math.exp(-delta_t * D / A))
        path = Ornstein_Uhlenbeck('path', A=A, B=B, shape=(N,))
        X_obs = pm.Normal('X_obs', mu=path, sd=sN, observed=x)
    return model

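# A minimal usage sketch (hypothetical: `ou` is an instance of the class this
# method belongs to, `x_obs` is an observed 1-D path; the hyperparameter
# values are arbitrary illustrations):
ou_model = ou.create_model(x=x_obs, aD=3., bD=1., aA=3., bA=1.,
                           aN=3., bN=1., delta_t=0.1, N=len(x_obs))
with ou_model:
    trace = pm.sample(1000, tune=1000)
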
def _set_simple_model(self):
    with pm.Model() as model:
        logger.info("Using tau_g_alpha: {}".format(self.tau_g_alpha))
        tau_g = pm.InverseGamma("tau_g", alpha=self.tau_g_alpha, beta=1., shape=1)
        mean_g = pm.Normal("mu_g", mu=0, sd=1, shape=1)
        gamma = pm.Normal("gamma", mean_g, tau_g, shape=self.n_genes)

    param_hlm = self._hlm(model, gamma)
    self._set_steps(model, None, tau_g, mean_g, gamma, *param_hlm)
    return self

def create_model(self, x=None, aB=None, bB=None, aA=None, bA=None,
                 delta_t=None, N=None):
    with pm.Model() as model:
        B = pm.Beta('B', alpha=aB, beta=bB)
        A = pm.InverseGamma('A', alpha=aA, beta=bA)
        path = Ornstein_Uhlenbeck('path', B=B, A=A, observed=x)
    return model

def sample(self, y, locations, X=None, approximation=False, **kwargs):
    self.X = X
    self.y = y
    self.locations = locations

    with self.model:
        if self.kernel_type is None:
            self.kernel_type = Matern52
            self.kernel_parameter = {
                'ls': pm.InverseGamma(name='rho: spatial correlation',
                                      alpha=1, beta=1),
                'input_dim': self.locations.shape[1]
            }
        gp_kernel = self.kernel_type(**self.kernel_parameter)

        if self.scale_distribution_for_kernel is None:
            self.scale_distribution_for_kernel = InverseGamma
            self.scale_distribution_params = {
                'name': 'kernel scale multiplier',
                'alpha': 1,
                'beta': 1
            }
        scale_for_kernel = pm.math.sqr(
            self.scale_distribution_for_kernel(**self.scale_distribution_params))

        if self.error_distribution is None:
            self.error_distribution = Normal
            self.error_parameter = {
                'sigma': InverseGamma.dist(alpha=1, beta=1)
            }

        cov_kernel_func = scale_for_kernel * gp_kernel
        self.gp = pm.gp.Latent(cov_func=cov_kernel_func)
        gaussian_process_mean = self.gp.prior('f', X=self.locations)
        y_ = self.error_distribution(name='y',
                                     mu=gaussian_process_mean,
                                     observed=self.y,
                                     **self.error_parameter)

        if approximation:
            self.trace = pm.fit(method='advi', n=10_000).sample()
        else:
            self.trace = pm.sample(**kwargs)

def create_model(self, x=None, aD=None, bD=None, aA=None, bA=None,
                 delta_t=None, N=None):
    with pm.Model() as model:
        D = pm.Gamma('D', alpha=aD, beta=bD)
        A = pm.InverseGamma('A', alpha=aA, beta=bA)
        B = pm.Deterministic('B', pm.math.exp(-delta_t * D / A))
        path = Ornstein_Uhlenbeck('path', A=A, B=B, observed=x)
    return model

def model_factory(x_2_data, x_3_data, x_4_data, x_5_data, x_6_data,
                  x_7_data, x_8_data, y_data, x_1_data):
    with pm.Model() as varying_intercept_slope_noncentered:
        # Priors
        mu_a = pm.Normal('mu_a', mu=0.05, sd=2)
        sigma_a = pm.HalfCauchy('sigma_a', 5)
        mu_b_1 = pm.InverseGamma('mu_b_1', mu=0.05, sigma=2)
        sigma_b_1 = pm.HalfCauchy('sigma_b_1', 5)
        mu_b_2 = pm.InverseGamma('mu_b_2', mu=0.05, sigma=2)
        sigma_b_2 = pm.HalfCauchy('sigma_b_2', 5)
        mu_b_3 = pm.InverseGamma('mu_b_3', mu=0.05, sigma=2)
        sigma_b_3 = pm.HalfCauchy('sigma_b_3', 5)
        mu_b_4 = pm.InverseGamma('mu_b_4', mu=0.05, sigma=2)
        sigma_b_4 = pm.HalfCauchy('sigma_b_4', 5)
        mu_b_5 = pm.InverseGamma('mu_b_5', mu=0.05, sigma=2)
        sigma_b_5 = pm.HalfCauchy('sigma_b_5', 5)
        mu_b_6 = pm.InverseGamma('mu_b_6', mu=0.05, sigma=2)
        sigma_b_6 = pm.HalfCauchy('sigma_b_6', 5)
        mu_b_7 = pm.InverseGamma('mu_b_7', mu=0.05, sigma=2)
        sigma_b_7 = pm.HalfCauchy('sigma_b_7', 5)

        # Non-centered random intercepts + slopes
        u = pm.Normal('u', mu=0, sd=2, shape=len(hierachical_type))
        a = mu_a + u * sigma_a

        # Random slopes
        b_1 = mu_b_1 + u * sigma_b_1
        b_2 = mu_b_2 + u * sigma_b_2
        b_3 = mu_b_3 + u * sigma_b_3
        b_4 = mu_b_4 + u * sigma_b_4
        b_5 = mu_b_5 + u * sigma_b_5
        b_6 = mu_b_6 + u * sigma_b_6
        b_7 = mu_b_7 + u * sigma_b_7

        # Expected value
        y_hat = (a[x_1_data]
                 + b_1[x_1_data] * x_2_data
                 + b_2[x_1_data] * x_3_data
                 + b_3[x_1_data] * x_4_data
                 + b_4[x_1_data] * x_5_data
                 + b_5[x_1_data] * x_6_data
                 + b_6[x_1_data] * x_7_data
                 + b_7[x_1_data] * x_8_data)

        # Data likelihood (discrete distributions only)
        pm.Bernoulli('y_like', logit_p=y_hat, observed=y_data)

    # dump trace model
    joblib.dump(varying_intercept_slope_noncentered,
                os.path.sep.join([BASE_DIR_OUTPUT, output_file_name_2]))

    return varying_intercept_slope_noncentered

def minicube_pymc_fit(xax, data, guesses, ncomps=1, sample=True,
                      fmin=opt.fmin_bfgs, fmin_kwargs={}, **sampler_kwargs):
    '''
    pymc fitting of a set of single Gaussians.
    '''
    basic_model = pm.Model()

    with basic_model:
        params_dict = {}
        for i in range(ncomps):
            model_i, params_dict_i = \
                spatial_gaussian_model(xax, data, guesses, comp_num=i)
            if i == 0:
                model = model_i
            else:
                model += model_i
            params_dict.update(params_dict_i)

        sigma_n = pm.InverseGamma('sigma_n', alpha=1, beta=1)
        Y_obs = pm.Normal('Y_obs', mu=model, sd=sigma_n, observed=data)

        start = pm.find_MAP(fmin=fmin, **fmin_kwargs)

        # Use the initial guesses for the Bernoulli parameters
        for i in range(ncomps):
            start['on{}'.format(i)] = guesses['on{}'.format(i)]

        if sample:
            # An attempt to use variational inference b/c it would be
            # way faster. This fails terribly in every case I've tried
            # trace = pm.fit(500, start=start, method='svgd',
            #                inf_kwargs=dict(n_particles=100,
            #                                temperature=1e-4),
            #                ).sample(500)
            trace = pm.sample(start=start, **sampler_kwargs)

    if sample:
        medians = parameter_medians(trace)
        stddevs = parameter_stddevs(trace)
        return medians, stddevs, trace, basic_model
    else:
        return start, basic_model

def train(self, niter=1000, random_seed=123, tune=500, cores=4):
    ### model training
    with self.scallop_model:
        # hyperparameter priors
        l = pm.InverseGamma("l", 5, 5, shape=self.dim)
        sigma_f = pm.HalfNormal("sigma_f", 1)

        # covariance function and marginal GP
        K = sigma_f ** 2 * pm.gp.cov.ExpQuad(self.dim, ls=l)
        self.gp = pm.gp.Marginal(cov_func=K)

        # marginal likelihood with Gaussian observation noise
        sigma_n = pm.HalfNormal("sigma_n", 1)
        tot_catch = self.gp.marginal_likelihood("tot_catch", X=self.x,
                                                y=self.y, noise=sigma_n)

        # model fitting
        self.trace = pm.sample(niter, random_seed=random_seed,
                               progressbar=True, tune=tune, cores=cores)
