def make_model():
    # Construct the prior term
    location = pymc.Uniform('location', lower=[0, 0], upper=[1, 1])
    # The locations of the sensors
    X = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]
    # The output of the model
    solver = Solver(X=X)

    @pymc.deterministic(plot=False)
    def model_output(value=None, loc=location):
        return solver(loc)

    # The hyper-parameters of the noise
    alpha = pymc.Exponential('alpha', beta=1.)
    beta = pymc.Exponential('beta', beta=1.)
    tau = pymc.Gamma('tau', alpha=alpha, beta=beta)
    # Load the observed data
    data = np.loadtxt('observed_data')

    # The observations at the sensor locations
    @pymc.stochastic(dtype=float, observed=True)
    def sensors(value=data, mu=model_output, tau=tau, gamma=1.):
        """The value of the response at the sensors."""
        return gamma * pymc.normal_like(value, mu=mu, tau=tau)

    return locals()
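# A minimal usage sketch (an assumption, not part of the original snippet):
# it presumes pymc 2.x, a working Solver class, and an 'observed_data' file,
# as the function above does. The locals() dict plugs straight into pymc.MCMC.
model = make_model()
mcmc = pymc.MCMC(model)
mcmc.sample(iter=10000, burn=5000, thin=2)
location_trace = mcmc.trace('location')[:]  # posterior samples of the source location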
def gamma_poisson(x, t):
    """
    x: number of failures (N vector)
    t: operation time, thousands of hours (N vector)
    """
    if x is not None:
        N = x.shape
    else:
        N = num_points
    # Place an exponential prior on t, for when it is unknown.
    t = pymc.Exponential('t', beta=1.0 / 50.0, value=t, size=N,
                         observed=(t is not None))
    alpha = pymc.Exponential('alpha', beta=1.0, value=1.0)
    beta = pymc.Gamma('beta', alpha=0.1, beta=1.0, value=1.0)
    theta = pymc.Gamma('theta', alpha=alpha, beta=beta, size=N)

    @pymc.deterministic
    def mu(theta=theta, t=t):
        return theta * t

    x = pymc.Poisson('x', mu=mu, value=x, observed=(x is not None))
    return locals()
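# Hypothetical usage sketch (pymc 2.x; the data below are made up for
# illustration): fit the hierarchical gamma-Poisson model to failure counts
# and exposure times, then inspect the hyper-parameter alpha.
import numpy as np
import pymc

x_obs = np.array([5, 1, 5, 14, 3])                  # made-up failure counts
t_obs = np.array([94.3, 15.7, 62.9, 126.0, 5.24])   # made-up operation times
mcmc = pymc.MCMC(gamma_poisson(x_obs, t_obs))
mcmc.sample(iter=20000, burn=10000)
print(mcmc.trace('alpha')[:].mean())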
def three_model_comparison(p_df):
    a_n = len(p_df)
    t_lam = pm.Uniform('t_lam', 0, 1)
    #t_lam = 1.0 / np.mean(p_df)
    t_lambda_1 = pm.Exponential("t_lambda_1", t_lam)
    #t_lambda_1 = pm.Uniform("t_lambda_1", min(p_df), max(p_df))
    t_lambda_2 = pm.Exponential("t_lambda_2", t_lam)
    #t_lambda_2 = pm.Uniform("t_lambda_2", min(p_df), max(p_df))
    t_lambda_3 = pm.Exponential("t_lambda_3", t_lam)
    #t_lambda_3 = pm.Uniform("t_lambda_3", min(p_df), max(p_df))
    # The switchpoints index into the observation array, so they are bounded
    # by the number of observations, not by the observed count values.
    t_tau_1 = pm.DiscreteUniform("tau_1", lower=0, upper=a_n - 1)
    t_tau_2 = pm.DiscreteUniform("tau_2", lower=t_tau_1, upper=a_n)

    @pm.deterministic
    def lambda_(tau_1=t_tau_1, tau_2=t_tau_2, lambda_1=t_lambda_1,
                lambda_2=t_lambda_2, lambda_3=t_lambda_3):
        out = np.zeros(a_n)
        out[:tau_1] = lambda_1       # rate before tau_1 is lambda_1
        out[tau_1:tau_2] = lambda_2  # rate between tau_1 and tau_2 is lambda_2
        out[tau_2:] = lambda_3       # rate from tau_2 (inclusive) onward is lambda_3
        return out

    t_obs = pm.Poisson('t_observed', mu=lambda_, value=p_df, observed=True)
    t_model = pm.Model(
        [t_obs, t_lam, t_lambda_1, t_lambda_2, t_lambda_3, t_tau_1, t_tau_2])
    return t_model, t_lam, t_lambda_1, t_lambda_2, t_lambda_3, t_tau_1, t_tau_2
def two_model_comparison(p_df):
    a_n = len(p_df)
    d_lam = pm.Uniform('d_lam', 0, 1)
    #d_lam = 1.0 / np.mean(p_df)
    lambda_1 = pm.Exponential("lambda_1", d_lam)
    #lambda_1 = pm.Uniform("lambda_1", min(p_df), max(p_df))
    lambda_2 = pm.Exponential("lambda_2", d_lam)
    #lambda_2 = pm.Uniform("lambda_2", min(p_df), max(p_df))
    # The switchpoint indexes into the observation array, so it is bounded by
    # the number of observations, not by the observed count values.
    tau = pm.DiscreteUniform("tau", lower=0, upper=a_n)

    @pm.deterministic
    def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
        out = np.zeros(a_n)
        out[:tau] = lambda_1  # rate before tau is lambda_1
        out[tau:] = lambda_2  # rate after (and including) tau is lambda_2
        return out

    d_obs = pm.Poisson('d_observed', mu=lambda_, value=p_df, observed=True)
    d_model = pm.Model([d_obs, d_lam, lambda_1, lambda_2, tau])
    return d_model, d_obs, d_lam, lambda_1, lambda_2, tau
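# Hypothetical usage sketch (pymc 2.x; the counts are made up): fit the
# single-switchpoint model above and summarize the inferred change point.
counts = np.array([13, 8, 10, 21, 24, 27, 25, 22])  # made-up daily counts
d_model, d_obs, d_lam, lambda_1, lambda_2, tau = two_model_comparison(counts)
mcmc = pm.MCMC(d_model)
mcmc.sample(20000, 5000)
print(np.bincount(mcmc.trace('tau')[:]))  # posterior frequency of each switch index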
def make_gp_submodel(suffix, mesh, africa_val=None, with_africa_covariate=False):
    # The partial sill.
    amp = pm.Exponential('amp_%s' % suffix, .1, value=1.)
    # The range parameter. Units are RADIANS.
    scale = pm.Exponential('scale_%s' % suffix, 1, value=.08)
    # 1 radian = the radius of the earth, about 6378.1 km
    scale_in_km = scale * 6378.1
    # The nugget variance, lower-bounded to preserve mixing.
    V = pm.Exponential('V_%s' % suffix, 1, value=1.)

    @pm.potential
    def V_bound(V=V):
        if V < .1:
            return -np.inf
        else:
            return 0

    # Create the covariance & its evaluation at the data locations.
    @pm.deterministic(trace=True, name='C_%s' % suffix)
    def C(amp=amp, scale=scale):
        return pm.gp.FullRankCovariance(pm.gp.exponential.geo_rad,
                                        amp=amp, scale=scale)

    # Create the mean function
    if with_africa_covariate:
        beta = pm.Normal('beta_%s' % suffix, 0, .01, value=1)

        @pm.deterministic(trace=True, name='M_%s' % suffix)
        def M(mesh=mesh, africa_val=africa_val, beta=beta):
            M = pm.gp.Mean(retrieve_africa_val, meshes=[], africa_vals=[],
                           beta=beta)
            store_africa_val(M, mesh, africa_val)
            return M
    else:
        @pm.deterministic(trace=True, name='M_%s' % suffix)
        def M():
            return pm.gp.Mean(pm.gp.zero_fn)

    # Create the GP submodel
    sp_sub = pm.gp.GPSubmodel('sp_sub_%s' % suffix, M, C, mesh)
    sp_sub.f_eval.value = sp_sub.f_eval.value - sp_sub.f_eval.value.mean()

    return locals()
def param_selector(data):
    # Parameters for the Hawkes process.
    # Base rate: roughly the number of events divided by the length of the
    # observation window, with a small random jitter.
    mu = pm.Exponential(
        'mu',
        len(data) / data[-1] + (random.choice([0, 1]) * 0.01 * random.random()))
    # Alpha is the coefficient for event creation - an exponential prior,
    # since it is increasingly unlikely to be large.
    alpha = pm.Exponential(
        'alpha',
        len(data) / data[-1] + (random.choice([0, 1]) * 0.01 * random.random()))
    # The impact of one event, if it has one, decreases over time, so beta
    # must be positive.
    beta = pm.Exponential('beta', 1)
    return mu, alpha, beta
def __init__(self, observed_frequencies=1.0, observed_power=1.0):
    self.observed_frequencies = observed_frequencies
    self.observed_power = observed_power

    # PyMC definitions
    # Define data and stochastics
    self.power_law_index = pymc.Uniform('power_law_index',
                                        lower=0.0, upper=6.0,
                                        doc='power law index')
    self.power_law_norm = pymc.Uniform('power_law_norm',
                                       lower=-100.0, upper=100.0,
                                       doc='power law normalization')

    # Model for the power law spectrum
    @pymc.deterministic(plot=False)
    def fourier_power_spectrum(p=self.power_law_index,
                               a=self.power_law_norm,
                               f=self.observed_frequencies):
        """A pure and simple power law model"""
        out = rnspectralmodels.power_law(f, [a, p])
        return out

    self.spectrum = pymc.Exponential('spectrum',
                                     beta=1.0 / fourier_power_spectrum,
                                     value=observed_power,
                                     observed=True)

    # MCMC model as a list
    self.pymc_model = [self.power_law_index,
                       self.power_law_norm,
                       fourier_power_spectrum,
                       self.spectrum]
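# Hypothetical usage sketch: the enclosing class is not shown above, so the
# name PowerLawSpectrumModel below is invented for illustration, and freqs /
# power stand in for arrays of Fourier frequencies and spectral power.
fit = PowerLawSpectrumModel(observed_frequencies=freqs, observed_power=power)
mcmc = pymc.MCMC(fit.pymc_model)
mcmc.sample(iter=50000, burn=10000)
index_trace = mcmc.trace('power_law_index')[:]  # posterior of the spectral index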
def mk_allmodels_bayes(tree, chars, nparam, pi="Equal", dbname=None):
    """
    Fit an mk model with nparam parameters distributed about the Q matrix.
    """
    if type(chars) == dict:
        chars = [chars[l] for l in [n.label for n in tree.leaves()]]
    nchar = len(set(chars))
    ncell = nchar**2 - nchar
    assert nparam <= ncell
    minp = pscore(tree, chars)
    treelen = sum([n.length for n in tree.descendants()])

    ### Parameters
    # Prior on slowest distribution (beta = 1/mean)
    slow = pymc.Exponential("slow", beta=treelen / minp)
    paramscales = [None] * (nparam - 1)
    for p in range(nparam - 1):
        paramscales[p] = pymc.Uniform(name="paramscale_{}".format(str(p)),
                                      lower=2, upper=20)

    ### Model
    paramset = list(range(nparam + 1))
    nonzeros = paramset[1:]
    all_mods = list(itertools.product(paramset, repeat=ncell))
    all_mods = [tuple(m) for m in all_mods
                if all([i in set(m) for i in nonzeros])]
    mod = make_qmat_stoch_mk(all_mods, name="mod")
    l = mk.create_likelihood_function_mk(tree=tree, chars=chars, Qtype="ARD",
                                         pi=pi, findmin=False)
    Q = np.zeros([nchar, nchar])
    mask = np.ones([nchar, nchar], dtype=bool)
    mask[np.diag_indices(nchar)] = False

    @pymc.potential
    def mklik(mod=mod, slow=slow, paramscales=paramscales, name="mklik"):
        params = [0.0] * (nparam + 1)
        params[1] = slow
        for i, s in enumerate(paramscales):
            params[2 + i] = params[2 + (i - 1)] * s
        Qparams = [params[i] for i in mod]
        return l(np.array(Qparams))

    if dbname is None:
        mod_mcmc = pymc.MCMC(locals(), calc_deviance=True)
    else:
        mod_mcmc = pymc.MCMC(locals(), calc_deviance=True,
                             db="pickle", dbname=dbname)
    mod_mcmc.use_step_method(QmatMetropolis_mk, mod, all_mods)
    return mod_mcmc
def _fit_beta_distribution(data, n_iter):
    alpha_var = pm.Exponential('alpha', .5)
    beta_var = pm.Exponential('beta', .5)
    observations = pm.Beta('observations', alpha_var, beta_var,
                           value=data, observed=True)
    model = pm.Model([alpha_var, beta_var, observations])
    mcmc = pm.MCMC(model)
    mcmc.sample(n_iter)
    alphas = mcmc.trace('alpha')[:]
    betas = mcmc.trace('beta')[:]
    return alphas, betas
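# Usage sketch with synthetic data (the ground-truth shape parameters below
# are made up): recover Beta(2, 5) from random draws.
import numpy as np

np.random.seed(0)
samples = np.random.beta(2.0, 5.0, size=500)  # made-up ground truth (2, 5)
alphas, betas = _fit_beta_distribution(samples, n_iter=20000)
print(alphas.mean(), betas.mean())  # posterior means should land near 2 and 5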
def main():
    lambda_1 = pm.Exponential("lambda_1", 1)  # prior on first behaviour
    lambda_2 = pm.Exponential("lambda_2", 1)  # prior on second behaviour
    tau = pm.DiscreteUniform("tau", lower=0, upper=10)  # prior on behaviour change

    print("lambda_1.value = %.3f" % lambda_1.value)
    print("lambda_2.value = %.3f" % lambda_2.value)
    print("tau.value = %.3f" % tau.value)

    print(lambda_1.random(), lambda_2.random(), tau.random())

    print("After calling random() on the variables...")
    print("lambda_1.value = %.3f" % lambda_1.value)
    print("lambda_2.value = %.3f" % lambda_2.value)
    print("tau.value = %.3f" % tau.value)

    samples = [lambda_1.random() for i in range(20000)]
    plt.hist(samples, bins=70, density=True, histtype="stepfilled")
    plt.title(r"Prior distribution for $\lambda_1$")
    plt.xlim(0, 8)
    plt.show()

    data = np.array([10, 5])
    fixed_variable = pm.Poisson("fxd", 1, value=data, observed=True)
    print("value: ", fixed_variable.value)
    print("calling .random()")
    fixed_variable.random()
    print("value: ", fixed_variable.value)

    n_data_points = 5  # in CH1 we had ~70 data points

    @pm.deterministic
    def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
        out = np.zeros(n_data_points)
        out[:tau] = lambda_1  # lambda before tau is lambda_1
        out[tau:] = lambda_2  # lambda after tau is lambda_2
        return out

    data = np.array([10, 25, 15, 20, 35])
    obs = pm.Poisson("obs", lambda_, value=data, observed=True)
    model = pm.Model([obs, lambda_, lambda_1, lambda_2, tau])
def make_model(x):
    a = pm.Exponential('a', beta=x, value=0.5)

    @pm.deterministic
    def b(a=a):
        return 100 - a

    @pm.stochastic
    def c(value=0.5, a=a, b=b):
        return (value - a)**2 / b

    return locals()
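# Minimal usage sketch (an illustration, not part of the original): the custom
# stochastic 'c' makes this a toy model, but it samples like any pymc 2.x model.
model = make_model(3.0)
mcmc = pm.MCMC(model)
mcmc.sample(5000, 1000)
print(mcmc.trace('a')[:].mean())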
def best(group1=None, group2=None):
    """Two-group Bayesian estimation (BEST-style) model."""
    import pymc as pm
    # Fall back to simulated data when no groups are supplied.
    # (The original overwrote its arguments unconditionally.)
    if group1 is None or group2 is None:
        group1 = np.random.normal(15, 2, 100)
        group2 = np.random.normal(15.3, 2, 100)

    # Generate pooled data
    pooled = np.concatenate((group1, group2))

    mu1 = pm.Normal("mu_1", mu=pooled.mean(), tau=1.0 / pooled.var() / 1000.0)
    mu2 = pm.Normal("mu_2", mu=pooled.mean(), tau=1.0 / pooled.var() / 1000.0)
    sig1 = pm.Uniform("sigma_1", lower=pooled.var() / 1000.0,
                      upper=pooled.var() * 1000)
    sig2 = pm.Uniform("sigma_2", lower=pooled.var() / 1000.0,
                      upper=pooled.var() * 1000)
    v = pm.Exponential("nu", beta=1.0 / 29)

    t1 = pm.NoncentralT("t_1", mu=mu1, lam=1.0 / sig1, nu=v,
                        value=group1[:], observed=True)
    t2 = pm.NoncentralT("t_2", mu=mu2, lam=1.0 / sig2, nu=v,
                        value=group2[:], observed=True)
    model = pm.Model([t1, mu1, sig1, t2, mu2, sig2, v])

    # Generate our MCMC object
    mcmc = pm.MCMC(model)
    mcmc.sample(40000, 10000, 2)

    mus1 = mcmc.trace('mu_1')[:]
    mus2 = mcmc.trace('mu_2')[:]
    sigmas1 = mcmc.trace('sigma_1')[:]
    sigmas2 = mcmc.trace('sigma_2')[:]
    nus = mcmc.trace('nu')[:]

    diff_mus = mus1 - mus2
    diff_sigmas = sigmas1 - sigmas2
    normality = np.log(nus)
    effect_size = (mus1 - mus2) / np.sqrt((sigmas1**2 + sigmas2**2) / 2.)

    print("mu_1", mus1.mean())
    print("mu_2", mus2.mean())
def make_on_off(n_off, expo_off, n_on, expo_on, mean0):
    """
    Make a PyMC model for inferring a Poisson signal rate parameter, `s`,
    for 'on-off' observations with uncertain background rate, `b`.

    Parameters
    ----------
    n_off, n_on : int
        Event counts off-source and on-source
    expo_off, expo_on : float
        Exposures off-source and on-source
    mean0 : float
        Prior mean for both background and signal rates
    """
    # PyMC's exponential dist'n uses beta = 1/scale = 1/mean.
    # Here we initialize rates to good guesses.
    b_est = float(n_off) / expo_off
    s_est = max(float(n_on) / expo_on - b_est, .1 * b_est)
    b = pymc.Exponential('b', beta=1. / mean0, value=b_est)
    s = pymc.Exponential('s', beta=1. / mean0, value=s_est)

    # The expected number of counts on and off source, as deterministic functions.
    @pymc.deterministic
    def mu_off(b=b):
        return b * expo_off

    @pymc.deterministic
    def mu_on(s=s, b=b):
        return (s + b) * expo_on

    # Poisson likelihood functions:
    off_count = pymc.Poisson('off_count', mu=mu_off, value=n_off, observed=True)
    on_count = pymc.Poisson('on_count', mu=mu_on, value=n_on, observed=True)

    return locals()
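# Usage sketch (the numbers are made up for illustration): 16 counts
# off-source over 2 exposure units, 24 counts on-source over 1 unit, with a
# weak prior mean of 10 for both rates.
onoff = make_on_off(n_off=16, expo_off=2.0, n_on=24, expo_on=1.0, mean0=10.0)
mcmc = pymc.MCMC(onoff)
mcmc.sample(iter=20000, burn=5000)
s_trace = mcmc.trace('s')[:]
print('posterior mean signal rate:', s_trace.mean())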
def negative_b_mcm_model(p_df):
    n_mu = pm.Normal('n_mu', mu=1650, tau=0.00001)
    n_lam = pm.Uniform('n_uni_alpha', 0, 1)
    n_alpha = pm.Exponential('n_alpha', beta=n_lam)
    n_ob = pm.NegativeBinomial('n_observed', mu=n_mu, alpha=n_alpha,
                               value=p_df, observed=True)
    n_es = pm.NegativeBinomial('n_estimated', mu=n_mu, alpha=n_alpha,
                               observed=False)
    n_model = pm.Model([n_mu, n_lam, n_alpha, n_ob, n_es])
    return n_mu, n_lam, n_alpha, n_ob, n_es, n_model
def create_mk_model(tree, chars, Qtype, pi):
    """
    Create model objects to be passed to pymc.MCMC

    Creates Qparams and likelihood function
    """
    if type(chars) == dict:
        chars = [chars[l] for l in [n.label for n in tree.leaves()]]
    nchar = len(set(chars))
    if Qtype == "ER":
        N = 1
    elif Qtype == "Sym":
        N = int(binom(nchar, 2))
    elif Qtype == "ARD":
        N = int(nchar ** 2 - nchar)
    else:
        raise ValueError("Qtype must be one of: ER, Sym, ARD")

    # Setting a Dirichlet prior with Jeffrey's hyperprior of 1/2
    if N != 1:
        theta = [1.0 / 2.0] * N
        Qparams_init = pymc.Dirichlet("Qparams_init", theta, value=[0.5])
        Qparams_init_full = pymc.CompletedDirichlet("Qparams_init_full",
                                                    Qparams_init)
    else:
        Qparams_init_full = [[1.0]]

    # Exponential scaling factor for Qparams
    scaling_factor = pymc.Exponential(name="scaling_factor", beta=1.0, value=1.0)

    # Scaled Qparams; we would not expect them to necessarily add
    # to 1 as would be the case in a Dirichlet distribution
    @pymc.deterministic(plot=False)
    def Qparams(q=Qparams_init_full, s=scaling_factor):
        Qs = np.empty(N)
        for i in range(N):
            Qs[i] = q[0][i] * s
        return Qs

    l = mk.create_likelihood_function_mk(tree=tree, chars=chars, Qtype=Qtype,
                                         pi="Equal", findmin=False)

    @pymc.potential
    def mklik(q=Qparams, name="mklik"):
        return l(q)

    return locals()
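# Hypothetical usage sketch: 'tree' and 'chars' are assumed to come from the
# surrounding phylogenetics package (a tree object with .leaves() and a
# tip-label-to-state dict); the returned locals() dict plugs into pymc.MCMC.
mod = create_mk_model(tree, chars, Qtype="ARD", pi="Equal")
mcmc = pymc.MCMC(mod)
mcmc.sample(iter=10000, burn=2000)
qtrace = mcmc.trace('Qparams')[:]  # posterior samples of the scaled Q entries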
def test_transformed(self):
    n = 18
    at_bats = 45 * np.ones(n, dtype=int)
    hits = np.random.randint(1, 40, size=n, dtype=int)
    draws = 50

    with pm.Model() as model:
        phi = pm.Beta("phi", alpha=1.0, beta=1.0)

        kappa_log = pm.Exponential("logkappa", lam=5.0)
        kappa = pm.Deterministic("kappa", at.exp(kappa_log))

        thetas = pm.Beta("thetas", alpha=phi * kappa,
                         beta=(1.0 - phi) * kappa, size=n)

        y = pm.Binomial("y", n=at_bats, p=thetas, observed=hits)
        gen = pm.sample_prior_predictive(draws)

    assert gen.prior["phi"].shape == (1, draws)
    assert gen.prior_predictive["y"].shape == (1, draws, n)
    assert "thetas" in gen.prior.data_vars
def make_model():
    import pickle  # cPickle on Python 2
    with open('reaction_kinetics_data.pickle', 'rb') as fd:
        data = pickle.load(fd)
    y_obs = data['y_obs']
    # The priors for the reaction rates:
    k1 = pymc.Lognormal('k1', mu=2, tau=1. / (10. ** 2), value=5.)
    k2 = pymc.Lognormal('k2', mu=4, tau=1. / (10. ** 2), value=5.)
    # The noise term
    #sigma = pymc.Uninformative('sigma', value=1.)
    sigma = pymc.Exponential('sigma', beta=1.)
    # The forward model
    re_solver = ReactionKineticsSolver()

    @pymc.deterministic
    def model_output(value=None, k1=k1, k2=k2):
        return re_solver(k1, k2)

    # The likelihood term
    @pymc.stochastic(observed=True)
    def output(value=y_obs, mod_out=model_output, sigma=sigma, gamma=1.):
        return gamma * pymc.normal_like(y_obs, mu=mod_out, tau=1 / sigma ** 2)

    return locals()
def make_poisson(n, intvl, mean0):
    """
    Make a PyMC model for inferring a Poisson distribution rate parameter, for
    a datum consisting of `n` counts observed in an interval of size `intvl`.
    The inference will use an exponential prior for the rate, with prior mean
    `mean0`.
    """
    # PyMC's exponential dist'n uses beta = 1/scale = 1/mean.
    # Here we initialize rate to n/intvl.
    rate = pymc.Exponential('rate', beta=1. / mean0, value=float(n) / intvl)

    # The expected number of counts, mu=rate*intvl, is a deterministic function
    # of the rate RV (and the constant intvl).
    @pymc.deterministic
    def mu(rate=rate):
        return rate * intvl

    # Poisson likelihood function:
    count = pymc.Poisson('count', mu=mu, value=n, observed=True)

    return locals()
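# Usage sketch (the numbers are made up): 42 counts in a 5-unit interval,
# with a prior mean rate of 10.
pois = make_poisson(n=42, intvl=5.0, mean0=10.0)
mcmc = pymc.MCMC(pois)
mcmc.sample(iter=10000, burn=2000)
print('posterior mean rate:', mcmc.trace('rate')[:].mean())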
import pymc as pm
import numpy as np
import matplotlib.pyplot as plt

parameter = pm.Exponential("poisson_param", 1)
data_generator = pm.Poisson("data_generator", parameter)
data_plus_one = data_generator + 1

# 'parents' influence another variable
# 'children' are the subjects of parent variables
parameter.children
data_generator.parents
data_generator.children

# 'value' attribute
parameter.value
data_generator.value
data_plus_one.value

# 'stochastic' vars - still random even if parents are known
# 'deterministic' vars - not random if parents are known

# Initializing variables:
# * name argument - retrieves posterior dist
# * class-specific arguments
# * size - array of independent stochastic vars
some_var = pm.DiscreteUniform("discrete_uni_var", 0, 4)
betas = pm.Uniform("betas", 0, 1, size=10)
betas.value
import numpy as np
import pymc as pm
from matplotlib import pyplot as plt

#count_data = np.loadtxt("txtdata.csv")
count_data = np.loadtxt("txtdata_sim.csv")
n_count_data = len(count_data)
print(count_data.mean())

alpha = 1.0 / count_data.mean()  # Recall count_data is the
                                 # variable that holds our txt counts
lambda_1 = pm.Exponential("lambda_1", alpha)
lambda_2 = pm.Exponential("lambda_2", alpha)
tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data)

@pm.deterministic
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
    out = np.zeros(n_count_data)
    out[:tau] = lambda_1  # lambda before tau is lambda_1
    out[tau:] = lambda_2  # lambda after (and including) tau is lambda_2
    return out

observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)
model = pm.Model([observation, lambda_1, lambda_2, tau])
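# A sketch of the usual next step (pymc 2.x), not part of the original script:
# run the MCMC sampler and pull posterior traces for the two rates and the
# switchpoint.
mcmc = pm.MCMC(model)
mcmc.sample(40000, 10000)
lambda_1_samples = mcmc.trace('lambda_1')[:]
lambda_2_samples = mcmc.trace('lambda_2')[:]
tau_samples = mcmc.trace('tau')[:]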
def compare_groups(list1, list2):
    data = list1 + list2
    count_data = np.array(data)
    n_count_data = len(count_data)
    plt.bar(np.arange(n_count_data), count_data, color="#348ABD")
    plt.xlabel("Time (days)")
    plt.ylabel("count of text-msgs received")
    plt.title("Did the viewers' ad viewing increase with the number of ads shown?")
    plt.xlim(0, n_count_data)
    #plt.show()

    alpha = 1.0 / count_data.mean()  # Recall count_data is the
                                     # variable that holds our txt counts
    print(alpha)
    lambda_1 = pm.Exponential("lambda_1", alpha)
    lambda_2 = pm.Exponential("lambda_2", alpha)
    tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data)

    @pm.deterministic
    def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
        out = np.zeros(n_count_data)
        out[:tau] = lambda_1  # lambda before tau is lambda_1
        out[tau:] = lambda_2  # lambda after (and including) tau is lambda_2
        return out

    observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)
    model = pm.Model([observation, lambda_1, lambda_2, tau])

    mcmc = pm.MCMC(model)
    mcmc.sample(40000, 10000, 1)

    lambda_1_samples = mcmc.trace('lambda_1')[:]
    lambda_2_samples = mcmc.trace('lambda_2')[:]
    tau_samples = mcmc.trace('tau')[:]
    print(tau_samples)

    # histogram of the samples:
    ax = plt.subplot(311)
    ax.set_autoscaley_on(False)
    plt.hist(lambda_1_samples, histtype='stepfilled', bins=30, alpha=0.85,
             label=r"posterior of $\lambda_1$", color="#A60628", density=True)
    plt.legend(loc="upper left")
    plt.title(r"""Posterior distributions of the variables
        $\lambda_1,\;\lambda_2,\;\tau$""")
    plt.xlim([0, 6])
    plt.ylim([0, 7])
    plt.xlabel(r"$\lambda_1$ value")

    ax = plt.subplot(312)
    ax.set_autoscaley_on(False)
    plt.hist(lambda_2_samples, histtype='stepfilled', bins=30, alpha=0.85,
             label=r"posterior of $\lambda_2$", color="#7A68A6", density=True)
    plt.legend(loc="upper left")
    plt.xlim([0, 6])
    plt.ylim([0, 7])
    plt.xlabel(r"$\lambda_2$ value")

    plt.subplot(313)
    w = 1.0 / tau_samples.shape[0] * np.ones_like(tau_samples)
    plt.hist(tau_samples, bins=n_count_data, alpha=1,
             label=r"posterior of $\tau$", color="#467821",
             weights=w, rwidth=2.)
    plt.xticks(np.arange(n_count_data))
    plt.legend(loc="upper left")
    plt.ylim([0, .75])
    plt.xlim([0, len(count_data)])
    plt.xlabel(r"$\tau$ (iterations)")
    plt.ylabel("probability")
    plt.show()
plt.plot(stormsYears, stormsNumbers, '-ok')
plt.xlim(year0, year1)
plt.xlabel("Year")
plt.ylabel("Number of storms")
general.set_grid_to_plot()
plt.savefig(general.folderPath2 + "exp2_storms2.png")
plt.clf()

switchpoint = pm.DiscreteUniform('switchpoint',
                                 lower=0,
                                 upper=len(stormsNumbers) - 1,
                                 doc='Switchpoint[year]')
avg = np.mean(stormsNumbers)
early_mean = pm.Exponential('early_mean', beta=1. / avg)
late_mean = pm.Exponential('late_mean', beta=1. / avg)

@pm.deterministic(plot=False)
def rate(s=switchpoint, e=early_mean, l=late_mean):
    # Concatenate Poisson means
    out = np.zeros(len(stormsNumbers))
    out[:s] = e
    out[s:] = l
    return out

storms = pm.Poisson('storms', mu=rate, value=stormsNumbers, observed=True)
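# A sketch of the usual next step (pymc 2.x), not part of the original script:
# sample the switchpoint model and report the most probable change year.
# stormsYears and stormsNumbers come from the surrounding script.
mcmc = pm.MCMC([switchpoint, early_mean, late_mean, rate, storms])
mcmc.sample(iter=40000, burn=10000)
s_post = mcmc.trace('switchpoint')[:]
print(stormsYears[int(np.median(s_post))])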
    0.23972844, -0.78645389, -0.21687104, -0.2939634, 0.51229013,
    0.04626286, 0.18329919, -1.12775839, -1.64187249, 0.33440094,
    -0.95224695, 0.15650266, -0.54056102, 0.12240128, -0.95397459,
    0.44806432, -1.02955556, 0.31740861, -0.8762523, 0.47377688,
    0.76516415, 0.27890419, -0.07819642, -0.13399348, 0.82877293,
    0.22308624, 0.7485783, -0.14700254, -1.03145657, 0.85641097,
    0.43396285, 0.47901653, 0.80137086, 0.33566812, 0.71443253,
    -1.57590815, -0.24090179, -2.0128344, 0.34503324, 0.12944091,
    -1.5327008, 0.06363034, 0.21042021, -0.81425636, 0.20209279,
    -1.48130423, -1.04983523, 0.16001774, -0.75239072, 0.33427956,
    -0.10224921, 0.26463561, -1.09374674, -0.72749811, -0.54892116,
    -1.89631844, -0.94393545, -0.2521341, 0.26840341, 0.23563219,
    0.35333094])

# Model: the data are truncated-normally distributed with unknown upper bound.
mu = pm.Normal('mu', 0, .01, value=0)
tau = pm.Exponential('tau', .01, value=1)
cutoff = pm.Exponential('cutoff', 1, value=1.3)
D = pm.TruncatedNormal('D', mu, tau, -np.inf, cutoff, value=data, observed=True)

M = pm.MCMC([mu, tau, cutoff, D])
# Use a TruncatedMetropolis step method that will never propose jumps below
# D's maximum value.
M.use_step_method(TruncatedMetropolis, cutoff, D.value.max(), np.inf)
# Get a handle to the step method handling cutoff to investigate its behavior.
S = M.step_method_dict[cutoff][0]
def mk_multi_bayes(tree, chars, nregime, qidx, pi="Equal",
                   seglen=0.02, stepsize=0.05):
    """
    Create a Bayesian multi-mk model. User specifies which regime models
    to use and the Bayesian model finds the switchpoints.

    Args:
        tree (Node): Root node of tree.
        chars (dict): Dict mapping tip labels to discrete character states.
          Character states must be in the form of [0, 1, 2, ...]
        nregime (int): The number of distinct regimes to test. Set to 1 for
          an Mk model, set to greater than 1 for a multi-regime Mk model.
        qidx (np.array): Index specifying the model to test.
            columns:
                0, 1, 2 - index axes of q
                3 - index of params
            This scheme allows flexible specification of models. E.g.:
            Symmetric mk2:
                params = [0.2]; qidx = [[0,0,1,0], [0,1,0,0]]
            Asymmetric mk2:
                params = [0.2, 0.6]; qidx = [[0,0,1,0], [0,1,0,1]]
            NOTE: The qidx corresponding to the first q matrix (first
            column 0) is always the root regime.
        pi (str or np.array): Option to weight the root node by given values.
          Either a string containing the method or an array of weights.
          Weights should be given in order. Accepted methods of weighting root:
            Equal: flat prior
            Equilibrium: Prior equal to stationary distribution of Q matrix
            Fitzjohn: Root states weighted by how well they explain the
              data at the tips.
        seglen (float): Size of segments to break tree into. The smaller
          this value, the more "fine-grained" the analysis will be.
          Optional, defaults to 2% of the root-to-tip length.
        stepsize (float): Maximum size of steps for switchpoints to take.
          Optional, defaults to 5% of root-to-tip length.
    """
    if type(chars) == dict:
        data = chars.copy()
        chars = [chars[l] for l in [n.label for n in tree.leaves()]]
    else:
        data = dict(zip([n.label for n in tree.leaves()], chars))
    # Preparations
    nchar = len(set(chars))
    nparam = len(set([n[-1] for n in qidx]))
    # This model has 2 components: Q parameters and switchpoints.
    # They are combined in a custom likelihood function.

    ###########################################################################
    # Switchpoint:
    ###########################################################################
    # Modeling the movement of the regime shift(s) is the tricky part.
    # Regime shifts will only be allowed to happen at a node.
    seg_map = tree_map(tree, seglen)
    switch = [None] * (nregime - 1)
    for regime in range(nregime - 1):
        switch[regime] = make_switchpoint_stoch(
            seg_map, name=str("switch_{}".format(regime)))

    ###########################################################################
    # Qparams:
    ###########################################################################
    # Each Q parameter is an exponential
    Qparams = [None] * nparam
    for i in range(nparam):
        Qparams[i] = pymc.Exponential(name=str("Qparam_{}".format(i)),
                                      beta=1.0, value=0.1 * (i + 1))

    ###########################################################################
    # Likelihood
    ###########################################################################
    # The likelihood function
    l = cyexpokit.make_mklnl_func(tree, data, nchar, nregime, qidx)

    @pymc.deterministic
    def likelihood(q=Qparams, s=switch, name="likelihood"):
        return l(np.array(q),
                 np.array([x[0].ni for x in s], dtype=np.intp),
                 np.array([x[1] for x in s]))

    @pymc.potential
    def multi_mklik(lnl=likelihood):
        if not np.isnan(lnl):
            return lnl
        else:
            return -np.inf

    mod = pymc.MCMC(locals())
    for s in switch:
        mod.use_step_method(SwitchpointMetropolis, s, tree, seg_map,
                            stepsize=stepsize, seglen=seglen)
    return mod
def run_mcmc(gp, img, compare_img, transverse_sigma=1.0, motion_angle=0.0):
    """Estimate PSF using Markov Chain Monte Carlo

    gp - Gaussian priors - array of N objects with attributes a, b, sigma
    img - image to apply PSF to
    compare_img - comparison image
    transverse_sigma - prior
    motion_angle - prior

    Model a Point Spread Function consisting of the sum of N collinear
    Gaussians, blurred in the transverse direction and the result rotated.
    Each of the collinear Gaussians is parameterized by a (amplitude),
    b (center), and sigma (std. deviation). The Point Spread Function is
    applied to the image img and the result compared with the image
    compare_img.
    """
    print("gp.shape", gp.shape)
    print("gp", gp)
    motion_angle = np.deg2rad(motion_angle)
    motion_angle = pm.VonMises("motion_angle", motion_angle, 1.0,
                               value=motion_angle)
    transverse_sigma = pm.Exponential("transverse_sigma", 1.0,
                                      value=transverse_sigma)

    N = gp.shape[0]
    mixing_coeffs = pm.Exponential("mixing_coeffs", 1.0, size=N)
    #mixing_coeffs.set_value(gp['a'])
    mixing_coeffs.value = gp['a']
    longitudinal_sigmas = pm.Exponential("longitudinal_sigmas", 1.0, size=N)
    #longitudinal_sigmas.set_value(gp['sigma'])
    longitudinal_sigmas.value = gp['sigma']

    b = np.array(sorted(gp['b']), dtype=float)
    cut_points = (b[1:] + b[:-1]) * 0.5
    long_means = [None] * b.shape[0]
    print(long_means)
    left_mean = pm.Gamma("left_mean", 1.0, 2.5 * gp['sigma'][0])
    long_means[0] = cut_points[0] - left_mean
    right_mean = pm.Gamma("right_mean", 1.0, 2.5 * gp['sigma'][-1])
    long_means[-1] = cut_points[-1] + right_mean
    for ix in range(1, N - 1):
        long_means[ix] = pm.Uniform("mid%d_mean" % ix,
                                    lower=cut_points[ix - 1],
                                    upper=cut_points[ix])
    print("cut_points", cut_points)
    print("long_means", long_means)
    #longitudinal_means = pm.Normal("longitudinal_means", 0.0, 0.04, size=N)
    #longitudinal_means.value = gp['b']

    dtype = np.dtype([('a', float), ('b', float), ('sigma', float)])

    @pm.deterministic
    def psf(mixing_coeffs=mixing_coeffs,
            longitudinal_sigmas=longitudinal_sigmas,
            longitudinal_means=long_means,
            transverse_sigma=transverse_sigma,
            motion_angle=motion_angle):
        gp = np.ones((N,), dtype=dtype)
        gp['a'] = mixing_coeffs
        gp['b'] = longitudinal_means
        gp['sigma'] = longitudinal_sigmas
        motion_angle_deg = np.rad2deg(motion_angle)
        if True:
            print("gp: a", mixing_coeffs)
            print("    b", longitudinal_means)
            print("    s", longitudinal_sigmas)
            print("tr-sigma", transverse_sigma, "angle=", motion_angle_deg)
        return generate_sum_gauss(gp, transverse_sigma, motion_angle_deg)

    @pm.deterministic
    def image_fitness(psf=psf, img=img, compare_img=compare_img):
        img_convolved = ndimage.convolve(img, psf)
        img_diff = img_convolved.astype(int) - compare_img
        return img_diff.std()

    if False:
        trial_psf = generate_sum_gauss(gp, 2.0, 50.0, plot_unrot_kernel=True,
                                       plot_rot_kernel=True, verbose=True)
        print("trial_psf", trial_psf.min(), trial_psf.mean(),
              trial_psf.max(), trial_psf.std())
        obs_psf = pm.Uniform("obs_psf", lower=-1.0, upper=1.0,
                             doc="Point Spread Function",
                             value=trial_psf, observed=True, verbose=False)

    print("image_fitness value started at", image_fitness.value)
    known_fitness = pm.Exponential("fitness", image_fitness + 0.001,
                                   value=0.669, observed=True)
    #mcmc = pm.MCMC([motion_angle, transverse_sigma, mixing_coeffs,
    #                longitudinal_sigmas, longitudinal_means,
    #                image_fitness, known_fitness], verbose=2)
    mcmc = pm.MCMC([motion_angle, transverse_sigma, mixing_coeffs,
                    longitudinal_sigmas, image_fitness, known_fitness,
                    left_mean, right_mean] + long_means,
                   verbose=2)
    pm.graph.dag(mcmc, format='png')
    plt.show()
    #mcmc.sample(20000, 1000)
    mcmc.sample(2000)
    motion_angle_samples = mcmc.trace("motion_angle")[:]
    transverse_sigma_samples = mcmc.trace("transverse_sigma")[:]
    image_fitness_samples = mcmc.trace("image_fitness")[:]

    best_fit = np.percentile(image_fitness_samples, 1.0)
    best_fit_selection = image_fitness_samples < best_fit

    print(mcmc.db.trace_names)
    for k in [k for k in mcmc.stats().keys() if k != "known_fitness"]:
        #samples = mcmc.trace(k)[:]
        samples = mcmc.trace(k).gettrace()
        print(samples.shape)
        selected_samples = samples[best_fit_selection]
        print(k, samples.mean(axis=0), samples.std(axis=0),
              selected_samples.mean(axis=0), selected_samples.std(axis=0))

    ax = plt.subplot(211)
    plt.hist(motion_angle_samples, histtype='stepfilled', bins=25, alpha=0.85,
             label="posterior of $p_\\theta$", color="#A60628", density=True)
    plt.legend(loc="upper right")
    plt.title("Posterior distributions of $p_\\theta$, $p_\\sigma$")

    ax = plt.subplot(212)
    plt.hist(transverse_sigma_samples, histtype='stepfilled', bins=25,
             alpha=0.85, label="posterior of $p_\\sigma$", color="#467821",
             density=True)
    plt.legend(loc="upper right")
    plt.show()

    for k, v in mcmc.stats().items():
        print(k, v)

    # deprecated? use discrepancy...
    print(mcmc.goodness())
    mcmc.write_csv("out.csv")
    pm.Matplot.plot(mcmc)
    plt.show()
def time_drug_evaluation(self):
    # fmt: off
    drug = np.array([
        101, 100, 102, 104, 102, 97, 105, 105, 98, 101, 100, 123, 105, 103,
        100, 95, 102, 106, 109, 102, 82, 102, 100, 102, 102, 101, 102, 102,
        103, 103, 97, 97, 103, 101, 97, 104, 96, 103, 124, 101, 101, 100,
        101, 101, 104, 100, 101
    ])
    placebo = np.array([
        99, 101, 100, 101, 102, 100, 97, 101, 104, 101, 102, 102, 100, 105,
        88, 101, 100, 104, 100, 100, 100, 101, 102, 103, 97, 101, 101, 100,
        101, 99, 101, 100, 100, 101, 100, 99, 101, 100, 102, 99, 100, 99
    ])
    # fmt: on

    y = pd.DataFrame({
        "value": np.r_[drug, placebo],
        "group": np.r_[["drug"] * len(drug), ["placebo"] * len(placebo)],
    })
    y_mean = y.value.mean()
    y_std = y.value.std() * 2

    sigma_low = 1
    sigma_high = 10
    with pm.Model():
        group1_mean = pm.Normal("group1_mean", y_mean, sd=y_std)
        group2_mean = pm.Normal("group2_mean", y_mean, sd=y_std)
        group1_std = pm.Uniform("group1_std", lower=sigma_low, upper=sigma_high)
        group2_std = pm.Uniform("group2_std", lower=sigma_low, upper=sigma_high)
        lambda_1 = group1_std**-2
        lambda_2 = group2_std**-2

        nu = pm.Exponential("ν_minus_one", 1 / 29.0) + 1

        pm.StudentT("drug", nu=nu, mu=group1_mean, lam=lambda_1, observed=drug)
        pm.StudentT("placebo", nu=nu, mu=group2_mean, lam=lambda_2,
                    observed=placebo)
        diff_of_means = pm.Deterministic("difference of means",
                                         group1_mean - group2_mean)
        pm.Deterministic("difference of stds", group1_std - group2_std)
        pm.Deterministic(
            "effect size",
            diff_of_means / np.sqrt((group1_std**2 + group2_std**2) / 2))
        pm.sample(draws=20000, cores=4, chains=4, progressbar=False,
                  compute_convergence_checks=False)
def rn_model_load(analysis_frequencies, analysis_power):
    # __all__ = ['analysis_power', 'analysis_frequencies', 'power_law_index',
    #            'power_law_norm', 'power_law_spectrum', 'spectrum']
    estimate = rn_utils.do_simple_fit(analysis_frequencies, analysis_power)
    c_estimate = estimate[0]
    m_estimate = estimate[1]

    # Define data and stochastics
    power_law_index = pymc.Uniform('power_law_index',
                                   value=m_estimate,
                                   lower=0.0,
                                   upper=m_estimate + 2,
                                   doc='power law index')
    power_law_norm = pymc.Uniform('power_law_norm',
                                  value=c_estimate,
                                  lower=c_estimate * 0.8,
                                  upper=c_estimate * 1.2,
                                  doc='power law normalization')

    # Model for the power law spectrum
    @pymc.deterministic(plot=False)
    def power_law_spectrum(p=power_law_index,
                           a=power_law_norm,
                           f=analysis_frequencies):
        """A pure and simple power law model"""
        out = a * (f ** (-p))
        return out

    #@pymc.deterministic(plot=False)
    #def power_law_spectrum_with_constant(p=power_law_index, a=power_law_norm,
    #                                     c=constant, f=frequencies):
    #    """Simple power law with a constant"""
    #    out = empty(frequencies)
    #    out = c + a / (f ** p)
    #    return out

    #@pymc.deterministic(plot=False)
    #def broken_power_law_spectrum(p2=power_law_index_above,
    #                              p1=power_law_index_below,
    #                              bf=break_frequency,
    #                              a=power_law_norm,
    #                              f=analysis_frequencies):
    #    """A broken power law model"""
    #    out = np.empty(len(f))
    #    out[f < bf] = a * (f[f < bf] ** (-p1))
    #    out[f > bf] = a * (f[f >= bf] ** (-p2)) * bf ** (p2 - p1)
    #    return out

    # This is the PyMC model we will use: fit the model defined in
    # power_law_spectrum to the power spectrum we are analyzing.
    spectrum = pymc.Exponential('spectrum',
                                beta=1.0 / power_law_spectrum,
                                value=analysis_power,
                                observed=True)
    return locals()
        for v in range(len(gap)):
            ret.append(chunk[v] + gap[v])
        return ret
    return predict

predictions = [
    mc.Deterministic(eval=chunkPrediction(i),
                     name='chunk%sPrediction' % i,
                     parents={'gap': gaps[i]},
                     doc='chunk %s prediction' % i)
    for i in range(len(chunks) - 1)
]
noise = mc.Exponential('noise', 1, 1)
observations = [
    mc.Normal('chunk%sObservation' % i,
              predictions[i - 1],
              noise,
              value=chunks[i - 1]['mouth'],
              observed=True)
    for i in range(1, len(chunks))
]

@mc.deterministic
def mouthChanges(early=mouthFactor[0], late=mouthFactor[1]):
    return late - early
def bayes_ttest(groups=None, N=40, show=False):
    """ Run a Bayesian t-test on sample or true data. """
    if groups is None:  # Generate some data
        group1, group2 = gen_data(N=40)
    elif len(groups) != 2:
        print('T-test requires only 2 groups, not %i' % len(groups))
        return None
    else:
        group1, group2 = groups
    pooled = np.concatenate((group1, group2))  # Pooled data

    # Establish priors
    mu1 = pm.Normal("mu_1", mu=pooled.mean(), tau=1.0 / pooled.var() / N)
    mu2 = pm.Normal("mu_2", mu=pooled.mean(), tau=1.0 / pooled.var() / N)
    sig1 = pm.Uniform("sigma_1", lower=pooled.var() / 1000.0,
                      upper=pooled.var() * 1000)
    sig2 = pm.Uniform("sigma_2", lower=pooled.var() / 1000.0,
                      upper=pooled.var() * 1000)
    v = pm.Exponential("nu", beta=1.0 / 29)

    # Set up posterior distribution
    t1 = pm.NoncentralT("t_1", mu=mu1, lam=1.0 / sig1, nu=v,
                        value=group1, observed=True)
    t2 = pm.NoncentralT("t_2", mu=mu2, lam=1.0 / sig2, nu=v,
                        value=group2, observed=True)

    # Generate the model
    model = pm.Model([t1, mu1, sig1, t2, mu2, sig2, v])  # Push priors
    mcmc = pm.MCMC(model)  # Generate MCMC object
    mcmc.sample(40000, 10000, 2)  # Run MCMC sampler

    # "trace" - get the numerical results
    mus1 = mcmc.trace('mu_1')[:]
    mus2 = mcmc.trace('mu_2')[:]
    sigmas1 = mcmc.trace('sigma_1')[:]
    sigmas2 = mcmc.trace('sigma_2')[:]
    nus = mcmc.trace('nu')[:]

    diff_mus = mus1 - mus2  # Difference in mus
    diff_sigmas = sigmas1 - sigmas2
    normality = np.log(nus)
    effect_size = (mus1 - mus2) / np.sqrt((sigmas1**2 + sigmas2**2) / 2.)

    print('\n Group 1 mu: %.4f\n Group 2 mu: %.4f\n Effect size: %.4f'
          % (mus1.mean(), mus2.mean(), effect_size.mean()))

    if show:  # Plot some basic metrics if desired
        from pymc.Matplot import plot as mcplot
        # mcplot(mcmc)  # This plots 5 graphs, only useful as a benchmark.

        # Finally, what can this tell us about the null hypothesis?
        # Split distribution
        fig2 = plt.figure()
        ax2 = fig2.add_subplot(121)
        minx = min(min(mus1), min(mus2))
        maxx = max(max(mus1), max(mus2))
        xs = np.linspace(minx, maxx, 1000)
        gkde1 = stats.gaussian_kde(mus1)
        gkde2 = stats.gaussian_kde(mus2)
        ax2.plot(xs, gkde1(xs), label=r'$\mu_1$')
        ax2.plot(xs, gkde2(xs), label=r'$\mu_2$')
        ax2.set_title(r'$\mu_1$ vs $\mu_2$')
        ax2.legend()

        # Difference of mus
        ax3 = fig2.add_subplot(122)
        minx = min(diff_mus)
        maxx = max(diff_mus)
        xs = np.linspace(minx, maxx, 1000)
        gkde = stats.gaussian_kde(diff_mus)
        ax3.plot(xs, gkde(xs), label=r'$\mu_1-\mu_2$')
        ax3.legend()
        ax3.axvline(0, color='#000000', alpha=0.3, linestyle='--')
        ax3.set_title(r'$\mu_1-\mu_2$')
        plt.show()
    return
def exponential_beta(n=2):
    with pm.Model() as model:
        pm.Beta("x", 3, 1, size=n, transform=None)
        pm.Exponential("y", 1, size=n, transform=None)
    return model.compute_initial_point(), model, None
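# Minimal usage sketch (an illustration, assuming the modern PyMC API used
# above): build the model, inspect the initial point, and draw prior samples.
point, model, _ = exponential_beta(n=2)
print(point)  # initial values for 'x' and 'y'
with model:
    prior = pm.sample_prior_predictive(100)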