def gamma_poisson(x, t):
    """
    x: number of failures (N vector)
    t: operation time, thousands of hours (N vector)
    """
    if x is not None:
        N = x.shape
    else:
        N = num_points
    # place an exponential prior on t, for when it is unknown
    t = pymc.Exponential('t', beta=1.0 / 50.0, value=t, size=N,
                         observed=(t is not None))
    alpha = pymc.Exponential('alpha', beta=1.0, value=1.0)
    beta = pymc.Gamma('beta', alpha=0.1, beta=1.0, value=1.0)
    theta = pymc.Gamma('theta', alpha=alpha, beta=beta, size=N)

    @pymc.deterministic
    def mu(theta=theta, t=t):
        return theta * t

    x = pymc.Poisson('x', mu=mu, value=x, observed=(x is not None))
    return locals()
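# Usage sketch (added, not from the original source): model factories in this
# PyMC2 style return locals(), which pymc.MCMC accepts directly. The
# failure-count and operation-time arrays below are hypothetical.
import numpy as np
import pymc

failures = np.array([5, 1, 5, 14, 3])
hours = np.array([94.3, 15.7, 62.9, 126.0, 5.24])
mcmc = pymc.MCMC(gamma_poisson(failures, hours))
mcmc.sample(iter=10000, burn=5000)
print(mcmc.trace('theta')[:].mean(axis=0))  # posterior mean failure rates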
def model_three():
    # Priors: modSig, hoc, mExt
    modSig = mc.Gamma('modSig', alpha=17.4, beta=0.0079, value=2000.)
    hoc = mc.Uniform('hoc', lower=1., upper=50000., value=6000.)
    mExt = mc.Gamma('mExt', alpha=5.6, beta=20.87, value=0.2)
    sigma = mc.Uniform('sigma', lower=0., upper=100., value=1.)

    # Model - switch mc and hoc
    @mc.deterministic
    def y_mean(modSig=modSig, mBulk=data.meanBulk, hoc=hoc,
               moistC=data.mcs, moistE=mExt):
        return roth.flameSpread(sigma=(modSig + 100.), bulk=mBulk, hoc=hoc,
                                moist=moistC, mExt=moistE)

    # Likelihood: N(y_mean, sigma^2), where sigma is pulled from a
    # uniform distribution.
    y_obs = mc.Normal('y_obs', value=data.rates, mu=y_mean,
                      tau=sigma**-2, observed=True)

    # Add a posterior-predictive deterministic for the data
    @mc.deterministic
    def y_sim(mu=y_mean, sigma=sigma):
        return mc.rnormal(mu, sigma**-2)

    return vars()
def make_model():
    # Construct the prior term
    location = pymc.Uniform('location', lower=[0, 0], upper=[1, 1])
    # The locations of the sensors
    X = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]
    # The output of the model
    solver = Solver(X=X)

    @pymc.deterministic(plot=False)
    def model_output(value=None, loc=location):
        return solver(loc)

    # The hyper-parameters of the noise
    alpha = pymc.Exponential('alpha', beta=1.)
    beta = pymc.Exponential('beta', beta=1.)
    tau = pymc.Gamma('tau', alpha=alpha, beta=beta)
    # Load the observed data
    data = np.loadtxt('observed_data')

    # The observations at the sensor locations
    @pymc.stochastic(dtype=float, observed=True)
    def sensors(value=data, mu=model_output, tau=tau, gamma=1.):
        """The value of the response at the sensors."""
        return gamma * pymc.normal_like(value, mu=mu, tau=tau)

    return locals()
def mixture_model(random_seed=1234):
    """Sample mixture model to use in benchmarks"""
    np.random.seed(random_seed)  # was hard-coded to 1234, ignoring the argument
    size = 1000
    w_true = np.array([0.35, 0.4, 0.25])
    mu_true = np.array([0.0, 2.0, 5.0])
    sigma = np.array([0.5, 0.5, 1.0])
    component = np.random.choice(mu_true.size, size=size, p=w_true)
    x = np.random.normal(mu_true[component], sigma[component], size=size)

    with pm.Model() as model:
        w = pm.Dirichlet("w", a=np.ones_like(w_true))
        mu = pm.Normal("mu", mu=0.0, sd=10.0, shape=w_true.shape)
        enforce_order = pm.Potential(
            "enforce_order",
            at.switch(mu[0] - mu[1] <= 0, 0.0, -np.inf)
            + at.switch(mu[1] - mu[2] <= 0, 0.0, -np.inf),
        )
        tau = pm.Gamma("tau", alpha=1.0, beta=1.0, shape=w_true.shape)
        pm.NormalMixture("x_obs", w=w, mu=mu, tau=tau, observed=x)

    # Initialization can be poorly specified; this is a hack to make it work
    start = {
        "mu": mu_true.copy(),
        "tau_log__": np.log(1.0 / sigma**2),
        "w_stickbreaking__": np.array([-0.03, 0.44]),
    }

    return model, start
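# Usage sketch (added): sampling the benchmark model from its start dict.
# `initvals` is the PyMC v4 spelling (the snippet uses aesara.tensor as `at`);
# in PyMC3 the same dict would be passed as `start=`.
import pymc as pm

model, start = mixture_model(random_seed=1234)
with model:
    idata = pm.sample(draws=1000, tune=1000, chains=2, initvals=start)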
def model_gen():
    variables = []
    intercept = pymc.Normal("intercept", mu=0, tau=50**-2)
    sd = pymc.Gamma("sd", alpha=3, beta=2.0)
    responses = pymc.Normal("responses", mu=zeros((nPredictors, 1)),
                            tau=ones((nPredictors, 1)) * 5**-2)
    variables.append(intercept)
    variables.append(responses)
    variables.append(sd)
    obsMeans = intercept + pymc.sum(responses * observedPredictors, axis=0)
    obs = pymc.Normal("obs", mu=obsMeans, tau=sd**-2,
                      observed=True, value=data)
    variables.append(obs)
    return variables
def model_gen():
    variables = []
    means = pymc.Normal("means", mu=zeros(dimensions), tau=ones(dimensions))
    sds = pymc.Gamma("sds", alpha=ones(dimensions) * 1,
                     beta=ones(dimensions) * 1)
    variables.append(means)
    variables.append(sds)

    @pymc.deterministic
    def precisions(stdev=sds):
        precisions = (ones(shape) * (stdev**-2)[:, newaxis]).ravel()
        return precisions

    @pymc.deterministic
    def obsMeans(means=means):
        return (ones(shape) * means[:, newaxis]).ravel()

    obs = pymc.Normal("obs", mu=obsMeans, tau=precisions,
                      observed=True, value=data.ravel())
    variables.append(obs)
    return variables
def make_model():
    gamma = 1.
    kappa = pm.Gamma('kappa', 4., 1., size=5)
    sigma2 = pm.Gamma('sigma2', 0.1, 1., value=100.)
    data = np.loadtxt('data.txt').reshape((7, 6))
    y = data[:, 1:]
    y = y.reshape((1, y.shape[0] * y.shape[1])) / 500.
    f = CatalysisModelDMNLESS()

    @pm.deterministic
    def model_output(kappa=kappa):
        return f(kappa)['f']

    @pm.stochastic(observed=True)
    def output(value=y, model_output=model_output, sigma2=sigma2, gamma=gamma):
        # use the stochastic's `value` argument, not the closed-over `y`
        return gamma * pm.normal_like(value, model_output, 1. / sigma2)

    return locals()
def PearsonModel(size, snratio, g, g_true, npostsamples=5):
    ###
    unique_g_true = np.unique(g_true)

    ###
    shearcal_m = pymc.Uniform('shearcal_m', -1, 1)
    shearcal_c = pymc.Uniform('shearcal_c', -0.05, 0.05)

    @pymc.deterministic(trace=False)
    def mu(m=shearcal_m, c=shearcal_c):
        return (1 + m) * g_true + c

    ###
    tau = pymc.Gamma('tau', alpha=25 * .75, beta=.75)

    @pymc.deterministic
    def sigma(tau=tau):
        return 1. / np.sqrt(tau)

    ###
    kurtosis = pymc.Uniform('kurtosis', 0.6, 10)

    ###
    @pymc.stochastic(observed=True)
    def data(value=g, mu=mu, sig=sigma, kur=kurtosis):
        x = value - mu
        norm = 2**(2 * kur - 2) * np.abs(scipy.special.gamma(kur))**2 / (
            np.pi * sig * scipy.special.gamma(2 * kur - 1))
        prob = norm * (1 + (x / sig)**2)**(-kur)
        return np.sum(np.log(prob))

    ###
    @pymc.deterministic
    def postpred_g(g_true=unique_g_true, m=shearcal_m, c=shearcal_c,
                   sigma=sigma, kur=kurtosis, npostsamples=npostsamples):
        mu = (1 + m) * g_true + c
        samples = PearsonSamples(sigma, kur, npostsamples * len(g_true))
        return (np.reshape(samples, (npostsamples, -1)) + mu).T

    return locals()
def VoigtModel_SNPowerlaw(size, snratio, g, g_true, npostsamples=5):
    g = np.ascontiguousarray(g.astype(np.double))
    g_true = np.ascontiguousarray(g_true.astype(np.double))
    unique_g_true = np.unique(g_true)

    ###
    shearcal_m = pymc.Uniform('shearcal_m', -1, 1)
    shearcal_c = pymc.Uniform('shearcal_c', -0.05, 0.05)

    @pymc.deterministic(trace=False)
    def mu(m=shearcal_m, c=shearcal_c):
        return np.ascontiguousarray((1 + m) * g_true + c)

    ###
    tau = pymc.Gamma('tau1', alpha=25 * .75, beta=.75)

    @pymc.deterministic
    def sigma(tau=tau):
        return 1. / np.sqrt(tau)

    ###
    loggamma = pymc.Uniform('loggamma', -7, 2.5)

    @pymc.deterministic
    def gamma(loggamma=loggamma):
        return np.exp(loggamma)

    ###
    @pymc.stochastic(observed=True)
    def data(value=g, mu=mu, sigma=sigma, gamma=gamma):
        return vtools.likelihood(value, mu, sigma, gamma)

    ###
    @pymc.deterministic
    def postpred_g(g_true=unique_g_true, m=shearcal_m, c=shearcal_c,
                   sigma=sigma, gamma=gamma, npostsamples=npostsamples):
        mu = (1 + m) * g_true + c
        samples = vtools.voigtSamples(sigma, gamma,
                                      npostsamples * len(g_true))
        return (np.reshape(samples, (npostsamples, -1)) + mu).T

    return locals()
def test_broadcasting_in_shape(self):
    with pm.Model() as model:
        mu = pm.Gamma("mu", 1.0, 1.0, shape=2)
        comp_dists = pm.Poisson.dist(mu, shape=2)
        mix = pm.MixtureSameFamily(
            "mix", w=np.ones(2) / 2, comp_dists=comp_dists, shape=(1000,)
        )
        prior = pm.sample_prior_predictive(samples=self.n_samples)

    assert prior["mix"].shape == (self.n_samples, 1000)
def model2(x, g, p2):
    a = pm.Normal('a', mu=p2['a'], tau=1.0 / p2['a'])
    intercept = pm.Normal('intercept', p2['intercept'], 1 / p2['intercept'])
    obs_tau = pm.Gamma('obs_tau', alpha=0.1, beta=3)

    @pm.deterministic
    def line(x=x, a=a):
        return a * x

    y = pm.Normal('y', mu=line, tau=obs_tau, value=g, observed=True)
    return locals()
def BentVoigtModel3(size, snratio, g, g_true):
    g = np.ascontiguousarray(g.astype(np.double))
    g_true = np.ascontiguousarray(g_true.astype(np.double))

    ###
    # sizepivot_x = pymc.Uniform('sizepivot_x', 1.0, 8.0)
    sizepivot_x = pymc.Normal('sizepivot_x', 2.0, 1. / 0.2**2)  # tau
    sizepivotm_y = pymc.Normal('sizepivotm_y', 0.0, 1. / 0.08**2)  # tau
    sizeslope_m = pymc.Uniform('sizeslope_m', -6, 6)
    shearcal_c = pymc.Uniform('shearcal_c', -0.1, 0.1)

    @pymc.deterministic(trace=False)
    def shearcal_m(sizepivot_x=sizepivot_x, sizepivotm_y=sizepivotm_y,
                   slope=sizeslope_m):
        m = np.zeros_like(size)
        m[size >= sizepivot_x] = sizepivotm_y
        m[size < sizepivot_x] = slope * (size[size < sizepivot_x]
                                         - sizepivot_x) + sizepivotm_y
        return m

    @pymc.deterministic(trace=False)
    def mu(m=shearcal_m, c=shearcal_c):
        return np.ascontiguousarray((1 + m) * g_true + c)

    ###
    tau = pymc.Gamma('tau1', alpha=25 * .75, beta=.75)

    @pymc.deterministic
    def sigma(tau=tau):
        return 1. / np.sqrt(tau)

    ###
    loggamma = pymc.Uniform('loggamma', -7, 2.5)

    @pymc.deterministic
    def gamma(loggamma=loggamma):
        return np.exp(loggamma)

    ###
    @pymc.stochastic(observed=True)
    def data(value=g, mu=mu, sigma=sigma, gamma=gamma):
        return vtools.likelihood(value, mu, sigma, gamma)

    return locals()
def SingleGaussianModel(size, snratio, g, g_true, npostsamples=5):
    g = np.ascontiguousarray(g.astype(np.double))
    g_true = np.ascontiguousarray(g_true.astype(np.double))
    unique_g_true = np.unique(g_true)
    twopi = 2 * np.pi

    ###
    shearcal_m = pymc.Uniform('shearcal_m', -1, 1)
    shearcal_c = pymc.Uniform('shearcal_c', -0.05, 0.05)

    @pymc.deterministic(trace=False)
    def mu(m=shearcal_m, c=shearcal_c):
        return (1 + m) * g_true + c

    ###
    tau = pymc.Gamma('tau', alpha=25 * .75, beta=.75)

    @pymc.deterministic
    def sigma(tau=tau):
        return 1. / np.sqrt(tau)

    ###
    @pymc.stochastic(observed=True)
    def data(value=g, mu=mu, tau=tau):
        half_delta2 = 0.5 * (g - mu)**2
        prob = np.exp(-half_delta2 * tau) / np.sqrt(twopi / tau)
        return np.sum(np.log(prob))

    ###
    # @pymc.deterministic
    # def postpred_g(g_true=unique_g_true, m=shearcal_m, c=shearcal_c,
    #                sigma=sigma, npostsamples=npostsamples):
    #     mu = (1 + m) * g_true + c
    #     samples = sigma * np.random.standard_normal(npostsamples * len(g_true))
    #     return (np.reshape(samples, (npostsamples, -1)) + mu).T
    ###
    return locals()
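# Equivalence check (added sketch, not in the original): the hand-rolled
# Gaussian log-likelihood in SingleGaussianModel's `data` stochastic is the
# standard normal log-density, so it matches PyMC2's built-in normal_like.
import numpy as np
import pymc

value = np.array([0.1, -0.2, 0.3])
mu, tau = 0.05, 4.0
by_hand = np.sum(np.log(np.exp(-0.5 * (value - mu)**2 * tau)
                        / np.sqrt(2 * np.pi / tau)))
assert np.allclose(by_hand, pymc.normal_like(value, mu=mu, tau=tau))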
def simple_hierarchical_model(y):
    """ PyMC implementation of the simple hierarchical model from
    section 3.1.1::

        y[i,j] | alpha[j], sigma^2 ~ N(alpha[j], sigma^2)  i = 1, ..., n_j, j = 1, ..., J;
        alpha[j] | mu, tau^2 ~ N(mu, tau^2)  j = 1, ..., J.

        sigma^2 ~ Inv-Chi^2(5, 20)
        mu ~ N(5, 5^2)
        tau^2 ~ Inv-Chi^2(2, 10)

    Parameters
    ----------
    y : a list of lists of observed data, len(y) = J, len(y[j]) = n_j
    """
    inv_sigma_sq = mc.Gamma('inv_sigma_sq', alpha=2.5, beta=50.)
    mu = mc.Normal('mu', mu=5., tau=5.**-2.)
    inv_tau_sq = mc.Gamma('inv_tau_sq', alpha=1., beta=10.)

    J = len(y)
    alpha = mc.Normal('alpha', mu=mu, tau=inv_tau_sq, size=J)
    y = [mc.Normal('y_%d' % j, mu=alpha[j], tau=inv_sigma_sq,
                   value=y[j], observed=True) for j in range(J)]

    @mc.deterministic
    def mu_by_tau(mu=mu, tau=inv_tau_sq**-.5):
        return mu / tau

    @mc.deterministic
    def alpha_by_sigma(alpha=alpha, sigma=inv_sigma_sq**-.5):
        return alpha / sigma

    alpha_bar = mc.Lambda('alpha_bar', lambda alpha=alpha: pl.sum(alpha))

    @mc.deterministic
    def alpha_bar_by_sigma(alpha_bar=alpha_bar, sigma=inv_sigma_sq**-.5):
        return alpha_bar / sigma

    return vars()
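# Parameterization note (added): the code above matches the docstring priors
# via the identity that sigma^2 ~ scaled-Inv-chi^2(nu, s^2) is equivalent to
# 1/sigma^2 ~ Gamma(alpha=nu/2, beta=nu*s^2/2). With nu=5, s^2=20 this gives
# Gamma(2.5, 50) for inv_sigma_sq, and nu=2, s^2=10 gives Gamma(1, 10) for
# inv_tau_sq. A quick Monte Carlo sanity check with scipy:
import numpy as np
from scipy import stats

nu, s2 = 5.0, 20.0
inv_sigma_sq = stats.gamma(a=nu / 2, scale=2.0 / (nu * s2)).rvs(size=200000)
sigma_sq = 1.0 / inv_sigma_sq
# scaled-Inv-chi^2(nu, s^2) has mean nu*s^2/(nu - 2) for nu > 2
assert np.isclose(sigma_sq.mean(), nu * s2 / (nu - 2), rtol=0.05)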
def model(x, g, p0):
    a = pm.Normal('a', mu=p0['a'], tau=1.0 / p0['a'])
    b = pm.Normal('b', mu=p0['b'], tau=1.0 / p0['b'])
    # intercept = pm.Uniform('intercept', 0, 1, value=0.01)
    obs_tau = pm.Gamma('obs_tau', alpha=0.1, beta=3)

    @pm.deterministic
    def power(x=x, a=a, b=b):
        return a * x**b

    y = pm.Normal('y', mu=power, tau=obs_tau, value=g, observed=True)
    return locals()
def _model(data):
    #lam1 = mc.Uniform('lam1', 1., upper=10, value=5)
    #p = mc.Uniform('p', 1, 5, value=2)
    #lam2 = mc.Uniform('lam2', 50, upper=200, value=100)
    alpha = mc.Uniform('alpha', lower=0.1, upper=100, value=0.5)
    beta = mc.Uniform('beta', lower=0.1, upper=100, value=1)
    #p = mc.Uniform('p', 0.1, 1, value=0.9)
    #c = mc.Uniform('c', 8, 12, value=10)
    y = mc.Gamma('y', alpha=alpha, beta=beta, value=data, observed=True)
    return locals()  # added: return the model variables (assumed intent)
def test_density_dist(self):
    obs = np.random.normal(-1, 0.1, size=10)
    with pm.Model():
        mu = pm.Normal("mu", 0, 1)
        sd = pm.Gamma("sd", 1, 2)
        a = pm.DensityDist(
            "a",
            mu,
            sd,
            random=lambda mu, sd, rng=None, size=None: rng.normal(
                loc=mu, scale=sd, size=size),
            observed=obs,
        )
        prior = pm.sample_prior_predictive(return_inferencedata=False)

    npt.assert_almost_equal(prior["a"].mean(), 0, decimal=1)
def test_respects_shape(self):
    for shape in (2, (2,), (10, 2), (10, 10)):
        with pm.Model():
            mu = pm.Gamma("mu", 3, 1, size=1)
            goals = pm.Poisson("goals", mu, size=shape)
            trace1 = pm.sample_prior_predictive(
                10, return_inferencedata=False, var_names=["mu", "mu", "goals"]
            )
            trace2 = pm.sample_prior_predictive(
                10, return_inferencedata=False, var_names=["mu", "goals"]
            )
        if shape == 2:  # want to test shape as an int
            shape = (2,)
        assert trace1["goals"].shape == (10,) + shape
        assert trace2["goals"].shape == (10,) + shape
def model_gen():
    variables = []
    factors = pymc.Normal(
        "factormagnitudes",
        mu=zeros(observations),
        tau=ones(observations),
    )
    limits = ones(dimensions) * -Inf
    limits[0] = 0.0
    loadings = pymc.TruncatedNormal("factorloadings", mu=ones(dimensions),
                                    tau=ones(dimensions) * (1**-2),
                                    a=limits, b=Inf)
    returnSDs = pymc.Gamma("residualsds", alpha=ones(dimensions) * 1,
                           beta=ones(dimensions) * .5)
    variables.append(loadings)
    variables.append(returnSDs)
    variables.append(factors)

    @pymc.deterministic
    def returnPrecisions(stdev=returnSDs):
        precisions = (ones(shape) * (stdev**-2)[:, newaxis]).ravel()
        return precisions

    @pymc.deterministic
    def meanReturns(factors=factors, loadings=loadings):
        means = factors[newaxis, :] * loadings[:, newaxis]
        return means.ravel()

    returns = pymc.Normal("returns", mu=meanReturns, tau=returnPrecisions,
                          observed=True, value=data.ravel())
    variables.append(returns)
    return variables
def test_hashing_of_rv_tuples():
    obs = np.random.normal(-1, 0.1, size=10)
    with pm.Model() as pmodel:
        mu = pm.Normal("mu", 0, 1)
        sigma = pm.Gamma("sigma", 1, 2)
        dd = pm.Normal("dd", observed=obs)
        for freerv in [mu, sigma, dd] + pmodel.free_RVs:
            for structure in [
                freerv,
                {"alpha": freerv, "omega": None},
                [freerv, []],
                (freerv, []),
            ]:
                assert isinstance(hashable(structure), int)
def getModel():
    D = pm.Dirichlet('1-Dirichlet', theta=[3, 2, 4])  #@UndefinedVariable
    C1 = pm.Categorical('2-Cat', D)  #@UndefinedVariable
    C2 = pm.Categorical('10-Cat', D)  #@UndefinedVariable
    C3 = pm.Categorical('11-Cat', D)  #@UndefinedVariable
    G0_0 = pm.Gamma('4-Gamma0_1', alpha=1, beta=1.5)  #@UndefinedVariable
    U1 = pm.Uniform('12-Unif', lower=-100, upper=500)  #@UndefinedVariable
    U2 = pm.Uniform('13-Unif', lower=-100, upper=500)  #@UndefinedVariable
    U3 = pm.Uniform('14-Unif', lower=-100, upper=500)  #@UndefinedVariable
    N0_1 = pm.Normal('5-Norm0_1', mu=U1, tau=1)  #@UndefinedVariable
    N0_2 = pm.Normal('6-Norm0_2', mu=U2, tau=1)  #@UndefinedVariable
    N0_3 = pm.Normal('7-Norm0_3', mu=U3, tau=1)  #@UndefinedVariable
    aMu = [N0_1.value, N0_2.value, N0_3.value]
    fL1 = lambda n=C1: np.select([n == 0, n == 1, n == 2], aMu)
    fL2 = lambda n=C2: np.select([n == 0, n == 1, n == 2], aMu)
    fL3 = lambda n=C3: np.select([n == 0, n == 1, n == 2], aMu)
    p_N1 = pm.Lambda('p_Norm1', fL1, doc='Pr[Norm|Cat]')
    p_N2 = pm.Lambda('p_Norm2', fL2, doc='Pr[Norm|Cat]')
    p_N3 = pm.Lambda('p_Norm3', fL3, doc='Pr[Norm|Cat]')
    N = pm.Normal('3-Norm', mu=p_N1, tau=1)  #@UndefinedVariable
    obsN1 = pm.Normal('8-Norm', mu=p_N2, tau=1, observed=True, value=0)  #@UndefinedVariable @UnusedVariable
    obsN2 = pm.Normal('9-Norm', mu=p_N3, tau=1, observed=True, value=150)  #@UndefinedVariable @UnusedVariable
    # N was originally listed twice in this container
    return pm.Model(
        [D, C1, C2, C3, N, G0_0, N0_1, N0_2, N0_3, obsN1, obsN2])
def fonnesbeck_example(Oxygen_Flux, Ymagnitude, verbose=True):
    # Linear regression
    n = pm.Normal('n', 0.0, tau=1e-5, value=0)
    m = pm.Normal('m', 0.0, tau=1e-5, value=0)
    # No container needed
    mu = n + m * Oxygen_Flux
    # Precision
    tau = pm.Gamma('tau', 0.01, 0.01, value=0.01)
    # Don't have to specify the size variable
    resp = pm.Normal('resp', mu, tau=tau, value=Ymagnitude, observed=True)

    mcmc = pm.MCMC(locals())
    mcmc.sample(30000, burn=10000)

    if verbose:
        print(n.value)
        print(n.summary())

    return m.value, n.value
x = HtWtData[:, 1]
y = HtWtData[:, 2]

# Re-center data at mean, to reduce autocorrelation in MCMC sampling.
# Standardize (divide by SD) to make initialization easier.
x_m = np.mean(x)
x_sd = np.std(x)
y_m = np.mean(y)
y_sd = np.std(y)
zx = (x - x_m) / x_sd
zy = (y - y_m) / y_sd

# THE MODEL
with pm.Model() as model:
    # define the priors
    tau = pm.Gamma('tau', 0.001, 0.001)
    beta0 = pm.Normal('beta0', mu=0, tau=1.0E-12)
    beta1 = pm.Normal('beta1', mu=0, tau=1.0E-12)
    mu = beta0 + beta1 * zx
    # define the likelihood
    yl = pm.Normal('yl', mu=mu, tau=tau, observed=zy)
    # Generate a MCMC chain
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(10000, step, start, progressbar=False)

# EXAMINE THE RESULTS
burnin = 5000
thin = 10

## Print summary for each trace
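# Added sketch (assumption: the standard Kruschke-style back-transform, not
# part of the original script): beta0/beta1 were sampled on the standardized
# (z) scale and can be mapped back to the original units of x and y.
b1 = trace['beta1'][burnin::thin] * y_sd / x_sd
b0 = (trace['beta0'][burnin::thin] * y_sd + y_m
      - trace['beta1'][burnin::thin] * y_sd * x_m / x_sd)
print(b0.mean(), b1.mean())  # intercept and slope on the original scale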
def oneway(x, y, data=None, control=None, iterations=24000, burn=4000,
           verbose=False):
    """
    Generate a oneway anova via bayesian markov-chain monte carlo

    :param x: str or ndarray, x axis (grouping axis)
    :param y: str or ndarray, y axis (values)
    :param data: pandas dataframe or None
    :param control: if x has more than two groups, the control group to
        compare against
    :param iterations: number of MCMC iterations
    :param burn: number of burn-in iterations
    :param verbose: bool; default=False; turns on pymc progress bar
    :return: a results object.
    """
    if data is None:
        x, y, _, data = jp.components.create_df(x, y, None)
    df = data.copy()
    df = df.reset_index()
    df[x] = df[x].astype('str')
    df[y] = df[y].astype('float').dropna()
    groups = sorted(set(df[x]))
    if len(groups) > 2 and control is None:
        raise ValueError('Need a control group')
    elif len(groups) <= 2 and control is None:
        control = groups[0]

    means = {}        # prior distributions
    sigmas = {}
    deltas_mean = {}  # deltas vs control
    deltas_sig = {}
    obs = {}          # observations
    nu = {}

    def delta(dist, control, exp):
        return dist[exp] - dist[control]

    for group in groups:
        means[group] = pm.Uniform('mean_{}'.format(group),
                                  np.percentile(df[y], 1),
                                  np.percentile(df[y], 99))
        nu[group] = pm.Uniform('nu_{}'.format(group), 0, 1000)
        sigmas[group] = pm.Gamma('sigma_{}'.format(group),
                                 np.median(df[y].dropna()), nu[group])
        obs[group] = pm.Normal('obs_{}'.format(group), means[group],
                               1 / sigmas[group]**2,
                               value=df[df[x] == group][y],
                               observed=True, trace=True)

    for group in groups:
        if group == control:
            continue
        deltas_mean[group] = pm.Deterministic(
            delta,
            doc='Delta function of mean',
            name='deltam_{}'.format(group),
            parents={'dist': means, 'exp': group, 'control': control},
            trace=True)
        deltas_sig[group] = pm.Deterministic(
            delta,
            doc='Delta function of sigma',
            name='deltas_{}'.format(group),
            parents={'dist': sigmas, 'exp': group, 'control': control},
            trace=True)

    mylist = [means[i] for i in groups] + [obs[i] for i in groups] + \
             [deltas_mean[i] for i in groups if i != control]
    mylist += [sigmas[i] for i in groups] + [nu[i] for i in groups] + \
              [deltas_sig[i] for i in groups if i != control]

    model = pm.Model(mylist)
    map_ = pm.MAP(model)
    map_.fit()
    mcmc = pm.MCMC(model)
    mcmc.sample(iterations, burn=burn, progress_bar=verbose)

    res = JbResult(mcmc, groups)
    for group in groups:
        if res.df is None:
            res.df = pd.DataFrame()
            res.df['Mean'] = mcmc.trace('mean_{}'.format(group))[:]
            res.df['Sigma'] = mcmc.trace('sigma_{}'.format(group))[:]
            res.df['Group'] = group
            if group == control:
                continue
            res.df['Mean Delta'] = mcmc.trace('deltam_{}'.format(group))[:]
            res.df['Sigma Delta'] = mcmc.trace('deltas_{}'.format(group))[:]
        else:
            tdf = pd.DataFrame()
            tdf['Mean'] = mcmc.trace('mean_{}'.format(group))[:]
            tdf['Sigma'] = mcmc.trace('sigma_{}'.format(group))[:]
            tdf['Group'] = group
            if group == control:
                res.df = res.df.append(tdf)
                continue
            tdf['Mean Delta'] = mcmc.trace('deltam_{}'.format(group))[:]
            tdf['Sigma Delta'] = mcmc.trace('deltas_{}'.format(group))[:]
            res.df = res.df.append(tdf)
    return res
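# Usage sketch (added; `jp.components` and `JbResult` come from the snippet's
# own package, so this only illustrates the intended call pattern on a
# hypothetical two-group DataFrame):
import numpy as np
import pandas as pd

df = pd.DataFrame({'treatment': ['a'] * 20 + ['b'] * 20,
                   'response': np.concatenate([np.random.normal(0, 1, 20),
                                               np.random.normal(0.5, 1, 20)])})
res = oneway('treatment', 'response', data=df, verbose=True)
print(res.df.groupby('Group')['Mean'].mean())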
hmm_models = pickle.load(f)['air1'].values()
means = [hmm.means_.T[0] for hmm in hmm_models]
variances = [hmm.covars_.T[0, 0] for hmm in hmm_models]
transitions = [hmm.transmat_ for hmm in hmm_models]

n_states = 3
means = np.array(zip(*means)).clip(0.001)
variances = np.array(zip(*variances))  # .clip(0.001)
transitions = np.array(zip(*transitions))
totals = np.sum(transitions, axis=2)[:, :, np.newaxis]
transitions = (transitions / totals)[:, :, :n_states - 1]

mean_params = [
    pymc.Gamma('mean_param{}'.format(i), alpha=1, beta=.1)
    for i in range(n_states * 2)
]
var_params = [
    pymc.Gamma('var_param{}'.format(i), alpha=1, beta=.1)
    for i in range(n_states * 2)
]
trans_params = [
    pymc.Beta('trans_params{}'.format(i), alpha=1, beta=1)
    for i in range(n_states**2)
]

mean_obs = []
mean_pred = []
var_obs = []
var_pred = []
## evaluate Logistic regression
mod1 = smf.glm(formula=formula, data=dta,
               family=sm.families.Binomial()).fit()
mod1.summary()

### LOGISTIC REGRESSION - BAYESIAN APPROACH
#df.head(5)
x1 = df[1]
x2 = df[2]
x3 = df[3]
x4 = df[7]
y = df[11]

## define hyper priors of our model
tau = pm.Gamma('tau', 1.e-3, 1.e-3, value=10.)
sigma = pm.Lambda('sigma', lambda tau=tau: tau**-.5)
beta0 = pm.Normal('beta0', 0., 1e-6, value=0.)
beta_clump_t = pm.Normal('beta_clump_t', 0., 1e-6, value=0.)
beta_cell_size = pm.Normal('beta_cell_size', 0., 1e-6, value=0.)
beta_cell_shape = pm.Normal('beta_cell_shape', 0., 1e-6, value=0.)
beta_chromatin = pm.Normal('beta_chromatin', 0., 1e-6, value=0.)

######################################## MODEL 1 ###################################################
## given betas and observed x, we predict y
# "model" the observed y values: again, I reiterate that PyMC treats y as
# evidence -- as fixed; it's going to use this as evidence in updating our
# belief about the "unobserved" parameters (b0, b1, and err), which are the
# things we're interested in inferring after all
import numpy as np
import pymc as pm
import matplotlib.pyplot as plt
from plot_post import plot_post

# THE DATA.
N = 30
z = 8
y = np.repeat([1, 0], [z, N - z])

# THE MODEL.
with pm.Model() as model:
    # Hyperprior on model index:
    model_index = pm.DiscreteUniform('model_index', lower=0, upper=1)
    # Prior
    nu = pm.Normal('nu', mu=0, tau=0.1)  # it is possible to use tau or sd
    eta = pm.Gamma('eta', .1, .1)
    theta0 = 1 / (1 + pm.exp(-nu))  # theta from model index 0
    theta1 = pm.exp(-eta)           # theta from model index 1
    theta = pm.switch(pm.eq(model_index, 0), theta0, theta1)
    # Likelihood
    y = pm.Bernoulli('y', p=theta, observed=y)
    # Sampling
    start = pm.find_MAP()
    steps = [pm.Metropolis([i]) for i in model.unobserved_RVs[1:]]
    steps.append(pm.ElemwiseCategoricalStep(var=model_index, values=[0, 1]))
    trace = pm.sample(10000, steps, start=start, progressbar=False)

# EXAMINE THE RESULTS.
burnin = 1000
thin = 5
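# Added sketch (assumption: the standard post-processing for this kind of
# model-indexing setup): the posterior probability of each model is the
# fraction of retained model_index samples equal to that index.
model_idx_sample = trace['model_index'][burnin::thin]
p_model0 = (model_idx_sample == 0).mean()
print('P(model 0 | data) = %.3f, P(model 1 | data) = %.3f'
      % (p_model0, 1 - p_model0))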
def run_mcmc(gp, img, compare_img, transverse_sigma=1.0, motion_angle=0.0):
    """Estimate PSF using Markov Chain Monte Carlo

    gp - Gaussian priors - array of N objects with attributes a, b, sigma
    img - image to apply PSF to
    compare_img - comparison image
    transverse_sigma - prior
    motion_angle - prior

    Model a Point Spread Function consisting of the sum of N collinear
    Gaussians, blurred in the transverse direction and the result rotated.
    Each of the collinear Gaussians is parameterized by a (amplitude),
    b (center), and sigma (std. deviation). The Point Spread Function is
    applied to the image img and the result compared with the image
    compare_img.
    """
    print "gp.shape", gp.shape
    print "gp", gp
    motion_angle = np.deg2rad(motion_angle)
    motion_angle = pm.VonMises("motion_angle", motion_angle, 1.0,
                               value=motion_angle)
    transverse_sigma = pm.Exponential("transverse_sigma", 1.0,
                                      value=transverse_sigma)

    N = gp.shape[0]
    mixing_coeffs = pm.Exponential("mixing_coeffs", 1.0, size=N)
    #mixing_coeffs.set_value(gp['a'])
    mixing_coeffs.value = gp['a']
    longitudinal_sigmas = pm.Exponential("longitudinal_sigmas", 1.0, size=N)
    #longitudinal_sigmas.set_value(gp['sigma'])
    longitudinal_sigmas.value = gp['sigma']

    b = np.array(sorted(gp['b']), dtype=float)
    cut_points = (b[1:] + b[:-1]) * 0.5
    long_means = [None] * b.shape[0]
    print long_means
    left_mean = pm.Gamma("left_mean", 1.0, 2.5 * gp['sigma'][0])
    long_means[0] = cut_points[0] - left_mean
    right_mean = pm.Gamma("right_mean", 1.0, 2.5 * gp['sigma'][-1])
    long_means[-1] = cut_points[-1] + right_mean
    for ix in range(1, N - 1):
        long_means[ix] = pm.Uniform("mid%d_mean" % ix,
                                    lower=cut_points[ix - 1],
                                    upper=cut_points[ix])
    print "cut_points", cut_points
    print "long_means", long_means
    #longitudinal_means = pm.Normal("longitudinal_means", 0.0, 0.04, size=N)
    #longitudinal_means.value = gp['b']

    dtype = np.dtype([('a', np.float), ('b', np.float), ('sigma', np.float)])

    @pm.deterministic
    def psf(mixing_coeffs=mixing_coeffs,
            longitudinal_sigmas=longitudinal_sigmas,
            longitudinal_means=long_means,
            transverse_sigma=transverse_sigma,
            motion_angle=motion_angle):
        gp = np.ones((N,), dtype=dtype)
        gp['a'] = mixing_coeffs
        gp['b'] = longitudinal_means
        gp['sigma'] = longitudinal_sigmas
        motion_angle_deg = np.rad2deg(motion_angle)
        if True:
            print "gp: a", mixing_coeffs
            print "    b", longitudinal_means
            print "    s", longitudinal_sigmas
            print "tr-sigma", transverse_sigma, "angle=", motion_angle_deg
        return generate_sum_gauss(gp, transverse_sigma, motion_angle_deg)

    @pm.deterministic
    def image_fitness(psf=psf, img=img, compare_img=compare_img):
        img_convolved = ndimage.convolve(img, psf)
        img_diff = img_convolved.astype(int) - compare_img
        return img_diff.std()

    if False:
        trial_psf = generate_sum_gauss(gp, 2.0, 50.0,
                                       plot_unrot_kernel=True,
                                       plot_rot_kernel=True, verbose=True)
        print "trial_psf", trial_psf.min(), trial_psf.mean(), \
            trial_psf.max(), trial_psf.std()
        obs_psf = pm.Uniform("obs_psf", lower=-1.0, upper=1.0,
                             doc="Point Spread Function",
                             value=trial_psf, observed=True, verbose=False)

    print "image_fitness value started at", image_fitness.value
    known_fitness = pm.Exponential("fitness", image_fitness + 0.001,
                                   value=0.669, observed=True)
    #mcmc = pm.MCMC([motion_angle, transverse_sigma, mixing_coeffs,
    #                longitudinal_sigmas, longitudinal_means,
    #                image_fitness, known_fitness], verbose=2)
    mcmc = pm.MCMC([motion_angle, transverse_sigma, mixing_coeffs,
                    longitudinal_sigmas, image_fitness, known_fitness,
                    left_mean, right_mean] + long_means, verbose=2)
    pm.graph.dag(mcmc, format='png')
    plt.show()

    #mcmc.sample(20000, 1000)
    mcmc.sample(2000)

    motion_angle_samples = mcmc.trace("motion_angle")[:]
    transverse_sigma_samples = mcmc.trace("transverse_sigma")[:]
    image_fitness_samples = mcmc.trace("image_fitness")[:]
    best_fit = np.percentile(image_fitness_samples, 1.0)
    best_fit_selection = image_fitness_samples < best_fit
    print mcmc.db.trace_names
    for k in [k for k in mcmc.stats().keys() if k != "known_fitness"]:
        #samples = mcmc.trace(k)[:]
        samples = mcmc.trace(k).gettrace()
        print samples.shape
        selected_samples = samples[best_fit_selection]
        print k, samples.mean(axis=0), samples.std(axis=0), \
            selected_samples.mean(axis=0), selected_samples.std(axis=0)

    ax = plt.subplot(211)
    plt.hist(motion_angle_samples, histtype='stepfilled', bins=25,
             alpha=0.85, label="posterior of $p_\\theta$",
             color="#A60628", normed=True)
    plt.legend(loc="upper right")
    plt.title("Posterior distributions of $p_\\theta$, $p_\\sigma$")

    ax = plt.subplot(212)
    plt.hist(transverse_sigma_samples, histtype='stepfilled', bins=25,
             alpha=0.85, label="posterior of $p_\\sigma$",
             color="#467821", normed=True)
    plt.legend(loc="upper right")
    plt.show()

    for k, v in mcmc.stats().iteritems():
        print k, v

    # deprecated? use discrepancy...
    print mcmc.goodness()
    mcmc.write_csv("out.csv")

    pm.Matplot.plot(mcmc)
    plt.show()
n = 21
a = 6
b = 2
sigma = 2
x = np.linspace(0, 1, n)
y_obs = a * x + b + np.random.normal(0, sigma, n)

data = pd.DataFrame(np.array([x, y_obs]).T, columns=['x', 'y'])
data.plot(x='x', y='y', kind='scatter', s=50)
#plt.show()

# define priors
a = pymc.Normal('slope', mu=0, tau=1.0 / 10**2)
b = pymc.Normal('intercept', mu=0, tau=1.0 / 10**2)
tau = pymc.Gamma("tau", alpha=0.1, beta=0.1)

# define likelihood
@pymc.deterministic
def mu(a=a, b=b, x=x):
    return a * x + b

y = pymc.Normal('y', mu=mu, tau=tau, value=y_obs, observed=True)

# inference
m = pymc.Model([a, b, tau, x, y])
mc = pymc.MCMC(m)
mc.sample(iter=11000, burn=10000)
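# Added sketch (assumed follow-up): pull posterior summaries out of the
# PyMC2 trace; the node names were set to 'slope', 'intercept', and 'tau'
# above, even though the Python variables are a, b, tau.
slope_samples = mc.trace('slope')[:]
intercept_samples = mc.trace('intercept')[:]
print('slope     = %.2f +/- %.2f' % (slope_samples.mean(), slope_samples.std()))
print('intercept = %.2f +/- %.2f' % (intercept_samples.mean(), intercept_samples.std()))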
        binom.rvs(n=ntrl, p=.49, size=npg),
        binom.rvs(n=ntrl, p=.51, size=npg)))
n_subj = len(cond_of_subj)
n_cond = len(set(cond_of_subj))

# THE MODEL
with pm.Model() as model:
    # Hyperprior on model index:
    model_index = pm.DiscreteUniform('model_index', lower=0, upper=1)
    # Constants for hyperprior:
    shape_Gamma = 1.0
    rate_Gamma = 0.1
    # Hyperprior on mu and kappa:
    kappa = pm.Gamma('kappa', shape_Gamma, rate_Gamma, shape=n_cond)

    mu0 = pm.Beta('mu0', 1, 1)
    a_Beta0 = mu0 * kappa[cond_of_subj]
    b_Beta0 = (1 - mu0) * kappa[cond_of_subj]

    mu1 = pm.Beta('mu1', 1, 1, shape=n_cond)
    a_Beta1 = mu1[cond_of_subj] * kappa[cond_of_subj]
    b_Beta1 = (1 - mu1[cond_of_subj]) * kappa[cond_of_subj]

    # Prior on theta
    theta0 = pm.Beta('theta0', a_Beta0, b_Beta0, shape=n_subj)
    theta1 = pm.Beta('theta1', a_Beta1, b_Beta1, shape=n_subj)
    # if model_index == 0 then sample from theta1 else sample from theta0
    theta = pm.switch(pm.eq(model_index, 0), theta1, theta0)