def main():
    """Beta-Binomial demo: Metropolis sampling followed by a tour of
    PyMC3 diagnostics (traceplot, ROPE, Gelman-Rubin, forestplot,
    summary, autocorrelation, effective sample size)."""
    with pm.Model() as model:
        # Using a strong prior. Meaning the mean is towards zero than towards 1
        prior = pm.Beta('prior', 0.5, 3)
        output = pm.Binomial('output', n=100, observed=50, p=prior)
        step = pm.Metropolis()
        trace = pm.sample(1000, step=step)
        pm.traceplot(trace)
        pm.plot_posterior(trace, figsize=(5, 5), kde_plot=True,
                          rope=[0.45, 0.55])
        # Rope is an interval that you define
        # This is a value you expect. You can check
        # If ROPE fall on HPD or not. If it falls, it means
        # our value is within HPD and may be increasing sample
        # size would make our mean estimate better.

        # gelman rubin
        pm.gelman_rubin(trace)
        # forestplot
        pm.forestplot(trace, varnames=['prior'])
        # summary [look at mc error here. This is the std error, should be low]
        pm.df_summary(trace)
        # autocorrelation
        pm.autocorrplot(trace)
        # effective size (value discarded — computed for its console output only)
        pm.effective_n(trace)['prior']
def plot_traces_pymc(trcs, varnames=None):
    '''
    Convenience fn: plot traces with overlaid means and values
    Handle nested traces for hierarchical models
    '''
    # One subplot row per (requested) variable.
    nrows = len(trcs.varnames)
    if varnames is not None:
        nrows = len(varnames)
    ax = pm.traceplot(trcs, varnames=varnames,
                      figsize=(12, nrows*1.4),
                      lines={k: v['mean']
                             for k, v in pm.df_summary(trcs, varnames=varnames).iterrows()},
                      combined=True)
    # don't label the nested traces (a bit clumsy this: consider tidying)
    dfmns = pm.df_summary(trcs, varnames=varnames)['mean'].reset_index()
    dfmns.rename(columns={'index': 'featval'}, inplace=True)
    # Drop vector components with index >= 1 (summary rows named 'var__1', 'var__12', ...).
    dfmns = dfmns.loc[dfmns['featval'].apply(
        lambda x: re.search('__[1-9]{1,}', x) is None)]
    # 'draw' is True for rows NOT ending in '__0'; only those get a label below.
    dfmns['draw'] = dfmns['featval'].apply(
        lambda x: re.search('__0{1}$', x) is None)
    dfmns['pos'] = np.arange(dfmns.shape[0])
    dfmns.set_index('pos', inplace=True)
    # Annotate each retained marginal with its posterior mean.
    for i, r in dfmns.iterrows():
        if r['draw']:
            ax[i, 0].annotate('{:.2f}'.format(r['mean']),
                              xy=(r['mean'], 0), xycoords='data',
                              xytext=(5, 10), textcoords='offset points',
                              rotation=90, va='bottom', fontsize='large',
                              color='#AA0022')
def main(): df = generateData(a=5, b=2, latent_error_y=30) #Parameters beta are [5, 2] #Variance is 30 g = sns.lmplot(x='x', y='y', data=df, fit_reg=True, size=6, scatter_kws={'alpha':0.8, 's':60}) #Encode model specification as design matrices fml = 'y ~ 1 + x' (mx_en, mx_ex) = pt.dmatrices(fml, df, return_type='dataframe', NA_action='raise') #Fit OLS model smfit = sm.OLS(endog=mx_en,exog=mx_ex, hasconst=True).fit() print(smfit.summary()) #Model specifications are wrapped in a with-statement with pm.Model() as mdl_ols: #Use GLM submodule for simplified patsy-like model spec #Use Normal family - normal distribution likelihood, HalfCauchy distribution priors pm.glm.GLM.from_formula('y ~ 1 + x', df, family=pm.glm.families.Normal()) #Find MAP(maximum a posterior) using Powell optimization #Mode of the posterior distribution start_MAP = pm.find_MAP(fmin=fmin_powell, disp=True) #Take samples using NUTS from the joint probability distribution #Iteratively converges by minimising loss on posterior predictive distribution yhat with respect to true y trc_ols = pm.sample(2000, start=start_MAP, step=pm.NUTS()) ax = pm.traceplot(trc_ols[-1000:], figsize=(12, len(trc_ols.varnames)*1.5), lines = {k: v['mean'] for k, v in pm.df_summary(trc_ols[-1000:]).iterrows()}) print(pm.df_summary(trc_ols[-1000:])) xlims = (df['x'].min() - np.ptp(df['x']) / 10 , df['x'].max() + np.ptp(df['x']) / 10) plotPosteriorCr(mdl_ols, trc_ols, df, xlims) plt.show() print('x')
def fitFlat(x, y):
    """Fit the three-parameter curve ``Model(x, a, b, c)`` to ``y`` using
    improper flat priors on a, b, c and a tight fixed observation noise
    (sd = 0.01 per point).

    Returns the pymc3 summary DataFrame of the posterior samples.
    """
    with pm.Model():
        # Improper (flat) priors for the unknown curve parameters.
        coef_a = pm.Flat('a')
        coef_b = pm.Flat('b')
        coef_c = pm.Flat('c')
        # Deterministic mean response of the curve model.
        expected = Model(x, coef_a, coef_b, coef_c)
        # Gaussian likelihood with a small, fixed per-point noise.
        pm.Normal('Like', mu=expected, sd=0.01 * np.ones_like(y), observed=y)
        # NUTS sampling initialised via ADVI; single job, no progress bar.
        draws = pm.sample(1000, progressbar=False, init='ADVI',
                          step=pm.NUTS(), njobs=1)
        return pm.df_summary(draws)
def fit(x, y, lowerVec, upperVec):
    """Fit ``Model(x, a, b, c)`` to ``y`` with uniform box priors.

    ``lowerVec``/``upperVec`` are 3-sequences giving the bounds for
    a, b, c respectively; observation noise is fixed at sd = 0.1.
    Returns the pymc3 summary DataFrame of the posterior samples.
    """
    lo_a, lo_b, lo_c = lowerVec
    hi_a, hi_b, hi_c = upperVec
    with pm.Model():
        # Box-constrained priors on the curve parameters.
        par_a = pm.Uniform('a', lower=lo_a, upper=hi_a)
        par_b = pm.Uniform('b', lower=lo_b, upper=hi_b)
        par_c = pm.Uniform('c', lower=lo_c, upper=hi_c)
        # Deterministic mean response of the curve model.
        expected = Model(x, par_a, par_b, par_c)
        # Fixed homoscedastic noise of 0.1 per observation.
        pm.Normal('Like', mu=expected, sd=0.1 * np.ones_like(y), observed=y)
        draws = pm.sample(1000, progressbar=False, init='ADVI',
                          step=pm.NUTS(), njobs=1)
        return pm.df_summary(draws)
def fit(x, y, meanVec, stdVec, errors):
    """Fit ``Model(x, a, b, c)`` to ``y`` with Normal priors and
    heteroscedastic observation noise.

    Parameters
    ----------
    x, y : array-like data to fit.
    meanVec, stdVec : 3-sequences of prior means / standard deviations
        for the parameters a, b, c.
    errors : per-observation Gaussian noise standard deviations.

    Returns
    -------
    pandas.DataFrame
        pymc3 posterior summary.
    """
    aMu, bMu, cMu = meanVec
    aStd, bStd, cStd = stdVec
    model = pm.Model()
    if False:  # debug: dump the raw data table
        df = pd.DataFrame(np.transpose([x, y, errors]),
                          columns=['x', 'y', 'error'])
        # FIX: was a Python-2 `print df` statement, a SyntaxError on Python 3.
        print(df)
    with model:
        # Priors for unknown model parameters
        a = pm.Normal('a', mu=aMu, sd=aStd)
        b = pm.Normal('b', mu=bMu, sd=bStd)
        c = pm.Normal('c', mu=cMu, sd=cStd)
        # Expected value of outcome
        mu = Model(x, a, b, c)
        # Likelihood (sampling distribution) of observations
        Like = pm.Normal('Like', mu=mu, sd=errors, observed=y)
        # do sampling
        trace = pm.sample(1000, progressbar=False, init='ADVI',
                          step=pm.NUTS(), njobs=1)
        # give summary
        summary = pm.df_summary(trace)
    return summary
def fit(x, y, errors, signA):
    """Fit ``Model(x, a, b, c)`` with a sign-constrained 'a'.

    'a' is given a Uniform prior over [0, 0.1] when ``signA`` is positive
    and over [-0.1, 0] when ``signA == -1.0``; b and c are Uniform on
    [0, 1].  ``errors`` are per-observation noise standard deviations.

    Returns the pymc3 posterior summary DataFrame.
    """
    model = pm.Model()
    if False:  # debug: dump the raw data table
        df = pd.DataFrame(np.transpose([x, y, errors]),
                          columns=['x', 'y', 'error'])
        # FIX: was a Python-2 `print df` statement, a SyntaxError on Python 3.
        print(df)
    with model:
        # Priors for unknown model parameters.
        # The sign of `a` selects which side of zero its uniform prior covers.
        LowerA = 0.
        UpperA = 0.1
        if signA == -1.0:
            UpperA = 0.
            LowerA = -0.1
        a = pm.Uniform('a', lower=LowerA, upper=UpperA)
        b = pm.Uniform('b', lower=0., upper=1.0)
        c = pm.Uniform('c', lower=0., upper=1.0)
        # Expected value of outcome
        mu = Model(x, a, b, c)
        # Likelihood (sampling distribution) of observations
        Like = pm.Normal('Like', mu=mu, sd=errors, observed=y)
        # do sampling
        trace = pm.sample(1000, progressbar=False, init='ADVI',
                          step=pm.NUTS(), njobs=1)
        # give summary
        summary = pm.df_summary(trace)
    return summary
def plot_traces_pymc(self, trcs, varnames=None):
    '''
    Convenience fn: plot traces with overlaid means and values
    Code adapted from:
    https://github.com/jonsedar/pymc3_vs_pystan/blob/master/convenience_functions.py
    '''
    # One subplot row per (requested) variable.
    nrows = len(trcs.varnames)
    if varnames is not None:
        nrows = len(varnames)
    ax = pm.traceplot(trcs, varnames=varnames, figsize=(12, nrows*1.4),
                      lines={k: v['mean']
                             for k, v in pm.df_summary(trcs, varnames=varnames).iterrows()})
    # Annotate each marginal with its posterior mean.
    for i, mn in enumerate(pm.df_summary(trcs, varnames=varnames)['mean']):
        ax[i, 0].annotate('{:.2f}'.format(mn), xy=(mn, 0), xycoords='data',
                          xytext=(5, 10), textcoords='offset points',
                          rotation=90, va='bottom', fontsize='large',
                          color='#AA0022')
def plot_traces(trcs, varnames=None):
    '''
    Convenience fn: plot traces with overlaid means and values
    INPUT: pymc trace
    OUTPUT: display of model coefficient distributions
    '''
    # One subplot row per (requested) variable.
    nrows = len(trcs.varnames)
    if varnames is not None:
        nrows = len(varnames)
    ax = pm.traceplot(
        trcs,
        varnames=varnames,
        figsize=(12, nrows * 1.4),
        lines={
            k: v['mean']
            for k, v in pm.df_summary(trcs, varnames=varnames).iterrows()
        },
        combined=True)
    # don't label the nested traces (a bit clumsy this: consider tidying)
    dfmns = pm.df_summary(trcs, varnames=varnames)['mean'].reset_index()
    dfmns.rename(columns={'index': 'featval'}, inplace=True)
    # Drop vector components with index >= 1 (rows named 'var__1', 'var__12', ...).
    dfmns = dfmns.loc[dfmns['featval'].apply(
        lambda x: re.search('__[1-9]{1,}', x) is None)]
    # 'draw' is True for rows NOT ending in '__0'; only those get labelled.
    dfmns['draw'] = dfmns['featval'].apply(
        lambda x: re.search('__0{1}$', x) is None)
    dfmns['pos'] = np.arange(dfmns.shape[0])
    dfmns.set_index('pos', inplace=True)
    # Annotate each retained marginal with its posterior mean.
    for i, r in dfmns.iterrows():
        if r['draw']:
            ax[i, 0].annotate('{:.2f}'.format(r['mean']),
                              xy=(r['mean'], 0),
                              xycoords='data',
                              xytext=(5, 10),
                              textcoords='offset points',
                              rotation=90,
                              va='bottom',
                              fontsize='large',
                              color='#AA0022')
def summary(self):
    """Posterior summary of ``self.trace`` with mean, sd and a fixed set
    of percentiles (1/5/25/50/75/95/99) via custom stat functions."""
    def trace_sd(x):
        # Per-variable standard deviation; Series name becomes the column label.
        return pd.Series(np.std(x, 0), name='sd')

    def trace_mean(x):
        # Per-variable mean; Series name becomes the column label.
        return pd.Series(np.mean(x, 0), name='mean')

    def trace_quantiles(x):
        # pm.quantiles returns {percentile: value}; DataFrame turns it into columns.
        return pd.DataFrame(pm.quantiles(x, [1, 5, 25, 50, 75, 95, 99]))

    summary = pm.df_summary(
        self.trace, stat_funcs=[trace_mean, trace_sd, trace_quantiles])
    return summary
def plot_trace(trace):
    """Traceplot with vertical lines and text labels at each posterior mean."""
    # Traceplot with vertical lines at the mean value
    ax = pm.traceplot(
        trace,
        figsize=(14, len(trace.varnames) * 1.8),
        lines={k: v['mean'] for k, v in pm.df_summary(trace).iterrows()})
    matplotlib.rcParams['font.size'] = 16
    # Labels with the mean value (original comment said 'median', but the
    # code annotates the mean column of the summary).
    for i, mn in enumerate(pm.df_summary(trace)['mean']):
        ax[i, 0].annotate('{:0.2f}'.format(mn),
                          xy=(mn, 0),
                          xycoords='data',
                          size=8,
                          xytext=(-18, 18),
                          textcoords='offset points',
                          rotation=90,
                          va='bottom',
                          fontsize='large',
                          color='red')
def _nuts_inference(self, inference_args):
    """
    Runs NUTS inference and stores the trace and its summary on self.

    Parameters
    ----------
    inference_args : dict, arguments to be passed to the PyMC3 sample
        method.  See PyMC3 doc for permissible values.
    """
    with self.cached_model:
        # Explicit NUTS step; all remaining sampler options come from the caller.
        self.trace = pm.sample(step=pm.NUTS(), **inference_args)
        self.summary = pm.df_summary(self.trace)
def plot_traces(traces, retain=1000):
    '''
    Convenience function:
    Plot traces with overlaid means and values
    '''
    # Only the last `retain` draws are plotted/summarised (burn-in discard).
    ax = pm.traceplot(
        traces[-retain:],
        figsize=(12, len(traces.varnames) * 1.5),
        lines={
            k: v['mean']
            for k, v in pm.df_summary(traces[-retain:]).iterrows()
        })
    # Annotate each marginal with its posterior mean.
    for i, mn in enumerate(pm.df_summary(traces[-retain:])['mean']):
        ax[i, 0].annotate('{:.2f}'.format(mn),
                          xy=(mn, 0),
                          xycoords='data',
                          xytext=(5, 10),
                          textcoords='offset points',
                          rotation=90,
                          va='bottom',
                          fontsize='large',
                          color='#AA0022')
def _advi_inference(self, inference_args):
    """
    Runs variational ADVI and then samples from those results.

    Parameters
    ----------
    inference_args : dict, arguments to be passed to the PyMC3 fit
        method.  See PyMC3 doc for permissible values.
    """
    with self.cached_model:
        # Mean-field ADVI; the fitted approximation is kept for later sampling.
        advi = pm.ADVI()
        self.approx = pm.fit(method=advi, **inference_args)
    # Draw posterior samples from the variational approximation.
    self.trace = self.approx.sample(draws=self.default_advi_sample_draws)
    self.summary = pm.df_summary(self.trace)
    # ELBO history, useful for convergence plots.
    self.advi_hist = advi.hist
def summary(self, varnames=None):
    """Generate summary statistics for model as Pandas dataframe.

    Parameters
    ----------
    varnames : iterable of str or None, optional
        The model variables to generate summaries for (default None).
        If None, defaults to all variables.

    Returns
    -------
    summary : pandas.DataFrame
        The dataframe with summary statistics.
    """
    # FIX: the previous `varnames or self.model_variables` silently replaced
    # any *empty* (falsy) iterable with all variables, contradicting the
    # documented "If None" contract.  Test for None explicitly.
    if varnames is None:
        varnames = self.model_variables
    return pm.df_summary(self.trace, varnames=varnames)
def predict_test(trc, X_test, X_train, hyper=0):
    '''
    Calculate mean prediction values for test data using mean coefficient values
    INPUT: pymc trace, df test, df train, number of hyperpriors
    OUTPUT: np array of mean prediction values for test data
    '''
    # Posterior-mean coefficients from the last 500 draws.
    coeff = pm.df_summary(trc[-500:])
    X_test_std = standardize_2sd_test(X_test[fts_num], X_train[fts_num])
    preds = []
    for i in range(len(X_test)):
        code = X_test.iloc[i, :]['Reservoir_Code']
        # Shared slope contribution; only the intercept row depends on the code.
        slope = np.dot(X_test_std.iloc[i, :].values,
                       coeff.ix[hyper + 3:-1 - hyper, 0].values)
        # BUG FIX: the original used two independent `if`s, so for
        # Reservoir_Code == 0 the second if/else immediately overwrote the
        # prediction with the code-2 intercept.  Use an if/elif/else chain.
        if code == 0:
            pred = coeff.ix[0 + hyper, 0] + slope
        elif code == 1:
            pred = coeff.ix[1 + hyper, 0] + slope
        else:
            pred = coeff.ix[2 + hyper, 0] + slope
        preds.append(pred)
    # NOTE(review): DataFrame.ix is deprecated/removed in modern pandas —
    # migrate to .iloc once label-vs-position intent is confirmed.
    return np.array(preds)
# plt.show() print(pm.dic(trace2, unpooled_model)) # x_shared.set_value([6, 6, 7]) # x_shared1.set_value([20, 40, 40]) # y_shared.set_value([0, 0, 0]) elec_year1 = np.delete(elec_year, np.s_[:6]) elec_year1 = np.append([2, 3, 4, 5, 6, 7], elec_year1) x_shared.set_value(elec_year1) with unpooled_model: trace3 = pm.sample(3000) post_pred = pm.sample_ppc(trace3) abc = post_pred['Observed'].mean(axis=0) print(abc) print(pm.df_summary(trace2, varnames2)) # 读取后验区间,加.mean()是为了转换为np型数据便于计算 aaa = pm.df_summary(trace2, varnames2) bbb = pd.DataFrame(aaa) hpd2_5 = bbb['hpd_2.5'] hpd97_5 = bbb['hpd_97.5'] hpd25_beta = hpd2_5[:1].mean() hpd975_beta = hpd97_5[:1].mean() hpd25_early_rate = hpd2_5[1:2].mean() hpd975_early_rate = hpd97_5[1:2].mean() hpd25_late_rate = hpd2_5[2:3].mean() hpd975_late_rate = hpd97_5[2:3].mean()
# start = pm.find_MAP() step1 = pm.Slice([tau1, a_0]) trace2 = pm.sample(1000, tune=500, step=step1) chain2 = trace2 varnames1 = [ 'a0', 'δ', 'sigma', 'tau1'] pm.plot_posterior(chain2, varnames1, kde_plot=True) plt.show() pm.energyplot(chain2) # 能量图对比,重合度越高表示模型越优 plt.show() # 画出自相关曲线 varnames1 = [ 'a0', 'δ', 'sigma', 'tau1'] pm.autocorrplot(chain2, varnames1) plt.show() print(pm.df_summary(chain2, varnames1)) print(pm.waic(trace=trace2, model=partial_model)) # ====================================================================== # 后验分析: # 画出后验与原始图形对比图 # # ====================================================================== # Bx_.set_value([7,8] , [5,6]) with partial_model: pp_trace = pm.sample_ppc(trace2, 1000) # pp_trace['Observed'].mean(axis=0) fig, ax = plt.subplots(figsize=(8, 6)) # ax.plot(x_plot, spline(x_plot), c='k', label="True function")
xp = elec_year2[ip * 7:(ip + 1) * 7, :] # 原始数据 yp = elec_faults2[ip * 7:(ip + 1) * 7, :] ax.plot(xp, yp, marker='o', alpha=.8) yipred_yplot = np.array([ yipred_mean[i * 6:(i + 1) * 6] for i in np.arange(7 * ip, (ip + 1) * 7) ]) xipred = np.array([np.arange(6) + 1 for i in np.arange(7)]) ax.plot(xipred, yipred_yplot[:], 'k+-', color='r') plt.tight_layout() plt.show() varnames2 = ['beta', 'beta1', 'beta2', 'beta3', 'beta4', 'u'] tmp = pm.df_summary(chain, varnames2) betaMAP = tmp['mean'][0] beta1MAP = tmp['mean'][np.arange(companiesABC) + 1] beta2MAP = tmp['mean'][np.arange(companiesABC) + 1 * companiesABC + 1] beta3MAP = tmp['mean'][np.arange(companiesABC) + 2 * companiesABC + 1] beta4MAP = tmp['mean'][np.arange(companiesABC) + 3 * companiesABC + 1] uMAP = tmp['mean'][4 * companiesABC + 1] # am0MAP = tmp['mean'][4*companiesABC+2] # am1MAP = tmp['mean'][4*companiesABC+3] # print(am0MAP) # print(beta1MAP) # print(tmp) # print(beta2MAP) # print(beta3MAP) ppcsamples = 500
def model():
    """Build and sample the hierarchical Dirichlet mixture-of-algorithms
    model over participant response counts.

    Uses the module-level ``data`` plus global constants/helpers
    (N_GROUPS, TOTAL_PARTS, N_ALGS, N_RESPS, MCMC_* , store_hds_old,
    store_start_p, format_algs_theano, print_star).

    Returns
    -------
    (trace, summary) : the MultiTrace and its pm.df_summary DataFrame.
    """
    global data
    alpha_prior = 0.1
    beta_prior = 1.
    # (kept for reference; not consumed below)
    alpha_init = np.ones((N_GROUPS, 1))
    noise_init = np.ones((N_GROUPS, 1)) * 1e-2
    parts_ones = np.ones((TOTAL_PARTS))
    data_ones = np.ones(len(data[0]))
    hds = store_hds_old(paren_lst, filt)
    ns = np.sum(data, axis=1)  # total responses per participant
    smooth = np.ones((TOTAL_PARTS, N_ALGS)) * beta_prior
    # bias in choice of starting parenthesis
    start_p = store_start_p(paren_lst, n=TOTAL_PARTS, lst=["("])
    start_np = 1 - start_p
    # init_beta = np.array([0.1]*12)*10
    init_beta = np.ones(N_ALGS) * beta_prior
    # FIX: Python-2 print statements converted to print() calls.
    print("Starting MCMC....")
    with pm.Model() as m:
        # Dirichlet concentration.
        alpha = pm.Exponential('alpha', alpha_prior, shape=1, testval=2)
        # alpha = tt.as_tensor([10])
        # alpha = pm.Deterministic('alpha', alpha)
        # Population-level algorithm mixture.
        beta = pm.Dirichlet('beta', init_beta, shape=N_ALGS)
        # Per-participant algorithm mixtures, centred on the population one.
        theta = pm.Dirichlet('theta', alpha * beta,
                             shape=(TOTAL_PARTS, N_ALGS))
        nw1 = 1
        nw2 = 9
        noise = pm.Beta("noise", nw1, nw2, shape=TOTAL_PARTS, testval=0.05)
        # FIX: list comprehension instead of py2 `map` so tt.concatenate
        # receives a list (map returns an iterator on Python 3).
        new_algs = [theta[idx].dot(format_algs_theano(hds, noise[idx]))
                    for idx in np.arange(TOTAL_PARTS)]
        theta_resp = tt.concatenate([new_algs], axis=0)
        # theta_resp = theta + noise * 0.5
        bias = pm.Beta("bias", 1, 1, shape=(TOTAL_PARTS, 1))
        # Mix the response distribution according to the starting-parenthesis bias.
        biased_theta_resps = start_p * bias * theta_resp + start_np * (
            1. - bias) * theta_resp
        # Renormalise each participant's response distribution.
        sum_norm = biased_theta_resps.sum(axis=1).reshape((TOTAL_PARTS, 1))
        biased_theta_resps = biased_theta_resps / sum_norm
        pm.Multinomial('resp', n=ns, p=biased_theta_resps,
                       shape=(TOTAL_PARTS, N_RESPS), observed=data)
        step = pm.NUTS()
        # step = pm.Metropolis()
        trace = pm.sample(MCMC_STEPS, step=step, tune=BURNIN,
                          target_accept=0.9, thin=MCMC_THIN)
    print_star("Model Finished!")
    if MCMC_CHAINS > 1:
        print(pm.gelman_rubin(trace))
    summary = pm.df_summary(trace)
    print(summary)
    # (removed unused debug locals `which` and `samp`)
    return trace, summary
# theta = beta + beta1 * elec_year + beta2 * elec_tem1 + beta3 * elec_hPa1 + beta4 * elec_RH1 theta = beta + beta1 * elec_year1 + beta2 * elec_tem1 Observed = pm.StudentT("Observed", mu=theta, sd=sigma, nu=nu, observed=elec_faults1) # 观测值 start = pm.find_MAP() # step = pm.Metropolis() trace1 = pm.sample(4000, start=start) chain1 = trace1[1000:] varnames1 = ['beta', 'beta1', 'beta2'] pm.traceplot(chain1, varnames1) plt.show() print(pm.df_summary(trace1, varnames1)) # 画出自相关曲线 pm.autocorrplot(chain1) plt.show() faults_m = np.mean(elec_faults) faults_sd = np.std(elec_faults) year_m = np.mean(elec_year) year_std = np.std(elec_year) tem_m = np.mean(elec_tem) tem_std = np.std(elec_tem) hPa_m = np.mean(elec_hPa) hPa_std = np.std(elec_hPa) RH_m = np.mean(elec_RH) RH_std = np.std(elec_RH)
# NOTE(review): script fragment — alpha, beta*, Num_shared, xs_* and
# companiesABC are defined above this chunk.
u = pm.Normal('u', 0, 0.01)
# Weibull scale parameter via a log link on a varying-slope linear predictor
# (quadratic in year).
beta_mu = pm.Deterministic('beta_mu', tt.exp(u + beta +
    (beta1[Num_shared] * xs_year + beta2[Num_shared] * xs_char1 +
     beta3[Num_shared] * xs_char2 + beta4[Num_shared] * xs_year * xs_year)))
Observed = pm.Weibull("Observed", alpha=alpha, beta=beta_mu,
                      observed=ys_faults)  # observed values (translated)
trace_1 = pm.sample(3000, init='advi+adapt_diag')

pm.traceplot(trace_1)
plt.show()
burnin = 2000
chain = trace_1[burnin:]
# get MAP estimate (posterior means, unpacked by blocks of companiesABC).
varnames2 = ['beta', 'beta1', 'beta2', 'beta3', 'beta4', 'u']
tmp = pm.df_summary(chain, varnames2)
betaMAP = tmp['mean'][0]
beta1MAP = tmp['mean'][np.arange(companiesABC) + 1]
beta2MAP = tmp['mean'][np.arange(companiesABC) + 1*companiesABC+1]
beta3MAP = tmp['mean'][np.arange(companiesABC) + 2*companiesABC+1]
beta4MAP = tmp['mean'][np.arange(companiesABC) + 3*companiesABC+1]
uMAP = tmp['mean'][4*companiesABC+1]

# Model fit figure.  (translated)
ppcsamples = 500
ppcsize = 100
# ppc = defaultdict(list)
burnin = 2000
fig = plt.figure(figsize=(16, 8))
fig.text(0.5, -0.02, 'Test Interval (ms)', ha='center', fontsize=20)
fig.text(-0.02, 0.5, 'Proportion of Long Responses', va='center',
         rotation='vertical', fontsize=20)
# NOTE(review): script fragment — ax, p, save_filebase, feature and
# `features` come from code above this chunk.
ax.text(x=0.7, y=1.2, s="Pr(Lab > Class) = %.3f" % p)
ax.legend()
plt.savefig(save_filebase + feature + 'Comparison.pdf')
plt.close()

# Load previously pickled posterior samples for the three model variants.
sample_directory = "C:/Users/robsc/Documents/GitHub/MultiModalAnalysis/REFLECT/saved_samples/"
with open(sample_directory+"pooled_5000.pkl", 'rb') as buff:
    pool_samps = pickle.load(buff)
with open(sample_directory+"individual_5000.pkl", 'rb') as buff:
    ind_samps = pickle.load(buff)
with open(sample_directory+"hierarchical_5000.pkl", 'rb') as buff:
    hier_samps = pickle.load(buff)

index_list = ["Intercept"] + features + ["Uncertainty"]
ps_df = pm.df_summary(pool_samps)
ps_df.index = index_list
hs_df = pm.df_summary(hier_samps)
in_df = pm.df_summary(ind_samps)

# Extract the 'reflect' coefficient rows from the hierarchical and
# individual fits and relabel them with the covariate names.
reflect_indices = [e for e in hs_df.index if 'reflect' in e]
rh_df = hs_df.loc[reflect_indices, :]
rh_df.index = ["%s" % e for e in index_list]
ri_df = in_df.loc[reflect_indices, :]
ri_df.index = ["%s" % e for e in index_list]
# Same for the 'leads' coefficient rows.
leads_indices = [e for e in hs_df.index if 'leads' in e]
lh_df = hs_df.loc[leads_indices, :]
lh_df.index = ["%s" % e for e in index_list]
li_df = in_df.loc[leads_indices, :]
li_df.index = ["%s" % e for e in index_list]
# y ~ Normal(m[g] * p, s) mu_est = pm.Deterministic("mu_est", T.sum(effects[g] * predictors, 1)) yd = pm.Normal('y', mu_est, s[g]**-2, observed=y) start = pm.find_MAP() #h = find_hessian(start) step = pm.NUTS(model.vars, scaling=start) with model: trace = pm.sample(3000, step, start) #%% pm.traceplot(trace) dftmp = pm.df_summary(trace, varnames=['group_effects']) print(dftmp['mean']) import statsmodels.formula.api as smf # from patsy import dmatrices import pandas as pd tbl = pd.DataFrame(predictors, columns=['C1', 'C2', 'C3']) tbl['group'] = pd.Series(group, dtype="category") tbl['yd'] = y md2 = smf.mixedlm("yd ~ -1 + C1 + C2 + C3", tbl, groups=tbl["group"]) mdf2 = md2.fit() print(mdf2.summary()) #%% X = np.tile(group_predictors[group], (1, 3)) * predictors beta0 = np.linalg.lstsq(X, y) fitted = np.dot(X, beta0[0])
} for p in pathways }  # NOTE(review): orphaned tail of a dict comprehension whose opening is above this chunk

y_bmp = {}
g = {}

def logp_f(f, b, eps):
    # Log-probability contribution of feature f given accumulated rate b.
    # `epsilon` (global) guards the log against exact zeros.
    if f in evidence:
        return T.log(1 - math.e**(-1 * b) + epsilon)
    if f in metfrag_evidence:
        # Weight by the MetFrag posterior odds for this feature.
        a_p = (1.0 / (1 - metfrag_evidence[f])) - 1
        return a_p * T.log(1 - math.e**(-1 * b) + epsilon) - b
    return T.log(eps) - b

psi = {}
for feat, pathways in reverse_path_dict.items():
    # Total pathway contribution for this feature.
    y_bmp[feat] = sum([bmp[pname][feat] for pname in pathways])
    g[feat] = Bernoulli('g_' + feat, 1 - math.e**(-y_bmp[feat]))
    # Potential ties the feature evidence into the joint density.
    psi[feat] = pymc3.Potential('psi_' + feat, logp_f(feat, y_bmp[feat], eps))

if __name__ == '__main__':
    n = 1000
    with model:
        # Short run, then a 10x longer run; compare the two traces.
        trace = pymc3.sample(n)
        t1 = trace
        print(pymc3.df_summary(trace))
        trace = pymc3.sample(10 * n)
        t2 = trace
        print(pymc3.df_summary(trace))
        print(pymc3.stats.compare([t1, t2]))
sd=std / n_hidden**.5, shape=[n_hidden, K], testval=W1_init)  # NOTE(review): tail of a Normal('W1', ...) call opened above this chunk
b1 = Normal('b1', mu=0,
            sd=std / n_hidden**.5,
            shape=[K],
            testval=b1_init)

# Building NN likelihood
h1 = tt.nnet.softplus(tt.dot(X_shared, W0) + b0)  # hidden-layer activations
mu_est = tt.dot(h1, W1) + b1  # network output used as the regression mean
# Regression likelihood
Normal('y_hat', mu=mu_est, sd=std_out, observed=Y_shared)

# Inference
with neural_network:
    # Sample from posterior: mean-field ADVI fit, then draws from that fit.
    v_params = pm.advi(n=n_iter)
    trace = pm.sample_vp(v_params, draws=5000)

print(pm.df_summary(trace))
pm.traceplot(trace)

# Posterior predictive samples
ppc = pm.sample_ppc(trace, samples=500)
pred = ppc['y_hat']
mse = np.mean((pred - Y_train)**2)
print('MC test MSE: ', mse)
def mixed_effects():
    """Fit a varying-intercept bounded negative-binomial model for
    days-to-first-price-update, grouped by `classes`, then produce trace
    plots, forest plots and posterior-predictive figures.

    Relies on module-level objects: df, preprocessing, pm, tt, plt, np,
    plot helpers (entity_plotA/B) and colour constants (red, ...).
    """
    le = preprocessing.LabelEncoder()
    # Convert categorical variables to integer
    # participants_idx = le.fit_transform(messages['prev_sender'])
    classes = 'FF49_industry'
    # classes = 'underwriter_tier'
    # classes = 'amends'
    print("Grouping by: {}".format(classes))
    FF49_industry = le.fit_transform(df['FF49_industry'])
    class_idx = le.fit_transform(df[classes])
    n_classes = len(le.classes_)
    NSamples = 50000
    # BUG FIX: use floor division — `NSamples / 10` is a float on Python 3
    # and the float slice index in `trace[-burn::thin]` raises TypeError.
    burn = NSamples // 10
    thin = 2
    covariates = [
        'Intercept',
        '#Syndicate Members',
        '#Lead Underwriters',
        'Underwriter Rank',
        # 'FF49 Industry',
        'Amends Down',
        '#S1A Amendments',
        'Share Overhang',
        'log(1+Sales)',
        'log(Proceeds)',
        'CASI',
        # 'media_1st_pricing',
        # 'VC',
        'IPO Market Returns',
        'Industry Returns',
        'BAA Spread',
    ]
    y = df['days_to_first_price_update'].values
    # y = np.ma.masked_values(list(df.days_to_first_price_update), value=-999)
    with pm.Model() as model:
        # Parameters: one Gamma intercept per class, Normal slopes.
        intercept = pm.Gamma('Intercept', alpha=.1, beta=.1, shape=n_classes)
        beta_underwriter_syndicate_size = pm.Normal('#Syndicate Members', mu=0, sd=20)
        beta_underwriter_num_leads = pm.Normal('#Lead Underwriters', mu=0, sd=20)
        beta_underwriter_rank_avg = pm.Normal('Underwriter Rank', mu=0, sd=20)
        beta_num_SEC_amendments = pm.Normal('#S1A Amendments', mu=0, sd=20)
        # beta_FF49_industry = pm.Normal('FF49 Industry', mu=0, sd=20)
        beta_amends_down = pm.Normal('Amends Down', mu=0, sd=20)
        beta_share_overhang = pm.Normal('Share Overhang', mu=0, sd=20)
        beta_log_sales = pm.Normal('log(1+Sales)', mu=0, sd=20)
        beta_log_proceeds = pm.Normal('log(Proceeds)', mu=0, sd=20)
        beta_CASI = pm.Normal('CASI', mu=0, sd=20)
        # beta_media_1st_pricing = pm.Normal('media_1st_pricing', mu=0, sd=20)
        # beta_VC = pm.Normal('VC', mu=0, sd=20)
        beta_BAA_spread = pm.Normal('BAA Spread', mu=0, sd=20)
        beta_M3_initial_returns = pm.Normal('IPO Market Returns', mu=0, sd=20)
        beta_M3_indust_rets = pm.Normal('Industry Returns', mu=0, sd=20)
        # Hyperparameters
        ## alpha: hyperparameters for neg-binom distribution
        alpha = pm.Gamma('alpha', alpha=.1, beta=.1)
        # Model formula: shifted log-link mean (always > 1).
        mu = 1 + tt.exp(
            intercept[class_idx]
            + beta_underwriter_syndicate_size * df.underwriter_syndicate_size
            + beta_underwriter_num_leads * df.underwriter_num_leads
            + beta_underwriter_rank_avg * df.underwriter_rank_avg
            # + beta_FF49_industry * FF49_industry
            + beta_amends_down * df['Amends Down']
            + beta_num_SEC_amendments * df.num_SEC_amendments
            + beta_share_overhang * df['Share Overhang']
            + beta_log_sales * df['log(1+Sales)']
            + beta_CASI * df['CASI']
            + beta_log_proceeds * df['log(Proceeds)']
            # + beta_media_1st_pricing * df.media_1st_pricing
            # + beta_VC * df.VC
            + beta_BAA_spread * df['BAA Spread']
            + beta_M3_initial_returns * df.M3_initial_returns
            + beta_M3_indust_rets * df.M3_indust_rets
        )
        # Dependent variable, bounded below at 1.
        BoundedNegativeBinomial = pm.Bound(pm.NegativeBinomial, lower=1)
        y_est = BoundedNegativeBinomial('y_est', mu=mu, alpha=alpha, observed=y)
        y_pred = BoundedNegativeBinomial('y_pred', mu=mu, alpha=alpha, shape=y.shape)
        # y_est = pm.NegativeBinomial('y_est', mu=mu, alpha=alpha, observed=y)
        # y_pred = pm.NegativeBinomial('y_pred', mu=mu, alpha=alpha, shape=y.shape)
        start = pm.find_MAP()
        step = pm.Metropolis(start=start)
        # step = pm.NUTS()
        trace = pm.sample(NSamples, step, start=start, njobs=1, progressbar=True)
        trace2 = trace
        trace = trace[-burn::thin]  # discard burn-in, thin the chain
    ## POSTERIOR PREDICTIVE CHECKS
    y_pred = trace.get_values('y_pred')
    pm.summary(trace, vars=covariates)
    # PARAMETER POSTERIORS
    anno_kwargs = {'xycoords': 'data', 'textcoords': 'offset points',
                   'rotation': 90, 'va': 'bottom', 'fontsize': 'large'}
    anno_kwargs2 = {'xycoords': 'data', 'textcoords': 'offset points',
                    'rotation': 0, 'va': 'bottom', 'fontsize': 'large'}
    n0, n1, n2, n3 = 1, 5, 9, 14  # numbering for posterior plots
    ax = pm.traceplot(trace, vars=['Intercept'] + trace.varnames[n0:n1],
                      lines={k: v['mean'] for k, v in pm.df_summary(trace).iterrows()})
    for i, mn in enumerate(pm.df_summary(trace)['mean'][n0:n1]):
        # +1 because up and down intercept
        ax[i, 0].annotate('{:.3f}'.format(mn), xy=(mn, 0), xytext=(5, 10),
                          color=red, **anno_kwargs)
    plt.savefig('figure1_mixed.png')
    ax2 = pm.traceplot(trace, trace.varnames[n1:n2],
                       lines={k: v['mean'] for k, v in pm.df_summary(trace).iterrows()})
    for i, mn in enumerate(pm.df_summary(trace)['mean'][n1:n2]):
        ax2[i, 0].annotate('{:.3f}'.format(mn), xy=(mn, 0), xytext=(5, 10),
                           color=red, **anno_kwargs)
    plt.savefig('figure2_mixed.png')
    ax3 = pm.traceplot(trace, trace.varnames[n2:n3],
                       lines={k: v['mean'] for k, v in pm.df_summary(trace).iterrows()})
    for i, mn in enumerate(pm.df_summary(trace)['mean'][n2:n3]):
        ax3[i, 0].annotate('{:.3f}'.format(mn), xy=(mn, 0), xytext=(5, 10),
                           color=red, **anno_kwargs)
    plt.savefig('figure3_mixed.png')
    _ = pm.forestplot(trace, vars=['Intercept'], ylabels=le.classes_)
    plt.savefig('forestplot_intercepts.png')
    _ = pm.forestplot(trace, vars=covariates[1:], ylabels=covariates[1:])
    plt.savefig('forestplot_mixed.png')
    # pm.traceplot(trace, vars=['alpha', 'y_pred'])

    def participant_y_pred(entity_name, burn=1000, ypred=y_pred):
        """Return posterior predictive for person"""
        ix = np.where(le.classes_ == entity_name)[0][0]
        return ypred[burn:, ix]

    days = 7
    fig = plt.figure(figsize=(16, 10))
    fig.add_subplot(221)
    entity_plotA('Up', days=days)
    fig.add_subplot(222)
    entity_plotB('Up')
    fig.add_subplot(223)
    entity_plotA('Down', days=days)
    fig.add_subplot(224)
    entity_plotB('Down')
    plt.savefig("figure4-postpreddist-updown")
Observed = pm.Weibull("Observed", alpha=alpha, beta=beta_mu, observed=ys_faults) # 观测值 trace_1 = pm.sample(10000, init='advi+adapt_diag') pm.traceplot(trace_1, varnames=['beta', 'beta1', 'beta2', 'beta3', 'beta4', 'u']) plt.show() burnin = 9000 chain = trace_1[burnin:] # get MAP estimate varnames2 = ['beta', 'beta1', 'beta2', 'beta3', 'beta4', 'u'] tmp = pm.df_summary(chain, varnames2) betaMAP = tmp['mean'][0] beta1MAP = tmp['mean'][np.arange(companiesABC) + 1] beta2MAP = tmp['mean'][np.arange(companiesABC) + 1 * companiesABC + 1] beta3MAP = tmp['mean'][np.arange(companiesABC) + 2 * companiesABC + 1] beta4MAP = tmp['mean'][np.arange(companiesABC) + 3 * companiesABC + 1] uMAP = tmp['mean'][4 * companiesABC + 1] # am0MAP = tmp['mean'][4*companiesABC+2] # am1MAP = tmp['mean'][4*companiesABC+3] # print(am0MAP) # print(beta1MAP) # print(tmp) # print(beta2MAP) # print(beta3MAP) # 模型拟合效果图 ppcsamples = 500
# NOTE(review): script fragment — t1 and start_MAP come from code above.
t2 = time.time()
print("Found MAP, took %f seconds" % (t2 - t1))

## take samples
t1 = time.time()
traces_ols = pm.sample(2000, start=start_MAP, step=pm.NUTS(),
                       progressbar=True)
print()
t2 = time.time()
print("Done sampling, took %f seconds" % (t2 - t1))

pm.summary(traces_ols)

## plot the samples and the marginal distributions
_ = pm.traceplot(
    traces_ols,
    figsize=(12, len(traces_ols.varnames) * 1.5),
    lines={k: v["mean"] for k, v in pm.df_summary(traces_ols).iterrows()},
)
plt.show()

do_tstudent = False
if do_tstudent:
    print("Robust Student-t analysis...")
    t1 = time.time()
    with pm.Model() as mdl_studentt:
        ## Define weakly informative Normal priors to give Ridge regression
        b1 = pm.Normal("b", mu=0, sd=100)
        # NOTE(review): this with-block continues beyond this chunk.
# NOTE(review): script fragment — ALPHA, BETA, SIGMA, X_INPUT, Y_OUTPUT,
# cost_model, PP, X, MRR, PROMPTS, MEASURE etc. are defined above this chunk.
MU = ALPHA + dot(X_INPUT, BETA)  # linear predictor
# Degrees-of-freedom prior: Exponential(1/29) wrapped as a deterministic.
NU = Deterministic('NU', Exponential('nu_', 1 / 29))
# Likelihood (sampling distribution) of observations
# Y_OBS = Normal('Y_OBS', mu=mu, sigma=sigma, observed=Y_OUTPUT)
Y_OBS = StudentT('Y_OBS', mu=MU, sigma=SIGMA, observed=Y_OUTPUT, nu=NU)

with cost_model:
    TRACE = sample(SAMPLES, tune=TUNE, cores=6)
traceplot(TRACE)

with cost_model:
    Y_PRED = sample_posterior_predictive(TRACE, 1000, cost_model)

# Posterior-mean prediction, exponentiated before storing.
Y_ = Y_PRED['Y_OBS'].mean(axis=0)
PP['model_cost'] = exp(Y_)  # depends on input/output
SUMMARY = df_summary(TRACE)

# Persist the model + trace, then all artefacts to HDF5.
with open('Time_and_Material_cost_model.pkl', 'wb') as f:
    dump({'model': cost_model, 'TRACE': TRACE}, f)
PROMPTS['F_BASENAME'] = F_BASENAME
with HDFStore('Time_and_Material_pricing_version_fp_2.h5') as store:
    store['PP'] = PP
    store['X'] = X
    store['Y'] = PP[MEASURE]
    store['MRR'] = MRR
    store['PROMPTS'] = DataFrame(PROMPTS, index=[1])
    store['SUMMARY'] = SUMMARY

# Percentage difference between modelled and actual totals.
_DELTA = 100 * (1 - (exp(Y_).sum() / PP[MEASURE].sum()))
print('*' * 80 + '\n' + '*' * 80)
#temp = 35.+15.*Uniform('temp', lower=-1, upper=1) #alpha = 3.45+0.75*Uniform('alpha', lower=-1, upper=1) plnorm = 0.3+0.2*Normal('plnorm', 0., 0.5) #src.sed.setBB(temp=temp) src.sed.setPL(turnover=tp,plnorm=plnorm) modflux = pho.getFlux(src) def logp(obs): return -0.5*((modflux-obs)/sigma)**2. Y_obs = DensityDist('Y_obs', logp, observed=Y) trace = sample(1000, n_init=50000) # obtain starting values via MAP #start = find_MAP(fmin=optimize.fmin_powell) # instantiate sampler #step = NUTS(scaling=start) # draw 2000 posterior samples #trace = sample(5000, step, start=start) out = np.array([35.+15.*trace['tp'], 0.3+0.2*trace['plnorm']]) import corner print df_summary(trace) labels = ['TP', 'plnorm'] fig = corner.corner(out.T,labels=labels, plot_density=False, plot_contours=False) fig.savefig("out.pdf")
# NOTE(review): script fragment — mu, sigma, trace, var, i, ax, fig,
# tbltest, Pheno and w0 come from code above this chunk.
sd3 = (-4*sigma + mu, 4*sigma + mu)  # plotting window: mean +/- 4 sigma
x = np.linspace(sd3[0], sd3[1], 300)
y = stats.norm(mu, sigma).pdf(x)
ax.plot(x, y)
# Vector variables need the component index; scalars are used directly.
if trace[var].ndim > 1:
    t = trace[var][i]
else:
    t = trace[var]
sns.distplot(t, kde=False, norm_hist=True, ax=ax)
fig.tight_layout()
#%%
pm.traceplot(trace, combined=True)
plt.show()

burnin = 0
# Posterior means of the 'w' and 'z' variables.
df_summary1 = pm.df_summary(trace[burnin:], varnames=['w'])
wpymc = np.asarray(df_summary1['mean'])
df_summary2 = pm.df_summary(trace[burnin:], varnames=['z'])
zpymc = np.asarray(df_summary2['mean'])

# Cross-check against a frequentist mixed model.
import statsmodels.formula.api as smf
tbltest['Pheno'] = Pheno
md = smf.mixedlm("Pheno ~ Condi1*Condi2", tbltest, groups=tbltest["subj"])
mdf = md.fit()
fixed = np.asarray(mdf.fe_params).flatten()

# Overlay the ground truth, the PyMC estimate and the LME estimate.
plt.figure()
plt.plot(w0, 'r')
plt.plot(wpymc, 'b')
plt.plot(fixed, 'g')
plt.legend(['real', 'PyMC', 'LME'])
# NOTE(review): script fragment — alpha, alpha1, theta, sdd, model_1 and the
# elec_* arrays come from code above this chunk.
# Zero-inflation probability via inverse-logit of a linear predictor.
psi = pm.Deterministic('psi', Invlogit(alpha + alpha1 * elec_year[0:84]))
Observed = pm.ZeroInflatedNegativeBinomial(
    'Observed', psi=psi, mu=theta, alpha=sdd,
    observed=elec_faults[0:84])  # observed values (translated)
# step1 = pm.Slice([theta1, Δ_a])
start = pm.find_MAP()
trace_1 = pm.sample(1000, start=start, njobs=1)  # , init='advi+adapt_diag'
pm.traceplot(trace_1)
plt.show()

# Posterior analysis.  (translated)
varnames2 = ['theta']
tmp = pm.df_summary(trace_1, varnames2)
betaMAP = tmp['mean'][np.arange(12)]  # posterior means of 12 theta entries
print(betaMAP)

with model_1:
    pp_trace = pm.sample_ppc(trace_1, 1000)

ip = 0
fig, ax = plt.subplots(figsize=(8, 6))
x_plot = np.linspace(0.9, 12.1, 12)
# 5th-95th percentile band of the posterior predictive.
low, high = np.percentile(pp_trace['Observed'], [5, 95], axis=0)
xp = elec_year2[ip * 7:(ip + 1) * 7, :]  # raw data (translated)
yp = elec_faults2[ip * 7:(ip + 1) * 7, :]
ax.plot(xp, yp, marker='o', alpha=.8)
ax.plot(x_plot, betaMAP[:], marker='*', alpha=.8, label="Fitting estimate")
ax.fill_between(x_plot, low[:12], high[:12], alpha=0.5)