def test_normal_vector(self, caplog):
    """Check posterior predictive sampling for a model with a length-2 observation.

    Calls both ``pm.sample_posterior_predictive`` and
    ``pm.fast_sample_posterior_predictive`` with a list of points, a trace,
    and the object returned by ``az.from_pymc3``, and asserts on the size and
    shape of the returned samples for the observed variable ``"a"``.
    """
    with pm.Model() as model:
        mu = pm.Normal("mu", 0.0, 1.0)
        # Two observed values, hence the trailing dimension of 2 asserted below.
        a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2]))
        trace = pm.sample()

    with model:
        # test list input
        # ppc0 is never inspected: the call only has to succeed.
        ppc0 = pm.sample_posterior_predictive([model.test_point], samples=10)
        # An empty var_names list must produce an empty result.
        ppc = pm.sample_posterior_predictive(trace, samples=12, var_names=[])
        assert len(ppc) == 0

        # test list input
        ppc0 = pm.fast_sample_posterior_predictive([model.test_point], samples=10)
        ppc = pm.fast_sample_posterior_predictive(trace, samples=12, var_names=[])
        assert len(ppc) == 0

        # test keep_size parameter
        ppc = pm.sample_posterior_predictive(trace, keep_size=True)
        assert ppc["a"].shape == (trace.nchains, len(trace), 2)
        # Passing an explicit `samples` value is expected to emit a UserWarning.
        with pytest.warns(UserWarning):
            ppc = pm.sample_posterior_predictive(trace, samples=12, var_names=["a"])
        assert "a" in ppc
        assert ppc["a"].shape == (12, 2)

        # test keep_size parameter with inference data as input...
        idata = az.from_pymc3(trace)
        ppc = pm.sample_posterior_predictive(idata, keep_size=True)
        assert ppc["a"].shape == (trace.nchains, len(trace), 2)
        # NOTE(review): this call passes `trace`, not `idata`, so it repeats the
        # check above rather than exercising the converted input - confirm intent.
        with pytest.warns(UserWarning):
            ppc = pm.sample_posterior_predictive(trace, samples=12, var_names=["a"])
        assert "a" in ppc
        assert ppc["a"].shape == (12, 2)

        # test keep_size parameter
        ppc = pm.fast_sample_posterior_predictive(trace, keep_size=True)
        assert ppc["a"].shape == (trace.nchains, len(trace), 2)
        with pytest.warns(UserWarning):
            ppc = pm.fast_sample_posterior_predictive(trace, samples=12, var_names=["a"])
        assert "a" in ppc
        assert ppc["a"].shape == (12, 2)

        # test keep_size parameter with inference data as input
        ppc = pm.fast_sample_posterior_predictive(idata, keep_size=True)
        assert ppc["a"].shape == (trace.nchains, len(trace), 2)
        # NOTE(review): as above, `trace` rather than `idata` is passed here.
        with pytest.warns(UserWarning):
            ppc = pm.fast_sample_posterior_predictive(trace, samples=12, var_names=["a"])
        assert "a" in ppc
        assert ppc["a"].shape == (12, 2)

        # size unsupported by fast_ version argument. [2019/08/19:rpg]
        # With size=4 the result gains an extra axis: (samples, size, observed length).
        ppc = pm.sample_posterior_predictive(trace, samples=10, var_names=["a"], size=4)
        assert "a" in ppc
        assert ppc["a"].shape == (10, 4, 2)
mu = pm.Normal('mu', mu=0, sd=10, shape=groups) sigma = pm.HalfNormal('sigma', sd=10, shape=groups) v = pm.Exponential('v', 1 / 30) y = pm.StudentT('y', mu=mu[idx], sd=sigma[idx], nu=v, observed=tip) trace1 = pm.sample(5000) # outliers, but own can vary with pm.Model() as model2: mu = pm.Normal('mu', mu=0, sd=10, shape=groups) sigma = pm.HalfNormal('sigma', sd=10, shape=groups) v = pm.Exponential('v', 1 / 30, shape=groups) y = pm.StudentT('y', mu=mu[idx], sd=sigma[idx], nu=v[idx], observed=tip) trace2 = pm.sample(5000) y_pred = pm.sample_posterior_predictive(trace, 100, model) data_ppc = az.from_pymc3(trace=trace, posterior_predictive=y_pred) ax0 = az.plot_ppc(data_ppc, kind='kde', mean=False) plt.xlim(-2, 8) y_pred1 = pm.sample_posterior_predictive(trace1, 100, model1) data_ppc1 = az.from_pymc3(trace=trace, posterior_predictive=y_pred1) az.plot_ppc(data_ppc1, kind='kde', mean=False) plt.xlim(-2, 8) # works best by far y_pred2 = pm.sample_posterior_predictive(trace2, 100, model2) data_ppc2 = az.from_pymc3(trace=trace, posterior_predictive=y_pred2) az.plot_ppc(data_ppc2, kind='kde', mean=False) plt.xlim(-2, 8) """ Compute the probability of superiority directly from the posterior (without
def test_sample_from_xarray_posterior(self, point_list_arg_bug_fixture):
    """Smoke-test posterior predictive sampling from the ``posterior`` group.

    The fixture's trace is converted with ``az.from_pymc3`` and only the
    ``posterior`` attribute of the result is handed to
    ``pm.sample_posterior_predictive``; the call just has to complete.
    """
    fixture_model, fixture_trace = point_list_arg_bug_fixture
    converted = az.from_pymc3(fixture_trace)
    with fixture_model:
        posterior_group = converted.posterior
        predictive = pm.sample_posterior_predictive(posterior_group, var_names=["d"])
a = pm.Normal("a", mu=100, sd=250) b = pm.Normal("b", mu=10, sd=250) sigma = pm.HalfNormal("sigma", sd=200) y_pred = pm.Normal('y_pred', mu=a + b * day_idx, sd=sigma, observed=reactions.Reaction, dims="driver_idx_day") ## inference trace_p = pm.sample(samples, chains=chains, tune=tune) prior_p = pm.sample_prior_predictive(samples=samples) posterior_predictive_p = pm.sample_posterior_predictive(trace_p, samples=samples) ## STEP 1 ## export inference results in ArviZ InferenceData obj ## will also capture all the sampler statistics data_p = az.from_pymc3(trace=trace_p, prior=prior_p, posterior_predictive=posterior_predictive_p) ## STEP 2 ## extract dag dag_p = get_dag(fullyPooled_model) ## insert dag into sampler stat attributes data_p.sample_stats.attrs["graph"] = str(dag_p) ## STEP 3 ## save data fileName_p = "reaction_times_pooled" arviz_to_json(data_p, fileName_p + '.npz')
#model-inference fileName='coal_mining_disasters_PyMC3' samples=10000 tune=10000 chains=2 coords = {"year": years} with pm.Model(coords=coords) as disaster_model: switchpoint = pm.DiscreteUniform('switchpoint', lower=years.min(), upper=years.max(), testval=1900) early_rate = pm.Exponential('early_rate', 1) late_rate = pm.Exponential('late_rate', 1) rate = pm.math.switch(switchpoint >= years, early_rate, late_rate) disasters = pm.Poisson('disasters', rate, observed=disaster_data, dims='year') #inference trace = pm.sample(samples, chains=chains, tune=tune) prior = pm.sample_prior_predictive(samples=samples) posterior_predictive = pm.sample_posterior_predictive(trace,samples=samples) ## STEP 1 # will also capture all the sampler statistics data = az.from_pymc3(trace=trace, prior=prior, posterior_predictive=posterior_predictive) ## STEP 2 # extract dag dag = get_dag(disaster_model) # insert dag into sampler stat attributes data.sample_stats.attrs["graph"] = str(dag) ## STEP 3 # save data arviz_to_json(data, fileName+'.npz')
def causality_test():
    """Load the csv file, build EDA plots and fit a series of PyMC3 models.

    The function draws a correlation heatmap, standardizes the regressors and
    then fits, in order, the models Sex->Brain, Age->Brain, Sex+Age->Brain,
    Brain->Memory, School->Memory, Age->Memory and Age+Brain->Memory. For
    each model it prints an ArviZ summary and/or saves trace, forest,
    posterior-predictive or scatter figures under ``figures_dir``.

    Nothing is returned; all results are printed or written as PNG files.

    NOTE(review): ``df`` is never assigned inside this function, so it must be
    a module-level DataFrame; the freshly loaded ``dataframe`` is not used
    after being copied. Confirm whether ``df`` was meant to be derived from it.
    ``csv_path`` is an empty placeholder and must be filled in before running.
    """
    # Load Data https://github.com/grjd/causalityagingbrain/blob/main/dataset_gh.csv
    csv_path = ""
    dataframe = pd.read_csv(csv_path, sep=';')
    dataframe_orig = dataframe.copy()
    plots_and_stuff(df)

    # Correlation heatmap; the mask hides the upper triangle (diagonal included).
    corrmatrix = df.corr(method='pearson')
    mask = np.zeros_like(corrmatrix)
    mask[np.triu_indices_from(mask)] = True
    plt.figure(figsize=(7, 7))
    heatmap = sns.heatmap(corrmatrix,
                          mask=mask,
                          annot=True,
                          center=0,
                          square=True,
                          linewidths=.5)
    heatmap.set_xticklabels(colsofinterest_Eng,
                            rotation=45,
                            fontsize='small',
                            horizontalalignment='right')
    heatmap.set_yticklabels(colsofinterest_Eng,
                            rotation=0,
                            fontsize='small',
                            horizontalalignment='right')
    fig_file = os.path.join(figures_dir, 'heat_CorrChapter.png')
    plt.savefig(fig_file)

    # Standardize regressors and target
    df["brain_std"] = standardize(df["fr_BrainSegVol_to_eTIV_y1"])
    df["age_std"] = standardize(df["edad_visita1"])
    df["cog_std"] = standardize(df["fcsrtlibdem_visita1"])
    # Encode Categorical Variables
    df["school_id"] = pd.Categorical(df["nivel_educativo"]).codes
    df["sex_id"] = pd.Categorical(df["sexo"]).codes

    # ---------------- SEX (0M, 1F) -> BRAIN ----------------
    with pm.Model() as mXB:
        sigma = pm.HalfNormal("sigma", sd=1)
        # One mean per sex code, selected per row through df["sex_id"].
        mu_x = pm.Normal("mu_x", 0.0, 1.0, shape=2)
        brain_remained = pm.Normal("brain_remained",
                                   mu_x[df["sex_id"]],
                                   sigma,
                                   observed=df["brain_std"])
        # men - women
        # mu[0] 0.695, mu[1] 0.709 Women came at late age with less atrophy, bigger brains
        diff_fm = pm.Deterministic("diff_fm", mu_x[0] - mu_x[1])
        mXB_trace = pm.sample(1000)
    print(az.summary(mXB_trace))
    az.plot_trace(mXB_trace, var_names=["mu_x", "sigma"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace_sex_brain-hn.png'))
    az.plot_forest(mXB_trace,
                   combined=True,
                   model_names=["X~B"],
                   var_names=["mu_x"],
                   hdi_prob=0.95)
    plt.savefig(os.path.join(figures_dir, 'pm_forest_sex_brain-hn.png'))
    # Posterior Predictive checks
    y_pred_g = pm.sample_posterior_predictive(mXB_trace, 100, mXB)
    data_ppc = az.from_pymc3(trace=mXB_trace, posterior_predictive=y_pred_g)
    ax = az.plot_ppc(data_ppc, figsize=(12, 6), mean=False)
    ax[0].legend(fontsize=15)
    plt.savefig(os.path.join(figures_dir, 'ppc_xXB-hn.png'))

    # ---------------- AGE -> BRAIN ----------------
    print('Calling to PyMC3 Model Age - > Brain...\n')
    with pm.Model() as m_AB:
        alpha = pm.Normal("alpha", 0, 1)  #0.2
        betaA = pm.Normal("betaA", 0, 1)  #0.5
        sigma = pm.HalfNormal("sigma", sd=1)
        mu = pm.Deterministic("mu", alpha + betaA * df["age_std"])
        brain_std = pm.Normal("brain_std",
                              mu=mu,
                              sigma=sigma,
                              observed=df["brain_std"].values)
        prior_samples = pm.sample_prior_predictive()
        m_AB_trace = pm.sample(1000)
    print(az.summary(m_AB_trace, var_names=["alpha", "betaA", "sigma"]))
    az.plot_trace(m_AB_trace, var_names=["alpha", "betaA", "sigma"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace_age_brain.png'))
    az.plot_forest([
        m_AB_trace,
    ],
                   model_names=["A~B"],
                   var_names=["betaA"],
                   combined=True,
                   hdi_prob=0.95)
    plt.savefig(os.path.join(figures_dir, 'pm_forest_AtoB.png'))
    # Posterior Predictive checks
    y_pred_g = pm.sample_posterior_predictive(m_AB_trace, 100, m_AB)
    data_ppc = az.from_pymc3(trace=m_AB_trace, posterior_predictive=y_pred_g)
    ax = az.plot_ppc(data_ppc, figsize=(12, 6), mean=False)
    ax[0].legend(fontsize=15)
    plt.savefig(os.path.join(figures_dir, 'ppc_AB-hn.png'))

    # ---------------- SEX+AGE -> BRAIN ----------------
    print('Calling to PyMC3 Model Age + Sex - > Brain...\n')
    sexco = pd.Categorical(df.loc[:, "sexo"].astype(int))
    with pm.Model() as m_XAB:
        alphax = pm.Normal("alphax", 0, 1, shape=2)
        betaA = pm.Normal("betaA", 0, 1)
        # Sex-specific intercept plus a shared age slope.
        mu = alphax[sexco] + betaA * df["age_std"]
        sigma = pm.Exponential("sigma", 1)
        brain_std = pm.Normal("brain_std",
                              mu=mu,
                              sigma=sigma,
                              observed=df["brain_std"].values)
        prior_samples = pm.sample_prior_predictive()
        m_XAB_trace = pm.sample()
    print(az.summary(m_XAB_trace, var_names=["alphax", "betaA", "sigma"]))
    az.plot_trace(m_XAB_trace, var_names=["alphax", "betaA"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace_ageandsex_brain.png'))
    # Compare the three brain models side by side.
    az.plot_forest([
        m_XAB_trace,
        mXB_trace,
        m_AB_trace,
    ],
                   model_names=["XA~B", "X~B", "A~B"],
                   var_names=["alphax", "mu_x", "betaA"],
                   combined=True,
                   hdi_prob=0.95)
    plt.savefig(os.path.join(figures_dir, 'pm_forest_mXAtoB.png'))
    # Posterior Predictive checks
    y_pred_g = pm.sample_posterior_predictive(m_XAB_trace, 100, m_XAB)
    data_ppc = az.from_pymc3(trace=m_XAB_trace, posterior_predictive=y_pred_g)
    ax = az.plot_ppc(data_ppc, figsize=(12, 6), mean=False)
    ax[0].legend(fontsize=15)
    plt.savefig(os.path.join(figures_dir, 'ppc_XAB-hn.png'))

    # ---------------- BRAIN -> MEMORY ----------------
    print('Calling to PyMC3 Model Brain - > Memory...\n')
    with pm.Model() as m_BC:
        alpha = pm.Normal("alpha", 0, 1)  #0.2
        betaB = pm.Normal("betaB", 0, 1)  #0.5
        sigma = pm.Exponential("sigma", 1)
        mu = pm.Deterministic("mu", alpha + betaB * df["brain_std"])
        cognition_std = pm.Normal("cognition_std",
                                  mu=mu,
                                  sigma=sigma,
                                  observed=df["cog_std"].values)
        prior_samples = pm.sample_prior_predictive()
        m_BC_trace = pm.sample()
    az.plot_trace(m_BC_trace, var_names=["alpha", "betaB"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace_brain_cog.png'))
    print(az.summary(m_BC_trace, var_names=["alpha", "betaB", "sigma"]))
    # Scatter plot x = Brain atrophy Y= Memory test
    mu_mean = m_BC_trace['mu']
    mu_hpd = pm.hpd(mu_mean)
    plt.figure(figsize=(9, 9))
    df.plot('brain_std', 'cog_std', kind='scatter')  #, xlim = (-2, 2)
    # Overlay the posterior mean of mu, averaged over draws (axis 0).
    plt.plot(df.brain_std, mu_mean.mean(0), 'C2')
    plt.savefig(os.path.join(figures_dir, 'scatter_hpd_B2M.png'))
    print('Saved Figure scatter_hpd_B2M.png \n')

    # ---------------- SCHOOL -> MEMORY ----------------
    print('Calling to PyMC3 Model School - > Memory...\n')
    # School -> Memory method2 m5_9
    with pm.Model() as mSM2:
        sigma = pm.Exponential("sigma", 1)
        # One mean per school code; codes run from 0 to max, hence max + 1 entries.
        mu = pm.Normal("mu", 0.0, 0.5, shape=df["school_id"].max() + 1)
        memory = pm.Normal("memory",
                           mu[df["school_id"]],
                           sigma,
                           observed=df["cog_std"])
        mSM2_trace = pm.sample()
    print(az.summary(mSM2_trace))
    az.plot_trace(mSM2_trace, var_names=["mu", "sigma"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace2_school_memory.png'))
    az.plot_forest(mSM2_trace, combined=True, var_names=["mu"], hdi_prob=0.95)
    plt.savefig(os.path.join(figures_dir, 'pm_forest2_school_memory.png'))
    # A leftover pdb.set_trace() used to stop here; it was removed so the
    # remaining models run without dropping into an interactive debugger.

    # ---------------- AGE -> MEMORY ----------------
    print('Calling to PyMC3 Model Age - > Memory...\n')
    with pm.Model() as m_AC:
        alpha = pm.Normal("alpha", 0, 1)
        betaA = pm.Normal("betaA", 0, 1)
        sigma = pm.Exponential("sigma", 1)
        mu = pm.Deterministic("mu", alpha + betaA * df["age_std"])
        cognition_std = pm.Normal("cognition_std",
                                  mu=mu,
                                  sigma=sigma,
                                  observed=df["cog_std"].values)
        prior_samples = pm.sample_prior_predictive()
        m_AC_trace = pm.sample()
    az.plot_trace(m_AC_trace, var_names=["alpha", "betaA"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace_age_cog.png'))
    print(az.summary(m_AC_trace, var_names=["alpha", "betaA", "sigma"]))
    # Scatter A2M
    mu_mean = m_AC_trace['mu']
    mu_hpd = pm.hpd(mu_mean)
    plt.figure(figsize=(9, 9))
    df.plot('age_std', 'cog_std', kind='scatter')  #, xlim = (-2, 2)
    plt.plot(df.age_std, mu_mean.mean(0), 'C2')
    plt.savefig(os.path.join(figures_dir, 'scatter_hpd_A2M.png'))
    print('Saved Figure scatter_hpd_A2M.png \n')

    # ---------------- AGE + BRAIN -> MEMORY ----------------
    print('Calling to PyMC3 Model Age + Brain - > Memory...\n')
    with pm.Model() as m_BAC:
        alpha = pm.Normal("alpha", 0, 1)
        betaA = pm.Normal("betaA", 0, 1)
        betaB = pm.Normal("betaB", 0, 1)
        sigma = pm.Exponential("sigma", 1)
        mu = pm.Deterministic(
            "mu", alpha + betaA * df["age_std"] + betaB * df["brain_std"])
        cognition_std = pm.Normal("cognition_std",
                                  mu=mu,
                                  sigma=sigma,
                                  observed=df["cog_std"].values)
        prior_samples = pm.sample_prior_predictive()
        m_BAC_trace = pm.sample()
    print(
        az.summary(m_BAC_trace,
                   var_names=["alpha", "betaB", "betaA", "sigma"]))
    # Compare the slopes of the three memory models.
    az.plot_forest([
        m_BAC_trace,
        m_AC_trace,
        m_BC_trace,
    ],
                   model_names=["BA~C", "A~C", "B~C"],
                   var_names=["betaA", "betaB"],
                   combined=True,
                   hdi_prob=0.95)
    plt.savefig(os.path.join(figures_dir, 'pm_forest_mBAC_AB2M.png'))
# Kernel density estimate of the 'EQ' column with a rug of the raw values.
az.plot_kde(data['EQ'].values, rug=True)
# Keep a single, fully transparent y tick.
plt.yticks([0], alpha=0)

with pm.Model() as model_g:
    mu = pm.Normal('mu', mu=0, sd=5)
    tau = pm.HalfCauchy('tau', beta=5)
    # theta is built from a standard normal offset scaled by tau and shifted by mu.
    theta_tilde = pm.Normal('theta_tilde', mu=0, sd=1)
    theta = pm.Deterministic('theta', mu + tau * theta_tilde)
    y = pm.Normal('y', mu=theta, sd=tau, observed=data['EQ'].values)
    trace_g = pm.sample(1000, chains=2)
    prior = pm.sample_prior_predictive()
    posterior_predictive = pm.sample_posterior_predictive(trace_g)
    # Assign to the variable pm_data. The original wrote `pm.data = ...`, which
    # overwrote an attribute on the pymc3 module and left pm_data undefined.
    pm_data = az.from_pymc3(
        trace=trace_g,
        prior=prior,
        posterior_predictive=posterior_predictive,
    )

pm_data

# Shell step carried over from the notebook (not Python, so kept as a comment):
#   pip install "pymc3>=3.8"

# Re-run of the predictive sampling and conversion. It is wrapped in the model
# context because the sampling calls are made without an explicit model argument.
with model_g:
    prior = pm.sample_prior_predictive()
    posterior_predictive = pm.sample_posterior_predictive(trace_g)
    pm_data = az.from_pymc3(
        trace=trace_g,
        prior=prior,
        posterior_predictive=posterior_predictive,
    )
emcee_data = az.from_emcee( sampler, var_names=[ "mean", "log_sigma1", "log_rho1", "log_tau", "log_sigma2", "log_rho2", "log_jitter", ], ) with model: pm_data = az.from_pymc3(trace) numpyro_data = az.from_numpyro(mcmc) bins = np.linspace(1.5, 2.75, 25) plt.hist( np.exp(np.asarray((emcee_data.posterior["log_rho1"].T)).flatten()), bins, histtype="step", density=True, label="emcee", ) plt.hist( np.exp(np.asarray((pm_data.posterior["log_rho1"].T)).flatten()), bins, histtype="step",
y_ar.append(generate_ar_sample(tdaynew, omega, tt, use_ar=True)) a0_pred = np.array(y_ar) ####### # Save the samples ####### # Convert the data to arviz structure # Save the predictions dims = ('chain','draw','time') ds = az.from_pymc3_predictions({'a0':a0_pred}, \ coords={'time':predtime,'chain':np.array([1])}, dims={'a0':dims}) # Save the posterior ds2 = az.from_pymc3(trace=trace) # Update the observed data becuase it comes out as a theano.tensor in the way # our particular model is specified ds2.observed_data['X_obs'] = xr.DataArray(X, dims=('time',), coords={'time':timein}) # This merges the data sets ds2.extend(ds) # Save ds2.to_netcdf(outputnc) print(ds2) print('Done') print(72*'#')
} with pm.Model() as model: mu = pm.Normal('mu', mu=0, sd=5) tau = pm.HalfCauchy('tau', beta=5) theta_tilde = pm.Normal('theta_tilde', mu=0, sd=1, shape=eight_school_data['J']) theta = pm.Deterministic('theta', mu + tau * theta_tilde) pm.Normal('obs', mu=theta, sd=eight_school_data['sigma'], observed=eight_school_data['y']) trace = pm.sample(draws, chains=chains) prior = pm.sample_prior_predictive() posterior_predictive = pm.sample_posterior_predictive(trace) pm_data = az.from_pymc3( trace=trace, prior=prior, posterior_predictive=posterior_predictive, coords={'school': np.arange(eight_school_data['J'])}, dims={'theta': ['school'], 'theta_tilde': ['school']}, ) #pm_data #%% az.plot_posterior(pm_data) plt.show() #%% data = az.load_arviz_data('centered_eight') az.plot_posterior(data, coords={"school": ["Choate", "Deerfield"]}) plt.show()
y = pm.Normal('y', mu = μ, sd = σ, observed = data) trace_g = pm.sample(1000) az.plot_trace(trace_g) # %% az.plot_joint(trace_g, kind='kde', fill_last=False) # %% az.summary(trace_g) # %% y_pred_g = pm.sample_posterior_predictive(trace_g, 100, model_g) # %% data_ppc = az.from_pymc3(trace=trace_g, posterior_predictive=y_pred_g) ax = az.plot_ppc(data_ppc, figsize=(12,6), mean=False) ax[0].legend(fontsize=15) # %% np.mean(stats.t(loc=0, scale=1, df=100).rvs(100)) # %% plt.figure(figsize=(10, 6)) x_values = np.linspace(-10, 10, 500) for df in [1, 2, 30]: distri = stats.t(df) x_pdf = distri.pdf(x_values) plt.plot(x_values, x_pdf, label=fr'$\nu = {df}$', lw=3) x_pdf = stats.norm.pdf(x_values)
ax.set_ylabel("kilocal per g (std)") # %% shared_N = shared(dcc["N"].values) with pm.Model() as m5_5: sigma = pm.Exponential("sigma", 1) bN = pm.Normal("bN", 0, 0.5) a = pm.Normal("a", 0, 0.2) mu = pm.Deterministic("mu", a + bN * shared_N) K = pm.Normal("K", mu, sigma, observed=dcc["K"]) m5_5_trace = pm.sample() m5_5_data = az.from_pymc3(m5_5_trace) # %% az.summary(m5_5_trace, var_names=["a", "bN", "sigma"]) # %% xseq = np.linspace(dcc["N"].min() - 0.15, dcc["N"].max() + 0.15, 30) shared_N.set_value(xseq) with m5_5: m5_5_posterior_predictive = pm.sample_posterior_predictive( m5_5_trace, var_names=["mu"], samples=4000) mu_mean = m5_5_posterior_predictive["mu"].mean(axis=0)
label="Non-robust regression", alpha=0.5) # plot the data plt.plot(x, y, 'C0o') # get the mean of the intercept from the posterior alpha_m = trace["alpha"].mean() # get the mean of the coefficient from the posterior beta_m = trace["beta"].mean() # plot the robust linear regression plt.plot(x, alpha_m + beta_m * x, c="k", label="Robust linear regression") # plot the variety of predicted results az.plot_hpd(x, ppc["obs"]) # set the last details of the graph plt.xlabel("x") plt.ylabel("y", rotation=0) #plt.legend(loc=2) plt.tight_layout() plt.show() # ----------------- analyse the posterior -------------------- # with model_t: az.plot_trace(trace, var_names=["alpha", "beta", "sigma", "vu"]) # get the summary log.info("the trace summary is: %s", az.summary(trace)) # let's also run a posterior predictive check ppc = pm.sample_posterior_predictive(trace, samples=2000) data_ppc = az.from_pymc3(trace=trace, posterior_predictive=ppc) ax = az.plot_ppc(data_ppc, figsize=(12, 6), mean=True) plt.xlim(0, 12)
# reparameterized # says this should have issue w/ parameter non-identifiablity, but seems fine with pm.Model() as model_mg: p = pm.Dirichlet('p', a=np.ones(clusters)) means = pm.Normal('means', mu=cs_exp.mean(), sd=10, shape=clusters) sd = pm.HalfNormal('sd', sd=10) y = pm.NormalMixture('y', w=p, mu=means, sd=sd, observed=cs_exp) trace_mg = pm.sample(random_seed=123) varnames = ['means', 'p'] az.plot_trace(trace_mg, varnames) az.summary(trace_mg, varnames) ppc_mg = pm.sample_posterior_predictive(trace_mg, 2000, model=model_mg) data_ppc = az.from_pymc3(trace=trace_mg, posterior_predictive=ppc_mg) ax = az.plot_ppc(data_ppc, figsize=(12, 6), mean=False) clusters = 2 with pm.Model() as model_mgp: p = pm.Dirichlet('p', a=np.ones(clusters)) means = pm.Normal('means', mu=np.array([.9, 1]) * cs_exp.mean(), sd=10, shape=clusters) sd = pm.HalfNormal('sd', sd=10) # Potential adds a constraint to the model # sayi order_means = pm.Potential('order_means', tt.switch(means[1]-means[0] < 0, -np.inf, 0)) y = pm.NormalMixture('y', w=p, mu=means, sd=sd, observed=cs_exp)
def test_sample_posterior_predictive_w(self):
    """Exercise ``pm.sample_posterior_predictive_w`` on valid and invalid input.

    Two models sharing one observed data set are sampled briefly, then the
    weighted posterior predictive is drawn from their traces, from the
    ``az.from_pymc3`` conversions, and from a single ``posterior`` group.
    The remaining checks assert the ``ValueError`` messages raised for
    mismatched traces/weights, mismatched models/weights, and models that
    differ in their number of observed RVs.
    """
    observations = np.random.normal(0, 1, size=50)
    convergence_warning = "The number of samples is too small to check convergence reliably"
    # Sampler settings common to all three short runs.
    short_run = dict(tune=0, return_inferencedata=False)

    with pm.Model() as model_0:
        mu = pm.Normal("mu", mu=0, sigma=1)
        pm.Normal("y", mu=mu, sigma=1, observed=observations)
        with pytest.warns(UserWarning, match=convergence_warning):
            trace_0 = pm.sample(10, chains=2, **short_run)
        idata_0 = az.from_pymc3(trace_0)

    with pm.Model() as model_1:
        mu = pm.Normal("mu", mu=0, sigma=1, shape=len(observations))
        pm.Normal("y", mu=mu, sigma=1, observed=observations)
        with pytest.warns(UserWarning, match=convergence_warning):
            trace_1 = pm.sample(10, chains=2, **short_run)
        idata_1 = az.from_pymc3(trace_1)

    with pm.Model() as model_2:
        # Model with no observed RVs.
        pm.Normal("mu", mu=0, sigma=1)
        with pytest.warns(UserWarning, match=convergence_warning):
            trace_2 = pm.sample(10, **short_run)

    both_models = [model_0, model_1]

    # Raw traces as input.
    weighted = pm.sample_posterior_predictive_w([trace_0, trace_1], 100, both_models)
    assert weighted["y"].shape == (100, 50)

    # Converted inference data as input.
    weighted = pm.sample_posterior_predictive_w([idata_0, idata_1], 100, both_models)
    assert weighted["y"].shape == (100, 50)

    # Single posterior group, model taken from the context, samples left as None.
    with model_0:
        weighted = pm.sample_posterior_predictive_w([idata_0.posterior], None)
        assert weighted["y"].shape == (20, 50)

    single_posterior = [idata_0.posterior]

    with pytest.raises(
        ValueError, match="The number of traces and weights should be the same"
    ):
        pm.sample_posterior_predictive_w(single_posterior, 100, both_models, weights=[0.5, 0.5])

    with pytest.raises(
        ValueError, match="The number of models and weights should be the same"
    ):
        pm.sample_posterior_predictive_w(single_posterior, 100, both_models)

    with pytest.raises(
        ValueError, match="The number of observed RVs should be the same for all models"
    ):
        pm.sample_posterior_predictive_w([trace_0, trace_2], 100, [model_0, model_2])
# Expand dims twice to get extra axes for chain and sample point_extra_dim = { k: np.expand_dims(np.expand_dims(v, axis=0), axis=0) for k, v in pm.find_MAP().items() } self.trace = az.from_dict(point_extra_dim) elif fit_method == 'mcmc': self.trace = pm.sample(tune=mcmc_iter, draws=mcmc_iter, return_inferencedata=True, chains=chains, cores=cores) elif fit_method == 'vi': self.approx = pm.fit(vi_iter) self.trace = self.approx.sample() self.trace = az.from_pymc3(self.trace) else: raise NotImplementedError( f'Fit method {fit_method} not supported') def predict(self, X, predict_kwargs={'diag': True, 'pred_noise': True}): ''' Generate GP predictions at new spatiotemporal coordinates. Arguments --------- X : sequence of Numpy arrays The first array will usually be a Sx2 array of spatial coordinates while the second array will be a Tx1 array of temporal coordinates. predict_kwargs : dict Additional keyword arguments passed to PyMC3 gp object. "diag" controls whether or not
np.random.seed(909) # simulate height height = np.random.normal(10, 2, size=N) # leg as proportion of height leg_prop = np.random.uniform(low=0.4, high=0.5, size=N) leg_left = leg_prop * height + np.random.normal(scale=0.02, size=N) leg_right = leg_prop * height + np.random.normal(scale=0.02, size=N) d = pd.DataFrame(dict(height=height, leg_left=leg_left, leg_right=leg_right)) d.head(2) pd.plotting.scatter_matrix(d) with pm.Model() as m1: br = pm.Normal('br', 2, 10) bl = pm.Normal('bl', 2, 10) sigma = pm.Exponential('sigma', 1) a = pm.Normal('a', 10, 100) mu = a + bl * d.leg_left + br * d.leg_right height = pm.Normal('height', mu=mu, sd=sigma, observed=d.height) trc1 = pm.sample() pm.summary(trc1, alpha=0.11) ar.plot_forest(trc1, var_names=['a', 'bl', 'br'], combined=True, figsize=(5, 2)) pl.axvline(color='k', ls=':') with m1: prior = pm.sample_prior_predictive() ppc = pm.sample_posterior_predictive(trc1, samples=500, model=m1) ar1 = ar.from_pymc3(trace=trc1, prior=prior, posterior_predictive=ppc) ar.plot_pair(trc1, var_names=['a', 'br', 'bl'], divergences=True, kind='kde')
def fit(self, p1, p2, p1_mode, p2_mode, divisor = 20, deviation = 0.2):
    """Build the seasonal model, store it, and plot its prior predictive.

    The model is assigned to ``self.model``; 100 prior predictive draws are
    then sampled and shown with ``az.plot_ppc``. No posterior sampling is
    done here and nothing is returned.

    Args:
        p1 (tuple): ``(p, n_components)`` for the first seasonal component.
            ``p`` may be a float; ``n_components`` must be an integer.
        p2 (tuple): ``(p, n_components)`` for the second seasonal component,
            same conventions as ``p1``.
        p1_mode (str): Passed as ``mode`` to ``self.seasonal_component`` for
            the first component, e.g. "multiplicative" or "additive".
        p2_mode (str): Same as ``p1_mode``, for the second component.
        divisor (int, optional): The standard deviation of each ``p`` is
            ``p / divisor`` before rescaling. Defaults to 20.
        deviation (float, optional): Passed as ``beta_sd`` to both seasonal
            components. Defaults to 0.2.

    Example:
        Pc.fit(p1 = (7, 2), p2 = (365.25, 2), p1_mode = "additive",
               p2_mode = "multiplicative", divisor = 15, deviation = 0.3)
    """
    ## NB: we assume that input is in days.
    time_codes = self.train[self.time_codes]
    code_min = time_codes.min()
    code_span = time_codes.max() - code_min

    def to_unit_scale(value):
        # Min-max rescale against the training time codes.
        return (value - code_min) / code_span

    ## p1: the sd is derived from the raw period, then both are rescaled.
    p1_period, p1_components = p1
    p1_mu = to_unit_scale(p1_period)
    p1_sd = to_unit_scale(p1_period / divisor)

    ## p2: same treatment as p1.
    p2_period, p2_components = p2
    p2_mu = to_unit_scale(p2_period)
    p2_sd = to_unit_scale(p2_period / divisor)

    with pm.Model(coords=self.coords) as m0:

        # shared data containers
        t1_shared = pm.Data('t1_shared', self.t1_train)
        t2_shared = pm.Data('t2_shared', self.t2_train)
        t3_shared = pm.Data('t3_shared', np.array(self.t3_train))
        idx_shared = pm.Data('idx_shared', self.idx_train)

        # arguments that are the same for both seasonal components
        common = dict(
            beta_sd = deviation,
            shape = self.n_train,
            t1 = t1_shared,
            t2 = t2_shared,
            t3 = t3_shared,
        )

        # Only the third element returned by seasonal_component is used below.
        _, _, p1_flat = self.seasonal_component(
            name = "p1",
            name_beta = "beta_p1_waves",
            mu = p1_mu,
            sd = p1_sd,
            n_components = p1_components,
            mode = p1_mode,
            **common)

        _, _, p2_flat = self.seasonal_component(
            name = "p2",
            name_beta = "beta_p2_waves",
            mu = p2_mu,
            sd = p2_sd,
            n_components = p2_components,
            mode = p2_mode,
            **common)

        # other priors
        beta_line = pm.Normal('beta_line', mu = 0, sd = 0.3, shape = self.n_train)
        alpha = pm.Normal('alpha', mu = 0.5, sd = 0.3, shape = self.n_train)

        # intercept and trend indexed by idx_shared, plus both seasonal terms
        trend = alpha[idx_shared] + beta_line[idx_shared] * t1_shared
        mu = trend + p1_flat + p2_flat

        # sigma
        sigma = pm.Exponential('sigma', 1)

        # likelihood
        pm.Normal(
            'y_pred',
            mu = mu,
            sd = sigma,
            observed = self.y_train,
            dims = "idx")

    self.model = m0

    ## sample the prior and plot it
    with self.model:
        prior_draws = pm.sample_prior_predictive(100)
        prior_idata = az.from_pymc3(prior=prior_draws)
        az.plot_ppc(prior_idata, group="prior", figsize = (18, 10))