def model_returns_t_alpha_beta(data, bmark, samples=2000):
    """Run a Bayesian alpha-beta model with T-distributed returns.

    This model estimates the intercept (alpha) and slope (beta) of two
    return sets. Usually, these will be algorithm returns and benchmark
    returns (e.g. S&P500). The data is assumed to be T distributed and
    thus is robust to outliers and takes tail events into account.

    If a pandas.DataFrame is passed as a benchmark, then multiple linear
    regression is used to estimate alpha and beta.

    Parameters
    ----------
    data : pandas.Series
        Series of simple returns of an algorithm or stock.
    bmark : pandas.DataFrame
        DataFrame of benchmark returns (e.g., S&P500) or risk factors
        (e.g., Fama-French SMB, HML, and UMD). If bmark has more recent
        returns than data, these dates will be treated as missing values
        and predictions will be generated for them, taking market
        correlations into account.
    samples : int (optional)
        Number of posterior samples to draw.

    Returns
    -------
    model : pymc3.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter of
        the posterior.
    """
    if data.shape[0] != bmark.shape[0]:
        data = pd.Series(data, index=bmark.index)

    data_no_missing = data.dropna()

    if bmark.ndim == 1:
        bmark = pd.DataFrame(bmark)

    bmark = bmark.loc[data_no_missing.index]
    n_bmark = bmark.shape[1]

    with pm.Model() as model:
        sigma = pm.HalfCauchy('sigma', beta=1,
                              testval=data_no_missing.values.std())
        nu = pm.Exponential('nu_minus_two', 1. / 10., testval=.3)

        # alpha and beta
        X = bmark.loc[data_no_missing.index]
        X.loc[:, 'ones'] = 1.
        y = data_no_missing
        alphabeta_init = np.linalg.lstsq(X, y)[0]

        alpha_reg = pm.Normal('alpha', mu=0, sd=.1,
                              testval=alphabeta_init[-1])
        beta_reg = pm.Normal('beta', mu=0, sd=1,
                             testval=alphabeta_init[:-1], shape=n_bmark)

        bmark_theano = tt.as_tensor_variable(bmark.values.T)
        mu_reg = alpha_reg + tt.dot(beta_reg, bmark_theano)
        pm.StudentT('returns', nu=nu + 2, mu=mu_reg, sd=sigma,
                    observed=data)

        start = pm.find_MAP(fmin=sp.optimize.fmin_powell)
        step = pm.NUTS(scaling=start)
        trace = pm.sample(samples, step, start=start)

    return model, trace
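# --- Usage sketch (added for illustration; not part of the original source) ---
# Shows how model_returns_t_alpha_beta could be called with a single-factor
# benchmark. The synthetic returns, date range, and the column name 'SPY' are
# assumptions; pm.summary is only one possible way to inspect the posterior.
import numpy as np
import pandas as pd

dates = pd.date_range('2015-01-02', periods=252, freq='B')
bmark_df = pd.DataFrame({'SPY': np.random.normal(0.0005, 0.01, len(dates))},
                        index=dates)
algo_returns = pd.Series(0.001 + 0.8 * bmark_df['SPY'].values
                         + np.random.normal(0., 0.005, len(dates)),
                         index=dates)

model, trace = model_returns_t_alpha_beta(algo_returns, bmark_df, samples=500)
# pm.summary(trace, varnames=['alpha', 'beta'])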
plt.plot(x_3, (alpha_c + beta_c * x_3), 'k',
         label='y ={:.2f} + {:.2f} * x'.format(alpha_c, beta_c))
plt.plot(x_3, y_3, 'bo')
plt.xlabel('$x$', fontsize=16)
plt.ylabel('$y$', rotation=0, fontsize=16)
plt.legend(loc=0, fontsize=14)
plt.subplot(1, 2, 2)
sns.kdeplot(y_3)
plt.xlabel('$y$', fontsize=16)
plt.tight_layout()

with pm.Model() as model_t:
    alpha = pm.Normal('alpha', mu=0, sd=100)
    beta = pm.Normal('beta', mu=0, sd=1)
    epsilon = pm.HalfCauchy('epsilon', 5)
    nu = pm.Deterministic('nu', pm.Exponential('nu_', 1 / 29) + 1)

    y_pred = pm.StudentT('y_pred', mu=alpha + beta * x_3,
                         sd=epsilon, nu=nu, observed=y_3)

    start = pm.find_MAP()
    step = pm.Metropolis()
    trace_t = pm.sample(2000, step=step, start=start)

pm.traceplot(trace_t)
plt.figure()

beta_c, alpha_c = stats.linregress(x_3, y_3)[:2]
# model specifications in PyMC3 are wrapped in a with-statement
with pm.Model() as model:
    # Define priors
    A_answer = pm.Normal('A_answer', 0, sd=50)
    lambda_0_answer = pm.Normal('lambda_0_answer', 0, sd=20)
    lambda_1_answer = pm.Normal('lambda_1_answer', 0, sd=20)
    model_answer = pm.Deterministic(
        'model_answer',
        A_answer
        * tt.pow(np.array(question_count), lambda_0_answer)
        * tt.pow(np.array(answerer_count), lambda_1_answer))
    sigma = pm.HalfCauchy('sigma', beta=10)
    observations = pm.Normal('observations', mu=model_answer, sd=sigma,
                             observed=np.array(answer_count))

    # Inference!
    step = pm.Metropolis(vars=[
        A_answer, lambda_0_answer, lambda_1_answer, sigma,
        model_answer, observations
    ])
    start = pm.find_MAP()  # initialization using MAP
    trace = pm.sample(10000, step=step, start=start)

# getting rid of the initial part of the MCMC
# http://barnesanalytics.com/bayesian-regression-with-pymc3-in-python
import pandas as pd
import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np

df = pd.read_csv('D:/SDP Math/math_done/thads2013n.txt', sep=',')  # needs Data Set
df = df[df['BURDEN'] > 0]
df = df[df['AGE1'] > 0]

plt.scatter(df['AGE1'], df['BURDEN'])
plt.show()

with pm.Model() as model:
    # Define priors
    sigma = pm.HalfCauchy('sigma', beta=10, testval=1.)
    intercept = pm.Normal('Intercept', 0, sd=20)
    x_coeff = pm.Normal('x', 0, sd=20)

    # Define likelihood
    likelihood = pm.Normal('y', mu=intercept + x_coeff * df['AGE1'],
                           sd=sigma, observed=df['BURDEN'])

    # Inference!
    trace = pm.sample(3000)

pm.traceplot(trace)
plt.show()
print(np.mean([1 if obj < 0 else 0 for obj in trace['x']]))
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

# Imports required by this snippet
import numpy as np
import matplotlib.pyplot as plt
import pymc3 as pm
from scipy.stats import norm

plt.rcParams["figure.figsize"] = (10, 5)
np.random.seed(42)

# Prepare the data
Ns = [90, 50, 80]
mus = [4, 10, 20]
sds = [0.5, 4, 1.5]
mix = np.array([])
for N, mu, sd in zip(Ns, mus, sds):
    mix = np.append(mix, norm(mu, sd).rvs(N))

# Sampling
n = len(Ns)
with pm.Model() as model:
    p = pm.Dirichlet("p", np.ones(n))
    k = pm.Categorical("k", p=p, shape=sum(Ns))
    means = pm.Normal("means", mu=[10, 10, 10], sd=10, shape=n)
    sigmas = pm.HalfCauchy("sigmas", 5, shape=n)
    y = pm.Normal("y", mu=means[k], sd=sigmas[k], observed=mix)
    trace = pm.sample(5000, tune=1000)

# Plot
pm.traceplot(
    trace,
    varnames=["means", "p", "sigmas"],
    lines={"means": mus, "sigmas": sds}
)
plt.savefig("./results/4-23-mixture-model.png")
axes[k, l].plot(0, label="Cohen's d = {:.2f}\nProb sup = {:.2f}".format(d_cohen, ps),
                alpha=0)
axes[k, l].set_xlabel('$\mu_{}-\mu_{}$'.format(i, j), fontsize=18)
axes[k, l].legend(loc=0, fontsize=14)
plt.tight_layout()

# >>>>>>>>>>>>>>>>>> Hierarchical Models <<<<<<<<<<<<<<<<<<<<< #
N_samples = [30, 30, 30]
G_samples = [18, 18, 18]

group_idx = np.repeat(np.arange(len(N_samples)), N_samples)
data = []
for i in range(0, len(N_samples)):
    data.extend(np.repeat([1, 0], [G_samples[i], N_samples[i] - G_samples[i]]))

with pm.Model() as h_model:
    alpha = pm.HalfCauchy('alpha', beta=10)
    beta = pm.HalfCauchy('beta', beta=10)

    theta = pm.Beta('theta', alpha=alpha, beta=beta, shape=len(N_samples))
    y = pm.Bernoulli('y', p=theta[group_idx], observed=data)

    trace = pm.sample(2000)

chain = trace[200:]
pm.traceplot(chain)
pm.summary(chain)

# shrinkage
x = np.linspace(0, 1, 100)
for i in np.random.randint(0, len(chain), size=100):
    pdf = stats.beta(chain['alpha'][i], chain['beta'][i]).pdf(x)
    plt.plot(x, pdf, 'g', alpha=0.1)
    print('This Python code does not support the OS you are using.')
    sys.exit()
jpfont = FontProperties(fname=FontPath)

#%% Generate data from the regression model
n = 50
np.random.seed(99)
u = st.norm.rvs(scale=0.7, size=n)
x = st.uniform.rvs(loc=-np.sqrt(3.0), scale=2.0 * np.sqrt(3.0), size=n)
y = 1.0 + 2.0 * x + u

#%% Posterior setup for the regression coefficients and error variance
#   (Laplace priors on the coefficients + half-Cauchy prior on sigma)
b0 = np.zeros(2)
tau_coef = np.ones(2)
tau_sigma = 1.0
regression_laplace_halfcauchy = pm.Model()
with regression_laplace_halfcauchy:
    sigma = pm.HalfCauchy('sigma', beta=tau_sigma)
    a = pm.Laplace('a', mu=b0[0], b=tau_coef[0])
    b = pm.Laplace('b', mu=b0[1], b=tau_coef[1])
    y_hat = a + b * x
    likelihood = pm.Normal('y', mu=y_hat, sigma=sigma, observed=y)

#%% Sampling from the posterior distribution
n_draws = 5000
n_chains = 4
n_tune = 1000
with regression_laplace_halfcauchy:
    trace = pm.sample(draws=n_draws, chains=n_chains, tune=n_tune,
                      random_seed=123)
print(az.summary(trace))

#%% Plotting the posterior distribution
os.chdir(dir_name)

# Save the dataframe in this directory
firing_data.to_csv("firing_data.csv")

# Start setting up the model
# Since we already standardized the firing data by the mean firing of every
# neuron at every point of time, we do not need to include main effects to
# capture the variability of a neuron's firing across time. We just use
# palatability slopes, one for each time point.
with pm.Model() as model:
    # Palatability slopes, one for each time point (one set for each laser condition)
    coeff_pal = pm.Normal("coeff_pal", mu=0, sd=1,
                          shape=(len(analyze_indices), unique_lasers[0].shape[0]))

    # Observation standard deviation
    sd = pm.HalfCauchy("sd", 1)

    # Regression equation for the mean observation
    regression = coeff_pal[tt.cast(firing_data["Time"], 'int32'),
                           tt.cast(firing_data["Laser"], 'int32')] * firing_data["Palatability"]

    # Actual observations
    obs = pm.Normal("obs", mu=regression, sd=sd, observed=firing_data["Firing"])

    # Metropolis sampling works best!
    tr = pm.sample(tune=10000, draws=50000, cores=4, start=pm.find_MAP(),
def tlogit(x):
    return 1 / (1 + tt.exp(-x))


def Phi(x):
    # probit transform
    return 0.5 + 0.5 * pm.math.erf(x / pm.math.sqrt(2))


# Model 1: only the model factor is fully pooled; the remaining factors are per-group
with pm.Model() as model1:
    # define priors
    alpha = pm.HalfCauchy('alpha', 10, testval=.6)
    beta3 = pm.Normal('beta3', 0, 100)
    beta2 = pm.Normal('beta2', 0, 100)
    beta1 = pm.Normal('beta1', 0, 100, shape=companiesABC)
    beta = pm.Normal('beta', 0, 100, shape=companiesABC)
    # u = pm.Normal('u', 0, 0.0001)

    # beta_mu = pm.Deterministic('beta_mu', tt.exp(beta[Num_shared] + beta1[Num_shared] * xs_year
    #                                              + beta2 * xs_char1 + beta3 * xs_char2))
    linerpredi = tt.exp(beta[companyABC] + beta1[companyABC] * elec_year
                        + beta2 * elec_Pca_char1 + beta3 * elec_Pca_char2)

    # latent model for contamination
    sigma_p = pm.Uniform('sigma_p', lower=0, upper=3)
    mu_p = pm.Normal('mu_p', mu=0, tau=.001)
    probitphi = pm.Normal('probitphi',
def student_t_likelihood(
    cases,
    name_student_t="_new_cases_studentT",
    name_sigma_obs="sigma_obs",
    pr_beta_sigma_obs=30,
    nu=4,
    offset_sigma=1,
    model=None,
    data_obs=None,
):
    r"""
    Set the likelihood to apply to the model observations (`model.new_cases_obs`).

    We assume a :class:`~pymc3.distributions.continuous.StudentT` distribution
    because it is robust against outliers [Lange1989]_. The likelihood follows:

    .. math::

        P(\text{data\_obs}) &\sim StudentT(\text{mu} = \text{new\_cases\_inferred}, \text{sigma} = \sigma, \text{nu} = \text{nu})\\
        \sigma &= \sigma_r \sqrt{\text{new\_cases\_inferred} + \text{offset\_sigma}}

    The parameter :math:`\sigma_r` follows a
    :class:`~pymc3.distributions.continuous.HalfCauchy` prior distribution with
    parameter beta set by ``pr_beta_sigma_obs``. If the input is 2 dimensional,
    the parameter :math:`\sigma_r` is different for every region.

    Parameters
    ----------
    cases : :class:`~theano.tensor.TensorVariable`
        The daily new cases estimated by the model. Will be compared to the
        real world data ``data_obs``. One or two dimensional array. If 2
        dimensional, the first dimension is time and the second are the
        regions/countries.
    name_student_t : str
        The name under which the StudentT distribution is saved in the trace.
    name_sigma_obs : str
        The name under which the distribution of the observable error is saved
        in the trace.
    pr_beta_sigma_obs : float
        The beta of the :class:`~pymc3.distributions.continuous.HalfCauchy`
        prior distribution of :math:`\sigma_r`.
    nu : float
        How flat the tail of the distribution is. Larger nu should make the
        model more robust to outliers. Defaults to 4 [Lange1989]_.
    offset_sigma : float
        An offset added to the sigma, to make the inference procedure robust.
        Otherwise small numbers of ``cases`` would lead to very small errors
        and diverging likelihoods. Defaults to 1.
    model :
        The model on which we want to add the distribution.
    data_obs : array
        The data that is observed. By default it is ``model.new_cases_obs``.

    Returns
    -------
    None

    References
    ----------
    .. [Lange1989] Lange, K., Roderick J. A. Little, & Jeremy M. G. Taylor. (1989).
        Robust Statistical Modeling Using the t Distribution.
        Journal of the American Statistical Association, 84(408), 881-896.
        doi:10.2307/2290063
    """
    model = modelcontext(model)

    if data_obs is None:
        data_obs = model.new_cases_obs

    sigma_obs = pm.HalfCauchy(name_sigma_obs, beta=pr_beta_sigma_obs,
                              shape=model.shape_of_regions)

    pm.StudentT(
        name=name_student_t,
        nu=nu,
        mu=cases[:len(data_obs)],
        sigma=tt.abs_(cases[:len(data_obs)] + offset_sigma) ** 0.5
        * sigma_obs,  # offset and tt.abs to avoid nans
        observed=data_obs,
    )
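# --- Worked illustration of the noise-scaling formula in the docstring above
# (added for illustration; the numbers are made up). For a fixed sigma_r, the
# observation noise grows with the square root of the inferred cases, so large
# case counts are allowed proportionally larger absolute errors.
import numpy as np

sigma_r = 5.0
offset_sigma = 1
new_cases_inferred = np.array([0., 10., 100., 1000.])
sigma = sigma_r * np.sqrt(new_cases_inferred + offset_sigma)
print(sigma)  # approximately [  5.    16.58  50.25 158.19]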
""" 電灯電力需要実績月報・用途別使用電力量・販売電力合計・10社計 電気事業連合会ウェブサイト・電力統計情報より入手 http://www.fepc.or.jp/library/data/tokei/index.html """ data = pd.read_csv('electricity.csv', index_col=0) y0 = np.log(data.values.reshape((data.shape[0]//3, 3)).sum(axis=1)) y = 100 * (y0 - y0[0]) n = y.size series_date = pd.date_range(start='1/1/1989', periods=n, freq='Q') #%% 確率的トレンド+季節変動 trend_coef = np.array([2.0, -1.0]) seasonal_coef = np.array([-1.0, -1.0, -1.0]) timeseries_decomp = pm.Model() with timeseries_decomp: sigma = pm.HalfCauchy('sigma', beta=1.0) tau = pm.HalfCauchy('tau', beta=1.0) omega = pm.HalfCauchy('omega', beta=1.0) trend = pm.AR('trend', trend_coef, sigma=tau, shape=n) seasonal = pm.AR('seasonal', seasonal_coef, sigma=omega, shape=n) observation = pm.Normal('y', mu=trend+seasonal, sigma=sigma, observed=y) #%% 事後分布からのサンプリング n_draws = 5000 n_chains = 4 n_tune = 2000 with timeseries_decomp: trace = pm.sample(draws=n_draws, chains=n_chains, tune=n_tune, target_accept=0.95, random_seed=123) param_names = ['sigma', 'tau', 'omega'] print(pm.summary(trace, var_names=param_names)) #%% 事後分布のグラフの作成
]

## predictor variables
x_cov = df[cov].copy()
X = x_cov.loc[y.index, :].copy()

# mask NA
X_masked = np.ma.masked_invalid(X)

# model
with pm.Model() as model:
    # priors
    intercept = pm.Normal('intercept', mu=0, sigma=100)
    beta = pm.Normal('beta', mu=0, sigma=100, shape=X_masked.shape[1])
    alpha = pm.HalfCauchy('alpha', beta=5)

    # impute missing X
    chol, stds, corr = pm.LKJCholeskyCov('chol', n=X_masked.shape[1], eta=2,
                                         sd_dist=pm.Exponential.dist(1),
                                         compute_corr=True)
    cov = pm.Deterministic('cov', chol.dot(chol.T))
    X_mu = pm.Normal('X_mu', mu=0, sigma=100, shape=X_masked.shape[1],
                     testval=X_masked.mean(axis=0))
    X_modeled = pm.MvNormal('X', mu=X_mu, chol=chol, observed=X_masked)
# varnames = ["alpha", "beta", "epsilon", "nu"]
# pm.traceplot(trace_up, varnames)
# plt.savefig("hierarchical_linear_regression_unhierarchical_traceplot.png")

with pm.Model() as hierarchical_model:
    alpha_tmp_mu = pm.Normal("alpha_tmp_mu", mu=0, sd=10)
    alpha_tmp_sd = pm.HalfNormal("alpha_tmp_sd", 10)
    beta_mu = pm.Normal("beta_mu", mu=0, sd=10)
    beta_sd = pm.HalfNormal("beta_sd", sd=10)

    alpha_tmp = pm.Normal("alpha_tmp", mu=alpha_tmp_mu, sd=alpha_tmp_sd, shape=M)
    beta = pm.Normal("beta", mu=beta_mu, sd=beta_sd, shape=M)
    epsilon = pm.HalfCauchy("epsilon", 5)
    nu = pm.Exponential("nu", 1 / 30)

    y_pred = pm.StudentT(
        "y_pred",
        mu=alpha_tmp[idx] + beta[idx] * x_centered,
        sd=epsilon,
        nu=nu,
        observed=y_m,
    )

    alpha = pm.Deterministic("alpha", alpha_tmp - beta * x_m.mean())
    alpha_mu = pm.Deterministic("alpha_mu", alpha_tmp_mu - beta_mu * x_m.mean())
    alpha_sd = pm.Deterministic("alpha_sd", alpha_tmp_sd - beta_mu * x_m.mean())
# Plot raw data
fig, ax = plt.subplots()
y_pos = np.arange(8)
ax.errorbar(y, y_pos, xerr=sigma, fmt='o')
ax.set_yticks(y_pos)
ax.set_yticklabels(names)
ax.invert_yaxis()  # labels read top-to-bottom
plt.show()

# Centered model
with pm.Model() as Centered_eight:
    mu_alpha = pm.Normal('mu_alpha', mu=0, sigma=5)
    sigma_alpha = pm.HalfCauchy('sigma_alpha', beta=5)
    alpha = pm.Normal('alpha', mu=mu_alpha, sigma=sigma_alpha, shape=J)
    obs = pm.Normal('obs', mu=alpha, sigma=sigma, observed=y)

np.random.seed(0)
with Centered_eight:
    trace_centered = pm.sample(1000, chains=4)

pm.summary(trace_centered).round(2)
# Effective sample size is << 4*1000, especially for sigma_alpha
# Also, PyMC3 gives various warnings about not mixing

# Display the total number of divergent samples
diverging = trace_centered['diverging']
print('Number of Divergent Samples: {}'.format(diverging.nonzero()[0].size))
    # Impute missing values
    sib_mean = pm.Exponential("sib_mean", 1.0)
    siblings_imp = pm.Poisson("siblings_imp", sib_mean, observed=siblings)

    p_disab = pm.Beta("p_disab", 1.0, 1.0)
    disability_imp = pm.Bernoulli("disability_imp", p_disab,
                                  observed=masked_values(disability, value=-999))

    p_mother = pm.Beta("p_mother", 1.0, 1.0)
    mother_imp = pm.Bernoulli("mother_imp", p_mother,
                              observed=masked_values(mother_hs, value=-999))

    s = pm.HalfCauchy("s", 5.0, testval=5)
    beta = pm.Laplace("beta", 0.0, 100.0, shape=7, testval=0.1)

    expected_score = (beta[0] + beta[1] * male + beta[2] * siblings_imp
                      + beta[3] * disability_imp + beta[4] * age
                      + beta[5] * mother_imp + beta[6] * early_ident)

    observed_score = pm.Normal("observed_score", expected_score, s,
                               observed=score)

with model:
    start = pm.find_MAP()
    step1 = pm.NUTS([beta, s, p_disab, p_mother, sib_mean], scaling=start)
    step2 = pm.BinaryGibbsMetropolis(
def Bayesian_Calibration(DataComp, DataField, DataPred, output_folder):
    # This is the data preprocessing part
    n = np.shape(DataField)[0]  # number of measured data
    m = np.shape(DataComp)[0]   # number of simulation data
    p = np.shape(DataField)[1] - 1     # number of input x
    q = np.shape(DataComp)[1] - p - 1  # number of calibration parameters t

    xc = DataComp[:, 1:]   # simulation input x + calibration parameters t
    xf = DataField[:, 1:]  # observed input
    yc = DataComp[:, 0]    # simulation output
    yf = DataField[:, 0]   # observed output

    x_pred = DataPred[:, 1:]  # design points for predictions
    y_true = DataPred[:, 0]   # true measured value for design points for predictions
    n_pred = np.shape(x_pred)[0]  # number of predictions
    N = n + m + n_pred

    # Put points xc, xf, and x_pred on [0,1]
    for i in range(p):
        x_min = min(min(xc[:, i]), min(xf[:, i]))
        x_max = max(max(xc[:, i]), max(xf[:, i]))
        xc[:, i] = (xc[:, i] - x_min) / (x_max - x_min)
        xf[:, i] = (xf[:, i] - x_min) / (x_max - x_min)
        x_pred[:, i] = (x_pred[:, i] - x_min) / (x_max - x_min)

    # Put calibration parameters t on domain [0,1]
    for i in range(p, (p + q)):
        t_min = min(xc[:, i])
        t_max = max(xc[:, i])
        xc[:, i] = (xc[:, i] - t_min) / (t_max - t_min)

    # standardization of output yf and yc
    yc_mean = np.mean(yc)
    yc_sd = np.std(yc)
    yc = (yc - yc_mean) / yc_sd
    yf = (yf - yc_mean) / yc_sd

    # This is the modeling part
    with pm.Model() as model:
        # Claim prior part
        eta = pm.HalfCauchy("eta", beta=3)  # for eta of gaussian process
        lengthscale = pm.Gamma("lengthscale", alpha=2, beta=1, shape=(p + q))  # for lengthscale of gaussian process
        tf = pm.Beta("tf", alpha=2, beta=2, shape=q)  # for calibration parameters
        sigma1 = pm.HalfCauchy('sigma1', beta=5)  # for noise
        y_pred = pm.Normal('y_pred', 0, 1.5, shape=n_pred)  # for y prediction

        # Concatenate data into a big matrix [[xf tf], [xc tc], [x_pred tf]]
        xf1 = tt.concatenate([xf, tt.fill(tt.zeros([n, q]), tf)], axis=1)
        x_pred1 = tt.concatenate([x_pred, tt.fill(tt.zeros([n_pred, q]), tf)], axis=1)
        X = tt.concatenate([xf1, xc, x_pred1], axis=0)
        # Concatenate data into a big matrix [[yf], [yc], [y_pred]]
        y = tt.concatenate([yf, yc, y_pred], axis=0)

        # Covariance function of gaussian process
        cov_z = eta**2 * pm.gp.cov.ExpQuad((p + q), ls=lengthscale)
        # Gaussian process with covariance function cov_z
        gp = pm.gp.Marginal(cov_func=cov_z)
        # Bayesian inference
        outcome = gp.marginal_likelihood("outcome", X=X, y=y, noise=sigma1)
        trace = pm.sample(250, cores=1)

    # This part is for data collection and visualization
    pm.summary(trace).to_csv(output_folder + '/trace_summary.csv')
    pd.DataFrame(np.array(trace['tf'])).to_csv(output_folder + '/tf.csv')
    print(pm.summary(trace))

    # Draw picture of cvrmse_dist and calculate index
    name_columns = []
    n_columns = n_pred
    for i in range(n_columns):
        name_columns.append('y_pred' + str(i + 1))
    y_prediction = pd.DataFrame(np.array(trace['y_pred']), columns=name_columns)
    y_prediction = y_prediction * yc_sd + yc_mean  # Scale y_prediction back
    y_prediction.to_csv(output_folder + '/y_pred.csv')  # Store y_prediction

    # Calculate the distribution of cvrmse
    cvrmse = 100 * np.sqrt(np.sum(np.square(y_prediction - y_true), axis=1) / n_pred) / np.mean(y_true)
    print(np.mean(cvrmse))
    # Calculate the index and store it into csv
    index_cal(y_prediction, y_true).to_csv(output_folder + '/index.csv')

    # Draw picture of cvrmse distribution
    plt.subplot(1, 1, 1)
    plt.hist(cvrmse)
    plt.savefig(output_folder + '/cvrmse_dist.pdf')
    plt.close()

    # y_prediction_mean = np.array(pm.summary(trace)['mean'][0:n_pred])*yc_sd+yc_mean
    # cvrmse = 100*np.sqrt(np.sum(np.square(y_prediction_mean-y_true))/len(y_prediction_mean-y_true))/np.mean(y_true)
    # Draw picture of Prediction_Plot
    y_prediction_mean = np.array(pm.summary(trace)['mean'][0:n_pred]) * yc_sd + yc_mean
    y_prediction_975 = np.array(pm.summary(trace)['hpd_97.5'][0:n_pred]) * yc_sd + yc_mean
    y_prediction_025 = np.array(pm.summary(trace)['hpd_2.5'][0:n_pred]) * yc_sd + yc_mean

    # cvrmse = 100*np.sqrt(np.sum(np.square(y_prediction_mean-y_true))/len(y_prediction_mean-y_true))/np.mean(y_true)
    # print(cvrmse)

    plt.subplot(1, 1, 1)
    # estimated probability
    plt.scatter(x=range(n_pred), y=y_prediction_mean)
    # error bars on the estimate
    plt.vlines(range(n_pred), ymin=y_prediction_025, ymax=y_prediction_975)
    # actual outcomes
    plt.scatter(x=range(n_pred), y=y_true, marker='x')

    plt.xlabel('predictor')
    plt.ylabel('outcome')
    plt.savefig(output_folder + '/Prediction_Plot.pdf')
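# --- Usage sketch for Bayesian_Calibration (added for illustration; not part of
# the original source). The data layout follows the preprocessing comments above:
# column 0 is the output, then the p observed inputs, and (for the simulation
# data) the q calibration parameters. The synthetic functions and folder name
# are assumptions; index_cal() must be defined elsewhere for the call to run.
import numpy as np

p, q = 1, 1
xf = np.random.rand(20, p)                 # field-experiment inputs
xc = np.random.rand(40, p + q)             # simulation inputs + calibration parameter
yf = np.sin(2 * np.pi * xf[:, 0]) + 0.1 * np.random.randn(20)
yc = np.sin(2 * np.pi * xc[:, 0]) * (0.5 + 0.5 * xc[:, 1])

DataField = np.column_stack([yf, xf])      # shape (n, 1 + p)
DataComp = np.column_stack([yc, xc])       # shape (m, 1 + p + q)
DataPred = DataField[:5].copy()            # reuse a few field points as prediction targets

# Bayesian_Calibration(DataComp, DataField, DataPred, "./calibration_results")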
""" Posterior Predictive Check Plot =============================== _thumb: .6, .5 """ import arviz as az import numpy as np import pymc3 as pm az.style.use('arviz-darkgrid') # Data of the Eight Schools Model J = 8 y = np.array([28., 8., -3., 7., -1., 1., 18., 12.]) sigma = np.array([15., 10., 16., 11., 9., 11., 10., 18.]) with pm.Model() as centered_eight: mu = pm.Normal('mu', mu=0, sd=5) tau = pm.HalfCauchy('tau', beta=5) theta = pm.Normal('theta', mu=mu, sd=tau, shape=J) obs = pm.Normal('obs', mu=theta, sd=sigma, observed=y) centered_eight_trace = pm.sample() with centered_eight: ppc_samples = pm.sample_ppc(centered_eight_trace) az.ppcplot(y, ppc_samples)
         marker='o', color='red')
plt.plot(times, y, label='True speed', color='k', alpha=0.5)
plt.legend()
plt.xlabel('Time (Seconds)')
plt.ylabel(r'$y(t)$')
plt.show()

ode_model = DifferentialEquation(func=freefall, times=times,
                                 n_states=1, n_theta=2, t0=0)

with pm.Model() as model:
    # Specify prior distributions for some of our model parameters
    sigma = pm.HalfCauchy('sigma', 1)
    gamma = pm.Lognormal('gamma', 0, 1)

    # If we know one of the parameter values, we can simply pass the value.
    ode_solution = ode_model(y0=[0], theta=[gamma, 9.8])
    # The ode_solution has a shape of (n_times, n_states)

    Y = pm.Normal('Y', mu=ode_solution, sigma=sigma, observed=yobs)

    prior = pm.sample_prior_predictive()
    trace = pm.sample(2000, tune=1000, cores=1)
    posterior_predictive = pm.sample_posterior_predictive(trace)

    data = az.from_pymc3(trace=trace, prior=prior,
                         posterior_predictive=posterior_predictive)

az.plot_posterior(data)
def run_factorization(self, N, S, X, Z, I, K, num_cov, k, n):
    # Smart initialization
    rat = k / n
    nans = np.isnan(rat)
    conc_inits = np.zeros((1, S))
    beta_inits = np.zeros((num_cov, S))
    for index_s in range(S):
        column_rat = rat[:, index_s]
        column_nans = np.isnan(column_rat)
        valid_rat = column_rat[~column_nans]
        conc_init = min(1.0 / np.var(valid_rat), 1000.0)
        m_init = min(max(np.mean(valid_rat), 1.0 / 1000), 1.0 - (1.0 / 1000))
        conc_inits[0, index_s] = conc_init
        beta_inits[0, index_s] = np.log(m_init / (1.0 - m_init))
    U_init = np.random.rand(N, K)
    for n_iter in range(N):
        U_init[n_iter, :] = U_init[n_iter, :] / np.sum(U_init[n_iter, :])

    # Run bb-mf
    with pm.Model() as bb_glm:
        CONC = pm.HalfCauchy('CONC', beta=5, shape=(1, S), testval=conc_inits)
        BETA = pm.Normal('BETA', mu=0, tau=(1 / 1000000.0), shape=(S, num_cov),
                         testval=beta_inits.T)
        # U = pm.Normal('U', mu=0, tau=(1/10000.0), shape=(N, K), testval=np.random.randn(N, K))
        U = pm.Dirichlet('U', a=np.ones(K) * 1.0, shape=(N, K), testval=U_init)
        V = pm.Normal('V', mu=0, tau=(1 / 10000.0), shape=(S, K),
                      testval=np.random.randn(S, K))
        MU_A = pm.Normal("MU_A", mu=0., sd=100**2, shape=(1, S),
                         testval=np.zeros((1, S)))
        SIGMA_A = pm.HalfCauchy("SIGMA_A", beta=5.0, shape=(1, S),
                                testval=np.ones((1, S)))
        mu_a_mat = pm.math.dot(np.ones((I, 1)), MU_A)
        sigma_a_mat = pm.math.dot(np.ones((I, 1)), SIGMA_A)
        A = pm.Normal('A', mu=mu_a_mat, sigma=sigma_a_mat, shape=(I, S),
                      testval=np.zeros((I, S)))

        p = pm.math.invlogit(
            pm.math.dot(X, BETA.T) + pm.math.dot(U, V.T) + A[Z, :])

        conc_mat = pm.math.dot(np.ones((N, 1)), CONC)
        R = pm.BetaBinomial('like', alpha=(p * conc_mat)[~nans],
                            beta=((1.0 - p) * conc_mat)[~nans],
                            n=n[~nans], observed=k[~nans])

        approx = pm.fit(method='advi', n=30000)

    pickle.dump(approx, open(self.output_root + '_model', 'wb'))
    # approx = pickle.load(open(self.output_root + '_model', "rb"))

    means_dict = approx.bij.rmap(approx.params[0].eval())
    U = backward_stickbreaking(means_dict['U_stickbreaking__'])
    np.savetxt(self.output_root + '_temper_U.txt', U, fmt="%s", delimiter='\t')
    np.savetxt(self.output_root + '_temper_U_init.txt', U_init, fmt="%s", delimiter='\t')
    np.savetxt(self.output_root + '_temper_V.txt', (means_dict['V'].T), fmt="%s", delimiter='\t')
    np.savetxt(self.output_root + '_temper_BETA.txt', (means_dict['BETA'].T), fmt="%s", delimiter='\t')
    return 1 / (1 + np.exp(-x))


def tlogit(x):
    return 1 / (1 + tt.exp(-x))


def Phi(x):
    # probit transform
    return 0.5 + 0.5 * pm.math.erf(x / pm.math.sqrt(2))


# The original model, without the contaminated data
with pm.Model() as model_1:
    # define priors
    alpha = pm.HalfCauchy('alpha', 10, testval=.6)
    # sd_5 = pm.HalfNormal('sd_5', 0.5)
    mu_4 = pm.Normal('mu_4', mu=0, tau=.001)
    sd_4 = pm.HalfCauchy('sd_4', 10)
    mu_3 = pm.Normal('mu_3', mu=0, tau=.001)
    sd_3 = pm.HalfCauchy('sd_3', 10)
    mu_2 = pm.Normal('mu_2', mu=0, tau=.001)
    sd_2 = pm.HalfCauchy('sd_2', 10)
    mu_1 = pm.Normal('mu_1', mu=0, tau=.001)
    sd_1 = pm.HalfCauchy('sd_1', 10)
    # mu_0 = pm.Normal('mu_0', mu=0, tau=.001)
    # sd_0 = pm.HalfCauchy('sd_0', 20)

    beta4 = pm.Normal('beta4', mu_4, sd_4, shape=companiesABC)
"--- Read Data ---" path = "../../3. Data/Combined_Sample.dta" Data = pd.read_stata(path) Data[['wavecode', 'w_clothm', 'ltotR']] = Data[['wavecode', 'w_clothm', 'ltotR']].round(4) Data[['wavecode', 'w_clothm', 'ltotR']].head() wave_idx = Data['wavecode'] clothm_mean = np.squeeze(np.mean(Data[['w_clothm']])) with pm.Model() as hierarchical_model: # Hyperpriors mu_a = pm.Normal('mu_alpha', mu=clothm_mean, sd=10) sigma_a = pm.HalfCauchy('sigma_alpha', beta=2) mu_b = pm.Normal('mu_beta', mu=0., sd=10) sigma_b = pm.HalfCauchy('sigma_beta', beta=2) # Intercept for each county, distributed around group mean mu_a a = pm.Normal('alpha', mu=mu_a, sd=sigma_a, shape=len(Data.wavecode.unique())) # Intercept for each county, distributed around group mean mu_a b = pm.Normal('beta', mu=mu_b, sd=sigma_b, shape=len(Data.wavecode.unique())) # Error standard deviation eps = pm.HalfCauchy('eps', beta=2) # Expected value w_est = a[wave_idx] + b[wave_idx] * Data.ltotR # Data likelihood
    b10 = pm.Normal('b10_Max_Rate', mu=0, sd=100)
    b11 = pm.Normal('b11_Avg_Pressure', mu=0, sd=100)
    b12 = pm.Normal('b12_Max_Pressure', mu=0, sd=100)
    b13 = pm.Normal('b13_Fluid_Gal/Perf', mu=0, sd=100)

    # define linear model
    y = (b0[dfs['Reservoir_Code']] +
         b1 * X_dmat['Clusters_Stage'] +
         b2 * X_dmat['Perfs_Cluster'] +
         b3 * X_dmat['Num_of_Stages'] +
         b4 * X_dmat['ISIP_Ft'] +
         b5 * X_dmat['Rate_Ft'] +
         b6 * X_dmat['Rate_Perf'] +
         b7 * X_dmat['Avg_Prop_Conc'] +
         b8 * X_dmat['Max_Prop_Conc'] +
         b9 * X_dmat['Rate_Cluster'] +
         b10 * X_dmat['Max_Rate'] +
         b11 * X_dmat['Avg_Pressure'] +
         b12 * X_dmat['Max_Pressure'] +
         b13 * X_dmat['Fluid_Gal_Perf'])

    ## Likelihood (sampling distribution) of observations
    epsilon = pm.HalfCauchy('epsilon', beta=10)
    likelihood = pm.Normal('likelihood', mu=y, sd=epsilon, observed=dfs[ft_endog])

    if run_res_unpooled:
        trc_res_unpooled = pm.backends.text.load('../../other/traces_txt/trc_res_unpooled')
    else:
        step = pm.NUTS()
        start = pm.find_MAP()
        trace = pm.backends.Text('../../other/traces_txt/trc_res_unpooled')
        trc_res_unpooled = pm.sample(2000, step=step, start=start, trace=trace)

# Run unpooled model metrics for training data
    sd_dist = pm.HalfCauchy.dist(beta=10000)
    packed_chol = pm.LKJCholeskyCov('packed_chol', n=D, eta=1, sd_dist=sd_dist)
    chol = pm.expand_packed_triangular(n=D, packed=packed_chol)
    invchol = solve_lower_triangular(chol, np.eye(D))
    tau = tt.dot(invchol.T, invchol)

    # Mixture density
    B = pm.DensityDist('B', logp_g(mu, tau), shape=(n_samples, D))
    Y_hat = tt.sum(X[:, :, np.newaxis] * B.reshape((n_samples, D // 2, 2)), axis=1)

    # Model error
    err = pm.HalfCauchy('err', beta=10)

    # Data likelihood
    Y_logp = pm.MvNormal('Y_logp', mu=Y_hat, cov=err * np.eye(2), observed=Y)

with model:
    approx = pm.variational.inference.fit(
        n=1000, obj_optimizer=pm.adagrad(learning_rate=0.1))

plt.figure()
plt.plot(approx.hist)
plt.savefig('../images/1G_ADVI' + data + '_lag' + str(lag) + 'convergence.png')

gbij = approx.gbij
means = gbij.rmap(approx.mean.eval())

with open('../data/1G_results' + data + '_lag' + str(lag) + '.pickle', 'wb') as f:
Num_5 = 5 * len(elec_faults1)
model_knots = np.linspace(1, 6, Num_5)
Num_5B = 5 * len(elec_faultsB)
model_knotsB = np.linspace(1, 6, Num_5B)

basis_funcs = sp.interpolate.BSpline(knots, np.eye(Num_5), k=3)
Bx = basis_funcs(elec_year)  # value of the interpolating spline evaluated at x
basis_funcsB = sp.interpolate.BSpline(knotsB, np.eye(Num_5B), k=3)
BxB = basis_funcsB(elec_yearB)  # value of the interpolating spline evaluated at x

# shared: a symbolic variable; it is called a "shared" variable because its
# value is the same across the different functions that use it
Bx_ = shared(Bx)
Bx_B = shared(BxB)

# Fit the two datasets jointly; part of the data can be shared, but how should
# the environmental covariates be added in?
with pm.Model() as partial_model:
    # define priors
    sigma = pm.HalfCauchy('sigma', 10)
    σ_a = pm.HalfCauchy('σ_a', 5.)
    σ_aB = pm.HalfCauchy('σ_aB', 5.)
    a0 = pm.Normal('a0', 0., 20.)
    Δ_a = pm.Normal('Δ_a', 0., 10., shape=Num_5)
    Δ_aB = pm.Normal('Δ_aB', 0., 10., shape=Num_5B)
    # δ_1 = pm.Gamma('δ_1', alpha=0.000001, beta=0.000001)
    # δ = pm.Normal('δ', 0, sd=(δ_1*δ_1))
    δ = pm.Normal('δ', 0, sd=100)
    δB = pm.Normal('δB', 0, sd=100)

    theta1 = pm.Deterministic('theta1', a0 + (σ_a * Δ_a).cumsum())
    theta1B = pm.Deterministic('theta1B', a0 + (σ_aB * Δ_aB).cumsum())
    # theta1 = a0 + (σ_a * Δ_a).cumsum()
def bcfa(Y, M):
    r"""Constructs a Bayesian confirmatory factor analysis (BCFA) model.

    Args:
        Y (numpy.ndarray): An $n \times p$ matrix of data where $n$ is the
            sample size and $p$ is the number of manifest variables.
        M (numpy.ndarray): A $p \times m$ matrix to describe model structure
            where $m$ is the number of latent variables.

    Notes:
        $\mathbf{Y}$ probably should be standardized first if you are using
        continuous data.

        Entries in $\mathbf{M}$ should be in [0, 1]. $\mathbf{M}_{(i,j)}$
        represents the variance of the normal prior placed on the regression
        coefficient from the $j$th latent variable to the $i$th manifest
        variable. Values of 0 remove the coefficient from the model entirely,
        1 represents a "full-strength" coefficient, and values in (0, 1) are
        for cross-loadings.

    Returns:
        None: Model is placed in the context.
    """
    # counts
    n, p = Y.shape
    p_, m = M.shape
    assert p == p_, "M is the wrong shape"

    # intercepts for manifest variables
    sd = max(np.abs(Y.mean()).max() * 2.5, 2.5)
    nu = pm.Normal(name=r"$\nu$", mu=0, sd=sd, shape=p, testval=Y.mean())

    # unscaled regression coefficients
    Phi = pm.Normal(name=r"$\Phi$", mu=0, sd=1, shape=M.shape, testval=M)

    # scaled regression coefficients
    Lambda = pm.Deterministic(r"$\Lambda$", Phi * np.sqrt(M))

    # intercepts for latent variables
    alpha = pm.Normal(name=r"$\alpha$", mu=0, sd=2.5, shape=m, testval=0)

    # means of manifest variables
    mu = nu + matrix_dot(Lambda, alpha)

    # standard deviations of manifest variables
    D = pm.HalfCauchy(name=r"$D$", beta=2.5, shape=p, testval=Y.std())

    # correlations between manifest variables
    Omega = np.eye(p)

    # covariance matrix for manifest variables
    Theta = pm.Deterministic(r"$\Theta$", D[None, :] * Omega * D[:, None])

    # covariance matrix on latent variables
    f = pm.Lognormal.dist(sd=0.25)
    L = pm.LKJCholeskyCov(name=r"$L$", eta=1, n=m, sd_dist=f)
    ch = pm.expand_packed_triangular(m, L, lower=True)
    Gamma = tt.dot(ch, ch.T)
    sd = tt.sqrt(tt.diag(Gamma))
    Psi = pm.Deterministic(r"$\Psi$", Gamma / sd[:, None] / sd[None, :])

    # covariance of manifest variables
    Sigma = matrix_dot(Lambda, Psi, Lambda.T) + Theta

    # observations
    pm.MvNormal(name="Y", mu=mu, cov=Sigma, observed=Y, shape=Y.shape)
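# --- Usage sketch for bcfa (added for illustration; not part of the original
# source). Assumes two latent factors with three indicators each; Y is synthetic
# and standardized as the docstring recommends, and the sampling call is only
# one reasonable way to fit the model.
import numpy as np
import pymc3 as pm

n, p, m = 200, 6, 2
Y = np.random.randn(n, p)
Y = (Y - Y.mean(axis=0)) / Y.std(axis=0)

M = np.zeros((p, m))
M[:3, 0] = 1.0   # manifest variables 1-3 load on factor 1
M[3:, 1] = 1.0   # manifest variables 4-6 load on factor 2

with pm.Model():
    bcfa(Y, M)
    # trace = pm.sample(1000, tune=1000)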
from sklearn.datasets import load_iris

url = 'https://github.com/aloctavodia/BAP/blob/master/code/data/space_flu.csv?raw=true'
df_sf = pd.read_csv(url)
age = df_sf.age.values[:, None]
space_flu = df_sf.space_flu

ax = df_sf.plot.scatter('age', 'space_flu', figsize=(8, 5))
ax.set_yticks([0, 1])
ax.set_yticklabels(['healthy', 'sick'])
plt.savefig('../figures/space_flu.pdf', bbox_inches='tight')

with pm.Model() as model_space_flu:
    ℓ = pm.HalfCauchy('ℓ', 1)
    cov = pm.gp.cov.ExpQuad(1, ℓ) + pm.gp.cov.WhiteNoise(1E-5)
    gp = pm.gp.Latent(cov_func=cov)
    f = gp.prior('f', X=age)
    y_ = pm.Bernoulli('y', p=pm.math.sigmoid(f), observed=space_flu)
    trace_space_flu = pm.sample(1000, chains=1,
                                compute_convergence_checks=False)

X_new = np.linspace(0, 80, 200)[:, None]

with model_space_flu:
    f_pred = gp.conditional('f_pred', X_new)
    pred_samples = pm.sample_posterior_predictive(trace_space_flu,
                                                  vars=[f_pred],
                                                  samples=1000)
def main(Xy_training_path, output_trace_path, response_variable,
         predictor_variables, cities, samples, scaler_type, sector):
    Xy_training = pd.read_csv(Xy_training_path)

    if sector != "none":
        Xy_training = Xy_training[Xy_training["BUILDING_CLASS"] == sector]

    if cities != []:  # select cities to do the analysis
        Xy_training = Xy_training.loc[Xy_training['CITY'].isin(cities)]

    degree_index = Xy_training.groupby('CITY').all().reset_index().reset_index()[['index', 'CITY']]
    degree_index["CODE"] = degree_index.index.values
    Xy_training = Xy_training.merge(degree_index, on='CITY')
    Xy_training['BUILDING_CLASS'] = Xy_training['BUILDING_CLASS'].apply(
        lambda x: int(1) if x == "Residential" else int(0))

    if scaler_type == "minmax":
        scaler = MinMaxScaler(feature_range=(0.1, 0.99))
        fields_to_scale = [response_variable] + predictor_variables
        Xy_training[fields_to_scale] = pd.DataFrame(
            scaler.fit_transform(Xy_training[fields_to_scale]),
            columns=Xy_training[fields_to_scale].columns)
    elif scaler_type == "minmax_extended":
        scaler = MinMaxScaler(feature_range=(-0.99, 0.99))
        fields_to_scale = [response_variable] + predictor_variables
        Xy_training[fields_to_scale] = pd.DataFrame(
            scaler.fit_transform(Xy_training[fields_to_scale]),
            columns=Xy_training[fields_to_scale].columns)
    elif scaler_type == "standard":
        scaler = StandardScaler()
        fields_to_scale = [response_variable] + predictor_variables
        Xy_training[fields_to_scale] = pd.DataFrame(
            scaler.fit_transform(Xy_training[fields_to_scale]),
            columns=Xy_training[fields_to_scale].columns)
    else:
        scaler = None
        print("not scaling variables")

    Xy_training[response_variable] = Xy_training[response_variable].astype(theano.config.floatX)

    mn_counties = Xy_training.CITY.unique()
    n_counties = len(mn_counties)
    county_idx = Xy_training.CODE.values

    with pm.Model() as hierarchical_model:
        # log(y) = alpha + beta*log(GFA*HDD) + gamma*log(GFA*CDD) + eps

        # Coefficients of the whole population
        global_b1 = pm.Normal('global_b1', mu=0, sd=10)
        sigma_b1 = pm.Uniform('sigma_b1', lower=0, upper=10)
        global_b2 = pm.Normal('global_b2', mu=0, sd=10)
        sigma_b2 = pm.Uniform('sigma_b2', lower=0, upper=10)

        # Coefficients for each city, distributed around the group means
        # b1 = pm.Normal('b1', mu=global_b1, sd=sigma_b1, shape=n_counties)
        # b2 = pm.Normal('b2', mu=global_b2, sd=sigma_b2, shape=n_counties)

        # Coefficients for each city, distributed around the group means
        # (non-centered parameterization)
        b1_offset = pm.Normal('b1_offset', mu=0, sd=1, shape=n_counties)
        b1 = pm.Deterministic("b1", global_b1 + b1_offset * sigma_b1)
        b2_offset = pm.Normal('b2_offset', mu=0, sd=1, shape=n_counties)
        b2 = pm.Deterministic("b2", global_b2 + b2_offset * sigma_b2)

        # Model error
        eps = pm.HalfCauchy('eps', 5)

        y_obs = Xy_training[response_variable]
        x1 = Xy_training[predictor_variables[0]].values
        model = b1[county_idx] + b2[county_idx] * x1

        # Data likelihood
        y_like = pm.Normal('y_like', mu=model, sd=eps, observed=y_obs)

    with hierarchical_model:
        step = pm.NUTS(target_accept=0.98)  # increase target_accept to avoid divergence problems
        hierarchical_trace = pm.sample(draws=samples, step=step, n_init=samples, njobs=2)

    # save to disc
    with open(output_trace_path, 'wb') as buff:
        pickle.dump({'inference': hierarchical_model,
                     'trace': hierarchical_trace,
                     'scaler': scaler,
                     'city_index_df': degree_index,
                     'response_variable': response_variable,
                     'predictor_variables': predictor_variables,
                     'sector': sector}, buff)
def MultiOutput_Bayesian_Calibration(n_y, DataComp, DataField, DataPred, output_folder):
    # This is the data preprocessing part
    n = np.shape(DataField)[0]  # number of measured data
    m = np.shape(DataComp)[0]   # number of simulation data
    p = np.shape(DataField)[1] - n_y     # number of input x
    q = np.shape(DataComp)[1] - p - n_y  # number of calibration parameters t

    xc = DataComp[:, n_y:]   # simulation input x + calibration parameters t
    xf = DataField[:, n_y:]  # observed input
    yc = DataComp[:, :n_y]   # simulation output
    yf = DataField[:, :n_y]  # observed output

    x_pred = DataPred[:, n_y:]   # design points for predictions
    y_true = DataPred[:, :n_y]   # true measured value for design points for predictions
    n_pred = np.shape(x_pred)[0]  # number of predictions
    N = n + m + n_pred

    # Put points xc, xf, and x_pred on [0,1]
    for i in range(p):
        x_min = min(min(xc[:, i]), min(xf[:, i]))
        x_max = max(max(xc[:, i]), max(xf[:, i]))
        xc[:, i] = (xc[:, i] - x_min) / (x_max - x_min)
        xf[:, i] = (xf[:, i] - x_min) / (x_max - x_min)
        x_pred[:, i] = (x_pred[:, i] - x_min) / (x_max - x_min)

    # Put calibration parameters t on domain [0,1]
    for i in range(p, (p + q)):
        t_min = min(xc[:, i])
        t_max = max(xc[:, i])
        xc[:, i] = (xc[:, i] - t_min) / (t_max - t_min)

    # store mean and std of yc for future scale back use
    yc_mean = np.zeros(n_y)
    yc_sd = np.zeros(n_y)

    # standardization of output yf and yc
    for i in range(n_y):
        yc_mean[i] = np.mean(yc[:, i])
        yc_sd[i] = np.std(yc[:, i])
        yc[:, i] = (yc[:, i] - yc_mean[i]) / yc_sd[i]
        yf[:, i] = (yf[:, i] - yc_mean[i]) / yc_sd[i]

    # This is the modeling part
    with pm.Model() as model:
        # Claim prior part
        eta1 = pm.HalfCauchy("eta1", beta=5)  # for eta of gaussian process
        lengthscale = pm.Gamma("lengthscale", alpha=2, beta=1, shape=(p + q))  # for lengthscale of gaussian process
        tf = pm.Beta("tf", alpha=2, beta=2, shape=q)  # for calibration parameters
        sigma1 = pm.HalfCauchy('sigma1', beta=5)  # for noise
        y_pred = pm.Normal('y_pred', 0, 1.5, shape=(n_pred, n_y))  # for y prediction

        # Setup prior of right cholesky matrix
        sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=n_y)
        colchol_packed = pm.LKJCholeskyCov('colcholpacked', n=n_y, eta=2, sd_dist=sd_dist)
        colchol = pm.expand_packed_triangular(n_y, colchol_packed)

        # Concatenate data into a big matrix [[xf tf], [xc tc], [x_pred tf]]
        xf1 = tt.concatenate([xf, tt.fill(tt.zeros([n, q]), tf)], axis=1)
        x_pred1 = tt.concatenate([x_pred, tt.fill(tt.zeros([n_pred, q]), tf)], axis=1)
        X = tt.concatenate([xf1, xc, x_pred1], axis=0)
        # Concatenate data into a big matrix [[yf], [yc], [y_pred]]
        y = tt.concatenate([yf, yc, y_pred], axis=0)

        # Covariance function of gaussian process
        cov_z = eta1**2 * pm.gp.cov.ExpQuad((p + q), ls=lengthscale)
        # Gaussian process with covariance function cov_z
        gp = MultiMarginal(cov_func=cov_z)

        # Bayesian inference
        matrix_shape = [n + m + n_pred, n_y]
        outcome = gp.marginal_likelihood("outcome", X=X, y=y, colchol=colchol,
                                         noise=sigma1, matrix_shape=matrix_shape)
        trace = pm.sample(250, cores=1)

    # This part is for data collection and visualization
    pm.summary(trace).to_csv(output_folder + '/trace_summary.csv')
    print(pm.summary(trace))

    name_columns = []
    n_columns = n_pred
    for i in range(n_columns):
        for j in range(n_y):
            name_columns.append('y' + str(j + 1) + '_pred' + str(i + 1))
    y_prediction = pd.DataFrame(np.array(trace['y_pred']).reshape(500, n_pred * n_y),
                                columns=name_columns)

    # Draw picture of cvrmse_dist and calculate index
    for i in range(n_y):
        index = list(range(0 + i, n_pred * n_y + i, n_y))
        y_prediction1 = pd.DataFrame(y_prediction.iloc[:, index])
        y_prediction1 = y_prediction1 * yc_sd[i] + yc_mean[i]  # Scale y_prediction back
        y_prediction1.to_csv(output_folder +
                             '/y_pred' + str(i + 1) + '.csv')  # Store y_prediction

        # Calculate the distribution of cvrmse
        cvrmse = 100 * np.sqrt(np.sum(np.square(y_prediction1 - y_true[:, i]), axis=1) / n_pred) / np.mean(y_true[:, i])
        # Calculate the index and store it into csv
        index_cal(y_prediction1, y_true[:, i]).to_csv(output_folder + '/index' + str(i + 1) + '.csv')

        # Draw picture of cvrmse distribution of each y
        plt.subplot(n_y, 1, i + 1)
        plt.hist(cvrmse)

    plt.savefig(output_folder + '/cvrmse_dist.pdf')
    plt.close()

    # Draw picture of Prediction_Plot
    for i in range(n_y):
        index = list(range(0 + i, n_pred * n_y + i, n_y))
        y_prediction_mean = np.array(pm.summary(trace)['mean'][index]) * yc_sd[i] + yc_mean[i]
        y_prediction_975 = np.array(pm.summary(trace)['hpd_97.5'][index]) * yc_sd[i] + yc_mean[i]
        y_prediction_025 = np.array(pm.summary(trace)['hpd_2.5'][index]) * yc_sd[i] + yc_mean[i]

        plt.subplot(n_y, 1, i + 1)
        # estimated probability
        plt.scatter(x=range(n_pred), y=y_prediction_mean)
        # error bars on the estimate
        plt.vlines(range(n_pred), ymin=y_prediction_025, ymax=y_prediction_975)
        # actual outcomes
        plt.scatter(x=range(n_pred), y=y_true[:, i], marker='x')

        plt.xlabel('predictor')
        plt.ylabel('outcome')

        # This is just to print the original cvrmse to test whether the outcome is good
        if i == 0:
            cvrmse = 100 * np.sqrt(np.sum(np.square(y_prediction_mean - y_true[:, 0]))
                                   / len(y_prediction_mean - y_true[:, 0])) / np.mean(y_true[:, 0])
            print(cvrmse)

    plt.savefig(output_folder + '/Prediction_Plot.pdf')
    plt.close()
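# --- Usage sketch for MultiOutput_Bayesian_Calibration (added for illustration;
# not part of the original source). The layout follows the preprocessing above:
# the first n_y columns are outputs, then the p observed inputs, then (for the
# simulation data) the q calibration parameters. The synthetic data and folder
# name are assumptions; MultiMarginal and index_cal must be defined elsewhere.
import numpy as np

n_y, p, q = 2, 1, 1
xf = np.random.rand(20, p)
xc = np.random.rand(40, p + q)
yf = np.column_stack([np.sin(2 * np.pi * xf[:, 0]),
                      np.cos(2 * np.pi * xf[:, 0])]) + 0.1 * np.random.randn(20, 2)
yc = np.column_stack([np.sin(2 * np.pi * xc[:, 0]) * (0.5 + 0.5 * xc[:, 1]),
                      np.cos(2 * np.pi * xc[:, 0]) * (0.5 + 0.5 * xc[:, 1])])

DataField = np.column_stack([yf, xf])   # shape (n, n_y + p)
DataComp = np.column_stack([yc, xc])    # shape (m, n_y + p + q)
DataPred = DataField[:5].copy()

# MultiOutput_Bayesian_Calibration(n_y, DataComp, DataField, DataPred, "./results")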
def student_t_likelihood(
    new_cases_inferred,
    pr_beta_sigma_obs=30,
    nu=4,
    offset_sigma=1,
    model=None,
    data_obs=None,
    name_student_t="_new_cases_studentT",
    name_sigma_obs="sigma_obs",
):
    """
    Set the likelihood to apply to the model observations (`model.new_cases_obs`).

    We assume a student-t distribution; the mean of the distribution matches
    `new_cases_inferred` as provided.

    Parameters
    ----------
    new_cases_inferred : array
        One or two dimensional array. If 2 dimensional, the first dimension is
        time and the second are the regions/countries.
    pr_beta_sigma_obs : float
    nu : float
        How flat the tail of the distribution is. Larger nu should make the
        model more robust to outliers.
    offset_sigma : float
    model :
        The model on which we want to add the distribution.
    data_obs : array
        The data that is observed. By default it is ``model.new_cases_obs``.
    name_student_t : str
        The name under which the StudentT distribution is saved in the trace.
    name_sigma_obs : str
        The name under which the distribution of the observable error is saved
        in the trace.

    Returns
    -------
    None

    TODO
    ----
    #@jonas, can we make it more clear that this whole stuff gets attached to the
    # model? like the with model as context...
    #@jonas doc description for sigma parameters
    """
    model = modelcontext(model)

    len_sigma_obs = () if model.sim_ndim == 1 else model.sim_shape[1]
    sigma_obs = pm.HalfCauchy(name_sigma_obs, beta=pr_beta_sigma_obs,
                              shape=len_sigma_obs)

    if data_obs is None:
        data_obs = model.new_cases_obs

    pm.StudentT(
        name=name_student_t,
        nu=nu,
        mu=new_cases_inferred[: len(data_obs)],
        sigma=tt.abs_(new_cases_inferred[: len(data_obs)] + offset_sigma) ** 0.5
        * sigma_obs,  # offset and tt.abs to avoid nans
        observed=data_obs,
    )
# Imports required by this snippet
import numpy as np
import matplotlib.pyplot as plt
import pymc3 as pm
import seaborn as sns

plt.rcParams["figure.figsize"] = (10, 5)
np.random.seed(42)

# Prepare the data
n_cluster = [90, 50, 75]
std_devs = [2, 2, 2]
mus = [9, 21, 35]
mix = np.random.normal(
    np.repeat(mus, n_cluster),
    np.repeat(std_devs, n_cluster)
)

# Sampling
n = len(n_cluster)
with pm.Model() as model:
    p = pm.Dirichlet("p", np.ones(n))
    k = pm.Categorical("k", p=p, shape=sum(n_cluster))
    means = pm.Normal("means", mu=[10, 10, 10], sd=10, shape=n)
    sigmas = pm.HalfCauchy("sigmas", 5)
    y = pm.Normal("y", mu=means[k], sd=sigmas, observed=mix)
    trace = pm.sample(5000, tune=1000, chains=1)

# Plot
samples = pm.sample_posterior_predictive(trace=trace, samples=100, model=model)
for sample in samples["y"]:
    sns.kdeplot(sample, color="red", alpha=0.1)
sns.kdeplot(mix, color="blue", linewidth=3)
plt.savefig("./results/4-24-mixture-model.png")