def r2(trace): """ R squared :param trace: """ y_true = trace.observed_data["y"].values y_pred = trace.posterior_predictive.stack(sample=("chain", "draw"))["y"].values.T try: print(az.r2_score(y_true, y_pred)) except (TypeError, ValueError) as e: print(e)
plt.plot(x, y, 'b.') plt.plot(x, alpha_m + beta_m * x, c='k', label=f'y = {alpha_m:.2f} + {beta_m:.2f} * x') az.plot_hpd(x, ppc['y_pred'], credible_interval=0.5, color='gray') az.plot_hpd(x, ppc['y_pred'], color='gray') plt.xlabel('x') plt.ylabel('y', rotation=0) plt.savefig('B11197_03_07.png', dpi=300) # In[12]: az.r2_score(y, ppc['y_pred']) # ### The multivariate normal distribution # Actually the bivariate # In[13]: sigma_x1 = 1 sigmas_x2 = [1, 2] rhos = [-0.90, -0.5, 0, 0.5, 0.90] k, l = np.mgrid[-5:5:.1, -5:5:.1] pos = np.empty(k.shape + (2,)) pos[:, :, 0] = k
ax.set_ylabel("y",rotation = 0) plt.legend() plt.show() # plot 2 fig, ax = plt.subplots(figsize = (8,4)) ax.scatter(x,y) ax.plot(x, alpha_m + beta_m * x, c = "k",label=f'y = {alpha_m:.2f} + {beta_m:.2f} * x') az.plot_hpd(x,trace["mu"],credible_interval = 0.98,color = "k") ax.set_xlabel("x") ax.set_ylabel("y",rotation = 0) plt.legend() plt.show() # plot 3 # get the y from the posterior distribution ppc = pm.sample_posterior_predictive(trace,samples = 2000,model = linear_model) plt.figure() plt.plot(x,y,"b.") plt.plot(x,alpha_m + beta_m * x,c = "k",label=f'y = {alpha_m:.2f} + {beta_m:.2f} * x') az.plot_hpd(x,ppc["obs"],credible_interval = 0.5, color = "gray") az.plot_hpd(x,ppc["obs"],color = "gray") plt.xlabel("x") plt.ylabel("y") plt.show() # ------------------- get the R squared of our model ------------------------ # r_squared = az.r2_score(y, ppc["obs"]) log.info("The r squared of the model is: %s", r_squared)
def bayesian_regression(): df_grocery = pd.read_csv('year_ward_grocery.csv') df_grocery['female_perc'] = df_grocery.apply(lambda row: row['female'] / row['population'], axis=1) df_diabetes = pd.read_csv('diabetes_estimates_osward_2016.csv', encoding='utf-8', header=0).dropna() df_geo = pd.read_csv('london_pcd2geo_2015.csv', encoding='utf-8') df_geo = df_geo[['osward','oslaua']] df_geo = df_geo.drop_duplicates() df = df_grocery.merge(df_diabetes, how='inner', left_on='area_id', right_on='osward') df = df.merge(df_geo, how='inner', on='osward') plt.figure(figsize=(8, 8)) plt.plot(df['energy_carb'], df['estimated_diabetes_prevalence'], 'bo') plt.xlabel(f'energy_carb', size = 18) plt.ylabel(f'estimated_diabetes_prevalence', size = 18) X1=df['energy_carb'].values X2=df['h_energy_nutrients_norm'].values X3=df['avg_age'].values X4=df['female_perc'].values X5=df['num_transactions'].values X6=df['people_per_sq_km'].values X5 = np.array([np.log2(x) for x in X5]) X6 = np.array([np.log2(x) for x in X6]) Y=df['estimated_diabetes_prevalence'].values oslaua2index = {} i=0 for v in df['oslaua'].values: if v not in oslaua2index: oslaua2index[v]=i i += 1 df['oslaua_idx'] = df.apply(lambda row : oslaua2index[row['oslaua']], axis=1) n_oslauas = n_counties = len(df['oslaua_idx'].unique()) oslaua_idx = df['oslaua_idx'].values hierarchical_model = pm.Model() with hierarchical_model: # Hyperpriors for group nodes mu_a = pm.Normal('mu_a', mu=0., sigma=100) sigma_a = pm.HalfNormal('sigma_a', 5.) a = pm.Normal('a', mu=mu_a, sigma=sigma_a, shape=n_oslauas) mu_b1 = pm.Normal('mu_b1', mu=0., sigma=100) sigma_b1 = pm.HalfNormal('sigma_b1', 5.) b1 = pm.Normal('b1', mu=mu_b1, sigma=sigma_b1, shape=n_oslauas) mu_b2 = pm.Normal('mu_b2', mu=0., sigma=100) sigma_b2 = pm.HalfNormal('sigma_b2', 5.) b2 = pm.Normal('b2', mu=mu_b2, sigma=sigma_b2, shape=n_oslauas) mu_b3 = pm.Normal('mu_b3', mu=0., sigma=100) sigma_b3 = pm.HalfNormal('sigma_b3', 5.) b3 = pm.Normal('b3', mu=mu_b3, sigma=sigma_b3, shape=n_oslauas) mu_b4 = pm.Normal('mu_b4', mu=0., sigma=100) sigma_b4 = pm.HalfNormal('sigma_b4', 5.) b4 = pm.Normal('b4', mu=mu_b4, sigma=sigma_b4, shape=n_oslauas) mu_b5 = pm.Normal('mu_b5', mu=0., sigma=100) sigma_b5 = pm.HalfNormal('sigma_b5', 5.) b5 = pm.Normal('b5', mu=mu_b5, sigma=sigma_b5, shape=n_oslauas) mu_b6 = pm.Normal('mu_b6', mu=0., sigma=100) sigma_b6 = pm.HalfNormal('sigma_b6', 5.) b6 = pm.Normal('b6', mu=mu_b6, sigma=sigma_b6, shape=n_oslauas) # Model error eps = pm.HalfCauchy('eps', 5.) estimate = a[oslaua_idx] + b1[oslaua_idx]*X1 + b2[oslaua_idx]*X2 + b3[oslaua_idx]*X3 + b4[oslaua_idx]*X4 + b5[oslaua_idx]*X5 + b6[oslaua_idx]*X6 # Likelihood (sampling distribution) of observations likelihood = pm.Normal('likelihood', mu=estimate, sigma=eps, observed=Y) hierarchical_trace = pm.sample(10000, tune=10000, target_accept=.9) ppc = pm.sample_posterior_predictive(hierarchical_trace, samples=10000, model=hierarchical_model) np.asarray(ppc['likelihood']).shape print(az.r2_score(Y, ppc['likelihood']))