def get_varying_intercept_model_results():
    """Fit the varying intercept regression Stan model and wrap the fit for ArviZ.

    The model input is loaded with ``get_model_input_df()``, the Stan program
    ``varying_intercept_regression.stan`` is compiled from ``stan_model_path``
    and sampled, and the resulting fit is converted with ``az.from_pystan``.

    Returns:
        The object returned by ``az.from_pystan`` holding the posterior, the
        posterior predictive ``Y_pred``, the observed ``X_meas``/``Y_meas``
        and the pointwise log likelihood ``log_lik``.
    """
    # read in Cipriani data
    input_df = get_model_input_df()

    # Inputs handed to the Stan program; the measurement SDs are the square
    # roots of the variance columns.
    stan_inputs = dict(
        N=input_df.shape[0],
        Y_meas=input_df['lnSD'].values,
        X_meas=input_df['lnMean'].values,
        SD_Y=np.sqrt(input_df['var_lnSD'].values),
        SD_X=np.sqrt(input_df['var_lnMean'].values),
        K=len(input_df.scale.unique()),
        scale_group=input_df.scale_rank.values,
    )

    model_file = os.path.join(stan_model_path,
                              'varying_intercept_regression.stan')
    compiled = compile_model(model_file,
                             model_name='varying_intercept_regression')

    sampler_settings = {
        'iter': 4000,
        'warmup': 1000,
        'chains': 3,
        'control': {'adapt_delta': 0.99},
        'check_hmc_diagnostics': True,
        'seed': 1,
    }
    fit = compiled.sampling(data=stan_inputs, **sampler_settings)
    # Explicit diagnostics run in addition to the flag passed to sampling().
    pystan.check_hmc_diagnostics(fit)

    return az.from_pystan(
        posterior=fit,
        posterior_predictive=['Y_pred'],
        observed_data=['X_meas', 'Y_meas'],
        log_likelihood='log_lik',
    )
def get_shrinkage_plot(data):
    """Plot measured (lnMean, lnSD) pairs against their posterior mean estimates.

    Each measured point is connected by an arrow to the posterior mean of the
    corresponding ``X``/``Y`` values in ``data.posterior``. The figure is saved
    as ``output/shrinkage_plot.tiff`` below ``parent_dir_name``.

    Only the ``lnMean`` and ``lnSD`` columns of the model input are read here;
    the remaining Stan inputs are not needed for plotting and are no longer
    computed.

    Args:
        data: Object exposing ``posterior.X`` and ``posterior.Y`` whose values
            are 3-D arrays; the two leading axes are flattened into one
            sample axis before averaging.

    Returns:
        The ``matplotlib.pyplot`` module, with the figure still current.
    """
    df = get_model_input_df()
    x_meas = df['lnMean'].values
    y_meas = df['lnSD'].values

    _, axes = plt.subplots(figsize=(10, 10))

    # Collapse the two leading axes so that every row is one posterior sample.
    x_values = data.posterior.X.values
    y_values = data.posterior.Y.values
    x_true_trace = x_values.reshape(-1, x_values.shape[-1])
    y_true_trace = y_values.reshape(-1, y_values.shape[-1])

    # get posterior means
    x_true = x_true_trace.mean(axis=0)
    y_true = y_true_trace.mean(axis=0)

    axes.scatter(x_meas,
                 y_meas,
                 label='measured data of lnMean and lnSD',
                 alpha=0.7)
    axes.scatter(x_true,
                 y_true,
                 label='estimated true values of lnMean and lnSD',
                 alpha=0.7)

    # One arrow per observation, pointing from the measurement to its estimate.
    for xm, ym, xt, yt in zip(x_meas, y_meas, x_true, y_true):
        axes.arrow(xm,
                   ym,
                   xt - xm,
                   yt - ym,
                   color='gray',
                   linestyle='--',
                   length_includes_head=True,
                   alpha=0.4,
                   head_width=.015)

    plt.tight_layout()
    plt.xlabel('lnMean')
    plt.ylabel('lnSD')
    plt.title('Shrinkage effect of Bayesian varying intercept regression')
    axes.legend(loc='upper left')
    plt.savefig(os.path.join(parent_dir_name, 'output/shrinkage_plot.tiff'),
                format='tiff',
                dpi=500,
                bbox_inches="tight")
    return plt
def get_model_results_dict():
    """Fit the meta-analysis Stan models and collect the results by name.

    The ``fema`` and ``rema`` models (fixed and random effects meta analyses)
    are each fitted for the ``lnVR`` and ``lnCVR`` effect statistics; the
    ``remr`` model is fitted for ``lnVR`` only.

    Returns:
        dict mapping ``'<model>_<effect_statistic>'`` to the object returned
        by ``az.from_pystan`` for that fit.
    """
    input_df = get_model_input_df()
    results = {}

    # Sampler configuration shared by every fit in this function.
    sampler_settings = dict(iter=4000,
                            warmup=1000,
                            chains=3,
                            control={'adapt_delta': 0.99},
                            check_hmc_diagnostics=True,
                            seed=1)

    # fixed effects and random effects meta analyses (lnVR and lnCVR)
    for model_name in ('fema', 'rema'):
        compiled = compile_model(
            os.path.join(stan_model_path, f'{model_name}.stan'),
            model_name=model_name)
        for statistic in ('lnVR', 'lnCVR'):
            fit = compiled.sampling(data=get_data_dict(input_df, statistic),
                                    **sampler_settings)
            results[f'{model_name}_{statistic}'] = az.from_pystan(
                posterior=fit,
                posterior_predictive=['Y_pred'],
                observed_data=['Y'],
                log_likelihood='log_lik',
            )

    # 'remr' model, lnVR only; it exposes Y_meas/X_meas as observed data.
    model_name = 'remr'
    statistic = 'lnVR'
    compiled = compile_model(
        os.path.join(stan_model_path, f'{model_name}.stan'),
        model_name=model_name)
    fit = compiled.sampling(data=get_data_dict(input_df, statistic),
                            **sampler_settings)
    pystan.check_hmc_diagnostics(fit)
    results[f'{model_name}_{statistic}'] = az.from_pystan(
        posterior=fit,
        posterior_predictive=['Y_pred'],
        observed_data=['Y_meas', 'X_meas'],
        log_likelihood='log_lik',
    )
    return results
def get_lnMean_lnSD_plot():
    """Regress lnSD on lnMean per scale and arm, then plot lnMean against lnSD.

    For every scale/arm combination an OLS fit of ``lnSD`` on ``lnMean`` (with
    intercept) is written as HTML to ``output/lm_res_<group>_<scale>.html``.
    Afterwards a scatter plot of both arms is drawn, the first active and the
    first placebo row of each study are joined by a gray line, and the figure
    is saved as ``output/lnMean_lnSD_plot.tiff``.

    Returns:
        The ``matplotlib.pyplot`` module, with the figure still current.
    """
    df = get_model_input_df()

    # check whether the standard deviation of the change variable is
    # correlated with its mean (in active and placebo)
    # and save the OLS summary as html file
    for scale in ('HAMD17', 'HAMD21', 'HAMD24', 'HAMDunspecified', 'MADRS'):
        for group in (0, 1):
            selector = f'scale == "{scale}" & is_active == {group}'
            response = df.query(selector)['lnSD'].values
            design = sm.add_constant(df.query(selector)['lnMean'].values)
            res = sm.OLS(response, design).fit()

            report_path = os.path.join(
                parent_dir_name, f'output/lm_res_{group}_{scale}.html')
            with open(report_path, 'w') as f:
                f.write(res.summary2().as_html())

    active_rows = df.query('is_active == 1')
    placebo_rows = df.query('is_active == 0')
    ax = active_rows.plot.scatter(
        x='lnMean',
        y='lnSD',
        grid=True,
        color='r',
        figsize=(8, 6),
        title='ln(mean of negative change) vs. ln(sd of change)')
    placebo_rows.plot.scatter(x='lnMean', y='lnSD', grid=True, ax=ax)
    ax.legend(('active', 'placebo'))

    # Join the first active and first placebo row of every study.
    for _, study_df in df.groupby('study_id'):
        first_active = study_df.query('is_active == 1')
        first_placebo = study_df.query('is_active == 0')
        x_pair = (first_active.lnMean.values[0],
                  first_placebo.lnMean.values[0])
        y_pair = (first_active.lnSD.values[0], first_placebo.lnSD.values[0])
        plt.plot(x_pair, y_pair, c='gray', linestyle='-', alpha=0.2)

    plt.savefig(os.path.join(parent_dir_name,
                             f'output/lnMean_lnSD_plot.tiff'),
                format='tiff',
                dpi=1200)
    return plt
def get_bar_chart_studies_per_depression_scale():
    """Draw a bar chart of the number of distinct studies per scale.

    The chart is saved as ``output/bar_studies_per_depression_scale.tiff``
    below ``parent_dir_name``.

    Returns:
        The ``matplotlib.pyplot`` module, with the figure still current.
    """

    def count_distinct(values):
        # Number of different study ids within one scale group.
        return len(values.unique())

    input_df = get_model_input_df()
    per_scale = input_df.groupby('scale').agg({'study_id': count_distinct})
    counts = per_scale.reset_index()

    fig, ax = plt.subplots()
    ax.bar(x=counts.scale, height=counts.study_id, color='grey')
    ax.set_title('studies per depression scale')
    plt.xticks(rotation=75)
    plt.ylabel('count')

    target = os.path.join(parent_dir_name,
                          f'output/bar_studies_per_depression_scale.tiff')
    plt.savefig(target, format='tiff', dpi=500, bbox_inches="tight")
    return plt
def get_prior_comparison():
    """Fit the ``remr_prior`` Stan model under two priors for ``mu``.

    The lnVR data are fitted once per entry of the prior table; location and
    scale of the prior are passed to Stan as ``mu_prior_loc`` and
    ``mu_prior_scale``.

    Returns:
        dict mapping ``'remr_prior_lnVR_<prior name>'`` to the object returned
        by ``az.from_pystan`` for that fit.
    """
    model = 'remr_prior'
    effect_statistic = 'lnVR'

    input_df = get_model_input_df()
    compiled = compile_model(os.path.join(stan_model_path, f'{model}.stan'),
                             model_name=model)
    base_inputs = get_data_dict(input_df, effect_statistic)

    # (location, scale) per prior. For the second prior,
    # stats.norm.cdf(0, loc=np.log(2), scale=0.43) is 0.05348421366569122.
    # NOTE(review): the key 'optimisitc' is misspelled; it is kept unchanged
    # because it becomes part of the keys of the returned dict.
    priors = {'reference': (0, 1), 'optimisitc': (np.log(2), 0.43)}

    results = {}
    for prior_name, prior_params in priors.items():
        loc, scale = prior_params
        stan_inputs = base_inputs.copy()
        stan_inputs.update(mu_prior_loc=loc, mu_prior_scale=scale)

        fit = compiled.sampling(data=stan_inputs,
                                iter=4000,
                                warmup=1000,
                                chains=3,
                                control={'adapt_delta': 0.99},
                                check_hmc_diagnostics=True,
                                seed=1)
        pystan.check_hmc_diagnostics(fit)

        results[f'{model}_{effect_statistic}_{prior_name}'] = az.from_pystan(
            posterior=fit,
            posterior_predictive=['Y_pred'],
            observed_data=['Y_meas', 'X_meas'],
            log_likelihood='log_lik',
        )
    return results
def plot_varying_intercept_regression_lines(data):
    """Plot the fitted varying intercept regression, one panel per scale.

    Every panel shows the regression line at the posterior means (dashed),
    the measured ``lnMean``/``lnSD`` points of the active and placebo arms,
    and up to 1000 randomly chosen posterior regression lines. The figure is
    saved as ``output/varying_intercept_regression_lines.tiff`` below
    ``parent_dir_name``.

    The sampled lines use one shared random selection of draws for intercepts
    and slope, so each line is built from an intercept and a slope belonging
    to the same posterior sample. The selection is taken as a copy, which
    leaves ``data.posterior`` untouched.

    Args:
        data: Object exposing ``posterior.alpha`` (3-D values, last axis
            indexed by ``scale_rank - 1``) and ``posterior.beta`` (2-D
            values). The leading two axes are flattened into one sample axis.

    Returns:
        The ``matplotlib.pyplot`` module, with the figure still current.
    """
    df = get_model_input_df()

    # Extracting traces (and combine all chains)
    alpha_values = data.posterior.alpha.values
    alphas = alpha_values.reshape(-1, alpha_values.shape[-1])
    beta = data.posterior.beta.values.reshape(-1)

    # Plotting regression line
    x_min, x_max = 1., 3.5
    x = np.linspace(x_min, x_max, 100)
    scale_list = sorted(df.scale.unique())

    # get posterior means
    alpha_means = alphas.mean(axis=0)
    beta_mean = beta.mean()

    # Pick a random subset of draws. The same indices are applied to alphas
    # and beta so intercepts and slope stay paired, and fancy indexing copies
    # the data: reshape can return a view of the posterior arrays, so an
    # in-place shuffle could reorder the caller's samples. Slicing caps the
    # subset at the number of available draws.
    draw_idx = np.random.permutation(beta.shape[0])[:1000]
    alpha_draws = alphas[draw_idx]
    beta_draws = beta[draw_idx]

    fig, axes = plt.subplots(nrows=4,
                             ncols=2,
                             figsize=(10, 10),
                             sharex=True,
                             sharey=True)
    fig.suptitle('Fitted varying intercept regression')

    rank_by_scale = df[[
        'scale', 'scale_rank'
    ]].drop_duplicates().set_index('scale').to_dict('dict')['scale_rank']

    for scale in scale_list:
        # scale_rank is 1-based; panels are filled row by row in the 4x2 grid.
        scale_index = rank_by_scale[scale] - 1
        row, col = divmod(scale_index, 2)
        panel = axes[row, col]

        # Plot mean regression line
        panel.plot(x,
                   alpha_means[scale_index] + beta_mean * x,
                   linestyle='--',
                   alpha=0.5,
                   color='black')

        # Plot measured data
        df_a = df.query(f'scale == "{scale}" & is_active == 1')
        df_p = df.query(f'scale == "{scale}" & is_active == 0')
        panel.scatter(df_a.lnMean.values, df_a.lnSD.values, alpha=0.8)
        panel.scatter(df_p.lnMean.values, df_p.lnSD.values, alpha=0.8)

        # Plot sample trace regression: one column per sampled draw, drawn by
        # a single plot call instead of one call per line.
        sampled_lines = alpha_draws[:, scale_index] + np.outer(x, beta_draws)
        panel.plot(x, sampled_lines, color='lightsteelblue', alpha=0.005)

        panel.set_ylabel('lnSD')
        panel.set_title(f'{scale}')

    axes[-1, 0].set_xlabel('lnMean')
    axes[-1, 1].set_xlabel('lnMean')
    plt.tight_layout()
    plt.xlim(x_min, x_max)
    plt.subplots_adjust(top=0.9, bottom=0.1)
    plt.savefig(os.path.join(
        parent_dir_name, 'output/varying_intercept_regression_lines.tiff'),
                format='tiff',
                dpi=500,
                bbox_inches="tight")
    return plt