Example #1
0
def get_varying_intercept_model_results():
    """Fit the varying-intercept regression Stan model and return the result.

    The model input frame is loaded with ``get_model_input_df``, the Stan
    program ``varying_intercept_regression.stan`` (looked up under
    ``stan_model_path``) is compiled, sampled with ``iter=4000``,
    ``warmup=1000``, ``chains=3``, ``adapt_delta=0.99`` and ``seed=1``, and
    the fit is converted with ``az.from_pystan``.

    Returns:
        The object returned by ``az.from_pystan`` holding the posterior, the
        ``Y_pred`` posterior predictive, the observed ``X_meas``/``Y_meas``
        and the ``log_lik`` log likelihood.
    """
    # read in Cipriani data
    frame = get_model_input_df()

    # Measurement SDs are the square roots of the stored variances.
    stan_data = dict(
        N=frame.shape[0],
        Y_meas=frame['lnSD'].values,
        X_meas=frame['lnMean'].values,
        SD_Y=np.sqrt(frame['var_lnSD'].values),
        SD_X=np.sqrt(frame['var_lnMean'].values),
        K=len(frame['scale'].unique()),
        scale_group=frame['scale_rank'].values,
    )

    stan_file = os.path.join(stan_model_path,
                             'varying_intercept_regression.stan')
    compiled = compile_model(stan_file,
                             model_name='varying_intercept_regression')

    fit = compiled.sampling(
        data=stan_data,
        iter=4000,
        warmup=1000,
        chains=3,
        control={'adapt_delta': 0.99},
        check_hmc_diagnostics=True,
        seed=1,
    )
    pystan.check_hmc_diagnostics(fit)

    return az.from_pystan(
        posterior=fit,
        posterior_predictive=['Y_pred'],
        observed_data=['X_meas', 'Y_meas'],
        log_likelihood='log_lik',
    )
Example #2
0
def get_shrinkage_plot(data):
    """Plot measured points against their posterior-mean estimates.

    Draws the measured (lnMean, lnSD) pairs from the model input frame, the
    posterior means of ``data.posterior.X`` / ``data.posterior.Y``, and a
    dashed gray arrow from every measured point to its estimated point. The
    figure is written to ``output/shrinkage_plot.tiff`` under
    ``parent_dir_name``.

    Args:
        data: Result object exposing ``data.posterior.X`` and
            ``data.posterior.Y``. Both must be 3-D; the first two axes are
            merged into one sample axis (presumably chain and draw --
            confirm against the producer of ``data``).

    Returns:
        The ``plt`` (pyplot) module, with the created figure current.
    """
    df = get_model_input_df()
    # Only the two measured columns are needed here.
    x_meas = df['lnMean'].values
    y_meas = df['lnSD'].values

    # Merge the first two axes, then average over samples per observation.
    x_draws = data.posterior.X.values
    y_draws = data.posterior.Y.values
    x_true = x_draws.reshape(-1, x_draws.shape[2]).mean(axis=0)
    y_true = y_draws.reshape(-1, y_draws.shape[2]).mean(axis=0)

    _fig, axes = plt.subplots(figsize=(10, 10))
    axes.scatter(x_meas,
                 y_meas,
                 label='measured data of lnMean and lnSD',
                 alpha=0.7)
    axes.scatter(x_true,
                 y_true,
                 label='estimated true values of lnMean and lnSD',
                 alpha=0.7)

    # One arrow per observation, from the measured to the estimated point.
    for xm, ym, xt, yt in zip(x_meas, y_meas, x_true, y_true):
        axes.arrow(xm,
                   ym,
                   xt - xm,
                   yt - ym,
                   color='gray',
                   linestyle='--',
                   length_includes_head=True,
                   alpha=0.4,
                   head_width=.015)

    axes.set_xlabel('lnMean')
    axes.set_ylabel('lnSD')
    axes.set_title('Shrinkage effect of Bayesian varying intercept regression')
    axes.legend(loc='upper left')
    # Run the layout only after labels, title and legend exist, so that they
    # are taken into account.
    plt.tight_layout()
    plt.savefig(os.path.join(parent_dir_name, 'output/shrinkage_plot.tiff'),
                format='tiff',
                dpi=500,
                bbox_inches="tight")

    return plt
Example #3
0
def get_model_results_dict():
    """Fit the meta-analysis Stan models and collect the converted fits.

    ``fema`` and ``rema`` (fixed / random effects meta-analysis according to
    the original comments) are each fitted for the ``lnVR`` and ``lnCVR``
    effect statistics. ``remr`` is fitted for ``lnVR`` only and is the only
    fit followed by an explicit ``pystan.check_hmc_diagnostics`` call.

    Returns:
        dict mapping ``'<model>_<effect_statistic>'`` to the object returned
        by ``az.from_pystan`` for that fit.
    """

    def build_model(name):
        # Stan programs are looked up as '<name>.stan' under stan_model_path.
        return compile_model(os.path.join(stan_model_path, f'{name}.stan'),
                             model_name=name)

    def run_sampler(compiled, stan_data):
        # Identical sampler settings for every fit; the control dict is
        # created fresh on each call.
        return compiled.sampling(data=stan_data,
                                 iter=4000,
                                 warmup=1000,
                                 chains=3,
                                 control={'adapt_delta': 0.99},
                                 check_hmc_diagnostics=True,
                                 seed=1)

    def to_arviz(fit, observed):
        return az.from_pystan(
            posterior=fit,
            posterior_predictive=['Y_pred'],
            observed_data=observed,
            log_likelihood='log_lik',
        )

    df = get_model_input_df()
    results = {}

    # Meta-analysis models, each for both effect statistics.
    for model_name in ('fema', 'rema'):
        compiled = build_model(model_name)
        for statistic in ('lnVR', 'lnCVR'):
            fit = run_sampler(compiled, get_data_dict(df, statistic))
            results[f'{model_name}_{statistic}'] = to_arviz(fit, ['Y'])

    # 'remr' model, lnVR only; observed data are the measured X and Y.
    model_name, statistic = 'remr', 'lnVR'
    compiled = build_model(model_name)
    fit = run_sampler(compiled, get_data_dict(df, statistic))
    pystan.check_hmc_diagnostics(fit)
    results[f'{model_name}_{statistic}'] = to_arviz(fit, ['Y_meas', 'X_meas'])

    return results
Example #4
0
def get_lnMean_lnSD_plot():
    """Regress lnSD on lnMean per scale and arm, then plot lnMean vs. lnSD.

    For every combination of depression scale and ``is_active`` value an OLS
    fit of ``lnSD`` on ``lnMean`` (with a constant) is run and its summary is
    written to ``output/lm_res_<group>_<scale>.html``. Afterwards a scatter
    plot of both arms is drawn, the first active and first placebo row of
    each study are joined by a faint gray line, and the figure is saved to
    ``output/lnMean_lnSD_plot.tiff``.

    Returns:
        The ``plt`` (pyplot) module.
    """
    df = get_model_input_df()

    # Check whether the SD of the change variable is correlated with its
    # mean (in active and placebo) and store each OLS summary as HTML.
    for scale in ('HAMD17', 'HAMD21', 'HAMD24', 'HAMDunspecified', 'MADRS'):
        for group in (0, 1):
            subset = df.query(f'scale == "{scale}" & is_active == {group}')
            response = subset['lnSD'].values
            design = sm.add_constant(subset['lnMean'].values)
            res = sm.OLS(response, design).fit()

            html_path = os.path.join(parent_dir_name,
                                     f'output/lm_res_{group}_{scale}.html')
            with open(html_path, 'w') as f:
                f.write(res.summary2().as_html())

    # Active arm first (red); placebo is drawn onto the same axes.
    ax = df.query('is_active == 1').plot.scatter(
        x='lnMean',
        y='lnSD',
        grid=True,
        color='r',
        figsize=(8, 6),
        title='ln(mean of negative change) vs. ln(sd of change)')
    df.query('is_active == 0').plot.scatter(x='lnMean',
                                            y='lnSD',
                                            grid=True,
                                            ax=ax)
    ax.legend(('active', 'placebo'))

    # Connect the first active and first placebo row of every study.
    for _, study in df.groupby('study_id'):
        active = study.query('is_active == 1')
        placebo = study.query('is_active == 0')
        xs = (active.lnMean.values[0], placebo.lnMean.values[0])
        ys = (active.lnSD.values[0], placebo.lnSD.values[0])
        plt.plot(xs, ys, c='gray', linestyle='-', alpha=0.2)

    plt.savefig(os.path.join(parent_dir_name, f'output/lnMean_lnSD_plot.tiff'),
                format='tiff',
                dpi=1200)
    return plt
Example #5
0
def get_bar_chart_studies_per_depression_scale():
    """Draw a bar chart of the number of distinct studies per scale.

    Counts the unique ``study_id`` values for every ``scale`` in the model
    input frame, plots them as grey bars and saves the figure to
    ``output/bar_studies_per_depression_scale.tiff`` under
    ``parent_dir_name``.

    Returns:
        The ``plt`` (pyplot) module.
    """
    df = get_model_input_df()

    # One row per scale; 'study_id' holds the count of distinct studies.
    counts = (df.groupby('scale')
              .agg({'study_id': lambda ids: len(ids.unique())})
              .reset_index())

    fig, ax = plt.subplots()
    ax.bar(x=counts.scale, height=counts.study_id, color='grey')
    ax.set_title('studies per depression scale')
    plt.xticks(rotation=75)
    plt.ylabel('count')

    out_path = os.path.join(parent_dir_name,
                            f'output/bar_studies_per_depression_scale.tiff')
    plt.savefig(out_path, format='tiff', dpi=500, bbox_inches="tight")
    return plt
Example #6
0
def get_prior_comparison():
    """Fit the ``remr_prior`` model for lnVR under two priors on ``mu``.

    The base Stan data come from ``get_data_dict(df, 'lnVR')``; for each
    prior a copy is extended with ``mu_prior_loc`` and ``mu_prior_scale``
    before sampling.

    Returns:
        dict mapping ``'remr_prior_lnVR_<prior>'`` to the object returned by
        ``az.from_pystan`` for that fit.
    """
    df = get_model_input_df()

    model = 'remr_prior'
    effect_statistic = 'lnVR'
    compiled = compile_model(os.path.join(stan_model_path, f'{model}.stan'),
                             model_name=model)
    base_data = get_data_dict(df, effect_statistic)

    # (location, scale) of the prior on mu.
    # Author's note: stats.norm.cdf(0, loc=np.log(2), scale=0.43) is about
    # 0.0535 for the second prior.
    # NOTE: the key 'optimisitc' is kept as spelled because it becomes part
    # of the keys of the returned dict.
    priors = {'reference': (0, 1), 'optimisitc': (np.log(2), 0.43)}

    results = {}
    for prior_name, (loc, scale) in priors.items():
        stan_data = base_data.copy()
        stan_data.update(mu_prior_loc=loc, mu_prior_scale=scale)

        fit = compiled.sampling(
            data=stan_data,
            iter=4000,
            warmup=1000,
            chains=3,
            control={'adapt_delta': 0.99},
            check_hmc_diagnostics=True,
            seed=1,
        )
        pystan.check_hmc_diagnostics(fit)

        results[f'{model}_{effect_statistic}_{prior_name}'] = az.from_pystan(
            posterior=fit,
            posterior_predictive=['Y_pred'],
            observed_data=['Y_meas', 'X_meas'],
            log_likelihood='log_lik',
        )
    return results
Example #7
0
def plot_varying_intercept_regression_lines(data):
    """Plot the fitted varying-intercept regression, one panel per scale.

    Each panel shows the measured (lnMean, lnSD) points of the active and
    placebo arms, the regression line built from the posterior means of
    ``alpha`` and ``beta``, and up to 1000 faint lines built from randomly
    chosen posterior draws. The figure is written to
    ``output/varying_intercept_regression_lines.tiff`` under
    ``parent_dir_name``.

    Args:
        data: Result object exposing ``data.posterior.alpha`` (3-D, last
            axis indexed by ``scale_rank - 1``) and ``data.posterior.beta``.
            The leading axes are merged into one sample axis (presumably
            chain and draw -- confirm against the producer of ``data``).
            ``data`` is not modified.

    Returns:
        The ``plt`` (pyplot) module.
    """
    df = get_model_input_df()

    # Extract traces and combine all chains into one sample axis.
    alpha_values = data.posterior.alpha.values
    alphas = alpha_values.reshape(-1, alpha_values.shape[2])
    beta = data.posterior.beta.values.reshape(-1)

    # Plotting range of the regression lines.
    x_min, x_max = 1., 3.5
    x = np.linspace(x_min, x_max, 100)
    scale_list = sorted(df.scale.unique())

    # Posterior means.
    alpha_means = alphas.mean(axis=0)
    beta_mean = beta.mean()

    # Pick a random subset of draws with ONE shared index so every plotted
    # line uses an intercept and a slope from the same posterior draw.
    # Fancy indexing copies, so the arrays inside `data` are left untouched
    # (in-place shuffling of a reshaped view would scramble them).
    n_draws = alphas.shape[0]
    n_lines = min(1000, n_draws)  # never index past the available draws
    picked = np.random.permutation(n_draws)[:n_lines]
    sampled_alphas = alphas[picked]
    sampled_beta = beta[picked]

    fig, axes = plt.subplots(nrows=4,
                             ncols=2,
                             figsize=(10, 10),
                             sharex=True,
                             sharey=True)
    fig.suptitle('Fitted varying intercept regression')

    # Map each scale name to its (1-based) rank.
    rank_by_scale = (df[['scale', 'scale_rank']]
                     .drop_duplicates()
                     .set_index('scale')
                     .to_dict('dict')['scale_rank'])

    for scale in scale_list:
        scale_index = rank_by_scale[scale] - 1
        # Panels are filled row by row in a two-column grid.
        row, col = divmod(int(scale_index), 2)
        panel = axes[row, col]

        # Mean regression line.
        panel.plot(x,
                   alpha_means[scale_index] + beta_mean * x,
                   linestyle='--',
                   alpha=0.5,
                   color='black')

        # Measured data, active arm first, then placebo.
        df_a = df.query(f'scale == "{scale}" & is_active == 1')
        df_p = df.query(f'scale == "{scale}" & is_active == 0')
        panel.scatter(df_a.lnMean.values, df_a.lnSD.values, alpha=0.8)
        panel.scatter(df_p.lnMean.values, df_p.lnSD.values, alpha=0.8)

        # Regression lines of the sampled posterior draws.
        for intercept, slope in zip(sampled_alphas[:, scale_index],
                                    sampled_beta):
            panel.plot(x,
                       intercept + slope * x,
                       color='lightsteelblue',
                       alpha=0.005)

        panel.set_ylabel('lnSD')
        panel.set_title(f'{scale}')

    axes[-1, 0].set_xlabel('lnMean')
    axes[-1, 1].set_xlabel('lnMean')

    plt.tight_layout()
    plt.xlim(x_min, x_max)
    # Leave room at the top for the figure-level title.
    plt.subplots_adjust(top=0.9, bottom=0.1)
    plt.savefig(os.path.join(
        parent_dir_name, 'output/varying_intercept_regression_lines.tiff'),
                format='tiff',
                dpi=500,
                bbox_inches="tight")

    return plt