Example #1
import arviz as az
import matplotlib.pyplot as plt


def _plot_rank(idata, plot_kws=None):
    # Merge caller-supplied kwargs over the defaults.
    default_plot_kws = dict(figsize=(12, 10))
    plot_kws = {} if plot_kws is None else plot_kws
    plot_kws = {**default_plot_kws, **plot_kws}

    axes = az.plot_rank(idata, **plot_kws)
    if axes.ndim == 1:
        axes = axes.reshape(1, -1)
    n, m = axes.shape
    for i in range(n):
        for j in range(m):
            # Rewrite ArviZ's two-line titles ("var\ndim") as "var[dim]".
            label = axes[i, j].get_title().replace("\n", "[") + "]"
            axes[i, j].set_title(label)
            axes[i, j].xaxis.set_tick_params(labelsize=8)
            axes[i, j].yaxis.set_tick_params(labelsize=8)
            # Keep y labels only on the first column and x labels only on
            # the bottom row.
            if j > 0:
                axes[i, j].set_ylabel("")
            else:
                label = axes[i, j].get_ylabel()
                axes[i, j].set_ylabel(label, size=12)

            if (i + 1) < n:
                axes[i, j].set_xlabel("")
            else:
                label = axes[i, j].get_xlabel()
                axes[i, j].set_xlabel(label, size=12)

    plt.subplots_adjust(left=0.08,
                        right=0.97,
                        top=0.97,
                        bottom=0.08,
                        wspace=0.15,
                        hspace=0.15)
    return axes
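
A hedged usage sketch (not part of the original snippet): everything in `plot_kws` is forwarded to `az.plot_rank`, and the title rewriting assumes the two-line "var\ndim" titles ArviZ produces for vector-valued parameters, so we call it on one:

idata = az.load_arviz_data("centered_eight")
axes = _plot_rank(idata, plot_kws={"var_names": ["theta"]})
plt.show()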
Example #2
    numpyro.sample("y", dist.Normal(mu, sigma), obs=y)


m9_2 = MCMC(NUTS(model, target_accept_prob=0.95),
            num_warmup=500,
            num_samples=500,
            num_chains=3)
m9_2.run(random.PRNGKey(11), y=y)

m9_2.print_summary(0.95)

az.plot_trace(az.from_numpyro(m9_2))
pml.savefig('mcmc_traceplot_unigauss_bad.png')
plt.show()

az.plot_rank(az.from_numpyro(m9_2))
pml.savefig('mcmc_trankplot_unigauss_bad.png')
plt.show()

# Model with proper priors


def model(y):
    alpha = numpyro.sample("alpha", dist.Normal(1, 10))
    sigma = numpyro.sample("sigma", dist.Exponential(1))
    mu = alpha
    numpyro.sample("y", dist.Normal(mu, sigma), obs=y)


m9_3 = MCMC(NUTS(model), num_warmup=500, num_samples=500, num_chains=3)
m9_3.run(random.PRNGKey(11), y=y)
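
A natural follow-up, not part of the original snippet, is to repeat the diagnostics for the well-specified model; with proper priors the trace and rank plots should show well-mixed, close-to-uniform chains:

az.plot_trace(az.from_numpyro(m9_3))
plt.show()

az.plot_rank(az.from_numpyro(m9_3))
plt.show()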
Example #3
    def on_post(self, req, resp, **kwargs):
        raw_data = load(req.bounded_stream)['params']
        # print(raw_data)
        y1 = pd.Series(raw_data['y1'])
        y0 = pd.Series(raw_data['y0'])
        y_comb = pd.concat([y1, y0])
        x_comb = pd.concat(
            [pd.Series([1] * y1.size),
             pd.Series([0] * y0.size)])
        dat_list = {
            'y': y_comb,
            'x': x_comb,
            'N': y1.size + y0.size,
            'sd_m': raw_data['sd_m'],
            'sd_m_diff': raw_data['max_diff'] / norm.ppf(.975),
            'sd_st': raw_data['sd_st'],
            'sd_st_r': np.log(raw_data['max_st_r']) / norm.ppf(.975),
            'nu_choice': raw_data['nu_choice'],
            'prob': raw_data['prob'] / 100
        }

        params = {
            new_list: {}
            for new_list in
            ['m0', 'm1', 'm_diff', 'st0', 'st1', 'st_ratio', 'nu']
        }

        pkl_file = 'stan_scripts/dmdv.pkl'
        if raw_data['prob'] > 0:
            del params['nu']
            pkl_file = 'stan_scripts/dmdv_quantile.pkl'

        with open(pkl_file, 'rb') as f:
            dmdv = pickle.load(f)
        fit = dmdv.sampling(data=dat_list,
                            chains=4,
                            iter=raw_data['n_iter'],
                            seed=12345)

        # ('mean', 'se_mean', 'sd', 'n_eff', 'Rhat')

        summary = fit.summary(pars=params.keys(), probs=[])['summary']

        posteriors = fit.extract()
        for i, param in enumerate(list(params)):
            params[param] = {
                'mean': summary[i][0],
                'median': np.median(posteriors[param]),
                'mcse': summary[i][1],
                'sd': summary[i][2],
                'post': posteriors[param].tolist(),
                'ess': summary[i][3],
                'rhat': summary[i][4],
                # todo: 'warnings':
            }

        rk_IOBytes = io.BytesIO()
        az.plot_rank(fit, var_names=('m_diff', 'st_ratio'))
        plt.savefig(rk_IOBytes, format='png')
        plt.close()
        params['rk_hash'] = img_b64(rk_IOBytes)

        if raw_data['prob'] > 0:
            params['mean_hash'] = create_qty_plt(
                posteriors['m_diff'],
                'percentile-' + str(raw_data['prob']) + ' difference')
        else:
            params['mean_hash'] = create_qty_plt(posteriors['m_diff'],
                                                 'mean difference')
        params['sc_hash'] = create_qty_plt(posteriors['st_ratio'], 'SD ratio')

        params['raw_data'] = raw_data

        resp.media = params
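
For context, a hypothetical payload for this handler, inferred from the keys `on_post` reads (values are illustrative only; the handlers in the following examples read analogous keys):

payload = {
    "params": {
        "y1": [5.1, 4.8, 6.0],  # outcomes, group 1
        "y0": [4.2, 4.5, 3.9],  # outcomes, group 0
        "sd_m": 10,             # passed through as dat_list['sd_m']
        "max_diff": 5,          # scaled by 1 / norm.ppf(.975) into 'sd_m_diff'
        "sd_st": 1,             # passed through as dat_list['sd_st']
        "max_st_r": 3,          # log-transformed and scaled into 'sd_st_r'
        "nu_choice": 2,         # passed through as dat_list['nu_choice']
        "prob": 0,              # 0 keeps 'nu'; > 0 switches to the quantile model
        "n_iter": 2000,         # iterations per chain for fit.sampling
    }
}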
Example #4
    def on_post(self, req, resp, **kwargs):
        raw_data = load(req.bounded_stream)['params']
        # print(raw_data)
        y1 = pd.Series(raw_data['y1'])
        y0 = pd.Series(raw_data['y0'])
        y_comb = pd.concat([y1, y0])
        x_comb = pd.concat(
            [pd.Series([1] * y1.size),
             pd.Series([0] * y0.size)])
        # Get maximum risk difference
        # max_prop_diff = raw_data['max_diff'] / (raw_data['max_val'] - raw_data['min_val'])
        # Set max logit difference to qlogis(.7 + max_RD) - qlogis(.7); censor .7 + max_RD at .99
        # sd_m_diff = logistic.ppf(
        #     min([.99, .7 + max_prop_diff])) - logistic.ppf(.7)
        # Change max difference to qlogis(.9 + max_RD) - qlogis(.9) if variable is extreme
        # if (raw_data['sd_m'] > 3):
        #     sd_m_diff = logistic.ppf(
        #         min([.99, .9 + max_prop_diff])) - logistic.ppf(.9)
        dat_list = {
            'y': y_comb,
            'x': x_comb,
            'N': y1.size + y0.size,
            'min_val': raw_data['min_val'],
            'max_val': raw_data['max_val'],
            'a0': raw_data['sd_m'],
            'sd_m_diff': raw_data['max_diff'] / norm.ppf(.975)
        }

        params = {
            new_list: {}
            for new_list in [
                'm0', 'm1', 'm_diff', 'st0', 'st1', 'st_ratio', 'shape0_alpha',
                'shape0_beta', 'shape1_alpha', 'shape1_beta'
            ]
        }

        pkl_file = 'stan_scripts/dmdv_beta.pkl'

        with open(pkl_file, 'rb') as f:
            dmdv_beta = pickle.load(f)
        fit = dmdv_beta.sampling(data=dat_list,
                                 chains=4,
                                 iter=raw_data['n_iter'],
                                 seed=12345)

        # ('mean', 'se_mean', 'sd', 'n_eff', 'Rhat')
        lo_prob = (1 - raw_data['interval'] / 100) / 2
        hi_prob = 1 - lo_prob

        summary = fit.summary(pars=params.keys(),
                              probs=(lo_prob, .5, hi_prob))['summary']

        posteriors = fit.extract()
        for i, param in enumerate(list(params)):
            params[param] = {
                'mean': summary[i][0],
                'median': summary[i][4],
                'mcse': summary[i][1],
                'sd': summary[i][2],
                'int.lo': summary[i][3],
                'int.hi': summary[i][5],
                'post': posteriors[param].tolist(),
                'ess': summary[i][6],
                'rhat': summary[i][7],
                # todo: 'warnings':
            }

        rk_IOBytes = io.BytesIO()
        az.plot_rank(fit, var_names=('m_diff', 'st_ratio'))
        plt.savefig(rk_IOBytes, format='png')
        plt.close()
        params['rk_hash'] = img_b64(rk_IOBytes)

        params['mean_hash'] = create_qty_plt(posteriors['m_diff'],
                                             'mean difference')
        params['sc_hash'] = create_qty_plt(posteriors['st_ratio'], 'SD ratio')

        params['raw_data'] = raw_data

        resp.media = params
Example #5
    def on_post(self, req, resp, **kwargs):
        raw_data = load(req.bounded_stream)['params']
        success = pd.Series(raw_data['success'])
        total = pd.Series(raw_data['total'])
        dat_list = {
            'pass': success,
            'total': total,
            'sd_m': raw_data['sd_m'] / norm.ppf(.975),
            'sd_m_diff': np.log(raw_data['sd_m_diff']) / norm.ppf(.975)
        }
        params = {
            new_list: {}
            for new_list in [
                'm0', 'm_diff', 'means_logit[2]', 'odds_ratio',
                'means_prob[1]', 'means_prob[2]', 'prob_ratio', 'prob_diff'
            ]
        }
        with open('stan_scripts/two_group_logistic.pkl', 'rb') as f:
            two_group_logistic = pickle.load(f)
        fit = two_group_logistic.sampling(data=dat_list,
                                          chains=4,
                                          iter=raw_data['n_iter'],
                                          seed=12345)
        lo_prob = (1 - raw_data['interval'] / 100) / 2
        hi_prob = 1 - lo_prob

        summary = fit.summary(pars=params.keys(),
                              probs=(lo_prob, .5, hi_prob))['summary']

        posteriors = fit.extract()
        for i, param in enumerate(list(params)):
            par_sum = summary[i]
            if param == 'means_logit[2]':
                param = 'm1'
            params[param] = {
                'mean': par_sum[0],
                'median': par_sum[4],
                'mcse': par_sum[1],
                'sd': par_sum[2],
                'int.lo': par_sum[3],
                'int.hi': par_sum[5],
                'ess': par_sum[6],
                'rhat': par_sum[7]
            }
            if param in [
                    'm0', 'm_diff', 'odds_ratio', 'prob_ratio', 'prob_diff'
            ]:
                params[param]['post'] = posteriors[param].tolist()

        params.pop('means_logit[2]')
        rk_IOBytes = io.BytesIO()
        az.plot_rank(fit, var_names=('odds_ratio',))
        plt.savefig(rk_IOBytes, format='png')
        plt.close()
        params['rk_hash'] = img_b64(rk_IOBytes)

        params['or_hash'] = create_qty_plt(posteriors['odds_ratio'],
                                           'odds ratio')
        params['pr_hash'] = create_qty_plt(posteriors['prob_ratio'],
                                           'prob. ratio')
        params['pd_hash'] = create_qty_plt(posteriors['prob_diff'],
                                           'prob. diff')

        params['raw_data'] = raw_data

        resp.media = params
Example #6
    def on_post(self, req, resp, **kwargs):
        raw_data = load(req.bounded_stream)['params']
        eff = pd.Series(raw_data['eff'])
        scales = pd.Series(raw_data['scales'])
        dat_list = {
            'eff': eff,
            's': scales,
            'N': eff.size,
            'mean_sd': raw_data['mean_sd'] / norm.ppf(.975),
            'rnd_sd': raw_data['rnd_sd'] / norm.ppf(.975)
        }
        params = {
            new_list: {}
            for new_list in ['eff_mean', 'eff_sd', 'eff_model']
        }
        with open('stan_scripts/meta_re.pkl', 'rb') as f:
            meta_re = pickle.load(f)
        fit = meta_re.sampling(data=dat_list,
                               chains=4,
                               iter=raw_data['n_iter'],
                               seed=12345)
        lo_prob = (1 - raw_data['interval'] / 100) / 2
        hi_prob = 1 - lo_prob

        summary = fit.summary(pars=params.keys(),
                              probs=(lo_prob, .5, hi_prob))['summary']

        posteriors = fit.extract()
        for i, param in enumerate(list(params)):
            if param == 'eff_model':
                for j in range(raw_data['N']):
                    par_sum = summary[i + j]
                    params[param][param + '[' + str(j + 1) + ']'] = {
                        'mean': par_sum[0],
                        'median': par_sum[4],
                        'mcse': par_sum[1],
                        'sd': par_sum[2],
                        'int.lo': par_sum[3],
                        'int.hi': par_sum[5],
                        'ess': par_sum[6],
                        'rhat': par_sum[7]
                    }
            else:
                par_sum = summary[i]
                params[param] = {
                    'mean': par_sum[0],
                    'median': par_sum[4],
                    'mcse': par_sum[1],
                    'sd': par_sum[2],
                    'post': posteriors[param].tolist(),
                    'int.lo': par_sum[3],
                    'int.hi': par_sum[5],
                    'ess': par_sum[6],
                    'rhat': par_sum[7]
                }

        rk_IOBytes = io.BytesIO()
        az.plot_rank(fit, var_names=('eff_mean', 'eff_sd'))
        plt.savefig(rk_IOBytes, format='png')
        plt.close()
        params['rk_hash'] = img_b64(rk_IOBytes)

        params['mean_hash'] = create_qty_plt(posteriors['eff_mean'],
                                             'average effect')
        params['sd_hash'] = create_qty_plt(posteriors['eff_sd'], 'tau')

        params['raw_data'] = raw_data

        resp.media = params
Example #7
"""
Rank plot
=========

_thumb: .1, .8
"""
import arviz as az

az.style.use('arviz-darkgrid')

data = az.load_arviz_data('centered_eight')
az.plot_rank(data, var_names=('tau', 'mu'))

Example #8
"""
Rank plot
=========

_thumb: .1, .8
"""
import arviz as az

az.style.use("arviz-darkgrid")

data = az.load_arviz_data("centered_eight")
az.plot_rank(data, var_names=("tau", "mu"))
Example #9
    beta_eff[i] = get_beta_eff(i, beta_start, beta_end, k, 90)

plt.plot(np.arange(0, 180), beta_eff)
plt.xlabel('Time (days)')
plt.ylabel('Beta (effective)')
plt.savefig('../results/plots/beta_over_time.pdf')
plt.clf()
"""
seir.plot_incidence()
plt.plot(np.arange(0,180),daily_cases.newcountconfirmed[20:200],label='observed')
plt.legend()
plt.tight_layout()
plt.savefig('../results/plots/model_fit.pdf')
"""

inference_data = az.from_cmdstan('../results/outputs/*.csv')
az.plot_trace(inference_data)
plt.savefig('../results/plots/model_trace.pdf')
az.plot_joint(inference_data,
              var_names=['beta_start', 'beta_end'],
              kind='kde',
              figsize=(6, 6))
plt.tight_layout()
plt.savefig('../results/plots/model_joint_betas.pdf')
az.plot_posterior(inference_data)
plt.savefig('../results/plots/model_posterior.pdf')
az.plot_autocorr(inference_data, combined=True)
plt.savefig('../results/plots/model_autocorrelation.pdf')
az.plot_rank(inference_data)
plt.savefig('../results/plots/model_rank.pdf')
Example #10
"""
Rank plot
=========

_thumb: .1, .8
"""
import arviz as az

data = az.load_arviz_data("centered_eight")
ax = az.plot_rank(data, var_names=("tau", "mu"), backend="bokeh")
As we discussed in the Numerical Diagnostics section, autocorrelation decreases the actual amount of information contained in a sample, so we want to reduce it as much as possible.

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

bad_chains = np.linspace(0, 1, 1000).reshape(2, -1)
az.plot_autocorr(bad_chains)

The autocorrelation plot shows the _degree of autocorrelation_; by default it uses a maximum window of 100. The previous figure, corresponding to `bad_chains`, shows very high autocorrelation, while the next figure, corresponding to `good_chains`, shows very low autocorrelation.

good_chains = stats.uniform.rvs(0, 1, size=(2, 500))
az.plot_autocorr(good_chains)
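
The size of the window can be changed with `plot_autocorr`'s `max_lag` parameter, for example:

az.plot_autocorr(good_chains, max_lag=200)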

## Rank plot


    az.plot_rank()


Rank plots are histograms of the ranked posterior draws, ranked over all chains and then plotted separately for each chain. The idea behind this plot is that if all of the chains are targeting the same posterior, we expect the ranks in each chain to be uniform. Additionally, if rank plots of all chains look similar, this indicates good mixing of the chains.

This is a [recently](https://arxiv.org/abs/1903.08008) proposed visual test; its authors argue that it is superior to trace plots:

az.plot_rank(good_chains);

We can see that for `good_chains` the histogram of the ranks is more or less uniform (uniformity increases with the sample size), and both chains look similar, with no distinctive pattern. This is in clear contrast with the results for `bad_chains`: while each histogram is uniform, the two chains are exploring two separate sets of values. Notice how this is consistent with the way we created `bad_chains`: 1000 ordered numbers from 0 to 1 split into two halves.

az.plot_rank(bad_chains);

The following is a snippet so you can get a better intuition of how to interpret rank plots. Notice that `az.plot_rank` does a more involved computation, but for intuition this block of code should be enough. Here the histogram of the ranks (right panel) is rotated with respect to the previous histograms so that it matches the cumulative distribution on the left panel: the bottom bar on the right contains the first 100 values from the cumulative distribution on the left, the second bar the next 100 values, and so on.
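
(A minimal sketch of such a snippet; the original code block was not preserved here, so the sample size and bin width are chosen to match the "100 values per bar" description.)

sample = stats.uniform.rvs(0, 1, size=500)
# Uncomment the next line to add an excess of zeros to the sample.
# sample = np.concatenate([sample, np.zeros(100)])

ranks = stats.rankdata(sample)

fig, axes = plt.subplots(1, 2, figsize=(10, 4), sharey=True)
# Left panel: sorted values against their ranks (the empirical CDF).
axes[0].plot(np.sort(sample), np.arange(1, sample.size + 1))
axes[0].set_xlabel("value")
axes[0].set_ylabel("rank")
# Right panel: histogram of the ranks, rotated so it shares the rank axis;
# each bar collects 100 consecutive ranks from the curve on the left.
axes[1].hist(ranks, bins=np.arange(0, sample.size + 1, 100),
             orientation="horizontal", ec="k")
axes[1].set_xlabel("count")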

You can see a rank plot as a device for detecting an excess of any given value; try, for example, uncommenting the second line in the snippet above and see how an excess of zeros affects the rank plot.