Example #1
def test_density_dist_without_random_not_sampleable():
    with pm.Model() as model:
        mu = pm.Normal('mu', 0, 1)
        normal_dist = pm.Normal.dist(mu, 1)
        pm.DensityDist('density_dist', normal_dist.logp, observed=np.random.randn(100))
        trace = pm.sample(100)

    samples = 500
    with pytest.raises(ValueError):
        pm.sample_posterior_predictive(trace, samples=samples, model=model, size=100)
Example #2
    def test_sum_normal(self):
        with pm.Model() as model:
            a = pm.Normal("a", sigma=0.2)
            b = pm.Normal("b", mu=a)
            trace = pm.sample()

        with model:
            # test list input
            ppc0 = pm.sample_posterior_predictive([model.test_point], samples=10)
            ppc = pm.sample_posterior_predictive(trace, samples=1000, vars=[b])
            assert len(ppc) == 1
            assert ppc["b"].shape == (1000,)
            scale = np.sqrt(1 + 0.2 ** 2)
            _, pval = stats.kstest(ppc["b"], stats.norm(scale=scale).cdf)
            assert pval > 0.001
Example #3
    def test_model_shared_variable(self):
        x = np.random.randn(100)
        y = x > 0
        x_shared = theano.shared(x)
        y_shared = theano.shared(y)
        with pm.Model() as model:
            coeff = pm.Normal("x", mu=0, sd=1)
            logistic = pm.Deterministic("p", pm.math.sigmoid(coeff * x_shared))

            obs = pm.Bernoulli("obs", p=logistic, observed=y_shared)
            trace = pm.sample(100)

        x_shared.set_value([-1, 0, 1.0])
        y_shared.set_value([0, 0, 0])

        samples = 100
        with model:
            post_pred = pm.sample_posterior_predictive(
                trace, samples=samples, vars=[logistic, obs]
            )

        expected_p = np.array(
            [logistic.eval({coeff: val}) for val in trace["x"][:samples]]
        )
        assert post_pred["obs"].shape == (samples, 3)
        assert np.allclose(post_pred["p"], expected_p)
Example #4
    def test_deterministic_of_observed(self):
        meas_in_1 = pm.theanof.floatX(2 + 4 * np.random.randn(100))
        meas_in_2 = pm.theanof.floatX(5 + 4 * np.random.randn(100))
        with pm.Model() as model:
            mu_in_1 = pm.Normal("mu_in_1", 0, 1)
            sigma_in_1 = pm.HalfNormal("sd_in_1", 1)
            mu_in_2 = pm.Normal("mu_in_2", 0, 1)
            sigma_in_2 = pm.HalfNormal("sd__in_2", 1)

            in_1 = pm.Normal("in_1", mu_in_1, sigma_in_1, observed=meas_in_1)
            in_2 = pm.Normal("in_2", mu_in_2, sigma_in_2, observed=meas_in_2)
            out_diff = in_1 + in_2
            pm.Deterministic("out", out_diff)

            trace = pm.sample(100)
            ppc_trace = pm.trace_to_dataframe(
                trace, varnames=[n for n in trace.varnames if n != "out"]
            ).to_dict("records")
            ppc = pm.sample_posterior_predictive(
                model=model,
                trace=ppc_trace,
                samples=len(ppc_trace),
                vars=(model.deterministics + model.basic_RVs),
            )

            rtol = 1e-5 if theano.config.floatX == "float64" else 1e-3
            assert np.allclose(ppc["in_1"] + ppc["in_2"], ppc["out"], rtol=rtol)
Example #5
    def test_vector_observed(self):
        with pm.Model() as model:
            mu = pm.Normal("mu", mu=0, sigma=1)
            a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.0, 1.0]))
            trace = pm.sample()

        with model:
            # test list input
            ppc0 = pm.sample_posterior_predictive([model.test_point], samples=10)
            ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[])
            assert len(ppc) == 0
            ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[a])
            assert "a" in ppc
            assert ppc["a"].shape == (10, 2)

            ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[a], size=4)
            assert "a" in ppc
            assert ppc["a"].shape == (10, 4, 2)
Example #6
def test_density_dist_with_random_sampleable():
    with pm.Model() as model:
        mu = pm.Normal('mu', 0, 1)
        normal_dist = pm.Normal.dist(mu, 1)
        pm.DensityDist('density_dist', normal_dist.logp, observed=np.random.randn(100), random=normal_dist.random)
        trace = pm.sample(100)

    samples = 500
    ppc = pm.sample_posterior_predictive(trace, samples=samples, model=model, size=100)
    assert len(ppc['density_dist']) == samples
Example #7
    def test_normal_vector(self):
        with pm.Model() as model:
            mu = pm.Normal('mu', 0., 1.)
            a = pm.Normal('a', mu=mu, sd=1,
                          observed=np.array([.5, .2]))
            trace = pm.sample()

        with model:
            # test list input
            ppc0 = pm.sample_posterior_predictive([model.test_point], samples=10)
            ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[])
            assert len(ppc) == 0
            ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[a])
            assert 'a' in ppc
            assert ppc['a'].shape == (10, 2)

            ppc = pm.sample_posterior_predictive(trace, samples=10, vars=[a], size=4)
            assert 'a' in ppc
            assert ppc['a'].shape == (10, 4, 2)
Example #8
    def test_sample_posterior_predictive(self, tmpdir_factory):
        directory = str(tmpdir_factory.mktemp('data'))
        save_dir = pm.save_trace(self.trace, directory, overwrite=True)

        assert save_dir == directory

        seed = 10
        np.random.seed(seed)
        with TestSaveLoad.model():
            ppc = pm.sample_posterior_predictive(self.trace)

        seed = 10
        np.random.seed(seed)
        with TestSaveLoad.model():
            trace2 = pm.load_trace(directory)
            ppc2 = pm.sample_posterior_predictive(trace2)

        for key, value in ppc.items():
            assert (value == ppc2[key]).all()
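A note on reproducibility: instead of re-seeding NumPy globally as above, PyMC3's pm.sample_posterior_predictive also accepts a random_seed argument. A minimal sketch, assuming the same TestSaveLoad.model() fixture and the trace saved above:

with TestSaveLoad.model():
    trace2 = pm.load_trace(directory)
    # equal seeds should yield identical posterior predictive draws
    ppc_a = pm.sample_posterior_predictive(trace2, random_seed=10)
    ppc_b = pm.sample_posterior_predictive(trace2, random_seed=10)
assert all((ppc_a[key] == ppc_b[key]).all() for key in ppc_a)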
Example #9
def test_mixture_random_shape():
    # test the shape broadcasting in mixture random
    y = np.concatenate([nr.poisson(5, size=10),
                        nr.poisson(9, size=10)])
    with pm.Model() as m:
        comp0 = pm.Poisson.dist(mu=np.ones(2))
        w0 = pm.Dirichlet('w0', a=np.ones(2))
        like0 = pm.Mixture('like0',
                           w=w0,
                           comp_dists=comp0,
                           observed=y)

        comp1 = pm.Poisson.dist(mu=np.ones((20, 2)),
                                shape=(20, 2))
        w1 = pm.Dirichlet('w1', a=np.ones(2))
        like1 = pm.Mixture('like1',
                           w=w1,
                           comp_dists=comp1,
                           observed=y)

        comp2 = pm.Poisson.dist(mu=np.ones(2))
        w2 = pm.Dirichlet('w2',
                          a=np.ones(2),
                          shape=(20, 2))
        like2 = pm.Mixture('like2',
                           w=w2,
                           comp_dists=comp2,
                           observed=y)

        comp3 = pm.Poisson.dist(mu=np.ones(2),
                                shape=(20, 2))
        w3 = pm.Dirichlet('w3',
                          a=np.ones(2),
                          shape=(20, 2))
        like3 = pm.Mixture('like3',
                           w=w3,
                           comp_dists=comp3,
                           observed=y)

    rand0, rand1, rand2, rand3 = draw_values([like0, like1, like2, like3],
                                             point=m.test_point,
                                             size=100)
    assert rand0.shape == (100, 20)
    assert rand1.shape == (100, 20)
    assert rand2.shape == (100, 20)
    assert rand3.shape == (100, 20)

    with m:
        ppc = pm.sample_posterior_predictive([m.test_point], samples=200)
    assert ppc['like0'].shape == (200, 20)
    assert ppc['like1'].shape == (200, 20)
    assert ppc['like2'].shape == (200, 20)
    assert ppc['like3'].shape == (200, 20)
Example #10
    def test_normal_scalar(self):
        with pm.Model() as model:
            mu = pm.Normal('mu', 0., 1.)
            a = pm.Normal('a', mu=mu, sd=1, observed=0.)
            trace = pm.sample()

        with model:
            # test list input
            ppc0 = pm.sample_posterior_predictive([model.test_point], samples=10)
            ppc = pm.sample_posterior_predictive(trace, samples=1000, vars=[])
            assert len(ppc) == 0
            ppc = pm.sample_posterior_predictive(trace, samples=1000, vars=[a])
            assert 'a' in ppc
            assert ppc['a'].shape == (1000,)
        _, pval = stats.kstest(ppc['a'],
                               stats.norm(loc=0, scale=np.sqrt(2)).cdf)
        assert pval > 0.001

        with model:
            ppc = pm.sample_posterior_predictive(trace, samples=10, size=5, vars=[a])
            assert ppc['a'].shape == (10, 5)
Example #11
    def test_normal_scalar(self):
        with pm.Model() as model:
            mu = pm.Normal("mu", 0.0, 1.0)
            a = pm.Normal("a", mu=mu, sigma=1, observed=0.0)
            trace = pm.sample()

        with model:
            # test list input
            n = trace["mu"].shape[0]
            ppc0 = pm.sample_posterior_predictive([model.test_point], samples=10)
            ppc = pm.sample_posterior_predictive(trace, samples=n, vars=[])
            assert len(ppc) == 0
            ppc = pm.sample_posterior_predictive(trace, samples=n, vars=[a])
            assert "a" in ppc
            assert ppc["a"].shape == (n,)
        # mu's posterior standard deviation may have changed because a is observed
        _, pval = stats.kstest(ppc["a"] - trace["mu"], stats.norm(loc=0, scale=1).cdf)
        assert pval > 0.001

        with model:
            ppc = pm.sample_posterior_predictive(trace, samples=10, size=5, vars=[a])
            assert ppc["a"].shape == (10, 5)
Example #12
    def test_sample_prior_and_posterior(self):
        def build_toy_dataset(N, K):
            pi = np.array([0.2, 0.5, 0.3])
            mus = [[1, 1, 1], [-1, -1, -1], [2, -2, 0]]
            stds = [[0.1, 0.1, 0.1], [0.1, 0.2, 0.2], [0.2, 0.3, 0.3]]
            x = np.zeros((N, 3), dtype=np.float32)
            y = np.zeros((N,), dtype=int)  # np.int is deprecated; use the builtin
            for n in range(N):
                k = np.argmax(np.random.multinomial(1, pi))
                x[n, :] = np.random.multivariate_normal(mus[k],
                                                        np.diag(stds[k]))
                y[n] = k
            return x, y

        N = 100  # number of data points
        K = 3  # number of mixture components
        D = 3  # dimensionality of the data

        X, y = build_toy_dataset(N, K)

        with pm.Model() as model:
            pi = pm.Dirichlet('pi', np.ones(K))

            comp_dist = []
            mu = []
            packed_chol = []
            chol = []
            for i in range(K):
                mu.append(pm.Normal('mu%i' % i, 0, 10, shape=D))
                packed_chol.append(
                    pm.LKJCholeskyCov('chol_cov_%i' % i,
                                      eta=2,
                                      n=D,
                                      sd_dist=pm.HalfNormal.dist(2.5))
                )
                chol.append(pm.expand_packed_triangular(D, packed_chol[i],
                                                        lower=True))
                comp_dist.append(pm.MvNormal.dist(mu=mu[i], chol=chol[i]))

            pm.Mixture('x_obs', pi, comp_dist, observed=X)
        with model:
            trace = pm.sample(30, tune=10, chains=1)

        n_samples = 20
        with model:
            ppc = pm.sample_posterior_predictive(trace, n_samples)
            prior = pm.sample_prior_predictive(samples=n_samples)
        assert ppc['x_obs'].shape == (n_samples,) + X.shape
        assert prior['x_obs'].shape == (n_samples,) + X.shape
        assert prior['mu0'].shape == (n_samples, D)
        assert prior['chol_cov_0'].shape == (n_samples, D * (D + 1) // 2)
Example #13
    def test_model_not_drawable_prior(self):
        data = np.random.poisson(lam=10, size=200)
        model = pm.Model()
        with model:
            mu = pm.HalfFlat("sigma")
            pm.Poisson("foo", mu=mu, observed=data)
            trace = pm.sample(tune=1000)

        with model:
            with pytest.raises(ValueError) as excinfo:
                pm.sample_prior_predictive(50)
            assert "Cannot sample" in str(excinfo.value)
            samples = pm.sample_posterior_predictive(trace, 50)
            assert samples["foo"].shape == (50, 200)
Example #14
 def test_multivariate2(self):
     # Added test for issue #3271
     mn_data = np.random.multinomial(n=100, pvals=[1 / 6.0] * 6, size=10)
     with pm.Model() as dm_model:
         probs = pm.Dirichlet("probs", a=np.ones(6), shape=6)
         obs = pm.Multinomial("obs", n=100, p=probs, observed=mn_data)
         burned_trace = pm.sample(20, tune=10, cores=1)
     sim_priors = pm.sample_prior_predictive(samples=20,
                                             model=dm_model)
     sim_ppc = pm.sample_posterior_predictive(burned_trace,
                                              samples=20,
                                              model=dm_model)
     assert sim_priors['probs'].shape == (20, 6)
     assert sim_priors['obs'].shape == (20,) + obs.distribution.shape
     assert sim_ppc['obs'].shape == (20,) + obs.distribution.shape
Example #15
clusters = 2

# reparameterized
# reportedly has a parameter non-identifiability issue, but seems fine here
with pm.Model() as model_mg:
    p = pm.Dirichlet('p', a=np.ones(clusters))
    means = pm.Normal('means', mu=cs_exp.mean(), sd=10, shape=clusters)
    sd = pm.HalfNormal('sd', sd=10)
    y = pm.NormalMixture('y', w=p, mu=means, sd=sd, observed=cs_exp)
    trace_mg = pm.sample(random_seed=123)

varnames = ['means', 'p']
az.plot_trace(trace_mg, varnames)
az.summary(trace_mg, varnames)

ppc_mg = pm.sample_posterior_predictive(trace_mg, 2000, model=model_mg)
data_ppc = az.from_pymc3(trace=trace_mg, posterior_predictive=ppc_mg)
ax = az.plot_ppc(data_ppc, figsize=(12, 6), mean=False)

clusters = 2
with pm.Model() as model_mgp:
    p = pm.Dirichlet('p', a=np.ones(clusters))
    means = pm.Normal('means', mu=np.array([.9, 1]) * cs_exp.mean(),
                      sd=10, shape=clusters)
    sd = pm.HalfNormal('sd', sd=10)

    # Potential adds a constraint to the model's joint log-probability:
    # the switch contributes -inf whenever means[1] < means[0], so
    # samples with unordered means get zero posterior probability
    order_means = pm.Potential('order_means',
                               tt.switch(means[1]-means[0] < 0,
                                         -np.inf, 0))
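The snippet is cut off after the Potential. A hedged sketch of how it would plausibly continue, mirroring model_mg above:

    y = pm.NormalMixture('y', w=p, mu=means, sd=sd, observed=cs_exp)
    trace_mgp = pm.sample(random_seed=123)

ppc_mgp = pm.sample_posterior_predictive(trace_mgp, 2000, model=model_mgp)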
Example #16
 def test_point_list_arg_bug_spp(self, point_list_arg_bug_fixture):
     pmodel, trace = point_list_arg_bug_fixture
     with pmodel:
         pp = pm.sample_posterior_predictive([trace[15]], var_names=['d'])
Example #17
 def test_sample_from_xarray_posterior(self, point_list_arg_bug_fixture):
     pmodel, trace = point_list_arg_bug_fixture
     idat = az.from_pymc3(trace)
     with pmodel:
         pp = pm.sample_posterior_predictive(idat.posterior,
                                             var_names=['d'])
Example #18
    sd = trace_x['sd'].mean()
    dist = stats.norm(means_y, sd)
    ax[idx].plot(x, np.sum(dist.pdf(x_) * p_y, 1), 'C0', lw=2)
    ax[idx].plot(x, dist.pdf(x_) * p_y, 'k--', alpha=0.7)

    az.plot_kde(data, plot_kwargs={'linewidth': 2, 'color': 'k'}, ax=ax[idx])
    ax[idx].set_title('K = {}'.format(clusters[idx]))
    ax[idx].set_yticks([])
    ax[idx].set_xlabel('x')
pml.savefig('gmm_chooseK_pymc3_kde.pdf')

# Posterior predictive check

nclusters = len(clusters)
ppc_mm = [
    pm.sample_posterior_predictive(traces[i], 1000, models[i])
    for i in range(nclusters)
]

fig, ax = plt.subplots(2,
                       2,
                       figsize=(10, 6),
                       sharex=True,
                       constrained_layout=True)
ax = np.ravel(ax)


Example #19
def iqr(x, a=0):
    return np.subtract(*np.percentile(x, [75, 25], axis=a))

def PPCs(stat, model):
    sx, sy, st, sdx, sdy, sdt = stat

    if model == "bm":
        bm = pm.Model()

        with bm:
            D = pm.Lognormal("D", 0, 1)
            pm.Normal(
                "like_x", mu=0, sd=tt.sqrt(2 * D * sdt), observed=sdx
            )
            pm.Normal(
                "like_y", mu=0, sd=tt.sqrt(2 * D * sdt), observed=sdy
            )

            trace_bm = pm.sample(2000, chains=2, cores=1, progressbar=False)

        ppc_bm = pm.sample_posterior_predictive(
            trace_bm, model=bm, progressbar=False
        )
        simulated_dx_bm = ppc_bm[bm.observed_RVs[0].name]
        simulated_dy_bm = ppc_bm[bm.observed_RVs[1].name]
        simulated_x_bm = np.insert(
            np.cumsum(simulated_dx_bm, axis=1), 0, 0, axis=1
        )
        simulated_y_bm = np.insert(
            np.cumsum(simulated_dy_bm, axis=1), 0, 0, axis=1
        )

        # ppc in autocorrX, lag=1
        pxstd = []
        for i in range(4000):
            pxstd.append(calAutoCorr(simulated_x_bm[i, :], 1)[-1])

        fig, axes = plt.subplots(1, 3, figsize=(16, 4))
        for i, j in zip(simulated_x_bm[::2, :], simulated_y_bm[::2, :]):
            axes[0].plot(i, j, alpha=0.2)
        axes[0].plot(sx, sy, c="k", label="True data")
        axes[1].hist(simulated_x_bm.std(axis=1), bins=30)
        axes[2].hist(pxstd, bins=30)
        axes[2].axvline(
            x=autoCorrFirstX(sx, sdt), ls="--", c="r", label="data autocorrX"
        )
        axes[1].axvline(x=sx.std(), ls="--", c="r", label="data std in x")
        axes[0].legend()
        axes[1].legend()
        axes[2].legend()
        axes[0].set_title("PP Samples from BM model and True track")
        axes[1].set_title("PP Samples std in x")
        axes[2].set_title("PP Samples autocorrX")

    if model == "me":

        model_stick = pm.Model()
        with model_stick:

            me = pm.Lognormal("me", 0, 1)
            _, sig = sticking_covariance(len(sx) - 1, 0, me)

            pm.MvNormal("likex", mu=0, cov=sig, observed=sx)
            pm.MvNormal("likey", mu=0, cov=sig, observed=sy)

        with model_stick:
            trace_stick = pm.sample(2000, chains=2, cores=1, progressbar=False)

        # manually generate ppc samples
        simulated_x_stick, simulated_y_stick = (
            np.zeros((4000, len(sx))),
            np.zeros((4000, len(sx))),
        )
        for i in range(4000):
            mu1, Sigma1 = sticking_covariance(
                len(sx) - 1, 0, trace_stick["me"][i]
            )
            x = np.random.multivariate_normal(mu1, Sigma1)
            mu2, Sigma2 = sticking_covariance(
                len(sx) - 1, 0, trace_stick["me"][i]
            )
            y = np.random.multivariate_normal(mu2, Sigma2)

            simulated_x_stick[i, :], simulated_y_stick[i, :] = x, y

        # ppc in autocorrX, lag=1
        pxstd = []
        for i in range(4000):
            pxstd.append(calAutoCorr(simulated_x_stick[i, :], 1)[-1])

        fig, axes = plt.subplots(1, 3, figsize=(16, 4))
        for i, j in zip(simulated_x_stick[::2, :], simulated_y_stick[::2, :]):
            axes[0].plot(i, j, alpha=0.2)
        axes[0].plot(sx, sy, c="k", label="True data")
        axes[1].hist(pxstd, bins=30)
        axes[2].hist(simulated_x_stick.std(axis=1), bins=30)
        axes[1].axvline(
            x=autoCorrFirstX(sx, sdt), ls="--", c="r", label="data autocorrX"
        )
        axes[2].axvline(x=sx.std(), ls="--", c="r", label="data std in x")
        axes[0].legend()
        axes[1].legend()
        axes[2].legend()
        axes[0].set_title("PP Samples from Stuck model and True track")
        axes[1].set_title("PP Samples autocorrX")
        axes[2].set_title("PP Samples std in x")
        plt.show()

    if model == "hpw":
        model_hpw = pm.Model()
        with model_hpw:
            D = pm.Lognormal("D", 0, 1)
            k = pm.Lognormal("k", 0, 1)

            mean_x = (-sx[:-1]) * (1 - tt.exp(-k * sdt))
            mean_y = (-sy[:-1]) * (1 - tt.exp(-k * sdt))
            std = tt.sqrt(D * (1 - tt.exp(-2 * k * sdt)) / k)

            pm.Normal("like_x", mu=mean_x, sd=std, observed=sdx)
            pm.Normal("like_y", mu=mean_y, sd=std, observed=sdy)

        with model_hpw:
            trace_hpw = pm.sample(
                2000, tune=2000, chains=2, cores=1, progressbar=False
            )

        simulated_x_hpw, simulated_y_hpw = (
            np.zeros((4000, len(sx))),
            np.zeros((4000, len(sx))),
        )
        for i in range(trace_hpw["D"].shape[0]):
            base = base_HPW_D(
                [0, 0],
                [i for i in range(len(sx))],
                trace_hpw["D"][i],
                [0, 0],
                trace_hpw["k"][i],
            )
            simulated_x_hpw[i, :], simulated_y_hpw[i, :] = base[0], base[1]

        pxstd = []
        for i in range(4000):
            pxstd.append(calAutoCorr(simulated_x_hpw[i, :], 1)[-1])

        fig, axes = plt.subplots(1, 3, figsize=(16, 4))
        for i, j in zip(simulated_x_hpw[::2, :], simulated_y_hpw[::2, :]):
            axes[0].plot(i, j, alpha=0.2)
        axes[0].plot(sx, sy, c="k", label="True data")
        axes[1].hist(pxstd, bins=30)
        axes[2].hist(simulated_x_hpw.std(axis=1), bins=30)
        axes[1].axvline(
            x=autoCorrFirstX(sx, sdt), ls="--", c="r", label="data autocorrX"
        )
        axes[2].axvline(x=sx.std(), ls="--", c="r", label="data std in x")
        axes[0].legend()
        axes[1].legend()
        axes[2].legend()
        axes[0].set_title("PP Samples from Stuck model and True track")
        axes[1].set_title("PP Samples autocorrX")
        axes[2].set_title("PP Samples std in x")
        plt.show()
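A hedged usage sketch for PPCs with a hypothetical toy track; it assumes the original module's helpers (calAutoCorr, autoCorrFirstX, sticking_covariance, base_HPW_D) are in scope:

rng = np.random.default_rng(0)
sdt = np.ones(99)  # unit time steps
sdx, sdy = rng.normal(size=99), rng.normal(size=99)  # displacement increments
sx = np.insert(np.cumsum(sdx), 0, 0)  # track starts at the origin
sy = np.insert(np.cumsum(sdy), 0, 0)
st = np.insert(np.cumsum(sdt), 0, 0)
PPCs((sx, sy, st, sdx, sdy, sdt), model="bm")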
Example #20
def model_factory(X_continuos, X_categorical_selection, X_categorical_audience,
                  X_categorical_browser, X_categorical_city,
                  X_categorical_device, y_data, variables_to_be_used,
                  variant_df, arviz_inference, samples):
    """ please check run_model_oob's function docstring below for a description  
        of the inputs.
    """

    with pm.Model() as varying_intercept_slope_noncentered:
        # with pm.Model(coords=coords) as varying_intercept_slope_noncentered:

        # # build tensors from Pandas DataFrame/Series
        # X_continuos_var = pm.Data('X_continuos', X_continuos, dims=("X_continuos_index"))
        # X_categorical_selection_var = pm.Data('X_categorical_selection', X_categorical_selection, dims=("X_categorical_selection_index"))
        # X_categorical_audience_var = pm.Data('X_categorical_audience', X_categorical_audience, dims=("X_categorical_audience_index"))
        # X_categorical_browser_var = pm.Data('X_categorical_browser', X_categorical_browser, dims=("X_categorical_browser_index"))
        # X_categorical_city_var = pm.Data('X_categorical_city', X_categorical_city, dims=("X_categorical_city_index"))
        # X_categorical_device_var = pm.Data('X_categorical_device', X_categorical_device, dims=("X_categorical_device_index"))

        # hyperpriors for intercept
        mu_alpha_tmp = pm.Laplace('mu_alpha_tmp',
                                  mu=0.05,
                                  b=1.,
                                  shape=(variant_df.shape[0] - 1))
        mu_alpha = theano.tensor.concatenate([[0], mu_alpha_tmp])

        sigma_alpha_tmp = pm.HalfNormal('sigma_alpha_tmp',
                                        sigma=1.,
                                        shape=(variant_df.shape[0] - 1))
        sigma_alpha = theano.tensor.concatenate([[0], sigma_alpha_tmp])

        # prior for non-centered random intercepts
        u = pm.Laplace('u', mu=0.05, b=1.)

        # random intercept
        alpha_eq = mu_alpha + u * sigma_alpha
        alpha_eq_deter = pm.Deterministic('alpha_eq_deter', alpha_eq)
        alpha = pm.Laplace('alpha',
                           mu=alpha_eq_deter,
                           b=1.,
                           shape=(variant_df.shape[0]))

        #######################################################################

        # hyperpriors for slopes (continuos)
        mu_beta_continuos_tmp = pm.Laplace('mu_beta_continuos_tmp',
                                           mu=0.05,
                                           b=1.,
                                           shape=(1,
                                                  (variant_df.shape[0] - 2)))
        mu_beta_continuos = theano.tensor.concatenate(
            [np.zeros((1, 1)), mu_beta_continuos_tmp], axis=1)
        sigma_beta_continuos_tmp = pm.HalfNormal(
            'sigma_beta_continuos_tmp',
            sigma=1.,
            shape=(1, (variant_df.shape[0] - 2)))
        sigma_beta_continuos = theano.tensor.concatenate(
            [np.zeros((1, 1)), sigma_beta_continuos_tmp], axis=1)

        # prior for non-centered random slope (continuos)
        g = pm.Laplace('g', mu=0.05, b=1., shape=(1, 1))

        # random slopes (continuos)
        beta_continuos_eq = mu_beta_continuos + pm.math.dot(
            g, sigma_beta_continuos)
        beta_con_deter_percentage = pm.Deterministic(
            'beta_con_deter_percentage', beta_continuos_eq)
        beta_con_tmp_percentage = pm.Laplace('beta_con_tmp_percentage',
                                             mu=beta_con_deter_percentage,
                                             b=1.,
                                             shape=(1,
                                                    (variant_df.shape[0] - 1)))
        beta_con_percentage = theano.tensor.concatenate(
            [np.zeros((1, 1)), beta_con_tmp_percentage], axis=1)

        # expected value (continuos)
        dot_product_continuos = pm.math.dot(
            theano.tensor.shape_padaxis(X_continuos, axis=1),
            beta_con_percentage)

        #######################################################################

        # hyperpriors for slopes (categorical_selection)
        mu_beta_categorical_selection_tmp = pm.Laplace(
            'mu_beta_categorical_selection_tmp',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_selection)),
                   (variant_df.shape[0] - 2)))
        mu_beta_categorical_selection = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_selection)), 1)),
            mu_beta_categorical_selection_tmp
        ],
                                                                  axis=1)
        sigma_beta_categorical_selection_tmp = pm.HalfNormal(
            'sigma_beta_categorical_selection_tmp',
            sigma=1.,
            shape=(len(pd.unique(X_categorical_selection)),
                   (variant_df.shape[0] - 2)))
        sigma_beta_categorical_selection = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_selection)), 1)),
            sigma_beta_categorical_selection_tmp
        ],
                                                                     axis=1)

        # prior for non-centered random slope (categorical_selection)
        non_centered_selection = pm.Laplace(
            'non_centered_selection',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_selection)),
                   len(pd.unique(X_categorical_selection))))

        #random slopes (categorical_selection)
        beta_categorical_eq_selection = mu_beta_categorical_selection + pm.math.dot(
            non_centered_selection, sigma_beta_categorical_selection)
        beta_cat_deter_selection = pm.Deterministic(
            'beta_cat_deter_selection', beta_categorical_eq_selection)
        beta_cat_tmp_selection = pm.Laplace(
            'beta_cat_tmp_selection',
            mu=beta_cat_deter_selection,
            b=1.,
            shape=(len(pd.unique(X_categorical_selection)),
                   (variant_df.shape[0] - 1)))
        beta_cat_selection = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_selection)), 1)),
            beta_cat_tmp_selection
        ],
                                                       axis=1)

        #######################################################################

        # hyperpriors for slopes (categorical_audience)
        mu_beta_categorical_audience_tmp = pm.Laplace(
            'mu_beta_categorical_audience_tmp',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_audience)),
                   (variant_df.shape[0] - 2)))
        mu_beta_categorical_audience = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_audience)), 1)),
            mu_beta_categorical_audience_tmp
        ],
                                                                 axis=1)
        sigma_beta_categorical_audience_tmp = pm.HalfNormal(
            'sigma_beta_categorical_audience_tmp',
            sigma=1.,
            shape=(len(pd.unique(X_categorical_audience)),
                   (variant_df.shape[0] - 2)))
        sigma_beta_categorical_audience = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_audience)), 1)),
            sigma_beta_categorical_audience_tmp
        ],
                                                                    axis=1)

        # prior for non-centered random slope (categorical_audience)
        non_centered_audience = pm.Laplace(
            'non_centered_audience',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_audience)),
                   len(pd.unique(X_categorical_audience))))

        #random slopes (categorical_audience)
        beta_categorical_eq_audience = mu_beta_categorical_audience + pm.math.dot(
            non_centered_audience, sigma_beta_categorical_audience)
        beta_cat_deter_audience = pm.Deterministic(
            'beta_cat_deter_audience', beta_categorical_eq_audience)
        beta_cat_tmp_audience = pm.Laplace(
            'beta_cat_tmp_audience',
            mu=beta_cat_deter_audience,
            b=1.,
            shape=(len(pd.unique(X_categorical_audience)),
                   (variant_df.shape[0] - 1)))
        beta_cat_audience = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_audience)), 1)),
            beta_cat_tmp_audience
        ],
                                                      axis=1)

        #######################################################################

        # hyperpriors for slopes (categorical_browser)
        mu_beta_categorical_browser_tmp = pm.Laplace(
            'mu_beta_categorical_browser_tmp',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_browser)),
                   (variant_df.shape[0] - 2)))
        mu_beta_categorical_browser = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_browser)), 1)),
            mu_beta_categorical_browser_tmp
        ],
                                                                axis=1)
        sigma_beta_categorical_browser_tmp = pm.HalfNormal(
            'sigma_beta_categorical_browser_tmp',
            sigma=1.,
            shape=(len(pd.unique(X_categorical_browser)),
                   (variant_df.shape[0] - 2)))
        sigma_beta_categorical_browser = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_browser)), 1)),
            sigma_beta_categorical_browser_tmp
        ],
                                                                   axis=1)

        # prior for non-centered random slope (categorical_browser)
        non_centered_browser = pm.Laplace(
            'non_centered_browser',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_browser)),
                   len(pd.unique(X_categorical_browser))))

        #random slopes (categorical_browser)
        beta_categorical_eq_browser = mu_beta_categorical_browser + pm.math.dot(
            non_centered_browser, sigma_beta_categorical_browser)
        beta_cat_deter_browser = pm.Deterministic('beta_cat_deter_browser',
                                                  beta_categorical_eq_browser)
        beta_cat_tmp_browser = pm.Laplace(
            'beta_cat_tmp_browser',
            mu=beta_cat_deter_browser,
            b=1.,
            shape=(len(pd.unique(X_categorical_browser)),
                   (variant_df.shape[0] - 1)))
        beta_cat_browser = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_browser)), 1)),
            beta_cat_tmp_browser
        ],
                                                     axis=1)

        #######################################################################

        # hyperpriors for slopes (categorical_city)
        mu_beta_categorical_city_tmp = pm.Laplace(
            'mu_beta_categorical_city_tmp',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_city)),
                   (variant_df.shape[0] - 2)))
        mu_beta_categorical_city = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_city)), 1)),
            mu_beta_categorical_city_tmp
        ],
                                                             axis=1)
        sigma_beta_categorical_city_tmp = pm.HalfNormal(
            'sigma_beta_categorical_city_tmp',
            sigma=1.,
            shape=(len(pd.unique(X_categorical_city)),
                   (variant_df.shape[0] - 2)))
        sigma_beta_categorical_city = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_city)), 1)),
            sigma_beta_categorical_city_tmp
        ],
                                                                axis=1)

        # prior for non-centered random slope (categorical_city)
        non_centered_city = pm.Laplace(
            'non_centered_city',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_city)),
                   len(pd.unique(X_categorical_city))))

        #random slopes (categorical_city)
        beta_categorical_eq_city = mu_beta_categorical_city + pm.math.dot(
            non_centered_city, sigma_beta_categorical_city)
        beta_cat_deter_city = pm.Deterministic('beta_cat_deter_city',
                                               beta_categorical_eq_city)
        beta_cat_tmp_city = pm.Laplace('beta_cat_tmp_city',
                                       mu=beta_cat_deter_city,
                                       b=1.,
                                       shape=(len(
                                           pd.unique(X_categorical_city)),
                                              (variant_df.shape[0] - 1)))
        beta_cat_city = theano.tensor.concatenate([
            np.zeros(
                (len(pd.unique(X_categorical_city)), 1)), beta_cat_tmp_city
        ],
                                                  axis=1)

        #######################################################################

        # hyperpriors for slopes (categorical_device)
        mu_beta_categorical_device_tmp = pm.Laplace(
            'mu_beta_categorical_device_tmp',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_device)),
                   (variant_df.shape[0] - 2)))
        mu_beta_categorical_device = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_device)), 1)),
            mu_beta_categorical_device_tmp
        ],
                                                               axis=1)
        sigma_beta_categorical_device_tmp = pm.HalfNormal(
            'sigma_beta_categorical_device_tmp',
            sigma=1.,
            shape=(len(pd.unique(X_categorical_device)),
                   (variant_df.shape[0] - 2)))
        sigma_beta_categorical_device = theano.tensor.concatenate([
            np.zeros((len(pd.unique(X_categorical_device)), 1)),
            sigma_beta_categorical_device_tmp
        ],
                                                                  axis=1)

        # prior for non-centered random slope (categorical_device)
        non_centered_device = pm.Laplace(
            'non_centered_device',
            mu=0.05,
            b=1.,
            shape=(len(pd.unique(X_categorical_device)),
                   len(pd.unique(X_categorical_device))))

        #random slopes (categorical_device)
        beta_categorical_eq_device = mu_beta_categorical_device + pm.math.dot(
            non_centered_device, sigma_beta_categorical_device)
        beta_cat_deter_device = pm.Deterministic('beta_cat_deter_device',
                                                 beta_categorical_eq_device)
        beta_cat_tmp_device = pm.Laplace('beta_cat_tmp_device',
                                         mu=beta_cat_deter_device,
                                         b=1.,
                                         shape=(len(
                                             pd.unique(X_categorical_device)),
                                                (variant_df.shape[0] - 1)))
        beta_cat_device = theano.tensor.concatenate([
            np.zeros(
                (len(pd.unique(X_categorical_device)), 1)), beta_cat_tmp_device
        ],
                                                    axis=1)
        # theano.printing.Print('vector', attrs=['shape'])(beta_cat_device)

        #######################################################################

        # hyperpriors for epsilon
        sigma_epsilon = pm.HalfNormal('sigma_epsilon',
                                      sigma=1.,
                                      shape=(variant_df.shape[0]))

        # epsilon
        epsilon = pm.HalfNormal(
            'epsilon',
            sigma=sigma_epsilon,  # not working
            shape=(variant_df.shape[0]))

        #######################################################################

        y_hat_tmp = (alpha + dot_product_continuos +
                     beta_cat_selection[X_categorical_selection, :] +
                     beta_cat_audience[X_categorical_audience, :] +
                     beta_cat_browser[X_categorical_browser, :] +
                     beta_cat_city[X_categorical_city, :] +
                     beta_cat_device[X_categorical_device, :] + epsilon)

        # softmax
        y_hat = theano.tensor.nnet.softmax(y_hat_tmp)
        # theano.printing.Print('vector', attrs=['shape'])(y_hat)

        # likelihood
        y_like = pm.Categorical('y_like', p=y_hat, observed=y_data)

        # predict new values from the posterior distribution of the trained model.
        # Check that the predicted output is sensible (e.g. with 4 classes to
        # predict, exactly the labels 0, 1, 2, 3 should appear: no more, no less!)
        post_pred_big_tmp = pm.sample_posterior_predictive(
            trace=arviz_inference, samples=samples)

    return post_pred_big_tmp
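A one-line sketch of the class-coverage check described in the closing comment, assuming post_pred is the dictionary returned by model_factory:

# e.g. with 4 classes, expect exactly the labels {0, 1, 2, 3}
assert set(np.unique(post_pred['y_like'])) == set(np.unique(y_data))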
Example #21

np.random.seed(0)
Y = stats.bernoulli(0.7).rvs(20)



with pm.Model() as model:
    θ = pm.Beta("θ", 1, 1)
    y_obs = pm.Binomial("y_obs",n=1, p=θ, observed=Y)
    trace = pm.sample(1000, cores=1, chains=2, return_inferencedata=False)

idata = az.from_pymc3(trace)

pred_dists = (pm.sample_prior_predictive(1000, model)["y_obs"],
              pm.sample_posterior_predictive(idata, 1000, model)["y_obs"])

dist=pred_dists[0]
print(dist.shape)
num_success = dist.sum(1)
print(num_success.shape)

fig, ax = plt.subplots()
az.plot_dist(pred_dists[0].sum(1), hist_kwargs={"color":"0.5", "bins":range(0, 22)})
ax.set_title(f"Prior predictive distribution",fontweight='bold')
ax.set_xlim(-1, 21)
ax.set_ylim(0, 0.15) 
ax.set_xlabel("number of success")

fig, ax = plt.subplots()
az.plot_dist(pred_dists[1].sum(1), hist_kwargs={"color":"0.5", "bins":range(0, 22)})
Example #22
    ℓ = pm.HalfCauchy('ℓ', 1)
    cov = pm.gp.cov.ExpQuad(1, ℓ) + pm.gp.cov.WhiteNoise(1E-5)
    gp = pm.gp.Latent(cov_func=cov)
    f = gp.prior('f', X=age)
    y_ = pm.Bernoulli('y', p=pm.math.sigmoid(f), observed=space_flu)
    trace_space_flu = pm.sample(1000,
                                chains=1,
                                cores=1,
                                compute_convergence_checks=False)

X_new = np.linspace(0, 80, 200)[:, None]

with model_space_flu:
    f_pred = gp.conditional('f_pred', X_new)
    pred_samples = pm.sample_posterior_predictive(trace_space_flu,
                                                  var_names=['f_pred'],
                                                  samples=1000)

_, ax = plt.subplots(figsize=(10, 6))

fp = logistic(pred_samples['f_pred'])  # logistic() assumed to be the sigmoid (e.g. scipy.special.expit), mapping f to (0, 1)
fp_mean = np.nanmean(fp, 0)

ax.scatter(age,
           np.random.normal(space_flu, 0.02),
           marker='.',
           color=[f'C{ci}' for ci in space_flu])

ax.plot(X_new[:, 0], fp_mean, 'C2', lw=3)

az.plot_hdi(X_new[:, 0], fp, color='C2')
Example #23
eight_school_data = {
    'J': 8,
    'y': np.array([28., 8., -3., 7., -1., 1., 18., 12.]),
    'sigma': np.array([15., 10., 16., 11., 9., 11., 10., 18.])
}

with pm.Model() as model:
    mu = pm.Normal('mu', mu=0, sd=5)
    tau = pm.HalfCauchy('tau', beta=5)
    theta_tilde = pm.Normal('theta_tilde', mu=0, sd=1, shape=eight_school_data['J'])
    theta = pm.Deterministic('theta', mu + tau * theta_tilde)
    pm.Normal('obs', mu=theta, sd=eight_school_data['sigma'], observed=eight_school_data['y'])

    trace = pm.sample(draws, chains=chains)
    prior = pm.sample_prior_predictive()
    posterior_predictive = pm.sample_posterior_predictive(trace)

    pm_data = az.from_pymc3(
            trace=trace,
            prior=prior,
            posterior_predictive=posterior_predictive,
            coords={'school': np.arange(eight_school_data['J'])},
            dims={'theta': ['school'], 'theta_tilde': ['school']},
        )
#pm_data

#%%
az.plot_posterior(pm_data)
plt.show()

#%%
Example #24
    if cpu == 'bear':
        plt.savefig('frequencyfit.png')
    else:
        plt.show()

# In[ ]:

nflin = np.linspace(nf.min(), nf.max(), 100)
fslin = np.linspace(fs.min(), fs.max(), 100) + f2_.min()
mulin = nflin * np.median(trace['m']) + np.median(trace['c'])

with pm_model:
    f_pred = gp.conditional("f_pred", nflin[:, None])
    expf_pred = pm.Deterministic('expf_pred', tt.exp(f_pred))
    pred_samples = pm.sample_posterior_predictive(trace,
                                                  vars=[expf_pred],
                                                  samples=1000)

# In[ ]:

with plt.style.context(lk.MPLSTYLE):
    fig, ax = plt.subplots()
    plot_gp_dist(ax,
                 pred_samples['expf_pred'],
                 fslin,
                 palette='viridis',
                 fill_alpha=.05)

    ax.plot(fslin,
            np.exp(mulin),
            label='Mean Trend')
Example #25
    marriage_std = pm.Normal("divorce_std",
                             mu=mu,
                             sigma=sigma,
                             observed=data["Marriage_std"].values)
    prior_samples = pm.sample_prior_predictive()
    m_5_4_trace = pm.sample()

# %%
mu_m_5_4_mean = m_5_4_trace["mu"].mean(axis=0)
residuals = data["Marriage_std"] - mu_m_5_4_mean

# %%
with m_5_4:
    m_5_4_ppc = pm.sample_posterior_predictive(m_5_4_trace,
                                               var_names=["mu", "divorce_std"],
                                               samples=1000)

mu_mean = m_5_4_ppc["mu"].mean(axis=0)
mu_hpd = az.hpd(m_5_4_ppc["mu"], credible_interval=0.89)

D_sim = m_5_4_ppc["divorce_std"].mean(axis=0)
D_PI = az.hpd(m_5_4_ppc["divorce_std"], credible_interval=0.89)

# %%
fig, ax = plt.subplots(figsize=(6, 6))
plt.errorbar(
    data["Divorce_std"].values,
    m_5_4_ppc["divorce_std"].mean(0),
    yerr=np.abs(m_5_4_ppc["divorce_std"].mean(0) - mu_hpd.T),
    fmt="C0o",
Example #26
X = x[:, None]

# 100 evenly spaced points spanning the observed range of x
X_new = np.linspace(np.floor(x.min()), np.ceil(x.max()), 100)[:, None]

with pm.Model() as model_reg:
    # hyperprior for the lengthscale kernel parameter
    l = pm.Gamma('l', 2, 0.5)
    # instantiate a covariance function
    cov = pm.gp.cov.ExpQuad(1, ls=l)
    # instantiate a GP prior
    gp = pm.gp.Marginal(cov_func=cov)
    # prior on the observation noise
    ϵ = pm.HalfNormal('ϵ', 25)
    # likelihood
    y_pred = gp.marginal_likelihood('y_pred', X=X, y=y, noise=ϵ)
    trace_reg = pm.sample(2000)

    f_pred = gp.conditional('f_pred', X_new)

az.plot_trace(trace_reg)

pred_samples = pm.sample_posterior_predictive(trace_reg, vars=[f_pred],
                                              samples=82, model=model_reg)


_, ax = plt.subplots(figsize=(12,5))
ax.plot(X_new, pred_samples['f_pred'].T, 'C1-', alpha=0.3)
ax.plot(X, y, 'ko')
ax.set_xlabel('X')
Example #27
def causality_test():
    """Load csv file to build EDA plots and PyMC models."""
    # Load data: https://github.com/grjd/causalityagingbrain/blob/main/dataset_gh.csv
    csv_path = ""
    dataframe = pd.read_csv(csv_path, sep=';')
    dataframe_orig = dataframe.copy()
    df = dataframe  # the rest of the function refers to the frame as `df`
    plots_and_stuff(df)

    corrmatrix = df.corr(method='pearson')
    mask = np.zeros_like(corrmatrix)
    mask[np.triu_indices_from(mask)] = True
    plt.figure(figsize=(7, 7))
    heatmap = sns.heatmap(corrmatrix,
                          mask=mask,
                          annot=True,
                          center=0,
                          square=True,
                          linewidths=.5)
    #heatmap = sns.heatmap(atrophy_corr,annot=True, center=0,square=True, linewidths=.5)
    heatmap.set_xticklabels(colsofinterest_Eng,
                            rotation=45,
                            fontsize='small',
                            horizontalalignment='right')
    heatmap.set_yticklabels(colsofinterest_Eng,
                            rotation=0,
                            fontsize='small',
                            horizontalalignment='right')
    fig_file = os.path.join(figures_dir, 'heat_CorrChapter.png')
    plt.savefig(fig_file)

    # Standardize regressors and target
    df["brain_std"] = standardize(df["fr_BrainSegVol_to_eTIV_y1"])
    df["age_std"] = standardize(df["edad_visita1"])
    df["cog_std"] = standardize(df["fcsrtlibdem_visita1"])
    # Encode Categorical Variables
    df["school_id"] = pd.Categorical(df["nivel_educativo"]).codes
    df["sex_id"] = pd.Categorical(df["sexo"]).codes

    ################################################################
    ################## SEX (0M, 1F) -> BRAIN #######################
    #################################################################
    with pm.Model() as mXB:
        #sigma = pm.Uniform("sigma", 0, 1)
        sigma = pm.HalfNormal("sigma", sd=1)
        #mu_x = pm.Normal("mu_x", 0.7, 0.3, shape=2)
        mu_x = pm.Normal("mu_x", 0.0, 1.0, shape=2)
        #brain_remained = pm.Normal("brain_remained", mu_x[df["sex_id"]], sigma, observed=df["fr_BrainSegVol_to_eTIV_y1"])
        brain_remained = pm.Normal("brain_remained",
                                   mu_x[df["sex_id"]],
                                   sigma,
                                   observed=df["brain_std"])
        # difference men - women:
        # mu[0] ~ 0.695, mu[1] ~ 0.709; women arrive at a late age with less atrophy (bigger brains)
        diff_fm = pm.Deterministic("diff_fm", mu_x[0] - mu_x[1])
        mXB_trace = pm.sample(1000)
    print(az.summary(mXB_trace))
    az.plot_trace(mXB_trace, var_names=["mu_x", "sigma"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace_sex_brain-hn.png'))
    az.plot_forest(mXB_trace,
                   combined=True,
                   model_names=["X~B"],
                   var_names=["mu_x"],
                   hdi_prob=0.95)
    plt.savefig(os.path.join(figures_dir, 'pm_forest_sex_brain-hn.png'))
    # Posterior Predictive checks
    y_pred_g = pm.sample_posterior_predictive(mXB_trace, 100, mXB)
    data_ppc = az.from_pymc3(trace=mXB_trace, posterior_predictive=y_pred_g)
    ax = az.plot_ppc(data_ppc, figsize=(12, 6), mean=False)
    ax[0].legend(fontsize=15)
    plt.savefig(os.path.join(figures_dir, 'ppc_xXB-hn.png'))

    ################################################################
    ################## AGE -> BRAIN ################################
    #################################################################
    print('Calling to PyMC3 Model Age - > Brain...\n')
    with pm.Model() as m_AB:
        alpha = pm.Normal("alpha", 0, 1)  #0.2
        betaA = pm.Normal("betaA", 0, 1)  #0.5
        #sigma = pm.Exponential("sigma", 1)
        sigma = pm.HalfNormal("sigma", sd=1)
        mu = pm.Deterministic("mu", alpha + betaA * df["age_std"])
        brain_std = pm.Normal("brain_std",
                              mu=mu,
                              sigma=sigma,
                              observed=df["brain_std"].values)
        prior_samples = pm.sample_prior_predictive()
        m_AB_trace = pm.sample(1000)
    print(az.summary(m_AB_trace, var_names=["alpha", "betaA", "sigma"]))
    az.plot_trace(m_AB_trace, var_names=["alpha", "betaA", "sigma"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace_age_brain.png'))
    az.plot_forest([
        m_AB_trace,
    ],
                   model_names=["A~B"],
                   var_names=["betaA"],
                   combined=True,
                   hdi_prob=0.95)
    plt.savefig(os.path.join(figures_dir, 'pm_forest_AtoB.png'))
    # Posterior Predictive checks
    y_pred_g = pm.sample_posterior_predictive(m_AB_trace, 100, m_AB)
    data_ppc = az.from_pymc3(trace=m_AB_trace, posterior_predictive=y_pred_g)
    ax = az.plot_ppc(data_ppc, figsize=(12, 6), mean=False)
    ax[0].legend(fontsize=15)
    plt.savefig(os.path.join(figures_dir, 'ppc_AB-hn.png'))
    ################################################################
    ################## SEX+AGE -> BRAIN #######################
    #################################################################
    print('Calling to PyMC3 Model Age + Sex - > Brain...\n')
    sexco = pd.Categorical(df.loc[:, "sexo"].astype(int))
    with pm.Model() as m_XAB:
        alphax = pm.Normal("alphax", 0, 1, shape=2)
        betaA = pm.Normal("betaA", 0, 1)
        mu = alphax[sexco] + betaA * df["age_std"]
        sigma = pm.Exponential("sigma", 1)
        #mu = pm.Deterministic("mu", alpha + betaA * df["age_std"] + betaB * df["brain_std"])
        brain_std = pm.Normal("brain_std",
                              mu=mu,
                              sigma=sigma,
                              observed=df["brain_std"].values)
        prior_samples = pm.sample_prior_predictive()
        m_XAB_trace = pm.sample()
    print(az.summary(m_XAB_trace, var_names=["alphax", "betaA", "sigma"]))
    az.plot_trace(m_XAB_trace, var_names=["alphax", "betaA"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace_ageandsex_brain.png'))
    az.plot_forest([
        m_XAB_trace,
        mXB_trace,
        m_AB_trace,
    ],
                   model_names=["XA~B", "X~B", "A~B"],
                   var_names=["alphax", "mu_x", "betaA"],
                   combined=True,
                   hdi_prob=0.95)
    plt.savefig(os.path.join(figures_dir, 'pm_forest_mXAtoB.png'))
    # Posterior Predictive checks
    y_pred_g = pm.sample_posterior_predictive(m_XAB_trace, 100, m_XAB)
    data_ppc = az.from_pymc3(trace=m_XAB_trace, posterior_predictive=y_pred_g)
    ax = az.plot_ppc(data_ppc, figsize=(12, 6), mean=False)
    ax[0].legend(fontsize=15)
    plt.savefig(os.path.join(figures_dir, 'ppc_XAB-hn.png'))

    print('Calling to PyMC3 Model Brain - > Memory...\n')
    with pm.Model() as m_BC:
        alpha = pm.Normal("alpha", 0, 1)  #0.2
        betaB = pm.Normal("betaB", 0, 1)  #0.5
        sigma = pm.Exponential("sigma", 1)
        mu = pm.Deterministic("mu", alpha + betaB * df["brain_std"])
        cognition_std = pm.Normal("cognition_std",
                                  mu=mu,
                                  sigma=sigma,
                                  observed=df["cog_std"].values)
        prior_samples = pm.sample_prior_predictive()
        m_BC_trace = pm.sample()
    az.plot_trace(m_BC_trace, var_names=["alpha", "betaB"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace_brain_cog.png'))
    print(az.summary(m_BC_trace, var_names=["alpha", "betaB", "sigma"]))
    # Scatter plot x = Brain atrophy Y= Memory test
    mu_mean = m_BC_trace['mu']
    mu_hpd = pm.hpd(mu_mean)
    plt.figure(figsize=(9, 9))
    df.plot('brain_std', 'cog_std', kind='scatter')  #, xlim = (-2, 2)
    plt.plot(df.brain_std, mu_mean.mean(0), 'C2')
    plt.savefig(os.path.join(figures_dir, 'scatter_hpd_B2M.png'))
    print('Saved Figure scatter_hpd_B2M.png \n')

    print('Calling to PyMC3 Model School - > Memory...\n')
    # School -> Memory method2  m5_9
    with pm.Model() as mSM2:
        #sigma = pm.Uniform("sigma", 0, 1)
        sigma = pm.Exponential("sigma", 1)
        mu = pm.Normal("mu", 0.0, 0.5, shape=df["school_id"].max() + 1)
        memory = pm.Normal("memory",
                           mu[df["school_id"]],
                           sigma,
                           observed=df["cog_std"])
        mSM2_trace = pm.sample()
    print(az.summary(mSM2_trace))
    az.plot_trace(mSM2_trace, var_names=["mu", "sigma"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace2_school_memory.png'))
    az.plot_forest(mSM2_trace, combined=True, var_names=["mu"], hdi_prob=0.95)
    plt.savefig(os.path.join(figures_dir, 'pm_forest2_school_memory.png'))
    # pdb.set_trace()  # debug breakpoint from the original, disabled so the script runs through

    print('Calling to PyMC3 Model Age - > Memory...\n')
    with pm.Model() as m_AC:
        alpha = pm.Normal("alpha", 0, 1)
        betaA = pm.Normal("betaA", 0, 1)
        sigma = pm.Exponential("sigma", 1)
        mu = pm.Deterministic("mu", alpha + betaA * df["age_std"])
        cognition_std = pm.Normal("cognition_std",
                                  mu=mu,
                                  sigma=sigma,
                                  observed=df["cog_std"].values)
        prior_samples = pm.sample_prior_predictive()
        m_AC_trace = pm.sample()
    az.plot_trace(m_AC_trace, var_names=["alpha", "betaA"])
    plt.savefig(os.path.join(figures_dir, 'pm_trace_age_cog.png'))
    print(az.summary(m_AC_trace, var_names=["alpha", "betaA", "sigma"]))
    # Scatter A2M
    mu_mean = m_AC_trace['mu']
    mu_hpd = pm.hpd(mu_mean)
    plt.figure(figsize=(9, 9))
    df.plot('age_std', 'cog_std', kind='scatter')  #, xlim = (-2, 2)
    plt.plot(df.age_std, mu_mean.mean(0), 'C2')
    plt.savefig(os.path.join(figures_dir, 'scatter_hpd_A2M.png'))
    print('Saved Figure scatter_hpd_A2M.png \n')

    print('Calling to PyMC3 Model Age + Brain - > Memory...\n')
    with pm.Model() as m_BAC:
        alpha = pm.Normal("alpha", 0, 1)
        betaA = pm.Normal("betaA", 0, 1)
        betaB = pm.Normal("betaB", 0, 1)
        sigma = pm.Exponential("sigma", 1)
        mu = pm.Deterministic(
            "mu", alpha + betaA * df["age_std"] + betaB * df["brain_std"])
        cognition_std = pm.Normal("cognition_std",
                                  mu=mu,
                                  sigma=sigma,
                                  observed=df["cog_std"].values)
        prior_samples = pm.sample_prior_predictive()
        m_BAC_trace = pm.sample()
    print(
        az.summary(m_BAC_trace, var_names=["alpha", "betaB", "betaA",
                                           "sigma"]))
    az.plot_forest([
        m_BAC_trace,
        m_AC_trace,
        m_BC_trace,
    ],
                   model_names=["BA~C", "A~C", "B~C"],
                   var_names=["betaA", "betaB"],
                   combined=True,
                   hdi_prob=0.95)
    plt.savefig(os.path.join(figures_dir, 'pm_forest_mBAC_AB2M.png'))
Example #28
# predict
with glm_model:
    # WARNING: for the GLM, all the shared variables have to be built inside the
    # "with" context during the learning step. For the HBM, instead, the shared
    # variables have to be built outside the "with" context for both the learning
    # and the prediction steps.
    pm.set_data({'x_1_shared': x_1_data})
    pm.set_data({'x_2_shared': x_2_data})
    pm.set_data({'x_3_shared': x_3_data})
    pm.set_data({'x_4_shared': x_4_data})
    pm.set_data({'x_5_shared': x_5_data})
    pm.set_data({'x_6_shared': x_6_data})
    pm.set_data({'x_7_shared': x_7_data})
    pm.set_data({'x_8_shared': x_8_data})
    pm.set_data({'y_shared': y_data})
    post_pred = pm.sample_posterior_predictive(glm_model_trace,
                                               samples=samples)
    #    # TEST:
    #    post_pred = pm.sample_posterior_predictive(trace=trace_df.to_dict('records'),
    #                                         samples=len(trace_df))
    print('post_pred shape', post_pred['y_like'].shape)

# check number of predicted '1'
booked_sum = y_test_oob.sum().astype(int)
print('number_of_bookings = %i' % booked_sum)

# rank the PyMC3 predictions: the mean of the 0/1 posterior predictive draws
# for each observation is its predicted booking probability
transposed_output = pd.DataFrame(post_pred['y_like'])
transposed_output_sorted = transposed_output.mean(axis=0)
transposed_output_sorted.sort_values(axis=0, inplace=True, ascending=False)
transposed_output_sorted.rename('probability', inplace=True)
transposed_output_sorted_tmp = transposed_output_sorted.reset_index()
Example #29
0
with pm.Model() as model_g:
    μ = pm.Uniform('μ', lower=40, upper=70)
    σ = pm.HalfNormal('σ', sd=10)
    y = pm.Normal('y', mu=μ, sd=σ, observed=data)
    trace_g = pm.sample(1000)

az.plot_trace(trace_g)

# %%
az.plot_joint(trace_g, kind='kde', fill_last=False)

# %%
az.summary(trace_g)

# %%
y_pred_g = pm.sample_posterior_predictive(trace_g, 100, model_g)

# %%
data_ppc = az.from_pymc3(trace=trace_g, posterior_predictive=y_pred_g)
ax = az.plot_ppc(data_ppc, figsize=(12,6), mean=False)
ax[0].legend(fontsize=15)

# %%
np.mean(stats.t(loc=0, scale=1, df=100).rvs(100))

# %%
plt.figure(figsize=(10, 6))
x_values = np.linspace(-10, 10, 500)
for df in [1, 2, 30]:
    distri = stats.t(df)
    x_pdf = distri.pdf(x_values)
    plt.plot(x_values, x_pdf, label='df = %d' % df)
plt.legend()
Example #30
0
with pm.Model() as continent_model:
    # prior
    d = pm.Beta("continent share", 2, 1)
    # likelihood
    landings = pm.Bernoulli("landings", d, observed=[1, 1, 0, 0, 1])

map_estimate = pm.find_MAP(model=continent_model)
map_estimate
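
# Sanity check (a sketch, not in the original snippet): a Beta(2, 1) prior with
# the data [1, 1, 0, 0, 1] (3 successes, 2 failures) gives a Beta(2 + 3, 1 + 2)
# = Beta(5, 3) posterior, whose mode -- the MAP -- is (5 - 1) / (5 + 3 - 2) = 2/3,
# which find_MAP above should reproduce.
from scipy import stats
print((5 - 1) / (5 + 3 - 2))    # analytic MAP, ~0.667
print(stats.beta(5, 3).mean())  # posterior mean, 0.625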


with continent_model:
    trace = pm.sample(1000)

az.plot_trace(trace)

np.mean(trace['continent share'] > 0.7)  # posterior probability that the share exceeds 0.7


ppc = pm.sample_posterior_predictive(trace, samples=500, model=continent_model)


sns.distplot(ppc['landings'], kde=False)
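
# Quick consistency check (a sketch, not in the original snippet): the fraction
# of 1s across the posterior predictive draws should sit close to the posterior
# mean of the share parameter.
print(ppc['landings'].mean(), trace['continent share'].mean())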
Example #31
0
fig, ax = plt.subplots(figsize=(12, 5))
# scatter the true data
ax.scatter(x, y, alpha=0.3)
# posterior mean of mu at each x
mu_m = trace["mu"].mean(0)
# plot the posterior mean curve
ax.plot(x, mu_m, c="k")
# posterior mean of the noise standard deviation
eps_m = trace["eps"].mean(0)
# color the area inside 1 std from the mean
ax.fill_between(x, mu_m + eps_m, mu_m - eps_m, color="C1", alpha=0.6)
# color the area inside 2 std from the mean
ax.fill_between(x, mu_m + 2 * eps_m, mu_m - 2 * eps_m, color="C1", alpha=0.4)
ax.set_xlabel("x")
ax.set_ylabel("y")
plt.show()

# --------------- predict and analyse --------------------------------- #

# predict values for new values of the predictors
x_shared.set_value([20])
# sample from the posterior predictive of y
ppc = pm.sample_posterior_predictive(trace, 2000, model=vv_model)
# get the values
y_ppc = ppc["y_obs"][:, 0]
# plot
fig, ax = plt.subplots(figsize=(12, 5))
az.plot_kde(y_ppc)
ax.set_xlabel("y", fontsize=25)
ax.set_ylabel("pdf", fontsize=25)
plt.show()
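
# A numeric companion to the KDE above (a sketch, not in the original snippet):
# posterior predictive mean and HPD interval of y at the new input x = 20.
print(y_ppc.mean(), pm.hpd(y_ppc))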
Example #32
0
print('Actual Sample Size')
print(sample_size)
print('Effective Sample Size')
print(pm.diagnostics.effective_n(trace))

#pm.traceplot(trace)
#plt.show()

# set up scikit-learn model
sci_model = LogisticRegression()
sci_model.fit(x_train, y_train)
sci_y_pred_train = sci_model.predict(x_train)
print(sci_y_pred_train[:5])

# the mean over posterior predictive draws of the 0/1 outcomes is each
# observation's predicted probability; threshold at 0.5 for class labels
y_pred_train = np.mean(pm.sample_posterior_predictive(trace, model=lrh_model)['y'], axis=0)
y_pred_train = (y_pred_train >= 0.5).astype(int)

print('train - scikit')
print(accuracy_score(y_train,sci_y_pred_train))
print(classification_report(y_train,sci_y_pred_train))

print('train - bayesian')
print(accuracy_score(y_train,y_pred_train))
print(classification_report(y_train,y_pred_train))

sci_y_pred_test = sci_model.predict(x_test)
x_shared.set_value(x_test)
y_shared.set_value(y_test)
y_pred_test = np.mean(pm.sample_posterior_predictive(trace, model=lrh_model)['y'], axis=0)
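
# A natural continuation (a sketch mirroring the train-set comparison above):
# threshold the posterior predictive means and score both models on the test set.
y_pred_test = (y_pred_test >= 0.5).astype(int)
print('test - scikit')
print(accuracy_score(y_test, sci_y_pred_test))
print('test - bayesian')
print(accuracy_score(y_test, y_pred_test))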
Example #33
0
         label="Non-robust regression",
         alpha=0.5)
# plot the data
plt.plot(x, y, 'C0o')
# get the mean of the intercept from the posterior
alpha_m = trace["alpha"].mean()
# get the mean of the coefficient from the posterior
beta_m = trace["beta"].mean()
# plot the robust linear regression
plt.plot(x, alpha_m + beta_m * x, c="k", label="Robust linear regression")
# plot the variety of predicted results
az.plot_hpd(x, ppc["obs"])
# final plot details; both lines carry labels, so show the legend
plt.xlabel("x")
plt.ylabel("y", rotation=0)
plt.legend(loc=2)
plt.tight_layout()
plt.show()

# ----------------- analyse the posterior -------------------- #

with model_t:
    az.plot_trace(trace, var_names=["alpha", "beta", "sigma", "vu"])
    # get the summary
    log.info("the trace summary is: %s", az.summary(trace))
    # let's also run a posterior predictive check
    ppc = pm.sample_posterior_predictive(trace, samples=2000)
    data_ppc = az.from_pymc3(trace=trace, posterior_predictive=ppc)
    ax = az.plot_ppc(data_ppc, figsize=(12, 6), mean=True)
    plt.xlim(0, 12)
Example #34
0
x_data = np.random.normal(mu[comp], sigma[comp], size=n_data)
plt.figure()
plt.hist(x_data, bins=200, label=r'Actual Data')

# inference
if __name__ == '__main__':
    with pm.Model() as model:
        w = pm.Dirichlet('w', np.ones_like(weight))
        mu = pm.Normal('mu', 0., 10., shape=weight.size)
        tau = pm.Gamma('tau', 1., 1., shape=weight.size)
        x_observed = pm.NormalMixture('x_observed', w, mu, tau=tau,
                                      observed=x_data)
        trace = pm.sample(5000,
                          n_init=10000,
                          tune=1000,
                          random_seed=42,
                          cores=2)
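    # Caveat (added note): mixture posteriors are invariant under relabelling of
    # the components, so the marginal histograms below may show merged or permuted
    # modes (label switching); an ordered transform on mu is a common remedy.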
    plt.figure()
    plt.hist(trace['w'], bins=50, label=r"posterior of $w$")
    plt.figure()
    plt.hist(trace['mu'], bins=50, label=r"posterior of $\mu$")
    plt.figure()
    plt.hist(trace['tau'], bins=50, label=r"posterior of $\tau$")
    with model:
        p_trace = pm.sample_posterior_predictive(trace, 5000, random_seed=42)
    plt.figure()
    plt.hist(p_trace['x_observed'], bins=50, density=True, histtype='step',
             lw=2, alpha=.05, label='Posterior predictive distribution')
    plt.hist(x_data, bins=50, density=True, histtype='step', lw=2,
             label='Observed data')
plt.show()
print(comp)
Example #35
0
# az.compare({'model_0': trace_0, 'model_1': trace_1, 'model_2': trace_2}, method='BB-pseudo-BMA')

# Now we are going to use the previously computed weights to generate predictions
# based not on a single model but on the weighted set of models. This is one way
# to perform model averaging. Using PyMC3 we can call the
# sample_posterior_predictive_w function as follows:
ppc_w = pm.sample_posterior_predictive_w(
    traces,
    1000, [model_0, model_1, model_2],
    weights=comp.weight.sort_index(ascending=True),
    progressbar=False)
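
# Sanity check (a sketch, not in the original snippet): the averaging weights
# passed above should sum to ~1 and be ordered to match [model_0, model_1, model_2].
print(comp.weight.sort_index(ascending=True))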

# We are also going to compute PPCs for the lowest-WAIC model
ppc_2 = pm.sample_posterior_predictive(trace_2,
                                       1000,
                                       model_2,
                                       progressbar=False)

# A simple way to compare both kinds of predictions is to plot their means and HPD intervals
mean_w = ppc_w['kcal'].mean()
hpd_w = pm.hpd(ppc_w['kcal']).mean(0)
mean = ppc_2['kcal'].mean()
hpd = pm.hpd(ppc_2['kcal']).mean(0)

plt.errorbar(mean,
             1,
             xerr=[[mean - hpd[0]], [hpd[1] - mean]],
             fmt='o',
             label='model 2')
plt.errorbar(mean_w,
             0,