def fit(self, X, y):
        self.model = pm.Model()

        with self.model:
            prob = pm.Uniform('prob', lower=0.03, upper=0.97)
            xi = pm.Bernoulli(
                'xi', p=prob,
                shape=X.shape[1])  # inclusion probability for each variable
            alpha = pm.Normal('alpha', mu=0.0, sd=5.0)  # Intercept
            beta = pm.Normal(
                'beta', mu=0.0, sd=1.0,
                shape=X.shape[1])  # Prior for the non-zero coefficients
            p = pm.math.dot(
                X, xi * beta
            )  # Deterministic function to map the stochastics to the output
            y_obs = pm.Bernoulli('y_obs', invlogit(p + alpha),
                                 observed=y)  # Data likelihood

        with self.model:
            self.trace = pm.sample(2000,
                                   random_seed=4816,
                                   cores=1,
                                   progressbar=False,
                                   chains=1)

        return None
    def fit(self, X, y):
        self.model = pm.Model()

        with self.model:
            xi = pm.Bernoulli(
                'xi', .05,
                shape=X.shape[1])  # inclusion probability for each variable
            alpha = pm.Cauchy('alpha', alpha=0, beta=2.0)  # Intercept
            coeff_shape = pm.Exponential("coeff_shape", lam=0.05)
            beta = pm.Cauchy(
                'beta', alpha=0, beta=coeff_shape,
                shape=X.shape[1])  # Prior for the non-zero coefficients
            p = pm.math.dot(
                X, xi * beta
            )  # Deterministic function to map the stochastics to the output
            y_obs = pm.Bernoulli('y_obs', invlogit(p + alpha),
                                 observed=y)  # Data likelihood

        with self.model:
            self.trace = pm.sample(4000,
                                   random_seed=4816,
                                   cores=1,
                                   progressbar=False,
                                   chains=1)

        return None
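A minimal readout sketch, not part of the original example: assuming clf is an instance of the class defining the fit above and feature_names lists the columns of X, the posterior inclusion probabilities from the spike-and-slab trace could be summarized as follows.

# Hypothetical usage sketch; clf and feature_names are assumed names, not from
# the original source.
clf.fit(X, y)
inclusion = clf.trace['xi'].mean(axis=0)                      # posterior inclusion probability per feature
effect = (clf.trace['xi'] * clf.trace['beta']).mean(axis=0)   # posterior mean of the sparsified coefficients
for name, pi, b in zip(feature_names, inclusion, effect):
    print("{}: P(included)={:.2f}, mean effect={:.3f}".format(name, pi, b))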
Example #3
def part2():

    with pm.Model() as model:
        G1 = pm.Bernoulli('G1', 0.5)
        ## p(G2|G1)
        G2_p = pm.Deterministic('G2_p', pm.math.switch(G1, 0.9, 0.1))
        G2 = pm.Bernoulli('G2', G2_p)
        ## p(G3|G1)
        G3_p = pm.Deterministic('G3_p', pm.math.switch(G1, 0.9, 0.1))
        G3 = pm.Bernoulli('G3', G3_p)

        X2 = pm.Normal('X2', mu=pm.math.switch(G2, 60, 50), sd=3.16)

        X3 = pm.Normal('X3', mu=pm.math.switch(G3, 60, 50), sd=3.16)
        trace = pm.sample(400000, tune=50, progressbar=True)

    X2_50 = 0
    G1 = 0
    X3_50 = 0
    for sample in trace:
        if 49.5 <= sample['X2'] <= 50.5:
            X2_50 += 1
            if sample['G1'] == 1:
                G1 += 1
            if 49.5 <= sample['X3'] <= 50.5:
                X3_50 += 1

    print "X2:", X2_50, " G1:", G1, " X3:", X3_50

    print "P(G1==2|X2==50): ", G1/X2_50
    print "P(X3==50|X2==50): ", X3_50/X2_50
Example #4
def generate_global_distribution(constraints, N):
    hyperedges = foulis_randall_product()
    hyperedges_tallies = zeros(12)
    global_distribution = zeros(16)
    while sum(global_distribution) < N:
        with pm.Model():
            pm.Uniform('C', 0.0, 1.0)
            pm.Bernoulli('A', 0.5)
            pm.Bernoulli('B', 0.5)
            pm.Bernoulli('X', 0.5)
            pm.Bernoulli('Y', 0.5)
            S = pm.sample(N, tune=0, step=pm.Metropolis())
            c = S.get_values('C')
            a = S.get_values('A')
            b = S.get_values('B')
            x = S.get_values('X')
            y = S.get_values('Y')
        for i in range(0, N):
            if (c[i] < constraints[x[i]][y[i]][a[i], b[i]]):
                for edge in get_hyperedges(hyperedges,
                                           [a[i], b[i], x[i], y[i]]):
                    hyperedges_tallies[edge] += 1
                global_distribution[get_vertex(a[i], b[i], x[i], y[i])] += 1
    z = [0, 1]
    for a, b, x, y in product(z, z, z, z):
        summed_tally = (sum(hyperedges_tallies[e]
                            for e in get_hyperedges(hyperedges, [a, b, x, y])))
        global_distribution[get_vertex(a, b, x, y)] /= summed_tally
    global_distribution *= 3
    return global_distribution
Example #5
def fun_infer_model_Bernoulli(df,
                              samples=10,
                              tune=100,
                              Krange=np.arange(2, 20),
                              path="./",
                              name=""):
    ch = 1

    N = df.shape[0]
    Q = df.shape[1]
    for K in Krange:
        with pm.Model() as model:
            # p=0.5 assumed; Bernoulli requires an explicit probability
            learner = pm.Bernoulli('learner', p=0.5, shape=(N, K))
            question = pm.Bernoulli('question',
                                    p=np.repeat(.1, K),  # keyword is p, not a
                                    shape=(Q, K))
            x = pm.math.dot(learner, question.T)
            results = pm.Bernoulli('rezults', p=x, shape=(N, Q), observed=df)

        for RV in model.basic_RVs:
            print(RV.name, RV.logp(model.test_point))
        model_to_graphviz(model)

        with model:
            trace = pm.sample(samples,
                              chains=ch,
                              tune=tune,
                              discard_tuned_samples=True)

        a = pm.math.dot(trace['learner'].mean(0),
                        trace['question'][:, :].mean(0).T)
        pd.DataFrame(a.eval()).to_csv(path + name + "estim_" + str(K) + ".txt",
                                      sep="\t")
        print("finished: " + str(K))
    return model
Example #6
def get_chains(obs, obs_train, true_train):
    '''
    these arrays are n_galaxies x n_classifiers, and are 0 for isolated, 1 for merger.
    '''
    obs_classes, obj_ids, cfer_ids = make_flat(obs)
    a_00, b_00, a_11, b_11 = get_beta_pars(true_train, obs_train)

    n_cfers, n_obj = obs.shape

    with pm.Model() as explicit_model:
        f_M = pm.Beta('fM', alpha=0.5, beta=0.5)
        r_Is = pm.Beta('rI', alpha=a_00, beta=b_00, shape=n_cfers)
        r_Ms = pm.Beta('rM', alpha=a_11, beta=b_11, shape=n_cfers)

        #the "real" data
        true_class = pm.Bernoulli('true_class', f_M, shape=n_obj)
        N_M_true = pm.Deterministic('N_M', TT.sum(true_class))
        p_obs_1 = TT.switch(TT.eq(true_class[obj_ids], 1), r_Ms[cfer_ids],
                            1 - r_Is[cfer_ids])
        observed = pm.Bernoulli('observed', p_obs_1, observed=obs_classes)

    with explicit_model:
        trace = pm.sample(draws=5000)

    return trace.get_values('fM'), trace.get_values('rI'), trace.get_values(
        'rM'), trace.get_values('true_class')
Example #7
def model_fit_using_se(data,
                       u_dim,
                       method='mcmc',
                       num_iter=10,
                       num_sample=1000):

    search_dim = data['search'].shape[1]
    num_obs = data['search'].shape[0]

    cov_u, mu_u = np.eye(u_dim), np.zeros(u_dim)
    cov_nlp, mu_nlp = np.eye(search_dim), np.zeros(search_dim)

    cov_nlp = np.loadtxt("similarity_matrix.csv",
                         dtype='float32',
                         delimiter=',')

    with pm.Model() as model:

        u = pm.MvNormal('u', mu=mu_u, cov=cov_u, shape=(num_obs, u_dim))
        search = data[
            'search']  #pm.MvNormal('search',mu=mu_search,cov=cov_search,observed=data['search'])

        #Incoming edge to self esteem
        u_se = pm.MvNormal('u_se', mu=mu_u, cov=cov_u, shape=u_dim)
        search_se = pm.MvNormal('search_se',
                                mu=mu_nlp,
                                cov=cov_nlp,
                                shape=search_dim)

        #self esteem as a function of its parents
        se_mean = tt.nnet.nnet.sigmoid(
            tt.dot(search, search_se) + tt.dot(u, u_se))
        se = pm.Bernoulli('se', p=se_mean, observed=data['se'])

        #Incoming edge to suicide ideation
        u_si = pm.MvNormal('u_si', mu=mu_u, cov=cov_u, shape=u_dim)
        search_si = pm.MvNormal('search_si',
                                mu=mu_nlp,
                                cov=cov_nlp,
                                shape=search_dim)
        #se_si_sig = pm.Normal('se_si_sig',mu=0,tau=1)
        se_si = pm.HalfNormal('se_si', sigma=1)  #se_si_sig**2)

        si_mean = tt.nnet.nnet.sigmoid(
            tt.dot(search, search_si) + tt.dot(u, u_si) + se_si * se)
        si = pm.Bernoulli('si', p=si_mean, observed=data['si'])

        mf = pm.fit(n=num_iter)
        #trace = pm.sample()
        trace = mf.sample(num_sample)
        #pm.traceplot(trace)
        #trace = pm.sample()
        # se_mean = pm.Uniform('se_mean',lower=0,upper=1,size=num_obs)
        # se = pm.Bernoulli('se',p=se_mean, observed = data["se"])

        # si_me
        # si = pm.Bernoulli('si',p= ,observed = data["si"])

    return trace
Example #8
    def posterior_mcmc(self, data):
        """
        Find posterior distribution for the numerical method of solution
        """

        with pm.Model() as ab_model:
            # priors
            mua = pm.distributions.continuous.Beta('muA',
                                                   alpha=self.alpha_prior,
                                                   beta=self.beta_prior)
            mub = pm.distributions.continuous.Beta('muB',
                                                   alpha=self.alpha_prior,
                                                   beta=self.beta_prior)
            # likelihoods
            pm.Bernoulli('likelihoodA', mua, observed=data[0])
            pm.Bernoulli('likelihoodB', mub, observed=data[1])

            # find distribution of difference
            pm.Deterministic('lift', mub - mua)
            # find distribution of effect size
            sigma_a = pm.Deterministic('sigmaA', np.sqrt(mua * (1 - mua)))
            sigma_b = pm.Deterministic('sigmaB', np.sqrt(mub * (1 - mub)))
            pm.Deterministic('effect_size', (mub - mua) /
                             (np.sqrt(0.5 * (sigma_a**2 + sigma_b**2))))

            start = pm.find_MAP()
            step = pm.Slice()
            trace = pm.sample(self.iterations, step=step, start=start)

        bins = np.linspace(0, 1, self.resolution)
        mua = np.histogram(trace['muA'][500:], bins=bins, density=True)
        mub = np.histogram(trace['muB'][500:], bins=bins, density=True)
        sigma_a = np.histogram(trace['sigmaA'][500:], bins=bins, density=True)
        sigma_b = np.histogram(trace['sigmaB'][500:], bins=bins, density=True)

        rvs = trace['lift'][500:]
        bins = np.linspace(
            np.min(rvs) - 0.2 * abs(np.min(rvs)),
            np.max(rvs) + 0.2 * abs(np.max(rvs)), self.resolution)
        lift = np.histogram(rvs, bins=bins, density=True)

        rvs = trace['effect_size'][500:]
        bins = np.linspace(
            np.min(rvs) - 0.2 * abs(np.min(rvs)),
            np.max(rvs) + 0.2 * abs(np.max(rvs)), self.resolution)
        pes = np.histogram(rvs, bins=bins, density=True)

        posterior = {
            'muA': mua,
            'muB': mub,
            'sigmaA': sigma_a,
            'sigmaB': sigma_b,
            'lift': lift,
            'es': pes,
            'prior': self.prior()
        }

        return posterior
Example #9
    def pymc3_dist(self, name, hypers):
        p = self.p
        if len(hypers) == 1:
            hyper_dist = hypers[0][0]
            hyper_name = hypers[0][1]
            p = hyper_dist.pymc3_dist(hyper_name, [])

        if self.num_elements == -1:
            return pm.Bernoulli(name, p=p)
        else:
            return pm.Bernoulli(name, p=p, shape=self.num_elements)
Example #10
def get_model():

    conf = bayes_workshop.conf.get_conf()

    (cols, data) = bayes_workshop.data.get_data()

    demo_subj_id = 8
    i_demo_trials = (data[:, cols.index("i_subj")] == demo_subj_id)
    demo_data = data[i_demo_trials, :]

    (n_trials, _) = demo_data.shape

    i_modality = demo_data[:, cols.index("i_modality")]
    responses = demo_data[:, cols.index("target_longer")]
    cmp_dur = demo_data[:, cols.index("target_duration")]

    i_audio = (i_modality == 0)
    i_visual = (i_modality == 1)

    with pm.Model() as model:

        alpha = pm.Normal("alpha", mu=conf.standard_ms, sd=50.0)

        beta = pm.HalfNormal("beta", sd=100.0)

        delta_alpha = pm.Normal("delta_alpha", mu=0.0, sd=20.0)

        delta_beta = pm.Normal("delta_beta", mu=0.0, sd=40.0)

        alpha_audio = alpha + (delta_alpha / 2.0)
        alpha_visual = alpha - (delta_alpha / 2.0)

        beta_audio = beta + (delta_beta / 2.0)
        beta_visual = beta - (delta_beta / 2.0)

        theta_audio = bayes_workshop.utils.logistic(x=cmp_dur[i_audio],
                                                    alpha=alpha_audio,
                                                    beta=beta_audio)

        theta_visual = bayes_workshop.utils.logistic(x=cmp_dur[i_visual],
                                                     alpha=alpha_visual,
                                                     beta=beta_visual)

        obs_audio = pm.Bernoulli("obs_audio",
                                 p=theta_audio,
                                 observed=responses[i_audio])

        obs_visual = pm.Bernoulli("obs_visual",
                                  p=theta_visual,
                                  observed=responses[i_visual])

    return model
Example #11
    def fit(self, X, y):
        self.model = pm.Model()

        with self.model:
            xi = pm.Bernoulli("xi", .05, shape=X.shape[1])  # inclusion probability for each variable
            alpha = pm.StudentT("alpha", nu=3, mu=0.0, sd=5.0)  # Intercept
            beta = pm.StudentT("beta", nu=3, mu=0.0, sd=1.0, shape=X.shape[1])  # Prior for the non-zero coefficients
            p = pm.math.dot(X, xi * beta)  # Deterministic function to map the stochastics to the output
            y_obs = pm.Bernoulli("y_obs", invlogit(p + alpha), observed=y)  # Data likelihood

        with self.model:
            self.trace = pm.sample(2000, random_seed=4816, cores=1, progressbar=False, chains=1)

        return None
Example #12
    def _build_model(self, data):
        data = _data_df2dict(data)
        with pm.Model() as model:
            # Priors
            k = pm.Normal('k',
                          mu=0.01,
                          sd=1.,
                          shape=2,
                          transform=Ordered(),
                          testval=[0.01, 0.02])
            p = pm.Beta('p', alpha=1 + 4, beta=1 + 4)
            α = pm.Exponential('alpha', lam=1)
            ϵ = 0.01
            # Value functions
            VA = pm.Deterministic('VA', data['A'] * self._df(k, p, data['DA']))
            VB = pm.Deterministic('VB', data['B'] * self._df(k, p, data['DB']))
            # Choice function: psychometric
            P_chooseB = pm.Deterministic(
                'P_chooseB', choice_func_psychometric(α, ϵ, VA, VB))
            # Likelihood of observations
            r_likelihood = pm.Bernoulli('r_likelihood',
                                        p=P_chooseB,
                                        observed=data['R'])

        return model
Example #13
    def fit(self, X, y):
        """Fit the Imputer to the dataset by fitting bayesian model.

        Args:
            X (pd.DataFrame): dataset to fit the imputer.
            y (pd.Series): response, which is eventually imputed.

        Returns:
            self. Instance of the class.
        """
        y = y.astype("category").cat
        y_cat_l = len(y.codes.unique())

        # bayesian logistic regression. Multiple categories not supported yet
        if y_cat_l != 2:
            err = "Only two categories supported. Multinomial coming soon."
            raise ValueError(err)
        nc = len(X.columns)

        # initialize model for bayesian logistic reg. Default vals for priors
        # assume data is scaled and centered. Convergence can struggle or fail
        # if not the case and proper values for the priors are not specified
        # separately, also assumes each beta is normal and "independent"
        # while betas likely not independent, this is technically a rule of OLS
        with pm.Model() as fit_model:
            alpha = pm.Normal("alpha", self.am, sd=self.asd)
            beta = pm.Normal("beta", self.bm, sd=self.bsd, shape=nc)
            p = pm.invlogit(alpha + beta.dot(X.T))
            score = pm.Bernoulli("score", p, observed=y.codes)

        params = {"model": fit_model, "labels": y.categories}
        self.statistics_ = {"param": params, "strategy": self.strategy}
        return self
Example #14
def rasch_model(dataset):
    """Defines the mcmc model for Rasch estimation.
    
    Args:
        dataset: [n_items, n_participants] 2d array of measured responses

    Returns:
        model: PyMC3 model to run
    """
    n_items, n_people = dataset.shape
    observed = dataset.astype('int')

    rasch_pymc_model = pm.Model()
    with rasch_pymc_model:
        # Ability Parameters (Standardized Normal)
        ability = pm.Normal("Ability", mu=0, sigma=1, shape=n_people)

        # Difficulty multilevel prior
        sigma_difficulty = pm.HalfNormal('Difficulty_SD', sigma=1, shape=1)
        difficulty = pm.Normal("Difficulty",
                               mu=0,
                               sigma=sigma_difficulty,
                               shape=n_items)

        # Compute the probabilities
        kernel = ability[None, :] - difficulty[:, None]
        probabilities = pm.Deterministic("PL_Kernel", pm.math.invlogit(kernel))

        # Get the log likelihood
        log_likelihood = pm.Bernoulli("Log_Likelihood",
                                      p=probabilities,
                                      observed=observed)

    return rasch_pymc_model
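A short, hypothetical usage sketch for rasch_model; the dataset layout follows the docstring above and the sampler settings are illustrative only.

# Hypothetical usage sketch; responses is an [n_items, n_participants] 0/1 array.
import pymc3 as pm

model = rasch_model(responses)
with model:
    trace = pm.sample(1000, tune=1000, chains=2, cores=1)

ability_est = trace['Ability'].mean(axis=0)        # one estimate per participant
difficulty_est = trace['Difficulty'].mean(axis=0)  # one estimate per item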
Example #15
def define_linreg_model(llj_data):
    """
    Defines a model with parameters to derive probabilities of an LLJ event
    for a given value of the predictor variable
    :param llj_data:
    :return:
    """

    predictor = llj_data[:, 0]
    llj_events = llj_data[:, 1]

    with pm.Model() as model:

        # Define probability distribution with a logistic function,
        # with parameters drawn from a normal distribution
        beta = pm.Normal("beta", mu=0, tau=0.001, testval=0)
        alpha = pm.Normal("alpha", mu=0, tau=0.001, testval=0)
        p = pm.Deterministic("p", 1.0/(1. + tt.exp(beta*predictor + alpha)))

        # Use a Bernoulli random variable to connect the probabilities to
        # the observations
        observed = pm.Bernoulli("bernoulli_obs", p, observed=llj_events)
        start = pm.find_MAP()
        step = pm.Metropolis()
        trace = pm.sample(120000, step=step, start=start)
        burned_trace = trace[100000::2]
        alpha_samples = burned_trace["alpha"][:, None]
        beta_samples = burned_trace["beta"][:, None]
        pred_vals = np.linspace(predictor.min()-5, predictor.max()+5, 50)[:,
                    None]
        p_pred = logistic(pred_vals.T, beta_samples, alpha_samples)
        mean_prob_pred = p_pred.mean(axis=0)

    return alpha_samples, beta_samples, mean_prob_pred, pred_vals, p_pred
Example #16
    def create_model(self):
        with pm.Model() as self.model:
            self.mean = pm.gp.mean.Zero()

            # covariance function
            l_L = pm.Gamma("l_L", alpha=2, beta=2, shape = self.dim)
            # informative, positive normal prior on the period
            eta_L = pm.HalfNormal("eta_L", sd=5)
            self.cov_L = eta_L * pm.gp.cov.ExpQuad(self.dim, l_L)

            # covariance function
            l_H = pm.Gamma("l_H", alpha=2, beta=2, shape = self.dim)
            delta = pm.Normal("delta", sd=10)
            # informative, positive normal prior on the period
            eta_H = pm.HalfNormal("eta_H", sd=5)
            self.cov_H = eta_H * pm.gp.cov.ExpQuad(self.dim, l_H)

            K_LL = self.cov_L(self.X_L)
            K_HH = delta**2*self.cov_L(self.X_H) + self.cov_H(self.X_H)
            K_LH = delta*self.cov_L(self.X_L, self.X_H)

            K1 = tt.concatenate([K_LL, K_LH], axis = 1)
            K2 = tt.concatenate([K_LH.T, K_HH], axis = 1)
            self.K_stable = pm.gp.util.stabilize(tt.concatenate([K1,K2], axis = 0))


            v = pm.Normal("fp_rotated_", mu=0.0, sd=1.0, shape=pm.gp.util.infer_shape(self.X))
            fp = pm.Deterministic("fp", self.mean(self.X) + tt.slinalg.cholesky(self.K_stable).dot(v))

            p = pm.Deterministic("p", pm.math.invlogit(fp))
            y = pm.Bernoulli("y", p=p, observed=self.Y)
Example #17
def build_softmax_linear(X, y, force_softmax=False):
    """
    Sample from Bayesian Softmax Linear Regression
    """
    num_features = X.shape[1]
    num_classes = len(np.unique(y))
    logistic_regression = num_classes == 2
    Xt = theano.shared(X)
    
    if logistic_regression and not force_softmax:
        print('running logistic regression')
        with pm.Model() as model:
            W = pm.Normal('W', 0, sd=1e6, shape=num_features)
            b = pm.Flat('b')
            logit = Xt.dot(W) + b
            p = tt.nnet.sigmoid(logit)
            observed = pm.Bernoulli('obs', p=p, observed=y)
    else:
        with pm.Model() as model:
            W = pm.Normal('W', 0, sd=1e6, shape=(num_features, num_classes))
            b = pm.Flat('b', shape=num_classes)
            logit = Xt.dot(W) + b
            p = tt.nnet.softmax(logit)
            observed = pm.Categorical('obs', p=p, observed=y)
    return model
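A hypothetical follow-up sketch, not part of the original example: it samples the returned model and averages the predicted probabilities over posterior draws, assuming the logistic-regression branch was taken (binary y).

# Hypothetical usage sketch; X and y are the arrays passed to build_softmax_linear.
import numpy as np
import pymc3 as pm

model = build_softmax_linear(X, y)
with model:
    trace = pm.sample(1000, tune=1000, chains=2, cores=1)

logits = X.dot(trace['W'].T) + trace['b']                  # shape (n_rows, n_draws)
mean_prob = (1.0 / (1.0 + np.exp(-logits))).mean(axis=1)   # posterior-mean P(y=1) per row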
Example #18
    def test_model_shared_variable(self):
        x = np.random.randn(100)
        y = x > 0
        x_shared = theano.shared(x)
        y_shared = theano.shared(y)
        with pm.Model() as model:
            coeff = pm.Normal("x", mu=0, sd=1)
            logistic = pm.Deterministic("p", pm.math.sigmoid(coeff * x_shared))

            obs = pm.Bernoulli("obs", p=logistic, observed=y_shared)
            trace = pm.sample(100)

        x_shared.set_value([-1, 0, 1.0])
        y_shared.set_value([0, 0, 0])

        samples = 100
        with model:
            post_pred = pm.sample_posterior_predictive(trace,
                                                       samples=samples,
                                                       var_names=["p", "obs"])

        expected_p = np.array(
            [logistic.eval({coeff: val}) for val in trace["x"][:samples]])
        assert post_pred["obs"].shape == (samples, 3)
        assert np.allclose(post_pred["p"], expected_p)
Example #19
    def test_aesara_switch_broadcast_edge_cases(self):
        # Tests against two subtle issues related to a previous bug in Aesara where aet.switch would not
        # always broadcast tensors with single values https://github.com/pymc-devs/aesara/issues/270

        # Known issue 1: https://github.com/pymc-devs/pymc3/issues/4389
        data = np.zeros(10)
        with pm.Model() as m:
            p = pm.Beta("p", 1, 1)
            obs = pm.Bernoulli("obs", p=p, observed=data)
        # Assert logp is correct
        npt.assert_allclose(
            obs.logp(m.test_point),
            np.log(0.5) * 10,
        )

        # Known issue 2: https://github.com/pymc-devs/pymc3/issues/4417
        # fmt: off
        data = np.array([
            1.35202174, -0.83690274, 1.11175166, 1.29000367, 0.21282749,
            0.84430966, 0.24841369, 0.81803141, 0.20550244, -0.45016253,
        ])
        # fmt: on
        with pm.Model() as m:
            mu = pm.Normal("mu", 0, 5)
            obs = pm.TruncatedNormal("obs", mu=mu, sigma=1, lower=-1, upper=2, observed=data)
        # Assert dlogp is correct
        npt.assert_allclose(m.dlogp([mu])({"mu": 0}), 2.499424682024436, rtol=1e-5)
Example #20
def get_beta_bernoulli_dpmixture(X, params):
    n_doc, n_feat = X.shape
    n_comp = params['n_trunc']

    with pm.Model() as model:
        # sample P ~ DP(G0)
        beta = pm.Beta('beta',
                       1.,
                       params['dp_alpha'],
                       shape=n_comp)
        p_comp = pm.Deterministic(
            'p_comp',
            beta * tt.concatenate([[1], tt.extra_ops.cumprod(1 - beta)[:-1]]))
        pkw = pm.Beta('pkw',
                      alpha=params['pkw_beta_dist_alpha'],
                      beta=params['pkw_beta_dist_beta'],
                      shape=(n_comp, n_feat))
        # sample X ~ P
        z = pm.Categorical('z',
                           p=p_comp,
                           shape=n_doc)
        x = pm.Bernoulli('x',
                         p=pkw[z],
                         shape=(n_doc, n_feat),
                         observed=X)
    return model
Example #21
 def _build_BPF(self):
     print('start building the Bayesian probabilistic model')
     self.x_u = theano.shared(self.train_u)
     self.x_i = theano.shared(self.train_i)
     self.y_r = theano.shared(self.train_r)
     self.y_r_ui = theano.shared(np.array(self.nn_r_ui))
     assert (len(self.y_r.get_value()) == len(self.y_r_ui.get_value()))
     with pm.Model() as self.bncf:  #define the prior and likelihood
         b_u = pm.Normal('b_u', 0, sd=1, shape=self.shape[0])
         b_i = pm.Normal('b_i', 0, sd=1, shape=self.shape[1])
         u = pm.Normal('u', 0, sd=1)
         tY = pm.Deterministic(
             'tY',
             tt.add(
                 tt.add(tt.add(b_u[self.x_u], b_i[self.x_i]), self.y_r_ui),
                 u))
         #tY = pm.Deterministic('tY', ((b_u[self.x_u]+b_i[self.x_i])+self.y_r_ui)+u)#b_u+b_i+u+nn_r_ui
         nY = pm.Deterministic('nY', pm.math.sigmoid(tY))
         # likelihood of observed data
         Y = pm.Bernoulli(
             'Y', nY,
             observed=self.y_r)  #total_size=self.y_r.get_value().shape[0]
     with self.bncf:  #inference
         approx = pm.fit(n=1000, method=pm.ADVI())
         self.trace = approx.sample(draws=500)
     with self.bncf:  #posterior prediction
         ppc = pm.sample_posterior_predictive(self.trace, progressbar=True)
         self.by_r_ui = ppc['Y'].mean(axis=0)
     print('done building the Bayesian probabilistic model')
Example #22
def bayes_variance_model(data, var=True):
    '''
    Implement a model that predicts individual choices based on
    non-linearly transformed contrast samples.

    If var == True the variance of the ten contrast samples is
    explicitly modeled. If False it is omitted.
    '''
    basic_model = pm.Model()
    contrast = get_contrast(data)
    response = (data.response + 1) / 2.
    with basic_model:
        # Priors for unknown model parameters
        beta_time = pm.Normal('time', mu=0, sd=10, shape=10)
        offset = pm.Normal('offset', mu=0, sd=0.05)
        alpha = pm.Normal('alpha', mu=0, sd=2)

        transformed = contrast**pm.math.exp(alpha) - 0.5**pm.math.exp(alpha)
        weighted = transformed * beta_time[np.newaxis, :]
        # Expected value of outcome
        mu = weighted.mean(1) + offset
        if var:
            beta_std = pm.Normal('std', mu=0, sd=.35)
            stdc = (transformed).std(1) / transformed.std(1).mean()
            mu += beta_std * beta_time.mean() * stdc

        ps = pm.math.invlogit(mu)
        # Likelihood (sampling distribution) of observations
        Y_obs = pm.Bernoulli('Y_obs', ps, observed=response)
    return basic_model
Example #23
    def _build_model(self, data):
        data = _data_df2dict(data)
        with pm.Model() as model:
            # Priors
            # NOTE: we need another variable if we deal with losses, which goes
            # to the value function
            β = pm.Bound(pm.Normal, lower=0)('beta', mu=1, sd=1000)
            γ = pm.Bound(pm.Normal, lower=0)('gamma', mu=0, sd=1000)
            τ = pm.Bound(pm.Normal, lower=0)('tau', mu=0, sd=1000)

            # TODO: pay attention to the choice function & its params
            α = pm.Exponential('alpha', lam=1)
            ϵ = 0.01

            value_diff = (self._value_function(γ, data['B']) -
                          self._value_function(γ, data['A']))
            time_diff = (self._time_weighing_function(τ, data['DB']) -
                         self._time_weighing_function(τ, data['DA']))
            diff = value_diff - β * time_diff

            # Choice function: psychometric
            P_chooseB = pm.Deterministic('P_chooseB',
                                         choice_func_psychometric2(α, ϵ, diff))
            # Likelihood of observations
            r_likelihood = pm.Bernoulli('r_likelihood',
                                        p=P_chooseB,
                                        observed=data['R'])

        return model
Example #24
    def test_model_shared_variable(self):
        rng = np.random.RandomState(9832)

        x = rng.randn(100)
        y = x > 0
        x_shared = aesara.shared(x)
        y_shared = aesara.shared(y)
        with pm.Model(rng_seeder=rng) as model:
            coeff = pm.Normal("x", mu=0, sd=1)
            logistic = pm.Deterministic("p", pm.math.sigmoid(coeff * x_shared))

            obs = pm.Bernoulli("obs", p=logistic, observed=y_shared)
            trace = pm.sample(100,
                              return_inferencedata=False,
                              compute_convergence_checks=False)

        x_shared.set_value([-1, 0, 1.0])
        y_shared.set_value([0, 0, 0])

        samples = 100
        with model:
            post_pred = pm.sample_posterior_predictive(trace,
                                                       samples=samples,
                                                       var_names=["p", "obs"])

        expected_p = np.array(
            [logistic.eval({coeff: val}) for val in trace["x"][:samples]])
        assert post_pred["obs"].shape == (samples, 3)
        npt.assert_allclose(post_pred["p"], expected_p)
Example #25
    def add_observations():
        with hierarchical_model.pymc_model:
            for i in range(hierarchical_model.n_groups):
                observations.append(
                    pm.Bernoulli(f'y_{i}',
                                 theta[i],
                                 observed=hierarchical_model.y[i]))
Example #26
    def create_model(self):
        with pm.Model() as self.model:
            self.mean = pm.gp.mean.Zero()

            # covariance function
            l = pm.Gamma("l_L", alpha=2, beta=2, shape = self.dim)
            # informative, positive normal prior on the period
            eta = pm.HalfNormal("eta_L", sd=5)
            self.cov = eta * pm.gp.cov.ExpQuad(self.dim, l)

            Kuu = self.cov(self.Xu)
            Kuf = self.cov(self.Xu, self.X)
            Luu = tt.slinalg.cholesky(pm.gp.util.stabilize(Kuu))

            vu = pm.Normal("u_rotated_", mu=0.0, sd=1.0, shape=pm.gp.util.infer_shape(self.Xu))
            u = pm.Deterministic("u", Luu.dot(vu))

            Luuinv_u = pm.gp.util.solve_lower(Luu,u)
            A = pm.gp.util.solve_lower(Luu, Kuf)

            Qff = tt.dot(tt.transpose(A),A)

            Kffd = self.cov(self.X, diag=True)
            Lamd = pm.gp.util.stabilize(tt.diag(tt.clip(Kffd - tt.diag(Qff) + self.sigma**2, 0.0, np.inf)))


            v = pm.Normal("fp_rotated_", mu=0.0, sd=1.0, shape=pm.gp.util.infer_shape(self.X))
            fp = pm.Deterministic("fp", tt.dot(tt.transpose(A), Luuinv_u) + tt.sqrt(Lamd).dot(v))

            p = pm.Deterministic("p", pm.math.invlogit(fp))
            y = pm.Bernoulli("y", p=p, observed=self.Y)
Example #27
 def fit(self, matches, target):
     lineups = set(matches['t1_lineup'].unique()).union(
         matches['t2_lineup'].unique())
     self.lineup_f2id = dict(enumerate(
         lineups, 0))  # start from 0 for zero-based indexing
     self.lineup_id2f = {v: k for k, v in self.lineup_f2id.items()}
     t1 = matches['t1_lineup'].map(self.lineup_id2f)
     t2 = matches['t2_lineup'].map(self.lineup_id2f)
     # threshold_date = str(datetime.datetime.strptime(matches['date'].max(), '%Y-%m-%d %H:%M') -\
     #                     datetime.timedelta(days=self.time_span))
     # t1_older = matches[matches['date'] <= threshold_date]['t1_lineup'].map(self.lineup_id2f)
     # t2_older = matches[matches['date'] <= threshold_date]['t2_lineup'].map(self.lineup_id2f)
     # t1_newer = matches[matches['date'] > threshold_date]['t1_lineup'].map(self.lineup_id2f)
     # t2_newer = matches[matches['date'] > threshold_date]['t2_lineup'].map(self.lineup_id2f)
     t_num = len(lineups)  # number of teams
     # obs_older = target[matches['date'] <= threshold_date]
     # obs_newer = target[matches['date'] > threshold_date]
     # modeling older observations
     with pm.Model() as model:
         sigma = pm.HalfFlat('sigma', shape=t_num)
         alpha = pm.Normal('alpha', mu=0, sigma=sigma, shape=t_num)
         theta = pm.Deterministic('theta', alpha[t1] - alpha[t2])
         y = pm.Bernoulli('y', logit_p=theta, observed=target)
         self.trace = pm.sample(self.fit_iters, tune=self.tune)
     # modeling newer observations
Example #28
def fun_infer_model_test(df,
                         question,
                         tune=100,
                         samples=10,
                         K=2,
                         path="./",
                         name="",
                         run=1):
    ch = 1
    N = df.shape[0]
    Q = df.shape[1]

    with pm.Model() as model:
        learner = pm.Uniform('learner', shape=(N, K))
        x = pm.math.dot(learner, question.T)
        results = pm.Bernoulli('rezults', p=x, shape=(N, Q), observed=df)

    trace = None  # ensure the return below is defined even when run is falsy
    if run:
        with model:
            trace = pm.sample(samples,
                              chains=ch,
                              tune=tune,
                              discard_tuned_samples=True)

        pd.DataFrame(trace['learner'].mean(0)).to_csv(
            path + name + "learner_fixed_question" + str(K) + ".txt", sep="\t")
        print("finished: " + str(K))
    return [model, trace]
Example #29
def Two_Parameter_Model(Path, N_people, SEED, Max_pump=128):
    '''
    Path: Path where your record file exist.
    N_people: The Number of participants you have.
    SEED: Set random seed of MCMC sampling.
    Max_pump: Maximum trial of your BART setting. My setting is 128.
    '''
    traces_gamma = []
    traces_beta = []
    for part in range(1, N_people + 1):
        with pm.Model() as total_model:
            start_time = time.time()
            p = 0.15
            obs = BernData(part, Max_pump, Path)
            #Pth participant
            gamma_plus = pm.Uniform("gamma_plus", 0, 10)
            beta = pm.Uniform("beta", 0, 10)
            omega_k = -gamma_plus / np.log(1 - p)
            for i in range(len(obs)):
                for l in range(Max_pump):
                    theta_lk = 1 / (1 + np.exp(beta * (l - omega_k)))
                    prob = pm.Bernoulli("prob_{}_{}".format(i, l),
                                        p=theta_lk,
                                        observed=obs[i][l])
            _trace = pm.sample_smc(1000, cores=6, random_seed=SEED)
            print("Sampling end:", part,
                  "--- %s seconds ---" % (time.time() - start_time))
        traces_gamma.append(_trace["gamma_plus"])
        traces_beta.append(_trace["beta"])
    return traces_gamma, traces_beta
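A hypothetical call sketch; the record layout expected by Path depends on BernData, which is not shown here.

# Illustrative arguments only.
import numpy as np

traces_gamma, traces_beta = Two_Parameter_Model(Path="./bart_records/",
                                                N_people=10,
                                                SEED=2021)
gamma_means = [np.mean(t) for t in traces_gamma]  # posterior mean gamma_plus per participant
beta_means = [np.mean(t) for t in traces_beta]    # posterior mean beta per participant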
Example #30
def main(n_chains=3):
    np.random.seed(113)
    x_train, y_train_onehot = prepare_data(train=True,
                                           onehot=True,
                                           take_first=500)
    print(f"Using {len(x_train)} train samples.")
    model = pm.Model()
    with model:
        w = pm.Bernoulli('w', p=0.5, shape=(784, 10))
        logit_vec = tt.dot(x_train, w)
        proba = softmax(logit_vec)
        y_obs = pm.Multinomial('y_obs', n=1, p=proba, observed=y_train_onehot)
        trace = None
        if os.path.exists(fpath_trace):
            with open(fpath_trace, 'rb') as f:
                trace = pickle.load(f)
            if trace.nchains != n_chains:
                print(
                    f"Reset previous progress {trace} to match n_chains={n_chains}"
                )
                trace = None
        trace = pm.sample(draws=3,
                          njobs=1,
                          chains=n_chains,
                          tune=0,
                          trace=trace)
    if trace.nchains == n_chains:
        # we didn't stop the training process
        with open(fpath_trace, 'wb') as f:
            pickle.dump(trace, f)
    convergence_plot(trace=trace, train=True)