Example #1
def level_model(y_old, y_new):
    # PyMC3 level changepoint model
    # level is modeled by Poisson RVs
    # y_old: data points observed since the last changepoint
    # y_new: the most recent window (win = 10) of data points; assumed non-empty
    mean_new = y_new.mean() if len(y_new) > 0 else None
    mean_old = y_old.mean() if len(y_old) > 0 else mean_new
    y_ = np.concatenate((y_old, y_new))
    y_obs = theano.shared(y_)
    with pm.Model() as model:
        w = pm.Dirichlet('w', a=np.ones(2))
        lambda_ = pm.Exponential('lambda', lam=np.array([1.0 / mean_old, 1.0 / mean_new]), shape=(2,))
        components = pm.Poisson.dist(mu=lambda_, shape=(2, ))
        diff = pm.Deterministic('diff', lambda_[0] - lambda_[1])
        obs = pm.Mixture('obs', w=w, comp_dists=components, observed=y_obs)
    return model
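A minimal usage sketch (not from the original source): synthetic Poisson counts with a level shift, assuming numpy, pymc3, and theano are imported as np, pm, and theano, as the snippet above requires.

y_old = np.random.poisson(5, size=50)   # counts before the candidate changepoint
y_new = np.random.poisson(9, size=10)   # most recent window of 10 points

model = level_model(y_old, y_new)
with model:
    trace = pm.sample(1000, tune=1000, cores=1)

# 'diff' is lambda_old - lambda_new; posterior mass far from zero
# suggests a level change at the window boundary.
print(trace['diff'].mean())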
Example #2
    def fit_logreturns(self, data):

        def likelihood(x):

            def _normal(x, sigma):  # assumes a mu of 0
                return pm.Normal.dist(mu=0., sd=sigma).logp(x)

            nu_t = pm.math.dot(rhos, x[1:])
            err = tt.reshape(x[0] - nu_t, [-1])

            # mixture of an AR-error normal and a near-delta spike at zero
            logps = (w[0] * pm.math.exp(_normal(err, pm.math.exp(s)))) + \
                    (w[1] * pm.math.exp(_normal(x[0], float(1e-100))))

            return pm.math.log(logps)

        with pm.Model() as self.model:
            W = np.array([1., 1.])

            w = pm.Dirichlet('w', W)

            intercept = pm.Normal('intercept', mu=-5, sd=5., testval=-5.)
            theta = pm.Uniform('theta', lower=0.001, upper=1.)
            sigma = pm.Uniform('sigma', lower=0.001, upper=10.)

            rhos = pm.Uniform('rhos', lower=-1., upper=1., shape=self.num_lags)

            sde = lambda x, theta, mu: (theta * (mu-x), sigma)
            s = ts.EulerMaruyama('path',
                                 1.0,
                                 sde,
                                 [theta, intercept],
                                 shape=len(data) - self.num_lags,
                                 testval=np.ones_like(data[self.num_lags:]))

            lagged_data = self._lags(data)

            pm.DensityDist('obs', likelihood, observed=lagged_data)

            self.trace = pm.sample(3000, tune=3000, nuts_kwargs=dict(target_accept=0.95))
            pm.traceplot(self.trace, varnames=['w', 'intercept', 'rhos', 'theta', 'sigma'])

        self.estimated_rhos = np.mean(self.trace['rhos'], axis=0)
        self.estimated_w = np.mean(self.trace['w'], axis=0)
        self.estimated_intercept = np.mean(self.trace['intercept'], axis=0)
        self.estimated_theta = np.mean(self.trace['theta'], axis=0)
        self.estimated_sigma = np.mean(self.trace['sigma'], axis=0)

        self.data = data
Example #3
def run_regional_model(data,
                       progressbar=False,
                       db_file=None,
                       burn=2000,
                       samp=5000):

    # Setup masks
    r_dum = data.Region.str.get_dummies()
    regs = r_dum.columns
    r_mtx = r_dum.values  # DataFrame.as_matrix() is deprecated
    num_reg = r_mtx.shape[1]

    heads = [{'name': 'Region', 'values': regs.tolist()}]

    with pm.Model() as model:
        b0_mu = pm.Normal('b0_mu', mu=4, sd=3)
        sigma = pm.Uniform('sigma', lower=0.7, upper=70)
        thresh = pm.Dirichlet('thresh', a=np.ones(5))

        mu_reg = pm.Normal('mu_reg', mu=0, sd=3, shape=num_reg)

        reg_mu = b0_mu + mu_reg

        reg_range = tt.arange(num_reg)
        cat_ps, update = theano.scan(
            fn=lambda r_i: compute_ps(thresh, reg_mu[r_i], sigma),
            sequences=[reg_range])
        reg_ps = pm.Deterministic('reg_ps', cat_ps)
        nat_ps = pm.Deterministic('nat_ps', compute_ps(thresh, b0_mu, sigma))

        cat_r = theano.dot(r_mtx, cat_ps)
        resp = data.response - 1
        results = pm.Categorical('results', p=cat_r, observed=resp)

    with model:
        db = None
        if db_file is not None:
            db = pm.backends.Text(db_file)
        step = pm.Metropolis()
        burn = pm.sample(burn, step=step, progressbar=progressbar)
        trace = pm.sample(samp,
                          step=step,
                          start=burn[-1],
                          progressbar=progressbar,
                          trace=db)

    return {'heads': heads, 'trace': trace}
Example #4
def build_model(data, K):
    N = data.shape[0]
    d = data.shape[1]
    print('Building model with n=%d, d=%d, k=%d' % (N, d, K))
    with pm.Model() as gmm:
        #Prior over component weights
        if K > 1:
            p = pm.Dirichlet('p', a=np.array([1.] * K))

        #Prior over component means
        mus = [
            pm.MvNormal('mu_%d' % i,
                        mu=pm.floatX(np.zeros(d)),
                        tau=pm.floatX(0.1 * np.eye(d)),
                        shape=(d, ))
            #testval = pm.floatX(np.ones(d)))
            for i in range(K)
        ]
        #Cholesky decomposed LKJ prior over component covariance matrices
        packed_L = [
            pm.LKJCholeskyCov('packed_L_%d' % i,
                              n=d,
                              eta=2.,
                              sd_dist=pm.HalfCauchy.dist(1))
            #testval = pm.floatX(np.ones(int(d*(d-1)/2+d))))
            for i in range(K)
        ]
        #Unpack packed_L into full array
        L = [pm.expand_packed_triangular(d, packed_L[i]) for i in range(K)]
        #Convert L to sigma and tau for convenience
        sigma = [
            pm.Deterministic('sigma_%d' % i, L[i].dot(L[i].T))
            for i in range(K)
        ]
        tau = [
            pm.Deterministic('tau_%d' % i, matrix_inverse(sigma[i]))
            for i in range(K)
        ]

        #Specify the likelihood
        if K > 1:
            mvnl = [pm.MvNormal.dist(mu=mus[i], chol=L[i]) for i in range(K)]
            Y_obs = pm.Mixture('Y_obs', w=p, comp_dists=mvnl, observed=data)
        else:
            Y_obs = pm.MvNormal('Y_obs', mu=mus[0], chol=L[0], observed=data)

    return gmm
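A hypothetical usage sketch with synthetic 2-D data (illustrative only; assumes the same numpy/pymc3/theano imports the function itself relies on):

np.random.seed(0)
data = np.vstack([
    np.random.multivariate_normal([0, 0], np.eye(2), size=200),
    np.random.multivariate_normal([4, 4], np.eye(2), size=200),
])

gmm = build_model(data, K=2)
with gmm:
    trace = pm.sample(1000, tune=1000, cores=1)

print(trace['p'].mean(axis=0))   # posterior mixture weights, roughly [0.5, 0.5]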
Example #5
def test_categorical():
    k = 3
    ndata = 5000

    v = np.random.randint(0, k, ndata)

    with pymc3.Model() as model:
        p = pymc3.Dirichlet(name='p', a=np.array([1., 1., 1.]), shape=k)
        category = pymc3.Categorical(name='category',
                                     p=p,
                                     shape=ndata,
                                     observed=v)
        # 'category' is observed data here, so only p is sampled
        step = pymc3.Metropolis(vars=[p])
        trace = pymc3.sample(3000, step=step)

    pymc3.traceplot(trace)
    plt.show()
Example #6
def dice_toss():
    # observed face counts: each of the six faces seen 6 times
    h = np.array([6, 6, 6, 6, 6, 6])
    # expand counts into individual face observations (0..5);
    # pm.Categorical expects category indices, not frequencies
    y_obs = np.repeat(np.arange(6), h)

    a = np.ones(6)
    niter = 1000
    with pm.Model() as model:  # context management
        # define priors
        p = pm.Dirichlet('p', a=a)
        # define likelihood
        y = pm.Categorical('y', p=p, observed=y_obs)
        # inference
        trace = pm.sample(niter, progressbar=True)
    
    pm.plots.traceplot(trace)
    plt.show()
Example #7
	def run_non_sparse_initialization(self):
		rat = self.allelic_counts/self.total_counts
		nans = np.isnan(rat)
		# Run bb-mf
		with pm.Model() as bb_glm:
			CONC = pm.HalfCauchy('CONC', beta=5, shape=(1,self.S), testval=self.conc_init)
			ALPHA = pm.HalfCauchy('ALPHA', beta=5, shape=(1, self.S))
			BETA = pm.Normal('BETA', mu=0, tau=(1.0/10.0), shape=(self.S, self.num_cov), testval=self.beta_init)
			U = pm.Normal('U', mu=0, tau=(1.0/1.0), shape=(self.N, self.K), testval=self.U_init)
			V = pm.Normal('V', mu=0, tau=(1.0/1.0), shape=(self.S, self.K), testval=self.V_init)

			MU_A = pm.Normal("MU_A", mu=0., sd=100**2, shape=(1,self.S), testval=self.mu_a_init)
			SIGMA_A = pm.HalfCauchy("SIGMA_A", beta=5.0, shape=(1,self.S), testval=self.sigma_a_init)
			mu_a_mat = pm.math.dot(np.ones((self.I,1)), MU_A)
			sigma_a_mat = pm.math.dot(np.ones((self.I,1)), SIGMA_A)
			A = pm.Normal('A', mu=mu_a_mat, sigma=sigma_a_mat, shape=(self.I,self.S), testval=self.A_init)

			p = pm.math.invlogit(pm.math.dot(self.cov, BETA.T) + pm.math.dot(U,V.T) + A[self.Z,:])
			conc_mat = pm.math.dot(np.ones((self.N,1)), CONC)

			w = pm.Dirichlet('w', a=np.ones((self.S,2)))

			beta_null_mat = pm.math.dot(np.ones((self.N,1)), ALPHA)

			BB_GLM = pm.BetaBinomial.dist(alpha=(p*conc_mat), beta=((1.0-p)*conc_mat), n=self.total_counts)
			BB_NULL = pm.BetaBinomial.dist(alpha=(np.ones((self.N,self.S))), beta=7.0+beta_null_mat, n=self.total_counts)

			mixmod = pm.Mixture('mixmodel', w=w, comp_dists=[BB_GLM, BB_NULL], observed=self.allelic_counts, shape=2)
			approx = pm.fit(method='advi', n=2000)
		#pickle.dump(approx, open(self.output_root + '_model', 'wb'))
		#approx = pickle.load( open(self.output_root + '_model', "rb" ) )
		means_dict = approx.bij.rmap(approx.params[0].eval())
		y = means_dict['w_stickbreaking__'].T
		y = np.concatenate([y, -np.sum(y, 0, keepdims=True)])
		e_y = np.exp(y - np.max(y, 0, keepdims=True))
		w_learned = e_y / np.sum(e_y, 0, keepdims=True)
		self.conc_init = np.exp(means_dict['CONC_log__'])
		self.alpha_init = np.exp(means_dict['ALPHA_log__'])
		self.beta_init = means_dict['BETA']
		self.U_init = means_dict['U']
		self.V_init = means_dict['V']
		self.mu_a_init = means_dict['MU_A']
		self.sigma_a_init = np.exp(means_dict['SIGMA_A_log__'])
		self.A_init = means_dict['A']
		self.w_init = w_learned.T
Example #8
def laser_late_trials(data, num_emissions):

    # Make the pymc3 model
    with pm.Model() as model:
        # Dirichlet prior on the emission/spiking probabilities - 4 states
        p = pm.Dirichlet('p', np.ones(num_emissions), shape=(4, num_emissions))

        # Discrete Uniform switch times
        # Switch from detection to identity firing
        t1 = pm.DiscreteUniform('t1', lower=20, upper=60)
        # Switch from identity to palatability firing
        t2 = pm.DiscreteUniform('t2', lower=t1 + 20, upper=120)
        # Switch from palatability firing to end
        t3 = pm.DiscreteUniform('t3', lower=t2 + 30, upper=150)

        # Add potentials to keep the switch times from coming too close to each other
        #t_pot1 = pm.Potential('t_pot1', tt.switch(t2 - t1 >= 20, 0, -np.inf))
        #t_pot2 = pm.Potential('t_pot2', tt.switch(t3 - t2 >= 20, 0, -np.inf))
        #t_pot3 = pm.Potential('t_pot3', tt.switch(t3 - t1 >= 40, 0, -np.inf))

        # Get the actual state numbers based on the switch times
        states1 = tt.switch(t1 >= np.arange(150), 0, 1)
        states2 = tt.switch(t2 >= np.arange(150), states1, 2)
        states = tt.switch(t3 >= np.arange(150), states2, 3)

        # Categorical observations
        obs = pm.Categorical('obs',
                             p=p[states],
                             observed=np.append(data[:140], data[190:]))

    # Inference button :D
    with model:
        tr = pm.sample(300000,
                       init=None,
                       step=pm.Metropolis(),
                       njobs=2,
                       start={
                           't1': 25,
                           't2': 75,
                           't3': 125
                       },
                       progressbar=False)

    # Return the inference!
    return model, tr[250000:]
Example #9
def run_national_model(data):

    with pm.Model() as model:
        mu = pm.Normal('mu', mu=4, sd=3)
        sigma = pm.Uniform('sigma', lower=0.7, upper=70)
        thresh = pm.Dirichlet('thresh', a=np.ones(5))

        cat_p = compute_ps(thresh, mu, sigma)

        resp = data.response - 1
        results = pm.Categorical('results', p=cat_p, observed=resp)

    with model:
        step = pm.Metropolis()
        burn = pm.sample(2000, step=step)
        trace = pm.sample(5000, step=step, start=burn[-1])

    return trace
Example #10
    def test_multivariate2(self):
        # Added test for issue #3271
        mn_data = np.random.multinomial(n=100, pvals=[1 / 6.0] * 6, size=10)
        with pm.Model() as dm_model:
            probs = pm.Dirichlet("probs", a=np.ones(6))
            obs = pm.Multinomial("obs", n=100, p=probs, observed=mn_data)
            burned_trace = pm.sample(20,
                                     tune=10,
                                     cores=1,
                                     return_inferencedata=False,
                                     compute_convergence_checks=False)
        sim_priors = pm.sample_prior_predictive(samples=20, model=dm_model)
        sim_ppc = pm.sample_posterior_predictive(burned_trace,
                                                 samples=20,
                                                 model=dm_model)
        assert sim_priors["probs"].shape == (20, 6)
        assert sim_priors["obs"].shape == (20, ) + mn_data.shape
        assert sim_ppc["obs"].shape == (20, ) + mn_data.shape
Example #11
def getAngelRate(data, n_sample=10000, n_chain=3, ax=None):
    # Organize the data
    data_0 = data.query('campaign != 1')
    data_1 = data.query('campaign == 1')
    d = np.array([
        [sum(data_0['angel'] == 0), sum(data_0['angel'] == 1), sum(data_0['angel'] == 2)],
        [sum(data_1['angel'] == 0), sum(data_1['angel'] == 1), sum(data_1['angel'] == 2)],
    ])
    weight = np.array([[1.0, 1.0, 1.0], [1.0, 0.0, 2.0]])
    # Parameter estimation
    with pm.Model() as model:
        alpha = [1., 1., 1.]  # hyper-parameter of DirichletDist.
        pi = pm.Dirichlet('pi', a=np.array(alpha))
        for i in np.arange(d.shape[0]):
            piw = pi * weight[i]
            m = pm.Multinomial('m_%s' % (i),
                               n=np.sum(d[i]),
                               p=piw,
                               observed=d[i])
        trace = pm.sample(n_sample, chains=n_chain)
    np.savetxt('trace_pi.csv', trace['pi'], delimiter=',')
    # Silver
    hpd_l, hpd_u = pm.hpd(trace['pi'][:, 1])
    print('Silver : 95% HPD : {}-{}'.format(hpd_l, hpd_u))
    print('Silver ExpectedValue : {}'.format(trace['pi'][:, 1].mean()))
    # Gold
    hpd_l, hpd_u = pm.hpd(trace['pi'][:, 2])
    print('Gold : 95% HPD : {}-{}'.format(hpd_l, hpd_u))
    print('Gold ExpectedValue : {}'.format(trace['pi'][:, 2].mean()))
    # save fig
    if ax is not None:
        pm.plot_posterior(trace['pi'][:, 0], ax=ax[0])
        pm.plot_posterior(trace['pi'][:, 1], ax=ax[1])
        pm.plot_posterior(trace['pi'][:, 2], ax=ax[2])
        ax[0].set_title('Nothing')
        ax[1].set_title('SilverAngel')
        ax[2].set_title('GoldAngel')
    return trace
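A hypothetical call sketch: the column names 'campaign' and 'angel' are inferred from the query strings in the function body; the toy DataFrame below is illustrative only and assumes pandas as pd and matplotlib.pyplot as plt.

df = pd.DataFrame({
    'campaign': np.random.choice([0, 1], size=500),
    'angel': np.random.choice([0, 1, 2], size=500, p=[0.90, 0.08, 0.02]),
})
fig, ax = plt.subplots(1, 3, figsize=(12, 3))
trace = getAngelRate(df, n_sample=2000, n_chain=2, ax=ax)
plt.show()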
Example #12
def fun_infer_model_learn(df,
                          tune=100,
                          samples=10,
                          K=2,
                          path="./",
                          name="",
                          run=1):
    print("Write: " + path + name + "learner_" + str(K) + ".txt")
    print("Write: " + path + name + "question_" + str(K) + ".txt")
    print("Write: " + path + name + "concentration_" + str(K) + ".txt")
    ch = 1
    N = df.shape[0]
    Q = df.shape[1]
    # for K in Krange:
    with pm.Model() as model:
        learner = pm.Uniform('learner', shape=(N, K))
        concentration = pm.Uniform('concentration', testval=.5)
        question = pm.Dirichlet('question',
                                a=np.repeat(concentration, K),
                                shape=(Q, K))
        # difficulty=pm.Uniform ('difficulty',0.1,4,shape=(Q,1),testval=np.repeat(.5,Q).reshape(Q,1))
        x = pm.math.dot(learner, question.T)
        results = pm.Bernoulli('results', p=x, shape=(N, Q), observed=df)

    if run:
        with model:
            trace = pm.sample(samples,
                              chains=ch,
                              tune=tune,
                              discard_tuned_samples=True)

        # a=pm.math.dot(trace['learner'].mean(0), trace['question'][:,:].mean(0).T)

        pd.DataFrame(trace['learner'].mean(0)).to_csv(
            path + name + "learner_" + str(K) + ".txt", sep="\t")
        pd.DataFrame(trace['question'].mean(0)).to_csv(
            path + name + "question_" + str(K) + ".txt", sep="\t")
        # pd.DataFrame(a.eval()).to_csv(path+name+"estim_"+str(K)+".txt",sep="\t")
        pd.DataFrame(trace['concentration']).to_csv(
            path + name + "concentration_" + str(K) + ".txt", sep="\t")
        print("finished: " + str(K))
        return [model, trace]
    return [model, None]
Example #13
File: base.py Project: zhuyiche/pymc-learn
    def predict_proba(self, X, return_std=False):
        """
        Predicts probabilities of new data with a trained GaussianMixture Model

        Parameters
        ----------
        X : numpy array, shape [n_samples, n_features]

        cats : numpy array, shape [n_samples, ]

        return_std : Boolean flag of whether to return standard deviations with
        mean probabilities. Defaults to False.
        """

        if self.trace is None:
            raise NotFittedError('Run fit on the model before predict.')

        # num_samples = X.shape[0]

        if self.cached_model is None:
            self.cached_model = self.create_model()

        self._set_shared_vars({'model_input': X})
        K = self.num_components

        with self.cached_model:
            pi = pm.Dirichlet("probability",
                              a=np.ones(K),  # was hard-coded to 3 components
                              shape=K)
            _vars = [pi]

            ppc = pm.sample_ppc(
                self.trace,
                # model=self.cached_model,
                vars=_vars,
                samples=2000,
                size=len(X))

        if return_std:
            return ppc['probability'].mean(axis=0), \
                   ppc['probability'].std(axis=0)
        else:
            return ppc['probability'].mean(axis=0)
Example #14
def get_beta_bernoulli_mixture(X, params):
    n_doc, n_feat = X.shape
    n_comp = params['n_comp']

    with pm.Model() as model:
        pkw = pm.Beta('pkw',
                      alpha=params['pkw_beta_dist_alpha'],
                      beta=params['pkw_beta_dist_beta'],
                      shape=(n_comp, n_feat))
        p_comp = pm.Dirichlet('p_comp',
                              a=params['pcomp_dirichlet_dist_alpha'] * np.ones(n_comp))
        z = pm.Categorical('z',
                           p=p_comp,
                           shape=n_doc)
        x = pm.Bernoulli('x',
                         p=pkw[z],
                         shape=(n_doc, n_feat),
                         observed=X)
    return model
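The params keys are read directly from the function body; a hypothetical call with illustrative values, assuming numpy/pymc3 imported as np/pm:

X = (np.random.rand(100, 20) < 0.3).astype(int)   # binary document-feature matrix
params = {
    'n_comp': 3,
    'pkw_beta_dist_alpha': 1.0,
    'pkw_beta_dist_beta': 1.0,
    'pcomp_dirichlet_dist_alpha': 1.0,
}
model = get_beta_bernoulli_mixture(X, params)
with model:
    # pm.sample assigns a categorical Gibbs step to the discrete z automatically
    trace = pm.sample(500, tune=500, cores=1)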
Example #15
    def run_mcmc(self, n_generations, n_burn):
        logger.info("{} subpaths in total".format(
            len(self.traversome.all_sub_paths)))
        isomer_num = self.traversome.num_of_isomers
        with pm.Model() as isomer_model:
            isomer_percents = pm.Dirichlet(name="props",
                                           a=np.ones(isomer_num),
                                           shape=(isomer_num, ))
            loglike_expression = self.traversome.get_multinomial_like_formula(
                isomer_percents=isomer_percents,
                log_func=tt.log).loglike_expression
            pm.Potential("likelihood", loglike_expression)
            # pm.Deterministic("likelihood", likes)
            # pm.DensityDist?
            # pm.Mixture(name="likelihood", w=np.ones(len(components)), comp_dists=components, observed=data)
            # pm.Binomial("path_last", n=n__num_reads_in_range, p=this_prob, observed=x__num_matched_reads)
            # sample from the distribution

            # uses the BFGS optimization algorithm to find the maximum of the log-posterior
            logger.info("Searching the maximum of the log-posterior ..")
            start = pm.find_MAP(model=isomer_model)
            # trace = pm.sample_smc(n_generations, parallel=False)

            # In an upcoming release,
            # pm.sample will return an `arviz.InferenceData` object instead of a `MultiTrace` by default
            logger.info("Using NUTS sampler ..")
            self.trace = pm.sample(n_generations,
                                   tune=n_burn,
                                   discard_tuned_samples=True,
                                   cores=1,
                                   init='adapt_diag',
                                   start=start,
                                   return_inferencedata=True)

            logger.info("Summarizing the MCMC traces ..")
            summary = az.summary(self.trace)
            logger.info("\n{}".format(summary))
            axes = az.plot_trace(self.trace)
            fig = axes.ravel()[0].figure
            fig.savefig(
                os.path.join(self.traversome.outdir, "mcmc.trace_plot.pdf"))
        return OrderedDict([(_go, _prop)
                            for _go, _prop in enumerate(summary["mean"])])
Example #16
    def test_multivariate2(self):
        # Added test for issue #3271
        mn_data = np.random.multinomial(n=100, pvals=[1 / 6.0] * 6, size=10)
        with pm.Model() as dm_model:
            probs = pm.Dirichlet("probs", a=np.ones(6), shape=6)
            obs = pm.Multinomial("obs", n=100, p=probs, observed=mn_data)
            burned_trace = pm.sample(20, tune=10, cores=1)
        sim_priors = pm.sample_prior_predictive(samples=20, model=dm_model)
        sim_ppc = pm.sample_posterior_predictive(burned_trace,
                                                 samples=20,
                                                 model=dm_model)
        assert sim_priors["probs"].shape == (20, 6)
        assert sim_priors["obs"].shape == (20, ) + obs.distribution.shape
        assert sim_ppc["obs"].shape == (20, ) + obs.distribution.shape

        sim_ppc = pm.fast_sample_posterior_predictive(burned_trace,
                                                      samples=20,
                                                      model=dm_model)
        assert sim_ppc["obs"].shape == (20, ) + obs.distribution.shape
Example #17
def csp_modeling(
    obs,
    templates,
    dbname,
    redo=False,
):
    """ Model a CSP with bayesian model. """
    if os.path.exists(dbname) and not redo:
        return dbname
    with pm.Model() as model:
        w = pm.Dirichlet("w", np.ones(len(templates)))
        bestfit = pm.math.dot(w.T, templates)
        sigma = pm.Exponential("sigma", lam=1)
        likelihood = pm.Normal('like', mu=bestfit, sd=sigma, observed=obs)
    with model:
        trace = pm.sample(1000, tune=1000)
    results = {'model': model, "trace": trace}
    with open(dbname, 'wb') as buff:
        pickle.dump(results, buff)
    return dbname
Example #18
def test_DiscreteMarkovChain_point():
    test_Gammas = at.as_tensor_variable(np.array([[[1.0, 0.0], [0.0, 1.0]]]))

    with pm.Model():
        # XXX: `draw_values` won't use the `Deterministic`s values in the `point` map!
        # Also, `Constant` is only for integer types (?!), so we can't use that.
        test_gamma_0 = pm.Dirichlet("gamma_0", np.r_[1.0, 1000.0], shape=2)
        test_point = {"gamma_0": np.r_[1.0, 0.0]}
        assert np.all(
            DiscreteMarkovChain.dist(test_Gammas, test_gamma_0, shape=10).random(
                point=test_point
            )
            == 0
        )
        assert np.all(
            DiscreteMarkovChain.dist(test_Gammas, 1.0 - test_gamma_0, shape=10).random(
                point=test_point
            )
            == 1
        )
Example #19
	def run_factorization(self):
		rat = self.allelic_counts/self.total_counts
		nans = np.isnan(rat)
		# Run bb-mf
		with pm.Model() as bb_glm:
			CONC = pm.HalfCauchy('CONC', beta=5, shape=(1,self.S), testval=self.conc_init)
			ALPHA = pm.HalfCauchy('ALPHA', beta=5, shape=(1, self.S), testval=self.alpha_init)
			BETA = pm.Normal('BETA', mu=0, tau=(1.0/10.0), shape=(self.S, self.num_cov), testval=self.beta_init)
			gamma = pm.HalfCauchy('GAMMA', beta=5, shape=(self.N, self.K), testval=np.ones((self.N, self.K)))
			U = pm.Normal('U', mu=0, sigma=1.0/gamma, shape=(self.N, self.K), testval=self.U_init)
			V = pm.Normal('V', mu=0, tau=(1.0/1.0), shape=(self.S, self.K), testval=self.V_init)

			MU_A = pm.Normal("MU_A", mu=0., sd=100**2, shape=(1,self.S), testval=self.mu_a_init)
			SIGMA_A = pm.HalfCauchy("SIGMA_A", beta=5.0, shape=(1,self.S), testval=self.sigma_a_init)
			mu_a_mat = pm.math.dot(np.ones((self.I,1)), MU_A)
			sigma_a_mat = pm.math.dot(np.ones((self.I,1)), SIGMA_A)
			A = pm.Normal('A', mu=mu_a_mat, sigma=sigma_a_mat, shape=(self.I,self.S), testval=self.A_init)

			p = pm.math.invlogit(pm.math.dot(self.cov, BETA.T) + pm.math.dot(U,V.T) + A[self.Z,:])
			conc_mat = pm.math.dot(np.ones((self.N,1)), CONC)

			w = pm.Dirichlet('w', a=np.ones((self.S,2)), testval=self.w_init)

			beta_null_mat = pm.math.dot(np.ones((self.N,1)), ALPHA)

			BB_GLM = pm.BetaBinomial.dist(alpha=(p*conc_mat), beta=((1.0-p)*conc_mat), n=self.total_counts)
			BB_NULL = pm.BetaBinomial.dist(alpha=(np.ones((self.N,self.S))), beta=7.0+beta_null_mat, n=self.total_counts)

			mixmod = pm.Mixture('mixmodel', w=w, comp_dists=[BB_GLM, BB_NULL], observed=self.allelic_counts, shape=2)
			approx = pm.fit(method='advi', n=30000)
		#pickle.dump(approx, open(self.output_root + '_model', 'wb'))
		#approx = pickle.load( open(self.output_root + '_model', "rb" ) )
		means_dict = approx.bij.rmap(approx.params[0].eval())
		np.savetxt(self.output_root + '_temper_U.txt', (means_dict['U']), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_V.txt', (means_dict['V'].T), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_BETA.txt', (means_dict['BETA'].T), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_ALPHA.txt', (np.exp(means_dict['ALPHA_log__'])), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_GAMMA.txt', (np.exp(means_dict['GAMMA_log__'])), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_CONC.txt', (np.exp(means_dict['CONC_log__'])), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_w_stick_breaking.txt', (np.exp(means_dict['w_stickbreaking__'])), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_ELBO.txt', (approx.hist), fmt="%s", delimiter='\t')
Example #20
    def SIR_training(self, sequence, totalpopulation):
        self.popu = totalpopulation
        self.data = sequence[:]
        acc_infect = sequence[:, 0] / totalpopulation
        basic_model = pm.Model()
        n = len(acc_infect)
        I = acc_infect[0]
        R = 0
        S = 1 - I
        with basic_model:
            BoundedNormal = pm.Bound(pm.Normal, lower=0.0, upper=1.0)
            BoundedNormal2 = pm.Bound(pm.Normal, lower=1.0, upper=10.0)
            theta = []
            r0 = BoundedNormal2('R_0', mu=self.r0, sigma=0.72)
            gamma = BoundedNormal('gamma', mu=self.gamma, sigma=0.02)
            beta = pm.Deterministic('beta', r0 * gamma)
            ka = pm.Gamma('ka', 2, 0.0001)
            Lambda1 = pm.Gamma('Lambda1', 2, 0.0001)
            qu = pm.Uniform('qu', lower=0.1, upper=1.0)

            theta.append(
                pm.Deterministic('theta_' + str(0), pm.math.stack([S, I, R])))
            for i in range(1, n):
                states = theta[i - 1]
                solve_theta = pm.Deterministic(
                    'solve_theta_' + str(i),
                    ka * pm.math.stack([
                        states[0] - qu * beta * states[0] * states[1],
                        states[1] + qu * beta * states[0] * states[1] -
                        gamma * states[1], states[2] + gamma * states[1]
                    ]))
                theta.append(
                    pm.Dirichlet('theta_' + str(i), a=solve_theta, shape=(3)))
                real_infect = pm.Beta('real_infect_' + str(i),
                                      Lambda1 * theta[i][1],
                                      Lambda1 * (1 - theta[i][1]),
                                      observed=acc_infect[i])

            step = pm.Metropolis()
            Trace = pm.sample(2000, cores=16, chains=1, init='auto', step=step)
            self.trace = Trace
Example #21
    def __init__(self,
                 wave,
                 flux,
                 templates,
                 adegree=None,
                 mdegree=None,
                 reddening=False):
        """ Model CSP with bayesian model. """
        self.wave = wave
        self.flux = flux
        self.templates = templates
        self.ntemplates = len(templates)
        self.adegree = adegree
        # Construct additive polynomial
        if self.adegree is not None:
            _ = np.linspace(-1, 1, len(self.wave))
            self.apoly = np.zeros((adegree + 1, len(_)))
            for i in range(adegree + 1):
                self.apoly[i] = legendre(i)(_)
        else:
            self.apoly = np.zeros(1)
        # Build statistical model
        with pm.Model() as self.model:
            self.flux0 = pm.Normal("f0", mu=1, sd=5)  # Multiplicative constant
            self.w = pm.Dirichlet("w",
                                  np.ones(self.ntemplates) / self.ntemplates)
            if self.adegree is None:
                self.wpoly = pm.Deterministic(
                    "wpoly", pm.math.zeros_like(self.flux0))
            else:
                self.wpoly = pm.Normal("wpoly", mu=0, sd=1,
                                       shape=self.adegree)
            self.bestfit = pm.Deterministic(
                "bestfit",
                self.__call__(self.w,
                              wpoly=self.wpoly,
                              f0=self.flux0,
                              math=pm.math))
            self.sigma = pm.Exponential("sigma", lam=0.01)
            self.like = pm.Normal('like',
                                  mu=self.bestfit,
                                  sd=self.sigma,
                                  observed=flux)
Example #22
def get_logisticnormal_bernoulli_mixture(X, params):
    n_doc, n_feat = X.shape
    n_comp = params['n_comp']

    with pm.Model() as model:
        theta = pm.MvNormal('theta',
                            mu=np.zeros(n_feat),
                            cov=np.identity(n_feat),
                            shape=(n_comp, n_feat))
        pkw = pm.Deterministic('pkw',
                               1 / (1 + tt.exp(-theta)))
        p_comp = pm.Dirichlet('p_comp',
                              a=params['pcomp_dirichlet_dist_alpha'] * np.ones(n_comp))
        z = pm.Categorical('z',
                           p=p_comp,
                           shape=n_doc)
        x = pm.Bernoulli('x',
                         p=pkw[z],
                         shape=(n_doc, n_feat),
                         observed=X)
    return model
Example #23
    def _sample_pymc3(cls, dist, size):
        """Sample from PyMC3."""

        import pymc3
        pymc3_rv_map = {
            'MultivariateNormalDistribution': lambda dist:
                pymc3.MvNormal('X', mu=matrix2numpy(dist.mu, float).flatten(),
                cov=matrix2numpy(dist.sigma, float), shape=(1, dist.mu.shape[0])),
            'MultivariateBetaDistribution': lambda dist:
                pymc3.Dirichlet('X', a=list2numpy(dist.alpha, float).flatten()),
            'MultinomialDistribution': lambda dist:
                pymc3.Multinomial('X', n=int(dist.n),
                p=list2numpy(dist.p, float).flatten(), shape=(1, len(dist.p)))
        }

        dist_list = pymc3_rv_map.keys()

        if dist.__class__.__name__ not in dist_list:
            return None

        with pymc3.Model():
            pymc3_rv_map[dist.__class__.__name__](dist)
            return pymc3.sample(size, chains=1, progressbar=False)[:]['X']
Example #24
def create_dirac_zero_hmm(X, mu, xis, observed):
    S = 2
    z_tt = tt.stack([tt.dot(X, xis[..., s, :]) for s in range(S)], axis=1)
    Gammas_tt = pm.Deterministic("Gamma", multilogit_inv(z_tt))
    gamma_0_rv = pm.Dirichlet("gamma_0", np.ones((S, )))

    if type(observed) == np.ndarray:
        T = X.shape[0]
    else:
        T = X.get_value().shape[0]

    V_rv = DiscreteMarkovChain("V_t", Gammas_tt, gamma_0_rv, shape=T)
    if type(observed) == np.ndarray:
        V_rv.tag.test_value = (observed > 0) * 1
    else:
        V_rv.tag.test_value = (observed.get_value() > 0) * 1
    Y_rv = SwitchingProcess(
        "Y_t",
        [pm.Constant.dist(0), pm.Constant.dist(mu)],
        V_rv,
        observed=observed,
    )
    return Y_rv
Example #25
    def __init__(self, npersons, nitems, nlevels):
        super(FourPGRM, self).__init__(npersons, nitems, nlevels)
        with self.model:
            phi = pm.Dirichlet(name='phi',
                               a=np.ones(2 * nlevels - 1),
                               shape=(1, nitems, 2 * nlevels - 1))
            # 1. drop the last term, which would make the top term 1 after
            #    cumsum.
            # 2. reshape into a 2 x L matrix for each item, each row
            #    corresponding to gamma and sigma, respectively
            # 3. cumulatively sum across the gamma->sigma, to ensure
            #    sigma > gamma
            # 4. cumulative gammas and sigmas across levels to ensure monotone
            phi_star = phi[..., :-1]
            phi_star = phi_star.reshape((1, nitems, 2, nlevels - 1))
            phi_star = phi_star.cumsum(axis=-2)
            phi_star = phi_star.cumsum(axis=-1)

            # first row is gamma, second is sigma
            gamma = pm.Deterministic(name='gamma', var=phi_star[..., 0, :])
            sigma = pm.Deterministic(name='sigma', var=phi_star[..., 1, :])
        param_list = [gamma, sigma]
        self.params.update({var.name: var for var in param_list})
Example #26
    def _model_eval(self, X, K, n_samples):
        # setup model
        with pm.Model() as model:
            data_dim = X.shape[1]
            # prior of mixture ratio
            w = pm.Dirichlet('w', a=np.ones(K))
            # setup the likelihood
            init_mu = np.zeros(data_dim)
            components = [
                self._multivariate_normal_dist(init_mu, suffix=k)
                for k in range(K)
            ]
            like = pm.Mixture('like', w=w, comp_dists=components, observed=X)

        # fit model
        with model:
            trace = pm.sample(2000,
                              step=pm.NUTS(),
                              start=pm.find_MAP(),
                              tune=1000)

        # store the result
        self.result['K=' + str(K)] = trace
Example #27
    def run_model(self, **kwargs):
        """Run Bayesian model using prefit Y's for each Gene and Dataset distribution"""
        # Importing here since Theano base_compiledir needs to be set prior to import
        import pymc3 as pm

        # Collect fits
        self.fits = self.t_fits()

        click.echo("Building model")
        with pm.Model() as self.model:
            # Convex model priors
            b = ([1] if len(self.backgrounds) == 1 else pm.Dirichlet(
                "b", a=np.ones(len(self.backgrounds))))
            # Model error
            eps = pm.InverseGamma("eps", 1, 1)

            # Convex model declaration
            for gene in tqdm(self.training_genes):
                y, norm_term = 0, 0
                for i, dataset in enumerate(self.backgrounds):
                    name = f"{gene}={dataset}"
                    fit = self.fits.loc[name]
                    x = pm.StudentT(name, nu=fit.nu, mu=fit.mu, lam=fit.lam)
                    y += (b[i] / fit.sd) * x
                    norm_term += b[i] / fit.sd

                # y_g = \frac{\sum_d \frac{\beta * x}{\sigma} + \epsilon}{\sum_d\frac{\beta}{\sigma}}
                # Embed mu in laplacian distribution
                pm.Laplace(
                    gene,
                    mu=y / norm_term,
                    b=eps / norm_term,
                    observed=self.sample[gene],
                )
            # Sample
            self.trace = pm.sample(**kwargs)
Example #28
def trend_model(y_old, y_new):
    # PyMC3 trend changepoint model
    # trend is modeled by Normal RVs
    # y_old: data points observed since the last changepoint
    # y_new: the most recent window (win = 10) of data points
    g_new = np.gradient(y_new)                     # observed trend
    g_old = np.gradient(y_old) if len(y_old) > 1 else g_new
    mu_new = g_new.mean() if len(g_new) > 0 else None
    mu_old = g_old.mean() if len(g_old) > 0 else mu_new
    sigma_new = max(1.0, g_new.std()) if len(g_new) > 0 else None
    sigma_old = max(1.0, g_old.std()) if len(g_old) > 0 else sigma_new
    y_ = np.concatenate((y_old, y_new))
    y_obs = theano.shared(y_)
    ts = np.array(range(1, 1 + len(y_)))  # start from 1 to deal with intercept
    t_arr = np.array([ts, ts]).T

    with pm.Model() as model:
        w = pm.Dirichlet('w', a=np.ones(2))
        mu = pm.Normal('mu', np.array([mu_old, mu_new]), np.array([sigma_old, sigma_new]), shape=(2,))
        mu_t = pm.Deterministic('mu_t', t_arr * mu)
        tau = pm.Gamma('tau', 1.0, 1.0, shape=2)
        diff = pm.Deterministic('diff', mu[1] - mu[0])                    # needed for PyMC3 model
        obs = pm.NormalMixture('obs', w, mu_t, tau=tau, observed=y_obs)   # needed for PyMC3 model
    return model
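A usage sketch mirroring the level_model example above (synthetic series, illustrative only; same np/pm/theano import assumptions):

y_old = np.cumsum(np.random.normal(0.1, 1.0, size=50))              # mild upward trend
y_new = y_old[-1] + np.cumsum(np.random.normal(1.5, 1.0, size=10))  # steeper trend

model = trend_model(y_old, y_new)
with model:
    trace = pm.sample(1000, tune=1000, cores=1)

# 'diff' is the new trend slope minus the old one
print(np.mean(trace['diff'] > 0))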
Example #29
(D, W) = data.shape


def log_lda(theta, phi):
    def ll_lda(value):
        dixs, vixs = value.nonzero()
        vfreqs = value[dixs, vixs]
        ll = vfreqs * pm.math.logsumexp(
            t.log(theta[dixs]) + t.log(phi.T[vixs]), axis=1).ravel()
        return t.sum(ll)

    return ll_lda


model1 = pm.Model()  # alpha, beta, K, and V are defined elsewhere in the original script
with model1:
    theta = pm.Dirichlet("theta", a=alpha, shape=(D, K))
    phi = pm.Dirichlet("phi", a=beta, shape=(K, V))
    doc = pm.DensityDist('doc', log_lda(theta, phi), observed=data)
with model1:
    inference = pm.ADVI()
    approx = pm.fit(
        n=10000,
        method=inference,
        callbacks=[pm.callbacks.CheckParametersConvergence(diff='absolute')])

#inference
tr1 = approx.sample(draws=1000)
pm.plots.traceplot(tr1)
pm.plot_posterior(tr1, color='LightSeaGreen')

plt.plot(approx.hist)
Example #30
data = data[0:1000]
truth = truth[0:1000]

data[np.where(data >= 1)] = 1  # binarize the data


np.random.seed(12345)
alphaprime=10
nclusters = 10
ncells = data.shape[0]
nsites = data.shape[1]

# without scaling
model = pm.Model()
with model:
    pi = pm.Dirichlet('pi', a=np.array([alphaprime]*nclusters),shape=nclusters)
    # Define priors
    pk = pm.Beta('pk', 1,1,shape=(nclusters,nsites))
    z = pm.Categorical("z",p=pi,shape=ncells)
    # Define likelihood
    likelihood = pm.Bernoulli('likelihood',p=pk[z],observed=data,shape=(ncells))


with model:
    # Metropolis for the continuous parameters, categorical Gibbs for z
    step1 = pm.Metropolis(vars=[pk, pi])
    step2 = pm.ElemwiseCategorical(vars=[z], values=list(range(nclusters)))
    tr = pm.sample(100, step=[step1, step2])

pm.traceplot(tr)