Example #1
def test_vae():
    minibatch_size = 10
    data = pm.floatX(np.random.rand(100))
    x_mini = pm.Minibatch(data, minibatch_size)
    x_inp = tt.vector()
    x_inp.tag.test_value = data[:minibatch_size]

    ae = theano.shared(pm.floatX([.1, .1]))
    be = theano.shared(pm.floatX(1.))

    ad = theano.shared(pm.floatX(1.))
    bd = theano.shared(pm.floatX(1.))

    enc = x_inp.dimshuffle(0, 'x') * ae.dimshuffle('x', 0) + be
    mu,  rho = enc[:, 0], enc[:, 1]

    with pm.Model():
        # Hidden variables
        zs = pm.Normal('zs', mu=0, sd=1, shape=minibatch_size)
        dec = zs * ad + bd
        # Observation model
        pm.Normal('xs_', mu=dec, sd=0.1, observed=x_inp)

        pm.fit(1, local_rv={zs: dict(mu=mu, rho=rho)},
               more_replacements={x_inp: x_mini}, more_obj_params=[ae, be, ad, bd])
def test_vae():
    minibatch_size = 10
    data = pm.floatX(np.random.rand(100))
    x_mini = pm.Minibatch(data, minibatch_size)
    x_inp = tt.vector()
    x_inp.tag.test_value = data[:minibatch_size]

    ae = theano.shared(pm.floatX([.1, .1]))
    be = theano.shared(pm.floatX(1.))

    ad = theano.shared(pm.floatX(1.))
    bd = theano.shared(pm.floatX(1.))

    enc = x_inp.dimshuffle(0, 'x') * ae.dimshuffle('x', 0) + be
    mu,  rho = enc[:, 0], enc[:, 1]

    with pm.Model():
        # Hidden variables
        zs = pm.Normal('zs', mu=0, sigma=1, shape=minibatch_size)
        dec = zs * ad + bd
        # Observation model
        pm.Normal('xs_', mu=dec, sigma=0.1, observed=x_inp)

        pm.fit(1, local_rv={zs: dict(mu=mu, rho=rho)},
               more_replacements={x_inp: x_mini}, more_obj_params=[ae, be, ad, bd])
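The autoencoding setup above (local_rv plus more_replacements) is the specialised VAE path of pm.fit. For orientation, here is a minimal sketch of the more common minibatch ADVI pattern; the dataset, batch size and iteration count are illustrative assumptions, not taken from the test above.

import numpy as np
import pymc3 as pm

data = np.random.randn(1000)                   # hypothetical dataset
batch = pm.Minibatch(data, batch_size=100)     # streams random minibatches

with pm.Model():
    mu = pm.Normal('mu', mu=0, sigma=10)
    sigma = pm.HalfNormal('sigma', sigma=1)
    # total_size rescales the minibatch log-likelihood to the full dataset
    pm.Normal('obs', mu=mu, sigma=sigma, observed=batch, total_size=len(data))
    approx = pm.fit(10000, method='advi')      # returns a MeanField approximation
    trace = approx.sample(1000)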
Example #3
def test_expressions(expr):
    with Model() as model:
        var = expr((10, 10))
        Normal('obs', observed=var)
        assert var.tag.test_value.shape == (10, 10)
        assert len(model.free_RVs) == 3
        fit(1)
Example #4
def sample_chain(model,
                 chain_i=0,
                 step=None,
                 num_samples=MAX_NUM_SAMPLES,
                 advi=False,
                 tune=5,
                 discard_tuned_samples=True,
                 num_scale1_iters=NUM_SCALE1_ITERS,
                 num_scale0_iters=NUM_SCALE0_ITERS):
    """Sample single chain from constructed Bayesian model"""
    start = timer()
    with model:
        if not advi:
            pm._log.info('Assigning NUTS sampler...')
            if step is None:
                start_, step = pm.init_nuts(init='advi',
                                            njobs=1,
                                            n_init=NUM_INIT_STEPS,
                                            random_seed=-1,
                                            progressbar=False)

            discard = tune if discard_tuned_samples else 0
            for i, trace in enumerate(
                    pm.iter_sample(num_samples + discard,
                                   step,
                                   start=start_,
                                   chain=chain_i)):
                if i == 0:
                    min_num_samples = get_min_samples_per_chain(
                        len(trace[0]), MIN_SAMPLES_CONSTANT, NUM_CHAINS)
                elapsed = timer() - start
                if elapsed > SOFT_MAX_TIME_IN_SECONDS / NUM_CHAINS:
                    print('exceeded soft time limit...')
                    if i + 1 - discard >= min_num_samples:
                        print('collected enough samples; stopping')
                        break
                    else:
                        print('but only collected {} of {}; continuing...'.
                              format(i + 1 - discard, min_num_samples))
                        if elapsed > HARD_MAX_TIME_IN_SECONDS / NUM_CHAINS:
                            print('exceeded HARD time limit; STOPPING')
                            break
            return trace[discard:]
        else:  # ADVI for neural networks
            scale = theano.shared(pm.floatX(1))
            vi = pm.ADVI(cost_part_grad_scale=scale)
            pm.fit(n=num_scale1_iters, method=vi)
            scale.set_value(0)
            approx = pm.fit(n=num_scale0_iters)
            # one sample to get dimensions of trace
            trace = approx.sample(draws=1)
            min_num_samples = get_min_samples_per_chain(
                len(trace.varnames), MIN_SAMPLES_CONSTANT, 1)
            trace = approx.sample(draws=min_num_samples)
            return trace
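The ADVI branch above uses a two-stage schedule on cost_part_grad_scale. A minimal sketch of that schedule kept on a single inference object, so the second stage continues the first rather than starting a fresh fit (the model context and iteration counts are assumptions):

import theano
import pymc3 as pm

scale = theano.shared(pm.floatX(1))
with model:                                    # assumed model context
    vi = pm.ADVI(cost_part_grad_scale=scale)
    pm.fit(n=10000, method=vi)                 # stage 1: full gradient of the cost part
    scale.set_value(0)                         # stage 2: drop the cost-part gradient
    approx = pm.fit(n=10000, method=vi)        # continues the same approximation
    trace = approx.sample(500)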
Example #5
def test_discrete_not_allowed():
    mu_true = np.array([-2, 0, 2])
    z_true = np.random.randint(len(mu_true), size=100)
    y = np.random.normal(mu_true[z_true], np.ones_like(z_true))

    with pm.Model():
        mu = pm.Normal('mu', mu=0, sd=10, shape=3)
        z = pm.Categorical('z', p=tt.ones(3) / 3, shape=len(y))
        pm.Normal('y_obs', mu=mu[z], sd=1., observed=y)
        with pytest.raises(opvi.ParametrizationError):
            pm.fit(n=1)  # fails
def test_discrete_not_allowed():
    mu_true = np.array([-2, 0, 2])
    z_true = np.random.randint(len(mu_true), size=100)
    y = np.random.normal(mu_true[z_true], np.ones_like(z_true))

    with pm.Model():
        mu = pm.Normal('mu', mu=0, sigma=10, shape=3)
        z = pm.Categorical('z', p=tt.ones(3) / 3, shape=len(y))
        pm.Normal('y_obs', mu=mu[z], sigma=1., observed=y)
        with pytest.raises(opvi.ParametrizationError):
            pm.fit(n=1)  # fails
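The test above documents that pm.fit rejects models containing discrete free variables. A sketch of the usual workaround, marginalising the categorical indicator with a mixture likelihood so the model stays fully continuous (the Dirichlet weights are an assumption, not part of the original test):

import numpy as np
import pymc3 as pm

mu_true = np.array([-2, 0, 2])
z_true = np.random.randint(len(mu_true), size=100)
y = np.random.normal(mu_true[z_true], np.ones_like(z_true))

with pm.Model():
    w = pm.Dirichlet('w', a=np.ones(3))
    mu = pm.Normal('mu', mu=0, sigma=10, shape=3)
    # the discrete indicator z is summed out analytically by the mixture
    pm.NormalMixture('y_obs', w=w, mu=mu, sigma=1., observed=y)
    approx = pm.fit(n=10000)                   # ADVI now runs without ParametrizationError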
Example #7
def main():
    config = create_configuration(filename='/regression-siso.json')
    dataset = get_dataset(config.dataset, testing=False)

    # %%
    x_train = dataset.x
    y_train = dataset.y
    x = theano.shared(x_train)
    y = theano.shared(y_train)
    nn = construct_nn(x=x, y=y, config=config)

    # ADVI
    with nn:
        inference = pm.ADVI()
        approx = pm.fit(n=50000, method=inference)
    trace = approx.sample(draws=5000)

    # with nn:
    #     inference = pm.NUTS()
    #     trace = pm.sample(2000, tune=1000, cores=4, inference=inference)
    print(pm.summary(trace))

    x.set_value(x_train)
    y.set_value(y_train)

    with nn:
        ppc = pm.sample_ppc(trace, samples=500, progressbar=False)
Example #8
 def _build_BPF(self):
     print('start building the Bayesian probabilistic model')
     self.x_u = theano.shared(self.train_u)
     self.x_i = theano.shared(self.train_i)
     self.y_r = theano.shared(self.train_r)
     self.y_r_ui = theano.shared(np.array(self.nn_r_ui))
     assert (len(self.y_r.get_value()) == len(self.y_r_ui.get_value()))
     with pm.Model() as self.bncf:  #define the prior and likelihood
         b_u = pm.Normal('b_u', 0, sd=1, shape=self.shape[0])
         b_i = pm.Normal('b_i', 0, sd=1, shape=self.shape[1])
         u = pm.Normal('u', 0, sd=1)
         tY = pm.Deterministic(
             'tY',
             tt.add(
                 tt.add(tt.add(b_u[self.x_u], b_i[self.x_i]), self.y_r_ui),
                 u))
         #tY = pm.Deterministic('tY', ((b_u[self.x_u]+b_i[self.x_i])+self.y_r_ui)+u)#b_u+b_i+u+nn_r_ui
         nY = pm.Deterministic('nY', pm.math.sigmoid(tY))
         # likelihood of observed data
         Y = pm.Bernoulli(
             'Y', nY,
             observed=self.y_r)  #total_size=self.y_r.get_value().shape[0]
     with self.bncf:  #inference
         approx = pm.fit(n=1000, method=pm.ADVI())
         self.trace = approx.sample(draws=500)
     with self.bncf:  #posterior prediction
         ppc = pm.sample_posterior_predictive(self.trace, progressbar=True)
         self.by_r_ui = ppc['Y'].mean(axis=0)
     print('done building the Bayesian probabilistic model')
Example #9
	def _sample(self, num_epochs = None, num_draws = None):
		if not num_epochs: num_epochs = self.num_epochs
		if not num_draws:  num_draws  = self.num_draws

		with self.model:
			approx     = pm.fit(n = num_epochs, obj_optimizer = pm.adam(learning_rate = self.learning_rate))
			self.trace = approx.sample(draws = num_draws)
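pm.fit accepts any of the PyMC3 optimizers (pm.adam, pm.adamax, pm.adagrad_window, ...) through obj_optimizer and can be combined with convergence callbacks. A hypothetical call mirroring the method above, with an added early-stopping callback; the self.* attributes are assumed from the surrounding class:

with self.model:                               # assumed model context from the class above
    approx = pm.fit(
        n=self.num_epochs,
        obj_optimizer=pm.adam(learning_rate=self.learning_rate),
        callbacks=[pm.callbacks.CheckParametersConvergence(diff='absolute', tolerance=1e-3)],
    )
    trace = approx.sample(draws=self.num_draws)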
Example #10
    def fit(self, fast_sampling=True, sample_size=3000):

        with pm.Model() as self.model:
            beta = pm.Normal('beta', mu=0.0, tau=1.0, shape=(self.dim + 1, 1))
            # Priors for spatial random effects
            tau = pm.Gamma('tau', alpha=2., beta=2.)
            alpha = pm.Uniform('alpha', lower=0, upper=1)
            phi = pm.MvNormal('phi',
                              mu=0,
                              tau=tau * (self.D - alpha * self.weight_matrix),
                              shape=(1, self.N))

            # Mean model
            mu = pm.Deterministic('mu', tt.dot(self.covariates, beta) + phi.T)
            theta_sd = pm.Gamma('theta_sd', alpha=1.0, beta=1.0)
            # Likelihood
            Yi = pm.Normal('Yi',
                           mu=mu.ravel(),
                           tau=theta_sd,
                           observed=self.response_var)

            if fast_sampling:
                inference = pm.ADVI()
                approx = pm.fit(n=50000, method=inference)  #until converge
                self.trace = approx.sample(draws=sample_size)
            else:
                self.trace = pm.sample(sample_size, cores=2, tune=1000)

        self._report_credible_interval(self.trace, 'beta')
        self._report_credible_interval(self.trace, 'tau')
Example #11
    def fit(self, draws=500, chains=4, trace_size=500, method='NUTS', map_initialization=False,
            finalize=True, step_kwargs={}, sample_kwargs={}):
        """Fit the PMProphet model.

        Parameters
        ----------
        draws : int, > 0
            The number of MCMC samples.
        chains: int, =4
            The number of MCMC chains.
        trace_size: int, =500
            The last N number of samples to keep in the trace
        method : 'NUTS' or 'Metropolis' or 'ADVI'.
        map_initialization : bool
            Initialize the model with maximum a posteriori estimates.
        finalize : bool
            Finalize the model.
        step_kwargs : dict
            Additional arguments for the sampling algorithms
            (`NUTS` or `Metropolis`).
        sample_kwargs : dict
            Additional arguments for the PyMC3 `sample` function.

        Returns
        -------
        The fitted PMProphet object.
        """

        if chains * draws < trace_size and method != 'ADVI':
            raise Exception("Desired trace size should not exceed the total number of sampled draws (chains * draws)")

        self.skip_first = (chains * draws) - trace_size if method != 'ADVI' else 0
        self.chains = chains

        if finalize:
            self.finalize_model()

        with self.model:
            if map_initialization:
                self.start = pm.find_MAP(maxeval=10000)
                if draws == 0:
                    self.trace = {k: np.array([v]) for k, v in self.start.items()}

            if draws:
                if method == 'NUTS' or method == 'Metropolis':
                    self.trace = pm.sample(
                        draws,
                        chains=chains,
                        step=pm.Metropolis(**step_kwargs) if method == 'Metropolis' else pm.NUTS(**step_kwargs),
                        start=self.start if map_initialization else None,
                        **sample_kwargs
                    )
                else:
                    res = pm.fit(
                        draws,
                        start=self.start if map_initialization else None
                    )
                    self.trace = res.sample(trace_size)

        return self
    def fit(self, sampling_size=5000, fast_sample=False):
        with pm.Model() as self.model:

            rho = pm.Exponential('rho', 1/5, shape=self.dim_gp)
            tau = pm.Exponential('tau', 1/3)

            cov_func = pm.gp.cov.Matern52(self.dim_gp, ls=rho)
            self.gp = pm.gp.Latent(cov_func=cov_func)
            f = self.gp.prior("f", X=self.locations)

            mean_func = f
            self.beta_list = []
            if self.covariates:
                for i in range(len(self.covariates)):
                    beta = pm.Normal('_'.join(['beta', str(i)]), mu=0, sd=50)
                    self.beta_list.append(beta)
                    mean_func = mean_func + beta*self.covariates[i]

            sigma = pm.HalfNormal('sigma', sd=20)
            y = pm.Normal('Y', mu=mean_func, sd=sigma, observed=self.response)

            if fast_sample:
                inference = pm.ADVI()
                approx = pm.fit(n=25000, method=inference) #until converge
                self.trace = approx.sample(draws=sampling_size)
            else:
                start = pm.find_MAP()
                self.trace = pm.sample(sampling_size, tune=10000, chains=4)
Example #13
	def run_factorization(self):
		rat = self.allelic_counts/self.total_counts
		nans = np.isnan(rat)
		# Run bb-mf
		with pm.Model() as bb_glm:
			CONC = pm.HalfCauchy('CONC', beta=5, shape=(1,self.S), testval=self.conc_init)
			BETA = pm.Normal('BETA', mu=0, tau=(1/1000000.0), shape=(self.S, self.num_cov), testval=self.beta_init)
			U = pm.Normal('U', mu=0, tau=(1.0/100.0), shape=(self.N, self.K), testval=self.U_init)
			V = pm.Normal('V', mu=0, tau=(1.0/100.0), shape=(self.S, self.K), testval=self.V_init)

			MU_A = pm.Normal("MU_A", mu=0., sd=100**2, shape=(1,self.S), testval=self.mu_a_init)
			SIGMA_A = pm.HalfCauchy("SIGMA_A", beta=5.0, shape=(1,self.S), testval=self.sigma_a_init)
			mu_a_mat = pm.math.dot(np.ones((self.I,1)), MU_A)
			sigma_a_mat = pm.math.dot(np.ones((self.I,1)), SIGMA_A)
			A = pm.Normal('A', mu=mu_a_mat, sigma=sigma_a_mat, shape=(self.I,self.S), testval=self.A_init)

			p = pm.math.invlogit(pm.math.dot(self.cov, BETA.T) + pm.math.dot(U,V.T) + A[self.Z,:])
			conc_mat = pm.math.dot(np.ones((self.N,1)), CONC)
			R = pm.BetaBinomial('like',alpha=(p*conc_mat)[~nans], beta=((1.0-p)*conc_mat)[~nans], n=self.total_counts[~nans], observed=self.allelic_counts[~nans])
			approx = pm.fit(method='advi', n=1000)
		pickle.dump(approx, open(self.output_root + '_model', 'wb'))
		#approx = pickle.load( open(self.output_root + '_model', "rb" ) )
		means_dict = approx.bij.rmap(approx.params[0].eval())
		np.savetxt(self.output_root + '_temper_U.txt', (means_dict['U']), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_V.txt', (means_dict['V'].T), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_BETA.txt', (means_dict['BETA'].T), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_CONC.txt', np.exp(means_dict['CONC_log__']), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_A.txt', (means_dict['A']), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_MU_A.txt', (means_dict['MU_A']), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_SIGMA_A.txt', np.exp(means_dict['SIGMA_A_log__']), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_ELBO.txt', approx.hist, fmt="%s", delimiter='\t')
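A note on the extraction pattern above: approx.params[0] holds the mean-field means in the transformed (unconstrained) space, which is why the log-transformed CONC and SIGMA_A entries are exponentiated before saving. A sketch of the slightly more direct accessors for the same quantities, assuming the approx object returned by pm.fit above:

means_dict = approx.bij.rmap(approx.mean.get_value())   # posterior means, unconstrained space
stds_dict = approx.bij.rmap(approx.std.eval())          # posterior standard deviations
conc_mean = np.exp(means_dict['CONC_log__'])            # back-transform log-space parameters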
Example #14
    def fit(self,
            X,
            Y,
            samples=500,
            advi_n=50000,
            advi_n_mc=1,
            advi_obj_optimizer=pm.adam(learning_rate=.1)):

        self.num_samples = samples

        self._build_model(X, Y)

        with self.model:
            if self.inference_method == 'advi':
                mean_field = pm.fit(
                    n=advi_n,
                    method='advi',
                    obj_n_mc=advi_n_mc,
                    obj_optimizer=advi_obj_optimizer
                )  # TODO: how to determine hyperparameters?

                self.trace = mean_field.sample(draws=samples)
            elif self.inference_method == 'mcmc':
                self.trace = pm.sample(samples, tune=samples)
            else:
                raise Exception(
                    "Unknown inference_method value: %s. Choose among 'advi', 'mcmc'."
                    % self.inference_method)
	def run_factorization(self, N, S, X, K, num_cov, k, n):
		# Smart initialization
		rat = k/n
		nans = np.isnan(rat)
		conc_inits = np.zeros((1, S))
		beta_inits = np.zeros((num_cov, S))
		for index_s in range(S):
			column_rat = rat[:, index_s]
			column_nans = np.isnan(column_rat)
			valid_rat = column_rat[~column_nans]
			conc_init = min(1.0/np.var(valid_rat), 1000.0)
			m_init = min(max(np.mean(valid_rat), 1.0/1000 ), 1.0-(1.0/1000))
			conc_inits[0, index_s] = conc_init
			beta_inits[0, index_s] = np.log(m_init/(1.0-m_init))
		# Run bb-mf
		with pm.Model() as bb_glm:
			CONC = pm.Gamma('CONC', alpha=1e-4, beta=1e-4, shape=(1,S), testval=conc_inits)
			BETA = pm.Normal('BETA', mu=0, tau=(1/1000000.0), shape=(S, num_cov), testval=beta_inits.T)
			U = pm.Normal('U', mu=0, tau=(1/1000.0), shape=(N, K), testval=np.random.randn(N, K))
			V = pm.Normal('V', mu=0, tau=(1/1000.0), shape=(S, K), testval=np.random.randn(S, K))
			p = pm.math.invlogit(pm.math.dot(X, BETA.T) + pm.math.dot(U,V.T))
			conc_mat = pm.math.dot(np.ones((N,1)), CONC)
			R = pm.BetaBinomial('like',alpha=(p*conc_mat)[~nans], beta=((1.0-p)*conc_mat)[~nans], n=n[~nans], observed=k[~nans])
			approx = pm.fit(method='advi', n=30000)
		pickle.dump(approx, open(self.output_root + '_model', 'wb'))
		#approx = pickle.load( open(self.output_root + '_model', "rb" ) )
		means_dict = approx.bij.rmap(approx.params[0].eval())
		np.savetxt(self.output_root + '_temper_U.txt', (means_dict['U']), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_V.txt', (means_dict['V'].T), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_BETA.txt', (means_dict['BETA'].T), fmt="%s", delimiter='\t')
Example #16
def fit_expected_successes_per_action_model(xS, attempts):
    ## estimates a hierarchical binomial model for success rate data
    ## takes as input:
    ##      xS, a numpy array of shape (num_players,) containing the total expected successes for each player (e.g. total xG, total xA)
    ##      attempts, a numpy array of shape (num_players,) containing the total numbers of attempted actions for each player (e.g. shots, key passes)
    ## returns:
    ##      sl, a numpy array of shape (6000,N) containing 6000 posterior samples of success probabilities (N is the number of players in the
    ##      original data frame who have registered non-zero expected successes)
    ##      sb, a numpy array of shape (6000,3) containing 6000 posterior samples of: the population-level & observation-level beta 'sample size'
    ##              parameters and the population-level mean
    ##      kk, boolean indicating which players have actually registered non-zero expected successes
    import numpy as np
    import pymc3 as pm
    kk = (attempts > 0) & (xS > 0)
    sp = xS[kk] / attempts[kk]
    attempts = attempts[kk]
    N = attempts.shape[0]

    with pm.Model() as model:
        v = pm.HalfNormal('v', shape=2, sigma=100)
        mu = pm.Uniform('mu')
        lambdas = pm.Beta('lambdas', alpha=mu * v[0], beta=(1 - mu) * v[0], shape=N)
        y = pm.Beta('y',
                    alpha=lambdas * (attempts * (v[1] + 1) - 1),
                    beta=(1 - lambdas) * (attempts * (v[1] + 1) - 1),
                    observed=sp)
        approx = pm.fit(n=30000)
    sl = approx.sample(6000)['lambdas']
    sb = np.c_[approx.sample(6000)['v'], approx.sample(6000)['mu']]
    return [sl, sb, kk, 'expected']
Example #17
def fit_counts_model(counts, mins_played):
    ## estimates a hierarchical poisson model for count data
    ## takes as input:
    ##      counts, a numpy array of shape (num_players,) containing the total numbers of actions completed (across all games)
    ##      mins_played, a numpy array of shape (num_players,) containing the total number of minutes each player was observed for
    ## returns:
    ##      sl, a numpy array of shape (6000,N) containing 6000 posterior samples of actions per 90 (N is the number of players in the
    ##      original data frame who have actually played minutes)
    ##      sb, a numpy array of shape (6000,2) containing 6000 posterior samples of the population-level gamma shape parameter &
    ##                                          the population-level mean
    ##      kk, boolean indicating which players have actually played minutes
    import numpy as np
    import pymc3 as pm
    kk = (mins_played > 0) & np.isfinite(counts)
    mins_played = mins_played[kk]
    counts = counts[kk]
    N = counts.shape[0]

    with pm.Model() as model:
        beta = pm.HalfNormal('beta', sigma=100)
        mu = pm.HalfFlat('mu')
        lambdas = pm.Gamma('lambdas', alpha=mu * beta, beta=beta, shape=N)
        lambda_tilde = lambdas * mins_played
        y = pm.Poisson('y', lambda_tilde, observed=counts)
        approx = pm.fit(n=30000)
    sl = approx.sample(6000)['lambdas'] * 90
    sb = np.c_[approx.sample(6000)['beta'], approx.sample(6000)['mu']]
    return [sl, sb, kk, 'count']
    def fit_vi(self):
        self.likelihood_fn()
        with self.model:
            print('Fitting model...')
            self.trace_vi = pm.fit(
                self.n_iterations,
                method='advi',
                # VI in PyMC3 is stochastic: it approximates the objective gradients with Monte Carlo samples.
                # As a consequence the fit is noisy - the ELBO does not decrease monotonically - and when training
                # stops, VI returns the parameters from the last iteration, which may happen to have a high ELBO.
                # Increasing obj_n_mc (the number of Monte Carlo samples per gradient estimate) reduces this noise.
                obj_n_mc=1,
                obj_optimizer=pm.adamax(),
                # Defining a callback to do early stop when convergence is achieved
                callbacks=[
                    pm.callbacks.CheckParametersConvergence(every=50,
                                                            diff='absolute',
                                                            tolerance=1e-3)
                ])
            print('Sampling...')
            self.trace_vi_samples = self.trace_vi.sample()
            self.pred_samples_fit = pm.sample_posterior_predictive(
                self.trace_vi_samples, vars=[self.y_pred], samples=500)

        if not self.minibatch:
            # with minibatch there is no possibility to recover the fitted values
            # backtransform the sampling of the fit for the original scale
            self.pred_samples_fit = self.dt.inv_transf_train_general(
                self.pred_samples_fit['y_pred'])
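Following up on the obj_n_mc comment above, a sketch of a less noisy fit that averages more Monte Carlo samples per gradient step and keeps the objective history for inspection (the model context, iteration count and plotting import are assumptions):

import matplotlib.pyplot as plt

with model:                                    # assumed model context
    approx = pm.fit(
        20000,
        method='advi',
        obj_n_mc=10,                           # average 10 MC samples per gradient estimate
        obj_optimizer=pm.adamax(),
        callbacks=[pm.callbacks.CheckParametersConvergence(every=50, diff='absolute', tolerance=1e-3)],
    )
plt.plot(approx.hist)                          # objective history, useful to judge convergence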
Example #19
def fit_models(models,
               method='NUTS',
               verbose=True,
               n_samples=2000,
               n_advi=200000,
               **kwargs):
    if isinstance(models, pm.model.Model):
        models = [models]
    elif isinstance(models, list) and np.alltrue(
            np.array([isinstance(model, pm.model.Model) for model in models])):
        pass
    else:
        raise ValueError(
            "Models must be list of <pymc3.model.Model> instances.")
    if verbose:
        print('Fitting {} model(s) using {}...'.format(len(models), method))
    traces = []

    for m, model in enumerate(models):
        if verbose:
            print('  Fitting model {} of {}...'.format(m + 1, len(models)))
        with model:
            if method == 'NUTS':
                trace = pm.sample(draws=n_samples, **kwargs)
            elif method == 'ADVI':
                vi_est = pm.fit(n=n_advi, **kwargs)
                trace = vi_est.sample(n_samples)
            traces.append(trace)

    if len(traces) == 1:
        return traces[0]
    else:
        return traces
Example #20
def VINormal(dim, const_str, const_fx, K, nfit=30000):
    """\
    Normal (full-rank) sampling, fit with ADVI to a
    high-potential probability distribution



    :input dim:       The dimensionality
    :input const_str: Constraint strings; used to define potentials
    :input const_fx:  Constraint callables, included for API compatibility
    :input K:         Number of points to sample
    :input nfit:      Number of gradient iterations for variational inference

    :returns: A set of points X drawn from N(μ, Σ), where the parameters are fit
              by variational inference to match the potential distribution formed
              by the potentials -c*g_i, with c = 7500


    """
    with pm.Model() as mod:
        x = pm.Uniform('x', shape=dim)
        for i, const in enumerate(const_str):
            cname = 'g%d' % i
            g = pm.Deterministic(cname, eval(const, {'__builtins__': None}, {'x': x } ))
            pname = '%s_pot' % cname
            pm.Potential(pname, tt.switch(tt.lt(g, 0), 7500*g, 0))
        fit_res = pm.fit(nfit, method='fullrank_advi', obj_n_mc=3)
        trace = fit_res.sample(K)
    return trace['x']
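A hypothetical call to the function above, with a single linear constraint expressed as a string over the Uniform vector x (the constraint, dimensionality and sample count are made up for illustration):

X = VINormal(dim=2,
             const_str=['x[0] + x[1] - 1'],    # g(x) = x0 + x1 - 1; penalised where g < 0
             const_fx=[None],                  # callables unused here, kept for API compatibility
             K=500,
             nfit=5000)
print(X.shape)                                 # (500, 2)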
Example #21
def approximate_posterior_predictive(spec):
    # Fit a neural network with a specified number of nodes in a single hidden layer
    print("Fitting an MLE model...")
    nn = SimpleNN(width=spec["width"],
                  num_iters=5_000,
                  step_size=0.01,
                  checkpoint=1_000,
                  seed=0)
    nn.fit(df.x, df.y)
    print("", end="", flush=True)

    x_input = theano.shared(df[["x"]].values)
    y_output = theano.shared(df["y"].values)

    # Build a hierarchical Bayesian neural network. Initialize with MLE.
    model = build_model(
        x_input,
        y_output,
        sigma=spec["sigma"],
        noise=spec["noise"],
        width=spec["width"],
        n_weights=nn.n_weights,
        init=nn.weights,
    )

    # Sample from the posterior
    if spec["algorithm"] == "NUTS":
        trace = pm.sample(**nuts_kwargs, model=model)
    elif spec["algorithm"] == "ADVI":
        mean_field = pm.fit(**advi_kwargs, model=model)
        trace = mean_field.sample(10_000)
    else:
        raise NotImplementedError

    return trace, nn
Example #22
    def fit_advi(self, n=3, method='advi', n_type='restart'):
        r"""Find posterior using ADVI (maximising likelihood of the data and
            minimising KL-divergence of posterior to prior)
        :param n: number of independent initialisations
        :param method: to allow for potential use of SVGD or MCMC (currently only ADVI implemented).
        :param n_type: type of repeated initialisation:
                                  'restart' to pick different initial value,
                                  'cv' for molecular cross-validation - splits counts into n datasets,
                                         for now, only n=2 is implemented
                                  'bootstrap' for fitting the model to multiple downsampled datasets.
                                         Run `mod.bootstrap_data()` to generate variants of data
        :return: self.mean_field dictionary with MeanField pymc3 objects. 
        """

        if not np.isin(n_type, ['restart', 'cv', 'bootstrap']):
            raise ValueError(
                "n_type should be one of ['restart', 'cv', 'bootstrap']")

        self.mean_field = {}
        self.samples = {}
        self.node_samples = {}

        self.n_type = n_type

        if np.isin(n_type, ['bootstrap']):
            if self.X_data_sample is None:
                self.bootstrap_data(n=n)
        elif np.isin(n_type, ['cv']):
            self.generate_cv_data(n=n)  # cv data added to self.X_data_sample

        init_names = ['init_' + str(i + 1) for i in np.arange(n)]

        with self.model:

            for i, name in enumerate(init_names):

                # when type is molecular cross-validation or bootstrap,
                # replace self.x_data tensor with new data
                if np.isin(n_type, ['cv', 'bootstrap']):
                    more_replacements = {
                        self.x_data:
                        self.X_data_sample[i].astype(self.data_type)
                    }
                else:
                    more_replacements = {}

                # train the model
                self.mean_field[name] = pm.fit(
                    self.n_iter,
                    method='advi',
                    callbacks=[CheckParametersConvergence()],
                    obj_optimizer=pm.adam(learning_rate=self.learning_rate),
                    total_grad_norm_constraint=self.total_grad_norm_constraint,
                    more_replacements=more_replacements)

                # plot training history
                if self.verbose:
                    print(
                        plt.plot(np.log10(self.mean_field[name].hist[15000:])))
Example #23
    def fit(self, X, y):
        X = _check_X_input(X)
        y = _check_1d_inp(y)
        assert X.shape[0] == y.shape[0]

        with pm.Model():
            pm.glm.linear.GLM(X, y, family='binomial')
            pm.find_MAP()
            if self.method == 'advi':
                trace = pm.fit(progressbar=False, n=niter)
            if self.method == 'nuts':
                trace = pm.sample(progressbar=False, draws=niter)
        if self.method == 'advi':
            self.coefs = [i for i in trace.sample(nsamples)]
        elif self.method == 'nuts':
            samples_chosen = np.random.choice(np.arange(len(trace)),
                                              size=nsamples,
                                              replace=False)
            samples_chosen = set(list(samples_chosen))
            self.coefs = [i for i in trace if i in samples_chosen]
        else:
            raise ValueError("'method' must be one of 'advi' or 'nuts'")
        self.coefs = pd.DataFrame.from_dict(self.coefs)
        self.coefs = self.coefs[['Intercept'] +
                                ['x' + str(i) for i in range(X.shape[1])]]
        self.intercept = self.coefs['Intercept'].values.reshape((-1, 1)).copy()
        del self.coefs['Intercept']
        self.coefs = self.coefs.values.T
Example #24
    def sample(self):

        TIME0 = datetime.now()

        if self.inference == "NUTS":
            with self.model:
                trace = pm.sample(draws=self.draws,
                                  cores=self.cores,
                                  chains=self.chains,
                                  tune=self.tune,
                                  progressbar=self.progressbar,
                                  target_accept=.95)
            # target_accept=.95 gives a smaller step size, which helps if divergence warnings appear
        elif self.inference == "ADVI":
            with self.model:
                mean_field = pm.fit(n=10000,
                                    method="fullrank_advi",
                                    progressbar=self.progressbar)
                # TODO: trace is just a workaround here so the rest of the code understands
                # ADVI. We could communicate parameters from mean_field directly.
                trace = mean_field.sample(1000)
        else:
            raise NotImplementedError

        TIME1 = datetime.now()
        print("Finished job {0} in {1:.0f} seconds.".format(
            os.getpid(), (TIME1 - TIME0).total_seconds()))

        return trace
Example #25
def model_fit_using_se(data,
                       u_dim,
                       method='mcmc',
                       num_iter=10,
                       num_sample=1000):

    search_dim = data['search'].shape[1]
    num_obs = data['search'].shape[0]

    cov_u, mu_u = np.eye(u_dim), np.zeros(u_dim)
    cov_nlp, mu_nlp = np.eye(search_dim), np.zeros(search_dim)

    cov_nlp = np.loadtxt("similarity_matrix.csv",
                         dtype='float32',
                         delimiter=',')

    with pm.Model() as model:

        u = pm.MvNormal('u', mu=mu_u, cov=cov_u, shape=(num_obs, u_dim))
        search = data[
            'search']  #pm.MvNormal('search',mu=mu_search,cov=cov_search,observed=data['search'])

        #Incoming edge to self esteem
        u_se = pm.MvNormal('u_se', mu=mu_u, cov=cov_u, shape=u_dim)
        search_se = pm.MvNormal('search_se',
                                mu=mu_nlp,
                                cov=cov_nlp,
                                shape=search_dim)

        #self esteem as a function of its parents
        se_mean = tt.nnet.nnet.sigmoid(
            tt.dot(search, search_se) + tt.dot(u, u_se))
        se = pm.Bernoulli('se', p=se_mean, observed=data['se'])

        #Incoming edge to suicide ideation
        u_si = pm.MvNormal('u_si', mu=mu_u, cov=cov_u, shape=u_dim)
        search_si = pm.MvNormal('search_si',
                                mu=mu_nlp,
                                cov=cov_nlp,
                                shape=search_dim)
        #se_si_sig = pm.Normal('se_si_sig',mu=0,tau=1)
        se_si = pm.HalfNormal('se_si', sigma=1)  #se_si_sig**2)

        si_mean = tt.nnet.nnet.sigmoid(
            tt.dot(search, search_si) + tt.dot(u, u_si) + se_si * se)
        si = pm.Bernoulli('si', p=si_mean, observed=data['si'])

        mf = pm.fit(n=num_iter)
        #trace = pm.sample()
        trace = mf.sample(num_sample)
        #pm.traceplot(trace)
        #trace = pm.sample()
        # se_mean = pm.Uniform('se_mean',lower=0,upper=1,size=num_obs)
        # se = pm.Bernoulli('se',p=se_mean, observed = data["se"])

        # si_me
        # si = pm.Bernoulli('si',p= ,observed = data["si"])

    return trace
Example #26
    def fast_sample(self, sample_size=5000, iters=10000):
        if self.model is None:
            self.fit()

        with self.model:
            inference = pm.ADVI()
            approx = pm.fit(n=iters, method=inference)  #until converge
            self.trace = approx.sample(draws=sample_size)
Example #27
 def fit_ADVI(self,
              n_samples=2000,
              n_iter=100000,
              inference='advi',
              **fit_kws):
     with self.model:
         self.approx_fit = pm.fit(n=n_iter, method=inference, **fit_kws)
         self.trace_ = self.approx_fit.sample(draws=n_samples)
    def fit(self, sample_size, traceplot_name=None, fast_sampling=False):
        '''
        sample_size (int): The size of the sample
        traceplot_name (str): The name of the traceplot file
        fast_sampling   (bool): whether or not variational approximation should be used.

        Note: to evaluate the kernel function, pymc3 only accept tensor type from theano.
        '''
        self.model = pm.Model()
        # self.X_train = tt.constant(self.X_train) #need tensor type
        self.X_train = shared(self.X_train)

        with self.model:
            evaluated_kernels = []
            packed_L = pm.LKJCholeskyCov('packed_L',
                                         n=3,
                                         eta=2.,
                                         sd_dist=pm.HalfCauchy.dist(2.5))
            L = pm.expand_packed_triangular(3, packed_L)

            for center in self.centers.values:
                evaluated_kernels.append(
                    pm.MvNormal.dist(mu=center, chol=L).logp(self.X_train))

            beta = pm.Normal('beta', mu=0, sd=3, shape=self.number_of_centers)
            latentProcess = pm.Deterministic('mu',
                                             tt.dot(beta, evaluated_kernels))

            error = pm.HalfCauchy('error', 12)
            y_ = pm.Normal("y",
                           mu=latentProcess,
                           sd=error,
                           observed=np.log(self.y_train))

            if fast_sampling:
                with self.model:
                    inference = pm.ADVI()
                    approx = pm.fit(n=sample_size,
                                    method=inference)  #until converge
                    self.trace = approx.sample(draws=sample_size)

            else:
                with self.model:
                    start = pm.find_MAP()
                    self.trace = pm.sample(sample_size, start=start)

            if traceplot_name:
                fig, axs = plt.subplots(3, 2)  # 3 RVs, 2 columns (density + trace)
                pm.traceplot(self.trace,
                             varnames=['packed_L', 'beta', 'error'],
                             ax=axs)
                fig.savefig(traceplot_name)

                fig_path = os.path.join(os.getcwd(), traceplot_name)
                print(f'the traceplot has been saved to {fig_path}')
Example #29
    def _fit(self, X, Y, sampler='vi', **kwargs):
        self.construct_model(X, Y)
        callbacks = kwargs['vi_params'].get('callbacks', [])
        kwargs['random_seed'] = self.random_state.randint(2**32,
                                                          dtype='uint32')

        for i, c in enumerate(callbacks):
            if isinstance(c, pm.callbacks.CheckParametersConvergence):
                params = c.__dict__
                params.pop('_diff')
                params.pop('prev')
                params.pop('ord')
                params['diff'] = 'absolute'
                callbacks[i] = pm.callbacks.CheckParametersConvergence(
                    **params)
        if sampler == 'vi':
            random_seed = kwargs['random_seed']
            with self.model:
                sample_params = kwargs['sample_params']
                vi_params = kwargs['vi_params']
                vi_params['random_seed'] = sample_params[
                    'random_seed'] = random_seed
                draws_ = kwargs['draws']
                try:
                    self.trace = pm.sample(**sample_params)
                    vi_params['start'] = self.trace[-1]
                    self.trace_vi = pm.fit(**vi_params)
                    self.trace = self.trace_vi.sample(draws=draws_)
                except Exception as e:
                    if hasattr(e, 'message'):
                        message = e.message
                    else:
                        message = e
                    self.logger.error(message)
                    self.trace_vi = None
                    self.trace = None
            if self.trace_vi is None and self.trace is None:
                with self.model:
                    self.logger.info(
                        "Error in ADVI sampler; falling back to NUTS sampler with draws {}"
                        .format(draws_))
                    nuts_params = copy.deepcopy(sample_params)
                    nuts_params['tune'] = nuts_params['draws'] = 50
                    self.logger.info("Params {}".format(nuts_params))
                    self.trace = pm.sample(**nuts_params)
        elif sampler == 'metropolis':
            with self.model:
                start = pm.find_MAP()
                self.trace = pm.sample(**kwargs,
                                       step=pm.Metropolis(),
                                       start=start)
        else:
            with self.model:
                self.trace = pm.sample(**kwargs, step=pm.NUTS())
Example #30
def fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs):
    callbacks = vi_params.get("callbacks", [])
    for i, c in enumerate(callbacks):
        if isinstance(c, CheckParametersConvergence):
            params = c.__dict__
            params.pop("_diff")
            params.pop("prev")
            params.pop("ord")
            params["diff"] = "absolute"
            callbacks[i] = CheckParametersConvergence(**params)
    if sampler == "variational":
        with self.model:
            try:
                self.trace_ = pm.sample(chains=2, cores=8, tune=5, draws=5)
                vi_params["start"] = self.trace_[-1]
                self.trace_vi_ = pm.fit(**vi_params)
                self.trace_ = self.trace_vi_.sample(draws=draws)
            except Exception as e:
                if hasattr(e, "message"):
                    message = e.message
                else:
                    message = e
                logger.error(message)
                self.trace_vi_ = None
        if self.trace_vi_ is None and self.trace_ is None:
            with self.model:
                logger.info(
                    "Error in ADVI sampler; falling back to NUTS sampler with draws {}"
                    .format(draws))
                self.trace_ = pm.sample(chains=1,
                                        cores=4,
                                        tune=20,
                                        draws=20,
                                        step=pm.NUTS())
    elif sampler == "metropolis":
        with self.model:
            start = pm.find_MAP()
            self.trace_ = pm.sample(
                chains=2,
                cores=8,
                tune=tune,
                draws=draws,
                **kwargs,
                step=pm.Metropolis(),
                start=start,
            )
    else:
        with self.model:
            self.trace_ = pm.sample(chains=2,
                                    cores=8,
                                    tune=tune,
                                    draws=draws,
                                    **kwargs,
                                    step=pm.NUTS())
Example #31
    def fit(self, instances: np.ndarray,
            labels: np.ndarray) -> Optional[List[str]]:

        self.model = self._construct_nn(instances, labels)
        with self.model:
            inference = pm.ADVI()
            self.approx = pm.fit(n=EPOCHS, method=inference)

        self.sample_proba = self._sample_probability(instances)

        return None
Example #32
def model_uncertainty(splits, stakes, actions, temp=1., sd=1.):
    with pm.Model() as repeated_model:
        r = pm.Gamma('r', alpha=1, beta=1)
        p = pm.Gamma('p', alpha=1, beta=1)
        t = pm.Beta('t', alpha=2, beta=5)
        st = pm.Beta('st', alpha=1, beta=1)
        c = pm.Gamma('c', alpha=1, beta=1)
        odds_a = np.exp(2 * r * splits + c * stakes**st)
        odds_r = np.exp(p * (splits < 0.5 - t / 2))
        p = odds_a / (odds_r + odds_a)
        a = pm.Binomial('a', 1, p, observed=actions)
        fitted = pm.fit(method='advi')
        trace_repeated = fitted.sample(2000)
        # trace_repeated = pm.sample(200000, step=pm.Slice(), chains=2, cores=4)

    # with pm.Model() as simple_model:
    #   r = pm.Normal('r', mu=0, sd=1)
    #   p = np.exp(r*splits) / (1 + np.exp(r*splits))
    #   a = pm.Binomial('a', 1, p, observed=actions)
    #   trace_simple = pm.sample(2000, init='map')

    with pm.Model() as fairness_model:
        r = pm.Gamma('r', alpha=1, beta=1)
        t = pm.Beta('t', alpha=2, beta=5)
        f = pm.Normal('f', mu=0, sd=sd)
        st = pm.Beta('st', alpha=1, beta=1)
        c = pm.Gamma('c', alpha=1, beta=1)
        odds = np.exp(c * stakes**st + splits * r - f * (splits < 0.5 - t / 2))
        p = odds / (1 + odds)
        a = pm.Binomial('a', 1, p, observed=actions)
        fitted = pm.fit(method='advi')
        trace_fairness = fitted.sample(2000)
        # trace_fairness = pm.sample(200000, step=pm.Slice(), chains=2, cores=4)

    fairness_model.name = 'fair'
    repeated_model.name = 'repeated'
    model_dict = dict(
        zip([fairness_model, repeated_model],
            [trace_fairness, trace_repeated]))
    comp = pm.compare(model_dict, ic='LOO', method='BB-pseudo-BMA')
    return trace_fairness, trace_repeated, comp
Example #33
def test_var_replacement():
    X_mean = pm.floatX(np.linspace(0, 10, 10))
    y = pm.floatX(np.random.normal(X_mean*4, .05))
    with pm.Model():
        inp = pm.Normal('X', X_mean, shape=X_mean.shape)
        coef = pm.Normal('b', 4.)
        mean = inp * coef
        pm.Normal('y', mean, .1, observed=y)
        advi = pm.fit(100)
        assert advi.sample_node(mean).eval().shape == (10, )
        x_new = pm.floatX(np.linspace(0, 10, 11))
        assert advi.sample_node(mean, more_replacements={inp: x_new}).eval().shape == (11, )
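sample_node also accepts a size argument that prepends a sample dimension; a short follow-up sketch reusing advi, mean, inp and x_new from the test above:

draws = advi.sample_node(mean, size=100, more_replacements={inp: x_new}).eval()
assert draws.shape == (100, 11)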
Example #34
def init_nuts(init='auto', njobs=1, n_init=500000, model=None,
              random_seed=-1, progressbar=True, **kwargs):
    """Set up the mass matrix initialization for NUTS.

    NUTS convergence and sampling speed is extremely dependent on the
    choice of mass/scaling matrix. This function implements different
    methods for choosing or adapting the mass matrix.

    Parameters
    ----------
    init : str
        Initialization method to use.

        * auto : Choose a default initialization method automatically.
          Currently, this is `'jitter+adapt_diag'`, but this can change in
          the future. If you depend on the exact behaviour, choose an
          initialization method explicitly.
        * adapt_diag : Start with an identity mass matrix and then adapt
          a diagonal based on the variance of the tuning samples. All
          chains use the test value (usually the prior mean) as starting
          point.
        * jitter+adapt_diag : Same as `adapt_diag`, but add uniform jitter
          in [-1, 1] to the starting point in each chain.
        * advi+adapt_diag : Run ADVI and then adapt the resulting diagonal
          mass matrix based on the sample variance of the tuning samples.
        * advi+adapt_diag_grad : Run ADVI and then adapt the resulting
          diagonal mass matrix based on the variance of the gradients
          during tuning. This is **experimental** and might be removed
          in a future release.
        * advi : Run ADVI to estimate posterior mean and diagonal mass
          matrix.
        * advi_map: Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point. This is discouraged.
        * nuts : Run NUTS and estimate posterior mean and mass matrix from
          the trace.
    njobs : int
        Number of parallel jobs to start.
    n_init : int
        Number of iterations of the initializer.
        If 'ADVI', number of iterations; if 'nuts', number of draws.
    model : Model (optional if in `with` context)
    progressbar : bool
        Whether or not to display a progressbar for advi sampling.
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """
    model = pm.modelcontext(model)

    vars = kwargs.get('vars', model.vars)
    if set(vars) != set(model.vars):
        raise ValueError('Must use init_nuts on all variables of a model.')
    if not pm.model.all_continuous(vars):
        raise ValueError('init_nuts can only be used for models with only '
                         'continuous variables.')

    if not isinstance(init, str):
        raise TypeError('init must be a string.')

    if init is not None:
        init = init.lower()

    if init == 'auto':
        init = 'jitter+adapt_diag'

    pm._log.info('Initializing NUTS using {}...'.format(init))

    random_seed = int(np.atleast_1d(random_seed)[0])

    cb = [
        pm.callbacks.CheckParametersConvergence(
            tolerance=1e-2, diff='absolute'),
        pm.callbacks.CheckParametersConvergence(
            tolerance=1e-2, diff='relative'),
    ]

    if init == 'adapt_diag':
        start = [model.test_point] * njobs
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
        if njobs == 1:
            start = start[0]
    elif init == 'jitter+adapt_diag':
        start = []
        for _ in range(njobs):
            mean = {var: val.copy() for var, val in model.test_point.items()}
            for val in mean.values():
                val[...] += 2 * np.random.rand(*val.shape) - 1
            start.append(mean)
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
        if njobs == 1:
            start = start[0]
    elif init == 'advi+adapt_diag_grad':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init, method='advi', model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField
        start = approx.sample(draws=njobs)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        mean = approx.bij.rmap(approx.mean.get_value())
        mean = model.dict_to_array(mean)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdaptGrad(
            model.ndim, mean, cov, weight)
        if njobs == 1:
            start = start[0]
    elif init == 'advi+adapt_diag':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init, method='advi', model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField
        start = approx.sample(draws=njobs)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        mean = approx.bij.rmap(approx.mean.get_value())
        mean = model.dict_to_array(mean)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, cov, weight)
        if njobs == 1:
            start = start[0]
    elif init == 'advi':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init, method='advi', model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window
        )  # type: pm.MeanField
        start = approx.sample(draws=njobs)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        potential = quadpotential.QuadPotentialDiag(cov)
        if njobs == 1:
            start = start[0]
    elif init == 'advi_map':
        start = pm.find_MAP()
        approx = pm.MeanField(model=model, start=start)
        pm.fit(
            random_seed=random_seed,
            n=n_init, method=pm.KLqp(approx),
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window
        )
        start = approx.sample(draws=njobs)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        potential = quadpotential.QuadPotentialDiag(cov)
        if njobs == 1:
            start = start[0]
    elif init == 'map':
        start = pm.find_MAP()
        cov = pm.find_hessian(point=start)
        start = [start] * njobs
        potential = quadpotential.QuadPotentialFull(cov)
        if njobs == 1:
            start = start[0]
    elif init == 'nuts':
        init_trace = pm.sample(draws=n_init, step=pm.NUTS(),
                               tune=n_init // 2,
                               random_seed=random_seed)
        cov = np.atleast_1d(pm.trace_cov(init_trace))
        start = list(np.random.choice(init_trace, njobs))
        potential = quadpotential.QuadPotentialFull(cov)
        if njobs == 1:
            start = start[0]
    else:
        raise NotImplementedError('Initializer {} is not supported.'.format(init))

    step = pm.NUTS(potential=potential, **kwargs)

    return start, step
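A sketch of how init_nuts is typically invoked, mirroring the call in the sample_chain example earlier (the model context and iteration counts are assumptions):

with model:                                    # assumed model context
    start, step = pm.init_nuts(init='advi+adapt_diag', njobs=1, n_init=50000)
    trace = pm.sample(1000, step=step, start=start)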
		output = pm.Poisson('spikes', mu = p, observed = spikes)

	# Sample from the model - using 2 chains in parallel (minimum to compare traceplots and rhat values)
	# Eventually variational inference with advi seems a better prospect - NUTS is too slow/finicky to sample
	# The logic of using ADVI here follows from: https://pymc-devs.github.io/pymc3/notebooks/bayesian_neural_network_opvi-advi.html
	# And also from: https://pymc-devs.github.io/pymc3/notebooks/variational_api_quickstart.html
	# Here we aren't scaling the variance of the gradient as it doesn't seem to give much improvement in simple models (look at the first link above)
	
	# We also use callbacks similar to those used in the 'init' portion of pm.sample - this stops ADVI once it has converged/ELBO doesn't change beyond a threshold
	cb = [pm.callbacks.CheckParametersConvergence(diff='absolute', tolerance = 1e-4), pm.callbacks.CheckParametersConvergence(diff='relative', tolerance = 1e-4),]
	with model:
		#trace = pm.sample(num_samples + 1000, tune = 1000)[1000:]
		#v_params = pm.variational.advi(n = 200000)
		#trace = pm.variational.sample_vp(v_params, draws=num_samples)
		inference = pm.fit(n=200000, method = 'fullrank_advi', callbacks = cb)
		trace = inference.sample(num_samples)

	# Print the Gelman-Rubin statistics for this model to file
	#print('\n', file = f)
	#print("======================== Unit {} ============================", file = f)
	#print(pm.diagnostics.gelman_rubin(trace), file = f)
	#print("=============================================================", file = f)

	# Run through the laser conditions and tastes again, and save the model results in results
	# The strategy now is to run through the MCMC samples, and calculate the difference in the Poisson mean between the laser on and off conditions for the different tastes
	for laser_status in range(lasers.shape[0] - 1):
		for stimulus in range(len(trains_dig_in)):
			# First calculate the mean firing rate for the laser off (control) condition for this taste
			bayesian_results[laser_status, stimulus, :, 0] = np.exp(trace['b_t'][:, stimulus] + trace['b_l'][:, 2*laser_status + 1] + trace['b_t_l'][:, stimulus, 2*laser_status + 1])