def fit(self, sampling_size=5000, fast_sample=False):
        with pm.Model() as self.model:

            rho = pm.Exponential('rho', 1/5, shape=self.dim_gp)
            tau = pm.Exponential('tau', 1/3)

            cov_func = pm.gp.cov.Matern52(self.dim_gp, ls=rho)
            self.gp = pm.gp.Latent(cov_func=cov_func)
            f = self.gp.prior("f", X=self.locations)

            mean_func = f
            self.beta_list = []
            if self.covariates:
                for i in range(len(self.covariates)):
                    beta = pm.Normal('_'.join(['beta', str(i)]), mu=0, sd=50)
                    self.beta_list.append(beta)
                    mean_func = mean_func + beta*self.covariates[i]

            sigma = pm.HalfNormal('sigma', sd=20)
            y = pm.Normal('Y', mu=mean_func, sd=sigma, observed=self.response)

            if fast_sample:
                inference = pm.ADVI()
                approx = pm.fit(n=25000, method=inference)  # run until convergence
                self.trace = approx.sample(draws=sampling_size)
            else:
                start = pm.find_MAP()
                self.trace = pm.sample(sampling_size, tune=10000, chains=4, start=start)
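
The non-fast branch above samples with MCMC, while the `fast_sample` branch shows the pattern that recurs throughout these examples: build a model, optimise a pm.ADVI() approximation with pm.fit, then draw posterior samples from the result. A minimal self-contained sketch of that pattern on a toy regression (the data and variable names here are illustrative, not taken from the snippet above):

import numpy as np
import pymc3 as pm

# toy data for illustration only
x = np.random.randn(200)
y_obs = 2.0 * x + 0.5 * np.random.randn(200)

with pm.Model():
    beta = pm.Normal('beta', mu=0, sd=10)
    sigma = pm.HalfNormal('sigma', sd=5)
    pm.Normal('y', mu=beta * x, sd=sigma, observed=y_obs)

    inference = pm.ADVI()                        # mean-field variational approximation
    approx = pm.fit(n=25000, method=inference)   # optimise the ELBO
    trace = approx.sample(draws=5000)            # sample from the fitted approximation

print(pm.summary(trace))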
Example No. 2
def main():
    config = create_configuration(filename='/regression-siso.json')
    dataset = get_dataset(config.dataset, testing=False)

    # %%
    x_train = dataset.x
    y_train = dataset.y
    x = theano.shared(x_train)
    y = theano.shared(y_train)
    nn = construct_nn(x=x, y=y, config=config)

    # ADVI
    with nn:
        inference = pm.ADVI()
        approx = pm.fit(n=50000, method=inference)
    trace = approx.sample(draws=5000)

    # with nn:
    #     step = pm.NUTS()
    #     trace = pm.sample(2000, tune=1000, cores=4, step=step)
    print(pm.summary(trace))

    x.set_value(x_train)
    y.set_value(y_train)

    with nn:
        ppc = pm.sample_ppc(trace, samples=500, progressbar=False)
Example No. 3
 def _build_BPF(self):
     print('start building the Bayesian probabilistic model')
     self.x_u = theano.shared(self.train_u)
     self.x_i = theano.shared(self.train_i)
     self.y_r = theano.shared(self.train_r)
     self.y_r_ui = theano.shared(np.array(self.nn_r_ui))
     assert (len(self.y_r.get_value()) == len(self.y_r_ui.get_value()))
     with pm.Model() as self.bncf:  #define the prior and likelihood
         b_u = pm.Normal('b_u', 0, sd=1, shape=self.shape[0])
         b_i = pm.Normal('b_i', 0, sd=1, shape=self.shape[1])
         u = pm.Normal('u', 0, sd=1)
         tY = pm.Deterministic(
             'tY',
             tt.add(
                 tt.add(tt.add(b_u[self.x_u], b_i[self.x_i]), self.y_r_ui),
                 u))
         #tY = pm.Deterministic('tY', ((b_u[self.x_u]+b_i[self.x_i])+self.y_r_ui)+u)#b_u+b_i+u+nn_r_ui
         nY = pm.Deterministic('nY', pm.math.sigmoid(tY))
         # likelihood of observed data
         Y = pm.Bernoulli(
             'Y', nY,
             observed=self.y_r)  #total_size=self.y_r.get_value().shape[0]
     with self.bncf:  #inference
         approx = pm.fit(n=1000, method=pm.ADVI())
         self.trace = approx.sample(draws=500)
     with self.bncf:  #posterior prediction
         ppc = pm.sample_posterior_predictive(self.trace, progressbar=True)
         self.by_r_ui = ppc['Y'].mean(axis=0)
     print('done building the Bayesian probabilistic model')
Example No. 4
    def fit(self, fast_sampling=True, sample_size=3000):

        with pm.Model() as self.model:
            beta = pm.Normal('beta', mu=0.0, tau=1.0, shape=(self.dim + 1, 1))
            # Priors for spatial random effects
            tau = pm.Gamma('tau', alpha=2., beta=2.)
            alpha = pm.Uniform('alpha', lower=0, upper=1)
            phi = pm.MvNormal('phi',
                              mu=0,
                              tau=tau * (self.D - alpha * self.weight_matrix),
                              shape=(1, self.N))

            # Mean model
            mu = pm.Deterministic('mu', tt.dot(self.covariates, beta) + phi.T)
            theta_sd = pm.Gamma('theta_sd', alpha=1.0, beta=1.0)
            # Likelihood
            Yi = pm.Normal('Yi',
                           mu=mu.ravel(),
                           tau=theta_sd,
                           observed=self.response_var)

            if fast_sampling:
                inference = pm.ADVI()
                approx = pm.fit(n=50000, method=inference)  # run until convergence
                self.trace = approx.sample(draws=sample_size)
            else:
                self.trace = pm.sample(sample_size, cores=2, tune=1000)

        self._report_credible_interval(self.trace, 'beta')
        self._report_credible_interval(self.trace, 'tau')
Example No. 5
    def fast_sample(self, sample_size=5000, iters=10000):
        if self.model is None:
            self.fit()

        with self.model:
            inference = pm.ADVI()
            approx = pm.fit(n=iters, method=inference)  # run until convergence
            self.trace = approx.sample(draws=sample_size)
Example No. 6
def fit_model_LN(N,
                 J,
                 D,
                 R,
                 T,
                 Sigmas,
                 featvar_id,
                 filename,
                 c,
                 normalize,
                 batch=False):
    model = pm.Model()
    with model:
        """hyperparameters"""
        theta_prior = stickbreak_prior('theta', 1., T)
        alpha = .1
        """priors"""
        theta = pm.Dirichlet('theta', theta_prior, shape=T)
        psi = [[
            pm.MvNormal('psi_{}_{}'.format(t, d),
                        mu=tt.zeros(R[d]),
                        cov=tt.exp(-Sigmas[d]),
                        shape=R[d]) for d in range(D)
        ] for t in range(T)]
        phi = tt.stack([
            tt.concatenate([
                pm.Deterministic('phi_{}_{}'.format(t, d),
                                 tt.nnet.softmax(psi[t][d]))[0]
                for d in range(D)
            ]) for t in range(T)
        ])
        """likelihood"""
        target = pm.DensityDist('target',
                                loglik(theta=theta, phi=phi),
                                observed=dict(featvar_id=featvar_id))
        """fit model"""
        inference = pm.ADVI()
        inference.fit(100000,
                      obj_optimizer=pm.adam(learning_rate=.01, beta1=.8),
                      callbacks=[pm.callbacks.CheckParametersConvergence()])
        trace = inference.approx.sample()
        posterior = {
            k: trace[k]
            for k in trace.varnames if not k.endswith('__')
        }
        posterior['ELBO'] = inference.hist
        if batch == False:
            f = open(
                'posterior_LN_{}_{}_{}.pkl'.format(
                    filename.split('.')[0], c, normalize), 'wb')
        else:
            f = open(
                'posterior_LN_{}_{}_{}_holdout_{}.pkl'.format(
                    filename.split('.')[0], c, normalize, batch), 'wb')
        pkl.dump(posterior, f)
        f.close()
Example No. 7
def main():
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        print(
            'usage: python3 inference_dir.py [chain no] [optional output no]')
        sys.exit()
    elif len(sys.argv) == 2:
        c = int(sys.argv[1])
        d = int(sys.argv[1])
    if len(sys.argv) == 3:
        c = int(sys.argv[1])
        d = int(sys.argv[2])
    np.random.seed(c)
    np.random.shuffle(lang_ind)
    np.random.shuffle(sound_ind)
    lang_minibatch = pm.Minibatch(lang_ind, 500)
    sound_minibatch = pm.Minibatch(sound_ind, 500)
    model_ln = pm.Model()
    with model_ln:
        beta = pm.HalfFlat('beta')
        "theta = language-level prior over components"
        theta = tt.stack([
            pm.Dirichlet('theta_{}'.format(l), a=tt.ones(K) * beta, shape=K)
            for l in range(L)
        ])
        psi = [
            pm.MvNormal('psi_{}'.format(k), mu=[0] * S, cov=Sigma, shape=S)
            for k in range(K)
        ]
        "phi = component-level collection of distributions over sound change"
        phi = tt.stack([
            tt.concatenate([
                pm.Deterministic(
                    'phi_{}_{}'.format(k, x),
                    tt.nnet.softmax(psi[k][s_breaks[x][0]:s_breaks[x][1]])[0])
                for x in range(X)
            ]) for k in range(K)
        ])
        target = pm.DensityDist('target',
                                logprob(theta=theta, phi=phi),
                                observed=dict(lang_array=lang_minibatch,
                                              sound_array=sound_minibatch),
                                total_size=N)
        inference_ln = pm.ADVI()
        inference_ln.fit(50000,
                         obj_optimizer=pm.adam(learning_rate=.01,
                                               beta1=uniform(.7, .9)),
                         callbacks=[pm.callbacks.CheckParametersConvergence()])
        trace_ln = inference_ln.approx.sample()
        posterior = {
            k: trace_ln[k]
            for k in trace_ln.varnames if not k.endswith('__')
        }
        posterior['ELBO'] = inference_ln.hist
        f = open('posterior_ln_shuffle_{}.pkl'.format(d), 'wb')
        pkl.dump(posterior, f)
        f.close()
Example No. 8
    def fit(self, sample_size, traceplot_name=None, fast_sampling=False):
        '''
        sample_size (int): The size of the sample
        traceplot_name (str): The name of the traceplot file
        fast_sampling (bool): whether variational approximation (ADVI) should be used instead of MCMC.

        Note: to evaluate the kernel function, PyMC3 only accepts theano tensor types.
        '''
        self.model = pm.Model()
        # self.X_train = tt.constant(self.X_train) #need tensor type
        self.X_train = shared(self.X_train)

        with self.model:
            evaluated_kernels = []
            packed_L = pm.LKJCholeskyCov('packed_L',
                                         n=3,
                                         eta=2.,
                                         sd_dist=pm.HalfCauchy.dist(2.5))
            L = pm.expand_packed_triangular(3, packed_L)

            for center in self.centers.values:
                evaluated_kernels.append(
                    pm.MvNormal.dist(mu=center, chol=L).logp(self.X_train))

            beta = pm.Normal('beta', mu=0, sd=3, shape=self.number_of_centers)
            latentProcess = pm.Deterministic('mu',
                                             tt.dot(beta, evaluated_kernels))

            error = pm.HalfCauchy('error', 12)
            y_ = pm.Normal("y",
                           mu=latentProcess,
                           sd=error,
                           observed=np.log(self.y_train))

            if fast_sampling:
                with self.model:
                    inference = pm.ADVI()
                    approx = pm.fit(n=sample_size,
                                    method=inference)  # run until convergence
                    self.trace = approx.sample(draws=sample_size)

            else:
                with self.model:
                    start = pm.find_MAP()
                    self.trace = pm.sample(sample_size, start=start)

            if traceplot_name:
                fig, axs = plt.subplots(3, 2)  # 2 RVs
                pm.traceplot(self.trace,
                             varnames=['packed_L', 'beta', 'error'],
                             ax=axs)
                fig.savefig(traceplot_name)

                fig_path = os.path.join(os.getcwd(), traceplot_name)
                print(f'the traceplot has been saved to {fig_path}')
Example No. 9
def sample_chain(model,
                 chain_i=0,
                 step=None,
                 num_samples=MAX_NUM_SAMPLES,
                 advi=False,
                 tune=5,
                 discard_tuned_samples=True,
                 num_scale1_iters=NUM_SCALE1_ITERS,
                 num_scale0_iters=NUM_SCALE0_ITERS):
    """Sample single chain from constructed Bayesian model"""
    start = timer()
    with model:
        if not advi:
            pm._log.info('Assigning NUTS sampler...')
            if step is None:
                start_, step = pm.init_nuts(init='advi',
                                            njobs=1,
                                            n_init=NUM_INIT_STEPS,
                                            random_seed=-1,
                                            progressbar=False)

            discard = tune if discard_tuned_samples else 0
            for i, trace in enumerate(
                    pm.iter_sample(num_samples + discard,
                                   step,
                                   start=start_,
                                   chain=chain_i)):
                if i == 0:
                    min_num_samples = get_min_samples_per_chain(
                        len(trace[0]), MIN_SAMPLES_CONSTANT, NUM_CHAINS)
                elapsed = timer() - start
                if elapsed > SOFT_MAX_TIME_IN_SECONDS / NUM_CHAINS:
                    print('exceeded soft time limit...')
                    if i + 1 - discard >= min_num_samples:
                        print('collected enough samples; stopping')
                        break
                    else:
                        print('but only collected {} of {}; continuing...'.
                              format(i + 1 - discard, min_num_samples))
                        if elapsed > HARD_MAX_TIME_IN_SECONDS / NUM_CHAINS:
                            print('exceeded HARD time limit; STOPPING')
                            break
            return trace[discard:]
        else:  # ADVI for neural networks
            scale = theano.shared(pm.floatX(1))
            vi = pm.ADVI(cost_part_grad_scale=scale)
            pm.fit(n=num_scale1_iters, method=vi)
            scale.set_value(0)
            approx = pm.fit(n=num_scale0_iters, method=vi)  # continue the same approximation with scale = 0
            # one sample to get dimensions of trace
            trace = approx.sample(draws=1)
            min_num_samples = get_min_samples_per_chain(
                len(trace.varnames), MIN_SAMPLES_CONSTANT, 1)
            trace = approx.sample(draws=min_num_samples)
            return trace
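
The ADVI branch above relies on the cost_part_grad_scale trick: a shared scale variable multiplies part of the cost gradient, starting at 1 and set to 0 for a final round of fitting. A condensed sketch of just that schedule, assuming an already-built PyMC3 `model` and placeholder iteration counts:

import theano
import pymc3 as pm

with model:
    scale = theano.shared(pm.floatX(1))
    vi = pm.ADVI(cost_part_grad_scale=scale)
    pm.fit(n=20000, method=vi)           # first stage: scale = 1
    scale.set_value(0)                   # second stage: drop that part of the gradient
    approx = pm.fit(n=10000, method=vi)  # continue fitting the same approximation
    trace = approx.sample(draws=1000)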
Example No. 10
    def fit(self, instances: np.ndarray,
            labels: np.ndarray) -> Optional[List[str]]:

        self.model = self._construct_nn(instances, labels)
        with self.model:
            inference = pm.ADVI()
            self.approx = pm.fit(n=EPOCHS, method=inference)

        self.sample_proba = self._sample_probability(instances)

        return None
Example No. 11
def inference_with_model(model):
    with model:
        advi = pm.ADVI()
        tracker = pm.callbacks.Tracker(mean=advi.approx.mean.eval,
                                       std=advi.approx.std.eval)
        mean_field = advi.fit(
            n=vi_params["n"],
            callbacks=[CheckParametersConvergence(), tracker],
        )
    vi_trace = mean_field.sample(draws=sampler_params["draws"])
    return advi, vi_trace, mean_field, tracker
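
The Tracker callback above records the approximation's mean and std at every iteration. A plausible way to inspect what inference_with_model returns (a sketch, assuming matplotlib and a built PyMC3 `model`; tracker histories are indexed by the keyword names given to Tracker):

import matplotlib.pyplot as plt

advi, vi_trace, mean_field, tracker = inference_with_model(model)

fig, (ax_mu, ax_sd, ax_elbo) = plt.subplots(1, 3, figsize=(12, 3))
ax_mu.plot(tracker['mean'])   # history of approximation means
ax_sd.plot(tracker['std'])    # history of approximation standard deviations
ax_elbo.plot(advi.hist)       # negative ELBO over iterations
ax_elbo.set_yscale('log')
plt.show()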
Example No. 12
def ar_model_pred_advi_dynamic(X, ar_order):
    # prepare training dataset
    train_size = int(X.shape[0] * 0.66)
    train, test = X.iloc[0:train_size], X.iloc[train_size:]
    history = [x for x in train]
    # make predictions
    predictions = list()
    for t in range(test.shape[0]):
        tau = 0.001

        model = pm.Model()
        with model:

            beta = pm.Uniform('beta', lower=-1, upper=1, shape=ar_order)
            y_obs = pm.AR('y_obs', rho=beta, tau=tau, observed=history)
            #trace = pm.sample(2000, tune=1000)
            # fit with ADVI, then draw posterior samples from the approximation
            advi_fit = pm.fit(method=pm.ADVI(), n=30000)
            advi_trace = advi_fit.sample(10000)

        values = history[len(history) - ar_order:]
        values = values[::-1]
        yhat = np.dot(get_coef_from_trace(advi_trace), values)
        predictions.append(yhat)
        history.append(test[t])
        history = history[1:]
    # calculate out of sample error
    #error = mean_squared_error(test, predictions)
    predictions = pd.DataFrame(predictions)
    predictions.set_index(X[train_size:X.shape[0]].index,
                          inplace=True,
                          drop=True)
    return predictions[0]
Example No. 13
    def fit(self, iters=10000):
        with self.model:
            inference = pm.ADVI()
            approx = pm.fit(n=iters, method=inference)
            trace = approx.sample(iters // 2)

        # save
        s = len(trace) // 2
        self.trace = trace
        self.inference = inference
        self.z = trace[s::]['z'].mean(axis=0)
        self.mu = trace[s::]['mu'].mean(axis=0)
        self.alpha = trace[s::]['alpha'].mean(axis=0)
        self.w = trace[s::]['w'].mean(axis=0)
Example No. 14
def main():
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        print(
            'usage: python3 inference_dir.py [chain no] [optional output no]')
        sys.exit()
    elif len(sys.argv) == 2:
        c = int(sys.argv[1])
        d = int(sys.argv[1])
    if len(sys.argv) == 3:
        c = int(sys.argv[1])
        d = int(sys.argv[2])
    np.random.seed(c)
    lang_minibatch = pm.Minibatch(lang_ind, 500)
    sound_minibatch = pm.Minibatch(sound_ind, 500)
    model_dir = pm.Model()
    with model_dir:
        beta = pm.HalfFlat('beta')
        "theta = language-level prior over components"
        theta = tt.stack([
            pm.Dirichlet('theta_{}'.format(l), a=tt.ones(K) * beta, shape=K)
            for l in range(L)
        ])
        phi = tt.stack([
            tt.concatenate([
                pm.Dirichlet('phi_{}_{}'.format(k, x),
                             a=tt.ones(R[x]) * alpha,
                             shape=R[x]) for x in range(X)
            ]) for k in range(K)
        ])
        target = pm.DensityDist('target',
                                logprob(theta=theta, phi=phi),
                                observed=dict(lang_array=lang_minibatch,
                                              sound_array=sound_minibatch),
                                total_size=N)
        inference_dir = pm.ADVI()
        inference_dir.fit(
            50000,
            obj_optimizer=pm.adam(learning_rate=.01, beta1=uniform(.7, .9)),
            callbacks=[pm.callbacks.CheckParametersConvergence()])
        trace_dir = inference_dir.approx.sample()
        posterior = {
            k: trace_dir[k]
            for k in trace_dir.varnames if not k.endswith('__')
        }
        posterior['ELBO'] = inference_dir.hist
        f = open('posterior_dir_{}.pkl'.format(d), 'wb')
        pkl.dump(posterior, f)
        f.close()
Example No. 15
    def _advi_inference(self, inference_args):
        """
        Runs variational ADVI and then samples from those results.

        Parameters
        ----------
        inference_args : dict, arguments to be passed to the PyMC3 fit method. See PyMC3 doc for permissible values.
        """
        with self.cached_model:
            inference = pm.ADVI()
            approx = pm.fit(method=inference, **inference_args)

        self.approx = approx
        self.trace = approx.sample(draws=self.default_advi_sample_draws)
        self.summary = pm.df_summary(self.trace)
        self.advi_hist = inference.hist
Example No. 16
def train(neural_network, inference_file, model_file, hypers):
    set_tt_rng(MRG_RandomStreams(42))

    with neural_network:
        inference = pm.ADVI()
        approx = pm.fit(n=hypers['n_sample'],
                        method=inference,
                        obj_optimizer=pm.adam(learning_rate=hypers['lr']))
        # approx = pm.fit(n=50000, method=inference, obj_optimizer=pm.adam(learning_rate=0.01))

    with open(inference_file, "wb") as f:
        pickle.dump(inference, f, pickle.HIGHEST_PROTOCOL)
    with open(model_file, "wb") as f:
        pickle.dump(approx, f, pickle.HIGHEST_PROTOCOL)

    return inference, approx
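
Since train pickles both the inference object and the fitted approximation, a later process can presumably reload them and draw samples without refitting. A small sketch under that assumption (inference_file and model_file are the paths passed to train):

import pickle

# reload the fitted approximation and draw fresh posterior samples
with open(model_file, "rb") as f:
    approx = pickle.load(f)
trace = approx.sample(draws=5000)

# the pickled inference object still carries the ELBO optimisation history
with open(inference_file, "rb") as f:
    inference = pickle.load(f)
print(len(inference.hist), "recorded ELBO values")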
Example No. 17
def run_inference(model, fit, samples, unbinned_array, model_save_dir,
                  model_name):
    """
    model : PyMC3 changepoint model as defined above
    fit : number of iterations to fit
    samples : number of samples to generate from fitted model
    unbinned_array : raw (unbinned) data array, stored in the model dump as 'fulldata'
    model_save_dir : parent directory of where to save model
    model_name : name for the model
    """

    #model_dump_path = os.path.join(model_save_dir,f'dump_{model_name}.pkl')
    model_dump_path = get_model_dump_path(model_name, model_save_dir)
    #trace_dump_path = os.path.join(model_save_dir,f'traces_{model_name}.pkl')

    if os.path.exists(model_dump_path):
        print('Trace loaded from cache')
        with open(model_dump_path, 'rb') as buff:
            data = pickle.load(buff)
        model = data['model']
        #inference = data['inference']
        approx = data['approx']
        # Remove pickled data to conserve memory
        del data
        # Recreate samples
        trace = approx.sample(draws=samples)
    else:
        with model:
            inference = pm.FullRankADVI()  # full-rank variational approximation
            approx = pm.fit(n=fit, method=inference)
            trace = approx.sample(draws=samples)

        # Extract relevant variables from trace
        lambda_stack = trace['lambda'].swapaxes(0, 1)
        tau_samples = trace['tau']

        print('Dumping trace to cache')
        with open(model_dump_path, 'wb') as buff:
            pickle.dump(
                {
                    'model': model,
                    'approx': approx,
                    'lambda': lambda_stack,
                    'tau': tau_samples,
                    'data': model.obs.observations,
                    'fulldata': unbinned_array
                }, buff)
Example No. 18
    def fit(self,
            x,
            y,
            epochs=30000,
            method='advi',
            batch_size=128,
            n_models=1,
            **sample_kwargs):
        """

        :param x:
        :param y:
        :param epochs:
        :param method:
        :param batch_size: int or array. For hierarchical models, batch along the second dimension (e.g., [None, 128])
        :param sample_kwargs:
        :return:
        """
        self.train_x = x
        with self.model:
            if method == 'nuts':
                # self.x.set_value(x)
                # self.y.set_value(y)
                for _ in range(n_models):
                    self.trace.append(pm.sample(epochs, **sample_kwargs))
            else:
                mini_x = pm.Minibatch(x, batch_size=batch_size, dtype=floatX)
                mini_y = pm.Minibatch(y, batch_size=batch_size, dtype=floatX)

                if method == 'advi':
                    inference = pm.ADVI()
                elif method == 'svgd':
                    inference = pm.SVGD()
                else:
                    raise ValueError("method must be 'nuts', 'advi' or 'svgd'")
                for _ in range(n_models):
                    approx = pm.fit(n=epochs,
                                    method=inference,
                                    more_replacements={
                                        self.x: mini_x,
                                        self.y: mini_y
                                    },
                                    **sample_kwargs)
                    self.trace.append(approx.sample(draws=20000))
                    self.approx.append(approx)
Example No. 19
def test_save_load(tmp_path_factory, c, sig_defs):

    # make small for speed
    c = c[0:30]
    sig_defs = sig_defs[0:5]

    dataset_args = {'foo': 'bar'}
    model_args = {'bar': 'baz'}
    pymc3_args = {'baz': 'foo'}

    # train a model with 5 sigs
    with pm.Model() as model:
        data = pm.Data("data", c)
        N = data.sum(1).reshape((c.shape[0], 1))
        activities = ch_dirichlet("activities",
                                  a=np.ones(5),
                                  shape=(c.shape[0], 5))
        B = pm.math.dot(activities, sig_defs)
        pm.Multinomial('corpus', n=N, p=B, observed=data)

        trace = pm.ADVI()
        trace.fit()

    # checkpoint
    fp = tmp_path_factory.mktemp("ckp") / "vanilla_lda.ckp"
    save_checkpoint(fp, model, trace, dataset_args, model_args, pymc3_args)

    # load model
    m2, t2, dataset_args2, model_args2, pymc3_args2 = load_checkpoint(fp)

    # all params should be identical
    # checks are weak because __eq__ methods are not provided
    #assert str(model) == str(m2), 'model load failed'
    assert np.allclose(trace.hist, t2.hist), 'trace load failed'
    assert dataset_args == dataset_args2, 'dataset_args load failed'
    assert model_args == model_args2, 'model_args load failed'
    assert pymc3_args == pymc3_args2, 'pymc3_args load failed'

    # with same seed, both models should tune with same result
    # test model tuning
    trace.refine(100)
    t2.refine(100)
    assert np.allclose(trace.hist, t2.hist), 'trace tuning failed'
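
The refine call above continues optimising an already-fitted ADVI object, which is what makes the before/after comparison of trace.hist meaningful. A stripped-down sketch of that fit-then-refine pattern on a toy model (names and iteration counts are illustrative):

import numpy as np
import pymc3 as pm

with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=1)
    pm.Normal('obs', mu=mu, sd=1, observed=np.random.randn(100))

    advi = pm.ADVI()
    advi.fit(2000)     # initial optimisation; populates advi.hist
    advi.refine(500)   # continue from the current state; advi.hist keeps growing
    trace = advi.approx.sample(1000)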
Example No. 20
    def fit(self, model_name=None, n_iter=40000):

        p = self.p
        if model_name is not None:
            p = model_name

        try:
            os.mkdir("{}/{}".format(self.traces_dir, p))
        except FileExistsError:
            print("Dir exists")

        with self.model:

            advi = pm.ADVI()
            # how can the trace be saved when using pm.fit??
            #approx = advi.fit(n=n_draws, callbacks=[tracker])
            approx = advi.fit(n=n_iter)

            plt.plot(advi.hist)
            plt.legend()
            plt.title('ELBO')
            plt.xlabel('Iteration')
            plt.savefig(os.path.join(self.plot_dir,
                                     "ELBO/{}.eps".format(self.p)),
                        format="eps",
                        dpi=900)
            plt.close()

            trace = approx.sample(10000)

            with open("{}/{}/trace.pik".format(self.traces_dir, self.p),
                      'wb') as f:
                pickle.dump({'model': self.model, 'trace': trace}, f)

            #with open('trace.p', 'rb') as f:
            #    test1 = pickle.load(f)

        df = pd.DataFrame({"estimate": trace["estimate"][:, 0, 0]})
        df.to_csv("{}/{}/chain-0.tsv".format(self.traces_dir, p))

        self.trace = trace

        return trace
Example No. 21
    def _advi_inference(self, inference_args, num_advi_sample_draws):
        """
        Runs variational ADVI and then samples from those results.
        Parameters
        ----------
        inference_args : dict
            arguments to be passed to the PyMC3 fit method
            See PyMC3 doc for permissible values.
        num_advi_sample_draws : int
            Number of samples to draw from ADVI approximation after it has been fit
        """
        with self.cached_model:
            inference = pm.ADVI()
            approx = pm.fit(method=inference, **inference_args)

        self.approx = approx
        self.trace = approx.sample(draws=num_advi_sample_draws)
        self.summary = pm.summary(self.trace)
        self.advi_hist = inference.hist
Example No. 22
def run_model_estimation(int_point,
                         y_elec,
                         bad_trials,
                         surprise_reg=None,
                         model_type="OLS"):
    """
    Inputs: int_point - sampling point in interstimulus interval
            y_elec - array with eeg recordings (num_trials x num_interstim_rec)
            surprise_reg - num_trials x 1 surprise from Baye learning model
            model_type - regression model
    Output: Time-series of log model evidence/Negative free energy
            from VI on Bayesian model
    """
    # Normalize the data and regressor to lie within 0, 1
    y_std = normalize(y_elec[:, int_point])
    surprise_reg_std = normalize(surprise_reg)

    # Select specific model OLS/Hierarchical
    if model_type == "OLS":
        model = OLS_model(y_std, bad_trials, surprise_reg_std)
    elif model_type == "Hierarchical":
        model = Hierarchical_model(y_std, bad_trials, surprise_reg_std)
    elif model_type == "Bayesian-MLP":
        model = Bayesian_NN(y_std, bad_trials, surprise_reg_std)
    elif model_type == "Null":
        model = Null_model(y_std, bad_trials)
    else:
        raise "Provide a valid model type"

    # Run the Variational Inference scheme with ADVI
    # ADVI - Automatic Differentiation VI
    with model:
        inference = pm.ADVI()
        approx = pm.fit(
            method=inference,
            callbacks=[
                pm.callbacks.CheckParametersConvergence(diff='absolute')
            ],
            n=30000,
            progressbar=0)
    # return full optimization trace of free energy
    return -approx.hist
Example No. 23
    def _inference(self, minibatches, n=200000):
        """
        Runs minibatch variational ADVI and then samples from those results.

        Parameters
        ----------
        minibatches: minibatches for ADVI

        n: number of iterations for ADVI fit, defaults to 200000
        """
        with self.cached_model:
            advi = pm.ADVI()
            approx = pm.fit(
                n=n,
                method=advi,
                more_replacements=minibatches,
                callbacks=[pm.callbacks.CheckParametersConvergence()])

        self.advi_trace = approx.sample(draws=10000)

        self.advi_hist = advi.hist
Example No. 24
    def fit(self,
            sampling_size=5000,
            traceplot_name=None,
            fast_sampling=False):
        '''
        Args:
            sampling_size (int): the length of the Markov chain
            traceplot_name (str): filename for the traceplot; no plot is generated if None
            fast_sampling (bool): whether to use the ADVI approximation instead of MCMC
        '''
        self.model = pm.Model()
        with self.model:
            rho = pm.Exponential('rho', 1 / 5, shape=3)
            tau = pm.Exponential('tau', 1 / 3)

            cov_func = pm.gp.cov.Matern52(3, ls=rho)
            self.gp = pm.gp.Marginal(cov_func=cov_func)

            sigma = pm.HalfNormal('sigma', sd=3)
            y_ = self.gp.marginal_likelihood('y',
                                             X=self.X_train,
                                             y=np.log(self.y_train),
                                             noise=sigma)

        if fast_sampling:
            with self.model:
                inference = pm.ADVI()
                approx = pm.fit(n=50000, method=inference)  # run until convergence
                self.trace = approx.sample(draws=sampling_size)

        else:
            with self.model:
                start = pm.find_MAP()
                self.trace = pm.sample(sampling_size, start=start, chains=1)

        if traceplot_name:
            fig, axs = plt.subplots(3, 2)  # 2 RVs
            pm.traceplot(self.trace, varnames=['rho', 'sigma', 'tau'], ax=axs)
            fig.savefig(traceplot_name)
            fig_path = os.path.join(os.getcwd(), traceplot_name)
            print(f'the traceplot has been saved to {fig_path}')
Example No. 25
    def setup_model(self, data):
        with pm.Model() as model:
            self.transmat_ = pm.Normal('Tmat',
                                       mu=1,
                                       sd=1,
                                       shape=(self.latent_dimension))
            self.hidden_states.append(
                pm.Normal('H0',
                          mu=0,
                          sd=1,
                          shape=(self.sample_minibatch, self.latent_dimension),
                          testval=np.random.randn(self.sample_minibatch,
                                                  self.latent_dimension)))
            for i in range(1, self.num_time_steps):
                self.hidden_states.append(
                    th.dot(self.hidden_states[-1], diag(self.transmat_)))
            F = pm.Normal('F',
                          mu=0,
                          sd=1,
                          shape=(self.latent_dimension, self.observ_dimension),
                          testval=np.random.randn(self.latent_dimension,
                                                  self.observ_dimension))
            for i in range(self.num_time_steps):
                self.observed_states.append(
                    pm.Normal('X_{}'.format(i),
                              mu=th.dot(self.hidden_states[i], F),
                              sd=1,
                              shape=(self.sample_minibatch,
                                     self.observ_dimension),
                              observed=data[i]))
            approx = pm.fit(n=45000, method=pm.ADVI())
            trace = approx.sample(500)

            import pickle
            with open('pick.dump2.pkl', 'wb') as buff:
                pickle.dump({
                    'model': model,
                    'approx': approx,
                    'trace': trace
                }, buff)
Example No. 26
    def fit(self, data, adviIterations):
        self.data = data
        self.yScaler.fit(data)
        laggedData = lagData(data, self.numLags)
        # changing basis
        # set basis
        self.radialBasis = RadialBasis(self.numBasis)
        self.radialBasis.fit(laggedData)
        # changeBasis
        changedBasis = self.radialBasis.transform(laggedData)
        # scaling for numeric funzies
        self.scaler.fit(changedBasis)
        changedBasis = self.scaler.transform(changedBasis)
        # set model predictors as shared so we can do the forecasting
        self.sharedPredictors = shared(changedBasis)
        # pymc model
        with self.model:
            theta = pm.Normal('theta', 0, 1,
                              shape = (self.numBasis, data.shape[1]))

            fX = pm.math.matrix_dot(self.sharedPredictors, theta)
            pm.Deterministic('fX', fX)
    
            yVec = pm.MvNormal('yVec', fX, 
                               tau = np.eye(data.shape[1]),
                               observed=self.yScaler.transform(
                                   data[self.numLags:, :]))

            advi = pm.ADVI()
            self.approx = pm.fit(n = adviIterations, method = advi)
        
        print('variational inference concluded')

        print(
            '''
            The sin which is unpardonable is knowingly and willfully to reject truth,
            to fear knowledge lest that knowledge pander not to thy prejudices.
            ''')

        self.fitted = True
Example No. 27
 def setup_model(self, data):
     with pm.Model() as model:
         init_states = np.random.randn(self.sample_minibatch,
                                       self.latent_dimension)
         self.hidden_states.append(
             pm.Normal('H0',
                       mu=0,
                       sd=1,
                       shape=(self.sample_minibatch, self.latent_dimension),
                       testval=init_states))
         for i in range(1, self.num_time_steps):
             self.hidden_states.append(
                 pm.Normal('H{}'.format(i + 1),
                           mu=self.hidden_states[-1],
                           sd=0.1,
                           shape=(self.sample_minibatch,
                                  self.latent_dimension),
                           testval=init_states))
         F = pm.Normal('F',
                       mu=0,
                       sd=1,
                       shape=(self.latent_dimension, self.observ_dimension),
                       testval=np.random.randn(self.latent_dimension,
                                               self.observ_dimension))
         for i in range(self.num_time_steps):
             self.observed_states.append(
                 pm.Normal('X_{}'.format(i),
                           mu=th.dot(self.hidden_states[i], F),
                           sd=1,
                           shape=(self.sample_minibatch,
                                  self.observ_dimension),
                           observed=data[i]))
         iters = 30000
         inference = pm.ADVI()
         approx = pm.fit(n=iters, method=inference)
         trace = approx.sample(500)
         plt.semilogy(list(range(iters)), inference.hist)
         plt.ylabel('ELBO')
         plt.xlabel('iteration')
         plt.savefig('linear_elbo.pdf')
Example No. 28
    def setup_model(self, data):
#        p = 0.8
        with pm.Model() as model:
            init_states = np.random.randn(self.sample_minibatch, self.latent_dimension)
            self.hidden_states.append(
                pm.Normal('H0',
                          mu=0,
                          sd=1,
                          shape=(self.sample_minibatch, self.latent_dimension),
                          testval=init_states))
            for i in range(1, self.num_time_steps):
                self.hidden_states.append(
                    pm.Normal('H{}'.format(i + 1),
                              mu=self.hidden_states[-1],
                              sd=0.1,
                              shape=(self.sample_minibatch, self.latent_dimension),
                              testval=init_states))

            l1_size = int((self.observ_dimension - self.latent_dimension) / 3) + self.latent_dimension
            l2_size = int((self.observ_dimension - self.latent_dimension) / 3) * 2 + self.latent_dimension
#            P0 = pm.Bernoulli('P0', p, shape=(self.latent_dimension, l1_size), testval=np.random.binomial(1, p, size=(self.latent_dimension, l1_size)))
            W0 = pm.Normal('W0', mu=0, sd=1,
                           shape=(self.latent_dimension, l1_size),
                           testval=np.random.randn(self.latent_dimension, l1_size))
#            P1 = pm.Bernoulli('P1', p, shape=(l1_size, l2_size), testval=np.random.binomial(1, p, size=(l1_size, l2_size)))
            W1 = pm.Normal('W1', mu=0, sd=1,
                           shape=(l1_size, l2_size),
                           testval=np.random.randn(l1_size, l2_size))
            W2 = pm.Normal('W2', mu=0, sd=1,
                           shape=(l2_size, self.observ_dimension),
                           testval=np.random.randn(l2_size, self.observ_dimension))

            for i in range(self.num_time_steps):
                pm.Normal('X_{}'.format(i),
                          mu=th.dot(
                              th.tensor.tanh(
                                  th.dot(th.tensor.tanh(th.dot(self.hidden_states[i], W0)),
                                         W1)),
                              W2),
                          sd=1,
                          shape=(self.sample_minibatch, self.observ_dimension),
                          observed=data[i])
            inference = pm.ADVI()
            iters = 150000
            approx = pm.fit(n=iters, method=inference)
            trace = approx.sample(500)

            plt.semilogy(list(range(iters)), inference.hist)
            #plt.yscale('log')
            plt.legend()
            plt.ylabel('ELBO')
            plt.xlabel('iteration')
            plt.savefig('nn_elbo.pdf')
            import pickle
            with open('nn5d_2layer_all.pkl', 'wb') as buff:
                pickle.dump(trace, buff)
Example No. 29
def sample_fc_nn(X,
                 y,
                 output,
                 hidden_dims=[NUM_HIDDEN],
                 num_samples=MAX_NUM_SAMPLES,
                 vi=True,
                 num_scale1_iters=NUM_SCALE1_ITERS,
                 num_scale0_iters=NUM_SCALE0_ITERS):
    """
    Sample from fully connected Bayesian neural network
    """
    nn = build_shallow_nn(X, y, output, hidden_dims)
    with nn:
        if vi:  # variational inference (fast)
            # common schedule for `scale` is 1 at the beginning and 0 at the end
            scale = theano.shared(pm.floatX(1))
            vi = pm.ADVI(cost_part_grad_scale=scale)
            pm.fit(n=num_scale1_iters, method=vi)
            scale.set_value(0)
            approx = pm.fit(n=num_scale0_iters, method=vi)  # continue the same approximation with scale = 0
            trace = approx.sample(draws=num_samples)
        else:  # NUTS (very slow)
            trace = pm.sample(num_samples)
    return format_trace(trace)
Example No. 30
    def fit_advi_iterative(self, n=3, method='advi', n_type='restart',
                           n_iter=None,
                           learning_rate=None, reducing_lr=False,
                           progressbar=True,
                           scale_cost_to_minibatch=True):
        """Find posterior using pm.ADVI() method directly (allows continuing training through `refine` method.
        (maximising likelihood of the data and minimising KL-divergence of posterior to prior - ELBO loss)

        Parameters
        ----------
        n :
            number of independent initialisations (Default value = 3)
        method :
            'advi', to allow for potential use of SVGD, MCMC, custom (currently only ADVI implemented). (Default value = 'advi')
        n_type :
            type of repeated initialisation:
            
            * **'restart'** to pick different initial value,
            * **'cv'** for molecular cross-validation - splits counts into n datasets, for now, only n=2 is implemented
            * **'bootstrap'** for fitting the model to multiple downsampled datasets.
              Run `mod.bootstrap_data()` to generate variants of data (Default value = 'restart')

        n_iter :
            number of iterations, supersedes self.n_iter specified when creating model instance. (Default value = None)
        learning_rate :
            learning rate, supersedes self.learning_rate specified when creating model instance. (Default value = None)
        reducing_lr :
            boolean, use decaying learning rate? (Default value = False)
        progressbar :
            boolean, show progress bar? (Default value = True)
        scale_cost_to_minibatch :
            when using training in minibatches, scale cost function appropriately?
            See discussion https://discourse.pymc.io/t/effects-of-scale-cost-to-minibatch/1429 to understand the effects. (Default value = True)

        Returns
        -------
        None
            self.mean_field dictionary with MeanField pymc3 objects,
            and self.advi dictionary with ADVI objects for each initialisation.

        """

        self.n_type = n_type
        self.scale_cost_to_minibatch = scale_cost_to_minibatch

        if n_iter is None:
            n_iter = self.n_iter

        if learning_rate is None:
            learning_rate = self.learning_rate

        ### Initialise optimiser ###
        if reducing_lr:
            # initialise the function for adaptive learning rate
            s = theano.shared(np.array(learning_rate).astype(self.data_type))

            def reduce_rate(a, h, i):
                s.set_value(np.array(learning_rate / ((i / self.n_obs) + 1) ** .7).astype(self.data_type))

            optimiser = pm.adam(learning_rate=s)
            callbacks = [reduce_rate, CheckParametersConvergence()]
        else:
            optimiser = pm.adam(learning_rate=learning_rate)
            callbacks = [CheckParametersConvergence()]

        if np.isin(n_type, ['bootstrap']):
            if self.X_data_sample is None:
                self.bootstrap_data(n=n)
        elif np.isin(n_type, ['cv']):
            self.generate_cv_data()  # cv data added to self.X_data_sample

        init_names = ['init_' + str(i + 1) for i in np.arange(n)]

        for i, name in enumerate(init_names):

            with self.model:

                self.advi[name] = pm.ADVI()

            # when type is molecular cross-validation or bootstrap, 
            # replace self.x_data tensor with new data
            if np.isin(n_type, ['cv', 'bootstrap']):

                # defining minibatch
                if self.minibatch_size is not None:
                    # minibatch main data - expression matrix
                    self.x_data_minibatch = pm.Minibatch(self.X_data_sample[i].astype(self.data_type),
                                                         batch_size=[self.minibatch_size, None],
                                                         random_seed=self.minibatch_seed[i])
                    more_replacements = {self.x_data: self.x_data_minibatch}

                    # if any other data inputs should be minibatched add them too
                    if self.extra_data is not None:
                        # for each parameter in the dictionary add it to more_replacements
                        for k in self.extra_data.keys():
                            more_replacements[self.extra_data_tt[k]] = \
                                pm.Minibatch(self.extra_data[k].astype(self.data_type),
                                             batch_size=[self.minibatch_size, None],
                                             random_seed=self.minibatch_seed[i])

                # or using all data
                else:
                    more_replacements = {self.x_data: self.X_data_sample[i].astype(self.data_type)}
                    # if any other data inputs should be added
                    if self.extra_data is not None:
                        # for each parameter in the dictionary add it to more_replacements
                        for k in self.extra_data.keys():
                            more_replacements[self.extra_data_tt[k]] = \
                                self.extra_data[k].astype(self.data_type)

            else:

                # defining minibatch
                if self.minibatch_size is not None:
                    # minibatch main data - expression matrix
                    self.x_data_minibatch = pm.Minibatch(self.X_data.astype(self.data_type),
                                                         batch_size=[self.minibatch_size, None],
                                                         random_seed=self.minibatch_seed[i])
                    more_replacements = {self.x_data: self.x_data_minibatch}

                    # if any other data inputs should be minibatched add them too
                    if self.extra_data is not None:
                        # for each parameter in the dictionary add it to more_replacements
                        for k in self.extra_data.keys():
                            more_replacements[self.extra_data_tt[k]] = \
                                pm.Minibatch(self.extra_data[k].astype(self.data_type),
                                             batch_size=[self.minibatch_size, None],
                                             random_seed=self.minibatch_seed[i])

                else:
                    more_replacements = {}

            self.advi[name].scale_cost_to_minibatch = scale_cost_to_minibatch

            # train the model  
            self.mean_field[name] = self.advi[name].fit(n_iter, callbacks=callbacks,
                                                        obj_optimizer=optimiser,
                                                        total_grad_norm_constraint=self.total_grad_norm_constraint,
                                                        progressbar=progressbar, more_replacements=more_replacements)

            # plot training history
            if self.verbose:
                print(plt.plot(np.log10(self.mean_field[name].hist[15000:])));
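
The minibatch machinery above amounts to swapping the full-data tensor for a pm.Minibatch via more_replacements and telling the observed variable the full dataset size. A minimal sketch of that pattern outside the class (toy data; every name below is illustrative):

import numpy as np
import theano
import pymc3 as pm

X = np.random.randn(10000, 5).astype('float32')
x_data = theano.shared(X)  # full-data tensor used to build the model

with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=10, shape=5)
    sd = pm.HalfNormal('sd', sd=5)
    pm.Normal('obs', mu=mu, sd=sd, observed=x_data,
              total_size=X.shape[0])  # rescale the minibatch likelihood to the full dataset

    advi = pm.ADVI()
    advi.scale_cost_to_minibatch = True
    x_minibatch = pm.Minibatch(X, batch_size=500)
    mean_field = advi.fit(10000,
                          obj_optimizer=pm.adam(learning_rate=0.01),
                          callbacks=[pm.callbacks.CheckParametersConvergence()],
                          more_replacements={x_data: x_minibatch})

trace = mean_field.sample(1000)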