def fit(self, sampling_size=5000, fast_sample=False):
    with pm.Model() as self.model:
        rho = pm.Exponential('rho', 1 / 5, shape=self.dim_gp)
        tau = pm.Exponential('tau', 1 / 3)
        cov_func = pm.gp.cov.Matern52(self.dim_gp, ls=rho)
        self.gp = pm.gp.Latent(cov_func=cov_func)
        f = self.gp.prior("f", X=self.locations)

        mean_func = f
        self.beta_list = []
        if self.covariates:
            for i in range(len(self.covariates)):
                beta = pm.Normal('_'.join(['beta', str(i)]), mu=0, sd=50)
                self.beta_list.append(beta)
                mean_func = mean_func + beta * self.covariates[i]

        sigma = pm.HalfNormal('sigma', sd=20)
        y = pm.Normal('Y', mu=mean_func, sd=sigma, observed=self.response)

        if fast_sample:
            inference = pm.ADVI()
            approx = pm.fit(n=25000, method=inference)  # until converged
            self.trace = approx.sample(draws=sampling_size)
        else:
            start = pm.find_MAP()
            # pm.sample takes `chains`, not `nchains`; pass the MAP estimate as the start point
            self.trace = pm.sample(sampling_size, tune=10000, start=start, chains=4)

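# --- Illustrative usage (not from the original source) ---
# A minimal, self-contained sketch of the ADVI-vs-NUTS pattern that the fit()
# methods in this collection follow. The model and data here are made up;
# iteration and draw counts are arbitrary.
import numpy as np
import pymc3 as pm

x = np.random.randn(100)
y_obs = 2.0 * x + np.random.randn(100) * 0.5

with pm.Model():
    beta = pm.Normal('beta', mu=0, sd=10)
    sigma = pm.HalfNormal('sigma', sd=5)
    pm.Normal('y', mu=beta * x, sd=sigma, observed=y_obs)

    # fast variational path
    approx = pm.fit(n=20000, method=pm.ADVI())
    advi_trace = approx.sample(draws=2000)

    # exact (slower) MCMC path
    nuts_trace = pm.sample(2000, tune=1000, chains=2)
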
def main():
    config = create_configuration(filename='/regression-siso.json')
    dataset = get_dataset(config.dataset, testing=False)

    # %%
    x_train = dataset.x
    y_train = dataset.y

    x = theano.shared(x_train)
    y = theano.shared(y_train)
    nn = construct_nn(x=x, y=y, config=config)

    # ADVI
    with nn:
        inference = pm.ADVI()
        approx = pm.fit(n=50000, method=inference)
        trace = approx.sample(draws=5000)

    # NUTS alternative (pm.sample takes a `step` argument, not `inference`):
    # with nn:
    #     step = pm.NUTS()
    #     trace = pm.sample(2000, tune=1000, cores=4, step=step)

    print(pm.summary(trace))

    x.set_value(x_train)
    y.set_value(y_train)
    with nn:
        ppc = pm.sample_ppc(trace, samples=500, progressbar=False)

def _build_BPF(self):
    print('start building the Bayesian probabilistic model')
    self.x_u = theano.shared(self.train_u)
    self.x_i = theano.shared(self.train_i)
    self.y_r = theano.shared(self.train_r)
    self.y_r_ui = theano.shared(np.array(self.nn_r_ui))
    assert len(self.y_r.get_value()) == len(self.y_r_ui.get_value())

    with pm.Model() as self.bncf:
        # define the prior and likelihood
        b_u = pm.Normal('b_u', 0, sd=1, shape=self.shape[0])
        b_i = pm.Normal('b_i', 0, sd=1, shape=self.shape[1])
        u = pm.Normal('u', 0, sd=1)
        # tY = b_u + b_i + nn_r_ui + u
        tY = pm.Deterministic(
            'tY', b_u[self.x_u] + b_i[self.x_i] + self.y_r_ui + u)
        nY = pm.Deterministic('nY', pm.math.sigmoid(tY))
        # likelihood of observed data
        Y = pm.Bernoulli('Y', nY, observed=self.y_r)  # total_size=self.y_r.get_value().shape[0]

    with self.bncf:
        # inference
        approx = pm.fit(n=1000, method=pm.ADVI())
        self.trace = approx.sample(draws=500)

    with self.bncf:
        # posterior prediction
        ppc = pm.sample_posterior_predictive(self.trace, progressbar=True)
        self.by_r_ui = ppc['Y'].mean(axis=0)

    print('done building the Bayesian probabilistic model')

def fit(self, fast_sampling=True, sample_size=3000):
    with pm.Model() as self.model:
        beta = pm.Normal('beta', mu=0.0, tau=1.0, shape=(self.dim + 1, 1))

        # Priors for spatial random effects
        tau = pm.Gamma('tau', alpha=2., beta=2.)
        alpha = pm.Uniform('alpha', lower=0, upper=1)
        phi = pm.MvNormal('phi',
                          mu=0,
                          tau=tau * (self.D - alpha * self.weight_matrix),
                          shape=(1, self.N))

        # Mean model
        mu = pm.Deterministic('mu', tt.dot(self.covariates, beta) + phi.T)
        theta_sd = pm.Gamma('theta_sd', alpha=1.0, beta=1.0)

        # Likelihood
        Yi = pm.Normal('Yi', mu=mu.ravel(), tau=theta_sd, observed=self.response_var)

        if fast_sampling:
            inference = pm.ADVI()
            approx = pm.fit(n=50000, method=inference)  # until converged
            self.trace = approx.sample(draws=sample_size)
        else:
            self.trace = pm.sample(sample_size, cores=2, tune=1000)

    self._report_credible_interval(self.trace, 'beta')
    self._report_credible_interval(self.trace, 'tau')

def fast_sample(self, sample_size=5000, iters=10000):
    if self.model is None:
        self.fit()
    with self.model:
        inference = pm.ADVI()
        approx = pm.fit(n=iters, method=inference)  # until converged
        self.trace = approx.sample(draws=sample_size)

def fit_model_LN(N, J, D, R, T, Sigmas, featvar_id, filename, c, normalize, batch=False):
    model = pm.Model()
    with model:
        # hyperparameters
        theta_prior = stickbreak_prior('theta', 1., T)
        alpha = .1

        # priors
        theta = pm.Dirichlet('theta', theta_prior, shape=T)
        psi = [[pm.MvNormal('psi_{}_{}'.format(t, d),
                            mu=tt.zeros(R[d]),
                            cov=tt.exp(-Sigmas[d]),
                            shape=R[d])
                for d in range(D)]
               for t in range(T)]
        phi = tt.stack([
            tt.concatenate([
                pm.Deterministic('phi_{}_{}'.format(t, d),
                                 tt.nnet.softmax(psi[t][d]))[0]
                for d in range(D)
            ])
            for t in range(T)
        ])

        # likelihood
        target = pm.DensityDist('target',
                                loglik(theta=theta, phi=phi),
                                observed=dict(featvar_id=featvar_id))

        # fit model
        inference = pm.ADVI()
        inference.fit(100000,
                      obj_optimizer=pm.adam(learning_rate=.01, beta1=.8),
                      callbacks=[pm.callbacks.CheckParametersConvergence()])
        trace = inference.approx.sample()
        posterior = {k: trace[k] for k in trace.varnames if not k.endswith('__')}
        posterior['ELBO'] = inference.hist

    if batch == False:
        out_name = 'posterior_LN_{}_{}_{}.pkl'.format(
            filename.split('.')[0], c, normalize)
    else:
        out_name = 'posterior_LN_{}_{}_{}_holdout_{}.pkl'.format(
            filename.split('.')[0], c, normalize, batch)
    with open(out_name, 'wb') as f:
        pkl.dump(posterior, f)

def main():
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        print('usage: python3 inference_dir.py [chain no] [optional output no]')
        sys.exit()
    elif len(sys.argv) == 2:
        c = int(sys.argv[1])
        d = int(sys.argv[1])
    if len(sys.argv) == 3:
        c = int(sys.argv[1])
        d = int(sys.argv[2])

    np.random.seed(c)
    np.random.shuffle(lang_ind)
    np.random.shuffle(sound_ind)

    lang_minibatch = pm.Minibatch(lang_ind, 500)
    sound_minibatch = pm.Minibatch(sound_ind, 500)

    model_ln = pm.Model()
    with model_ln:
        beta = pm.HalfFlat('beta')
        # theta = language-level prior over components
        theta = tt.stack([
            pm.Dirichlet('theta_{}'.format(l), a=tt.ones(K) * beta, shape=K)
            for l in range(L)
        ])
        psi = [
            pm.MvNormal('psi_{}'.format(k), mu=[0] * S, cov=Sigma, shape=S)
            for k in range(K)
        ]
        # phi = component-level collection of distributions over sound change
        phi = tt.stack([
            tt.concatenate([
                pm.Deterministic(
                    'phi_{}_{}'.format(k, x),
                    tt.nnet.softmax(psi[k][s_breaks[x][0]:s_breaks[x][1]])[0])
                for x in range(X)
            ])
            for k in range(K)
        ])
        target = pm.DensityDist('target',
                                logprob(theta=theta, phi=phi),
                                observed=dict(lang_array=lang_minibatch,
                                              sound_array=sound_minibatch),
                                total_size=N)
        inference_ln = pm.ADVI()
        inference_ln.fit(50000,
                         obj_optimizer=pm.adam(learning_rate=.01,
                                               beta1=uniform(.7, .9)),
                         callbacks=[pm.callbacks.CheckParametersConvergence()])
        trace_ln = inference_ln.approx.sample()

    posterior = {k: trace_ln[k] for k in trace_ln.varnames if not k.endswith('__')}
    posterior['ELBO'] = inference_ln.hist
    with open('posterior_ln_shuffle_{}.pkl'.format(d), 'wb') as f:
        pkl.dump(posterior, f)

def fit(self, sample_size, traceplot_name=None, fast_sampling=False):
    '''
    sample_size (int): the number of draws / fit iterations
    traceplot_name (str): the name of the traceplot file
    fast_sampling (bool): whether variational approximation should be used

    Note: to evaluate the kernel function, pymc3 only accepts theano tensor types.
    '''
    self.model = pm.Model()
    self.X_train = shared(self.X_train)  # need tensor type; tt.constant would also work

    with self.model:
        evaluated_kernels = []
        packed_L = pm.LKJCholeskyCov('packed_L',
                                     n=3,
                                     eta=2.,
                                     sd_dist=pm.HalfCauchy.dist(2.5))
        L = pm.expand_packed_triangular(3, packed_L)
        for center in self.centers.values:
            evaluated_kernels.append(
                pm.MvNormal.dist(mu=center, chol=L).logp(self.X_train))

        beta = pm.Normal('beta', mu=0, sd=3, shape=self.number_of_centers)
        latentProcess = pm.Deterministic('mu', tt.dot(beta, evaluated_kernels))

        error = pm.HalfCauchy('error', 12)
        y_ = pm.Normal("y",
                       mu=latentProcess,
                       sd=error,
                       observed=np.log(self.y_train))

    if fast_sampling:
        with self.model:
            inference = pm.ADVI()
            approx = pm.fit(n=sample_size, method=inference)  # until converged
            self.trace = approx.sample(draws=sample_size)
    else:
        with self.model:
            start = pm.find_MAP()
            self.trace = pm.sample(sample_size, start=start)

    if traceplot_name:
        fig, axs = plt.subplots(3, 2)  # 3 RVs, 2 columns
        pm.traceplot(self.trace, varnames=['packed_L', 'beta', 'error'], ax=axs)
        fig.savefig(traceplot_name)
        fig_path = os.path.join(os.getcwd(), traceplot_name)
        print(f'the traceplot has been saved to {fig_path}')

def sample_chain(model,
                 chain_i=0,
                 step=None,
                 num_samples=MAX_NUM_SAMPLES,
                 advi=False,
                 tune=5,
                 discard_tuned_samples=True,
                 num_scale1_iters=NUM_SCALE1_ITERS,
                 num_scale0_iters=NUM_SCALE0_ITERS):
    """Sample a single chain from the constructed Bayesian model."""
    start = timer()
    with model:
        if not advi:
            pm._log.info('Assigning NUTS sampler...')
            start_ = None
            if step is None:
                start_, step = pm.init_nuts(init='advi',
                                            njobs=1,
                                            n_init=NUM_INIT_STEPS,
                                            random_seed=-1,
                                            progressbar=False)

            discard = tune if discard_tuned_samples else 0
            for i, trace in enumerate(
                    pm.iter_sample(num_samples + discard,
                                   step,
                                   start=start_,
                                   chain=chain_i)):
                if i == 0:
                    min_num_samples = get_min_samples_per_chain(
                        len(trace[0]), MIN_SAMPLES_CONSTANT, NUM_CHAINS)
                elapsed = timer() - start
                if elapsed > SOFT_MAX_TIME_IN_SECONDS / NUM_CHAINS:
                    print('exceeded soft time limit...')
                    if i + 1 - discard >= min_num_samples:
                        print('collected enough samples; stopping')
                        break
                    else:
                        print('but only collected {} of {}; continuing...'.format(
                            i + 1 - discard, min_num_samples))
                if elapsed > HARD_MAX_TIME_IN_SECONDS / NUM_CHAINS:
                    print('exceeded HARD time limit; STOPPING')
                    break
            return trace[discard:]
        else:  # ADVI, for neural networks
            # a common schedule for `scale` is 1 at the beginning and 0 at the end
            scale = theano.shared(pm.floatX(1))
            vi = pm.ADVI(cost_part_grad_scale=scale)
            pm.fit(n=num_scale1_iters, method=vi)
            scale.set_value(0)
            # continue the *same* inference object; omitting method= would
            # silently start a fresh ADVI run
            approx = pm.fit(n=num_scale0_iters, method=vi)

            # one sample to get the dimensions of the trace
            trace = approx.sample(draws=1)
            min_num_samples = get_min_samples_per_chain(
                len(trace.varnames), MIN_SAMPLES_CONSTANT, 1)
            trace = approx.sample(draws=min_num_samples)
            return trace

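# --- Illustrative usage (not from the original source) ---
# Minimal sketch of the two-phase cost_part_grad_scale schedule used in
# sample_chain() above, on a toy model; iteration counts are arbitrary.
import numpy as np
import theano
import pymc3 as pm

y_obs = np.random.randn(200)

with pm.Model():
    mu = pm.Normal('mu', 0, 10)
    pm.Normal('y', mu=mu, sd=1, observed=y_obs)

    scale = theano.shared(pm.floatX(1))
    vi = pm.ADVI(cost_part_grad_scale=scale)
    pm.fit(n=5000, method=vi)            # phase 1: scale = 1
    scale.set_value(0)
    approx = pm.fit(n=5000, method=vi)   # phase 2: same ADVI object, scale = 0
    trace = approx.sample(draws=1000)
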
def fit(self, instances: np.ndarray, labels: np.ndarray) -> Optional[List[str]]:
    self.model = self._construct_nn(instances, labels)
    with self.model:
        inference = pm.ADVI()
        self.approx = pm.fit(n=EPOCHS, method=inference)
    self.sample_proba = self._sample_probability(instances)
    return None

def inference_with_model(model):
    with model:
        advi = pm.ADVI()
        tracker = pm.callbacks.Tracker(mean=advi.approx.mean.eval,
                                       std=advi.approx.std.eval)
        mean_field = advi.fit(
            n=vi_params["n"],
            callbacks=[CheckParametersConvergence(), tracker],
        )
        vi_trace = mean_field.sample(draws=sampler_params["draws"])
    return advi, vi_trace, mean_field, tracker

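# --- Illustrative usage (not from the original source) ---
# Sketch of inspecting the Tracker returned by inference_with_model(); assumes
# matplotlib is available and `model` is an existing PyMC3 model in scope.
import matplotlib.pyplot as plt
import numpy as np

advi, vi_trace, mean_field, tracker = inference_with_model(model)

fig, (ax_mu, ax_elbo) = plt.subplots(1, 2, figsize=(10, 4))
ax_mu.plot(np.asarray(tracker['mean']))  # evolution of the variational means
ax_mu.set_title('variational means')
ax_elbo.plot(advi.hist)                  # negative ELBO over iterations
ax_elbo.set_yscale('log')
ax_elbo.set_title('ELBO loss')
plt.show()
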
def ar_model_pred_advi_dynamic(X, ar_order):
    # prepare training dataset
    train_size = int(X.shape[0] * 0.66)
    train, test = X.iloc[0:train_size], X.iloc[train_size:]
    history = [x for x in train]

    # make rolling one-step-ahead predictions
    predictions = list()
    for t in range(test.shape[0]):
        tau = 0.001
        model = pm.Model()
        with model:
            beta = pm.Uniform('beta', lower=-1, upper=1, shape=ar_order)
            y_obs = pm.AR('y_obs', rho=beta, tau=tau, observed=history)
            # trace = pm.sample(2000, tune=1000)
            advi_fit = pm.fit(method=pm.ADVI(), n=30000)
            advi_trace = advi_fit.sample(10000)

        values = history[len(history) - ar_order:]
        values = values[::-1]
        yhat = np.dot(get_coef_from_trace(advi_trace), values)
        predictions.append(yhat)
        # use positional indexing: `test` keeps the original (shifted) index
        history.append(test.iloc[t])
        history = history[1:]

    # calculate out-of-sample error
    # error = mean_squared_error(test, predictions)
    predictions = pd.DataFrame(predictions)
    predictions.set_index(X[train_size:X.shape[0]].index, inplace=True, drop=True)
    return predictions[0]

def fit(self, iters=10000):
    with self.model:
        inference = pm.ADVI()
        approx = pm.fit(n=iters, method=inference)
        trace = approx.sample(iters // 2)

    # save, discarding the first half of the draws
    s = len(trace) // 2
    self.trace = trace
    self.inference = inference
    self.z = trace[s::]['z'].mean(axis=0)
    self.mu = trace[s::]['mu'].mean(axis=0)
    self.alpha = trace[s::]['alpha'].mean(axis=0)
    self.w = trace[s::]['w'].mean(axis=0)

def main():
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        print('usage: python3 inference_dir.py [chain no] [optional output no]')
        sys.exit()
    elif len(sys.argv) == 2:
        c = int(sys.argv[1])
        d = int(sys.argv[1])
    if len(sys.argv) == 3:
        c = int(sys.argv[1])
        d = int(sys.argv[2])

    np.random.seed(c)

    lang_minibatch = pm.Minibatch(lang_ind, 500)
    sound_minibatch = pm.Minibatch(sound_ind, 500)

    model_dir = pm.Model()
    with model_dir:
        beta = pm.HalfFlat('beta')
        # theta = language-level prior over components
        theta = tt.stack([
            pm.Dirichlet('theta_{}'.format(l), a=tt.ones(K) * beta, shape=K)
            for l in range(L)
        ])
        phi = tt.stack([
            tt.concatenate([
                pm.Dirichlet('phi_{}_{}'.format(k, x),
                             a=tt.ones(R[x]) * alpha,
                             shape=R[x])
                for x in range(X)
            ])
            for k in range(K)
        ])
        target = pm.DensityDist('target',
                                logprob(theta=theta, phi=phi),
                                observed=dict(lang_array=lang_minibatch,
                                              sound_array=sound_minibatch),
                                total_size=N)
        inference_dir = pm.ADVI()
        inference_dir.fit(50000,
                          obj_optimizer=pm.adam(learning_rate=.01,
                                                beta1=uniform(.7, .9)),
                          callbacks=[pm.callbacks.CheckParametersConvergence()])
        trace_dir = inference_dir.approx.sample()

    posterior = {k: trace_dir[k] for k in trace_dir.varnames if not k.endswith('__')}
    posterior['ELBO'] = inference_dir.hist
    with open('posterior_dir_{}.pkl'.format(d), 'wb') as f:
        pkl.dump(posterior, f)

def _advi_inference(self, inference_args):
    """
    Runs variational ADVI and then samples from the results.

    Parameters
    ----------
    inference_args : dict
        Arguments to be passed to the PyMC3 fit method.
        See the PyMC3 documentation for permissible values.
    """
    with self.cached_model:
        inference = pm.ADVI()
        approx = pm.fit(method=inference, **inference_args)

    self.approx = approx
    self.trace = approx.sample(draws=self.default_advi_sample_draws)
    self.summary = pm.summary(self.trace)  # pm.df_summary was renamed to pm.summary
    self.advi_hist = inference.hist

def train(neural_network, inference_file, model_file, hypers):
    set_tt_rng(MRG_RandomStreams(42))

    with neural_network:
        inference = pm.ADVI()
        approx = pm.fit(n=hypers['n_sample'],
                        method=inference,
                        obj_optimizer=pm.adam(learning_rate=hypers['lr']))

    with open(inference_file, "wb") as f:
        pickle.dump(inference, f, pickle.HIGHEST_PROTOCOL)
    with open(model_file, "wb") as f:
        pickle.dump(approx, f, pickle.HIGHEST_PROTOCOL)

    return inference, approx

def run_inference(model, fit, samples, unbinned_array, model_save_dir, model_name):
    """
    model : PyMC3 changepoint model as defined above
    fit : number of iterations to fit
    samples : number of samples to generate from the fitted model
    unbinned_array : raw (unbinned) data stored alongside the model
    model_save_dir : parent directory of where to save the model
    model_name : name for the model
    """
    model_dump_path = get_model_dump_path(model_name, model_save_dir)

    if os.path.exists(model_dump_path):
        print('Trace loaded from cache')
        with open(model_dump_path, 'rb') as buff:
            data = pickle.load(buff)
        model = data['model']
        approx = data['approx']
        # Remove pickled data to conserve memory
        del data
        # Recreate samples
        trace = approx.sample(draws=samples)
    else:
        with model:
            # pm.ADVI() takes no 'full-rank' argument; pm.FullRankADVI()
            # is the full-rank variant
            inference = pm.FullRankADVI()
            approx = pm.fit(n=fit, method=inference)
            trace = approx.sample(draws=samples)

    # Extract relevant variables from the trace
    lambda_stack = trace['lambda'].swapaxes(0, 1)
    tau_samples = trace['tau']

    print('Dumping trace to cache')
    with open(model_dump_path, 'wb') as buff:
        pickle.dump(
            {
                'model': model,
                'approx': approx,
                'lambda': lambda_stack,
                'tau': tau_samples,
                'data': model.obs.observations,
                'fulldata': unbinned_array
            }, buff)

def fit(self, x, y, epochs=30000, method='advi', batch_size=128, n_models=1,
        **sample_kwargs):
    """
    :param x: training inputs
    :param y: training targets
    :param epochs: number of draws (NUTS) or optimisation steps (VI)
    :param method: 'nuts', 'advi', or 'svgd'
    :param batch_size: int or array. For hierarchical models, batch along the
        second dimension (e.g., [None, 128])
    :param n_models: number of independent fits to run
    :param sample_kwargs: passed through to pm.sample / pm.fit
    :return: None; traces are appended to self.trace
    """
    self.train_x = x
    with self.model:
        if method == 'nuts':
            # self.x.set_value(x)
            # self.y.set_value(y)
            for _ in range(n_models):
                self.trace.append(pm.sample(epochs, **sample_kwargs))
        else:
            mini_x = pm.Minibatch(x, batch_size=batch_size, dtype=floatX)
            mini_y = pm.Minibatch(y, batch_size=batch_size, dtype=floatX)
            if method == 'advi':
                inference = pm.ADVI()
            elif method == 'svgd':
                inference = pm.SVGD()
            for _ in range(n_models):
                approx = pm.fit(n=epochs,
                                method=inference,
                                more_replacements={self.x: mini_x,
                                                   self.y: mini_y},
                                **sample_kwargs)
                self.trace.append(approx.sample(draws=20000))
                self.approx.append(approx)

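# --- Illustrative usage (not from the original source) ---
# Minimal sketch of the more_replacements minibatch pattern used in fit():
# a full-data shared tensor is swapped for a Minibatch view at fit time,
# with total_size keeping the likelihood correctly scaled. Toy data throughout.
import numpy as np
import theano
import pymc3 as pm

X_full = np.random.randn(10000)
x_t = theano.shared(X_full)

with pm.Model():
    mu = pm.Normal('mu', 0, 10)
    pm.Normal('obs', mu=mu, sd=1, observed=x_t, total_size=X_full.shape[0])

    mini = pm.Minibatch(X_full, batch_size=128)
    approx = pm.fit(n=10000, method=pm.ADVI(), more_replacements={x_t: mini})
    trace = approx.sample(draws=2000)
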
def test_save_load(tmp_path_factory, c, sig_defs):
    # make small for speed
    c = c[0:30]
    sig_defs = sig_defs[0:5]

    dataset_args = {'foo': 'bar'}
    model_args = {'bar': 'baz'}
    pymc3_args = {'baz': 'foo'}

    # train a model with 5 sigs
    with pm.Model() as model:
        data = pm.Data("data", c)
        N = data.sum(1).reshape((c.shape[0], 1))
        activities = ch_dirichlet("activities", a=np.ones(5), shape=(c.shape[0], 5))
        B = pm.math.dot(activities, sig_defs)
        pm.Multinomial('corpus', n=N, p=B, observed=data)

        trace = pm.ADVI()  # note: `trace` here is an ADVI inference object, not a trace
        trace.fit()

    # checkpoint
    fp = tmp_path_factory.mktemp("ckp") / "vanilla_lda.ckp"
    save_checkpoint(fp, model, trace, dataset_args, model_args, pymc3_args)

    # load model
    m2, t2, dataset_args2, model_args2, pymc3_args2 = load_checkpoint(fp)

    # all params should be identical
    # checks are weak because __eq__ methods are not provided
    # assert str(model) == str(m2), 'model load failed'
    assert np.allclose(trace.hist, t2.hist), 'trace load failed'
    assert dataset_args == dataset_args2, 'dataset_args load failed'
    assert model_args == model_args2, 'model_args load failed'
    assert pymc3_args == pymc3_args2, 'pymc3_args load failed'

    # with the same seed, both models should tune with the same result
    # test model tuning
    trace.refine(100)
    t2.refine(100)
    assert np.allclose(trace.hist, t2.hist), 'trace tuning failed'

def fit(self, model_name=None, n_iter=40000):
    # use the override name, if given, for all output paths
    p = self.p
    if model_name is not None:
        p = model_name

    try:
        os.mkdir("{}/{}".format(self.traces_dir, p))
    except FileExistsError:
        print("Dir exists")

    with self.model:
        advi = pm.ADVI()
        # how can the trace be saved when using pm.fit??
        # approx = advi.fit(n=n_draws, callbacks=[tracker])
        approx = advi.fit(n=n_iter)

    plt.plot(advi.hist)
    plt.title('ELBO')
    plt.xlabel('Iteration')
    plt.savefig(os.path.join(self.plot_dir, "ELBO/{}.eps".format(p)),
                format="eps", dpi=900)
    plt.close()

    trace = approx.sample(10000)
    with open("{}/{}/trace.pik".format(self.traces_dir, p), 'wb') as f:
        pickle.dump({'model': self.model, 'trace': trace}, f)
    # with open('trace.p', 'rb') as f:
    #     test1 = pickle.load(f)

    df = pd.DataFrame({"estimate": trace["estimate"][:, 0, 0]})
    df.to_csv("{}/{}/chain-0.tsv".format(self.traces_dir, p))

    self.trace = trace
    return trace

def _advi_inference(self, inference_args, num_advi_sample_draws):
    """
    Runs variational ADVI and then samples from the results.

    Parameters
    ----------
    inference_args : dict
        Arguments to be passed to the PyMC3 fit method.
        See the PyMC3 documentation for permissible values.
    num_advi_sample_draws : int
        Number of samples to draw from the ADVI approximation after it has been fit.
    """
    with self.cached_model:
        inference = pm.ADVI()
        approx = pm.fit(method=inference, **inference_args)

    self.approx = approx
    self.trace = approx.sample(draws=num_advi_sample_draws)
    self.summary = pm.summary(self.trace)
    self.advi_hist = inference.hist

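# --- Illustrative usage (not from the original source) ---
# Sketch of the kind of inference_args dict _advi_inference() forwards to
# pm.fit; the keys shown are ordinary pm.fit keyword arguments, and the
# call below assumes a class exposing this method.
inference_args = {
    'n': 50000,
    'callbacks': [pm.callbacks.CheckParametersConvergence(diff='absolute')],
}
# self._advi_inference(inference_args, num_advi_sample_draws=10000)
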
def run_model_estimation(int_point, y_elec, bad_trials, surprise_reg=None,
                         model_type="OLS"):
    """
    Inputs:
        int_point - sampling point in the interstimulus interval
        y_elec - array with EEG recordings (num_trials x num_interstim_rec)
        bad_trials - indicator of trials to exclude
        surprise_reg - num_trials x 1 surprise from the Bayesian learning model
        model_type - regression model
    Output:
        Time series of log model evidence / negative free energy from VI
        on the Bayesian model
    """
    # Normalize the data and regressor to lie within 0, 1
    y_std = normalize(y_elec[:, int_point])
    surprise_reg_std = normalize(surprise_reg)

    # Select the specific model: OLS/Hierarchical/Bayesian-MLP/Null
    if model_type == "OLS":
        model = OLS_model(y_std, bad_trials, surprise_reg_std)
    elif model_type == "Hierarchical":
        model = Hierarchical_model(y_std, bad_trials, surprise_reg_std)
    elif model_type == "Bayesian-MLP":
        model = Bayesian_NN(y_std, bad_trials, surprise_reg_std)
    elif model_type == "Null":
        model = Null_model(y_std, bad_trials)
    else:
        # raising a bare string is invalid in Python 3
        raise ValueError("Provide a valid model type")

    # Run the variational inference scheme with ADVI
    # (ADVI = Automatic Differentiation Variational Inference)
    with model:
        inference = pm.ADVI()
        approx = pm.fit(
            method=inference,
            callbacks=[pm.callbacks.CheckParametersConvergence(diff='absolute')],
            n=30000,
            progressbar=0)

    # return the full optimization trace of the free energy
    return -approx.hist

def _inference(self, minibatches, n=200000):
    """
    Runs minibatch variational ADVI and then samples from the results.

    Parameters
    ----------
    minibatches: minibatch replacements for ADVI
    n: number of iterations for the ADVI fit, defaults to 200000
    """
    with self.cached_model:
        advi = pm.ADVI()
        approx = pm.fit(
            n=n,
            method=advi,
            more_replacements=minibatches,
            callbacks=[pm.callbacks.CheckParametersConvergence()])

    self.advi_trace = approx.sample(draws=10000)
    self.advi_hist = advi.hist

def fit(self, sampling_size=5000, traceplot_name=None, fast_sampling=False):
    '''
    Args:
        sampling_size (int): the length of the Markov chain
        traceplot_name (str): if given, the filename the traceplot is saved to
        fast_sampling (bool): whether variational approximation should be used
    '''
    self.model = pm.Model()
    with self.model:
        rho = pm.Exponential('rho', 1 / 5, shape=3)
        tau = pm.Exponential('tau', 1 / 3)
        cov_func = pm.gp.cov.Matern52(3, ls=rho)
        self.gp = pm.gp.Marginal(cov_func=cov_func)
        sigma = pm.HalfNormal('sigma', sd=3)
        y_ = self.gp.marginal_likelihood('y',
                                         X=self.X_train,
                                         y=np.log(self.y_train),
                                         noise=sigma)

    if fast_sampling:
        with self.model:
            inference = pm.ADVI()
            approx = pm.fit(n=50000, method=inference)  # until converged
            self.trace = approx.sample(draws=sampling_size)
    else:
        with self.model:
            start = pm.find_MAP()
            # pm.sample takes `chains`, not `nchains`; start from the MAP estimate
            self.trace = pm.sample(sampling_size, start=start, chains=1)

    if traceplot_name:
        fig, axs = plt.subplots(3, 2)  # 3 RVs, 2 columns
        pm.traceplot(self.trace, varnames=['rho', 'sigma', 'tau'], ax=axs)
        fig.savefig(traceplot_name)
        fig_path = os.path.join(os.getcwd(), traceplot_name)
        print(f'the traceplot has been saved to {fig_path}')

def setup_model(self, data):
    with pm.Model() as model:
        self.transmat_ = pm.Normal('Tmat', mu=1, sd=1,
                                   shape=(self.latent_dimension))
        self.hidden_states.append(
            pm.Normal('H0',
                      mu=0,
                      sd=1,
                      shape=(self.sample_minibatch, self.latent_dimension),
                      testval=np.random.randn(self.sample_minibatch,
                                              self.latent_dimension)))
        # deterministic linear dynamics: H_t = H_{t-1} @ diag(Tmat)
        for i in range(1, self.num_time_steps):
            self.hidden_states.append(
                th.dot(self.hidden_states[-1], diag(self.transmat_)))

        F = pm.Normal('F',
                      mu=0,
                      sd=1,
                      shape=(self.latent_dimension, self.observ_dimension),
                      testval=np.random.randn(self.latent_dimension,
                                              self.observ_dimension))
        for i in range(self.num_time_steps):
            self.observed_states.append(
                pm.Normal('X_{}'.format(i),
                          mu=th.dot(self.hidden_states[i], F),
                          sd=1,
                          shape=(self.sample_minibatch, self.observ_dimension),
                          observed=data[i]))

        approx = pm.fit(n=45000, method=pm.ADVI())
        trace = approx.sample(500)

    import pickle
    with open('pick.dump2.pkl', 'wb') as buff:
        pickle.dump({'model': model, 'approx': approx, 'trace': trace}, buff)

def fit(self, data, adviIterations):
    self.data = data
    self.yScaler.fit(data)
    laggedData = lagData(data, self.numLags)

    # change basis: set the radial basis and transform the lagged data
    self.radialBasis = RadialBasis(self.numBasis)
    self.radialBasis.fit(laggedData)
    changedBasis = self.radialBasis.transform(laggedData)

    # scaling for numeric funzies
    self.scaler.fit(changedBasis)
    changedBasis = self.scaler.transform(changedBasis)

    # set model predictors as shared so we can do the forecasting
    self.sharedPredictors = shared(changedBasis)

    # pymc model
    with self.model:
        theta = pm.Normal('theta', 0, 1, shape=(self.numBasis, data.shape[1]))
        fX = pm.math.matrix_dot(self.sharedPredictors, theta)
        pm.Deterministic('fX', fX)
        yVec = pm.MvNormal('yVec',
                           fX,
                           tau=np.eye(data.shape[1]),
                           observed=self.yScaler.transform(data[self.numLags:, :]))

        advi = pm.ADVI()
        self.approx = pm.fit(n=adviIterations, method=advi)

    print('variational inference concluded')
    print('''
        The sin which is unpardonable is knowingly and willfully to reject
        truth, to fear knowledge lest that knowledge pander not to thy
        prejudices.
        ''')
    self.fitted = True

def setup_model(self, data):
    with pm.Model() as model:
        init_states = np.random.randn(self.sample_minibatch, self.latent_dimension)
        self.hidden_states.append(
            pm.Normal('H0',
                      mu=0,
                      sd=1,
                      shape=(self.sample_minibatch, self.latent_dimension),
                      testval=init_states))
        # Gaussian random-walk dynamics: H_t ~ N(H_{t-1}, 0.1)
        for i in range(1, self.num_time_steps):
            self.hidden_states.append(
                pm.Normal('H{}'.format(i + 1),
                          mu=self.hidden_states[-1],
                          sd=0.1,
                          shape=(self.sample_minibatch, self.latent_dimension),
                          testval=init_states))

        F = pm.Normal('F',
                      mu=0,
                      sd=1,
                      shape=(self.latent_dimension, self.observ_dimension),
                      testval=np.random.randn(self.latent_dimension,
                                              self.observ_dimension))
        for i in range(self.num_time_steps):
            self.observed_states.append(
                pm.Normal('X_{}'.format(i),
                          mu=th.dot(self.hidden_states[i], F),
                          sd=1,
                          shape=(self.sample_minibatch, self.observ_dimension),
                          observed=data[i]))

        iters = 30000
        inference = pm.ADVI()
        approx = pm.fit(n=iters, method=inference)
        trace = approx.sample(500)

    plt.semilogy(list(range(iters)), inference.hist)
    plt.ylabel('ELBO')
    plt.xlabel('iteration')
    plt.savefig('linear_elbo.pdf')

def setup_model(self, data):
    # p = 0.8
    with pm.Model() as model:
        init_states = np.random.randn(self.sample_minibatch, self.latent_dimension)
        self.hidden_states.append(
            pm.Normal('H0',
                      mu=0,
                      sd=1,
                      shape=(self.sample_minibatch, self.latent_dimension),
                      testval=init_states))
        for i in range(1, self.num_time_steps):
            self.hidden_states.append(
                pm.Normal('H{}'.format(i + 1),
                          mu=self.hidden_states[-1],
                          sd=0.1,
                          shape=(self.sample_minibatch, self.latent_dimension),
                          testval=init_states))

        # two-hidden-layer decoder; layer widths interpolate between the
        # latent and observed dimensions
        l1_size = int((self.observ_dimension - self.latent_dimension) / 3) \
            + self.latent_dimension
        l2_size = int((self.observ_dimension - self.latent_dimension) / 3) * 2 \
            + self.latent_dimension

        # P0 = pm.Bernoulli('P0', p, shape=(self.latent_dimension, l1_size),
        #                   testval=np.random.binomial(1, p, size=(self.latent_dimension, l1_size)))
        W0 = pm.Normal('W0', mu=0, sd=1,
                       shape=(self.latent_dimension, l1_size),
                       testval=np.random.randn(self.latent_dimension, l1_size))
        # P1 = pm.Bernoulli('P1', p, shape=(l1_size, l2_size),
        #                   testval=np.random.binomial(1, p, size=(l1_size, l2_size)))
        W1 = pm.Normal('W1', mu=0, sd=1,
                       shape=(l1_size, l2_size),
                       testval=np.random.randn(l1_size, l2_size))
        W2 = pm.Normal('W2', mu=0, sd=1,
                       shape=(l2_size, self.observ_dimension),
                       testval=np.random.randn(l2_size, self.observ_dimension))

        for i in range(self.num_time_steps):
            pm.Normal('X_{}'.format(i),
                      mu=th.dot(
                          th.tensor.tanh(
                              th.dot(th.tensor.tanh(th.dot(self.hidden_states[i], W0)),
                                     W1)),
                          W2),
                      sd=1,
                      shape=(self.sample_minibatch, self.observ_dimension),
                      observed=data[i])

        inference = pm.ADVI()
        iters = 150000
        approx = pm.fit(n=iters, method=inference)
        trace = approx.sample(500)

    plt.semilogy(list(range(iters)), inference.hist)
    plt.ylabel('ELBO')
    plt.xlabel('iteration')
    plt.savefig('nn_elbo.pdf')

    import pickle
    with open('nn5d_2layer_all.pkl', 'wb') as buff:
        pickle.dump(trace, buff)

def sample_fc_nn(X, y, output,
                 hidden_dims=[NUM_HIDDEN],
                 num_samples=MAX_NUM_SAMPLES,
                 vi=True,
                 num_scale1_iters=NUM_SCALE1_ITERS,
                 num_scale0_iters=NUM_SCALE0_ITERS):
    """Sample from a fully connected Bayesian neural network."""
    nn = build_shallow_nn(X, y, output, hidden_dims)
    with nn:
        if vi:
            # variational inference (fast);
            # a common schedule for `scale` is 1 at the beginning and 0 at the end
            scale = theano.shared(pm.floatX(1))
            vi = pm.ADVI(cost_part_grad_scale=scale)
            pm.fit(n=num_scale1_iters, method=vi)
            scale.set_value(0)
            # continue the same inference object with scale = 0; omitting
            # method= would silently start a fresh ADVI run
            approx = pm.fit(n=num_scale0_iters, method=vi)
            trace = approx.sample(draws=num_samples)
        else:
            # NUTS (very slow)
            trace = pm.sample(num_samples)
    return format_trace(trace)

def fit_advi_iterative(self, n=3, method='advi', n_type='restart',
                       n_iter=None, learning_rate=None, reducing_lr=False,
                       progressbar=True, scale_cost_to_minibatch=True):
    """Find the posterior using pm.ADVI() directly (allows continuing training
    through the `refine` method): maximising the likelihood of the data and
    minimising the KL-divergence of the posterior to the prior (ELBO loss).

    Parameters
    ----------
    n : number of independent initialisations (Default value = 3)
    method : 'advi', to allow for potential use of SVGD, MCMC, custom
        (currently only ADVI is implemented). (Default value = 'advi')
    n_type : type of repeated initialisation:
        * **'restart'** to pick a different initial value,
        * **'cv'** for molecular cross-validation - splits counts into n
          datasets; for now, only n=2 is implemented,
        * **'bootstrap'** for fitting the model to multiple downsampled
          datasets. Run `mod.bootstrap_data()` to generate variants of the data
        (Default value = 'restart')
    n_iter : number of iterations, supersedes self.n_iter specified when
        creating the model instance. (Default value = None)
    learning_rate : learning rate, supersedes self.learning_rate specified
        when creating the model instance. (Default value = None)
    reducing_lr : boolean, use a decaying learning rate? (Default value = False)
    progressbar : boolean, show the progress bar? (Default value = True)
    scale_cost_to_minibatch : when training in minibatches, scale the cost
        function appropriately? See the discussion at
        https://discourse.pymc.io/t/effects-of-scale-cost-to-minibatch/1429
        to understand the effects. (Default value = True)

    Returns
    -------
    None
        self.mean_field dictionary with MeanField pymc3 objects,
        and self.advi dictionary with ADVI objects for each initialisation.
    """
    self.n_type = n_type
    self.scale_cost_to_minibatch = scale_cost_to_minibatch

    if n_iter is None:
        n_iter = self.n_iter
    if learning_rate is None:
        learning_rate = self.learning_rate

    ### Initialise optimiser ###
    if reducing_lr:
        # initialise the function for the adaptive learning rate
        s = theano.shared(np.array(learning_rate).astype(self.data_type))

        def reduce_rate(a, h, i):
            s.set_value(
                np.array(learning_rate / ((i / self.n_obs) + 1) ** .7).astype(self.data_type))

        optimiser = pm.adam(learning_rate=s)
        callbacks = [reduce_rate, CheckParametersConvergence()]
    else:
        optimiser = pm.adam(learning_rate=learning_rate)
        callbacks = [CheckParametersConvergence()]

    if np.isin(n_type, ['bootstrap']):
        if self.X_data_sample is None:
            self.bootstrap_data(n=n)
    elif np.isin(n_type, ['cv']):
        self.generate_cv_data()  # cv data added to self.X_data_sample

    init_names = ['init_' + str(i + 1) for i in np.arange(n)]

    for i, name in enumerate(init_names):
        with self.model:
            self.advi[name] = pm.ADVI()

        # when the type is molecular cross-validation or bootstrap,
        # replace the self.x_data tensor with new data
        if np.isin(n_type, ['cv', 'bootstrap']):
            # defining minibatch
            if self.minibatch_size is not None:
                # minibatch main data - expression matrix
                self.x_data_minibatch = pm.Minibatch(
                    self.X_data_sample[i].astype(self.data_type),
                    batch_size=[self.minibatch_size, None],
                    random_seed=self.minibatch_seed[i])
                more_replacements = {self.x_data: self.x_data_minibatch}

                # if any other data inputs should be minibatched, add them too
                if self.extra_data is not None:
                    # add each parameter in the dictionary to more_replacements
                    for k in self.extra_data.keys():
                        more_replacements[self.extra_data_tt[k]] = \
                            pm.Minibatch(self.extra_data[k].astype(self.data_type),
                                         batch_size=[self.minibatch_size, None],
                                         random_seed=self.minibatch_seed[i])
            # or using all data
            else:
                more_replacements = {
                    self.x_data: self.X_data_sample[i].astype(self.data_type)}
                # if any other data inputs should be added
                if self.extra_data is not None:
                    # add each parameter in the dictionary to more_replacements
                    for k in self.extra_data.keys():
                        more_replacements[self.extra_data_tt[k]] = \
                            self.extra_data[k].astype(self.data_type)
        else:
            # defining minibatch
            if self.minibatch_size is not None:
                # minibatch main data - expression matrix
                self.x_data_minibatch = pm.Minibatch(
                    self.X_data.astype(self.data_type),
                    batch_size=[self.minibatch_size, None],
                    random_seed=self.minibatch_seed[i])
                more_replacements = {self.x_data: self.x_data_minibatch}

                # if any other data inputs should be minibatched, add them too
                if self.extra_data is not None:
                    # add each parameter in the dictionary to more_replacements
                    for k in self.extra_data.keys():
                        more_replacements[self.extra_data_tt[k]] = \
                            pm.Minibatch(self.extra_data[k].astype(self.data_type),
                                         batch_size=[self.minibatch_size, None],
                                         random_seed=self.minibatch_seed[i])
            else:
                more_replacements = {}

        self.advi[name].scale_cost_to_minibatch = scale_cost_to_minibatch

        # train the model
        self.mean_field[name] = self.advi[name].fit(
            n_iter,
            callbacks=callbacks,
            obj_optimizer=optimiser,
            total_grad_norm_constraint=self.total_grad_norm_constraint,
            progressbar=progressbar,
            more_replacements=more_replacements)

        # plot training history
        if self.verbose:
            print(plt.plot(np.log10(self.mean_field[name].hist[15000:])))

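# --- Illustrative usage (not from the original source) ---
# Minimal sketch of the decaying-learning-rate pattern used in
# fit_advi_iterative() above: the Adam step size lives in a theano shared
# variable that a callback shrinks as training progresses. Toy model and
# decay constants; the callback signature (approx, losses, i) is the one
# pm.ADVI passes to its callbacks.
import numpy as np
import theano
import pymc3 as pm

y_obs = np.random.randn(500)
base_lr = 0.01

s = theano.shared(np.array(base_lr, dtype='float64'))

def reduce_rate(approx, losses, i):
    s.set_value(np.array(base_lr / ((i / 500) + 1) ** .7, dtype='float64'))

with pm.Model():
    mu = pm.Normal('mu', 0, 10)
    pm.Normal('y', mu=mu, sd=1, observed=y_obs)

    advi = pm.ADVI()
    mean_field = advi.fit(20000,
                          obj_optimizer=pm.adam(learning_rate=s),
                          callbacks=[reduce_rate])
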