def nf_samples_to_trace(self):
    """Convert NF samples to a trace."""
    length_pos = len(self.nf_samples)
    varnames = [v.name for v in self.variables]
    with self.model:
        self.nf_strace = NDArray(name=self.model.name)
        self.nf_strace.setup(length_pos, self.chain)
    for i in range(length_pos):
        value = []
        size = 0
        for var in varnames:
            shape, new_size = self.var_info[var]
            value.append(self.nf_samples[i][size:size + new_size].reshape(shape))
            size += new_size
        self.nf_strace.record(point={k: v for k, v in zip(varnames, value)})
    self.nf_trace = point_list_to_multitrace(self.nf_strace, model=self.model)
def posterior_to_trace(self):
    """Save results into a PyMC3 trace."""
    length_pos = len(self.posterior)
    varnames = [v.name for v in self.variables]
    with self.model:
        strace = NDArray(name=self.model.name)
        strace.setup(length_pos, self.chain)
    for i in range(length_pos):
        value = []
        size = 0
        for var in varnames:
            shape, new_size = self.var_info[var]
            value.append(self.posterior[i][size:size + new_size].reshape(shape))
            size += new_size
        strace.record(point={k: v for k, v in zip(varnames, value)})
    return strace
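# Minimal standalone sketch (not part of the module) of the unflattening bookkeeping
# that the two trace-conversion methods above share: var_info maps each variable name
# to (shape, size), and a flat sample vector is sliced back into per-variable arrays.
# The variable names and shapes below are illustrative only.
import numpy as np

var_info = {"mu": ((), 1), "cov": ((2, 2), 4)}   # hypothetical model variables
flat_sample = np.arange(5.0)                     # one flattened posterior draw

point, size = {}, 0
for name, (shape, new_size) in var_info.items():
    point[name] = flat_sample[size:size + new_size].reshape(shape)
    size += new_size

# point == {"mu": array(0.), "cov": array([[1., 2.], [3., 4.]])}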
def test_choose_chains(n_points, tune, expected_length, expected_n_traces):
    with pm.Model() as model:
        a = pm.Normal("a", mu=0, sigma=1)
        trace_0 = NDArray(model)
        trace_1 = NDArray(model)
        trace_2 = NDArray(model)
        trace_0.setup(n_points[0], 1)
        trace_1.setup(n_points[1], 1)
        trace_2.setup(n_points[2], 1)
        for _ in range(n_points[0]):
            trace_0.record({"a": 0})
        for _ in range(n_points[1]):
            trace_1.record({"a": 0})
        for _ in range(n_points[2]):
            trace_2.record({"a": 0})
        traces, length = pm.sampling._choose_chains([trace_0, trace_1, trace_2], tune=tune)
        assert length == expected_length
        assert expected_n_traces == len(traces)
class NFO:
    """Sequential NF Bayesian Optimization."""

    def __init__(self, n0=10, init_samples=None, k_trunc=np.inf, eps_z=.01, nf_iter=2, N=10,
                 t_ess=0.5, beta_max=1, model=None, random_seed=-1, chain=0, frac_validate=0.0,
                 iteration=None, alpha_w=(0, 0), alpha_uw=(0, 0), verbose=False, n_component=None,
                 interp_nbin=None, KDE=True, bw_factor_min=1.0, bw_factor_max=1.0, bw_factor_num=1,
                 rel_bw=1, edge_bins=None, ndata_wT=None, MSWD_max_iter=None, NBfirstlayer=True,
                 logit=False, Whiten=False, trainable_qw=False, sgd_steps=0, knots_trainable=5,
                 batchsize=None, nocuda=False, patch=False, shape=[28, 28, 1], bounds=None):

        self.N = N
        self.n0 = n0
        self.model = model
        self.chain = chain

        # Init method params.
        self.init_samples = init_samples
        self.random_seed = random_seed

        # Set the numpy and torch seeds (-1 means no explicit seeding).
        if self.random_seed != -1:
            np.random.seed(self.random_seed)
            torch.manual_seed(self.random_seed)

        # Separating out so I can keep track. These are SINF params.
        assert 0.0 <= frac_validate <= 1.0
        self.frac_validate = frac_validate
        self.iteration = iteration
        self.alpha_uw = alpha_uw
        self.alpha_w = alpha_w
        self.k_trunc = k_trunc
        self.verbose = verbose
        self.n_component = n_component
        self.interp_nbin = interp_nbin
        self.KDE = KDE
        self.bw_factors = np.linspace(bw_factor_min, bw_factor_max, bw_factor_num)
        self.edge_bins = edge_bins
        self.ndata_wT = ndata_wT
        self.MSWD_max_iter = MSWD_max_iter
        self.NBfirstlayer = NBfirstlayer
        self.logit = logit
        self.Whiten = Whiten
        self.batchsize = batchsize
        self.nocuda = nocuda
        self.patch = patch
        self.shape = shape

        # Convert the bounds array from [[x1min, x2min, ...], [x1max, x2max, ...]] to
        # the format SINF expects: [[x1min, x1max], [x2min, x2max], ...].
        if bounds is not None:
            bounds_sinf = [list(b) for b in bounds.T]
        else:
            # Get the dimensionality from the initial samples, assuming (N, d) shape.
            bounds_sinf = [[None, None] for i in range(init_samples.shape[1])]
        self.bounds = bounds_sinf

        # Trainable SINF.
        self.trainable_qw = trainable_qw
        self.sgd_steps = sgd_steps
        self.knots_trainable = knots_trainable

        # NFO.
        self.t_ess = t_ess
        self.beta_max = beta_max
        self.beta = 0  # Initial value of beta before iterating, to match SMC.
        self.rel_bw = rel_bw

        self.model = modelcontext(model)
        self.variables = inputvars(self.model.vars)

    def initialize_var_info(self):
        """Extract variable info for the model instance."""
        var_info = OrderedDict()
        init = self.model.test_point
        for v in self.variables:
            var_info[v.name] = (init[v.name].shape, init[v.name].size)
        self.var_info = var_info

    def initialize_population(self):
        """Create an initial population from the prior distribution."""
        population = []
        if self.init_samples is None:
            init_rnd = sample_prior_predictive(
                self.N,
                var_names=[v.name for v in self.model.unobserved_RVs],
                model=self.model,
            )
            for i in range(self.N):
                point = Point({v.name: init_rnd[v.name][i] for v in self.variables},
                              model=self.model)
                population.append(self.model.dict_to_array(point))
            self.prior_samples = np.array(floatX(population))
        elif self.init_samples is not None:
            self.prior_samples = np.copy(self.init_samples)

        self.samples = np.copy(self.prior_samples)
        self.nf_samples = np.copy(self.samples)
        self.get_posterior_logp()
        self.get_prior_logp()
        self.log_weight = self.posterior_logp - self.prior_logp
        self.log_evidence = logsumexp(self.log_weight) - np.log(len(self.log_weight))
        self.evidence = np.exp(self.log_evidence)
        self.log_weight = self.log_weight - self.log_evidence
        self.regularize_weights()

        # Same as in fit_nf, but with the prior playing the role of q.
        self.log_weight_pq_num = self.posterior_logp + 2 * self.prior_logp
        self.log_weight_pq_den = 3 * self.prior_logp
        self.log_evidence_pq = logsumexp(self.log_weight_pq_num) - logsumexp(self.log_weight_pq_den)
        self.evidence_pq = np.exp(self.log_evidence_pq)
        self.log_weight_pq = self.posterior_logp - self.prior_logp - self.log_evidence_pq
        # Not actually used yet, I think.
        self.pq_bw_loss = np.log(
            (np.exp(self.posterior_logp) - np.exp(self.log_evidence_pq + self.prior_logp))**2)
        self.regularize_weights_pq()
        # Sum of mean loss (p - q*Z_pq)^2 / N, for diagnostic purposes.
        self.log_mean_loss = np.log(
            np.mean((np.exp(self.posterior_logp) - np.exp(self.prior_logp + self.log_evidence_pq))**2))

        self.init_weights_cleanup(lambda x: self.prior_logp(x), lambda x: self.prior_dlogp(x))
        self.q_ess = self.calculate_ess(self.log_weight)
        self.total_ess = self.calculate_ess(self.sinf_logw)
        self.all_logq = np.array([])
        self.nf_models = []
        self.nf_models_uw = []

    def setup_logp(self):
        """Set up the prior and likelihood logp functions, and derivatives."""
        shared = make_shared_replacements(self.variables, self.model)

        self.prior_logp_func = logp_forw([self.model.varlogpt], self.variables, shared)
        self.prior_dlogp_func = logp_forw([gradient(self.model.varlogpt, self.variables)],
                                          self.variables, shared)
        self.likelihood_logp_func = logp_forw([self.model.datalogpt], self.variables, shared)
        self.posterior_logp_func = logp_forw([self.model.logpt], self.variables, shared)
        self.posterior_dlogp_func = logp_forw([gradient(self.model.logpt, self.variables)],
                                              self.variables, shared)
        self.posterior_hessian_func = logp_forw([hessian(self.model.logpt, self.variables)],
                                                self.variables, shared)
        self.posterior_logp_nojac = logp_forw([self.model.logp_nojact], self.variables, shared)
        self.posterior_dlogp_nojac = logp_forw([gradient(self.model.logp_nojact, self.variables)],
                                               self.variables, shared)
        self.posterior_hessian_nojac = logp_forw([hessian(self.model.logp_nojact, self.variables)],
                                                 self.variables, shared)

    def get_prior_logp(self):
        """Get the prior log probabilities."""
        priors = [self.prior_logp_func(sample) for sample in self.nf_samples]
        self.prior_logp = np.array(priors).squeeze()

    def get_likelihood_logp(self):
        """Get the likelihood log probabilities."""
        likelihoods = [self.likelihood_logp_func(sample) for sample in self.nf_samples]
        self.likelihood_logp = np.array(likelihoods).squeeze()

    def get_posterior_logp(self):
        """Get the posterior log probabilities."""
        posteriors = [self.posterior_logp_func(sample) for sample in self.nf_samples]
        self.posterior_logp = np.array(posteriors).squeeze()

    def sinf_logq(self, param_vals):
        """Evaluate the current NF model's log density at the given parameter values."""
        if param_vals.size == 1:
            param_vals = np.array([param_vals])
        sinf_logq = self.nf_model.evaluate_density(
            torch.from_numpy(param_vals.astype(np.float32))).numpy().astype(np.float64)
        return sinf_logq.item()

    def regularize_weights(self):
        """Apply clipping to importance weights."""
        inf_weights = np.isinf(np.exp(self.log_weight))
        self.log_weight = np.clip(
            self.log_weight,
            a_min=None,
            a_max=logsumexp(self.log_weight[~inf_weights])
            - np.log(len(self.log_weight[~inf_weights]))
            + self.k_trunc * np.log(len(self.log_weight)))
        self.weights = np.exp(self.log_weight)

    def regularize_weights_pq(self):
        """Apply clipping to pq importance weights."""
        inf_weights = np.isinf(np.exp(self.log_weight_pq))
        self.log_weight_pq = np.clip(
            self.log_weight_pq,
            a_min=None,
            a_max=logsumexp(self.log_weight_pq[~inf_weights])
            - np.log(len(self.log_weight_pq[~inf_weights]))
            + self.k_trunc * np.log(len(self.log_weight_pq)))
        self.weights_pq = np.exp(self.log_weight_pq)

    def calculate_ess(self, logw):
        """Calculate ESS given a set of sample weights."""
weights""" logw = logw - logsumexp(logw) ess = np.exp(-logsumexp(2 * logw) - np.log(logw.shape[0])) return ess def calculate_weight_variance(self): """Calculates the variance of importance weights for a given q.""" return np.var(self.weight) def init_weights_cleanup(self, logq_func=None, dlogq_func=None): """Finish initializing the first importance weights.""" self.sinf_logw = np.copy(self.log_weight) self.importance_weights = np.copy(self.weights) self.importance_weights_pq = np.copy(self.weights_pq) def fit_nf(self, weighted=True): """Fit the NF model for a given iteration after initialization.""" bw_var_weights = [] bw_pq_loss = [] bw_nf_models = [] if (self.trainable_qw): interp_nbin = self.knots_trainable else: interp_nbin = self.interp_nbin self.train_weights = self.importance_weights_pq print(self.train_weights.shape, self.prior_logp.shape, self.posterior_logp.shape) #use tempered likelihood with current value of beta in the weights self.train_weights *= np.exp((1. - self.beta * self.beta_max) * (self.prior_logp - self.posterior_logp)) #again, when beta = 1. this does nothing, ow swaps true posterior for current beta-tempered posterior #assign no (or uniform) weighting if we have no weights at this iteration by overwriting if (not weighted): print("No weights in the fit") self.train_weights = np.ones(self.train_weights.shape[0]) #multiply bw_factors by rel_bw self.bw_factors_use = self.bw_factors * self.rel_bw print("bw_uw = {0:.2f} bw_w".format(self.rel_bw)) #use uw alpha self.alpha = self.alpha_uw else: print("weighting fit") #IW3 self.train_weights *= np.exp( self.prior_logp - self.logq_uw ) #not bothering with Z since doesn't matter for fits... self.logIW3 = self.train_weights / np.sum( self.train_weights) #just normalized pbeta/quw self.bw_factors_use = self.bw_factors #use w alpha (standard alpha) self.alpha = self.alpha_w print("bounds", self.bounds) for bw_factor in self.bw_factors_use: if self.frac_validate > 0.0: num_val = int(self.frac_validate * self.samples.shape[0]) val_idx = np.random.choice(np.arange(self.samples.shape[0]), size=num_val, replace=False) fit_idx = np.delete(np.arange(self.samples.shape[0]), val_idx) self.train_ess = self.calculate_ess(self.sinf_logw[fit_idx, ...]) self.nf_model = GIS( torch.from_numpy(self.samples[fit_idx, ...].astype(np.float32)), torch.from_numpy(self.samples[val_idx, ...].astype(np.float32)), weight_train=torch.from_numpy( self.train_weights[fit_idx, ...].astype(np.float32)), weight_validate=torch.from_numpy( self.train_weights[val_idx, ...].astype(np.float32)), iteration=self.iteration, alpha=self.alpha, verbose=self.verbose, K=self.n_component, M=interp_nbin, KDE=self.KDE, b_factor=bw_factor, edge_bins=self.edge_bins, ndata_A=self.ndata_wT, MSWD_max_iter=self.MSWD_max_iter, NBfirstlayer=self.NBfirstlayer, Whiten=self.Whiten, batchsize=self.batchsize, nocuda=self.nocuda, bounds=self.bounds) elif self.frac_validate == 0.0: fit_idx = np.arange(self.samples.shape[0]) self.train_ess = self.calculate_ess(self.sinf_logw[fit_idx, ...]) self.nf_model = GIS(torch.from_numpy( self.samples.astype(np.float32)), weight_train=torch.from_numpy( self.train_weights.astype(np.float32)), iteration=self.iteration, alpha=self.alpha, verbose=self.verbose, K=self.n_component, M=interp_nbin, KDE=self.KDE, b_factor=bw_factor, edge_bins=self.edge_bins, ndata_A=self.ndata_wT, MSWD_max_iter=self.MSWD_max_iter, NBfirstlayer=self.NBfirstlayer, Whiten=self.Whiten, batchsize=self.batchsize, nocuda=self.nocuda, bounds=self.bounds) #compute logq because we didn't 
            self.logq = self.nf_model.evaluate_density(
                torch.from_numpy(self.samples[fit_idx, ...].astype(np.float32))).numpy().astype(np.float64)
            self.train_logq = self.logq

            # First estimator of the evidence, using E_p[1/q].
            self.log_weight = self.posterior_logp - self.logq
            self.log_evidence = logsumexp(self.log_weight) - np.log(len(self.log_weight))
            self.log_weight = self.log_weight - self.log_evidence
            self.regularize_weights()
            bw_var_weights.append(np.var(self.weights))
            bw_nf_models.append(self.nf_model)

            # Second estimator of the evidence, using E_q[pq]/E_q[q^2], to avoid SINF
            # dropping low-p samples.
            self.log_weight_pq_num = self.posterior_logp + 2 * self.logq
            self.log_weight_pq_den = 3 * self.logq
            # The length factor is unnecessary here.
            self.log_evidence_pq = logsumexp(self.log_weight_pq_num) - logsumexp(self.log_weight_pq_den)
            self.log_weight_pq = self.posterior_logp - self.logq - self.log_evidence_pq
            self.pq_bw_loss = np.log(
                (np.exp(self.posterior_logp) - np.exp(self.log_evidence_pq + self.logq))**2)
            self.regularize_weights_pq()
            # Alternative loss for choosing the bandwidth; check for underflow?
            bw_pq_loss.append(np.sum(self.pq_bw_loss))

        min_var_idx = bw_var_weights.index(min(bw_var_weights))
        min_pq_idx = bw_pq_loss.index(min(bw_pq_loss))
        self.nf_model = bw_nf_models[min_pq_idx]
        self.min_var_weights = bw_var_weights[min_var_idx]
        self.min_var_bw = self.bw_factors_use[min_var_idx]
        self.min_pq_bw = self.bw_factors_use[min_pq_idx]

        self.train_logp = self.posterior_logp
        self.logq = self.nf_model.evaluate_density(
            torch.from_numpy(self.samples[fit_idx, ...].astype(np.float32))).numpy().astype(np.float64)
        self.train_logq = self.logq

        self.log_weight = self.posterior_logp - self.logq
        self.log_evidence = logsumexp(self.log_weight) - np.log(len(self.log_weight))
        self.log_weight = self.log_weight - self.log_evidence

        # Second estimator of the evidence, using E[pq]/E[q^2], to avoid SINF dropping
        # low-p samples. For now we don't actually end up using these weights except to
        # get the evidence, but we can later.
        self.log_weight_pq_num = self.posterior_logp + 2 * self.logq
        self.log_weight_pq_den = 3 * self.logq
        # The length factor is unnecessary here.
        self.log_evidence_pq = logsumexp(self.log_weight_pq_num) - logsumexp(self.log_weight_pq_den)
        # Sum of mean loss (p - q*Z_pq)^2 / N, for diagnostic purposes.
        self.log_mean_loss = np.log(
            np.mean((np.exp(self.posterior_logp) - np.exp(self.logq + self.log_evidence_pq))**2))
        self.regularize_weights()
        self.pq_bw_loss = np.log(
            (np.exp(self.posterior_logp) - np.exp(self.log_evidence_pq + self.logq))**2)
        self.log_weight_pq = self.posterior_logp - self.logq - self.log_evidence_pq
        self.regularize_weights_pq()

        self.sinf_logw = self.log_weight
        self.importance_weights = np.exp(self.sinf_logw - logsumexp(self.sinf_logw))
        self.importance_weights_pq = np.exp(self.log_weight_pq - logsumexp(self.log_weight_pq))
        self.q_ess = self.calculate_ess(self.log_weight)
        self.total_ess = self.calculate_ess(self.sinf_logw)
        # Some of this is redundant.

        if weighted:
            # This needs to come AFTER we already fit q_w, so moving it to the end...
            if self.trainable_qw:
                # Trainable SINF.
                model_pretrain = self.nf_model
                model_train, Z = sopt.optimize_SINF(
                    model=model_pretrain,
                    x=torch.from_numpy(self.samples.astype(np.float32)),
                    # Train not on p but on p_beta.
                    # FIXME: should really have attributes for p_beta.
                    p=torch.as_tensor(
                        np.exp(self.beta * self.beta_max *
                               (self.posterior_logp - self.prior_logp) + self.prior_logp)),
                    # x=init_prior,  # this needs to be samples
                    # p=torch.as_tensor(np.exp(np_two_gaussians(init_prior))).float(),  # this needs to be logp
                    # lossfunc=lossfunc,
                    Nepoch=self.sgd_steps,
                    lr=1e-3,  # Not touching these.
                    lrA=2e-5,
                    optimize_A=True,
                    batchsize=None,
                    verbose=True)
                # What do I do with this Z?
                # Update the quantities we would usually update for q_w.
                self.nf_model = model_train
                self.logq = self.nf_model.evaluate_density(
                    torch.from_numpy(self.samples[fit_idx, ...].astype(np.float32))).numpy().astype(np.float64)
                self.logq_w = self.logq
                self.nf_model_w = self.nf_model
                # Not sure what to do with this, but saving it - it is not the same as
                # the other pq loss...
                self.trainable_Z = Z
            else:
                # Old weighted fit, non-trainable.
                self.logq_w = self.logq
                self.nf_model_w = self.nf_model
            # These don't change whether trained or not.
            self.nf_models.append(self.nf_model)
            self.log_evidence_pq_w = self.log_evidence_pq
            self.log_weight_pq_w = self.log_weight_pq
        else:
            # Unweighted fit.
            self.logq_uw = self.logq
            self.nf_model_uw = self.nf_model
            self.nf_models_uw.append(self.nf_model)
            self.log_evidence_pq_uw = self.log_evidence_pq
            self.log_weight_pq_uw = self.log_weight_pq

    def nf_samples_to_trace(self):
        """Convert NF samples to a trace."""
        length_pos = len(self.nf_samples)
        varnames = [v.name for v in self.variables]
        with self.model:
            self.nf_strace = NDArray(name=self.model.name)
            self.nf_strace.setup(length_pos, self.chain)
        for i in range(length_pos):
            value = []
            size = 0
            for var in varnames:
                shape, new_size = self.var_info[var]
                value.append(self.nf_samples[i][size:size + new_size].reshape(shape))
                size += new_size
            self.nf_strace.record(point={k: v for k, v in zip(varnames, value)})
        self.nf_trace = point_list_to_multitrace(self.nf_strace, model=self.model)

    def posterior_to_trace(self):
        """Save results into a PyMC3 trace."""
        length_pos = len(self.posterior)
        varnames = [v.name for v in self.variables]
        print(f'posterior to trace varnames = {varnames}')
        with self.model:
            strace = NDArray(name=self.model.name)
            strace.setup(length_pos, self.chain)
        for i in range(length_pos):
            value = []
            size = 0
            for var in varnames:
                shape, new_size = self.var_info[var]
                value.append(self.posterior[i][size:size + new_size].reshape(shape))
                size += new_size
            strace.record(point={k: v for k, v in zip(varnames, value)})
        return strace

    def update_weights_beta(self):
        """Calculate the next inverse temperature (beta).

        Here we use IW3 = p(beta)/q_uw instead of the standard tempering weights.
        """
""" low_beta = old_beta = self.beta up_beta = 2.0 #need to use saved posterior and prior instead of likelihood as I did in fit_nf (to avoid reevaluating, which we treat as expensive) # Target distribution is l^beta_max (vanilla case is beta_max=1) - this makes beta iteration work as before self.likelihood_logp = self.beta_max * (self.posterior_logp - self.prior_logp) rN = int(self.n0 * self.t_ess) #this does not change as N changes while up_beta - low_beta > 1e-6: new_beta = (low_beta + up_beta) / 2.0 #use IW3 instead of temp weights log_temp_weights_un = new_beta * self.likelihood_logp + self.prior_logp - self.logq_uw log_temp_weights = log_temp_weights_un - logsumexp( log_temp_weights_un) ESS = int(np.exp(-logsumexp(log_temp_weights * 2))) if ESS == rN: break elif ESS < rN: up_beta = new_beta else: low_beta = new_beta if new_beta >= 1: new_beta = 1 self.beta = new_beta self.logIW3 -= logsumexp(self.logIW3)
def test_sequential_backend(self):
    with self.model:
        backend = NDArray()
        pm.sample(10, cores=1, chains=2, trace=backend)