Example 1
 def nf_samples_to_trace(self):
     """Convert NF samples to a trace."""
     length_pos = len(self.nf_samples)
     varnames = [v.name for v in self.variables]
     with self.model:
         self.nf_strace = NDArray(name=self.model.name)
         self.nf_strace.setup(length_pos, self.chain)
     for i in range(length_pos):
         value = []
         size = 0
         for var in varnames:
             shape, new_size = self.var_info[var]
             value.append(self.nf_samples[i][size:size +
                                             new_size].reshape(shape))
             size += new_size
         self.nf_strace.record(
             point={k: v
                    for k, v in zip(varnames, value)})
     self.nf_trace = point_list_to_multitrace(self.nf_strace,
                                              model=self.model)
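The slicing loop above simply undoes the flat concatenation of all model variables into a single vector. A minimal standalone sketch of that round trip, using NumPy only and hypothetical variables mu (scalar) and w (length-3 vector) in place of var_info:

import numpy as np

var_info = {"mu": ((), 1), "w": ((3,), 3)}    # name -> (shape, flat size), as in self.var_info
flat_sample = np.array([0.5, 1.0, 2.0, 3.0])  # one concatenated sample row

point, size = {}, 0
for name, (shape, new_size) in var_info.items():
    point[name] = flat_sample[size:size + new_size].reshape(shape)
    size += new_size
# point == {"mu": array(0.5), "w": array([1., 2., 3.])}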
Example 2
 def posterior_to_trace(self):
     """Save results into a PyMC3 trace."""
     length_pos = len(self.posterior)
     varnames = [v.name for v in self.variables]
     
     with self.model:
         strace = NDArray(name=self.model.name)
         strace.setup(length_pos, self.chain)
     for i in range(length_pos):
         value = []
         size = 0
         for var in varnames:
             shape, new_size = self.var_info[var]
             value.append(self.posterior[i][size : size + new_size].reshape(shape))
             size += new_size
         strace.record(point={k: v for k, v in zip(varnames, value)})
     return strace
Example 3
def test_choose_chains(n_points, tune, expected_length, expected_n_traces):
    with pm.Model() as model:
        a = pm.Normal("a", mu=0, sigma=1)
        trace_0 = NDArray(model)
        trace_1 = NDArray(model)
        trace_2 = NDArray(model)
        trace_0.setup(n_points[0], 1)
        trace_1.setup(n_points[1], 1)
        trace_2.setup(n_points[2], 1)
        for _ in range(n_points[0]):
            trace_0.record({"a": 0})
        for _ in range(n_points[1]):
            trace_1.record({"a": 0})
        for _ in range(n_points[2]):
            trace_2.record({"a": 0})
        traces, length = pm.sampling._choose_chains(
            [trace_0, trace_1, trace_2], tune=tune)
    assert length == expected_length
    assert expected_n_traces == len(traces)
Example 4
class NFO:
    """Sequencial NF Bayesian Optimization."""
    def __init__(self,
                 n0=10,
                 init_samples=None,
                 k_trunc=np.inf,
                 eps_z=.01,
                 nf_iter=2,
                 N=10,
                 t_ess=0.5,
                 beta_max=1,
                 model=None,
                 random_seed=-1,
                 chain=0,
                 frac_validate=0.0,
                 iteration=None,
                 alpha_w=(0, 0),
                 alpha_uw=(0, 0),
                 verbose=False,
                 n_component=None,
                 interp_nbin=None,
                 KDE=True,
                 bw_factor_min=1.0,
                 bw_factor_max=1.0,
                 bw_factor_num=1,
                 rel_bw=1,
                 edge_bins=None,
                 ndata_wT=None,
                 MSWD_max_iter=None,
                 NBfirstlayer=True,
                 logit=False,
                 Whiten=False,
                 trainable_qw=False,
                 sgd_steps=0,
                 knots_trainable=5,
                 batchsize=None,
                 nocuda=False,
                 patch=False,
                 shape=[28, 28, 1],
                 bounds=None):
        self.N = N
        self.n0 = n0

        self.model = model
        self.chain = chain

        # Init method params.
        self.init_samples = init_samples

        self.random_seed = random_seed

        # Seed numpy and torch, unless random_seed is left at its default of -1.
        if self.random_seed != -1:
            np.random.seed(self.random_seed)
            torch.manual_seed(self.random_seed)

        # Separating out so I can keep track. These are SINF params.
        assert 0.0 <= frac_validate <= 1.0
        self.frac_validate = frac_validate
        self.iteration = iteration
        self.alpha_uw = alpha_uw
        self.alpha_w = alpha_w
        self.k_trunc = k_trunc
        self.verbose = verbose
        self.n_component = n_component
        self.interp_nbin = interp_nbin
        self.KDE = KDE
        self.bw_factors = np.linspace(bw_factor_min, bw_factor_max,
                                      bw_factor_num)
        self.edge_bins = edge_bins
        self.ndata_wT = ndata_wT
        self.MSWD_max_iter = MSWD_max_iter
        self.NBfirstlayer = NBfirstlayer
        self.logit = logit
        self.Whiten = Whiten
        self.batchsize = batchsize
        self.nocuda = nocuda
        self.patch = patch
        self.shape = shape

        # Convert the bounds array passed in as [[x1min, x2min, ...], [x1max, x2max, ...]]
        # to what SINF wants: [[x1min, x1max], [x2min, x2max], ...].
        if (bounds is not None):
            bounds_sinf = list([list(b) for b in bounds.T])
        else:
            bounds_sinf = [
                [None, None] for i in range(init_samples.shape[1])
            ]  #get the dimensionality from initial samples assuming (N,d) shape
        self.bounds = bounds_sinf
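        # e.g. bounds = np.array([[0., -1.], [1., 2.]]) (first row mins, second row maxes)
        # becomes bounds_sinf = [[0., 1.], [-1., 2.]], one [min, max] pair per parameter.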

        #trainable sinf
        self.trainable_qw = trainable_qw
        self.sgd_steps = sgd_steps
        self.knots_trainable = knots_trainable

        #nfo
        self.t_ess = t_ess
        self.beta_max = beta_max
        self.beta = 0  # initial value of beta before iterating; matches the SMC initialization
        self.rel_bw = rel_bw

        self.model = modelcontext(model)
        self.variables = inputvars(self.model.vars)

    def initialize_var_info(self):
        """Extract variable info for the model instance."""
        var_info = OrderedDict()
        init = self.model.test_point
        for v in self.variables:
            var_info[v.name] = (init[v.name].shape, init[v.name].size)
        self.var_info = var_info

    def initialize_population(self):
        """Create an initial population from the prior distribution."""
        population = []

        if self.init_samples is None:
            init_rnd = sample_prior_predictive(
                self.N,
                var_names=[v.name for v in self.model.unobserved_RVs],
                model=self.model,
            )

            for i in range(self.N):

                point = Point(
                    {v.name: init_rnd[v.name][i]
                     for v in self.variables},
                    model=self.model)
                population.append(self.model.dict_to_array(point))
            self.prior_samples = np.array(floatX(population))

        elif self.init_samples is not None:
            self.prior_samples = np.copy(self.init_samples)

        self.samples = np.copy(self.prior_samples)
        self.nf_samples = np.copy(self.samples)
        self.get_posterior_logp()
        self.get_prior_logp()
        self.log_weight = self.posterior_logp - self.prior_logp
        self.log_evidence = logsumexp(self.log_weight) - np.log(
            len(self.log_weight))
        self.evidence = np.exp(self.log_evidence)
        self.log_weight = self.log_weight - self.log_evidence
        self.regularize_weights()

        # Same as in fit_nf, but here the prior plays the role of q.
        self.log_weight_pq_num = self.posterior_logp + 2 * self.prior_logp
        self.log_weight_pq_den = 3 * self.prior_logp
        self.log_evidence_pq = logsumexp(self.log_weight_pq_num) - logsumexp(
            self.log_weight_pq_den)
        self.evidence_pq = np.exp(self.log_evidence_pq)
        self.log_weight_pq = self.posterior_logp - self.prior_logp - self.log_evidence_pq
        self.pq_bw_loss = np.log(
            (np.exp(self.posterior_logp) -
             np.exp(self.log_evidence_pq +
                    self.prior_logp))**2)  #not actually used yet I think
        self.regularize_weights_pq()

        # Log of the mean loss (p - q*Z_pq)^2, kept for diagnostic purposes.
        self.log_mean_loss = np.log(
            np.mean((np.exp(self.posterior_logp) -
                     np.exp(self.prior_logp + self.log_evidence_pq))**2))

        self.init_weights_cleanup(lambda x: self.prior_logp(x),
                                  lambda x: self.prior_dlogp(x))
        self.q_ess = self.calculate_ess(self.log_weight)
        self.total_ess = self.calculate_ess(self.sinf_logw)

        self.all_logq = np.array([])
        self.nf_models = []
        self.nf_models_uw = []

    def setup_logp(self):
        """Set up the prior and likelihood logp functions, and derivatives."""
        shared = make_shared_replacements(self.variables, self.model)

        self.prior_logp_func = logp_forw([self.model.varlogpt], self.variables,
                                         shared)
        self.prior_dlogp_func = logp_forw(
            [gradient(self.model.varlogpt, self.variables)], self.variables,
            shared)
        self.likelihood_logp_func = logp_forw([self.model.datalogpt],
                                              self.variables, shared)
        self.posterior_logp_func = logp_forw([self.model.logpt],
                                             self.variables, shared)
        self.posterior_dlogp_func = logp_forw(
            [gradient(self.model.logpt, self.variables)], self.variables,
            shared)
        self.posterior_hessian_func = logp_forw(
            [hessian(self.model.logpt, self.variables)], self.variables,
            shared)
        self.posterior_logp_nojac = logp_forw([self.model.logp_nojact],
                                              self.variables, shared)
        self.posterior_dlogp_nojac = logp_forw(
            [gradient(self.model.logp_nojact, self.variables)], self.variables,
            shared)
        self.posterior_hessian_nojac = logp_forw(
            [hessian(self.model.logp_nojact, self.variables)], self.variables,
            shared)

    def get_prior_logp(self):
        """Get the prior log probabilities."""
        priors = [self.prior_logp_func(sample) for sample in self.nf_samples]
        self.prior_logp = np.array(priors).squeeze()

    def get_likelihood_logp(self):
        """Get the likelihood log probabilities."""
        likelihoods = [
            self.likelihood_logp_func(sample) for sample in self.nf_samples
        ]
        self.likelihood_logp = np.array(likelihoods).squeeze()

    def get_posterior_logp(self):
        """Get the posterior log probabilities."""
        posteriors = [
            self.posterior_logp_func(sample) for sample in self.nf_samples
        ]
        self.posterior_logp = np.array(posteriors).squeeze()

    def sinf_logq(self, param_vals):
        if param_vals.size == 1:
            param_vals = np.array([param_vals])
        sinf_logq = self.nf_model.evaluate_density(
            torch.from_numpy(param_vals.astype(np.float32))).numpy().astype(
                np.float64)
        return sinf_logq.item()

    def regularize_weights(self):
        """Apply clipping to importance weights."""
        inf_weights = np.isinf(np.exp(self.log_weight))
        self.log_weight = np.clip(
            self.log_weight,
            a_min=None,
            a_max=logsumexp(self.log_weight[~inf_weights]) -
            np.log(len(self.log_weight[~inf_weights])) +
            self.k_trunc * np.log(len(self.log_weight)))
        self.weights = np.exp(self.log_weight)

    def regularize_weights_pq(self):
        """Apply clipping to pq importance weights."""
        inf_weights = np.isinf(np.exp(self.log_weight_pq))
        self.log_weight_pq = np.clip(
            self.log_weight_pq,
            a_min=None,
            a_max=logsumexp(self.log_weight_pq[~inf_weights]) -
            np.log(len(self.log_weight_pq[~inf_weights])) +
            self.k_trunc * np.log(len(self.log_weight_pq)))
        self.weights_pq = np.exp(self.log_weight_pq)

    def calculate_ess(self, logw):
        """Calculate ESS given a set of sample weights"""
        logw = logw - logsumexp(logw)
        ess = np.exp(-logsumexp(2 * logw) - np.log(logw.shape[0]))
        return ess

    def calculate_weight_variance(self):
        """Calculate the variance of importance weights for a given q."""
        return np.var(self.weights)

    def init_weights_cleanup(self, logq_func=None, dlogq_func=None):
        """Finish initializing the first importance weights."""
        self.sinf_logw = np.copy(self.log_weight)
        self.importance_weights = np.copy(self.weights)
        self.importance_weights_pq = np.copy(self.weights_pq)

    def fit_nf(self, weighted=True):
        """Fit the NF model for a given iteration after initialization."""
        bw_var_weights = []
        bw_pq_loss = []
        bw_nf_models = []

        if (self.trainable_qw):
            interp_nbin = self.knots_trainable
        else:
            interp_nbin = self.interp_nbin

        self.train_weights = self.importance_weights_pq
        print(self.train_weights.shape, self.prior_logp.shape,
              self.posterior_logp.shape)
        #use tempered likelihood with current value of beta in the weights
        self.train_weights *= np.exp((1. - self.beta * self.beta_max) *
                                     (self.prior_logp - self.posterior_logp))
        # Again, when beta = 1 this does nothing; otherwise it swaps the true posterior for the current beta-tempered posterior.

        #assign no (or uniform) weighting if we have no weights at this iteration by overwriting
        if (not weighted):
            print("No weights in the fit")
            self.train_weights = np.ones(self.train_weights.shape[0])

            #multiply bw_factors by rel_bw
            self.bw_factors_use = self.bw_factors * self.rel_bw
            print("bw_uw = {0:.2f} bw_w".format(self.rel_bw))

            #use uw alpha
            self.alpha = self.alpha_uw

        else:
            print("weighting fit")
            #IW3
            self.train_weights *= np.exp(
                self.prior_logp - self.logq_uw
            )  #not bothering with Z since doesn't matter for fits...
            self.logIW3 = self.train_weights / np.sum(
                self.train_weights)  #just normalized pbeta/quw

            self.bw_factors_use = self.bw_factors

            #use w alpha (standard alpha)
            self.alpha = self.alpha_w

        print("bounds", self.bounds)
        for bw_factor in self.bw_factors_use:
            if self.frac_validate > 0.0:
                num_val = int(self.frac_validate * self.samples.shape[0])
                val_idx = np.random.choice(np.arange(self.samples.shape[0]),
                                           size=num_val,
                                           replace=False)
                fit_idx = np.delete(np.arange(self.samples.shape[0]), val_idx)
                self.train_ess = self.calculate_ess(self.sinf_logw[fit_idx,
                                                                   ...])
                self.nf_model = GIS(
                    torch.from_numpy(self.samples[fit_idx,
                                                  ...].astype(np.float32)),
                    torch.from_numpy(self.samples[val_idx,
                                                  ...].astype(np.float32)),
                    weight_train=torch.from_numpy(
                        self.train_weights[fit_idx, ...].astype(np.float32)),
                    weight_validate=torch.from_numpy(
                        self.train_weights[val_idx, ...].astype(np.float32)),
                    iteration=self.iteration,
                    alpha=self.alpha,
                    verbose=self.verbose,
                    K=self.n_component,
                    M=interp_nbin,
                    KDE=self.KDE,
                    b_factor=bw_factor,
                    edge_bins=self.edge_bins,
                    ndata_A=self.ndata_wT,
                    MSWD_max_iter=self.MSWD_max_iter,
                    NBfirstlayer=self.NBfirstlayer,
                    Whiten=self.Whiten,
                    batchsize=self.batchsize,
                    nocuda=self.nocuda,
                    bounds=self.bounds)
            elif self.frac_validate == 0.0:
                fit_idx = np.arange(self.samples.shape[0])
                self.train_ess = self.calculate_ess(self.sinf_logw[fit_idx,
                                                                   ...])
                self.nf_model = GIS(torch.from_numpy(
                    self.samples.astype(np.float32)),
                                    weight_train=torch.from_numpy(
                                        self.train_weights.astype(np.float32)),
                                    iteration=self.iteration,
                                    alpha=self.alpha,
                                    verbose=self.verbose,
                                    K=self.n_component,
                                    M=interp_nbin,
                                    KDE=self.KDE,
                                    b_factor=bw_factor,
                                    edge_bins=self.edge_bins,
                                    ndata_A=self.ndata_wT,
                                    MSWD_max_iter=self.MSWD_max_iter,
                                    NBfirstlayer=self.NBfirstlayer,
                                    Whiten=self.Whiten,
                                    batchsize=self.batchsize,
                                    nocuda=self.nocuda,
                                    bounds=self.bounds)

            # Compute logq ourselves because we didn't draw new samples (which is when it would have been computed automatically).
            self.logq = self.nf_model.evaluate_density(
                torch.from_numpy(self.samples[fit_idx, ...].astype(
                    np.float32))).numpy().astype(np.float64)
            self.train_logq = self.logq

            #first estimator of evidence using E_p[1/q]
            self.log_weight = self.posterior_logp - self.logq
            self.log_evidence = logsumexp(self.log_weight) - np.log(
                len(self.log_weight))
            self.log_weight = self.log_weight - self.log_evidence
            self.regularize_weights()
            bw_var_weights.append(np.var(self.weights))
            bw_nf_models.append(self.nf_model)

            #second estimator of evidence using E_q[pq]/E_q[q^2] to avoid SINF dropping low-p samples
            self.log_weight_pq_num = (self.posterior_logp + 2 * self.logq)
            self.log_weight_pq_den = 3 * self.logq
            self.log_evidence_pq = (logsumexp(self.log_weight_pq_num) -
                                    logsumexp(self.log_weight_pq_den)
                                    )  #length factor unnecessary here
            self.log_weight_pq = self.posterior_logp - self.logq - self.log_evidence_pq
            self.pq_bw_loss = np.log(
                (np.exp(self.posterior_logp) -
                 np.exp(self.log_evidence_pq + self.logq))**2)
            self.regularize_weights_pq()
            bw_pq_loss.append(
                np.sum(self.pq_bw_loss)
            )  #alternative loss for choosing bw, check for underflow?

            min_var_idx = bw_var_weights.index(min(bw_var_weights))
            min_pq_idx = bw_pq_loss.index(min(bw_pq_loss))

        self.nf_model = bw_nf_models[min_pq_idx]

        self.min_var_weights = bw_var_weights[min_var_idx]
        self.min_var_bw = self.bw_factors_use[min_var_idx]
        self.min_pq_bw = self.bw_factors_use[min_pq_idx]

        self.train_logp = self.posterior_logp
        self.logq = self.nf_model.evaluate_density(
            torch.from_numpy(self.samples[fit_idx, ...].astype(
                np.float32))).numpy().astype(np.float64)
        self.train_logq = self.logq

        self.log_weight = self.posterior_logp - self.logq
        self.log_evidence = logsumexp(self.log_weight) - np.log(
            len(self.log_weight))
        self.log_weight = self.log_weight - self.log_evidence

        #second estimator of evidence using E[pq]/E[q^2] to avoid SINF dropping low-p samples
        #For now we don't actually end up using these weights except to get the evidence, but can later
        self.log_weight_pq_num = (self.posterior_logp + 2 * self.logq)
        self.log_weight_pq_den = 3 * self.logq
        self.log_evidence_pq = (logsumexp(self.log_weight_pq_num) -
                                logsumexp(self.log_weight_pq_den)
                                )  #length factor unnecessary here

        # Log of the mean loss (p - q*Z_pq)^2, kept for diagnostic purposes.
        self.log_mean_loss = np.log(
            np.mean((np.exp(self.posterior_logp) -
                     np.exp(self.logq + self.log_evidence_pq))**2))

        self.regularize_weights()
        self.pq_bw_loss = np.log((np.exp(self.posterior_logp) -
                                  np.exp(self.log_evidence_pq + self.logq))**2)
        self.log_weight_pq = self.posterior_logp - self.logq - self.log_evidence_pq

        self.regularize_weights_pq()

        self.sinf_logw = self.log_weight
        self.importance_weights = np.exp(self.sinf_logw -
                                         logsumexp(self.sinf_logw))
        self.importance_weights_pq = np.exp(self.log_weight_pq -
                                            logsumexp(self.log_weight_pq))

        self.q_ess = self.calculate_ess(self.log_weight)
        self.total_ess = self.calculate_ess(self.sinf_logw)

        #some of this is redundant
        if (weighted):
            #This needs to come AFTER we already fit q_w, so moving it to the end...
            if (self.trainable_qw):
                model_pretrain = self.nf_model

                #trainable sinf
                model_train, Z = sopt.optimize_SINF(
                    model=model_pretrain,
                    x=torch.from_numpy(self.samples.astype(np.float32)),
                    #train not on p but on p_beta
                    #FIXME SHOULD REALLY HAVE ATTRIBUTES FOR P_BETA
                    p=torch.as_tensor(
                        np.exp(self.beta * self.beta_max *
                               (self.posterior_logp - self.prior_logp) +
                               self.prior_logp)),
                    # x=init_prior, #this needs to be samples
                    # p=torch.as_tensor(np.exp(np_two_gaussians(init_prior))).float(), #this needs to be logp
                    # lossfunc=lossfunc,
                    Nepoch=self.sgd_steps,
                    lr=1e-3,  #not touching these
                    lrA=2e-5,
                    optimize_A=True,
                    batchsize=None,
                    verbose=True)
                #what do I do with this Z??
                #update the quantities with what we would usually update for q_w

                self.nf_model = model_train
                self.logq = self.nf_model.evaluate_density(
                    torch.from_numpy(self.samples[fit_idx, ...].astype(
                        np.float32))).numpy().astype(np.float64)

                self.logq_w = self.logq
                self.nf_model_w = self.nf_model
                self.trainable_Z = Z  #not sure what to do with this, but saving it - it is not the same as the other pq loss...
            else:  #old weighted fit, non-trainable
                self.logq_w = self.logq
                self.nf_model_w = self.nf_model

            #these don't change whether trained or not
            self.nf_models.append(self.nf_model)
            self.log_evidence_pq_w = self.log_evidence_pq
            self.log_weight_pq_w = self.log_weight_pq

        else:  #unweighted fit
            self.logq_uw = self.logq
            self.nf_model_uw = self.nf_model
            self.nf_models_uw.append(self.nf_model)
            self.log_evidence_pq_uw = self.log_evidence_pq
            self.log_weight_pq_uw = self.log_weight_pq

    def nf_samples_to_trace(self):
        """Convert NF samples to a trace."""
        length_pos = len(self.nf_samples)
        varnames = [v.name for v in self.variables]
        with self.model:
            self.nf_strace = NDArray(name=self.model.name)
            self.nf_strace.setup(length_pos, self.chain)
        for i in range(length_pos):
            value = []
            size = 0
            for var in varnames:
                shape, new_size = self.var_info[var]
                value.append(self.nf_samples[i][size:size +
                                                new_size].reshape(shape))
                size += new_size
            self.nf_strace.record(
                point={k: v
                       for k, v in zip(varnames, value)})
        self.nf_trace = point_list_to_multitrace(self.nf_strace,
                                                 model=self.model)

    def posterior_to_trace(self):
        """Save results into a PyMC3 trace."""
        length_pos = len(self.posterior)
        varnames = [v.name for v in self.variables]
        print(f'posterior to trace varnames = {varnames}')
        with self.model:
            strace = NDArray(name=self.model.name)
            strace.setup(length_pos, self.chain)
        for i in range(length_pos):
            value = []
            size = 0
            for var in varnames:
                shape, new_size = self.var_info[var]
                value.append(self.posterior[i][size:size +
                                               new_size].reshape(shape))
                size += new_size
            strace.record(point={k: v for k, v in zip(varnames, value)})
        return strace

    def update_weights_beta(self):
        """Calculate the next inverse temperature (beta).
        Here we use IW3=p(beta)/q_uw instead of standard temp weights.
        """
        low_beta = old_beta = self.beta
        up_beta = 2.0
        #need to use saved posterior and prior instead of likelihood as I did in fit_nf (to avoid reevaluating, which we treat as expensive)
        # Target distribution is l^beta_max (vanilla case is beta_max=1) - this makes beta iteration work as before
        self.likelihood_logp = self.beta_max * (self.posterior_logp -
                                                self.prior_logp)

        rN = int(self.n0 * self.t_ess)  #this does not change as N changes

        while up_beta - low_beta > 1e-6:
            new_beta = (low_beta + up_beta) / 2.0
            #use IW3 instead of temp weights
            log_temp_weights_un = new_beta * self.likelihood_logp + self.prior_logp - self.logq_uw
            log_temp_weights = log_temp_weights_un - logsumexp(
                log_temp_weights_un)
            ESS = int(np.exp(-logsumexp(log_temp_weights * 2)))
            if ESS == rN:
                break
            elif ESS < rN:
                up_beta = new_beta
            else:
                low_beta = new_beta
        if new_beta >= 1:
            new_beta = 1
        self.beta = new_beta
        self.logIW3 -= logsumexp(self.logIW3)
Example 5
 def test_sequential_backend(self):
     with self.model:
         backend = NDArray()
         pm.sample(10, cores=1, chains=2, trace=backend)