def _set_rejection_round(self, round):
    """Create the rejection sampler and random state used for one round.

    Parameters
    ----------
    round : int
        Index of the round. Round 0 uses ``self.seed`` directly; any other
        round uses a sub-seed derived from ``self.seed`` and ``round``.

    Notes
    -----
    Sets ``self._round_random_state`` and ``self._rejection`` as side effects.

    """
    # NOTE(review): the round info is refreshed from self.state['round'],
    # not from the `round` argument -- confirm this is intended.
    self._update_round_info(self.state['round'])

    # A per-round sub-seed keeps the results of each round reproducible.
    if round == 0:
        round_seed = self.seed
    else:
        round_seed = get_sub_seed(self.seed, round)

    self._round_random_state = np.random.RandomState(round_seed)

    self._rejection = Rejection(
        self.model,
        discrepancy_name=self.discrepancy_name,
        output_names=self.output_names,
        batch_size=self.batch_size,
        seed=round_seed,
        max_parallel_batches=self.max_parallel_batches,
    )
def _init_new_round(self):
    """Log the start of the current round and set up its rejection sampler.

    The round index is read from ``self.state['round']``. Round 0 uses
    ``self.seed`` directly; any other round uses a sub-seed derived from
    ``self.seed`` and the round index. A fresh ``Rejection`` instance is
    stored in ``self._rejection`` and given the objective
    ``self.objective['n_samples']`` with ``self.current_population_threshold``
    as its threshold.

    """
    current_round = self.state['round']

    banner = '-' * 16
    logger.info('%s Starting round %d %s' % (banner, current_round, banner))

    # A per-round sub-seed keeps the results of each round reproducible.
    if current_round == 0:
        round_seed = self.seed
    else:
        round_seed = get_sub_seed(self.seed, current_round)

    self._round_random_state = np.random.RandomState(round_seed)

    self._rejection = Rejection(
        self.model,
        discrepancy_name=self.discrepancy_name,
        output_names=self.output_names,
        batch_size=self.batch_size,
        seed=round_seed,
        max_parallel_batches=self.max_parallel_batches,
    )

    self._rejection.set_objective(
        self.objective['n_samples'],
        threshold=self.current_population_threshold,
    )
def sample(self,
           n_samples,
           warmup=None,
           n_chains=4,
           threshold=None,
           initials=None,
           algorithm='nuts',
           n_evidence=None,
           **kwargs):
    r"""Sample the posterior distribution of BOLFI.

    Here the likelihood is defined through the cumulative density function
    of the standard normal distribution:

    L(\theta) \propto F((h-\mu(\theta)) / \sigma(\theta))

    where h is the threshold, and \mu(\theta) and \sigma(\theta) are the
    posterior mean and (noisy) standard deviation of the associated Gaussian
    process.

    The sampling is performed with an MCMC sampler (the No-U-Turn Sampler,
    NUTS).

    Parameters
    ----------
    n_samples : int
        Number of requested samples from the posterior for each chain. This
        includes warmup, and note that the effective sample size is usually
        considerably smaller.
    warmup : int, optional
        Length of warmup sequence in MCMC sampling. Defaults to
        n_samples//2 (also used when a falsy value such as 0 is given).
    n_chains : int, optional
        Number of independent chains.
    threshold : float, optional
        The threshold (bandwidth) for posterior (give as log if log
        discrepancy).
    initials : np.array of shape (n_chains, n_params), optional
        Initial values for the sampled parameters for each chain.
        Defaults to best evidence points.
    algorithm : string, optional
        Sampling algorithm to use. Currently only 'nuts' is supported; the
        value is not inspected by this method.
    n_evidence : int
        If the regression model is not fitted yet, specify the amount of
        evidence
    **kwargs
        Forwarded to ``mcmc.nuts`` for every chain.

    Returns
    -------
    BolfiSample

    Raises
    ------
    ValueError
        If `initials` does not have shape (n_chains, n_params), or if there
        are not enough initialization points with a finite log-posterior
        to start `n_chains` chains.

    """
    if self.state['n_batches'] == 0:
        self.fit(n_evidence)

    # TODO: other MCMC algorithms

    posterior = self.extract_posterior(threshold)
    warmup = warmup or n_samples // 2

    # Unless given, select the evidence points with smallest discrepancy
    if initials is not None:
        initials = np.asarray(initials)
        if initials.shape != (n_chains, self.target_model.input_dim):
            raise ValueError("The shape of initials must be (n_chains, n_params).")
    else:
        inds = np.argsort(self.target_model.Y[:, 0])
        initials = np.asarray(self.target_model.X[inds])

    # Size of the candidate pool, valid for both user-given and default
    # initials (the original code referenced `inds`, which only exists in
    # the default branch).
    n_candidates = len(initials)

    self.target_model.is_sampling = True  # enables caching for default RBF kernel
    try:
        tasks_ids = []
        ii_initial = 0

        # sampling is embarrassingly parallel, so depending on self.client
        # this may parallelize
        for ii in range(n_chains):
            seed = get_sub_seed(self.seed, ii)

            # discard bad initialization points; the bounds check comes first
            # so that an exhausted pool never indexes past the end
            while (ii_initial < n_candidates
                   and np.isinf(posterior.logpdf(initials[ii_initial]))):
                ii_initial += 1
            if ii_initial >= n_candidates:
                raise ValueError(
                    "BOLFI.sample: Cannot find enough acceptable initialization points!")

            tasks_ids.append(
                self.client.apply(
                    mcmc.nuts,
                    n_samples,
                    initials[ii_initial],
                    posterior.logpdf,
                    posterior.gradient_logpdf,
                    n_adapt=warmup,
                    seed=seed,
                    **kwargs))

            ii_initial += 1

        # get results from completed tasks or run sampling (client-specific)
        chains = np.asarray(
            [self.client.get_result(task_id) for task_id in tasks_ids])

        print(
            "{} chains of {} iterations acquired. Effective sample size and Rhat for each "
            "parameter:".format(n_chains, n_samples))
        for ii, node in enumerate(self.parameter_names):
            print(node, mcmc.eff_sample_size(chains[:, :, ii]),
                  mcmc.gelman_rubin(chains[:, :, ii]))
    finally:
        # Always leave the surrogate model out of sampling mode, even when
        # initialization or a sampling task fails.
        self.target_model.is_sampling = False

    return BolfiSample(
        method_name='BOLFI',
        chains=chains,
        parameter_names=self.parameter_names,
        warmup=warmup,
        threshold=float(posterior.threshold),
        n_sim=self.state['n_sim'],
        seed=self.seed)
def sample(self,
           n_samples,
           warmup=None,
           n_chains=4,
           initials=None,
           algorithm='nuts',
           sigma_proposals=None,
           n_evidence=None,
           *args, **kwargs):
    """Sample from the posterior distribution of BOLFIRE.

    Sampling is performed with an MCMC sampler.

    Parameters
    ----------
    n_samples: int
        Number of requested samples from the posterior for each chain. This
        includes warmup, and note that the effective sample size is usually
        considerably smaller.
    warmup: int, optional
        Length of warmup sequence in MCMC sampling. Defaults to
        n_samples // 2 (also used when a falsy value such as 0 is given).
    n_chains: int, optional
        Number of independent chains.
    initials: np.ndarray (n_chains, n_params), optional
        Initial values for the sampled parameters for each chain.
    algorithm: str, optional
        Sampling algorithm to use, either 'nuts' or 'metropolis'.
    sigma_proposals: np.ndarray
        Standard deviations for Gaussian proposals of each parameter for
        Metropolis-Hastings.
    n_evidence: int, optional
        If the surrogate model is not fitted yet, specify the amount of
        evidence.
    *args
        Forwarded to the ``BOLFIRESample`` constructor.
    **kwargs
        Forwarded both to the selected MCMC sampler and to the
        ``BOLFIRESample`` constructor.

    Returns
    -------
    BOLFIRESample

    Raises
    ------
    ValueError
        If `algorithm` is not supported, if `sigma_proposals` is missing or
        of the wrong length for Metropolis sampling, if `initials` does not
        have shape (n_chains, n_params), or if there are not enough
        initialization points with a finite log-posterior to start
        `n_chains` chains.

    """
    # Fit posterior in case not done
    if self.state['n_batches'] == 0:
        self.fit(n_evidence)

    # Check algorithm
    if algorithm not in ['nuts', 'metropolis']:
        raise ValueError('The given algorithm is not supported.')

    # Check standard deviations of Gaussian proposals when using Metropolis-Hastings
    if algorithm == 'metropolis':
        if sigma_proposals is None:
            raise ValueError('Gaussian proposal standard deviations have '
                             'to be provided for Metropolis-sampling.')
        elif sigma_proposals.shape[0] != self.target_model.input_dim:
            raise ValueError('The length of Gaussian proposal standard '
                             'deviations must be n_params.')

    posterior = self.extract_result()
    warmup = warmup or n_samples // 2

    # Unless given, select the evidence points with best likelihood ratio
    if initials is not None:
        initials = np.asarray(initials)
        if initials.shape != (n_chains, self.target_model.input_dim):
            raise ValueError(
                'The shape of initials must be (n_chains, n_params).')
    else:
        inds = np.argsort(self.target_model.Y[:, 0])
        initials = np.asarray(self.target_model.X[inds])

    # Size of the candidate pool, valid for both user-given and default
    # initials (the original code referenced `inds`, which only exists in
    # the default branch).
    n_candidates = len(initials)

    # Enable caching for default RBF kernel
    self.target_model.is_sampling = True
    try:
        tasks_ids = []
        ii_initial = 0
        for ii in range(n_chains):
            seed = get_sub_seed(self.seed, ii)

            # Discard bad initialization points; the bounds check comes first
            # so that an exhausted pool never indexes past the end
            while (ii_initial < n_candidates
                   and np.isinf(posterior.logpdf(initials[ii_initial]))):
                ii_initial += 1
            if ii_initial >= n_candidates:
                raise ValueError(
                    'BOLFIRE.sample: Cannot find enough acceptable '
                    'initialization points!')

            if algorithm == 'nuts':
                tasks_ids.append(
                    self.client.apply(mcmc.nuts,
                                      n_samples,
                                      initials[ii_initial],
                                      posterior.logpdf,
                                      posterior.gradient_logpdf,
                                      n_adapt=warmup,
                                      seed=seed,
                                      **kwargs))
            elif algorithm == 'metropolis':
                tasks_ids.append(
                    self.client.apply(mcmc.metropolis,
                                      n_samples,
                                      initials[ii_initial],
                                      posterior.logpdf,
                                      sigma_proposals,
                                      warmup,
                                      seed=seed,
                                      **kwargs))

            ii_initial += 1

        # Get results from completed tasks or run sampling (client-specific)
        chains = np.asarray(
            [self.client.get_result(task_id) for task_id in tasks_ids])

        logger.info(f'{n_chains} chains of {n_samples} iterations acquired. '
                    'Effective sample size and Rhat for each parameter:')
        for ii, node in enumerate(self.parameter_names):
            logger.info(f'{node} {mcmc.eff_sample_size(chains[:, :, ii])} '
                        f'{mcmc.gelman_rubin_statistic(chains[:, :, ii])}')
    finally:
        # Always leave the surrogate model out of sampling mode, even when
        # initialization or a sampling task fails.
        self.target_model.is_sampling = False

    return BOLFIRESample(method_name='BOLFIRE',
                         chains=chains,
                         parameter_names=self.parameter_names,
                         warmup=warmup,
                         n_sim=self.state['n_sim'],
                         seed=self.seed,
                         *args, **kwargs)