def _run_sims(
    self,
    round_,
    num_simulations_per_round,
):
    """
    Run the simulations at the beginning of each round.

    Args:
        round_: int. Round.
        num_simulations_per_round: int. Number of simulations in the current round.

    Updates (appends to):
        self._parameter_bank: torch.Tensor. theta used for training.
        self._observation_bank: torch.Tensor. x used for training.
        self._prior_masks: torch.Tensor. Masks of 0/1 for each prior sample,
            indicating whether the prior sample will be used in the current round.
    """
    # Generate parameters from the prior in the first round, and from the most
    # recent posterior estimate in subsequent rounds.
    if round_ == 0:
        parameters, observations = simulate_in_batches(
            simulator=self._simulator,
            parameter_sample_fn=lambda num_samples: self._prior.sample(
                (num_samples,)
            ),
            num_samples=max(
                0, num_simulations_per_round - self._num_pilot_samples
            ),
            simulation_batch_size=self._simulation_batch_size,
            x_dim=self._true_observation.shape[1:],  # Do not pass batch_dim.
        )
        parameters = torch.cat(
            (parameters, self.pilot_parameters[:num_simulations_per_round]),
            dim=0,
        )
        observations = torch.cat(
            (observations, self.pilot_observations[:num_simulations_per_round]),
            dim=0,
        )
    else:
        parameters, observations = simulate_in_batches(
            simulator=self._simulator,
            parameter_sample_fn=lambda num_samples: self._neural_posterior.sample(
                num_samples,
                context=self._true_observation,
            ),
            num_samples=num_simulations_per_round,
            simulation_batch_size=self._simulation_batch_size,
            x_dim=self._true_observation.shape[1:],  # Do not pass batch_dim.
        )

    # Store (parameter, observation) pairs.
    self._parameter_bank.append(parameters)
    self._observation_bank.append(observations)
    self._prior_masks.append(
        torch.ones(num_simulations_per_round, 1)
        if round_ == 0
        else torch.zeros(num_simulations_per_round, 1)
    )
def test_benchmarking_sp(sim_batch_size):
    num_simulations = 100
    theta = torch.zeros(num_simulations, 2)
    show_pbar = True

    tic = time.time()
    simulate_in_batches(
        slow_linear_gaussian,
        theta,
        sim_batch_size,
        num_workers=1,
        show_progress_bars=show_pbar,
    )
    toc_sp = time.time() - tic

    tic = time.time()
    simulate_in_batches(
        slow_linear_gaussian,
        theta,
        sim_batch_size,
        num_workers=10,
        show_progress_bars=show_pbar,
    )
    toc_joblib = time.time() - tic

    # Allow joblib to be 10 percent slower.
    assert toc_joblib <= toc_sp * 1.1
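# The `sim_batch_size` argument above is presumably supplied by pytest
# parametrization; the snippet does not show the decorator. A minimal sketch of
# how that hookup might look; the batch-size values here are illustrative
# assumptions, not taken from the source:
import pytest

@pytest.mark.parametrize("sim_batch_size", (1, 10, 100))
def test_benchmarking_sp(sim_batch_size):
    ...  # Body as in the test above.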
def test_simulate_in_batches(
    num_sims,
    batch_size,
    simulator=diagonal_linear_gaussian,
    prior=BoxUniform(zeros(5), ones(5)),
):
    """Test combinations of num_sims and simulation_batch_size."""
    theta = prior.sample((num_sims,))
    simulate_in_batches(simulator, theta, batch_size)
def test_simulate_in_batches(
    num_sims,
    batch_size,
    simulator,
    prior=BoxUniform(zeros(5), ones(5)),
):
    """Test combinations of num_sims and simulation_batch_size."""
    simulator, prior = prepare_for_sbi(simulator, prior)
    theta = prior.sample((num_sims,))
    simulate_in_batches(simulator, theta, batch_size)
def test_simulate_in_batches(
    num_samples,
    batch_size,
    simulator=linear_gaussian,
    prior=BoxUniform(torch.zeros(5), torch.ones(5)),
):
    """Test combinations of num_samples and simulation_batch_size."""
    simulate_in_batches(
        simulator,
        lambda n: prior.sample((n,)),
        num_samples,
        batch_size,
        torch.Size([5]),
    )
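# The three test variants above trace the evolution of `simulate_in_batches`:
# from a callback-based signature (parameter_sample_fn, num_samples, x_dim) to
# the current one taking a pre-sampled theta batch. A minimal usage sketch of
# the newer style; the toy simulator and the import paths are assumptions based
# on recent sbi versions, not taken from the source:
import torch
from sbi.simulators.simutils import simulate_in_batches
from sbi.utils import BoxUniform

def toy_simulator(theta: torch.Tensor) -> torch.Tensor:
    # Placeholder simulator: a noisy identity mapping.
    return theta + 0.1 * torch.randn_like(theta)

prior = BoxUniform(torch.zeros(5), torch.ones(5))
theta = prior.sample((100,))
x = simulate_in_batches(toy_simulator, theta, sim_batch_size=10)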
def __init__(
    self,
    simulator: Callable,
    prior,
    distance: Union[str, Callable] = "l2",
    num_workers: int = 1,
    simulation_batch_size: int = 1,
    show_progress_bars: bool = True,
) -> None:
    r"""Base class for Approximate Bayesian Computation methods.

    Args:
        simulator: A function that takes parameters $\theta$ and maps them to
            simulations, or observations, `x`, $\mathrm{sim}(\theta)\to x$. Any
            regular Python callable (i.e. function or class with `__call__`
            method) can be used.
        prior: A probability distribution that expresses prior knowledge about
            the parameters, e.g. which ranges are meaningful for them. Any
            object with `.log_prob()` and `.sample()` (for example, a PyTorch
            distribution) can be used.
        distance: Distance function to compare observed and simulated data. Can
            be a custom callable or one of `l1`, `l2`, `mse`.
        num_workers: Number of parallel workers to use for simulations.
        simulation_batch_size: Number of parameter sets that the simulator maps
            to data x at once. If None, we simulate all parameter sets at the
            same time. If >= 1, the simulator has to process data of shape
            (simulation_batch_size, parameter_dimension).
        show_progress_bars: Whether to show a progress bar during simulation
            and sampling.
    """
    self.prior = prior
    self._simulator = simulator
    self._show_progress_bars = show_progress_bars

    # Select the distance function.
    if isinstance(distance, str):
        distances = ["l1", "l2", "mse"]
        assert (
            distance in distances
        ), f"Distance function str must be one of {distances}."
        self.distance = self.choose_distance_function(distance_type=distance)
    else:
        # A custom callable was passed; use it directly.
        self.distance = distance

    self._batched_simulator = lambda theta: simulate_in_batches(
        simulator=self._simulator,
        theta=theta,
        sim_batch_size=simulation_batch_size,
        num_workers=num_workers,
        show_progress_bars=self._show_progress_bars,
    )

    self.logger = logging.getLogger(__name__)
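# Since `distance` also accepts a custom callable, a user-supplied function
# only needs to map observed and simulated data to (a batch of) scalars. A
# minimal sketch of such a callable; the name and the reduction over the last
# dimension are illustrative assumptions, not from the source:
import torch

def l2_distance(x_obs: torch.Tensor, x_sim: torch.Tensor) -> torch.Tensor:
    # Euclidean distance between the observation and each simulation, reduced
    # over the data dimension.
    return torch.norm(x_obs - x_sim, dim=-1)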
def simulate_for_sbi(
    simulator: Callable,
    proposal: Any,
    num_simulations: int,
    num_workers: int = 1,
    simulation_batch_size: int = 1,
    show_progress_bar: bool = True,
) -> Tuple[Tensor, Tensor]:
    r"""
    Returns ($\theta, x$) pairs obtained from sampling the proposal and simulating.

    This function performs two steps:

    - Sample parameters $\theta$ from the `proposal`.
    - Simulate these parameters to obtain $x$.

    Args:
        simulator: A function that takes parameters $\theta$ and maps them to
            simulations, or observations, `x`, $\text{sim}(\theta)\to x$. Any
            regular Python callable (i.e. function or class with `__call__`
            method) can be used.
        proposal: Probability distribution that the parameters $\theta$ are
            sampled from.
        num_simulations: Number of simulations that are run.
        num_workers: Number of parallel workers to use for simulations.
        simulation_batch_size: Number of parameter sets that the simulator maps
            to data x at once. If None, we simulate all parameter sets at the
            same time. If >= 1, the simulator has to process data of shape
            (simulation_batch_size, parameter_dimension).
        show_progress_bar: Whether to show a progress bar while running
            simulations. It does not affect the progress bar shown while
            drawing samples from the proposal.

    Returns:
        Sampled parameters $\theta$ and simulation outputs $x$.
    """
    check_if_proposal_has_default_x(proposal)

    theta = proposal.sample((num_simulations,))

    x = simulate_in_batches(
        simulator,
        theta,
        simulation_batch_size,
        num_workers,
        show_progress_bar,
    )

    return theta, x
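# A minimal end-to-end usage sketch of `simulate_for_sbi`; the toy simulator is
# a placeholder, and the import paths assume the public sbi API rather than the
# exact module this snippet comes from:
import torch
from sbi.inference import simulate_for_sbi
from sbi.utils import BoxUniform

def toy_simulator(theta: torch.Tensor) -> torch.Tensor:
    # Placeholder simulator: a noisy identity mapping.
    return theta + 0.1 * torch.randn_like(theta)

prior = BoxUniform(torch.zeros(3), torch.ones(3))
theta, x = simulate_for_sbi(toy_simulator, proposal=prior, num_simulations=200)
assert theta.shape == (200, 3) and x.shape == (200, 3)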
def __init__(
    self,
    simulator: Callable,
    prior,
    num_workers: int = 1,
    simulation_batch_size: int = 1,
    device: str = "cpu",
    logging_level: Union[int, str] = "WARNING",
    summary_writer: Optional[SummaryWriter] = None,
    show_progress_bars: bool = True,
    show_round_summary: bool = False,
):
    r"""
    Base class for inference methods.

    Args:
        simulator: A function that takes parameters $\theta$ and maps them to
            simulations, or observations, `x`, $\mathrm{sim}(\theta)\to x$. Any
            regular Python callable (i.e. function or class with `__call__`
            method) can be used.
        prior: A probability distribution that expresses prior knowledge about
            the parameters, e.g. which ranges are meaningful for them. Any
            object with `.log_prob()` and `.sample()` (for example, a PyTorch
            distribution) can be used.
        num_workers: Number of parallel workers to use for simulations.
        simulation_batch_size: Number of parameter sets that the simulator maps
            to data x at once. If None, we simulate all parameter sets at the
            same time. If >= 1, the simulator has to process data of shape
            (simulation_batch_size, parameter_dimension).
        device: torch device on which to compute, e.g. "gpu" or "cpu".
        logging_level: Minimum severity of messages to log. One of the strings
            "INFO", "WARNING", "DEBUG", "ERROR" and "CRITICAL".
        summary_writer: A `SummaryWriter` to control, among others, log file
            location (default is `<current working directory>/logs`).
        show_progress_bars: Whether to show a progress bar during simulation
            and sampling.
        show_round_summary: Whether to show the validation loss and leakage
            after each round.
    """
    # We set the device globally by setting the default tensor type for all
    # tensors.
    assert device in (
        "gpu",
        "cpu",
    ), "Currently, only 'gpu' or 'cpu' are supported as devices."
    self._device = configure_default_device(device)

    self._simulator, self._prior = simulator, prior

    self._show_progress_bars = show_progress_bars
    self._show_round_summary = show_round_summary

    self._batched_simulator = lambda theta: simulate_in_batches(
        self._simulator,
        theta,
        simulation_batch_size,
        num_workers,
        self._show_progress_bars,
    )

    # Initialize roundwise (theta, x, prior_masks) for storage of parameters,
    # simulations and masks indicating if simulations came from the prior.
    self._theta_roundwise, self._x_roundwise, self._prior_masks = [], [], []

    # Initialize list that indicates the round from which simulations were drawn.
    self._data_round_index = []

    self._round = 0

    # XXX We could instantiate the Posterior here for all children. Two problems:
    #     1. We must dispatch to the right PotentialProvider for mcmc based on
    #        the name.
    #     2. `method_family` cannot be resolved only from
    #        `self.__class__.__name__`, since SRE and AALR demand different
    #        handling but are both in the SRE class.
    self._summary_writer = (
        self._default_summary_writer()
        if summary_writer is None
        else summary_writer
    )

    # Logging during training (by SummaryWriter).
    self._summary = dict(
        median_observation_distances=[],
        epochs=[],
        best_validation_log_probs=[],
    )
def __init__(
    self,
    simulator: Callable,
    prior,
    true_observation: Tensor,
    num_pilot_samples: int = 100,
    density_estimator=None,
    calibration_kernel: Optional[Callable] = None,
    z_score_obs: bool = True,
    simulation_batch_size: int = 1,
    use_combined_loss: bool = False,
    retrain_from_scratch_each_round: bool = False,
    discard_prior_samples: bool = False,
    device: Optional[torch.device] = None,
    sample_with_mcmc: bool = False,
    mcmc_method: str = "slice-np",
    summary_writer: Optional[SummaryWriter] = None,
):
    """
    See NeuralInference docstring for all other arguments.

    Args:
        num_pilot_samples: Number of simulations that are run when
            instantiating an object. Used to z-score the observations.
        density_estimator: Neural density estimator.
        calibration_kernel: A function to calibrate the context.
        z_score_obs: Whether to z-score the data features x.
        use_combined_loss: Whether to jointly train the neural net on prior
            samples using maximum likelihood. Useful to prevent density
            leaking when using box uniform priors.
        retrain_from_scratch_each_round: Whether to retrain the conditional
            density estimator for the posterior from scratch each round.
        discard_prior_samples: Whether to discard prior samples from round two
            onwards.
    """
    super().__init__(
        simulator,
        prior,
        true_observation,
        simulation_batch_size,
        device,
        summary_writer,
    )

    self.z_score_obs = z_score_obs

    self._num_pilot_samples = num_pilot_samples
    self._use_combined_loss = use_combined_loss
    self._discard_prior_samples = discard_prior_samples

    self._prior_masks = []
    self._model_bank = []

    self._retrain_from_scratch_each_round = retrain_from_scratch_each_round

    # Run pilot simulations with samples from the prior.
    (
        self.pilot_parameters,
        self.pilot_observations,
    ) = simulate_in_batches(
        simulator=self._simulator,
        parameter_sample_fn=lambda num_samples: self._prior.sample(
            (num_samples,)
        ),
        num_samples=num_pilot_samples,
        simulation_batch_size=self._simulation_batch_size,
        x_dim=self._true_observation.shape[1:],  # Do not pass batch_dim.
    )

    # Create the deep neural density estimator.
    if density_estimator is None:
        density_estimator = utils.posterior_nn(
            model="maf",
            prior=self._prior,
            context=self._true_observation,
        )

    # Create the neural posterior, which can sample() and log_prob().
    self._neural_posterior = Posterior(
        algorithm_family="snpe",
        neural_net=density_estimator,
        prior=prior,
        context=self._true_observation,
        sample_with_mcmc=sample_with_mcmc,
        mcmc_method=mcmc_method,
        get_potential_function=PotentialFunctionProvider(),
    )

    # Obtain z-scores for the observations and define the embedding net.
    if self.z_score_obs:
        self.obs_mean = torch.mean(self.pilot_observations, dim=0)
        self.obs_std = torch.std(self.pilot_observations, dim=0)
    else:
        self.obs_mean = torch.zeros(self._true_observation.shape)
        self.obs_std = torch.ones(self._true_observation.shape)

    # The new embedding_net contains z-scoring.
    if not isinstance(self._neural_posterior.neural_net, MultivariateGaussianMDN):
        embedding = nn.Sequential(
            utils.Normalize(self.obs_mean, self.obs_std),
            self._neural_posterior.neural_net._embedding_net,
        )
        self._neural_posterior.set_embedding_net(embedding)
    elif z_score_obs:
        warnings.warn("z-scoring of observation not implemented for MDNs")

    # Calibration kernels as proposed in Lueckmann, Goncalves et al., 2017.
    if calibration_kernel is None:
        self.calibration_kernel = lambda context_input: torch.ones(
            [len(context_input)]
        )
    else:
        self.calibration_kernel = calibration_kernel

    # If we are retraining from scratch each round, keep a copy of the
    # original untrained model for reinitialization.
    self._untrained_neural_posterior = deepcopy(self._neural_posterior)

    # Extra SNPE-specific fields for the summary writer.
    self._summary.update({"rejection_sampling_acceptance_rates": []})
log = sbibm.get_logger(__name__)
log.info("Starting to run RF-ABC")

prior = task.get_prior()
simulator = task.get_simulator()

if observation is None:
    observation = task.get_observation(num_observation)

# Simulate the training data set used as the reference table.
log.info("Generating data set as reference table")
thetas = prior(num_samples=num_simulations)
xs = simulate_in_batches(
    simulator,
    theta=thetas,
    sim_batch_size=batch_size,
    num_workers=1,
    show_progress_bars=True,
)

assert not thetas.isnan().any()
assert not xs.isnan().any()
assert not observation.isnan().any()

dim_thetas = thetas.shape[1]
dim_xs = xs.shape[1]
names_thetas = [f"t{i}" for i in range(dim_thetas)]
names_xs = [f"x{i}" for i in range(dim_xs)]

np_thetas = thetas.numpy().astype(np.float64)