def save_parameters(self, path: Union[str, Path], parameters: torch.Tensor):
    """Save parameters to a given path"""
    save_tensor_to_csv(path, parameters, self.get_labels_parameters())

def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    num_top_samples: Optional[int] = 100,
    quantile: Optional[float] = None,
    eps: Optional[float] = None,
    distance: str = "l2",
    batch_size: int = 1000,
    save_distances: bool = False,
    kde_bandwidth: Optional[str] = "cv",
    sass: bool = False,
    sass_fraction: float = 0.5,
    sass_feature_expansion_degree: int = 3,
    lra: bool = False,
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs REJ-ABC from `sbi`

    Choose one of `num_top_samples`, `quantile`, `eps`.

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        num_top_samples: If given, will use `top=True` with num_top_samples
        quantile: Quantile to use
        eps: Epsilon threshold to use
        distance: Distance to use
        batch_size: Batch size for simulator
        save_distances: If True, stores distances of samples to disk
        kde_bandwidth: If not None, will resample using KDE when necessary, set
            e.g. to "cv" for cross-validated bandwidth selection
        sass: If True, summary statistics are learned as in
            Fearnhead & Prangle 2012.
        sass_fraction: Fraction of simulation budget to use for sass.
        sass_feature_expansion_degree: Degree of polynomial expansion of the
            summary statistics.
        lra: If True, posterior samples are adjusted with linear regression
            as in Beaumont et al. 2002.

    Returns:
        Samples from posterior, number of simulator calls, log probability of
        true params if computable
    """
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)
    assert not (num_top_samples is None and quantile is None and eps is None)

    log = sbibm.get_logger(__name__)
    log.info("Running REJ-ABC")

    prior = task.get_prior_dist()
    simulator = task.get_simulator(max_calls=num_simulations)
    kde = kde_bandwidth is not None
    if observation is None:
        observation = task.get_observation(num_observation)

    if num_top_samples is not None and quantile is None:
        if sass:
            quantile = num_top_samples / (
                num_simulations - int(sass_fraction * num_simulations)
            )
        else:
            quantile = num_top_samples / num_simulations

    inference_method = MCABC(
        simulator=simulator,
        prior=prior,
        simulation_batch_size=batch_size,
        distance=distance,
        show_progress_bars=True,
    )
    # Returns samples or kde posterior in output.
    output, summary = inference_method(
        x_o=observation,
        num_simulations=num_simulations,
        eps=eps,
        quantile=quantile,
        return_summary=True,
        kde=kde,
        kde_kwargs={"bandwidth": kde_bandwidth} if kde else {},
        lra=lra,
        sass=sass,
        sass_expansion_degree=sass_feature_expansion_degree,
        sass_fraction=sass_fraction,
    )

    assert simulator.num_simulations == num_simulations

    if save_distances:
        save_tensor_to_csv("distances.csv", summary["distances"])

    if kde:
        kde_posterior = output
        samples = kde_posterior.sample(num_samples)

        # LPTP can only be returned with KDE posterior.
        if num_observation is not None:
            true_parameters = task.get_true_parameters(num_observation=num_observation)
            log_prob_true_parameters = kde_posterior.log_prob(true_parameters.squeeze())

            return samples, simulator.num_simulations, log_prob_true_parameters
    else:
        samples = output

    return samples, simulator.num_simulations, None

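# Usage sketch (not part of the original module): shows how `run` might be
# called for a benchmark task with a top-100 acceptance rule and KDE resampling.
# The task name, simulation budget, and sample count below are illustrative
# assumptions, not prescribed defaults.
def _example_rej_abc_usage():
    task = sbibm.get_task("gaussian_linear")  # assumed task choice
    samples, num_simulations_used, log_prob_true_parameters = run(
        task=task,
        num_samples=10_000,
        num_simulations=100_000,
        num_observation=1,
        num_top_samples=100,
        kde_bandwidth="cv",
    )
    return samples, num_simulations_used, log_prob_true_parameters
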
def save_data(self, path: Union[str, Path], data: torch.Tensor):
    """Save data to a given path"""
    save_tensor_to_csv(path, data, self.get_labels_data())

def main(cfg: DictConfig) -> None:
    log = logging.getLogger(__name__)
    log.info(cfg.pretty())
    log.info(f"sbibm version: {sbibm.__version__}")
    log.info(f"Hostname: {socket.gethostname()}")
    if cfg.seed is None:
        log.info("Seed not specified, generating random seed for replicability")
        cfg.seed = int(torch.randint(low=1, high=2 ** 32 - 1, size=(1,))[0])
    log.info(f"Random seed: {cfg.seed}")
    save_config(cfg)

    # Seeding
    torch.manual_seed(cfg.seed)
    random.seed(cfg.seed)
    np.random.seed(cfg.seed)

    # Devices
    gpu = True if cfg.device != "cpu" else False
    if gpu:
        torch.cuda.set_device(0)
    torch.set_default_tensor_type(
        "torch.cuda.FloatTensor" if gpu else "torch.FloatTensor"
    )

    # Paths
    path_samples = "posterior_samples.csv.bz2"
    path_runtime = "runtime.csv"
    path_log_prob_true_parameters = "log_prob_true_parameters.csv"
    path_num_simulations_simulator = "num_simulations_simulator.csv"
    path_predictive_samples = "predictive_samples.csv.bz2"

    # Run
    task = sbibm.get_task(cfg.task.name)
    t0 = time.time()
    parts = cfg.algorithm.run.split(".")
    module_name = ".".join(["sbibm", "algorithms"] + parts[:-1])
    run_fn = getattr(importlib.import_module(module_name), parts[-1])
    algorithm_params = cfg.algorithm.params if "params" in cfg.algorithm else {}
    log.info("Start run")
    outputs = run_fn(
        task,
        num_observation=cfg.task.num_observation,
        num_samples=task.num_posterior_samples,
        num_simulations=cfg.task.num_simulations,
        **algorithm_params,
    )
    runtime = time.time() - t0
    log.info("Finished run")

    # Store outputs
    if type(outputs) == torch.Tensor:
        samples = outputs
        num_simulations_simulator = float("nan")
        log_prob_true_parameters = float("nan")
    elif type(outputs) == tuple and len(outputs) == 3:
        samples = outputs[0]
        num_simulations_simulator = float(outputs[1])
        log_prob_true_parameters = (
            float(outputs[2]) if outputs[2] is not None else float("nan")
        )
    else:
        raise NotImplementedError
    save_tensor_to_csv(path_samples, samples, columns=task.get_labels_parameters())
    save_float_to_csv(path_runtime, runtime)
    save_float_to_csv(path_num_simulations_simulator, num_simulations_simulator)
    save_float_to_csv(path_log_prob_true_parameters, log_prob_true_parameters)

    # Predictive samples
    log.info("Draw posterior predictive samples")
    simulator = task.get_simulator()
    predictive_samples = []
    batch_size = 1_000
    for idx in range(int(samples.shape[0] / batch_size)):
        try:
            predictive_samples.append(
                simulator(samples[(idx * batch_size) : ((idx + 1) * batch_size), :])
            )
        except Exception:
            # Fall back to NaNs for batches on which the simulator fails.
            predictive_samples.append(
                float("nan") * torch.ones((batch_size, task.dim_data))
            )
    predictive_samples = torch.cat(predictive_samples, dim=0)
    save_tensor_to_csv(
        path_predictive_samples, predictive_samples, task.get_labels_data()
    )

    # Compute metrics
    if cfg.compute_metrics:
        df_metrics = compute_metrics_df(
            task_name=cfg.task.name,
            num_observation=cfg.task.num_observation,
            path_samples=path_samples,
            path_runtime=path_runtime,
            path_predictive_samples=path_predictive_samples,
            path_log_prob_true_parameters=path_log_prob_true_parameters,
            log=log,
        )
        df_metrics.to_csv("metrics.csv", index=False)
        log.info(f"Metrics:\n{df_metrics.transpose().to_string(header=False)}")

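# Invocation sketch (not part of the original module): `main` reads its settings
# from a DictConfig, so it can be driven directly with an in-memory config. The
# config layout below is an assumption inferred from the fields `main` accesses
# (seed, device, compute_metrics, task.*, algorithm.*); the task name and
# algorithm path are illustrative, not the project's actual config schema.
def _example_main_invocation():
    from omegaconf import OmegaConf

    cfg = OmegaConf.create(
        {
            "seed": 42,
            "device": "cpu",
            "compute_metrics": True,
            "task": {
                "name": "gaussian_linear",
                "num_observation": 1,
                "num_simulations": 10_000,
            },
            "algorithm": {
                # Resolved by `main` to sbibm.algorithms.<module>.<function>.
                "run": "sbi.mcabc.run",
                "params": {"num_top_samples": 100},
            },
        }
    )
    main(cfg)
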