Example #1
    def __init__(
        self,
        prior: Optional[Distribution] = None,
        device: str = "cpu",
        logging_level: Union[int, str] = "WARNING",
        summary_writer: Optional[SummaryWriter] = None,
        show_progress_bars: bool = True,
    ):
        r"""Base class for inference methods.

        Args:
            prior: A probability distribution that expresses prior knowledge about the
                parameters, e.g. which ranges are meaningful for them. Must be a PyTorch
                distribution, see FAQ for details on how to use custom distributions.
            device: torch device on which to train the neural net and on which to
                perform all posterior operations, e.g. gpu or cpu.
            logging_level: Minimum severity of messages to log. One of the strings
               "INFO", "WARNING", "DEBUG", "ERROR" and "CRITICAL".
            summary_writer: A `SummaryWriter` to control, among others, the log
                file location (default is `<current working directory>/logs`).
            show_progress_bars: Whether to show a progress bar during simulation and
                sampling.
        """

        self._device = process_device(device)
        check_prior(prior)
        check_if_prior_on_device(self._device, prior)
        self._prior = prior

        self._posterior = None
        self._neural_net = None
        self._x_shape = None

        self._show_progress_bars = show_progress_bars

        # Initialize roundwise (theta, x, prior_masks) for storage of parameters,
        # simulations and masks indicating if simulations came from prior.
        self._theta_roundwise, self._x_roundwise, self._prior_masks = [], [], []
        self._model_bank = []

        # Initialize list that indicates the round from which simulations were drawn.
        self._data_round_index = []

        self._round = 0
        self._val_log_prob = float("-Inf")

        # XXX We could instantiate the Posterior here for all children. Two problems:
        #     1. We must dispatch to the right PotentialProvider for MCMC based on name.
        #     2. `method_family` cannot be resolved from `self.__class__.__name__` alone,
        #        since SRE and AALR demand different handling but are both in the SRE class.

        self._summary_writer = (self._default_summary_writer()
                                if summary_writer is None else summary_writer)

        # Logging during training (by SummaryWriter).
        self._summary = dict(
            median_observation_distances=[],
            epochs=[],
            best_validation_log_probs=[],
            validation_log_probs=[],
            train_log_probs=[],
            epoch_durations_sec=[],
        )
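A minimal usage sketch of this base class through a concrete subclass (assuming the `sbi` package layout, with `SNLE` as the subclass and `BoxUniform` as the prior; everything outside the code shown above is an assumption):

    import torch
    from sbi.inference import SNLE
    from sbi.utils import BoxUniform

    # A PyTorch-compatible prior over two parameters, uniform on [-1, 1]^2.
    prior = BoxUniform(low=-torch.ones(2), high=torch.ones(2))

    # The base-class arguments above (device, logging_level, progress bars)
    # are forwarded through the subclass constructor.
    inference = SNLE(prior=prior, device="cpu", show_progress_bars=False)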
Example #2
    def build_posterior(
        self,
        density_estimator: Optional[nn.Module] = None,
        prior: Optional[Distribution] = None,
        sample_with: str = "mcmc",
        mcmc_method: str = "slice_np",
        vi_method: str = "rKL",
        mcmc_parameters: Dict[str, Any] = {},
        vi_parameters: Dict[str, Any] = {},
        rejection_sampling_parameters: Dict[str, Any] = {},
    ) -> Union[MCMCPosterior, RejectionPosterior, VIPosterior]:
        r"""Build posterior from the neural density estimator.

        SNLE trains a neural network to approximate the likelihood $p(x|\theta)$. The
        posterior wraps the trained network such that one can directly evaluate the
        unnormalized posterior log probability $p(\theta|x) \propto p(x|\theta) \cdot
        p(\theta)$ and draw samples from the posterior with MCMC or rejection sampling.

        Args:
            density_estimator: The density estimator that the posterior is based on.
                If `None`, use the latest neural density estimator that was trained.
            prior: Prior distribution.
            sample_with: Method to use for sampling from the posterior. Must be one of
                [`mcmc` | `rejection` | `vi`].
            mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`,
                `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy
                implementation of slice sampling; select `hmc`, `nuts` or `slice` for
                Pyro-based sampling.
            vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note
                that some of the methods are `mode seeking` (e.g., rKL) whereas others
                are `mass covering` (e.g., fKL).
            mcmc_parameters: Additional kwargs passed to `MCMCPosterior`.
            vi_parameters: Additional kwargs passed to `VIPosterior`.
            rejection_sampling_parameters: Additional kwargs passed to
                `RejectionPosterior`.

        Returns:
            Posterior $p(\theta|x)$ with `.sample()` and `.log_prob()` methods
            (the returned log-probability is unnormalized).
        """
        if prior is None:
            assert self._prior is not None, (
                "You did not pass a prior. You have to pass the prior either at "
                "initialization `inference = SNLE(prior)` or to "
                "`.build_posterior(prior=prior)`."
            )
            prior = self._prior
        else:
            check_prior(prior)

        if density_estimator is None:
            likelihood_estimator = self._neural_net
            # If the internal net is used, the device is already defined.
            device = self._device
        else:
            likelihood_estimator = density_estimator
            # Otherwise, infer it from the device of the net parameters.
            device = next(density_estimator.parameters()).device.type

        potential_fn, theta_transform = likelihood_estimator_based_potential(
            likelihood_estimator=likelihood_estimator, prior=prior, x_o=None)

        if sample_with == "mcmc":
            self._posterior = MCMCPosterior(
                potential_fn=potential_fn,
                theta_transform=theta_transform,
                proposal=prior,
                method=mcmc_method,
                device=device,
                x_shape=self._x_shape,
                **mcmc_parameters,
            )
        elif sample_with == "rejection":
            self._posterior = RejectionPosterior(
                potential_fn=potential_fn,
                proposal=prior,
                device=device,
                x_shape=self._x_shape,
                **rejection_sampling_parameters,
            )
        elif sample_with == "vi":
            self._posterior = VIPosterior(
                potential_fn=potential_fn,
                theta_transform=theta_transform,
                prior=prior,  # type: ignore
                vi_method=vi_method,
                device=device,
                x_shape=self._x_shape,
                **vi_parameters,
            )
        else:
            raise NotImplementedError(
                f"Sampling method {sample_with!r} is not supported. "
                "Use one of `mcmc`, `rejection`, or `vi`."
            )

        # Store models at end of each round.
        self._model_bank.append(deepcopy(self._posterior))

        return deepcopy(self._posterior)
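A hedged usage sketch of this method after training (`theta`, `x`, and `x_o` are assumed to come from the user's simulator and observation; `append_simulations` and `train` follow the `sbi` API):

    # Train the likelihood estimator on simulated (theta, x) pairs, then
    # build an MCMC-based posterior and draw samples given an observation x_o.
    inference.append_simulations(theta, x).train()
    posterior = inference.build_posterior(sample_with="mcmc", mcmc_method="slice_np")
    samples = posterior.sample((1000,), x=x_o)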
Example #3
    def __init__(
        self,
        prior: Distribution,
        posterior_estimator: nn.Module,
        max_sampling_batch_size: int = 10_000,
        device: Optional[str] = None,
        x_shape: Optional[torch.Size] = None,
    ):
        """
        Args:
            prior: Prior distribution with `.log_prob()` and `.sample()`.
            posterior_estimator: The trained neural posterior.
            max_sampling_batch_size: Batch size for drawing samples from
                the proposal at every iteration.
            device: Training device, e.g., "cpu", "cuda" or "cuda:0". If None,
                `potential_fn.device` is used.
            x_shape: Shape of a single simulator output. If passed, it is used to check
                the shape of the observed data and give a descriptive error.
        """
        # Because `DirectPosterior` does not take the `potential_fn` as input, it
        # builds the potential function itself. The `potential_fn` and
        # `theta_transform` are used only for obtaining the MAP.
        check_prior(prior)
        potential_fn, theta_transform = posterior_estimator_based_potential(
            posterior_estimator, prior, None)

        super().__init__(
            potential_fn=potential_fn,
            theta_transform=theta_transform,
            device=device,
            x_shape=x_shape,
        )

        self.prior = prior
        self.posterior_estimator = posterior_estimator

        self.max_sampling_batch_size = max_sampling_batch_size
        self._leakage_density_correction_factor = None
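A minimal construction sketch (assuming `DirectPosterior` is importable from `sbi.inference.posteriors` and that `density_estimator` is a trained posterior network, e.g. the return value of an SNPE `.train()` call; `prior` and `x_o` are assumptions):

    from sbi.inference.posteriors import DirectPosterior

    posterior = DirectPosterior(posterior_estimator=density_estimator, prior=prior)
    samples = posterior.sample((100,), x=x_o)
    log_probs = posterior.log_prob(samples, x=x_o)  # leakage-corrected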
Example #4
    def __init__(
        self,
        potential_fn: BasePotential,
        q: TransformedDistribution,
        prior: Optional[Distribution] = None,
        n_particles: int = 256,
        clip_value: float = 5.0,
        optimizer: Type[Union[SGD, Adam, Adadelta, RMSprop, Adagrad, Adamax,
                              AdamW, ASGD]] = Adam,
        scheduler: Type[Union[CosineAnnealingLR, ExponentialLR,
                              CosineAnnealingWarmRestarts, CyclicLR, LambdaLR,
                              StepLR]] = ExponentialLR,
        eps: float = 1e-5,
        **kwargs,
    ):
        """This is a wrapper around a PyTorch optimizer which is used to minimize some
            loss for variational inference.

        Args:
            potential_fn: Potential function of the target, i.e., the posterior
                density up to a normalization constant.
            q: Variational distribution.
            prior: Prior distribution, used during the warmup if given. Note that
                this does not affect `potential_fn`, so make sure the same prior is
                used within it.
            n_particles: Number of samples used to estimate gradients.
            clip_value: Value at which the gradient norm is clipped.
            optimizer: Base class of a PyTorch optimizer. We support one of [SGD,
                Adam, Adadelta, RMSprop, Adagrad, Adamax, AdamW, ASGD].
            scheduler: Base class of a PyTorch scheduler. We support one of
                [CosineAnnealingLR, ExponentialLR, CosineAnnealingWarmRestarts,
                CyclicLR, LambdaLR, StepLR]. Note that you may have to pass
                additional arguments for the scheduling method; these can be passed
                as keyword arguments.
            eps: This value determines the sensitivity of the convergence checks.
            kwargs: All additional arguments associated with the optimizer or
                scheduler, such as learning rates, gamma values, and so on. We refer
                to the documentation of the supported optimizers and schedulers for
                details.

        """

        self.potential_fn = potential_fn
        self.q = q
        check_prior(prior)
        self.prior = prior
        self.device = potential_fn.device
        self.to(self.device)

        self.n_particles = n_particles
        self.clip_value = clip_value
        self.learning_rate = kwargs.get("lr", 1e-3)
        self.retain_graph = kwargs.get("retain_graph", False)
        self._kwargs = kwargs

        # This prevents an error that would stop the optimization.
        self.q.set_default_validate_args(False)
        if prior is not None:
            self.prior.set_default_validate_args(False)  # type: ignore

        # Manage modules if present.
        if hasattr(self.q, "modules"):
            self.modules = nn.ModuleList(self.q.modules())
        else:
            self.modules = nn.ModuleList()
        self.modules.train()

        # Ensure that the distribution has parameters and that they are on the right device.
        if not hasattr(self.q, "parameters"):
            raise ValueError(
                "The variational distribution has no parameters to optimize.")
        self.to(self.device)

        # Keep a copy of the parameter state to recover from invalid values.
        self.state_dict = [para.data.clone() for para in self.q.parameters()]

        # Init optimizer and scheduler with the correct arguments.
        opt_kwargs = filter_kwrags_for_func(optimizer.__init__, kwargs)
        kwargs.pop("lr", None)  # This is just because of the CyclicLR scheduler ...
        scheduler_kwargs = filter_kwrags_for_func(scheduler.__init__, kwargs)

        self._optimizer = optimizer(self.q.parameters(), **opt_kwargs)
        self._scheduler = scheduler(self._optimizer, **scheduler_kwargs)
        # Loss and summary
        self.eps = eps
        self.num_step = 0
        self.warm_up_was_done = False
        self.losses = torch.ones(2000)
        self.moving_average = torch.ones(2000)
        self.moving_std = torch.ones(2000)
        self.moving_slope = torch.ones(2000)

        # Hyperparameters to change adaptively
        self.HYPER_PARAMETERS = ["n_particles", "clip_value", "eps"]
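The helper `filter_kwrags_for_func` is not shown above; judging from its usage, it keeps only the keyword arguments that match a function's signature. A minimal sketch of that pattern (the implementation below is an assumption inferred from usage, not the library's code):

    import inspect

    def filter_kwargs_for_func(func, kwargs):
        # Keep only the kwargs that appear in `func`'s signature, so that,
        # e.g., `lr` reaches the optimizer and `gamma` reaches ExponentialLR.
        params = inspect.signature(func).parameters
        return {k: v for k, v in kwargs.items() if k in params}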
Example #5
    def build_posterior(
        self,
        density_estimator: Optional[nn.Module] = None,
        prior: Optional[Distribution] = None,
        sample_with: str = "rejection",
        mcmc_method: str = "slice_np",
        vi_method: str = "rKL",
        mcmc_parameters: Dict[str, Any] = {},
        vi_parameters: Dict[str, Any] = {},
        rejection_sampling_parameters: Dict[str, Any] = {},
    ) -> Union[MCMCPosterior, RejectionPosterior, VIPosterior, DirectPosterior]:
        r"""Build posterior from the neural density estimator.

        For SNPE, the posterior distribution that is returned here implements the
        following functionality over the raw neural density estimator:
        - correct the calculation of the log probability such that it compensates for
            the leakage.
        - reject samples that lie outside of the prior bounds.
        - alternatively, if leakage is very high (which can happen for multi-round
            SNPE), sample from the posterior with MCMC.

        Args:
            density_estimator: The density estimator that the posterior is based on.
                If `None`, use the latest neural density estimator that was trained.
            prior: Prior distribution.
            sample_with: Method to use for sampling from the posterior. Must be one of
                [`mcmc` | `rejection` | `vi`].
            mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`,
                `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy
                implementation of slice sampling; select `hmc`, `nuts` or `slice` for
                Pyro-based sampling.
            vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note
                that some of the methods are `mode seeking` (e.g., rKL) whereas others
                are `mass covering` (e.g., fKL).
            mcmc_parameters: Additional kwargs passed to `MCMCPosterior`.
            vi_parameters: Additional kwargs passed to `VIPosterior`.
            rejection_sampling_parameters: Additional kwargs passed to
                `RejectionPosterior` or `DirectPosterior`. By default,
                `DirectPosterior` is used; a `RejectionPosterior` is instantiated
                only if `rejection_sampling_parameters` contains a `proposal`.

        Returns:
            Posterior $p(\theta|x)$ with `.sample()` and `.log_prob()` methods
            (the returned log-probability is unnormalized).
        """
        if prior is None:
            assert self._prior is not None, (
                "You did not pass a prior. You have to pass the prior either at "
                "initialization `inference = SNPE(prior)` or to "
                "`.build_posterior(prior=prior)`."
            )
            prior = self._prior
        else:
            utils.check_prior(prior)

        if density_estimator is None:
            posterior_estimator = self._neural_net
            # If the internal net is used, the device is already defined.
            device = self._device
        else:
            posterior_estimator = density_estimator
            # Otherwise, infer it from the device of the net parameters.
            device = next(density_estimator.parameters()).device.type

        potential_fn, theta_transform = posterior_estimator_based_potential(
            posterior_estimator=posterior_estimator, prior=prior, x_o=None
        )

        if sample_with == "rejection":
            if "proposal" in rejection_sampling_parameters.keys():
                self._posterior = RejectionPosterior(
                    potential_fn=potential_fn,
                    device=device,
                    x_shape=self._x_shape,
                    **rejection_sampling_parameters,
                )
            else:
                self._posterior = DirectPosterior(
                    posterior_estimator=posterior_estimator,
                    prior=prior,
                    x_shape=self._x_shape,
                    device=device,
                )
        elif sample_with == "mcmc":
            self._posterior = MCMCPosterior(
                potential_fn=potential_fn,
                theta_transform=theta_transform,
                proposal=prior,
                method=mcmc_method,
                device=device,
                x_shape=self._x_shape,
                **mcmc_parameters,
            )
        elif sample_with == "vi":
            self._posterior = VIPosterior(
                potential_fn=potential_fn,
                theta_transform=theta_transform,
                prior=prior,  # type: ignore
                vi_method=vi_method,
                device=device,
                x_shape=self._x_shape,
                **vi_parameters,
            )
        else:
            raise NotImplementedError(
                f"Sampling method {sample_with!r} is not supported. "
                "Use one of `mcmc`, `rejection`, or `vi`."
            )

        # Store models at end of each round.
        self._model_bank.append(deepcopy(self._posterior))

        return deepcopy(self._posterior)
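A hedged end-to-end sketch for this SNPE variant (`x_o` is an assumed observation; with the default `sample_with="rejection"` and no `proposal`, a `DirectPosterior` is returned, as the branch above shows):

    # Default: rejection-based DirectPosterior.
    posterior = inference.build_posterior()
    samples = posterior.sample((1000,), x=x_o)

    # Passing a `proposal` in rejection_sampling_parameters switches to a
    # RejectionPosterior instead.
    rejection_posterior = inference.build_posterior(
        sample_with="rejection",
        rejection_sampling_parameters={"proposal": posterior},
    )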