Example 1
    def __init__(
        self,
        module: nn.Module,
        *,  # As per PEP 3102, this forces clients to specify kwargs explicitly, not positionally
        sample_rate: Optional[float] = None,
        batch_size: Optional[int] = None,
        sample_size: Optional[int] = None,
        max_grad_norm: Union[float, List[float]],
        noise_multiplier: Optional[float] = None,
        alphas: List[float] = DEFAULT_ALPHAS,
        secure_rng: bool = False,
        batch_first: bool = True,
        target_delta: float = 1e-6,
        target_epsilon: Optional[float] = None,
        epochs: Optional[float] = None,
        loss_reduction: str = "mean",
        poisson: bool = False,
        **misc_settings,
    ):
        r"""
        Args:
            module: The PyTorch module to which we are attaching the privacy engine
            alphas: A list of RDP orders
            noise_multiplier: The ratio of the standard deviation of the Gaussian noise to
                the L2-sensitivity of the function to which the noise is added
            max_grad_norm: The maximum norm of the per-sample gradients. Any gradient with norm
                higher than this will be clipped to this value.
            batch_size: Training batch size. Used in the privacy accountant.
            sample_size: The size of the sample (dataset). Used in the privacy accountant.
            sample_rate: Sample rate used to build batches. Used in the privacy accountant.
            secure_rng: If on, it will use ``torchcsprng`` for secure random number generation.
                Comes with a significant performance cost, therefore it's recommended that you
                turn it off when just experimenting.
            batch_first: Flag to indicate if the input tensor to the corresponding module
                has the first dimension representing the batch. If set to True, dimensions on
                input tensor will be ``[batch_size, ..., ...]``.
            target_delta: The target delta. If falsy (``0`` or ``None``), it defaults to
                an order of magnitude below ``1 / sample_size``.
            loss_reduction: Indicates whether the loss reduction (for aggregating the
                gradients) is a sum or a mean operation. Can take values "sum" or "mean".
            target_epsilon: The privacy budget (epsilon) to target. Used together with
                ``target_delta`` and ``epochs`` to derive ``noise_multiplier`` when the
                latter is not provided.
            epochs: The number of training epochs, used only when deriving
                ``noise_multiplier`` from ``target_epsilon``.
            poisson: If set to True, batches are assumed to be formed by Poisson sampling
                (each example included independently with probability ``sample_rate``);
                requires ``sample_size``.
            **misc_settings: Other arguments passed to the init
        """

        self.steps = 0
        self.poisson = poisson
        self.loss_reduction = loss_reduction
        self.batch_size = batch_size
        self.sample_size = sample_size
        self.sample_rate = sample_rate
        self._set_sample_rate()

        if isinstance(
            module,
            (
                DifferentiallyPrivateDistributedDataParallel,
                torch.nn.parallel.DistributedDataParallel,
            ),
        ):
            rank = torch.distributed.get_rank()
            n_replicas = torch.distributed.get_world_size()
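            # In the distributed setting each replica draws its own batch, so
            # the effective sample rate seen by the privacy accountant scales
            # with the number of replicas.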
            self.sample_rate *= n_replicas
        else:
            rank = 0
            n_replicas = 1

        self.module = GradSampleModule(module)

        if poisson:
            # TODO: Check directly if sampler is UniformSampler when sampler gets passed to the Engine (in the future)
            if sample_size is None:
                raise ValueError(
                    "If using Poisson sampling, sample_size should get passed to the PrivacyEngine."
                )

            # Number of empty batches follows a geometric distribution
            # Planck is the same distribution but its parameter is the (negative) log of the geometric's parameter
            self._poisson_empty_batches_distribution = planck(
                -math.log(1 - self.sample_rate) * self.sample_size)
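            # Derivation: each of the N = sample_size examples is included
            # independently with probability q = sample_rate, so
            # P(batch is empty) = (1 - q)^N = exp(-lambda) with
            # lambda = -N * log(1 - q). The Planck pmf
            # (1 - exp(-lambda)) * exp(-lambda * k), k = 0, 1, ..., is exactly
            # that geometric distribution.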

        if noise_multiplier is None:
            if target_epsilon is None or target_delta is None or epochs is None:
                raise ValueError(
                    "If noise_multiplier is not specified, (target_epsilon, target_delta, epochs) should be given to the engine."
                )
            self.noise_multiplier = get_noise_multiplier(
                target_epsilon, target_delta, self.sample_rate, epochs, alphas)
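            # (Expected behavior, not verified here:) get_noise_multiplier
            # searches for the smallest noise level whose RDP accounting over
            # `alphas` keeps the privacy spend within (target_epsilon,
            # target_delta) after `epochs` epochs at this sample rate.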
        else:
            self.noise_multiplier = noise_multiplier

        self.max_grad_norm = max_grad_norm
        self.alphas = alphas
        self.target_delta = target_delta
        self.secure_rng = secure_rng
        self.batch_first = batch_first
        self.misc_settings = misc_settings
        self.n_replicas = n_replicas
        self.rank = rank

        self.device = next(module.parameters()).device

        if self.noise_multiplier < 0:
            raise ValueError(
                f"noise_multiplier={self.noise_multiplier} is not a valid value. Please provide a float >= 0."
            )
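        # Note: noise_multiplier == 0 is allowed; it disables noise injection,
        # leaving only gradient clipping (and hence no formal privacy guarantee).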

        if isinstance(self.max_grad_norm, float) and self.max_grad_norm <= 0:
            raise ValueError(
                f"max_grad_norm={self.max_grad_norm} is not a valid value. Please provide a float > 0."
            )

        if not self.target_delta:
            if self.sample_size:
                warnings.warn(
                    "target_delta unset. Setting it to an order of magnitude less than 1/sample_size."
                )
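                # Standard DP practice: delta should be well below
                # 1/len(dataset), hence the order-of-magnitude-smaller default.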
                self.target_delta = 0.1 * (1 / self.sample_size)
            else:
                raise ValueError("Please provide a target_delta.")

        if self.secure_rng:
            self.seed = None
            try:
                import torchcsprng as csprng
            except ImportError as e:
                msg = (
                    "To use secure RNG, you must install the torchcsprng package! "
                    "Check out the instructions here: https://github.com/pytorch/csprng#installation"
                )
                raise ImportError(msg) from e

            self.random_number_generator = csprng.create_random_device_generator(
                "/dev/urandom")
        else:
            warnings.warn(
                "Secure RNG turned off. This is perfectly fine for experimentation as it allows "
                "for much faster training performance, but remember to turn it back on and "
                "retrain your model one last time before moving to production."
            )
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                self.seed = int.from_bytes(os.urandom(8),
                                           byteorder="big",
                                           signed=True)
                self.random_number_generator = self._set_seed(self.seed)

        self.validator = DPModelInspector()
        self.clipper = None  # lazy initialization in attach
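
# A minimal end-to-end sketch (hypothetical usage, assuming the Opacus 0.x-style
# API this __init__ appears to belong to; the model, data, and hyperparameter
# values below are illustrative assumptions, not prescribed by the source):

import torch
import torch.nn as nn
import torch.nn.functional as F
from opacus import PrivacyEngine

model = nn.Linear(16, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
engine = PrivacyEngine(
    model,
    sample_rate=0.01,
    noise_multiplier=1.3,
    max_grad_norm=1.0,
)
engine.attach(optimizer)  # hooks the engine into optimizer.step()

x, y = torch.randn(32, 16), torch.randint(0, 2, (32,))
optimizer.zero_grad()
F.cross_entropy(model(x), y).backward()
optimizer.step()  # clips per-sample gradients and adds calibrated noise

epsilon, best_alpha = engine.get_privacy_spent()  # spend at self.target_delta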

Example 2

# The Planck (discrete exponential) distribution used by the engine above,
# illustrated with scipy.stats.planck. The setup lines (imports, ``lambda_``,
# the figure) are added so the snippet runs standalone; lambda_ = 0.51 is an
# illustrative shape parameter.

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import planck

fig, ax = plt.subplots(1, 1)
lambda_ = 0.51

# Calculate the first four moments:

mean, var, skew, kurt = planck.stats(lambda_, moments='mvsk')

# Display the probability mass function (``pmf``):

x = np.arange(planck.ppf(0.01, lambda_),
              planck.ppf(0.99, lambda_))
ax.plot(x, planck.pmf(x, lambda_), 'bo', ms=8, label='planck pmf')
ax.vlines(x, 0, planck.pmf(x, lambda_), colors='b', lw=5, alpha=0.5)

# Alternatively, the distribution object can be called (as a function)
# to fix the shape and location. This returns a "frozen" RV object holding
# the given parameters fixed.

# Freeze the distribution and display the frozen ``pmf``:

rv = planck(lambda_)
ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
          label='frozen pmf')
ax.legend(loc='best', frameon=False)
plt.show()

# Check accuracy of ``cdf`` and ``ppf``:

prob = planck.cdf(x, lambda_)
np.allclose(x, planck.ppf(prob, lambda_))
# True

# Generate random numbers:

r = planck.rvs(lambda_, size=1000)
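
# As a rough sanity check (illustrative addition, not part of the original
# snippet), the empirical mean of ``r`` should approach the analytical mean:

np.allclose(r.mean(), planck.mean(lambda_), atol=0.2)
# typically True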