コード例 #1
0
    def __init__(
        self,
        estimator: Union["CLASSIFIER_LOSS_GRADIENTS_TYPE",
                         "OBJECT_DETECTOR_TYPE"],
        norm: Union[int, float, str] = np.inf,
        eps: Union[int, float, np.ndarray] = 0.3,
        eps_step: Union[int, float, np.ndarray] = 0.1,
        max_iter: int = 100,
        targeted: bool = False,
        num_random_init: int = 0,
        batch_size: int = 32,
        random_eps: bool = False,
        summary_writer: Union[str, bool, SummaryWriter] = False,
        verbose: bool = True,
    ):
        """
        Create a :class:`.ProjectedGradientDescent` instance and the concrete attack it delegates to.

        The arguments are stored on this instance, validated with `_check_params`, and then forwarded unchanged to
        one of `ProjectedGradientDescentPyTorch`, `ProjectedGradientDescentTensorFlowV2` or
        `ProjectedGradientDescentNumpy`, which is kept in `self._attack`.

        :param estimator: A trained estimator.
        :param norm: The norm of the adversarial perturbation supporting "inf", np.inf, 1 or 2.
        :param eps: Maximum perturbation that the attacker can introduce.
        :param eps_step: Attack step size (input variation) at each iteration.
        :param max_iter: The maximum number of iterations.
        :param targeted: Indicates whether the attack is targeted (True) or untargeted (False).
        :param num_random_init: Number of random initialisations within the epsilon ball. For num_random_init=0
                                starting at the original input.
        :param batch_size: Size of the batch on which adversarial samples are generated.
        :param random_eps: When True, epsilon is drawn randomly from truncated normal distribution. The literature
                           suggests this for FGSM based training to generalize across different epsilons. eps_step
                           is modified to preserve the ratio of eps / eps_step. The effectiveness of this
                           method with PGD is untested (https://arxiv.org/pdf/1611.01236.pdf).
        :param summary_writer: Activate summary writer for TensorBoard.
                               Default is `False` and deactivated summary writer.
                               If `True` save runs/CURRENT_DATETIME_HOSTNAME in current directory.
                               If of type `str` save in path.
                               If of type `SummaryWriter` apply provided custom summary writer.
                               Use hierarchical folder structure to compare between runs easily. e.g. pass in
                               `runs/exp1`, `runs/exp2`, etc. for each new experiment to compare across them.
        :param verbose: Show progress bars.
        """
        # The wrapper itself is initialised with summary writing switched off; the caller's `summary_writer`
        # value is only handed to the delegate attack built below.
        super().__init__(estimator=estimator, summary_writer=False)

        self.norm = norm
        self.eps = eps
        self.eps_step = eps_step
        self.max_iter = max_iter
        self.targeted = targeted
        self.num_random_init = num_random_init
        self.batch_size = batch_size
        self.random_eps = random_eps
        self.verbose = verbose
        ProjectedGradientDescent._check_params(self)

        # Every implementation receives exactly the same keyword arguments.
        delegate_kwargs = dict(
            estimator=estimator,
            norm=norm,
            eps=eps,
            eps_step=eps_step,
            max_iter=max_iter,
            targeted=targeted,
            num_random_init=num_random_init,
            batch_size=batch_size,
            random_eps=random_eps,
            summary_writer=summary_writer,
            verbose=verbose,
        )

        # A framework-specific implementation is chosen only for the matching classifier type with
        # `all_framework_preprocessing` set; everything else goes to the NumPy implementation.
        wrapped = self.estimator
        if isinstance(wrapped, PyTorchClassifier) and wrapped.all_framework_preprocessing:
            attack_cls = ProjectedGradientDescentPyTorch
        elif isinstance(wrapped, TensorFlowV2Classifier) and wrapped.all_framework_preprocessing:
            attack_cls = ProjectedGradientDescentTensorFlowV2
        else:
            attack_cls = ProjectedGradientDescentNumpy

        self._attack: Union[ProjectedGradientDescentPyTorch,
                            ProjectedGradientDescentTensorFlowV2,
                            ProjectedGradientDescentNumpy]
        self._attack = attack_cls(**delegate_kwargs)  # type: ignore
コード例 #2
0
class ProjectedGradientDescent(EvasionAttack):
    """
    The Projected Gradient Descent attack is an iterative method in which, after each iteration, the perturbation is
    projected on an lp-ball of specified radius (in addition to clipping the values of the adversarial sample so that it
    lies in the permitted data range). This is the attack proposed by Madry et al. for adversarial training.

    This class is a thin front end: it validates its parameters and forwards the actual work to a
    framework-specific or NumPy implementation stored in `self._attack`.

    | Paper link: https://arxiv.org/abs/1706.06083
    """

    # Parameter names this attack adds on top of those declared by `EvasionAttack`.
    attack_params = EvasionAttack.attack_params + [
        "norm",
        "eps",
        "eps_step",
        "targeted",
        "num_random_init",
        "batch_size",
        "max_iter",
        "random_eps",
        "summary_writer",
        "verbose",
    ]

    _estimator_requirements = (BaseEstimator, LossGradientsMixin)

    def __init__(
        self,
        estimator: Union["CLASSIFIER_LOSS_GRADIENTS_TYPE",
                         "OBJECT_DETECTOR_TYPE"],
        norm: Union[int, float, str] = np.inf,
        eps: Union[int, float, np.ndarray] = 0.3,
        eps_step: Union[int, float, np.ndarray] = 0.1,
        max_iter: int = 100,
        targeted: bool = False,
        num_random_init: int = 0,
        batch_size: int = 32,
        random_eps: bool = False,
        summary_writer: Union[str, bool, SummaryWriter] = False,
        verbose: bool = True,
    ):
        """
        Create a :class:`.ProjectedGradientDescent` instance and the concrete attack it delegates to.

        :param estimator: A trained estimator.
        :param norm: The norm of the adversarial perturbation supporting "inf", np.inf, 1 or 2.
        :param eps: Maximum perturbation that the attacker can introduce.
        :param eps_step: Attack step size (input variation) at each iteration.
        :param max_iter: The maximum number of iterations.
        :param targeted: Indicates whether the attack is targeted (True) or untargeted (False).
        :param num_random_init: Number of random initialisations within the epsilon ball. For num_random_init=0
                                starting at the original input.
        :param batch_size: Size of the batch on which adversarial samples are generated.
        :param random_eps: When True, epsilon is drawn randomly from truncated normal distribution. The literature
                           suggests this for FGSM based training to generalize across different epsilons. eps_step
                           is modified to preserve the ratio of eps / eps_step. The effectiveness of this
                           method with PGD is untested (https://arxiv.org/pdf/1611.01236.pdf).
        :param summary_writer: Activate summary writer for TensorBoard.
                               Default is `False` and deactivated summary writer.
                               If `True` save runs/CURRENT_DATETIME_HOSTNAME in current directory.
                               If of type `str` save in path.
                               If of type `SummaryWriter` apply provided custom summary writer.
                               Use hierarchical folder structure to compare between runs easily. e.g. pass in
                               `runs/exp1`, `runs/exp2`, etc. for each new experiment to compare across them.
        :param verbose: Show progress bars.
        """
        # The wrapper itself is initialised with summary writing switched off; the caller's `summary_writer`
        # value goes to the delegate attack, and the `summary_writer` property reads it back from there.
        super().__init__(estimator=estimator, summary_writer=False)

        self.norm = norm
        self.eps = eps
        self.eps_step = eps_step
        self.max_iter = max_iter
        self.targeted = targeted
        self.num_random_init = num_random_init
        self.batch_size = batch_size
        self.random_eps = random_eps
        self.verbose = verbose
        ProjectedGradientDescent._check_params(self)

        # Every implementation receives exactly the same keyword arguments.
        delegate_kwargs = dict(
            estimator=estimator,
            norm=norm,
            eps=eps,
            eps_step=eps_step,
            max_iter=max_iter,
            targeted=targeted,
            num_random_init=num_random_init,
            batch_size=batch_size,
            random_eps=random_eps,
            summary_writer=summary_writer,
            verbose=verbose,
        )

        # A framework-specific implementation is chosen only for the matching classifier type with
        # `all_framework_preprocessing` set; everything else goes to the NumPy implementation.
        wrapped = self.estimator
        if isinstance(wrapped, PyTorchClassifier) and wrapped.all_framework_preprocessing:
            attack_cls = ProjectedGradientDescentPyTorch
        elif isinstance(wrapped, TensorFlowV2Classifier) and wrapped.all_framework_preprocessing:
            attack_cls = ProjectedGradientDescentTensorFlowV2
        else:
            attack_cls = ProjectedGradientDescentNumpy

        self._attack: Union[ProjectedGradientDescentPyTorch,
                            ProjectedGradientDescentTensorFlowV2,
                            ProjectedGradientDescentNumpy]
        self._attack = attack_cls(**delegate_kwargs)  # type: ignore

    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples by delegating to the selected implementation and return them in an array.

        :param x: An array with the original inputs.
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
                  samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
                  (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
        :param mask: An array with a mask broadcastable to input `x` defining where to apply adversarial perturbations.
                     Shape needs to be broadcastable to the shape of x and can also be of the same shape as `x`. Any
                     features for which the mask is zero will not be adversarially perturbed. Passed through
                     `kwargs` to the delegate attack.
        :type mask: `np.ndarray`
        :return: An array holding the adversarial examples.
        """
        logger.info("Creating adversarial samples.")
        adversarial_samples = self._attack.generate(x=x, y=y, **kwargs)
        return adversarial_samples

    @property
    def summary_writer(self):
        """The summary writer held by the delegate attack."""
        return self._attack.summary_writer

    def set_params(self, **kwargs) -> None:
        """
        Apply the given parameters to this wrapper first and then to the delegate attack.

        :param kwargs: Parameter names and values, forwarded unchanged to both `set_params` calls.
        """
        super().set_params(**kwargs)
        self._attack.set_params(**kwargs)

    def _check_params(self) -> None:
        """
        Validate the attack parameters currently stored on the instance.

        :raises ValueError: If `norm`, `eps`, `eps_step`, `targeted`, `num_random_init`, `batch_size`, `max_iter`
                            or `verbose` holds a value rejected by the checks below.
        :raises TypeError: If `eps` and `eps_step` are not both scalars or both arrays, or if `num_random_init` is
                           not an integer.
        """
        scalar_types = (int, float)

        if self.norm not in (1, 2, np.inf, "inf"):
            raise ValueError(
                'Norm order must be either 1, 2, `np.inf` or "inf".')

        # `eps` and `eps_step` must come as a matching pair: two scalars or two arrays.
        both_scalar = isinstance(self.eps, scalar_types) and isinstance(self.eps_step, scalar_types)
        both_array = isinstance(self.eps, np.ndarray) and isinstance(self.eps_step, np.ndarray)
        if not (both_scalar or both_array):
            raise TypeError(
                "The perturbation size `eps` and the perturbation step-size `eps_step` must have the same type of `int`"
                ", `float`, or `np.ndarray`.")

        # Scalars are compared directly; arrays are rejected if any element violates the bound.
        eps_is_negative = self.eps < 0 if isinstance(self.eps, scalar_types) else (self.eps < 0).any()
        if eps_is_negative:
            raise ValueError(
                "The perturbation size `eps` has to be nonnegative.")

        step_not_positive = (self.eps_step <= 0 if isinstance(self.eps_step, scalar_types)
                             else (self.eps_step <= 0).any())
        if step_not_positive:
            raise ValueError(
                "The perturbation step-size `eps_step` has to be positive.")

        if both_array and self.eps.shape != self.eps_step.shape:
            raise ValueError(
                "The perturbation size `eps` and the perturbation step-size `eps_step` must have the same shape.")

        if not isinstance(self.targeted, bool):
            raise ValueError("The flag `targeted` has to be of type bool.")

        if not isinstance(self.num_random_init, int):
            raise TypeError(
                "The number of random initialisations has to be of type integer.")

        if self.num_random_init < 0:
            raise ValueError(
                "The number of random initialisations `random_init` has to be greater than or equal to 0.")

        if self.batch_size <= 0:
            raise ValueError("The batch size `batch_size` has to be positive.")

        if self.max_iter < 0:
            raise ValueError(
                "The number of iterations `max_iter` has to be a nonnegative integer.")

        if not isinstance(self.verbose, bool):
            raise ValueError("The verbose has to be a Boolean.")
コード例 #3
0
    def __init__(
        self,
        estimator,
        norm: int = np.inf,
        eps: float = 0.3,
        eps_step: float = 0.1,
        max_iter: int = 100,
        targeted: bool = False,
        num_random_init: int = 0,
        batch_size: int = 32,
        random_eps: bool = False,
    ):
        """
        Create a :class:`.ProjectedGradientDescent` instance and the concrete attack it delegates to.

        The arguments are stored on this instance, validated with `_check_params`, and then forwarded unchanged to
        one of `ProjectedGradientDescentPyTorch`, `ProjectedGradientDescentTensorFlowV2` or
        `ProjectedGradientDescentNumpy`, which is kept in `self._attack`.

        :param estimator: A trained estimator.
        :param norm: The norm of the adversarial perturbation supporting np.inf, 1 or 2.
        :param eps: Maximum perturbation that the attacker can introduce.
        :param eps_step: Attack step size (input variation) at each iteration.
        :param max_iter: The maximum number of iterations.
        :param targeted: Indicates whether the attack is targeted (True) or untargeted (False).
        :param num_random_init: Number of random initialisations within the epsilon ball. For num_random_init=0
                                starting at the original input.
        :param batch_size: Size of the batch on which adversarial samples are generated.
        :param random_eps: When True, epsilon is drawn randomly from truncated normal distribution. The literature
                           suggests this for FGSM based training to generalize across different epsilons. eps_step
                           is modified to preserve the ratio of eps / eps_step. The effectiveness of this
                           method with PGD is untested (https://arxiv.org/pdf/1611.01236.pdf).
        """
        super(ProjectedGradientDescent, self).__init__(estimator=estimator)

        self.norm = norm
        self.eps = eps
        self.eps_step = eps_step
        self.max_iter = max_iter
        self.targeted = targeted
        self.num_random_init = num_random_init
        self.batch_size = batch_size
        self.random_eps = random_eps
        ProjectedGradientDescent._check_params(self)

        # Preprocessing counts as absent when it is `None`, or when its first entry is all zeros and its second
        # entry is all ones.
        identity_preprocessing = self.estimator.preprocessing is None or (
            np.all(self.estimator.preprocessing[0] == 0)
            and np.all(self.estimator.preprocessing[1] == 1))
        # Neither preprocessing nor postprocessing defences may be configured on the estimator.
        defence_free = not (self.estimator.preprocessing_defences
                            or self.estimator.postprocessing_defences)

        # Every implementation receives exactly the same keyword arguments.
        delegate_kwargs = dict(
            estimator=estimator,
            norm=norm,
            eps=eps,
            eps_step=eps_step,
            max_iter=max_iter,
            targeted=targeted,
            num_random_init=num_random_init,
            batch_size=batch_size,
            random_eps=random_eps,
        )

        # A framework-specific implementation is used only for the matching classifier type without
        # preprocessing and defences; everything else goes to the NumPy implementation.
        if isinstance(self.estimator, PyTorchClassifier) and identity_preprocessing and defence_free:
            attack_cls = ProjectedGradientDescentPyTorch
        elif isinstance(self.estimator, TensorFlowV2Classifier) and identity_preprocessing and defence_free:
            attack_cls = ProjectedGradientDescentTensorFlowV2
        else:
            attack_cls = ProjectedGradientDescentNumpy

        self._attack: Union[ProjectedGradientDescentPyTorch,
                            ProjectedGradientDescentTensorFlowV2,
                            ProjectedGradientDescentNumpy]
        self._attack = attack_cls(**delegate_kwargs)
    def _test_framework_vs_numpy(self, classifier):
        """
        Compare `ProjectedGradientDescent` with `ProjectedGradientDescentNumpy` on the MNIST fixtures.

        For every scenario both attacks are built with identical settings, adversarial samples are generated for
        `self.x_train_mnist` and `self.x_test_mnist`, and the mean perturbation of the two attacks is asserted to
        agree to 6 decimal places.

        :param classifier: Estimator passed as first positional argument to both attack constructors.
        """
        # Settings shared by all scenarios; each scenario overrides only what it varies.
        defaults = {
            "eps": 1.0,
            "eps_step": 0.1,
            "max_iter": 5,
            "norm": np.inf,
            "targeted": False,
            "num_random_init": 0,
            "batch_size": 3,
            "random_eps": False,
        }
        masked = {"num_random_init": 1, "random_eps": True}

        # (overrides, reseed before building each attack, pass labels, mask shape derived from the inputs)
        scenarios = [
            ({}, False, False, None),  # np.inf norm
            ({"norm": 1}, False, False, None),  # L1 norm
            ({"norm": 2}, False, False, None),  # L2 norm
            ({"targeted": True}, False, True, None),  # targeted attack
            ({"num_random_init": 2}, True, False, None),  # random initialisations
            ({"random_eps": True}, True, False, None),  # random eps
            (masked, True, False, lambda x: x.shape),  # masking 1: mask with the full input shape
            (masked, True, False, lambda x: x.shape[1:]),  # masking 2: mask without the first axis
        ]

        def draw_mask(shape):
            # Random 0/1 mask of the requested shape, as float32; consumes the global NumPy random state.
            flat = np.random.binomial(n=1, p=0.5, size=np.prod(shape))
            return flat.reshape(shape).astype(np.float32)

        def craft(attack_cls, settings, reseed, with_labels, mask_shape):
            # Seeding happens before the attack is constructed so both implementations start from the same state.
            if reseed:
                master_seed(1234)
            attack = attack_cls(classifier, **settings)

            def run(x, fetch_labels):
                positional = (x, fetch_labels()) if with_labels else (x,)
                keyword = {} if mask_shape is None else {"mask": draw_mask(mask_shape(x))}
                return attack.generate(*positional, **keyword)

            # Train split first, then test split; labels are only read when the scenario asks for them.
            train_adv = run(self.x_train_mnist, lambda: self.y_train_mnist)
            test_adv = run(self.x_test_mnist, lambda: self.y_test_mnist)
            return train_adv, test_adv

        for overrides, reseed, with_labels, mask_shape in scenarios:
            settings = {**defaults, **overrides}

            x_train_adv_np, x_test_adv_np = craft(ProjectedGradientDescentNumpy, settings, reseed, with_labels,
                                                  mask_shape)
            x_train_adv_fw, x_test_adv_fw = craft(ProjectedGradientDescent, settings, reseed, with_labels,
                                                  mask_shape)

            comparisons = (
                (self.x_train_mnist, x_train_adv_np, x_train_adv_fw),
                (self.x_test_mnist, x_test_adv_np, x_test_adv_fw),
            )
            for clean, adv_np, adv_fw in comparisons:
                self.assertAlmostEqual(np.mean(adv_np - clean),
                                       np.mean(adv_fw - clean),
                                       places=6)
コード例 #5
0
class ProjectedGradientDescent(EvasionAttack):
    """
    The Projected Gradient Descent attack is an iterative method in which, after each iteration, the perturbation is
    projected on an lp-ball of specified radius (in addition to clipping the values of the adversarial sample so that it
    lies in the permitted data range). This is the attack proposed by Madry et al. for adversarial training.

    This class is a dispatcher: the constructor validates the parameters and then instantiates one of three backend
    implementations (PyTorch, TensorFlow v2 or NumPy) depending on the type and configuration of the estimator. All
    calls to `generate` and `set_params` are delegated to that backend.

    | Paper link: https://arxiv.org/abs/1706.06083
    """

    # NOTE(review): "minimal" is listed here but is never assigned in this class - confirm it is still required.
    attack_params = EvasionAttack.attack_params + [
        "norm",
        "eps",
        "eps_step",
        "targeted",
        "num_random_init",
        "batch_size",
        "minimal",
        "max_iter",
        "random_eps",
    ]

    _estimator_requirements = (BaseEstimator, LossGradientsMixin)

    def __init__(
        self,
        estimator,
        norm: Union[int, float] = np.inf,
        eps: float = 0.3,
        eps_step: float = 0.1,
        max_iter: int = 100,
        targeted: bool = False,
        num_random_init: int = 0,
        batch_size: int = 32,
        random_eps: bool = False,
    ):
        """
        Create a :class:`.ProjectedGradientDescent` instance.

        :param estimator: An trained estimator.
        :param norm: The norm of the adversarial perturbation supporting np.inf, 1 or 2.
        :param eps: Maximum perturbation that the attacker can introduce.
        :param eps_step: Attack step size (input variation) at each iteration.
        :param random_eps: When True, epsilon is drawn randomly from truncated normal distribution. The literature
                           suggests this for FGSM based training to generalize across different epsilons. eps_step
                           is modified to preserve the ratio of eps / eps_step. The effectiveness of this
                           method with PGD is untested (https://arxiv.org/pdf/1611.01236.pdf).
        :param max_iter: The maximum number of iterations.
        :param targeted: Indicates whether the attack is targeted (True) or untargeted (False).
        :param num_random_init: Number of random initialisations within the epsilon ball. For num_random_init=0 starting
                                at the original input.
        :param batch_size: Size of the batch on which adversarial samples are generated.
        :raises ValueError: If one of the parameters is outside its permitted range.
        :raises TypeError: If `num_random_init` is not an integer.
        """
        super().__init__(estimator=estimator)

        self.norm = norm
        self.eps = eps
        self.eps_step = eps_step
        self.max_iter = max_iter
        self.targeted = targeted
        self.num_random_init = num_random_init
        self.batch_size = batch_size
        self.random_eps = random_eps
        # Call this class's own validator explicitly so that an override in a subclass is not invoked from here.
        ProjectedGradientDescent._check_params(self)

        # The framework-specific backends are only selected when the estimator applies no effective preprocessing
        # (absent, or subtraction of 0 and division by 1) and has no pre-/postprocessing defences attached.
        no_preprocessing = self.estimator.preprocessing is None or (
            np.all(self.estimator.preprocessing[0] == 0)
            and np.all(self.estimator.preprocessing[1] == 1))
        no_defences = not self.estimator.preprocessing_defences and not self.estimator.postprocessing_defences

        self._attack: Union[ProjectedGradientDescentPyTorch,
                            ProjectedGradientDescentTensorFlowV2,
                            ProjectedGradientDescentNumpy]
        if isinstance(self.estimator,
                      PyTorchClassifier) and no_preprocessing and no_defences:
            self._attack = ProjectedGradientDescentPyTorch(
                estimator=estimator,
                norm=norm,
                eps=eps,
                eps_step=eps_step,
                max_iter=max_iter,
                targeted=targeted,
                num_random_init=num_random_init,
                batch_size=batch_size,
                random_eps=random_eps,
            )

        elif isinstance(
                self.estimator,
                TensorFlowV2Classifier) and no_preprocessing and no_defences:
            self._attack = ProjectedGradientDescentTensorFlowV2(
                estimator=estimator,
                norm=norm,
                eps=eps,
                eps_step=eps_step,
                max_iter=max_iter,
                targeted=targeted,
                num_random_init=num_random_init,
                batch_size=batch_size,
                random_eps=random_eps,
            )

        else:
            # Fallback for every other estimator type and for estimators with preprocessing or defences.
            self._attack = ProjectedGradientDescentNumpy(
                estimator=estimator,
                norm=norm,
                eps=eps,
                eps_step=eps_step,
                max_iter=max_iter,
                targeted=targeted,
                num_random_init=num_random_init,
                batch_size=batch_size,
                random_eps=random_eps,
            )

    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
                  samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
                  (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
        :param kwargs: Additional keyword arguments, forwarded unchanged to the `generate` method of the selected
                       backend (presumably e.g. a `mask` array; the accepted options depend on the backend).
        :return: An array holding the adversarial examples.
        """
        logger.info("Creating adversarial samples.")
        return self._attack.generate(x=x, y=y, **kwargs)

    def set_params(self, **kwargs) -> None:
        """
        Forward the given attack parameters to the backend attack selected in the constructor.

        :param kwargs: Parameter names and their new values, passed unchanged to the backend's `set_params`.
        """
        # NOTE(review): only the backend is updated; the attributes stored on this wrapper keep their old values.
        self._attack.set_params(**kwargs)

    def _check_params(self) -> None:
        """
        Validate the attack parameters stored on this instance.

        :raises ValueError: If `norm` is not np.inf, 1 or 2; if `eps`, `eps_step`, `batch_size` or `max_iter` is not
                            positive; if `targeted` is not a bool; if `num_random_init` is negative; or if `eps_step`
                            is larger than `eps`.
        :raises TypeError: If `num_random_init` is not an integer.
        """
        # Check if order of the norm is acceptable given current implementation
        if self.norm not in (np.inf, 1, 2):
            raise ValueError("Norm order must be either `np.inf`, 1, or 2.")

        if self.eps <= 0:
            raise ValueError("The perturbation size `eps` has to be positive.")

        if self.eps_step <= 0:
            raise ValueError(
                "The perturbation step-size `eps_step` has to be positive.")

        if not isinstance(self.targeted, bool):
            raise ValueError("The flag `targeted` has to be of type bool.")

        # `np.int` was only an alias of the builtin `int` and has been removed from NumPy; `np.integer` is the
        # abstract base of all NumPy integer scalar types, so NumPy integers are accepted as well.
        if not isinstance(self.num_random_init, (int, np.integer)):
            raise TypeError(
                "The number of random initialisations has to be of type integer"
            )

        if self.num_random_init < 0:
            raise ValueError(
                "The number of random initialisations `random_init` has to be greater than or equal to 0."
            )

        if self.batch_size <= 0:
            raise ValueError("The batch size `batch_size` has to be positive.")

        # Equality is permitted: only a step strictly larger than the total budget is rejected.
        if self.eps_step > self.eps:
            raise ValueError(
                "The iteration step `eps_step` has to be smaller than the total attack `eps`."
            )

        if self.max_iter <= 0:
            raise ValueError(
                "The number of iterations `max_iter` has to be a positive integer."
            )