Example #1
    def test_compute_success_array(self):
        class DummyClassifier:
            # A stand-in classifier that simply echoes its input as prediction scores.
            def predict(self, x, batch_size):
                return x

        classifier = DummyClassifier()
        x_clean = np.array([[0, 1], [1, 0]])
        x_adv = np.array([[1, 0], [0, 1]])
        labels = np.array([[1, 0], [0, 1]])

        attack_success_targeted = compute_success_array(classifier,
                                                        x_clean,
                                                        labels,
                                                        x_adv,
                                                        targeted=True)
        attack_success_untargeted = compute_success_array(classifier,
                                                          x_clean,
                                                          labels,
                                                          x_adv,
                                                          targeted=False)

        self.assertTrue((attack_success_targeted == np.array([True,
                                                              True])).all())
        self.assertTrue((attack_success_untargeted == np.array([True,
                                                                True])).all())
Example #2
    def _compute_attack_failure_array(self, x: np.ndarray, targets: np.ndarray,
                                      x_adv: np.ndarray) -> np.ndarray:
        # Invert the per-sample success flags: True marks samples the attack failed on.
        attack_success = compute_success_array(
            self.attacker.estimator,
            x,
            targets,
            x_adv,
            self.attacker.targeted,  # type: ignore
        )
        return np.invert(attack_success)
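
A failure array like this can be used to decide which samples still need attacking. The sketch below is hypothetical and relies only on the documented ART attack interface (`generate`, `estimator`, `targeted`); `attack_a` and `attack_b` are placeholder names, not part of the snippet above:

import numpy as np

from art.utils import compute_success_array


def retry_failed_samples(attack_a, attack_b, x, y):
    # Run attack_a first, then re-attack only the samples it failed on with attack_b.
    # y is expected one-hot encoded, as compute_success_array requires for targeted attacks.
    x_adv = attack_a.generate(x=x, y=y)
    failed = np.invert(
        compute_success_array(attack_a.estimator, x, y, x_adv, attack_a.targeted)
    )
    if failed.any():
        x_adv[failed] = attack_b.generate(x=x[failed], y=y[failed])
    return x_adv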
Example #3
    def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
                  samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
                  (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
        :param mask: An array with a mask broadcastable to input `x` defining where to apply adversarial perturbations.
                     Shape needs to be broadcastable to the shape of x and can also be of the same shape as `x`. Any
                     features for which the mask is zero will not be adversarially perturbed.
        :type mask: `np.ndarray`
        :return: An array holding the adversarial examples.
        """
        mask = self._get_mask(x, **kwargs)

        # Ensure eps is broadcastable
        self._check_compatibility_input_and_eps(x=x)

        # Check whether random eps is enabled
        self._random_eps()

        if isinstance(self.estimator, ClassifierMixin):
            # Set up targets
            targets = self._set_targets(x, y)

            # Start to compute adversarial examples
            adv_x = x.astype(ART_NUMPY_DTYPE)

            for batch_id in range(int(np.ceil(x.shape[0] / float(self.batch_size)))):

                self._batch_id = batch_id

                for rand_init_num in trange(
                    max(1, self.num_random_init), desc="PGD - Random Initializations", disable=not self.verbose
                ):
                    batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size
                    batch_index_2 = min(batch_index_2, x.shape[0])
                    batch = x[batch_index_1:batch_index_2]
                    batch_labels = targets[batch_index_1:batch_index_2]
                    mask_batch = mask

                    if mask is not None:
                        if len(mask.shape) == len(x.shape):
                            mask_batch = mask[batch_index_1:batch_index_2]

                    for i_max_iter in trange(
                        self.max_iter, desc="PGD - Iterations", leave=False, disable=not self.verbose
                    ):
                        self._i_max_iter = i_max_iter

                        batch = self._compute(
                            batch,
                            x[batch_index_1:batch_index_2],
                            batch_labels,
                            mask_batch,
                            self.eps,
                            self.eps_step,
                            self._project,
                            self.num_random_init > 0 and i_max_iter == 0,
                            self._batch_id,
                        )

                    if rand_init_num == 0:
                        # initial (and possibly only) random restart: we only have this set of
                        # adversarial examples for now
                        adv_x[batch_index_1:batch_index_2] = np.copy(batch)
                    else:
                        # replace adversarial examples if they are successful
                        attack_success = compute_success_array(
                            self.estimator,  # type: ignore
                            x[batch_index_1:batch_index_2],
                            targets[batch_index_1:batch_index_2],
                            batch,
                            self.targeted,
                            batch_size=self.batch_size,
                        )
                        adv_x[batch_index_1:batch_index_2][attack_success] = batch[attack_success]

            logger.info(
                "Success rate of attack: %.2f%%",
                100
                * compute_success(
                    self.estimator,  # type: ignore
                    x,
                    targets,
                    adv_x,
                    self.targeted,
                    batch_size=self.batch_size,  # type: ignore
                ),
            )
        else:
            if self.num_random_init > 0:  # pragma: no cover
                raise ValueError("Random initialisation is only supported for classification.")

            # Set up targets
            targets = self._set_targets(x, y, classifier_mixin=False)

            # Start to compute adversarial examples
            if x.dtype == object:
                adv_x = x.copy()
            else:
                adv_x = x.astype(ART_NUMPY_DTYPE)

            for i_max_iter in trange(self.max_iter, desc="PGD - Iterations", disable=not self.verbose):
                self._i_max_iter = i_max_iter

                adv_x = self._compute(
                    adv_x,
                    x,
                    targets,
                    mask,
                    self.eps,
                    self.eps_step,
                    self._project,
                    self.num_random_init > 0 and i_max_iter == 0,
                )

        if self.summary_writer is not None:
            self.summary_writer.reset()

        return adv_x
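
A minimal usage sketch of this `generate` method through ART's `ProjectedGradientDescent` attack. The constructor arguments shown are standard ART parameters; `classifier`, `x_test`, and `mask` are assumed to be supplied by the caller and are not part of the snippet above:

from art.attacks.evasion import ProjectedGradientDescent


def run_pgd(classifier, x_test, mask=None):
    # classifier: any fitted ART classifier (e.g. PyTorchClassifier, TensorFlowV2Classifier)
    # x_test: clean inputs to perturb; mask: optional array, non-zero entries mark perturbable features
    attack = ProjectedGradientDescent(
        estimator=classifier,
        eps=0.3,             # maximum perturbation
        eps_step=0.03,       # step size per iteration
        max_iter=40,
        num_random_init=1,   # exercises the random-restart branch shown above
        targeted=False,
        batch_size=64,
        verbose=True,
    )
    if mask is not None:
        return attack.generate(x=x_test, mask=mask)
    return attack.generate(x=x_test)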
Example #4
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
                  samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
                  (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
        :param mask: An array with a mask broadcastable to input `x` defining where to apply adversarial perturbations.
                     Shape needs to be broadcastable to the shape of x and can also be of the same shape as `x`. Any
                     features for which the mask is zero will not be adversarially perturbed.
        :type mask: `np.ndarray`
        :return: An array holding the adversarial examples.
        """
        import tensorflow as tf  # lgtm [py/repeated-import]

        mask = self._get_mask(x, **kwargs)

        # Ensure eps is broadcastable
        self._check_compatibility_input_and_eps(x=x)

        # Check whether random eps is enabled
        self._random_eps()

        # Set up targets
        targets = self._set_targets(x, y)

        # Create dataset
        if mask is not None:
            # Here we need to make a distinction: if the mask is per-sample, slice it for the current batch.
            # Otherwise (i.e. the mask is meant to be broadcast over the batch), tile it so every sample in
            # the dataset carries the same mask.
            if len(mask.shape) == len(x.shape):
                dataset = tf.data.Dataset.from_tensor_slices((
                    x.astype(ART_NUMPY_DTYPE),
                    targets.astype(ART_NUMPY_DTYPE),
                    mask.astype(ART_NUMPY_DTYPE),
                )).batch(self.batch_size, drop_remainder=False)

            else:
                dataset = tf.data.Dataset.from_tensor_slices((
                    x.astype(ART_NUMPY_DTYPE),
                    targets.astype(ART_NUMPY_DTYPE),
                    np.array([mask.astype(ART_NUMPY_DTYPE)] * x.shape[0]),
                )).batch(self.batch_size, drop_remainder=False)

        else:
            dataset = tf.data.Dataset.from_tensor_slices((
                x.astype(ART_NUMPY_DTYPE),
                targets.astype(ART_NUMPY_DTYPE),
            )).batch(self.batch_size, drop_remainder=False)

        # Start to compute adversarial examples
        adv_x = x.astype(ART_NUMPY_DTYPE)
        data_loader = iter(dataset)

        # Compute perturbation with batching
        for batch_id, batch_all in enumerate(
            tqdm(data_loader, desc="PGD - Batches", leave=False, disable=not self.verbose)
        ):
            if mask is not None:
                (batch, batch_labels, mask_batch) = batch_all[0], batch_all[1], batch_all[2]
            else:
                (batch, batch_labels, mask_batch) = batch_all[0], batch_all[1], None

            batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size

            # Compute batch_eps and batch_eps_step
            if isinstance(self.eps, np.ndarray):
                if len(self.eps.shape) == len(x.shape) and self.eps.shape[0] == x.shape[0]:
                    batch_eps = self.eps[batch_index_1:batch_index_2]
                    batch_eps_step = self.eps_step[batch_index_1:batch_index_2]
                else:
                    batch_eps = self.eps
                    batch_eps_step = self.eps_step
            else:
                batch_eps = self.eps
                batch_eps_step = self.eps_step

            for rand_init_num in range(max(1, self.num_random_init)):
                if rand_init_num == 0:
                    # first iteration: use the adversarial examples as they are the only ones we have now
                    adv_x[batch_index_1:batch_index_2] = self._generate_batch(
                        x=batch,
                        targets=batch_labels,
                        mask=mask_batch,
                        eps=batch_eps,
                        eps_step=batch_eps_step)
                else:
                    adversarial_batch = self._generate_batch(
                        x=batch,
                        targets=batch_labels,
                        mask=mask_batch,
                        eps=batch_eps,
                        eps_step=batch_eps_step)
                    attack_success = compute_success_array(
                        self.estimator,
                        batch,
                        batch_labels,
                        adversarial_batch,
                        self.targeted,
                        batch_size=self.batch_size,
                    )
                    # keep the adversarial examples that succeeded in this restart
                    adv_x[batch_index_1:batch_index_2][attack_success] = adversarial_batch[attack_success]

        logger.info(
            "Success rate of attack: %.2f%%",
            100 * compute_success(self.estimator,
                                  x,
                                  targets,
                                  adv_x,
                                  self.targeted,
                                  batch_size=self.batch_size),
        )

        return adv_x
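
The batch_eps logic above slices `self.eps` per batch only when `eps` is an array with one entry per sample. Below is a sketch of driving that path with a per-sample budget, assuming (as in recent ART releases) that `ProjectedGradientDescent` accepts `eps` and `eps_step` as `np.ndarray` values broadcastable to the input shape; `classifier` and `x_test` are placeholders:

import numpy as np

from art.attacks.evasion import ProjectedGradientDescent


def run_pgd_with_per_sample_eps(classifier, x_test):
    # One budget per sample, with trailing singleton axes so the shape broadcasts over x_test
    # and satisfies the "same ndim and same leading dimension" check shown above.
    eps = np.full((x_test.shape[0],) + (1,) * (x_test.ndim - 1), 0.3, dtype=np.float32)
    attack = ProjectedGradientDescent(
        estimator=classifier,
        eps=eps,              # per-sample maximum perturbation
        eps_step=eps / 10.0,  # per-sample step size (same shape as eps)
        max_iter=40,
        batch_size=64,
    )
    return attack.generate(x=x_test)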