Example #1
    def generate(self, x, y=None, **kwargs):
        """Generate adversarial samples and return them in an array.
        :param x: An array with the original inputs.
        :type x: `np.ndarray`
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
                  samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
                  (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        y = check_and_transform_label_format(y, self.classifier.nb_classes())

        if y is None:
            # Throw error if attack is targeted, but no targets are provided
            if self.targeted:
                raise ValueError('Target labels `y` need to be provided for a targeted attack.')

            # Use model predictions as correct outputs
            logger.info('Using model predictions as correct labels for FGM.')
            y = get_labels_np_array(self.classifier.predict(x, batch_size=self.batch_size))
        y = y / np.sum(y, axis=1, keepdims=True)

        # Return adversarial examples computed with minimal perturbation if option is active
        if self.minimal:
            logger.info('Performing minimal perturbation FGM.')
            adv_x_best = self._minimal_perturbation(x, y)
            rate_best = 100 * compute_success(self.classifier,
                                              x,
                                              y,
                                              adv_x_best,
                                              self.targeted,
                                              batch_size=self.batch_size)
        else:
            adv_x_best = None
            rate_best = None

            for _ in range(max(1, self.num_random_init)):
                adv_x = self._compute(x, x, y, self.eps, self.eps,
                                      self._project, self.num_random_init > 0)

                if self.num_random_init > 1:
                    rate = 100 * compute_success(self.classifier,
                                                 x,
                                                 y,
                                                 adv_x,
                                                 self.targeted,
                                                 batch_size=self.batch_size)
                    if rate_best is None or rate > rate_best or adv_x_best is None:
                        rate_best = rate
                        adv_x_best = adv_x
                else:
                    adv_x_best = adv_x

        return adv_x_best
Example #2
    def generate(self, x, y=None, **kwargs):
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :type x: `np.ndarray`
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
                  samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
                  (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        y = check_and_transform_label_format(y, self.classifier.nb_classes())

        if y is None:
            # Throw error if attack is targeted, but no targets are provided
            if self.targeted:
                raise ValueError('Target labels `y` need to be provided for a targeted attack.')

            # Use model predictions as correct outputs
            targets = get_labels_np_array(self.classifier.predict(x, batch_size=self.batch_size))
        else:
            targets = y

        adv_x_best = None
        rate_best = None

        for _ in range(max(1, self.num_random_init)):
            adv_x = x.astype(NUMPY_DTYPE)

            for i_max_iter in range(self.max_iter):
                adv_x = self._compute(adv_x, x, targets, self.eps, self.eps_step, self._project,
                                      self.num_random_init > 0 and i_max_iter == 0)

            if self.num_random_init > 1:
                rate = 100 * compute_success(self.classifier, x, targets, adv_x,
                                             self.targeted, batch_size=self.batch_size)
                if rate_best is None or rate > rate_best or adv_x_best is None:
                    rate_best = rate
                    adv_x_best = adv_x
            else:
                adv_x_best = adv_x

        logger.info('Success rate of attack: %.2f%%', rate_best if rate_best is not None else
                    100 * compute_success(self.classifier, x, targets, adv_x, self.targeted, batch_size=self.batch_size))

        return adv_x_best
Example #3
    def test_compute_success(self):
        class DummyClassifier:
            def predict(self, x, batch_size):
                return x

        classifier = DummyClassifier()
        x_clean = np.array([[0, 1], [1, 0]])
        x_adv = np.array([[1, 0], [0, 1]])
        labels = np.array([[1, 0], [0, 1]])

        attack_success_targeted = compute_success(classifier, x_clean, labels, x_adv, targeted=True)
        attack_success_untargeted = compute_success(classifier, x_clean, labels, x_adv, targeted=False)

        self.assertEqual(attack_success_targeted, 1.0)
        self.assertEqual(attack_success_untargeted, 1.0)
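
The test above pins down the semantics of `compute_success`: a targeted attack succeeds when the prediction on the adversarial input matches the supplied labels, while an untargeted attack succeeds when it differs from the prediction on the clean input. A minimal sketch consistent with that behaviour (not the ART source; batching details and label-format checks are omitted):

import numpy as np

def compute_success_sketch(classifier, x_clean, labels, x_adv, targeted=False, batch_size=1):
    # Fraction of samples for which the attack succeeded, matching the semantics
    # exercised by the test above.
    adv_preds = np.argmax(classifier.predict(x_adv, batch_size=batch_size), axis=1)
    if targeted:
        # Targeted: the adversarial prediction must equal the requested label.
        success = adv_preds == np.argmax(labels, axis=1)
    else:
        # Untargeted: the adversarial prediction must differ from the clean prediction.
        clean_preds = np.argmax(classifier.predict(x_clean, batch_size=batch_size), axis=1)
        success = adv_preds != clean_preds
    return np.sum(success) / x_adv.shape[0]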
Example #4
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,). If `self.targeted` is true, then `y` represents the target labels. Otherwise, the
                  targets are the original class labels.
        :return: An array holding the adversarial examples.
        """
        y = check_and_transform_label_format(y, self.estimator.nb_classes)
        x_adv = x.astype(ART_NUMPY_DTYPE)

        # Assert that, if attack is targeted, y is provided:
        if self.targeted and y is None:
            raise ValueError(
                "Target labels `y` need to be provided for a targeted attack.")

        # No labels provided, use model prediction as correct class
        if y is None:
            y = get_labels_np_array(
                self.estimator.predict(x, batch_size=self.batch_size))

        if self.estimator.nb_classes == 2 and y.shape[1] == 1:
            raise ValueError(
                "This attack has not yet been tested for binary classification with a single output classifier."
            )

        # Compute adversarial examples with implicit batching
        nb_batches = int(np.ceil(x_adv.shape[0] / float(self.batch_size)))
        for batch_id in trange(nb_batches,
                               desc="EAD",
                               disable=not self.verbose):
            batch_index_1, batch_index_2 = batch_id * self.batch_size, (
                batch_id + 1) * self.batch_size
            x_batch = x_adv[batch_index_1:batch_index_2]
            y_batch = y[batch_index_1:batch_index_2]
            x_adv[batch_index_1:batch_index_2] = self._generate_batch(
                x_batch, y_batch)

        # Apply clip
        if self.estimator.clip_values is not None:
            x_adv = np.clip(x_adv, self.estimator.clip_values[0],
                            self.estimator.clip_values[1])

        # Compute success rate of the EAD attack
        logger.info(
            "Success rate of EAD attack: %.2f%%",
            100 * compute_success(self.estimator,
                                  x,
                                  y,
                                  x_adv,
                                  self.targeted,
                                  batch_size=self.batch_size),
        )

        return x_adv
Example #5
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,).
        :return: An array holding the adversarial examples.
        """
        y = check_and_transform_label_format(y, self.estimator.nb_classes)

        # Check that `y` is provided for targeted attacks
        if self.targeted and y is None:
            raise ValueError(
                "Target labels `y` need to be provided for a targeted attack.")

        # No labels provided, use model prediction as correct class
        if y is None:
            y = get_labels_np_array(
                self.estimator.predict(x, batch_size=self.batch_size))

        if self.estimator.nb_classes == 2 and y.shape[1] == 1:
            raise ValueError(
                "This attack has not yet been tested for binary classification with a single output classifier."
            )

        # Compute adversarial examples with implicit batching
        nb_batches = int(np.ceil(x.shape[0] / float(self.batch_size)))
        x_adv = []
        for batch_id in trange(nb_batches,
                               desc="ZOO",
                               disable=not self.verbose):
            batch_index_1, batch_index_2 = batch_id * self.batch_size, (
                batch_id + 1) * self.batch_size
            x_batch = x[batch_index_1:batch_index_2]
            y_batch = y[batch_index_1:batch_index_2]
            res = self._generate_batch(x_batch, y_batch)
            x_adv.append(res)
        x_adv = np.vstack(x_adv)

        # Apply clip
        if self.estimator.clip_values is not None:
            clip_min, clip_max = self.estimator.clip_values
            np.clip(x_adv, clip_min, clip_max, out=x_adv)

        # Log success rate of the ZOO attack
        logger.info(
            "Success rate of ZOO attack: %.2f%%",
            100 * compute_success(self.estimator,
                                  x,
                                  y,
                                  x_adv,
                                  self.targeted,
                                  batch_size=self.batch_size),
        )

        return x_adv
Example #6
    def generate(self, x, y=None):
        """Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :type x: `np.ndarray`
        :param y: The labels for the data `x`. Only provide this parameter if you'd like to use true
                  labels when crafting adversarial samples. Otherwise, model predictions are used as labels to avoid the
                  "label leaking" effect (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
                  Labels should be one-hot-encoded.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        if y is None:
            # Throw error if attack is targeted, but no targets are provided
            if self.targeted:
                raise ValueError(
                    'Target labels `y` need to be provided for a targeted attack.'
                )

            # Use model predictions as correct outputs
            logger.info('Using model predictions as correct labels for FGM.')
            y = get_labels_np_array(self.classifier.predict(x))
        y = y / np.sum(y, axis=1, keepdims=True)

        # Return adversarial examples computed with minimal perturbation if option is active
        if self.minimal:
            logger.info('Performing minimal perturbation FGM.')
            adv_x_best = self._minimal_perturbation(x, y)
            rate_best = 100 * compute_success(self.classifier, x, y,
                                              adv_x_best, self.targeted)
        else:
            adv_x_best = None
            rate_best = 0.0

            for i_random_init in range(max(1, self.num_random_init)):
                adv_x = self._compute(x, y, self.eps, self.eps,
                                      self.num_random_init > 0)
                rate = 100 * compute_success(self.classifier, x, y, adv_x,
                                             self.targeted)
                if rate > rate_best or adv_x_best is None:
                    rate_best = rate
                    adv_x_best = adv_x

        logger.info('Success rate of FGM attack: %.2f%%', rate_best)

        return adv_x_best
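
All of these `generate` methods share the same calling pattern: run the attack on clean inputs, then score the result with `compute_success`. A minimal usage sketch, assuming `classifier` is an ART-style classifier wrapper, `attack` is an already constructed evasion attack, and `x_test`/`y_test` are clean test data (all four names are placeholders, not defined in these snippets):

import numpy as np
from art.utils import compute_success  # utility used throughout the examples above

# `classifier`, `attack`, `x_test` and `y_test` are assumed to exist already.
x_adv = attack.generate(x=x_test)                 # untargeted: model predictions are used as labels
# x_adv = attack.generate(x=x_test, y=y_target)   # targeted attacks require explicit targets

rate = compute_success(classifier, x_test, y_test, x_adv, targeted=False)
print("Attack success rate: {:.2f}%".format(100 * rate))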
Example #7
    def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
        """
        Generate adversarial examples and return them as an array.

        :param x: An array with the original inputs to be attacked.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,).
        :return: An array holding the adversarial examples.
        """
        y = check_and_transform_label_format(y, self.estimator.nb_classes, return_one_hot=False)
        x_adv = x.copy()

        for index in trange(x_adv.shape[0], desc="Decision tree attack", disable=not self.verbose):
            path = self.estimator.get_decision_path(x_adv[index])
            legitimate_class = np.argmax(self.estimator.predict(x_adv[index].reshape(1, -1)))
            position = -2
            adv_path = [-1]
            ancestor = path[position]
            while np.abs(position) < (len(path) - 1) or adv_path[0] == -1:
                ancestor = path[position]
                current_child = path[position + 1]
                # search in right subtree
                if current_child == self.estimator.get_left_child(ancestor):
                    if y is None:
                        adv_path = self._df_subtree(self.estimator.get_right_child(ancestor), legitimate_class)
                    else:
                        adv_path = self._df_subtree(
                            self.estimator.get_right_child(ancestor),
                            legitimate_class,
                            y[index],
                        )
                else:  # search in left subtree
                    if y is None:
                        adv_path = self._df_subtree(self.estimator.get_left_child(ancestor), legitimate_class)
                    else:
                        adv_path = self._df_subtree(
                            self.estimator.get_left_child(ancestor),
                            legitimate_class,
                            y[index],
                        )
                position = position - 1  # we are going the decision path upwards
            adv_path.append(ancestor)
            # we figured out which is the way to the target, now perturb
            # first one is leaf-> no threshold, cannot be perturbed
            for i in range(1, 1 + len(adv_path[1:])):
                go_for = adv_path[i - 1]
                threshold = self.estimator.get_threshold_at_node(adv_path[i])
                feature = self.estimator.get_feature_at_node(adv_path[i])
                # only perturb if the feature is actually wrong
                if x_adv[index][feature] > threshold and go_for == self.estimator.get_left_child(adv_path[i]):
                    x_adv[index][feature] = threshold - self.offset
                elif x_adv[index][feature] <= threshold and go_for == self.estimator.get_right_child(adv_path[i]):
                    x_adv[index][feature] = threshold + self.offset

        logger.info(
            "Success rate of decision tree attack: %.2f%%",
            100 * compute_success(self.estimator, x, y, x_adv),
        )
        return x_adv
Example #8
    def generate(self, x, y=None, **kwargs):
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :type x: `np.ndarray`
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  (nb_samples,).
        :type y: `np.ndarray`
        :param x_adv_init: Initial array to act as initial adversarial examples. Same shape as `x`.
        :type x_adv_init: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        y = check_and_transform_label_format(y, self.classifier.nb_classes())

        # Get clip_min and clip_max from the classifier or infer them from data
        if hasattr(self.classifier, 'clip_values') and self.classifier.clip_values is not None:
            clip_min, clip_max = self.classifier.clip_values
        else:
            clip_min, clip_max = np.min(x), np.max(x)

        # Prediction from the original images
        preds = np.argmax(self.classifier.predict(x, batch_size=self.batch_size), axis=1)

        # Prediction from the initial adversarial examples if not None
        x_adv_init = kwargs.get('x_adv_init')

        if x_adv_init is not None:
            init_preds = np.argmax(self.classifier.predict(x_adv_init, batch_size=self.batch_size), axis=1)
        else:
            init_preds = [None] * len(x)
            x_adv_init = [None] * len(x)

        # Assert that, if attack is targeted, y is provided
        if self.targeted and y is None:
            raise ValueError('Target labels `y` need to be provided for a targeted attack.')

        # Some initial setups
        x_adv = x.astype(NUMPY_DTYPE)
        if y is not None:
            y = np.argmax(y, axis=1)

        # Generate the adversarial samples
        for ind, val in enumerate(x_adv):
            if self.targeted:
                x_adv[ind] = self._perturb(x=val, y=y[ind], y_p=preds[ind], init_pred=init_preds[ind],
                                           adv_init=x_adv_init[ind], clip_min=clip_min, clip_max=clip_max)
            else:
                x_adv[ind] = self._perturb(x=val, y=-1, y_p=preds[ind], init_pred=init_preds[ind],
                                           adv_init=x_adv_init[ind], clip_min=clip_min, clip_max=clip_max)

        if y is not None:
            y = to_categorical(y, self.classifier.nb_classes())

        logger.info('Success rate of HopSkipJump attack: %.2f%%',
                    100 * compute_success(self.classifier, x, y, x_adv, self.targeted, batch_size=self.batch_size))

        return x_adv
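
The `x_adv_init` keyword documented above lets the attack start from existing adversarial samples instead of crafting them from scratch; `generate` reads it via `kwargs.get('x_adv_init')`. A short sketch, where `attack` is a HopSkipJump-style instance and `x_test` is clean data (both placeholders):

import numpy as np

# Warm start: any array with the same shape as x_test will do; here we simply
# reuse the clean inputs, which the attack then pushes across the decision boundary.
x_warm_start = np.copy(x_test)

x_adv = attack.generate(x=x_test, x_adv_init=x_warm_start)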
Example #9
    def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in a Numpy array.

        :param x: An array with the original inputs to be attacked.
        :param y: An array with the original labels to be predicted.
        :return: An array holding the adversarial examples.
        """
        x_adv = x.astype(ART_NUMPY_DTYPE)

        # Initialize variables
        y_pred = self.estimator.predict(x, batch_size=self.batch_size)
        pred_class = np.argmax(y_pred, axis=1)

        # Compute perturbation with implicit batching
        for batch_id in trange(
            int(np.ceil(x_adv.shape[0] / float(self.batch_size))), desc="NewtonFool", disable=not self.verbose
        ):
            batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size
            batch = x_adv[batch_index_1:batch_index_2]

            # Main algorithm for each batch
            norm_batch = np.linalg.norm(np.reshape(batch, (batch.shape[0], -1)), axis=1)
            l_batch = pred_class[batch_index_1:batch_index_2]
            l_b = to_categorical(l_batch, self.estimator.nb_classes).astype(bool)

            # Main loop of the algorithm
            for _ in range(self.max_iter):
                # Compute score
                score = self.estimator.predict(batch)[l_b]

                # Compute the gradients and norm
                grads = self.estimator.class_gradient(batch, label=l_batch)
                if grads.shape[1] == 1:
                    grads = np.squeeze(grads, axis=1)
                norm_grad = np.linalg.norm(np.reshape(grads, (batch.shape[0], -1)), axis=1)

                # Theta
                theta = self._compute_theta(norm_batch, score, norm_grad)

                # Perturbation
                di_batch = self._compute_pert(theta, grads, norm_grad)

                # Update xi and perturbation
                batch += di_batch

            # Apply clip
            if self.estimator.clip_values is not None:
                clip_min, clip_max = self.estimator.clip_values
                x_adv[batch_index_1:batch_index_2] = np.clip(batch, clip_min, clip_max)
            else:
                x_adv[batch_index_1:batch_index_2] = batch

        logger.info(
            "Success rate of NewtonFool attack: %.2f%%",
            100 * compute_success(self.estimator, x, y, x_adv, batch_size=self.batch_size),
        )
        return x_adv
Example #10
    def generate(self, x, y=None, **kwargs):
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :type x: `np.ndarray`
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,).
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        y = check_and_transform_label_format(y, self.classifier.nb_classes())

        # Check that `y` is provided for targeted attacks
        if self.targeted and y is None:
            raise ValueError(
                "Target labels `y` need to be provided for a targeted attack.")

        # No labels provided, use model prediction as correct class
        if y is None:
            y = get_labels_np_array(
                self.classifier.predict(x, batch_size=self.batch_size))

        # Compute adversarial examples with implicit batching
        nb_batches = int(np.ceil(x.shape[0] / float(self.batch_size)))
        x_adv = []
        for batch_id in range(nb_batches):
            logger.debug("Processing batch %i out of %i", batch_id, nb_batches)

            batch_index_1, batch_index_2 = batch_id * self.batch_size, (
                batch_id + 1) * self.batch_size
            x_batch = x[batch_index_1:batch_index_2]
            y_batch = y[batch_index_1:batch_index_2]
            res = self._generate_batch(x_batch, y_batch)
            x_adv.append(res)
        x_adv = np.vstack(x_adv)

        # Apply clip
        if hasattr(self.classifier,
                   "clip_values") and self.classifier.clip_values is not None:
            clip_min, clip_max = self.classifier.clip_values
            np.clip(x_adv, clip_min, clip_max, out=x_adv)

        # Log success rate of the ZOO attack
        logger.info(
            "Success rate of ZOO attack: %.2f%%",
            100 * compute_success(self.classifier,
                                  x,
                                  y,
                                  x_adv,
                                  self.targeted,
                                  batch_size=self.batch_size),
        )

        return x_adv
Example #11
    def generate(self, x, y=None, **kwargs):
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :type x: `np.ndarray`
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,). If `self.targeted` is true, then `y` represents the target labels. Otherwise, the
                  targets are the original class labels.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        y = check_and_transform_label_format(y, self.classifier.nb_classes())
        x_adv = x.astype(ART_NUMPY_DTYPE)

        # Assert that, if attack is targeted, y is provided:
        if self.targeted and y is None:
            raise ValueError(
                "Target labels `y` need to be provided for a targeted attack.")

        # No labels provided, use model prediction as correct class
        if y is None:
            y = get_labels_np_array(
                self.classifier.predict(x, batch_size=self.batch_size))

        # Compute adversarial examples with implicit batching
        nb_batches = int(np.ceil(x_adv.shape[0] / float(self.batch_size)))
        for batch_id in range(nb_batches):
            logger.debug("Processing batch %i out of %i", batch_id, nb_batches)

            batch_index_1, batch_index_2 = batch_id * self.batch_size, (
                batch_id + 1) * self.batch_size
            x_batch = x_adv[batch_index_1:batch_index_2]
            y_batch = y[batch_index_1:batch_index_2]
            x_adv[batch_index_1:batch_index_2] = self._generate_batch(
                x_batch, y_batch)

        # Apply clip
        if hasattr(self.classifier,
                   "clip_values") and self.classifier.clip_values is not None:
            x_adv = np.clip(x_adv, self.classifier.clip_values[0],
                            self.classifier.clip_values[1])

        # Compute success rate of the EAD attack
        logger.info(
            "Success rate of EAD attack: %.2f%%",
            100 * compute_success(self.classifier,
                                  x,
                                  y,
                                  x_adv,
                                  self.targeted,
                                  batch_size=self.batch_size),
        )

        return x_adv
Example #12
    def generate(self, x, y=None):
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :type x: `np.ndarray`
        :param y: If `self.targeted` is true, then `y` represents the target labels. Otherwise, the targets are the
                  original class labels.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        # ZOO can probably be extended to feature vectors if no zooming or resizing is applied
        if len(x.shape) == 2:
            raise ValueError(
                'Feature vectors detected. The ZOO attack can only be applied to data with spatial '
                'dimensions.')

        # Check that `y` is provided for targeted attacks
        if self.targeted and y is None:
            raise ValueError(
                'Target labels `y` need to be provided for a targeted attack.')

        # No labels provided, use model prediction as correct class
        if y is None:
            y = get_labels_np_array(self.classifier.predict(x, logits=False))

        # Compute adversarial examples with implicit batching
        nb_batches = int(np.ceil(x.shape[0] / float(self.batch_size)))
        x_adv = []
        for batch_id in range(nb_batches):
            logger.debug('Processing batch %i out of %i', batch_id, nb_batches)

            batch_index_1, batch_index_2 = batch_id * self.batch_size, (
                batch_id + 1) * self.batch_size
            x_batch = x[batch_index_1:batch_index_2]
            y_batch = y[batch_index_1:batch_index_2]
            res = self._generate_batch(x_batch, y_batch)
            x_adv.append(res)
        x_adv = np.vstack(x_adv)

        # Apply clip
        if hasattr(self.classifier,
                   'clip_values') and self.classifier.clip_values is not None:
            clip_min, clip_max = self.classifier.clip_values
            np.clip(x_adv, clip_min, clip_max, out=x_adv)

        # Log success rate of the ZOO attack
        logger.info(
            'Success rate of ZOO attack: %.2f%%',
            100 * compute_success(self.classifier, x, y, x_adv, self.targeted))

        return x_adv
Example #13
    def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
                  samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
                  (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
        :param cost_matrix: A non-negative cost matrix.
        :type cost_matrix: `np.ndarray`
        :return: An array holding the adversarial examples.
        """
        y = check_and_transform_label_format(y, self.estimator.nb_classes)
        x_adv = x.copy().astype(ART_NUMPY_DTYPE)

        if y is None:
            # Throw error if attack is targeted, but no targets are provided
            if self.targeted:
                raise ValueError("Target labels `y` need to be provided for a targeted attack.")

            # Use model predictions as correct outputs
            targets = get_labels_np_array(self.estimator.predict(x, batch_size=self.batch_size))
        else:
            targets = y

        if self.estimator.nb_classes == 2 and targets.shape[1] == 1:
            raise ValueError(
                "This attack has not yet been tested for binary classification with a single output classifier."
            )

        # Compute the cost matrix if needed
        cost_matrix = kwargs.get("cost_matrix")
        if cost_matrix is None:
            cost_matrix = self._compute_cost_matrix(self.p, self.kernel_size)

        # Compute perturbation with implicit batching
        nb_batches = int(np.ceil(x.shape[0] / float(self.batch_size)))
        for batch_id in trange(nb_batches, desc="Wasserstein", disable=not self.verbose):
            logger.debug("Processing batch %i out of %i", batch_id, nb_batches)

            batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size
            batch = x_adv[batch_index_1:batch_index_2]
            batch_labels = targets[batch_index_1:batch_index_2]

            x_adv[batch_index_1:batch_index_2] = self._generate_batch(batch, batch_labels, cost_matrix)

        logger.info(
            "Success rate of attack: %.2f%%",
            100 * compute_success(self.estimator, x, y, x_adv, self.targeted, batch_size=self.batch_size),
        )

        return x_adv
Example #14
    def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
        """
        Generate adversarial examples and return them as an array.

        :param x: An array with the original inputs to be attacked.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,).
        :return: An array holding the adversarial examples.
        """
        x_adv = copy.copy(x)

        def minfun(x, args):  # objective: sum of |x - orig| over features (L1 distance to the original input)
            return np.sum(np.sqrt((x - args["orig"]) ** 2))

        def constraint_conf(x, args):  # constraint for confidence
            pred = args["classifier"].predict(x.reshape(1, -1))[0, 0]
            if args["class_zero"]:
                pred = 1.0 - pred
            return (pred - args["conf"]).reshape(-1)

        def constraint_unc(x, args):  # constraint for uncertainty
            cur_unc = (args["classifier"].predict_uncertainty(x.reshape(1, -1))).reshape(-1)
            return (args["max_uncertainty"] - cur_unc)[0]

        bounds = []
        # adding bounds, to not go away from original data
        for i in range(np.shape(x)[1]):
            bounds.append((self.min_val, self.max_val))
        for i in trange(x.shape[0], desc="HCLU"):  # go through the data and craft adversarial examples
            # get properties for attack
            max_uncertainty = self.unc_increase * self.estimator.predict_uncertainty(x_adv[i].reshape(1, -1))
            class_zero = not self.estimator.predict(x_adv[i].reshape(1, -1))[0, 0] < 0.5
            init_args = {
                "classifier": self.estimator,
                "class_zero": class_zero,
                "max_uncertainty": max_uncertainty,
                "conf": self.conf,
            }
            constr_conf = {"type": "ineq", "fun": constraint_conf, "args": (init_args,)}
            constr_unc = {"type": "ineq", "fun": constraint_unc, "args": (init_args,)}
            args = {"args": init_args, "orig": x[i].reshape(-1)}
            # finally, run optimization
            x_adv[i] = minimize(minfun, x_adv[i], args=args, bounds=bounds, constraints=[constr_conf, constr_unc],)["x"]
        logger.info(
            "Success rate of HCLU attack: %.2f%%", 100 * compute_success(self.estimator, x, y, x_adv),
        )
        return x_adv
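
The HCLU example relies on SciPy's dictionary form of constraints: for "type": "ineq" the optimizer treats `fun(x, *args) >= 0` as satisfied, which is why `constraint_conf` returns `pred - conf` and `constraint_unc` returns `max_uncertainty - cur_unc`. A small standalone sketch of the same convention (toy objective, nothing ART-specific):

import numpy as np
from scipy.optimize import minimize

# Minimize the squared distance to the origin subject to x[0] + x[1] >= 1.
# With an "ineq" constraint, SciPy requires fun(x) >= 0 at feasible points.
objective = lambda x: float(np.sum(x ** 2))
constraint = {"type": "ineq", "fun": lambda x: x[0] + x[1] - 1.0}

result = minimize(objective, x0=np.zeros(2), constraints=[constraint])
print(result.x)  # approximately [0.5, 0.5]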
Example #15
    def poison(self,
               x: np.ndarray,
               y: Optional[np.ndarray] = None,
               **kwargs) -> Tuple[np.ndarray, np.ndarray]:
        """
        Iteratively finds optimal attack points starting at values at `x`.

        :param x: An array with the points that initialize attack points.
        :param y: The target labels for the attack.
        :return: A tuple holding the `(poisoning_examples, poisoning_labels)`.
        """
        if y is None:
            raise ValueError(
                "Target labels `y` need to be provided for a targeted attack.")
        else:
            y_attack = np.copy(y)

        print("in poison")
        num_poison = len(x)
        if num_poison == 0:
            raise ValueError("Must input at least one poison point")

        num_features = len(x[0])
        train_data = np.copy(self.x_train)
        train_labels = np.copy(self.y_train)
        all_poison = []

        for attack_point, attack_label in tqdm(zip(x, y_attack),
                                               desc="SVM poisoning"):
            print("in for loop")
            poison = self.generate_attack_point(attack_point, attack_label)
            all_poison.append(poison)
            train_data = np.vstack([train_data, poison])
            train_labels = np.vstack([train_labels, attack_label])

        print("after generate attack")

        x_adv = np.array(all_poison).reshape((num_poison, num_features))
        targeted = y is not None

        logger.info(
            "Success rate of poisoning attack SVM attack: %.2f%%",
            100 *
            compute_success(self.estimator, x, y, x_adv, targeted=targeted),
        )

        return x_adv, y_attack
Example #16
    def generate(self, x, **kwargs):
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :type x: `np.ndarray`
        :param y: If `self.targeted` is true, then `y` represents the target labels. Otherwise, the targets are the
                  original class labels.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        # Parse and save attack-specific parameters
        params_cpy = dict(kwargs)
        y = params_cpy.pop('y', None)
        self.set_params(**params_cpy)

        # Check that `y` is provided for targeted attacks
        if self.targeted and y is None:
            raise ValueError('Target labels `y` need to be provided for a targeted attack.')

        # No labels provided, use model prediction as correct class
        if y is None:
            y = get_labels_np_array(self.classifier.predict(x, logits=False))

        # Compute adversarial examples with implicit batching
        nb_batches = int(np.ceil(x.shape[0] / float(self.batch_size)))
        x_adv = []
        for batch_id in range(nb_batches):
            logger.debug('Processing batch %i out of %i', batch_id, nb_batches)

            batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size
            x_batch = x[batch_index_1:batch_index_2]
            y_batch = y[batch_index_1:batch_index_2]
            res = self._generate_batch(x_batch, y_batch)
            x_adv.append(res)

        # Apply clip
        x_adv = np.vstack(x_adv)
        x_adv = np.clip(x_adv, self.classifier.clip_values[0], self.classifier.clip_values[1])

        # Log success rate of the ZOO attack
        logger.info('Success rate of ZOO attack: %.2f%%',
                    100 * compute_success(self.classifier, x, y, x_adv, self.targeted))

        return x_adv
Example #17
    def poison(self, x, y=None, **kwargs):
        """
        Iteratively finds optimal attack points starting at values at `x`.

        :param x: An array with the points that initialize attack points.
        :type x: `np.ndarray`
        :param y: The target labels for the attack.
        :return: A tuple holding the (poisoning examples, poisoning labels).
        :rtype: `(np.ndarray, np.ndarray)`
        """

        if y is None:
            raise ValueError(
                "Target labels `y` need to be provided for a targeted attack.")
        else:
            y_attack = np.copy(y)

        num_poison = len(x)

        if num_poison == 0:
            raise ValueError("Must input at least one poison point")

        num_features = len(x[0])
        train_data = np.copy(self.x_train)
        train_labels = np.copy(self.y_train)
        all_poison = []

        for attack_point, attack_label in zip(x, y_attack):
            poison = self.generate_attack_point(attack_point, attack_label)
            all_poison.append(poison)
            train_data = np.vstack([train_data, poison])
            train_labels = np.vstack([train_labels, attack_label])

        x_adv = np.array(all_poison).reshape((num_poison, num_features))
        targeted = y is not None

        logger.info(
            "Success rate of poisoning attack SVM attack: %.2f%%",
            100 *
            compute_success(self.classifier, x, y, x_adv, targeted=targeted),
        )

        return x_adv, y_attack
Example #18
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,). If `self.targeted` is true, then `y` represents the target labels. Otherwise, the
                  targets are the original class labels.
        :return: An array holding the adversarial examples.
        """
        y = check_and_transform_label_format(y, self.estimator.nb_classes)
        x_adv = x.astype(ART_NUMPY_DTYPE)

        if self.estimator.clip_values is not None:
            clip_min_per_pixel, clip_max_per_pixel = self.estimator.clip_values
        else:
            clip_min_per_pixel, clip_max_per_pixel = np.amin(x), np.amax(x)

        # Assert that, if attack is targeted, y is provided:
        if self.targeted and y is None:
            raise ValueError(
                "Target labels `y` need to be provided for a targeted attack.")

        # No labels provided, use model prediction as correct class
        if y is None:
            y = get_labels_np_array(
                self.estimator.predict(x, batch_size=self.batch_size))

        # Compute perturbation with implicit batching
        nb_batches = int(np.ceil(x_adv.shape[0] / float(self.batch_size)))
        for batch_id in trange(nb_batches,
                               desc="C&W L_inf",
                               disable=not self.verbose):
            batch_index_1, batch_index_2 = batch_id * self.batch_size, (
                batch_id + 1) * self.batch_size
            x_batch = x_adv[batch_index_1:batch_index_2]
            y_batch = y[batch_index_1:batch_index_2]

            # Determine values for later clipping
            clip_min = np.clip(x_batch - self.eps, clip_min_per_pixel,
                               clip_max_per_pixel)
            clip_max = np.clip(x_batch + self.eps, clip_min_per_pixel,
                               clip_max_per_pixel)

            # The optimization is performed in tanh space to keep the
            # adversarial images bounded from clip_min and clip_max.
            x_batch_tanh = original_to_tanh(x_batch, clip_min, clip_max,
                                            self._tanh_smoother)

            # Initialize perturbation in tanh space:
            x_adv_batch = x_batch.copy()
            x_adv_batch_tanh = x_batch_tanh.copy()

            # Initialize optimization:
            z_logits, loss = self._loss(x_adv_batch, y_batch)
            attack_success = loss <= 0
            learning_rate = self.learning_rate * np.ones(x_batch.shape[0])

            for i_iter in range(self.max_iter):
                logger.debug("Iteration step %i out of %i", i_iter,
                             self.max_iter)
                logger.debug("Average Loss: %f", np.mean(loss))

                logger.debug(
                    "Successful attack samples: %i out of %i",
                    int(np.sum(attack_success)),
                    x_batch.shape[0],
                )

                # only continue optimization for those samples where attack hasn't succeeded yet:
                active = ~attack_success
                if np.sum(active) == 0:
                    break

                # compute gradient:
                logger.debug("Compute loss gradient")
                perturbation_tanh = -self._loss_gradient(
                    z_logits[active],
                    y_batch[active],
                    x_adv_batch[active],
                    x_adv_batch_tanh[active],
                    clip_min[active],
                    clip_max[active],
                )

                # perform line search to optimize perturbation
                # first, halve the learning rate until perturbation actually decreases the loss:
                prev_loss = loss.copy()
                best_loss = loss.copy()
                best_lr = np.zeros(x_batch.shape[0])
                halving = np.zeros(x_batch.shape[0])

                for i_halve in range(self.max_halving):
                    logger.debug(
                        "Perform halving iteration %i out of %i",
                        i_halve,
                        self.max_halving,
                    )
                    do_halving = loss[active] >= prev_loss[active]
                    logger.debug("Halving to be performed on %i samples",
                                 int(np.sum(do_halving)))
                    if np.sum(do_halving) == 0:
                        break
                    active_and_do_halving = active.copy()
                    active_and_do_halving[active] = do_halving

                    lr_mult = learning_rate[active_and_do_halving]
                    for _ in range(len(x.shape) - 1):
                        lr_mult = lr_mult[:, np.newaxis]

                    adv_10 = x_adv_batch_tanh[active_and_do_halving]
                    new_x_adv_batch_tanh = adv_10 + lr_mult * perturbation_tanh[
                        do_halving]

                    new_x_adv_batch = tanh_to_original(
                        new_x_adv_batch_tanh,
                        clip_min[active_and_do_halving],
                        clip_max[active_and_do_halving],
                    )
                    _, loss[active_and_do_halving] = self._loss(
                        new_x_adv_batch, y_batch[active_and_do_halving])
                    logger.debug("New Average Loss: %f", np.mean(loss))
                    logger.debug("Loss: %s", str(loss))
                    logger.debug("Prev_loss: %s", str(prev_loss))
                    logger.debug("Best_loss: %s", str(best_loss))

                    best_lr[loss < best_loss] = learning_rate[loss < best_loss]
                    best_loss[loss < best_loss] = loss[loss < best_loss]
                    learning_rate[active_and_do_halving] /= 2
                    halving[active_and_do_halving] += 1
                learning_rate[active] *= 2

                # if no halving was actually required, double the learning rate as long as this
                # decreases the loss:
                for i_double in range(self.max_doubling):
                    logger.debug(
                        "Perform doubling iteration %i out of %i",
                        i_double,
                        self.max_doubling,
                    )
                    do_doubling = (halving[active]
                                   == 1) & (loss[active] <= best_loss[active])
                    logger.debug(
                        "Doubling to be performed on %i samples",
                        int(np.sum(do_doubling)),
                    )
                    if np.sum(do_doubling) == 0:
                        break
                    active_and_do_doubling = active.copy()
                    active_and_do_doubling[active] = do_doubling
                    learning_rate[active_and_do_doubling] *= 2

                    lr_mult = learning_rate[active_and_do_doubling]
                    for _ in range(len(x.shape) - 1):
                        lr_mult = lr_mult[:, np.newaxis]

                    x_adv15 = x_adv_batch_tanh[active_and_do_doubling]
                    new_x_adv_batch_tanh = x_adv15 + lr_mult * perturbation_tanh[
                        do_doubling]
                    new_x_adv_batch = tanh_to_original(
                        new_x_adv_batch_tanh,
                        clip_min[active_and_do_doubling],
                        clip_max[active_and_do_doubling],
                    )
                    _, loss[active_and_do_doubling] = self._loss(
                        new_x_adv_batch, y_batch[active_and_do_doubling])
                    logger.debug("New Average Loss: %f", np.mean(loss))
                    best_lr[loss < best_loss] = learning_rate[loss < best_loss]
                    best_loss[loss < best_loss] = loss[loss < best_loss]

                learning_rate[halving == 1] /= 2

                update_adv = best_lr[active] > 0
                logger.debug(
                    "Number of adversarial samples to be finally updated: %i",
                    int(np.sum(update_adv)),
                )

                if np.sum(update_adv) > 0:
                    active_and_update_adv = active.copy()
                    active_and_update_adv[active] = update_adv
                    best_lr_mult = best_lr[active_and_update_adv]
                    for _ in range(len(x.shape) - 1):
                        best_lr_mult = best_lr_mult[:, np.newaxis]

                    best_13 = best_lr_mult * perturbation_tanh[update_adv]
                    x_adv_batch_tanh[active_and_update_adv] = x_adv_batch_tanh[
                        active_and_update_adv] + best_13
                    x_adv_batch[active_and_update_adv] = tanh_to_original(
                        x_adv_batch_tanh[active_and_update_adv],
                        clip_min[active_and_update_adv],
                        clip_max[active_and_update_adv],
                    )
                    (
                        z_logits[active_and_update_adv],
                        loss[active_and_update_adv],
                    ) = self._loss(
                        x_adv_batch[active_and_update_adv],
                        y_batch[active_and_update_adv],
                    )
                    attack_success = loss <= 0

            # Update depending on attack success:
            x_adv_batch[~attack_success] = x_batch[~attack_success]
            x_adv[batch_index_1:batch_index_2] = x_adv_batch

        logger.info(
            "Success rate of C&W L_inf attack: %.2f%%",
            100 * compute_success(self.estimator,
                                  x,
                                  y,
                                  x_adv,
                                  self.targeted,
                                  batch_size=self.batch_size),
        )

        return x_adv
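
The C&W L_inf example optimizes in tanh space so that the box constraint `[clip_min, clip_max]` holds by construction; the helpers `original_to_tanh` and `tanh_to_original` are imported from ART and not shown here. A sketch of the usual change of variables they implement (the smoothing constant that keeps `arctanh` away from the endpoints is an assumption):

import numpy as np

def original_to_tanh_sketch(x, clip_min, clip_max, smoother=0.999999):
    # Rescale x from [clip_min, clip_max] to [-smoother, smoother], then map to
    # the real line; the smoother keeps arctanh finite at the box boundaries.
    x_unit = (x - clip_min) / (clip_max - clip_min)
    return np.arctanh((x_unit * 2.0 - 1.0) * smoother)

def tanh_to_original_sketch(x_tanh, clip_min, clip_max):
    # Inverse map: any real-valued x_tanh lands back inside [clip_min, clip_max].
    return (np.tanh(x_tanh) + 1.0) / 2.0 * (clip_max - clip_min) + clip_min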
Example #19
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,). If `self.targeted` is true, then `y` represents the target labels.
        :param x_adv_init: Initial array to act as initial adversarial examples. Same shape as `x`.
        :type x_adv_init: `np.ndarray`
        :return: An array holding the adversarial examples.
        """
        y = check_and_transform_label_format(y,
                                             self.estimator.nb_classes,
                                             return_one_hot=False)

        # Get clip_min and clip_max from the classifier or infer them from data
        if self.estimator.clip_values is not None:
            clip_min, clip_max = self.estimator.clip_values
        else:
            clip_min, clip_max = np.min(x), np.max(x)

        # Prediction from the original images
        preds = np.argmax(self.estimator.predict(x,
                                                 batch_size=self.batch_size),
                          axis=1)

        # Prediction from the initial adversarial examples if not None
        x_adv_init = kwargs.get("x_adv_init")

        if x_adv_init is not None:
            init_preds = np.argmax(self.estimator.predict(
                x_adv_init, batch_size=self.batch_size),
                                   axis=1)
        else:
            init_preds = [None] * len(x)
            x_adv_init = [None] * len(x)

        # Assert that, if attack is targeted, y is provided
        if self.targeted and y is None:
            raise ValueError(
                "Target labels `y` need to be provided for a targeted attack.")

        # Some initial setups
        x_adv = x.astype(ART_NUMPY_DTYPE)

        # Generate the adversarial samples
        for ind, val in enumerate(
                tqdm(x_adv, desc="Boundary attack", disable=not self.verbose)):
            if self.targeted:
                x_adv[ind] = self._perturb(
                    x=val,
                    y=y[ind],
                    y_p=preds[ind],
                    init_pred=init_preds[ind],
                    adv_init=x_adv_init[ind],
                    clip_min=clip_min,
                    clip_max=clip_max,
                )
            else:
                x_adv[ind] = self._perturb(
                    x=val,
                    y=-1,
                    y_p=preds[ind],
                    init_pred=init_preds[ind],
                    adv_init=x_adv_init[ind],
                    clip_min=clip_min,
                    clip_max=clip_max,
                )

        if y is not None:
            y = to_categorical(y, self.estimator.nb_classes)

        logger.info(
            "Success rate of Boundary attack: %.2f%%",
            100 * compute_success(self.estimator,
                                  x,
                                  y,
                                  x_adv,
                                  self.targeted,
                                  batch_size=self.batch_size),
        )

        return x_adv
Example #20
    def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
                  samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
                  (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.

        :param mask: An array with a mask broadcastable to input `x` defining where to apply adversarial perturbations.
                     Shape needs to be broadcastable to the shape of x and can also be of the same shape as `x`. Any
                     features for which the mask is zero will not be adversarially perturbed.
        :type mask: `np.ndarray`
        :return: An array holding the adversarial examples.
        """
        mask = self._get_mask(x, **kwargs)

        # Ensure eps is broadcastable
        self._check_compatibility_input_and_eps(x=x)

        # Check whether random eps is enabled
        self._random_eps()

        if isinstance(self.estimator, ClassifierMixin):
            # Set up targets
            targets = self._set_targets(x, y)

            # Start to compute adversarial examples
            adv_x = x.astype(ART_NUMPY_DTYPE)

            for batch_id in range(int(np.ceil(x.shape[0] / float(self.batch_size)))):

                self._batch_id = batch_id

                for rand_init_num in trange(
                    max(1, self.num_random_init), desc="PGD - Random Initializations", disable=not self.verbose
                ):
                    batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size
                    batch_index_2 = min(batch_index_2, x.shape[0])
                    batch = x[batch_index_1:batch_index_2]
                    batch_labels = targets[batch_index_1:batch_index_2]
                    mask_batch = mask

                    if mask is not None:
                        if len(mask.shape) == len(x.shape):
                            mask_batch = mask[batch_index_1:batch_index_2]

                    for i_max_iter in trange(
                        self.max_iter, desc="PGD - Iterations", leave=False, disable=not self.verbose
                    ):
                        self._i_max_iter = i_max_iter

                        batch = self._compute(
                            batch,
                            x[batch_index_1:batch_index_2],
                            batch_labels,
                            mask_batch,
                            self.eps,
                            self.eps_step,
                            self._project,
                            self.num_random_init > 0 and i_max_iter == 0,
                            self._batch_id,
                        )

                    if rand_init_num == 0:
                        # initial (and possibly only) random restart: we only have this set of
                        # adversarial examples for now
                        adv_x[batch_index_1:batch_index_2] = np.copy(batch)
                    else:
                        # replace adversarial examples if they are successful
                        attack_success = compute_success_array(
                            self.estimator,  # type: ignore
                            x[batch_index_1:batch_index_2],
                            targets[batch_index_1:batch_index_2],
                            batch,
                            self.targeted,
                            batch_size=self.batch_size,
                        )
                        adv_x[batch_index_1:batch_index_2][attack_success] = batch[attack_success]

            logger.info(
                "Success rate of attack: %.2f%%",
                100
                * compute_success(
                    self.estimator,  # type: ignore
                    x,
                    targets,
                    adv_x,
                    self.targeted,
                    batch_size=self.batch_size,  # type: ignore
                ),
            )
        else:
            if self.num_random_init > 0:  # pragma: no cover
                raise ValueError("Random initialisation is only supported for classification.")

            # Set up targets
            targets = self._set_targets(x, y, classifier_mixin=False)

            # Start to compute adversarial examples
            if x.dtype == object:
                adv_x = x.copy()
            else:
                adv_x = x.astype(ART_NUMPY_DTYPE)

            for i_max_iter in trange(self.max_iter, desc="PGD - Iterations", disable=not self.verbose):
                self._i_max_iter = i_max_iter

                adv_x = self._compute(
                    adv_x,
                    x,
                    targets,
                    mask,
                    self.eps,
                    self.eps_step,
                    self._project,
                    self.num_random_init > 0 and i_max_iter == 0,
                )

        if self.summary_writer is not None:
            self.summary_writer.reset()

        return adv_x
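This method appears to be the NumPy backend of ART's ProjectedGradientDescent: it supports random restarts and an optional `mask` kwarg that zeroes out the perturbation on selected features. A minimal usage sketch, assuming the `adversarial-robustness-toolbox` and scikit-learn packages are installed and that ART's logistic-regression wrapper exposes loss gradients; all variable names below are illustrative:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

from art.attacks.evasion import ProjectedGradientDescent
from art.estimators.classification import SklearnClassifier

# Illustrative setup: a small scikit-learn classifier wrapped as an ART estimator
x, y = load_iris(return_X_y=True)
x = x.astype(np.float32)
model = LogisticRegression(max_iter=1000).fit(x, y)
classifier = SklearnClassifier(model=model, clip_values=(float(x.min()), float(x.max())))

# Only the first two features may be perturbed; masked-out features are left untouched
mask = np.zeros_like(x)
mask[:, :2] = 1

attack = ProjectedGradientDescent(
    estimator=classifier, eps=0.5, eps_step=0.1, max_iter=10, num_random_init=1, verbose=False
)
x_adv = attack.generate(x=x, mask=mask)

# Perturbation on the masked features should be zero
print(np.max(np.abs(x_adv[:, 2:] - x[:, 2:])))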
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
                  samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
                  (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
        :param mask: An array with a mask to be applied to the adversarial perturbations. Shape needs to be
                     broadcastable to the shape of x. Any features for which the mask is zero will not be adversarially
                     perturbed.
        :type mask: `np.ndarray`
        :return: An array holding the adversarial examples.
        """
        import tensorflow as tf  # lgtm [py/repeated-import]

        # Check whether random eps is enabled
        self._random_eps()

        # Set up targets
        targets = self._set_targets(x, y)

        # Get the mask
        mask = self._get_mask(x, **kwargs)

        # Create dataset
        if mask is not None:
            # Here we need to make a distinction: if the masks are different for each input, we need to index
            # those for the current batch. Otherwise (i.e. mask is meant to be broadcasted), keep it as it is.
            if len(mask.shape) == len(x.shape):
                dataset = tf.data.Dataset.from_tensor_slices((
                    x.astype(ART_NUMPY_DTYPE),
                    targets.astype(ART_NUMPY_DTYPE),
                    mask.astype(ART_NUMPY_DTYPE),
                )).batch(self.batch_size, drop_remainder=False)

            else:
                dataset = tf.data.Dataset.from_tensor_slices((
                    x.astype(ART_NUMPY_DTYPE),
                    targets.astype(ART_NUMPY_DTYPE),
                    np.array([mask.astype(ART_NUMPY_DTYPE)] * x.shape[0]),
                )).batch(self.batch_size, drop_remainder=False)

        else:
            dataset = tf.data.Dataset.from_tensor_slices((
                x.astype(ART_NUMPY_DTYPE),
                targets.astype(ART_NUMPY_DTYPE),
            )).batch(self.batch_size, drop_remainder=False)

        # Start to compute adversarial examples
        adv_x_best = None
        rate_best = None

        for _ in range(max(1, self.num_random_init)):
            adv_x = x.astype(ART_NUMPY_DTYPE)
            data_loader = iter(dataset)

            # Compute perturbation with batching
            for (batch_id, batch_all) in enumerate(data_loader):
                if mask is not None:
                    (batch, batch_labels,
                     mask_batch) = batch_all[0], batch_all[1], batch_all[2]
                else:
                    (batch, batch_labels,
                     mask_batch) = batch_all[0], batch_all[1], None

                batch_index_1, batch_index_2 = batch_id * self.batch_size, (
                    batch_id + 1) * self.batch_size
                adv_x[batch_index_1:batch_index_2] = self._generate_batch(
                    batch, batch_labels, mask_batch)

            if self.num_random_init > 1:
                rate = 100 * compute_success(self.estimator,
                                             x,
                                             targets,
                                             adv_x,
                                             self.targeted,
                                             batch_size=self.batch_size)
                if rate_best is None or rate > rate_best or adv_x_best is None:
                    rate_best = rate
                    adv_x_best = adv_x
            else:
                adv_x_best = adv_x

        logger.info(
            "Success rate of attack: %.2f%%",
            rate_best if rate_best is not None else 100 *
            compute_success(self.estimator,
                            x,
                            y,
                            adv_x_best,
                            self.targeted,
                            batch_size=self.batch_size),
        )

        return adv_x_best
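The restart loop above keeps the candidate with the highest success rate as measured by `compute_success` from `art.utils`. A minimal sketch of that selection logic in isolation; the function `pick_best_restart` and its arguments are hypothetical and assume an ART classifier, clean inputs, labels, and a list of candidate adversarial arrays are already available:

import numpy as np
from art.utils import compute_success

def pick_best_restart(classifier, x, y, candidates, targeted=False, batch_size=32):
    """Return the candidate adversarial batch with the highest attack success rate (in percent)."""
    adv_x_best, rate_best = None, None
    for adv_x in candidates:
        rate = 100 * compute_success(classifier, x, y, adv_x, targeted, batch_size=batch_size)
        if rate_best is None or rate > rate_best:
            rate_best, adv_x_best = rate, adv_x
    return adv_x_best, rate_best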
    def generate(self, x, y=None, **kwargs):
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :type x: `np.ndarray`
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  `(nb_samples,)`.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        y = check_and_transform_label_format(y, self.classifier.nb_classes())

        # Initialize variables
        dims = list(x.shape[1:])
        self._nb_features = np.prod(dims)
        x_adv = np.reshape(x.astype(ART_NUMPY_DTYPE), (-1, self._nb_features))
        preds = np.argmax(self.classifier.predict(x, batch_size=self.batch_size), axis=1)

        # Determine target classes for attack
        if y is None:
            # Randomly choose target from the incorrect classes for each sample
            from art.utils import random_targets
            targets = np.argmax(random_targets(preds, self.classifier.nb_classes()), axis=1)
        else:
            targets = np.argmax(y, axis=1)

        # Compute perturbation with implicit batching
        for batch_id in range(int(np.ceil(x_adv.shape[0] / float(self.batch_size)))):
            batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size
            batch = x_adv[batch_index_1:batch_index_2]

            # Main algorithm for each batch
            # Initialize the search space; optimize to remove features that can't be changed
            search_space = np.zeros(batch.shape)
            if hasattr(self.classifier, 'clip_values') and self.classifier.clip_values is not None:
                clip_min, clip_max = self.classifier.clip_values
                if self.theta > 0:
                    search_space[batch < clip_max] = 1
                else:
                    search_space[batch > clip_min] = 1

            # Get current predictions
            current_pred = preds[batch_index_1:batch_index_2]
            target = targets[batch_index_1:batch_index_2]
            active_indices = np.where(current_pred != target)[0]
            all_feat = np.zeros_like(batch)

            while active_indices.size != 0:
                # Compute saliency map
                feat_ind = self._saliency_map(np.reshape(batch, [batch.shape[0]] + dims)[active_indices],
                                              target[active_indices], search_space[active_indices])

                # Update used features
                all_feat[active_indices, feat_ind[:, 0]] = 1
                all_feat[active_indices, feat_ind[:, 1]] = 1

                # Apply attack with clipping
                if hasattr(self.classifier, 'clip_values') and self.classifier.clip_values is not None:
                    # Prepare update depending of theta
                    if self.theta > 0:
                        clip_func, clip_value = np.minimum, clip_max
                    else:
                        clip_func, clip_value = np.maximum, clip_min

                    # Update adversarial examples
                    tmp_batch = batch[active_indices]
                    tmp_batch[np.arange(len(active_indices)), feat_ind[:, 0]] = \
                        clip_func(clip_value, tmp_batch[np.arange(len(active_indices)), feat_ind[:, 0]] + self.theta)
                    tmp_batch[np.arange(len(active_indices)), feat_ind[:, 1]] = \
                        clip_func(clip_value, tmp_batch[np.arange(len(active_indices)), feat_ind[:, 1]] + self.theta)
                    batch[active_indices] = tmp_batch

                    # Remove indices from search space if max/min values were reached
                    search_space[batch == clip_value] = 0

                # Apply attack without clipping
                else:
                    tmp_batch = batch[active_indices]
                    tmp_batch[np.arange(len(active_indices)), feat_ind[:, 0]] += self.theta
                    tmp_batch[np.arange(len(active_indices)), feat_ind[:, 1]] += self.theta
                    batch[active_indices] = tmp_batch

                # Recompute model prediction
                current_pred = np.argmax(self.classifier.predict(np.reshape(batch, [batch.shape[0]] + dims)), axis=1)

                # Update active_indices
                active_indices = np.where((current_pred != target) *
                                          (np.sum(all_feat, axis=1) / self._nb_features <= self.gamma) *
                                          (np.sum(search_space, axis=1) > 0))[0]

            x_adv[batch_index_1:batch_index_2] = batch

        x_adv = np.reshape(x_adv, x.shape)

        logger.info('Success rate of JSMA attack: %.2f%%',
                    100 * compute_success(self.classifier, x, y, x_adv, batch_size=self.batch_size))

        return x_adv
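The JSMA routine above greedily perturbs the two most salient features per step: `theta` sets the per-feature change (its sign decides whether values are pushed towards the upper or lower clip bound) and `gamma` caps the fraction of features that may be modified. A hedged usage sketch with ART's SaliencyMapMethod, assuming a gradient-providing ART classifier such as the scikit-learn logistic-regression wrapper; names and parameter values are illustrative:

import numpy as np
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression

from art.attacks.evasion import SaliencyMapMethod
from art.estimators.classification import SklearnClassifier

x, y = load_digits(return_X_y=True)
x = (x / 16.0).astype(np.float32)          # scale pixel features into [0, 1]
model = LogisticRegression(max_iter=2000).fit(x, y)
classifier = SklearnClassifier(model=model, clip_values=(0.0, 1.0))

# theta > 0 pushes selected features towards clip_max; gamma limits modified features to 20%
attack = SaliencyMapMethod(classifier=classifier, theta=0.3, gamma=0.2, batch_size=64)
x_adv = attack.generate(x=x[:32])          # targets are drawn at random when y is None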
Exemple #23
0
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
                  samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
                  (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.

        :param mask: An array with a mask to be applied to the adversarial perturbations. Shape needs to be
                     broadcastable to the shape of x. Any features for which the mask is zero will not be adversarially
                     perturbed.
        :type mask: `np.ndarray`
        :return: An array holding the adversarial examples.
        """
        # Check whether random eps is enabled
        self._random_eps()

        if isinstance(self.estimator, ClassifierMixin):
            # Set up targets
            targets = self._set_targets(x, y)

            # Get the mask
            mask = self._get_mask(x, **kwargs)

            # Start to compute adversarial examples
            adv_x_best = None
            rate_best = None

            for _ in trange(max(1, self.num_random_init),
                            desc="PGD - Random Initializations",
                            disable=not self.verbose):
                adv_x = x.astype(ART_NUMPY_DTYPE)

                for i_max_iter in trange(self.max_iter,
                                         desc="PGD - Iterations",
                                         leave=False,
                                         disable=not self.verbose):
                    adv_x = self._compute(
                        adv_x,
                        x,
                        targets,
                        mask,
                        self.eps,
                        self.eps_step,
                        self._project,
                        self.num_random_init > 0 and i_max_iter == 0,
                    )

                if self.num_random_init > 1:
                    rate = 100 * compute_success(
                        self.estimator,
                        x,
                        targets,
                        adv_x,
                        self.targeted,
                        batch_size=self.batch_size,  # type: ignore
                    )
                    if rate_best is None or rate > rate_best or adv_x_best is None:
                        rate_best = rate
                        adv_x_best = adv_x
                else:
                    adv_x_best = adv_x

            logger.info(
                "Success rate of attack: %.2f%%",
                rate_best if rate_best is not None else 100 * compute_success(
                    self.estimator,
                    x,
                    y,
                    adv_x_best,
                    self.targeted,
                    batch_size=self.batch_size,  # type: ignore
                ),
            )
        else:
            if self.num_random_init > 0:
                raise ValueError(
                    "Random initialisation is only supported for classification."
                )

            # Set up targets
            targets = self._set_targets(x, y, classifier_mixin=False)

            # Get the mask
            mask = self._get_mask(x, **kwargs)

            # Start to compute adversarial examples
            if x.dtype == object:
                adv_x = x.copy()
            else:
                adv_x = x.astype(ART_NUMPY_DTYPE)

            for i_max_iter in trange(self.max_iter,
                                     desc="PGD - Iterations",
                                     disable=not self.verbose):
                adv_x = self._compute(
                    adv_x,
                    x,
                    targets,
                    mask,
                    self.eps,
                    self.eps_step,
                    self._project,
                    self.num_random_init > 0 and i_max_iter == 0,
                )

            adv_x_best = adv_x

        return adv_x_best
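As in the other examples, the target setup here ultimately relies on `art.utils.check_and_transform_label_format`, which accepts either class indices or one-hot labels for `y` and normalizes them to one-hot form. A minimal sketch of that conversion; the shapes and values below are illustrative:

import numpy as np
from art.utils import check_and_transform_label_format

y_indices = np.array([0, 2, 1])                      # class indices, shape (nb_samples,)
y_one_hot = check_and_transform_label_format(y_indices, 3)  # second argument is nb_classes

# y_one_hot has shape (nb_samples, nb_classes); rows encode classes 0, 2 and 1 respectively
print(y_one_hot.shape)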
Exemple #24
0
    def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :param y: An array with the original labels to be predicted.
        :return: An array holding the adversarial examples.
        """
        x_adv = x.astype(ART_NUMPY_DTYPE)
        preds = self.estimator.predict(x_adv, batch_size=self.batch_size)

        if self.estimator.nb_classes == 2 and preds.shape[1] == 1:
            raise ValueError(
                "This attack has not yet been tested for binary classification with a single output classifier."
            )

        if (preds < 0.0).any() or (preds > 1.0).any():
            raise TypeError(
                "This attack requires a classifier predicting probabilities in the range [0, 1] as output."
                "Values smaller than 0.0 or larger than 1.0 have been detected."
            )
        # preds_rescaled = self._rescale(preds) # Rescaling needs more testing
        preds_rescaled = preds

        # Compute perturbation with implicit batching
        for batch_id in trange(
            int(np.ceil(x_adv.shape[0] / float(self.batch_size))), desc="VAT", disable=not self.verbose
        ):
            batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size
            batch = x_adv[batch_index_1:batch_index_2]
            batch = batch.reshape((batch.shape[0], -1))

            # Main algorithm for each batch
            var_d = np.random.randn(*batch.shape).astype(ART_NUMPY_DTYPE)

            # Main loop of the algorithm
            for _ in range(self.max_iter):
                var_d = self._normalize(var_d)
                preds_new = self.estimator.predict((batch + var_d).reshape((-1,) + self.estimator.input_shape))
                if (preds_new < 0.0).any() or (preds_new > 1.0).any():
                    raise TypeError(
                        "This attack requires a classifier predicting probabilities in the range [0, 1] as "
                        "output. Values smaller than 0.0 or larger than 1.0 have been detected."
                    )
                # preds_new_rescaled = self._rescale(preds_new) # Rescaling needs more testing
                preds_new_rescaled = preds_new

                from scipy.stats import entropy

                kl_div1 = entropy(
                    np.transpose(preds_rescaled[batch_index_1:batch_index_2]),
                    np.transpose(preds_new_rescaled),
                )

                var_d_new = np.zeros(var_d.shape).astype(ART_NUMPY_DTYPE)
                for current_index in range(var_d.shape[1]):
                    var_d[:, current_index] += self.finite_diff
                    preds_new = self.estimator.predict((batch + var_d).reshape((-1,) + self.estimator.input_shape))
                    if (preds_new < 0.0).any() or (preds_new > 1.0).any():
                        raise TypeError(
                            "This attack requires a classifier predicting probabilities in the range [0, 1]"
                            "as output. Values smaller than 0.0 or larger than 1.0 have been detected."
                        )
                    # preds_new_rescaled = self._rescale(preds_new) # Rescaling needs more testing
                    preds_new_rescaled = preds_new

                    kl_div2 = entropy(
                        np.transpose(preds_rescaled[batch_index_1:batch_index_2]),
                        np.transpose(preds_new_rescaled),
                    )
                    var_d_new[:, current_index] = (kl_div2 - kl_div1) / self.finite_diff
                    var_d[:, current_index] -= self.finite_diff
                var_d = var_d_new

            # Apply perturbation and clip
            if self.estimator.clip_values is not None:
                clip_min, clip_max = self.estimator.clip_values
                x_adv[batch_index_1:batch_index_2] = np.clip(
                    batch + self.eps * self._normalize(var_d), clip_min, clip_max
                ).reshape((-1,) + self.estimator.input_shape)
            else:
                x_adv[batch_index_1:batch_index_2] = (batch + self.eps * self._normalize(var_d)).reshape(
                    (-1,) + self.estimator.input_shape
                )

        logger.info(
            "Success rate of virtual adversarial attack: %.2f%%",
            100 * compute_success(self.estimator, x, y, x_adv, batch_size=self.batch_size),
        )

        return x_adv
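The virtual adversarial perturbation above is estimated by finite differences of a KL divergence between predictions, so the estimator only needs to return probabilities rather than gradients. A usage sketch with ART's VirtualAdversarialMethod, assuming a probability-outputting ART classifier; the setup and parameter values are illustrative placeholders:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

from art.attacks.evasion import VirtualAdversarialMethod
from art.estimators.classification import SklearnClassifier

x, y = load_iris(return_X_y=True)
x = x.astype(np.float32)
model = LogisticRegression(max_iter=1000).fit(x, y)
classifier = SklearnClassifier(model=model, clip_values=(float(x.min()), float(x.max())))

# eps scales the final perturbation; finite_diff is the step used for the numerical gradient
attack = VirtualAdversarialMethod(classifier=classifier, eps=0.3, finite_diff=1e-6, max_iter=3, batch_size=32)
x_adv = attack.generate(x=x[:16])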
Exemple #25
0
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
                  samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
                  (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
        :param mask: An array with a mask broadcastable to input `x` defining where to apply adversarial perturbations.
                     Shape needs to be broadcastable to the shape of x and can also be of the same shape as `x`. Any
                     features for which the mask is zero will not be adversarially perturbed.
        :type mask: `np.ndarray`
        :return: An array holding the adversarial examples.
        """
        mask = self._get_mask(x, **kwargs)

        # Ensure eps is broadcastable
        self._check_compatibility_input_and_eps(x=x)

        if isinstance(self.estimator, ClassifierMixin):
            if y is not None:
                y = check_and_transform_label_format(y,
                                                     self.estimator.nb_classes)

            if y is None:
                # Throw error if attack is targeted, but no targets are provided
                if self.targeted:  # pragma: no cover
                    raise ValueError(
                        "Target labels `y` need to be provided for a targeted attack."
                    )

                # Use model predictions as correct outputs
                logger.info(
                    "Using model predictions as correct labels for FGM.")
                y_array = get_labels_np_array(
                    self.estimator.predict(
                        x, batch_size=self.batch_size))  # type: ignore
            else:
                y_array = y

            if self.estimator.nb_classes > 2:
                y_array = y_array / np.sum(y_array, axis=1, keepdims=True)

            # Return adversarial examples computed with minimal perturbation if option is active
            adv_x_best = x
            if self.minimal:
                logger.info("Performing minimal perturbation FGM.")
                adv_x_best = self._minimal_perturbation(x, y_array, mask)
                rate_best = 100 * compute_success(
                    self.estimator,  # type: ignore
                    x,
                    y_array,
                    adv_x_best,
                    self.targeted,
                    batch_size=self.batch_size,  # type: ignore
                )
            else:
                rate_best = 0.0
                for _ in range(max(1, self.num_random_init)):
                    adv_x = self._compute(
                        x,
                        x,
                        y_array,
                        mask,
                        self.eps,
                        self.eps,
                        self._project,
                        self.num_random_init > 0,
                    )

                    if self.num_random_init > 1:
                        rate = 100 * compute_success(
                            self.estimator,  # type: ignore
                            x,
                            y_array,
                            adv_x,
                            self.targeted,
                            batch_size=self.batch_size,  # type: ignore
                        )
                        if rate > rate_best:
                            rate_best = rate
                            adv_x_best = adv_x
                    else:
                        adv_x_best = adv_x

            logger.info(
                "Success rate of FGM attack: %.2f%%",
                rate_best if rate_best is not None else 100 * compute_success(
                    self.estimator,  # type: ignore
                    x,
                    y_array,
                    adv_x_best,
                    self.targeted,
                    batch_size=self.batch_size,
                ),
            )

        else:
            if self.minimal:  # pragma: no cover
                raise ValueError(
                    "Minimal perturbation is only supported for classification."
                )

            if y is None:
                # Throw error if attack is targeted, but no targets are provided
                if self.targeted:  # pragma: no cover
                    raise ValueError(
                        "Target labels `y` need to be provided for a targeted attack."
                    )

                # Use model predictions as correct outputs
                logger.info(
                    "Using model predictions as correct labels for FGM.")
                y_array = self.estimator.predict(x, batch_size=self.batch_size)
            else:
                y_array = y

            adv_x_best = self._compute(
                x,
                x,
                y_array,
                None,
                self.eps,
                self.eps,
                self._project,
                self.num_random_init > 0,
            )

        if self.summary_writer is not None:
            self.summary_writer.reset()

        return adv_x_best
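When `minimal=True`, the FGM implementation above searches per sample for the smallest perturbation budget (in increments of `eps_step`, up to `eps`) that already changes the prediction, instead of always applying the full budget. An illustrative usage sketch; names and values are placeholders and assume ART's scikit-learn wrapper provides loss gradients:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

from art.attacks.evasion import FastGradientMethod
from art.estimators.classification import SklearnClassifier

x, y = load_iris(return_X_y=True)
x = x.astype(np.float32)
model = LogisticRegression(max_iter=1000).fit(x, y)
classifier = SklearnClassifier(model=model, clip_values=(float(x.min()), float(x.max())))

# minimal=True: per-sample search for the smallest perturbation up to eps, in steps of eps_step
attack = FastGradientMethod(estimator=classifier, eps=1.0, eps_step=0.05, minimal=True)
x_adv = attack.generate(x=x)

per_sample_linf = np.max(np.abs(x_adv - x), axis=1)   # the used budget differs per sample
print(per_sample_linf[:5])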
Exemple #26
0
    def generate(self, x, y=None):
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :type x: `np.ndarray`
        :param y: If `self.targeted` is true, then `y` represents the target labels. Otherwise, the targets are
                  the original class labels.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """
        x_adv = x  #.astype(NUMPY_DTYPE)
        if hasattr(self.classifier,
                   'clip_values') and self.classifier.clip_values is not None:
            clip_min, clip_max = self.classifier.clip_values
        else:
            clip_min, clip_max = np.amin(x), np.amax(x)

        # Assert that, if attack is targeted, y_val is provided:
        if self.targeted and y is None:
            raise ValueError(
                'Target labels `y` need to be provided for a targeted attack.')

        # No labels provided, use model prediction as correct class
        if y is None:
            y = get_labels_np_array(self.classifier.predict(x, logits=False))

        # Compute perturbation with implicit batching
        nb_batches = int(np.ceil(x_adv.shape[0] / float(self.batch_size)))
        for batch_id in range(nb_batches):
            logger.debug('Processing batch %i out of %i', batch_id, nb_batches)

            batch_index_1, batch_index_2 = batch_id * self.batch_size, (
                batch_id + 1) * self.batch_size
            x_batch = x_adv[batch_index_1:batch_index_2]
            y_batch = y[batch_index_1:batch_index_2]

            # The optimization is performed in tanh space to keep the adversarial images bounded in correct range
            x_batch_tanh = original_to_tanh(x_batch, clip_min, clip_max,
                                            self._tanh_smoother)

            # Initialize binary search:
            c = self.initial_const * np.ones(x_batch.shape[0])
            c_lower_bound = np.zeros(x_batch.shape[0])
            c_double = (np.ones(x_batch.shape[0]) > 0)

            # Initialize placeholders for best l2 distance and attack found so far
            best_l2dist = np.inf * np.ones(x_batch.shape[0])
            best_x_adv_batch = x_batch.copy()

            for bss in range(self.binary_search_steps):
                logger.debug('Binary search step %i out of %i (c_mean==%f)',
                             bss, self.binary_search_steps, np.mean(c))
                nb_active = int(np.sum(c < self._c_upper_bound))
                logger.debug(
                    'Number of samples with c < _c_upper_bound: %i out of %i',
                    nb_active, x_batch.shape[0])
                if nb_active == 0:
                    break
                lr = self.learning_rate * np.ones(x_batch.shape[0])

                # Initialize perturbation in tanh space:
                x_adv_batch = x_batch.copy()
                x_adv_batch_tanh = x_batch_tanh.copy()

                z, l2dist, loss = self._loss(x_batch, x_adv_batch, y_batch, c)
                attack_success = (loss - l2dist <= 0)
                overall_attack_success = attack_success

                for it in range(self.max_iter):
                    logger.debug('Iteration step %i out of %i', it,
                                 self.max_iter)
                    logger.debug('Average Loss: %f', np.mean(loss))
                    logger.debug('Average L2Dist: %f', np.mean(l2dist))
                    logger.debug('Average Margin Loss: %f',
                                 np.mean(loss - l2dist))
                    logger.debug(
                        'Current number of succeeded attacks: %i out of %i',
                        int(np.sum(attack_success)), len(attack_success))

                    improved_adv = attack_success & (l2dist < best_l2dist)
                    logger.debug('Number of improved L2 distances: %i',
                                 int(np.sum(improved_adv)))
                    if np.sum(improved_adv) > 0:
                        best_l2dist[improved_adv] = l2dist[improved_adv]
                        best_x_adv_batch[improved_adv] = x_adv_batch[
                            improved_adv]

                    active = (c < self._c_upper_bound) & (lr > 0)
                    nb_active = int(np.sum(active))
                    logger.debug(
                        'Number of samples with c < _c_upper_bound and lr > 0: %i out of %i',
                        nb_active, x_batch.shape[0])
                    if nb_active == 0:
                        break

                    # compute gradient:
                    logger.debug('Compute loss gradient')
                    perturbation_tanh = -self._loss_gradient(
                        z[active], y_batch[active], x_batch[active],
                        x_adv_batch[active], x_adv_batch_tanh[active],
                        c[active], clip_min, clip_max)

                    # perform line search to optimize perturbation
                    # first, halve the learning rate until perturbation actually decreases the loss:
                    prev_loss = loss.copy()
                    best_loss = loss.copy()
                    best_lr = np.zeros(x_batch.shape[0])
                    halving = np.zeros(x_batch.shape[0])

                    for h in range(self.max_halving):
                        logger.debug('Perform halving iteration %i out of %i',
                                     h, self.max_halving)
                        do_halving = (loss[active] >= prev_loss[active])
                        logger.debug('Halving to be performed on %i samples',
                                     int(np.sum(do_halving)))
                        if np.sum(do_halving) == 0:
                            break
                        active_and_do_halving = active.copy()
                        active_and_do_halving[active] = do_halving

                        lr_mult = lr[active_and_do_halving]
                        for _ in range(len(x.shape) - 1):
                            lr_mult = lr_mult[:, np.newaxis]

                        new_x_adv_batch_tanh = x_adv_batch_tanh[active_and_do_halving] + \
                            lr_mult * perturbation_tanh[do_halving]
                        new_x_adv_batch = tanh_to_original(
                            new_x_adv_batch_tanh, clip_min, clip_max,
                            self._tanh_smoother)
                        _, l2dist[active_and_do_halving], loss[
                            active_and_do_halving] = self._loss(
                                x_batch[active_and_do_halving],
                                new_x_adv_batch,
                                y_batch[active_and_do_halving],
                                c[active_and_do_halving])

                        logger.debug('New Average Loss: %f', np.mean(loss))
                        logger.debug('New Average L2Dist: %f', np.mean(l2dist))
                        logger.debug('New Average Margin Loss: %f',
                                     np.mean(loss - l2dist))

                        best_lr[loss < best_loss] = lr[loss < best_loss]
                        best_loss[loss < best_loss] = loss[loss < best_loss]
                        lr[active_and_do_halving] /= 2
                        halving[active_and_do_halving] += 1
                    lr[active] *= 2

                    # if no halving was actually required, double the learning rate as long as this
                    # decreases the loss:
                    for d in range(self.max_doubling):
                        logger.debug('Perform doubling iteration %i out of %i',
                                     d, self.max_doubling)
                        do_doubling = (halving[active] == 1) & (
                            loss[active] <= best_loss[active])
                        logger.debug('Doubling to be performed on %i samples',
                                     int(np.sum(do_doubling)))
                        if np.sum(do_doubling) == 0:
                            break
                        active_and_do_doubling = active.copy()
                        active_and_do_doubling[active] = do_doubling
                        lr[active_and_do_doubling] *= 2

                        lr_mult = lr[active_and_do_doubling]
                        for _ in range(len(x.shape) - 1):
                            lr_mult = lr_mult[:, np.newaxis]

                        new_x_adv_batch_tanh = x_adv_batch_tanh[active_and_do_doubling] + \
                            lr_mult * perturbation_tanh[do_doubling]
                        new_x_adv_batch = tanh_to_original(
                            new_x_adv_batch_tanh, clip_min, clip_max,
                            self._tanh_smoother)
                        _, l2dist[active_and_do_doubling], loss[
                            active_and_do_doubling] = self._loss(
                                x_batch[active_and_do_doubling],
                                new_x_adv_batch,
                                y_batch[active_and_do_doubling],
                                c[active_and_do_doubling])
                        logger.debug('New Average Loss: %f', np.mean(loss))
                        logger.debug('New Average L2Dist: %f', np.mean(l2dist))
                        logger.debug('New Average Margin Loss: %f',
                                     np.mean(loss - l2dist))
                        best_lr[loss < best_loss] = lr[loss < best_loss]
                        best_loss[loss < best_loss] = loss[loss < best_loss]

                    lr[halving == 1] /= 2

                    update_adv = (best_lr[active] > 0)
                    logger.debug(
                        'Number of adversarial samples to be finally updated: %i',
                        int(np.sum(update_adv)))

                    if np.sum(update_adv) > 0:
                        active_and_update_adv = active.copy()
                        active_and_update_adv[active] = update_adv
                        best_lr_mult = best_lr[active_and_update_adv]
                        for _ in range(len(x.shape) - 1):
                            best_lr_mult = best_lr_mult[:, np.newaxis]
                        x_adv_batch_tanh[active_and_update_adv] = x_adv_batch_tanh[active_and_update_adv] + \
                            best_lr_mult * perturbation_tanh[update_adv]
                        x_adv_batch[active_and_update_adv] = tanh_to_original(
                            x_adv_batch_tanh[active_and_update_adv], clip_min,
                            clip_max, self._tanh_smoother)
                        z[active_and_update_adv], l2dist[active_and_update_adv], loss[active_and_update_adv] = \
                            self._loss(x_batch[active_and_update_adv], x_adv_batch[active_and_update_adv],
                                       y_batch[active_and_update_adv], c[active_and_update_adv])
                        attack_success = (loss - l2dist <= 0)
                        overall_attack_success = overall_attack_success | attack_success

                # Update depending on attack success:
                improved_adv = attack_success & (l2dist < best_l2dist)
                logger.debug('Number of improved L2 distances: %i',
                             int(np.sum(improved_adv)))

                if np.sum(improved_adv) > 0:
                    best_l2dist[improved_adv] = l2dist[improved_adv]
                    best_x_adv_batch[improved_adv] = x_adv_batch[improved_adv]

                c_double[overall_attack_success] = False
                c[overall_attack_success] = (c_lower_bound +
                                             c)[overall_attack_success] / 2

                c_old = c
                c[~overall_attack_success & c_double] *= 2
                c[~overall_attack_success
                  & ~c_double] += (c - c_lower_bound)[~overall_attack_success
                                                      & ~c_double] / 2
                c_lower_bound[~overall_attack_success] = c_old[
                    ~overall_attack_success]

            x_adv[batch_index_1:batch_index_2] = best_x_adv_batch
            rate = 100 * compute_success(self.classifier, x, y, x_adv,
                                         self.targeted)
            TrackedCW.tracked_x.append(
                (x_adv, rate, batch_id, best_l2dist.mean()))

        logger.info(
            'Success rate of C&W L_2 attack: %.2f%%',
            100 * compute_success(self.classifier, x, y, x_adv, self.targeted))

        return x_adv
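The loop above is a tracked variant of the Carlini & Wagner L_2 attack: an outer binary search tunes the constant c that trades off the margin loss against the L_2 distance, while an inner line search (halving and doubling the learning rate) optimizes the perturbation in tanh space. The standard attack is available in ART as CarliniL2Method; a hedged usage sketch with small iteration counts purely to keep the example fast, all names being illustrative:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

from art.attacks.evasion import CarliniL2Method
from art.estimators.classification import SklearnClassifier

x, y = load_iris(return_X_y=True)
x = x.astype(np.float32)
model = LogisticRegression(max_iter=1000).fit(x, y)
classifier = SklearnClassifier(model=model, clip_values=(float(x.min()), float(x.max())))

attack = CarliniL2Method(
    classifier=classifier,
    binary_search_steps=3,   # outer search over the trade-off constant c
    max_iter=5,              # inner optimization steps per binary-search step
    initial_const=0.01,
    verbose=False,
)
x_adv = attack.generate(x=x[:8])

l2_dist = np.linalg.norm((x_adv - x[:8]).reshape(8, -1), axis=1)
print(l2_dist)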
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :param y: An array with the original labels to be predicted.
        :return: An array holding the adversarial examples.
        """
        x_adv = x.astype(ART_NUMPY_DTYPE)
        preds = self.estimator.predict(x, batch_size=self.batch_size)

        if is_probability(preds[0]):
            logger.warning(
                "It seems that the attacked model is predicting probabilities. DeepFool expects logits as model output "
                "to achieve its full attack strength.")

        # Determine the class labels for which to compute the gradients
        use_grads_subset = self.nb_grads < self.estimator.nb_classes
        if use_grads_subset:
            # TODO compute set of unique labels per batch
            grad_labels = np.argsort(-preds, axis=1)[:, :self.nb_grads]
            labels_set = np.unique(grad_labels)
        else:
            labels_set = np.arange(self.estimator.nb_classes)
        sorter = np.arange(len(labels_set))

        # Pick a small scalar to avoid division by 0
        tol = 10e-8

        # Compute perturbation with implicit batching
        for batch_id in trange(int(
                np.ceil(x_adv.shape[0] / float(self.batch_size))),
                               desc="DeepFool",
                               disable=not self.verbose):
            batch_index_1, batch_index_2 = batch_id * self.batch_size, (
                batch_id + 1) * self.batch_size
            batch = x_adv[batch_index_1:batch_index_2].copy()

            # Get predictions and gradients for batch
            f_batch = preds[batch_index_1:batch_index_2]
            fk_hat = np.argmax(f_batch, axis=1)
            if use_grads_subset:
                # Compute gradients only for top predicted classes
                grd = np.array([
                    self.estimator.class_gradient(batch, label=_)
                    for _ in labels_set
                ])
                grd = np.squeeze(np.swapaxes(grd, 0, 2), axis=0)
            else:
                # Compute gradients for all classes
                grd = self.estimator.class_gradient(batch)

            # Get current predictions
            active_indices = np.arange(len(batch))
            current_step = 0
            while active_indices.size > 0 and current_step < self.max_iter:
                # Compute difference in predictions and gradients only for selected top predictions
                labels_indices = sorter[np.searchsorted(labels_set,
                                                        fk_hat,
                                                        sorter=sorter)]
                grad_diff = grd - grd[np.arange(len(grd)),
                                      labels_indices][:, None]
                f_diff = f_batch[:,
                                 labels_set] - f_batch[np.arange(len(f_batch)),
                                                       labels_indices][:, None]

                # Choose coordinate and compute perturbation
                norm = np.linalg.norm(grad_diff.reshape(
                    len(grad_diff), len(labels_set), -1),
                                      axis=2) + tol
                value = np.abs(f_diff) / norm
                value[np.arange(len(value)), labels_indices] = np.inf
                l_var = np.argmin(value, axis=1)
                absolute1 = abs(f_diff[np.arange(len(f_diff)), l_var])
                draddiff = grad_diff[np.arange(len(grad_diff)),
                                     l_var].reshape(len(grad_diff), -1)
                pow1 = (pow(
                    np.linalg.norm(draddiff, axis=1),
                    2,
                ) + tol)
                r_var = absolute1 / pow1
                r_var = r_var.reshape((-1, ) + (1, ) * (len(x.shape) - 1))
                r_var = r_var * grad_diff[np.arange(len(grad_diff)), l_var]

                # Add perturbation and clip result
                if self.estimator.clip_values is not None:
                    batch[active_indices] = np.clip(
                        batch[active_indices] + r_var[active_indices] *
                        (self.estimator.clip_values[1] -
                         self.estimator.clip_values[0]),
                        self.estimator.clip_values[0],
                        self.estimator.clip_values[1],
                    )
                else:
                    batch[active_indices] += r_var[active_indices]

                # Recompute prediction for new x
                f_batch = self.estimator.predict(batch)
                fk_i_hat = np.argmax(f_batch, axis=1)

                # Recompute gradients for new x
                if use_grads_subset:
                    # Compute gradients only for (originally) top predicted classes
                    grd = np.array([
                        self.estimator.class_gradient(batch, label=_)
                        for _ in labels_set
                    ])
                    grd = np.squeeze(np.swapaxes(grd, 0, 2), axis=0)
                else:
                    # Compute gradients for all classes
                    grd = self.estimator.class_gradient(batch)

                # Stop if misclassification has been achieved
                active_indices = np.where(fk_i_hat == fk_hat)[0]

                current_step += 1

            # Apply overshoot parameter
            x_adv1 = x_adv[batch_index_1:batch_index_2]
            x_adv2 = (1 + self.epsilon) * (batch -
                                           x_adv[batch_index_1:batch_index_2])
            x_adv[batch_index_1:batch_index_2] = x_adv1 + x_adv2
            if self.estimator.clip_values is not None:
                np.clip(
                    x_adv[batch_index_1:batch_index_2],
                    self.estimator.clip_values[0],
                    self.estimator.clip_values[1],
                    out=x_adv[batch_index_1:batch_index_2],
                )

        logger.info(
            "Success rate of DeepFool attack: %.2f%%",
            100 * compute_success(
                self.estimator, x, y, x_adv, batch_size=self.batch_size),
        )
        return x_adv
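DeepFool, as implemented above, linearizes the decision boundaries of the `nb_grads` most likely classes, repeatedly takes the smallest step that crosses the nearest boundary, and finally applies an overshoot of `epsilon`. A usage sketch with ART's DeepFool class; as the warning in the code notes, the attack is strongest on models that return logits rather than probabilities, and the names below are illustrative:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

from art.attacks.evasion import DeepFool
from art.estimators.classification import SklearnClassifier

x, y = load_iris(return_X_y=True)
x = x.astype(np.float32)
model = LogisticRegression(max_iter=1000).fit(x, y)
classifier = SklearnClassifier(model=model, clip_values=(float(x.min()), float(x.max())))

# nb_grads limits gradient computation to the top predicted classes; epsilon is the overshoot
attack = DeepFool(classifier=classifier, max_iter=50, epsilon=0.02, nb_grads=3, batch_size=32)
x_adv = attack.generate(x=x)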
Exemple #28
0
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
                  (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
                  samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
                  (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
        :param mask: An array with a mask broadcastable to input `x` defining where to apply adversarial perturbations.
                     Shape needs to be broadcastable to the shape of x and can also be of the same shape as `x`. Any
                     features for which the mask is zero will not be adversarially perturbed.
        :type mask: `np.ndarray`
        :return: An array holding the adversarial examples.
        """
        import tensorflow as tf  # lgtm [py/repeated-import]

        mask = self._get_mask(x, **kwargs)

        # Ensure eps is broadcastable
        self._check_compatibility_input_and_eps(x=x)

        # Check whether random eps is enabled
        self._random_eps()

        # Set up targets
        targets = self._set_targets(x, y)

        # Create dataset
        if mask is not None:
            # Here we need to make a distinction: if the masks are different for each input, we need to index
            # those for the current batch. Otherwise (i.e. mask is meant to be broadcasted), keep it as it is.
            if len(mask.shape) == len(x.shape):
                dataset = tf.data.Dataset.from_tensor_slices((
                    x.astype(ART_NUMPY_DTYPE),
                    targets.astype(ART_NUMPY_DTYPE),
                    mask.astype(ART_NUMPY_DTYPE),
                )).batch(self.batch_size, drop_remainder=False)

            else:
                dataset = tf.data.Dataset.from_tensor_slices((
                    x.astype(ART_NUMPY_DTYPE),
                    targets.astype(ART_NUMPY_DTYPE),
                    np.array([mask.astype(ART_NUMPY_DTYPE)] * x.shape[0]),
                )).batch(self.batch_size, drop_remainder=False)

        else:
            dataset = tf.data.Dataset.from_tensor_slices((
                x.astype(ART_NUMPY_DTYPE),
                targets.astype(ART_NUMPY_DTYPE),
            )).batch(self.batch_size, drop_remainder=False)

        # Start to compute adversarial examples
        adv_x = x.astype(ART_NUMPY_DTYPE)
        data_loader = iter(dataset)

        # Compute perturbation with batching
        for (batch_id, batch_all) in enumerate(
                tqdm(data_loader,
                     desc="PGD - Batches",
                     leave=False,
                     disable=not self.verbose)):
            if mask is not None:
                (batch, batch_labels,
                 mask_batch) = batch_all[0], batch_all[1], batch_all[2]
            else:
                (batch, batch_labels,
                 mask_batch) = batch_all[0], batch_all[1], None

            batch_index_1, batch_index_2 = batch_id * self.batch_size, (
                batch_id + 1) * self.batch_size

            # Compute batch_eps and batch_eps_step
            if isinstance(self.eps, np.ndarray):
                if len(self.eps.shape) == len(
                        x.shape) and self.eps.shape[0] == x.shape[0]:
                    batch_eps = self.eps[batch_index_1:batch_index_2]
                    batch_eps_step = self.eps_step[batch_index_1:batch_index_2]

                else:
                    batch_eps = self.eps
                    batch_eps_step = self.eps_step

            else:
                batch_eps = self.eps
                batch_eps_step = self.eps_step

            for rand_init_num in range(max(1, self.num_random_init)):
                if rand_init_num == 0:
                    # first iteration: use the adversarial examples as they are the only ones we have now
                    adv_x[batch_index_1:batch_index_2] = self._generate_batch(
                        x=batch,
                        targets=batch_labels,
                        mask=mask_batch,
                        eps=batch_eps,
                        eps_step=batch_eps_step)
                else:
                    adversarial_batch = self._generate_batch(
                        x=batch,
                        targets=batch_labels,
                        mask=mask_batch,
                        eps=batch_eps,
                        eps_step=batch_eps_step)
                    attack_success = compute_success_array(
                        self.estimator,
                        batch,
                        batch_labels,
                        adversarial_batch,
                        self.targeted,
                        batch_size=self.batch_size,
                    )
                    # return the successful adversarial examples
                    adv_x[batch_index_1:batch_index_2][
                        attack_success] = adversarial_batch[attack_success]

        logger.info(
            "Success rate of attack: %.2f%%",
            100 * compute_success(self.estimator,
                                  x,
                                  y,
                                  adv_x,
                                  self.targeted,
                                  batch_size=self.batch_size),
        )

        return adv_x
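As the dataset construction above shows, the mask may either have the full shape of `x` (one mask per sample, indexed per batch) or a shape broadcastable to it (one mask shared by all samples, tiled across the batch). A NumPy-only sketch of the two accepted shapes; the array sizes are illustrative:

import numpy as np

x = np.zeros((16, 28, 28, 1), dtype=np.float32)       # a batch of 16 single-channel images

per_sample_mask = np.ones_like(x)                      # shape (16, 28, 28, 1): indexed per batch
shared_mask = np.ones(x.shape[1:], dtype=np.float32)   # shape (28, 28, 1): broadcast to every sample

# The shared mask is equivalent to repeating it along the batch axis, as done for the dataset above
tiled = np.array([shared_mask] * x.shape[0])
assert tiled.shape == x.shape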
Exemple #29
0
    def generate(self, x, y=None):
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs.
        :type x: `np.ndarray`
        :param y: The labels for the data `x`. Only provide this parameter if you'd like to use true
                  labels when crafting adversarial samples. Otherwise, model predictions are used as labels to avoid the
                  "label leaking" effect (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
                  Labels should be one-hot-encoded.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        """

        from art.utils import compute_success, get_labels_np_array, projection

        if y is None:
            # Throw error if attack is targeted, but no targets are provided
            if self.targeted:
                raise ValueError(
                    'Target labels `y` need to be provided for a targeted attack.'
                )

            # Use model predictions as correct outputs
            targets = get_labels_np_array(self.classifier.predict(x))
        else:
            targets = y

        adv_x_best = None
        rate_best = 0.0

        for i_random_init in range(max(1, self.num_random_init)):
            adv_x = x  #.astype(NUMPY_DTYPE)
            noise = np.zeros_like(x)
            for i_max_iter in range(self.max_iter):
                # x, x_init, y, eps, eps_step, project, random_init
                adv_x = self._compute(
                    adv_x, x, targets, self.eps, self.eps_step, self._project,
                    self.num_random_init > 0 and i_max_iter == 0)
                # if self._project:
                #     noise = projection(adv_x - x, self.eps, self.norm)
                #     adv_x = x + noise

                rate = 100 * compute_success(self.classifier, x, targets,
                                             adv_x, self.targeted)
                #logger.info('Success rate of attack step: %.2f%%', rate)

                # Track the raw perturbation accumulated so far so the logged norm is meaningful
                noise = adv_x - x
                noise_norm = 0
                if self.norm == np.inf:
                    noise_norm = np.sign(noise)
                elif self.norm == 1:
                    ind = tuple(range(1, len(noise.shape)))
                    noise_norm = np.sum(np.abs(noise), axis=ind, keepdims=True)
                elif self.norm == 2:
                    ind = tuple(range(1, len(noise.shape)))
                    noise_norm = np.sqrt(
                        np.sum(np.square(noise), axis=ind, keepdims=True))

                TrackedPGD.tracked_x.append(
                    (adv_x, rate, i_max_iter, noise_norm))
                if rate >= 100:
                    break

            rate = 100 * compute_success(self.classifier, x, targets, adv_x,
                                         self.targeted)
            if rate > rate_best or adv_x_best is None:
                rate_best = rate
                adv_x_best = adv_x
            if rate >= 100:
                break

        logger.info('Success rate of attack: %.2f%%', rate_best)

        return adv_x_best
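This tracked PGD variant leaves the projection step to `_compute`, but the commented-out lines show the equivalent explicit call to `art.utils.projection`, which maps a perturbation back onto an eps-ball of the chosen norm. A minimal sketch of that helper; the values are illustrative:

import numpy as np
from art.utils import projection

noise = np.random.uniform(-1.0, 1.0, size=(4, 8)).astype(np.float32)

# Project the perturbation onto an L_inf ball of radius 0.3: every entry ends up in [-0.3, 0.3]
clipped = projection(noise, 0.3, np.inf)
assert np.all(np.abs(clipped) <= 0.3 + 1e-6)

# The same helper handles L_1 and L_2 balls by passing 1 or 2 as the norm argument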
Exemple #30
0
    def generate(self,
                 x: np.ndarray,
                 y: Optional[np.ndarray] = None,
                 **kwargs) -> np.ndarray:
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
                  (nb_samples,). If `self.targeted` is true, then `y` represents the target labels. Otherwise, the
                  targets are the original class labels.
        :return: An array holding the adversarial examples.
        """
        y = check_and_transform_label_format(y, self.estimator.nb_classes)
        x_adv = x.astype(ART_NUMPY_DTYPE)

        if self.estimator.clip_values is not None:
            clip_min, clip_max = self.estimator.clip_values
        else:
            clip_min, clip_max = np.amin(x), np.amax(x)

        # Assert that, if attack is targeted, y_val is provided:
        if self.targeted and y is None:
            raise ValueError(
                "Target labels `y` need to be provided for a targeted attack.")

        # No labels provided, use model prediction as correct class
        if y is None:
            y = get_labels_np_array(
                self.estimator.predict(x, batch_size=self.batch_size))

        # Compute perturbation with implicit batching
        nb_batches = int(np.ceil(x_adv.shape[0] / float(self.batch_size)))
        for batch_id in trange(nb_batches,
                               desc="C&W L_2",
                               disable=not self.verbose):
            batch_index_1, batch_index_2 = batch_id * self.batch_size, (
                batch_id + 1) * self.batch_size
            x_batch = x_adv[batch_index_1:batch_index_2]
            y_batch = y[batch_index_1:batch_index_2]

            # The optimization is performed in tanh space to keep the adversarial images bounded in correct range
            x_batch_tanh = original_to_tanh(x_batch, clip_min, clip_max,
                                            self._tanh_smoother)

            # Initialize binary search:
            c_current = self.initial_const * np.ones(x_batch.shape[0])
            c_lower_bound = np.zeros(x_batch.shape[0])
            c_double = np.ones(x_batch.shape[0]) > 0

            # Initialize placeholders for best l2 distance and attack found so far
            best_l2dist = np.inf * np.ones(x_batch.shape[0])
            best_x_adv_batch = x_batch.copy()

            for bss in range(self.binary_search_steps):
                logger.debug(
                    "Binary search step %i out of %i (c_mean==%f)",
                    bss,
                    self.binary_search_steps,
                    np.mean(c_current),
                )
                nb_active = int(np.sum(c_current < self._c_upper_bound))
                logger.debug(
                    "Number of samples with c_current < _c_upper_bound: %i out of %i",
                    nb_active,
                    x_batch.shape[0],
                )
                if nb_active == 0:
                    break
                learning_rate = self.learning_rate * np.ones(x_batch.shape[0])

                # Initialize perturbation in tanh space:
                x_adv_batch = x_batch.copy()
                x_adv_batch_tanh = x_batch_tanh.copy()

                z_logits, l2dist, loss = self._loss(x_batch, x_adv_batch,
                                                    y_batch, c_current)
                attack_success = loss - l2dist <= 0
                overall_attack_success = attack_success

                for i_iter in range(self.max_iter):
                    logger.debug("Iteration step %i out of %i", i_iter,
                                 self.max_iter)
                    logger.debug("Average Loss: %f", np.mean(loss))
                    logger.debug("Average L2Dist: %f", np.mean(l2dist))
                    logger.debug("Average Margin Loss: %f",
                                 np.mean(loss - l2dist))
                    logger.debug(
                        "Current number of succeeded attacks: %i out of %i",
                        int(np.sum(attack_success)),
                        len(attack_success),
                    )

                    improved_adv = attack_success & (l2dist < best_l2dist)
                    logger.debug("Number of improved L2 distances: %i",
                                 int(np.sum(improved_adv)))
                    if np.sum(improved_adv) > 0:
                        best_l2dist[improved_adv] = l2dist[improved_adv]
                        best_x_adv_batch[improved_adv] = x_adv_batch[
                            improved_adv]

                    active = (c_current < self._c_upper_bound) & (learning_rate
                                                                  > 0)
                    nb_active = int(np.sum(active))
                    logger.debug(
                        "Number of samples with c_current < _c_upper_bound and learning_rate > 0: %i out of %i",
                        nb_active,
                        x_batch.shape[0],
                    )
                    if nb_active == 0:
                        break

                    # compute gradient:
                    logger.debug("Compute loss gradient")
                    perturbation_tanh = -self._loss_gradient(
                        z_logits[active],
                        y_batch[active],
                        x_batch[active],
                        x_adv_batch[active],
                        x_adv_batch_tanh[active],
                        c_current[active],
                        clip_min,
                        clip_max,
                    )

                    # perform line search to optimize perturbation
                    # first, halve the learning rate until perturbation actually decreases the loss:
                    prev_loss = loss.copy()
                    best_loss = loss.copy()
                    best_lr = np.zeros(x_batch.shape[0])
                    halving = np.zeros(x_batch.shape[0])
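                    # best_lr records, per sample, the step size that achieved the
                    # lowest loss during this line search (0 if no step decreased it);
                    # halving counts how many times the step size was halved.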

                    for i_halve in range(self.max_halving):
                        logger.debug(
                            "Perform halving iteration %i out of %i",
                            i_halve,
                            self.max_halving,
                        )
                        do_halving = loss[active] >= prev_loss[active]
                        logger.debug(
                            "Halving to be performed on %i samples",
                            int(np.sum(do_halving)),
                        )
                        if np.sum(do_halving) == 0:
                            break
                        active_and_do_halving = active.copy()
                        active_and_do_halving[active] = do_halving

                        lr_mult = learning_rate[active_and_do_halving]
                        for _ in range(len(x.shape) - 1):
                            lr_mult = lr_mult[:, np.newaxis]

                        new_x_adv_batch_tanh = (
                            x_adv_batch_tanh[active_and_do_halving] +
                            lr_mult * perturbation_tanh[do_halving])
                        new_x_adv_batch = tanh_to_original(
                            new_x_adv_batch_tanh, clip_min, clip_max)
                        _, l2dist[active_and_do_halving], loss[
                            active_and_do_halving] = self._loss(
                                x_batch[active_and_do_halving],
                                new_x_adv_batch,
                                y_batch[active_and_do_halving],
                                c_current[active_and_do_halving],
                            )

                        logger.debug("New Average Loss: %f", np.mean(loss))
                        logger.debug("New Average L2Dist: %f", np.mean(l2dist))
                        logger.debug("New Average Margin Loss: %f",
                                     np.mean(loss - l2dist))

                        best_lr[loss < best_loss] = learning_rate[
                            loss < best_loss]
                        best_loss[loss < best_loss] = loss[loss < best_loss]
                        learning_rate[active_and_do_halving] /= 2
                        halving[active_and_do_halving] += 1
                    learning_rate[active] *= 2

                    # if no halving was actually required, double the learning rate as long as this
                    # decreases the loss:
                    for i_double in range(self.max_doubling):
                        logger.debug(
                            "Perform doubling iteration %i out of %i",
                            i_double,
                            self.max_doubling,
                        )
                        do_doubling = (halving[active] == 1) & (
                            loss[active] <= best_loss[active])
                        logger.debug(
                            "Doubling to be performed on %i samples",
                            int(np.sum(do_doubling)),
                        )
                        if np.sum(do_doubling) == 0:
                            break
                        active_and_do_doubling = active.copy()
                        active_and_do_doubling[active] = do_doubling
                        learning_rate[active_and_do_doubling] *= 2

                        lr_mult = learning_rate[active_and_do_doubling]
                        for _ in range(len(x.shape) - 1):
                            lr_mult = lr_mult[:, np.newaxis]

                        new_x_adv_batch_tanh = (
                            x_adv_batch_tanh[active_and_do_doubling] +
                            lr_mult * perturbation_tanh[do_doubling])
                        new_x_adv_batch = tanh_to_original(
                            new_x_adv_batch_tanh, clip_min, clip_max)
                        _, l2dist[active_and_do_doubling], loss[
                            active_and_do_doubling] = self._loss(
                                x_batch[active_and_do_doubling],
                                new_x_adv_batch,
                                y_batch[active_and_do_doubling],
                                c_current[active_and_do_doubling],
                            )
                        logger.debug("New Average Loss: %f", np.mean(loss))
                        logger.debug("New Average L2Dist: %f", np.mean(l2dist))
                        logger.debug("New Average Margin Loss: %f",
                                     np.mean(loss - l2dist))
                        best_lr[loss < best_loss] = learning_rate[
                            loss < best_loss]
                        best_loss[loss < best_loss] = loss[loss < best_loss]

                    learning_rate[halving == 1] /= 2

                    update_adv = best_lr[active] > 0
                    logger.debug(
                        "Number of adversarial samples to be finally updated: %i",
                        int(np.sum(update_adv)),
                    )

                    if np.sum(update_adv) > 0:
                        active_and_update_adv = active.copy()
                        active_and_update_adv[active] = update_adv
                        best_lr_mult = best_lr[active_and_update_adv]
                        for _ in range(len(x.shape) - 1):
                            best_lr_mult = best_lr_mult[:, np.newaxis]

                        x_adv_batch_tanh[active_and_update_adv] = (
                            x_adv_batch_tanh[active_and_update_adv] +
                            best_lr_mult * perturbation_tanh[update_adv])
                        x_adv_batch[active_and_update_adv] = tanh_to_original(
                            x_adv_batch_tanh[active_and_update_adv], clip_min,
                            clip_max)
                        (
                            z_logits[active_and_update_adv],
                            l2dist[active_and_update_adv],
                            loss[active_and_update_adv],
                        ) = self._loss(
                            x_batch[active_and_update_adv],
                            x_adv_batch[active_and_update_adv],
                            y_batch[active_and_update_adv],
                            c_current[active_and_update_adv],
                        )
                        attack_success = loss - l2dist <= 0
                        overall_attack_success = overall_attack_success | attack_success

                # Update depending on attack success:
                improved_adv = attack_success & (l2dist < best_l2dist)
                logger.debug("Number of improved L2 distances: %i",
                             int(np.sum(improved_adv)))

                if np.sum(improved_adv) > 0:
                    best_l2dist[improved_adv] = l2dist[improved_adv]
                    best_x_adv_batch[improved_adv] = x_adv_batch[improved_adv]

                # Binary-search update of the per-sample constant c: successful samples
                # move c halfway down towards their lower bound (so that the L2 term
                # gets more weight), while unsuccessful samples record the c that just
                # failed as their new lower bound and either double c (while still in
                # the doubling phase) or increase it by half of its gap above the
                # lower bound.
                c_double[overall_attack_success] = False
                c_current[overall_attack_success] = (
                    c_lower_bound + c_current)[overall_attack_success] / 2

                # Copy c before the in-place updates below so the lower-bound
                # bookkeeping uses the value that was actually tried.
                c_old = c_current.copy()
                c_current[~overall_attack_success & c_double] *= 2

                c_increase = (c_current - c_lower_bound)[
                    ~overall_attack_success & ~c_double] / 2
                c_current[~overall_attack_success & ~c_double] += c_increase
                c_lower_bound[~overall_attack_success] = c_old[
                    ~overall_attack_success]

            x_adv[batch_index_1:batch_index_2] = best_x_adv_batch

        logger.info(
            "Success rate of C&W L_2 attack: %.2f%%",
            100 * compute_success(self.estimator,
                                  x,
                                  y,
                                  x_adv,
                                  self.targeted,
                                  batch_size=self.batch_size),
        )

        return x_adv
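
# A minimal usage sketch, assuming the method above is ART's CarliniL2Method.generate
# and that `classifier` is an ART classifier exposing class gradients and clip_values
# (e.g. a wrapped Keras or PyTorch model); the parameter values are illustrative:
#
#     from art.attacks.evasion import CarliniL2Method
#
#     attack = CarliniL2Method(classifier, targeted=False, binary_search_steps=10,
#                              max_iter=10, batch_size=32)
#     x_test_adv = attack.generate(x=x_test)
#
# For a targeted attack, pass targeted=True and provide one-hot target labels via
# generate(x=x_test, y=target_labels), as described in the docstring above.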