def generate(self, x_val, **kwargs):
        """
        Generate adversarial samples and return them in a Numpy array.

        Every input is perturbed for exactly `self.max_iter` iterations. At each
        iteration the gradient of the originally predicted class with respect to
        the input is evaluated and a step computed by `_compute_theta` and
        `_compute_pert` is added to the sample.

        :param x_val: (required) A Numpy array with the original inputs.
        :param kwargs: Attack parameters, forwarded as-is to `set_params`.
        :return: A Numpy array holding the adversarial examples.
        :raises AssertionError: If `set_params` returns a falsy value.
        """
        # Call `set_params` outside of an `assert` statement: asserts are
        # stripped under `python -O`, which would silently skip the call.
        if not self.set_params(**kwargs):
            raise AssertionError("set_params rejected the supplied attack parameters")

        # Placeholder with a free batch dimension and the per-sample shape of x_val
        dims = [None] + list(x_val.shape[1:])
        nb_classes = self.model.output_shape[1]
        xi_op = tf.placeholder(dtype=tf.float32, shape=dims)
        loss = self.classifier.model(xi_op)
        grads_graph = class_derivative(loss, xi_op, nb_classes)
        x_adv = x_val.copy()

        # Progress bar
        progress_bar = Progbar(target=len(x_val), verbose=self.verbose)

        # Class predicted by the model for each unperturbed input
        y_pred = self.classifier.model.predict(x_val)
        pred_class = np.argmax(y_pred, axis=1)

        # Main algorithm for each example
        for j, x in enumerate(x_adv):
            # `xi` is a view on row j of `x_adv` with a leading batch axis, so
            # the in-place updates below write straight into `x_adv`.
            xi = x[None, ...]
            norm_x0 = np.linalg.norm(np.reshape(x, [-1]))
            label = pred_class[j]

            # Main loop of the algorithm
            for _ in range(self.max_iter):
                # Current score of the originally predicted class
                score = self.classifier.model.predict(xi)[0][label]

                # Gradient of that class w.r.t. the input, and its norm
                grads = self.sess.run(grads_graph, feed_dict={xi_op: xi})[label][0]
                norm_grad = np.linalg.norm(np.reshape(grads, [-1]))

                # Step size
                theta = self._compute_theta(norm_x0, score, norm_grad,
                                            nb_classes)

                # Perturbation for this iteration
                di = self._compute_pert(theta, grads, norm_grad)

                # Apply the perturbation
                xi += di

            # Store the adversarial example
            x_adv[j] = xi[0]

            # Report `j + 1` completed samples so the bar reaches its target
            perturbation = np.linalg.norm((x_adv[j] - x_val[j]).flatten())
            progress_bar.update(current=j + 1,
                                values=[("perturbation", perturbation)])

        return x_adv
    def generate(self, x_val, **kwargs):
        """
        Generate adversarial samples and return them in an array.

        For each input, the sample is repeatedly moved towards the class whose
        (linearised) decision boundary is closest, until the predicted class
        changes or `max_iter` iterations have been performed.

        As a side effect, the following attributes are set on the attack:
        `fooling_rate` (fraction of inputs whose predicted label changed),
        `converged` (whether the LAST processed sample stopped before
        `max_iter`) and `v` (mean L2 norm of the perturbations).

        :param x_val: An array with the original inputs to be attacked.
        :type x_val: `np.ndarray`
        :param max_iter: The maximum number of iterations.
        :type max_iter: `int`
        :param clip_min: Minimum input component value.
        :type clip_min: `float`
        :param clip_max: Maximum input component value.
        :type clip_max: `float`
        :return: An array holding the adversarial examples.
        :rtype: `np.ndarray`
        :raises AssertionError: If `set_params` returns a falsy value.
        """
        # Call `set_params` outside of an `assert` statement: asserts are
        # stripped under `python -O`, which would silently skip the call.
        if not self.set_params(**kwargs):
            raise AssertionError("set_params rejected the supplied attack parameters")
        k.set_learning_phase(0)

        nb_instances = x_val.shape[0]
        dims = [None] + list(x_val.shape[1:])
        nb_classes = self.model.output_shape[1]
        xi_op = tf.placeholder(dtype=tf.float32, shape=dims)

        loss = self.classifier._get_predictions(xi_op, log=True)
        grads = class_derivative(loss, xi_op, nb_classes)
        # Build the prediction op once: calling the model inside the loop would
        # add a fresh set of ops to the graph on every iteration.
        preds_op = self.model(xi_op)
        x_adv = x_val.copy()

        # A bound equal to 0 is a legitimate value, so test against None rather
        # than relying on truthiness.
        do_clip = self.clip_min is not None or self.clip_max is not None

        # Progress bar
        progress_bar = Progbar(target=len(x_val), verbose=self.verbose)

        # Defined up front so `converged` can be computed even if the loop
        # below never executes.
        nb_iter = 0

        for j, x in enumerate(x_adv):
            xi = x[None, ...]

            f, grd = self.sess.run([preds_op, grads], {xi_op: xi})
            f, grd = f[0], np.array([g[0] for g in grd])
            fk_hat = np.argmax(f)
            fk_i_hat = fk_hat
            nb_iter = 0

            while fk_i_hat == fk_hat and nb_iter < self.max_iter:
                # Differences to the originally predicted class
                grad_diff = grd - grd[fk_hat]
                f_diff = f - f[fk_hat]

                # Mask the original class (its ratio is 0/0) and pick the
                # class with the smallest score-gap / gradient-gap ratio.
                mask = [0] * nb_classes
                mask[fk_hat] = 1
                grad_norms = np.linalg.norm(grad_diff.reshape(nb_classes, -1), axis=1)
                value = np.ma.array(np.abs(f_diff) / grad_norms, mask=mask)

                closest = value.argmin(fill_value=np.inf)
                r = (abs(f_diff[closest]) / pow(np.linalg.norm(grad_diff[closest]), 2)) * grad_diff[closest]

                # Add perturbation and clip result
                xi += r
                if do_clip:
                    xi = np.clip(xi, self.clip_min, self.clip_max)

                # Recompute prediction and gradients for the new xi
                f, grd = self.sess.run([preds_op, grads], {xi_op: xi})
                f, grd = f[0], np.array([g[0] for g in grd])
                fk_i_hat = np.argmax(f)

                nb_iter += 1

            x_adv[j] = xi[0]

            # Report `j + 1` completed samples so the bar reaches its target
            perturbation = np.linalg.norm((x_adv[j] - x_val[j]).flatten())
            progress_bar.update(current=j + 1,
                                values=[("perturbation", perturbation)])

        # Compare predicted labels, not raw prediction matrices: an element-wise
        # comparison of the outputs counts every differing class score.
        true_labels = np.argmax(self.model.predict(x_val), axis=1)
        adv_labels = np.argmax(self.model.predict(x_adv), axis=1)
        fooling_rate = np.sum(true_labels != adv_labels) / float(nb_instances)

        self.fooling_rate = fooling_rate
        self.converged = (nb_iter < self.max_iter)
        self.v = np.mean(np.linalg.norm((x_adv - x_val).reshape(nb_instances, -1), axis=1))

        return x_adv
# Example no. 3
# 0
    def generate(self, x_val, **kwargs):
        """
        Generate adversarial samples and return them in a Numpy array.

        For each input, pairs of features selected by `_saliency_map` are
        shifted by `theta` (and clipped) until the model predicts the target
        class, the iteration budget derived from `gamma` is exhausted, or no
        modifiable feature is left.

        As a side effect, `self._x`, `self._grads`, `self._nb_features` and
        `self._nb_classes` are (re)assigned on every call.

        :param x_val: (required) A Numpy array with the original inputs.
        :param y_val: (optional) Target values if the attack is targeted
        :param theta: (optional float) Perturbation introduced to each modified feature (can be positive or negative)
        :param gamma: (optional float) Maximum percentage of perturbed features (between 0 and 1)
        :param clip_min: (optional float) Minimum component value for clipping
        :param clip_max: (optional float) Maximum component value for clipping
        :return: A Numpy array holding the adversarial examples.
        :raises AssertionError: If `set_params` returns a falsy value.
        """
        # Parse and save attack-specific parameters. The call is kept outside
        # of an `assert` statement: asserts are stripped under `python -O`,
        # which would silently skip it.
        if not self.set_params(**kwargs):
            raise AssertionError("set_params rejected the supplied attack parameters")
        k.set_learning_phase(0)

        # Initialize variables
        feature_shape = list(x_val.shape[1:])
        batch_shape = [None] + feature_shape    # placeholder: free batch axis
        sample_shape = [1] + feature_shape      # a single sample fed to the model
        self._x = tf.placeholder(tf.float32, shape=batch_shape)
        # `np.product` was removed in NumPy 2.0; `np.prod` is the supported name
        self._nb_features = np.prod(x_val.shape[1:])
        self._nb_classes = self.model.output_shape[1]
        x_adv = np.reshape(np.copy(x_val), (-1, self._nb_features))

        # Build the label-prediction op once and reuse it in the inner loop
        # instead of adding new ops to the graph at every iteration.
        preds_op = tf.argmax(self.classifier.model(self._x), axis=1)
        preds = self.sess.run(preds_op, {self._x: x_val})

        loss = self.classifier._get_predictions(self._x, log=False)
        self._grads = class_derivative(loss, self._x, self._nb_classes)

        # Set number of iterations w.r.t. the total perturbation allowed
        # (two features are modified per iteration)
        max_iter = np.floor(self._nb_features * self.gamma / 2)

        # Determine target classes for attack
        y_val = kwargs.get('y_val')
        if y_val is None:
            # Randomly choose target from the incorrect classes for each sample
            from src.utils import random_targets
            targets = np.argmax(random_targets(preds, self._nb_classes), axis=1)
        else:
            targets = y_val

        # The direction of the perturbation is fixed for the whole call, so the
        # matching clip function and bound are selected once.
        increasing = self.theta > 0
        if increasing:
            clip_func, clip_value = np.minimum, self.clip_max
        else:
            clip_func, clip_value = np.maximum, self.clip_min

        # Generate the adversarial samples
        for ind, val in enumerate(x_adv):
            # `val` is a row of `x_adv`; it is updated in place below.
            # Initialize the search space; optimize to remove features that can't be changed
            if increasing:
                search_space = {i for i in range(self._nb_features) if val[i] < self.clip_max}
            else:
                search_space = {i for i in range(self._nb_features) if val[i] > self.clip_min}

            nb_iter = 0
            current_pred = preds[ind]
            target = targets[ind]

            while current_pred != target and nb_iter < max_iter and search_space:
                # Compute saliency map
                feat1, feat2 = self._saliency_map(np.reshape(val, sample_shape), target, search_space)

                # Move on to next examples if there are no more features to change
                if feat1 == feat2 == 0:
                    break

                # Update adversarial example
                for feature_ind in (feat1, feat2):
                    val[feature_ind] = clip_func(clip_value, val[feature_ind] + self.theta)

                    # Remove indices from search space if max/min values were reached
                    if val[feature_ind] == clip_value:
                        search_space.discard(feature_ind)

                # Recompute model prediction; a single sample is fed, so take
                # the scalar label out of the length-1 result.
                current_pred = self.sess.run(preds_op, {self._x: np.reshape(val, sample_shape)})[0]
                nb_iter += 1

        x_adv = np.reshape(x_adv, x_val.shape)
        return x_adv