def body(i, ax, m):
      """Do a momentum step"""
      logits = self.model.get_logits(ax)
      loss = softmax_cross_entropy_with_logits(labels=y, logits=logits)
      if targeted:
        loss = -loss

      # Define gradient of loss wrt input
      grad, = tf.gradients(loss, ax)

      # Normalize current gradient and add it to the accumulated gradient
      red_ind = list(range(1, len(grad.get_shape())))
      avoid_zero_div = tf.cast(1e-12, grad.dtype)
      grad = grad / tf.maximum(
          avoid_zero_div,
          reduce_mean(tf.abs(grad), red_ind, keepdims=True))
      m = self.decay_factor * m + grad

      if self.ord == 1:
        raise NotImplementedError("This attack hasn't been tested for ord=1. "
                                  "It's not clear that FGM makes a good inner "
                                  "loop step for iterative optimization since "
                                  "it updates just one coordinate at a time.")
      optimal_perturbation = optimize_linear(m, self.eps_iter, self.ord)

      # Update and clip adversarial example in current iteration
      ax = ax + optimal_perturbation
      ax = x + utils_tf.clip_eta(ax - x, self.ord, self.eps)

      if self.clip_min is not None and self.clip_max is not None:
        ax = utils_tf.clip_by_value(ax, self.clip_min, self.clip_max)

      ax = tf.stop_gradient(ax)

      return i + 1, ax, m
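
A note on the update rule above: the gradient is first normalized by its mean
absolute value over the non-batch axes, so the decay factor, not the raw
gradient scale, controls how much history the accumulator keeps. A minimal
NumPy sketch of the same step (the decay_factor default below is illustrative,
not taken from the source):

import numpy as np

def momentum_step_sketch(m, grad, decay_factor=1.0, avoid_zero_div=1e-12):
    # Normalize the gradient by its mean absolute value over all
    # non-batch axes, mirroring reduce_mean(tf.abs(grad)) above.
    red_axes = tuple(range(1, grad.ndim))
    scale = np.maximum(avoid_zero_div,
                       np.mean(np.abs(grad), axis=red_axes, keepdims=True))
    return decay_factor * m + grad / scale
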
Example 2
 def test_clip_eta_goldilocks(self):
     """test_clip_eta_goldilocks: Test that the clipping handles perturbations
 that are too small, just right, and too big correctly"""
     eta = tf.constant([[2.], [3.], [4.]])
     self.assertTrue(eta.dtype == tf.float32, eta.dtype)
     eps = 3.
     for ord_arg in [np.inf, 1, 2]:
         for sign in [-1., 1.]:
             try:
                 clipped = utils_tf.clip_eta(eta * sign, ord_arg, eps)
             except NotImplementedError:
                 # Don't raise SkipTest, it skips the rest of the for loop
                 continue
             clipped_value = self.sess.run(clipped)
             gold = sign * np.array([[2.], [3.], [3.]])
             self.assertClose(clipped_value, gold)
             grad, = tf.gradients(clipped, eta)
             grad_value = self.sess.run(grad)
             # Note: the second 1. is debatable (the left-sided derivative
             # and the right-sided derivative do not match, so formally
             # the derivative is not defined). This test makes sure that
             # we at least handle this oddity consistently across all the
             # argument values we test
             gold = sign * np.array([[1.], [1.], [0.]])
             self.assertClose(grad_value, gold)
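
For reference, a minimal NumPy sketch of the ord=2 behavior this test checks
(an illustration, not the library's implementation): eta is scaled by
min(1, eps / ||eta||), so the first two rows pass through unchanged and the
third is shrunk onto the eps-ball.

import numpy as np

def clip_eta_l2_sketch(eta, eps, avoid_zero_div=1e-12):
    # Project each row of eta onto the L2 ball of radius eps.
    norm = np.sqrt(np.maximum(avoid_zero_div,
                              np.sum(eta ** 2, axis=1, keepdims=True)))
    return eta * np.minimum(1., eps / norm)

# Rows with norm 2 and 3 are unchanged; the norm-4 row is scaled to norm 3.
print(clip_eta_l2_sketch(np.array([[2.], [3.], [4.]]), 3.))
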
Example 3
 def test_clip_eta_norm_0(self):
     """test_clip_eta_norm_0: Test that `clip_eta` still works when the
 norm of `eta` is zero. This used to cause a divide by zero for ord
 1 and ord 2."""
     eta = tf.zeros((5, 3))
     self.assertTrue(eta.dtype == tf.float32, eta.dtype)
     eps = .25
     for ord_arg in [np.inf, 1, 2]:
         try:
             clipped = utils_tf.clip_eta(eta, ord_arg, eps)
         except NotImplementedError:
             # Don't raise SkipTest, it skips the rest of the for loop
             continue
         clipped = self.sess.run(clipped)
         self.assertTrue(not np.any(np.isinf(clipped)))
         self.assertTrue(not np.any(np.isnan(clipped)), (ord_arg, clipped))
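
The zero-norm case is exactly what the avoid_zero_div guard in the sketch
above protects against: with eta = 0 the scaling factor becomes
min(1, eps / max(0, 1e-12)), which is finite, instead of eps / 0.

# Zero perturbation stays zero, with no inf or nan in the result:
print(clip_eta_l2_sketch(np.zeros((5, 3)), .25))
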
    def attack_single_step(self, x, eta, g_feat):
        """
    TensorFlow implementation of the Fast Feature Gradient. This is a
    single step attack similar to Fast Gradient Method that attacks an
    internal representation.

    :param x: the input placeholder
    :param eta: A tensor the same shape as x that holds the perturbation.
    :param g_feat: model's internal tensor for guide
    :return: a tensor for the adversarial example
    """

        adv_x = x + eta
        a_feat = self.model.fprop(adv_x)[self.layer]

        # feat.shape = (batch, c) or (batch, w, h, c)
        axis = list(range(1, len(a_feat.shape)))

        # Compute loss
        # This is a targeted attack, hence the negative sign
        loss = -reduce_sum(tf.square(a_feat - g_feat), axis)

        # Define gradient of loss wrt input
        grad, = tf.gradients(loss, adv_x)

        # Multiply by constant epsilon
        scaled_signed_grad = self.eps_iter * tf.sign(grad)

        # Add perturbation to original example to obtain adversarial example
        adv_x = adv_x + scaled_signed_grad

        # If clipping is needed,
        # reset all values outside of [clip_min, clip_max]
        if (self.clip_min is not None) and (self.clip_max is not None):
            adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

        adv_x = tf.stop_gradient(adv_x)

        eta = adv_x - x
        eta = clip_eta(eta, self.ord, self.eps)

        return eta
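
attack_single_step is essentially one FGSM step taken on a feature-matching
loss rather than a classification loss. A NumPy sketch of just that loss (an
illustration, not the library code) makes the targeting explicit:

import numpy as np

def guide_loss_sketch(a_feat, g_feat):
    # Negative squared distance between the adversarial features and the
    # guide features; ascending this loss (the signed gradient step above)
    # drags the internal representation toward the guide.
    axes = tuple(range(1, a_feat.ndim))
    return -np.sum((a_feat - g_feat) ** 2, axis=axes)
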
    def generate(self, x, g, **kwargs):
        """
    Generate symbolic graph for adversarial examples and return.

    :param x: The model's symbolic inputs.
    :param g: The target value of the symbolic representation
    :param kwargs: See `parse_params`
    """

        # Parse and save attack-specific parameters
        assert self.parse_params(**kwargs)

        g_feat = self.model.fprop(g)[self.layer]

        # Initialize loop variables
        eta = tf.random_uniform(tf.shape(x),
                                -self.eps,
                                self.eps,
                                dtype=self.tf_dtype)
        eta = clip_eta(eta, self.ord, self.eps)

        def cond(i, _):
            return tf.less(i, self.nb_iter)

        def body(i, e):
            new_eta = self.attack_single_step(x, e, g_feat)
            return i + 1, new_eta

        _, eta = tf.while_loop(cond,
                               body, (tf.zeros([]), eta),
                               back_prop=True,
                               maximum_iterations=self.nb_iter)

        # Define adversarial example (and clip if necessary)
        adv_x = x + eta
        if self.clip_min is not None and self.clip_max is not None:
            adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

        return adv_x
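
A hypothetical call site for this generate method, assuming it belongs to a
CleverHans-style FastFeatureAdversaries attack class; the layer name and all
hyperparameter values below are illustrative, not taken from the source:

# model, sess, x (inputs) and guide (guide images) are assumed to exist.
attack = FastFeatureAdversaries(model, sess=sess)
adv_x = attack.generate(x, guide,
                        layer='flatten',  # internal layer to match (assumed)
                        eps=0.3, eps_iter=0.05, nb_iter=10,
                        clip_min=0., clip_max=1.)
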
    def generate(self, x, **kwargs):
        """
    Generate symbolic graph for adversarial examples and return.

    :param x: The model's symbolic inputs.
    :param kwargs: See `parse_params`
    """
        # Parse and save attack-specific parameters
        assert self.parse_params(**kwargs)

        asserts = []

        # If a data range was specified, check that the input was in that range
        if self.clip_min is not None:
            asserts.append(
                utils_tf.assert_greater_equal(x,
                                              tf.cast(self.clip_min, x.dtype)))

        if self.clip_max is not None:
            asserts.append(
                utils_tf.assert_less_equal(x, tf.cast(self.clip_max, x.dtype)))

        # Initialize loop variables
        if self.rand_init:
            eta = tf.random_uniform(tf.shape(x),
                                    tf.cast(-self.rand_minmax, x.dtype),
                                    tf.cast(self.rand_minmax, x.dtype),
                                    dtype=x.dtype)
        else:
            eta = tf.zeros(tf.shape(x))

        # Clip eta
        eta = clip_eta(eta, self.ord, self.eps)
        adv_x = x + eta
        if self.clip_min is not None or self.clip_max is not None:
            adv_x = utils_tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

        if self.y_target is not None:
            y = self.y_target
            targeted = True
        elif self.y is not None:
            y = self.y
            targeted = False
        else:
            model_preds = self.model.get_probs(x)
            preds_max = tf.reduce_max(model_preds, 1, keepdims=True)
            y = tf.to_float(tf.equal(model_preds, preds_max))
            y = tf.stop_gradient(y)
            targeted = False
            del model_preds

        y_kwarg = 'y_target' if targeted else 'y'
        fgm_params = {
            'eps': self.eps_iter,
            y_kwarg: y,
            'ord': self.ord,
            'clip_min': self.clip_min,
            'clip_max': self.clip_max
        }
        if self.ord == 1:
            raise NotImplementedError(
                "It's not clear that FGM is a good inner loop "
                "step for PGD when ord=1, because ord=1 FGM "
                "changes only one pixel at a time. We need "
                "to rigorously test a strong ord=1 PGD "
                "before enabling this feature.")

        # Use getattr() to avoid errors in eager execution attacks
        FGM = self.FGM_CLASS(self.model,
                             sess=getattr(self, 'sess', None),
                             dtypestr=self.dtypestr)

        def cond(i, _):
            """Iterate until requested number of iterations is completed"""
            return tf.less(i, self.nb_iter)

        def body(i, adv_x):
            """Do a projected gradient step"""
            adv_x = FGM.generate(adv_x, **fgm_params)

            # Clipping perturbation eta to self.ord norm ball
            eta = adv_x - x
            eta = clip_eta(eta, self.ord, self.eps)
            adv_x = x + eta

            # Redo the clipping.
            # FGM already did it, but subtracting and re-adding eta can add some
            # small numerical error.
            if self.clip_min is not None or self.clip_max is not None:
                adv_x = utils_tf.clip_by_value(adv_x, self.clip_min,
                                               self.clip_max)

            return i + 1, adv_x

        _, adv_x = tf.while_loop(cond,
                                 body, (tf.zeros([]), adv_x),
                                 back_prop=True,
                                 maximum_iterations=self.nb_iter)

        # Asserts run only on CPU.
        # When multi-GPU eval code tries to force all PGD ops onto GPU, this
        # can cause an error.
        common_dtype = tf.float32
        asserts.append(
            utils_tf.assert_less_equal(
                tf.cast(self.eps_iter, dtype=common_dtype),
                tf.cast(self.eps, dtype=common_dtype)))
        if self.ord == np.inf and self.clip_min is not None:
            # The 1e-6 is needed to compensate for numerical error.
            # Without the 1e-6 this fails when e.g. eps=.2, clip_min=.5,
            # clip_max=.7
            asserts.append(
                utils_tf.assert_less_equal(
                    tf.cast(self.eps, x.dtype),
                    1e-6 + tf.cast(self.clip_max, x.dtype)
                    - tf.cast(self.clip_min, x.dtype)))

        if self.sanity_checks:
            with tf.control_dependencies(asserts):
                adv_x = tf.identity(adv_x)

        return adv_x
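
A hypothetical call site for the PGD-style generate above, assuming a
CleverHans-style ProjectedGradientDescent class; model, sess and x are assumed
to exist and the values are illustrative. Note that eps_iter must not exceed
eps, per the assert near the end of the method:

import numpy as np

pgd = ProjectedGradientDescent(model, sess=sess)
adv_x = pgd.generate(x,
                     eps=0.3,        # total perturbation budget
                     eps_iter=0.01,  # per-step size; asserted <= eps above
                     nb_iter=40,
                     ord=np.inf,
                     clip_min=0., clip_max=1.)
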
Example 8
    def attack(self, x, y_p, **kwargs):
        """
    This method creates a symoblic graph of the MadryEtAl attack on
    multiple GPUs. The graph is created on the first n GPUs.

    Stop gradient is needed to get the speed-up. This prevents us from
    being able to back-prop through the attack.

    :param x: A tensor with the input image.
    :param y_p: Ground truth label or predicted label.
    :return: Two lists containing the input and output tensors of each GPU.
    """
        inputs = []
        outputs = []

        # Create the initial random perturbation
        device_name = '/gpu:0'
        self.model.set_device(device_name)
        with tf.device(device_name):
            with tf.variable_scope('init_rand'):
                if self.rand_init:
                    eta = tf.random_uniform(tf.shape(x), -self.eps, self.eps)
                    eta = clip_eta(eta, self.ord, self.eps)
                    eta = tf.stop_gradient(eta)
                else:
                    eta = tf.zeros_like(x)

        # TODO: Break the graph only nGPU times instead of nb_iter times.
        # In the current implementation, by the time an adversarial example
        # is used for training, the weights of the model have changed nb_iter
        # times. This can cause slower convergence compared to single-GPU
        # adversarial training.
        for i in range(self.nb_iter):
            # Create the graph for the i-th step of the attack
            inputs += [OrderedDict()]
            outputs += [OrderedDict()]
            device_name = x.device
            self.model.set_device(device_name)
            with tf.device(device_name):
                with tf.variable_scope('step%d' % i):
                    if i > 0:
                        # Clone the variables to separate the two GPUs' graphs
                        x = clone_variable('x', x)
                        y_p = clone_variable('y_p', y_p)
                        eta = clone_variable('eta', eta)

                    inputs[i]['x'] = x
                    inputs[i]['y_p'] = y_p
                    outputs[i]['x'] = x
                    outputs[i]['y_p'] = y_p
                    inputs[i]['eta'] = eta

                    eta = self.attack_single_step(x, eta, y_p)

                    if i < self.nb_iter - 1:
                        outputs[i]['eta'] = eta
                    else:
                        # adv_x, not eta, is the output of the last step
                        adv_x = x + eta
                        if (self.clip_min is not None
                                and self.clip_max is not None):
                            adv_x = tf.clip_by_value(adv_x, self.clip_min,
                                                     self.clip_max)
                        adv_x = tf.stop_gradient(adv_x, name='adv_x')
                        outputs[i]['adv_x'] = adv_x

        return inputs, outputs