def body(i, ax, m):
      """Do a momentum step"""
      logits = self.model.get_logits(ax)
      loss = softmax_cross_entropy_with_logits(labels=y, logits=logits)
      if targeted:
        loss = -loss

      # Define gradient of loss wrt input
      grad, = tf.gradients(ys=loss, xs=ax)

      # Normalize current gradient and add it to the accumulated gradient
      red_ind = list(range(1, len(grad.get_shape())))
      avoid_zero_div = tf.cast(1e-12, grad.dtype)
      grad = grad / tf.maximum(
          avoid_zero_div,
          reduce_mean(tf.abs(grad), red_ind, keepdims=True))
      m = self.decay_factor * m + grad

      optimal_perturbation = optimize_linear(m, self.eps_iter, self.ord)
      if self.ord == 1:
        raise NotImplementedError("This attack hasn't been tested for ord=1."
                                  "It's not clear that FGM makes a good inner "
                                  "loop step for iterative optimization since "
                                  "it updates just one coordinate at a time.")

      # Update and clip adversarial example in current iteration
      ax = ax + optimal_perturbation
      ax = x + utils_tf.clip_eta(ax - x, self.ord, self.eps)

      if self.clip_min is not None and self.clip_max is not None:
        ax = utils_tf.clip_by_value(ax, self.clip_min, self.clip_max)

      ax = tf.stop_gradient(ax)

      return i + 1, ax, m
Esempio n. 2
0
def fgm_perturb(x,
                y,
                loss_fn,
                clip_min=None,
                clip_max=None,
                ord=np.inf,
                eps=0.3):
    loss = loss_fn(x)
    grad, = tf.gradients(loss, x)
    optimal_perturbation = optimize_linear(grad, eps, ord)
    adv_x = x + optimal_perturbation

    if (clip_min is not None) or (clip_max is not None):
        # We don't currently support one-sided clipping
        assert clip_min is not None and clip_max is not None
        adv_x = utils_tf.clip_by_value(adv_x, clip_min, clip_max)

    return adv_x
Esempio n. 3
0
  def generate(self, x, **kwargs):
    assert self.parse_params(**kwargs)

    asserts = []

    if self.clip_min is not None:
      asserts.append(utils_tf.assert_greater_equal(
        x, tf.cast(self.clip_min,x.dtype)))

    if self.clip_max is not None:
      asserts.append(utils_tf.assert_less_equal(
        x, tf.cast(self.clip_max, x.dtype)))

    m_cache = tf.zeros_like(x)
    v_cache = tf.zeros_like(x)
    adv_x = x

    y, _nb_classes = self.get_or_guess_labels(x, kwargs)
    y = y / reduce_sum(y, 1, keepdims=True)
    targeted = (self.y_target is not None)

    def save_batch(directory, images, labels, iteration, batch_idx):
      for idx, (image, label) in enumerate(zip(images, labels)):
        filename = "id{}_b{}_it{}_l{}.png".format(idx, batch_idx,
                                                  iteration, np.argmax(label))
        save_image_np(join(directory, filename), image)

    for i in range(self.nb_iter):
      self.logger.debug("Starting #{} iteration".format(i + 1))

      logits = self.model.get_logits(adv_x)
      loss = softmax_cross_entropy_with_logits(labels=y, logits=logits)
      if targeted:
        loss = -loss

      grad, = tf.gradients(loss, adv_x)

      red_ind = list(range(1, len(grad.get_shape())))
      avoid_zero_div = tf.cast(1e-8, grad.dtype)
      grad = grad / tf.maximum(
        avoid_zero_div,
        reduce_mean(tf.abs(grad), red_ind, keepdims=True))

      m_cache = self.betha1 * m_cache + (1 - self.betha1) * grad
      v_cache = self.betha2 * v_cache + (1 - self.betha2) * tf.square(grad)
      update = tf.divide(m_cache, tf.sqrt(v_cache + avoid_zero_div))

      optimal_perturbation = optimize_linear(update, self.eps_iter, self.ord)
      if self.ord == 1:
        raise NotImplementedError("This attack hasn't been tested for ord=1."
                                  "It's not clear that FGM makes a good inner "
                                  "loop step for iterative optimization since "
                                  "it updates just one coordinate at a time.")

      adv_x = adv_x + optimal_perturbation
      adv_x = x + utils_tf.clip_eta(adv_x - x, self.ord, self.eps)

      if self.clip_min is not None and self.clip_max is not None:
        adv_x = utils_tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

      adv_x = tf.stop_gradient(adv_x)

      if self.sanity_checks:
        with tf.control_dependencies(asserts):
          adv_x = tf.identity(adv_x)

      with self.sess.as_default():
        self.sess.run(self.init_op)
        for batch in range(self.nb_batches):
          adv_x_np, y_np = self.sess.run([adv_x, y])
          self.logger.debug("Saving attacked batch #{}".format(batch + 1))
          save_batch(self.adv_dir, adv_x_np, y_np, i, batch)