Esempio n. 1
  def test_zero_out_clipped_grads(self):
    """
    Test that the gradient gets zeroed out at positions where no progress
    can be made due to clipping.

    A gradient component is expected to be zeroed when the perturbation
    already sits on a clipping bound and the gradient points further
    outside of it (eta == clip_min with a negative gradient, or
    eta == clip_max with a positive gradient). Every other component must
    be passed through unchanged.
    """
    clip_min = -1
    clip_max = 1
    # Cases, in order: interior point, at the lower bound, at the upper
    # bound, interior point, lower bound, upper bound, near each bound.
    eta = tf.constant([[0.], [-1.], [1.], [0.5], [-1.], [1.], [-0.9], [0.9]])
    grad = tf.constant([[1.], [-1.], [1.], [1.], [1.], [-1.], [-1.], [1.]])

    # NOTE(review): the call wrapping this expression was lost in the source
    # text; evaluating the tensor through `self.sess.run` is presumed --
    # confirm that the enclosing test case exposes a `sess` attribute.
    grad2 = self.sess.run(
        utils_tf.zero_out_clipped_grads(grad, eta, clip_min, clip_max))

    # Only rows 1 (at clip_min, grad < 0) and 2 (at clip_max, grad > 0)
    # are zeroed; rows 4 and 5 sit on a bound but point back inside.
    expected = np.asarray([[1.], [0.], [0.], [1.], [1.], [-1.], [-1.], [1.]])
    self.assertClose(grad2, expected)
def sparse_l1_descent(x,
                      logits,
                      y=None,
                      eps=1.0,
                      q=99,
                      clip_min=None,
                      clip_max=None,
                      clip_grad=False,
                      targeted=False,
                      sanity_checks=True):
    """
    TensorFlow implementation of the Sparse L1 Descent Method.

    NOTE(review): the parameter list was truncated in the source text and has
    been reconstructed from this docstring and from the names used in the
    body. The default values of `eps`, `q` and `sanity_checks` are presumed
    -- confirm them against the upstream implementation.

    :param x: the input placeholder
    :param logits: output of model.get_logits
    :param y: (optional) A placeholder for the true labels. If targeted
              is true, then provide the target label. Otherwise, only provide
              this parameter if you'd like to use true labels when crafting
              adversarial samples. Otherwise, model predictions are used as
              labels to avoid the "label leaking" effect (explained in this
              paper: https://arxiv.org/abs/1611.01236). Default is None.
              Labels should be one-hot-encoded.
    :param eps: the epsilon (input variation parameter)
    :param q: the percentile above which gradient values are retained. Either
              a scalar or a vector of same length as the input batch
              dimension.
    :param clip_min: Minimum float value for adversarial example components
    :param clip_max: Maximum float value for adversarial example components
    :param clip_grad: (optional bool) Ignore gradient components
                      at positions where the input is already at the boundary
                      of the domain, and the update step will get clipped
                      out.
    :param targeted: Is the attack targeted or untargeted? Untargeted, the
                     default, will try to make the label incorrect. Targeted
                     will instead try to move in the direction of being more
                     like y.
    :param sanity_checks: (optional bool) If true, the range assertions on
                          `x` are attached as control dependencies of the
                          returned tensor.
    :return: a tensor for the adversarial example
    """

    asserts = []

    # If a data range was specified, check that the input was in that range
    if clip_min is not None:
        asserts.append(
            utils_tf.assert_greater_equal(x, tf.cast(clip_min, x.dtype)))

    if clip_max is not None:
        asserts.append(
            utils_tf.assert_less_equal(x, tf.cast(clip_max, x.dtype)))

    # Make sure the caller has not passed probs by accident
    assert logits.op.type != 'Softmax'

    if y is None:
        # Using model predictions as ground truth to avoid label leaking
        preds_max = reduce_max(logits, 1, keepdims=True)
        y = tf.cast(tf.equal(logits, preds_max), tf.float32)
        y = tf.stop_gradient(y)
    # Normalise so that each row of labels sums to one (ties in the
    # predicted class share the probability mass).
    y = y / reduce_sum(y, 1, keepdims=True)

    # Compute loss
    loss = softmax_cross_entropy_with_logits(labels=y, logits=logits)
    if targeted:
        # Descend on the loss towards the target label instead of ascending.
        loss = -loss

    # Define gradient of loss wrt input
    grad, = tf.gradients(loss, x)

    if clip_grad:
        grad = utils_tf.zero_out_clipped_grads(grad, x, clip_min, clip_max)

    # All axes except the batch axis, and the number of components per example.
    red_ind = list(range(1, len(grad.get_shape())))
    dim = tf.reduce_prod(tf.shape(x)[1:])

    abs_grad = tf.reshape(tf.abs(grad), (-1, dim))

    # if q is a scalar, broadcast it to a vector of same length as the batch dim
    q = tf.cast(tf.broadcast_to(q, tf.shape(x)[0:1]), tf.float32)
    k = tf.cast(tf.floor(q / 100 * tf.cast(dim, tf.float32)), tf.int32)
    # q == 100 yields k == dim, which is one past the last valid column of
    # the sorted gradients; clamp so the largest component is selected.
    k = tf.minimum(k, dim - 1)

    # `tf.sort` is much faster than `tf.contrib.distributions.percentile`.
    # For TF <= 1.12, use `tf.nn.top_k` as `tf.sort` is not implemented.
    if LooseVersion(tf.__version__) <= LooseVersion('1.12.0'):
        # Negating twice turns top_k's descending order into ascending order.
        sorted_grad = -tf.nn.top_k(-abs_grad, k=dim, sorted=True)[0]
    else:
        # `tf.sort` is only available in TF 1.13 onwards
        sorted_grad = tf.sort(abs_grad, axis=-1)

    # Pick, for every example, the k-th smallest absolute gradient value.
    idx = tf.stack((tf.range(tf.shape(abs_grad)[0]), k), -1)
    percentiles = tf.gather_nd(sorted_grad, idx)
    tied_for_max = tf.greater_equal(abs_grad, tf.expand_dims(percentiles, -1))
    tied_for_max = tf.reshape(tf.cast(tied_for_max, x.dtype), tf.shape(grad))
    num_ties = tf.reduce_sum(tied_for_max, red_ind, keepdims=True)

    # Spread a unit of L1 mass evenly over the retained components.
    optimal_perturbation = tf.sign(grad) * tied_for_max / num_ties

    # Add perturbation to original example to obtain adversarial example
    adv_x = x + utils_tf.mul(eps, optimal_perturbation)

    # If clipping is needed, reset all values outside of [clip_min, clip_max]
    if (clip_min is not None) or (clip_max is not None):
        # We don't currently support one-sided clipping
        assert clip_min is not None and clip_max is not None
        adv_x = utils_tf.clip_by_value(adv_x, clip_min, clip_max)

    if sanity_checks:
        with tf.control_dependencies(asserts):
            adv_x = tf.identity(adv_x)

    return adv_x
Esempio n. 3
def fgm(x,
        logits,
        y=None,
        eps=0.3,
        ord=np.inf,
        loss_fn=softmax_cross_entropy_with_logits,
        clip_min=None,
        clip_max=None,
        clip_grad=False,
        targeted=False,
        sanity_checks=True):
  """
  TensorFlow implementation of the Fast Gradient Method.

  NOTE(review): the parameter list was truncated in the source text and has
  been reconstructed from this docstring and from the names used in the
  body. The default values of `eps`, `ord`, `loss_fn` and `sanity_checks`
  are presumed -- confirm them against the upstream implementation.

  :param x: the input placeholder
  :param logits: output of model.get_logits
  :param y: (optional) A placeholder for the true labels. If targeted
            is true, then provide the target label. Otherwise, only provide
            this parameter if you'd like to use true labels when crafting
            adversarial samples. Otherwise, model predictions are used as
            labels to avoid the "label leaking" effect (explained in this
            paper: https://arxiv.org/abs/1611.01236). Default is None.
            Labels should be one-hot-encoded.
  :param eps: the epsilon (input variation parameter)
  :param ord: (optional) Order of the norm (mimics NumPy).
              Possible values: np.inf, 1 or 2.
  :param loss_fn: Loss function that takes (labels, logits) as arguments and
                  returns loss
  :param clip_min: Minimum float value for adversarial example components
  :param clip_max: Maximum float value for adversarial example components
  :param clip_grad: (optional bool) Ignore gradient components
                    at positions where the input is already at the boundary
                    of the domain, and the update step will get clipped out.
  :param targeted: Is the attack targeted or untargeted? Untargeted, the
                   default, will try to make the label incorrect. Targeted
                   will instead try to move in the direction of being more
                   like y.
  :param sanity_checks: (optional bool) If true, the range assertions on `x`
                        are attached as control dependencies of the returned
                        tensor.
  :return: a tensor for the adversarial example
  """

  asserts = []

  # If a data range was specified, check that the input was in that range
  if clip_min is not None:
    asserts.append(utils_tf.assert_greater_equal(
        x, tf.cast(clip_min, x.dtype)))

  if clip_max is not None:
    asserts.append(utils_tf.assert_less_equal(x, tf.cast(clip_max, x.dtype)))

  # Make sure the caller has not passed probs by accident
  assert logits.op.type != 'Softmax'

  if y is None:
    # Using model predictions as ground truth to avoid label leaking
    preds_max = reduce_max(logits, 1, keepdims=True)
    y = tf.cast(tf.equal(logits, preds_max), dtype=tf.float32)
    y = tf.stop_gradient(y)
  # Normalise so that each row of labels sums to one (ties in the predicted
  # class share the probability mass).
  y = y / reduce_sum(y, 1, keepdims=True)

  # Compute loss
  loss = loss_fn(labels=y, logits=logits)
  if targeted:
    # Descend on the loss towards the target label instead of ascending.
    loss = -loss

  # Define gradient of loss wrt input
  grad, = tf.gradients(ys=loss, xs=x)

  if clip_grad:
    grad = utils_tf.zero_out_clipped_grads(grad, x, clip_min, clip_max)

  optimal_perturbation = optimize_linear(grad, eps, ord)

  # Add perturbation to original example to obtain adversarial example
  adv_x = x + optimal_perturbation

  # If clipping is needed, reset all values outside of [clip_min, clip_max]
  if (clip_min is not None) or (clip_max is not None):
    # We don't currently support one-sided clipping
    assert clip_min is not None and clip_max is not None
    adv_x = utils_tf.clip_by_value(adv_x, clip_min, clip_max)

  if sanity_checks:
    with tf.control_dependencies(asserts):
      adv_x = tf.identity(adv_x)

  return adv_x