Example #1
0
    def fgm(self, x, labels, targeted=False):
        """
        Craft an adversarial example with a single Fast Gradient Method
        step (TensorFlow Eager).
        :param x: the input variable
        :param labels: labels handed to the cross-entropy loss as ``y``
        :param targeted: Is the attack targeted or untargeted? Untargeted, the
                         default, will try to make the label incorrect.
                         Targeted will instead try to move in the direction
                         of being more like ``labels``.
        :return: a tensor for the adversarial example
        """
        with tf.GradientTape() as tape:
            # The tape only tracks trainable variables on its own; watch the
            # input explicitly since it may mix trainable and non-trainable
            # variables.
            tape.watch(x)
            cross_entropy = LossCrossEntropy(self.model, smoothing=0.0)
            objective = cross_entropy.fprop(x=x, y=labels)
            # A targeted attack flips the sign of the loss
            objective = -objective if targeted else objective

        # Gradient of the (possibly negated) loss with respect to the input
        input_grad = tape.gradient(objective, x)
        step = attacks.optimize_linear(input_grad, self.eps, self.ord)
        perturbed = x + step

        # Clipping only happens when both bounds are configured; with either
        # bound missing the perturbed input is returned as is.
        if self.clip_min is None or self.clip_max is None:
            return perturbed
        return tf.clip_by_value(perturbed, self.clip_min, self.clip_max)
        def body(i, ax, m):
            """
            One iteration of the momentum-accumulating attack step.

            ``self``, ``x``, ``y`` and ``targeted`` are read from the
            enclosing scope. Presumably used as a loop body (e.g. for
            ``tf.while_loop``) -- the caller is not visible here.
            :param i: iteration counter
            :param ax: adversarial example at the start of this iteration
            :param m: gradient accumulated over previous iterations
            :return: tuple ``(i + 1, new ax, new m)``
            :raises NotImplementedError: if ``self.ord == 1``
            """
            # Reject the untested norm up front, before any ops are created
            if self.ord == 1:
                raise NotImplementedError(
                    "This attack hasn't been tested for ord=1. "
                    "It's not clear that FGM makes a good inner "
                    "loop step for iterative optimization since "
                    "it updates just one coordinate at a time.")

            logits = self.model.get_logits(ax)
            loss = self.loss_func(labels=y, logits=logits)
            if targeted:
                loss = -loss

            # Define gradient of loss wrt input
            grad, = tf.gradients(loss, ax)

            # Normalize the current gradient by its mean absolute value taken
            # over every axis except the first, then add it to the
            # accumulated gradient. The floor avoids division by zero.
            red_ind = list(range(1, len(grad.get_shape())))
            avoid_zero_div = tf.cast(1e-12, grad.dtype)
            grad = grad / tf.maximum(
                avoid_zero_div,
                reduce_mean(tf.abs(grad), red_ind, keepdims=True))
            m = self.decay_factor * m + grad

            optimal_perturbation = optimize_linear(m, self.eps_iter, self.ord)

            # Update the adversarial example, then pass the total
            # perturbation (ax - x) through clip_eta with self.ord / self.eps
            # (presumably a projection onto the eps-ball -- confirm in
            # utils_tf).
            ax = ax + optimal_perturbation
            ax = x + utils_tf.clip_eta(ax - x, self.ord, self.eps)

            if self.clip_min is not None and self.clip_max is not None:
                ax = utils_tf.clip_by_value(ax, self.clip_min, self.clip_max)

            # Block gradients from flowing back through earlier iterations
            ax = tf.stop_gradient(ax)

            return i + 1, ax, m
def fgm_perturb(x,
                y,
                loss_fn,
                clip_min=None,
                clip_max=None,
                ord=np.inf,
                eps=0.3):
    """
    Perturb ``x`` by one step derived from the gradient of ``loss_fn(x)``.
    :param x: the input tensor; the loss is differentiated with respect to it
    :param y: not used by this function
    :param loss_fn: callable taking ``x`` and returning the loss
    :param clip_min: lower clipping bound; must be given together with
                     ``clip_max``
    :param clip_max: upper clipping bound; must be given together with
                     ``clip_min``
    :param ord: forwarded to ``optimize_linear``
    :param eps: forwarded to ``optimize_linear``
    :return: ``x`` plus the perturbation, clipped when bounds are given
    """
    (gradient,) = tf.gradients(loss_fn(x), x)
    perturbed = x + optimize_linear(gradient, eps, ord)

    # No bounds at all: nothing to clip
    if clip_min is None and clip_max is None:
        return perturbed

    # We don't currently support one-sided clipping
    assert clip_min is not None and clip_max is not None
    return utils_tf.clip_by_value(perturbed, clip_min, clip_max)
def fgm(x,
        logits,
        y=None,
        eps=0.3,
        ord=np.inf,
        clip_min=None,
        clip_max=None,
        targeted=False,
        sanity_checks=True,
        loss_func=None):
    """
        TensorFlow implementation of the Fast Gradient Method.
        :param x: the input placeholder
        :param logits: output of model.get_logits
        :param y: (optional) A placeholder for the model labels. If targeted
        is true, then provide the target label. Otherwise, only provide
        this parameter if you'd like to use true labels when crafting
        adversarial samples. Otherwise, model predictions are used as
        labels to avoid the "label leaking" effect (explained in this
        paper: https://arxiv.org/abs/1611.01236). Default is None.
        Labels should be one-hot-encoded.
        :param eps: the epsilon (input variation parameter)
        :param ord: (optional) Order of the norm (mimics NumPy).
        Possible values: np.inf, 1 or 2.
        :param clip_min: Minimum float value for adversarial example components
        :param clip_max: Maximum float value for adversarial example components
        :param targeted: Is the attack targeted or untargeted? Untargeted, the
        default, will try to make the label incorrect. Targeted
        will instead try to move in the direction of being more
        like y.
        :param sanity_checks: If True, the returned tensor carries the
        input-range assertions (built from clip_min / clip_max) as control
        dependencies.
        :param loss_func: callable invoked as
        ``loss_func(labels=y, logits=logits)`` whose result is differentiated
        with respect to x. Required in practice: the ``None`` default is only
        a placeholder and is rejected.
        :return: a tensor for the adversarial example
        :raises TypeError: if loss_func is not callable
        """

    # Fail fast, before any ops are created, instead of hitting an opaque
    # "'NoneType' object is not callable" when the loss is computed.
    if not callable(loss_func):
        raise TypeError(
            "fgm() requires a callable loss_func accepting `labels` and "
            "`logits` keyword arguments; got %r" % (loss_func,))

    asserts = []

    # If a data range was specified, check that the input was in that range
    if clip_min is not None:
        asserts.append(
            utils_tf.assert_greater_equal(x, tf.cast(clip_min, x.dtype)))

    if clip_max is not None:
        asserts.append(
            utils_tf.assert_less_equal(x, tf.cast(clip_max, x.dtype)))

    # Make sure the caller has not passed probs by accident
    assert logits.op.type != 'Softmax', \
        "fgm() expects logits, not softmax probabilities"

    if y is None:
        # Using model predictions as ground truth to avoid label leaking.
        # Every entry equal to the row maximum becomes 1.0, so ties produce
        # several ones in a row; the rows are not renormalized.
        preds_max = reduce_max(logits, 1, keepdims=True)
        y = tf.cast(tf.equal(logits, preds_max), tf.float32)
        y = tf.stop_gradient(y)

    # Compute loss
    loss = loss_func(labels=y, logits=logits)
    if targeted:
        loss = -loss

    # Define gradient of loss wrt input
    grad, = tf.gradients(loss, x)

    optimal_perturbation = optimize_linear(grad, eps, ord)

    # Add perturbation to original example to obtain adversarial example
    adv_x = x + optimal_perturbation

    # If clipping is needed, reset all values outside of [clip_min, clip_max]
    if (clip_min is not None) or (clip_max is not None):
        # We don't currently support one-sided clipping
        assert clip_min is not None and clip_max is not None, \
            "clip_min and clip_max must be provided together"
        adv_x = utils_tf.clip_by_value(adv_x, clip_min, clip_max)

    if sanity_checks:
        # Tie the range assertions to the output so they run whenever the
        # adversarial example is evaluated
        with tf.control_dependencies(asserts):
            adv_x = tf.identity(adv_x)

    return adv_x