Example #1
0
    def fgm(self, x, labels, targeted=False):
        """
        Fast Gradient Method, TensorFlow Eager version.
        :param x: the input variable
        :param labels: the labels handed to the cross-entropy loss as `y`
        :param targeted: Is the attack targeted or untargeted? Untargeted, the
                         default, will try to make the label incorrect.
                         Targeted will instead try to move in the direction
                         of being more like y.
        :return: a tensor for the adversarial example
        """
        with tf.GradientTape() as tape:
            # The input has to be watched explicitly because it may be a
            # combination of trainable and non-trainable variables.
            tape.watch(x)
            xent = LossCrossEntropy(self.model, smoothing=0.0)
            objective = xent.fprop(x=x, y=labels)
            if targeted:
                # A targeted attack uses the negated loss.
                objective = -objective

        # Gradient of the (possibly negated) loss with respect to the input
        input_grad = tape.gradient(objective, x)

        # The perturbation is computed by `attacks.optimize_linear` from the
        # gradient, `self.eps` and `self.ord`.
        perturbation = attacks.optimize_linear(input_grad, self.eps, self.ord)
        adv_x = x + perturbation

        # Clipping is applied only when both bounds are configured.
        if self.clip_min is None or self.clip_max is None:
            return adv_x
        return tf.clip_by_value(adv_x, self.clip_min, self.clip_max)
Example #2
0
 def test_xe_smoothing(self):
     """Cross-entropy with smoothing=0.1 matches the expected values.

     The loss tensor is evaluated twice in the same session with the same
     feed and both results are compared against the same expected values.
     """
     xent = LossCrossEntropy(self.model, smoothing=0.1)
     loss_tensor = xent.fprop(self.x, self.y)
     expected = [2.10587597, 1.47194624]
     with tf.Session() as sess:
         results = [
             sess.run(loss_tensor,
                      feed_dict={self.x: self.vx, self.y: self.vy})
             for _ in range(2)
         ]
     for value in results:
         self.assertClose(value, expected, atol=1e-6)
Example #3
0
 def test_xe(self):
     """Cross-entropy without smoothing matches the expected values.

     The loss tensor is evaluated twice in the same session with the same
     feed and both results are compared against the same expected values.
     """
     xent = LossCrossEntropy(self.model, smoothing=0.)
     loss_tensor = xent.fprop(self.x, self.y)
     expected = [2.210599660, 1.53666997]
     with tf.Session() as sess:
         results = [
             sess.run(loss_tensor,
                      feed_dict={self.x: self.vx, self.y: self.vy})
             for _ in range(2)
         ]
     for value in results:
         self.assertClose(value, expected, atol=1e-6)
Example #4
0
    def fgm(self, x, labels, targeted=False):
        """
        TensorFlow Eager implementation of the Fast Gradient Method.
        :param x: the input variable
        :param labels: the labels passed as `y` to the cross-entropy loss
        :param targeted: Is the attack targeted or untargeted? Untargeted, the
                         default, will try to make the label incorrect.
                         Targeted will instead try to move in the direction
                         of being more like y.
        :return: a tensor for the adversarial example
        :raises NotImplementedError: if `self.ord` is not np.inf, 1 or 2
        """
        # Compute loss
        with tf.GradientTape() as tape:
            # The tape only tracks trainable variables automatically; watch
            # the input explicitly so the gradient below is defined even
            # when x is a plain tensor or a non-trainable variable.
            tape.watch(x)
            loss_obj = LossCrossEntropy(self.model, smoothing=0.)
            loss = loss_obj.fprop(x=x, y=labels)
            if targeted:
                loss = -loss

        # Define gradient of loss wrt input
        grad = tape.gradient(loss, x)

        # Lower bound for the norms used as denominators, so that an
        # all-zero gradient yields a zero perturbation instead of NaN.
        avoid_zero_div = 1e-12

        if self.ord == np.inf:
            # Take sign of gradient
            normalized_grad = tf.sign(grad)
            # The following line should not change the numerical results.
            # It applies only because `normalized_grad` is the output of
            # a `sign` op, which has zero derivative anyway.
            # It should not be applied for the other norms, where the
            # perturbation has a non-zero derivative.
            normalized_grad = tf.stop_gradient(normalized_grad)
        elif self.ord == 1:
            # Reduce over every axis except the first one.
            red_ind = list(range(1, len(x.get_shape())))
            abs_sum = tf.reduce_sum(tf.abs(grad), axis=red_ind,
                                    keepdims=True)
            normalized_grad = grad / tf.maximum(abs_sum, avoid_zero_div)
        elif self.ord == 2:
            # Reduce over every axis except the first one.
            red_ind = list(range(1, len(x.get_shape())))
            square = tf.reduce_sum(tf.square(grad), axis=red_ind,
                                   keepdims=True)
            normalized_grad = grad / tf.sqrt(
                tf.maximum(square, avoid_zero_div))
        else:
            raise NotImplementedError("Only L-inf, L1 and L2 norms are "
                                      "currently implemented.")

        # Multiply by constant epsilon
        scaled_grad = self.eps * normalized_grad

        # Add perturbation to original example to obtain adversarial example
        adv_x = x + scaled_grad

        # If clipping is needed
        # reset all values outside of [clip_min, clip_max]
        if (self.clip_min is not None) and (self.clip_max is not None):
            adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)
        return adv_x
def train(model,
          X_train=None,
          Y_train=None,
          save=False,
          predictions_adv=None,
          evaluate=None,
          args=None,
          rng=None,
          var_list=None,
          attack=None,
          attack_args=None):
    """
    Train a TF Eager model
    :param model: instance of cleverhans model, takes in input batch,
                  gives out probs(softmax layer).
    :param X_train: numpy array with training inputs
    :param Y_train: numpy array with training outputs
    :param save: boolean controlling the save operation
    :param predictions_adv: not read by this function; adversarial training
                            is controlled by `attack` / `attack_args`.
    :param evaluate: function that is run after each training epoch
                     (typically to display the test/validation accuracy).
    :param args: dict or argparse `Namespace` object.
                 Should contain `nb_epochs`, `learning_rate`,
                 `batch_size`
                 If save is True, should also contain 'train_dir'
                 and 'filename'
    :param rng: Instance of numpy.random.RandomState
    :param var_list: not read by this function; the variables returned by
                     `model.get_params()` are the ones that get updated.
    :param attack: Instance of the class cleverhans.attacks.attacks_eager
    :param attack_args: Parameters required for the attack.
    :return: True if model trained
    :raises ValueError: if only one of `attack` / `attack_args` is given,
                        or if `X_train` or `Y_train` is missing.
    """
    args = _ArgsWrapper(args or {})
    if (attack is None) != (attack_args is None):
        raise ValueError("attack and attack_args must be passed together.")
    if X_train is None or Y_train is None:
        raise ValueError("X_train argument and Y_train argument "
                         "must be supplied.")
    # Check that necessary arguments were given (see doc above)
    assert args.nb_epochs, "Number of epochs was not given in args dict"
    assert args.learning_rate, "Learning rate was not given in args dict"
    assert args.batch_size, "Batch size was not given in args dict"

    if save:
        assert args.train_dir, "Directory for save was not given in args dict"
        assert args.filename, "Filename for save was not given in args dict"

    if rng is None:
        rng = np.random.RandomState()

    # Optimizer
    tfe = tf.contrib.eager
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    # Variables that hold the current batch; they are re-assigned in place
    # at every training step.
    batch_x = tfe.Variable(X_train[0:args.batch_size], dtype=tf.float32)
    batch_y = tfe.Variable(Y_train[0:args.batch_size], dtype=tf.float32)

    # The number of batches depends only on the data size and batch size,
    # so it is computed once for all epochs.
    nb_batches = int(math.ceil(float(len(X_train)) / args.batch_size))
    assert nb_batches * args.batch_size >= len(X_train)

    for epoch in range(args.nb_epochs):
        # Indices to shuffle training set
        index_shuf = list(range(len(X_train)))
        rng.shuffle(index_shuf)

        prev = time.time()
        for batch in range(nb_batches):

            # Compute batch start and end indices
            start, end = batch_indices(batch, len(X_train), args.batch_size)

            # Perform one training step
            tf.assign(batch_x, X_train[index_shuf[start:end]])
            tf.assign(batch_y, Y_train[index_shuf[start:end]])
            # Compute grads
            with tf.GradientTape() as tape:
                # Define loss
                loss_clean_obj = LossCrossEntropy(model, smoothing=0.)
                loss_clean = loss_clean_obj.fprop(x=batch_x, y=batch_y)
                loss = loss_clean
                # Adversarial training: average the clean loss with the
                # loss on examples generated by the attack.
                if attack is not None:
                    batch_adv_x = attack.generate(batch_x, **attack_args)
                    loss_adv_obj = LossCrossEntropy(model, smoothing=0.)
                    loss_adv = loss_adv_obj.fprop(x=batch_adv_x, y=batch_y)
                    loss = (loss_clean + loss_adv) / 2.0
            # Apply grads
            model_variables = model.get_params()
            grads = tape.gradient(loss, model_variables)
            optimizer.apply_gradients(zip(grads, model_variables))

        assert end >= len(X_train)  # Check that all examples were used
        cur = time.time()
        _logger.info("Epoch %s took %s seconds", epoch, cur - prev)
        if evaluate is not None:
            evaluate()

    if save:
        save_path = os.path.join(args.train_dir, args.filename)
        # `tf.train.Saver.save` expects a session as its first argument,
        # which does not exist here. Use the eager Saver adapter, which is
        # built from an explicit variable list and saves to a file prefix.
        saver = tfe.Saver(model.get_params())
        saver.save(save_path)
        _logger.info("Completed model training and saved at: %s", save_path)
    else:
        _logger.info("Completed model training.")

    return True