Example #1
0
def train(ARGS):
    """Build the training graph, run the training loop, optionally save.

    The objective is a weighted sum of the loss terms enabled on ``ARGS``;
    a term whose flag is off stays the Python float ``0.0``:

    * ``ARGS.ct``  -- mean softmax cross-entropy on the clean logits
    * ``ARGS.at``  -- loss returned by ``get_at_loss``
    * ``ARGS.wd``  -- sum of ``tf.nn.l2_loss`` over every trainable variable
      whose op name does not contain ``'beta'``
    * ``ARGS.alp`` -- loss returned by ``get_alp_loss``
    * ``ARGS.clp`` -- loss returned by ``get_clp_loss``
    * ``ARGS.lsq`` -- loss returned by ``get_lsq_loss``

    Besides ``ARGS`` the function reads names that are not defined in this
    block (``input_shape``, ``n_classes``, ``trainloader``, ``testloader``,
    ``adv_testloader``, ``Model``, ``model_train``, ``variable_summaries``
    and the ``get_*_loss`` helpers); they must exist at module level.

    The summary writers and the session are closed before the function
    returns, including when graph construction or training raises.

    Args:
        ARGS: Namespace-like object. Attributes read here: the loss flags
            ``ct``, ``at``, ``wd``, ``alp``, ``clp``, ``lsq``; the weights
            ``ct_lambda``, ``at_lambda``, ``wd_lambda``, ``clp_lambda``,
            ``lsq_lambda``, ``alp_lambda``; the attack settings ``eps``,
            ``eps_iter``, ``nb_iter``; the optimizer setting ``lr``; the
            loop settings ``epochs``, ``eval_step``, ``adv_eval_step``;
            ``log_dir``; and ``save``, ``save_dir``, ``filename``.

    Returns:
        None.
    """

    def _log_test_scalar(tag, value, step):
        # Each scalar goes out as its own summary event on the test writer.
        summary = tf.Summary()
        summary.value.add(tag=tag, simple_value=value)
        writer_test.add_summary(summary, step)

    # Callback handed to model_train: evaluate on clean test data and log it.
    def evaluate_clean(epoch):
        from train_utils import clean_eval
        test_accuracy, test_loss, _ = clean_eval(sess, x, y, is_training,
                                                 testloader, n_classes, logits,
                                                 preds)
        _log_test_scalar('Evaluation/accuracy/test', test_accuracy, epoch)
        _log_test_scalar('Evaluation/error/test', 1.0 - test_accuracy, epoch)
        _log_test_scalar('Evaluation/loss/test', test_loss, epoch)

    # Callback handed to model_train: evaluate on adversarial test data.
    def evaluate_adversarial(epoch):
        from train_utils import adversarial_eval
        adv_accuracy, adv_loss = adversarial_eval(sess,
                                                  x,
                                                  y,
                                                  is_training,
                                                  adv_testloader,
                                                  n_classes,
                                                  preds,
                                                  adv_preds,
                                                  eval_all=True)
        _log_test_scalar('Evaluation/adversarial-accuracy/test',
                         adv_accuracy, epoch)
        _log_test_scalar('Evaluation/adversarial-error/test',
                         1.0 - adv_accuracy, epoch)
        _log_test_scalar('Evaluation/adversarial-loss/test', adv_loss, epoch)

    # Define computational graph
    with tf.Graph().as_default() as g:
        # Define placeholders
        with tf.device('/gpu:0'), tf.name_scope('Placeholders'):
            x = tf.placeholder(dtype=tf.float32,
                               shape=input_shape,
                               name='inputs')
            # The pair placeholders are consumed only by get_clp_loss below
            # and are forwarded to model_train.
            x_pair1 = tf.placeholder(dtype=tf.float32,
                                     shape=input_shape,
                                     name='x-pair1')
            x_pair2 = tf.placeholder(dtype=tf.float32,
                                     shape=input_shape,
                                     name='x-pair2')
            y = tf.placeholder(dtype=tf.float32,
                               shape=(None, n_classes),
                               name='labels')
            is_training = tf.placeholder_with_default(True,
                                                      shape=(),
                                                      name='is-training')

        # Define TF session
        config = tf.ConfigProto(log_device_placement=False,
                                allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        sess = tf.Session(graph=g, config=config)

        # Writers are created further down; start from None so the cleanup
        # in `finally` also works when graph construction fails early.
        writer_train = None
        writer_test = None
        try:
            # Define model
            with tf.name_scope('Model'), tf.device('/gpu:0'):
                model = Model(nb_classes=n_classes,
                              input_shape=input_shape,
                              is_training=is_training)

                # Define forward-pass
                with tf.name_scope('Logits'):
                    logits = model.get_logits(x)
                with tf.name_scope('Probs'):
                    preds = tf.nn.softmax(logits)

                with tf.name_scope('Accuracy'):
                    ground_truth = tf.argmax(y, axis=1)
                    predicted_label = tf.argmax(preds, axis=1)
                    correct_prediction = tf.equal(predicted_label,
                                                  ground_truth)
                    acc = tf.reduce_mean(tf.to_float(correct_prediction),
                                         name='accuracy')
                    tf.add_to_collection('accuracies', acc)

                    err = tf.identity(1.0 - acc, name='error')
                    tf.add_to_collection('accuracies', err)

                # Define losses
                with tf.name_scope('Losses'):
                    # Disabled terms stay plain floats and therefore add
                    # nothing to the total below.
                    ce_loss, wd_loss, clp_loss = 0.0, 0.0, 0.0
                    lsq_loss, at_loss, alp_loss = 0.0, 0.0, 0.0
                    adv_logits = None

                    if ARGS.ct:
                        with tf.name_scope('Cross-Entropy-Loss'):
                            ce_loss = tf.reduce_mean(
                                tf.nn.softmax_cross_entropy_with_logits(
                                    logits=logits, labels=y),
                                name='cross-entropy-loss')

                            tf.add_to_collection('losses', ce_loss)

                    if ARGS.at:
                        with tf.name_scope('Adversarial-Cross-Entropy-Loss'):
                            # NOTE(review): eps / eps_iter are passed here
                            # unscaled, while the PGD attacker below divides
                            # them by 255 -- confirm get_at_loss rescales.
                            at_loss, adv_logits = get_at_loss(
                                sess, x, y, model, ARGS.eps, ARGS.eps_iter,
                                ARGS.nb_iter)
                            at_loss = tf.identity(at_loss, name='at-loss')
                            tf.add_to_collection('losses', at_loss)

                    with tf.name_scope('Regularizers'):
                        if ARGS.wd:
                            with tf.name_scope('Weight-Decay'):
                                for var in tf.trainable_variables():
                                    if 'beta' in var.op.name:
                                        # Do not regularize bias of batch
                                        # normalization
                                        continue
                                    wd_loss += tf.nn.l2_loss(var)

                                reg_loss = tf.identity(wd_loss,
                                                       name='wd-loss')
                                tf.add_to_collection('losses', reg_loss)

                        if ARGS.alp:
                            with tf.name_scope('Adversarial-Logit-Pairing'):
                                # adv_logits is still None here unless the
                                # ARGS.at branch above produced it.
                                alp_loss = get_alp_loss(
                                    sess, x, y, logits, adv_logits, model,
                                    ARGS.eps, ARGS.eps_iter, ARGS.nb_iter)

                                alp_loss = tf.identity(alp_loss,
                                                       name='alp-loss')
                                tf.add_to_collection('losses', alp_loss)

                        if ARGS.clp:
                            with tf.name_scope('Clean-Logit-Pairing'):
                                clp_loss = get_clp_loss(
                                    x_pair1, x_pair2, model)
                                clp_loss = tf.identity(clp_loss,
                                                       name='clp-loss')
                                tf.add_to_collection('losses', clp_loss)

                        if ARGS.lsq:
                            with tf.name_scope('Logit-Squeezing'):
                                lsq_loss = get_lsq_loss(x, model)
                                lsq_loss = tf.identity(lsq_loss,
                                                       name='lsq-loss')
                                tf.add_to_collection('losses', lsq_loss)

                    with tf.name_scope('Total-Loss'):
                        # Define objective function
                        total_loss = (ARGS.ct_lambda * ce_loss
                                      + ARGS.at_lambda * at_loss
                                      + ARGS.wd_lambda * wd_loss
                                      + ARGS.clp_lambda * clp_loss
                                      + ARGS.lsq_lambda * lsq_loss
                                      + ARGS.alp_lambda * alp_loss)

                        total_loss = tf.identity(total_loss,
                                                 name='total-loss')
                        tf.add_to_collection('losses', total_loss)

                # Define PGD adversary (its predictions feed
                # evaluate_adversarial, not the training objective).
                with tf.name_scope('PGD-Attacker'):
                    # eps and eps_iter are divided by 255 while the inputs
                    # are clipped to [0, 1] -- presumably ARGS gives them in
                    # 0-255 pixel units; confirm against the CLI.
                    pgd_params = {
                        'ord': np.inf,
                        'y': y,
                        'eps': ARGS.eps / 255,
                        'eps_iter': ARGS.eps_iter / 255,
                        'nb_iter': ARGS.nb_iter,
                        'rand_init': True,
                        'rand_minmax': ARGS.eps / 255,
                        'clip_min': 0.,
                        'clip_max': 1.,
                        'sanity_checks': True
                    }

                    pgd = ProjectedGradientDescent(model, sess=sess)
                    adv_x = pgd.generate(x, **pgd_params)

                    # Rebinds adv_logits; the loss terms above have already
                    # captured the earlier value.
                    with tf.name_scope('Logits'):
                        adv_logits = model.get_logits(adv_x)
                    with tf.name_scope('Probs'):
                        adv_preds = tf.nn.softmax(adv_logits)

            # Define optimizer
            with tf.device('/gpu:0'), tf.name_scope('Optimizer'):
                # Define global step variable
                global_step = tf.get_variable(
                    name='global_step',
                    shape=[],  # scalar
                    dtype=tf.float32,
                    initializer=tf.zeros_initializer(),
                    trainable=False)

                optimizer = tf.train.AdamOptimizer(learning_rate=ARGS.lr,
                                                   beta1=0.9,
                                                   beta2=0.999,
                                                   epsilon=1e-6,
                                                   use_locking=False,
                                                   name='Adam')
                trainable_vars = tf.trainable_variables()

                # UPDATE_OPS holds the moving_mean / moving_variance updates
                # for batch normalization; make every train step run them.
                update_bn_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(update_bn_ops):
                    grads_and_vars = optimizer.compute_gradients(
                        total_loss, trainable_vars)
                    train_step = optimizer.apply_gradients(
                        grads_and_vars, global_step=global_step)

            # Add Tensorboard summaries
            with tf.device('/gpu:0'):
                # Create file writers
                writer_train = tf.summary.FileWriter(ARGS.log_dir + '/train',
                                                     graph=g)
                writer_test = tf.summary.FileWriter(ARGS.log_dir + '/test')

                # Add summary for input images
                with tf.name_scope('Image-Summaries'):
                    # Create image summary ops
                    tf.summary.image('input',
                                     x,
                                     max_outputs=2,
                                     collections=['training'])

                # Add summaries for the training losses
                # NOTE(review): `entry.name` is the tensor name (with its
                # output-index suffix); TF may rewrite it into a legal
                # summary tag. Kept as-is so existing tags do not change.
                for entry in tf.get_collection('losses'):
                    tf.summary.scalar(entry.name,
                                      entry,
                                      collections=['training'])

                # Add summaries for the training accuracies
                for entry in tf.get_collection('accuracies'):
                    tf.summary.scalar(entry.name,
                                      entry,
                                      collections=['training'])

                # Add summaries for all trainable vars
                for var in trainable_vars:
                    tf.summary.histogram(var.op.name,
                                         var,
                                         collections=['training'])
                    var_norm = tf.norm(var, ord='euclidean')
                    tf.summary.scalar(var.op.name + '/l2norm',
                                      var_norm,
                                      collections=['training'])

                # Add summaries for variable gradients
                for grad, var in grads_and_vars:
                    if grad is not None:
                        tf.summary.histogram(var.op.name + '/gradients',
                                             grad,
                                             collections=['training'])
                        grad_norm = tf.norm(grad, ord='euclidean')
                        tf.summary.scalar(var.op.name + '/gradients/l2norm',
                                          grad_norm,
                                          collections=['training'])

                # Add summaries for the logits and model predictions
                with tf.name_scope('Logits-Summaries'):
                    variable_summaries(tf.identity(logits, name='logits'),
                                       name='logits',
                                       collections=['training', 'test'],
                                       histo=True)
                with tf.name_scope('Predictions-Summaries'):
                    variable_summaries(tf.identity(preds,
                                                   name='predictions'),
                                       name='predictions',
                                       collections=['training', 'test'],
                                       histo=True)

            # Initialize all variables
            with sess.as_default():
                tf.global_variables_initializer().run()

            # Collect training params
            train_params = {
                'epochs': ARGS.epochs,
                'eval_step': ARGS.eval_step,
                'adv_eval_step': ARGS.adv_eval_step,
                'n_classes': n_classes,
                'clp': ARGS.clp
            }

            # Start training loop
            model_train(sess,
                        x,
                        y,
                        x_pair1,
                        x_pair2,
                        is_training,
                        trainloader,
                        train_step,
                        args=train_params,
                        evaluate=evaluate_clean,
                        adv_evaluate=evaluate_adversarial,
                        writer_train=writer_train)

            # Save the trained model
            if ARGS.save:
                save_path = os.path.join(ARGS.save_dir, ARGS.filename)
                saver = tf.train.Saver(var_list=tf.global_variables())
                saver.save(sess, save_path)
                print("Saved model at {:s}".format(str(ARGS.save_dir)))
        finally:
            # Flush pending summary events and release the session even if
            # graph construction or training raised.
            try:
                for writer in (writer_train, writer_test):
                    if writer is not None:
                        writer.close()
            finally:
                sess.close()
def eval_robustness(ARGS, verbose=True):
    """Restore a trained model and evaluate it against a PGD or SPSA attack.

    An inference graph is built (``is-training`` defaults to ``False``),
    the weights are restored from ``ARGS.restore_path + '/model.ckpt'``,
    the selected attack is attached, and the evaluation itself is delegated
    to ``run_pgd_eval`` or ``run_spsa_eval`` over ``adv_testloader``.

    Besides ``ARGS`` the function reads names that are not defined in this
    block (``input_shape``, ``n_classes``, ``adv_testloader``, ``Model``,
    ``ProjectedGradientDescent``, ``SPSA``, ``run_pgd_eval`` and
    ``run_spsa_eval``); they must exist at module level.

    The session is closed before the function returns or raises.

    Args:
        ARGS: Namespace-like object. Attributes read here: ``attack``
            (``'PGD'`` or ``'SPSA'``), ``restore_path``, ``eps``,
            ``nb_iter``; for PGD also ``eps_iter`` and ``rand_init``; for
            SPSA also ``spsa_samples``, ``spsa_iters``, ``spsa_lr`` and
            ``spsa_delta``. ``ARGS`` is also forwarded unchanged to the
            ``run_*_eval`` helper.
        verbose: When true, print progress messages; also forwarded to the
            ``run_*_eval`` helper.

    Returns:
        Tuple ``(clean, adv_mean, adv_worstcase)`` as produced by the
        ``run_*_eval`` helper. ``run_spsa_eval`` yields only two values, so
        for SPSA ``adv_worstcase`` is the same object as ``adv_mean``.

    Raises:
        NotImplementedError: If ``ARGS.attack`` is neither ``'PGD'`` nor
            ``'SPSA'``.
    """
    # Reject an unsupported attack before paying for graph construction and
    # checkpoint restoration.
    attack = ARGS.attack
    if attack not in ('PGD', 'SPSA'):
        raise NotImplementedError(
            'Unsupported attack: {!r}'.format(attack))

    #############################################
    # Load pre-trained model
    #############################################

    if verbose:
        print('\n- Loading pre-trained model...')

    # Build evaluation graph
    eval_graph = tf.Graph()
    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf.Session(graph=eval_graph, config=config)

    try:
        with eval_graph.as_default(), tf.device('/gpu:0'):
            # Define placeholders
            with tf.name_scope('Placeholders'):
                x = tf.placeholder(dtype=tf.float32,
                                   shape=input_shape,
                                   name='inputs')
                y = tf.placeholder(dtype=tf.float32,
                                   shape=(None, n_classes),
                                   name='labels')
                is_training = tf.placeholder_with_default(False,
                                                          shape=(),
                                                          name='is-training')

            # Define model
            with tf.name_scope('Model'):
                model = Model(nb_classes=n_classes,
                              input_shape=input_shape,
                              is_training=is_training)

            # Define forward-pass
            with tf.name_scope('Logits'):
                logits = model.get_logits(x)
            with tf.name_scope('Probs'):
                preds = tf.nn.softmax(logits)

            # Restore the pre-trained model. The Saver is created before
            # the attack ops are added, so it only covers the variables
            # that exist at this point.
            with sess.as_default():
                saver = tf.train.Saver()
                saver.restore(sess, ARGS.restore_path + '/model.ckpt')

            # Define accuracy ops
            with tf.name_scope('Accuracy'):
                ground_truth = tf.argmax(y, axis=1)
                predicted_label = tf.argmax(preds, axis=1)
                correct_prediction = tf.equal(predicted_label, ground_truth)
                clean_acc = tf.reduce_mean(tf.to_float(correct_prediction),
                                           name='accuracy')

            if verbose:
                print('\n- Building {:s} attack graph...'.format(
                    ARGS.attack))

            if attack == 'PGD':
                # Define PGD adversary
                with tf.name_scope('PGD-Attacker'):
                    # eps and eps_iter are divided by 255 while the inputs
                    # are clipped to [0, 1] -- presumably ARGS gives them in
                    # 0-255 pixel units; confirm against the CLI.
                    pgd_params = {
                        'ord': np.inf,
                        'y': y,
                        'eps': ARGS.eps / 255,
                        'eps_iter': ARGS.eps_iter / 255,
                        'nb_iter': ARGS.nb_iter,
                        'rand_init': ARGS.rand_init,
                        'rand_minmax': ARGS.eps / 255,
                        'clip_min': 0.,
                        'clip_max': 1.,
                        'sanity_checks': True
                    }

                    pgd = ProjectedGradientDescent(model, sess=None)
                    adv_x = pgd.generate(x, **pgd_params)
            else:
                # Define SPSA adversary (only other value accepted above).
                # NOTE(review): the scope is named 'PGD-Attacker' here too,
                # which looks like a copy-paste leftover; kept so op names
                # in the graph do not change.
                with tf.name_scope('PGD-Attacker'):
                    spsa_params = {
                        'y': y,
                        'eps': ARGS.eps / 255,
                        'nb_iter': ARGS.nb_iter,
                        'spsa_samples': ARGS.spsa_samples,
                        'spsa_iters': ARGS.spsa_iters,
                        'clip_min': 0.,
                        'clip_max': 1.,
                        'learning_rate': ARGS.spsa_lr,
                        'delta': ARGS.spsa_delta
                    }

                    spsa = SPSA(model, sess=sess)
                    adv_x = spsa.generate(x, **spsa_params)

            with tf.name_scope('Logits'):
                adv_logits = model.get_logits(adv_x)
            with tf.name_scope('Probs'):
                adv_preds = tf.nn.softmax(adv_logits)

            # Unreduced cross-entropy on the adversarial logits (no
            # reduce_mean is applied here).
            adv_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=adv_logits, labels=y)
            adv_predicted_label = tf.argmax(adv_preds, axis=1)
            correct_prediction = tf.equal(adv_predicted_label, ground_truth)
            adv_accuracy = tf.reduce_mean(tf.to_float(correct_prediction),
                                          name='adv-accuracy')
            # True where the adversarial prediction differs from the label.
            is_adv_example = tf.not_equal(ground_truth, adv_predicted_label)

        #############################################
        # Run evaluation
        #############################################

        if verbose:
            print('\n- Running robustness evaluation against {:s} attacker...\n'.
                  format(ARGS.attack))

        if attack == 'PGD':
            clean, adv_mean, adv_worstcase = run_pgd_eval(
                x,
                y,
                is_training,
                sess,
                adv_testloader,
                clean_acc,
                adv_accuracy,
                adv_loss,
                is_adv_example,
                ARGS,
                save_loss_dist=False,
                verbose=verbose)
        else:
            clean, adv_mean = run_spsa_eval(x,
                                            y,
                                            is_training,
                                            sess,
                                            adv_testloader,
                                            clean_acc,
                                            adv_accuracy,
                                            adv_loss,
                                            is_adv_example,
                                            ARGS,
                                            save_loss_dist=False,
                                            verbose=verbose)
            # run_spsa_eval reports no separate worst case; reuse the mean
            # so callers always receive three values.
            adv_worstcase = adv_mean

        return clean, adv_mean, adv_worstcase
    finally:
        # Release the session on success and on every error path.
        sess.close()