def train(ARGS):

    # Define helper function for evaluating on test data during training
    def eval(epoch):
        from train_utils import clean_eval
        test_accuracy, test_loss, _ = clean_eval(sess, x, y, is_training,
                                                 testloader, n_classes,
                                                 logits, preds)

        # Write tensorboard summary
        acc_summary = tf.Summary()
        acc_summary.value.add(tag='Evaluation/accuracy/test',
                              simple_value=test_accuracy)
        writer_test.add_summary(acc_summary, epoch)

        # Write tensorboard summary
        err_summary = tf.Summary()
        err_summary.value.add(tag='Evaluation/error/test',
                              simple_value=1.0 - test_accuracy)
        writer_test.add_summary(err_summary, epoch)

        # Write tensorboard summary
        loss_summary = tf.Summary()
        loss_summary.value.add(tag='Evaluation/loss/test',
                               simple_value=test_loss)
        writer_test.add_summary(loss_summary, epoch)

    # Define helper function for evaluating on adversarial test data during training
    def adv_eval(epoch):
        from train_utils import adversarial_eval
        adv_accuracy, adv_loss = adversarial_eval(sess, x, y, is_training,
                                                  adv_testloader, n_classes,
                                                  preds, adv_preds,
                                                  eval_all=True)

        # Write tensorboard summary
        acc_summary = tf.Summary()
        acc_summary.value.add(tag='Evaluation/adversarial-accuracy/test',
                              simple_value=adv_accuracy)
        writer_test.add_summary(acc_summary, epoch)

        # Write tensorboard summary
        err_summary = tf.Summary()
        err_summary.value.add(tag='Evaluation/adversarial-error/test',
                              simple_value=1.0 - adv_accuracy)
        writer_test.add_summary(err_summary, epoch)

        # Write tensorboard summary
        loss_summary = tf.Summary()
        loss_summary.value.add(tag='Evaluation/adversarial-loss/test',
                               simple_value=adv_loss)
        writer_test.add_summary(loss_summary, epoch)

    # Define computational graph
    with tf.Graph().as_default() as g:

        # Define placeholders
        with tf.device('/gpu:0'):
            with tf.name_scope('Placeholders'):
                x = tf.placeholder(dtype=tf.float32,
                                   shape=input_shape,
                                   name='inputs')
                x_pair1 = tf.placeholder(dtype=tf.float32,
                                         shape=input_shape,
                                         name='x-pair1')
                x_pair2 = tf.placeholder(dtype=tf.float32,
                                         shape=input_shape,
                                         name='x-pair2')
                y = tf.placeholder(dtype=tf.float32,
                                   shape=(None, n_classes),
                                   name='labels')
                is_training = tf.placeholder_with_default(True,
                                                          shape=(),
                                                          name='is-training')

        # Define TF session
        config = tf.ConfigProto(log_device_placement=False,
                                allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        sess = tf.Session(graph=g, config=config)

        # Define model
        with tf.name_scope('Model'):
            with tf.device('/gpu:0'):
                model = Model(nb_classes=n_classes,
                              input_shape=input_shape,
                              is_training=is_training)

                # Define forward-pass
                with tf.name_scope('Logits'):
                    logits = model.get_logits(x)
                with tf.name_scope('Probs'):
                    preds = tf.nn.softmax(logits)

                with tf.name_scope('Accuracy'):
                    ground_truth = tf.argmax(y, axis=1)
                    predicted_label = tf.argmax(preds, axis=1)
                    correct_prediction = tf.equal(predicted_label, ground_truth)
                    acc = tf.reduce_mean(tf.to_float(correct_prediction),
                                         name='accuracy')
                    tf.add_to_collection('accuracies', acc)
                    err = tf.identity(1.0 - acc, name='error')
                    tf.add_to_collection('accuracies', err)

                # Define losses
                with tf.name_scope('Losses'):
                    ce_loss, wd_loss, clp_loss, lsq_loss, at_loss, alp_loss = \
                        0.0, 0.0, 0.0, 0.0, 0.0, 0.0
                    adv_logits = None

                    if ARGS.ct:
                        with tf.name_scope('Cross-Entropy-Loss'):
                            ce_loss = tf.reduce_mean(
                                tf.nn.softmax_cross_entropy_with_logits(
                                    logits=logits, labels=y),
                                name='cross-entropy-loss')
                            tf.add_to_collection('losses', ce_loss)

                    if ARGS.at:
                        with tf.name_scope('Adversarial-Cross-Entropy-Loss'):
                            at_loss, adv_logits = get_at_loss(
                                sess, x, y, model, ARGS.eps, ARGS.eps_iter,
                                ARGS.nb_iter)
                            at_loss = tf.identity(at_loss, name='at-loss')
                            tf.add_to_collection('losses', at_loss)

                    with tf.name_scope('Regularizers'):
                        if ARGS.wd:
                            with tf.name_scope('Weight-Decay'):
                                for var in tf.trainable_variables():
                                    if 'beta' in var.op.name:
                                        # Do not regularize bias of batch normalization
                                        continue
                                    # print('regularizing: ', var.op.name)
                                    wd_loss += tf.nn.l2_loss(var)
                                reg_loss = tf.identity(wd_loss, name='wd-loss')
                                tf.add_to_collection('losses', reg_loss)

                        if ARGS.alp:
                            with tf.name_scope('Adversarial-Logit-Pairing'):
                                alp_loss = get_alp_loss(
                                    sess, x, y, logits, adv_logits, model,
                                    ARGS.eps, ARGS.eps_iter, ARGS.nb_iter)
                                alp_loss = tf.identity(alp_loss, name='alp-loss')
                                tf.add_to_collection('losses', alp_loss)

                        if ARGS.clp:
                            with tf.name_scope('Clean-Logit-Pairing'):
                                clp_loss = get_clp_loss(x_pair1, x_pair2, model)
                                clp_loss = tf.identity(clp_loss, name='clp-loss')
                                tf.add_to_collection('losses', clp_loss)

                        if ARGS.lsq:
                            with tf.name_scope('Logit-Squeezing'):
                                lsq_loss = get_lsq_loss(x, model)
                                lsq_loss = tf.identity(lsq_loss, name='lsq-loss')
                                tf.add_to_collection('losses', lsq_loss)

                    with tf.name_scope('Total-Loss'):
                        # Define objective function
                        total_loss = (ARGS.ct_lambda * ce_loss) + (
                            ARGS.at_lambda * at_loss) + (ARGS.wd_lambda * wd_loss) + (
                                ARGS.clp_lambda * clp_loss) + (ARGS.lsq_lambda * lsq_loss) + (
                                    ARGS.alp_lambda * alp_loss)
                        total_loss = tf.identity(total_loss, name='total-loss')
                        tf.add_to_collection('losses', total_loss)

                # Define PGD adversary
                with tf.name_scope('PGD-Attacker'):
                    pgd_params = {
                        'ord': np.inf,
                        'y': y,
                        'eps': ARGS.eps / 255,
                        'eps_iter': ARGS.eps_iter / 255,
                        'nb_iter': ARGS.nb_iter,
                        'rand_init': True,
                        'rand_minmax': ARGS.eps / 255,
                        'clip_min': 0.,
                        'clip_max': 1.,
                        'sanity_checks': True
                    }
                    pgd = ProjectedGradientDescent(model, sess=sess)
                    adv_x = pgd.generate(x, **pgd_params)

                with tf.name_scope('Logits'):
                    adv_logits = model.get_logits(adv_x)
                with tf.name_scope('Probs'):
                    adv_preds = tf.nn.softmax(adv_logits)

        # Define optimizer
        with tf.device('/gpu:0'):
            with tf.name_scope('Optimizer'):
                # Define global step variable
                global_step = tf.get_variable(
                    name='global_step',
                    shape=[],  # scalar
                    dtype=tf.float32,
                    initializer=tf.zeros_initializer(),
                    trainable=False)

                optimizer = tf.train.AdamOptimizer(learning_rate=ARGS.lr,
                                                   beta1=0.9,
                                                   beta2=0.999,
                                                   epsilon=1e-6,
                                                   use_locking=False,
                                                   name='Adam')
                trainable_vars = tf.trainable_variables()

                # This collection stores the moving_mean and moving_variance
                # ops for batch normalization
                update_bn_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(update_bn_ops):
                    grads_and_vars = optimizer.compute_gradients(
                        total_loss, trainable_vars)
                    train_step = optimizer.apply_gradients(
                        grads_and_vars, global_step=global_step)

        # Add Tensorboard summaries
        with tf.device('/gpu:0'):
            # Create file writers
            writer_train = tf.summary.FileWriter(ARGS.log_dir + '/train', graph=g)
            writer_test = tf.summary.FileWriter(ARGS.log_dir + '/test')

            # Add summary for input images
            with tf.name_scope('Image-Summaries'):
                # Create image summary ops
                tf.summary.image('input', x, max_outputs=2,
                                 collections=['training'])

            # Add summaries for the training losses
            losses = tf.get_collection('losses')
            for entry in losses:
                tf.summary.scalar(entry.name, entry, collections=['training'])

            # Add summaries for the training accuracies
            accs = tf.get_collection('accuracies')
            for entry in accs:
                tf.summary.scalar(entry.name, entry, collections=['training'])

            # Add summaries for all trainable vars
            for var in trainable_vars:
                tf.summary.histogram(var.op.name, var, collections=['training'])
                var_norm = tf.norm(var, ord='euclidean')
                tf.summary.scalar(var.op.name + '/l2norm', var_norm,
                                  collections=['training'])

            # Add summaries for variable gradients
            for grad, var in grads_and_vars:
                if grad is not None:
                    tf.summary.histogram(var.op.name + '/gradients', grad,
                                         collections=['training'])
                    grad_norm = tf.norm(grad, ord='euclidean')
                    tf.summary.scalar(var.op.name + '/gradients/l2norm',
                                      grad_norm, collections=['training'])

            # Add summaries for the logits and model predictions
            with tf.name_scope('Logits-Summaries'):
                variable_summaries(tf.identity(logits, name='logits'),
                                   name='logits',
                                   collections=['training', 'test'],
                                   histo=True)
            with tf.name_scope('Predictions-Summaries'):
                variable_summaries(tf.identity(preds, name='predictions'),
                                   name='predictions',
                                   collections=['training', 'test'],
                                   histo=True)

        # Initialize all variables
        with sess.as_default():
            tf.global_variables_initializer().run()

        # Collect training params
        train_params = {
            'epochs': ARGS.epochs,
            'eval_step': ARGS.eval_step,
            'adv_eval_step': ARGS.adv_eval_step,
            'n_classes': n_classes,
            'clp': ARGS.clp
        }

        # Start training loop
        model_train(sess, x, y, x_pair1, x_pair2, is_training, trainloader,
                    train_step, args=train_params, evaluate=eval,
                    adv_evaluate=adv_eval, writer_train=writer_train)

        # Save the trained model
        if ARGS.save:
            save_path = os.path.join(ARGS.save_dir, ARGS.filename)
            saver = tf.train.Saver(var_list=tf.global_variables())
            saver.save(sess, save_path)
            print("Saved model at {:s}".format(str(ARGS.save_dir)))
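# Illustrative usage sketch (not part of the original script): train() reads
# its configuration from an argparse-style namespace. The attribute names
# below are exactly those referenced inside train(); the values are
# assumptions chosen for illustration, not recommended settings.
def _example_train_invocation():
    from argparse import Namespace
    example_args = Namespace(
        # Loss-term switches and their weights (see the Losses section)
        ct=True, ct_lambda=1.0,      # clean cross-entropy
        at=False, at_lambda=0.0,     # adversarial training
        wd=True, wd_lambda=1e-4,     # weight decay
        clp=False, clp_lambda=0.0,   # clean logit pairing
        lsq=False, lsq_lambda=0.0,   # logit squeezing
        alp=False, alp_lambda=0.0,   # adversarial logit pairing
        # PGD attack budget, on the 0-255 pixel scale (the PGD params in the
        # graph divide eps and eps_iter by 255)
        eps=8.0, eps_iter=2.0, nb_iter=10,
        # Optimization and bookkeeping
        lr=1e-3, epochs=100, eval_step=1, adv_eval_step=10,
        log_dir='./logs', save=True, save_dir='./checkpoints',
        filename='model.ckpt')
    train(example_args)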
def eval_robustness(ARGS, verbose=True):
    #############################################
    # Load pre-trained model
    #############################################

    if verbose:
        print('\n- Loading pre-trained model...')

    # Build evaluation graph
    eval_graph = tf.Graph()
    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf.Session(graph=eval_graph, config=config)

    # Define input TF placeholder
    with eval_graph.as_default():
        with tf.device('/gpu:0'):
            # Define placeholders
            with tf.name_scope('Placeholders'):
                x = tf.placeholder(dtype=tf.float32,
                                   shape=input_shape,
                                   name='inputs')
                y = tf.placeholder(dtype=tf.float32,
                                   shape=(None, n_classes),
                                   name='labels')
                is_training = tf.placeholder_with_default(False,
                                                          shape=(),
                                                          name='is-training')

            # Define model
            with tf.name_scope('Model'):
                model = Model(nb_classes=n_classes,
                              input_shape=input_shape,
                              is_training=is_training)

                # Define forward-pass
                with tf.name_scope('Logits'):
                    logits = model.get_logits(x)
                with tf.name_scope('Probs'):
                    preds = tf.nn.softmax(logits)

            # Restore the pre-trained model
            with sess.as_default():
                saver = tf.train.Saver()
                saver.restore(sess, ARGS.restore_path + '/model.ckpt')

            # Define accuracy ops
            with tf.name_scope('Accuracy'):
                ground_truth = tf.argmax(y, axis=1)
                predicted_label = tf.argmax(preds, axis=1)
                correct_prediction = tf.equal(predicted_label, ground_truth)
                clean_acc = tf.reduce_mean(tf.to_float(correct_prediction),
                                           name='accuracy')

            # Define PGD adversary
            if ARGS.attack == 'PGD':
                if verbose:
                    print('\n- Building {:s} attack graph...'.format(ARGS.attack))

                with tf.name_scope('PGD-Attacker'):
                    pgd_params = {
                        'ord': np.inf,
                        'y': y,
                        'eps': ARGS.eps / 255,
                        'eps_iter': ARGS.eps_iter / 255,
                        'nb_iter': ARGS.nb_iter,
                        'rand_init': ARGS.rand_init,
                        'rand_minmax': ARGS.eps / 255,
                        'clip_min': 0.,
                        'clip_max': 1.,
                        'sanity_checks': True
                    }
                    pgd = ProjectedGradientDescent(model, sess=None)
                    adv_x = pgd.generate(x, **pgd_params)

            # Define SPSA adversary
            elif ARGS.attack == 'SPSA':
                if verbose:
                    print('\n- Building {:s} attack graph...'.format(ARGS.attack))

                with tf.name_scope('SPSA-Attacker'):
                    spsa_params = {
                        'y': y,
                        'eps': ARGS.eps / 255,
                        'nb_iter': ARGS.nb_iter,
                        'spsa_samples': ARGS.spsa_samples,
                        'spsa_iters': ARGS.spsa_iters,
                        'clip_min': 0.,
                        'clip_max': 1.,
                        'learning_rate': ARGS.spsa_lr,
                        'delta': ARGS.spsa_delta
                    }
                    spsa = SPSA(model, sess=sess)
                    adv_x = spsa.generate(x, **spsa_params)
            else:
                raise NotImplementedError

            with tf.name_scope('Logits'):
                adv_logits = model.get_logits(adv_x)
            with tf.name_scope('Probs'):
                adv_preds = tf.nn.softmax(adv_logits)

            adv_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=adv_logits, labels=y)
            adv_predicted_label = tf.argmax(adv_preds, axis=1)
            correct_prediction = tf.equal(adv_predicted_label, ground_truth)
            adv_accuracy = tf.reduce_mean(tf.to_float(correct_prediction),
                                          name='adv-accuracy')
            is_adv_example = tf.not_equal(ground_truth, adv_predicted_label)

    #############################################
    # Run evaluation
    #############################################

    if verbose:
        print('\n- Running robustness evaluation against {:s} attacker...\n'.
              format(ARGS.attack))

    if ARGS.attack == 'PGD':
        clean, adv_mean, adv_worstcase = run_pgd_eval(x, y, is_training, sess,
                                                      adv_testloader, clean_acc,
                                                      adv_accuracy, adv_loss,
                                                      is_adv_example, ARGS,
                                                      save_loss_dist=False,
                                                      verbose=verbose)
    elif ARGS.attack == 'SPSA':
        clean, adv_mean = run_spsa_eval(x, y, is_training, sess,
                                        adv_testloader, clean_acc,
                                        adv_accuracy, adv_loss,
                                        is_adv_example, ARGS,
                                        save_loss_dist=False,
                                        verbose=verbose)
        adv_worstcase = adv_mean
    else:
        raise NotImplementedError

    return clean, adv_mean, adv_worstcase
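# Illustrative usage sketch (not part of the original script): eval_robustness()
# expects the same style of namespace. Attribute names match those referenced
# above; the values are assumptions for illustration only.
def _example_robustness_eval():
    from argparse import Namespace
    pgd_args = Namespace(
        attack='PGD',                  # 'PGD' or 'SPSA'; others raise NotImplementedError
        restore_path='./checkpoints',  # directory containing model.ckpt
        # PGD budget on the 0-255 pixel scale (scaled by 1/255 above)
        eps=8.0, eps_iter=2.0, nb_iter=40, rand_init=True,
        # SPSA-only attributes (unused by the PGD branch)
        spsa_samples=128, spsa_iters=1, spsa_lr=0.01, spsa_delta=0.01)
    clean, adv_mean, adv_worst = eval_robustness(pgd_args, verbose=True)
    print('clean: {:.4f} | adv mean: {:.4f} | adv worst-case: {:.4f}'.format(
        clean, adv_mean, adv_worst))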