# Imports assumed from the surrounding tutorial (CleverHans v4 module paths);
# ld_cifar10() and the CNN model class are defined elsewhere in the tutorial.
import numpy as np
import tensorflow as tf
from absl import app, flags

from cleverhans.tf2.attacks.fast_gradient_method import fast_gradient_method
from cleverhans.tf2.attacks.projected_gradient_descent import projected_gradient_descent

FLAGS = flags.FLAGS


def main(_):
    # Load training and test data
    data = ld_cifar10()
    model = CNN()
    loss_object = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
    optimizer = tf.optimizers.Adam(learning_rate=0.001)

    # Metrics to track the different accuracies.
    train_loss = tf.metrics.Mean(name='train_loss')
    test_acc_clean = tf.metrics.SparseCategoricalAccuracy()
    test_acc_fgsm = tf.metrics.SparseCategoricalAccuracy()
    test_acc_pgd = tf.metrics.SparseCategoricalAccuracy()

    @tf.function
    def train_step(x, y):
        with tf.GradientTape() as tape:
            predictions = model(x)
            loss = loss_object(y, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        train_loss(loss)

    # Train model, optionally with adversarial training
    for epoch in range(FLAGS.nb_epochs):
        # Keras-like display of progress (50,000 CIFAR-10 training images)
        progress_bar_train = tf.keras.utils.Progbar(50000)
        for (x, y) in data.train:
            if FLAGS.adv_train:
                # Replace clean example with adversarial example for adversarial training
                x = projected_gradient_descent(model, x, FLAGS.eps, 0.01, 40, np.inf)
            train_step(x, y)
            progress_bar_train.add(x.shape[0], values=[('loss', train_loss.result())])

    # Evaluate on clean and adversarial data (10,000 CIFAR-10 test images)
    progress_bar_test = tf.keras.utils.Progbar(10000)
    for x, y in data.test:
        y_pred = model(x)
        test_acc_clean(y, y_pred)

        x_fgm = fast_gradient_method(model, x, FLAGS.eps, np.inf)
        y_pred_fgm = model(x_fgm)
        test_acc_fgsm(y, y_pred_fgm)

        x_pgd = projected_gradient_descent(model, x, FLAGS.eps, 0.01, 40, np.inf)
        y_pred_pgd = model(x_pgd)
        test_acc_pgd(y, y_pred_pgd)

        progress_bar_test.add(x.shape[0])

    print('test acc on clean examples (%): {:.3f}'.format(test_acc_clean.result() * 100))
    print('test acc on FGM adversarial examples (%): {:.3f}'.format(test_acc_fgsm.result() * 100))
    print('test acc on PGD adversarial examples (%): {:.3f}'.format(test_acc_pgd.result() * 100))
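# A minimal sketch of the entry point main() expects, assuming absl-style flags
# matching the FLAGS.nb_epochs, FLAGS.eps and FLAGS.adv_train reads above; the
# default values are illustrative assumptions, not prescribed settings, and in
# a real script this block would typically sit at the end of the file.
if __name__ == '__main__':
    flags.DEFINE_integer('nb_epochs', 8, 'Number of epochs.')
    flags.DEFINE_float('eps', 0.05, 'Total epsilon for FGM and PGD attacks.')
    flags.DEFINE_bool('adv_train', False, 'Use adversarial training (on PGD examples).')
    app.run(main)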
def madry_et_al(
    model_fn,
    x,
    eps,
    eps_iter,
    nb_iter,
    norm,
    clip_min=None,
    clip_max=None,
    y=None,
    targeted=False,
    rand_minmax=0.3,
    sanity_checks=True,
):
    """
    The attack from Madry et al. 2017 (https://arxiv.org/abs/1706.06083):
    projected gradient descent starting from a random point inside the
    epsilon ball around x (rand_init=True).
    """
    return projected_gradient_descent(
        model_fn,
        x,
        eps,
        eps_iter,
        nb_iter,
        norm,
        clip_min=clip_min,
        clip_max=clip_max,
        y=y,
        targeted=targeted,
        rand_init=True,
        rand_minmax=rand_minmax,
        sanity_checks=sanity_checks,
    )
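# Usage sketch for madry_et_al: a toy untrained model and a random batch
# (assumptions) stand in for a trained classifier and real data; any callable
# mapping inputs to logits works as model_fn.
def _madry_demo():
    model_fn = tf.keras.Sequential([
        tf.keras.Input(shape=(32, 32, 3)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(10),  # raw logits
    ])
    x = tf.random.uniform((4, 32, 32, 3))  # dummy CIFAR-10-shaped batch
    # Each iteration takes an eps_iter-sized step and projects the perturbation
    # back into the L-inf ball of radius eps around x.
    return madry_et_al(model_fn, x, eps=0.05, eps_iter=0.01, nb_iter=40, norm=np.inf)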
def basic_iterative_method(
    model_fn,
    x,
    eps,
    eps_iter,
    nb_iter,
    norm,
    clip_min=None,
    clip_max=None,
    y=None,
    targeted=False,
    rand_init=None,
    rand_minmax=0.3,
    sanity_checks=True,
):
    """
    The Basic Iterative Method attack: projected gradient descent that starts
    from the clean input, i.e. PGD with rand_init=False. The rand_init
    argument is accepted for interface compatibility but ignored.
    """
    return projected_gradient_descent(
        model_fn,
        x,
        eps,
        eps_iter,
        nb_iter,
        norm,
        clip_min=clip_min,
        clip_max=clip_max,
        y=y,
        targeted=targeted,
        rand_init=False,
        rand_minmax=rand_minmax,
        sanity_checks=sanity_checks,
    )
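# The only difference between the two wrappers above is the starting point:
# basic_iterative_method begins at the clean x (rand_init=False), while
# madry_et_al starts from a random point in the epsilon ball (rand_init=True).
# A sketch under the same toy-model assumptions as the demo above:
def _bim_vs_madry_demo():
    model_fn = tf.keras.Sequential([
        tf.keras.Input(shape=(32, 32, 3)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(10),
    ])
    x = tf.random.uniform((4, 32, 32, 3))
    x_bim = basic_iterative_method(model_fn, x, eps=0.05, eps_iter=0.01, nb_iter=40, norm=np.inf)
    x_madry = madry_et_al(model_fn, x, eps=0.05, eps_iter=0.01, nb_iter=40, norm=np.inf)
    # Both outputs satisfy the same budget ||x_adv - x||_inf <= eps; only the
    # initialization differs.
    tf.debugging.assert_less_equal(tf.reduce_max(tf.abs(x_bim - x)), 0.05 + 1e-6)
    tf.debugging.assert_less_equal(tf.reduce_max(tf.abs(x_madry - x)), 0.05 + 1e-6)
    return x_bim, x_madry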