def prep_bbox(sess, logits_scalar, x, y, X_train, Y_train, X_test, Y_test,
              img_rows, img_cols, channels, nb_epochs, batch_size,
              learning_rate, rng, phase=None, binary=False, scale=False,
              nb_filters=64, model_path=None, adv=0, delay=0, eps=0.3):
    """
    Define and train a model that simulates the "remote" black-box oracle
    described in the original paper.
    :param sess: the TF session
    :param x: the input placeholder for MNIST
    :param y: the output placeholder for MNIST
    :param X_train: the training data for the oracle
    :param Y_train: the training labels for the oracle
    :param X_test: the testing data for the oracle
    :param Y_test: the testing labels for the oracle
    :param nb_epochs: number of epochs to train the model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param rng: numpy.random.RandomState
    :return: the model, its predictions tensor, its test accuracy, and the
             model path
    """
    # Define TF model graph (for the black-box model)
    save = False
    train_from_scratch = False

    if model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                binary, scale, nb_filters, batch_size, learning_rate, \
                    nb_epochs, adv = parse_model_settings(model_path)
                train_from_scratch = False
            else:
                model_path = build_model_save_path(
                    model_path, binary, batch_size, nb_filters,
                    learning_rate, nb_epochs, adv, delay, scale)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    if binary:
        if scale:
            from cleverhans_tutorials.tutorial_models import make_scaled_binary_rand_cnn
            model = make_scaled_binary_rand_cnn(
                phase, logits_scalar, 'bb_binsc_',
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_binary_cnn
            model = make_basic_binary_cnn(
                phase, logits_scalar, 'bb_bin_',
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters)
    else:
        from cleverhans_tutorials.tutorial_models import make_basic_cnn
        model = make_basic_cnn(
            phase, logits_scalar, 'bb_fp_',
            input_shape=(None, img_rows, img_cols, channels),
            nb_filters=nb_filters)

    preds = model(x, reuse=False)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Print out the accuracy on legitimate test data
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_test, Y_test,
                         phase=phase, args=eval_params)
        print('Test accuracy of black-box on legitimate test '
              'examples: %.4f' % acc)
        return acc

    # Train an MNIST model
    train_params = {
        'binary': binary,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'bb train loss',
        'filename': 'bb_model',
        'train_scope': 'bb_model',
        'reuse_global_step': False,
        'is_training': True
    }

    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            nb_iter = 20
            train_attack_params = {'eps': MAX_EPS,
                                   'eps_iter': 0.01,
                                   'nb_iter': nb_iter}
            train_attacker = MadryEtAl(model, sess=sess)
        elif adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            # the eps argument sets the FGSM training perturbation budget
            train_attack_params = {'eps': eps}
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)

        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv = model.get_probs(adv_x_train)

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
        if adv and delay > 0:
            train_params.update({'nb_epochs': delay})
        # do clean training for 'nb_epochs' or 'delay' epochs
        model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                    evaluate=evaluate, args=train_params, save=save, rng=rng)
        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                        predictions_adv=preds_adv, evaluate=evaluate,
                        args=train_params, save=save, rng=rng)
    else:
        tf_model_load(sess, model_path)
        print('Restored model from %s' % model_path)

    accuracy = evaluate()
    return model, preds, accuracy, model_path
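# A minimal usage sketch for prep_bbox, kept as a comment (not part of the
# original tutorial). It assumes the x/y/phase placeholders and logits_scalar
# defined in the surrounding script; the flag values below are illustrative
# defaults, not the settings used in the paper.
#
#   X_train, Y_train, X_test, Y_test = data_mnist(
#       datadir='/tmp/mnist', train_start=0, train_end=60000,
#       test_start=0, test_end=10000)
#   rng = np.random.RandomState([2017, 8, 30])
#   model, bbox_preds, acc, path = prep_bbox(
#       sess, logits_scalar, x, y, X_train, Y_train, X_test, Y_test,
#       img_rows=28, img_cols=28, channels=1, nb_epochs=6, batch_size=128,
#       learning_rate=0.001, rng=rng, phase=phase)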
def main(argv=None):
    model_path = FLAGS.model_path
    targeted = True if FLAGS.targeted else False
    scale = True if FLAGS.scale else False
    learning_rate = FLAGS.learning_rate
    nb_filters = FLAGS.nb_filters
    batch_size = FLAGS.batch_size
    nb_epochs = FLAGS.nb_epochs
    delay = FLAGS.delay
    eps = FLAGS.eps
    adv = FLAGS.adv
    attack = FLAGS.attack
    attack_iterations = FLAGS.attack_iterations
    nb_iter = FLAGS.nb_iter

    #### EMPIR extra flags
    lowprecision = FLAGS.lowprecision
    abits = FLAGS.abits
    wbits = FLAGS.wbits
    abitsList = FLAGS.abitsList
    wbitsList = FLAGS.wbitsList
    stocRound = True if FLAGS.stocRound else False
    rand = FLAGS.rand
    model_path2 = FLAGS.model_path2
    model_path1 = FLAGS.model_path1
    model_path3 = FLAGS.model_path3
    ensembleThree = True if FLAGS.ensembleThree else False
    abits2 = FLAGS.abits2
    wbits2 = FLAGS.wbits2
    abits2List = FLAGS.abits2List
    wbits2List = FLAGS.wbits2List
    ####

    save = False
    train_from_scratch = False

    #### ImageNet flags
    imagenet_path = FLAGS.imagenet_path
    if imagenet_path is None:
        print("Error: ImageNet data path not specified")
        sys.exit(1)

    # ImageNet-specific dimensions
    img_rows = _DEFAULT_IMAGE_SIZE
    img_cols = _DEFAULT_IMAGE_SIZE
    channels = _NUM_CHANNELS
    nb_classes = _NUM_CLASSES

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the TensorFlow convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    set_log_level(logging.WARNING)

    # Get ImageNet datasets
    train_dataset, test_dataset = data_imagenet(nb_epochs, batch_size,
                                                imagenet_path)

    # Create initializable iterators; get_next() is called once per iterator
    # so that no batches are silently skipped
    train_iterator = train_dataset.make_initializable_iterator()
    test_iterator = test_dataset.make_initializable_iterator()
    train_x, train_y = train_iterator.get_next()
    test_x, test_y = test_iterator.get_next()

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    phase = tf.placeholder(tf.bool, name="phase")

    logits_scalar = tf.placeholder_with_default(
        INIT_T, shape=(), name="logits_temperature")

    if ensembleThree:
        if (model_path1 is None or model_path2 is None or model_path3 is None):
            train_from_scratch = True
        else:
            train_from_scratch = False
    elif model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                train_from_scratch = False
            else:
                model_path = build_model_save_path(
                    model_path, batch_size, nb_filters, learning_rate,
                    nb_epochs, adv, delay)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    if ensembleThree:
        if (wbitsList is None) or (abitsList is None):
            # Layer-wise separate quantization not specified for the first model
            if (wbits == 0) or (abits == 0):
                print("Error: the number of bits for constant-precision "
                      "weights and activations across layers of the first "
                      "model has to be specified using the wbits and abits flags")
                sys.exit(1)
            else:
                fixedPrec1 = 1
        elif (len(wbitsList) != 6) or (len(abitsList) != 6):
            print("Error: Need to specify precisions for the activations and "
                  "weights of all six quantized layers of the first model "
                  "(the four convolutional layers of AlexNet excluding the "
                  "first, and the two fully connected layers excluding the last)")
            sys.exit(1)
        else:
            fixedPrec1 = 0

        if (wbits2List is None) or (abits2List is None):
            # Layer-wise separate quantization not specified for the second model
            if (wbits2 == 0) or (abits2 == 0):
                print("Error: the number of bits for constant-precision "
                      "weights and activations across layers of the second "
                      "model has to be specified using the wbits2 and abits2 flags")
                sys.exit(1)
            else:
                fixedPrec2 = 1
        elif (len(wbits2List) != 6) or (len(abits2List) != 6):
            print("Error: Need to specify precisions for the activations and "
                  "weights of all six quantized layers of the second model "
                  "(the four convolutional layers of AlexNet excluding the "
                  "first, and the two fully connected layers excluding the last)")
            sys.exit(1)
        else:
            fixedPrec2 = 0

        if (fixedPrec2 != 1) or (fixedPrec1 != 1):
            # At least one of the models has separate precisions per layer
            fixedPrec = 0
            print("At least one model has separate precisions per layer")
            if (fixedPrec1 == 1):  # first model has fixed precision
                abitsList = (abits, abits, abits, abits, abits, abits)
                wbitsList = (wbits, wbits, wbits, wbits, wbits, wbits)
            if (fixedPrec2 == 1):  # second model has fixed precision
                abits2List = (abits2, abits2, abits2, abits2, abits2, abits2)
                wbits2List = (wbits2, wbits2, wbits2, wbits2, wbits2, wbits2)
        else:
            fixedPrec = 1

        if (train_from_scratch):
            print("The ensemble model cannot be trained from scratch")
            sys.exit(1)
        if fixedPrec == 1:
            from cleverhans_tutorials.tutorial_models import make_ensemble_three_alexnet
            model = make_ensemble_three_alexnet(
                phase, logits_scalar, 'lp1_', 'lp2_', 'fp_',
                wbits, abits, wbits2, abits2,
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters, nb_classes=nb_classes)
        else:
            from cleverhans_tutorials.tutorial_models import make_layerwise_three_combined_alexnet
            model = make_layerwise_three_combined_alexnet(
                phase, logits_scalar, 'lp1_', 'lp2_', 'fp_',
                wbitsList, abitsList, wbits2List, abits2List,
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters, nb_classes=nb_classes)
    elif lowprecision:
        if (wbitsList is None) or (abitsList is None):
            # Layer-wise separate quantization not specified
            if (wbits == 0) or (abits == 0):
                print("Error: the number of bits for constant-precision "
                      "weights and activations across layers has to be "
                      "specified using the wbits and abits flags")
                sys.exit(1)
            else:
                fixedPrec = 1
        elif (len(wbitsList) != 6) or (len(abitsList) != 6):
            print("Error: Need to specify precisions for the activations and "
                  "weights of all six quantized layers (the four "
                  "convolutional layers of AlexNet excluding the first, and "
                  "the two fully connected layers excluding the last)")
            sys.exit(1)
        else:
            fixedPrec = 0

        if fixedPrec:
            ### For training from scratch
            from cleverhans_tutorials.tutorial_models import make_basic_lowprecision_alexnet
            model = make_basic_lowprecision_alexnet(
                phase, logits_scalar, 'lp_', wbits, abits,
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters, nb_classes=nb_classes)
        else:
            from cleverhans_tutorials.tutorial_models import make_layerwise_lowprecision_alexnet
            model = make_layerwise_lowprecision_alexnet(
                phase, logits_scalar, 'lp_', wbitsList, abitsList,
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters, nb_classes=nb_classes)
    else:
        ### For training from scratch
        from cleverhans_tutorials.tutorial_models import make_basic_alexnet_from_scratch
        model = make_basic_alexnet_from_scratch(
            phase, logits_scalar, 'fp_',
            input_shape=(None, img_rows, img_cols, channels),
            nb_filters=nb_filters, nb_classes=nb_classes)

    # separate calling function for ensemble models
    if ensembleThree:
        preds = model.ensemble_call(x, reuse=False)
    else:  # default
        preds = model(x, reuse=False)
    print("Defined TensorFlow model graph.")

    rng = np.random.RandomState([2017, 8, 30])

    def evaluate():
        # Evaluate the accuracy of the ImageNet model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        if ensembleThree:
            acc = model_eval_ensemble_imagenet(
                sess, x, y, preds, test_iterator, test_x, test_y,
                phase=phase, args=eval_params)
        else:  # default below
            acc = model_eval_imagenet(
                sess, x, y, preds, test_iterator, test_x, test_y,
                phase=phase, args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an ImageNet model
    train_params = {
        'lowprecision': lowprecision,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            train_attack_params = {'eps': MAX_EPS,
                                   'eps_iter': 0.01,
                                   'nb_iter': nb_iter}
            train_attacker = MadryEtAl(model, sess=sess)
        elif adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            stddev = int(np.ceil((MAX_EPS * 255) // 2))
            train_attack_params = {'eps': tf.abs(tf.truncated_normal(
                shape=(batch_size, 1, 1, 1), mean=0, stddev=stddev))}
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)

        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv_train = model.get_probs(adv_x_train)

        eval_attack_params = {'eps': MAX_EPS, 'clip_min': 0., 'clip_max': 1.}
        adv_x_eval = train_attacker.generate(x, phase, **eval_attack_params)
        preds_adv_eval = model.get_probs(adv_x_eval)  # * logits_scalar

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
        if adv and delay > 0:
            train_params.update({'nb_epochs': delay})
        # do clean training for 'nb_epochs' or 'delay' epochs with a
        # learning rate that decays over time
        model_train_imagenet2(sess, x, y, preds, train_iterator, train_x,
                              train_y, phase=phase, evaluate=evaluate,
                              args=train_params, save=save, rng=rng)
        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            model_train_imagenet(sess, x, y, preds, train_iterator, train_x,
                                 train_y, phase=phase,
                                 predictions_adv=preds_adv_train,
                                 evaluate=evaluate, args=train_params,
                                 save=save, rng=rng)
    else:
        if ensembleThree:
            # the three ensemble members have to be loaded from different paths
            variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            # First 11 variables from model_path1
            stored_variables = [
                'lp_conv1_init/k', 'lp_conv1_init/b', 'lp_conv2_init/k',
                'lp_conv3_init/k', 'lp_conv4_init/k', 'lp_conv5_init/k',
                'lp_ip1init/W', 'lp_ip1init/b', 'lp_ip2init/W',
                'lp_logits_init/W', 'lp_logits_init/b']
            # zip through OrderedDict so the name/variable pairing keeps its
            # order (a plain dict was reordering entries)
            variable_dict = dict(OrderedDict(
                zip(stored_variables, variables[:11])))
            # Restore the first set of variables from model_path1
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path1))
            # Second 11 variables from model_path2
            variable_dict = dict(OrderedDict(
                zip(stored_variables, variables[11:22])))
            saver2 = tf.train.Saver(variable_dict)
            saver2.restore(sess, tf.train.latest_checkpoint(model_path2))
            # Third 11 variables from model_path3
            stored_variables = [
                'fp_conv1_init/k', 'fp_conv1_init/b', 'fp_conv2_init/k',
                'fp_conv3_init/k', 'fp_conv4_init/k', 'fp_conv5_init/k',
                'fp_ip1init/W', 'fp_ip1init/b', 'fp_ip2init/W',
                'fp_logits_init/W', 'fp_logits_init/b']
            variable_dict = dict(OrderedDict(
                zip(stored_variables, variables[22:33])))
            saver3 = tf.train.Saver(variable_dict)
            saver3.restore(sess, tf.train.latest_checkpoint(model_path3))
            # Next 24 batch norm variables from model_path1
            stored_variables = [
                'lp__batchNorm%d/batch_normalization/%s' % (i, s)
                for i in range(1, 7)
                for s in ('gamma', 'beta', 'moving_mean', 'moving_variance')]
            variable_dict = dict(OrderedDict(
                zip(stored_variables, variables[33:57])))
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path1))
            # Next 24 batch norm variables from model_path2
            variable_dict = dict(OrderedDict(
                zip(stored_variables, variables[57:81])))
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path2))
            # Final 24 batch norm variables from model_path3
            stored_variables = [
                'fp__batchNorm%d/batch_normalization/%s' % (i, s)
                for i in range(1, 7)
                for s in ('gamma', 'beta', 'moving_mean', 'moving_variance')]
            variable_dict = dict(OrderedDict(
                zip(stored_variables, variables[81:105])))
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path3))
        else:
            # restoring a model trained with this setup, not a downloaded one
            tf_model_load(sess, model_path)
            print('Restored model from %s' % model_path)

    # Evaluate the accuracy of the model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    if ensembleThree:
        accuracy = model_eval_ensemble_imagenet(
            sess, x, y, preds, test_iterator, test_x, test_y, phase=phase,
            feed={phase: False}, args=eval_params)
    else:  # default below
        accuracy = model_eval_imagenet(
            sess, x, y, preds, test_iterator, test_x, test_y, phase=phase,
            feed={phase: False}, args=eval_params)
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Build dataset
    ###########################################################################
    adv_inputs = test_x  # adversarial inputs can be generated from any of the test examples

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    nb_adv_per_sample = 1
    adv_ys = None
    yname = "y"

    print('Crafting adversarial examples')
    print("This could take some time ...")

    if ensembleThree:
        model_type = 'ensembleThree'
    else:
        model_type = 'default'

    if attack == ATTACK_CARLINI_WAGNER_L2:
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model, back='tf', sess=sess,
                                   model_type=model_type,
                                   num_classes=nb_classes)
        attack_params = {'binary_search_steps': 1,
                         'max_iterations': attack_iterations,
                         'learning_rate': 0.1,
                         'batch_size': batch_size,
                         'initial_const': 10}
    elif attack == ATTACK_JSMA:
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model, back='tf', sess=sess,
                                     model_type=model_type,
                                     num_classes=nb_classes)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, back='tf', sess=sess,
                                      model_type=model_type,
                                      num_classes=nb_classes)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, back='tf', sess=sess,
                             model_type=model_type, num_classes=nb_classes)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    elif attack == ATTACK_BASICITER:
        print('Attack: BasicIterativeMethod')
        from cleverhans.attacks import BasicIterativeMethod
        attacker = BasicIterativeMethod(model, back='tf', sess=sess,
                                        model_type=model_type,
                                        num_classes=nb_classes)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    else:
        print("Attack undefined")
        sys.exit(1)

    # normalized ImageNet pixel values span roughly -2.12 to 2.64, so clip
    # to a slightly wider range
    attack_params.update({'clip_min': -2.2, 'clip_max': 2.7})
    eval_params = {'batch_size': batch_size}

    print("Evaluating untargeted results")
    if ensembleThree:
        adv_accuracy = model_eval_ensemble_adv_imagenet(
            sess, x, y, preds, test_iterator, test_x, test_y, phase=phase,
            args=eval_params, attacker=attacker, attack_params=attack_params)
    else:
        adv_accuracy = model_eval_adv_imagenet(
            sess, x, y, preds, test_iterator, test_x, test_y, phase=phase,
            args=eval_params, attacker=attacker, attack_params=attack_params)

    # Compute the number of adversarial examples that were successfully found
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))

    # Close TF session
    sess.close()
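# Sanity check for the clip range used above -- a sketch assuming the
# standard ImageNet per-channel normalization (the mean/std values below are
# assumptions; the preprocessing in data_imagenet may differ slightly).
# Pixel value 0 maps to about -2.12 and pixel value 1 to about 2.64, so
# clip_min=-2.2 and clip_max=2.7 bracket the whole valid input range.
def imagenet_clip_range(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    lo = min((0. - m) / s for m, s in zip(mean, std))  # ~ -2.118
    hi = max((1. - m) / s for m, s in zip(mean, std))  # ~  2.640
    return lo, hi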
def mnist_attack(train_start=0, train_end=60000, test_start=0,
                 test_end=10000, viz_enabled=True, nb_epochs=6,
                 batch_size=128, nb_filters=64, nb_samples=10,
                 learning_rate=0.001, eps=0.3, attack=0,
                 attack_iterations=100, model_path=None, targeted=False,
                 binary=False, scale=False, rand=False, debug=None,
                 test=False, data_dir=None, delay=0, adv=0, nb_iter=40):
    """
    MNIST tutorial for generic attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1
    nb_classes = 10

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1237)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    if debug:
        set_log_level(logging.DEBUG)
    else:
        set_log_level(logging.WARNING)  # for running on sharcnet

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(
        datadir=data_dir, train_start=train_start, train_end=train_end,
        test_start=test_start, test_end=test_end)

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    phase = tf.placeholder(tf.bool, name='phase')

    # for attempting to break the unscaled network
    logits_scalar = tf.placeholder_with_default(
        INIT_T, shape=(), name="logits_temperature")

    save = False
    train_from_scratch = False

    if model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                binary, scale, nb_filters, batch_size, learning_rate, \
                    nb_epochs, adv = parse_model_settings(model_path)
                train_from_scratch = False
            else:
                model_path = build_model_save_path(
                    model_path, binary, batch_size, nb_filters,
                    learning_rate, nb_epochs, adv, delay, scale)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    # Define TF model graph
    if binary:
        print('binary=True')
        if scale:
            print('scale=True')
            if rand:
                print('rand=True')
                from cleverhans_tutorials.tutorial_models import make_scaled_binary_rand_cnn
                model = make_scaled_binary_rand_cnn(
                    phase, logits_scalar, 'binsc_',
                    input_shape=(None, img_rows, img_cols, channels),
                    nb_filters=nb_filters)
            else:
                from cleverhans_tutorials.tutorial_models import make_scaled_binary_cnn
                model = make_scaled_binary_cnn(
                    phase, logits_scalar, 'binsc_',
                    input_shape=(None, img_rows, img_cols, channels),
                    nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_binary_cnn
            model = make_basic_binary_cnn(phase, logits_scalar, 'bin_',
                                          nb_filters=nb_filters)
    else:
        if rand:
            print('rand=True')
            from cleverhans_tutorials.tutorial_models import make_scaled_rand_cnn
            model = make_scaled_rand_cnn(phase, logits_scalar, 'fp_rand',
                                         nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_cnn
            model = make_basic_cnn(phase, logits_scalar, 'fp_',
                                   nb_filters=nb_filters)

    preds = model(x, reuse=False)  # * logits_scalar
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################
    rng = np.random.RandomState([2017, 8, 30])

    # Train an MNIST model
    train_params = {
        'binary': binary,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            train_attack_params = {'eps': MAX_EPS,
                                   'eps_iter': 0.01,
                                   'nb_iter': nb_iter}
            train_attacker = MadryEtAl(model, sess=sess)
        elif adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            stddev = int(np.ceil((MAX_EPS * 255) // 2))
            train_attack_params = {'eps': tf.abs(tf.truncated_normal(
                shape=(batch_size, 1, 1, 1), mean=0, stddev=stddev))}
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)

        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv_train = model.get_probs(adv_x_train)

        eval_attack_params = {'eps': MAX_EPS, 'clip_min': 0., 'clip_max': 1.}
        adv_x_eval = train_attacker.generate(x, phase, **eval_attack_params)
        preds_adv_eval = model.get_probs(adv_x_eval)  # * logits_scalar

    def evaluate():
        # Evaluate the accuracy of the MNIST model on clean test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_test, Y_test,
                         phase=phase, args=eval_params)
        report.clean_train_clean_eval = acc
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

        if adv != 0:
            # Accuracy of the adversarially trained model on adversarial examples
            acc = model_eval(sess, x, y, preds_adv_eval, X_test, Y_test,
                             phase=phase, args=eval_params)
            print('Test accuracy on adversarial examples: %0.4f' % acc)

            acc = model_eval(sess, x, y, preds_adv_eval, X_test, Y_test,
                             phase=phase, args=eval_params,
                             feed={logits_scalar: ATTACK_T})
            print('Test accuracy on adversarial examples (scaled): %0.4f' % acc)

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
        if adv and delay > 0:
            train_params.update({'nb_epochs': delay})
        # do clean training for 'nb_epochs' or 'delay' epochs
        if test:
            model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                        evaluate=evaluate, args=train_params, save=save,
                        rng=rng)
        else:
            model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                        args=train_params, save=save, rng=rng)
        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            if test:
                model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                            predictions_adv=preds_adv_train,
                            evaluate=evaluate, args=train_params, save=save,
                            rng=rng)
            else:
                model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                            predictions_adv=preds_adv_train,
                            args=train_params, save=save, rng=rng)
    else:
        tf_model_load(sess, model_path)
        print('Restored model from %s' % model_path)
        evaluate()

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, phase=phase,
                          feed={phase: False}, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Build dataset
    ###########################################################################
    if viz_enabled:
        assert nb_samples == nb_classes
        idxs = [np.where(np.argmax(Y_test, axis=1) == i)[0][0]
                for i in range(nb_classes)]
        viz_rows = nb_classes if targeted else 2
        # Initialize our array for grid visualization
        grid_shape = (nb_classes, viz_rows, img_rows, img_cols, channels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')

    if targeted:
        from cleverhans.utils import build_targeted_dataset
        if viz_enabled:
            from cleverhans.utils import grid_visual
            adv_inputs, true_labels, adv_ys = build_targeted_dataset(
                X_test, Y_test, idxs, nb_classes, img_rows, img_cols,
                channels)
        else:
            adv_inputs, true_labels, adv_ys = build_targeted_dataset(
                X_test, Y_test, np.arange(nb_samples), nb_classes, img_rows,
                img_cols, channels)
    else:
        if viz_enabled:
            from cleverhans.utils import pair_visual
            adv_inputs = X_test[idxs]
        else:
            adv_inputs = X_test[:nb_samples]

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    if targeted:
        att_batch_size = np.clip(nb_samples * (nb_classes - 1),
                                 a_max=MAX_BATCH_SIZE, a_min=1)
        nb_adv_per_sample = nb_classes - 1
        yname = "y_target"
    else:
        att_batch_size = np.minimum(nb_samples, MAX_BATCH_SIZE)
        nb_adv_per_sample = 1
        adv_ys = None
        yname = "y"

    print('Crafting ' + str(nb_samples) + ' * ' + str(nb_adv_per_sample) +
          ' adversarial examples')
    print("This could take some time ...")

    if attack == ATTACK_CARLINI_WAGNER_L2:
        print('Attack: CarliniWagnerL2')
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model, back='tf', sess=sess)
        attack_params = {'binary_search_steps': 1,
                         'max_iterations': attack_iterations,
                         'learning_rate': 0.1,
                         'batch_size': att_batch_size,
                         'initial_const': 10}
    elif attack == ATTACK_JSMA:
        print('Attack: SaliencyMapMethod')
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model, back='tf', sess=sess)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        print('Attack: FastGradientMethod')
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        print('Attack: MadryEtAl')
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    elif attack == ATTACK_BASICITER:
        print('Attack: BasicIterativeMethod')
        from cleverhans.attacks import BasicIterativeMethod
        attacker = BasicIterativeMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    else:
        print("Attack undefined")
        sys.exit(1)

    attack_params.update({yname: adv_ys, 'clip_min': 0., 'clip_max': 1.})
    adv_np = attacker.generate_np(adv_inputs, phase, **attack_params)

    eval_params = {'batch_size': att_batch_size}
    if targeted:
        print("Evaluating targeted results")
        adv_accuracy = model_eval(sess, x, y, preds, adv_np, true_labels,
                                  phase=phase, args=eval_params)
    else:
        print("Evaluating untargeted results")
        if viz_enabled:
            adv_accuracy = model_eval(sess, x, y, preds, adv_np,
                                      Y_test[idxs], phase=phase,
                                      args=eval_params)
        else:
            adv_accuracy = model_eval(sess, x, y, preds, adv_np,
                                      Y_test[:nb_samples], phase=phase,
                                      args=eval_params)

    if viz_enabled:
        n = nb_classes - 1
        for i in range(nb_classes):
            if targeted:
                for j in range(nb_classes):
                    if i != j:
                        if j != 0 and i != n:
                            grid_viz_data[i, j] = adv_np[j * n + i]
                        if j == 0 and i > 0 or i == n and j > 0:
                            grid_viz_data[i, j] = adv_np[j * n + i - 1]
                    else:
                        grid_viz_data[i, j] = adv_inputs[j * n]
            else:
                grid_viz_data[i, 0] = adv_inputs[i]
                grid_viz_data[i, 1] = adv_np[i]
        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average L_2 distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv_np - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Compute the number of modified features (L_0 norm)
    nb_changed = np.where(adv_np != adv_inputs)[0].shape[0]
    percent_perturb = np.mean(float(nb_changed) / adv_np.reshape(-1).shape[0])
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturb))

    # Friendly output for pasting into spreadsheet
    print('{0:.4f}'.format(accuracy))
    print('{0:.4f}'.format(adv_accuracy))
    print('{0:.4f}'.format(percent_perturbed))
    print('{0:.4f}'.format(percent_perturb))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        from cleverhans.utils import grid_visual
        _ = grid_visual(grid_viz_data)

    return report
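# Self-contained numeric check of the two perturbation metrics printed by
# mnist_attack above (illustrative toy batch, not tutorial code).
def _perturbation_metrics_demo():
    clean = np.zeros((2, 28, 28, 1), dtype='f')
    adv = clean.copy()
    adv[:, 0, 0, 0] = 0.5  # perturb one pixel per image
    # average L_2 norm of the perturbations: sqrt(0.5 ** 2) = 0.5
    l2 = np.mean(np.sum((adv - clean) ** 2, axis=(1, 2, 3)) ** .5)
    # fraction of modified features (L_0 rate): 2 changed / (2 * 28 * 28)
    nb_changed = np.where(adv != clean)[0].shape[0]
    l0_rate = float(nb_changed) / adv.reshape(-1).shape[0]
    return l2, l0_rate  # (0.5, ~0.00128)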
def main(argv=None):
    """
    CIFAR10 CleverHans tutorial
    :return:
    """
    # CIFAR10-specific dimensions
    img_rows = 32
    img_cols = 32
    channels = 3
    nb_classes = 10

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the TensorFlow convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    set_log_level(logging.WARNING)

    # Get CIFAR10 test data
    X_train, Y_train, X_test, Y_test = data_cifar10()
    assert Y_train.shape[1] == 10.

    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, 10))
    phase = tf.placeholder(tf.bool, name="phase")

    model_path = FLAGS.model_path
    targeted = True if FLAGS.targeted else False
    binary = True if FLAGS.binary else False
    scale = True if FLAGS.scale else False
    learning_rate = FLAGS.learning_rate
    nb_filters = FLAGS.nb_filters
    batch_size = FLAGS.batch_size
    nb_samples = FLAGS.nb_samples
    nb_epochs = FLAGS.nb_epochs
    delay = FLAGS.delay
    eps = FLAGS.eps
    adv = FLAGS.adv
    attack = FLAGS.attack
    attack_iterations = FLAGS.attack_iterations
    nb_iter = FLAGS.nb_iter

    save = False
    train_from_scratch = False

    if model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                binary, scale, nb_filters, batch_size, learning_rate, \
                    nb_epochs, adv = parse_model_settings(model_path)
                train_from_scratch = False
            else:
                model_path = build_model_save_path(
                    model_path, binary, batch_size, nb_filters,
                    learning_rate, nb_epochs, adv, delay, scale)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    if binary:
        if scale:
            from cleverhans_tutorials.tutorial_models import make_scaled_binary_cnn
            model = make_scaled_binary_cnn(
                phase, 'bin_',
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_binary_cnn
            model = make_basic_binary_cnn(
                phase, 'bin_',
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters)
    else:
        from cleverhans_tutorials.tutorial_models import make_basic_cnn
        model = make_basic_cnn(
            phase, 'fp_',
            input_shape=(None, img_rows, img_cols, channels),
            nb_filters=nb_filters)

    preds = model(x, reuse=False)
    print("Defined TensorFlow model graph.")

    rng = np.random.RandomState([2017, 8, 30])

    def evaluate():
        # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_test, Y_test,
                         phase=phase, args=eval_params)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train a CIFAR10 model
    train_params = {
        'binary': binary,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

    if adv:
        from cleverhans.attacks import FastGradientMethod
        fgsm = FastGradientMethod(model, back='tf', sess=sess)
        fgsm_params = {'eps': eps, 'clip_min': 0., 'clip_max': 1.}
        adv_x_train = fgsm.generate(x, phase, **fgsm_params)
        preds_adv = model.get_probs(adv_x_train)

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
        if adv and delay > 0:
            train_params.update({'nb_epochs': delay})
        # do clean training for 'nb_epochs' or 'delay' epochs
        model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                    evaluate=evaluate, args=train_params, save=save, rng=rng)
        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                        predictions_adv=preds_adv, evaluate=evaluate,
                        args=train_params, save=save, rng=rng)
    else:
        tf_model_load(sess, model_path)
        print('Restored model from %s' % model_path)
        evaluate()

    # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, phase=phase,
                          feed={phase: False}, args=eval_params)
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Build dataset
    ###########################################################################
    if targeted:
        from cleverhans.utils import build_targeted_dataset
        adv_inputs, true_labels, adv_ys = build_targeted_dataset(
            X_test, Y_test, np.arange(nb_samples), nb_classes, img_rows,
            img_cols, channels)
    else:
        adv_inputs = X_test[:nb_samples]

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    if targeted:
        att_batch_size = np.clip(nb_samples * (nb_classes - 1),
                                 a_max=MAX_BATCH_SIZE, a_min=1)
        nb_adv_per_sample = nb_classes - 1
        yname = "y_target"
    else:
        att_batch_size = np.minimum(nb_samples, MAX_BATCH_SIZE)
        nb_adv_per_sample = 1
        adv_ys = None
        yname = "y"

    print('Crafting ' + str(nb_samples) + ' * ' + str(nb_adv_per_sample) +
          ' adversarial examples')
    print("This could take some time ...")

    if attack == ATTACK_CARLINI_WAGNER_L2:
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model, back='tf', sess=sess)
        attack_params = {'binary_search_steps': 1,
                         'max_iterations': attack_iterations,
                         'learning_rate': 0.1,
                         'batch_size': att_batch_size,
                         'initial_const': 10}
    elif attack == ATTACK_JSMA:
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model, back='tf', sess=sess)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    else:
        print("Attack undefined")
        sys.exit(1)

    attack_params.update({yname: adv_ys, 'clip_min': 0., 'clip_max': 1.})
    X_test_adv = attacker.generate_np(adv_inputs, phase, **attack_params)

    if targeted:
        assert X_test_adv.shape[0] == nb_samples * \
            (nb_classes - 1), X_test_adv.shape
        # Evaluate the accuracy of the CIFAR10 model on adversarial examples
        print("Evaluating targeted results")
        adv_accuracy = model_eval(sess, x, y, preds, X_test_adv, true_labels,
                                  phase=phase, args=eval_params)
    else:
        assert X_test_adv.shape[0] == nb_samples, X_test_adv.shape
        # Evaluate the accuracy of the CIFAR10 model on adversarial examples
        print("Evaluating untargeted results")
        adv_accuracy = model_eval(sess, x, y, preds, X_test_adv,
                                  Y_test[:nb_samples], phase=phase,
                                  args=eval_params)

    # Compute the number of adversarial examples that were successfully found
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((X_test_adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Friendly output for pasting into spreadsheet
    print('{0:.4f},'.format(accuracy))
    print('{0:.4f},'.format(adv_accuracy))
    print('{0:.4f},'.format(percent_perturbed))

    sess.close()
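# Worked example of the label smoothing applied to Y_train in both CIFAR10
# mains (a sketch, not tutorial code): with label_smooth = 0.1, a one-hot row
# is clipped so the true class gets 0.9 and each other class 0.1 / 9, and the
# row still sums to 1.
def _label_smoothing_demo():
    y = np.zeros((1, 10), dtype='f')
    y[0, 3] = 1.
    smoothed = y.clip(0.1 / 9., 1. - 0.1)
    assert abs(smoothed.sum() - 1.) < 1e-6
    return smoothed  # [[0.0111, 0.0111, 0.0111, 0.9, 0.0111, ...]]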
def main(argv=None):
    """
    CIFAR10 CleverHans tutorial
    :return:
    """
    # CIFAR10-specific dimensions
    img_rows = 32
    img_cols = 32
    channels = 3
    nb_classes = 10

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the TensorFlow convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    set_log_level(logging.WARNING)

    # Get CIFAR10 test data
    X_train, Y_train, X_test, Y_test = data_cifar10()
    assert Y_train.shape[1] == 10.

    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, 10))
    phase = tf.placeholder(tf.bool, name="phase")

    logits_scalar = tf.placeholder_with_default(
        INIT_T, shape=(), name="logits_temperature")

    model_path = FLAGS.model_path
    targeted = True if FLAGS.targeted else False
    learning_rate = FLAGS.learning_rate
    nb_filters = FLAGS.nb_filters
    batch_size = FLAGS.batch_size
    nb_samples = FLAGS.nb_samples
    nb_epochs = FLAGS.nb_epochs
    delay = FLAGS.delay
    eps = FLAGS.eps
    adv = FLAGS.adv
    attack = FLAGS.attack
    attack_iterations = FLAGS.attack_iterations
    nb_iter = FLAGS.nb_iter

    #### EMPIR extra flags
    lowprecision = FLAGS.lowprecision
    abits = FLAGS.abits
    wbits = FLAGS.wbits
    abitsList = FLAGS.abitsList
    wbitsList = FLAGS.wbitsList
    stocRound = True if FLAGS.stocRound else False
    rand = FLAGS.rand
    model_path2 = FLAGS.model_path2
    model_path1 = FLAGS.model_path1
    model_path3 = FLAGS.model_path3
    ensembleThree = True if FLAGS.ensembleThree else False
    abits2 = FLAGS.abits2
    wbits2 = FLAGS.wbits2
    abits2List = FLAGS.abits2List
    wbits2List = FLAGS.wbits2List
    inpgradreg = True if FLAGS.inpgradreg else False
    distill = True if FLAGS.distill else False
    student_epochs = FLAGS.student_epochs
    l2dbl = FLAGS.l2dbl
    l2cs = FLAGS.l2cs
    ####

    save = False
    train_from_scratch = False

    if ensembleThree:
        if (model_path1 is None or model_path2 is None or model_path3 is None):
            train_from_scratch = True
        else:
            train_from_scratch = False
    elif model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                train_from_scratch = False
            else:
                model_path = build_model_save_path(
                    model_path, batch_size, nb_filters, learning_rate,
                    nb_epochs, adv, delay)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    if ensembleThree:
        if (wbitsList is None) or (abitsList is None):
            # Layer-wise separate quantization not specified for the first model
            if (wbits == 0) or (abits == 0):
                print("Error: the number of bits for constant-precision "
                      "weights and activations across layers of the first "
                      "model has to be specified using the wbits and abits flags")
                sys.exit(1)
            else:
                fixedPrec1 = 1
        elif (len(wbitsList) != 3) or (len(abitsList) != 3):
            print("Error: Need to specify precisions for the activations and "
                  "weights of the three convolutional layers of the first model")
            sys.exit(1)
        else:
            fixedPrec1 = 0

        if (wbits2List is None) or (abits2List is None):
            # Layer-wise separate quantization not specified for the second model
            if (wbits2 == 0) or (abits2 == 0):
                print("Error: the number of bits for constant-precision "
                      "weights and activations across layers of the second "
                      "model has to be specified using the wbits2 and abits2 flags")
                sys.exit(1)
            else:
                fixedPrec2 = 1
        elif (len(wbits2List) != 3) or (len(abits2List) != 3):
            print("Error: Need to specify precisions for the activations and "
                  "weights of the three convolutional layers of the second model")
            sys.exit(1)
        else:
            fixedPrec2 = 0

        if (fixedPrec2 != 1) or (fixedPrec1 != 1):
            # At least one of the models has separate precisions per layer
            fixedPrec = 0
            print("At least one model has separate precisions per layer")
            if (fixedPrec1 == 1):  # first model has fixed precision
                abitsList = (abits, abits, abits)
                wbitsList = (wbits, wbits, wbits)
            if (fixedPrec2 == 1):  # second model has fixed precision
                abits2List = (abits2, abits2, abits2)
                wbits2List = (wbits2, wbits2, wbits2)
        else:
            fixedPrec = 1

        if (train_from_scratch):
            print("The ensemble model cannot be trained from scratch")
            sys.exit(1)
        if fixedPrec == 1:
            from cleverhans_tutorials.tutorial_models import make_ensemble_three_cifar_cnn
            model = make_ensemble_three_cifar_cnn(
                phase, logits_scalar, 'lp1_', 'lp2_', 'fp_',
                wbits, abits, wbits2, abits2,
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_ensemble_three_cifar_cnn_layerwise
            model = make_ensemble_three_cifar_cnn_layerwise(
                phase, logits_scalar, 'lp1_', 'lp2_', 'fp_',
                wbitsList, abitsList, wbits2List, abits2List,
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters)
    elif lowprecision:
        if (wbitsList is None) or (abitsList is None):
            # Layer-wise separate quantization not specified
            if (wbits == 0) or (abits == 0):
                print("Error: the number of bits for constant-precision "
                      "weights and activations across layers has to be "
                      "specified using the wbits and abits flags")
                sys.exit(1)
            else:
                fixedPrec = 1
        elif (len(wbitsList) != 3) or (len(abitsList) != 3):
            print("Error: Need to specify precisions for the activations and "
                  "weights of the three convolutional layers")
            sys.exit(1)
        else:
            fixedPrec = 0

        if fixedPrec:
            from cleverhans_tutorials.tutorial_models import make_basic_lowprecision_cifar_cnn
            model = make_basic_lowprecision_cifar_cnn(
                phase, logits_scalar, 'lp_', wbits, abits,
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters, stocRound=stocRound)
        else:
            from cleverhans_tutorials.tutorial_models import make_layerwise_lowprecision_cifar_cnn
            model = make_layerwise_lowprecision_cifar_cnn(
                phase, logits_scalar, 'lp_', wbitsList, abitsList,
                input_shape=(None, img_rows, img_cols, channels),
                nb_filters=nb_filters, stocRound=stocRound)
    elif distill:
        from cleverhans_tutorials.tutorial_models import make_distilled_cifar_cnn
        model = make_distilled_cifar_cnn(
            phase, logits_scalar, 'teacher_fp_', 'fp_',
            nb_filters=nb_filters,
            input_shape=(None, img_rows, img_cols, channels))
    else:
        from cleverhans_tutorials.tutorial_models import make_basic_cifar_cnn
        model = make_basic_cifar_cnn(
            phase, logits_scalar, 'fp_',
            input_shape=(None, img_rows, img_cols, channels),
            nb_filters=nb_filters)

    # separate predictions of the teacher for distilled training
    if distill:
        teacher_preds = model.teacher_call(x, reuse=False)
        teacher_logits = model.get_teacher_logits(x, reuse=False)

    # separate calling function for ensemble models
    if ensembleThree:
        preds = model.ensemble_call(x, reuse=False)
    else:  # default
        preds = model(x, reuse=False)
    print("Defined TensorFlow model graph.")

    rng = np.random.RandomState([2017, 8, 30])

    def evaluate():
        # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        if ensembleThree:
            acc = model_eval_ensemble(sess, x, y, preds, X_test, Y_test,
                                      phase=phase, args=eval_params)
        else:
            acc = model_eval(sess, x, y, preds, X_test, Y_test,
                             phase=phase, args=eval_params)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train a CIFAR10 model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            train_attack_params = {'eps': MAX_EPS,
                                   'eps_iter': 0.01,
                                   'nb_iter': nb_iter}
            train_attacker = MadryEtAl(model, sess=sess)
        elif adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            stddev = int(np.ceil((MAX_EPS * 255) // 2))
            train_attack_params = {'eps': tf.abs(tf.truncated_normal(
                shape=(batch_size, 1, 1, 1), mean=0, stddev=stddev))}
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)

        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv_train = model.get_probs(adv_x_train)

        eval_attack_params = {'eps': MAX_EPS, 'clip_min': 0., 'clip_max': 1.}
        adv_x_eval = train_attacker.generate(x, phase, **eval_attack_params)
        preds_adv_eval = model.get_probs(adv_x_eval)  # * logits_scalar

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
        if adv and delay > 0:
            train_params.update({'nb_epochs': delay})
        # do clean training for 'nb_epochs' or 'delay' epochs
        if distill:
            temperature = 10  # 1 means the teacher predictions are used as is
            teacher_scaled_preds_val = model_train_teacher(
                sess, x, y, teacher_preds, teacher_logits, temperature,
                X_train, Y_train, phase=phase, args=train_params, rng=rng)
            eval_params = {'batch_size': batch_size}
            teacher_acc = model_eval(sess, x, y, teacher_preds, X_test,
                                     Y_test, phase=phase, args=eval_params)
            print('Test accuracy of the teacher model on legitimate '
                  'examples: %0.4f' % teacher_acc)
            print('Training the student model...')
            student_train_params = {
                'nb_epochs': student_epochs,
                'batch_size': batch_size,
                'learning_rate': learning_rate,
                'loss_name': 'train loss',
                'filename': 'model',
                'reuse_global_step': False,
                'train_scope': 'train',
                'is_training': True
            }
            if save:
                student_train_params.update({'log_dir': model_path})
            y_teacher = tf.placeholder(tf.float32, shape=(None, nb_classes))
            model_train_student(sess, x, y, preds, temperature, X_train,
                                Y_train, y_teacher=y_teacher,
                                teacher_preds=teacher_scaled_preds_val,
                                alpha=0.3, beta=0.7, phase=phase,
                                evaluate=evaluate,
                                args=student_train_params, save=save, rng=rng)
        elif inpgradreg:
            model_train_inpgrad_reg(sess, x, y, preds, X_train, Y_train,
                                    phase=phase, evaluate=evaluate,
                                    l2dbl=l2dbl, l2cs=l2cs,
                                    args=train_params, save=save, rng=rng)
        else:
            # do clean training for 'nb_epochs' or 'delay' epochs
            model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                        evaluate=evaluate, args=train_params, save=save,
                        rng=rng)
        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                        predictions_adv=preds_adv_train, evaluate=evaluate,
                        args=train_params, save=save, rng=rng)
    else:
        if ensembleThree:
            # the three ensemble members have to be loaded from different paths
            variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            stored_variables = ['lp_conv1_init/k', 'lp_conv2_init/k',
                                'lp_conv3_init/k', 'lp_ip1init/W',
                                'lp_logits_init/W']
            variable_dict = dict(zip(stored_variables, variables[:5]))
            # Restore the first set of variables from model_path1
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path1))
            # Restore the second set of variables from model_path2
            variable_dict = dict(zip(stored_variables, variables[5:10]))
            saver2 = tf.train.Saver(variable_dict)
            saver2.restore(sess, tf.train.latest_checkpoint(model_path2))
            stored_variables = ['fp_conv1_init/k', 'fp_conv2_init/k',
                                'fp_conv3_init/k', 'fp_ip1init/W',
                                'fp_logits_init/W']
            variable_dict = dict(zip(stored_variables, variables[10:]))
            saver3 = tf.train.Saver(variable_dict)
            saver3.restore(sess, tf.train.latest_checkpoint(model_path3))
        else:
            tf_model_load(sess, model_path)
            print('Restored model from %s' % model_path)
        evaluate()

    # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    if ensembleThree:
        accuracy = model_eval_ensemble(sess, x, y, preds, X_test, Y_test,
                                       phase=phase, feed={phase: False},
                                       args=eval_params)
    else:
        accuracy = model_eval(sess, x, y, preds, X_test, Y_test, phase=phase,
                              feed={phase: False}, args=eval_params)
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Build dataset
    ###########################################################################
    if targeted:
        from cleverhans.utils import build_targeted_dataset
        adv_inputs, true_labels, adv_ys = build_targeted_dataset(
            X_test, Y_test, np.arange(nb_samples), nb_classes, img_rows,
            img_cols, channels)
    else:
        adv_inputs = X_test[:nb_samples]

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    if targeted:
        att_batch_size = np.clip(nb_samples * (nb_classes - 1),
                                 a_max=MAX_BATCH_SIZE, a_min=1)
        nb_adv_per_sample = nb_classes - 1
        yname = "y_target"
    else:
        att_batch_size = np.minimum(nb_samples, MAX_BATCH_SIZE)
        nb_adv_per_sample = 1
        adv_ys = None
        yname = "y"

    print('Crafting ' + str(nb_samples) + ' * ' + str(nb_adv_per_sample) +
          ' adversarial examples')
    print("This could take some time ...")

    if ensembleThree:
        model_type = 'ensembleThree'
    else:
        model_type = 'default'

    if attack == ATTACK_CARLINI_WAGNER_L2:
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model, back='tf', model_type=model_type,
                                   num_classes=nb_classes, sess=sess)
        attack_params = {'binary_search_steps': 1,
                         'max_iterations': attack_iterations,
                         'learning_rate': 0.1,
                         'batch_size': att_batch_size,
                         'initial_const': 10}
    elif attack == ATTACK_JSMA:
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model, back='tf', model_type=model_type,
                                     sess=sess, num_classes=nb_classes)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, back='tf',
                                      model_type=model_type, sess=sess,
                                      num_classes=nb_classes)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, back='tf', model_type=model_type,
                             sess=sess, num_classes=nb_classes)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    elif attack == ATTACK_BASICITER:
        from cleverhans.attacks import BasicIterativeMethod
        attacker = BasicIterativeMethod(model, back='tf', sess=sess,
                                        model_type=model_type,
                                        num_classes=nb_classes)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    else:
        print("Attack undefined")
        sys.exit(1)

    attack_params.update({yname: adv_ys, 'clip_min': 0., 'clip_max': 1.})
    X_test_adv = attacker.generate_np(adv_inputs, phase, **attack_params)

    if targeted:
        assert X_test_adv.shape[0] == nb_samples * \
            (nb_classes - 1), X_test_adv.shape
        # Evaluate the accuracy of the CIFAR10 model on adversarial examples
        print("Evaluating targeted results")
        adv_accuracy = model_eval(sess, x, y, preds, X_test_adv, true_labels,
                                  phase=phase, args=eval_params)
    else:
        # assert X_test_adv.shape[0] == nb_samples, X_test_adv.shape
        # Evaluate the accuracy of the CIFAR10 model on adversarial examples
        print("Evaluating untargeted results")
        if ensembleThree:
            adv_accuracy = model_eval_ensemble(sess, x, y, preds, X_test_adv,
                                               Y_test, phase=phase,
                                               args=eval_params)
        else:  # default below
            adv_accuracy = model_eval(sess, x, y, preds, X_test_adv, Y_test,
                                      phase=phase, args=eval_params)

    # Compute the number of adversarial examples that were successfully found
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((X_test_adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Friendly output for pasting into spreadsheet
    print('{0:.4f},'.format(accuracy))
    print('{0:.4f},'.format(adv_accuracy))
    print('{0:.4f},'.format(percent_perturbed))

    sess.close()
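# Condensed sketch of the per-member checkpoint restore pattern used by the
# ensembleThree branches above: each member's weights live in a separate
# checkpoint, and a name->variable dict maps the stored names onto a
# contiguous slice of the current graph's global variables. The function and
# argument names here are illustrative, not part of the original code.
def restore_slice(sess, stored_names, all_vars, start, ckpt_dir):
    variable_dict = dict(zip(stored_names,
                             all_vars[start:start + len(stored_names)]))
    saver = tf.train.Saver(variable_dict)
    saver.restore(sess, tf.train.latest_checkpoint(ckpt_dir))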