def cifar10_tutorial( train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN, testing=False, backprop_through_attack=BACKPROP_THROUGH_ATTACK, nb_filters=NB_FILTERS, num_threads=None, label_smoothing=0.1, ): """ CIFAR10 cleverhans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param clean_train: perform normal training on clean examples only before performing adversarial training. :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate :param backprop_through_attack: If True, backprop through adversarial example construction process during adversarial training. :param label_smoothing: float, amount of label smoothing for cross entropy :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session if num_threads: config_args = dict(intra_op_parallelism_threads=1) else: config_args = {} sess = tf.Session(config=tf.ConfigProto(**config_args)) # Get CIFAR10 data data = CIFAR10( train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end, ) dataset_size = data.x_train.shape[0] dataset_train = data.to_tensorflow()[0] dataset_train = dataset_train.map( lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4) dataset_train = dataset_train.batch(batch_size) dataset_train = dataset_train.prefetch(16) x_train, y_train = data.get_set("train") x_test, y_test = data.get_set("test") # Use Image Parameters img_rows, img_cols, nchannels = x_test.shape[1:4] nb_classes = y_test.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) # Train an MNIST model train_params = { "nb_epochs": nb_epochs, "batch_size": batch_size, "learning_rate": learning_rate, } eval_params = {"batch_size": batch_size} fgsm_params = {"eps": 0.3, "clip_min": 0.0, "clip_max": 1.0} rng = np.random.RandomState([2017, 8, 30]) def do_eval(preds, x_set, y_set, report_key, is_adv=None): acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params) setattr(report, report_key, acc) if is_adv is None: report_text = None elif is_adv: report_text = "adversarial" else: report_text = "legitimate" if report_text: print("Test accuracy on %s examples: %0.4f" % (report_text, acc)) if clean_train: model = ModelAllConvolutional("model1", nb_classes, nb_filters, input_shape=[32, 32, 3]) preds = model.get_logits(x) loss = CrossEntropy(model, smoothing=label_smoothing) def evaluate(): do_eval(preds, x_test, y_test, "clean_train_clean_eval", False) train( sess, loss, None, None, dataset_train=dataset_train, dataset_size=dataset_size, evaluate=evaluate, args=train_params, rng=rng, var_list=model.get_params(), ) # Calculate training error if testing: do_eval(preds, x_train, y_train, "train_clean_train_clean_eval") # Initialize the Fast Gradient Sign Method (FGSM) attack object and # graph fgsm = FastGradientMethod(model, sess=sess) adv_x = fgsm.generate(x, **fgsm_params) preds_adv = model.get_logits(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples do_eval(preds_adv, x_test, y_test, "clean_train_adv_eval", True) # Calculate training error if testing: do_eval(preds_adv, x_train, y_train, "train_clean_train_adv_eval") print("Repeating the process, using adversarial training") # Create a new model and train it to be robust to FastGradientMethod model2 = ModelAllConvolutional("model2", nb_classes, nb_filters, input_shape=[32, 32, 3]) fgsm2 = FastGradientMethod(model2, sess=sess) def attack(x): return fgsm2.generate(x, **fgsm_params) loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack) preds2 = model2.get_logits(x) adv_x2 = attack(x) if not backprop_through_attack: # For the fgsm attack used in this tutorial, the attack has zero # gradient so enabling this flag does not change the gradient. # For some other attacks, enabling this flag increases the cost of # training, but gives the defender the ability to anticipate how # the atacker will change their strategy in response to updates to # the defender's parameters. adv_x2 = tf.stop_gradient(adv_x2) preds2_adv = model2.get_logits(adv_x2) def evaluate2(): # Accuracy of adversarially trained model on legitimate test inputs do_eval(preds2, x_test, y_test, "adv_train_clean_eval", False) # Accuracy of the adversarially trained model on adversarial examples do_eval(preds2_adv, x_test, y_test, "adv_train_adv_eval", True) # Perform and evaluate adversarial training train( sess, loss2, None, None, dataset_train=dataset_train, dataset_size=dataset_size, evaluate=evaluate2, args=train_params, rng=rng, var_list=model2.get_params(), ) # Calculate training errors if testing: do_eval(preds2, x_train, y_train, "train_adv_train_clean_eval") do_eval(preds2_adv, x_train, y_train, "train_adv_train_adv_eval") return report
def generate_CIFAR10_adv(attacker_name, train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN, testing=False, nb_filters=NB_FILTERS, num_threads=None, label_smoothing=0.1, args=FLAGS): """ CIFAR10 cleverhans tutorial :param attacker_name: :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param clean_train: perform normal training on clean examples only before performing adversarial training. :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate :param label_smoothing: float, amount of label smoothing for cross entropy :return: an AccuracyReport object """ if "batch_size" in ATTACK_PARAM[attacker_name]: global BATCH_SIZE batch_size = ATTACK_PARAM[attacker_name]["batch_size"] BATCH_SIZE = batch_size # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session config_args = {} if num_threads: config_args = dict(intra_op_parallelism_threads=1) config_args["gpu_options"] = tf.GPUOptions(allow_growth=True) sess = tf.Session(config=tf.ConfigProto(**config_args)) # Get CIFAR10 data data = CIFAR(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) dataset_size = data.x_train.shape[0] dataset_train = data.to_tensorflow()[0] dataset_train = dataset_train.map( lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4) dataset_train = dataset_train.batch(batch_size) dataset_train = dataset_train.prefetch(16) x_train, y_train = data.get_set('train') x_test, y_test = data.get_set('test') # Use Image Parameters img_rows, img_cols, nchannels = x_test.shape[1:4] nb_classes = y_test.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(BATCH_SIZE, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(BATCH_SIZE, nb_classes)) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } eval_params = {'batch_size': batch_size} rng = np.random.RandomState([2017, 8, 30]) def do_generate_eval(adv_x, pred_adv_x, x_set, y_set, report_key, is_adv=None): adv_images_total, adv_pred_total, gt_label_total, success_rate = untargeted_advx_image_eval(sess, x, y, adv_x, pred_adv_x, x_set, y_set, args=eval_params) setattr(report, report_key, success_rate) if is_adv is None: report_text = None elif is_adv: report_text = 'adversarial' else: report_text = 'legitimate' if report_text: print('adversarial attack successful rate on %s: %0.4f' % (report_text, success_rate)) return adv_images_total, adv_pred_total, gt_label_total, success_rate # shape = (total, H,W,C) def do_eval(preds, x_set, y_set, report_key, is_adv=None): acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params) setattr(report, report_key, acc) if is_adv is None: report_text = None elif is_adv: report_text = 'adversarial' else: report_text = 'legitimate' if report_text: print('Test accuracy on %s examples: %0.4f' % (report_text, acc)) if clean_train: model = ModelAllConvolutional('model1', nb_classes, nb_filters, input_shape=[32, 32, 3]) preds = model.get_logits(x) # tf.tensor def evaluate(): do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False) resume_files = os.listdir(args.resume) loss = CrossEntropy(model, smoothing=label_smoothing) if len(resume_files) == 0: saver = tf.train.Saver() train(sess, loss, None, None, dataset_train=dataset_train, dataset_size=dataset_size, evaluate=evaluate, args=train_params, rng=rng, var_list=model.get_params()) # 训练nb_epochs个epochs save_path = saver.save(sess, "{}/model".format(args.resume), global_step=nb_epochs) print("Model saved in path: %s" % save_path) else: # resume from old latest_checkpoint = tf.train.latest_checkpoint(args.resume) saver = tf.train.Saver() saver.restore(sess, latest_checkpoint) # Calculate training error if testing: evaluate() # Initialize the Fast Gradient Sign Method (FGSM) attack object and # graph attacker = ATTACKERS[attacker_name](model, sess=sess) param_dict = ATTACK_PARAM[attacker_name] print("begin generate adversarial examples of CIFAR-10 using attacker: {}".format(attacker_name)) adv_x = attacker.generate(x, **param_dict) # tensor preds_adv = model.get_logits(adv_x) # generate adversarial examples adv_images_total, adv_pred_total, gt_label_total, success_rate = do_generate_eval(adv_x, preds_adv, x_train, y_train, "clean_train_adv_eval", True) print("attacker: {} attack successful rate for CIFAR-10 train dataset is {}".format(attacker_name, success_rate)) adv_images_total, adv_pred_total, gt_label_total, success_rate = do_generate_eval(adv_x, preds_adv, x_test, y_test, "clean_test_adv_eval", True) print("attacker: {} attack successful rate for CIFAR-10 test dataset is {}".format(attacker_name, success_rate)) return report
def cifar10_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN, testing=False, backprop_through_attack=BACKPROP_THROUGH_ATTACK, nb_filters=NB_FILTERS, num_threads=None, label_smoothing=0.1, adversarial_training=ADVERSARIAL_TRAINING): """ CIFAR10 cleverhans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param clean_train: perform normal training on clean examples only before performing adversarial training. :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate :param backprop_through_attack: If True, backprop through adversarial example construction process during adversarial training. :param label_smoothing: float, amount of label smoothing for cross entropy :param adversarial_training: True means using adversarial training :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session if num_threads: config_args = dict(intra_op_parallelism_threads=1) else: # put data on cpu and gpu both config_args = dict(allow_soft_placement=True) sess = tf.Session(config=tf.ConfigProto(**config_args)) # Get CIFAR10 data data = CIFAR10(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) dataset_size = data.x_train.shape[0] dataset_train = data.to_tensorflow()[0] dataset_train = dataset_train.map( lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4) dataset_train = dataset_train.batch(batch_size) dataset_train = dataset_train.prefetch(16) x_train, y_train = data.get_set('train') x_test, y_test = data.get_set('test') # Use Image Parameters img_rows, img_cols, nchannels = x_test.shape[1:4] nb_classes = y_test.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } eval_params = {'batch_size': batch_size} bim_params = { 'eps': 0.5, 'clip_min': 0., 'eps_iter': 0.002, 'nb_iter': 10, 'clip_max': 1., 'ord': np.inf } rng = np.random.RandomState([2017, 8, 30]) def do_eval(preds, x_set, y_set, report_key, is_adv=None): acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params) setattr(report, report_key, acc) if is_adv is None: report_text = None elif is_adv: report_text = 'adversarial' else: report_text = 'legitimate' if report_text: print('Test accuracy on %s examples: %0.4f' % (report_text, acc)) if clean_train: model = ModelAllConvolutional('model1', nb_classes, nb_filters, input_shape=[32, 32, 3]) preds = model.get_logits(x) loss = CrossEntropy(model, smoothing=label_smoothing) def evaluate(): do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False) """ when training, evaluating can be happened """ train(sess, loss, None, None, dataset_train=dataset_train, dataset_size=dataset_size, evaluate=evaluate, args=train_params, rng=rng, var_list=model.get_params()) # save model # Calculate training error if testing: do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval') # Initialize the Basic Iterative Method (BIM) attack object and # graph for i in range(20): bim = BasicIterativeMethod(model, sess=sess) adv_x = bim.generate(x, **bim_params) preds_adv = model.get_logits(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples print("eps:%0.2f" % (bim_params["eps_iter"] * bim_params['nb_iter'])) do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True) bim_params["eps_iter"] = bim_params["eps_iter"] + 0.002 # Calculate training error if testing: do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval') if not adversarial_training: return report print('Repeating the process, using adversarial training') # Create a new model and train it to be robust to BasicIterativeMethod model2 = ModelAllConvolutional('model2', nb_classes, nb_filters, input_shape=[32, 32, 3]) bim2 = BasicIterativeMethod(model2, sess=sess) def attack(x): return bim2.generate(x, **bim_params) # add attack to loss loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack) preds2 = model2.get_logits(x) adv_x2 = attack(x) if not backprop_through_attack: # For the fgsm attack used in this tutorial, the attack has zero # gradient so enabling this flag does not change the gradient. # For some other attacks, enabling this flag increases the cost of # training, but gives the defender the ability to anticipate how # the attacker will change their strategy in response to updates to # the defender's parameters. adv_x2 = tf.stop_gradient(adv_x2) preds2_adv = model2.get_logits(adv_x2) def evaluate2(): # Accuracy of adversarially trained model on legitimate test inputs do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False) # Accuracy of the adversarially trained model on adversarial examples do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True) # Perform and evaluate adversarial training train(sess, loss2, None, None, dataset_train=dataset_train, dataset_size=dataset_size, evaluate=evaluate2, args=train_params, rng=rng, var_list=model2.get_params()) # Calculate training errors if testing: do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval') do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval') return report
def cifar10_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, architecture=ARCHITECTURE, load_model=LOAD_MODEL, ckpt_dir='None', learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN, backprop_through_attack=BACKPROP_THROUGH_ATTACK, nb_filters=NB_FILTERS, num_threads=None, label_smoothing=0.): """ CIFAR10 cleverhans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param clean_train: perform normal training on clean examples only before performing adversarial training. :param backprop_through_attack: If True, backprop through adversarial example construction process during adversarial training. :param label_smoothing: float, amount of label smoothing for cross entropy :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(int(time.time() * 1000) % 2**31) np.random.seed(int(time.time() * 1001) % 2**31) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session if num_threads: config_args = dict(intra_op_parallelism_threads=1) else: config_args = {} sess = tf.Session(config=tf.ConfigProto(**config_args)) # Get CIFAR10 data data = CIFAR10(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) dataset_size = data.x_train.shape[0] dataset_train = data.to_tensorflow()[0] dataset_train = dataset_train.map( lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4) dataset_train = dataset_train.batch(batch_size) dataset_train = dataset_train.prefetch(16) x_train, y_train = data.get_set('train') pgd_train = None if FLAGS.load_pgd_train_samples: pgd_path = os.path.expanduser('~/data/advhyp/{}/samples'.format( FLAGS.load_pgd_train_samples)) x_train = np.load(os.path.join(pgd_path, 'train_clean.npy')) y_train = np.load(os.path.join(pgd_path, 'train_y.npy')) pgd_train = np.load(os.path.join(pgd_path, 'train_pgd.npy')) if x_train.shape[1] == 3: x_train = x_train.transpose((0, 2, 3, 1)) pgd_train = pgd_train.transpose((0, 2, 3, 1)) if len(y_train.shape) == 1: y_tmp = np.zeros((len(y_train), np.max(y_train) + 1), y_train.dtype) y_tmp[np.arange(len(y_tmp)), y_train] = 1. y_train = y_tmp x_test, y_test = data.get_set('test') pgd_test = None if FLAGS.load_pgd_test_samples: pgd_path = os.path.expanduser('~/data/advhyp/{}/samples'.format( FLAGS.load_pgd_test_samples)) x_test = np.load(os.path.join(pgd_path, 'test_clean.npy')) y_test = np.load(os.path.join(pgd_path, 'test_y.npy')) pgd_test = np.load(os.path.join(pgd_path, 'test_pgd.npy')) if x_test.shape[1] == 3: x_test = x_test.transpose((0, 2, 3, 1)) pgd_test = pgd_test.transpose((0, 2, 3, 1)) if len(y_test.shape) == 1: y_tmp = np.zeros((len(y_test), np.max(y_test) + 1), y_test.dtype) y_tmp[np.arange(len(y_tmp)), y_test] = 1. y_test = y_tmp train_idcs = np.arange(len(x_train)) np.random.shuffle(train_idcs) x_train, y_train = x_train[train_idcs], y_train[train_idcs] if pgd_train is not None: pgd_train = pgd_train[train_idcs] test_idcs = np.arange(len(x_test))[:FLAGS.test_size] np.random.shuffle(test_idcs) x_test, y_test = x_test[test_idcs], y_test[test_idcs] if pgd_test is not None: pgd_test = pgd_test[test_idcs] # Use Image Parameters img_rows, img_cols, nchannels = x_test.shape[1:4] nb_classes = y_test.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } eval_params = {'batch_size': batch_size} pgd_params = { # ord: , 'eps': FLAGS.eps, 'eps_iter': (FLAGS.eps / 5), 'nb_iter': 10, 'clip_min': 0, 'clip_max': 255 } cw_params = { 'binary_search_steps': FLAGS.cw_search_steps, 'max_iterations': FLAGS.cw_steps, #1000 'abort_early': True, 'learning_rate': FLAGS.cw_lr, 'batch_size': batch_size, 'confidence': 0, 'initial_const': FLAGS.cw_c, 'clip_min': 0, 'clip_max': 255 } # Madry dosen't divide by 255 x_train *= 255 x_test *= 255 if pgd_train is not None: pgd_train *= 255 if pgd_test is not None: pgd_test *= 255 print('x_train amin={} amax={}'.format(np.amin(x_train), np.amax(x_train))) print('x_test amin={} amax={}'.format(np.amin(x_test), np.amax(x_test))) print( 'clip_min : {}, clip_max : {} >> CHECK WITH WHICH VALUES THE CLASSIFIER WAS PRETRAINED !!! <<' .format(pgd_params['clip_min'], pgd_params['clip_max'])) rng = np.random.RandomState() # [2017, 8, 30] debug_dict = dict() if FLAGS.save_debug_dict else None def do_eval(preds, x_set, y_set, report_key, is_adv=None, predictor=None, x_adv=None): if predictor is None: acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params) else: do_eval(preds, x_set, y_set, report_key, is_adv=is_adv) if x_adv is not None: x_set_adv, = batch_eval(sess, [x], [x_adv], [x_set], batch_size=batch_size) assert x_set.shape == x_set_adv.shape x_set = x_set_adv n_batches = math.ceil(x_set.shape[0] / batch_size) p_set, p_det = np.concatenate([ predictor.send(x_set[b * batch_size:(b + 1) * batch_size]) for b in tqdm.trange(n_batches) ]).T acc = np.equal(p_set, y_set[:len(p_set)].argmax(-1)).mean() # if is_adv: # import IPython ; IPython.embed() ; exit(1) if FLAGS.save_debug_dict: debug_dict['x_set'] = x_set debug_dict['y_set'] = y_set ddfn = 'logs/debug_dict_{}.pkl'.format( 'adv' if is_adv else 'clean') if not os.path.exists(ddfn): with open(ddfn, 'wb') as f: pickle.dump(debug_dict, f) debug_dict.clear() if is_adv is None: report_text = None elif is_adv: report_text = 'adversarial' else: report_text = 'legitimate' if report_text: print('Test accuracy on %s examples %s: %0.4f' % (report_text, 'with correction' if predictor is not None else 'without correction', acc)) if is_adv is not None: label = 'test_acc_{}_{}'.format( report_text, 'corrected' if predictor else 'uncorrected') swriter.add_scalar(label, acc) if predictor is not None: detect = np.equal(p_det, is_adv).mean() label = 'test_det_{}_{}'.format( report_text, 'corrected' if predictor else 'uncorrected') print(label, detect) swriter.add_scalar(label, detect) label = 'test_dac_{}_{}'.format( report_text, 'corrected' if predictor else 'uncorrected') swriter.add_scalar( label, np.equal(p_set, y_set[:len(p_set)].argmax(-1))[np.equal( p_det, is_adv)].mean()) return acc if clean_train: if architecture == 'ConvNet': model = ModelAllConvolutional('model1', nb_classes, nb_filters, input_shape=[32, 32, 3]) elif architecture == 'ResNet': model = ResNet(scope='ResNet') else: raise Exception('Specify valid classifier architecture!') preds = model.get_logits(x) loss = CrossEntropy(model, smoothing=label_smoothing) if load_model: model_name = 'naturally_trained' if FLAGS.load_adv_trained: model_name = 'adv_trained' if ckpt_dir is not 'None': ckpt = tf.train.get_checkpoint_state( os.path.join(os.path.expanduser(ckpt_dir), model_name)) else: ckpt = tf.train.get_checkpoint_state('./models/' + model_name) ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path saver = tf.train.Saver(var_list=dict( (v.name.split('/', 1)[1].split(':')[0], v) for v in tf.global_variables())) saver.restore(sess, ckpt_path) print('\nMODEL SUCCESSFULLY LOADED from : {}'.format(ckpt_path)) initialize_uninitialized_global_variables(sess) else: def evaluate(): do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False) train(sess, loss, None, None, dataset_train=dataset_train, dataset_size=dataset_size, evaluate=evaluate, args=train_params, rng=rng, var_list=model.get_params()) logits_op = preds.op while logits_op.type != 'MatMul': logits_op = logits_op.inputs[0].op latent_x_tensor, weights = logits_op.inputs logits_tensor = preds nb_classes = weights.shape[-1].value if not FLAGS.save_pgd_samples: noise_eps = FLAGS.noise_eps.split(',') if FLAGS.noise_eps_detect is None: FLAGS.noise_eps_detect = FLAGS.noise_eps noise_eps_detect = FLAGS.noise_eps_detect.split(',') if pgd_train is not None: pgd_train = pgd_train[:FLAGS.n_collect] if not FLAGS.passthrough: predictor = tf_robustify.collect_statistics( x_train[:FLAGS.n_collect], y_train[:FLAGS.n_collect], x, sess, logits_tensor=logits_tensor, latent_x_tensor=latent_x_tensor, weights=weights, nb_classes=nb_classes, p_ratio_cutoff=FLAGS.p_ratio_cutoff, noise_eps=noise_eps, noise_eps_detect=noise_eps_detect, pgd_eps=pgd_params['eps'], pgd_lr=pgd_params['eps_iter'] / pgd_params['eps'], pgd_iters=pgd_params['nb_iter'], save_alignments_dir='logs/stats' if FLAGS.save_alignments else None, load_alignments_dir=os.path.expanduser( '~/data/advhyp/madry/stats') if FLAGS.load_alignments else None, clip_min=pgd_params['clip_min'], clip_max=pgd_params['clip_max'], batch_size=batch_size, num_noise_samples=FLAGS.num_noise_samples, debug_dict=debug_dict, debug=FLAGS.debug, targeted=False, pgd_train=pgd_train, fit_classifier=FLAGS.fit_classifier, clip_alignments=FLAGS.clip_alignments, just_detect=FLAGS.just_detect) else: def _predictor(): _x = yield while (_x is not None): _y = sess.run(preds, {x: _x}).argmax(-1) _x = yield np.stack((_y, np.zeros_like(_y)), -1) predictor = _predictor() next(predictor) if FLAGS.save_alignments: exit(0) # Evaluate the accuracy of the model on clean examples acc_clean = do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False, predictor=predictor) # Initialize the PGD attack object and graph if FLAGS.attack == 'pgd': pgd = MadryEtAl(model, sess=sess) adv_x = pgd.generate(x, **pgd_params) elif FLAGS.attack == 'cw': cw = CarliniWagnerL2(model, sess=sess) adv_x = cw.generate(x, **cw_params) elif FLAGS.attack == 'mean': pgd = MadryEtAl(model, sess=sess) mean_eps = FLAGS.mean_eps * FLAGS.eps def _attack_mean(x): x_many = tf.tile(x[None], (FLAGS.mean_samples, 1, 1, 1)) x_noisy = x_many + tf.random_uniform(x_many.shape, -mean_eps, mean_eps) x_noisy = tf.clip_by_value(x_noisy, 0, 255) x_pgd = pgd.generate(x_noisy, **pgd_params) x_clip = tf.minimum(x_pgd, x_many + FLAGS.eps) x_clip = tf.maximum(x_clip, x_many - FLAGS.eps) x_clip = tf.clip_by_value(x_clip, 0, 255) return x_clip adv_x = tf.map_fn(_attack_mean, x) adv_x = tf.reduce_mean(adv_x, 1) preds_adv = model.get_logits(adv_x) if FLAGS.save_pgd_samples: for ds, y, name in ((x_train, y_train, 'train'), (x_test, y_test, 'test')): train_batches = math.ceil(len(ds) / FLAGS.batch_size) train_pgd = np.concatenate([ sess.run(adv_x, { x: ds[b * FLAGS.batch_size:(b + 1) * FLAGS.batch_size] }) for b in tqdm.trange(train_batches) ]) np.save('logs/{}_clean.npy'.format(name), ds / 255.) np.save('logs/{}_y.npy'.format(name), y) train_pgd /= 255. np.save('logs/{}_pgd.npy'.format(name), train_pgd) exit(0) # Evaluate the accuracy of the model on adversarial examples if not FLAGS.load_pgd_test_samples: acc_pgd = do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True, predictor=predictor, x_adv=adv_x) else: acc_pgd = do_eval(preds, pgd_test, y_test, 'clean_train_adv_eval', True, predictor=predictor) swriter.add_scalar('test_acc_mean', (acc_clean + acc_pgd) / 2., 0) print('Repeating the process, using adversarial training') exit(0) # Create a new model and train it to be robust to MadryEtAl if architecture == 'ConvNet': model2 = ModelAllConvolutional('model2', nb_classes, nb_filters, input_shape=[32, 32, 3]) elif architecture == 'ResNet': model = ResNet() else: raise Exception('Specify valid classifier architecture!') pgd2 = MadryEtAl(model2, sess=sess) def attack(x): return pgd2.generate(x, **pgd_params) loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack) preds2 = model2.get_logits(x) adv_x2 = attack(x) if not backprop_through_attack: # For some attacks, enabling this flag increases the cost of # training, but gives the defender the ability to anticipate how # the atacker will change their strategy in response to updates to # the defender's parameters. adv_x2 = tf.stop_gradient(adv_x2) preds2_adv = model2.get_logits(adv_x2) if load_model: if ckpt_dir is not 'None': ckpt = tf.train.get_checkpoint_state( os.path.join(os.path.expanduser(ckpt_dir), 'adv_trained')) else: ckpt = tf.train.get_checkpoint_state('./models/adv_trained') ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path assert ckpt_path and tf_model_load( sess, file_path=ckpt_path), '\nMODEL LOADING FAILED' print('\nMODEL SUCCESSFULLY LOADED from : {}'.format(ckpt_path)) initialize_uninitialized_global_variables(sess) else: def evaluate2(): # Accuracy of adversarially trained model on legitimate test inputs do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False) # Accuracy of the adversarially trained model on adversarial # examples do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True) # Perform and evaluate adversarial training train(sess, loss2, None, None, dataset_train=dataset_train, dataset_size=dataset_size, evaluate=evaluate2, args=train_params, rng=rng, var_list=model2.get_params()) # Evaluate model do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False) do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True) return report
def cifar10_tutorial(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN, testing=False, backprop_through_attack=BACKPROP_THROUGH_ATTACK, nb_filters=NB_FILTERS, num_threads=None, label_smoothing=0.1, retrain=False): """ CIFAR10 cleverhans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param clean_train: perform normal training on clean examples only before performing adversarial training. :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate :param backprop_through_attack: If True, backprop through adversarial example construction process during adversarial training. :param label_smoothing: float, amount of label smoothing for cross entropy :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session if num_threads: config_args = dict(intra_op_parallelism_threads=1) else: config_args = {} sess = tf.Session(config=tf.ConfigProto(**config_args)) # Get CIFAR10 data data = CIFAR10(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) dataset_size = data.x_train.shape[0] dataset_train = data.to_tensorflow()[0] dataset_train = dataset_train.map( lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4) dataset_train = dataset_train.batch(batch_size) dataset_train = dataset_train.prefetch(16) x_train, y_train = data.get_set('train') x_test, y_test = data.get_set('test') # start = 6 # end = 10 # x_test = x_test[start:end] # y_test = y_test[start:end] ########################### # Adjust hue / saturation # ########################### # hueValue = 0.9 # saturationValue = 0.9 # tf_x_test = tf.image.adjust_saturation(tf.image.adjust_hue(x_test, saturationValue), hueValue) # tf_x_test = tf.image.adjust_saturation(tx_test, hueValue) # x_test = sess.run(tf_x_test) ############################### # Transform image to uniimage # ############################### # x_train = convert_uniimage(x_train) # Use Image Parameters img_rows, img_cols, nchannels = x_test.shape[1:4] nb_classes = y_test.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'train_dir': save_dir, 'filename': filename, } eval_params = {'batch_size': batch_size} fgsm_params = {'eps': 8 / 255, 'clip_min': 0., 'clip_max': 1.} rng = np.random.RandomState([2017, 8, 30]) def do_eval(preds, x_set, y_set, report_key, is_adv=None, ae=None, type=None, datasetName=None, discretizeColor=1): accuracy, distortion = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params, is_adv=is_adv, ae=ae, type=type, datasetName=datasetName, discretizeColor=discretizeColor) setattr(report, report_key, accuracy) if is_adv is None: report_text = None elif is_adv: report_text = 'adversarial' else: report_text = 'legitimate' if report_text: print('Test accuracy on %s examples: %0.4f' % (report_text, accuracy)) return accuracy, distortion if clean_train: model = ModelAllConvolutional('model1', nb_classes, nb_filters, input_shape=[32, 32, 3]) # model = UIPModel('model1', nb_classes, nb_filters, input_shape=[32, 32, 3]) preds = model.get_logits(x) loss = CrossEntropy(model, smoothing=label_smoothing) def evaluate(): do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False, type=type, datasetName="CIFAR10", discretizeColor=discretizeColor) # train(sess, loss, None, None, # dataset_train=dataset_train, dataset_size=dataset_size, # evaluate=evaluate, args=train_params, rng=rng, # var_list=model.get_params(), save=save) saveFileNumArr = [] # saveFileNumArr = [50, 500, 1000] count = 0 appendNum = 1000 while count < 1000: count = count + appendNum saveFileNumArr.append(count) distortionArr = [] accuracyArr = [] for i in range(len(saveFileNumArr)): saveFileNum = saveFileNumArr[i] model_path = os.path.join(save_dir, filename + "-" + str(saveFileNum)) print("Trying to load trained model from: " + model_path) if os.path.exists(model_path + ".meta"): tf_model_load(sess, model_path) print("Load trained model") else: train_with_noise(sess, loss, x_train, y_train, evaluate=evaluate, args=train_params, rng=rng, var_list=model.get_params(), save=save, type=type, datasetName="CIFAR10", retrain=retrain, discretizeColor=discretizeColor) retrain = False ########################################## # Generate semantic adversarial examples # ########################################## adv_x, y_test2 = color_shift_attack(sess, x, y, np.copy(x_test), np.copy(y_test), preds, args=eval_params, num_trials=num_trials) x_test2 = adv_x # convert_uniimage(np.copy(x_test2), np.copy(x_test), discretizeColor) accuracy, distortion = do_eval(preds, np.copy(x_test2), np.copy(y_test2), 'clean_train_clean_eval', False, type=type, datasetName="CIFAR10", discretizeColor=discretizeColor) # accuracy, distortion = do_eval(preds, np.copy(x_test), np.copy(y_test), 'clean_train_clean_eval', False, type=type, # datasetName="CIFAR10", discretizeColor=discretizeColor) # # Initialize the Fast Gradient Sign Method (FGSM) attack object and # # graph # fgsm = FastGradientMethod(model, sess=sess) # fgsm = BasicIterativeMethod(model, sess=sess) # fgsm = MomentumIterativeMethod(model, sess=sess) # adv_x = fgsm.generate(x, **fgsm_params) # preds_adv = model.get_logits(adv_x) # Evaluate the accuracy of the MNIST model on adversarial examples # accuracy, distortion = do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True, type=type) # accuracy, distortion = do_eval(preds, x_test, y_test, 'clean_train_adv_eval', True, ae=adv_x, type=type, # datasetName="CIFAR10", discretizeColor=discretizeColor) distortionArr.append(distortion) accuracyArr.append(accuracy) print(str(accuracy)) print(str(distortion)) print("accuracy:") for accuracy in accuracyArr: print(accuracy) print("distortion:") for distortion in distortionArr: print(distortion) # print("hue "+str(hueValue)) return report
sess = tf.Session(config=tf.ConfigProto(**config_args)) model = ModelAllConvolutional('model1', nb_classes, NB_FILTERS, input_shape=[32, 32, 3]) preds = model.get_logits(x) loss = CrossEntropy(model, smoothing=0.1) train(sess, loss, x_train, y_train, evaluate=None, args=train_params, rng=rng, var_list=model.get_params()) fgsm = FastGradientMethod(model, sess=sess) adv_x = fgsm.generate(x, **fgsm_params) preds_adv = model.get_logits(adv_x) adv_image = adv_x.eval(session=sess, feed_dict={x: my_data}) # 快速显示,debug用 # plt.imshow(adv_image[0,:,:,0]) # plt.show() # 是否生成对抗图片 print("STEP 4: Build melicious data...") printimage = True name_num = 0 directory = "image_adv_" + str(datetime.datetime.now()) if printimage is True:
def cifar10_tutorial(train_start=0, train_end=50000, test_start=0, test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN, testing=False, backprop_through_attack=BACKPROP_THROUGH_ATTACK, nb_filters=NB_FILTERS, num_threads=None, label_smoothing=0.1): """ CIFAR10 cleverhans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param clean_train: perform normal training on clean examples only before performing adversarial training. :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate :param backprop_through_attack: If True, backprop through adversarial example construction process during adversarial training. :param label_smoothing: float, amount of label smoothing for cross entropy :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session if num_threads: config_args = dict(intra_op_parallelism_threads=1) else: config_args = {} sess = tf.Session(config=tf.ConfigProto(**config_args)) # Get CIFAR10 data data = CIFAR10(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) dataset_size = data.x_train.shape[0] dataset_train = data.to_tensorflow()[0] dataset_train = dataset_train.map( lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4) dataset_train = dataset_train.batch(batch_size) dataset_train = dataset_train.prefetch(16) x_train, y_train = data.get_set('train') x_test, y_test = data.get_set('test') # Use Image Parameters img_rows, img_cols, nchannels = x_test.shape[1:4] nb_classes = y_test.shape[1] # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } eval_params = {'batch_size': batch_size} fgsm_params = {'eps': 0.13, 'clip_min': 0., 'clip_max': 1.} rng = np.random.RandomState([2017, 8, 30]) def do_eval(preds, x_set, y_set, report_key, is_adv=None): acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params) setattr(report, report_key, acc) if is_adv is None: report_text = None elif is_adv: report_text = 'adversarial' else: report_text = 'legitimate' if report_text: print('Test accuracy on %s examples: %0.4f' % (report_text, acc)) model = ModelAllConvolutional('model1', nb_classes, nb_filters, input_shape=[32, 32, 3]) preds = model.get_logits(x) if clean_train: loss = CrossEntropy(model, smoothing=label_smoothing) def evaluate(): do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False) train(sess, loss, None, None, dataset_train=dataset_train, dataset_size=dataset_size, evaluate=evaluate, args=train_params, rng=rng, var_list=model.get_params()) # save model #saver = tf.train.Saver() #saver.save(sess, "./checkpoint_dir/clean_model_100.ckpt") # load model and compute testing accuracy if testing: tf_model_load(sess, file_path="./checkpoint_dir/clean_model_100.ckpt") do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False) # Initialize the Fast Gradient Sign Method (FGSM) attack object and # graph fgsm = FastGradientMethod(model, sess=sess) adv_x = fgsm.generate(x, **fgsm_params) preds_adv = model.get_logits(adv_x) # Evaluate the accuracy of the CIFAR10 model on adversarial examples do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True) # generate and show adversarial samples x_test_adv = np.zeros(shape=x_test.shape) for i in range(10): x_test_adv[i * 1000:(i + 1) * 1000] = adv_x.eval( session=sess, feed_dict={x: x_test[i * 1000:(i + 1) * 1000]}) # implement anisotropic diffusion on adversarial samples x_test_filtered = np.zeros(shape=x_test_adv.shape) for i in range(y_test.shape[0]): x_test_filtered[i] = filter.anisotropic_diffusion(x_test_adv[i]) # implement median on adversarial samples # x_test_filtered_med = np.zeros(shape=x_test_adv.shape) # for i in range(y_test.shape[0]): # x_test_filtered_med[i] = medfilt(x_test_filtered_ad[i], kernel_size=(3,3,1)) acc = model_eval(sess, x, y, preds, x_test_filtered, y_test, args=eval_params) print("acc after anisotropic diffusion is {}".format(acc)) return report