class TestLBFGS(CleverHansTest):
    def setUp(self):
        super(TestLBFGS, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = LBFGS(self.model, sess=self.sess)

    def test_generate_np_targeted_gives_adversarial_example(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), np.random.randint(0, 1, 100)] = 1
        x_adv = self.attack.generate_np(x_val, max_iterations=100,
                                        binary_search_steps=3,
                                        initial_const=1,
                                        clip_min=-5, clip_max=5,
                                        batch_size=100, y_target=feed_labs)

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(np.argmax(feed_labs, axis=1) == new_labs) > 0.9)

    def test_generate_targeted_gives_adversarial_example(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), np.random.randint(0, 1, 100)] = 1
        x = tf.placeholder(tf.float32, x_val.shape)
        y = tf.placeholder(tf.float32, feed_labs.shape)

        x_adv_p = self.attack.generate(x, max_iterations=100,
                                       binary_search_steps=3,
                                       initial_const=1,
                                       clip_min=-5, clip_max=5,
                                       batch_size=100, y_target=y)
        x_adv = self.sess.run(x_adv_p, {x: x_val, y: feed_labs})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(np.argmax(feed_labs, axis=1) == new_labs) > 0.9)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        feed_labs = np.zeros((100, 2))
        feed_labs[np.arange(100), np.random.randint(0, 1, 100)] = 1
        x_adv = self.attack.generate_np(x_val, max_iterations=10,
                                        binary_search_steps=1,
                                        initial_const=1,
                                        clip_min=-0.2, clip_max=0.3,
                                        batch_size=100, y_target=feed_labs)

        self.assertTrue(-0.201 < np.min(x_adv))
        self.assertTrue(np.max(x_adv) < .301)
def lbfgs(X, which):
    wrapped = LBFGS(KerasModelWrapper(which.model), sess=session)
    X = X.copy()
    for i in tqdm(range(0, len(X), CHILD_BATCH_SIZE), desc='batch: ', leave=False):
        tensor = tf.convert_to_tensor(X[i:i + CHILD_BATCH_SIZE])
        tensor = wrapped.generate(tensor, eps=0.1)
        X[i:i + CHILD_BATCH_SIZE] = session.run(tensor)
    return X
def lbfgs_attack(train_data, model, sess, tar_class):
    wrap = KerasModelWrapper(model)
    lbfgs = LBFGS(wrap, sess=sess)
    one_hot_target = np.zeros((train_data.shape[0], 10), dtype=np.float32)
    one_hot_target[:, tar_class] = 1
    adv_x = lbfgs.generate_np(train_data, max_iterations=10,
                              binary_search_steps=3,
                              initial_const=1,
                              clip_min=-5, clip_max=5,
                              batch_size=1, y_target=one_hot_target)
    return adv_x
def lbfgs_attack(train_data, model, sess, tar_class):
    adv_x = []
    wrap = KerasModelWrapper(model)
    lbfgs = LBFGS(wrap, sess=sess)
    one_hot_target = np.zeros((train_data.shape[0], 10), dtype=np.float32)
    one_hot_target[:, tar_class - 1] = 1
    for i in range(train_data.shape[0] // 100):
        print(one_hot_target[i * 100:(i + 1) * 100].shape)
        if i == 0:
            adv_x = lbfgs.generate_np(x_val=train_data[i * 100:(i + 1) * 100],
                                      max_iterations=10,
                                      binary_search_steps=3,
                                      initial_const=1,
                                      batch_size=1,
                                      clip_min=-5, clip_max=5,
                                      y_target=one_hot_target[i * 100:(i + 1) * 100])
        else:
            adv_x = np.concatenate(
                (adv_x,
                 lbfgs.generate_np(x_val=train_data[i * 100:(i + 1) * 100],
                                   max_iterations=10,
                                   binary_search_steps=3,
                                   initial_const=1,
                                   batch_size=1,
                                   clip_min=-5, clip_max=5,
                                   y_target=one_hot_target[i * 100:(i + 1) * 100])))
    return adv_x
def lbfgs(model):
    wrap = KerasModelWrapper(model)
    att = LBFGS(wrap, sess=session)

    def attack(X):
        for i in tqdm(range(0, len(X), CHILD_BATCH_SIZE), desc='LBFGS: ',
                      file=sys.stdout, leave=False):
            # print(X[i:i+CHILD_BATCH_SIZE].shape)
            tensor = tf.convert_to_tensor(X[i:i + CHILD_BATCH_SIZE])
            tensor = att.generate(tensor,
                                  batch_size=len(X[i:i + CHILD_BATCH_SIZE]),
                                  max_iterations=4, binary_search_steps=3)
            X[i:i + CHILD_BATCH_SIZE] = session.run(tensor)

    return attack
def get_adversarial_attack_and_params(attack_name, wrap, sess):
    params = None
    stop_gradient = False

    if attack_name == "fgsm":
        attack = FastGradientMethod(wrap, sess=sess)
        params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        stop_gradient = True
    if attack_name == "deepfool":
        attack = DeepFool(wrap, sess=sess)
    if attack_name == "lbfgs":
        attack = LBFGS(wrap, sess=sess)
    if attack_name == "saliency":
        attack = SaliencyMapMethod(wrap, sess=sess)
    if attack_name == "bim":
        attack = BasicIterativeMethod(wrap, sess=sess)

    return attack, params, stop_gradient
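The helper above only constructs the attack object; wiring it into a graph happens at the call site, which is not shown. A minimal sketch of that step, assuming an input placeholder x, a NumPy batch x_batch, a class count n_classes, and a live session sess (all illustrative names, not from the original):

# Hypothetical call-site usage (x, x_batch, n_classes and sess are assumed to exist).
attack, params, stop_gradient = get_adversarial_attack_and_params("lbfgs", wrap, sess)

# LBFGS is a targeted attack, so build one-hot targets for the batch (illustrative choice: class 0).
y_target = tf.constant(np.eye(n_classes)[np.zeros(len(x_batch), dtype=int)], dtype=tf.float32)
params = params or {'y_target': y_target, 'batch_size': len(x_batch),
                    'clip_min': 0., 'clip_max': 1.}

adv_x = attack.generate(x, **params)
if stop_gradient:
    # requested only for FGSM above: treat the adversarial example as a constant
    adv_x = tf.stop_gradient(adv_x)
adv_batch = sess.run(adv_x, feed_dict={x: x_batch})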
def attack_classifier(sess, x, model, x_test, attack_method="fgsm", target=None, batch_size=128):

    if attack_method == "fgsm":
        from cleverhans.attacks import FastGradientMethod
        params = {'eps': 8 / 255,
                  'clip_min': 0.,
                  'clip_max': 1.}
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = FastGradientMethod(model, sess=sess)

    elif attack_method == "basic_iterative":
        from cleverhans.attacks import BasicIterativeMethod
        params = {'eps': 8. / 255,
                  'eps_iter': 1. / 255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.,
                  'ord': np.inf}
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = BasicIterativeMethod(model, sess=sess)

    elif attack_method == "momentum_iterative":
        from cleverhans.attacks import MomentumIterativeMethod
        params = {'eps': 8 / 255,
                  'eps_iter': 1 / 255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.}
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = MomentumIterativeMethod(model, sess=sess)

    elif attack_method == "saliency":
        from cleverhans.attacks import SaliencyMapMethod
        params = {'theta': 8 / 255,
                  'gamma': 0.1,
                  'clip_min': 0.,
                  'clip_max': 1.}
        assert target is None
        method = SaliencyMapMethod(model, sess=sess)

    elif attack_method == "virtual":
        from cleverhans.attacks import VirtualAdversarialMethod
        params = {'eps': 8 / 255,
                  'num_iterations': 10,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
        assert target is None
        method = VirtualAdversarialMethod(model, sess=sess)

    elif attack_method == "cw":
        from cleverhans.attacks import CarliniWagnerL2
        params = {'confidence': 0,
                  'batch_size': 128,
                  'learning_rate': 1e-4,
                  'binary_search_steps': 10,
                  'max_iterations': 1000,
                  'abort_early': True,
                  'initial_const': 1e-2,
                  'clip_min': 0,
                  'clip_max': 1}
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = CarliniWagnerL2(model, sess=sess)

    elif attack_method == "elastic_net":
        from cleverhans.attacks import ElasticNetMethod
        params = {'fista': "FISTA",
                  'beta': 0.1,
                  'decision_rule': "EN",
                  'confidence': 0,
                  'batch_size': 128,
                  'learning_rate': 1e-4,
                  'binary_search_steps': 10,
                  'max_iterations': 1000,
                  'abort_early': True,
                  'initial_const': 1e-2,
                  'clip_min': 0,
                  'clip_max': 1}
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = ElasticNetMethod(model, sess=sess)

    elif attack_method == "deepfool":
        from cleverhans.attacks import DeepFool
        params = {'nb_candidate': 10,
                  'overshoot': 1e-3,
                  'max_iter': 100,
                  'nb_classes': 10,
                  'clip_min': 0,
                  'clip_max': 1}
        assert target is None
        method = DeepFool(model, sess=sess)

    elif attack_method == "lbfgs":
        from cleverhans.attacks import LBFGS
        params = {'batch_size': 128,
                  'binary_search_steps': 10,
                  'max_iterations': 1000,
                  'initial_const': 1e-2,
                  'clip_min': 0.,
                  'clip_max': 1.}
        assert target is not None
        params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = LBFGS(model, sess=sess)

    elif attack_method == "madry":
        from cleverhans.attacks import MadryEtAl
        params = {'eps': 8 / 255,
                  'eps_iter': 1 / 255,
                  'nb_iter': 10,
                  'ord': np.inf,
                  'clip_min': 0.,
                  'clip_max': 1.}
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = MadryEtAl(model, sess=sess)

    elif attack_method == "SPSA":
        from cleverhans.attacks import SPSA
        params = {'epsilon': 1 / 255,
                  'num_steps': 10,
                  'is_targeted': False,
                  'early_stop_loss_threshold': None,
                  'learning_rate': 0.01,
                  'delta': 0.01,
                  'batch_size': 128,
                  'spsa_iters': 1,
                  'is_debug': False}
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
            params["is_targeted"] = True
        method = SPSA(model, sess=sess)

    else:
        raise ValueError("Can not recognize this attack method: %s" % attack_method)

    adv_x = method.generate(x, **params)
    num_batch = x_test.shape[0] // batch_size
    adv_imgs = []
    for i in range(num_batch):
        x_feed = x_test[i * batch_size:(i + 1) * batch_size]
        # y_feed = y_test[i*batch_size:(i+1)*batch_size]
        adv_img = sess.run(adv_x, feed_dict={x: x_feed})
        adv_imgs.append(adv_img)

    adv_imgs = np.concatenate(adv_imgs, axis=0)

    return adv_imgs
def setUp(self):
    super(TestLBFGS, self).setUp()

    self.sess = tf.Session()
    self.model = SimpleModel()
    self.attack = LBFGS(self.model, sess=self.sess)
def main(args):
    normalize = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    transform = Compose([Resize(256), CenterCrop(224), ToTensor(), normalize])
    dataset = ImageDataset(args.image_folder, transform=transform, return_paths=True)
    # n_images = len(dataset)
    dataloader = DataLoader(dataset,
                            shuffle=False,
                            batch_size=args.batch_size,
                            pin_memory=True,
                            num_workers=0)

    model = models.resnet50(pretrained=True).to(args.device)
    model.eval()

    config = tf.ConfigProto(intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1,
                            allow_soft_placement=True,
                            device_count={'CPU': 1})
    sess = tf.Session(config=config)
    x_op = tf.placeholder(tf.float32, shape=(None, 3, 224, 224,))

    tf_model = convert_pytorch_model_to_tf(model, args.device)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')

    # compute clip_min and clip_max using a full black and a full white image
    clip_min = normalize(torch.zeros(3, 1, 1)).min().item()
    clip_max = normalize(torch.ones(3, 1, 1)).max().item()

    eps = args.eps / 255.
    eps_iter = 20
    nb_iter = 10
    args.ord = np.inf if args.ord < 0 else args.ord

    grad_params = {'eps': eps, 'ord': args.ord}
    common_params = {'clip_min': clip_min, 'clip_max': clip_max}
    iter_params = {'eps_iter': eps_iter / 255., 'nb_iter': nb_iter}

    attack_name = ''
    if args.attack == 'fgsm':
        attack_name = '_L{}_eps{}'.format(args.ord, args.eps)
        attack_op = FastGradientMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params}
    elif args.attack == 'iter':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps, eps_iter, nb_iter)
        attack_op = BasicIterativeMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'm-iter':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps, eps_iter, nb_iter)
        attack_op = MomentumIterativeMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'pgd':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps, eps_iter, nb_iter)
        attack_op = MadryEtAl(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'jsma':
        attack_op = SaliencyMapMethod(cleverhans_model, sess=sess)
        attack_params = {'theta': eps, 'symbolic_impl': False, **common_params}
    elif args.attack == 'deepfool':
        attack_op = DeepFool(cleverhans_model, sess=sess)
        attack_params = common_params
    elif args.attack == 'cw':
        attack_op = CarliniWagnerL2(cleverhans_model, sess=sess)
        attack_params = common_params
    elif args.attack == 'lbfgs':
        attack_op = LBFGS(cleverhans_model, sess=sess)
        target = np.zeros((1, 1000))
        target[0, np.random.randint(1000)] = 1
        y = tf.placeholder(tf.float32, target.shape)
        attack_params = {'y_target': y, **common_params}

    attack_name = args.attack + attack_name

    print('Running [{}]. Params: {}'.format(args.attack.upper(), attack_params))

    adv_x_op = attack_op.generate(x_op, **attack_params)
    adv_preds_op = tf_model(adv_x_op)
    preds_op = tf_model(x_op)

    n_success = 0
    n_processed = 0
    progress = tqdm(dataloader)
    for paths, x in progress:
        progress.set_description('ATTACK')

        z, adv_x, adv_z = sess.run([preds_op, adv_x_op, adv_preds_op],
                                   feed_dict={x_op: x, y: target})

        src, dst = np.argmax(z, axis=1), np.argmax(adv_z, axis=1)
        success = src != dst
        success_paths = np.array(paths)[success]
        success_adv_x = adv_x[success]
        success_src = src[success]
        success_dst = dst[success]

        n_success += success_adv_x.shape[0]
        n_processed += x.shape[0]

        progress.set_postfix({'Success': '{:3.2%}'.format(n_success / n_processed)})
        progress.set_description('SAVING')

        for p, a, s, d in zip(success_paths, success_adv_x, success_src, success_dst):
            path = '{}_{}_src{}_dst{}.npz'.format(p, attack_name, s, d)
            path = os.path.join(args.out_folder, path)
            np.savez_compressed(path, img=a)
def whitebox(gan, rec_data_path=None, batch_size=128, learning_rate=0.001,
             nb_epochs=10, eps=0.3, online_training=False,
             test_on_dev=True, attack_type='fgsm', defense_type='gan',
             num_tests=-1, num_train=-1):
    """Based on MNIST tutorial from cleverhans.

    Args:
        gan: A `GAN` model.
        rec_data_path: A string to the directory.
        batch_size: The size of the batch.
        learning_rate: The learning rate for training the target models.
        nb_epochs: Number of epochs for training the target model.
        eps: The epsilon of FGSM.
        online_training: Training Defense-GAN with online reconstruction.
            The faster but less accurate way is to reconstruct the dataset
            once and use it to train the target models with:
            `python train.py --cfg <path-to-model> --save_recs`
        attack_type: Type of the white-box attack. It can be `fgsm`,
            `rand+fgsm`, or `cw`.
        defense_type: String representing the type of attack. Can be `none`,
            `defense_gan`, or `adv_tr`.
    """
    FLAGS = tf.flags.FLAGS

    # Set logging level to see debug information.
    set_log_level(logging.WARNING)

    if defense_type == 'defense_gan':
        assert gan is not None

    # Create TF session.
    if defense_type == 'defense_gan':
        sess = gan.sess
        if FLAGS.train_on_recs:
            assert rec_data_path is not None or online_training
    else:
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)

    train_images, train_labels, test_images, test_labels = \
        get_cached_gan_data(gan, test_on_dev)

    rec_test_images = test_images
    rec_test_labels = test_labels

    _, _, test_images, test_labels = \
        get_cached_gan_data(gan, test_on_dev, orig_data_flag=True)

    x_shape = [None] + list(train_images.shape[1:])
    images_pl = tf.placeholder(tf.float32,
                               shape=[None] + list(train_images.shape[1:]))
    labels_pl = tf.placeholder(tf.float32,
                               shape=[None] + [train_labels.shape[1]])

    if num_tests > 0:
        test_images = test_images[:num_tests]
        rec_test_images = rec_test_images[:num_tests]
        test_labels = test_labels[:num_tests]

    if num_train > 0:
        train_images = train_images[:num_train]
        train_labels = train_labels[:num_train]

    # GAN defense flag.
    models = {'A': model_a, 'B': model_b, 'C': model_c,
              'D': model_d, 'E': model_e, 'F': model_f}
    model = models[FLAGS.model](input_shape=x_shape,
                                nb_classes=train_labels.shape[1])

    preds = model.get_probs(images_pl)
    report = AccuracyReport()

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test
        # examples.
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, images_pl, labels_pl, preds, rec_test_images,
                         rec_test_labels, args=eval_params,
                         feed={K.learning_phase(): 0})
        report.clean_train_clean_eval = acc
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
    }

    rng = np.random.RandomState([11, 24, 1990])
    tf.set_random_seed(11241990)

    preds_adv = None
    if FLAGS.defense_type == 'adv_tr':
        attack_params = {'eps': FLAGS.fgsm_eps_tr,
                         'clip_min': 0.,
                         'clip_max': 1.}
        if gan:
            if gan.dataset_name == 'celeba':
                attack_params['clip_min'] = -1.0

        attack_obj = FastGradientMethod(model, sess=sess)
        adv_x_tr = attack_obj.generate(images_pl, **attack_params)
        adv_x_tr = tf.stop_gradient(adv_x_tr)
        preds_adv = model(adv_x_tr)

    model_train(sess, images_pl, labels_pl, preds, train_images, train_labels,
                args=train_params, rng=rng, predictions_adv=preds_adv,
                init_all=False, feed={K.learning_phase(): 1},
                evaluate=evaluate)

    # Calculate training error.
    eval_params = {'batch_size': batch_size}
    acc = model_eval(
        sess, images_pl, labels_pl, preds, train_images, train_labels,
        args=eval_params, feed={K.learning_phase(): 0},
    )
    print('[#] Accuracy on clean examples {}'.format(acc))

    if attack_type is None:
        return acc, 0, None

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and
    # graph.
    if FLAGS.defense_type == 'defense_gan':
        z_init_val = None

        if FLAGS.same_init:
            z_init_val = tf.constant(
                np.random.randn(batch_size * gan.rec_rr,
                                gan.latent_dim).astype(np.float32))

        model.add_rec_model(gan, z_init_val, batch_size)

    min_val = 0.0
    if gan:
        if gan.dataset_name == 'celeba':
            min_val = -1.0

    if 'rand' in FLAGS.attack_type:
        test_images = np.clip(
            test_images +
            args.alpha * np.sign(np.random.randn(*test_images.shape)),
            min_val, 1.0)
        eps -= args.alpha

    if 'fgsm' in FLAGS.attack_type:
        attack_params = {'eps': eps, 'ord': np.inf,
                         'clip_min': min_val, 'clip_max': 1.}
        attack_obj = FastGradientMethod(model, sess=sess)
    elif FLAGS.attack_type == 'cw':
        attack_obj = CarliniWagnerL2(model, back='tf', sess=sess)
        attack_iterations = 100
        attack_params = {'binary_search_steps': 1,
                         'max_iterations': attack_iterations,
                         'learning_rate': 10.0,
                         'batch_size': batch_size,
                         'initial_const': 100,
                         'feed': {K.learning_phase(): 0}}
    elif FLAGS.attack_type == 'mim':
        attack_obj = MomentumIterativeMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'ord': np.inf,
                         'clip_min': min_val, 'clip_max': 1.}
    elif FLAGS.attack_type == 'deepfool':
        attack_obj = DeepFool(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'clip_min': min_val, 'clip_max': 1.,
                         'nb_candidate': 2, 'nb_classes': 2}
    elif FLAGS.attack_type == 'lbfgs':
        attack_obj = LBFGS(model, back='tf', sess=sess)
        attack_params = {'clip_min': min_val, 'clip_max': 1.}

    adv_x = attack_obj.generate(images_pl, **attack_params)

    eval_par = {'batch_size': batch_size}
    if FLAGS.defense_type == 'defense_gan':
        preds_adv = model.get_probs(adv_x)

        num_dims = len(images_pl.get_shape())
        avg_inds = list(range(1, num_dims))
        diff_op = tf.reduce_mean(tf.square(adv_x - images_pl), axis=avg_inds)

        acc_adv, roc_info = model_eval_gan(
            sess, images_pl, labels_pl, preds_adv, None,
            test_images=test_images, test_labels=test_labels,
            args=eval_par, feed={K.learning_phase(): 0}, diff_op=diff_op,
        )
        print('Test accuracy on adversarial examples: %0.4f\n' % acc_adv)
    else:
        preds_adv = model(adv_x)
        roc_info = None
        acc_adv = model_eval(sess, images_pl, labels_pl, preds_adv,
                             test_images, test_labels, args=eval_par,
                             feed={K.learning_phase(): 0})
        print('Test accuracy on adversarial examples: %0.4f\n' % acc_adv)

    if FLAGS.debug and gan is not None:
        # To see some qualitative results.
        adv_x_debug = adv_x[:batch_size]
        images_pl_debug = images_pl[:batch_size]

        debug_dir = os.path.join('debug', 'whitebox', FLAGS.debug_dir)
        ensure_dir(debug_dir)

        reconstructed_tensors = gan.reconstruct(adv_x_debug,
                                                batch_size=batch_size,
                                                reconstructor_id=2)
        x_rec_orig = gan.reconstruct(images_tensor, batch_size=batch_size,
                                     reconstructor_id=3)

        x_adv_sub_val = sess.run(x_adv_sub,
                                 feed_dict={images_tensor: images_pl_debug,
                                            K.learning_phase(): 0})

        sess.run(tf.local_variables_initializer())
        x_rec_debug_val, x_rec_orig_val = sess.run(
            [reconstructed_tensors, x_rec_orig],
            feed_dict={images_tensor: images_pl_debug,
                       K.learning_phase(): 0})

        save_images_files(x_adv_sub_val, output_dir=debug_dir, postfix='adv')

        postfix = 'gen_rec'
        save_images_files(x_rec_debug_val, output_dir=debug_dir,
                          postfix=postfix)
        save_images_files(images_pl_debug, output_dir=debug_dir,
                          postfix='orig')
        save_images_files(x_rec_orig_val, output_dir=debug_dir,
                          postfix='orig_rec')

    return acc_adv, 0, roc_info
def main(_):
    tf.logging.set_verbosity(tf.logging.DEBUG)

    # Images for inception classifier are normalized to be in [-1, 1] interval.
    num_classes = 1001
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]

    # Load ImageNet Class Labels
    with open('labels.json') as f:
        labels = json.load(f)

    # Prepare Graph
    with tf.Graph().as_default():
        # Build Model
        if FLAGS.model_arch.lower() == 'resnet_v2_101':
            model = models.Resnet_V2_101_Model(num_classes)
            exceptions = []
        elif FLAGS.model_arch.lower() == 'inception_v3':
            model = models.Inception_V3_Model(num_classes)
            exceptions = ['InceptionV3/AuxLogits.*']
        else:
            raise ValueError('Invalid model architecture specified: {}'.format(
                FLAGS.model_arch))

        # Define Model Variables
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        FastGradientMethod(model).generate(x_input)
        model_variables = tf.contrib.framework.filter_variables(
            slim.get_model_variables(), exclude_patterns=exceptions)

        # Load Session
        saver = tf.train.Saver(model_variables)
        with tf.train.SessionManager().prepare_session(
                master=FLAGS.master,
                checkpoint_filename_with_path=FLAGS.checkpoint_path,
                saver=saver) as sess:

            # For Targeted Attacks
            target_idx = 0  # This will vary
            target = tf.constant(0, shape=[FLAGS.batch_size, num_classes])
            # target = np.zeros((FLAGS.batch_size, num_classes), dtype=np.uint32)
            # target[:, target] = 1

            # Build Attack
            if FLAGS.attack_type.lower() == 'fgsm':
                fgsm_opts = {'eps': 0.3, 'clip_min': 0, 'clip_max': 1.,
                             'y_target': None}
                fgsm = FastGradientMethod(model)
                x_adv = fgsm.generate(x_input, **fgsm_opts)
            elif FLAGS.attack_type.lower() == 'bim':
                bim_opts = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.,
                            'y_target': None}
                bim = BasicIterativeMethod(model)
                x_adv = bim.generate(x_input, **bim_opts)
            elif FLAGS.attack_type.lower() == 'mim':
                mim_opts = {'eps': 0.3, 'clip_min': 0, 'clip_max': 1.}
                mim = MomentumIterativeMethod(model)
                x_adv = mim.generate(x_input, **mim_opts)
            elif FLAGS.attack_type.lower() == 'pgd':
                pgd_opts = {'eps': 0.3, 'clip_min': 0, 'clip_max': 1.}
                pgd = MadryEtAl(model)
                x_adv = pgd.generate(x_input, **pgd_opts)
            # Broken
            elif FLAGS.attack_type.lower() == 'jsma':
                jsma_opts = {'theta': 1., 'gamma': 0.1, 'clip-min': 0.,
                             'clip-max': 1., 'y_target': None}
                jsma = SaliencyMapMethod(model)
                x_adv = jsma.generate(x_input, **jsma_opts)
            elif FLAGS.attack_type.lower() == 'lbfgs':
                lbfgs_opts = {'y_target': target}
                lbfgs = LBFGS(model)
                x_adv = lbfgs.generate(x_input, **lbfgs_opts)
            else:
                raise ValueError('Invalid attack type specified: {}'.format(
                    FLAGS.attack_type))

            start_time, batch_time, num_processed = time.time(), time.time(), 0
            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                adv_images = sess.run(x_adv, feed_dict={x_input: images})
                save_images(adv_images, filenames, FLAGS.output_dir)

                if FLAGS.show_predictions:
                    preds = sess.run(model(np.float32(images)))
                    probs = np.amax(preds, axis=1)
                    classes = np.argmax(preds, axis=1)

                    adv_preds = sess.run(model(adv_images))
                    adv_probs = np.amax(adv_preds, axis=1)
                    adv_classes = np.argmax(adv_preds, axis=1)

                    for i, _ in enumerate(filenames):
                        print('\nOriginal: {:.2f}% ({})\nAdversarial: {:.2f}% ({})'.format(
                            probs[i] * 100, labels[str(classes[i])],
                            adv_probs[i] * 100, labels[str(adv_classes[i])]))

                time_delta = time.time() - batch_time
                batch_time = time.time()
                num_processed += len(filenames)
                print('[SPEED ESTIMATION] BatchRate={:.4f} Hz; AverageRate={:.4f} Hz'.format(
                    (len(filenames) / time_delta * 1.0),
                    ((num_processed * 1.0) / (batch_time - start_time))))
def adv_generate(nb_epochs=25, batch_size=128, learning_rate=0.001,
                 clean_train=True, testing=False, nb_filters=64,
                 num_threads=None, data='cifar', adv_attack='fgsm',
                 save_dir='data'):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    # set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    config = tf.ConfigProto(**config_args)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    if data == "mnist":
        # Get MNIST test data
        X_train, Y_train, X_test, Y_test = data_mnist(train_start=0,
                                                      train_end=60000,
                                                      test_start=0,
                                                      test_end=10000)
    else:
        X_train, Y_train, X_test, Y_test = data_cifar10()
    # print (Y_test.shape)

    '''
    for i in range(Y_test.shape[0]):
        img = np.squeeze(X_test[i,:,:,:])
        imsave(os.path.join("benign", str(i) + ".jpg"), img)

    for i in range(Y_test.shape[0]):
        img = np.squeeze(X_test[i,:,:,:])
        benign_path = "benign_" + str(np.argmax(Y_test[i,:], axis=0))
        if not os.path.exists(benign_path):
            os.makedirs(benign_path)
        imsave(os.path.join(benign_path, str(i) + ".jpg"), img)
    '''

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    if data == 'mnist':
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    else:
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    rng = np.random.RandomState([2018, 7, 18])

    if clean_train:
        if data == 'mnist':
            model = build_model(0.01, 1e-6)
        else:
            model = build_model_cifar(0.01, 1e-6)
        preds = model(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess, x, y, preds, X_test, Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == 10000, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate,
                    args=train_params, rng=rng)

        # Calculate training error
        if testing:
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess, x, y, preds, X_train, Y_train,
                             args=eval_params)
            report.train_clean_train_clean_eval = acc

        if adv_attack == "FGSM":
            # Initialize the attack object and graph
            # FGSM
            print("FGSM ATTACK...")
            fgsm_params = {'eps': 0.1, 'clip_min': 0., 'clip_max': 1.}
            fgsm = FastGradientMethod(model, sess=sess)
            adv_x = fgsm.generate(x, **fgsm_params)
            preds_adv = model(adv_x)
        elif adv_attack == "CWL2":
            # CWL2
            print("CWL2 ATTACK...")
            cwl2_params = {'batch_size': 8}
            cwl2 = CarliniWagnerL2(model, sess=sess)
            adv_x = cwl2.generate(x, **cwl2_params)
            preds_adv = model(adv_x)
        elif adv_attack == "JSMA":
            # JSMA
            print("JSMA ATTACK...")
            jsma = SaliencyMapMethod(model, back='tf', sess=sess)
            jsma_params = {'theta': 1., 'gamma': 0.1,
                           'clip_min': 0., 'clip_max': 1.}
            adv_x = jsma.generate(x, **jsma_params)
            preds_adv = model(adv_x)
        elif adv_attack == "DeepFool":
            # DeepFool
            print("DeepFool ATTACK...")
            deepfool = DeepFool(model, sess=sess)
            deepfool_params = {'nb_candidate': 10, 'overshoot': 0.02,
                               'max_iter': 50, 'clip_min': 0.0,
                               'clip_max': 1.0}
            adv_x = deepfool.generate(x, **deepfool_params)
            preds_adv = model(adv_x)
        elif adv_attack == "LBFGS":
            # LBFGS
            print("LBFGS ATTACK...")
            lbfgs_params = {'y_target': y, 'batch_size': 100}
            lbfgs = LBFGS(model, sess=sess)
            adv_x = lbfgs.generate(x, **lbfgs_params)
            preds_adv = model(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}

        adv_imgs = []
        adv_imgs_test = []
        if not adv_attack == "LBFGS":
            for i in range(5000):
                adv_imgs_train, _ = sess.run(
                    [adv_x, preds_adv],
                    feed_dict={x: X_train[i * 10:(i + 1) * 10]})
                adv_imgs.append(adv_imgs_train)
            adv_imgs = np.vstack(adv_imgs)
            print(adv_imgs.shape)

            for i in range(1000):
                adv_imgs_tmp, _ = sess.run(
                    [adv_x, preds_adv],
                    feed_dict={x: X_test[i * 10:(i + 1) * 10]})
                adv_imgs_test.append(adv_imgs_tmp)
            adv_imgs_test = np.vstack(adv_imgs_test)
        else:
            # LBFGS is targeted: aim each example at the next class (mod 10)
            for i in range(500):
                target = np_utils.to_categorical(
                    (np.argmax(Y_train[i * 100:(i + 1) * 100], axis=1) + 1) % 10, 10)
                adv_imgs_train, _ = sess.run(
                    [adv_x, preds_adv],
                    feed_dict={x: X_train[i * 100:(i + 1) * 100], y: target})
                print('train image: %s' % str(i))
                adv_imgs.append(adv_imgs_train)
            adv_imgs = np.vstack(adv_imgs)
            print(adv_imgs.shape)

            for i in range(100):
                target = np_utils.to_categorical(
                    (np.argmax(Y_train[i * 100:(i + 1) * 100], axis=1) + 1) % 10, 10)
                adv_imgs_tmp, _ = sess.run(
                    [adv_x, preds_adv],
                    feed_dict={x: X_train[i * 100:(i + 1) * 100], y: target})
                adv_imgs_test.append(adv_imgs_tmp)
                print('test image: %s' % str(i))
            adv_imgs_test = np.vstack(adv_imgs_test)

        '''
        for i in range(6):
            target = np_utils.to_categorical((np.argmax(Y_train[i*10000: (i+1)*10000, ...], axis = 1) + 1) % 10, 10)
            adv_imgs_train, adv_labels_train = sess.run([adv_x, preds_adv],
                feed_dict={x: X_train[i*10000: (i+1)*10000,...], y: target})

        for i in range(60000):
            target = np_utils.to_categorical((np.argmax(Y_train[i:i+1, ...], axis = 1) + 1) % 10, 10)
            adv_imgs_train = sess.run([adv_x], feed_dict={x: X_train[i:i+1,...], y: target})
            print (len(adv_imgs_train), adv_imgs_train[0].shape, adv_imgs_train[1])
        '''

        label_truth_train = np.argmax(Y_train, axis=1)
        label_truth_test = np.argmax(Y_test, axis=1)

        save_dir = os.path.join(
            save_dir, os.path.join(adv_attack))  # , "eps_" + str(eps)))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        print(adv_imgs.shape, adv_imgs_test.shape)
        provider.save_h5(adv_imgs, label_truth_train,
                         os.path.join(save_dir, "train_adv.h5"))
        provider.save_h5(adv_imgs_test, label_truth_test,
                         os.path.join(save_dir, "test_adv.h5"))
        # utils.save_h5(X_train, label_truth_train, "FGSM/train_benign.h5")
        # utils.save_h5(X_test, label_truth_test, "FGSM/test_benign.h5")

        '''
        for i in range(adv_labels.shape[0]):
            img = np.squeeze(adv_imgs[i,:,:,:])
            imsave(os.path.join("adv", str(i) + ".jpg"), img)

        for i in range(adv_labels.shape[0]):
            img = np.squeeze(adv_imgs[i,:,:,:])
            adv_path = "adv_" + str(np.argmax(adv_labels[i,:], axis=0))
            if not os.path.exists(adv_path):
                os.makedirs(adv_path)
            imsave(os.path.join(adv_path, str(i) + ".jpg"), img)
        '''

        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

        # Calculate training error
        if testing:
            eval_par = {'batch_size': batch_size}
            acc = model_eval(sess, x, y, preds_adv, X_train, Y_train,
                             args=eval_par)
            report.train_clean_train_adv_eval = acc

    return report
attackParams = {'eps_iter': Cfg.attackIterations,
                'eps': Cfg.epsilon * 3.0,
                'clip_min': 0.,
                'clip_max': 1.}

if attackMethod == "BasicIterative":
    attackParams = {'eps_iter': Cfg.attackIterations,
                    'eps': Cfg.epsilon * 10.0,
                    'clip_min': 0.,
                    'clip_max': 1.}

if attackMethod == "SaliencyMap":
    Cfg.useCleverHans = False  # Use Foolbox for Saliency map (CleverHans is too slow)
    attackParams = {'theta': Cfg.epsilon,
                    'clip_min': 0.,
                    'clip_max': 1.}

if Cfg.useCleverHans:
    # Initialize CleverHans
    print("Initializing CleverHans")

    if attackMethod == "FGSM":
        print("Using FGSM attack method!")
        attack = FastGradientMethod(model=model, sess=sess)
    if attackMethod == "LBFGS":
        print("Using LBFGS attack method!")
        attack = LBFGS(model=model, sess=sess)
    if attackMethod == "CarliniWagnerL2":
        print("Using Carlini and Wagner attack method!")
        attack = CarliniWagnerL2(model=model, sess=sess)
    if attackMethod == "SPSA":
        print("Using SPSA attack method!")
        attack = SPSA(model=model, sess=sess)
    if attackMethod == "MadryEtAl":
        print("Using Madry et al. attack method!")
        attack = MadryEtAl(model=model, sess=sess)
    if attackMethod == "ElasticNet":
        print("Using Elastic Net attack method!")
        attack = ElasticNetMethod(model=model, sess=sess)
    if attackMethod == "DeepFool":
        print("Using Deep Fool attack method!")
        attack = DeepFool(model=model, sess=sess)
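The block above stops after constructing the attack object; the generation step is not shown. One way the LBFGS branch might then be driven, sketched under the assumption of a NumPy batch x_batch and a one-hot target array y_targets (both names are illustrative, not from the original):

# Hypothetical continuation for the LBFGS branch (x_batch / y_targets are assumed names).
if Cfg.useCleverHans and attackMethod == "LBFGS":
    adv_batch = attack.generate_np(x_batch,
                                   y_target=y_targets,      # LBFGS is a targeted attack
                                   binary_search_steps=3,
                                   max_iterations=100,
                                   batch_size=len(x_batch),
                                   clip_min=0., clip_max=1.)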
def main(type="Resnet", dataset="CIFAR10", attack_type="FGM"): size = 256 eval_params = {'batch_size': 128} ############################################# Prepare the Data ##################################################### if dataset == 'CIFAR10': (_, _), (x_test, y_test) = prepare_CIFAR10() num_classes = 10 input_dim = 32 elif dataset == 'CIFAR100': (_, _), (x_test, y_test) = prepare_CIFAR100() num_classes = 100 input_dim = 32 else: (_, _), (x_test, y_test) = prepare_SVHN("./Data/") num_classes = 10 input_dim = 32 x_test = x_test / 255. y_test = keras.utils.to_categorical(y_test, num_classes) ############################################# Prepare the Data ##################################################### config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: # prepare the placeholders x = tf.placeholder(tf.float32, [None, input_dim, input_dim, 3]) y = tf.placeholder(tf.float32, [None, num_classes]) input_output = [] def modelBuilder(x, num_classes, dataset, type, sess, input_output): if len(input_output) == 0: reuse = False # Model/Graph if type == 'End2End': _, tf_model = \ prepare_GBP_End2End(num_classes, inputT=x, sess=sess, checkpoint_dir='./{}_{}/'.format(dataset, type), reuse=reuse) else: _, tf_model = \ prepare_Resnet(num_classes, inputT=x, sess=sess, checkpoint_dir='./{}_{}/'.format(dataset, type), reuse=reuse) input_output.append(x) input_output.append(tf_model.logits) else: reuse = True # Model/Graph if type == 'End2End': _, tf_model = \ prepare_GBP_End2End(num_classes, inputT=x, reuse=reuse) else: _, tf_model = \ prepare_Resnet(num_classes, inputT=x, reuse=reuse) input_output.append(x) input_output.append(tf_model.logits) return tf_model.logits # create an attackable model for the cleverhans model = CallableModelWrapper(lambda placeholder: modelBuilder(placeholder, num_classes, dataset, type, sess, input_output), 'logits') # TODO: check the configurations if attack_type == "FGM": # pass attack = FastGradientMethod(model, back='tf', sess=sess) params = { 'eps' : 0.06, 'clip_min': 0., 'clip_max': 1. } elif attack_type == "CWL2": # pass attack = CarliniWagnerL2(model, back='tf', sess=sess) params = { 'confidence': 0.9, 'batch_size': 128, 'learning_rate': 0.005, } elif attack_type == "DF": # pass attack = DeepFool(model, back='tf', sess=sess) params = { } elif attack_type == "ENM": # configurations checked, quickly tested attack = ElasticNetMethod(model, back='tf', sess=sess) params = { 'confidence': 0.9, 'batch_size': 128, 'learning_rate': 0.005, } elif attack_type == "FFA": # configuration checked attack = FastFeatureAdversaries(model, back='tf', sess=sess) params = { 'eps': 0.06, 'eps_iter': 0.005, 'clip_min': 0., 'clip_max': 1. } elif attack_type == "LBFGS": attack = LBFGS(model, back='tf', sess=sess) params = { 'eps': 0.06, 'clip_min': 0., 'clip_max': 1. } elif attack_type == "MEA": attack = MadryEtAl(model, back='tf', sess=sess) params = { 'eps': 0.06, 'clip_min': 0., 'clip_max': 1. } elif attack_type == "MIM": attack = MomentumIterativeMethod(model, back='tf', sess=sess) params = { 'eps': 0.06, 'clip_min': 0., 'clip_max': 1. } elif attack_type == "SMM": attack = SaliencyMapMethod(model, back='tf', sess=sess) params = { 'eps': 0.06, 'clip_min': 0., 'clip_max': 1. } elif attack_type == "SPSA": attack = SPSA(model, back='tf', sess=sess) params = { 'eps': 0.06, 'clip_min': 0., 'clip_max': 1. } elif attack_type == "VATM": attack = vatm(model, back='tf', sess=sess) params = { 'eps': 0.06, 'clip_min': 0., 'clip_max': 1. 
} elif attack_type == "VAM": attack = VirtualAdversarialMethod(model, back='tf', sess=sess) params = { 'eps': 0.06, 'clip_min': 0., 'clip_max': 1. } else: raise Exception("I don't recognize {} this attack type. I will use FGM instead.".format(attack_type)) # tf operation adv_x = attack.generate(x, **params) # generate the adversarial examples adv_vals = sess.run(adv_x, feed_dict={x: x_test[:size]}) # notice that "adv_vals" may contain NANs because of the failure of the attack # also the input may not be perturbed at all because of the failure of the attack to_delete = [] for idx, adv in enumerate(adv_vals): # for nan if np.isnan(adv).any(): to_delete.append(idx) # for no perturbation if np.array_equiv(adv, x_test[idx]): to_delete.append(idx) # cleanings adv_vals_cleaned = np.delete(adv_vals, to_delete, axis=0) ori_cleaned = np.delete(x_test[:size], to_delete, axis=0) y_cleaned = np.delete(y_test[:size], to_delete, axis=0) if len(adv_vals_cleaned) == 0: print("No adversarial example is generated!") return print("{} out of {} adversarial examples are generated.".format(len(adv_vals_cleaned), size)) print("The average L_inf distortion is {}".format( np.mean([np.max(np.abs(adv - ori_cleaned[idx])) for idx, adv in enumerate(adv_vals_cleaned)]))) # TODO: visualize the adv_vals accuracy = model_eval(sess, input_output[0], y, tf.nn.softmax(input_output[1]), x_test[:size], y_test[:size], args=eval_params) print('Test accuracy on normal examples: %0.4f' % accuracy) accuracy = model_eval(sess, input_output[0], y, tf.nn.softmax(input_output[1]), adv_vals_cleaned, y_cleaned, args=eval_params) print('Test accuracy on adversarial examples: %0.4f' % accuracy)
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR,
                   filename=FILENAME, load_model=LOAD_MODEL,
                   testing=False, label_smoothing=0.1):
    """
    MNIST CleverHans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: if true, test error is calculated
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # only use No.0 GPU
    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    # Get MNIST test data
    mnist = MNIST(train_start=train_start, train_end=train_end,
                  test_start=test_start, test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Define TF model graph
    model = cnn_model(img_rows=img_rows, img_cols=img_cols,
                      channels=nchannels, nb_filters=64,
                      nb_classes=nb_classes)
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        # assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }

    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir):
        os.mkdir(train_dir)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap = KerasModelWrapper(model)

    if load_model and ckpt_path:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        loss = CrossEntropy(wrap, smoothing=label_smoothing)
        train(sess, loss, x_train, y_train, evaluate=evaluate,
              args=train_params, rng=rng)
        saver = tf.train.Saver(max_to_keep=1)
        saver.save(sess, '{}/mnist.ckpt'.format(train_dir),
                   global_step=NB_EPOCHS)
        print("model has been saved")

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_train, y_train,
                         args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the L-BFGS attack object and graph
    lbfgs = LBFGS(wrap, sess=sess)

    # targeted attack, target class is 1
    y_target = np.ones(128)
    y_target = keras.utils.to_categorical(y_target, num_classes=10)
    y_target = tf.Variable(y_target)
    sess.run(tf.global_variables_initializer())
    lbfgs_params = {'y_target': y_target, 'batch_size': 128}

    adv_x = lbfgs.generate(x, **lbfgs_params)
    # Consider the attack to be constant
    adv_x = tf.stop_gradient(adv_x)
    preds_adv = model(adv_x)

    # Evaluate the accuracy of the MNIST model on adversarial examples
    eval_par = {'batch_size': batch_size}
    start_time = time.time()
    acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par)
    print('Test accuracy on adversarial examples: %0.4f\n' % acc)
    end_time = time.time()
    print("L-BFGS attack time is {}".format(end_time - start_time))
    report.clean_train_adv_eval = acc

    # Calculate training error
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, x_train, y_train,
                         args=eval_par)
        report.train_clean_train_adv_eval = acc

    gc.collect()

    return report
def get_adv_examples(sess, wrap, attack_type, X, Y):
    """
    Generate adversarial examples.
    :param sess: target model session
    :param wrap: wrapped model
    :param attack_type: attack for generating adversarial examples
    :param X: examples to be attacked
    :param Y: correct labels of the examples
    :return: x_adv: adversarial examples
    """
    x = tf.placeholder(tf.float32, shape=(None, X.shape[1], X.shape[2],
                                          X.shape[3]))
    y = tf.placeholder(tf.float32, shape=(None, Y.shape[1]))
    adv_label = np.copy(Y)
    batch_size = 128

    # Define attack method parameters
    if attack_type == 'fgsm':
        attack_params = {'eps': 0.1, 'clip_min': 0., 'clip_max': 1.}
        attack_object = FastGradientMethod(wrap, sess=sess)
    elif attack_type == 'jsma':
        attack_params = {'theta': 1., 'gamma': 0.1,
                         'clip_min': 0., 'clip_max': 1.,
                         'y_target': None}
        attack_object = SaliencyMapMethod(wrap, sess=sess)
        batch_size = 32
    elif attack_type == 'cw':
        attack_params = {'binary_search_steps': 1,
                         'y': y,
                         'max_iterations': 100,
                         'learning_rate': .2,
                         'batch_size': 128,
                         'initial_const': 10}
        attack_object = CarliniWagnerL2(wrap, sess=sess)
    elif attack_type == 'mim':
        attack_object = MomentumIterativeMethod(wrap, back='tf', sess=sess)
        attack_params = {'clip_min': 0., 'clip_max': 1., 'eps': 0.1}
    elif attack_type == 'df':
        attack_params = {'max_iterations': 50,
                         'clip_min': 0., 'clip_max': 1.,
                         'overshoot': 0.02}
        attack_object = DeepFool(wrap, sess=sess)
        batch_size = 64
    elif attack_type == 'bim':
        attack_object = BasicIterativeMethod(wrap, back='tf', sess=sess)
        attack_params = {'eps': 0.1, 'eps_iter': 0.05, 'nb_iter': 10,
                         'clip_min': 0., 'clip_max': 1.}
    elif attack_type == 'vam':
        attack_object = VirtualAdversarialMethod(wrap, back='tf', sess=sess)
        attack_params = {'clip_min': 0., 'clip_max': 1., 'nb_iter': 100,
                         'eps': 2, 'xi': 1e-6}
    elif attack_type == 'enm':
        attack_object = ElasticNetMethod(wrap, back='tf', sess=sess)
        attack_params = {'y': y, 'max_iterations': 10, 'batch_size': 128}
    elif attack_type == 'spsa':
        attack_object = SPSA(wrap, sess=sess)
        adv_x = attack_object.generate(x=x, y=y, eps=0.1,
                                       clip_min=0., clip_max=1., nb_iter=100,
                                       early_stop_loss_threshold=-5.)
        batch_size = 1
    elif attack_type == 'lbfgs':
        attack_object = LBFGS(wrap, sess=sess)
        attack_params = {'clip_min': 0, 'clip_max': 1., 'batch_size': 128,
                         'max_iterations': 10, 'y_target': y}
        # LBFGS is targeted: use the next class (mod nb_classes) as the target
        true_label = np.argmax(Y, axis=-1)
        for i in range(len(Y)):
            ind = (true_label[i] + 1) % FLAGS.nb_classes
            adv_label[i] = np.zeros([FLAGS.nb_classes])
            adv_label[i, ind] = 1

    if attack_type != 'spsa':
        adv_x = attack_object.generate(x, **attack_params)

    # Get adversarial examples
    if attack_type == 'lbfgs':
        x_adv = get_adv(sess, x, y, adv_x, X, adv_label, batch_size=batch_size)
    else:
        x_adv = get_adv(sess, x, y, adv_x, X, Y, batch_size=batch_size)

    return x_adv
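The function above delegates batching to a get_adv helper that is not included in this snippet. A minimal sketch of what such a helper could look like, under the assumption that it simply runs adv_x batch by batch; this is an illustrative reconstruction, not the author's implementation:

# Hypothetical batching helper (illustrative only; the original get_adv is not shown).
def get_adv(sess, x, y, adv_x, X, Y, batch_size=128):
    adv_batches = []
    for start in range(0, len(X), batch_size):
        end = start + batch_size
        # feed both the inputs and the (true or target) labels the graph expects
        adv_batch = sess.run(adv_x, feed_dict={x: X[start:end], y: Y[start:end]})
        adv_batches.append(adv_batch)
    return np.concatenate(adv_batches, axis=0)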