def load_attack(sess, attack_method, model, targeted, adv_ys, eps, batch_size):
    if attack_method == 'fgsm':
        from cleverhans.attacks import FastGradientMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = FastGradientMethod(model_prob, sess=sess)
        attack_params, yname = config_fgsm(targeted, adv_ys, eps, batch_size)
    elif attack_method == 'pgd':
        from cleverhans.attacks import MadryEtAl
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = MadryEtAl(model_prob, sess=sess)
        attack_params, yname = config_madry(targeted, adv_ys, eps, batch_size)
    elif attack_method == 'mim':
        from cleverhans.attacks import MomentumIterativeMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = MomentumIterativeMethod(model_prob, sess=sess)
        attack_params, yname = config_mim(targeted, adv_ys, eps, batch_size)
    elif attack_method == 'cw':
        from cleverhans.attacks import CarliniWagnerL2
        model_logit = lambda x: model.predict(x, softmax=False)
        attack = CarliniWagnerL2(model_logit, sess=sess)
        attack_params, yname = config_cw(targeted, adv_ys, eps, batch_size)
    return attack, attack_params, yname
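# Usage sketch (illustrative only): the config_* helpers, `model.predict`, and
# `x_batch` come from the surrounding project and are assumptions here.
#
#   attack, attack_params, yname = load_attack(sess, 'cw', model, targeted=False,
#                                               adv_ys=None, eps=0.3, batch_size=128)
#   x_adv = attack.generate_np(x_batch, **attack_params)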
def test_generate_np_high_confidence_untargeted_examples(self):
    import tensorflow as tf

    def trivial_model(x):
        W1 = tf.constant([[1, -1]], dtype=tf.float32)
        res = tf.matmul(x, W1)
        return res

    for CONFIDENCE in [0, 2.3]:
        x_val = np.random.rand(10, 1) - .5
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(trivial_model(x_val)), axis=1)
        attack = CarliniWagnerL2(trivial_model, sess=self.sess)
        x_adv = attack.generate_np(x_val,
                                   max_iterations=100,
                                   binary_search_steps=2,
                                   learning_rate=1e-2,
                                   initial_const=1,
                                   clip_min=-10,
                                   clip_max=10,
                                   confidence=CONFIDENCE,
                                   batch_size=10)

        new_labs = self.sess.run(trivial_model(x_adv))
        good_labs = new_labs[np.arange(10), 1 - orig_labs]
        bad_labs = new_labs[np.arange(10), orig_labs]

        assert np.mean(np.argmax(new_labs, axis=1) == orig_labs) == 0
        assert np.isclose(0, np.min(good_labs - (bad_labs + CONFIDENCE)),
                          atol=1e-1)
def test_generate_np_high_confidence_untargeted_examples(self):
    trivial_model = TrivialModel()

    for CONFIDENCE in [0, 2.3]:
        x_val = np.random.rand(10, 1) - .5
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(trivial_model.get_logits(x_val)), axis=1)
        attack = CarliniWagnerL2(trivial_model, sess=self.sess)
        x_adv = attack.generate_np(x_val,
                                   max_iterations=100,
                                   binary_search_steps=2,
                                   learning_rate=1e-2,
                                   initial_const=1,
                                   clip_min=-10,
                                   clip_max=10,
                                   confidence=CONFIDENCE,
                                   batch_size=10)

        new_labs = self.sess.run(trivial_model.get_logits(x_adv))
        good_labs = new_labs[np.arange(10), 1 - orig_labs]
        bad_labs = new_labs[np.arange(10), orig_labs]

        self.assertTrue(np.mean(np.argmax(new_labs, axis=1) == orig_labs) == 0)
        self.assertTrue(np.isclose(0, np.min(good_labs - (bad_labs + CONFIDENCE)),
                                   atol=1e-1))
def carlini_wagner_attack(image_index):
    import logging

    # Configure the cleverhans logger for verbose attack output
    logger = logging.getLogger('cleverhans')
    logger.setLevel(logging.DEBUG)

    filename, orig_image = importer.load_images_at_index(image_index)
    tf.reset_default_graph()

    folder_path = os.path.join(config.ADVERSARIAL_FOLDER, "carlini_wagner_base")
    os.makedirs(folder_path, exist_ok=True)

    with tf.Session() as sess:
        model = Inception_V3_Model(np.float32(orig_image))
        attack = CarliniWagnerL2(model, sess=sess)
        params = {
            'confidence': 0,
            'y': None,
            'max_iterations': 200,
            'learning_rate': 0.01,
            "num_labels": 1001,
            'initial_const': 10,
            'clip_min': -1,
            'clip_max': 1
        }

        variables = tf.get_collection(tf.GraphKeys.VARIABLES)
        saver = tf.train.Saver(variables)
        saver.restore(sess, importer.checkpoint_path)

        x_adv = attack.generate_np(orig_image, **params)
        print("generated shape: {}".format(x_adv.shape))
        utils.image_saver(x_adv, [filename], folder_path)
def __init__(self, model, source_samples=2, binary_search_steps=5,
             cw_learning_rate=5e-3, confidence=0, attack_iterations=1000,
             attack_initial_const=1e-2):
    super(Attack, self).__init__()

    model_wrap = KerasModelWrapper(model.model)
    self.model = model_wrap
    self.sess = model.sess
    self.x = model.input_ph
    self.y = Input(shape=(model.num_classes,), dtype='float32')

    abort_early = True
    self.cw = CarliniWagnerL2(self.model, sess=self.sess)
    self.cw_params = {
        'binary_search_steps': binary_search_steps,
        "y": None,
        'abort_early': abort_early,
        'max_iterations': attack_iterations,
        'learning_rate': cw_learning_rate,
        'batch_size': source_samples,
        'initial_const': attack_initial_const,
        'confidence': confidence,
        'clip_min': 0.0,
    }
def generate_adv_samples(wrap, generator, sess, file_names=None):
    if ATTACK_NAME == 'fgsm':
        attack_instance_graph = FastGradientMethod(wrap, sess)
        attack_instance = fgsm
    else:
        attack_instance_graph = CarliniWagnerL2(wrap, sess)
        attack_instance = cw

    diff_L2 = []

    if file_names is None:
        file_names = generator.get_file_names()

    image_index = 0
    TEN_LABEL = convert_to_one_hot(10, NB_CLASSES)
    NINETY_LABEL = convert_to_one_hot(90, NB_CLASSES)

    for legit_sample, legit_label in generator:
        ground_truth = np.argmax(legit_label)

        # Note: both branches currently run the same untargeted attack (target=None);
        # TEN_LABEL and NINETY_LABEL are defined above but not used here.
        if ground_truth > 50:
            adv_x = attack_instance.attack(legit_sample, None, attack_instance_graph)
        else:
            adv_x = attack_instance.attack(legit_sample, None, attack_instance_graph)

        diff_L2.append(L2_distance(legit_sample, adv_x))
        save_image(ADV_DATASET_PATH + 'test/' + file_names[image_index],
                   adv_x[0, :, :, :])
        image_index += 1

    print("Obtaining statistical information for L2 perturbation summed by channels")
    print_statistical_information(diff_L2)
def mnist_cw_attack(sample, target, model, sess, targeted=True, attack_iterations=100):
    cw = CarliniWagnerL2(model, back='tf', sess=sess)

    if targeted:
        adv_input = sample
        adv_ys = target
        yname = "y_target"
    else:
        adv_input = sample
        adv_ys = None
        yname = "y"

    cw_params = {
        'binary_search_steps': 1,
        'abort_early': False,
        yname: adv_ys,
        'confidence': 1,
        'max_iterations': attack_iterations,
        'learning_rate': 0.1,
        # 'batch_size': 1,
        'clip_min': 0.,
        'clip_max': 1.,
        'initial_const': 10
    }
    adv = cw.generate_np(adv_input, **cw_params)
    return adv
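# Illustrative call (assumption: `model` is a cleverhans-compatible MNIST classifier
# and `sample` / `target` are a (1, 28, 28, 1) image and a (1, 10) one-hot label):
#
#   adv = mnist_cw_attack(sample, target, model, sess, targeted=True,
#                         attack_iterations=100)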
def test_generate_np_high_confidence_targeted_examples(self):
    trivial_model = TrivialModel()

    for CONFIDENCE in [0, 2.3]:
        x_val = np.random.rand(10, 1) - .5
        x_val = np.array(x_val, dtype=np.float32)

        feed_labs = np.zeros((10, 2))
        feed_labs[np.arange(10), np.random.randint(0, 2, 10)] = 1
        attack = CarliniWagnerL2(trivial_model, sess=self.sess)
        x_adv = attack.generate_np(x_val,
                                   max_iterations=100,
                                   binary_search_steps=2,
                                   learning_rate=1e-2,
                                   initial_const=1,
                                   clip_min=-10,
                                   clip_max=10,
                                   confidence=CONFIDENCE,
                                   y_target=feed_labs,
                                   batch_size=10)

        new_labs = self.sess.run(trivial_model.get_logits(x_adv))
        good_labs = new_labs[np.arange(10), np.argmax(feed_labs, axis=1)]
        bad_labs = new_labs[np.arange(10), 1 - np.argmax(feed_labs, axis=1)]

        self.assertClose(CONFIDENCE, np.min(good_labs - bad_labs), atol=1e-1)
        self.assertTrue(np.mean(np.argmax(new_labs, axis=1) ==
                                np.argmax(feed_labs, axis=1)) > .9)
def prepare_attack(sess, args, model, adv_input, target_embeddings):
    if args.attack_type == 'FGSM':
        # Define FGSM for the model
        steps = 1
        # eps = args.eps
        alpha = args.eps / steps
        fgsm = FastGradientMethod(model)
        fgsm_params = {'eps': alpha, 'clip_min': 0., 'clip_max': 1.}
        adv_x = fgsm.generate(model.face_input, **fgsm_params)
    elif args.attack_type == 'CW':
        model.face_input.set_shape(np.shape(adv_input))
        # Instantiate a CW attack object
        cw = CarliniWagnerL2(model, sess)
        cw_params = {
            'binary_search_steps': 0,  # 1
            'max_iterations': 0,  # 100
            'learning_rate': .1,  # .2
            'batch_size': args.lfw_batch_size,
            'initial_const': 10
        }
        # initial_const: 10, the initial trade-off constant used to tune the relative
        # importance of the size of the perturbation vs. confidence of classification
        # adv_x = cw.generate_np(adv_input, **cw_params)
        feed_dict = {
            model.face_input: adv_input,
            model.victim_embedding_input: target_embeddings,
            model.batch_size: 10,
            model.phase_train: False
        }
        # generate() only takes the input tensor plus keyword parameters; the
        # feed_dict above is kept for running the resulting adv_x tensor later
        adv_x = cw.generate(model.face_input, **cw_params)
    return adv_x
def build_adv(make_obs_tf, q_func, num_actions, epsilon, noisy, attack=None):
    with tf.variable_scope('deepq', reuse=tf.AUTO_REUSE):
        obs_tf_in = U.ensure_tf_input(make_obs_tf("observation"))
        stochastic_ph_adv = tf.placeholder(tf.bool, (), name="stochastic_adv")
        update_eps_ph_adv = tf.placeholder(tf.float32, (), name="update_eps_adv")
        eps = tf.get_variable("eps", (), initializer=tf.constant_initializer(0))
        update_eps_expr_adv = eps.assign(
            tf.cond(update_eps_ph_adv >= 0, lambda: update_eps_ph_adv, lambda: eps))

        print("==========================================")
        # def wrapper(x):
        #     return q_func(x, num_actions, scope="q_func", reuse=True,
        #                   concat_softmax=True, noisy=noisy)

        if attack == 'fgsm':
            adversary = FastGradientMethod(
                q_func(obs_tf_in.get(), num_actions, scope="q_func", reuse=True,
                       concat_softmax=True, noisy=noisy),
                sess=U.get_session())
            adv_observations = adversary.generate(
                obs_tf_in.get(), eps=epsilon, clip_min=0, clip_max=1.0) * 255.0
            print("----")
            print(adv_observations.shape)
        else:
            adversary = CarliniWagnerL2(
                q_func(obs_tf_in.get(), num_actions, scope="q_func", reuse=True,
                       concat_softmax=True, noisy=noisy),
                sess=U.get_session())
            cw_params = {
                'binary_search_steps': 1,
                'max_iterations': 100,
                'learning_rate': 0.1,
                'initial_const': 10,
                'clip_min': 0,
                'clip_max': 1.0
            }
            adv_observations = adversary.generate(obs_tf_in.get(), **cw_params) * 255.0

        # saveScreenPNG(b'test_image.png')
        craft_adv_obs = U.function(
            inputs=[obs_tf_in, stochastic_ph_adv, update_eps_ph_adv],
            outputs=adv_observations,
            givens={update_eps_ph_adv: -1.0, stochastic_ph_adv: True},
            updates=[update_eps_expr_adv])

        return craft_adv_obs
def init_attack(model, attack_params_dict, sess):
    """
    Initialize the adversarial attack using the cleverhans toolbox

    Parameters
    ----------
    model : Keras Model
        The model to attack

    attack_params_dict : dict
        Self-defined dictionary specifying the attack and its parameters

    sess : Session
        The current tf session

    Returns
    -------
    attack : cleverhans Attack
        The Attack object

    attack_params : dict
        Dictionary with the value of the attack parameters, valid to
        generate adversarial examples with cleverhans.

    batch_size : int or None
        Batch size required by the attack (1 for SPSA), or None to use the default.
    """
    # Wrapper for the Keras model
    model_wrap = KerasModelWrapper(model)

    # Initialize attack
    batch_size = None
    if attack_params_dict['attack'] == 'fgsm':
        attack = FastGradientMethod(model_wrap, sess=sess)
        attack_params = {'eps': attack_params_dict['eps'],
                         'clip_min': 0., 'clip_max': 1.}
    elif attack_params_dict['attack'] == 'spsa':
        attack = SPSA(model_wrap, sess=sess)
        attack_params = {'epsilon': attack_params_dict['eps'],
                         'num_steps': attack_params_dict['n_steps']}
        batch_size = 1
    elif attack_params_dict['attack'] == 'deepfool':
        attack = DeepFool(model_wrap, sess=sess)
        attack_params = {'clip_min': 0., 'clip_max': 1.}
    elif attack_params_dict['attack'] == 'pgd':
        attack = ProjectedGradientDescent(model_wrap, sess=sess)
        attack_params = {'eps': attack_params_dict['eps'],
                         'eps_iter': attack_params_dict['eps_iter'],
                         'nb_iter': attack_params_dict['n_steps'],
                         'clip_min': 0., 'clip_max': 1.}
    elif attack_params_dict['attack'] == 'carlini':
        attack = CarliniWagnerL2(model_wrap, sess=sess)
        attack_params = {'clip_min': 0., 'clip_max': 1.}
    else:
        raise NotImplementedError()

    return attack, attack_params, batch_size
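# Example use (hypothetical values; assumes a compiled Keras classifier `model`
# with inputs scaled to [0, 1] and an active Keras/TF session):
#
#   attack, attack_params, batch_size = init_attack(
#       model, {'attack': 'carlini'}, keras.backend.get_session())
#   x_adv = attack.generate_np(x_test[:16], **attack_params)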
def eval_cleverhans():
    # Set test phase
    learning_phase = K.learning_phase()
    K.set_learning_phase(0)

    # Pre-process images
    images_tf = images.astype(K.floatx())
    images_tf /= 255.

    # Wrapper for the Keras model
    model_wrap = KerasModelWrapper(model)

    # Initialize attack
    if attack_params_dict['attack'] == 'fgsm':
        attack = FastGradientMethod(model_wrap, sess=K.get_session())
        attack_params = {'eps': attack_params_dict['eps'],
                         'clip_min': 0., 'clip_max': 1.}
    elif attack_params_dict['attack'] == 'deepfool':
        attack = DeepFool(model_wrap, sess=K.get_session())
        attack_params = {'clip_min': 0., 'clip_max': 1.}
    elif attack_params_dict['attack'] == 'madry':
        attack = ProjectedGradientDescent(model_wrap, sess=K.get_session())
        attack_params = {'clip_min': 0., 'clip_max': 1.}
    elif attack_params_dict['attack'] == 'carlini':
        attack = CarliniWagnerL2(model_wrap, sess=K.get_session())
        attack_params = {'clip_min': 0., 'clip_max': 1.}
    else:
        raise NotImplementedError()

    # Define input TF placeholder
    x = tf.placeholder(K.floatx(), shape=(None,) + images.shape[1:])
    y = tf.placeholder(K.floatx(), shape=(None,) + (labels.shape[-1],))

    # Define adversarial predictions symbolically
    x_adv = attack.generate(x, **attack_params)
    x_adv = tf.stop_gradient(x_adv)
    predictions_adv = model(x_adv)

    # Evaluate the accuracy of the model on adversarial examples
    eval_par = {'batch_size': batch_size}
    # feed_dict = {K.learning_phase(): attack_params_dict['learning_phase']}
    # acc_adv = model_eval(K.get_session(), x, y, predictions_adv, images,
    #                      labels, feed=feed_dict, args=eval_par)
    acc_adv = model_eval(K.get_session(), x, y, predictions_adv, images_tf,
                         labels, args=eval_par)
    print('Adversarial accuracy against %s: %.4f\n'
          % (attack_params_dict['attack'], acc_adv))

    # Restore the original learning phase
    K.set_learning_phase(learning_phase)

    return acc_adv
def _CW(self):
    cw_attack = CarliniWagnerL2(self.wrapped_model, sess=self.sess)
    params = {
        'batch_size': 1,
        'max_iterations': 1000,
        'binary_search_steps': 9,
        'initial_const': 1e-3,
        'learning_rate': 5e-3,
        'clip_min': 0.,
        'clip_max': 1.
    }
    adv_x = cw_attack.generate(self.x, **params)
    self.save_images(adv_x, self.save_loc)
def CW_attack_l2():
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    sess = tf.Session(config=tf_config)

    x_op = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))
    y_op = tf.placeholder(tf.float32, shape=(None, 10))

    # Convert the pytorch model to a tf model and wrap it for cleverhans
    tf_model_fn = convert_pytorch_model_to_tf(model)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

    # Create a CW attack
    cw = CarliniWagnerL2(cleverhans_model, sess=sess)
    cw_params = {
        'binary_search_steps': 1,
        'max_iterations': 100,
        'batch_size': args.b,
        'clip_min': 0.,
        'clip_max': 1.,
        'y': y_op
    }
    adv_x_op = cw.generate(x_op, **cw_params)
    adv_preds_op = tf_model_fn(adv_x_op)

    # Evaluation against the CW L2 attack
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        adv = sess.run(adv_x_op, feed_dict={
            x_op: inputs,
            y_op: torch.nn.functional.one_hot(targets, 10)
        })
        diff = (torch.tensor(adv) - inputs).renorm(p=2, dim=0, maxnorm=0.5)
        adv = (inputs + diff).clamp(0., 1.)
        correct += model(adv).topk(1)[1][:, 0].eq(targets.cuda()).cpu().sum().item()
        total += len(inputs)

        sys.stdout.write("\rWhite-box CW l2 attack... Acc: %.3f%% (%d/%d)"
                         % (100. * correct / total, correct, total))
        sys.stdout.flush()

    print('Accuracy under CW l2 attack: %.3f%%' % (100. * correct / total))
def setUp(self):
    super(TestCarliniWagnerL2, self).setUp()
    import tensorflow as tf

    # The world's simplest neural network
    def my_model(x):
        W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
        h1 = tf.nn.sigmoid(tf.matmul(x, W1))
        W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
        res = tf.matmul(h1, W2)
        return res

    self.sess = tf.Session()
    self.model = my_model
    self.attack = CarliniWagnerL2(self.model, sess=self.sess)
def generate(sess, model, data_feeder, source, target, adv_dump_dir, nb_samples,
             learning_rate=0.1, confidence=0):
    wrap = KerasModelWrapper(model.model)
    cwl2 = CarliniWagnerL2(wrap, sess=sess)

    batch_size = 32
    max_iterations = 450
    abort_early = True
    bin_search_steps = 1

    cwl2_params = {
        'confidence': confidence,
        'learning_rate': learning_rate,
        'binary_search_steps': bin_search_steps,
        'batch_size': batch_size,
        'max_iterations': max_iterations,
        'abort_early': abort_early,
        'initial_const': 10,
        'clip_min': 0.0,
        'clip_max': 1.0,
        'y_target': data_feeder.get_labels(target, nb_samples)
    }

    craft_data = data_feeder.get_evasion_craft_data(source_class=source,
                                                    total_count=nb_samples)
    adv_data = cwl2.generate_np(craft_data, **cwl2_params)

    # Commit data
    adv_writer = AdversarialWriterEvasion(
        source_class=source,
        target_class=target,
        attack_params={
            'confidence': confidence,
            'learning_rate': learning_rate,
            'binary_search_steps': bin_search_steps,
            'batch_size': batch_size,
            'max_iterations': max_iterations,
            'abort_early': abort_early
        },
        adversarial_data_path=adv_dump_dir)
    adv_writer.batch_put(craft_data, adv_data)
    adv_writer.commit()
def main(_):
    # Images for inception classifier are normalized to be in [-1, 1] interval,
    # eps is a difference between pixels so it should be in [0, 2] interval.
    # Renormalizing epsilon from [0, 255] to [0, 2].
    batch_size = FLAGS.batch_size
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001
    targeted = False
    tf.logging.set_verbosity(tf.logging.DEBUG)

    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        model = InceptionModel(num_classes)

        with tf.Session() as sess:
            cw_params = {'binary_search_steps': 10,
                         'confidence': 20,
                         # "y": np.random.randint(1000, size=(8, 1001)),
                         "y": np.zeros((16, 1001)),
                         'learning_rate': 0.1,
                         'max_iterations': 20000,
                         'abort_early': True,
                         'batch_size': batch_size * num_classes if targeted else batch_size,
                         'initial_const': 0.01}
            cw = CarliniWagnerL2(model, back='tf', sess=sess)
            x_adv = cw.generate(x_input, **cw_params)

            saver = tf.train.Saver(slim.get_model_variables())
            session_creator = tf.train.ChiefSessionCreator(
                scaffold=tf.train.Scaffold(saver=saver),
                checkpoint_filename_with_path=FLAGS.checkpoint_path,
                master=FLAGS.master)
            saver.restore(sess, FLAGS.checkpoint_path)
            sess.run(tf.global_variables_initializer())

            # with tf.train.MonitoredSession(session_creator=session_creator) as sess:
            i = 0
            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                adv_images = sess.run(x_adv, feed_dict={x_input: images})
                print("input images: ", images.shape)
                # adv_images = cw.generate_np(images, **cw_params)
                i += 8
                print(i)
                print(filenames)
                print(adv_images.shape)
                save_images(adv_images, filenames, FLAGS.output_dir)
def attack(model, x_input, input_img):
    wrap = KerasModelWrapper(model)
    cw_params = {'binary_search_steps': 1,
                 'max_iterations': 5,
                 'learning_rate': 2e-3,
                 'batch_size': 1,
                 'initial_const': 0.1,
                 'confidence': 0,
                 'clip_min': -1.,
                 'clip_max': 1.}
    cw = CarliniWagnerL2(wrap, sess=sess)
    adv = cw.generate(x=x_input, initial_const=0.1, learning_rate=2e-3,
                      clip_min=-1., clip_max=1., max_iterations=5)
    adv_img = sess.run(adv, feed_dict={x_input: input_img})
    # for i in range(2):
    #     print('iter:', i)
    #     adv_img = sess.run(adv, feed_dict={x_input: adv_img})
    return adv_img
def vgg16_evaluate():
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Load images for evaluation. Took Stanford 231n tiny set for testing (goldfish)
    images = []
    target = np.ones(100)
    for index, myfile in enumerate(os.listdir(path)):
        if index == 100:
            break
        if myfile.endswith('JPEG'):
            image = load_img(path + '/' + myfile, target_size=(224, 224))
            inputs = img_to_array(image)
            inputs = inputs.reshape(1, inputs.shape[0], inputs.shape[1], inputs.shape[2])
            images.append(inputs)
            # target.append(np.zeros(1000))
            # target[-1][1] = 1

    target = utils.to_categorical(target, 1000)
    x_input = np.vstack(images)
    x_input = preprocess_input(x_input)

    model = VGG16(include_top=True, weights='imagenet', input_tensor=None,
                  input_shape=None, pooling=None, classes=1000)
    model.compile(loss='categorical_crossentropy', optimizer='sgd',
                  metrics=['accuracy'])
    # y = model.evaluate(x_input, target, verbose=1)
    # print(y)

    cw_attack = CarliniWagnerL2(model=model, back='tf', sess=sess)
    # Untargeted cw_attack parameters
    cw_params = {'binary_search_steps': 1,
                 'y_target': None,
                 'max_iterations': 10,
                 'learning_rate': 0.1,
                 'batch_size': 100,
                 'initial_const': 10}
    adv_inputs = x_input[:]
    adv = cw_attack.generate_np(adv_inputs, **cw_params)

    adv_y = model.evaluate(adv, target, verbose=1)
    print(adv_y)
def get_CWL2_adversarial(targeted, xs, y_target, classifier, batch_size,
                         cwl2_confidence):
    # print(xs.shape, y_target.shape)
    # exit()
    ATTACK_BATCH = batch_size
    samples_range = int(xs.shape[0] / ATTACK_BATCH)

    wrap = KerasModelWrapper(classifier)
    attack = CarliniWagnerL2(wrap, sess=K.get_session())
    fgsm_params = {
        'confidence': cwl2_confidence,
        'max_iterations': 1000,
        'binary_search_steps': 9,
        'initial_const': 1,
        'clip_min': -5,
        'clip_max': 5,
        'batch_size': ATTACK_BATCH
    }

    if targeted:
        y_target = np.expand_dims(y_target, axis=1)
        attack_xs = attack.generate_np(xs[:ATTACK_BATCH, :, :, :],
                                       y_target=y_target[:ATTACK_BATCH],
                                       **fgsm_params)
        for ii in range(1, samples_range):
            print('iter', ii)
            new_attack_batch = attack.generate_np(
                xs[ii * ATTACK_BATCH:(ii + 1) * ATTACK_BATCH, :, :, :],
                y_target=y_target[ii * ATTACK_BATCH:(ii + 1) * ATTACK_BATCH],
                **fgsm_params)
            attack_xs = np.concatenate((attack_xs, new_attack_batch), axis=0)
    else:
        attack_xs = attack.generate_np(xs[:ATTACK_BATCH, :, :, :], **fgsm_params)
        for ii in range(1, samples_range):
            print('iter', ii)
            new_attack_batch = attack.generate_np(
                xs[ii * ATTACK_BATCH:(ii + 1) * ATTACK_BATCH, :, :, :],
                **fgsm_params)
            attack_xs = np.concatenate((attack_xs, new_attack_batch), axis=0)

    return attack_xs
def __init__(self, model, targeted=False, confidence=0, batch_size=1,
             learning_rate=5e-3, binary_search_steps=5, max_iterations=1000,
             abort_early=True, initial_const=1e-2, clip_min=-1, clip_max=1):
    super().__init__(model=model, clip_min=clip_min, clip_max=clip_max)

    self._targeted = targeted
    self._confidence = confidence
    self._batch_size = batch_size
    self._learning_rate = learning_rate
    self._binary_search_steps = binary_search_steps
    self._max_iterations = max_iterations
    self._abort_early = abort_early
    self._initial_const = initial_const

    with self.graph.as_default():
        self._method = CarliniWagnerL2(self._model, sess=self.session,
                                       confidence=self._confidence,
                                       batch_size=self._batch_size,
                                       learning_rate=self._learning_rate,
                                       binary_search_steps=self._binary_search_steps,
                                       max_iterations=self._max_iterations,
                                       abort_early=self._abort_early,
                                       initial_const=self._initial_const,
                                       clip_min=self._clip_min,
                                       clip_max=self._clip_max,
                                       targeted=self._targeted)
def cw_attack(sess, x, logits, n_ae, final=False):
    cw_attack_obj = CarliniWagnerL2(logits, sess=sess, back='tf')
    if final:
        cw_params = {'binary_search_steps': 9,
                     'max_iterations': 2000,
                     'learning_rate': 0.01,
                     'initial_const': 1.0,
                     'abort_early': True,
                     'batch_size': n_ae}
    else:
        cw_params = {'binary_search_steps': 5,
                     'max_iterations': 500,
                     'learning_rate': 0.01,
                     'initial_const': 1.0,
                     # need to specify, since CarliniWagnerL2 is not completely symbolic
                     'batch_size': n_ae}
    adv_ex_tensor = cw_attack_obj.generate(x, **cw_params)
    adv_ex_tensor = tf.stop_gradient(adv_ex_tensor)
    return adv_ex_tensor
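# Sketch of evaluating the returned tensor (assumption: `x` is the input placeholder
# and `clean_batch` holds `n_ae` clean examples; names are illustrative):
#
#   adv_ex_tensor = cw_attack(sess, x, logits, n_ae=64)
#   adv_examples = sess.run(adv_ex_tensor, feed_dict={x: clean_batch})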
def prepare_attack(sess, args, model, adv_input, target_embeddings):
    if args.attack_type == 'FGSM':
        # Define FGSM for the model
        steps = 1
        alpha = args.eps / steps
        fgsm = FastGradientMethod(model)
        fgsm_params = {'eps': alpha, 'clip_min': 0., 'clip_max': 1.}
        adv_x = fgsm.generate(model.face_input, **fgsm_params)
    elif args.attack_type == 'CW':
        model.face_input.set_shape(np.shape(adv_input))
        # Instantiate a CW attack object
        cw = CarliniWagnerL2(model, sess)
        cw_params = {
            'binary_search_steps': 1,  # 1
            'max_iterations': 100,  # 100
            'learning_rate': .2,  # .2
            'batch_size': args.lfw_batch_size,
            'initial_const': args.init_c,  # 10
            'confidence': 10
        }
        # feed values for running the attack graph
        # (model.batch_size: 10, model.phase_train: False)
        feed_dict = {
            model.face_input: adv_input,
            model.victim_embedding_input: target_embeddings
        }
        # adv_x = cw.generate(model.face_input, feed_dict, **cw_params)
        adv_x = cw.generate(model.face_input, **cw_params)
        # adv_x = cw.generate_np(adv_input, **cw_params)
        print('hello')
    elif args.attack_type == 'random':
        random_attack = Noise(model, sess)
        noise_params = {
            'eps': args.eps,
            'ord': np.inf,
            'clip_min': 0,
            'clip_max': 1
        }
        adv_x = random_attack.generate(model.face_input, **noise_params)
    return adv_x
def main(_):
    # Images for inception classifier are normalized to be in [-1, 1] interval,
    # eps is a difference between pixels so it should be in [0, 2] interval.
    # Renormalizing epsilon from [0, 255] to [0, 2].
    batch_shape = [1, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001
    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        model = InceptionModel(num_classes)
        probs = model(x_input)

        cw = CarliniWagnerL2(model)
        cw_params = {
            'binary_search_steps': 1,
            'max_iterations': 5,
            'learning_rate': 0.1,
            'batch_size': 1,
            'initial_const': 10,
            'clip_min': -1.,
            'clip_max': 1.
        }
        x_adv = cw.generate(x=x_input, **cw_params)

        # Run computation
        saver = tf.train.Saver(slim.get_model_variables())
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(saver=saver),
            checkpoint_filename_with_path=FLAGS.checkpoint_path,
            master=FLAGS.master)

        with tf.train.MonitoredSession(session_creator=session_creator) as sess:
            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                adv_images = sess.run(x_adv, feed_dict={x_input: images})
def calc_radius(self, input, label, norm_type, upper=0.5, eps=1e-2):
    # only L2 norm is supported
    assert norm_type == '2'

    xs = input.unsqueeze(0)
    clean_preds = self.model(xs.cuda()).detach().cpu().numpy()
    clean_pred = np.argmax(clean_preds[0])
    if clean_pred != label:
        return 0.

    x_op = tf.placeholder(tf.float32,
                          shape=(None, input.shape[0], input.shape[1], input.shape[2]))
    attk = CarliniWagnerL2(self.ch_model, sess=self.sess)
    params = {
        'y': tf.one_hot([label], get_num_classes(self.dataset)),
        'clip_min': 0.0,
        'clip_max': 1.0,
        'max_iterations': 1000
    }
    adv_x = attk.generate(x_op, **params)
    adv_preds_op = self.tf_model(adv_x)

    (adv_preds, adv_xsamp) = self.sess.run((adv_preds_op, adv_x),
                                           feed_dict={x_op: xs})
    adv_pred = np.argmax(adv_preds[0])
    if adv_pred == label:
        # failed to find an adversarial example; return the maximum possible radius
        return la.norm(np.ones_like(adv_xsamp.reshape(-1)) * 0.5, 2)
    else:
        dist = la.norm(adv_xsamp.reshape(-1) - xs.numpy().reshape(-1), 2)
        return dist
def mnist_tutorial_jsma(train_start=0, train_end=5500, test_start=0,
                        test_end=1000, nb_epochs=8, batch_size=100,
                        nb_classes=10, nb_filters=64, learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    # sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    print("x_train shape: ", X_train.shape)
    print("y_train shape: ", Y_train.shape)

    # do not log
    model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                verbose=False, rng=rng)

    f_out_clean = open("Clean_jsma_elastic_against5.log", "w")

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    f_out_clean.write('Test accuracy on legitimate test examples: ' +
                      str(accuracy) + '\n')

    # Clean test against JSMA
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    adv_x_jsma = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_probs(adv_x_jsma)

    # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
    acc = model_eval(sess, x, y, preds_adv_jsma, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on JSMA adversarial examples: ' +
                      str(acc) + '\n')

    ################################################################
    # Clean test against FGSM
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    fgsm = FastGradientMethod(model, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model.get_probs(adv_x_fgsm)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_fgsm, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on FGSM adversarial examples: ' +
                      str(acc) + '\n')

    ################################################################
    # Clean test against BIM
    bim_params = {'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 100,
                  'clip_min': 0., 'clip_max': 1.}
    bim = BasicIterativeMethod(model, sess=sess)
    adv_x_bim = bim.generate(x, **bim_params)
    preds_adv_bim = model.get_probs(adv_x_bim)

    # Evaluate the accuracy of the MNIST model on BIM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_bim, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on BIM adversarial examples: ' +
                      str(acc) + '\n')

    ################################################################
    # Clean test against EN
    en_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    en = ElasticNetMethod(model, back='tf', sess=sess)
    adv_x_en = en.generate(x, **en_params)
    preds_adv_en = model.get_probs(adv_x_en)

    # Evaluate the accuracy of the MNIST model on EN adversarial examples
    acc = model_eval(sess, x, y, preds_adv_en, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on EN adversarial examples: ' +
                      str(acc) + '\n')

    ################################################################
    # Clean test against DF
    deepfool_params = {'nb_candidate': 10, 'overshoot': 0.02, 'max_iter': 50,
                       'clip_min': 0., 'clip_max': 1.}
    deepfool = DeepFool(model, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model.get_probs(adv_x_df)

    # Evaluate the accuracy of the MNIST model on DF adversarial examples
    acc = model_eval(sess, x, y, preds_adv_df, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on DF adversarial examples: ' +
                      str(acc) + '\n')

    ################################################################
    # Clean test against VAT
    vat_params = {'eps': 2.0, 'num_iterations': 1, 'xi': 1e-6,
                  'clip_min': 0., 'clip_max': 1.}
    vat = VirtualAdversarialMethod(model, sess=sess)
    adv_x_vat = vat.generate(x, **vat_params)
    preds_adv_vat = model.get_probs(adv_x_vat)

    # Evaluate the accuracy of the MNIST model on VAT adversarial examples
    acc = model_eval(sess, x, y, preds_adv_vat, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
    f_out_clean.write('Clean test accuracy on VAT adversarial examples: ' +
                      str(acc) + '\n')

    f_out_clean.close()

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(X_train.shape[0]) + ' * ' + str(nb_classes - 1) +
          ' adversarial examples')

    model_2 = make_basic_cnn()
    preds_2 = model(x)  # need this for constructing the array
    sess.run(tf.global_variables_initializer())  # run this again
    # sess.run(tf.global_variables_initializer())

    # 1. Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model_2, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
    adv_random = jsma.generate(x, **jsma_params)
    preds_adv_random = model_2.get_probs(adv_random)

    # 2. Instantiate FGSM attack
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    fgsm = FastGradientMethod(model_2, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model_2.get_probs(adv_x_fgsm)

    # 3. Instantiate Elastic net attack
    en_params = {'binary_search_steps': 5,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    enet = ElasticNetMethod(model_2, sess=sess)
    adv_x_en = enet.generate(x, **en_params)
    preds_adv_elastic_net = model_2.get_probs(adv_x_en)

    # 4. Deepfool
    deepfool_params = {'nb_candidate': 10, 'overshoot': 0.02, 'max_iter': 50,
                       'clip_min': 0., 'clip_max': 1.}
    deepfool = DeepFool(model_2, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_deepfool = model_2.get_probs(adv_x_df)

    # 5. Basic Iterative
    bim_params = {'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 100,
                  'clip_min': 0., 'clip_max': 1.}
    base_iter = BasicIterativeMethod(model_2, sess=sess)
    adv_x_bi = base_iter.generate(x, **bim_params)
    preds_adv_base_iter = model_2.get_probs(adv_x_bi)

    # 6. C&W Attack
    cw = CarliniWagnerL2(model_2, back='tf', sess=sess)
    cw_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    adv_x_cw = cw.generate(x, **cw_params)
    preds_adv_cw = model_2.get_probs(adv_x_cw)

    # 7. VAT
    vat_params = {'eps': 2.0, 'num_iterations': 1, 'xi': 1e-6,
                  'clip_min': 0., 'clip_max': 1.}
    vat = VirtualAdversarialMethod(model_2, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model_2.get_probs(adv_x)

    # ==> generate 10 targeted classes for every train data regardless
    # This call runs the Jacobian-based saliency map approach
    # Loop over the samples we want to perturb into adversarial examples
    X_train_adv_set = []
    Y_train_adv_set = []
    for index in range(X_train.shape[0]):
        print('--------------------------------------')
        x_val = X_train[index:(index + 1)]
        y_val = Y_train[index]

        # add normal sample in!!!!
        X_train_adv_set.append(x_val)
        Y_train_adv_set.append(y_val)

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_val))
        target_classes = other_classes(nb_classes, current_class)

        # Loop over all target classes
        for target in target_classes:
            # print('Generating adv. example for target class %i' % target)
            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(x_val, **jsma_params)

            # append to X_train_adv_set and Y_train_adv_set
            X_train_adv_set.append(adv_x)
            Y_train_adv_set.append(y_val)

            # shape is: (1, 28, 28, 1)
            # print("adv_x shape is: ", adv_x.shape)

            # check for success rate
            # res = int(model_argmax(sess, x, preds, adv_x) == target)

    print('-------------Finished Generating Np Adversarial Data-------------------------')

    X_train_data = np.concatenate(X_train_adv_set, axis=0)
    Y_train_data = np.stack(Y_train_adv_set, axis=0)
    print("X_train_data shape is: ", X_train_data.shape)
    print("Y_train_data shape is: ", Y_train_data.shape)

    # saves the output so there is no need to re-run the file later
    np.savez("jsma_training_data.npz", x_train=X_train_data, y_train=Y_train_data)
    # >>> data = np.load('/tmp/123.npz')
    # >>> data['a']

    f_out = open("Adversarial_jsma_elastic_against5.log", "w")

    # evaluate the model against the attacks defined above:
    # jsma, fgsm, basic iterative, elastic net, deepfool, C&W, and VAT
    def evaluate_against_all():
        # 1 Clean Data
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
        print('Legitimate accuracy: %0.4f' % accuracy)
        tmp = 'Legitimate accuracy: ' + str(accuracy) + "\n"
        f_out.write(tmp)

        # 2 JSMA
        accuracy = model_eval(sess, x, y, preds_adv_random, X_test, Y_test,
                              args=eval_params)
        print('JSMA accuracy: %0.4f' % accuracy)
        tmp = 'JSMA accuracy:' + str(accuracy) + "\n"
        f_out.write(tmp)

        # 3 FGSM
        accuracy = model_eval(sess, x, y, preds_adv_fgsm, X_test, Y_test,
                              args=eval_params)
        print('FGSM accuracy: %0.4f' % accuracy)
        tmp = 'FGSM accuracy:' + str(accuracy) + "\n"
        f_out.write(tmp)

        # 4 Basic Iterative
        accuracy = model_eval(sess, x, y, preds_adv_base_iter, X_test, Y_test,
                              args=eval_params)
        print('Base Iterative accuracy: %0.4f' % accuracy)
        tmp = 'Base Iterative accuracy:' + str(accuracy) + "\n"
        f_out.write(tmp)

        # 5 Elastic Net
        accuracy = model_eval(sess, x, y, preds_adv_elastic_net, X_test, Y_test,
                              args=eval_params)
        print('Elastic Net accuracy: %0.4f' % accuracy)
        tmp = 'Elastic Net accuracy:' + str(accuracy) + "\n"
        f_out.write(tmp)

        # 6 DeepFool
        accuracy = model_eval(sess, x, y, preds_adv_deepfool, X_test, Y_test,
                              args=eval_params)
        print('DeepFool accuracy: %0.4f' % accuracy)
        tmp = 'DeepFool accuracy:' + str(accuracy) + "\n"
        f_out.write(tmp)

        # 7 C&W Attack
        accuracy = model_eval(sess, x, y, preds_adv_cw, X_test, Y_test,
                              args=eval_params)
        print('C & W accuracy: %0.4f' % accuracy)
        tmp = 'C & W accuracy:' + str(accuracy) + "\n"
        f_out.write(tmp)
        f_out.write("*******End of Epoch***********\n\n")

        # 8 Virtual Adversarial
        accuracy = model_eval(sess, x, y, preds_adv_vat, X_test, Y_test,
                              args=eval_params)
        print('VAT accuracy: %0.4f' % accuracy)
        tmp = 'VAT accuracy:' + str(accuracy) + "\n"
        f_out.write(tmp)
        f_out.write("*******End of Epoch***********\n\n")

        print("*******End of Epoch***********\n\n")

        # report.adv_train_adv_eval = accuracy

    print("Now Adversarial Training with Elastic Net + modified X_train and Y_train")
    # trained_model.out
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': '/home/stephen/PycharmProjects/jsma-runall-mac/',
        'filename': 'trained_model.out'
    }
    model_train(sess, x, y, preds_2, X_train_data, Y_train_data,
                predictions_adv=preds_adv_elastic_net,
                evaluate=evaluate_against_all, verbose=False,
                args=train_params, rng=rng)

    # Close TF session
    sess.close()

    return report
def main(type="Resnet", dataset="CIFAR10", attack_type="FGM"):
    size = 256
    eval_params = {'batch_size': 128}

    ############################### Prepare the Data ###############################
    if dataset == 'CIFAR10':
        (_, _), (x_test, y_test) = prepare_CIFAR10()
        num_classes = 10
        input_dim = 32
    elif dataset == 'CIFAR100':
        (_, _), (x_test, y_test) = prepare_CIFAR100()
        num_classes = 100
        input_dim = 32
    else:
        (_, _), (x_test, y_test) = prepare_SVHN("./Data/")
        num_classes = 10
        input_dim = 32

    x_test = x_test / 255.
    y_test = keras.utils.to_categorical(y_test, num_classes)
    ############################### Prepare the Data ###############################

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # prepare the placeholders
        x = tf.placeholder(tf.float32, [None, input_dim, input_dim, 3])
        y = tf.placeholder(tf.float32, [None, num_classes])

        input_output = []

        def modelBuilder(x, num_classes, dataset, type, sess, input_output):
            if len(input_output) == 0:
                reuse = False
                # Model/Graph
                if type == 'End2End':
                    _, tf_model = prepare_GBP_End2End(
                        num_classes, inputT=x, sess=sess,
                        checkpoint_dir='./{}_{}/'.format(dataset, type), reuse=reuse)
                else:
                    _, tf_model = prepare_Resnet(
                        num_classes, inputT=x, sess=sess,
                        checkpoint_dir='./{}_{}/'.format(dataset, type), reuse=reuse)
                input_output.append(x)
                input_output.append(tf_model.logits)
            else:
                reuse = True
                # Model/Graph
                if type == 'End2End':
                    _, tf_model = prepare_GBP_End2End(num_classes, inputT=x, reuse=reuse)
                else:
                    _, tf_model = prepare_Resnet(num_classes, inputT=x, reuse=reuse)
                input_output.append(x)
                input_output.append(tf_model.logits)
            return tf_model.logits

        # create an attackable model for cleverhans
        model = CallableModelWrapper(
            lambda placeholder: modelBuilder(placeholder, num_classes, dataset,
                                             type, sess, input_output), 'logits')

        # TODO: check the configurations
        if attack_type == "FGM":
            attack = FastGradientMethod(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "CWL2":
            attack = CarliniWagnerL2(model, back='tf', sess=sess)
            params = {'confidence': 0.9, 'batch_size': 128, 'learning_rate': 0.005}
        elif attack_type == "DF":
            attack = DeepFool(model, back='tf', sess=sess)
            params = {}
        elif attack_type == "ENM":
            # configurations checked, quickly tested
            attack = ElasticNetMethod(model, back='tf', sess=sess)
            params = {'confidence': 0.9, 'batch_size': 128, 'learning_rate': 0.005}
        elif attack_type == "FFA":
            # configuration checked
            attack = FastFeatureAdversaries(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'eps_iter': 0.005, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "LBFGS":
            attack = LBFGS(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "MEA":
            attack = MadryEtAl(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "MIM":
            attack = MomentumIterativeMethod(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "SMM":
            attack = SaliencyMapMethod(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "SPSA":
            attack = SPSA(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "VATM":
            attack = vatm(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "VAM":
            attack = VirtualAdversarialMethod(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        else:
            raise Exception("I don't recognize {} this attack type. "
                            "I will use FGM instead.".format(attack_type))

        # tf operation
        adv_x = attack.generate(x, **params)

        # generate the adversarial examples
        adv_vals = sess.run(adv_x, feed_dict={x: x_test[:size]})

        # notice that "adv_vals" may contain NaNs because of the failure of the attack
        # also the input may not be perturbed at all because of the failure of the attack
        to_delete = []
        for idx, adv in enumerate(adv_vals):
            # for nan
            if np.isnan(adv).any():
                to_delete.append(idx)
            # for no perturbation
            if np.array_equiv(adv, x_test[idx]):
                to_delete.append(idx)

        # cleaning
        adv_vals_cleaned = np.delete(adv_vals, to_delete, axis=0)
        ori_cleaned = np.delete(x_test[:size], to_delete, axis=0)
        y_cleaned = np.delete(y_test[:size], to_delete, axis=0)

        if len(adv_vals_cleaned) == 0:
            print("No adversarial example is generated!")
            return

        print("{} out of {} adversarial examples are generated.".format(
            len(adv_vals_cleaned), size))
        print("The average L_inf distortion is {}".format(
            np.mean([np.max(np.abs(adv - ori_cleaned[idx]))
                     for idx, adv in enumerate(adv_vals_cleaned)])))

        # TODO: visualize the adv_vals
        accuracy = model_eval(sess, input_output[0], y,
                              tf.nn.softmax(input_output[1]),
                              x_test[:size], y_test[:size], args=eval_params)
        print('Test accuracy on normal examples: %0.4f' % accuracy)

        accuracy = model_eval(sess, input_output[0], y,
                              tf.nn.softmax(input_output[1]),
                              adv_vals_cleaned, y_cleaned, args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
def iterate_through_cwl2_attacks():
    tf.logging.set_verbosity(tf.logging.INFO)

    input_dir = FLAGS.input_image_dir
    metadata_file_path = FLAGS.metadata_file_path
    num_images = len(os.listdir(input_dir))
    batch_shape = (num_images, 299, 299, 3)
    num_classes = 1001
    batch_size = attack_name_to_params[ATTACKS.CARLINI_WAGNER]['batch_size']

    images, labels, target_classes = load_images(input_dir, metadata_file_path,
                                                 batch_shape, num_classes)

    list_param_dict = expand_param_dict(
        attack_name_to_params[ATTACKS.CARLINI_WAGNER],
        attack_name_to_configurable_params[ATTACKS.CARLINI_WAGNER])

    save_dir = 'saves'
    os.makedirs(save_dir, exist_ok=True)

    for idx, params in enumerate(list_param_dict):
        tf.reset_default_graph()

        logger.info('Running attack with parameters: {}'.format(params))
        logger.info('Current index of parameters: {}/{}'.format(
            idx, len(list_param_dict)))

        # Get save path
        adv_imgs_save_path = get_attack_images_filename_prefix(
            attack_name=ATTACKS.CARLINI_WAGNER,
            params=params,
            model='inception',
            targeted_prefix='targeted')
        adv_imgs_save_path = os.path.join(save_dir, adv_imgs_save_path)

        # Run inference
        graph = tf.Graph()
        with graph.as_default():
            sess = tf.Session(graph=graph)

            # Prepare graph
            x_input = tf.placeholder(tf.float32, shape=(batch_size,) + batch_shape[1:])
            y_label = tf.placeholder(tf.int32, shape=(batch_size, num_classes))
            y_target = tf.placeholder(tf.int32, shape=(batch_size, num_classes))
            model = InceptionModel(num_classes)

            cwl2 = True
            if cwl2:
                attack = CarliniWagnerL2(model=model, sess=sess)
                x_adv = attack.generate(x_input, y_target=y_target, **params)
            else:
                attack = SPSA(model=model)
                x_adv = attack.generate(x_input, y_target=y_label, epsilon=4. / 255,
                                        num_steps=30, early_stop_loss_threshold=-1.,
                                        batch_size=32, spsa_iters=16, is_debug=True)

            logits = model.get_logits(x_input)
            acc = _top_k_accuracy(logits, tf.argmax(y_label, axis=1), k=1)
            success_rate = _top_k_accuracy(logits, tf.argmax(y_target, axis=1), k=1)

            # Run computation
            saver = tf.train.Saver(slim.get_model_variables())
            saver.restore(sess, save_path=FLAGS.checkpoint_path)

            list_adv_images = []
            if num_images % batch_size == 0:
                num_batches = int(num_images / batch_size)
            else:
                num_batches = int(num_images / batch_size + 1)

            for i in tqdm.tqdm(range(num_batches)):
                feed_dict_i = {
                    x_input: images[i * batch_size:(i + 1) * batch_size],
                    y_target: target_classes[i * batch_size:(i + 1) * batch_size]
                }
                adv_img = sess.run(x_adv, feed_dict=feed_dict_i)
                list_adv_images.append(adv_img)

            adv_images = np.concatenate(list_adv_images)
            np.save(adv_imgs_save_path, adv_images)

            acc_store = []
            succ_store = []
            for i in tqdm.tqdm(range(num_batches)):
                feed_dict_i = {
                    x_input: adv_images[i * batch_size:(i + 1) * batch_size],
                    y_target: target_classes[i * batch_size:(i + 1) * batch_size],
                    y_label: labels[i * batch_size:(i + 1) * batch_size]
                }
                succ_batch, acc_batch = sess.run([success_rate, acc],
                                                 feed_dict=feed_dict_i)
                acc_store.extend(acc_batch)
                succ_store.extend(succ_batch)

            logger.info('Accuracy is: {:.4f}'.format(np.mean(acc_store)))
            logger.info('Success Rate is: {:.4f}'.format(np.mean(succ_store)))
def mnist_tutorial_cw(train_start=0, train_end=60000, test_start=0,
                      test_end=10000, viz_enabled=True, nb_epochs=6,
                      batch_size=128, source_samples=10, learning_rate=0.001,
                      attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = LossCrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x, y, x_train, y_train, args=train_params,
              save=os.path.exists("models"), rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerL2(model, back='tf', sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0]
                for i in range(nb_classes)]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[idxs]], dtype=np.float32)
        else:
            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[:source_samples]],
                                  dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples, dtype=np.float32).reshape(
            (source_samples * nb_classes, nb_classes))
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None
        yname = "y"

    cw_params = {
        'binary_search_steps': 1,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': 0.1,
        'batch_size': source_samples * nb_classes if targeted else source_samples,
        'initial_const': 10
    }

    adv = cw.generate_np(adv_inputs, **cw_params)

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_accuracy = model_eval(sess, x, y, preds, adv, adv_ys, args=eval_params)
    else:
        if viz_enabled:
            adv_accuracy = 1 - model_eval(sess, x, y, preds, adv, y_test[idxs],
                                          args=eval_params)
        else:
            adv_accuracy = 1 - model_eval(sess, x, y, preds, adv,
                                          y_test[:source_samples], args=eval_params)

    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
def attack_classifier(sess, x, model, x_test, attack_method="fgsm", target=None,
                      batch_size=128):
    if attack_method == "fgsm":
        from cleverhans.attacks import FastGradientMethod
        params = {'eps': 8 / 255,
                  'clip_min': 0.,
                  'clip_max': 1.}
        if target is not None:
            params["y_target"] = tf.constant(
                np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = FastGradientMethod(model, sess=sess)

    elif attack_method == "basic_iterative":
        from cleverhans.attacks import BasicIterativeMethod
        params = {'eps': 8. / 255,
                  'eps_iter': 1. / 255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.,
                  'ord': np.inf}
        if target is not None:
            params["y_target"] = tf.constant(
                np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = BasicIterativeMethod(model, sess=sess)

    elif attack_method == "momentum_iterative":
        from cleverhans.attacks import MomentumIterativeMethod
        params = {'eps': 8 / 255,
                  'eps_iter': 1 / 255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.}
        if target is not None:
            params["y_target"] = tf.constant(
                np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = MomentumIterativeMethod(model, sess=sess)

    elif attack_method == "saliency":
        from cleverhans.attacks import SaliencyMapMethod
        params = {'theta': 8 / 255,
                  'gamma': 0.1,
                  'clip_min': 0.,
                  'clip_max': 1.}
        assert target is None
        method = SaliencyMapMethod(model, sess=sess)

    elif attack_method == "virtual":
        from cleverhans.attacks import VirtualAdversarialMethod
        params = {'eps': 8 / 255,
                  'num_iterations': 10,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
        assert target is None
        method = VirtualAdversarialMethod(model, sess=sess)

    elif attack_method == "cw":
        from cleverhans.attacks import CarliniWagnerL2
        params = {"confidence": 0,
                  "batch_size": 128,
                  "learning_rate": 1e-4,
                  "binary_search_steps": 10,
                  "max_iterations": 1000,
                  "abort_early": True,
                  "initial_const": 1e-2,
                  "clip_min": 0,
                  "clip_max": 1}
        if target is not None:
            params["y_target"] = tf.constant(
                np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = CarliniWagnerL2(model, sess=sess)

    elif attack_method == "elastic_net":
        from cleverhans.attacks import ElasticNetMethod
        params = {"fista": "FISTA",
                  "beta": 0.1,
                  "decision_rule": "EN",
                  "confidence": 0,
                  "batch_size": 128,
                  "learning_rate": 1e-4,
                  "binary_search_steps": 10,
                  "max_iterations": 1000,
                  "abort_early": True,
                  "initial_const": 1e-2,
                  "clip_min": 0,
                  "clip_max": 1}
        if target is not None:
            params["y_target"] = tf.constant(
                np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = ElasticNetMethod(model, sess=sess)

    elif attack_method == "deepfool":
        from cleverhans.attacks import DeepFool
        params = {"nb_candidate": 10,
                  "overshoot": 1e-3,
                  "max_iter": 100,
                  "nb_classes": 10,
                  "clip_min": 0,
                  "clip_max": 1}
        assert target is None
        method = DeepFool(model, sess=sess)

    elif attack_method == "lbfgs":
        from cleverhans.attacks import LBFGS
        params = {'batch_size': 128,
                  "binary_search_steps": 10,
                  "max_iterations": 1000,
                  "initial_const": 1e-2,
                  'clip_min': 0.,
                  'clip_max': 1.}
        assert target is not None
        params["y_target"] = tf.constant(
            np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = LBFGS(model, sess=sess)

    elif attack_method == "madry":
        from cleverhans.attacks import MadryEtAl
        params = {'eps': 8 / 255,
                  'eps_iter': 1 / 255,
                  'nb_iter': 10,
                  'ord': np.inf,
                  'clip_min': 0.,
                  'clip_max': 1.}
        if target is not None:
            params["y_target"] = tf.constant(
                np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
        method = MadryEtAl(model, sess=sess)

    elif attack_method == "SPSA":
        from cleverhans.attacks import SPSA
        params = {'epsilon': 1 / 255,
                  'num_steps': 10,
                  'is_targeted': False,
                  'early_stop_loss_threshold': None,
                  'learning_rate': 0.01,
                  'delta': 0.01,
                  'batch_size': 128,
                  'spsa_iters': 1,
                  'is_debug': False}
        if target is not None:
            params["y_target"] = tf.constant(
                np.repeat(np.eye(10)[target:target + 1], batch_size, axis=0))
            params["is_targeted"] = True
        method = SPSA(model, sess=sess)

    else:
        raise ValueError("Can not recognize this attack method: %s" % attack_method)

    adv_x = method.generate(x, **params)
    num_batch = x_test.shape[0] // batch_size
    adv_imgs = []
    for i in range(num_batch):
        x_feed = x_test[i * batch_size:(i + 1) * batch_size]
        # y_feed = y_test[i * batch_size:(i + 1) * batch_size]
        adv_img = sess.run(adv_x, feed_dict={x: x_feed})
        adv_imgs.append(adv_img)

    adv_imgs = np.concatenate(adv_imgs, axis=0)
    return adv_imgs
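# Usage sketch (illustrative; assumes `model` is a cleverhans Model wrapper, `x` is
# the matching input placeholder, and `x_test` is scaled to [0, 1]):
#
#   adv_imgs = attack_classifier(sess, x, model, x_test, attack_method="cw",
#                                target=None, batch_size=128)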