def create_adv_examples(model, input_t, x_to_adv, attack_dict):
    """
    This fn may seem bizarre and pointless, but the point of it is to enable
    the entire attack to be specified as a dict from the command line without
    editing this script, which is a convenient thing to be able to do, both
    for scripting and repeatability.
    """
    if attack_dict['method'] == 'fgm':
        attack = attacks.FastGradientMethod(model, sess=K.get_session(), back='tf')
    elif attack_dict['method'] == 'bim':
        attack = attacks.BasicIterativeMethod(model, sess=K.get_session(), back='tf')
    elif attack_dict['method'] == 'mim':
        attack = attacks.MomentumIterativeMethod(model, sess=K.get_session(), back='tf')
    else:
        assert False, 'Current attack needs to be added to the create attack fn'
    # this hurts a little
    adv_tensor = attack.generate(
        input_t,
        **{k: a for k, a in attack_dict.items() if k != 'method'})  # the 'method' key is consumed by this fn
    x_adv = batch_eval(adv_tensor, input_t, x_to_adv,
                       batch_size=args.batch_size,
                       verbose="Generating adv examples")
    return x_adv
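A minimal usage sketch for the function above, assuming `model` is a CleverHans-wrapped Keras model, `input_t` is the input placeholder, and `x_test` is a NumPy batch scaled to [0, 1]; the `attack_dict` shown is hypothetical and mirrors what would be parsed from the command line (every key except 'method' is forwarded to `generate`):

# Hypothetical attack_dict as it might arrive from the command line.
attack_dict = {
    'method': 'fgm',    # selects attacks.FastGradientMethod above
    'eps': 8. / 255.,   # assumed perturbation budget
    'clip_min': 0.,
    'clip_max': 1.,
}
x_adv = create_adv_examples(model, input_t, x_test, attack_dict)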
def __init__(self, model, prob=0.0, actionProbThr=1.0):
    super(AttackModel, self).__init__()
    self.sess = model.sess
    self.layer_names = ['inputs', 'logits', 'probs']
    self.layers = [model.inputs, model.logits, model.probs]
    self.input_shape = tuple(model.inputs.shape.as_list())
    self.nb_classes = model.logits.get_shape()[1].value
    self.attackModels = {}
    self.attackModels["lbfgs"] = atts.LBFGS(self, "tf", self.sess)
    self.attackModels["fgsm"] = atts.FastGradientMethod(self, "tf", self.sess)
    self.attackModels["basicIt"] = atts.BasicIterativeMethod(self, "tf", self.sess)
    self.attackModels["pgd"] = atts.MadryEtAl(self, "tf", self.sess)
    self.attackModels["momenIt"] = atts.MomentumIterativeMethod(self, "tf", self.sess)
    self.attackModels["jsma"] = atts.SaliencyMapMethod(self, "tf", self.sess)
    self.attackModels["cwl2"] = atts.CarliniWagnerL2(self, "tf", self.sess)
    self.attackModels["ead"] = atts.ElasticNetMethod(self, "tf", self.sess)
    self.attackModels["deepfool"] = atts.DeepFool(self, "tf", self.sess)
    self.attackModels["spsa"] = atts.SPSA(self, "tf", self.sess)
    self.attackModels["featureAdvs"] = atts.FastFeatureAdversaries(self, "tf", self.sess)
    self.availableAttacks = list(self.attackModels.keys())
    self.availableAttacks.append("ofsm")
    self.ofsm = OFSM()
    self.availableAttacks.append("gauss")
    self.gaussEps = 0.0
    self.attack = None
    self.lastAttack = None
    self.attackProb = prob
    self.actionProbThr = actionProbThr
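A hedged sketch of how the registry built above might be dispatched; `wrapped` stands in for a constructed AttackModel instance, and `x_batch` plus the attack parameters are illustrative, not taken from the source:

# Hypothetical dispatch through the attackModels registry built in __init__.
attack_name = "fgsm"  # any key present in wrapped.availableAttacks
assert attack_name in wrapped.availableAttacks
adv_tensor = wrapped.attackModels[attack_name].generate(
    wrapped.layers[0],  # the 'inputs' tensor registered above
    eps=8. / 255., clip_min=0., clip_max=1.)
adv_batch = wrapped.sess.run(adv_tensor,
                             feed_dict={wrapped.layers[0]: x_batch})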
def gen_adv(wrap_model, model_input, attack_method, eps, eta, def_iter,
            clip_min=0., clip_max=1.):
    """Generate adversarial examples using the Keras wrapper."""
    if attack_method == 'MadryEtAl':
        att = attacks.MadryEtAl(wrap_model)
        att_params = {
            'eps': eps,
            'eps_iter': eta,
            'clip_min': clip_min,
            'clip_max': clip_max,
            'nb_iter': def_iter
        }
    elif attack_method == 'MomentumIterativeMethod':
        att = attacks.MomentumIterativeMethod(wrap_model)
        att_params = {
            'eps': eps,
            'eps_iter': eta,
            'clip_min': clip_min,
            'clip_max': clip_max,
            'nb_iter': def_iter
        }
    elif attack_method == 'FastGradientMethod':
        att = attacks.FastGradientMethod(wrap_model)
        att_params = {'eps': eps, 'clip_min': clip_min, 'clip_max': clip_max}
    else:
        # Guard against silently falling through with `att` undefined.
        raise ValueError('Unsupported attack_method: {}'.format(attack_method))

    print('attack_method: {}'.format(attack_method))
    for k in att_params.keys():
        print('{}: {}'.format(k, att_params[k]))

    adv_x = tf.stop_gradient(att.generate(model_input, **att_params))
    return adv_x
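An illustrative call, assuming `wrap_model` is a KerasModelWrapper around a Keras `model` and `x` is the model's input placeholder; the budget and step size are example values only. The `tf.stop_gradient` above keeps training gradients from flowing back through the attack graph, so the returned tensor can feed an adversarial-training loss directly:

# Sketch: 10-step PGD with an 8/255 budget and 2/255 step size.
adv_x = gen_adv(wrap_model, x, 'MadryEtAl',
                eps=8. / 255., eta=2. / 255., def_iter=10)
logits_adv = model(adv_x)  # forward pass on adversarial inputs for the loss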
eps = 8. / 256.
if FLAGS.attack_method == 'MadryEtAl':
    print('apply ' + is_target + 'PGD' + ' for advtrain')
    att = attacks.MadryEtAl(wrap_ensemble)
    att_params = {
        'eps': eps,
        # 'eps_iter': 3. * eps / 10.,
        'eps_iter': 2. / 256.,
        'clip_min': clip_min,
        'clip_max': clip_max,
        'nb_iter': 10,
        'y_target': y_target
    }
elif FLAGS.attack_method == 'MomentumIterativeMethod':
    print('apply ' + is_target + 'MIM' + ' for advtrain')
    att = attacks.MomentumIterativeMethod(wrap_ensemble)
    att_params = {
        'eps': eps,
        # 'eps_iter': 3. * eps / 10.,
        'eps_iter': 2. / 256.,
        'clip_min': clip_min,
        'clip_max': clip_max,
        'nb_iter': 10,
        'y_target': y_target
    }
elif FLAGS.attack_method == 'FastGradientMethod':
    print('apply ' + is_target + 'FGSM' + ' for advtrain')
    att = attacks.FastGradientMethod(wrap_ensemble)
    att_params = {
        'eps': eps,
        'clip_min': clip_min,
        'clip_max': clip_max
    }
def adversarial_attack(data, args):
    # Set attack parameters
    eps = float(args.eps)
    order = args.lorder
    batch_size = 100

    # Set evaluation parameters
    eval_params = {'batch_size': batch_size}

    # Object used to keep track of (and return) key accuracies
    report = utils.AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    utils.set_log_level(logging.DEBUG)

    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    sess = tf.Session(config=tf_config)
    keras.backend.set_session(sess)

    # Get CIFAR10 data
    x_train, y_train = data.train_data, data.train_label
    x_test, y_test = data.test_data, data.test_label
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholders
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    def evaluate(preds, x_set, y_set, report_key, is_adv=None):
        acc = utils_tf.model_eval(sess, x, y, preds, x_set, y_set,
                                  args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    # Load baseline model
    model = resnet(input_shape=x_train.shape[1:], depth=29,
                   num_classes=10).build()
    model.load_weights(args.model)
    wrapper = utils_keras.KerasModelWrapper(model)
    preds = model(x)
    evaluate(preds, x_test, y_test, 'clean_train_clean_eval', False)

    if args.attack == 'fgsm':
        # Fast Gradient Sign Method (FGSM) attack
        fgsm_params = {
            'eps': eps,
            'ord': order,
            'clip_min': 0.,
            'clip_max': 1.
        }
        fgsm = attacks.FastGradientMethod(wrapper, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model(adv_x)
    elif args.attack == 'ifgsm':
        # Basic Iterative Method (I-FGSM) attack
        ifgsm_params = {
            'eps': eps,
            'eps_iter': eps / 10,
            'ord': order,
            'clip_min': 0.,
            'clip_max': 1.
        }
        ifgsm = attacks.BasicIterativeMethod(wrapper, sess=sess)
        adv_x = ifgsm.generate(x, **ifgsm_params)
        preds_adv = model(adv_x)
    elif args.attack == 'mifgsm':
        # Momentum Iterative Method (MI-FGSM) attack
        mifgsm_params = {
            'eps': eps,
            'eps_iter': eps / 10,
            'ord': order,
            'clip_min': 0.,
            'clip_max': 1.
        }
        mifgsm = attacks.MomentumIterativeMethod(wrapper, sess=sess)
        adv_x = mifgsm.generate(x, **mifgsm_params)
        preds_adv = model(adv_x)
    elif args.attack == 'jsma':
        # Jacobian-based Saliency Map Attack (JSMA)
        jsma_params = {
            'theta': 1.,
            'gamma': 1.,
            'ord': order,
            'clip_min': 0.,
            'clip_max': 1.
        }
        jsma = attacks.SaliencyMapMethod(wrapper, sess=sess)
        adv_x = jsma.generate(x, **jsma_params)
        preds_adv = model(adv_x)
    elif args.attack == 'lbfgs':
        # L-BFGS attack (targeted)
        # y_target = tf.placeholder(tf.float32, shape=(None, nb_classes))
        lbfgs_params = {
            'y_target': tf.convert_to_tensor(get_random_targets(data.test_label)),
            'batch_size': batch_size,
            'binary_search_steps': 4,
            'max_iterations': 1000,
            'clip_min': 0.,
            'clip_max': 1.
        }
        lbfgs = attacks.LBFGS(wrapper, sess=sess)
        adv_x = lbfgs.generate(x, **lbfgs_params)
        preds_adv = model(adv_x)
    elif args.attack == 'deepfool':
        # DeepFool attack
        deepfool_params = {
            'nb_candidate': 10,
            'overshoot': 0.02,
            'max_iterations': 100,
            'clip_min': 0.,
            'clip_max': 1.
        }
        deepfool = attacks.DeepFool(wrapper, sess=sess)
        adv_x = deepfool.generate(x, **deepfool_params)
        preds_adv = model(adv_x)
    elif args.attack == 'cw':
        # Carlini-Wagner L2 attack
        cw_params = {
            'batch_size': batch_size,
            'binary_search_steps': 4,
            'max_iterations': 1000,
            'abort_early': True,
            'clip_min': 0.,
            'clip_max': 1.
        }
        cw = attacks.CarliniWagnerL2(wrapper, sess=sess)
        adv_x = cw.generate(x, **cw_params)
        preds_adv = model(adv_x)
    elif args.attack == 'pgd':
        # Projected Gradient Descent (PGD) attack
        pgd_params = {
            'eps': eps,
            'eps_iter': eps / 10,
            'ord': order,
            'clip_min': 0.,
            'clip_max': 1.
        }
        pgd = attacks.ProjectedGradientDescent(wrapper, sess=sess)
        adv_x = pgd.generate(x, **pgd_params)
        preds_adv = model(adv_x)

    # Evaluate the accuracy on adversarial examples
    '''
    if args.attack == 'cw':
        acc = utils_tf.model_eval(sess, x, y, preds_adv, x_test, y_test,
                                  args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % acc)
    elif args.attack == 'lbfgs':
        acc = utils_tf.model_eval(sess, x, y, preds_adv, x_test, y_test,
                                  args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % acc)
    else:
        evaluate(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)
    '''

    # Generate adversarial examples batch by batch
    with sess.as_default():
        adv = np.zeros(x_test.shape, dtype=np.float32)
        n = batch_size
        for i in range(x_test.shape[0] // n):
            adv[i * n:(i + 1) * n] = sess.run(
                adv_x, feed_dict={x: x_test[i * n:(i + 1) * n]})
    return adv
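A usage sketch for the function above, assuming an `args` namespace carrying the fields the function reads and a `data` object exposing the train/test arrays (both names come from the surrounding script); the field values below are illustrative only:

# Hypothetical driver; assumes argparse and numpy are imported.
args = argparse.Namespace(attack='pgd', eps='0.031', lorder=np.inf,
                          model='saved_models/resnet29.h5')
adv_examples = adversarial_attack(data, args)
print('generated', adv_examples.shape[0], 'adversarial examples')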
wrap_ensemble = KerasModelWrapper(model_ensemble)
wrap_ensemble_baseline = KerasModelWrapper(model_ensemble_baseline)

# Load model
model.load_weights(filepath)
model_baseline.load_weights(filepath_baseline)

# Initialize the attack method
if FLAGS.attack_method == 'MadryEtAl':
    att = attacks.MadryEtAl(wrap_ensemble)
    att_baseline = attacks.MadryEtAl(wrap_ensemble_baseline)
elif FLAGS.attack_method == 'FastGradientMethod':
    att = attacks.FastGradientMethod(wrap_ensemble)
    att_baseline = attacks.FastGradientMethod(wrap_ensemble_baseline)
elif FLAGS.attack_method == 'MomentumIterativeMethod':
    att = attacks.MomentumIterativeMethod(wrap_ensemble)
    att_baseline = attacks.MomentumIterativeMethod(wrap_ensemble_baseline)
elif FLAGS.attack_method == 'BasicIterativeMethod':
    att = attacks.BasicIterativeMethod(wrap_ensemble)
    att_baseline = attacks.BasicIterativeMethod(wrap_ensemble_baseline)

# Consider the attack to be constant
eval_par = {'batch_size': 100}
eps_ = FLAGS.eps
print('eps is %.3f' % eps_)
if FLAGS.attack_method == 'FastGradientMethod':
    att_params = {'eps': eps_, 'clip_min': clip_min, 'clip_max': clip_max}
else:
    att_params = {
        'eps': eps_,
        'eps_iter': eps_ * 1.0 / 10,