def print_accuracies(filepath, train_start=TRAIN_START, train_end=TRAIN_END,
                     test_start=TEST_START, test_end=TEST_END,
                     batch_size=BATCH_SIZE, which_set=WHICH_SET,
                     base_eps_iter=BASE_EPS_ITER, nb_iter=NB_ITER):
    """
    Load a saved model and print its accuracy on several data distributions.

    Three evaluations are run in order: clean data, the `Semantic` attack and
    a single PGD attack. Running one attack per example gives a quick
    estimate of the failure rate as long as the model does not suffer from
    gradient masking; it is meant for development work rather than
    publication. An attack bundler gives a more accurate estimate.

    :param filepath: path to the model to evaluate
    :param train_start: index of first training set example to use
    :param train_end: index of last training set example to use
    :param test_start: index of first test set example to use
    :param test_end: index of last test set example to use
    :param batch_size: size of evaluation batches
    :param which_set: 'train' or 'test'
    :param base_eps_iter: PGD step size if the data were in [0, 1]; it is
      rescaled by the actual data range. When None, a per-dataset default
      is used (2/255 for CIFAR, 0.1 for MNIST).
    :param nb_iter: number of PGD iterations
    :raises NotImplementedError: if the dataset class is neither a CIFAR nor
      an MNIST variant
    """
    # Fixed graph-level seed so that repeated runs are comparable.
    tf.set_random_seed(20181014)
    set_log_level(logging.INFO)
    sess = tf.Session()

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0

    # Re-create the dataset the model was trained on, restricted to the
    # requested index ranges.
    factory = model.dataset_factory
    range_overrides = (
        ('train_start', train_start),
        ('train_end', train_end),
        ('test_start', test_start),
        ('test_end', test_end),
    )
    for key, value in range_overrides:
        factory.kwargs[key] = value
    dataset = factory()

    center = dataset.kwargs['center']
    max_val = dataset.kwargs['max_val']
    value_range = max_val * (1. + center)
    min_value = 0. - center * max_val

    # The threat model is chosen from the dataset class name.
    dataset_cls = str(factory.cls)
    if 'CIFAR' in dataset_cls:
        base_eps = 8. / 255.
        fallback_eps_iter = 2. / 255.
    elif 'MNIST' in dataset_cls:
        base_eps = .3
        fallback_eps_iter = .1
    else:
        raise NotImplementedError(dataset_cls)
    if base_eps_iter is None:
        base_eps_iter = fallback_eps_iter

    pgd_params = dict(
        eps=base_eps * value_range,
        eps_iter=base_eps_iter * value_range,
        nb_iter=nb_iter,
        clip_min=min_value,
        clip_max=max_val,
    )

    x_data, y_data = dataset.get_set(which_set)

    semantic = Semantic(model, center, max_val, sess)
    pgd = ProjectedGradientDescent(model, sess=sess)

    # (name, attack, attack_params, batch size override or None)
    jobs = [
        ('clean', None, None, None),
        ('Semantic', semantic, None, None),
        ('pgd', pgd, pgd_params, None),
    ]

    for name, attack, attack_params, job_batch_size in jobs:
        effective_batch_size = (
            batch_size if job_batch_size is None else job_batch_size)
        started = time.time()
        # `devices` is a module-level name defined outside this function.
        acc = accuracy(sess, model, x_data, y_data,
                       batch_size=effective_batch_size, devices=devices,
                       attack=attack, attack_params=attack_params)
        finished = time.time()
        print("Accuracy on " + name + " examples: ", acc)
        print("Evaluation took", finished - started, "seconds")
def do_train(train_start=TRAIN_START, train_end=60000, test_start=0,
             test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
             learning_rate=LEARNING_RATE, backprop_through_attack=False,
             nb_filters=NB_FILTERS, num_threads=None, use_ema=USE_EMA,
             ema_decay=EMA_DECAY):
    """
    Adversarially train an MNIST model with PGD and save it.

    The model is saved to ``FLAGS.save_path`` after every epoch and once more
    when training finishes. The function refuses to run (prints a message and
    calls ``quit()``) if that path already exists. Progress bookkeeping lives
    in the module-level globals ``epoch``, ``last_test_print``,
    ``last_train_print``, ``best_result`` and ``best_epoch``.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train for
    :param batch_size: size of training batches
    :param learning_rate: learning rate passed to the trainer
    :param backprop_through_attack: accepted for interface compatibility;
      not read by this function
    :param nb_filters: passed to ``Model`` as ``filters``
    :param num_threads: when truthy, the session is created with
      ``intra_op_parallelism_threads=1``
    :param use_ema: forwarded to ``train``
    :param ema_decay: *name* of a module-level callable; it is looked up
      through ``globals()`` and forwarded to ``train``
    """
    # This dump must run before any other local is bound: locals() is used
    # to list exactly the call arguments.
    print('Parameters')
    print('-' * 79)
    for arg_name, arg_value in sorted(locals().items()):
        print('%-32s %s' % (arg_name, arg_value))
    print('-' * 79)

    if os.path.exists(FLAGS.save_path):
        print("Model " + FLAGS.save_path +
              " already exists. Refusing to overwrite.")
        quit()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create the one and only TF session. The original code created a second,
    # unconfigured session further down, which silently discarded this one
    # together with its thread configuration.
    # NOTE(review): the thread count is pinned to 1 rather than set to
    # `num_threads` -- confirm that this is intended.
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    dataset = MNIST(train_start=train_start, train_end=train_end,
                    test_start=test_start, test_end=test_end, center=True)

    # Use Image Parameters
    img_rows, img_cols, nchannels = dataset.x_train.shape[1:4]
    nb_classes = dataset.NB_CLASSES

    # Input placeholders. They are not referenced below, but they are kept so
    # that the sequence of ops in the graph -- and therefore the op-level
    # seeds derived from the graph seed -- stays the same as before.
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    train_params = {
        'nb_epochs': nb_epochs,
        'learning_rate': learning_rate,
        'batch_size': batch_size,
    }
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(x_set, y_set, is_adv=None):
        """Return accuracy on (x_set, y_set); print it if is_adv is set."""
        acc = accuracy(sess, model, x_set, y_set)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'clean'
        if report_text:
            print('Accuracy on %s examples: %0.4f' % (report_text, acc))
        return acc

    model = Model(filters=nb_filters)
    model.dataset_factory = dataset.get_factory()

    pgd = ProjectedGradientDescent(model=model, sess=sess)

    # The dataset is loaded with center=True, so value_range is 2 and the
    # clip bounds below are [-1, 1].
    center = dataset.kwargs['center']
    value_range = 1. + center
    base_eps = 8. / 255.

    attack_params = {
        'eps': base_eps * value_range,
        'clip_min': -float(center),
        'clip_max': float(center),
        'eps_iter': (2. / 255.) * value_range,
        'nb_iter': 40.
    }

    loss = CrossEntropy(
        model,
        attack=pgd,
        adv_coeff=1.,
        attack_params=attack_params,
    )

    print_test_period = 10
    print_train_period = 50

    def evaluate():
        """Per-epoch callback: save the model and periodically evaluate."""
        global epoch
        global last_test_print
        global last_train_print
        global best_result
        global best_epoch
        with sess.as_default():
            print("Saving to ", FLAGS.save_path)
            save(FLAGS.save_path, model)
        # Evaluate on the test set every `print_test_period` epochs, or when
        # more than 300 seconds have passed since the last test evaluation.
        if (epoch % print_test_period == 0 or
                time.time() - last_test_print > 300):
            t1 = time.time()
            result = do_eval(dataset.x_test, dataset.y_test, False)
            t2 = time.time()
            if result >= best_result:
                if result > best_result:
                    best_epoch = epoch
                else:
                    # Keep track of ties
                    assert result == best_result
                    if not isinstance(best_epoch, list):
                        if best_epoch == -1:
                            best_epoch = []
                        else:
                            best_epoch = [best_epoch]
                    best_epoch.append(epoch)
                best_result = result
            print("Best so far: ", best_result)
            print("Best epoch: ", best_epoch)
            last_test_print = t2
            print("Test eval time: ", t2 - t1)
        # Same idea for the training set, with a 3000 second time limit.
        if (epoch % print_train_period == 0 or
                time.time() - last_train_print > 3000):
            t1 = time.time()
            print("Training set: ")
            do_eval(dataset.x_train, dataset.y_train, False)
            t2 = time.time()
            print("Train eval time: ", t2 - t1)
            last_train_print = t2
        epoch += 1

    optimizer = None

    # `ema_decay` arrives as the name of a module-level callable.
    ema_decay = globals()[ema_decay]
    assert callable(ema_decay)

    train(sess, loss, dataset.x_train, dataset.y_train, evaluate=evaluate,
          optimizer=optimizer, args=train_params, rng=rng,
          var_list=model.get_params(), use_ema=use_ema, ema_decay=ema_decay)

    # Make sure we always evaluate on the last epoch, so pickling bugs are
    # more obvious
    if (epoch - 1) % print_test_period != 0:
        do_eval(dataset.x_test, dataset.y_test, False)
    if (epoch - 1) % print_train_period != 0:
        print("Training set: ")
        do_eval(dataset.x_train, dataset.y_train, False)

    with sess.as_default():
        save(FLAGS.save_path, model)
def save_pgd_attacked_images(original_class, target_class, attack_strength,
                             nb_iter=50, seed=1000):
    """
    Run a targeted L2 PGD attack on benign images and store the successes.

    Every image of `original_class` is attacked towards `target_class`.
    Adversarial images that the model classifies as the target label are
    appended to the ``images`` dataset of a new HDF5 file, and the index of
    the corresponding benign image is appended to its ``indices`` dataset.

    :param original_class: class whose benign images are attacked
    :param target_class: class name the attack should produce; looked up in
      the mapping returned by ``load_labels_by_name``
    :param attack_strength: PGD ``eps``; the step size is ``eps / 5``
    :param nb_iter: number of PGD iterations
    :param seed: seed for ``random``, NumPy and TensorFlow
    :raises AssertionError: if the benign dataset does not exist, if the
      output file already exists, or if the attack produces NaNs
    """
    random.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)

    eps = attack_strength

    labels_by_name = load_labels_by_name()
    target_label = labels_by_name[target_class].lucid_label

    benign_dataset_path = DataPaths.get_benign_images_datapath(original_class)
    assert benign_dataset_path.exists()

    attacked_dataset_path = DataPaths.get_attacked_images_datapath(
        original_class, target_class,
        attack_name='pgd', attack_strength=eps)
    assert not attacked_dataset_path.exists()
    print('Saving attacked images to %s' % attacked_dataset_path)

    img_dataset = hdf5utils.load_image_dataset_from_file(benign_dataset_path)

    # The context manager guarantees the output file is flushed and closed,
    # including when an assertion below fails half-way through.
    with h5py.File(attacked_dataset_path, 'w') as output_file:
        out_dataset = hdf5utils.create_image_dataset(
            output_file, dataset_name='images')
        indices_dataset = hdf5utils.create_dataset(
            output_file, data_shape=(1, ), dataset_name='indices')

        graph = tf.Graph()
        with graph.as_default():
            # The graph-level seed is a property of the graph that is the
            # default when it is set, so the call at the top of the function
            # does not reach this fresh graph. Seed it explicitly so the
            # attack's random ops are reproducible.
            tf.set_random_seed(seed)

            model = InceptionV1Model()
            x = model.default_input_placeholder
            y_pred = model.get_predicted_class(x)

            with tf.Session(graph=graph) as sess:
                attack = ProjectedGradientDescent(model, sess=sess)
                target_one_hot_encoded = get_one_hot_encoded_targets(
                    target_label)

                x_adv = attack.generate(
                    x, eps=eps, nb_iter=nb_iter,
                    clip_min=-1, clip_max=1,
                    eps_iter=(eps / 5), ord=2,
                    y_target=target_one_hot_encoded)

                num_attack_success = 0
                with tqdm(unit='imgs', total=len(img_dataset)) as pbar:
                    try:
                        for i, img in enumerate(img_dataset):
                            ben_img = np.array(img)
                            adv_img = sess.run(
                                x_adv, feed_dict={x: [ben_img]})
                            attack_pred = sess.run(
                                y_pred, feed_dict={x: adv_img})

                            # Both runs used a batch of one image.
                            adv_img = adv_img[0]
                            attack_pred = attack_pred[0]
                            assert not np.any(np.isnan(adv_img))
                            assert not np.isnan(attack_pred)

                            # Only successful attacks are stored.
                            if attack_pred == target_label:
                                index = np.array([i])
                                num_attack_success += 1
                                hdf5utils.add_image_to_dataset(
                                    adv_img, out_dataset)
                                hdf5utils.add_item_to_dataset(
                                    index, indices_dataset)

                            pbar.set_postfix(
                                num_attack_success=num_attack_success)
                            pbar.update()
                    except tf.errors.OutOfRangeError:
                        # Presumably raised when the input is exhausted;
                        # treated as a normal end of the loop, as before.
                        pass
def setUp(self):
    """Run the parent fixture, then attach a PGD attack for the tests."""
    super(TestProjectedGradientDescent, self).setUp()
    # The parent setUp is expected to provide self.model and self.sess.
    pgd_attack = ProjectedGradientDescent(self.model, sess=self.sess)
    self.attack = pgd_attack
def single_run_max_confidence_recipe(sess, model, x, y, nb_classes, eps,
                                     clip_min, clip_max, eps_iter, nb_iter,
                                     report_path, batch_size=BATCH_SIZE):
    """Attack bundling recipe for a max norm threat model and a defender
    that uses confidence thresholding, run exactly once.

    The bundle consists of uniform noise, one targeted PGD attack per class,
    and one "expensive" targeted PGD attack per class that uses a 25X smaller
    step size and 25X more iterations. Each attack is given a work goal of a
    single run. See `basic_max_confidence_recipe` for a version that runs
    indefinitely.

    References: https://openreview.net/forum?id=H1g0piA9tQ

    :param sess: tf.Session
    :param model: cleverhans.model.Model
    :param x: numpy array containing clean example inputs to attack
    :param y: numpy array containing true labels
    :param nb_classes: int, number of classes
    :param eps: float, maximum size of perturbation (measured by max norm)
    :param clip_min: lower clipping bound, passed to every attack
    :param clip_max: upper clipping bound, passed to every attack
    :param eps_iter: float, step size for the cheaper PGD attacks
    :param nb_iter: int, number of iterations for the cheaper PGD attacks
    :param report_path: str, the path that the report will be saved to
    :param batch_size: int, the total number of examples to run
      simultaneously; must be divisible by the module-level `num_devices`
    """
    noise_attack = Noise(model, sess)
    pgd_attack = ProjectedGradientDescent(model, sess)

    threat_params = {"eps": eps, "clip_min": clip_min, "clip_max": clip_max}
    noise_attack_config = AttackConfig(noise_attack, threat_params, "noise")
    pgd_params = dict(threat_params, eps_iter=eps_iter, nb_iter=nb_iter)

    assert batch_size % num_devices == 0
    dev_batch_size = batch_size // num_devices
    ones = tf.ones(dev_batch_size, tf.int32)

    pgd_attack_configs = []
    expensive_pgd = []
    for cls in range(nb_classes):
        suffix = str(cls)

        # One-hot target tensor: every example in the batch targets `cls`.
        target = tf.to_float(tf.one_hot(ones * cls, nb_classes))
        cls_params = dict(pgd_params, y_target=target)
        pgd_attack_configs.append(
            AttackConfig(pgd_attack, cls_params, "pgd_" + suffix))

        # Same target, but 25X smaller steps and 25X more iterations.
        expensive_params = dict(cls_params)
        expensive_params["eps_iter"] /= 25.
        expensive_params["nb_iter"] *= 25.
        expensive_pgd.append(
            AttackConfig(pgd_attack, expensive_params,
                         "expensive_pgd_" + suffix))

    attack_configs = [noise_attack_config] + pgd_attack_configs + expensive_pgd
    # Every attack is run once.
    new_work_goal = dict((config, 1) for config in attack_configs)
    goals = [MaxConfidence(t=1., new_work_goal=new_work_goal)]
    bundle_attacks(sess, model, x, y, attack_configs, goals, report_path)
def fit(self, X, y, sample_weight=None):
    """
    Train the wrapped Keras model according to ``self.train_type``.

    * ``'adv'``: adversarial training. The model is compiled with a loss and
      an extra accuracy metric built from a PGD attack with
      ``eps=self.eps``, fitted on ``(X, y)``, and the training accuracy is
      printed. ``self.augX`` and ``self.augy`` are set to None.
    * ``'advPruning'``: ``find_eps_separated_set`` is applied to the data
      (labels mapped from {0, 1} to {-1, +1} and back) and the model is
      fitted on the result, which is stored in ``self.augX`` / ``self.augy``.
    * ``None``: plain training on ``(X, y)``.

    :param X: training inputs
    :param y: training labels; must support ``reshape`` and, for
      ``'advPruning'``, ``astype``
    :param sample_weight: forwarded to ``self.model.fit``
    :raises ValueError: if ``self.train_type`` is not one of the above
    """
    if self.train_type == 'adv':
        Y = self.lbl_enc.transform(y.reshape(-1, 1))

        wrap = KerasModelWrapper(self.model)
        pgd = ProjectedGradientDescent(wrap, sess=self.sess)
        # Kept from the original code; the result is deliberately unused.
        self.model(self.model.input)
        pgd_params = {'eps': self.eps}

        # Use a loss function based on legitimate and adversarial examples
        adv_loss = get_adversarial_loss(self.model, pgd, pgd_params)
        adv_acc_metric = get_adversarial_acc_metric(
            self.model, pgd, pgd_params)
        self.model.compile(
            optimizer=keras.optimizers.Nadam(),
            loss=adv_loss,
            metrics=['accuracy', adv_acc_metric])
        self.model.fit(
            X, Y,
            batch_size=self.batch_size,
            epochs=self.epochs,
            verbose=2,
            sample_weight=sample_weight,
        )
        print((self.model.predict(X).argmax(1) == y).mean())

        self.augX, self.augy = None, None

    elif self.train_type == 'advPruning':
        # Map labels {0, 1} -> {-1, +1} for the pruning routine and back.
        y = y.astype(int) * 2 - 1
        self.augX, self.augy = find_eps_separated_set(
            X, self.eps / 2, y, ord=self.ord)
        self.augy = (self.augy + 1) // 2

        self.model.compile(loss=self.loss, optimizer=self.optimizer,
                           metrics=[])
        Y = self.lbl_enc.transform(self.augy.reshape(-1, 1))
        # NOTE(review): sample_weight is passed unchanged although augX may
        # contain a different number of rows than X -- confirm with callers.
        self.model.fit(self.augX, Y, batch_size=self.batch_size, verbose=0,
                       epochs=self.epochs, sample_weight=sample_weight)
        print("number of augX", np.shape(self.augX), len(self.augy))

    elif self.train_type is None:
        self.model.compile(loss=self.loss, optimizer=self.optimizer,
                           metrics=[])
        Y = self.lbl_enc.transform(y.reshape(-1, 1))
        self.model.fit(X, Y, batch_size=self.batch_size, verbose=0,
                       epochs=self.epochs, sample_weight=sample_weight)

    else:
        # The value used to be passed as a second exception argument and was
        # never interpolated into the message.
        raise ValueError(
            "Not supported train type: %s" % (self.train_type,))
def mnist_tutorial_cw(train_start=0, train_end=60000, test_start=0,
                      test_end=10000, viz_enabled=True, nb_epochs=6,
                      batch_size=128, nb_classes=10, source_samples=10,
                      learning_rate=0.001, attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    Evaluate saved MNIST models ('mlp', 'cnn', 'hrnn') against PGD.

    Despite its name, this function runs Projected Gradient Descent, not the
    Carlini-Wagner attack. One PGD attack is built per model; every model is
    then evaluated on the adversarial examples of every attack, and three
    tables are printed: a header, accuracy under attack, and clean accuracy.
    The models are loaded from ``'<model_type><name>'``, where `model_type`
    is a module-level name. If any model cannot be loaded, an error is
    printed and the process exits with status 1.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param batch_size: batch size for the clean accuracy evaluation
    :param targeted: should we run a targeted attack? or untargeted?
    :param viz_enabled: accepted for interface compatibility; not read
    :param nb_epochs: accepted for interface compatibility; not read
    :param nb_classes: accepted for interface compatibility; not read
    :param source_samples: accepted for interface compatibility; not read
    :param learning_rate: accepted for interface compatibility; not read
    :param attack_iterations: accepted for interface compatibility; not read
    :param model_path: accepted for interface compatibility; not read
    :return: an AccuracyReport object (returned as constructed; no field is
      filled in by this function)
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    try:
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        K.set_session(sess)

        set_log_level(logging.DEBUG)

        # Get MNIST test data
        X_train, Y_train, X_test, Y_test = data_mnist(
            train_start=train_start, train_end=train_end,
            test_start=test_start, test_end=test_end)

        # NOTE(review): learning phase 1 is Keras' training mode, yet the
        # models are only evaluated here -- confirm that this is intended.
        K.set_learning_phase(1)

        # Define input TF placeholder
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        y = tf.placeholder(tf.float32, shape=(None, 10))

        models = {}
        preds = {}
        for model_name in ['mlp', 'cnn', 'hrnn']:
            print('[DEBUG] Loading model.')
            try:
                models[model_name] = load_model(
                    '{}{}'.format(model_type, model_name))
            except Exception as err:
                # Narrower than the former bare `except:`, which also caught
                # KeyboardInterrupt, and the cause is no longer hidden.
                print('[ERROR] Adversarially Trained models not found! '
                      'Train and save strengthened models first. '
                      'Then, run this.')
                print('[ERROR] Cause: {!r}'.format(err))
                exit(1)
            preds[model_name] = models[model_name](x)

        # Clean accuracy of every model, and one PGD attack per model
        eval_params = {'batch_size': batch_size}
        accuracy_test = ''
        attacks = {}
        for model_name in models:
            accuracy = model_eval(sess, x, y, preds[model_name], X_test,
                                  Y_test, args=eval_params)
            accuracy_test += '{} {}\n'.format(model_name, accuracy)

            # Instantiate a PGD attack object; it is keyed by the first
            # letter of the model name.
            wrap = KerasModelWrapper(models[model_name])
            attacks['$PGD_{}$'.format(model_name[0])] = \
                ProjectedGradientDescent(wrap, sess=sess)

        # One source image per digit: the first test example of each class
        idxs = [np.where(np.argmax(Y_test, axis=1) == i)[0][0]
                for i in range(10)]
        if targeted:
            # Each of the 10 source images is paired with all 10 targets.
            one_hot = np.zeros((10, 10))
            one_hot[np.arange(10), np.arange(10)] = 1

            adv_inputs = np.array(
                [[instance] * 10 for instance in X_test[idxs]],
                dtype=np.float32)
            adv_inputs = adv_inputs.reshape((100, 28, 28, 1))
            adv_ys = np.array([one_hot] * 10,
                              dtype=np.float32).reshape((100, 10))
            yname = "y_target"
        else:
            adv_inputs = X_test[idxs]
            adv_ys = None
            yname = "y"

        attack_params = {'eps': 0.3, yname: adv_ys, 'eps_iter': 0.05}

        table_header = '{}model '.format(model_type)
        accuracy_attack = ''
        for model_name in models:
            accuracy_attack += '{} '.format(model_name)
            # For each model, apply all attacks
            for attack_name in attacks:
                print('[DEBUG] Attacking {} using {}.'.format(
                    model_name, attack_name))
                # Entered once per attack: builds the table header
                if attack_name not in table_header:
                    table_header += '{} '.format(attack_name)
                adv = attacks[attack_name].generate_np(adv_inputs,
                                                       **attack_params)
                # Targeted runs are scored against the target labels,
                # untargeted runs against the true labels.
                if targeted:
                    adv_accuracy = model_eval(
                        sess, x, y, preds[model_name], adv, adv_ys,
                        args={'batch_size': 10})
                else:
                    adv_accuracy = model_eval(
                        sess, x, y, preds[model_name], adv, Y_test[idxs],
                        args={'batch_size': 10})
                accuracy_attack += '{} '.format(adv_accuracy * 100)
            # Move on to attack the next model
            accuracy_attack += '\n'

        print(table_header)
        print(accuracy_attack)
        print(accuracy_test)
    finally:
        # Close TF session, also when something above failed
        sess.close()

    return report
def fixed_max_confidence_recipe(sess, model, x, y, nb_classes, eps,
                                clip_min, clip_max, eps_iter, nb_iter,
                                report_path, batch_size=BATCH_SIZE):
    """Attack bundling recipe for a max norm threat model and a defender
    that uses confidence thresholding, run with a fixed budget.

    Each attack is run a fixed number of times. This is more exhaustive than
    `single_run_max_confidence_recipe`, and because the budget is fixed
    rather than open-ended it is better suited to fair comparisons between
    two models.

    References: https://openreview.net/forum?id=H1g0piA9tQ

    :param sess: tf.Session
    :param model: cleverhans.model.Model
    :param x: numpy array containing clean example inputs to attack
    :param y: numpy array containing true labels
    :param nb_classes: int, number of classes
    :param eps: float, maximum size of perturbation (measured by max norm)
    :param clip_min: lower clipping bound, passed to every attack
    :param clip_max: upper clipping bound, passed to every attack
    :param eps_iter: float, step size for one version of PGD attacks
      (another version runs with a 25X smaller step size)
    :param nb_iter: int, number of iterations for one version of PGD attacks
      (another version runs with 25X more iterations)
    :param report_path: str, the path that the report will be saved to
    :param batch_size: int, the total number of examples to run
      simultaneously; must be divisible by the module-level `num_devices`
    """
    noise_attack = Noise(model, sess)
    pgd_attack = ProjectedGradientDescent(model, sess)

    threat_params = {"eps": eps, "clip_min": clip_min, "clip_max": clip_max}
    noise_attack_config = AttackConfig(noise_attack, threat_params)
    pgd_params = dict(threat_params, eps_iter=eps_iter, nb_iter=nb_iter)

    assert batch_size % num_devices == 0
    dev_batch_size = batch_size // num_devices
    ones = tf.ones(dev_batch_size, tf.int32)

    pgd_attack_configs = []
    expensive_pgd = []
    for cls in range(nb_classes):
        suffix = str(cls)

        # One-hot target tensor: every example in the batch targets `cls`.
        target = tf.to_float(tf.one_hot(ones * cls, nb_classes))
        cls_params = dict(pgd_params, y_target=target)
        pgd_attack_configs.append(
            AttackConfig(pgd_attack, cls_params, "pgd_" + suffix))

        # Same target, but 25X smaller steps and 25X more iterations.
        expensive_params = dict(cls_params)
        expensive_params["eps_iter"] /= 25.
        expensive_params["nb_iter"] *= 25.
        expensive_pgd.append(
            AttackConfig(pgd_attack, expensive_params,
                         "expensive_pgd_" + suffix))

    attack_configs = [noise_attack_config] + pgd_attack_configs + expensive_pgd
    new_work_goal = dict((config, 5) for config in attack_configs)
    pgd_work_goal = dict((config, 5) for config in pgd_attack_configs)

    # TODO: lower priority: make sure bundler won't waste time running
    # targeted attacks on examples where the target class is the true class

    # First misclassify with noise and with cheap PGD, then push confidence
    # through a series of thresholds that halve the remaining gap to 1.
    confidence_thresholds = (0.5, 0.75, 0.875, 0.9375, 0.96875, 0.984375, 1.)
    goals = [
        Misclassify(new_work_goal={noise_attack_config: 50}),
        Misclassify(new_work_goal=pgd_work_goal),
    ]
    for threshold in confidence_thresholds:
        goals.append(MaxConfidence(t=threshold, new_work_goal=new_work_goal))

    bundle_attacks(sess, model, x, y, attack_configs, goals, report_path)
# Adversarial examples for the remaining models, using the fgsm_* attack
# objects and fgsm_params built earlier (outside this excerpt). X_adv_stacked
# is presumably produced just above -- TODO confirm.
X_adv_auto = fgsm_auto.generate_np(X_test[indices_test], **fgsm_params)
X_adv_ce = fgsm_ce.generate_np(X_test[indices_test], **fgsm_params)
X_adv_rob = fgsm_rob.generate_np(X_test[indices_test], **fgsm_params)
# Compare the four models on their own adversarial examples.
comp_func(X_adv_stacked, X_adv_auto, X_adv_ce, X_adv_rob, indices_test, pred_base, pred_stacked, pred_auto, pred_ce, pred_rob)
# Transfer comparisons, currently disabled:
#comp_func_transfer(X_adv_stacked, indices_test, pred_base, pred_stacked, model_stacked, model)
#comp_func_transfer(X_adv_auto, indices_test, pred_base, pred_auto, model_auto, model)
#comp_func_transfer(X_adv_ce, indices_test, pred_base, pred_ce, model_ce, model)
#comp_func_transfer(X_adv_rob, indices_test, pred_base, pred_rob, model_rob, model)
###################################
#PGD
print("\n\n")
print("PGD")
# One PGD attack per wrapped model, all sharing the same session.
pgd_stacked = ProjectedGradientDescent(wrap_stacked, sess=sess)
pgd_auto = ProjectedGradientDescent(wrap_auto, sess=sess)
pgd_ce = ProjectedGradientDescent(wrap_ce, sess=sess)
pgd_rob = ProjectedGradientDescent(wrap_rob, sess=sess)
# The X_adv_* names are reused: from here on they hold the PGD examples.
X_adv_stacked = pgd_stacked.generate_np(X_test[indices_test], **pgd_params)
X_adv_auto = pgd_auto.generate_np(X_test[indices_test], **pgd_params)
X_adv_ce = pgd_ce.generate_np(X_test[indices_test], **pgd_params)
X_adv_rob = pgd_rob.generate_np(X_test[indices_test], **pgd_params)
# Same comparison as above, now on the PGD examples.
comp_func(X_adv_stacked, X_adv_auto, X_adv_ce, X_adv_rob, indices_test, pred_base, pred_stacked, pred_auto, pred_ce, pred_rob)
# Transfer comparisons, currently disabled:
#comp_func_transfer(X_adv_stacked, indices_test, pred_base, pred_stacked, model_stacked, model)
#comp_func_transfer(X_adv_auto, indices_test, pred_base, pred_auto, model_auto, model)
#comp_func_transfer(X_adv_ce, indices_test, pred_base, pred_ce, model_ce, model)
#comp_func_transfer(X_adv_rob, indices_test, pred_base, pred_rob, model_rob, model)
def train_child(t, p, m, num=0):
    """
    Train a child CNN under an adversarial augmentation policy and score it.

    Every ``ADVERSIAL_EVERY`` epochs the training set is re-augmented by
    `attack_train`: each example is assigned to a random subpolicy, and each
    operation of that subpolicy attacks a random subset of those examples.
    At the end of each such period, and on the last epoch, the model is
    evaluated on the clean validation set and on a PGD-attacked copy of it.
    The final weights are saved to ``runs/<num>.pt``.

    Reads the module-level names ``train_x``, ``train_y``, ``val_x``,
    ``val_y``, ``valloader`` and ``criterion``.

    :param t: per subpolicy, per operation: attack type index
      (0 = FGSM, 1 = Noise, 2 = MIM)
    :param p: per subpolicy, per operation: threshold compared against a
      random draw to select the examples to attack
    :param m: per subpolicy, per operation: magnitude index, mapped linearly
      onto the attack's eps range
    :param num: identifier used in the name of the saved weights file
    :return: tuple ``(raw_acc, adv_acc)``: clean and PGD validation accuracy
      from the last evaluation
    """
    # model = nn.DataParallel(TestCNN().cuda(1), device_ids=[1, 2, 3])
    model = TestCNN().cuda(0)

    tf_model = convert_pytorch_model_to_tf(model)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')
    session = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))

    fgsm = FastGradientMethod(cleverhans_model, sess=session)
    pgd = ProjectedGradientDescent(cleverhans_model, sess=session)
    noise = Noise(cleverhans_model, sess=session)
    mim = MomentumIterativeMethod(cleverhans_model, sess=session)
    df = DeepFool(cleverhans_model, sess=session)

    # Attack output tensors, keyed by (attack name, parameters). Calling
    # `generate` adds new ops to the graph on every call, so building a
    # tensor once per distinct parameter set keeps the graph from growing
    # with every batch.
    attack_tensors = {}

    def run_attack(key, build, x):
        """Run the tensor cached under `key` on `x`, building it if needed."""
        if key not in attack_tensors:
            attack_tensors[key] = build()
        return session.run(attack_tensors[key], feed_dict={x_op: x})

    def fgsm_op(x, eps):
        return run_attack(('fgsm', float(eps)),
                          lambda: fgsm.generate(x_op, eps=eps), x)

    def pgd_op(x, eps):
        return run_attack(
            ('pgd', float(eps)),
            lambda: pgd.generate(x_op, eps=eps, eps_iter=eps * 0.2,
                                 nb_iter=3),
            x)

    def noise_op(x, eps):
        return run_attack(('noise', float(eps)),
                          lambda: noise.generate(x_op, eps=eps), x)

    def df_op(x):
        # Not part of the active attack list below; kept as an alternative.
        return run_attack(
            ('df',),
            lambda: df.generate(x_op, nb_candidate=10, max_iter=3),
            x)

    def mim_op(x, eps):
        return run_attack(
            ('mim', float(eps)),
            lambda: mim.generate(x_op, eps=eps, eps_iter=eps * 0.2),
            x)

    def attack_train(x):
        """Return a copy of `x` with the policy's attacks applied."""
        attacks = [fgsm_op, noise_op, mim_op]
        attacks_name = ['FGSM', 'Noise', 'MIM']
        eps = [[0.03, 0.3], [0.03, 0.3], [0.03, 0.3]]
        train_x_adv = x.copy()
        # Each example is handled by exactly one randomly chosen subpolicy.
        adv_type = np.random.randint(SUBPOLICY_COUNT, size=len(train_x_adv))
        subpolicies = tqdm(zip(t, p, m), total=len(t),
                           desc='Subpolicy: ', leave=False)
        for sub_idx, (ti, pi, mi) in enumerate(subpolicies):
            # Work with row indices: boolean-mask indexing returns a copy,
            # so the original code wrote the attacked batches into temporary
            # arrays and always returned the clean data.
            sub_rows = np.flatnonzero(adv_type == sub_idx)
            operations = tqdm(zip(ti, pi, mi), total=len(ti),
                              desc='Operation: ', leave=False)
            for tj, pj, mj in operations:
                # Each entry is a one-element sequence; unwrap it.
                tj, pj, mj = (*tj, *pj, *mj)
                # NOTE(review): randn draws from a standard normal; if pj is
                # meant to be a probability, a uniform draw was presumably
                # intended -- confirm before changing.
                op_rows = sub_rows[np.random.randn(len(sub_rows)) < pj]
                eps_low, eps_high = eps[tj]
                strength = ((mj + 1) / MAGN_COUNT * (eps_high - eps_low)
                            + eps_low)
                batches = tqdm(range(0, len(op_rows), BATCH_SIZE),
                               desc=attacks_name[tj] + ': ', leave=False)
                for start in batches:
                    batch_rows = op_rows[start:start + BATCH_SIZE]
                    train_x_adv[batch_rows] = attacks[tj](
                        train_x_adv[batch_rows], strength)
        return train_x_adv

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    epoch_tqdm = tqdm(range(CHILD_EPOCHS), leave=False)
    trainloader = []
    for epoch in epoch_tqdm:
        # Refresh the adversarial training set at the start of each period.
        if epoch % ADVERSIAL_EVERY == 0:
            train_x_adv = attack_train(train_x)
            trainset = torch.utils.data.TensorDataset(
                torch.tensor(train_x_adv, dtype=torch.float),
                torch.tensor(train_y, dtype=torch.long))
            trainloader = torch.utils.data.DataLoader(
                trainset, batch_size=BATCH_SIZE, shuffle=True,
                num_workers=4)

        batch_tqdm = tqdm(trainloader, leave=False)
        for x, y in batch_tqdm:
            model.train()
            optimizer.zero_grad()
            output = model(x.cuda(0))
            loss = criterion(output, y.cuda(0))
            loss.backward()
            optimizer.step()
            acc = torch.sum(output.cpu().argmax(axis=1) == y) / y.size(0)
            batch_tqdm.set_description(f'{loss:.3f} {acc:.3f}')

        # Validate at the end of each period and on the last epoch.
        if (epoch % ADVERSIAL_EVERY == ADVERSIAL_EVERY - 1
                or epoch == len(epoch_tqdm) - 1):
            # Clean validation loss and accuracy
            batch_tqdm = tqdm(valloader, leave=False)
            tot_loss, tot_acc = 0, 0
            for x, y in batch_tqdm:
                model.eval()
                with torch.no_grad():
                    output = model(x.cuda(0))
                    loss = float(criterion(output, y.cuda(0)))
                    acc = float(
                        torch.sum(output.cpu().argmax(axis=1) == y))
                    tot_loss += loss * x.size(0)
                    tot_acc += acc
            raw_loss, raw_acc = tot_loss / len(val_x), tot_acc / len(val_x)
            epoch_tqdm.set_description(f'{raw_loss:.3f} {raw_acc:.3f}')

            # PGD-attacked copy of the validation set. Plain slices are
            # views, so these writes do land in val_x_adv.
            val_x_adv = np.zeros_like(val_x)
            for start in tqdm(range(0, len(val_x_adv), BATCH_SIZE),
                              desc='PGD: ', leave=False):
                val_x_adv[start:][:BATCH_SIZE] = pgd_op(
                    val_x[start:][:BATCH_SIZE], 0.01)
            adv_valset = torch.utils.data.TensorDataset(
                torch.tensor(val_x_adv, dtype=torch.float),
                torch.tensor(val_y, dtype=torch.long))
            adv_valloader = torch.utils.data.DataLoader(
                adv_valset, batch_size=BATCH_SIZE, shuffle=False,
                num_workers=4)

            batch_tqdm = tqdm(adv_valloader, leave=False)
            tot_acc = 0
            for x, y in batch_tqdm:
                model.eval()
                with torch.no_grad():
                    output = model(x.cuda(0))
                    acc = float(
                        torch.sum(output.cpu().argmax(axis=1) == y))
                    tot_acc += acc
            adv_acc = tot_acc / len(val_x)

    torch.save(model.state_dict(), f'runs/{num}.pt')
    return raw_acc, adv_acc
print(results) print("results on target model: ") results = metrics(model_target, X_adv, X_test, y_test, indices) print(results) #####BIM print("BIM") bim_params = {'eps': 0.03, 'nb_iter': 300, 'eps_iter': 0.03/100, 'ord': np.inf, 'clip_min': 0., 'clip_max': 1., 'rand_init': False } bim = ProjectedGradientDescent(wrap, sess=sess) X_adv = np.zeros((len(indices),32,32,3)) for i in range(0,len(indices),batch_attack): X_adv[i:i+batch_attack] = bim.generate_np(X_test[indices[i:(i+batch_attack)]], **bim_params) print("results on source model: ") results = metrics(model, X_adv, X_test, y_test, indices) print(results) print("results on target model: ") results = metrics(model_target, X_adv, X_test, y_test, indices) print(results) #####CWL2 print("CWL2") cwl2_params = {'binary_search_steps': 10, 'max_iterations': 100, 'learning_rate': 0.1,
def eval(sess, model_name, X_train, Y_train, X_test, Y_test, cnn=False,
         rbf=False, fgsm=False, jsma=False, df=False, bim=False):
    """
    Evaluate a saved Keras model on clean and adversarial test examples.

    The model is loaded from ``rbfmodels/<model_name>.h5`` when `rbf` is set
    and from ``models/<model_name>.h5`` otherwise. Accuracies are written to
    ``fmnist_results.txt``, which is overwritten. `sess` is closed before
    returning.

    :param sess: tf.Session used for evaluation and by the attacks
    :param model_name: base name of the saved model file
    :param X_train: not read by this function
    :param Y_train: not read by this function
    :param X_test: test inputs
    :param Y_test: one-hot test labels
    :param cnn: if True the input placeholder has shape (None, 28, 28, 1),
      otherwise (None, 784)
    :param rbf: load an RBF model (needs the custom ``RBFLayer``)
    :param fgsm: also evaluate under FastGradientMethod
    :param jsma: also evaluate under SaliencyMapMethod
    :param df: also evaluate under DeepFool
    :param bim: also evaluate under ProjectedGradientDescent, reported as
      "bim"
    """
    # The context manager closes the results file even if loading or
    # evaluating fails half-way through.
    with open("fmnist_results.txt", "w") as text_file:
        # load saved model
        print("Load model ... ")
        if rbf:
            loaded_model = load_model(
                "rbfmodels/{}.h5".format(model_name),
                custom_objects={'RBFLayer': RBFLayer})
            text_file.write(
                'Evaluating on rbfmodels/{}.h5\n\n'.format(model_name))
        else:
            loaded_model = load_model("models/{}.h5".format(model_name))
            text_file.write(
                'Evaluating on models/{}.h5\n\n'.format(model_name))

        # Set placeholders
        if cnn:
            x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        else:
            x = tf.placeholder(tf.float32, shape=(None, 784))
        y = tf.placeholder(tf.float32, shape=(None, 10))

        predictions = loaded_model(x)
        accuracy = model_eval(sess, x, y, predictions, X_test, Y_test,
                              args={"batch_size": 128})
        text_file.write(
            'Test accuracy on legitimate test examples: {0}\n'.format(
                str(accuracy)))

        # Craft adversarial examples depending on the input parameters
        wrap = KerasModelWrapper(loaded_model)

        def report_attack(label, attack, params):
            """Write the model's accuracy under `attack` to the file."""
            adv_x = attack.generate(x, **params)
            adv_x = tf.stop_gradient(adv_x)
            preds_adv = loaded_model(adv_x)
            adv_accuracy = model_eval(sess, x, y, preds_adv, X_test, Y_test,
                                      args={"batch_size": 128})
            text_file.write(
                'Test accuracy on {0} adversarial test examples: {1}\n'
                .format(label, str(adv_accuracy)))

        # The attack classes are referenced only inside the branch that
        # needs them, and the order (FGSM, JSMA, DeepFool, BIM) is unchanged.
        if fgsm:
            report_attack('fgsm', FastGradientMethod(wrap, sess=sess),
                          {'eps': 0.3})

        if jsma:
            report_attack('jsma', SaliencyMapMethod(wrap, sess=sess), {
                'theta': 2.,
                'gamma': 0.145,
                'clip_min': 0.,
                'clip_max': 1.,
                'y_target': None
            })

        if df:
            report_attack('df', DeepFool(wrap, sess=sess),
                          {'nb_candidate': 10, 'max_iter': 50})

        if bim:
            # NOTE(review): PGD is run with the library defaults apart from
            # eps; whether that matches the Basic Iterative Method depends
            # on those defaults (e.g. random start) -- confirm.
            report_attack('bim', ProjectedGradientDescent(wrap, sess=sess),
                          {'eps': 0.3})

    print('Accuracy results outputted to fmnist_results.txt')

    # Close TF session
    sess.close()
cleverhans_model = KerasModelWrapper(model) # Choose attack if attack_method == 'MIM': op = MomentumIterativeMethod(cleverhans_model, sess=sess) params = { 'eps': eps, 'nb_iter': nb_iter, 'eps_iter': eps_iter, 'ord': order, 'decay_factor': decay_factor, 'clip_max': 1., 'clip_min': 0 } elif attack_method == 'PGD' and order == np.inf: op = ProjectedGradientDescent(cleverhans_model, sess=sess) params = { 'eps': eps, 'eps_iter': eps_iter, 'nb_iter': nb_iter, 'clip_max': 1., 'clip_min': 0 } elif attack_method == 'PGD' and order == 2: op = ProjectedGradientDescent(cleverhans_model, sess=sess) params = { 'eps': eps, 'eps_iter': eps_iter, 'nb_iter': nb_iter, 'ord': 2, 'clip_max': 1.,