def ld_mnist():
    """Load training and test data."""
    train_transforms = torchvision.transforms.Compose(
        [torchvision.transforms.ToTensor()])
    test_transforms = torchvision.transforms.Compose(
        [torchvision.transforms.ToTensor()])
    # Load MNIST dataset
    train_dataset = MNISTDataset(root="/tmp/data", transform=train_transforms)
    test_dataset = MNISTDataset(root="/tmp/data", train=False,
                                transform=test_transforms)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128,
                                               shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128,
                                              shuffle=False, num_workers=2)
    return EasyDict(train=train_loader, test=test_loader)
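# A minimal usage sketch of ld_mnist (an illustration, not part of the original
# script; it assumes torch, torchvision, MNISTDataset, and EasyDict are
# importable as in the function above).
if __name__ == "__main__":
    data = ld_mnist()
    images, labels = next(iter(data.train))
    print(images.shape, labels.shape)  # e.g. torch.Size([128, 1, 28, 28]), torch.Size([128])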
def main():
    global args
    args = get_args()
    torch.manual_seed(args.seed)
    criterion = nn.CrossEntropyLoss()
    print(args)
    if len(args.gpu) > 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        use_CUDA = True
    else:
        use_CUDA = False
    cudnn.benchmark = True

    data_transforms = {
        'train': transforms.Compose([
            transforms.ToPILImage(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]),
        'val': transforms.Compose([
            transforms.ToPILImage(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])
    }

    if args.dataset == 'mnist':
        train_dataset = MNISTDataset(split='train', seed=args.seed)
        val_dataset = MNISTDataset(split='val', seed=args.seed)
        input_channel = 1
    elif args.dataset == 'cifar10':
        train_dataset = CIFARDataset(split='train', seed=args.seed,
                                     transform=data_transforms['train'],
                                     percent=args.percent)
        val_dataset = CIFARDataset(split='val', seed=args.seed,
                                   transform=data_transforms['val'],
                                   percent=args.percent)
        input_channel = 3
    elif args.dataset == 'cifar100':
        train_dataset = CIFAR100Dataset(split='train', seed=args.seed,
                                        transform=data_transforms['train'],
                                        percent=args.percent)
        val_dataset = CIFAR100Dataset(split='val', seed=args.seed,
                                      transform=data_transforms['val'],
                                      percent=args.percent)
        input_channel = 3
    else:
        raise NotImplementedError

    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              num_workers=args.num_workers)
    val_loader = DataLoader(val_dataset, batch_size=args.val_batch_size,
                            num_workers=args.num_workers)

    model = get_model(args, input_channel=input_channel,
                      num_classes=args.num_classes)
    optimizers = get_optimizers(model, args.components, args.lr, args.gamma)
    # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [80], gamma=0.5, last_epoch=-1)

    save_path = os.path.join(args.prefix, args.modeldir)
    if not os.path.exists(save_path):
        os.makedirs(save_path)  # makedirs also creates a missing prefix directory
    writer = SummaryWriter(save_path)

    best_prec = 0
    for epoch in range(args.epochs):
        train(model, input_channel, optimizers, criterion, args.components,
              train_loader, val_loader, epoch, writer, args,
              use_CUDA=use_CUDA, clamp=args.clamp, num_classes=args.num_classes)
        loss, prec = val(model, val_loader, criterion, epoch, writer, use_CUDA)
        torch.save(model, os.path.join(save_path, 'checkpoint.pth.tar'))
        if prec > best_prec:
            torch.save(model, os.path.join(save_path, 'model_best.pth.tar'))
            best_prec = prec
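# Note: torch.save(model, ...) above pickles the whole module object, which
# ties the checkpoint to the exact class definition. A common, more portable
# variant (a sketch, not the author's code) saves only the parameters:
#
#     torch.save(model.state_dict(), os.path.join(save_path, 'checkpoint.pth.tar'))
#     model.load_state_dict(torch.load(os.path.join(save_path, 'checkpoint.pth.tar')))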
import tensorflow as tf
from datasets import MNISTDataset
from time import time

# get the data
(train_imgs, train_lbls), (test_imgs, test_lbls) = \
    tf.keras.datasets.mnist.load_data()
mnist = MNISTDataset(train_imgs.reshape((-1, 784)), train_lbls,
                     test_imgs.reshape((-1, 784)), test_lbls,
                     batch_size=256, seed=int(time()))

# define the model first, from input to output
# let's use fewer layers...
n_units = 100
n_layers = 2

# just set up a "chain" of hidden layers
layers = []
for layer in range(n_layers):
    layers.append(
        tf.keras.layers.Dense(
            n_units, activation=tf.nn.relu,
            kernel_initializer=tf.initializers.RandomUniform(minval=-0.01,
                                                             maxval=0.01),
            bias_initializer=tf.initializers.constant(0.001)))
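# A hedged sketch of finishing the model: the snippet stops after the hidden
# stack, so the output layer and Sequential wrapper below are assumptions, not
# the original code.
layers.append(tf.keras.layers.Dense(10))  # 10 logits, one per MNIST class
model = tf.keras.Sequential(layers)
model.build(input_shape=(None, 784))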
import tensorflow as tf

tf.__version__

# In[ ]:

import numpy as np
import matplotlib.pyplot as plt
from datasets import MNISTDataset

# In[3]:

mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
plt.imshow(train_images[15], cmap='Greys_r')
data = MNISTDataset(train_images.reshape([-1, 784]), train_labels,
                    test_images.reshape([-1, 784]), test_labels,
                    batch_size=128)

# # Setting up parameters

# In[ ]:

train_steps = 1000
learning_rate = 1e-4

n_input = 28 * 28  # input layer (28x28 pixels)
n_hidden1 = 512    # 1st hidden layer
n_hidden2 = 256    # 2nd hidden layer
n_hidden3 = 128    # 3rd hidden layer
n_output = 10      # output layer (0-9 digits)
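# A hedged sketch (not the original notebook's next cell): one way the layer
# sizes above could be wired into a Keras model.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(n_hidden1, activation='relu', input_shape=(n_input,)),
    tf.keras.layers.Dense(n_hidden2, activation='relu'),
    tf.keras.layers.Dense(n_hidden3, activation='relu'),
    tf.keras.layers.Dense(n_output),  # logits
])
optimizer = tf.keras.optimizers.Adam(learning_rate)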
import tensorflow as tf
from datasets import MNISTDataset
from time import time

# get the data
mnist = MNISTDataset("mnist_data", batch_size=256, seed=int(time()))

# define the model first, from input to output
imgs = tf.placeholder(tf.float32, shape=[None, 28 * 28])
n_hidden = 100
n_layers = 8
w_range = 0.1

hidden = imgs
for layer in range(n_layers):
    name = "hidden_{}".format(layer)
    hidden = tf.layers.dense(
        hidden, n_hidden, activation=tf.nn.relu,
        # kernel_initializer=tf.random_uniform_initializer(minval=-w_range, maxval=w_range),
        kernel_initializer=tf.contrib.layers.xavier_initializer(uniform=True),
        bias_initializer=tf.constant_initializer(0.01),
        name=name)
    tf.summary.histogram(name + "_hist", hidden)

logits = tf.layers.dense(hidden, 10,
                         kernel_initializer=tf.random_uniform_initializer(
                             minval=-w_range, maxval=w_range),
                         bias_initializer=tf.zeros_initializer)
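# A hedged sketch of the rest of the TF1 graph (loss, optimizer, session loop).
# The `lbls` placeholder, the learning rate, the step count, and the
# `next_batch` call are assumptions, not the original code.
lbls = tf.placeholder(tf.int64, shape=[None])
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=lbls, logits=logits))
train_step = tf.train.AdamOptimizer(1e-3).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        img_batch, lbl_batch = mnist.next_batch()
        sess.run(train_step, feed_dict={imgs: img_batch, lbls: lbl_batch})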
def main(argv=None):
    # 0. Select a dataset.
    from datasets import MNISTDataset, CIFAR10Dataset, ImageNetDataset
    from datasets import get_correct_prediction_idx, evaluate_adversarial_examples, \
        calculate_mean_confidence, calculate_accuracy

    if FLAGS.dataset_name == "MNIST":
        dataset = MNISTDataset()
    elif FLAGS.dataset_name == "CIFAR-10":
        dataset = CIFAR10Dataset()
        FLAGS.image_size = 32  # Redundant for the current attack.
    elif FLAGS.dataset_name == "ImageNet":
        dataset = ImageNetDataset()
        FLAGS.image_size = 224  # Redundant for the current attack.

    # 1. Load a dataset.
    print("\n===Loading %s data..." % FLAGS.dataset_name)
    if FLAGS.dataset_name == 'ImageNet':
        if FLAGS.model_name == 'inceptionv3':
            img_size = 299
        else:
            img_size = 224
        X_test_all, Y_test_all = dataset.get_test_data(img_size, 0, 200)
    else:
        X_test_all, Y_test_all = dataset.get_test_dataset()

    # Randomized selection of test examples.
    if FLAGS.dataset_name != "ImageNet":
        all_idx = np.arange(10000)
        np.random.shuffle(all_idx)
        selected_idx = all_idx[:(FLAGS.nb_examples * 2)]
        X_test_all, Y_test_all = X_test_all[selected_idx], Y_test_all[selected_idx]

    # 2. Load a trained model.
    sess = load_tf_session()
    keras.backend.set_learning_phase(0)
    # Define input TF placeholders.
    x = tf.placeholder(tf.float32,
                       shape=(None, dataset.image_size, dataset.image_size,
                              dataset.num_channels))
    y = tf.placeholder(tf.float32, shape=(None, dataset.num_classes))

    sq_list = FLAGS.squeezers.split(';')
    print("Squeezers used for EOT:", sq_list)
    x_s = []
    squeezers = []
    models = []
    for squeezer in sq_list:
        x_s.append(
            tf.placeholder(tf.float32,
                           shape=(None, dataset.image_size, dataset.image_size,
                                  dataset.num_channels)))
        if squeezer.startswith("median"):
            # The median filter runs inside the model graph (pre_filter), so
            # the outer squeezer is the identity.
            squeezers.append(lambda x: x)
            with tf.variable_scope(FLAGS.model_name + squeezer):
                cur_model = dataset.load_model_by_name(
                    FLAGS.model_name, logits=False, input_range_type=1,
                    pre_filter=get_squeezer_by_name(squeezer, 'tensorflow'))
                cur_model.compile(loss='categorical_crossentropy',
                                  optimizer='sgd', metrics=['acc'])
                models.append(cur_model)
        else:
            squeezers.append(get_squeezer_by_name(squeezer, 'python'))
            with tf.variable_scope(FLAGS.model_name + "local" + squeezer):
                cur_model = dataset.load_model_by_name(FLAGS.model_name,
                                                       logits=False,
                                                       input_range_type=1)
                cur_model.compile(loss='categorical_crossentropy',
                                  optimizer='sgd', metrics=['acc'])
                models.append(cur_model)

    with tf.variable_scope(FLAGS.model_name + "vanilla"):
        model_vanilla = dataset.load_model_by_name(FLAGS.model_name,
                                                   logits=False,
                                                   input_range_type=1)
        model_vanilla.compile(loss='categorical_crossentropy', optimizer='sgd',
                              metrics=['acc'])

    # 3. Evaluate the trained model.
    # TODO: add top-5 accuracy for ImageNet.
    print("Evaluating the pre-trained model...")
    # We use the vanilla model here for prediction.
    print("Shape of X_test_all:", X_test_all.shape)
    Y_pred_all = model_vanilla.predict(X_test_all)
    mean_conf_all = calculate_mean_confidence(Y_pred_all, Y_test_all)
    accuracy_all = calculate_accuracy(Y_pred_all, Y_test_all)
    print('Test accuracy on raw legitimate examples %.4f' % (accuracy_all))
    print('Mean confidence on ground truth classes %.4f' % (mean_conf_all))

    # 4. Select some examples to attack.
    import hashlib
    from datasets import get_first_n_examples_id_each_class
    if FLAGS.select:
        # Filter out the misclassified examples.
        correct_idx = get_correct_prediction_idx(Y_pred_all, Y_test_all)
        if FLAGS.test_mode:
            # Only select the first example of each class.
            correct_and_selected_idx = get_first_n_examples_id_each_class(
                Y_test_all[correct_idx])
            selected_idx = [correct_idx[i] for i in correct_and_selected_idx]
        else:
            if not FLAGS.balance_sampling:
                selected_idx = correct_idx[:FLAGS.nb_examples]
            else:
                # Select the same number of examples for each class label.
                nb_examples_per_class = int(FLAGS.nb_examples / Y_test_all.shape[1])
                correct_and_selected_idx = get_first_n_examples_id_each_class(
                    Y_test_all[correct_idx], n=nb_examples_per_class)
                selected_idx = [correct_idx[i] for i in correct_and_selected_idx]
    else:
        selected_idx = np.array(range(FLAGS.nb_examples))

    from utils.output import format_number_range
    selected_example_idx_ranges = format_number_range(sorted(selected_idx))
    print("Selected %d examples." % len(selected_idx))
    print("Selected index in test set (sorted): %s" % selected_example_idx_ranges)
    X_test, Y_test, Y_pred = (X_test_all[selected_idx], Y_test_all[selected_idx],
                              Y_pred_all[selected_idx])

    # The accuracy should be 100%.
    accuracy_selected = calculate_accuracy(Y_pred, Y_test)
    mean_conf_selected = calculate_mean_confidence(Y_pred, Y_test)
    print('Test accuracy on selected legitimate examples %.4f' % (accuracy_selected))
    print('Mean confidence on ground truth classes, selected %.4f\n' % (mean_conf_selected))

    task = {}
    task['dataset_name'] = FLAGS.dataset_name
    task['model_name'] = FLAGS.model_name
    task['accuracy_test'] = accuracy_all
    task['mean_confidence_test'] = mean_conf_all
    task['test_set_selected_length'] = len(selected_idx)
    task['test_set_selected_idx_ranges'] = selected_example_idx_ranges
    task['test_set_selected_idx_hash'] = hashlib.sha1(
        str(selected_idx).encode('utf-8')).hexdigest()
    task['accuracy_test_selected'] = accuracy_selected
    task['mean_confidence_test_selected'] = mean_conf_selected

    task_id = "%s_%d_%s_%s" % \
        (task['dataset_name'], task['test_set_selected_length'],
         task['test_set_selected_idx_hash'][:5], task['model_name'])

    FLAGS.result_folder = os.path.join(FLAGS.result_folder, task_id)
    if not os.path.isdir(FLAGS.result_folder):
        os.makedirs(FLAGS.result_folder)
    from utils.output import save_task_descriptor
    save_task_descriptor(FLAGS.result_folder, [task])

    # 5. Generate adversarial examples.
    from utils.squeeze import reduce_precision_py
    from utils.parameter_parser import parse_params
    attack_string_hash = hashlib.sha1(FLAGS.attacks.encode('utf-8')).hexdigest()[:5]
    sample_string_hash = task['test_set_selected_idx_hash'][:5]

    from datasets.datasets_utils import get_next_class, get_least_likely_class
    Y_test_target_next = get_next_class(Y_test)
    Y_test_target_ll = get_least_likely_class(Y_pred)

    X_test_adv_list = []
    X_test_adv_discretized_list = []
    Y_test_adv_discretized_pred_list = []

    # Materialize the filter so the list survives the attack loop and can be
    # reused by the robustness and detection sections below.
    attack_string_list = list(filter(lambda x: len(x) > 0,
                                     FLAGS.attacks.lower().split(';')))
    to_csv = []

    X_adv_cache_folder = os.path.join(FLAGS.result_folder, 'adv_examples')
    adv_log_folder = os.path.join(FLAGS.result_folder, 'adv_logs')
    predictions_folder = os.path.join(FLAGS.result_folder, 'predictions')
    for folder in [X_adv_cache_folder, adv_log_folder, predictions_folder]:
        if not os.path.isdir(folder):
            os.makedirs(folder)
    predictions_fpath = os.path.join(predictions_folder, "legitimate.npy")
    np.save(predictions_fpath, Y_pred, allow_pickle=False)

    if FLAGS.clip >= 0:
        epsilon = FLAGS.clip
        print("Clip the adversarial perturbations by +-%f" % epsilon)
        max_clip = np.clip(X_test + epsilon, 0, 1)
        min_clip = np.clip(X_test - epsilon, 0, 1)

    # NOTE: at the moment we only support single attacks and single detectors.
    for attack_string in attack_string_list:
        attack_name, attack_params = parse_params(attack_string)
        print("\nRunning attack: %s %s" % (attack_name, attack_params))
        if 'targeted' in attack_params:
            targeted = attack_params['targeted']
            print("targeted value: %s" % targeted)
            if targeted == 'next':
                Y_test_target = Y_test_target_next
            elif targeted == 'll':
                Y_test_target = Y_test_target_ll
            elif targeted is False:
                attack_params['targeted'] = False
                Y_test_target = Y_test.copy()
        else:
            targeted = False
            attack_params['targeted'] = False
            Y_test_target = Y_test.copy()

        # Note that we use the attack models here instead of the vanilla model,
        # and that we pass in the squeezer functions for BPDA.
        X_test_adv = eot_adversarial_attack(sess, model_vanilla, models, x, y,
                                            x_s, X_test, Y_test_target,
                                            attack_params, squeezers)
        if FLAGS.clip > 0:
            # This is L-inf clipping.
            X_test_adv = np.clip(X_test_adv, min_clip, max_clip)
        X_test_adv_list.append(X_test_adv)

        # 5.0 Output predictions.
        Y_test_adv_pred = model_vanilla.predict(X_test_adv)
        predictions_fpath = os.path.join(predictions_folder,
                                         "%s.npy" % attack_string)
        np.save(predictions_fpath, Y_test_adv_pred, allow_pickle=False)

        # 5.1 Evaluate the adversarial examples being discretized to uint8.
        print("\n---Attack (uint8): %s" % attack_string)
        # All data should be discretized to uint8.
        X_test_adv_discret = reduce_precision_py(X_test_adv, 256)
        X_test_adv_discretized_list.append(X_test_adv_discret)
        Y_test_adv_discret_pred = model_vanilla.predict(X_test_adv_discret)
        Y_test_adv_discretized_pred_list.append(Y_test_adv_discret_pred)

        # Y_test_adv_discret_pred is for the vanilla model.
        rec = evaluate_adversarial_examples(X_test, Y_test, X_test_adv_discret,
                                            Y_test_target.copy(), targeted,
                                            Y_test_adv_discret_pred)
        rec['dataset_name'] = FLAGS.dataset_name
        rec['model_name'] = FLAGS.model_name
        rec['attack_string'] = attack_string
        rec['discretization'] = True
        to_csv.append(rec)

    from utils.output import write_to_csv
    attacks_evaluation_csv_fpath = os.path.join(
        FLAGS.result_folder,
        "%s_attacks_%s_evaluation.csv" % (task_id, attack_string_hash))
    fieldnames = ['dataset_name', 'model_name', 'attack_string',
                  'discretization', 'success_rate', 'mean_confidence',
                  'mean_l2_dist', 'mean_li_dist', 'mean_l0_dist_value',
                  'mean_l0_dist_pixel']
    write_to_csv(to_csv, attacks_evaluation_csv_fpath, fieldnames)

    if FLAGS.visualize is True:
        from datasets.visualization import show_imgs_in_rows
        if FLAGS.test_mode or FLAGS.balance_sampling:
            selected_idx_vis = range(Y_test.shape[1])
        else:
            selected_idx_vis = get_first_n_examples_id_each_class(Y_test, 1)
        legitimate_examples = X_test[selected_idx_vis]
        rows = [legitimate_examples]
        rows += map(lambda x: x[selected_idx_vis], X_test_adv_list)
        img_fpath = os.path.join(
            FLAGS.result_folder,
            '%s_attacks_%s_examples.png' % (task_id, attack_string_hash))
        # show_imgs_in_rows(rows, img_fpath)
        print('\n===Adversarial image examples are saved in ', img_fpath)
        # TODO: output the prediction and confidence for each example,
        # both legitimate and adversarial.

    # 6. Evaluate robust classification techniques.
    # Example: --robustness \
    #   "Base;FeatureSqueezing?squeezer=bit_depth_1;FeatureSqueezing?squeezer=median_filter_2;"
    if FLAGS.robustness != '':
        """
        Test the accuracy with robust classifiers.
        Evaluate the accuracy on all the legitimate examples.
        """
        from robustness import evaluate_robustness
        result_folder_robustness = os.path.join(FLAGS.result_folder, "robustness")
        fname_prefix = "robustness_summary"
        evaluate_robustness(FLAGS.robustness, model_vanilla, Y_test_all,
                            X_test_all, Y_test, attack_string_list,
                            X_test_adv_discretized_list, fname_prefix,
                            selected_idx_vis, result_folder_robustness)

    # 7. Detection experiment.
    # Example: --detection "FeatureSqueezing?distance_measure=l1&squeezers=median_smoothing_2,bit_depth_4,bilateral_filter_15_15_60;"
    if FLAGS.detection != '':
        from detections.base import DetectionEvaluator
        result_folder_detection = os.path.join(FLAGS.result_folder, "detection")
        csv_fname = "detection_summary.csv"
        de = DetectionEvaluator(model_vanilla, result_folder_detection,
                                csv_fname, FLAGS.dataset_name)
        Y_test_all_pred = model_vanilla.predict(X_test_all)
        de.build_detection_dataset(X_test_all, Y_test_all, Y_test_all_pred,
                                   selected_idx, X_test_adv_discretized_list,
                                   Y_test_adv_discretized_pred_list,
                                   attack_string_list, attack_string_hash,
                                   FLAGS.clip, Y_test_target_next,
                                   Y_test_target_ll)
        de.evaluate_detections(FLAGS.detection)
def main():
    import torch

    seed = 42
    torch.manual_seed(seed)
    np.random.seed(seed)

    ### Data loading
    # mnist = MNISTDataset.load('haar_mnist.pkl')
    # mnist = MNISTDataset.load('filtered_mnist.pkl')
    mnist = MNISTDataset.load()
    (Xtr, Ytr), (Xts, Yts) = mnist.get_train_test(center=True, reduce=True)
    m = 1_0  # i.e. 10 training examples: a debug-sized subset
    X, Y = Xtr[:m], Ytr[:m]
    X_val, Y_val = Xtr[-10_000:], Ytr[-10_000:]

    ### Choice of encoder
    # encoder = LabelEncoder.load_encodings('js_without_0', convert_to_int=True)
    # encoder = LabelEncoder.load_encodings('mario')
    # encoder = LabelEncoder.load_encodings('ideal_mnist', convert_to_int=True)
    encoder = OneHotEncoder(Ytr)
    # encoder = AllPairsEncoder(Ytr)

    ### Choice of weak learner
    # weak_learner = WLThresholdedRidge(threshold=.5)
    # weak_learner = WLRidge
    f_gen = WeightFromBankGenerator(filter_bank=Xtr[-3000:],
                                    filters_shape=(11, 11),
                                    filter_processing=center_weight)
    filters = Filters(n_filters=3,
                      weights_generator=f_gen,
                      # locality=3,
                      maxpool_shape=(3, 3))
    # Xtr, X_val, Xts = RandomConvolution.format_data(Xtr), RandomConvolution.format_data(X_val), RandomConvolution.format_data(Xts)
    # Xtr, X_val, Xts = Xtr.to('cuda'), X_val.to('cuda'), Xts.to('cuda')
    weak_learner = RandomConvolution(filters=filters, weak_learner=Ridge)
    # weak_learner = MulticlassDecisionTree(max_n_leaves=4)
    # weak_learner = MulticlassDecisionStump
    # sorted_X, sorted_X_idx = weak_learner.sort_data(X)

    ### Callbacks
    # filename = 'haar_onehot_ds_'
    # filename = 'ideal_mnist_ds_'
    filename = 'test'
    ckpt = ModelCheckpoint(filename=filename + '_{round}.ckpt',
                           dirname='./results', save_last=True)
    logger = CSVLogger(filename=filename + '_log.csv', dirname='./results/log')
    zero_risk = BreakOnZeroRiskCallback()
    tracker = BestRoundTrackerCallback(quantity='valid_acc', monitor='max')
    callbacks = [ckpt,
                 logger,
                 zero_risk,
                 # tracker,
                 ]

    ### Fitting the model
    qb = QuadBoostMHCR(weak_learner, encoder=encoder, dampening=1)
    qb.fit(X, Y, max_round_number=2, patience=10,
           X_val=X_val, Y_val=Y_val,
           callbacks=callbacks,
           # n_jobs=1, sorted_X=sorted_X, sorted_X_idx=sorted_X_idx,
           )
    print(f'Best round recap:\nBoosting round {qb.best_round.step_number + 1:03d} '
          f'| Train acc: {qb.best_round.train_acc:.3%} '
          f'| Valid acc: {qb.best_round.valid_acc:.3%} '
          f'| Risk: {qb.best_round.risk:.3f}')
    print(f'Test accuracy on best model: {qb.evaluate(Xts, Yts):.3%}')
    print(f'Test accuracy on last model: {qb.evaluate(Xts, Yts, mode="last"):.3%}')
def main(argv=None):
    # 0. Select a dataset.
    from datasets import MNISTDataset, CIFAR10Dataset, ImageNetDataset
    from datasets import get_correct_prediction_idx, evaluate_adversarial_examples, \
        calculate_mean_confidence, calculate_accuracy, \
        calculate_real_untargeted_mean_confidence

    if FLAGS.dataset_name == "MNIST":
        dataset = MNISTDataset()
    elif FLAGS.dataset_name == "CIFAR-10":
        dataset = CIFAR10Dataset()
    elif FLAGS.dataset_name == "ImageNet":
        dataset = ImageNetDataset()

    # 1. Load a dataset.
    print("\n===Loading %s data..." % FLAGS.dataset_name)
    if FLAGS.dataset_name == 'ImageNet':
        if FLAGS.model_name == 'inceptionv3':
            img_size = 299
        else:
            img_size = 224
        X_test_all, Y_test_all = dataset.get_test_data(img_size, 0, 200)
    else:
        X_test_all, Y_test_all = dataset.get_test_dataset()

    # 2. Load a trained model.
    sess = load_tf_session()
    # keras.backend.set_learning_phase(0)
    # Define input TF placeholders.
    x = tf.placeholder(tf.float32,
                       shape=(None, dataset.image_size, dataset.image_size,
                              dataset.num_channels))
    y = tf.placeholder(tf.float32, shape=(None, dataset.num_classes))
    with tf.variable_scope(FLAGS.model_name):
        """
        Create a model instance for prediction.
        The scaling argument, 'input_range_type': {1: [0,1], 2: [-0.5,0.5], 3: [-1,1]...}
        """
        model = dataset.load_model_by_name(FLAGS.model_name, logits=False,
                                           input_range_type=1)
        model.compile(loss='categorical_crossentropy', optimizer='sgd',
                      metrics=['acc'])

    # 3. Evaluate the trained model.
    # TODO: add top-5 accuracy for ImageNet.
    print("Evaluating the pre-trained model...")
    # X_test_all = scipy.ndimage.rotate(X_test_all, 5, reshape=False, axes=(2, 1))
    Y_pred_all = model.predict(X_test_all)
    mean_conf_all, _, _, _ = calculate_mean_confidence(Y_pred_all, Y_test_all)
    accuracy_all = calculate_accuracy(Y_pred_all, Y_test_all)
    print('Test accuracy on raw legitimate examples %.4f' % (accuracy_all))
    print('Mean confidence on ground truth classes %.4f' % (mean_conf_all))

    # 4. Select some examples to attack.
    import hashlib
    from datasets import get_first_n_examples_id_each_class
    if FLAGS.select:
        # Filter out the misclassified examples.
        correct_idx = get_correct_prediction_idx(Y_pred_all, Y_test_all)
        if FLAGS.test_mode:
            # Only select the first example of each class.
            correct_and_selected_idx = get_first_n_examples_id_each_class(
                Y_test_all[correct_idx])
            selected_idx = [correct_idx[i] for i in correct_and_selected_idx]
        else:
            if not FLAGS.balance_sampling:
                selected_idx = correct_idx[:FLAGS.nb_examples]
            else:
                # Select the same number of examples for each class label.
                nb_examples_per_class = int(FLAGS.nb_examples / Y_test_all.shape[1])
                correct_and_selected_idx = get_first_n_examples_id_each_class(
                    Y_test_all[correct_idx], n=nb_examples_per_class)
                selected_idx = [correct_idx[i] for i in correct_and_selected_idx]
    else:
        selected_idx = np.array(range(FLAGS.nb_examples))

    from utils.output import format_number_range
    selected_example_idx_ranges = format_number_range(sorted(selected_idx))
    print("Selected %d examples." % len(selected_idx))
    print("Selected index in test set (sorted): %s" % selected_example_idx_ranges)
    X_test, Y_test, Y_pred = (X_test_all[selected_idx], Y_test_all[selected_idx],
                              Y_pred_all[selected_idx])

    # The accuracy should be 100%.
    accuracy_selected = calculate_accuracy(Y_pred, Y_test)
    mean_conf_selected, max_conf_selected, min_conf_selected, std_conf_selected = \
        calculate_mean_confidence(Y_pred, Y_test)
    print('Test accuracy on selected legitimate examples %.4f' % (accuracy_selected))
    print('Mean confidence on ground truth classes, selected %.4f\n' % (mean_conf_selected))
    print('max confidence on ground truth classes, selected %.4f\n' % (max_conf_selected))
    print('min confidence on ground truth classes, selected %.4f\n' % (min_conf_selected))
    print('std confidence on ground truth classes, selected %.4f\n' % (std_conf_selected))

    task = {}
    task['dataset_name'] = FLAGS.dataset_name
    task['model_name'] = FLAGS.model_name
    task['accuracy_test'] = accuracy_all
    task['mean_confidence_test'] = mean_conf_all
    task['test_set_selected_length'] = len(selected_idx)
    task['test_set_selected_idx_ranges'] = selected_example_idx_ranges
    task['test_set_selected_idx_hash'] = hashlib.sha1(
        str(selected_idx).encode('utf-8')).hexdigest()
    task['accuracy_test_selected'] = accuracy_selected
    task['mean_confidence_test_selected'] = mean_conf_selected

    task_id = "%s_%d_%s_%s" % \
        (task['dataset_name'], task['test_set_selected_length'],
         task['test_set_selected_idx_hash'][:5], task['model_name'])

    FLAGS.result_folder = os.path.join(FLAGS.result_folder, task_id)
    if not os.path.isdir(FLAGS.result_folder):
        os.makedirs(FLAGS.result_folder)
    from utils.output import save_task_descriptor
    save_task_descriptor(FLAGS.result_folder, [task])

    # 5. Generate adversarial examples.
    from attacks import maybe_generate_adv_examples
    from utils.squeeze import reduce_precision_py
    from utils.parameter_parser import parse_params
    attack_string_hash = hashlib.sha1(FLAGS.attacks.encode('utf-8')).hexdigest()[:5]
    sample_string_hash = task['test_set_selected_idx_hash'][:5]

    from datasets.datasets_utils import get_next_class, get_least_likely_class, \
        get_most_likely_class
    Y_test_target_next = get_next_class(Y_test)
    Y_test_target_ll = get_least_likely_class(Y_pred)
    Y_test_target_ml = get_most_likely_class(Y_pred)

    X_test_adv_list = []
    X_test_adv_discretized_list = []
    Y_test_adv_discretized_pred_list = []

    # Materialize the filter so the list survives the attack loop and can be
    # reused by the detection section below.
    attack_string_list = list(filter(lambda x: len(x) > 0,
                                     FLAGS.attacks.lower().split(';')))
    to_csv = []

    X_adv_cache_folder = os.path.join(FLAGS.result_folder, 'adv_examples')
    adv_log_folder = os.path.join(FLAGS.result_folder, 'adv_logs')
    predictions_folder = os.path.join(FLAGS.result_folder, 'predictions')
    for folder in [X_adv_cache_folder, adv_log_folder, predictions_folder]:
        if not os.path.isdir(folder):
            os.makedirs(folder)
    predictions_fpath = os.path.join(predictions_folder, "legitimate.npy")
    np.save(predictions_fpath, Y_pred, allow_pickle=False)

    if FLAGS.clip >= 0:
        epsilon = FLAGS.clip
        print("Clip the adversarial perturbations by +-%f" % epsilon)
        max_clip = np.clip(X_test + epsilon, 0, 1)
        min_clip = np.clip(X_test - epsilon, 0, 1)

    for attack_string in attack_string_list:
        attack_log_fpath = os.path.join(adv_log_folder,
                                        "%s_%s.log" % (task_id, attack_string))
        attack_name, attack_params = parse_params(attack_string)
        print("\nRunning attack: %s %s" % (attack_name, attack_params))
        if 'targeted' in attack_params:
            targeted = attack_params['targeted']
            print("targeted value: %s" % targeted)
            if targeted == 'next':
                Y_test_target = Y_test_target_next
                # Y_test_target = Y_test.copy()
            elif targeted == 'll':
                Y_test_target = Y_test_target_ll
                # Y_test_target = Y_test.copy()
                # print(Y_test_target_ll)
            elif targeted == 'most':
                Y_test_target = Y_test_target_ml
                # Y_test_target = Y_test.copy()
                # print(Y_test_target_ml)
            elif targeted is False:
                attack_params['targeted'] = False
                Y_test_target = Y_test.copy()
        else:
            targeted = False
            attack_params['targeted'] = False
            Y_test_target = Y_test.copy()
        Y_test_target_all = Y_test_all.copy()

        x_adv_fname = "%s_%s.pickle" % (task_id, attack_string)
        x_adv_fpath = os.path.join(X_adv_cache_folder, x_adv_fname)
        X_test_adv, aux_info = maybe_generate_adv_examples(
            sess, model, x, y, X_test, Y_test_target, attack_name,
            attack_params, use_cache=x_adv_fpath, verbose=FLAGS.verbose,
            attack_log_fpath=attack_log_fpath)
        if FLAGS.clip > 0:
            # This is L-inf clipping.
            X_test_adv = np.clip(X_test_adv, min_clip, max_clip)
        X_test_adv_list.append(X_test_adv)
        if isinstance(aux_info, float):
            duration = aux_info
        else:
            duration = aux_info['duration']
        dur_per_sample = duration / len(X_test_adv)

        # 5.0 Output predictions.
        Y_test_adv_pred = model.predict(X_test_adv)
        # predictions_fpath = os.path.join(predictions_folder, "%s.npy" % attack_string)
        # np.save(predictions_fpath, Y_test_adv_pred, allow_pickle=False)

        # 5.1 Evaluate the adversarial examples being discretized to uint8.
        print("\n---Attack (uint8): %s" % attack_string)
        # import utils.squeeze as squeezer
        # All data should be discretized to uint8.
        X_test_adv_discret = reduce_precision_py(X_test_adv, 256)
        # X_test_adv_discret = reduce_precision_py(X_test_adv, 2)
        X_test_adv_discretized_list.append(X_test_adv_discret)
        Y_test_adv_discret_pred = model.predict(X_test_adv_discret)
        Y_test_adv_discretized_pred_list.append(Y_test_adv_discret_pred)
        # Y_test_adv_discret_pred1 = to_categorical(np.argmax(model1.predict(X_test_adv_discret), axis=1))

        # LID-based detection on the adversarial examples.
        from LID.extract_artifacts_obfus import get_lid
        from LID.util_obfus import get_noisy_samples, random_split, block_split, \
            train_lr, compute_roc
        from sklearn.metrics import accuracy_score, precision_score, \
            recall_score, f1_score
        from sklearn.preprocessing import scale, MinMaxScaler, StandardScaler
        # from LID.extract_artifact import *
        # from LID_util import *
        X_test_noisy = get_noisy_samples(X_test, X_test_adv, 'mnist', 'fgsm')
        artifacts, labels = get_lid(model, X_test, X_test_noisy,
                                    X_test_adv_discret, 20, 100, 'mnist')
        # X = artifacts
        # Y = labels
        print(X_test_noisy.shape)
        # print(artifacts.shape)

        # Standardization.
        scaler = MinMaxScaler().fit(artifacts)
        artifacts = scaler.transform(artifacts)
        # X = scale(X)  # Z-norm

        # The test attack is the same as the training attack.
        X_train_lid, Y_train_lid, X_test_lid, Y_test_lid = block_split(artifacts,
                                                                       labels)

        ## Build detector.
        # print("LR Detector on [dataset: %s, train_attack: %s, test_attack: %s] with:" %
        #       (args.dataset, args.attack, args.test_attack))
        lr = train_lr(X_train_lid, Y_train_lid)

        ## Evaluate detector.
        y_pred_lid = lr.predict_proba(X_test_lid)[:, 1]
        y_label_pred = lr.predict(X_test_lid)
        Y_test_lid = np.reshape(Y_test_lid, Y_test_lid.shape[0])
        # AUC
        _, _, auc_score = compute_roc(Y_test_lid[:100], y_pred_lid[:100],
                                      plot=False)
        precision = precision_score(Y_test_lid[:100], y_label_pred[:100])
        recall = recall_score(Y_test_lid[:100], y_label_pred[:100])
        y_label_pred = lr.predict(X_test_lid[:100])
        acc = accuracy_score(Y_test_lid[:100], y_label_pred[:100])
        print('start measuring LID')
        print('Detector ROC-AUC score: %0.4f, accuracy: %.4f, precision: %.4f, '
              'recall(TPR): %.4f' % (auc_score, acc, precision, recall))
        from detections.base import evalulate_detection_test
        a, b, c, d, e = evalulate_detection_test(Y_test_lid[:100],
                                                 y_label_pred[:100])
        f1 = f1_score(Y_test_lid[:100], y_label_pred)
        print('SAE_acc: %0.4f, tpr: %.4f, fpr: %.4f, fdr (1 - precision): %.4f, '
              'fbr (false omission rate): %.4f, f1 score: %.4f'
              % (a, b, c, d, e, f1))
        print('end measuring LID')

    from utils.output import write_to_csv
    attacks_evaluation_csv_fpath = os.path.join(
        FLAGS.result_folder,
        "%s_attacks_%s_evaluation.csv" % (task_id, attack_string_hash))
    fieldnames = ['dataset_name', 'model_name', 'attack_string',
                  'duration_per_sample', 'discretization', 'success_rate',
                  'mean_confidence', 'mean_l2_dist', 'mean_li_dist',
                  'mean_l0_dist_value', 'mean_l0_dist_pixel']
    write_to_csv(to_csv, attacks_evaluation_csv_fpath, fieldnames)

    # 7. Detection experiment.
    # Example: --detection "FeatureSqueezing?distance_measure=l1&squeezers=median_smoothing_2,bit_depth_4,bilateral_filter_15_15_60;"
    if FLAGS.detection != '':
        from detections.base import DetectionEvaluator
        result_folder_detection = os.path.join(FLAGS.result_folder, "detection")
        csv_fname = "%s_attacks_%s_detection.csv" % (task_id, attack_string_hash)
        de = DetectionEvaluator(model, result_folder_detection, csv_fname,
                                FLAGS.dataset_name)
        Y_test_all_pred = model.predict(X_test_all)
        de.build_detection_dataset(X_test_all, Y_test_all, Y_test_all_pred,
                                   selected_idx, X_test_adv_discretized_list,
                                   Y_test_adv_discretized_pred_list,
                                   attack_string_list, attack_string_hash,
                                   FLAGS.clip, Y_test_target_ml,
                                   Y_test_target_ll)
        de.evaluate_detections(FLAGS.detection)
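# A hedged sketch of what the LID detector evaluation above computes, written
# with plain scikit-learn in place of the project's train_lr/compute_roc
# helpers (their exact behavior is an assumption, so this is an illustration,
# not a drop-in replacement).
from sklearn.linear_model import LogisticRegressionCV
from sklearn.metrics import roc_curve, auc

def fit_and_score_detector(X_train, y_train, X_test, y_test):
    # Fit a logistic-regression detector on LID features and report ROC-AUC.
    detector = LogisticRegressionCV(max_iter=1000).fit(X_train, y_train)
    scores = detector.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, scores)
    return detector, auc(fpr, tpr)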
def main(argv=None):
    # 0. Select a dataset.
    from datasets import MNISTDataset, CIFAR10Dataset, ImageNetDataset, LFWDataset
    from datasets import get_correct_prediction_idx, evaluate_adversarial_examples, \
        calculate_mean_confidence, calculate_accuracy
    from utils.parameter_parser import parse_params

    if FLAGS.dataset_name == "MNIST":
        dataset = MNISTDataset()
    elif FLAGS.dataset_name == "CIFAR-10":
        dataset = CIFAR10Dataset()
    elif FLAGS.dataset_name == "ImageNet":
        dataset = ImageNetDataset()
    elif FLAGS.dataset_name == "LFW":
        dataset = LFWDataset()

    # 1. Load a dataset.
    print("\n===Loading %s data..." % FLAGS.dataset_name)
    if FLAGS.dataset_name == 'ImageNet':
        if FLAGS.model_name == 'inceptionv3':
            img_size = 299
        else:
            img_size = 224
        X_test_all, Y_test_all = dataset.get_test_data(img_size, 0, 200)
    else:
        X_test_all, Y_test_all = dataset.get_test_dataset()

    # 2. Load a trained model.
    keras.backend.set_learning_phase(0)
    with tf.variable_scope(FLAGS.model_name):
        """
        Create a model instance for prediction.
        The scaling argument, 'input_range_type': {1: [0,1], 2: [-0.5,0.5], 3: [-1,1]...}
        """
        model = dataset.load_model_by_name(FLAGS.model_name, logits=False,
                                           input_range_type=1)
        model.compile(loss='categorical_crossentropy', optimizer='sgd',
                      metrics=['acc'])

    X_train_all, Y_train_all = dataset.get_train_dataset()
    if FLAGS.model_name in ['resnet20', 'resnet32', 'resnet44', 'resnet56',
                            'resnet110'] and FLAGS.dataset_name == 'CIFAR-10':
        # For ResNet: the model was trained on mean-subtracted inputs.
        x_train_mean = np.mean(X_train_all, axis=0)
        X_test_all -= x_train_mean

    # 3. Evaluate the trained model.
    print("Evaluating the pre-trained model...")
    Y_pred_all = model.predict(X_test_all)
    mean_conf_all = calculate_mean_confidence(Y_pred_all, Y_test_all)
    accuracy_all = calculate_accuracy(Y_pred_all, Y_test_all)
    print('Test accuracy on raw legitimate examples %.4f' % (accuracy_all))
    print('Mean confidence on ground truth classes %.4f' % (mean_conf_all))

    if FLAGS.attacks:
        from attacks import load_attack_input
        # attack_string = filter(lambda x: len(x) > 0, FLAGS.attacks.lower())
        attack_string = FLAGS.attacks.lower()
        correct_idx = get_correct_prediction_idx(Y_pred_all, Y_test_all)
        selected_idx = correct_idx[:100]
        X_test_all = load_attack_input(FLAGS.dataset_name, attack_string)
        Y_test_all = Y_test_all[selected_idx]

    if FLAGS.output_verifier != '' and FLAGS.attacks != '':
        resnet_names = ['resnet20', 'resnet32', 'resnet44', 'resnet56', 'resnet110']
        for ele in FLAGS.output_verifier.split(';'):
            if (ele in resnet_names and FLAGS.dataset_name == 'CIFAR-10'
                    and FLAGS.model_name not in resnet_names):
                x_train_mean = np.mean(X_train_all, axis=0)
                X_test_all -= x_train_mean
                break

    # 4. XEnsemble defense.
    input_verified = X_test_all

    # Input verifier layer.
    if FLAGS.input_verifier != '':
        from input_verifier_method import get_inverifier_by_name
        inverifier_names = [ele.strip() for ele in FLAGS.input_verifier.split(';')
                            if ele.strip() != '']
        for inverifier_name in inverifier_names:
            inverifier = get_inverifier_by_name(inverifier_name, 'python')
            input_verified = np.append(input_verified, inverifier(X_test_all),
                                       axis=0)

    if FLAGS.output_verifier == '':
        iter = input_verified.shape[0] / X_test_all.shape[0]
        batch_iter = X_test_all.shape[0]
        Y_pred = model.predict(input_verified[0:X_test_all.shape[0]])
        output = model.predict(input_verified[0:X_test_all.shape[0]])
        for i in range(int(iter)):
            if i > 0:
                output = np.append(
                    output,
                    model.predict(input_verified[i * batch_iter:(i + 1) * batch_iter]),
                    axis=0)
                Y_pred = Y_pred + model.predict(
                    input_verified[i * batch_iter:(i + 1) * batch_iter])
        Y_pred_inverified = Y_pred / iter

        #### TODO: only majority voting is provided here.
        from datasets.datasets_utils import calculate_msqueezer_accuracy
        avg = np.zeros((Y_pred_inverified.shape[0], dataset.num_classes))
        for idx in range(Y_pred_inverified.shape[0]):
            if np.max(Y_pred_inverified[idx]) >= 0.6:
                avg[idx] = Y_pred_inverified[idx]
            elif np.max(Y_pred_inverified[idx]) < 0.6:
                avg[idx] = Y_pred_inverified[idx] + 1
        accuracy, _, alert_bad = calculate_msqueezer_accuracy(avg, Y_test_all)
        print("Conf-L1 Test accuracy is %.4f, where correct pred: %.4f, "
              "detection: %.4f of the input verifier layer"
              % (accuracy + alert_bad / Y_pred_inverified.shape[0], accuracy,
                 alert_bad / Y_pred_inverified.shape[0]))
        accuracy = calculate_accuracy(Y_pred_inverified, Y_test_all)
        print('Majority Voting Test accuracy %.4f' % (accuracy))

    # Output verifier layer.
    if FLAGS.output_verifier != '':
        Y_pred_model_verified = np.zeros((X_test_all.shape[0], dataset.num_classes))
        model_verifier_names = [ele.strip() for ele in FLAGS.output_verifier.split(';')
                                if ele.strip() != '']
        selected_model_verifier_names = model_verifier_names
        size_base = len(model_verifier_names)
        size_team = size_base
        prediction_base = np.zeros((size_base, Y_test_all.shape[0],
                                    Y_test_all.shape[1]))
        prediction_base_train = np.zeros((size_base, 5000, Y_train_all.shape[1]))
        model_list = range(size_base)
        for i, model_verifier_name in enumerate(model_verifier_names):
            model_verifier = dataset.load_model_by_name(model_verifier_name,
                                                        logits=False,
                                                        input_range_type=1)
            prediction_base[i] = model_verifier.predict(X_test_all)
            locals()['model_verifier' + str(i)] = dataset.load_model_by_name(
                model_verifier_name, logits=False, input_range_type=1)
            prediction_base_train[i] = model_verifier.predict(X_train_all[:5000])
        model_list = [0, 1, 2]
        selected_model_verifier_names = []
        for i in range(len(model_list)):
            selected_model_verifier_names.append(model_verifier_names[i])

        # Ensemble on the selected models.
        for m, model_verifier_name in enumerate(selected_model_verifier_names):
            model_verifier = dataset.load_model_by_name(model_verifier_name,
                                                        logits=False,
                                                        input_range_type=1)
            model_verifier.compile(loss='categorical_crossentropy',
                                   optimizer='sgd', metrics=['acc'])
            iter = input_verified.shape[0] / X_test_all.shape[0]
            batch_iter = X_test_all.shape[0]
            Y_pred = model_verifier.predict(input_verified[0:X_test_all.shape[0]])
            # output = model_verifier.predict(input_verified[0:X_test_all.shape[0]])
            for i in range(int(iter)):
                if i > 0:
                    # output = np.append(output, model_verifier.predict(input_verified[i*batch_iter:(i+1)*batch_iter]), axis=0)
                    Y_pred = Y_pred + model_verifier.predict(
                        input_verified[i * batch_iter:(i + 1) * batch_iter])
            majority_weight = np.ones(size_team)
            weighted_pred = np.zeros((size_team, 5000, Y_train_all.shape[1]))
            Y_pred_model_verified = (Y_pred_model_verified
                                     + majority_weight[m] * Y_pred / iter)
        Y_pred_model_verified = Y_pred_model_verified / np.sum(majority_weight)

        from datasets.datasets_utils import calculate_msqueezer_accuracy
        avg = np.zeros((Y_pred_model_verified.shape[0], dataset.num_classes))
        for idx in range(Y_pred_model_verified.shape[0]):
            if np.max(Y_pred_model_verified[idx]) >= 0.6:
                avg[idx] = Y_pred_model_verified[idx]
            elif np.max(Y_pred_model_verified[idx]) < 0.6:
                avg[idx] = Y_pred_model_verified[idx] + 1
        accuracy, _, alert_bad = calculate_msqueezer_accuracy(avg, Y_test_all)
        print("Conf-L1 Test accuracy is %.4f, where correct pred: %.4f, detection: %.4f"
              % (accuracy + alert_bad / Y_pred_model_verified.shape[0], accuracy,
                 alert_bad / Y_pred_model_verified.shape[0]))
        accuracy = calculate_accuracy(Y_pred_model_verified, Y_test_all)
        print('Majority Voting Test accuracy %.4f' % (accuracy))

    # Comparison with other defenses.
    try:
        # Adversarial training.
        model_advt = dataset.load_model_by_name('cnn2_adv_trained', logits=False,
                                                input_range_type=1)
        pred_advt = model_advt(X_test_all)
        accuracy, _, alert_bad = calculate_msqueezer_accuracy(pred_advt, Y_test_all)
        print('Adversarial training Test accuracy %.4f' % (accuracy))

        # Defensive distillation.
        model_dd = dataset.load_model_by_name('distillation', logits=False,
                                              input_range_type=1)
        pred_dd = model_dd(X_test_all)
        accuracy, _, alert_bad = calculate_msqueezer_accuracy(pred_dd, Y_test_all)
        print('Defensive Distillation Test accuracy %.4f' % (accuracy))

        # Input transformation (random crop + rescale ensemble).
        if FLAGS.dataset_name == 'MNIST':
            ensembles_size = 10
            crop_size = 24
            image_size = 28
        if FLAGS.dataset_name == 'CIFAR-10':
            ensembles_size = 10
            crop_size = 28
            image_size = 32
        start_max = image_size - crop_size
        ensembles_def_pred = 0
        for i in range(ensembles_size):
            start_x = np.random.randint(0, start_max)
            start_y = np.random.randint(0, start_max)
            # boxes = [[start_y, start_x, start_y + frac, start_x + frac]]
            X_test_all_crop = X_test_all[:, start_x:start_x + crop_size,
                                         start_y:start_y + crop_size, :]
            if FLAGS.dataset_name == 'MNIST':
                X_test_all_rescale = np.zeros((X_test_all.shape[0], 28, 28, 1))
            if FLAGS.dataset_name == 'CIFAR-10':
                X_test_all_rescale = np.zeros((X_test_all.shape[0], 32, 32, 3))
            for j in range(X_test_all_crop.shape[0]):
                X_test_all_rescale[j] = rescale(X_test_all_crop[j],
                                                float(image_size) / crop_size)
            X_test_all_discret_rescale = reduce_precision_py(X_test_all_rescale, 256)
            # Feed the transformed input into the ensemble.
            pred = model.predict(X_test_all_discret_rescale)
            ensembles_def_pred = ensembles_def_pred + pred
        Y_defend_all = ensembles_def_pred / ensembles_size

        # All data should be discretized to uint8.
        X_test_all_discret = reduce_precision_py(X_test_all, 256)
        Y_test_all_discret_pred = model.predict(X_test_all_discret)
        accuracy, _, alert_bad = calculate_msqueezer_accuracy(
            Y_test_all_discret_pred, Y_test_all)
        print('Input transformation ensemble Test accuracy %.4f' % (accuracy))
    except:
        raise
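# A hedged sketch of one crop-and-rescale "view" from the input-transformation
# ensemble above, using scipy.ndimage.zoom in place of the (assumed
# skimage-style) `rescale` helper; an illustration, not the project's code.
import numpy as np
from scipy.ndimage import zoom

def random_crop_view(img, crop_size):
    # img: an HxWxC float array in [0, 1].
    h, w = img.shape[:2]
    y0 = np.random.randint(0, h - crop_size)
    x0 = np.random.randint(0, w - crop_size)
    crop = img[y0:y0 + crop_size, x0:x0 + crop_size, :]
    # Zoom the crop back to the original spatial size (channels untouched).
    return zoom(crop, (h / crop_size, w / crop_size, 1), order=1)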
def main(argv=None):
    # 0. Select a dataset.
    from datasets import MNISTDataset, CIFAR10Dataset, ImageNetDataset
    from datasets import get_correct_prediction_idx, evaluate_adversarial_examples, \
        calculate_mean_confidence, calculate_accuracy

    if FLAGS.dataset_name == "MNIST":
        dataset = MNISTDataset()
    elif FLAGS.dataset_name == "CIFAR-10":
        dataset = CIFAR10Dataset()
    elif FLAGS.dataset_name == "ImageNet":
        dataset = ImageNetDataset()

    # 1. Load a dataset.
    print("\n===Loading %s data..." % FLAGS.dataset_name)
    if FLAGS.dataset_name == 'ImageNet':
        if FLAGS.model_name == 'inceptionv3':
            img_size = 299
        else:
            img_size = 224
        X_test_all, Y_test_all = dataset.get_test_data(img_size, 0, 200)
    else:
        X_test_all, Y_test_all = dataset.get_test_dataset()

    # 2. Load a trained model.
    sess = load_tf_session()
    keras.backend.set_learning_phase(0)
    # Define input TF placeholders.
    x = tf.placeholder(tf.float32,
                       shape=(None, dataset.image_size, dataset.image_size,
                              dataset.num_channels))
    y = tf.placeholder(tf.float32, shape=(None, dataset.num_classes))
    with tf.variable_scope(FLAGS.model_name):
        """
        Create a model instance for prediction.
        The scaling argument, 'input_range_type': {1: [0,1], 2: [-0.5,0.5], 3: [-1,1]...}
        """
        model = dataset.load_model_by_name(FLAGS.model_name, logits=False,
                                           input_range_type=1)
        model.compile(loss='categorical_crossentropy', optimizer='sgd',
                      metrics=['acc'])

    # 3. Evaluate the trained model.
    # TODO: add top-5 accuracy for ImageNet.
    print("Evaluating the pre-trained model...")
    Y_pred_all = model.predict(X_test_all)
    mean_conf_all = calculate_mean_confidence(Y_pred_all, Y_test_all)
    accuracy_all = calculate_accuracy(Y_pred_all, Y_test_all)
    print('Test accuracy on raw legitimate examples %.4f' % (accuracy_all))
    print('Mean confidence on ground truth classes %.4f' % (mean_conf_all))

    # 4. Select some examples to attack.
    import hashlib
    from datasets import get_first_n_examples_id_each_class
    if FLAGS.select:
        # Filter out the misclassified examples.
        correct_idx = get_correct_prediction_idx(Y_pred_all, Y_test_all)
        if FLAGS.test_mode:
            # Only select the first example of each class.
            correct_and_selected_idx = get_first_n_examples_id_each_class(
                Y_test_all[correct_idx])
            selected_idx = [correct_idx[i] for i in correct_and_selected_idx]
        else:
            if not FLAGS.balance_sampling:
                selected_idx = correct_idx[:FLAGS.nb_examples]
            else:
                # Select the same number of examples for each class label.
                nb_examples_per_class = int(FLAGS.nb_examples / Y_test_all.shape[1])
                correct_and_selected_idx = get_first_n_examples_id_each_class(
                    Y_test_all[correct_idx], n=nb_examples_per_class)
                selected_idx = [correct_idx[i] for i in correct_and_selected_idx]
    else:
        selected_idx = np.array(range(FLAGS.nb_examples))

    from utils.output import format_number_range
    selected_example_idx_ranges = format_number_range(sorted(selected_idx))
    print("Selected %d examples." % len(selected_idx))
    print("Selected index in test set (sorted): %s" % selected_example_idx_ranges)
    X_test, Y_test, Y_pred = (X_test_all[selected_idx], Y_test_all[selected_idx],
                              Y_pred_all[selected_idx])

    # The accuracy should be 100%.
    accuracy_selected = calculate_accuracy(Y_pred, Y_test)
    mean_conf_selected = calculate_mean_confidence(Y_pred, Y_test)
    print('Test accuracy on selected legitimate examples %.4f' % (accuracy_selected))
    print('Mean confidence on ground truth classes, selected %.4f\n' % (mean_conf_selected))

    task = {}
    task['dataset_name'] = FLAGS.dataset_name
    task['model_name'] = FLAGS.model_name
    task['accuracy_test'] = accuracy_all
    task['mean_confidence_test'] = mean_conf_all
    task['test_set_selected_length'] = len(selected_idx)
    task['test_set_selected_idx_ranges'] = selected_example_idx_ranges
    task['test_set_selected_idx_hash'] = hashlib.sha1(
        str(selected_idx).encode('utf-8')).hexdigest()
    task['accuracy_test_selected'] = accuracy_selected
    task['mean_confidence_test_selected'] = mean_conf_selected

    task_id = "%s_%d_%s_%s" % \
        (task['dataset_name'], task['test_set_selected_length'],
         task['test_set_selected_idx_hash'][:5], task['model_name'])

    FLAGS.result_folder = os.path.join(FLAGS.result_folder, task_id)
    if not os.path.isdir(FLAGS.result_folder):
        os.makedirs(FLAGS.result_folder)
    from utils.output import save_task_descriptor
    save_task_descriptor(FLAGS.result_folder, [task])

    # 5. Generate adversarial examples.
    from attacks import maybe_generate_adv_examples
    from utils.squeeze import reduce_precision_py
    from utils.parameter_parser import parse_params
    attack_string_hash = hashlib.sha1(FLAGS.attacks.encode('utf-8')).hexdigest()[:5]
    sample_string_hash = task['test_set_selected_idx_hash'][:5]

    from datasets.datasets_utils import get_next_class, get_least_likely_class
    Y_test_target_next = get_next_class(Y_test)
    Y_test_target_ll = get_least_likely_class(Y_pred)

    X_test_adv_list = []
    X_test_adv_discretized_list = []
    Y_test_adv_discretized_pred_list = []

    # Materialize the filter so the list survives the attack loop and can be
    # reused by the robustness evaluation below.
    attack_string_list = list(filter(lambda x: len(x) > 0,
                                     FLAGS.attacks.lower().split(';')))
    to_csv = []

    X_adv_cache_folder = os.path.join(FLAGS.result_folder, 'adv_examples')
    adv_log_folder = os.path.join(FLAGS.result_folder, 'adv_logs')
    predictions_folder = os.path.join(FLAGS.result_folder, 'predictions')
    for folder in [X_adv_cache_folder, adv_log_folder, predictions_folder]:
        if not os.path.isdir(folder):
            os.makedirs(folder)
    predictions_fpath = os.path.join(predictions_folder, "legitimate.npy")
    np.save(predictions_fpath, Y_pred, allow_pickle=False)

    if FLAGS.clip >= 0:
        epsilon = FLAGS.clip
        print("Clip the adversarial perturbations by +-%f" % epsilon)
        max_clip = np.clip(X_test + epsilon, 0, 1)
        min_clip = np.clip(X_test - epsilon, 0, 1)

    for attack_string in attack_string_list:
        attack_log_fpath = os.path.join(adv_log_folder,
                                        "%s_%s.log" % (task_id, attack_string))
        attack_name, attack_params = parse_params(attack_string)
        print("\nRunning attack: %s %s" % (attack_name, attack_params))
        if 'targeted' in attack_params:
            targeted = attack_params['targeted']
            print("targeted value: %s" % targeted)
            if targeted == 'next':
                Y_test_target = Y_test_target_next
            elif targeted == 'll':
                Y_test_target = Y_test_target_ll
            elif targeted is False:
                attack_params['targeted'] = False
                Y_test_target = Y_test.copy()
        else:
            targeted = False
            attack_params['targeted'] = False
            Y_test_target = Y_test.copy()

        x_adv_fname = "%s_%s.pickle" % (task_id, attack_string)
        x_adv_fpath = os.path.join(X_adv_cache_folder, x_adv_fname)
        X_test_adv, aux_info = maybe_generate_adv_examples(
            sess, model, x, y, X_test, Y_test_target, attack_name,
            attack_params, use_cache=x_adv_fpath, verbose=FLAGS.verbose,
            attack_log_fpath=attack_log_fpath)
        if FLAGS.clip > 0:
            # This is L-inf clipping.
            X_test_adv = np.clip(X_test_adv, min_clip, max_clip)
        X_test_adv_list.append(X_test_adv)
        if isinstance(aux_info, float):
            duration = aux_info
        else:
            duration = aux_info['duration']
        dur_per_sample = duration / len(X_test_adv)

        # 5.0 Output predictions.
        Y_test_adv_pred = model.predict(X_test_adv)
        predictions_fpath = os.path.join(predictions_folder,
                                         "%s.npy" % attack_string)
        np.save(predictions_fpath, Y_test_adv_pred, allow_pickle=False)

        # 5.1 Evaluate the adversarial examples being discretized to uint8.
        print("\n---Attack (uint8): %s" % attack_string)
        # All data should be discretized to uint8.
        X_test_adv_discret = reduce_precision_py(X_test_adv, 256)
        X_test_adv_discretized_list.append(X_test_adv_discret)
        Y_test_adv_discret_pred = model.predict(X_test_adv_discret)
        Y_test_adv_discretized_pred_list.append(Y_test_adv_discret_pred)

        rec = evaluate_adversarial_examples(X_test, Y_test, X_test_adv_discret,
                                            Y_test_target.copy(), targeted,
                                            Y_test_adv_discret_pred)
        rec['dataset_name'] = FLAGS.dataset_name
        rec['model_name'] = FLAGS.model_name
        rec['attack_string'] = attack_string
        rec['duration_per_sample'] = dur_per_sample
        rec['discretization'] = True
        to_csv.append(rec)

    from utils.output import write_to_csv
    attacks_evaluation_csv_fpath = os.path.join(
        FLAGS.result_folder,
        "%s_attacks_%s_evaluation.csv" % (task_id, attack_string_hash))
    fieldnames = ['dataset_name', 'model_name', 'attack_string',
                  'duration_per_sample', 'discretization', 'success_rate',
                  'mean_confidence', 'mean_l2_dist', 'mean_li_dist',
                  'mean_l0_dist_value', 'mean_l0_dist_pixel']
    write_to_csv(to_csv, attacks_evaluation_csv_fpath, fieldnames)

    if FLAGS.visualize is True:
        from datasets.visualization import show_imgs_in_rows
        if FLAGS.test_mode or FLAGS.balance_sampling:
            selected_idx_vis = range(Y_test.shape[1])
        else:
            selected_idx_vis = get_first_n_examples_id_each_class(Y_test, 1)
        legitimate_examples = X_test[selected_idx_vis]
        rows = [legitimate_examples]
        rows += map(lambda x: x[selected_idx_vis], X_test_adv_list)
        img_fpath = os.path.join(
            FLAGS.result_folder,
            '%s_attacks_%s_examples.png' % (task_id, attack_string_hash))
        show_imgs_in_rows(rows, img_fpath)
        print('\n===Adversarial image examples are saved in ', img_fpath)
        # TODO: output the prediction and confidence for each example,
        # both legitimate and adversarial.

    # 6. Evaluate robust classification techniques.
    # Example: --robustness \
    #   "Base;FeatureSqueezing?squeezer=bit_depth_1;FeatureSqueezing?squeezer=median_filter_2;"
    if FLAGS.robustness != '':
        """
        Test the accuracy with robust classifiers.
        Evaluate the accuracy on all the legitimate examples.
        """
        from robustness import evaluate_robustness
        result_folder_robustness = os.path.join(FLAGS.result_folder, "robustness")
        fname_prefix = "%s_%s_robustness" % (task_id, attack_string_hash)
        evaluate_robustness(FLAGS.robustness, model, Y_test_all, X_test_all,
                            Y_test, attack_string_list,
                            X_test_adv_discretized_list, fname_prefix,
                            selected_idx_vis, result_folder_robustness)
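# reduce_precision_py is used throughout to snap float images onto the uint8
# grid before evaluation. A minimal numpy sketch of that quantization,
# assuming it rounds to npp evenly spaced levels in [0, 1] (an assumption
# about the helper's behavior, not its actual source):
import numpy as np

def reduce_precision(x, npp=256):
    levels = npp - 1
    return np.rint(x * levels) / levels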
def main(argv=None):
    # 0. Select a dataset.
    from datasets import MNISTDataset, CIFAR10Dataset, ImageNetDataset, LFWDataset
    from datasets import get_correct_prediction_idx, evaluate_adversarial_examples, \
        calculate_mean_confidence, calculate_accuracy
    from utils.parameter_parser import parse_params

    if FLAGS.dataset_name == "MNIST":
        dataset = MNISTDataset()
    elif FLAGS.dataset_name == "CIFAR-10":
        dataset = CIFAR10Dataset()
    elif FLAGS.dataset_name == "ImageNet":
        dataset = ImageNetDataset()
    elif FLAGS.dataset_name == "LFW":
        dataset = LFWDataset()

    # 1. Load a dataset.
    print("\n===Loading %s data..." % FLAGS.dataset_name)
    if FLAGS.dataset_name == 'ImageNet':
        if FLAGS.model_name == 'inceptionv3':
            img_size = 299
        else:
            img_size = 224
        X_test_all, Y_test_all = dataset.get_test_data(img_size, 0, 200)
    else:
        X_test_all, Y_test_all = dataset.get_test_dataset()

    # 2. Load a trained model.
    keras.backend.set_learning_phase(0)
    with tf.variable_scope(FLAGS.model_name):
        """
        Create a model instance for prediction.
        The scaling argument, 'input_range_type': {1: [0,1], 2: [-0.5,0.5], 3: [-1,1]...}
        """
        model = dataset.load_model_by_name(FLAGS.model_name, logits=False,
                                           input_range_type=1)
        model.compile(loss='categorical_crossentropy', optimizer='sgd',
                      metrics=['acc'])

    X_train_all, Y_train_all = dataset.get_train_dataset()
    if FLAGS.model_name in ['resnet20', 'resnet32', 'resnet44', 'resnet56',
                            'resnet110'] and FLAGS.dataset_name == 'CIFAR-10':
        # For ResNet: the model was trained on mean-subtracted inputs.
        x_train_mean = np.mean(X_train_all, axis=0)
        X_test_all -= x_train_mean

    # 3. Evaluate the trained model.
    print("Evaluating the pre-trained model...")
    Y_pred_all = model.predict(X_test_all)
    mean_conf_all = calculate_mean_confidence(Y_pred_all, Y_test_all)
    accuracy_all = calculate_accuracy(Y_pred_all, Y_test_all)
    print('Test accuracy on raw legitimate examples %.4f' % (accuracy_all))
    print('Mean confidence on ground truth classes %.4f' % (mean_conf_all))

    if FLAGS.attacks:
        from attacks import load_attack_input
        # attack_string = filter(lambda x: len(x) > 0, FLAGS.attacks.lower())
        attack_string = FLAGS.attacks.lower()
        correct_idx = get_correct_prediction_idx(Y_pred_all, Y_test_all)
        selected_idx = correct_idx[:100]
        X_test_all = load_attack_input(FLAGS.dataset_name, attack_string)
        Y_test_all = Y_test_all[selected_idx]

    # 4. Input denoise defense.
    input_verified = X_test_all

    # Input verifier layer.
    if FLAGS.input_verifier != '':
        from input_verifier_method import get_inverifier_by_name
        inverifier_names = [ele.strip() for ele in FLAGS.input_verifier.split(';')
                            if ele.strip() != '']
        for inverifier_name in inverifier_names:
            inverifier = get_inverifier_by_name(inverifier_name, 'python')
            input_verified = np.append(input_verified, inverifier(X_test_all),
                                       axis=0)

    iter = input_verified.shape[0] / X_test_all.shape[0]
    batch_iter = X_test_all.shape[0]
    Y_pred = model.predict(input_verified[0:X_test_all.shape[0]])
    output = model.predict(input_verified[0:X_test_all.shape[0]])
    for i in range(int(iter)):
        if i > 0:
            output = np.append(
                output,
                model.predict(input_verified[i * batch_iter:(i + 1) * batch_iter]),
                axis=0)
            Y_pred = Y_pred + model.predict(
                input_verified[i * batch_iter:(i + 1) * batch_iter])
    Y_pred_inverified = Y_pred / iter

    from datasets.datasets_utils import calculate_msqueezer_accuracy
    avg = np.zeros((Y_pred_inverified.shape[0], dataset.num_classes))
    for idx in range(Y_pred_inverified.shape[0]):
        if np.max(Y_pred_inverified[idx]) >= 0.6:
            avg[idx] = Y_pred_inverified[idx]
        elif np.max(Y_pred_inverified[idx]) < 0.6:
            avg[idx] = Y_pred_inverified[idx] + 1
    accuracy, _, alert_bad = calculate_msqueezer_accuracy(avg, Y_test_all)
    print("Conf-L1 Test accuracy is %.4f, where correct pred: %.4f, "
          "detection: %.4f of the input verifier layer"
          % (accuracy + alert_bad / Y_pred_inverified.shape[0], accuracy,
             alert_bad / Y_pred_inverified.shape[0]))
    accuracy = calculate_accuracy(Y_pred_inverified, Y_test_all)
    print('Majority Voting Test accuracy %.4f' % (accuracy))
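# A hedged sketch of the confidence-thresholded decision used above: average
# the ensemble's class probabilities, accept the argmax when the top
# confidence is at least 0.6, and flag the input as a detection otherwise
# (an interpretation of the avg/alert logic, not the project's helper).
import numpy as np

def confidence_vote(y_pred_avg, threshold=0.6):
    labels = y_pred_avg.argmax(axis=1)
    flagged = y_pred_avg.max(axis=1) < threshold
    return labels, flagged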
"""
Created on Fri Apr 24 17:11:35 2020

https://blog.aloni.org/posts/backprop-with-tensorflow/

@author: user
"""
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from datasets import MNISTDataset

mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
data = MNISTDataset(train_images.reshape([-1, 784]), train_labels,
                    test_images.reshape([-1, 784]), test_labels,
                    batch_size=128)


def experimentOne():
    training_steps = 1000
    lr = 0.05
    W1 = tf.Variable(np.random.rand(784, 10), dtype=tf.float32)
    b1 = tf.Variable(np.random.rand(1, 10), dtype=tf.float32)
    train_stats = {"acc": [], "steps": [], "loss": []}
    for step in range(training_steps):
        img_batch, lbl_batch = data.next_batch()
        with tf.GradientTape() as tape:
            logits = tf.matmul(img_batch, W1) + b1
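            # Hedged completion: the original snippet breaks off here. The
            # loss and update below are assumptions consistent with the setup
            # above (integer labels, plain SGD with the lr defined earlier).
            loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=tf.cast(lbl_batch, tf.int64), logits=logits))
        # Backprop through the tape and apply a manual SGD step.
        grads = tape.gradient(loss, [W1, b1])
        W1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        # Record the running statistics the dict above was set up for.
        preds = tf.argmax(logits, axis=1)
        acc = tf.reduce_mean(
            tf.cast(tf.equal(preds, tf.cast(lbl_batch, tf.int64)), tf.float32))
        train_stats["steps"].append(step)
        train_stats["loss"].append(float(loss))
        train_stats["acc"].append(float(acc))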
def main(argv=None): # 0. Select a dataset. from datasets import MNISTDataset, CIFAR10Dataset, ImageNetDataset, LFWDataset from datasets import get_correct_prediction_idx, evaluate_adversarial_examples2, calculate_mean_confidence, calculate_accuracy from utils.parameter_parser import parse_params if FLAGS.dataset_name == "MNIST": dataset = MNISTDataset() elif FLAGS.dataset_name == "CIFAR-10": dataset = CIFAR10Dataset() elif FLAGS.dataset_name == "ImageNet": dataset = ImageNetDataset() elif FLAGS.dataset_name == "LFW": dataset = LFWDataset() # 1. Load a dataset. print("\n===Loading %s data..." % FLAGS.dataset_name) if FLAGS.dataset_name == 'ImageNet': if FLAGS.model_name == 'inceptionv3': img_size = 299 else: img_size = 224 X_test_all, Y_test_all = dataset.get_test_data(img_size, 0, 200) else: X_test_all, Y_test_all = dataset.get_test_dataset() #z = np.where(Y_test_all == np.asarray([1, 0, 0, 0, 0, 0, 0, 0, 0, 0])) #LABEL SELECTION label = np.asarray([0] * Y_test_all.shape[1]) label[FLAGS.label_index] = 1 filter_indices = [] for i in range(len(Y_test_all)): if np.array_equal(Y_test_all[i], label): filter_indices.append(i) print(X_test_all.shape, Y_test_all.shape) X_test_all = np.take(X_test_all, filter_indices, 0) Y_test_all = np.take(Y_test_all, filter_indices, 0) print(X_test_all.shape, Y_test_all.shape) # 2. Load a trained model. sess = load_tf_session() keras.backend.set_learning_phase(0) # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, dataset.image_size, dataset.image_size, dataset.num_channels)) y = tf.placeholder(tf.float32, shape=(None, dataset.num_classes)) with tf.variable_scope(FLAGS.model_name): """ Create a model instance for prediction. The scaling argument, 'input_range_type': {1: [0,1], 2:[-0.5, 0.5], 3:[-1, 1]...} """ model = dataset.load_model_by_name(FLAGS.model_name, logits=False, input_range_type=1) model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['acc']) # 3. Evaluate the trained model. # TODO: add top-5 accuracy for ImageNet. print("Evaluating the pre-trained model...") Y_pred_all = model.predict(X_test_all) mean_conf_all = calculate_mean_confidence(Y_pred_all, Y_test_all) accuracy_all = calculate_accuracy(Y_pred_all, Y_test_all) print('Test accuracy on raw legitimate examples %.4f' % (accuracy_all)) print('Mean confidence on ground truth classes %.4f' % (mean_conf_all)) # 4. Select some examples to attack. import hashlib from datasets import get_first_n_examples_id_each_class if FLAGS.select: # Filter out the misclassified examples. correct_idx = get_correct_prediction_idx(Y_pred_all, Y_test_all) if FLAGS.test_mode: # Only select the first example of each class. correct_and_selected_idx = get_first_n_examples_id_each_class(Y_test_all[correct_idx]) selected_idx = [correct_idx[i] for i in correct_and_selected_idx] else: if not FLAGS.balance_sampling: # TODO: Possibly randomize this if FLAGS.random_image != 0: np.random.seed(FLAGS.random_image) print("RANDOM NUMBER") print(np.random.randint(100)) np.random.shuffle(correct_idx) selected_idx = correct_idx[:FLAGS.nb_examples] else: # select the same number of examples for each class label. 
                nb_examples_per_class = int(FLAGS.nb_examples / Y_test_all.shape[1])
                correct_and_selected_idx = get_first_n_examples_id_each_class(
                    Y_test_all[correct_idx], n=nb_examples_per_class)
                selected_idx = [correct_idx[i] for i in correct_and_selected_idx]
    else:
        selected_idx = np.array(range(FLAGS.nb_examples))

    from utils.output import format_number_range
    selected_example_idx_ranges = format_number_range(sorted(selected_idx))
    print("Selected %d examples." % len(selected_idx))
    print("Selected index in test set (sorted): %s" % selected_example_idx_ranges)

    X_test, Y_test, Y_pred = X_test_all[selected_idx], Y_test_all[selected_idx], Y_pred_all[selected_idx]
    # The accuracy should be 100%.
    accuracy_selected = calculate_accuracy(Y_pred, Y_test)
    mean_conf_selected = calculate_mean_confidence(Y_pred, Y_test)
    print('Test accuracy on selected legitimate examples %.4f' % accuracy_selected)
    print('Mean confidence on ground truth classes, selected %.4f\n' % mean_conf_selected)

    task = {}
    task['dataset_name'] = FLAGS.dataset_name
    task['model_name'] = FLAGS.model_name
    task['accuracy_test'] = accuracy_all
    task['mean_confidence_test'] = mean_conf_all
    task['test_set_selected_length'] = len(selected_idx)
    task['test_set_selected_idx_ranges'] = selected_example_idx_ranges
    task['test_set_selected_idx_hash'] = hashlib.sha1(str(selected_idx).encode('utf-8')).hexdigest()
    task['accuracy_test_selected'] = accuracy_selected
    task['mean_confidence_test_selected'] = mean_conf_selected

    # task_id = "%s_%d_%s_%s" % \
    #     (task['dataset_name'], task['test_set_selected_length'],
    #      task['test_set_selected_idx_hash'][:5], task['model_name'])
    task_id = "%s_%s" % (task['dataset_name'], task['model_name'])

    FLAGS.result_folder = os.path.join(FLAGS.result_folder, task_id)
    if os.path.exists(FLAGS.result_folder):
        print("RESULTS FOLDER")
        print(FLAGS.result_folder)
        shutil.rmtree(FLAGS.result_folder)
    if not os.path.isdir(FLAGS.result_folder):
        os.makedirs(FLAGS.result_folder)

    from utils.output import save_task_descriptor2
    save_task_descriptor2(FLAGS.result_folder, [task])

    # 5. Generate adversarial examples.
    from attacks import maybe_generate_adv_examples
    from utils.squeeze import reduce_precision_py
    # attack_string_hash = hashlib.sha1(FLAGS.attacks.encode('utf-8')).hexdigest()[:5]
    attack_string_hash = FLAGS.attacks.encode('utf-8')
    from datasets.datasets_utils import get_next_class, get_most_likely_class, get_least_likely_class

    Y_test_target_next = get_next_class(Y_test)
    Y_test_target_most = get_most_likely_class(Y_test)
    Y_test_target_ll = get_least_likely_class(Y_pred)

    X_test_adv_list = []
    X_test_adv_discretized_list = []
    Y_test_adv_discretized_pred_list = []

    attack_string_list = list(filter(lambda x: len(x) > 0, FLAGS.attacks.lower().split(';')))
    to_csv = []

    X_adv_cache_folder = os.path.join(FLAGS.result_folder, 'adv_examples')
    adv_log_folder = os.path.join(FLAGS.result_folder, 'adv_logs')
    predictions_folder = os.path.join(FLAGS.result_folder, 'predictions')
    for folder in [X_adv_cache_folder, adv_log_folder, predictions_folder]:
        if os.path.isdir(folder):
            # os.rmdir(folder)
            shutil.rmtree(folder)
    for folder in [X_adv_cache_folder, adv_log_folder, predictions_folder]:
        if not os.path.isdir(folder):
            os.makedirs(folder)

    predictions_fpath = os.path.join(predictions_folder, "legitimate.npy")
    np.save(predictions_fpath, Y_pred, allow_pickle=False)

    if FLAGS.clip >= 0:
        epsilon = FLAGS.clip
        print("Clip the adversarial perturbations by +-%f" % epsilon)
        max_clip = np.clip(X_test + epsilon, 0, 1)
        min_clip = np.clip(X_test - epsilon, 0, 1)

    for attack_string in attack_string_list:
        attack_log_fpath = os.path.join(adv_log_folder, "%s_%s.log" % (task_id, attack_string))
        attack_name, attack_params = parse_params(attack_string)
        print("\nRunning attack: %s %s" % (attack_name, attack_params))

        if 'targeted' in attack_params:
            targeted = attack_params['targeted']
            print("targeted value: %s" % targeted)
            if targeted == 'next':
                Y_test_target = Y_test_target_next
            elif targeted == 'most':
                Y_test_target = Y_test_target_most
            elif targeted == 'll':
                Y_test_target = Y_test_target_ll
            elif targeted is False:
                attack_params['targeted'] = False
                Y_test_target = Y_test.copy()
        else:
            targeted = False
            attack_params['targeted'] = False
            Y_test_target = Y_test.copy()

        x_adv_fname = "%s_%s.pickle" % (task_id, attack_string)
        x_adv_fpath = os.path.join(X_adv_cache_folder, x_adv_fname)
        X_test_adv, aux_info = maybe_generate_adv_examples(
            sess, model, x, y, X_test, Y_test_target, attack_name, attack_params,
            use_cache=x_adv_fpath, verbose=FLAGS.verbose, attack_log_fpath=attack_log_fpath)
        if FLAGS.clip > 0:
            # This is L-inf clipping.
            X_test_adv = np.clip(X_test_adv, min_clip, max_clip)
        X_test_adv_list.append(X_test_adv)

        if isinstance(aux_info, float):
            duration = aux_info
        else:
            duration = aux_info['duration']
        dur_per_sample = duration / len(X_test_adv)

        # 5.0 Output predictions.
        # Y_test_adv_pred = model.predict(X_test_adv)
        # predictions_fpath = os.path.join(predictions_folder, "%s.npy" % attack_string)
        # np.save(predictions_fpath, Y_test_adv_pred, allow_pickle=False)

        # 5.1 Evaluate the adversarial examples being discretized to uint8.
        print("\n---Attack (uint8): %s" % attack_string)
        # All data should be discretized to uint8.
        X_test_adv_discret = reduce_precision_py(X_test_adv, 256)
        X_test_adv_discretized_list.append(X_test_adv_discret)
        Y_test_adv_discret_pred = model.predict(X_test_adv_discret)
        Y_test_adv_discretized_pred_list.append(Y_test_adv_discret_pred)
        rec = evaluate_adversarial_examples2(X_test, Y_test, X_test_adv_discret,
                                             Y_test_target.copy(), targeted,
                                             Y_test_adv_discret_pred, attack_string)
        confidences = rec['confidence_scores']
        preds = np.argmax(Y_test_adv_discret_pred, axis=1)

        # Build comma-separated confidence/prediction strings; examples still
        # predicted as the original label get NaN confidence.
        k = 0
        confidence_scores = ""
        preds_after_attack = ""
        mean = 0
        for pred in preds:
            preds_after_attack += str(pred) + ","
            if pred == FLAGS.label_index:
                confidence_scores += str(float("nan")) + ","
            else:
                try:
                    confidence_scores += str(confidences[k]) + ","
                    mean += float(confidences[k])
                except Exception:
                    confidence_scores += str(float("nan")) + ","
            k += 1
        mean /= len(preds)

        rec['confidence_scores'] = confidence_scores.rstrip(",")
        rec['dataset_name'] = FLAGS.dataset_name
        rec['model_name'] = FLAGS.model_name
        rec['attack_string'] = attack_string
        rec['original_label_index'] = FLAGS.label_index
        rec['random'] = True if FLAGS.random_image != 0 else False
        rec['duration_per_sample'] = dur_per_sample
        rec['discretization'] = True
        rec['prediction_after_attack'] = preds_after_attack.rstrip(",")
        rec['number_of_images'] = FLAGS.nb_examples
        rec['mean_confidence'] = mean
        to_csv.append(rec)

    from utils.output import write_to_csv
    attacks_evaluation_csv_fpath = os.path.join(FLAGS.result_folder, "evaluation.csv")
    fieldnames = ['dataset_name', 'model_name', 'attack_string', 'original_label_index',
                  'random', 'duration_per_sample', 'discretization', 'success_rate',
                  'mean_confidence', 'confidence_scores', 'mean_l2_dist', 'mean_li_dist',
                  'mean_l0_dist_value', 'mean_l0_dist_pixel', 'prediction_after_attack',
                  'number_of_images']
    write_to_csv(to_csv, attacks_evaluation_csv_fpath, fieldnames)

    if FLAGS.visualize is True:
        from datasets.visualization import show_imgs_in_rows2
        if FLAGS.test_mode or FLAGS.balance_sampling:
            selected_idx_vis = range(Y_test.shape[1])
        else:
            # selected_idx_vis = get_first_n_examples_id_each_class(Y_test, 1)
            # selected_idx_vis = selected_idx
            selected_idx_vis = [i for i in range(FLAGS.nb_examples)]

        legitimate_examples = X_test[selected_idx_vis]
        rows = [legitimate_examples]
        rows += map(lambda x: x[selected_idx_vis], X_test_adv_list)

        img_fpath = os.path.join(FLAGS.result_folder,
                                 '%s_attacks_%s_examples.png' % (task_id, attack_string_hash))
        show_imgs_in_rows2(rows, dataset.num_channels, img_fpath)
        print('\n===Adversarial image examples are saved in ', img_fpath)

    print(Y_test_adv_discretized_pred_list)
    """rows = [legitimate_examples]
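Step 5.1 above relies on reduce_precision_py to snap float inputs onto the 256 levels representable in uint8 before re-predicting, so that the reported attack success reflects images that can actually be saved to disk. A minimal sketch of such a discretization, under the assumption that pixel values live in [0, 1] (illustrative, not the project's actual implementation):

import numpy as np

def reduce_precision_sketch(x, npp=256):
    # Quantize to npp evenly spaced levels in [0, 1]; with npp=256 this
    # matches what a round-trip through uint8 storage would produce.
    levels = npp - 1
    return np.rint(np.asarray(x) * levels) / levels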
                        help='how many gradients we wish to gather at each iteration')
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    # This is only a simple test case.
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    world_size = comm.Get_size()
    args = add_fit_args(argparse.ArgumentParser(description='PyTorch MNIST Single Machine Test'))

    # Fetch dataset.
    if args.dataset == "MNIST":
        mnist_data = mnist.read_data_sets(train_dir='./mnist_data', reshape=True)
        train_set = MNISTDataset(dataset=mnist_data.train, transform=transforms.ToTensor())
    elif args.dataset == "Cifar10":
        cifar10_data = cifar10.read_data_sets(padding_size=0, reshape=True)
        train_set = Cifar10Dataset(dataset=cifar10_data.train, transform=transforms.ToTensor())

    kwargs_master = {
        'batch_size': args.batch_size,
        'learning_rate': args.lr,
        'max_epochs': args.epochs,
        'momentum': args.momentum,
        'network': args.network,
        'comm_method': args.comm_type,
        'kill_threshold': args.num_aggregate,
    }
    kwargs_worker = {
        'batch_size': args.batch_size,
        'learning_rate': args.lr,
        'max_epochs': args.epochs,
        'momentum': args.momentum,
        'network': args.network,
        'comm_method': args.comm_type,
    }

    if rank == 0:
        master_fc_nn = SyncReplicasMaster_NN(comm=comm, **kwargs_master)
        master_fc_nn.build_model()
        print("I am the master: the world size is {}, cur step: {}".format(
            master_fc_nn.world_size, master_fc_nn.cur_step))
        master_fc_nn.train()
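    # The snippet breaks off after the master branch; presumably every non-zero
    # rank builds the matching worker from kwargs_worker. The branch below is a
    # sketch of that missing code -- the class name SyncReplicasWorker_NN and its
    # build_model()/train() interface are assumptions for illustration.
    else:
        worker_fc_nn = SyncReplicasWorker_NN(comm=comm, **kwargs_worker)
        worker_fc_nn.build_model()
        print("I am worker {} in a world of size {}".format(rank, world_size))
        worker_fc_nn.train()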
def main(argv=None):
    # 0. Select a dataset.
    from datasets import MNISTDataset, CIFAR10Dataset, ImageNetDataset
    from datasets import get_correct_prediction_idx, evaluate_adversarial_examples, \
        calculate_mean_confidence, calculate_accuracy

    if FLAGS.dataset_name == "MNIST":
        dataset = MNISTDataset()
    elif FLAGS.dataset_name == "CIFAR-10":
        dataset = CIFAR10Dataset()
    elif FLAGS.dataset_name == "ImageNet":
        dataset = ImageNetDataset()

    # 1. Load a dataset.
    print("\n===Loading %s data..." % FLAGS.dataset_name)
    if FLAGS.dataset_name == 'ImageNet':
        if FLAGS.model_name == 'inceptionv3':
            img_size = 299
        else:
            img_size = 224
        X_test_all, Y_test_all = dataset.get_test_data(img_size, 0, 200)
    else:
        X_test_all, Y_test_all = dataset.get_test_dataset()

    # 2. Load a trained model.
    sess = load_tf_session()
    keras.backend.set_learning_phase(0)
    # Define input TF placeholders.
    x = tf.placeholder(tf.float32,
                       shape=(None, dataset.image_size, dataset.image_size, dataset.num_channels))
    y = tf.placeholder(tf.float32, shape=(None, dataset.num_classes))

    with tf.variable_scope(FLAGS.model_name):
        # Create a model instance for prediction.
        # The scaling argument 'input_range_type': {1: [0, 1], 2: [-0.5, 0.5], 3: [-1, 1], ...}
        model = dataset.load_model_by_name(FLAGS.model_name, logits=False, input_range_type=1)
        model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['acc'])

    # 3. Evaluate the trained model.
    # TODO: add top-5 accuracy for ImageNet.
    print("Evaluating the pre-trained model...")
    Y_pred_all = model.predict(X_test_all)
    mean_conf_all = calculate_mean_confidence(Y_pred_all, Y_test_all)
    accuracy_all = calculate_accuracy(Y_pred_all, Y_test_all)
    print('Test accuracy on raw legitimate examples %.4f' % accuracy_all)
    print('Mean confidence on ground truth classes %.4f' % mean_conf_all)

    # 4. Select some examples to attack.
    import hashlib
    from datasets import get_first_example_id_each_class

    # Filter out the misclassified examples.
    correct_idx = get_correct_prediction_idx(Y_pred_all, Y_test_all)
    if FLAGS.test_mode:
        # Only select the first example of each class.
        correct_and_selected_idx = get_first_example_id_each_class(Y_test_all[correct_idx])
        selected_idx = [correct_idx[i] for i in correct_and_selected_idx]
    else:
        selected_idx = correct_idx[:FLAGS.nb_examples]

    from utils.output import format_number_range
    selected_example_idx_ranges = format_number_range(sorted(selected_idx))
    print("Selected %d examples."
          % len(selected_idx))
    print("Selected index in test set (sorted): %s" % selected_example_idx_ranges)

    X_test, Y_test, Y_pred = X_test_all[selected_idx], Y_test_all[selected_idx], Y_pred_all[selected_idx]
    accuracy_selected = calculate_accuracy(Y_pred, Y_test)
    mean_conf_selected = calculate_mean_confidence(Y_pred, Y_test)
    print('Test accuracy on selected legitimate examples %.4f' % accuracy_selected)
    print('Mean confidence on ground truth classes, selected %.4f\n' % mean_conf_selected)

    task = {}
    task['dataset_name'] = FLAGS.dataset_name
    task['model_name'] = FLAGS.model_name
    task['accuracy_test'] = accuracy_all
    task['mean_confidence_test'] = mean_conf_all
    task['test_set_selected_length'] = len(selected_idx)
    task['test_set_selected_idx_ranges'] = selected_example_idx_ranges
    task['test_set_selected_idx_hash'] = hashlib.sha1(str(selected_idx).encode('utf-8')).hexdigest()
    task['accuracy_test_selected'] = accuracy_selected
    task['mean_confidence_test_selected'] = mean_conf_selected

    task_id = "%s_%d_%s_%s" % \
        (task['dataset_name'], task['test_set_selected_length'],
         task['test_set_selected_idx_hash'][:5], task['model_name'])

    FLAGS.result_folder = os.path.join(FLAGS.result_folder, task_id)
    if not os.path.isdir(FLAGS.result_folder):
        os.makedirs(FLAGS.result_folder)

    from utils.output import save_task_descriptor
    save_task_descriptor(FLAGS.result_folder, [task])

    # 5. Generate adversarial examples.
    from attacks import maybe_generate_adv_examples, parse_attack_string
    from defenses.feature_squeezing.squeeze import reduce_precision_np
    attack_string_hash = hashlib.sha1(FLAGS.attacks.encode('utf-8')).hexdigest()[:5]
    sample_string_hash = task['test_set_selected_idx_hash'][:5]

    from attacks import get_next_class, get_least_likely_class
    Y_test_target_next = get_next_class(Y_test)
    Y_test_target_ll = get_least_likely_class(Y_pred)

    X_test_adv_list = []
    # Wrap in list() so the attack strings can be iterated more than once (Python 3).
    attack_string_list = list(filter(lambda x: len(x) > 0, FLAGS.attacks.lower().split(';')))
    to_csv = []

    X_adv_cache_folder = os.path.join(FLAGS.result_folder, 'adv_examples')
    adv_log_folder = os.path.join(FLAGS.result_folder, 'adv_logs')
    predictions_folder = os.path.join(FLAGS.result_folder, 'predictions')
    for folder in [X_adv_cache_folder, adv_log_folder, predictions_folder]:
        if not os.path.isdir(folder):
            os.makedirs(folder)

    predictions_fpath = os.path.join(predictions_folder, "legitimate.npy")
    np.save(predictions_fpath, Y_pred, allow_pickle=False)

    for attack_string in attack_string_list:
        attack_log_fpath = os.path.join(adv_log_folder, "%s_%s.log" % (task_id, attack_string))
        attack_name, attack_params = parse_attack_string(attack_string)
        print("\nRunning attack: %s %s" % (attack_name, attack_params))

        if 'targeted' in attack_params:
            targeted = attack_params['targeted']
            if targeted == 'next':
                Y_test_target = Y_test_target_next
            elif targeted == 'll':
                Y_test_target = Y_test_target_ll
        else:
            targeted = False
            attack_params['targeted'] = False
            Y_test_target = Y_test.copy()

        x_adv_fname = "%s_%s.pickle" % (task_id, attack_string)
        x_adv_fpath = os.path.join(X_adv_cache_folder, x_adv_fname)
        X_test_adv, aux_info = maybe_generate_adv_examples(
            sess, model, x, y, X_test, Y_test_target, attack_name, attack_params,
            use_cache=x_adv_fpath, verbose=FLAGS.verbose, attack_log_fpath=attack_log_fpath)
        X_test_adv_list.append(X_test_adv)

        if isinstance(aux_info, float):
            duration = aux_info
        else:
            print(aux_info)
            duration = aux_info['duration']
        dur_per_sample = duration / len(X_test_adv)

        # 5.0 Output predictions.
        Y_test_adv_pred = model.predict(X_test_adv)
        predictions_fpath = os.path.join(predictions_folder, "%s.npy" % attack_string)
        np.save(predictions_fpath, Y_test_adv_pred, allow_pickle=False)

        # 5.1 Evaluate the quality of adversarial examples.
        print("\n---Attack: %s" % attack_string)
        rec = evaluate_adversarial_examples(X_test, X_test_adv, Y_test_target.copy(),
                                            targeted, Y_test_adv_pred)
        print("Duration per sample: %.1fs" % dur_per_sample)
        rec['dataset_name'] = FLAGS.dataset_name
        rec['model_name'] = FLAGS.model_name
        rec['attack_string'] = attack_string
        rec['duration_per_sample'] = dur_per_sample
        rec['discretization'] = False
        to_csv.append(rec)

        # 5.2 Evaluate the adversarial examples discretized to uint8.
        print("\n---Attack (uint8): %s" % attack_string)
        X_test_adv_discret = reduce_precision_np(X_test_adv, 256)
        Y_test_adv_discret_pred = model.predict(X_test_adv_discret)
        rec = evaluate_adversarial_examples(X_test, X_test_adv_discret, Y_test_target.copy(),
                                            targeted, Y_test_adv_discret_pred)
        rec['dataset_name'] = FLAGS.dataset_name
        rec['model_name'] = FLAGS.model_name
        rec['attack_string'] = attack_string
        rec['duration_per_sample'] = dur_per_sample
        rec['discretization'] = True
        to_csv.append(rec)

    from utils.output import write_to_csv
    attacks_evaluation_csv_fpath = os.path.join(
        FLAGS.result_folder,
        "%s_attacks_%s_evaluation.csv" % (task_id, attack_string_hash))
    fieldnames = ['dataset_name', 'model_name', 'attack_string', 'duration_per_sample',
                  'discretization', 'success_rate', 'mean_confidence', 'mean_l2_dist',
                  'mean_li_dist', 'mean_l0_dist_value', 'mean_l0_dist_pixel']
    write_to_csv(to_csv, attacks_evaluation_csv_fpath, fieldnames)

    if FLAGS.visualize is True:
        from datasets.visualization import show_imgs_in_rows
        if FLAGS.test_mode:
            selected_idx_vis = range(Y_test.shape[1])
        else:
            selected_idx_vis = get_first_example_id_each_class(Y_test)

        legitimate_examples = X_test[selected_idx_vis]
        rows = [legitimate_examples]
        rows += map(lambda x: x[selected_idx_vis], X_test_adv_list)

        img_fpath = os.path.join(FLAGS.result_folder,
                                 '%s_attacks_%s_examples.png' % (task_id, attack_string_hash))
        show_imgs_in_rows(rows, img_fpath)
        print('\n===Adversarial image examples are saved in ', img_fpath)

    # TODO: output the prediction and confidence for each example, both legitimate and adversarial.

    # 6. Evaluate defense techniques.
    if FLAGS.defense == 'feature_squeezing':
        # Test the accuracy with feature squeezing filters.
        from defenses.feature_squeezing.robustness import calculate_squeezed_accuracy_new
        # Calculate the accuracy of legitimate examples only once.
        csv_fpath = "%s_%s_robustness.csv" % (task_id, attack_string_hash)
        print("Saving robustness test results at %s" % csv_fpath)
        csv_fpath = os.path.join(FLAGS.result_folder, csv_fpath)
        calculate_squeezed_accuracy_new(model, Y_test, X_test, attack_string_list,
                                        X_test_adv_list, csv_fpath)

    # 7. Detection experiment.
    # All data should be discretized to uint8.
    X_test_adv_discretized_list = [reduce_precision_np(X_test_adv, 256)
                                   for X_test_adv in X_test_adv_list]
    del X_test_adv_list

    if FLAGS.detection == 'feature_squeezing':
        from utils.detection import evalulate_detection_test, get_detection_train_test_set
        # 7.1 Prepare the dataset for detection.
        X_detect_train, Y_detect_train, X_detect_test, Y_detect_test, test_idx, failed_adv_idx = \
            get_detection_train_test_set(X_test_all, Y_test, X_test_adv_discretized_list,
                                         predict_func=model.predict)

        # 7.2 Enumerate all specified detection methods.
        #     Take Feature Squeezing as an example.
        csv_fname = "%s_attacks_%s_detection_two_filters_%s_raw_adv.csv" % \
            (task_id, attack_string_hash, FLAGS.detection)
        detection_csv_fpath = os.path.join(FLAGS.result_folder, csv_fname)
        to_csv = []

        from defenses.feature_squeezing.detection import FeatureSqueezingDetector
        from sklearn.metrics import roc_curve, auc

        fsd = FeatureSqueezingDetector(model, task_id, attack_string_hash)

        # TODO: Automatically get the suitable squeezers through a robustness test
        #       with legitimate examples.
        # squeezers_name = fsd.select_squeezers(X_test, Y_test, accuracy_preserved=0.9)
        if FLAGS.dataset_name == "MNIST":
            squeezers_name = ['median_smoothing_2', 'median_smoothing_3', 'binary_filter']
        elif FLAGS.dataset_name == "CIFAR-10":
            squeezers_name = ['bit_depth_6', 'median_smoothing_1_2', 'median_smoothing_2_1',
                              'median_smoothing_2']
        elif FLAGS.dataset_name == "ImageNet":
            squeezers_name = ['bit_depth_5', 'median_smoothing_1_2', 'median_smoothing_2_1',
                              'median_smoothing_2']

        # best_metrics = fsd.view_adv_propagation(X_test, X_test_adv_list[0], squeezers_name)
        # best_metrics = [[len(model.layers)-1, 'none', 'kl_f'], [len(model.layers)-1, 'none', 'l1'],
        #                 [len(model.layers)-1, 'none', 'l2'], [len(model.layers)-1, 'unit_norm', 'l1'],
        #                 [len(model.layers)-1, 'unit_norm', 'l2']]
        best_metrics = [[len(model.layers) - 1, 'none', 'l1']]

        for layer_id, normalizer_name, metric_name in best_metrics:
            fsd.set_config(layer_id, normalizer_name, metric_name, squeezers_name)
            print("===Detection config: Layer-%d, Metric-%s, Norm-%s"
                  % (layer_id, metric_name, normalizer_name))

            csv_fpath = "%s_distances_%s_%s_layer_%d.csv" % \
                (task_id, metric_name, normalizer_name, layer_id)
            csv_fpath = os.path.join(FLAGS.result_folder, csv_fpath)
            fsd.output_distance_csv([X_test_all] + X_test_adv_discretized_list,
                                    ['legitimate'] + attack_string_list, csv_fpath)
            # continue

            threshold = fsd.train(X_detect_train, Y_detect_train)
            Y_detect_pred, distances = fsd.test(X_detect_test)
            accuracy, tpr, fpr = evalulate_detection_test(Y_detect_test, Y_detect_pred)
            fprs, tprs, thresholds = roc_curve(Y_detect_test, distances)
            roc_auc = auc(fprs, tprs)
            print("ROC-AUC: %.2f, Accuracy: %.2f, TPR: %.2f, FPR: %.2f, Threshold: %.2f."
                  % (roc_auc, accuracy, tpr, fpr, threshold))

            ret = {}
            ret['threshold'] = threshold
            ret['accuracy'] = accuracy
            ret['fpr'] = fpr
            ret['tpr'] = tpr
            ret['roc_auc'] = roc_auc

            # Index of false negatives (index into Y_detect).
            fn_idx = np.where((Y_detect_test == True) & (Y_detect_pred == False))
            fn_idx_Y_test = np.array(test_idx)[fn_idx]
            nb_failed_as_negative = len(fn_idx_Y_test) - len(set(fn_idx_Y_test) - set(failed_adv_idx))
            print("%d/%d failed adv. examples in false negatives."
                  % (nb_failed_as_negative, len(fn_idx_Y_test)))
            ret['fn'] = len(fn_idx_Y_test)
            ret['failed_adv_as_fn'] = nb_failed_as_negative

            tp_idx = np.where((Y_detect_test == True) & (Y_detect_pred == True))
            tp_idx_Y_test = np.array(test_idx)[tp_idx]
            nb_failed_as_positive = len(tp_idx_Y_test) - len(set(tp_idx_Y_test) - set(failed_adv_idx))
            print("%d/%d failed adv. examples in true positives."
                  % (nb_failed_as_positive, len(tp_idx_Y_test)))

            ret['layer_id'] = layer_id
            ret['normalizer'] = normalizer_name
            ret['distance_metric'] = metric_name
            to_csv.append(ret)

        fieldnames = ['layer_id', 'distance_metric', 'normalizer', 'roc_auc', 'accuracy',
                      'tpr', 'fpr', 'threshold', 'failed_adv_as_fn', 'fn']
        write_to_csv(to_csv, detection_csv_fpath, fieldnames)
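The accuracy/TPR/FPR triple returned by evalulate_detection_test above can be reproduced from the boolean detection labels and predictions alone. A self-contained sketch of that computation (the helper below is illustrative, not the project's implementation):

import numpy as np

def detection_metrics_sketch(y_true, y_pred):
    """Accuracy, TPR and FPR for a boolean adversarial-example detector."""
    y_true = np.asarray(y_true, dtype=bool)
    y_pred = np.asarray(y_pred, dtype=bool)
    tp = np.sum(y_true & y_pred)      # adversarial, flagged
    fn = np.sum(y_true & ~y_pred)     # adversarial, missed
    fp = np.sum(~y_true & y_pred)     # legitimate, falsely flagged
    tn = np.sum(~y_true & ~y_pred)    # legitimate, passed
    accuracy = (tp + tn) / float(len(y_true))
    tpr = tp / float(tp + fn) if (tp + fn) else 0.0
    fpr = fp / float(fp + tn) if (fp + tn) else 0.0
    return accuracy, tpr, fpr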