def __init__(self, handle, dataset, train_op, session, epoch_step, batch_step,
             summary_writer, train_summary_op, img_summary_op, optimizer,
             GPU_collections, batch_size_placeholder, pretrained=False,
             adversarial_model=None, adversarial_attacks=None,
             adversarial_criterion=Misclassification(), saver_path="model.ckpt",
             num_adversarial_batches=4, batch_size=32, num_epochs=1000,
             train_summary_period=1000, val_summary_period=1000,
             adv_summary_period=1000):
    self.session = session
    self.saver_path = saver_path
    self.epoch = 0
    self.batch_i = 0
    self.handle = handle
    self.dataset = dataset
    self.train_op = train_op
    self.epoch_step = epoch_step
    self.epoch_step_increment = self.epoch_step.assign_add(1)
    self.batch_step = batch_step
    self.batch_placeholder = tf.placeholder(tf.int32, (), 'b_ph')
    self.batch_step_assign = tf.assign(self.batch_step, self.batch_placeholder)
    self.num_epochs = num_epochs
    self.batch_size = batch_size
    self.optimizer = optimizer
    self.GPU_collections = GPU_collections
    self.batch_size_placeholder = batch_size_placeholder

    # summary ops
    self.train_summary_op = train_summary_op
    self.img_summary_op = img_summary_op
    self.train_summary_period = train_summary_period
    self.val_summary_period = val_summary_period
    self.adv_summary_period = adv_summary_period
    self.summary_writer = summary_writer

    # validation
    self.val_top_one_mean = tf.placeholder(tf.float32, name='val_top_one_mean')
    self.val_top_five_mean = tf.placeholder(tf.float32, name='val_top_five_mean')
    val_summaries = []
    val_summaries.append(
        tf.summary.scalar('top_1_accuracy_validation', self.val_top_one_mean))
    val_summaries.append(
        tf.summary.scalar('top_5_accuracy_validation', self.val_top_five_mean))
    self.val_summary_op = tf.summary.merge(val_summaries, name='val_summaries_op')

    # Adversarial attacks
    self.num_adversarial_batches = num_adversarial_batches
    self.adversarial_criterion = adversarial_criterion
    self.adv_result = tf.placeholder(tf.float32, name='adv_results')
    self.adversarial_attacks = adversarial_attacks
    self.adversarial_model = adversarial_model
    default_distances = {
        'GradientAttack': MSE,
        'FGSM': MSE,
        'LinfinityBasicIterativeAttack': Linfinity,
        'L2BasicIterativeAttack': MSE,
        'ProjectedGradientDescentAttack': Linfinity,
        'DeepFoolAttack': MSE,
        'DeepFoolLinfinityAttack': Linfinity
    }
    self.attacks = dict()
    self.distances = dict()  # add support for custom distances
    self.adv_summaries = dict()
    for attack in self.adversarial_attacks:
        self.attacks[attack] = getattr(fb.attacks, attack)()
        if attack in default_distances.keys():
            self.distances[attack] = default_distances[attack]
        else:
            self.distances[attack] = MSE
        key = attack + '_median_dist'
        self.adv_summaries[attack] = tf.summary.scalar(key, self.adv_result)

    devices = device_lib.list_local_devices()
    GPU_devices = [dev.name for dev in devices if dev.device_type == 'GPU']
    self.num_GPUs = len(GPU_devices)
    self.pretrained = pretrained
    if self.dataset.train_handle is None:
        self.dataset.get_train_handle(self.session)
    self.saver = tf.train.Saver(tf.global_variables())
def generate_examples(model, config, pretrained_config, output_root):
    adv_example_filepath = os.path.join(output_root, 'examples')
    adv_targets_filepath = os.path.join(output_root, 'targets')

    # Set up adversarial attack.
    adv_model = PyTorchModel(model, (0, 1), pretrained_config.data.class_count,
                             cuda=config.cuda)
    criterion = Misclassification()
    attack = getattr(foolbox.attacks, config.name)(adv_model, criterion)

    # Get data.
    pretrained_config.cuda = config.cuda
    pretrained_config.optim.batch_size = config.data.batch_size
    data = load_data(pretrained_config)
    # print('Test Accuracy:{}'.format(loader_accuracy(model, data['test'])))

    n_examples = config['num_examples']
    n_batches = int(
        math.ceil((n_examples * 1.0) / pretrained_config.optim.batch_size))

    # Save the results of the computations in the following variables.
    adv_ex = torch.Tensor()
    adv_targets = torch.LongTensor()
    adv_mse = torch.Tensor()
    adv_inf = torch.Tensor()
    success = torch.Tensor()

    # Set up distance for the adversarial attack.
    distance_name = config.get('distance')
    distance = getattr(foolbox.distances, distance_name) if distance_name is not None \
        else foolbox.distances.MeanSquaredDistance

    # Perform the attack.
    for sample in tqdm(islice(data['validation'], n_batches), total=n_batches):
        x = sample[0]
        y = sample[1].type(torch.LongTensor)
        x = to_cuda(x, cuda=config.cuda)
        adv, adv_t, batch_success, batch_adv_mse, batch_adv_inf = batch_attack(
            attack, adv_model, criterion, x, y.cpu().numpy(),
            config['attack_kwargs'], distance)
        adv_ex = torch.cat([adv_ex, adv], 0)
        adv_targets = torch.cat([adv_targets, adv_t], 0)
        success = torch.cat([success, batch_success], 0)
        adv_mse = torch.cat([adv_mse, batch_adv_mse], 0)
        adv_inf = torch.cat([adv_inf, batch_adv_inf], 0)

    # evaluate_adv_grad_norms(model, adv_ex, adv_targets, config.cuda)

    # Summarize the results.
    results = {
        "success_rate": success.mean().item(),
        "defense_rate": 1 - success.mean().item(),
        "mean_mse": ((adv_mse * success).sum() / success.sum()).item(),
        "mean_inf": ((adv_inf * success).sum() / success.sum()).item(),
        "mse_quartiles": list(
            np.percentile(adv_mse[success == 1.0].numpy(),
                          [0, 25, 50, 75, 100]))
    }
    results["median_mse"] = results["mse_quartiles"][2]
    print("success rate: {}".format(results["success_rate"]))
    print("defense rate: {}".format(results["defense_rate"]))
    print("mean MSE for successful attacks: {}".format(results["mean_mse"]))
    print("mean L_inf for successful attacks: {}".format(results["mean_inf"]))
    print("MSE quartiles for successful attacks: {}".format(
        results["mse_quartiles"]))

    with open(os.path.join(config['output_root'], 'results.json'), 'w') as f:
        json.dump(results, f, sort_keys=True, indent=4)

    np.save(adv_example_filepath, adv_ex)
    np.save(adv_targets_filepath, adv_targets)
    print(
        accuracy(model,
                 to_cuda(adv_ex, cuda=config.cuda),
                 to_cuda(adv_targets, cuda=config.cuda)))
print("{} images found".format(len(paths)))
for path in paths[:100]:
    image = imread(path).astype(np.float32)
    test = image.copy()
    preds = kmodel.predict(preprocess_input(np.expand_dims(test, 0)))
    label = np.argmax(preds)
    #print("Top 3 predictions (regular: ", decode_predictions(preds, top=3))

    # run the attack
    print("running the attack")
    attack = MIM(model=fmodel, criterion=Misclassification())
    adversarial = attack(image[:, :, ::-1], label)
    if adversarial is None:
        print("Did not find an adversarial")
        continue

    # show results
    print(foolbox.utils.softmax(fmodel.predictions(adversarial))[781])
    adversarial_rgb = adversarial[np.newaxis, :, :, ::-1]
    preds = kmodel.predict(preprocess_input(adversarial_rgb.copy()))
    adv_label = np.argmax(preds)
    if adv_label != label:
        success += 1
    #print("Top 5 predictions (adversarial: ", decode_predictions(preds, top=5))
    diff = (adversarial_rgb[0] - image)
def BoundaryAttackPlusPlus(model=None, criterion=Misclassification(),
                           distance=MSE, threshold=None):
    warn("BoundaryAttackPlusPlus is deprecated; use HopSkipJumpAttack.")
    return LimitedHopSkipJumpAttack(model, criterion, distance, threshold)
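# Hedged usage sketch (not from the original source): call sites can keep the old name
# and still get the replacement attack object. `fmodel` is an assumed placeholder for an
# already-wrapped foolbox model; both lines build the same attack, the first with a warning.
fmodel = ...  # assumed placeholder
attack = BoundaryAttackPlusPlus(model=fmodel)  # emits the deprecation warning
attack = LimitedHopSkipJumpAttack(fmodel, Misclassification(), MSE, None)  # direct equivalent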
def __init__(self, model=None, criterion=Misclassification()):
    super(BoundaryAttack, self).__init__(model=model, criterion=criterion)
dknn = DKNNL2(net, x_train, y_train, x_valid, y_valid, layers,
              k=75, num_classes=10)
# dknn = DKNNL2Approx(net, x_train, y_train, x_valid, y_valid, layers,
#                     k=1, num_classes=10)

y_pred = dknn.classify(x_test)
ind = np.where(y_pred.argmax(1) == y_test.numpy())[0]
print((y_pred.argmax(1) == y_test.numpy()).sum() / y_test.size(0))

dknn_fb = DkNNFoolboxModel(dknn, (0, 1), 1, preprocessing=(0, 1))
criterion = Misclassification()
distance = MeanSquaredDistance
attack = foolbox.attacks.BoundaryAttack(model=dknn_fb,
                                        criterion=criterion,
                                        distance=distance)
attack_params = {
    'iterations': 5000,
    'max_directions': 25,
    'starting_point': None,
    'initialization_attack': None,
    'log_every_n_steps': 100,
    'spherical_step': 0.5,
    'source_step': 0.05,
    'step_adaptation': 1.5,
import foolbox
from foolbox.models import KerasModel
from foolbox.attacks import LBFGSAttack
from foolbox.criteria import TargetClassProbability, Misclassification
import numpy as np
import keras
from keras.models import load_model
import matplotlib.pyplot as plt

kmodel = load_model('./LeNet.h5')
preprocessing = (np.array([104, 116, 123]), 1)
fmodel = KerasModel(kmodel, bounds=(0, 255))
attack = LBFGSAttack(model=fmodel, criterion=Misclassification())

adversarial_imgs = []
adversarial_labels = []
# adversarial_imgs = np.asarray(adversarial_imgs)
# adversarial_labels = np.asarray(adversarial_labels)
# print(type(adversarial_imgs))

img_temp = np.load('./mnist_pure/x_test.npy')
# print(img_temp.shape)
img_temp = np.asarray(img_temp, dtype=np.float32)
# print(img_temp[0].shape)
label_temp = np.load('./mnist_pure/y_test.npy')
label_temp = np.asarray(label_temp, dtype=np.float32)

for i in range(0, 10000):
    adversarial = attack(img_temp[i], label_temp[i])
    if adversarial is None:
        # LBFGS can fail to find an adversarial; skip the sample instead of
        # crashing on fmodel.predictions(None) below.
        continue
    adversarial_imgs.append(adversarial)
    adv_labels = np.argmax(fmodel.predictions(adversarial))
    adversarial_labels.append(adv_labels)
def generate_images(self, data_loaders, portion, fraction, epoch):
    all_images_adversarial, all_adv_preds, adv_image_ids = None, np.array([]), np.array([])
    total_possible_adv, created_adv = 0, 0
    for idx, (image_ids, inputs, labels, protected_class) in enumerate(data_loaders[portion]):
        print('Epoch: {}'.format(idx))
        image_ids, inputs, labels, protected_class = self.subsample(
            image_ids, inputs, labels, protected_class, fraction)
        indices_to_consider, all_images_adversarial, all_adv_preds, adv_image_ids = self.load_from_disk(
            image_ids, inputs, labels, protected_class,
            epsilon=self.attack_call_kwargs['epsilons'], epoch=epoch)
        image_ids, inputs, labels, protected_class = (
            image_ids[indices_to_consider], inputs[indices_to_consider],
            labels[indices_to_consider], protected_class[indices_to_consider])
        inputs, labels, image_ids = inputs.to(self.device), labels.to(
            self.device), image_ids.to(self.device)

        predicted_classes = self.model.model_ft(inputs.double())
        _, predicted_classes = torch.max(predicted_classes, 1)
        mask = predicted_classes == labels  # only attack correctly classified inputs
        image_ids, inputs, labels, predicted_classes = (
            image_ids[mask], inputs[mask], labels[mask], predicted_classes[mask])

        # The input taken by the attack is a channels-first image that is not normalized.
        # (It will be mean-normalized later on by foolbox; mean and std are passed through fmodel.)
        inputs_ready_for_attack = hp.inverse_transpose_images(
            inputs, self.ds.data_transform)
        # The attack expects channels-first images that are not preprocessed.
        inputs_ready_for_attack = np.moveaxis(inputs_ready_for_attack, -1, 1)
        inputs_ready_for_attack = torch.tensor(inputs_ready_for_attack,
                                               device=self.device)

        # The returned tuple contains 3 elements:
        # (perturbed inputs, perturbed inputs clipped to maximum epsilon,
        #  an array indicating if the adversarial attack was a success).
        criterion = Misclassification(labels)  # untargeted attacks only (for now)
        tup = self.attack(model=self.fmodel,
                          inputs=inputs_ready_for_attack.double(),
                          criterion=criterion,
                          **self.attack_call_kwargs)
        # This runs into issues with pickling a pytorch model as defined in foolbox:
        # tup = self.parallel_attack(model=self.fmodel, inputs=inputs_ready_for_attack.double(),
        #                            labels=labels, kwargs=self.attack_call_kwargs)

        if "deepfool" in self.name:
            # Epsilon is None for DeepFool, which means the attacker is allowed as much
            # perturbation as needed, so the first 2 elements have to be the same.
            assert np.all((tup[0] == tup[1]).cpu().numpy())

        # These images are NOT normalized and are channels first.
        adversarial_images = tup[1]

        # Sanity checks
        assert adversarial_images.shape == inputs.shape
        for obj in adversarial_images:
            assert obj is not None

        # Normalize the attacked images for inference; they are already channels first,
        # so there is no need to move the axis.
        adversarial_images_for_inference = adversarial_images - self.mean
        adversarial_images_for_inference /= self.std
        predictions_on_attacked = self.model(adversarial_images_for_inference.double())
        _, predictions_on_attacked = torch.max(predictions_on_attacked, 1)

        # tup[2] marks where foolbox considered the attack a success, but that flag is
        # not correct in all cases. To be completely sure, take a bitwise AND with what
        # we actually observe as adversarial.
        adversarial_mask = tup[2] & (predictions_on_attacked != labels)
        total_possible_adv += len(adversarial_mask)
        created_adv += np.count_nonzero(adversarial_mask.cpu().numpy())

        adversarial_images = adversarial_images[adversarial_mask].cpu().numpy()
        predictions_on_attacked = predictions_on_attacked[adversarial_mask].cpu().numpy()
        inputs = inputs[adversarial_mask].cpu().numpy()
        image_ids = image_ids[adversarial_mask].cpu().numpy()
        labels = labels[adversarial_mask].cpu().numpy()

        # At this point whatever we have should be adversarial.
        assert np.all(labels != predictions_on_attacked)
        if len(adversarial_images) == 0:
            continue

        # Visual sanity checks: these are not normalized, so just move the axis.
        image_adv = np.moveaxis(adversarial_images[0], 0, -1)
        # image_adv = hp.inverse_transpose_images(adversarial_images[0], self.ds.data_transform)
        image_original = hp.inverse_transpose_images(
            inputs[0], self.ds.data_transform)
        stacked_image = np.concatenate((image_adv, image_original), axis=1)
        self.plot_example(stacked_image, labels, predictions_on_attacked)

        all_images_adversarial = adversarial_images if all_images_adversarial is None else np.concatenate(
            (all_images_adversarial, adversarial_images))
        all_adv_preds = np.concatenate((all_adv_preds, predictions_on_attacked))
        adv_image_ids = np.concatenate((adv_image_ids, image_ids))

    # Adversarial images are channels first and NOT normalized!
    return all_images_adversarial, all_adv_preds, adv_image_ids, total_possible_adv, created_adv
        model = convolutional.leNet_pooling(dropout, 0, 0)
    elif args.dropout_type == 'dense':
        model = convolutional.leNet_dense(dropout, 0, 0)
    else:
        raise Exception("Invalid dropout style!")
else:
    raise Exception("Invalid model!")

model.fit(x_train, y_train, epochs=50, batch_size=128)
preds = np.argmax(model.predict(x_test), axis=1)
kmodel = KerasModel(model=model, bounds=(min_, max_))

attack = None
if args.attack_type == 'l2':
    attack = CarliniWagnerL2Attack(kmodel, Misclassification())
elif args.attack_type == 'linf':
    attack = RandomPGD(kmodel, Misclassification())

x_sample = x_test[:1000]
y_sample = y_test[:1000]

adversarial = None
if args.attack_type == 'l2':
    adversarial = attack(x_sample, np.argmax(y_sample, axis=1),
                         binary_search_steps=5, max_iterations=600)
else:
    adversarial = attack(x_sample, np.argmax(y_sample, axis=1), iterations=30)

failed = 0
misclassified = 0
def LinfPGD_attack_func(f_model, inputs, labels):
    device = f_model.device
    inputs = inputs.to(device)
    criterions = Misclassification(labels.to(device))
    return LinfPGD_attack(model=f_model, inputs=inputs,
                          criterion=criterions, epsilons=epsilons)
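# Hedged wiring sketch (not from the original source): one way the helper above might be
# set up. The toy network, the random images/labels, and the epsilon grid are assumptions
# made purely for illustration; `LinfPGD_attack` is taken to be a foolbox 3 LinfPGD
# instance, which is what the call signature (model, inputs, criterion, epsilons) suggests.
import foolbox as fb
import torch
import torch.nn as nn

net = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10)).eval()  # assumed stand-in model
f_model = fb.PyTorchModel(net, bounds=(0, 1))
LinfPGD_attack = fb.attacks.LinfPGD()
epsilons = [0.05, 0.1, 0.3]  # assumed epsilon grid

images = torch.rand(8, 1, 28, 28)          # assumed inputs in the model's (0, 1) bounds
labels = torch.randint(0, 10, (8,))        # assumed ground-truth labels

# raw: unclipped adversarials, clipped: per-epsilon projections,
# success: boolean tensor of shape (len(epsilons), batch)
raw, clipped, success = LinfPGD_attack_func(f_model, images, labels)
print(success.float().mean(dim=-1))  # attack success rate per epsilon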
        x_train.shape[1:], dropout, dropout)
elif args.experiment_type == "six_layer_dnn":
    kmodel = neural_networks.asymmetric_six_layer_nn_foolbox(
        x_train.shape[1:], dropout, dropout)
elif args.experiment_type == "VGG":
    kmodel = convolutional.mini_VGG_foolbox(dropout, dropout, 0, "mnist")
elif args.experiment_type == "leNet5":
    kmodel = convolutional.leNet_cnn_foolbox(dropout, dropout, "mnist")

# kmodel.fit(x_train, y_train, epochs=10, batch_size=128)
kmodel.fit(x_train, y_train, epochs=50, batch_size=128)
preds = np.argmax(kmodel.predict(x_test), axis=1)

# attack = CarliniWagnerL2Attack(kmodel, Misclassification())
attack = RandomPGD(kmodel, Misclassification())

# x_sample = x_test[:10]
# y_sample = y_test[:10]
x_sample = x_test[:1000]
y_sample = y_test[:1000]

# adversarial = attack(x_sample, np.argmax(y_sample, axis=1), binary_search_steps=5, max_iterations=600)
adversarial = attack(x_sample, np.argmax(y_sample, axis=1), iterations=30)

# For those samples for which the L2 method does not produce an adversarial sample within
# the attack parameters, we exclude them from the perturbation evaluation.
failed = 0
misclassified = 0
images = images_all[i:i + 1].to(device)
labels = labels_all[i:i + 1].to(device)

if args.targeted:
    imgTarget = images_tgt[i:i + 1].to(device)
    classVec = labels_tgt[i:i + 1].to(device)
    criterion = TargetedMisclassification(classVec)
    attack = attacksODS.BoundaryAttack(tensorboard=False,
                                       steps=args.num_step,
                                       surrogate_models=surrogate_model_list,
                                       ODS=args.ODS)
    advs = attack.run(fmodel, images, criterion, starting_points=imgTarget)
    history = attack.normHistory
else:
    criterion = Misclassification(labels)
    attack = attacksODS.BoundaryAttack(init_attack=None,
                                       tensorboard=False,
                                       steps=args.num_step,
                                       surrogate_models=surrogate_model_list,
                                       ODS=args.ODS)
    advs = attack.run(fmodel, images, criterion)
    history = attack.normHistory

print('image %d: query %d, current dist = %.4f' %
      (i + 1, args.num_step, (advs[0] - images[0]).norm()))
distList_finalstep[i] = (advs[0] - images[0]).norm()
distListAll[i] = history
prefix = '_targeted' if args.targeted else ''
def run_adv_hyper(args, hypernet):
    arch = get_network(args)
    model_base, fmodel_base = sample_fmodel(args, hypernet, arch)
    criterion = Misclassification()
    fgs = foolbox.attacks.BIM(fmodel_base, criterion)
    _, test_loader = datagen.load_mnist(args)
    adv, y = [], []
    for n_models in [10, 100, 1000]:
        print('ensemble of {}'.format(n_models))
        for eps in [0.01, 0.03, 0.08, 0.1, 0.3, 0.5, 1.0]:
            total_adv = 0
            acc, _accs = [], []
            _vars, _stds, _ents = [], [], []
            for idx, (data, target) in enumerate(test_loader):
                data, target = data.cuda(), target.cuda()
                adv_batch, target_batch, _ = sample_adv_batch(
                    data, target, fmodel_base, eps, fgs)
                if adv_batch is None:
                    continue
                output = model_base(adv_batch)
                pred = output.data.max(1, keepdim=True)[1]
                correct = pred.eq(target_batch.data.view_as(pred)).long().cpu().sum()
                n_adv = len(target_batch) - correct.item()
                total_adv += n_adv
                padv = np.argmax(
                    fmodel_base.predictions(adv_batch[0].cpu().numpy()))

                sample_adv, pred_labels, logits = [], [], []
                for _ in range(n_models):
                    model, fmodel = sample_fmodel(args, hypernet, arch)
                    output = model(adv_batch)
                    pred = output.data.max(1, keepdim=True)[1]
                    correct = pred.eq(target_batch.data.view_as(pred)).long().cpu().sum()
                    acc.append(correct.item())
                    n_adv_sample = len(target_batch) - correct.item()
                    sample_adv.append(n_adv_sample)
                    pred_labels.append(pred.view(pred.numel()))
                    logits.append(F.softmax(output, dim=1))

                p_labels = torch.stack(pred_labels).float().transpose(0, 1)
                if len(p_labels) > 1:
                    p_labels_cols = p_labels.transpose(0, 1)
                    modes = mode(p_labels_cols)[0][0]
                    mode_chart = []
                    for i in range(len(modes)):
                        v = len(np.setdiff1d(p_labels[i], modes[i],
                                             assume_unique=False))
                        mode_chart.append(v)
                    _vars.append(torch.tensor(mode_chart).float().mean())
                    _ents.append(
                        np.apply_along_axis(entropy, 1, p_labels.detach()).mean())

                acc = torch.tensor(acc, dtype=torch.float)
                _accs.append(torch.mean(acc))
                acc, adv, y = [], [], []
            # plot_entropy(args, _ents, eps)
            print('Eps: {}, Adv: {}/{}, var: {}, ent: {}'.format(
                eps, total_adv, len(test_loader.dataset),
                torch.tensor(_vars).mean(), torch.tensor(_ents).mean()))
from scipy.misc import imread, imresize, imsave
import os
import numpy as np
import tensorflow as tf
from matplotlib import gridspec
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

from foolbox.criteria import TopKMisclassification, Misclassification

from vgg16 import Vgg16

image_dict = {'tabby': 281, 'laska': 356, 'mastiff': 243}
attack_list = ['FGSM', 'IterGS', 'SalMap']
criteria_dict = {
    'topkmis': TopKMisclassification(k=10),
    'mis': Misclassification()
}

name1 = 'tabby'
attack_type = "FGSM"
criterion_type = "topkmis"


def softmax_np(x, axis=None):
    return np.exp(x) / np.sum(np.exp(x), axis=axis)


def main():
    data_dir = "data_imagenet"
    train_dir = "adv_results_vgg16"
    if args.dropout_type == "pooling":
        kmodel = convolutional.mini_VGG_foolbox(dropout, 0, 0, "mnist")
    else:
        kmodel = convolutional.mini_VGG_foolbox(0, dropout, 0, "mnist")
elif args.experiment_type == "leNet5":
    if args.dropout_type == "pooling":
        kmodel = convolutional.leNet_cnn_foolbox(dropout, 0, "mnist")
    else:
        kmodel = convolutional.leNet_cnn_foolbox(0, dropout, "mnist")

# kmodel.fit(x_train, y_train, epochs=1, batch_size=128)
kmodel.fit(x_train, y_train, epochs=50, batch_size=128)
preds = np.argmax(kmodel.predict(x_test), axis=1)

attack = CarliniWagnerL2Attack(kmodel, Misclassification())

# x_sample = x_test[:10]
# y_sample = y_test[:10]
x_sample = x_test[:1000]
y_sample = y_test[:1000]

adversarial = attack(x_sample, np.argmax(y_sample, axis=1),
                     binary_search_steps=5, max_iterations=600)

# For those samples for which the L2 method does not produce an adversarial sample within
# the attack parameters, we exclude them from the perturbation evaluation.
failed = 0
def run_adv_hyper(args, hypernet):
    arch = get_network(args)
    models, fmodels = [], []
    #for i in range(10):
    #    model_base, fmodel_base = sample_fmodel(args, hypernet, arch)
    #    models.append(model_base)
    #    fmodels.append(fmodel_base)
    #fmodel_base = attacks.load_model(FusedNet(models))
    model_base, fmodel_base = sample_fmodel(args, hypernet, arch)
    criterion = Misclassification()
    fgs = foolbox.attacks.FGSM(fmodel_base, criterion)
    _, test_loader = datagen.load_mnist(args)
    adv, y = [], []
    for n_models in [10, 100, 800]:
        print('ensemble of {}'.format(n_models))
        for eps in [0.01, 0.03, 0.08, 0.1, 0.3, 0.5, 1.0]:
            total_adv = 0
            acc, _accs = [], []
            _soft, _logs, _vars, _ents = [], [], [], []
            _soft_adv, _logs_adv, _vars_adv, _ents_adv = [], [], [], []
            for idx, (data, target) in enumerate(test_loader):
                data, target = data.cuda(), target.cuda()
                adv_batch, target_batch, _ = sample_adv_batch(
                    data, target, fmodel_base, eps, fgs)
                if adv_batch is None:
                    continue

                # get base hypermodel output, I guess
                output = model_base(adv_batch)
                pred = output.data.max(1, keepdim=True)[1]
                correct = pred.eq(target_batch.data.view_as(pred)).long().cpu().sum()
                n_adv = len(target_batch) - correct.item()
                total_adv += n_adv

                #dis = []
                soft_out, pred_out, logits = [], [], []
                soft_out_adv, pred_out_adv, logits_adv = [], [], []
                for n in range(n_models):
                    model, fmodel = sample_fmodel(args, hypernet, arch)
                    output = model(data)
                    soft_out.append(F.softmax(output, dim=1))
                    #pred_out.append(output.data.max(1, keepdim=True)[1])
                    #logits.append(output)
                    output = model(adv_batch)
                    soft_out_adv.append(F.softmax(output, dim=1))
                    #pred_out_adv.append(output.data.max(1, keepdim=True)[1])
                    #logits_adv.append(output)
                    ## correction graph
                    #pred = output.data.max(1, keepdim=True)[1]
                    #correct = pred.eq(target_batch.data.view_as(pred)).long().cpu().sum()
                    #c = len(pred_out) - correct.item()
                    #print ('got {} / {} / {}'.format(correct.item(), len(target_batch), 32))
                    #dis.append(correct.item()/n_adv)
                ##
                #np.save('/scratch/eecs-share/ratzlafn/acc.npy', np.array(dis))
                #sys.exit(0)
                softs = torch.stack(soft_out).float()
                #preds = torch.stack(pred_out).float()
                #logs = torch.stack(logits).float()
                softs_adv = torch.stack(soft_out_adv).float()
                #preds_adv = torch.stack(pred_out_adv).float()
                #logs_adv = torch.stack(logits_adv).float()
                #np.save('/scratch/eecs-share/ratzlafn/softs.npy', softs.detach().cpu().numpy())
                #np.save('/scratch/eecs-share/ratzlafn/logs.npy', logs.detach().cpu().numpy())
                #sys.exit(0)

                # Measure variance of individual logits across models.
                # The HyperGAN ensemble has lower variance across the 10 class predictions,
                # but a single logit has high variance across models.
                units_softmax = softs.var(0).mean().item()  # var across models across images
                ent = float(entropy(softs.mean(0).detach()).mean())
                #units_logprob = logs.var(0).mean().item()
                ensemble_var = softs.mean(0).var(1).mean().item()

                units_softmax_adv = softs_adv.var(0).mean().item()  # var across models - images
                ent_adv = float(entropy(softs_adv.mean(0).detach()).mean())
                #units_logprob_adv = logs_adv.var(0).mean().item()
                ensemble_var_adv = softs_adv.mean(0).var(1).mean().item()

                """ Core Debug """
                # print ('softmax var: ', units_softmax)
                # print ('logprob var: ', units_logprob)
                # print ('ensemble var: ', ensemble_var)

                # build lists
                _soft.append(units_softmax)
                #_logs.append(units_logprob)
                _vars.append(ensemble_var)
                _ents.append(ent)
                _soft_adv.append(units_softmax_adv)
                #_logs_adv.append(units_logprob_adv)
                _vars_adv.append(ensemble_var_adv)
                _ents_adv.append(ent_adv)

                if idx > 5:
                    print('NAT: Log var: -, Softmax var: {}, Ent: {}, Ens var: {}'.format(
                        #torch.tensor(_logs).mean(),
                        torch.tensor(_soft).mean(),
                        torch.tensor(_ents).mean(),
                        torch.tensor(_vars).mean()))
                    print('ADV Eps: {}, Log var: -, Softmax var: {}, Ent: {}, Ens var: {}'.format(
                        eps,
                        #torch.tensor(_logs_adv).mean(),
                        torch.tensor(_soft_adv).mean(),
                        torch.tensor(_ents_adv).mean(),
                        torch.tensor(_vars_adv).mean()))
                    break
"""
def criterion():
    return Misclassification()
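# Hedged usage sketch (not from the original source): the factory simply returns a fresh
# untargeted criterion each call, e.g. when building a foolbox 2-style attack.
# `fmodel` is an assumed placeholder for an already-wrapped foolbox model.
import foolbox
fmodel = ...  # assumed placeholder
attack = foolbox.attacks.FGSM(fmodel, criterion())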
def run_adv_model(args, models):
    for model in models:
        model.eval()
    model = FusedNet(models)
    fmodel = attacks.load_model(model)
    criterion = Misclassification()
    fgs = foolbox.attacks.BIM(fmodel)
    _, test_loader = datagen.load_mnist(args)
    for eps in [0.01, 0.03, 0.08, .1, .3, .5, 1.0]:
        total_adv = 0
        _soft, _logs, _vars, _ents = [], [], [], []
        _soft_adv, _logs_adv, _vars_adv, _ents_adv = [], [], [], []
        for idx, (data, target) in enumerate(test_loader):
            data, target = data.cuda(), target.cuda()
            adv_batch, target_batch, _ = sample_adv_batch(
                data, target, fmodel, eps, fgs)
            if adv_batch is None:
                continue

            # get initial prediction of ensemble, sure
            output = model(adv_batch)
            pred = output.data.max(1, keepdim=True)[1]
            correct = pred.eq(target_batch.data.view_as(pred)).long().cpu().sum()
            n_adv = len(target_batch) - correct.item()

            # set up to sample from individual models
            soft_out, pred_out, logits = [], [], []
            soft_out_adv, pred_out_adv, logits_adv = [], [], []
            for i in range(len(models)):
                output = models[i](data)
                soft_out.append(F.softmax(output, dim=1))
                pred_out.append(output.data.max(1, keepdim=True)[1])
                logits.append(output)
                output = model(adv_batch)
                soft_out_adv.append(F.softmax(output, dim=1))
            softs = torch.stack(soft_out).float()
            preds = torch.stack(pred_out).float()
            logs = torch.stack(logits).float()
            softs_adv = torch.stack(soft_out_adv).float()

            # Measure variance of individual logits across models.
            # The HyperGAN ensemble has lower variance across the 10 class predictions,
            # but a single logit has high variance across models.
            units_softmax = softs.var(0).mean().item()  # var across models across images
            units_logprob = logs.var(0).mean().item()
            ensemble_var = softs.mean(0).var(1).mean().item()
            ent = float(entropy(softs.mean(0).detach()).mean())

            units_softmax_adv = softs_adv.var(0).mean().item()  # var across models - images
            ent_adv = float(entropy(softs_adv.mean(0).detach()).mean())
            ensemble_var_adv = softs_adv.mean(0).var(1).mean().item()

            """ Core Debug """
            # print ('softmax var: ', units_softmax)
            # print ('logprob var: ', units_logprob)
            # print ('ensemble var: ', ensemble_var)

            # build lists
            _soft.append(units_softmax)
            _logs.append(units_logprob)
            _vars.append(ensemble_var)
            _ents.append(ent)
            _soft_adv.append(units_softmax_adv)
            _vars_adv.append(ensemble_var_adv)
            _ents_adv.append(ent_adv)
            total_adv += n_adv

            if idx % 10 == 0 and idx > 1:
                print('NAT: Log var: {}, Softmax var: {}, Ent var: {}, Ens var: {}'.format(
                    torch.tensor(_logs).mean(),
                    torch.tensor(_soft).mean(),
                    torch.tensor(_ents).mean(),
                    torch.tensor(_vars).mean()))
                print('ADV: Eps: {}, Ent var: {}, Softmax var: {}, Ens var: {}'.format(
                    eps,
                    torch.tensor(_ents_adv).mean(),
                    torch.tensor(_soft_adv).mean(),
                    torch.tensor(_vars_adv).mean()))
                break
"""
def __init__(self, model, min_perturbation=None, max_iterations=100,
             subsample=10, criterion=Misclassification(), distance=MSE):
    super().__init__(attack_method_def=DeepFoolL2Attack,
                     model=model,
                     min_perturbation=min_perturbation,
                     criterion=criterion,
                     distance=distance)
    self._max_iterations = max_iterations
    self._subsample = subsample
def __init__(self, model, step_size_iter=0.05, max_perturbation=0.3,
             n_iterations=10, min_perturbation=None, binary_search=True,
             random_start=False, return_early=True,
             criterion=Misclassification(), distance=MSE):
    super().__init__(attack_method_def=LinfinityBasicIterativeAttack,
                     model=model,
                     min_perturbation=min_perturbation,
                     criterion=criterion,
                     distance=distance)
    self._binary_search = binary_search
    self._step_size_iter = step_size_iter
    self._n_iterations = n_iterations
    self._random_start = random_start
    self._return_early = return_early
    self._max_perturbation = max_perturbation
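# Hedged sketch (not from the original source): how the hyper-parameters stored by the
# wrapper above would typically map onto the underlying foolbox 2 attack call. `fmodel`,
# `images`, and `labels` are assumed placeholders; the keyword names mirror the foolbox 2
# LinfinityBasicIterativeAttack defaults that the wrapper's signature echoes.
attack = LinfinityBasicIterativeAttack(fmodel, Misclassification(), MSE)
adversarials = attack(images, labels,
                      binary_search=True,   # binary_search
                      epsilon=0.3,          # max_perturbation
                      stepsize=0.05,        # step_size_iter
                      iterations=10,        # n_iterations
                      random_start=False,
                      return_early=True)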