def __init__(self, model, device, bounds, preprocessing):
    """Store the raw network together with a Foolbox wrapper around it.

    ``self.rmodel`` is the ``fb.PyTorchModel`` built from ``model`` with the
    given ``bounds``, ``preprocessing`` and ``device``; ``self.model`` keeps
    the object that was passed in.
    """
    wrapper_options = dict(bounds=bounds, preprocessing=preprocessing, device=device)
    self.rmodel = fb.PyTorchModel(model, **wrapper_options)
    self.model = model
def attack(model, device, attacker, testloader, test_size):
    """Run ``attacker`` over ``testloader`` and aggregate the outcome.

    Returns a tuple ``(accuracy, success, distance)``:

    * accuracy -- summed ``accuracy(...)`` results divided by ``test_size``;
    * success  -- per-epsilon success totals divided by ``test_size``, as a list;
    * distance -- summed Linf distance of the successful examples for the
      first epsilon, divided by the first epsilon's success total.
    """
    fmodel = fb.PyTorchModel(
        model,
        bounds=attacker.bounds,
        preprocessing=attacker.preprocessing,
        device=device,
    )
    # The indexing below (clipped[0], is_adv[0]) needs one result per epsilon.
    assert isinstance(attacker.epsilon, Iterable)

    acc_total = 0.
    success_total = 0.
    dist_total = 0.
    for batch in testloader:
        imgs, labels = batch
        imgs, labels = imgs.to(device), labels.to(device)

        acc_total += accuracy(fmodel, imgs, labels)
        raw, clipped, is_adv = attacker.attack(
            fmodel, imgs, labels, epsilons=attacker.epsilon)

        # Distance is only tracked for the first epsilon.
        first_eps_dist = fb.distances.linf(imgs, clipped[0])[is_adv[0]]
        success_total += is_adv.sum(dim=1)
        dist_total += first_eps_dist.sum().item()

    mean_accuracy = acc_total / test_size
    mean_distance = dist_total / success_total[0]
    success_rate = success_total / test_size
    return mean_accuracy, success_rate.tolist(), mean_distance
def construct_adversarial_examples(im, crit, method, model, device, epsilon, exclude_wrong_predictions=False, targeted=False):
    """Attack ``model`` on the batch ``im`` and return the result plus clean predictions.

    Parameters
    ----------
    im : tensor of inputs; its min/max are used as the Foolbox bounds.
    crit : tensor of labels for ``im`` (moved to ``device``).
    method : identifier passed to ``get_attack`` to obtain the attack.
    model : the PyTorch network under attack.
    device : device the inputs, labels and wrapper are placed on.
    epsilon : perturbation budget on a 0-255 scale; divided by 255 here.
    exclude_wrong_predictions : drop samples the model already misclassifies.
    targeted : if true, drop samples labelled as the target class (1) and
        run a targeted attack towards that class.

    Returns
    -------
    ``(attack_result, predictions)`` where ``attack_result`` is whatever the
    attack returns and ``predictions`` are the clean argmax predictions for
    the samples that were kept.
    """
    bounds = (im.min().item(), im.max().item())
    epsilon = epsilon / 255
    fmodel = fb.PyTorchModel(model, bounds=bounds, device=device)

    im = im.to(device)
    crit = crit.to(device)

    # Only the argmax is needed here, so do not build an autograd graph.
    with torch.no_grad():
        predictions = model(im).argmax(dim=-1)

    if exclude_wrong_predictions:
        selection = predictions == crit
        im = im[selection]
        crit = crit[selection]
        predictions = predictions[selection]

    if targeted:
        target = 1
        selection = crit != target
        im = im[selection]
        predictions = predictions[selection]
        # Build the target labels on the same device as the inputs; a CPU
        # tensor here cannot be compared with the model outputs on a GPU.
        target_classes = torch.full(
            (len(im),), target, dtype=torch.long, device=im.device)
        crit = fb.criteria.TargetedMisclassification(target_classes)

    attack = get_attack(method)
    return attack(fmodel, im, crit, epsilons=epsilon), predictions
def adversarial_validation(self):
    """Attack every validation batch with ``self.cw`` and return robust accuracy.

    The result is ``100 * (1 - successes / len(self.cifar_val))``; the same
    counts are also printed once the loop has finished.
    """
    loader = self.val_dataloader()
    self.eval()
    wrapped = fb.PyTorchModel(self, bounds=(0, 1))

    fooled_total = 0
    # Gradients are re-enabled explicitly for the attack.
    with torch.enable_grad():
        for data, label in loader:
            data, label = data.cuda(), label.cuda()
            _, _, success = self.cw(wrapped, data, label, epsilons=[0.01])
            fooled_total += torch.sum(success.long())
            self.zero_grad()

    val_size = len(self.cifar_val)
    fooled_fraction = float(fooled_total) / val_size
    print("Successful attack:{} Attack count:{} Percentage:{}".format(
        fooled_total, val_size, fooled_fraction))
    robust_accuracy = 100. * (1 - fooled_fraction)
    return robust_accuracy
def __init__(
    self,
    model: nn.Module,
    device: torch.device,
    bounds: Tuple[float, float],
    preprocessing: Optional[Dict],
) -> None:
    """Keep ``model`` and a Foolbox ``PyTorchModel`` wrapper built from it.

    The wrapper (``self.rmodel``) is configured with the supplied ``bounds``,
    ``preprocessing`` and ``device``; ``self.model`` is the unwrapped network.
    """
    foolbox_wrapper = fb.PyTorchModel(
        model,
        bounds=bounds,
        preprocessing=preprocessing,
        device=device,
    )
    self.rmodel = foolbox_wrapper
    self.model = model
def __init__(self, model, epsilons, device):
    """Move ``model`` to ``device``, wrap it for Foolbox and register attacks.

    ``self.type_dict`` maps the norm names ``'L2'`` and ``'Linf'`` to the
    corresponding bound methods of this object.
    """
    self.model = model
    self.device = device
    moved = self.model.to(device)
    self.model = moved
    self.fmodel = fb.PyTorchModel(model, bounds=(0, 1), device=device)
    self.epsilons = epsilons

    dispatch = {}
    dispatch['L2'] = self.L2Attack
    dispatch['Linf'] = self.LinfAttack
    self.type_dict = dispatch
def __init__(self, model, attacker, device, bounds, preprocessing, epsilon):
    """Switch ``model`` to eval mode, wrap it for Foolbox and store the settings.

    The wrapper is kept on ``self.fmodel``; ``model``, ``device``,
    ``epsilon`` and ``attacker`` are stored unchanged.
    """
    # eval() is called before the wrapper is built.
    model.eval()
    wrapper_options = dict(bounds=bounds, preprocessing=preprocessing, device=device)
    self.fmodel = fb.PyTorchModel(model, **wrapper_options)
    self.model, self.device = model, device
    self.epsilon, self.attacker = epsilon, attacker
def deepfool(max_count, model, train_loader, max_epsilon, iters=20, isnorm=False, num_classes=1000):
    """Return the fraction of samples still classified correctly after Linf DeepFool.

    Batches are drawn from ``train_loader`` until at least ``max_count``
    samples have been attacked.  With ``isnorm`` the model is wrapped with
    bounds ``(-3, 3)`` and ``max_epsilon`` is rescaled by ``255 * 0.224``;
    otherwise the bounds are ``(0, 255)`` and ``max_epsilon`` is used as given.
    ``num_classes`` is accepted but not used.
    """
    import foolbox as fb
    import eagerpy as ep

    # Both modes use the same preprocessing values.  (The ImageNet statistics
    # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] were left
    # commented out in the original.)
    preprocessing = dict(mean=[1, 1, 1], std=[1, 1, 1], axis=-3)
    if isnorm:
        max_epsilon = max_epsilon / (255 * 0.224)
        input_bounds = (-3, 3)
    else:
        input_bounds = (0, 255)
    fmodel = fb.PyTorchModel(model, bounds=input_bounds, preprocessing=preprocessing)

    adversary = fb.attacks.deepfool.LinfDeepFoolAttack(steps=iters, candidates=5)

    seen = 0
    still_correct = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.cuda(), targets.cuda()
        seen += len(inputs)
        inputs, targets = ep.astensor(inputs), ep.astensor(targets)

        # Index 1 of the attack result is the clipped adversarial batch.
        adv_inputs = adversary(fmodel, inputs, targets, epsilons=max_epsilon)[1]
        adv_labels = fmodel(adv_inputs).argmax(1)
        still_correct += (adv_labels == targets).sum()

        if seen >= max_count:
            break
    return still_correct.numpy() / (seen)
def generate_attack(args, model, data, target, adversarial_args):
    """Compute an adversarial perturbation for ``data`` according to ``args``.

    * ``attack_box_type == "other"``: only ``attack_otherbox_type ==
      "decision"`` is supported (Foolbox BoundaryAttack, epsilon 8/255, result
      returned as ``adversarial - data``); anything else raises ``ValueError``.
    * ``attack_box_type == "white"``: ``attack_args["net"]`` is set to
      ``model.module_outer`` (and ``EOT_size`` forced to 1) for the ``"SW"``
      variant, otherwise to ``model``.
    * In every non-"other" case the configured ``adversarial_args["attack"]``
      is called with ``attack_args`` extended by ``x`` and ``y_true``.
    """
    box_type = args.attack_box_type

    if box_type == "other":
        # "transfer" should never reach this function; other values are invalid.
        if args.attack_otherbox_type != "decision":
            raise ValueError

        # Original author's note: this attack fails to find a perturbation
        # for misclassification in its initialization part, then it quits.
        import foolbox as fb

        fmodel = fb.PyTorchModel(model, bounds=(0, 1))
        init_attack = fb.attacks.LinearSearchBlendedUniformNoiseAttack(
            steps=1000,
        )
        attack = fb.attacks.BoundaryAttack(
            init_attack=init_attack,
            steps=2500,
            spherical_step=0.01,
            source_step=0.01,
            source_step_convergance=1e-07,
            step_adaptation=1.5,
            tensorboard=False,
            update_stats_every_k=10,
        )
        _, clipped, success = attack(fmodel, data, target, epsilons=[8 / 255])
        return clipped[0] - data

    if box_type == "white":
        white_args = adversarial_args["attack_args"]
        if args.attack_whitebox_type == "SW":
            white_args["net"] = model.module_outer
            white_args["attack_params"]["EOT_size"] = 1
        else:
            white_args["net"] = model

    adversarial_args["attack_args"]["x"] = data
    adversarial_args["attack_args"]["y_true"] = target
    run_attack = adversarial_args["attack"]
    return run_attack(**adversarial_args["attack_args"])
def test_pytorch_invalid_model(request: Any) -> None:
    """``PyTorchModel`` must raise ``ValueError`` for a non-``torch.nn.Module`` object."""
    if request.config.option.backend != "pytorch":
        pytest.skip()

    class Model:
        # Deliberately a plain class, not a torch.nn.Module.
        def forward(self, x: Any) -> Any:
            return x

    not_a_module = Model()
    with pytest.raises(ValueError, match="torch.nn.Module"):
        fbn.PyTorchModel(not_a_module, bounds=(0, 1))
def run_sample_attack():
    """Run a Linf FGSM attack on ImageNet sample images and report the result.

    Uses the module-level ``model`` and ``preprocessing``.  Prints the clean
    accuracy, the robust accuracy per epsilon and, for every adversarial
    example found, the original and adversarial predictions.  One randomly
    chosen original/adversarial pair per epsilon is saved under
    ``<root>/test_images``; nothing is saved for an epsilon at which the
    attack found no adversarial example.
    """
    # Switch to inference mode before wrapping the model: wrapping a model
    # that is still in training mode makes Foolbox emit a warning.
    model.eval()

    # create model
    bounds = (0, 1)
    fmodel = fb.PyTorchModel(model, bounds=bounds, preprocessing=preprocessing)
    # transform bounds of model
    fmodel = fmodel.transform_bounds((0, 1))

    # get sample data from imagenet
    images, labels = fb.utils.samples(fmodel, dataset='imagenet', batchsize=16)

    # print initial model accuracy on the samples
    print(f"Initial accuracy on samples: {fb.utils.accuracy(fmodel, images, labels)}")

    # Only use images for attack which are correctly classified by the model
    correctly_classified_images_mask = is_classified_correctly(fmodel, images, labels)
    images = images[correctly_classified_images_mask]
    labels = labels[correctly_classified_images_mask]

    # ATTACK
    attack = fb.attacks.LinfFastGradientAttack()
    epsilons = [0.01]
    raw, clipped, is_adv = attack_model(attack, fmodel, images, labels, epsilons)

    robust_accuracy = 1 - is_adv.type(torch.FloatTensor).mean(axis=-1)
    print("Predictions and robust accuracies: ")
    for i, (eps, acc) in enumerate(zip(epsilons, robust_accuracy)):
        print(f"!!!! Linf norm ≤ {eps:<6}: {acc.item() * 100:4.1f} %")

        adversarials = [j for j, adv in enumerate(is_adv[i]) if adv]
        for adv_idx in adversarials:
            img = images[adv_idx]
            adv_img = clipped[i][adv_idx]
            print(f"Ground truth label: '{labels[adv_idx]}'")
            original_imagenet_idx = fmodel(img.unsqueeze(0)).argmax().item()
            adv_imagenet_idx = fmodel(adv_img.unsqueeze(0)).argmax().item()
            print(f"Original prediction: {original_imagenet_idx}, "
                  f"{imagenet_label_dict[original_imagenet_idx]}")
            print(f"Adversarial prediction: {adv_imagenet_idx}, "
                  f"{imagenet_label_dict[adv_imagenet_idx]}")
            print(" ")

        # random.choice raises IndexError on an empty list, so skip the
        # image dump when the attack did not fool the model at this epsilon.
        if not adversarials:
            continue

        first_adv_idx = random.choice(adversarials)
        torchvision.utils.save_image(
            images[first_adv_idx],
            os.path.join(get_root(), f"test_images/{first_adv_idx}_orig_eps_{eps}.jpg"))
        torchvision.utils.save_image(
            clipped[i][first_adv_idx],
            os.path.join(get_root(), f"test_images/{first_adv_idx}_adv_eps_{eps}.jpg"))
def pytorch_resnet18(request: Any) -> ModelAndData:
    """Return a Foolbox-wrapped pretrained ResNet-18 with 16 ImageNet samples.

    Skipped when the ``skipslow`` option is set.
    """
    if request.config.option.skipslow:
        pytest.skip()

    import torchvision.models as models

    network = models.resnet18(pretrained=True).eval()
    imagenet_stats = dict(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], axis=-3)
    fmodel = fbn.PyTorchModel(network, bounds=(0, 1), preprocessing=imagenet_stats)

    raw_x, raw_y = fbn.samples(fmodel, dataset="imagenet", batchsize=16)
    return fmodel, ep.astensor(raw_x), ep.astensor(raw_y)
def __init__(
    self,
    model: AdversarialDefensiveModel,
    attacker: Callable,
    device: torch.device,
    bounds: Tuple[float, float],
    preprocessing: Optional[Dict],
    epsilon: Union[None, float, List[float]],
) -> None:
    """Put ``model`` in eval mode, wrap it for Foolbox and remember the attack setup.

    ``self.fmodel`` is the ``fb.PyTorchModel`` wrapper; the remaining
    arguments are stored unchanged under the same names.
    """
    model.eval()
    foolbox_wrapper = fb.PyTorchModel(
        model,
        bounds=bounds,
        preprocessing=preprocessing,
        device=device,
    )
    self.fmodel = foolbox_wrapper
    self.model = model
    self.device = device
    self.epsilon = epsilon
    self.attacker = attacker
def test_pytorch_training_warning(request: Any) -> None:
    """Wrapping a module that is in training mode must emit a ``UserWarning``."""
    if request.config.option.backend != "pytorch":
        pytest.skip()

    import torch

    class Model(torch.nn.Module):
        def forward(self, x: torch.Tensor) -> torch.Tensor:  # type: ignore
            return x

    training_model = Model().train()
    with pytest.warns(UserWarning):
        fbn.PyTorchModel(training_model, bounds=(0, 1), device="cpu")
def __init__(self, args):
    """Build the CIFAR-10 test loader, restore a WRN model and pick the attack.

    ``args.restore`` must point to a checkpoint whose ``'model'`` entry is the
    state dict.  ``self.attack`` is set only when ``args.attack`` is one of
    ``'FGSM'``, ``'PGD'``, ``'BA'`` or ``'CW'``.
    """
    self.args = args

    test_set = datasets.CIFAR10(
        args.data_root, train=False, transform=T.Compose([T.ToTensor()]))
    loader_options = {'num_workers': 4, 'pin_memory': True}
    self.val_loader = torch.utils.data.DataLoader(
        test_set, batch_size=args.batch_size, shuffle=True, **loader_options)

    # Create model
    self.model = models.WRN(depth=34, width=1, num_classes=10)
    if not args.spbn:
        self.model.cuda()
    else:
        print("SPBN training!")
        self.model = models.convert_splitbn_model(self.model).cuda()

    # Loading model
    assert self.args.restore is not None
    checkpoint = torch.load(self.args.restore)
    self.model.load_state_dict(checkpoint['model'])
    self.model.eval()
    cudnn.benchmark = True

    self.save_path = self.args.save_path
    # The budget is given on a 0-255 scale.
    self.epsilons = args.epsilon / 255.0

    # Foolbox attack (the model itself is not wrapped here; the original
    # left `foolbox.PyTorchModel(self.model, bounds=(0, 1))` commented out).
    if args.attack == 'FGSM':
        self.attack = foolbox.attacks.LinfFastGradientAttack(random_start=True)
    elif args.attack == 'PGD':
        self.attack = foolbox.attacks.LinfPGD(
            steps=args.attack_steps,
            abs_stepsize=2.0 / 255.0,
            random_start=True,
        )
    elif args.attack == 'BA':
        self.attack = foolbox.attacks.BoundaryAttack()
    elif args.attack == 'CW':
        # confidence == kappa
        self.attack = foolbox.attacks.L2CarliniWagnerAttack(
            steps=1000, confidence=20)
def advfit(self, loader, loss_fcn, optimizer, attack, epsilon, nb_epochs=10, ratio=0.5, **kwargs):
    """Train on a mix of clean and adversarial batches.

    For every batch an adversarial copy is produced by calling ``attack`` on
    a Foolbox wrapper of this model (in eval mode); the model is then put
    back in train mode and updated with
    ``(1 - ratio) * clean_loss + ratio * adversarial_loss``.
    Only ``CrossEntropyLoss``, ``NLLLoss`` and ``MultiMarginLoss`` are accepted.
    """
    import foolbox as fb

    assert (0 <= ratio <= 1), "ratio must be between 0 and 1"
    supported_losses = (
        torch.nn.CrossEntropyLoss, torch.nn.NLLLoss, torch.nn.MultiMarginLoss)
    if not isinstance(loss_fcn, supported_losses):
        assert 0

    clean_weight = 1 - ratio
    # Start training
    for _ in range(nb_epochs):
        for inputs, targets in tqdm(loader):
            inputs = inputs.to('cuda')
            targets = targets.to('cuda')

            # Craft adversarial inputs against the current weights.
            self.eval()
            fmodel = fb.PyTorchModel(self, bounds=(0, 1))
            adv_inputs, _ = attack(fmodel, inputs, targets, epsilon=epsilon, **kwargs)

            self.train()
            optimizer.zero_grad()

            # Perform prediction
            clean_out = self.forward(inputs)
            adv_out = self.forward(adv_inputs)
            loss = clean_weight * loss_fcn(clean_out, targets) + ratio * loss_fcn(
                adv_out, targets)

            # Actual training
            loss.backward()
            optimizer.step()
def model_attack(model, model_type, attack_type, config):
    """Attack ``model`` on MNIST samples with a noise attack and return robust accuracy.

    Parameters
    ----------
    model : the network to wrap.
    model_type : ``"pt"`` (PyTorch) or ``"tf"`` (TensorFlow); any other value
        selects the MXNet wrapper.
    attack_type : ``"uniform"``, ``"gaussian"`` or ``"saltandpepper"``.
    config : mapping providing ``'batch_size'`` for the sample batch.

    Returns
    -------
    ``1 - mean(success)`` where the mean is taken over the flattened success
    array, i.e. over all epsilons and samples together.

    Raises
    ------
    ValueError
        If ``attack_type`` is not one of the supported names.
    """
    # Validate up front: an unknown name would otherwise leave `attack`
    # unbound and fail only after the samples had been loaded.
    supported_attacks = ("uniform", "gaussian", "saltandpepper")
    if attack_type not in supported_attacks:
        raise ValueError(
            f"unknown attack_type {attack_type!r}; "
            f"expected one of {supported_attacks}")

    if model_type == "pt":
        fmodel = fb.PyTorchModel(model, bounds=(0, 1))
    elif model_type == "tf":
        fmodel = fb.TensorFlowModel(model, bounds=(0, 1))
    else:
        fmodel = fb.models.MXNetModel(model, bounds=(0, 1))

    images, labels = fb.utils.samples(
        fmodel, dataset='mnist', batchsize=config['batch_size'])

    if attack_type == "uniform":
        attack = fb.attacks.L2AdditiveUniformNoiseAttack()
    elif attack_type == "gaussian":
        attack = fb.attacks.L2AdditiveGaussianNoiseAttack()
    else:
        attack = fb.attacks.SaltAndPepperNoiseAttack()

    epsilons = [
        0.0, 0.0002, 0.0005, 0.0008, 0.001, 0.0015, 0.002,
        0.003, 0.01, 0.1, 0.3, 0.5, 1.0,
    ]
    raw_advs, clipped_advs, success = attack(
        fmodel, images, labels, epsilons=epsilons)

    # Only the PyTorch result is moved to the CPU before conversion.
    if model_type == "pt":
        success = success.cpu()
    robust_accuracy = 1 - success.numpy().astype(float).flatten().mean(axis=-1)
    return robust_accuracy
def valid_attack(model, device, valider, dataloader=None, data_size=None):
    """Attack ``model`` on a validation loader and summarise the result.

    ``valider`` is a config object providing ``attack``, ``bounds``,
    ``preprocessing``, ``epsilon``, ``validloader`` and ``valid_size``.
    When ``dataloader`` is falsy, ``valider.validloader`` and
    ``valider.valid_size`` are used instead of the arguments.

    Returns ``(accuracy, success_rate, mean_distance)`` where the first two
    are totals divided by ``data_size`` and the last is the summed Linf
    distance of successful examples divided by ``successes + 1e-5``.
    """
    if not dataloader:
        dataloader, data_size = valider.validloader, valider.valid_size

    fmodel = fb.PyTorchModel(
        model,
        bounds=valider.bounds,
        preprocessing=valider.preprocessing,
        device=device,
    )

    acc_total = 0.
    success_total = 0.
    dist_total = 0.
    for batch in dataloader:
        imgs, labels = batch
        imgs, labels = imgs.to(device), labels.to(device)

        acc_total += accuracy(fmodel, imgs, labels)
        raw, clipped, is_adv = valider.attack(
            fmodel, imgs, labels, epsilons=valider.epsilon)

        success_total += is_adv.sum().item()
        dist_total += fb.distances.linf(imgs, clipped)[is_adv].sum().item()

    mean_accuracy = acc_total / data_size
    # The small constant keeps the division defined when nothing succeeded.
    mean_distance = dist_total / (success_total + 1e-5)
    success_rate = success_total / data_size
    return mean_accuracy, success_rate, mean_distance
def __init__(self, model, attack):
    """
    Wrap a PyTorch model for Foolbox and remember the attack to run.

    Parameters
    ----------
    model: [PyTorch model]
        Any PyTorch model.
    attack: [foolbox.attacks]
        Any foolbox attack.
    """
    imagenet_stats = dict(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        axis=-3,
    )
    self.preprocessing = imagenet_stats
    wrapped = fb.PyTorchModel(
        model, bounds=(0, 1), preprocessing=self.preprocessing)
    self.fmodel = wrapped
    # Re-expressed in (0, 1) bounds, as in the original.
    self.fmodel = self.fmodel.transform_bounds((0, 1))
    self.attack = attack
def __init__(self, rhs):
    """Set up the model, data, objective, constraint and Linf-PGD attack.

    ``rhs`` is stored as the single entry of ``self.rhs``, alongside the
    single constraint ``self.adversarialLoss``.  The parent initialiser is
    called last, after every attribute has been assigned.
    """
    network = ResNet18().to(theDevice)
    self.model = csl.PytorchModel(network)
    self.data = trainset
    self.batch_size = 256
    self.obj_function = self.obj_fun

    # Constraints
    self.constraints = [self.adversarialLoss]
    self.rhs = [rhs]

    channel_stats = dict(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
        axis=-3,
    )
    self.foolbox_model = foolbox.PyTorchModel(
        self.model.model,
        bounds=(0, 1),
        device=theDevice,
        preprocessing=channel_stats,
    )
    self.attack = foolbox.attacks.LinfPGD(
        rel_stepsize=1/3,
        abs_stepsize=None,
        steps=5,
        random_start=True,
    )

    super().__init__()
def pytorch_simple_model(
    device: Any = None, preprocessing: fbn.types.Preprocessing = None
) -> ModelAndData:
    """Return a tiny averaging model wrapped for Foolbox, with samples and its own labels.

    The model averages its input over dimension 3 and then dimension 2.  The
    returned labels are the wrapped model's argmax predictions on the
    sample batch.
    """
    import torch

    class Model(torch.nn.Module):
        def forward(self, x: torch.Tensor) -> torch.Tensor:  # type: ignore
            averaged = torch.mean(x, 3)
            return torch.mean(averaged, 2)

    fmodel = fbn.PyTorchModel(
        Model().eval(), bounds=(0, 1), device=device, preprocessing=preprocessing
    )

    samples, _ = fbn.samples(fmodel, dataset="imagenet", batchsize=16)
    x = ep.astensor(samples)
    y = fmodel(x).argmax(axis=-1)
    return fmodel, x, y
def mnist_pt_objective(config):
    """Train and test ``NumberNet``, then report its robustness to salt-and-pepper noise.

    Reports ``test_loss`` and ``robust_acc`` through ``tune.report`` and
    returns the robust accuracy (one minus the mean of the flattened
    success array).
    """
    model = NumberNet(config)
    trainer = pl.Trainer(
        max_epochs=config['epochs'], gpus=1, auto_select_gpus=True)
    trainer.fit(model)
    trainer.test(model)
    tune.report(test_loss=model.test_loss)

    fmodel = fb.PyTorchModel(model, bounds=(0, 1))
    images, labels = fb.utils.samples(
        fmodel, dataset='mnist', batchsize=config['batch_size'])
    # Clean accuracy is computed but not reported.
    fb.utils.accuracy(fmodel, images, labels)

    attack = fb.attacks.SaltAndPepperNoiseAttack()
    epsilons = [
        0.0, 0.0002, 0.0005, 0.0008, 0.001, 0.0015, 0.002,
        0.003, 0.01, 0.1, 0.3, 0.5, 1.0,
    ]
    _, _, success = attack(fmodel, images, labels, epsilons=epsilons)

    success_flat = success.cpu().numpy().astype(float).flatten()
    robust_accuracy = 1 - success_flat.mean(axis=-1)
    tune.report(robust_acc=robust_accuracy)
    return robust_accuracy
#print(device) #print(labels.device) # print(type(images)) imagesVectors = torch.zeros(images.shape[0], images.shape[2] * images.shape[3], device=device) # change image in images to 784 rows vector imagesVectors = images.view(images.size(0), -1) break #from now we have our test images and labels intAE = intAE.to(device) #print(imagesVectors.device) imagesVectors = imagesVectors.to(device) #print(imagesVectors.device) fmodel = fb.PyTorchModel(intAE, bounds=(-1, 1), device=device) print("imagesVectors", imagesVectors.device) print("labels", labels.device) print("fmodel", fmodel.device) clean = fb.utils.accuracy(fmodel, imagesVectors, labels) attackZooL0 = {'L0SAPA': fb.attacks.SaltAndPepperNoiseAttack(), 'L0PA': None} attackZooLinf = { 'LinfFGSM': fb.attacks.LinfFastGradientAttack(), 'LinfIFGSM': None, 'LinfPGD': fb.attacks.LinfBasicIterativeAttack(), 'LinfIGM': None, } attackZooL2 = {
def main() -> None:
    """Evaluate a saved CIFAR classifier under an adversarial attack.

    Reads ``test_cifar.yml``, loads the dataset and network it names,
    restores the weights from ``<save_path>/saved_models/<model_name><exp_id>.pt``
    and logs clean and adversarial accuracy on the first ``num_test_samples``
    test samples.  The attack is chosen from the config: Brendel & Bethge
    (``config['bb']``), PGD+ (``config['pgd']['plus']``) or plain PGD.

    Raises:
        NotImplementedError: for an unknown dataset or network name.
    """
    # Parse config file
    with open('test_cifar.yml', 'r') as stream:
        config = yaml.safe_load(stream)
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # os.environ values must be strings, so gpu_id is presumably a string
    # such as "0" or "0,1" -- TODO confirm against the config file.
    os.environ["CUDA_VISIBLE_DEVICES"] = config['meta']['gpu_id']
    # Set experiment id
    exp_id = config['meta']['exp_id']
    model_name = config['meta']['model_name'] + str(exp_id)
    # Set all random seeds
    seed = config['meta']['seed']
    np.random.seed(seed)
    torch.manual_seed(seed)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # Set up model directory
    save_dir = os.path.join(config['meta']['save_path'], 'saved_models')
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    model_path = os.path.join(save_dir, model_name)
    log = get_logger('test_' + model_name, 'test_cifar10')
    log.info('\n%s', yaml.dump(config))
    log.info('Preparing data...')
    # The validation split returned by the loaders is discarded.
    if config['test']['dataset'] == 'cifar10':
        (x_train, y_train), (_, _), (x_test, y_test) = load_cifar10_all(
            data_dir=config['meta']['data_path'], val_size=0.1, shuffle=True, seed=seed)
        num_classes = 10
    elif config['test']['dataset'] == 'cifar100':
        (x_train, y_train), (_, _), (x_test, y_test) = load_cifar100_all(
            data_dir=config['meta']['data_path'], val_size=0.1, shuffle=True, seed=seed)
        num_classes = 100
    else:
        raise NotImplementedError('invalid dataset.')
    log.info('Building model...')
    if config['test']['network'] == 'resnet':
        # use ResNetV2-20
        net = PreActResNet(PreActBlock, [2, 2, 2, 2], num_classes=num_classes)
    elif config['test']['network'] == 'wideresnet':
        # use WideResNet-28-10
        net = WideResNet(num_classes=num_classes)
    else:
        raise NotImplementedError('Specified network not implemented.')
    net = net.eval().to(device)
    # have to handle model loaded from CAT18 differently
    if 'cat18' in model_path:
        # Wrap in DataParallel to load the state dict stored under 'net',
        # then unwrap again.
        net = torch.nn.DataParallel(net)
        net.load_state_dict(torch.load(model_path + '.pt')['net'])
        net = net.module
    else:
        net.load_state_dict(torch.load(model_path + '.pt'))
    if device == 'cuda':
        # NOTE(review): len() > 1 on gpu_id presumably means several GPUs
        # are listed -- confirm against the config format.
        if len(config['meta']['gpu_id']) > 1:
            net = torch.nn.DataParallel(net)
        net = net.eval()
        cudnn.benchmark = True
    num_test_samples = config['test']['num_test_samples']
    # Clean accuracy, optionally on quantized inputs.
    if config['pgd']['quant']:
        y_pred = classify(net, quantize(x_test[:num_test_samples]), num_classes=num_classes)
    else:
        y_pred = classify(net, x_test[:num_test_samples], num_classes=num_classes)
    acc = get_acc(y_pred, y_test[:num_test_samples])
    start_time = time.time()
    if config['bb']:
        log.info('Starting Brendel & Bethge attack...')
        netfb = fb.PyTorchModel(net, bounds=(0, 1), preprocessing=dict())
        # The PGD result is passed to bb_attack as its starting points.
        x_init = pgd_init(netfb, x_test[:num_test_samples], y_test[:num_test_samples], device, config['test']['batch_size'])
        log.info('Finish an initial attack with PGD. Running BB attack...')
        x_adv = bb_attack(netfb, x_test[:num_test_samples].to(device), y_test[:num_test_samples].to(device), x_init.to(device), config)
        if config['pgd']['quant']:
            x_adv = quantize(x_adv)
        y_pred = classify(net, x_adv, num_classes=num_classes)
        adv_acc = get_acc(y_pred, y_test[:num_test_samples])
    elif config['pgd']['plus']:
        log.info('Starting PGD+ attack...')
        # pgdp returns the adversarial accuracy directly.
        adv_acc = pgdp(net, x_train, y_train, x_test[:num_test_samples], y_test[:num_test_samples], config['test']['batch_size'], config['pgd'], num_classes=num_classes)
    else:
        log.info('Starting PGD attack...')
        attack = PGDAttack(net, x_train, y_train)
        # The whole 'pgd' config section is forwarded as keyword arguments.
        x_adv = attack(x_test[:num_test_samples], y_test[:num_test_samples], batch_size=config['test']['batch_size'], **config['pgd'])
        y_pred = classify(net, x_adv, num_classes=num_classes)
        adv_acc = get_acc(y_pred, y_test[:num_test_samples])
    log.info('Clean acc: %.4f, adv acc: %.4f.', acc, adv_acc)
    log.info('Attack runtime: %.4fs', time.time() - start_time)
def main_foolbox() -> None:
    """Attack the target model with repeated Gaussian noise and save the results.

    Uses the module-level ``args``, ``kwargs`` and ``DEVICE``.  For every
    test batch the attack is run for all epsilons, the adversarial batch at
    index 4 of the epsilon list is re-classified, and each adversarial image
    is written to ``<output_path>/<class name>/<batch>_<index>.jpg``.  An
    existing output directory is deleted first.
    """
    data_loaders, dataset_sizes, class_to_idx = load_data(args.test_data, args.batch_size, train_flag=False, kwargs=kwargs)
    print('length of dataset: {}'.format(dataset_sizes))
    if args.model_type == 0:
        # transfer learning based model
        from TransferNet import Transfer_Net
        model_target = Transfer_Net(num_class=10, base_net=args.base_net)
        model_target.load_state_dict(torch.load(args.input_model))
        model_target.to(DEVICE)
        # NOTE(review): from here on model_target is the bound `predict`
        # method, not the module itself -- confirm fb.PyTorchModel accepts it.
        model_target = model_target.predict
    elif args.model_type == 1:
        # normal DNN
        model_target = torch.load(args.target_model)
        model_target.to(DEVICE)
        model_target = model_target.eval()
    # NOTE(review): model_target is unbound for any other model_type.
    fmodel = fb.PyTorchModel(model_target, bounds=(0, 1))
    epsilons = [
        0.0, 0.0005, 0.001, 0.0015, 0.002, 0.003, 0.005, 0.01, 0.02, 0.03, 0.1, 0.3, 0.5, 1.0
    ]
    attacks = [
        fb.attacks.L2RepeatedAdditiveGaussianNoiseAttack(),
        # fb.attacks.LinfDeepFoolAttack(steps=50, candidates=10, overshoot=0.02, loss='logits'),
    ]
    attack = attacks[0]
    running_corrects_adv_untargeted = 0
    # Start from an empty output directory.
    if os.path.isdir(args.output_path):
        shutil.rmtree(args.output_path)
    for batch_idx, (inputs, labels) in enumerate(data_loaders):
        cln_data, true_label = inputs.to(DEVICE), labels.to(DEVICE)
        print()
        print('clean data shape: {}, true label: {}'.format(cln_data.shape, true_label))
        print()
        advs, _, success = attack(fmodel, cln_data, true_label, epsilons=epsilons)
        # Index 4 selects the result for epsilons[4] (0.002).
        adv_images = advs[4].clone().detach().requires_grad_(True)
        # predict adversarial samples
        outputs = model_target(adv_images.to(DEVICE))
        _, predicted = torch.max(outputs, 1)
        running_corrects_adv_untargeted += torch.sum(predicted == true_label.data)
        print('perturbed data predict label: ', predicted)
        # save adversarial images to local
        for idx, adver_seed in enumerate(adv_images):
            # Find the class name whose index equals this sample's label.
            for key, value in class_to_idx.items():
                if true_label[idx].item() == value:
                    adver_seed_dir = os.path.join(args.output_path, key)
                    if not os.path.isdir(adver_seed_dir):
                        os.makedirs(adver_seed_dir)
                    adver_seed_path = os.path.join(adver_seed_dir, str(batch_idx) + '_' + str(idx) + '.jpg')
                    torchvision.utils.save_image(adver_seed, adver_seed_path, normalize=True, scale_each=True)
    print('running_corrects_adver: {}'.format(running_corrects_adv_untargeted))
# Walk-through script: wrap a pretrained ResNet-18 for Foolbox, load a few
# ImageNet sample images, show them, print the clean accuracy and create a
# DeepFool attack object.
import torchvision
import foolbox as fb
import PIL
import matplotlib.pyplot as plt

# Obtain the pretrained ResNet model, specify the preprocessing expected by the model and the bound of the input space
model = torchvision.models.resnet18(pretrained=True)
preprocessing = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], axis=-3)
bounds = (0, 1)
# Put the network in inference mode before it is wrapped.
model = model.eval()

# Turn your PyTorch model into Foolbox model
fmodel = fb.PyTorchModel(model, bounds=bounds, preprocessing=preprocessing)

# If model had different bounds, we do
fmodel = fmodel.transform_bounds((0, 1))

# Provide a small set of sample images from different datasets
images, labels = fb.utils.samples(fmodel, dataset='imagenet', batchsize=16)
print(type(images))
print(images.shape)
fb.plot.images(images)
plt.show()

# Check the accuracy of model on our evaluation set
print(fb.utils.accuracy(fmodel, images, labels))

# Instantiate the attack class
attack = fb.attacks.LinfDeepFoolAttack()
accuracies.append(taccuracy) max_accuracy = np.max(np.array(accuracies)) adv_loader = torch.utils.data.DataLoader(testset, batch_size=batchsize, shuffle=False) examples = enumerate(adv_loader) batch_idx, (images, labels) = next(examples) images = images.to(device) labels = labels.to(device) print(model) fmodel = fb.PyTorchModel(model, bounds=(0, 1)) print(" ") print("accuracy", end=' | ') print(accuracy(fmodel, images, labels)) print("") attacks = [fb.attacks.PGD()] epsilons = [ 0.0, 0.1, 0.2, 0.3, 0.4, 0.5,
solver.plot() #################################### # TESTING # #################################### # Test data testset = csl.datasets.CIFAR10(root = 'data', train = False, transform = csl.datasets.utils.ToTensor(device=theDevice), target_transform = csl.datasets.utils.ToTensor(device=theDevice)) # Adversarial attack problem.model.eval() foolbox_model = foolbox.PyTorchModel(problem.model.model, bounds=(0, 1), device=theDevice, preprocessing = dict(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010], axis=-3)) attack = foolbox.attacks.LinfPGD(rel_stepsize = 1/30, abs_stepsize = None, steps = 50, random_start = True) epsilon_test = np.linspace(0.01,0.06,7) # Prepare batches batch_idx = np.arange(0, len(testset)+1, problem.batch_size) if batch_idx[-1] < len(testset): batch_idx = np.append(batch_idx, len(testset)) n_total = 0 acc_test = 0 acc_adv = np.zeros(epsilon_test.shape[0]) success_adv = np.zeros_like(acc_adv)
def main():
    """Evaluate a fine-tuned timm model on the tiny-imagenet-200 validation set.

    Prints top-1 and top-3 clean accuracy and, with ``args.check_robustness``,
    the mean robust accuracy under a one-step Linf PGD attack with epsilon 1.0.

    Raises:
        ValueError: if ``args.model`` is not a key of ``model_to_arch``.
    """
    args = parse_args()

    # Create a pytorch dataset
    data_dir = pathlib.Path('./tiny-imagenet-200/')
    image_count = len(list(data_dir.glob('**/*.JPEG')))
    print('Discovered {} images'.format(image_count))

    # Input resolution depends on the architecture.
    batch_size = 32
    side = 448 if args.model == "cait_m48_448" else 224
    im_height = im_width = side
    transform_test = transforms.Compose([
        transforms.Resize((im_height, im_width)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    # Kept from the original; the object is created but never entered as a
    # context manager.
    torch.cuda.device("cuda:0")
    device = "cuda:0"

    # Fail immediately on an unknown model: continuing would only crash
    # later on the unbound `model` variable.
    if args.model not in model_to_arch:
        raise ValueError("model does not exist")
    model = timm.create_model(model_to_arch[args.model], pretrained=True)

    def make_head(in_features):
        # 200-way classification head shared by all supported models.
        return nn.Sequential(
            nn.Dropout(0.4),
            nn.Linear(in_features, 1024),
            nn.ReLU(),
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Linear(256, 200))

    if args.model == "inception_resnet_v2":
        model.classif = make_head(model.classif.in_features)
    elif args.model == "pit":
        num_ftrs = model.head.in_features
        if args.sparse_attn_k:
            for transformer in model.transformers:
                for block in transformer.blocks:
                    block.attn = JankAttention(block.attn, args.sparse_attn_k)
        model.head = make_head(num_ftrs)
        model.head_dist = make_head(num_ftrs)
    elif args.model == "vit":
        model.head = make_head(model.head.in_features)

    checkpoint = torch.load(args.checkpoint)
    model.load_state_dict(checkpoint['net'])
    criterion = nn.CrossEntropyLoss()
    model = model.to(device)

    validation_set = ValidationSet(data_dir / 'val', transform_test)
    # robusta.batchnorm.adapt(model, adapt_type="batch_wise")
    val_loader = torch.utils.data.DataLoader(
        validation_set, batch_size=batch_size, shuffle=True,
        num_workers=4, pin_memory=True)
    if args.update_bn:
        model = update_bn_params(model, val_loader, args.update_bn, device)
    model.eval()

    if args.check_robustness:
        # NOTE(review): inputs are normalised by transform_test, so they may
        # fall outside the (0, 1) bounds declared here -- confirm intended.
        fmodel = fb.PyTorchModel(model, bounds=(0, 1))
        attack = fb.attacks.LinfPGD(steps=1)
        epsilons = [1.0]
        robust_accuracies = []

    all_preds = []
    all_labels = []
    all_losses = []
    for batch in tqdm.tqdm(val_loader):
        inputs = batch[0].cuda()
        targets = batch[1].cuda()
        if args.check_robustness:
            _, advs, success = attack(fmodel, inputs, targets, epsilons=epsilons)
            # `np.float` no longer exists in current NumPy; the builtin
            # `float` is the same dtype.
            robust_accuracy = 1 - success.cpu().numpy().astype(float).mean(axis=-1)
            robust_accuracies.append(robust_accuracy)
        with torch.no_grad():
            preds = model(inputs)
            loss = criterion(preds, targets)
        all_losses.append(loss.cpu())
        all_preds.append(preds.cpu())
        all_labels.append(targets.cpu())

    if args.check_robustness:
        print("Top 1 Validation Accuracy (robust): {}".format(
            np.mean(np.array(robust_accuracies))))

    # Count real samples instead of assuming every batch holds 32: the last
    # batch is usually smaller.
    num_samples = sum(len(labels) for labels in all_labels)

    correct = 0
    for labels, preds in zip(all_labels, all_preds):
        top1 = preds.argsort(dim=-1)[:, -1]
        correct += torch.eq(labels, top1).sum()
    print(correct.item())
    print(len(all_labels))
    accuracy = correct.item() / num_samples
    print(f"Top 1 Validation Accuracy (clean): {accuracy}")

    correct = 0
    for labels, preds in zip(all_labels, all_preds):
        top3 = preds.argsort(dim=-1)[:, -3:]
        # The three indices are distinct, so at most one matches the label.
        correct += (top3 == labels.unsqueeze(1)).any(dim=1).sum()
    accuracy = correct.item() / num_samples
    print(f"top 3 Validation Accuracy (clean): {accuracy}")
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Criterion, T],
    *,
    early_stop: Optional[float] = None,
    starting_points: Optional[T] = None,
    **kwargs: Any,
) -> T:
    """Run the boundary attack while periodically fitting a surrogate classifier.

    Starting from adversarial points (either ``starting_points`` or the
    output of an initialisation attack), each step draws a candidate and a
    spherical candidate via ``draw_proposals``, keeps the candidate when it
    is adversarial and closer to the original, and adapts the spherical and
    source step sizes from the running success statistics.

    In addition, adversarial candidates are collected in ``self.class_1``
    and non-adversarial spherical candidates in ``self.class_2``. Every 500
    steps a two-class ResNet-18 is trained on those samples (label 1 for
    ``class_1``, label 0 for ``class_2``), wrapped in ``fb.PyTorchModel``
    and stored in ``self.surrogate_model``, which is then passed to
    ``draw_proposals`` on the following steps.

    NOTE(review): the sample collection calls ``.item()`` on the per-batch
    adversarial flags and reads ``numpy()[0]`` from every stored batch, so
    it appears to assume a batch of exactly one input — confirm with callers.

    Args:
        model: Model under attack; its ``bounds`` and ``device`` are read.
        inputs: Original inputs.
        criterion: Criterion (or labels) deciding whether a point is
            adversarial.
        early_stop: Forwarded to the initialisation attack only.
        starting_points: Optional adversarial starting points. When omitted,
            ``self.init_attack`` (or a
            ``LinearSearchBlendedUniformNoiseAttack(steps=50)`` fallback) is
            used to find them.
        **kwargs: Checked by ``raise_if_kwargs``; no extra keyword arguments
            are used by this method.

    Returns:
        The best adversarials found, converted back with the ``restore_type``
        obtained from ``inputs``.

    Raises:
        ValueError: If any starting point (given or produced by the
            initialisation attack) is not adversarial.
    """
    raise_if_kwargs(kwargs)
    originals, restore_type = ep.astensor_(inputs)
    del inputs, kwargs

    criterion = get_criterion(criterion)
    is_adversarial = get_is_adversarial(criterion, model)

    if starting_points is None:
        init_attack: MinimizationAttack
        if self.init_attack is None:
            init_attack = LinearSearchBlendedUniformNoiseAttack(steps=50)
            logging.info(
                f"Neither starting_points nor init_attack given. Falling"
                f" back to {init_attack!r} for initialization.")
        else:
            init_attack = self.init_attack
        # TODO: use call and support all types of attacks (once early_stop is
        # possible in __call__)
        best_advs = init_attack.run(model,
                                    originals,
                                    criterion,
                                    early_stop=early_stop)
    else:
        best_advs = ep.astensor(starting_points)

    is_adv = is_adversarial(best_advs)
    if not is_adv.all():
        failed = is_adv.logical_not().float32().sum()
        if starting_points is None:
            raise ValueError(
                f"init_attack failed for {failed} of {len(is_adv)} inputs")
        raise ValueError(
            f"{failed} of {len(is_adv)} starting_points are not adversarial"
        )
    del starting_points

    N = len(originals)
    ndim = originals.ndim
    spherical_steps = ep.ones(originals, N) * self.spherical_step
    source_steps = ep.ones(originals, N) * self.source_step

    # create two queues for each sample to track success rates
    # (used to update the hyper parameters)
    stats_spherical_adversarial = ArrayQueue(maxlen=100, N=N)
    stats_step_adversarial = ArrayQueue(maxlen=30, N=N)

    bounds = model.bounds
    device = model.device

    # Per-run state of the surrogate classifier: the two sample pools, the
    # wrapped model handed to draw_proposals, and the raw network that is
    # trained further on every later round.
    self.class_1 = []
    self.class_2 = []
    self.surrogate_model = None
    model_surrogate = None
    train_step = 500

    tb = TensorBoard(logdir=self.tensorboard)
    try:
        tb.scalar("batchsize", N, 0)

        for step in tqdm(range(1, self.steps + 1)):
            converged = source_steps < self.source_step_convergance
            if converged.all():
                break  # pragma: no cover
            converged = atleast_kd(converged, ndim)

            # TODO: performance: ignore those that have converged
            # (we could select the non-converged ones, but we currently
            # cannot easily invert this in the end using EagerPy)

            unnormalized_source_directions = originals - best_advs
            source_norms = ep.norms.l2(
                flatten(unnormalized_source_directions), axis=-1)
            source_directions = unnormalized_source_directions / atleast_kd(
                source_norms, ndim)

            # only update the success statistics every k steps
            check_spherical_and_update_stats = (
                step % self.update_stats_every_k == 0)

            candidates, spherical_candidates = draw_proposals(
                bounds, originals, best_advs,
                unnormalized_source_directions, source_directions,
                source_norms, spherical_steps, source_steps,
                self.surrogate_model)

            is_adv = is_adversarial(candidates)
            is_adv_spherical_candidates = is_adversarial(spherical_candidates)

            # Grow the surrogate's training pools: adversarial candidates
            # are the positive class, non-adversarial spherical candidates
            # the negative class.
            if is_adv.item():
                self.class_1.append(candidates)
            if not is_adv_spherical_candidates.item():
                self.class_2.append(spherical_candidates)

            # (Re)train the surrogate every `train_step` steps. The round is
            # skipped while either pool is empty: the arrays could not be
            # concatenated and there would be nothing to split.
            if step % train_step == 0 and self.class_1 and self.class_2:
                class_1 = np.array(
                    [image.numpy()[0] for image in self.class_1])
                class_2 = np.array(
                    [image.numpy()[0] for image in self.class_2])
                # use at most as many negatives as there are positives
                class_2 = class_2[:len(class_1)]

                data = np.concatenate([class_1, class_2])
                labels = np.append(np.ones(len(class_1)),
                                   np.zeros(len(class_2)))
                X = torch.tensor(data).to(device)
                y = torch.tensor(labels, dtype=torch.long).to(device)

                if self.surrogate_model is None:
                    # first round: ResNet-18 with a fresh 2-way head
                    model_sur = torchvision.models.resnet18(pretrained=True)
                    model_sur.fc = torch.nn.Linear(in_features=512,
                                                   out_features=2,
                                                   bias=True)
                    model_sur = model_sur.to(device)
                else:
                    # later rounds: keep training the previous network
                    model_sur = model_surrogate

                X_train, X_test, y_train, y_test = train_test_split(
                    X, y, test_size=0.2, random_state=42)
                optimizer = torch.optim.Adam(model_sur.parameters(), lr=3e-4)
                loss_fn = torch.nn.CrossEntropyLoss()
                model_surrogate, _test_history, _train_history = train(
                    model_sur, optimizer, loss_fn, X_train, y_train, X_test,
                    y_test)
                model_surrogate = model_surrogate.eval()
                self.surrogate_model = fb.PyTorchModel(model_surrogate,
                                                       bounds=(0, 1),
                                                       device=device)

            spherical_is_adv: Optional[ep.Tensor]
            if check_spherical_and_update_stats:
                # reuse the result computed above instead of querying the
                # attacked model a second time for the same points
                spherical_is_adv = is_adv_spherical_candidates
                stats_spherical_adversarial.append(spherical_is_adv)
                # TODO: algorithm: the original implementation ignores those samples
                # for which spherical is not adversarial and continues with the
                # next iteration -> we estimate different probabilities (conditional vs. unconditional)
                # TODO: thoughts: should we always track this because we compute it anyway
                stats_step_adversarial.append(is_adv)
            else:
                spherical_is_adv = None

            # in theory, we are closer per construction
            # but limited numerical precision might break this
            distances = ep.norms.l2(flatten(originals - candidates), axis=-1)
            closer = distances < source_norms
            is_best_adv = ep.logical_and(is_adv, closer)
            is_best_adv = atleast_kd(is_best_adv, ndim)

            cond = converged.logical_not().logical_and(is_best_adv)
            best_advs = ep.where(cond, candidates, best_advs)

            tb.probability("converged", converged, step)
            tb.scalar("updated_stats", check_spherical_and_update_stats, step)
            tb.histogram("norms", source_norms, step)
            tb.probability("is_adv", is_adv, step)
            if spherical_is_adv is not None:
                tb.probability("spherical_is_adv", spherical_is_adv, step)
            tb.histogram("candidates/distances", distances, step)
            tb.probability("candidates/closer", closer, step)
            tb.probability("candidates/is_best_adv", is_best_adv, step)
            tb.probability("new_best_adv_including_converged", is_best_adv,
                           step)
            tb.probability("new_best_adv", cond, step)

            if check_spherical_and_update_stats:
                full = stats_spherical_adversarial.isfull()
                tb.probability("spherical_stats/full", full, step)
                if full.any():
                    probs = stats_spherical_adversarial.mean()
                    # success rate too high -> enlarge both step sizes
                    cond1 = ep.logical_and(probs > 0.5, full)
                    spherical_steps = ep.where(
                        cond1, spherical_steps * self.step_adaptation,
                        spherical_steps)
                    source_steps = ep.where(
                        cond1, source_steps * self.step_adaptation,
                        source_steps)
                    # success rate too low -> shrink both step sizes
                    cond2 = ep.logical_and(probs < 0.2, full)
                    spherical_steps = ep.where(
                        cond2, spherical_steps / self.step_adaptation,
                        spherical_steps)
                    source_steps = ep.where(
                        cond2, source_steps / self.step_adaptation,
                        source_steps)
                    stats_spherical_adversarial.clear(
                        ep.logical_or(cond1, cond2))
                    tb.conditional_mean(
                        "spherical_stats/isfull/success_rate/mean", probs,
                        full, step)
                    tb.probability_ratio("spherical_stats/isfull/too_linear",
                                         cond1, full, step)
                    tb.probability_ratio(
                        "spherical_stats/isfull/too_nonlinear", cond2, full,
                        step)

                full = stats_step_adversarial.isfull()
                tb.probability("step_stats/full", full, step)
                if full.any():
                    probs = stats_step_adversarial.mean()
                    # TODO: algorithm: changed the two values because we are currently tracking p(source_step_sucess)
                    # instead of p(source_step_success | spherical_step_sucess) that was tracked before
                    cond1 = ep.logical_and(probs > 0.25, full)
                    source_steps = ep.where(
                        cond1, source_steps * self.step_adaptation,
                        source_steps)
                    cond2 = ep.logical_and(probs < 0.1, full)
                    source_steps = ep.where(
                        cond2, source_steps / self.step_adaptation,
                        source_steps)
                    stats_step_adversarial.clear(ep.logical_or(cond1, cond2))
                    tb.conditional_mean("step_stats/isfull/success_rate/mean",
                                        probs, full, step)
                    tb.probability_ratio(
                        "step_stats/isfull/success_rate_too_high", cond1,
                        full, step)
                    tb.probability_ratio(
                        "step_stats/isfull/success_rate_too_low", cond2, full,
                        step)

            tb.histogram("spherical_step", spherical_steps, step)
            tb.histogram("source_step", source_steps, step)
    finally:
        # close the TensorBoard writer even when a step raises
        tb.close()

    return restore_type(best_advs)