def test_pytorch_model_preprocessing_shape_change():
    """Verify that a layout-changing preprocessing callable is honoured.

    One wrapper receives channel-first images directly. The other receives
    channel-last images together with a preprocessing callable that
    transposes them to channel-first. Predictions must be identical, and
    gradients must agree once brought into the same layout.
    """
    import torch
    import torch.nn as nn

    num_classes = 1000
    bounds = (0, 255)
    channels = num_classes

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()

        def forward(self, x):
            # Average out axis 3 first, then axis 2.
            return torch.mean(torch.mean(x, 3), 2)

    def preprocessing2(x):
        # Move the trailing axis to the channel position, for a single
        # image (3 dims) or a batch (4 dims); other ranks pass through.
        if x.ndim == 3:
            x = np.transpose(x, axes=(2, 0, 1))
        elif x.ndim == 4:
            x = np.transpose(x, axes=(0, 3, 1, 2))

        def grad(dmdp):
            # Undo the single-image transpose for the incoming gradient.
            assert dmdp.ndim == 3
            return np.transpose(dmdp, axes=(1, 2, 0))

        return x, grad

    net = Net()
    plain_model = PyTorchModel(
        net, bounds=bounds, num_classes=num_classes)
    transposing_model = PyTorchModel(
        net, bounds=bounds, num_classes=num_classes,
        preprocessing=preprocessing2)

    np.random.seed(22)
    images_nhwc = np.random.rand(2, 5, 5, channels).astype(np.float32)
    images_nchw = np.transpose(images_nhwc, (0, 3, 1, 2))

    # Batched predictions.
    batch_plain = plain_model.batch_predictions(images_nchw)
    batch_transposed = transposing_model.batch_predictions(images_nhwc)
    assert np.all(batch_plain == batch_transposed)

    # Single-image predictions.
    single_plain = plain_model.predictions(images_nchw[0])
    single_transposed = transposing_model.predictions(images_nhwc[0])
    assert np.all(single_plain == single_transposed)

    # Gradients, compared in the channel-last layout.
    grad_plain = plain_model.gradient(images_nchw[0], 3)
    assert grad_plain.ndim == 3
    grad_plain = np.transpose(grad_plain, (1, 2, 0))
    grad_transposed = transposing_model.gradient(images_nhwc[0], 3)
    np.testing.assert_array_almost_equal(grad_plain, grad_transposed)
def test_pytorch_model_preprocessing():
    """Check that an in-place ``preprocess_fn`` leaves the caller's images intact.

    Three wrappers share one network; only the second is given a
    preprocessing function, and that function mutates its argument.
    After predicting with it, the input batch must be unchanged and the
    two wrappers without preprocessing must still agree.
    """
    num_classes = 1000
    bounds = (0, 255)
    channels = num_classes

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()

        def forward(self, x):
            reduced = torch.mean(x, 3)
            reduced = torch.squeeze(reduced, dim=3)
            reduced = torch.mean(reduced, 2)
            reduced = torch.squeeze(reduced, dim=2)
            return reduced

    def preprocess_fn(x):
        # modify x in-place
        x /= 2
        return x

    net = Net()
    common = dict(bounds=bounds, num_classes=num_classes, cuda=False)
    model1 = PyTorchModel(net, **common)
    model2 = PyTorchModel(net, preprocess_fn=preprocess_fn, **common)
    model3 = PyTorchModel(net, **common)

    np.random.seed(22)
    test_images = np.random.rand(2, channels, 5, 5).astype(np.float32)
    pristine = test_images.copy()

    p1 = model1.batch_predictions(test_images)
    p2 = model2.batch_predictions(test_images)

    # make sure the images have not been changed by
    # the in-place preprocessing
    assert np.all(test_images == pristine)

    p3 = model3.batch_predictions(test_images)

    expected_shape = (2, num_classes)
    assert p1.shape == p2.shape == p3.shape == expected_shape

    np.testing.assert_array_almost_equal(
        p1 - p1.max(), p3 - p3.max(), decimal=5)
def test_pytorch_model_preprocessing():
    """Check that tuple-style preprocessing leaves the caller's images intact.

    Three wrappers share one network; only the second is given a
    two-array preprocessing tuple. After predicting with it, the input
    batch must be unchanged and the two wrappers without preprocessing
    must still agree.
    """
    import torch
    import torch.nn as nn

    num_classes = 1000
    bounds = (0, 255)
    channels = num_classes

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()

        def forward(self, x):
            # Average out axis 3 first, then axis 2.
            return torch.mean(torch.mean(x, 3), 2)

    net = Net()

    # Drawn before the seed below is set, exactly as in the original flow.
    first_term = np.arange(num_classes)[:, None, None]
    second_term = np.random.uniform(size=(channels, 5, 5)) + 1
    preprocessing = (first_term, second_term)

    model1 = PyTorchModel(net, bounds=bounds, num_classes=num_classes)
    model2 = PyTorchModel(
        net, bounds=bounds, num_classes=num_classes,
        preprocessing=preprocessing)
    model3 = PyTorchModel(net, bounds=bounds, num_classes=num_classes)

    np.random.seed(22)
    test_images = np.random.rand(2, channels, 5, 5).astype(np.float32)
    pristine = test_images.copy()

    p1 = model1.batch_predictions(test_images)
    p2 = model2.batch_predictions(test_images)

    # make sure the images have not been changed by
    # the in-place preprocessing
    assert np.all(test_images == pristine)

    p3 = model3.batch_predictions(test_images)

    expected_shape = (2, num_classes)
    assert p1.shape == p2.shape == p3.shape == expected_shape

    np.testing.assert_array_almost_equal(
        p1 - p1.max(), p3 - p3.max(), decimal=5)
def create_fmodel(dataset="tiny_imagenet", model_name="resnet18", gpu=None):
    """Wrap a pretrained network in a foolbox ``PyTorchModel``.

    Args:
        dataset: Selects the class count and normalisation constants.
            Supported values are ``"imagenet"``, ``"cifa10"`` (spelling
            kept as-is for existing callers) and ``"dev"``.
        model_name: Name passed to ``ptcv_get_model``.
        gpu: When not ``None`` the model is moved with ``model.cuda()``;
            the value itself is not otherwise used.

    Returns:
        A ``PyTorchModel`` with bounds ``(0, 1)`` and per-channel
        mean/std preprocessing applied along ``axis=-3``.

    Raises:
        ValueError: If ``dataset`` is not supported. This includes the
            default ``"tiny_imagenet"``, which previously fell through
            every branch and failed with ``UnboundLocalError``.
    """
    # dataset -> (num_classes, mean, std)
    settings = {
        "imagenet": (1000, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        "cifa10": (10, [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        "dev": (1000, [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    }
    if dataset not in settings:
        raise ValueError(
            "Unsupported dataset {!r}; expected one of {}".format(
                dataset, sorted(settings)))
    num_classes, mean, std = settings[dataset]

    model = ptcv_get_model(model_name, pretrained=True)
    model.eval()
    if gpu is not None:
        model = model.cuda()

    preprocessing = dict(mean=mean, std=std, axis=-3)
    return PyTorchModel(model, bounds=(0, 1), num_classes=num_classes,
                        preprocessing=preprocessing)
def test_pytorch_device(bn_model_pytorch):
    """A device given as a string and as ``torch.device`` must resolve identically."""
    reference = bn_model_pytorch

    def wrap(device):
        # Re-wrap the fixture's underlying network on the requested device.
        return PyTorchModel(
            reference._model,
            bounds=reference.bounds(),
            num_classes=reference.num_classes(),
            device=device)

    from_string = wrap('cpu')
    from_object = wrap(torch.device('cpu'))
    assert from_string.device == from_object.device
def cw(model, X, y, binary_search_steps=5, max_iterations=1000,
       learning_rate=5E-3, initial_const=1E-2, tau_decrease_factor=0.9
       ):
    """Attack each example with ``CarliniWagnerLIAttack`` and report L-inf distances.

    Args:
        model: Network to wrap in a ``PyTorchModel`` with bounds ``(0, 1)``
            and 10 classes.
        X: Indexable batch of input tensors; ``X[i, :]`` is one example.
        y: Indexable batch of label tensors matching ``X``.
        binary_search_steps, max_iterations, learning_rate, initial_const,
        tau_decrease_factor: Forwarded unchanged to the attack call.

    Returns:
        A list with one entry per example: the L-inf distance between the
        adversarial and the original image, or ``inf`` when the attack
        returned ``None``.

    Raises:
        ImportError: If ``HAVE_FOOLBOX`` is false.
    """
    if not HAVE_FOOLBOX:
        raise ImportError('Could not import FoolBox')

    foolbox_model = PyTorchModel(model, bounds=(0, 1), num_classes=10)
    attack = CarliniWagnerLIAttack(foolbox_model, distance=Linf)

    linf_distances = []
    for i in range(len(X)):
        logging.info('Example: %g', i)
        image = X[i, :].detach().cpu().numpy()
        label = y[i].cpu().numpy()
        adversarial = attack(image, label,
                             binary_search_steps=binary_search_steps,
                             max_iterations=max_iterations,
                             learning_rate=learning_rate,
                             initial_const=initial_const,
                             tau_decrease_factor=tau_decrease_factor)
        if adversarial is None:
            # No adversarial found: subtracting from None would raise, so
            # record an infinite distance for this example instead.
            linf = float('inf')
        else:
            linf = np.max(np.abs(adversarial - image))
        logging.info('Linf distance: %g', linf)
        linf_distances.append(linf)
    return linf_distances
def bn_model_pytorch():
    """PyTorch counterpart of ``bn_model``.

    Returns a ``PyTorchModel`` (bounds ``(0, 1)``, 10 classes, CPU) around
    a parameter-free network that averages over axes 3 and 2.
    """
    import torch
    import torch.nn as nn

    class Net(nn.Module):

        def forward(self, x):
            assert isinstance(x.data, torch.FloatTensor)
            reduced = torch.mean(x, 3)
            reduced = torch.squeeze(reduced, dim=3)
            reduced = torch.mean(reduced, 2)
            reduced = torch.squeeze(reduced, dim=2)
            return reduced

    return PyTorchModel(Net(), bounds=(0, 1), num_classes=10, cuda=False)
def create_bmodel():
    """Wrap an ``EnsembleNet`` in a ``PyTorchModel`` that accepts channel-last input.

    The wrapper uses bounds ``(0, 255)``, 200 classes and ``channel_axis=3``.
    Its preprocessing deep-copies the input, transposes it to channel-first
    and supplies the matching gradient transform.
    """

    def preprocessing(x_):
        import copy

        # Work on a private copy so the caller's array is never touched.
        transposed = copy.deepcopy(x_)
        assert transposed.ndim in [3, 4]
        if transposed.ndim == 3:
            transposed = np.transpose(transposed, axes=(2, 0, 1))
        elif transposed.ndim == 4:
            transposed = np.transpose(transposed, axes=(0, 3, 1, 2))

        def grad(dmdp):
            # Undo the single-image transpose for the incoming gradient.
            assert dmdp.ndim == 3
            return np.transpose(dmdp, axes=(1, 2, 0))

        return transposed, grad

    ensemble = EnsembleNet()
    ensemble.eval()
    return PyTorchModel(ensemble, bounds=(0, 255), num_classes=200,
                        channel_axis=3, preprocessing=preprocessing)
def test_pytorch_backward(num_classes):
    """Compare ``backward`` against the hand-derived gradient of a mean-pooling net.

    The network averages a 5x5 map per channel, so each input element
    should receive its channel's upstream gradient divided by 25.
    """
    import torch
    import torch.nn as nn

    channels = num_classes

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()

        def forward(self, x):
            # Average out axis 3 first, then axis 2.
            return torch.mean(torch.mean(x, 3), 2)

    wrapped = PyTorchModel(Net(), bounds=(0, 255), num_classes=num_classes)

    test_image = np.random.rand(channels, 5, 5).astype(np.float32)
    upstream = np.random.rand(num_classes).astype(np.float32)

    test_grad = wrapped.backward(upstream, test_image)
    assert test_grad.shape == test_image.shape

    # Spread each per-channel value over the 5x5 spatial grid.
    per_channel = (upstream / 25.).reshape((-1, 1, 1))
    expected = np.repeat(per_channel, 5, axis=1)
    expected = np.repeat(expected, 5, axis=2)

    np.testing.assert_almost_equal(test_grad, expected)
def validate(val_loader, model, epsilon, args):
    """Run targeted PGD attacks on the first three loader batches.

    For batches 0 and 1 the first image of the batch is attacked; for
    batch 2 a random image is used instead. Each image is attacked once
    per target label (six labels drawn with a fixed seed).

    Returns:
        ``(imgs, advs)`` once batch index 2 has been processed. If the
        loader yields fewer than three batches the function falls off the
        end and returns ``None``.
    """
    # switch to evaluate mode
    model.eval()

    channel_shape = (3, 1, 1)
    mean = np.array([0.485, 0.456, 0.406]).reshape(channel_shape)
    std = np.array([0.229, 0.224, 0.225]).reshape(channel_shape)
    fmodel = PyTorchModel(model, bounds=(0, 1), num_classes=2048,
                          preprocessing=(mean, std))

    np.random.seed(1)  # fix seed for reproducibility across models, images
    clean_label = 0  # dummy label
    candidate_labels = np.setdiff1d(np.arange(2048), clean_label)
    target_labels = np.random.choice(candidate_labels, 6)
    print(target_labels)

    imgs = []
    advs = []

    # Batch processing is experimental in foolbox, so we feed images one by one.
    for i, (images, target) in enumerate(val_loader):
        is_last = i == 2
        if is_last:
            image = np.float32(np.random.rand(3, 224, 224))
        else:
            image = images.cpu().numpy()[0]
        imgs.append(image)
        print(image)

        for target_label in target_labels:
            attack = ProjectedGradientDescentAttack(
                model=fmodel,
                criterion=TargetClassProbability(target_label, 1. - 1e-6),
                distance=Linfinity)
            adversarial = attack(image, clean_label, binary_search=False,
                                 epsilon=epsilon, stepsize=1. / 255,
                                 iterations=500, random_start=False,
                                 return_early=False)

            adv_pred_label = np.argmax(fmodel.predictions(adversarial))
            clean_pred_label = np.argmax(fmodel.predictions(image))
            print('Iter, Clean_pred, Adv, Adv_pred: ', i, clean_pred_label,
                  target_label, adv_pred_label)

            advs.append(adversarial)

        if is_last:
            return imgs, advs
def create_fmodel(cls, cfg):
    """Instantiate ``cls`` from ``cfg``, load its checkpoint and wrap it for foolbox.

    ``cfg`` must provide ``"cfg"`` (constructor keyword arguments),
    ``"checkpoint"`` and ``"num_classes"``; ``"load_name_space"`` is
    optional and defaults to ``None``.
    """
    network = cls(**cfg["cfg"])
    name_space = cfg.get("load_name_space", None)
    network.load_checkpoint(cfg["checkpoint"], load_name_space=name_space)
    # PyTorch models usually receive inputs in (0, 1) because of transforms.ToTensor.
    return PyTorchModel(network, bounds=(0, 1),
                        num_classes=cfg["num_classes"])
def get_attack_model(model, dataset, device, mean=None, std=None):
    """Turn a ``torch.nn.Module`` into a foolbox ``PyTorchModel`` for attacks.

    ``mean`` and ``std`` fall back to ``datasets.MEANS[dataset]`` and
    ``datasets.STDS[dataset]`` when omitted. The model is switched to eval
    mode and moved to ``device`` before wrapping.
    """
    if mean is None:
        mean = datasets.MEANS[dataset]
    if std is None:
        std = datasets.STDS[dataset]

    # Nest every entry two levels deep so each becomes a 1x1 block.
    nested_mean = [[[value]] for value in mean]
    nested_std = [[[value]] for value in std]

    prepared = model.eval().to(device)
    return PyTorchModel(
        prepared,
        bounds=(0, 1),
        num_classes=datasets.NUM_CLASSES[dataset],
        preprocessing=(nested_mean, nested_std),
        device=device,
    )
def test_pytorch_model_gradient():
    """Numerically validate the gradient returned by ``predictions_and_gradient``.

    The loss is evaluated half a step either side of the image along the
    gradient direction; the difference must be positive and close to
    ``epsilon * ||g||^2``.
    """
    import torch
    import torch.nn as nn

    num_classes = 1000
    bounds = (0, 255)
    channels = num_classes

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()

        def forward(self, x):
            # Average out axis 3 first, then axis 2.
            return torch.mean(torch.mean(x, 3), 2)

    # Drawn before the seed below is set, exactly as in the original flow.
    first_term = np.arange(num_classes)[:, None, None]
    second_term = np.random.uniform(size=(channels, 5, 5)) + 1

    wrapped = PyTorchModel(
        Net(), bounds=bounds, num_classes=num_classes,
        preprocessing=(first_term, second_term))

    epsilon = 1e-2

    np.random.seed(23)
    test_image = np.random.rand(channels, 5, 5).astype(np.float32)
    test_label = 7

    _, gradient = wrapped.predictions_and_gradient(test_image, test_label)

    half_step = epsilon / 2 * gradient
    loss_before = wrapped._loss_fn(test_image - half_step, test_label)
    loss_after = wrapped._loss_fn(test_image + half_step, test_label)
    loss_delta = loss_after - loss_before

    assert 1e4 * loss_delta > 1

    # make sure that gradient is numerically correct
    np.testing.assert_array_almost_equal(
        1e4 * loss_delta,
        1e4 * epsilon * np.linalg.norm(gradient)**2,
        decimal=1)
def ead_attack(model: nn.Module,
               inputs: Tensor,
               labels: Tensor,
               targeted: bool = False,
               **kwargs) -> Tensor:
    """Run an ``EADAttack`` against ``model`` and return the adversarial inputs.

    Args:
        model: Network to attack; wrapped in a ``PyTorchModel`` with
            bounds ``(0, 1)``.
        inputs: Inputs to perturb.
        labels: True labels for an untargeted attack, or the desired
            target classes when ``targeted`` is true.
        targeted: Selects ``TargetedMisclassification`` instead of
            ``Misclassification`` as the criterion.
        **kwargs: Forwarded to the ``EADAttack`` constructor.

    Returns:
        The first element of the attack's return value.
    """
    fmodel = PyTorchModel(model=model, bounds=(0, 1))
    attack = EADAttack(**kwargs)
    if targeted:
        # A stray trailing comma used to turn this into a 1-tuple, so the
        # attack received a tuple instead of a criterion object.
        criterion = TargetedMisclassification(target_classes=labels)
    else:
        criterion = Misclassification(labels=labels)
    adv_inputs = attack(model=fmodel, inputs=inputs, criterion=criterion,
                        epsilons=None)[0]
    return adv_inputs
def test_pytorch_model(num_classes):
    """Smoke-test shapes and self-consistency of the basic ``PyTorchModel`` API."""
    import torch
    import torch.nn as nn

    channels = num_classes

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()

        def forward(self, x):
            # Average out axis 3 first, then axis 2.
            return torch.mean(torch.mean(x, 3), 2)

    wrapped = PyTorchModel(Net(), bounds=(0, 255), num_classes=num_classes)

    test_images = np.random.rand(2, channels, 5, 5).astype(np.float32)
    first_image = test_images[0]
    test_label = 7

    batch_logits = wrapped.batch_predictions(test_images)
    assert batch_logits.shape == (2, num_classes)

    test_logits = wrapped.predictions(first_image)
    assert test_logits.shape == (num_classes,)

    test_gradient = wrapped.gradient(first_image, test_label)
    assert test_gradient.shape == first_image.shape

    # The combined call must agree with the two separate calls.
    np.testing.assert_almost_equal(
        wrapped.predictions_and_gradient(first_image, test_label)[0],
        test_logits)

    np.testing.assert_almost_equal(
        wrapped.predictions_and_gradient(first_image, test_label)[1],
        test_gradient)

    assert wrapped.num_classes() == num_classes
def create() -> PyTorchModel:
    """Build the CNN, load ``mnist_cnn.pth`` from this file's directory and wrap it.

    Returns a ``PyTorchModel`` with bounds ``(0, 1)`` and scalar mean/std
    preprocessing.
    """
    layers = [
        nn.Conv2d(1, 32, 3),
        nn.ReLU(),
        nn.Conv2d(32, 64, 3),
        nn.ReLU(),
        nn.MaxPool2d(2),
        nn.Dropout2d(0.25),
        nn.Flatten(),
        nn.Linear(9216, 128),
        nn.ReLU(),
        nn.Dropout2d(0.5),
        nn.Linear(128, 10),
    ]
    network = nn.Sequential(*layers)

    # The weights file sits next to this module.
    here = os.path.dirname(os.path.realpath(__file__))
    weights_path = os.path.join(here, "mnist_cnn.pth")
    network.load_state_dict(torch.load(weights_path))  # type: ignore
    network.eval()

    return PyTorchModel(network, bounds=(0, 1),
                        preprocessing=dict(mean=0.1307, std=0.3081))
def cw(model, X, y, binary_search_steps=5, max_iterations=1000,
       learning_rate=5E-3, initial_const=1E-2, tau_decrease_factor=0.9,
       eps=8 / 255.
       ):
    """Count examples whose prediction survives an eps-clipped ``CarliniWagnerLIAttack``.

    Each adversarial is projected back into the L-inf ball of radius
    ``eps`` around the original image and clipped to ``[0, 1]``. A failed
    attack (``None``) is treated as the unperturbed image.

    Returns:
        The number of examples for which the wrapped model still predicts
        ``label`` on the projected adversarial.

    Raises:
        ImportError: If ``HAVE_FOOLBOX`` is false.
    """
    if not HAVE_FOOLBOX:
        raise ImportError('Could not import FoolBox')

    foolbox_model = PyTorchModel(model, bounds=(0, 1), num_classes=10)
    attack = CarliniWagnerLIAttack(foolbox_model, distance=Linf)
    attack_kwargs = dict(binary_search_steps=binary_search_steps,
                         max_iterations=max_iterations,
                         learning_rate=learning_rate,
                         initial_const=initial_const,
                         tau_decrease_factor=tau_decrease_factor)

    still_correct = 0
    for idx in range(len(X)):
        logging.info('Example: %g', idx)
        image = X[idx, :].detach().cpu().numpy()
        label = y[idx].cpu().numpy()

        candidate = attack(image, label, **attack_kwargs)
        if candidate is None:
            candidate = image

        # Project into the eps-ball, then into the valid pixel range.
        candidate = image + np.clip(candidate - image, -eps, eps)
        candidate = np.clip(candidate, 0., 1.)

        logging.info('Linf distance: %g',
                     np.max(np.abs(candidate - image)))

        predicted = foolbox_model.predictions(candidate).argmax(axis=-1)
        if predicted == label:
            still_correct += 1
    return still_correct
def attack_integrated(image, label, attack_name):
    """Attack ``image`` with the attacker registered under ``attack_name``.

    Uses the module-level ``model``, ``attackers`` and ``DEVICE``. The
    attack runs on NumPy copies of the inputs with a ``Misclassification``
    criterion and ``Linfinity`` distance; the result is returned as a
    tensor on ``DEVICE``.
    """
    # No preprocessing (mean/std) is applied by this wrapper.
    fmodel = PyTorchModel(model, bounds=(0, 1), num_classes=1000)
    attacker_cls = attackers[attack_name]
    attacker = attacker_cls(fmodel, criterion=Misclassification(),
                            distance=Linfinity)

    image_np = image.cpu().numpy()
    label_np = label.cpu().numpy()

    advs = attacker(image_np, label_np)
    return torch.from_numpy(advs).to(DEVICE)
def generate_examples(model, config, pretrained_config, output_root):
    """Attack validation batches, then summarise and persist the results.

    Adversarial examples and their targets are saved under
    ``output_root``; the summary statistics are printed and written to
    ``results.json`` under ``config['output_root']``. Finally the model's
    accuracy on the adversarial examples is printed.
    """
    examples_path = os.path.join(output_root, 'examples')
    targets_path = os.path.join(output_root, 'targets')

    # Set up adversarial attack.
    adv_model = PyTorchModel(model, (0, 1),
                             pretrained_config.data.class_count,
                             cuda=config.cuda)
    criterion = Misclassification()
    attack_cls = getattr(foolbox.attacks, config.name)
    attack = attack_cls(adv_model, criterion)

    # Get data.
    pretrained_config.cuda = config.cuda
    pretrained_config.optim.batch_size = config.data.batch_size
    data = load_data(pretrained_config)

    n_examples = config['num_examples']
    n_batches = int(
        math.ceil((n_examples * 1.0) / pretrained_config.optim.batch_size))

    # Accumulators for everything the attack produces.
    adv_ex = torch.Tensor()
    adv_targets = torch.LongTensor()
    adv_mse = torch.Tensor()
    adv_inf = torch.Tensor()
    success = torch.Tensor()

    # Set up distance for the adversarial attack.
    distance_name = config.get('distance')
    if distance_name is not None:
        distance = getattr(foolbox.distances, distance_name)
    else:
        distance = foolbox.distances.MeanSquaredDistance

    # Perform the attack.
    batches = islice(data['validation'], n_batches)
    for sample in tqdm(batches, total=n_batches):
        labels = sample[1].type(torch.LongTensor)
        inputs = to_cuda(sample[0], cuda=config.cuda)
        adv, adv_t, batch_success, batch_adv_mse, batch_adv_inf = batch_attack(
            attack, adv_model, criterion, inputs, labels.cpu().numpy(),
            config['attack_kwargs'], distance)
        adv_ex = torch.cat([adv_ex, adv], 0)
        adv_targets = torch.cat([adv_targets, adv_t], 0)
        success = torch.cat([success, batch_success], 0)
        adv_mse = torch.cat([adv_mse, batch_adv_mse], 0)
        adv_inf = torch.cat([adv_inf, batch_adv_inf], 0)

    # Summarize the results.
    success_rate = success.mean().item()
    successful_mse = adv_mse[success == 1.0].numpy()
    results = {
        "success_rate": success_rate,
        "defense_rate": 1 - success_rate,
        "mean_mse": ((adv_mse * success).sum() / success.sum()).item(),
        "mean_inf": ((adv_inf * success).sum() / success.sum()).item(),
        "mse_quartiles": list(
            np.percentile(successful_mse, [0, 25, 50, 75, 100])),
    }
    results["median_mse"] = results["mse_quartiles"][2]

    print("success rate: {}".format(results["success_rate"]))
    print("defense rate: {}".format(results["defense_rate"]))
    print("mean MSE for successful attacks: {}".format(results["mean_mse"]))
    print("mean L_inf for successful attacks: {}".format(results["mean_inf"]))
    print("MSE quartiles for successful attacks: {}".format(
        results["mse_quartiles"]))

    results_path = os.path.join(config['output_root'], 'results.json')
    with open(results_path, 'w') as f:
        json.dump(results, f, sort_keys=True, indent=4)

    np.save(examples_path, adv_ex)
    np.save(targets_path, adv_targets)

    print(
        accuracy(model, to_cuda(adv_ex, cuda=config.cuda),
                 to_cuda(adv_targets, cuda=config.cuda)))
args = parser.parse_args() device = torch.device(args.device if torch.cuda.is_available() else "cpu") model_list = ['Resnet34', 'Resnet50', 'VGG19', 'Densenet121', 'Mobilenet'] attr_list = ['resnet34', 'resnet50', 'vgg19_bn', 'densenet121', 'mobilenet_v2'] preprocessing = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], axis=-3) for i in range(len(model_list)): if model_list[i] == args.model_name: pretrained_model = getattr(models, attr_list[i])(pretrained=True).eval() fmodel = PyTorchModel(pretrained_model, bounds=(0, 1), preprocessing=preprocessing) surrogate_model_list = [] if args.smodel_name == '': #multiSurrogate for i in range(len(model_list)): if model_list[i] != args.model_name: pretrained_model = getattr(models, attr_list[i])(pretrained=True).eval() surrogate_model_list.append( PyTorchModel(pretrained_model, bounds=(0, 1), preprocessing=preprocessing)) else: for i in range(len(model_list)): if model_list[i] == args.smodel_name:
device = torch.device("cpu") else: device = torch.device(args.device) # setup test model model = pamdl.pa_resnet152_config1() model.to(device) model.eval() # setup attacker nClasses = 1000 victim_model = PyTorchModel(model.model, (0, 1), nClasses, device=device, preprocessing=(np.array([0.485, 0.456, 0.406]).reshape( (3, 1, 1)), np.array([0.229, 0.224, 0.225]).reshape( (3, 1, 1)))) if args.attack == "pgd": attack = atk.pgdAttack(victim_model) elif args.attack == "fgsm": attack = atk.fgsmAttack(victim_model) elif args.attack == "df": attack = atk.dfAttack(victim_model) elif args.attack == "cw": attack = atk.cwAttack(victim_model) else: attack = atk.NullAttack()
def wrapFoolboxModel(model):
    """Wrap ``model`` as a 10-class foolbox ``PyTorchModel`` on ``(0, 1)`` inputs.

    The channel axis is 1 and the preprocessing pair is ``(0, 1)``.
    """
    wrapper_options = dict(
        bounds=(0, 1),
        num_classes=10,
        channel_axis=1,
        preprocessing=(0, 1),
    )
    return PyTorchModel(model, **wrapper_options)
def create_bmodel(dataset="tiny_imagenet", model_name="resnet101", gpu=None,
                  params=None):
    """Wrap a pretrained (or checkpointed STN) network in a foolbox ``PyTorchModel``.

    Args:
        dataset: Selects the model source and settings. ``"imagenet"``,
            ``"cifa10"`` (spelling kept as-is for existing callers),
            ``"dev"``, ``"sharp"`` and ``"real"`` load ``model_name`` via
            ``ptcv_get_model`` with mean/std preprocessing; ``"mnist"``
            builds a spatial-transformer network from ``params`` and
            loads its checkpoint.
        model_name: Name passed to ``ptcv_get_model`` (unused for
            ``"mnist"``).
        gpu: When not ``None`` the ptcv model is moved with
            ``model.cuda()``; the value itself is not otherwise used.
        params: Settings object for the ``"mnist"`` branch; its
            ``stn_module`` and ``device`` attributes are read here.

    Returns:
        A ``PyTorchModel`` with bounds ``(0, 1)``.

    Raises:
        ValueError: If ``dataset`` is not supported. This includes the
            default ``"tiny_imagenet"``, which previously fell through
            every branch and failed with ``UnboundLocalError``.
    """
    half = [0.5, 0.5, 0.5]
    # dataset -> (num_classes, mean, std) for the ptcv-backed models
    ptcv_settings = {
        "imagenet": (1000, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        "cifa10": (10, half, half),
        "dev": (1000, half, half),
        "sharp": (1000, half, half),
        "real": (1000, half, half),
    }

    if dataset in ptcv_settings:
        num_classes, mean, std = ptcv_settings[dataset]
        model = ptcv_get_model(model_name, pretrained=True)
        model.eval()
        if gpu is not None:
            model = model.cuda()
        preprocessing = dict(mean=list(mean), std=list(std), axis=-3)
        bmodel = PyTorchModel(model, bounds=(0, 1), num_classes=num_classes,
                              preprocessing=preprocessing)
    elif dataset == "mnist":
        import tools.spatial_transformer.model as stn_model
        from tools.spatial_transformer.model import initialize
        from tools.spatial_transformer.vision_transforms import gen_random_perspective_transform, apply_transform_to_batch
        from tools.spatial_transformer import utils as stn_utils

        P_init = gen_random_perspective_transform(params)
        model = stn_model.STN(getattr(stn_model, params.stn_module), params,
                              P_init).to(params.device)
        initialize(model)
        stn_utils.load_checkpoint(
            './tools/spatial_transformer/experiments/base_stn_model/state_checkpoint.pt',
            model)
        bmodel = PyTorchModel(model, bounds=(0, 1), num_classes=10)
    else:
        raise ValueError(
            "Unsupported dataset {!r}; expected one of {}".format(
                dataset, sorted(list(ptcv_settings) + ["mnist"])))
    return bmodel
cudnn.benchmark = True model = args.model if model == "base": checkpoint = torch.load("./models/base/ckpt.pth") else: checkpoint = torch.load("./models/" + model + "/robust.pth") net.load_state_dict(checkpoint["net"]) net.eval() preprocessing = dict(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010], axis=-3) fmodel = PyTorchModel(net, bounds=(0, 1), preprocessing=preprocessing, device=device) attack = fa.hop_skip_jump.HopSkipJump(steps=args.iter, max_gradient_eval_steps=1000, tensorboard=None) adv_distances = [] location = "./models/" + model fname = "HSJA_new_iter=" + str(args.iter) + ".png" for images, labels in testloader: images = images.to(device) labels = labels.to(device) print("True labels", labels)
def main():
    """Attack a checkpointed model over a dataset and write the results to disk.

    Steps: log the command line, prepare ``args.output_path``, load the
    model from ``args.model_dir``, wrap it for foolbox, run the selected
    attack batch by batch, then save labels, adversarial labels, logits,
    distances, a short results summary and one PNG per adversarial.

    Raises:
        NotImplementedError: If ``args.attack`` is neither ``'CWL2'`` nor
            ``'EAD'``.
    """
    import shutil  # function-scope import: only needed for --overwrite

    args = parser.parse_args()

    # Keep a running log of every command line this script was run with.
    if not os.path.isdir('CMDs'):
        os.mkdir('CMDs')
    with open('CMDs/construct_adversarial_attack.cmd', 'a') as f:
        f.write(' '.join(sys.argv) + '\n')
        f.write('--------------------------------\n')

    if os.path.isdir(args.output_path):
        if not args.overwrite:
            print(f'Directory {args.output_path} exists. Exiting...')
            sys.exit()
        # os.remove() does not expand wildcards, so removing '<dir>/*'
        # always failed; clear the directory by removing the whole tree.
        shutil.rmtree(args.output_path)
    # Creates the output directory and its 'images' subfolder in one go.
    os.makedirs(os.path.join(args.output_path, 'images'))

    # Check that we are using a sensible GPU
    device = select_gpu(args.gpu)

    # Load up the model
    model_dir = Path(args.model_dir)
    ckpt = torch.load(os.path.join(model_dir, 'model/model.tar'),
                      map_location=device)
    model = ModelFactory.model_from_checkpoint(ckpt)
    model.to(device)
    model.eval()

    # Wrap model with a Foolbox wrapper.
    mean = np.array([0.4914, 0.4823, 0.4465]).reshape((3, 1, 1))
    std = np.array([0.247, 0.243, 0.261]).reshape((3, 1, 1))
    fmodel = PyTorchModel(model, bounds=(0, 1),
                          num_classes=ckpt['num_classes'],
                          preprocessing=(mean, std))

    # Load the evaluation data
    mode, split = ('train', 'train') if args.train else ('eval', 'test')
    dataset = DATASET_DICT[args.dataset](
        root=args.data_path,
        transform=construct_transforms(n_in=ckpt['n_in'], mode=mode),
        target_transform=None,
        download=True,
        split=split)
    loader = DataLoader(dataset, batch_size=args.batch_size, num_workers=1)

    # Construct adversarial attack
    if args.attack == 'CWL2':
        attack_cls = (AdaptiveCarliniWagnerL2Attack
                      if args.adaptive else CarliniWagnerL2Attack)
    elif args.attack == 'EAD':
        attack_cls = AdaptiveEADAttack if args.adaptive else EADAttack
    else:
        raise NotImplementedError(f'Unsupported attack: {args.attack}')
    attack = attack_cls(model=fmodel)

    adversarials = []
    for i, data in enumerate(loader):
        start = time.time()
        images, labels = data
        images = images.numpy()
        labels = labels.numpy()
        adversarials.extend(attack(inputs=images, labels=labels,
                                   unpack=False))
        print(
            f"Batch {i}/{len(loader)} took {np.round((time.time() - start) / 60.0, 1)} minutes."
        )

    adv_labels = np.stack(
        [adversarial.adversarial_class for adversarial in adversarials],
        axis=0)
    labels = np.stack(
        [adversarial.original_class for adversarial in adversarials], axis=0)
    distances = np.stack(
        [adversarial.distance for adversarial in adversarials], axis=0)
    logits = np.stack([adversarial.output for adversarial in adversarials],
                      axis=0)

    # np.savetxt has no ``dtype`` keyword (passing it raises TypeError);
    # the text format is controlled with ``fmt`` instead.
    # NOTE(review): the integer format assumes every attack produced a
    # class; confirm how failed attacks are represented.
    np.savetxt(os.path.join(args.output_path, 'labels.txt'), labels,
               fmt='%d')
    np.savetxt(os.path.join(args.output_path, 'adv_labels.txt'), adv_labels,
               fmt='%d')
    np.savetxt(os.path.join(args.output_path, 'logits.txt'),
               np.asarray(logits, dtype=np.float32))
    # NOTE(review): assumes ``adversarial.distance`` is a plain number;
    # confirm against the attack's return type.
    np.savetxt(os.path.join(args.output_path, 'distances.txt'), distances)

    accuracy = np.mean(np.asarray(labels == adv_labels, dtype=np.float32))
    sr = np.mean(np.asarray(labels != adv_labels, dtype=np.float32))

    with open(os.path.join(args.output_path, 'results.txt'), 'a') as f:
        f.write(
            f'Classification Error: {np.round(100 * (1.0 - accuracy), 1)} \n')
        f.write(f'Success Rate: {np.round(100 * sr, 1)} \n')

    print("Saving images to folder...")
    adversarial_images = np.stack(
        [adversarial.perturbed for adversarial in adversarials], axis=0)
    # The value range is the same for every image, so report it once.
    print(np.max(adversarial_images), np.min(adversarial_images))
    # NOTE(review): arrays are handed to PIL in the layout the model
    # consumes; confirm that layout is one Image.fromarray accepts.
    for i, perturbed in enumerate(adversarial_images):
        image = np.asarray(255.0 * perturbed, dtype=np.uint8)
        Image.fromarray(image).save(
            os.path.join(args.output_path, f"images/{i}.png"))
def main():
    """Run a boundary attack on embedded log strings and write the adversarial logs.

    Steps: load the data splits, embed them with a seeded ``nn.Embedding``,
    build a character-to-embedding lookup, restore the text CNN, collect
    training samples labelled and predicted as class 1 to use as starting
    points, then attack the first ``n_test`` test samples and write each
    original/adversarial log pair to a text file under ``./Data``.
    """
    X_train = np.load("./Data/sGrid/X_train.npy")
    X_test = np.load("./Data/sGrid/X_test.npy")
    X_vaild = np.load("./Data/sGrid/X_vaild.npy")
    Y_train = torch.from_numpy(np.load("./Data/sGrid/Y_train.npy"))
    Y_test = torch.from_numpy(np.load("./Data/sGrid/Y_test.npy"))
    Y_vaild = torch.from_numpy(np.load("./Data/sGrid/Y_vaild.npy"))

    torch.manual_seed(1)
    embedding = nn.Embedding(128, 5, max_norm=1)

    def embed(features):
        # Features are scaled by 128 to obtain the integer embedding index.
        indices = Variable(torch.from_numpy(features * 128).long())
        return embedding(indices).detach()

    # Same lookup order as before: train, test, then validation.
    X_train_embed = embed(X_train)
    X_test_embed = embed(X_test)
    X_vaild_embed = embed(X_vaild)

    # Map every character seen in the training data to its embedding;
    # the first occurrence wins.
    symbol_dict = {}
    for i in range(X_train.shape[0]):
        for j in range(400):
            symbol = chr(int(X_train[i, j] * 128))
            if symbol not in symbol_dict:
                symbol_dict[symbol] = X_train_embed[i, j]

    args = Args()
    net = CNN_Text_dropout(args).cuda()
    print(net)

    pretrained_dict = torch.load(
        'Parameters/cnn_text_kernel3.5.7.9_128_embed_dropout.pkl').state_dict(
        )
    model_dict = net.state_dict()
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items() if k in model_dict
    }
    # Update the existing model_dict with the pretrained entries.
    model_dict.update(pretrained_dict)
    # Load only the state_dict entries we actually need.
    net.load_state_dict(model_dict)
    net.eval()

    # Model wrapper and boundary attack.
    model = PyTorchModel(
        net,
        (-1, 1),
        2,
    )
    attack = BoundaryAttack(model)

    # Find the nearest attack sample to use as the starting point.
    X_test_string = find_string_from_tensor(X_test)
    dict_attack_string_tensor = {}
    for i in range(len(X_train)):
        x, label = X_train[i], int(Y_train[i].numpy()[0])
        # Only keep attack samples that the model also predicts as attacks.
        if label == 1 and np.argmax(model.predictions(
                X_train_embed[i].numpy())) == 1:
            string = "".join(chr(int(v * 128)) for v in x)
            dict_attack_string_tensor[string] = X_train_embed[i]

    n_test = 100
    dict_nearest_str = find_nearest_adversial(
        X_test_string[:n_test], list(dict_attack_string_tensor.keys()),
        str_similarity)
    list_X_test_nearest_tensor = [
        dict_attack_string_tensor[dict_nearest_str[log]]
        for log in X_test_string[:n_test]
    ]

    # Begin the attack.
    max_iteration = 50
    # NOTE(review): these counters are updated but never reported.
    n_success = 0
    n_total = 0
    iterations = []

    out_path = f'./Data/boundary_attack_unfixed_iteration_nearest_starting_max_{max_iteration}_test_{n_test}.txt'
    # The context manager closes the file even if an attack step raises.
    with open(out_path, "w") as file:
        for i in tqdm.tqdm_notebook(range(n_test)):
            url, label = X_test_embed[i].numpy(), int(Y_test[i].numpy()[0])
            prediction = np.argmax(model.predictions(url))
            if label == 0 and prediction == 0:
                n_total += 1
                good_adversarial = None
                good_iteration = 0
                for iteration in range(max_iteration + 1):
                    adversarial = attack(
                        url,
                        label,
                        starting_point=list_X_test_nearest_tensor[i].numpy(),
                        log_every_n_steps=20,
                        iterations=iteration)
                    # Adversarial log string.
                    str_adversarial = Tensor_to_Log(
                        symbol_dict, torch.from_numpy(adversarial))
                    # Convert the string back to a tensor, so the
                    # prediction is made on what the string represents.
                    prediction = np.argmax(
                        model.predictions(
                            Log_to_Tensor(symbol_dict,
                                          str_adversarial).numpy()))
                    if prediction == 1:
                        good_iteration = iteration
                        good_adversarial = adversarial
                if good_adversarial is not None:
                    n_success += 1
                    iterations.append(good_iteration)
                    # Original log.
                    file.write(X_test_string[i])
                    file.write("\n")
                    # Adversarial log.
                    file.write(
                        Tensor_to_Log(symbol_dict,
                                      torch.from_numpy(good_adversarial)))
                    file.write("\n\n")
def validate(val_loader, model, epsilon, args):
    """Measure top-1 accuracy under a targeted random-start PGD attack.

    The first image of every loader batch is attacked towards a randomly
    chosen class other than its true one. When no adversarial is found,
    the clean image and label are used instead.

    Returns:
        The running average of per-image accuracy (``top1.avg``).
    """
    batch_time = AverageMeter('Time', ':6.3f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    progress = ProgressMeter(len(val_loader), [batch_time, top1],
                             prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    channel_shape = (3, 1, 1)
    mean = np.array([0.485, 0.456, 0.406]).reshape(channel_shape)
    std = np.array([0.229, 0.224, 0.225]).reshape(channel_shape)
    fmodel = PyTorchModel(model, bounds=(0, 1), num_classes=1000,
                          preprocessing=(mean, std))

    n_batches = len(val_loader)
    clean_labels = np.zeros(n_batches)
    target_labels = np.zeros(n_batches)
    clean_pred_labels = np.zeros(n_batches)
    adv_pred_labels = np.zeros(n_batches)

    end = time.time()

    # Batch processing is not supported in foolbox 1.8, so images are fed
    # one by one. With a batch size of 2 this means only every other image
    # is considered (due to computational costs).
    for i, (images, target) in enumerate(val_loader):
        image = images.cpu().numpy()[0]
        clean_label = target.cpu().numpy()[0]

        other_classes = np.setdiff1d(np.arange(1000), clean_label)
        target_label = np.random.choice(other_classes)

        attack = RandomStartProjectedGradientDescentAttack(
            model=fmodel,
            criterion=TargetClass(target_label),
            distance=Linfinity)
        adversarial = attack(image, clean_label, binary_search=False,
                             epsilon=epsilon, stepsize=2. / 255,
                             iterations=args.pgd_steps, random_start=True)

        if adversarial is None:
            # Non-adversarial
            adversarial = image
            target_label = clean_label

        adv_pred_labels[i] = np.argmax(fmodel.predictions(adversarial))
        clean_labels[i] = clean_label
        target_labels[i] = target_label
        clean_pred_labels[i] = np.argmax(fmodel.predictions(image))

        print('Iter, Clean, Clean_pred, Adv, Adv_pred: ', i, clean_label,
              clean_pred_labels[i], target_label, adv_pred_labels[i])

        # measure accuracy and update average
        acc1 = 100. * np.mean(clean_label == adv_pred_labels[i])
        top1.update(acc1, 1)

        # measure elapsed time
        now = time.time()
        batch_time.update(now - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)

    print('* Acc@1 {top1.avg:.3f} '.format(top1=top1))

    return top1.avg
def create_fmodel(cls, cfg):
    """Instantiate ``cls`` from ``cfg["cfg"]`` and wrap it for foolbox.

    The wrapper uses bounds ``(0, 255)`` and ``cfg["num_classes"]`` classes.
    """
    network = cls(**cfg["cfg"])
    return PyTorchModel(network, bounds=(0, 255),
                        num_classes=cfg["num_classes"])
def run():
    """Attack the test set with the selected foolbox attack and save results.

    Reads its configuration from module-level GUI objects (``use_cuda``,
    ``attack_comboxlist``, ``comboxlist``, ``save_adv``, ``save_ori``,
    ``att_num``) and reports progress through ``send_information``.
    Adversarial and original samples are written below
    ``attacks/<dataset>/<method>_ms/`` as ``.npy`` arrays and ``.png`` images,
    depending on the ``save_adv`` / ``save_ori`` switches.

    The run button is disabled for the duration of the call and is always
    re-enabled afterwards, even when the attack fails with an exception.

    Raises:
        ValueError: If the selected attack method is not one of
            ``'FGSM'``, ``'iFGSM'`` or ``'DeepFool'``.
    """
    run_button.config(state='disabled')
    try:
        test_loader = load_data()
        torch_model = select_model()

        if use_cuda.get():
            log_text = 'Move model to cuda.'
            # The message used to be built but never shown.
            send_information(Receive_window, log_text)
            torch_model = torch_model.cuda()
        torch_model.eval()
        fmodel = PyTorchModel(torch_model, bounds=[0, 1], num_classes=10)

        method = attack_comboxlist.get()
        log_text = 'Perform {} attack... \n'.format(method)
        send_information(Receive_window, log_text)

        # Attack classes are imported lazily, only for the selected method.
        if method == 'FGSM':
            from foolbox.attacks import FGSM
            attack = FGSM(model=fmodel, criterion=Misclassification())
        elif method == 'iFGSM':
            from foolbox.attacks import IterativeGradientSignAttack
            attack = IterativeGradientSignAttack(
                model=fmodel, criterion=Misclassification())
        elif method == 'DeepFool':
            from foolbox.attacks import DeepFoolAttack
            attack = DeepFoolAttack(
                model=fmodel, criterion=Misclassification())
        else:
            # Fail early instead of hitting an unbound ``attack`` later on.
            raise ValueError(
                'Unsupported attack method: {!r}'.format(method))

        attack_root = 'attacks/' + comboxlist.get() + '/' + method + '_ms'
        hacked_path = attack_root + '/hacked'
        hacked_data_path = attack_root + '/hacked_data'
        original_path = attack_root + '/original'
        original_data_path = attack_root + '/original_data'

        # Create every output directory on its own so that a partially
        # existing tree (e.g. root present, sub-folder missing) still works.
        for out_dir in (hacked_path, hacked_data_path,
                        original_path, original_data_path):
            os.makedirs(out_dir, exist_ok=True)

        count = 1
        for data, label in test_loader:
            data = data[0].numpy()
            label = label.item()
            adversarial = attack(data, label)

            if adversarial is None:
                # The attack did not produce an adversarial example.
                continue

            perturbation = np.linalg.norm(adversarial - data)
            if not perturbation > 0:
                # Unperturbed (or NaN) result: nothing worth saving.
                continue

            adv_label = np.argmax(fmodel.predictions(adversarial))

            # Convert to an image-like array scaled by 255.
            dataset = comboxlist.get()
            if dataset == 'MNIST':
                image_data = adversarial[0] * 255
                ori_data = data[0] * 255
            elif dataset == 'CIFAR10':
                image_data = adversarial.transpose(1, 2, 0) * 255
                ori_data = data.transpose(1, 2, 0) * 255

            if save_adv.get():
                hackedname = hacked_data_path + '/' + str(
                    count) + '-' + str(label) + '-' + str(
                        adv_label) + ".npy"
                np.save(hackedname, image_data)
                image = Image.fromarray(image_data.astype(np.uint8))
                image.save(
                    "{hackedpath}/{name}-{label}-{adv_label}.png".format(
                        hackedpath=hacked_path,
                        name=count,
                        label=label,
                        adv_label=adv_label))

            if save_ori.get():
                oriname = original_data_path + '/' + str(
                    count) + '-' + str(label) + ".npy"
                np.save(oriname, ori_data)
                oriimage = Image.fromarray(ori_data.astype(np.uint8))
                oriimage.save("{originalpath}/{name}-{label}.png".format(
                    originalpath=original_path, name=count, label=label))

            count = count + 1

            total = int(att_num.get())
            # Integer step of at least 1: avoids a fractional modulus and a
            # ZeroDivisionError when fewer than ten attacks are requested.
            step = max(1, total // 10)
            if count % step == 0:
                log_text = "Attack: {}/{}".format(count, att_num.get())
                send_information(Receive_window, log_text)
            if count > total:
                break

        log_text = "Done! The adversarial images and correspoinding data are stored in attacks for next use in step4!"
        send_information(Receive_window, log_text)
    finally:
        # Always give control back to the user, even after a failure.
        run_button.config(state='normal')
def main(seed=0, n_epochs=5, batch_size=100):
    """Train a fully connected ANN on MNIST, then run a boundary attack on it.

    Args:
        seed: Seed for NumPy and for the torch RNG in use (CUDA generators
            when CUDA is available, the default generator otherwise).
        n_epochs: Number of training epochs.
        batch_size: Number of randomly drawn samples per training step.
    """
    np.random.seed(seed)

    if torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.manual_seed_all(seed)
    else:
        torch.manual_seed(seed)

    print()
    print('Creating and training the ANN...')
    print()

    # Create and train an ANN on the MNIST dataset.
    ANN = FullyConnectedNetwork()

    # Get the MNIST data.
    images, labels = MNIST(os.path.join(
        ROOT_DIR, 'data', 'MNIST'
    ), download=True).get_train()

    images /= images.max()  # Standardizing to [0, 1].
    images = images.view(-1, 784)
    labels = labels.long()

    # Specify optimizer and loss function.
    optimizer = optim.Adam(params=ANN.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    # Train the ANN.
    batches_per_epoch = images.size(0) // batch_size
    for i in range(n_epochs):
        losses = []
        accuracies = []
        for j in range(batches_per_epoch):
            batch_idxs = torch.from_numpy(
                np.random.choice(np.arange(images.size(0)),
                                 size=batch_size, replace=False)
            )
            im_batch = images[batch_idxs]
            label_batch = labels[batch_idxs]

            # Call the module itself rather than ``forward`` so that any
            # registered hooks are honoured.
            outputs = ANN(im_batch)
            loss = criterion(outputs, label_batch)
            predictions = torch.max(outputs, 1)[1]
            correct = (label_batch == predictions).sum().float() / batch_size

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            losses.append(loss.item())
            accuracies.append(correct.item())

        print(f'Epoch: {i+1} / {n_epochs}; Loss: {np.mean(losses):.4f}; Accuracy: {np.mean(accuracies) * 100:.4f}')

    ANN = ANN.eval()

    fmodel = PyTorchModel(
        ANN, bounds=(0, 1), num_classes=10
    )

    # apply attack on source image
    # Never index past the end of the dataset.
    n_attacked = min(10000, images.size(0))
    for i in range(n_attacked):
        image = images[i].cpu().numpy()
        label = labels[i].long().item()

        attack = foolbox.attacks.BoundaryAttack(fmodel)

        # Only the attack call itself is guarded.
        try:
            adversarial = attack(image, label, verbose=True, iterations=1000)
        except AssertionError:
            continue

        if adversarial is None:
            # No adversarial found; scaling None would raise a TypeError.
            continue

        # Slight up-scaling of the result -- presumably to push it just past
        # the decision boundary; TODO confirm.
        adversarial = adversarial * 1.001

        print(f'{i}: adversarial')