def test_ptclassifier(self):
    """
    Run the NewtonFool attack against a small PyTorch CNN on MNIST and
    check that it lowers the confidence of the originally-predicted class.
    :return:
    """
    # MNIST, moved to channels-first (N, C, H, W) layout for PyTorch
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_train = np.swapaxes(x_train, 1, 3)
    x_test = np.swapaxes(x_test, 1, 3)

    # Build and briefly train the simple CNN
    net = Model()
    criterion = nn.CrossEntropyLoss()
    adam = optim.Adam(net.parameters(), lr=0.01)
    classifier = PyTorchClassifier((0, 1), net, criterion, adam, (1, 28, 28), 10)
    classifier.fit(x_train, y_train, batch_size=BATCH_SIZE, nb_epochs=1)

    # Craft adversarial test examples with NewtonFool
    attack = NewtonFool(classifier, max_iter=5)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())

    # Confidence of the clean winner class must not increase after the attack
    y_pred = classifier.predict(x_test)
    y_pred_adv = classifier.predict(x_test_adv)
    winner_mask = y_pred.max(axis=1, keepdims=True) == y_pred
    clean_conf = y_pred.max(axis=1)
    adv_conf = y_pred_adv[winner_mask]
    self.assertTrue((clean_conf >= adv_conf).all())
def test_ptclassifier(self): """ Third test with the PyTorchClassifier. :return: """ # Get MNIST (x_train, y_train), (x_test, y_test) = self.mnist x_train = np.swapaxes(x_train, 1, 3) x_test = np.swapaxes(x_test, 1, 3) # Define the network model = Model() # Define a loss function and optimizer loss_fn = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=0.01) # Get classifier ptc = PyTorchClassifier((0, 1), model, loss_fn, optimizer, (1, 28, 28), 10) ptc.fit(x_train, y_train, batch_size=BATCH_SIZE, nb_epochs=10) # First attack cl2m = CarliniL2Method(classifier=ptc, targeted=True, max_iter=10) params = {'y': random_targets(y_test, ptc.nb_classes)} x_test_adv = cl2m.generate(x_test, **params) self.assertFalse((x_test == x_test_adv).all()) self.assertTrue((x_test_adv <= 1.0001).all()) self.assertTrue((x_test_adv >= -0.0001).all()) target = np.argmax(params['y'], axis=1) y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1) self.assertTrue((target == y_pred_adv).any()) # Second attack cl2m = CarliniL2Method(classifier=ptc, targeted=False, max_iter=10) params = {'y': random_targets(y_test, ptc.nb_classes)} x_test_adv = cl2m.generate(x_test, **params) self.assertTrue((x_test_adv <= 1.0001).all()) self.assertTrue((x_test_adv >= -0.0001).all()) target = np.argmax(params['y'], axis=1) y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1) self.assertTrue((target != y_pred_adv).any()) # Third attack cl2m = CarliniL2Method(classifier=ptc, targeted=False, max_iter=10) params = {} x_test_adv = cl2m.generate(x_test, **params) self.assertFalse((x_test == x_test_adv).all()) self.assertTrue((x_test_adv <= 1.0001).all()) self.assertTrue((x_test_adv >= -0.0001).all()) y_pred = np.argmax(ptc.predict(x_test), axis=1) y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1) self.assertTrue((y_pred != y_pred_adv).any())
def test_iris_pt(self): """ Third test for Pytorch. :return: """ # Build PyTorchClassifier victim_ptc = get_iris_classifier_pt() class Model(nn.Module): """ Create Iris model for PyTorch. """ def __init__(self): super(Model, self).__init__() self.fully_connected1 = nn.Linear(4, 10) self.fully_connected2 = nn.Linear(10, 10) self.fully_connected3 = nn.Linear(10, 3) # pylint: disable=W0221 # disable pylint because of API requirements for function def forward(self, x): x = self.fully_connected1(x) x = self.fully_connected2(x) logit_output = self.fully_connected3(x) return logit_output # Define the network model = Model() # Define a loss function and optimizer loss_fn = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=0.001) # Get classifier thieved_ptc = PyTorchClassifier(model=model, loss=loss_fn, optimizer=optimizer, input_shape=(4, ), nb_classes=3, clip_values=(0, 1), channel_index=1) # Create attack copycat_cnn = CopycatCNN(classifier=victim_ptc, batch_size_fit=BATCH_SIZE, batch_size_query=BATCH_SIZE, nb_epochs=NB_EPOCHS, nb_stolen=NB_STOLEN) thieved_ptc = copycat_cnn.extract(x=self.x_train, thieved_classifier=thieved_ptc) victim_preds = np.argmax(victim_ptc.predict(x=self.x_train[:100]), axis=1) thieved_preds = np.argmax(thieved_ptc.predict(x=self.x_train[:100]), axis=1) acc = np.sum(victim_preds == thieved_preds) / len(victim_preds) self.assertGreater(acc, 0.3)
def main(config_filepath):
    """Evaluate a trained classifier on benign data and save accuracy metrics.

    Loads the experiment config, data tensors and a trained model, wraps the
    model in an ART ``PyTorchClassifier``, predicts on the flattened inputs
    and writes an accuracy report (accuracy + confusion matrix) to JSON.

    :param config_filepath: Path to the experiment configuration file.
    """
    config = load_config(config_filepath)
    if os.path.isfile(config.metrics_output_path):
        # Ask before clobbering an existing metrics file
        click.confirm(f"Overwrite {config.metrics_output_path}?", abort=True)

    # Seed both numpy and torch for reproducibility
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)

    # Load data
    x = torch.load(config.x_filepath)
    y = torch.load(config.y_filepath)

    # Flatten each sample to a vector
    x = x.reshape(x.shape[0], -1)

    model = torch.load(config.trained_model_filepath)

    # Clip values come from a JSON file with min/max pixel values.
    # (Removed a dead `clip_values = {}` initialization that was immediately
    # overwritten by the json.load result.)
    with open(config.clip_values_filepath, "r") as f:
        clip_values_raw = json.load(f)
    clip_values = (
        clip_values_raw.get("min_pixel_value"),
        clip_values_raw.get("max_pixel_value"),
    )

    classifier = PyTorchClassifier(
        model=model,
        clip_values=clip_values,
        loss=model.criterion,
        optimizer=model.optimizer,
        input_shape=(1, 28, 28),
        nb_classes=10,
    )  # TODO: move these parameters to config

    # Evaluate the classifier on benign data
    predictions = classifier.predict(x)

    # Convert one-hots to numbers for metrics
    y = utils.one_hot_to_num(y)
    predictions = utils.one_hot_to_num(predictions)

    accuracy = {
        "Accuracy": metrics.accuracy_score(y, predictions),
        "Confusion Matrix": metrics.confusion_matrix(y, predictions).tolist(),
    }

    # Save data
    with open(config.metrics_output_path, "w") as f:
        json.dump(
            accuracy,
            f,
            ensure_ascii=False,
            sort_keys=True,
            indent=4,
            separators=(",", ": "),
        )
def apply_attack(model_path, dataset, model_attack_config):
    """Run an evasion attack against a trained model and store the adversarial images.

    :param model_path: Path to a serialized torch wrapper exposing a ``.model`` attribute.
    :param dataset: Tuple of (examples, one-hot labels); examples are tensors.
    :param model_attack_config: Lookup-keyed config holding input shape, criterion,
        optimizer, class count, attack method and output DB name.
    :return: UUID of the generated adversarial image database.
    :raises ValueError: If the configured criterion or optimizer is unsupported.
    """
    model = torch.load(model_path, map_location='cpu').model
    input_shape = model_attack_config[lookup.input_shape]
    criterion = model_attack_config[lookup.criterion]
    optimizer = model_attack_config[lookup.optimizer]
    nb_classes = model_attack_config[lookup.nb_classes]
    attack_method = model_attack_config[lookup.attack_method]
    robust_db_name = model_attack_config[lookup.robust_db_name]

    if criterion == 'cross_entropy':
        criterion = nn.CrossEntropyLoss()
    else:
        # Previously raised a bare ValueError with no message
        raise ValueError('unsupported criterion: {}'.format(criterion))

    if optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    elif optimizer == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=1e-4)
    else:
        # Previously raised a bare ValueError with no message
        raise ValueError('unsupported optimizer: {}'.format(optimizer))

    classifier = PyTorchClassifier(model=model, input_shape=input_shape, loss=criterion,
                                   optimizer=optimizer, nb_classes=nb_classes)

    # dataset[0] holds tensors; take the first channel/frame of each as numpy
    x = np.array([x_element.numpy()[0] for x_element in dataset[0]])
    y = np.array(dataset[1])

    predictions = classifier.predict(x)
    accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y, axis=1)) / len(y)
    print('Accuracy on benign test examples: {}%'.format(accuracy * 100))

    attack_function = get_attack_method(attack_method)
    attack_instance = attack_function(classifier=classifier)
    x_adv = attack_instance.generate(x=x)

    predictions = classifier.predict(x_adv)
    accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y, axis=1)) / len(y)
    print('Accuracy on adversarial test examples: {}%'.format(accuracy * 100))

    # Persist the adversarial examples as a new image database
    path_to_directory = join(abspath(lookup.get_db_dirs()[lookup.dataset]), fs.get_uuid())
    fs.make_dir(path_to_directory)
    db_uuid = processor.convert_to_image(path_to_directory, robust_db_name, x_adv)
    return db_uuid
def test_ptclassifier(self): """ Third test with the PyTorchClassifier. :return: """ # Get MNIST (x_train, y_train), (x_test, y_test) = self.mnist x_train = np.swapaxes(x_train, 1, 3) x_test = np.swapaxes(x_test, 1, 3) # Create simple CNN # Define the network model = Model() # Define a loss function and optimizer loss_fn = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=0.01) # Get classifier ptc = PyTorchClassifier((0, 1), model, loss_fn, optimizer, (1, 28, 28), 10) ptc.fit(x_train, y_train, batch_size=BATCH_SIZE, nb_epochs=1) # Attack # TODO Launch with all possible attacks attack_params = { "attacker": "newtonfool", "attacker_params": { "max_iter": 5 } } up = UniversalPerturbation(ptc) x_train_adv = up.generate(x_train, **attack_params) self.assertTrue((up.fooling_rate >= 0.2) or not up.converged) x_test_adv = x_test + up.v self.assertFalse((x_test == x_test_adv).all()) train_y_pred = np.argmax(ptc.predict(x_train_adv), axis=1) test_y_pred = np.argmax(ptc.predict(x_test_adv), axis=1) self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all()) self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())
def main(args):
    """Search for a universal perturbation on CIFAR-10 via simulated annealing.

    Loads a pretrained VGG16, reports benign accuracy, then runs
    ``simulated_anniling`` over splits of the training set and reports the
    fooling rate of the resulting perturbation.

    :param args: Parsed CLI arguments forwarded to ``simulated_anniling``.
    """
    # (Removed a redundant str() wrapper and two dead self-assignments
    # `x_train = x_train` / `y_train = y_train`.)
    (x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('cifar10')
    # Channels-first layout for PyTorch
    x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
    x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)

    model = VGG('VGG16')
    model.load_state_dict(torch.load("./logs/pytorch_vgg16.model"))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    classifier = PyTorchClassifier(model=model, clip_values=(min_, max_), loss=criterion,
                                   optimizer=optimizer, input_shape=(3, 32, 32), nb_classes=10)

    # Sanity check: benign accuracy of the pretrained model
    predictions = classifier.predict(x_test)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
    print('Accuracy on benign test examples: {}%'.format(accuracy * 100))

    pred_y = classifier.predict(x_train)

    v_max = 0
    n = 1000
    n_sp = list(split_list(x_train, n))
    for i, X_t in enumerate(n_sp):
        print(f'split_No {i + 1}')
        # NOTE(review): v_max is overwritten on every split, so only the last
        # split's perturbation is kept — confirm this is intended.
        v_max = simulated_anniling(classifier, X_t, args)

    # Compute fooling rate of the final perturbation over the whole train set
    adv_x = x_train + v_max
    adv_y = classifier.predict(adv_x)
    fooling_rate = loss_fn(pred_y, adv_y, mode="fool_rate")
    print(fooling_rate)
def main(args):
    """Evaluate a trained model against the configured evasion attacks.

    For each requested attack, crafts adversarial examples on the validation
    set, records the resulting accuracy and saves the perturbed images.

    :param args: Parsed CLI args providing dataset, model_file, device,
        attacks, epsilon and outdir.
    """
    attack_fn = {'fgsm': fgsm, 'saliency_map': saliency_map}
    _, val_dataset, input_shape, n_classes = utils.load_dataset(args.dataset)
    # Scale raw pixel values to [0, 1]
    inputs = np.array([x.numpy() for x, _ in val_dataset])
    inputs /= 255
    targets = np.array([int(y) for _, y in val_dataset])
    model = torch.load(args.model_file)
    model = model.eval().to(args.device)
    loss = torch.nn.CrossEntropyLoss()
    # No optimizer needed (inference/attack only); preprocessing rescales inputs
    classifier = PyTorchClassifier(model, loss, None, input_shape, n_classes,
                                   preprocessing=(0, 1 / 255), clip_values=(0, 1))
    base_preds = np.argmax(classifier.predict(inputs, 1024), 1)
    results = {
        # 'baseline': utils.evaluate(model, val_dataset, args.device)
        'baseline': np.sum(base_preds == targets) / len(inputs)
    }
    for attack in args.attacks:
        logger.info('Crafting Adversarial Examples Using %s' % attack)
        x_test_adv = attack_fn[attack](classifier, inputs, targets, args.epsilon)
        # adv_dataset = mDataset(x_test_adv, targets)
        preds = np.argmax(classifier.predict(x_test_adv, 1024), 1)
        acc = np.sum(preds == targets) / len(inputs)
        results[attack] = acc
        # Save the adversarial images channels-last for visualization
        x_test_adv = np.transpose(x_test_adv, (0, 2, 3, 1))  # .astype('uint8')
        save_images(x_test_adv, os.path.join(args.outdir, attack))
        # logger.info('adversarial_pred\tbaseline_pred\ttarget')
        # for ap,bp,t in zip(preds, base_preds, targets):
        #     logger.info('%d\t\t\t%d\t\t\t%d' % (ap,bp,t))
    logger.info(resultsToString(results))
def test():
    """Load a pretrained VGG16 and report its benign accuracy on CIFAR-10.

    Side effects: downloads/loads the dataset, reads the saved weights and
    prints the benign-test accuracy.
    """
    # Removed redundant str() wrapper around the literal dataset name
    (x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('cifar10')
    # Channels-first layout for PyTorch
    x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
    x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)

    model = VGG('VGG16')
    model.load_state_dict(torch.load("./logs/pytorch_vgg16.h5.model"))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    classifier = PyTorchClassifier(model=model, clip_values=(min_, max_), loss=criterion,
                                   optimizer=optimizer, input_shape=(3, 32, 32), nb_classes=10)

    predictions = classifier.predict(x_test)
    accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
    print('Accuracy on benign test examples: {}%'.format(accuracy * 100))
# Train both classifiers on their respective training splits
print("training...")
classifier1.fit(model1_x_train, model1_y_train, batch_size=batch_size, nb_epochs=n_epochs)
classifier2.fit(model2_x_train, model2_y_train, batch_size=batch_size, nb_epochs=n_epochs)

# evaluation — switch the underlying torch models to inference mode
model1.eval()
model2.eval()

# Both models are scored on the same shared test split (top-1 and top-5)
predictions = classifier1.predict(shared_x_test)
acc = accuracy(predictions, shared_y_test)
print('Accuracy of model1 on shared test examples: {}%'.format(acc * 100))
top_five_acc = accuracy_n(predictions, shared_y_test, 5)
print('Top 5 accuracy of model1 on shared test examples: {}%'.format(
    top_five_acc * 100))

predictions = classifier2.predict(shared_x_test)
acc = accuracy(predictions, shared_y_test)
print('Accuracy of model2 on shared test examples: {}%'.format(acc * 100))
top_five_acc = accuracy_n(predictions, shared_y_test, 5)
print('Top 5 accuracy of model2 on shared test examples: {}%'.format(
    top_five_acc * 100))
def gzsl_launch(dataloader_seen, dataloader_unseen, all_vectors, criterion, params):
    """Generalized zero-shot-learning (GZSL) attack/defense evaluation.

    Builds a ResNet101 + ALE full graph, wraps it in an ART classifier,
    attacks both the seen- and unseen-class dataloaders with the configured
    attack (optionally applying an input-transformation defense) and prints
    harmonic GZSL scores (seen / unseen / harmonic mean).
    """
    # Per-dataset model checkpoint paths
    if params["dataset"] == "CUB":
        from configs.config_CUB import MODEL_PATH, SMOOTHED_MODEL_PATH
    elif params["dataset"] == "AWA2":
        from configs.config_AWA2 import MODEL_PATH, SMOOTHED_MODEL_PATH
    elif params["dataset"] == "SUN":
        from configs.config_SUN import MODEL_PATH, SMOOTHED_MODEL_PATH

    resnet = torchvision.models.resnet101(pretrained=True).cuda()
    # Drop the final FC layer; keep the convolutional feature extractor
    feature_extractor = nn.Sequential(*list(resnet.children())[:-1])
    # Label smoothing is a trained-in defense, so it swaps the ALE model itself
    if params["hasDefense"] and params["defense"] == "label_smooth":
        model_ale = torch.load(SMOOTHED_MODEL_PATH).cuda()
    else:
        model_ale = torch.load(MODEL_PATH).cuda()
    full_graph = FullGraph(feature_extractor, model_ale, all_vectors).cuda()
    full_graph.eval()
    # Optimizer is required by the ART wrapper; no training happens here
    optimizer = optim.SGD(full_graph.parameters(), lr=0.01, momentum=0.5)

    if params["dataset"] == "CUB":
        no_classes = 200
    elif params["dataset"] == "AWA2":
        no_classes = 50
    elif params["dataset"] == "SUN":
        no_classes = 717

    classifier = PyTorchClassifier(model=full_graph, loss=criterion, optimizer=optimizer,
                                   input_shape=(1, 150, 150), nb_classes=no_classes)

    # Configure the attack
    if params["attack"] == "fgsm":
        batch_size = 1
        attack = FastGradientMethod(classifier=classifier,
                                    eps=params["fgsm_params"]["epsilon"],
                                    batch_size=batch_size)
    elif params["attack"] == "deepfool":
        batch_size = 1
        attack = DeepFool(classifier,
                          max_iter=params["deepfool_params"]["max_iter"],
                          epsilon=params["deepfool_params"]["epsilon"],
                          nb_grads=params["deepfool_params"]["nb_grads_gzsl"],
                          batch_size=batch_size)
    elif params["attack"] == "carlini_wagner":
        batch_size = params["batch_size"] if params["custom_collate"] else 1
        attack = CarliniL2Method(
            classifier,
            confidence=params["carliniwagner_params"]["confidence"],
            learning_rate=params["carliniwagner_params"]["learning_rate"],
            binary_search_steps=params["carliniwagner_params"]["binary_search_steps"],
            max_iter=params["carliniwagner_params"]["max_iter"],
            initial_const=params["carliniwagner_params"]["initial_const"],
            max_halving=params["carliniwagner_params"]["max_halving"],
            max_doubling=params["carliniwagner_params"]["max_doubling"],
            batch_size=batch_size)

    preds_seen = []
    preds_seen_defended = []
    adv_preds_seen = []
    adv_preds_seen_defended = []
    labels_seen_ = []
    start = time.time()

    # Input-transformation defenses (label smoothing handled above)
    if params["hasDefense"]:
        if params["defense"] == "spatial_smooth":
            defense = SpatialSmoothing(
                window_size=params["ss_params"]["window_size"])
        elif params["defense"] == "totalvar":
            defense = TotalVarMin(
                max_iter=params["totalvar_params"]["max_iter"])

    # --- Seen classes: clean, defended, attacked, attacked+defended preds ---
    for index, sample in enumerate(dataloader_seen):
        img = sample[0].numpy()
        label = sample[1].numpy()
        if params["clean_results"]:
            if params["hasDefense"] and params["defense"] != "label_smooth":
                img_def, _ = defense(img)
                predictions_defended = classifier.predict(
                    img_def, batch_size=batch_size)
                preds_seen_defended.extend(
                    np.argmax(predictions_defended, axis=1))
            predictions = classifier.predict(img, batch_size=batch_size)
            preds_seen.extend(np.argmax(predictions, axis=1))
        img_perturbed = attack.generate(x=img)
        if params["hasDefense"] and params["defense"] != "label_smooth":
            img_perturbed_defended, _ = defense(img_perturbed)
            predictions_adv_defended = classifier.predict(
                img_perturbed_defended, batch_size=batch_size)
            adv_preds_seen_defended.extend(
                np.argmax(predictions_adv_defended, axis=1))
        predictions_adv = classifier.predict(img_perturbed, batch_size=batch_size)
        adv_preds_seen.extend(np.argmax(predictions_adv, axis=1))
        labels_seen_.extend(label)
        if index % 1000 == 0:
            print(index, len(dataloader_seen))

    labels_seen_ = np.array(labels_seen_)
    adv_preds_seen = np.array(adv_preds_seen)
    adv_preds_seen_defended = np.array(adv_preds_seen_defended)
    uniq_labels_seen = np.unique(labels_seen_)

    adv_preds_unseen = []
    adv_preds_unseen_defended = []
    labels_unseen_ = []
    if params["clean_results"]:
        preds_unseen = []
        preds_seen = np.array(preds_seen)
        preds_unseen_defended = []
        preds_seen_defended = np.array(preds_seen_defended)

    # --- Unseen classes: same collection loop as above ---
    for index, sample in enumerate(dataloader_unseen):
        img = sample[0].numpy()
        label = sample[1].numpy()
        if params["clean_results"]:
            if params["hasDefense"] and params["defense"] != "label_smooth":
                img_def, _ = defense(img)
                predictions_defended = classifier.predict(
                    img_def, batch_size=batch_size)
                preds_unseen_defended.extend(
                    np.argmax(predictions_defended, axis=1))
            predictions = classifier.predict(img, batch_size=batch_size)
            preds_unseen.extend(np.argmax(predictions, axis=1))
        img_perturbed = attack.generate(x=img)
        if params["hasDefense"] and params["defense"] != "label_smooth":
            img_perturbed_defended, _ = defense(img_perturbed)
            predictions_adv_defended = classifier.predict(
                img_perturbed_defended, batch_size=batch_size)
            adv_preds_unseen_defended.extend(
                np.argmax(predictions_adv_defended, axis=1))
        predictions_adv = classifier.predict(img_perturbed, batch_size=batch_size)
        adv_preds_unseen.extend(np.argmax(predictions_adv, axis=1))
        labels_unseen_.extend(label)
        if index % 1000 == 0:
            print(index, len(dataloader_unseen))
    end = time.time()

    labels_unseen_ = np.array(labels_unseen_)
    adv_preds_unseen = np.array(adv_preds_unseen)
    adv_preds_unseen_defended = np.array(adv_preds_unseen_defended)
    uniq_labels_unseen = np.unique(labels_unseen_)

    # Merge seen + unseen results for the GZSL harmonic score
    combined_labels = np.concatenate((labels_seen_, labels_unseen_))
    combined_preds_adv = np.concatenate((adv_preds_seen, adv_preds_unseen))
    combined_preds_adv_defended = np.concatenate(
        (adv_preds_seen_defended, adv_preds_unseen_defended))

    if params["clean_results"]:
        preds_unseen = np.array(preds_unseen)
        combined_preds = np.concatenate((preds_seen, preds_unseen))
        seen, unseen, h = harmonic_score_gzsl(combined_preds, combined_labels,
                                              uniq_labels_seen, uniq_labels_unseen)
        print("GZSL Clean (s/u/h):", seen, unseen, h)
        if params["hasDefense"] and params["defense"] != "label_smooth":
            preds_unseen_defended = np.array(preds_unseen_defended)
            combined_preds_defended = np.concatenate(
                (preds_seen_defended, preds_unseen_defended))
            seen, unseen, h = harmonic_score_gzsl(combined_preds_defended, combined_labels,
                                                  uniq_labels_seen, uniq_labels_unseen)
            print("GZSL Clean + defended (s/u/h):", seen, unseen, h)

    seen, unseen, h = harmonic_score_gzsl(combined_preds_adv, combined_labels,
                                          uniq_labels_seen, uniq_labels_unseen)
    print("GZSL Attacked (s/u/h):", seen, unseen, h)
    if params["hasDefense"] and params["defense"] != "label_smooth":
        seen, unseen, h = harmonic_score_gzsl(combined_preds_adv_defended, combined_labels,
                                              uniq_labels_seen, uniq_labels_unseen)
        print("GZSL Attacked + defended (s/u/h):", seen, unseen, h)
    print(end - start, "seconds passed for GZSL.")
def train_adv(data='mnist', model_name='basic', n_samples=2000, eps=2.,
              path_output='results', path_data='data', is_test=False,
              batch_size=128, device='cpu'):
    """Craft Auto-PGD adversarial examples for a surrogate model's train/test set.

    Loads a pretrained classifier, keeps only correctly-classified test
    samples, attacks them with AutoProjectedGradientDescent and saves the
    benign/adversarial pairs to disk.

    :param data: 'mnist' or 'cifar10'.
    :param model_name: 'basic' (mnist) or 'resnet'/'vgg' (cifar10).
    :param n_samples: Number of examples to attack.
    :param eps: Attack budget.
    :param path_output: Directory holding the pretrained weights; output goes here too.
    :param path_data: Dataset download/cache directory.
    :param is_test: If True, take the surrogate *test* slice instead of the train slice.
    :param batch_size: Batch size for loaders and the attack.
    :param device: Torch device string the model runs on.
    """
    # Prepare data
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])
    if data == 'mnist':
        dataset_test = datasets.MNIST(path_data, train=False, download=True,
                                      transform=transforms)
    elif data == 'cifar10':
        dataset_test = datasets.CIFAR10(path_data, train=False, download=True,
                                        transform=transforms)
    else:
        raise NotImplementedError
    loader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=False)

    # Load model
    if data == 'mnist':
        model = BaseModel(use_prob=False).to(device)
        n_features = (1, 28, 28)
        pretrained = 'mnist_200.pt'
    elif data == 'cifar10':
        n_features = (3, 32, 32)
        if model_name == 'resnet':
            model = Resnet(use_prob=False).to(device)
            pretrained = 'cifar10_resnet_200.pt'
        elif model_name == 'vgg':
            model = Vgg(use_prob=False).to(device)
            pretrained = 'cifar10_vgg_200.pt'
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError
    pretrained_path = os.path.join(path_output, pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))

    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    loss = nn.CrossEntropyLoss()
    _, acc_test = validate(model, loader_test, loss, device)
    print('Accuracy on test set: {:.4f}%'.format(acc_test * 100))

    # Keep only examples the model already classifies correctly
    tensor_test_X, tensor_test_y = get_correct_examples(model, dataset_test,
                                                        device=device, return_tensor=True)
    # Get samples from the tail
    if not is_test:
        # This is for training the surrogate model
        tensor_test_X = tensor_test_X[-n_samples:]
        tensor_test_y = tensor_test_y[-n_samples:]
    else:
        # This is for testing the surrogate model
        tensor_test_X = tensor_test_X[-n_samples - 2000:-2000]
        tensor_test_y = tensor_test_y[-n_samples - 2000:-2000]
    dataset_test = TensorDataset(tensor_test_X, tensor_test_y)
    loader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=False)
    _, acc_perfect = validate(model, loader_test, loss, device)
    print('Accuracy on {} filtered test set: {:.4f}%'.format(
        len(dataset_test), acc_perfect * 100))

    # Bug fix: device_type was hard-coded to 'gpu' even though the model may
    # run on CPU (the default). Derive it from the requested device instead.
    device_type = 'gpu' if str(device) != 'cpu' else 'cpu'
    classifier = PyTorchClassifier(model=model, loss=loss, input_shape=n_features,
                                   optimizer=optimizer, nb_classes=10,
                                   clip_values=(0.0, 1.0), device_type=device_type)
    attack = AutoProjectedGradientDescent(estimator=classifier, eps=eps, eps_step=0.1,
                                          max_iter=1000, batch_size=batch_size,
                                          targeted=False)
    X_benign = tensor_test_X.cpu().detach().numpy()
    y_true = tensor_test_y.cpu().detach().numpy()
    adv = attack.generate(x=X_benign)
    pred_adv = np.argmax(classifier.predict(adv), axis=1)
    acc_adv = np.mean(pred_adv == y_true)
    print("Accuracy on adversarial examples: {:.4f}%".format(acc_adv * 100))

    if not is_test:
        output_file = '{}_{}_baard_surro_train_eps{}_size{}.pt'.format(
            data, model_name, eps, n_samples)
    else:
        output_file = '{}_{}_baard_surro_test_eps{}_size{}.pt'.format(
            data, model_name, eps, n_samples)
    file_path = os.path.join(path_output, output_file)
    output = {'X': X_benign, 'adv': adv, 'y': y_true}
    torch.save(output, file_path)
    print('Save to:', file_path)
def main(args):
    """Craft a universal perturbation on CIFAR-10 and report its fooling rate.

    Uses a pretrained VGG16 and ART's UniversalPerturbation (inner attacker:
    FGSM) on a small 50-sample train subset.
    """
    # Removed a redundant str() wrapper around the literal dataset name
    (x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('cifar10')
    # Channels-first layout for PyTorch
    x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
    x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)
    # Keep the experiment small
    x_train = x_train[:50]
    y_train = y_train[:50]

    model = VGG('VGG16')
    model.load_state_dict(torch.load("./logs/pytorch_vgg16.model"))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    classifier = PyTorchClassifier(model=model, clip_values=(min_, max_), loss=criterion,
                                   optimizer=optimizer, input_shape=(3, 32, 32), nb_classes=10)

    predictions = classifier.predict(x_test)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
    print('Accuracy on benign test examples: {}%'.format(accuracy * 100))

    attack_params = {
        "attacker": "fgsm",
        "delta": 0.01,
        "max_iter": 1000,
        "eps": 13.0 / 255.0,
        "norm": np.inf
    }

    # Craft attack on training examples. generate() also stores the result on
    # the crafter (.noise / .fooling_rate / .converged); the returned array was
    # previously bound to x_train_adv and then overwritten, so the binding and
    # the two bare attribute-expression statements were removed as dead code.
    adv_crafter = UniversalPerturbation(classifier, **attack_params)
    adv_crafter.generate(x_train)
    print('\nCraft attack train examples')

    # Universal perturbation found by the attack, applied to the train set
    perturbation = adv_crafter.noise
    x_train_adv = x_train + perturbation

    preds = np.argmax(classifier.predict(x_train), axis=1)
    preds_adv = np.argmax(classifier.predict(x_train_adv), axis=1)
    # Fooling rate on train set (universal perturbation)
    acc = np.sum(preds != preds_adv) / y_train.shape[0]
    print("\nFooling rate: %.2f%%" % (acc * 100))
def test_ptclassifier(self): """ Third test with the PyTorchClassifier. :return: """ # Build PyTorchClassifier victim_ptc = get_classifier_pt() class Model(nn.Module): """ Create model for pytorch. """ def __init__(self): super(Model, self).__init__() self.conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=7) self.pool = nn.MaxPool2d(4, 4) self.fullyconnected = nn.Linear(25, 10) # pylint: disable=W0221 # disable pylint because of API requirements for function def forward(self, x): """ Forward function to evaluate the model :param x: Input to the model :return: Prediction of the model """ x = self.conv(x) x = torch.nn.functional.relu(x) x = self.pool(x) x = x.reshape(-1, 25) x = self.fullyconnected(x) x = torch.nn.functional.softmax(x) return x # Define the network model = Model() # Define a loss function and optimizer loss_fn = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=0.01) # Get classifier thieved_ptc = PyTorchClassifier(model=model, loss=loss_fn, optimizer=optimizer, input_shape=(1, 28, 28), nb_classes=10, clip_values=(0, 1)) # Create attack copycat_cnn = CopycatCNN(classifier=victim_ptc, batch_size_fit=BATCH_SIZE, batch_size_query=BATCH_SIZE, nb_epochs=NB_EPOCHS, nb_stolen=NB_STOLEN) self.x_train = np.swapaxes(self.x_train, 1, 3) thieved_ptc = copycat_cnn.extract(x=self.x_train, thieved_classifier=thieved_ptc) victim_preds = np.argmax(victim_ptc.predict(x=self.x_train[:100]), axis=1) thieved_preds = np.argmax(thieved_ptc.predict(x=self.x_train[:100]), axis=1) self.x_train = np.swapaxes(self.x_train, 1, 3) acc = np.sum(victim_preds == thieved_preds) / len(victim_preds) self.assertGreater(acc, 0.3)
class Evaluator(object):
    """Evaluator for adversarial robust"""

    def __init__(self, model, dataset):
        # Wrap the (already trained) model in an ART classifier for attacks.
        # nb_classes / input_shape / attack_configs are module-level lookups.
        model.eval()
        self.model = model
        self.dataset = dataset
        optimizer = torch.optim.Adam(model.parameters())  # Useless
        self.nb_classes = nb_classes[dataset]
        self.classifier = PyTorchClassifier((0., 1.), model=self.model,
                                            loss=nn.CrossEntropyLoss(),
                                            optimizer=optimizer,
                                            input_shape=input_shape[dataset],
                                            nb_classes=self.nb_classes)

    def evaluate(self, data_loader):
        """Natural evaluation"""
        # Only the first batch of the loader is evaluated
        data_iter = iter(data_loader)
        examples, labels = next(data_iter)
        examples, labels = examples.cpu().numpy(), labels.cpu().numpy()
        preds = np.argmax(self.classifier.predict(examples), axis=1)
        acc = np.sum(preds == labels) / labels.shape[0]
        return acc

    def evaluate_fgsm(self, data_loader):
        """Adversarial evaluation by FGSM"""
        norm, eps = np.inf, attack_configs['FGSM'][self.dataset]['epsilon']
        adv_crafter = FastGradientMethod(self.classifier, norm=norm, eps=eps)
        data_iter = iter(data_loader)
        examples, labels = next(data_iter)
        examples, labels = examples.cpu().numpy(), labels.cpu().numpy()
        # ART's generate() takes one-hot labels
        labels_one_hot = np.eye(self.nb_classes)[labels]
        examples_adv = adv_crafter.generate(examples, y=labels_one_hot)
        preds = np.argmax(self.classifier.predict(examples_adv), axis=1)
        acc = np.sum(preds == labels) / labels.shape[0]
        return acc

    def evaluate_pgd(self, data_loader, num_iter=40):
        """Adversarial evaluation by PGD"""
        norm, eps = np.inf, attack_configs['PGD'][self.dataset]['epsilon']
        # Step size: traverse the eps-ball twice over num_iter steps
        eps_step = 2 * eps / num_iter
        adv_crafter = ProjectedGradientDescent(self.classifier, norm=norm, eps=eps,
                                               eps_step=eps_step, max_iter=num_iter,
                                               random_init=True)
        data_iter = iter(data_loader)
        examples, labels = next(data_iter)
        examples, labels = examples.cpu().numpy(), labels.cpu().numpy()
        labels_one_hot = np.eye(self.nb_classes)[labels]
        examples_adv = adv_crafter.generate(examples, y=labels_one_hot)
        preds = np.argmax(self.classifier.predict(examples_adv), axis=1)
        acc = np.sum(preds == labels) / labels.shape[0]
        return acc

    def evaluate_cw(self, data_loader):
        """Adversarial evaluation by C&W (L-inf)."""
        # NOTE(review): reuses the PGD epsilon config entry — confirm intended.
        eps = attack_configs['PGD'][self.dataset]['epsilon']
        adv_crafter = CarliniLInfMethod(self.classifier, targeted=False, eps=eps)
        data_iter = iter(data_loader)
        examples, labels = next(data_iter)
        examples, labels = examples.cpu().numpy(), labels.cpu().numpy()
        labels_one_hot = np.eye(self.nb_classes)[labels]
        examples_adv = adv_crafter.generate(examples, y=labels_one_hot)
        preds = np.argmax(self.classifier.predict(examples_adv), axis=1)
        acc = np.sum(preds == labels) / labels.shape[0]
        return acc

    def evaluate_cw_l2(self, data_loader):
        """Adversarial evaluation by C&W (L2)."""
        adv_crafter = CarliniL2Method(self.classifier, targeted=False)
        data_iter = iter(data_loader)
        examples, labels = next(data_iter)
        examples, labels = examples.cpu().numpy(), labels.cpu().numpy()
        labels_one_hot = np.eye(self.nb_classes)[labels]
        examples_adv = adv_crafter.generate(examples, y=labels_one_hot)
        preds = np.argmax(self.classifier.predict(examples_adv), axis=1)
        acc = np.sum(preds == labels) / labels.shape[0]
        return acc

    def evaluate_robust(self, data_loader):
        """Average loss sensitivity over the first batch of the loader."""
        data_iter = iter(data_loader)
        examples, labels = next(data_iter)
        examples, labels = examples.cpu().numpy(), labels.cpu().numpy()
        labels_one_hot = np.eye(self.nb_classes)[labels]
        losses = []
        # Compute loss with implicit batching
        batch_size = 256
        for batch_id in range(
                int(np.ceil(examples.shape[0] / float(batch_size)))):
            batch_index_1, batch_index_2 = batch_id * batch_size, (
                batch_id + 1) * batch_size
            batch = examples[batch_index_1:batch_index_2]
            batch_labels = labels_one_hot[batch_index_1:batch_index_2]
            loss = loss_sensitivity(self.classifier, batch, batch_labels)
            # Weight by batch size so the final mean is per-example
            losses.append(loss * batch.shape[0])
        res = sum(losses) / examples.shape[0]
        return res
def main(args):
    """Random-search / simulated-annealing universal perturbation on CIFAR-10."""
    mode = args.mode
    eps = args.eps
    norm = norm_list.get(args.norm)
    p_mod = args.p_mod
    temperature = args.temperature

    (x_train, y_train), (x_test, y_test), min_, max_ = load_data()
    # Keep the search small
    x_train = x_train[:500]
    y_train = y_train[:500]

    model = VGG('VGG16')
    model.load_state_dict(torch.load("./logs/pytorch_vgg16.model"))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    classifier = PyTorchClassifier(model=model, clip_values=(min_, max_), loss=criterion,
                                   optimizer=optimizer, input_shape=(3, 32, 32), nb_classes=10)

    predictions = classifier.predict(x_test)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(
            y_test)
    print('Accuracy on benign test examples: {}%'.format(accuracy * 100))

    x = x_train.copy().astype(np.float32)
    nb_instances = len(x)
    pred_y = classifier.predict(x)
    nb_dims = x.shape[1] * x.shape[2] * x.shape[3]
    # number of pixels to modify per candidate step
    n_mod = int(round(nb_dims * p_mod))

    # Start from a random point on the eps-sphere
    v = random_sphere(nb_points=1, nb_dims=nb_dims, radius=eps, norm=norm)
    v = v.reshape(1, x.shape[1], x.shape[2], x.shape[3])
    v = np.array(v, dtype='float32')
    v_max = v.copy()

    current_y = classifier.predict(x + v)
    fooling_rate = loss_fn(pred_y, current_y, mode=mode)
    fooling_rate_max = fooling_rate

    # Go through the data set and compute the perturbation increments sequentially
    for j in range(len(x) * 1000):
        v_ = v.reshape(nb_dims).copy()
        # indices of the pixels to modify
        idx = random.sample(range(len(v_)), n_mod)
        # replace with normal-distributed draws (alternative, disabled)
        # v_[idx] = np.random.normal(loc=0.0, scale=np.std(v_), size=n_mod)
        # add normally-distributed noise
        v_[idx] = v_[idx] + np.random.normal(
            loc=0.0, scale=np.std(v_), size=n_mod)
        # add uniform noise (alternative, disabled)
        # v_[idx] = v_[idx] + np.random.uniform(low=v_.min(), high=v_.max(), size=n_mod)
        # replace with uniform draws (alternative, disabled)
        # v_[idx] = np.random.uniform(low=v_.min(), high=v_.max(), size=n_mod)
        # project the perturbation back inside the eps-ball
        v_ = projection(v_.reshape(1, 3, 32, 32), eps, norm)
        # fooling rate of the candidate perturbation
        current_y = classifier.predict(x + v_)
        fooling_rate_ = loss_fn(pred_y, current_y, mode=mode)
        # simulated-annealing acceptance test
        if random.random() < np.exp(
                (fooling_rate_ - fooling_rate_max) / (temperature + 1e-10)):
            print(j, fooling_rate_, fooling_rate_max, temperature,
                  np.linalg.norm(v_))
            v = v_.copy()
            if fooling_rate_max < fooling_rate_:
                fooling_rate_max = fooling_rate_
                v_max = v_.copy()
            fooling_rate = fooling_rate_
        # cooling schedule
        temperature = 0.99 * temperature

    # Compute fooling rate of the best perturbation found
    adv_x = x + v_max
    plot_image(v_max[0], 'sample1.png')
    plot_image(adv_x[1], 'sample2.png')
    plot_image(x[1], 'sample3.png')
    adv_y = classifier.predict(adv_x)
    fooling_rate = loss_fn(pred_y, adv_y, mode="fool_rate")
    print(fooling_rate)
# Step 2: Create the model vgg_ver = "VGG16" model = VGG(vgg_ver) # Step 2a: Define the loss function and the optimizer criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=1e-2) # Step 3: Create the ART classifier classifier = PyTorchClassifier(model=model, clip_values=(min_, max_), loss=criterion, optimizer=optimizer, input_shape=(3, 32, 32), nb_classes=10) # Step 4: Train the ART classifier classifier.fit(x_train, y_train, batch_size=128, nb_epochs=30) classifier.save(f"pytorch_{vgg_ver}", "./logs") # Step 5: Evaluate the ART classifier on benign test examples predictions = classifier.predict(x_test) accuracy = np.sum( np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test) print('Accuracy on benign test examples: {}%'.format(accuracy * 100))
def main(seed, dataset_name, clf_name, detector_name, epsilon_lst,
         input_shape, json_param, path):
    """Whitebox evaluation of a MagNet-defended classifier under APGD.

    Loads a pretrained classifier and its MagNet reformer/detector, sanity
    checks each component, then runs AutoProjectedGradientDescentDetectorsMagnet
    at every epsilon in ``epsilon_lst`` and saves the resulting predictions.

    :param seed: index into ``SEEDS`` selecting the run/result directory.
    :param dataset_name: dataset identifier used in result file names.
    :param clf_name: 'dnn' or 'resnet' — selects the model architecture.
    :param detector_name: detector identifier used in the output folder name.
    :param epsilon_lst: iterable of perturbation budgets to attack with.
    :param input_shape: input shape tuple passed to the ART classifiers.
    :param json_param: path to a JSON file with MagNet parameters.
    :param path: base path forwarded to ``loadmagnet``.
    :raises ValueError: if ``clf_name`` is not a known architecture.
    """
    set_seeds(SEEDS[seed])
    # Fix: original read `device = device = torch.device(...)` — a duplicated
    # assignment with no effect; collapsed to a single assignment.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)

    # Load classifier
    print("load the classifier")
    file_model = os.path.join(
        'result_{:}'.format(seed),
        '{:}_{:}_model.pt'.format(dataset_name, clf_name))
    if clf_name == 'dnn':
        model = BaseModel(use_prob=False).to(device)
    elif clf_name == 'resnet':
        model = Resnet(use_prob=False).to(device)
    else:
        raise ValueError("model idx unknown")
    model.load_state_dict(torch.load(file_model, map_location=device))

    # Load the precomputed clean/adversarial data for this run.
    file_data = os.path.join(
        'result_{:}'.format(seed),
        '{:}_{:}_apgd2_2000.pt'.format(dataset_name, clf_name))
    obj = torch.load(file_data)
    X = obj['X']
    y = obj['y']
    adv = obj['adv']

    print("undefended model acc")
    pred = predict_numpy(model, X, device)
    print('Acc on clean:', np.mean(pred == y))

    # Split data: samples 2000-3000 are reserved for attacking.
    X_att_test = X[2000:3000].astype(np.float32)
    y_att_test = y[2000:3000].astype(np.float32)
    print("x attr shape ", X_att_test.shape)

    #################################################################
    print("Load Magnet")
    with open(json_param) as j:
        param = json.load(j)
    print("before load magnet")
    model_with_reformer_nn_module, detector_nn_module, full_magnet_orig = \
        loadmagnet(dataset_name, clf_name, param, device, path, model)
    print("Magnet loaded")

    loss = torch.nn.CrossEntropyLoss()
    # Classifier wrapping model + reformer; returns the logits.
    art_classifier = PyTorchClassifier(model=model_with_reformer_nn_module,
                                       loss=loss, input_shape=input_shape,
                                       nb_classes=10, optimizer=None)
    y_pred = art_classifier.predict(X)
    print("art_classifier", y_pred.shape)

    print("check full magnet ")
    _, y_pred = full_magnet_orig.detect(X)
    print("full magnet", y_pred.shape)

    print("check detector nn module")
    # Correctly returns an array with the logits.
    y_pred = detector_nn_module(X)
    print("y pred ", y_pred)
    print("detector_nn_module", y_pred.shape)

    print("create pytorch detector")
    # Must wrap only the detector (2-class: benign vs adversarial).
    art_detector = PyTorchClassifier(model=detector_nn_module, loss=loss,
                                     input_shape=input_shape, nb_classes=2,
                                     optimizer=None)
    print("check art detector")
    # Sanity probe: inputs shifted far off-manifold should be detected.
    y_pred = art_detector.predict(X + 1000)
    print("detector_nn_module", y_pred.shape)
    print("art detector ok")
    print("y pred ", y_pred)
    print("detected by detector used by attack ",
          np.mean(y_pred.argmax(axis=1) == 1))

    clip_fun = None

    #################################################################
    pred_folder = 'result_{:}/predictions_wb_eval/{:}_{:}_{:}'.format(
        seed, dataset_name, clf_name, detector_name)

    print("compute prediction for samples at epsilon 0")
    # Only the first 10 attack-test samples are evaluated.
    x = X_att_test[:10]
    y = y_att_test[:10]
    # Compute and save predictions for the unperturbed samples (eps = 0).
    cmpt_and_save_predictions(art_classifier, full_magnet_orig, art_detector,
                              device, x, y, pred_folder, 0)

    for eps in epsilon_lst:
        print("epsilon ", eps)
        print("detector threshold ", detector_nn_module.detector.threshold)
        attack = AutoProjectedGradientDescentDetectorsMagnet(
            estimator=art_classifier,
            detector=art_detector,
            detector_th=0,
            detector_clip_fun=clip_fun,
            loss_type='logits_difference',
            batch_size=128,
            norm=2,
            eps=eps,
            eps_step=0.9,
            beta=1.0,
            max_iter=100)
        adv_x = attack.generate(x=x, y=None)
        # Compute and save predictions for the adversarial samples.
        cmpt_and_save_predictions(art_classifier, full_magnet_orig,
                                  art_detector, device, adv_x, y,
                                  pred_folder, eps)
def main():
    """Whitebox evaluation of the BAARD detector on MNIST under APGD.

    Loads a pretrained MNIST classifier, fits the three BAARD stages and their
    thresholds, loads a surrogate detector network, then attacks 10 samples
    with AutoProjectedGradientDescentDetectors and reports per-stage detection
    rates.
    """
    set_seeds(SEED)
    # Fix: original read `device = device = torch.device(...)` — a duplicated
    # assignment with no effect; collapsed to a single assignment.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)

    # Load classifier
    file_model = os.path.join('result_0', 'mnist_dnn_model.pt')
    model = BaseModel(use_prob=False).to(device)
    model.load_state_dict(torch.load(file_model, map_location=device))

    # Load precomputed clean/adversarial samples.
    file_data = os.path.join('result_0', 'mnist_dnn_apgd2_3000.pt')
    obj = torch.load(file_data)
    X = obj['X']
    y = obj['y']
    adv = obj['adv']
    pred = predict_numpy(model, X, device)
    print('Acc on clean:', np.mean(pred == y))
    pred = predict_numpy(model, adv, device)
    print('Acc on adv:', np.mean(pred == y))

    # Split data. The unused splits are kept to document the data layout
    # shared with the sibling scripts (defence test/val, attack test, surrogate
    # training).
    X_def_test = X[:1000]
    y_def_test = y[:1000]
    adv_def_test = adv[:1000]
    pred_adv_def_test = pred[:1000]
    X_def_val = X[1000:2000]
    y_def_val = y[1000:2000]
    adv_def_val = adv[1000:2000]
    pred_adv_def_val = pred[1000:2000]
    X_att_test = X[2000:4000]
    y_att_test = y[2000:4000]
    adv_att_test = adv[2000:4000]
    pred_adv_att_test = pred[2000:4000]
    X_surro_train = X[4000:]
    y_surro_train = y[4000:]
    adv_surro_train = adv[4000:]
    pred_adv_surro_train = pred[4000:]

    # Load BAARD training data and fit the three stages.
    file_baard_train = os.path.join('result_0',
                                    'mnist_dnn_baard_s1_train_data.pt')
    obj = torch.load(file_baard_train)
    X_baard_train_s1 = obj['X_s1']
    X_baard_train = obj['X']
    y_baard_train = obj['y']
    # Idiom: build the stage list with a literal instead of repeated append().
    stages = [
        ApplicabilityStage(n_classes=10, quantile=1., verbose=False),
        ReliabilityStage(n_classes=10, k=10, quantile=1., verbose=False),
        DecidabilityStage(n_classes=10, k=100, quantile=1., verbose=False),
    ]
    detector = BAARDOperator(stages=stages)
    # Stage 1 is fitted on its own (flattened) training set; the later stages
    # share the regular training data.
    detector.stages[0].fit(X_baard_train_s1, y_baard_train)
    for stage in detector.stages[1:]:
        stage.fit(X_baard_train, y_baard_train)
    file_baard_threshold = os.path.join('result_0',
                                        'mnist_dnn_baard_threshold.pt')
    thresholds = torch.load(file_baard_threshold)['thresholds']
    detector.load(file_baard_threshold)

    file_surro = os.path.join('result_0', 'mnist_dnn_baard_surrogate.pt')
    surrogate = get_pretrained_surrogate(file_surro, device)

    # Test surrogate model: first 1000 are clean, last 1000 adversarial.
    X_test = np.concatenate((X_att_test[1000:], adv_att_test[1000:]))
    pred_test = predict_numpy(model, X_test, device)
    label_test = detector.detect(X_test, pred_test)
    acc = acc_on_adv(pred_test[1000:], y_att_test[1000:], label_test[1000:])
    fpr = np.mean(label_test[:1000])
    print('BAARD Acc_on_adv:', acc)
    print('BAARD FPR:', fpr)
    label_surro = predict_numpy(surrogate, X_test, device)
    acc = np.mean(label_surro == label_test)
    print('Acc on surrogate:', acc)

    # Wrap classifier and surrogate detector for ART.
    loss = torch.nn.CrossEntropyLoss()
    optimizer_clf = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9,
                                    weight_decay=5e-4)
    art_classifier = PyTorchClassifier(model=model, loss=loss,
                                       input_shape=(1, 28, 28), nb_classes=10,
                                       optimizer=optimizer_clf)
    optimizer_sur = torch.optim.SGD(surrogate.parameters(), lr=0.01,
                                    momentum=0.9, weight_decay=5e-4)
    art_detector = PyTorchClassifier(model=surrogate, loss=loss,
                                     input_shape=(1, 28, 28), nb_classes=2,
                                     optimizer=optimizer_sur)
    # NOTE(review): 1/36 presumably balances the classifier loss against the
    # detector loss inside the combined attack objective — confirm.
    loss_multiplier = 1. / 36.

    clip_fun = BAARD_Clipper(detector)
    attack = AutoProjectedGradientDescentDetectors(
        estimator=art_classifier,
        detector=art_detector,
        detector_th=0,  # fpr
        clf_loss_multiplier=loss_multiplier,
        detector_clip_fun=clip_fun,
        loss_type='logits_difference',
        batch_size=128,
        norm=2,
        eps=8.0,
        eps_step=0.9,
        beta=0.5,
        max_iter=100)

    # Attack the first 10 attack-test samples.
    x = X_att_test[:10]
    y = y_att_test[:10]
    adv_x = attack.generate(x=x, y=None)
    pred_adv = predict_numpy(model, adv_x, device)
    pred_sur = art_detector.predict(adv_x)
    # Fix: the original recomputed predict_numpy(model, adv_x, device) for the
    # same inputs; reuse the deterministic result instead.
    pred = pred_adv
    print('Acc classifier:', np.mean(pred == y))
    print('From surrogate model:', np.mean(pred_sur == 1))
    labelled_as_adv = detector.detect(adv_x, pred_adv)
    print('From BAARD', np.mean(labelled_as_adv == 1))

    # Test it stage by stage
    reject_s1 = detector.stages[0].predict(adv_x, pred_adv)
    print('reject_s1', np.mean(reject_s1))
    reject_s2 = detector.stages[1].predict(adv_x, pred_adv)
    print('reject_s2', np.mean(reject_s2))
    reject_s3 = detector.stages[2].predict(adv_x, pred_adv)
    print('reject_s3', np.mean(reject_s3))
    print()
max_iter=40, targeted=False, num_random_init=5, batch_size=32, ) # Step 4: Create the trainer object - AdversarialTrainerFBFPyTorch # if you have apex installed, change use_amp to True epsilon = 8.0 / 255.0 trainer = AdversarialTrainerFBFPyTorch(classifier, eps=epsilon, use_amp=False) # Build a Keras image augmentation object and wrap it in ART art_datagen = PyTorchDataGenerator(iterator=dataloader, size=x_train.shape[0], batch_size=128) # Step 5: fit the trainer trainer.fit_generator(art_datagen, nb_epochs=30) x_test_pred = np.argmax(classifier.predict(x_test), axis=1) print( "Accuracy on benign test samples after adversarial training: %.2f%%" % (np.sum(x_test_pred == np.argmax(y_test, axis=1)) / x_test.shape[0] * 100)) x_test_attack = attack.generate(x_test) x_test_attack_pred = np.argmax(classifier.predict(x_test_attack), axis=1) print( "Accuracy on original PGD adversarial samples after adversarial training: %.2f%%" % (np.sum(x_test_attack_pred == np.argmax(y_test, axis=1)) / x_test.shape[0] * 100))
def main():
    """Evaluate BAARD stage-1 threshold clipping against saved whitebox APGD samples.

    Loads a pretrained MNIST classifier, rebuilds only the Applicability stage
    from saved thresholds, loads previously generated whitebox adversarial
    examples, clips them by the stage-1 thresholds and reports how often the
    surrogate detector and stage 1 still flag them.
    """
    set_seeds(SEED)
    # Fix: original read `device = device = torch.device(...)` — a duplicated
    # assignment with no effect; collapsed to a single assignment.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)

    # Load classifier
    file_model = os.path.join('result_0', 'mnist_dnn_model.pt')
    model = BaseModel(use_prob=False).to(device)
    model.load_state_dict(torch.load(file_model, map_location=device))

    # Load precomputed clean/adversarial samples.
    file_data = os.path.join('result_0', 'mnist_dnn_apgd2_3000.pt')
    obj = torch.load(file_data)
    X = obj['X']
    y = obj['y']
    adv = obj['adv']
    pred = predict_numpy(model, X, device)
    print('Acc on clean:', np.mean(pred == y))
    pred = predict_numpy(model, adv, device)
    print('Acc on adv:', np.mean(pred == y))

    # Split data. Unused splits are kept to document the layout shared with
    # the sibling scripts.
    X_def_test = X[:1000]
    y_def_test = y[:1000]
    adv_def_test = adv[:1000]
    pred_adv_def_test = pred[:1000]
    X_def_val = X[1000:2000]
    y_def_val = y[1000:2000]
    adv_def_val = adv[1000:2000]
    pred_adv_def_val = pred[1000:2000]
    X_att_test = X[2000:4000]
    y_att_test = y[2000:4000]
    adv_att_test = adv[2000:4000]
    pred_adv_att_test = pred[2000:4000]
    X_surro_train = X[4000:]
    y_surro_train = y[4000:]
    adv_surro_train = adv[4000:]
    pred_adv_surro_train = pred[4000:]

    # Load BAARD stage-1 training data and precomputed thresholds.
    file_baard_train = os.path.join(
        'result_0', 'mnist_dnn_baard_s1_train_data.pt')
    obj = torch.load(file_baard_train)
    X_baard_train_s1 = obj['X_s1']
    X_baard_train = obj['X']
    y_baard_train = obj['y']
    file_baard_threshold = os.path.join(
        'result_0', 'mnist_dnn_baard_threshold.pt')
    thresholds = torch.load(file_baard_threshold)['thresholds']
    stage1 = ApplicabilityStage(n_classes=10, quantile=1.)
    # Inject the saved thresholds directly instead of refitting the stage.
    stage1.thresholds_ = thresholds[0]

    file_surro = os.path.join('result_0', 'mnist_dnn_baard_surrogate.pt')
    surrogate = get_pretrained_surrogate(file_surro, device)

    # Test surrogate model: first 1000 clean, last 1000 adversarial.
    X_test = np.concatenate((X_att_test[1000:], adv_att_test[1000:]))
    pred_test = predict_numpy(model, X_test, device)
    label_surro = predict_numpy(surrogate, X_test, device)

    # Wrap classifier and surrogate detector for ART.
    loss = torch.nn.CrossEntropyLoss()
    optimizer_clf = torch.optim.SGD(
        model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    art_classifier = PyTorchClassifier(
        model=model,
        loss=loss,
        input_shape=(1, 28, 28),
        nb_classes=10,
        optimizer=optimizer_clf
    )
    optimizer_sur = torch.optim.SGD(
        surrogate.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    art_detector = PyTorchClassifier(
        model=surrogate,
        loss=loss,
        input_shape=(1, 28, 28),
        nb_classes=2,
        optimizer=optimizer_sur
    )

    fpr = 0.05
    # NOTE(review): 1/36 presumably balances the classifier loss against the
    # detector loss inside the combined attack objective — confirm.
    attack = AutoProjectedGradientDescentDetectors(
        estimator=art_classifier,
        detector=art_detector,
        detector_th=fpr,
        clf_loss_multiplier=1. / 36.,
        loss_type='logits_difference',
        batch_size=128,
        norm=2,
        eps=5.0,
        eps_step=0.9,
        beta=0.5,
        max_iter=100)

    # Generation is commented out; reuse previously saved whitebox samples:
    # adv_x = attack.generate(x=X_att_test[:100], y=y_att_test[:100])
    file_whitebox_adv = 'mnist_apgd2_3000_whitebox_size100.npy'
    # np.save(file_whitebox_adv, adv_x)
    adv_x = np.load(file_whitebox_adv)
    print('adv_x', adv_x.shape)
    pred_adv = predict_numpy(model, adv_x, device)

    # Clip the adversarial samples by the stage-1 thresholds, then check
    # whether the surrogate and stage 1 still flag them.
    adv_x = clip_by_threshold(adv_x, pred_adv, thresholds[0])
    pred_sur = art_detector.predict(adv_x)
    print('From surrogate model:', np.mean(pred_sur == 1))
    labelled_as_adv = stage1.predict(adv_x, pred_adv)
    print('From BAARD', np.mean(labelled_as_adv == 1))

    print('Pause')