def pgd_linf(model, X, y, optimizer, epsilon=0.1):
    """ Construct pgd adversarial examples on the examples X"""
    classifier = PyTorchClassifier(
        model=model_concetenate,
        loss=custom_loss,
        optimizer=optimizer,
        input_shape=(1, 28, 28),
        nb_classes=10,
        device_type='gpu'
    )
    attack = ProjectedGradientDescent(classifier=classifier, eps=epsilon)
    X_adv = attack.generate(X.numpy(), y.numpy())
    return torch.Tensor(X_adv)
class PGDAttack(AdversarialAttack):

    def __init__(self, model, targeted=False, step_size_iter=.1, max_perturbation=.3,
                 norm_order=np.inf, max_iterations=100, num_random_init=0, batch_size=16):
        super().__init__(model=model)
        self._targeted = targeted
        self._step_size_iter = step_size_iter
        self._max_perturbation = max_perturbation
        self._norm_order = norm_order
        self._max_iterations = max_iterations
        self._num_random_init = num_random_init
        self._method = ProjectedGradientDescent(
            classifier=self.model, targeted=self._targeted, norm=self._norm_order,
            eps=self._max_perturbation, eps_step=self._step_size_iter,
            max_iter=self._max_iterations, num_random_init=self._num_random_init,
            batch_size=batch_size)

    def attack_method(self, x, y=None):
        params = {}
        if y is not None:
            params['y'] = y
        return self._method.generate(x=x, **params)
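A minimal usage sketch for the `PGDAttack` wrapper above. The names `art_classifier`, `x_test`, and `y_target` are assumptions (an ART classifier accepted by the `AdversarialAttack` base class, a NumPy test batch, and one-hot target labels); only the constructor arguments and `attack_method` defined in the class are used.

# Hypothetical usage sketch: `art_classifier`, `x_test` and `y_target` are assumed to exist.
pgd = PGDAttack(model=art_classifier, max_perturbation=0.3, step_size_iter=0.05, max_iterations=40)
x_adv = pgd.attack_method(x_test)  # untargeted PGD; labels are inferred by the attack

targeted_pgd = PGDAttack(model=art_classifier, targeted=True)
x_adv_targeted = targeted_pgd.attack_method(x_test, y=y_target)  # push predictions towards y_target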
def _test_backend_mnist(self, classifier):
    # Get MNIST
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Test PGD with np.inf norm
    attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())

    acc = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
    logger.info('Accuracy on adversarial train examples: %.2f%%', acc * 100)

    acc = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on adversarial test examples: %.2f%%', acc * 100)

    # Test PGD with 3 random initialisations
    attack = ProjectedGradientDescent(classifier, num_random_init=3)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())

    acc = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
    logger.info('Accuracy on adversarial train examples with 3 random initialisations: %.2f%%', acc * 100)

    acc = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on adversarial test examples with 3 random initialisations: %.2f%%', acc * 100)
def test_iris_k_unbounded(self):
    (_, _), (x_test, y_test) = self.iris
    classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channel_index=1)
    attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.2)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on Iris with PGD adversarial examples: %.2f%%', (acc * 100))
def evaluate_pgd(self, data_loader, num_iter=40):
    """Adversarial evaluation by PGD"""
    norm, eps = np.inf, attack_configs['PGD'][self.dataset]['epsilon']
    eps_step = 2 * eps / num_iter
    adv_crafter = ProjectedGradientDescent(self.classifier, norm=norm, eps=eps,
                                           eps_step=eps_step, max_iter=num_iter,
                                           random_init=True)

    data_iter = iter(data_loader)
    examples, labels = next(data_iter)
    examples, labels = examples.cpu().numpy(), labels.cpu().numpy()
    labels_one_hot = np.eye(self.nb_classes)[labels]

    examples_adv = adv_crafter.generate(examples, y=labels_one_hot)
    preds = np.argmax(self.classifier.predict(examples_adv), axis=1)
    acc = np.sum(preds == labels) / labels.shape[0]
    return acc
def test_scikitlearn(self):
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC, LinearSVC

    from art.classifiers.scikitlearn import ScikitlearnLogisticRegression, ScikitlearnSVC

    scikitlearn_test_cases = {
        LogisticRegression: ScikitlearnLogisticRegression,
        SVC: ScikitlearnSVC,
        LinearSVC: ScikitlearnSVC,
    }

    (_, _), (x_test, y_test) = self.iris
    x_test_original = x_test.copy()

    for (model_class, classifier_class) in scikitlearn_test_cases.items():
        model = model_class()
        classifier = classifier_class(model=model, clip_values=(0, 1))
        classifier.fit(x=x_test, y=y_test)

        # Test untargeted attack
        attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1, max_iter=5)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((np.array(x_test) == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(np.array(y_test), axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(np.array(y_test), axis=1)) / len(y_test)
        logger.info(
            "Accuracy of " + classifier.__class__.__name__ + " on Iris with PGD adversarial examples: "
            "%.2f%%",
            (acc * 100),
        )
def test_pytorch_iris_pt(self):
    (_, _), (x_test, y_test) = self.iris
    classifier = get_tabular_classifier_pt()

    # Test untargeted attack
    attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1, max_iter=5)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((np.array(x_test) == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(np.array(y_test), axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(np.array(y_test), axis=1)) / len(y_test)
    logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%", (acc * 100))
def test_iris_pt(self):
    (_, _), (x_test, y_test) = self.iris
    classifier = get_iris_classifier_pt()

    # Test untargeted attack
    attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info('Accuracy on Iris with PGD adversarial examples: %.2f%%', (acc * 100))

    # Test targeted attack
    targets = random_targets(y_test, nb_classes=3)
    attack = ProjectedGradientDescent(classifier, targeted=True, eps=1, eps_step=0.1)
    x_test_adv = attack.generate(x_test, **{'y': targets})
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
    acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
    logger.info('Success rate of targeted PGD on Iris: %.2f%%', (acc * 100))
def test_scikitlearn(self):
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC, LinearSVC

    from art.classifiers.scikitlearn import ScikitlearnLogisticRegression, ScikitlearnSVC

    scikitlearn_test_cases = {
        LogisticRegression: ScikitlearnLogisticRegression,
        SVC: ScikitlearnSVC,
        LinearSVC: ScikitlearnSVC
    }

    (_, _), (x_test, y_test) = self.iris
    x_test_original = x_test.copy()

    for (model_class, classifier_class) in scikitlearn_test_cases.items():
        model = model_class()
        classifier = classifier_class(model=model, clip_values=(0, 1))
        classifier.fit(x=x_test, y=y_test)

        # Test untargeted attack
        attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy of ' + classifier.__class__.__name__ + ' on Iris with PGD adversarial examples: '
                    '%.2f%%', (acc * 100))

        # Test targeted attack
        targets = random_targets(y_test, nb_classes=3)
        attack = ProjectedGradientDescent(classifier, targeted=True, eps=1, eps_step=0.1)
        x_test_adv = attack.generate(x_test, **{'y': targets})
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
        acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
        logger.info('Success rate of ' + classifier.__class__.__name__ + ' on targeted PGD on Iris: %.2f%%',
                    (acc * 100))

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def test_classifier_type_check_fail_classifier(self):
    # Use a useless test classifier to test basic classifier properties
    class ClassifierNoAPI:
        pass

    classifier = ClassifierNoAPI
    with self.assertRaises(TypeError) as context:
        _ = ProjectedGradientDescent(classifier=classifier)

    self.assertIn('For `ProjectedGradientDescent` classifier must be an instance of '
                  '`art.classifiers.classifier.Classifier`, the provided classifier is instance of '
                  '(<class \'object\'>,).', str(context.exception))
def test_keras_iris_unbounded(self):
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channel_index=1)
    attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.2, max_iter=5)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%", (acc * 100))
def test_classifier_type_check_fail_gradients(self):
    # Use a test classifier not providing gradients required by white-box attack
    from art.classifiers.scikitlearn import ScikitlearnDecisionTreeClassifier
    from sklearn.tree import DecisionTreeClassifier

    classifier = ScikitlearnDecisionTreeClassifier(model=DecisionTreeClassifier())
    with self.assertRaises(TypeError) as context:
        _ = ProjectedGradientDescent(classifier=classifier)

    self.assertIn('For `ProjectedGradientDescent` classifier must be an instance of '
                  '`art.classifiers.classifier.ClassifierGradients`, the provided classifier is instance of '
                  '(<class \'art.classifiers.scikitlearn.ScikitlearnClassifier\'>,).', str(context.exception))
def train_model(model, train_generator, val_generator, save_path,
                nb_epochs=20, adv_eps=0.0, adv_frac=0.5, **kwargs):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    best_acc = 0
    attacker = None
    if adv_eps > 0:
        classifier = cnn_model.SmoothedPytorchClassifier(
            model,
            nb_classes=40,
            loss=torch.nn.CrossEntropyLoss(),
            optimizer=torch.optim.Adam(model.parameters(), lr=0.0001),
            input_shape=(cnn_model.WINDOW_LENGTH, ))
        attacker = ProjectedGradientDescent(
            classifier,
            eps=adv_eps,
            eps_step=adv_eps / 5,
            max_iter=10,
            batch_size=train_generator.batch_size)
        # attacker = FastGradientMethod(classifier, eps=adv_eps, batch_size=train_generator.batch_size)

    for i in range(nb_epochs):
        logger.info("Epoch %d" % i)
        fit(model, train_generator, optimizer,
            adversarial_attacker=attacker, adversarial_frac=adv_frac, **kwargs)
        loss, nat_acc, adv_acc = eval_benign(model, val_generator,
                                             adversarial_attacker=attacker, niters=1)
        logger.info("Validation loss : %f" % loss)
        logger.info("Validation accuracy : %f" % nat_acc)
        acc = nat_acc
        if adv_eps > 0:
            logger.info("Adversarial accuracy : %f" % adv_acc)
            acc = adv_acc
        if acc > best_acc:
            best_acc = acc
            logger.info("Saving model")
            torch.save(model.state_dict(), save_path)
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function: Load classifier and generate adversarial samples
    """
    t_start = time.time()
    if attacker_name == "SMM":
        attacker = SaliencyMapMethod(classifier=classifier, theta=2)
    elif attacker_name == "PGD":
        attacker = ProjectedGradientDescent(classifier=classifier, norm=2, eps=1, eps_step=0.5)
    else:
        raise ValueError("Please get the right attacker's name for the input.")
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
def pgd_adv_train(model, data, outpath, model_name):
    attack = ProjectedGradientDescent(model,
                                      eps=0.015,
                                      eps_step=0.001,
                                      max_iter=2,
                                      targeted=False,
                                      num_random_init=0)
    adv_trainer = AdversarialTrainer(model, attacks=attack, ratio=1.0)

    print('>>> Processing adversarial training, it will take a while...')
    x_train, y_train = data
    adv_trainer.fit(x_train, y_train, nb_epochs=30, batch_size=32)

    savefile = os.path.join(outpath, model_name)
    print('>>> Save the model to [{}]'.format(savefile))
    adv_trainer.classifier.save(savefile)
    return adv_trainer.classifier
def build_adversarial(model, optimizer, loss, input_shape, nb_class,
                      method, batch_size=32, pgd_eps=0.3):
    model.eval()
    wmodel = PyTorchClassifier(model, loss, optimizer, input_shape, nb_class)
    if method == 'deepfool':
        adv_crafter = DeepFool(wmodel)
    elif method == 'bim':
        adv_crafter = BasicIterativeMethod(wmodel, batch_size=batch_size)
    elif method == 'jsma':
        adv_crafter = SaliencyMapMethod(wmodel, batch_size=batch_size)
    elif method == 'cw2':
        adv_crafter = CarliniL2Method(wmodel, batch_size=batch_size)
    elif method == 'cwi':
        adv_crafter = CarliniLInfMethod(wmodel, batch_size=batch_size)
    elif method == 'fgsm':
        adv_crafter = FastGradientMethod(wmodel, batch_size=batch_size)
    elif method == 'pgd':
        adv_crafter = ProjectedGradientDescent(wmodel, batch_size=batch_size, eps=pgd_eps)
    else:
        raise NotImplementedError('Unsupported Attack Method: {}'.format(method))
    return adv_crafter
class PGD:
    """
    Class for adversarial attacks based on projected gradient descent (PGD).

    The PGD implementation in ART projects onto the feasible region after each
    iteration. However, random restarting is not used in this implementation;
    this is the difference between the PGD implemented in ART and the attack
    described by Madry et al. This adversarial attack subsumes the iterative FGSM.
    """

    def __init__(self, model, loss_criterion, norm=np.inf, batch_size=128):
        self.wrapped_pytorch_model = wrapModel(model, loss_criterion)
        self.norm = norm
        self.batch_size = batch_size
        self.attack = ProjectedGradientDescent(self.wrapped_pytorch_model,
                                               norm=norm,
                                               random_init=False,
                                               batch_size=batch_size)

        # Use GPU for computation if it is available
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    def generatePerturbation(self, data, budget, max_iter=15):
        images, _ = data

        # eps_step is not allowed to be larger than budget, according to the
        # documentation of ART.
        eps_step = budget / 5
        images_adv = self.attack.generate(x=images.cpu().numpy(),
                                          norm=self.norm,
                                          eps=budget,
                                          eps_step=eps_step,
                                          max_iter=max_iter,
                                          batch_size=self.batch_size)
        images_adv = torch.from_numpy(images_adv)

        # The output to be returned should be loaded on an appropriate device.
        return images_adv.to(self.device)
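A short usage sketch for the `PGD` wrapper above, assuming a PyTorch model `net`, a loss `criterion`, and a DataLoader `test_loader` (all hypothetical names); only `generatePerturbation`, as defined in the class, is called.

# Hypothetical usage sketch: `net`, `criterion` and `test_loader` are assumed to exist.
pgd = PGD(net, criterion, norm=np.inf, batch_size=128)
images, labels = next(iter(test_loader))
images_adv = pgd.generatePerturbation((images, labels), budget=8 / 255, max_iter=15)
# images_adv is a torch.Tensor on pgd.device and can be fed back to the model
# for evaluation (assumes `net` lives on the same device).
outputs = net(images_adv)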
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function: Load classifier and generate adversarial samples
    """
    t_start = time.time()
    if attacker_name == "SMM":
        attacker = SaliencyMapMethod(classifier=classifier, theta=.5, gamma=1.)
    elif attacker_name == "PGD":
        attacker = ProjectedGradientDescent(classifier=classifier, norm=1, eps=1,
                                            eps_step=0.5, max_iter=100, targeted=False,
                                            num_random_init=0, batch_size=1)
    else:
        raise ValueError("Please get the right attacker's name for the input.")
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
def test_pytorch_iris_pt(self):
    classifier = get_tabular_classifier_pt()

    # Test untargeted attack
    attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1, max_iter=5)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%", (acc * 100))

    # Test targeted attack
    targets = random_targets(self.y_test_iris, nb_classes=3)
    attack = ProjectedGradientDescent(classifier, targeted=True, eps=1, eps_step=0.1, max_iter=5)
    x_test_adv = attack.generate(self.x_test_iris, **{"y": targets})
    self.assertFalse((self.x_test_iris == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
    acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Success rate of targeted PGD on Iris: %.2f%%", (acc * 100))
def _test_backend_mnist(self, classifier, x_train, y_train, x_test, y_test):
    x_test_original = x_test.copy()

    # Test PGD with np.inf norm
    attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1, max_iter=5)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())

    acc = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(y_train, axis=1)) / len(y_train)
    logger.info("Accuracy on adversarial train examples: %.2f%%", acc * 100)

    acc = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(np.array(y_test), axis=1)) / len(y_test)
    logger.info("Accuracy on adversarial test examples: %.2f%%", acc * 100)

    # Test PGD with 3 random initialisations
    attack = ProjectedGradientDescent(classifier, num_random_init=3, max_iter=5)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())

    acc = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(y_train, axis=1)) / len(y_train)
    logger.info("Accuracy on adversarial train examples with 3 random initialisations: %.2f%%", acc * 100)

    acc = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(np.array(y_test), axis=1)) / len(y_test)
    logger.info("Accuracy on adversarial test examples with 3 random initialisations: %.2f%%", acc * 100)

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(np.array(x_test_original) - np.array(x_test)))),
                           0.0, delta=0.00001)
def general_test_v2(model, optimizer, input_shape, nb_classes, test_loader, method,
                    conf, btrain=False, model_file='last_model_92_sgd.pkl'):
    global _classes
    if not btrain:
        checked_state = torch.load(model_file)['state_dict']
        model.load_state_dict(checked_state)
    assert isinstance(model, AttackPGD), 'Incorrect Model Configuration'
    model = model.model.eval()
    # model.eval()

    loss = nn.CrossEntropyLoss()
    warped_model = PyTorchClassifier(model, loss, optimizer, input_shape, nb_classes,
                                     clip_values=(.0, 1.))
    if method == 'Deepfool':
        adv_crafter = DeepFool(warped_model)
    elif method == 'BIM':
        adv_crafter = BasicIterativeMethod(warped_model, batch_size=32)
    elif method == 'JSMA':
        adv_crafter = SaliencyMapMethod(warped_model, batch_size=32)
    elif method == 'CW2':
        adv_crafter = CarliniL2Method(warped_model, batch_size=32)
    elif method == 'CWI':
        adv_crafter = CarliniLInfMethod(warped_model, batch_size=32)
    elif method == 'FGSM':
        adv_crafter = FastGradientMethod(warped_model, batch_size=32)
    elif method == 'PGD':
        adv_crafter = ProjectedGradientDescent(warped_model, batch_size=32)

    correct, total = 0, 0
    adv_dataset = adv_generalization(test_loader, adv_crafter, conf)
    temp_loader = DataLoader(dataset=adv_dataset, batch_size=32, shuffle=False, drop_last=True)
    # temp_loader = test_loader

    for images, labels in temp_loader:
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        outputs = model(images, conf)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()

    print('Accuracy of the model on the test images: %d %%' % (100 * float(correct) / total))
    print('Accuracy of the model on the test images:', float(correct) / total)
    return correct / total
datagen.fit(x_train)
art_datagen = KerasDataGenerator(
    datagen.flow(x=x_train, y=y_train, batch_size=batch_size, shuffle=True),
    size=x_train.shape[0],
    batch_size=batch_size,
)

# Create a toy Keras CNN architecture & wrap it under ART interface
classifier = KerasClassifier(build_model(), clip_values=(0, 1), use_logits=False)

# Create the attack for the adversarial trainer; PGD crafts adversarial examples on the target model
pgd = ProjectedGradientDescent(classifier, eps=8, eps_step=2, max_iter=10, num_random_init=20)

# Create some adversarial samples for evaluation
x_test_pgd = pgd.generate(x_test)

# Create adversarial trainer and perform adversarial training
adv_trainer = AdversarialTrainer(classifier, attacks=pgd, ratio=1.0)
adv_trainer.fit_generator(art_datagen, nb_epochs=83)

# Evaluate the adversarially trained model on clean test set
labels_true = np.argmax(y_test, axis=1)
labels_test = np.argmax(classifier.predict(x_test), axis=1)
print("Accuracy test set: %.2f%%" % (np.sum(labels_test == labels_true) / x_test.shape[0] * 100))
predict = classifier.predict(x_test_adv)
predict_classes = np.argmax(predict, axis=-1)
target_names = ["Class {}".format(i) for i in range(CLASSES)]
print(classification_report(y_test, predict_classes, target_names=target_names))

accuracy = np.sum(np.argmax(predict, axis=1) == y_test) / len(y_test)
print('Accuracy on CarliniLInfMethod test examples: {:.3f}%'.format(accuracy * 100))

carlini_example = x_test_adv[example]

print("*" * 100)

attack = ProjectedGradientDescent(classifier, norm=np.inf, eps=0.3, eps_step=0.1, max_iter=100)
x_test_adv = attack.generate(x_test)
perturbation = np.mean(np.abs((x_test_adv - x_test)))
print('Average perturbation: {:.10f}'.format(perturbation))

predict = classifier.predict(x_test_adv)
predict_classes = np.argmax(predict, axis=-1)
target_names = ["Class {}".format(i) for i in range(CLASSES)]
print(classification_report(y_test, predict_classes, target_names=target_names))

accuracy = np.sum(np.argmax(predict, axis=1) == y_test) / len(y_test)
print('Accuracy on ProjectedGradientDescent[norm=inf] test examples: {:.3f}%'.format(accuracy * 100))
def test_robust(opt, model, classifier, attack_method, c, norm=None):
    if opt.attack == 'FGSM':
        adv_crafter = FastGradientMethod(classifier, norm=norm, eps=c, targeted=False,
                                         num_random_init=0, batch_size=opt.bs)
    if opt.attack == 'PGD':
        adv_crafter = ProjectedGradientDescent(classifier, norm=norm, eps=c, eps_step=c / 10.,
                                               max_iter=10, targeted=False, num_random_init=1,
                                               batch_size=opt.bs)
    if opt.attack == 'BIM':
        adv_crafter = ProjectedGradientDescent(classifier, norm=norm, eps=c, eps_step=c / 10.,
                                               max_iter=10, targeted=False, num_random_init=0,
                                               batch_size=opt.bs)
    if opt.attack == 'JSMA':
        adv_crafter = SaliencyMapMethod(classifier, theta=0.1, gamma=c, batch_size=opt.bs)
    if opt.attack == 'CW':
        adv_crafter = cw.L2Adversary(targeted=False, confidence=0.01, c_range=(c, 1e10),
                                     max_steps=1000, abort_early=False, search_steps=5,
                                     box=(0., 1.0), optimizer_lr=0.01)

    correct = 0
    total = 0
    total_sum = 0
    common_id = []
    for batch_idx, (inputs, targets) in enumerate(testloader):
        inputs, targets = inputs.cuda(), targets.cuda()
        output = classifier.predict(inputs.cpu().numpy(), batch_size=opt.bs)
        output = torch.tensor(output)
        output = output.cuda()
        init_pred = output.max(1, keepdim=False)[1]
        common_id = np.where(init_pred.cpu().numpy() == targets.cpu().numpy())[0]

        if opt.attack == 'CW':
            x_test_adv = adv_crafter(model, inputs, targets, to_numpy=True)
        else:
            x_test_adv = adv_crafter.generate(x=inputs.cpu().numpy())

        perturbed_output = classifier.predict(x_test_adv)
        perturbed_output = torch.tensor(perturbed_output)
        perturbed_output = perturbed_output.cuda()
        final_pred = perturbed_output.max(1, keepdim=False)[1]

        total_sum += targets.size(0)
        total += len(common_id)
        correct += final_pred[common_id].eq(targets[common_id].data).cpu().sum()
        attack_acc = 100. * float(correct) / total

        progress.progress_bar(batch_idx, len(testloader),
                              'Attack Strength:%.3f, robust accuracy: %.3f%% (%d/%d)'
                              '' % (c, attack_acc, correct, total))
def get_adversarial(targeted, attack_name, classifier, xs, target_ys, batch_size,
                    dataset, fgsm_epsilon=0, cwl2_confidence=0):
    # The attack
    attack = ''
    samples_range = xs.shape[0]
    # ======================================
    if attack_name == 'FastGradientMethod':
        # norm=np.inf, eps=.3, eps_step=0.1, targeted=False, num_random_init=0, batch_size=1, minimal=False
        attack = FastGradientMethod(classifier=classifier, targeted=targeted,
                                    eps=fgsm_epsilon, batch_size=batch_size)
    # =====================================
    elif attack_name == 'CarliniLInfMethod':
        # confidence=0.0, targeted=False, learning_rate=0.01, max_iter=10, max_halving=5,
        # max_doubling=5, eps=0.3, batch_size=128
        attack = CarliniLInfMethod(classifier=classifier, max_iter=1000,
                                   targeted=targeted, batch_size=batch_size)
    # -------------------------------
    elif attack_name == 'UniversalPerturbation':
        # attacker='deepfool', attacker_params=None, delta=0.2,
        # max_iter=20, eps=10.0, norm=np.inf
        if targeted:
            print('UniversalPerturbation attack cannot be targeted.')
            exit()
        attack = UniversalPerturbation(classifier=classifier, max_iter=5)
    # ==============================================
    elif attack_name == 'ProjectedGradientDescent':
        # norm=np.inf, eps=.3, eps_step=0.1, max_iter=100,
        # targeted=False, num_random_init=0, batch_size=1
        if dataset == 'mnist':
            attack = ProjectedGradientDescent(classifier=classifier, targeted=targeted,
                                              norm=1, eps=.3, eps_step=0.01,
                                              num_random_init=0, max_iter=40,
                                              batch_size=batch_size)
        else:
            attack = ProjectedGradientDescent(classifier=classifier, targeted=targeted,
                                              norm=1, eps=8.0, eps_step=2.0,
                                              num_random_init=0, max_iter=7,
                                              batch_size=batch_size)

    if targeted:
        # Generate the adversarial samples in steps
        adv = attack.generate(xs[0:batch_size, :, :, :], y=target_ys[0:batch_size])
        last_ii = 0
        for ii in range(batch_size, samples_range - batch_size, batch_size):
            print(ii)
            adv_samples = attack.generate(xs[ii:ii + batch_size, :, :, :],
                                          y=target_ys[ii:ii + batch_size])
            adv = np.concatenate((adv, adv_samples), axis=0)
            last_ii = ii
        # The rest of the samples
        if last_ii + batch_size < xs.shape[0]:
            last_samples = xs[last_ii + batch_size:, :, :, :]
            adv_samples = attack.generate(last_samples, y=target_ys[last_ii + batch_size:])
            adv = np.concatenate((adv, adv_samples), axis=0)
    else:
        # Generate the adversarial samples in steps
        adv = attack.generate(xs[0:batch_size, :, :, :])
        last_ii = 0
        for ii in range(batch_size, samples_range - batch_size, batch_size):
            print(ii)
            adv_samples = attack.generate(xs[ii:ii + batch_size, :, :, :])
            adv = np.concatenate((adv, adv_samples), axis=0)
            last_ii = ii
        # The rest of the samples
        if last_ii + batch_size < xs.shape[0]:
            last_samples = xs[last_ii + batch_size:, :, :, :]
            adv_samples = attack.generate(last_samples)
            adv = np.concatenate((adv, adv_samples), axis=0)

    adv = np.asarray(adv)
    return adv
def test_scikitlearn(self):
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC, LinearSVC

    from art.classifiers.scikitlearn import SklearnClassifier

    scikitlearn_test_cases = [
        LogisticRegression(solver="lbfgs", multi_class="auto"),
        SVC(gamma="auto"),
        LinearSVC(),
    ]

    x_test_original = self.x_test_iris.copy()

    for model in scikitlearn_test_cases:
        classifier = SklearnClassifier(model=model, clip_values=(0, 1))
        classifier.fit(x=self.x_test_iris, y=self.y_test_iris)

        # Test untargeted attack
        attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1, max_iter=5)
        x_test_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info(
            "Accuracy of " + classifier.__class__.__name__ + " on Iris with PGD adversarial examples: "
            "%.2f%%",
            (acc * 100),
        )

        # Test targeted attack
        targets = random_targets(self.y_test_iris, nb_classes=3)
        attack = ProjectedGradientDescent(classifier, targeted=True, eps=1, eps_step=0.1, max_iter=5)
        x_test_adv = attack.generate(self.x_test_iris, **{"y": targets})
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
        acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
        logger.info(
            "Success rate of " + classifier.__class__.__name__ + " on targeted PGD on Iris: %.2f%%",
            (acc * 100))

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_iris))),
                           0.0, delta=0.00001)
# # training for MNIST
# classifier = KerasClassifier(clip_values=(-0.5, 0.5), model=model, use_logits=False)
# attack = ProjectedGradientDescent(classifier, eps=0.3, eps_step=0.01, max_iter=20, batch_size=128)

# ## training for CIFAR
# classifier = KerasClassifier(model=model, use_logits=False)
# attack = ProjectedGradientDescent(classifier, eps=8/255, eps_step=2/255, max_iter=10, batch_size=512)

## training for SVHN
classifier = KerasClassifier(clip_values=(-0.5, 0.5), model=model, use_logits=False)
attack = ProjectedGradientDescent(classifier, eps=8 / 255, eps_step=1 / 255, max_iter=20, batch_size=512)

x_test_pgd = attack.generate(x_test, y_test)
# np.save('./data/' + dataset + '_data/model/' + model_name + '_y_' + attack_name + '.npy', x_test_pgd)

# Evaluate the benign trained model on adv test set
labels_pgd = np.argmax(classifier.predict(x_test_pgd), axis=1)
print('Accuracy on original PGD adversarial samples: %.2f%%' %
      (np.sum(labels_pgd == labels_true) / x_test.shape[0] * 100))

trainer = AdversarialTrainer(classifier, attack, ratio=1.0)
trainer.fit(x_train, y_train, nb_epochs=60, batch_size=1024)

classifier.save(filename='adv_' + model_name + '.h5',
predictions = classifier2.predict(shared_x_test)
acc = accuracy(predictions, shared_y_test)
print('Accuracy of model2 on shared test examples: {}%'.format(acc * 100))
top_five_acc = accuracy_n(predictions, shared_y_test, 5)
print('Top 5 accuracy of model2 on shared test examples: {}%'.format(top_five_acc * 100))

# Define attack based on model1
if attack_choice == "fgsm":
    attack = FastGradientMethod(classifier=classifier1, eps=eps)
else:
    attack = ProjectedGradientDescent(classifier=classifier1, eps=eps, max_iter=adv_steps)

print()
print("generating adversarial examples...")

# generate adv examples for model1 based on shared data
x_test_adv = attack.generate(x=shared_x_test)

# test adv examples generated from model1 on model1
predictions = classifier1.predict(x_test_adv)
acc = accuracy(predictions, shared_y_test)
print('Accuracy of model1 on adversarial test examples: {}%'.format(acc * 100))
top_five_acc = accuracy_n(predictions, shared_y_test, 5)