def test_without_defences(self):
    """
    Run FGSM against the undefended Keras classifier through the query-efficient gradient estimation wrapper.

    Asserts that the adversarial samples differ from the clean ones and that the predicted labels are not all
    equal to the true labels, then logs the accuracy on the adversarial train and test samples.
    """
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Wrap the ready-trained Keras model in the query efficient gradient estimator
    classifier = QueryEfficientBBGradientEstimation(self.classifier_k, 20, 1 / 64., round_samples=1 / 255.)
    attack = FastGradientMethod(classifier, eps=1)

    # Train samples are attacked before test samples
    x_train_adv, x_test_adv = [attack.generate(samples) for samples in (x_train, x_test)]
    for clean, adversarial in ((x_train, x_train_adv), (x_test, x_test_adv)):
        self.assertFalse((clean == adversarial).all())

    train_y_pred, test_y_pred = [
        get_labels_np_array(classifier.predict(samples)) for samples in (x_train_adv, x_test_adv)
    ]
    for labels, predicted in ((y_train, train_y_pred), (y_test, test_y_pred)):
        self.assertFalse((labels == predicted).all())

    reports = (
        (x_train_adv, y_train, 'Accuracy on adversarial train examples with limited query info: %.2f%%'),
        (x_test_adv, y_test, 'Accuracy on adversarial test examples with limited query info: %.2f%%'),
    )
    for samples, labels, message in reports:
        preds = classifier.predict(samples)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(labels, axis=1)) / labels.shape[0]
        logger.info(message, (acc * 100))
def test_with_defences(self):
    """
    Run FGSM against a feature-squeezing-defended classifier through the query-efficient wrapper.

    Asserts that the adversarial samples differ from the clean ones and that the predicted labels are not all
    equal to the true labels.
    """
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Re-wrap the trained Keras model with a 1-bit feature squeezing preprocessing defence
    squeezing = FeatureSqueezing(bit_depth=1, clip_values=(0, 1))
    defended = KerasClassifier(
        model=self.classifier_k._model, clip_values=(0, 1), preprocessing_defences=squeezing
    )

    # Put the gradient estimation wrapper around the defended classifier
    classifier = QueryEfficientGradientEstimationClassifier(defended, 20, 1 / 64.0, round_samples=1 / 255.0)
    attack = FastGradientMethod(classifier, eps=1)

    # Train samples are attacked before test samples
    x_train_adv, x_test_adv = [attack.generate(samples) for samples in (x_train, x_test)]
    for clean, adversarial in ((x_train, x_train_adv), (x_test, x_test_adv)):
        self.assertFalse((clean == adversarial).all())

    train_y_pred, test_y_pred = [
        get_labels_np_array(classifier.predict(samples)) for samples in (x_train_adv, x_test_adv)
    ]
    for labels, predicted in ((y_train, train_y_pred), (y_test, test_y_pred)):
        self.assertFalse((labels == predicted).all())
def test_iris_clipped(self):
    """
    Run an untargeted FGSM attack on the Iris classifier wrapped in ExpectationOverTransformations.

    The only transformation supplied is the identity. Asserts that the adversarial samples differ from the
    clean ones, lie within [0, 1] and are not all classified as their true class; logs the accuracy.
    """
    (_, _), (x_test, y_test) = self.iris

    def identity(x):
        return x

    def transformation():
        # Endless supply of the identity transformation
        while True:
            yield identity

    classifier = ExpectationOverTransformations(
        get_tabular_classifier_kr(), sample_size=1, transformation=transformation
    )

    # Test untargeted attack
    attack = FastGradientMethod(classifier, eps=0.1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    true_labels = np.argmax(y_test, axis=1)
    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((true_labels == preds_adv).all())

    accuracy = np.sum(preds_adv == true_labels) / y_test.shape[0]
    logger.info("Accuracy on Iris with limited query info: %.2f%%", (accuracy * 100))
def test_subsetscan_detector(self):
    """
    Check the detection power reported by SubsetScanningDetector on clean and FGSM-perturbed MNIST samples.

    Scanning clean against clean data is expected to give 0.5; scanning clean against adversarial data, or
    against a clean/adversarial mixture, is expected to give more than 0.5.
    """
    (x_train, _), (x_test, _), _, _ = load_dataset("mnist")
    background = x_train[:NB_TRAIN]
    clean = x_test[:NB_TEST]

    # Keras classifier
    classifier = get_image_classifier_kr()

    # Generate adversarial samples (train samples first, then test samples)
    attacker = FastGradientMethod(classifier, eps=0.5)
    x_train_adv = attacker.generate(background)
    anomalous = attacker.generate(clean)

    # Mixture of clean and adversarial training samples
    mixture = np.concatenate((background, x_train_adv), axis=0)

    detector = SubsetScanningDetector(classifier, background, layer=1)

    detection_power = detector.scan(clean, clean)[2]
    self.assertAlmostEqual(detection_power, 0.5)

    detection_power = detector.scan(clean, anomalous)[2]
    self.assertGreater(detection_power, 0.5)

    detection_power = detector.scan(clean, mixture, 85, 15)[2]
    self.assertGreater(detection_power, 0.5)
def _fgsm(model, data, labels, attack_args):
    """
    Generate adversarial examples with the Fast Gradient Sign Method.

    Reference: "Explaining and Harnessing Adversarial Examples" by Ian J. Goodfellow, Jonathon Shlens and
    Christian Szegedy, ``https://arxiv.org/abs/1412.6572``.

    :param model: Estimator handed to `FastGradientMethod`.
    :param data: Samples passed to the attack's `generate`.
    :param labels: Labels passed to the attack's `generate`.
    :param attack_args: Mapping read with `.get`; recognised keys are `eps` (default 0.3), `targeted`
        (default False), `num_random_init` (default 0) and `minimal` (default False).
    :return: Whatever `FastGradientMethod.generate` returns for `data` and `labels`.
    """
    print('>>> Generating FGSM examples.')

    # The step size is tied to the attack budget
    budget = attack_args.get('eps', 0.3)
    attacker = FastGradientMethod(
        model,
        eps=budget,
        eps_step=budget,
        targeted=attack_args.get('targeted', False),
        num_random_init=attack_args.get('num_random_init', 0),
        minimal=attack_args.get('minimal', False),
    )
    return attacker.generate(data, labels)
def test_iris_unbounded(self):
    """
    Run FGSM with eps=1 on an Iris classifier without clip values, wrapped in ExpectationOverTransformations.

    Asserts that the adversarial samples differ from the clean ones, that some values leave the [0, 1] range
    on both sides, and that not all samples are classified as their true class; logs the accuracy.
    """
    (_, _), (x_test, y_test) = self.iris
    base_classifier = get_tabular_classifier_kr()

    def identity(x):
        return x

    def transformation():
        # Endless supply of the identity transformation
        while True:
            yield identity

    # Recreate a classifier without clip values
    unclipped = KerasClassifier(model=base_classifier._model, use_logits=False, channels_first=True)
    classifier = ExpectationOverTransformations(unclipped, sample_size=1, transformation=transformation)

    attack = FastGradientMethod(classifier, eps=1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    true_labels = np.argmax(y_test, axis=1)
    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((true_labels == preds_adv).all())

    accuracy = np.sum(preds_adv == true_labels) / y_test.shape[0]
    logger.info("Accuracy on Iris with limited query info: %.2f%%", (accuracy * 100))
def test_two_attacks(self):
    """
    Adversarially train the classifier with FGSM and DeepFool and compare accuracy on FGSM samples.

    Also verifies that neither the attack nor the training modified `x_test`.
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_test_original = x_test.copy()
    true_labels = np.argmax(y_test, axis=1)

    fgsm = FastGradientMethod(estimator=self.classifier, batch_size=16)
    deepfool = DeepFool(classifier=self.classifier, max_iter=5, batch_size=16)
    x_test_adv = fgsm.generate(x_test)

    # Accuracy on the adversarial samples before adversarial training
    predictions = np.argmax(self.classifier.predict(x_test_adv), axis=1)
    accuracy = np.sum(predictions == true_labels) / NB_TEST

    adv_trainer = AdversarialTrainer(self.classifier, attacks=[fgsm, deepfool])
    adv_trainer.fit(x_train, y_train, nb_epochs=2, batch_size=16)

    # Accuracy on the same adversarial samples after adversarial training
    predictions_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
    accuracy_new = np.sum(predictions_new == true_labels) / NB_TEST

    self.assertEqual(accuracy_new, 0.36)
    self.assertEqual(accuracy, 0.13)

    # Check that x_test has not been modified by attack and classifier
    max_deviation = float(np.max(np.abs(x_test_original - x_test)))
    self.assertAlmostEqual(max_deviation, 0.0, delta=0.00001)
def test_binary_activation_detector(self):
    """
    Test the binary activation detector end-to-end.

    A small Keras model is trained on the layer-0 activations of the classifier to separate clean MNIST
    samples (label [1, 0]) from FGSM samples (label [0, 1]). The test passes when at least one adversarial
    test sample is flagged and at least one clean test sample is not.

    :return:
    """
    # Get MNIST
    (x_train, _), (x_test, _), _, _ = load_mnist()
    x_train = x_train[:NB_TRAIN]
    x_test = x_test[:NB_TEST]

    # Keras classifier
    classifier = get_image_classifier_kr()

    # Generate adversarial samples. The arrays are already cut to NB_TRAIN / NB_TEST above, so they are used
    # as they are (the test set was previously re-sliced with NB_TRAIN by mistake).
    attacker = FastGradientMethod(classifier, eps=0.1)
    x_train_adv = attacker.generate(x_train)
    x_test_adv = attacker.generate(x_test)

    # Compile training data for detector. Label counts follow the actual number of samples so inputs and
    # labels always have the same length.
    x_train_detector = np.concatenate((x_train, x_train_adv), axis=0)
    y_train_detector = np.concatenate(
        (np.array([[1, 0]] * x_train.shape[0]), np.array([[0, 1]] * x_train_adv.shape[0])), axis=0
    )

    # Create a simple CNN for the detector
    activation_shape = classifier.get_activations(x_test[:1], 0, batch_size=128).shape[1:]
    number_outputs = 2

    model = Sequential()
    model.add(MaxPooling2D(pool_size=(2, 2), input_shape=activation_shape))
    model.add(Flatten())
    model.add(Dense(number_outputs, activation="softmax"))
    model.compile(
        loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=0.01), metrics=["accuracy"]
    )

    # Create detector and train it.
    # Detector consider activations at layer=0:
    detector = BinaryActivationDetector(
        classifier=classifier, detector=KerasClassifier(model=model, clip_values=(0, 1), use_logits=False), layer=0
    )
    detector.fit(x_train_detector, y_train_detector, nb_epochs=2, batch_size=128)

    # Apply detector on clean and adversarial test data:
    test_detection = np.argmax(detector.predict(x_test), axis=1)
    test_adv_detection = np.argmax(detector.predict(x_test_adv), axis=1)

    # Assert there is at least one true positive and negative
    nb_true_positives = len(np.where(test_adv_detection == 1)[0])
    nb_true_negatives = len(np.where(test_detection == 0)[0])
    logger.debug("Number of true positives detected: %i", nb_true_positives)
    logger.debug("Number of true negatives detected: %i", nb_true_negatives)
    self.assertGreater(nb_true_positives, 0)
    self.assertGreater(nb_true_negatives, 0)
def test_two_attacks_with_generator(self):
    """
    Adversarially train the classifier with FGSM and DeepFool from a data generator.

    Compares the accuracy on FGSM samples before and after training and verifies that `x_train` and `x_test`
    were not modified.
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_train_original = x_train.copy()
    x_test_original = x_test.copy()
    true_labels = np.argmax(y_test, axis=1)

    class MyDataGenerator(DataGenerator):
        """Generator that draws a random batch, without replacement, from in-memory arrays."""

        def __init__(self, x, y, size, batch_size):
            super().__init__(size=size, batch_size=batch_size)
            self.x = x
            self.y = y
            self._size = size
            self._batch_size = batch_size

        def get_batch(self):
            nb_samples = min(self.size, self.batch_size)
            indices = np.random.choice(self.size, size=nb_samples, replace=False)
            return self.x[indices], self.y[indices]

    generator = MyDataGenerator(x_train, y_train, size=x_train.shape[0], batch_size=16)

    fgsm = FastGradientMethod(estimator=self.classifier, batch_size=16)
    deepfool = DeepFool(classifier=self.classifier, max_iter=5, batch_size=16)
    x_test_adv = fgsm.generate(x_test)

    # Accuracy on the adversarial samples before adversarial training
    predictions = np.argmax(self.classifier.predict(x_test_adv), axis=1)
    accuracy = np.sum(predictions == true_labels) / NB_TEST

    adv_trainer = AdversarialTrainer(self.classifier, attacks=[fgsm, deepfool])
    adv_trainer.fit_generator(generator, nb_epochs=3)

    # Accuracy on the same adversarial samples after adversarial training
    predictions_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
    accuracy_new = np.sum(predictions_new == true_labels) / NB_TEST

    self.assertAlmostEqual(accuracy_new, 0.25, delta=0.02)
    self.assertAlmostEqual(accuracy, 0.11, delta=0.0)

    # Check that x_train and x_test has not been modified by attack and classifier
    for original, current in ((x_train_original, x_train), (x_test_original, x_test)):
        self.assertAlmostEqual(float(np.max(np.abs(original - current))), 0.0, delta=0.00001)
def test_fit_predict_different_classifiers(self):
    """
    Adversarially train `self.classifier_2` with an FGSM attack that targets `self.classifier`.

    Checks the accuracy on the FGSM samples before and after training, that `x_test` is unmodified, and that
    `fit_generator` runs both with the default ratio and with `ratio=1.0`.
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_test_original = x_test.copy()
    true_labels = np.argmax(y_test, axis=1)

    attack = FastGradientMethod(self.classifier)
    x_test_adv = attack.generate(x_test)

    # Accuracy of the attacked classifier on the adversarial samples
    predictions = np.argmax(self.classifier.predict(x_test_adv), axis=1)
    accuracy = np.sum(predictions == true_labels) / NB_TEST

    adv_trainer = AdversarialTrainer(self.classifier_2, attack)
    adv_trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)

    # Accuracy of the adversarially trained classifier on the same samples
    predictions_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
    accuracy_new = np.sum(predictions_new == true_labels) / NB_TEST

    self.assertEqual(accuracy_new, 0.32)
    self.assertEqual(accuracy, 0.13)

    # Check that x_test has not been modified by attack and classifier
    max_deviation = float(np.max(np.abs(x_test_original - x_test)))
    self.assertAlmostEqual(max_deviation, 0.0, delta=0.00001)

    # fit_generator
    class MyDataGenerator(DataGenerator):
        """Generator that draws a random batch, without replacement, from in-memory arrays."""

        def __init__(self, x, y, size, batch_size):
            super().__init__(size=size, batch_size=batch_size)
            self.x = x
            self.y = y
            self._size = size
            self._batch_size = batch_size

        def get_batch(self):
            nb_samples = min(self.size, self.batch_size)
            indices = np.random.choice(self.size, size=nb_samples, replace=False)
            return self.x[indices], self.y[indices]

    generator = MyDataGenerator(x_train, y_train, size=x_train.shape[0], batch_size=16)
    adv_trainer.fit_generator(generator, nb_epochs=5)

    adv_trainer_2 = AdversarialTrainer(self.classifier_2, attack, ratio=1.0)
    adv_trainer_2.fit_generator(generator, nb_epochs=5)
def test_excpetions(self):
    """
    Check that AdversarialTrainer rejects invalid arguments with a ValueError.

    Two cases are covered: a string passed in place of an attack, and a `ratio` of 1.5.
    """
    with self.assertRaises(ValueError):
        _ = AdversarialTrainer(self.classifier, "attack")

    # Build the attack outside the assertRaises block so that only the trainer construction can satisfy the
    # expected ValueError; an error raised while creating the attack must fail the test instead.
    attack = FastGradientMethod(self.classifier)
    with self.assertRaises(ValueError):
        _ = AdversarialTrainer(self.classifier, attack, ratio=1.5)
def test_fgsm_defences(art_warning, fix_get_mnist_subset, image_dl_estimator, device_type):
    """
    Run FGSM against a PyTorch classifier protected by three spatial smoothing preprocessing defences.

    The defences use window sizes 3, 5 and 7. Any ARTTestException is forwarded to `art_warning`.
    """
    try:
        smoothing_defences = [
            SpatialSmoothingPyTorch(window_size=window, channels_first=True, device_type=device_type)
            for window in (3, 5, 7)
        ]

        classifier_, _ = image_dl_estimator()
        criterion = nn.CrossEntropyLoss()
        classifier = PyTorchClassifier(
            clip_values=(0, 1),
            model=classifier_.model,
            preprocessing_defences=smoothing_defences,
            loss=criterion,
            input_shape=(1, 28, 28),
            nb_classes=10,
            device_type=device_type,
        )
        assert len(classifier.preprocessing_defences) == 3

        attack = FastGradientMethod(classifier, eps=1.0, batch_size=128)
        backend_test_defended_images(attack, fix_get_mnist_subset)
    except ARTTestException as e:
        art_warning(e)
def test_classifier_match(self):
    """
    Check that the trainer holds exactly one attack and that its estimator is the trainer's classifier.
    """
    fgsm = FastGradientMethod(self.classifier)
    trainer = AdversarialTrainer(self.classifier, fgsm)

    self.assertEqual(len(trainer.attacks), 1)

    attack_estimator = trainer.attacks[0].estimator
    self.assertEqual(attack_estimator, trainer.get_classifier())
def test_iris_clipped(self):
    """
    Run an untargeted FGSM attack on the Iris classifier through the query-efficient wrapper.

    Asserts that the adversarial samples differ from the clean ones, lie within [0, 1] and are not all
    classified as their true class.
    """
    (_, _), (x_test, y_test) = self.iris
    classifier = QueryEfficientGradientEstimationClassifier(
        get_tabular_classifier_kr(), 20, 1 / 64.0, round_samples=1 / 255.0
    )

    # Test untargeted attack
    attack = FastGradientMethod(classifier, eps=0.1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    true_labels = np.argmax(y_test, axis=1)
    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((true_labels == preds_adv).all())
def test_iris_clipped(self):
    """
    Attack the Iris PyTorch classifier with FGSM and evaluate a randomized smoothing classifier on the result.

    The attack is crafted against the base classifier `ptc`; predictions, accuracy/coverage logging and
    certification are done with the smoothed classifier `rs`, which is built from the same model.
    """
    (_, _), (x_test, y_test) = self.iris

    ptc = get_tabular_classifier_pt()
    rs = PyTorchRandomizedSmoothing(
        model=ptc.model,
        loss=ptc._loss,
        input_shape=ptc.input_shape,
        nb_classes=ptc.nb_classes,
        channels_first=ptc.channels_first,
        clip_values=ptc.clip_values,
        sample_size=100,
        scale=0.01,
        alpha=0.001,
    )

    # Test untargeted attack
    attack = FastGradientMethod(ptc, eps=0.1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_smooth = np.argmax(rs.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_smooth).all())

    # Clean samples are predicted before adversarial ones, as before. The log lines now report each metric
    # under the matching label (they were previously swapped between clean and adversarial).
    pred_clean = rs.predict(x_test)
    pred_adv = rs.predict(x_test_adv)
    acc_clean, cov_clean = compute_accuracy(pred_clean, y_test)
    acc_adv, cov_adv = compute_accuracy(pred_adv, y_test)
    logger.info("Accuracy on Iris with smoothing on adversarial examples: %.2f%%", (acc_adv * 100))
    logger.info("Coverage on Iris with smoothing on adversarial examples: %.2f%%", (cov_adv * 100))
    logger.info("Accuracy on Iris with smoothing: %.2f%%", (acc_clean * 100))
    logger.info("Coverage on Iris with smoothing: %.2f%%", (cov_clean * 100))

    # Check basic functionality of RS object
    # check predict
    y_test_smooth = rs.predict(x=x_test)
    self.assertEqual(y_test_smooth.shape, y_test.shape)
    self.assertTrue((np.sum(y_test_smooth, axis=1) <= 1).all())

    # check certification
    pred, radius = rs.certify(x=x_test, n=250)
    self.assertEqual(len(pred), len(x_test))
    self.assertEqual(len(radius), len(x_test))
    self.assertTrue((radius <= 1).all())
    self.assertTrue((pred < y_test.shape[1]).all())
def test_iris_unbounded(self):
    """
    Run FGSM with eps=1 on an Iris classifier without clip values through the query-efficient wrapper.

    Asserts that the adversarial samples differ from the clean ones, that some values leave the [0, 1] range
    on both sides, and that not all samples are classified as their true class.
    """
    (_, _), (x_test, y_test) = self.iris
    base_classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    unclipped = KerasClassifier(model=base_classifier._model, use_logits=False, channels_first=True)
    classifier = QueryEfficientGradientEstimationClassifier(unclipped, 20, 1 / 64.0, round_samples=1 / 255.0)

    attack = FastGradientMethod(classifier, eps=1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    true_labels = np.argmax(y_test, axis=1)
    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((true_labels == preds_adv).all())
def get_adversarial_examples(X, Y, model, nb_classes, attack=None):
    """
    Generate adversarial examples for a model wrapped in a `SklearnClassifier`, using fixed attack settings.

    :param X: Samples passed to the attack's `generate`.
    :param Y: Labels passed to the attack's `generate` as `y`.
    :param model: Model wrapped in a `SklearnClassifier`; must not be None.
    :param nb_classes: Upper clip value; the classifier is created with `clip_values=(0, nb_classes)`.
    :param attack: `ATTACK` member selecting the attack (PGD, DEEPFOOL, FGSM, BIM, JSMA, CW_L2 or CW_Linf);
        must not be None.
    :return: The adversarial examples returned by the attack's `generate`.
    :raises NotImplementedError: If `attack` is not one of the members listed above.
    """
    assert model is not None
    assert attack is not None

    art_classifier = SklearnClassifier(model=model, clip_values=(0, nb_classes))

    if attack == ATTACK.PGD:
        attacker = ProjectedGradientDescent(classifier=art_classifier, norm=np.inf, eps=0.2, eps_step=0.1,
                                            max_iter=3, targeted=False, num_random_init=0, batch_size=128)
    elif attack == ATTACK.DEEPFOOL:
        attacker = DeepFool(classifier=art_classifier, max_iter=5, epsilon=1e-6, nb_grads=3, batch_size=1)
    elif attack == ATTACK.FGSM:
        attacker = FastGradientMethod(classifier=art_classifier, norm=np.inf, eps=0.3, targeted=False,
                                      batch_size=128)
    elif attack == ATTACK.BIM:
        attacker = BasicIterativeMethod(classifier=art_classifier, eps=0.3, eps_step=0.1, targeted=False,
                                        batch_size=128)
    elif attack == ATTACK.JSMA:
        attacker = SaliencyMapMethod(classifier=art_classifier, theta=0.3, gamma=0.5, batch_size=128)
    elif attack == ATTACK.CW_L2:
        attacker = CarliniL2Method(classifier=art_classifier, learning_rate=0.1)
    elif attack == ATTACK.CW_Linf:
        attacker = CarliniLInfMethod(classifier=art_classifier, learning_rate=0.01)
    else:
        # A single formatted message; passing two arguments made the exception text render as a tuple.
        raise NotImplementedError('{} is not implemented.'.format(attack))

    print('Generating [{}] adversarial examples, it will take a while...'.format(attack))
    return attacker.generate(X, y=Y)
def test_without_defences(self):
    """
    Run FGSM against the undefended Keras classifier through the query-efficient wrapper.

    Asserts that the adversarial samples differ from the clean ones and that the predicted labels are not all
    equal to the true labels.
    """
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Wrap the ready-trained Keras model in the query efficient gradient estimator
    classifier = QueryEfficientGradientEstimationClassifier(
        self.classifier_k, 20, 1 / 64.0, round_samples=1 / 255.0
    )
    attack = FastGradientMethod(classifier, eps=1)

    # Train samples are attacked before test samples
    x_train_adv, x_test_adv = [attack.generate(samples) for samples in (x_train, x_test)]
    for clean, adversarial in ((x_train, x_train_adv), (x_test, x_test_adv)):
        self.assertFalse((clean == adversarial).all())

    train_y_pred, test_y_pred = [
        get_labels_np_array(classifier.predict(samples)) for samples in (x_train_adv, x_test_adv)
    ]
    for labels, predicted in ((y_train, train_y_pred), (y_test, test_y_pred)):
        self.assertFalse((labels == predicted).all())
def test_with_defences(self):
    """
    Run FGSM against a feature-squeezing-defended classifier through the query-efficient wrapper.

    Asserts that the adversarial samples differ from the clean ones and that the predicted labels are not all
    equal to the true labels, then logs the accuracy on the adversarial train and test samples.
    """
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Re-wrap the ready-trained Keras model with a 1-bit feature squeezing preprocessing defence
    squeezing = FeatureSqueezing(bit_depth=1, clip_values=(0, 1))
    defended = KerasClassifier(
        model=self.classifier_k._model, clip_values=(0, 1), preprocessing_defences=squeezing
    )

    # Wrap the classifier
    classifier = QueryEfficientBBGradientEstimation(defended, 20, 1 / 64.0, round_samples=1 / 255.0)
    attack = FastGradientMethod(classifier, eps=1)

    # Train samples are attacked before test samples
    x_train_adv, x_test_adv = [attack.generate(samples) for samples in (x_train, x_test)]
    for clean, adversarial in ((x_train, x_train_adv), (x_test, x_test_adv)):
        self.assertFalse((clean == adversarial).all())

    train_y_pred, test_y_pred = [
        get_labels_np_array(classifier.predict(samples)) for samples in (x_train_adv, x_test_adv)
    ]
    for labels, predicted in ((y_train, train_y_pred), (y_test, test_y_pred)):
        self.assertFalse((labels == predicted).all())

    reports = (
        (
            x_train_adv,
            y_train,
            "Accuracy on adversarial train examples with feature squeezing and limited query info: %.2f%%",
        ),
        (
            x_test_adv,
            y_test,
            "Accuracy on adversarial test examples with feature squeezing and limited query info: %.2f%%",
        ),
    )
    for samples, labels, message in reports:
        preds = classifier.predict(samples)
        acc = np.sum(np.argmax(preds, axis=1) == np.argmax(labels, axis=1)) / labels.shape[0]
        logger.info(message, (acc * 100))
def test_iris_clipped(self):
    """
    Run an untargeted FGSM attack on the Iris classifier through the query-efficient wrapper.

    Asserts that the adversarial samples differ from the clean ones, lie within [0, 1] and are not all
    classified as their true class; logs the accuracy.
    """
    (_, _), (x_test, y_test) = self.iris
    classifier = QueryEfficientBBGradientEstimation(
        get_iris_classifier_kr(), 20, 1 / 64., round_samples=1 / 255.
    )

    # Test untargeted attack
    attack = FastGradientMethod(classifier, eps=.1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    true_labels = np.argmax(y_test, axis=1)
    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((true_labels == preds_adv).all())

    accuracy = np.sum(preds_adv == true_labels) / y_test.shape[0]
    logger.info('Accuracy on Iris with limited query info: %.2f%%', (accuracy * 100))
def test_krclassifier(self):
    """
    Test with a KerasClassifier.

    A targeted FGSM attack is run once on the plain classifier and once on the same classifier wrapped in
    ExpectationOverTransformations with an identity transformation; both results must agree within 0.001.

    :return:
    """
    # Build KerasClassifier
    krc = get_image_classifier_kr()

    # Get MNIST
    (_, _), (x_test, y_test) = self.mnist

    # Both attacks share the same random targets
    targets = random_targets(y_test, krc.nb_classes)

    # First attack (without EoT):
    plain_attack = FastGradientMethod(estimator=krc, targeted=True)
    x_test_adv = plain_attack.generate(x_test, y=targets)

    # Second attack (with EoT):
    def identity(x):
        return x

    def transformation():
        while True:
            yield identity

    eot = ExpectationOverTransformations(classifier=krc, sample_size=1, transformation=transformation)
    eot_attack = FastGradientMethod(estimator=eot, targeted=True)
    x_test_adv_with_eot = eot_attack.generate(x_test, y=targets)

    difference = np.abs(x_test_adv - x_test_adv_with_eot)
    self.assertTrue((difference < 0.001).all())
def test_2_pt(self):
    """
    Test with a PyTorch Classifier.

    Compares a targeted FGSM attack on the base classifier with the same attack on its randomized smoothing
    counterpart, then exercises `predict`, `certify`, `loss_gradient` and `fit` of the smoothed classifier.

    :return:
    """
    # Build PyTorchClassifier
    ptc = get_image_classifier_pt()

    # Get MNIST and move the channel axis to position 1
    (_, _), (x_test, y_test) = self.mnist
    x_test = x_test.transpose(0, 3, 1, 2).astype(np.float32)

    # First FGSM attack:
    fgsm = FastGradientMethod(estimator=ptc, targeted=True)
    params = {"y": random_targets(y_test, ptc.nb_classes)}
    x_test_adv = fgsm.generate(x_test, **params)

    # Initialize RS object and attack with FGSM
    rs = PyTorchRandomizedSmoothing(
        model=ptc.model,
        loss=ptc._loss,
        optimizer=torch.optim.Adam(ptc.model.parameters(), lr=0.01),
        input_shape=ptc.input_shape,
        nb_classes=ptc.nb_classes,
        channels_first=ptc.channels_first,
        clip_values=ptc.clip_values,
        sample_size=100,
        scale=0.01,
        alpha=0.001,
    )
    fgsm_with_rs = FastGradientMethod(estimator=rs, targeted=True)
    x_test_adv_with_rs = fgsm_with_rs.generate(x_test, **params)

    # Compare results
    # check shapes are equal and values are within a certain range
    self.assertEqual(x_test_adv.shape, x_test_adv_with_rs.shape)
    self.assertTrue((np.abs(x_test_adv - x_test_adv_with_rs) < 0.75).all())

    # Check basic functionality of RS object
    # check predict
    y_test_smooth = rs.predict(x=x_test)
    y_test_base = ptc.predict(x=x_test)
    self.assertEqual(y_test_smooth.shape, y_test.shape)
    self.assertTrue((np.sum(y_test_smooth, axis=1) <= np.ones((NB_TEST,))).all())
    self.assertTrue((np.argmax(y_test_smooth, axis=1) == np.argmax(y_test_base, axis=1)).all())

    # check certification
    pred, radius = rs.certify(x=x_test, n=250)
    self.assertEqual(len(pred), NB_TEST)
    self.assertEqual(len(radius), NB_TEST)
    self.assertTrue((radius <= 1).all())
    self.assertTrue((pred < y_test.shape[1]).all())

    # loss gradient (assertEqual instead of a bare assert: it is not stripped under -O and reports both
    # shapes on failure)
    grad = rs.loss_gradient(x=x_test, y=y_test, sampling=True)
    self.assertEqual(grad.shape, (10, 1, 28, 28))

    # fit
    rs.fit(x=x_test, y=y_test)
def test_targeted_attack_error(self):
    """
    Test the adversarial trainer using a targeted attack, which is expected to raise a NotImplementedError
    when `fit` is called.

    :return: None
    """
    (x_train, y_train), (_, _) = self.mnist

    targeted_fgsm = FastGradientMethod(self.classifier, targeted=True)
    adv_trainer = AdversarialTrainer(self.classifier, attacks=targeted_fgsm)

    with self.assertRaises(NotImplementedError):
        adv_trainer.fit(x_train, y_train, nb_epochs=2, batch_size=BATCH_SIZE)
def test_fit_predict(self):
    """
    Adversarially train the classifier with FGSM and compare accuracy on FGSM samples before and after.

    Also verifies that neither the attack nor the training modified `x_test`.
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_test_original = x_test.copy()
    true_labels = np.argmax(y_test, axis=1)

    attack = FastGradientMethod(self.classifier)
    x_test_adv = attack.generate(x_test)

    # Accuracy on the adversarial samples before adversarial training
    predictions = np.argmax(self.classifier.predict(x_test_adv), axis=1)
    accuracy = np.sum(predictions == true_labels) / NB_TEST

    adv_trainer = AdversarialTrainer(self.classifier, attack)
    adv_trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)

    # Accuracy on the same adversarial samples after adversarial training
    predictions_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
    accuracy_new = np.sum(predictions_new == true_labels) / NB_TEST

    self.assertEqual(accuracy_new, 0.12)
    self.assertEqual(accuracy, 0.13)

    # Check that x_test has not been modified by attack and classifier
    max_deviation = float(np.max(np.abs(x_test_original - x_test)))
    self.assertAlmostEqual(max_deviation, 0.0, delta=0.00001)
def test_iris_unbounded(self):
    """
    Run FGSM with eps=1 on an Iris classifier without clip values through the query-efficient wrapper.

    Asserts that the adversarial samples differ from the clean ones, that some values leave the [0, 1] range
    on both sides, and that not all samples are classified as their true class; logs the accuracy.
    """
    (_, _), (x_test, y_test) = self.iris
    base_classifier = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    unclipped = KerasClassifier(model=base_classifier._model, use_logits=False, channel_index=1)
    classifier = QueryEfficientBBGradientEstimation(unclipped, 20, 1 / 64., round_samples=1 / 255.)

    attack = FastGradientMethod(classifier, eps=1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    true_labels = np.argmax(y_test, axis=1)
    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((true_labels == preds_adv).all())

    accuracy = np.sum(preds_adv == true_labels) / y_test.shape[0]
    logger.info('Accuracy on Iris with limited query info: %.2f%%', (accuracy * 100))
def test_3_kr(self):
    """
    Test with a Keras Classifier.

    Compares a targeted FGSM attack on the base classifier with the same attack on its randomized smoothing
    counterpart, then exercises `predict`, `certify`, `loss_gradient` and `fit` of the smoothed classifier.

    :return:
    """
    # Build KerasClassifier
    classifier = get_image_classifier_kr()

    # Get MNIST
    (_, _), (x_test, y_test) = self.mnist

    # First FGSM attack:
    fgsm = FastGradientMethod(estimator=classifier, targeted=True)
    params = {"y": random_targets(y_test, classifier.nb_classes)}
    x_test_adv = fgsm.generate(x_test, **params)

    # Initialize RS object and attack with FGSM
    rs = NumpyRandomizedSmoothing(
        classifier=classifier,
        sample_size=100,
        scale=0.01,
        alpha=0.001,
    )
    fgsm_with_rs = FastGradientMethod(estimator=rs, targeted=True)
    x_test_adv_with_rs = fgsm_with_rs.generate(x_test, **params)

    # Compare results
    # check shapes are equal and values are within a certain range
    self.assertEqual(x_test_adv.shape, x_test_adv_with_rs.shape)
    self.assertTrue((np.abs(x_test_adv - x_test_adv_with_rs) < 0.75).all())

    # Check basic functionality of RS object
    # check predict
    y_test_smooth = rs.predict(x=x_test)
    y_test_base = classifier.predict(x=x_test)
    self.assertEqual(y_test_smooth.shape, y_test.shape)
    self.assertTrue((np.sum(y_test_smooth, axis=1) <= np.ones((NB_TEST,))).all())
    self.assertTrue((np.argmax(y_test_smooth, axis=1) == np.argmax(y_test_base, axis=1)).all())

    # check certification
    pred, radius = rs.certify(x=x_test, n=250)
    self.assertEqual(len(pred), NB_TEST)
    self.assertEqual(len(radius), NB_TEST)
    self.assertTrue((radius <= 1).all())
    self.assertTrue((pred < y_test.shape[1]).all())

    # loss gradient (assertEqual instead of a bare assert: it is not stripped under -O and reports both
    # shapes on failure)
    grad = rs.loss_gradient(x=x_test, y=y_test, sampling=True)
    self.assertEqual(grad.shape, (10, 28, 28, 1))

    # fit
    rs.fit(x=x_test, y=y_test)
def robustness_evaluation(object_storage_url, object_storage_username, object_storage_password,
                          data_bucket_name, result_bucket_name, model_id,
                          feature_testset_path='processed_data/X_test.npy',
                          label_testset_path='processed_data/y_test.npy',
                          clip_values=(0, 1),
                          nb_classes=2,
                          input_shape=(1, 3, 64, 64),
                          model_class_file='model.py',
                          model_class_name='model',
                          LossFn='',
                          Optimizer='',
                          epsilon=0.2):
    """
    Download a trained PyTorch model and its test set from object storage and evaluate it against FGSM.

    Files are written to the current working directory (`X_test.npy`, `y_test.npy`, `model.pt`, `model.zip`)
    and the model source archive is extracted into `model_files/`.

    :param object_storage_url: Object storage endpoint; a leading `http://` or `https://` is stripped.
    :param object_storage_username: Access key for the object storage.
    :param object_storage_password: Secret key for the object storage.
    :param data_bucket_name: Bucket holding the test features and labels.
    :param result_bucket_name: Bucket holding `<model_id>/model.pt` and `<model_id>/_submitted_code/model.zip`.
    :param model_id: Prefix of the model artefacts inside `result_bucket_name`.
    :param feature_testset_path: Object path of the test features (`.npy`).
    :param label_testset_path: Object path of the test labels (`.npy`).
    :param clip_values: Passed to `PyTorchClassifier`.
    :param nb_classes: Passed to `PyTorchClassifier`.
    :param input_shape: Passed to `PyTorchClassifier`.
    :param model_class_file: File inside the archive that defines the model class; dashes in the module name
        are replaced by underscores.
    :param model_class_name: Name of the model class inside that module.
    :param LossFn: Python expression evaluated to obtain the loss; empty selects `CrossEntropyLoss`.
    :param Optimizer: Python expression evaluated to obtain the optimizer; empty selects Adam with lr=0.001.
    :param epsilon: `eps` of the FGSM attack.
    :return: The first element of the tuple returned by `get_metrics`.
    """
    url = re.compile(r"https?://")
    cos = Minio(url.sub('', object_storage_url),
                access_key=object_storage_username,
                secret_key=object_storage_password,
                secure=False)

    dataset_filenamex = "X_test.npy"
    dataset_filenamey = "y_test.npy"
    weights_filename = "model.pt"
    model_files = model_id + '/_submitted_code/model.zip'

    cos.fget_object(data_bucket_name, feature_testset_path, dataset_filenamex)
    cos.fget_object(data_bucket_name, label_testset_path, dataset_filenamey)
    cos.fget_object(result_bucket_name, model_id + '/' + weights_filename, weights_filename)
    cos.fget_object(result_bucket_name, model_files, 'model.zip')

    # Load PyTorch model definition from the source code.
    # The context manager closes the archive even when extraction fails.
    with zipfile.ZipFile('model.zip', 'r') as zip_ref:
        zip_ref.extractall('model_files')

    # We required users to define where the model class is located or follow
    # some naming convention we have provided.
    modulename = 'model_files.' + model_class_file.split('.')[0].replace('-', '_')
    model_class = getattr(importlib.import_module(modulename), model_class_name)

    # load & compile model
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = model_class().to(device)
    model.load_state_dict(torch.load(weights_filename, map_location=device))

    # Define Loss and optimizer function for the PyTorch model.
    # SECURITY: `LossFn` and `Optimizer` are executed with eval(); only pass trusted expressions. They are
    # evaluated in this function's scope, so the local names defined above (e.g. `model`) are visible to them
    # and must keep their names.
    if LossFn:
        loss_fn = eval(LossFn)
    else:
        loss_fn = torch.nn.CrossEntropyLoss()
    if Optimizer:
        optimizer = eval(Optimizer)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # create pytorch classifier
    classifier = PyTorchClassifier(clip_values, model, loss_fn, optimizer, input_shape, nb_classes)

    # load test dataset
    x = np.load(dataset_filenamex)
    y = np.load(dataset_filenamey)

    # craft adversarial samples using FGSM
    crafter = FastGradientMethod(classifier, eps=epsilon)
    x_samples = crafter.generate(x)

    # obtain all metrics (robustness score, perturbation metric, reduction in confidence);
    # the two prediction arrays returned alongside are not needed here
    metrics, _, _ = get_metrics(model, x, x_samples, y)

    print("metrics:", metrics)
    return metrics
def craft(X, Y, art_classifier, attack=None, **attack_params):
    """
    Generate adversarial examples with the selected attack, configured through keyword overrides.

    :param X: Samples passed to the attack's `generate`.
    :param Y: Labels passed to the attack's `generate` as `y`.
    :param art_classifier: Classifier handed to the attack; must not be None.
    :param attack: `ATTACK` member selecting the attack; must not be None.
    :param attack_params: Optional overrides, read per attack with these defaults:
        PGD: `eps` 0.2, `eps_step` eps / 5, `max_iter` 3, `targeted` False, `batch_size` 128;
        DEEPFOOL: `eps` 1e-6, `max_iter` 5, `nb_grads` 3, `batch_size` 1;
        FGSM: `eps` 0.3, `targeted` False, `batch_size` 128;
        BIM: `eps` 0.3, `eps_step` eps / 5, `norm` np.inf, `targeted` False, `batch_size` 128;
        JSMA: `theta` 0.3, `gamma` 0.5, `batch_size` 128;
        CW_L2: `lr` 0.1, `bsearch_steps` 10;
        CW_Linf: `lr` 0.01.
    :return: The adversarial examples returned by the attack's `generate`.
    :raises NotImplementedError: If `attack` is not one of the members listed above.
    """
    assert art_classifier is not None
    assert attack is not None

    if attack == ATTACK.PGD:
        eps = attack_params.get('eps', 0.2)
        eps_step = attack_params.get('eps_step', eps / 5.)
        max_iter = attack_params.get('max_iter', 3)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        attacker = ProjectedGradientDescent(classifier=art_classifier, norm=np.inf, eps=eps, eps_step=eps_step,
                                            max_iter=max_iter, targeted=targeted, num_random_init=0,
                                            batch_size=batch_size)
    elif attack == ATTACK.DEEPFOOL:
        eps = attack_params.get('eps', 1e-6)
        max_iter = attack_params.get('max_iter', 5)
        nb_grads = attack_params.get('nb_grads', 3)
        batch_size = attack_params.get('batch_size', 1)

        attacker = DeepFool(classifier=art_classifier, max_iter=max_iter, epsilon=eps, nb_grads=nb_grads,
                            batch_size=batch_size)
    elif attack == ATTACK.FGSM:
        eps = attack_params.get('eps', 0.3)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        attacker = FastGradientMethod(classifier=art_classifier, norm=np.inf, eps=eps, targeted=targeted,
                                      batch_size=batch_size)
    elif attack == ATTACK.BIM:
        eps = attack_params.get('eps', 0.3)
        eps_step = attack_params.get('eps_step', eps / 5.)
        norm = attack_params.get('norm', np.inf)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        attacker = BasicIterativeMethod(classifier=art_classifier, norm=norm, eps=eps, eps_step=eps_step,
                                        targeted=targeted, batch_size=batch_size)
    elif attack == ATTACK.JSMA:
        theta = attack_params.get('theta', 0.3)
        gamma = attack_params.get('gamma', 0.5)
        batch_size = attack_params.get('batch_size', 128)

        attacker = SaliencyMapMethod(classifier=art_classifier, theta=theta, gamma=gamma, batch_size=batch_size)
    elif attack == ATTACK.CW_L2:
        lr = attack_params.get('lr', 0.1)
        bsearch_steps = attack_params.get('bsearch_steps', 10)

        attacker = CarliniL2Method(classifier=art_classifier, learning_rate=lr,
                                   binary_search_steps=bsearch_steps)
    elif attack == ATTACK.CW_Linf:
        lr = attack_params.get('lr', 0.01)

        attacker = CarliniLInfMethod(classifier=art_classifier, learning_rate=lr)
    else:
        # A single formatted message; passing two arguments made the exception text render as a tuple.
        raise NotImplementedError('{} is not implemented.'.format(attack))

    print('Generating [{}] adversarial examples, it will take a while...'.format(attack))
    return attacker.generate(X, y=Y)
def test_binary_input_detector(self):
    """
    Test the binary input detector end-to-end.

    A small Keras CNN is trained to separate clean MNIST samples (label [1, 0]) from FGSM samples
    (label [0, 1]). The test passes when at least one adversarial test sample is flagged and at least one
    clean test sample is not.

    :return:
    """
    # Get MNIST
    nb_train, nb_test = 1000, 10
    (x_train, _), (x_test, _), _, _ = load_mnist()
    x_train = x_train[:NB_TRAIN]
    x_test = x_test[:NB_TEST]

    # Keras classifier
    classifier = get_image_classifier_kr()

    # Generate adversarial samples:
    attacker = FastGradientMethod(classifier, eps=0.1)
    x_train_clean = x_train[:nb_train]
    x_train_adv = attacker.generate(x_train_clean)
    x_test_adv = attacker.generate(x_test[:nb_test])

    # Compile training data for detector. Label counts follow the actual number of samples, so inputs and
    # labels stay aligned even when fewer than `nb_train` samples are available.
    x_train_detector = np.concatenate((x_train_clean, x_train_adv), axis=0)
    y_train_detector = np.concatenate(
        (np.array([[1, 0]] * x_train_clean.shape[0]), np.array([[0, 1]] * x_train_adv.shape[0])), axis=0
    )

    # Create a simple CNN for the detector
    input_shape = x_train.shape[1:]

    # The location of Adam depends on the installed Keras version
    try:
        from keras.optimizers import Adam
    except ImportError:
        from keras.optimizers import adam_v2

        Adam = adam_v2.Adam
    optimizer = Adam(lr=0.01)

    model = Sequential()
    model.add(Conv2D(4, kernel_size=(5, 5), activation="relu", input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(2, activation="softmax"))
    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=optimizer, metrics=["accuracy"])

    # Create detector and train it:
    detector = BinaryInputDetector(KerasClassifier(model=model, clip_values=(0, 1), use_logits=False))
    detector.fit(x_train_detector, y_train_detector, nb_epochs=2, batch_size=128)

    # Apply detector on clean and adversarial test data:
    test_detection = np.argmax(detector.predict(x_test), axis=1)
    test_adv_detection = np.argmax(detector.predict(x_test_adv), axis=1)

    # Assert there is at least one true positive and negative:
    nb_true_positives = len(np.where(test_adv_detection == 1)[0])
    nb_true_negatives = len(np.where(test_detection == 0)[0])
    logger.debug("Number of true positives detected: %i", nb_true_positives)
    logger.debug("Number of true negatives detected: %i", nb_true_negatives)
    self.assertGreater(nb_true_positives, 0)
    self.assertGreater(nb_true_negatives, 0)
def test_1_tf(self):
    """
    Test with a TensorFlow Classifier.

    Only runs on TensorFlow 2. Compares a targeted FGSM attack on the base classifier with the same attack
    on its randomized smoothing counterpart, then exercises `predict`, `certify`, `loss_gradient` and `fit`
    of the smoothed classifier.

    :return:
    """
    # Only the major version matters; parsing every component with int() fails on versions that carry a
    # suffix such as "2.4.0-rc1".
    tf_major_version = int(tf.__version__.split(".")[0])
    if tf_major_version != 2:
        return

    # Build TensorFlowV2Classifier
    classifier, _ = get_image_classifier_tf()

    # Get MNIST
    (_, _), (x_test, y_test) = self.mnist

    # First FGSM attack:
    fgsm = FastGradientMethod(estimator=classifier, targeted=True)
    params = {"y": random_targets(y_test, classifier.nb_classes)}
    x_test_adv = fgsm.generate(x_test, **params)

    # Initialize RS object and attack with FGSM
    rs = TensorFlowV2RandomizedSmoothing(
        model=classifier.model,
        nb_classes=classifier.nb_classes,
        input_shape=classifier.input_shape,
        loss_object=classifier.loss_object,
        train_step=classifier.train_step,
        channels_first=classifier.channels_first,
        clip_values=classifier.clip_values,
        preprocessing_defences=classifier.preprocessing_defences,
        postprocessing_defences=classifier.postprocessing_defences,
        preprocessing=classifier.preprocessing,
        sample_size=100,
        scale=0.01,
        alpha=0.001,
    )
    fgsm_with_rs = FastGradientMethod(estimator=rs, targeted=True)
    x_test_adv_with_rs = fgsm_with_rs.generate(x_test, **params)

    # Compare results
    # check shapes are equal and values are within a certain range
    self.assertEqual(x_test_adv.shape, x_test_adv_with_rs.shape)
    self.assertTrue((np.abs(x_test_adv - x_test_adv_with_rs) < 0.75).all())

    # Check basic functionality of RS object
    # check predict
    y_test_smooth = rs.predict(x=x_test)
    y_test_base = classifier.predict(x=x_test)
    self.assertEqual(y_test_smooth.shape, y_test.shape)
    self.assertTrue((np.sum(y_test_smooth, axis=1) <= np.ones((NB_TEST,))).all())
    self.assertTrue((np.argmax(y_test_smooth, axis=1) == np.argmax(y_test_base, axis=1)).all())

    # check certification
    pred, radius = rs.certify(x=x_test, n=250)
    self.assertEqual(len(pred), NB_TEST)
    self.assertEqual(len(radius), NB_TEST)
    self.assertTrue((radius <= 1).all())
    self.assertTrue((pred < y_test.shape[1]).all())

    # loss gradient (assertEqual instead of a bare assert: it is not stripped under -O and reports both
    # shapes on failure)
    grad = rs.loss_gradient(x=x_test, y=y_test, sampling=True)
    self.assertEqual(grad.shape, (10, 28, 28, 1))

    # fit
    rs.fit(x=x_test, y=y_test)