def test_pytorch_iris(self):
    """
    Check that defensive distillation refuses the PyTorch Iris classifier.

    Applying the transformer is expected to raise a ``ValueError`` whose message states that the trained
    classifier does not produce probability outputs.

    :return:
    """
    # The trained classifier comes first, then the one built with `load_init=False` that would be transformed
    teacher = get_tabular_classifier_pt()
    student = get_tabular_classifier_pt(load_init=False)

    distiller = DefensiveDistillation(classifier=teacher, batch_size=BATCH_SIZE, nb_epochs=NB_EPOCHS)

    # The transformation itself is what must fail
    with self.assertRaises(ValueError) as context:
        student = distiller(x=self.x_train_iris, transformed_classifier=student)

    expected_message = "The input trained classifier do not produce probability outputs."
    self.assertIn(expected_message, str(context.exception))
def test_4_pytorch_iris(self):
    """
    Run KnockoffNets extraction against the PyTorch Iris classifier.

    Both the "random" and the "adaptive" sampling strategies are exercised. After each extraction the share of
    Iris training samples on which victim and thieved classifier predict the same label must exceed a threshold
    (0.3 for random sampling, 0.4 for adaptive sampling).

    :return:
    """
    victim_ptc = get_tabular_classifier_pt()
    thieved_ptc = get_tabular_classifier_pt(load_init=False)

    # Constructor arguments shared by both sampling strategies
    shared_kwargs = {
        "classifier": victim_ptc,
        "batch_size_fit": BATCH_SIZE,
        "batch_size_query": BATCH_SIZE,
        "nb_epochs": NB_EPOCHS,
        "nb_stolen": NB_STOLEN,
        "verbose": False,
    }

    def label_agreement(stolen):
        # Fraction of training samples where victim and stolen classifier pick the same class
        victim_labels = np.argmax(victim_ptc.predict(x=self.x_train_iris), axis=1)
        stolen_labels = np.argmax(stolen.predict(x=self.x_train_iris), axis=1)
        return np.sum(victim_labels == stolen_labels) / len(victim_labels)

    # Random sampling strategy
    attack = KnockoffNets(sampling_strategy="random", **shared_kwargs)
    thieved_ptc = attack.extract(x=self.x_train_iris, thieved_classifier=thieved_ptc)
    self.assertGreater(label_agreement(thieved_ptc), 0.3)

    # Adaptive sampling strategy; it is additionally given the training labels
    attack = KnockoffNets(sampling_strategy="adaptive", reward="all", **shared_kwargs)
    thieved_ptc = attack.extract(x=self.x_train_iris, y=self.y_train_iris, thieved_classifier=thieved_ptc)
    self.assertGreater(label_agreement(thieved_ptc), 0.4)
def test_4_pytorch_iris(self):
    """
    Run the untargeted HopSkipJump attack on the PyTorch Iris classifier.

    The attack is executed twice: first with the attack's default norm (labelled "Norm=2" in the original
    version of this test) and then with the infinity norm. For each run the adversarial samples must differ
    from the inputs, stay within [0, 1], and change at least one prediction. The remaining accuracy is logged.

    :return:
    """
    classifier = get_tabular_classifier_pt()
    x_test = self.x_test_iris.astype(np.float32)
    true_labels = np.argmax(self.y_test_iris, axis=1)

    # First run: default norm. Second run: infinity norm.
    # `np.inf` is used because the `np.Inf` alias was removed in NumPy 2.0.
    for norm_kwargs in ({}, {"norm": np.inf}):
        attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10, **norm_kwargs)
        x_test_adv = attack.generate(x_test)

        # The attack must perturb the data while respecting the [0, 1] value range
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        # At least one sample has to be misclassified after the attack
        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((true_labels == preds_adv).all())

        acc = np.sum(preds_adv == true_labels) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", (acc * 100))
def test_pytorch_iris(self):
    """
    Run the Basic Iterative Method on the PyTorch Iris classifier, untargeted and targeted.

    In both modes the adversarial samples must differ from the test data and stay within [0, 1]. The untargeted
    run must change at least one prediction; the targeted run must hit at least one of the random targets.

    :return:
    """
    classifier = get_tabular_classifier_pt()
    x_test = self.x_test_iris
    nb_samples = self.y_test_iris.shape[0]

    # --- Untargeted attack ---
    true_labels = np.argmax(self.y_test_iris, axis=1)
    untargeted = BasicIterativeMethod(classifier, eps=1, eps_step=0.1)
    x_adv = untargeted.generate(x_test)

    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse((true_labels == adv_labels).all())
    accuracy = np.sum(adv_labels == true_labels) / nb_samples
    logger.info("Accuracy on Iris with BIM adversarial examples: %.2f%%", (accuracy * 100))

    # --- Targeted attack ---
    targets = random_targets(self.y_test_iris, nb_classes=3)
    target_labels = np.argmax(targets, axis=1)
    targeted = BasicIterativeMethod(classifier, targeted=True, eps=1, eps_step=0.1, batch_size=128)
    x_adv = targeted.generate(x_test, y=targets)

    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertTrue((target_labels == adv_labels).any())
    success_rate = np.sum(adv_labels == target_labels) / nb_samples
    logger.info("Success rate of targeted BIM on Iris: %.2f%%", (success_rate * 100))
def _get_tabular_classifier_list(clipped=True):
    """
    Return the tabular classifiers available for the current ``framework``.

    ``framework`` is read from the enclosing scope.

    :param clipped: Whether the clipped variant of the classifier is requested.
    :return: A list of classifiers, or ``None`` when nothing matching is defined: an unclipped TensorFlow or
             PyTorch classifier, or a framework this helper does not handle.
    """
    # Start from `None` so that an unhandled framework returns `None` instead of raising UnboundLocalError
    classifier_list = None

    if framework == "keras":
        if clipped:
            classifier_list = [get_tabular_classifier_kr()]
        else:
            # Re-wrap the Keras model in a new KerasClassifier constructed without clip values
            classifier = get_tabular_classifier_kr()
            classifier_list = [KerasClassifier(model=classifier.model, use_logits=False, channels_first=True)]
    elif framework == "tensorflow":
        if clipped:
            classifier, _ = get_tabular_classifier_tf()
            classifier_list = [classifier]
        else:
            logging.warning("{0} doesn't have an uncliped classifier defined yet".format(framework))
    elif framework == "pytorch":
        if clipped:
            classifier_list = [get_tabular_classifier_pt()]
        else:
            logging.warning("{0} doesn't have an uncliped classifier defined yet".format(framework))
    elif framework == "scikitlearn":
        # NOTE(review): `clipped` is not forwarded here; the scikit-learn list is always requested with
        # clipped=False. Kept as in the original; confirm this is intentional.
        return get_tabular_classifier_scikit_list(clipped=False)

    return classifier_list
def test_4_pytorch_iris(self):
    """
    Run the Universal Perturbation attack (with "ead" as inner attacker) on the PyTorch Iris classifier.

    The adversarial samples must differ from the test data, stay within [0, 1], and change at least one
    prediction. The remaining accuracy is logged.

    :return:
    """
    classifier = get_tabular_classifier_pt()

    # Configure the outer attack and the parameters handed to its inner attacker
    attack = UniversalPerturbation(classifier, verbose=False)
    attack.set_params(
        max_iter=1,
        attacker="ead",
        attacker_params={"max_iter": 5, "targeted": False, "verbose": False},
    )

    x_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    true_labels = np.argmax(self.y_test_iris, axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    accuracy = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with universal adversarial examples: %.2f%%", (accuracy * 100))
def test_pytorch_iris_pt(self):
    """
    Run Projected Gradient Descent on the PyTorch Iris classifier, untargeted and targeted.

    In both modes the adversarial samples must differ from the test data and stay within [0, 1]. The untargeted
    run must change at least one prediction; the targeted run must hit at least one of the random targets.

    :return:
    """
    classifier = get_tabular_classifier_pt()
    x_test = self.x_test_iris
    nb_samples = self.y_test_iris.shape[0]

    # --- Untargeted attack ---
    true_labels = np.argmax(self.y_test_iris, axis=1)
    untargeted = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1, max_iter=5)
    x_adv = untargeted.generate(x_test)

    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse((true_labels == adv_labels).all())
    accuracy = np.sum(adv_labels == true_labels) / nb_samples
    logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%", (accuracy * 100))

    # --- Targeted attack ---
    targets = random_targets(self.y_test_iris, nb_classes=3)
    target_labels = np.argmax(targets, axis=1)
    targeted = ProjectedGradientDescent(classifier, targeted=True, eps=1, eps_step=0.1, max_iter=5)
    x_adv = targeted.generate(x_test, y=targets)

    self.assertFalse((x_test == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertTrue((target_labels == adv_labels).any())
    success_rate = np.sum(adv_labels == target_labels) / nb_samples
    logger.info("Success rate of targeted PGD on Iris: %.2f%%", (success_rate * 100))
def test_pytorch_iris_LInf(self):
    """
    Run the untargeted Carlini L_Inf attack on the PyTorch Iris classifier.

    The adversarial samples must differ from the test data, have all values within [0, 1], and change at least
    one prediction. The remaining accuracy is logged.

    :return:
    """
    classifier = get_tabular_classifier_pt()
    attack = CarliniLInfMethod(classifier, targeted=False, max_iter=10, eps=0.5)

    # The attack is fed a float32 copy of the test data
    x_adv = attack.generate(self.x_test_iris.astype(np.float32))

    self.assertFalse((self.x_test_iris == x_adv).all())
    self.assertLessEqual(np.amax(x_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_adv), 0.0)

    true_labels = np.argmax(self.y_test_iris, axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    acc = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with C&W adversarial examples: %.2f%%", (acc * 100))
def test_pytorch_iris(self):
    """
    Run the NewtonFool attack on the PyTorch Iris classifier.

    The adversarial samples must differ from the test data, stay within [0, 1], and change at least one
    prediction. The remaining accuracy is logged.

    :return:
    """
    classifier = get_tabular_classifier_pt()
    newton_fool = NewtonFool(classifier, max_iter=5, batch_size=128)
    x_adv = newton_fool.generate(self.x_test_iris)

    self.assertFalse((self.x_test_iris == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    true_labels = np.argmax(self.y_test_iris, axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    accuracy = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with NewtonFool adversarial examples: %.2f%%", (accuracy * 100))
def test_4_pytorch_iris(self):
    """
    Run the DeepFool attack on the PyTorch Iris classifier.

    The adversarial samples must differ from the test data, have all values within [0, 1], and change at least
    one prediction. The remaining accuracy is logged.

    :return:
    """
    classifier = get_tabular_classifier_pt()
    deep_fool = DeepFool(classifier, max_iter=5, batch_size=128)
    x_adv = deep_fool.generate(self.x_test_iris)

    self.assertFalse((self.x_test_iris == x_adv).all())
    self.assertLessEqual(np.amax(x_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_adv), 0.0)

    true_labels = np.argmax(self.y_test_iris, axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    acc = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with DeepFool adversarial examples: %.2f%%", (acc * 100))
def test_4_pytorch_iris(self):
    """
    Check that the Virtual Adversarial Method rejects the PyTorch Iris classifier.

    Generating adversarial samples is expected to raise a ``TypeError`` whose message says the attack requires
    a classifier predicting probabilities in the range [0, 1].

    :return:
    """
    classifier = get_tabular_classifier_pt()
    vat = VirtualAdversarialMethod(classifier, eps=0.1, verbose=False)

    with self.assertRaises(TypeError) as context:
        _ = vat.generate(self.x_test_iris.astype(np.float32))

    # The two sentences are joined without a separating space, exactly as the test has always expected
    expected_message = (
        "This attack requires a classifier predicting probabilities in the range [0, 1] as output."
        "Values smaller than 0.0 or larger than 1.0 have been detected."
    )
    self.assertIn(expected_message, str(context.exception))
def test_3_pytorch_iris_pt(self):
    """
    Run untargeted Projected Gradient Descent on the PyTorch Iris classifier.

    The test split is taken from ``self.iris``. The adversarial samples must differ from the test data, stay
    within [0, 1], and change at least one prediction. The remaining accuracy is logged.

    :return:
    """
    (_, _), (x_test, y_test) = self.iris
    classifier = get_tabular_classifier_pt()

    # Untargeted attack
    pgd = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1, max_iter=5)
    x_adv = pgd.generate(x_test)

    self.assertFalse((np.array(x_test) == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    true_labels = np.argmax(np.array(y_test), axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    accuracy = np.sum(adv_labels == true_labels) / len(y_test)
    logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%", (accuracy * 100))
def test_4_pytorch_iris_vector(self):
    """
    Run the Saliency Map Method (JSMA) on the PyTorch Iris classifier.

    The adversarial samples must differ from the test data, stay within [0, 1], and change at least one
    prediction. The remaining accuracy is logged.

    :return:
    """
    classifier = get_tabular_classifier_pt()
    jsma = SaliencyMapMethod(classifier, theta=1, verbose=False)
    x_adv = jsma.generate(self.x_test_iris)

    self.assertFalse((self.x_test_iris == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    true_labels = np.argmax(self.y_test_iris, axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    acc = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with JSMA adversarial examples: %.2f%%", (acc * 100))
def test_iris_clipped(self):
    """
    Exercise PyTorch randomized smoothing on Iris with clean and FGSM-perturbed data.

    A smoothed classifier is built from the model, loss, shapes and clip values of the PyTorch Iris
    classifier. The test then checks that:

    * FGSM samples differ from the test data and stay within [0, 1];
    * the smoothed predictions on the adversarial samples are not all correct;
    * ``predict`` returns an array shaped like the labels whose rows sum to at most 1;
    * ``certify`` returns one prediction and one radius per sample, with radii at most 1 and predictions
      below the number of classes.

    Accuracy and coverage on clean and adversarial data are logged.

    :return:
    """
    (_, _), (x_test, y_test) = self.iris

    ptc = get_tabular_classifier_pt()
    rs = PyTorchRandomizedSmoothing(
        model=ptc.model,
        loss=ptc._loss,
        input_shape=ptc.input_shape,
        nb_classes=ptc.nb_classes,
        channels_first=ptc.channels_first,
        clip_values=ptc.clip_values,
        sample_size=100,
        scale=0.01,
        alpha=0.001,
    )

    # Untargeted attack, crafted against the plain (non-smoothed) classifier
    attack = FastGradientMethod(ptc, eps=0.1)
    x_test_adv = attack.generate(x_test)

    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_smooth = np.argmax(rs.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_smooth).all())

    # Each predict call is kept separate, as in the original, so the number of smoothing evaluations is unchanged
    pred_clean = rs.predict(x_test)
    pred_adv = rs.predict(x_test_adv)
    acc_clean, cov_clean = compute_accuracy(pred_clean, y_test)
    acc_adv, cov_adv = compute_accuracy(pred_adv, y_test)

    # The adversarial metrics go with the "adversarial examples" messages; previously the pairs were swapped
    logger.info("Accuracy on Iris with smoothing on adversarial examples: %.2f%%", (acc_adv * 100))
    logger.info("Coverage on Iris with smoothing on adversarial examples: %.2f%%", (cov_adv * 100))
    logger.info("Accuracy on Iris with smoothing: %.2f%%", (acc_clean * 100))
    logger.info("Coverage on Iris with smoothing: %.2f%%", (cov_clean * 100))

    # Check basic functionality of RS object
    # check predict
    y_test_smooth = rs.predict(x=x_test)
    self.assertEqual(y_test_smooth.shape, y_test.shape)
    self.assertTrue((np.sum(y_test_smooth, axis=1) <= 1).all())

    # check certification
    certified_pred, radius = rs.certify(x=x_test, n=250)
    self.assertEqual(len(certified_pred), len(x_test))
    self.assertEqual(len(radius), len(x_test))
    self.assertTrue((radius <= 1).all())
    self.assertTrue((certified_pred < y_test.shape[1]).all())
def _tabular_dl_estimator(clipped=True):
    """
    Return a deep-learning tabular classifier for the current ``framework``.

    ``framework`` is read from the enclosing scope. An unclipped variant is only built for Keras; for
    TensorFlow and PyTorch only ``clipped=True`` yields a classifier.

    :param clipped: Whether the clipped variant of the classifier is requested.
    :return: The classifier for the current framework.
    :raises ARTTestFixtureNotImplemented: If no classifier is available for the framework/``clipped`` combination.
    """
    estimator = None

    if framework == "keras":
        estimator = get_tabular_classifier_kr()
        if not clipped:
            # Re-wrap the Keras model in a new KerasClassifier constructed without clip values
            estimator = KerasClassifier(model=estimator.model, use_logits=False, channels_first=True)

    if framework in ("tensorflow1", "tensorflow2") and clipped:
        estimator, _ = get_tabular_classifier_tf()

    if framework == "pytorch" and clipped:
        estimator = get_tabular_classifier_pt()

    if estimator is not None:
        return estimator

    raise ARTTestFixtureNotImplemented(
        "no deep learning tabular estimator available", tabular_dl_estimator.__name__, framework
    )
def test_pytorch_iris(self):
    """
    Run the untargeted ElasticNet (EAD) attack on the PyTorch Iris classifier against reference values.

    The first adversarial sample and the predicted labels for all adversarial samples are compared with
    hard-coded expectations, and all adversarial values must lie within [0, 1]. The success rate is logged.

    :return:
    """
    classifier = get_tabular_classifier_pt()
    ead = ElasticNet(classifier, targeted=False, max_iter=10)
    x_adv = ead.generate(self.x_test_iris.astype(np.float32))

    # Reference values for the first adversarial sample
    expected_first_sample = np.asarray([0.8479194, 0.42525578, 0.70166135, 0.28664517])
    np.testing.assert_array_almost_equal(x_adv[0, :], expected_first_sample, decimal=6)

    self.assertLessEqual(np.amax(x_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_adv), 0.0)

    # Reference labels predicted on the adversarial samples
    expected_labels = np.asarray(
        [
            1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 2,
            2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 1, 2, 1, 0, 2, 2, 1, 2, 0, 2, 2, 1, 1, 2,
        ]
    )
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    np.testing.assert_array_equal(adv_labels, expected_labels)

    # Success rate is the share of samples that are no longer classified correctly
    true_labels = np.argmax(self.y_test_iris, axis=1)
    success_rate = 1.0 - np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("EAD success rate on Iris: %.2f%%", (success_rate * 100))
def test_pytorch_iris(self):
    """
    Run the CopycatCNN extraction attack against the PyTorch Iris classifier.

    A new network of three linear layers is wrapped in a ``PyTorchClassifier`` and used as the thieved model.
    After extraction, victim and thieved classifier must agree on more than 30% of the first 100 Iris
    training samples.

    :return:
    """
    # Victim classifier
    victim_ptc = get_tabular_classifier_pt()

    class Model(nn.Module):
        """
        Iris network for PyTorch: three stacked linear layers returning logits.
        """

        def __init__(self):
            super().__init__()
            self.fully_connected1 = nn.Linear(4, 10)
            self.fully_connected2 = nn.Linear(10, 10)
            self.fully_connected3 = nn.Linear(10, 3)

        # pylint: disable=W0221
        # disable pylint because of API requirements for function
        def forward(self, x):
            hidden = self.fully_connected2(self.fully_connected1(x))
            return self.fully_connected3(hidden)

    # Network, loss function and optimizer for the thieved classifier
    model = Model()
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    thieved_ptc = PyTorchClassifier(
        model=model,
        loss=loss_fn,
        optimizer=optimizer,
        input_shape=(4,),
        nb_classes=3,
        clip_values=(0, 1),
        channels_first=True,
    )

    # Extraction attack
    copycat_cnn = CopycatCNN(
        classifier=victim_ptc,
        batch_size_fit=self.batch_size,
        batch_size_query=self.batch_size,
        nb_epochs=NB_EPOCHS,
        nb_stolen=NB_STOLEN,
    )
    thieved_ptc = copycat_cnn.extract(x=self.x_train_iris, thieved_classifier=thieved_ptc)

    # Label agreement on the first 100 training samples
    x_probe = self.x_train_iris[:100]
    victim_labels = np.argmax(victim_ptc.predict(x=x_probe), axis=1)
    thieved_labels = np.argmax(thieved_ptc.predict(x=x_probe), axis=1)
    agreement = np.sum(victim_labels == thieved_labels) / len(victim_labels)

    self.assertGreater(agreement, 0.3)