def test_7_keras_mnist_targeted(self):
    """
    Run the shared ``_test_attack`` check on the Keras image classifier (targeted attack).

    The last positional argument passed to ``_test_attack`` is ``True``.

    :return: None
    """
    targeted = True
    keras_classifier = get_image_classifier_kr()
    self._test_attack(keras_classifier, self.x_test_mnist, self.y_test_mnist, targeted)
def test_5_keras_mnist(self):
    """
    Run the shared ``_test_attack`` check on the Keras image classifier (untargeted attack).

    The last positional argument passed to ``_test_attack`` is ``False``.

    :return: None
    """
    targeted = False
    keras_classifier = get_image_classifier_kr()
    self._test_attack(keras_classifier, self.x_test_mnist, self.y_test_mnist, targeted)
def test_keras_mnist(self):
    """
    Log the Keras model's score on the MNIST train and test splits, then run the backend check.

    ``scores[1]`` of each ``evaluate`` call is reported as the accuracy percentage.

    :return: None
    """
    keras_classifier = get_image_classifier_kr()

    train_scores = keras_classifier._model.evaluate(self.x_train_mnist, self.y_train_mnist)
    logging.info("[Keras, MNIST] Accuracy on training set: %.2f%%", (train_scores[1] * 100))

    test_scores = keras_classifier._model.evaluate(self.x_test_mnist, self.y_test_mnist)
    logging.info("[Keras, MNIST] Accuracy on test set: %.2f%%", (test_scores[1] * 100))

    self._test_backend_mnist(keras_classifier, self.x_test_mnist, self.y_test_mnist)
def test_keras(self):
    """
    Poison the MNIST training data and fit the Keras image classifier on the result.

    The test passes as long as ``poison_dataset`` and ``fit`` complete without raising.

    :return: None
    """
    keras_classifier = get_image_classifier_kr()
    poisoned = self.poison_dataset(keras_classifier, self.x_train_mnist, self.y_train_mnist)
    x_poisoned, y_poisoned = poisoned
    keras_classifier.fit(x_poisoned, y_poisoned, nb_epochs=NB_EPOCHS, batch_size=32)
def test_6_keras(self):
    """
    Check AdversarialPatch on the Keras classifier against stored reference values.

    Covers both patch generation and ``insert_transformed_patch``.

    :return: None
    """
    keras_classifier = get_image_classifier_kr(from_logits=True)
    patch_attack = AdversarialPatch(
        keras_classifier,
        rotation_max=0.5,
        scale_min=0.4,
        scale_max=0.41,
        learning_rate=5.0,
        batch_size=10,
        max_iter=5,
    )

    # One zero-valued target entry per training sample.
    n_samples = self.x_train_mnist.shape[0]
    target = np.zeros(n_samples)
    patch_adv, _ = patch_attack.generate(self.x_train_mnist, target)

    # Spot-check two pixels and the overall sum of the generated patch.
    self.assertAlmostEqual(patch_adv[8, 8, 0], 0.67151666, delta=0.05)
    self.assertAlmostEqual(patch_adv[14, 14, 0], 0.6292826, delta=0.05)
    self.assertAlmostEqual(float(np.sum(patch_adv)), 424.31439208984375, delta=1.0)

    # insert_transformed_patch: place an all-ones 14x14 patch using four corner coordinates.
    corner_coordinates = np.asarray([[2, 13], [2, 18], [12, 22], [8, 13]])
    unit_patch = np.ones((14, 14, 1))
    x_out = patch_attack.insert_transformed_patch(self.x_train_mnist[0], unit_patch, corner_coordinates)

    # Reference values for row 15, channel 0 of the patched image.
    expected_row = np.array(
        [
            0.0, 0.0,
            1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
            0.84313726,
            0.0, 0.0, 0.0, 0.0,
            0.1764706, 0.7294118, 0.99215686, 0.99215686, 0.5882353, 0.10588235,
            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        ],
        dtype=np.float32,
    )
    np.testing.assert_almost_equal(x_out[15, :, 0], expected_row, decimal=3)
def test_5_keras_mnist(self):
    """
    Exercise ZooAttack (``targeted=False``) against the Keras image classifier on MNIST.

    Asserts that the adversarial samples stay within [0, 1], logs how often the predicted
    label changes, and verifies that the test images themselves were not modified.

    :return: None
    """
    pristine_x_test = self.x_test_mnist.copy()

    # Build KerasClassifier
    krc = get_image_classifier_kr()

    # NOTE: a targeted variant of this check and a comparison against hard-coded reference
    # pixel values previously sat here as commented-out code; neither was executed.
    zoo = ZooAttack(classifier=krc, targeted=False, batch_size=5, max_iter=10, binary_search_steps=3)
    attack_kwargs = {"y": random_targets(self.y_test_mnist, krc.nb_classes)}
    x_adv = zoo.generate(self.x_test_mnist, **attack_kwargs)

    # Adversarial samples must remain in the [0, 1] range.
    self.assertLessEqual(np.amax(x_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_adv), 0.0)

    labels_adv = np.argmax(krc.predict(x_adv), axis=1)
    labels_clean = np.argmax(krc.predict(self.x_test_mnist), axis=1)
    logger.debug("ZOO actual: %s", labels_adv)
    logger.info("ZOO success rate on MNIST: %.2f", (sum(labels_clean != labels_adv) / float(len(labels_clean))))

    # Check that x_test has not been modified by attack and classifier
    max_drift = float(np.max(np.abs(pristine_x_test - self.x_test_mnist)))
    self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)

    # Clean-up
    k.clear_session()
def _image_dl_estimator(one_classifier=False, functional=False, **kwargs):
    """
    Build the image classifier(s) for the currently selected ``framework``.

    :param one_classifier: If true, return the first classifier instead of the list.
    :param functional: Request the functional-API model; only honoured for ``kerastf``.
    :param kwargs: Forwarded to the framework-specific classifier factory. The optional
                   ``wildcard`` entry is consumed here and never forwarded.
    :return: ``(classifier_or_list, sess)``, or ``(None, None)`` when no classifier is
             available for this framework / option combination.
    """
    # "wildcard" is an option of this helper, not of the factories. Always strip it, even
    # when it is passed explicitly as None (previously that value leaked into the factory).
    wildcard = kwargs.pop("wildcard", None) is True

    sess = None
    classifier_list = None
    # Most frameworks only provide the plain (non-wildcard, non-functional) model.
    plain_model = wildcard is False and functional is False

    if framework == "keras":
        # NOTE(review): the original also had a functional-API branch here, but it was
        # nested under `functional is False` and could never run; behaviour is unchanged.
        if plain_model:
            classifier_list = [get_image_classifier_kr(**kwargs)]
    elif framework == "tensorflow":
        if plain_model:
            classifier, sess = get_image_classifier_tf(**kwargs)
            classifier_list = [classifier]
    elif framework == "pytorch":
        if plain_model:
            classifier_list = [get_image_classifier_pt(**kwargs)]
    elif framework == "scikitlearn":
        logging.warning(
            "{0} doesn't have an image classifier defined yet".format(
                framework))
        classifier_list = None
    elif framework == "kerastf":
        if wildcard:
            classifier_list = [get_image_classifier_kr_tf_with_wildcard(**kwargs)]
        elif functional:
            classifier_list = [get_image_classifier_kr_tf_functional(**kwargs)]
        else:
            classifier_list = [get_image_classifier_kr_tf(**kwargs)]
    elif framework == "mxnet":
        if plain_model:
            classifier_list = [get_image_classifier_mx_instance(**kwargs)]

    if classifier_list is None:
        return None, None

    if one_classifier:
        return classifier_list[0], sess

    return classifier_list, sess
def test_9_keras_mnist_partial_grads(self):
    """
    Run DeepFool with ``max_iter=2`` and ``nb_grads=3`` on the MNIST test images.

    Asserts that the attack changes the inputs and the predicted labels, then logs the
    accuracy on the adversarial samples.

    :return: None
    """
    keras_classifier = get_image_classifier_kr(from_logits=True)
    deepfool = DeepFool(keras_classifier, max_iter=2, nb_grads=3)

    x_adv = deepfool.generate(self.x_test_mnist)
    self.assertFalse((self.x_test_mnist == x_adv).all())

    y_pred_adv = get_labels_np_array(keras_classifier.predict(x_adv))
    self.assertFalse((self.y_test_mnist == y_pred_adv).all())

    predicted_labels = np.argmax(y_pred_adv, axis=1)
    true_labels = np.argmax(self.y_test_mnist, axis=1)
    n_correct = np.sum(predicted_labels == true_labels)
    accuracy = n_correct / self.y_test_mnist.shape[0]
    logger.info("Accuracy on adversarial test examples: %.2f%%", (accuracy * 100))
def setUpClass(cls):
    """
    Load MNIST, truncate it to NB_TRAIN / NB_TEST samples and create the Keras classifier.

    :return: None
    """
    train_split, test_split, _, _ = load_dataset("mnist")
    x_train, y_train = train_split
    x_test, y_test = test_split

    # Keep only the first NB_TRAIN / NB_TEST samples of each split.
    train_subset = (x_train[:NB_TRAIN], y_train[:NB_TRAIN])
    test_subset = (x_test[:NB_TEST], y_test[:NB_TEST])
    cls.mnist = train_subset, test_subset

    # Keras classifier
    cls.classifier_k = get_image_classifier_kr()
def _image_dl_estimator(functional=False, **kwargs):
    """
    Build the image classifier for the currently selected ``framework``.

    :param functional: Request the functional-API model; honoured for ``pytorch`` and
                       ``kerastf`` only.
    :param kwargs: Forwarded to the framework-specific classifier factory. The optional
                   ``wildcard`` entry is consumed here and never forwarded.
    :return: ``(classifier, sess)``; ``sess`` is only set by the TensorFlow factory.
    :raises ARTTestFixtureNotImplemented: If no classifier exists for this framework /
            option combination, or the Keras factory raises ``NotImplementedError``.
    """
    # "wildcard" is an option of this helper, not of the factories. Always strip it, even
    # when it is passed explicitly as None (previously that value leaked into the factory).
    wildcard = kwargs.pop("wildcard", None) is True

    sess = None
    classifier = None
    plain_model = wildcard is False and functional is False

    if framework == "keras":
        # NOTE(review): the original also had a functional-API branch here, but it was
        # nested under `functional is False` and could never run; behaviour is unchanged.
        if plain_model:
            try:
                classifier = get_image_classifier_kr(**kwargs)
            except NotImplementedError as err:
                # Keep the factory's error as the explicit cause for easier debugging.
                raise ARTTestFixtureNotImplemented(
                    "This combination of loss function options is currently not supported.",
                    image_dl_estimator.__name__,
                    framework,
                ) from err
    elif framework == "tensorflow1" or framework == "tensorflow2":
        if plain_model:
            classifier, sess = get_image_classifier_tf(**kwargs)
            return classifier, sess
    elif framework == "pytorch":
        if not wildcard:
            if functional:
                classifier = get_image_classifier_pt_functional(**kwargs)
            else:
                classifier = get_image_classifier_pt(**kwargs)
    elif framework == "kerastf":
        if wildcard:
            classifier = get_image_classifier_kr_tf_with_wildcard(**kwargs)
        elif functional:
            classifier = get_image_classifier_kr_tf_functional(**kwargs)
        else:
            classifier = get_image_classifier_kr_tf(**kwargs)
    elif framework == "mxnet":
        if plain_model:
            classifier = get_image_classifier_mx_instance(**kwargs)

    if classifier is None:
        raise ARTTestFixtureNotImplemented(
            "no test deep learning estimator available",
            image_dl_estimator.__name__, framework)

    return classifier, sess
def test_3_kr(self):
    """
    Test NumpyRandomizedSmoothing wrapped around a Keras classifier.

    Compares a targeted FGSM attack on the plain classifier with the same attack on the
    smoothed classifier, then checks ``predict``, ``certify``, ``loss_gradient`` and ``fit``.

    :return: None
    """
    # Build KerasClassifier
    classifier = get_image_classifier_kr()

    # Get MNIST
    (_, _), (x_test, y_test) = self.mnist

    # First FGSM attack:
    fgsm = FastGradientMethod(estimator=classifier, targeted=True)
    params = {"y": random_targets(y_test, classifier.nb_classes)}
    x_test_adv = fgsm.generate(x_test, **params)

    # Initialize RS object and attack with FGSM
    rs = NumpyRandomizedSmoothing(
        classifier=classifier,
        sample_size=100,
        scale=0.01,
        alpha=0.001,
    )
    fgsm_with_rs = FastGradientMethod(estimator=rs, targeted=True)
    x_test_adv_with_rs = fgsm_with_rs.generate(x_test, **params)

    # Compare results
    # check shapes are equal and values are within a certain range
    self.assertEqual(x_test_adv.shape, x_test_adv_with_rs.shape)
    self.assertTrue((np.abs(x_test_adv - x_test_adv_with_rs) < 0.75).all())

    # Check basic functionality of RS object
    # check predict
    y_test_smooth = rs.predict(x=x_test)
    y_test_base = classifier.predict(x=x_test)
    self.assertEqual(y_test_smooth.shape, y_test.shape)
    self.assertTrue((np.sum(y_test_smooth, axis=1) <= np.ones((NB_TEST,))).all())
    self.assertTrue((np.argmax(y_test_smooth, axis=1) == np.argmax(y_test_base, axis=1)).all())

    # check certification
    pred, radius = rs.certify(x=x_test, n=250)
    self.assertEqual(len(pred), NB_TEST)
    self.assertEqual(len(radius), NB_TEST)
    self.assertTrue((radius <= 1).all())
    self.assertTrue((pred < y_test.shape[1]).all())

    # loss gradient
    grad = rs.loss_gradient(x=x_test, y=y_test, sampling=True)
    # Use the TestCase assertion rather than a bare `assert`, which is stripped under -O
    # and reports no values on failure.
    self.assertEqual(grad.shape, (10, 28, 28, 1))

    # fit
    rs.fit(x=x_test, y=y_test)
def test_binary_activation_detector(self):
    """
    Train a BinaryActivationDetector on clean and FGSM samples and check its detections.

    The detector reads the classifier's activations at layer 0. The test requires at least
    one adversarial test sample flagged as class 1 and one clean test sample as class 0.

    :return: None
    """
    # Get MNIST
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train = x_train[:NB_TRAIN]
    y_train = y_train[:NB_TRAIN]
    x_test = x_test[:NB_TEST]
    y_test = y_test[:NB_TEST]

    # Keras classifier
    classifier = get_image_classifier_kr()

    # Generate adversarial samples:
    fgsm = FastGradientMethod(classifier, eps=0.1)
    x_train_adv = fgsm.generate(x_train[:NB_TRAIN])
    x_test_adv = fgsm.generate(x_test[:NB_TRAIN])

    # Compile training data for detector: clean samples are labelled [1, 0], adversarial [0, 1].
    clean_labels = np.array([[1, 0]] * NB_TRAIN)
    adversarial_labels = np.array([[0, 1]] * NB_TRAIN)
    x_train_detector = np.concatenate((x_train[:NB_TRAIN], x_train_adv), axis=0)
    y_train_detector = np.concatenate((clean_labels, adversarial_labels), axis=0)

    # Create a simple CNN for the detector; its input shape is the layer-0 activation shape.
    layer_0_activations = classifier.get_activations(x_test[:1], 0, batch_size=128)
    activation_shape = layer_0_activations.shape[1:]
    number_outputs = 2

    detector_model = Sequential()
    detector_model.add(MaxPooling2D(pool_size=(2, 2), input_shape=activation_shape))
    detector_model.add(Flatten())
    detector_model.add(Dense(number_outputs, activation="softmax"))
    detector_model.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=keras.optimizers.Adam(lr=0.01),
        metrics=["accuracy"],
    )

    # Create detector and train it.
    # Detector consider activations at layer=0:
    detector_classifier = KerasClassifier(model=detector_model, clip_values=(0, 1), use_logits=False)
    detector = BinaryActivationDetector(classifier=classifier, detector=detector_classifier, layer=0)
    detector.fit(x_train_detector, y_train_detector, nb_epochs=2, batch_size=128)

    # Apply detector on clean and adversarial test data:
    clean_detection = np.argmax(detector.predict(x_test), axis=1)
    adversarial_detection = np.argmax(detector.predict(x_test_adv), axis=1)

    # Assert there is at least one true positive and negative
    nb_true_positives = len(np.where(adversarial_detection == 1)[0])
    nb_true_negatives = len(np.where(clean_detection == 0)[0])
    logger.debug("Number of true positives detected: %i", nb_true_positives)
    logger.debug("Number of true negatives detected: %i", nb_true_negatives)
    self.assertGreater(nb_true_positives, 0)
    self.assertGreater(nb_true_negatives, 0)
def test_5_keras_classifier(self):
    """
    Apply DefensiveDistillation to a Keras classifier and compare it with the original.

    Requires the two models to agree on more than half of the training samples and the
    cross entropy of their predicted labels to lie in [0, 10).

    :return: None
    """
    # Source classifier and the classifier that receives the distilled knowledge.
    teacher = get_image_classifier_kr()
    student = get_image_classifier_kr(load_init=False)

    # Create defensive distillation transformer and perform the transformation
    distillation = DefensiveDistillation(classifier=teacher, batch_size=BATCH_SIZE, nb_epochs=NB_EPOCHS)
    student = distillation(x=self.x_train_mnist, transformed_classifier=student)

    # Compare the predicted labels of both classifiers
    teacher_labels = np.argmax(teacher.predict(x=self.x_train_mnist, batch_size=BATCH_SIZE), axis=1)
    student_labels = np.argmax(student.predict(x=self.x_train_mnist, batch_size=BATCH_SIZE), axis=1)

    agreement = np.sum(teacher_labels == student_labels) / len(teacher_labels)
    self.assertGreater(agreement, 0.5)

    ce = cross_entropy(teacher_labels, student_labels)
    self.assertLess(ce, 10)
    self.assertGreaterEqual(ce, 0)
def test_check_params(self):
    """
    Check that FeatureCollisionAttack rejects invalid constructor arguments.

    Each case pairs the expected exception type with the keyword arguments that should
    trigger it; cases are tried in order.

    :return: None
    """
    krc = get_image_classifier_kr(from_logits=True)

    invalid_cases = [
        (ValueError, {"feature_layer": 1, "learning_rate": -1}),
        (TypeError, {"feature_layer": 1.0}),
        (ValueError, {"feature_layer": 1, "decay_coeff": -1}),
        (ValueError, {"feature_layer": 1, "stopping_tol": -1}),
        (ValueError, {"feature_layer": 1, "obj_threshold": -1}),
        (ValueError, {"feature_layer": 1, "max_iter": -1}),
        (ValueError, {"feature_layer": 1, "watermark": 1}),
        (ValueError, {"feature_layer": 1, "verbose": "true"}),
    ]

    for expected_error, bad_kwargs in invalid_cases:
        with self.assertRaises(expected_error):
            _ = FeatureCollisionAttack(krc, target=self.x_train_mnist, **bad_kwargs)
def setUpClass(cls):
    """
    Reset the Keras session, load a truncated MNIST and create the small Keras model.

    :return: None
    """
    k.clear_session()

    # Get MNIST and keep only the first NB_TRAIN / NB_TEST samples of each split.
    train_split, test_split, _, _ = load_mnist()
    x_train, y_train = train_split
    x_test, y_test = test_split
    cls.mnist = (x_train[:NB_TRAIN], y_train[:NB_TRAIN]), (x_test[:NB_TEST], y_test[:NB_TEST])

    # Load small Keras model
    cls.model_mnist = get_image_classifier_kr()
def test_keras_classifier(self):
    """
    Extract a Keras classifier with CopycatCNN and compare it with the victim.

    The thieved model must agree with the victim on more than 30% of the first 100
    training samples.

    :return: None
    """
    # Victim classifier
    victim_krc = get_image_classifier_kr()

    # Simple CNN that will be trained on the victim's answers.
    thief_model = Sequential()
    thief_model.add(Conv2D(1, kernel_size=(7, 7), activation="relu", input_shape=(28, 28, 1)))
    thief_model.add(MaxPooling2D(pool_size=(4, 4)))
    thief_model.add(Flatten())
    thief_model.add(Dense(10, activation="softmax"))
    thief_model.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=keras.optimizers.Adam(lr=0.001),
        metrics=["accuracy"],
    )
    thieved_krc = KerasClassifier(thief_model, clip_values=(0, 1), use_logits=False)

    # Create attack and run the extraction
    copycat_cnn = CopycatCNN(
        classifier=victim_krc,
        batch_size_fit=self.batch_size,
        batch_size_query=self.batch_size,
        nb_epochs=NB_EPOCHS,
        nb_stolen=NB_STOLEN,
    )
    thieved_krc = copycat_cnn.extract(x=self.x_train_mnist, thieved_classifier=thieved_krc)

    # Compare predicted labels on the first 100 training samples.
    probe = self.x_train_mnist[:100]
    victim_preds = np.argmax(victim_krc.predict(x=probe), axis=1)
    thieved_preds = np.argmax(thieved_krc.predict(x=probe), axis=1)
    agreement = np.sum(victim_preds == thieved_preds) / len(victim_preds)

    self.assertGreater(agreement, 0.3)

    # Clean-up
    k.clear_session()
def test_keras(self):
    """
    Generate an adversarial patch on the Keras classifier and compare it with stored values.

    The master seed is set to 1234 after the attack is constructed and before ``generate``.

    :return: None
    """
    keras_classifier = get_image_classifier_kr()

    patch_attack = AdversarialPatch(
        keras_classifier,
        rotation_max=22.5,
        scale_min=0.1,
        scale_max=1.0,
        learning_rate=5.0,
        batch_size=10,
        max_iter=500,
    )
    master_seed(seed=1234)
    patch_adv, _ = patch_attack.generate(self.x_train_mnist)

    # Spot-check two pixels and the overall sum of the patch.
    self.assertAlmostEqual(patch_adv[8, 8, 0], -3.494, delta=0.2)
    self.assertAlmostEqual(patch_adv[14, 14, 0], 18.402, delta=0.2)
    patch_total = float(np.sum(patch_adv))
    self.assertAlmostEqual(patch_total, 1099.293, delta=50)
def test_8_keras_mnist(self):
    """
    Run DeepFool on the MNIST train and test images with the Keras classifier.

    Logs clean and adversarial accuracy, asserts that inputs and predicted labels change
    under attack, and verifies the test images themselves are left unmodified.

    :return: None
    """
    pristine_x_test = self.x_test_mnist.copy()

    # Keras classifier
    classifier = get_image_classifier_kr(from_logits=True)

    train_scores = classifier._model.evaluate(self.x_train_mnist, self.y_train_mnist)
    logger.info("[Keras, MNIST] Accuracy on training set: %.2f%%", (train_scores[1] * 100))
    test_scores = classifier._model.evaluate(self.x_test_mnist, self.y_test_mnist)
    logger.info("[Keras, MNIST] Accuracy on test set: %.2f%%", (test_scores[1] * 100))

    deepfool = DeepFool(classifier, max_iter=5, batch_size=11, verbose=False)
    x_train_adv = deepfool.generate(self.x_train_mnist)
    x_test_adv = deepfool.generate(self.x_test_mnist)

    # The attack must change at least one input value in each split.
    self.assertFalse((self.x_train_mnist == x_train_adv).all())
    self.assertFalse((self.x_test_mnist == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

    # ...and at least one predicted label in each split.
    self.assertFalse((self.y_train_mnist == train_y_pred).all())
    self.assertFalse((self.y_test_mnist == test_y_pred).all())

    train_hits = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(self.y_train_mnist, axis=1))
    train_accuracy = train_hits / self.y_train_mnist.shape[0]
    logger.info("Accuracy on adversarial train examples: %.2f%%", (train_accuracy * 100))

    test_hits = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(self.y_test_mnist, axis=1))
    test_accuracy = test_hits / self.y_test_mnist.shape[0]
    logger.info("Accuracy on adversarial test examples: %.2f%%", (test_accuracy * 100))

    # Check that x_test has not been modified by attack and classifier
    max_drift = float(np.max(np.abs(pristine_x_test - self.x_test_mnist)))
    self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
def test_backdoor_image(self):
    """
    Check that a classifier can be fitted on data poisoned via ``poison_func_3``.

    The poisoned data is shuffled before fitting. The test passes as long as no step raises.

    :return: None
    """
    krc = get_image_classifier_kr()

    poisoned = self.poison_dataset(self.x_train_mnist, self.y_train_mnist, self.poison_func_3)
    _is_poison_train, x_poisoned_raw, y_poisoned_raw = poisoned

    # Shuffle training data
    order = np.arange(np.shape(y_poisoned_raw)[0])
    np.random.shuffle(order)
    x_shuffled = x_poisoned_raw[order]
    y_shuffled = y_poisoned_raw[order]

    krc.fit(x_shuffled, y_shuffled, nb_epochs=NB_EPOCHS, batch_size=32)
def setUpClass(cls):
    """
    Load MNIST, create the Keras classifier and build the SpectralSignatureDefense.

    Only the training split is truncated (to NB_TRAIN samples).

    :return: None
    """
    from tests.utils import get_image_classifier_kr

    train_split, test_split, min_, max_ = load_mnist()
    x_train = train_split[0][:NB_TRAIN]
    y_train = train_split[1][:NB_TRAIN]
    x_test, y_test = test_split
    cls.mnist = (x_train, y_train), (x_test, y_test), (min_, max_)

    cls.classifier = get_image_classifier_kr()

    defence_options = {
        "batch_size": BATCH_SIZE,
        "eps_multiplier": EPS_MULTIPLIER,
        "ub_pct_poison": UB_PCT_POISON,
        "nb_classes": 10,
    }
    cls.defence = SpectralSignatureDefense(cls.classifier, x_train, y_train, **defence_options)
def _image_dl_estimator_defended(one_classifier=False, **kwargs):
    """
    Build the framework's image classifier with preprocessing defences attached.

    :param one_classifier: Unused; kept for interface compatibility.
    :param kwargs: Forwarded to the classifier factory. The optional ``defenses`` entry (a
                   collection of names among "FeatureSqueezing", "JpegCompression" and
                   "SpatialSmoothing") is consumed here; when absent or None, feature
                   squeezing alone is used.
    :return: ``(classifier, sess)``; ``sess`` is always None.
    :raises ARTTestFixtureNotImplemented: If the framework has no defended image estimator.
    """
    sess = None
    classifier = None

    clip_values = (0, 1)
    fs = FeatureSqueezing(bit_depth=2, clip_values=clip_values)

    # Always strip "defenses" so that an explicit defenses=None cannot leak into the factory.
    requested = kwargs.pop("defenses", None)

    defenses = []
    if requested is None:
        defenses.append(fs)
    else:
        if "FeatureSqueezing" in requested:
            defenses.append(fs)
        if "JpegCompression" in requested:
            defenses.append(
                JpegCompression(clip_values=clip_values, apply_predict=True))
        if "SpatialSmoothing" in requested:
            defenses.append(SpatialSmoothing())

    if framework == "tensorflow2":
        classifier, _ = get_image_classifier_tf(**kwargs)
    if framework == "keras":
        classifier = get_image_classifier_kr(**kwargs)
    if framework == "kerastf":
        classifier = get_image_classifier_kr_tf(**kwargs)
    if framework == "pytorch":
        classifier = get_image_classifier_pt(**kwargs)

    # Fail before touching the classifier: the original read classifier.channels_first
    # first, so unsupported frameworks raised AttributeError instead of this exception.
    if classifier is None:
        raise ARTTestFixtureNotImplemented(
            "no defended image estimator",
            image_dl_estimator_defended.__name__, framework,
            {"defenses": defenses})

    # Align each defence's channel ordering with the classifier it will preprocess for.
    for defense in defenses:
        if "channels_first" in defense.params:
            defense.channels_first = classifier.channels_first

    classifier.set_params(preprocessing_defences=defenses)

    return classifier, sess
def test_multiple_perturbations(self):
    """
    Check that a classifier can be fitted on data poisoned with two perturbation functions.

    ``poison_func_4`` and ``poison_func_1`` are passed together as a list. The poisoned data
    is shuffled before fitting. The test passes as long as no step raises.

    :return: None
    """
    krc = get_image_classifier_kr()

    perturbations = [self.poison_func_4, self.poison_func_1]
    poisoned = self.poison_dataset(self.x_train_mnist, self.y_train_mnist, perturbations)
    _is_poison_train, x_poisoned_raw, y_poisoned_raw = poisoned

    # Shuffle training data
    order = np.arange(np.shape(y_poisoned_raw)[0])
    np.random.shuffle(order)
    x_shuffled = x_poisoned_raw[order]
    y_shuffled = y_poisoned_raw[order]

    krc.fit(x_shuffled, y_shuffled, nb_epochs=NB_EPOCHS, batch_size=32)
def test_check_params(self):
    """
    Check that ProjectedGradientDescentCommon rejects invalid constructor arguments.

    Each case pairs the expected exception type with the keyword arguments that should
    trigger it; cases are tried in order.

    :return: None
    """
    krc = get_image_classifier_kr(from_logits=True)

    invalid_cases = [
        (ValueError, {"norm": -1}),
        (TypeError, {"eps": "1", "eps_step": 0.1}),
        (ValueError, {"eps": -1}),
        (TypeError, {"eps": np.array([-1])}),
        (ValueError, {"eps_step": -1}),
        (TypeError, {"eps_step": np.array([-1])}),
        (ValueError, {"eps": np.array([1.0, 1.0]), "eps_step": np.array([1.0])}),
        (ValueError, {"targeted": "False"}),
        (TypeError, {"num_random_init": "1"}),
        (ValueError, {"num_random_init": -1}),
        (ValueError, {"batch_size": -1}),
        (ValueError, {"max_iter": -1}),
        (ValueError, {"verbose": "False"}),
    ]

    for expected_error, bad_kwargs in invalid_cases:
        with self.assertRaises(expected_error):
            _ = ProjectedGradientDescentCommon(krc, **bad_kwargs)
def test_4_keras_mnist(self):
    """
    Run TargetedUniversalPerturbation (FGSM attacker, target class 0) on the Keras classifier.

    Asserts a fooling rate of at least 0.2 unless the attack did not converge, that the
    perturbation changes inputs and predictions, and that the test images are unmodified.

    :return: None
    """
    pristine_x_test = self.x_test_mnist.copy()

    # Build KerasClassifier
    krc = get_image_classifier_kr()

    # One-hot targets: every training sample is aimed at class `target`.
    target = 0
    y_target = np.zeros([len(self.x_train_mnist), 10])
    y_target[:, target] = 1.0

    # Attack
    fgsm_params = {"eps": 0.3, "targeted": True, "verbose": False}
    up = TargetedUniversalPerturbation(krc, max_iter=1, attacker="fgsm", attacker_params=fgsm_params)
    x_train_adv = up.generate(self.x_train_mnist, y=y_target)
    self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

    # Apply the learned noise to the test images.
    x_test_adv = self.x_test_mnist + up.noise
    self.assertFalse((self.x_test_mnist == x_test_adv).all())

    train_labels_adv = np.argmax(krc.predict(x_train_adv), axis=1)
    test_labels_adv = np.argmax(krc.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_mnist, axis=1) == test_labels_adv).all())
    self.assertFalse((np.argmax(self.y_train_mnist, axis=1) == train_labels_adv).all())

    # Check that x_test has not been modified by attack and classifier
    max_drift = float(np.max(np.abs(pristine_x_test - self.x_test_mnist)))
    self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
def test_keras(self):
    """
    Test NeuralCleanse with a KerasClassifier.

    The body only runs when ``keras.__version__`` is exactly "2.2.4"; for any other version
    the test is reported as skipped.

    :return: None
    """
    if keras.__version__ != "2.2.4":
        # The original called self.assertRaises(NotImplementedError) with no callable and
        # no `with` block, which checks nothing and let the test pass silently.
        self.skipTest("NeuralCleanse Keras test only runs with keras 2.2.4.")

    # Build KerasClassifier
    krc = get_image_classifier_kr()

    # Get MNIST
    (x_train, y_train), (x_test, y_test) = self.mnist

    krc.fit(x_train, y_train, nb_epochs=1)

    cleanse = NeuralCleanse(krc)
    defense_cleanse = cleanse(krc, steps=2)
    defense_cleanse.mitigate(x_test, y_test, mitigation_types=["filtering", "pruning", "unlearning"])
def _get_image_classifier_list(one_classifier=False, **kwargs):
    """
    Build the image classifier(s) for the currently selected ``framework``.

    :param one_classifier: If true, return the first classifier instead of the list.
    :param kwargs: Forwarded to the Keras and TensorFlow classifier factories.
    :return: ``(classifier_or_list, sess)``, or ``(None, None)`` when the framework has no
             image classifier.
    """
    sess = None
    # Default to "no classifier": the original left this unbound for frameworks outside
    # the four handled below and failed with UnboundLocalError.
    classifier_list = None

    if framework == "keras":
        classifier_list = [get_image_classifier_kr(**kwargs)]
    if framework == "tensorflow":
        classifier, sess = get_image_classifier_tf(**kwargs)
        classifier_list = [classifier]
    if framework == "pytorch":
        # NOTE(review): kwargs are not forwarded to the PyTorch factory, unlike the
        # branches above - confirm whether that is intentional.
        classifier_list = [get_image_classifier_pt()]
    if framework == "scikitlearn":
        logging.warning("{0} doesn't have an image classifier defined yet".format(framework))
        classifier_list = None

    if classifier_list is None:
        return None, None

    if one_classifier:
        return classifier_list[0], sess

    return classifier_list, sess
def _image_dl_estimator_defended(one_classifier=False, **kwargs):
    """
    Wrap the framework's Keras model in a KerasClassifier with preprocessing defences.

    :param one_classifier: Unused; kept for interface compatibility.
    :param kwargs: Forwarded to the classifier factory. The optional ``defenses`` entry (a
                   collection of names among "FeatureSqueezing", "JpegCompression" and
                   "SpatialSmoothing") is consumed here; when absent or None, feature
                   squeezing alone is used.
    :return: ``(classifier, sess)``; ``sess`` is always None.
    :raises ARTTestFixtureNotImplemented: If the framework is neither "keras" nor "kerastf".
    """
    sess = None
    classifier = None

    clip_values = (0, 1)
    fs = FeatureSqueezing(bit_depth=2, clip_values=clip_values)

    # Always strip "defenses" so that an explicit defenses=None cannot leak into the factory.
    requested = kwargs.pop("defenses", None)

    defenses = []
    if requested is None:
        defenses.append(fs)
    else:
        if "FeatureSqueezing" in requested:
            defenses.append(fs)
        if "JpegCompression" in requested:
            defenses.append(
                JpegCompression(clip_values=clip_values, apply_predict=True))
        if "SpatialSmoothing" in requested:
            defenses.append(SpatialSmoothing())

    if framework == "keras":
        kr_classifier = get_image_classifier_kr(**kwargs)
        # Get the ready-trained Keras model
        classifier = KerasClassifier(model=kr_classifier._model,
                                     clip_values=(0, 1),
                                     preprocessing_defences=defenses)

    if framework == "kerastf":
        kr_tf_classifier = get_image_classifier_kr_tf(**kwargs)
        classifier = KerasClassifier(model=kr_tf_classifier._model,
                                     clip_values=(0, 1),
                                     preprocessing_defences=defenses)

    if classifier is None:
        raise ARTTestFixtureNotImplemented(
            "no defended image estimator",
            image_dl_estimator_defended.__name__, framework,
            {"defenses": defenses})

    return classifier, sess
def test_4_keras_classifier(self):
    """
    Run SpatialTransformation on the Keras classifier and compare with stored values.

    Checks one adversarial pixel per split, the fooling rate, the selected translation and
    rotation, and that the test images themselves are left unmodified.

    :return: None
    """
    pristine_x_test = self.x_test_mnist.copy()

    # Build KerasClassifier
    krc = get_image_classifier_kr()

    # Attack
    spatial_attack = SpatialTransformation(
        krc,
        max_translation=10.0,
        num_translations=3,
        max_rotation=30.0,
        num_rotations=3,
        verbose=False,
    )
    x_train_adv = spatial_attack.generate(self.x_train_mnist)

    self.assertAlmostEqual(x_train_adv[0, 8, 13, 0], 0.49004024, delta=0.01)
    self.assertAlmostEqual(spatial_attack.fooling_rate, 0.71, delta=0.02)

    # Transformation parameters selected by the attack.
    self.assertEqual(spatial_attack.attack_trans_x, 3)
    self.assertEqual(spatial_attack.attack_trans_y, 3)
    self.assertEqual(spatial_attack.attack_rot, 30.0)

    x_test_adv = spatial_attack.generate(self.x_test_mnist)
    self.assertAlmostEqual(x_test_adv[0, 14, 14, 0], 0.013572651, delta=0.01)

    # Check that x_test has not been modified by attack and classifier
    max_drift = float(np.max(np.abs(pristine_x_test - self.x_test_mnist)))
    self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)

    k.clear_session()
def test_8_keras_mnist(self):
    """
    Run UniversalPerturbation with the "ead" attacker on the Keras classifier.

    Asserts a fooling rate of at least 0.2 unless the attack did not converge, that the
    perturbation changes inputs and predictions, and that the test images are unmodified.

    :return: None
    """
    pristine_x_test = self.x_test_mnist.copy()

    # Build KerasClassifier
    krc = get_image_classifier_kr()

    # Attack
    ead_params = {"max_iter": 2, "targeted": False, "verbose": False}
    up = UniversalPerturbation(krc, max_iter=1, attacker="ead", attacker_params=ead_params, verbose=False)
    x_train_adv = up.generate(self.x_train_mnist)
    self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

    # Apply the learned noise to the test images.
    x_test_adv = self.x_test_mnist + up.noise
    self.assertFalse((self.x_test_mnist == x_test_adv).all())

    train_labels_adv = np.argmax(krc.predict(x_train_adv), axis=1)
    test_labels_adv = np.argmax(krc.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_mnist, axis=1) == test_labels_adv).all())
    self.assertFalse((np.argmax(self.y_train_mnist, axis=1) == train_labels_adv).all())

    # Check that x_test has not been modified by attack and classifier
    max_drift = float(np.max(np.abs(pristine_x_test - self.x_test_mnist)))
    self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
def test_keras_mnist_L2(self):
    """
    Run CarliniL2Method, targeted and then untargeted, on the Keras classifier.

    Both runs must keep adversarial samples within [0, 1]. The targeted run must hit at
    least one target label; the untargeted run must differ from at least one of those same
    target labels. The test images themselves must be left unmodified.

    :return: None
    """
    pristine_x_test = self.x_test_mnist.copy()

    # Build KerasClassifier
    krc = get_image_classifier_kr(from_logits=True)

    # First attack (targeted)
    y_target = [6, 6, 7, 4, 9, 7, 9, 0, 1, 0]
    targeted_attack = CarliniL2Method(classifier=krc, targeted=True, max_iter=10)
    one_hot_targets = to_categorical(y_target, nb_classes=10)
    x_test_adv = targeted_attack.generate(self.x_test_mnist, y=one_hot_targets)

    self.assertFalse((self.x_test_mnist == x_test_adv).all())
    self.assertLessEqual(np.amax(x_test_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

    y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
    logger.debug("CW2 Target: %s", y_target)
    logger.debug("CW2 Actual: %s", y_pred_adv)
    logger.info("CW2 Success Rate: %.2f", (np.sum(y_target == y_pred_adv) / float(len(y_target))))
    self.assertTrue((y_target == y_pred_adv).any())

    # Second attack (untargeted); results are still compared against y_target.
    untargeted_attack = CarliniL2Method(classifier=krc, targeted=False, max_iter=10)
    x_test_adv = untargeted_attack.generate(self.x_test_mnist)

    self.assertLessEqual(np.amax(x_test_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

    y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
    logger.debug("CW2 Target: %s", y_target)
    logger.debug("CW2 Actual: %s", y_pred_adv)
    logger.info("CW2 Success Rate: %.2f", (np.sum(y_target != y_pred_adv) / float(len(y_target))))
    self.assertTrue((y_target != y_pred_adv).any())

    # Check that x_test has not been modified by attack and classifier
    max_drift = float(np.max(np.abs(pristine_x_test - self.x_test_mnist)))
    self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)

    # Clean-up
    k.clear_session()