def test_5_pytorch_classifier(self):
    """
    Test with the PyTorchClassifier.
    :return:
    """
    self.x_train_mnist = np.reshape(
        self.x_train_mnist, (self.x_train_mnist.shape[0], 1, 28, 28)
    ).astype(np.float32)

    # Build PyTorchClassifier
    victim_ptc = get_image_classifier_pt()

    # Create the thieved classifier
    thieved_ptc = get_image_classifier_pt(load_init=False)

    # Create random attack
    attack = KnockoffNets(
        classifier=victim_ptc,
        batch_size_fit=BATCH_SIZE,
        batch_size_query=BATCH_SIZE,
        nb_epochs=NB_EPOCHS,
        nb_stolen=NB_STOLEN,
        sampling_strategy="random",
        verbose=False,
    )
    thieved_ptc = attack.extract(x=self.x_train_mnist, thieved_classifier=thieved_ptc)

    victim_preds = np.argmax(victim_ptc.predict(x=self.x_train_mnist), axis=1)
    thieved_preds = np.argmax(thieved_ptc.predict(x=self.x_train_mnist), axis=1)
    acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)
    self.assertGreater(acc, 0.3)

    # Create adaptive attack
    attack = KnockoffNets(
        classifier=victim_ptc,
        batch_size_fit=BATCH_SIZE,
        batch_size_query=BATCH_SIZE,
        nb_epochs=NB_EPOCHS,
        nb_stolen=NB_STOLEN,
        sampling_strategy="adaptive",
        reward="all",
        verbose=False,
    )
    thieved_ptc = attack.extract(x=self.x_train_mnist, y=self.y_train_mnist, thieved_classifier=thieved_ptc)

    victim_preds = np.argmax(victim_ptc.predict(x=self.x_train_mnist), axis=1)
    thieved_preds = np.argmax(thieved_ptc.predict(x=self.x_train_mnist), axis=1)
    acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)
    self.assertGreater(acc, 0.4)

    # Restore the original NHWC shape of the shared MNIST data
    self.x_train_mnist = np.reshape(
        self.x_train_mnist, (self.x_train_mnist.shape[0], 28, 28, 1)
    ).astype(np.float32)
def test_fit_generator(self):
    classifier = get_image_classifier_pt()
    accuracy = (
        np.sum(np.argmax(classifier.predict(self.x_test_mnist), axis=1) == np.argmax(self.y_test_mnist, axis=1))
        / self.n_test
    )
    logger.info("Accuracy: %.2f%%", (accuracy * 100))

    # Create tensors from data
    x_train_tens = torch.from_numpy(self.x_train_mnist)
    x_train_tens = x_train_tens.float()
    y_train_tens = torch.from_numpy(self.y_train_mnist)

    # Create PyTorch dataset and loader
    dataset = torch.utils.data.TensorDataset(x_train_tens, y_train_tens)
    data_loader = DataLoader(dataset=dataset, batch_size=5, shuffle=True)
    data_gen = PyTorchDataGenerator(data_loader, size=self.n_train, batch_size=5)

    # Fit model with generator
    classifier.fit_generator(data_gen, nb_epochs=2)
    accuracy_2 = (
        np.sum(np.argmax(classifier.predict(self.x_test_mnist), axis=1) == np.argmax(self.y_test_mnist, axis=1))
        / self.n_test
    )
    logger.info("Accuracy: %.2f%%", (accuracy_2 * 100))

    self.assertEqual(accuracy, 0.32)
    self.assertAlmostEqual(accuracy_2, 0.75, delta=0.1)
def test_2_pt(self):
    """
    Test with a PyTorch Classifier.
    :return:
    """
    # Build PyTorchClassifier
    ptc = get_image_classifier_pt()

    # Get MNIST
    (_, _), (x_test, y_test) = self.mnist
    x_test = x_test.transpose(0, 3, 1, 2).astype(np.float32)

    # First FGSM attack:
    fgsm = FastGradientMethod(estimator=ptc, targeted=True)
    params = {"y": random_targets(y_test, ptc.nb_classes)}
    x_test_adv = fgsm.generate(x_test, **params)

    # Initialize RS object and attack with FGSM
    rs = PyTorchRandomizedSmoothing(
        model=ptc.model,
        loss=ptc._loss,
        optimizer=torch.optim.Adam(ptc.model.parameters(), lr=0.01),
        input_shape=ptc.input_shape,
        nb_classes=ptc.nb_classes,
        channels_first=ptc.channels_first,
        clip_values=ptc.clip_values,
        sample_size=100,
        scale=0.01,
        alpha=0.001,
    )
    fgsm_with_rs = FastGradientMethod(estimator=rs, targeted=True)
    x_test_adv_with_rs = fgsm_with_rs.generate(x_test, **params)

    # Compare results: check that shapes are equal and values are within a certain range
    self.assertEqual(x_test_adv.shape, x_test_adv_with_rs.shape)
    self.assertTrue((np.abs(x_test_adv - x_test_adv_with_rs) < 0.75).all())

    # Check basic functionality of the RS object
    # check predict
    y_test_smooth = rs.predict(x=x_test)
    y_test_base = ptc.predict(x=x_test)
    self.assertEqual(y_test_smooth.shape, y_test.shape)
    self.assertTrue((np.sum(y_test_smooth, axis=1) <= np.ones((NB_TEST,))).all())
    self.assertTrue((np.argmax(y_test_smooth, axis=1) == np.argmax(y_test_base, axis=1)).all())

    # check certification
    pred, radius = rs.certify(x=x_test, n=250)
    self.assertEqual(len(pred), NB_TEST)
    self.assertEqual(len(radius), NB_TEST)
    self.assertTrue((radius <= 1).all())
    self.assertTrue((pred < y_test.shape[1]).all())

    # loss gradient
    grad = rs.loss_gradient(x=x_test, y=y_test, sampling=True)
    assert grad.shape == (10, 1, 28, 28)

    # fit
    rs.fit(x=x_test, y=y_test)
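# Hedged note on the certification check above (an assumption about the ART
# RandomizedSmoothing API, not something this test asserts): certify() may abstain
# on samples where the smoothed prediction is not statistically significant, in
# which case the class index returned for that sample is -1. That is why the test
# only checks `pred < y_test.shape[1]` and not `pred >= 0`. A hypothetical check:
#
#   pred, _ = rs.certify(x=x_test, n=250)
#   abstained = pred == -1  # samples where the smoothed classifier abstained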
def test_check_params(self):
    ptc = get_image_classifier_pt(from_logits=True)

    with self.assertRaises(ValueError):
        _ = ZooAttack(ptc, binary_search_steps=1.0)
    with self.assertRaises(ValueError):
        _ = ZooAttack(ptc, binary_search_steps=-1)

    with self.assertRaises(ValueError):
        _ = ZooAttack(ptc, max_iter=1.0)
    with self.assertRaises(ValueError):
        _ = ZooAttack(ptc, max_iter=-1)

    with self.assertRaises(ValueError):
        _ = ZooAttack(ptc, nb_parallel=1.0)
    with self.assertRaises(ValueError):
        _ = ZooAttack(ptc, nb_parallel=-1)

    with self.assertRaises(ValueError):
        _ = ZooAttack(ptc, batch_size=1.0)
    with self.assertRaises(ValueError):
        _ = ZooAttack(ptc, batch_size=-1)

    with self.assertRaises(ValueError):
        _ = ZooAttack(ptc, verbose="true")
def test_check_params_L0(self):
    ptc = get_image_classifier_pt(from_logits=True)

    with self.assertRaises(ValueError):
        _ = CarliniL0Method(ptc, binary_search_steps="1.0")
    with self.assertRaises(ValueError):
        _ = CarliniL0Method(ptc, binary_search_steps=-1)

    with self.assertRaises(ValueError):
        _ = CarliniL0Method(ptc, max_iter="1.0")
    with self.assertRaises(ValueError):
        _ = CarliniL0Method(ptc, max_iter=-1)

    with self.assertRaises(ValueError):
        _ = CarliniL0Method(ptc, max_halving="1.0")
    with self.assertRaises(ValueError):
        _ = CarliniL0Method(ptc, max_halving=-1)

    with self.assertRaises(ValueError):
        _ = CarliniL0Method(ptc, max_doubling="1.0")
    with self.assertRaises(ValueError):
        _ = CarliniL0Method(ptc, max_doubling=-1)

    with self.assertRaises(ValueError):
        _ = CarliniL0Method(ptc, batch_size="1.0")
    with self.assertRaises(ValueError):
        _ = CarliniL0Method(ptc, batch_size=-1)
def test_5_pytorch_resume(self):
    x_test = np.reshape(self.x_test_mnist, (self.x_test_mnist.shape[0], 1, 28, 28)).astype(np.float32)

    # Build PyTorchClassifier
    ptc = get_image_classifier_pt()

    # HSJ attack
    hsj = HopSkipJump(classifier=ptc, targeted=True, max_iter=10, max_eval=100, init_eval=10)

    params = {"y": self.y_test_mnist[2:3], "x_adv_init": x_test[2:3]}
    x_test_adv1 = hsj.generate(x_test[0:1], **params)
    diff1 = np.linalg.norm(x_test_adv1 - x_test)

    # Resume from the previous adversarial example and refine it over two more runs
    params.update(resume=True, x_adv_init=x_test_adv1)
    x_test_adv2 = hsj.generate(x_test[0:1], **params)

    params.update(x_adv_init=x_test_adv2)
    x_test_adv2 = hsj.generate(x_test[0:1], **params)
    diff2 = np.linalg.norm(x_test_adv2 - x_test)

    # The resumed attack should produce a smaller perturbation
    self.assertGreater(diff1, diff2)
def test_pytorch_mnist_L2(self):
    """
    Test with the PyTorchClassifier.
    :return:
    """
    x_test = np.reshape(self.x_test_mnist, (self.x_test_mnist.shape[0], 1, 28, 28)).astype(np.float32)
    x_test_original = x_test.copy()

    # Build PyTorchClassifier
    ptc = get_image_classifier_pt(from_logits=True)

    # First attack
    cl2m = CarliniL2Method(classifier=ptc, targeted=True, max_iter=10)
    params = {"y": random_targets(self.y_test_mnist, ptc.nb_classes)}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertLessEqual(np.amax(x_test_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
    target = np.argmax(params["y"], axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((target == y_pred_adv).any())
    logger.info("CW2 Success Rate: %.2f", (sum(target == y_pred_adv) / float(len(target))))

    # Second attack
    cl2m = CarliniL2Method(classifier=ptc, targeted=False, max_iter=10)
    x_test_adv = cl2m.generate(x_test)
    self.assertLessEqual(np.amax(x_test_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
    target = np.argmax(params["y"], axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((target != y_pred_adv).any())
    logger.info("CW2 Success Rate: %.2f", (sum(target != y_pred_adv) / float(len(target))))

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def test_pytorch(self):
    """
    Test with the PyTorchClassifier.
    :return:
    """
    ptc = get_image_classifier_pt()
    x_train = np.reshape(self.x_train_mnist, (self.n_train, 1, 28, 28)).astype(np.float32)

    attack_ap = AdversarialPatch(
        ptc,
        rotation_max=0.5,
        scale_min=0.4,
        scale_max=0.41,
        learning_rate=5.0,
        batch_size=10,
        max_iter=5,
    )
    master_seed(seed=1234)

    target = np.zeros(self.x_train_mnist.shape[0])
    patch_adv, _ = attack_ap.generate(x_train, target)

    self.assertAlmostEqual(patch_adv[0, 8, 8], 0.5002671, delta=0.05)
    self.assertAlmostEqual(patch_adv[0, 14, 14], 0.5109714, delta=0.05)
    self.assertAlmostEqual(float(np.sum(patch_adv)), 393.09832763671875, delta=1.0)
def test_pytorch_classifier(self):
    """
    Test with the PyTorchClassifier.
    :return:
    """
    x_train_mnist = np.reshape(self.x_train_mnist, (self.x_train_mnist.shape[0], 1, 28, 28)).astype(np.float32)
    x_test_mnist = np.reshape(self.x_test_mnist, (self.x_test_mnist.shape[0], 1, 28, 28)).astype(np.float32)
    x_test_original = x_test_mnist.copy()

    # Build PyTorchClassifier
    ptc = get_image_classifier_pt(from_logits=True)

    # Attack
    attack_st = SpatialTransformation(
        ptc, max_translation=10.0, num_translations=3, max_rotation=30.0, num_rotations=3
    )
    x_train_mnist_adv = attack_st.generate(x_train_mnist)

    self.assertAlmostEqual(x_train_mnist_adv[0, 0, 13, 18], 0.627451, delta=0.01)
    self.assertAlmostEqual(attack_st.fooling_rate, 0.57, delta=0.03)

    self.assertEqual(attack_st.attack_trans_x, 0)
    self.assertEqual(attack_st.attack_trans_y, 3)
    self.assertEqual(attack_st.attack_rot, 0.0)

    x_test_adv = attack_st.generate(x_test_mnist)
    self.assertLessEqual(abs(x_test_adv[0, 0, 14, 14] - 0.008591662), 0.01)

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test_mnist))), 0.0, delta=0.00001)
def test_check_params_LInf(self):
    ptc = get_image_classifier_pt(from_logits=True)

    with self.assertRaises(ValueError):
        _ = CarliniLInfMethod(ptc, max_iter="1.0")
    with self.assertRaises(ValueError):
        _ = CarliniLInfMethod(ptc, max_iter=-1)

    with self.assertRaises(ValueError):
        _ = CarliniLInfMethod(ptc, decrease_factor="1.0")
    with self.assertRaises(ValueError):
        _ = CarliniLInfMethod(ptc, decrease_factor=-1)

    with self.assertRaises(ValueError):
        _ = CarliniLInfMethod(ptc, initial_const="1.0")
    with self.assertRaises(ValueError):
        _ = CarliniLInfMethod(ptc, initial_const=-1)

    with self.assertRaises(ValueError):
        _ = CarliniLInfMethod(ptc, largest_const="1.0")
    with self.assertRaises(ValueError):
        _ = CarliniLInfMethod(ptc, largest_const=-1)

    with self.assertRaises(ValueError):
        _ = CarliniLInfMethod(ptc, const_factor="1.0")
    with self.assertRaises(ValueError):
        _ = CarliniLInfMethod(ptc, const_factor=-1)
def test_3_pytorch_mnist(self):
    """
    Test with the PyTorchClassifier.
    :return:
    """
    x_train_mnist = np.swapaxes(self.x_train_mnist, 1, 3).astype(np.float32)
    x_test_mnist = np.swapaxes(self.x_test_mnist, 1, 3).astype(np.float32)
    x_test_original = x_test_mnist.copy()

    # Build PyTorchClassifier
    ptc = get_image_classifier_pt()

    # Set the target label
    target = 0
    y_target = np.zeros([len(self.x_train_mnist), 10])
    for i in range(len(self.x_train_mnist)):
        y_target[i, target] = 1.0

    # Attack
    up = TargetedUniversalPerturbation(
        ptc, max_iter=1, attacker="fgsm", attacker_params={"eps": 0.3, "targeted": True}
    )
    x_train_mnist_adv = up.generate(x_train_mnist, y=y_target)
    self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

    x_test_mnist_adv = x_test_mnist + up.noise
    self.assertFalse((x_test_mnist == x_test_mnist_adv).all())

    train_y_pred = np.argmax(ptc.predict(x_train_mnist_adv), axis=1)
    test_y_pred = np.argmax(ptc.predict(x_test_mnist_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_mnist, axis=1) == test_y_pred).all())
    self.assertFalse((np.argmax(self.y_train_mnist, axis=1) == train_y_pred).all())

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test_mnist))), 0.0, delta=0.00001)
def test_5_pytorch_mnist(self):
    """
    Test with the PyTorchClassifier.
    :return:
    """
    x_test = np.swapaxes(self.x_test_mnist, 1, 3).astype(np.float32)
    x_test_original = x_test.copy()

    # Build PyTorchClassifier
    ptc = get_image_classifier_pt()

    # Attack
    nf = NewtonFool(ptc, max_iter=5, batch_size=100)
    x_test_adv = nf.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())

    y_pred = ptc.predict(x_test)
    y_pred_adv = ptc.predict(x_test_adv)
    y_pred_bool = y_pred.max(axis=1, keepdims=True) == y_pred
    y_pred_max = y_pred.max(axis=1)
    y_pred_adv_max = y_pred_adv[y_pred_bool]
    # The confidence in the originally predicted class should not increase after the attack
    self.assertTrue((y_pred_max >= 0.9 * y_pred_adv_max).all())

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def test_pytorch_mnist_LInf(self):
    """
    Test with the PyTorchClassifier.
    :return:
    """
    x_test = np.reshape(self.x_test_mnist, (self.x_test_mnist.shape[0], 1, 28, 28)).astype(np.float32)

    # Build PyTorchClassifier
    ptc = get_image_classifier_pt(from_logits=True)

    # First attack
    clinfm = CarliniLInfMethod(classifier=ptc, targeted=True, max_iter=10, eps=0.5)
    params = {"y": random_targets(self.y_test_mnist, ptc.nb_classes)}
    x_test_adv = clinfm.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertLessEqual(np.amax(x_test_adv), 1.0 + 1e-6)
    self.assertGreaterEqual(np.amin(x_test_adv), -1e-6)
    target = np.argmax(params["y"], axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((target == y_pred_adv).any())

    # Second attack
    clinfm = CarliniLInfMethod(classifier=ptc, targeted=False, max_iter=10, eps=0.5)
    x_test_adv = clinfm.generate(x_test)
    self.assertLessEqual(np.amax(x_test_adv), 1.0 + 1e-6)
    self.assertGreaterEqual(np.amin(x_test_adv), -1e-6)
    target = np.argmax(params["y"], axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((target != y_pred_adv).any())
def test_check_params(self):
    ptc = get_image_classifier_pt(from_logits=True)

    with self.assertRaises(ValueError):
        _ = SimBA(ptc, max_iter=1.0)
    with self.assertRaises(ValueError):
        _ = SimBA(ptc, max_iter=-1)

    with self.assertRaises(ValueError):
        _ = SimBA(ptc, epsilon=-1)

    with self.assertRaises(ValueError):
        _ = SimBA(ptc, batch_size=2)

    with self.assertRaises(ValueError):
        _ = SimBA(ptc, stride=1.0)
    with self.assertRaises(ValueError):
        _ = SimBA(ptc, stride=-1)

    with self.assertRaises(ValueError):
        _ = SimBA(ptc, freq_dim=1.0)
    with self.assertRaises(ValueError):
        _ = SimBA(ptc, freq_dim=-1)

    with self.assertRaises(ValueError):
        _ = SimBA(ptc, order="test")

    with self.assertRaises(ValueError):
        _ = SimBA(ptc, attack="test")

    with self.assertRaises(ValueError):
        _ = SimBA(ptc, targeted="test")
def setUpClass(cls):
    super().setUpClass()

    cls.x_train_mnist = np.reshape(cls.x_train_mnist, (cls.x_train_mnist.shape[0], 1, 28, 28)).astype(np.float32)
    cls.x_test_mnist = np.reshape(cls.x_test_mnist, (cls.x_test_mnist.shape[0], 1, 28, 28)).astype(np.float32)

    # Define the internal classifier
    classifier = get_image_classifier_pt()

    # Define the internal detector
    conv = nn.Conv2d(1, 16, 5)
    linear = nn.Linear(2304, 1)
    torch.nn.init.xavier_uniform_(conv.weight)
    torch.nn.init.xavier_uniform_(linear.weight)
    model = nn.Sequential(conv, nn.ReLU(), nn.MaxPool2d(2, 2), Flatten(), linear)
    model = Model(model)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    detector = PyTorchClassifier(
        model=model, loss=loss_fn, optimizer=optimizer, input_shape=(1, 28, 28), nb_classes=1, clip_values=(0, 1)
    )

    # Define the detector-classifier
    cls.detector_classifier = DetectorClassifier(classifier=classifier, detector=detector)

    cls.x_train_mnist = np.reshape(cls.x_train_mnist, (cls.x_train_mnist.shape[0], 28, 28, 1)).astype(np.float32)
    cls.x_test_mnist = np.reshape(cls.x_test_mnist, (cls.x_test_mnist.shape[0], 28, 28, 1)).astype(np.float32)
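# Hedged note on the fixture above (an assumption about ART's DetectorClassifier,
# not asserted by these tests): the combined estimator exposes one extra output
# class for the detector, i.e. nb_classes == classifier.nb_classes + 1 (11 for
# MNIST). A hypothetical sanity check along those lines might look like:
#
#   assert cls.detector_classifier.nb_classes == classifier.nb_classes + 1
#   preds = cls.detector_classifier.predict(cls.x_test_mnist[:5].transpose(0, 3, 1, 2))
#   assert preds.shape == (5, 11)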
def test_4_pytorch(self):
    """
    Test with the PyTorchClassifier.
    :return:
    """
    ptc = get_image_classifier_pt(from_logits=True)
    x_train = np.reshape(self.x_train_mnist, (self.n_train, 1, 28, 28)).astype(np.float32)

    attack_ap = AdversarialPatch(
        ptc,
        rotation_max=0.5,
        scale_min=0.4,
        scale_max=0.41,
        learning_rate=5.0,
        batch_size=10,
        max_iter=5,
        verbose=False,
    )

    target = np.zeros(self.x_train_mnist.shape[0])
    patch_adv, _ = attack_ap.generate(x_train, target)

    self.assertAlmostEqual(patch_adv[0, 8, 8], 0.6715167, delta=0.05)
    self.assertAlmostEqual(patch_adv[0, 14, 14], 0.6292826, delta=0.05)
    self.assertAlmostEqual(float(np.sum(patch_adv)), 424.31439208984375, delta=1.0)
def test_pytorch_mnist(self):
    """
    Test with the PyTorchClassifier.
    :return:
    """
    x_train_mnist = np.swapaxes(self.x_train_mnist, 1, 3).astype(np.float32)
    x_test_mnist = np.swapaxes(self.x_test_mnist, 1, 3).astype(np.float32)
    x_test_original = x_test_mnist.copy()

    # Build PyTorchClassifier
    ptc = get_image_classifier_pt()

    # Attack
    up = UniversalPerturbation(ptc, max_iter=1, attacker="newtonfool", attacker_params={"max_iter": 5})
    x_train_mnist_adv = up.generate(x_train_mnist)
    self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

    x_test_mnist_adv = x_test_mnist + up.noise
    self.assertFalse((x_test_mnist == x_test_mnist_adv).all())

    train_y_pred = np.argmax(ptc.predict(x_train_mnist_adv), axis=1)
    test_y_pred = np.argmax(ptc.predict(x_test_mnist_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_mnist, axis=1) == test_y_pred).all())
    self.assertFalse((np.argmax(self.y_train_mnist, axis=1) == train_y_pred).all())

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test_mnist))), 0.0, delta=0.00001)
def test_3_pytorch_classifier(self):
    """
    Test with the PyTorchClassifier.
    :return:
    """
    self.x_train_mnist = np.reshape(
        self.x_train_mnist, (self.x_train_mnist.shape[0], 1, 28, 28)
    ).astype(np.float32)

    # Create the trained classifier
    trained_classifier = get_image_classifier_pt()

    # Create the modified classifier
    transformed_classifier = get_image_classifier_pt(load_init=False)

    # Create defensive distillation transformer
    transformer = DefensiveDistillation(classifier=trained_classifier, batch_size=BATCH_SIZE, nb_epochs=NB_EPOCHS)

    # Perform the transformation
    transformed_classifier = transformer(x=self.x_train_mnist, transformed_classifier=transformed_classifier)

    # Compare the two outputs
    preds1 = trained_classifier.predict(x=self.x_train_mnist, batch_size=BATCH_SIZE)
    preds2 = transformed_classifier.predict(x=self.x_train_mnist, batch_size=BATCH_SIZE)

    preds1 = np.argmax(preds1, axis=1)
    preds2 = np.argmax(preds2, axis=1)

    acc = np.sum(preds1 == preds2) / len(preds1)
    self.assertGreater(acc, 0.5)

    ce = cross_entropy(preds1, preds2)
    self.assertLess(ce, 10)
    self.assertGreaterEqual(ce, 0)

    # Restore the original NHWC shape of the shared MNIST data
    self.x_train_mnist = np.reshape(
        self.x_train_mnist, (self.x_train_mnist.shape[0], 28, 28, 1)
    ).astype(np.float32)
def test_4_pytorch_mnist(self):
    """
    Test with the PyTorchClassifier. (Untargeted Attack)
    :return:
    """
    x_test = np.reshape(self.x_test_mnist, (self.x_test_mnist.shape[0], 1, 28, 28)).astype(np.float32)
    classifier = get_image_classifier_pt()
    self._test_attack(classifier, x_test, self.y_test_mnist, False)
def test_fit_predict(self):
    classifier = get_image_classifier_pt()
    predictions = classifier.predict(self.x_test_mnist)
    accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.n_test
    logger.info("Accuracy after fitting: %.2f%%", (accuracy * 100))
    self.assertEqual(accuracy, 0.32)
def test_5_pytorch_mnist(self):
    x_train = np.reshape(self.x_train_mnist, (self.x_train_mnist.shape[0], 1, 28, 28)).astype(np.float32)
    x_test = np.reshape(self.x_test_mnist, (self.x_test_mnist.shape[0], 1, 28, 28)).astype(np.float32)
    x_test_original = x_test.copy()

    # Create basic PyTorch model
    classifier = get_image_classifier_pt(from_logits=True)

    scores = get_labels_np_array(classifier.predict(x_train))
    accuracy = np.sum(np.argmax(scores, axis=1) == np.argmax(self.y_train_mnist, axis=1)) / self.y_train_mnist.shape[0]
    logger.info("[PyTorch, MNIST] Accuracy on training set: %.2f%%", (accuracy * 100))

    scores = get_labels_np_array(classifier.predict(x_test))
    accuracy = np.sum(np.argmax(scores, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.y_test_mnist.shape[0]
    logger.info("[PyTorch, MNIST] Accuracy on test set: %.2f%%", (accuracy * 100))

    attack = DeepFool(classifier, max_iter=5, batch_size=11, verbose=False)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

    self.assertFalse((self.y_train_mnist == train_y_pred).all())
    self.assertFalse((self.y_test_mnist == test_y_pred).all())

    accuracy = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(self.y_train_mnist, axis=1)) / self.y_train_mnist.shape[0]
    logger.info("Accuracy on adversarial train examples: %.2f%%", (accuracy * 100))

    accuracy = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.y_test_mnist.shape[0]
    logger.info("Accuracy on adversarial test examples: %.2f%%", (accuracy * 100))

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def test_framework_pytorch_mnist(self):
    self.x_train_mnist = np.swapaxes(self.x_train_mnist, 1, 3).astype(np.float32)
    self.x_test_mnist = np.swapaxes(self.x_test_mnist, 1, 3).astype(np.float32)

    classifier = get_image_classifier_pt()
    self._test_framework_vs_numpy(classifier)

    # Swap the axes back so the shared MNIST data keeps its original NHWC layout
    self.x_train_mnist = np.swapaxes(self.x_train_mnist, 1, 3).astype(np.float32)
    self.x_test_mnist = np.swapaxes(self.x_test_mnist, 1, 3).astype(np.float32)
def test_check_params(self):
    ptc = get_image_classifier_pt(from_logits=True)

    with self.assertRaises(ValueError):
        _ = Wasserstein(ptc, targeted="true")

    with self.assertRaises(ValueError):
        _ = Wasserstein(ptc, regularization=-1)

    with self.assertRaises(TypeError):
        _ = Wasserstein(ptc, p=1.0)
    with self.assertRaises(ValueError):
        _ = Wasserstein(ptc, p=-1)

    with self.assertRaises(TypeError):
        _ = Wasserstein(ptc, kernel_size=1.0)
    with self.assertRaises(ValueError):
        _ = Wasserstein(ptc, kernel_size=2)

    with self.assertRaises(ValueError):
        _ = Wasserstein(ptc, norm=0)

    with self.assertRaises(ValueError):
        _ = Wasserstein(ptc, ball=0)

    with self.assertRaises(ValueError):
        _ = Wasserstein(ptc, eps=-1)

    with self.assertRaises(ValueError):
        _ = Wasserstein(ptc, eps_step=-1)

    with self.assertRaises(ValueError):
        _ = Wasserstein(ptc, norm="inf", eps=1, eps_step=2)

    with self.assertRaises(ValueError):
        _ = Wasserstein(ptc, eps_iter=-1)

    with self.assertRaises(ValueError):
        _ = Wasserstein(ptc, eps_factor=-1)

    with self.assertRaises(ValueError):
        _ = Wasserstein(ptc, max_iter=-1)

    with self.assertRaises(ValueError):
        _ = Wasserstein(ptc, conjugate_sinkhorn_max_iter=-1)

    with self.assertRaises(ValueError):
        _ = Wasserstein(ptc, projected_sinkhorn_max_iter=-1)

    with self.assertRaises(ValueError):
        _ = Wasserstein(ptc, batch_size=-1)

    with self.assertRaises(ValueError):
        _ = Wasserstein(ptc, verbose="true")
def _image_dl_estimator(one_classifier=False, functional=False, **kwargs):
    sess = None
    wildcard = False
    classifier_list = None

    if kwargs.get("wildcard") is not None:
        if kwargs.get("wildcard") is True:
            wildcard = True
        del kwargs["wildcard"]

    if framework == "keras":
        # Note: branch on `functional` only after the wildcard check, otherwise the
        # functional Keras classifier would be unreachable
        if wildcard is False:
            if functional:
                classifier_list = [get_image_classifier_kr_functional(**kwargs)]
            else:
                classifier_list = [get_image_classifier_kr(**kwargs)]

    if framework == "tensorflow":
        if wildcard is False and functional is False:
            classifier, sess = get_image_classifier_tf(**kwargs)
            classifier_list = [classifier]

    if framework == "pytorch":
        if wildcard is False and functional is False:
            classifier_list = [get_image_classifier_pt(**kwargs)]

    if framework == "scikitlearn":
        logging.warning("{0} doesn't have an image classifier defined yet".format(framework))
        classifier_list = None

    if framework == "kerastf":
        if wildcard:
            classifier_list = [get_image_classifier_kr_tf_with_wildcard(**kwargs)]
        else:
            if functional:
                classifier_list = [get_image_classifier_kr_tf_functional(**kwargs)]
            else:
                classifier_list = [get_image_classifier_kr_tf(**kwargs)]

    if framework == "mxnet":
        if wildcard is False and functional is False:
            classifier_list = [get_image_classifier_mx_instance(**kwargs)]

    if classifier_list is None:
        return None, None

    if one_classifier:
        return classifier_list[0], sess

    return classifier_list, sess
def test_check_params(self):
    ptc = get_image_classifier_pt(from_logits=True)

    with self.assertRaises(ValueError):
        _ = SaliencyMapMethod(ptc, gamma=-1)

    with self.assertRaises(ValueError):
        _ = SaliencyMapMethod(ptc, batch_size=-1)

    with self.assertRaises(ValueError):
        _ = SaliencyMapMethod(ptc, verbose="False")
def _image_dl_estimator(functional=False, **kwargs):
    sess = None
    wildcard = False
    classifier = None

    if kwargs.get("wildcard") is not None:
        if kwargs.get("wildcard") is True:
            wildcard = True
        del kwargs["wildcard"]

    if framework == "keras":
        # Note: branch on `functional` only after the wildcard check, otherwise the
        # functional Keras classifier would be unreachable
        if not wildcard:
            if functional:
                classifier = get_image_classifier_kr_functional(**kwargs)
            else:
                try:
                    classifier = get_image_classifier_kr(**kwargs)
                except NotImplementedError:
                    raise ARTTestFixtureNotImplemented(
                        "This combination of loss function options is currently not supported.",
                        image_dl_estimator.__name__,
                        framework,
                    )

    if framework == "tensorflow1" or framework == "tensorflow2":
        if wildcard is False and functional is False:
            classifier, sess = get_image_classifier_tf(**kwargs)
            return classifier, sess

    if framework == "pytorch":
        if not wildcard:
            if functional:
                classifier = get_image_classifier_pt_functional(**kwargs)
            else:
                classifier = get_image_classifier_pt(**kwargs)

    if framework == "kerastf":
        if wildcard:
            classifier = get_image_classifier_kr_tf_with_wildcard(**kwargs)
        else:
            if functional:
                classifier = get_image_classifier_kr_tf_functional(**kwargs)
            else:
                classifier = get_image_classifier_kr_tf(**kwargs)

    if framework == "mxnet":
        if wildcard is False and functional is False:
            classifier = get_image_classifier_mx_instance(**kwargs)

    if classifier is None:
        raise ARTTestFixtureNotImplemented(
            "no test deep learning estimator available", image_dl_estimator.__name__, framework
        )

    return classifier, sess
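# Hedged usage sketch (assumption: the helper above is exposed to tests through a
# pytest fixture named `image_dl_estimator`, matching the name it passes to
# ARTTestFixtureNotImplemented). A test would then request a classifier like this:
#
#   def test_example(image_dl_estimator):
#       classifier, sess = image_dl_estimator(from_logits=True)
#       predictions = classifier.predict(x_test)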
def test_check_params(self):
    ptc = get_image_classifier_pt(from_logits=True)

    with self.assertRaises(ValueError):
        _ = TargetedUniversalPerturbation(ptc, delta=-1)

    with self.assertRaises(ValueError):
        _ = TargetedUniversalPerturbation(ptc, max_iter=-1)

    with self.assertRaises(ValueError):
        _ = TargetedUniversalPerturbation(ptc, eps=-1)
def test_check_params_pt(self):
    ptc = get_image_classifier_pt(from_logits=True)

    with self.assertRaises(ValueError):
        _ = DefensiveDistillation(ptc, batch_size=1.0)
    with self.assertRaises(ValueError):
        _ = DefensiveDistillation(ptc, batch_size=-1)

    with self.assertRaises(ValueError):
        _ = DefensiveDistillation(ptc, nb_epochs=1.0)
    with self.assertRaises(ValueError):
        _ = DefensiveDistillation(ptc, nb_epochs=-1)
def test_4_pytorch_mnist(self):
    """
    Test with the PyTorchClassifier.
    :return:
    """
    # Build PyTorchClassifier
    ptc = get_image_classifier_pt()

    # Get MNIST
    x_test_mnist = np.swapaxes(self.x_test_mnist, 1, 3).astype(np.float32)
    x_test_original = x_test_mnist.copy()

    # First attack
    # zoo = ZooAttack(classifier=ptc, targeted=True, max_iter=10, binary_search_steps=10, verbose=False)
    # params = {'y': random_targets(self.y_test, ptc.nb_classes)}
    # x_test_adv = zoo.generate(x_test, **params)
    # self.assertFalse((x_test == x_test_adv).all())
    # self.assertLessEqual(np.amax(x_test_adv), 1.0)
    # self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
    # target = np.argmax(params['y'], axis=1)
    # y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    # logger.debug('ZOO target: %s', target)
    # logger.debug('ZOO actual: %s', y_pred_adv)
    # logger.info('ZOO success rate on MNIST: %.2f', (sum(target != y_pred_adv) / float(len(target))))

    # Second attack
    zoo = ZooAttack(
        classifier=ptc,
        targeted=False,
        learning_rate=1e-2,
        max_iter=10,
        binary_search_steps=3,
        abort_early=False,
        use_resize=False,
        use_importance=False,
        verbose=False,
    )
    x_test_mnist_adv = zoo.generate(x_test_mnist)
    self.assertLessEqual(np.amax(x_test_mnist_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_mnist_adv), 0.0)

    # print(x_test[0, 0, 14, :])
    # print(x_test_adv[0, 0, 14, :])
    # print(np.amax(x_test - x_test_adv))
    # x_test_adv_expected = []

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test_mnist))), 0.0, delta=0.00001)
def test_pytorch_mnist(self):
    classifier = get_image_classifier_pt()

    x_train = np.swapaxes(self.x_train_mnist, 1, 3).astype(np.float32)
    x_test = np.swapaxes(self.x_test_mnist, 1, 3).astype(np.float32)

    scores = get_labels_np_array(classifier.predict(x_train))
    acc = np.sum(np.argmax(scores, axis=1) == np.argmax(self.y_train_mnist, axis=1)) / self.y_train_mnist.shape[0]
    logger.info("[PyTorch, MNIST] Accuracy on training set: %.2f%%", (acc * 100))

    scores = get_labels_np_array(classifier.predict(x_test))
    acc = np.sum(np.argmax(scores, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.y_test_mnist.shape[0]
    logger.info("[PyTorch, MNIST] Accuracy on test set: %.2f%%", (acc * 100))

    self._test_backend_mnist(classifier, x_train, self.y_train_mnist, x_test, self.y_test_mnist)
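# Hedged end-to-end sketch (not collected by pytest, no `test_` prefix): a minimal,
# standalone version of the clean-vs-adversarial accuracy pattern the tests above
# exercise. It assumes the ART helpers `art.utils.load_mnist` and
# `art.attacks.evasion.FastGradientMethod`, and reuses the `get_image_classifier_pt`
# test utility; adjust the imports if the local setup differs.
def _fgsm_usage_sketch():
    from art.attacks.evasion import FastGradientMethod
    from art.utils import load_mnist

    # Load a small MNIST slice and move it to NCHW layout, as the PyTorch tests above do
    (_, _), (x_test, y_test), _, _ = load_mnist()
    x_test = x_test.transpose(0, 3, 1, 2).astype(np.float32)[:100]
    y_test = y_test[:100]

    # Small pretrained PyTorch classifier used throughout this test suite
    classifier = get_image_classifier_pt(from_logits=True)

    # Untargeted FGSM attack on the test slice
    attack = FastGradientMethod(estimator=classifier, eps=0.3)
    x_test_adv = attack.generate(x=x_test)

    # Compare accuracy on clean and adversarial inputs
    clean_acc = np.mean(np.argmax(classifier.predict(x_test), axis=1) == np.argmax(y_test, axis=1))
    adv_acc = np.mean(np.argmax(classifier.predict(x_test_adv), axis=1) == np.argmax(y_test, axis=1))
    logger.info("Clean accuracy: %.2f%%, adversarial accuracy: %.2f%%", clean_acc * 100, adv_acc * 100)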