def test_krclassifier(self):
    """
    Run a targeted and an untargeted boundary attack against the KerasClassifier.

    For each attack the test checks that the adversarial batch differs from the clean batch, that
    its values stay within [0, 1] (with a small float tolerance) and that at least one sample
    reaches the intended outcome (hits its target / changes its prediction).

    :return: None
    """
    # Build KerasClassifier
    krc = get_classifier_kr()

    # The session is cleared in `finally` so that a failing assertion does not skip the clean-up.
    try:
        # First targeted attack
        boundary = BoundaryAttack(classifier=krc, targeted=True, max_iter=20)
        params = {'y': random_targets(self.y_test, krc.nb_classes())}
        x_test_adv = boundary.generate(self.x_test, **params)

        self.assertFalse((self.x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        # At least one adversarial sample must be classified as its target
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        self.assertTrue((target == y_pred_adv).any())

        # Second untargeted attack
        boundary = BoundaryAttack(classifier=krc, targeted=False, max_iter=20)
        x_test_adv = boundary.generate(self.x_test)

        self.assertFalse((self.x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        # At least one prediction must differ from the prediction on the clean input
        y_pred = np.argmax(krc.predict(self.x_test), axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        self.assertTrue((y_pred != y_pred_adv).any())
    finally:
        # Clean-up session
        k.clear_session()
def test_iris_tf(self):
    """
    Boundary attack on the TensorFlow Iris classifier: untargeted first, then targeted.

    Both runs assert that the inputs changed and that the adversarial values lie in [0, 1]; the
    resulting accuracy / success rate is only logged.

    :return: None
    """
    classifier, _ = get_iris_classifier_tf()
    clean_inputs = self.x_test
    n_samples = self.y_test.shape[0]

    # Test untargeted attack
    untargeted = BoundaryAttack(classifier, targeted=False, max_iter=10)
    adv_inputs = untargeted.generate(clean_inputs)
    unchanged = clean_inputs == adv_inputs
    self.assertFalse(unchanged.all())
    below_upper = adv_inputs <= 1
    self.assertTrue(below_upper.all())
    above_lower = adv_inputs >= 0
    self.assertTrue(above_lower.all())

    adv_labels = np.argmax(classifier.predict(adv_inputs), axis=1)
    still_correct = np.argmax(self.y_test, axis=1) == adv_labels
    self.assertFalse(still_correct.all())
    accuracy = np.sum(adv_labels == np.argmax(self.y_test, axis=1)) / n_samples
    logger.info('Accuracy on Iris with boundary adversarial examples: %.2f%%', (accuracy * 100))

    # Test targeted attack
    targets = random_targets(self.y_test, nb_classes=3)
    targeted = BoundaryAttack(classifier, targeted=True, max_iter=10)
    adv_inputs = targeted.generate(clean_inputs, y=targets)
    unchanged = clean_inputs == adv_inputs
    self.assertFalse(unchanged.all())
    below_upper = adv_inputs <= 1
    self.assertTrue(below_upper.all())
    above_lower = adv_inputs >= 0
    self.assertTrue(above_lower.all())

    adv_labels = np.argmax(classifier.predict(adv_inputs), axis=1)
    on_target = np.argmax(targets, axis=1) == adv_labels
    self.assertTrue(on_target.any())
    accuracy = np.sum(adv_labels == np.argmax(targets, axis=1)) / n_samples
    logger.info('Success rate of targeted boundary on Iris: %.2f%%', (accuracy * 100))
def test_ptclassifier(self):
    """
    Exercise the boundary attack against the PyTorchClassifier.

    The test inputs get axes 1 and 3 swapped and are cast to float32 before use. A targeted
    attack towards random targets runs first, an untargeted attack second.

    :return: None
    """
    # Build PyTorchClassifier
    ptc = get_classifier_pt()
    inputs = np.swapaxes(self.x_test, 1, 3).astype(np.float32)

    # Targeted run
    targeted_attack = BoundaryAttack(classifier=ptc, targeted=True, max_iter=20)
    targets = random_targets(self.y_test, ptc.nb_classes())
    adv_inputs = targeted_attack.generate(inputs, y=targets)

    unchanged = inputs == adv_inputs
    self.assertFalse(unchanged.all())
    within_upper = adv_inputs <= 1.0001
    self.assertTrue(within_upper.all())
    within_lower = adv_inputs >= -0.0001
    self.assertTrue(within_lower.all())

    target_labels = np.argmax(targets, axis=1)
    adv_labels = np.argmax(ptc.predict(adv_inputs), axis=1)
    hits = target_labels == adv_labels
    self.assertTrue(hits.any())

    # Untargeted run
    untargeted_attack = BoundaryAttack(classifier=ptc, targeted=False, max_iter=20)
    adv_inputs = untargeted_attack.generate(inputs)

    unchanged = inputs == adv_inputs
    self.assertFalse(unchanged.all())
    within_upper = adv_inputs <= 1.0001
    self.assertTrue(within_upper.all())
    within_lower = adv_inputs >= -0.0001
    self.assertTrue(within_lower.all())

    clean_labels = np.argmax(ptc.predict(inputs), axis=1)
    adv_labels = np.argmax(ptc.predict(adv_inputs), axis=1)
    flipped = clean_labels != adv_labels
    self.assertTrue(flipped.any())
def test_keras_mnist(self):
    """
    Run a targeted and an untargeted boundary attack on MNIST with the KerasClassifier.

    Besides the usual checks (inputs changed, values within [0, 1] up to a small tolerance, at
    least one sample reaching the intended outcome) the test verifies that neither the attack nor
    the classifier modified the clean test data in place.

    :return: None
    """
    (_, _), (x_test, y_test) = self.mnist
    # Snapshot used at the end to detect in-place modification of the inputs
    x_test_original = x_test.copy()

    # Build KerasClassifier
    krc = get_classifier_kr()

    # The session is cleared in `finally` so that a failing assertion does not skip the clean-up.
    try:
        # First targeted attack
        boundary = BoundaryAttack(classifier=krc, targeted=True, max_iter=20)
        params = {'y': random_targets(y_test, krc.nb_classes())}
        x_test_adv = boundary.generate(x_test, **params)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        self.assertTrue((target == y_pred_adv).any())

        # Second untargeted attack
        boundary = BoundaryAttack(classifier=krc, targeted=False, max_iter=20)
        x_test_adv = boundary.generate(x_test)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        y_pred = np.argmax(krc.predict(x_test), axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        self.assertTrue((y_pred != y_pred_adv).any())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
    finally:
        # Clean-up session
        k.clear_session()
def test_pytorch_mnist(self):
    """
    Boundary attack on MNIST with the PyTorchClassifier, targeted then untargeted.

    The test images are axis-swapped, reshaped to (N, 1, 28, 28) and cast to float32 first. A copy
    of the prepared inputs is kept to confirm at the end that nothing was modified in place.

    :return: None
    """
    (_, _), (raw_inputs, labels) = self.mnist
    swapped = np.swapaxes(raw_inputs, 1, 3).astype(np.float32)
    inputs = np.reshape(swapped, (swapped.shape[0], 1, 28, 28)).astype(np.float32)
    inputs_snapshot = inputs.copy()

    # Build PyTorchClassifier
    ptc = get_classifier_pt()

    # Targeted run
    targeted_attack = BoundaryAttack(classifier=ptc, targeted=True, max_iter=20)
    targets = random_targets(labels, ptc.nb_classes())
    adv_inputs = targeted_attack.generate(inputs, y=targets)

    unchanged = inputs == adv_inputs
    self.assertFalse(unchanged.all())
    within_upper = adv_inputs <= 1.0001
    self.assertTrue(within_upper.all())
    within_lower = adv_inputs >= -0.0001
    self.assertTrue(within_lower.all())

    target_labels = np.argmax(targets, axis=1)
    adv_labels = np.argmax(ptc.predict(adv_inputs), axis=1)
    hits = target_labels == adv_labels
    self.assertTrue(hits.any())

    # Untargeted run
    untargeted_attack = BoundaryAttack(classifier=ptc, targeted=False, max_iter=20)
    adv_inputs = untargeted_attack.generate(inputs)

    unchanged = inputs == adv_inputs
    self.assertFalse(unchanged.all())
    within_upper = adv_inputs <= 1.0001
    self.assertTrue(within_upper.all())
    within_lower = adv_inputs >= -0.0001
    self.assertTrue(within_lower.all())

    clean_labels = np.argmax(ptc.predict(inputs), axis=1)
    adv_labels = np.argmax(ptc.predict(adv_inputs), axis=1)
    flipped = clean_labels != adv_labels
    self.assertTrue(flipped.any())

    # The prepared inputs must be untouched by both the attack and the classifier
    max_drift = float(np.max(np.abs(inputs_snapshot - inputs)))
    self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
def test_scikitlearn(self):
    """
    Run the untargeted boundary attack against every supported scikit-learn model wrapper.

    Each scikit-learn estimator is instantiated with default parameters, wrapped in its ART
    classifier with clip values (0, 1), fitted on the test data and then attacked. The adversarial
    samples must differ from the inputs, stay within [0, 1] and must not all keep the true label.

    :return: None
    """
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC, LinearSVC
    from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
    from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, ExtraTreesClassifier
    from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier

    from art.classifiers.scikitlearn import ScikitlearnDecisionTreeClassifier, ScikitlearnExtraTreeClassifier
    from art.classifiers.scikitlearn import ScikitlearnAdaBoostClassifier, ScikitlearnBaggingClassifier
    from art.classifiers.scikitlearn import ScikitlearnExtraTreesClassifier, ScikitlearnGradientBoostingClassifier
    from art.classifiers.scikitlearn import ScikitlearnRandomForestClassifier, ScikitlearnLogisticRegression
    from art.classifiers.scikitlearn import ScikitlearnSVC

    # scikit-learn estimator class -> ART wrapper class (both SVM flavours share one wrapper)
    scikitlearn_test_cases = {
        DecisionTreeClassifier: ScikitlearnDecisionTreeClassifier,
        ExtraTreeClassifier: ScikitlearnExtraTreeClassifier,
        AdaBoostClassifier: ScikitlearnAdaBoostClassifier,
        BaggingClassifier: ScikitlearnBaggingClassifier,
        ExtraTreesClassifier: ScikitlearnExtraTreesClassifier,
        GradientBoostingClassifier: ScikitlearnGradientBoostingClassifier,
        RandomForestClassifier: ScikitlearnRandomForestClassifier,
        LogisticRegression: ScikitlearnLogisticRegression,
        SVC: ScikitlearnSVC,
        LinearSVC: ScikitlearnSVC,
    }

    # The true labels do not depend on the model, so compute them once
    y_true = np.argmax(self.y_test, axis=1)

    for (model_class, classifier_class) in scikitlearn_test_cases.items():
        model = model_class()
        classifier = classifier_class(model=model, clip_values=(0, 1))
        classifier.fit(x=self.x_test, y=self.y_test)

        attack = BoundaryAttack(classifier, targeted=False, delta=0.01, epsilon=0.01, step_adapt=0.667,
                                max_iter=50, num_trial=25, sample_size=20, init_size=100)
        x_test_adv = attack.generate(self.x_test)
        self.assertFalse((self.x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((y_true == preds_adv).all())
        accuracy = np.sum(preds_adv == y_true) / self.y_test.shape[0]
        # The class name is passed as a logging argument rather than concatenated into the format
        # string, so it can never be misread as a format directive; the emitted text is unchanged.
        logger.info('Accuracy of %s on Iris with BoundaryAttack adversarial examples: %.2f%%',
                    classifier.__class__.__name__, (accuracy * 100))
def test_tfclassifier(self):
    """
    Run a targeted and an untargeted boundary attack on MNIST with the TFClassifier.

    For each attack the test checks that the adversarial batch differs from the clean batch, that
    its values stay within [0, 1] (with a small float tolerance) and that at least one sample
    reaches the intended outcome (hits its target / changes its prediction).

    :return: None
    """
    # Build TFClassifier
    tfc, sess = get_classifier_tf()

    # Session and default graph are released in `finally` so that a failing assertion does not
    # skip the clean-up.
    try:
        # Get MNIST
        (_, _), (x_test, y_test) = self.mnist

        # First targeted attack
        boundary = BoundaryAttack(classifier=tfc, targeted=True, max_iter=20)
        # NOTE(review): `nb_classes` is read as an attribute here (no call) - confirm that this
        # matches the classifier API version this test runs against.
        params = {'y': random_targets(y_test, tfc.nb_classes)}
        x_test_adv = boundary.generate(x_test, **params)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        self.assertTrue((target == y_pred_adv).any())

        # Second untargeted attack
        boundary = BoundaryAttack(classifier=tfc, targeted=False, max_iter=20)
        x_test_adv = boundary.generate(x_test)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        y_pred = np.argmax(tfc.predict(x_test), axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        self.assertTrue((y_pred != y_pred_adv).any())
    finally:
        # Clean-up session
        sess.close()
        tf.reset_default_graph()
def test_iris_pt(self):
    """
    Untargeted boundary attack on the PyTorch Iris classifier.

    Asserts that the adversarial samples differ from the inputs, lie in [0, 1] and do not all keep
    their true label; the remaining accuracy is logged.

    :return: None
    """
    classifier = get_iris_classifier_pt()
    untargeted = BoundaryAttack(classifier, targeted=False, max_iter=10)

    # The attack is fed a float32 copy of the test inputs
    adv_inputs = untargeted.generate(self.x_test.astype(np.float32))

    unchanged = self.x_test == adv_inputs
    self.assertFalse(unchanged.all())
    below_upper = adv_inputs <= 1
    self.assertTrue(below_upper.all())
    above_lower = adv_inputs >= 0
    self.assertTrue(above_lower.all())

    adv_labels = np.argmax(classifier.predict(adv_inputs), axis=1)
    still_correct = np.argmax(self.y_test, axis=1) == adv_labels
    self.assertFalse(still_correct.all())

    n_samples = self.y_test.shape[0]
    accuracy = np.sum(adv_labels == np.argmax(self.y_test, axis=1)) / n_samples
    logger.info('Accuracy on Iris with boundary adversarial examples: %.2f%%', (accuracy * 100))
def test_keras_iris_clipped(self):
    """
    Untargeted boundary attack on the Keras Iris classifier.

    Asserts that the adversarial samples differ from the inputs, lie in [0, 1] and do not all keep
    their true label; the remaining accuracy is logged.

    :return: None
    """
    (_, _), (inputs, labels) = self.iris
    classifier = get_iris_classifier_kr()

    untargeted = BoundaryAttack(classifier, targeted=False, max_iter=10)
    adv_inputs = untargeted.generate(inputs)

    unchanged = inputs == adv_inputs
    self.assertFalse(unchanged.all())
    below_upper = adv_inputs <= 1
    self.assertTrue(below_upper.all())
    above_lower = adv_inputs >= 0
    self.assertTrue(above_lower.all())

    adv_labels = np.argmax(classifier.predict(adv_inputs), axis=1)
    still_correct = np.argmax(labels, axis=1) == adv_labels
    self.assertFalse(still_correct.all())

    n_samples = labels.shape[0]
    accuracy = np.sum(adv_labels == np.argmax(labels, axis=1)) / n_samples
    logger.info('Accuracy on Iris with boundary adversarial examples: %.2f%%', (accuracy * 100))
def test_iris_k_unbounded(self):
    """
    Untargeted boundary attack on a Keras Iris classifier built without clip values.

    The model of the stock Iris classifier is re-wrapped in a fresh KerasClassifier that gets no
    clip values, so no value-range assertions are made here; only that the inputs changed and that
    not every sample keeps its true label.

    :return: None
    """
    stock_classifier, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=stock_classifier._model, use_logits=False, channel_index=1)

    untargeted = BoundaryAttack(classifier, targeted=False, max_iter=10)
    adv_inputs = untargeted.generate(self.x_test)
    unchanged = self.x_test == adv_inputs
    self.assertFalse(unchanged.all())

    adv_labels = np.argmax(classifier.predict(adv_inputs), axis=1)
    still_correct = np.argmax(self.y_test, axis=1) == adv_labels
    self.assertFalse(still_correct.all())

    n_samples = self.y_test.shape[0]
    accuracy = np.sum(adv_labels == np.argmax(self.y_test, axis=1)) / n_samples
    logger.info('Accuracy on Iris with boundary adversarial examples: %.2f%%', (accuracy * 100))
def test_tfclassifier(self):
    """
    Run a targeted and an untargeted boundary attack with the TensorFlowClassifier.

    The targeted run is compared against recorded reference values: one row of the third
    adversarial image must match either of two accepted reference rows, and the prediction for the
    first adversarial sample must match a recorded vector (both to 4 decimals). The untargeted run
    is checked for changed inputs, values within [0, 1] and at least one changed prediction.

    :return: None
    """
    # Build TensorFlowClassifier
    tfc, sess = get_classifier_tf()

    # The session is closed in `finally` so that a failing assertion does not skip the clean-up.
    try:
        # First targeted attack
        boundary = BoundaryAttack(classifier=tfc, targeted=True, max_iter=200, delta=0.5)
        params = {'y': random_targets(self.y_test, tfc.nb_classes())}
        x_test_adv = boundary.generate(self.x_test, **params)

        expected_x_test_adv_1 = np.asarray([
            0.42622495, 0.0, 0.0, 0.33005068, 0.2277837, 0.0, 0.18348512, 0.42622495, 0.27452883, 0.0, 0.0, 0.0,
            0.1653487, 0.70523715, 0.7367977, 0.7974912, 0.28579983, 0.0, 0.36499417, 0.0, 0.0, 0.0, 0.42622495,
            0.0, 0.26680174, 0.42622495, 0.0, 0.19260764
        ])
        expected_x_test_adv_2 = np.asarray([
            0.0459, 0., 0., 0.0756, 0.2048, 0.037, 0., 0., 0.0126, 0.4338, 0.1566, 0.3061, 0., 0.296, 0.8318,
            0.7267, 0.2252, 0.074, 0., 0.1208, 0.4362, 0., 0., 0., 0., 0.0359, 0., 0.1191
        ])

        # Two reference rows are accepted: the second is only tried when the first does not match
        try:
            np.testing.assert_array_almost_equal(x_test_adv[2, 14, :, 0], expected_x_test_adv_1, decimal=4)
        except AssertionError:
            np.testing.assert_array_almost_equal(x_test_adv[2, 14, :, 0], expected_x_test_adv_2, decimal=4)

        self.assertLessEqual(np.max(x_test_adv), 1.0)
        self.assertGreaterEqual(np.min(x_test_adv), 0.0)

        y_pred_adv = tfc.predict(x_test_adv)
        y_pred_adv_expected = np.asarray([
            1.57103419e-01, -7.31061280e-01, -4.03979905e-02, -4.79048371e-01, 9.37852338e-02, -8.01057637e-01,
            -4.77534801e-01, 1.08687377e+00, -3.06577891e-01, -5.74976981e-01
        ])
        np.testing.assert_array_almost_equal(y_pred_adv[0], y_pred_adv_expected, decimal=4)

        # Second untargeted attack
        boundary = BoundaryAttack(classifier=tfc, targeted=False, max_iter=20)
        x_test_adv = boundary.generate(self.x_test)

        self.assertFalse((self.x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        y_pred = np.argmax(tfc.predict(self.x_test), axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        self.assertTrue((y_pred != y_pred_adv).any())
    finally:
        # Clean-up session
        sess.close()
targeted=False, delta=0.05, epsilon=0.05, step_adapt=0.5) n_selected = 100 corrected = [] c_labels = [] for i in range(len(test_label)): if test_label[i] == predictions[i]: corrected.append(test_data[i]) c_labels.append(test_label[i]) if len(corrected) >= n_selected: break corrected = np.array(corrected) start = time.time() test_adv = attack.generate(corrected) end = time.time() avg_time = (end - start) / test_adv.shape[0] dis = 0 for i in range(test_adv.shape[0]): d = np.max(np.abs(corrected[i] - test_adv[i])) print(str(d)) dis += d avg_dis = dis / test_adv.shape[0] os.chdir('{}/attack/Boundary'.format(home)) with open('{}_Boundary_{}_ave.txt'.format(d_name, m_name), 'w') as f: f.write('average distance: ' + str(avg_dis)) f.write('\n\naverage used time: ' + str(avg_time))
def attack_run_rejection_policy(model, hps):
    """
    An attack run with rejection policy.

    Two per-class thresholds are first derived from the training split: the values found at the
    1% and 2% positions of the sorted class outputs of the correctly classified samples. Then, for
    every class, the first test batch is attacked towards every other class. An adversarial example
    counts as rejected when its maximum output is below the threshold of its predicted class.
    Success rate, mean distortion and the two rejection rates are printed.

    :param model: Pytorch model.
    :param hps: hyperparameters. Read here: `n_classes`, `problem`, `n_batch_test`, `device`,
        `attack_dir`, `attack`, `targeted`, `image_channel` and, for the 'cw' attack,
        `cw_confidence`. Note that `hps.n_batch_test` is overwritten with 1 before the evaluation.
    :return: None, the results are printed.
    :raises ValueError: if `hps.attack` is neither 'boundary' nor 'cw'.
    """
    # Fail fast: previously an unknown attack name only surfaced as a NameError after the
    # thresholds had already been computed.
    if hps.attack not in ('boundary', 'cw'):
        raise ValueError('Unsupported attack: {!r} (expected \'boundary\' or \'cw\')'.format(hps.attack))

    model.eval()

    # Get thresholds
    threshold_list1 = []
    threshold_list2 = []
    for label_id in range(hps.n_classes):
        # No data augmentation(crop_flip=False) when getting in-distribution thresholds
        dataset = get_dataset(data_name=hps.problem, train=True, label_id=label_id, crop_flip=False)
        in_test_loader = DataLoader(dataset=dataset, batch_size=hps.n_batch_test, shuffle=False)

        print('Inference on {}, label_id {}'.format(hps.problem, label_id))
        in_ll_list = []
        # Pure inference: no autograd graph is needed for collecting the outputs
        with torch.no_grad():
            for x, y in in_test_loader:
                x = x.to(hps.device)
                y = y.to(hps.device)
                ll = model(x)

                correct_idx = ll.argmax(dim=1) == y
                ll_ = ll[correct_idx]  # choose samples are classified correctly
                in_ll_list += list(ll_[:, label_id].detach().cpu().numpy())

        # Sort once; the thresholds are the values at the 1% and 2% positions
        sorted_ll = sorted(in_ll_list)
        thresh1 = sorted_ll[int(0.01 * len(sorted_ll))]
        thresh2 = sorted_ll[int(0.02 * len(sorted_ll))]
        threshold_list1.append(thresh1)  # 1st percentile as threshold
        threshold_list2.append(thresh2)  # 2nd percentile as threshold
        print('1st & 2nd percentile thresholds: {:.3f}, {:.3f}'.format(thresh1, thresh2))

    # Evaluation
    n_total = 0  # number of attack attempts (incremented once per attacked batch and target class)
    n_successful_adv = 0  # total number of successful adversarial examples generated
    n_rejected_adv1 = 0  # adversarial examples rejected by the 1st percentile thresholds
    n_rejected_adv2 = 0  # adversarial examples rejected by the 2nd percentile thresholds

    attack_path = os.path.join(hps.attack_dir, hps.attack)
    # Creates missing parents as well and tolerates an already existing directory
    os.makedirs(attack_path, exist_ok=True)

    thresholds1 = torch.tensor(threshold_list1).to(hps.device)
    thresholds2 = torch.tensor(threshold_list2).to(hps.device)
    l2_distortion_list = []

    wrapped_target_model = PyTorchClassifier(model=model, loss=None, optimizer=None,
                                             input_shape=(hps.image_channel, 32, 32),
                                             nb_classes=hps.n_classes)
    if hps.attack == 'boundary':
        attack = BoundaryAttack(wrapped_target_model, targeted=hps.targeted)
    else:  # 'cw', validated on entry
        attack = CarliniL2Method(wrapped_target_model, confidence=hps.cw_confidence, targeted=hps.targeted)

    # The attacks are run one sample at a time
    hps.n_batch_test = 1

    for label_id in range(hps.n_classes):
        dataset = get_dataset(data_name=hps.problem, train=False, label_id=label_id)
        test_loader = DataLoader(dataset=dataset, batch_size=hps.n_batch_test, shuffle=False)
        for x, y in test_loader:
            # Note that images are scaled to [0., 1.0]
            x, y = x.to(hps.device), y.to(hps.device)
            with torch.no_grad():
                output = model(x)

            pred = output.argmax(dim=1)
            correct_idx = pred == y
            # Only evaluate on the correct classified samples by clean classifier.
            x, y = x[correct_idx], y[correct_idx]

            for target_id in range(hps.n_classes):
                if target_id == label_id:
                    continue
                n_total += 1
                y_cur = torch.LongTensor([target_id] * x.size(0)).to(hps.device)

                x_ = x.cpu().numpy().astype(np.float32)
                # NOTE(review): labels are handed to the attack as float class indices, not
                # one-hot vectors - confirm this is the label format `generate` expects.
                y_ = y_cur.cpu().numpy().astype(np.float32)
                adv_x = attack.generate(x_, y_)

                with torch.no_grad():
                    adv_x = torch.tensor(adv_x).to(hps.device)
                    output = model(adv_x)

                logits, preds = output.max(dim=1)

                success_idx = preds == y_cur
                n_successful_adv += success_idx.sum().item()

                diff = adv_x - x
                # L2 norm along the last axis, averaged over all remaining axes
                l2_distortion = diff.norm(p=2, dim=-1).mean().item()
                l2_distortion_list.append(l2_distortion)

                # NOTE(review): rejections are counted for every adversarial output, not only
                # for those in `success_idx`, so the counts may exceed `n_successful_adv`.
                rej_idx1 = logits < thresholds1[preds]
                n_rejected_adv1 += rej_idx1.sum().item()

                rej_idx2 = logits < thresholds2[preds]
                n_rejected_adv2 += rej_idx2.sum().item()

            break  # only one batch

        print('Evaluating on samples of class {} ...'.format(label_id))

    # The rates are undefined (nan) when the denominator is zero; previously this raised
    # ZeroDivisionError and all results were lost.
    undefined = float('nan')
    reject_rate1 = n_rejected_adv1 / n_successful_adv if n_successful_adv else undefined
    reject_rate2 = n_rejected_adv2 / n_successful_adv if n_successful_adv else undefined
    success_adv_rate = n_successful_adv / n_total if n_total else undefined
    print('success rate of adv examples generation: {}/{}={:.4f}'.format(n_successful_adv, n_total,
                                                                         success_adv_rate))
    print('Mean L2 distortion of Adv Examples: {:.4f}'.format(np.mean(l2_distortion_list)))
    print('1st percentile, reject success rate: {}/{}={:.4f}'.format(n_rejected_adv1, n_successful_adv,
                                                                     reject_rate1))
    print('2nd percentile, reject success rate: {}/{}={:.4f}'.format(n_rejected_adv2, n_successful_adv,
                                                                     reject_rate2))
def test_tensorflow_mnist(self):
    """
    Run a targeted and an untargeted boundary attack on MNIST with the TensorFlowClassifier.

    The targeted result is only checked for its value range [0, 1]. The untargeted result is
    checked for changed inputs, values within [0, 1] (small tolerance) and at least one changed
    prediction. Finally the test verifies that the clean test data was not modified in place.

    :return: None
    """
    (_, _), (x_test, y_test) = self.mnist
    # Snapshot used at the end to detect in-place modification of the inputs
    x_test_original = x_test.copy()

    # Build TensorFlowClassifier
    tfc, sess = get_classifier_tf()

    # The session is closed in `finally` so that a failing assertion does not skip the clean-up.
    try:
        # First targeted attack
        boundary = BoundaryAttack(classifier=tfc, targeted=True, max_iter=200, delta=0.5)
        params = {'y': random_targets(y_test, tfc.nb_classes())}
        x_test_adv = boundary.generate(x_test, **params)

        # Only the value range is asserted for the targeted run; no exact reference values are
        # compared here.
        self.assertLessEqual(np.max(x_test_adv), 1.0)
        self.assertGreaterEqual(np.min(x_test_adv), 0.0)

        # Second untargeted attack
        boundary = BoundaryAttack(classifier=tfc, targeted=False, max_iter=20)
        x_test_adv = boundary.generate(x_test)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        y_pred = np.argmax(tfc.predict(x_test), axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        self.assertTrue((y_pred != y_pred_adv).any())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
    finally:
        # Clean-up session
        sess.close()