def test_5_pytorch_resume(self):
    x_test = np.reshape(self.x_test_mnist, (self.x_test_mnist.shape[0], 1, 28, 28)).astype(np.float32)

    # Build PyTorchClassifier
    ptc = get_image_classifier_pt()

    # HSJ attack
    hsj = HopSkipJump(classifier=ptc, targeted=True, max_iter=10, max_eval=100, init_eval=10)

    params = {"y": self.y_test_mnist[2:3], "x_adv_init": x_test[2:3]}
    x_test_adv1 = hsj.generate(x_test[0:1], **params)
    # Measure the perturbation of the single attacked sample, not the whole test set
    diff1 = np.linalg.norm(x_test_adv1 - x_test[0:1])

    params.update(resume=True, x_adv_init=x_test_adv1)
    x_test_adv2 = hsj.generate(x_test[0:1], **params)
    params.update(x_adv_init=x_test_adv2)
    x_test_adv2 = hsj.generate(x_test[0:1], **params)
    diff2 = np.linalg.norm(x_test_adv2 - x_test[0:1])

    # Resuming the attack from the previous result should shrink the perturbation
    self.assertGreater(diff1, diff2)
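# A minimal sketch of the resume pattern exercised by the test above: HopSkipJump.generate
# accepts `x_adv_init` (a starting adversarial example) and `resume=True` (continue refining
# from it), so repeated calls keep shrinking the perturbation. `classifier`, `x`, `y_target`,
# and `x_init` are hypothetical placeholders, not names from the original code.
hsj = HopSkipJump(classifier=classifier, targeted=True, max_iter=10, max_eval=100, init_eval=10)
x_adv = hsj.generate(x, y=y_target, x_adv_init=x_init)
for _ in range(3):
    # Each resumed call starts from the previous result instead of a fresh initialisation
    x_adv = hsj.generate(x, y=y_target, x_adv_init=x_adv, resume=True)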
def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
    """
    Infer membership of input `x` in the estimator's training data.

    :param x: Input data.
    :param y: True labels for `x`.

    :Keyword Arguments for HopSkipJump:
        * *norm*: Order of the norm. Possible values: "inf", np.inf or 2.
        * *max_iter*: Maximum number of iterations.
        * *max_eval*: Maximum number of evaluations for estimating gradient.
        * *init_eval*: Initial number of evaluations for estimating gradient.
        * *init_size*: Maximum number of trials for initial generation of adversarial examples.
        * *verbose*: Show progress bars.

    :return: An array holding the inferred membership status, 1 indicates a member and 0 indicates non-member.
    """
    from art.attacks.evasion.hop_skip_jump import HopSkipJump

    if y is None:
        raise ValueError("Argument `y` is None, but this attack requires true labels `y` to be provided.")

    if self.distance_threshold_tau is None:
        raise ValueError(
            "No value for distance threshold `distance_threshold_tau` provided. Please set "
            "`distance_threshold_tau` or run method `calibrate_distance_threshold` on known training and test "
            "datasets."
        )

    if "classifier" in kwargs:
        raise ValueError("Keyword `classifier` in kwargs is not supported.")

    if "targeted" in kwargs:
        raise ValueError("Keyword `targeted` in kwargs is not supported.")

    y = check_and_transform_label_format(y, self.estimator.nb_classes)

    hsj = HopSkipJump(classifier=self.estimator, targeted=False, **kwargs)
    x_adv = hsj.generate(x=x, y=y)

    distance = np.linalg.norm((x_adv - x).reshape((x.shape[0], -1)), ord=2, axis=1)

    y_pred = self.estimator.predict(x=x)

    # Samples that are already misclassified get distance 0, i.e. they are treated as non-members
    distance[np.argmax(y_pred, axis=1) != np.argmax(y, axis=1)] = 0

    is_member = np.where(distance > self.distance_threshold_tau, 1, 0)

    return is_member
def calibrate_distance_threshold(
    self,
    classifier_train: "CLASSIFIER_TYPE",
    x_train: np.ndarray,
    y_train: np.ndarray,
    x_test: np.ndarray,
    y_test: np.ndarray,
    **kwargs
) -> None:
    """
    Calibrate the distance threshold maximising the membership inference accuracy on `x_train` and `x_test`.

    :param classifier_train: A trained classifier.
    :param x_train: Training data.
    :param y_train: Labels of training data `x_train`.
    :param x_test: Test data.
    :param y_test: Labels of test data `x_test`.
    """
    from art.attacks.evasion.hop_skip_jump import HopSkipJump

    hsj = HopSkipJump(classifier=classifier_train, **kwargs)
    x_train_adv = hsj.generate(x=x_train, y=y_train)
    x_test_adv = hsj.generate(x=x_test, y=y_test)

    distance_train = np.linalg.norm((x_train_adv - x_train).reshape((x_train.shape[0], -1)), ord=2, axis=1)
    distance_test = np.linalg.norm((x_test_adv - x_test).reshape((x_test.shape[0], -1)), ord=2, axis=1)

    y_train_pred = self.estimator.predict(x=x_train)
    y_test_pred = self.estimator.predict(x=x_test)

    # Misclassified samples are assigned distance 0 so they never count as members
    distance_train[np.argmax(y_train_pred, axis=1) != np.argmax(y_train, axis=1)] = 0
    distance_test[np.argmax(y_test_pred, axis=1) != np.argmax(y_test, axis=1)] = 0

    num_increments = 100
    tau_increment = np.amax([np.amax(distance_train), np.amax(distance_test)]) / num_increments

    acc_max = 0.0
    distance_threshold_tau = 0.0

    # Grid search over the threshold: members should need a large adversarial perturbation,
    # non-members a small one
    for i_tau in range(1, num_increments):
        is_member_train = np.where(distance_train > i_tau * tau_increment, 1, 0)
        is_member_test = np.where(distance_test > i_tau * tau_increment, 1, 0)

        acc = (np.sum(is_member_train) + (is_member_test.shape[0] - np.sum(is_member_test))) / (
            is_member_train.shape[0] + is_member_test.shape[0]
        )

        if acc > acc_max:
            distance_threshold_tau = i_tau * tau_increment
            acc_max = acc

    self.distance_threshold_tau = distance_threshold_tau
def _hop_skip_jump(model, data, labels, attack_args):
    norm = _get_norm_value(attack_args.get('norm', 'l2'))
    max_iter = attack_args.get('max_iter', 50)
    max_eval = attack_args.get('max_eval', 10000)
    init_eval = attack_args.get('init_eval', 100)
    init_size = attack_args.get('init_size', 100)
    targeted = attack_args.get('targeted', False)

    print('>>> Generating Hop-Skip-Jump examples.')
    attacker = HopSkipJump(classifier=model, targeted=targeted, norm=norm,
                           max_iter=max_iter, max_eval=max_eval,
                           init_eval=init_eval, init_size=init_size)
    return attacker.generate(data, labels)
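# A hedged usage sketch for the wrapper above. `model` must already be wrapped in an ART
# classifier (e.g. PyTorchClassifier), and `_get_norm_value` is assumed to map strings such
# as 'l2' and 'linf' to 2 and np.inf; all variable names here are placeholders.
attack_args = {
    'norm': 'l2',
    'max_iter': 50,
    'max_eval': 10000,
    'init_eval': 100,
    'init_size': 100,
    'targeted': False,
}
x_adv = _hop_skip_jump(model, data=x_test, labels=y_test, attack_args=attack_args)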
def test_7_keras_iris_unbounded(self):
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)

    # Norm=2
    attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", acc * 100)

    # Norm=np.inf
    attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", acc * 100)

    # Clean-up session
    k.clear_session()
def test_4_pytorch_iris(self):
    classifier = get_tabular_classifier_pt()
    x_test = self.x_test_iris.astype(np.float32)

    # Norm=2
    attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", acc * 100)

    # Norm=np.inf
    attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", acc * 100)
def test_7_keras_iris_clipped(self):
    classifier = get_tabular_classifier_kr()

    # Norm=2
    attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", acc * 100)

    # Norm=np.inf
    attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", acc * 100)

    # Clean-up session
    k.clear_session()
def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
    """
    Infer membership of input `x` in the estimator's training data.

    :param x: Input data.
    :param y: True labels for `x`.
    :param kwargs: Parameters for the HopSkipJump attack, except argument `estimator`.
    :return: An array holding the inferred membership status, 1 indicates a member and 0 indicates non-member.
    """
    from art.attacks.evasion.hop_skip_jump import HopSkipJump

    if y is None:
        raise ValueError("Argument `y` is None, but this attack requires true labels `y` to be provided.")

    hsj = HopSkipJump(classifier=self.estimator, **kwargs)
    x_adv = hsj.generate(x=x, y=y)

    distance = np.linalg.norm((x_adv - x).reshape((x.shape[0], -1)), ord=2, axis=1)

    y_pred = self.estimator.predict(x=x)

    # Misclassified samples are assigned distance 0, i.e. treated as non-members
    distance[np.argmax(y_pred, axis=1) != np.argmax(y, axis=1)] = 0

    is_member = np.where(distance > self.distance_threshold_tau, 1, 0)

    return is_member
def test_6_scikitlearn(self):
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC, LinearSVC
    from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
    from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, ExtraTreesClassifier
    from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier

    from art.estimators.classification.scikitlearn import SklearnClassifier

    scikitlearn_test_cases = [
        DecisionTreeClassifier(),
        ExtraTreeClassifier(),
        AdaBoostClassifier(),
        BaggingClassifier(),
        ExtraTreesClassifier(n_estimators=10),
        GradientBoostingClassifier(n_estimators=10),
        RandomForestClassifier(n_estimators=10),
        LogisticRegression(solver="lbfgs", multi_class="auto"),
        SVC(gamma="auto"),
        LinearSVC(),
    ]

    x_test_original = self.x_test_iris.copy()

    for model in scikitlearn_test_cases:
        classifier = SklearnClassifier(model=model, clip_values=(0, 1))
        classifier.fit(x=self.x_test_iris, y=self.y_test_iris)

        # Norm=2
        attack = HopSkipJump(classifier, targeted=False, max_iter=20, max_eval=100, init_eval=10)
        x_test_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info(
            "Accuracy of %s on Iris with HopSkipJump adversarial examples: %.2f%%",
            classifier.__class__.__name__,
            acc * 100,
        )

        # Norm=np.inf
        attack = HopSkipJump(classifier, targeted=False, max_iter=20, max_eval=100, init_eval=10, norm=np.inf)
        x_test_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info(
            "Accuracy of %s on Iris with HopSkipJump adversarial examples: %.2f%%",
            classifier.__class__.__name__,
            acc * 100,
        )

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_iris))), 0.0, delta=0.00001)
def test_2_tensorflow_iris(self):
    classifier, sess = get_tabular_classifier_tf()

    # Test untargeted attack and norm=2
    attack = HopSkipJump(classifier, targeted=False, max_iter=20, max_eval=100, init_eval=10)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", acc * 100)

    # Test untargeted attack and norm=np.inf
    attack = HopSkipJump(classifier, targeted=False, max_iter=20, max_eval=100, init_eval=10, norm=np.inf)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
    acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", acc * 100)

    # Test targeted attack and norm=2
    targets = random_targets(self.y_test_iris, nb_classes=3)
    attack = HopSkipJump(classifier, targeted=True, max_iter=20, max_eval=100, init_eval=10)
    x_test_adv = attack.generate(self.x_test_iris, y=targets)
    self.assertFalse((self.x_test_iris == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
    acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Success rate of targeted HopSkipJump on Iris: %.2f%%", acc * 100)

    # Test targeted attack and norm=np.inf
    targets = random_targets(self.y_test_iris, nb_classes=3)
    attack = HopSkipJump(classifier, targeted=True, max_iter=20, max_eval=100, init_eval=10, norm=np.inf)
    x_test_adv = attack.generate(self.x_test_iris, y=targets)
    self.assertFalse((self.x_test_iris == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
    acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Success rate of targeted HopSkipJump on Iris: %.2f%%", acc * 100)

    # Clean-up session
    if sess is not None:
        sess.close()
def test_pytorch_classifier(self):
    """
    Third test with the PyTorchClassifier.

    :return:
    """
    x_test = np.swapaxes(self.x_test_mnist, 1, 3).astype(np.float32)
    x_test_original = x_test.copy()

    # Build PyTorchClassifier
    ptc = get_image_classifier_pt()

    # First targeted attack and norm=2
    hsj = HopSkipJump(classifier=ptc, targeted=True, max_iter=2, max_eval=100, init_eval=10)
    params = {"y": random_targets(self.y_test_mnist, ptc.nb_classes)}
    x_test_adv = hsj.generate(x_test, **params)

    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())

    target = np.argmax(params["y"], axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((target == y_pred_adv).any())

    # First targeted attack and norm=np.inf
    hsj = HopSkipJump(classifier=ptc, targeted=True, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
    params = {"y": random_targets(self.y_test_mnist, ptc.nb_classes)}
    x_test_adv = hsj.generate(x_test, **params)

    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())

    target = np.argmax(params["y"], axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((target == y_pred_adv).any())

    # Second untargeted attack and norm=2
    hsj = HopSkipJump(classifier=ptc, targeted=False, max_iter=2, max_eval=100, init_eval=10)
    x_test_adv = hsj.generate(x_test)

    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())

    y_pred = np.argmax(ptc.predict(x_test), axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((y_pred != y_pred_adv).any())

    # Second untargeted attack and norm=np.inf
    hsj = HopSkipJump(classifier=ptc, targeted=False, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
    x_test_adv = hsj.generate(x_test)

    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())

    y_pred = np.argmax(ptc.predict(x_test), axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((y_pred != y_pred_adv).any())

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def test_3_tensorflow_mnist(self):
    """
    First test with the TensorFlowClassifier.

    :return:
    """
    x_test_original = self.x_test_mnist.copy()

    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf()

    # First targeted attack and norm=2
    hsj = HopSkipJump(classifier=tfc, targeted=True, max_iter=20, max_eval=100, init_eval=10)
    params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
    x_test_adv = hsj.generate(self.x_test_mnist, **params)

    self.assertFalse((self.x_test_mnist == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())

    target = np.argmax(params["y"], axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
    self.assertTrue((target == y_pred_adv).any())

    # Test the masking 1
    mask = np.random.binomial(n=1, p=0.5, size=np.prod(self.x_test_mnist.shape))
    mask = mask.reshape(self.x_test_mnist.shape)
    params.update(mask=mask)

    x_test_adv = hsj.generate(self.x_test_mnist, **params)
    mask_diff = (1 - mask) * (x_test_adv - self.x_test_mnist)
    self.assertAlmostEqual(float(np.max(np.abs(mask_diff))), 0.0, delta=0.00001)
    unmask_diff = mask * (x_test_adv - self.x_test_mnist)
    self.assertGreater(float(np.sum(np.abs(unmask_diff))), 0.0)

    # Test the masking 2
    mask = np.random.binomial(n=1, p=0.5, size=np.prod(self.x_test_mnist.shape[1:]))
    mask = mask.reshape(self.x_test_mnist.shape[1:])
    params.update(mask=mask)

    x_test_adv = hsj.generate(self.x_test_mnist, **params)
    mask_diff = (1 - mask) * (x_test_adv - self.x_test_mnist)
    self.assertAlmostEqual(float(np.max(np.abs(mask_diff))), 0.0, delta=0.00001)
    unmask_diff = mask * (x_test_adv - self.x_test_mnist)
    self.assertGreater(float(np.sum(np.abs(unmask_diff))), 0.0)

    # First targeted attack and norm=np.inf
    hsj = HopSkipJump(classifier=tfc, targeted=True, max_iter=20, max_eval=100, init_eval=10, norm=np.inf)
    params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
    x_test_adv = hsj.generate(self.x_test_mnist, **params)

    self.assertFalse((self.x_test_mnist == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())

    target = np.argmax(params["y"], axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
    self.assertTrue((target == y_pred_adv).any())

    # Test the masking 1
    mask = np.random.binomial(n=1, p=0.5, size=np.prod(self.x_test_mnist.shape))
    mask = mask.reshape(self.x_test_mnist.shape)
    params.update(mask=mask)

    x_test_adv = hsj.generate(self.x_test_mnist, **params)
    mask_diff = (1 - mask) * (x_test_adv - self.x_test_mnist)
    self.assertAlmostEqual(float(np.max(np.abs(mask_diff))), 0.0, delta=0.00001)
    unmask_diff = mask * (x_test_adv - self.x_test_mnist)
    self.assertGreater(float(np.sum(np.abs(unmask_diff))), 0.0)

    # Test the masking 2
    mask = np.random.binomial(n=1, p=0.5, size=np.prod(self.x_test_mnist.shape[1:]))
    mask = mask.reshape(self.x_test_mnist.shape[1:])
    params.update(mask=mask)

    x_test_adv = hsj.generate(self.x_test_mnist, **params)
    mask_diff = (1 - mask) * (x_test_adv - self.x_test_mnist)
    self.assertAlmostEqual(float(np.max(np.abs(mask_diff))), 0.0, delta=0.00001)
    unmask_diff = mask * (x_test_adv - self.x_test_mnist)
    self.assertGreater(float(np.sum(np.abs(unmask_diff))), 0.0)

    # Second untargeted attack and norm=2
    hsj = HopSkipJump(classifier=tfc, targeted=False, max_iter=20, max_eval=100, init_eval=10)
    x_test_adv = hsj.generate(self.x_test_mnist)

    self.assertFalse((self.x_test_mnist == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())

    y_pred = np.argmax(tfc.predict(self.x_test_mnist), axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
    self.assertTrue((y_pred != y_pred_adv).any())

    # Test the masking 1
    mask = np.random.binomial(n=1, p=0.5, size=np.prod(self.x_test_mnist.shape))
    mask = mask.reshape(self.x_test_mnist.shape)

    x_test_adv = hsj.generate(self.x_test_mnist, mask=mask)
    mask_diff = (1 - mask) * (x_test_adv - self.x_test_mnist)
    self.assertAlmostEqual(float(np.max(np.abs(mask_diff))), 0.0, delta=0.00001)
    unmask_diff = mask * (x_test_adv - self.x_test_mnist)
    self.assertGreater(float(np.sum(np.abs(unmask_diff))), 0.0)

    # Test the masking 2
    mask = np.random.binomial(n=1, p=0.5, size=np.prod(self.x_test_mnist.shape[1:]))
    mask = mask.reshape(self.x_test_mnist.shape[1:])

    x_test_adv = hsj.generate(self.x_test_mnist, mask=mask)
    mask_diff = (1 - mask) * (x_test_adv - self.x_test_mnist)
    self.assertAlmostEqual(float(np.max(np.abs(mask_diff))), 0.0, delta=0.00001)
    unmask_diff = mask * (x_test_adv - self.x_test_mnist)
    self.assertGreater(float(np.sum(np.abs(unmask_diff))), 0.0)

    # Second untargeted attack and norm=np.inf
    hsj = HopSkipJump(classifier=tfc, targeted=False, max_iter=20, max_eval=100, init_eval=10, norm=np.inf)
    x_test_adv = hsj.generate(self.x_test_mnist)

    self.assertFalse((self.x_test_mnist == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())

    y_pred = np.argmax(tfc.predict(self.x_test_mnist), axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
    self.assertTrue((y_pred != y_pred_adv).any())

    # Test the masking 1
    mask = np.random.binomial(n=1, p=0.5, size=np.prod(self.x_test_mnist.shape))
    mask = mask.reshape(self.x_test_mnist.shape)

    x_test_adv = hsj.generate(self.x_test_mnist, mask=mask)
    mask_diff = (1 - mask) * (x_test_adv - self.x_test_mnist)
    self.assertAlmostEqual(float(np.max(np.abs(mask_diff))), 0.0, delta=0.00001)
    unmask_diff = mask * (x_test_adv - self.x_test_mnist)
    self.assertGreater(float(np.sum(np.abs(unmask_diff))), 0.0)

    # Test the masking 2
    mask = np.random.binomial(n=1, p=0.5, size=np.prod(self.x_test_mnist.shape[1:]))
    mask = mask.reshape(self.x_test_mnist.shape[1:])

    x_test_adv = hsj.generate(self.x_test_mnist, mask=mask)
    mask_diff = (1 - mask) * (x_test_adv - self.x_test_mnist)
    self.assertAlmostEqual(float(np.max(np.abs(mask_diff))), 0.0, delta=0.00001)
    unmask_diff = mask * (x_test_adv - self.x_test_mnist)
    self.assertGreater(float(np.sum(np.abs(unmask_diff))), 0.0)

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)

    # Clean-up session
    if sess is not None:
        sess.close()
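# A minimal sketch of the `mask` keyword exercised in the test above: HopSkipJump confines
# perturbations to features where mask == 1, and accepts either a per-sample mask (same shape
# as x) or a single mask broadcast over the batch (shape x.shape[1:]). `classifier` and `x`
# are hypothetical placeholders.
mask = np.random.binomial(n=1, p=0.5, size=x.shape[1:])
hsj = HopSkipJump(classifier=classifier, targeted=False, max_iter=20, max_eval=100, init_eval=10)
x_adv = hsj.generate(x, mask=mask)  # features with mask == 0 remain untouched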
def test_4_pytorch_classifier(self):
    """
    Third test with the PyTorchClassifier.

    :return:
    """
    x_test = np.swapaxes(self.x_test_mnist, 1, 3).astype(np.float32)
    x_test_original = x_test.copy()

    # Build PyTorchClassifier
    ptc = get_image_classifier_pt()

    # First targeted attack and norm=2
    hsj = HopSkipJump(classifier=ptc, targeted=True, max_iter=2, max_eval=100, init_eval=10)
    params = {"y": random_targets(self.y_test_mnist, ptc.nb_classes)}
    x_test_adv = hsj.generate(x_test, **params)

    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())

    target = np.argmax(params["y"], axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((target == y_pred_adv).any())

    # Test the masking 1
    mask = np.random.binomial(n=1, p=0.5, size=np.prod(x_test.shape))
    mask = mask.reshape(x_test.shape)
    params.update(mask=mask)

    x_test_adv = hsj.generate(x_test, **params)
    mask_diff = (1 - mask) * (x_test_adv - x_test)
    self.assertAlmostEqual(float(np.max(np.abs(mask_diff))), 0.0, delta=0.00001)
    unmask_diff = mask * (x_test_adv - x_test)
    self.assertGreater(float(np.sum(np.abs(unmask_diff))), 0.0)

    # Test the masking 2
    mask = np.random.binomial(n=1, p=0.5, size=np.prod(x_test.shape[1:]))
    mask = mask.reshape(x_test.shape[1:])
    params.update(mask=mask)

    x_test_adv = hsj.generate(x_test, **params)
    mask_diff = (1 - mask) * (x_test_adv - x_test)
    self.assertAlmostEqual(float(np.max(np.abs(mask_diff))), 0.0, delta=0.00001)
    unmask_diff = mask * (x_test_adv - x_test)
    self.assertGreater(float(np.sum(np.abs(unmask_diff))), 0.0)

    # First targeted attack and norm=np.inf
    hsj = HopSkipJump(classifier=ptc, targeted=True, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
    params = {"y": random_targets(self.y_test_mnist, ptc.nb_classes)}
    x_test_adv = hsj.generate(x_test, **params)

    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())

    target = np.argmax(params["y"], axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((target == y_pred_adv).any())

    # Test the masking 1
    mask = np.random.binomial(n=1, p=0.5, size=np.prod(x_test.shape))
    mask = mask.reshape(x_test.shape)
    params.update(mask=mask)

    x_test_adv = hsj.generate(x_test, **params)
    mask_diff = (1 - mask) * (x_test_adv - x_test)
    self.assertAlmostEqual(float(np.max(np.abs(mask_diff))), 0.0, delta=0.00001)
    unmask_diff = mask * (x_test_adv - x_test)
    self.assertGreater(float(np.sum(np.abs(unmask_diff))), 0.0)

    # Test the masking 2
    mask = np.random.binomial(n=1, p=0.5, size=np.prod(x_test.shape[1:]))
    mask = mask.reshape(x_test.shape[1:])
    params.update(mask=mask)

    x_test_adv = hsj.generate(x_test, **params)
    mask_diff = (1 - mask) * (x_test_adv - x_test)
    self.assertAlmostEqual(float(np.max(np.abs(mask_diff))), 0.0, delta=0.00001)
    unmask_diff = mask * (x_test_adv - x_test)
    self.assertGreater(float(np.sum(np.abs(unmask_diff))), 0.0)

    # Second untargeted attack and norm=2
    hsj = HopSkipJump(classifier=ptc, targeted=False, max_iter=2, max_eval=100, init_eval=10)
    x_test_adv = hsj.generate(x_test)

    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())

    y_pred = np.argmax(ptc.predict(x_test), axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((y_pred != y_pred_adv).any())

    # Test the masking 1
    mask = np.random.binomial(n=1, p=0.5, size=np.prod(x_test.shape))
    mask = mask.reshape(x_test.shape)

    x_test_adv = hsj.generate(x_test, mask=mask)
    mask_diff = (1 - mask) * (x_test_adv - x_test)
    self.assertAlmostEqual(float(np.max(np.abs(mask_diff))), 0.0, delta=0.00001)
    unmask_diff = mask * (x_test_adv - x_test)
    self.assertGreater(float(np.sum(np.abs(unmask_diff))), 0.0)

    # Test the masking 2
    mask = np.random.binomial(n=1, p=0.5, size=np.prod(x_test.shape[1:]))
    mask = mask.reshape(x_test.shape[1:])

    x_test_adv = hsj.generate(x_test, mask=mask)
    mask_diff = (1 - mask) * (x_test_adv - x_test)
    self.assertAlmostEqual(float(np.max(np.abs(mask_diff))), 0.0, delta=0.00001)
    unmask_diff = mask * (x_test_adv - x_test)
    self.assertGreater(float(np.sum(np.abs(unmask_diff))), 0.0)

    # Second untargeted attack and norm=np.inf
    hsj = HopSkipJump(classifier=ptc, targeted=False, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
    x_test_adv = hsj.generate(x_test)

    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())

    y_pred = np.argmax(ptc.predict(x_test), axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((y_pred != y_pred_adv).any())

    # Test the masking 1
    mask = np.random.binomial(n=1, p=0.5, size=np.prod(x_test.shape))
    mask = mask.reshape(x_test.shape)

    x_test_adv = hsj.generate(x_test, mask=mask)
    mask_diff = (1 - mask) * (x_test_adv - x_test)
    self.assertAlmostEqual(float(np.max(np.abs(mask_diff))), 0.0, delta=0.00001)
    unmask_diff = mask * (x_test_adv - x_test)
    self.assertGreater(float(np.sum(np.abs(unmask_diff))), 0.0)

    # Test the masking 2
    mask = np.random.binomial(n=1, p=0.5, size=np.prod(x_test.shape[1:]))
    mask = mask.reshape(x_test.shape[1:])

    x_test_adv = hsj.generate(x_test, mask=mask)
    mask_diff = (1 - mask) * (x_test_adv - x_test)
    self.assertAlmostEqual(float(np.max(np.abs(mask_diff))), 0.0, delta=0.00001)
    unmask_diff = mask * (x_test_adv - x_test)
    self.assertGreater(float(np.sum(np.abs(unmask_diff))), 0.0)

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def calibrate_distance_threshold(self, x_train: np.ndarray, y_train: np.ndarray,
                                 x_test: np.ndarray, y_test: np.ndarray, **kwargs):
    """
    Calibrate the distance threshold maximising the membership inference accuracy on `x_train` and `x_test`.

    :param x_train: Training data.
    :param y_train: Labels of training data `x_train`.
    :param x_test: Test data.
    :param y_test: Labels of test data `x_test`.

    :Keyword Arguments for HopSkipJump:
        * *norm*: Order of the norm. Possible values: "inf", np.inf or 2.
        * *max_iter*: Maximum number of iterations.
        * *max_eval*: Maximum number of evaluations for estimating gradient.
        * *init_eval*: Initial number of evaluations for estimating gradient.
        * *init_size*: Maximum number of trials for initial generation of adversarial examples.
        * *verbose*: Show progress bars.
    """
    from art.attacks.evasion.hop_skip_jump import HopSkipJump

    if "classifier" in kwargs:
        raise ValueError("Keyword `classifier` in kwargs is not supported.")

    if "targeted" in kwargs:
        raise ValueError("Keyword `targeted` in kwargs is not supported.")

    y_train = check_and_transform_label_format(y_train, self.estimator.nb_classes)
    y_test = check_and_transform_label_format(y_test, self.estimator.nb_classes)

    hsj = HopSkipJump(classifier=self.estimator, targeted=False, **kwargs)
    x_train_adv = hsj.generate(x=x_train, y=y_train)
    x_test_adv = hsj.generate(x=x_test, y=y_test)

    distance_train = np.linalg.norm((x_train_adv - x_train).reshape((x_train.shape[0], -1)), ord=2, axis=1)
    distance_test = np.linalg.norm((x_test_adv - x_test).reshape((x_test.shape[0], -1)), ord=2, axis=1)

    y_train_pred = self.estimator.predict(x=x_train)
    y_test_pred = self.estimator.predict(x=x_test)

    # Misclassified samples are assigned distance 0 so they never count as members
    distance_train[np.argmax(y_train_pred, axis=1) != np.argmax(y_train, axis=1)] = 0
    distance_test[np.argmax(y_test_pred, axis=1) != np.argmax(y_test, axis=1)] = 0

    num_increments = 100
    tau_increment = np.amax([np.amax(distance_train), np.amax(distance_test)]) / num_increments

    acc_max = 0.0
    distance_threshold_tau = 0.0

    # Grid search over the threshold: members should need a large adversarial perturbation,
    # non-members a small one
    for i_tau in range(1, num_increments):
        is_member_train = np.where(distance_train > i_tau * tau_increment, 1, 0)
        is_member_test = np.where(distance_test > i_tau * tau_increment, 1, 0)

        acc = (np.sum(is_member_train) + (is_member_test.shape[0] - np.sum(is_member_test))) / (
            is_member_train.shape[0] + is_member_test.shape[0]
        )

        if acc > acc_max:
            distance_threshold_tau = i_tau * tau_increment
            acc_max = acc

    self.distance_threshold_tau = distance_threshold_tau
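# A minimal usage sketch of the calibration/inference flow above, assuming ART's
# LabelOnlyDecisionBoundary attack class. `classifier`, `x_member`, `y_member`,
# `x_nonmember`, `y_nonmember`, `x_candidates`, and `y_candidates` are hypothetical
# placeholders, not names from the original code.
from art.attacks.inference.membership_inference import LabelOnlyDecisionBoundary

attack = LabelOnlyDecisionBoundary(estimator=classifier)
# Calibrate tau on data with known membership status, then infer membership of new samples
attack.calibrate_distance_threshold(
    x_train=x_member, y_train=y_member,
    x_test=x_nonmember, y_test=y_nonmember,
    max_iter=50, max_eval=1000, init_eval=10,
)
membership = attack.infer(x_candidates, y_candidates)  # 1 = member, 0 = non-member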
def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
    """
    Infer membership of input `x` in the estimator's training data.

    :param x: Input data.
    :param y: True labels for `x`.
    :param probabilities: A boolean indicating whether to return the predicted probabilities per class,
                          or just the predicted class.

    :Keyword Arguments for HopSkipJump:
        * *norm*: Order of the norm. Possible values: "inf", np.inf or 2.
        * *max_iter*: Maximum number of iterations.
        * *max_eval*: Maximum number of evaluations for estimating gradient.
        * *init_eval*: Initial number of evaluations for estimating gradient.
        * *init_size*: Maximum number of trials for initial generation of adversarial examples.
        * *verbose*: Show progress bars.

    :return: An array holding the inferred membership status, 1 indicates a member and 0 indicates non-member,
             or class probabilities.
    """
    from art.attacks.evasion.hop_skip_jump import HopSkipJump

    if y is None:
        raise ValueError("Argument `y` is None, but this attack requires true labels `y` to be provided.")

    if self.distance_threshold_tau is None:
        raise ValueError(
            "No value for distance threshold `distance_threshold_tau` provided. Please set "
            "`distance_threshold_tau` or run method `calibrate_distance_threshold` on known training and test "
            "datasets."
        )

    probabilities = kwargs.pop("probabilities", False)

    if "classifier" in kwargs:
        raise ValueError("Keyword `classifier` in kwargs is not supported.")

    if "targeted" in kwargs:
        raise ValueError("Keyword `targeted` in kwargs is not supported.")

    y = check_and_transform_label_format(y, self.estimator.nb_classes)

    hsj = HopSkipJump(classifier=self.estimator, targeted=False, **kwargs)
    x_adv = hsj.generate(x=x, y=y)

    distance = np.linalg.norm((x_adv - x).reshape((x.shape[0], -1)), ord=2, axis=1)

    y_pred = self.estimator.predict(x=x)

    # Misclassified samples are assigned distance 0, i.e. treated as non-members
    distance[np.argmax(y_pred, axis=1) != np.argmax(y, axis=1)] = 0

    predicted_class = np.where(distance > self.distance_threshold_tau, 1, 0)

    if probabilities:
        prob_1 = np.zeros_like(distance)
        if self.threshold_bins:
            # bin accuracy is the probability of being a member
            for t_bin in self.threshold_bins:
                prob_1[distance > t_bin[0]] = t_bin[1]
        else:
            # use sigmoid on distance from threshold
            dist_threshold = distance - self.distance_threshold_tau
            prob_1 = 1 / (1 + np.exp(-dist_threshold))
        prob_0 = np.ones_like(prob_1) - prob_1
        return np.stack((prob_0, prob_1), axis=1)

    return predicted_class
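# A short sketch of the `probabilities` flag handled above: when set, `infer` returns
# per-class membership probabilities (column 0: non-member, column 1: member) instead of
# hard labels. `attack`, `x_candidates`, and `y_candidates` are hypothetical placeholders.
probs = attack.infer(x_candidates, y_candidates, probabilities=True)
prob_member = probs[:, 1]  # probability of being a training-set member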
def calibrate_distance_threshold_unsupervised(
    self, top_t: int = 50, num_samples: int = 100, max_queries: int = 1, **kwargs
):
    """
    Calibrate the distance threshold on randomly generated samples, choosing the top-t percentile of the noise
    needed to change the classifier's initial prediction. This method requires the model's clip_values to be set.

    | Paper link: https://arxiv.org/abs/2007.15528

    :param top_t: Top-t percentile.
    :param num_samples: Number of random samples to generate.
    :param max_queries: Maximum number of queries. The maximum number of HSJ iterations on a single sample will
                        be max_queries * max_iter.

    :Keyword Arguments for HopSkipJump:
        * *norm*: Order of the norm. Possible values: "inf", np.inf or 2.
        * *max_iter*: Maximum number of iterations.
        * *max_eval*: Maximum number of evaluations for estimating gradient.
        * *init_eval*: Initial number of evaluations for estimating gradient.
        * *init_size*: Maximum number of trials for initial generation of adversarial examples.
        * *verbose*: Show progress bars.
    """
    from art.attacks.evasion.hop_skip_jump import HopSkipJump

    if self.estimator.clip_values is not None:
        x_min, x_max = self.estimator.clip_values
    else:  # pragma: no cover
        raise RuntimeError(
            "You need to set the estimator's clip_values in order to calibrate the distance threshold."
        )

    # Draw random probes uniformly within the estimator's input range
    x_rand = np.random.rand(*(num_samples,) + self.estimator.input_shape).astype(np.float32)
    x_rand *= x_max - x_min  # scale
    x_rand += x_min  # shift

    y_rand = self.estimator.predict(x=x_rand)
    y_rand = check_and_transform_label_format(y_rand, self.estimator.nb_classes)

    hsj = HopSkipJump(classifier=self.estimator, targeted=False, **kwargs)

    distances = []

    i = 0
    while len(x_rand) != 0 and i < max_queries:
        x_adv = hsj.generate(x=x_rand, y=y_rand)

        distance = np.linalg.norm((x_adv - x_rand).reshape((x_rand.shape[0], -1)), ord=2, axis=1)

        y_pred = self.estimator.predict(x=x_adv)

        changed_predictions = np.argmax(y_pred, axis=1) != np.argmax(y_rand, axis=1)

        distances.extend(distance[changed_predictions])

        # Retry only the samples whose prediction did not change, starting from their
        # current adversarial candidates
        x_rand, y_rand = x_adv[~changed_predictions], y_rand[~changed_predictions]

        i += 1

    if len(distances) == 0:  # pragma: no cover
        raise RuntimeWarning(
            "No successful adversarial examples were generated - no distances were obtained. "
            "The distance threshold will not be set."
        )

    self.distance_threshold_tau = np.percentile(distances, top_t)
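# A minimal sketch for the unsupervised calibration above: no known member/non-member data
# is needed, only the estimator's clip_values. `attack` is a hypothetical
# LabelOnlyDecisionBoundary instance wrapping a classifier with clip_values set.
attack.calibrate_distance_threshold_unsupervised(
    top_t=50,          # take the 50th percentile of successful perturbation norms as tau
    num_samples=100,   # random probes drawn uniformly within clip_values
    max_queries=1,     # HSJ restarts per probe
    max_iter=50, max_eval=1000, init_eval=10,
)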
def test_tensorflow_mnist(self):
    """
    First test with the TensorFlowClassifier.

    :return:
    """
    x_test_original = self.x_test_mnist.copy()

    # Build TensorFlowClassifier
    tfc, sess = get_image_classifier_tf()

    # First targeted attack and norm=2
    hsj = HopSkipJump(classifier=tfc, targeted=True, max_iter=2, max_eval=100, init_eval=10)
    params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
    x_test_adv = hsj.generate(self.x_test_mnist, **params)

    self.assertFalse((self.x_test_mnist == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())

    target = np.argmax(params["y"], axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
    self.assertTrue((target == y_pred_adv).any())

    # First targeted attack and norm=np.inf
    hsj = HopSkipJump(classifier=tfc, targeted=True, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
    params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
    x_test_adv = hsj.generate(self.x_test_mnist, **params)

    self.assertFalse((self.x_test_mnist == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())

    target = np.argmax(params["y"], axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
    self.assertTrue((target == y_pred_adv).any())

    # Second untargeted attack and norm=2
    hsj = HopSkipJump(classifier=tfc, targeted=False, max_iter=2, max_eval=100, init_eval=10)
    x_test_adv = hsj.generate(self.x_test_mnist)

    self.assertFalse((self.x_test_mnist == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())

    y_pred = np.argmax(tfc.predict(self.x_test_mnist), axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
    self.assertTrue((y_pred != y_pred_adv).any())

    # Second untargeted attack and norm=np.inf
    hsj = HopSkipJump(classifier=tfc, targeted=False, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
    x_test_adv = hsj.generate(self.x_test_mnist)

    self.assertFalse((self.x_test_mnist == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())

    y_pred = np.argmax(tfc.predict(self.x_test_mnist), axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
    self.assertTrue((y_pred != y_pred_adv).any())

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)

    # Clean-up session
    if sess is not None:
        sess.close()