def explain(self, request: Dict) -> Dict:
    """
    Generate an adversarial example for the instance carried by ``request``.

    ``request["instances"][0]`` is read as the image and
    ``request["instances"][1]`` as its label. Only the ``"squareattack"``
    adversary type (compared case-insensitively) is supported.

    :param request: Payload with an ``"instances"`` sequence holding the image
        at index 0 and the label at index 1.
    :return: A dict with a single ``"explanations"`` entry containing the
        adversarial example, its L2 distance to the input, and the predicted
        class indices for the adversarial and the original input.
    :raises Exception: If the image cannot be converted to a NumPy array, if
        ``self.adversary_type`` is not supported, or if the attack fails.
    """
    image = request["instances"][0]
    label = request["instances"][1]

    try:
        inputs = np.array(image)
        logging.info("Calling explain on image of shape %s", (inputs.shape,))
    except Exception as e:
        raise Exception(
            "Failed to initialize NumPy array from inputs: %s, %s" % (e, request["instances"])
        ) from e

    try:
        # Fail loudly for unknown adversary types; previously the method fell
        # through and returned None, violating the declared return type.
        if str.lower(self.adversary_type) != "squareattack":
            raise ValueError("Invalid method: %s" % self.adversary_type)

        classifier = BlackBoxClassifier(
            self._predict,
            inputs.shape,
            self.nb_classes,
            clip_values=(-np.inf, np.inf),
        )
        preds = np.argmax(classifier.predict(inputs, batch_size=1))
        # Set only after the baseline prediction, exactly as before.
        classifier.channels_first = False

        attack = SquareAttack(estimator=classifier, max_iter=self.max_iter)
        x_adv = attack.generate(x=inputs, y=label)

        adv_preds = np.argmax(classifier.predict(x_adv))
        l2_error = np.linalg.norm(np.reshape(x_adv[0] - inputs, [-1]))

        return {
            "explanations": {
                "adversarial_example": x_adv.tolist(),
                "L2 error": l2_error.tolist(),
                "adversarial_prediction": adv_preds.tolist(),
                "prediction": preds.tolist(),
            }
        }
    except Exception as e:
        # Keep the generic exception type callers already catch, but chain the
        # cause so the original traceback is not lost.
        raise Exception("Failed to explain %s" % e) from e
def test_generate_attacks_and_targeted(fix_get_mnist_subset, is_tf_version_2):
    """
    Run AutoAttack with an explicit attack list, first untargeted and then
    targeted, and check the size of the resulting perturbations on the MNIST
    training subset.
    """
    classifier, _ = get_image_classifier_tf(from_logits=True)

    norm = np.inf
    eps = 0.3
    eps_step = 0.1
    batch_size = 32

    # Keyword arguments shared by both APGD attacks and both AutoAttack runs.
    shared = dict(
        estimator=classifier,
        norm=norm,
        eps=eps,
        eps_step=eps_step,
        batch_size=batch_size,
    )

    apgd_targeted = AutoProjectedGradientDescent(
        max_iter=100,
        targeted=True,
        nb_random_init=5,
        loss_type="cross_entropy",
        **shared,
    )

    # The second APGD attack uses the DLR loss only when running on TF2.
    loss_type_2 = "difference_logits_ratio" if is_tf_version_2 else "cross_entropy"
    apgd_untargeted = AutoProjectedGradientDescent(
        max_iter=100,
        targeted=False,
        nb_random_init=5,
        loss_type=loss_type_2,
        **shared,
    )

    deep_fool = DeepFool(
        classifier=classifier,
        max_iter=100,
        epsilon=1e-6,
        nb_grads=3,
        batch_size=batch_size,
    )
    square = SquareAttack(
        estimator=classifier,
        norm=norm,
        max_iter=5000,
        eps=eps,
        p_init=0.8,
        nb_restarts=5,
    )
    attacks = [apgd_targeted, apgd_untargeted, deep_fool, square]

    x_train_mnist, y_train_mnist, _x_test_mnist, _y_test_mnist = fix_get_mnist_subset

    # First run: targeted=False.
    attack = AutoAttack(attacks=attacks, estimator_orig=None, targeted=False, **shared)
    x_train_mnist_adv = attack.generate(x=x_train_mnist, y=y_train_mnist)
    perturbation = x_train_mnist_adv - x_train_mnist

    assert np.mean(np.abs(perturbation)) == pytest.approx(0.0182, abs=0.105)
    assert np.max(np.abs(perturbation)) == pytest.approx(0.3, abs=0.05)

    # Second run: targeted=True.
    attack = AutoAttack(attacks=attacks, estimator_orig=None, targeted=True, **shared)
    x_train_mnist_adv = attack.generate(x=x_train_mnist, y=y_train_mnist)
    perturbation = x_train_mnist_adv - x_train_mnist

    # Signed mean here (no abs), matching the original assertion.
    assert np.mean(perturbation) == pytest.approx(0.0179, abs=0.0075)
    assert np.max(np.abs(perturbation)) == pytest.approx(eps, abs=0.005)
def __init__(
    self,
    estimator: "CLASSIFIER_TYPE",
    norm: Union[int, float, str] = np.inf,
    eps: float = 0.3,
    eps_step: float = 0.1,
    attacks: Optional[List[EvasionAttack]] = None,
    batch_size: int = 32,
    estimator_orig: Optional["CLASSIFIER_TYPE"] = None,
    targeted: bool = False,
):
    """
    Create a :class:`.AutoAttack` instance.

    :param estimator: A trained estimator.
    :param norm: The norm of the adversarial perturbation. Possible values: "inf", np.inf, 1 or 2.
    :param eps: Maximum perturbation that the attacker can introduce.
    :param eps_step: Attack step size (input variation) at each iteration.
    :param attacks: The list of `art.attacks.EvasionAttack` attacks to be used for AutoAttack. If it is `None`
                    or empty, the default attacks built here are used: APGD with cross-entropy loss, APGD with
                    difference-of-logits-ratio loss, DeepFool and SquareAttack.
    :param batch_size: Size of the batch on which adversarial samples are generated.
    :param estimator_orig: Original estimator to be attacked by adversarial examples. Falls back to
                           `estimator` when not given.
    :param targeted: If False run only untargeted attacks, if True also run targeted attacks against each
                     possible target.
    """
    super().__init__(estimator=estimator)

    # `None` and an empty list both select the default attack ensemble.
    if not attacks:
        attacks = [
            AutoProjectedGradientDescent(
                estimator=estimator,  # type: ignore
                norm=norm,
                eps=eps,
                eps_step=eps_step,
                max_iter=100,
                targeted=False,
                nb_random_init=5,
                batch_size=batch_size,
                loss_type="cross_entropy",
            ),
            AutoProjectedGradientDescent(
                estimator=estimator,  # type: ignore
                norm=norm,
                eps=eps,
                eps_step=eps_step,
                max_iter=100,
                targeted=False,
                nb_random_init=5,
                batch_size=batch_size,
                loss_type="difference_logits_ratio",
            ),
            DeepFool(
                classifier=estimator,  # type: ignore
                max_iter=100,
                epsilon=1e-3,
                nb_grads=10,
                batch_size=batch_size,
            ),
            SquareAttack(
                estimator=estimator,
                norm=norm,
                max_iter=5000,
                eps=eps,
                p_init=0.8,
                nb_restarts=5,
            ),
        ]

    self.norm = norm
    self.eps = eps
    self.eps_step = eps_step
    self.attacks = attacks
    self.batch_size = batch_size
    self.estimator_orig = estimator if estimator_orig is None else estimator_orig
    self._targeted = targeted

    self._check_params()
def __init__(
    self,
    estimator: ClassifierGradients,
    norm: Union[int, float] = np.inf,
    eps: float = 0.3,
    eps_step: float = 0.1,
    attacks: Optional[List[EvasionAttack]] = None,
    batch_size: int = 32,
    estimator_orig: Optional[BaseEstimator] = None,
):
    """
    Create an AutoAttack instance.

    :param estimator: A trained estimator.
    :param norm: The norm of the adversarial perturbation. Possible values: np.inf, 1 or 2.
    :param eps: Maximum perturbation that the attacker can introduce.
    :param eps_step: Attack step size (input variation) at each iteration.
    :param attacks: The list of `art.attacks.EvasionAttack` attacks to be used for AutoAttack. If it is `None`
                    or empty, the default attacks built here are used: APGD with cross-entropy loss, APGD with
                    difference-of-logits-ratio loss, DeepFool and SquareAttack.
    :param batch_size: Size of the batch on which adversarial samples are generated.
    :param estimator_orig: Original estimator to be attacked by adversarial examples. Falls back to
                           `estimator` when not given.
    """
    # NOTE(review): the previous docstring named ProjectedGradientDescent and listed PGD/FAB as defaults;
    # neither matches the `attacks` parameter or the attacks constructed below. Confirm the class name.
    super().__init__(estimator=estimator)

    if estimator_orig is None:
        estimator_orig = estimator

    # Treat an empty list like `None`: an attack with nothing to run would
    # otherwise be constructed silently.
    if not attacks:
        attacks = [
            AutoProjectedGradientDescent(
                estimator=estimator,
                norm=norm,
                eps=eps,
                eps_step=eps_step,
                max_iter=100,
                targeted=False,
                nb_random_init=5,
                batch_size=batch_size,
                loss_type="cross_entropy",
            ),
            AutoProjectedGradientDescent(
                estimator=estimator,
                norm=norm,
                eps=eps,
                eps_step=eps_step,
                max_iter=100,
                targeted=False,
                nb_random_init=5,
                batch_size=batch_size,
                loss_type="difference_logits_ratio",
            ),
            DeepFool(
                classifier=estimator,
                max_iter=100,
                epsilon=1e-6,
                nb_grads=3,
                batch_size=batch_size,
            ),
            SquareAttack(
                estimator=estimator,
                norm=norm,
                max_iter=5000,
                eps=eps,
                p_init=0.8,
                nb_restarts=5,
            ),
        ]

    self.norm = norm
    self.eps = eps
    self.eps_step = eps_step
    self.attacks = attacks
    self.batch_size = batch_size
    self.estimator_orig = estimator_orig

    self._check_params()