Example #1
    def explain(self, request: Dict) -> Dict:
        # The request carries the image as instances[0] and its label as instances[1].
        image = request["instances"][0]
        label = request["instances"][1]
        try:
            inputs = np.array(image)
            logging.info("Calling explain on image of shape %s", inputs.shape)
        except Exception as e:
            raise Exception(
                "Failed to initialize NumPy array from inputs: %s, %s" % (e, request["instances"]))
        try:
            if self.adversary_type.lower() == "squareattack":
                # SquareAttack is a black-box attack, so the model only needs to expose a
                # predict function via BlackBoxClassifier; no gradients are required.
                classifier = BlackBoxClassifier(self._predict, inputs.shape, self.nb_classes,
                                                clip_values=(-np.inf, np.inf))
                preds = np.argmax(classifier.predict(inputs, batch_size=1))
                classifier.channels_first = False
                attack = SquareAttack(estimator=classifier, max_iter=self.max_iter)

                # Craft the adversarial example, then compare predictions before and after.
                x_adv = attack.generate(x=inputs, y=label)

                adv_preds = np.argmax(classifier.predict(x_adv))
                # L2 distance between the adversarial example and the original input.
                l2_error = np.linalg.norm(np.reshape(x_adv[0] - inputs, [-1]))

                return {"explanations": {"adversarial_example": x_adv.tolist(), "L2 error": l2_error.tolist(),
                                         "adversarial_prediction": adv_preds.tolist(), "prediction": preds.tolist()}}
        except Exception as e:
            raise Exception("Failed to explain: %s" % e)
Example #2

import numpy as np
import pytest

from art.attacks.evasion import AutoAttack, AutoProjectedGradientDescent, DeepFool, SquareAttack
from tests.utils import get_image_classifier_tf


def test_generate_attacks_and_targeted(fix_get_mnist_subset, is_tf_version_2):

    classifier, _ = get_image_classifier_tf(from_logits=True)
    norm = np.inf
    eps = 0.3
    eps_step = 0.1
    batch_size = 32

    attacks = list()
    attacks.append(
        AutoProjectedGradientDescent(
            estimator=classifier,
            norm=norm,
            eps=eps,
            eps_step=eps_step,
            max_iter=100,
            targeted=True,
            nb_random_init=5,
            batch_size=batch_size,
            loss_type="cross_entropy",
        ))

    if is_tf_version_2:
        loss_type_2 = "difference_logits_ratio"
    else:
        loss_type_2 = "cross_entropy"

    attacks.append(
        AutoProjectedGradientDescent(
            estimator=classifier,
            norm=norm,
            eps=eps,
            eps_step=eps_step,
            max_iter=100,
            targeted=False,
            nb_random_init=5,
            batch_size=batch_size,
            loss_type=loss_type_2,
        ))
    attacks.append(
        DeepFool(classifier=classifier,
                 max_iter=100,
                 epsilon=1e-6,
                 nb_grads=3,
                 batch_size=batch_size))
    attacks.append(
        SquareAttack(estimator=classifier,
                     norm=norm,
                     max_iter=5000,
                     eps=eps,
                     p_init=0.8,
                     nb_restarts=5))

    (x_train_mnist, y_train_mnist, x_test_mnist,
     y_test_mnist) = fix_get_mnist_subset

    # First test with targeted=False (untargeted attacks only)
    attack = AutoAttack(
        estimator=classifier,
        norm=norm,
        eps=eps,
        eps_step=eps_step,
        attacks=attacks,
        batch_size=batch_size,
        estimator_orig=None,
        targeted=False,
    )

    x_train_mnist_adv = attack.generate(x=x_train_mnist, y=y_train_mnist)

    assert np.mean(np.abs(x_train_mnist_adv - x_train_mnist)) == pytest.approx(
        0.0182, abs=0.105)
    assert np.max(np.abs(x_train_mnist_adv - x_train_mnist)) == pytest.approx(
        0.3, abs=0.05)

    # Then test with targeted=True (also run targeted attacks)
    attack = AutoAttack(
        estimator=classifier,
        norm=norm,
        eps=eps,
        eps_step=eps_step,
        attacks=attacks,
        batch_size=batch_size,
        estimator_orig=None,
        targeted=True,
    )

    x_train_mnist_adv = attack.generate(x=x_train_mnist, y=y_train_mnist)

    assert np.mean(x_train_mnist_adv - x_train_mnist) == pytest.approx(
        0.0179, abs=0.0075)
    assert np.max(np.abs(x_train_mnist_adv - x_train_mnist)) == pytest.approx(
        eps, abs=0.005)
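The final assertion encodes the L-infinity constraint: every pixel perturbation is projected into [-eps, eps], so the largest absolute difference should land at roughly eps. A toy illustration of that bound with synthetic arrays (not the MNIST fixture above):

import numpy as np
import pytest

eps = 0.3
x = np.zeros((4, 28, 28, 1))
# Push every pixel to the edge of the L-infinity ball, the worst admissible case.
x_adv = x + eps * np.sign(np.random.randn(*x.shape))
assert np.max(np.abs(x_adv - x)) == pytest.approx(eps, abs=0.005)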
Example #3

    def __init__(
        self,
        estimator: "CLASSIFIER_TYPE",
        norm: Union[int, float, str] = np.inf,
        eps: float = 0.3,
        eps_step: float = 0.1,
        attacks: Optional[List[EvasionAttack]] = None,
        batch_size: int = 32,
        estimator_orig: Optional["CLASSIFIER_TYPE"] = None,
        targeted: bool = False,
    ):
        """
        Create a :class:`.AutoAttack` instance.

        :param estimator: A trained estimator.
        :param norm: The norm of the adversarial perturbation. Possible values: "inf", np.inf, 1 or 2.
        :param eps: Maximum perturbation that the attacker can introduce.
        :param eps_step: Attack step size (input variation) at each iteration.
        :param attacks: The list of `art.attacks.EvasionAttack` attacks to be used for AutoAttack. If it is `None` or
                        empty, the standard attacks (PGD, APGD-ce, APGD-dlr, DeepFool, Square) will be used.
        :param batch_size: Size of the batch on which adversarial samples are generated.
        :param estimator_orig: Original estimator to be attacked by adversarial examples.
        :param targeted: If False, run only untargeted attacks; if True, also run targeted attacks against each
                         possible target.
        """
        super().__init__(estimator=estimator)

        if attacks is None or not attacks:
            attacks = list()
            attacks.append(
                AutoProjectedGradientDescent(
                    estimator=estimator,  # type: ignore
                    norm=norm,
                    eps=eps,
                    eps_step=eps_step,
                    max_iter=100,
                    targeted=False,
                    nb_random_init=5,
                    batch_size=batch_size,
                    loss_type="cross_entropy",
                )
            )
            attacks.append(
                AutoProjectedGradientDescent(
                    estimator=estimator,  # type: ignore
                    norm=norm,
                    eps=eps,
                    eps_step=eps_step,
                    max_iter=100,
                    targeted=False,
                    nb_random_init=5,
                    batch_size=batch_size,
                    loss_type="difference_logits_ratio",
                )
            )
            attacks.append(
                DeepFool(
                    classifier=estimator,  # type: ignore
                    max_iter=100,
                    epsilon=1e-3,
                    nb_grads=10,
                    batch_size=batch_size,
                )
            )
            attacks.append(
                SquareAttack(estimator=estimator, norm=norm, max_iter=5000, eps=eps, p_init=0.8, nb_restarts=5)
            )

        self.norm = norm
        self.eps = eps
        self.eps_step = eps_step
        self.attacks = attacks
        self.batch_size = batch_size
        if estimator_orig is not None:
            self.estimator_orig = estimator_orig
        else:
            self.estimator_orig = estimator

        self._targeted = targeted
        self._check_params()
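A minimal usage sketch for this constructor, assuming an untrained toy Keras model wrapped with ART's TensorFlowV2Classifier (any ART classifier that provides gradients would do); leaving attacks=None triggers the default suite built above:

import numpy as np
import tensorflow as tf
from art.attacks.evasion import AutoAttack
from art.estimators.classification import TensorFlowV2Classifier

# Toy 10-class model on 28x28 grayscale inputs; a stand-in for a trained MNIST model.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    tf.keras.layers.Dense(10),
])
loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
classifier = TensorFlowV2Classifier(model=model, nb_classes=10, input_shape=(28, 28, 1),
                                    loss_object=loss_object, clip_values=(0.0, 1.0))

x = np.random.rand(8, 28, 28, 1).astype(np.float32)
y = np.eye(10)[np.random.randint(0, 10, size=8)].astype(np.float32)

# attacks=None selects the default suite (APGD-ce, APGD-dlr, DeepFool, Square).
attack = AutoAttack(estimator=classifier, norm=np.inf, eps=0.3, eps_step=0.1,
                    attacks=None, batch_size=8, targeted=False)
x_adv = attack.generate(x=x, y=y)
print("max perturbation:", float(np.max(np.abs(x_adv - x))))  # bounded by eps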
Example #4
    def __init__(
        self,
        estimator: ClassifierGradients,
        norm: Union[int, float] = np.inf,
        eps: float = 0.3,
        eps_step: float = 0.1,
        attacks: Optional[List[EvasionAttack]] = None,
        batch_size: int = 32,
        estimator_orig: Optional[BaseEstimator] = None,
    ):
        """
        Create a :class:`.AutoAttack` instance.

        :param estimator: A trained estimator.
        :param norm: The norm of the adversarial perturbation. Possible values: np.inf, 1 or 2.
        :param eps: Maximum perturbation that the attacker can introduce.
        :param eps_step: Attack step size (input variation) at each iteration.
        :param attacks: The list of `art.attacks.EvasionAttack` attacks to be used for AutoAttack. If it is `None`, the
                        original AutoAttack attacks (PGD, APGD-ce, APGD-dlr, FAB, Square) will be used.
        :param batch_size: Size of the batch on which adversarial samples are generated.
        :param estimator_orig: Original estimator to be attacked by adversarial examples.
        """
        super().__init__(estimator=estimator)

        if estimator_orig is None:
            estimator_orig = estimator

        if attacks is None:
            attacks = list()
            attacks.append(
                AutoProjectedGradientDescent(
                    estimator=estimator,
                    norm=norm,
                    eps=eps,
                    eps_step=eps_step,
                    max_iter=100,
                    targeted=False,
                    nb_random_init=5,
                    batch_size=batch_size,
                    loss_type="cross_entropy",
                )
            )
            attacks.append(
                AutoProjectedGradientDescent(
                    estimator=estimator,
                    norm=norm,
                    eps=eps,
                    eps_step=eps_step,
                    max_iter=100,
                    targeted=False,
                    nb_random_init=5,
                    batch_size=batch_size,
                    loss_type="difference_logits_ratio",
                )
            )
            attacks.append(
                DeepFool(classifier=estimator, max_iter=100, epsilon=1e-6, nb_grads=3, batch_size=batch_size)
            )
            attacks.append(
                SquareAttack(estimator=estimator, norm=norm, max_iter=5000, eps=eps, p_init=0.8, nb_restarts=5)
            )

        self.norm = norm
        self.eps = eps
        self.eps_step = eps_step
        self.attacks = attacks
        self.batch_size = batch_size
        self.estimator_orig = estimator_orig
        self._check_params()
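The attacks parameter replaces the default suite entirely. A sketch of passing a custom, black-box-only suite built around a toy predict function (all names here are illustrative, and it assumes AutoAttack itself only needs predictions, with gradient requirements coming from the individual attacks):

import numpy as np
from art.attacks.evasion import AutoAttack, SquareAttack
from art.estimators.classification import BlackBoxClassifier

def predict(x):
    # Toy stand-in "model": fixed random linear scores over 10 classes, softmaxed.
    w = np.random.default_rng(0).random((28 * 28, 10)) / (28 * 28)
    logits = x.reshape(len(x), -1) @ w
    exp = np.exp(logits)
    return exp / exp.sum(axis=1, keepdims=True)

classifier = BlackBoxClassifier(predict, (28, 28, 1), 10, clip_values=(0.0, 1.0))

# Only the attacks listed here will run; SquareAttack needs no gradients.
custom_attacks = [
    SquareAttack(estimator=classifier, norm=np.inf, max_iter=100, eps=0.3,
                 p_init=0.8, nb_restarts=1),
]
x = np.random.rand(4, 28, 28, 1).astype(np.float32)
y = np.eye(10)[np.random.randint(0, 10, size=4)]
attack = AutoAttack(estimator=classifier, norm=np.inf, eps=0.3, eps_step=0.1,
                    attacks=custom_attacks, batch_size=4)
x_adv = attack.generate(x=x, y=y)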