Example #1
    def test_scikitlearn(self):
        from sklearn.linear_model import LogisticRegression
        from sklearn.svm import SVC, LinearSVC
        from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
        from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, ExtraTreesClassifier
        from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier

        from art.classifiers.scikitlearn import ScikitlearnDecisionTreeClassifier, ScikitlearnExtraTreeClassifier
        from art.classifiers.scikitlearn import ScikitlearnAdaBoostClassifier, ScikitlearnBaggingClassifier
        from art.classifiers.scikitlearn import ScikitlearnExtraTreesClassifier, ScikitlearnGradientBoostingClassifier
        from art.classifiers.scikitlearn import ScikitlearnRandomForestClassifier, ScikitlearnLogisticRegression
        from art.classifiers.scikitlearn import ScikitlearnSVC

        scikitlearn_test_cases = {
            DecisionTreeClassifier: ScikitlearnDecisionTreeClassifier,
            ExtraTreeClassifier: ScikitlearnExtraTreeClassifier,
            AdaBoostClassifier: ScikitlearnAdaBoostClassifier,
            BaggingClassifier: ScikitlearnBaggingClassifier,
            ExtraTreesClassifier: ScikitlearnExtraTreesClassifier,
            GradientBoostingClassifier: ScikitlearnGradientBoostingClassifier,
            RandomForestClassifier: ScikitlearnRandomForestClassifier,
            LogisticRegression: ScikitlearnLogisticRegression,
            SVC: ScikitlearnSVC,
            LinearSVC: ScikitlearnSVC
        }

        for (model_class, classifier_class) in scikitlearn_test_cases.items():
            model = model_class()
            classifier = classifier_class(model=model, clip_values=(0, 1))
            classifier.fit(x=self.x_test, y=self.y_test)

            attack = BoundaryAttack(classifier,
                                    targeted=False,
                                    delta=0.01,
                                    epsilon=0.01,
                                    step_adapt=0.667,
                                    max_iter=50,
                                    num_trial=25,
                                    sample_size=20,
                                    init_size=100)
            x_test_adv = attack.generate(self.x_test)
            self.assertFalse((self.x_test == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertFalse((np.argmax(self.y_test, axis=1) == preds_adv).all())
            accuracy = np.sum(preds_adv == np.argmax(self.y_test, axis=1)) / self.y_test.shape[0]
            logger.info('Accuracy of %s on Iris with BoundaryAttack adversarial examples: %.2f%%',
                        classifier.__class__.__name__, accuracy * 100)
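All of these cases follow one pattern: wrap a fitted scikit-learn model in the matching ART classifier, hand it to BoundaryAttack, and check that the adversarial points stay inside clip_values while flipping predictions. As a minimal self-contained sketch of that pattern, assuming an ART 0.x install (where the attack lives in art.attacks and the wrappers in art.classifiers.scikitlearn; on ART 1.x+ the paths are art.attacks.evasion and art.estimators.classification):

# Minimal sketch, assuming ART 0.x and scikit-learn are installed.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from art.attacks import BoundaryAttack
from art.classifiers.scikitlearn import ScikitlearnLogisticRegression

x, y = load_iris(return_X_y=True)
x = (x - x.min(axis=0)) / (x.max(axis=0) - x.min(axis=0))  # scale features to [0, 1]

model = LogisticRegression().fit(x, y)
classifier = ScikitlearnLogisticRegression(model=model, clip_values=(0, 1))

# Decision-based attack: it only queries predictions, no gradients needed.
attack = BoundaryAttack(classifier, targeted=False, max_iter=50)
x_adv = attack.generate(x)
print('flipped:', np.mean(np.argmax(classifier.predict(x_adv), axis=1) != y))

Because the boundary attack is decision-based, predict() is all it needs, which is why the same loop works unchanged across every wrapper in the dictionary above.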
Example #2
    def test_iris_pt(self):
        classifier = get_iris_classifier_pt()
        attack = BoundaryAttack(classifier, targeted=False, max_iter=10)
        x_test_adv = attack.generate(self.x_test.astype(np.float32))
        self.assertFalse((self.x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test, axis=1) == preds_adv).all())
        accuracy = np.sum(
            preds_adv == np.argmax(self.y_test, axis=1)) / self.y_test.shape[0]
        logger.info(
            'Accuracy on Iris with boundary adversarial examples: %.2f%%',
            (accuracy * 100))
Example #3
    def test_keras_iris_clipped(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()
        attack = BoundaryAttack(classifier, targeted=False, max_iter=10)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        accuracy = np.sum(
            preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on Iris with boundary adversarial examples: %.2f%%',
            (accuracy * 100))
Example #4
    def test_iris_k_unbounded(self):
        classifier, _ = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        attack = BoundaryAttack(classifier, targeted=False, max_iter=10)
        x_test_adv = attack.generate(self.x_test)
        self.assertFalse((self.x_test == x_test_adv).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test, axis=1) == preds_adv).all())
        accuracy = np.sum(
            preds_adv == np.argmax(self.y_test, axis=1)) / self.y_test.shape[0]
        logger.info(
            'Accuracy on Iris with boundary adversarial examples: %.2f%%',
            (accuracy * 100))
Example #5
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function:
        Load classifier and generate adversarial samples
    """
    t_start = time.time()
    if attacker_name == "FGSM":
        attacker = FastGradientMethod(classifier=classifier, eps=0.3)
    elif attacker_name == "Elastic":
        attacker = ElasticNet(classifier=classifier, confidence=0.5)
    elif attacker_name == "BasicIterativeMethod":
        attacker = BasicIterativeMethod(classifier=classifier, eps=0.3)
    elif attacker_name == "NewtonFool":
        attacker = NewtonFool(classifier=classifier, max_iter=20)
    elif attacker_name == "HopSkipJump":
        attacker = HopSkipJump(classifier=classifier, max_iter=20)
    elif attacker_name == "ZooAttack":
        attacker = ZooAttack(classifier=classifier, max_iter=20)
    elif attacker_name == "VirtualAdversarialMethod":
        attacker = VirtualAdversarialMethod(classifier=classifier, max_iter=20)
    elif attacker_name == "UniversalPerturbation":
        attacker = UniversalPerturbation(classifier=classifier, max_iter=20)
    elif attacker_name == "AdversarialPatch":
        attacker = AdversarialPatch(classifier=classifier, max_iter=20)
    elif attacker_name == "Attack":
        attacker = Attack(classifier=classifier)
    elif attacker_name == "BoundaryAttack":
        attacker = BoundaryAttack(classifier=classifier,
                                  targeted=False,
                                  epsilon=0.05,
                                  max_iter=20)
    elif attacker_name == "CarliniL2":
        attacker = CarliniL2Method(classifier=classifier,
                                   confidence=0.5,
                                   learning_rate=0.001,
                                   max_iter=15)
    elif attacker_name == "CarliniLinf":
        attacker = CarliniLInfMethod(classifier=classifier,
                                     confidence=0.5,
                                     learning_rate=0.001,
                                     max_iter=15)
    elif attacker_name == "DeepFool":
        attacker = DeepFool(classifier)
    elif attacker_name == "SMM":
        attacker = SaliencyMapMethod(classifier=classifier, theta=2)
    elif attacker_name == "PGD":
        attacker = ProjectedGradientDescent(classifier=classifier,
                                            norm=2,
                                            eps=1,
                                            eps_step=0.5)
    else:
        raise ValueError("Please get the right attacker's name for the input.")
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
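GetAttackers is a factory: it builds the named ART attack around the given classifier, runs generate on x_test, and returns the adversarial batch together with the elapsed wall-clock time. A hypothetical call, assuming classifier and x_test were prepared earlier in the script:

# Hypothetical usage of GetAttackers; `classifier` is any wrapped ART
# classifier and `x_test` a NumPy batch defined elsewhere in the script.
for name in ["FGSM", "DeepFool", "BoundaryAttack"]:
    test_adv, dt = GetAttackers(classifier, x_test, name)
    print("{}: {} adversarial samples in {:.1f}s".format(name, len(test_adv), dt))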
Example #6
    def test_krclassifier(self):
        """
        Second test with the KerasClassifier.
        :return:
        """
        # Build KerasClassifier
        krc = get_classifier_kr()

        # First targeted attack
        boundary = BoundaryAttack(classifier=krc, targeted=True, max_iter=20)
        params = {'y': random_targets(self.y_test, krc.nb_classes())}
        x_test_adv = boundary.generate(self.x_test, **params)

        self.assertFalse((self.x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        self.assertTrue((target == y_pred_adv).any())

        # Second untargeted attack
        boundary = BoundaryAttack(classifier=krc, targeted=False, max_iter=20)
        x_test_adv = boundary.generate(self.x_test)

        self.assertFalse((self.x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        y_pred = np.argmax(krc.predict(self.x_test), axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        self.assertTrue((y_pred != y_pred_adv).any())

        # Clean-up session
        k.clear_session()
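The targeted branch relies on random_targets from art.utils, which draws for each sample a class different from its true label and returns it one-hot encoded. A small sketch of the y that generate receives:

# Sketch of random_targets: a one-hot target class != the true label per sample.
import numpy as np
from art.utils import random_targets

y_true = np.eye(10)[[3, 3, 7]]      # one-hot true labels for three samples
y_tgt = random_targets(y_true, 10)  # one-hot random wrong classes
assert not np.any(np.argmax(y_tgt, axis=1) == np.argmax(y_true, axis=1))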
Example #7
    def test_iris_tf(self):
        classifier, _ = get_iris_classifier_tf()

        # Test untargeted attack
        attack = BoundaryAttack(classifier, targeted=False, max_iter=10)
        x_test_adv = attack.generate(self.x_test)
        self.assertFalse((self.x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test, axis=1) == preds_adv).all())
        accuracy = np.sum(
            preds_adv == np.argmax(self.y_test, axis=1)) / self.y_test.shape[0]
        logger.info(
            'Accuracy on Iris with boundary adversarial examples: %.2f%%',
            (accuracy * 100))

        # Test targeted attack
        targets = random_targets(self.y_test, nb_classes=3)
        attack = BoundaryAttack(classifier, targeted=True, max_iter=10)
        x_test_adv = attack.generate(self.x_test, **{'y': targets})
        self.assertFalse((self.x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
        accuracy = np.sum(
            preds_adv == np.argmax(targets, axis=1)) / self.y_test.shape[0]
        logger.info('Success rate of targeted boundary on Iris: %.2f%%',
                    (accuracy * 100))
Example #8
    def test_ptclassifier(self):
        """
        Third test with the PyTorchClassifier.
        :return:
        """
        # Build PyTorchClassifier
        ptc = get_classifier_pt()

        x_test = np.swapaxes(self.x_test, 1, 3).astype(np.float32)

        # First targeted attack
        boundary = BoundaryAttack(classifier=ptc, targeted=True, max_iter=20)
        params = {'y': random_targets(self.y_test, ptc.nb_classes())}
        x_test_adv = boundary.generate(x_test, **params)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertTrue((target == y_pred_adv).any())

        # Second untargeted attack
        boundary = BoundaryAttack(classifier=ptc, targeted=False, max_iter=20)
        x_test_adv = boundary.generate(x_test)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        y_pred = np.argmax(ptc.predict(x_test), axis=1)
        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertTrue((y_pred != y_pred_adv).any())
Example #9
    def test_classifier_type_check_fail_classifier(self):
        # Use a useless test classifier to test basic classifier properties
        class ClassifierNoAPI:
            pass

        classifier = ClassifierNoAPI
        with self.assertRaises(TypeError) as context:
            _ = BoundaryAttack(classifier=classifier)

        self.assertIn(
            'For `BoundaryAttack` classifier must be an instance of `art.classifiers.classifier.Classifier`, '
            'the provided classifier is instance of (<class \'object\'>,).',
            str(context.exception))
Example #10
def test_images(fix_get_mnist_subset, get_image_classifier_list_for_attack,
                framework, targeted):
    classifier_list = get_image_classifier_list_for_attack(BoundaryAttack)
    if classifier_list is None:
        logging.warning(
            "Couldn't perform this test because no classifier is defined")
        return

    for classifier in classifier_list:

        attack = BoundaryAttack(classifier=classifier,
                                targeted=targeted,
                                max_iter=20)
        if targeted:
            backend_targeted_images(attack, fix_get_mnist_subset)
        else:
            back_end_untargeted_images(attack, fix_get_mnist_subset, framework)
Example #11
def test_tabular(get_tabular_classifier_list, framework, get_iris_dataset,
                 clipped_classifier, targeted):
    classifier_list = get_tabular_classifier_list(BoundaryAttack,
                                                  clipped=clipped_classifier)
    if classifier_list is None:
        logging.warning(
            "Couldn't perform this test because no classifier is defined")
        return

    for classifier in classifier_list:

        attack = BoundaryAttack(classifier, targeted=targeted, max_iter=10)
        if targeted:
            backend_targeted_tabular(attack, get_iris_dataset)
        else:
            backend_untargeted_tabular(attack,
                                       get_iris_dataset,
                                       clipped=clipped_classifier)
Example #12
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function:
        Load classifier and generate adversarial samples
    """
    t_start = time.time()
    if attacker_name == "AdversarialPatch":
        attacker = AdversarialPatch(classifier=classifier, max_iter=10)
    elif attacker_name == "Attack":
        attacker = Attack(classifier=classifier)
    elif attacker_name == "BoundaryAttack":
        attacker = BoundaryAttack(classifier=classifier,
                                  targeted=False,
                                  epsilon=0.05,
                                  max_iter=10)
    else:
        raise ValueError("Please get the right attacker's name for the input.")
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
Example #13
    def test_keras_mnist(self):
        """
        Second test with the KerasClassifier.
        :return:
        """
        (_, _), (x_test, y_test) = self.mnist

        x_test_original = x_test.copy()

        # Build KerasClassifier
        krc = get_classifier_kr()

        # First targeted attack
        boundary = BoundaryAttack(classifier=krc, targeted=True, max_iter=20)
        params = {'y': random_targets(y_test, krc.nb_classes())}
        x_test_adv = boundary.generate(x_test, **params)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        self.assertTrue((target == y_pred_adv).any())

        # Second untargeted attack
        boundary = BoundaryAttack(classifier=krc, targeted=False, max_iter=20)
        x_test_adv = boundary.generate(x_test)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        y_pred = np.argmax(krc.predict(x_test), axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        self.assertTrue((y_pred != y_pred_adv).any())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))),
                               0.0,
                               delta=0.00001)

        # Clean-up session
        k.clear_session()
Example #14
    def test_pytorch_mnist(self):
        """
        Third test with the PyTorchClassifier.
        :return:
        """
        (_, _), (x_test, y_test) = self.mnist
        x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)
        x_test = np.reshape(x_test,
                            (x_test.shape[0], 1, 28, 28)).astype(np.float32)
        x_test_original = x_test.copy()

        # Build PyTorchClassifier
        ptc = get_classifier_pt()

        # First targeted attack
        boundary = BoundaryAttack(classifier=ptc, targeted=True, max_iter=20)
        params = {'y': random_targets(y_test, ptc.nb_classes())}
        x_test_adv = boundary.generate(x_test, **params)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertTrue((target == y_pred_adv).any())

        # Second untargeted attack
        boundary = BoundaryAttack(classifier=ptc, targeted=False, max_iter=20)
        x_test_adv = boundary.generate(x_test)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        y_pred = np.argmax(ptc.predict(x_test), axis=1)
        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertTrue((y_pred != y_pred_adv).any())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))),
                               0.0,
                               delta=0.00001)
Example #15
    def test_tfclassifier(self):
        """
        First test with the TFClassifier.
        :return:
        """
        # Build TFClassifier
        tfc, sess = get_classifier_tf()

        # Get MNIST
        (_, _), (x_test, y_test) = self.mnist

        # First targeted attack
        boundary = BoundaryAttack(classifier=tfc, targeted=True, max_iter=20)
        params = {'y': random_targets(y_test, tfc.nb_classes)}
        x_test_adv = boundary.generate(x_test, **params)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        self.assertTrue((target == y_pred_adv).any())

        # Second untargeted attack
        boundary = BoundaryAttack(classifier=tfc, targeted=False, max_iter=20)
        x_test_adv = boundary.generate(x_test)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        y_pred = np.argmax(tfc.predict(x_test), axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        self.assertTrue((y_pred != y_pred_adv).any())

        # Clean-up session
        sess.close()
        tf.reset_default_graph()
Example #16
                                   nb_classes=nclasses)

    test_data, test_label = load_svmlight_file(data_path, n_features=nfeatures)
    test_data = test_data.toarray()
    test_label = test_label.astype('int')
    n = len(test_label)
    df = pd.DataFrame(test_data)
    df['label'] = test_label
    df = df.sample(frac=1)
    test_label = df['label'].tolist()
    test_data = np.array(df.drop(columns=['label']))

    predictions = np.argmax(classifier.predict(test_data), axis=1)
    attack = BoundaryAttack(classifier=classifier,
                            targeted=False,
                            delta=0.05,
                            epsilon=0.05,
                            step_adapt=0.5)
    n_selected = 100
    corrected = []
    c_labels = []
    for i in range(len(test_label)):
        if test_label[i] == predictions[i]:
            corrected.append(test_data[i])
            c_labels.append(test_label[i])
        if len(corrected) >= n_selected:
            break
    corrected = np.array(corrected)
    start = time.time()
    test_adv = attack.generate(corrected)
    end = time.time()
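The fragment stops after timing generate. A hedged follow-up, reusing the variables above, would measure how many of the selected correctly-classified samples actually flipped and at what L2 cost:

# Follow-up sketch reusing `classifier`, `corrected`, `c_labels`, `test_adv`,
# `start` and `end` from above: flip rate and mean L2 perturbation.
adv_preds = np.argmax(classifier.predict(test_adv), axis=1)
flip_rate = np.mean(adv_preds != np.array(c_labels))
l2 = np.linalg.norm((test_adv - corrected).reshape(len(corrected), -1), axis=1)
print("attack time: {:.1f}s, flip rate: {:.2%}, mean L2: {:.4f}".format(
    end - start, flip_rate, l2.mean()))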
Example #17
    def test_tfclassifier(self):
        """
        First test with the TensorFlowClassifier.
        :return:
        """
        # Build TensorFlowClassifier
        tfc, sess = get_classifier_tf()

        # First targeted attack
        boundary = BoundaryAttack(classifier=tfc,
                                  targeted=True,
                                  max_iter=200,
                                  delta=0.5)
        params = {'y': random_targets(self.y_test, tfc.nb_classes())}
        x_test_adv = boundary.generate(self.x_test, **params)
        expected_x_test_adv_1 = np.asarray([
            0.42622495, 0.0, 0.0, 0.33005068, 0.2277837, 0.0, 0.18348512,
            0.42622495, 0.27452883, 0.0, 0.0, 0.0, 0.1653487, 0.70523715,
            0.7367977, 0.7974912, 0.28579983, 0.0, 0.36499417, 0.0, 0.0, 0.0,
            0.42622495, 0.0, 0.26680174, 0.42622495, 0.0, 0.19260764
        ])
        expected_x_test_adv_2 = np.asarray([
            0.0459, 0., 0., 0.0756, 0.2048, 0.037, 0., 0., 0.0126, 0.4338,
            0.1566, 0.3061, 0., 0.296, 0.8318, 0.7267, 0.2252, 0.074, 0.,
            0.1208, 0.4362, 0., 0., 0., 0., 0.0359, 0., 0.1191
        ])
        try:
            np.testing.assert_array_almost_equal(x_test_adv[2, 14, :, 0],
                                                 expected_x_test_adv_1,
                                                 decimal=4)
        except AssertionError:
            np.testing.assert_array_almost_equal(x_test_adv[2, 14, :, 0],
                                                 expected_x_test_adv_2,
                                                 decimal=4)
        self.assertLessEqual(np.max(x_test_adv), 1.0)
        self.assertGreaterEqual(np.min(x_test_adv), 0.0)

        y_pred_adv = tfc.predict(x_test_adv)
        y_pred_adv_expected = np.asarray([
            1.57103419e-01, -7.31061280e-01, -4.03979905e-02, -4.79048371e-01,
            9.37852338e-02, -8.01057637e-01, -4.77534801e-01, 1.08687377e+00,
            -3.06577891e-01, -5.74976981e-01
        ])
        np.testing.assert_array_almost_equal(y_pred_adv[0],
                                             y_pred_adv_expected,
                                             decimal=4)

        # Second untargeted attack
        boundary = BoundaryAttack(classifier=tfc, targeted=False, max_iter=20)
        x_test_adv = boundary.generate(self.x_test)

        self.assertFalse((self.x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        y_pred = np.argmax(tfc.predict(self.x_test), axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        self.assertTrue((y_pred != y_pred_adv).any())

        # Clean-up session
        sess.close()
Example #18
    def test_tensorflow_mnist(self):
        """
        First test with the TensorFlowClassifier.
        :return:
        """
        (_, _), (x_test, y_test) = self.mnist

        x_test_original = x_test.copy()

        # Build TensorFlowClassifier
        tfc, sess = get_classifier_tf()

        # First targeted attack
        boundary = BoundaryAttack(classifier=tfc,
                                  targeted=True,
                                  max_iter=200,
                                  delta=0.5)
        params = {'y': random_targets(y_test, tfc.nb_classes())}
        x_test_adv = boundary.generate(x_test, **params)
        # expected_x_test_adv_1 = np.asarray([0.42622495, 0.0, 0.0, 0.33005068, 0.2277837, 0.0,
        #                                     0.18348512, 0.42622495, 0.27452883, 0.0, 0.0, 0.0,
        #                                     0.1653487, 0.70523715, 0.7367977, 0.7974912, 0.28579983, 0.0,
        #                                     0.36499417, 0.0, 0.0, 0.0, 0.42622495, 0.0,
        #                                     0.26680174, 0.42622495, 0.0, 0.19260764])
        # expected_x_test_adv_2 = np.asarray([0.0459, 0., 0., 0.0756, 0.2048, 0.037, 0., 0.,
        #                                     0.0126, 0.4338, 0.1566, 0.3061, 0., 0.296, 0.8318, 0.7267,
        #                                     0.2252, 0.074, 0., 0.1208, 0.4362, 0., 0., 0.,
        #                                     0., 0.0359, 0., 0.1191])
        #
        # expected_x_test_adv_3 = np.asarray([0.0671, 0.0644, 0.3012, 0., 0., 0., 0.3407, 0.,
        #                                     0.1507, 0.0478, 0.3253, 0., 0.3334, 0.3473, 1., 0.8649,
        #                                     0.5639, 0.5198, 0., 0., 0.6173, 0., 0.3116, 0.,
        #                                     0.3937, 0.6173, 0., 0.0021])
        # try:
        #     np.testing.assert_array_almost_equal(x_test_adv[2, 14, :, 0], expected_x_test_adv_1, decimal=4)
        # except AssertionError:
        #     try:
        #         np.testing.assert_array_almost_equal(x_test_adv[2, 14, :, 0], expected_x_test_adv_2, decimal=4)
        #     except AssertionError:
        #         np.testing.assert_array_almost_equal(x_test_adv[2, 14, :, 0], expected_x_test_adv_3, decimal=4)
        self.assertLessEqual(np.max(x_test_adv), 1.0)
        self.assertGreaterEqual(np.min(x_test_adv), 0.0)

        y_pred_adv = tfc.predict(x_test_adv)
        y_pred_adv_expected = np.asarray([
            1.57103419e-01, -7.31061280e-01, -4.03979905e-02, -4.79048371e-01,
            9.37852338e-02, -8.01057637e-01, -4.77534801e-01, 1.08687377e+00,
            -3.06577891e-01, -5.74976981e-01
        ])
        # np.testing.assert_array_almost_equal(y_pred_adv[0], y_pred_adv_expected, decimal=4)

        # Second untargeted attack
        boundary = BoundaryAttack(classifier=tfc, targeted=False, max_iter=20)
        x_test_adv = boundary.generate(x_test)

        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())

        y_pred = np.argmax(tfc.predict(x_test), axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        self.assertTrue((y_pred != y_pred_adv).any())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))),
                               0.0,
                               delta=0.00001)

        # Clean-up session
        sess.close()
Example #19
def adversarial_attack_shift(x, y, delta=1.0, model=RandomForestClassifier(), attack_type='zoo',
                             numerical_features=None, feat_delta=1.0):
    # Here, delta is the fraction of the held-out half of the data on which to generate attacks;
    # the first half, at a minimum, has to be used to train the model against which the attacks are generated.
    assert (attack_type in ['zoo', 'boundary', 'hop-skip-jump'])

    le = preprocessing.LabelEncoder()
    le.fit(np.squeeze(y))
    y = le.transform(y)

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=(0.5 * delta))

    if numerical_features is not None:

        n_numerical = len(numerical_features)
        feat_indices = np.random.choice(n_numerical, ceil(n_numerical * feat_delta), replace=False)
        feat_indices = np.array(numerical_features)[feat_indices]

    else:

        feat_indices = np.random.choice(x.shape[1], ceil(x.shape[1] * feat_delta), replace=False)

    other_features = list(set(range(x.shape[1])) - set(feat_indices))

    x_train_other = x_train[:, other_features]
    x_train_numerical = x_train[:, feat_indices]
    x_test_other = x_test[:, other_features]
    x_test_numerical = x_test[:, feat_indices]

    classifier = SklearnClassifier(model=model, clip_values=(0, np.max(x_train_numerical)))

    # Train the ART classifier

    classifier.fit(x_train_numerical, y_train)

    # Evaluate the ART classifier on benign test examples

    predictions = classifier.predict(x_test_numerical)
    accuracy = np.sum(np.argmax(predictions, axis=1) == y_test) / len(y_test)
    print("Accuracy on benign test examples: {}%".format(accuracy * 100))

    # Generate adversarial test examples
    if attack_type == 'zoo':
        attack = ZooAttack(
            classifier=classifier,
            confidence=0.0,
            targeted=False,
            learning_rate=1e-1,
            max_iter=10,
            binary_search_steps=10,
            initial_const=1e-3,
            abort_early=True,
            use_resize=False,
            use_importance=False,
            nb_parallel=x_test_numerical.shape[1],
            batch_size=1,
            variable_h=0.01,
        )
    elif attack_type == 'boundary':
        attack = BoundaryAttack(classifier, targeted=False, epsilon=0.02, max_iter=20, num_trial=10)
    elif attack_type == 'hop-skip-jump':
        attack = HopSkipJump(classifier,
                             targeted=False,
                             norm=2,
                             max_iter=20,
                             max_eval=10,
                             init_eval=9,
                             init_size=10)

    x_adv = attack.generate(x=x_test_numerical, y=y_test)

    # Evaluate the ART classifier on adversarial test examples

    predictions_adv = classifier.predict(x_adv)
    accuracy = np.sum(np.argmax(predictions_adv, axis=1) == y_test) / len(y_test)
    print("Accuracy on adversarial test examples: {}%".format(accuracy * 100))
    print("Max difference: {}".format(np.max(np.abs(x_test_numerical - x_adv) / x_test_numerical)))

    x_final = np.zeros_like(x)
    x_final[:, feat_indices] = np.vstack([x_train_numerical, x_adv])
    x_final[:, other_features] = np.vstack([x_train_other, x_test_other])

    y_final = np.concatenate([y_train, y_test], axis=0)
    y_final = le.inverse_transform(y_final)

    adv_indices = list(range(len(y_train), len(y)))

    return x_final, y_final, adv_indices, feat_indices
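A hypothetical driver for adversarial_attack_shift, with a synthetic dataset standing in for real data (make_classification and the non-negativity shift are assumptions, not part of the original):

# Hypothetical driver; features are made non-negative so that
# clip_values=(0, max) inside adversarial_attack_shift is sensible.
import numpy as np
from sklearn.datasets import make_classification

x, y = make_classification(n_samples=200, n_features=10, random_state=0)
x = np.abs(x)
x_shift, y_shift, adv_idx, feat_idx = adversarial_attack_shift(
    x, y, delta=1.0, attack_type='boundary', feat_delta=0.5)
print("{} adversarial rows on features {}".format(len(adv_idx), feat_idx))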
Example #20
def attack_run_rejection_policy(model, hps):
    """
    An attack run with rejection policy.
    :param model: Pytorch model.
    :param hps: hyperparameters
    :return:
    """
    model.eval()

    # Get thresholds
    threshold_list1 = []
    threshold_list2 = []
    for label_id in range(hps.n_classes):
        # No data augmentation (crop_flip=False) when getting in-distribution thresholds
        dataset = get_dataset(data_name=hps.problem, train=True, label_id=label_id, crop_flip=False)
        in_test_loader = DataLoader(dataset=dataset, batch_size=hps.n_batch_test, shuffle=False)

        print('Inference on {}, label_id {}'.format(hps.problem, label_id))
        in_ll_list = []
        for batch_id, (x, y) in enumerate(in_test_loader):
            x = x.to(hps.device)
            y = y.to(hps.device)
            ll = model(x)

            correct_idx = ll.argmax(dim=1) == y

            ll_, y_ = ll[correct_idx], y[correct_idx]  # keep only the correctly classified samples
            in_ll_list += list(ll_[:, label_id].detach().cpu().numpy())

        thresh_idx = int(0.01 * len(in_ll_list))
        thresh1 = sorted(in_ll_list)[thresh_idx]
        thresh_idx = int(0.02 * len(in_ll_list))
        thresh2 = sorted(in_ll_list)[thresh_idx]
        threshold_list1.append(thresh1)  # 1st-percentile log-likelihood threshold
        threshold_list2.append(thresh2)  # 2nd-percentile log-likelihood threshold
        print('1st & 2nd percentile thresholds: {:.3f}, {:.3f}'.format(thresh1, thresh2))

    # Evaluation
    n_total = 0   # total number of samples correctly classified by the clean classifier
    n_successful_adv = 0  # total number of successful adversarial examples generated
    n_rejected_adv1 = 0   # successful adversarial examples rejected at the 1st-percentile threshold, <= n_successful_adv
    n_rejected_adv2 = 0   # successful adversarial examples rejected at the 2nd-percentile threshold, <= n_successful_adv

    attack_path = os.path.join(hps.attack_dir, hps.attack)
    if not os.path.exists(attack_path):
        os.mkdir(attack_path)

    thresholds1 = torch.tensor(threshold_list1).to(hps.device)
    thresholds2 = torch.tensor(threshold_list2).to(hps.device)

    l2_distortion_list = []
    n_eval = 0

    wrapped_target_model = PyTorchClassifier(model=model,
                                             loss=None,
                                             optimizer=None,
                                             input_shape=(hps.image_channel, 32, 32),
                                             nb_classes=hps.n_classes)

    if hps.attack == 'boundary':
        attack = BoundaryAttack(wrapped_target_model, targeted=hps.targeted)
    elif hps.attack == 'cw':
        attack = CarliniL2Method(wrapped_target_model, confidence=hps.cw_confidence, targeted=hps.targeted)


    hps.n_batch_test = 1
    for label_id in range(hps.n_classes):
        dataset = get_dataset(data_name=hps.problem, train=False, label_id=label_id)
        test_loader = DataLoader(dataset=dataset, batch_size=hps.n_batch_test, shuffle=False)
        for batch_id, (x, y) in enumerate(test_loader):
            # Note that images are scaled to [0., 1.0]
            x, y = x.to(hps.device), y.to(hps.device)
            with torch.no_grad():
                output = model(x)

            pred = output.argmax(dim=1)
            correct_idx = pred == y  # Only evaluate samples correctly classified by the clean classifier.
            x, y = x[correct_idx], y[correct_idx]

            n_eval += correct_idx.sum().item()

            for target_id in range(hps.n_classes):
                if label_id != target_id:
                    n_total += 1
                    y_cur = torch.LongTensor([target_id] * x.size(0)).to(hps.device)
                    # adv_x = adversary.perturb(x, y_cur)
                    x_ = x.cpu().numpy().astype(np.float32)
                    y_ = y_cur.cpu().numpy().astype(np.float32)
                    adv_x = attack.generate(x_, y_)

                    with torch.no_grad():
                        adv_x = torch.tensor(adv_x).to(hps.device)
                        output = model(adv_x)

                    logits, preds = output.max(dim=1)

                    success_idx = preds == y_cur
                    n_successful_adv += success_idx.sum().item()

                    diff = adv_x - x
                    l2_distortion = diff.norm(p=2, dim=-1).mean().item()  # mean l2 distortion
                    l2_distortion_list.append(l2_distortion)

                    rej_idx1 = logits < thresholds1[preds]
                    n_rejected_adv1 += rej_idx1.sum().item()

                    rej_idx2 = logits < thresholds2[preds]
                    n_rejected_adv2 += rej_idx2.sum().item()

            break  # only one batch

        print('Evaluating on samples of class {} ...'.format(label_id))

    reject_rate1 = n_rejected_adv1 / n_successful_adv
    reject_rate2 = n_rejected_adv2 / n_successful_adv
    success_adv_rate = n_successful_adv / n_total
    print('success rate of adv examples generation: {}/{}={:.4f}'.format(n_successful_adv, n_total, success_adv_rate))
    print('Mean L2 distortion of Adv Examples: {:.4f}'.format(np.mean(l2_distortion_list)))
    print('1st percentile, reject success rate: {}/{}={:.4f}'.format(n_rejected_adv1, n_successful_adv, reject_rate1))
    print('2nd percentile, reject success rate: {}/{}={:.4f}'.format(n_rejected_adv2, n_successful_adv, reject_rate2))
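attack_run_rejection_policy expects an hps object exposing the attributes it reads; a hypothetical driver follows, where every field is inferred from those attribute accesses rather than taken from the original script:

# Hypothetical hyperparameter object; all fields are assumptions inferred
# from the attribute accesses in attack_run_rejection_policy.
from types import SimpleNamespace

hps = SimpleNamespace(problem='cifar10', n_classes=10, n_batch_test=200,
                      image_channel=3, device='cuda', attack='boundary',
                      attack_dir='attacks', targeted=True, cw_confidence=0.0)
# attack_run_rejection_policy(model, hps)  # `model`: a trained classifier
# returning per-class log-likelihood scores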