def test_krclassifier(self):
        """
        Second test with the KerasClassifier.
        :return:
        """
        # Keras-backed classifier under attack.
        classifier = get_classifier_kr()

        # Unpack the MNIST train/test splits.
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Universal perturbation with EAD as the inner attacker.
        attack = UniversalPerturbation(classifier,
                                       max_iter=1,
                                       attacker="ead",
                                       attacker_params={"max_iter": 5,
                                                        "targeted": False})
        x_train_adv = attack.generate(x_train)
        self.assertTrue((attack.fooling_rate >= 0.2) or not attack.converged)

        # Applying the learned noise to the test split must change it.
        x_test_adv = x_test + attack.noise
        self.assertFalse((x_test == x_test_adv).all())

        # The perturbation should flip at least one label on each split.
        preds_train = np.argmax(classifier.predict(x_train_adv), axis=1)
        preds_test = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_test).all())
        self.assertFalse((np.argmax(y_train, axis=1) == preds_train).all())
    def test_iris_pt(self):
        """Universal perturbation (EAD inner attacker) against the PyTorch Iris classifier."""
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_pt()

        # Configure the attack via set_params rather than constructor kwargs.
        attack = UniversalPerturbation(classifier)
        attack.set_params(max_iter=1,
                          attacker="ead",
                          attacker_params={"max_iter": 5, "targeted": False})
        x_test_adv = attack.generate(x_test)

        # Perturbed inputs differ from the originals but stay inside [0, 1].
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        # At least one prediction must be flipped by the noise.
        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on Iris with universal adversarial examples: %.2f%%',
            (acc * 100))
    def test_tfclassifier(self):
        """
        First test with the TensorFlowClassifier.
        :return:
        """
        # TensorFlow classifier plus its session (kept alive for the test).
        tfc, sess = get_classifier_tf()

        # Unpack the MNIST train/test splits.
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Universal perturbation with NewtonFool as the inner attacker.
        attack = UniversalPerturbation(tfc,
                                       max_iter=1,
                                       attacker="newtonfool",
                                       attacker_params={"max_iter": 5})
        x_train_adv = attack.generate(x_train)
        self.assertTrue((attack.fooling_rate >= 0.2) or not attack.converged)

        # The same noise applied to the test split must alter it.
        x_test_adv = x_test + attack.noise
        self.assertFalse((x_test == x_test_adv).all())

        # The perturbation should flip at least one label on each split.
        preds_train = np.argmax(tfc.predict(x_train_adv), axis=1)
        preds_test = np.argmax(tfc.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_test).all())
        self.assertFalse((np.argmax(y_train, axis=1) == preds_train).all())
    def test_ptclassifier(self):
        """
        Third test with the PyTorchClassifier.
        :return:
        """
        # PyTorch classifier under attack.
        ptc = get_classifier_pt()

        # MNIST arrives channels-last; PyTorch wants channels-first float32.
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
        x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)

        # Universal perturbation with NewtonFool as the inner attacker.
        attack = UniversalPerturbation(ptc,
                                       max_iter=1,
                                       attacker="newtonfool",
                                       attacker_params={"max_iter": 5})
        x_train_adv = attack.generate(x_train)
        self.assertTrue((attack.fooling_rate >= 0.2) or not attack.converged)

        # Applying the learned noise to the test split must change it.
        x_test_adv = x_test + attack.noise
        self.assertFalse((x_test == x_test_adv).all())

        # The perturbation should flip at least one label on each split.
        preds_train = np.argmax(ptc.predict(x_train_adv), axis=1)
        preds_test = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_test).all())
        self.assertFalse((np.argmax(y_train, axis=1) == preds_train).all())
    def test_iris_k_unbounded(self):
        """Universal perturbation on Iris with a Keras classifier that has no clip values."""
        (_, _), (x_test, y_test) = self.iris
        classifier, _ = get_iris_classifier_kr()

        # Rebuild the classifier without clip values so the noise is unbounded.
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        attack = UniversalPerturbation(classifier)
        attack.set_params(max_iter=1,
                          attacker="newtonfool",
                          attacker_params={"max_iter": 5})
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        # At least one prediction must be flipped by the noise.
        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on Iris with universal adversarial examples: %.2f%%',
            (acc * 100))
    def test_tensorflow_mnist(self):
        """
        First test with the TensorFlowClassifier.
        :return:
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        # Snapshot to verify the attack does not mutate its input.
        x_test_original = x_test.copy()

        # TensorFlow classifier plus its session.
        tfc, sess = get_classifier_tf()

        # Universal perturbation with NewtonFool as the inner attacker.
        attack = UniversalPerturbation(tfc, max_iter=1, attacker="newtonfool", attacker_params={"max_iter": 5})
        x_train_adv = attack.generate(x_train)
        self.assertTrue((attack.fooling_rate >= 0.2) or not attack.converged)

        # Applying the learned noise to the test split must change it.
        x_test_adv = x_test + attack.noise
        self.assertFalse((x_test == x_test_adv).all())

        # The perturbation should flip at least one label on each split.
        preds_train = np.argmax(tfc.predict(x_train_adv), axis=1)
        preds_test = np.argmax(tfc.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_test).all())
        self.assertFalse((np.argmax(y_train, axis=1) == preds_train).all())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
    def test_keras_mnist(self):
        """
        Second test with the KerasClassifier.
        :return:
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        # Snapshot to verify the attack does not mutate its input.
        x_test_original = x_test.copy()

        # Keras-backed classifier under attack.
        krc = get_classifier_kr()

        # Universal perturbation with EAD as the inner attacker.
        attack = UniversalPerturbation(krc, max_iter=1, attacker="ead", attacker_params={"max_iter": 5, "targeted": False})
        x_train_adv = attack.generate(x_train)
        self.assertTrue((attack.fooling_rate >= 0.2) or not attack.converged)

        # Applying the learned noise to the test split must change it.
        x_test_adv = x_test + attack.noise
        self.assertFalse((x_test == x_test_adv).all())

        # The perturbation should flip at least one label on each split.
        preds_train = np.argmax(krc.predict(x_train_adv), axis=1)
        preds_test = np.argmax(krc.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_test).all())
        self.assertFalse((np.argmax(y_train, axis=1) == preds_train).all())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
    def test_pytorch_mnist(self):
        """
        Third test with the PyTorchClassifier.
        :return:
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        # MNIST arrives channels-last; PyTorch wants channels-first float32.
        x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
        x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)
        # Snapshot to verify the attack does not mutate its input.
        x_test_original = x_test.copy()

        # PyTorch classifier under attack.
        ptc = get_classifier_pt()

        # Universal perturbation with NewtonFool as the inner attacker.
        attack = UniversalPerturbation(ptc, max_iter=1, attacker="newtonfool", attacker_params={"max_iter": 5})
        x_train_adv = attack.generate(x_train)
        self.assertTrue((attack.fooling_rate >= 0.2) or not attack.converged)

        # Applying the learned noise to the test split must change it.
        x_test_adv = x_test + attack.noise
        self.assertFalse((x_test == x_test_adv).all())

        # The perturbation should flip at least one label on each split.
        preds_train = np.argmax(ptc.predict(x_train_adv), axis=1)
        preds_test = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_test).all())
        self.assertFalse((np.argmax(y_train, axis=1) == preds_train).all())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
    def test_pytorch_iris(self):
        """Universal perturbation (EAD inner attacker) against the PyTorch tabular Iris classifier."""
        classifier = get_tabular_classifier_pt()

        # Configure the attack via set_params rather than constructor kwargs.
        attack = UniversalPerturbation(classifier)
        attack.set_params(max_iter=1,
                          attacker="ead",
                          attacker_params={"max_iter": 5, "targeted": False})
        x_test_iris_adv = attack.generate(self.x_test_iris)

        # Perturbed inputs differ from the originals but stay inside [0, 1].
        self.assertFalse((self.x_test_iris == x_test_iris_adv).all())
        self.assertTrue((x_test_iris_adv <= 1).all())
        self.assertTrue((x_test_iris_adv >= 0).all())

        # At least one prediction must be flipped by the noise.
        preds_adv = np.argmax(classifier.predict(x_test_iris_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris,
                                    axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(
            self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info(
            "Accuracy on Iris with universal adversarial examples: %.2f%%",
            (acc * 100))
Ejemplo n.º 10
0
    def my_gen_UAP(self):
        """
        Generate a Universal Adversarial Perturbation (UAP) incrementally over
        batches of "material" images loaded from ``self.X_materials_paths``.

        For each batch, the running noise is refined with ART's
        ``UniversalPerturbation`` (FGSM inner attacker), fooling ratios are
        computed for both the evaluation images and the batch itself (plus a
        randomized-noise control), and a log row is recorded. The final noise
        and the full log are saved under ``self.save_path``.

        :return: tuple of (final noise array, log ndarray whose rows are
                 [materials_seen, L2 norm, Linf norm, fr_images, fr_materials,
                 fr_images_random, fr_materials_random]).
        """
        num_i = self.X_images.shape[0]  # number of evaluation images (currently unused)
        num_m = len(self.X_materials_paths)  # number of material files (currently unused)
        imshape = self.X_images[0].shape  # per-image shape used for the noise buffer
        
        #print("\n Generating UAP ...")
        # Build the crafter once, targeted or non-targeted depending on the flag.
        if self.targeted > 0:
            print(" *** targeted attack *** \n")
            adv_crafter = UniversalPerturbation(
                self.classifier,
                attacker='fgsm',
                delta=0.000001,
                attacker_params={"targeted":True, "eps":self.fgsm_eps},
                max_iter=self.uap_iter,
                eps=self.norm_size,
                norm=self.norm_type)
        else:
            print(" *** non-targeted attack *** \n")
            adv_crafter = UniversalPerturbation(
                self.classifier,
                attacker='fgsm',
                delta=0.000001,
                attacker_params={"eps":self.fgsm_eps},
                max_iter=self.uap_iter,
                eps=self.norm_size,
                norm=self.norm_type)

        LOG = []
        X_materials_cnt = 0  # running count of material samples consumed
        noise = np.zeros(imshape)
        noise = noise.astype('float32')
        for i,path in enumerate(self.X_materials_paths):
            X_materials = np.load(path)
            X_materials_cnt += X_materials.shape[0]
            #if X_materials.shape[-1] != 3:
                #X_materials = self.my_gray_scale(images=X_materials)
            X_materials -= 128.0 # normalize pixel values to roughly -1..+1
            X_materials /= 128.0 

            # Generate/refine the UAP on this batch, seeding with the current noise.
            # NOTE(review): the crafter above is targeted only when self.targeted > 0,
            # but this branch also fires for self.targeted == 0 — confirm the
            # boundary case is intended.
            if self.targeted >= 0:
                Y_materials_tar = self.my_target_labels(length=X_materials.shape[0]) # build the target-label array for the targeted attack
                noise = adv_crafter.generate(X_materials, noise=noise,  y=Y_materials_tar, targeted=True)
            else:
                noise = adv_crafter.generate(X_materials, noise=noise)
            
            # Fallback for the case where the noise was never updated by the attack
            if type(adv_crafter.noise[0,:]) == int:
                noise = np.zeros(imshape)
            else:
                noise = np.copy(adv_crafter.noise)
                noise = np.reshape(noise, imshape)
            
            noise_random = self.my_randomized_noise(noise=noise) # randomized control noise

            # Compute fooling ratios for UAP noise and the random control.
            fr_i = self.my_calc_fooling_ratio(images=self.X_images, noise=noise) # fooling ratio on images+noise
            fr_m = self.my_calc_fooling_ratio(images=X_materials, noise=noise) # fooling ratio on materials+noise
            fr_i_r = self.my_calc_fooling_ratio(images=self.X_images, noise=noise_random)
            fr_m_r = self.my_calc_fooling_ratio(images=X_materials, noise=noise_random)

            # Measure the magnitude of the generated UAP.
            norm_2 = np.linalg.norm(noise)
            norm_inf = abs(noise).max()

            LOG.append([X_materials_cnt, norm_2, norm_inf, fr_i, fr_m, fr_i_r, fr_m_r])
            #np.save(self.save_path+'_noise_{}'.format(i), noise)
            print("LOG: {} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f}".format(X_materials_cnt, norm_2, norm_inf, fr_i, fr_m, fr_i_r, fr_m_r))
            del(X_materials) # free memory before loading the next batch
        np.save(self.save_path+'_noise', noise)
        np.save(self.save_path+'_LOG', np.array(LOG))
        return noise, np.array(LOG)
Ejemplo n.º 11
0
def main(args):
    """
    Craft an FGSM-based universal adversarial perturbation against a VGG16
    CIFAR-10 classifier and print benign accuracy plus the fooling rate.

    :param args: parsed command-line arguments (currently unused in the body).
    """
    (x_train, y_train), (x_test,
                         y_test), min_, max_ = load_dataset(str('cifar10'))
    # Dataset is loaded channels-last; PyTorch expects channels-first float32.
    x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
    x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)

    # Keep a small subset so the attack finishes quickly.
    x_train = x_train[:50]
    y_train = y_train[:50]

    model = VGG('VGG16')
    model.load_state_dict(torch.load("./logs/pytorch_vgg16.model"))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-2)

    classifier = PyTorchClassifier(model=model,
                                   clip_values=(min_, max_),
                                   loss=criterion,
                                   optimizer=optimizer,
                                   input_shape=(3, 32, 32),
                                   nb_classes=10)

    # Benign accuracy on the full test set.
    predictions = classifier.predict(x_test)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(
            y_test)
    print('Accuracy on benign test examples: {}%'.format(accuracy * 100))

    attack_params = {
        "attacker": "fgsm",
        # "attacker_params": {
        #     "max_iter": 1000,
        #     "epsilon": 0.02
        # },
        "delta": 0.01,
        "max_iter": 1000,
        "eps": 13.0 / 255.0,
        "norm": np.inf
    }

    # Craft the universal perturbation on the training subset. The returned
    # adversarial batch is discarded: only the learned noise is used below.
    adv_crafter = UniversalPerturbation(classifier, **attack_params)
    adv_crafter.generate(x_train)

    print('\nCraft attack train examples')
    # Apply the learned universal perturbation to the training subset.
    perturbation = adv_crafter.noise
    x_train_adv = x_train + perturbation

    # Fooling rate: fraction of samples whose prediction changed.
    preds = np.argmax(classifier.predict(x_train), axis=1)
    preds_adv = np.argmax(classifier.predict(x_train_adv), axis=1)
    fooling_rate = np.sum(preds != preds_adv) / y_train.shape[0]
    print("\nFooling rate: %.2f%%" % (fooling_rate * 100))
Ejemplo n.º 12
0
                                eps=args.eps,
                                mean_l2_train=mean_l2_train,
                                mean_linf_train=mean_linf_train)

# Build a non-targeted universal-perturbation crafter with FGSM as the
# inner attacker; eps/norm come from earlier in the script.
adv_crafter = UniversalPerturbation(classifier,
                                    attacker='fgsm',
                                    delta=0.000001,
                                    attacker_params={
                                        'targeted': False,
                                        'eps': 0.0024
                                    },
                                    max_iter=15,
                                    eps=eps,
                                    norm=norm)

# Fit the perturbation on the training set; the crafted examples themselves
# are discarded — only the learned noise is kept.
_ = adv_crafter.generate(X_train)
noise = adv_crafter.noise[0, :].astype(np.float32)
# Save the noise under a name encoding model, norm type and epsilon.
base_f = 'nontargeted_{}_{}_eps{:.3f}'.format(args.model, args.norm, args.eps)
save_f_noise = 'result/{}/noise/{}'.format(args.dataset, base_f)
np.save(save_f_noise, noise)

# # Evaluate the ART classifier on adversarial examples

# Clean predictions on both splits, then predictions after adding the
# universal noise (broadcast over every sample).
preds_train = np.argmax(classifier.predict(X_train), axis=1)
preds_test = np.argmax(classifier.predict(X_test), axis=1)

X_train_adv = X_train + noise
X_test_adv = X_test + noise

preds_train_adv = np.argmax(classifier.predict(X_train_adv), axis=1)
preds_test_adv = np.argmax(classifier.predict(X_test_adv), axis=1)