def test_krclassifier(self):
        """
        Second test with the KerasClassifier.
        :return:
        """
        # Build KerasClassifier
        krc = get_classifier_kr()

        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Attack
        up = UniversalPerturbation(krc,
                                   max_iter=1,
                                   attacker="ead",
                                   attacker_params={
                                       "max_iter": 5,
                                       "targeted": False
                                   })
        x_train_adv = up.generate(x_train)
        self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

        x_test_adv = x_test + up.noise
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = np.argmax(krc.predict(x_train_adv), axis=1)
        test_y_pred = np.argmax(krc.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
        self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())

    def test_iris_pt(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_pt()

        attack_params = {
            "max_iter": 1,
            "attacker": "ead",
            "attacker_params": {
                "max_iter": 5,
                "targeted": False
            }
        }
        attack = UniversalPerturbation(classifier)
        attack.set_params(**attack_params)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on Iris with universal adversarial examples: %.2f%%',
            (acc * 100))

    def test_tfclassifier(self):
        """
        First test with the TensorFlowClassifier.
        """
        # Build TensorFlowClassifier
        tfc, sess = get_classifier_tf()

        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Attack
        up = UniversalPerturbation(tfc,
                                   max_iter=1,
                                   attacker="newtonfool",
                                   attacker_params={"max_iter": 5})
        x_train_adv = up.generate(x_train)
        self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

        x_test_adv = x_test + up.noise
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = np.argmax(tfc.predict(x_train_adv), axis=1)
        test_y_pred = np.argmax(tfc.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
        self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())

    def test_ptclassifier(self):
        """
        Third test with the PyTorchClassifier.
        """
        # Build PyTorchClassifier
        ptc = get_classifier_pt()

        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
        x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)

        # Attack
        up = UniversalPerturbation(ptc,
                                   max_iter=1,
                                   attacker="newtonfool",
                                   attacker_params={"max_iter": 5})
        x_train_adv = up.generate(x_train)
        self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

        x_test_adv = x_test + up.noise
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = np.argmax(ptc.predict(x_train_adv), axis=1)
        test_y_pred = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
        self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())

    def test_iris_k_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier, _ = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        attack_params = {
            "max_iter": 1,
            "attacker": "newtonfool",
            "attacker_params": {
                "max_iter": 5
            }
        }
        attack = UniversalPerturbation(classifier)
        attack.set_params(**attack_params)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on Iris with universal adversarial examples: %.2f%%',
            (acc * 100))

    def test_tensorflow_mnist(self):
        """
        First test with the TensorFlowClassifier.
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_test_original = x_test.copy()

        # Build TensorFlowClassifier
        tfc, sess = get_classifier_tf()

        # Attack
        up = UniversalPerturbation(tfc, max_iter=1, attacker="newtonfool", attacker_params={"max_iter": 5})
        x_train_adv = up.generate(x_train)
        self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

        x_test_adv = x_test + up.noise
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = np.argmax(tfc.predict(x_train_adv), axis=1)
        test_y_pred = np.argmax(tfc.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
        self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)

    def test_keras_mnist(self):
        """
        Second test with the KerasClassifier.
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_test_original = x_test.copy()

        # Build KerasClassifier
        krc = get_classifier_kr()

        # Attack
        up = UniversalPerturbation(krc, max_iter=1, attacker="ead", attacker_params={"max_iter": 5, "targeted": False})
        x_train_adv = up.generate(x_train)
        self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

        x_test_adv = x_test + up.noise
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = np.argmax(krc.predict(x_train_adv), axis=1)
        test_y_pred = np.argmax(krc.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
        self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)

    def test_pytorch_iris(self):
        classifier = get_tabular_classifier_pt()

        attack_params = {
            "max_iter": 1,
            "attacker": "ead",
            "attacker_params": {
                "max_iter": 5,
                "targeted": False
            }
        }
        attack = UniversalPerturbation(classifier)
        attack.set_params(**attack_params)
        x_test_iris_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_iris_adv).all())
        self.assertTrue((x_test_iris_adv <= 1).all())
        self.assertTrue((x_test_iris_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_iris_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris,
                                    axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(
            self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info(
            "Accuracy on Iris with universal adversarial examples: %.2f%%",
            (acc * 100))

    def test_pytorch_mnist(self):
        """
        Third test with the PyTorchClassifier.
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
        x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)
        x_test_original = x_test.copy()

        # Build PyTorchClassifier
        ptc = get_classifier_pt()

        # Attack
        up = UniversalPerturbation(ptc, max_iter=1, attacker="newtonfool", attacker_params={"max_iter": 5})
        x_train_adv = up.generate(x_train)
        self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

        x_test_adv = x_test + up.noise
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = np.argmax(ptc.predict(x_train_adv), axis=1)
        test_y_pred = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
        self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
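All of the tests above share one usage pattern. As a compact reference, here is a minimal sketch of that pattern; the names `clf`, `x_train` and `x_test` are assumed to be built elsewhere (e.g. by the `get_classifier_*` helpers and `self.mnist`):

# Minimal sketch of the shared test pattern; `clf`, `x_train`, `x_test` are assumed.
up = UniversalPerturbation(clf, max_iter=1,
                           attacker="newtonfool", attacker_params={"max_iter": 5})
x_train_adv = up.generate(x_train)  # fits a single universal noise vector
# The same noise transfers to held-out samples:
x_test_adv = x_test + up.noise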
Example #10
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function:
        Load classifier and generate adversarial samples
    """
    t_start = time.time()
    if attacker_name == "FGSM":
        attacker = FastGradientMethod(classifier=classifier, eps=0.3)
    elif attacker_name == "Elastic":
        attacker = ElasticNet(classifier=classifier, confidence=0.5)
    elif attacker_name == "BasicIterativeMethod":
        attacker = BasicIterativeMethod(classifier=classifier, eps=0.3)
    elif attacker_name == "NewtonFool":
        attacker = NewtonFool(classifier=classifier, max_iter=20)
    elif attacker_name == "HopSkipJump":
        attacker = HopSkipJump(classifier=classifier, max_iter=20)
    elif attacker_name == "ZooAttack":
        attacker = ZooAttack(classifier=classifier, max_iter=20)
    elif attacker_name == "VirtualAdversarialMethod":
        attacker = VirtualAdversarialMethod(classifier=classifier, max_iter=20)
    elif attacker_name == "UniversalPerturbation":
        attacker = UniversalPerturbation(classifier=classifier, max_iter=20)
    elif attacker_name == "AdversarialPatch":
        attacker = AdversarialPatch(classifier=classifier, max_iter=20)
    elif attacker_name == "Attack":
        attacker = Attack(classifier=classifier)
    elif attacker_name == "BoundaryAttack":
        attacker = BoundaryAttack(classifier=classifier,
                                  targeted=False,
                                  epsilon=0.05,
                                  max_iter=20)
    elif attacker_name == "CarliniL2":
        attacker = CarliniL2Method(classifier=classifier,
                                   confidence=0.5,
                                   learning_rate=0.001,
                                   max_iter=15)
    elif attacker_name == "CarliniLinf":
        attacker = CarliniLInfMethod(classifier=classifier,
                                     confidence=0.5,
                                     learning_rate=0.001,
                                     max_iter=15)
    elif attacker_name == "DeepFool":
        attacker = DeepFool(classifier)
    elif attacker_name == "SMM":
        attacker = SaliencyMapMethod(classifier=classifier, theta=2)
    elif attacker_name == "PGD":
        attacker = ProjectedGradientDescent(classifier=classifier,
                                            norm=2,
                                            eps=1,
                                            eps_step=0.5)
    else:
        raise ValueError("Please get the right attacker's name for the input.")
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
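A hypothetical driver for GetAttackers (a sketch; `classifier` and `x_test` are assumed to exist already, e.g. an ART KerasClassifier and MNIST test data):

# Hypothetical usage of GetAttackers; `classifier` and `x_test` are assumed.
for name in ["FGSM", "DeepFool", "PGD"]:
    x_adv, elapsed = GetAttackers(classifier, x_test[:100], name)
    preds = np.argmax(classifier.predict(x_adv), axis=1)
    print("{}: {} adversarial samples in {:.1f}s".format(name, len(x_adv), elapsed))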

    def test_classifier_type_check_fail_classifier(self):
        # Use a useless test classifier to test basic classifier properties
        class ClassifierNoAPI:
            pass

        classifier = ClassifierNoAPI
        with self.assertRaises(TypeError) as context:
            _ = UniversalPerturbation(classifier=classifier)

        self.assertIn('For `UniversalPerturbation` classifier must be an instance of '
                      '`art.classifiers.classifier.Classifier`, the provided classifier is instance of '
                      '(<class \'object\'>,).', str(context.exception))

    def test_classifier_type_check_fail_gradients(self):
        # Use a test classifier not providing gradients required by white-box attack
        from art.classifiers.scikitlearn import ScikitlearnDecisionTreeClassifier
        from sklearn.tree import DecisionTreeClassifier

        classifier = ScikitlearnDecisionTreeClassifier(model=DecisionTreeClassifier())
        with self.assertRaises(TypeError) as context:
            _ = UniversalPerturbation(classifier=classifier)

        self.assertIn('For `UniversalPerturbation` classifier must be an instance of '
                      '`art.classifiers.classifier.ClassifierNeuralNetwork` and '
                      '`art.classifiers.classifier.ClassifierGradients`, the provided classifier is instance of '
                      '(<class \'art.classifiers.scikitlearn.ScikitlearnClassifier\'>,).', str(context.exception))
Example #13
def get_adversarial(targeted,
                    attack_name,
                    classifier,
                    xs,
                    target_ys,
                    batch_size,
                    dataset,
                    fgsm_epsilon=0,
                    cwl2_confidence=0):

    # The attack object, chosen below by name
    attack = None
    samples_range = xs.shape[0]

    #======================================
    if attack_name == 'FastGradientMethod':
        # norm=np.inf, eps=.3, eps_step=0.1, targeted=False, num_random_init=0, batch_size=1,minimal=False
        attack = FastGradientMethod(classifier=classifier,
                                    targeted=targeted,
                                    eps=fgsm_epsilon,
                                    batch_size=batch_size)
    #=====================================
    elif attack_name == 'CarliniLInfMethod':
        # confidence=0.0, targeted=False, learning_rate=0.01, max_iter=10, max_halving=5,
        #max_doubling=5, eps=0.3, batch_size=128
        attack = CarliniLInfMethod(classifier=classifier,
                                   max_iter=1000,
                                   targeted=targeted,
                                   batch_size=batch_size)
    #-------------------------------
    elif attack_name == 'UniversalPerturbation':
        # attacker='deepfool', attacker_params=None, delta=0.2,
        # max_iter=20, eps=10.0, norm=np.inf

        if targeted:
            raise ValueError('The UniversalPerturbation attack cannot be targeted.')
        attack = UniversalPerturbation(classifier=classifier, max_iter=5)

    #==============================================
    elif attack_name == 'ProjectedGradientDescent':
        # norm=np.inf, eps=.3, eps_step=0.1, max_iter=100,
        # targeted=False, num_random_init=0, batch_size=1
        if dataset == 'mnist':
            attack = ProjectedGradientDescent(classifier=classifier,
                                              targeted=targeted,
                                              norm=1,
                                              eps=.3,
                                              eps_step=0.01,
                                              num_random_init=0,
                                              max_iter=40,
                                              batch_size=batch_size)
        else:
            attack = ProjectedGradientDescent(classifier=classifier,
                                              targeted=targeted,
                                              norm=1,
                                              eps=8.0,
                                              eps_step=2.0,
                                              num_random_init=0,
                                              max_iter=7,
                                              batch_size=batch_size)
    else:
        raise ValueError('Unknown attack name: {}'.format(attack_name))

    if targeted:
        # Generate the adversarial samples in steps
        adv = attack.generate(xs[0:batch_size, :, :, :],
                              y=target_ys[0:batch_size])
        last_ii = 0
        for ii in range(batch_size, samples_range - batch_size, batch_size):
            print(ii)
            adv_samples = attack.generate(
                xs[ii:ii + batch_size, :, :, :],
                y=target_ys[ii:ii + batch_size])
            adv = np.concatenate((adv, adv_samples), axis=0)
            last_ii = ii

        # The rest of the samples
        if last_ii + batch_size < xs.shape[0]:
            last_samples = xs[last_ii + batch_size:, :, :, :]
            adv_samples = attack.generate(
                last_samples,
                y=target_ys[last_ii + batch_size:])
            adv = np.concatenate((adv, adv_samples), axis=0)
    else:
        # Generate the adversarial samples in steps
        adv = attack.generate(xs[0:batch_size, :, :, :])
        last_ii = 0
        for ii in range(batch_size, samples_range - batch_size, batch_size):
            print(ii)
            adv_samples = attack.generate(
                xs[ii:ii + batch_size, :, :, :])
            adv = np.concatenate((adv, adv_samples), axis=0)
            last_ii = ii

        # The rest of the samples
        if last_ii + batch_size < xs.shape[0]:
            last_samples = xs[last_ii + batch_size:, :, :, :]
            adv_samples = attack.generate(last_samples)
            adv = np.concatenate((adv, adv_samples), axis=0)

    adv = np.asarray(adv)
    return adv
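A hypothetical call to get_adversarial (a sketch; `classifier` is assumed to be an ART classifier and `x_test` a batch of images in NCHW layout):

# Hypothetical non-targeted PGD run; target_ys is unused when targeted=False.
adv = get_adversarial(targeted=False,
                      attack_name='ProjectedGradientDescent',
                      classifier=classifier,
                      xs=x_test,
                      target_ys=None,
                      batch_size=128,
                      dataset='mnist')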
Example #14
    def my_gen_UAP(self):
        num_i = self.X_images.shape[0]
        num_m = len(self.X_materials_paths)
        imshape = self.X_images[0].shape
        
        #print("\n Generating UAP ...")
        if self.targeted > 0:
            print(" *** targeted attack *** \n")
            adv_crafter = UniversalPerturbation(
                self.classifier,
                attacker='fgsm',
                delta=0.000001,
                attacker_params={"targeted":True, "eps":self.fgsm_eps},
                max_iter=self.uap_iter,
                eps=self.norm_size,
                norm=self.norm_type)
        else:
            print(" *** non-targeted attack *** \n")
            adv_crafter = UniversalPerturbation(
                self.classifier,
                attacker='fgsm',
                delta=0.000001,
                attacker_params={"eps":self.fgsm_eps},
                max_iter=self.uap_iter,
                eps=self.norm_size,
                norm=self.norm_type)

        LOG = []
        X_materials_cnt = 0
        noise = np.zeros(imshape, dtype='float32')
        for i,path in enumerate(self.X_materials_paths):
            X_materials = np.load(path)
            X_materials_cnt += X_materials.shape[0]
            #if X_materials.shape[-1] != 3:
                #X_materials = self.my_gray_scale(images=X_materials)
            X_materials -= 128.0  # normalize to [-1, +1]
            X_materials /= 128.0

            # Generate the UAP
            if self.targeted >= 0:
                Y_materials_tar = self.my_target_labels(length=X_materials.shape[0])  # build the target-label array for the targeted attack
                noise = adv_crafter.generate(X_materials, noise=noise, y=Y_materials_tar, targeted=True)
            else:
                noise = adv_crafter.generate(X_materials, noise=noise)
            
            # Fallback in case the noise was never updated by the attack
            if type(adv_crafter.noise[0, :]) == int:
                noise = np.zeros(imshape)
            else:
                noise = np.copy(adv_crafter.noise)
                noise = np.reshape(noise, imshape)
            
            noise_random = self.my_randomized_noise(noise=noise)  # generate a randomized control noise

            # Compute the fooling rates
            fr_i = self.my_calc_fooling_ratio(images=self.X_images, noise=noise)  # fooling rate on images + noise
            fr_m = self.my_calc_fooling_ratio(images=X_materials, noise=noise)  # fooling rate on materials + noise
            fr_i_r = self.my_calc_fooling_ratio(images=self.X_images, noise=noise_random)
            fr_m_r = self.my_calc_fooling_ratio(images=X_materials, noise=noise_random)

            # Compute the size of the generated UAP
            norm_2 = np.linalg.norm(noise)
            norm_inf = abs(noise).max()

            LOG.append([X_materials_cnt, norm_2, norm_inf, fr_i, fr_m, fr_i_r, fr_m_r])
            #np.save(self.save_path+'_noise_{}'.format(i), noise)
            print("LOG: {} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f}".format(X_materials_cnt, norm_2, norm_inf, fr_i, fr_m, fr_i_r, fr_m_r))
            del X_materials  # free memory
        np.save(self.save_path+'_noise', noise)
        np.save(self.save_path+'_LOG', np.array(LOG))
        return noise, np.array(LOG)
Example #15
def main(args):
    (x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('cifar10')
    x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
    x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)

    x_train = x_train[:50]
    y_train = y_train[:50]

    model = VGG('VGG16')
    model.load_state_dict(torch.load("./logs/pytorch_vgg16.model"))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-2)

    classifier = PyTorchClassifier(model=model,
                                   clip_values=(min_, max_),
                                   loss=criterion,
                                   optimizer=optimizer,
                                   input_shape=(3, 32, 32),
                                   nb_classes=10)

    predictions = classifier.predict(x_test)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(
            y_test)
    print('Accuracy on benign test examples: {}%'.format(accuracy * 100))

    attack_params = {
        "attacker": "fgsm",
        # "attacker_params": {
        #     "max_iter": 1000,
        #     "epsilon": 0.02
        # },
        "delta": 0.01,
        "max_iter": 1000,
        "eps": 13.0 / 255.0,
        "norm": np.inf
    }

    # Craft attack on training examples
    adv_crafter = UniversalPerturbation(classifier, **attack_params)
    x_train_adv = adv_crafter.generate(x_train)

    # Fooling rate and convergence status on the training set
    print('Fooling rate (reported by the attack): {:.2f}%'.format(adv_crafter.fooling_rate * 100))
    print('Converged: {}'.format(adv_crafter.converged))

    print('\nCraft attack on training examples')
    # adv_crafter.v: vector (array) for perturbation
    # perturbation = adv_crafter.v[0, :]
    # universal perturbation
    perturbation = adv_crafter.noise
    x_train_adv = x_train + perturbation

    # randomized perturbation (control)
    # perturbation_rand = np.random.permutation(perturbation.reshape(32 * 32 * 3)).reshape(3, 32, 32)
    # x_train_adv_rand = x_train + perturbation_rand

    preds = np.argmax(classifier.predict(x_train), axis=1)
    preds_adv = np.argmax(classifier.predict(x_train_adv), axis=1)
    fooling_rate = np.sum(preds != preds_adv) / y_train.shape[0]
    # Fooling rate on the training set (universal perturbation)
    print("\nFooling rate: %.2f%%" % (fooling_rate * 100))
Example #16
                   model_type=args.model,
                   mode='inference')

# # Generate adversarial examples

classifier, norm, eps = set_art(model=model,
                                norm_str=args.norm,
                                eps=args.eps,
                                mean_l2_train=mean_l2_train,
                                mean_linf_train=mean_linf_train)

adv_crafter = UniversalPerturbation(classifier,
                                    attacker='fgsm',
                                    delta=0.000001,
                                    attacker_params={
                                        'targeted': False,
                                        'eps': 0.0024
                                    },
                                    max_iter=15,
                                    eps=eps,
                                    norm=norm)

_ = adv_crafter.generate(X_train)
noise = adv_crafter.noise[0, :].astype(np.float32)
base_f = 'nontargeted_{}_{}_eps{:.3f}'.format(args.model, args.norm, args.eps)
save_f_noise = 'result/{}/noise/{}'.format(args.dataset, base_f)
np.save(save_f_noise, noise)

# # Evaluate the ART classifier on adversarial examples

preds_train = np.argmax(classifier.predict(X_train), axis=1)
preds_test = np.argmax(classifier.predict(X_test), axis=1)
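The snippet ends before the evaluation it announces; a sketch of one way to finish it, using only the arrays defined above (X_train, X_test, noise, preds_train, preds_test):

# Sketch: apply the universal noise and measure how often predictions flip.
preds_train_adv = np.argmax(classifier.predict(X_train + noise), axis=1)
preds_test_adv = np.argmax(classifier.predict(X_test + noise), axis=1)
fr_train = np.mean(preds_train != preds_train_adv)  # fooling rate, train
fr_test = np.mean(preds_test != preds_test_adv)     # fooling rate, test
print('Fooling rate train: {:.2f}%, test: {:.2f}%'.format(fr_train * 100, fr_test * 100))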