コード例 #1
0
    def test_bim(self):
        attack = attacks.BIMContainer(self.mc,
                                      eps=0.3,
                                      eps_step=0.1,
                                      max_iter=100,
                                      targeted=False)
        adv, y_adv, x_clean, y_clean = attack.generate(count=NUM_ADV)

        # At least made some change from clean images
        self.assertFalse((adv == x_clean).all())

        # test accuracy
        accuracy = self.mc.evaluate(adv, y_clean)
        logger.info('Accuracy on adv. examples: %f', accuracy)
        self.assertLessEqual(accuracy, 0.1)

        # test success rate
        success_rate = (y_adv != y_clean).sum() / len(y_adv)
        logger.info('Success rate of adv. attack: %f', success_rate)
        self.assertGreaterEqual(success_rate, 0.9)

        # sum success rate (missclassified) and accuracy (correctly classified)
        self.assertAlmostEqual(success_rate + accuracy, 1.0, places=4)

        # Check the max perturbation
        dif = np.max(np.abs(adv - x_clean))
        logger.info('Max perturbation (L1-norm): %f', dif)
        self.assertLessEqual(dif, 0.3 + 1e-4)

        # Check bounding box
        self.assertLessEqual(np.max(adv), 1.0 + 1e-4)
        self.assertGreaterEqual(np.min(adv), 0 - 1e-4)

        l2 = np.max(get_l2_norm(adv, x_clean))
        logger.info('L2 norm = %f', l2)
コード例 #2
0
 def test_bim_attack(self):
     attack = attacks.BIMContainer(self.mc,
                                   eps=0.3,
                                   eps_step=0.1,
                                   max_iter=100,
                                   targeted=False)
     blocked_indices, adv_success_rate = self.preform_attack(attack)
     block_rate = len(blocked_indices) / NUM_ADV
     self.assertGreaterEqual(block_rate, adv_success_rate * 0.6)
     logger.info('[%s] Block rate: %f', attack.__class__.__name__,
                 block_rate)
コード例 #3
0
def main():
    dc = DataContainer(DATASET_LIST[NAME], get_data_path())
    dc()

    model = MnistCnnV2()
    mc = ModelContainerPT(model, dc)
    mc.load(MODEL_FILE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on test set: {accuracy}')

    carlini_attack = attacks.CarliniL2V2Container(
        mc,
        learning_rate=0.01,
        binary_search_steps=9,
        max_iter=1000,
        confidence=0.0,
        initial_const=0.01,
        c_range=(0, 1e10),
        batch_size=BATCH_SIZE,
        clip_values=(0.0, 1.0),
    )
    # adv, y_adv, x_clean, y_clean = carlini_attack.generate(count=1000)
    # carlini_attack.save_attack(
    #     'MnistCnnV2_MNIST_Carlini',
    #     adv,
    #     y_adv,
    #     x_clean,
    #     y_clean,
    #     True,
    # )

    # use pre-trained adversarial examples
    adv, y_adv, x_clean, y_clean = carlini_attack.load_adv_examples(
        CARLINI_FILE)

    accuracy = mc.evaluate(adv, y_clean)
    print(f'Accuracy on adv. examples: {accuracy}')

    bim_attack = attacks.BIMContainer(
        mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False,
    )
    k2, zeta, kappa, gamma = cross_validation(dc, model, bim_attack)

    # use pre-defined parameters
    # k2, zeta, kappa, gamma, score = K2, RELIABILITY, KAPPA, CONFIDENCE, 0

    ad = ApplicabilityDomainContainer(
        mc,
        hidden_model=model.hidden_model,
        k2=k2,
        reliability=zeta,
        sample_ratio=SAMPLE_RATIO,
        kappa=kappa,
        confidence=gamma,
    )
    print(ad.params)

    ad.fit()
    blocked_indices, x_passed = ad.detect(adv, y_adv, return_passed_x=True)
    print('After update parameters, blocked {}/{} samples from adv. examples'.
          format(len(blocked_indices), len(adv)))
コード例 #4
0
def main():
    dc = DataContainer(DATASET_LIST[NAME], get_data_path())
    dc(shuffle=True, normalize=True, size_train=0.8)
    print('Sample size: {}'.format(len(dc)))
    num_classes = dc.num_classes
    num_features = dc.dim_data[0]
    model = IrisNN(num_features=num_features,
                   hidden_nodes=num_features * 4,
                   num_classes=num_classes)
    mc = ModelContainerPT(model, dc)
    # mc.fit(max_epochs=MAX_EPOCHS, batch_size=BATCH_SIZE, early_stop=False)
    # filename = get_pt_model_filename('IrisNN', NAME, MAX_EPOCHS)
    # mc.save(filename, overwrite=True)

    mc.load(MODEL_FILE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on test set: {accuracy}')

    carlini_attack = attacks.CarliniL2V2Container(
        mc,
        learning_rate=0.01,
        binary_search_steps=9,
        max_iter=1000,
        confidence=0.0,
        initial_const=0.01,
        c_range=(0, 1e4),
        batch_size=BATCH_SIZE,
        clip_values=(0.0, 1.0),
    )

    # we need more than 30 adv. examples
    # x_all = dc.x_all
    # y_all = dc.x_all
    # indices = np.random.permutation(np.arange(len(dc)))[:100]
    # x = x_all[indices]
    # y = y_all[indices]

    # adv, pred_adv, x_clean, pred_clean = carlini_attack.generate(use_testset=False, x=x)
    # carlini_attack.save_attack(
    #     'IrisNN_Iris_Carlini',
    #     adv, pred_adv,
    #     x_clean, pred_clean,
    #     True,
    # )

    # use pre-trained adversarial examples
    adv, y_adv, x_clean, pred_clean = carlini_attack.load_adv_examples(
        CARLINI_FILE)

    accuracy = mc.evaluate(adv, pred_clean)
    print(f'Accuracy on adv. examples: {accuracy}')

    bim_attack = attacks.BIMContainer(
        mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False,
    )
    k2, zeta, kappa, gamma = cross_validation(dc, model, bim_attack)

    ad = ApplicabilityDomainContainer(
        mc,
        hidden_model=model.hidden_model,
        k2=k2,
        reliability=zeta,
        sample_ratio=SAMPLE_RATIO,
        kappa=kappa,
        confidence=gamma,
    )
    print(ad.params)

    ad.fit()
    blocked_indices, x_passed = ad.detect(adv, y_adv, return_passed_x=True)
    print('After update parameters, blocked {}/{} samples from adv. examples'.
          format(len(blocked_indices), len(adv)))