예제 #1
0
    def test_fetch_all(self):
        dataname = 'CIFAR10'
        data_lookup = DATASET_LIST[dataname]
        path = get_data_path()
        dc = DataContainer(data_lookup, path)
        dc(shuffle=False, normalize=False)

        x_train = dc.x_train
        y_train = dc.y_train
        x_test = dc.x_test
        y_test = dc.y_test
        x_all = dc.x_all
        y_all = dc.y_all

        data_shape = list(x_train.shape)
        data_shape[0] = len(x_train) + len(x_test)
        self.assertTupleEqual(x_all.shape, tuple(data_shape))

        label_shape = list(y_train.shape)
        label_shape[0] = len(y_train) + len(y_test)
        self.assertTupleEqual(y_all.shape, tuple(label_shape))

        dc.y_train = onehot_encoding(y_train, dc.num_classes)
        dc.y_test = onehot_encoding(y_test, dc.num_classes)
        x_train = dc.x_train
        y_train = dc.y_train
        y_all = dc.y_all
        label_shape = list(y_train.shape)
        label_shape[0] = len(y_train) + len(y_test)
        self.assertTupleEqual(y_all.shape, tuple(label_shape))
예제 #2
0
    def setUpClass(cls):
        master_seed(SEED)

        Model = get_model('MnistCnnV2')
        model = Model()
        logger.info('Starting %s data container...', NAME)
        dc = DataContainer(DATASET_LIST[NAME], get_data_path())
        dc()
        mc = ModelContainerPT(model, dc)
        mc.load(MODEL_FILE)
        accuracy = mc.evaluate(dc.x_test, dc.y_test)
        logger.info('Accuracy on test set: %f', accuracy)

        cls.distillation = DistillationContainer(mc,
                                                 Model(),
                                                 temperature=TEMPERATURE,
                                                 pretrained=False)

        filename = get_pt_model_filename(
            model.__class__.__name__, NAME,
            str(MAX_EPOCHS) + 't' + str(int(TEMPERATURE * 10)))
        filename = os.path.join('test', 'distill_' + filename)
        file_path = os.path.join('save', filename)
        if not os.path.exists(file_path):
            # Expected initial loss = -log(1/num_classes) = 2.3025850929940455'
            cls.distillation.fit(max_epochs=MAX_EPOCHS, batch_size=BATCH_SIZE)
            cls.distillation.save(filename, overwrite=True)
        else:
            cls.distillation.load(file_path)

        smooth_mc = cls.distillation.get_def_model_container()
        accuracy = smooth_mc.evaluate(dc.x_test, dc.y_test)
        logger.info('Accuracy on test set: %f', accuracy)
def main():
    master_seed(SEED)

    logger.info('Starting %s data container...', NAME)
    dc = DataContainer(DATASET_LIST[NAME], get_data_path())
    dc(shuffle=True, normalize=True)

    num_features = dc.dim_data[0]
    num_classes = dc.num_classes
    print('Features:', num_features)
    print('Classes:', num_classes)
    model = BCNN(num_features, num_classes)
    filename = get_pt_model_filename(BCNN.__name__, NAME, MAX_EPOCHS)
    logger.debug('File name: %s', filename)

    mc = ModelContainerPT(model, dc)

    file_path = os.path.join('save', filename)
    if not os.path.exists(file_path):
        logger.debug('Expected initial loss: %f', np.log(dc.num_classes))
        mc.fit(max_epochs=MAX_EPOCHS, batch_size=BATCH_SIZE)
        mc.save(filename, overwrite=True)
    else:
        logger.info('Use saved parameters from %s', filename)
        mc.load(file_path)

    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)
예제 #4
0
def main():
    master_seed(SEED)

    dataset = DATASET_LIST[DATA_NAME]
    dc = DataContainer(dataset, get_data_path())
    dc()
    model = MnistCnnCW()
    mc = ModelContainerPT(model, dc)
    mc.load(MODEL_FILE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    print('Accuracy on test set: {}'.format(accuracy))

    attack = CarliniL2V2Container(mc,
                                  targeted=False,
                                  learning_rate=0.01,
                                  binary_search_steps=9,
                                  max_iter=1000,
                                  confidence=0.0,
                                  initial_const=0.01,
                                  c_range=(0, 1e10),
                                  batch_size=16,
                                  clip_values=(0.0, 1.0))
    adv, y_adv, x_clean, y_clean = attack.generate(count=100)

    l2 = np.mean(get_l2_norm(adv, x_clean))
    print('L2 norm: {}'.format(l2))
    not_match = y_adv != y_clean
    success_rate = len(not_match[not_match == True]) / len(adv)
    print('Success rate: {}'.format(success_rate))

    accuracy = mc.evaluate(adv, y_clean)
    print('Accuracy on adv. examples: {}'.format(accuracy))
예제 #5
0
def main():
    print(f'Starting {NAME} data container...')
    print(DATASET_LIST[NAME])

    # Step 1: select dataset
    dc = DataContainer(DATASET_LIST[NAME], DATA_ROOT)
    dc(size_train=0.8, normalize=True)

    num_features = dc.dim_data[0]
    num_classes = dc.num_classes
    print('Features:', num_features)
    print('Classes:', num_classes)

    # Step 2: train model
    model = MnistCnnCW()
    model_name = model.__class__.__name__
    print('Using model:', model_name)

    mc = ModelContainerPT(model, dc)

    model_filename = f'example-mnist-e{EPOCHS}.pt'
    file_path = os.path.join('save', model_filename)

    if not os.path.exists(file_path):
        mc.fit(max_epochs=EPOCHS, batch_size=BATCH_SIZE)
        mc.save(model_filename, overwrite=True)
    else:
        print('Found saved model!')
        mc.load(file_path)

    acc = mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on random test set: {acc*100:.4f}%')

    # Step 3: attack the model
    # TODO: Unable to generate adversarial examples successfully!
    attack = ZooContainer(
        mc,
        confidence=0.5,
        targeted=False,
        learning_rate=1e-1,
        max_iter=200,
        binary_search_steps=100,
        initial_const=1e-1,
        abort_early=True,
        use_resize=False,
        use_importance=False,
        nb_parallel=250,
        batch_size=1,
        variable_h=0.01,
    )

    adv, y_adv, x_clean, y_clean = attack.generate(count=5)
    accuracy = mc.evaluate(adv, y_clean)
    print('Accuracy on adv. examples: {:.4f}%'.format(accuracy * 100))
    def setUpClass(cls):
        master_seed(SEED)

        Model = get_model('MnistCnnV2')
        model = Model()
        logger.info('Starting %s data container...', NAME)
        dc = DataContainer(DATASET_LIST[NAME], get_data_path())
        dc()
        cls.mc = ModelContainerPT(model, dc)
        cls.mc.load(MODEL_FILE)
        accuracy = cls.mc.evaluate(dc.x_test, dc.y_test)
        logger.info('Accuracy on test set: %f', accuracy)
예제 #7
0
def main():
    # load dataset and initial model
    Model = get_model(MODEL_NAME)
    model = Model()
    dc = DataContainer(DATASET_LIST[DATASET], get_data_path())
    dc()
    mc = ModelContainerPT(model, dc)
    mc.load(MODEL_FILE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on test set: {accuracy}')

    # train or load distillation model
    distillation = DistillationContainer(mc,
                                         Model(),
                                         temperature=TEMPERATURE,
                                         pretrained=False)

    distill_path = os.path.join('save', DISTILL_FILE)
    if not os.path.exists(distill_path):
        distillation.fit(max_epochs=MAX_EPOCHS, batch_size=128)
        distillation.save(DISTILL_FILE, True)
    else:
        distillation.load(distill_path)

    smooth_mc = distillation.get_def_model_container()
    accuracy = smooth_mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on test set: {accuracy}')

    # load adversarial examples
    adv_list = ['FGSM', 'BIM', 'DeepFool', 'Carlini', 'Saliency']
    y_file = os.path.join('save',
                          f'{MODEL_NAME}_{DATASET}_{adv_list[0]}_y.npy')
    x_file = os.path.join('save',
                          f'{MODEL_NAME}_{DATASET}_{adv_list[0]}_x.npy')
    x = np.load(x_file, allow_pickle=False)
    y = np.load(y_file, allow_pickle=False)
    acc_og = mc.evaluate(x, y)
    acc_distill = smooth_mc.evaluate(x, y)
    print(f'Accuracy on clean set - OG: {acc_og}, Distill: {acc_distill}')

    for adv_name in adv_list:
        adv_file = os.path.join(
            'save', build_adv_filename(MODEL_NAME, DATASET, adv_name))
        adv = np.load(adv_file, allow_pickle=False)
        acc_og = mc.evaluate(adv, y)
        acc_distill = smooth_mc.evaluate(adv, y)
        print(
            f'Accuracy on {adv_name} set - OG: {acc_og}, Distill: {acc_distill}'
        )
def get_data_container(dname, use_shuffle=True, use_normalize=True):
    """Returns a DataContainer based on given name"""
    dataset = DATASET_LIST[dname]
    dc = DataContainer(dataset, get_data_path())
    if dname in ('MNIST', 'CIFAR10', 'SVHN'):
        dc(shuffle=use_shuffle)
    elif dname == 'Iris':
        dc(shuffle=use_shuffle, normalize=use_normalize, size_train=0.6)
    elif dname in ('BankNote', 'BreastCancerWisconsin', 'HTRU2', 'WheatSeed'):
        dc(shuffle=use_shuffle, normalize=use_normalize)
    elif dname == 'Synthetic':
        # No function call for synthetic
        return dc
    else:
        raise AttributeError('Received unknown dataset "{}"'.format(dname))
    return dc
    def setUpClass(cls):
        master_seed(SEED)

        logger.info('Starting %s data container...', NAME)
        cls.dc = DataContainer(DATASET_LIST[NAME], get_data_path())
        cls.dc(shuffle=True)

        model = MnistCnnV2()
        logger.info('Using model: %s', model.__class__.__name__)

        cls.mc = ModelContainerPT(model, cls.dc)

        filename = get_pt_model_filename(MnistCnnV2.__name__, NAME, MAX_EPOCHS)
        file_path = os.path.join('save', filename)
        if not os.path.exists(file_path):
            cls.mc.fit(max_epochs=MAX_EPOCHS, batch_size=BATCH_SIZE)
            cls.mc.save(filename, overwrite=True)
        else:
            logger.info('Use saved parameters from %s', filename)
            cls.mc.load(file_path)

        accuracy = cls.mc.evaluate(cls.dc.x_test, cls.dc.y_test)
        logger.info('Accuracy on test set: %f', accuracy)

        hidden_model = model.hidden_model

        logger.info('sample_ratio: %f', SAMPLE_RATIO)
        cls.ad = ApplicabilityDomainContainer(
            cls.mc,
            hidden_model=hidden_model,
            k2=9,
            reliability=1.6,
            sample_ratio=SAMPLE_RATIO,
            kappa=10,
            confidence=0.9,
        )
        cls.ad.fit()

        # shuffle the test set
        x_test = cls.dc.x_test
        y_test = cls.dc.y_test
        shuffled_indices = np.random.permutation(len(x_test))[:NUM_ADV]
        cls.x = x_test[shuffled_indices]
        cls.y = y_test[shuffled_indices]
        logger.info('# of test set: %d', len(cls.x))
    def setUpClass(cls):
        master_seed(SEED)

        logger.info('Starting %s data container...', NAME)
        cls.dc = DataContainer(DATASET_LIST[NAME], get_data_path())
        cls.dc(shuffle=True)

        model = CifarCnn()
        logger.info('Using model: %s', model.__class__.__name__)

        cls.mc = ModelContainerPT(model, cls.dc)

        file_path = os.path.join('save', FILE_NAME)
        if not os.path.exists(file_path):
            raise Exception('Where is the pretrained file?!')
        else:
            logger.info('Use saved parameters from %s', FILE_NAME)
            cls.mc.load(file_path)

        accuracy = cls.mc.evaluate(cls.dc.x_test, cls.dc.y_test)
        logger.info('Accuracy on test set: %f', accuracy)

        hidden_model = model.hidden_model

        logger.info('sample_ratio: %f', SAMPLE_RATIO)
        cls.ad = ApplicabilityDomainContainer(
            cls.mc,
            hidden_model=hidden_model,
            k2=9,
            reliability=1.6,
            sample_ratio=SAMPLE_RATIO,
            kappa=10,
            confidence=0.7,
        )
        cls.ad.fit()

        # shuffle the test set
        x_test = cls.dc.x_test
        y_test = cls.dc.y_test
        shuffled_indices = np.random.permutation(len(x_test))[:NUM_ADV]
        cls.x = x_test[shuffled_indices]
        cls.y = y_test[shuffled_indices]
        logger.info('# of test set: %d', len(cls.x))
def main():
    master_seed(SEED)

    dataset = DATASET_LIST[DATA_NAME]
    dc = DataContainer(dataset, get_data_path())
    dc(shuffle=True, normalize=True)
    print('# of trainset: {}, # of testset: {}'.format(len(dc.x_train),
                                                       len(dc.x_test)))
    num_classes = dc.num_classes
    num_features = dc.dim_data[0]
    model = IrisNN(num_features=num_features,
                   hidden_nodes=num_features * 4,
                   num_classes=num_classes)
    mc = ModelContainerPT(model, dc)
    mc.load(MODEL_FILE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    print('Accuracy on test set: {}'.format(accuracy))

    clip_values = get_range(dc.x_train, is_image=False)
    print('clip_values', clip_values)

    attack = CarliniL2V2Container(mc,
                                  targeted=False,
                                  learning_rate=0.01,
                                  binary_search_steps=9,
                                  max_iter=1000,
                                  confidence=0.0,
                                  initial_const=0.01,
                                  c_range=(0, 1e4),
                                  batch_size=16,
                                  clip_values=clip_values)
    adv, y_adv, x_clean, y_clean = attack.generate(count=100)

    l2 = np.mean(get_l2_norm(adv, x_clean))
    print('L2 norm: {}'.format(l2))
    not_match = y_adv != y_clean
    success_rate = len(not_match[not_match == True]) / len(adv)
    print('Success rate: {}'.format(success_rate))

    accuracy = mc.evaluate(adv, y_clean)
    print('Accuracy on adv. examples: {}'.format(accuracy))
def main():
    Model = get_model('MnistCnnV2')
    classifier = Model()

    dc = DataContainer(DATASET_LIST['MNIST'], get_data_path())
    dc()
    classifier_mc = ModelContainerPT(classifier, dc)
    classifier_mc.load(MODEL_FILE)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on test set: {accuracy}')

    attack = BIMContainer(classifier_mc,
                          eps=0.3,
                          eps_step=0.1,
                          max_iter=100,
                          targeted=False)

    adv_trainer = AdversarialTraining(classifier_mc, [attack])
    adv_trainer.fit(max_epochs=5, batch_size=128, ratio=0.1)
    discriminator = adv_trainer.get_def_model_container()
    print(discriminator.accuracy_test)
    def setUpClass(cls):
        master_seed(SEED)

        logger.info('Starting %s data container...', NAME)
        cls.dc = DataContainer(DATASET_LIST[NAME], get_data_path())
        cls.dc(shuffle=False)

        model = MnistCnnCW()
        logger.info('Using model: %s', model.__class__.__name__)

        cls.mc = ModelContainerPT(model, cls.dc)

        file_path = os.path.join('save', FILE_NAME)
        if not os.path.exists(file_path):
            cls.mc.fit(max_epochs=MAX_EPOCHS, batch_size=BATCH_SIZE)
            cls.mc.save(FILE_NAME, overwrite=True)
        else:
            logger.info('Use saved parameters from %s', FILE_NAME)
            cls.mc.load(file_path)

        accuracy = cls.mc.evaluate(cls.dc.x_test, cls.dc.y_test)
        logger.info('Accuracy on test set: %f', accuracy)
    def setUpClass(cls):
        master_seed(SEED)

        logger.info('Starting %s data container...', NAME)
        cls.dc = DataContainer(DATASET_LIST[NAME], get_data_path())
        # ordered by labels, it requires shuffle!
        cls.dc(shuffle=True, normalize=True)

        num_features = cls.dc.dim_data[0]
        num_classes = cls.dc.num_classes
        model = BCNN(num_features, num_classes)
        logger.info('Using model: %s', model.__class__.__name__)

        cls.mc = ModelContainerPT(model, cls.dc)

        filename = get_pt_model_filename(BCNN.__name__, NAME, MAX_EPOCHS)
        file_path = os.path.join('save', filename)
        if not os.path.exists(file_path):
            cls.mc.fit(max_epochs=MAX_EPOCHS, batch_size=BATCH_SIZE)
            cls.mc.save(filename, overwrite=True)
        else:
            logger.info('Use saved parameters from %s', filename)
            cls.mc.load(file_path)

        accuracy = cls.mc.evaluate(cls.dc.x_test, cls.dc.y_test)
        logger.info('Accuracy on test set: %f', accuracy)

        hidden_model = model.hidden_model
        cls.ad = ApplicabilityDomainContainer(
            cls.mc,
            hidden_model=hidden_model,
            k2=6,
            reliability=1.6,
            sample_ratio=SAMPLE_RATIO,
            kappa=10,
            confidence=0.9,
        )
        cls.ad.fit()
예제 #15
0
def main():
    DATA_ROOT = 'data'
    BATCH_SIZE = 128

    # image datasets: {'MNIST', 'CIFAR10', 'SVHN'}
    # numeric datasets: {'BankNote', 'BreastCancerWisconsin', 'HTRU2', 'Iris', 'WheatSeed'}
    NAME = 'MNIST'
    print(f'Starting {NAME} data container...')
    print(DATASET_LIST[NAME])

    dc = DataContainer(DATASET_LIST[NAME], DATA_ROOT)
    dc(size_train=0.8, normalize=True)

    num_features = dc.dim_data[0]
    num_classes = dc.num_classes
    print('Features:', num_features)
    print('Classes:', num_classes)

    ## model in {BCNN, IrisNN, MnistCnnCW}
    model = MnistCnnCW()
    # model = BCNN(num_features, num_classes)
    # model = IrisNN(num_features, num_classes, hidden_nodes=16)  # for Iris
    # model = IrisNN(num_features, num_classes, hidden_nodes=64)
    model_name = model.__class__.__name__
    print('Using model:', model_name)

    print('Expected initial loss: {}'.format(np.log(10)))

    mc = ModelContainerPT(model, dc)
    mc.fit(max_epochs=10, batch_size=BATCH_SIZE)  # for image
    # mc.fit(max_epochs=200, batch_size=BATCH_SIZE)
    print('Test acc:', mc.accuracy_test)

    mc.save(f'{NAME}-{model_name}')
    mc.load(os.path.join('save', f'{NAME}-{model_name}.pt'))

    acc = mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on random test set: {acc*100:.4f}%')
예제 #16
0
    def setUpClass(cls):
        master_seed(SEED)

        if not os.path.exists(os.path.join('save', 'test')):
            os.makedirs(os.path.join('save', 'test'))

        NAME = 'Iris'
        logger.info('Starting %s data container...', NAME)
        cls.dc = DataContainer(DATASET_LIST[NAME], get_data_path())
        cls.dc(shuffle=True)

        model = IrisNN()
        model_name = model.__class__.__name__
        logger.info('Using model: %s', model_name)
        cls.mc = ModelContainerPT(model, cls.dc)
        cls.mc.fit(max_epochs=100, batch_size=BATCH_SIZE)

        # for comparison
        model2 = IrisNN()
        cls.mc2 = ModelContainerPT(model2, cls.dc)

        # inputs for testing
        cls.x = np.copy(cls.dc.x_test[:5])
        cls.y = np.copy(cls.dc.y_test[:5])
    def setUpClass(cls):
        master_seed(SEED)

        # generating synthetic data
        x, y = make_classification(
            n_samples=SAMPLE_SIZE,
            n_features=NUM_FEATURES,
            n_informative=NUM_CLASSES,
            n_redundant=0,
            n_classes=NUM_CLASSES,
            n_clusters_per_class=1,
        )
        x_max = np.max(x, axis=0)
        x_min = np.min(x, axis=0)
        x = scale_normalize(x, x_min, x_max)
        n_train = int(np.floor(SAMPLE_SIZE * 0.8))
        x_train = np.array(x[:n_train], dtype=np.float32)
        y_train = np.array(y[:n_train], dtype=np.long)
        x_test = np.array(x[n_train:], dtype=np.float32)
        y_test = np.array(y[n_train:], dtype=np.long)

        data_dict = get_synthetic_dataset_dict(SAMPLE_SIZE, NUM_CLASSES,
                                               NUM_FEATURES)
        dc = DataContainer(data_dict, get_data_path())
        dc.x_train = x_train
        dc.y_train = y_train
        dc.x_test = x_test
        dc.y_test = y_test

        # training Extra Tree classifier
        classifier = ExtraTreeClassifier(
            criterion='gini',
            splitter='random',
        )
        cls.mc = ModelContainerTree(classifier, dc)
        cls.mc.fit()
        accuracy = cls.mc.evaluate(dc.x_test, dc.y_test)
        logger.info('Accuracy on test set: %f', accuracy)
def main():
    data_name = 'MNIST'
    set_logging('advTraining', data_name, True, True)

    model_file = os.path.join('save', 'MnistCnnV2_MNIST_e50.pt')
    Model = get_model('MnistCnnV2')
    classifier = Model()

    dc = DataContainer(DATASET_LIST[data_name], get_data_path())
    dc()
    classifier_mc = ModelContainerPT(classifier, dc)
    classifier_mc.load(model_file)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    attack = BIMContainer(classifier_mc,
                          eps=0.3,
                          eps_step=0.1,
                          max_iter=100,
                          targeted=False)

    adv_trainer = AdversarialTraining(classifier_mc, [attack])
    # adv_trainer.fit(max_epochs=30, batch_size=128, ratio=0.1)
    # adv_trainer.save('AdvTrain_MnistCnnV2_MNIST', overwrite=True)

    file_name = os.path.join('save', 'AdvTrain_MnistCnnV2_MNIST.pt')
    adv_trainer.load(file_name)

    x = np.load(os.path.join('save', 'MnistCnnV2_MNIST_BIM_x.npy'),
                allow_pickle=False)
    y = np.load(os.path.join('save', 'MnistCnnV2_MNIST_BIM_y.npy'),
                allow_pickle=False)
    blocked_indices = adv_trainer.detect(x, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s', len(blocked_indices), len(x),
                'clean')

    adv = np.load(os.path.join('save', 'MnistCnnV2_MNIST_BIM_adv.npy'),
                  allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'BIM', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s', len(blocked_indices), len(adv),
                'BIM')

    adv = np.load(os.path.join('save', 'MnistCnnV2_MNIST_Carlini_adv.npy'),
                  allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'Carlini', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s', len(blocked_indices), len(adv),
                'Carlini')

    adv = np.load(os.path.join('save', 'MnistCnnV2_MNIST_DeepFool_adv.npy'),
                  allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'DeepFool', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s', len(blocked_indices), len(adv),
                'DeepFool')

    adv = np.load(os.path.join('save', 'MnistCnnV2_MNIST_FGSM_adv.npy'),
                  allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'FGSM', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s', len(blocked_indices), len(adv),
                'FGSM')

    adv = np.load(os.path.join('save', 'MnistCnnV2_MNIST_Saliency_adv.npy'),
                  allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'Saliency', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s', len(blocked_indices), len(adv),
                'Saliency')
예제 #19
0
 def init_datacontainer(cls, name):
     x = DATASET_LIST[name]
     path = get_data_path()
     dc = DataContainer(x, path)
     dc(shuffle=False, normalize=True, size_train=0.5)
     return dc
def main():
    dc = DataContainer(DATASET_LIST[NAME], get_data_path())
    dc()

    model = MnistCnnV2()
    mc = ModelContainerPT(model, dc)
    mc.load(MODEL_FILE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on test set: {accuracy}')

    carlini_attack = attacks.CarliniL2V2Container(
        mc,
        learning_rate=0.01,
        binary_search_steps=9,
        max_iter=1000,
        confidence=0.0,
        initial_const=0.01,
        c_range=(0, 1e10),
        batch_size=BATCH_SIZE,
        clip_values=(0.0, 1.0),
    )
    # adv, y_adv, x_clean, y_clean = carlini_attack.generate(count=1000)
    # carlini_attack.save_attack(
    #     'MnistCnnV2_MNIST_Carlini',
    #     adv,
    #     y_adv,
    #     x_clean,
    #     y_clean,
    #     True,
    # )

    # use pre-trained adversarial examples
    adv, y_adv, x_clean, y_clean = carlini_attack.load_adv_examples(
        CARLINI_FILE)

    accuracy = mc.evaluate(adv, y_clean)
    print(f'Accuracy on adv. examples: {accuracy}')

    bim_attack = attacks.BIMContainer(
        mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False,
    )
    k2, zeta, kappa, gamma = cross_validation(dc, model, bim_attack)

    # use pre-defined parameters
    # k2, zeta, kappa, gamma, score = K2, RELIABILITY, KAPPA, CONFIDENCE, 0

    ad = ApplicabilityDomainContainer(
        mc,
        hidden_model=model.hidden_model,
        k2=k2,
        reliability=zeta,
        sample_ratio=SAMPLE_RATIO,
        kappa=kappa,
        confidence=gamma,
    )
    print(ad.params)

    ad.fit()
    blocked_indices, x_passed = ad.detect(adv, y_adv, return_passed_x=True)
    print('After update parameters, blocked {}/{} samples from adv. examples'.
          format(len(blocked_indices), len(adv)))
def main():
    data_name = 'Iris'
    set_logging('advTraining', data_name, True, True)

    dc = DataContainer(DATASET_LIST[data_name], get_data_path())
    dc()

    model_file = os.path.join('save', 'IrisNN_Iris_e200.pt')
    num_features = dc.dim_data[0]
    num_classes = dc.num_classes
    classifier = IrisNN(
        num_features=num_features,
        hidden_nodes=num_features*4,
        num_classes=num_classes,
    )

    classifier_mc = ModelContainerPT(classifier, dc)
    classifier_mc.load(model_file)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    attack = BIMContainer(
        classifier_mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False)

    adv_trainer = AdversarialTraining(classifier_mc, [attack])
    # adv_trainer.fit(max_epochs=100, batch_size=64, ratio=1)
    # adv_trainer.save('AdvTrain_IrisNN_Iris', overwrite=True)

    file_name = os.path.join('save', 'AdvTrain_IrisNN_Iris.pt')
    adv_trainer.load(file_name)

    x = np.load(os.path.join('save', 'IrisNN_Iris_BIM_x.npy'),
                allow_pickle=False)
    y = np.load(os.path.join('save', 'IrisNN_Iris_BIM_y.npy'),
                allow_pickle=False)
    blocked_indices = adv_trainer.detect(x, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s',
                len(blocked_indices), len(x), 'clean')

    adv = np.load(os.path.join(
        'save', 'IrisNN_Iris_BIM_adv.npy'), allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'BIM', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s',
                len(blocked_indices), len(adv), 'BIM')

    adv = np.load(os.path.join(
        'save', 'IrisNN_Iris_Carlini_adv.npy'), allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'Carlini', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s',
                len(blocked_indices), len(adv), 'Carlini')

    adv = np.load(os.path.join(
        'save', 'IrisNN_Iris_DeepFool_adv.npy'), allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'DeepFool', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s',
                len(blocked_indices), len(adv), 'DeepFool')

    adv = np.load(os.path.join(
        'save', 'IrisNN_Iris_FGSM_adv.npy'), allow_pickle=False)
    accuracy = classifier_mc.evaluate(adv, y)
    logger.info('Accuracy on %s set: %f', 'FGSM', accuracy)
    blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s',
                len(blocked_indices), len(adv), 'FGSM')
예제 #22
0
def main():
    # load dataset and initial model
    model = MnistCnnV2()
    dc = DataContainer(DATASET_LIST[DATASET], get_data_path())
    dc(shuffle=True, normalize=True)
    mc = ModelContainerPT(model, dc)
    mc.load(MODEL_FILE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on test set: {accuracy}')

    # train or load pretrained parameters
    squeezer = FeatureSqueezing(mc, ['median', 'normal', 'binary'],
                                bit_depth=BIT_DEPTH,
                                sigma=SIGMA,
                                kernel_size=KERNEL_SIZE,
                                pretrained=True)

    x_test = dc.x_test
    y_test = dc.y_test
    mc_binary = squeezer.get_def_model_container('binary')
    mc_median = squeezer.get_def_model_container('median')
    mc_normal = squeezer.get_def_model_container('normal')

    print('before fit')
    acc_bin = mc_binary.evaluate(squeezer.apply_binary_transform(x_test),
                                 y_test)
    print(f'Accuracy of binary squeezer: {acc_bin}')
    acc_med = mc_median.evaluate(squeezer.apply_median_transform(x_test),
                                 y_test)
    print(f'Accuracy of median squeezer: {acc_med}')
    acc_nor = mc_normal.evaluate(squeezer.apply_normal_transform(x_test),
                                 y_test)
    print(f'Accuracy of normal squeezer: {acc_nor}')

    if not squeezer.does_pretrained_exist(MODEL_FILE):
        squeezer.fit(max_epochs=MAX_EPOCHS, batch_size=128)

        print('after fit')
        acc_bin = mc_binary.evaluate(squeezer.apply_binary_transform(x_test),
                                     y_test)
        print(f'Accuracy of binary squeezer: {acc_bin}')
        acc_med = mc_median.evaluate(squeezer.apply_median_transform(x_test),
                                     y_test)
        print(f'Accuracy of median squeezer: {acc_med}')
        acc_nor = mc_normal.evaluate(squeezer.apply_normal_transform(x_test),
                                     y_test)
        print(f'Accuracy of normal squeezer: {acc_nor}')

        squeezer.save(MODEL_FILE, True)

    print('after load')
    squeezer.load(MODEL_FILE)
    acc_bin = mc_binary.evaluate(squeezer.apply_binary_transform(x_test),
                                 y_test)
    print(f'Accuracy of binary squeezer: {acc_bin}')
    acc_med = mc_median.evaluate(squeezer.apply_median_transform(x_test),
                                 y_test)
    print(f'Accuracy of median squeezer: {acc_med}')
    acc_nor = mc_normal.evaluate(squeezer.apply_normal_transform(x_test),
                                 y_test)
    print(f'Accuracy of normal squeezer: {acc_nor}')

    # load adversarial examples
    adv_list = ['FGSM', 'BIM', 'DeepFool', 'Carlini', 'Saliency']
    y_file = os.path.join('save',
                          f'{MODEL_NAME}_{DATASET}_{adv_list[0]}_y.npy')
    x_file = os.path.join('save',
                          f'{MODEL_NAME}_{DATASET}_{adv_list[0]}_x.npy')
    x = np.load(x_file, allow_pickle=False)
    y = np.load(y_file, allow_pickle=False)
    acc_og = mc.evaluate(x, y)
    acc_squeezer = squeezer.evaluate(x, y)
    print(f'Accuracy on clean set - OG: {acc_og}, Squeezer: {acc_squeezer}')

    for adv_name in adv_list:
        adv_file = os.path.join(
            'save', build_adv_filename(MODEL_NAME, DATASET, adv_name))
        adv = np.load(adv_file, allow_pickle=False)
        acc_og = mc.evaluate(adv, y)
        acc_squeezer = squeezer.evaluate(adv, y)
        print(
            f'Accuracy on {adv_name} set - OG: {acc_og}, Squeezer: {acc_squeezer}'
        )
def main():
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file which contains a pretrained model. The filename should in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the cross validation')
    parser.add_argument(
        '-a', '--adv', type=str,
        help='file name of adv. examples for testing. If it\'s none, the program will ignore testing. The name should in "<model>_<dataset>_<attack>_adv.npy" format')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-i', '--ignore', action='store_true', default=False,
        help='Ignore saving the results. Only returns the results from terminal.')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')
    args = parser.parse_args()
    model_file = args.model
    param_file = args.param
    adv_file = args.adv
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    does_ignore = args.ignore
    overwrite = args.overwrite

    model_name, data_name = parse_model_filename(model_file)

    # set logging config. Run this before logging anything!
    set_logging('cross_validation', data_name, verbose, save_log)

    # check files
    for file_path in [model_file, param_file]:
        if not os.path.exists(file_path):
            logger.warning('%s does not exist. Exit.', file_path)
            sys.exit(0)
    if adv_file is not None and not os.path.exists(adv_file):
        logger.warning('%s does not exist. Exit.', adv_file)
        sys.exit(0)

    # read parameters
    with open(param_file) as param_json:
        params = json.load(param_json)

    # show parameters
    print('[cv] Running cross validation on {} with {}...'.format(
        model_file, data_name))
    logger.info('Start at      : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model file    :%s', model_file)
    logger.info('adv file      :%s', adv_file)
    logger.info('model         :%s', model_name)
    logger.info('dataset       :%s', data_name)
    logger.info('param file    :%s', param_file)
    logger.info('seed          :%d', seed)
    logger.info('verbose       :%r', verbose)
    logger.info('save_log      :%r', save_log)
    logger.info('Ignore saving :%r', does_ignore)
    logger.info('overwrite     :%r', overwrite)
    logger.debug('params       :%s', str(params))

    # load parameters
    k_range = params['k_range']
    z_range = params['z_range']
    kappa_range = params['kappa_range']
    gamma_range = params['gamma_range']
    epsilon = params['epsilon']
    num_folds = params['num_folds']
    batch_size = params['batch_size']
    sample_ratio = params['sample_ratio']
    logger.info('k_range       :%s', str(k_range))
    logger.info('z_range       :%s', str(z_range))
    logger.info('kappa_range   :%s', str(kappa_range))
    logger.info('gamma_range   :%s', str(gamma_range))
    logger.info('epsilon       :%.1f', epsilon)
    logger.info('num_folds     :%d', num_folds)
    logger.info('batch_size    :%d', batch_size)
    logger.info('sample_ratio  :%.1f', sample_ratio)

    # reset seed
    master_seed(seed)

    dc = DataContainer(DATASET_LIST[data_name], get_data_path())
    dc(shuffle=True, normalize=True, size_train=0.8)
    logger.info('Sample size: %d', len(dc))

    Model = get_model(model_name)
    # there models require extra keyword arguments
    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        kwargs = {
            'num_features': num_features,
            'hidden_nodes': num_features*4,
            'num_classes': num_classes,
        }
        model = Model(**kwargs)
    else:
        model = Model()
    logger.info('Use %s model', model.__class__.__name__)
    mc = ModelContainerPT(model, dc)
    mc.load(model_file)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    ad = ApplicabilityDomainContainer(
        mc, hidden_model=model.hidden_model, sample_ratio=sample_ratio)
    cross_validation = CrossValidation(
        ad,
        num_folds=num_folds,
        k_range=k_range,
        z_range=z_range,
        kappa_range=kappa_range,
        gamma_range=gamma_range,
        epsilon=epsilon,
    )
    bim_attack = BIMContainer(
        mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False,
    )
    cross_validation.fit(bim_attack)

    # test optimal parameters
    if adv_file is not None:
        postfix = ['adv', 'pred', 'x', 'y']
        data_files = [adv_file.replace('_adv', '_' + s) for s in postfix]
        adv = np.load(data_files[0], allow_pickle=False)
        pred_adv = np.load(data_files[1], allow_pickle=False)
        x = np.load(data_files[2], allow_pickle=False)
        pred = np.load(data_files[3], allow_pickle=False)

        # fetch optimal parameters
        ad = ApplicabilityDomainContainer(
            mc,
            hidden_model=model.hidden_model,
            k2=cross_validation.k2,
            reliability=cross_validation.reliability,
            sample_ratio=sample_ratio,
            kappa=cross_validation.kappa,
            confidence=cross_validation.confidence,
        )
        logger.info('Params: %s', str(ad.params))
        ad.fit()
        blocked_indices = ad.detect(x, pred, return_passed_x=False)
        logger.info('Blocked %d/%d on clean data',
                    len(blocked_indices), len(x))
        blocked_indices = ad.detect(adv, pred_adv, return_passed_x=False)
        logger.info('Blocked %d/%d on adv. examples.',
                    len(blocked_indices), len(adv))

    # save results
    if not does_ignore:
        file_name = name_handler(
            model_name + '_' + data_name, 'csv', overwrite=overwrite)
        cross_validation.save(file_name)
def main():
    dc = DataContainer(DATASET_LIST[NAME], get_data_path())
    dc(shuffle=True, normalize=True, size_train=0.8)
    print('Sample size: {}'.format(len(dc)))
    num_classes = dc.num_classes
    num_features = dc.dim_data[0]
    model = IrisNN(num_features=num_features,
                   hidden_nodes=num_features * 4,
                   num_classes=num_classes)
    mc = ModelContainerPT(model, dc)
    # mc.fit(max_epochs=MAX_EPOCHS, batch_size=BATCH_SIZE, early_stop=False)
    # filename = get_pt_model_filename('IrisNN', NAME, MAX_EPOCHS)
    # mc.save(filename, overwrite=True)

    mc.load(MODEL_FILE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on test set: {accuracy}')

    carlini_attack = attacks.CarliniL2V2Container(
        mc,
        learning_rate=0.01,
        binary_search_steps=9,
        max_iter=1000,
        confidence=0.0,
        initial_const=0.01,
        c_range=(0, 1e4),
        batch_size=BATCH_SIZE,
        clip_values=(0.0, 1.0),
    )

    # we need more than 30 adv. examples
    # x_all = dc.x_all
    # y_all = dc.x_all
    # indices = np.random.permutation(np.arange(len(dc)))[:100]
    # x = x_all[indices]
    # y = y_all[indices]

    # adv, pred_adv, x_clean, pred_clean = carlini_attack.generate(use_testset=False, x=x)
    # carlini_attack.save_attack(
    #     'IrisNN_Iris_Carlini',
    #     adv, pred_adv,
    #     x_clean, pred_clean,
    #     True,
    # )

    # use pre-trained adversarial examples
    adv, y_adv, x_clean, pred_clean = carlini_attack.load_adv_examples(
        CARLINI_FILE)

    accuracy = mc.evaluate(adv, pred_clean)
    print(f'Accuracy on adv. examples: {accuracy}')

    bim_attack = attacks.BIMContainer(
        mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False,
    )
    k2, zeta, kappa, gamma = cross_validation(dc, model, bim_attack)

    ad = ApplicabilityDomainContainer(
        mc,
        hidden_model=model.hidden_model,
        k2=k2,
        reliability=zeta,
        sample_ratio=SAMPLE_RATIO,
        kappa=kappa,
        confidence=gamma,
    )
    print(ad.params)

    ad.fit()
    blocked_indices, x_passed = ad.detect(adv, y_adv, return_passed_x=True)
    print('After update parameters, blocked {}/{} samples from adv. examples'.
          format(len(blocked_indices), len(adv)))
def main():
    parser = ap.ArgumentParser()
    parser.add_argument('-s',
                        '--size',
                        type=int,
                        required=True,
                        help='the number of sample size')
    parser.add_argument('-f',
                        '--features',
                        type=int,
                        required=True,
                        help='the number of features')
    parser.add_argument('-c',
                        '--classes',
                        type=int,
                        default=2,
                        help='the number of classes')
    parser.add_argument(
        '-i',
        '--iteration',
        type=int,
        default=MAX_ITERATIONS,
        help='the number of iterations that the experiment will repeat')
    parser.add_argument('-e',
                        '--epoch',
                        type=int,
                        required=True,
                        help='the number of max epochs for training')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        default=False,
                        help='set logger level to debug')
    parser.add_argument('-l',
                        '--savelog',
                        action='store_true',
                        default=False,
                        help='save logging file')
    parser.add_argument('-w',
                        '--overwrite',
                        action='store_true',
                        default=False,
                        help='overwrite the existing file')

    args = parser.parse_args()
    sample_size = args.size
    num_features = args.features
    num_classes = args.classes
    max_iterations = args.iteration
    max_epochs = args.epoch
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    dname = f'SyntheticS{sample_size}F{num_features}C{num_classes}'
    set_logging(LOG_NAME, dname, verbose, save_log)

    print('[{}] Start experiment on {}...'.format(LOG_NAME, dname))
    logger.info('Start at    :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset     :%s', dname)
    logger.info('train size  :%d', sample_size)
    logger.info('num features:%d', num_features)
    logger.info('num classes :%d', num_classes)
    logger.info('iterations  :%d', max_iterations)
    logger.info('max_epochs  :%d', max_epochs)
    logger.info('verbose     :%r', verbose)
    logger.info('save_log    :%r', save_log)
    logger.info('overwrite   :%r', overwrite)

    result_file = name_handler(os.path.join(
        'save', f'{LOG_NAME}_{dname}_i{max_iterations}'),
                               'csv',
                               overwrite=overwrite)

    adv_file = name_handler(os.path.join('save',
                                         f'{LOG_NAME}_{dname}_AdvExamples'),
                            'csv',
                            overwrite=overwrite)

    adv_file = open(adv_file, 'w')
    adv_file.write(','.join(TITLE_ADV) + '\n')
    res_file = open(result_file, 'w')
    res_file.write(','.join(TITLE_RESULTS) + '\n')
    for i in range(max_iterations):
        since = time.time()
        # generate synthetic data
        x, y = make_classification(
            n_samples=sample_size + 1000,
            n_features=num_features,
            n_informative=num_classes,
            n_redundant=0,
            n_classes=num_classes,
            n_clusters_per_class=1,
        )

        # normalize data
        x_max = np.max(x, axis=0)
        x_min = np.min(x, axis=0)
        # NOTE: Carlini attack expects the data in range [0, 1]
        # x_mean = np.mean(x, axis=0)
        # x = scale_normalize(x, x_min, x_max, x_mean)
        x = scale_normalize(x, x_min, x_max)

        # training/test split
        # NOTE: test set has fixed size
        x_train = np.array(x[:-1000], dtype=np.float32)
        y_train = np.array(y[:-1000], dtype=np.long)
        x_test = np.array(x[-1000:], dtype=np.float32)
        y_test = np.array(y[-1000:], dtype=np.long)

        # create data container
        data_dict = get_synthetic_dataset_dict(sample_size + 1000, num_classes,
                                               num_features)
        dc = DataContainer(data_dict, get_data_path())

        # assign data manually
        dc.x_train = x_train
        dc.y_train = y_train
        dc.x_test = x_test
        dc.y_test = y_test

        experiment(i, dc, max_epochs, adv_file, res_file)
        time_elapsed = time.time() - since
        print('Completed {} [{}/{}]: {:d}m {:2.1f}s'.format(
            dname, i + 1, max_iterations, int(time_elapsed // 60),
            time_elapsed % 60))

    adv_file.close()
    res_file.close()