def test_fetch_all(self):
    dataname = 'CIFAR10'
    data_lookup = DATASET_LIST[dataname]
    path = get_data_path()
    dc = DataContainer(data_lookup, path)
    dc(shuffle=False, normalize=False)
    x_train = dc.x_train
    y_train = dc.y_train
    x_test = dc.x_test
    y_test = dc.y_test
    x_all = dc.x_all
    y_all = dc.y_all

    # x_all should stack the train and test splits along the first axis
    data_shape = list(x_train.shape)
    data_shape[0] = len(x_train) + len(x_test)
    self.assertTupleEqual(x_all.shape, tuple(data_shape))

    label_shape = list(y_train.shape)
    label_shape[0] = len(y_train) + len(y_test)
    self.assertTupleEqual(y_all.shape, tuple(label_shape))

    # after one-hot encoding the labels, y_all should follow the new shape
    dc.y_train = onehot_encoding(y_train, dc.num_classes)
    dc.y_test = onehot_encoding(y_test, dc.num_classes)
    x_train = dc.x_train
    y_train = dc.y_train
    y_all = dc.y_all
    label_shape = list(y_train.shape)
    label_shape[0] = len(y_train) + len(y_test)
    self.assertTupleEqual(y_all.shape, tuple(label_shape))
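# The checks above rely on `onehot_encoding` turning integer labels of shape
# (n,) into an (n, num_classes) matrix. A minimal NumPy sketch of that assumed
# behavior (the project's actual implementation may differ):
import numpy as np

def onehot_encoding_sketch(labels, num_classes):
    """Map integer labels (n,) to a one-hot matrix (n, num_classes)."""
    encoded = np.zeros((len(labels), num_classes), dtype=np.float32)
    encoded[np.arange(len(labels)), labels] = 1.0
    return encoded

# e.g. onehot_encoding_sketch(np.array([0, 2, 1]), 3) has shape (3, 3),
# which is why y_all.shape gains a class dimension after encoding.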
def setUpClass(cls):
    master_seed(SEED)

    Model = get_model('MnistCnnV2')
    model = Model()
    logger.info('Starting %s data container...', NAME)
    dc = DataContainer(DATASET_LIST[NAME], get_data_path())
    dc()
    mc = ModelContainerPT(model, dc)
    mc.load(MODEL_FILE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    cls.distillation = DistillationContainer(
        mc, Model(), temperature=TEMPERATURE, pretrained=False)

    filename = get_pt_model_filename(
        model.__class__.__name__, NAME,
        str(MAX_EPOCHS) + 't' + str(int(TEMPERATURE * 10)))
    filename = os.path.join('test', 'distill_' + filename)
    file_path = os.path.join('save', filename)
    if not os.path.exists(file_path):
        # Expected initial loss = -log(1/num_classes) = 2.3025850929940455
        cls.distillation.fit(max_epochs=MAX_EPOCHS, batch_size=BATCH_SIZE)
        cls.distillation.save(filename, overwrite=True)
    else:
        cls.distillation.load(file_path)

    smooth_mc = cls.distillation.get_def_model_container()
    accuracy = smooth_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)
def main():
    master_seed(SEED)

    logger.info('Starting %s data container...', NAME)
    dc = DataContainer(DATASET_LIST[NAME], get_data_path())
    dc(shuffle=True, normalize=True)

    num_features = dc.dim_data[0]
    num_classes = dc.num_classes
    print('Features:', num_features)
    print('Classes:', num_classes)

    model = BCNN(num_features, num_classes)
    filename = get_pt_model_filename(BCNN.__name__, NAME, MAX_EPOCHS)
    logger.debug('File name: %s', filename)
    mc = ModelContainerPT(model, dc)

    file_path = os.path.join('save', filename)
    if not os.path.exists(file_path):
        # with uniform random predictions, cross-entropy starts near log(k)
        logger.debug('Expected initial loss: %f', np.log(dc.num_classes))
        mc.fit(max_epochs=MAX_EPOCHS, batch_size=BATCH_SIZE)
        mc.save(filename, overwrite=True)
    else:
        logger.info('Use saved parameters from %s', filename)
        mc.load(file_path)

    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)
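# Sanity check for the "expected initial loss" logged above: a freshly
# initialized classifier predicts roughly uniformly over k classes, so the
# cross-entropy starts near -log(1/k) = log(k). For k = 10 this is the
# 2.3025850929940455 constant quoted in the distillation test.
import numpy as np

assert np.isclose(-np.log(1.0 / 10), np.log(10))  # ~2.302585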
def main():
    master_seed(SEED)

    dataset = DATASET_LIST[DATA_NAME]
    dc = DataContainer(dataset, get_data_path())
    dc()

    model = MnistCnnCW()
    mc = ModelContainerPT(model, dc)
    mc.load(MODEL_FILE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    print('Accuracy on test set: {}'.format(accuracy))

    attack = CarliniL2V2Container(
        mc,
        targeted=False,
        learning_rate=0.01,
        binary_search_steps=9,
        max_iter=1000,
        confidence=0.0,
        initial_const=0.01,
        c_range=(0, 1e10),
        batch_size=16,
        clip_values=(0.0, 1.0))
    adv, y_adv, x_clean, y_clean = attack.generate(count=100)

    l2 = np.mean(get_l2_norm(adv, x_clean))
    print('L2 norm: {}'.format(l2))

    # an attack succeeds when the predicted label changes
    not_match = y_adv != y_clean
    success_rate = np.sum(not_match) / len(adv)
    print('Success rate: {}'.format(success_rate))

    accuracy = mc.evaluate(adv, y_clean)
    print('Accuracy on adv. examples: {}'.format(accuracy))
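# A sketch of the per-sample L2 norm that `get_l2_norm` is assumed to compute
# above: flatten each example and take the Euclidean norm of the perturbation.
# The helper's real implementation lives in the project's utility module.
import numpy as np

def l2_norm_sketch(adv, x_clean):
    """Return one L2 distance per adversarial example."""
    diff = (adv - x_clean).reshape(len(adv), -1)
    return np.linalg.norm(diff, axis=1)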
def main():
    print(f'Starting {NAME} data container...')
    print(DATASET_LIST[NAME])

    # Step 1: select dataset
    dc = DataContainer(DATASET_LIST[NAME], DATA_ROOT)
    dc(size_train=0.8, normalize=True)

    num_features = dc.dim_data[0]
    num_classes = dc.num_classes
    print('Features:', num_features)
    print('Classes:', num_classes)

    # Step 2: train model
    model = MnistCnnCW()
    model_name = model.__class__.__name__
    print('Using model:', model_name)
    mc = ModelContainerPT(model, dc)

    model_filename = f'example-mnist-e{EPOCHS}.pt'
    file_path = os.path.join('save', model_filename)
    if not os.path.exists(file_path):
        mc.fit(max_epochs=EPOCHS, batch_size=BATCH_SIZE)
        mc.save(model_filename, overwrite=True)
    else:
        print('Found saved model!')
        mc.load(file_path)

    acc = mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on random test set: {acc*100:.4f}%')

    # Step 3: attack the model
    # TODO: Unable to generate adversarial examples successfully!
    attack = ZooContainer(
        mc,
        confidence=0.5,
        targeted=False,
        learning_rate=1e-1,
        max_iter=200,
        binary_search_steps=100,
        initial_const=1e-1,
        abort_early=True,
        use_resize=False,
        use_importance=False,
        nb_parallel=250,
        batch_size=1,
        variable_h=0.01,
    )
    adv, y_adv, x_clean, y_clean = attack.generate(count=5)
    accuracy = mc.evaluate(adv, y_clean)
    print('Accuracy on adv. examples: {:.4f}%'.format(accuracy * 100))
def setUpClass(cls):
    master_seed(SEED)

    Model = get_model('MnistCnnV2')
    model = Model()
    logger.info('Starting %s data container...', NAME)
    dc = DataContainer(DATASET_LIST[NAME], get_data_path())
    dc()
    cls.mc = ModelContainerPT(model, dc)
    cls.mc.load(MODEL_FILE)
    accuracy = cls.mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)
def main():
    # load dataset and initial model
    Model = get_model(MODEL_NAME)
    model = Model()
    dc = DataContainer(DATASET_LIST[DATASET], get_data_path())
    dc()
    mc = ModelContainerPT(model, dc)
    mc.load(MODEL_FILE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on test set: {accuracy}')

    # train or load distillation model
    distillation = DistillationContainer(
        mc, Model(), temperature=TEMPERATURE, pretrained=False)

    distill_path = os.path.join('save', DISTILL_FILE)
    if not os.path.exists(distill_path):
        distillation.fit(max_epochs=MAX_EPOCHS, batch_size=128)
        distillation.save(DISTILL_FILE, True)
    else:
        distillation.load(distill_path)

    smooth_mc = distillation.get_def_model_container()
    accuracy = smooth_mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on test set: {accuracy}')

    # load adversarial examples
    adv_list = ['FGSM', 'BIM', 'DeepFool', 'Carlini', 'Saliency']
    y_file = os.path.join(
        'save', f'{MODEL_NAME}_{DATASET}_{adv_list[0]}_y.npy')
    x_file = os.path.join(
        'save', f'{MODEL_NAME}_{DATASET}_{adv_list[0]}_x.npy')
    x = np.load(x_file, allow_pickle=False)
    y = np.load(y_file, allow_pickle=False)
    acc_og = mc.evaluate(x, y)
    acc_distill = smooth_mc.evaluate(x, y)
    print(f'Accuracy on clean set - OG: {acc_og}, Distill: {acc_distill}')

    for adv_name in adv_list:
        adv_file = os.path.join(
            'save', build_adv_filename(MODEL_NAME, DATASET, adv_name))
        adv = np.load(adv_file, allow_pickle=False)
        acc_og = mc.evaluate(adv, y)
        acc_distill = smooth_mc.evaluate(adv, y)
        print(f'Accuracy on {adv_name} set - OG: {acc_og}, '
              f'Distill: {acc_distill}')
def get_data_container(dname, use_shuffle=True, use_normalize=True):
    """Returns a DataContainer based on given name"""
    dataset = DATASET_LIST[dname]
    dc = DataContainer(dataset, get_data_path())
    if dname in ('MNIST', 'CIFAR10', 'SVHN'):
        dc(shuffle=use_shuffle)
    elif dname == 'Iris':
        dc(shuffle=use_shuffle, normalize=use_normalize, size_train=0.6)
    elif dname in ('BankNote', 'BreastCancerWisconsin', 'HTRU2', 'WheatSeed'):
        dc(shuffle=use_shuffle, normalize=use_normalize)
    elif dname == 'Synthetic':
        # No function call for synthetic
        return dc
    else:
        raise AttributeError('Received unknown dataset "{}"'.format(dname))
    return dc
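# Example usage of the helper above; dataset names are keys of DATASET_LIST.
# Note that 'Synthetic' returns an unprepared container: callers are expected
# to assign x_train/y_train/x_test/y_test manually, as in the synthetic tests.
#
#     dc = get_data_container('MNIST', use_shuffle=True)
#     print(dc.num_classes, dc.dim_data)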
def setUpClass(cls):
    master_seed(SEED)

    logger.info('Starting %s data container...', NAME)
    cls.dc = DataContainer(DATASET_LIST[NAME], get_data_path())
    cls.dc(shuffle=True)

    model = MnistCnnV2()
    logger.info('Using model: %s', model.__class__.__name__)
    cls.mc = ModelContainerPT(model, cls.dc)

    filename = get_pt_model_filename(MnistCnnV2.__name__, NAME, MAX_EPOCHS)
    file_path = os.path.join('save', filename)
    if not os.path.exists(file_path):
        cls.mc.fit(max_epochs=MAX_EPOCHS, batch_size=BATCH_SIZE)
        cls.mc.save(filename, overwrite=True)
    else:
        logger.info('Use saved parameters from %s', filename)
        cls.mc.load(file_path)

    accuracy = cls.mc.evaluate(cls.dc.x_test, cls.dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    hidden_model = model.hidden_model
    logger.info('sample_ratio: %f', SAMPLE_RATIO)
    cls.ad = ApplicabilityDomainContainer(
        cls.mc,
        hidden_model=hidden_model,
        k2=9,
        reliability=1.6,
        sample_ratio=SAMPLE_RATIO,
        kappa=10,
        confidence=0.9,
    )
    cls.ad.fit()

    # randomly draw NUM_ADV samples from the test set
    x_test = cls.dc.x_test
    y_test = cls.dc.y_test
    shuffled_indices = np.random.permutation(len(x_test))[:NUM_ADV]
    cls.x = x_test[shuffled_indices]
    cls.y = y_test[shuffled_indices]
    logger.info('# of test set: %d', len(cls.x))
def setUpClass(cls):
    master_seed(SEED)

    logger.info('Starting %s data container...', NAME)
    cls.dc = DataContainer(DATASET_LIST[NAME], get_data_path())
    cls.dc(shuffle=True)

    model = CifarCnn()
    logger.info('Using model: %s', model.__class__.__name__)
    cls.mc = ModelContainerPT(model, cls.dc)

    file_path = os.path.join('save', FILE_NAME)
    if not os.path.exists(file_path):
        raise FileNotFoundError(
            'Cannot find the pretrained model: {}'.format(file_path))
    logger.info('Use saved parameters from %s', FILE_NAME)
    cls.mc.load(file_path)

    accuracy = cls.mc.evaluate(cls.dc.x_test, cls.dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    hidden_model = model.hidden_model
    logger.info('sample_ratio: %f', SAMPLE_RATIO)
    cls.ad = ApplicabilityDomainContainer(
        cls.mc,
        hidden_model=hidden_model,
        k2=9,
        reliability=1.6,
        sample_ratio=SAMPLE_RATIO,
        kappa=10,
        confidence=0.7,
    )
    cls.ad.fit()

    # randomly draw NUM_ADV samples from the test set
    x_test = cls.dc.x_test
    y_test = cls.dc.y_test
    shuffled_indices = np.random.permutation(len(x_test))[:NUM_ADV]
    cls.x = x_test[shuffled_indices]
    cls.y = y_test[shuffled_indices]
    logger.info('# of test set: %d', len(cls.x))
def main():
    master_seed(SEED)

    dataset = DATASET_LIST[DATA_NAME]
    dc = DataContainer(dataset, get_data_path())
    dc(shuffle=True, normalize=True)
    print('# of trainset: {}, # of testset: {}'.format(
        len(dc.x_train), len(dc.x_test)))

    num_classes = dc.num_classes
    num_features = dc.dim_data[0]
    model = IrisNN(
        num_features=num_features,
        hidden_nodes=num_features * 4,
        num_classes=num_classes)
    mc = ModelContainerPT(model, dc)
    mc.load(MODEL_FILE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    print('Accuracy on test set: {}'.format(accuracy))

    clip_values = get_range(dc.x_train, is_image=False)
    print('clip_values', clip_values)
    attack = CarliniL2V2Container(
        mc,
        targeted=False,
        learning_rate=0.01,
        binary_search_steps=9,
        max_iter=1000,
        confidence=0.0,
        initial_const=0.01,
        c_range=(0, 1e4),
        batch_size=16,
        clip_values=clip_values)
    adv, y_adv, x_clean, y_clean = attack.generate(count=100)

    l2 = np.mean(get_l2_norm(adv, x_clean))
    print('L2 norm: {}'.format(l2))

    # an attack succeeds when the predicted label changes
    not_match = y_adv != y_clean
    success_rate = np.sum(not_match) / len(adv)
    print('Success rate: {}'.format(success_rate))

    accuracy = mc.evaluate(adv, y_clean)
    print('Accuracy on adv. examples: {}'.format(accuracy))
def main():
    Model = get_model('MnistCnnV2')
    classifier = Model()
    dc = DataContainer(DATASET_LIST['MNIST'], get_data_path())
    dc()
    classifier_mc = ModelContainerPT(classifier, dc)
    classifier_mc.load(MODEL_FILE)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on test set: {accuracy}')

    attack = BIMContainer(
        classifier_mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False)

    adv_trainer = AdversarialTraining(classifier_mc, [attack])
    adv_trainer.fit(max_epochs=5, batch_size=128, ratio=0.1)
    discriminator = adv_trainer.get_def_model_container()
    print(discriminator.accuracy_test)
def setUpClass(cls):
    master_seed(SEED)

    logger.info('Starting %s data container...', NAME)
    cls.dc = DataContainer(DATASET_LIST[NAME], get_data_path())
    cls.dc(shuffle=False)

    model = MnistCnnCW()
    logger.info('Using model: %s', model.__class__.__name__)
    cls.mc = ModelContainerPT(model, cls.dc)

    file_path = os.path.join('save', FILE_NAME)
    if not os.path.exists(file_path):
        cls.mc.fit(max_epochs=MAX_EPOCHS, batch_size=BATCH_SIZE)
        cls.mc.save(FILE_NAME, overwrite=True)
    else:
        logger.info('Use saved parameters from %s', FILE_NAME)
        cls.mc.load(file_path)

    accuracy = cls.mc.evaluate(cls.dc.x_test, cls.dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)
def setUpClass(cls):
    master_seed(SEED)

    logger.info('Starting %s data container...', NAME)
    cls.dc = DataContainer(DATASET_LIST[NAME], get_data_path())
    # the dataset is ordered by label, so it must be shuffled!
    cls.dc(shuffle=True, normalize=True)

    num_features = cls.dc.dim_data[0]
    num_classes = cls.dc.num_classes
    model = BCNN(num_features, num_classes)
    logger.info('Using model: %s', model.__class__.__name__)
    cls.mc = ModelContainerPT(model, cls.dc)

    filename = get_pt_model_filename(BCNN.__name__, NAME, MAX_EPOCHS)
    file_path = os.path.join('save', filename)
    if not os.path.exists(file_path):
        cls.mc.fit(max_epochs=MAX_EPOCHS, batch_size=BATCH_SIZE)
        cls.mc.save(filename, overwrite=True)
    else:
        logger.info('Use saved parameters from %s', filename)
        cls.mc.load(file_path)

    accuracy = cls.mc.evaluate(cls.dc.x_test, cls.dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    hidden_model = model.hidden_model
    cls.ad = ApplicabilityDomainContainer(
        cls.mc,
        hidden_model=hidden_model,
        k2=6,
        reliability=1.6,
        sample_ratio=SAMPLE_RATIO,
        kappa=10,
        confidence=0.9,
    )
    cls.ad.fit()
def main():
    DATA_ROOT = 'data'
    BATCH_SIZE = 128

    # image datasets: {'MNIST', 'CIFAR10', 'SVHN'}
    # numeric datasets: {'BankNote', 'BreastCancerWisconsin', 'HTRU2',
    #                    'Iris', 'WheatSeed'}
    NAME = 'MNIST'
    print(f'Starting {NAME} data container...')
    print(DATASET_LIST[NAME])

    dc = DataContainer(DATASET_LIST[NAME], DATA_ROOT)
    dc(size_train=0.8, normalize=True)

    num_features = dc.dim_data[0]
    num_classes = dc.num_classes
    print('Features:', num_features)
    print('Classes:', num_classes)

    ## model in {BCNN, IrisNN, MnistCnnCW}
    model = MnistCnnCW()
    # model = BCNN(num_features, num_classes)
    # model = IrisNN(num_features, num_classes, hidden_nodes=16)  # for Iris
    # model = IrisNN(num_features, num_classes, hidden_nodes=64)
    model_name = model.__class__.__name__
    print('Using model:', model_name)

    # with 10 classes, the initial cross-entropy should be close to log(10)
    print('Expected initial loss: {}'.format(np.log(10)))
    mc = ModelContainerPT(model, dc)
    mc.fit(max_epochs=10, batch_size=BATCH_SIZE)  # for image
    # mc.fit(max_epochs=200, batch_size=BATCH_SIZE)
    print('Test acc:', mc.accuracy_test)
    mc.save(f'{NAME}-{model_name}')
    mc.load(os.path.join('save', f'{NAME}-{model_name}.pt'))
    acc = mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on random test set: {acc*100:.4f}%')
def setUpClass(cls):
    master_seed(SEED)

    if not os.path.exists(os.path.join('save', 'test')):
        os.makedirs(os.path.join('save', 'test'))

    NAME = 'Iris'
    logger.info('Starting %s data container...', NAME)
    cls.dc = DataContainer(DATASET_LIST[NAME], get_data_path())
    cls.dc(shuffle=True)

    model = IrisNN()
    model_name = model.__class__.__name__
    logger.info('Using model: %s', model_name)
    cls.mc = ModelContainerPT(model, cls.dc)
    cls.mc.fit(max_epochs=100, batch_size=BATCH_SIZE)

    # for comparison
    model2 = IrisNN()
    cls.mc2 = ModelContainerPT(model2, cls.dc)

    # inputs for testing
    cls.x = np.copy(cls.dc.x_test[:5])
    cls.y = np.copy(cls.dc.y_test[:5])
def setUpClass(cls):
    master_seed(SEED)

    # generating synthetic data
    x, y = make_classification(
        n_samples=SAMPLE_SIZE,
        n_features=NUM_FEATURES,
        n_informative=NUM_CLASSES,
        n_redundant=0,
        n_classes=NUM_CLASSES,
        n_clusters_per_class=1,
    )
    x_max = np.max(x, axis=0)
    x_min = np.min(x, axis=0)
    x = scale_normalize(x, x_min, x_max)

    n_train = int(np.floor(SAMPLE_SIZE * 0.8))
    # np.int64 replaces np.long, which was removed in newer NumPy releases
    x_train = np.array(x[:n_train], dtype=np.float32)
    y_train = np.array(y[:n_train], dtype=np.int64)
    x_test = np.array(x[n_train:], dtype=np.float32)
    y_test = np.array(y[n_train:], dtype=np.int64)

    data_dict = get_synthetic_dataset_dict(
        SAMPLE_SIZE, NUM_CLASSES, NUM_FEATURES)
    dc = DataContainer(data_dict, get_data_path())
    dc.x_train = x_train
    dc.y_train = y_train
    dc.x_test = x_test
    dc.y_test = y_test

    # training Extra Tree classifier
    classifier = ExtraTreeClassifier(
        criterion='gini',
        splitter='random',
    )
    cls.mc = ModelContainerTree(classifier, dc)
    cls.mc.fit()
    accuracy = cls.mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)
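# A hedged sketch of the min-max scaling `scale_normalize` is assumed to apply
# above: each feature is mapped into [0, 1] using its per-feature extrema.
# (A variant taking a mean argument appears commented out in the synthetic
# experiment script, so the project's helper may support more modes.)
import numpy as np

def scale_normalize_sketch(x, x_min, x_max):
    """Min-max scale columns of x into [0, 1]."""
    return (x - x_min) / (x_max - x_min)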
def main():
    data_name = 'MNIST'
    set_logging('advTraining', data_name, True, True)

    model_file = os.path.join('save', 'MnistCnnV2_MNIST_e50.pt')
    Model = get_model('MnistCnnV2')
    classifier = Model()
    dc = DataContainer(DATASET_LIST[data_name], get_data_path())
    dc()
    classifier_mc = ModelContainerPT(classifier, dc)
    classifier_mc.load(model_file)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    attack = BIMContainer(
        classifier_mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False)

    adv_trainer = AdversarialTraining(classifier_mc, [attack])
    # adv_trainer.fit(max_epochs=30, batch_size=128, ratio=0.1)
    # adv_trainer.save('AdvTrain_MnistCnnV2_MNIST', overwrite=True)
    file_name = os.path.join('save', 'AdvTrain_MnistCnnV2_MNIST.pt')
    adv_trainer.load(file_name)

    x = np.load(os.path.join('save', 'MnistCnnV2_MNIST_BIM_x.npy'),
                allow_pickle=False)
    y = np.load(os.path.join('save', 'MnistCnnV2_MNIST_BIM_y.npy'),
                allow_pickle=False)
    blocked_indices = adv_trainer.detect(x, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s',
                len(blocked_indices), len(x), 'clean')

    # evaluate and detect each set of pre-generated adversarial examples
    for attack_name in ['BIM', 'Carlini', 'DeepFool', 'FGSM', 'Saliency']:
        adv = np.load(
            os.path.join('save', f'MnistCnnV2_MNIST_{attack_name}_adv.npy'),
            allow_pickle=False)
        accuracy = classifier_mc.evaluate(adv, y)
        logger.info('Accuracy on %s set: %f', attack_name, accuracy)
        blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
        logger.info('Blocked %d/%d samples on %s',
                    len(blocked_indices), len(adv), attack_name)
def init_datacontainer(cls, name):
    x = DATASET_LIST[name]
    path = get_data_path()
    dc = DataContainer(x, path)
    dc(shuffle=False, normalize=True, size_train=0.5)
    return dc
def main():
    dc = DataContainer(DATASET_LIST[NAME], get_data_path())
    dc()

    model = MnistCnnV2()
    mc = ModelContainerPT(model, dc)
    mc.load(MODEL_FILE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on test set: {accuracy}')

    carlini_attack = attacks.CarliniL2V2Container(
        mc,
        learning_rate=0.01,
        binary_search_steps=9,
        max_iter=1000,
        confidence=0.0,
        initial_const=0.01,
        c_range=(0, 1e10),
        batch_size=BATCH_SIZE,
        clip_values=(0.0, 1.0),
    )
    # adv, y_adv, x_clean, y_clean = carlini_attack.generate(count=1000)
    # carlini_attack.save_attack(
    #     'MnistCnnV2_MNIST_Carlini',
    #     adv,
    #     y_adv,
    #     x_clean,
    #     y_clean,
    #     True,
    # )

    # use pre-trained adversarial examples
    adv, y_adv, x_clean, y_clean = carlini_attack.load_adv_examples(
        CARLINI_FILE)
    accuracy = mc.evaluate(adv, y_clean)
    print(f'Accuracy on adv. examples: {accuracy}')

    bim_attack = attacks.BIMContainer(
        mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False,
    )
    k2, zeta, kappa, gamma = cross_validation(dc, model, bim_attack)
    # use pre-defined parameters
    # k2, zeta, kappa, gamma, score = K2, RELIABILITY, KAPPA, CONFIDENCE, 0

    ad = ApplicabilityDomainContainer(
        mc,
        hidden_model=model.hidden_model,
        k2=k2,
        reliability=zeta,
        sample_ratio=SAMPLE_RATIO,
        kappa=kappa,
        confidence=gamma,
    )
    print(ad.params)
    ad.fit()

    blocked_indices, x_passed = ad.detect(adv, y_adv, return_passed_x=True)
    print('After update parameters, blocked {}/{} samples from adv. examples'
          .format(len(blocked_indices), len(adv)))
def main():
    data_name = 'Iris'
    set_logging('advTraining', data_name, True, True)

    dc = DataContainer(DATASET_LIST[data_name], get_data_path())
    dc()

    model_file = os.path.join('save', 'IrisNN_Iris_e200.pt')
    num_features = dc.dim_data[0]
    num_classes = dc.num_classes
    classifier = IrisNN(
        num_features=num_features,
        hidden_nodes=num_features * 4,
        num_classes=num_classes,
    )
    classifier_mc = ModelContainerPT(classifier, dc)
    classifier_mc.load(model_file)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    attack = BIMContainer(
        classifier_mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False)

    adv_trainer = AdversarialTraining(classifier_mc, [attack])
    # adv_trainer.fit(max_epochs=100, batch_size=64, ratio=1)
    # adv_trainer.save('AdvTrain_IrisNN_Iris', overwrite=True)
    file_name = os.path.join('save', 'AdvTrain_IrisNN_Iris.pt')
    adv_trainer.load(file_name)

    x = np.load(os.path.join('save', 'IrisNN_Iris_BIM_x.npy'),
                allow_pickle=False)
    y = np.load(os.path.join('save', 'IrisNN_Iris_BIM_y.npy'),
                allow_pickle=False)
    blocked_indices = adv_trainer.detect(x, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s',
                len(blocked_indices), len(x), 'clean')

    # evaluate and detect each set of pre-generated adversarial examples
    for attack_name in ['BIM', 'Carlini', 'DeepFool', 'FGSM']:
        adv = np.load(
            os.path.join('save', f'IrisNN_Iris_{attack_name}_adv.npy'),
            allow_pickle=False)
        accuracy = classifier_mc.evaluate(adv, y)
        logger.info('Accuracy on %s set: %f', attack_name, accuracy)
        blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
        logger.info('Blocked %d/%d samples on %s',
                    len(blocked_indices), len(adv), attack_name)
def main():
    # load dataset and initial model
    model = MnistCnnV2()
    dc = DataContainer(DATASET_LIST[DATASET], get_data_path())
    dc(shuffle=True, normalize=True)
    mc = ModelContainerPT(model, dc)
    mc.load(MODEL_FILE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on test set: {accuracy}')

    # train or load pretrained parameters
    squeezer = FeatureSqueezing(
        mc,
        ['median', 'normal', 'binary'],
        bit_depth=BIT_DEPTH,
        sigma=SIGMA,
        kernel_size=KERNEL_SIZE,
        pretrained=True)

    x_test = dc.x_test
    y_test = dc.y_test
    mc_binary = squeezer.get_def_model_container('binary')
    mc_median = squeezer.get_def_model_container('median')
    mc_normal = squeezer.get_def_model_container('normal')

    def print_squeezer_accuracies(title):
        print(title)
        acc_bin = mc_binary.evaluate(
            squeezer.apply_binary_transform(x_test), y_test)
        print(f'Accuracy of binary squeezer: {acc_bin}')
        acc_med = mc_median.evaluate(
            squeezer.apply_median_transform(x_test), y_test)
        print(f'Accuracy of median squeezer: {acc_med}')
        acc_nor = mc_normal.evaluate(
            squeezer.apply_normal_transform(x_test), y_test)
        print(f'Accuracy of normal squeezer: {acc_nor}')

    print_squeezer_accuracies('before fit')

    if not squeezer.does_pretrained_exist(MODEL_FILE):
        squeezer.fit(max_epochs=MAX_EPOCHS, batch_size=128)
        print_squeezer_accuracies('after fit')
        squeezer.save(MODEL_FILE, True)

    squeezer.load(MODEL_FILE)
    print_squeezer_accuracies('after load')

    # load adversarial examples
    adv_list = ['FGSM', 'BIM', 'DeepFool', 'Carlini', 'Saliency']
    y_file = os.path.join(
        'save', f'{MODEL_NAME}_{DATASET}_{adv_list[0]}_y.npy')
    x_file = os.path.join(
        'save', f'{MODEL_NAME}_{DATASET}_{adv_list[0]}_x.npy')
    x = np.load(x_file, allow_pickle=False)
    y = np.load(y_file, allow_pickle=False)
    acc_og = mc.evaluate(x, y)
    acc_squeezer = squeezer.evaluate(x, y)
    print(f'Accuracy on clean set - OG: {acc_og}, Squeezer: {acc_squeezer}')

    for adv_name in adv_list:
        adv_file = os.path.join(
            'save', build_adv_filename(MODEL_NAME, DATASET, adv_name))
        adv = np.load(adv_file, allow_pickle=False)
        acc_og = mc.evaluate(adv, y)
        acc_squeezer = squeezer.evaluate(adv, y)
        print(f'Accuracy on {adv_name} set - OG: {acc_og}, '
              f'Squeezer: {acc_squeezer}')
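# The "binary" filter in feature squeezing (Xu et al.) reduces color bit depth.
# A hedged sketch of what `apply_binary_transform` is assumed to do for inputs
# scaled to [0, 1] with the given BIT_DEPTH (the project's implementation may
# differ):
import numpy as np

def binary_squeeze_sketch(x, bit_depth):
    """Quantize x in [0, 1] to 2**bit_depth discrete levels."""
    levels = 2 ** bit_depth - 1
    return np.round(x * levels) / levels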
def main():
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file which contains a pretrained model. The filename should '
             'be in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the '
             'cross validation')
    parser.add_argument(
        '-a', '--adv', type=str,
        help='file name of adv. examples for testing. If it is None, the '
             'program will skip testing. The name should be in '
             '"<model>_<dataset>_<attack>_adv.npy" format')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for the random number generator')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-i', '--ignore', action='store_true', default=False,
        help='do not save the results; only print them to the terminal')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')
    args = parser.parse_args()
    model_file = args.model
    param_file = args.param
    adv_file = args.adv
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    does_ignore = args.ignore
    overwrite = args.overwrite

    model_name, data_name = parse_model_filename(model_file)

    # set logging config. Run this before logging anything!
    set_logging('cross_validation', data_name, verbose, save_log)

    # check files
    for file_path in [model_file, param_file]:
        if not os.path.exists(file_path):
            logger.warning('%s does not exist. Exit.', file_path)
            sys.exit(0)
    if adv_file is not None and not os.path.exists(adv_file):
        logger.warning('%s does not exist. Exit.', adv_file)
        sys.exit(0)

    # read parameters
    with open(param_file) as param_json:
        params = json.load(param_json)

    # show parameters
    print('[cv] Running cross validation on {} with {}...'.format(
        model_file, data_name))
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model file :%s', model_file)
    logger.info('adv file :%s', adv_file)
    logger.info('model :%s', model_name)
    logger.info('dataset :%s', data_name)
    logger.info('param file :%s', param_file)
    logger.info('seed :%d', seed)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('Ignore saving :%r', does_ignore)
    logger.info('overwrite :%r', overwrite)
    logger.debug('params :%s', str(params))

    # load parameters
    k_range = params['k_range']
    z_range = params['z_range']
    kappa_range = params['kappa_range']
    gamma_range = params['gamma_range']
    epsilon = params['epsilon']
    num_folds = params['num_folds']
    batch_size = params['batch_size']
    sample_ratio = params['sample_ratio']
    logger.info('k_range :%s', str(k_range))
    logger.info('z_range :%s', str(z_range))
    logger.info('kappa_range :%s', str(kappa_range))
    logger.info('gamma_range :%s', str(gamma_range))
    logger.info('epsilon :%.1f', epsilon)
    logger.info('num_folds :%d', num_folds)
    logger.info('batch_size :%d', batch_size)
    logger.info('sample_ratio :%.1f', sample_ratio)

    # reset seed
    master_seed(seed)

    dc = DataContainer(DATASET_LIST[data_name], get_data_path())
    dc(shuffle=True, normalize=True, size_train=0.8)
    logger.info('Sample size: %d', len(dc))

    Model = get_model(model_name)
    # these models require extra keyword arguments
    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        kwargs = {
            'num_features': num_features,
            'hidden_nodes': num_features * 4,
            'num_classes': num_classes,
        }
        model = Model(**kwargs)
    else:
        model = Model()
    logger.info('Use %s model', model.__class__.__name__)
    mc = ModelContainerPT(model, dc)
    mc.load(model_file)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    ad = ApplicabilityDomainContainer(
        mc, hidden_model=model.hidden_model, sample_ratio=sample_ratio)
    cross_validation = CrossValidation(
        ad,
        num_folds=num_folds,
        k_range=k_range,
        z_range=z_range,
        kappa_range=kappa_range,
        gamma_range=gamma_range,
        epsilon=epsilon,
    )
    bim_attack = BIMContainer(
        mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False,
    )
    cross_validation.fit(bim_attack)

    # test optimal parameters
    if adv_file is not None:
        postfix = ['adv', 'pred', 'x', 'y']
        data_files = [adv_file.replace('_adv', '_' + s) for s in postfix]
        adv = np.load(data_files[0], allow_pickle=False)
        pred_adv = np.load(data_files[1], allow_pickle=False)
        x = np.load(data_files[2], allow_pickle=False)
        pred = np.load(data_files[3], allow_pickle=False)

        # fetch optimal parameters
        ad = ApplicabilityDomainContainer(
            mc,
            hidden_model=model.hidden_model,
            k2=cross_validation.k2,
            reliability=cross_validation.reliability,
            sample_ratio=sample_ratio,
            kappa=cross_validation.kappa,
            confidence=cross_validation.confidence,
        )
        logger.info('Params: %s', str(ad.params))
        ad.fit()
        blocked_indices = ad.detect(x, pred, return_passed_x=False)
        logger.info('Blocked %d/%d on clean data',
                    len(blocked_indices), len(x))
        blocked_indices = ad.detect(adv, pred_adv, return_passed_x=False)
        logger.info('Blocked %d/%d on adv. examples.',
                    len(blocked_indices), len(adv))

    # save results
    if not does_ignore:
        file_name = name_handler(
            model_name + '_' + data_name, 'csv', overwrite=overwrite)
        cross_validation.save(file_name)
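# A hypothetical example of the JSON parameter file consumed by main() above.
# The keys match exactly what the script reads; the values are illustrative
# only, not tuned recommendations:
#
# {
#     "k_range": [2, 5, 9],
#     "z_range": [1.0, 1.6, 2.0],
#     "kappa_range": [5, 10, 15],
#     "gamma_range": [0.5, 0.7, 0.9],
#     "epsilon": 0.3,
#     "num_folds": 5,
#     "batch_size": 128,
#     "sample_ratio": 1.0
# }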
def main():
    dc = DataContainer(DATASET_LIST[NAME], get_data_path())
    dc(shuffle=True, normalize=True, size_train=0.8)
    print('Sample size: {}'.format(len(dc)))

    num_classes = dc.num_classes
    num_features = dc.dim_data[0]
    model = IrisNN(
        num_features=num_features,
        hidden_nodes=num_features * 4,
        num_classes=num_classes)
    mc = ModelContainerPT(model, dc)
    # mc.fit(max_epochs=MAX_EPOCHS, batch_size=BATCH_SIZE, early_stop=False)
    # filename = get_pt_model_filename('IrisNN', NAME, MAX_EPOCHS)
    # mc.save(filename, overwrite=True)
    mc.load(MODEL_FILE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    print(f'Accuracy on test set: {accuracy}')

    carlini_attack = attacks.CarliniL2V2Container(
        mc,
        learning_rate=0.01,
        binary_search_steps=9,
        max_iter=1000,
        confidence=0.0,
        initial_const=0.01,
        c_range=(0, 1e4),
        batch_size=BATCH_SIZE,
        clip_values=(0.0, 1.0),
    )
    # we need more than 30 adv. examples
    # x_all = dc.x_all
    # y_all = dc.y_all
    # indices = np.random.permutation(np.arange(len(dc)))[:100]
    # x = x_all[indices]
    # y = y_all[indices]
    # adv, pred_adv, x_clean, pred_clean = carlini_attack.generate(
    #     use_testset=False, x=x)
    # carlini_attack.save_attack(
    #     'IrisNN_Iris_Carlini',
    #     adv, pred_adv,
    #     x_clean, pred_clean,
    #     True,
    # )

    # use pre-trained adversarial examples
    adv, y_adv, x_clean, pred_clean = carlini_attack.load_adv_examples(
        CARLINI_FILE)
    accuracy = mc.evaluate(adv, pred_clean)
    print(f'Accuracy on adv. examples: {accuracy}')

    bim_attack = attacks.BIMContainer(
        mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False,
    )
    k2, zeta, kappa, gamma = cross_validation(dc, model, bim_attack)

    ad = ApplicabilityDomainContainer(
        mc,
        hidden_model=model.hidden_model,
        k2=k2,
        reliability=zeta,
        sample_ratio=SAMPLE_RATIO,
        kappa=kappa,
        confidence=gamma,
    )
    print(ad.params)
    ad.fit()

    blocked_indices, x_passed = ad.detect(adv, y_adv, return_passed_x=True)
    print('After update parameters, blocked {}/{} samples from adv. examples'
          .format(len(blocked_indices), len(adv)))
def main():
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-s', '--size', type=int, required=True,
        help='the sample size')
    parser.add_argument(
        '-f', '--features', type=int, required=True,
        help='the number of features')
    parser.add_argument(
        '-c', '--classes', type=int, default=2,
        help='the number of classes')
    parser.add_argument(
        '-i', '--iteration', type=int, default=MAX_ITERATIONS,
        help='the number of times the experiment will be repeated')
    parser.add_argument(
        '-e', '--epoch', type=int, required=True,
        help='the number of max epochs for training')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')
    args = parser.parse_args()
    sample_size = args.size
    num_features = args.features
    num_classes = args.classes
    max_iterations = args.iteration
    max_epochs = args.epoch
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    dname = f'SyntheticS{sample_size}F{num_features}C{num_classes}'
    set_logging(LOG_NAME, dname, verbose, save_log)

    print('[{}] Start experiment on {}...'.format(LOG_NAME, dname))
    logger.info('Start at :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset :%s', dname)
    logger.info('train size :%d', sample_size)
    logger.info('num features:%d', num_features)
    logger.info('num classes :%d', num_classes)
    logger.info('iterations :%d', max_iterations)
    logger.info('max_epochs :%d', max_epochs)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('overwrite :%r', overwrite)

    result_file = name_handler(
        os.path.join('save', f'{LOG_NAME}_{dname}_i{max_iterations}'),
        'csv', overwrite=overwrite)
    adv_file_path = name_handler(
        os.path.join('save', f'{LOG_NAME}_{dname}_AdvExamples'),
        'csv', overwrite=overwrite)

    adv_file = open(adv_file_path, 'w')
    adv_file.write(','.join(TITLE_ADV) + '\n')
    res_file = open(result_file, 'w')
    res_file.write(','.join(TITLE_RESULTS) + '\n')

    for i in range(max_iterations):
        since = time.time()

        # generate synthetic data
        x, y = make_classification(
            n_samples=sample_size + 1000,
            n_features=num_features,
            n_informative=num_classes,
            n_redundant=0,
            n_classes=num_classes,
            n_clusters_per_class=1,
        )

        # normalize data
        x_max = np.max(x, axis=0)
        x_min = np.min(x, axis=0)
        # NOTE: Carlini attack expects the data in range [0, 1]
        # x_mean = np.mean(x, axis=0)
        # x = scale_normalize(x, x_min, x_max, x_mean)
        x = scale_normalize(x, x_min, x_max)

        # training/test split
        # NOTE: test set has fixed size
        # np.int64 replaces np.long, which was removed in newer NumPy releases
        x_train = np.array(x[:-1000], dtype=np.float32)
        y_train = np.array(y[:-1000], dtype=np.int64)
        x_test = np.array(x[-1000:], dtype=np.float32)
        y_test = np.array(y[-1000:], dtype=np.int64)

        # create data container
        data_dict = get_synthetic_dataset_dict(
            sample_size + 1000, num_classes, num_features)
        dc = DataContainer(data_dict, get_data_path())
        # assign data manually
        dc.x_train = x_train
        dc.y_train = y_train
        dc.x_test = x_test
        dc.y_test = y_test

        experiment(i, dc, max_epochs, adv_file, res_file)

        time_elapsed = time.time() - since
        print('Completed {} [{}/{}]: {:d}m {:2.1f}s'.format(
            dname,
            i + 1,
            max_iterations,
            int(time_elapsed // 60),
            time_elapsed % 60))

    adv_file.close()
    res_file.close()