def setUpClass(cls):
    """Prepare the distillation defence stored on ``cls.distillation``.

    Calls ``master_seed(SEED)``, loads the classifier from ``MODEL_FILE`` and
    logs its test accuracy, then wraps it in a ``DistillationContainer``.
    The distilled network is loaded from ``save/test/distill_<name>`` when
    that file exists; otherwise it is trained and saved under that name.
    Finally the accuracy of the defended model container is logged.
    """
    master_seed(SEED)

    net_cls = get_model('MnistCnnV2')
    teacher = net_cls()
    logger.info('Starting %s data container...', NAME)
    data = DataContainer(DATASET_LIST[NAME], get_data_path())
    data()
    teacher_mc = ModelContainerPT(teacher, data)
    teacher_mc.load(MODEL_FILE)
    teacher_acc = teacher_mc.evaluate(data.x_test, data.y_test)
    logger.info('Accuracy on test set: %f', teacher_acc)

    # The student network is a fresh instance of the same architecture.
    cls.distillation = DistillationContainer(
        teacher_mc,
        net_cls(),
        temperature=TEMPERATURE,
        pretrained=False)

    # The file name encodes the epoch count and ten times the temperature.
    suffix = str(MAX_EPOCHS) + 't' + str(int(TEMPERATURE * 10))
    base_name = get_pt_model_filename(
        teacher.__class__.__name__, NAME, suffix)
    rel_name = os.path.join('test', 'distill_' + base_name)
    saved_path = os.path.join('save', rel_name)

    if os.path.exists(saved_path):
        cls.distillation.load(saved_path)
    else:
        # Expected initial loss = -log(1/num_classes) = 2.3025850929940455
        cls.distillation.fit(max_epochs=MAX_EPOCHS, batch_size=BATCH_SIZE)
        cls.distillation.save(rel_name, overwrite=True)

    defended_mc = cls.distillation.get_def_model_container()
    defended_acc = defended_mc.evaluate(data.x_test, data.y_test)
    logger.info('Accuracy on test set: %f', defended_acc)
def setUpClass(cls):
    """Load the pretrained classifier and store its container on ``cls.mc``.

    Calls ``master_seed(SEED)``, builds the ``MnistCnnV2`` model and the data
    container for ``NAME``, loads the weights from ``MODEL_FILE`` and logs
    the accuracy on the test split.
    """
    master_seed(SEED)

    net_cls = get_model('MnistCnnV2')
    net = net_cls()
    logger.info('Starting %s data container...', NAME)
    data = DataContainer(DATASET_LIST[NAME], get_data_path())
    data()

    cls.mc = ModelContainerPT(net, data)
    cls.mc.load(MODEL_FILE)
    test_acc = cls.mc.evaluate(data.x_test, data.y_test)
    logger.info('Accuracy on test set: %f', test_acc)
def main():
    """Compare the original and the distilled model on saved adversarial sets.

    Loads the classifier from ``MODEL_FILE``, trains or loads the distillation
    defence, then prints the accuracy of both models on the saved clean
    samples and on each saved adversarial set.
    """
    # Original classifier and its data.
    net_cls = get_model(MODEL_NAME)
    net = net_cls()
    data = DataContainer(DATASET_LIST[DATASET], get_data_path())
    data()
    base_mc = ModelContainerPT(net, data)
    base_mc.load(MODEL_FILE)
    accuracy = base_mc.evaluate(data.x_test, data.y_test)
    print(f'Accuracy on test set: {accuracy}')

    # Distilled model: reuse the saved file when present, otherwise train.
    distillation = DistillationContainer(
        base_mc, net_cls(), temperature=TEMPERATURE, pretrained=False)
    distill_path = os.path.join('save', DISTILL_FILE)
    if os.path.exists(distill_path):
        distillation.load(distill_path)
    else:
        distillation.fit(max_epochs=MAX_EPOCHS, batch_size=128)
        distillation.save(DISTILL_FILE, True)
    distilled_mc = distillation.get_def_model_container()
    accuracy = distilled_mc.evaluate(data.x_test, data.y_test)
    print(f'Accuracy on test set: {accuracy}')

    # Clean inputs and labels are read from the files of the first attack.
    adv_list = ['FGSM', 'BIM', 'DeepFool', 'Carlini', 'Saliency']
    first_attack = adv_list[0]
    x_file = os.path.join('save', f'{MODEL_NAME}_{DATASET}_{first_attack}_x.npy')
    y_file = os.path.join('save', f'{MODEL_NAME}_{DATASET}_{first_attack}_y.npy')
    x = np.load(x_file, allow_pickle=False)
    y = np.load(y_file, allow_pickle=False)
    acc_og = base_mc.evaluate(x, y)
    acc_distill = distilled_mc.evaluate(x, y)
    print(f'Accuracy on clean set - OG: {acc_og}, Distill: {acc_distill}')

    # Adversarial sets, one saved file per attack.
    for adv_name in adv_list:
        adv_path = os.path.join(
            'save', build_adv_filename(MODEL_NAME, DATASET, adv_name))
        adv = np.load(adv_path, allow_pickle=False)
        acc_og = base_mc.evaluate(adv, y)
        acc_distill = distilled_mc.evaluate(adv, y)
        print(
            f'Accuracy on {adv_name} set - OG: {acc_og}, Distill: {acc_distill}'
        )
def main():
    """Run a short adversarial-training demo on the MNIST classifier.

    Loads the classifier from ``MODEL_FILE``, prints its test accuracy,
    fits an ``AdversarialTraining`` defence driven by a BIM attack and prints
    the ``accuracy_test`` attribute of the resulting model container.
    """
    net_cls = get_model('MnistCnnV2')
    net = net_cls()

    data = DataContainer(DATASET_LIST['MNIST'], get_data_path())
    data()

    classifier_mc = ModelContainerPT(net, data)
    classifier_mc.load(MODEL_FILE)
    accuracy = classifier_mc.evaluate(data.x_test, data.y_test)
    print(f'Accuracy on test set: {accuracy}')

    bim = BIMContainer(
        classifier_mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False)

    trainer = AdversarialTraining(classifier_mc, [bim])
    trainer.fit(max_epochs=5, batch_size=128, ratio=0.1)

    defended_mc = trainer.get_def_model_container()
    print(defended_mc.accuracy_test)
def main():
    """Evaluate a saved adversarial-training defence on stored MNIST samples.

    Loads the ``MnistCnnV2`` classifier and the saved adversarial-training
    model from the ``save`` directory, then logs how many samples the defence
    blocks on the clean inputs and, for every attack, the classifier accuracy
    and the number of blocked adversarial examples.
    """
    data_name = 'MNIST'
    set_logging('advTraining', data_name, True, True)

    model_file = os.path.join('save', 'MnistCnnV2_MNIST_e50.pt')
    Model = get_model('MnistCnnV2')
    classifier = Model()

    dc = DataContainer(DATASET_LIST[data_name], get_data_path())
    dc()
    classifier_mc = ModelContainerPT(classifier, dc)
    classifier_mc.load(model_file)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    attack = BIMContainer(
        classifier_mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False)
    adv_trainer = AdversarialTraining(classifier_mc, [attack])
    # NOTE: the defence is loaded from disk here. To retrain it instead, run
    #   adv_trainer.fit(max_epochs=30, batch_size=128, ratio=0.1)
    #   adv_trainer.save('AdvTrain_MnistCnnV2_MNIST', overwrite=True)
    file_name = os.path.join('save', 'AdvTrain_MnistCnnV2_MNIST.pt')
    adv_trainer.load(file_name)

    # Clean inputs and labels come from the files saved with the BIM attack;
    # the same labels are reused for every adversarial set below.
    x = np.load(os.path.join('save', 'MnistCnnV2_MNIST_BIM_x.npy'),
                allow_pickle=False)
    y = np.load(os.path.join('save', 'MnistCnnV2_MNIST_BIM_y.npy'),
                allow_pickle=False)
    blocked_indices = adv_trainer.detect(x, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s',
                len(blocked_indices), len(x), 'clean')

    # One pass per attack instead of five copy-pasted stanzas.
    for attack_name in ('BIM', 'Carlini', 'DeepFool', 'FGSM', 'Saliency'):
        adv_path = os.path.join(
            'save', 'MnistCnnV2_MNIST_{}_adv.npy'.format(attack_name))
        adv = np.load(adv_path, allow_pickle=False)
        accuracy = classifier_mc.evaluate(adv, y)
        logger.info('Accuracy on %s set: %f', attack_name, accuracy)
        blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
        logger.info('Blocked %d/%d samples on %s',
                    len(blocked_indices), len(adv), attack_name)
def main():
    """Command-line entry point that generates adversarial examples.

    Parses the options, checks that the model and parameter files exist,
    loads the model named in the model file name, logs its test accuracy and
    hands the selected attacks to ``run_attacks``. Exits with status 0 when
    no attack flag was given.

    Raises:
        FileNotFoundError: if the model file or the parameter file is missing.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file which contains a pretrained model. The filename should in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the attacks')
    parser.add_argument(
        '-n', '--number', type=int, default=1000,
        help='the number of adv. examples want to generate. (if more than test set, it uses all test examples.)')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    # All remaining options are plain on/off switches.
    switches = (
        ('-v', '--verbose', 'set logger level to debug'),
        ('-l', '--savelog', 'save logging file'),
        ('-w', '--overwrite', 'overwrite the existing file'),
        ('-B', '--bim', 'Apply BIM attack'),
        ('-C', '--carlini', 'Apply Carlini L2 attack'),
        ('-D', '--deepfool', 'Apply DeepFool attack'),
        ('-F', '--fgsm', 'Apply FGSM attack'),
        ('-S', '--saliency', 'Apply Saliency Map attack'),
    )
    for short_opt, long_opt, help_text in switches:
        parser.add_argument(
            short_opt, long_opt, action='store_true', default=False,
            help=help_text)
    args = parser.parse_args()

    model_file = args.model
    attack_param_file = args.param
    num_adv = args.number
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # Which attack should apply? Pick the names whose flag is set.
    attack_names = np.array(
        ['FGSM', 'BIM', 'Carlini', 'DeepFool', 'Saliency'])
    chosen = np.array(
        [args.fgsm, args.bim, args.carlini, args.deepfool, args.saliency],
        dtype=bool)
    selected_attacks = attack_names[chosen]

    # check file
    for path in (model_file, attack_param_file):
        if not os.path.exists(path):
            raise FileNotFoundError('{} does not exist!'.format(path))

    dirname = os.path.dirname(model_file)
    model_name, dname = parse_model_filename(model_file)

    with open(attack_param_file) as param_json:
        att_params = json.load(param_json)

    # set logging config. Run this before logging anything!
    set_logging('attack', dname, verbose, save_log)

    # show parameters
    print('[attack] Start generating {} adv. samples from {} model...'.format(
        num_adv, model_name))
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model file :%s', model_file)
    logger.info('model :%s', model_name)
    logger.info('dataset :%s', dname)
    logger.info('params :%s', attack_param_file)
    logger.info('num_adv :%r', num_adv)
    logger.info('seed :%d', seed)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('overwrite :%r', overwrite)
    logger.info('dirname :%r', dirname)
    logger.info('attacks :%s', ', '.join(selected_attacks))

    if not len(selected_attacks):
        logger.warning('No attack is selected. Exit.')
        sys.exit(0)

    # reset seed
    master_seed(seed)

    # set DataContainer and ModelContainer
    dc = get_data_container(dname)
    model_cls = get_model(model_name)
    # these models require extra keyword arguments
    if dname in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        model = model_cls(
            num_features=num_features,
            hidden_nodes=num_features * 4,
            num_classes=num_classes)
    else:
        model = model_cls()
    logger.info('Use %s model', model.__class__.__name__)

    mc = ModelContainerPT(model, dc)
    mc.load(model_file)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    run_attacks(mc,
                selected_attacks,
                att_params,
                num_adv,
                model_name + '_' + dname,
                overwrite)
def main():
    """Command-line entry point for the applicability-domain defence.

    Derives the ``_adv``/``_pred``/``_x``/``_y`` file names from the given
    adversarial file, loads the model and the defence parameters, fits an
    ``ApplicabilityDomainContainer`` and logs one ``[result]`` line for the
    adversarial examples (plus one for the clean samples when both clean
    files exist). Exits with status 0 when the adversarial, prediction or
    parameter file is missing.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-a', '--adv', type=str, required=True,
        help='file name for adv. examples. The name should in "<model>_<dataset>_<attack>_adv.npy" format')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the attacks')
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file which contains a pretrained model. The filename should in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    for short_opt, long_opt, help_text in (
            ('-v', '--verbose', 'set logger level to debug'),
            ('-l', '--savelog', 'save logging file')):
        parser.add_argument(
            short_opt, long_opt, action='store_true', default=False,
            help=help_text)
    args = parser.parse_args()

    adv_file = args.adv
    param_file = args.param
    model_file = args.model
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog

    # build filenames from the root file
    adv_path, pred_path, x_path, y_path = (
        adv_file.replace('_adv', '_' + tag)
        for tag in ('adv', 'pred', 'x', 'y'))
    # The model and dataset names are parsed from the adversarial file name.
    model_name, dname = parse_model_filename(adv_file)

    # set logging config. Run this before logging anything!
    set_logging('defence_ad', dname, verbose, save_log)

    # check adv. examples and parameter config files
    for required in (adv_path, pred_path, param_file):
        if not os.path.exists(required):
            logger.warning('%s does not exist. Exit.', required)
            sys.exit(0)

    # check clean samples; they are optional
    check_clean = True
    for optional in (x_path, y_path):
        if not os.path.exists(optional):
            logger.warning(
                'Cannot load files for clean samples. Skip checking clean set.'
            )
            check_clean = False

    with open(param_file) as param_json:
        params = json.load(param_json)

    # show parameters
    print(
        '[defend_ad] Running applicability domain on {}...'.format(model_name))
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model file :%s', model_file)
    logger.info('adv file :%s', adv_file)
    logger.info('model :%s', model_name)
    logger.info('dataset :%s', dname)
    logger.info('param file :%s', param_file)
    logger.info('seed :%d', seed)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('check_clean :%r', check_clean)
    logger.debug('params : %s', str(params))

    # reset seed
    master_seed(seed)

    # set DataContainer and ModelContainer
    dc = get_data_container(dname)
    model_cls = get_model(model_name)
    # these models require extra keyword arguments
    if dname in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        model = model_cls(
            num_features=num_features,
            hidden_nodes=num_features * 4,
            num_classes=num_classes)
    else:
        model = model_cls()
    logger.info('Use %s model', model.__class__.__name__)

    mc = ModelContainerPT(model, dc)
    mc.load(model_file)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    # perform defence
    ad = ApplicabilityDomainContainer(mc,
                                      hidden_model=model.hidden_model,
                                      **params)
    ad.fit()

    # Columns shared by every result line.
    result_prefix = [model_file, adv_file] + [
        params[key]
        for key in ('k2', 'reliability', 'sample_ratio', 'confidence',
                    'kappa', 'disable_s2')
    ]

    # check clean
    if check_clean:
        x = np.load(x_path, allow_pickle=False)
        y = np.load(y_path, allow_pickle=False)
        _, _, blocked_counts = detect(ad, 'clean samples', x, y)
        clean_row = result_prefix + ['clean'] + blocked_counts
        result_clean = '[result]' + ','.join(str(item) for item in clean_row)

    # check adversarial examples
    adv = np.load(adv_path, allow_pickle=False)
    pred = np.load(pred_path, allow_pickle=False)
    _, _, blocked_counts = detect(ad, 'adv. examples', adv, pred)
    adv_row = result_prefix + ['adv'] + blocked_counts
    result = '[result]' + ','.join(str(item) for item in adv_row)

    if check_clean:
        logger.info(result_clean)
    logger.info(result)
def main():
    """Command-line entry point for cross validation of the AD defence.

    Loads the model and the search ranges from the JSON parameter file, runs
    ``CrossValidation.fit`` with a BIM attack, optionally evaluates the
    selected parameters on a saved adversarial set, and saves the results
    unless ``--ignore`` was given. Exits with status 0 when a required file
    is missing.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file which contains a pretrained model. The filename should in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the cross validation')
    parser.add_argument(
        '-a', '--adv', type=str,
        help='file name of adv. examples for testing. If it\'s none, the program will ignore testing. The name should in "<model>_<dataset>_<attack>_adv.npy" format')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    for short_opt, long_opt, help_text in (
            ('-v', '--verbose', 'set logger level to debug'),
            ('-l', '--savelog', 'save logging file'),
            ('-i', '--ignore',
             'Ignore saving the results. Only returns the results from terminal.'),
            ('-w', '--overwrite', 'overwrite the existing file')):
        parser.add_argument(
            short_opt, long_opt, action='store_true', default=False,
            help=help_text)
    args = parser.parse_args()

    model_file = args.model
    param_file = args.param
    adv_file = args.adv
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    does_ignore = args.ignore
    overwrite = args.overwrite

    model_name, data_name = parse_model_filename(model_file)

    # set logging config. Run this before logging anything!
    set_logging('cross_validation', data_name, verbose, save_log)

    # check files; the adversarial file is only checked when it was given
    for file_path in (model_file, param_file):
        if not os.path.exists(file_path):
            logger.warning('%s does not exist. Exit.', file_path)
            sys.exit(0)
    if not (adv_file is None or os.path.exists(adv_file)):
        logger.warning('%s does not exist. Exit.', adv_file)
        sys.exit(0)

    # read parameters
    with open(param_file) as param_json:
        params = json.load(param_json)

    # show parameters
    print('[cv] Running cross validation on {} with {}...'.format(
        model_file, data_name))
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model file :%s', model_file)
    logger.info('adv file :%s', adv_file)
    logger.info('model :%s', model_name)
    logger.info('dataset :%s', data_name)
    logger.info('param file :%s', param_file)
    logger.info('seed :%d', seed)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('Ignore saving :%r', does_ignore)
    logger.info('overwrite :%r', overwrite)
    logger.debug('params :%s', str(params))

    # load parameters
    k_range = params['k_range']
    z_range = params['z_range']
    kappa_range = params['kappa_range']
    gamma_range = params['gamma_range']
    epsilon = params['epsilon']
    num_folds = params['num_folds']
    batch_size = params['batch_size']
    sample_ratio = params['sample_ratio']
    logger.info('k_range :%s', str(k_range))
    logger.info('z_range :%s', str(z_range))
    logger.info('kappa_range :%s', str(kappa_range))
    logger.info('gamma_range :%s', str(gamma_range))
    logger.info('epsilon :%.1f', epsilon)
    logger.info('num_folds :%d', num_folds)
    logger.info('batch_size :%d', batch_size)
    logger.info('sample_ratio :%.1f', sample_ratio)

    # reset seed
    master_seed(seed)

    dc = DataContainer(DATASET_LIST[data_name], get_data_path())
    dc(shuffle=True, normalize=True, size_train=0.8)
    logger.info('Sample size: %d', len(dc))

    model_cls = get_model(model_name)
    # these models require extra keyword arguments
    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        model = model_cls(
            num_features=num_features,
            hidden_nodes=num_features * 4,
            num_classes=num_classes)
    else:
        model = model_cls()
    logger.info('Use %s model', model.__class__.__name__)

    mc = ModelContainerPT(model, dc)
    mc.load(model_file)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    ad = ApplicabilityDomainContainer(
        mc, hidden_model=model.hidden_model, sample_ratio=sample_ratio)
    cross_validation = CrossValidation(
        ad,
        num_folds=num_folds,
        k_range=k_range,
        z_range=z_range,
        kappa_range=kappa_range,
        gamma_range=gamma_range,
        epsilon=epsilon,
    )
    bim_attack = BIMContainer(
        mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False,
    )
    cross_validation.fit(bim_attack)

    # test optimal parameters
    if adv_file is not None:
        adv_path, pred_path, x_path, y_path = (
            adv_file.replace('_adv', '_' + tag)
            for tag in ('adv', 'pred', 'x', 'y'))
        adv = np.load(adv_path, allow_pickle=False)
        pred_adv = np.load(pred_path, allow_pickle=False)
        x = np.load(x_path, allow_pickle=False)
        # The "_y" file supplies the values passed to detect() for clean data.
        pred = np.load(y_path, allow_pickle=False)

        # fetch optimal parameters
        ad = ApplicabilityDomainContainer(
            mc,
            hidden_model=model.hidden_model,
            k2=cross_validation.k2,
            reliability=cross_validation.reliability,
            sample_ratio=sample_ratio,
            kappa=cross_validation.kappa,
            confidence=cross_validation.confidence,
        )
        logger.info('Params: %s', str(ad.params))
        ad.fit()
        blocked_indices = ad.detect(x, pred, return_passed_x=False)
        logger.info('Blocked %d/%d on clean data',
                    len(blocked_indices), len(x))
        blocked_indices = ad.detect(adv, pred_adv, return_passed_x=False)
        logger.info('Blocked %d/%d on adv. examples.',
                    len(blocked_indices), len(adv))

    # save results
    if not does_ignore:
        file_name = name_handler(
            model_name + '_' + data_name, 'csv', overwrite=overwrite)
        cross_validation.save(file_name)
def experiment(index, dname, mname, max_epochs, adv_file, res_file):
    """Train a model, attack it, and evaluate every defence for one run.

    Args:
        index: run identifier stored in the first column of the defence
            result row.
        dname: dataset name passed to ``get_data_container``.
        mname: model name passed to ``get_model``.
        max_epochs: number of epochs used to train the model and the
            trainable defences.
        adv_file: open, writable text file; one comma-separated row of
            accuracies (test set, selected clean subset, each attack) is
            written to it.
        res_file: open, writable text file; one comma-separated row built
            from ``blocked_res`` is written to it.

    Raises:
        AssertionError: if an attack returns clean inputs or clean
            predictions that differ from the ones passed in.
    """
    # STEP 1: select data
    dc = get_data_container(dname, use_shuffle=True, use_normalize=True)
    Model = get_model(mname)
    model = Model()
    distill_model = Model()
    logger.info('Selected %s model', model.__class__.__name__)

    # STEP 2: train models
    mc = ModelContainerPT(model, dc)
    mc.fit(max_epochs=max_epochs, batch_size=BATCH_SIZE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)
    adv_res = [accuracy]

    # STEP 3: generate adversarial examples
    # no more than 1000 samples are required; the first n test samples are
    # used rather than a random draw
    n = min(1000, len(dc.x_test))
    x = dc.x_test[:n]
    y = dc.y_test[:n]
    accuracy = mc.evaluate(x, y)
    adv_res.append(accuracy)

    # One slot per entry of ATTACK_LIST; slot 0 holds the clean inputs.
    advs = np.zeros((len(ATTACK_LIST), *x.shape), dtype=np.float32)
    pred_advs = -np.ones(
        (len(ATTACK_LIST), n), dtype=np.int32)  # assign -1 as initial value
    pred_clean = mc.predict(x)

    advs[0] = x
    pred_advs[0] = pred_clean

    # Close the parameter file as soon as it has been parsed.
    with open(os.path.join(DIR_PATH, 'AttackParams.json')) as att_param_json:
        att_params = json.load(att_param_json)

    for i, att_name in enumerate(ATTACK_LIST):
        # Clean set is only used in evaluation phase.
        if att_name == 'Clean':
            continue

        logger.debug('[%d]Running %s attack...', i, att_name)
        kwargs = att_params[att_name]
        logger.debug('%s params: %s', att_name, str(kwargs))
        Attack = get_attack(att_name)
        attack = Attack(mc, **kwargs)
        adv, pred_adv, x_clean, pred_clean_ = attack.generate(
            use_testset=False, x=x)
        # Sanity checks: the attack must hand back the inputs it was given.
        assert np.all(pred_clean == pred_clean_)
        assert np.all(x == x_clean)
        logger.info('created %d adv examples using %s from %s',
                    len(advs[i]), att_name, dname)
        # An attack succeeds on a sample when the prediction changes.
        not_match = pred_adv != pred_clean
        success_rate = np.count_nonzero(not_match) / len(pred_clean)
        accuracy = mc.evaluate(adv, y)
        advs[i] = adv
        pred_advs[i] = pred_adv
        logger.info('Success rate of %s: %f', att_name, success_rate)
        logger.info('Accuracy on %s: %f', att_name, accuracy)
        adv_res.append(accuracy)
    adv_file.write(','.join([str(r) for r in adv_res]) + '\n')

    # STEP 4: train defences
    blocked_res = np.zeros(len(TITLE_RESULTS), dtype=np.int32)
    blocked_res[0] = index
    for def_name in DEFENCE_LIST:
        logger.debug('Running %s...', def_name)
        if def_name == 'AdvTraining':
            attack = BIMContainer(
                mc,
                eps=0.3,
                eps_step=0.1,
                max_iter=100,
                targeted=False)
            defence = AdversarialTraining(mc, [attack])
            defence.fit(max_epochs=max_epochs,
                        batch_size=BATCH_SIZE,
                        ratio=ADV_TRAIN_RATIO)
            block_attack(0, advs, defence, def_name, blocked_res)
        elif def_name == 'Destillation':
            # The spelling must match the entry used in DEFENCE_LIST.
            defence = DistillationContainer(
                mc,
                distill_model,
                temperature=DISTILL_TEMP,
                pretrained=False)
            defence.fit(max_epochs=max_epochs, batch_size=BATCH_SIZE)
            block_attack(1, advs, defence, def_name, blocked_res)
        elif def_name == 'Squeezing':
            defence = FeatureSqueezing(
                mc,
                SQUEEZER_FILTER_LIST,
                bit_depth=SQUEEZER_DEPTH,
                sigma=SQUEEZER_SIGMA,
                kernel_size=SQUEEZER_KERNEL,
                pretrained=True,
            )
            defence.fit(max_epochs=max_epochs, batch_size=BATCH_SIZE)
            block_attack(2, advs, defence, def_name, blocked_res)
        elif def_name == 'AD':
            with open(AD_PARAM_FILE) as ad_param_file:
                ad_params = json.load(ad_param_file)
            logger.debug('AD params: %s', str(ad_params))
            defence = ApplicabilityDomainContainer(
                mc,
                hidden_model=model.hidden_model,
                **ad_params)
            defence.fit()
            block_attack(3, advs, defence, def_name, blocked_res)

    res_file.write(','.join([str(r) for r in blocked_res]) + '\n')
def main():
    """Command-line entry point that trains a model and saves its weights.

    Parses the options, builds the data container and the model (either the
    one named by ``--model`` or a default chosen from the dataset name),
    trains it, saves it under ``save/`` and logs the test accuracy of the
    reloaded weights.

    Raises:
        AttributeError: if no model could be selected for the dataset.
    """

    def str2bool(value):
        """Convert a command-line string into a bool.

        ``type=bool`` treats every non-empty string, including "False", as
        True, so ``--shuffle False`` could never switch shuffling off.
        """
        text = str(value).strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        # The empty string stays False, as it was under ``bool('')``.
        if text in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise ap.ArgumentTypeError(
            'expected a boolean value, got {!r}'.format(value))

    parser = ap.ArgumentParser()
    parser.add_argument('-d', '--dataset',
                        type=str,
                        required=True,
                        choices=get_dataset_list(),
                        help='the dataset you want to train')
    parser.add_argument(
        '-o', '--ofile',
        type=str,
        help='the filename will be used to store model parameters')
    parser.add_argument('-e', '--epoch',
                        type=int,
                        default=5,
                        help='the number of max epochs for training')
    parser.add_argument('-b', '--batchsize',
                        type=int,
                        default=128,
                        help='batch size')
    parser.add_argument('-s', '--seed',
                        type=int,
                        default=4096,
                        help='the seed for random number generator')
    parser.add_argument('-H', '--shuffle',
                        type=str2bool,
                        default=True,
                        help='shuffle the dataset')
    parser.add_argument(
        '-n', '--normalize',
        type=str2bool,
        default=True,
        help='apply zero mean and scaling to the dataset (for numeral dataset only)'
    )
    parser.add_argument('-m', '--model',
                        type=str,
                        choices=AVALIABLE_MODELS,
                        help='select a model to train the data')
    parser.add_argument('-v', '--verbose',
                        action='store_true',
                        default=False,
                        help='set logger level to debug')
    parser.add_argument('-l', '--savelog',
                        action='store_true',
                        default=False,
                        help='save logging file')
    parser.add_argument('-w', '--overwrite',
                        action='store_true',
                        default=False,
                        help='overwrite the existing file')
    args = parser.parse_args()

    dname = args.dataset
    filename = args.ofile
    max_epochs = args.epoch
    batch_size = args.batchsize
    seed = args.seed
    use_shuffle = args.shuffle
    use_normalize = args.normalize
    model_name = args.model
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    set_logging('train', dname, verbose, save_log)

    # show parameters
    print('[train] Start training {} model...'.format(model_name))
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset :%s', dname)
    logger.info('filename :%s', filename)
    logger.info('max_epochs :%d', max_epochs)
    logger.info('batch_size :%d', batch_size)
    logger.info('seed :%d', seed)
    logger.info('use_shuffle :%r', use_shuffle)
    logger.info('use_normalize :%r', use_normalize)
    logger.info('model_name :%s', model_name)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('overwrite :%r', overwrite)

    master_seed(seed)

    # set DataContainer
    dc = get_data_container(
        dname,
        use_shuffle=use_shuffle,
        use_normalize=use_normalize,
    )

    # select a model: an explicit --model wins, otherwise pick by dataset
    model = None
    if model_name is not None:
        Model = models.get_model(model_name)
        model = Model()
    else:
        if dname == 'MNIST':
            model = models.MnistCnnV2()
        elif dname == 'CIFAR10':
            model = models.CifarCnn()
        elif dname == 'BreastCancerWisconsin':
            model = models.BCNN()
        elif dname in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
            num_classes = dc.num_classes
            num_features = dc.dim_data[0]
            model = models.IrisNN(num_features=num_features,
                                  hidden_nodes=num_features * 4,
                                  num_classes=num_classes)

    if model is None:
        raise AttributeError('Cannot find model!')

    modelname = model.__class__.__name__
    logger.info('Selected %s model', modelname)

    # set ModelContainer and train the model
    mc = models.ModelContainerPT(model, dc)
    mc.fit(max_epochs=max_epochs, batch_size=batch_size)

    # save
    os.makedirs('save', exist_ok=True)
    if filename is None:
        filename = get_pt_model_filename(modelname, dname, max_epochs)
    logger.debug('File name: %s', filename)
    mc.save(filename, overwrite=overwrite)

    # test result: reload the saved weights before evaluating
    file_path = os.path.join('save', filename)
    logger.debug('Use saved parameters from %s', filename)
    mc.load(file_path)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)
def main():
    """Command-line entry point for the adversarial-training defence.

    Loads the classifier named in the model file name, trains the
    ``AdversarialTraining`` defence (or loads it from
    ``save/AdvTrain_<model>_<dataset>.pt`` when that file exists and
    ``--train`` was not given), then logs the classifier accuracy and the
    number of blocked samples on the clean inputs and on each selected
    attack. Exits with status 0 when no attack flag was given.

    Raises:
        FileNotFoundError: if the model file, the label file, or any of the
            clean/adversarial sample files is missing.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file which contains a pretrained model. The filename should in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-e', '--epoch', type=int, required=True,
        help='the number of max epochs for training')
    parser.add_argument(
        '-r', '--ratio', type=float, required=True,
        help='the percentage of adversarial examples mix to the training set.')
    parser.add_argument(
        '-b', '--batchsize', type=int, default=128, help='batch size')
    parser.add_argument(
        '-t', '--train', action='store_true', default=False,
        help='Force the model to retrain without searching existing pretrained file')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    # (short flag, long flag, attack name, help text), in evaluation order.
    attack_flags = (
        ('-B', '--bim', 'BIM', 'Apply BIM attack'),
        ('-C', '--carlini', 'Carlini', 'Apply Carlini L2 attack'),
        ('-D', '--deepfool', 'DeepFool', 'Apply DeepFool attack'),
        ('-F', '--fgsm', 'FGSM', 'Apply FGSM attack'),
        ('-S', '--saliency', 'Saliency', 'Apply Saliency Map attack'),
    )
    for short_opt, long_opt, _, help_text in attack_flags:
        parser.add_argument(
            short_opt, long_opt, action='store_true', default=False,
            help=help_text)
    args = parser.parse_args()

    model_file = args.model
    max_epochs = args.epoch
    ratio = args.ratio
    batch_size = args.batchsize
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    need_train = args.train

    model_name, data_name = parse_model_filename(model_file)

    # set logging config. Run this before logging anything, including the
    # "nothing to do" warning below.
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # Which attack should apply?
    attack_list = [
        name for _, long_opt, name, _ in attack_flags
        if getattr(args, long_opt.lstrip('-'))
    ]

    # Quit, if there is nothing to do.
    if not attack_list:
        logger.warning('No attack is selected. Exit.')
        sys.exit(0)

    # Clean inputs and labels are read from the files of the first attack.
    prefix = f'{model_name}_{data_name}'
    y_file = os.path.join('save', f'{prefix}_{attack_list[0]}_y.npy')
    attack_files = [os.path.join('save', f'{prefix}_{attack_list[0]}_x.npy')]
    attack_files.extend(
        os.path.join('save', f'{prefix}_{attack_name}_adv.npy')
        for attack_name in attack_list)
    # the 1st file holds the clean inputs
    attack_list = ['clean'] + attack_list

    # Do I need to train the discriminator?
    pretrain_file = f'AdvTrain_{model_name}_{data_name}.pt'
    if not os.path.exists(os.path.join('save', pretrain_file)):
        need_train = True

    # show parameters
    print(f'[{LOG_NAME}] Running adversarial training on {model_name}...')
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model file :%s', model_file)
    logger.info('model :%s', model_name)
    logger.info('dataset :%s', data_name)
    logger.info('max_epochs :%d', max_epochs)
    # ratio is a float; '%d' would truncate e.g. 0.1 to 0 in the log.
    logger.info('ratio :%f', ratio)
    logger.info('batch_size :%d', batch_size)
    logger.info('seed :%d', seed)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('need train :%r', need_train)
    logger.info('attacks :%s', ', '.join(attack_list))

    # check files
    for file_name in [model_file, y_file] + attack_files:
        if not os.path.exists(file_name):
            logger.error('%s does not exist!', file_name)
            raise FileNotFoundError('{} does not exist!'.format(file_name))

    # reset seed
    master_seed(seed)

    # select data
    dc = get_data_container(
        data_name,
        use_shuffle=True,
        use_normalize=True,
    )

    # select a model; the tabular datasets need constructor arguments, so
    # the model class is only instantiated without arguments when that
    # instance is actually used
    Model = get_model(model_name)
    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        model = IrisNN(
            num_features=num_features,
            hidden_nodes=num_features * 4,
            num_classes=num_classes)
    else:
        model = Model()

    classifier_mc = ModelContainerPT(model, dc)
    classifier_mc.load(model_file)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    attack = BIMContainer(
        classifier_mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False)

    adv_trainer = AdversarialTraining(classifier_mc, [attack])
    if need_train:
        adv_trainer.fit(max_epochs=max_epochs,
                        batch_size=batch_size, ratio=ratio)
        adv_trainer.save(pretrain_file, overwrite=True)
    else:
        adv_trainer.load(os.path.join('save', pretrain_file))

    y = np.load(y_file, allow_pickle=False)
    for adv_name, adv_file in zip(attack_list, attack_files):
        logger.debug('Load %s...', adv_file)
        adv = np.load(adv_file, allow_pickle=False)
        accuracy = classifier_mc.evaluate(adv, y)
        logger.info('Accuracy on %s set: %f', adv_name, accuracy)
        blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
        logger.info('Blocked %d/%d samples on %s',
                    len(blocked_indices), len(adv), adv_name)
def _build_parser():
    """Create the command-line parser for the feature-squeezing script."""
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file which contains a pretrained model. The filename should in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-e', '--epoch', type=int, required=True,
        help='the number of max epochs for training')
    parser.add_argument(
        '-d', '--depth', type=int, default=0,
        help='The image color depth for input images. Apply Binary-Depth filter when receives a parameter')
    parser.add_argument(
        '--sigma', type=float, default=0,
        help='The Standard Deviation of Normal distribution. Apply Gaussian Noise filter when receives a parameter')
    parser.add_argument(
        '-k', '--kernelsize', type=int, default=0,
        help='The kernel size for Median filter. Apply median filter when receives a parameter')
    parser.add_argument(
        '-b', '--batchsize', type=int, default=128,
        help='batch size')
    parser.add_argument(
        '-T', '--train', action='store_true', default=False,
        help='Force the model to retrain without searching existing pretrained file')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-B', '--bim', action='store_true', default=False,
        help='Apply BIM attack')
    parser.add_argument(
        '-C', '--carlini', action='store_true', default=False,
        help='Apply Carlini L2 attack')
    parser.add_argument(
        '-D', '--deepfool', action='store_true', default=False,
        help='Apply DeepFool attack')
    parser.add_argument(
        '-F', '--fgsm', action='store_true', default=False,
        help='Apply FGSM attack')
    parser.add_argument(
        '-S', '--saliency', action='store_true', default=False,
        help='Apply Saliency Map attack')
    return parser


def main():
    """Run the feature-squeezing experiment described by the CLI arguments.

    Steps, in order:
      1. Parse the arguments and derive the model/dataset names from the
         model filename.
      2. Work out which filters and attacks were requested; exit with
         status 0 if either list is empty.
      3. Build the paths of the label file, the clean inputs and one
         adversarial file per attack (all under ``save/``).
      4. Load the classifier, then train or load the squeezer.
      5. For every input set, log the accuracy of the original classifier
         and of the squeezer, and how many samples the squeezer blocked.

    Raises:
        FileNotFoundError: if the model file, the label file or any of the
            input files does not exist.
    """
    args = _build_parser().parse_args()
    model_file = args.model
    max_epochs = args.epoch
    bit_depth = args.depth
    sigma = args.sigma
    kernel_size = args.kernelsize
    batch_size = args.batchsize
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    need_train = args.train
    model_name, data_name = parse_model_filename(model_file)

    # Which filter should apply? A filter is enabled by a positive parameter.
    filter_switches = (
        ('binary', bit_depth > 0),
        ('normal', sigma > 0),
        ('median', kernel_size > 0),
    )
    filter_list = [name for name, enabled in filter_switches if enabled]

    # Which attack should apply? The order here fixes the reporting order.
    attack_switches = (
        ('FGSM', args.fgsm),
        ('BIM', args.bim),
        ('DeepFool', args.deepfool),
        ('Carlini', args.carlini),
        ('Saliency', args.saliency),
    )
    attack_list = [name for name, enabled in attack_switches if enabled]

    # Quit, if there is nothing to do: both a filter and an attack are needed.
    # NOTE(review): this warning is emitted before set_logging() is called.
    if not filter_list or not attack_list:
        logger.warning(
            'Need at least one filter and at least one attack. Exit')
        sys.exit(0)

    # Labels and clean inputs are taken from the files of the first attack.
    first_attack = attack_list[0]
    y_file = os.path.join(
        'save', f'{model_name}_{data_name}_{first_attack}_y.npy')
    clean_file = os.path.join(
        'save', f'{model_name}_{data_name}_{first_attack}_x.npy')
    attack_files = [clean_file] + [
        os.path.join('save', f'{model_name}_{data_name}_{name}_adv.npy')
        for name in attack_list
    ]
    # the 1st file is the clean inputs
    attack_list = ['clean'] + attack_list

    # Do I need to train the squeezers? Yes, if any pretrained file is missing.
    pretrain_files = [
        build_squeezer_filename(model_name, data_name, max_epochs, fname)
        for fname in filter_list
    ]
    if not all(os.path.exists(os.path.join('save', pretrain_file))
               for pretrain_file in pretrain_files):
        need_train = True

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # show parameters
    print(f'[{LOG_NAME}] Running feature squeezing on {model_name}...')
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model file :%s', model_file)
    logger.info('model :%s', model_name)
    logger.info('dataset :%s', data_name)
    logger.info('max_epochs :%d', max_epochs)
    logger.info('bit_depth :%d', bit_depth)
    logger.info('sigma :%f', sigma)
    logger.info('kernel_size :%d', kernel_size)
    logger.info('batch_size :%d', batch_size)
    logger.info('seed :%d', seed)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('need train :%r', need_train)
    logger.info('filters :%s', ', '.join(filter_list))
    logger.info('attacks :%s', ', '.join(attack_list))
    logger.info('pretrained :%s', ', '.join(pretrain_files))

    # check files
    for file_name in [model_file, y_file] + attack_files:
        if not os.path.exists(file_name):
            logger.error('%s does not exist!', file_name)
            raise FileNotFoundError(f'{file_name} does not exist!')

    # reset seed
    master_seed(seed)

    # select data
    dc = get_data_container(
        data_name,
        use_shuffle=True,
        use_normalize=True,
    )

    # select a model
    Model = get_model(model_name)
    # Kept unconditional: constructing the model happens after the seed reset,
    # so skipping it could change the random state seen by later steps.
    model = Model()
    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        # These datasets use an IrisNN sized from the data container.
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        model = IrisNN(num_features=num_features,
                       hidden_nodes=num_features * 4,
                       num_classes=num_classes)

    classifier_mc = ModelContainerPT(model, dc)
    classifier_mc.load(model_file)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    # initialize Squeezer
    squeezer = FeatureSqueezing(
        classifier_mc,
        filter_list,
        bit_depth=bit_depth,
        sigma=sigma,
        kernel_size=kernel_size,
        pretrained=True,
    )

    # train or load parameters for Squeezer
    if need_train:
        squeezer.fit(max_epochs=max_epochs, batch_size=batch_size)
        squeezer.save(model_file, True)
    else:
        squeezer.load(model_file)

    # traverse all attacks; the same labels are used for every input set
    y = np.load(y_file, allow_pickle=False)
    for adv_name, adv_file in zip(attack_list, attack_files):
        logger.debug('Load %s...', adv_file)
        adv = np.load(adv_file, allow_pickle=False)
        acc_og = classifier_mc.evaluate(adv, y)
        acc_squeezer = squeezer.evaluate(adv, y)
        logger.info('Accuracy on %s set - OG: %f, Squeezer: %f',
                    adv_name, acc_og, acc_squeezer)
        blocked_indices = squeezer.detect(adv, return_passed_x=False)
        logger.info('Blocked %d/%d samples on %s',
                    len(blocked_indices), len(adv), adv_name)