def experiment(data_name, filter_list, bit_depth, sigma, kernel_size):
    """Run one feature-squeezing trial against a randomised extra-tree model.

    An ``ExtraTreeClassifier`` is fitted on the requested dataset, a
    ``FeatureSqueezingTree`` detector is fitted on top of it, and the detector
    is queried on (at most 1000) clean test samples and on the
    ``DecisionTreeAttack`` adversarial examples generated from them.

    Args:
        data_name: Dataset name handed to ``get_data_container``.
        filter_list: Filters forwarded to ``FeatureSqueezingTree``.
        bit_depth: Forwarded to the squeezer as ``bit_depth``.
        sigma: Forwarded to the squeezer as ``sigma``.
        kernel_size: Forwarded to the squeezer as ``kernel_size``.

    Returns:
        A ``(num_blk_clean, num_blk_adv)`` tuple holding the number of indices
        the detector returned for the clean and the adversarial samples.
        ``num_blk_adv`` is ``-1`` when the attack raises ``IndexError``.
    """
    # Dataset: shuffled and normalised.
    dc = get_data_container(data_name, use_shuffle=True, use_normalize=True)

    # Fit the tree classifier through its container.
    classifier = ExtraTreeClassifier(criterion='gini', splitter='random')
    mc = ModelContainerTree(classifier, dc)
    mc.fit()

    # Cap the evaluation set at 1000 samples.
    x, y = dc.x_test, dc.y_test
    if len(x) > 1000:
        x, y = x[:1000], y[:1000]

    clean_accuracy = mc.evaluate(x, y)
    logger.info('Accuracy on clean: %f', clean_accuracy)

    # Fit the detector and count what it blocks on clean inputs.
    squeezer = FeatureSqueezingTree(
        mc,
        filter_list,
        bit_depth=bit_depth,
        sigma=sigma,
        kernel_size=kernel_size,
        pretrained=True,
    )
    squeezer.fit()
    num_blk_clean = len(squeezer.detect(x))
    logger.info('Blocked %d/%d samples on clean', num_blk_clean, len(y))

    # Craft adversarial examples with the ART decision-tree attack.
    art_classifier = SklearnClassifier(classifier)
    try:
        adv = DecisionTreeAttack(art_classifier).generate(x)
    except IndexError as error:
        # The attack is known to raise IndexError; report it and flag the
        # adversarial count with the -1 sentinel.
        logger.error(error)
        return num_blk_clean, -1

    adv_accuracy = mc.evaluate(adv, y)
    logger.info('Accuracy on DecisionTreeAttack: %f', adv_accuracy)

    num_blk_adv = len(squeezer.detect(adv))
    logger.info('Blocked %d/%d samples on DecisionTreeAttack',
                num_blk_adv, len(adv))
    return num_blk_clean, num_blk_adv
def experiment(data_name, params):
    """Run one Applicability Domain trial against a randomised extra-tree model.

    An ``ExtraTreeClassifier`` is fitted on the requested dataset, an
    ``ApplicabilityDomainContainer`` is fitted on top of it, and the detector
    is queried on (at most 1000) clean test samples and on the
    ``DecisionTreeAttack`` adversarial examples generated from them.

    Args:
        data_name: Dataset name handed to ``get_data_container``.
        params: Mapping expanded as keyword arguments into
            ``ApplicabilityDomainContainer``.

    Returns:
        A ``(num_blk_clean, num_blk_adv)`` tuple holding the number of indices
        the detector returned for the clean and the adversarial samples.
        ``num_blk_adv`` is ``-1`` when the attack raises ``IndexError``.
    """
    # Dataset: shuffled and normalised.
    dc = get_data_container(data_name, use_shuffle=True, use_normalize=True)

    # Fit the tree classifier through its container.
    classifier = ExtraTreeClassifier(criterion='gini', splitter='random')
    mc = ModelContainerTree(classifier, dc)
    mc.fit()

    # Fit the Applicability Domain detector.
    ad = ApplicabilityDomainContainer(mc, mc.hidden_model, **params)
    ad.fit()

    # Cap the evaluation set at 1000 samples.
    x, y = dc.x_test, dc.y_test
    if len(x) > 1000:
        x, y = x[:1000], y[:1000]

    clean_accuracy = mc.evaluate(x, y)
    logger.info('Accuracy on clean: %f', clean_accuracy)

    num_blk_clean = len(ad.detect(x))
    logger.info('Blocked %d/%d samples on clean', num_blk_clean, len(y))

    # Craft adversarial examples with the ART decision-tree attack.
    art_classifier = SklearnClassifier(classifier)
    try:
        adv = DecisionTreeAttack(art_classifier).generate(x)
    except IndexError as error:
        # The attack is known to raise IndexError; report it and flag the
        # adversarial count with the -1 sentinel.
        logger.error(error)
        return num_blk_clean, -1

    adv_accuracy = mc.evaluate(adv, y)
    logger.info('Accuracy on DecisionTreeAttack: %f', adv_accuracy)

    num_blk_adv = len(ad.detect(adv))
    logger.info('Blocked %d/%d samples on DecisionTreeAttack',
                num_blk_adv, len(adv))
    return num_blk_clean, num_blk_adv
def main():
    """Command-line entry point: run the selected attacks on a saved model.

    Parses the command line, verifies that the model and attack-parameter
    files exist, configures logging, rebuilds the network named in the model
    file name, loads its weights, reports test accuracy and hands over to
    ``run_attacks``. Exits with status 0 when no attack flag was given.

    Raises:
        FileNotFoundError: If the model file or the parameter file is missing.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file which contains a pretrained model. The filename should in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the attacks')
    parser.add_argument(
        '-n', '--number', type=int, default=1000,
        help='the number of adv. examples want to generate. (if more than test set, it uses all test examples.)')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    # Every remaining option is a plain on/off switch; registration order is
    # kept so the generated help text is unchanged.
    switches = (
        ('-v', '--verbose', 'set logger level to debug'),
        ('-l', '--savelog', 'save logging file'),
        ('-w', '--overwrite', 'overwrite the existing file'),
        ('-B', '--bim', 'Apply BIM attack'),
        ('-C', '--carlini', 'Apply Carlini L2 attack'),
        ('-D', '--deepfool', 'Apply DeepFool attack'),
        ('-F', '--fgsm', 'Apply FGSM attack'),
        ('-S', '--saliency', 'Apply Saliency Map attack'),
    )
    for short_opt, long_opt, description in switches:
        parser.add_argument(short_opt, long_opt, action='store_true',
                            default=False, help=description)
    args = parser.parse_args()

    model_file = args.model
    attack_param_file = args.param
    num_adv = args.number
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # Pick the requested attacks with a boolean mask over the known names.
    attack_list = np.array(
        ['FGSM', 'BIM', 'Carlini', 'DeepFool', 'Saliency'])
    wanted = np.array(
        [args.fgsm, args.bim, args.carlini, args.deepfool, args.saliency],
        dtype=bool)
    selected_attacks = attack_list[wanted]

    # Both input files must exist before anything else happens.
    for path in (model_file, attack_param_file):
        if not os.path.exists(path):
            raise FileNotFoundError('{} does not exist!'.format(path))

    dirname = os.path.dirname(model_file)
    model_name, dname = parse_model_filename(model_file)

    with open(attack_param_file) as param_json:
        att_params = json.load(param_json)

    # Logging must be configured before the first logger call.
    set_logging('attack', dname, verbose, save_log)

    # Echo the received parameters.
    print('[attack] Start generating {} adv. samples from {} model...'.format(
        num_adv, model_name))
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    received = (
        ('model file :%s', model_file),
        ('model :%s', model_name),
        ('dataset :%s', dname),
        ('params :%s', attack_param_file),
        ('num_adv :%r', num_adv),
        ('seed :%d', seed),
        ('verbose :%r', verbose),
        ('save_log :%r', save_log),
        ('overwrite :%r', overwrite),
        ('dirname :%r', dirname),
        ('attacks :%s', ', '.join(selected_attacks)),
    )
    for fmt, value in received:
        logger.info(fmt, value)

    if len(selected_attacks) == 0:
        logger.warning('No attack is selected. Exit.')
        sys.exit(0)

    # Reset the random seed.
    master_seed(seed)

    # Build the data container and the network class.
    dc = get_data_container(dname)
    Model = get_model(model_name)

    # The models used for these datasets need extra keyword arguments.
    if dname in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_features = dc.dim_data[0]
        model = Model(
            num_features=num_features,
            hidden_nodes=num_features*4,
            num_classes=dc.num_classes,
        )
    else:
        model = Model()
    logger.info('Use %s model', model.__class__.__name__)

    mc = ModelContainerPT(model, dc)
    mc.load(model_file)
    test_accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', test_accuracy)

    run_attacks(mc,
                selected_attacks,
                att_params,
                num_adv,
                model_name + '_' + dname,
                overwrite)
def main():
    """Command-line entry point: test Applicability Domain on a tree model.

    Parses the command line, resolves the names of the previously saved
    clean/adversarial ``.npy`` files under ``save/``, trains an
    ``ExtraTreeClassifier`` on the dataset, fits an
    ``ApplicabilityDomainContainer`` and logs accuracy and the number of
    blocked samples for ``DecisionTreeAttack`` examples and for every saved
    attack file. Exits with status 0 when no attack flag was given.

    Raises:
        ValueError: If the dataset has no associated saved-model name.
        FileNotFoundError: If any required ``.npy`` file is missing.
    """
    parser = ap.ArgumentParser()
    parser.add_argument('-d', '--dataset', type=str, required=True,
                        help='Name of the dataset')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the attacks')
    parser.add_argument('-s', '--seed', type=int, default=4096,
                        help='the seed for random number generator')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='set logger level to debug')
    parser.add_argument('-l', '--savelog', action='store_true', default=False,
                        help='save logging file')
    parser.add_argument('-F', '--fgsm', action='store_true', default=False,
                        help='Apply FGSM attack')
    parser.add_argument('-B', '--bim', action='store_true', default=False,
                        help='Apply BIM attack')
    parser.add_argument('-D', '--deepfool', action='store_true', default=False,
                        help='Apply DeepFool attack')
    parser.add_argument('-C', '--carlini', action='store_true', default=False,
                        help='Apply Carlini L2 attack')
    args = parser.parse_args()

    data_name = args.dataset
    param_file = args.param
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # Which attack should apply?
    attack_list = []
    if args.fgsm:
        attack_list.append('FGSM')
    if args.bim:
        attack_list.append('BIM')
    if args.deepfool:
        attack_list.append('DeepFool')
    if args.carlini:
        attack_list.append('Carlini')

    # Quit, if there is nothing to do.
    if not attack_list:
        logger.warning('Neither received any filter nor any attack. Exit')
        sys.exit(0)

    # The saved files are prefixed with the name of the neural network that
    # produced them. Fail early with a clear message for any other dataset
    # instead of hitting an unbound ``model_name`` further down.
    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        model_name = 'IrisNN'
    elif data_name == 'BreastCancerWisconsin':
        model_name = 'BCNN'
    else:
        logger.error('Unrecognised dataset %s', data_name)
        raise ValueError('Unrecognised dataset {}'.format(data_name))

    y_file = os.path.join(
        'save', f'{model_name}_{data_name}_{attack_list[0]}_y.npy')
    # the 1st file is the clean inputs
    attack_files = [
        os.path.join(
            'save', f'{model_name}_{data_name}_{attack_list[0]}_x.npy')
    ]
    attack_files.extend(
        os.path.join('save', f'{model_name}_{data_name}_{attack_name}_adv.npy')
        for attack_name in attack_list)
    attack_list = ['clean'] + attack_list

    # load parameters for Applicability Domain
    with open(param_file) as param_json:
        params = json.load(param_json)

    # show parameters
    print(f'[{LOG_NAME}] Running tree model...')
    logger.info('Start at :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model :%s', model_name)
    logger.info('dataset :%s', data_name)
    logger.info('param file :%s', param_file)
    logger.info('seed :%d', seed)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('attacks :%s', ', '.join(attack_list))
    logger.debug('params :%s', str(params))

    # check files
    for file_name in [y_file] + attack_files:
        if not os.path.exists(file_name):
            logger.error('%s does not exist!', file_name)
            raise FileNotFoundError('{} does not exist!'.format(file_name))

    # reset seed
    master_seed(seed)

    # select data
    dc = get_data_container(
        data_name,
        use_shuffle=True,
        use_normalize=True,
    )

    # train the model
    classifier = ExtraTreeClassifier(
        criterion='gini',
        splitter='random',
    )
    mc = ModelContainerTree(classifier, dc)
    mc.fit()

    # generate adversarial examples from the saved clean inputs
    x = np.load(attack_files[0], allow_pickle=False)
    art_classifier = SklearnClassifier(classifier)
    attack = DecisionTreeAttack(art_classifier)
    adv = attack.generate(x)

    # train Applicability Domain
    ad = ApplicabilityDomainContainer(mc, mc.hidden_model, **params)
    ad.fit()

    # evaluate the tree attack
    y = np.load(y_file, allow_pickle=False)
    accuracy = mc.evaluate(adv, y)
    logger.info('Accuracy on DecisionTreeAttack set: %f', accuracy)
    blocked_indices = ad.detect(adv)
    logger.info('Blocked %d/%d samples on DecisionTreeAttack',
                len(blocked_indices), len(adv))

    # traverse other attacks (clean inputs first)
    for adv_file, adv_name in zip(attack_files, attack_list):
        logger.debug('Load %s...', adv_file)
        adv = np.load(adv_file, allow_pickle=False)
        accuracy = mc.evaluate(adv, y)
        logger.info('Accuracy on %s set: %f', adv_name, accuracy)
        blocked_indices = ad.detect(adv, return_passed_x=False)
        logger.info('Blocked %d/%d samples on %s',
                    len(blocked_indices), len(adv), adv_name)
def main():
    """Command-line entry point: run Applicability Domain on saved examples.

    The names of the companion files (``_pred``, ``_x``, ``_y``) are derived
    from the adversarial-example file name. The function exits with status 0
    if the adversarial file, the prediction file or the parameter file is
    missing; the clean-sample check is skipped when the ``_x`` or ``_y`` file
    is missing. After loading the pretrained network it fits an
    ``ApplicabilityDomainContainer`` and logs one ``[result]`` CSV line per
    evaluated set.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-a', '--adv', type=str, required=True,
        help='file name for adv. examples. The name should in "<model>_<dataset>_<attack>_adv.npy" format')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the attacks')
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file which contains a pretrained model. The filename should in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    for short_opt, long_opt, description in (
            ('-v', '--verbose', 'set logger level to debug'),
            ('-l', '--savelog', 'save logging file')):
        parser.add_argument(short_opt, long_opt, action='store_true',
                            default=False, help=description)
    args = parser.parse_args()

    adv_file = args.adv
    param_file = args.param
    model_file = args.model
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog

    # Derive the four data file names from the adversarial file name.
    adv_path, pred_path, x_path, y_path = (
        adv_file.replace('_adv', '_' + suffix)
        for suffix in ('adv', 'pred', 'x', 'y'))
    model_name, dname = parse_model_filename(adv_file)

    # Logging must be configured before the first logger call.
    set_logging('defence_ad', dname, verbose, save_log)

    # Adversarial examples, predictions and the parameter file are mandatory.
    for path in (adv_path, pred_path, param_file):
        if not os.path.exists(path):
            logger.warning('%s does not exist. Exit.', path)
            sys.exit(0)

    # Clean samples are optional; without them only the adv. set is checked.
    check_clean = True
    for path in (x_path, y_path):
        if os.path.exists(path):
            continue
        logger.warning(
            'Cannot load files for clean samples. Skip checking clean set.')
        check_clean = False

    with open(param_file) as param_json:
        params = json.load(param_json)

    # Echo the received parameters.
    print(
        '[defend_ad] Running applicability domain on {}...'.format(model_name))
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    received = (
        ('model file :%s', model_file),
        ('adv file :%s', adv_file),
        ('model :%s', model_name),
        ('dataset :%s', dname),
        ('param file :%s', param_file),
        ('seed :%d', seed),
        ('verbose :%r', verbose),
        ('save_log :%r', save_log),
        ('check_clean :%r', check_clean),
    )
    for fmt, value in received:
        logger.info(fmt, value)
    logger.debug('params : %s', str(params))

    # Reset the random seed.
    master_seed(seed)

    # Build the data container and the network class.
    dc = get_data_container(dname)
    Model = get_model(model_name)

    # The models used for these datasets need extra keyword arguments.
    if dname in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_features = dc.dim_data[0]
        model = Model(
            num_features=num_features,
            hidden_nodes=num_features * 4,
            num_classes=dc.num_classes,
        )
    else:
        model = Model()
    logger.info('Use %s model', model.__class__.__name__)

    mc = ModelContainerPT(model, dc)
    mc.load(model_file)
    test_accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', test_accuracy)

    # Fit the defence.
    ad = ApplicabilityDomainContainer(mc,
                                      hidden_model=model.hidden_model,
                                      **params)
    ad.fit()

    # Columns shared by every result line.
    param_keys = ('k2', 'reliability', 'sample_ratio', 'confidence', 'kappa',
                  'disable_s2')
    result_prefix = [model_file, adv_file] + [params[key] for key in param_keys]

    # Clean samples.
    if check_clean:
        x = np.load(x_path, allow_pickle=False)
        y = np.load(y_path, allow_pickle=False)
        _, _, blocked_counts = detect(ad, 'clean samples', x, y)
        clean_fields = result_prefix + ['clean'] + blocked_counts
        result_clean = '[result]' + ','.join(str(r) for r in clean_fields)

    # Adversarial examples.
    adv = np.load(adv_path, allow_pickle=False)
    pred = np.load(pred_path, allow_pickle=False)
    _, _, blocked_counts = detect(ad, 'adv. examples', adv, pred)
    adv_fields = result_prefix + ['adv'] + blocked_counts
    result = '[result]' + ','.join(str(r) for r in adv_fields)

    if check_clean:
        logger.info(result_clean)
    logger.info(result)
def experiment(index, dname, mname, max_epochs, adv_file, res_file):
    """Train a model, attack it, then run every configured defence once.

    Args:
        index: Run identifier; stored in the first slot of the result row.
        dname: Dataset name handed to ``get_data_container``.
        mname: Model name handed to ``get_model``.
        max_epochs: Maximum number of training epochs for the model and for
            the defences that are trained here.
        adv_file: Writable text file; receives one CSV line with the test
            accuracy, the accuracy on the evaluated subset and the accuracy
            under each attack.
        res_file: Writable text file; receives one CSV line with the contents
            of ``blocked_res`` (presumably filled by ``block_attack`` -
            confirm against that helper).
    """
    # STEP 1: select data
    dc = get_data_container(dname, use_shuffle=True, use_normalize=True)
    Model = get_model(mname)
    model = Model()
    distill_model = Model()
    logger.info('Selected %s model', model.__class__.__name__)

    # STEP 2: train models
    mc = ModelContainerPT(model, dc)
    mc.fit(max_epochs=max_epochs, batch_size=BATCH_SIZE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)
    adv_res = [accuracy]

    # STEP 3: generate adversarial examples
    # no more than 1000 samples are required
    n = min(1000, len(dc.x_test))
    x = dc.x_test[:n]
    y = dc.y_test[:n]
    accuracy = mc.evaluate(x, y)
    adv_res.append(accuracy)

    # One slot per entry of ATTACK_LIST; slot 0 holds the clean inputs.
    advs = np.zeros((len(ATTACK_LIST), *x.shape), dtype=np.float32)
    # assign -1 as initial value
    pred_advs = -np.ones((len(ATTACK_LIST), n), dtype=np.int32)
    pred_clean = mc.predict(x)
    advs[0] = x
    pred_advs[0] = pred_clean

    # Use a context manager so the parameter file is closed again.
    with open(os.path.join(DIR_PATH, 'AttackParams.json')) as att_param_json:
        att_params = json.load(att_param_json)

    for i, att_name in enumerate(ATTACK_LIST):
        # Clean set is only used in evaluation phase.
        if att_name == 'Clean':
            continue

        logger.debug('[%d]Running %s attack...', i, att_name)
        kwargs = att_params[att_name]
        logger.debug('%s params: %s', att_name, str(kwargs))
        Attack = get_attack(att_name)
        attack = Attack(mc, **kwargs)
        adv, pred_adv, x_clean, pred_clean_ = attack.generate(
            use_testset=False, x=x)
        # Sanity checks: the attack must report the inputs it was given.
        assert np.all(pred_clean == pred_clean_)
        assert np.all(x == x_clean)
        logger.info('created %d adv examples using %s from %s',
                    len(advs[i]), att_name, dname)

        # Fraction of samples whose prediction changed under the attack.
        not_match = pred_adv != pred_clean
        success_rate = np.count_nonzero(not_match) / len(pred_clean)
        accuracy = mc.evaluate(adv, y)
        advs[i] = adv
        pred_advs[i] = pred_adv
        logger.info('Success rate of %s: %f', att_name, success_rate)
        logger.info('Accuracy on %s: %f', att_name, accuracy)
        adv_res.append(accuracy)
    adv_file.write(','.join([str(r) for r in adv_res]) + '\n')

    # STEP 4: train defences
    blocked_res = np.zeros(len(TITLE_RESULTS), dtype=np.int32)
    blocked_res[0] = index
    for def_name in DEFENCE_LIST:
        logger.debug('Running %s...', def_name)
        if def_name == 'AdvTraining':
            attack = BIMContainer(
                mc, eps=0.3, eps_step=0.1, max_iter=100, targeted=False)
            defence = AdversarialTraining(mc, [attack])
            defence.fit(max_epochs=max_epochs,
                        batch_size=BATCH_SIZE,
                        ratio=ADV_TRAIN_RATIO)
            block_attack(0, advs, defence, def_name, blocked_res)
        elif def_name == 'Destillation':
            defence = DistillationContainer(
                mc, distill_model, temperature=DISTILL_TEMP, pretrained=False)
            defence.fit(max_epochs=max_epochs, batch_size=BATCH_SIZE)
            block_attack(1, advs, defence, def_name, blocked_res)
        elif def_name == 'Squeezing':
            defence = FeatureSqueezing(
                mc,
                SQUEEZER_FILTER_LIST,
                bit_depth=SQUEEZER_DEPTH,
                sigma=SQUEEZER_SIGMA,
                kernel_size=SQUEEZER_KERNEL,
                pretrained=True,
            )
            defence.fit(max_epochs=max_epochs, batch_size=BATCH_SIZE)
            block_attack(2, advs, defence, def_name, blocked_res)
        elif def_name == 'AD':
            with open(AD_PARAM_FILE) as ad_param_file:
                ad_params = json.load(ad_param_file)
            logger.debug('AD params: %s', str(ad_params))
            defence = ApplicabilityDomainContainer(
                mc, hidden_model=model.hidden_model, **ad_params)
            defence.fit()
            block_attack(3, advs, defence, def_name, blocked_res)

    res_file.write(','.join([str(r) for r in blocked_res]) + '\n')
def experiment(index, dname, max_epochs, adv_file, res_file):
    """Train a model for a numeric dataset, attack it, then run every defence.

    Args:
        index: Run identifier; stored in the first slot of the result row.
        dname: Dataset name. ``'BreastCancerWisconsin'`` selects ``BCNN``;
            ``'BankNote'``, ``'HTRU2'``, ``'Iris'`` and ``'WheatSeed'`` select
            ``IrisNN``.
        max_epochs: Maximum number of training epochs for the model and for
            the defences that are trained here.
        adv_file: Writable text file; receives one CSV line with the test
            accuracy, the accuracy on the evaluated subset and the accuracy
            under each attack.
        res_file: Writable text file; receives one CSV line with the contents
            of ``blocked_res`` (presumably filled by ``block_attack`` -
            confirm against that helper).

    Raises:
        ValueError: If no model is defined for ``dname``.
    """
    # STEP 1: select data
    dc = get_data_container(dname, use_shuffle=True, use_normalize=True)

    if dname == 'BreastCancerWisconsin':
        model = BCNN()
        distill_model = BCNN()
    elif dname in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        model = IrisNN(
            num_features=num_features,
            hidden_nodes=num_features*4,
            num_classes=num_classes
        )
        distill_model = IrisNN(
            num_features=num_features,
            hidden_nodes=num_features*4,
            num_classes=num_classes
        )
    else:
        # Stop here: continuing would train on ``None`` and later hit an
        # unbound ``distill_model``.
        logger.error('Unrecognised dataset %s', dname)
        raise ValueError('Unrecognised dataset {}'.format(dname))
    logger.info('Selected %s model', model.__class__.__name__)

    # STEP 2: train models
    mc = ModelContainerPT(model, dc)
    mc.fit(max_epochs=max_epochs, batch_size=BATCH_SIZE)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)
    adv_res = [accuracy]

    # STEP 3: generate adversarial examples
    # no more than 1000 samples are required
    x = dc.x_test
    y = dc.y_test
    if len(x) > 1000:
        x = x[:1000]
        y = y[:1000]
    accuracy = mc.evaluate(x, y)
    adv_res.append(accuracy)

    # One slot per entry of ATTACK_LIST; slot 0 holds the clean inputs.
    advs = np.zeros((len(ATTACK_LIST), x.shape[0], x.shape[1]),
                    dtype=np.float32)
    # assign -1 as initial value
    pred_advs = -np.ones((len(ATTACK_LIST), len(y)), dtype=np.int32)
    pred_clean = mc.predict(x)
    advs[0] = x
    pred_advs[0] = pred_clean

    # Use a context manager so the parameter file is closed again.
    with open(os.path.join(DIR_PATH, 'AttackParams.json')) as att_param_json:
        att_params = json.load(att_param_json)

    for i, att_name in enumerate(ATTACK_LIST):
        # Clean set is only used in evaluation phase.
        if att_name == 'Clean':
            continue

        logger.debug('[%d]Running %s attack...', i, att_name)
        kwargs = att_params[att_name]
        logger.debug('%s params: %s', att_name, str(kwargs))
        Attack = get_attack(att_name)
        attack = Attack(mc, **kwargs)
        adv, pred_adv, x_clean, pred_clean_ = attack.generate(
            use_testset=False, x=x)
        # Sanity checks: the attack must report the inputs it was given.
        assert np.all(pred_clean == pred_clean_)
        assert np.all(x == x_clean)
        logger.info('created %d adv examples using %s from %s',
                    len(advs[i]), att_name, dname)

        # Fraction of samples whose prediction changed under the attack.
        not_match = pred_adv != pred_clean
        success_rate = np.count_nonzero(not_match) / len(pred_clean)
        accuracy = mc.evaluate(adv, y)
        advs[i] = adv
        pred_advs[i] = pred_adv
        logger.info('Success rate of %s: %f', att_name, success_rate)
        logger.info('Accuracy on %s: %f', att_name, accuracy)
        adv_res.append(accuracy)
    adv_file.write(','.join([str(r) for r in adv_res]) + '\n')

    # STEP 4: train defences
    blocked_res = np.zeros(len(TITLE_RESULTS), dtype=np.int32)
    blocked_res[0] = index
    for def_name in DEFENCE_LIST:
        logger.debug('Running %s...', def_name)
        if def_name == 'AdvTraining':
            attack = BIMContainer(
                mc, eps=0.3, eps_step=0.1, max_iter=100, targeted=False)
            defence = AdversarialTraining(mc, [attack])
            defence.fit(max_epochs=max_epochs,
                        batch_size=BATCH_SIZE,
                        ratio=ADV_TRAIN_RATIO)
            block_attack(0, advs, defence, def_name, blocked_res)
        elif def_name == 'Destillation':
            # Distillation temperature is chosen per dataset.
            if dname == 'Iris':
                temp = 10
            elif dname == 'BreastCancerWisconsin':
                temp = 2
            else:
                temp = 20
            defence = DistillationContainer(
                mc, distill_model, temperature=temp, pretrained=False)
            defence.fit(max_epochs=max_epochs, batch_size=BATCH_SIZE)
            block_attack(1, advs, defence, def_name, blocked_res)
        elif def_name == 'Squeezing':
            defence = FeatureSqueezing(
                mc,
                SQUEEZER_FILTER_LIST,
                bit_depth=SQUEEZER_DEPTH,
                sigma=SQUEEZER_SIGMA,
                pretrained=True,
            )
            defence.fit(max_epochs=max_epochs, batch_size=BATCH_SIZE)
            block_attack(2, advs, defence, def_name, blocked_res)
        elif def_name == 'AD':
            # BreastCancer uses a different set of parameters. Only the file
            # that is actually needed is opened, and it is closed afterwards.
            if dname == 'BreastCancerWisconsin':
                param_file = os.path.join(DIR_PATH, 'AdParamsBC.json')
            else:
                param_file = AD_PARAM_FILE
            with open(param_file) as ad_param_file:
                ad_params = json.load(ad_param_file)
            logger.debug('AD params: %s', str(ad_params))
            defence = ApplicabilityDomainContainer(
                mc, hidden_model=model.hidden_model, **ad_params)
            defence.fit()
            block_attack(3, advs, defence, def_name, blocked_res)

    res_file.write(','.join([str(r) for r in blocked_res]) + '\n')
def _str2bool(value):
    """Convert a command-line token to ``bool``.

    ``type=bool`` treats every non-empty string as ``True``, so
    ``--shuffle False`` would silently enable shuffling. This converter
    recognises the usual spellings instead. The empty string stays ``False``
    as it was with ``bool``.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean.
    """
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    if token in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise ap.ArgumentTypeError(
        'expected a boolean value, got {!r}'.format(value))


def main():
    """Command-line entry point: train a model and save its parameters.

    Parses the command line, configures logging, builds the data container,
    selects the network (by ``--model`` if given, otherwise by dataset name),
    trains it, saves it through the model container, reloads the saved file
    from ``save/`` and logs the test accuracy.

    Raises:
        AttributeError: If no model can be selected for the dataset.
    """
    parser = ap.ArgumentParser()
    parser.add_argument('-d', '--dataset', type=str, required=True,
                        choices=get_dataset_list(),
                        help='the dataset you want to train')
    parser.add_argument(
        '-o', '--ofile', type=str,
        help='the filename will be used to store model parameters')
    parser.add_argument('-e', '--epoch', type=int, default=5,
                        help='the number of max epochs for training')
    parser.add_argument('-b', '--batchsize', type=int, default=128,
                        help='batch size')
    parser.add_argument('-s', '--seed', type=int, default=4096,
                        help='the seed for random number generator')
    # NOTE: ``type=bool`` would turn the string 'False' into True.
    parser.add_argument('-H', '--shuffle', type=_str2bool, default=True,
                        help='shuffle the dataset')
    parser.add_argument(
        '-n', '--normalize', type=_str2bool, default=True,
        help='apply zero mean and scaling to the dataset (for numeral dataset only)')
    parser.add_argument('-m', '--model', type=str, choices=AVALIABLE_MODELS,
                        help='select a model to train the data')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='set logger level to debug')
    parser.add_argument('-l', '--savelog', action='store_true', default=False,
                        help='save logging file')
    parser.add_argument('-w', '--overwrite', action='store_true',
                        default=False, help='overwrite the existing file')
    args = parser.parse_args()

    dname = args.dataset
    filename = args.ofile
    max_epochs = args.epoch
    batch_size = args.batchsize
    seed = args.seed
    use_shuffle = args.shuffle
    use_normalize = args.normalize
    model_name = args.model
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    set_logging('train', dname, verbose, save_log)

    # show parameters
    print('[train] Start training {} model...'.format(model_name))
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset :%s', dname)
    logger.info('filename :%s', filename)
    logger.info('max_epochs :%d', max_epochs)
    logger.info('batch_size :%d', batch_size)
    logger.info('seed :%d', seed)
    logger.info('use_shuffle :%r', use_shuffle)
    logger.info('use_normalize :%r', use_normalize)
    logger.info('model_name :%s', model_name)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('overwrite :%r', overwrite)

    master_seed(seed)

    # set DataContainer
    dc = get_data_container(
        dname,
        use_shuffle=use_shuffle,
        use_normalize=use_normalize,
    )

    # select a model: an explicit --model wins over the per-dataset default
    model = None
    if model_name is not None:
        Model = models.get_model(model_name)
        model = Model()
    else:
        if dname == 'MNIST':
            model = models.MnistCnnV2()
        elif dname == 'CIFAR10':
            model = models.CifarCnn()
        elif dname == 'BreastCancerWisconsin':
            model = models.BCNN()
        elif dname in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
            num_classes = dc.num_classes
            num_features = dc.dim_data[0]
            model = models.IrisNN(num_features=num_features,
                                  hidden_nodes=num_features * 4,
                                  num_classes=num_classes)

    if model is None:
        raise AttributeError('Cannot find model!')

    modelname = model.__class__.__name__
    logger.info('Selected %s model', modelname)

    # set ModelContainer and train the model
    mc = models.ModelContainerPT(model, dc)
    mc.fit(max_epochs=max_epochs, batch_size=batch_size)

    # save; exist_ok avoids the check-then-create race
    os.makedirs('save', exist_ok=True)
    if filename is None:
        filename = get_pt_model_filename(modelname, dname, max_epochs)
    logger.debug('File name: %s', filename)
    mc.save(filename, overwrite=overwrite)

    # test result using the parameters that were just written
    file_path = os.path.join('save', filename)
    logger.debug('Use saved parameters from %s', filename)
    mc.load(file_path)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)
def main():
    """Command-line entry point: evaluate adversarial training as a defence.

    Parses the command line, resolves the names of the previously saved
    clean/adversarial ``.npy`` files under ``save/``, loads the pretrained
    classifier, trains the ``AdversarialTraining`` defence (or loads it from
    ``save/`` when a pretrained file exists and ``--train`` was not given)
    and logs accuracy and the number of blocked samples for every file.
    Exits with status 0 when no attack flag was given.

    Raises:
        FileNotFoundError: If the model file or a required ``.npy`` file is
            missing.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file which contains a pretrained model. The filename should in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-e', '--epoch', type=int, required=True,
        help='the number of max epochs for training')
    parser.add_argument(
        '-r', '--ratio', type=float, required=True,
        help='the percentage of adversarial examples mix to the training set.')
    parser.add_argument(
        '-b', '--batchsize', type=int, default=128, help='batch size')
    parser.add_argument(
        '-t', '--train', action='store_true', default=False,
        help='Force the model to retrain without searching existing pretrained file')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-B', '--bim', action='store_true', default=False,
        help='Apply BIM attack')
    parser.add_argument(
        '-C', '--carlini', action='store_true', default=False,
        help='Apply Carlini L2 attack')
    parser.add_argument(
        '-D', '--deepfool', action='store_true', default=False,
        help='Apply DeepFool attack')
    parser.add_argument(
        '-F', '--fgsm', action='store_true', default=False,
        help='Apply FGSM attack')
    parser.add_argument(
        '-S', '--saliency', action='store_true', default=False,
        help='Apply Saliency Map attack')
    args = parser.parse_args()

    model_file = args.model
    max_epochs = args.epoch
    ratio = args.ratio
    batch_size = args.batchsize
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    need_train = args.train
    model_name, data_name = parse_model_filename(model_file)

    # set logging config. Run this before logging anything! It has to happen
    # before the "nothing to do" warning below.
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # Which attack should apply?
    attack_list = []
    if args.bim:
        attack_list.append('BIM')
    if args.carlini:
        attack_list.append('Carlini')
    if args.deepfool:
        attack_list.append('DeepFool')
    if args.fgsm:
        attack_list.append('FGSM')
    if args.saliency:
        attack_list.append('Saliency')

    # Quit, if there is nothing to do.
    if not attack_list:
        logger.warning('Neither received any filter nor any attack. Exit')
        sys.exit(0)

    y_file = os.path.join(
        'save', f'{model_name}_{data_name}_{attack_list[0]}_y.npy')
    # the 1st file is the clean inputs
    attack_files = [
        os.path.join(
            'save', f'{model_name}_{data_name}_{attack_list[0]}_x.npy')
    ]
    attack_files.extend(
        os.path.join('save', f'{model_name}_{data_name}_{attack_name}_adv.npy')
        for attack_name in attack_list)
    attack_list = ['clean'] + attack_list

    # Do I need train the discriminator?
    pretrain_file = f'AdvTrain_{model_name}_{data_name}.pt'
    if not os.path.exists(os.path.join('save', pretrain_file)):
        need_train = True

    # show parameters
    print(f'[{LOG_NAME}] Running adversarial training on {model_name}...')
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model file :%s', model_file)
    logger.info('model :%s', model_name)
    logger.info('dataset :%s', data_name)
    logger.info('max_epochs :%d', max_epochs)
    # ratio is a float; %d would truncate e.g. 0.5 to 0
    logger.info('ratio :%f', ratio)
    logger.info('batch_size :%d', batch_size)
    logger.info('seed :%d', seed)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('need train :%r', need_train)
    logger.info('attacks :%s', ', '.join(attack_list))

    # check files
    for file_name in [model_file, y_file] + attack_files:
        if not os.path.exists(file_name):
            logger.error('%s does not exist!', file_name)
            raise FileNotFoundError('{} does not exist!'.format(file_name))

    # reset seed
    master_seed(seed)

    # select data
    dc = get_data_container(
        data_name,
        use_shuffle=True,
        use_normalize=True,
    )

    # select a model
    Model = get_model(model_name)
    model = Model()
    # these datasets use IrisNN sized from the data
    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        model = IrisNN(
            num_features=num_features,
            hidden_nodes=num_features*4,
            num_classes=num_classes)
    classifier_mc = ModelContainerPT(model, dc)
    classifier_mc.load(model_file)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    # build the defence around a BIM attack
    attack = BIMContainer(
        classifier_mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False)
    adv_trainer = AdversarialTraining(classifier_mc, [attack])
    if need_train:
        adv_trainer.fit(max_epochs=max_epochs,
                        batch_size=batch_size,
                        ratio=ratio)
        adv_trainer.save(pretrain_file, overwrite=True)
    else:
        adv_trainer.load(os.path.join('save', pretrain_file))

    # evaluate every file (clean inputs first)
    y = np.load(y_file, allow_pickle=False)
    for adv_file, adv_name in zip(attack_files, attack_list):
        logger.debug('Load %s...', adv_file)
        adv = np.load(adv_file, allow_pickle=False)
        accuracy = classifier_mc.evaluate(adv, y)
        logger.info('Accuracy on %s set: %f', adv_name, accuracy)
        blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
        logger.info('Blocked %d/%d samples on %s',
                    len(blocked_indices), len(adv), adv_name)
def main():
    """Command-line entry point for the feature-squeezing evaluation.

    Parses the command-line arguments, derives the filter and attack
    selections from them, verifies that the required input files exist,
    restores the classifier from the ``--model`` file, then either fits or
    loads the ``FeatureSqueezing`` defence and logs the accuracy and the
    number of blocked samples for the clean set and for every selected
    attack.

    The process exits with status 0 when no filter or no attack was
    requested.

    Raises:
        FileNotFoundError: If the model file, the label file or any of the
            clean/adversarial input files does not exist.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help=('a file which contains a pretrained model. The filename '
              'should in "<model>_<dataset>_e<max epochs>[_<date>].pt" '
              'format'))
    parser.add_argument(
        '-e', '--epoch', type=int, required=True,
        help='the number of max epochs for training')
    parser.add_argument(
        '-d', '--depth', type=int, default=0,
        help=('The image color depth for input images. Apply Binary-Depth '
              'filter when receives a parameter'))
    parser.add_argument(
        '--sigma', type=float, default=0,
        help=('The Standard Deviation of Normal distribution. Apply Gaussian '
              'Noise filter when receives a parameter'))
    parser.add_argument(
        '-k', '--kernelsize', type=int, default=0,
        help=('The kernel size for Median filter. Apply median filter when '
              'receives a parameter'))
    parser.add_argument(
        '-b', '--batchsize', type=int, default=128, help='batch size')
    parser.add_argument(
        '-T', '--train', action='store_true', default=False,
        help=('Force the model to retrain without searching existing '
              'pretrained file'))
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')

    # The remaining options are plain on/off switches; register them from a
    # table, keeping the order in which they appear in the help output.
    switches = (
        ('-v', '--verbose', 'set logger level to debug'),
        ('-l', '--savelog', 'save logging file'),
        ('-B', '--bim', 'Apply BIM attack'),
        ('-C', '--carlini', 'Apply Carlini L2 attack'),
        ('-D', '--deepfool', 'Apply DeepFool attack'),
        ('-F', '--fgsm', 'Apply FGSM attack'),
        ('-S', '--saliency', 'Apply Saliency Map attack'),
    )
    for short_opt, long_opt, description in switches:
        parser.add_argument(
            short_opt, long_opt, action='store_true', default=False,
            help=description)

    args = parser.parse_args()

    model_file = args.model
    max_epochs = args.epoch
    bit_depth = args.depth
    sigma = args.sigma
    kernel_size = args.kernelsize
    batch_size = args.batchsize
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    need_train = args.train

    model_name, data_name = parse_model_filename(model_file)

    # A filter is enabled only when its parameter is strictly positive.
    filter_settings = (
        ('binary', bit_depth),
        ('normal', sigma),
        ('median', kernel_size),
    )
    filter_list = [name for name, value in filter_settings if value > 0]

    # Attacks are evaluated in this fixed order, regardless of flag order.
    attack_switches = (
        ('FGSM', args.fgsm),
        ('BIM', args.bim),
        ('DeepFool', args.deepfool),
        ('Carlini', args.carlini),
        ('Saliency', args.saliency),
    )
    attack_list = [name for name, enabled in attack_switches if enabled]

    # Quit, if there is nothing to do.
    if not filter_list or not attack_list:
        logger.warning('Neither received any filter nor any attack. Exit')
        sys.exit(0)

    def saved_path(suffix):
        """Return the path under 'save' for this model/dataset and suffix."""
        return os.path.join('save', f'{model_name}_{data_name}_{suffix}')

    # Labels and clean inputs are read from the files of the first attack.
    first_attack = attack_list[0]
    y_file = saved_path(f'{first_attack}_y.npy')
    attack_files = [saved_path(f'{first_attack}_x.npy')]
    attack_files += [saved_path(f'{name}_adv.npy') for name in attack_list]
    # The first entry stands for the clean inputs.
    attack_list = ['clean'] + attack_list

    # Training is forced when any per-filter pretrained file is missing.
    pretrain_files = [
        build_squeezer_filename(model_name, data_name, max_epochs, name)
        for name in filter_list
    ]
    if not all(os.path.exists(os.path.join('save', name))
               for name in pretrain_files):
        need_train = True

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # show parameters
    print(f'[{LOG_NAME}] Running feature squeezing on {model_name}...')
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    received = (
        ('model file :%s', model_file),
        ('model :%s', model_name),
        ('dataset :%s', data_name),
        ('max_epochs :%d', max_epochs),
        ('bit_depth :%d', bit_depth),
        ('sigma :%f', sigma),
        ('kernel_size :%d', kernel_size),
        ('batch_size :%d', batch_size),
        ('seed :%d', seed),
        ('verbose :%r', verbose),
        ('save_log :%r', save_log),
        ('need train :%r', need_train),
        ('filters :%s', ', '.join(filter_list)),
        ('attacks :%s', ', '.join(attack_list)),
        ('pretrained :%s', ', '.join(pretrain_files)),
    )
    for template, value in received:
        logger.info(template, value)

    # check files
    for required_file in (model_file, y_file, *attack_files):
        if os.path.exists(required_file):
            continue
        logger.error('%s does not exist!', required_file)
        raise FileNotFoundError(f'{required_file} does not exist!')

    # reset seed
    master_seed(seed)

    # select data
    dc = get_data_container(
        data_name,
        use_shuffle=True,
        use_normalize=True,
    )

    # select a model; the default instance is created first and then
    # replaced by an IrisNN sized from the data for the listed datasets.
    model_class = get_model(model_name)
    model = model_class()
    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        n_classes = dc.num_classes
        n_features = dc.dim_data[0]
        model = IrisNN(
            num_features=n_features,
            hidden_nodes=n_features * 4,
            num_classes=n_classes)

    classifier_mc = ModelContainerPT(model, dc)
    classifier_mc.load(model_file)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    # initialize Squeezer
    squeezer = FeatureSqueezing(
        classifier_mc,
        filter_list,
        bit_depth=bit_depth,
        sigma=sigma,
        kernel_size=kernel_size,
        pretrained=True,
    )

    # load parameters for Squeezer, or train and save them
    if not need_train:
        squeezer.load(model_file)
    else:
        squeezer.fit(max_epochs=max_epochs, batch_size=batch_size)
        squeezer.save(model_file, True)

    # traverse the clean set and all attacks
    y = np.load(y_file, allow_pickle=False)
    for adv_name, adv_file in zip(attack_list, attack_files):
        logger.debug('Load %s...', adv_file)
        adv = np.load(adv_file, allow_pickle=False)

        acc_og = classifier_mc.evaluate(adv, y)
        acc_squeezer = squeezer.evaluate(adv, y)
        logger.info('Accuracy on %s set - OG: %f, Squeezer: %f',
                    adv_name, acc_og, acc_squeezer)

        blocked_indices = squeezer.detect(adv, return_passed_x=False)
        logger.info('Blocked %d/%d samples on %s',
                    len(blocked_indices), len(adv), adv_name)