def main():
    """Generate adversarial examples from a pretrained model.

    Reads the model file, the attack parameter JSON and the attack flags
    from the command line, rebuilds the model, reports its accuracy on the
    test set and hands the selected attacks to ``run_attacks``.

    Raises:
        FileNotFoundError: if the model file or the parameter file is
            missing.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file which contains a pretrained model. The filename should '
             'in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the '
             'attacks')
    parser.add_argument(
        '-n', '--number', type=int, default=1000,
        help='the number of adv. examples want to generate. (if more than '
             'test set, it uses all test examples.)')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')

    # All remaining options are plain on/off switches; register them in the
    # same order as before so the generated help text is unchanged.
    switches = (
        ('-v', '--verbose', 'set logger level to debug'),
        ('-l', '--savelog', 'save logging file'),
        ('-w', '--overwrite', 'overwrite the existing file'),
        ('-B', '--bim', 'Apply BIM attack'),
        ('-C', '--carlini', 'Apply Carlini L2 attack'),
        ('-D', '--deepfool', 'Apply DeepFool attack'),
        ('-F', '--fgsm', 'Apply FGSM attack'),
        ('-S', '--saliency', 'Apply Saliency Map attack'),
    )
    for short_opt, long_opt, help_text in switches:
        parser.add_argument(short_opt, long_opt, action='store_true',
                            default=False, help=help_text)

    args = parser.parse_args()
    model_file = args.model
    attack_param_file = args.param
    num_adv = args.number
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # Which attack should apply? A boolean mask over the known attack names
    # keeps only the requested ones, in this fixed order.
    attack_names = np.array(
        ['FGSM', 'BIM', 'Carlini', 'DeepFool', 'Saliency'])
    requested = np.array(
        [args.fgsm, args.bim, args.carlini, args.deepfool, args.saliency],
        dtype=bool)
    selected_attacks = attack_names[requested]

    # check file
    for path in (model_file, attack_param_file):
        if not os.path.exists(path):
            raise FileNotFoundError('{} does not exist!'.format(path))

    dirname = os.path.dirname(model_file)
    model_name, dname = parse_model_filename(model_file)

    with open(attack_param_file) as param_json:
        att_params = json.load(param_json)

    # set logging config. Run this before logging anything!
    set_logging('attack', dname, verbose, save_log)

    # show parameters
    print(f'[attack] Start generating {num_adv} adv. samples from '
          f'{model_name} model...')
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    received = (
        ('model file :%s', model_file),
        ('model :%s', model_name),
        ('dataset :%s', dname),
        ('params :%s', attack_param_file),
        ('num_adv :%r', num_adv),
        ('seed :%d', seed),
        ('verbose :%r', verbose),
        ('save_log :%r', save_log),
        ('overwrite :%r', overwrite),
        ('dirname :%r', dirname),
        ('attacks :%s', ', '.join(selected_attacks)),
    )
    for template, value in received:
        logger.info(template, value)

    if selected_attacks.size == 0:
        logger.warning('No attack is selected. Exit.')
        sys.exit(0)

    # reset seed
    master_seed(seed)

    # set DataContainer and ModelContainer
    dc = get_data_container(dname)
    Model = get_model(model_name)

    # these datasets are handled by a model that takes extra keyword
    # arguments
    if dname in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_features = dc.dim_data[0]
        model = Model(
            num_features=num_features,
            hidden_nodes=num_features*4,
            num_classes=dc.num_classes,
        )
    else:
        model = Model()
    logger.info('Use %s model', model.__class__.__name__)

    mc = ModelContainerPT(model, dc)
    mc.load(model_file)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    run_attacks(mc, selected_attacks, att_params, num_adv,
                model_name + '_' + dname, overwrite)
def main():
    """Evaluate a saved adversarial-training defence on MNIST.

    Loads the pretrained ``MnistCnnV2`` classifier and the stored
    adversarial-training model from the ``save`` directory, then reports,
    for the clean inputs and for each stored attack set, how many samples
    the defence blocks (and, for the attack sets, the classifier accuracy).
    """
    data_name = 'MNIST'
    set_logging('advTraining', data_name, True, True)

    def saved(name):
        # every artefact used here lives in the 'save' directory
        return os.path.join('save', name)

    def load_array(name):
        return np.load(saved(name), allow_pickle=False)

    model_file = saved('MnistCnnV2_MNIST_e50.pt')
    Model = get_model('MnistCnnV2')
    classifier = Model()

    dc = DataContainer(DATASET_LIST[data_name], get_data_path())
    dc()
    classifier_mc = ModelContainerPT(classifier, dc)
    classifier_mc.load(model_file)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    attack = BIMContainer(
        classifier_mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False)

    adv_trainer = AdversarialTraining(classifier_mc, [attack])
    # Training is disabled here; the stored defence is loaded instead.
    # adv_trainer.fit(max_epochs=30, batch_size=128, ratio=0.1)
    # adv_trainer.save('AdvTrain_MnistCnnV2_MNIST', overwrite=True)

    file_name = saved('AdvTrain_MnistCnnV2_MNIST.pt')
    adv_trainer.load(file_name)

    # clean inputs and their labels (stored alongside the BIM examples)
    x = load_array('MnistCnnV2_MNIST_BIM_x.npy')
    y = load_array('MnistCnnV2_MNIST_BIM_y.npy')
    blocked_indices = adv_trainer.detect(x, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s',
                len(blocked_indices), len(x), 'clean')

    # the same accuracy/blocked report for every stored attack set
    for attack_name in ('BIM', 'Carlini', 'DeepFool', 'FGSM', 'Saliency'):
        adv = load_array(f'MnistCnnV2_MNIST_{attack_name}_adv.npy')
        accuracy = classifier_mc.evaluate(adv, y)
        logger.info('Accuracy on %s set: %f', attack_name, accuracy)
        blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
        logger.info('Blocked %d/%d samples on %s',
                    len(blocked_indices), len(adv), attack_name)
def main():
    """Test the Applicability Domain defence on an extra-tree classifier.

    Trains an ``ExtraTreeClassifier`` on the chosen dataset, generates
    ``DecisionTreeAttack`` examples from the stored clean inputs and reports
    accuracy and blocked counts for that attack, for the clean inputs and
    for every pre-generated attack file selected on the command line.

    Raises:
        ValueError: if the dataset has no associated model name (the
            original code failed later with an ``UnboundLocalError``).
        FileNotFoundError: if any required ``.npy`` file is missing.
    """
    parser = ap.ArgumentParser()
    parser.add_argument('-d', '--dataset', type=str, required=True,
                        help='Name of the dataset')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the '
             'attacks')
    parser.add_argument('-s', '--seed', type=int, default=4096,
                        help='the seed for random number generator')
    parser.add_argument('-v', '--verbose', action='store_true',
                        default=False, help='set logger level to debug')
    parser.add_argument('-l', '--savelog', action='store_true',
                        default=False, help='save logging file')
    parser.add_argument('-F', '--fgsm', action='store_true', default=False,
                        help='Apply FGSM attack')
    parser.add_argument('-B', '--bim', action='store_true', default=False,
                        help='Apply BIM attack')
    parser.add_argument('-D', '--deepfool', action='store_true',
                        default=False, help='Apply DeepFool attack')
    parser.add_argument('-C', '--carlini', action='store_true',
                        default=False, help='Apply Carlini L2 attack')
    args = parser.parse_args()
    data_name = args.dataset
    param_file = args.param
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # Which attack should apply?
    attack_list = []
    if args.fgsm:
        attack_list.append('FGSM')
    if args.bim:
        attack_list.append('BIM')
    if args.deepfool:
        attack_list.append('DeepFool')
    if args.carlini:
        attack_list.append('Carlini')

    # Quit, if there is nothing to do.
    if not attack_list:
        logger.warning('Neither received any filter nor any attack. Exit')
        sys.exit(0)

    # The stored files are named after the neural-network model that
    # produced them. Fail early for datasets without a known model name
    # instead of hitting an unbound local below.
    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        model_name = 'IrisNN'
    elif data_name == 'BreastCancerWisconsin':
        model_name = 'BCNN'
    else:
        logger.error('Dataset %s is not supported!', data_name)
        raise ValueError('Dataset {} is not supported!'.format(data_name))

    # Clean inputs and labels are taken from the first selected attack.
    y_file = os.path.join(
        'save', f'{model_name}_{data_name}_{attack_list[0]}_y.npy')
    attack_files = [
        os.path.join(
            'save', f'{model_name}_{data_name}_{attack_list[0]}_x.npy')
    ]
    for attack_name in attack_list:
        attack_files.append(
            os.path.join(
                'save', f'{model_name}_{data_name}_{attack_name}_adv.npy'))
    # the 1st file is the clean inputs
    attack_list = ['clean'] + attack_list

    # load parameters for Applicability Domain
    with open(param_file) as param_json:
        params = json.load(param_json)

    # show parameters
    print(f'[{LOG_NAME}] Running tree model...')
    logger.info('Start at :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model :%s', model_name)
    logger.info('dataset :%s', data_name)
    logger.info('param file :%s', param_file)
    logger.info('seed :%d', seed)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('attacks :%s', ', '.join(attack_list))
    logger.debug('params :%s', str(params))

    # check files
    for file_name in [y_file] + attack_files:
        if not os.path.exists(file_name):
            logger.error('%s does not exist!', file_name)
            raise FileNotFoundError('{} does not exist!'.format(file_name))

    # reset seed
    master_seed(seed)

    # select data
    dc = get_data_container(
        data_name,
        use_shuffle=True,
        use_normalize=True,
    )

    # train the model
    classifier = ExtraTreeClassifier(
        criterion='gini',
        splitter='random',
    )
    mc = ModelContainerTree(classifier, dc)
    mc.fit()

    # generate adversarial examples from the stored clean inputs
    x = np.load(attack_files[0], allow_pickle=False)
    art_classifier = SklearnClassifier(classifier)
    attack = DecisionTreeAttack(art_classifier)
    adv = attack.generate(x)

    ad = ApplicabilityDomainContainer(mc, mc.hidden_model, **params)
    ad.fit()

    y = np.load(y_file, allow_pickle=False)
    accuracy = mc.evaluate(adv, y)
    logger.info('Accuracy on DecisionTreeAttack set: %f', accuracy)
    # Ask only for the blocked indices, like the calls in the loop below,
    # so that len() counts blocked samples whatever the default of
    # return_passed_x is.
    blocked_indices = ad.detect(adv, return_passed_x=False)
    logger.info('Blocked %d/%d samples on DecisionTreeAttack',
                len(blocked_indices), len(adv))

    # traverse the clean set and the other attacks
    for adv_name, adv_file in zip(attack_list, attack_files):
        logger.debug('Load %s...', adv_file)
        adv = np.load(adv_file, allow_pickle=False)
        accuracy = mc.evaluate(adv, y)
        logger.info('Accuracy on %s set: %f', adv_name, accuracy)
        blocked_indices = ad.detect(adv, return_passed_x=False)
        logger.info('Blocked %d/%d samples on %s',
                    len(blocked_indices), len(adv), adv_name)
def main():
    """Repeat the tree-model filter experiment and store the counts as CSV.

    Each successful run of ``experiment`` appends one row
    ``Index,Clean,DecisionTreeAttack`` to the result file. A run that
    reports ``-1`` blocked adversarial examples is discarded and repeated.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-d', '--dataset', type=str, required=True,
        help='Name of the dataset')
    parser.add_argument(
        '--depth', type=int, default=0,
        help='The image color depth for input images. Apply Binary-Depth '
             'filter when receives a parameter')
    parser.add_argument(
        '-s', '--sigma', type=float, default=0,
        help='The Standard Deviation of Normal distribution. Apply Gaussian '
             'Noise filter when receives a parameter')
    parser.add_argument(
        '-k', '--kernelsize', type=int, default=0,
        help='The kernel size for Median filter. Apply median filter when '
             'receives a parameter')
    parser.add_argument(
        '-i', '--iteration', type=int, default=MAX_ITERATIONS,
        help='the number of iterations that the experiment will repeat')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')
    args = parser.parse_args()
    data_name = args.dataset
    max_iterations = args.iteration
    bit_depth = args.depth
    sigma = args.sigma
    kernel_size = args.kernelsize
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # Which filter should apply? A filter is enabled by giving it a
    # positive parameter.
    filter_list = []
    if bit_depth > 0:
        filter_list.append('binary')
    if sigma > 0:
        filter_list.append('normal')
    if kernel_size > 0:
        filter_list.append('median')

    result_filename = name_handler(
        os.path.join('save', LOG_NAME + '_' + data_name + '_' + 'tree'),
        'csv',
        overwrite=overwrite)

    # show parameters
    print(f'[{LOG_NAME}] Running tree model...')
    logger.info('Start at :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset :%s', data_name)
    logger.info('iterations :%d', max_iterations)
    logger.info('bit_depth :%d', bit_depth)
    logger.info('sigma :%f', sigma)
    logger.info('kernel_size :%d', kernel_size)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('overwrite :%r', overwrite)
    logger.info('filename :%s', result_filename)

    # NOTE: Why does training on all adversarial examples not work?
    # The classification models depend on the training set. They are not
    # identical, thus the adversarial examples are not the same either.
    with open(result_filename, 'w') as file:
        file.write(','.join(['Index', 'Clean', 'DecisionTreeAttack']) + '\n')
        i = 1
        while i <= max_iterations:
            num_blk_clean, num_blk_adv = experiment(
                data_name, filter_list, bit_depth, sigma, kernel_size)
            if num_blk_adv == -1:
                # failed run: repeat it without consuming an index
                continue
            # Write first, then advance, so rows are numbered
            # 1..max_iterations (the counter used to be bumped before the
            # write, producing 2..max_iterations+1).
            file.write(f'{i},{num_blk_clean},{num_blk_adv}\n')
            i += 1
    # the with-statement has closed the file; no explicit close() is needed
def main():
    """Run the experiment repeatedly on freshly generated synthetic data.

    Every iteration draws a new dataset with ``make_classification``,
    rescales it with ``scale_normalize``, keeps the last 1000 samples as the
    test set and passes the resulting ``DataContainer`` to ``experiment``
    together with the two open CSV files.
    """
    parser = ap.ArgumentParser()
    parser.add_argument('-s', '--size', type=int, required=True,
                        help='the number of sample size')
    parser.add_argument('-f', '--features', type=int, required=True,
                        help='the number of features')
    parser.add_argument('-c', '--classes', type=int, default=2,
                        help='the number of classes')
    parser.add_argument(
        '-i', '--iteration', type=int, default=MAX_ITERATIONS,
        help='the number of iterations that the experiment will repeat')
    parser.add_argument('-e', '--epoch', type=int, required=True,
                        help='the number of max epochs for training')
    parser.add_argument('-v', '--verbose', action='store_true',
                        default=False, help='set logger level to debug')
    parser.add_argument('-l', '--savelog', action='store_true',
                        default=False, help='save logging file')
    parser.add_argument('-w', '--overwrite', action='store_true',
                        default=False, help='overwrite the existing file')
    args = parser.parse_args()
    sample_size = args.size
    num_features = args.features
    num_classes = args.classes
    max_iterations = args.iteration
    max_epochs = args.epoch
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    dname = f'SyntheticS{sample_size}F{num_features}C{num_classes}'
    set_logging(LOG_NAME, dname, verbose, save_log)

    print('[{}] Start experiment on {}...'.format(LOG_NAME, dname))
    logger.info('Start at :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset :%s', dname)
    logger.info('train size :%d', sample_size)
    logger.info('num features:%d', num_features)
    logger.info('num classes :%d', num_classes)
    logger.info('iterations :%d', max_iterations)
    logger.info('max_epochs :%d', max_epochs)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('overwrite :%r', overwrite)

    result_path = name_handler(
        os.path.join('save', f'{LOG_NAME}_{dname}_i{max_iterations}'),
        'csv',
        overwrite=overwrite)
    adv_path = name_handler(
        os.path.join('save', f'{LOG_NAME}_{dname}_AdvExamples'),
        'csv',
        overwrite=overwrite)

    # NOTE: test set has fixed size
    test_size = 1000

    # Context managers close both files even when an iteration raises.
    with open(adv_path, 'w') as adv_file, open(result_path, 'w') as res_file:
        adv_file.write(','.join(TITLE_ADV) + '\n')
        res_file.write(','.join(TITLE_RESULTS) + '\n')

        for i in range(max_iterations):
            since = time.time()

            # generate synthetic data
            x, y = make_classification(
                n_samples=sample_size + test_size,
                n_features=num_features,
                n_informative=num_classes,
                n_redundant=0,
                n_classes=num_classes,
                n_clusters_per_class=1,
            )

            # normalize data
            x_max = np.max(x, axis=0)
            x_min = np.min(x, axis=0)
            # NOTE: Carlini attack expects the data in range [0, 1]
            # x_mean = np.mean(x, axis=0)
            # x = scale_normalize(x, x_min, x_max, x_mean)
            x = scale_normalize(x, x_min, x_max)

            # training/test split
            # np.long is deprecated and missing from NumPy 1.24-1.26;
            # np.int64 is an explicit, platform-independent label dtype.
            x_train = np.array(x[:-test_size], dtype=np.float32)
            y_train = np.array(y[:-test_size], dtype=np.int64)
            x_test = np.array(x[-test_size:], dtype=np.float32)
            y_test = np.array(y[-test_size:], dtype=np.int64)

            # create data container
            data_dict = get_synthetic_dataset_dict(
                sample_size + test_size, num_classes, num_features)
            dc = DataContainer(data_dict, get_data_path())

            # assign data manually
            dc.x_train = x_train
            dc.y_train = y_train
            dc.x_test = x_test
            dc.y_test = y_test

            experiment(i, dc, max_epochs, adv_file, res_file)

            time_elapsed = time.time() - since
            print('Completed {} [{}/{}]: {:d}m {:2.1f}s'.format(
                dname,
                i + 1,
                max_iterations,
                int(time_elapsed // 60),
                time_elapsed % 60))
def main():
    """Repeat the tree-model Applicability Domain experiment.

    Runs ``experiment`` ``--iteration`` times with the parameters loaded
    from the JSON file and writes one ``Index,Clean,DecisionTreeAttack`` row
    per run to a CSV file in the ``save`` directory.
    """
    parser = ap.ArgumentParser()
    parser.add_argument('-d', '--dataset', type=str, required=True,
                        help='Name of the dataset')
    parser.add_argument('-i', '--iteration', type=int,
                        default=MAX_ITERATIONS,
                        help='the number of iterations that the experiment '
                             'will repeat')
    parser.add_argument('-p', '--param', type=str, required=True,
                        help='a JSON config file which contains the '
                             'parameters for the applicability domain')
    for short_opt, long_opt, help_text in (
            ('-v', '--verbose', 'set logger level to debug'),
            ('-l', '--savelog', 'save logging file'),
            ('-w', '--overwrite', 'overwrite the existing file')):
        parser.add_argument(short_opt, long_opt, action='store_true',
                            default=False, help=help_text)

    args = parser.parse_args()
    data_name = args.dataset
    max_iterations = args.iteration
    param_file = args.param
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # load parameters for Applicability Domain
    with open(param_file) as param_json:
        params = json.load(param_json)

    result_filename = name_handler(
        os.path.join('save', f'{LOG_NAME}_{data_name}_tree'),
        'csv',
        overwrite=overwrite)

    # show parameters
    print(f'[{LOG_NAME}] Running tree model...')
    logger.info('Start at :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    for template, value in (
            ('dataset :%s', data_name),
            ('iterations :%d', max_iterations),
            ('param file :%s', param_file),
            ('verbose :%r', verbose),
            ('save_log :%r', save_log),
            ('overwrite :%r', overwrite),
            ('filename :%s', result_filename)):
        logger.info(template, value)
    logger.debug('params :%s', str(params))

    # NOTE: Why does training on all adversarial examples not work?
    # The classification models depend on the training set. They are not
    # identical, thus the adversarial examples are not the same either.
    header = ','.join(['Index', 'Clean', 'DecisionTreeAttack'])
    with open(result_filename, 'w') as csv_out:
        csv_out.write(header + '\n')
        for run in range(max_iterations):
            blocked_clean, blocked_adv = experiment(data_name, params)
            csv_out.write(f'{run},{blocked_clean},{blocked_adv}\n')
def main():
    """Cross-validate the Applicability Domain parameters for a model.

    Loads a pretrained model and the search ranges from a JSON file, runs
    ``CrossValidation.fit`` with a BIM attack, optionally tests the selected
    parameters on stored adversarial examples and, unless ``--ignore`` is
    given, saves the cross-validation results.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file which contains a pretrained model. The filename should '
             'in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the '
             'cross validation')
    parser.add_argument(
        '-a', '--adv', type=str,
        help="file name of adv. examples for testing. If it's none, the "
             'program will ignore testing. The name should in '
             '"<model>_<dataset>_<attack>_adv.npy" format')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    for short_opt, long_opt, help_text in (
            ('-v', '--verbose', 'set logger level to debug'),
            ('-l', '--savelog', 'save logging file'),
            ('-i', '--ignore',
             'Ignore saving the results. Only returns the results from '
             'terminal.'),
            ('-w', '--overwrite', 'overwrite the existing file')):
        parser.add_argument(short_opt, long_opt, action='store_true',
                            default=False, help=help_text)

    args = parser.parse_args()
    model_file = args.model
    param_file = args.param
    adv_file = args.adv
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    does_ignore = args.ignore
    overwrite = args.overwrite

    model_name, data_name = parse_model_filename(model_file)

    # set logging config. Run this before logging anything!
    set_logging('cross_validation', data_name, verbose, save_log)

    # check files: the model and parameter files are mandatory, the
    # adversarial examples only when a file name was given
    required_files = [model_file, param_file]
    for file_path in required_files:
        if not os.path.exists(file_path):
            logger.warning('%s does not exist. Exit.', file_path)
            sys.exit(0)
    has_adv = adv_file is not None
    if has_adv and not os.path.exists(adv_file):
        logger.warning('%s does not exist. Exit.', adv_file)
        sys.exit(0)

    # read parameters
    with open(param_file) as param_json:
        params = json.load(param_json)

    # show parameters
    print(f'[cv] Running cross validation on {model_file} with '
          f'{data_name}...')
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    for template, value in (
            ('model file :%s', model_file),
            ('adv file :%s', adv_file),
            ('model :%s', model_name),
            ('dataset :%s', data_name),
            ('param file :%s', param_file),
            ('seed :%d', seed),
            ('verbose :%r', verbose),
            ('save_log :%r', save_log),
            ('Ignore saving :%r', does_ignore),
            ('overwrite :%r', overwrite)):
        logger.info(template, value)
    logger.debug('params :%s', str(params))

    # load parameters
    k_range = params['k_range']
    z_range = params['z_range']
    kappa_range = params['kappa_range']
    gamma_range = params['gamma_range']
    epsilon = params['epsilon']
    num_folds = params['num_folds']
    batch_size = params['batch_size']
    sample_ratio = params['sample_ratio']
    logger.info('k_range :%s', str(k_range))
    logger.info('z_range :%s', str(z_range))
    logger.info('kappa_range :%s', str(kappa_range))
    logger.info('gamma_range :%s', str(gamma_range))
    logger.info('epsilon :%.1f', epsilon)
    logger.info('num_folds :%d', num_folds)
    logger.info('batch_size :%d', batch_size)
    logger.info('sample_ratio :%.1f', sample_ratio)

    # reset seed
    master_seed(seed)

    dc = DataContainer(DATASET_LIST[data_name], get_data_path())
    dc(shuffle=True, normalize=True, size_train=0.8)
    logger.info('Sample size: %d', len(dc))

    Model = get_model(model_name)
    # these datasets are handled by a model that takes extra keyword
    # arguments
    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_features = dc.dim_data[0]
        model = Model(
            num_features=num_features,
            hidden_nodes=num_features*4,
            num_classes=dc.num_classes,
        )
    else:
        model = Model()
    logger.info('Use %s model', model.__class__.__name__)

    mc = ModelContainerPT(model, dc)
    mc.load(model_file)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    ad = ApplicabilityDomainContainer(
        mc, hidden_model=model.hidden_model, sample_ratio=sample_ratio)
    cross_validation = CrossValidation(
        ad,
        num_folds=num_folds,
        k_range=k_range,
        z_range=z_range,
        kappa_range=kappa_range,
        gamma_range=gamma_range,
        epsilon=epsilon,
    )
    bim_attack = BIMContainer(
        mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False,
    )
    cross_validation.fit(bim_attack)

    # test optimal parameters
    if has_adv:
        # sibling files share the name of the adv. file except for the
        # '_adv' part
        adv_path, pred_path, x_path, y_path = (
            adv_file.replace('_adv', '_' + suffix)
            for suffix in ('adv', 'pred', 'x', 'y'))
        adv = np.load(adv_path, allow_pickle=False)
        pred_adv = np.load(pred_path, allow_pickle=False)
        x = np.load(x_path, allow_pickle=False)
        pred = np.load(y_path, allow_pickle=False)

        # fetch optimal parameters
        ad = ApplicabilityDomainContainer(
            mc,
            hidden_model=model.hidden_model,
            k2=cross_validation.k2,
            reliability=cross_validation.reliability,
            sample_ratio=sample_ratio,
            kappa=cross_validation.kappa,
            confidence=cross_validation.confidence,
        )
        logger.info('Params: %s', str(ad.params))
        ad.fit()

        blocked_indices = ad.detect(x, pred, return_passed_x=False)
        logger.info('Blocked %d/%d on clean data',
                    len(blocked_indices), len(x))
        blocked_indices = ad.detect(adv, pred_adv, return_passed_x=False)
        logger.info('Blocked %d/%d on adv. examples.',
                    len(blocked_indices), len(adv))

    # save results
    if not does_ignore:
        file_name = name_handler(
            model_name + '_' + data_name, 'csv', overwrite=overwrite)
        cross_validation.save(file_name)
def main():
    """Run the Applicability Domain defence on stored adversarial examples.

    Derives the ``pred``/``x``/``y`` file names from the adv. file name,
    rebuilds the pretrained model, fits an ``ApplicabilityDomainContainer``
    with the JSON parameters and logs one ``[result]`` line for the
    adversarial examples (and one for the clean samples when their files
    exist).
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-a', '--adv', type=str, required=True,
        help='file name for adv. examples. The name should in '
             '"<model>_<dataset>_<attack>_adv.npy" format')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the '
             'attacks')
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file which contains a pretrained model. The filename should '
             'in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    args = parser.parse_args()
    adv_file = args.adv
    param_file = args.param
    model_file = args.model
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    check_clean = True

    # build filenames from the root file
    adv_path, pred_path, x_path, y_path = (
        adv_file.replace('_adv', '_' + suffix)
        for suffix in ('adv', 'pred', 'x', 'y'))
    model_name, dname = parse_model_filename(adv_file)

    # set logging config. Run this before logging anything!
    set_logging('defence_ad', dname, verbose, save_log)

    # check adv. examples and parameter config files
    for path in (adv_path, pred_path, param_file):
        if not os.path.exists(path):
            logger.warning('%s does not exist. Exit.', path)
            sys.exit(0)
    # check clean samples; they are optional
    for path in (x_path, y_path):
        if not os.path.exists(path):
            logger.warning(
                'Cannot load files for clean samples. Skip checking clean set.'
            )
            check_clean = False

    with open(param_file) as param_json:
        params = json.load(param_json)

    # show parameters
    print(f'[defend_ad] Running applicability domain on {model_name}...')
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    for template, value in (
            ('model file :%s', model_file),
            ('adv file :%s', adv_file),
            ('model :%s', model_name),
            ('dataset :%s', dname),
            ('param file :%s', param_file),
            ('seed :%d', seed),
            ('verbose :%r', verbose),
            ('save_log :%r', save_log),
            ('check_clean :%r', check_clean)):
        logger.info(template, value)
    logger.debug('params : %s', str(params))

    # reset seed
    master_seed(seed)

    # set DataContainer and ModelContainer
    dc = get_data_container(dname)
    Model = get_model(model_name)
    # these datasets are handled by a model that takes extra keyword
    # arguments
    if dname in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_features = dc.dim_data[0]
        model = Model(
            num_features=num_features,
            hidden_nodes=num_features * 4,
            num_classes=dc.num_classes,
        )
    else:
        model = Model()
    logger.info('Use %s model', model.__class__.__name__)
    mc = ModelContainerPT(model, dc)
    mc.load(model_file)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    # perform defence
    ad = ApplicabilityDomainContainer(
        mc, hidden_model=model.hidden_model, **params)
    ad.fit()

    # columns shared by every result line
    result_prefix = [
        model_file,
        adv_file,
        params['k2'],
        params['reliability'],
        params['sample_ratio'],
        params['confidence'],
        params['kappa'],
        params['disable_s2'],
    ]

    # check clean
    if check_clean:
        x = np.load(x_path, allow_pickle=False)
        y = np.load(y_path, allow_pickle=False)
        _, _, blocked_counts = detect(ad, 'clean samples', x, y)
        clean_fields = result_prefix + ['clean'] + blocked_counts
        result_clean = '[result]' + ','.join(map(str, clean_fields))

    # check adversarial examples
    adv = np.load(adv_path, allow_pickle=False)
    pred = np.load(pred_path, allow_pickle=False)
    _, _, blocked_counts = detect(ad, 'adv. examples', adv, pred)
    adv_fields = result_prefix + ['adv'] + blocked_counts
    result = '[result]' + ','.join(map(str, adv_fields))

    if check_clean:
        logger.info(result_clean)
    logger.info(result)
def main():
    """Repeat the attack/defence experiment and collect the results as CSV.

    Opens one CSV file for the accuracy records and one for the results,
    writes the title rows and calls ``experiment`` once per iteration,
    printing the elapsed time of each run.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-d', '--dataset', type=str, required=True,
        help='Name of the dataset')
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='Name of the model')
    parser.add_argument(
        '-i', '--iteration', type=int, default=MAX_ITERATIONS,
        help='the number of iterations that the experiment will repeat')
    parser.add_argument(
        '-e', '--epoch', type=int, required=True,
        help='the number of max epochs for training')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')
    # NOTE: the JSON files for the parameters are hard coded.
    # We expect to run multiple attacks and defences in one iteration.
    args = parser.parse_args()
    dname = args.dataset
    mname = args.model
    max_iterations = args.iteration
    max_epochs = args.epoch
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, dname, verbose, save_log)

    print('[{}] Start experiment on {} {} i{} e{}...'.format(
        LOG_NAME, mname, dname, max_iterations, max_epochs))
    logger.info('Start at :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset :%s', dname)
    logger.info('model :%s', mname)
    logger.info('iterations :%d', max_iterations)
    logger.info('max_epochs :%d', max_epochs)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('overwrite :%r', overwrite)

    adv_path = name_handler(
        os.path.join('save', f'{LOG_NAME}_{dname}_{mname}_acc'),
        'csv',
        overwrite=overwrite)
    result_path = name_handler(
        os.path.join(
            'save', f'{LOG_NAME}_{dname}_{mname}_res_i{max_iterations}'),
        'csv',
        overwrite=overwrite)

    # Context managers close both files even when an iteration raises;
    # the previous manual open()/close() pair leaked them on error.
    with open(adv_path, 'w') as adv_file, open(result_path, 'w') as res_file:
        adv_file.write(','.join(TITLE_ADV) + '\n')
        res_file.write(','.join(TITLE_RESULTS) + '\n')

        for i in range(max_iterations):
            since = time.time()
            experiment(i, dname, mname, max_epochs, adv_file, res_file)
            time_elapsed = time.time() - since
            print('Completed {} [{}/{}]: {:d}m {:2.1f}s'.format(
                dname,
                i+1,
                max_iterations,
                int(time_elapsed // 60),
                time_elapsed % 60))
def _str2bool(value):
    """Convert a command-line token into a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so any
    non-empty value (including ``'False'``) becomes ``True``. This converter
    interprets the usual spellings explicitly instead.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised spelling.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    # The empty string stays False, as bool('') was before.
    if token in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise ap.ArgumentTypeError(
        'expected a boolean value, got {!r}'.format(value))


def main():
    """Command-line entry point: train a model on a dataset and save it.

    Parses the training options, seeds the random number generators, builds
    the data container, selects a model (explicitly via ``--model`` or by a
    per-dataset default), trains it, saves the parameters and finally reloads
    the saved file to report the accuracy on the test set.

    Raises:
        AttributeError: if no model could be selected for the dataset.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-d', '--dataset', type=str, required=True,
        choices=get_dataset_list(),
        help='the dataset you want to train')
    parser.add_argument(
        '-o', '--ofile', type=str,
        help='the filename will be used to store model parameters')
    parser.add_argument(
        '-e', '--epoch', type=int, default=5,
        help='the number of max epochs for training')
    parser.add_argument(
        '-b', '--batchsize', type=int, default=128,
        help='batch size')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    # type=bool would turn "--shuffle False" into True; parse it explicitly.
    parser.add_argument(
        '-H', '--shuffle', type=_str2bool, default=True,
        help='shuffle the dataset')
    parser.add_argument(
        '-n', '--normalize', type=_str2bool, default=True,
        help='apply zero mean and scaling to the dataset (for numeral dataset only)')
    parser.add_argument(
        '-m', '--model', type=str, choices=AVALIABLE_MODELS,
        help='select a model to train the data')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')

    args = parser.parse_args()
    dname = args.dataset
    filename = args.ofile
    max_epochs = args.epoch
    batch_size = args.batchsize
    seed = args.seed
    use_shuffle = args.shuffle
    use_normalize = args.normalize
    model_name = args.model
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    set_logging('train', dname, verbose, save_log)

    # show parameters
    print('[train] Start training {} model...'.format(model_name))
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset :%s', dname)
    logger.info('filename :%s', filename)
    logger.info('max_epochs :%d', max_epochs)
    logger.info('batch_size :%d', batch_size)
    logger.info('seed :%d', seed)
    logger.info('use_shuffle :%r', use_shuffle)
    logger.info('use_normalize :%r', use_normalize)
    logger.info('model_name :%s', model_name)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('overwrite :%r', overwrite)

    master_seed(seed)

    # set DataContainer
    dc = get_data_container(
        dname,
        use_shuffle=use_shuffle,
        use_normalize=use_normalize,
    )

    # select a model: an explicit --model wins, otherwise fall back to the
    # default architecture associated with the dataset
    model = None
    if model_name is not None:
        Model = models.get_model(model_name)
        model = Model()
    else:
        if dname == 'MNIST':
            model = models.MnistCnnV2()
        elif dname == 'CIFAR10':
            model = models.CifarCnn()
        elif dname == 'BreastCancerWisconsin':
            model = models.BCNN()
        elif dname in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
            num_classes = dc.num_classes
            num_features = dc.dim_data[0]
            model = models.IrisNN(
                num_features=num_features,
                hidden_nodes=num_features * 4,
                num_classes=num_classes)

    if model is None:
        raise AttributeError('Cannot find model!')

    modelname = model.__class__.__name__
    logger.info('Selected %s model', modelname)

    # set ModelContainer and train the model
    mc = models.ModelContainerPT(model, dc)
    mc.fit(max_epochs=max_epochs, batch_size=batch_size)

    # save
    os.makedirs('save', exist_ok=True)
    if filename is None:
        filename = get_pt_model_filename(modelname, dname, max_epochs)
    logger.debug('File name: %s', filename)
    mc.save(filename, overwrite=overwrite)

    # test result: reload the file that was just written and evaluate it
    file_path = os.path.join('save', filename)
    logger.debug('Use saved parameters from %s', filename)
    mc.load(file_path)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)
def main():
    """Command-line entry point: run the adversarial-training defence.

    Parses a pretrained model file name and the selected attacks, loads the
    classifier, trains (or loads) the adversarial-training defence, and then
    reports, for the clean inputs and for every selected attack, the
    classifier accuracy and how many samples the defence blocks.

    The script exits with status 0 when no attack flag is given.

    Raises:
        FileNotFoundError: if the model file, the label file or any of the
            clean/adversarial ``.npy`` files is missing.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file which contains a pretrained model. The filename should in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-e', '--epoch', type=int, required=True,
        help='the number of max epochs for training')
    parser.add_argument(
        '-r', '--ratio', type=float, required=True,
        help='the percentage of adversarial examples mix to the training set.')
    parser.add_argument(
        '-b', '--batchsize', type=int, default=128,
        help='batch size')
    parser.add_argument(
        '-t', '--train', action='store_true', default=False,
        help='Force the model to retrain without searching existing pretrained file')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-B', '--bim', action='store_true', default=False,
        help='Apply BIM attack')
    parser.add_argument(
        '-C', '--carlini', action='store_true', default=False,
        help='Apply Carlini L2 attack')
    parser.add_argument(
        '-D', '--deepfool', action='store_true', default=False,
        help='Apply DeepFool attack')
    parser.add_argument(
        '-F', '--fgsm', action='store_true', default=False,
        help='Apply FGSM attack')
    parser.add_argument(
        '-S', '--saliency', action='store_true', default=False,
        help='Apply Saliency Map attack')

    args = parser.parse_args()
    model_file = args.model
    max_epochs = args.epoch
    ratio = args.ratio
    batch_size = args.batchsize
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    need_train = args.train

    model_name, data_name = parse_model_filename(model_file)

    # set logging config. Run this before logging anything! It is done here,
    # ahead of the "nothing to do" warning below, so that warning goes
    # through the configured logger as well.
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # Which attack should apply?
    attack_list = []
    if args.bim:
        attack_list.append('BIM')
    if args.carlini:
        attack_list.append('Carlini')
    if args.deepfool:
        attack_list.append('DeepFool')
    if args.fgsm:
        attack_list.append('FGSM')
    if args.saliency:
        attack_list.append('Saliency')

    # Quit, if there is nothing to do.
    if not attack_list:
        logger.warning('Did not receive any attack. Exit')
        sys.exit(0)

    # The labels and the clean inputs are taken from the files saved for the
    # first selected attack.
    y_file = os.path.join(
        'save', f'{model_name}_{data_name}_{attack_list[0]}_y.npy')
    attack_files = [
        os.path.join(
            'save', f'{model_name}_{data_name}_{attack_list[0]}_x.npy')
    ]
    for attack_name in attack_list:
        attack_files.append(os.path.join(
            'save', f'{model_name}_{data_name}_{attack_name}_adv.npy'))
    # the 1st file is the clean inputs
    attack_list = ['clean'] + attack_list

    # Do I need to train the adversarial-training model?
    pretrain_file = f'AdvTrain_{model_name}_{data_name}.pt'
    if not os.path.exists(os.path.join('save', pretrain_file)):
        need_train = True

    # show parameters
    print(f'[{LOG_NAME}] Running adversarial training on {model_name}...')
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model file :%s', model_file)
    logger.info('model :%s', model_name)
    logger.info('dataset :%s', data_name)
    logger.info('max_epochs :%d', max_epochs)
    # ratio is a float; '%d' would truncate e.g. 0.5 to 0 in the log
    logger.info('ratio :%f', ratio)
    logger.info('batch_size :%d', batch_size)
    logger.info('seed :%d', seed)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('need train :%r', need_train)
    logger.info('attacks :%s', ', '.join(attack_list))

    # check files
    for file_name in [model_file, y_file] + attack_files:
        if not os.path.exists(file_name):
            logger.error('%s does not exist!', file_name)
            raise FileNotFoundError('{} does not exist!'.format(file_name))

    # reset seed
    master_seed(seed)

    # select data
    dc = get_data_container(
        data_name,
        use_shuffle=True,
        use_normalize=True,
    )

    # select a model
    Model = get_model(model_name)
    model = Model()
    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        # these datasets use IrisNN sized from the data container
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        model = IrisNN(
            num_features=num_features,
            hidden_nodes=num_features * 4,
            num_classes=num_classes)
    classifier_mc = ModelContainerPT(model, dc)
    classifier_mc.load(model_file)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    # The defence is always built around a BIM attack with fixed parameters,
    # independent of the attacks selected for evaluation.
    attack = BIMContainer(
        classifier_mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False)

    adv_trainer = AdversarialTraining(classifier_mc, [attack])
    if need_train:
        adv_trainer.fit(
            max_epochs=max_epochs, batch_size=batch_size, ratio=ratio)
        adv_trainer.save(pretrain_file, overwrite=True)
    else:
        adv_trainer.load(os.path.join('save', pretrain_file))

    # traverse the clean set and all attacks
    y = np.load(y_file, allow_pickle=False)
    for adv_name, adv_file in zip(attack_list, attack_files):
        logger.debug('Load %s...', adv_file)
        adv = np.load(adv_file, allow_pickle=False)
        accuracy = classifier_mc.evaluate(adv, y)
        logger.info('Accuracy on %s set: %f', adv_name, accuracy)
        blocked_indices = adv_trainer.detect(adv, return_passed_x=False)
        logger.info('Blocked %d/%d samples on %s',
                    len(blocked_indices), len(adv), adv_name)
def main():
    """Evaluate a saved adversarial-training defence on the Iris dataset.

    Everything is hard coded: the pretrained ``IrisNN`` classifier and the
    defence parameters are loaded from ``save``, then the defence is run on
    the clean inputs and on the BIM, Carlini, DeepFool and FGSM adversarial
    sets. Accuracy and the number of blocked samples are logged.
    """
    data_name = 'Iris'
    set_logging('advTraining', data_name, True, True)

    dc = DataContainer(DATASET_LIST[data_name], get_data_path())
    dc()

    # Build the classifier with the layer sizes taken from the data
    # container and restore its saved parameters.
    n_features = dc.dim_data[0]
    n_classes = dc.num_classes
    classifier_mc = ModelContainerPT(
        IrisNN(
            num_features=n_features,
            hidden_nodes=n_features*4,
            num_classes=n_classes,
        ),
        dc,
    )
    classifier_mc.load(os.path.join('save', 'IrisNN_Iris_e200.pt'))
    test_accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', test_accuracy)

    bim_attack = BIMContainer(
        classifier_mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False)
    adv_trainer = AdversarialTraining(classifier_mc, [bim_attack])

    # Training is disabled; the defence is restored from disk instead.
    # adv_trainer.fit(max_epochs=100, batch_size=64, ratio=1)
    # adv_trainer.save('AdvTrain_IrisNN_Iris', overwrite=True)
    adv_trainer.load(os.path.join('save', 'AdvTrain_IrisNN_Iris.pt'))

    clean_x = np.load(
        os.path.join('save', 'IrisNN_Iris_BIM_x.npy'), allow_pickle=False)
    labels = np.load(
        os.path.join('save', 'IrisNN_Iris_BIM_y.npy'), allow_pickle=False)

    # Clean inputs: only the detection count is reported.
    blocked = adv_trainer.detect(clean_x, return_passed_x=False)
    logger.info('Blocked %d/%d samples on %s',
                len(blocked), len(clean_x), 'clean')

    # Adversarial sets, processed in this fixed order.
    adversarial_sets = (
        ('BIM', 'IrisNN_Iris_BIM_adv.npy'),
        ('Carlini', 'IrisNN_Iris_Carlini_adv.npy'),
        ('DeepFool', 'IrisNN_Iris_DeepFool_adv.npy'),
        ('FGSM', 'IrisNN_Iris_FGSM_adv.npy'),
    )
    for attack_name, adv_filename in adversarial_sets:
        adv_x = np.load(
            os.path.join('save', adv_filename), allow_pickle=False)
        adv_accuracy = classifier_mc.evaluate(adv_x, labels)
        logger.info('Accuracy on %s set: %f', attack_name, adv_accuracy)
        blocked = adv_trainer.detect(adv_x, return_passed_x=False)
        logger.info('Blocked %d/%d samples on %s',
                    len(blocked), len(adv_x), attack_name)
def main():
    """Command-line entry point: run the feature-squeezing defence.

    Parses a pretrained model file name, the squeezing filters to apply
    (selected by giving ``--depth``, ``--sigma`` or ``--kernelsize`` a
    positive value) and the attacks to evaluate. It loads the classifier,
    trains (or loads) the squeezer, and then reports, for the clean inputs
    and for every selected attack, the accuracy of the original classifier,
    the accuracy of the squeezer and how many samples the squeezer blocks.

    The script exits with status 0 when no filter or no attack is selected.

    Raises:
        FileNotFoundError: if the model file, the label file or any of the
            clean/adversarial ``.npy`` files is missing.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file which contains a pretrained model. The filename should in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-e', '--epoch', type=int, required=True,
        help='the number of max epochs for training')
    parser.add_argument(
        '-d', '--depth', type=int, default=0,
        help='The image color depth for input images. Apply Binary-Depth filter when receives a parameter')
    parser.add_argument(
        '--sigma', type=float, default=0,
        help='The Standard Deviation of Normal distribution. Apply Gaussian Noise filter when receives a parameter')
    parser.add_argument(
        '-k', '--kernelsize', type=int, default=0,
        help='The kernel size for Median filter. Apply median filter when receives a parameter')
    parser.add_argument(
        '-b', '--batchsize', type=int, default=128,
        help='batch size')
    parser.add_argument(
        '-T', '--train', action='store_true', default=False,
        help='Force the model to retrain without searching existing pretrained file')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-B', '--bim', action='store_true', default=False,
        help='Apply BIM attack')
    parser.add_argument(
        '-C', '--carlini', action='store_true', default=False,
        help='Apply Carlini L2 attack')
    parser.add_argument(
        '-D', '--deepfool', action='store_true', default=False,
        help='Apply DeepFool attack')
    parser.add_argument(
        '-F', '--fgsm', action='store_true', default=False,
        help='Apply FGSM attack')
    parser.add_argument(
        '-S', '--saliency', action='store_true', default=False,
        help='Apply Saliency Map attack')

    args = parser.parse_args()
    model_file = args.model
    max_epochs = args.epoch
    bit_depth = args.depth
    sigma = args.sigma
    kernel_size = args.kernelsize
    batch_size = args.batchsize
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    need_train = args.train

    model_name, data_name = parse_model_filename(model_file)

    # set logging config. Run this before logging anything! It is done here,
    # ahead of the "nothing to do" warning below, so that warning goes
    # through the configured logger as well.
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # Which filter should apply?
    filter_list = []
    if bit_depth > 0:
        filter_list.append('binary')
    if sigma > 0:
        filter_list.append('normal')
    if kernel_size > 0:
        filter_list.append('median')

    # Which attack should apply?
    attack_list = []
    if args.fgsm:
        attack_list.append('FGSM')
    if args.bim:
        attack_list.append('BIM')
    if args.deepfool:
        attack_list.append('DeepFool')
    if args.carlini:
        attack_list.append('Carlini')
    if args.saliency:
        attack_list.append('Saliency')

    # Quit, if there is nothing to do: at least one filter and at least one
    # attack are required.
    if not filter_list or not attack_list:
        logger.warning('Neither received any filter nor any attack. Exit')
        sys.exit(0)

    # The labels and the clean inputs are taken from the files saved for the
    # first selected attack.
    y_file = os.path.join(
        'save', f'{model_name}_{data_name}_{attack_list[0]}_y.npy')
    attack_files = [
        os.path.join(
            'save', f'{model_name}_{data_name}_{attack_list[0]}_x.npy')
    ]
    for attack_name in attack_list:
        attack_files.append(os.path.join(
            'save', f'{model_name}_{data_name}_{attack_name}_adv.npy'))
    # the 1st file is the clean inputs
    attack_list = ['clean'] + attack_list

    # Do I need to train the squeezer? Training is forced as soon as the
    # pretrained file of any selected filter is missing.
    pretrain_files = []
    for fname in filter_list:
        pretrain_file = build_squeezer_filename(
            model_name, data_name, max_epochs, fname)
        pretrain_files.append(pretrain_file)
        if not os.path.exists(os.path.join('save', pretrain_file)):
            need_train = True

    # show parameters
    print(f'[{LOG_NAME}] Running feature squeezing on {model_name}...')
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model file :%s', model_file)
    logger.info('model :%s', model_name)
    logger.info('dataset :%s', data_name)
    logger.info('max_epochs :%d', max_epochs)
    logger.info('bit_depth :%d', bit_depth)
    logger.info('sigma :%f', sigma)
    logger.info('kernel_size :%d', kernel_size)
    logger.info('batch_size :%d', batch_size)
    logger.info('seed :%d', seed)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('need train :%r', need_train)
    logger.info('filters :%s', ', '.join(filter_list))
    logger.info('attacks :%s', ', '.join(attack_list))
    logger.info('pretrained :%s', ', '.join(pretrain_files))

    # check files
    for file_name in [model_file, y_file] + attack_files:
        if not os.path.exists(file_name):
            logger.error('%s does not exist!', file_name)
            raise FileNotFoundError('{} does not exist!'.format(file_name))

    # reset seed
    master_seed(seed)

    # select data
    dc = get_data_container(
        data_name,
        use_shuffle=True,
        use_normalize=True,
    )

    # select a model
    Model = get_model(model_name)
    model = Model()
    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        # these datasets use IrisNN sized from the data container
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        model = IrisNN(
            num_features=num_features,
            hidden_nodes=num_features * 4,
            num_classes=num_classes)
    classifier_mc = ModelContainerPT(model, dc)
    classifier_mc.load(model_file)
    accuracy = classifier_mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    # initialize Squeezer
    squeezer = FeatureSqueezing(
        classifier_mc,
        filter_list,
        bit_depth=bit_depth,
        sigma=sigma,
        kernel_size=kernel_size,
        pretrained=True,
    )

    # train or load parameters for Squeezer
    # NOTE(review): save/load receive the classifier's model file name rather
    # than the names in pretrain_files; presumably FeatureSqueezing derives
    # its own per-filter file names from it -- confirm.
    if need_train:
        squeezer.fit(max_epochs=max_epochs, batch_size=batch_size)
        squeezer.save(model_file, True)
    else:
        squeezer.load(model_file)

    # traverse the clean set and all attacks
    y = np.load(y_file, allow_pickle=False)
    for adv_name, adv_file in zip(attack_list, attack_files):
        logger.debug('Load %s...', adv_file)
        adv = np.load(adv_file, allow_pickle=False)
        acc_og = classifier_mc.evaluate(adv, y)
        acc_squeezer = squeezer.evaluate(adv, y)
        logger.info('Accuracy on %s set - OG: %f, Squeezer: %f',
                    adv_name, acc_og, acc_squeezer)
        blocked_indices = squeezer.detect(adv, return_passed_x=False)
        logger.info('Blocked %d/%d samples on %s',
                    len(blocked_indices), len(adv), adv_name)