def save(self, filename):
    """Write the recorded cross-validation results to a CSV file.

    The output path is obtained by passing ``save/<filename>`` through
    ``name_handler`` with the ``csv`` extension and ``overwrite=False``.
    The file starts with a header row, followed by one row per entry of
    ``self.folds``; each row takes the value at the same index from every
    result list.

    Args:
        filename: Base name of the output file inside the ``save`` folder.
    """
    out_path = name_handler(
        os.path.join('save', filename), 'csv', overwrite=False)
    header = [
        'n_fold',
        'k2',
        'zeta',
        'kappa',
        'gamma',
        'score',
        'blk_cleans',
        'blk_advs'
    ]
    with open(out_path, 'w') as out:
        out.write(','.join(header) + '\n')
        # Index-based on purpose: a result list shorter than self.folds
        # raises IndexError instead of being silently truncated.
        for idx in range(len(self.folds)):
            record = (
                self.folds[idx],
                self.k2s[idx],
                self.zetas[idx],
                self.kappas[idx],
                self.gammas[idx],
                self.scores[idx],
                self.blk_cleans[idx],
                self.blk_advs[idx],
            )
            out.write(','.join(map(str, record)) + '\n')
def main():
    """Run the tree-model filter experiment repeatedly and save a CSV summary.

    Command-line options select the dataset, the number of repetitions and
    which filters are active. A filter is enabled only when its option is
    greater than zero: ``--depth`` enables 'binary', ``--sigma`` enables
    'normal' and ``--kernelsize`` enables 'median'.

    One row (``Index,Clean,DecisionTreeAttack``) is written per recorded
    run. ``Index`` counts from 1 up to the requested number of iterations.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-d', '--dataset', type=str, required=True,
        help='Name of the dataset')
    parser.add_argument(
        '--depth', type=int, default=0,
        help='The image color depth for input images. Apply Binary-Depth filter when receives a parameter')
    parser.add_argument(
        '-s', '--sigma', type=float, default=0,
        help='The Standard Deviation of Normal distribution. Apply Gaussian Noise filter when receives a parameter')
    parser.add_argument(
        '-k', '--kernelsize', type=int, default=0,
        help='The kernel size for Median filter. Apply median filter when receives a parameter')
    parser.add_argument(
        '-i', '--iteration', type=int, default=MAX_ITERATIONS,
        help='the number of iterations that the experiment will repeat')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')
    args = parser.parse_args()
    data_name = args.dataset
    max_iterations = args.iteration
    bit_depth = args.depth
    sigma = args.sigma
    kernel_size = args.kernelsize
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, data_name, verbose, save_log)

    # Which filter should apply? Only options given a positive value count.
    filter_list = []
    if bit_depth > 0:
        filter_list.append('binary')
    if sigma > 0:
        filter_list.append('normal')
    if kernel_size > 0:
        filter_list.append('median')

    result_filename = name_handler(
        os.path.join('save', LOG_NAME + '_' + data_name + '_' + 'tree'),
        'csv',
        overwrite=overwrite)

    # show parameters
    print(f'[{LOG_NAME}] Running tree model...')
    logger.info('Start at :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset :%s', data_name)
    logger.info('iterations :%d', max_iterations)
    logger.info('bit_depth :%d', bit_depth)
    logger.info('sigma :%f', sigma)
    logger.info('kernel_size :%d', kernel_size)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('overwrite :%r', overwrite)
    logger.info('filename :%s', result_filename)

    # NOTE: Adversarial examples cannot be generated once and reused. Each
    # classification model depends on its training set, so the models are
    # not identical between runs and neither are their adversarial examples.
    with open(result_filename, 'w') as file:
        file.write(','.join(['Index', 'Clean', 'DecisionTreeAttack']) + '\n')
        i = 1
        while i <= max_iterations:
            num_blk_clean, num_blk_adv = experiment(
                data_name, filter_list, bit_depth, sigma, kernel_size)
            # A result of -1 is not recorded and the run is repeated
            # (presumably a failed attack -- confirm against experiment()).
            # NOTE(review): this retries without bound if experiment()
            # keeps returning -1.
            if num_blk_adv == -1:
                continue
            # Write the row before advancing the counter so that the Index
            # column runs 1..max_iterations (it used to run 2..N+1).
            file.write(f'{i},{num_blk_clean},{num_blk_adv}\n')
            i += 1
def main():
    """Run the applicability-domain tree experiment and store a CSV summary.

    Reads the dataset name, iteration count and a JSON parameter file from
    the command line, configures logging, then calls ``experiment`` once
    per iteration. Each call appends one ``Index,Clean,DecisionTreeAttack``
    row to the result file; ``Index`` is the zero-based iteration number.
    """
    cli = ap.ArgumentParser()
    cli.add_argument('-d', '--dataset', type=str, required=True,
                     help='Name of the dataset')
    cli.add_argument('-i', '--iteration', type=int, default=MAX_ITERATIONS,
                     help='the number of iterations that the experiment will repeat')
    cli.add_argument('-p', '--param', type=str, required=True,
                     help='a JSON config file which contains the parameters for the applicability domain')
    cli.add_argument('-v', '--verbose', action='store_true', default=False,
                     help='set logger level to debug')
    cli.add_argument('-l', '--savelog', action='store_true', default=False,
                     help='save logging file')
    cli.add_argument('-w', '--overwrite', action='store_true', default=False,
                     help='overwrite the existing file')
    opts = cli.parse_args()

    # Logging has to be configured before the first log call.
    set_logging(LOG_NAME, opts.dataset, opts.verbose, opts.savelog)

    # Parameters for the Applicability Domain come from the JSON file.
    with open(opts.param) as param_json:
        params = json.load(param_json)

    csv_path = name_handler(
        os.path.join('save', '_'.join((LOG_NAME, opts.dataset, 'tree'))),
        'csv',
        overwrite=opts.overwrite)

    # Echo the received parameters.
    print(f'[{LOG_NAME}] Running tree model...')
    logger.info('Start at :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset :%s', opts.dataset)
    logger.info('iterations :%d', opts.iteration)
    logger.info('param file :%s', opts.param)
    logger.info('verbose :%r', opts.verbose)
    logger.info('save_log :%r', opts.savelog)
    logger.info('overwrite :%r', opts.overwrite)
    logger.info('filename :%s', csv_path)
    logger.debug('params :%s', str(params))

    # NOTE: Adversarial examples cannot be generated once and reused. Each
    # classification model depends on its training set, so the models are
    # not identical between runs and neither are their adversarial examples.
    header = ('Index', 'Clean', 'DecisionTreeAttack')
    with open(csv_path, 'w') as out_file:
        out_file.write(','.join(header) + '\n')
        for run in range(opts.iteration):
            num_blk_clean, num_blk_adv = experiment(opts.dataset, params)
            out_file.write(f'{run},{num_blk_clean},{num_blk_adv}\n')
def main():
    """Run the experiment on freshly generated synthetic data.

    For every iteration a new classification problem is generated with
    ``make_classification``, rescaled with ``scale_normalize`` and split so
    that the last 1000 samples form the test set. The split is assigned to
    a ``DataContainer`` and passed to ``experiment`` together with the two
    open CSV files (adversarial examples and results).

    Both output files are opened in a ``with`` block, so they are closed
    even when an iteration raises.
    """
    parser = ap.ArgumentParser()
    parser.add_argument('-s', '--size', type=int, required=True,
                        help='the number of sample size')
    parser.add_argument('-f', '--features', type=int, required=True,
                        help='the number of features')
    parser.add_argument('-c', '--classes', type=int, default=2,
                        help='the number of classes')
    parser.add_argument(
        '-i', '--iteration', type=int, default=MAX_ITERATIONS,
        help='the number of iterations that the experiment will repeat')
    parser.add_argument('-e', '--epoch', type=int, required=True,
                        help='the number of max epochs for training')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='set logger level to debug')
    parser.add_argument('-l', '--savelog', action='store_true', default=False,
                        help='save logging file')
    parser.add_argument('-w', '--overwrite', action='store_true',
                        default=False, help='overwrite the existing file')
    args = parser.parse_args()
    sample_size = args.size
    num_features = args.features
    num_classes = args.classes
    max_iterations = args.iteration
    max_epochs = args.epoch
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    dname = f'SyntheticS{sample_size}F{num_features}C{num_classes}'
    set_logging(LOG_NAME, dname, verbose, save_log)

    print('[{}] Start experiment on {}...'.format(LOG_NAME, dname))
    logger.info('Start at :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset :%s', dname)
    logger.info('train size :%d', sample_size)
    logger.info('num features:%d', num_features)
    logger.info('num classes :%d', num_classes)
    logger.info('iterations :%d', max_iterations)
    logger.info('max_epochs :%d', max_epochs)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('overwrite :%r', overwrite)

    result_path = name_handler(
        os.path.join('save', f'{LOG_NAME}_{dname}_i{max_iterations}'),
        'csv',
        overwrite=overwrite)
    adv_path = name_handler(
        os.path.join('save', f'{LOG_NAME}_{dname}_AdvExamples'),
        'csv',
        overwrite=overwrite)

    # Context managers guarantee both files are flushed and closed even if
    # an iteration fails part-way through.
    with open(adv_path, 'w') as adv_file, open(result_path, 'w') as res_file:
        adv_file.write(','.join(TITLE_ADV) + '\n')
        res_file.write(','.join(TITLE_RESULTS) + '\n')

        for i in range(max_iterations):
            since = time.time()

            # generate synthetic data (1000 extra samples for the test set)
            x, y = make_classification(
                n_samples=sample_size + 1000,
                n_features=num_features,
                n_informative=num_classes,
                n_redundant=0,
                n_classes=num_classes,
                n_clusters_per_class=1,
            )

            # normalize data
            x_max = np.max(x, axis=0)
            x_min = np.min(x, axis=0)
            # NOTE: Carlini attack expects the data in range [0, 1]
            x = scale_normalize(x, x_min, x_max)

            # training/test split
            # NOTE: test set has fixed size
            # np.int64 replaces the np.long alias, which is missing from
            # NumPy 1.24-1.26 and raises AttributeError there.
            x_train = np.array(x[:-1000], dtype=np.float32)
            y_train = np.array(y[:-1000], dtype=np.int64)
            x_test = np.array(x[-1000:], dtype=np.float32)
            y_test = np.array(y[-1000:], dtype=np.int64)

            # create data container
            data_dict = get_synthetic_dataset_dict(
                sample_size + 1000, num_classes, num_features)
            dc = DataContainer(data_dict, get_data_path())

            # assign data manually
            dc.x_train = x_train
            dc.y_train = y_train
            dc.x_test = x_test
            dc.y_test = y_test

            experiment(i, dc, max_epochs, adv_file, res_file)

            time_elapsed = time.time() - since
            print('Completed {} [{}/{}]: {:d}m {:2.1f}s'.format(
                dname,
                i + 1,
                max_iterations,
                int(time_elapsed // 60),
                time_elapsed % 60))
def main():
    """Cross-validate the applicability-domain parameters of a trained model.

    Steps, in order:

    1. Parse the command line and derive the model and dataset names from
       the model file name with ``parse_model_filename``.
    2. Check that the model, parameter and (optional) adversarial-example
       files exist; exit with status 1 if one is missing.
    3. Load the search ranges from the JSON parameter file, seed the random
       number generator, load the dataset and the pretrained model.
    4. Fit ``CrossValidation`` using a BIM attack.
    5. If an adversarial-example file was given, build a new
       ``ApplicabilityDomainContainer`` from the parameters found and log
       how many clean and adversarial samples it blocks.
    6. Save the cross-validation results unless ``--ignore`` was given.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='a file which contains a pretrained model. The filename should in "<model>_<dataset>_e<max epochs>[_<date>].pt" format')
    parser.add_argument(
        '-p', '--param', type=str, required=True,
        help='a JSON config file which contains the parameters for the cross validation')
    parser.add_argument(
        '-a', '--adv', type=str,
        help='file name of adv. examples for testing. If it\'s none, the program will ignore testing. The name should in "<model>_<dataset>_<attack>_adv.npy" format')
    parser.add_argument(
        '-s', '--seed', type=int, default=4096,
        help='the seed for random number generator')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-i', '--ignore', action='store_true', default=False,
        help='Ignore saving the results. Only returns the results from terminal.')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')
    args = parser.parse_args()
    model_file = args.model
    param_file = args.param
    adv_file = args.adv
    seed = args.seed
    verbose = args.verbose
    save_log = args.savelog
    does_ignore = args.ignore
    overwrite = args.overwrite

    model_name, data_name = parse_model_filename(model_file)

    # set logging config. Run this before logging anything!
    set_logging('cross_validation', data_name, verbose, save_log)

    # check files
    # A missing input is a failure, so exit with a non-zero status; the
    # previous exit code 0 reported success to the calling shell.
    for file_path in [model_file, param_file]:
        if not os.path.exists(file_path):
            logger.warning('%s does not exist. Exit.', file_path)
            sys.exit(1)
    if adv_file is not None and not os.path.exists(adv_file):
        logger.warning('%s does not exist. Exit.', adv_file)
        sys.exit(1)

    # read parameters
    with open(param_file) as param_json:
        params = json.load(param_json)

    # show parameters
    print('[cv] Running cross validation on {} with {}...'.format(
        model_file, data_name))
    logger.info('Start at : %s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('model file :%s', model_file)
    logger.info('adv file :%s', adv_file)
    logger.info('model :%s', model_name)
    logger.info('dataset :%s', data_name)
    logger.info('param file :%s', param_file)
    logger.info('seed :%d', seed)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('Ignore saving :%r', does_ignore)
    logger.info('overwrite :%r', overwrite)
    logger.debug('params :%s', str(params))

    # load parameters
    k_range = params['k_range']
    z_range = params['z_range']
    kappa_range = params['kappa_range']
    gamma_range = params['gamma_range']
    epsilon = params['epsilon']
    num_folds = params['num_folds']
    batch_size = params['batch_size']
    sample_ratio = params['sample_ratio']
    logger.info('k_range :%s', str(k_range))
    logger.info('z_range :%s', str(z_range))
    logger.info('kappa_range :%s', str(kappa_range))
    logger.info('gamma_range :%s', str(gamma_range))
    logger.info('epsilon :%.1f', epsilon)
    logger.info('num_folds :%d', num_folds)
    logger.info('batch_size :%d', batch_size)
    logger.info('sample_ratio :%.1f', sample_ratio)

    # reset seed
    master_seed(seed)

    dc = DataContainer(DATASET_LIST[data_name], get_data_path())
    dc(shuffle=True, normalize=True, size_train=0.8)
    logger.info('Sample size: %d', len(dc))

    Model = get_model(model_name)
    # these models require extra keyword arguments
    if data_name in ('BankNote', 'HTRU2', 'Iris', 'WheatSeed'):
        num_classes = dc.num_classes
        num_features = dc.dim_data[0]
        kwargs = {
            'num_features': num_features,
            'hidden_nodes': num_features*4,
            'num_classes': num_classes,
        }
        model = Model(**kwargs)
    else:
        model = Model()
    logger.info('Use %s model', model.__class__.__name__)

    mc = ModelContainerPT(model, dc)
    mc.load(model_file)
    accuracy = mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)

    ad = ApplicabilityDomainContainer(
        mc, hidden_model=model.hidden_model, sample_ratio=sample_ratio)
    cross_validation = CrossValidation(
        ad,
        num_folds=num_folds,
        k_range=k_range,
        z_range=z_range,
        kappa_range=kappa_range,
        gamma_range=gamma_range,
        epsilon=epsilon,
    )
    bim_attack = BIMContainer(
        mc,
        eps=0.3,
        eps_step=0.1,
        max_iter=100,
        targeted=False,
    )
    cross_validation.fit(bim_attack)

    # test optimal parameters
    if adv_file is not None:
        postfix = ['adv', 'pred', 'x', 'y']
        # Swap only the LAST '_adv' in the path for each suffix, so that a
        # directory or model name containing '_adv' is left untouched.
        # Without any '_adv' the path is used unchanged, as before.
        head, sep, tail = adv_file.rpartition('_adv')
        data_files = [
            head + '_' + s + tail if sep else adv_file for s in postfix]
        adv = np.load(data_files[0], allow_pickle=False)
        pred_adv = np.load(data_files[1], allow_pickle=False)
        x = np.load(data_files[2], allow_pickle=False)
        pred = np.load(data_files[3], allow_pickle=False)

        # fetch optimal parameters
        ad = ApplicabilityDomainContainer(
            mc,
            hidden_model=model.hidden_model,
            k2=cross_validation.k2,
            reliability=cross_validation.reliability,
            sample_ratio=sample_ratio,
            kappa=cross_validation.kappa,
            confidence=cross_validation.confidence,
        )
        logger.info('Params: %s', str(ad.params))
        ad.fit()
        blocked_indices = ad.detect(x, pred, return_passed_x=False)
        logger.info('Blocked %d/%d on clean data',
                    len(blocked_indices), len(x))
        blocked_indices = ad.detect(adv, pred_adv, return_passed_x=False)
        logger.info('Blocked %d/%d on adv. examples.',
                    len(blocked_indices), len(adv))

    # save results
    if not does_ignore:
        # NOTE(review): if save() also routes its argument through
        # name_handler, confirm the extension is not appended twice.
        file_name = name_handler(
            model_name + '_' + data_name, 'csv', overwrite=overwrite)
        cross_validation.save(file_name)
def main():
    """Repeat the attack/defence experiment and stream results to CSV files.

    Parses the dataset name, model name, iteration count and max training
    epochs from the command line, configures logging, then calls
    ``experiment`` once per iteration with two open CSV files: one headed
    by ``TITLE_ADV`` and one headed by ``TITLE_RESULTS``.

    Both files are opened in a ``with`` block, so they are closed even when
    an iteration raises.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        '-d', '--dataset', type=str, required=True,
        help='Name of the dataset')
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help='Name of the model')
    parser.add_argument(
        '-i', '--iteration', type=int, default=MAX_ITERATIONS,
        help='the number of iterations that the experiment will repeat')
    parser.add_argument(
        '-e', '--epoch', type=int, required=True,
        help='the number of max epochs for training')
    parser.add_argument(
        '-v', '--verbose', action='store_true', default=False,
        help='set logger level to debug')
    parser.add_argument(
        '-l', '--savelog', action='store_true', default=False,
        help='save logging file')
    parser.add_argument(
        '-w', '--overwrite', action='store_true', default=False,
        help='overwrite the existing file')
    # NOTE: the JSON files for the parameters are hard coded.
    # We expect to run multiple attacks and defences in one iteration.
    args = parser.parse_args()
    dname = args.dataset
    mname = args.model
    max_iterations = args.iteration
    max_epochs = args.epoch
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    set_logging(LOG_NAME, dname, verbose, save_log)

    print('[{}] Start experiment on {} {} i{} e{}...'.format(
        LOG_NAME, mname, dname, max_iterations, max_epochs))
    logger.info('Start at :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset :%s', dname)
    logger.info('model :%s', mname)
    logger.info('iterations :%d', max_iterations)
    logger.info('max_epochs :%d', max_epochs)
    logger.info('verbose :%r', verbose)
    logger.info('save_log :%r', save_log)
    logger.info('overwrite :%r', overwrite)

    adv_path = name_handler(
        os.path.join('save', f'{LOG_NAME}_{dname}_{mname}_acc'),
        'csv',
        overwrite=overwrite)
    result_path = name_handler(
        os.path.join(
            'save', f'{LOG_NAME}_{dname}_{mname}_res_i{max_iterations}'),
        'csv',
        overwrite=overwrite)

    # Context managers guarantee both files are flushed and closed even if
    # an iteration fails part-way through.
    with open(adv_path, 'w') as adv_file, open(result_path, 'w') as res_file:
        adv_file.write(','.join(TITLE_ADV) + '\n')
        res_file.write(','.join(TITLE_RESULTS) + '\n')

        for i in range(max_iterations):
            since = time.time()
            experiment(i, dname, mname, max_epochs, adv_file, res_file)
            time_elapsed = time.time() - since
            print('Completed {} [{}/{}]: {:d}m {:2.1f}s'.format(
                dname,
                i+1,
                max_iterations,
                int(time_elapsed // 60),
                time_elapsed % 60))
def test_name_handler(self):
    """name_handler('test', 'test', overwrite=True) must give 'test.test'."""
    expected = 'test.test'
    actual = name_handler('test', 'test', overwrite=True)
    self.assertEqual(actual, expected)