@classmethod
def setUpClass(cls):
    master_seed(SEED)

    # Generate synthetic data.
    x, y = make_classification(
        n_samples=SAMPLE_SIZE,
        n_features=NUM_FEATURES,
        n_informative=NUM_CLASSES,
        n_redundant=0,
        n_classes=NUM_CLASSES,
        n_clusters_per_class=1,
    )

    # Normalize each feature into the [0, 1] range.
    x_max = np.max(x, axis=0)
    x_min = np.min(x, axis=0)
    x = scale_normalize(x, x_min, x_max)

    # 80/20 training/test split.
    n_train = int(np.floor(SAMPLE_SIZE * 0.8))
    x_train = np.array(x[:n_train], dtype=np.float32)
    y_train = np.array(y[:n_train], dtype=np.int64)
    x_test = np.array(x[n_train:], dtype=np.float32)
    y_test = np.array(y[n_train:], dtype=np.int64)

    # Create the data container and assign the data manually.
    data_dict = get_synthetic_dataset_dict(
        SAMPLE_SIZE, NUM_CLASSES, NUM_FEATURES)
    dc = DataContainer(data_dict, get_data_path())
    dc.x_train = x_train
    dc.y_train = y_train
    dc.x_test = x_test
    dc.y_test = y_test

    # Train an Extra Tree classifier.
    classifier = ExtraTreeClassifier(
        criterion='gini',
        splitter='random',
    )
    cls.mc = ModelContainerTree(classifier, dc)
    cls.mc.fit()
    accuracy = cls.mc.evaluate(dc.x_test, dc.y_test)
    logger.info('Accuracy on test set: %f', accuracy)
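# A minimal sketch of the min-max scaling that `scale_normalize` is assumed
# to perform above (a hypothetical re-implementation for illustration only;
# the real helper is imported from this repo's utilities and may differ):
def _scale_normalize_sketch(x, x_min, x_max):
    """Rescale each feature column into the [0, 1] range."""
    # Guard against constant columns to avoid division by zero.
    span = np.where(x_max == x_min, 1.0, x_max - x_min)
    return (x - x_min) / span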
def main():
    parser = ap.ArgumentParser()
    parser.add_argument('-s', '--size', type=int, required=True,
                        help='the number of samples')
    parser.add_argument('-f', '--features', type=int, required=True,
                        help='the number of features')
    parser.add_argument('-c', '--classes', type=int, default=2,
                        help='the number of classes')
    parser.add_argument(
        '-i', '--iteration', type=int, default=MAX_ITERATIONS,
        help='the number of iterations that the experiment will repeat')
    parser.add_argument('-e', '--epoch', type=int, required=True,
                        help='the maximum number of epochs for training')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='set logger level to debug')
    parser.add_argument('-l', '--savelog', action='store_true', default=False,
                        help='save the logging file')
    parser.add_argument('-w', '--overwrite', action='store_true', default=False,
                        help='overwrite the existing file')
    args = parser.parse_args()
    sample_size = args.size
    num_features = args.features
    num_classes = args.classes
    max_iterations = args.iteration
    max_epochs = args.epoch
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # Set the logging config. Run this before logging anything!
    dname = f'SyntheticS{sample_size}F{num_features}C{num_classes}'
    set_logging(LOG_NAME, dname, verbose, save_log)

    print('[{}] Start experiment on {}...'.format(LOG_NAME, dname))
    logger.info('Start at    :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset     :%s', dname)
    logger.info('train size  :%d', sample_size)
    logger.info('num features:%d', num_features)
    logger.info('num classes :%d', num_classes)
    logger.info('iterations  :%d', max_iterations)
    logger.info('max_epochs  :%d', max_epochs)
    logger.info('verbose     :%r', verbose)
    logger.info('save_log    :%r', save_log)
    logger.info('overwrite   :%r', overwrite)

    result_file = name_handler(
        os.path.join('save', f'{LOG_NAME}_{dname}_i{max_iterations}'),
        'csv', overwrite=overwrite)
    adv_filename = name_handler(
        os.path.join('save', f'{LOG_NAME}_{dname}_AdvExamples'),
        'csv', overwrite=overwrite)
    adv_file = open(adv_filename, 'w')
    adv_file.write(','.join(TITLE_ADV) + '\n')
    res_file = open(result_file, 'w')
    res_file.write(','.join(TITLE_RESULTS) + '\n')

    for i in range(max_iterations):
        since = time.time()

        # Generate synthetic data.
        x, y = make_classification(
            n_samples=sample_size + 1000,
            n_features=num_features,
            n_informative=num_classes,
            n_redundant=0,
            n_classes=num_classes,
            n_clusters_per_class=1,
        )

        # Normalize the data.
        x_max = np.max(x, axis=0)
        x_min = np.min(x, axis=0)
        # NOTE: The Carlini attack expects the data in range [0, 1],
        # so no mean-centering is applied.
        # x_mean = np.mean(x, axis=0)
        # x = scale_normalize(x, x_min, x_max, x_mean)
        x = scale_normalize(x, x_min, x_max)

        # Training/test split.
        # NOTE: The test set has a fixed size of 1000 samples.
        x_train = np.array(x[:-1000], dtype=np.float32)
        y_train = np.array(y[:-1000], dtype=np.int64)
        x_test = np.array(x[-1000:], dtype=np.float32)
        y_test = np.array(y[-1000:], dtype=np.int64)

        # Create the data container and assign the data manually.
        data_dict = get_synthetic_dataset_dict(
            sample_size + 1000, num_classes, num_features)
        dc = DataContainer(data_dict, get_data_path())
        dc.x_train = x_train
        dc.y_train = y_train
        dc.x_test = x_test
        dc.y_test = y_test

        experiment(i, dc, max_epochs, adv_file, res_file)

        time_elapsed = time.time() - since
        print('Completed {} [{}/{}]: {:d}m {:2.1f}s'.format(
            dname, i + 1, max_iterations,
            int(time_elapsed // 60), time_elapsed % 60))

    adv_file.close()
    res_file.close()
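# Assumed entry point: the script is expected to be executed directly.
# Example invocation (the script name is a placeholder; the flags match the
# parser defined in main() above):
#
#   python run_synthetic.py -s 1000 -f 4 -c 2 -e 50 -v -l -w
if __name__ == '__main__':
    main()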