def main(args):
    """
    Prepare the run directory and logger, then launch the experiment.

    The run directory is
    <out_dir>/<dataset>/<criterion>/rs_<rs>/topd_<topd>/k_<k>/sub_<subsample_size>.
    If `args.append_results` is set and the directory already contains
    'results.npy', the run is skipped and nothing in the directory is touched.

    Args:
        args: parsed command-line namespace; reads criterion, out_dir,
            dataset, rs, topd, k, subsample_size and append_results.

    Raises:
        AssertionError: if `args.criterion` is not 'gini' or 'entropy'.
    """

    # assertions
    assert args.criterion in ['gini', 'entropy']

    # create output dir
    out_dir = os.path.join(args.out_dir,
                           args.dataset,
                           args.criterion,
                           'rs_{}'.format(args.rs),
                           'topd_{}'.format(args.topd),
                           'k_{}'.format(args.k),
                           'sub_{}'.format(args.subsample_size))
    os.makedirs(out_dir, exist_ok=True)

    # skip experiment if results already exist; this check must run BEFORE
    # the directory is cleared, otherwise the results file it looks for
    # would presumably already be gone (assumes clear_dir empties out_dir)
    if args.append_results and os.path.exists(os.path.join(out_dir, 'results.npy')):
        print('results exist: {}'.format(out_dir))
        return

    # clear previous contents
    print_util.clear_dir(out_dir)

    # create logger
    log_fp = os.path.join(out_dir, 'log.txt')
    logger = print_util.get_logger(log_fp)

    try:
        logger.info(args)
        logger.info(datetime.now())

        # run experiment
        experiment(args, logger, out_dir, seed=args.rs)

    finally:
        # remove logger, even when the experiment raises
        print_util.remove_logger(logger)
def main(args):
    """
    Build the output directory for the selected method, then run the experiment.

    The base directory is <out_dir>/<dataset>/<tree_type>/rs<rs>, where the
    dataset name gets a '_<train_frac>' suffix (with '.' written as 'p') when
    0 < train_frac < 1. A method-specific subdirectory is appended for the
    first of trex, teknn, maple, inf_k, mmd, proto that is enabled.

    Args:
        args: parsed command-line namespace; reads dataset, train_frac,
            out_dir, tree_type, rs, trex, teknn, maple, inf_k, mmd, proto,
            and (depending on the method) kernel_model and tree_kernel.
    """

    # dataset name, tagged with the training fraction when subsampling
    dataset = args.dataset
    if 0.0 < args.train_frac < 1.0:
        dataset += '_{}'.format(str(args.train_frac).replace('.', 'p'))

    out_dir = os.path.join(args.out_dir, dataset, args.tree_type,
                           'rs{}'.format(args.rs))

    # method-specific subdirectory; at most one branch applies
    if args.trex:
        out_dir = os.path.join(out_dir, args.kernel_model, args.tree_kernel)

    elif args.teknn:
        out_dir = os.path.join(out_dir, 'teknn', args.tree_kernel)

    elif args.maple:
        out_dir = os.path.join(out_dir, 'maple')

    elif args.inf_k is not None:
        out_dir = os.path.join(out_dir, 'leaf_influence')

    elif args.mmd:
        out_dir = os.path.join(out_dir, 'mmd')

    elif args.proto:
        out_dir = os.path.join(out_dir, 'proto')

    # make logger
    os.makedirs(out_dir, exist_ok=True)
    logger = print_util.get_logger(os.path.join(out_dir, 'log.txt'))

    try:
        logger.info(args)
        logger.info(datetime.now())

        seed = args.rs
        logger.info('\nSeed: {}'.format(seed))

        experiment(args, logger, out_dir, seed=seed)

    finally:
        # remove logger, even when the experiment raises
        print_util.remove_logger(logger)
def main(args):
    """
    Run the experiment once for every random seed in `args.rs`.

    Each seed gets its own directory <out_dir>/<dataset>/<tree_type>/rs<seed>,
    extended by a method-specific subdirectory for the first of trex, teknn,
    maple, inf_k that is enabled, and its own 'log.txt' logger.

    Args:
        args: parsed command-line namespace; reads dataset, rs (an iterable
            of seeds), out_dir, tree_type, trex, teknn, maple, inf_k, and
            (depending on the method) kernel_model and tree_kernel.
    """
    dataset = args.dataset

    for rs in args.rs:
        out_dir = os.path.join(args.out_dir, dataset, args.tree_type,
                               'rs{}'.format(rs))

        # method-specific subdirectory; at most one branch applies
        if args.trex:
            out_dir = os.path.join(out_dir, args.kernel_model, args.tree_kernel)

        elif args.teknn:
            out_dir = os.path.join(out_dir, 'teknn', args.tree_kernel)

        elif args.maple:
            out_dir = os.path.join(out_dir, 'maple')

        elif args.inf_k is not None:
            out_dir = os.path.join(out_dir, 'leaf_influence')

        # make logger
        os.makedirs(out_dir, exist_ok=True)
        logger = print_util.get_logger(os.path.join(out_dir, 'log.txt'))

        try:
            logger.info(args)
            logger.info('\nSeed: {}'.format(rs))

            experiment(args, logger, out_dir, seed=rs)

        finally:
            # remove this seed's logger, even when the experiment raises
            print_util.remove_logger(logger)
def main(args):
    """
    Prepare the run directory and logger, then launch the experiment.

    The run directory is <out_dir>/<dataset>/<criterion>/<method>/rs_<rs>.
    If `args.append_results` is set and the directory already contains
    'results.npy', the function returns without running anything.

    Args:
        args: parsed command-line namespace; reads out_dir, dataset,
            criterion, method, rs and append_results.
    """

    # create output dir
    out_dir = os.path.join(args.out_dir,
                           args.dataset,
                           args.criterion,
                           args.method,
                           'rs_{}'.format(args.rs))
    log_fp = os.path.join(out_dir, 'log.txt')
    os.makedirs(out_dir, exist_ok=True)

    # skip experiment if results already exist
    if args.append_results and os.path.exists(os.path.join(out_dir, 'results.npy')):
        return

    # create logger
    logger = print_util.get_logger(log_fp)

    try:
        logger.info(args)
        logger.info(datetime.now())

        # run experiment
        experiment(args, logger, out_dir)

    finally:
        # remove logger, even when the experiment raises
        print_util.remove_logger(logger)
def main(args):
    """
    Ensure the output directory exists, open its 'log.txt' logger, record the
    arguments and the current time, and hand off to `create_csv`.

    Args:
        args: parsed command-line namespace; reads out_dir.
    """
    target_dir = os.path.join(args.out_dir)
    os.makedirs(target_dir, exist_ok=True)

    # logger writing to <out_dir>/log.txt
    log_path = os.path.join(target_dir, 'log.txt')
    log = print_util.get_logger(log_path)
    log.info(args)
    log.info(datetime.now())

    create_csv(args, target_dir, log)
def main(args):
    """
    Create <out_dir>/<dataset>/<model>, open a logger named '<dataset>.txt'
    inside it, and run the experiment with seed `args.rs`.

    Args:
        args: parsed command-line namespace; reads dataset, out_dir, model
            and rs.
    """
    run_dir = os.path.join(args.out_dir, args.dataset, args.model)
    os.makedirs(run_dir, exist_ok=True)

    # the log file is named after the dataset rather than 'log.txt'
    log_name = '{}.txt'.format(args.dataset)
    log = print_util.get_logger(os.path.join(run_dir, log_name))
    log.info(args)

    experiment(args, log, run_dir, seed=args.rs)
def main(args):
    """
    Create <out_dir>/<dataset>/<tree_type>/<tree_kernel>, open its 'log.txt'
    logger, and run the experiment with seed `args.rs`.

    Args:
        args: parsed command-line namespace; reads dataset, out_dir,
            tree_type, tree_kernel and rs.
    """

    # make logger
    dataset = args.dataset
    out_dir = os.path.join(args.out_dir, dataset, args.tree_type, args.tree_kernel)
    os.makedirs(out_dir, exist_ok=True)
    logger = print_util.get_logger(os.path.join(out_dir, 'log.txt'))

    try:
        logger.info(args)
        experiment(args, logger, out_dir, seed=args.rs)

    finally:
        # remove logger, even when the experiment raises
        print_util.remove_logger(logger)
def main(args):
    """
    Log the best-scoring split thresholds of every feature and plot the
    distribution of those scores.

    For each column of the training data, candidate thresholds are obtained
    from `get_thresholds`, scored with `compute_scores`, sorted by ascending
    score, and the first `args.k` are logged and collected. The collected
    scores are plotted and saved as 'k_<k>.pdf' in <out_dir>/<dataset>.

    Args:
        args: parsed command-line namespace; reads out_dir, dataset,
            data_dir and k.
    """

    # output directory
    result_dir = os.path.join(args.out_dir, args.dataset)
    os.makedirs(result_dir, exist_ok=True)

    # logger
    log_path = os.path.join(result_dir, 'log.txt')
    log = print_util.get_logger(log_path)
    log.info('{}'.format(args))
    log.info('\ntimestamp: {}'.format(datetime.now()))

    # only the training split is used here
    X_train, _, y_train, _ = data_util.get_data(args.dataset, args.data_dir)
    log.info('X_train.shape: {}'.format(X_train.shape))

    feature_header = '\n[FEATURE {}] no. unique: {:,}, no. valid thresholds: {:,}'
    threshold_line = '  threshold value: {:.5f}, score: {:.5f}'

    # scores of the top-k thresholds, pooled over all features
    best_scores = []

    n_features = X_train.shape[1]
    for feature_ndx in range(n_features):
        column = X_train[:, feature_ndx]
        unique_vals = np.unique(column)

        candidates = get_thresholds(column, y_train)
        scored = compute_scores(candidates)
        log.info(feature_header.format(feature_ndx, len(unique_vals), len(candidates)))

        # ascending score order; each entry unpacks to (threshold, score)
        ranked = sorted(scored, key=lambda pair: pair[1])

        for threshold, score in ranked[:args.k]:
            log.info(threshold_line.format(threshold.v, score))
            best_scores.append(score)

    # distribution of the pooled top scores
    axis = sns.distplot(best_scores, rug=True, hist=False)
    title = '{}: Scores for Top {} Threshold(s) / Feature'.format(args.dataset.title(), args.k)
    axis.set_title(title)
    axis.set_xlabel('Gini index')
    axis.set_ylabel('Density')

    plot_path = os.path.join(result_dir, 'k_{}.pdf'.format(args.k))
    plt.savefig(plot_path, bbox_inches='tight')
def main(args):
    """
    Prepare the run directory, logging and stdout/stderr capture, then run
    the performance experiment.

    The run directory is
    <out_dir>/<dataset>/<criterion>/<'no_tune'|'tuned'>/rs_<rs>/<model>,
    with an extra 'bootstrap' level for the sklearn model when
    `args.bootstrap` is set. Its previous contents are cleared.

    Args:
        args: parsed command-line namespace; reads out_dir, dataset,
            criterion, no_tune, rs, model, and (depending on the model)
            bootstrap or topd.

    Raises:
        ValueError: if `args.model` is not a known model name.
        AssertionError: if the model is 'dare' and `args.topd` is not 0.
    """

    # create output dir; the tuning setting is part of the filepath
    tune_dir = 'no_tune' if args.no_tune else 'tuned'
    out_dir = os.path.join(args.out_dir, args.dataset, args.criterion,
                           tune_dir, 'rs_{}'.format(args.rs))

    # model-specific subdirectory
    if args.model == 'sklearn':
        out_dir = os.path.join(out_dir, args.model)

        if args.bootstrap:
            out_dir = os.path.join(out_dir, 'bootstrap')

    elif args.model == 'dare':
        assert args.topd == 0
        out_dir = os.path.join(out_dir, args.model)

    elif args.model in ['extra_trees', 'extra_trees_k1', 'borat']:
        out_dir = os.path.join(out_dir, args.model)

    else:
        raise ValueError('model {} unknown!'.format(args.model))

    # create output directory and clear any previous contents
    os.makedirs(out_dir, exist_ok=True)
    print_util.clear_dir(out_dir)

    # create logger
    logger = print_util.get_logger(os.path.join(out_dir, 'log.txt'))
    logger.info(args)
    logger.info(datetime.now())

    # write everything printed to stdout to this log file
    logfile, stdout, stderr = print_util.stdout_stderr_to_log(
        os.path.join(out_dir, 'log+.txt'))

    try:
        # run experiment
        performance(args, out_dir, logger)

    finally:
        # restore original stdout and stderr settings, even when the
        # experiment raises, so the process is not left redirected
        print_util.reset_stdout_stderr(logfile, stdout, stderr)
def main(args):
    """
    Repeat the experiment `args.repeats` times with consecutive seeds.

    Repeat i uses seed `args.rs + i` and writes to its own directory
    <out_dir>/<dataset>/<tree_type>/<tree_kernel>/rs<seed>, with its own
    'log.txt' logger.

    Args:
        args: parsed command-line namespace; reads dataset, repeats, rs,
            out_dir, tree_type and tree_kernel.
    """
    dataset = args.dataset

    for i in range(args.repeats):
        seed = args.rs + i
        rs_dir = os.path.join(args.out_dir, dataset, args.tree_type,
                              args.tree_kernel, 'rs{}'.format(seed))

        # make logger
        os.makedirs(rs_dir, exist_ok=True)
        logger = print_util.get_logger(os.path.join(rs_dir, 'log.txt'))

        try:
            logger.info(args)
            logger.info('Seed {}'.format(seed))

            experiment(args, logger, rs_dir, seed=seed)

        finally:
            # remove this repeat's logger, even when the experiment raises
            print_util.remove_logger(logger)
def main(args):
    """
    Create and clear <out_dir>/<dataset>/<criterion>/rs_<rs>, open its
    'log.txt' logger, and run the performance experiment.

    Args:
        args: parsed command-line namespace; reads out_dir, dataset,
            criterion and rs.
    """
    seed_dir = 'rs_{}'.format(args.rs)
    run_dir = os.path.join(args.out_dir, args.dataset, args.criterion, seed_dir)

    # start from an existing, emptied directory
    os.makedirs(run_dir, exist_ok=True)
    print_util.clear_dir(run_dir)

    # logger writing to <run_dir>/log.txt
    log_path = os.path.join(run_dir, 'log.txt')
    log = print_util.get_logger(log_path)
    log.info(args)
    log.info('timestamp: {}'.format(datetime.now()))

    performance(args, run_dir, log)
def main(args):
    """
    Create the output directory, open its 'log.txt' logger, and run the
    performance experiment.

    The directory is <out_dir>/<dataset>/<model_type>; when `args.no_tune`
    is set, 'trees_<n_estimators>/depth_<max_depth>' is appended.

    Args:
        args: parsed command-line namespace; reads out_dir, dataset,
            model_type, no_tune, and (when no_tune is set) n_estimators
            and max_depth.
    """
    path_parts = [args.out_dir, args.dataset, args.model_type]

    # untuned runs are separated by their fixed hyperparameters
    if args.no_tune:
        path_parts.append('trees_{}'.format(args.n_estimators))
        path_parts.append('depth_{}'.format(args.max_depth))

    run_dir = os.path.join(*path_parts)
    os.makedirs(run_dir, exist_ok=True)

    # logger writing to <run_dir>/log.txt
    log = print_util.get_logger(os.path.join(run_dir, 'log.txt'))
    log.info(args)
    log.info(datetime.now())

    performance(args, log)
def main(args):
    """
    Build the output directory for the selected method, then run the experiment.

    The base directory is <out_dir>/<dataset>/<tree_type>/<tree_kernel>;
    `args.kernel_model` is appended when trex is enabled, otherwise 'teknn'
    when teknn is enabled.

    Args:
        args: parsed command-line namespace; reads dataset, out_dir,
            tree_type, tree_kernel, trex, teknn, rs, and (for trex)
            kernel_model.
    """
    dataset = args.dataset
    out_dir = os.path.join(args.out_dir, dataset, args.tree_type, args.tree_kernel)

    # method-specific subdirectory; at most one branch applies
    if args.trex:
        out_dir = os.path.join(out_dir, args.kernel_model)

    elif args.teknn:
        out_dir = os.path.join(out_dir, 'teknn')

    # make logger
    os.makedirs(out_dir, exist_ok=True)
    logger = print_util.get_logger(os.path.join(out_dir, 'log.txt'))

    try:
        logger.info(args)

        seed = args.rs
        logger.info('\nSeed: {}'.format(seed))

        experiment(args, logger, out_dir, seed=seed)

    finally:
        # remove logger, even when the experiment raises
        print_util.remove_logger(logger)