Ejemplo n.º 1
0
def main(args):
    """Prepare the output directory and logger, then run a single experiment.

    The output directory is assembled from `args.out_dir`, `args.dataset`,
    `args.criterion` and the `rs`, `topd`, `k` and `subsample_size` settings.

    When `args.append_results` is set and a `results.npy` file is already
    present in that directory, the experiment is skipped and the directory
    is left untouched.

    Raises:
        AssertionError: if `args.criterion` is not 'gini' or 'entropy'.
    """

    # assertions
    assert args.criterion in ['gini', 'entropy']

    # create output dir
    out_dir = os.path.join(args.out_dir,
                           args.dataset,
                           args.criterion,
                           'rs_{}'.format(args.rs),
                           'topd_{}'.format(args.topd),
                           'k_{}'.format(args.k),
                           'sub_{}'.format(args.subsample_size))

    # skip experiment if results already exist; this check has to run BEFORE
    # the directory is cleared, otherwise the existing results would be
    # cleared first and the check could never succeed
    if args.append_results and os.path.exists(os.path.join(out_dir, 'results.npy')):
        print('results exist: {}'.format(out_dir))
        return

    # create output directory and clear previous contents
    os.makedirs(out_dir, exist_ok=True)
    print_util.clear_dir(out_dir)

    # create logger
    log_fp = os.path.join(out_dir, 'log.txt')
    logger = print_util.get_logger(log_fp)

    try:
        logger.info(args)
        logger.info(datetime.now())

        # run experiment
        experiment(args, logger, out_dir, seed=args.rs)

    finally:
        # remove logger, even if the experiment raised
        print_util.remove_logger(logger)
Ejemplo n.º 2
0
def main(args):
    """Resolve the output directory for the selected method and run the experiment.

    The directory is `<out_dir>/<dataset>/<tree_type>/rs<rs>`, optionally
    followed by a method-specific sub-path chosen from the first matching
    flag among `trex`, `teknn`, `maple`, `inf_k`, `mmd` and `proto`.
    """

    # dataset directory name; a training fraction strictly between 0 and 1
    # is appended with the decimal point replaced by 'p'
    dataset_name = args.dataset
    if 0.0 < args.train_frac < 1.0:
        frac_tag = str(args.train_frac).replace('.', 'p')
        dataset_name += '_{}'.format(frac_tag)

    base_dir = os.path.join(args.out_dir, dataset_name, args.tree_type,
                            'rs{}'.format(args.rs))

    # method-specific path components (empty when no method flag is set)
    if args.trex:
        method_parts = (args.kernel_model, args.tree_kernel)
    elif args.teknn:
        method_parts = ('teknn', args.tree_kernel)
    elif args.maple:
        method_parts = ('maple',)
    elif args.inf_k is not None:
        method_parts = ('leaf_influence',)
    elif args.mmd:
        method_parts = ('mmd',)
    elif args.proto:
        method_parts = ('proto',)
    else:
        method_parts = ()

    run_dir = os.path.join(base_dir, *method_parts) if method_parts else base_dir
    os.makedirs(run_dir, exist_ok=True)

    # make logger
    log_fp = os.path.join(run_dir, 'log.txt')
    logger = print_util.get_logger(log_fp)
    logger.info(args)
    logger.info(datetime.now())

    # run the experiment with the requested random seed
    logger.info('\nSeed: {}'.format(args.rs))
    experiment(args, logger, run_dir, seed=args.rs)

    print_util.remove_logger(logger)
Ejemplo n.º 3
0
def main(args):
    """Run the experiment once for every random seed listed in `args.rs`.

    Each seed gets its own directory `<out_dir>/<dataset>/<tree_type>/rs<seed>`
    (plus a method-specific sub-path) and its own `log.txt` logger.
    """

    dataset_name = args.dataset

    for seed in args.rs:
        seed_dir = os.path.join(args.out_dir, dataset_name, args.tree_type,
                                'rs{}'.format(seed))

        # method-specific path components (empty when no method flag is set)
        if args.trex:
            method_parts = (args.kernel_model, args.tree_kernel)
        elif args.teknn:
            method_parts = ('teknn', args.tree_kernel)
        elif args.maple:
            method_parts = ('maple',)
        elif args.inf_k is not None:
            method_parts = ('leaf_influence',)
        else:
            method_parts = ()

        run_dir = os.path.join(seed_dir, *method_parts) if method_parts else seed_dir
        os.makedirs(run_dir, exist_ok=True)

        # make logger
        log_fp = os.path.join(run_dir, 'log.txt')
        logger = print_util.get_logger(log_fp)
        logger.info(args)

        logger.info('\nSeed: {}'.format(seed))
        experiment(args, logger, run_dir, seed=seed)

        # release this seed's logger before the next iteration creates a new one
        print_util.remove_logger(logger)
Ejemplo n.º 4
0
def main(args):
    """Create the run directory and logger, then run the experiment.

    The run is skipped (silently) when `args.append_results` is set and a
    `results.npy` file already exists in the run directory.
    """

    # run directory: <out_dir>/<dataset>/<criterion>/<method>/rs_<rs>
    run_dir = os.path.join(args.out_dir, args.dataset, args.criterion,
                           args.method, 'rs_{}'.format(args.rs))
    os.makedirs(run_dir, exist_ok=True)

    # nothing to do if results from a previous run are already there
    results_fp = os.path.join(run_dir, 'results.npy')
    if args.append_results and os.path.exists(results_fp):
        return

    # set up logging
    logger = print_util.get_logger(os.path.join(run_dir, 'log.txt'))
    logger.info(args)
    logger.info(datetime.now())

    # run, then release the logger
    experiment(args, logger, run_dir)
    print_util.remove_logger(logger)
Ejemplo n.º 5
0
def main(args):
    """Create the output directory and a logger, then call `create_csv`."""

    target_dir = os.path.join(args.out_dir)
    os.makedirs(target_dir, exist_ok=True)

    # log to <out_dir>/log.txt, starting with the arguments and a timestamp
    log_fp = os.path.join(target_dir, 'log.txt')
    logger = print_util.get_logger(log_fp)
    logger.info(args)
    logger.info(datetime.now())

    create_csv(args, target_dir, logger)
Ejemplo n.º 6
0
def main(args):
    """Create `<out_dir>/<dataset>/<model>` and run the experiment there.

    The log file is named after the dataset (`<dataset>.txt`).
    """

    run_dir = os.path.join(args.out_dir, args.dataset, args.model)
    os.makedirs(run_dir, exist_ok=True)

    # make logger
    log_name = '{}.txt'.format(args.dataset)
    logger = print_util.get_logger(os.path.join(run_dir, log_name))
    logger.info(args)

    experiment(args, logger, run_dir, seed=args.rs)
Ejemplo n.º 7
0
def main(args):
    """Create `<out_dir>/<dataset>/<tree_type>/<tree_kernel>` and run the experiment."""

    run_dir = os.path.join(args.out_dir, args.dataset,
                           args.tree_type, args.tree_kernel)
    os.makedirs(run_dir, exist_ok=True)

    # make logger
    log_fp = os.path.join(run_dir, 'log.txt')
    logger = print_util.get_logger(log_fp)
    logger.info(args)

    # run with the requested seed, then release the logger
    experiment(args, logger, run_dir, seed=args.rs)
    print_util.remove_logger(logger)
Ejemplo n.º 8
0
def main(args):
    """Log the best-scoring thresholds of every feature and plot their scores.

    For each column of the training data, candidate thresholds are obtained
    from `get_thresholds`, scored with `compute_scores`, sorted by ascending
    score, and the first `args.k` are logged. The scores of all logged
    thresholds are plotted and saved as `k_<k>.pdf` in the output directory.
    """

    # output directory: <out_dir>/<dataset>
    out_dir = os.path.join(args.out_dir, args.dataset)
    os.makedirs(out_dir, exist_ok=True)

    # logger, starting with the arguments and a timestamp
    logger = print_util.get_logger(os.path.join(out_dir, 'log.txt'))
    logger.info('{}'.format(args))
    logger.info('\ntimestamp: {}'.format(datetime.now()))

    # load the data; only the training split is used here
    X_train, _, y_train, _ = data_util.get_data(args.dataset, args.data_dir)
    logger.info('X_train.shape: {}'.format(X_train.shape))

    # scores of the top thresholds, accumulated over all features
    top_scores = []

    n_features = X_train.shape[1]
    for feature_ndx in range(n_features):
        column = X_train[:, feature_ndx]

        n_unique = len(np.unique(column))
        thresholds = get_thresholds(column, y_train)
        scored = compute_scores(thresholds)

        logger.info(
            '\n[FEATURE {}] no. unique: {:,}, no. valid thresholds: {:,}'.
            format(feature_ndx, n_unique, len(thresholds)))

        # each scored item is a (threshold, score) pair; lowest scores first
        ranked = sorted(scored, key=lambda pair: pair[1])

        # report and collect the first k thresholds
        for threshold, score in ranked[:args.k]:
            logger.info('  threshold value: {:.5f}, score: {:.5f}'.format(
                threshold.v, score))
            top_scores.append(score)

    # density plot (with rug marks) of the collected scores
    # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
    # confirm the pinned seaborn version before upgrading.
    plot_title = '{}: Scores for Top {} Threshold(s) / Feature'.format(
        args.dataset.title(), args.k)
    ax = sns.distplot(top_scores, rug=True, hist=False)
    ax.set_title(plot_title)
    ax.set_xlabel('Gini index')
    ax.set_ylabel('Density')

    plot_fp = os.path.join(out_dir, 'k_{}.pdf'.format(args.k))
    plt.savefig(plot_fp, bbox_inches='tight')
Ejemplo n.º 9
0
def main(args):
    """Build the output directory for the chosen model and run `performance`.

    The directory is
    `<out_dir>/<dataset>/<criterion>/<no_tune|tuned>/rs_<rs>/<model>`, with an
    extra `bootstrap` level for the 'sklearn' model when `args.bootstrap` is
    set. While `performance` runs, stdout and stderr are redirected to
    `log+.txt`; they are restored afterwards even if `performance` raises.

    Raises:
        ValueError: if `args.model` is not one of the supported models.
        AssertionError: if `args.model` is 'dare' and `args.topd` is not 0.
    """

    # create output dir, including the tuning setting and random state
    tune_dir = 'no_tune' if args.no_tune else 'tuned'
    out_dir = os.path.join(args.out_dir, args.dataset, args.criterion,
                           tune_dir, 'rs_{}'.format(args.rs))

    # validate the model before anything is written to disk
    supported_models = ['sklearn', 'dare', 'extra_trees', 'extra_trees_k1', 'borat']
    if args.model not in supported_models:
        raise ValueError('model {} unknown!'.format(args.model))

    if args.model == 'dare':
        assert args.topd == 0

    # every supported model gets its own sub-directory
    out_dir = os.path.join(out_dir, args.model)

    if args.model == 'sklearn' and args.bootstrap:
        out_dir = os.path.join(out_dir, 'bootstrap')

    # create output directory and clear any previous contents
    os.makedirs(out_dir, exist_ok=True)
    print_util.clear_dir(out_dir)

    # create logger
    logger = print_util.get_logger(os.path.join(out_dir, 'log.txt'))
    logger.info(args)
    logger.info(datetime.now())

    # write everything printed to stdout to this log file
    logfile, stdout, stderr = print_util.stdout_stderr_to_log(
        os.path.join(out_dir, 'log+.txt'))

    try:
        # run experiment
        performance(args, out_dir, logger)

    finally:
        # restore original stdout and stderr settings, even on failure, so
        # the process is not left with its streams pointing at the log file
        print_util.reset_stdout_stderr(logfile, stdout, stderr)
Ejemplo n.º 10
0
def main(args):
    """Repeat the experiment `args.repeats` times with consecutive seeds.

    Repeat `i` uses seed `args.rs + i` and writes to
    `<out_dir>/<dataset>/<tree_type>/<tree_kernel>/rs<seed>`.
    """

    dataset_name = args.dataset

    for offset in range(args.repeats):
        seed = args.rs + offset

        # per-seed output directory
        seed_dir = os.path.join(args.out_dir, dataset_name, args.tree_type,
                                args.tree_kernel, 'rs{}'.format(seed))
        os.makedirs(seed_dir, exist_ok=True)

        # per-seed logger
        log_fp = os.path.join(seed_dir, 'log.txt')
        logger = print_util.get_logger(log_fp)
        logger.info(args)
        logger.info('Seed {}'.format(seed))

        experiment(args, logger, seed_dir, seed=seed)

        # release this repeat's logger before the next one is created
        print_util.remove_logger(logger)
Ejemplo n.º 11
0
def main(args):
    """Prepare `<out_dir>/<dataset>/<criterion>/rs_<rs>` and run `performance`.

    The directory is created if needed and passed to `print_util.clear_dir`
    before the logger is set up.
    """

    # output directory for this dataset / criterion / random state
    run_dir = os.path.join(args.out_dir, args.dataset, args.criterion,
                           'rs_{}'.format(args.rs))
    os.makedirs(run_dir, exist_ok=True)
    print_util.clear_dir(run_dir)

    # logger, starting with the arguments and a timestamp
    log_fp = os.path.join(run_dir, 'log.txt')
    logger = print_util.get_logger(log_fp)
    logger.info(args)
    logger.info('timestamp: {}'.format(datetime.now()))

    # run experiment
    performance(args, run_dir, logger)
Ejemplo n.º 12
0
def main(args):
    """Create the output directory and logger, then run `performance`.

    The directory is `<out_dir>/<dataset>/<model_type>`; when `args.no_tune`
    is set, `trees_<n_estimators>/depth_<max_depth>` is appended.
    """

    path_parts = [args.out_dir, args.dataset, args.model_type]

    # without tuning, the fixed hyperparameters become part of the path
    if args.no_tune:
        path_parts.append('trees_{}'.format(args.n_estimators))
        path_parts.append('depth_{}'.format(args.max_depth))

    run_dir = os.path.join(*path_parts)
    os.makedirs(run_dir, exist_ok=True)

    # logger, starting with the arguments and a timestamp
    log_fp = os.path.join(run_dir, 'log.txt')
    logger = print_util.get_logger(log_fp)
    logger.info(args)
    logger.info(datetime.now())

    # run experiment
    performance(args, logger)
Ejemplo n.º 13
0
def main(args):
    """Resolve the output directory and run the experiment with seed `args.rs`.

    The directory is `<out_dir>/<dataset>/<tree_type>/<tree_kernel>`, followed
    by `<kernel_model>` when `args.trex` is set, or by `teknn` when
    `args.teknn` is set.
    """

    run_dir = os.path.join(args.out_dir, args.dataset, args.tree_type,
                           args.tree_kernel)

    # optional method-specific sub-directory
    if args.trex:
        run_dir = os.path.join(run_dir, args.kernel_model)
    elif args.teknn:
        run_dir = os.path.join(run_dir, 'teknn')

    os.makedirs(run_dir, exist_ok=True)

    # make logger
    log_fp = os.path.join(run_dir, 'log.txt')
    logger = print_util.get_logger(log_fp)
    logger.info(args)

    logger.info('\nSeed: {}'.format(args.rs))
    experiment(args, logger, run_dir, seed=args.rs)

    print_util.remove_logger(logger)