def main(options):
    """Configure and run an ``ExtremeEnsembleSelection`` job.

    ``options`` supplies the run settings read below: ``outfile``, ``level``,
    ``size`` and the subsampling / bagging / weight-optimisation attributes
    that are forwarded unchanged to ``ExtremeEnsembleSelection``.
    Directory locations, core count and random seed come from ``config``.
    """
    # submissions produced here live in their own sub folder
    out_dir = "%s/ensemble_selection"%config.SUBM_DIR
    os_utils._create_dirs([out_dir])
    pred_prefix = "%s/test.pred.[%s]" % (out_dir, options.outfile)

    # candidate models are listed from the log folder of the level below
    prev_level_logs = "%s/level%d_models"%(config.LOG_DIR, options.level-1)
    candidates = get_model_list(prev_level_logs, options.size)

    # only levels 2 and 3 have a dedicated instance splitter
    if options.level == 2:
        splitter = splitter_level2
    elif options.level == 3:
        splitter = splitter_level3
    else:
        splitter = None

    # gather the constructor arguments first, in the original keyword order
    ees_kwargs = {
        "model_folder": config.OUTPUT_DIR,
        "model_list": candidates,
        "subm_prefix": pred_prefix,
        "weight_opt_max_evals": options.weight_opt_max_evals,
        "w_min": -1.,
        "w_max": 1.,
        "inst_subsample": options.inst_subsample,
        "inst_subsample_replacement": options.inst_subsample_replacement,
        "inst_splitter": splitter,
        "model_subsample": options.model_subsample,
        "model_subsample_replacement": options.model_subsample_replacement,
        "bagging_size": options.bagging_size,
        "init_top_k": options.init_top_k,
        "epsilon": options.epsilon,
        "multiprocessing": False,
        "multiprocessing_num_cores": config.NUM_CORES,
        "enable_extreme": options.enable_extreme,
        "random_seed": config.RANDOM_SEED,
    }
    ExtremeEnsembleSelection(**ees_kwargs).go()
def main(options):
    """Build an ``ExtremeEnsembleSelection`` from ``options`` and run it.

    Reads ``outfile``, ``level``, ``size`` and the subsampling, bagging and
    weight-optimisation attributes from ``options``; folder locations, the
    core count and the random seed are taken from ``config``.
    """
    level = options.level

    # create sub folder for this script's submissions
    ensemble_dir = "%s/ensemble_selection" % config.SUBM_DIR
    os_utils._create_dirs([ensemble_dir])
    prefix = "%s/test.pred.[%s]" % (ensemble_dir, options.outfile)

    # get model list from the log folder of the level below
    models = get_model_list(
        "%s/level%d_models" % (config.LOG_DIR, level - 1),
        options.size,
    )

    # get instance splitter: None unless the level is 2 or 3
    chosen_splitter = None
    if level == 2:
        chosen_splitter = splitter_level2
    elif level == 3:
        chosen_splitter = splitter_level3

    selector = ExtremeEnsembleSelection(
        # where models are read from and where predictions are written
        model_folder=config.OUTPUT_DIR,
        model_list=models,
        subm_prefix=prefix,
        # weight optimisation settings
        weight_opt_max_evals=options.weight_opt_max_evals,
        w_min=-1.,
        w_max=1.,
        # instance subsampling
        inst_subsample=options.inst_subsample,
        inst_subsample_replacement=options.inst_subsample_replacement,
        inst_splitter=chosen_splitter,
        # model subsampling
        model_subsample=options.model_subsample,
        model_subsample_replacement=options.model_subsample_replacement,
        # remaining selection settings
        bagging_size=options.bagging_size,
        init_top_k=options.init_top_k,
        epsilon=options.epsilon,
        multiprocessing=False,
        multiprocessing_num_cores=config.NUM_CORES,
        enable_extreme=options.enable_extreme,
        random_seed=config.RANDOM_SEED,
    )
    selector.go()
Beispiel #3
0
RANDOM_SEED = 2016
PLATFORM = platform.system()

# worker counts are lower on Windows than on other platforms
NUM_CORES = 14 if PLATFORM != "Windows" else 4
DATA_PROCESSOR_N_JOBS = 6 if PLATFORM != "Windows" else 4
AUTO_SPELLING_CHECKER_N_JOBS = 8 if PLATFORM != "Windows" else 4
# multi processing is not faster, so the value above is overridden
AUTO_SPELLING_CHECKER_N_JOBS = 1

## rgf
# paths to the RGF 1.2 helper script and executable under THIRDPARTY_DIR
RGF_CALL_EXE = "%s/rgf1.2/test/call_exe.pl" % THIRDPARTY_DIR
RGF_EXTENSION = "" if PLATFORM != "Windows" else ".exe"
RGF_EXE = "%s/rgf1.2/bin/rgf%s" % (THIRDPARTY_DIR, RGF_EXTENSION)


# ---------------------- CREATE PATH --------------------
# Every directory handed to os_utils._create_dirs below; the order matches
# the original step-by-step construction (Run folders are numbered from 1).
DIRS = (
    [CLEAN_DATA_DIR, SPLIT_DIR, FEAT_DIR, FEAT_CONF_DIR, "%s/All" % FEAT_DIR]
    + ["%s/Run%d" % (FEAT_DIR, run_no) for run_no in range(1, N_RUNS + 1)]
    + ["%s/Combine" % FEAT_DIR, OUTPUT_DIR, SUBM_DIR, "%s/All" % OUTPUT_DIR]
    + ["%s/Run%d" % (OUTPUT_DIR, run_no) for run_no in range(1, N_RUNS + 1)]
    + [LOG_DIR, FIG_DIR, TMP_DIR]
    + [WORD2VEC_MODEL_DIR, DOC2VEC_MODEL_DIR, GLOVE_WORD2VEC_MODEL_DIR]
)

os_utils._create_dirs(DIRS)
Beispiel #4
0
# Placeholders used for missing string and numeric values
MISSING_VALUE_STRING = "MISSINGVALUE"
MISSING_VALUE_NUMERIC = -1

# Other
TERM_VEC_MAX_NUM_TERMS = 50
RANDOM_SEED = 42
NUM_CORES = 4

# Settings dictionary for the DBN component (consumer not visible here)
DBN_CONFIG = dict(
    MAX_ITERATIONS=40,
    DEBUG=False,
    PRETTY_LOG=True,
    MIN_DOCS_PER_QUERY=10,
    MAX_DOCS_PER_QUERY=10,
    SERP_SIZE=10,
    QUERY_INDEPENDENT_PAGER=False,
    DEFAULT_REL=0.5,
)

# Directories handed to os_utils._create_dirs below, in the original order
DIRS = [DATA_DIR, CLEAN_DATA_DIR]
DIRS.extend([FEAT_DIR, FEAT_DIR + '/Combine', FEAT_CONF_DIR])
DIRS.extend([LOG_DIR, TMP_DIR])

os_utils._create_dirs(DIRS)