def main(options):
    """Configure an ExtremeEnsembleSelection run from ``options`` and start it.

    Attributes read from ``options``: ``outfile``, ``level``, ``size``,
    ``weight_opt_max_evals``, ``inst_subsample``,
    ``inst_subsample_replacement``, ``model_subsample``,
    ``model_subsample_replacement``, ``bagging_size``, ``init_top_k``,
    ``epsilon`` and ``enable_extreme``.

    Side effect: the ``ensemble_selection`` folder under ``config.SUBM_DIR``
    is passed to ``os_utils._create_dirs`` before anything else happens.
    """
    # Submission files are prefixed with a path inside a dedicated sub folder.
    out_dir = "%s/ensemble_selection" % config.SUBM_DIR
    os_utils._create_dirs([out_dir])
    pred_prefix = "%s/test.pred.[%s]" % (out_dir, options.outfile)

    # The model list is read from the log folder of the previous level.
    prev_level_logs = "%s/level%d_models" % (config.LOG_DIR, options.level - 1)
    candidates = get_model_list(prev_level_logs, options.size)

    # Only levels 2 and 3 have an instance splitter; any other level gets None.
    if options.level == 2:
        splitter = splitter_level2
    elif options.level == 3:
        splitter = splitter_level3
    else:
        splitter = None

    settings = dict(
        model_folder=config.OUTPUT_DIR,
        model_list=candidates,
        subm_prefix=pred_prefix,
        weight_opt_max_evals=options.weight_opt_max_evals,
        w_min=-1.,
        w_max=1.,
        inst_subsample=options.inst_subsample,
        inst_subsample_replacement=options.inst_subsample_replacement,
        inst_splitter=splitter,
        model_subsample=options.model_subsample,
        model_subsample_replacement=options.model_subsample_replacement,
        bagging_size=options.bagging_size,
        init_top_k=options.init_top_k,
        epsilon=options.epsilon,
        # Multiprocessing is switched off here; the core count is still passed.
        multiprocessing=False,
        multiprocessing_num_cores=config.NUM_CORES,
        enable_extreme=options.enable_extreme,
        random_seed=config.RANDOM_SEED,
    )
    selector = ExtremeEnsembleSelection(**settings)
    selector.go()
def main(options):
    """Build an ExtremeEnsembleSelection from ``options`` and call ``go()``.

    ``options`` supplies the output file tag (``outfile``), the stacking
    ``level``, the model list ``size`` and the subsampling / bagging /
    weight-optimisation settings that are forwarded unchanged to the
    selector. Paths and the random seed come from ``config``.
    """
    level = options.level

    # create sub folder
    target_folder = "%s/ensemble_selection" % config.SUBM_DIR
    os_utils._create_dirs([target_folder])
    prefix = "%s/test.pred.[%s]" % (target_folder, options.outfile)

    # get model list (logged under the previous level's folder)
    models_log_dir = "%s/level%d_models" % (config.LOG_DIR, level - 1)
    models = get_model_list(models_log_dir, options.size)

    # get instance splitter: defined for levels 2 and 3 only
    chosen_splitter = None
    if level == 2:
        chosen_splitter = splitter_level2
    elif level == 3:
        chosen_splitter = splitter_level3

    ExtremeEnsembleSelection(
        model_folder=config.OUTPUT_DIR,
        model_list=models,
        subm_prefix=prefix,
        weight_opt_max_evals=options.weight_opt_max_evals,
        w_min=-1.,
        w_max=1.,
        inst_subsample=options.inst_subsample,
        inst_subsample_replacement=options.inst_subsample_replacement,
        inst_splitter=chosen_splitter,
        model_subsample=options.model_subsample,
        model_subsample_replacement=options.model_subsample_replacement,
        bagging_size=options.bagging_size,
        init_top_k=options.init_top_k,
        epsilon=options.epsilon,
        multiprocessing=False,
        multiprocessing_num_cores=config.NUM_CORES,
        enable_extreme=options.enable_extreme,
        random_seed=config.RANDOM_SEED,
    ).go()
# Global random seed.
RANDOM_SEED = 2016

# Operating system name as reported by platform.system().
PLATFORM = platform.system()

# Worker counts depend on the platform: Windows uses the smaller values.
if PLATFORM == "Windows":
    NUM_CORES = 4
    DATA_PROCESSOR_N_JOBS = 4
    AUTO_SPELLING_CHECKER_N_JOBS = 4
else:
    NUM_CORES = 14
    DATA_PROCESSOR_N_JOBS = 6
    AUTO_SPELLING_CHECKER_N_JOBS = 8
# multi processing is not faster, so the value picked above is overridden
AUTO_SPELLING_CHECKER_N_JOBS = 1

## rgf
# The rgf binary carries an ".exe" suffix on Windows only.
if PLATFORM == "Windows":
    RGF_EXTENSION = ".exe"
else:
    RGF_EXTENSION = ""
RGF_CALL_EXE = "%s/rgf1.2/test/call_exe.pl" % THIRDPARTY_DIR
RGF_EXE = "%s/rgf1.2/bin/rgf%s" % (THIRDPARTY_DIR, RGF_EXTENSION)

# ---------------------- CREATE PATH --------------------
# The order matches the original list: parent folders precede their children.
DIRS = [CLEAN_DATA_DIR, SPLIT_DIR, FEAT_DIR, FEAT_CONF_DIR]
DIRS.append("%s/All" % FEAT_DIR)
# One feature folder per run, numbered from 1 to N_RUNS.
DIRS.extend("%s/Run%d" % (FEAT_DIR, run_id) for run_id in range(1, N_RUNS + 1))
DIRS.append("%s/Combine" % FEAT_DIR)
DIRS.extend([OUTPUT_DIR, SUBM_DIR])
DIRS.append("%s/All" % OUTPUT_DIR)
# One output folder per run, numbered from 1 to N_RUNS.
DIRS.extend("%s/Run%d" % (OUTPUT_DIR, run_id) for run_id in range(1, N_RUNS + 1))
DIRS.extend([LOG_DIR, FIG_DIR, TMP_DIR])
DIRS.extend([WORD2VEC_MODEL_DIR, DOC2VEC_MODEL_DIR, GLOVE_WORD2VEC_MODEL_DIR])
os_utils._create_dirs(DIRS)
# Placeholder values for missing string and numeric data.
MISSING_VALUE_STRING = "MISSINGVALUE"
MISSING_VALUE_NUMERIC = -1

# Other
TERM_VEC_MAX_NUM_TERMS = 50
RANDOM_SEED = 42
NUM_CORES = 4

# Settings for the DBN component; the consumer is not visible in this file
# section, so the meaning of each entry should be checked against it.
DBN_CONFIG = dict(
    MAX_ITERATIONS=40,
    DEBUG=False,
    PRETTY_LOG=True,
    MIN_DOCS_PER_QUERY=10,
    MAX_DOCS_PER_QUERY=10,
    SERP_SIZE=10,
    QUERY_INDEPENDENT_PAGER=False,
    DEFAULT_REL=0.5,
)

# Folders handed to os_utils._create_dirs at import time, in this order.
DIRS = [DATA_DIR, CLEAN_DATA_DIR, FEAT_DIR]
DIRS.append(FEAT_DIR + '/Combine')
DIRS.extend((FEAT_CONF_DIR, LOG_DIR, TMP_DIR))
os_utils._create_dirs(DIRS)