def args_to_dicts(args):
    """Split a flat experiment-args dict into pipeline args plus algorithm config.

    Pops the 'algorithm_type' sub-dict out of *args*, maps its 'type' field
    onto exactly one of the LOG_REG / SVM / XGB switches expected by
    ``mp.get_args`` (anything that is not 'logreg' or 'svm' falls through to
    XGB), and forwards the remaining arguments with RESAMPLING, USE_CACHE and
    W2V_PRETRAINED forced on.

    Returns:
        (directory, features, algorithms, parameters) where *algorithms* maps
        the selected algorithm key ('log_reg' | 'svm' | 'xgb') to its config
        dict, and the other three come from ``mp.get_args``.
    """
    global parse_args
    print(args)
    algorithm = args['algorithm_type']
    kind = algorithm['type']

    # Exactly one backend flag goes True; unknown types default to XGBoost.
    flags = {'LOG_REG': False, 'SVM': False, 'XGB': False}
    algorithms = {}
    if kind == 'logreg':
        flags['LOG_REG'] = True
        algorithms['log_reg'] = algorithm
    elif kind == 'svm':
        flags['SVM'] = True
        algorithms['svm'] = algorithm
    else:
        flags['XGB'] = True
        algorithms['xgb'] = algorithm

    # Deep-copy so the caller's dict is not mutated by the pop/overrides.
    forwarded = copy.deepcopy(args)
    forwarded.pop('algorithm_type')
    forwarded['RESAMPLING'] = True
    forwarded['USE_CACHE'] = True
    forwarded['W2V_PRETRAINED'] = True

    # The algorithm config itself is returned separately; mp.get_args only
    # needs to know which backend is active.
    directory, features, _, parameters = mp.get_args(
        directory=parse_args.directory_base,
        LOG_REG=flags['LOG_REG'],
        SVM=flags['SVM'],
        XGB=flags['XGB'],
        **forwarded)
    return directory, features, algorithms, parameters
def run_experiment(
        directory,
        BOW_APPEND, BOW_DIFFERENCE, BOW_PRODUCT, BOW_COS, BOW_TFIDF,
        ST_APPEND, ST_DIFFERENCE, ST_PRODUCT, ST_COS,
        LDA_APPEND, LDA_DIFFERENCE, LDA_PRODUCT, LDA_COS, LDA_TOPICS,
        W2V_AVG, W2V_MAX, W2V_MIN, W2V_ABS, W2V_APPEND, W2V_DIFFERENCE,
        W2V_PRODUCT, W2V_COS, W2V_PRETRAINED, W2V_MIN_COUNT, W2V_WINDOW,
        W2V_SIZE, W2V_WORKERS,
        CNN_APPEND, CNN_DIFFERENCE, CNN_PRODUCT, CNN_COS,
        WORDONEHOT, WORDONEHOT_VOCAB,
        LOG_REG, LOG_PENALTY, LOG_TOL, LOG_C,
        SVM, SVM_C, SVM_KERNEL, SVM_GAMMA,
        XGB, XGB_LEARNRATE, XGB_MAXDEPTH, XGB_MINCHILDWEIGHT,
        XGB_COLSAMPLEBYTREE,
        MEM_NET, MEM_VOCAB, MEM_TYPE, MEM_BATCH, MEM_EPOCHS, MEM_MASK_MODE,
        MEM_EMBED_MODE, MEM_ONEHOT_MIN_LEN, MEM_ONEHOT_MAX_LEN,
        RESAMPLING, NOVEL_RATIO, OVERSAMPLING, REPLACEMENT,
        SAVE_RESULTS, SAVEEXPERIMENTDATA, EXPERIMENTDATAFILE,
        VOCAB_SIZE, STEM, FULL_VOCAB_SIZE, FULL_VOCAB_TYPE, FULL_CHAR_VOCAB,
        SEED, USE_CACHE):
    """Run one Pythia experiment from an explicit, flat option list.

    Thin pass-through: bundles every option into an args namespace via
    ``get_args`` and hands it to ``pythia_main``, returning its result.
    Each pipeline option is a separate named parameter — presumably so an
    experiment framework can inject config values by name (TODO confirm
    against the caller); do not rename or reorder them.
    """
    return pythia_main(
        get_args(directory,
                 BOW_APPEND, BOW_DIFFERENCE, BOW_PRODUCT, BOW_COS, BOW_TFIDF,
                 ST_APPEND, ST_DIFFERENCE, ST_PRODUCT, ST_COS,
                 LDA_APPEND, LDA_DIFFERENCE, LDA_PRODUCT, LDA_COS, LDA_TOPICS,
                 W2V_AVG, W2V_MAX, W2V_MIN, W2V_ABS, W2V_APPEND,
                 W2V_DIFFERENCE, W2V_PRODUCT, W2V_COS, W2V_PRETRAINED,
                 W2V_MIN_COUNT, W2V_WINDOW, W2V_SIZE, W2V_WORKERS,
                 CNN_APPEND, CNN_DIFFERENCE, CNN_PRODUCT, CNN_COS,
                 WORDONEHOT, WORDONEHOT_VOCAB,
                 LOG_REG, LOG_PENALTY, LOG_TOL, LOG_C,
                 SVM, SVM_C, SVM_KERNEL, SVM_GAMMA,
                 XGB, XGB_LEARNRATE, XGB_MAXDEPTH, XGB_MINCHILDWEIGHT,
                 XGB_COLSAMPLEBYTREE,
                 MEM_NET, MEM_VOCAB, MEM_TYPE, MEM_BATCH, MEM_EPOCHS,
                 MEM_MASK_MODE, MEM_EMBED_MODE, MEM_ONEHOT_MIN_LEN,
                 MEM_ONEHOT_MAX_LEN,
                 RESAMPLING, NOVEL_RATIO, OVERSAMPLING, REPLACEMENT,
                 SAVE_RESULTS, SAVEEXPERIMENTDATA, EXPERIMENTDATAFILE,
                 VOCAB_SIZE, STEM, FULL_VOCAB_SIZE, FULL_VOCAB_TYPE,
                 FULL_CHAR_VOCAB, SEED, USE_CACHE))
def args_to_dicts(args):
    """Convert a flat experiment-args dict into (directory, features, algorithms, parameters).

    NOTE(review): this is a duplicate of an earlier ``args_to_dicts``
    definition in this file (only whitespace differs); at import time this
    later definition shadows the earlier one.

    Removes the 'algorithm_type' entry from *args*, turns its 'type' field
    into the matching LOG_REG / SVM / XGB switch for ``mp.get_args`` (any
    unrecognized type is treated as XGBoost), and forwards everything else
    with RESAMPLING, USE_CACHE and W2V_PRETRAINED forced to True.
    """
    global parse_args
    print(args)
    algorithm = args['algorithm_type']
    algorithm_type = algorithm['type']

    algorithms = {}
    use_logreg = algorithm_type == 'logreg'
    use_svm = algorithm_type == 'svm'
    use_xgb = not (use_logreg or use_svm)  # default backend
    if use_logreg:
        algorithms['log_reg'] = algorithm
    elif use_svm:
        algorithms['svm'] = algorithm
    else:
        algorithms['xgb'] = algorithm

    # Work on a deep copy so the caller's dict survives the pop/overrides.
    remaining = copy.deepcopy(args)
    remaining.pop('algorithm_type')
    remaining['RESAMPLING'] = True
    remaining['USE_CACHE'] = True
    remaining['W2V_PRETRAINED'] = True

    # The algorithm config is returned separately, so the third value from
    # mp.get_args is intentionally discarded.
    directory, features, _, parameters = mp.get_args(
        directory=parse_args.directory_base,
        LOG_REG=use_logreg,
        SVM=use_svm,
        XGB=use_xgb,
        **remaining)
    return directory, features, algorithms, parameters
def objective(args_):
    """Hyperopt objective: run one Sacred-wrapped Pythia experiment for *args_*.

    Translates the hyperopt sample *args_* into Pythia arguments, runs the
    experiment under a Sacred ``Experiment`` with a Mongo observer, and
    returns the module-level ``result`` (set elsewhere, presumably by
    ``run_with_global_args`` — verify) for hyperopt to minimize.
    """
    # arguments to pass as config_updates dict
    global args
    # result to pass to hyperopt
    global result
    # command-line arguments
    global parse_args
    try:
        ex = Experiment('Hyperopt')
        logger.debug("Adding observer for {}, DB {}".format(parse_args.mongo_db_address,parse_args.mongo_db_name))
        ex.observers.append(MongoObserver.create(url=parse_args.mongo_db_address, db_name=parse_args.mongo_db_name))
        pythia_args = make_args_for_pythia(args_)
        args = mp.get_args(**pythia_args)
        ex.main(run_with_global_args)
        # Sacred injects pythia_args into the captured run_with_global_args.
        r = ex.run(config_updates=pythia_args)
        logger.debug("Experiment result: {}\n"
                     "Report to hyperopt: {}".format(r.result, result))
        return result
    # NOTE(review): bare `except:` + `raise` re-raises everything, so the
    # "continue without Sacred" fallback below is unreachable dead code —
    # the try either returns or propagates. Decide whether the fallback
    # (per the comment below) or the re-raise is the intended behavior.
    except:
        raise
    #If we somehow cannot get to the MongoDB server, then continue with the experiment
    logger.warning("Running without Sacred")
    run_with_global_args()
def objective(args_):
    """Hyperopt objective: run one Sacred-wrapped Pythia experiment for *args_*.

    NOTE(review): duplicate of an earlier ``objective`` definition in this
    file (only line wrapping differs); at import time this later definition
    shadows the earlier one.

    Converts the hyperopt sample *args_* to Pythia arguments, runs the
    experiment under Sacred with a Mongo observer, and returns the
    module-level ``result`` for hyperopt.
    """
    # arguments to pass as config_updates dict
    global args
    # result to pass to hyperopt
    global result
    # command-line arguments
    global parse_args
    try:
        ex = Experiment('Hyperopt')
        logger.debug("Adding observer for {}, DB {}".format(
            parse_args.mongo_db_address, parse_args.mongo_db_name))
        ex.observers.append(
            MongoObserver.create(url=parse_args.mongo_db_address,
                                 db_name=parse_args.mongo_db_name))
        pythia_args = make_args_for_pythia(args_)
        args = mp.get_args(**pythia_args)
        ex.main(run_with_global_args)
        # Sacred injects pythia_args into the captured run_with_global_args.
        r = ex.run(config_updates=pythia_args)
        logger.debug("Experiment result: {}\n"
                     "Report to hyperopt: {}".format(r.result, result))
        return result
    # NOTE(review): bare `except:` + `raise` re-raises everything, making the
    # "run without Sacred" fallback below unreachable dead code; confirm
    # which behavior is intended.
    except:
        raise
    #If we somehow cannot get to the MongoDB server, then continue with the experiment
    logger.warning("Running without Sacred")
    run_with_global_args()
def test_random():
    """Two pipeline runs with the same fixed SEED must produce identical results."""
    config = get_args(SEED=41,
                      directory='data/stackexchange/anime',
                      XGB=True,
                      LDA_APPEND=True,
                      BOW_APPEND=True,
                      RESAMPLING=False)
    first = pipeline_main(config)
    second = pipeline_main(config)
    assert first == second
def run_experiment(
        directory,
        BOW_APPEND, BOW_DIFFERENCE, BOW_PRODUCT, BOW_COS, BOW_TFIDF,
        BOW_BINARY,
        ST_APPEND, ST_DIFFERENCE, ST_PRODUCT, ST_COS,
        LDA_APPEND, LDA_DIFFERENCE, LDA_PRODUCT, LDA_COS, LDA_TOPICS,
        W2V_AVG, W2V_MAX, W2V_MIN, W2V_ABS, W2V_APPEND, W2V_DIFFERENCE,
        W2V_PRODUCT, W2V_COS, W2V_PRETRAINED, W2V_MIN_COUNT, W2V_WINDOW,
        W2V_SIZE, W2V_WORKERS,
        CNN_APPEND, CNN_DIFFERENCE, CNN_PRODUCT, CNN_COS,
        WORDONEHOT, WORDONEHOT_VOCAB,
        LOG_REG, LOG_PENALTY, LOG_TOL, LOG_C,
        SVM, SVM_C, SVM_KERNEL, SVM_GAMMA,
        XGB, XGB_LEARNRATE, XGB_MAXDEPTH, XGB_MINCHILDWEIGHT,
        XGB_COLSAMPLEBYTREE,
        SGD, SGD_LOSS, SGD_ALPHA, SGD_PENALTY, SGD_EPOCHS, SGD_BATCH_SIZE,
        MEM_NET, MEM_VOCAB, MEM_TYPE, MEM_BATCH, MEM_EPOCHS, MEM_MASK_MODE,
        MEM_EMBED_MODE, MEM_ONEHOT_MIN_LEN, MEM_ONEHOT_MAX_LEN,
        RESAMPLING, NOVEL_RATIO, OVERSAMPLING, REPLACEMENT,
        SAVE_RESULTS, SAVEEXPERIMENTDATA, EXPERIMENTDATAFILE,
        VOCAB_SIZE, STEM, FULL_VOCAB_SIZE, FULL_VOCAB_TYPE, FULL_CHAR_VOCAB,
        FULL_VOCAB_STEM,
        SEED,
        HDF5_PATH_TRAIN, HDF5_PATH_TEST, HDF5_SAVE_FREQUENCY,
        HDF5_USE_EXISTING,
        USE_CACHE,
        _run):
    """Run one Pythia experiment; records the invoking user on the run.

    Each pipeline option is a separate named parameter — presumably so an
    experiment framework can inject config values by name (the ``_run``
    parameter suggests a Sacred captured function — TODO confirm); do not
    rename or reorder them. Bundles all options via ``get_args`` and
    returns the result of ``pythia_main``.
    """
    # store default metadata
    USER = os.environ.get('USER', 'unknown user')
    _run.info = {'user': USER}
    return pythia_main(
        get_args(directory,
                 BOW_APPEND, BOW_DIFFERENCE, BOW_PRODUCT, BOW_COS, BOW_TFIDF,
                 BOW_BINARY,
                 ST_APPEND, ST_DIFFERENCE, ST_PRODUCT, ST_COS,
                 LDA_APPEND, LDA_DIFFERENCE, LDA_PRODUCT, LDA_COS, LDA_TOPICS,
                 W2V_AVG, W2V_MAX, W2V_MIN, W2V_ABS, W2V_APPEND,
                 W2V_DIFFERENCE, W2V_PRODUCT, W2V_COS, W2V_PRETRAINED,
                 W2V_MIN_COUNT, W2V_WINDOW, W2V_SIZE, W2V_WORKERS,
                 CNN_APPEND, CNN_DIFFERENCE, CNN_PRODUCT, CNN_COS,
                 WORDONEHOT, WORDONEHOT_VOCAB,
                 LOG_REG, LOG_PENALTY, LOG_TOL, LOG_C,
                 SVM, SVM_C, SVM_KERNEL, SVM_GAMMA,
                 XGB, XGB_LEARNRATE, XGB_MAXDEPTH, XGB_MINCHILDWEIGHT,
                 XGB_COLSAMPLEBYTREE,
                 SGD, SGD_LOSS, SGD_ALPHA, SGD_PENALTY, SGD_EPOCHS,
                 SGD_BATCH_SIZE,
                 MEM_NET, MEM_VOCAB, MEM_TYPE, MEM_BATCH, MEM_EPOCHS,
                 MEM_MASK_MODE, MEM_EMBED_MODE, MEM_ONEHOT_MIN_LEN,
                 MEM_ONEHOT_MAX_LEN,
                 RESAMPLING, NOVEL_RATIO, OVERSAMPLING, REPLACEMENT,
                 SAVE_RESULTS, SAVEEXPERIMENTDATA, EXPERIMENTDATAFILE,
                 VOCAB_SIZE, STEM, FULL_VOCAB_SIZE, FULL_VOCAB_TYPE,
                 FULL_CHAR_VOCAB, FULL_VOCAB_STEM,
                 SEED,
                 HDF5_PATH_TRAIN, HDF5_PATH_TEST, HDF5_SAVE_FREQUENCY,
                 HDF5_USE_EXISTING,
                 USE_CACHE))
def run_experiment(
        directory,
        BOW_APPEND, BOW_DIFFERENCE, BOW_PRODUCT, BOW_COS, BOW_TFIDF,
        BOW_VOCAB,
        ST_APPEND, ST_DIFFERENCE, ST_PRODUCT, ST_COS,
        LDA_APPEND, LDA_DIFFERENCE, LDA_PRODUCT, LDA_COS, LDA_VOCAB,
        LDA_TOPICS,
        LOG_REG, LOG_PENALTY, LOG_TOL, LOG_C,
        SVM, SVM_C, SVM_KERNAL, SVM_GAMMA,
        XGB, XGB_LEARNRATE, XGB_MAXDEPTH, XGB_MINCHILDWEIGHT,
        XGB_COLSAMPLEBYTREE,
        RESAMPLING, NOVEL_RATIO, OVERSAMPLING, REPLACEMENT,
        SEED):
    """Run one Pythia experiment from an explicit, flat option list.

    Thin pass-through to ``pythia_main(get_args(...))``. Each option is a
    separate named parameter — presumably so an experiment framework can
    inject config values by name (TODO confirm against the caller).

    NOTE(review): ``SVM_KERNAL`` is a misspelling of "KERNEL", but the name
    is part of this function's public interface (keyword callers depend on
    it), so it is deliberately left as-is.
    """
    return pythia_main(
        get_args(
            directory,
            BOW_APPEND, BOW_DIFFERENCE, BOW_PRODUCT, BOW_COS, BOW_TFIDF,
            BOW_VOCAB,
            ST_APPEND, ST_DIFFERENCE, ST_PRODUCT, ST_COS,
            LDA_APPEND, LDA_DIFFERENCE, LDA_PRODUCT, LDA_COS, LDA_VOCAB,
            LDA_TOPICS,
            LOG_REG, LOG_PENALTY, LOG_TOL, LOG_C,
            SVM, SVM_C, SVM_KERNAL, SVM_GAMMA,
            XGB, XGB_LEARNRATE, XGB_MAXDEPTH, XGB_MINCHILDWEIGHT,
            XGB_COLSAMPLEBYTREE,
            RESAMPLING, NOVEL_RATIO, OVERSAMPLING, REPLACEMENT,
            SEED)
    )
def run_experiment(directory,
                   BOW_APPEND, BOW_DIFFERENCE, BOW_PRODUCT, BOW_COS,
                   BOW_TFIDF,
                   ST_APPEND, ST_DIFFERENCE, ST_PRODUCT, ST_COS,
                   LDA_APPEND, LDA_DIFFERENCE, LDA_PRODUCT, LDA_COS,
                   LDA_TOPICS,
                   W2V_AVG, W2V_MAX, W2V_MIN, W2V_ABS, W2V_APPEND,
                   W2V_DIFFERENCE, W2V_PRODUCT, W2V_COS, W2V_PRETRAINED,
                   W2V_MIN_COUNT, W2V_WINDOW, W2V_SIZE, W2V_WORKERS,
                   CNN_APPEND, CNN_DIFFERENCE, CNN_PRODUCT, CNN_COS,
                   WORDONEHOT, WORDONEHOT_VOCAB,
                   LOG_REG, LOG_PENALTY, LOG_TOL, LOG_C,
                   SVM, SVM_C, SVM_KERNEL, SVM_GAMMA,
                   XGB, XGB_LEARNRATE, XGB_MAXDEPTH, XGB_MINCHILDWEIGHT,
                   XGB_COLSAMPLEBYTREE,
                   MEM_NET, MEM_VOCAB, MEM_TYPE, MEM_BATCH, MEM_EPOCHS,
                   MEM_MASK_MODE, MEM_EMBED_MODE, MEM_ONEHOT_MIN_LEN,
                   MEM_ONEHOT_MAX_LEN,
                   RESAMPLING, NOVEL_RATIO, OVERSAMPLING, REPLACEMENT,
                   SAVE_RESULTS, SAVEEXPERIMENTDATA, EXPERIMENTDATAFILE,
                   VOCAB_SIZE, STEM, FULL_VOCAB_SIZE, FULL_VOCAB_TYPE,
                   FULL_CHAR_VOCAB,
                   SEED, USE_CACHE):
    """Run one Pythia experiment from an explicit, flat option list.

    Thin pass-through to ``pythia_main(get_args(...))``. Each pipeline
    option is a separate named parameter — presumably so an experiment
    framework can inject config values by name (TODO confirm against the
    caller); do not rename or reorder them.

    NOTE(review): this signature duplicates another ``run_experiment``
    variant in this file; at import time the later definition shadows the
    earlier one.
    """
    return pythia_main(
        get_args(
            directory,
            BOW_APPEND, BOW_DIFFERENCE, BOW_PRODUCT, BOW_COS, BOW_TFIDF,
            ST_APPEND, ST_DIFFERENCE, ST_PRODUCT, ST_COS,
            LDA_APPEND, LDA_DIFFERENCE, LDA_PRODUCT, LDA_COS, LDA_TOPICS,
            W2V_AVG, W2V_MAX, W2V_MIN, W2V_ABS, W2V_APPEND, W2V_DIFFERENCE,
            W2V_PRODUCT, W2V_COS, W2V_PRETRAINED, W2V_MIN_COUNT, W2V_WINDOW,
            W2V_SIZE, W2V_WORKERS,
            CNN_APPEND, CNN_DIFFERENCE, CNN_PRODUCT, CNN_COS,
            WORDONEHOT, WORDONEHOT_VOCAB,
            LOG_REG, LOG_PENALTY, LOG_TOL, LOG_C,
            SVM, SVM_C, SVM_KERNEL, SVM_GAMMA,
            XGB, XGB_LEARNRATE, XGB_MAXDEPTH, XGB_MINCHILDWEIGHT,
            XGB_COLSAMPLEBYTREE,
            MEM_NET, MEM_VOCAB, MEM_TYPE, MEM_BATCH, MEM_EPOCHS,
            MEM_MASK_MODE, MEM_EMBED_MODE, MEM_ONEHOT_MIN_LEN,
            MEM_ONEHOT_MAX_LEN,
            RESAMPLING, NOVEL_RATIO, OVERSAMPLING, REPLACEMENT,
            SAVE_RESULTS, SAVEEXPERIMENTDATA, EXPERIMENTDATAFILE,
            VOCAB_SIZE, STEM, FULL_VOCAB_SIZE, FULL_VOCAB_TYPE,
            FULL_CHAR_VOCAB,
            SEED, USE_CACHE)
    )