Ejemplo n.º 1
0
def args_to_dicts(args):
    """Split a hyperopt-style args dict into the pieces pythia needs.

    Pulls the algorithm config out of ``args['algorithm_type']``, maps its
    ``'type'`` field onto the matching boolean flag for ``mp.get_args``,
    and forwards every other entry with resampling, caching, and
    pretrained word2vec forced on.

    Args:
        args: dict of experiment settings; ``args['algorithm_type']`` is
            itself a dict whose ``'type'`` key names the algorithm
            ('logreg', 'svm', or anything else, which falls back to xgb).

    Returns:
        tuple: ``(directory, features, algorithms, parameters)`` where
        ``algorithms`` maps the chosen algorithm key to its config dict.
    """
    print(args)  # debug trace of the incoming search point
    algorithm = args['algorithm_type']
    algorithm_type = algorithm['type']

    algorithms = {}
    _LOG_REG = False
    _XGB = False
    _SVM = False
    if algorithm_type == 'logreg':
        _LOG_REG = True
        algorithms['log_reg'] = algorithm
    elif algorithm_type == 'svm':
        _SVM = True
        algorithms['svm'] = algorithm
    else:
        # Any unrecognized type falls back to XGBoost.
        _XGB = True
        algorithms['xgb'] = algorithm

    # Deep copy so pop()/overrides below never mutate the caller's dict.
    passed_args = copy.deepcopy(args)
    passed_args.pop('algorithm_type')
    passed_args['RESAMPLING'] = True
    passed_args['USE_CACHE'] = True
    passed_args['W2V_PRETRAINED'] = True
    # The algorithm flags are resolved above, so get_args only needs the
    # booleans; parse_args is read-only here, so no `global` is required.
    directory, features, _, parameters = mp.get_args(
        directory=parse_args.directory_base,
        LOG_REG=_LOG_REG,
        SVM=_SVM,
        XGB=_XGB,
        **passed_args)

    return directory, features, algorithms, parameters
Ejemplo n.º 2
0
def run_experiment(
        directory, BOW_APPEND, BOW_DIFFERENCE, BOW_PRODUCT, BOW_COS, BOW_TFIDF,
        ST_APPEND, ST_DIFFERENCE, ST_PRODUCT, ST_COS, LDA_APPEND,
        LDA_DIFFERENCE, LDA_PRODUCT, LDA_COS, LDA_TOPICS, W2V_AVG, W2V_MAX,
        W2V_MIN, W2V_ABS, W2V_APPEND, W2V_DIFFERENCE, W2V_PRODUCT, W2V_COS,
        W2V_PRETRAINED, W2V_MIN_COUNT, W2V_WINDOW, W2V_SIZE, W2V_WORKERS,
        CNN_APPEND, CNN_DIFFERENCE, CNN_PRODUCT, CNN_COS, WORDONEHOT,
        WORDONEHOT_VOCAB, LOG_REG, LOG_PENALTY, LOG_TOL, LOG_C, SVM, SVM_C,
        SVM_KERNEL, SVM_GAMMA, XGB, XGB_LEARNRATE, XGB_MAXDEPTH,
        XGB_MINCHILDWEIGHT, XGB_COLSAMPLEBYTREE, MEM_NET, MEM_VOCAB, MEM_TYPE,
        MEM_BATCH, MEM_EPOCHS, MEM_MASK_MODE, MEM_EMBED_MODE,
        MEM_ONEHOT_MIN_LEN, MEM_ONEHOT_MAX_LEN, RESAMPLING, NOVEL_RATIO,
        OVERSAMPLING, REPLACEMENT, SAVE_RESULTS, SAVEEXPERIMENTDATA,
        EXPERIMENTDATAFILE, VOCAB_SIZE, STEM, FULL_VOCAB_SIZE, FULL_VOCAB_TYPE,
        FULL_CHAR_VOCAB, SEED, USE_CACHE):
    """Thin wrapper: forward every flag to get_args and run pythia_main.

    The tuple below preserves the exact positional order get_args expects.
    """
    experiment_settings = (
        directory,
        # bag-of-words features
        BOW_APPEND, BOW_DIFFERENCE, BOW_PRODUCT, BOW_COS, BOW_TFIDF,
        # skip-thought features
        ST_APPEND, ST_DIFFERENCE, ST_PRODUCT, ST_COS,
        # LDA features
        LDA_APPEND, LDA_DIFFERENCE, LDA_PRODUCT, LDA_COS, LDA_TOPICS,
        # word2vec features
        W2V_AVG, W2V_MAX, W2V_MIN, W2V_ABS, W2V_APPEND, W2V_DIFFERENCE,
        W2V_PRODUCT, W2V_COS, W2V_PRETRAINED, W2V_MIN_COUNT, W2V_WINDOW,
        W2V_SIZE, W2V_WORKERS,
        # CNN / one-hot features
        CNN_APPEND, CNN_DIFFERENCE, CNN_PRODUCT, CNN_COS,
        WORDONEHOT, WORDONEHOT_VOCAB,
        # classifiers
        LOG_REG, LOG_PENALTY, LOG_TOL, LOG_C,
        SVM, SVM_C, SVM_KERNEL, SVM_GAMMA,
        XGB, XGB_LEARNRATE, XGB_MAXDEPTH, XGB_MINCHILDWEIGHT,
        XGB_COLSAMPLEBYTREE,
        # memory network
        MEM_NET, MEM_VOCAB, MEM_TYPE, MEM_BATCH, MEM_EPOCHS, MEM_MASK_MODE,
        MEM_EMBED_MODE, MEM_ONEHOT_MIN_LEN, MEM_ONEHOT_MAX_LEN,
        # sampling / bookkeeping
        RESAMPLING, NOVEL_RATIO, OVERSAMPLING, REPLACEMENT,
        SAVE_RESULTS, SAVEEXPERIMENTDATA, EXPERIMENTDATAFILE,
        VOCAB_SIZE, STEM, FULL_VOCAB_SIZE, FULL_VOCAB_TYPE, FULL_CHAR_VOCAB,
        SEED, USE_CACHE,
    )
    return pythia_main(get_args(*experiment_settings))
Ejemplo n.º 3
0
def args_to_dicts(args):
    """Split a hyperopt-style args dict into the pieces pythia needs.

    Extracts the algorithm config from ``args['algorithm_type']``, maps its
    ``'type'`` field onto the matching boolean flag for ``mp.get_args``,
    and forwards the remaining entries with resampling, caching, and
    pretrained word2vec forced on.

    Args:
        args: dict of experiment settings; ``args['algorithm_type']`` is a
            dict whose ``'type'`` key names the algorithm ('logreg', 'svm',
            or anything else, which falls back to xgb).

    Returns:
        tuple: ``(directory, features, algorithms, parameters)`` where
        ``algorithms`` maps the chosen algorithm key to its config dict.
    """
    print(args)  # debug trace of the incoming search point
    algorithm = args['algorithm_type']
    algorithm_type = algorithm['type']

    algorithms = {}
    _LOG_REG = False
    _XGB = False
    _SVM = False
    if algorithm_type == 'logreg':
        _LOG_REG = True
        algorithms['log_reg'] = algorithm
    elif algorithm_type == 'svm':
        _SVM = True
        algorithms['svm'] = algorithm
    else:
        # Any unrecognized type falls back to XGBoost.
        _XGB = True
        algorithms['xgb'] = algorithm

    # Deep copy so pop()/overrides below never mutate the caller's dict.
    passed_args = copy.deepcopy(args)
    passed_args.pop('algorithm_type')
    passed_args['RESAMPLING'] = True
    passed_args['USE_CACHE'] = True
    passed_args['W2V_PRETRAINED'] = True
    # The algorithm flags are resolved above, so get_args only needs the
    # booleans; parse_args is read-only here, so no `global` is required.
    directory, features, _, parameters = mp.get_args(
        directory=parse_args.directory_base,
        LOG_REG=_LOG_REG,
        SVM=_SVM,
        XGB=_XGB,
        **passed_args)

    return directory, features, algorithms, parameters
Ejemplo n.º 4
0
def objective(args_):
    """Hyperopt objective: run one Sacred-tracked pythia experiment.

    Builds a Sacred ``Experiment`` with a Mongo observer, converts the
    hyperopt sample into pythia args, runs the experiment, and returns
    the global ``result`` (populated by ``run_with_global_args``).

    Args:
        args_: hyperopt sample describing the experiment configuration.

    Returns:
        The global ``result`` value for hyperopt to minimize.

    Raises:
        Any exception from Sacred/Mongo setup or the run is re-raised.
    """
    # arguments to pass as config_updates dict
    global args
    # result to pass to hyperopt
    global result
    # command-line arguments
    global parse_args

    try:
        ex = Experiment('Hyperopt')
        logger.debug("Adding observer for {}, DB {}".format(
            parse_args.mongo_db_address, parse_args.mongo_db_name))
        ex.observers.append(MongoObserver.create(
            url=parse_args.mongo_db_address,
            db_name=parse_args.mongo_db_name))

        pythia_args = make_args_for_pythia(args_)
        args = mp.get_args(**pythia_args)
        ex.main(run_with_global_args)
        r = ex.run(config_updates=pythia_args)
        logger.debug("Experiment result: {}\n"
                     "Report to hyperopt: {}".format(r.result, result))

        return result

    except Exception:
        # NOTE(review): a Sacred-less fallback (logger.warning + a direct
        # run_with_global_args() call) used to sit after this `raise` and
        # was unreachable dead code; it has been removed. Reinstate it in
        # place of the raise if best-effort execution without MongoDB is
        # actually wanted.
        raise
Ejemplo n.º 5
0
def objective(args_):
    """Hyperopt objective: run one Sacred-tracked pythia experiment.

    Builds a Sacred ``Experiment`` with a Mongo observer, converts the
    hyperopt sample into pythia args, runs the experiment, and returns
    the global ``result`` (populated by ``run_with_global_args``).

    Args:
        args_: hyperopt sample describing the experiment configuration.

    Returns:
        The global ``result`` value for hyperopt to minimize.

    Raises:
        Any exception from Sacred/Mongo setup or the run is re-raised.
    """
    # arguments to pass as config_updates dict
    global args
    # result to pass to hyperopt
    global result
    # command-line arguments
    global parse_args

    try:
        ex = Experiment('Hyperopt')
        logger.debug("Adding observer for {}, DB {}".format(
            parse_args.mongo_db_address, parse_args.mongo_db_name))
        ex.observers.append(
            MongoObserver.create(url=parse_args.mongo_db_address,
                                 db_name=parse_args.mongo_db_name))

        pythia_args = make_args_for_pythia(args_)
        args = mp.get_args(**pythia_args)
        ex.main(run_with_global_args)
        r = ex.run(config_updates=pythia_args)
        logger.debug("Experiment result: {}\n"
                     "Report to hyperopt: {}".format(r.result, result))

        return result

    except Exception:
        # NOTE(review): a Sacred-less fallback (logger.warning + a direct
        # run_with_global_args() call) used to sit after this `raise` and
        # was unreachable dead code; it has been removed. Reinstate it in
        # place of the raise if best-effort execution without MongoDB is
        # actually wanted.
        raise
Ejemplo n.º 6
0
def test_random():
    """Two pipeline runs with the same fixed seed must give equal results."""
    config = dict(
        SEED=41,
        directory='data/stackexchange/anime',
        XGB=True,
        LDA_APPEND=True,
        BOW_APPEND=True,
        RESAMPLING=False,
    )
    pipeline_args = get_args(**config)
    first = pipeline_main(pipeline_args)
    second = pipeline_main(pipeline_args)
    assert first == second
Ejemplo n.º 7
0
def run_experiment(
        directory, BOW_APPEND, BOW_DIFFERENCE, BOW_PRODUCT, BOW_COS, BOW_TFIDF,
        BOW_BINARY, ST_APPEND, ST_DIFFERENCE, ST_PRODUCT, ST_COS, LDA_APPEND,
        LDA_DIFFERENCE, LDA_PRODUCT, LDA_COS, LDA_TOPICS, W2V_AVG, W2V_MAX,
        W2V_MIN, W2V_ABS, W2V_APPEND, W2V_DIFFERENCE, W2V_PRODUCT, W2V_COS,
        W2V_PRETRAINED, W2V_MIN_COUNT, W2V_WINDOW, W2V_SIZE, W2V_WORKERS,
        CNN_APPEND, CNN_DIFFERENCE, CNN_PRODUCT, CNN_COS, WORDONEHOT,
        WORDONEHOT_VOCAB, LOG_REG, LOG_PENALTY, LOG_TOL, LOG_C, SVM, SVM_C,
        SVM_KERNEL, SVM_GAMMA, XGB, XGB_LEARNRATE, XGB_MAXDEPTH,
        XGB_MINCHILDWEIGHT, XGB_COLSAMPLEBYTREE, SGD, SGD_LOSS, SGD_ALPHA,
        SGD_PENALTY, SGD_EPOCHS, SGD_BATCH_SIZE, MEM_NET, MEM_VOCAB, MEM_TYPE,
        MEM_BATCH, MEM_EPOCHS, MEM_MASK_MODE, MEM_EMBED_MODE,
        MEM_ONEHOT_MIN_LEN, MEM_ONEHOT_MAX_LEN, RESAMPLING, NOVEL_RATIO,
        OVERSAMPLING, REPLACEMENT, SAVE_RESULTS, SAVEEXPERIMENTDATA,
        EXPERIMENTDATAFILE, VOCAB_SIZE, STEM, FULL_VOCAB_SIZE, FULL_VOCAB_TYPE,
        FULL_CHAR_VOCAB, FULL_VOCAB_STEM, SEED, HDF5_PATH_TRAIN,
        HDF5_PATH_TEST, HDF5_SAVE_FREQUENCY, HDF5_USE_EXISTING, USE_CACHE,
        _run):
    """Record the invoking user on the Sacred run, then launch pythia.

    All flags except ``_run`` are forwarded positionally to ``get_args``.
    """
    # Tag the Sacred run with the current user (fallback when $USER unset).
    _run.info = {'user': os.environ.get('USER', 'unknown user')}

    # Positional order below must match get_args exactly.
    experiment_settings = (
        directory,
        # bag-of-words features
        BOW_APPEND, BOW_DIFFERENCE, BOW_PRODUCT, BOW_COS, BOW_TFIDF,
        BOW_BINARY,
        # skip-thought features
        ST_APPEND, ST_DIFFERENCE, ST_PRODUCT, ST_COS,
        # LDA features
        LDA_APPEND, LDA_DIFFERENCE, LDA_PRODUCT, LDA_COS, LDA_TOPICS,
        # word2vec features
        W2V_AVG, W2V_MAX, W2V_MIN, W2V_ABS, W2V_APPEND, W2V_DIFFERENCE,
        W2V_PRODUCT, W2V_COS, W2V_PRETRAINED, W2V_MIN_COUNT, W2V_WINDOW,
        W2V_SIZE, W2V_WORKERS,
        # CNN / one-hot features
        CNN_APPEND, CNN_DIFFERENCE, CNN_PRODUCT, CNN_COS,
        WORDONEHOT, WORDONEHOT_VOCAB,
        # classifiers
        LOG_REG, LOG_PENALTY, LOG_TOL, LOG_C,
        SVM, SVM_C, SVM_KERNEL, SVM_GAMMA,
        XGB, XGB_LEARNRATE, XGB_MAXDEPTH, XGB_MINCHILDWEIGHT,
        XGB_COLSAMPLEBYTREE,
        SGD, SGD_LOSS, SGD_ALPHA, SGD_PENALTY, SGD_EPOCHS, SGD_BATCH_SIZE,
        # memory network
        MEM_NET, MEM_VOCAB, MEM_TYPE, MEM_BATCH, MEM_EPOCHS, MEM_MASK_MODE,
        MEM_EMBED_MODE, MEM_ONEHOT_MIN_LEN, MEM_ONEHOT_MAX_LEN,
        # sampling / bookkeeping
        RESAMPLING, NOVEL_RATIO, OVERSAMPLING, REPLACEMENT,
        SAVE_RESULTS, SAVEEXPERIMENTDATA, EXPERIMENTDATAFILE,
        VOCAB_SIZE, STEM, FULL_VOCAB_SIZE, FULL_VOCAB_TYPE, FULL_CHAR_VOCAB,
        FULL_VOCAB_STEM, SEED,
        # HDF5 persistence
        HDF5_PATH_TRAIN, HDF5_PATH_TEST, HDF5_SAVE_FREQUENCY,
        HDF5_USE_EXISTING,
        USE_CACHE,
    )
    return pythia_main(get_args(*experiment_settings))
Ejemplo n.º 8
0
def run_experiment(
    directory,
    BOW_APPEND,
    BOW_DIFFERENCE,
    BOW_PRODUCT,
    BOW_COS,
    BOW_TFIDF,
    BOW_VOCAB,
    ST_APPEND,
    ST_DIFFERENCE,
    ST_PRODUCT,
    ST_COS,
    LDA_APPEND,
    LDA_DIFFERENCE,
    LDA_PRODUCT,
    LDA_COS,
    LDA_VOCAB,
    LDA_TOPICS,
    LOG_REG,
    LOG_PENALTY,
    LOG_TOL,
    LOG_C,
    SVM,
    SVM_C,
    SVM_KERNAL,
    SVM_GAMMA,
    XGB,
    XGB_LEARNRATE,
    XGB_MAXDEPTH,
    XGB_MINCHILDWEIGHT,
    XGB_COLSAMPLEBYTREE,
    RESAMPLING,
    NOVEL_RATIO,
    OVERSAMPLING,
    REPLACEMENT,
    SEED):
    """Thin wrapper: forward every flag to get_args and run pythia_main.

    The tuple below preserves the exact positional order get_args expects.
    """
    experiment_settings = (
        directory,
        # bag-of-words features
        BOW_APPEND, BOW_DIFFERENCE, BOW_PRODUCT, BOW_COS, BOW_TFIDF,
        BOW_VOCAB,
        # skip-thought features
        ST_APPEND, ST_DIFFERENCE, ST_PRODUCT, ST_COS,
        # LDA features
        LDA_APPEND, LDA_DIFFERENCE, LDA_PRODUCT, LDA_COS, LDA_VOCAB,
        LDA_TOPICS,
        # classifiers
        LOG_REG, LOG_PENALTY, LOG_TOL, LOG_C,
        SVM, SVM_C, SVM_KERNAL, SVM_GAMMA,
        XGB, XGB_LEARNRATE, XGB_MAXDEPTH, XGB_MINCHILDWEIGHT,
        XGB_COLSAMPLEBYTREE,
        # sampling
        RESAMPLING, NOVEL_RATIO, OVERSAMPLING, REPLACEMENT,
        SEED,
    )
    return pythia_main(get_args(*experiment_settings))
Ejemplo n.º 9
0
def run_experiment(directory,
            BOW_APPEND,
            BOW_DIFFERENCE,
            BOW_PRODUCT,
            BOW_COS,
            BOW_TFIDF,
            ST_APPEND,
            ST_DIFFERENCE,
            ST_PRODUCT,
            ST_COS,
            LDA_APPEND,
            LDA_DIFFERENCE,
            LDA_PRODUCT,
            LDA_COS,
            LDA_TOPICS,
            W2V_AVG,
            W2V_MAX,
            W2V_MIN,
            W2V_ABS,
            W2V_APPEND,
            W2V_DIFFERENCE,
            W2V_PRODUCT,
            W2V_COS,
            W2V_PRETRAINED,
            W2V_MIN_COUNT,
            W2V_WINDOW,
            W2V_SIZE,
            W2V_WORKERS,
            CNN_APPEND,
            CNN_DIFFERENCE,
            CNN_PRODUCT,
            CNN_COS,
            WORDONEHOT,
            WORDONEHOT_VOCAB,
            LOG_REG,
            LOG_PENALTY,
            LOG_TOL,
            LOG_C,
            SVM,
            SVM_C,
            SVM_KERNEL,
            SVM_GAMMA,
            XGB,
            XGB_LEARNRATE,
            XGB_MAXDEPTH,
            XGB_MINCHILDWEIGHT,
            XGB_COLSAMPLEBYTREE,
            MEM_NET,
            MEM_VOCAB,
            MEM_TYPE,
            MEM_BATCH,
            MEM_EPOCHS,
            MEM_MASK_MODE,
            MEM_EMBED_MODE,
            MEM_ONEHOT_MIN_LEN,
            MEM_ONEHOT_MAX_LEN,
            RESAMPLING,
            NOVEL_RATIO,
            OVERSAMPLING,
            REPLACEMENT,
            SAVE_RESULTS,
            SAVEEXPERIMENTDATA,
            EXPERIMENTDATAFILE,
            VOCAB_SIZE,
            STEM,
            FULL_VOCAB_SIZE,
            FULL_VOCAB_TYPE,
            FULL_CHAR_VOCAB,
            SEED,
            USE_CACHE):
    """Thin wrapper: forward every flag to get_args and run pythia_main.

    The tuple below preserves the exact positional order get_args expects.
    """
    experiment_settings = (
        directory,
        # bag-of-words features
        BOW_APPEND, BOW_DIFFERENCE, BOW_PRODUCT, BOW_COS, BOW_TFIDF,
        # skip-thought features
        ST_APPEND, ST_DIFFERENCE, ST_PRODUCT, ST_COS,
        # LDA features
        LDA_APPEND, LDA_DIFFERENCE, LDA_PRODUCT, LDA_COS, LDA_TOPICS,
        # word2vec features
        W2V_AVG, W2V_MAX, W2V_MIN, W2V_ABS, W2V_APPEND, W2V_DIFFERENCE,
        W2V_PRODUCT, W2V_COS, W2V_PRETRAINED, W2V_MIN_COUNT, W2V_WINDOW,
        W2V_SIZE, W2V_WORKERS,
        # CNN / one-hot features
        CNN_APPEND, CNN_DIFFERENCE, CNN_PRODUCT, CNN_COS,
        WORDONEHOT, WORDONEHOT_VOCAB,
        # classifiers
        LOG_REG, LOG_PENALTY, LOG_TOL, LOG_C,
        SVM, SVM_C, SVM_KERNEL, SVM_GAMMA,
        XGB, XGB_LEARNRATE, XGB_MAXDEPTH, XGB_MINCHILDWEIGHT,
        XGB_COLSAMPLEBYTREE,
        # memory network
        MEM_NET, MEM_VOCAB, MEM_TYPE, MEM_BATCH, MEM_EPOCHS, MEM_MASK_MODE,
        MEM_EMBED_MODE, MEM_ONEHOT_MIN_LEN, MEM_ONEHOT_MAX_LEN,
        # sampling / bookkeeping
        RESAMPLING, NOVEL_RATIO, OVERSAMPLING, REPLACEMENT,
        SAVE_RESULTS, SAVEEXPERIMENTDATA, EXPERIMENTDATAFILE,
        VOCAB_SIZE, STEM, FULL_VOCAB_SIZE, FULL_VOCAB_TYPE, FULL_CHAR_VOCAB,
        SEED, USE_CACHE,
    )
    return pythia_main(get_args(*experiment_settings))