Python add_by_params Examples, workdir.classes.models.qm.add_by_params Python Examples

Example #1

0

Show file

File: level1_models_logregr1.py Project: quantum13/mlbootcamp7

    def fn(params):
        """Score one logistic-regression configuration by cross-validation.

        ``params`` is a dict read with the keys ``C``, ``max_iter``,
        ``solver`` and ``data_id``. ``C`` is divided by 10 and ``max_iter``
        is replaced by ``int(1.3 ** max_iter)`` in place, so the caller's
        dict is modified. The module-level ``counter`` is incremented on
        every call and printed against ``rounds`` as a progress indicator.

        Returns the mean of the collected CV scores, which currently is the
        single seed-1000 score.
        """
        global counter

        counter += 1

        # Decode the raw sampled values into real estimator settings.
        params['C'] = params['C'] / 10
        params['max_iter'] = int(1.3 ** params['max_iter'])

        estimator = LogisticRegression(
            n_jobs=-1,
            max_iter=params['max_iter'],
            solver=params['solver'],
            C=params['C'],
        )
        model_id = qm.add_by_params(
            estimator,
            'hyperopt log_regr',
            predict_fn='predict_proba',
        )

        score = cv.cross_val(
            model_id,
            params['data_id'],
            seed=1000,
            early_stop_cv=lambda x: x > CV_SCORE_TO_STOP,
        )
        scores = [np.float64(score)]
        # Re-scoring with seeds 1001..1007 (force=True) when the first score
        # is below CV_SCORE_TO_STOP is switched off in this objective.

        progress = "{}/{}".format(counter, rounds)
        print(params['data_id'], model_id, progress, scores,
              datetime.datetime.now(), params)
        return np.mean(scores)

Example #2

0

Show file

    def fn(params):
        """Score one ExtraTreesClassifier configuration by cross-validation.

        ``params`` is a dict read with the keys ``max_features``,
        ``n_estimators``, ``max_depth`` and ``data_id``. ``max_features`` is
        divided by 10 and ``n_estimators`` is replaced by
        ``int(1.3 ** n_estimators)`` in place, so the caller's dict is
        modified. The module-level ``counter`` is incremented on every call.

        Returns the mean of the collected CV scores, which currently is the
        single seed-1000 score.
        """
        global counter

        counter += 1

        # Decode the raw sampled values into real estimator settings.
        params['max_features'] = params['max_features'] / 10
        params['n_estimators'] = int(1.3 ** params['n_estimators'])

        forest = ExtraTreesClassifier(
            max_depth=int(params['max_depth']),
            n_estimators=int(params['n_estimators']),
            max_features=float(params['max_features']),
            n_jobs=-1,
        )
        model_id = qm.add_by_params(
            forest,
            'hyperopt rand_forest',
            predict_fn='predict_proba',
        )

        score = cv.cross_val(
            model_id,
            params['data_id'],
            seed=1000,
            early_stop_cv=lambda x: x > CV_SCORE_TO_STOP,
        )
        scores = [np.float64(score)]
        # Re-scoring with seeds 1001..1007 (force=True) when the first score
        # is below CV_SCORE_TO_STOP is switched off in this objective.

        progress = "{}/{}".format(counter, rounds)
        print(params['data_id'], model_id, progress, scores,
              datetime.datetime.now(), params)
        return np.mean(scores)

Example #3

0

Show file

File: level1_models_nn02.py Project: quantum13/mlbootcamp7

    def fn(params):
        """Score one QNN2 configuration by cross-validation.

        ``params`` is a dict read with the keys ``epochs``, ``middle_dim``
        and ``data_id``. ``epochs`` is replaced by ``int(1.3 ** epochs)`` in
        place, so the caller's dict is modified. The module-level
        ``counter`` is incremented on every call.

        Returns the mean of the collected CV scores, which currently is the
        single seed-1000 score.
        """
        global counter

        counter += 1

        # The sampled value is an exponent of 1.3, not an epoch count.
        params['epochs'] = int(1.3 ** params['epochs'])

        network = QNN2(
            middle_dim=int(params['middle_dim']),
            epochs=int(params['epochs']),
        )
        model_id = qm.add_by_params(network, 'hyperopt nn1')

        score = cv.cross_val(
            model_id,
            params['data_id'],
            seed=1000,
            early_stop_cv=lambda x: x > CV_SCORE_TO_STOP,
        )
        scores = [np.float64(score)]
        # Re-scoring with seeds 1001..1007 (force=True) when the first score
        # is below CV_SCORE_TO_STOP is switched off in this objective.

        progress = "{}/{}".format(counter, rounds)
        print(params['data_id'], model_id, progress, scores,
              datetime.datetime.now(), params)
        return np.mean(scores)

Example #4

0

Show file

File: level1_models_xgb2.py Project: quantum13/mlbootcamp7

    def fn(params):
        """Score one QXgb2 configuration, re-scoring it on extra seeds.

        ``params`` is a dict that must contain ``data_id`` plus the raw
        values for ``num_boost_rounds``, ``eta``, ``lr_decay``,
        ``subsample``, ``colsample_bytree``, ``colsample_bylevel``,
        ``gamma``, ``alpha`` and ``maxdepth``. ``data_id`` is removed from
        the dict and the other values are decoded in place, so the caller's
        dict is modified. The module-level ``counter`` is incremented on
        every call.

        The model is first cross-validated with seed 1000. If that score is
        below ``CV_SCORE_TO_STOP`` it is cross-validated again with seeds
        1001..1007 (``force=True``), and the mean of all collected scores is
        returned.
        """
        global counter

        counter += 1
        data_id = params.pop('data_id')

        # Decode the raw sampled values into real booster settings.
        params['num_boost_rounds'] = int(1.3 ** params['num_boost_rounds'])
        params['eta'] = round(1 / (1.3 ** params['eta']), 4)
        params['lr_decay'] = round(1 / (2 ** params['lr_decay']), 4)
        for ratio_key in ('subsample', 'colsample_bytree', 'colsample_bylevel'):
            params[ratio_key] = params[ratio_key] / 10
        for power_key in ('gamma', 'alpha'):
            params[power_key] = round(5 ** params[power_key], 3)

        booster = QXgb2(
            booster='gbtree',
            objective='binary:logistic',
            eval_metric='logloss',
            subsample=params['subsample'],
            colsample_bytree=params['colsample_bytree'],
            colsample_bylevel=params['colsample_bylevel'],
            eta=params['eta'],
            gamma=params['gamma'],
            alpha=params['alpha'],
            max_depth=params['maxdepth'],
            num_boost_round=params['num_boost_rounds'],
            lr_decay=params['lr_decay'],
        )
        model_id = qm.add_by_params(booster, 'hyperopt xgb')

        first_score = np.float64(
            cv.cross_val(
                model_id,
                data_id,
                seed=1000,
                early_stop_cv=lambda x: x > CV_SCORE_TO_STOP,
            )
        )
        scores = [first_score]
        if first_score < CV_SCORE_TO_STOP:
            # Seven additional seeds, without the early-stop callback.
            for seed in range(1001, 1008):
                extra = cv.cross_val(model_id, data_id, seed=seed, force=True)
                scores.append(np.float64(extra))

        progress = "{}/{}".format(counter, rounds)
        print(data_id, model_id, progress, scores,
              datetime.datetime.now(), params)
        return np.mean(scores)

Example #5

0

Show file

    # Flatten every row's comma-separated 'models' field into
    # [model id, data_id] pairs appended to the existing results list.
    results.extend(
        [int(model), row['data_id']]
        for row in res
        for model in row['models'].split(',')
    )


    for i in range(ROUNDS):
        random.shuffle(results)
        models = list(results[:random.randint(2, 10)])
        models = sorted(models, key=lambda x: (x[0], x[1]))
        print('{}/{}'.format(i, ROUNDS), models)


        try:
            model_id = qm.add_by_params(
                QAvg(models)
            )
            print(cv.cross_val(model_id, -1, early_stop_cv=lambda x: x>CV_SCORE_TO_STOP))
            conn.execute("update qml_models set level=2 where model_id={}".format(model_id))

            # model_id = qm.add_by_params(
            #     QAvg(models, is_geom=True)
            # )
            # print(cv.cross_val(model_id, -1, early_stop_cv=lambda x: x>CV_SCORE_TO_STOP))
            # conn.execute("update qml_models set level=2 where model_id={}".format(model_id))

            model_id = qm.add_by_params(
                QRankedAvg(models)
            )
            print(cv.cross_val(model_id, -1, early_stop_cv=lambda x: x>CV_SCORE_TO_STOP))
            conn.execute("update qml_models set level=2 where model_id={}".format(model_id))

Example #6

0

Show file

    best_models = []

    # One [model id, data_id, 1000] entry per id found in each row's
    # comma-separated 'models' field.
    results = [
        [int(model), row['data_id'], 1000]
        for row in res
        for model in row['models'].split(',')
    ]

    for i in range(ROUNDS):
        random.shuffle(results)
        models = list(results[:random.randint(4, 25)])
        models = sorted(models, key=lambda x: (x[0], x[1]))
        print('{}/{}'.format(i, ROUNDS), models)

        try:

            model_id2 = qm.add_by_params(Ridge(alpha=0.05))

            if len(models) >= 4:
                model_id = qm.add_by_params(
                    QStackModel(models,
                                second_layer_model=model_id2,
                                nsplits=2))
                conn.execute(
                    "update qml_models set level=2 where model_id={}".format(
                        model_id))
                print(
                    model_id,
                    cv.cross_val(model_id,
                                 -1,
                                 early_stop_cv=lambda x: x > CV_SCORE_TO_STOP))

Example #7

0

Show file

    def fn(params):
        """Score a QStackModel whose second layer is a logistic regression.

        ``params`` is a dict read with the keys ``C``, ``max_iter``,
        ``solver`` and ``data_id``. ``C`` is divided by 10 and ``max_iter``
        is replaced by ``int(1.3 ** max_iter)`` in place, so the caller's
        dict is modified. The module-level ``counter`` is incremented on
        every call.

        The logistic regression is registered first; its id is then used as
        ``second_layer_model`` of a QStackModel registered at level -2, and
        that stack model is the one that gets cross-validated.

        Returns the mean of the collected CV scores, which currently is the
        single seed-1000 score.
        """
        global counter

        counter += 1

        # Decode the raw sampled values into real estimator settings.
        params['C'] = params['C'] / 10
        params['max_iter'] = int(1.3 ** params['max_iter'])

        second_layer = LogisticRegression(
            n_jobs=-1,
            max_iter=params['max_iter'],
            solver=params['solver'],
            C=params['C'],
        )
        model_id = qm.add_by_params(
            second_layer,
            'hyperopt log_regr',
            predict_fn='predict_proba',
        )

        # First-layer entries are three-element lists; presumably
        # [model id, data_id, seed] -- confirm against QStackModel.
        # Entries tried earlier and currently disabled: models 1747, 101340,
        # 101331 and 101261 on data 69/66/47 with 1000 (plus 1001 on data 69
        # for the first three), and models 101655, 1831, 101457, 101657,
        # 1841 and 101411 on data 266 and 269.
        first_layer = [
            [1747, 69, 1000],
            [101655, 266, 1000],
            [1831, 266, 1000],
            [101457, 266, 1000],
        ]
        stack = QStackModel(
            first_layer,
            second_layer_model=model_id,
            nsplits=5,
        )
        model_id_main = qm.add_by_params(stack, level=-2)

        score = cv.cross_val(
            model_id_main,
            params['data_id'],
            seed=1000,
            early_stop_cv=lambda x: x > CV_SCORE_TO_STOP,
        )
        scores = [np.float64(score)]
        # Re-scoring with seeds 1001..1007 (force=True) when the first score
        # is below CV_SCORE_TO_STOP is switched off in this objective.

        # NOTE(review): the log line shows the second-layer model_id, not the
        # model_id_main that was cross-validated -- confirm this is intended.
        progress = "{}/{}".format(counter, rounds)
        print(params['data_id'], model_id, progress, scores,
              datetime.datetime.now(), params)
        return np.mean(scores)

Example #8

0

Show file

import datetime
import random

from hyperopt import hp, fmin, tpe
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

import workdir.classes.config  # loads local config
from qml.cv import QCV
from qml.helpers import get_engine
from qml.models import QXgb, QAvg, QRankedAvg, QStackModel, QPostProcessingModel, QRankedByLineAvg, QAvgOneModelData
from workdir.classes.models import qm

# Cross-validation helper built around the project's `qm` object.
cv = QCV(qm)
# Disabled one-off cross-validation checks for models 465/466 with 15/16 as
# the second argument:
# print(cv.cross_val(465, 15))
# print(cv.cross_val(465, 16))
# print(cv.cross_val(466, 15))
# print(cv.cross_val(466, 16))
# Register a QAvgOneModelData(636, 8) model at level -2, print the id that
# add_by_params returns, then call qpredict for it with 25 (presumably a
# data_id, as in cv.cross_val(model_id, data_id) -- TODO confirm).
new_model_id = qm.add_by_params(QAvgOneModelData(636, 8), level=-2)
print(new_model_id)
qm.qpredict(new_model_id, 25)
# # print(new_model_id)
#
# for model_id in [416]: #450
#     for data_id in [25]:#13,15,16,17
#
#         for i in range(8):
#             res = cv.cross_val(model_id, data_id, seed=1000 + i, force=True)
#             print('##', model_id, data_id, i,  res)

Example #9

0

Show file

import workdir.classes.config
from qml.cv import QCV
from qml.models import QXgb, QAvg, QAvgOneModelData
from workdir.classes.models import qm

# Cross-validation helper built around the project's `qm` object.
cv = QCV(qm)

# Disabled alternative: register a QXgb model with fixed parameters at
# level -1 instead of reusing an existing model id.
# model_id = qm.add_by_params(
#     QXgb(
# ** {"alpha": 1.0, "booster": "gbtree", "colsample_bylevel": 0.7, "colsample_bytree": 0.8, "eta": 0.004, "eval_metric": "logloss",
#     "gamma": 0.2, "max_depth": 4, "num_boost_round": 2015, "objective": "binary:logistic", "subsample": 0.8, "tree_method": "hist"}
#     ),
#     'hyperopt xgb', level=-1
# )

# Register a QAvgOneModelData(416, 2) model at level -2; the returned id is
# what the feature-selection call below operates on.
model_id = qm.add_by_params(QAvgOneModelData(416, 2), level=-2)

cv.features_sel_del(
    model_id,
    23,
    early_stop_cv=lambda x: x < 0.557,  # minmax
    log_file='workdir/logs/data23_sub_cols3.txt',
    exclude=[
        'data__daily__data_vol_mb__median__max',
        'data__daily__cell_count__min',
        'voice__hourly__voice_dur_min__sum__max',
        'voice__daily__voice_dur_min__sum__median',
        'data__daily__data_vol_mb__sum__min',
        'data__daily__data_vol_mb__avg__avg',
        'voice__hourly__voice_dur_min__median__max',
        'voice__hourly__voice_dur_min__sum__min',

Example #10

0

Show file

import datetime
import numpy as np

from hyperopt import hp, fmin, tpe
import os
import sys
sys.path.insert(0, os.getcwd())
import workdir.classes.config
from qml.cv import QCV
from qml.models import QXgb, QAvg, QAvgOneModelData
from workdir.classes.models import qm



# Cross-validation helper built around the project's `qm` object.
cv = QCV(qm)

# Register a QXgb model with fixed parameters at level -1.
model_id = qm.add_by_params(
    QXgb(
        alpha=1.0,
        booster="gbtree",
        colsample_bylevel=0.7,
        colsample_bytree=0.8,
        eta=0.004,
        eval_metric="logloss",
        gamma=0.2,
        max_depth=4,
        num_boost_round=2015,
        objective="binary:logistic",
        subsample=0.8,
        tree_method="hist",
    ),
    'hyperopt xgb',
    level=-1,
)

# Wrap the model registered above in QAvgOneModelData(model_id, 3) at
# level -2; model_id now refers to the wrapper.
model_id = qm.add_by_params(QAvgOneModelData(model_id, 3), level=-2)

# Run features_sel_del for the wrapper with 66 as the second argument and
# nothing excluded, logging to workdir/logs/feat19.txt.
cv.features_sel_del(
    model_id,
    66,
    early_stop_cv=lambda x: x > 0.5414,
    log_file='workdir/logs/feat19.txt',
    exclude=[],
)

Example #11

0

Show file

File: level1_models_xgb3.py Project: quantum13/mlbootcamp7

    def fn(params):
        """Score a QStackModel whose second layer is a QXgb booster.

        ``params`` is a dict that must contain ``data_id`` plus the raw
        values for ``num_boost_rounds``, ``eta``, ``gamma``, ``alpha`` and
        ``maxdepth``. ``data_id`` is removed from the dict and the other
        values are decoded in place, so the caller's dict is modified. The
        module-level ``counter`` is incremented on every call.

        The booster is registered first; its id is then used as
        ``second_layer_model`` of a QStackModel registered at level -2, and
        that stack model is the one that gets cross-validated.

        Returns the mean of the collected CV scores, which currently is the
        single seed-1000 score.
        """
        global counter

        counter += 1
        data_id = params.pop('data_id')

        # Decode the raw sampled values into real booster settings.
        params['num_boost_rounds'] = int(1.3 ** params['num_boost_rounds'])
        params['eta'] = round(1 / (1.3 ** params['eta']), 4)
        # The three sampling ratios are pinned to 1; the former "/ 10"
        # rescaling of the sampled values is disabled.
        for ratio_key in ('subsample', 'colsample_bytree', 'colsample_bylevel'):
            params[ratio_key] = 1
        for power_key in ('gamma', 'alpha'):
            params[power_key] = round(5 ** params[power_key], 3)

        # tree_method='hist' was previously passed here and is disabled.
        booster = QXgb(
            booster='gbtree',
            objective='binary:logistic',
            eval_metric='logloss',
            subsample=params['subsample'],
            colsample_bytree=params['colsample_bytree'],
            colsample_bylevel=params['colsample_bylevel'],
            eta=params['eta'],
            gamma=params['gamma'],
            alpha=params['alpha'],
            max_depth=params['maxdepth'],
            num_boost_round=params['num_boost_rounds'],
        )
        model_id = qm.add_by_params(booster, 'hyperopt xgb')

        # First-layer entries are three-element lists; presumably
        # [model id, data_id, seed] -- confirm against QStackModel.
        # Entries tried earlier and currently disabled: models 1747, 101340,
        # 101331 and 101261 on data 69/66/47 with 1000 (plus 1001 on data 69
        # for the first three), and models 101655, 1831, 101457, 101657,
        # 1841 and 101411 on data 266 and 269.
        first_layer = [
            [1747, 69, 1000],
            [101655, 266, 1000],
            [1831, 266, 1000],
            [101457, 266, 1000],
        ]
        stack = QStackModel(
            first_layer,
            second_layer_model=model_id,
            nsplits=5,
        )
        model_id_main = qm.add_by_params(stack, level=-2)

        score = cv.cross_val(
            model_id_main,
            data_id,
            seed=1000,
            early_stop_cv=lambda x: x > CV_SCORE_TO_STOP,
        )
        scores = [np.float64(score)]
        # Re-scoring with seeds 1001..1007 (force=True) when the first score
        # is below CV_SCORE_TO_STOP is switched off in this objective.

        # NOTE(review): the log line shows the second-layer model_id, not the
        # model_id_main that was cross-validated -- confirm this is intended.
        progress = "{}/{}".format(counter, rounds)
        print(data_id, model_id, progress, scores,
              datetime.datetime.now(), params)
        return np.mean(scores)

Example #12

0

Show file

            group by data_id, cls, descr
        """.format(CV_SCORE_TO_SELECT)).fetchall()

    # One [model id, data_id, 1000] entry per id found in each row's
    # comma-separated 'models' field.
    results = [
        [int(model), row['data_id'], 1000]
        for row in res
        for model in row['models'].split(',')
    ]

    # NOTE(review): the loop runs a fixed 5000 times while the progress line
    # prints ROUNDS as the total -- confirm which count is intended.
    for i in range(5000):
        # Draw a random subset of 2..20 entries, ordered by the first two
        # fields of each entry.
        random.shuffle(results)
        subset_size = random.randint(2, 20)
        models = sorted(results[:subset_size], key=lambda x: (x[0], x[1]))
        print('{}/{}'.format(i, ROUNDS), models)

        # Register the average of the subset, print its CV result and mark
        # the new model as level 3 in qml_models.
        model_id = qm.add_by_params(QAvg(models))
        cv_result = cv.cross_val(
            model_id,
            -1,
            early_stop_cv=lambda x: x > CV_SCORE_TO_STOP,
        )
        print(cv_result)
        level_sql = "update qml_models set level=3 where model_id={}".format(
            model_id)
        conn.execute(level_sql)

        # model_id = qm.add_by_params(
        #     QAvg(models, is_geom=True)
        # )
        # print(cv.cross_val(model_id, -1, early_stop_cv=lambda x: x>CV_SCORE_TO_STOP))
        # conn.execute("update qml_models set level=3 where model_id={}".format(model_id))
        #
        # model_id = qm.add_by_params(
        #     QRankedAvg(models)

Example #13

0

Show file

File: feat_sel_17.py Project: quantum13/mlbootcamp7

import workdir.classes.config
from qml.cv import QCV
from qml.models import QXgb, QAvg, QAvgOneModelData
from workdir.classes.models import qm

# Cross-validation helper built around the project's `qm` object.
cv = QCV(qm)

# Register a QXgb model with fixed parameters; its id is what the
# feature-selection call that follows operates on.
model_id = qm.add_by_params(
    QXgb(
        alpha=0.008,
        booster="gbtree",
        colsample_bylevel=0.9,
        colsample_bytree=0.9,
        eta=0.0024,
        eval_metric="logloss",
        gamma=0.04,
        max_depth=4,
        num_boost_round=2619,
        objective="binary:logistic",
        subsample=0.7,
        tree_method="hist",
    ),
    'hyperopt xgb',
)

#model_id =qm.add_by_params(QAvgOneModelData(model_id, 8), level=-2)

cv.features_sel_add(
    model_id,
    60, [
        'age', 'height', 'weight', 'ap_hi', 'ap_lo', 'smoke', 'alco', 'active',