def fn(params):
    """Score one sampled logistic-regression configuration.

    ``params`` is modified in place: ``C`` is divided by 10 and
    ``max_iter`` is replaced by ``int(1.3 ** max_iter)``.  The estimator is
    registered through ``qm.add_by_params`` and cross-validated on
    ``params['data_id']``.  Every call increments the module-level
    ``counter`` and prints one progress line.

    Returns the mean of the collected cross-validation scores (currently a
    single score).  Presumably used as a hyperopt objective -- confirm
    against the caller.
    """
    global counter

    counter += 1

    # Decode the raw sampled values into the actual hyper-parameters.
    scaled_c = params['C'] / 10
    params['C'] = scaled_c
    iterations = int(1.3 ** params['max_iter'])
    params['max_iter'] = iterations

    estimator = LogisticRegression(
        n_jobs=-1,
        max_iter=iterations,
        solver=params['solver'],
        C=scaled_c,
    )
    model_id = qm.add_by_params(
        estimator, 'hyperopt log_regr', predict_fn='predict_proba')

    dataset = params['data_id']
    first_score = cv.cross_val(
        model_id,
        dataset,
        seed=1000,
        early_stop_cv=lambda x: x > CV_SCORE_TO_STOP,
    )
    scores = [np.float64(first_score)]
    # A disabled follow-up used to repeat the cross-validation with seeds
    # 1001..1007 (force=True) whenever the first score was below
    # CV_SCORE_TO_STOP and append those scores before averaging.

    progress = "{}/{}".format(counter, rounds)
    print(dataset, model_id, progress, scores, datetime.datetime.now(), params)
    return np.mean(scores)
Ejemplo n.º 2
0
    def fn(params):
        """Score one sampled ExtraTreesClassifier configuration.

        ``params`` is modified in place: ``max_features`` is divided by 10
        and ``n_estimators`` is replaced by ``int(1.3 ** n_estimators)``.
        The estimator is registered through ``qm.add_by_params`` and
        cross-validated on ``params['data_id']``.  Every call increments
        the module-level ``counter`` and prints one progress line.

        Returns the mean of the collected cross-validation scores
        (currently a single score).
        """
        global counter

        counter += 1

        # Decode the raw sampled values into the actual hyper-parameters.
        feature_fraction = params['max_features'] / 10
        params['max_features'] = feature_fraction
        tree_count = int(1.3 ** params['n_estimators'])
        params['n_estimators'] = tree_count

        forest = ExtraTreesClassifier(
            max_depth=int(params['max_depth']),
            n_estimators=tree_count,
            max_features=float(feature_fraction),
            n_jobs=-1,
        )
        # NOTE(review): the description says 'rand_forest' although the
        # estimator is ExtraTreesClassifier -- kept as is, confirm intent.
        model_id = qm.add_by_params(
            forest, 'hyperopt rand_forest', predict_fn='predict_proba')

        dataset = params['data_id']
        first_score = cv.cross_val(
            model_id,
            dataset,
            seed=1000,
            early_stop_cv=lambda x: x > CV_SCORE_TO_STOP,
        )
        scores = [np.float64(first_score)]
        # A disabled follow-up used to repeat the cross-validation with
        # seeds 1001..1007 (force=True) whenever the first score was below
        # CV_SCORE_TO_STOP and append those scores before averaging.

        progress = "{}/{}".format(counter, rounds)
        print(dataset, model_id, progress, scores, datetime.datetime.now(),
              params)
        return np.mean(scores)
Ejemplo n.º 3
0
    def fn(params):
        """Score one sampled QNN2 configuration.

        ``params`` is modified in place: ``epochs`` is replaced by
        ``int(1.3 ** epochs)``.  The network is registered through
        ``qm.add_by_params`` and cross-validated on ``params['data_id']``.
        Every call increments the module-level ``counter`` and prints one
        progress line.

        Returns the mean of the collected cross-validation scores
        (currently a single score).
        """
        global counter

        counter += 1

        # The sampled value is an exponent of base 1.3.
        epoch_count = int(1.3 ** params['epochs'])
        params['epochs'] = epoch_count

        network = QNN2(
            middle_dim=int(params['middle_dim']),
            epochs=epoch_count,
        )
        model_id = qm.add_by_params(network, 'hyperopt nn1')

        dataset = params['data_id']
        first_score = cv.cross_val(
            model_id,
            dataset,
            seed=1000,
            early_stop_cv=lambda x: x > CV_SCORE_TO_STOP,
        )
        scores = [np.float64(first_score)]
        # A disabled follow-up used to repeat the cross-validation with
        # seeds 1001..1007 (force=True) whenever the first score was below
        # CV_SCORE_TO_STOP and append those scores before averaging.

        progress = "{}/{}".format(counter, rounds)
        print(dataset, model_id, progress, scores, datetime.datetime.now(),
              params)
        return np.mean(scores)
Ejemplo n.º 4
0
    def fn(params):
        """Score one sampled QXgb2 configuration.

        ``params`` is modified in place: ``data_id`` is removed and the
        remaining sampled values are decoded (powers of 1.3, 2 and 5, and
        ratios divided by 10).  The model is registered through
        ``qm.add_by_params`` and cross-validated with seed 1000; when that
        first score is below ``CV_SCORE_TO_STOP`` it is cross-validated
        seven more times with seeds 1001..1007.  Every call increments the
        module-level ``counter`` and prints one progress line.

        Returns the mean of all collected cross-validation scores.
        """
        global counter

        counter += 1

        # data_id selects the data set and is not a model hyper-parameter.
        data_id = params.pop('data_id')

        # Decode the raw sampled values into the actual hyper-parameters.
        params['num_boost_rounds'] = int(1.3 ** params['num_boost_rounds'])
        params['eta'] = round(1 / (1.3 ** params['eta']), 4)
        params['lr_decay'] = round(1 / (2 ** params['lr_decay']), 4)
        for ratio_key in ('subsample', 'colsample_bytree', 'colsample_bylevel'):
            params[ratio_key] = params[ratio_key] / 10
        for power_key in ('gamma', 'alpha'):
            params[power_key] = round(5 ** params[power_key], 3)

        booster = QXgb2(
            booster='gbtree',
            objective='binary:logistic',
            eval_metric='logloss',
            subsample=params['subsample'],
            colsample_bytree=params['colsample_bytree'],
            colsample_bylevel=params['colsample_bylevel'],
            eta=params['eta'],
            gamma=params['gamma'],
            alpha=params['alpha'],
            max_depth=params['maxdepth'],
            num_boost_round=params['num_boost_rounds'],
            lr_decay=params['lr_decay'],
        )
        model_id = qm.add_by_params(booster, 'hyperopt xgb')

        first_score = cv.cross_val(
            model_id,
            data_id,
            seed=1000,
            early_stop_cv=lambda x: x > CV_SCORE_TO_STOP,
        )
        scores = [np.float64(first_score)]

        # Only configurations whose first score is below the threshold are
        # re-evaluated on the additional seeds.
        if scores[0] < CV_SCORE_TO_STOP:
            for seed in range(1001, 1008):
                repeat = cv.cross_val(model_id, data_id, seed=seed, force=True)
                scores.append(np.float64(repeat))

        progress = "{}/{}".format(counter, rounds)
        print(data_id, model_id, progress, scores, datetime.datetime.now(),
              params)
        return np.mean(scores)
Ejemplo n.º 5
0
    for r in res:
        for m in r['models'].split(','):
            results.append([int(m), r['data_id']])


    for i in range(ROUNDS):
        random.shuffle(results)
        models = list(results[:random.randint(2, 10)])
        models = sorted(models, key=lambda x: (x[0], x[1]))
        print('{}/{}'.format(i, ROUNDS), models)


        try:
            model_id = qm.add_by_params(
                QAvg(models)
            )
            print(cv.cross_val(model_id, -1, early_stop_cv=lambda x: x>CV_SCORE_TO_STOP))
            conn.execute("update qml_models set level=2 where model_id={}".format(model_id))

            # model_id = qm.add_by_params(
            #     QAvg(models, is_geom=True)
            # )
            # print(cv.cross_val(model_id, -1, early_stop_cv=lambda x: x>CV_SCORE_TO_STOP))
            # conn.execute("update qml_models set level=2 where model_id={}".format(model_id))

            model_id = qm.add_by_params(
                QRankedAvg(models)
            )
            print(cv.cross_val(model_id, -1, early_stop_cv=lambda x: x>CV_SCORE_TO_STOP))
            conn.execute("update qml_models set level=2 where model_id={}".format(model_id))
Ejemplo n.º 6
0
    results = []
    best_models = []

    for r in res:
        for m in r['models'].split(','):
            results.append([int(m), r['data_id'], 1000])

    for i in range(ROUNDS):
        random.shuffle(results)
        models = list(results[:random.randint(4, 25)])
        models = sorted(models, key=lambda x: (x[0], x[1]))
        print('{}/{}'.format(i, ROUNDS), models)

        try:

            model_id2 = qm.add_by_params(Ridge(alpha=0.05))

            if len(models) >= 4:
                model_id = qm.add_by_params(
                    QStackModel(models,
                                second_layer_model=model_id2,
                                nsplits=2))
                conn.execute(
                    "update qml_models set level=2 where model_id={}".format(
                        model_id))
                print(
                    model_id,
                    cv.cross_val(model_id,
                                 -1,
                                 early_stop_cv=lambda x: x > CV_SCORE_TO_STOP))
Ejemplo n.º 7
0
    def fn(params):
        """Score a QStackModel whose second layer is a sampled LogisticRegression.

        ``params`` is modified in place: ``C`` is divided by 10 and
        ``max_iter`` is replaced by ``int(1.3 ** max_iter)``.  The
        logistic regression is registered first; its id is then used as
        ``second_layer_model`` of a ``QStackModel`` over a fixed list of
        first-layer entries.  The stack is cross-validated on
        ``params['data_id']``.  Every call increments the module-level
        ``counter`` and prints one progress line.

        Returns the mean of the collected cross-validation scores
        (currently a single score).
        """
        global counter

        counter += 1

        # Decode the raw sampled values into the actual hyper-parameters.
        scaled_c = params['C'] / 10
        params['C'] = scaled_c
        iterations = int(1.3 ** params['max_iter'])
        params['max_iter'] = iterations

        second_layer = LogisticRegression(
            n_jobs=-1,
            max_iter=iterations,
            solver=params['solver'],
            C=scaled_c,
        )
        model_id = qm.add_by_params(
            second_layer, 'hyperopt log_regr', predict_fn='predict_proba')

        # Active first-layer entries; each looks like
        # [model_id, data_id, seed] -- TODO confirm against QStackModel.
        first_layer = [
            [1747, 69, 1000],
            [101655, 266, 1000],
            [1831, 266, 1000],
            [101457, 266, 1000],
        ]
        # Commented-out alternative entries, kept for reference:
        #   [1747, 69, 1000], [1747, 66, 1000], [1747, 47, 1000],
        #   [1747, 69, 1001],
        #   [101340, 69, 1000], [101340, 66, 1000], [101340, 47, 1000],
        #   [101340, 69, 1001],
        #   [101331, 69, 1000], [101331, 66, 1000], [101331, 47, 1000],
        #   [101331, 69, 1001],
        #   [101261, 69, 1000], [101261, 66, 1000], [101261, 47, 1000],
        #   [101655, 266, 1000], [101655, 269, 1000],
        #   [1831, 266, 1000], [1831, 269, 1000],
        #   [101457, 266, 1000], [101457, 269, 1000],
        #   [101657, 269, 1000], [101657, 266, 1000],
        #   [1841, 269, 1000], [1841, 266, 1000],
        #   [101411, 269, 1000], [101411, 266, 1000],

        stack = QStackModel(first_layer, second_layer_model=model_id, nsplits=5)
        model_id_main = qm.add_by_params(stack, level=-2)

        dataset = params['data_id']
        first_score = cv.cross_val(
            model_id_main,
            dataset,
            seed=1000,
            early_stop_cv=lambda x: x > CV_SCORE_TO_STOP,
        )
        scores = [np.float64(first_score)]
        # A disabled follow-up used to repeat the cross-validation with
        # seeds 1001..1007 (force=True) whenever the first score was below
        # CV_SCORE_TO_STOP and append those scores before averaging.

        # NOTE(review): the log line shows the second-layer model_id, not
        # model_id_main that was cross-validated -- confirm this is intended.
        progress = "{}/{}".format(counter, rounds)
        print(dataset, model_id, progress, scores, datetime.datetime.now(),
              params)
        return np.mean(scores)
Ejemplo n.º 8
0
import datetime
import random

from hyperopt import hp, fmin, tpe
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

import workdir.classes.config  # loads local config
from qml.cv import QCV
from qml.helpers import get_engine
from qml.models import QXgb, QAvg, QRankedAvg, QStackModel, QPostProcessingModel, QRankedByLineAvg, QAvgOneModelData
from workdir.classes.models import qm

# Cross-validation helper built on the shared ``qm`` object.
cv = QCV(qm)
# print(cv.cross_val(465, 15))
# print(cv.cross_val(465, 16))
# print(cv.cross_val(466, 15))
# print(cv.cross_val(466, 16))

# Register a QAvgOneModelData(636, 8) wrapper at level -2, report its id and
# run qpredict for it with 25 as second argument (presumably a data id --
# confirm against qm.qpredict).
new_model_id = qm.add_by_params(
    QAvgOneModelData(636, 8),
    level=-2,
)
print(new_model_id)
qm.qpredict(new_model_id, 25)
# # print(new_model_id)
#
# for model_id in [416]: #450
#     for data_id in [25]:#13,15,16,17
#
#         for i in range(8):
#             res = cv.cross_val(model_id, data_id, seed=1000 + i, force=True)
#             print('##', model_id, data_id, i,  res)
Ejemplo n.º 9
0
import workdir.classes.config
from qml.cv import QCV
from qml.models import QXgb, QAvg, QAvgOneModelData
from workdir.classes.models import qm

# Cross-validation helper built on the shared ``qm`` object.
cv = QCV(qm)

# Disabled: registration of a fixed QXgb configuration at level -1.
# model_id = qm.add_by_params(
#     QXgb(
# ** {"alpha": 1.0, "booster": "gbtree", "colsample_bylevel": 0.7, "colsample_bytree": 0.8, "eta": 0.004, "eval_metric": "logloss",
#     "gamma": 0.2, "max_depth": 4, "num_boost_round": 2015, "objective": "binary:logistic", "subsample": 0.8, "tree_method": "hist"}
#     ),
#     'hyperopt xgb', level=-1
# )

# Register a QAvgOneModelData(416, 2) wrapper at level -2.
model_id = qm.add_by_params(
    QAvgOneModelData(416, 2),
    level=-2,
)

cv.features_sel_del(
    model_id,
    23,
    early_stop_cv=lambda x: x < 0.557,  # minmax
    log_file='workdir/logs/data23_sub_cols3.txt',
    exclude=[
        'data__daily__data_vol_mb__median__max',
        'data__daily__cell_count__min',
        'voice__hourly__voice_dur_min__sum__max',
        'voice__daily__voice_dur_min__sum__median',
        'data__daily__data_vol_mb__sum__min',
        'data__daily__data_vol_mb__avg__avg',
        'voice__hourly__voice_dur_min__median__max',
        'voice__hourly__voice_dur_min__sum__min',
Ejemplo n.º 10
0
import datetime
import numpy as np

from hyperopt import hp, fmin, tpe
import os
import sys
sys.path.insert(0, os.getcwd())
import workdir.classes.config
from qml.cv import QCV
from qml.models import QXgb, QAvg, QAvgOneModelData
from workdir.classes.models import qm



# Cross-validation helper built on the shared ``qm`` object.
cv = QCV(qm)

# Register a fixed QXgb configuration at level -1.
model_id = qm.add_by_params(
    QXgb(
        alpha=1.0,
        booster="gbtree",
        colsample_bylevel=0.7,
        colsample_bytree=0.8,
        eta=0.004,
        eval_metric="logloss",
        gamma=0.2,
        max_depth=4,
        num_boost_round=2015,
        objective="binary:logistic",
        subsample=0.8,
        tree_method="hist",
    ),
    'hyperopt xgb',
    level=-1,
)

# Wrap that model in QAvgOneModelData(model_id, 3); from here on model_id
# refers to the wrapper registered at level -2.
model_id = qm.add_by_params(
    QAvgOneModelData(model_id, 3),
    level=-2,
)

# Run features_sel_del for the wrapper with 66 as second argument, logging to
# workdir/logs/feat19.txt with nothing excluded up front.
cv.features_sel_del(
    model_id,
    66,
    early_stop_cv=lambda x: x > 0.5414,
    log_file='workdir/logs/feat19.txt',
    exclude=[],
)


Ejemplo n.º 11
0
    def fn(params):
        """Score a QStackModel whose second layer is a sampled QXgb model.

        ``params`` is modified in place: ``data_id`` is removed,
        ``num_boost_rounds``/``eta``/``gamma``/``alpha`` are decoded from
        their sampled exponents and the three sampling ratios are set to
        1.  The QXgb model is registered first; its id is then used as
        ``second_layer_model`` of a ``QStackModel`` over a fixed list of
        first-layer entries.  The stack is cross-validated on ``data_id``.
        Every call increments the module-level ``counter`` and prints one
        progress line.

        Returns the mean of the collected cross-validation scores
        (currently a single score).
        """
        global counter

        counter += 1

        # data_id selects the data set and is not a model hyper-parameter.
        data_id = params.pop('data_id')

        # Decode the raw sampled values into the actual hyper-parameters.
        params['num_boost_rounds'] = int(1.3 ** params['num_boost_rounds'])
        params['eta'] = round(1 / (1.3 ** params['eta']), 4)
        # The sampling ratios are pinned to 1; decoding them as
        # ``value / 10`` is disabled.
        for ratio_key in ('subsample', 'colsample_bytree', 'colsample_bylevel'):
            params[ratio_key] = 1
        for power_key in ('gamma', 'alpha'):
            params[power_key] = round(5 ** params[power_key], 3)

        # tree_method='hist' is disabled for this model.
        second_layer = QXgb(
            booster='gbtree',
            objective='binary:logistic',
            eval_metric='logloss',
            subsample=params['subsample'],
            colsample_bytree=params['colsample_bytree'],
            colsample_bylevel=params['colsample_bylevel'],
            eta=params['eta'],
            gamma=params['gamma'],
            alpha=params['alpha'],
            max_depth=params['maxdepth'],
            num_boost_round=params['num_boost_rounds'],
        )
        model_id = qm.add_by_params(second_layer, 'hyperopt xgb')

        # Active first-layer entries; each looks like
        # [model_id, data_id, seed] -- TODO confirm against QStackModel.
        first_layer = [
            [1747, 69, 1000],
            [101655, 266, 1000],
            [1831, 266, 1000],
            [101457, 266, 1000],
        ]
        # Commented-out alternative entries, kept for reference:
        #   [1747, 69, 1000], [1747, 66, 1000], [1747, 47, 1000],
        #   [1747, 69, 1001],
        #   [101340, 69, 1000], [101340, 66, 1000], [101340, 47, 1000],
        #   [101340, 69, 1001],
        #   [101331, 69, 1000], [101331, 66, 1000], [101331, 47, 1000],
        #   [101331, 69, 1001],
        #   [101261, 69, 1000], [101261, 66, 1000], [101261, 47, 1000],
        #   [101655, 266, 1000], [101655, 269, 1000],
        #   [1831, 266, 1000], [1831, 269, 1000],
        #   [101457, 266, 1000], [101457, 269, 1000],
        #   [101657, 269, 1000], [101657, 266, 1000],
        #   [1841, 269, 1000], [1841, 266, 1000],
        #   [101411, 269, 1000], [101411, 266, 1000],

        stack = QStackModel(first_layer, second_layer_model=model_id, nsplits=5)
        model_id_main = qm.add_by_params(stack, level=-2)

        first_score = cv.cross_val(
            model_id_main,
            data_id,
            seed=1000,
            early_stop_cv=lambda x: x > CV_SCORE_TO_STOP,
        )
        scores = [np.float64(first_score)]
        # A disabled follow-up used to repeat the cross-validation with
        # seeds 1001..1007 (force=True) whenever the first score was below
        # CV_SCORE_TO_STOP and append those scores before averaging.

        # NOTE(review): the log line shows the second-layer model_id, not
        # model_id_main that was cross-validated -- confirm this is intended.
        progress = "{}/{}".format(counter, rounds)
        print(data_id, model_id, progress, scores, datetime.datetime.now(),
              params)
        return np.mean(scores)
Ejemplo n.º 12
0
            group by data_id, cls, descr
        """.format(CV_SCORE_TO_SELECT)).fetchall()

    results = []

    for r in res:
        for m in r['models'].split(','):
            results.append([int(m), r['data_id'], 1000])

    for i in range(5000):
        random.shuffle(results)
        models = list(results[:random.randint(2, 20)])
        models = sorted(models, key=lambda x: (x[0], x[1]))
        print('{}/{}'.format(i, ROUNDS), models)

        model_id = qm.add_by_params(QAvg(models))
        print(
            cv.cross_val(model_id,
                         -1,
                         early_stop_cv=lambda x: x > CV_SCORE_TO_STOP))
        conn.execute(
            "update qml_models set level=3 where model_id={}".format(model_id))

        # model_id = qm.add_by_params(
        #     QAvg(models, is_geom=True)
        # )
        # print(cv.cross_val(model_id, -1, early_stop_cv=lambda x: x>CV_SCORE_TO_STOP))
        # conn.execute("update qml_models set level=3 where model_id={}".format(model_id))
        #
        # model_id = qm.add_by_params(
        #     QRankedAvg(models)
Ejemplo n.º 13
0
import workdir.classes.config
from qml.cv import QCV
from qml.models import QXgb, QAvg, QAvgOneModelData
from workdir.classes.models import qm

# Cross-validation helper built on the shared ``qm`` object.
cv = QCV(qm)

# Register a fixed QXgb configuration under the 'hyperopt xgb' description.
model_id = qm.add_by_params(
    QXgb(
        alpha=0.008,
        booster="gbtree",
        colsample_bylevel=0.9,
        colsample_bytree=0.9,
        eta=0.0024,
        eval_metric="logloss",
        gamma=0.04,
        max_depth=4,
        num_boost_round=2619,
        objective="binary:logistic",
        subsample=0.7,
        tree_method="hist",
    ),
    'hyperopt xgb',
)

#model_id =qm.add_by_params(QAvgOneModelData(model_id, 8), level=-2)

cv.features_sel_add(
    model_id,
    60, [
        'age', 'height', 'weight', 'ap_hi', 'ap_lo', 'smoke', 'alco', 'active',