def fn(params):
    """Cross-validate one logistic-regression candidate and return its score.

    Presumably used as a hyperopt objective (the model is registered under
    the description 'hyperopt log_regr').

    ``params`` is modified in place: ``C`` is divided by 10 and ``max_iter``
    is replaced by ``int(1.3 ** max_iter)``.  The estimator is registered via
    ``qm.add_by_params`` and scored with ``cv.cross_val`` on
    ``params['data_id']`` using seed 1000.

    Returns the mean of the collected scores (a single score here).
    """
    global counter
    counter += 1

    # Turn the raw search-space values into actual hyper-parameters.
    params['C'] = params['C'] / 10
    params['max_iter'] = int(1.3 ** params['max_iter'])

    estimator = LogisticRegression(
        n_jobs=-1,
        max_iter=params['max_iter'],
        solver=params['solver'],
        C=params['C'],
    )
    model_id = qm.add_by_params(
        estimator,
        'hyperopt log_regr',
        predict_fn='predict_proba',
    )

    first_score = cv.cross_val(
        model_id,
        params['data_id'],
        seed=1000,
        early_stop_cv=lambda value: value > CV_SCORE_TO_STOP,
    )
    scores = [np.float64(first_score)]
    # Disabled: when the first score was below CV_SCORE_TO_STOP, seven more
    # runs (seeds 1001..1007, force=True) used to be appended to the scores.

    print(params['data_id'], model_id, "{}/{}".format(counter, rounds),
          scores, datetime.datetime.now(), params)
    return np.mean(scores)
def fn(params):
    """Cross-validate one ExtraTreesClassifier candidate and return its score.

    Presumably used as a hyperopt objective (the model is registered under
    the description 'hyperopt rand_forest').

    ``params`` is modified in place: ``max_features`` is divided by 10 and
    ``n_estimators`` is replaced by ``int(1.3 ** n_estimators)``.  The
    classifier is registered via ``qm.add_by_params`` and scored with
    ``cv.cross_val`` on ``params['data_id']`` using seed 1000.

    Returns the mean of the collected scores (a single score here).
    """
    global counter
    counter += 1

    # Turn the raw search-space values into actual hyper-parameters.
    params['max_features'] = params['max_features'] / 10
    params['n_estimators'] = int(1.3 ** params['n_estimators'])

    forest = ExtraTreesClassifier(
        max_depth=int(params['max_depth']),
        n_estimators=int(params['n_estimators']),
        max_features=float(params['max_features']),
        n_jobs=-1,
    )
    model_id = qm.add_by_params(
        forest,
        'hyperopt rand_forest',
        predict_fn='predict_proba',
    )

    first_score = cv.cross_val(
        model_id,
        params['data_id'],
        seed=1000,
        early_stop_cv=lambda value: value > CV_SCORE_TO_STOP,
    )
    scores = [np.float64(first_score)]
    # Disabled: when the first score was below CV_SCORE_TO_STOP, seven more
    # runs (seeds 1001..1007, force=True) used to be appended to the scores.

    print(params['data_id'], model_id, "{}/{}".format(counter, rounds),
          scores, datetime.datetime.now(), params)
    return np.mean(scores)
def fn(params):
    """Cross-validate one QNN2 candidate and return its score.

    Presumably used as a hyperopt objective (the model is registered under
    the description 'hyperopt nn1').

    ``params['epochs']`` is replaced in place by ``int(1.3 ** epochs)``.  The
    network is registered via ``qm.add_by_params`` and scored with
    ``cv.cross_val`` on ``params['data_id']`` using seed 1000.

    Returns the mean of the collected scores (a single score here).
    """
    global counter
    counter += 1

    # The search space supplies an exponent; convert it to an epoch count.
    params['epochs'] = int(1.3 ** params['epochs'])

    network = QNN2(
        middle_dim=int(params['middle_dim']),
        epochs=int(params['epochs']),
    )
    model_id = qm.add_by_params(network, 'hyperopt nn1')

    first_score = cv.cross_val(
        model_id,
        params['data_id'],
        seed=1000,
        early_stop_cv=lambda value: value > CV_SCORE_TO_STOP,
    )
    scores = [np.float64(first_score)]
    # Disabled: when the first score was below CV_SCORE_TO_STOP, seven more
    # runs (seeds 1001..1007, force=True) used to be appended to the scores.

    print(params['data_id'], model_id, "{}/{}".format(counter, rounds),
          scores, datetime.datetime.now(), params)
    return np.mean(scores)
def fn(params):
    """Cross-validate one QXgb2 candidate and return its mean score.

    Presumably used as a hyperopt objective (the model is registered under
    the description 'hyperopt xgb').

    ``params`` is modified in place: ``data_id`` is removed, and the raw
    search-space values are rescaled (see the inline comments).  The model is
    registered via ``qm.add_by_params`` and scored with ``cv.cross_val`` using
    seed 1000.  If that first score is below ``CV_SCORE_TO_STOP``, seven more
    forced runs with seeds 1001..1007 are added.

    Returns the mean of all collected scores (one or eight values).
    """
    global counter
    counter += 1

    data_id = params.pop('data_id')

    # Exponential / reciprocal mappings from search-space values.
    params['num_boost_rounds'] = int(1.3 ** params['num_boost_rounds'])
    params['eta'] = round(1 / (1.3 ** params['eta']), 4)
    params['lr_decay'] = round(1 / (2 ** params['lr_decay']), 4)
    # Sampling ratios arrive scaled by ten.
    for ratio_key in ('subsample', 'colsample_bytree', 'colsample_bylevel'):
        params[ratio_key] = params[ratio_key] / 10
    # Regularisation terms are searched as powers of five.
    for power_key in ('gamma', 'alpha'):
        params[power_key] = round(5 ** params[power_key], 3)

    booster = QXgb2(
        booster='gbtree',
        objective='binary:logistic',
        eval_metric='logloss',
        subsample=params['subsample'],
        colsample_bytree=params['colsample_bytree'],
        colsample_bylevel=params['colsample_bylevel'],
        eta=params['eta'],
        gamma=params['gamma'],
        alpha=params['alpha'],
        max_depth=params['maxdepth'],
        num_boost_round=params['num_boost_rounds'],
        lr_decay=params['lr_decay'],
    )
    model_id = qm.add_by_params(booster, 'hyperopt xgb')

    first_score = np.float64(cv.cross_val(
        model_id,
        data_id,
        seed=1000,
        early_stop_cv=lambda value: value > CV_SCORE_TO_STOP,
    ))
    scores = [first_score]

    # Only candidates whose first score is below the threshold are re-scored
    # with additional seeds.
    if first_score < CV_SCORE_TO_STOP:
        for seed in range(1001, 1008):
            extra = cv.cross_val(model_id, data_id, seed=seed, force=True)
            scores.append(np.float64(extra))

    print(data_id, model_id, "{}/{}".format(counter, rounds),
          scores, datetime.datetime.now(), params)
    return np.mean(scores)
for r in res: for m in r['models'].split(','): results.append([int(m), r['data_id']]) for i in range(ROUNDS): random.shuffle(results) models = list(results[:random.randint(2, 10)]) models = sorted(models, key=lambda x: (x[0], x[1])) print('{}/{}'.format(i, ROUNDS), models) try: model_id = qm.add_by_params( QAvg(models) ) print(cv.cross_val(model_id, -1, early_stop_cv=lambda x: x>CV_SCORE_TO_STOP)) conn.execute("update qml_models set level=2 where model_id={}".format(model_id)) # model_id = qm.add_by_params( # QAvg(models, is_geom=True) # ) # print(cv.cross_val(model_id, -1, early_stop_cv=lambda x: x>CV_SCORE_TO_STOP)) # conn.execute("update qml_models set level=2 where model_id={}".format(model_id)) model_id = qm.add_by_params( QRankedAvg(models) ) print(cv.cross_val(model_id, -1, early_stop_cv=lambda x: x>CV_SCORE_TO_STOP)) conn.execute("update qml_models set level=2 where model_id={}".format(model_id))
results = [] best_models = [] for r in res: for m in r['models'].split(','): results.append([int(m), r['data_id'], 1000]) for i in range(ROUNDS): random.shuffle(results) models = list(results[:random.randint(4, 25)]) models = sorted(models, key=lambda x: (x[0], x[1])) print('{}/{}'.format(i, ROUNDS), models) try: model_id2 = qm.add_by_params(Ridge(alpha=0.05)) if len(models) >= 4: model_id = qm.add_by_params( QStackModel(models, second_layer_model=model_id2, nsplits=2)) conn.execute( "update qml_models set level=2 where model_id={}".format( model_id)) print( model_id, cv.cross_val(model_id, -1, early_stop_cv=lambda x: x > CV_SCORE_TO_STOP))
def fn(params):
    """Cross-validate a stacked model whose second layer is logistic regression.

    Presumably used as a hyperopt objective (the second-layer model is
    registered under the description 'hyperopt log_regr').

    ``params`` is modified in place: ``C`` is divided by 10 and ``max_iter``
    is replaced by ``int(1.3 ** max_iter)``.  The logistic regression is
    registered first; its id is then used as ``second_layer_model`` of a
    ``QStackModel`` (``nsplits=5``) registered with ``level=-2``.  The stack
    is scored with ``cv.cross_val`` on ``params['data_id']`` using seed 1000.

    Returns the mean of the collected scores (a single score here).
    """
    global counter
    counter += 1

    # Turn the raw search-space values into actual hyper-parameters.
    params['C'] = params['C'] / 10
    params['max_iter'] = int(1.3 ** params['max_iter'])

    second_layer = LogisticRegression(
        n_jobs=-1,
        max_iter=params['max_iter'],
        solver=params['solver'],
        C=params['C'],
    )
    model_id = qm.add_by_params(
        second_layer,
        'hyperopt log_regr',
        predict_fn='predict_proba',
    )

    # First-layer inputs as [model, data, 1000] triples (third value is
    # presumably a CV seed -- confirm against QStackModel).
    # Disabled alternatives that were previously listed here:
    #   models 1747, 101340, 101331, 101261 on data 69, 66, 47
    #   models 101655, 1831, 101457, 101657, 1841, 101411 on data 266, 269
    first_layer = [
        [1747, 69, 1000],
        [101655, 266, 1000],
        [1831, 266, 1000],
        [101457, 266, 1000],
    ]
    stack = QStackModel(first_layer, second_layer_model=model_id, nsplits=5)
    model_id_main = qm.add_by_params(stack, level=-2)

    first_score = cv.cross_val(
        model_id_main,
        params['data_id'],
        seed=1000,
        early_stop_cv=lambda value: value > CV_SCORE_TO_STOP,
    )
    scores = [np.float64(first_score)]
    # Disabled: when the first score was below CV_SCORE_TO_STOP, seven more
    # runs (seeds 1001..1007, force=True) used to be appended to the scores.

    # NOTE(review): the id logged here is the second-layer model's, not
    # model_id_main, which is what was cross-validated -- confirm intended.
    print(params['data_id'], model_id, "{}/{}".format(counter, rounds),
          scores, datetime.datetime.now(), params)
    return np.mean(scores)
import datetime
import random

from hyperopt import hp, fmin, tpe
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

import workdir.classes.config  # loads local config
from qml.cv import QCV
from qml.helpers import get_engine
from qml.models import (
    QXgb,
    QAvg,
    QRankedAvg,
    QStackModel,
    QPostProcessingModel,
    QRankedByLineAvg,
    QAvgOneModelData,
)
from workdir.classes.models import qm

# Script: register a QAvgOneModelData(636, 8) model at level -2, print its
# id and run qm.qpredict for it with 25 as the second argument.

cv = QCV(qm)

# Earlier spot checks, kept for reference:
# print(cv.cross_val(465, 15))
# print(cv.cross_val(465, 16))
# print(cv.cross_val(466, 15))
# print(cv.cross_val(466, 16))

new_model_id = qm.add_by_params(
    QAvgOneModelData(636, 8),
    level=-2,
)
print(new_model_id)
qm.qpredict(new_model_id, 25)

# Disabled multi-seed cross-validation loop, kept for reference:
# print(new_model_id)
#
# for model_id in [416]:  # 450
#     for data_id in [25]:  # 13,15,16,17
#         for i in range(8):
#             res = cv.cross_val(model_id, data_id, seed=1000 + i, force=True)
#             print('##', model_id, data_id, i, res)
import workdir.classes.config from qml.cv import QCV from qml.models import QXgb, QAvg, QAvgOneModelData from workdir.classes.models import qm cv = QCV(qm) # model_id = qm.add_by_params( # QXgb( # ** {"alpha": 1.0, "booster": "gbtree", "colsample_bylevel": 0.7, "colsample_bytree": 0.8, "eta": 0.004, "eval_metric": "logloss", # "gamma": 0.2, "max_depth": 4, "num_boost_round": 2015, "objective": "binary:logistic", "subsample": 0.8, "tree_method": "hist"} # ), # 'hyperopt xgb', level=-1 # ) model_id = qm.add_by_params(QAvgOneModelData(416, 2), level=-2) cv.features_sel_del( model_id, 23, early_stop_cv=lambda x: x < 0.557, # minmax log_file='workdir/logs/data23_sub_cols3.txt', exclude=[ 'data__daily__data_vol_mb__median__max', 'data__daily__cell_count__min', 'voice__hourly__voice_dur_min__sum__max', 'voice__daily__voice_dur_min__sum__median', 'data__daily__data_vol_mb__sum__min', 'data__daily__data_vol_mb__avg__avg', 'voice__hourly__voice_dur_min__median__max', 'voice__hourly__voice_dur_min__sum__min',
import datetime
import numpy as np
from hyperopt import hp, fmin, tpe

import os
import sys

# Make the current working directory importable before the project imports.
sys.path.insert(0, os.getcwd())

import workdir.classes.config
from qml.cv import QCV
from qml.models import QXgb, QAvg, QAvgOneModelData
from workdir.classes.models import qm

# Script: register a fixed QXgb configuration, wrap it in
# QAvgOneModelData(model_id, 3) and run cv.features_sel_del for it with 66 as
# the second argument, logging to workdir/logs/feat19.txt.

cv = QCV(qm)

model_id = qm.add_by_params(
    QXgb(
        alpha=1.0,
        booster="gbtree",
        colsample_bylevel=0.7,
        colsample_bytree=0.8,
        eta=0.004,
        eval_metric="logloss",
        gamma=0.2,
        max_depth=4,
        num_boost_round=2015,
        objective="binary:logistic",
        subsample=0.8,
        tree_method="hist",
    ),
    'hyperopt xgb',
    level=-1,
)

# Re-bind model_id to the wrapper built on top of the base model.
model_id = qm.add_by_params(QAvgOneModelData(model_id, 3), level=-2)

cv.features_sel_del(
    model_id,
    66,
    early_stop_cv=lambda x: x > 0.5414,
    log_file='workdir/logs/feat19.txt',
    exclude=[],
)
def fn(params):
    """Cross-validate a stacked model whose second layer is a QXgb model.

    Presumably used as a hyperopt objective (the second-layer model is
    registered under the description 'hyperopt xgb').

    ``params`` is modified in place: ``data_id`` is removed, the sampling
    ratios are fixed to 1 and the remaining raw search-space values are
    rescaled (see the inline comments).  The QXgb model is registered first;
    its id is then used as ``second_layer_model`` of a ``QStackModel``
    (``nsplits=5``) registered with ``level=-2``.  The stack is scored with
    ``cv.cross_val`` on the removed ``data_id`` using seed 1000.

    Returns the mean of the collected scores (a single score here).
    """
    global counter
    counter += 1

    data_id = params.pop('data_id')

    # Exponential / reciprocal mappings from search-space values.
    params['num_boost_rounds'] = int(1.3 ** params['num_boost_rounds'])
    params['eta'] = round(1 / (1.3 ** params['eta']), 4)
    # Sampling ratios are pinned to 1; the former "value / 10" scaling is
    # disabled.
    for ratio_key in ('subsample', 'colsample_bytree', 'colsample_bylevel'):
        params[ratio_key] = 1
    # Regularisation terms are searched as powers of five.
    for power_key in ('gamma', 'alpha'):
        params[power_key] = round(5 ** params[power_key], 3)

    second_layer = QXgb(
        booster='gbtree',
        objective='binary:logistic',
        eval_metric='logloss',
        subsample=params['subsample'],
        colsample_bytree=params['colsample_bytree'],
        colsample_bylevel=params['colsample_bylevel'],
        eta=params['eta'],
        gamma=params['gamma'],
        alpha=params['alpha'],
        max_depth=params['maxdepth'],
        num_boost_round=params['num_boost_rounds'],
        # tree_method='hist' is currently disabled.
    )
    model_id = qm.add_by_params(second_layer, 'hyperopt xgb')

    # First-layer inputs as [model, data, 1000] triples (third value is
    # presumably a CV seed -- confirm against QStackModel).
    # Disabled alternatives that were previously listed here:
    #   models 1747, 101340, 101331, 101261 on data 69, 66, 47
    #   models 101655, 1831, 101457, 101657, 1841, 101411 on data 266, 269
    first_layer = [
        [1747, 69, 1000],
        [101655, 266, 1000],
        [1831, 266, 1000],
        [101457, 266, 1000],
    ]
    stack = QStackModel(first_layer, second_layer_model=model_id, nsplits=5)
    model_id_main = qm.add_by_params(stack, level=-2)

    first_score = cv.cross_val(
        model_id_main,
        data_id,
        seed=1000,
        early_stop_cv=lambda value: value > CV_SCORE_TO_STOP,
    )
    scores = [np.float64(first_score)]
    # Disabled: when the first score was below CV_SCORE_TO_STOP, seven more
    # runs (seeds 1001..1007, force=True) used to be appended to the scores.

    # NOTE(review): the id logged here is the second-layer model's, not
    # model_id_main, which is what was cross-validated -- confirm intended.
    print(data_id, model_id, "{}/{}".format(counter, rounds),
          scores, datetime.datetime.now(), params)
    return np.mean(scores)
group by data_id, cls, descr """.format(CV_SCORE_TO_SELECT)).fetchall() results = [] for r in res: for m in r['models'].split(','): results.append([int(m), r['data_id'], 1000]) for i in range(5000): random.shuffle(results) models = list(results[:random.randint(2, 20)]) models = sorted(models, key=lambda x: (x[0], x[1])) print('{}/{}'.format(i, ROUNDS), models) model_id = qm.add_by_params(QAvg(models)) print( cv.cross_val(model_id, -1, early_stop_cv=lambda x: x > CV_SCORE_TO_STOP)) conn.execute( "update qml_models set level=3 where model_id={}".format(model_id)) # model_id = qm.add_by_params( # QAvg(models, is_geom=True) # ) # print(cv.cross_val(model_id, -1, early_stop_cv=lambda x: x>CV_SCORE_TO_STOP)) # conn.execute("update qml_models set level=3 where model_id={}".format(model_id)) # # model_id = qm.add_by_params( # QRankedAvg(models)
import workdir.classes.config from qml.cv import QCV from qml.models import QXgb, QAvg, QAvgOneModelData from workdir.classes.models import qm cv = QCV(qm) model_id = qm.add_by_params( QXgb( **{ "alpha": 0.008, "booster": "gbtree", "colsample_bylevel": 0.9, "colsample_bytree": 0.9, "eta": 0.0024, "eval_metric": "logloss", "gamma": 0.04, "max_depth": 4, "num_boost_round": 2619, "objective": "binary:logistic", "subsample": 0.7, "tree_method": "hist" }), 'hyperopt xgb', ) #model_id =qm.add_by_params(QAvgOneModelData(model_id, 8), level=-2) cv.features_sel_add( model_id, 60, [ 'age', 'height', 'weight', 'ap_hi', 'ap_lo', 'smoke', 'alco', 'active',