def Test_blending_Binary_TestFold(X,
                                  y,
                                  nfold_test,
                                  blending_fold,
                                  verbose=True):
    """Run the test-fold blending routine for every binary model.

    Each model wrapper is passed to ``tr.training_blending_Testfold_noVal``
    in ``'binary'`` mode with ``roc_auc_score`` as the metric, and the four
    values it returns are collected per model.

    Args:
        X: Feature data, forwarded unchanged to the training routine.
        y: Target values. The number of distinct values is handed to the
            XGBoost and LightGBM parameter builders.
        nfold_test: Forwarded to the training routine (presumably the number
            of outer test folds -- confirm against ``tr``).
        blending_fold: Forwarded to the training routine (presumably the
            number of blending folds -- confirm against ``tr``).
        verbose: Forwarded to the training routine.

    Returns:
        OrderedDict mapping each model name, in training order, to the list
        ``[test_fold_index, fold_train_pred, fold_test_pred,
        mean_fold_score]`` returned by the training routine.
    """
    # Short names of the models to train; the order is the training order.
    name_list = ['xgb', 'lgb', 'cat', 'rfc', 'svm', 'gpc', 'lda', 'qda']

    # Model wrappers.
    model_dict = OrderedDict()
    model_dict['xgb'] = mi.myXGBBinary()
    model_dict['lgb'] = mi.myLGBMBinary()
    model_dict['cat'] = mi.myCatBoostBinary()
    model_dict['rfc'] = mi.myRandomForestBinary()
    model_dict['svm'] = mi.mySVMBinary()
    model_dict['gpc'] = mi.myGPBinary()
    model_dict['lda'] = mi.myLDABinary()
    model_dict['qda'] = mi.myQDABinary()

    # Model hyper-parameters. The class count is computed once and shared.
    n_classes = len(np.unique(y))
    param_list = OrderedDict()
    param_list['xgb'] = mp.param_xgb('binary', n_classes, use_gpu=False)
    param_list['lgb'] = mp.param_lgbm('binary', n_classes, use_gpu=False)
    # NOTE(review): CatBoost is the only model configured with use_gpu=True.
    param_list['cat'] = mp.param_cat('binary',
                                     use_gpu=True,
                                     is_unbalance=False)
    param_list['rfc'] = mp.param_rf('binary')
    param_list['svm'] = mp.param_svm('binary')
    param_list['gpc'] = mp.param_gpc('binary')
    param_list['lda'] = mp.param_lda()
    param_list['qda'] = mp.param_qda()

    # Fit-time keyword arguments: empty for every model except LightGBM.
    fitpm_list = OrderedDict()
    for name in name_list:
        fitpm_list[name] = {}
    fitpm_list['lgb'] = {'early_stopping_rounds': 12, 'verbose': -1}

    # Metric handed to the training routine.
    metric_func = roc_auc_score

    result_list = OrderedDict()
    for name in name_list:
        print(name)
        (test_fold_index, fold_train_pred, fold_test_pred,
         mean_fold_score) = tr.training_blending_Testfold_noVal(
             'binary', model_dict[name], param_list[name], fitpm_list[name],
             metric_func, X, y, nfold_test, blending_fold, verbose)
        result_list[name] = [
            test_fold_index, fold_train_pred, fold_test_pred, mean_fold_score
        ]
        print('done')
    print('Test_blending_Binary_TestFold Complete')
    return result_list
def Test_blending_Binary(xtrain,
                         ytrain,
                         xtest,
                         blending_fold=5,
                         verbose=False):
    """Fit every binary model with the fixed-test blending routine.

    Each model wrapper is handed to ``tr.training_blending_fixedTest`` in
    ``'binary'`` mode, with ``roc_auc_score`` as the metric.

    Args:
        xtrain: Training features, forwarded to the training routine.
        ytrain: Training targets. The number of distinct values is passed to
            the XGBoost and LightGBM parameter builders.
        xtest: Test features, forwarded to the training routine.
        blending_fold: Forwarded to the training routine (presumably the
            number of blending folds -- confirm against ``tr``).
        verbose: Forwarded to the training routine.

    Returns:
        OrderedDict mapping each model name, in training order, to the list
        ``[train_pred, test_pred, fold_metric]`` returned by the training
        routine.
    """
    # Model wrappers keyed by short name; key order is the training order.
    estimators = OrderedDict([
        ('xgb', mi.myXGBBinary()),
        ('lgb', mi.myLGBMBinary()),
        ('cat', mi.myCatBoostBinary()),
        ('rfc', mi.myRandomForestBinary()),
        ('svm', mi.mySVMBinary()),
        ('gpc', mi.myGPBinary()),
        ('lda', mi.myLDABinary()),
        ('qda', mi.myQDABinary()),
    ])

    # Hyper-parameters for each model.
    n_classes = len(np.unique(ytrain))
    hyper_params = OrderedDict([
        ('xgb', mp.param_xgb('binary', n_classes, use_gpu=False)),
        ('lgb', mp.param_lgbm('binary', n_classes, use_gpu=False)),
        ('cat', mp.param_cat('binary', use_gpu=True, is_unbalance=False)),
        ('rfc', mp.param_rf('binary')),
        ('svm', mp.param_svm('binary')),
        ('gpc', mp.param_gpc('binary')),
        ('lda', mp.param_lda()),
        ('qda', mp.param_qda()),
    ])

    # Fit-time keyword arguments: only LightGBM receives any.
    fit_kwargs = OrderedDict((key, {}) for key in estimators)
    fit_kwargs['lgb'] = {'early_stopping_rounds': 12, 'verbose': -1}

    scorer = roc_auc_score

    result_list = OrderedDict()
    for key, estimator in estimators.items():
        print(key)
        outputs = tr.training_blending_fixedTest(
            'binary', estimator, hyper_params[key], fit_kwargs[key], scorer,
            xtrain, ytrain, xtest, blending_fold, verbose)
        train_pred, test_pred, fold_metric = outputs
        result_list[key] = [train_pred, test_pred, fold_metric]
    return result_list
def Test_Classification(xtrain, ytrain, xtest, nfold=5, verbose=False):
    """Train every classifier against a fixed test set.

    Each model wrapper is passed to ``tr.training_fixedTest`` in
    ``'classification'`` mode, with ``cu.aucpr`` as the metric.

    Args:
        xtrain: Training features, forwarded to the training routine.
        ytrain: Training targets. The number of distinct values is passed to
            the XGBoost and LightGBM parameter builders.
        xtest: Test features, forwarded to the training routine.
        nfold: Forwarded to the training routine (presumably the number of
            cross-validation folds -- confirm against ``tr``).
        verbose: Accepted but not forwarded to the training call.

    Returns:
        OrderedDict mapping each model name, in training order, to the list
        ``[fold_predict, fold_oof, fold_metric, fold_model]`` returned by the
        training routine.
    """
    # Classifier wrappers keyed by short name; key order is the run order.
    classifiers = OrderedDict([
        ('xgb', mi.myXGBClassifier()),
        ('lgb', mi.myLGBMClassifier()),
        ('cat', mi.myCatBoostClassifier()),
        ('rfc', mi.myRandomForestClassifier()),
        ('svm', mi.mySVMClassifier()),
        ('gpc', mi.myGPClassifier()),
        ('lda', mi.myLDAClassifier()),
        ('qda', mi.myQDAClassifier()),
    ])

    # Hyper-parameters for each classifier.
    task = 'classification'
    n_classes = len(np.unique(ytrain))
    settings = OrderedDict([
        ('xgb', mp.param_xgb(task, n_classes, use_gpu=False)),
        ('lgb', mp.param_lgbm(task, n_classes, use_gpu=False)),
        ('cat', mp.param_cat(task, use_gpu=True, is_unbalance=False)),
        ('rfc', mp.param_rf(task)),
        ('svm', mp.param_svm(task)),
        ('gpc', mp.param_gpc(task)),
        ('lda', mp.param_lda()),
        ('qda', mp.param_qda()),
    ])

    # Fit-time keyword arguments: empty except for LightGBM.
    fit_options = OrderedDict((label, {}) for label in classifiers)
    fit_options['lgb'] = {'early_stopping_rounds': 12, 'verbose': -1}

    # Metric handed to the training routine.
    scorer = cu.aucpr

    result_list = OrderedDict()
    for label, clf in classifiers.items():
        print(label)
        outcome = tr.training_fixedTest(
            task, clf, settings[label], fit_options[label], scorer, xtrain,
            ytrain, xtest, nfold)
        fold_predict, fold_oof, fold_metric, fold_model = outcome
        result_list[label] = [fold_predict, fold_oof, fold_metric, fold_model]
    print('Test_Classification Complete')
    return result_list
def Test_Classification_TestFold(X, y, nfold_test, nfold_val, verbose=True):
    """Run the test-fold training routine for every classifier.

    Each model wrapper is passed to ``tr.training_Testfold`` in
    ``'classification'`` mode, with ``cu.aucpr`` as the metric.

    Args:
        X: Feature data, forwarded unchanged to the training routine.
        y: Target values. The number of distinct values is handed to the
            XGBoost and LightGBM parameter builders.
        nfold_test: Forwarded to the training routine (presumably the number
            of outer test folds -- confirm against ``tr``).
        nfold_val: Forwarded to the training routine (presumably the number
            of inner validation folds -- confirm against ``tr``).
        verbose: Accepted but not forwarded to the training call.

    Returns:
        OrderedDict mapping each model name, in training order, to the list
        ``[test_fold_index, oof, model_list]`` returned by the training
        routine.
    """
    # Short names of the models to train; the order is the training order.
    name_list = ['xgb', 'lgb', 'cat', 'rfc', 'svm', 'gpc', 'lda', 'qda']

    # Model wrappers.
    model_dict = OrderedDict()
    model_dict['xgb'] = mi.myXGBClassifier()
    model_dict['lgb'] = mi.myLGBMClassifier()
    model_dict['cat'] = mi.myCatBoostClassifier()
    model_dict['rfc'] = mi.myRandomForestClassifier()
    model_dict['svm'] = mi.mySVMClassifier()
    model_dict['gpc'] = mi.myGPClassifier()
    model_dict['lda'] = mi.myLDAClassifier()
    model_dict['qda'] = mi.myQDAClassifier()

    # Model hyper-parameters. The class count is computed once and shared.
    n_classes = len(np.unique(y))
    param_list = OrderedDict()
    param_list['xgb'] = mp.param_xgb('classification',
                                     n_classes,
                                     use_gpu=False)
    param_list['lgb'] = mp.param_lgbm('classification',
                                      n_classes,
                                      use_gpu=False)
    # NOTE(review): CatBoost is the only model configured with use_gpu=True.
    param_list['cat'] = mp.param_cat('classification',
                                     use_gpu=True,
                                     is_unbalance=False)
    param_list['rfc'] = mp.param_rf('classification')
    param_list['svm'] = mp.param_svm('classification')
    param_list['gpc'] = mp.param_gpc('classification')
    param_list['lda'] = mp.param_lda()
    param_list['qda'] = mp.param_qda()

    # Fit-time keyword arguments: empty for every model except LightGBM.
    fitpm_list = OrderedDict()
    for name in name_list:
        fitpm_list[name] = {}
    fitpm_list['lgb'] = {'early_stopping_rounds': 12, 'verbose': -1}

    # Metric handed to the training routine.
    metric_func = cu.aucpr

    result_list = OrderedDict()
    for name in name_list:
        print(name)
        test_fold_index, oof, model_list = tr.training_Testfold(
            'classification', model_dict[name], param_list[name],
            fitpm_list[name], metric_func, X, y, nfold_test, nfold_val)
        result_list[name] = [test_fold_index, oof, model_list]
    print('Test_Classification_TestFold Complete')
    return result_list
# Example #5
# 0
def Test_Regression_TestFold(X, y, nfold_test, nfold_val, verbose=True):
    """Run the test-fold training routine for every regressor.

    Each model wrapper is passed to ``tr.training_Testfold`` in
    ``'regression'`` mode, with ``mean_squared_error`` as the metric.

    No secondary score is computed on the out-of-fold predictions; callers
    that need one can derive it from the returned ``oof`` arrays.

    Args:
        X: Feature data, forwarded unchanged to the training routine.
        y: Target values, forwarded unchanged to the training routine.
        nfold_test: Forwarded to the training routine (presumably the number
            of outer test folds -- confirm against ``tr``).
        nfold_val: Forwarded to the training routine (presumably the number
            of inner validation folds -- confirm against ``tr``).
        verbose: Accepted but not forwarded to the training call.

    Returns:
        OrderedDict mapping each model name, in training order, to the list
        ``[test_fold_index, oof, model_list]`` returned by the training
        routine.
    """
    # Short names of the models to train; the order is the training order.
    name_list = ['xgb', 'lgb', 'cat', 'rfc', 'elt', 'svm', 'gpc']

    # Model wrappers.
    model_dict = OrderedDict()
    model_dict['xgb'] = mi.myXGBRegressor()
    model_dict['lgb'] = mi.myLGBMRegressor()
    model_dict['cat'] = mi.myCatBoostRegressor()
    model_dict['rfc'] = mi.myRandomForestRegressor()
    model_dict['elt'] = mi.myElasticNetRegressor()
    model_dict['svm'] = mi.mySVMRegressor()
    model_dict['gpc'] = mi.myGPRegressor()

    # Model hyper-parameters.
    param_list = OrderedDict()
    param_list['xgb'] = mp.param_xgb('regression', use_gpu=False)
    param_list['lgb'] = mp.param_lgbm('regression', use_gpu=False)
    # NOTE(review): CatBoost is the only model configured with use_gpu=True.
    param_list['cat'] = mp.param_cat('regression',
                                     use_gpu=True,
                                     is_unbalance=False)
    param_list['rfc'] = mp.param_rf('regression')
    param_list['elt'] = mp.param_elst('regression')
    param_list['svm'] = mp.param_svm('regression')
    param_list['gpc'] = mp.param_gpc('regression')

    # Fit-time keyword arguments: empty for every model except LightGBM.
    fitpm_list = OrderedDict()
    for name in name_list:
        fitpm_list[name] = {}
    fitpm_list['lgb'] = {'early_stopping_rounds': 12, 'verbose': -1}

    # Metric handed to the training routine.
    metric_func = mean_squared_error

    result_list = OrderedDict()
    for name in name_list:
        print(name)
        print('Model : {}'.format(name))
        test_fold_index, oof, model_list = tr.training_Testfold(
            'regression', model_dict[name], param_list[name], fitpm_list[name],
            metric_func, X, y, nfold_test, nfold_val)
        # Per the original author's note, oof holds the predictions for all
        # of the data.
        result_list[name] = [test_fold_index, oof, model_list]
    return result_list
def Test_Regression_noVal(xtrain, ytrain, xtest):
    """Train every regressor with the no-validation fixed-test routine.

    Each model wrapper is passed to ``tr.training_fixedTest_noVal`` in
    ``'regression'`` mode, with ``mean_squared_error`` as the metric.

    Args:
        xtrain: Training features, forwarded to the training routine.
        ytrain: Training targets, forwarded to the training routine.
        xtest: Test features, forwarded to the training routine.

    Returns:
        OrderedDict mapping each model name, in training order, to the list
        ``[res_pred, model]`` returned by the training routine.
    """
    task = 'regression'

    # Regressor wrappers keyed by short name; key order is the run order.
    regressors = OrderedDict([
        ('xgb', mi.myXGBRegressor()),
        ('lgb', mi.myLGBMRegressor()),
        ('cat', mi.myCatBoostRegressor()),
        ('rfc', mi.myRandomForestRegressor()),
        ('elt', mi.myElasticNetRegressor()),
        ('svm', mi.mySVMRegressor()),
        ('gpc', mi.myGPRegressor()),
    ])

    # Hyper-parameters for each regressor.
    settings = OrderedDict([
        ('xgb', mp.param_xgb(task, use_gpu=False)),
        ('lgb', mp.param_lgbm(task, use_gpu=False)),
        ('cat', mp.param_cat(task, use_gpu=True, is_unbalance=False)),
        ('rfc', mp.param_rf(task)),
        ('elt', mp.param_elst(task)),
        ('svm', mp.param_svm(task)),
        ('gpc', mp.param_gpc(task)),
    ])

    # Fit-time keyword arguments: only LightGBM receives any.
    fit_options = OrderedDict((label, {}) for label in regressors)
    fit_options['lgb'] = {'early_stopping_rounds': 12, 'verbose': -1}

    # Metric handed to the training routine.
    scorer = mean_squared_error

    result_list = OrderedDict()
    for label, regressor in regressors.items():
        print(label)
        outcome = tr.training_fixedTest_noVal(
            task, regressor, settings[label], fit_options[label], scorer,
            xtrain, ytrain, xtest)
        res_pred, model = outcome
        result_list[label] = [res_pred, model]
    print('Test_Regression Complete')
    return result_list
# Example #7
# 0
def Test_Binary(xtrain, ytrain, xtest, nfold=5, verbose=False):
    """Train the active binary models with the no-validation routine.

    Each active model wrapper is passed to ``tr.training_fixedTest_noVal``
    in ``'binary'`` mode, with ``roc_auc_score`` as the metric. Wrappers and
    parameters are also built for ``'cat'`` and ``'gpc'``, but those two are
    not in the active list and are never trained.

    Args:
        xtrain: Training features, forwarded to the training routine.
        ytrain: Training targets. The number of distinct values is passed to
            the XGBoost and LightGBM parameter builders.
        xtest: Test features, forwarded to the training routine.
        nfold: Not used by this function.
        verbose: Forwarded to the training routine.

    Returns:
        OrderedDict mapping each active model name, in training order, to
        the list ``[prediction, model]`` returned by the training routine.
    """
    # Models that are actually trained ('cat' and 'gpc' are left out).
    active_names = [
        'xgb', 'lgb', 'rfc', 'svm', 'lda', 'qda', 'rdg', 'lso', 'ann'
    ]

    # Model wrappers for every known model, including the inactive ones.
    estimators = OrderedDict([
        ('xgb', mi.myXGBBinary()),
        ('lgb', mi.myLGBMBinary()),
        ('cat', mi.myCatBoostBinary()),
        ('rfc', mi.myRandomForestBinary()),
        ('svm', mi.mySVMBinary()),
        ('gpc', mi.myGPBinary()),
        ('lda', mi.myLDABinary()),
        ('qda', mi.myQDABinary()),
        ('rdg', mi.myRidgeBinary()),
        ('lso', mi.myLassoBinary()),
        ('ann', mi.myANNBinary()),
    ])

    # Base hyper-parameters from the parameter builders.
    n_classes = len(np.unique(ytrain))
    hyper_params = OrderedDict([
        ('xgb', mp.param_xgb('binary', n_classes, use_gpu=False)),
        ('lgb', mp.param_lgbm('binary', n_classes, use_gpu=False)),
        ('cat', mp.param_cat('binary', use_gpu=True, is_unbalance=False)),
        ('rfc', mp.param_rf('binary')),
        ('svm', mp.param_svm('binary')),
        ('gpc', mp.param_gpc('binary')),
        ('lda', mp.param_lda()),
        ('qda', mp.param_qda()),
        ('rdg', mp.param_ridge('binary')),
        ('lso', mp.param_lasso('binary')),
        ('ann', mp.param_ANN()),
    ])

    # Hand-specified overrides layered on top of the base parameters.
    # CatBoost overrides are currently disabled:
    #   bagging_temperature=36.514154289873396, depth=7, iterations=1884,
    #   l2_leaf_reg=5, learning_rate=0.8494130280301052,
    #   random_strength=39.83926219359324
    overrides = OrderedDict([
        ('xgb', {
            'colsample_bytree': 0.018359345409703118,
            'max_delta_step': 10.0,
            'max_depth': 100,
            'min_child_weight': 0.0,
            'n_estimators': 800,
            'reg_alpha': 2.0,
            'reg_lambda': 10.0,
            'subsample': 1.0,
        }),
        ('lgb', {
            'bagging_fraction': 0.9146615380853989,
            'colsample_bytree': 0.7384250232683872,
            'feature_fraction': 0.2892361777710602,
            'lambda_l1': 6.11807950735429,
            'lambda_l2': 9.779990080293718,
            'learning_rate': 0.001,
            'max_depth': 9,
            'min_child_weight': 0.6385281864950193,
            'min_data_in_leaf': 1,
            'min_split_gain': 0.5944870633301388,
            'num_leaves': 969,
            'reg_alpha': 18.045166839320736,
            'reg_lambda': 8.490946187426754,
            'subsample': 0.16006631386138065,
        }),
        ('rfc', {
            'max_depth': 3,
            'max_features': 0.26390005062522226,
            'n_estimators': 89,
        }),
        ('rdg', {'normalize': False}),
        ('lso', {'alpha': 0.03, 'normalize': False}),
    ])
    for key, extra in overrides.items():
        # The later dictionary takes precedence on duplicate keys.
        hyper_params[key] = {**hyper_params[key], **extra}

    # Fit-time keyword arguments: empty for every active model.
    fit_kwargs = OrderedDict((key, {}) for key in active_names)

    scorer = roc_auc_score

    result_list = OrderedDict()
    for key in active_names:
        print(key)
        outcome = tr.training_fixedTest_noVal(
            'binary', estimators[key], hyper_params[key], fit_kwargs[key],
            scorer, xtrain, ytrain, xtest, verbose)
        prediction, model = outcome
        result_list[key] = [prediction, model]
    return result_list