Beispiel #1
0
# -*- coding: utf-8 -*-
import operator

import pandas as pd
import xgboost as xgb

from common.data_fun import get_train_X, get_train_Y

if __name__ == '__main__':
    # Train a small XGBoost booster on the full training set.
    Y = get_train_Y()
    xg_train = xgb.DMatrix(get_train_X(), label=Y)

    # Only native booster parameters belong in this dict.  The number of
    # trees is controlled by ``num_boost_round`` below; ``n_estimators`` is
    # an argument of the scikit-learn wrapper and is not used by
    # ``xgb.train``, so it is no longer listed here.
    params = {
        'max_depth': 7,
    }
    rounds = 10

    bst = xgb.train(params, xg_train, num_boost_round=rounds)
def lightBGM_model(X, Y):
    """Fit a LightGBM regressor on the given data and return it.

    Args:
        X: Training features, passed unchanged to ``LGBMRegressor.fit``.
        Y: Training targets, passed unchanged to ``LGBMRegressor.fit``.

    Returns:
        The fitted ``LGBMRegressor`` (36 leaves, 100 estimators,
        learning rate 0.07, ``random_state=0``).
    """
    model = LGBMRegressor(num_leaves=36,
                          n_estimators=100,
                          learning_rate=0.07,
                          random_state=0)
    # ``fit(verbose=...)`` only affected printing of evaluation-set metrics
    # and no eval set is supplied here, so it had no effect.  The argument
    # was removed in LightGBM 4.0, where passing it raises TypeError.
    model.fit(X, Y)
    return model


if __name__ == '__main__':
    # Repeat the train/evaluate cycle and report the spread of the error.
    rounds = 10
    errors = []

    for i in range(rounds):
        print('round : ' + str(i))
        error = lightBGM_model_with_test(get_train_X(), get_train_Y())
        print('error : ' + str(error))
        errors.append(error)

    # Derive the statistics from the observed errors.  Seeding ``best`` with
    # 1 and ``worst`` with 0 gave wrong results whenever every error was
    # above 1 (or below 0).
    best = min(errors)
    worst = max(errors)
    avg = sum(errors) / rounds

    print('=========================')
    print('best > ' + str(best))
    print('worst > ' + str(worst))
    print('average > ' + str(avg))
        'colsample_bytree': 0.6,
        'subsample': 0.9
    }

    xg_train = xgb.DMatrix(train_X, label=train_Y)
    xgboost_model = xgb.train(params, xg_train)

    xg_test = xgb.DMatrix(test_X)
    xg_res = xgboost_model.predict(xg_test)

    return xg_res


def stacking(train_X, train_Y, test_X):
    """Run the two stacking layers in sequence and return the final result.

    The first layer turns the train and test features into a new pair of
    feature sets; the second layer is given those together with the
    original training targets, and its return value is passed back to the
    caller.
    """
    layer1_train, layer1_test = stacking_layer_1(train_X, train_Y, test_X)
    return stacking_layer_2(layer1_train, train_Y, layer1_test)


if __name__ == '__main__':
    from common.data_fun import get_train_X, get_train_Y
    from sklearn.model_selection import train_test_split

    # Hold out 20% of the training data to score the stacked model.
    features = get_train_X()
    targets = get_train_Y()
    x1, x2, y1, y2 = train_test_split(features, targets, test_size=0.2)

    predict = stacking(x1, y1, x2)
    # error_fun returns an indexable result; element 1 is the value reported.
    scores = error_fun(predict, y2)
    error = scores[1]

    print('stacking error: ' + str(error))
from common.data_fun import get_train_X, get_train_Y

# Candidate XGBoost parameter sets; ``cnt`` indexes the one used for this run.
model_list = [
    {'n_estimators': 150, 'max_depth': 6, 'min_child_weight': 4, 'colsample_bytree': 0.7, 'subsample': 0.7},
    {'n_estimators': 180, 'max_depth': 5, 'min_child_weight': 3, 'colsample_bytree': 1, 'subsample': 0.9},
    {'n_estimators': 240, 'max_depth': 6, 'min_child_weight': 1, 'colsample_bytree': 0.6, 'subsample': 0.9},
    {'n_estimators': 240, 'max_depth': 6, 'min_child_weight': 1, 'colsample_bytree': 0.6, 'subsample': 0.9},
    {'n_estimators': 200, 'max_depth': 9, 'min_child_weight': 2, 'colsample_bytree': 0.9, 'subsample': 0.8},
]

total = 0
cnt = 0

Y = get_train_Y()
# Split the data set, holding out 20% for testing.
train_x, test_x, train_y, test_y = train_test_split(
    get_train_X(), Y, test_size=0.2)

# Select the training parameters and advance to the next candidate.
params = model_list[cnt]
cnt += 1
# NOTE(review): ``rounds`` is not passed to ``xgb.train`` in the code visible
# here -- confirm whether it was meant to be ``num_boost_round``.
rounds = 10

# Train.
xg_train = xgb.DMatrix(train_x, label=train_y)
xgboost_model = xgb.train(params, xg_train)

# Predict.
xg_test = xgb.DMatrix(test_x)
xg_res = xgboost_model.predict(xg_test)
Beispiel #5
0
    useful_feature = get_useful_features_byLightBGM(X, Y)
    X_U = X[useful_feature]

    x1, x2, y1, y2 = train_test_split(X_U, Y, test_size=0.2)
    y1_log = np.log1p(y1)
    model.fit(x1, y1_log)

    predict_log = model.predict(x2)
    predict = np.expm1(predict_log)
    error = error_fun(predict, y2)[1]

    return model, error


if __name__ == '__main__':
    # Load the features once and fill missing values with the medians
    # (previously the data was loaded twice just to compute the fill values).
    raw_x = get_train_X()
    train_x_full = raw_x.fillna(raw_x.median())
    train_y = get_train_Y()
    rounds = 10
    errors = []

    for i in range(rounds):
        print('round : ' + str(i))
        error = linearModel(train_x_full, train_y)
        print('error : ' + str(error))
        errors.append(error)

    # Derive the statistics from the observed errors.  Seeding ``best`` with
    # 1 and ``worst`` with 0 gave wrong results whenever every error was
    # above 1 (or below 0).
    best = min(errors)
    worst = max(errors)
    avg = sum(errors) / rounds