Esempio n. 1
0
# Isaac Li
# 1.23.2018

import time
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import function

# Number of cross-validation folds. Defined once because the KFold splitter,
# the column count of `test_preds` and the progress message must all agree;
# changing only one of them would break `test_preds[:, i]` below.
N_SPLITS = 5

# Load the train/test frames through the project helper (path='a'), then fit
# on log1p of the target column '血糖'. Predictions are mapped back with expm1
# before they are scored.
train, test = function.read_file(path='a')
train["血糖"] = np.log1p(train["血糖"])
train, test = function.add_column(train, test)
train, test = function.transform(train, test)

print('\n\nStart...')
t0, mses = time.time(), []
# train_preds: one out-of-fold prediction per training row.
# test_preds:  one column of test-set predictions per fold.
train_preds = np.zeros(train.shape[0])
test_preds = np.zeros((test.shape[0], N_SPLITS))
# Features are every column of `test` except the target.
predictors = [f for f in test.columns if f not in ['血糖']]
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=520)

for i, (train_index, test_index) in enumerate(kf.split(train)):
    print('   .{}/{}.'.format(i + 1, N_SPLITS))
    # train_feat1 is the fitting part of the fold, train_feat2 the held-out part.
    train_feat1, train_feat2 = train.iloc[train_index], train.iloc[test_index]
    # The same shared estimator object is fitted again on every fold.
    gbm = function.settings.model_lgb.fit(train_feat1[predictors], train_feat1['血糖'],
                                          categorical_feature=['性别', '体检日期'])
    predict = gbm.predict(train_feat2[predictors])
    # KFold places each row in exactly one held-out part, so adding to the
    # zero-initialised array stores that row's out-of-fold prediction.
    train_preds[test_index] += predict
    # Per-fold score: half the mean squared error on the original (expm1) scale.
    mses.append(.5 * mean_squared_error(np.expm1(train_feat2['血糖']), np.expm1(predict)))
    test_preds[:, i] = gbm.predict(test[predictors])

# Overall score: the same half-MSE over all out-of-fold predictions.
cv = .5 * mean_squared_error(np.expm1(train['血糖']), np.expm1(train_preds))
Esempio n. 2
0
# Read the GBK-encoded CSV 'd_answer_a_20180128.csv' located under
# function.settings.source_path.
ans_path = function.settings.source_path + 'd_answer_a_20180128.csv'
ans = pd.read_csv(ans_path,
                  encoding='gbk')  # NOTICE: add a row in file as index!

# ------------ predict a -----------------------------------------------------------------------------------------------

# Any non-empty reply skips part A; an empty reply (just Enter) runs it.
a = input('Part A? -> ')
if a:
    print('Done.')
else:
    print('Part A.')
    # `train_a`, `test_a` and `s` are presumably bound earlier in the script.
    # NOTE(review): `train` is the same object as `train_a`, so the column
    # assignment below also rewrites '血糖' in `train_a` -- confirm intended.
    train, test = train_a, test_a
    # Fit on log1p of the target column.
    train["血糖"] = np.log1p(train["血糖"])
    if not s:
        # When `s` is falsy, pass an explicit item list: all_items plus
        # '其他胆固醇', with the first three entries dropped.
        list_test = function.settings.all_items + ['其他胆固醇']
        train, test = function.add_column(train, test, test_item=list_test[3:])
    else:
        # Otherwise rely on add_column's default for test_item.
        train, test = function.add_column(train, test)
    train, test = function.transform(train, test)

    print('\n\nStart...')
    # Start timestamp and an initially empty list `mses`.
    t0, mses = time.time(), []
    # train_preds has one slot per training row; test_preds has one row per
    # test row and 5 columns (presumably one per fold -- matches n_splits).
    train_preds, test_preds = np.zeros(train.shape[0]), np.zeros(
        (test.shape[0], 5))
    # Features are every column of `test` except the target.
    predictors = [f for f in test.columns if f not in ['血糖']]
    # Shuffled 5-fold split with a fixed seed for repeatable folds.
    kf = KFold(n_splits=5, shuffle=True, random_state=520)

    for i, (train_index, test_index) in enumerate(kf.split(train)):
        print('   .{}/5.'.format(i + 1))
        # train_feat1: rows used for fitting; train_feat2: held-out rows.
        train_feat1, train_feat2 = train.iloc[train_index], train.iloc[
            test_index]
Esempio n. 3
0
# Isaac Li
# 1.23.2018

import time
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import function

# Load the train/test frames with the helper's default path and fit on log1p
# of the target column '血糖'.
train, test = function.read_file()
train["血糖"] = np.log1p(train["血糖"])
# add_column is called with sqrt=True here.
train, test = function.add_column(train, test, sqrt=True)
train, test = function.transform(train, test)

print('\n\nStart...')
# Start timestamp and an initially empty list `mses`.
t0, mses = time.time(), []
# train_preds has one slot per training row; test_preds has one row per test
# row and 5 columns (presumably one per fold -- matches n_splits below).
train_preds, test_preds = np.zeros(train.shape[0]), np.zeros(
    (test.shape[0], 5))
# Features are every column of `test` except the target.
predictors = [f for f in test.columns if f not in ['血糖']]
# Shuffled 5-fold split with a fixed seed for repeatable folds.
kf = KFold(n_splits=5, shuffle=True, random_state=520)

for i, (train_index, test_index) in enumerate(kf.split(train)):
    print('   .{}/5.'.format(i + 1))
    # train_feat1: rows used for fitting; train_feat2: held-out rows.
    train_feat1, train_feat2 = train.iloc[train_index], train.iloc[test_index]
    # The same shared estimator object is fitted again on every fold, with
    # '性别' and '体检日期' passed as categorical features.
    gbm = function.settings.model_lgb.fit(train_feat1[predictors],
                                          train_feat1['血糖'],
                                          categorical_feature=['性别', '体检日期'])
    predict = gbm.predict(train_feat2[predictors])
    # KFold places each row in exactly one held-out part, so adding to the
    # zero-initialised array stores that row's out-of-fold prediction.
    train_preds[test_index] += predict
    mses.append(
        .5 *
Esempio n. 4
0
# Isaac Li
# 1.23.2018

import time
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import function

# Load the train/test frames through the project helper (path='s') and fit on
# log1p of the target column '血糖'.
train, test = function.read_file(path='s')
train["血糖"] = np.log1p(train["血糖"])

# Item list handed to add_column: all_items without its first three entries,
# followed by '其他胆固醇'.
extra_items = function.settings.all_items[3:] + ['其他胆固醇']
train, test = function.add_column(train, test, test_item=extra_items)
train, test = function.transform(train, test)

print('\n\nStart...')
t0 = time.time()
mses = []
# One out-of-fold slot per training row; a (test rows x 5) array for the test set.
train_preds = np.zeros(train.shape[0])
test_preds = np.zeros((test.shape[0], 5))
# Features are every column of `test` except the target.
predictors = [col for col in test.columns if col != '血糖']
# Shuffled 5-fold split with a fixed seed for repeatable folds.
kf = KFold(n_splits=5, shuffle=True, random_state=520)

for i, (train_index, test_index) in enumerate(kf.split(train)):
    print('   .{}/5.'.format(i + 1))
    # train_feat1: rows used for fitting; train_feat2: held-out rows.
    train_feat1 = train.iloc[train_index]
    train_feat2 = train.iloc[test_index]
    # The shared estimator is fitted again on this fold, with '性别' and
    # '体检日期' passed as categorical features.
    gbm = function.settings.model_lgb.fit(
        train_feat1[predictors],
        train_feat1['血糖'],
        categorical_feature=['性别', '体检日期'],
    )
    predict = gbm.predict(train_feat2[predictors])
# Isaac Li
# 1.23.2018

import time
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import function

# Load the train/test frames with the helper's default path and fit on log1p
# of the target column '血糖'.
train, test = function.read_file()
train["血糖"] = np.log1p(train["血糖"])
# add_column is called with both sqrt=True and power3_a=True here.
train, test = function.add_column(train, test, sqrt=True, power3_a=True)
train, test = function.transform(train, test)

print('\n\nStart...')
t0 = time.time()
mses = []
# One out-of-fold slot per training row; a (test rows x 5) array for the test set.
train_preds = np.zeros(train.shape[0])
test_preds = np.zeros((test.shape[0], 5))
# Features are every column of `test` except the target.
predictors = [col for col in test.columns if col != '血糖']
# Shuffled 5-fold split with a fixed seed for repeatable folds.
kf = KFold(n_splits=5, shuffle=True, random_state=520)

for i, (train_index, test_index) in enumerate(kf.split(train)):
    print('   .{}/5.'.format(i + 1))
    # train_feat1: rows used for fitting; train_feat2: held-out rows.
    train_feat1 = train.iloc[train_index]
    train_feat2 = train.iloc[test_index]
    # The shared estimator is fitted again on this fold, with '性别' and
    # '体检日期' passed as categorical features.
    gbm = function.settings.model_lgb.fit(
        train_feat1[predictors],
        train_feat1['血糖'],
        categorical_feature=['性别', '体检日期'],
    )
    predict = gbm.predict(train_feat2[predictors])

    # Constants set on every iteration; their use is presumably further down
    # the loop body -- TODO confirm against the rest of the script.
    base = 1.7
    power = 1
    minimum = 7