Example #1
0
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from imblearn.under_sampling import RandomUnderSampler

sys.path.append('../')
from util.feature import add_feature, fillna
from util.metric import mse
from util import variables

# Plot configuration: the SimHei font lets Chinese labels display correctly,
# and disabling 'axes.unicode_minus' keeps the minus sign displaying correctly.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Read the training CSV and pass it through the project's fillna and
# add_feature helpers, in that order.
train = add_feature(fillna(pd.read_csv('../data/d_train_20180102.csv')))

# Every column except the id, the exam date ('体检日期') and the target
# ('血糖') is used as a predictor.
excluded_columns = ('id', '体检日期', '血糖')
predictor = [name for name in train.columns if name not in excluded_columns]

# Under-sample on a binary label (target > 7) and keep the indices of the
# rows that were retained.
rus = RandomUnderSampler(random_state=2018, return_indices=True)
above_threshold = (train['血糖'] > 7).astype(int)
XALL, _, idx_resampled = rus.fit_sample(train[predictor], above_threshold)

# The binary label only drives the sampling; the target kept for the selected
# rows is the original continuous '血糖' value, looked up by position.
yALL = train['血糖'].iloc[idx_resampled]
# Rebuild a DataFrame with the predictor column names around the resampled
# feature values.
XALL = pd.DataFrame(XALL, columns=predictor)
print('Feature: ', XALL.columns.tolist())

# 5-fold shuffled cross-validation with a fixed seed.
kf = KFold(n_splits=5, shuffle=True, random_state=2018)
# NOTE(review): this buffer has one row per row of the full `train` frame, not
# per row of the resampled XALL, and 5 columns (presumably one per fold) --
# confirm against the fold loop that fills it.
preds = np.zeros((len(train), 5))
feature_importance = []
for cv_idx, (train_idx, valid_idx) in enumerate(kf.split(XALL)):
Example #2
0
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.preprocessing import MinMaxScaler

sys.path.append('../')
from util.feature import add_feature, fillna
from util import variables

# Read the training and test CSV files.
train = pd.read_csv('../data/d_train_20180102.csv')
test = pd.read_csv('../data/d_test_A_20180102.csv')
# Give every test row a negative placeholder target so the two sets can be
# separated again after they have been stacked and processed together.
test['血糖'] = -1

# Stack both sets into one frame and pass it through the project's fillna and
# add_feature helpers, in that order.
all_data = add_feature(fillna(pd.concat([train, test], ignore_index=True)))

# Columns that are not scaled: the id, gender ('性别'), exam date
# ('体检日期') and the target ('血糖').
non_feature_columns = ('id', '性别', '体检日期', '血糖')
feature_col = [
    name for name in all_data.columns if name not in non_feature_columns
]

# Min-max scale the feature columns, fitting on the stacked train + test rows.
# The target is not in feature_col, so the -1 placeholder is left intact.
scaler = MinMaxScaler()
all_data.loc[:, feature_col] = scaler.fit_transform(all_data[feature_col])

# Undo the stacking: rows with a non-negative target go to train, rows with a
# negative target (the placeholder) go to test.
target = all_data['血糖']
train = all_data.loc[target >= 0.0]
test = all_data.loc[target < 0.0]

# Split the training rows by the gender column.
# NOTE(review): going by the variable names, 0 is presumably male and 1
# female -- confirm against how '性别' is encoded upstream.
gender = train['性别']
train_m = train.loc[gender == 0]
train_f = train.loc[gender == 1]