def plot_features():
    """Scatter-plot the 'TG/UA' column against 'Blood_Sugar' for the training set.

    Also builds a frequency table of the 'Urea' column (with the distinct
    values copied into a 'num' column); that table is only held in a local
    and is not returned or plotted.

    NOTE(review): `plt` and `make_data_feat` are not defined in this block;
    `plt` is presumably matplotlib.pyplot -- confirm against the file's imports.
    """
    train_feat, _test_feat = make_data_feat()

    # Frequency table of 'Urea': counts per distinct value, plus the values
    # themselves exposed as a regular column.
    urea_counts = train_feat['Urea'].value_counts().to_frame()
    urea_counts['num'] = urea_counts.index

    # plt.hist(train_feat['Age'], 100)
    x_values = train_feat['TG/UA']
    y_values = train_feat['Blood_Sugar']
    plt.scatter(x_values, y_values)
    plt.show()
def data_corr():
    """Compute correlation matrices of the training features, raw and min-max scaled.

    The 'id' column is removed from the training frame in place before the
    correlations are computed. Both correlation matrices are kept only in
    locals; nothing is returned. The function ends by printing 0.
    """
    train_feat, _test_feat = make_data_feat()
    del train_feat['id']

    # Correlation on the raw feature values.
    corr = train_feat.corr()

    # Min-max scale every column, then correlate again.
    col_min = train_feat.min()
    col_span = train_feat.max() - train_feat.min()
    scaled_feat = (train_feat - col_min) / col_span
    corr_scaled = scaled_feat.corr()

    print(0)
def mean_value():
    """Tabulate per-column means for four consecutive 1410-row bands of the training set.

    The training frame is sorted by 'Blood_Sugar' in descending order and the
    first 5640 rows are cut into four bands of 1410 rows each. The column
    means of each band are collected into a frame with a 'feat' column (the
    column names) and columns '1'..'4' (one per band, highest 'Blood_Sugar'
    band first). The table is kept in a local only; the function prints 0
    and returns nothing.
    """
    train_feat, _test_feat = make_data_feat()
    ranked = train_feat.sort_values('Blood_Sugar', ascending=False)

    band_size = 1410
    band_labels = ('1', '2', '3', '4')

    # Mean of every column within each consecutive band of rows.
    band_means = []
    for band_no in range(len(band_labels)):
        start = band_no * band_size
        band = ranked[start:start + band_size]
        band_means.append(band.mean().to_frame())

    result = pd.DataFrame({'feat': band_means[0].index})
    for label, band_mean in zip(band_labels, band_means):
        result[label] = band_mean.values

    print(0)
# -*- coding: utf-8 -*-
# Script entry point: fit the LightGBM, XGBoost and random-forest pipelines on
# the extracted features and print an offline score for each of the first two
# models and for their weighted blend.
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
import time
from xgb_make import xgboost_make
from lgb_make import lightgbm_make
from rf_make import randomforest_make
from feature_extract import make_data_feat

if __name__ == '__main__':
    t1 = time.time()  # start timestamp; not read again in the visible code
    train_feat, test_feat = make_data_feat()

    # Each *_make helper returns a (labels, predictions) pair.
    lgb_label, lgb_pred = lightgbm_make(train_feat, test_feat)
    xgb_label, xgb_pred = xgboost_make(train_feat, test_feat)
    rf_label, rf_pred = randomforest_make(train_feat, test_feat)

    # The printed prefix reads "offline score"; every score is half the MSE.
    score_line = '线下得分: {}'

    lgb_score = mean_squared_error(lgb_label, lgb_pred) * 0.5
    print(score_line.format(lgb_score))

    xgb_score = mean_squared_error(xgb_label, xgb_pred) * 0.5
    print(score_line.format(xgb_score))

    # Blend: 0.8 * LightGBM + 0.2 * XGBoost; the random-forest predictions are
    # currently weighted by 0. The blend is scored against the XGBoost labels.
    blended_pred = (lgb_pred * 0.8 + xgb_pred * 0.2) * 1 + rf_pred * 0
    blended_score = mean_squared_error(xgb_label, blended_pred) * 0.5
    print(score_line.format(blended_score))

    # Disabled experiment (kept from the original): blend with a 0.1
    # random-forest weight and sort by prediction.
    #
    # df = lgb_label.to_frame()
    # df['pred'] = (lgb_pred * 0.8 + xgb_pred * 0.2)*0.9 + rf_pred*0.1
    # df = df.sort_values('pred', ascending=True)