Ejemplo n.º 1
0
def plot_features():
    """Display a scatter plot of ``TG/UA`` against ``Blood_Sugar``.

    The training features come from ``make_data_feat()``; the test split it
    also returns is not needed for plotting. The figure is shown with
    ``plt.show()`` and nothing is returned.
    """
    train_feat, _ = make_data_feat()
    # Alternative view kept for reference: plt.hist(train_feat['Age'], 100)
    plt.scatter(train_feat['TG/UA'], train_feat['Blood_Sugar'])
    plt.show()
Ejemplo n.º 2
0
def data_corr():
    """Build correlation matrices of the training features.

    Two matrices are computed from the training frame after its ``id``
    column is removed: one on the raw values and one after rescaling every
    column with ``(x - min) / (max - min)``. Neither matrix is returned or
    stored; the function only prints ``0`` once both have been computed.
    """
    features, _ = make_data_feat()
    del features['id']  # removes the column from the frame in place
    raw_corr = features.corr()

    # NOTE(review): with the default Pearson method, per-column min-max
    # scaling should not change the correlations, so ``scaled_corr`` is
    # expected to match ``raw_corr`` -- confirm whether another
    # normalisation was intended.
    col_min = features.min()
    col_range = features.max() - col_min
    scaled = (features - col_min) / col_range
    scaled_corr = scaled.corr()

    print(0)
Ejemplo n.º 3
0
def mean_value(chunk_size=1410, n_chunks=4):
    """Tabulate per-column means over consecutive ``Blood_Sugar``-ranked chunks.

    The training frame from ``make_data_feat()`` is sorted by ``Blood_Sugar``
    in descending order and cut into ``n_chunks`` consecutive slices of
    ``chunk_size`` rows each. The column means of every slice are collected
    in a table with a ``feat`` column (the column names) followed by one
    column per slice named ``'1'``, ``'2'``, ... in rank order. The table is
    not returned; the function prints ``0`` when it is done.

    Args:
        chunk_size: Number of rows per slice. The default of 1410 matches
            the previously hard-coded slice width.
        n_chunks: Number of slices. The default of 4 matches the previously
            hard-coded slice count.

    Raises:
        ValueError: If ``chunk_size`` or ``n_chunks`` is smaller than 1.
    """
    if chunk_size < 1 or n_chunks < 1:
        raise ValueError('chunk_size and n_chunks must both be at least 1')

    train_feat, _ = make_data_feat()
    ranked = train_feat.sort_values('Blood_Sugar', ascending=False)

    # Slice by row position: chunk k covers rows [k * size, (k + 1) * size).
    # Rows beyond n_chunks * chunk_size are ignored; a slice that starts past
    # the end of the frame is simply empty.
    chunk_means = [
        ranked.iloc[k * chunk_size:(k + 1) * chunk_size].mean()
        for k in range(n_chunks)
    ]

    result = pd.DataFrame({'feat': chunk_means[0].index})
    for number, means in enumerate(chunk_means, start=1):
        result[str(number)] = means.values

    print(0)
Ejemplo n.º 4
0
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
import time

from xgb_make import xgboost_make
from lgb_make import lightgbm_make
from rf_make import randomforest_make
from feature_extract import make_data_feat

# Script entry point: build the features, fit the three models and print
# their scores.
if __name__ == '__main__':

    # Wall-clock start time; it is not read anywhere in this part of the
    # script.
    t1 = time.time()

    train_feat, test_feat = make_data_feat()

    # Each helper is unpacked as a (labels, predictions) pair.
    lgb_label, lgb_pred = lightgbm_make(train_feat, test_feat)
    xgb_label, xgb_pred = xgboost_make(train_feat, test_feat)
    rf_label, rf_pred = randomforest_make(train_feat, test_feat)

    # Every printed score is half the mean squared error; the label text
    # means "offline score". First LightGBM alone, then XGBoost alone.
    print('线下得分:    {}'.format(mean_squared_error(lgb_label, lgb_pred) * 0.5))
    print('线下得分:    {}'.format(mean_squared_error(xgb_label, xgb_pred) * 0.5))
    # Blend: 0.8 * LightGBM + 0.2 * XGBoost; the random-forest predictions
    # are multiplied by 0 and so do not contribute.
    # NOTE(review): the blend is scored against xgb_label -- presumably all
    # three label vectors are identical; confirm.
    print('线下得分:    {}'.format(
        mean_squared_error(xgb_label, (
            (lgb_pred * 0.8 + xgb_pred * 0.2) * 1 + rf_pred * 0)) * 0.5))
    #
    # df = lgb_label.to_frame()
    # df['pred'] = (lgb_pred * 0.8 + xgb_pred * 0.2)*0.9 + rf_pred*0.1
    # df = df.sort_values('pred', ascending=True)