from sklearn.linear_model import (LinearRegression, Ridge, Lasso,
                                  RandomizedLasso)
from sklearn.feature_selection import RFE, f_regression
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
import numpy as np
from minepy import MINE
import loaddata as ld
import pandas as pd

# Load the training frame through the project's loader module.
train = ld.loadgoodData()

# Feature selection by Pearson correlation (currently disabled):
# a = np.round(train.corr(method = 'pearson'), 2)
# a_label = a['label']
# a_label = a_label.sort_values(ascending=False)
# a_label = a_label.index.tolist()
# n = a_label.index('id')
# a_label = a_label[1:n]

# Every column except the last is used as a feature and the last column is
# the target; both are converted to NumPy arrays.
X = np.array(train.iloc[:, :-1])
Y = np.array(train.iloc[:, -1])
# Labels of the feature columns, i.e. all column labels but the last one.
names = train.columns[:-1]
# Recursive feature elimination (RFE)
#from sklearn.feature_selection import RFE
#from sklearn.linear_model import LinearRegression
# lr = LinearRegression()
# rfe = RFE(lr, n_features_to_select=1)
# rfe.fit(X,Y)
# print("Features sorted by their rank:")
# a = sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), names))
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import xgboost as xgb
import loaddata as ld


# Convert a date from the 23/11/2012 format to the integer 20121123
def data(x):
    """Convert a ``day/month/year`` date string to a ``YYYYMMDD`` integer.

    For example ``"23/11/2012"`` becomes ``20121123``.  Day and month are
    zero-padded to two digits so that unpadded inputs such as ``"3/1/2012"``
    yield ``20120103`` rather than the ambiguous ``201213``.  Whitespace
    around each field is ignored.

    Args:
        x: Date string whose first three ``/``-separated fields are the day,
            the month and the year, in that order.

    Returns:
        The integer formed by the year, month and day digits.

    Raises:
        IndexError: If ``x`` has fewer than three ``/``-separated fields.
        ValueError: If the joined fields cannot be parsed as an integer.
    """
    parts = [field.strip() for field in x.split('/')]
    day, month, year = parts[0], parts[1], parts[2]
    # Pad day and month so every date maps to a unique, sortable integer.
    return int(year + month.zfill(2) + day.zfill(2))


# Data preprocessing
# Invoke the project loader with the dataset name; its return value is not
# used here.  Presumably this call produces the "*_solve.csv" file read
# below -- TODO confirm against the loaddata module.
ld.loadgoodData("d_train_20180102")
# Read the GBK-encoded CSV, taking its first row as the column header.
train = pd.read_csv(
    "d_train_20180102_solve.csv",
    header=0,
    encoding="gbk",
)
# train_before_eat = train[train['is_eat'] == 0]
# train_after_eat = train[train['is_eat'] == 1]

# f=open('list.txt','r')
# for line in f.readlines():
#     a.append(line.split(','))
# f.close
# a = a[0]
# del a[-1]

# train_set.describe()

# Build the test set
# X = train.iloc[:, :-1]
# coding=utf-8

# 1. Import the required libraries and load the data
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
import loaddata as ld

# Record the program's running time
# import time
# start_time = time.time()
# print(start_time)

# NOTE(review): binding the name ``data`` here replaces the ``data()`` date
# helper defined earlier in this file.
data = ld.loadgoodData()

# Separate the features (all columns but the last) from the target (the
# last column).
X, y = data.iloc[:, :-1], data.iloc[:, -1]

# Randomly hold out 30% of the rows for testing.  No random_state is passed,
# so the split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
)


# Loss function
def lossfunc(predict, real):
    """Return half the mean squared error between ``predict`` and ``real``.

    Computes ``sum((predict - real) ** 2) / (2 * len(real))``.  The
    subtraction and the reduction are done by NumPy, so the sum runs in
    native code and plain Python sequences are accepted as well as arrays.

    Args:
        predict: Predicted values; array-like, broadcastable against
            ``real``.
        real: Ground-truth values; array-like that supports ``len``.

    Returns:
        The squared differences summed along the first axis and divided by
        ``2 * len(real)``.

    Raises:
        ZeroDivisionError: If ``real`` is empty.
    """
    count = len(real)
    if count == 0:
        # Fail loudly, as a plain division by zero would, instead of letting
        # NumPy return NaN with only a runtime warning.
        raise ZeroDivisionError("lossfunc() needs at least one sample")
    # np.asarray turns a possible pandas result into a plain array so that
    # NaN values propagate through the sum instead of being skipped.
    squared_error = np.asarray(np.square(np.subtract(predict, real)))
    return squared_error.sum(axis=0) / (2 * count)


# Create an XGBoost regressor; no arguments are passed, so the library's
# default hyperparameters apply.
reg = XGBRegressor()

# Monitoring data (currently disabled)
# eval_set = [(X_test, y_test)]