# Feature-selection experiment: load the cleaned training data and split it
# into a feature matrix ``X``, a target vector ``Y`` and the feature ``names``.
import numpy as np
import pandas as pd
from minepy import MINE
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import RFE, f_regression
# NOTE(review): RandomizedLasso was deprecated in scikit-learn 0.19 and removed
# in 0.21; this import only works with an older pinned version -- confirm.
from sklearn.linear_model import (LinearRegression, Ridge, Lasso,
                                  RandomizedLasso)
from sklearn.preprocessing import MinMaxScaler

import loaddata as ld

train = ld.loadgoodData()

# Feature selection by Pearson correlation coefficient
# a = np.round(train.corr(method = 'pearson'), 2)
# a_label = a['label']
# a_label = a_label.sort_values(ascending=False)
# a_label = a_label.index.tolist()
# n = a_label.index('id')
# a_label = a_label[1:n]

# Every column except the last is a feature; the last column is the target.
X = np.array(train.iloc[:, :-1])
Y = np.array(train.iloc[:, -1])
# Feature names, in the same order as the columns of X.
names = train.columns[:-1]

# Recursive feature elimination
#from sklearn.feature_selection import RFE
#from sklearn.linear_model import LinearRegression
# lr = LinearRegression()
# rfe = RFE(lr, n_features_to_select=1)
# rfe.fit(X,Y)
# print("Features sorted by their rank:")
# a = sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), names))
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split

import loaddata as ld


# Date format conversion: 23/11/2012 becomes 20121123
def data(x):
    """Convert a ``DD/MM/YYYY`` date string into the integer ``YYYYMMDD``.

    Day and month are stripped of surrounding whitespace and zero-padded to
    two digits, so ``"3/1/2012"`` yields ``20120103`` rather than the
    ambiguous ``201213``. Already-padded input gives the same result as a
    plain concatenation of the three fields.

    Args:
        x: Date string with day, month and year separated by ``/``.

    Returns:
        The date as an ``int`` in ``YYYYMMDD`` form.

    Raises:
        IndexError: If ``x`` has fewer than three ``/``-separated fields.
        ValueError: If the reassembled string is not a valid integer.
    """
    arr = x.split('/')
    day = arr[0].strip().zfill(2)
    month = arr[1].strip().zfill(2)
    year = arr[2].strip()
    return int(year + month + day)


# Data preprocessing
# NOTE(review): presumably loadgoodData() writes the "_solve.csv" file that is
# read on the next line -- confirm in loaddata.
ld.loadgoodData("d_train_20180102")
train = pd.read_csv("d_train_20180102_solve.csv", encoding="gbk", header=0)
# train_before_eat = train[train['is_eat'] == 0]
# train_after_eat = train[train['is_eat'] == 1]
# f=open('list.txt','r')
# for line in f.readlines():
#     a.append(line.split(','))
# f.close
# a = a[0]
# del a[-1]
# train_set.describe()
# Build the test set
# X = train.iloc[:, :-1]
# coding=utf-8
# 1. Import the required libraries and read the data
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

import loaddata as ld

# Record the program's running time
# import time
# start_time = time.time()
# print(start_time)
data = ld.loadgoodData()

# Build the test set: every column except the last is a feature, the last
# column is the target.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Randomly split the data (30% held out). No random_state is passed, so the
# split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)


# Loss function
def lossfunc(predict, real):
    """Return half the mean squared error between ``predict`` and ``real``.

    Computes ``sum((predict - real) ** 2) / (2 * len(real))``. Both inputs are
    converted with ``np.asarray`` and compared positionally, so plain lists
    are accepted as well as arrays and Series.

    Args:
        predict: Predicted values (array-like).
        real: Ground-truth values (array-like) with the same shape.

    Returns:
        The half mean squared error. For 2-D input the squared errors are
        summed along the first axis, giving one value per column.

    Raises:
        ZeroDivisionError: If ``real`` is empty.
    """
    n_samples = len(real)
    if n_samples == 0:
        # Keep the exception type callers would have seen from a bare 0 / 0.
        raise ZeroDivisionError("lossfunc() requires at least one sample")
    residual = np.asarray(predict) - np.asarray(real)
    # axis=0 mirrors the builtin sum(), which iterates over the first axis.
    return np.sum(np.square(residual), axis=0) / (2 * n_samples)


reg = XGBRegressor()
# Monitoring data
# eval_set = [(X_test, y_test)]