def feture_select_RLR():
    """Rank features by their randomized-logistic-regression score.

    Loads the data via ``get_data()``, fits ``RLR`` on it and returns a list
    of ``(name, score)`` pairs, with scores rounded to 4 decimals and the
    list ordered from highest to lowest score.
    """
    data_x, data_y, names = get_data()
    selector = RLR()
    selector.fit(data_x, data_y)
    ranked = [(name, round(score, 4))
              for name, score in zip(names, selector.scores_)]
    # Stable sort: ties keep their original relative order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked
def get_support_fields(X, Y):
    """Select features with randomized logistic regression.

    Fits ``RLR`` on ``X``/``Y``, prints the per-feature scores and the names
    of the selected columns, and returns the selected columns as an array.

    NOTE(review): column names and the returned values are read from the
    module-level ``data`` frame, not from ``X``. This presumably assumes the
    columns of ``X`` are the leading columns of ``data`` -- confirm against
    the caller.
    """
    rlr = RLR()  # randomized logistic regression used as a feature filter
    rlr.fit(X, Y)
    # Positional indices instead of a boolean mask: the mask has one entry
    # per fitted feature and may be shorter than ``data.columns``, which
    # modern NumPy rejects for boolean indexing.
    selected = data.columns[rlr.get_support(indices=True)]
    print(rlr.scores_)
    joined = ','.join(selected)
    # Only byte strings need decoding; text strings are already unicode.
    if isinstance(joined, bytes):
        joined = joined.decode('utf-8')
    print(u'有效特征为:%s' % joined)
    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
    X = data[selected].values
    return X
def programmer_1():
    """Select features of the bank-loan data with RLR, then fit and score LR.

    Reads ``data/bankloan.xls``, uses the first 8 columns as features and
    column 8 as the label, prints the selected column names and the
    accuracy of a logistic regression trained on them (scored on the same
    training data).
    """
    filename = "data/bankloan.xls"
    data = pd.read_excel(filename)
    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
    x = data.iloc[:, :8].values
    y = data.iloc[:, 8].values

    rlr = RLR()
    rlr.fit(x, y)
    rlr_support = rlr.get_support()

    # Drop the label column so the boolean mask lines up with the features.
    support_col = data.drop('违约', axis=1).columns[rlr_support]
    print(
        "rlr_support_columns: {columns}".format(columns=','.join(support_col)))
    x = data[support_col].values

    lr = LR()
    lr.fit(x, y)
    print("lr: {score}".format(score=lr.score(x, y)))
def data_proc(self):
    """Select features with RLR, fit a weighted LR and print its results.

    Uses the first 8 columns of ``self.data`` as features and column 8 as
    the label. Prints the selected feature names, then the training
    accuracy and the prediction for one hard-coded sample.
    """
    self.load_data()
    # iloc is purely positional: [rows, columns].
    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
    x = self.data.iloc[:, :8].values
    y = self.data.iloc[:, 8].values

    # First filter the attributes with randomized logistic regression.
    rlr = RLR()
    rlr.fit(x, y)
    # Positional indices of the kept features; a boolean mask would have 8
    # entries and not match the full column index of ``self.data``.
    selected_idx = rlr.get_support(indices=True)
    selected_cols = self.data.columns[selected_idx]
    print("有效特征为%s" % ','.join(selected_cols))
    x = self.data[selected_cols].values

    # Class weights; class_weight='balanced' lets sklearn derive them.
    lr = LR(class_weight={0: 0.9, 1: 0.1})
    # NOTE(review): sample_weight needs one weight per training row; this
    # 5-element list only works for a 5-row data set -- confirm intent.
    lr.fit(x, y, sample_weight=[1, 2, 3, 5, 4])

    # The raw sample has all 8 attributes; keep only the selected ones so
    # its width matches what the model was trained on.
    raw_sample = [24, 2, 2, 0, 28, 17.3, 1.79, 3.06]
    sample = [raw_sample[i] for i in selected_idx]
    result = lr.predict([sample])
    print('模型的正确率是:%s,预测结果是 %d' % (lr.score(x, y), result[0]))
def programmer_1():
    """Select bank-loan features with RLR, then train and score an LR model.

    Reads ``bankloan.xls``, treats the first 8 columns as features and
    column 8 as the label, prints the selection mask and selected column
    names, and prints the accuracy of a logistic regression trained on the
    selected features (scored on the training data).
    """
    # Parameter initialisation
    filename = r'bankloan.xls'
    data = pd.read_excel(filename)
    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
    x = data.iloc[:, :8].values
    y = data.iloc[:, 8].values

    rlr = RLR()  # randomized logistic regression for feature selection
    rlr.fit(x, y)
    egeList = rlr.get_support()  # boolean mask over the 8 features
    # Append one False so the mask also covers the column after the
    # features (the label, column 8) and can index ``data.columns``.
    egeList = np.append(egeList, False)
    print("rlr.get_support():")
    print(egeList)
    print(u'随机逻辑回归模型特征选择结束!!!')
    print(u'有效特征为:%s' % ','.join(data.columns[egeList]))
    x = data[data.columns[egeList]].values  # keep only selected features

    lr = LR()
    lr.fit(x, y)  # train on the selected features
    print(u'逻辑回归训练模型结束!!!')
    print(u'模型的平均正确率:%s' % lr.score(x, y))  # mean training accuracy
def stable_select(df, y, rd_reg_columns, threshold=0.2, model='rlr'):
    """Score features with a stability-selection / forest model and plot them.

    Args:
        df: data frame holding both the candidate features and the target.
        y: name of the target column in ``df``.
        rd_reg_columns: labels of the candidate feature columns.
        threshold: ``selection_threshold`` passed to the randomized models.
        model: ``'rlr'`` (randomized logistic regression), ``'rls'``
            (randomized lasso) or ``'rfr'`` (``RFR``, feature importances).

    Returns:
        A ``pandas.Series`` named ``'score'``, indexed by feature name and
        sorted in descending order. A horizontal bar chart of the scores is
        drawn as a side effect.

    Raises:
        ValueError: if ``model`` is not one of the supported names.
    """
    # Reject unknown models up front; previously this fell through and
    # crashed later with an unbound ``scores`` variable.
    if model not in ('rlr', 'rls', 'rfr'):
        raise ValueError(
            "unknown model %r; expected 'rlr', 'rls' or 'rfr'" % (model,))

    X = df.loc[:, rd_reg_columns]
    Y = df[y]

    if model == 'rlr':
        # Randomized logistic regression
        rlr = RLR(scaling=0.5, sample_fraction=0.75, n_resampling=300,
                  selection_threshold=threshold)
        rlr.fit(X, Y)
        scores = rlr.scores_
    elif model == 'rls':
        # Randomized lasso regression
        rls = RLS(scaling=0.5, sample_fraction=0.75, n_resampling=300,
                  selection_threshold=threshold)
        rls.fit(X, Y)
        scores = rls.scores_
    else:  # 'rfr'
        rf = RFR()
        rf.fit(X, Y)
        scores = rf.feature_importances_

    result = (pd.Series(dict(zip(X.columns, scores)))
              .rename('score')
              .sort_values(ascending=False))
    plt.figure(figsize=(20, 10))
    result.plot.barh(title='Feature Importances', color='lightblue')
    plt.ylabel('Feature Importance Score')
    return result
def logistic(X_train, X_test, y_train, y_test):
    """Select features with RLR, then train and evaluate a logistic regression.

    The support mask fitted on the training data is applied to the columns
    of both ``X_train`` and ``X_test``. Test-set predictions are passed to
    ``metrics_result``.

    Returns:
        ``(pred_prob, pred_prob_train)``: class probabilities predicted for
        the test set and for the training set, in that order.
    """
    from sklearn.linear_model import LogisticRegression as LR
    from sklearn.linear_model import RandomizedLogisticRegression as RLR

    # Feature engineering: compute the support mask once and reuse it.
    rlr = RLR()
    rlr.fit(X_train, y_train)
    support = rlr.get_support()
    print(support)
    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
    x = X_train[X_train.columns[support]].values
    x_test = X_test[X_test.columns[support]].values

    # Logistic regression
    lr = LR()
    lr.fit(x, y_train)
    pred_prob_train = lr.predict_proba(x)
    pred_prob = lr.predict_proba(x_test)
    print('logistic')
    predicts = lr.predict(x_test)
    metrics_result(y_test, predicts)
    return pred_prob, pred_prob_train
def logistic_regression():
    """Select bank-loan features with RLR, then train and score an LR model.

    Reads ``SRC_PATH + '/data/bankloan.xls'``, prints the head and tail of
    the table and the raw feature/label arrays, filters the first 8 columns
    with randomized logistic regression, and prints the training accuracy
    of a logistic regression fitted on the selected features.
    """
    # Parameter initialisation
    filename = SRC_PATH + '/data/bankloan.xls'
    data = pd.read_excel(filename)
    # print() calls with a single argument behave the same on Python 2 and 3.
    print(data.head())
    print(data.tail())

    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
    x = data.iloc[:, :8].values
    y = data.iloc[:, 8].values
    print('%s %s' % (x, y))

    rlr = RLR()  # randomized logistic regression used as a feature filter
    rlr.fit(x, y)
    print(u'通过随机逻辑回归模型筛选特征结束。')

    # The selection step used to be commented out, so the model was trained
    # on all 8 features. Positional indices are used because a boolean mask
    # of length 8 does not match the full column index of ``data``.
    selected = data.columns[rlr.get_support(indices=True)]
    print(u'有效特征为:%s' % ','.join(selected))
    x = data[selected].values

    lr = LR()
    lr.fit(x, y)  # train on the selected features
    print(u'逻辑回归模型训练结束。')
    print(u'模型的平均正确率为:%s' % lr.score(x, y))  # mean training accuracy
import pandas as pd
from sklearn.linear_model import RandomizedLogisticRegression as RLR
from sklearn.linear_model import LogisticRegression as LR

# Load the data: column 0 is the label, columns 1-3 are the features.
data = pd.read_csv("C:/Users/T/Desktop/python视频/luqu.csv")
# ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
x = data.iloc[:, 1:4].values
# Take the label as a 1-D array; a single-column 2-D slice is not the
# shape scikit-learn estimators expect for ``y``.
y = data.iloc[:, 0].values

# Randomized logistic regression, used to filter the variables
f1 = RLR()
f1.fit(x, y)
f1.get_support()  # mask of the selected variables

# Plain logistic regression on all three features
f2 = LR()
f2.fit(x, y)
f2.score(x, y)  # training accuracy
# coding: utf-8
# Logistic regression: automated modelling
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

filename = '../data/bankloan.xls'
data = pd.read_excel(filename)
# ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
x = data.iloc[:, :8].values
y = data.iloc[:, 8].values

rlr = RLR(selection_threshold=0.5)  # build the selector
rlr.fit(x, y)  # train it
# Positional indices of the kept features, computed once and reused.
selected = data.columns[rlr.get_support(indices=True)]
print('通过随机逻辑回归模型筛选结束')
print('有效特征:%s' % ','.join(selected))
x = data[selected].values  # keep only the selected features

lr = LR()
lr.fit(x, y)
print(u'逻辑回归模型训练结束')
print(u'模型平均正确率:%s' % lr.score(x, y))  # mean training accuracy
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
from scipy import ndimage
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR
from sklearn.model_selection import cross_val_score

data = pd.read_csv("luqu.csv")
x = data.iloc[:, 1:5]   # candidate features: columns 1-4
y = data.iloc[:, 0:1]   # label: column 0, kept as a one-column frame

# ---- Step 1: feature selection ----
rlr = RLR()  # randomized logistic regression as the feature filter
rlr.fit(x, y)
egeList = rlr.get_support()  # boolean mask of the kept features
print("rlr.get_support():")
print(egeList)
# ``x`` is still the unfiltered 1:5 slice here, so its columns are the
# candidate feature names.
_kept_columns = x.columns[egeList]
print('有效特征为:' + str(_kept_columns.values))
print('随机逻辑回归模型特征选择结束!!!')
x = data[_kept_columns]  # drop the features that were filtered out

# ---- Step 2: choose the logistic-regression parameter ----
# Candidate values for C, log-spaced from 1e-4 to 1e4.
lr = LR()
Pars = np.logspace(-4, 4, num=20)
# -*- coding:utf-8 -*-
import pandas as pd

filename = '../data/bankloan.xls'
data = pd.read_excel(filename)
# ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
x = data.iloc[:, :8].values
y = data.iloc[:, 8].values

from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

rlr = RLR()  # randomized logistic regression used to filter variables
rlr.fit(x, y)  # train the selector
# Positional indices rather than a boolean mask: the mask has 8 entries
# and does not match the full column index of ``data``.
selected = data.columns[rlr.get_support(indices=True)]
print(u'有效特征为:%s' % ','.join(selected))
x = data[selected].values  # keep only the selected features

lr = LR()  # logistic regression model
lr.fit(x, y)  # train on the selected features
print(u'模型的平均正确率:%s' % lr.score(x, y))
#-*- coding: utf-8 -*-
# Logistic regression: automated modelling
import pandas as pd

# Parameter initialisation
filename = 'pdata.xls'
data = pd.read_excel(filename)
# ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
x = data.iloc[:, :3].values
y = data.iloc[:, 3].values

from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

# Randomized logistic regression as the feature filter, threshold 0.25.
rlr = RLR(selection_threshold=0.25)
rlr.fit(x, y)  # train the selector
# Positional indices rather than a boolean mask: the mask has 3 entries
# and does not match the full column index of ``data``.
selected = data.columns[rlr.get_support(indices=True)]
print('通过随机逻辑回归模型筛选特征结束。')
print(rlr.scores_)  # per-feature selection scores
print('%s' % ','.join(selected))
x = data[selected].values  # keep only the selected features

lr = LR()  # logistic regression model
lr.fit(x, y)  # train on the selected features
print('逻辑回归模型训练结束。')
print('correct_point:%s' % lr.score(x, y))  # mean training accuracy
# -*- coding: utf-8 -*-
# Date   : 2018/9/15 13:38
# Author : xcl
import pandas as pd

filename = 'C:/Users/Administrator/Desktop/bankloan.xls'
data = pd.read_excel(filename)  # returns a DataFrame
x = data.iloc[:, :8].values  # all rows, columns 0-7 (features)
y = data.iloc[:, 8].values   # all rows, column 8 (label)

# Stability selection: filter the features with randomized logistic
# regression, then fit a logistic regression on the selected features.
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

# ``0.25i`` was not a valid Python literal; the threshold is the float 0.25.
rlr = RLR(selection_threshold=0.25)
rlr.fit(x, y)  # train the selector

# Positional indices of the kept features, computed once and reused.
selected = data.columns[rlr.get_support(indices=True)]
print('有效特征:' , ','.join(selected))
x = data[selected].values

lr = LR()  # logistic regression model
lr.fit(x, y)  # train on the selected features
#-*- coding: utf-8 -*-
# Logistic regression: automated modelling
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

# Parameter initialisation
filename = '../data/bankloan.xls'
data = pd.read_excel(filename)
# ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
x = data.iloc[:, :8].values  # the 8 attributes
y = data.iloc[:, 8].values   # column 8: the outcome label

# Stability selection to pick the features. The threshold is set
# explicitly to 0.5 here rather than left at the library default.
rlr = RLR(selection_threshold=0.5)
rlr.fit(x, y)  # train the selector
# Positional indices rather than a boolean mask: the mask has 8 entries
# and does not match the full column index of ``data``.
selected = data.columns[rlr.get_support(indices=True)]
print(u'通过随机逻辑回归模型筛选特征结束。')
print(u'有效特征为:%s' % ','.join(selected))
x = data[selected].values  # keep the selected features and retrain

lr = LR()  # logistic regression model
lr.fit(x, y)  # train on the selected features
print(u'逻辑回归模型训练结束。')
print(u'模型的平均正确率为:%s' % lr.score(x, y))
#-*- coding:utf-8 -*-
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

filename = '../../data/5-/bankloan.xls'
data = pd.read_excel(filename)
# ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
x = data.iloc[:, :8].values
y = data.iloc[:, 8].values

# Attribute reduction
rlr = RLR()  # randomized logistic regression used to filter variables
rlr.fit(x, y)
m = list(rlr.get_support())  # one boolean per feature
print(u'适合的特征选出来了。如下:')
# Pair each leading column with its flag; zip stops after the 8 features.
n = [column for column, keep in zip(data.columns, m) if keep]
for column in n:
    print(column)
x = data[n].values  # data restricted to the selected features

# Fit a logistic regression on the reduced data and report its accuracy.
lr = LR()
lr.fit(x, y)
print(u'模型训练结束')
print(u'模型的平均正确率为:%s' % lr.score(x, y))
# Evaluation report
from sklearn.metrics import classification_report


def my_classification_report(y_true, y_pred):
    """Print scikit-learn's text classification report for the two label sets."""
    from sklearn.metrics import classification_report
    report = classification_report(y_true, y_pred)
    print("classification_report(left: labels):")
    print(report)


my_classification_report(y_test, y_score_pre)

# Feature selection with randomized logistic regression
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

Rlogis_reg = RLR(random_state=10)  # selector
Rlogis_reg.fit(X_train, y_train)
# One extra False lengthens the mask by one so it can index
# ``smote_resampled.columns`` (presumably features plus one label column).
selected_col = list(Rlogis_reg.get_support()) + [False]
print(u"通过随机逻辑回归模型筛选特征结束")
selected_feature = smote_resampled.columns[selected_col]
print(u"有效特征为:%s" % ",".join(selected_feature))

# Rebuild labelled frames and keep only the selected features.
X1_train = pd.DataFrame(X_train, columns=names)[selected_feature]
X1_test = pd.DataFrame(X_test, columns=names)[selected_feature]

# NOTE(review): this rebinds ``RLR`` from the imported class to the result
# of ``model(...)``; later code can no longer construct the selector by
# that name.
RLR = model(LR, X1_train, X1_test, y_train, y_test,
            model_name='RLR 个4 特征:')
# Decision tree
# Parameter initialisation
filename = './data/bankloan.xls'
data = pd.read_excel(filename)
# ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
x = data.iloc[:, :8].values
y = data.iloc[:, 8].values

from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

# Baseline: logistic regression on all 8 features, before any filtering.
lr = LR()
lr.fit(x, y)
print(u'逻辑回归模型训练结束。')
print(u'未经过筛选特性模型的平均正确率为:%s' % lr.score(x, y))

# Randomized logistic regression to filter the variables
rlr = RLR()
rlr.fit(x, y)
# Append one False so the 8-entry mask also covers the label column and
# matches the length of ``data.columns``.
selected_col = numpy.append(rlr.get_support(), [False])
print(u"通过随机逻辑回归模型筛选特征结束")
print(u"有效特征为:%s" % ",".join(data.columns[selected_col]))
x = data[data.columns[selected_col]].values  # keep the selected features

lr = LR()
lr.fit(x, y)  # train on the selected features
print(u'逻辑回归模型训练结束。')
print(u'模型的平均正确率为:%s' % lr.score(x, y))  # mean training accuracy
import pandas as pd
from pandas import DataFrame as df
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier

dataFile = r'F:\pycharm_workspace\myML_DM_Test\resource\python_practice_Data_Analy_Min\chapter5\chapter5\demo\data\bankloan.xls'
data = pd.read_excel(dataFile)
df_data = df(data)  # DataFrame built from the frame that was just read
print(data)
print("DF: \n", df_data)

from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

# First 8 columns are the features, column 8 is the label.
# ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
x = data.iloc[:, :8].values
y = data.iloc[:, 8].values
print("X \n", x)
print("Y \n", y)

rlr = RLR()  # randomized logistic regression used to filter variables
rlr.fit(x, y)  # train the selector
# Report the best value from the preceding experiment.
result.sort()
print(result[-1])

# L2-regularised logistic regression on the full feature frame.
model = LR(penalty='l2', tol=0.0001, C=.005, solver='liblinear',
           max_iter=500)
model.fit(df, label)
print(features, model.coef_)

# Randomized logistic regression: print which features it keeps.
model2 = RLR(C=1, scaling=0.5, sample_fraction=0.6,
             selection_threshold=0.3, n_resampling=100)
model2.fit(df, label)
print(model2.get_support())

# dt ------------------------------------------
d = [1, 2, 3, 4, 5, 10, 15, 20, 25]
m = ['gini', 'entropy']  # not used below; the criterion is fixed to 'entropy'


def _entropy_tree_cv_mean(depth):
    """Mean ``bin_cv`` score of an entropy decision tree of the given depth."""
    return bin_cv(DTC(criterion='entropy', max_depth=depth), df, label).mean()


result = list(map(_entropy_tree_cv_mean, d))
print(result)
pd.Series(result).plot()
#-*- coding:utf-8 -*-
# Peishichao
import pandas as pd

filename = '../data/bankloan.xls'
data = pd.read_excel(filename)
# ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
x = data.iloc[:, :8].values
y = data.iloc[:, 8].values

from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

rlr = RLR()
rlr.fit(x, y)
print(rlr.get_support())  # boolean mask over the 8 features
print('end')

# Positional indices rather than the boolean mask: the mask has 8 entries
# and does not match the full column index of ``data``.
x = data[data.columns[rlr.get_support(indices=True)]].values
print(x)

lr = LR()
lr.fit(x, y)
print('end')
print('accur: %s' % lr.score(x, y))
#-*- coding: utf-8 -*-
import pandas as pd
import os
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

# init parameters
filename = '../Datasets/bankloan.csv'
data = pd.read_csv(filename)
# ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
x = data.iloc[:, :8].values  # regressors
y = data.iloc[:, 8].values   # labels
# Every column except the last one is a candidate feature.
features_columns = data.columns[:len(data.columns) - 1]

rlr = RLR()  # randomized logistic regression for feature selection
rlr.fit(x, y)  # train the selector
support = rlr.get_support()  # computed once and reused below
print(features_columns)
print(support)      # feature selection result
print(rlr.scores_)  # score of each feature
print(u'RandomizedLogisticRegression feature selection finished.')
selected = features_columns[support]
print(u'The effective features are:\n\t %s' % ', '.join(selected))
x = data[selected].values  # selected features

lr = LR()  # logistic regression model
lr.fit(x, y)  # train on the effective features
print(u'Accuracy:%s' % lr.score(x, y))
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

lessonPath = 'E:\\BaiduNetdiskDownload\\sourceCode\\week8\\lesson2.csv'
luquPath = 'E:\\BaiduNetdiskDownload\\sourceCode\\week8\\luqu2.csv'
dataLuqu = pd.read_csv(luquPath)

# Features are columns 1-3; the label is column 0.
# ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
x = dataLuqu.iloc[:, 1:4].values
# 1-D labels; a single-column 2-D slice is not the shape scikit-learn
# estimators expect for ``y``.
y = dataLuqu.iloc[:, 0].values

r1 = RLR()
r1.fit(x, y)

# Feature selection. The mask refers to the three feature columns, so it
# is applied to that slice of the column index. ``t`` holds the selected
# feature *values*; previously it held the column names, which cannot be
# used to fit a model.
selected_columns = dataLuqu.columns[1:4][r1.get_support()]
t = dataLuqu[selected_columns].values

r2 = LR()
r2.fit(t, y)
print('训练结束')
# Score on the same features the model was trained on.
print('模型正确率: ' + str(r2.score(t, y)))
def main(): import read_data import data_standard import numpy as np import pandas as pd ''' ##############################################################数据读入与整理################################################################################## path2014='E:\Master\PPDAMcode\AIR_project\hangzhou.xls' path2015='E:\Master\PPDAMcode\AIR_project\hangzhou2015.xls' path2016='E:\Master\PPDAMcode\AIR_project\hangzhou2016.xls' hangzhou_data=[] ################################################################################## data=read_data.read_data_xls(path2014) for i in range(len(data)-1): if data.iloc[i+1,7]=='优': data.iloc[i+1,7]=1.0 elif data.iloc[i+1,7]=='良好': data.iloc[i+1,7]=2.0 elif data.iloc[i+1,7]=='轻度污染': data.iloc[i+1,7]=3.0 elif data.iloc[i+1,7]=='中度污染': data.iloc[i+1,7]=4.0 elif data.iloc[i+1,7]=='重度污染': data.iloc[i+1,7]=5.0 #for i in range(6): #data.iloc[1:,1+i]=data_standard.data_standard(data.iloc[1:,1+i]) hangzhou_data.append(data) ################################################################################# data=read_data.read_data_xls(path2015) for i in range(len(data)-1): if data.iloc[i+1,7]=='优': data.iloc[i+1,7]=1.0 elif data.iloc[i+1,7]=='良好': data.iloc[i+1,7]=2.0 elif data.iloc[i+1,7]=='轻度污染': data.iloc[i+1,7]=3.0 elif data.iloc[i+1,7]=='中度污染': data.iloc[i+1,7]=4.0 elif data.iloc[i+1,7]=='重度污染': data.iloc[i+1,7]=5.0 #for i in range(6): #data.iloc[1:,1+i]=data_standard.data_standard(data.iloc[1:,1+i]) hangzhou_data.append(data) ################################################################################## data=read_data.read_data_xls(path2016) for i in range(len(data)-1): if data.iloc[i+1,7]=='优': data.iloc[i+1,7]=1.0 elif data.iloc[i+1,7]=='良好': data.iloc[i+1,7]=2.0 elif data.iloc[i+1,7]=='轻度污染': data.iloc[i+1,7]=3.0 elif data.iloc[i+1,7]=='中度污染': data.iloc[i+1,7]=4.0 elif data.iloc[i+1,7]=='重度污染': data.iloc[i+1,7]=5.0 #for i in range(6): #data.iloc[1:,1+i]=data_standard.data_standard(data.iloc[1:,1+i]) hangzhou_data.append(data) 
############################################################################ #print(hangzhou_data)#######2014,2015,2016的所有数据在列表hangzhou_data中存放 all_data=pd.DataFrame(np.linspace(1,1091*8,1091*8).reshape(1091,8)) for i in range(len(hangzhou_data[0])-1): for j in range(8): all_data.iloc[i,j]=hangzhou_data[0].iloc[i+1,j+1] ############################################ for i in range(len(hangzhou_data[1])-1): for j in range(8): all_data.iloc[i+365,j]=hangzhou_data[1].iloc[i+1,j+1] ################################################### for i in range(len(hangzhou_data[2])-1): for j in range(8): all_data.iloc[i+730,j]=hangzhou_data[2].iloc[i+1,j+1] #print(all_data) all_data.to_excel('E:\Master\PPDAMcode\AIR_project\hangzhou_air_alldata.xls') ################################################################################################################################################ ''' ############################主成分分析PCA###################################################### import PCA PCA.PCA(all_data.iloc[:,0:6]) #######################RandomizedLogisticRegression筛选特征########## from sklearn.linear_model import RandomizedLogisticRegression as RLR rlr=RLR() rlr.fit(all_data.iloc[:,0:6].as_matrix(),all_data.iloc[:,6].as_matrix()) print() print('通过RandomizedLogisticRegression筛选特征:') print(rlr.get_support()) print() ############################################################################################### #############################准备好训练数据与测试数据################################################################ all_data=read_data.read_data_xls('E:\Master\PPDAMcode\AIR_project\yuanshi_data.xls') import random index=list(range(1091)) test_index = random.sample(index, 90)##test_index is the index of test data随机选出200个样本作为测试样本 train_index=[] for i in range(1091): if i not in test_index: train_index.append(i) ##train_index is the index of train data data_train=all_data.iloc[train_index,0:6].as_matrix() classfy_train=all_data.iloc[train_index,6].as_matrix() 
AQI_train=all_data.iloc[train_index,7].as_matrix() data_test=all_data.iloc[test_index,0:6].as_matrix() classfy_test=all_data.iloc[test_index,6].as_matrix() AQI_test=all_data.iloc[test_index,7].as_matrix() ######################################################################################################################## ''' ############################运用DecisionTreeClassifier方法进行训练测试############################### import DecisionTreeClassifier DecisionTreeClassifier.DecisionTreeClassifier(data_train,AQI_train,data_test,AQI_test) ###################################################################################################### ''' ############################运用RandomForestClassifier方法进行训练测试############################### import RandomForestClassifier RandomForestClassifier.RandomForestClassifier(data_train,AQI_train,data_test,AQI_test) ###################################################################################################### ''' ############################运用SVM方法进行训练测试############################### import SVM SVM.SVM(data_train,classfy_train,data_test,classfy_test) ###################################################################################################### ''' ''' ############################运用keras方法进行训练测试############################### import Sequential Sequential.Sequential(data_train,classfy_train,data_test,classfy_test) ###################################################################################################### ''' '''