コード例 #1
0
ファイル: feture_select.py プロジェクト: tian5017/ModelNet
def feture_select_RLR():
    """Rank features by their randomized-logistic-regression score.

    Loads ``(data_x, data_y, names)`` through ``get_data()``, fits an
    ``RLR`` selector on it and returns a list of ``(name, score)`` pairs.
    Scores are rounded to four decimals and the list is ordered from the
    highest score to the lowest.
    """
    features, target, feature_names = get_data()
    selector = RLR()
    selector.fit(features, target)
    rounded_scores = [round(score, 4) for score in selector.scores_]
    ranking = list(zip(feature_names, rounded_scores))
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    return ranking
コード例 #2
0
def get_support_fields(X, Y):
    """Return only the columns of ``X`` kept by randomized logistic regression.

    Fits an ``RLR`` selector on ``(X, Y)``, prints the per-feature scores
    and returns the selected columns as a NumPy array.

    The selection mask has one entry per column of ``X``, so it is applied
    to ``X`` itself rather than to an unrelated module-level frame.

    :param X: feature matrix; a pandas DataFrame (column names are printed)
        or a 2-D NumPy array.
    :param Y: target vector.
    :return: 2-D array holding the selected feature columns.
    """
    rlr = RLR()  # randomized logistic regression used as a feature filter
    rlr.fit(X, Y)
    support = rlr.get_support()  # boolean mask; the scores are in rlr.scores_
    print(rlr.scores_)

    if not hasattr(X, 'columns'):
        # Plain array input: there are no names to report, just filter.
        return X[:, support]

    selected = X.columns[support]
    joined = ','.join(selected)
    if isinstance(joined, bytes):
        # Python 2 byte-string column names: decode before formatting.
        joined = joined.decode('utf-8')
    print(u'有效特征为:%s' % joined)
    # .values works on every pandas version; as_matrix() was removed in 1.0.
    return X[selected].values
コード例 #3
0
ファイル: code.py プロジェクト: Katherine-bq/python-
def programmer_1():
    """Select bank-loan features with RLR, then fit a logistic regression.

    Reads ``data/bankloan.xls``, uses the first eight columns as predictors
    and the ninth as the target, keeps the predictors selected by
    randomized logistic regression and prints the training accuracy of a
    logistic regression fitted on them.
    """
    filename = "data/bankloan.xls"
    data = pd.read_excel(filename)

    # .values replaces as_matrix(), which was removed in pandas 1.0.
    x = data.iloc[:, :8].values
    y = data.iloc[:, 8].values

    rlr = RLR()
    rlr.fit(x, y)
    rlr_support = rlr.get_support()
    # Drop the label column first so the mask lines up with the predictors.
    support_col = data.drop('违约', axis=1).columns[rlr_support]

    print(
        "rlr_support_columns: {columns}".format(columns=','.join(support_col)))
    x = data[support_col].values

    lr = LR()
    lr.fit(x, y)

    print("lr: {score}".format(score=lr.score(x, y)))
コード例 #4
0
ファイル: LogicRegression.py プロジェクト: dasgk/django_cms
    def data_proc(self):
        """Filter features with RLR, fit a weighted logistic regression, predict one sample.

        Calls ``self.load_data()``, uses the first eight columns of
        ``self.data`` as predictors and the ninth as the target, and prints
        the selected feature names, the training accuracy and the
        prediction for one hard-coded sample.
        """
        self.load_data()
        # iloc is purely positional: [rows, columns].
        feature_columns = self.data.columns[:8]
        x = self.data.iloc[:, :8].values
        y = self.data.iloc[:, 8].values

        # Randomized logistic regression is used as the feature filter.
        rlr = RLR()
        rlr.fit(x, y)
        support = rlr.get_support()  # boolean mask; scores are in rlr.scores_

        # The mask has one entry per predictor, so index the predictor
        # columns only (the full column Index also holds the label).
        selected_columns = feature_columns[support]
        print("有效特征为%s" % ','.join(selected_columns))
        x = self.data[selected_columns].values

        # Class weights, useful when misclassification costs differ;
        # class_weight='balanced' would derive them from the data instead.
        lr = LR(class_weight={0: 0.9, 1: 0.1})
        # sample_weight holds one weight per training row.
        # NOTE(review): five weights only fit a five-row data set - confirm
        # the intended weights for the real data.
        lr.fit(x, y, sample_weight=[1, 2, 3, 5, 4])

        # The sample lists all eight predictors; keep only the selected ones
        # so it matches the columns the model was trained on.
        raw_sample = [24, 2, 2, 0, 28, 17.3, 1.79, 3.06]
        sample = [value for value, keep in zip(raw_sample, support) if keep]
        result = lr.predict([sample])
        print('模型的正确率是:%s,预测结果是 %d' % (lr.score(x, y), result[0]))
コード例 #5
0
ファイル: ch04.py プロジェクト: destefano1986/PyTraining
def programmer_1():
    """Run RLR feature selection on ``bankloan.xls`` and score a logistic regression.

    The first eight columns are used as predictors and the ninth as the
    target. Prints the selection mask, the selected column names and the
    training accuracy of the final model.
    """
    # Parameter initialisation.
    filename = r'bankloan.xls'
    data = pd.read_excel(filename)
    # .values replaces as_matrix(), which was removed in pandas 1.0.
    x = data.iloc[:, :8].values
    y = data.iloc[:, 8].values

    rlr = RLR()  # randomized logistic regression for feature selection
    rlr.fit(x, y)
    egeList = rlr.get_support()  # boolean mask over the eight predictors
    # Pad the mask with False for the label column so that it has one entry
    # per column of `data` and never selects the label.
    egeList = np.append(egeList, False)
    print("rlr.get_support():")
    print(egeList)
    print(u'随机逻辑回归模型特征选择结束!!!')
    print(u'有效特征为:%s' % ','.join(data.columns[egeList]))
    x = data[data.columns[egeList]].values  # selected features only

    lr = LR()
    lr.fit(x, y)  # train on the selected features
    print(u'逻辑回归训练模型结束!!!')
    # Mean training accuracy (the original author reports 81.4% here).
    print(u'模型的平均正确率:%s' % lr.score(x, y))
コード例 #6
0
def stable_select(df, y, rd_reg_columns, threshold=0.2, model='rlr'):
    """Score features with a stability-selection or forest model and plot them.

    :param df: DataFrame holding both the candidate features and the target.
    :param y: name of the target column in ``df``.
    :param rd_reg_columns: names of the candidate feature columns.
    :param threshold: ``selection_threshold`` passed to the randomized models.
    :param model: ``'rlr'`` (randomized logistic regression), ``'rls'``
        (randomized lasso) or ``'rfr'`` (uses ``feature_importances_``).
    :return: Series named ``'score'``, indexed by feature name and sorted
        from highest to lowest score.
    :raises ValueError: if ``model`` is not one of the supported names.
    """
    X = df.loc[:, rd_reg_columns]
    Y = df[y]
    if model == 'rlr':
        rlr = RLR(scaling=0.5, sample_fraction=0.75, n_resampling=300,
                  selection_threshold=threshold)
        rlr.fit(X, Y)
        scores = rlr.scores_
    elif model == 'rls':
        rls = RLS(scaling=0.5, sample_fraction=0.75, n_resampling=300,
                  selection_threshold=threshold)
        rls.fit(X, Y)
        scores = rls.scores_
    elif model == 'rfr':
        rf = RFR()
        rf.fit(X, Y)
        scores = rf.feature_importances_
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `scores`; fail early with a clear message.
        raise ValueError(
            "unknown model %r; expected 'rlr', 'rls' or 'rfr'" % (model,))
    result = pd.Series(dict(zip(X.columns, scores))).rename('score').sort_values(ascending=False)
    plt.figure(figsize=(20, 10))
    result.plot.barh(title='Feature Importances', color='lightblue')
    plt.ylabel('Feature Importance Score')
    return result
コード例 #7
0
def logistic(X_train, X_test, y_train, y_test):
    """Fit a logistic regression on RLR-selected features and report on the test set.

    :param X_train: training features (DataFrame; its columns are filtered).
    :param X_test: test features with the same columns as ``X_train``.
    :param y_train: training labels.
    :param y_test: test labels, passed to ``metrics_result``.
    :return: ``(pred_prob, pred_prob_train)`` - class probabilities for the
        test set and for the training set, in that order.
    """
    from sklearn.linear_model import LogisticRegression as LR
    from sklearn.linear_model import RandomizedLogisticRegression as RLR
    # Feature selection: compute the mask once and reuse it for both sets.
    rlr = RLR()
    rlr.fit(X_train, y_train)
    support = rlr.get_support()
    print(support)
    # .values replaces as_matrix(), which was removed in pandas 1.0.
    x = X_train[X_train.columns[support]].values
    x_test = X_test[X_test.columns[support]].values
    # To skip feature selection, use x = X_train and x_test = X_test instead.

    # Logistic regression on the selected features.
    lr = LR()
    lr.fit(x, y_train)
    pred_prob_train = lr.predict_proba(x)
    pred_prob = lr.predict_proba(x_test)
    print('logistic')
    predicts = lr.predict(x_test)
    metrics_result(y_test, predicts)

    return pred_prob, pred_prob_train
コード例 #8
0
def logistic_regression():
    """Fit RLR and a logistic regression on the bank-loan data and print the accuracy.

    Reads ``SRC_PATH + '/data/bankloan.xls'``; the first eight columns are
    the predictors and the ninth is the target.
    """
    # Parameter initialisation.
    filename = SRC_PATH + '/data/bankloan.xls'
    data = pd.read_excel(filename)
    # print() calls with one argument behave the same on Python 2 and 3.
    print(data.head())
    print(data.tail())

    # .values replaces as_matrix(), which was removed in pandas 1.0.
    x = data.iloc[:, :8].values
    y = data.iloc[:, 8].values

    # Same output as the former Python 2 statement `print x, y`.
    print('%s %s' % (x, y))

    rlr = RLR()  # randomized logistic regression used as a feature filter
    rlr.fit(x, y)
    print(u'通过随机逻辑回归模型筛选特征结束。')

    # NOTE: the selection result is not applied here, so the model below is
    # trained on all eight predictors. To use the filtered features, rebuild
    # x from data.columns[:8][rlr.get_support()] before fitting.

    lr = LR()  # logistic regression model
    lr.fit(x, y)
    print(u'逻辑回归模型训练结束。')
    # Mean training accuracy (the original author reports 81.4% here).
    print(u'模型的平均正确率为:%s' % lr.score(x, y))
コード例 #9
0
ファイル: Logistic模型.py プロジェクト: wsgan001/python-4
import pandas as pd
from sklearn.linear_model import RandomizedLogisticRegression as RLR
from sklearn.linear_model import LogisticRegression as LR

# Load the data: column 0 is the label, columns 1-3 are the features.
data = pd.read_csv("C:/Users/T/Desktop/python视频/luqu.csv")
# .values replaces as_matrix(), which was removed in pandas 1.0.
x = data.iloc[:, 1:4].values
y = data.iloc[:, :1].values

# Randomized logistic model, used to screen the variables.
# y is a column vector; the estimators expect a 1-D target, hence ravel().
f1 = RLR()
f1.fit(x, y.ravel())
f1.get_support()  # boolean mask of the selected variables

# Plain logistic model on all features.
f2 = LR()
f2.fit(x, y.ravel())
f2.score(x, y.ravel())  # training accuracy
コード例 #10
0
# coding: utf-8
# Logistic regression: automatic modelling
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

filename = '../data/bankloan.xls'
data = pd.read_excel(filename)
# First eight columns are the predictors, the ninth is the target.
# .values replaces as_matrix(), which was removed in pandas 1.0.
x = data.iloc[:, :8].values
y = data.iloc[:, 8].values

rlr = RLR(selection_threshold=0.5)  # build the selector
rlr.fit(x, y)  # train it
# Integer positions of the selected predictors; positions (unlike a boolean
# mask) can index the full column Index even though it also holds the label.
selected_columns = data.columns[rlr.get_support(indices=True)]
print('通过随机逻辑回归模型筛选结束')
print('有效特征:%s' % ','.join(selected_columns))
x = data[selected_columns].values  # selected features only

lr = LR()
lr.fit(x, y)
print(u'逻辑回归模型训练结束')
# Mean training accuracy (the original author reports 81.4% here).
print(u'模型平均正确率:%s' % lr.score(x, y))
コード例 #11
0
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
from scipy import ndimage
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

# Column 0 is used as the target; columns 1-4 are the candidate features.
data = pd.read_csv("luqu.csv")
candidate_columns = data.iloc[:, 1:5].columns
x = data.iloc[:, 1:5]
y = data.iloc[:, :1]

# ---- Step 1: feature selection ----
# Fit a randomized logistic regression and read back its boolean mask.
rlr = RLR()
rlr.fit(x, y)
egeList = rlr.get_support()

print("rlr.get_support():")
print(egeList)
selected_columns = candidate_columns[egeList]
print('有效特征为:' + str(selected_columns.values))
print('随机逻辑回归模型特征选择结束!!!')

# Keep only the selected features.
x = data[selected_columns]

# ---- Step 2: choose the logistic-regression parameters ----

# Candidate values for C: a log-spaced grid that starts with very small numbers.
from sklearn.model_selection import cross_val_score
lr = LR()

Pars = np.logspace(-4, 4, num=20)
コード例 #12
0
# -*- coding:utf-8 -*-
import pandas as pd
filename = '../data/bankloan.xls'
data = pd.read_excel(filename)
# First eight columns are the predictors, the ninth is the target.
# .values replaces as_matrix(), which was removed in pandas 1.0.
x = data.iloc[:, :8].values
y = data.iloc[:, 8].values

from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR
rlr = RLR()  # randomized logistic regression used to screen the variables
rlr.fit(x, y)  # train the selector
# The mask has one entry per predictor, so apply it to the predictor columns
# only; the full column Index also contains the label column.
selected_columns = data.columns[:8][rlr.get_support()]
print(u'有效特征为:%s' % ','.join(selected_columns))
x = data[selected_columns].values  # selected features only

lr = LR()  # logistic regression model
lr.fit(x, y)  # train on the selected features
print(u'模型的平均正确率:%s' % lr.score(x, y))
コード例 #13
0
#-*- coding: utf-8 -*-
# Logistic regression: automatic modelling
import pandas as pd

# Parameter initialisation: three predictor columns followed by the target.
filename = 'pdata.xls'
data = pd.read_excel(filename)
# .values replaces as_matrix(), which was removed in pandas 1.0.
x = data.iloc[:, :3].values
y = data.iloc[:, 3].values

from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR
# Randomized logistic regression used to screen the variables.
rlr = RLR(selection_threshold=0.25)
rlr.fit(x, y)  # train the selector
print('通过随机逻辑回归模型筛选特征结束。')
# Per-feature scores; print() with one argument works on Python 2 and 3.
print(rlr.scores_)
# The mask has one entry per predictor, so apply it to the predictor columns
# only; the full column Index also contains the target column.
selected_columns = data.columns[:3][rlr.get_support()]
print('%s' % ','.join(selected_columns))
x = data[selected_columns].values  # selected features only

lr = LR()  # logistic regression model
lr.fit(x, y)  # train on the selected features
print('逻辑回归模型训练结束。')
print('correct_point:%s' % lr.score(x, y))  # mean training accuracy
コード例 #14
0
# -*- coding: utf-8 -*-
# Time    : 2018/9/15 13:38
# Author  : xcl
import pandas as pd
filename = 'C:/Users/Administrator/Desktop/bankloan.xls'
data = pd.read_excel(filename)  # returns a DataFrame
x = data.iloc[:, :8].values  # all rows, columns 0-7
y = data.iloc[:, 8].values  # all rows, column 8

# Stability selection: filter the features with randomized logistic
# regression, then build a logistic regression on the selected features.
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR
# The threshold was written as `0.25i`, which is not a valid Python literal.
rlr = RLR(selection_threshold=0.25)
rlr.fit(x, y)  # train the selector
# rlr.get_support() returns the selection result and rlr.scores_ the
# per-feature scores, should they need to be inspected.

# get_support(indices=True) returns integer positions, so indexing the full
# column Index is safe even though it also contains the label column.
# S.join(iterable) concatenates the items of iterable with S as separator.
selected_columns = data.columns[rlr.get_support(indices=True)]
print('有效特征:' , ','.join(selected_columns))

x = data[selected_columns].values

lr = LR()  # logistic regression model
lr.fit(x, y)  # train on the selected features
コード例 #15
0
#-*- coding: utf-8 -*-
# Logistic regression: automatic modelling
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR
# Parameter initialisation.
filename = '../data/bankloan.xls'
data = pd.read_excel(filename)
# .values replaces as_matrix(), which was removed in pandas 1.0.
x = data.iloc[:, :8].values  # eight predictor columns
y = data.iloc[:, 8].values  # ninth column: the label

# Stability selection picks the features. The threshold is set explicitly
# to 0.5 here, i.e. stricter than the 0.25 used elsewhere as the default.
rlr = RLR(selection_threshold=0.5)
rlr.fit(x, y)  # train the selector
# The mask has one entry per predictor, so apply it to the predictor columns
# only; the full column Index also contains the label column.
selected_columns = data.columns[:8][rlr.get_support()]
print(u'通过随机逻辑回归模型筛选特征结束。')
print(u'有效特征为:%s' % ','.join(selected_columns))

x = data[selected_columns].values  # selected features, used to retrain
lr = LR()  # logistic regression model
lr.fit(x, y)  # train on the selected features
print(u'逻辑回归模型训练结束。')
print(u'模型的平均正确率为:%s' % lr.score(x, y))
コード例 #16
0
#-*- coding:utf-8 -*-

import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

filename = '../../data/5-/bankloan.xls'
data = pd.read_excel(filename)
# First eight columns are the predictors, the ninth is the target.
# .values replaces as_matrix(), which was removed in pandas 1.0.
x = data.iloc[:, :8].values
y = data.iloc[:, 8].values

# Attribute reduction: screen the variables with randomized logistic regression.
rlr = RLR()
rlr.fit(x, y)
m = list(rlr.get_support())  # one boolean per predictor
print(u'适合的特征选出来了。如下:')
# Pair each predictor name with its flag; zip stops after the eight mask
# entries, so the label column is never considered.
n = [column for column, keep in zip(data.columns, m) if keep]
for column in n:
    print(column)
x = data[n].values  # data restricted to the selected features
# End of the attribute-reduction preprocessing step.

# Build and evaluate the logistic regression model.
lr = LR()
lr.fit(x, y)
print(u'模型训练结束')
print(u'模型的平均正确率为:%s' % lr.score(x, y))
コード例 #17
0
 
 # Evaluation report.
from sklearn.metrics import classification_report


def my_classification_report(y_true, y_pred):
    """Print scikit-learn's text classification report for the given labels.

    :param y_true: ground-truth labels.
    :param y_pred: predicted labels.
    """
    print("classification_report(left: labels):")
    print(classification_report(y_true, y_pred))


# These statements were indented by one stray space, which Python rejects at
# module level; they now start at column 0.
my_classification_report(y_test, y_score_pre)


# Feature selection with randomized logistic regression (RLR).
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

Rlogis_reg = RLR(random_state=10) # screen the variables
Rlogis_reg.fit(X_train, y_train)
# Boolean selection mask, padded with one extra False entry.
# NOTE(review): the padding presumably covers a trailing label column in
# smote_resampled - confirm against where that frame is built.
selected_col =list(Rlogis_reg.get_support())
selected_col.append(False)
print(u"通过随机逻辑回归模型筛选特征结束")
print(u"有效特征为:%s" % ",".join(smote_resampled.columns[selected_col]))

# Rebuild labelled frames from the train/test data so the selected columns
# can be picked by name.
X1_train = pd.DataFrame(X_train,columns=names)
X1_test = pd.DataFrame(X_test,columns=names)
selected_feature =smote_resampled.columns[selected_col]
X1_train = X1_train[selected_feature]
X1_test = X1_test[selected_feature] # selected features only
# NOTE(review): this rebinds the name RLR (imported above as the estimator
# class) to the value returned by model(); later code cannot use RLR() again.
RLR=model(LR,X1_train,X1_test,y_train,y_test,model_name='RLR 个4 特征:') 


#决策树
コード例 #18
0
# Parameter initialisation.
filename = './data/bankloan.xls'
data = pd.read_excel(filename)

# First eight columns are the predictors, the ninth is the target.
# .values replaces as_matrix(), which was removed in pandas 1.0.
x = data.iloc[:, :8].values
y = data.iloc[:, 8].values

from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR


# Baseline: logistic regression trained on all predictors, no selection.
lr = LR()
lr.fit(x, y)
print(u'逻辑回归模型训练结束。')
print(u'未经过筛选特性模型的平均正确率为:%s' % lr.score(x, y))

# Randomized logistic regression used to screen the variables.
rlr = RLR()
rlr.fit(x, y)
# Pad the mask with False for the label column so that it has one entry per
# column of `data`; the per-feature scores are available as rlr.scores_.
selected_col = numpy.append(rlr.get_support(), [False])
print(u"通过随机逻辑回归模型筛选特征结束")
print(u"有效特征为:%s" % ",".join(data.columns[selected_col]))
x = data[data.columns[selected_col]].values  # selected features only

lr = LR()  # logistic regression model
lr.fit(x, y)  # train on the selected features
print(u'逻辑回归模型训练结束。')
# Mean training accuracy (the original author reports 81.4% here).
print(u'模型的平均正确率为:%s' % lr.score(x, y))
コード例 #19
0
import pandas as pd
from pandas import DataFrame as df
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier

dataFile = r'F:\pycharm_workspace\myML_DM_Test\resource\python_practice_Data_Analy_Min\chapter5\chapter5\demo\data\bankloan.xls'

# Load the spreadsheet and echo it, both as read and re-wrapped in a DataFrame.
data = pd.read_excel(dataFile)
df_data = df(data)
print(data)
print("DF: \n", df_data)

from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

# First eight columns are the predictors, the ninth is the target.
# .values replaces as_matrix(), which was removed in pandas 1.0.
x = data.iloc[:, :8].values
y = data.iloc[:, 8].values
print("X \n", x)
print("Y \n", y)

rlr = RLR()  # randomized logistic regression used to screen the variables
rlr.fit(x, y)  # train the selector
コード例 #20
0
# Print the largest entry of `result` (sorted in place, so the last item is
# the maximum). NOTE(review): `result` is built outside this excerpt;
# presumably a list of scores - confirm.
result.sort()
print(result[-1])

# Logistic regression with an L2 penalty, fitted on (df, label); the
# coefficients are printed together with `features`.
model = LR(
    penalty='l2',
    tol=0.0001,
    C=.005,
    solver='liblinear',
    max_iter=500,
)
model.fit(df, label)
print(features, model.coef_)

# Randomized logistic regression on the same data; prints its selection mask.
model2 = RLR(C=1,
             scaling=0.5,
             sample_fraction=0.6,
             selection_threshold=0.3,
             n_resampling=100)
model2.fit(df, label)
print(model2.get_support())

# dt ------------------------------------------
# Candidate tree depths and split criteria.
# NOTE(review): `m` is not used in the visible code; only 'entropy' is
# evaluated below.
d = [1, 2, 3, 4, 5, 10, 15, 20, 25]
m = ['gini', 'entropy']

# One value per depth: the mean of whatever bin_cv returns for an entropy
# tree of that depth. NOTE(review): bin_cv is defined elsewhere; presumably
# it returns cross-validation scores - confirm.
result = [
    bin_cv(DTC(criterion='entropy', max_depth=depth), df, label).mean()
    for depth in d
]
print(result)
pd.Series(result).plot()
コード例 #21
0
#-*- coding:utf-8 -*-
# Peishichao
import pandas as pd

filename = '../data/bankloan.xls'
data = pd.read_excel(filename)

# First eight columns are the predictors, the ninth is the target.
# .values replaces as_matrix(), which was removed in pandas 1.0.
x = data.iloc[:, :8].values

y = data.iloc[:, 8].values

from sklearn.linear_model import LogisticRegression as LR

from sklearn.linear_model import RandomizedLogisticRegression as RLR

# Randomized logistic regression used to screen the variables.
rlr = RLR()

rlr.fit(x, y)

support = rlr.get_support()
print(support)
print('end')

# The mask has one entry per predictor, so apply it to the predictor columns
# only; the full column Index also contains the label column.
x = data[data.columns[:8][support]].values
print(x)
lr = LR()
lr.fit(x, y)
print('end')
print('accur: %s' % lr.score(x, y))
コード例 #22
0
#-*- coding: utf-8 -*-
import pandas as pd
import os
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

# init parameters
# os.getcwd()
filename = '../Datasets/bankloan.csv'
data = pd.read_csv(filename)
# .values replaces as_matrix(), which was removed in pandas 1.0.
x = data.iloc[:, :8].values  # regressors
y = data.iloc[:, 8].values  # labels

# Every column except the last one (the label).
features_columns = data.columns[:-1]

rlr = RLR()  # create a randomized LogisticRegression, feature selection
rlr.fit(x, y)  # training model
support = rlr.get_support()  # boolean mask, computed once and reused below
print(features_columns)
print(support)  # feature selection result
print(rlr.scores_)  # score of each feature

print(u'RandomizedLogisticRegression feature selection finished.')
print(u'The effective features are:\n\t %s' %
      ', '.join(features_columns[support]))

x = data[features_columns[support]].values  # selected features

lr = LR()  # create logistic regression model
lr.fit(x, y)  # train the model on the effective features
print(u'Accuracy:%s' % lr.score(x, y))
コード例 #23
0
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from sklearn.linear_model import RandomizedLogisticRegression as RLR

lessonPath = 'E:\\BaiduNetdiskDownload\\sourceCode\\week8\\lesson2.csv'
luquPath = 'E:\\BaiduNetdiskDownload\\sourceCode\\week8\\luqu2.csv'

dataLuqu = pd.read_csv(luquPath)
# Features are columns 1-3; the label is column 0 (kept as a column vector).
# .values replaces as_matrix(), which was removed in pandas 1.0.
x = dataLuqu.iloc[:, 1:4].values
y = dataLuqu.iloc[:, 0:1].values

# Feature selection. The estimators expect a 1-D target, hence ravel().
r1 = RLR()
r1.fit(x, y.ravel())
# The mask has one entry per feature, so apply it to the feature columns
# (1-3) and then pull the matching data. The previous code indexed the full
# column Index and trained on the column labels instead of the values.
selected_columns = dataLuqu.columns[1:4][r1.get_support()]
t = dataLuqu[selected_columns].values

r2 = LR()
r2.fit(t, y.ravel())
print('训练结束')
# Score on the same selected features the model was trained on.
print('模型正确率: ' + str(r2.score(t, y.ravel())))
コード例 #24
0
def main():
	import read_data
	import data_standard
	import numpy as np
	import pandas as pd
	'''
	##############################################################数据读入与整理##################################################################################

	path2014='E:\Master\PPDAMcode\AIR_project\hangzhou.xls'
	path2015='E:\Master\PPDAMcode\AIR_project\hangzhou2015.xls'
	path2016='E:\Master\PPDAMcode\AIR_project\hangzhou2016.xls'

	hangzhou_data=[]
	##################################################################################
	data=read_data.read_data_xls(path2014)
	for i in range(len(data)-1):
		if data.iloc[i+1,7]=='优':
			data.iloc[i+1,7]=1.0
		elif data.iloc[i+1,7]=='良好':
			data.iloc[i+1,7]=2.0
		elif data.iloc[i+1,7]=='轻度污染':
			data.iloc[i+1,7]=3.0
		elif data.iloc[i+1,7]=='中度污染':
			data.iloc[i+1,7]=4.0
		elif data.iloc[i+1,7]=='重度污染':
			data.iloc[i+1,7]=5.0
	#for i in range(6):
		#data.iloc[1:,1+i]=data_standard.data_standard(data.iloc[1:,1+i])

	hangzhou_data.append(data)
	#################################################################################
	data=read_data.read_data_xls(path2015)
	for i in range(len(data)-1):
		if data.iloc[i+1,7]=='优':
			data.iloc[i+1,7]=1.0
		elif data.iloc[i+1,7]=='良好':
			data.iloc[i+1,7]=2.0
		elif data.iloc[i+1,7]=='轻度污染':
			data.iloc[i+1,7]=3.0
		elif data.iloc[i+1,7]=='中度污染':
			data.iloc[i+1,7]=4.0
		elif data.iloc[i+1,7]=='重度污染':
			data.iloc[i+1,7]=5.0
	#for i in range(6):
		#data.iloc[1:,1+i]=data_standard.data_standard(data.iloc[1:,1+i])

	hangzhou_data.append(data)
	##################################################################################
	data=read_data.read_data_xls(path2016)
	for i in range(len(data)-1):
		if data.iloc[i+1,7]=='优':
			data.iloc[i+1,7]=1.0
		elif data.iloc[i+1,7]=='良好':
			data.iloc[i+1,7]=2.0
		elif data.iloc[i+1,7]=='轻度污染':
			data.iloc[i+1,7]=3.0
		elif data.iloc[i+1,7]=='中度污染':
			data.iloc[i+1,7]=4.0
		elif data.iloc[i+1,7]=='重度污染':
			data.iloc[i+1,7]=5.0

	#for i in range(6):
		#data.iloc[1:,1+i]=data_standard.data_standard(data.iloc[1:,1+i])

	hangzhou_data.append(data)

	############################################################################
	#print(hangzhou_data)#######2014,2015,2016的所有数据在列表hangzhou_data中存放
	all_data=pd.DataFrame(np.linspace(1,1091*8,1091*8).reshape(1091,8))

	for i in range(len(hangzhou_data[0])-1):
		for j in range(8):
			all_data.iloc[i,j]=hangzhou_data[0].iloc[i+1,j+1]
			############################################
	for i in range(len(hangzhou_data[1])-1):
		for j in range(8):
			all_data.iloc[i+365,j]=hangzhou_data[1].iloc[i+1,j+1]
			###################################################
	for i in range(len(hangzhou_data[2])-1):
		for j in range(8):
			all_data.iloc[i+730,j]=hangzhou_data[2].iloc[i+1,j+1]

	#print(all_data)
	all_data.to_excel('E:\Master\PPDAMcode\AIR_project\hangzhou_air_alldata.xls')
	################################################################################################################################################

	'''
	############################主成分分析PCA######################################################
	import PCA
	PCA.PCA(all_data.iloc[:,0:6])

	#######################RandomizedLogisticRegression筛选特征##########
	from sklearn.linear_model import RandomizedLogisticRegression as RLR 
	rlr=RLR()
	rlr.fit(all_data.iloc[:,0:6].as_matrix(),all_data.iloc[:,6].as_matrix())
	print()
	print('通过RandomizedLogisticRegression筛选特征:')
	print(rlr.get_support())
	print()
	###############################################################################################



	#############################准备好训练数据与测试数据################################################################
	
	all_data=read_data.read_data_xls('E:\Master\PPDAMcode\AIR_project\yuanshi_data.xls')

	import random
	index=list(range(1091))
	test_index = random.sample(index, 90)##test_index is the index of test data随机选出200个样本作为测试样本
	train_index=[]
	for i in range(1091):
		if i not in test_index:
			train_index.append(i)   ##train_index is the index of train data

	data_train=all_data.iloc[train_index,0:6].as_matrix()
	classfy_train=all_data.iloc[train_index,6].as_matrix()
	AQI_train=all_data.iloc[train_index,7].as_matrix()
	

	data_test=all_data.iloc[test_index,0:6].as_matrix()
	classfy_test=all_data.iloc[test_index,6].as_matrix()
	AQI_test=all_data.iloc[test_index,7].as_matrix()

	########################################################################################################################

	'''
	############################运用DecisionTreeClassifier方法进行训练测试###############################
	import DecisionTreeClassifier
	DecisionTreeClassifier.DecisionTreeClassifier(data_train,AQI_train,data_test,AQI_test)
	######################################################################################################
	'''
	
	############################运用RandomForestClassifier方法进行训练测试###############################
	import RandomForestClassifier
	RandomForestClassifier.RandomForestClassifier(data_train,AQI_train,data_test,AQI_test)
	######################################################################################################
	'''
	############################运用SVM方法进行训练测试###############################
	import SVM
	SVM.SVM(data_train,classfy_train,data_test,classfy_test)	
	######################################################################################################
	'''
	'''
	############################运用keras方法进行训练测试###############################
	import Sequential
	Sequential.Sequential(data_train,classfy_train,data_test,classfy_test)
	######################################################################################################
	'''

	'''