import pandas as pd
import numpy as np
import ann
from sklearn.neural_network import MLPClassifier  # import the classifier
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt

pandas_data = pd.read_csv('sql_eigen.csv')
data = pandas_data.fillna(pandas_data.mean())  # impute missing values with the column means
data.loc[data['age'] > 200, 'age'] = 91.4  # replace ages above 200 with the median age of 91.4
data2 = data.drop(['hr_cov', 'bpsys_cov', 'bpdia_cov', 'bpmean_cov', 'pulse_cov',
                   'resp_cov', 'spo2_cov', 'height'], axis=1)  # drop unused columns
dataSet = np.array(data2)
dataSet[:, 0:78] = ann.preprocess(dataSet[:, 0:78])   # normalize to [0, 1]
dataSet[:, 0:78] = ann.preprocess1(dataSet[:, 0:78])  # normalize to [-1, 1]
dataMat = dataSet[:, 0:78]
labelMat = dataSet[:, 78]
# dataMat=np.array(dataMat)
# labelMat=np.array(labelMat)
'''
Compute the mean impact value (MIV) of each feature.
'''
clf = MLPClassifier(hidden_layer_sizes=(78,), activation='tanh', shuffle=True,
                    solver='sgd', alpha=1e-6, batch_size=5,
                    learning_rate='adaptive')
# @File : global_new.py
# Extract the features, encode the categorical sex variable as dummy variables,
# and then normalize.
# Columns 0-79 are the features; the last column is the label.
import pandas as pd
import numpy as np
import ann

global dataMat
global labelMat
global colnames
global framefile
# declare globals that hold the full feature set

### read the data ###
data = pd.read_csv('final_eigen.csv')
dummy_sex = data['sex']  # categorical variable
dummies = pd.get_dummies(dummy_sex, prefix='sex')  # dummy (one-hot) encoding
data = data.drop(['sex'], axis=1)  # drop the original column
datawithdummy = dummies.join(data)  # join the dummy columns back onto the data
colnames = datawithdummy.keys()
dataSet = np.array(datawithdummy)
dataSet[:, 0:80] = ann.preprocess(dataSet[:, 0:80])
# dataSet[:,0:80]=ann.preprocess1(dataSet[:,0:80])
dataMat = dataSet[:, 0:80]
labelMat = np.array(datawithdummy['Label'])
framefile = pd.DataFrame(dataSet, columns=colnames)
# writefile.to_csv("F:/testformean.csv")  # normalized result
# print("test")
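# The custom `ann` module is not included in this excerpt. Based on the comments
# used throughout these scripts ("normalize to [0, 1]" / "normalize to [-1, 1]"),
# `ann.preprocess` and `ann.preprocess1` are most likely column-wise min-max
# scalers. A minimal sketch, assuming that behaviour (the real module may differ):
import numpy as np

def preprocess(x):
    """Scale each column of x linearly into [0, 1] (assumed behaviour)."""
    x = np.asarray(x, dtype=float)
    col_min = x.min(axis=0)
    col_range = x.max(axis=0) - col_min
    col_range[col_range == 0] = 1.0  # avoid division by zero for constant columns
    return (x - col_min) / col_range

def preprocess1(x):
    """Map values already in [0, 1] linearly into [-1, 1] (assumed behaviour)."""
    return 2.0 * np.asarray(x, dtype=float) - 1.0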
""" #------------calculate the FS score with scikit-feature package--------------# from skfeature.function.similarity_based import fisher_score from skfeature.function.information_theoretical_based import MRMR from skfeature.function.similarity_based import reliefF from skfeature.function.statistical_based import gini_index Relief = reliefF.reliefF(datamat, labelmat) Fisher= fisher_score.fisher_score(datamat, labelmat) # mRMR,J,M,=MRMR.mrmr(datamat,labelmat,n_selected_features=80) # mRMR=-mRMR gini= gini_index.gini_index(datamat,labelmat) gini=-gini FSscore=np.column_stack((Relief,Fisher,gini))#合并三个分数 FSscore=ann.preprocess(FSscore) FinalScore=np.sum(FSscore,axis=1) FS=np.column_stack((FSscore,FinalScore)) FS_nor=ann.preprocess(FS)#将最后一列联合得分归一化 FS=pd.DataFrame(FS_nor,columns=["Relief", "Fisher","gini","FinalScore"],index=featurenames) # FS.to_csv("F:\Githubcode\AdaBoost\myown\FSscore.csv") sorteigen=FS.sort_values(by='FinalScore',ascending=False,axis=0) sorteigen.to_csv('FSsort.csv') #------------crossalidation with ann--------------# meanfit=[]#用来存储逐渐增加特征值过程中,不同数目特征值对应的BER平均值 stdfit=[]#用来存储逐渐增加特征值过程中,不同数目特征值对应的BER标准差 names=sorteigen.index#排序之后的特征值 sortfeatures=dataFrame[names]
import pandas as pd
import numpy as np
import ann
from sklearn.neural_network import MLPClassifier  # import the classifier
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt

pandas_data = pd.read_csv('sql_eigen.csv')
data = pandas_data.fillna(pandas_data.mean())  # impute missing values with the column means
data.loc[data['age'] > 200, 'age'] = 91  # replace ages above 200 with the median age
data2 = data.drop(['hr_cov', 'bpsys_cov', 'bpdia_cov', 'bpmean_cov', 'pulse_cov',
                   'resp_cov', 'spo2_cov', 'height'], axis=1)
dataSet = np.array(data2)
dataSet[:, 0:78] = ann.preprocess(dataSet[:, 0:78])   # normalize to [0, 1]
dataSet[:, 0:78] = ann.preprocess1(dataSet[:, 0:78])  # normalize to [-1, 1]
dataMat = dataSet[:, 0:78]
labelMat = dataSet[:, 78]
# dataMat=np.array(dataMat)
# labelMat=np.array(labelMat)

clf = MLPClassifier(hidden_layer_sizes=(78,), activation='tanh', shuffle=True,
                    solver='sgd', alpha=1e-6, batch_size=5, learning_rate='adaptive')
clf.fit(dataMat, labelMat)

IV = []  # impact value of each feature
for i in range(78):
    tmpdata = dataMat.copy()
    tmpdata[:, i] = tmpdata[:, i] * 0.8  # perturb feature i downwards by 20%
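    # The loop is cut off at this point in the excerpt. A mean-impact-value (MIV)
    # analysis typically perturbs each feature both down and up, re-predicts with the
    # trained network, and averages the difference of the predicted probabilities.
    # A minimal sketch of how the loop body might continue, under that assumption
    # (this is not the author's exact code; a binary 0/1 label is assumed):
    tmpdata_up = dataMat.copy()
    tmpdata_up[:, i] = tmpdata_up[:, i] * 1.2     # perturb feature i upwards by 20%
    p_down = clf.predict_proba(tmpdata)[:, 1]     # P(class 1) with feature i scaled down
    p_up = clf.predict_proba(tmpdata_up)[:, 1]    # P(class 1) with feature i scaled up
    IV.append(np.mean(p_up - p_down))             # impact value of feature i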
evaluate_test: evaluation metrics of each test fold; the last two rows are the mean and the standard deviation
prenum_train: confusion-matrix counts for each training fold
prenum_test: confusion-matrix counts for each test fold
'''
import ann
import pandas as pd
import numpy as np
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt

data = pd.read_csv('sortedFeature.csv')
labelMat = data['classlabel']
# dataMat=data.iloc[:,0:80]
dataMat = data.iloc[:, 0:11]
dataMat = ann.preprocess(dataMat)
# dataMat = ann.preprocess1(dataMat)
evaluate_train = []
evaluate_test = []
prenum_train = []
prenum_test = []
skf = StratifiedKFold(n_splits=10)
for train, test in skf.split(dataMat, labelMat):
    print("%s %s" % (train, test))  # indices of the training and test samples in this fold
    train_in = dataMat[train]
    test_in = dataMat[test]
    train_out = labelMat[train]
    test_out = labelMat[test]
    # oversample the minority class in the training fold only
    train_in, train_out = RandomOverSampler().fit_resample(train_in, train_out)
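    # The loop body is truncated in this excerpt. Per the module docstring, each fold
    # appends one row of metrics to evaluate_test and one row of confusion-matrix
    # counts to prenum_test. A minimal sketch of that bookkeeping, assuming a
    # classifier `clf` has already been fitted on (train_in, train_out) in this fold
    # (the actual training step lives in the custom `ann` module and is not shown):
    from sklearn.metrics import confusion_matrix
    pred_test = clf.predict(test_in)
    prenum_test.append(confusion_matrix(test_out, pred_test).ravel())  # tn, fp, fn, tp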
import ann
import numpy as np
from sklearn.model_selection import StratifiedKFold
from imblearn.over_sampling import RandomOverSampler
import pandas as pd  # python data analysis
import matplotlib.pyplot as plt

data = pd.read_csv('sortedFeature.csv')
labelMat = data['classlabel']
dataMat = data.iloc[:, 0:7]
evaluate_train = []
evaluate_test = []
prenum_train = []
prenum_test = []
data01 = ann.preprocess(dataMat)    # normalize to [0, 1]
dataMat1 = ann.preprocess1(data01)  # normalize to [-1, 1]
addones = np.ones((dataMat1.shape[0], 1))  # bias column of ones (1293 samples in the original data)
dataMat = np.c_[addones, dataMat1]  # prepend the bias column to the feature matrix
skf = StratifiedKFold(n_splits=10)
for train, test in skf.split(dataMat, labelMat):
#==============================================================================
# skf=StratifiedShuffleSplit(n_splits=10)
# for train,test in skf.split(dataMat,labelMat):
import pandas as pd
import numpy as np
import ann

global dataMat
global labelMat

### read the data ###
pandas_data = pd.read_csv('eigen62.csv')
data = pandas_data.fillna(pandas_data.mean())  # impute missing values with the column means
data.loc[data['age'] > 200, 'age'] = 91.4  # replace ages above 200 with the median age of 91.4
# data2 = data.drop(['hr_cov', 'bpsys_cov', 'bpdia_cov', 'bpmean_cov', 'pulse_cov', 'resp_cov', 'spo2_cov', 'height'], axis=1)
data2 = data
dataSet = np.array(data2)
dataSet[:, 0:62] = ann.preprocess(dataSet[:, 0:62])   # normalize to [0, 1]
dataSet[:, 0:62] = ann.preprocess1(dataSet[:, 0:62])  # normalize to [-1, 1]
# dataSet=np.array(dataSet)
# print("test")
prenum_test: confusion-matrix counts for each test fold
"""
import ann
import pandas as pd
import numpy as np
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt

data = pd.read_csv('sortedFeature.csv')  # features already sorted by their selection score
labelMat = data['classlabel']
# dataMat=data.iloc[:,0:80]   # all features
# dataMat=data.iloc[:,0:67]   # feature subset with the lowest BER
dataMat = data.iloc[:, 0:8]   # smallest feature subset
dataMat = ann.preprocess(dataMat)   # normalize to [0, 1]
dataMat = ann.preprocess1(dataMat)  # normalize to [-1, 1]
neuo = np.shape(dataMat)[1]  # number of hidden neurons equals the number of features
evaluate_train = []
evaluate_test = []
prenum_train = []
prenum_test = []
skf = StratifiedKFold(n_splits=10)  # 10-fold cross-validation
kfold = 1
for train, test in skf.split(dataMat, labelMat):
    print("Cross-validation fold %s:" % kfold)
    train_in = dataMat[train]
    test_in = dataMat[test]
    train_out = labelMat[train]