Ejemplo n.º 1
0
from sklearn.neural_network import MLPClassifier  # import the classifier
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt

# Load the raw feature table.
pandas_data = pd.read_csv('sql_eigen.csv')
# Impute missing values with each column's own mean.  DataFrame.mean() is
# called directly: np.mean(df) forwards axis=None to DataFrame.mean, and
# pandas >= 2.0 then reduces over both axes, which would fill every gap with
# one global scalar instead of the per-column mean.
data = pandas_data.fillna(pandas_data.mean(numeric_only=True))

# Ages above 200 are replaced with 91.4 (described by the original author as
# the median).  A single .loc assignment is used because the chained form
# data['age'][mask] = ... may write to a temporary copy and is a no-op under
# pandas copy-on-write.
data.loc[data['age'] > 200, 'age'] = 91.4
# Drop the columns that are not used as features.
data2 = data.drop([
    'hr_cov', 'bpsys_cov', 'bpdia_cov', 'bpmean_cov', 'pulse_cov', 'resp_cov',
    'spo2_cov', 'height'
],
                  axis=1)
dataSet = np.array(data2)
# Columns 0-77 are rescaled in two steps by the project-local `ann` helpers;
# per the original author's notes the first maps to [0, 1] and the second to
# [-1, 1].
dataSet[:, 0:78] = ann.preprocess(dataSet[:, 0:78])
dataSet[:, 0:78] = ann.preprocess1(dataSet[:, 0:78])

dataMat = dataSet[:, 0:78]  # feature matrix: columns 0-77
labelMat = dataSet[:, 78]  # label vector: column 78
# dataMat=np.array(dataMat)
# labelMat=np.array(labelMat)
'''
Compute the mean impact value of each feature.
'''
clf = MLPClassifier(hidden_layer_sizes=(78, ),
                    activation='tanh',
                    shuffle=True,
                    solver='sgd',
                    alpha=1e-6,
                    batch_size=5,
Ejemplo n.º 2
0
# @File    : global_new.py
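# Extract the features, dummy-encode the categorical `sex` column, then normalize.
# Columns 0-79 are the features; the last column is the label.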

import pandas as pd
import numpy as np
import ann
# NOTE: a `global` statement at module scope has no effect in Python; these
# lines only signal which names this module is meant to expose.
global dataMat
global labelMat
global colnames
global framefile
# Set up the module-level variables that hold the full feature set.

###read the data###
data = pd.read_csv('final_eigen.csv')
dummy_sex = data['sex']  # categorical column to be dummy-encoded
dummies = pd.get_dummies(dummy_sex, prefix='sex')  # dummy-variable encoding, columns prefixed with "sex"
data = data.drop(['sex'], axis=1)  # drop the original column
datawithdummy = dummies.join(data)  # dummy columns first, then the remaining columns
colnames = datawithdummy.keys()
dataSet = np.array(datawithdummy)
# Rescale the first 80 columns in place with the project-local helper
# (presumably a min-max normalization -- confirm in `ann`).
dataSet[:, 0:80] = ann.preprocess(dataSet[:, 0:80])
# dataSet[:,0:80]=ann.preprocess1(dataSet[:,0:80])

dataMat = dataSet[:, 0:80]  # feature matrix: columns 0-79
labelMat = np.array(datawithdummy['Label'])  # labels taken from the un-normalized frame

# Rebuild a DataFrame from the processed array, keeping the column names.
framefile = pd.DataFrame(dataSet, columns=colnames)
#writefile.to_csv("F:/testformean.csv")# result after normalization
#print("test")
Ejemplo n.º 3
0
"""
#------------calculate the FS score with scikit-feature package--------------#
from skfeature.function.similarity_based import fisher_score
from skfeature.function.information_theoretical_based import MRMR
from skfeature.function.similarity_based import reliefF
from skfeature.function.statistical_based import gini_index

# Score every feature with three scikit-feature criteria.
# `datamat`, `labelmat`, `featurenames` and `dataFrame` are defined outside
# this excerpt.
Relief = reliefF.reliefF(datamat, labelmat)
Fisher= fisher_score.fisher_score(datamat, labelmat)
# mRMR,J,M,=MRMR.mrmr(datamat,labelmat,n_selected_features=80)
# mRMR=-mRMR
gini= gini_index.gini_index(datamat,labelmat)
# Sign flipped -- presumably so that a larger value means a better feature,
# like the other two scores; TODO confirm.
gini=-gini
FSscore=np.column_stack((Relief,Fisher,gini))# stack the three scores as columns

# Rescale the scores with the project-local helper (presumably a min-max
# normalization -- confirm in `ann`), then sum them per feature.
FSscore=ann.preprocess(FSscore)
FinalScore=np.sum(FSscore,axis=1)
FS=np.column_stack((FSscore,FinalScore))
FS_nor=ann.preprocess(FS)# normalize the combined score in the last column
FS=pd.DataFrame(FS_nor,columns=["Relief", "Fisher","gini","FinalScore"],index=featurenames)
# FS.to_csv("F:\Githubcode\AdaBoost\myown\FSscore.csv")


# Rank the features by combined score, best first, and save the ranking.
sorteigen=FS.sort_values(by='FinalScore',ascending=False,axis=0)
sorteigen.to_csv('FSsort.csv')
#------------cross-validation with ann--------------#
meanfit=[]# mean BER for each number of features, as features are added one by one
stdfit=[]# standard deviation of BER for each number of features, as features are added one by one

names=sorteigen.index# feature names in ranked order
sortfeatures=dataFrame[names]
Ejemplo n.º 4
0
import pandas as pd
import  numpy as np
import  ann
from sklearn.neural_network import MLPClassifier  # import the classifier
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt

# Load the raw feature table.
pandas_data = pd.read_csv('sql_eigen.csv')
# Impute missing values with each column's own mean.  DataFrame.mean() is
# called directly: np.mean(df) forwards axis=None to DataFrame.mean, and
# pandas >= 2.0 then reduces over both axes, which would fill every gap with
# one global scalar instead of the per-column mean.
data = pandas_data.fillna(pandas_data.mean(numeric_only=True))

# Ages above 200 are replaced with 91.  A single .loc assignment is used
# because the chained form data['age'][mask] = ... may write to a temporary
# copy and is a no-op under pandas copy-on-write.
data.loc[data['age'] > 200, 'age'] = 91
# Drop the columns that are not used as features.
data2 = data.drop(['hr_cov', 'bpsys_cov', 'bpdia_cov', 'bpmean_cov', 'pulse_cov', 'resp_cov', 'spo2_cov','height'], axis=1)
dataSet=np.array(data2)
# Columns 0-77 are rescaled in two steps by the project-local `ann` helpers
# (presumably min-max style normalizations -- confirm in `ann`).
dataSet[:,0:78]=ann.preprocess(dataSet[:,0:78])
dataSet[:,0:78]=ann.preprocess1(dataSet[:,0:78])

dataMat=dataSet[:,0:78]  # feature matrix: columns 0-77
labelMat=dataSet[:,78]  # label vector: column 78
# dataMat=np.array(dataMat)
# labelMat=np.array(labelMat)

# One hidden layer with 78 units (the same as the number of feature columns),
# trained with SGD on mini-batches of 5 and an adaptive learning rate.
clf = MLPClassifier(hidden_layer_sizes=(78,), activation='tanh',
                    shuffle=True, solver='sgd', alpha=1e-6, batch_size=5,
                    learning_rate='adaptive')
clf.fit(dataMat,labelMat)
# Presumably collects one impact value per feature -- the code that fills it
# is not visible in this excerpt.
IV=[]
for i in range(78):
    # Work on a fresh copy so each feature is perturbed independently.
    tmpdata=dataMat.copy()
    tmpdata[:, i]=tmpdata[:,i]*0.8  # scale feature column i to 80% of its value
Ejemplo n.º 5
0
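    evaluate_test: evaluation metrics for each fold's test set; the last two rows are the mean and the standard deviation
    prenum_train: confusion-matrix results for each fold's training set
    prenum_test: confusion-matrix results for each fold's test set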
'''
import ann
import pandas as pd
import numpy as np
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt

# Load the feature table and split off the class label column.
data = pd.read_csv('sortedFeature.csv')
labelMat = data['classlabel']
# Select the first 11 columns by position.  DataFrame.ix was removed in
# pandas 1.0; with string column labels an integer slice through .ix was
# positional, so .iloc selects the same columns.
# dataMat = data.iloc[:, 0:80]
dataMat = data.iloc[:, 0:11]
# Rescale with the project-local helper (presumably a min-max normalization
# -- confirm in `ann`).
dataMat = ann.preprocess(dataMat)
# dataMat = ann.preprocess1(dataMat)

# Per-fold result accumulators (evaluation metrics and confusion-matrix
# counts for the training and test folds).
evaluate_train = []
evaluate_test = []
prenum_train = []
prenum_test = []

# 10-fold stratified cross-validation over the selected features.
skf = StratifiedKFold(n_splits=10)
for train, test in skf.split(dataMat, labelMat):
    print("%s %s" % (train, test))
    train_in = dataMat[train]
    test_in = dataMat[test]
    train_out = labelMat[train]
    test_out = labelMat[test]
    # Oversample the training fold only; the test fold is left untouched.
    # `fit_resample` is the current imbalanced-learn name for this call; the
    # old alias `fit_sample` has been removed from the library.
    train_in, train_out = RandomOverSampler().fit_resample(train_in, train_out)
Ejemplo n.º 6
0
from sklearn.model_selection import StratifiedKFold
from imblearn.over_sampling import RandomOverSampler

import pandas as pd  #python data analysis
import matplotlib.pyplot as plt

# Load the feature table and split off the class label column.
data = pd.read_csv('sortedFeature.csv')
labelMat = data['classlabel']
# Select the first 7 columns by position.  DataFrame.ix was removed in
# pandas 1.0; with string column labels an integer slice through .ix was
# positional, so .iloc selects the same columns.
dataMat = data.iloc[:, 0:7]

# Rescale in two steps with the project-local helpers (presumably to [0, 1]
# and then [-1, 1] -- confirm in `ann`).
data01 = ann.preprocess(dataMat)
dataMat1 = ann.preprocess1(data01)

# Prepend a column of ones to the features.  The row count is taken from the
# data rather than hard-coded as 1293, so other dataset sizes also work.
addones = np.ones((np.shape(dataMat1)[0], 1))
dataMat = np.c_[addones, dataMat1]

# Per-fold result accumulators (evaluation metrics and confusion-matrix
# counts for the training and test folds).  The original initialised these
# four lists twice; once is enough.
evaluate_train = []
evaluate_test = []
prenum_train = []
prenum_test = []

skf = StratifiedKFold(n_splits=10)
for train, test in skf.split(dataMat, labelMat):
    #==============================================================================
    # skf=StratifiedShuffleSplit(n_splits=10)
    # for train,test in skf.split(dataMat,labelMat):
Ejemplo n.º 7
0
import pandas as pd
import numpy as np
import ann
global dataMat
global labelMat

###read the data###
pandas_data = pd.read_csv('eigen62.csv')
# Impute missing values with each column's own mean.  DataFrame.mean() is
# called directly: np.mean(df) forwards axis=None to DataFrame.mean, and
# pandas >= 2.0 then reduces over both axes, which would fill every gap with
# one global scalar instead of the per-column mean.
data = pandas_data.fillna(pandas_data.mean(numeric_only=True))

# Ages above 200 are replaced with 91.4.  A single .loc assignment is used
# because the chained form data['age'][mask] = ... may write to a temporary
# copy and is a no-op under pandas copy-on-write.
data.loc[data['age'] > 200, 'age'] = 91.4
#data2 = data.drop(['hr_cov', 'bpsys_cov', 'bpdia_cov', 'bpmean_cov', 'pulse_cov', 'resp_cov', 'spo2_cov','height'], axis=1)
data2 = data  # no columns are dropped for this dataset
dataSet = np.array(data2)
# Columns 0-61 are rescaled in two steps by the project-local `ann` helpers
# (presumably min-max style normalizations -- confirm in `ann`).
dataSet[:, 0:62] = ann.preprocess(dataSet[:, 0:62])
dataSet[:, 0:62] = ann.preprocess1(dataSet[:, 0:62])
# dataSet=np.array(dataSet)
# print("test")
Ejemplo n.º 8
0
    prenum_test: confusion-matrix results for each fold's test set
"""

import ann
import pandas as pd
import numpy as np
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt

data = pd.read_csv('sortedFeature.csv')  # features already sorted by rank
labelMat = data['classlabel']
# Columns are selected by position.  DataFrame.ix was removed in pandas 1.0;
# with string column labels an integer slice through .ix was positional, so
# .iloc selects the same columns.
# dataMat = data.iloc[:, 0:80]  # all features
# dataMat = data.iloc[:, 0:67]  # feature subset with the minimum BER
dataMat = data.iloc[:, 0:8]  # feature subset with the fewest features
dataMat = ann.preprocess(dataMat)  # normalize to [0, 1] (per the original author's note)
dataMat = ann.preprocess1(dataMat)  # normalize to [-1, 1] (per the original author's note)
neuo = np.shape(dataMat)[1]  # hidden-layer neuron count equals the number of features

# Per-fold result accumulators (evaluation metrics and confusion-matrix
# counts for the training and test folds).
evaluate_train = []
evaluate_test = []
prenum_train = []
prenum_test = []

skf = StratifiedKFold(n_splits=10)  # 10-fold cross-validation
# Fold number shown in the progress message; it starts at 1 and is presumably
# incremented further down the loop body, outside this excerpt.
kfold = 1
for train, test in skf.split(dataMat, labelMat):
    # Progress message (Chinese): "cross-validation round <kfold>:"
    print("第%s 次交叉验证:" % kfold)
    # Select this fold's training/test rows by the indices from the splitter.
    train_in = dataMat[train]
    test_in = dataMat[test]
    train_out = labelMat[train]