/
midbrain-singlepca_shiyan1.py
440 lines (366 loc) · 16 KB
/
midbrain-singlepca_shiyan1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
# -*- encoding:utf-8 -*-
"""
============================
Faces dataset decompositions
============================
This example applies to :ref:`olivetti_faces` different unsupervised
matrix decomposition (dimension reduction) methods from the module
:py:mod:`sklearn.decomposition` (see the documentation chapter
:ref:`decompositions`) .
"""
print(__doc__)
# Authors: Vlad Niculae, Alexandre Gramfort
# License: BSD 3 clause
import logging
from time import time
from numpy.random import RandomState
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import numpy as np
import nibabel as nib
from sklearn.datasets import fetch_olivetti_faces
from sklearn.cluster import MiniBatchKMeans
from sklearn import decomposition
from sklearn import feature_selection
from sklearn.svm import SVC
from sklearn import cross_validation
from sklearn.preprocessing import StandardScaler
from sklearn.learning_curve import learning_curve
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesRegressor
import os
import pandas as pd
from sklearn.externals import joblib
from os.path import join as pjoin
from nilearn import plotting
# Default gallery layout (rows, columns); also used for n_components below.
n_row, n_col = 2, 2


def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray):
    """Show every entry of ``images`` as one panel of a titled figure grid.

    Parameters
    ----------
    title : string
        Figure-level title.
    images : iterable of arrays
        Each entry is reshaped to the module-level ``image_shape`` before
        being drawn, so it must hold exactly that many values.
    n_col, n_row : int, optional
        Grid dimensions; they also scale the figure size.
    cmap : matplotlib colormap, optional
        Colormap handed to ``plt.imshow``.
    """
    plt.figure(figsize=(2. * n_col, 2.26 * n_row))
    plt.suptitle(title, size=16)
    for panel, component in enumerate(images, start=1):
        plt.subplot(n_row, n_col, panel)
        # Colour limits are symmetric around zero.
        bound = max(component.max(), -component.min())
        picture = component.reshape(image_shape)
        plt.imshow(picture, cmap=cmap, interpolation='nearest',
                   vmin=-bound, vmax=bound)
        plt.xticks(())
        plt.yticks(())
    plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.)
def plot_haxby(activation, title):
    """Draw ``activation`` over slice z=25 of ``mean_img`` with two mask outlines.

    Parameters
    ----------
    activation : 3-D array
        Indexed as ``activation[:, 4:58, z]``; drawn with the ``hot``
        colormap on top of the grey background image.
    title : string
        Text placed in the upper-left corner of the figure.

    Notes
    -----
    NOTE(review): the globals ``mean_img`` and ``h`` (with attributes
    ``mask_house`` / ``mask_face``) are read here but are not assigned
    anywhere in this file; the caller has to provide them -- confirm.
    """
    z = 25
    fig = plt.figure(figsize=(4, 5.4))
    fig.subplots_adjust(bottom=0., top=1., left=0., right=1.)
    plt.axis('off')
    # pl.title('SVM vectors')
    # The colormaps are taken from ``plt``: the name ``pl`` used previously
    # is never imported in this file and raised NameError.
    plt.imshow(mean_img[:, 4:58, z].T, cmap=plt.cm.gray,
               interpolation='nearest', origin='lower')
    plt.imshow(activation[:, 4:58, z].T, cmap=plt.cm.hot,
               interpolation='nearest', origin='lower')
    mask_house = nib.load(h.mask_house[0]).get_data()
    mask_face = nib.load(h.mask_face[0]).get_data()
    # Builtin ``bool`` replaces the ``np.bool`` alias (same type; the alias
    # no longer exists in recent NumPy releases).
    plt.contour(mask_house[:, 4:58, z].astype(bool).T, contours=1,
                antialiased=False, linewidths=4., levels=[0],
                interpolation='nearest', colors=['blue'], origin='lower')
    plt.contour(mask_face[:, 4:58, z].astype(bool).T, contours=1,
                antialiased=False, linewidths=4., levels=[0],
                interpolation='nearest', colors=['limegreen'], origin='lower')
    # Proxy rectangles supply the legend entries for the two outlines.
    p_h = Rectangle((0, 0), 1, 1, fc="blue")
    p_f = Rectangle((0, 0), 1, 1, fc="limegreen")
    plt.legend([p_h, p_f], ["house", "face"])
    plt.title(title, x=.05, ha='left', y=.90, color='w', size=28)
def plot_learning_curve(estimator, title, X, y, ylim=(0.2, 1.1), cv=None,
                        n_jobs=1, train_sizes=np.linspace(.1, 1.0, 5)):
    """
    Plot training and cross-validation learning curves and save the figure.

    Parameters
    ----------
    estimator : object type that implements the "fit" and "predict" methods
        Passed unchanged to ``learning_curve``.
    title : string
        Title for the chart. It is also handed to ``plt.savefig`` as the
        output file name.
    X : array-like, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.
    y : array-like, shape (n_samples) or (n_samples, n_features), optional
        Target relative to X for classification or regression.
    ylim : sequence (ymin, ymax) or None, optional
        Minimum and maximum y values plotted; ``None`` leaves the axis
        limits alone.
    cv : integer or cross-validation generator, optional
        Forwarded to ``learning_curve``.
    n_jobs : integer, optional
        Number of jobs to run in parallel (default 1); forwarded to
        ``learning_curve``.
    train_sizes : array-like, optional
        Training-set sizes forwarded to ``learning_curve``.

    Returns
    -------
    None
        The figure is written to disk with ``plt.savefig(title)``.
    """
    plt.figure()
    ax = plt.subplot(111)
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
    # Mean and spread of the scores for every training-set size (axis 1 is
    # averaged away).
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    plt.grid(color='k')
    # Parenthesised form prints the same text under Python 2 and Python 3.
    print(test_scores.shape)
    # Shaded bands: mean +/- one standard deviation.
    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.3,
                     color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.3, color="g")
    plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
             label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
             label="Cross-validation score")
    # Call the setter: the previous ``ax.patch.set_linewidth = 2.5`` only
    # replaced the method with a float and never changed the line width.
    ax.patch.set_linewidth(2.5)
    # Enlarge the x-axis tick marks.
    for line in plt.gca().xaxis.get_ticklines():
        line.set_markersize(10)
        line.set_markeredgewidth(1.5)
    plt.legend(loc="best")
    plt.savefig(title)
# Input locations, all resolved against the current working directory.
_cwd = os.getcwd()
data_con_path = _cwd + '//data//ana9-14//control//'
data_pat_path = _cwd + '//data//ana9-14//patient//'
data_path = _cwd + '/zhibiao/'
label_path = _cwd + '/data/'
label_con_path = _cwd + '//data//midbrain_block//control//'
label_pat_path = _cwd + '//data//midbrain_block//patient//'

# Subjects whose size is not 19*20 were removed; this still needs a proper
# preprocessing step.
controllist = ['HUANGYAXIN', 'LIUZHIBING', 'SUNAIQUAN', 'Tanjinxin',
               'WANGCHUNXIANG', 'WANGKUNYING', 'WANGWEI', 'xiaowen',
               'XIAQIAORONG', 'XIEGUOLIANG', 'YAOYOUYUAN', 'YUANGUISHENG',
               'zhangruiwei', 'zhaoaiju', 'ZHOUGUIQUAN', 'zhouyan']
patientlist = ['CHENLIANXIANG', 'duxiaojing', 'jiangchangguo', 'jiangguozhi',
               'liangxiangxiu', 'liangxiangxiu2', 'limingcai', 'liuwuchou',
               'liuyingxiao', 'renqixia', 'wangdongfang', 'Wangfusheng',
               'wangjinguo', 'WangPingAn', 'wangsongyuan', 'wangwuji',
               'Wushaoxian', 'XIONGYIN', 'yangjianjun', 'zhouyongzhong',
               'ZHUCHUANGUI']
filename = 'HUANGYAXIN'

##############################
# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

# Number of PCA components. Recorded scores of the leading features:
# 0.28424068, 0.11663118, 0.09604093, 0.08072725, 0.07188693,
# 0.06497383, 0.05966482
n_components = n_row * n_col

# Data size settings
data_row = 50
data_col = 100
data_shape_pca = 5000
# Width of each subject's slot in X. An earlier value of 128*128*44 was
# assigned and immediately overwritten, so 1000 is the only effective one.
data_shape = 1000
# Cohort sizes follow the lists above (16 controls, 21 patients) instead of
# being typed in a second time.
data_num = len(controllist)
pat_num = len(patientlist)
all_num = data_num + pat_num

# Image size settings
image_shape = (data_row, data_col)
rng = RandomState(0)

# Metric settings: these suffixes are appended to the subject name when the
# volumes are loaded. An older list
# ['1_FA', '1_MD', '1_csa_gfa', '1_MSD', '1_den'] was overwritten by this one.
classname = ['_FA', '_MD', '_GFA', '_MSD', '_rtop_signal', '_rtop_pdf']
classnum = len(classname)

# One zero-padded row per (metric, subject).
X = np.zeros([classnum, all_num, data_shape])
X_pca = np.zeros([classnum, all_num, data_shape_pca])
def _minmax_scale(values):
    """Rescale ``values`` linearly so its minimum is 0 and its maximum is 1.

    A constant input has no range to stretch; zeros are returned for it
    rather than the NaNs a 0/0 division would produce.
    """
    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        return np.zeros(values.shape)
    return (values - lowest) / span


# Fill X and X_pca: controls occupy the first rows, patients follow.
# Each entry: (subject names, directory of the label volume used for X,
#              sub-directory of the 'big_gong_block' label used for X_pca).
_cohorts = (
    (controllist, label_con_path, '/control/'),
    (patientlist, label_pat_path, '/patient/'),
)
for c, cla in enumerate(classname):
    print(cla)
    idx = -1  # running subject row, restarted for every metric
    for _subjects, _label_dir, _block_dir in _cohorts:
        for name in _subjects:
            idx += 1
            print('%d %s' % (idx, name))
            f_all = nib.load(data_path + name + cla + '.nii.gz').get_data()
            label_img = nib.load(
                _label_dir + name + '.nii-label.nii.gz').get_data()
            block_img = nib.load(
                label_path + '/big_gong_block/' + _block_dir + name +
                '.nii-label.nii.gz').get_data()
            # Keep only the voxels whose label value equals 1.
            f_pca = f_all[block_img == 1].reshape(-1)
            f_part = f_all[label_img == 1]
            print(f_part.shape)
            # Rows are written from column 0; the rest of the slot stays 0.
            X[c, idx, :f_part.shape[0]] = _minmax_scale(f_part)
            X_pca[c, idx, :f_pca.shape[0]] = _minmax_scale(f_pca)
### fit patient data ####
# PCA across the metrics: every voxel slot is one sample and the
# ``classnum`` metrics are its features.
print('pca starting')
# Fit the PCA model
zhibiao_pca = decomposition.IncrementalPCA(n_components=3)
zhibiao_pca.fit(X_pca.reshape(classnum, -1).T)
#joblib.dump(zhibiao, os.getcwd()+'/modelsave/zhibiao_pca_5000.pkl')
#zhibiao_pca = joblib.load(os.getcwd()+'/modelsave/zhibiao_pca_5000.pkl')
X_outpca = zhibiao_pca.transform(X.reshape(classnum, -1).T)  # midbrain or whole brain
###############################################################################
# Build the learning data X, Y.
# Labels: 0 for the first ``data_num`` subjects (controls), 1 for the rest.
Y_learning = np.zeros(all_num, int)
Y_learning[data_num:] = 1

# X_outpca holds ``data_shape`` consecutive rows per subject and one column
# per component. Lay each subject out as
# [component 0 | component 1 | component 2], i.e.
# X_learning[i, k*data_shape + v] == X_outpca[i*data_shape + v, k].
# (A two-component version of this matrix used to be built first and was
# then discarded; only the three-component layout is kept.)
_n_pcs = X_outpca.shape[1]
X_learning = (X_outpca.reshape(all_num, data_shape, _n_pcs)
              .transpose(0, 2, 1)
              .reshape(all_num, data_shape * _n_pcs))  # (n_samples, n_features)
"""
plt.figure()
plt.plot(X_learning[:16,:].mean(axis=0),'b-',X_learning[16:,:].mean(axis=0),'r:')
plt.xlim(xmax=9000)
plt.text(x=0,y=0.7,s = "pca1",fontsize=17,color='y')
plt.text(x=3000,y=0.7,s = "pca2",fontsize=17,color='y')
plt.text(x=6000,y=0.7,s = "pca3",fontsize=17,color='y')
plt.grid()
plt.legend({'control','patient'},'best')
plt.title('mean signal of three components after PCA of con and pat in gong_block')
"""
##############################################################################
# Feature selection: keep the 1000 columns with the best f_classif score
# (original note: 500 for the midbrain).
print('feature selection')
feature_selec = feature_selection.SelectKBest(
    feature_selection.f_classif, k=1000)
X_reduced = feature_selec.fit_transform(X_learning, Y_learning)
where = feature_selec.get_support()
#awhere = where.reshape(2,20,50,5)
# midbrain
#beijing = X_learning.reshape(37,2,20,50,5)
#beijing_zhong = beijing[:,0,:,:,3].mean(axis=0)
#bei = plt.matshow(beijing_zhong+awhere[0,:,:,3]) # overlaying images: mind the value range, colours are assigned from it!
#bei = plt.matshow(awhere[0,:,:,3])
#bayes_estimator.fit(X_reduced,Y_learning)
# Is selecting features also the better choice for the midbrain?
# Saving the data
#np.save('/home/gongyilong/brain-pca/mat_save/mid_brain_Xlearning',X_learning)
#np.save('/home/gongyilong/brain-pca/mat_save/mid_brain_Xreduced',X_reduced)

# NOTE(review): both models below are trained on X_learning; X_reduced is
# not used by them even though the first title mentions feature selection.
print('random_tree')
ranfortitle = 'Random Forest(1000) model CV(10) in fitting midbrain_block_brain PCA feature vector feature selection'
random_forest = RandomForestClassifier(n_estimators=1000).fit(
    X_learning, Y_learning)
plot_learning_curve(random_forest, ranfortitle, X_learning, Y_learning,
                    cv=10)

print('bayes_estimator')
bayestitle = 'Naive Bayes model CV(10) in fitting midbrain_block_brain PCA feature vector no feature selection'
bayes_estimator = GaussianNB()
bayes_estimator.fit(X_learning, Y_learning)
plot_learning_curve(bayes_estimator, bayestitle, X_learning, Y_learning,
                    cv=10)
"""
bayes_estimator = GaussianNB()
bayes_estimator.fit(X_learning,Y_learning)
scores1=cross_validation.cross_val_score(bayes_estimator,X_learning,Y_learning, cv=10)
#scores = cross_validation.cross_val_score(clf, X_scaled_norm, y_le, cv=10)
print("Bayes: Accuracy: %0.2f (+/- %0.2f)" % (scores1.mean(), scores1.std() * 2))
plot_learning_curve(bayes_estimator, 'Learning Curves (Naive Bayes)', X_learning,Y_learning, cv=cv)
"""
"""预测最后准确率"""
# Accuracy on the training data itself, in percent. The value is now
# interpolated with ``%``: the earlier ``print (fmt, value)`` printed a
# tuple and never filled in the ``%f`` placeholder.
pred = bayes_estimator.predict(X_learning)
print('Bayes Training Accuracy: %f\n' % (np.mean(pred == Y_learning) * 100))
pred = random_forest.predict(X_learning)
print('random_forset Training Accuracy: %f\n'
      % (np.mean(pred == Y_learning) * 100))
# Load faces data
#dataset = fetch_olivetti_faces(shuffle=True, random_state=rng)
"""
faces = X
n_samples, n_features = faces.shape
# global centering
faces_centered = faces - faces.mean(axis=0)
# local centering
faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)
print("Dataset consists of %d faces" % n_samples)
"""
###############################################################################
#plot_gallery("original control data", X[:data_num],n_col=4, n_row=4)
###############################################################################
# NOTE(review): this section reads ``faces``, ``faces_centered`` and
# ``X_pat``, none of which is assigned anywhere in this file (``faces``
# appears only inside a string literal), so as written it stops with a
# NameError at ``data = faces``. Behaviour is kept as-is; confirm whether
# the section is still needed.

# Estimator used for the decomposition and whether to centre its input.
estimatorname = 'Eigenfaces - RandomizedPCA'
estimator = decomposition.RandomizedPCA(n_components=n_components,
                                        whiten=True)
center = True
###############################################################################
# Plot a sample of the input data
#plot_gallery("First centered Olivetti faces", faces_centered[:n_components])
###############################################################################
# Do the estimation and plot it
print("Extracting the top %d %s..." % (n_components, estimatorname))
t0 = time()
data = faces
if center:
    data = faces_centered
estimator.fit(data)
train_time = (time() - t0)
print("done in %0.3fs" % train_time)

# Use cluster centres when the estimator exposes them, otherwise components.
components_ = (estimator.cluster_centers_
               if hasattr(estimator, 'cluster_centers_')
               else estimator.components_)
if hasattr(estimator, 'noise_variance_'):
    plot_gallery("Pixelwise variance",
                 estimator.noise_variance_.reshape(1, -1),
                 n_col=1, n_row=1)
gallery_title = '%s - Train time %.1fs' % (estimatorname, train_time)
plot_gallery(gallery_title, components_[:n_components])

con_num = 16
#plot_gallery("origial con_data", X[:con_num],n_col=4, n_row=6)
plot_gallery("original pat_data", X_pat[:pat_num], n_col=4, n_row=6)

# Project every patient row onto the fitted components ...
out = np.zeros([pat_num, n_components])
for row in range(pat_num):
    out[row] = estimator.transform(X_pat[row])

# ... and rebuild each row as the weighted sum of those components.
cc = np.zeros([pat_num, data_row * data_col])
for row in range(pat_num):
    for k in range(n_components):
        cc[row] += out[row][k] * components_[k]
plot_gallery("out pat_data", cc[:pat_num], n_col=4, n_row=6,
             cmap=plt.cm.Reds)

### Add the mean back here !!! ###
dd = np.zeros([pat_num, data_row * data_col])
mean_face = faces.mean(axis=0)
for row in range(pat_num):
    dd[row] = cc[row] + mean_face
plot_gallery("out differ pat_data", dd[:pat_num], n_col=4, n_row=6,
             cmap=plt.cm.Reds)

# Keep only the component entries above 0.05; all others stay zero.
wher = np.zeros([n_components, data_row * data_col])
for k in range(n_components):
    loadings = components_[k][:data_row * data_col]
    strong = loadings > 0.05
    wher[k][strong] = loadings[strong]
plot_gallery(gallery_title, wher[:n_components])
plt.show()

from sklearn.externals import joblib
# NOTE(review): ``classname`` is assigned a list earlier in this file, so
# the str + list concatenation below raises TypeError -- confirm which
# metric name was meant.
joblib.dump(estimator, 'brain-pca_' + classname + '.pkl')
clf = joblib.load('brain-pca_' + classname + '.pkl')
plt.close()

### classification ###
conpca = np.zeros([data_num, n_components])
for row in range(data_num):
    conpca[row] = estimator.transform(X[row])
y_con = np.zeros(16, bool)
y_out = np.ones(pat_num, bool)