-
Notifications
You must be signed in to change notification settings - Fork 0
/
GNB_Language_Switch_MVPA.py
152 lines (123 loc) · 6.43 KB
/
GNB_Language_Switch_MVPA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
"""
Global accuracy.

You might want to save the shell log to a text file.

Lines to be modified if necessary:

    sessionPath = '/home/brain/host/20120808jys-lang/'  # Session path to be changed
    chunksTargets_boldDelay="chunksTargets_boldDelay4-4.txt"  # This log file is for boldDelay=4 and stimulusWidth=4

Also check:

    boldDelay=4  # added
    stimulusWidth=4  # added

Change the following line if you need different classification targets;
this script is for Japanese (j) vs. English (e):

    dataset = dataset[N.array([l in ['m', 't'] for l in dataset.sa.targets], dtype='bool')]
"""
import mvpa2.suite as M
import numpy as N
import pylab as P # plotting (matlab-like)
import os # file path processing, directory contents
import datetime # time stamps
import pickle # cPickle
import gzip
# SESSION CONFIGURATION
# numFolds is defined here but is not referenced by any other line of this script.
numFolds = 6

# The session directory is whatever directory the script is started from
# (it used to be hard-coded, e.g. '/home/brain/host/20130908ln').
sessionPath = os.getcwd()

# Cache files, both located directly inside the session directory:
# first the preprocessed dataset, then the trial-averaged ("trimmed") one.
preprocessedCache, trimmedCache = (
    os.path.join(sessionPath, cacheName)
    for cacheName in (
        'detrendedZscoredMaskedPyMVPAdataset.pkl',
        'averagedDetrendedZscoredMaskedPyMVPAdataset.pkl',
    )
)
# LOAD DATASET (Following the instruction of Brian, "and False" is added at lines 29 and 33.)
if os.path.isfile(trimmedCache) and False:
print 'loading cached averaged, trimmed, preprocessed dataset',trimmedCache,datetime.datetime.now()
dataset = pickle.load(gzip.open(trimmedCache, 'rb'))
else:
if os.path.isfile(preprocessedCache) and False:
print 'loading cached preprocessed dataset',preprocessedCache,datetime.datetime.now()
dataset = pickle.load(gzip.open(preprocessedCache, 'rb', 5))
else:
# if not, generate directly, and then cache
print 'loading and creating dataset',datetime.datetime.now()
# chunksTargets_boldDelay="chunksTargets_boldDelay4-4.txt" #Modified
chunksTargets_boldDelay="chunksTargets_boldDelay3-4-LanguageSwitch-Japanese_English.txt"
boldDelay=6 #added
stimulusWidth=4 #added
volAttribrutes = M.SampleAttributes(os.path.join(sessionPath,'behavioural',chunksTargets_boldDelay)) # default is 3.txt.
# print volAttribrutes.targets
# print len(volAttribrutes.targets)
# print volAttribrutes.chunks
# print len(volAttribrutes.chunks)
dataset = M.fmri_dataset(samples=os.path.join(sessionPath,'analyze/functional/functional4D.nii'),
targets=volAttribrutes.targets, # I think this was "labels" in versions 0.4.*
chunks=volAttribrutes.chunks,
mask=os.path.join(sessionPath,'analyze/structural/lc2ms_deskulled.hdr'))
# DATASET ATTRIBUTES (see AttrDataset)
print 'functional input has',dataset.a.voxel_dim,'voxels of dimesions',dataset.a.voxel_eldim,'mm'
print '... or',N.product(dataset.a.voxel_dim),'voxels per volume'
print 'masked data has',dataset.shape[1],'voxels in each of',dataset.shape[0],'volumes'
print '... which means that',round(100-100*dataset.shape[1]/N.product(dataset.a.voxel_dim)),'% of the voxels were masked out'
print 'of',dataset.shape[1],'remaining features ...'
print 'summary of conditions/volumes\n',datetime.datetime.now()
print dataset.summary_targets()
# DETREND
print 'detrending (remove slow drifts in signal, and jumps between runs) ...',datetime.datetime.now() # can be very memory intensive!
M.poly_detrend(dataset, polyord=1, chunks_attr='chunks') # linear detrend
print '... done',datetime.datetime.now()
# ZSCORE
print 'zscore normalising (give all voxels similar variance) ...',datetime.datetime.now()
M.zscore(dataset, chunks_attr='chunks', param_est=('targets', ['base'])) # zscoring, on basis of rest periods
print '... done',datetime.datetime.now()
#P.savefig(os.path.join(sessionPath,'pyMVPAimportDetrendZscore.png'))
pickleFile = gzip.open(preprocessedCache, 'wb', 5);
pickle.dump(dataset, pickleFile);
# AVERAGE OVER MULTIPLE VOLUMES IN A SINGLE TRIAL
print 'averaging over trials ...',datetime.datetime.now()
dataset = dataset.get_mapped(M.mean_group_sample(attrs=['chunks','targets']))
print '... only',dataset.shape[0],'cases left now'
dataset.chunks = N.mod(N.arange(0,dataset.shape[0]),5)
# print '\n\n\n'
# print dataset.targets
# print len(dataset.targets)
# print dataset.chunks
# print len(dataset.chunks)
# REDUCE TO CLASS LABELS, AND ONLY KEEP CONDITIONS OF INTEREST (JAPANESE VS ENGLISH)
dataset.targets = [t[0:2] for t in dataset.targets]
dataset = dataset[N.array([l in ['jj', 'je', 'ej', 'ee'] for l in dataset.sa.targets], dtype='bool')]
print '... and only',dataset.shape[0],'cases of interest (Language Switch between Japanese vs English)'
dataset=M.datasets.miscfx.remove_invariant_features(dataset)
print 'saving as compressed file',trimmedCache
pickleFile = gzip.open(trimmedCache, 'wb', 5);
pickle.dump(dataset, pickleFile);
# CLASSIFIER SETUP
# NOTE: the variable names still say "SMLR", but the classifier built here is
# a Gaussian Naive Bayes (GNB) wrapped in ANOVA-based feature selection.
gnbClassifier = M.GNB(common_variance=True)

# Feature selection: score features with a one-way ANOVA and select 500
# elements from the upper tail of the scores.
anovaMeasure = M.OneWayAnova()
top500Selector = M.FixedNElementTailSelector(500, mode='select', tail='upper')
anovaFeatureSelection = M.SensitivityBasedFeatureSelection(anovaMeasure, top500Selector)

anovaSelectedSMLR = M.FeatureSelectionClassifier(gnbClassifier, anovaFeatureSelection)

# Cross-validation over an N-fold partitioning, with the listed conditional
# attributes enabled.
enabledConditionalAttributes = ['samples_error','stats', 'calling_time','confusion']
foldwiseCvedAnovaSelectedSMLR = M.CrossValidation(
    anovaSelectedSMLR,
    M.NFoldPartitioner(),
    enable_ca=enabledConditionalAttributes
)
# run classifier
print 'learning on detrended, normalised, averaged, Language Switch ...',datetime.datetime.now()
results = foldwiseCvedAnovaSelectedSMLR(dataset)
print '... done',datetime.datetime.now()
print 'accuracy',N.round(100-N.mean(results)*100,1),'%',datetime.datetime.now()
#New lines for out putting the result into a csv file.
precision=N.round(100-N.mean(results)*100,1)
st=str(boldDelay) + ',' + str(stimulusWidth) + ',' + str(precision) +'\n'
f = open( "withinPredictionResult.csv", "a" )
f.write(st)
f.close
# display results
P.figure()
P.title(str(N.round(foldwiseCvedAnovaSelectedSMLR.ca.stats.stats['ACC%'], 1))+'%, n-fold SMLR with anova FS x 500')
foldwiseCvedAnovaSelectedSMLR.ca.stats.plot()
P.savefig(os.path.join(sessionPath,'confMatrixAvTrial-LanguageSwitch-Japanese_English-GNB.png'))
print foldwiseCvedAnovaSelectedSMLR.ca.stats.matrix
print 'accuracy',N.round(foldwiseCvedAnovaSelectedSMLR.ca.stats.stats['ACC%'], 1),'%',datetime.datetime.now()
# this should give average anova measure over the folds - but in fact would be much the same as taking over single fold
sensana = anovaSelectedSMLR.get_sensitivity_analyzer(postproc=M.maxofabs_sample())
cv_sensana = M.RepeatedMeasure(sensana, M.NFoldPartitioner())
sens = cv_sensana(dataset)
print sens.shape
M.map2nifti(dataset, N.mean(sens,0)).to_filename("anovaSensitivity_"+sessionID+'.nii')
# this looks good, but don't know way to get back from this feature selected space (of 500) to the whole space of 28k or so, for output
weights = anovaSelectedSMLR.clf.weights