forked from alexsavio/aizkolari
/
aizkolari_measure.py
executable file
·433 lines (345 loc) · 16.3 KB
/
aizkolari_measure.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
#!/usr/bin/python
#-------------------------------------------------------------------------------
#License GPL v3.0
#Author: Alexandre Manhaes Savio <alexsavio@gmail.com>
#Grupo de Inteligencia Computational <www.ehu.es/ccwintco>
#Universidad del Pais Vasco UPV/EHU
#Use this at your own risk!
#-------------------------------------------------------------------------------
#DEPENDENCIES:
#sudo apt-get install python-argparse python-numpy python-numpy-ext python-matplotlib python-scipy python-nibabel
#For development:
#sudo apt-get install ipython python-nifti python-nitime
#from IPython.core.debugger import Tracer; debug_here = Tracer()
import os
import sys
import argparse
import logging
import numpy as np
import nibabel as nib
import aizkolari_utils as au
import aizkolari_preproc as pre
import aizkolari_pearson as pear
import aizkolari_bhattacharyya as bat
import aizkolari_ttest as ttst
import aizkolari_postproc as post
#-------------------------------------------------------------------------------
def set_parser():
    """Build the command-line argument parser for this script.

    Required options are the class label file (-c), the subject list file
    (-i) and the output directory (-o).  Every optional string option
    defaults to an empty string so callers can safely call .strip() on it.

    Returns the configured argparse.ArgumentParser.
    """
    parser = argparse.ArgumentParser(description='Slices and puts together a list of subjects to perform voxe-wise group calculations, e.g., Pearson correlations and bhattacharyya distance. \n The Pearson correlation is calculated between each voxel site for all subjects and the class label vector of the same subjects. \n Bhatthacharyya distance is calculated between each two groups using voxelwise Gaussian univariate distributions of each group. \n Student t-test is calculated as a Welch t-test where the two population variances are assumed to be different.')

    #input and output locations
    parser.add_argument('-c', '--classesf', dest='classes', required=True,
                        help='class label file. one line per class: <class_label>,<class_name>.')
    parser.add_argument('-i', '--insubjsf', dest='subjs', required=True,
                        help='file with a list of the volume files and its labels for the analysis. Each line: <class_label>,<subject_file>')
    parser.add_argument('-o', '--outdir', dest='outdir', required=True,
                        help='name of the output directory where the results will be put.')

    #subject selection
    parser.add_argument('-e', '--exclude', dest='exclude', default='', required=False,
                        help='subject list mask, i.e., text file where each line has 0 or 1 indicating with 1 which subject should be excluded in the measure. To help calculating measures for cross-validation folds, for leave-one-out you can use the -l option.')
    parser.add_argument('-l', '--leave', dest='leave', default=-1, required=False, type=int,
                        help='index from subject list (counting from 0) indicating one subject to be left out of the measure. For leave-one-out measures.')
    parser.add_argument('-d', '--datadir', dest='datadir', required=False, default='',
                        help='folder path where the subjects are, if the absolute path is not included in the subjects list file.')

    #default='' (instead of the implicit None) keeps args.mask a string when -m is omitted
    parser.add_argument('-m', '--mask', dest='mask', required=False, default='',
                        help='Brain mask volume file for all subjects.')

    #measure and run identification
    parser.add_argument('-n', '--measure', dest='measure', default='pearson',
                        choices=['pearson','bhatta','bhattacharyya','ttest'], required=False,
                        help='name of the distance/correlation method. Allowed: pearson (Pearson Correlation), bhatta (Bhattacharyya distance), ttest (Student`s t-test). (default: pearson)')
    parser.add_argument('-k', '--cleanup', dest='cleanup', action='store_true',
                        help='if you want to clean up all the temp files after processing')
    parser.add_argument('-f', '--foldno', dest='foldno', required=False, type=int, default=-1,
                        help='number to identify the fold for this run, in case you will run many different folds.')
    parser.add_argument('-x', '--expname', dest='expname', required=False, type=str, default='',
                        help='name to identify this run, in case you will run many different experiments.')
    parser.add_argument('-a', '--absolute', dest='absolute', required=False, action='store_true',
                        help='put this if you want absolute values of the measure.')
    parser.add_argument('-v', '--verbosity', dest='verbosity', required=False, type=int, default=2,
                        help='Verbosity level: Integer where 0 for Errors, 1 for Progression reports, 2 for Debug reports')
    parser.add_argument('--checklist', dest='checklist', required=False, action='store_true',
                        help='If set will use and update a checklist file, which will control the steps already done in case the process is interrupted.')

    return parser
#-------------------------------------------------------------------------------
def decide_whether_usemask (maskfname):
    """Tell whether a brain mask should be applied.

    maskfname -- path of the mask volume; may be empty or None.

    Returns True only when a file name was given and it exists on disk.
    A message is printed when a name was given but the file is missing.
    """
    #no mask requested at all
    if not maskfname:
        return False

    if os.path.exists(maskfname):
        return True

    #a mask was requested but cannot be found: report it and go on without it
    print ('Mask file ' + maskfname + ' not found!')
    return False
#-------------------------------------------------------------------------------
def get_fold_numberstr (foldno):
    """Return the fold number as a zero-padded string for folder names.

    foldno -- integer fold index; -1 means that no fold was given.

    Returns '' when foldno is -1, otherwise the padded number string.
    """
    if foldno == -1:
        return ''

    #the bare name zeropad is not defined or imported in this file, so the
    #original call raised NameError for every real fold number.
    #NOTE(review): assumes the helper lives in aizkolari_utils like the other
    #utilities used here -- confirm that au.zeropad exists.
    return au.zeropad (foldno)
#-------------------------------------------------------------------------------
def get_measure_shortname (measure_name):
    """Map a measure name given on the command line to its short tag.

    measure_name -- 'pearson', 'bhatta', 'bhattacharyya' or 'ttest'.

    Returns 'pea', 'bat' or 'ttest'.
    Raises ValueError for any other name (the original left the result
    unbound and failed with an UnboundLocalError).
    """
    shortnames = {
        'bhattacharyya': 'bat',
        'bhatta': 'bat',
        'pearson': 'pea',
        'ttest': 'ttest',
    }
    try:
        return shortnames[measure_name]
    except KeyError:
        raise ValueError('Unknown measure name: ' + str(measure_name))
#-------------------------------------------------------------------------------
def parse_labels_file (classf):
    """Read the class label file.

    classf -- path of a text file with one '<class_label>,<class_name>'
              entry per line.  Blank lines are ignored.

    Returns [labels, classnames]: a list of integer labels and a list of
    class name strings, in file order, with surrounding whitespace removed.
    Raises ValueError or IndexError when a non-blank line is malformed.
    """
    labels = []
    classnames = []

    #the with block closes the file even if a line fails to parse
    with open(classf, 'r') as labfile:
        for rawline in labfile:
            rawline = rawline.strip()
            if not rawline:
                continue
            fields = rawline.split(',')
            labels.append (int(fields[0]))
            classnames.append (fields[1].strip())

    return [labels, classnames]
#-------------------------------------------------------------------------------
def parse_subjects_list (subjsfname, datadir=''):
    """Read the subject list file.

    subjsfname -- path of a text file with one '<class_label>,<subject_file>'
                  entry per line.  Blank lines are ignored.
    datadir    -- folder prepended to every subject file that is not given
                  as an absolute path.

    Returns [subjlabels, subjs]: the integer label and the file path of
    each subject, in file order.
    Logs an error and exits with status -1 when the file cannot be read or
    a line is malformed.
    """
    subjlabels = []
    subjs = []

    try:
        #the with block closes the file even if a line fails to parse
        with open(subjsfname, 'r') as subjfile:
            for rawline in subjfile:
                rawline = rawline.strip()
                if not rawline:
                    continue
                fields = rawline.split(',')
                subjlabels.append(int(fields[0]))
                subjfname = fields[1].strip()
                if not os.path.isabs(subjfname):
                    #join returns subjfname untouched when datadir is empty
                    subjfname = os.path.join(datadir, subjfname)
                subjs.append (subjfname)
    except (IOError, ValueError, IndexError):
        #the original called an undefined name (log) here; use the project
        #logger the same way parse_exclude_list does
        au.log.error ('Error processing file ' + subjsfname)
        au.log.error ('Unexpected error: ' + str(sys.exc_info()[0]))
        sys.exit(-1)

    return [subjlabels, subjs]
#-------------------------------------------------------------------------------
def parse_exclude_list (excluf, leave=-1):
    """Read the subject exclusion mask.

    excluf -- path of a text file with one 0 or 1 per line, where 1 marks a
              subject to be excluded; may be empty.
    leave  -- index of the left-out subject, or -1.  It is only used to warn
              when that subject is already excluded by the file.

    Returns an empty list when no file is given, otherwise a 1-D integer
    numpy array with one entry per line of the file.
    Logs an error and exits with status -1 when the file cannot be read or
    leave is outside the list.
    """
    excluded = []
    if not excluf:
        return excluded

    try:
        #ndmin=1 keeps a one-line file indexable instead of a 0-d array
        excluded = np.loadtxt(excluf, dtype=int, ndmin=1)
        #the caller's leave value cannot be changed from here, so only warn
        if leave > -1 and excluded[leave] == 1:
            au.log.warning ('Your left out subject (-l) is already being excluded in the exclusion file (-e).')
    except (IOError, ValueError, IndexError):
        au.log.error ('Error processing file ' + excluf)
        au.log.error ('Unexpected error: ' + str(sys.exc_info()[0]))
        sys.exit(-1)

    return excluded
#-------------------------------------------------------------------------------
def main(argv=None):
    """Command-line entry point: slice the subject volumes and run a group measure.

    Parses the options defined in set_parser(), saves the subject and label
    lists in the output directory, calls pre.slice_and_merge() and then runs
    the Pearson, Bhattacharyya or t-test steps into a measure folder inside
    the output directory.  With --checklist, every step is looked up with
    au.is_done() before it runs and recorded with au.checklist_add() after.

    argv -- list of argument strings; None lets argparse read sys.argv[1:].

    Returns -1 if argument parsing reports an ArgumentError, None otherwise.
    """
    #local import: only needed by the optional cleanup at the end
    import shutil

    parser = set_parser()

    try:
        args = parser.parse_args (argv)
    except argparse.ArgumentError as exc:
        #parser.error() prints the usage text and exits
        parser.error(str(exc))
        return -1

    datadir = args.datadir.strip()
    classf = args.classes.strip()
    subjsf = args.subjs.strip()
    #-m is optional and may come back as None
    maskf = (args.mask or '').strip()
    outdir = args.outdir.strip()
    excluf = args.exclude.strip()
    measure = args.measure.strip()
    expname = args.expname.strip()
    foldno = args.foldno
    cleanup = args.cleanup
    leave = args.leave
    absval = args.absolute
    verbose = args.verbosity
    chklst = args.checklist

    au.setup_logger(verbose)

    usemask = decide_whether_usemask(maskf)
    #from here on foldno is a string, empty when no fold was given
    foldno = get_fold_numberstr (foldno)
    measure = get_measure_shortname (measure)

    subjsnum = au.file_len(subjsf)

    #reading label file
    labels, classnames = parse_labels_file (classf)

    #reading subjects list
    subjlabels, subjs = parse_subjects_list (subjsf, datadir)

    #if output dir does not exist, create
    if not os.path.exists(outdir):
        os.mkdir(outdir)

    #checklist_fname
    if chklst:
        chkf = outdir + os.path.sep + au.checklist_str()
        if not os.path.exists(chkf):
            au.touch(chkf)
    else:
        chkf = ''

    #saving data in files where further processes can find them
    outf_subjs = outdir + os.path.sep + au.subjects_str()
    outf_labels = outdir + os.path.sep + au.labels_str()
    np.savetxt(outf_subjs, subjs, fmt='%s')
    np.savetxt(outf_labels, subjlabels, fmt='%i')

    #creating folder for slices
    slidir = outdir + os.path.sep + au.slices_str()
    if not os.path.exists(slidir):
        os.mkdir(slidir)

    #slice the volumes
    #creating group and mask slices
    pre.slice_and_merge(outf_subjs, outf_labels, chkf, outdir, maskf)

    #name of the measure output folder
    if measure == 'pea':
        measure_fname = au.pearson_str()
    elif measure == 'bat':
        measure_fname = au.bhattacharyya_str()
    elif measure == 'ttest':
        measure_fname = au.ttest_str()

    #checking the leave parameter
    if leave > (subjsnum - 1):
        au.log.warning('aizkolari_measure: the leave (-l) argument value is ' + str(leave) + ', bigger than the last index of subject: ' + str(subjsnum - 1) + '. Im setting it to -1.')
        leave = -1

    #reading exclusion list
    excluded = parse_exclude_list (excluf, leave)

    #setting the output folder mdir extension
    mdir = outdir + os.path.sep + measure_fname
    if expname:
        mdir += '_' + expname
    if foldno:
        mdir += '_' + foldno

    #setting the stats folder
    statsdir = outdir + os.path.sep + au.stats_str()
    if expname:
        statsdir += '_' + expname
    if foldno:
        statsdir += '_' + foldno

    #setting a string with step parameters
    #(paths are built by plain concatenation on purpose: these strings are
    # the keys stored in the checklist file)
    step_params = ' ' + measure_fname + ' ' + mdir
    if absval:
        step_params += ' ' + au.abs_str()
    if leave > -1:
        step_params += ' excluding subject ' + str(leave)

    #checking if this measure has already been done
    endstep = au.measure_str() + step_params
    stepdone = au.is_done(chkf, endstep)

    #add pluses to output dir if it already exists
    if stepdone:
        while os.path.exists (mdir):
            mdir += '+'
    else:
        #work in the last folder used: go one past it, then step back
        plus = False
        while os.path.exists (mdir):
            mdir += '+'
            plus = True
        if plus:
            mdir = mdir[0:-1]

    #statsdir carries the same number of pluses as mdir
    statsdir += '+' * mdir.count('+')

    #NOTE(review): the indentation of the original was lost.  Everything up
    #to the end-of-step mark is placed under "not stepdone" here; the
    #alternative nesting behaves the same whenever stepdone is False --
    #confirm against the upstream file.
    if stepdone:
        au.log.info ('aizkolari_measure:' + endstep + ' is already in the checklist, nothing to do.')
    else:
        #creating output folders
        if not os.path.exists (mdir):
            os.mkdir(mdir)

        #copying files to mdir
        au.copy(outf_subjs, mdir)
        au.copy(outf_labels, mdir)

        #saving exclude files in mdir
        outf_exclude = ''
        if excluf:
            outf_exclude = au.exclude_str()
            if expname:
                outf_exclude += '_' + expname
            #the original tested an undefined name (foldnumber) here
            if foldno:
                outf_exclude += '_' + foldno

            np.savetxt(outdir + os.path.sep + outf_exclude, excluded, fmt='%i')
            np.savetxt(mdir + os.path.sep + au.exclude_str(), excluded, fmt='%i')
            excluf = mdir + os.path.sep + au.exclude_str()

        #merging mask slices to mdir
        step = au.maskmerging_str() + ' ' + measure_fname + ' ' + mdir
        if usemask and not au.is_done(chkf, step):
            maskregex = au.mask_str() + '_' + au.slice_str() + '*'
            post.merge_slices (slidir, maskregex, au.mask_str(), mdir, False)
            au.checklist_add(chkf, step)

        #CORRELATION
        if measure == 'pea':
            #measure pearson correlation for each population slice
            step = au.measureperslice_str() + step_params
            if not au.is_done(chkf, step):
                pear.pearson_correlation (outdir, mdir, usemask, excluf, leave)
                au.checklist_add(chkf, step)

            #merge all correlation slice measures
            step = au.postmerging_str() + step_params
            if not au.is_done(chkf, step):
                pearegex = au.pearson_str() + '_' + au.slice_str() + '*'
                peameasf = mdir + os.path.sep + au.pearson_str()
                if leave > -1:
                    pearegex += '_' + au.excluded_str() + str(leave) + '*'
                    peameasf += '_' + au.excluded_str() + str(leave) + '_' + au.pearson_str()

                post.merge_slices (mdir, pearegex, peameasf, mdir)

                if absval:
                    post.change_to_absolute_values(peameasf)

                au.checklist_add(chkf, step)

        #BHATTACHARYYA AND T-TEST
        elif measure == 'bat' or measure == 'ttest':
            if not os.path.exists (statsdir):
                os.mkdir(statsdir)

            #one row per class: [label, number of selected subjects]
            gsize = np.zeros([len(classnames), 2], dtype=int)
            statfnames = {}

            for c, gname in enumerate(classnames):
                glabel = labels[c]
                au.log.debug ('Processing group ' + gname)

                #gselect marks with 1 the subjects of this group that take part
                gselect = np.zeros(len(subjs))
                gsubjs = []
                glabels = []
                for s, slabel in enumerate(subjlabels):
                    if slabel != glabel:
                        continue
                    gsubjs.append (subjs[s])
                    glabels.append (slabel)
                    gselect[s] = 1
                    if outf_exclude and excluded[s]:
                        gselect[s] = 0

                gsize[c, 0] = glabel
                gsize[c, 1] = np.sum(gselect)

                gsubjsf = mdir + os.path.sep + gname + '_' + au.subjects_str()
                glabelsf = mdir + os.path.sep + gname + '_' + au.labels_str()
                outf_group = mdir + os.path.sep + gname + '_' + au.members_str()
                np.savetxt(gsubjsf, gsubjs, fmt='%s')
                np.savetxt(glabelsf, glabels, fmt='%i')
                np.savetxt(outf_group, gselect, fmt='%i')

                step = au.groupfilter_str() + ' ' + gname + ' ' + statsdir
                if not au.is_done(chkf, step):
                    au.group_filter (outdir, statsdir, gname, outf_group, usemask)
                    au.checklist_add(chkf, step)

                grp_step_params = ' ' + au.stats_str() + ' ' + gname + ' ' + statsdir
                step = au.measureperslice_str() + grp_step_params
                if not au.is_done(chkf, step):
                    post.group_stats (statsdir, gname, gsize[c, 1], statsdir)
                    au.checklist_add(chkf, step)

                step = au.postmerging_str() + grp_step_params
                if not au.is_done(chkf, step):
                    statfnames[gname] = post.merge_stats_slices (statsdir, gname)
                    au.checklist_add(chkf, step)

            sampsizef = mdir + os.path.sep + au.groupsizes_str()
            np.savetxt(sampsizef, gsize, fmt='%i,%i')

            #decide which group distance function to use
            if measure == 'bat':
                distance_func = bat.measure_bhattacharyya_distance
            else:
                distance_func = ttst.measure_ttest

            #now we deal with the indexed excluded subject
            step = au.postmerging_str() + ' ' + str(classnames) + step_params
            exsubf = ''
            exclas = ''
            if leave > -1:
                exsubf = subjs[leave]
                #find the class name by the position of the label in the class
                #file; indexing classnames with the label value itself is only
                #right when the labels are exactly 0..n-1 in file order
                exclas = classnames[labels.index(subjlabels[leave])]

            if not au.is_done(chkf, step):
                #group distance called here, in charge of removing the 'leave' subject from stats as well
                measfname = post.group_distance (distance_func, statsdir, classnames, gsize, chkf, absval, mdir, foldno, expname, leave, exsubf, exclas)

                if usemask:
                    au.apply_mask (measfname, mdir + os.path.sep + au.mask_str())

                au.checklist_add(chkf, step)

        #adding step end indication
        au.checklist_add(chkf, endstep)

    #CLEAN SPACE SUGGESTION
    rmcomm = 'rm -rf ' + slidir + ';'
    if cleanup:
        au.log.debug ('Cleaning folders:')
        au.log.info (rmcomm)
        #remove the folder without a shell, so paths with spaces or shell
        #metacharacters are handled safely
        shutil.rmtree (slidir, ignore_errors=True)
    else:
        au.log.info ('If you need disk space, remove the temporary folders executing:')
        au.log.info (rmcomm.replace(';','\n'))
        if leave > -1:
            au.log.info ('You should not remove these files if you are doing further leave-one-out measures.')
#-------------------------------------------------------------------------------
#script entry point: run main() and hand its return value to the shell
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)