/
ds_creation.py
183 lines (163 loc) · 7 KB
/
ds_creation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
from os.path import join as _opj
from mvpa2.mappers.detrend import poly_detrend
from mvpa2.mappers.zscore import zscore
from mvpa2.datasets.sources.openfmri import OpenFMRIDataset
from mvpa2.datasets.sources.openfmri import _model2id, _cond2id, _taskrun
from mvpa2.datasets.eventrelated import events2sample_attr
from mvpa2.datasets.eventrelated import fit_event_hrf_model
from mvpa2.base.dataset import vstack
import numpy as np
from collections import Counter
def get_bold_run_model(dhandle, model, subj, run):
"""Return the stimulation design for a particular subject/task/run.
Parameters
----------
model : int
Model identifier.
subj : int
Subject identifier.
run : int
Run ID.
Returns
-------
list
One item per event in the run. All items are dictionaries with the
following keys: 'condition', 'onset', 'duration', 'intensity',
'run', 'task', 'trial_idx', 'ctrial_idx', where the first is a
literal label, the last four are integer IDs, and the rest are
typically floating point values. 'onset_idx' is the index of the
event specification sorted by time across the entire run (typically
corresponding to a trial index), 'conset_idx' is analog but contains
the onset index per condition, i.e. the nth trial of the respective
condition in a run.
"""
conditions = dhandle.get_model_conditions(model)
events = []
ev_fields = ('onset', 'duration', 'intensity','id')
# get onset info for specific subject/task/run combo
for cond in conditions:
task_id = cond['task']
def _load_hlpr(fname):
return np.recfromtxt(fname, names=ev_fields)
evdata = np.atleast_1d(dhandle._load_subj_data(
subj,
['model', _model2id(model), 'onsets',
_taskrun(1, run), '%s.txt' % _cond2id(cond['id'])],
_load_hlpr)
)
for i, ev in enumerate(evdata):
evdict = dict(zip(ev_fields,
[ev[field] for field in ev_fields]))
evdict['task'] = task_id
evdict['condition'] = cond['name']
evdict['run'] = run
evdict['conset_idx'] = i
events.append(evdict)
events = sorted(events, key=lambda x: x['onset'])
for i, ev in enumerate(events):
ev['onset_idx'] = i
return events
def create_betas_per_trial_with_pymvpa(study_path, subj, conf, mask_name, flavor, TR):
dhandle = OpenFMRIDataset(study_path)
model = 1
task = 1
# Do this for other tasks as well. not only the first
mask_fname = _opj(study_path,"sub{:0>3d}".format(subj),"masks",conf.mvpa_tasks[0],"{}.nii.gz".format(mask_name))
print mask_fname
run_datasets = []
for run_id in dhandle.get_task_bold_run_ids(task)[subj]:
if type(run_id) == str:
continue
all_events = dhandle.get_bold_run_model(model, subj, run_id)
run_events = []
i=0
for event in all_events:
if event['task']==task:
event['condition'] = '{}-{}'.format(event['condition'],i)
run_events.append(event)
i+=1
# load BOLD data for this run (with masking); add 0-based chunk ID
run_ds = dhandle.get_bold_run_dataset(subj, task, run_id,
flavor = flavor,
chunks = run_id -1,
mask = mask_fname)
# convert event info into a sample attribute and assign as 'targets'
run_ds.sa.time_coords = run_ds.sa.time_indices*TR
print run_id
run_ds.sa['targets'] = events2sample_attr(
run_events, run_ds.sa.time_coords, noinfolabel='rest')
# additional time series preprocessing can go here
poly_detrend(run_ds, polyord=1, chunks_attr='chunks')
zscore(run_ds, chunks_attr='chunks', param_est=('targets', ['rest']), dtype='float32')
glm_dataset = fit_event_hrf_model(run_ds, run_events,time_attr='time_coords',condition_attr='condition')
glm_dataset.sa['targets'] = [x[:x.find('-')] for x in glm_dataset.sa.condition]
glm_dataset.sa.condition = glm_dataset.sa['targets']
glm_dataset.sa['chunks'] = [run_id -1] * len(glm_dataset.samples)
run_datasets.append(glm_dataset)
return vstack(run_datasets, 0)
def create_betas_per_trial_with_pymvpa_roni(study_path, subj, conf, mask_name, flavor, TR):
dhandle = OpenFMRIDataset(study_path)
model = 1
task = 1
# Do this for other tasks as well. not only the first
mask_fname = _opj(study_path,"sub{:0>3d}".format(subj),"masks",conf.mvpa_tasks[0],"{}.nii.gz".format(mask_name))
print mask_fname
run_datasets = []
for run_id in dhandle.get_task_bold_run_ids(task)[subj]:
if type(run_id) == str:
continue
#all_events = dhandle.get_bold_run_model(model, subj, run_id)
all_events = get_bold_run_model(dhandle,2, subj, run_id)
run_events = []
i=0
for event in all_events:
if event['task']==task:
event['condition'] = '{}-{}'.format(event['condition'],event['id'])
run_events.append(event)
i+=1
# load BOLD data for this run (with masking); add 0-based chunk ID
run_ds = dhandle.get_bold_run_dataset(subj, task, run_id,
flavor = flavor,
chunks = run_id -1,
mask = mask_fname)
# convert event info into a sample attribute and assign as 'targets'
run_ds.sa.time_coords = run_ds.sa.time_indices*TR
run_ds.sa['targets'] = events2sample_attr(
run_events, run_ds.sa.time_coords, noinfolabel='rest')
# additional time series preprocessing can go here
poly_detrend(run_ds, polyord=1, chunks_attr='chunks')
zscore(run_ds, chunks_attr='chunks', param_est=('targets', ['rest']), dtype='float32')
glm_dataset = fit_event_hrf_model(run_ds, run_events,time_attr='time_coords',condition_attr='condition')
glm_dataset.sa['targets'] = [x[:x.find('-')] for x in glm_dataset.sa.condition]
glm_dataset.sa['id'] = [x[x.find('-')+1:] for x in glm_dataset.sa.condition]
glm_dataset.sa.condition = glm_dataset.sa['targets']
glm_dataset.sa['chunks'] = [run_id -1] * len(glm_dataset.samples)
# If a trial was dropped (the subject pressed on a button) than the counter trial from the
# other condition should also be dropped
for pair in conf.conditions_to_compare:
cond_bool = np.array([c in pair for c in glm_dataset.sa['condition']])
sub_dataset = glm_dataset[cond_bool]
c = Counter(sub_dataset.sa.id)
for value in c:
if c[value] < 2:
id_bool = np.array([value in cond_id for cond_id in glm_dataset.sa['id']])
glm_dataset = glm_dataset[np.bitwise_not(np.logical_and(id_bool,cond_bool))]
run_datasets.append(glm_dataset)
return vstack(run_datasets, 0)
def detrend(ds):
poly_detrend(ds, polyord=1, chunks_attr='chunks')
zscore(ds, chunks_attr='chunks', dtype='float32')
return ds
def create_betas_per_run_with_pymvpa(study_path, subj, conf, mask_name, flavor):
of = OpenFMRIDataset(study_path)
mask_fname = _opj(study_path,"sub{:0>3d}".format(subj),"masks",conf.mvpa_tasks[0],"{}.nii.gz".format(mask_name))
ds = of.get_model_bold_dataset(
model_id=1, subj_id=subj,
flavor=flavor,
mask= mask_fname,
# preproc_img=smooth,
preproc_ds=detrend,
modelfx=fit_event_hrf_model,
time_attr='time_coords',
condition_attr='condition')
return ds