def preprocess_and_tmp_save_fmri(data_path, task, subj, model, tmp_path, group_mask=None):
    '''
    Generator for preprocessed fMRI runs from one subject of Forrest Gump.

    Each run is loaded with the subject brain mask, written to a temporary
    NIfTI file, warped to the group template with FSL ``applywarp``
    (nearest-neighbour interpolation), reloaded with the group mask and then
    run-wise linearly de-trended and z-scored.

    IN:
        data_path   -   string, path pointing to the Forrest Gump directory
        task        -   string, which part of the Forrest Gump dataset to load
        subj        -   int, subject to pre-process
        model       -   not used by this function; kept so existing callers
                        keep working
        tmp_path    -   string, path to save the dataset temporarily to
        group_mask  -   string or None, mask applied after warping; when None
                        the subject's 'in_grpbold7Tp1/brain_mask.nii.gz' is used
    OUT:
        preprocessed fMRI samples per run (float32), one array per iteration
    '''
    from nipype.interfaces import fsl

    # Paths that do not depend on the run are built once, outside the loop.
    subj_templates = os.path.join(data_path, 'sub{0:03d}'.format(subj),
                                  'templates', 'bold7Tp1')
    if group_mask is None:
        group_mask = os.path.join(subj_templates, 'in_grpbold7Tp1',
                                  'brain_mask.nii.gz')
    mask_fname = os.path.join(subj_templates, 'brain_mask.nii.gz')
    ref_file = os.path.join(data_path, 'templates', 'grpbold7Tp1',
                            'brain.nii.gz')
    field_file = os.path.join(subj_templates, 'in_grpbold7Tp1',
                              'subj2tmpl_warp.nii.gz')

    dhandle = mvpa.OpenFMRIDataset(data_path)
    flavor = 'dico_bold7Tp1_to_subjbold7Tp1'

    for run_id in dhandle.get_task_bold_run_ids(task)[subj]:
        run_ds = dhandle.get_bold_run_dataset(subj, task, run_id,
                                              chunks=run_id - 1,
                                              mask=mask_fname, flavor=flavor)
        filename = 'brain_subj_{}_run_{}.nii.gz'.format(subj, run_id)
        tmp_file = os.path.join(tmp_path, filename)
        warped_file = os.path.join(tmp_path, 'group_' + filename)

        try:
            save(unmask(run_ds.samples.astype('float32'), mask_fname), tmp_file)
            warp = fsl.ApplyWarp()
            warp.inputs.in_file = tmp_file
            warp.inputs.out_file = warped_file
            warp.inputs.ref_file = ref_file
            warp.inputs.field_file = field_file
            warp.inputs.interp = 'nn'
            warp.run()
        finally:
            # Never leave the unwarped volume behind, even if warping failed.
            if os.path.exists(tmp_file):
                os.remove(tmp_file)

        try:
            run_ds = mvpa.fmri_dataset(warped_file, mask=group_mask,
                                       chunks=run_id - 1)
            mvpa.poly_detrend(run_ds, polyord=1)
            mvpa.zscore(run_ds)
        finally:
            # The warped volume is only needed until the samples are processed.
            if os.path.exists(warped_file):
                os.remove(warped_file)

        yield run_ds.samples.astype('float32')
def tmp_save_fmri(datapath, task, subj, model):
    """Preprocess every run of one subject and dump each to a scratch pickle.

    For each BOLD run the masked samples are written to a temporary NIfTI
    file, warped to the group template with FSL ``applywarp``
    (nearest-neighbour interpolation), reloaded with the group brain mask,
    linearly de-trended, z-scored and saved as float32 with ``joblib.dump``.

    Parameters
    ----------
    datapath : string
        Root of the OpenFMRI-style dataset; used for the dataset handle and
        the subject brain mask.
    task : task identifier passed to the dataset handle
    subj : int
        Subject number.
    model : unused; kept so existing callers keep working

    Returns
    -------
    int
        ``samples.shape[1]`` of the last processed run.

    Raises
    ------
    subprocess.CalledProcessError
        If ``applywarp`` exits with a non-zero status.
    ValueError
        If the subject has no runs for ``task``.
    """
    import subprocess

    dhandle = mvpa.OpenFMRIDataset(datapath)
    flavor = 'dico_bold7Tp1_to_subjbold7Tp1'
    group_brain_mask = '/home/mboos/SpeechEncoding/brainmask_group_template.nii.gz'
    mask_fname = os.path.join(datapath, 'sub{0:03d}'.format(subj),
                              'templates', 'bold7Tp1', 'brain_mask.nii.gz')
    scratch_path = '/home/data/scratch/mboos/prepro/tmp/'

    # NOTE(review): the template and warp-field paths are hard-coded and do
    # not follow ``datapath`` -- confirm that this is intended.
    ref_file = ('/home/data/psyinf/forrest_gump/anondata/templates/'
                'grpbold7Tp1/brain.nii.gz')
    warp_file = ('/home/data/psyinf/forrest_gump/anondata/sub{0:03}/templates/'
                 'bold7Tp1/in_grpbold7Tp1/subj2tmpl_warp.nii.gz').format(subj)

    n_features = None
    for run_id in dhandle.get_task_bold_run_ids(task)[subj]:
        run_ds = dhandle.get_bold_run_dataset(subj, task, run_id,
                                              chunks=run_id - 1,
                                              mask=mask_fname, flavor=flavor)
        filename = 'whole_brain_subj_{}_run_{}.nii.gz'.format(subj, run_id)
        tmp_path = scratch_path + filename
        group_path = scratch_path + 'group_' + filename

        save(unmask(run_ds.samples.astype('float32'), mask_fname), tmp_path)
        try:
            # Argument list instead of a shell string: no quoting problems,
            # and a failing applywarp raises instead of being ignored.
            subprocess.check_call(['applywarp',
                                   '-i', tmp_path,
                                   '-o', group_path,
                                   '-r', ref_file,
                                   '-w', warp_file,
                                   '--interp=nn'])
        finally:
            os.remove(tmp_path)

        run_ds = mvpa.fmri_dataset(group_path, mask=group_brain_mask,
                                   chunks=run_id - 1)
        mvpa.poly_detrend(run_ds, polyord=1)
        mvpa.zscore(run_ds)
        joblib.dump(run_ds.samples.astype('float32'),
                    scratch_path
                    + 'whole_brain_subj_{}_run_{}.pkl'.format(subj, run_id))
        os.remove(group_path)
        n_features = run_ds.samples.shape[1]

    if n_features is None:
        raise ValueError(
            'no BOLD runs found for subject {} and task {}'.format(subj, task))
    return n_features
targets=volAttribrutes.targets, # I think this was "labels" in versions 0.4.* chunks=volAttribrutes.chunks, mask=os.path.join(sessionPath,'analyze/structural/lc2ms_deskulled.hdr')) # DATASET ATTRIBUTES (see AttrDataset) print 'functional input has',dataset.a.voxel_dim,'voxels of dimesions',dataset.a.voxel_eldim,'mm' print '... or',N.product(dataset.a.voxel_dim),'voxels per volume' print 'masked data has',dataset.shape[1],'voxels in each of',dataset.shape[0],'volumes' print '... which means that',round(100-100*dataset.shape[1]/N.product(dataset.a.voxel_dim)),'% of the voxels were masked out' print 'of',dataset.shape[1],'remaining features ...' print 'summary of conditions/volumes\n',datetime.datetime.now() print dataset.summary_targets() # DETREND print 'detrending (remove slow drifts in signal, and jumps between runs) ...',datetime.datetime.now() # can be very memory intensive! M.poly_detrend(dataset, polyord=1, chunks_attr='chunks') # linear detrend print '... done',datetime.datetime.now() # ZSCORE print 'zscore normalising (give all voxels similar variance) ...',datetime.datetime.now() M.zscore(dataset, chunks_attr='chunks', param_est=('targets', ['base'])) # zscoring, on basis of rest periods print '... done',datetime.datetime.now() #P.savefig(os.path.join(sessionPath,'pyMVPAimportDetrendZscore.png')) pickleFile = gzip.open(preprocessedCache, 'wb', 5); pickle.dump(dataset, pickleFile); # AVERAGE OVER MULTIPLE VOLUMES IN A SINGLE TRIAL print 'averaging over trials ...',datetime.datetime.now() dataset = dataset.get_mapped(M.mean_group_sample(attrs=['chunks','targets'])) print '... only',dataset.shape[0],'cases left now'
# DATASET ATTRIBUTES (see AttrDataset) print 'functional input has', dataset.a.voxel_dim, 'voxels of dimesions', dataset.a.voxel_eldim, 'mm' print '... or', N.product(dataset.a.voxel_dim), 'voxels per volume' print 'masked data has', dataset.shape[ 1], 'voxels in each of', dataset.shape[0], 'volumes' print '... which means that', round( 100 - 100 * dataset.shape[1] / N.product(dataset.a.voxel_dim)), '% of the voxels were masked out' print 'of', dataset.shape[1], 'remaining features ...' print 'summary of conditions/volumes\n', datetime.datetime.now() print dataset.summary_targets() # DETREND print 'detrending (remove slow drifts in signal, and jumps between runs) ...', datetime.datetime.now( ) # can be very memory intensive! M.poly_detrend(dataset, polyord=1, chunks_attr='chunks') # linear detrend print '... done', datetime.datetime.now() # ZSCORE print 'zscore normalising (give all voxels similar variance) ...', datetime.datetime.now( ) M.zscore(dataset, chunks_attr='chunks', param_est=('targets', ['base'])) # zscoring, on basis of rest periods print '... done', datetime.datetime.now() #P.savefig(os.path.join(sessionPath,'pyMVPAimportDetrendZscore.png')) pickleFile = gzip.open(preprocessedCache, 'wb', 5) pickle.dump(dataset, pickleFile)
# Number of features kept by the univariate selector: 1000 for the 'vtc'
# mask, 500 for any other mask.
if MASK == 'vtc':
    K_FEATS = 1000
else:
    K_FEATS = 500
featsel = SelectKBest(f_classif, k=K_FEATS)
clf = LogisticRegression(penalty='l2', multi_class='ovr', solver='liblinear')

# ---------------------------------------------------------------
# Load data
# ---------------------------------------------------------------
map_ds_dict, mem_ds_dict = load_data(MASK)

# Preprocess every dataset of both collections: chunk-wise linear
# detrending followed by chunk-wise z-scoring.
for d in (mem_ds_dict, map_ds_dict):
    for ds in d.values():
        # NOTE(review): the return value is discarded here. If this helper
        # returns a new dataset rather than editing `ds` in place, the call
        # has no effect -- confirm against the PyMVPA documentation.
        mvpa2.remove_invariant_features(ds)
        mvpa2.poly_detrend(ds, polyord=1, chunks_attr='chunks')
        mvpa2.zscore(ds, chunks_attr='chunks')

# ---------------------------------------------------------------
# Build and convert to common space using hyperalignment
# ---------------------------------------------------------------

# One boolean support mask per localizer dataset; the same selector object
# is re-fitted for each dataset.
fsel_masks = []
for ds in map_ds_dict.values():
    featsel.fit(ds.samples, ds.targets)
    fsel_masks.append(featsel.get_support())

# Restrict each localizer dataset to its own selected features.
fs_mapds_list = []
for ds, mask in zip(map_ds_dict.values(), fsel_masks):
    fs_mapds_list.append(ds[:, mask])
'*_task-avmovie_run-*highpass_tmpl.nii.gz')) mask_fn = base_dir + participant + anat_dir + 'brain_mask_tmpl.nii.gz' assert len(movie_fns) == 8 # Include chunk (i.e., run) labels movie_ds = mv.vstack([ mv.fmri_dataset(movie_fn, mask=mask_fn, chunks=run) for run, movie_fn in enumerate(movie_fns) ]) # Assign participant labels as feature attribute movie_ds.fa['participant'] = [participant] * movie_ds.shape[1] print("Loaded movie data for participant {0}".format(participant)) # Perform linear detrending per chunk mv.poly_detrend(movie_ds, polyord=polyord, chunks_attr='chunks') # Perform low-pass filtering per chunk movie_ds.samples = clean(movie_ds.samples, sessions=movie_ds.sa.chunks, low_pass=.1, high_pass=None, t_r=2.0, detrend=False, standardize=False) # Z-score movie time series per chunk mv.zscore(movie_ds, chunks_attr='chunks') print("Finished preprocessing (detrending, z-scoring) for participant {0}". format(participant))
if i==0: ds = ds[:-4] elif i<7: ds = ds[4:-4] else: ds = ds[4:] ds.sa['chunks'] = np.ones(ds.nsamples)*i print ds.shape Ds.append(ds) ds = mvpa.vstack(Ds) ds.samples = ds.samples.astype('float32') #Detrending and MC removal mvpa.poly_detrend(ds, opt_regs=['mc_'+param for param in mc], chunks_attr='chunks' ) #Voxelwise Zscore if zsc: mvpa.zscore(ds) #bandpass filter nf = 0.5/TR ws = [(1/lf)/nf, (1/hf)/nf] b, a = signal.butter(5, ws, btype='band') S = [signal.filtfilt(b, a, x) for x in ds.samples.T] ds.samples = np.array(S).T ds.samples = ds.samples.astype('float32') #Create Event-related Dataset
# Preprocess the Forrest Gump BOLD runs of subjects 1-19 with a
# subject-specific temporal-lobe mask and save one HDF5 file per subject.

# The dataset location is defined before the handle is created, so the
# handle always points at this path.
datapath = os.path.join('/home','data','psyinf','forrest_gump','anondata')

#get openFMRI dataset handle
dhandle = mvpa.OpenFMRIDataset(datapath)
model = 1
task = 1
T3 = False

#boldlist = sorted(glob.glob(os.path.join(datapath,'task002*')))
flavor = 'dico_bold7Tp1_to_subjbold7Tp1'

for subj in xrange(1,20):
    mask_fname = os.path.join('/home','mboos','SpeechEncoding','temporal_lobe_mask_brain_subj%02dbold.nii.gz' % subj)

    #load all runs of this subject; chunk labels are run_id-1
    run_datasets = []
    for run_id in dhandle.get_task_bold_run_ids(task)[subj]:
        run_ds = dhandle.get_bold_run_dataset(subj,task,run_id,chunks=run_id-1,mask=mask_fname,flavor=flavor)
        run_datasets.append(run_ds)

    #stack the runs, detrend linearly per chunk, z-score, then save
    s1ds = mvpa.vstack(run_datasets)
    mvpa.poly_detrend(s1ds,polyord=1,chunks_attr='chunks')
    mvpa.zscore(s1ds)
    s1ds.save(os.path.join('/home','mboos','SpeechEncoding','PreProcessed','FG_subj' + str(subj) + 'pp.gzipped.hdf5'),compression=9)
def preprocess_dataset(ds, type_, **kwargs):
    """
    Preprocess the dataset: linear detrending per file (when more than one
    file is present) and per chunk, optional label filtering and sample
    averaging, then feature-wise and/or sample-wise normalization.

    Parameters
    ----------
    ds : Dataset
        The dataset to be preprocessed
    type_ : string
        The experiment to be processed (not used inside this function)
    kwargs : dict
        mean_samples - boolean or 'True'/'False' : if samples should be
            averaged per ``event_num`` (default False)
        label_included - comma-separated string or list : labels to keep;
            'all' keeps every label (default 'all')
        label_dropped - string : label to be dropped (rest, fixation);
            the string 'None' drops nothing (default 'None')
        img_dim - int : when 4, an extra z-scoring pass per ``file`` is run
            before the global z-scoring (default None, i.e. no extra pass)
        normalization - string : 'feature', 'sample' or 'both'
            (default 'feature')

    Returns
    -------
    Dataset
        the processed dataset
    """
    # Every option has a default, so a missing keyword no longer leaves a
    # local name unbound.
    mean = kwargs.get('mean_samples', False)
    normalization = str(kwargs.get('normalization', 'feature'))
    label_dropped = kwargs.get('label_dropped', 'None')

    label_included = kwargs.get('label_included', 'all')
    if hasattr(label_included, 'split'):
        # Comma-separated string, the form the original code required.
        label_included = label_included.split(',')
    else:
        label_included = list(label_included)

    img_dim = kwargs.get('img_dim')
    if img_dim is not None:
        img_dim = int(img_dim)

    logger.info('Dataset preprocessing: Detrending...')
    # Detrend per file only when there is more than one file, then per chunk.
    if len(np.unique(ds.sa['file'])) != 1:
        poly_detrend(ds, polyord = 1, chunks_attr = 'file')
    poly_detrend(ds, polyord = 1, chunks_attr = 'chunks')

    if label_dropped != 'None':
        logger.info('Removing labels...')
        ds = ds[ds.sa.targets != label_dropped]

    if label_included != ['all']:
        keep = np.array([l in label_included for l in ds.sa.targets],
                        dtype='bool')
        ds = ds[keep]

    # str() lets both the boolean True and the string 'True' enable averaging.
    if str(mean) == 'True':
        logger.info('Dataset preprocessing: Averaging samples...')
        avg_mapper = mean_group_sample(['event_num'])
        ds = ds.get_mapped(avg_mapper)

    if normalization == 'feature' or normalization == 'both':
        logger.info('Dataset preprocessing: Normalization feature-wise...')
        if img_dim == 4:
            zscore(ds, chunks_attr='file')
        zscore(ds)#, param_est=('targets', ['fixation']))

    if normalization == 'sample' or normalization == 'both':
        #Normalizing image-wise
        logger.info('Dataset preprocessing: Normalization sample-wise...')
        ds.samples -= np.mean(ds, axis=1)[:, None]
        ds.samples /= np.std(ds, axis=1)[:, None]

        # A sample with zero standard deviation divides 0 by 0; those NaNs
        # are replaced by 0.
        ds.samples[np.isnan(ds.samples)] = 0

    ds.a.events = find_events(#event= ds.sa.event_num,
                              chunks = ds.sa.chunks,
                              targets = ds.sa.targets)

    return ds
if i == 0: ds = ds[:-4] elif i < 7: ds = ds[4:-4] else: ds = ds[4:] ds.sa['chunks'] = np.ones(ds.nsamples) * i print ds.shape Ds.append(ds) ds = mvpa.vstack(Ds) ds.samples = ds.samples.astype('float32') #Detrending and MC removal mvpa.poly_detrend(ds, opt_regs=['mc_' + param for param in mc], chunks_attr='chunks') #Voxelwise Zscore if zsc: mvpa.zscore(ds) #bandpass filter nf = 0.5 / TR ws = [(1 / lf) / nf, (1 / hf) / nf] b, a = signal.butter(5, ws, btype='band') S = [signal.filtfilt(b, a, x) for x in ds.samples.T] ds.samples = np.array(S).T ds.samples = ds.samples.astype('float32') #Create Event-related Dataset