def var_corrs(x, y, msk_list):
    """Print Spearman and Pearson correlations between two images per mask.

    For every mask file in ``msk_list``, both input images are loaded as
    masked datasets and the (transposed) sample matrices are correlated.
    """
    for mask in msk_list:
        samples_a = mvpa2.fmri_dataset(x, mask=mask).samples.T
        samples_b = mvpa2.fmri_dataset(y, mask=mask).samples.T
        print(mask)
        print(stats.spearmanr(samples_a, samples_b, nan_policy='omit'))
        print(stats.pearsonr(samples_a, samples_b))
def mk_movie_dataset(gd, subj, mask, task=1, flavor='', filter=None, writeto=None, add_fa=None):
    # Build a masked movie time-series dataset for one subject: load all 8
    # movie segments, attach motion-estimate and timing sample attributes,
    # optionally filter each segment, trim the 4-volume overlap between
    # consecutive segments, and either return the segments as a list or
    # dump each one to disk (``writeto`` is a filename template taking
    # (subj, task, seg)).
    # NOTE(review): the parameter name `filter` shadows the builtin, and
    # `not filter is None` is the non-idiomatic form of `is not None`.
    cur_max_time = 0
    segments = []
    for seg in range(1,9):
        print 'Seg', seg
        ds = fmri_dataset(
            gd.get_run_fmri(subj, task, seg, flavor=flavor),
            mask=mask, add_fa=add_fa)
        if task == 1:
            # sanitize TR: force a uniform 2.0 s sampling grid for the movie task
            ds.sa.time_coords = np.arange(len(ds)) * 2.0
        # attach the six rigid-body motion estimates as sample attributes
        mc = gd.get_run_motion_estimates(subj, task, seg)
        for i, par in enumerate(('mc_xtrans', 'mc_ytrans', 'mc_ztrans', 'mc_xrot', 'mc_yrot', 'mc_zrot')):
            ds.sa[par] = mc.T[i]
        ds.sa['movie_segment'] = [seg] * len(ds)
        # TR inferred from the (possibly sanitized) time coordinates
        TR = np.diff(ds.sa.time_coords).mean()
        if not filter is None:
            print 'filter'
            ds = filter(ds)
        # truncate segment time series to remove overlap
        if seg > 1:
            ds = ds[4:]
        if seg < 8:
            ds = ds[:-4]
        # movie_time runs continuously across segments
        ds.sa['movie_time'] = np.arange(len(ds)) * TR + cur_max_time
        cur_max_time = ds.sa.movie_time[-1] + TR
        if writeto is None:
            segments.append(ds)
        else:
            # dump to disk instead of accumulating in memory
            ds.samples = ds.samples.astype('float32')
            h5save(writeto % (subj, task, seg), ds, compression=9)
    return segments
def preprocess_and_tmp_save_fmri(data_path, task, subj, model, tmp_path, group_mask=None):
    '''
    Generator for preprocessed fMRI runs from one subject of Forrest Gump
    aligns to group template
    run-wise linear de-trending and z-scoring
    IN:
        data_path    -   string, path pointing to the Forrest Gump directory
        task         -   string, which part of the Forrest Gump dataset to load
        subj         -   int, subject to pre-process
        tmp_path     -   string, path to save the dataset temporarily to
    OUT:
        preprocessed fMRI samples per run'''
    # NOTE(review): the `model` parameter is unused in this function body.
    from nipype.interfaces import fsl
    dhandle = mvpa.OpenFMRIDataset(data_path)
    flavor = 'dico_bold7Tp1_to_subjbold7Tp1'
    if group_mask is None:
        # default: subject's brain mask warped into group template space
        group_mask = os.path.join(data_path, 'sub{0:03d}'.format(subj), 'templates', 'bold7Tp1', 'in_grpbold7Tp1', 'brain_mask.nii.gz')
    # subject-space brain mask used for the initial load
    mask_fname = os.path.join(data_path, 'sub{0:03d}'.format(subj), 'templates', 'bold7Tp1', 'brain_mask.nii.gz')
    for run_id in dhandle.get_task_bold_run_ids(task)[subj]:
        run_ds = dhandle.get_bold_run_dataset(subj, task, run_id, chunks=run_id - 1, mask=mask_fname, flavor=flavor)
        filename = 'brain_subj_{}_run_{}.nii.gz'.format(subj, run_id)
        tmp_file = os.path.join(tmp_path, filename)
        # write the masked samples back out as a NIfTI in subject space
        save(unmask(run_ds.samples.astype('float32'), mask_fname), tmp_file)
        # warp the run into group template space via FSL applywarp
        warp = fsl.ApplyWarp()
        warp.inputs.in_file = tmp_file
        warp.inputs.out_file = os.path.join(tmp_path, 'group_' + filename)
        warp.inputs.ref_file = os.path.join(data_path, 'templates', 'grpbold7Tp1', 'brain.nii.gz')
        warp.inputs.field_file = os.path.join(data_path, 'sub{0:03d}'.format(subj), 'templates', 'bold7Tp1', 'in_grpbold7Tp1', 'subj2tmpl_warp.nii.gz')
        warp.inputs.interp = 'nn'
        warp.run()
        os.remove(tmp_file)
        # reload the warped run in group space, then detrend + z-score run-wise
        run_ds = mvpa.fmri_dataset(os.path.join(tmp_path, 'group_' + filename), mask=group_mask, chunks=run_id - 1)
        mvpa.poly_detrend(run_ds, polyord=1)
        mvpa.zscore(run_ds)
        os.remove(os.path.join(tmp_path, 'group_' + filename))
        yield run_ds.samples.astype('float32')
def get_nav_sac_data(nav_attr, sac_attr, subj, subj_mask):
    """Load navigation and saccade t-stat data for one subject and combine
    them into a single dataset.

    Saccade samples are restricted to the two direction conditions
    (POWERFUL_DIRCT / POWERLESS_DIRCT) before being appended to the
    navigation dataset.
    """
    def _load(path, attr):
        # load one t-stat image and attach run (chunks) / condition (targets)
        # sample attributes from the given attribute table
        ds = fmri_dataset(samples=path, mask=subj_mask)
        ds.sa['chunks'] = attr.chunks
        ds.sa['targets'] = attr.targets
        return ds

    nav_ds = _load(TSTATS_DIR + 'nav_bin_tstats/%s_nav_bin.nii.gz' % subj, nav_attr)
    sac_ds = _load(TSTATS_DIR + 'sac_tstats/%s_sac.nii.gz' % subj, sac_attr)
    keep = np.isin(sac_ds.sa.targets, [POWERFUL_DIRCT, POWERLESS_DIRCT])
    sac_ds = sac_ds[keep]
    # stack saccade samples onto the navigation dataset (in place)
    combined = nav_ds
    combined.append(sac_ds)
    return combined
def preprocessed_fmri_dataset(bold_fname, preproc_img=None, preproc_ds=None, add_sa=None, **kwargs):
    """Load a BOLD image into an fMRI dataset, with optional preprocessing hooks.

    Parameters
    ----------
    bold_fname : str
      File name of BOLD scan
    preproc_img : callable or None
      See get_bold_run_dataset() documentation
    preproc_ds : callable or None
      If not None, this callable will be called with each run bold dataset
      as an argument before ``modelfx`` is executed. The callable must
      return a dataset.
    add_sa : dict or None
      Mapping of sample-attribute name -> values attached to the dataset.
    **kwargs
      Passed through to ``fmri_dataset`` (e.g. ``mask``).

    Returns
    -------
    Dataset
    """
    # open the BOLD image
    bold_img = nb.load(bold_fname)
    # idiom fix: `x is not None` instead of `not x is None` (PEP 8)
    if preproc_img is not None:
        bold_img = preproc_img(bold_img)
    # load (and mask) data
    ds = fmri_dataset(bold_img, **kwargs)
    if add_sa is not None:
        # attach every provided sample attribute
        for sa_name, sa_value in add_sa.items():
            ds.sa[sa_name] = sa_value
    if preproc_ds is not None:
        ds = preproc_ds(ds)
    return ds
def main():
    """Run an RSA searchlight per subject and write each result map to disk."""
    # subjects come from the command line, falling back to the full roster
    subjects = sys.argv[1:] if len(sys.argv) > 1 else EVERYONE
    print(subjects)
    attr = SampleAttributes(TSTATS_DIR + TSTATS_NAME + '_attr.txt')
    for subj in subjects:
        tstats_file = TSTATS_DIR + TSTATS_NAME + '_tstats/%s_%s.nii.gz' % (
            subj, TSTATS_NAME)
        ds = fmri_dataset(
            samples=tstats_file,
            mask=MASK_DIR + '%s_ribbon_rsmp0_dil3mm.nii.gz' % subj)
        ds.sa['chunks'] = attr.chunks
        ds.sa['targets'] = attr.targets
        ds = remove_invariant_features(ds)
        # searchlight driven by a custom dissimilarity structure
        measure = CustomDist(squareform(LABELS_NAV))
        sl = sphere_searchlight(measure, SEARCHLIGHT_RADIUS)
        sl_map = sl(ds)
        # save files
        result_img = map2nifti(data=sl_map, dataset=ds)
        result_img.to_filename(OUTDIR + OUTFILE % (subj, SEARCHLIGHT_RADIUS))
def tmp_save_fmri(datapath, task, subj, model): dhandle = mvpa.OpenFMRIDataset(datapath) #mask_fname = os.path.join('/home','mboos','SpeechEncoding','temporal_lobe_mask_brain_subj' + str(subj) + 'bold.nii.gz') flavor = 'dico_bold7Tp1_to_subjbold7Tp1' group_brain_mask = '/home/mboos/SpeechEncoding/brainmask_group_template.nii.gz' mask_fname = os.path.join(datapath, 'sub{0:03d}'.format(subj), 'templates', 'bold7Tp1', 'brain_mask.nii.gz') #mask_fname = '/home/mboos/SpeechEncoding/masks/epi_subj_{}.nii.gz'.format(subj) scratch_path = '/home/data/scratch/mboos/prepro/tmp/' for run_id in dhandle.get_task_bold_run_ids(task)[subj]: run_ds = dhandle.get_bold_run_dataset(subj,task,run_id,chunks=run_id-1,mask=mask_fname,flavor=flavor) filename = 'whole_brain_subj_{}_run_{}.nii.gz'.format(subj, run_id) tmp_path = scratch_path + filename save(unmask(run_ds.samples.astype('float32'), mask_fname), tmp_path) os.system('applywarp -i {0} -o {1} -r /home/data/psyinf/forrest_gump/anondata/templates/grpbold7Tp1/brain.nii.gz -w /home/data/psyinf/forrest_gump/anondata/sub{2:03}/templates/bold7Tp1/in_grpbold7Tp1/subj2tmpl_warp.nii.gz --interp=nn'.format(tmp_path, scratch_path+'group_'+filename,subj)) os.remove(tmp_path) run_ds = mvpa.fmri_dataset(scratch_path+'group_'+filename, mask=group_brain_mask, chunks=run_id-1) mvpa.poly_detrend(run_ds, polyord=1) mvpa.zscore(run_ds) joblib.dump(run_ds.samples.astype('float32'), '/home/data/scratch/mboos/prepro/tmp/whole_brain_subj_{}_run_{}.pkl'.format(subj, run_id)) os.remove(scratch_path+'group_'+filename) return run_ds.samples.shape[1]
def preprocessed_fmri_dataset(
        bold_fname, preproc_img=None, preproc_ds=None, add_sa=None, **kwargs):
    """Load a BOLD image into an fMRI dataset, with optional preprocessing hooks.

    Parameters
    ----------
    bold_fname : str
      File name of BOLD scan
    preproc_img : callable or None
      See get_bold_run_dataset() documentation
    preproc_ds : callable or None
      If not None, this callable will be called with each run bold dataset
      as an argument before ``modelfx`` is executed. The callable must
      return a dataset.
    add_sa : dict or None
      Mapping of sample-attribute name -> values attached to the dataset.
    **kwargs
      Passed through to ``fmri_dataset`` (e.g. ``mask``).

    Returns
    -------
    Dataset
    """
    # open the BOLD image
    bold_img = nb.load(bold_fname)
    # idiom fix: `x is not None` instead of `not x is None` (PEP 8)
    if preproc_img is not None:
        bold_img = preproc_img(bold_img)
    # load (and mask) data
    ds = fmri_dataset(bold_img, **kwargs)
    if add_sa is not None:
        # attach every provided sample attribute
        for sa_name, sa_value in add_sa.items():
            ds.sa[sa_name] = sa_value
    if preproc_ds is not None:
        ds = preproc_ds(ds)
    return ds
# Load per-volume attributes (chunks/targets) and build the masked fMRI
# dataset for this session (Python 2 / PyMVPA script fragment).
print 'loading and creating dataset', datetime.datetime.now()
# chunksTargets_boldDelay="chunksTargets_boldDelay4-4.txt" #Modified
chunksTargets_boldDelay = "chunksTargets_boldDelay{0}-{1}-direction.txt".format(
    boldDelay, stimulusWidth)
volAttribrutes = M.SampleAttributes(
    os.path.join(sessionPath, 'behavioural', chunksTargets_boldDelay))  # default is 3.txt.
# print volAttribrutes.targets
# print len(volAttribrutes.targets)
# print volAttribrutes.chunks
# print len(volAttribrutes.chunks)
dataset = M.fmri_dataset(
    samples=os.path.join(sessionPath, 'analyze/functional/functional4D.nii'),
    targets=volAttribrutes.targets,  # I think this was "labels" in versions 0.4.*
    chunks=volAttribrutes.chunks,
    mask=os.path.join(sessionPath, 'analyze/structural/lc2ms_deskulled.hdr'))

# DATASET ATTRIBUTES (see AttrDataset)
# Report geometry and how much of the volume survived masking.
print 'functional input has', dataset.a.voxel_dim, 'voxels of dimesions', dataset.a.voxel_eldim, 'mm'
print '... or', N.product(dataset.a.voxel_dim), 'voxels per volume'
print 'masked data has', dataset.shape[1], 'voxels in each of', dataset.shape[0], 'volumes'
print '... which means that', round(100 - 100 * dataset.shape[1] / N.product(dataset.a.voxel_dim)), '% of the voxels were masked out'
print 'of', dataset.shape[1], 'remaining features ...'
print 'summary of conditions/volumes\n', datetime.datetime.now()
print dataset.summary_targets()
import mvpa2.suite as mvpa2

# Compare voxelwise BIC median maps of three competing NARPS models by their
# summed and mean BIC (lower is better).
fn1 = '/scratch/scratch/ucjtbob/narps1_only_subval_model/BIC_level2/BIC_medians.nii.gz'
fn2 = '/scratch/scratch/ucjtbob/narps1_only_entropy_model/BIC_level2/BIC_medians.nii.gz'
fn3 = '/scratch/scratch/ucjtbob/narps1_subval_entropy/BIC_level2/BIC_medians.nii.gz'
#fn_BIC_diff = '/scratch/scratch/ucjtbob//BIC_diffs_results/subval_minus_entropy_means.nii.gz_T.nii.gz_tfce_corrp_tstat1.nii.gz'
#ds_diff = mvpa2.fmri_dataset(fn_BIC_diff)
# ROI mask paths (currently unused: msk stays None -> whole volume is loaded)
accumbens = '/scratch/scratch/ucjtbob/narps_masks/Accumbens_narps.nii.gz'
amygdala = '/scratch/scratch/ucjtbob/narps_masks/Amygdala_narps.nii.gz'
fmc = '/scratch/scratch/ucjtbob/narps_masks/Frontal_Medial_Cortex_narps.nii.gz'
msk = None
ds1 = mvpa2.fmri_dataset(fn1, mask=msk)
ds2 = mvpa2.fmri_dataset(fn2, mask=msk)
ds3 = mvpa2.fmri_dataset(fn3, mask=msk)
# drop constant voxels (e.g. zeros outside the brain)
ds1 = mvpa2.remove_invariant_features(ds1)
ds2 = mvpa2.remove_invariant_features(ds2)
ds3 = mvpa2.remove_invariant_features(ds3)
# rank the three models by total and mean BIC
# NOTE(review): `np` is used but not imported in this snippet — confirm
# `import numpy as np` exists in the full file.
bic_sums = [np.sum(ds1.samples), np.sum(ds2.samples), np.sum(ds3.samples)]
np.argsort(bic_sums)
bic_means = [np.mean(ds1.samples), np.mean(ds2.samples), np.mean(ds3.samples)]
np.argsort(bic_means)
#bic_means[0]/bic_means[1]
#bic_means
# Loading mask mask = load_mask(path, subj, **kwargs) # Check attributes/dataset sample mismatches vol_sum = np.sum([img.shape[3] for img in fmri_list]) if vol_sum != len(attr.targets): logger.debug('Volumes no.: '+str(vol_sum)+' Targets no.: '+str(len(attr.targets))) del fmri_list logger.error(subj + ' *** ERROR: Attributes Length mismatches with fMRI volumes! ***') raise ValueError('Attributes Length mismatches with fMRI volumes!') # Load the pymvpa dataset. try: logger.info('Loading dataset...') ds = fmri_dataset(fmri_list, targets=attr.targets, chunks=attr.chunks, mask=mask) logger.info('Dataset loaded...') except ValueError, e: logger.error(subj + ' *** ERROR: '+ str(e)) del fmri_list return 0; # Update Dataset attributes # # TODO: Evaluate if it is useful to build a dedicated function ev_list = [] events = find_events(targets = ds.sa.targets, chunks = ds.sa.chunks) for i in range(len(events)): duration = events[i]['duration'] for j in range(duration): ev_list.append(i+1)
'openfmri.org')

##Parameter
zsc = 1  #Voxelwise zscoring
samples_size = 12  #Length of segments in sec

# Pick the group brain-mask intersection matching the alignment flavor.
if align == 'nonlinear':
    maskfile = os.path.join(datapath, 'templates', 'grpbold7Tad', 'qa', 'dico7Tad2grpbold7Tad_nl', 'brain_mask_intersection.nii.gz')
elif align == 'linear':
    maskfile = os.path.join(datapath, 'templates', 'grpbold7Tad', 'qa', 'dico7Tad2grpbold7Tad7Tad', 'brain_mask_intersection.nii.gz')

# Load the mask image itself as a dataset (provides the voxel geometry/mapper).
ds = mvpa.fmri_dataset(maskfile, mask=maskfile)
dsfile = '_z' + str(zsc) + '_' + str(samples_size) + '_' + align

#Load dataset of two subjects and reorganise for univariate analysis
evds1 = mvpa.h5load(os.path.join('dataset', subj1 + dsfile + '.hdf5'))
evds1 = evds1.mapper.reverse(evds1)
evds2 = mvpa.h5load(os.path.join('dataset', subj2 + dsfile + '.hdf5'))
# NOTE(review): evds2 is reversed through evds1's mapper (not its own) —
# presumably both share the same mask geometry; confirm this is intentional.
evds2 = evds1.mapper.reverse(evds2)
evds = mvpa.vstack([evds1, evds2])
del evds1, evds2

# Prepare inter-subject correlation measure
# (class body continues beyond this snippet)
class Corr(mvpa.Measure):
    is_trained = True
# Loading mask mask = load_mask(path, **kwargs) roi_labels['brain'] = mask # Check roi_labels roi_labels = load_roi_labels(roi_labels) # Load the pymvpa dataset. try: logger.info('Loading dataset...') ds = fmri_dataset(fmri_list, targets=attr.targets, chunks=attr.chunks, mask=mask, add_fa=roi_labels) logger.debug('Dataset loaded...') except ValueError, e: logger.error(subj + ' *** ERROR: '+ str(e)) del fmri_list # Add filename attributes for detrending purposes ds = add_filename(ds, fmri_list) del fmri_list # Update Dataset attributes
# Load masked movie data IN GROUP TEMPLATE SPACE and assign ROIs as feature attributes # Set order of polynomial for detrending polyord = 3 movie_dss = [] for participant in participants: # Load movie data with brain mask for a participant movie_fns = sorted( glob(base_dir + participant + data_dir + '*_task-avmovie_run-*highpass_tmpl.nii.gz')) mask_fn = base_dir + participant + anat_dir + 'brain_mask_tmpl.nii.gz' assert len(movie_fns) == 8 # Include chunk (i.e., run) labels movie_ds = mv.vstack([ mv.fmri_dataset(movie_fn, mask=mask_fn, chunks=run) for run, movie_fn in enumerate(movie_fns) ]) # Assign participant labels as feature attribute movie_ds.fa['participant'] = [participant] * movie_ds.shape[1] print("Loaded movie data for participant {0}".format(participant)) # Perform linear detrending per chunk mv.poly_detrend(movie_ds, polyord=polyord, chunks_attr='chunks') # Perform low-pass filtering per chunk movie_ds.samples = clean(movie_ds.samples, sessions=movie_ds.sa.chunks, low_pass=.1, high_pass=None,
def buildremapper(ds_type, sub, data, rootdir = '.', anatdir = 'ses-movie/anat', rois=['FFA', 'LOC', 'PPA', 'VIS', 'EBA', 'OFA'], ): """During the hdf5 dataset creation, wrapping information was lost :-( This function attempts to recover this information: For full datasets, we load the brain group template -- for stripped ds, we build a new mask of only ROIs of the participants. Loading this as an fmri_dataset back into the analysis should yield a wrapper, that we can get the dataset lacking a wrapper 'get_wrapped'. """ # TODO: define rootdir, anatdir less hardcoded # Q: do I need to load participants brain warped into groupspace individually or is one general enough? if ds_type == 'full': brain = 'sourcedata/tnt/{}/bold3Tp2/in_grpbold3Tp2/head.nii.gz'.format(sub) mask = 'sourcedata/tnt/{}/bold3Tp2/in_grpbold3Tp2/brain_mask.nii.gz'.format(sub) #maybe take the study-template here. # brain = 'sourcedata/tnt/templates/grpbold3Tp2/brain.nii.gz' # head = 'sourcedata/tnt/templates/grpbold3Tp2/head.nii.gz' dummy = mv.fmri_dataset(brain, mask=mask) # # WIP -- still debating whether this is necessary. # elif ds_type == 'stripped': # # if the dataset is stripped, we have to make a custom mask... yet pondering whether that is worth the work... # # we have to build the masks participant-wise, because each participant has custom masks per run (possibly several)... 
# # create a dummy outlay: (first dim of hrf estimates should be number of voxel) # all_rois_mask = np.array([['placeholder'] * data.shape[1]]).astype('S10') # for roi in rois: # if roi == 'VIS': # roi_fns = sorted(glob(rootdir + participant + anatdir + \ # '{0}_*_mask_tmpl.nii.gz'.format(roi))) # else: # if bilateral: # # if its bilateralized we don't need to segregate based on hemispheres # # else: # # we need to segregate based on hemispheres # left_roi_fns = sorted(glob(rootdir + participant + anatdir + \ # 'l{0}*mask_tmpl.nii.gz'.format(roi))) # right_roi_fns = sorted(glob(rootdir + participant + anatdir + \ # 'r{0}*mask_tmpl.nii.gz'.format(roi))) # roi_fns = left_roi_fns + right_roi_fns # if len(roi_fns) > 1: # # if there are more than 1 mask, combine them # roi_mask = np.sum([mv.fmri_dataset(roi_fn, mask=mask_fn).samples for roi_fn in roi_fns], axis=0) # # Set any voxels that might exceed 1 to 1 # roi_mask = np.where(roi_mask > 0, 1, 0) # elif len(roi_fns) == 0: # # if there are no masks, we get zeros # print("ROI {0} does not exist for participant {1}; appending all zeros".format(roi, participant)) # roi_mask = np.zeros((1, data_ds.shape[1])) # elif len(roi_fns) == 1: # roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples # ## continue here # now that we have a dummy ds with a wrapper, we can project the betas into a brain --> map2nifti # does that. If we save that, we should be able to load it into FSL. return mv.map2nifti(dummy, data)
import sys #import pandas as pd import numpy as np import mvpa2.suite as mvpa2 fn = sys.argv[1] + 'res4d.nii.gz' #fn = '/scratch/scratch/ucjtbob/narps1_only_entropy_model/narps_level1/sub001_run01.feat/stats/res4d.nii.gz' #k = 11 print(fn) ds = mvpa2.fmri_dataset(fn) RSS = np.sum(np.power(ds.samples, 2), axis=0) k = int(sys.argv[2]) print(k, " PEs") n = ds.shape[0] print(n, " data points") #this was to verify with sigmasquareds.nii.gz #fn2 = '/scratch/scratch/ucjtbob/narps1_only_entropy_model/narps_level1/sub001_run01.feat/stats/sigmasquareds.nii.gz' #ds2 = mvpa2.fmri_dataset(fn2) #RSS2 = ds2.samples * (n-k) BIC = k * np.log(n) + n * np.log(RSS / n) BIC[~np.isfinite(BIC)] = 0 print(np.sum(BIC), ' BIC') print(BIC.shape) ds.samples = BIC
def createdataset(analysis, datadir, rootdir, anatdir, eventdir, zscore, rois):
    """
    Build an hdf5 dataset.

    Loads all runs per participant (localizer: 4 runs, avmovie: 8 runs),
    optionally z-scores them, attaches ROI masks as feature attributes,
    saves the per-participant collection and a transposed group dataset.
    """
    # initialize a list to load all datasets into:
    data_dss = []

    # get list of participants from root dir
    participants = sorted(
        [path.split('/')[-1] for path in glob(rootdir + 'sub-*')])
    assert len(participants) != 0
    print('The following participants were found: {}'.format(participants))

    for participant in participants:
        # count the number of participant substitutions necessary
        data_fns = sorted(glob(rootdir + participant + datadir))
        print(rootdir + participant + datadir)
        mask_fn = rootdir + participant + anatdir + 'brain_mask_tmpl.nii.gz'
        if analysis == 'localizer':
            assert len(data_fns) == 4
        if analysis == 'avmovie':
            assert len(data_fns) == 8
        # stack runs; chunk attribute encodes the run index
        data_ds = mv.vstack([
            mv.fmri_dataset(data_fn, mask=mask_fn, chunks=run)
            for run, data_fn in enumerate(data_fns)
        ])
        data_ds.fa['participant'] = [participant] * data_ds.shape[1]
        print('loaded data for participant {}.'.format(participant))

        # z scoring
        if analysis == 'localizer' and zscore == 'baseline-zscore':
            # z-score against means/stds from no-stimulation baseline periods
            events = get_group_events(eventdir)
            means, stds = extract_baseline(events, data_ds)
            mv.zscore(data_ds, params=(means, stds), chunks_attr='chunks')
            print('finished baseline zscoring for participant {}.'.format(
                participant))
        elif zscore == 'zscore':
            mv.zscore(data_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')

        # roi masks: every voxel starts out labelled 'brain'
        all_rois_mask = np.array([['brain'] * data_ds.shape[1]]).astype('S10')
        for roi in rois:
            # Get filenames for potential right and left ROI masks
            if roi == 'VIS':
                roi_fns = sorted(glob(rootdir + participant + anatdir + \
                    '{0}_*_mask_tmpl.nii.gz'.format(roi)))
            else:
                left_roi_fns = sorted(glob(rootdir + participant + anatdir + \
                    'l{0}*mask_tmpl.nii.gz'.format(roi)))
                right_roi_fns = sorted(glob(rootdir + participant + anatdir + \
                    'r{0}*mask_tmpl.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns
            if len(roi_fns) == 0:
                print(
                    "ROI {0} does not exist for participant {1}; appending all zeros"
                    .format(roi, participant))
                roi_mask = np.zeros((1, data_ds.shape[1]))
            elif len(roi_fns) == 1:
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            elif len(roi_fns) > 1:
                # Add ROI maps into single map
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ], axis=0)
                # Set any voxels that might exceed 1 to 1
                roi_mask = np.where(roi_mask > 0, 1, 0)

            # Ensure that number of voxels in ROI mask matches dataset dimension
            assert roi_mask.shape[1] == data_ds.shape[1]

            # Flatten mask into list
            roi_flat = list(roi_mask.ravel())

            # Assign ROI mask to data feature attributes
            data_ds.fa[roi] = roi_flat

            # Get lateralized masks as well (1 = left hemisphere, 2 = right)
            if roi != 'VIS':
                lat_roi_mask = np.zeros((1, data_ds.shape[1]))
                if len(left_roi_fns) == 1:
                    left_roi_mask = np.where(
                        mv.fmri_dataset(left_roi_fns[0],
                                        mask=mask_fn).samples > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) > 1:
                    left_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(left_roi_fn, mask=mask_fn).samples
                            for left_roi_fn in left_roi_fns
                        ], axis=0) > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) == 0:
                    left_roi_mask = np.zeros((1, data_ds.shape[1]))

                if len(right_roi_fns) == 1:
                    right_roi_mask = np.where(
                        mv.fmri_dataset(right_roi_fns[0],
                                        mask=mask_fn).samples > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) > 1:
                    right_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(right_roi_fn, mask=mask_fn).samples
                            for right_roi_fn in right_roi_fns
                        ], axis=0) > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) == 0:
                    right_roi_mask = np.zeros((1, data_ds.shape[1]))

                # Ensure that number of voxels in ROI mask matches dataset dimension
                assert lat_roi_mask.shape[1] == data_ds.shape[1]

                # Flatten mask into list
                lat_roi_flat = list(lat_roi_mask.ravel())

                # Assign ROI mask to data feature attributes
                data_ds.fa['lat_' + roi] = lat_roi_flat

                # Check existing feature attribute for all ROIS for overlaps
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0))
                         & (all_rois_mask != 'brain'), 'overlap')
                all_rois_mask[(left_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'left {0}'.format(roi)
                all_rois_mask[(right_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'right {0}'.format(roi)
            elif roi == 'VIS':
                roi_fns = sorted(
                    glob(rootdir + participant + anatdir +
                         '/{0}_*_mask_tmpl.nii.gz'.format(roi)))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ], axis=0)
                np.place(all_rois_mask,
                         (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = roi

        # Flatten mask into list
        all_rois_flat = list(all_rois_mask.ravel())

        # Assign roi mask to dataset feature attributes
        data_ds.fa['all_ROIs'] = all_rois_flat

        # join all datasets
        data_dss.append(data_ds)

    # save full dataset
    mv.h5save(outdir + '{}_groupdataset.hdf5'.format(analysis), data_dss)
    print('saved the collection of all subjects datasets.')
    # squish everything together
    ds_wide = mv.hstack(data_dss)
    # transpose the dataset, time points are now features
    ds = mv.Dataset(ds_wide.samples.T,
                    sa=ds_wide.fa.copy(),
                    fa=ds_wide.sa.copy())
    mv.h5save(outdir + '{}_groupdataset_transposed.hdf5'.format(analysis), ds)
    print('Transposed the group-dataset and saved it.')
    return ds
# Load one subject/ROI beta-series dataset, z-score per run, clean features,
# and run pairwise classification over all stimulus pairs.
filename = sub + '_' + roi
print sub
behav_file = 'all_attr.txt'
print roi
bold_fname = os.path.join(cwd1, sub, 'betas_sub' + sub + '.nii.gz')  #full functional timeseries (beta series)
mask_fname = os.path.join(cwd1, sub, 'native_masks', roi)  #chooses the mask for a given ROI
attr_fname = os.path.join(cwd1, sub, behav_file)  #codes stimuli number and run number
attr = mvpa2.SampleAttributes(attr_fname)  #loads attributes into pymvpa
ds = mvpa2.fmri_dataset(
    bold_fname, targets=attr.targets, chunks=attr.chunks, mask=mask_fname
)  #loads dataset with appropriate mask and attribute information
mvpa2.zscore(ds, chunks_attr='chunks')  #z-scores dataset per run
ds = mvpa2.remove_nonfinite_features(ds)
ds = mvpa2.remove_invariant_features(ds)

# collect the 16 unique stimulus labels
stimuli = []
for i in range(0, 16):
    stimuli.append(ds.uniquetargets[i])

#create all possible pairs for confusion matrix
pair_list = list(itertools.combinations(range(len(stimuli)), 2))
pair_list2 = []
for x in range(0, len(pair_list)):
    pair_list2.append([stimuli[pair_list[x][0]], stimuli[pair_list[x][1]]])

test_accs, val_accs, nfs_per_chunk, val_chunks = clf_wrapper(ds, pair_list2)
# For every second-level FEAT folder, flip the sign of zstat1 and save it as
# zstat2; afterwards load the group-wise z maps for both regressors.
pwd = '/scratch/scratch/ucjtbob'
model_dir = '/narps1-5_subval_entropy'
level = '/narps_level2'
msk = pwd + model_dir + '/narps_level3/interceptAllSubs.gfeat/cope1.feat/intercept_msk.nii.gz'
cope_num = 3  #1 intercept, 2 sv, 3 de

work_dir = pwd + model_dir + level
fldrs = os.listdir(work_dir)
fldrs.sort()

for fldr in fldrs:
    print(fldr)
    sub_fldr = work_dir + '/' + fldr
    z_stat1 = sub_fldr + '/cope' + str(cope_num) + '.feat/stats/zstat1.nii.gz'  #cope3.feat is for entropy
    ds_tmp = mvpa2.fmri_dataset(z_stat1)
    # negate the z map and write it back out under a new name
    ds_tmp.samples = ds_tmp.samples*-1
    nimg = mvpa2.map2nifti(ds_tmp)
    nimg.to_filename(sub_fldr + '/cope' + str(cope_num) + '.feat/stats/zstat2.nii.gz')

#compute which var wins w.r.t. absolute value
mn_dir = '/second_level_diffs/signed_diffs/flip_DE_sign'  #'/second_level_diffs/signed_diffs/zstat1s' #
entropies = pwd + model_dir + mn_dir + '/entropies_z.nii.gz'
subvals = pwd + model_dir + mn_dir + '/subval_z.nii.gz'
ds_DE = mvpa2.fmri_dataset(entropies)
ds_SV = mvpa2.fmri_dataset(subvals)
# mean z value per voxel across samples, for each regressor
ds_DE_mn = np.mean(ds_DE.samples,axis=0)
ds_SV_mn = np.mean(ds_SV.samples,axis=0)
mvpa.debug.active += ["SLC"] #Set working and data directory path = os.path.join('/home','data','exppsy','baumgartner','forrestgump') datapath = os.path.join('/home','data','exppsy','forrest_gump','openfmri.org') ##Parameter zsc = 1 #Voxelwise zscoring samples_size = 12 #Length of segments in sec if align=='nonlinear': maskfile = os.path.join(datapath,'templates', 'grpbold7Tad','qa', 'dico7Tad2grpbold7Tad_nl','brain_mask_intersection.nii.gz') elif align=='linear': maskfile = os.path.join(datapath,'templates', 'grpbold7Tad','qa', 'dico7Tad2grpbold7Tad7Tad','brain_mask_intersection.nii.gz') ds = mvpa.fmri_dataset(maskfile, mask=maskfile) dsfile = '_z'+str(zsc)+'_'+str(samples_size)+'_'+align #Load dataset of two subjects and reorganise for univariate analysis evds1 = mvpa.h5load(os.path.join('dataset',subj1+dsfile+'.hdf5')) evds1 = evds1.mapper.reverse(evds1) evds2 = mvpa.h5load(os.path.join('dataset',subj2+dsfile+'.hdf5')) evds2 = evds1.mapper.reverse(evds2) evds = mvpa.vstack([evds1,evds2]) del evds1, evds2 # Prepare inter-subject correlation measure class Corr(mvpa.Measure): is_trained = True def __init__(self,subj1,subj2, **kwargs): mvpa.Measure.__init__(self, **kwargs)
def buildadataset(zscore, rois, event_path=None):
    """buildataset() will build and save participant-specific hdf5 datasets
    with all rois from preprocessed objectcategories data, stack them for a
    group dataset and save them, and transpose the group dataset and save it.
    The parameter 'zscore' determines whether and what kind of z-scoring
    should be performed."""
    print('I am building a dataset with the following option: {}.'.format(
        zscore))

    # get the participants and rois
    participants = sorted(
        [path.split('/')[-1] for path in glob(base_dir + 'sub-*')])
    localizer_dss = []

    for participant in participants:
        # all four localizer runs of this participant
        localizer_fns = sorted(glob(base_dir + participant + locdir + \
            '{}_task-objectcategories_run-*_space-custom-subject_desc-highpass_bold.nii.gz'.format(
                participant)))
        mask_fn = base_dir + participant + anat_dir + 'brain_mask.nii.gz'
        assert len(localizer_fns) == 4
        # stack runs; chunk attribute encodes the run index
        localizer_ds = mv.vstack([
            mv.fmri_dataset(localizer_fn, mask=mask_fn, chunks=run)
            for run, localizer_fn in enumerate(localizer_fns)
        ])

        localizer_ds.fa['participant'] = [participant] * localizer_ds.shape[1]
        print('loaded localizer data for participant {}.'.format(participant))

        # zscore the data with means and standard deviations from no-stimulation
        # periods
        if zscore == 'custom':
            events = get_group_events(event_path)
            means, stds = extract_baseline(events, localizer_ds)
            # zscore stuff
            mv.zscore(localizer_ds, params=(means, stds), chunks_attr='chunks')
            print('finished custom zscoring for participant {}.'.format(
                participant))
        elif zscore == 'z-score':
            mv.zscore(localizer_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')

        # every voxel starts out labelled 'brain'
        all_rois_mask = np.array([['brain'] * localizer_ds.shape[1]
                                  ]).astype('S10')
        for roi in rois:
            # Get filenames for potential right and left ROI masks
            if roi == 'VIS':
                roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                    '{0}_*_mask.nii.gz'.format(roi)))
            else:
                left_roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                    'l{0}_*_mask.nii.gz'.format(roi)))
                right_roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                    'r{0}_*_mask.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns
            if len(roi_fns) == 0:
                print(
                    "ROI {0} does not exist for participant {1}; appending all zeros"
                    .format(roi, participant))
                roi_mask = np.zeros((1, localizer_ds.shape[1]))
            elif len(roi_fns) == 1:
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            elif len(roi_fns) > 1:
                # Add ROI maps into single map
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ], axis=0)
                # Set any voxels that might exceed 1 to 1
                roi_mask = np.where(roi_mask > 0, 1, 0)

            # Ensure that number of voxels in ROI mask matches localizer data
            assert roi_mask.shape[1] == localizer_ds.shape[1]

            # Flatten mask into list
            roi_flat = list(roi_mask.ravel())

            # Assign ROI mask to localizer data feature attributes
            localizer_ds.fa[roi] = roi_flat

            # Get lateralized masks as well (1 = left hemisphere, 2 = right)
            if roi != 'VIS':
                lat_roi_mask = np.zeros((1, localizer_ds.shape[1]))
                if len(left_roi_fns) == 1:
                    left_roi_mask = np.where(
                        mv.fmri_dataset(left_roi_fns[0],
                                        mask=mask_fn).samples > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) > 1:
                    left_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(left_roi_fn, mask=mask_fn).samples
                            for left_roi_fn in left_roi_fns
                        ], axis=0) > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) == 0:
                    left_roi_mask = np.zeros((1, localizer_ds.shape[1]))

                if len(right_roi_fns) == 1:
                    right_roi_mask = np.where(
                        mv.fmri_dataset(right_roi_fns[0],
                                        mask=mask_fn).samples > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) > 1:
                    right_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(right_roi_fn, mask=mask_fn).samples
                            for right_roi_fn in right_roi_fns
                        ], axis=0) > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) == 0:
                    right_roi_mask = np.zeros((1, localizer_ds.shape[1]))

                # Ensure that number of voxels in ROI mask matches localizer data
                assert lat_roi_mask.shape[1] == localizer_ds.shape[1]

                # Flatten mask into list
                lat_roi_flat = list(lat_roi_mask.ravel())

                # Assign ROI mask to localizer data feature attributes
                localizer_ds.fa['lat_' + roi] = lat_roi_flat

                # Check existing feature attribute for all ROIS for overlaps
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0))
                         & (all_rois_mask != 'brain'), 'overlap')
                all_rois_mask[(left_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'left {0}'.format(roi)
                all_rois_mask[(right_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'right {0}'.format(roi)
            elif roi == 'VIS':
                roi_fns = sorted(
                    glob(base_dir + participant + anat_dir +
                         '/{0}_*_mask.nii.gz'.format(roi)))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ], axis=0)
                np.place(all_rois_mask,
                         (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = roi

        # Flatten mask into list
        all_rois_flat = list(all_rois_mask.ravel())

        # Assign ROI mask to localizer data feature attributes
        localizer_ds.fa['all_ROIs'] = all_rois_flat

        if save_per_subject:
            mv.h5save(base_dir + participant + locdir + \
                '{}_ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5'.format(
                    participant), localizer_ds)
            print('Saved dataset for {}.'.format(participant))

        # join all datasets
        localizer_dss.append(localizer_ds)

    # save full dataset
    mv.h5save(
        results_dir +
        'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5',
        localizer_dss)
    print('saved the collection of all subjects datasets.')

    # squish everything together
    ds_wide = mv.hstack(localizer_dss)

    # transpose the dataset, time points are now features
    ds = mv.Dataset(ds_wide.samples.T,
                    sa=ds_wide.fa.copy(),
                    fa=ds_wide.sa.copy())
    mv.h5save(
        results_dir +
        'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass_transposed.hdf5',
        ds)
    print('Transposed the group-dataset and saved it.')
    return ds
# Load one subject's whole-brain beta series (no mask), clean features, then
# select this job's feature slab of ~1000 voxels for parallel processing.
filename = sub + '_fset' + str(fset_num) + '_chunk' + str(chunk_num[0])
print sub
print np.array(job_table).shape
behav_file = 'sub' + sub + '_attr.txt'
bold_fname = os.path.join(cwd1, sub, 'betas_sub' + sub + '.nii.gz')  #full functional timeseries (beta series)
attr_fname = os.path.join(cwd1, 'all_attr', behav_file)  #codes stimuli number and run number
attr = mvpa2.SampleAttributes(attr_fname)  #loads attributes into pymvpa
ds = mvpa2.fmri_dataset(bold_fname, targets=attr.targets, chunks=attr.chunks)
ds = mvpa2.remove_nonfinite_features(ds)
ds = mvpa2.remove_invariant_features(ds)

#this basically breaks up the brain into 100 different areas (to parallelize the searchlight)
# NOTE(review): bare except — the slice below cannot actually raise for an
# out-of-range stop index in numpy/pymvpa slicing; confirm the fallback is needed.
try:
    ds = ds[:, fset_num * 1000:(fset_num * 1000) + 1000]
except:
    ds = ds[:, fset_num * 1000:]

# collect the 54 unique stimulus labels
stimuli = []
for i in range(0, 54):
    stimuli.append(ds.uniquetargets[i])

#create all possible pairs for confusion matrix
def preprocessing(ds_p, ref_space, warp_files, mask_p, **kwargs):
    """Warp one run's fMRI image into ``ref_space`` and load it as a PyMVPA
    dataset, optionally detrended, z-scored, and segmented into events.

    Parameters
    ----------
    ds_p : str
        Path to the run's image file; must contain "sub-XX" so the
        two-digit subject number can be sliced out of it.
    ref_space : str
        Identifier of the target reference space (passed to helpers).
    warp_files : list of str
        Candidate warp files; the first one whose name contains the
        subject number is used.
    mask_p : str or None
        Path to a mask image (see review note below about None handling).
    **kwargs
        detrending, use_zscore, use_events, anno_dir, use_glm_estimates,
        targets, event_offset, event_dur, save_disc_space, rois --
        defaults are visible in the kwargs.get() calls below.

    Returns
    -------
    PyMVPA dataset for the run, with 'participant', 'movie_type' and
    'chunks' sample attributes attached.
    """
    # NOTE(review): str(mask_p) turns an actual None into the string 'None',
    # so the 'mask_p is not None' check further down can never be False --
    # confirm whether callers are expected to pass None here.
    mask_p = str(mask_p)
    ref_space = str(ref_space)
    detrending = kwargs.get('detrending', None)
    use_zscore = kwargs.get('use_zscore', True)
    use_events = kwargs.get('use_events', False)
    anno_dir = kwargs.get('anno_dir', None)
    use_glm_estimates = kwargs.get('use_glm_estimates', False)
    targets = kwargs.get('targets', None)
    event_offset = kwargs.get('event_offset', None)
    event_dur = kwargs.get('event_dur', None)
    save_disc_space = kwargs.get('save_disc_space', True)
    rois = kwargs.get('rois', None)

    # two-digit subject number extracted from the path right after "sub-"
    vp_num_str = ds_p[(ds_p.find("sub") + 4):(ds_p.find("sub") + 6)]
    # pick the (first) warp file belonging to this subject
    warp_file = [warp_file for warp_file in warp_files
                 if warp_file.find(vp_num_str) != -1][0]
    part_info = find_participant_info(ds_p)
    if save_disc_space:
        # single shared temp name -- the warped file is overwritten per run
        temp_file_add = "tmp_warped_data_file.nii.gz"
        temp_file = str((Path.cwd().parents[0]).joinpath("data", "tmp",
                                                         temp_file_add))
    else:
        # keep a uniquely named warped file per subject/movie/run
        temp_file_add = "sub-{}_{}-movie_run-{}_warped_file.nii.gz".format(
            part_info[0], part_info[1], int(part_info[2]))
        temp_file = str((Path.cwd().parents[0]).joinpath(
            "data", "tmp", "runs_for_testing", temp_file_add))  # change

    warped_ds = warp_image(ds_p, ref_space, warp_file, temp_file,
                           save_disc_space=save_disc_space)

    # Poll until the warped image appears on disk.
    # NOTE(review): loops forever if warp_image failed silently -- consider
    # a timeout.
    while not os.path.exists(warped_ds):
        time.sleep(5)

    # NOTE(review): if this isfile() check is ever False, 'ds' is unbound
    # and the attribute assignments below raise NameError.
    if os.path.isfile(warped_ds):
        if mask_p is not None:
            mask = get_adjusted_mask(mask_p, ref_space)
            if rois is not None:
                ds = mvpa.fmri_dataset(samples=warped_ds, mask=mask,
                                       add_fa=rois)
            else:
                ds = mvpa.fmri_dataset(samples=warped_ds, mask=mask)
        else:
            if rois is not None:
                ds = mvpa.fmri_dataset(samples=warped_ds, add_fa=rois)
            else:
                ds = mvpa.fmri_dataset(samples=warped_ds)

    # attach subject / movie / run info as sample attributes
    ds.sa['participant'] = [int(part_info[0])]
    ds.sa["movie_type"] = [part_info[1]]
    ds.sa['chunks'] = [int(part_info[2])]
    if detrending is not None:
        # NOTE(review): the value of 'detrending' is ignored -- any
        # non-None value triggers a linear (polyord=1) detrend; confirm.
        detrender = mvpa.PolyDetrendMapper(polyord=1)
        ds = ds.get_mapped(detrender)
    if use_zscore:
        mvpa.zscore(ds)
    if use_events:
        events = create_event_dict(anno_dir, ds_p, targets, event_dur)
        if use_glm_estimates:
            # one modeled sample per condition via HRF fitting
            ds = mvpa.fit_event_hrf_model(ds, events,
                                          time_attr='time_coords',
                                          condition_attr='targets')
        else:
            # raw boxcar samples around each event
            ds = mvpa.extract_boxcar_event_samples(
                ds, events=events, time_attr='time_coords',
                match='closest', event_offset=event_offset,
                event_duration=event_dur, eprefix='event',
                event_mapper=None)
            ds = fix_info_after_events(ds)
    return ds
# --- Per-run dataset assembly ---
# Optionally smooths the BOLD image, loads it as a PyMVPA dataset, restores
# the original timing info, runs the configured post-processing callback,
# and appends the result to ds_list.
print("bold_image -> %s" % bold_filename)
print("mask_image -> %s" % args.mask)
print("Filter_sel -> %s" % args.filter_type)
print("FWHM -> %d" % args.fwhm)
orig_bold = nib.load(bold_filename)
if args.filter_type == 'none':
    fmri_img = orig_bold
else:
    fmri_img = gaussian_spatial_filter(orig_bold,
                                       ftype=args.filter_type,
                                       fwhm=args.fwhm,
                                       bandwidth=args.dog_bandwidth)
tsds = fmri_dataset(fmri_img, mask=args.mask)
# load original data to get actual timing info, and avoid potential
# problems from pre-processing above
tsds.sa.time_coords = fmri_dataset(bold_filename).sa.time_coords
# post-process time series dataset -- possibly modeling
run_mkds_args = {k: v[run_id] for k, v in mkds_args.items()}
ds = args.mkds(tsds, **run_mkds_args)
# fail early with a clear message if the configured target/chunk sample
# attributes are missing from the produced dataset
for attr in ('target', 'chunk'):
    attr_val = getattr(args, '{}_attr'.format(attr))
    if attr_val not in ds.sa.keys():
        # fixed: message previously ended with a stray '"'
        raise RuntimeError(
            '{} "{}" not found in dataset attributes: {}'.format(
                attr, attr_val, ds.sa.keys()))
ds_list.append(ds)
if align=='nonlinear': boldfile = 'bold_dico_dico7Tad2grpbold7Tad_nl.nii.gz' maskfile = os.path.join(datapath,'templates', 'grpbold7Tad','qa', 'dico7Tad2grpbold7Tad_nl','brain_mask_intersection.nii.gz') elif align=='linear': boldfile = 'bold_dico_dico7Tad2grpbold7Tad.nii.gz' maskfile = os.path.join(datapath,'templates', 'grpbold7Tad','qa', 'dico7Tad2grpbold7Tad','brain_mask_intersection.nii.gz') boldlist = np.sort(glob.glob(os.path.join(datapath,subj,'BOLD','task001*'))) print subj #Concatenate segments and remove presentation overlap at the end and begin of each segment Ds = [] for i,run in enumerate(boldlist): print run ds = mvpa.fmri_dataset(os.path.join(datapath,run,boldfile), mask=maskfile) mc = mvpa.McFlirtParams(os.path.join(run, 'bold_dico_moco.txt')) for param in mc: ds.sa['mc_' + param] = mc[param] if i==0: ds = ds[:-4] elif i<7: ds = ds[4:-4] else: ds = ds[4:] ds.sa['chunks'] = np.ones(ds.nsamples)*i print ds.shape Ds.append(ds) ds = mvpa.vstack(Ds) ds.samples = ds.samples.astype('float32')
# --- Load-or-build dataset, then linear detrend (Python 2 script fragment) ---
# NOTE(review): the 'and False' below unconditionally disables the cache
# branch (debug leftover?) -- every invocation regenerates the dataset from
# scratch; confirm whether cache loading should be re-enabled.
if os.path.isfile(preprocessedCache) and False:
    print 'loading cached preprocessed dataset',preprocessedCache,datetime.datetime.now()
    dataset = pickle.load(gzip.open(preprocessedCache, 'rb', 5))
else: # if not, generate directly, and then cache
    print 'loading and creating dataset',datetime.datetime.now()
    # chunksTargets_boldDelay="chunksTargets_boldDelay4-4.txt" #Modified
    # attribute file name encodes BOLD delay and stimulus width
    chunksTargets_boldDelay="chunksTargets_boldDelay{0}-{1}-LanguageSwitch-Japanese_English.txt".format(boldDelay, stimulusWidth)
    volAttribrutes = M.SampleAttributes(os.path.join(sessionPath,'behavioural',chunksTargets_boldDelay)) # default is 3.txt.
    # print volAttribrutes.targets
    # print len(volAttribrutes.targets)
    # print volAttribrutes.chunks
    # print len(volAttribrutes.chunks)
    dataset = M.fmri_dataset(samples=os.path.join(sessionPath,'analyze/functional/functional4D.nii'),
                             targets=volAttribrutes.targets, # I think this was "labels" in versions 0.4.*
                             chunks=volAttribrutes.chunks,
                             mask=os.path.join(sessionPath,'analyze/structural/lc2ms_deskulled.hdr'))

# DATASET ATTRIBUTES (see AttrDataset)
# assumes these summary prints run on both the cached and the freshly
# built dataset -- TODO confirm original indentation intent
print 'functional input has',dataset.a.voxel_dim,'voxels of dimesions',dataset.a.voxel_eldim,'mm'
print '... or',N.product(dataset.a.voxel_dim),'voxels per volume'
print 'masked data has',dataset.shape[1],'voxels in each of',dataset.shape[0],'volumes'
print '... which means that',round(100-100*dataset.shape[1]/N.product(dataset.a.voxel_dim)),'% of the voxels were masked out'
print 'of',dataset.shape[1],'remaining features ...'
print 'summary of conditions/volumes\n',datetime.datetime.now()
print dataset.summary_targets()

# DETREND
print 'detrending (remove slow drifts in signal, and jumps between runs) ...',datetime.datetime.now() # can be very memory intensive!
M.poly_detrend(dataset, polyord=1, chunks_attr='chunks') # linear detrend
print '... done',datetime.datetime.now()
# NOTE(review): this snippet begins mid-statement -- the first line below is
# the tail of a maskfile = os.path.join(...) call from the 'nonlinear'
# branch whose start lies outside this chunk.
                            'brain_mask_intersection.nii.gz')
elif align == 'linear':
    boldfile = 'bold_dico_dico7Tad2grpbold7Tad.nii.gz'
    # intersection mask: restricts features to voxels covered in all subjects
    maskfile = os.path.join(datapath, 'templates', 'grpbold7Tad', 'qa',
                            'dico7Tad2grpbold7Tad',
                            'brain_mask_intersection.nii.gz')
boldlist = np.sort(glob.glob(os.path.join(datapath, subj, 'BOLD',
                                          'task001*')))
print subj
#Concatenate segments and remove presentation overlap at the end and begin of each segment
Ds = []
for i, run in enumerate(boldlist):
    print run
    ds = mvpa.fmri_dataset(os.path.join(datapath, run, boldfile),
                           mask=maskfile)
    # attach each McFlirt motion parameter time course as a sample attribute
    mc = mvpa.McFlirtParams(os.path.join(run, 'bold_dico_moco.txt'))
    for param in mc:
        ds.sa['mc_' + param] = mc[param]
    # trim the 4-volume presentation overlap between consecutive segments:
    # first run loses only its tail, middle runs both ends, last run its head
    if i == 0:
        ds = ds[:-4]
    elif i < 7:
        ds = ds[4:-4]
    else:
        ds = ds[4:]
    ds.sa['chunks'] = np.ones(ds.nsamples) * i
    print ds.shape
    Ds.append(ds)
ds = mvpa.vstack(Ds)
# downcast to single precision to halve the memory footprint
ds.samples = ds.samples.astype('float32')