def get_voxel_coords(ds, append=True, zscore=True):
    """Append voxel coordinates (and their squares, cubes, and pairwise
    products) to a dataset. If append=False, return a dataset that contains
    only the coordinates and no fMRI data; such a dataset is useful as a
    sanity check for the classification."""
    ds_coords = ds.copy('deep')
    # Append voxel coordinates (and squares, cubes)
    products = np.column_stack(
        (ds.sa.voxel_indices[:, 0] * ds.sa.voxel_indices[:, 1],
         ds.sa.voxel_indices[:, 0] * ds.sa.voxel_indices[:, 2],
         ds.sa.voxel_indices[:, 1] * ds.sa.voxel_indices[:, 2],
         ds.sa.voxel_indices[:, 0] * ds.sa.voxel_indices[:, 1] *
         ds.sa.voxel_indices[:, 2]))
    coords = np.hstack((ds.sa.voxel_indices,
                        ds.sa.voxel_indices ** 2,
                        ds.sa.voxel_indices ** 3,
                        products))
    coords = mv.Dataset(coords, sa=ds_coords.sa)
    if zscore:
        mv.zscore(coords, chunks_attr='participant')
    ds_coords.fa.clear()
    if append:
        ds_coords.samples = np.hstack((ds_coords.samples, coords.samples))
    else:
        ds_coords.samples = coords.samples
    return ds_coords
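# Usage sketch (hypothetical, not from the original scripts): given a
# transposed group dataset that carries sa.voxel_indices and sa.participant,
# coordinate features can be appended, or used on their own as a
# sanity-check dataset for the classification. The file name is assumed.
# ds = mv.h5load('localizer_groupdataset_transposed.hdf5')
# ds_with_coords = get_voxel_coords(ds, append=True, zscore=True)
# coords_only = get_voxel_coords(ds, append=False, zscore=True)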
def store_class(data, node, result):
    # Callback that records voxel indices, targets, partitions, and
    # predictions for every cross-validation fold. It relies on `clf` and
    # `classifications` being defined in the calling scope.
    class_ds = mv.Dataset(samples=data.sa.voxel_indices)
    class_ds.sa['targets'] = data.sa.targets
    class_ds.sa['partitions'] = data.sa.partitions
    class_ds.sa['predictions'] = clf.predict(data)
    class_ds.sa['participant'] = data.sa.participant
    classifications.append(class_ds)
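# Usage sketch (assumed setup, not from the original scripts): store_class is
# meant as a callback for a PyMVPA cross-validation; whether CrossValidation
# forwards a `callback` keyword in this exact way is an assumption here, as
# are the classifier and partitioner choices.
# classifications = []
# clf = mv.LinearCSVMC()
# cv = mv.CrossValidation(clf, mv.NFoldPartitioner(attr='participant'),
#                         errorfx=mv.mean_match_accuracy,
#                         callback=store_class)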
def flip_sensitivities(sensitivities):
    """The sensitivities are computed in a fixed, non-changeable order, so to
    avoid confusing people we flip the sign whenever the ROIs are displayed
    in an order different from the one used during sensitivity computation."""
    return mv.Dataset(sensitivities.samples * -1,
                      sa=sensitivities.sa,
                      fa=sensitivities.fa)
class RSA(mvpa.Measure):
    """Between-subject representational similarity: correlate the two
    subjects' condition dissimilarity matrices."""
    is_trained = True

    def __init__(self, subj1, subj2, **kwargs):
        mvpa.Measure.__init__(self, **kwargs)
        self._subj1 = subj1
        self._subj2 = subj2

    def _call(self, evds):
        dsm1 = sd.pdist(evds[evds.sa.subj == self._subj1].samples,
                        metric='correlation')
        dsm2 = sd.pdist(evds[evds.sa.subj == self._subj2].samples,
                        metric='correlation')
        # Spearman correlation between the two dissimilarity vectors
        # (Bio.Cluster's 's' metric is 1 - Spearman correlation)
        res = 1 - Bio.Cluster.distancematrix(np.vstack((dsm1, dsm2)),
                                             dist='s')[1][0]
        return mvpa.Dataset(np.array(res)[np.newaxis])
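# Usage sketch (hypothetical): the measure compares the dissimilarity
# structure of two subjects within a dataset `evds` that carries an sa.subj
# attribute; the subject identifiers below are assumptions.
# rsa = RSA('sub-01', 'sub-02')
# similarity = rsa(evds)  # one-sample Dataset holding the Spearman correlation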
def __call__(self, valsTrain, labelsTrain, valsTest, doAncestralCV=True):
    """Trains on the ancestral population, then tests on the admixed
    population. Optionally cross-validates on the ancestral population.

    Arguments:
    - `valsTrain`: numpy array (nSamples x nFeatures) of training samples
    - `labelsTrain`: list of nSamples labels
    - `valsTest`: numpy array (nSamples2 x nFeatures) of test samples
    """
    # Create and normalize data
    ds = pymvpa.Dataset(valsTrain)
    ds.sa['targets'] = labelsTrain
    runtype = np.zeros(valsTrain.shape[0])
    runtype[0::3] = 0
    runtype[1::3] = 1
    runtype[2::3] = 2
    ds.sa['runtype'] = runtype

    try:
        # Train on ancestral population
        self.classifier.train(ds)
        admixedClass = self.classifier.predict(valsTest)
    except pymvpa.DegenerateInputError:
        # valsTrain is too small to contain information
        print("WARNING: Window is degenerate; guessing ancestry")
        admixedClass = np.zeros(valsTest.shape[0])  # just assign ancestry to first pop
        if doAncestralCV:
            # report chance-level accuracy for the ancestral CV
            return 1. / len(np.unique(labelsTrain)), admixedClass
        return admixedClass

    if doAncestralCV:
        # Cross-validate on the ancestral population
        hspl = pymvpa.NGroupPartitioner(3, attr='runtype')
        cvte = pymvpa.CrossValidation(self.classifier, hspl, enable_ca='stats')
        cv_results = cvte(ds)
        return cvte.ca.stats.matrix, admixedClass
    return admixedClass
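# Usage sketch (hypothetical inputs and instance name): valsTrain/valsTest are
# genotype windows and labelsTrain the ancestral population labels; with
# doAncestralCV=True the confusion matrix of the 3-fold ancestral
# cross-validation is returned together with the predicted ancestry of the
# admixed samples. `winClassifier` is a hypothetical instance of this class.
# confusion, admixedClass = winClassifier(valsTrain, labelsTrain, valsTest,
#                                         doAncestralCV=True)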
def project_betas(ds, analysis, eventdir, results_dir, annot_dir=None):
    """
    Currently unused, but may become relevant later on; kept in utils.py.
    Project beta values from the 2nd analysis approach back into the brain.
    Current problem: for the first analysis type, overlaps are excluded (for
    classification purposes), so we need to run the GLM on data with overlaps.
    That is why this is a separate function and not integrated into the
    reversed analysis. The beta projection is kept safe-guarded here because
    of the still unsolved overlap issue.
    :return: dictionary of nifti images (one per regressor and subject)
    """
    ds_transposed = ds.get_mapped(mv.TransposeMapper())
    assert ds_transposed.shape[0] < ds_transposed.shape[1]

    # get the appropriate event file; extract runs, chunks, time coords from
    # the transposed dataset
    chunks, runs, runonsets = False, False, False
    if analysis == 'avmovie':
        ds_transposed, chunks, runs, runonsets = get_avmovietimes(ds_transposed)

    events_dicts = get_events(analysis=analysis,
                              eventdir=eventdir,
                              results_dir=results_dir,
                              chunks=chunks,
                              runs=runs,
                              runonsets=runonsets,
                              annot_dir=annot_dir,
                              multimatch=False)

    # step 1: do the glm on the data
    hrf_estimates = mv.fit_event_hrf_model(ds_transposed,
                                           events_dicts,
                                           time_attr='time_coords',
                                           condition_attr='condition',
                                           design_kwargs=dict(drift_model='blank'),
                                           glmfit_kwargs=dict(model='ols'),
                                           return_model=True)
    # lets save these
    mv.h5save(results_dir + '/' + 'betas_from_2nd_approach.hdf5', hrf_estimates)
    print('calculated the glm, saving results')

    # step 2: get the results back into a transposed form, because we want to
    # have time points as features, and extract the betas
    hrf_estimates_transposed = hrf_estimates.get_mapped(mv.TransposeMapper())
    assert hrf_estimates_transposed.samples.shape[0] > hrf_estimates_transposed.samples.shape[1]

    subs = np.unique(hrf_estimates_transposed.sa.participant)
    print('going on to project resulting betas back into brain...')
    regs = hrf_estimates_transposed.fa.condition
    assert len(subs) > 0

    from collections import OrderedDict
    result_maps = OrderedDict()

    for sub in subs:
        print('...for subject {}...'.format(sub))
        result_maps[sub] = OrderedDict()
        # subset to the participant's data
        data = mv.Dataset(
            hrf_estimates_transposed.samples[hrf_estimates_transposed.sa.participant == sub],
            fa=hrf_estimates_transposed[hrf_estimates_transposed.sa.participant == sub].fa,
            sa=hrf_estimates_transposed[hrf_estimates_transposed.sa.participant == sub].sa)
        # loop over regressors
        for idx, reg in enumerate(regs):
            result_map = buildremapper(sub,
                                       data.samples.T[idx],  # one beta vector per regressor
                                       ds_type='full',  # currently only possible for the full ds
                                       )
            # populate a nested dict with the resulting nifti images:
            # one nifti image per regressor for each subject
            result_maps[sub][reg] = result_map

    # Those result maps can be quick-and-dirty-plotted with
    # mri_args = {'background': 'sourcedata/tnt/sub-01/bold3Tp2/in_grpbold3Tp2/head.nii.gz',
    #             'background_mask': 'sub-01/ses-movie/anat/brain_mask_tmpl.nii.gz'}
    # fig = mv.plot_lightbox(overlay=result_maps['sub-01']['scene'], vlim=(1.5, None), **mri_args)
    # TODO: maybe save the result map? Done with map2nifti(ds, da).to_filename('blabla{}'.format(reg))
    # How do we know which regressors have the highest betas for a given ROI? Averaging?
    # betas = [np.mean(hrf_estimates.samples[i][hrf_estimates.fa.bilat_ROIs == 'PPA']) for i, reg in enumerate(regs)]
    # to get it sorted: OrderedDict(sorted(zip(regs, betas), key=lambda x: x[1]))
    return result_maps
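# Usage sketch (hypothetical paths and arguments): project the per-regressor
# betas of the avmovie analysis back into the brain and write one map to disk.
# result_maps = project_betas(ds, 'avmovie', eventdir='sourcedata/events/',
#                             results_dir='results/', annot_dir=None)
# result_maps['sub-01']['scene'].to_filename('results/sub-01_scene_betas.nii.gz')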
def createdataset(analysis, datadir, rootdir, anatdir, eventdir, zscore, rois):
    """
    Build an hdf5 dataset.
    """
    # initialize a list to load all datasets into:
    data_dss = []

    # get list of participants from root dir
    participants = sorted(
        [path.split('/')[-1] for path in glob(rootdir + 'sub-*')])
    assert len(participants) != 0
    print('The following participants were found: {}'.format(participants))

    for participant in participants:
        # count the number of participant substitutions necessary
        data_fns = sorted(glob(rootdir + participant + datadir))
        print(rootdir + participant + datadir)
        mask_fn = rootdir + participant + anatdir + 'brain_mask_tmpl.nii.gz'
        if analysis == 'localizer':
            assert len(data_fns) == 4
        if analysis == 'avmovie':
            assert len(data_fns) == 8
        data_ds = mv.vstack([
            mv.fmri_dataset(data_fn, mask=mask_fn, chunks=run)
            for run, data_fn in enumerate(data_fns)
        ])
        data_ds.fa['participant'] = [participant] * data_ds.shape[1]
        print('loaded data for participant {}.'.format(participant))

        # z scoring
        if analysis == 'localizer' and zscore == 'baseline-zscore':
            events = get_group_events(eventdir)
            means, stds = extract_baseline(events, data_ds)
            mv.zscore(data_ds, params=(means, stds), chunks_attr='chunks')
            print('finished baseline zscoring for participant {}.'.format(
                participant))
        elif zscore == 'zscore':
            mv.zscore(data_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')

        # roi masks
        all_rois_mask = np.array([['brain'] * data_ds.shape[1]]).astype('S10')
        for roi in rois:
            # Get filenames for potential right and left ROI masks
            if roi == 'VIS':
                roi_fns = sorted(glob(rootdir + participant + anatdir +
                                      '{0}_*_mask_tmpl.nii.gz'.format(roi)))
            else:
                left_roi_fns = sorted(glob(rootdir + participant + anatdir +
                                           'l{0}*mask_tmpl.nii.gz'.format(roi)))
                right_roi_fns = sorted(glob(rootdir + participant + anatdir +
                                            'r{0}*mask_tmpl.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns

            if len(roi_fns) == 0:
                print("ROI {0} does not exist for participant {1}; "
                      "appending all zeros".format(roi, participant))
                roi_mask = np.zeros((1, data_ds.shape[1]))
            elif len(roi_fns) == 1:
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            elif len(roi_fns) > 1:
                # Add ROI maps into single map
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ], axis=0)
                # Set any voxels that might exceed 1 to 1
                roi_mask = np.where(roi_mask > 0, 1, 0)

            # Ensure that number of voxels in ROI mask matches dataset dimension
            assert roi_mask.shape[1] == data_ds.shape[1]
            # Flatten mask into list
            roi_flat = list(roi_mask.ravel())
            # Assign ROI mask to data feature attributes
            data_ds.fa[roi] = roi_flat

            # Get lateralized masks as well
            if roi != 'VIS':
                lat_roi_mask = np.zeros((1, data_ds.shape[1]))
                if len(left_roi_fns) == 1:
                    left_roi_mask = np.where(
                        mv.fmri_dataset(left_roi_fns[0],
                                        mask=mask_fn).samples > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) > 1:
                    left_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(left_roi_fn, mask=mask_fn).samples
                            for left_roi_fn in left_roi_fns
                        ], axis=0) > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) == 0:
                    left_roi_mask = np.zeros((1, data_ds.shape[1]))

                if len(right_roi_fns) == 1:
                    right_roi_mask = np.where(
                        mv.fmri_dataset(right_roi_fns[0],
                                        mask=mask_fn).samples > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) > 1:
                    right_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(right_roi_fn, mask=mask_fn).samples
                            for right_roi_fn in right_roi_fns
                        ], axis=0) > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) == 0:
                    right_roi_mask = np.zeros((1, data_ds.shape[1]))

                # Ensure that number of voxels in ROI mask matches dataset dimension
                assert lat_roi_mask.shape[1] == data_ds.shape[1]
                # Flatten mask into list
                lat_roi_flat = list(lat_roi_mask.ravel())
                # Assign ROI mask to data feature attributes
                data_ds.fa['lat_' + roi] = lat_roi_flat
                # Check existing feature attribute for all ROIs for overlaps
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0)) &
                         (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(left_roi_mask > 0) &
                              (all_rois_mask != 'overlap')] = 'left {0}'.format(roi)
                all_rois_mask[(right_roi_mask > 0) &
                              (all_rois_mask != 'overlap')] = 'right {0}'.format(roi)
            elif roi == 'VIS':
                roi_fns = sorted(
                    glob(rootdir + participant + anatdir +
                         '/{0}_*_mask_tmpl.nii.gz'.format(roi)))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ], axis=0)
                np.place(all_rois_mask,
                         (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0) &
                              (all_rois_mask != 'overlap')] = roi

        # Flatten mask into list
        all_rois_flat = list(all_rois_mask.ravel())
        # Assign roi mask to dataset feature attributes
        data_ds.fa['all_ROIs'] = all_rois_flat

        # join all datasets
        data_dss.append(data_ds)

    # save full dataset (outdir is expected to be defined in the calling scope)
    mv.h5save(outdir + '{}_groupdataset.hdf5'.format(analysis), data_dss)
    print('saved the collection of all subjects datasets.')
    # squish everything together
    ds_wide = mv.hstack(data_dss)
    # transpose the dataset, time points are now features
    ds = mv.Dataset(ds_wide.samples.T, sa=ds_wide.fa.copy(), fa=ds_wide.sa.copy())
    mv.h5save(outdir + '{}_groupdataset_transposed.hdf5'.format(analysis), ds)
    print('Transposed the group-dataset and saved it.')
    return ds
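# Usage sketch (hypothetical directory layout and glob pattern): build the
# group dataset for the localizer analysis with per-run z-scoring and a few
# ROI masks; note that `outdir` must be defined in the calling scope.
# ds = createdataset(analysis='localizer',
#                    datadir='/ses-localizer/func/*bold.nii.gz',  # assumed glob
#                    rootdir='sourcedata/', anatdir='/anat/',
#                    eventdir='events/', zscore='zscore',
#                    rois=['PPA', 'FFA', 'VIS'])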
sl_radius = HYPERALIGNMENT_RADIUS

# create query engine
qe = IndexQueryEngine(voxel_indices=Sphere(sl_radius))
qe.train(ref_ds)

# load all subjects
nfiles = glob.glob(os.path.join(chamats, '*commonspace_subs*'))
print('Loading participant data from: ')
print(chamats)
mysubs = nfiles[0:nsubs]

# import connectomes into pymvpa datasets, zscore, add voxel indices,
# and append to a list of datasets
dss = []
for sub in range(len(mysubs)):
    ds = mv.Dataset(np.load(mysubs[sub]))
    ds.fa['voxel_indices'] = range(ds.shape[1])
    # ds.sa['chunks'] = np.repeat(i, cnx_tx)
    mv.zscore(ds, chunks_attr=None)
    dss.append(ds)

print('Number of data sets in dss: ')
print(len(dss))
print('Size of data sets: ')
print(dss[0].shape)

# create SL hyperalignment instance
hyper = SearchlightHyperalignment(
    queryengine=qe,
    compute_recon=False,  # We don't need to project back from common space to subject space
            mvpa_dir, 'no_roi_ids',
            'search_RDMs_sq_zscore_p{0}_{1}.hdf5'.format(participant, hemi)))
        sl_sq = sl_result.samples.reshape(n_conditions, n_conditions,
                                          n_vertices)
        sl_tri = []
        for sl in sl_sq.T:
            sl_tri.append(squareform(sl, checks=False))
        sl_tri = np.array(sl_tri).T
        assert sl_tri.shape == (n_conditions * (n_conditions - 1) // 2,
                                n_vertices)
        sl_tri = mv.Dataset(
            sl_tri,
            sa={'conditions': list(
                combinations(condition_order['original_condition_order'], 2))},
            fa=sl_result.fa,
            a=sl_result.a)
        sl_tri.sa['participants'] = [int(participant)] * sl_tri.shape[0]
        sl_rdms[hemi][participant] = sl_tri
        print("Loaded searchlight RDMs for participant {0} "
              "hemisphere {1}".format(participant, hemi))

# Compute ISC in leave-one-out fashion
sl_iscs = {}
for hemi in ['lh', 'rh']:
    sl_iscs[hemi] = {}
    for participant in sorted(participants.keys()):
        lo_rdm = sl_rdms[hemi][participant].samples
            base_dir + participant + data_dir +
            '{0}_avmovie_detrend{1}_lowpass_ROIs_tmpl_bold.hdf5'.format(
                participant, polyord), movie_ds)
        print("Finished participant {0}, saved the data".format(participant))

    mv.h5save(
        results_dir +
        'allsub_avmovie_detrend{0}_lowpass_ROIs_tmpl_bold.hdf5'.format(polyord),
        movie_dss)
    print('Saved the group dataset in {}.'.format(results_dir))

    # Horizontally stack all data sets
    ds_wide = mv.hstack(movie_dss)

    # Transpose brain so voxels are now samples
    ds = mv.Dataset(ds_wide.samples.T, sa=ds_wide.fa.copy(), fa=ds_wide.sa.copy())

    # Save transposed data
    mv.h5save(
        results_dir +
        'allsub_transpose_avmovie_detrend{0}_lowpass_ROIs_tmpl_bold.hdf5'.format(
            polyord), ds)
    print('Saved the transposed group dataset in {}.'.format(results_dir))


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--base_dir',
                        help="Please specify the root of your"
# the cross-validated RSA measure (the "lean" variant, using lean_errorfx)
cv_rsa = mv.CrossValidation(mv.CDist(pairwise_metric='correlation'),
                            mv.HalfPartitioner(attr='sessions'),
                            errorfx=None, postproc=lean_errorfx)

sl = mv.Searchlight(cv_rsa, queryengine=qe, enable_ca=['roi_sizes'],
                    nproc=1, results_backend='native')
# sl = mv.Searchlight(cv_rsa, queryengine=qe, enable_ca=['roi_sizes'],
#                     nproc=1, results_backend='native',
#                     roi_ids=cortical_vertices)
# tmp_prefix='/local/tmp/sam_sl_p{0}_{1}_'.format(participant_id, hemi)
mv.debug.active += ['SLC']

sl_result = sl(ds)
assert len(sl_result.sa) == 0  # we didn't pass any
sl_result.sa = target_sa

print('>>>', np.mean(sl.ca.roi_sizes), np.std(sl.ca.roi_sizes))

sl_means = np.mean(np.dstack((sl_result.samples[:n_conditions**2, :],
                              sl_result.samples[n_conditions**2:, :])),
                   axis=2)
sl_final = mv.Dataset(
    sl_means,
    sa={'conditions': sl_result.sa.conditions[:sl_means.shape[0], :].tolist(),
        'participants': [int(participant[-2:])] * sl_means.shape[0]},
    fa=sl_result.fa,
    a=sl_result.a)
# assert sl_result.shape[0] == n_conditions**2
print(sl_final)

mv.h5save('/idata/DBIC/cara/life/search_RDMs_sq_zscore_HA_{0}_{1}.hdf5'.format(
    participant, hemi), sl_final)
# mv.niml.write(join(mvpa_dir, 'search_RDMs_sq_p{0}_{1}_TEST.niml.dset'.format(
#     participant_id, hemi)), sl_result)
def buildadataset(zscore, rois, event_path=None):
    """buildadataset() will build and save participant-specific hdf5 datasets
    with all ROIs from the preprocessed objectcategories data, stack them into
    a group dataset and save it, and transpose the group dataset and save that
    as well. The parameter 'zscore' determines whether and what kind of
    z-scoring should be performed."""
    print('I am building a dataset with the following option: {}.'.format(
        zscore))

    # get the participants and rois
    participants = sorted(
        [path.split('/')[-1] for path in glob(base_dir + 'sub-*')])

    localizer_dss = []

    for participant in participants:
        localizer_fns = sorted(glob(base_dir + participant + locdir +
                                    '{}_task-objectcategories_run-*_space-custom-subject_desc-highpass_bold.nii.gz'.format(
                                        participant)))
        mask_fn = base_dir + participant + anat_dir + 'brain_mask.nii.gz'
        assert len(localizer_fns) == 4
        localizer_ds = mv.vstack([
            mv.fmri_dataset(localizer_fn, mask=mask_fn, chunks=run)
            for run, localizer_fn in enumerate(localizer_fns)
        ])
        localizer_ds.fa['participant'] = [participant] * localizer_ds.shape[1]
        print('loaded localizer data for participant {}.'.format(participant))

        # zscore the data with means and standard deviations from
        # no-stimulation periods
        if zscore == 'custom':
            events = get_group_events(event_path)
            means, stds = extract_baseline(events, localizer_ds)
            # zscore stuff
            mv.zscore(localizer_ds, params=(means, stds), chunks_attr='chunks')
            print('finished custom zscoring for participant {}.'.format(
                participant))
        elif zscore == 'z-score':
            mv.zscore(localizer_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')

        all_rois_mask = np.array([['brain'] * localizer_ds.shape[1]
                                  ]).astype('S10')
        for roi in rois:
            # Get filenames for potential right and left ROI masks
            if roi == 'VIS':
                roi_fns = sorted(glob(base_dir + participant + anat_dir +
                                      '{0}_*_mask.nii.gz'.format(roi)))
            else:
                left_roi_fns = sorted(glob(base_dir + participant + anat_dir +
                                           'l{0}_*_mask.nii.gz'.format(roi)))
                right_roi_fns = sorted(glob(base_dir + participant + anat_dir +
                                            'r{0}_*_mask.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns

            if len(roi_fns) == 0:
                print("ROI {0} does not exist for participant {1}; "
                      "appending all zeros".format(roi, participant))
                roi_mask = np.zeros((1, localizer_ds.shape[1]))
            elif len(roi_fns) == 1:
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            elif len(roi_fns) > 1:
                # Add ROI maps into single map
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ], axis=0)
                # Set any voxels that might exceed 1 to 1
                roi_mask = np.where(roi_mask > 0, 1, 0)

            # Ensure that number of voxels in ROI mask matches localizer data
            assert roi_mask.shape[1] == localizer_ds.shape[1]
            # Flatten mask into list
            roi_flat = list(roi_mask.ravel())
            # Assign ROI mask to localizer data feature attributes
            localizer_ds.fa[roi] = roi_flat

            # Get lateralized masks as well
            if roi != 'VIS':
                lat_roi_mask = np.zeros((1, localizer_ds.shape[1]))
                if len(left_roi_fns) == 1:
                    left_roi_mask = np.where(
                        mv.fmri_dataset(left_roi_fns[0],
                                        mask=mask_fn).samples > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) > 1:
                    left_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(left_roi_fn, mask=mask_fn).samples
                            for left_roi_fn in left_roi_fns
                        ], axis=0) > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) == 0:
                    left_roi_mask = np.zeros((1, localizer_ds.shape[1]))

                if len(right_roi_fns) == 1:
                    right_roi_mask = np.where(
                        mv.fmri_dataset(right_roi_fns[0],
                                        mask=mask_fn).samples > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) > 1:
                    right_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(right_roi_fn, mask=mask_fn).samples
                            for right_roi_fn in right_roi_fns
                        ], axis=0) > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) == 0:
                    right_roi_mask = np.zeros((1, localizer_ds.shape[1]))

                # Ensure that number of voxels in ROI mask matches localizer data
                assert lat_roi_mask.shape[1] == localizer_ds.shape[1]
                # Flatten mask into list
                lat_roi_flat = list(lat_roi_mask.ravel())
                # Assign ROI mask to localizer data feature attributes
                localizer_ds.fa['lat_' + roi] = lat_roi_flat
                # Check existing feature attribute for all ROIs for overlaps
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0)) &
                         (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(left_roi_mask > 0) &
                              (all_rois_mask != 'overlap')] = 'left {0}'.format(roi)
                all_rois_mask[(right_roi_mask > 0) &
                              (all_rois_mask != 'overlap')] = 'right {0}'.format(roi)
            elif roi == 'VIS':
                roi_fns = sorted(
                    glob(base_dir + participant + anat_dir +
                         '/{0}_*_mask.nii.gz'.format(roi)))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ], axis=0)
                np.place(all_rois_mask,
                         (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0) &
                              (all_rois_mask != 'overlap')] = roi

        # Flatten mask into list
        all_rois_flat = list(all_rois_mask.ravel())
        # Assign ROI mask to localizer data feature attributes
        localizer_ds.fa['all_ROIs'] = all_rois_flat

        if save_per_subject:
            mv.h5save(base_dir + participant + locdir +
                      '{}_ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5'.format(
                          participant), localizer_ds)
            print('Saved dataset for {}.'.format(participant))

        # join all datasets
        localizer_dss.append(localizer_ds)

    # save full dataset
    mv.h5save(
        results_dir +
        'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5',
        localizer_dss)
    print('saved the collection of all subjects datasets.')
    # squish everything together
    ds_wide = mv.hstack(localizer_dss)
    # transpose the dataset, time points are now features
    ds = mv.Dataset(ds_wide.samples.T, sa=ds_wide.fa.copy(), fa=ds_wide.sa.copy())
    mv.h5save(
        results_dir +
        'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass_transposed.hdf5',
        ds)
    print('Transposed the group-dataset and saved it.')
    return ds
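# Usage sketch (hypothetical arguments): build the localizer group dataset
# with baseline ('custom') z-scoring for a set of ROIs; base_dir, locdir,
# anat_dir, results_dir, and save_per_subject are expected as module-level
# globals here, and the event path below is an assumption.
# ds = buildadataset('custom', ['PPA', 'FFA', 'EBA', 'VIS'],
#                    event_path='sourcedata/events/')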
import mvpa2.suite as mv
from scipy.stats import zscore as sciz, pearsonr
import os.path, time
from glob import glob
from scipy.io import loadmat
import numpy as np
import pandas as pd
import nibabel as nb
import h5py
from mvpa2.datasets.base import Dataset
from mvpa2.misc.surfing.queryengine import SurfaceQueryEngine
from mvpa2.support.nibabel.surf import read as read_surface
from mvpa2.datasets.mri import fmri_dataset
from mvpa2.misc.neighborhood import IndexQueryEngine, Sphere
from mvpa2.datasets.base import mask_mapper
import mvpa2.misc.surfing.volume_mask_dict as volmask
from mvpa2.algorithms.searchlight_hyperalignment import SearchlightHyperalignment
from mvpa2.mappers.zscore import zscore
from mvpa2.base.hdf5 import h5save, h5load

datadir = '/dartfs-hpc/rc/home/1/f0040y1/CANlab/labdata/projects/OLP4CBP/hyperalignment/common_spaces/bladderpain/commonspace_subs-201_radius-10'

# get list of npy files to convert
myfiles = glob(os.path.join(datadir, '*.npy'))

# loop over files, load each as a pymvpa dataset, and save it as hdf5.gz
for sub in myfiles:
    mysub = mv.Dataset(np.load(sub))
    savepath = os.path.join(datadir, os.path.split(sub)[1][0:-4] + '.hdf5.gz')
    h5save(savepath, mysub)
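# The converted datasets can be loaded back with PyMVPA's HDF5 I/O, e.g.
# (file name below is hypothetical):
# ds = h5load(os.path.join(datadir, 'some_connectome.hdf5.gz'))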
for parcel in range(len(indices)):
    PARCEL_NUMBER = parcel  # int(sys.argv[2])
    print('Number of Subjects: {0}'.format(nsubs))
    print('Parcel Number: {0}'.format(PARCEL_NUMBER))
    print('Parcel Label: {0}'.format(labels[PARCEL_NUMBER]))
    print('Parcel Description: {0}'.format(label_descriptions[PARCEL_NUMBER]))
    print('Voxels in Parcel: {0}'.format(sum(indices[PARCEL_NUMBER])))

    df_results.loc[parcel, 'Parcel_desc'] = label_descriptions[PARCEL_NUMBER]
    df_results.loc[parcel, 'Parcel_label'] = labels[PARCEL_NUMBER]
    df_results.loc[parcel, 'Voxels_in_parcel'] = sum(indices[PARCEL_NUMBER])

    myvoxels = np.nonzero(indices[PARCEL_NUMBER])
    dss = []
    for sub in range(len(mats)):
        ds = mats[sub][:, myvoxels[0]]
        ds = mv.Dataset(ds)
        ds.fa['voxel_indices'] = range(ds.shape[1])
        mv.zscore(ds, chunks_attr=None)
        dss.append(ds)

    print('Size of Training data sets: {0}'.format(dss[0].shape))
    print('Beginning Hyperalignment.')

    # create hyperalignment instance
    hyper = Hyperalignment(nproc=1)
    hyper.train(dss)
    # get mappers to common space created by hyper.train (2x procrustes iteration)
    mappers = hyper(dss)
    # apply mappers back onto training data
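    # A minimal sketch of the next step (assumed continuation, not part of the
    # original fragment): project each subject's training data into the common
    # space with the mappers returned above.
    # dss_aligned = [mapper.forward(ds) for mapper, ds in zip(mappers, dss)]
    # for ds_aligned in dss_aligned:
    #     mv.zscore(ds_aligned, chunks_attr=None)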
def _call(self, evds):
    res = 1 - sd.pdist(
        np.hstack((evds[evds.sa.subj == self._subj1].samples,
                   evds[evds.sa.subj == self._subj2].samples)).T,
        'correlation')
    return mvpa.Dataset(np.array(res)[np.newaxis])