Example #1
import numpy as np
import mvpa2.suite as mv


def get_voxel_coords(ds,
                     append=True,
                     zscore=True):
    """Append voxel coordinates (and their squares, cubes, and pairwise
    products) to a dataset. If append=False, return a dataset that contains
    only the coordinates and no fMRI data; such a dataset is useful as a
    sanity check for the classification.
    """
    ds_coords = ds.copy('deep')
    # Append voxel coordinates (and their squares, cubes, and pairwise products)
    vi = ds.sa.voxel_indices
    products = np.column_stack((vi[:, 0] * vi[:, 1],
                                vi[:, 0] * vi[:, 2],
                                vi[:, 1] * vi[:, 2],
                                vi[:, 0] * vi[:, 1] * vi[:, 2]))
    coords = np.hstack((vi,
                        vi ** 2,
                        vi ** 3,
                        products))
    coords = mv.Dataset(coords, sa=ds_coords.sa)
    if zscore:
        mv.zscore(coords, chunks_attr='participant')
    ds_coords.fa.clear()
    if append:
        ds_coords.samples = np.hstack((ds_coords.samples, coords.samples))
    else:
        ds_coords.samples = coords.samples
    return ds_coords
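# A minimal usage sketch for get_voxel_coords, assuming `ds` is a PyMVPA
# dataset with sa.voxel_indices and sa.participant set (as the function
# expects). The coordinate-only variant feeds the sanity check mentioned
# in the docstring: a classifier trained on coordinates alone should not
# beat chance if the signal lives in the fMRI features.
ds_with_coords = get_voxel_coords(ds, append=True, zscore=True)
ds_coords_only = get_voxel_coords(ds, append=False, zscore=True)
assert ds_coords_only.shape[0] == ds.shape[0]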
def store_class(data, node, result):
    # `clf` and `classifications` are expected to exist in the enclosing scope
    class_ds = mv.Dataset(samples=data.sa.voxel_indices)
    class_ds.sa['targets'] = data.sa.targets
    class_ds.sa['partitions'] = data.sa.partitions
    class_ds.sa['predictions'] = clf.predict(data)
    class_ds.sa['participant'] = data.sa.participant
    classifications.append(class_ds)
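# Hedged setup sketch for the callback above: `clf` and `classifications`
# are module-level state, and the (data, node, result) signature matches
# PyMVPA's RepeatedMeasure callback protocol. Whether CrossValidation
# forwards a `callback` keyword depends on the installed version; treat
# this, and the classifier choice, as assumptions.
classifications = []
clf = mv.LinearCSVMC()
cv = mv.CrossValidation(clf, mv.NFoldPartitioner(), callback=store_class)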
Example #3
def flip_sensitivities(sensitivities):
    """
    The sensitivities are computed in non-changeable order, so if we don't want to confuse people,
    we flip the sign when we display the ROIs in an order different from during sensitivity
    computation.
    """
    return mv.Dataset(sensitivities.samples * -1,
                      sa=sensitivities.sa,
                      fa=sensitivities.fa)
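# Quick check of the helper, assuming `sensitivities` is a PyMVPA Dataset
# of sensitivity maps: flipping negates the samples but keeps sa/fa intact.
flipped = flip_sensitivities(sensitivities)
assert np.allclose(flipped.samples, -sensitivities.samples)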
class RSA(mvpa.Measure):
    is_trained = True

    def __init__(self, subj1, subj2, **kwargs):
        mvpa.Measure.__init__(self, **kwargs)
        self._subj1 = subj1
        self._subj2 = subj2

    def _call(self, evds):
        # correlation-distance RDM per subject, then Spearman-correlate the RDMs
        dsm1 = sd.pdist(evds[evds.sa.subj == self._subj1].samples, metric='correlation')
        dsm2 = sd.pdist(evds[evds.sa.subj == self._subj2].samples, metric='correlation')
        res = 1 - Bio.Cluster.distancematrix(np.vstack((dsm1, dsm2)), dist='s')[1][0]
        return mvpa.Dataset(np.array(res)[np.newaxis])
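# Hypothetical usage of the measure: `evds` is assumed to be an
# event-related dataset with a sample attribute `subj`, and `sd`
# (scipy.spatial.distance), `Bio.Cluster`, and `mvpa` (mvpa2.suite)
# are assumed to be imported as the class requires.
rsa = RSA('sub-01', 'sub-02')
result = rsa(evds)  # one-element Dataset: Spearman r of the two RDMs
print(result.samples)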
Example #5
def __call__(self, valsTrain, labelsTrain, valsTest, doAncestralCV=True):
    """Trains on the ancestral population, then tests on the admixed
    population. Optionally cross-validates on the ancestral population.

    Arguments:
    - `valsTrain`: numpy array (nSamples x nFeatures) of training samples
    - `labelsTrain`: list of nSamples labels
    - `valsTest`: numpy array (nSamples2 x nFeatures) of test samples
    - `doAncestralCV`: if True, also return the ancestral CV confusion matrix
    """
    # Create and normalize data
    ds = pymvpa.Dataset(valsTrain)
    ds.sa['targets'] = labelsTrain
    # assign every third sample to one of three groups for 3-fold CV
    runtype = np.zeros(valsTrain.shape[0])
    runtype[1::3] = 1
    runtype[2::3] = 2
    ds.sa['runtype'] = runtype
    try:  # Train on ancestral
        self.classifier.train(ds)
        admixedClass = self.classifier.predict(valsTest)
    except pymvpa.DegenerateInputError:  # valsTrain is too small to contain information
        print("WARNING: Window is degenerate; guessing ancestry")
        # Just assign ancestry to the first population
        admixedClass = np.zeros(valsTest.shape[0])
        if doAncestralCV:
            # report chance-level accuracy for the degenerate window
            return 1. / len(np.unique(labelsTrain)), admixedClass
        return admixedClass
    if doAncestralCV:  # Cross-validate on the ancestral population
        hspl = pymvpa.NGroupPartitioner(3, attr='runtype')
        cvte = pymvpa.CrossValidation(self.classifier,
                                      hspl,
                                      enable_ca='stats')
        cv_results = cvte(ds)
        return cvte.ca.stats.matrix, admixedClass
        # ancestralSuccess = 1 - np.mean(cv_results)
        # return ancestralSuccess, admixedClass
    return admixedClass
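# Self-contained sketch of the cross-validation pattern used above, on
# synthetic data; the classifier choice (LinearCSVMC) is an assumption.
import numpy as np
import mvpa2.suite as pymvpa

toy = pymvpa.Dataset(np.random.randn(30, 5))
toy.sa['targets'] = np.tile([0, 1], 15)
toy.sa['runtype'] = np.arange(30) % 3  # three groups, as above
hspl = pymvpa.NGroupPartitioner(3, attr='runtype')
cvte = pymvpa.CrossValidation(pymvpa.LinearCSVMC(), hspl, enable_ca='stats')
errors = cvte(toy)
print(cvte.ca.stats.matrix)  # confusion matrix, as returned above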
Example #6
def project_betas(ds,
                  analysis,
                  eventdir,
                  results_dir,
                  annot_dir=None,
                  ):
    """
    Currently unused, but can become relevant later on. Will keep it in utils.py.
    Project beta values from 2nd analysis approach into the brain.
    Current problem: For first analysis type overlaps are excluded (for classification
    purposes), so we need to do the glm on data with overlaps. Thats why its a separate function
    and not integrated into the reversed analysis.
    :return: nifti images... many nifti images in a dictionary
    """

    ds_transposed = ds.get_mapped(mv.TransposeMapper())
    assert ds_transposed.shape[0] < ds_transposed.shape[1]

    # get the appropriate event file. extract runs, chunks, timecoords from transposed dataset
    chunks, runs, runonsets = False, False, False

    if analysis == 'avmovie':
        ds_transposed, chunks, runs, runonsets = get_avmovietimes(ds_transposed)

    events_dicts = get_events(analysis=analysis,
                              eventdir=eventdir,
                              results_dir=results_dir,
                              chunks=chunks,
                              runs=runs,
                              runonsets=runonsets,
                              annot_dir=annot_dir,
                              multimatch=False)

    # step 1: do the glm on the data
    hrf_estimates = mv.fit_event_hrf_model(ds_transposed,
                                           events_dicts,
                                           time_attr='time_coords',
                                           condition_attr='condition',
                                           design_kwargs=dict(drift_model='blank'),
                                           glmfit_kwargs=dict(model='ols'),
                                           return_model=True)

    # let's save these
    mv.h5save(results_dir + '/' + 'betas_from_2nd_approach.hdf5', hrf_estimates)
    print('calculated the GLM, saved the results')

    # step 2: get the results back into a transposed form, because we want to have time points as features & extract the betas
    hrf_estimates_transposed = hrf_estimates.get_mapped(mv.TransposeMapper())
    assert hrf_estimates_transposed.samples.shape[0] > hrf_estimates_transposed.samples.shape[1]

    subs = np.unique(hrf_estimates_transposed.sa.participant)
    print('going on to project resulting betas back into brain...')

    regs = hrf_estimates_transposed.fa.condition
    assert len(subs) > 0
    from collections import OrderedDict
    result_maps = OrderedDict()
    for sub in subs:
        print('...for subject {}...'.format(sub))
        result_maps[sub] = OrderedDict()
        # subset to participants dataframe
        data = mv.Dataset(hrf_estimates_transposed.samples[hrf_estimates_transposed.sa.participant == sub],
                          fa=hrf_estimates_transposed[hrf_estimates_transposed.sa.participant == sub].fa,
                          sa=hrf_estimates_transposed[hrf_estimates_transposed.sa.participant == sub].sa)
        # loop over regressors
        for idx, reg in enumerate(regs):
            result_map = buildremapper(sub,
                                       data.samples.T[idx], # we select one beta vector per regressor
                                       ds_type='full', # currently we can only do this for the full ds.
                                       )
            # populate a nested dict with the resulting nifti images
            # this guy has one nifti image per regressor for each subject
            result_maps[sub][reg] = result_map

        # Those result maps can be quick-and-dirty-plotted with
        # mri_args = {'background': 'sourcedata/tnt/sub-01/bold3Tp2/in_grpbold3Tp2/head.nii.gz',
        #             'background_mask': 'sub-01/ses-movie/anat/brain_mask_tmpl.nii.gz'}
        # fig = mv.plot_lightbox(overlay=result_maps['sub-01']['scene'], vlim=(1.5, None), **mri_args)
        # TODO: maybe save the result map? Done with map2nifti(ds, da).to_filename('blabla{}'.format(reg))
        # how do we know which regressors have the highest betas for a given ROI? averaging?
        # from collections import OrderedDict
        # betas = [np.mean(hrf_estimates.samples[i][hrf_estimates.fa.bilat_ROIs == 'PPA']) for i, reg in enumerate(regs)]
        # to get it sorted: OrderedDict(sorted(zip(regs, betas), key=lambda x: x[1]))

    return result_maps
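# Sketch of acting on the TODO above: the returned nested dictionary holds
# NIfTI images (per the docstring), which nibabel can write directly. The
# call arguments and the filename pattern here are assumptions.
result_maps = project_betas(ds, 'avmovie', eventdir, results_dir)
for sub in result_maps:
    for reg, img in result_maps[sub].items():
        img.to_filename('{0}/{1}_{2}_betas.nii.gz'.format(results_dir, sub, reg))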
Example #7
def createdataset(analysis, datadir, rootdir, anatdir, eventdir, zscore, rois):
    """
    Build an hdf5 dataset.
    """
    # initialize a list to load all datasets into:
    data_dss = []

    # get list of participants from root dir
    participants = sorted(
        [path.split('/')[-1] for path in glob(rootdir + 'sub-*')])
    assert len(participants) != 0
    print('The following participants were found: {}'.format(participants))

    for participant in participants:
        # gather this participant's functional data files
        data_fns = sorted(glob(rootdir + participant + datadir))
        print(rootdir + participant + datadir)
        mask_fn = rootdir + participant + anatdir + 'brain_mask_tmpl.nii.gz'
        if analysis == 'localizer':
            assert len(data_fns) == 4
        if analysis == 'avmovie':
            assert len(data_fns) == 8
        data_ds = mv.vstack([
            mv.fmri_dataset(data_fn, mask=mask_fn, chunks=run)
            for run, data_fn in enumerate(data_fns)
        ])
        data_ds.fa['participant'] = [participant] * data_ds.shape[1]
        print('loaded data for participant {}.'.format(participant))

        # z scoring
        if analysis == 'localizer' and zscore == 'baseline-zscore':
            events = get_group_events(eventdir)
            means, stds = extract_baseline(events, data_ds)
            mv.zscore(data_ds, params=(means, stds), chunks_attr='chunks')
            print('finished baseline zscoring for participant {}.'.format(
                participant))
        elif zscore == 'zscore':
            mv.zscore(data_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')

        # roi masks
        all_rois_mask = np.array([['brain'] * data_ds.shape[1]]).astype('S10')
        for roi in rois:
            # Get filenames for potential right and left ROI masks
            if roi == 'VIS':
                roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                      '{0}_*_mask_tmpl.nii.gz'.format(roi)))
            else:
                left_roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                           'l{0}*mask_tmpl.nii.gz'.format(roi)))
                right_roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                            'r{0}*mask_tmpl.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns
            if len(roi_fns) == 0:
                print(
                    "ROI {0} does not exist for participant {1}; appending all zeros"
                    .format(roi, participant))
                roi_mask = np.zeros((1, data_ds.shape[1]))
            elif len(roi_fns) == 1:
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            elif len(roi_fns) > 1:
                # Add ROI maps into single map
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)
                # Set any voxels that might exceed 1 to 1
                roi_mask = np.where(roi_mask > 0, 1, 0)

            # Ensure that number of voxels in ROI mask matches dataset dimension
            assert roi_mask.shape[1] == data_ds.shape[1]
            # Flatten mask into list
            roi_flat = list(roi_mask.ravel())
            # Assign ROI mask to data feature attributes
            data_ds.fa[roi] = roi_flat
            # Get lateralized masks as well
            if roi != 'VIS':
                lat_roi_mask = np.zeros((1, data_ds.shape[1]))
                if len(left_roi_fns) == 1:
                    left_roi_mask = np.where(
                        mv.fmri_dataset(left_roi_fns[0], mask=mask_fn).samples
                        > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) > 1:
                    left_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(left_roi_fn, mask=mask_fn).samples
                            for left_roi_fn in left_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1

                elif len(left_roi_fns) == 0:
                    left_roi_mask = np.zeros((1, data_ds.shape[1]))

                if len(right_roi_fns) == 1:
                    right_roi_mask = np.where(
                        mv.fmri_dataset(right_roi_fns[0], mask=mask_fn).samples
                        > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) > 1:
                    right_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(right_roi_fn, mask=mask_fn).samples
                            for right_roi_fn in right_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) == 0:
                    right_roi_mask = np.zeros((1, data_ds.shape[1]))

                # Ensure that number of voxels in ROI mask matches dataset dimension
                assert lat_roi_mask.shape[1] == data_ds.shape[1]
                # Flatten mask into list
                lat_roi_flat = list(lat_roi_mask.ravel())
                # Assign ROI mask to data feature attributes
                data_ds.fa['lat_' + roi] = lat_roi_flat
                # Check existing feature attribute for all ROIS for overlaps
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0))
                         & (all_rois_mask != 'brain'), 'overlap')

                all_rois_mask[(left_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'left {0}'.format(roi)
                all_rois_mask[(right_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'right {0}'.format(roi)
            elif roi == 'VIS':
                roi_fns = sorted(
                    glob(rootdir + participant + anatdir +
                         '/{0}_*_mask_tmpl.nii.gz'.format(roi)))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)
                np.place(all_rois_mask,
                         (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = roi

        # Flatten mask into list
        all_rois_flat = list(all_rois_mask.ravel())

        # Assign roi mask to dataset feature attributes
        data_ds.fa['all_ROIs'] = all_rois_flat

        # join all datasets
        data_dss.append(data_ds)

    # save full dataset (`outdir` is expected to be defined in the surrounding script)
    mv.h5save(outdir + '{}_groupdataset.hdf5'.format(analysis), data_dss)
    print('saved the collection of all subjects datasets.')
    # squish everything together
    ds_wide = mv.hstack(data_dss)
    # transpose the dataset, time points are now features
    ds = mv.Dataset(ds_wide.samples.T,
                    sa=ds_wide.fa.copy(),
                    fa=ds_wide.sa.copy())
    mv.h5save(outdir + '{}_groupdataset_transposed.hdf5'.format(analysis), ds)
    print('Transposed the group-dataset and saved it.')
    return ds
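# Toy check of the transpose-with-attribute-swap used above: samples and
# features trade places, so former feature attributes become per-sample.
toy = mv.Dataset(np.arange(6).reshape(2, 3),
                 sa={'targets': ['a', 'b']},
                 fa={'voxel': [0, 1, 2]})
toy_t = mv.Dataset(toy.samples.T, sa=toy.fa.copy(), fa=toy.sa.copy())
assert toy_t.shape == (3, 2)
assert list(toy_t.sa.voxel) == [0, 1, 2]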
sl_radius = HYPERALIGNMENT_RADIUS

#create query engine
qe = IndexQueryEngine(voxel_indices=Sphere(sl_radius))
qe.train(ref_ds)

# load all subjects' data files
nfiles = glob.glob(os.path.join(chamats, '*commonspace_subs*'))
print('Loading participant data from: ')
print(chamats)
mysubs = nfiles[0:nsubs]

# import connectomes into pymvpa datasets, add voxel indices, zscore, then append to the list of datasets
dss = []
for sub in range(len(mysubs)):
    ds = mv.Dataset(np.load(mysubs[sub]))
    ds.fa['voxel_indices'] = range(ds.shape[1])
    #ds.sa['chunks'] = np.repeat(i,cnx_tx)
    mv.zscore(ds, chunks_attr=None)
    dss.append(ds)
    
    
print('Number of data sets in dss: ')
print(len(dss))
print('Size of data sets: ')
print(dss[0].shape)
    
# create SL hyperalignment instance
hyper = SearchlightHyperalignment(
    queryengine=qe,
    compute_recon=False,  # we don't need to project back from common space to subject space
)
Example #9
                mvpa_dir, 'no_roi_ids',
                'search_RDMs_sq_zscore_p{0}_{1}.hdf5'.format(
                    participant, hemi)))
        sl_sq = sl_result.samples.reshape(n_conditions, n_conditions,
                                          n_vertices)
        sl_tri = []
        for sl in sl_sq.T:
            sl_tri.append(squareform(sl, checks=False))
        sl_tri = np.array(sl_tri).T
        assert sl_tri.shape == (n_conditions * (n_conditions - 1) // 2,
                                n_vertices)
        sl_tri = mv.Dataset(
            sl_tri,
            sa={
                'conditions':
                list(
                    combinations(condition_order['original_condition_order'],
                                 2))
            },
            fa=sl_result.fa,
            a=sl_result.a)
        sl_tri.sa['participants'] = [int(participant)] * sl_tri.shape[0]
        sl_rdms[hemi][participant] = sl_tri
        print("Loaded searchlight RDMs for participant {0} "
              "hemisphere {1}".format(participant, hemi))

# Compute ISC in leave-one-out fashion
sl_iscs = {}
for hemi in ['lh', 'rh']:
    sl_iscs[hemi] = {}
    for participant in sorted(participants.keys()):
        lo_rdm = sl_rdms[hemi][participant].samples
Example #10
            base_dir + participant + data_dir +
            '{0}_avmovie_detrend{1}_lowpass_ROIs_tmpl_bold.hdf5'.format(
                participant, polyord), movie_ds)
        print("Finished participant {0}, saved the data".format(participant))

mv.h5save(
    results_dir +
    'allsub_avmovie_detrend{0}_lowpass_ROIs_tmpl_bold.hdf5'.format(polyord),
    movie_dss)
print('Saved the group dataset in {}.'.format(results_dir))

# Horizontally stack all data sets
ds_wide = mv.hstack(movie_dss)

# Transpose brain so voxels are now samples
ds = mv.Dataset(ds_wide.samples.T, sa=ds_wide.fa.copy(), fa=ds_wide.sa.copy())

# Save transposed data
mv.h5save(
    results_dir +
    'allsub_transpose_avmovie_detrend{0}_lowpass_ROIs_tmpl_bold.hdf5'.format(
        polyord), ds)
print('Saved the transposed group dataset in {}.'.format(results_dir))

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-b',
                        '--base_dir',
                        help="Please specify the root of your"
Example #11
    # cross-validated RSA using the lean post-processing error function
    cv_rsa = mv.CrossValidation(mv.CDist(pairwise_metric='correlation'),
                                mv.HalfPartitioner(attr='sessions'),
                                errorfx=None, postproc=lean_errorfx)

    sl = mv.Searchlight(cv_rsa, queryengine=qe, enable_ca=['roi_sizes'],
                        nproc=1, results_backend='native')
    #sl = mv.Searchlight(cv_rsa, queryengine=qe, enable_ca=['roi_sizes'],
    #                    nproc=1, results_backend='native', roi_ids=cortical_vertices)
    #tmp_prefix='/local/tmp/sam_sl_p{0}_{1}_'.format(participant_id, hemi)
    mv.debug.active += ['SLC']
    sl_result = sl(ds)
    assert len(sl_result.sa) == 0  # we didn't pass any
    sl_result.sa = target_sa

    print('>>>', np.mean(sl.ca.roi_sizes), np.std(sl.ca.roi_sizes))

    sl_means = np.mean(np.dstack((sl_result.samples[:n_conditions**2, :],
                                  sl_result.samples[n_conditions**2:, :])),
                       axis=2)
    sl_final = mv.Dataset(
        sl_means,
        sa={'conditions': sl_result.sa.conditions[:sl_means.shape[0], :].tolist(),
            'participants': [int(participant[-2:])] * sl_means.shape[0]},
        fa=sl_result.fa, a=sl_result.a)
    #assert sl_result.shape[0] == n_conditions**2
    print(sl_final)
    mv.h5save('/idata/DBIC/cara/life/search_RDMs_sq_zscore_HA_{0}_{1}.hdf5'.format(participant, hemi), sl_final)
        #mv.niml.write(join(mvpa_dir, 'search_RDMs_sq_p{0}_{1}_TEST.niml.dset'.format(
        #                                       participant_id, hemi)), sl_result)
def buildadataset(zscore, rois, event_path=None):
    """buildataset() will build and save participant-specific hdf5 datasets
    with all rois from preprocessed objectcategories data, stack them for a
    group dataset and save them, and transpose the group dataset and save it.
    The parameter 'zscore' determines whether and what kind of z-scoring
    should be performed."""
    print('I am building a dataset with the following option: {}.'.format(
        zscore))

    # get the participants and rois
    participants = sorted(
        [path.split('/')[-1] for path in glob(base_dir + 'sub-*')])
    localizer_dss = []

    for participant in participants:
        localizer_fns = sorted(glob(base_dir + participant + locdir + \
                                    '{}_task-objectcategories_run-*_space-custom-subject_desc-highpass_bold.nii.gz'.format(
                                        participant)))
        mask_fn = base_dir + participant + anat_dir + 'brain_mask.nii.gz'
        assert len(localizer_fns) == 4
        localizer_ds = mv.vstack([
            mv.fmri_dataset(localizer_fn, mask=mask_fn, chunks=run)
            for run, localizer_fn in enumerate(localizer_fns)
        ])

        localizer_ds.fa['participant'] = [participant] * localizer_ds.shape[1]
        print('loaded localizer data for participant {}.'.format(participant))

        # zscore the data with means and standard deviations from no-stimulation
        # periods
        if zscore == 'custom':
            events = get_group_events(event_path)
            means, stds = extract_baseline(events, localizer_ds)
            # zscore stuff
            mv.zscore(localizer_ds, params=(means, stds), chunks_attr='chunks')
            print('finished custom zscoring for participant {}.'.format(
                participant))
        elif zscore == 'z-score':
            mv.zscore(localizer_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')

        all_rois_mask = np.array([['brain'] * localizer_ds.shape[1]
                                  ]).astype('S10')
        for roi in rois:
            # Get filenames for potential right and left ROI masks
            if roi == 'VIS':
                roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                                      '{0}_*_mask.nii.gz'.format(roi)))
            else:
                left_roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                                           'l{0}_*_mask.nii.gz'.format(roi)))
                right_roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                                            'r{0}_*_mask.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns

            if len(roi_fns) == 0:
                print(
                    "ROI {0} does not exist for participant {1}; appending all zeros"
                    .format(roi, participant))
                roi_mask = np.zeros((1, localizer_ds.shape[1]))
            elif len(roi_fns) == 1:
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            elif len(roi_fns) > 1:
                # Add ROI maps into single map
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)
                # Set any voxels that might exceed 1 to 1
                roi_mask = np.where(roi_mask > 0, 1, 0)
            # Ensure that number of voxels in ROI mask matches localizer data
            assert roi_mask.shape[1] == localizer_ds.shape[1]
            # Flatten mask into list
            roi_flat = list(roi_mask.ravel())
            # Assign ROI mask to localizer data feature attributes
            localizer_ds.fa[roi] = roi_flat
            # Get lateralized masks as well
            if roi != 'VIS':
                lat_roi_mask = np.zeros((1, localizer_ds.shape[1]))
                if len(left_roi_fns) == 1:
                    left_roi_mask = np.where(
                        mv.fmri_dataset(left_roi_fns[0], mask=mask_fn).samples
                        > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) > 1:
                    left_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(left_roi_fn, mask=mask_fn).samples
                            for left_roi_fn in left_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) == 0:
                    left_roi_mask = np.zeros((1, localizer_ds.shape[1]))

                if len(right_roi_fns) == 1:
                    right_roi_mask = np.where(
                        mv.fmri_dataset(right_roi_fns[0], mask=mask_fn).samples
                        > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) > 1:
                    right_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(right_roi_fn, mask=mask_fn).samples
                            for right_roi_fn in right_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) == 0:
                    right_roi_mask = np.zeros((1, localizer_ds.shape[1]))

                # Ensure that number of voxels in ROI mask matches localizer data
                assert lat_roi_mask.shape[1] == localizer_ds.shape[1]
                # Flatten mask into list
                lat_roi_flat = list(lat_roi_mask.ravel())
                # Assign ROI mask to localizer data feature attributes
                localizer_ds.fa['lat_' + roi] = lat_roi_flat
                # Check existing feature attribute for all ROIS for overlaps
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0))
                         & (all_rois_mask != 'brain'), 'overlap')

                all_rois_mask[(left_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'left {0}'.format(roi)
                all_rois_mask[(right_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'right {0}'.format(roi)
            elif roi == 'VIS':
                roi_fns = sorted(
                    glob(base_dir + participant + anat_dir +
                         '/{0}_*_mask.nii.gz'.format(roi)))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)
                np.place(all_rois_mask,
                         (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = roi
        # Flatten mask into list
        all_rois_flat = list(all_rois_mask.ravel())
        # Assign ROI mask to localizer data feature attributes
        localizer_ds.fa['all_ROIs'] = all_rois_flat

        if save_per_subject:
            mv.h5save(base_dir + participant + locdir + \
                  '{}_ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5'.format(
                      participant), localizer_ds)
            print('Saved dataset for {}.'.format(participant))
        # join all datasets
        localizer_dss.append(localizer_ds)

    # save full dataset
    mv.h5save(
        results_dir +
        'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5',
        localizer_dss)
    print('saved the collection of all subjects datasets.')
    # squish everything together
    ds_wide = mv.hstack(localizer_dss)

    # transpose the dataset, time points are now features
    ds = mv.Dataset(ds_wide.samples.T,
                    sa=ds_wide.fa.copy(),
                    fa=ds_wide.sa.copy())
    mv.h5save(
        results_dir +
        'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass_transposed.hdf5',
        ds)
    print('Transposed the group-dataset and saved it.')
    return ds
Example #13
import mvpa2.suite as mv
from scipy.stats import zscore as sciz, pearsonr
import os.path, time
from glob import glob
from scipy.io import loadmat
import numpy as np
import pandas as pd
import nibabel as nb
import h5py
from mvpa2.datasets.base import Dataset
from mvpa2.misc.surfing.queryengine import SurfaceQueryEngine
from mvpa2.support.nibabel.surf import read as read_surface
from mvpa2.datasets.mri import fmri_dataset
from mvpa2.misc.neighborhood import IndexQueryEngine, Sphere
from mvpa2.datasets.base import mask_mapper
import mvpa2.misc.surfing.volume_mask_dict as volmask
from mvpa2.algorithms.searchlight_hyperalignment import SearchlightHyperalignment
from mvpa2.mappers.zscore import zscore
from mvpa2.base.hdf5 import h5save, h5load

datadir = '/dartfs-hpc/rc/home/1/f0040y1/CANlab/labdata/projects/OLP4CBP/hyperalignment/common_spaces/bladderpain/commonspace_subs-201_radius-10'

# get list of npy files to convert
myfiles = glob(os.path.join(datadir, '*.npy'))

# loop files, load as pymvpa dataset, save as hdf5.gz
for sub in myfiles:
    mysub = mv.Dataset(np.load(sub))
    savepath = os.path.join(datadir, os.path.split(sub)[1][0:-4] + '.hdf5.gz')
    h5save(savepath, mysub)
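# Quick round-trip check (a sketch; verifies only the last file written).
roundtrip = h5load(savepath)
assert roundtrip.shape == mysub.shape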
for parcel in range(len(indices)):
    PARCEL_NUMBER = parcel  #int(sys.argv[2])
    print('Number of Subjects: {0}'.format(nsubs))
    print('Parcel Number: {0}'.format(PARCEL_NUMBER))
    print('Parcel Label: {0}'.format(labels[PARCEL_NUMBER]))
    print('Parcel Description: {0}'.format(label_descriptions[PARCEL_NUMBER]))
    print('Voxels in Parcel: {0}'.format(sum(indices[PARCEL_NUMBER])))
    df_results.loc[parcel, 'Parcel_desc'] = label_descriptions[PARCEL_NUMBER]
    df_results.loc[parcel, 'Parcel_label'] = labels[PARCEL_NUMBER]
    df_results.loc[parcel, 'Voxels_in_parcel'] = sum(indices[PARCEL_NUMBER])

    myvoxels = np.nonzero(indices[PARCEL_NUMBER])
    dss = []
    for sub in range(len(mats)):
        ds = mats[sub][:, myvoxels[0]]
        ds = mv.Dataset(ds)
        ds.fa['voxel_indices'] = range(ds.shape[1])
        mv.zscore(ds, chunks_attr=None)
        dss.append(ds)

    print('Size of Training data sets: {0}'.format(dss[0].shape))
    print('Beginning Hyperalignment.')

    # create hyperalignment instance
    hyper = Hyperalignment(nproc=1)
    hyper.train(dss)

    # get mappers to common space created by hyper.train (2x procrustes iteration)
    mappers = hyper(dss)

    # apply mappers back onto training data
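    # A hedged sketch of the dangling step above: forward-project each
    # dataset through its mapper. Re-zscoring in the common space is a
    # common choice, not confirmed by the source.
    dss_aligned = [m.forward(d) for m, d in zip(mappers, dss)]
    for d in dss_aligned:
        mv.zscore(d, chunks_attr=None)
    print('Size of aligned data sets: {0}'.format(dss_aligned[0].shape))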
def _call(self, evds):
    # correlation across samples for every pair of features pooled
    # from the two subjects
    res = 1 - sd.pdist(
        np.hstack(
            (evds[evds.sa.subj == self._subj1].samples,
             evds[evds.sa.subj == self._subj2].samples)).T, 'correlation')
    return mvpa.Dataset(np.array(res)[np.newaxis])
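# The same correlation-over-features idea as _call above, reduced to a
# self-contained toy example with random data.
import numpy as np
import scipy.spatial.distance as sd

a = np.random.randn(10, 4)  # 10 "events" x 4 features, subject 1
b = np.random.randn(10, 4)  # same events, subject 2
res = 1 - sd.pdist(np.hstack((a, b)).T, 'correlation')
print(res.shape)  # one value per feature pair: C(8, 2) = 28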