def get_cammoun2012_yeo(scale, data_dir=None):
    """
    Returns Yeo RSN affiliations for Cammoun parcellation

    Parameters
    ----------
    scale : str
        Scale of Cammoun et al., 2012 to use
    data_dir : str or os.PathLike
        Directory where parcellation should be downloaded to (or exists, if
        already downloaded). Default: None

    Returns
    -------
    labels : (2,) namedtuple
        Where the first entry ('vertices') is the vertex-level RSN
        affiliations and the second entry ('parcels') is the parcel-level
        RSN affiliations
    """
    # get requested annotation files
    cammoun = nndata.fetch_cammoun2012('fsaverage5', data_dir=data_dir)[scale]

    # we also need to load in the CSV file with info about the parcellation.
    # unlike the Schaefer et al parcellation the labels in our annotation file
    # provide no information about the network affiliation
    info = pd.read_csv(nndata.fetch_cammoun2012(data_dir=data_dir)['info'])
    info = info.query(f'scale == "{scale}"')

    network_labels, parcel_labels = [], []
    for hemi in ('lh', 'rh'):
        # query dataframe for information for current hemisphere
        cinfo = info.query(f'hemisphere == "{hemi[0].capitalize()}"')

        # read in annotation file for given hemisphere
        annot = getattr(cammoun, hemi)
        labels, ctab, names = nib.freesurfer.read_annot(annot)
        names = [m.decode() for m in names]

        # create empty arrays for vertex- and parcel-level affiliations
        networks = np.zeros_like(labels)
        parcels = np.zeros(len(names), dtype=int)

        for n, parcel in enumerate(names):
            # these should be 'background' parcels (unknown / corpuscallosum)
            if parcel not in list(info['label']):
                continue
            # get the yeo affiliation from the dataframe and assign accordingly
            net = np.squeeze(cinfo.query(f'label == "{parcel}"')['yeo_7'])
            parcels[n] = YEO_CODES[net]
            networks[labels == n] = YEO_CODES[net]

        # store network affiliations for this hemisphere
        network_labels.append(networks)
        parcel_labels.append(parcels)

    return NETWORKS(np.hstack(network_labels), np.hstack(parcel_labels))
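# Usage sketch (assumes the module-level names used above — nndata, pd, nib,
# np, plus the YEO_CODES mapping and NETWORKS namedtuple — are in scope;
# 'scale033' is the coarsest Cammoun scale):
#
#     yeo = get_cammoun2012_yeo('scale033')
#     yeo.vertices  # vertex-level RSN affiliations on fsaverage5
#     yeo.parcels   # one RSN code per annotation label, both hemispheres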
def get_cammoun_schaefer(vers='fsaverage5', data_dir=None, networks='7'):
    """
    Returns Cammoun 2012 and Schaefer 2018 atlases as dictionary

    Parameters
    ----------
    vers : str, optional
        Which version of the atlases to get. Default: 'fsaverage5'
    data_dir : str or os.PathLike, optional
        Data directory where downloaded atlases should be stored. If not
        specified will default to $NNT_DATA or ~/nnt-data
    networks : {'7', '17'}, optional
        Which networks to get for Schaefer 2018 atlas. Default: '7'

    Returns
    -------
    atlases : dict
        Where keys are 'atl-cammoun2012' and 'atl-schaefer2018'
    """
    cammoun = nndata.fetch_cammoun2012(vers, data_dir=data_dir)
    schaefer = nndata.fetch_schaefer2018(vers, data_dir=data_dir)

    # keep only the keys matching the requested network resolution
    schaefer = {k: schaefer.get(k) for k in schaefer.keys()
                if f'Parcels{networks}Networks' in k}

    return {'atl-cammoun2012': cammoun, 'atl-schaefer2018': schaefer}
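# Usage sketch (assumes `nndata` is `from netneurotools import datasets as
# nndata`, consistent with the function body):
#
#     atlases = get_cammoun_schaefer(vers='fsaverage5', networks='7')
#     atlases['atl-cammoun2012']['scale033']   # Cammoun annotation files
#     list(atlases['atl-schaefer2018'])        # only *Parcels7Networks* keys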
def test_fetch_cammoun2012(tmpdir, version, expected):
    keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500']
    cammoun = datasets.fetch_cammoun2012(version, data_dir=tmpdir, verbose=0)

    # output has expected keys
    assert all(hasattr(cammoun, k) for k in keys)

    # and keys are expected lengths!
    for k, e in zip(keys, expected):
        out = getattr(cammoun, k)
        if isinstance(out, list):
            assert len(out) == e
        else:
            assert isinstance(out, str) and out.endswith('.nii.gz')

    if 'fsaverage' in version:
        with pytest.warns(DeprecationWarning):
            datasets.fetch_cammoun2012('surface', data_dir=tmpdir, verbose=0)
# imports implied by the aliases used in the function body below
from pathlib import Path
from typing import List

import numpy as np
from nibabel import load as nib_load
from nibabel.freesurfer import read_annot
from netneurotools import datasets as nnt_datasets


def _fetch_cammoun_parcellation(template: str, n_regions: int,
                                data_dir: Path) -> List[np.ndarray]:
    """Fetches Cammoun parcellations."""
    key = f"scale{n_regions:03}"
    bunch = nnt_datasets.fetch_cammoun2012(version=template,
                                           data_dir=str(data_dir))
    if template == "fslr32k":
        gifti = [nib_load(file) for file in bunch[key]]
        parcellations = [x.darrays[0].data for x in gifti]
    else:
        parcellations = [read_annot(file)[0] for file in bunch[key]]
    return parcellations
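# Usage sketch (the data directory below is a hypothetical example path; per
# the key construction above, n_regions=33 maps to the "scale033" entry):
#
#     labels = _fetch_cammoun_parcellation("fslr32k", 33, Path("/tmp/nnt-data"))
#     print(len(labels))  # one label array per hemisphere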
def test_get_centroids(tmpdir, scale, expected):
    # fetch test dataset
    cammoun = datasets.fetch_cammoun2012('volume', data_dir=tmpdir, verbose=0)

    ijk = utils.get_centroids(cammoun[scale])
    xyz = utils.get_centroids(cammoun[scale], image_space=True)

    # we get expected shape regardless of requested coordinate space
    assert ijk.shape == xyz.shape == (expected, 3)

    # ijk is all positive (i.e., voxel-space) coordinates
    assert np.all(ijk > 0)

    # requesting specific labels gives us a subset of the full `ijk`
    lim = utils.get_centroids(cammoun[scale], labels=[1, 2, 3])
    assert np.all(lim == ijk[:3])
def main():
    # cammoun2012 is used to generate cortical thickness measures
    cammoun2012 = datasets.fetch_cammoun2012(data_dir=directories.rois)
    del cammoun2012['info']
    for scale, mask in cammoun2012.items():
        outfile = op.basename(mask).replace('.nii.gz', '_corticalthickness')
        outpath = op.join(directories.parcels, outfile)
        extract_data(mask, 'sscorticalthickness', outpath)

    # pauli2018 is used to generate subcortical volume measures
    pauli2018 = datasets.fetch_pauli2018(data_dir=directories.rois)
    del pauli2018['info'], pauli2018['probabilistic']
    for scale, mask in pauli2018.items():
        outfile = op.basename(mask).replace('.nii.gz', '_subcorticalvolume')
        outpath = op.join(directories.parcels, outfile)
        extract_data(mask, 'brainsegmentation', outpath)
def get_cammoun2012_vek(scale, data_dir=None):
    """
    Returns von Economo cytoarchitectonic classes for Cammoun parcellation

    Parameters
    ----------
    scale : str
        Scale of Cammoun et al., 2012 to use
    data_dir : str or os.PathLike
        Directory where parcellation should be downloaded to (or exists, if
        already downloaded). Default: None

    Returns
    -------
    labels : (2,) namedtuple
        Where the first entry ('vertices') is the vertex-level classes and
        the second entry ('parcels') is the parcel-level classes
    """
    cammoun = nndata.fetch_cammoun2012('fsaverage5', data_dir=data_dir)[scale]
    vek_annots = _apply_vek_prob(data_dir=data_dir)

    return _parcellate_vek_classes(cammoun, vek_annots)
#
# To address this we can use a spatial permutation test (called a "spin test"),
# first introduced by Alexander-Bloch et al., 2018. This test works by thinking
# about the brain as a sphere and considering random rotations of this sphere.
# If we rotate our data and resample datapoints based on their rotated values,
# we can generate a null distribution that is more appropriate to our spatially
# auto-correlated data.
#
# To do this we need the spatial coordinates of our brain regions, as well as
# an array indicating to which hemisphere each region belongs. In this example
# we'll use one of the parcellations commonly employed in the lab (Cammoun et
# al., 2012). First, we'll fetch the left and right hemisphere FreeSurfer-style
# annotation files for this parcellation (using the lowest resolution of the
# parcellation):

lhannot, rhannot = datasets.fetch_cammoun2012('surface')['scale033']

###############################################################################
# Then we'll find the centroids of this parcellation defined on the spherical
# projection of the fsaverage surface. This function will return the xyz
# coordinates (`coords`) for each parcel defined in `lhannot` and `rhannot`, as
# well as a vector identifying to which hemisphere each parcel belongs
# (`hemi`):

from netneurotools import freesurfer

coords, hemi = freesurfer.find_fsaverage_centroids(lhannot, rhannot)
print(coords.shape, hemi.shape)

###############################################################################
# We'll use these coordinates to generate a resampling array based on this idea
# of a "rotation"-based null model. As an example we'll only generate 1000
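# rotations. A sketch of the call this text leads into, mirroring the
# identical usage elsewhere in these examples (`nnstats` is assumed to be
# imported as `from netneurotools import stats as nnstats`):

spins, cost = nnstats.gen_spinsamples(coords, hemi, n_rotate=1000, seed=1234)
print(spins.shape)  # one column of resampled parcel indices per rotation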
def main():
    # N.B. this will NOT work unless you set the environmental variables
    # $PPMI_USER and $PPMI_PASSWORD prior to running this script.
    # these variables must be the username and password you received when
    # registering for the PPMI. for more information on data access see:
    # https://www.ppmi-info.org/access-data-specimens/download-data/
    pypmi.fetch_studydata('all', path=directories.ppmi, overwrite=False)

    # load demographic data and keep only individuals with PD and healthy
    # individuals. we'll use the information in this data frame to residualize
    # our data against different variables (e.g., age, gender)
    print('Loading demographics information...')
    demographics = pypmi.load_demographics(directories.ppmi) \
                        .query('diagnosis in ["pd", "hc"]') \
                        .set_index('participant')
    demographics['family_history'] = demographics['family_history'].astype(bool)

    # load all non-MRI data
    print('Loading all non-MRI data (this step may take some time)...')
    datscan = pypmi.load_datscan(directories.ppmi, measures='all')
    biospec = pypmi.load_biospecimen(directories.ppmi, measures='all')
    behavior = pypmi.load_behavior(directories.ppmi, measures='all')

    # sometimes, because of how PPMI data were collected, there are slight
    # variations in the recorded date for the same visit, resulting in scores
    # for a single visit being split across two or more rows in the dataframe
    # (i.e., one row might have MoCA scores for visit "V01" and the other has
    # UPDRS scores for visit "V01")
    # to remedy this we use pandas `DataFrame.combine_first()` method, merging
    # scores from both rows and retaining the earliest date as the "true" date
    # (dates were generally only ~1 month different and if that difference
    # makes a significant impact on our results then I quit)
    print('Wrangling non-MRI data into a usable format...')
    first = behavior.drop_duplicates(['participant', 'visit'], 'first') \
                    .reset_index(drop=True)
    last = behavior.drop_duplicates(['participant', 'visit'], 'last') \
                   .reset_index(drop=True)
    behavior = first.combine_first(last)

    # get first visit scores for non-MRI data
    datscan, dat_date = get_visit(datscan, list(demographics.index), visit='SC')
    biospec, bio_date = get_visit(biospec, list(demographics.index), visit='BL')

    # behavioral data acquisition was split across screening + baseline visits
    # so we need to take the earliest visit for each measure
    # that is, not all measures were collected at screening so we need to use
    # the baseline visit scores for those measures
    # unfortunately which visit various measures were initially collected at
    # DIFFERED for PD and HC individuals, so we need to do this separately for
    # the two groups and then merge them back together... ¯\_(ツ)_/¯
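    # a toy sketch of the `combine_first` trick above (standalone pandas
    # example with made-up values, not part of the pipeline): two rows for
    # the same visit, each carrying a different measure, collapse into a
    # single complete row
    #
    #     a = pd.DataFrame({'moca': [28.0], 'updrs': [None]}, index=['V01'])
    #     b = pd.DataFrame({'moca': [None], 'updrs': [13.0]}, index=['V01'])
    #     a.combine_first(b)  # -> one row with moca=28.0 and updrs=13.0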
    beh, beh_dates = [], []
    for diagnosis in ['pd', 'hc']:
        participants = demographics.query(f'diagnosis == "{diagnosis}"').index
        beh_sc, beh_date = get_visit(behavior, list(participants), visit='SC')
        beh_bl, _ = get_visit(behavior, list(participants), visit='BL')
        drop = np.intersect1d(beh_sc.columns, beh_bl.columns)
        beh += [pd.merge(beh_sc, beh_bl.drop(drop, axis=1), on='participant')]
        beh_dates += [beh_date]
    behavior = pd.concat(beh, join='inner')
    beh_date = pd.concat(beh_dates, join='inner')

    # iterate through all combinations of cortical + subcortical parcellations
    # note: there's only one subcortical parcellation (we had considered doing
    # more but the number of good subcortical parcellations is...limited)
    cth_data = sorted(glob.glob(op.join(directories.parcels, '*thickness.npy')))
    vol_data = sorted(glob.glob(op.join(directories.parcels, '*volume.npy')))
    for cth, vol in itertools.product(cth_data, vol_data):
        # determine what cortical / subcortical parcellation combo we're using;
        # this will determine the name of the output file. the specific details
        # include the resolution of the cortical parcellation and the datatype
        # of the subcortical parcellation
        (scale, ) = re.search(r'res-(\d+)', cth).groups()
        (dtype, ) = re.search(r'_hemi-both_(\S+)_', vol).groups()
        hdf = structures.Frog(op.join(directories.snf,
                                      f'scale{scale}_{dtype}.h5'))
        print(f'Loading MRI data for {op.basename(hdf.filename)}...')

        # load parcellated cortical thickness data
        ct_parc = nndata.fetch_cammoun2012(data_dir=directories.rois,
                                           verbose=0)['info']
        ct_parc = pd.read_csv(ct_parc).query(f'scale == "scale{scale}" '
                                             '& structure == "cortex"')
        ct_parc['label'] = (ct_parc['label'] + '_'
                            + ct_parc['hemisphere'].apply(str.lower))
        cortthick, cth_date = get_parcels(cth, session=1, return_date=True,
                                          parcellation=ct_parc)

        # load parcellated subcortical volume data
        sv_parc = nndata.fetch_pauli2018(data_dir=directories.rois,
                                         verbose=0)['info']
        sv_parc = pd.read_csv(sv_parc)
        subvolume, vol_date = get_parcels(vol, session=1, return_date=True,
                                          parcellation=sv_parc)

        # perform batch correction on MRI data
        # first, grab the demographics of subjects for whom we have neuro data.
        # then, remove all sites where we only have data from one subject since
        # we cannot generate batch correction parameters in these instances.
        # finally, perform the actual batch correction using `neurocombat`
        cortthick, subvolume, demo = \
            preprocess.intersect_subjects(cortthick, subvolume, demographics)
        sites, counts = np.unique(demo['site'], return_counts=True)
        demo = demo[demo['site'].isin(sites[counts > 1])]
        cortthick, subvolume, demo = \
            preprocess.intersect_subjects(cortthick, subvolume, demo)
        cortthick.iloc[:, :] = batch_correct(cortthick, demo)
        subvolume.iloc[:, :] = batch_correct(subvolume, demo)

        # only keep subjects for whom we have all datatypes
        # we preprocess HC and PD data separately because part of the process
        # involves imputation and we want to impute missing data using values
        # from each diagnostic group, separately
        data = [cortthick, subvolume, datscan, biospec, behavior]
        *data, demo = preprocess.intersect_subjects(*data, demo)
        hc_data, hc_demo = snfprep(data, demo.query('diagnosis == "hc"'))
        pd_data, pd_demo = snfprep(data, demo.query('diagnosis == "pd"'))

        # only keep features for which we have both PD and HC data
        for n, (hc_dtype, pd_dtype) in enumerate(zip(hc_data, pd_data)):
            cols = np.intersect1d(hc_dtype.columns, pd_dtype.columns)
            hc_data[n], pd_data[n] = hc_data[n][cols], pd_data[n][cols]

        # "regress out" age, gender, age x gender interactions (and total
        # estimated intracranial volume, if MRI data) from all data.
        # we also want to save all this data to disk so we can load it easily
        # in the future! do that for all the raw data, regressor matrices, and
        # processed (i.e., residualized) data
        # we do this because we don't want these sorts of things to bias our
        # initial analyses when creating the fused networks
        keys = [
            'cortical_thickness', 'subcortical_volume', 'dat_scans',
            'csf_assays', 'behavioral_measures'
        ]
        dates = [cth_date, vol_date, dat_date, bio_date, beh_date]
        for grp, dataset, demo in zip(['pd', 'hc'],
                                      [pd_data, hc_data],
                                      [pd_demo, hc_demo]):
            hdf.save(demo, f'/raw/{grp}_demographics', overwrite=False)
            for n, (df, key, date) in enumerate(zip(dataset, keys, dates)):
                reg = gen_regressors(date, demo)

                # get comparative regressors / data (this is always healthy
                # individuals -- we use them to estimate the betas for the
                # residualization process)
                comp_reg, comp_df = gen_regressors(date, hc_demo), hc_data[n]

                resid = nnstats.residualize(reg, df, comp_reg, comp_df,
                                            normalize=False)
                resid = pd.DataFrame(resid, index=df.index, columns=df.columns)

                hdf.save(df, f'/raw/{grp}_{key}', overwrite=False)
                hdf.save(reg, f'/regressors/{grp}_{key}', overwrite=False)
                hdf.save(resid, f'/processed/{grp}_{key}', overwrite=False)
import warnings
from pathlib import Path

import numpy as np
import pandas as pd

from brainspace.null_models import moran
from netneurotools import (datasets as nndata, freesurfer as nnsurf,
                           utils as nnutils)
from parspin import burt
from parspin.plotting import save_brainmap

FIGSIZE = 500
SPINDIR = Path('./data/derivatives/spins').resolve()
DISTDIR = Path('./data/derivatives/geodesic').resolve()
ROIDIR = Path('./data/raw/rois').resolve()
FIGDIR = Path('./figures/spins/examples').resolve()
OPTS = {'colorbar': False, 'colormap': 'coolwarm', 'vmin': 0}

warnings.simplefilter('ignore', category=np.VisibleDeprecationWarning)

if __name__ == "__main__":
    cammoun = nndata.fetch_cammoun2012('MNI152NLin2009aSym', data_dir=ROIDIR)
    name, scale = 'atl-cammoun2012', 'scale125'
    lh, rh = nndata.fetch_cammoun2012('fsaverage', data_dir=ROIDIR)[scale]
    info = pd.read_csv(cammoun['info'])
    info = info.query(f'scale == "{scale}" & structure == "cortex"')
    n_right = len(info.query('hemisphere == "R"'))

    # get coordinates and make LH/RH like surface info
    labels = np.asarray(info['id'])
    coords = nnutils.get_centroids(cammoun[scale], labels=labels,
                                   image_space=True)
    coords = np.row_stack([coords[n_right:], coords[:n_right]])

    # generate re-ordering of coordinates based on Y-position
    start = end = 0
def plot_brain_surface(values, network, hemi=None, cmap="viridis", alpha=0.8,
                       colorbar=True, centered=False, vmin=None, vmax=None,
                       representation='surface'):
    '''
    Function to plot data on the brain, on a surface parcellation.

    PARAMETERS
    ----------
    values : ndarray (n,)
        Values to be plotted on the brain, where n is the number of nodes in
        the parcellation.
    network : dictionary
        Dictionary storing the network associated with the values (to be used
        to identify the adequate surface parcellation)
    '''
    cortical_hemi_mask = network['hemi_mask'][network['subcortex_mask'] == 0]
    n = len(cortical_hemi_mask)

    if hemi is None:
        hemi = network['info']['hemi']

    if hemi == "L":
        scores = np.zeros((n)) + np.mean(values)
        scores[cortical_hemi_mask == 1] = values
        values = scores
    elif hemi == "R":
        scores = np.zeros((n)) + np.mean(values)
        scores[cortical_hemi_mask == 0] = values
        values = scores

    order = network["order"]
    noplot = network["noplot"]
    lh = network["lhannot"]
    rh = network["rhannot"]

    # fetch the annotation files if either one is missing
    if not (os.path.isfile(lh) and os.path.isfile(rh)):
        fetch_cammoun2012(version='fsaverage')
        fetch_schaefer2018()

    # Adjust colormap based on parameters
    if centered:
        m = max(abs(np.amin(values)), np.amax(values))
        vmin, vmax = -m, m
    else:
        if vmin is None:
            vmin = np.amin(values)
        if vmax is None:
            vmax = np.amax(values)

    # Plot the brain surface
    im = plot_fsaverage(values, lhannot=lh, rhannot=rh, noplot=noplot,
                        order=order, views=['lateral', 'medial'],
                        vmin=vmin, vmax=vmax, colormap=cmap, alpha=alpha,
                        colorbar=colorbar,
                        data_kws={'representation': representation},
                        show_toolbar=True)

    return im
    shutil.rmtree(label_dir)

    return created


def strip(x):
    """ Removes everything after last underscore from `x` """
    return '_'.join(x.split('_')[:-1])


annot = 'atl-Cammoun2012_space-fsaverage_res-{}_hemi-{}_deterministic.annot'

if __name__ == '__main__':
    #####
    # get the GCS files and apply them onto the fsaverage surface
    gcs = datasets.fetch_cammoun2012('gcs')
    for scale, gcsfiles in gcs.items():
        for fn in gcsfiles:
            hemi = re.search('hemi-([RL])', fn).group(1)
            scale = re.search('res-(.*)_hemi-', fn).group(1)
            out = op.join(op.dirname(fn), annot.format(scale, hemi))
            freesurfer.apply_prob_atlas('fsaverage', fn, hemi.lower() + 'h',
                                        ctab=fn.replace('.gcs', '.ctab'),
                                        annot=out)

    #####
    # get scale 500 parcellation files and combine
    dirname = op.dirname(fn)
    lh = sorted(glob.glob(op.join(dirname, '*res-500*_hemi-L*annot')))
# original. While this doesn't seem too bad, when we lower the resolution of
# our data down even more (as we do with parcellations), this can become
# especially problematic.
#
# We can demonstrate this for the 1000-node parcellation that we have for our
# dataset above. We need to define the spatial coordinates of the parcels on
# a spherical surface projection. To do this, we'll fetch the left and right
# hemisphere FreeSurfer annotation files for the parcellation and then find the
# centroids of each parcel (defined on the spherical projection of the
# `fsaverage` surface):

from netneurotools import freesurfer as nnsurf

# this will download the Cammoun et al., 2012 FreeSurfer annotation files to
# the $HOME/nnt-data/atl-cammoun2012 directory
lhannot, rhannot = nndata.fetch_cammoun2012('surface', verbose=0)['scale500']

# this will find the center-of-mass of each parcel in the provided annotations
coords, hemi = nnsurf.find_fsaverage_centroids(lhannot, rhannot, surf='sphere')

###############################################################################
# The :func:`find_fsaverage_centroids` function returns the xyz coordinates
# (``coords``) for each parcel defined in `lhannot` and `rhannot`, as well as
# an indicator array identifying to which hemisphere each parcel belongs
# (``hemi``).
#
# We'll use these coordinates to generate a resampling array as we did before
# for the `fsaverage6` vertex coordinates:

# we'll generate 1000 rotations here instead of only 10 as we did previously
spins, cost = nnstats.gen_spinsamples(coords, hemi, n_rotate=1000, seed=1234)
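###############################################################################
# A sketch of how the resampling array is typically used: each column of
# ``spins`` re-indexes parcel-level values under one random rotation. The
# vectors ``x`` and ``y`` below are hypothetical parcel-level brain maps, not
# variables defined in this example:
#
#     x_null = x[spins]        # shape (n_parcels, n_rotate)
#     r_null = [np.corrcoef(y, x_null[:, i])[0, 1]
#               for i in range(x_null.shape[1])]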
                                     ctab=ctab_fname, annot=out, label=label),
            quiet=quiet)

        created.append(out)

    # remove temporary label directory
    shutil.rmtree(label_dir)

    return created


if __name__ == '__main__':
    #####
    # get the GCS files and apply them onto the fsaverage surface
    gcs = datasets.fetch_cammoun2012('gcs')
    for scale, gcsfiles in gcs.items():
        for fn in gcsfiles:
            hemi = re.search('hemi-([RL])', fn).group(1)
            scale = re.search('res-(.*)_hemi-', fn).group(1)
            dirname = op.join(op.dirname(op.dirname(fn)), 'fsaverage')
            out = op.join(dirname, ANNOT.format(scale, hemi))
            freesurfer.apply_prob_atlas('fsaverage', fn, hemi.lower() + 'h',
                                        ctab=fn.replace('.gcs', '.ctab'),
                                        annot=out)

    #####
    # get scale 500 parcellation files and combine
    lh = sorted(glob.glob(op.join(dirname, ANNOT.format('500*', 'L'))))
# the fixture decorator is implied by the `tmp_path_factory` argument
@pytest.fixture
def cammoun_surf(tmp_path_factory):
    tmpdir = str(tmp_path_factory.getbasetemp())
    return datasets.fetch_cammoun2012('fsaverage5', data_dir=tmpdir, verbose=0)
Saves resampling indices to `spinmat.mat` in `../data` directory
"""

import os

import numpy as np
from scipy import io as sio

from netneurotools import datasets, freesurfer, stats

projdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

if __name__ == '__main__':
    # this will fetch Freesurfer annotation files for the Cammoun parcellation
    # they will be saved to ~/nnt-data/atl-cammoun2012
    surf_files = datasets.fetch_cammoun2012('surface')

    spinmat = []
    for scale, (lh, rh) in surf_files.items():
        print('Running {}'.format(scale))

        # find spherical coordinates of the parcels + hemisphere assignments
        coords, hemi = freesurfer.find_fsaverage_centroids(lh, rh)

        # generate the rotations / resampling indices
        spins, cost = stats.gen_spinsamples(coords, hemi, exact=False,
                                            seed=1234, n_rotate=10000)
        spinmat.append(spins.astype('int16'))
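    # finally, save the indices as described in the module docstring; a
    # minimal sketch of that last step (the .mat variable name 'spinmat' is
    # this sketch's assumption, not confirmed by the excerpt)
    sio.savemat(os.path.join(projdir, 'data', 'spinmat.mat'),
                {'spinmat': spinmat})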