def burt2020_surrogates(name, scale):
    """
    Generates surrogates according to Burt et al., 2020, NeuroImage

    Parameters
    ----------
    name : {'atl-cammoun2012', 'atl-schaefer2018'}, str
        Name of atlas for which to load data
    scale : str
        Scale of atlas to use
    """
    # load data + distance matrix for given parcellation
    lh, rh, concepts = surrogates.load_data(NSDIR, name, scale)
    dlh, drh = surrogates.load_dist(DISTDIR, name, scale)

    outdir = SURRDIR / name / 'burt2020' / 'neurosynth'
    Parallel(n_jobs=N_PROC)(
        delayed(surrogates.burt2020_surrogates)(
            lh[:, i], rh[:, i], dlh, drh,
            fname=outdir / concepts[i] / f'{scale}_surrogates.csv',
            n_perm=N_PERM, seed=SEED)
        for i in putils.trange(len(concepts), desc=f'Burt 2020 ({scale})'))
def burt2018_surrogates(name, scale):
    """
    Generates surrogates according to Burt et al., 2018, Nat Neuro

    Parameters
    ----------
    name : {'atl-cammoun2012', 'atl-schaefer2018'}, str
        Name of atlas for which to load data
    scale : str
        Scale of atlas to use
    """
    # load data + distance matrix for given parcellation
    lh, rh, concepts = surrogates.load_data(NSDIR, name, scale)
    dlh, drh = surrogates.load_dist(DISTDIR, name, scale)

    # the Box-Cox transformation requires positive values, so shift the data
    shift = abs(min(np.min(lh), np.min(rh))) + 0.1
    lh, rh = lh + shift, rh + shift

    outdir = SURRDIR / name / 'burt2018' / 'neurosynth'
    Parallel(n_jobs=N_PROC)(
        delayed(surrogates.burt2018_surrogates)(
            lh[:, i], rh[:, i], dlh, drh,
            fname=outdir / concepts[i] / f'{scale}_surrogates.csv',
            n_perm=N_PERM)
        for i in putils.trange(len(concepts), desc=f'Burt 2018 ({scale})'))
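# Hedged illustration (not part of the original pipeline): Burt et al., 2018
# fit a Box-Cox transform, which is only defined for strictly positive data,
# hence the `+ shift` above. This toy sketch shows the same pattern in
# isolation using `scipy.stats.boxcox`; the array values are made up.
def _demo_boxcox_shift():
    import numpy as np
    from scipy import stats

    data = np.array([-2.5, -0.3, 0.0, 1.2, 4.8])
    # shift so the minimum value becomes 0.1 (strictly positive), as above
    shifted = data + abs(data.min()) + 0.1
    transformed, lmbda = stats.boxcox(shifted)
    return transformed, lmbda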
def parcellate_sim(val, alphadir, annot):
    """
    Parcellates simulated surface GRF with `annot`

    Parameters
    ----------
    val : {'x', 'y'}
        Which simulated vector to parcellate
    alphadir : os.PathLike
        Directory in which simulated data are stored
    annot : (2,) namedtuple
        With entries ('lh', 'rh') of filepaths to annotation files to be used
        to parcellate data

    Returns
    -------
    data : (N, `N_SIM`) pandas.DataFrame
        Where `N` is the number of regions in the parcellation and the index
        of the dataframe are the region names
    """
    data = pd.DataFrame(index=putils.get_names(lh=annot.lh, rh=annot.rh))
    alpha = alphadir.parent.name
    for sim in putils.trange(N_SIM, desc=f'Parcellating {alpha} {val}'):
        img = nib.load(alphadir / f'{val}_{sim:04d}.mgh').get_fdata().squeeze()
        cdata = []
        for n, hemi in enumerate(('lh', 'rh')):
            # each fsaverage5 hemisphere comprises 10242 vertices
            sl = slice(10242 * n, 10242 * (n + 1))
            cdata.append(putils.parcellate(img[sl], getattr(annot, hemi)))
        data = data.assign(**{str(sim): np.hstack(cdata)})

    return data
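# Hedged sketch (toy data, not the repo's `putils.parcellate`): the kind of
# aggregation a parcellation step performs -- averaging vertex values within
# each label of an annotation.
def _demo_parcellate():
    import numpy as np

    vertex_data = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
    labels = np.array([0, 0, 1, 1, 1, 2])  # toy per-vertex parcel labels
    # one mean value per unique parcel label
    return np.array([vertex_data[labels == lab].mean()
                     for lab in np.unique(labels)])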
def calc_moran(dist, nulls, n_jobs=1):
    """
    Calculates Moran's I for every column of `nulls`

    Parameters
    ----------
    dist : (N, N) array_like
        Full distance matrix (inter-hemispheric distances should be np.inf)
    nulls : (N, P) array_like
        Null brain maps for which to compute Moran's I
    n_jobs : int, optional
        Number of parallel workers to use for calculating Moran's I.
        Default: 1

    Returns
    -------
    moran : (P,) np.ndarray
        Moran's I for `P` null maps
    """

    def _moran(dist, sim, medmask):
        mask = np.logical_and(medmask, np.logical_not(np.isnan(sim)))
        return spatial.morans_i(dist[np.ix_(mask, mask)], sim[mask],
                                normalize=False, invert_dist=False)

    # pre-compute inverse-distance weights to reduce computation time;
    # np.inf distances (inter-hemispheric) become weights of zero
    with np.errstate(divide='ignore', invalid='ignore'):
        dist = 1 / dist
        np.fill_diagonal(dist, 0)
        dist /= dist.sum(axis=-1, keepdims=True)

    # NaNs in the `dist` array are the "original" medial wall; mask these
    medmask = np.logical_not(np.isnan(dist[:, 0]))

    # calculate Moran's I, masking out NaN values for each null (i.e., the
    # rotated medial wall); memory-map the weights so workers share one copy
    fn = dump(dist, spatial.make_tmpname('.mmap'))[0]
    dist = load(fn, mmap_mode='r')
    moran = np.array(
        Parallel(n_jobs=n_jobs)(
            delayed(_moran)(dist, nulls[:, n], medmask)
            for n in putils.trange(nulls.shape[-1], desc="Running Moran's I")
        )
    )

    Path(fn).unlink()
    return moran
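# Hedged sketch (not from the original code base): the quantity `calc_moran`
# estimates. Moran's I for a map `x` with weight matrix `w` is
#     I = (n / sum(w)) * (z @ w @ z) / (z @ z),    z = x - mean(x).
# Here the weights are inverse distances with a zeroed diagonal, matching the
# pre-computation above; the toy coordinates and map values are made up.
def _demo_morans_i():
    import numpy as np
    from scipy.spatial.distance import cdist

    rng = np.random.default_rng(1234)
    coords = rng.random((20, 3))                 # toy "vertex" coordinates
    x = coords @ np.array([1.0, 0.5, -0.25])     # spatially smooth toy map

    with np.errstate(divide='ignore'):
        w = 1 / cdist(coords, coords)            # inverse-distance weights
    np.fill_diagonal(w, 0)                       # no self-weights

    z = x - x.mean()
    return (len(x) / w.sum()) * (z @ w @ z) / (z @ z)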
def get_surface_distance(surf, dlabel=None, medial=None, medial_labels=None,
                         drop_labels=None, use_wb=False, n_proc=1,
                         verbose=False):
    """
    Calculates surface distance for vertices in `surf`

    Parameters
    ----------
    surf : str or os.PathLike
        Path to surface file on which to calculate distance
    dlabel : str or os.PathLike, optional
        Path to file with parcel labels for provided `surf`. If provided,
        will calculate parcel-parcel distances instead of vertex distances.
        Default: None
    medial : str or os.PathLike, optional
        Path to file containing labels for vertices corresponding to medial
        wall. If provided (and `use_wb=False`), will disallow calculation of
        surface distance along the medial wall. Default: None
    medial_labels : list of str, optional
        List of parcel names that comprise the medial wall and through which
        travel should be disallowed (if `dlabel` provided and
        `use_wb=False`). Will supersede `medial` if both are provided.
        Default: None
    drop_labels : list of str, optional
        List of parcel names that should be dropped from the final distance
        matrix (if `dlabel` is provided). If not specified, will ignore all
        parcels commonly used to reference the medial wall (e.g., 'unknown',
        'corpuscallosum', '???',
        'Background+FreeSurfer_Defined_Medial_Wall'). Default: None
    use_wb : bool, optional
        Whether to use calls to `wb_command -surface-geodesic-distance` for
        computation of the distance matrix; this will involve significant
        disk I/O. If False, all computations will be done in memory using
        the :func:`scipy.sparse.csgraph.dijkstra` function. Default: False
    n_proc : int, optional
        Number of processors to use for parallelizing distance calculation.
        If negative, will use max available processors plus 1 minus the
        specified number. Default: 1 (no parallelization)
    verbose : bool, optional
        Whether to print progress bar while distances are calculated.
        Default: False

    Returns
    -------
    distance : (N, N) numpy.ndarray
        Surface distance between vertices/parcels on `surf`

    Notes
    -----
    The distance matrix computed with `use_wb=False` will have slightly
    lower values than when `use_wb=True` due to known estimation errors.
    These will be fixed at a later date.
""" if drop_labels is None: drop_labels = [ 'unknown', 'corpuscallosum', '???', 'Background+FreeSurfer_Defined_Medial_Wall' ] if medial_labels is None: medial_labels = [] # convert to paths, if necessary surf, dlabel, medial = pathify(surf), pathify(dlabel), pathify(medial) # wb_command requires gifti files so convert if we receive e.g., a FS file # also return a "remove" flag that will be used to delete the temporary # gifti file at the end of this process surf, remove_surf = _surf_to_gii(surf) n_vert = len(nib.load(surf).agg_data()[0]) # check if dlabel / medial wall files were provided labels, mask = None, np.zeros(n_vert, dtype=bool) dlabel, remove_dlabel = _labels_to_gii(dlabel, surf) medial, remove_medial = _labels_to_gii(medial, surf) # get data from dlabel / medial wall files if they provided if dlabel is not None: labels = nib.load(dlabel).agg_data() if medial is not None: mask = nib.load(medial).agg_data().astype(bool) # determine which parcels should be ignored (if they exist) delete, uniq_labels = [], np.unique(labels) if (len(drop_labels) > 0 or len(medial_labels) > 0) and labels is not None: # get vertex labels n_labels = len(uniq_labels) # get parcel labels and reverse dict to (name : label) table = nib.load(dlabel).labeltable.get_labels_as_dict() table = {v: k for k, v in table.items()} # generate dict mapping label to array indices (since labels don't # necessarily start at 0 / aren't contiguous) idx = dict(zip(uniq_labels, np.arange(n_labels))) # get indices of parcel distance matrix to be deleted for lab in set(table) & set(drop_labels): lab = table.get(lab) delete.append(idx.get(lab)) for lab in set(table) & set(medial_labels): lab = table.get(lab) mask[labels == lab] = True # calculate distance from each vertex to all other parcels parallel = Parallel(n_jobs=n_proc, max_nbytes=None) if use_wb: parfunc = delayed(_get_workbench_distance) graph = surf else: parfunc = delayed(_get_graph_distance) graph = make_surf_graph(*nib.load(surf).agg_data(), mask=mask) bar = trange(n_vert, verbose=verbose, desc='Calculating distances') dist = np.row_stack(parallel(parfunc(n, graph, labels) for n in bar)) # average distance for all vertices within a parcel + set diagonal to 0 if labels is not None: dist = np.row_stack( [dist[labels == lab].mean(axis=0) for lab in uniq_labels]) dist[np.diag_indices_from(dist)] = 0 # remove distances for parcels that we aren't interested in if len(delete) > 0: for axis in range(2): dist = np.delete(dist, delete, axis=axis) # if we created gifti files then remove them if remove_surf: surf.unlink() if remove_dlabel: dlabel.unlink() if remove_medial: medial.unlink() return dist
if __name__ == '__main__':
    parcellations = putils.get_cammoun_schaefer(data_dir=ROIDIR)

    for alpha in simnulls.ALPHAS:
        outdir = SIMDIR / f'alpha-{float(alpha):.1f}' / 'sim'
        outdir.mkdir(parents=True, exist_ok=True)

        # generate simulated GRFs
        Parallel(n_jobs=N_PROC)(
            delayed(create_and_save_grfs)(
                corr=0.15, alpha=alpha, seed=n, outdir=outdir)
            for n in putils.trange(N_SIM,
                                   desc=f'Simulating alpha-{alpha:.2f}'))

        # parcellate simulated GRFs and save as CSV
        for name, annotations in parcellations.items():
            for scale, annot in annotations.items():
                scdir = outdir.parent / name
                scdir.mkdir(parents=True, exist_ok=True)
                for val in ('x', 'y'):
                    fn = scdir / f'{scale}_{val}.csv'
                    if fn.exists():
                        continue
                    parcellate_sim(val, outdir, annot).to_csv(fn, sep=',')
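# Hedged aside (toy, unrelated to the exact GRF machinery above): one common
# way to generate two vectors with a target correlation, as with `corr=0.15`,
# is to mix independent Gaussian samples:
#     y = corr * x + sqrt(1 - corr**2) * noise
def _demo_correlated_pair(corr=0.15, n=10000, seed=1234):
    import numpy as np

    rng = np.random.default_rng(seed)
    x, noise = rng.standard_normal(n), rng.standard_normal(n)
    y = corr * x + np.sqrt(1 - corr ** 2) * noise
    return np.corrcoef(x, y)[0, 1]  # approximately `corr`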
def run_null(parcellation, scale, spatnull, alpha):
    """
    Runs spatial null models for given combination of inputs

    Parameters
    ----------
    parcellation : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    spatnull : str
        Name of spatial null method to be used
    alpha : float
        Spatial autocorrelation parameter to be used
    """
    print(f'{time.ctime()}: {parcellation} {scale} {spatnull} {alpha} ',
          flush=True)

    # filenames (for I/O)
    spins_fn = SPDIR / parcellation / spatnull / f'{scale}_spins.csv'
    pvals_fn = (SIMDIR / alpha / parcellation / 'nulls' / spatnull
                / f'{scale}_nulls.csv')
    perms_fn = pvals_fn.parent / f'{scale}_perms.csv'

    if SHUFFLE:
        pvals_fn = pvals_fn.parent / f'{scale}_nulls_shuffle.csv'
        perms_fn = perms_fn.parent / f'{scale}_perms_shuffle.csv'

    if pvals_fn.exists() and perms_fn.exists():
        return

    # load simulated data
    alphadir = SIMDIR / alpha
    if parcellation == 'vertex':
        x, y = simnulls.load_vertex_data(alphadir, n_sim=N_SIM)
    else:
        x, y = simnulls.load_parc_data(alphadir, parcellation, scale,
                                       n_sim=N_SIM)

    # if we're computing info on SHUFFLED data, get the appropriate random `y`
    if SHUFFLE:
        y = _get_ysim(y, np.random.default_rng(1).permutation(N_SIM))

    # calculate the null p-values
    if spatnull == 'naive-para':
        pvals = nnstats.efficient_pearsonr(x, y, nan_policy='omit')[1]
        perms = np.array([np.nan])
    elif spatnull == 'cornblath':
        fn = SPDIR / 'vertex' / 'vazquez-rodriguez' / 'fsaverage5_spins.csv'
        x, y = np.asarray(x), np.asarray(y)
        spins = simnulls.load_spins(fn, n_perm=N_PERM)
        fetcher = getattr(nndata, f"fetch_{parcellation.replace('atl-', '')}")
        annot = fetcher('fsaverage5', data_dir=ROIDIR)[scale]
        out = Parallel(n_jobs=N_PROC, max_nbytes=None)(
            delayed(_cornblath)(x[:, sim], y[:, sim], spins, annot)
            for sim in putils.trange(x.shape[-1], desc='Running simulations')
        )
        pvals, perms = zip(*out)
    elif spatnull == 'baum':
        x, y = np.asarray(x), np.asarray(y)
        spins = simnulls.load_spins(spins_fn, n_perm=N_PERM)
        out = Parallel(n_jobs=N_PROC, max_nbytes=None)(
            delayed(_baum)(x[:, sim], y[:, sim], spins)
            for sim in putils.trange(x.shape[-1], desc='Running simulations')
        )
        pvals, perms = zip(*out)
    elif spatnull in ('burt2018', 'burt2020', 'moran'):
        xarr = np.asarray(x)
        out = Parallel(n_jobs=N_PROC, max_nbytes=None)(
            delayed(_genmod)(xarr[:, sim], _get_ysim(y, sim),
                             parcellation, scale, spatnull)
            for sim in putils.trange(x.shape[-1], desc='Running simulations')
        )
        pvals, perms = zip(*out)
    else:  # vazquez-rodriguez, vasa, hungarian, naive-nonparametric
        x, y = np.asarray(x), np.asarray(y)
        spins = simnulls.load_spins(spins_fn, n_perm=N_PERM)
        out = Parallel(n_jobs=N_PROC, max_nbytes=None)(
            delayed(simnulls.calc_pval)(x[:, sim], y[:, sim], y[spins, sim])
            for sim in putils.trange(x.shape[-1], desc='Running simulations')
        )
        pvals, perms = zip(*out)

    # save to disk
    putils.save_dir(perms_fn, np.atleast_1d(perms), overwrite=False)
    putils.save_dir(pvals_fn, np.atleast_1d(pvals), overwrite=False)
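# Hedged sketch (an assumption, not the repo's exact implementation): the
# flavor of statistic `simnulls.calc_pval(x, y, y_perm)` computes -- an
# empirical two-tailed p-value for the observed x-y correlation against
# correlations obtained from null (e.g., spun) copies of `y`.
def _demo_perm_pval():
    import numpy as np

    rng = np.random.default_rng(1234)
    x, y = rng.random(100), rng.random(100)
    # (100, 1000) array of permuted copies of `y`, standing in for spins
    y_perm = np.array([rng.permutation(y) for _ in range(1000)]).T

    real = np.corrcoef(x, y)[0, 1]
    perms = np.array([np.corrcoef(x, y_perm[:, i])[0, 1]
                      for i in range(y_perm.shape[1])])
    # +1 correction so p is never exactly zero
    pval = (np.sum(np.abs(perms) >= np.abs(real)) + 1) / (len(perms) + 1)
    return pval, perms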