def burt2020_surrogates(name, scale):
    """
    Generates surrogates according to Burt et al., 2020, NeuroImage

    Parameters
    ----------
    name : {'atl-cammoun2012', 'atl-schaefer2018'}, str
        Name of atlas for which to load data
    scale : str
        Scale of atlas to use
    """

    # load data + distance matrix for given parcellation
    lh, rh, concepts = surrogates.load_data(NSDIR, name, scale)
    dlh, drh = surrogates.load_dist(DISTDIR, name, scale)

    outdir = SURRDIR / name / 'burt2020' / 'neurosynth'
    Parallel(n_jobs=N_PROC)(
        delayed(surrogates.burt2020_surrogates)(lh[:, i],
                                                rh[:, i],
                                                dlh,
                                                drh,
                                                fname=outdir / concepts[i] /
                                                f'{scale}_surrogates.csv',
                                                n_perm=N_PERM,
                                                seed=SEED)
        for i in putils.trange(len(concepts), desc=f'Burt 2020 ({scale})'))


def burt2018_surrogates(name, scale):
    """
    Generates surrogates according to Burt et al., 2018, Nat Neuro

    Parameters
    ----------
    name : {'atl-cammoun2012', 'atl-schaefer2018'}, str
        Name of atlas for which to load data
    scale : str
        Scale of atlas to use
    """

    # load data + distance matrix for given parcellation
    lh, rh, concepts = surrogates.load_data(NSDIR, name, scale)
    dlh, drh = surrogates.load_dist(DISTDIR, name, scale)

    # the Box-Cox transformation requires positive values, so shift the data
    shift = abs(min(np.min(lh), np.min(rh))) + 0.1
    lh, rh = lh + shift, rh + shift

    outdir = SURRDIR / name / 'burt2018' / 'neurosynth'
    Parallel(n_jobs=N_PROC)(
        delayed(surrogates.burt2018_surrogates)(lh[:, i],
                                                rh[:, i],
                                                dlh,
                                                drh,
                                                fname=outdir / concepts[i] /
                                                f'{scale}_surrogates.csv',
                                                n_perm=N_PERM)
        for i in putils.trange(len(concepts), desc=f'Burt 2018 ({scale})'))
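

A hypothetical driver for the two generator functions above, mirroring the
`__main__` pattern used in the later examples; it assumes that
putils.get_cammoun_schaefer returns a mapping of atlas names to their
available scales (as it does in Example #6 below).

if __name__ == '__main__':
    parcellations = putils.get_cammoun_schaefer(data_dir=ROIDIR)
    for name, annotations in parcellations.items():
        for scale in annotations:
            burt2020_surrogates(name, scale)
            burt2018_surrogates(name, scale)
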
Example #3
def parcellate_sim(val, alphadir, annot):
    """
    Parcellates simulated surface GRF with `annot`

    Parameters
    ----------
    val : {'x', 'y'}
        Which simulated vector to parcellate
    alphadir : os.PathLike
        Directory in which simulated data are stored
    annot : (2,) namedtuple
        With entries ('lh', 'rh') of filepaths to annotation files to be used
        to parcellate data

    Returns
    -------
    data : (N, `N_SIM`) pandas.DataFrame
        Parcellated data, where `N` is the number of regions in the
        parcellation; the dataframe index contains the region names
    """

    data = pd.DataFrame(index=putils.get_names(lh=annot.lh, rh=annot.rh))
    alpha = alphadir.parent.name
    for sim in putils.trange(N_SIM, desc=f'Parcellating {alpha} {val}'):
        img = nib.load(alphadir / f'{val}_{sim:04d}.mgh').get_fdata().squeeze()
        cdata = []
        for n, hemi in enumerate(('lh', 'rh')):
            # 10242 vertices per hemisphere (fsaverage5-resolution surfaces)
            sl = slice(10242 * n, 10242 * (n + 1))
            cdata.append(putils.parcellate(img[sl], getattr(annot, hemi)))
        data = data.assign(**{str(sim): np.hstack(cdata)})

    return data
Example #4
def calc_moran(dist, nulls, n_jobs=1):
    """
    Calculates Moran's I for every column of `nulls`

    Parameters
    ----------
    dist : (N, N) array_like
        Full distance matrix (inter-hemispheric distance should be np.inf)
    nulls : (N, P) array_like
        Null brain maps for which to compute Moran's I
    n_jobs : int, optional
        Number of parallel workers to use for calculating Moran's I. Default: 1

    Returns
    -------
    moran : (P,) np.ndarray
        Moran's I for `P` null maps
    """

    def _moran(dist, sim, medmask):
        mask = np.logical_and(medmask, np.logical_not(np.isnan(sim)))
        return spatial.morans_i(dist[np.ix_(mask, mask)], sim[mask],
                                normalize=False, invert_dist=False)

    # do some pre-calculation on our distance matrix to reduce computation time
    with np.errstate(divide='ignore', invalid='ignore'):
        dist = 1 / dist
        np.fill_diagonal(dist, 0)
        dist /= dist.sum(axis=-1, keepdims=True)
    # NaNs in the `dist` array are the "original" medial wall; mask these
    medmask = np.logical_not(np.isnan(dist[:, 0]))

    # calculate moran's I, masking out NaN values for each null (i.e., the
    # rotated medial wall)
    fn = dump(dist, spatial.make_tmpname('.mmap'))[0]
    dist = load(fn, mmap_mode='r')
    moran = np.array(
        Parallel(n_jobs=n_jobs)(
            delayed(_moran)(dist, nulls[:, n], medmask)
            for n in utils.trange(nulls.shape[-1], desc="Running Moran's I")
        )
    )

    Path(fn).unlink()
    return moran
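
A minimal sketch of calling calc_moran on synthetic inputs; the coordinates,
distance matrix, and null maps below are toy stand-ins for real surface data
(numpy is assumed to be imported as np, as in the function above).

from scipy.spatial.distance import pdist, squareform

rng = np.random.default_rng(1234)
coords = rng.random((100, 3))                      # toy "vertex" coordinates
toy_dist = squareform(pdist(coords))               # (100, 100) distance matrix
toy_dist[:50, 50:] = toy_dist[50:, :50] = np.inf   # inter-hemispheric = np.inf
toy_nulls = rng.standard_normal((100, 10))         # ten toy null maps
toy_moran = calc_moran(toy_dist, toy_nulls)        # (10,) array of Moran's I
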
def get_surface_distance(surf,
                         dlabel=None,
                         medial=None,
                         medial_labels=None,
                         drop_labels=None,
                         use_wb=False,
                         n_proc=1,
                         verbose=False):
    """
    Calculates surface distance for vertices in `surf`

    Parameters
    ----------
    surf : str or os.PathLike
        Path to surface file on which to calculate distance
    dlabel : str or os.PathLike, optional
        Path to file with parcel labels for provided `surf`. If provided will
        calculate parcel-parcel distances instead of vertex distances. Default:
        None
    medial : str or os.PathLike, optional
        Path to file containing labels for vertices corresponding to medial
        wall. If provided (and `use_wb=False`), will disallow calculation of
        surface distance along the medial wall. Default: None
    medial_labels : list of str, optional
        List of parcel names that comprise the medial wall and through which
        travel should be disallowed (if `dlabel` provided and `use_wb=False`).
        Will supersede `medial` if both are provided. Default: None
    drop_labels : list of str, optional
        List of parcel names that should be dropped from the final distance
        matrix (if `dlabel` is provided). If not specified, will ignore all
        parcels commonly used to reference the medial wall (e.g., 'unknown',
        'corpuscallosum', '???', 'Background+FreeSurfer_Defined_Medial_Wall').
        Default: None
    use_wb : bool, optional
        Whether to use calls to `wb_command -surface-geodesic-distance` for
        computation of the distance matrix; this will involve significant disk
        I/O. If False, all computations will be done in memory using the
        :func:`scipy.sparse.csgraph.dijkstra` function. Default: False
    n_proc : int, optional
        Number of processors to use for parallelizing distance calculation. If
        negative, will use max available processors plus 1 minus the specified
        number. Default: 1 (no parallelization)
    verbose : bool, optional
        Whether to print progress bar while distances are calculated. Default:
        False

    Returns
    -------
    distance : (N, N) numpy.ndarray
        Surface distance between vertices/parcels on `surf`

    Notes
    -----
    The distance matrix computed with `use_wb=False` will have slightly lower
    values than when `use_wb=True` due to known estimation errors. These will
    be fixed at a later date.
    """

    if drop_labels is None:
        drop_labels = [
            'unknown', 'corpuscallosum', '???',
            'Background+FreeSurfer_Defined_Medial_Wall'
        ]
    if medial_labels is None:
        medial_labels = []

    # convert to paths, if necessary
    surf, dlabel, medial = pathify(surf), pathify(dlabel), pathify(medial)

    # wb_command requires gifti files so convert if we receive e.g., a FS file
    # also return a "remove" flag that will be used to delete the temporary
    # gifti file at the end of this process
    surf, remove_surf = _surf_to_gii(surf)
    n_vert = len(nib.load(surf).agg_data()[0])

    # check if dlabel / medial wall files were provided
    labels, mask = None, np.zeros(n_vert, dtype=bool)
    dlabel, remove_dlabel = _labels_to_gii(dlabel, surf)
    medial, remove_medial = _labels_to_gii(medial, surf)

    # get data from dlabel / medial wall files if they were provided
    if dlabel is not None:
        labels = nib.load(dlabel).agg_data()
    if medial is not None:
        mask = nib.load(medial).agg_data().astype(bool)

    # determine which parcels should be ignored (if they exist)
    delete, uniq_labels = [], np.unique(labels)
    if labels is not None and (len(drop_labels) > 0 or len(medial_labels) > 0):
        # get vertex labels
        n_labels = len(uniq_labels)

        # get parcel labels and reverse dict to (name : label)
        table = nib.load(dlabel).labeltable.get_labels_as_dict()
        table = {v: k for k, v in table.items()}

        # generate dict mapping label to array indices (since labels don't
        # necessarily start at 0 / aren't contiguous)
        idx = dict(zip(uniq_labels, np.arange(n_labels)))

        # get indices of parcel distance matrix to be deleted
        for lab in set(table) & set(drop_labels):
            lab = table.get(lab)
            delete.append(idx.get(lab))

        for lab in set(table) & set(medial_labels):
            lab = table.get(lab)
            mask[labels == lab] = True

    # calculate distance from each vertex to all other parcels
    parallel = Parallel(n_jobs=n_proc, max_nbytes=None)
    if use_wb:
        parfunc = delayed(_get_workbench_distance)
        graph = surf
    else:
        parfunc = delayed(_get_graph_distance)
        graph = make_surf_graph(*nib.load(surf).agg_data(), mask=mask)
    bar = trange(n_vert, verbose=verbose, desc='Calculating distances')
    dist = np.row_stack(parallel(parfunc(n, graph, labels) for n in bar))

    # average distance for all vertices within a parcel + set diagonal to 0
    if labels is not None:
        dist = np.row_stack(
            [dist[labels == lab].mean(axis=0) for lab in uniq_labels])
        dist[np.diag_indices_from(dist)] = 0

    # remove distances for parcels that we aren't interested in
    if len(delete) > 0:
        for axis in range(2):
            dist = np.delete(dist, delete, axis=axis)

    # if we created gifti files then remove them
    if remove_surf:
        surf.unlink()
    if remove_dlabel:
        dlabel.unlink()
    if remove_medial:
        medial.unlink()

    return dist
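
A hypothetical invocation of get_surface_distance; the surface and label file
names below are placeholders rather than files provided with this code.

dist = get_surface_distance('lh.midthickness.surf.gii',      # placeholder path
                            dlabel='lh.aparc.label.gii',      # placeholder path
                            medial_labels=['unknown', 'corpuscallosum'],
                            use_wb=False,
                            n_proc=4,
                            verbose=True)
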
Example #6


if __name__ == '__main__':
    parcellations = putils.get_cammoun_schaefer(data_dir=ROIDIR)

    for alpha in simnulls.ALPHAS:
        outdir = SIMDIR / f'alpha-{float(alpha):.1f}' / 'sim'
        outdir.mkdir(parents=True, exist_ok=True)

        # generate simulated GRFs
        Parallel(n_jobs=N_PROC)(
            delayed(create_and_save_grfs)(
                corr=0.15, alpha=alpha, seed=n, outdir=outdir)
            for n in putils.trange(N_SIM,
                                   desc=f'Simulating alpha-{alpha:.2f}'))

        # parcellate simulated GRFs and save as CSV
        for name, annotations in parcellations.items():
            for scale, annot in annotations.items():
                scdir = outdir.parent / name
                scdir.mkdir(parents=True, exist_ok=True)
                for val in ('x', 'y'):
                    fn = scdir / f'{scale}_{val}.csv'
                    if fn.exists():
                        continue
                    parcellate_sim(val, outdir, annot).to_csv(fn, sep=',')
def run_null(parcellation, scale, spatnull, alpha):
    """
    Runs spatial null models for given combination of inputs

    Parameters
    ----------
    parcellation : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    spatnull : str
        Name of spin method to be used
    alpha : str
        Spatial autocorrelation level to be used, given as the name of the
        corresponding 'alpha-X.X' simulation directory
    """

    print(f'{time.ctime()}: {parcellation} {scale} {spatnull} {alpha} ',
          flush=True)

    # filenames (for I/O)
    spins_fn = SPDIR / parcellation / spatnull / f'{scale}_spins.csv'
    pvals_fn = (SIMDIR / alpha / parcellation / 'nulls' / spatnull
                / f'{scale}_nulls.csv')
    perms_fn = pvals_fn.parent / f'{scale}_perms.csv'

    if SHUFFLE:
        pvals_fn = pvals_fn.parent / f'{scale}_nulls_shuffle.csv'
        perms_fn = perms_fn.parent / f'{scale}_perms_shuffle.csv'

    if pvals_fn.exists() and perms_fn.exists():
        return

    # load simulated data
    alphadir = SIMDIR / alpha
    if parcellation == 'vertex':
        x, y = simnulls.load_vertex_data(alphadir, n_sim=N_SIM)
    else:
        x, y = simnulls.load_parc_data(alphadir, parcellation, scale,
                                       n_sim=N_SIM)

    # if we're computing info on SHUFFLED data, get the appropriate random `y`
    if SHUFFLE:
        y = _get_ysim(y, np.random.default_rng(1).permutation(N_SIM))

    # calculate the null p-values
    if spatnull == 'naive-para':
        pvals = nnstats.efficient_pearsonr(x, y, nan_policy='omit')[1]
        perms = np.array([np.nan])
    elif spatnull == 'cornblath':
        fn = SPDIR / 'vertex' / 'vazquez-rodriguez' / 'fsaverage5_spins.csv'
        x, y = np.asarray(x), np.asarray(y)
        spins = simnulls.load_spins(fn, n_perm=N_PERM)
        fetcher = getattr(nndata, f"fetch_{parcellation.replace('atl-', '')}")
        annot = fetcher('fsaverage5', data_dir=ROIDIR)[scale]
        out = Parallel(n_jobs=N_PROC, max_nbytes=None)(
            delayed(_cornblath)(x[:, sim], y[:, sim], spins, annot)
            for sim in putils.trange(x.shape[-1], desc='Running simulations')
        )
        pvals, perms = zip(*out)
    elif spatnull == 'baum':
        x, y = np.asarray(x), np.asarray(y)
        spins = simnulls.load_spins(spins_fn, n_perm=N_PERM)
        out = Parallel(n_jobs=N_PROC, max_nbytes=None)(
            delayed(_baum)(x[:, sim], y[:, sim], spins)
            for sim in putils.trange(x.shape[-1], desc='Running simulations')
        )
        pvals, perms = zip(*out)
    elif spatnull in ('burt2018', 'burt2020', 'moran'):
        xarr = np.asarray(x)
        out = Parallel(n_jobs=N_PROC, max_nbytes=None)(
            delayed(_genmod)(xarr[:, sim], _get_ysim(y, sim),
                             parcellation, scale, spatnull)
            for sim in putils.trange(x.shape[-1], desc='Running simulations')
        )
        pvals, perms = zip(*out)
    else:  # vazquez-rodriguez, vasa, hungarian, naive-nonparametric
        x, y = np.asarray(x), np.asarray(y)
        spins = simnulls.load_spins(spins_fn, n_perm=N_PERM)
        out = Parallel(n_jobs=N_PROC, max_nbytes=None)(
            delayed(simnulls.calc_pval)(x[:, sim], y[:, sim], y[spins, sim])
            for sim in putils.trange(x.shape[-1], desc='Running simulations')
        )
        pvals, perms = zip(*out)

    # save to disk
    putils.save_dir(perms_fn, np.atleast_1d(perms), overwrite=False)
    putils.save_dir(pvals_fn, np.atleast_1d(pvals), overwrite=False)
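

A hypothetical driver for run_null, following the `__main__` pattern of the
other scripts in this section; the tuple of null-model names simply collects
the branches handled above, and running the nulls at the vertex level is
omitted from this sketch.

if __name__ == '__main__':
    parcellations = putils.get_cammoun_schaefer(data_dir=ROIDIR)
    spatnulls = ('naive-para', 'naive-nonparametric', 'vazquez-rodriguez',
                 'vasa', 'hungarian', 'baum', 'cornblath',
                 'burt2018', 'burt2020', 'moran')
    for alpha in simnulls.ALPHAS:
        alpha_name = f'alpha-{float(alpha):.1f}'  # directory name, per run_null
        for spatnull in spatnulls:
            for parcellation, annotations in parcellations.items():
                for scale in annotations:
                    run_null(parcellation, scale, spatnull, alpha_name)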