Code example #1
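All of the snippets below appear to share a set of module-level imports and constants. A plausible preamble is sketched here; the import paths and constant values are assumptions reconstructed from how the names are used, not taken from the source:

# assumed preamble -- reconstructed from usage in the snippets below
import numpy as np
import pandas as pd
import threadpoolctl

from brainsmash import mapgen                    # Base / Sampled generators
from brainspace.null_models import moran         # MoranRandomization
from netneurotools import datasets as nndata     # fetch_cammoun2012 / fetch_schaefer2018
from netneurotools import freesurfer as nnsurf   # spin_data
from parspin import burt, utils as putils       # project-specific helpers

N_PERM = 10000  # surrogates / spins per null (matches the hardcoded 10000 below)
SEED = 1234     # fixed RNG seed (matches random_state=1234 below)
# DISTDIR, SPDIR, HCPDIR, NSDIR, ROIDIR and SURRDIR are pathlib.Path data
# directories; N_SURROGATES, N_PROC, USE_KNN, METHODS, ALPHA and NET_CODES
# are further constants whose values are not shown in these excerpts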
def load_full_distmat(data, distdir, parcellation, scale):
    """
    Returns full distance matrix for given `parcellation` and `scale`

    Parameters
    ----------
    data : pd.DataFrame or array_like
        Data used to determine hemisphere designations for loaded distance
        matrices
    distdir : os.PathLike
        Filepath to directory containing geodesic distance files
    parcellation : {'atl-cammoun2012', 'atl-schaefer2018'}
        Name of parcellation to use
    scale : str
        Scale of parcellation to use. Must be valid scale for specified
        `parcellation`

    Returns
    -------
    dist : (N, N) np.ndarray
        Full distance matrix (inter-hemispheric distances set to np.inf)
    """

    # get "full" distance matrix for data, with inter-hemi set to np.inf
    dist = np.full((len(data), len(data)), np.inf)
    for _, hdist, hidx in putils.yield_data_dist(distdir, parcellation,
                                                 scale, data, inverse=False):
        dist[np.ix_(hidx, hidx)] = hdist
    # the diagonal is set to 1 (rather than 0), presumably so that a later
    # element-wise inversion of the matrix does not divide by zero
    np.fill_diagonal(dist, 1)

    return dist
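A minimal usage sketch (the file name, parcellation, and scale are illustrative; `DISTDIR` comes from the assumed preamble above):

# hypothetical call -- 'atl-cammoun2012' / 'scale125' mirror the docstring
data = pd.read_csv('myelin_scale125.csv', index_col=0)
dist = load_full_distmat(data, DISTDIR, 'atl-cammoun2012', 'scale125')
assert np.isinf(dist).any()  # inter-hemispheric entries remain np.inf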
Code example #2

def make_surrogates(data, parcellation, scale, spatnull):
    """
    Generates surrogates for `data` using `spatnull` method

    Parameters
    ----------
    data : (N,) pd.Series
        Parcellated data for which surrogates should be generated
    parcellation : {'atl-cammoun2012', 'atl-schaefer2018'}
        Name of parcellation to use
    scale : str
        Scale of parcellation to use; must be valid for `parcellation`
    spatnull : {'burt2018', 'burt2020', 'moran'}
        Name of spatial null method used to generate surrogates

    Returns
    -------
    surrogates : (N, `N_PERM`) np.ndarray
        Generated surrogate maps
    """

    if spatnull not in ('burt2018', 'burt2020', 'moran'):
        raise ValueError(f'Cannot make surrogates for null method {spatnull}')

    darr = np.asarray(data)
    dmin = np.nanmin(darr)

    surrogates = np.zeros((len(data), N_PERM))
    for hdata, dist, idx in putils.yield_data_dist(
        DISTDIR, parcellation, scale, data, inverse=(spatnull == 'moran')
    ):

        # handle NaNs before generating surrogates; this should only matter
        # for vertex-level data but is harmless otherwise
        mask = np.logical_not(np.isnan(hdata))
        surrogates[idx[np.logical_not(mask)]] = np.nan
        hdata, dist, idx = hdata[mask], dist[np.ix_(mask, mask)], idx[mask]

        if spatnull == 'burt2018':
            # the Box-Cox transformation used by this method requires
            # strictly positive data, so shift the minimum above zero
            hdata += np.abs(dmin) + 0.1
            surrogates[idx] = \
                burt.batch_surrogates(dist, hdata, n_surr=N_PERM, seed=SEED)
        elif spatnull == 'burt2020':
            if parcellation == 'vertex':  # full Base model is infeasible at vertex resolution
                index = np.argsort(dist, axis=-1)
                dist = np.sort(dist, axis=-1)
                knn = 1000 if USE_KNN else len(hdata)
                surrogates[idx] = \
                    mapgen.Sampled(hdata, dist, index, knn=knn,
                                   seed=SEED)(N_PERM).T
            else:
                surrogates[idx] = \
                    mapgen.Base(hdata, dist, seed=SEED)(N_PERM, 50).T
        elif spatnull == 'moran':
            mrs = moran.MoranRandomization(joint=True, n_rep=N_PERM,
                                           tol=1e-6, random_state=SEED)
            with threadpoolctl.threadpool_limits(limits=2):
                surrogates[idx] = mrs.fit(dist).randomize(hdata).T

    return surrogates
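A hedged example call for the function above (the input map and scale name are illustrative):

# hypothetical: Moran surrogates for a parcellated T1w/T2w (myelin) map
myelin = pd.read_csv('myelin_400Parcels7Networks.csv', index_col=0)['myelin']
surrogates = make_surrogates(myelin, 'atl-schaefer2018',
                             '400Parcels7Networks', 'moran')
print(surrogates.shape)  # (N, N_PERM)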
Code example #3
def run_null(netclass, parc, scale, spintype):
    """
    Runs spatial permutation null model for given combination of inputs

    Parameters
    ----------
    netclass : {'vek', 'yeo'}
        Network partition to test ('vek': von Economo-Koskinas classes,
        'yeo': Yeo intrinsic networks)
    parc : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    spintype : str
        Name of spin method to be used

    Returns
    -------
    stats : pd.DataFrame
        Generated statistics with columns ['parcellation', 'scale', 'spintype',
        'netclass', 'network', 'zscore', 'pval']
    """
    data = load_data(netclass, parc, scale)

    # run the damn thing
    print(f'Running {spintype:>9} spins for {scale}: ', end='', flush=True)
    out = HCPDIR / parc / 'nulls' / netclass / spintype / f'{scale}_nulls.csv'
    if out.exists():
        permnets = np.loadtxt(out, delimiter=',')
    elif spintype == 'cornblath':
        # even though we're working with parcellated data we need to project
        # that to the surface + spin the vertices, so let's load our
        # pre-generated vertex-level spins
        spins = SPDIR / 'vertex' / 'vazquez-rodriguez' / 'fsaverage5_spins.csv'

        # get annotation files (we need these to project parcels to surface)
        fetcher = getattr(nndata, f"fetch_{parc.replace('atl-', '')}")
        annotations = fetcher('fsaverage5', data_dir=ROIDIR)[scale]

        # pre-load the spins (nnsurf.spin_data expects an array, not a path)
        print('Pre-loading spins...', end='\b' * 20, flush=True)
        spins = np.loadtxt(spins, delimiter=',', dtype='int32')
        # generate "spun" data; permdata will be an (R, T, n_rotate) array
        # where `R` is regions and `T` is 1 (myelination)
        permdata = nnsurf.spin_data(np.asarray(data['myelin']),
                                    version='fsaverage5',
                                    lhannot=annotations.lh,
                                    rhannot=annotations.rh,
                                    spins=spins,
                                    n_rotate=spins.shape[-1],
                                    verbose=True)
        permnets = np.vstack([
            _get_netmeans(permdata[..., n], data['networks'])
            for n in range(spins.shape[-1])
        ])
        putils.save_dir(out, permnets)
    elif spintype in ['burt2018', 'burt2020']:
        surrdir = SURRDIR / parc / spintype / 'hcp'
        surrogates = get_surrogates(data['myelin'], surrdir, scale)
        permnets = np.vstack([
            _get_netmeans(surrogates[..., n], data['networks'])
            for n in range(surrogates.shape[-1])
        ])
        putils.save_dir(out, permnets)
    elif spintype == 'moran':
        surrogates = np.zeros((len(data['myelin']), 10000))
        for hemi, dist, idx in putils.yield_data_dist(DISTDIR, parc, scale,
                                                      data['myelin']):
            mrs = moran.MoranRandomization(joint=True,
                                           n_rep=10000,
                                           tol=1e-6,
                                           random_state=1234)
            mrs.fit(dist)
            surrogates[idx] = np.squeeze(mrs.randomize(hemi)).T

        permnets = np.vstack([
            _get_netmeans(surrogates[..., n], data['networks'])
            for n in range(surrogates.shape[-1])
        ])
        putils.save_dir(out, permnets)
    else:
        spins = SPDIR / parc / spintype / f'{scale}_spins.csv'
        permnets = gen_permnets(data['myelin'], data['networks'], spins, out)

    # now get the real network averages and compare to the permuted values
    real = _get_netmeans(data['myelin'], data['networks'])
    zscores, pvals = get_fwe(real, permnets)

    out = pd.DataFrame(
        dict(parcellation=parc,
             scale=scale,
             spintype=spintype,
             netclass=netclass,
             network=list(NET_CODES[netclass].keys()),
             zscore=zscores,
             pval=pvals))

    return out
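`get_fwe` is not defined in these excerpts; here it evidently returns per-network z-scores and family-wise-error-corrected p-values from the permuted network means. A sketch of one common max-statistic implementation (an assumption about, not a copy of, the source's helper; `get_fwe_sketch` is a hypothetical name):

def get_fwe_sketch(real, perm):
    """Max-statistic FWE-corrected permutation p-values (assumed behavior).

    real : (F,) observed statistics; perm : (P, F) permuted statistics
    """
    # z-score observed values against the permutation distribution
    zscores = (real - perm.mean(axis=0)) / perm.std(axis=0, ddof=1)
    # null distribution of the maximum absolute statistic across features
    maxnull = np.abs(perm).max(axis=1, keepdims=True)
    # FWE-corrected p-values, with the +1 correction for the observed value
    pvals = (np.sum(maxnull >= np.abs(real), axis=0) + 1) / (len(perm) + 1)
    return zscores, pvals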
Code example #4
            print(f'Comparing surrogates for {scale}')

            # load T1w/T2w for given parcellation + resolution
            data = pd.read_csv(HCPDIR / name / f'{scale}.csv', index_col=0)
            data = data.drop([i for i in data.index if i in putils.DROP])
            data = data['myelin']

            # generate surrogates for each method using distance matrices w/
            # and w/o medial wall travel (one set of surrogates per method
            # per distance matrix)
            # NB: the list named `moran` shadows any module imported as
            # `moran` (cf. the assumed preamble), hence the direct use of
            # `MoranRandomization` in the completion below
            burt2018, burt2020, moran = [], [], []
            for med in [True, False]:
                for method, surrs in zip(METHODS, [burt2018, burt2020, moran]):
                    surrdata = []
                    for hd, dist, _ in putils.yield_data_dist(DISTDIR,
                                                              name,
                                                              scale,
                                                              data,
                                                              medial=med,
                                                              inverse=False):
                        if method == 'burt2018':
                            surr = burt.batch_surrogates(dist,
                                                         hd,
                                                         seed=SEED,
                                                         n_surr=N_SURROGATES,
                                                         n_jobs=N_PROC).T
                        elif method == 'burt2020':
                            base = Base(hd,
                                        dist,
                                        resample=True,
                                        seed=SEED,
                                        n_jobs=N_PROC)
                            surr = base(N_SURROGATES, 50)
                        elif method == 'moran':
                            # NOTE: assumed completion -- the excerpt is
                            # truncated here. This mirrors the Moran branches
                            # in the examples above; Moran randomization
                            # expects inverse-distance weights, so invert the
                            # matrix that was loaded with `inverse=False`
                            np.fill_diagonal(dist, 1)
                            mrs = MoranRandomization(joint=True,
                                                     n_rep=N_SURROGATES,
                                                     tol=1e-6,
                                                     random_state=SEED)
                            surr = mrs.fit(dist ** -1).randomize(hd)
                        surrdata.append(np.asarray(surr))
                    surrs.append(np.column_stack(surrdata))
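The excerpt ends before the surrogate sets are compared; a hedged sketch of what might follow (assuming the completion above, where each per-method list holds one (N_SURROGATES, N) array per medial-wall condition):

            # hypothetical follow-up: index 0 = with medial-wall travel,
            # index 1 = without; correlate the first surrogate of each
            for method, surrs in zip(METHODS, [burt2018, burt2020, moran]):
                r = np.corrcoef(surrs[0][0], surrs[1][0])[0, 1]
                print(f'{method}: r = {r:.3f}')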
Code example #5
def run_null(parcellation, scale, spintype):
    """
    Runs spatial permutation null model for given combination of inputs

    Parameters
    ----------
    parcellation : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    spintype : str
        Name of spin method to be used

    Returns
    -------
    stats : pd.DataFrame
        Generated statistics with columns ['parcellation', 'scale', 'spintype',
        'n_sig']
    """

    nsdata = load_data(parcellation, scale)

    # run the damn thing
    print(f'Running {spintype:>9} spins for {scale}: ', end='', flush=True)
    out = NSDIR / parcellation / 'nulls' / spintype / f'{scale}_nulls.csv'
    if out.exists():
        permcorrs = np.loadtxt(out).reshape(-1, 1)
    elif spintype == 'cornblath':
        # even though we're working with parcellated data we need to project
        # that to the surface + spin the vertices, so let's load our
        # pre-generated vertex-level spins
        spins = SPDIR / 'vertex' / 'vazquez-rodriguez' / 'fsaverage5_spins.csv'

        # get annotation files
        fetcher = getattr(nndata, f"fetch_{parcellation.replace('atl-', '')}")
        annotations = fetcher('fsaverage5', data_dir=ROIDIR)[scale]

        # pre-load the spins (nnsurf.spin_data expects an array, not a path);
        # permdata will be an (R, T, n_rotate) array
        print('Pre-loading spins...', end='\b' * 20, flush=True)
        spins = np.loadtxt(spins, delimiter=',', dtype='int32')
        permdata = nnsurf.spin_data(nsdata, version='fsaverage5',
                                    lhannot=annotations.lh,
                                    rhannot=annotations.rh,
                                    spins=spins, n_rotate=spins.shape[-1],
                                    verbose=True)
        permcorrs = np.vstack([
            _get_permcorr(nsdata, permdata[..., n])
            for n in range(permdata.shape[-1])
        ])
        putils.save_dir(out, permcorrs)
    elif spintype in ['burt2018', 'burt2020']:
        surrdir = SURRDIR / parcellation / spintype / 'neurosynth'
        # generate the permuted data from the surrogate resampling arrays
        print('Generating surrogates...', end='\b' * 24, flush=True)
        permdata = get_surrogates(nsdata, surrdir, scale)
        permcorrs = np.vstack([
            _get_permcorr(nsdata, permdata[..., n])
            for n in range(permdata.shape[-1])
        ])
        putils.save_dir(out, permcorrs)
    elif spintype == 'moran':
        surrogates = np.zeros((*nsdata.shape, 10000))
        for hemi, dist, idx in putils.yield_data_dist(DISTDIR, parcellation,
                                                      scale, nsdata):
            mrs = moran.MoranRandomization(joint=True, n_rep=10000,
                                           tol=1e-6, random_state=1234)
            mrs.fit(dist)
            surrogates[idx] = mrs.randomize(hemi).transpose(1, 2, 0)

        permcorrs = np.vstack([
            _get_permcorr(nsdata, surrogates[..., n])
            for n in range(surrogates.shape[-1])
        ])
        putils.save_dir(out, permcorrs)
    else:
        spins = SPDIR / parcellation / spintype / f'{scale}_spins.csv'
        permcorrs = gen_permcorrs(nsdata, spins, out)

    nsdata = nsdata.dropna(axis=0, how='all')
    pvals = get_fwe(np.corrcoef(nsdata.T), permcorrs)

    out = pd.DataFrame(dict(
        parcellation=parcellation,
        scale=scale,
        spintype=spintype,
        n_sig=np.sum(np.triu(pvals < ALPHA, k=1))
    ), index=[0])

    return out
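Finally, a hedged driver for the function above; the parcellation, scale, and list of spin methods are illustrative (the names mirror those referenced in the excerpts):

# hypothetical driver: collect statistics across spatial-null methods
stats = pd.concat([
    run_null('atl-cammoun2012', 'scale125', spintype)
    for spintype in ('vazquez-rodriguez', 'cornblath',
                     'burt2018', 'burt2020', 'moran')
], ignore_index=True)
print(stats[['spintype', 'n_sig']])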