def get_runtime(parcellation, scale, spatnull):
    """
    Times one run of a spatial null model for given combination of inputs

    Only the first simulation (``sim=0``) in the `SIMDIR / ALPHA` directory is
    used. The timer is started after the simulated data are loaded, so the
    reported runtime covers null generation and p-value calculation only.

    Parameters
    ----------
    parcellation : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    spatnull : str
        Name of spin method to be used

    Returns
    -------
    runtime : dict
        `CompTime` record for this run (built from `parcellation`, `scale`,
        `spatnull`, and the elapsed seconds) converted with `asdict()`

    Raises
    ------
    ValueError
        If `spatnull` is not one of the recognized null methods
    """

    # filenames (for I/O)
    fn = SPDIR / parcellation / spatnull / f'{scale}_spins.csv'

    # load simulated data
    alphadir = SIMDIR / ALPHA
    if parcellation == 'vertex':
        x, y = simnulls.load_vertex_data(alphadir, sim=0)
    else:
        x, y = simnulls.load_parc_data(alphadir, parcellation, scale, sim=0)

    # start timer (after loading data--accounts for diff b/w vertex/parc)
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can
    start = time.perf_counter()

    # calculate the null p-values
    if spatnull == 'naive-para':
        # result is intentionally discarded: only the time taken matters here
        nnstats.efficient_pearsonr(x, y, nan_policy='omit')[1]
        nulls = None
    elif spatnull == 'naive-nonpara':
        nulls = naive_nonpara(y, fn=fn)
    elif spatnull == 'vazquez-rodriguez':
        nulls = vazquez_rodriguez(y, parcellation, scale, fn=fn)
    elif spatnull == 'vasa':
        nulls = vasa(y, parcellation, scale, fn=fn)
    elif spatnull == 'hungarian':
        nulls = hungarian(y, parcellation, scale, fn=fn)
    elif spatnull == 'cornblath':
        # this method always uses the vertex-level fsaverage5 spins file
        fn = SPDIR / 'vertex' / 'vazquez-rodriguez' / 'fsaverage5_spins.csv'
        nulls = cornblath(y, parcellation, scale, fn=fn)
    elif spatnull == 'baum':
        nulls = baum(y, parcellation, scale, fn=fn)
    elif spatnull in ('burt2018', 'burt2020', 'moran'):
        nulls = make_surrogates(y, parcellation, scale, spatnull, fn=fn)
    else:
        raise ValueError(f'Invalid spatnull: {spatnull}')

    # the parametric method has no nulls to compare against
    if nulls is not None:
        simnulls.calc_pval(x, y, nulls)

    end = time.perf_counter()
    ct = CompTime(parcellation, scale, spatnull, end - start)
    print(ct)

    return asdict(ct)
# Example #2
# 0
def combine_nulls(parcellation, scale, spatnull, alpha):
    """
    Gathers per-simulation null outputs into one table for provided inputs

    If the combined p-value file for this combination is missing, the
    per-simulation p-value and permutation files are read, stacked, and saved
    as combined files; otherwise the existing combined p-values are loaded.

    Parameters
    ----------
    parcellation : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    spatnull : str
        Name of spin method to be used
    alpha : str or os.PathLike
        Spatial autocorrelation parameter to be used; joined onto `SIMDIR` as
        a path component to locate the simulations

    Returns
    -------
    data : pd.DataFrame
        One row per simulation with columns 'parcellation', 'scale',
        'spatnull', 'alpha', 'corr', 'sim', and 'pval'
    """

    print(f'{spatnull} {alpha} {parcellation} {scale}')

    simdir = SIMDIR / alpha
    nulldir = simdir / parcellation / 'nulls' / spatnull
    pvals_fn = nulldir / f'{scale}_nulls.csv'
    perms_fn = nulldir / f'{scale}_perms.csv'

    if pvals_fn.exists():
        # combined file is already on disk; nothing to assemble
        pvals = np.loadtxt(pvals_fn)
    else:
        # null models run in serial mode only wrote one file per simulation,
        # so the top-level file is absent and has to be assembled here
        persim_dir = nulldir / 'pvals'
        pvals = np.zeros(N_SIM)
        perms = np.zeros((N_PERM, N_SIM))
        for sim in range(N_SIM):
            pvals[sim] = np.loadtxt(
                persim_dir / f'{scale}_nulls_{sim:04d}.csv'
            )
            perms[:, sim] = np.loadtxt(
                persim_dir / f'{scale}_perms_{sim:04d}.csv'
            )
        putils.save_dir(pvals_fn, pvals, overwrite=False)
        putils.save_dir(perms_fn, perms, overwrite=False)

    # empirical correlation of every simulation, stored next to its p-value
    if parcellation != 'vertex':
        x, y = simnulls.load_parc_data(simdir, parcellation, scale,
                                       n_sim=N_SIM)
    else:
        x, y = simnulls.load_vertex_data(simdir, n_sim=N_SIM)
    corrs = nnstats.efficient_pearsonr(x, y, nan_policy='omit')[0]

    columns = {
        'parcellation': parcellation,
        'scale': scale,
        'spatnull': spatnull,
        'alpha': alpha,
        'corr': corrs,
        'sim': range(len(pvals)),
        'pval': pvals,
    }
    return pd.DataFrame(columns)
# Example #3
# 0
def pval_by_subsets(parcellation, scale, spatnull, alpha):
    """
    Measures how p-values shift when only a subset of the nulls is used

    For simulation `SIM`, the p-value obtained from all stored permutations is
    compared with p-values obtained from random subsets (drawn without
    replacement) of 100, 500, 1000, and 5000 permutations; `N_PVALS` subsets
    are drawn for each size.

    Parameters
    ----------
    parcellation : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    spatnull : str
        Name of spin method to be used
    alpha : str or os.PathLike
        Spatial autocorrelation parameter to be used; joined onto `SIMDIR` as
        a path component to locate the simulations

    Returns
    -------
    pvals : pd.DataFrame or None
        Long-format table with columns 'parcellation', 'scale', 'spatnull',
        'alpha', 'n_nulls', and 'd(pval)', where 'd(pval)' is the subset
        p-value minus the p-value from all permutations. None is returned
        when `spatnull` is 'naive-para'
    """

    print(spatnull, alpha, parcellation, scale)

    # skipped here: no permutations are loaded for the parametric method
    if spatnull == 'naive-para':
        return

    # load simulated data
    alphadir = SIMDIR / alpha
    if parcellation == 'vertex':
        x, y = simnulls.load_vertex_data(alphadir, sim=SIM)
    else:
        x, y = simnulls.load_parc_data(alphadir, parcellation, scale, sim=SIM)

    corr = nnstats.efficient_pearsonr(x, y, nan_policy='omit')[0]
    perms = np.loadtxt(alphadir / parcellation / 'nulls' / spatnull / 'pvals' /
                       f'{scale}_perms_{SIM}.csv')

    # reference p-value computed from every available permutation
    orig = pval_from_perms(corr, perms)
    pvals = defaultdict(list)
    for subset in [100, 500, 1000, 5000]:
        # generator is re-seeded for every subset size
        rs = np.random.default_rng(SEED)
        for _ in range(N_PVALS):
            # select `subset` correlations from `perms` and calculate p-value
            # store the p-value difference and repeat `N_PVALS` times
            sub = rs.choice(perms, size=subset, replace=False)
            pvals[subset].append(pval_from_perms(corr, sub) - orig)
        # arrays are nicer than lists
        pvals[subset] = np.asarray(pvals[subset])

    df = pd.melt(pd.DataFrame(pvals), var_name='n_nulls', value_name='d(pval)')
    # label every row with the inputs that produced it
    df = df.assign(parcellation=parcellation,
                   scale=scale,
                   spatnull=spatnull,
                   alpha=alpha)

    # a list is required to select several columns; a bare tuple key would
    # be looked up as one column label and raise KeyError
    return df[['parcellation', 'scale', 'spatnull', 'alpha', 'n_nulls',
               'd(pval)']]
def calc_moran(parcellation, scale, alpha):
    """
    Calculates Moran's I of all simulations for provided inputs

    Nothing is recomputed when the output file already exists; its path is
    simply returned.

    Parameters
    ----------
    parcellation : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    alpha : str or os.PathLike
        Spatial autocorrelation parameter to be used; joined onto `SIMDIR` as
        a path component to locate the simulations

    Returns
    -------
    moran_fn : os.PathLike
        Path to generated file containing Moran's I for simulations
    """

    print(f'{time.ctime()}: {parcellation} {scale} {alpha}', flush=True)

    alphadir = SIMDIR / alpha
    moran_fn = alphadir / parcellation / f'{scale}_moran.csv'

    if not moran_fn.exists():
        # only the second element returned by the loaders is needed here
        if parcellation == 'vertex':
            sims = simnulls.load_vertex_data(alphadir, n_sim=N_SIM)
        else:
            sims = simnulls.load_parc_data(alphadir, parcellation, scale,
                                           n_sim=N_SIM)
        y = sims[1]

        dist = simnulls.load_full_distmat(y, DISTDIR, parcellation, scale)
        moran = simnulls.calc_moran(dist, np.asarray(y), n_jobs=N_PROC)
        putils.save_dir(moran_fn, np.atleast_1d(moran), overwrite=False)

    return moran_fn
def run_null(parcellation, scale, spatnull, alpha):
    """
    Runs a spatial null model over all simulations and saves the outputs

    P-values and permutation outputs are written to
    `SIMDIR / alpha / parcellation / 'nulls' / spatnull`. When `SHUFFLE` is
    set the output filenames get a '_shuffle' suffix and `y` is re-selected
    with a fixed-seed permutation of the simulation indices. Nothing is run
    if both output files already exist.

    Parameters
    ----------
    parcellation : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    spatnull : str
        Name of spin method to be used
    alpha : str or os.PathLike
        Spatial autocorrelation parameter to be used; joined onto `SIMDIR` as
        a path component to locate the simulations
    """

    print(f'{time.ctime()}: {parcellation} {scale} {spatnull} {alpha} ',
          flush=True)

    # input / output locations
    alphadir = SIMDIR / alpha
    outdir = alphadir / parcellation / 'nulls' / spatnull
    spins_fn = SPDIR / parcellation / spatnull / f'{scale}_spins.csv'
    if SHUFFLE:
        pvals_fn = outdir / f'{scale}_nulls_shuffle.csv'
        perms_fn = outdir / f'{scale}_perms_shuffle.csv'
    else:
        pvals_fn = outdir / f'{scale}_nulls.csv'
        perms_fn = outdir / f'{scale}_perms.csv'

    # both outputs already on disk: nothing left to do
    if pvals_fn.exists() and perms_fn.exists():
        return

    # load simulated data
    if parcellation == 'vertex':
        x, y = simnulls.load_vertex_data(alphadir, n_sim=N_SIM)
    else:
        x, y = simnulls.load_parc_data(alphadir, parcellation, scale,
                                       n_sim=N_SIM)

    # if we're computing info on SHUFFLED data, get the appropriate random `y`
    if SHUFFLE:
        y = _get_ysim(y, np.random.default_rng(1).permutation(N_SIM))

    if spatnull == 'naive-para':
        # parametric p-values need no permutations; store a NaN placeholder
        pvals = nnstats.efficient_pearsonr(x, y, nan_policy='omit')[1]
        perms = np.array([np.nan])
    else:
        # every other method runs one job per simulation; only the call made
        # for each simulation differs, so build that as `make_job`
        if spatnull in ('burt2018', 'burt2020', 'moran'):
            xarr = np.asarray(x)

            def make_job(sim):
                return delayed(_genmod)(xarr[:, sim], _get_ysim(y, sim),
                                        parcellation, scale, spatnull)
        else:
            x, y = np.asarray(x), np.asarray(y)
            if spatnull == 'cornblath':
                # always uses the vertex-level fsaverage5 spins file
                fn = (SPDIR / 'vertex' / 'vazquez-rodriguez'
                      / 'fsaverage5_spins.csv')
                spins = simnulls.load_spins(fn, n_perm=N_PERM)
                fetcher = getattr(
                    nndata, f"fetch_{parcellation.replace('atl-', '')}"
                )
                annot = fetcher('fsaverage5', data_dir=ROIDIR)[scale]

                def make_job(sim):
                    return delayed(_cornblath)(x[:, sim], y[:, sim],
                                               spins, annot)
            elif spatnull == 'baum':
                spins = simnulls.load_spins(spins_fn, n_perm=N_PERM)

                def make_job(sim):
                    return delayed(_baum)(x[:, sim], y[:, sim], spins)
            else:  # vazquez-rodriguez, vasa, hungarian, naive-nonparametric
                spins = simnulls.load_spins(spins_fn, n_perm=N_PERM)

                def make_job(sim):
                    return delayed(simnulls.calc_pval)(x[:, sim], y[:, sim],
                                                       y[spins, sim])

        out = Parallel(n_jobs=N_PROC, max_nbytes=None)(
            make_job(sim)
            for sim in putils.trange(x.shape[-1], desc='Running simulations')
        )
        pvals, perms = zip(*out)

    # save to disk
    putils.save_dir(perms_fn, np.atleast_1d(perms), overwrite=False)
    putils.save_dir(pvals_fn, np.atleast_1d(pvals), overwrite=False)