def get_runtime(parcellation, scale, spatnull):
    """
    Runs and times spatial null model for given combination of inputs

    Parameters
    ----------
    parcellation : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    spatnull : str
        Name of spin method to be used

    Returns
    -------
    ct : dict
        Dictionary representation of `CompTime` with runtime information
    """

    # filenames (for I/O)
    fn = SPDIR / parcellation / spatnull / f'{scale}_spins.csv'

    # load simulated data
    alphadir = SIMDIR / ALPHA
    if parcellation == 'vertex':
        x, y = simnulls.load_vertex_data(alphadir, sim=0)
    else:
        x, y = simnulls.load_parc_data(alphadir, parcellation, scale, sim=0)

    # start timer (after loading data--accounts for diff b/w vertex/parc)
    start = time.time()

    # calculate the null p-values
    if spatnull == 'naive-para':
        # parametric p-value; result is discarded since we only need runtime
        nnstats.efficient_pearsonr(x, y, nan_policy='omit')[1]
        nulls = None
    elif spatnull == 'naive-nonpara':
        nulls = naive_nonpara(y, fn=fn)
    elif spatnull == 'vazquez-rodriguez':
        nulls = vazquez_rodriguez(y, parcellation, scale, fn=fn)
    elif spatnull == 'vasa':
        nulls = vasa(y, parcellation, scale, fn=fn)
    elif spatnull == 'hungarian':
        nulls = hungarian(y, parcellation, scale, fn=fn)
    elif spatnull == 'cornblath':
        fn = SPDIR / 'vertex' / 'vazquez-rodriguez' / 'fsaverage5_spins.csv'
        nulls = cornblath(y, parcellation, scale, fn=fn)
    elif spatnull == 'baum':
        nulls = baum(y, parcellation, scale, fn=fn)
    elif spatnull in ('burt2018', 'burt2020', 'moran'):
        nulls = make_surrogates(y, parcellation, scale, spatnull, fn=fn)
    else:
        raise ValueError(f'Invalid spatnull: {spatnull}')

    if nulls is not None:
        simnulls.calc_pval(x, y, nulls)

    end = time.time()
    ct = CompTime(parcellation, scale, spatnull, end - start)
    print(ct)

    return asdict(ct)
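
# A minimal driver sketch for `get_runtime`, showing how the returned dicts
# could be aggregated into a single runtime table. The `parcellations` mapping
# and `spatnulls` list below are hypothetical placeholders, not constants
# defined in this module.

def _collect_runtimes_sketch():
    # hypothetical {parcellation: [scales]} mapping and null-model list
    parcellations = {'vertex': ['fsaverage5']}
    spatnulls = ['naive-para', 'vazquez-rodriguez', 'burt2020']

    rows = []
    for parc, scales in parcellations.items():
        for scale in scales:
            for spatnull in spatnulls:
                rows.append(get_runtime(parc, scale, spatnull))

    # one row per (parcellation, scale, spatnull) combination
    return pd.DataFrame(rows)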
def combine_nulls(parcellation, scale, spatnull, alpha):
    """
    Combines outputs of all simulations into single files for provided inputs

    Parameters
    ----------
    parcellation : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    spatnull : str
        Name of spin method to be used
    alpha : float
        Spatial autocorrelation parameter to be used

    Returns
    -------
    df : pd.DataFrame
        With columns ['parcellation', 'scale', 'spatnull', 'alpha', 'corr',
        'sim', 'pval']
    """

    print(f'{spatnull} {alpha} {parcellation} {scale}')

    nulldir = SIMDIR / alpha / parcellation / 'nulls' / spatnull
    pvals_fn = nulldir / f'{scale}_nulls.csv'
    perms_fn = nulldir / f'{scale}_perms.csv'

    # only some of the spatial null models were run in serial mode; these are
    # the ones that are missing the top-level file and whose per-simulation
    # outputs we need to combine. do that here.
    if not pvals_fn.exists():
        pvals, perms = np.zeros(N_SIM), np.zeros((N_PERM, N_SIM))
        for sim in range(N_SIM):
            pvals[sim] = \
                np.loadtxt(nulldir / 'pvals' / f'{scale}_nulls_{sim:04d}.csv')
            perms[:, sim] = \
                np.loadtxt(nulldir / 'pvals' / f'{scale}_perms_{sim:04d}.csv')
        putils.save_dir(pvals_fn, pvals, overwrite=False)
        putils.save_dir(perms_fn, perms, overwrite=False)
    else:
        pvals = np.loadtxt(pvals_fn)

    # grab the empirical correlations for each simulation---good to have
    if parcellation == 'vertex':
        x, y = simnulls.load_vertex_data(SIMDIR / alpha, n_sim=N_SIM)
    else:
        x, y = simnulls.load_parc_data(SIMDIR / alpha, parcellation, scale,
                                       n_sim=N_SIM)
    corrs = nnstats.efficient_pearsonr(x, y, nan_policy='omit')[0]

    return pd.DataFrame(
        dict(parcellation=parcellation, scale=scale, spatnull=spatnull,
             alpha=alpha, corr=corrs, sim=range(len(pvals)), pval=pvals))
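
# Sketch of how `combine_nulls` outputs might be concatenated into one
# long-format table across all combinations. The `alphas` / `parcellations` /
# `spatnulls` values and the output filename are hypothetical placeholders,
# not constants defined in this module.

def _aggregate_nulls_sketch(fname='all_sim_nulls.csv'):
    alphas = ['alpha-0.0', 'alpha-2.0']         # assumed directory names
    parcellations = {'vertex': ['fsaverage5']}  # assumed {parc: [scales]}
    spatnulls = ['naive-para', 'vazquez-rodriguez']

    df = pd.concat([
        combine_nulls(parc, scale, spatnull, alpha)
        for parc, scales in parcellations.items()
        for scale in scales
        for spatnull in spatnulls
        for alpha in alphas
    ], ignore_index=True)
    df.to_csv(fname, index=False)
    return df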
def pval_by_subsets(parcellation, scale, spatnull, alpha):
    """
    Calculates p-values for simulation `SIM` using subsets of the null dist

    Parameters
    ----------
    parcellation : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    spatnull : str
        Name of spin method to be used
    alpha : float
        Spatial autocorrelation parameter to be used

    Returns
    -------
    pvals : pd.DataFrame
    """

    print(spatnull, alpha, parcellation, scale)

    # the parametric null has no permutations to subset
    if spatnull == 'naive-para':
        return None

    # load simulated data
    alphadir = SIMDIR / alpha
    if parcellation == 'vertex':
        x, y = simnulls.load_vertex_data(alphadir, sim=SIM)
    else:
        x, y = simnulls.load_parc_data(alphadir, parcellation, scale, sim=SIM)
    corr = nnstats.efficient_pearsonr(x, y, nan_policy='omit')[0]
    perms = np.loadtxt(alphadir / parcellation / 'nulls' / spatnull
                       / 'pvals' / f'{scale}_perms_{SIM}.csv')
    orig = pval_from_perms(corr, perms)

    pvals = defaultdict(list)
    for subset in [100, 500, 1000, 5000]:
        rs = np.random.default_rng(SEED)
        for _ in range(N_PVALS):
            # select `subset` correlations from `perms`, calculate the
            # p-value, and store its deviation from the full-null p-value;
            # repeat `N_PVALS` times
            sub = rs.choice(perms, size=subset, replace=False)
            pvals[subset].append(pval_from_perms(corr, sub) - orig)
        # arrays are nicer than lists
        pvals[subset] = np.asarray(pvals[subset])

    df = pd.melt(pd.DataFrame(pvals), var_name='n_nulls',
                 value_name='d(pval)')
    # add metadata for the provided inputs
    df = df.assign(parcellation=parcellation, scale=scale,
                   spatnull=spatnull, alpha=alpha)

    return df[['parcellation', 'scale', 'spatnull', 'alpha',
               'n_nulls', 'd(pval)']]
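
# `pval_from_perms` is used above but defined elsewhere; the sketch below
# shows a plausible implementation as a two-tailed permutation p-value with
# the standard +1 correction. This is an assumption about its behavior, not
# the actual implementation.

def _pval_from_perms_sketch(actual, null):
    # proportion of null correlations at least as extreme as the observed
    # value; the +1 in numerator and denominator avoids p-values of exactly 0
    return (np.sum(np.abs(null) >= np.abs(actual)) + 1) / (len(null) + 1)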
def calc_moran(parcellation, scale, alpha):
    """
    Calculates Moran's I of all simulations for provided inputs

    Parameters
    ----------
    parcellation : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    alpha : float
        Spatial autocorrelation parameter to be used

    Returns
    -------
    moran_fn : os.PathLike
        Path to generated file containing Moran's I for simulations
    """

    print(f'{time.ctime()}: {parcellation} {scale} {alpha}', flush=True)

    # filename for output; short-circuit if it already exists
    moran_fn = SIMDIR / alpha / parcellation / f'{scale}_moran.csv'
    if moran_fn.exists():
        return moran_fn

    # load simulated data (only `y`; Moran's I is computed on the target map)
    alphadir = SIMDIR / alpha
    if parcellation == 'vertex':
        y = simnulls.load_vertex_data(alphadir, n_sim=N_SIM)[1]
    else:
        y = simnulls.load_parc_data(alphadir, parcellation, scale,
                                    n_sim=N_SIM)[1]

    dist = simnulls.load_full_distmat(y, DISTDIR, parcellation, scale)
    moran = simnulls.calc_moran(dist, np.asarray(y), n_jobs=N_PROC)
    putils.save_dir(moran_fn, np.atleast_1d(moran), overwrite=False)

    return moran_fn
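
# `simnulls.calc_moran` wraps the actual Moran's I computation; the sketch
# below shows the standard formula with inverse-distance weights for a single
# map, as a rough illustration of what is being measured. It assumes `dist`
# has a zero diagonal and positive off-diagonal entries, and does no masking;
# it is not the library's implementation.

def _morans_i_sketch(dist, y):
    # inverse-distance spatial weight matrix with zeroed diagonal
    with np.errstate(divide='ignore'):
        w = 1.0 / dist
    np.fill_diagonal(w, 0)

    z = y - y.mean()
    num = (w * np.outer(z, z)).sum()   # sum_ij w_ij * z_i * z_j
    den = (z ** 2).sum()               # sum_i z_i ** 2
    return (len(y) / w.sum()) * (num / den)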
def run_null(parcellation, scale, spatnull, alpha):
    """
    Runs spatial null models for given combination of inputs

    Parameters
    ----------
    parcellation : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    spatnull : str
        Name of spin method to be used
    alpha : float
        Spatial autocorrelation parameter to be used
    """

    print(f'{time.ctime()}: {parcellation} {scale} {spatnull} {alpha} ',
          flush=True)

    # filenames (for I/O)
    spins_fn = SPDIR / parcellation / spatnull / f'{scale}_spins.csv'
    pvals_fn = (SIMDIR / alpha / parcellation / 'nulls' / spatnull
                / f'{scale}_nulls.csv')
    perms_fn = pvals_fn.parent / f'{scale}_perms.csv'
    if SHUFFLE:
        pvals_fn = pvals_fn.parent / f'{scale}_nulls_shuffle.csv'
        perms_fn = perms_fn.parent / f'{scale}_perms_shuffle.csv'

    # short-circuit if outputs already exist
    if pvals_fn.exists() and perms_fn.exists():
        return

    # load simulated data
    alphadir = SIMDIR / alpha
    if parcellation == 'vertex':
        x, y = simnulls.load_vertex_data(alphadir, n_sim=N_SIM)
    else:
        x, y = simnulls.load_parc_data(alphadir, parcellation, scale,
                                       n_sim=N_SIM)

    # if we're computing info on SHUFFLED data, get the appropriate random `y`
    if SHUFFLE:
        y = _get_ysim(y, np.random.default_rng(1).permutation(N_SIM))

    # calculate the null p-values
    if spatnull == 'naive-para':
        pvals = nnstats.efficient_pearsonr(x, y, nan_policy='omit')[1]
        perms = np.array([np.nan])
    elif spatnull == 'cornblath':
        fn = SPDIR / 'vertex' / 'vazquez-rodriguez' / 'fsaverage5_spins.csv'
        x, y = np.asarray(x), np.asarray(y)
        spins = simnulls.load_spins(fn, n_perm=N_PERM)
        fetcher = getattr(nndata, f"fetch_{parcellation.replace('atl-', '')}")
        annot = fetcher('fsaverage5', data_dir=ROIDIR)[scale]
        out = Parallel(n_jobs=N_PROC, max_nbytes=None)(
            delayed(_cornblath)(x[:, sim], y[:, sim], spins, annot)
            for sim in putils.trange(x.shape[-1], desc='Running simulations')
        )
        pvals, perms = zip(*out)
    elif spatnull == 'baum':
        x, y = np.asarray(x), np.asarray(y)
        spins = simnulls.load_spins(spins_fn, n_perm=N_PERM)
        out = Parallel(n_jobs=N_PROC, max_nbytes=None)(
            delayed(_baum)(x[:, sim], y[:, sim], spins)
            for sim in putils.trange(x.shape[-1], desc='Running simulations')
        )
        pvals, perms = zip(*out)
    elif spatnull in ('burt2018', 'burt2020', 'moran'):
        xarr = np.asarray(x)
        out = Parallel(n_jobs=N_PROC, max_nbytes=None)(
            delayed(_genmod)(xarr[:, sim], _get_ysim(y, sim),
                             parcellation, scale, spatnull)
            for sim in putils.trange(x.shape[-1], desc='Running simulations')
        )
        pvals, perms = zip(*out)
    else:  # vazquez-rodriguez, vasa, hungarian, naive-nonpara
        x, y = np.asarray(x), np.asarray(y)
        spins = simnulls.load_spins(spins_fn, n_perm=N_PERM)
        out = Parallel(n_jobs=N_PROC, max_nbytes=None)(
            delayed(simnulls.calc_pval)(x[:, sim], y[:, sim], y[spins, sim])
            for sim in putils.trange(x.shape[-1], desc='Running simulations')
        )
        pvals, perms = zip(*out)

    # save to disk
    putils.save_dir(perms_fn, np.atleast_1d(perms), overwrite=False)
    putils.save_dir(pvals_fn, np.atleast_1d(pvals), overwrite=False)
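
# `_get_ysim` is used above but defined elsewhere; a plausible sketch is shown
# below. The assumption is that it selects one (or more) simulation column(s)
# from `y`, whether `y` is a pandas DataFrame or a numpy array; `sim` may be a
# single index or an index array (as in the SHUFFLE branch above).

def _get_ysim_sketch(y, sim):
    try:
        return y.iloc[:, sim]   # pandas DataFrame
    except AttributeError:
        return y[:, sim]        # numpy array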