def load_full_distmat(data, distdir, parcellation, scale):
    """
    Returns full distance matrix for given `parcellation` and `scale`

    Parameters
    ----------
    data : pd.DataFrame or array_like
        Data used to determine hemisphere designations for loaded distance
        matrices
    distdir : os.PathLike
        Filepath to directory containing geodesic distance files
    parcellation : {'atl-cammoun2012', 'atl-schaefer2018'}
        Name of parcellation to use
    scale : str
        Scale of parcellation to use. Must be valid scale for specified
        `parcellation`

    Returns
    -------
    dist : (N, N) np.ndarray
        Full distance matrix (inter-hemispheric distances set to np.inf)
    """

    # get "full" distance matrix for data, with inter-hemi set to np.inf
    dist = np.ones((len(data), len(data))) * np.inf
    for _, hdist, hidx in utils.yield_data_dist(distdir, parcellation, scale,
                                                data, inverse=False):
        dist[np.ix_(hidx, hidx)] = hdist
    np.fill_diagonal(dist, 1)

    return dist
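# Usage sketch (illustrative, not part of the original script): assuming a
# hemisphere-labeled DataFrame `data` and the `DISTDIR` constant used by the
# functions below, the returned matrix is square, with finite values only in
# the intra-hemispheric blocks.
#
#   dist = load_full_distmat(data, DISTDIR, 'atl-cammoun2012', 'scale125')
#   dist.shape == (len(data), len(data))    # True
#   np.isfinite(dist).all()                 # False: inter-hemi entries are inf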
def make_surrogates(data, parcellation, scale, spatnull):
    """
    Generates surrogates for `data` using `spatnull` method

    Parameters
    ----------
    data : (N,) pd.DataFrame
        Input data from which surrogates should be generated
    parcellation : {'atl-cammoun2012', 'atl-schaefer2018'}
        Name of parcellation to use
    scale : str
        Scale of `parcellation` to use
    spatnull : {'burt2018', 'burt2020', 'moran'}
        Name of spatial null framework to use

    Returns
    -------
    surrogates : (N, `N_PERM`) np.ndarray
        Generated surrogate data
    """

    if spatnull not in ('burt2018', 'burt2020', 'moran'):
        raise ValueError(f'Cannot make surrogates for null method {spatnull}')

    darr = np.asarray(data)
    dmin = darr[np.logical_not(np.isnan(darr))].min()

    surrogates = np.zeros((len(data), N_PERM))
    for hdata, dist, idx in putils.yield_data_dist(
            DISTDIR, parcellation, scale, data,
            inverse=(spatnull == 'moran')):

        # handle NaNs before generating surrogates; should only be relevant
        # when using vertex-level data, but good nonetheless
        mask = np.logical_not(np.isnan(hdata))
        surrogates[idx[np.logical_not(mask)]] = np.nan
        hdata, dist, idx = hdata[mask], dist[np.ix_(mask, mask)], idx[mask]

        if spatnull == 'burt2018':
            # Box-Cox transformation requires positive data :man_facepalming:
            hdata += np.abs(dmin) + 0.1
            surrogates[idx] = \
                burt.batch_surrogates(dist, hdata, n_surr=N_PERM, seed=SEED)
        elif spatnull == 'burt2020':
            if parcellation == 'vertex':
                # memmap'ed, pre-sorted distances are required at the
                # vertex level
                index = np.argsort(dist, axis=-1)
                dist = np.sort(dist, axis=-1)
                knn = 1000 if USE_KNN else len(hdata)
                surrogates[idx] = \
                    mapgen.Sampled(hdata, dist, index,
                                   knn=knn, seed=SEED)(N_PERM).T
            else:
                surrogates[idx] = \
                    mapgen.Base(hdata, dist, seed=SEED)(N_PERM, 50).T
        elif spatnull == 'moran':
            mrs = moran.MoranRandomization(joint=True, n_rep=N_PERM,
                                           tol=1e-6, random_state=SEED)
            with threadpoolctl.threadpool_limits(limits=2):
                surrogates[idx] = mrs.fit(dist).randomize(hdata).T

    return surrogates
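# Usage sketch (illustrative): the parcellation / scale values below are
# assumptions; `data` must carry the hemisphere information that
# `putils.yield_data_dist` expects.
#
#   surrs = make_surrogates(data, 'atl-cammoun2012', 'scale125', 'moran')
#   surrs.shape                             # (len(data), N_PERM)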
def run_null(netclass, parc, scale, spintype):
    """
    Runs spatial permutation null model for given combination of inputs

    Parameters
    ----------
    netclass : {'vek', 'yeo'}
        Network partition to test
    parc : str
        Name of parcellation to be used
    scale : str
        Scale of `parc` to be used
    spintype : str
        Name of spin method to be used

    Returns
    -------
    stats : pd.DataFrame
        Generated statistics with columns ['parcellation', 'scale',
        'spintype', 'netclass', 'network', 'zscore', 'pval']
    """

    data = load_data(netclass, parc, scale)

    # run the damn thing
    print(f'Running {spintype:>9} spins for {scale}: ', end='', flush=True)
    out = HCPDIR / parc / 'nulls' / netclass / spintype / f'{scale}_nulls.csv'
    if out.exists():
        permnets = np.loadtxt(out, delimiter=',')
    elif spintype == 'cornblath':
        # even though we're working with parcellated data we need to project
        # that to the surface + spin the vertices, so let's load our
        # pre-generated vertex-level spins
        spins = SPDIR / 'vertex' / 'vazquez-rodriguez' / 'fsaverage5_spins.csv'

        # get annotation files (we need these to project parcels to surface)
        fetcher = getattr(nndata, f"fetch_{parc.replace('atl-', '')}")
        annotations = fetcher('fsaverage5', data_dir=ROIDIR)[scale]

        # pre-load the spins for this function (it assumes `spins` is array)
        print('Pre-loading spins...', end='\b' * 20, flush=True)
        spins = np.loadtxt(spins, delimiter=',', dtype='int32')

        # generate "spun" data; permdata will be an (R, T, n_rotate) array
        # where `R` is regions and `T` is 1 (myelination)
        permdata = nnsurf.spin_data(np.asarray(data['myelin']),
                                    version='fsaverage5',
                                    lhannot=annotations.lh,
                                    rhannot=annotations.rh,
                                    spins=spins, n_rotate=spins.shape[-1],
                                    verbose=True)
        permnets = np.vstack([
            _get_netmeans(permdata[..., n], data['networks'])
            for n in range(spins.shape[-1])
        ])
        putils.save_dir(out, permnets)
    elif spintype in ['burt2018', 'burt2020']:
        surrdir = SURRDIR / parc / spintype / 'hcp'
        surrogates = get_surrogates(data['myelin'], surrdir, scale)
        permnets = np.vstack([
            _get_netmeans(surrogates[..., n], data['networks'])
            for n in range(surrogates.shape[-1])
        ])
        putils.save_dir(out, permnets)
    elif spintype == 'moran':
        surrogates = np.zeros((len(data['myelin']), 10000))
        for hemi, dist, idx in putils.yield_data_dist(DISTDIR, parc, scale,
                                                      data['myelin']):
            mrs = moran.MoranRandomization(joint=True, n_rep=10000,
                                           tol=1e-6, random_state=1234)
            mrs.fit(dist)
            surrogates[idx] = np.squeeze(mrs.randomize(hemi)).T
        permnets = np.vstack([
            _get_netmeans(surrogates[..., n], data['networks'])
            for n in range(surrogates.shape[-1])
        ])
        putils.save_dir(out, permnets)
    else:
        spins = SPDIR / parc / spintype / f'{scale}_spins.csv'
        permnets = gen_permnets(data['myelin'], data['networks'],
                                spins, out)

    # now get the real network averages and compare to the permuted values
    real = _get_netmeans(data['myelin'], data['networks'])
    zscores, pvals = get_fwe(real, permnets)

    out = pd.DataFrame(
        dict(parcellation=parc, scale=scale, spintype=spintype,
             netclass=netclass, network=list(NET_CODES[netclass].keys()),
             zscore=zscores, pval=pvals))

    return out
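# Usage sketch (illustrative; the spintype value is an assumption based on
# the spin directories referenced above):
#
#   stats = run_null('yeo', 'atl-cammoun2012', 'scale125',
#                    'vazquez-rodriguez')
#   stats[['network', 'zscore', 'pval']]    # one row per network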
print(f'Comparing surrogates for {scale}')

# load T1w/T2w for given parcellation + resolution
data = pd.read_csv(HCPDIR / name / f'{scale}.csv', index_col=0)
data = data.drop([i for i in data.index if i in putils.DROP])
data = data['myelin']

# generate surrogates for each method using distance matrix w/ and
# w/o medial wall travel (one surrogate set per method per dist matrix)
burt2018, burt2020, moran = [], [], []
for med in [True, False]:
    for method, surrs in zip(METHODS, [burt2018, burt2020, moran]):
        surrdata = []
        for hd, dist, _ in putils.yield_data_dist(DISTDIR, name, scale,
                                                  data, medial=med,
                                                  inverse=False):
            if method == 'burt2018':
                surr = burt.batch_surrogates(dist, hd, seed=SEED,
                                             n_surr=N_SURROGATES,
                                             n_jobs=N_PROC).T
            elif method == 'burt2020':
                base = Base(hd, dist, resample=True, seed=SEED,
                            n_jobs=N_PROC)
                surr = base(N_SURROGATES, 50)
            elif method == 'moran':
                # assumed branch (mirrors `make_surrogates()` above): Moran
                # nulls need inverse-distance weights, so give the diagonal
                # a unit value before element-wise inversion. Assumes
                # `MoranRandomization` is imported directly so the `moran`
                # list above isn't shadowed.
                np.fill_diagonal(dist, 1)
                mrs = MoranRandomization(joint=True, n_rep=N_SURROGATES,
                                         tol=1e-6, random_state=SEED)
                mrs.fit(dist ** -1)
                surr = mrs.randomize(hd)
            surrdata.append(surr)
        # stack hemispheres; one (N_SURROGATES, N) array per dist matrix
        surrs.append(np.column_stack(surrdata))
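# Downstream sketch (assumption: one plausible way to compare surrogates
# generated with vs. without medial-wall travel; the original aggregation is
# not shown in this excerpt):
#
#   for method, surrs in zip(METHODS, [burt2018, burt2020, moran]):
#       med, nomed = surrs                  # [True, False] loop order
#       r = np.array([np.corrcoef(m, n)[0, 1]
#                     for m, n in zip(med, nomed)])
#       print(f'{method}: mean r = {r.mean():.3f}')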
def run_null(parcellation, scale, spintype):
    """
    Runs spatial permutation null model for given combination of inputs

    Parameters
    ----------
    parcellation : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    spintype : str
        Name of spin method to be used

    Returns
    -------
    stats : pd.DataFrame
        Generated statistics with columns ['parcellation', 'scale',
        'spintype', 'n_sig']
    """

    nsdata = load_data(parcellation, scale)

    # run the damn thing
    print(f'Running {spintype:>9} spins for {scale}: ', end='', flush=True)
    out = NSDIR / parcellation / 'nulls' / spintype / f'{scale}_nulls.csv'
    if out.exists():
        permcorrs = np.loadtxt(out).reshape(-1, 1)
    elif spintype == 'cornblath':
        # even though we're working with parcellated data we need to project
        # that to the surface + spin the vertices, so let's load our
        # pre-generated vertex-level spins
        spins = SPDIR / 'vertex' / 'vazquez-rodriguez' / 'fsaverage5_spins.csv'

        # get annotation files
        fetcher = getattr(nndata, f"fetch_{parcellation.replace('atl-', '')}")
        annotations = fetcher('fsaverage5', data_dir=ROIDIR)[scale]

        # pre-load the spins for this function (it assumes `spins` is array);
        # permdata will be an (R, T, n_rotate) array
        print('Pre-loading spins...', end='\b' * 20, flush=True)
        spins = np.loadtxt(spins, delimiter=',', dtype='int32')
        permdata = nnsurf.spin_data(nsdata, version='fsaverage5',
                                    lhannot=annotations.lh,
                                    rhannot=annotations.rh,
                                    spins=spins, n_rotate=spins.shape[-1],
                                    verbose=True)
        permcorrs = np.vstack([
            _get_permcorr(nsdata, permdata[..., n])
            for n in range(permdata.shape[-1])
        ])
        putils.save_dir(out, permcorrs)
    elif spintype in ['burt2018', 'burt2020']:
        surrdir = SURRDIR / parcellation / spintype / 'neurosynth'
        # generate the permuted data from the surrogate resampling arrays
        print('Generating surrogates...', end='\b' * 24, flush=True)
        permdata = get_surrogates(nsdata, surrdir, scale)
        permcorrs = np.vstack([
            _get_permcorr(nsdata, permdata[..., n])
            for n in range(permdata.shape[-1])
        ])
        putils.save_dir(out, permcorrs)
    elif spintype == 'moran':
        surrogates = np.zeros((*nsdata.shape, 10000))
        for hemi, dist, idx in putils.yield_data_dist(DISTDIR, parcellation,
                                                      scale, nsdata):
            mrs = moran.MoranRandomization(joint=True, n_rep=10000,
                                           tol=1e-6, random_state=1234)
            mrs.fit(dist)
            surrogates[idx] = mrs.randomize(hemi).transpose(1, 2, 0)
        permcorrs = np.vstack([
            _get_permcorr(nsdata, surrogates[..., n])
            for n in range(surrogates.shape[-1])
        ])
        putils.save_dir(out, permcorrs)
    else:
        spins = SPDIR / parcellation / spintype / f'{scale}_spins.csv'
        permcorrs = gen_permcorrs(nsdata, spins, out)

    nsdata = nsdata.dropna(axis=0, how='all')
    pvals = get_fwe(np.corrcoef(nsdata.T), permcorrs)

    out = pd.DataFrame(dict(
        parcellation=parcellation, scale=scale, spintype=spintype,
        n_sig=np.sum(np.triu(pvals < ALPHA, k=1))
    ), index=[0])

    return out
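# Usage sketch (illustrative): the NeuroSynth variant returns a single-row
# frame counting significant pairwise term correlations.
#
#   stats = run_null('atl-cammoun2012', 'scale125', 'vazquez-rodriguez')
#   int(stats['n_sig'])                     # pairs surviving FWE at ALPHA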