def make_surrogates(data, parcellation, scale, spatnull):
    """
    Creates surrogate maps of `data` with the `spatnull` null method

    Surrogates are generated independently for every (data, distance matrix,
    index) triplet yielded by `putils.yield_data_dist` and are written into
    the matching rows of the output array. Entries that are NaN are excluded
    from surrogate generation and are NaN in every generated surrogate.

    Parameters
    ----------
    data : (N,) pd.DataFrame
        Data for which surrogates should be generated
    parcellation : {'atl-cammoun2012', 'atl-schaefer2018'}
        Name of parcellation; forwarded to `putils.yield_data_dist`. When it
        equals 'vertex' the 'burt2020' method uses `mapgen.Sampled` instead
        of `mapgen.Base`
    scale : str
        Scale of `parcellation`; forwarded to `putils.yield_data_dist`
    spatnull : {'burt2018', 'burt2020', 'moran'}
        Name of null method used to generate the surrogates

    Returns
    -------
    surrogates : (N, `N_PERM`) np.ndarray
        Generated surrogates

    Raises
    ------
    ValueError
        If `spatnull` is not one of the supported null methods
    """

    supported = ('burt2018', 'burt2020', 'moran')
    if spatnull not in supported:
        raise ValueError(f'Cannot make surrogates for null method {spatnull}')

    # smallest non-NaN value across the *whole* dataset; used below to shift
    # the data into the positive range for the 'burt2018' method
    values = np.asarray(data)
    lowest = values[~np.isnan(values)].min()

    surrogates = np.zeros((len(data), N_PERM))
    hemispheres = putils.yield_data_dist(DISTDIR, parcellation, scale, data,
                                         inverse=(spatnull == 'moran'))
    for hdata, dist, idx in hemispheres:

        # NaN entries cannot be used to generate surrogates (should only
        # matter for vertex-level data): mark their rows as NaN in the
        # output and drop them from the data / distance matrix / indices
        missing = np.isnan(hdata)
        keep = np.logical_not(missing)
        surrogates[idx[missing]] = np.nan
        hdata = hdata[keep]
        dist = dist[np.ix_(keep, keep)]
        idx = idx[keep]

        if spatnull == 'burt2018':
            # the Box-Cox transformation requires strictly positive data
            hdata += np.abs(lowest) + 0.1
            nulls = burt.batch_surrogates(dist, hdata, n_surr=N_PERM,
                                          seed=SEED)
        elif spatnull == 'burt2020' and parcellation == 'vertex':
            # the sampled generator takes row-wise sorted distances together
            # with the indices that produce that sorting
            order = np.argsort(dist, axis=-1)
            dist = np.sort(dist, axis=-1)
            knn = 1000 if USE_KNN else len(hdata)
            generator = mapgen.Sampled(hdata, dist, order, knn=knn, seed=SEED)
            nulls = generator(N_PERM).T
        elif spatnull == 'burt2020':
            nulls = mapgen.Base(hdata, dist, seed=SEED)(N_PERM, 50).T
        else:
            # only 'moran' can reach this point given the check above
            mrs = moran.MoranRandomization(joint=True, n_rep=N_PERM,
                                           tol=1e-6, random_state=SEED)
            with threadpoolctl.threadpool_limits(limits=2):
                nulls = mrs.fit(dist).randomize(hdata).T

        surrogates[idx] = nulls

    return surrogates
def make_surrogates(data, parcellation, scale, spatnull, fn=None):
    """
    Creates surrogate maps of `data` with the `spatnull` null method

    Surrogates are generated separately for the 'lh' and 'rh' hemispheres,
    using the distance matrix returned by `get_distmat` for each. Entries
    that are NaN are excluded from surrogate generation and are NaN in every
    generated surrogate.

    Parameters
    ----------
    data : (N,) pd.DataFrame or array_like
        Data for which surrogates should be generated. If `data` has an
        index of strings, rows whose label starts with 'lh' / 'rh' are
        assigned to the respective hemisphere. If that lookup raises an
        AttributeError the first `N // 2` entries are treated as the first
        hemisphere ('lh') and the following `N // 2` entries as the second
    parcellation : str
        Name of parcellation; forwarded to `get_distmat`. When it equals
        'vertex' the 'burt2020' method uses `mapgen.Sampled` instead of
        `mapgen.Base`
    scale : str
        Scale of `parcellation`; forwarded to `get_distmat`
    spatnull : {'burt2018', 'burt2020', 'moran'}
        Name of null method used to generate the surrogates
    fn : optional
        Forwarded unchanged to `get_distmat` as its `fn` argument.
        Default: None

    Returns
    -------
    surrogates : (N, `N_PERM`) np.ndarray
        Generated surrogates

    Raises
    ------
    ValueError
        If `spatnull` is not one of the supported null methods
    """

    supported = ('burt2018', 'burt2020', 'moran')
    if spatnull not in supported:
        raise ValueError(f'Cannot make surrogates for null method {spatnull}')

    # smallest non-NaN value across the *whole* dataset; used below to shift
    # the data into the positive range for the 'burt2018' method
    values = np.asarray(data)
    lowest = values[~np.isnan(values)].min()

    surrogates = np.zeros((len(data), N_PERM))
    for hemi_num, hemi in enumerate(('lh', 'rh')):
        dist = get_distmat(hemi, parcellation, scale, fn=fn)
        try:
            # labelled data: select rows by hemisphere prefix of the index
            idx = np.asarray([
                row for row, label in enumerate(data.index)
                if label.startswith(hemi)
            ])
            hdata = np.squeeze(np.asarray(data.iloc[idx]))
        except AttributeError:
            # unlabelled data: split into two equally-sized halves
            half = len(data) // 2
            idx = np.arange(hemi_num * half, (hemi_num + 1) * half)
            hdata = np.squeeze(data[idx])

        # NaN entries cannot be used to generate surrogates (should only
        # matter for vertex-level data): mark their rows as NaN in the
        # output and drop them from the data / distance matrix / indices
        missing = np.isnan(hdata)
        keep = np.logical_not(missing)
        surrogates[idx[missing]] = np.nan
        hdata = hdata[keep]
        dist = dist[np.ix_(keep, keep)]
        idx = idx[keep]

        if spatnull == 'burt2018':
            # the Box-Cox transformation requires strictly positive data
            hdata += np.abs(lowest) + 0.1
            nulls = burt.batch_surrogates(dist, hdata, n_surr=N_PERM,
                                          seed=SEED)
        elif spatnull == 'burt2020' and parcellation == 'vertex':
            # the sampled generator takes row-wise sorted distances together
            # with the indices that produce that sorting
            order = np.argsort(dist, axis=-1)
            dist = np.sort(dist, axis=-1)
            nulls = mapgen.Sampled(hdata, dist, order, seed=SEED)(N_PERM).T
        elif spatnull == 'burt2020':
            nulls = mapgen.Base(hdata, dist, seed=SEED)(N_PERM, 50).T
        else:
            # only 'moran' can reach this point given the check above.
            # the randomization is fit on element-wise inverted distances;
            # the diagonal is set to 1 first so that the inversion does not
            # divide by zero (it therefore stays 1 afterwards)
            dist = dist.astype('float64')
            np.fill_diagonal(dist, 1)
            dist **= -1
            mrs = moran.MoranRandomization(joint=True, n_rep=N_PERM,
                                           tol=1e-6, random_state=SEED)
            nulls = mrs.fit(dist).randomize(hdata).T

        surrogates[idx] = nulls

    return surrogates
def run_null(netclass, parc, scale, spintype):
    """
    Runs spatial permutation null model for given combination of inputs

    Permuted network means are loaded from disk when they were generated
    previously; otherwise they are generated with the requested `spintype`
    and saved. The real network means are then compared against them.

    Parameters
    ----------
    netclass : {'vek', 'yeo'}
        Network partition to test
    parc : str
        Name of parcellation to be used
    scale : str
        Scale of `parc` to be used
    spintype : str
        Name of spin method to be used

    Returns
    -------
    stats : pd.DataFrame
        Generated statistics with columns ['parcellation', 'scale',
        'spintype', 'netclass', 'network', 'zscore', 'pval']
    """

    data = load_data(netclass, parc, scale)

    def _permuted_netmeans(perms, n_perm):
        # network means for each of the first `n_perm` permutations stored
        # along the last axis of `perms`, stacked row-wise
        return np.vstack([
            _get_netmeans(perms[..., perm], data['networks'])
            for perm in range(n_perm)
        ])

    print(f'Running {spintype:>9} spins for {scale}: ', end='', flush=True)
    nullfile = (HCPDIR / parc / 'nulls' / netclass / spintype
                / f'{scale}_nulls.csv')

    if nullfile.exists():
        # nulls were generated on a previous run; simply re-use them
        permnets = np.loadtxt(nullfile, delimiter=',')
    elif spintype == 'cornblath':
        # even though the data are parcellated this method projects them to
        # the surface and spins the vertices, so the pre-generated
        # vertex-level spins are needed
        spinfile = (SPDIR / 'vertex' / 'vazquez-rodriguez'
                    / 'fsaverage5_spins.csv')
        # annotation files are required to project parcels to the surface
        fetcher = getattr(nndata, f"fetch_{parc.replace('atl-', '')}")
        annotations = fetcher('fsaverage5', data_dir=ROIDIR)[scale]
        # load the spins up-front so that an array is handed to `spin_data`
        print('Pre-loading spins...', end='\b' * 20, flush=True)
        spins = np.loadtxt(spinfile, delimiter=',', dtype='int32')
        n_rotate = spins.shape[-1]
        # permutations are stored along the last axis of `permdata`
        permdata = nnsurf.spin_data(np.asarray(data['myelin']),
                                    version='fsaverage5',
                                    lhannot=annotations.lh,
                                    rhannot=annotations.rh,
                                    spins=spins, n_rotate=n_rotate,
                                    verbose=True)
        permnets = _permuted_netmeans(permdata, n_rotate)
        putils.save_dir(nullfile, permnets)
    elif spintype in ('burt2018', 'burt2020'):
        surrdir = SURRDIR / parc / spintype / 'hcp'
        surrogates = get_surrogates(data['myelin'], surrdir, scale)
        permnets = _permuted_netmeans(surrogates, surrogates.shape[-1])
        putils.save_dir(nullfile, permnets)
    elif spintype == 'moran':
        n_perm, seed = 10000, 1234
        surrogates = np.zeros((len(data['myelin']), n_perm))
        hemispheres = putils.yield_data_dist(DISTDIR, parc, scale,
                                             data['myelin'])
        for hemi, dist, idx in hemispheres:
            mrs = moran.MoranRandomization(joint=True, n_rep=n_perm,
                                           tol=1e-6, random_state=seed)
            mrs.fit(dist)
            surrogates[idx] = np.squeeze(mrs.randomize(hemi)).T
        permnets = _permuted_netmeans(surrogates, surrogates.shape[-1])
        putils.save_dir(nullfile, permnets)
    else:
        # every other method relies on pre-generated parcel-level spins
        spinfile = SPDIR / parc / spintype / f'{scale}_spins.csv'
        permnets = gen_permnets(data['myelin'], data['networks'],
                                spinfile, nullfile)

    # compare the real network averages against the permuted values
    real = _get_netmeans(data['myelin'], data['networks'])
    zscores, pvals = get_fwe(real, permnets)
    stats = pd.DataFrame({
        'parcellation': parc,
        'scale': scale,
        'spintype': spintype,
        'netclass': netclass,
        'network': list(NET_CODES[netclass].keys()),
        'zscore': zscores,
        'pval': pvals,
    })

    return stats
# NOTE(review): this is a fragment; `data`, `start`, `end`, `name`, `scale`,
# `lh`, `rh` and `OPTS` are defined outside the visible code

# split `data` at position `end - start` into two parts (presumably the
# left- and right-hemisphere values, going by the names -- TODO confirm)
lhdata, rhdata = data[:end - start], data[end - start:]
# load the per-hemisphere distance matrices from the 'nomedial' directory
lhdist = np.loadtxt(DISTDIR / name / 'nomedial' / f'{scale}_lh_dist.csv',
                    delimiter=',')
rhdist = np.loadtxt(DISTDIR / name / 'nomedial' / f'{scale}_rh_dist.csv',
                    delimiter=',')
# burt 2018: one surrogate per hemisphere, generated from the data shifted
# by +1 (presumably to keep the input positive -- TODO confirm)
plot = np.hstack((burt.make_surrogate(lhdist, lhdata + 1, seed=1234),
                  burt.make_surrogate(rhdist, rhdata + 1, seed=1234)))
save_brainmap(plot, FIGDIR / 'burt2018_surf.png', lh, rh, **OPTS)

# burt 2020 (need to rescale to original data range)
# column 180 of the transposed generator output is used for each hemisphere
# and rescaled to the min / max of that hemisphere's original data
plot = np.hstack((nnutils.rescale(
    Base(lhdata, lhdist, seed=1234)(200, 50).T[:, 180],
    lhdata.min(), lhdata.max()),
    nnutils.rescale(
    Base(rhdata, rhdist, seed=1234)(200, 50).T[:, 180],
    rhdata.min(), rhdata.max())))
save_brainmap(plot, FIGDIR / 'burt2020_surf.png', lh, rh, **OPTS)

# moran spectral randomization
# the distance matrices are inverted element-wise IN PLACE; the diagonal is
# set to 1 beforehand, so it is still 1 after the inversion
np.fill_diagonal(lhdist, 1)
np.fill_diagonal(rhdist, 1)
lhdist **= -1
rhdist **= -1
mrs = moran.MoranRandomization(joint=True, n_rep=1000, tol=1e-6,
                               random_state=1234)
# the same `mrs` object is re-fit for each hemisphere in turn; element 611
# along the first axis of the stacked randomizations is the one plotted
plot = np.hstack((np.squeeze(mrs.fit(lhdist).randomize(lhdata)),
                  np.squeeze(mrs.fit(rhdist).randomize(rhdata))))[611]
save_brainmap(plot, FIGDIR / 'moran_surf.png', lh, rh, **OPTS)
def run_null(parcellation, scale, spintype):
    """
    Runs spatial permutation null model for given combination of inputs

    Permuted correlations are loaded from disk when they were generated
    previously; otherwise they are generated with the requested `spintype`
    and saved. The correlations of the real data are then compared against
    them.

    Parameters
    ----------
    parcellation : str
        Name of parcellation to be used
    scale : str
        Scale of `parcellation` to be used
    spintype : str
        Name of spin method to be used

    Returns
    -------
    stats : pd.DataFrame
        Generated statistics with columns ['parcellation', 'scale',
        'spintype', 'n_sig'], where 'n_sig' is the number of entries above
        the diagonal of the p-value matrix that are smaller than `ALPHA`
    """

    nsdata = load_data(parcellation, scale)

    def _permuted_corrs(perms):
        # permuted correlation for every permutation stored along the last
        # axis of `perms`, stacked row-wise
        return np.vstack([
            _get_permcorr(nsdata, perms[..., perm])
            for perm in range(perms.shape[-1])
        ])

    print(f'Running {spintype:>9} spins for {scale}: ', end='', flush=True)
    nullfile = NSDIR / parcellation / 'nulls' / spintype / f'{scale}_nulls.csv'

    if nullfile.exists():
        # nulls were generated on a previous run; simply re-use them
        permcorrs = np.loadtxt(nullfile).reshape(-1, 1)
    elif spintype == 'cornblath':
        # even though the data are parcellated this method projects them to
        # the surface and spins the vertices, so the pre-generated
        # vertex-level spins are needed
        spinfile = (SPDIR / 'vertex' / 'vazquez-rodriguez'
                    / 'fsaverage5_spins.csv')
        # annotation files for the requested parcellation + scale
        fetcher = getattr(nndata, f"fetch_{parcellation.replace('atl-', '')}")
        annotations = fetcher('fsaverage5', data_dir=ROIDIR)[scale]
        # load the spins up-front so that an array is handed to `spin_data`
        print('Pre-loading spins...', end='\b' * 20, flush=True)
        spins = np.loadtxt(spinfile, delimiter=',', dtype='int32')
        # permutations are stored along the last axis of `permdata`
        permdata = nnsurf.spin_data(nsdata, version='fsaverage5',
                                    lhannot=annotations.lh,
                                    rhannot=annotations.rh,
                                    spins=spins, n_rotate=spins.shape[-1],
                                    verbose=True)
        permcorrs = _permuted_corrs(permdata)
        putils.save_dir(nullfile, permcorrs)
    elif spintype in ('burt2018', 'burt2020'):
        surrdir = SURRDIR / parcellation / spintype / 'neurosynth'
        # the permuted data come from the surrogate resampling arrays
        print('Generating surrogates...', end='\b' * 24, flush=True)
        permdata = get_surrogates(nsdata, surrdir, scale)
        permcorrs = _permuted_corrs(permdata)
        putils.save_dir(nullfile, permcorrs)
    elif spintype == 'moran':
        n_perm, seed = 10000, 1234
        surrogates = np.zeros((*nsdata.shape, n_perm))
        hemispheres = putils.yield_data_dist(DISTDIR, parcellation, scale,
                                             nsdata)
        for hemi, dist, idx in hemispheres:
            mrs = moran.MoranRandomization(joint=True, n_rep=n_perm,
                                           tol=1e-6, random_state=seed)
            mrs.fit(dist)
            # move the leading axis of the randomized data to the end so
            # that it lines up with the last axis of `surrogates`
            surrogates[idx] = mrs.randomize(hemi).transpose(1, 2, 0)
        permcorrs = _permuted_corrs(surrogates)
        putils.save_dir(nullfile, permcorrs)
    else:
        # every other method relies on pre-generated parcel-level spins
        spinfile = SPDIR / parcellation / spintype / f'{scale}_spins.csv'
        permcorrs = gen_permcorrs(nsdata, spinfile, nullfile)

    # rows that are entirely NaN are dropped before correlating the real data
    cleaned = nsdata.dropna(axis=0, how='all')
    pvals = get_fwe(np.corrcoef(cleaned.T), permcorrs)
    # only count entries above the diagonal of the p-value matrix
    n_sig = np.sum(np.triu(pvals < ALPHA, k=1))
    stats = pd.DataFrame({
        'parcellation': parcellation,
        'scale': scale,
        'spintype': spintype,
        'n_sig': n_sig,
    }, index=[0])

    return stats