def test_brody_fit() -> None:
    """Check that Brody fits return finite spacings and valid CDF values.

    For several matrix sizes the generated eigenvalues are unfolded with
    ``degree=7`` and fitted with each method in turn. A ``ValueError`` is
    raised when the returned spacings contain an infinity or when either
    returned CDF has values outside [0, 1].
    """
    cdf_checks = (
        ("ecdf", "Invalid values in empirical cdf."),
        ("brody_cdf", "Invalid values in brody cdf."),
    )
    for N in [100, 250, 500, 1000]:
        unfolded = Eigenvalues(generate_eigs(N)).unfold(degree=7)
        # fit via max spacing first, then via mle
        for method in ("spacing", "mle"):
            fit = unfolded.fit_brody(method=method)

            spacings = fit["spacings"]
            if -np.inf in spacings or np.inf in spacings:
                raise ValueError("Return spacings contains infinities.")

            for key, message in cdf_checks:
                cdf = fit[key]
                if np.sum(cdf < 0) > 0 or np.sum(cdf > 1):
                    raise ValueError(message)
def test_trim_manual() -> None:
    """Check that manual trimming agrees with plain slicing of the raw values.

    Twenty random, sorted index pairs are drawn; for each, the slice of the
    raw array must match the values held by the manually trimmed object.
    """
    vals = generate_eigs(2000)
    for _ in range(20):
        bounds = np.array(np.random.uniform(0, len(vals), 2), dtype=int)
        start, stop = np.sort(bounds)
        expected = np.copy(vals[start:stop])
        trimmed = Eigenvalues(vals).trim_manually(start, stop)
        assert np.allclose(expected, trimmed.vals)
def precompute_levelvar(eigpaths: List[Path], args: Any, out: Path, force: bool = False, silent: bool = False) -> Path:
    """Compute the level variance of each saved spectrum and pickle the results.

    Parameters
    ----------
    eigpaths: List[Path]
        Paths of the saved eigenvalue arrays (the values of either DATASETS or
        DATASETS_FULLPRE).

    args: Any
        Options object; `args.trim`, `args.unfold` and `args.levelvar` are read.

    out: Path
        Where the resulting DataFrame is pickled.

    force: bool
        If False (default), return immediately when `out` already exists.

    silent: bool
        If False (default), display a tqdm progress bar while calculating.

    Returns
    -------
    pickle: Path
        `out`, the path of the pickle holding the precomputed values.
    """
    if not force and out.exists():
        return out

    template = "{} - Levelvar"
    results = pd.DataFrame()
    progress = tqdm(total=len(eigpaths), desc=template.format("eigs-XX"), disable=silent)
    for eigpath in eigpaths:
        column = eigpath.stem
        spectrum = np.load(eigpath)
        if args.trim not in ["(1,:)", "", "(0,:)"]:
            # NOTE(review): `eval` executes whatever `args.trim` contains; only
            # safe while the trim string comes from trusted configuration.
            start, stop = eval(args.trim)
            spectrum = spectrum[start:stop]
        else:
            spectrum = spectrum[1:]  # smallest eigenvalue is always spurious here

        unfolded = Eigenvalues(spectrum).unfold(**args.unfold)
        progress.set_description(desc=template.format(eigpath.stem))
        levelvar = unfolded.level_variance(**args.levelvar)
        progress.update()

        # the L column is taken from the first spectrum processed
        if results.get("L") is None:
            results["L"] = levelvar["L"]
        results[column] = levelvar["sigma"]

    progress.close()
    results.to_pickle(out)
    return out
def precompute_brody(eigpaths: List[Path], args: Any, out: Path, force: bool = False, silent: bool = False) -> Path:
    """Fit the Brody parameter beta for each saved spectrum and pickle the results.

    Parameters
    ----------
    eigpaths: List[Path]
        Paths of the saved eigenvalue arrays (the values of either DATASETS or
        DATASETS_FULLPRE).

    args: Any
        Options object; `args.trim`, `args.unfold` and `args.brody` are read.

    out: Path
        Where the resulting DataFrame (single row "beta", one column per
        eigenvalue file) is pickled.

    force: bool
        If False (default), return immediately when `out` already exists.

    silent: bool
        If False (default), display a tqdm progress bar while calculating.

    Returns
    -------
    pickle: Path
        `out`, the path of the pickle holding the precomputed values.
    """
    if not force and out.exists():
        return out

    untrimmed_specs = ["(1,:)", "", "(0,:)"]
    template = "{} - Brody"
    betas = pd.DataFrame(index=["beta"])
    progress = tqdm(total=len(eigpaths), desc=template.format("eigs-XX"), disable=silent)
    for eigpath in eigpaths:
        column = eigpath.stem
        spectrum = np.load(eigpath)
        if args.trim in untrimmed_specs:
            spectrum = spectrum[1:]  # smallest eigenvalue is always spurious here
        else:
            # NOTE(review): `eval` executes whatever `args.trim` contains; only
            # safe while the trim string comes from trusted configuration.
            first, last = eval(args.trim)
            spectrum = spectrum[first:last]

        unfolded = Eigenvalues(spectrum).unfold(**args.unfold)
        progress.set_description(desc=template.format(eigpath.stem))
        progress.update()
        fit = unfolded.fit_brody(**args.brody)
        betas[column] = fit["beta"]

    progress.close()
    betas.to_pickle(out)
    return out
def test_trim_reports() -> None:
    """Regression-check the trim report of a fixed-seed spectrum.

    Asserts the consistent smoothers and best trim indices reported by
    `best_overall()`, then exercises the trim-step plot in "test" mode.
    """
    report = Eigenvalues(generate_eigs(2000, seed=2)).trim_report()
    overall = report.best_overall()
    _smoothers, _unfolds, best_indices, consistent_smoothers = overall

    expected_smoothers = ["poly_7", "poly_8", "poly_9"]
    expected_indices = [(104, 1765), (231, 1765), (104, 2000)]
    assert np.array_equal(np.sort(consistent_smoothers), np.sort(expected_smoothers))
    assert np.array_equal(best_indices, expected_indices)

    report.plot_trim_steps(mode="test")
def test_poisson() -> None:
    """Smoke-test the NNSD plot of an unfolded Poisson spectrum."""
    poisson_eigs = generate_eigs(5000, kind="poisson")
    plot_options = {
        "title": "Poisson Spacing Test",
        "bins": 10,
        "kde": True,
        "mode": "test",
        "ensembles": ["poisson"],
    }
    Eigenvalues(poisson_eigs).unfold().plot_nnsd(**plot_options)
def test_unfold_methods() -> None:
    """Regression-check automatic trimming and unfolding on a fixed-seed spectrum.

    The first and last values of both the best-trimmed and the
    auto-trimmed-and-unfolded results are printed and compared against
    recorded reference numbers.
    """

    def report_ends(values) -> None:
        # print the first and last entries so a failure shows the actual numbers
        print("Trim starts and ends:")
        print(values[0])
        print(values[-1])

    eigs = Eigenvalues(generate_eigs(500, seed=2))

    best_trimmed = eigs.get_best_trimmed()
    report_ends(best_trimmed.vals)
    assert np.allclose(best_trimmed.vals[0], -35.84918623729985)
    assert np.allclose(best_trimmed.vals[-1], 34.709818777689364)

    auto_unfolded = eigs.trim_unfold_auto()
    report_ends(auto_unfolded.vals)
    assert np.allclose(auto_unfolded.vals[0], -2.473290621491799)
    assert np.allclose(auto_unfolded.vals[-1], 504.2764217889801)
def unfold_and_plot(eigs: ndarray, suptitle: str) -> None:
    """Auto-trim and unfold `eigs`, then plot its spectral observables.

    Parameters
    ----------
    eigs: ndarray
        The raw eigenvalues.

    suptitle: str
        Figure title; the percentage of eigenvalues removed by trimming is
        appended to it.
    """
    auto_options = dict(max_trim=0.5, max_iters=9, poly_degrees=[13], gompertz=False)
    unfolded = Eigenvalues(eigs).trim_unfold_auto(**auto_options)

    # percentage of the original eigenvalues dropped by the automatic trim
    trimmed = np.round(100 - 100 * len(unfolded.vals) / len(eigs), 1)

    rigidity = unfolded.spectral_rigidity(c_iters=10000, show_progress=True)
    levelvar = unfolded.level_variance(show_progress=True)
    _observables(
        eigs=unfolded.original_eigs,
        unfolded=unfolded.vals,
        rigidity_df=rigidity,
        levelvar_df=levelvar,
        suptitle=suptitle + f" ({trimmed}% removed)",
        mode="noblock",
    )
def _get_formatted_data(self) -> Dict[str, Tuple[DataFrame, DataFrame]]:
    """Load all data based on self.args, and reformat for ML classifiers.

    Each eigenvalue file in `self.eigs1` / `self.eigs2` and each Marchenko
    pickle is read from disk once and reused for every feature derived from
    it.

    Returns
    -------
    features: Dict[str, Tuple[DataFrame, DataFrame]]
        Maps a feature name to a `(group1, group2)` pair of pandas objects:
        raw eigenvalues (the last `l_shared` entries of each file, where
        `l_shared` is the shortest spectrum length over both groups), unfolded
        eigenvalues, the largest eigenvalue, the last 20 eigenvalues, the
        Marchenko noise ratios (plain and shifted), the Brody beta, and the
        precomputed rigidity and level variance.
    """
    trim_args = self.args.trim
    unf_args = self.args.unfold

    def unfolded_frame(spectra: list) -> DataFrame:
        # One row per spectrum: trim per the args, unfold, keep the unfolded values.
        return DataFrame(
            data=[Eigenvalues(_trimmed_from_args(eigs, trim_args)).unfold(**unf_args).vals for eigs in spectra]
        )

    # see if the raw eigs alone are more useful than RMT stats
    raw1 = [np.load(p) for p in self.eigs1]
    raw2 = [np.load(p) for p in self.eigs2]
    unfolded1 = unfolded_frame(raw1)
    unfolded2 = unfolded_frame(raw2)

    l1, l2 = np.min([len(eigs) for eigs in raw1]), np.min([len(eigs) for eigs in raw2])
    l_shared = np.min([l1, l2])
    # use largest eigenvalues only
    eigs1 = DataFrame(data=np.array([eigs[-l_shared:] for eigs in raw1]))
    eigs2 = DataFrame(data=np.array([eigs[-l_shared:] for eigs in raw2]))

    # derived from the already-loaded arrays instead of re-reading every file
    largest1 = DataFrame([eigs.max() for eigs in raw1])
    largest2 = DataFrame([eigs.max() for eigs in raw2])
    largest20_1 = DataFrame([eigs[-20:] for eigs in raw1])
    largest20_2 = DataFrame([eigs[-20:] for eigs in raw2])

    marchenko1 = pd.read_pickle(self.marchenko[0])
    marchenko2 = pd.read_pickle(self.marchenko[1])
    noise1 = marchenko1.loc["noise_ratio", :].T
    noise2 = marchenko2.loc["noise_ratio", :].T
    noise_shifted1 = marchenko1.loc["noise_ratio_shifted", :].T
    noise_shifted2 = marchenko2.loc["noise_ratio_shifted", :].T

    brody1 = pd.read_pickle(self.brody[0]).loc["beta"].T
    brody2 = pd.read_pickle(self.brody[1]).loc["beta"].T
    rig1 = pd.read_pickle(self.rigidity[0]).set_index("L").T  # must be (n_samples, n_features)
    rig2 = pd.read_pickle(self.rigidity[1]).set_index("L").T
    var1 = pd.read_pickle(self.levelvar[0]).set_index("L").T
    var2 = pd.read_pickle(self.levelvar[1]).set_index("L").T

    return {
        "Raw Eigs": (eigs1, eigs2),
        "Unfolded": (unfolded1, unfolded2),
        "Largest": (largest1, largest2),
        "Largest20": (largest20_1, largest20_2),
        "Noise": (noise1, noise2),
        "Noise (shift)": (noise_shifted1, noise_shifted2),
        "Brody": (brody1, brody2),
        "Rigidity": (rig1, rig2),
        "Levelvar": (var1, var2),
    }
def test_init_sanity() -> None:
    """Sanity-check the attributes, CSV export and plotting of a trim report."""
    eigs = Eigenvalues(generate_eigs(1000))
    report_options = dict(
        max_iters=9,
        poly_degrees=[5, 7, 9],
        spline_degrees=[],
        spline_smooths=[],
        show_progress=True,
    )
    report = eigs.trim_report(**report_options)

    assert np.allclose(report._untrimmed, eigs.original_eigenvalues)
    assert isinstance(report.summary, pd.DataFrame)
    assert isinstance(report._trim_iters, list)
    assert isinstance(report._trim_iters[0], TrimIter)

    # the CSV is written to the working directory and removed again
    csv_path = Path(".") / "trim_report.csv"
    report.to_csv(csv_path)
    assert csv_path.exists()
    csv_path.unlink()

    report.plot_trim_steps(mode="test")
def test_axes_configuring() -> None:
    """Plot unfolding fits for a partially correlated matrix and a GOE spectrum.

    A fixed percentage of the rows of a standard-normal matrix is overwritten
    with scaled, noisy copies of the first row before the eigenvalues are
    computed, unfolded and plotted.
    """
    var = 0.1
    percent = 25
    A = np.random.standard_normal([1000, 500])
    n_rows, n_cols = A.shape[0], A.shape[1]

    # don't select first row
    shuffled_rows = np.random.permutation(n_rows - 1) + 1
    n_correlated = int(np.floor((percent / 100) * n_rows))

    # introduce correlation in A
    for row in shuffled_rows[:n_correlated]:
        scale = np.random.uniform(1, 2)
        noise = np.random.normal(0, var, size=n_cols)
        A[row, :] = scale * A[0, :] + noise

    eigs = get_eigs(correlate_fast(A))
    print(f"\nPercent correlated noise: {percent}%")
    Eigenvalues(eigs).unfold(degree=13).plot_fit(mode="noblock")
    goe_unfolded(1000, log=True).plot_fit(mode="block")
def test_plot_rigidity() -> None:
    """Smoke-test the NNSD, level variance and spectral rigidity plots."""
    # Notes on where the rigidity fit is good:
    #   - good fit for max_L=50 when using generate_eigs(10000)
    #   - good fit for max_L=55 when using generate_eigs(20000)
    #   - not likely to be good fit for max_L beyond 20 for generate_eigs(1000)
    #
    # L good    |  len(eigs) |     percent
    # -----------------------------------
    # 30-40     |    2000    |
    # 30-50     |    8000    |  0.375 - 0.625
    # 50-70     |   10000    |    0.5 - 0.7
    # 50        |   20000    |      0.25
    unfolded = Eigenvalues(generate_eigs(2000, log=True)).unfold(smoother="poly", degree=19)

    unfolded.plot_nnsd(mode="test")
    # unfolded.plot_next_nnsd(mode="test")

    levelvar_L = np.arange(0.5, 100, 0.2)
    unfolded.plot_level_variance(L=levelvar_L, mode="test", ensembles=["goe", "poisson"])

    rigidity_L = np.arange(1, 200, 0.5)
    unfolded.plot_spectral_rigidity(L=rigidity_L, c_iters=10000, mode="test")
def plot_pred_nnsd(
    args: Any,
    dataset_name: str,
    comparison: str,
    unfold: List[int] = None,
    ensembles: bool = True,
    trim: float = 3.0,
    silent: bool = False,
    force: bool = False,
) -> None:
    """Plot NNSD histograms with Brody fits for one comparison, one panel per degree.

    Parameters
    ----------
    args: Any
        Options object handed to `Pairings`; its `normalize` attribute is set
        to False here.

    dataset_name: str
        Dataset name handed to `Pairings`.

    comparison: str
        Label of the pairing to plot. Exactly one pairing must carry it, and
        the label is split on "_" into three parts to get the two group names.

    unfold: List[int]
        Unfolding degrees, one subplot each. Defaults to [5, 7, 9, 11, 13].

    ensembles: bool
        If True (default), overlay the Poisson (GDE) and GOE curves.

    trim: float
        Upper limit of the histogram bins; when positive, spacings above it
        are discarded before fitting and plotting.

    silent: bool
        Unused.

    force: bool
        Unused.

    Raises
    ------
    ValueError
        If the number of pairings labelled `comparison` is not exactly one.

    Notes
    -----
    Mutates the module-level `ARGS` (`ARGS.trim` and `ARGS.unfold["degree"]`).
    The figure is handed to `make_plot` rather than returned.
    """
    global ARGS
    if unfold is None:
        # built per call so the default list is never shared between calls
        unfold = [5, 7, 9, 11, 13]
    # ARGS.fullpre = True
    BINS = np.linspace(0, trim, 20)

    def _plot_group(unfoldeds: list, color: str, alpha: float, label: str, ax: Any) -> None:
        # Draw one histogram and one fitted Brody curve per spectrum in the
        # group; only the first curve carries the legend label.
        for j, unf in enumerate(unfoldeds):
            spacings = unf.spacings
            if trim > 0.0:
                spacings = spacings[spacings <= trim]
            beta = fit_brody_mle(spacings)
            brody = brody_dist(spacings, beta)
            sbn.distplot(
                spacings,
                norm_hist=True,
                bins=BINS,
                kde=False,
                axlabel="spacing (s)",
                color=color,
                hist_kws={"alpha": alpha},
                ax=ax,
            )
            sbn.lineplot(
                x=spacings,
                y=brody,
                color=color,
                ax=ax,
                alpha=0.9,
                label=label if j == 0 else None,
                linewidth=0.5,
            )

    # for trim_idx in ["(1,-1)", "(1,-20)"]:
    for trim_idx in ["(1,-1)"]:
        ARGS.trim = trim_idx
        all_pairs = []
        for normalize in [False]:
            args.normalize = normalize
            for degree in unfold:
                ARGS.unfold["degree"] = degree
                pairings = Pairings(args, dataset_name)
                pairing = [p for p in pairings.pairs if p.label == comparison]
                if len(pairing) != 1:
                    # previously reported "Too many pairings" even when none matched
                    raise ValueError(
                        f"Expected exactly one pairing labelled {comparison!r}, "
                        f"found {len(pairing)}; something is wrong."
                    )
                all_pairs.append(pairing[0])

        g1, _, g2 = all_pairs[0].label.split("_")  # groupnames
        fig, axes = plt.subplots(nrows=1, ncols=len(all_pairs), sharex=True, squeeze=False)
        for i, (pair, unfold_degree) in enumerate(zip(all_pairs, unfold)):
            ax = axes.flat[i]
            unfold_args = {**ARGS.unfold, **dict(degree=unfold_degree)}
            unf1 = [Eigenvalues(np.load(e)).unfold(**unfold_args) for e in pair.eigs1]
            unf2 = [Eigenvalues(np.load(e)).unfold(**unfold_args) for e in pair.eigs2]

            # each histogram is drawn faintly so the overlaid group adds up to ~1
            alpha1, alpha2 = 1 / len(unf1), 1 / len(unf2)
            # alpha_adj = 0.02  # good for just plotting hists, no brody
            alpha_adj = 0.00
            alpha1 += alpha_adj
            alpha2 += alpha_adj

            _plot_group(unf1, "#FD8208", alpha1, g1, ax)
            _plot_group(unf2, "#000000", alpha2, g2, ax)

            if ensembles:
                # Generate expected distributions for classical ensembles
                s = np.linspace(0, trim, 10000)
                poisson = GDE.nnsd(spacings=s)
                goe = GOE.nnsd(spacings=s)
                sbn.lineplot(x=s, y=poisson, color="#08FD4F", label="Poisson", ax=ax, alpha=0.5)
                sbn.lineplot(x=s, y=goe, color="#0066FF", label="GOE", ax=ax, alpha=0.5)

            ax.legend().set_visible(False)
            ax.set_title(f"Unfolding Degree {unfold_degree}")
            ax.set_xlabel("")
            ax.set_ylabel("")

        # only the first panel shows a legend
        axes.flat[0].legend().set_visible(True)
        fig.text(0.5, 0.04, "spacing (s)", ha="center", va="center")  # xlabel
        fig.text(0.03, 0.5, "p(s)", ha="center", va="center", rotation="vertical")  # ylabel
        fig.set_size_inches(w=7, h=1.5)  # TMI full-page max width is 7 inches
        # fig.set_size_inches(w=3.5, h=3.5)  # TMI half-page max width is 3.5 inches
        fig.subplots_adjust(top=0.83, bottom=0.2, left=0.075, right=0.955, hspace=0.2, wspace=0.23)
        make_plot(fig, show=False, fmt="png", fignum="9")
nargs=1, action="store") args = parser.parse_args() img = nib.load(args.bold[0]).get_fdata() mask = np.array(nib.load(args.mask[0]).get_fdata(), dtype=bool) N, t = (np.prod(img.shape[:-1]), img.shape[-1]) img = img.reshape([N, t]) mask = mask.reshape(-1) # remove dead or constant voxels for i, signal in enumerate(img): if np.sum(signal) == 0 or np.sum(signal * signal) == 0 or np.std( signal, ddof=1) == 0: mask[i] = False brain = img[mask, :] eigs = Eigenvalues.from_time_series(brain, covariance=False, trim_zeros=False) vals = eigs.vals outfile = Path(args.outfile[0]) shape_outfile = outfile.parent / outfile.name.replace("eigs", "shapes") parent = outfile.resolve().parent.resolve() mkdirp(parent) np.save(outfile, vals, allow_pickle=False) np.save(shape_outfile, np.array(brain.shape, dtype=int), allow_pickle=False) print(f"Saved eigenvalues to {args.outfile[0]}")
def plot_nnsd(
    self,
    trim_args: str,
    unfold_args: dict,
    n_bins: int = 20,
    max_spacing: float = 5.0,
    title: str = None,
    outdir: Path = None,
) -> None:
    """Plot the nearest-neighbour spacing distributions of both groups side by side.

    Parameters
    ----------
    trim_args: str
        Trim specification. "(1,:)", "" and "(0,:)" drop only the first
        eigenvalue; any other value is evaluated to a `(low, high)` slice.

    unfold_args: dict
        Keyword arguments forwarded to `Eigenvalues.unfold`.

    n_bins: int
        Number of histogram bins.

    max_spacing: float
        Spacings at or above this value are dropped; it is also the upper
        limit of the theoretical curves.

    title: str
        Figure title. Defaults to "<label>: NNSD".

    outdir: Path
        If None (default) the figure is shown; otherwise it is saved as a PNG
        in `outdir`, which is created if needed.

    Notes
    -----
    Returns without plotting for the "control_pre_v_parkinsons" and
    "control_v_park_pre" labels.
    """
    # label is always g1_v_g2, we want "attention" to be orange, "nonattend"
    # to be black
    if self.label in ["rest_v_task", "nopain_v_pain", "control_v_control_pre", "park_pre_v_parkinsons"]:
        c1, c2 = "#000000", "#FD8208"
    elif self.label in [
        "allpain_v_nopain",
        "allpain_v_duloxetine",
        "high_v_low",
        "control_pre_v_park_pre",
        "control_v_parkinsons",
    ]:
        c1, c2 = "#FD8208", "#000000"
    elif self.label in ["control_pre_v_parkinsons", "control_v_park_pre"]:
        return  # meaningless comparison
    else:
        c1, c2 = "#EA00FF", "#FD8208"

    def _group_spacings(paths: list) -> list:
        # Load, trim and unfold every spectrum in a group, then return its
        # spacings with the largest, histogram-skewing ones removed.
        group = []
        for path in paths:
            vals = np.load(path)
            if trim_args in ["(1,:)", "", "(0,:)"]:
                vals = vals[1:]  # smallest eigenvalue is always spurious here
            else:
                # NOTE(review): `eval` executes whatever `trim_args` contains;
                # only safe while the string comes from trusted configuration.
                low, high = eval(trim_args)
                vals = vals[low:high]
            unfolded = np.sort(Eigenvalues(vals).unfold(**unfold_args).vals)
            spacings = np.diff(unfolded)
            group.append(spacings[spacings < max_spacing])
        return group

    def _plot_group(group: list, color: str, ax: Any) -> None:
        # Overlay one histogram + KDE per spectrum; opacity shrinks with the
        # size of *this* group but never drops below 0.1.
        if not group:
            return
        alpha = np.max([0.1, 1 / len(group)])
        for spacings in group:
            sbn.distplot(
                spacings,
                norm_hist=True,
                bins=n_bins,
                kde=True,
                axlabel="spacing (s)",
                color=color,
                kde_kws={"alpha": alpha},
                hist_kws={"alpha": alpha},
                ax=ax,
            )

    spacings1 = _group_spacings(self.eigs1)
    spacings2 = _group_spacings(self.eigs2)

    mean_brody1 = np.round(float(pd.read_pickle(self.brody[0]).mean(axis=1)), 2)
    mean_brody2 = np.round(float(pd.read_pickle(self.brody[1]).mean(axis=1)), 2)

    _configure_sbn_style()
    fig, axes = plt.subplots(ncols=2, sharex=True, sharey=True)

    # plot curves for each group (the second group's KDE opacity previously
    # used the size of the first group)
    _plot_group(spacings1, c1, axes[0])
    _plot_group(spacings2, c2, axes[1])

    # plot theoretically-expected curves
    s = np.linspace(0, max_spacing, 5000)
    sbn.lineplot(x=s, y=Poisson.nnsd(spacings=s), color="#08FD4F", label="Poisson", ax=axes[0])
    sbn.lineplot(x=s, y=Poisson.nnsd(spacings=s), color="#08FD4F", label="Poisson", ax=axes[1])
    sbn.lineplot(x=s, y=GOE.nnsd(spacings=s), color="#0066FF", label="GOE", ax=axes[0])
    sbn.lineplot(x=s, y=GOE.nnsd(spacings=s), color="#0066FF", label="GOE", ax=axes[1])

    # ensure all plots have identical axes
    axes[0].set_ylim(top=2.0)
    axes[1].set_ylim(top=2.0)

    fig.suptitle(f"{self.label}: NNSD" if title is None else title)
    for i, ax in enumerate(axes):
        ax.set_xlabel("spacing (s)")
        ax.set_ylabel("density p(s)", fontname="DejaVu Sans")
        ax.set_title(
            f"{self.subgroup1 if i == 0 else self.subgroup2} (<β> = {mean_brody1 if i == 0 else mean_brody2})"
        )

    if outdir is None:
        plt.show()
        plt.close()
    else:
        os.makedirs(outdir, exist_ok=True)
        prefix = _prefix(trim_args, unfold_args)
        outfile = outdir / f"{prefix}_{self.label}_nnsd.png"
        fig.set_size_inches(10, 5)
        plt.savefig(outfile, dpi=300)
        plt.close()
        print(f"Pooled nnsd plot saved to {relpath(outfile)}")
def plot_subject_nnsd(args: Any, n_bins: int = 20, outfile: Path = None) -> None:
    """Plot the nearest-neighbour spacing distribution of every subject on a 6x4 grid.

    Parameters
    ----------
    args: Any
        Options object; `args.trim` and `args.unfold` are read.

    n_bins: int
        Number of histogram bins over the spacing range [0, 3].

    outfile: Path
        If None (default) the figure is shown; otherwise it is saved there and
        the path relative to DATA_ROOT is printed.
    """
    subjects = get_subjects_dict()
    sbn.set_context("paper")
    sbn.set_style("ticks", {"ytick.left": False})
    c1 = "#000000"
    fig, axes = plt.subplots(ncols=6, nrows=4, sharex=True, sharey=True)
    # NOTE(review): the progress total and the histogram alpha below both
    # hard-code 8, presumably the number of runs per subject -- confirm.
    pbar = tqdm(desc="NNSD", total=8 * len(subjects))
    for i, (subj_id, subject) in enumerate(subjects.items()):
        # NOTE(review): the grid has 24 axes, so more than 24 subjects would
        # raise an IndexError here.
        ax: Axes = axes.flat[i]
        eigpaths = subject["runs"]
        unfoldeds = []
        for path in eigpaths:
            vals = np.load(path)
            if args.trim in ["(1,:)", "", "(0,:)"]:
                vals = vals[1:]  # smallest eigenvalue is always spurious here
            else:
                # NOTE(review): `eval` executes whatever `args.trim` contains;
                # only safe while it comes from trusted configuration.
                low, high = eval(args.trim)
                vals = vals[low:high]
            unfoldeds.append(
                np.sort(Eigenvalues(vals).unfold(**args.unfold).vals))
        all_spacings = [np.diff(unfolded) for unfolded in unfoldeds]
        kde_gridsize = 1000
        # one KDE per run, all evaluated on the same grid so they can be averaged
        kdes = np.empty([len(all_spacings), kde_gridsize], dtype=float)
        s = np.linspace(0, 3, kde_gridsize)
        bins = np.linspace(0, 3, n_bins + 1)
        for j, spacings in enumerate(all_spacings):
            # the histogram only uses spacings in (0, 3]; the KDE below is
            # computed from the unfiltered spacings
            sbn.distplot(
                spacings[(spacings > 0) & (spacings <= 3)],
                norm_hist=True,
                bins=bins,
                kde=False,
                color=c1,
                hist_kws={
                    "alpha": 1.0 / 8,
                    "range": (0.0, 3.0)
                },
                ax=ax,
            )
            kde = _kde(spacings, grid=s)
            kdes[j, :] = kde
            pbar.update()
            # sbn.lineplot(x=s, y=kde, color=c1, alpha=1.0 / 8, ax=ax)
        # sbn.lineplot(x=s, y=kdes.mean(axis=0), color="#9d0000", label="Mean KDE", ax=ax)
        sbn.lineplot(x=s, y=kdes.mean(axis=0), color=c1, label="Mean KDE", ax=ax)
        sbn.lineplot(x=s, y=Poisson.nnsd(spacings=s), color="#08FD4F", label="Poisson", ax=ax)
        sbn.lineplot(x=s, y=GOE.nnsd(spacings=s), color="#0066FF", label="GOE", ax=ax)
        ax.set_title(f"subj-{subj_id}")
        ax.set_ylabel("")
        ax.legend(frameon=False, framealpha=0)
        plt.setp(ax.get_legend().get_texts(), fontsize="6")
        # if i != 0:
        # NOTE(review): `_remove_legend` is a private matplotlib Axes method;
        # presumably this drops the per-axes legend in favour of the single
        # figure legend added below -- confirm.
        handle, labels = ax.get_legend_handles_labels()
        ax._remove_legend(handle)
        ticks = [0, 1, 2, 3]
        ax.set_xticks(ticks)
        # ax.set_xticklabels(ticks, rotation=45, horizontalalignment="right")
        ax.set_xticklabels(ticks)
    pbar.close()
    fig.suptitle("Per-Subject NNSD")
    fig.set_size_inches(10, 6)
    fig.subplots_adjust(left=0.13, bottom=0.15, wspace=0.1, hspace=0.35)
    fig.text(0.5, 0.04, "spacing (s)", ha="center", va="center")  # xlabel
    fig.text(0.05, 0.5, "density p(s)", ha="center", va="center", rotation="vertical")  # ylabel
    plt.setp(axes, xlim=(0.0, 3.0), ylim=(0.0, 1.2))  # better use of space
    # the shared figure legend is built from the handles of the last axes in the grid
    handles, labels = axes.flat[-1].get_legend_handles_labels()
    fig.legend(handles, labels, loc="center right", frameon=False, framealpha=0, fontsize="8")
    if outfile is None:
        plt.show()
    else:
        fig.savefig(str(outfile.resolve()), dpi=300)
        # NOTE(review): `relative_to` raises ValueError when `outfile` is not
        # located under DATA_ROOT, after the figure has already been saved.
        print(f"Saved NNSD plot to {str(outfile.relative_to(DATA_ROOT))}")
    plt.close()
def test_brody_plot() -> None:
    """Smoke-test NNSD plots with Brody fits for GOE and time-series spectra.

    Each figure is a 2x2 grid with one freshly generated spectrum per panel.
    """
    bw = 0.2
    # bw = "scott"
    mode: PlotMode = "test"
    ensembles = ["goe", "poisson"]
    n_panels = 4

    # test GOE eigs
    for N in [100, 250, 500, 1000]:
        _configure_sbn_style()
        fig, axes = plt.subplots(2, 2)
        for panel in range(n_panels):
            goe_eigs = generate_eigs(N)
            Eigenvalues(goe_eigs).unfold(degree=7).plot_nnsd(
                brody=True,
                kde_bw=bw,
                title=f"GOE N={N}",
                ensembles=ensembles,
                mode="return",
                fig=fig,
                axes=axes.flat[panel],
            )
        _handle_plot_mode(mode, fig, axes)

    # test time series: (brody fit method, trim the spectrum?, panel title)
    series_cases = [
        ("mle", False, "t-series (untrimmed)(MLE)"),
        ("spacings", False, "t-series (untrimmed)(spacings)"),
        ("spacings", True, "t-series (trimmed)(spacings)"),
        ("mle", True, "t-series (trimmed)(MLE)"),
    ]
    for fit_method, trimmed, panel_title in series_cases:
        _configure_sbn_style()
        fig, axes = plt.subplots(2, 2)
        for panel in range(n_panels):
            series = np.random.standard_normal([1000, 250])
            corr_eigs = np.linalg.eigvalsh(np.corrcoef(series))
            if trimmed:
                # keep only eigenvalues well above the magnitude of the smallest one
                corr_eigs = corr_eigs[corr_eigs > 100 * np.abs(corr_eigs.min())]
            Eigenvalues(corr_eigs).unfold(degree=7).plot_nnsd(
                brody=True,
                brody_fit=fit_method,
                mode=mode,
                title=panel_title,
                ensembles=ensembles,
                kde_bw=bw,
                fig=fig,
                axes=axes.flat[panel],
            )
        _handle_plot_mode(mode, fig, axes)

    if mode != "test":
        plt.show()
def precompute_marchenko(eigpaths: List[Path], out: Path, force: bool = False, silent: bool = False) -> Path:
    """Take the eigenvalues saved in `eigpaths`, compute the Marchenko-Pastur
    endpoints (both shifted and unshifted), and save that in a DataFrame at
    `out`. The DataFrame also contains the proportion of eigenvalues strictly
    within those bounds, in the rows "noise_ratio" and "noise_ratio_shifted".

    Parameters
    ----------
    eigpaths: List[Path]
        The values of either DATASETS or DATASETS_FULLPRE. For every path a
        companion "shapes" file holding `(N, T)` must exist; see Notes.

    out: Path
        See usage below.

    force: bool
        If False (default), don't recompute the values if they already exist.

    silent: bool
        If False (default) display a tqdm progress bar while calculating.

    Returns
    -------
    pickle: Path
        Path to the pickle file saving the precomputed values.

    Notes
    -----
    The shapes file is looked up first by replacing every "eigs" in the full
    path with "shapes" (the historical behaviour). If that file does not
    exist, the file next to the eigenvalue file, with "eigs" replaced only in
    the file name, is used. This keeps the lookup working when a parent
    directory name happens to contain "eigs".
    """
    if not force and out.exists():
        return out

    marchenko_df = pd.DataFrame(index=[
        "low", "high", "low_shift", "high_shift", "noise_ratio", "noise_ratio_shifted"
    ], dtype=int)
    desc = "{} - Marchenko"
    pbar = tqdm(total=len(eigpaths), desc=desc.format("eigs-XX"), disable=silent)
    for path in eigpaths:
        eigname = path.stem
        vals = np.load(path)
        # trim the phoney zero eigenvalue due to correlation rank
        vals = vals[1:]

        shapes_path = Path(str(path).replace("eigs", "shapes"))
        if not shapes_path.exists():
            # whole-path replacement also rewrites directory names; fall back
            # to the sibling file that differs only in its file name
            shapes_path = path.with_name(path.name.replace("eigs", "shapes"))
        N, T = np.load(shapes_path)

        eigs = Eigenvalues(vals)
        _, marchenko = eigs.trim_marchenko_pastur(series_length=T, n_series=N, use_shifted=False)
        _, marchenko_shifted = eigs.trim_marchenko_pastur(series_length=T, n_series=N, use_shifted=True)

        # fraction of eigenvalues strictly inside each pair of endpoints
        noise_ratio = np.mean((vals > marchenko[0]) & (vals < marchenko[1]))
        noise_ratio_shifted = np.mean((vals > marchenko_shifted[0]) & (vals < marchenko_shifted[1]))
        marchenko_df[eigname] = [
            marchenko[0],
            marchenko[1],
            marchenko_shifted[0],
            marchenko_shifted[1],
            noise_ratio,
            noise_ratio_shifted,
        ]
        pbar.set_description(desc=desc.format(path.stem))
        pbar.update()
    pbar.close()
    marchenko_df.to_pickle(out)
    return out
def test_unfold_compare() -> None:
    """Print ensemble-comparison tables for GOE and Poisson spectra.

    Each case generates a fixed-seed spectrum, unfolds it with ``degree=13``
    and prints its comparison against the given ensemble.
    """
    metrics: List[Metric] = ["msqd", "mad", "corr"]
    rule = "=" * 80

    # (heading, extra generate_eigs kwargs, ensemble to compare against);
    # the Poisson-vs-GOE case is run twice
    cases = [
        ("Comparing a GOE matrix to GOE", {}, GOE),
        ("Comparing a Poisson / GDE matrix to GOE", {"kind": "poisson"}, GOE),
        ("Comparing a Poisson / GDE matrix to GOE", {"kind": "poisson"}, GOE),
        ("Comparing a Poisson / GDE matrix to Poisson / GDE", {"kind": "poisson"}, GDE),
    ]
    for heading, eig_kwargs, ensemble in cases:
        print("\n")
        print(rule)
        print(heading)
        print(rule)
        spectrum = Eigenvalues(generate_eigs(2000, seed=2, **eig_kwargs))
        unfolded = spectrum.unfold(degree=13)
        table = unfolded.ensemble_compare(ensemble=ensemble, metrics=metrics, show_progress=True)
        print(table)