def compute_all_diffs_dfs(args: Any, silent: bool = False) -> Path:
    """Gather the paired differences of every dataset pairing into one CSV file.

    Every dataset except "SINGLE_SUBJECT" is precomputed, its pairings are
    built, and the result of `paired_differences` for each retained pairing is
    concatenated and written to disk.

    Parameters
    ----------
    args: Args
        Options object; `args.fullpre` selects between DATASETS_FULLPRE and
        DATASETS, and `args.normalize` is forwarded to `stats_fnames`.

    silent: bool
        If False (default), print the combined DataFrame and the output path.
        Also forwarded to `precompute_dataset`.

    Returns
    -------
    all_diffs_out: Path
        Location of the written CSV (DATA_ROOT joined with the second filename
        returned by `stats_fnames`).
    """
    # pairings with these labels are left out of the summary
    skipped_labels = (
        "control_pre_v_parkinsons",
        "park_pre_v_parkinsons",
        "control_v_park_pre",
        "control_v_control_pre",
    )
    _, diffs_fname = stats_fnames(args, args.normalize, extension="csv")
    out_path = DATA_ROOT / diffs_fname

    source = DATASETS_FULLPRE if args.fullpre else DATASETS
    frames = []
    for name in source:
        if name == "SINGLE_SUBJECT":
            continue
        precompute_dataset(name, args=args, force_all=False, silent=silent)
        for pairing in Pairings(args, name).pairs:
            if pairing.label not in skipped_labels:
                frames.append(pairing.paired_differences(args))

    combined = pd.concat(frames)
    combined.to_csv(out_path)
    if silent:
        return out_path
    print(combined)
    print(f"Saved all differences to {out_path}")
    return out_path
def plot_raw_eigs(args: Any, show: bool = False) -> None:
    """Create violin plots of the distributions of the largest eigenvalues

    One figure is produced per dataset, with one violin per subgroup returned
    by `precompute_dataset`.

    Parameters
    ----------
    args: Args
        Contains the unfolding, trimming, normalization, etc options defined in run.py

    show: bool
        If False (default) just save the plot. Otherwise, call plt.show() and do NOT save
    """
    outdir = None
    if not show:
        outdir = DATA_ROOT / "plots"
        os.makedirs(outdir, exist_ok=True)

    datasets = DATASETS_FULLPRE if args.fullpre else DATASETS
    subfontsize = 12
    fontsize = 16

    for dataset_name in datasets:
        dfs = []
        for groupname, observables in precompute_dataset(dataset_name, args, silent=True).items():
            df_full = pd.read_pickle(observables["largest"])
            df = pd.DataFrame(df_full.loc["largest", :], dtype=float)
            df["subgroup"] = groupname  # scalar is broadcast to every row
            dfs.append(df)
        df = pd.concat(dfs)

        sbn.set_context("paper")
        sbn.set_style("ticks")
        args_prefix = argstrings_from_args(args)[0]
        dname = dataset_name.upper()
        prefix = f"{dname}_{args_prefix}_{'fullpre_' if args.fullpre else ''}"
        title = f"{dname} - Largest λ"
        subtitle = args_prefix.replace("_", " ")

        with sbn.axes_style("ticks"):
            ax = sbn.violinplot(x="subgroup", y="largest", data=df)
            sbn.despine(offset=10, trim=True)
            # The figure-level title carries the dataset name and the axes
            # title carries the argument summary; the axes title must not be
            # overwritten with `title` again or the subtitle is lost.
            plt.gcf().suptitle(title, fontsize=fontsize)
            ax.set_title(subtitle, fontdict={"fontsize": subfontsize})
            # subgroups are on the x axis, eigenvalue magnitudes on the y axis
            ax.set_xlabel("Subgroup", fontdict={"fontsize": fontsize})
            ax.set_ylabel("λ_max", fontdict={"fontsize": fontsize})
            if show:
                plt.show()
                plt.close()
            else:
                out = outdir / f"{prefix}largest.png"
                plt.gcf().set_size_inches(w=8, h=8)
                plt.savefig(out, dpi=300)
                plt.close()
                print(f"Largest λ violin plot saved to {relpath(out)}")
def make_marchenko_plots(shifted: bool = False):
    """Show grids of violin plots of the Marchenko noise ratio, one panel per dataset.

    Two figures are built, first with `ARGS.fullpre = True` and then with
    `ARGS.fullpre = False`. Each is displayed without blocking, and a final
    blocking `plt.show()` keeps both windows open.

    NOTE: this mutates the module-level `ARGS` (`fullpre` and `normalize`) and
    does not restore it; after the call `ARGS.fullpre` and `ARGS.normalize`
    are both False.

    Parameters
    ----------
    shifted: bool
        If True, plot the "noise_ratio_shifted" row of the stored marchenko
        observables instead of "noise_ratio".
    """
    column = "noise_ratio_shifted" if shifted else "noise_ratio"
    ncols = 3
    subfontsize = 10
    fontsize = 12

    for fullpre in [True, False]:
        ARGS.fullpre = fullpre
        ARGS.normalize = False
        datasets_all = DATASETS_FULLPRE if ARGS.fullpre else DATASETS
        datasets = {name: dataset for name, dataset in datasets_all.items() if name != "SINGLE_SUBJECT"}

        # Keep the usual 4x3 layout, but add rows rather than fail with an
        # IndexError if there are ever more than 12 datasets.
        nrows = max(4, -(-len(datasets) // ncols))
        fig: plt.Figure
        fig, axes = plt.subplots(nrows=nrows, ncols=ncols)
        suptitle = (
            f"{'Shifted ' if shifted else ''}Marchenko Noise Ratio {'(preprocessed)' if ARGS.fullpre else ''}"
        )
        for i, dataset_name in enumerate(datasets):
            dfs = []
            for groupname, observables in precompute_dataset(dataset_name, ARGS, silent=True).items():
                df_full = pd.read_pickle(observables["marchenko"])
                df = pd.DataFrame(df_full.loc[column, :])
                df["subgroup"] = groupname  # scalar is broadcast to every row
                dfs.append(df)
            df = pd.concat(dfs)

            sbn.set_context("paper")
            sbn.set_style("ticks")
            title = f"{dataset_name.upper()}"
            with sbn.axes_style("ticks"):
                ax: plt.Axes = axes.flat[i]
                sbn.violinplot(x="subgroup", y=column, data=df, ax=ax)
                sbn.despine(offset=10, trim=True, ax=ax)
                ax.set_title(title, fontdict={"fontsize": fontsize})
                # per-panel labels are blanked; shared labels are drawn below
                ax.set_xlabel("", fontdict={"fontsize": subfontsize})
                ax.set_ylabel("", fontdict={"fontsize": subfontsize})

        fig.suptitle(suptitle)
        fig.text(x=0.5, y=0.04, s="Subgroup", ha="center", va="center")  # xlabel
        fig.text(x=0.05, y=0.5, s="Noise Proportion", ha="center", va="center", rotation="vertical")  # ylabel
        fig.subplots_adjust(hspace=0.48, wspace=0.3)
        # Remove only the grid cells that received no plot, instead of always
        # deleting the last cell (which would discard a real panel whenever
        # the grid is completely filled).
        for unused_ax in axes.ravel()[len(datasets):]:
            fig.delaxes(unused_ax)
        plt.show(block=False)
    plt.show()
def compute_all_preds_df(args: Any, silent: bool = False, force: bool = False) -> Path:
    """Gather the paired predictions of every dataset pairing into one CSV file.

    If the output file already exists and `force` is False, nothing is
    recomputed and the existing path is returned. Otherwise every dataset
    except "SINGLE_SUBJECT" is precomputed, its pairings are built, and the
    result of `paired_predicts` for each retained pairing is concatenated and
    written to disk.

    Parameters
    ----------
    args: Args
        Options object; `args.fullpre` selects between DATASETS_FULLPRE and
        DATASETS, and `args.normalize` is forwarded to `stats_fnames` and
        `paired_predicts`.

    silent: bool
        If False (default), print the combined DataFrame and the output path.
        Also forwarded to `precompute_dataset` and `paired_predicts`.

    force: bool
        If True, recompute and overwrite even when the CSV already exists.

    Returns
    -------
    all_preds_out: Path
        Location of the CSV (DATA_ROOT joined with the first filename returned
        by `stats_fnames`).
    """
    preds_fname, _ = stats_fnames(args, args.normalize, extension="csv")
    out_path = DATA_ROOT / preds_fname
    reuse_existing = (not force) and out_path.exists()
    if reuse_existing:
        return out_path

    # pairings with these labels are left out of the summary
    skipped_labels = (
        "control_pre_v_parkinsons",
        "park_pre_v_parkinsons",
        "control_v_park_pre",
        "control_v_control_pre",
    )
    source = DATASETS_FULLPRE if args.fullpre else DATASETS
    frames = []
    for name in source:
        if name == "SINGLE_SUBJECT":
            continue
        precompute_dataset(name, args=args, force_all=False, silent=silent)
        for pairing in Pairings(args, name).pairs:
            if pairing.label not in skipped_labels:
                frames.append(
                    pairing.paired_predicts(args=args, logistic=True, normalize=args.normalize, silent=silent)
                )

    combined = pd.concat(frames)
    combined.to_csv(out_path)
    if silent:
        return out_path
    print(combined)
    print(f"Saved all predictions to {out_path}")
    return out_path
def make_largest_plots():
    """Show grids of violin plots of the largest eigenvalue, one panel per dataset.

    Two figures are built, first with `ARGS.fullpre = True` and then with
    `ARGS.fullpre = False`. Each is displayed without blocking, and a final
    blocking `plt.show()` keeps both windows open.

    NOTE: this mutates the module-level `ARGS.fullpre` and does not restore
    it; after the call `ARGS.fullpre` is False. `ARGS.normalize` is left
    untouched.
    """
    ncols = 3
    subfontsize = 10
    fontsize = 12

    for fullpre in [True, False]:
        ARGS.fullpre = fullpre
        datasets_all = DATASETS_FULLPRE if ARGS.fullpre else DATASETS
        datasets = {name: dataset for name, dataset in datasets_all.items() if name != "SINGLE_SUBJECT"}

        # Keep the usual 4x3 layout, but add rows rather than fail with an
        # IndexError if there are ever more than 12 datasets.
        nrows = max(4, -(-len(datasets) // ncols))
        fig: plt.Figure
        fig, axes = plt.subplots(nrows=nrows, ncols=ncols)
        suptitle = f"Largest Eigenvalue{' (preprocessed)' if ARGS.fullpre else ''}"
        for i, dataset_name in enumerate(datasets):
            dfs = []
            for groupname, observables in precompute_dataset(dataset_name, ARGS, silent=True).items():
                df_full = pd.read_pickle(observables["largest"])
                df = pd.DataFrame(df_full.loc["largest", :], dtype=float)
                df["subgroup"] = groupname  # scalar is broadcast to every row
                dfs.append(df)
            df = pd.concat(dfs)

            sbn.set_context("paper")
            sbn.set_style("ticks")
            title = f"{dataset_name.upper()}"
            with sbn.axes_style("ticks"):
                ax: plt.Axes = axes.flat[i]
                sbn.violinplot(x="subgroup", y="largest", data=df, ax=ax)
                sbn.despine(offset=10, trim=True, ax=ax)
                ax.set_title(title, fontdict={"fontsize": fontsize})
                # per-panel labels are blanked; shared labels are drawn below
                ax.set_xlabel("", fontdict={"fontsize": subfontsize})
                ax.set_ylabel("", fontdict={"fontsize": subfontsize})

        fig.suptitle(suptitle)
        fig.text(x=0.5, y=0.04, s="Subgroup", ha="center", va="center")  # xlabel
        fig.text(x=0.05, y=0.5, s="Magnitude", ha="center", va="center", rotation="vertical")  # ylabel
        fig.subplots_adjust(hspace=0.48, wspace=0.3)
        # Remove only the grid cells that received no plot, instead of always
        # deleting the last cell (which would discard a real panel whenever
        # the grid is completely filled).
        for unused_ax in axes.ravel()[len(datasets):]:
            fig.delaxes(unused_ax)
        plt.show(block=False)
    plt.show()
def _marchenko_violin(
    df: "pd.DataFrame",
    column: str,
    title: str,
    subtitle: str,
    show: bool,
    out: "Path | None",
    saved_msg: str,
) -> None:
    """Draw one per-subgroup violin plot of `column`, then show it or save it to `out`."""
    subfontsize = 12
    fontsize = 16
    with sbn.axes_style("ticks"):
        # no `ax` is passed, so seaborn draws on the current axes; since every
        # figure is closed below, each call starts from a fresh figure
        ax: plt.Axes = sbn.violinplot(x="subgroup", y=column, data=df)
        sbn.despine(offset=10, trim=True)
        plt.gcf().suptitle(title, fontsize=fontsize)
        ax.set_title(subtitle, fontdict={"fontsize": subfontsize})
        # subgroups are on the x axis, noise proportions on the y axis
        ax.set_xlabel("Subgroup", fontdict={"fontsize": fontsize})
        ax.set_ylabel("Noise Proportion", fontdict={"fontsize": fontsize})
        if show:
            plt.show()
            plt.close()
        else:
            plt.gcf().set_size_inches(w=8, h=8)
            plt.savefig(out)
            plt.close()
            print(f"{saved_msg} {relpath(out)}")


def plot_marchenko(args: Any, show: bool = False) -> None:
    """Create violin plots of the distributions of marchenko noise percents

    For every dataset two figures are produced, each with one violin per
    subgroup: one for the "noise_ratio" values and one for the
    "noise_ratio_shifted" values.

    Parameters
    ----------
    args: Args
        Contains the unfolding, trimming, normalization, etc options defined in run.py

    show: bool
        If False (default) just save the plot. Otherwise, call plt.show() and do NOT save
    """
    outdir = None
    if not show:
        outdir = DATA_ROOT / "plots"
        os.makedirs(outdir, exist_ok=True)

    datasets = DATASETS_FULLPRE if args.fullpre else DATASETS
    for dataset_name in datasets:
        dfs, dfs_noise = [], []
        for groupname, observables in precompute_dataset(dataset_name, args, silent=True).items():
            df_full = pd.read_pickle(observables["marchenko"])
            df = pd.DataFrame(df_full.loc["noise_ratio", :])
            df_noise = pd.DataFrame(df_full.loc["noise_ratio_shifted", :])
            df["subgroup"] = groupname  # scalar is broadcast to every row
            df_noise["subgroup"] = groupname
            dfs.append(df)
            dfs_noise.append(df_noise)
        df = pd.concat(dfs)
        df_noise = pd.concat(dfs_noise)

        sbn.set_context("paper")
        sbn.set_style("ticks")
        args_prefix = argstrings_from_args(args)[0]
        dname = dataset_name.upper()
        prefix = f"{dname}_{args_prefix}_{'fullpre_' if args.fullpre else ''}"
        title = f"{dname} - Marchenko Noise Ratio"
        title_shifted = f"{title} (shifted)"
        subtitle = args_prefix.replace("_", " ")

        out = None if show else outdir / f"{prefix}marchenko.png"
        _marchenko_violin(df, "noise_ratio", title, subtitle, show, out, "Marchenko plot saved to")

        out_shifted = None if show else outdir / f"{prefix}marchenko_shifted.png"
        _marchenko_violin(
            df_noise,
            "noise_ratio_shifted",
            title_shifted,
            subtitle,
            show,
            out_shifted,
            "Marchenko shifted plot saved to",
        )