Beispiel #1
0
def compute_all_diffs_dfs(args: Any, silent: bool = False) -> Path:
    """Collect the paired differences of every dataset into one CSV file.

    Parameters
    ----------
    args: Args
        Options object; `args.normalize` and `args.fullpre` are read here and
        the object is forwarded to `stats_fnames`, `precompute_dataset`,
        `Pairings` and `pair.paired_differences`.

    silent: bool
        Forwarded to `precompute_dataset`. If False (default) the combined
        table is also printed. The final "Saved ..." line is always printed.

    Returns
    -------
    Path
        Location of the CSV file that was written (below `DATA_ROOT`).
    """
    # Pairings with these labels are left out of the combined table.
    skipped_labels = (
        "control_pre_v_parkinsons",
        "park_pre_v_parkinsons",
        "control_v_park_pre",
        "control_v_control_pre",
    )
    _preds_name, diffs_name = stats_fnames(args, args.normalize, extension="csv")
    csv_path = DATA_ROOT / diffs_name

    frames = []
    source = DATASETS_FULLPRE if args.fullpre else DATASETS
    for name in source:
        if name == "SINGLE_SUBJECT":
            continue
        precompute_dataset(name, args=args, force_all=False, silent=silent)
        frames.extend(
            pair.paired_differences(args)
            for pair in Pairings(args, name).pairs
            if pair.label not in skipped_labels
        )

    combined = pd.concat(frames)
    combined.to_csv(csv_path)
    if not silent:
        print(combined)
    print(f"Saved all differences to {csv_path}")
    return csv_path
def plot_raw_eigs(args: Any, show: bool = False) -> None:
    """Create violin plots of the distributions of the largest eigenvalues

    One figure is produced per dataset, with one violin per subgroup returned
    by `precompute_dataset`.

    Parameters
    ----------
    args: Args
        Contains the unfolding, trimming, normalization, etc options defined in
        run.py

    show: bool
        If False (default) just save the plot. Otherwise, call plt.show() and do
        NOT save
    """
    if not show:
        outdir = DATA_ROOT / "plots"
        os.makedirs(outdir, exist_ok=True)

    datasets = DATASETS_FULLPRE if args.fullpre else DATASETS
    for dataset_name in datasets:
        dfs = []
        for groupname, observables in precompute_dataset(dataset_name, args, silent=True).items():
            df_full = pd.read_pickle(observables["largest"])
            df = pd.DataFrame(df_full.loc["largest", :], dtype=float)
            # tag every row with its subgroup so the groups can share one plot
            df["subgroup"] = [groupname] * len(df)
            dfs.append(df)
        df = pd.concat(dfs)

        sbn.set_context("paper")
        sbn.set_style("ticks")
        args_prefix = argstrings_from_args(args)[0]
        dname = dataset_name.upper()
        prefix = f"{dname}_{args_prefix}_{'fullpre_' if args.fullpre else ''}"
        title = f"{dname} - Largest λ"
        subtitle = args_prefix.replace("_", " ")
        with sbn.axes_style("ticks"):
            subfontsize = 12
            fontsize = 16
            ax = sbn.violinplot(x="subgroup", y="largest", data=df)
            sbn.despine(offset=10, trim=True)
            # The figure-level title carries the dataset name; the axes title
            # carries the argument summary. Setting the axes title a second
            # time would discard the subtitle.
            plt.gcf().suptitle(title, fontsize=fontsize)
            ax.set_title(subtitle, fontdict={"fontsize": subfontsize})
            # x is the categorical subgroup, y is the eigenvalue magnitude
            ax.set_xlabel("Subgroup", fontdict={"fontsize": fontsize})
            ax.set_ylabel("λ_max", fontdict={"fontsize": fontsize})
            if show:
                plt.show()
            else:
                out = outdir / f"{prefix}largest.png"
                plt.gcf().set_size_inches(w=8, h=8)
                plt.savefig(out, dpi=300)
                print(f"Largest λ violin plot saved to {relpath(out)}")
            # close in both branches so the next dataset starts a fresh figure
            plt.close()
Beispiel #3
0
def make_marchenko_plots(shifted: bool = False):
    """Show a grid of Marchenko noise-ratio violin plots for every dataset.

    Two figures are created, one with `ARGS.fullpre = True` and one with
    `ARGS.fullpre = False`; `ARGS.normalize` is set to False for both. Each
    figure is a 4x3 grid with one panel per dataset (the "SINGLE_SUBJECT"
    entry is skipped) and the last grid cell removed. The module-level `ARGS`
    object is mutated in place.

    Parameters
    ----------
    shifted: bool
        If True plot the "noise_ratio_shifted" row of the stored marchenko
        table, otherwise the "noise_ratio" row.
    """
    column = "noise_ratio_shifted" if shifted else "noise_ratio"
    for use_fullpre in (True, False):
        ARGS.fullpre = use_fullpre
        for use_normalize in (False,):
            ARGS.normalize = use_normalize
            available = DATASETS_FULLPRE if ARGS.fullpre else DATASETS
            selected = {
                name: contents
                for name, contents in available.items()
                if name != "SINGLE_SUBJECT"
            }

            figure, grid = plt.subplots(nrows=4, ncols=3)
            heading = (
                f"{'Shifted ' if shifted else ''}Marchenko Noise Ratio {'(preprocessed)' if ARGS.fullpre else ''}"
            )
            for panel_index, name in enumerate(selected):
                per_group = []
                computed = precompute_dataset(name, ARGS, silent=True)
                for group, observables in computed.items():
                    stored = pd.read_pickle(observables["marchenko"])
                    frame = pd.DataFrame(stored.loc[column, :])
                    # label each row with the subgroup it came from
                    frame["subgroup"] = [group] * len(frame)
                    per_group.append(frame)
                combined = pd.concat(per_group)

                sbn.set_context("paper")
                sbn.set_style("ticks")
                panel_title = f"{name.upper()}"

                with sbn.axes_style("ticks"):
                    label_size = 10
                    title_size = 12
                    panel = grid.flat[panel_index]
                    sbn.violinplot(x="subgroup", y=column, data=combined, ax=panel)
                    sbn.despine(offset=10, trim=True, ax=panel)
                    panel.set_title(panel_title, fontdict={"fontsize": title_size})
                    # per-panel labels are blanked; shared labels are added below
                    panel.set_xlabel("", fontdict={"fontsize": label_size})
                    panel.set_ylabel("", fontdict={"fontsize": label_size})

            figure.suptitle(heading)
            # shared x label
            figure.text(x=0.5, y=0.04, s="Subgroup", ha="center", va="center")
            # shared y label
            figure.text(x=0.05, y=0.5, s="Noise Proportion", ha="center", va="center", rotation="vertical")
            figure.subplots_adjust(hspace=0.48, wspace=0.3)
            # drop the final grid cell
            figure.delaxes(ax=grid.flat[-1])
            plt.show(block=False)

    # block until the figures are closed
    plt.show()
Beispiel #4
0
def compute_all_preds_df(args: Any,
                         silent: bool = False,
                         force: bool = False) -> Path:
    """Collect the paired predictions of every dataset into one CSV file.

    Parameters
    ----------
    args: Args
        Options object; `args.normalize` and `args.fullpre` are read here and
        the object is forwarded to `stats_fnames`, `precompute_dataset`,
        `Pairings` and `pair.paired_predicts`.

    silent: bool
        Forwarded to `precompute_dataset` and `pair.paired_predicts`. If False
        (default) the combined table is also printed. The final "Saved ..."
        line is always printed.

    force: bool
        If False (default) and the output CSV already exists, return its path
        immediately without recomputing anything.

    Returns
    -------
    Path
        Location of the CSV file (below `DATA_ROOT`).
    """
    preds_name, _diffs_name = stats_fnames(args, args.normalize, extension="csv")
    csv_path = DATA_ROOT / preds_name
    if all_cached := (not force and csv_path.exists()):
        return csv_path

    # Pairings with these labels are left out of the combined table.
    skipped_labels = (
        "control_pre_v_parkinsons",
        "park_pre_v_parkinsons",
        "control_v_park_pre",
        "control_v_control_pre",
    )
    source = DATASETS_FULLPRE if args.fullpre else DATASETS

    frames = []
    for name in source:
        if name == "SINGLE_SUBJECT":
            continue
        precompute_dataset(name, args=args, force_all=False, silent=silent)
        frames.extend(
            pair.paired_predicts(
                args=args,
                logistic=True,
                normalize=args.normalize,
                silent=silent,
            )
            for pair in Pairings(args, name).pairs
            if pair.label not in skipped_labels
        )

    combined = pd.concat(frames)
    combined.to_csv(csv_path)
    if not silent:
        print(combined)
    print(f"Saved all predictions to {csv_path}")
    return csv_path
Beispiel #5
0
def make_largest_plots():
    """Show a grid of largest-eigenvalue violin plots for every dataset.

    Two figures are created, one with `ARGS.fullpre = True` and one with
    `ARGS.fullpre = False`. Each figure is a 4x3 grid with one panel per
    dataset (the "SINGLE_SUBJECT" entry is skipped) and the last grid cell
    removed. The module-level `ARGS` object is mutated in place.
    """
    for fullpre in [True, False]:
        ARGS.fullpre = fullpre
        for normalize in [False]:
            # Apply the setting to ARGS, as make_marchenko_plots does;
            # otherwise the loop value never reaches precompute_dataset.
            ARGS.normalize = normalize
            datasets_all = DATASETS_FULLPRE if ARGS.fullpre else DATASETS
            datasets = {
                dataset_name: dataset
                for dataset_name, dataset in datasets_all.items()
                if dataset_name != "SINGLE_SUBJECT"
            }

            fig, axes = plt.subplots(nrows=4, ncols=3)
            suptitle = f"Largest Eigenvalue{' (preprocessed)' if ARGS.fullpre else ''}"
            for i, dataset_name in enumerate(datasets):
                dfs = []
                for groupname, observables in precompute_dataset(dataset_name, ARGS, silent=True).items():
                    df_full = pd.read_pickle(observables["largest"])
                    df = pd.DataFrame(df_full.loc["largest", :], dtype=float)
                    # tag every row with its subgroup so the groups can share one plot
                    df["subgroup"] = [groupname] * len(df)
                    dfs.append(df)
                df = pd.concat(dfs)

                sbn.set_context("paper")
                sbn.set_style("ticks")
                title = dataset_name.upper()

                with sbn.axes_style("ticks"):
                    subfontsize = 10
                    fontsize = 12
                    ax = axes.flat[i]
                    sbn.violinplot(x="subgroup", y="largest", data=df, ax=ax)
                    sbn.despine(offset=10, trim=True, ax=ax)
                    ax.set_title(title, fontdict={"fontsize": fontsize})
                    # per-panel labels are blanked; shared labels are added below
                    ax.set_xlabel("", fontdict={"fontsize": subfontsize})
                    ax.set_ylabel("", fontdict={"fontsize": subfontsize})
            fig.suptitle(suptitle)
            fig.text(x=0.5, y=0.04, s="Subgroup", ha="center", va="center")  # xlabel
            fig.text(x=0.05, y=0.5, s="Magnitude", ha="center", va="center", rotation="vertical")  # ylabel
            fig.subplots_adjust(hspace=0.48, wspace=0.3)
            # drop the final grid cell
            fig.delaxes(ax=axes.flat[-1])
            plt.show(block=False)

    # block until the figures are closed
    plt.show()
def plot_marchenko(args: Any, show: bool = False) -> None:
    """Create violin plots of the distributions of marchenko noise percents

    Two figures are produced per dataset: one for the "noise_ratio" row of the
    stored marchenko table and one for the "noise_ratio_shifted" row, each with
    one violin per subgroup returned by `precompute_dataset`.

    Parameters
    ----------
    args: Args
        Contains the unfolding, trimming, normalization, etc options defined in
        run.py

    show: bool
        If False (default) just save the plot. Otherwise, call plt.show() and do
        NOT save
    """
    if not show:
        outdir = DATA_ROOT / "plots"
        os.makedirs(outdir, exist_ok=True)

    datasets = DATASETS_FULLPRE if args.fullpre else DATASETS

    # No figure is created up front: each violin plot below draws on a fresh
    # current figure, which is closed again once it has been shown or saved.
    for dataset_name in datasets:
        dfs, dfs_shifted = [], []
        for groupname, observables in precompute_dataset(dataset_name, args, silent=True).items():
            df_full = pd.read_pickle(observables["marchenko"])
            df = pd.DataFrame(df_full.loc["noise_ratio", :])
            df_shifted = pd.DataFrame(df_full.loc["noise_ratio_shifted", :])
            # tag every row with its subgroup so the groups can share one plot
            df["subgroup"] = [groupname] * len(df)
            df_shifted["subgroup"] = [groupname] * len(df_shifted)
            dfs.append(df)
            dfs_shifted.append(df_shifted)

        sbn.set_context("paper")
        sbn.set_style("ticks")
        args_prefix = argstrings_from_args(args)[0]
        dname = dataset_name.upper()
        prefix = f"{dname}_{args_prefix}_{'fullpre_' if args.fullpre else ''}"
        title = f"{dname} - Marchenko Noise Ratio"
        title_shifted = f"{title} (shifted)"
        subtitle = args_prefix.replace("_", " ")

        # (column to plot, data, figure title, output file suffix, log label)
        variants = (
            ("noise_ratio", pd.concat(dfs), title,
             "marchenko.png", "Marchenko plot"),
            ("noise_ratio_shifted", pd.concat(dfs_shifted), title_shifted,
             "marchenko_shifted.png", "Marchenko shifted plot"),
        )

        with sbn.axes_style("ticks"):
            subfontsize = 12
            fontsize = 16
            for column, data, heading, suffix, label in variants:
                ax = sbn.violinplot(x="subgroup", y=column, data=data)
                sbn.despine(offset=10, trim=True)
                plt.gcf().suptitle(heading, fontsize=fontsize)
                ax.set_title(subtitle, fontdict={"fontsize": subfontsize})
                # x is the categorical subgroup, y is the noise ratio
                ax.set_xlabel("Subgroup", fontdict={"fontsize": fontsize})
                ax.set_ylabel("Noise Proportion", fontdict={"fontsize": fontsize})
                if show:
                    plt.show()
                else:
                    out = outdir / f"{prefix}{suffix}"
                    plt.gcf().set_size_inches(w=8, h=8)
                    plt.savefig(out)
                    print(f"{label} saved to {relpath(out)}")
                # close in both branches so the next plot starts a fresh figure
                plt.close()