Ejemplo n.º 1
0
        # Feature pair to inspect: protein `p` against response `c`; `dtype`
        # selects the response matrix. `ctissues` is not used in this span.
        p, c, dtype, ctissues = ("UBFD1", "TP63", "crispr", ["Lung"])

        # The response is read from the drug matrix only when dtype is "drug",
        # otherwise from the CRISPR matrix.
        y_source = drespo if dtype == "drug" else crispr

        # Column-wise join of the response, protein, transcript and tissue
        # columns; rows missing the response or the protein value are dropped.
        plot_df = pd.concat(
            [
                y_source.loc[[c]].T.add_suffix("_y"),
                prot.loc[[p]].T.add_suffix("_prot"),
                gexp.loc[[p]].T.add_suffix("_gexp"),
                prot_obj.ss["tissue"],
            ],
            axis=1,
            sort=False,
        )
        plot_df = plot_df.dropna(subset=[f"{c}_y", f"{p}_prot"])

        # Protein
        ax = GIPlot.gi_tissue_plot(f"{p}_prot", f"{c}_y", plot_df)

        if dtype == "drug":
            # Dashed horizontal guide at log(drespo_maxc[c]).
            guide_y = np.log(drespo_maxc[c])
            ax.axhline(guide_y, ls="--", lw=.3, color=CrispyPlot.PAL_DTRACE[1])

        ax.set_xlabel(f"{p}\nProtein intensities")
        ax.set_ylabel(
            f"{c}\n{'Drug response IC50' if dtype == 'drug' else 'CRISPR-Cas9 (log2 FC)'}"
        )

        out_pdf = f"{RPATH}/TopHits_{p}_{c}_{dtype}_regression_tissue_plot.pdf"
        plt.savefig(out_pdf, bbox_inches="tight")
            (drespo.T < dmax[drespo.index]).sum().rename("nsamples"),
            drespo.T[drespo.T < dmax[drespo.index]].median().rename("dependency"),
            ml_scores,
        ],
        axis=1,
    )
    # The display name is the second ";"-separated field of each index label.
    drug_selective["name"] = [
        label.split(";")[1] for label in drug_selective.index
    ]

    # NOTE(review): this set uses skew < -2, while the guide line and the
    # labelled points below use -1 — confirm the thresholds are meant to differ.
    strongly_skewed = drug_selective.query("skew < -2")
    drug_selective_set = set(strongly_skewed.index)

    # Scatter
    scatter_opts = dict(
        size="dependency",
        size_inverse=True,
        size_legend_title="Median IC50",
        plot_reg=False,
        plot_annot=False,
    )
    grid = GIPlot.gi_regression("skew", "median", drug_selective, **scatter_opts)

    # Dashed vertical guide at skew = -1.
    grid.ax_joint.axvline(-1, c=GIPlot.PAL_DTRACE[1], lw=0.3, ls="--")

    # Label the five rows with the lowest skew among those below -1.
    g_highlight_df = drug_selective.query("skew < -1")
    g_highlight_df = g_highlight_df.sort_values("skew").head(5)
    labels = [
        grid.ax_joint.text(
            rec["skew"], rec["median"], rec["name"], color="k", fontsize=4
        )
        for _, rec in g_highlight_df.iterrows()
    ]
    adjust_text(
        labels,
Ejemplo n.º 3
0
        drug_pca_df.corr(),
        cmap="Spectral",
        annot=True,
        center=0,
        fmt=".2f",
        annot_kws=dict(size=4),
        lw=0.05,
        figsize=(3, 3),
    )

    # Save the current figure and release every open figure.
    save_opts = dict(bbox_inches="tight", dpi=600)
    plt.savefig(f"{RPATH}/drug_pca_clustermap.pdf", **save_opts)
    plt.close("all")

    # Regression of the chosen component against "growth" with a lowess fit;
    # the y-axis label includes drug_vexp[y_var] formatted as a percentage.
    y_var = "PC1"
    g = GIPlot.gi_regression("growth", y_var, drug_pca_df, lowess=True)
    y_label = f"{y_var} ({drug_vexp[y_var]*100:.1f}%)"
    g.set_axis_labels("Growth rate", y_label)
    plt.savefig(f"{RPATH}/drug_pca_regression_growth.pdf", **save_opts)
    plt.close("all")

    # Covariates
    #

    # CRISPR
    # Build the covariates for the CRISPR data set through
    # LMModels.define_covariates, passing the institute from
    # crispr_obj.merged_institute and enabling only `medium` and `ploidy`.
    # NOTE(review): the `signature` keyword is derived from plot_df["signature"]
    # (first element of each entry, or "All" when the entry is empty). It looks
    # unrelated to the other covariate flags — confirm define_covariates
    # accepts it and that this line was not merged in from another snippet.
    covs_crispr = LMModels.define_covariates(
        institute=crispr_obj.merged_institute,
        medium=True,
        cancertype=False,
        tissuetype=False,
        mburden=False,
        ploidy=True,
        signature=[i[0] if len(i) > 0 else "All" for i in plot_df["signature"]]
    )

    # Shared axis range: global min/max over both correlation columns, each
    # scaled by 1.1.
    corr_cols = ["gexp_corr", "prot_corr"]
    ax_min = plot_df[corr_cols].min().min() * 1.1
    ax_max = plot_df[corr_cols].max().max() * 1.1

    # One tab10 hex colour per signature, plus a dedicated colour for "All".
    tab10_hex = sns.color_palette("tab10").as_hex()
    discrete_pal = pd.Series(tab10_hex[: len(signatures)], index=signatures)
    discrete_pal["All"] = CrispyPlot.PAL_DTRACE[0]

    marginal_opts = dict(
        plot_reg=False,
        plot_annot=False,
        scatter_kws=dict(edgecolor="w", lw=0.1, s=8),
        discrete_pal=discrete_pal,
    )
    grid = GIPlot.gi_regression_marginal(
        "gexp_corr", "prot_corr", "signature", plot_df, **marginal_opts
    )

    # Dashed identity line and identical limits on both axes.
    joint_ax = grid.ax_joint
    joint_ax.plot([ax_min, ax_max], [ax_min, ax_max], "k--", lw=0.3)
    joint_ax.set_xlim(ax_min, ax_max)
    joint_ax.set_ylim(ax_min, ax_max)

    labels = [
        grid.ax_joint.text(row["gexp_corr"], row["prot_corr"], i, color="k", fontsize=4)
        for i, row in plot_df.query("signature != 'All'")
        .sort_values("attenuation", ascending=False)
        .head(15)
        .iterrows()
               for g in plot_df.index])
# Collapse each "signature" entry to its first element, or "All" when empty.
signature_labels = [
    entry[0] if len(entry) > 0 else "All" for entry in plot_df["signature"]
]
plot_df = plot_df.assign(signature=signature_labels)

# Shared axis range: global min/max over both correlation columns, each
# scaled by 1.1.
corr_cols = ["gexp_corr", "prot_corr"]
ax_min = plot_df[corr_cols].min().min() * 1.1
ax_max = plot_df[corr_cols].max().max() * 1.1

# One tab10 hex colour per signature, plus a dedicated colour for "All".
tab10_hex = sns.color_palette("tab10").as_hex()
discrete_pal = pd.Series(tab10_hex[:len(signatures)], index=signatures)
discrete_pal["All"] = CrispyPlot.PAL_DTRACE[0]

marginal_opts = dict(
    plot_reg=False,
    plot_annot=False,
    scatter_kws=dict(edgecolor="w", lw=0.1, s=8),
    discrete_pal=discrete_pal,
)
grid = GIPlot.gi_regression_marginal(
    "gexp_corr", "prot_corr", "signature", plot_df, **marginal_opts
)

# Dashed identity line and identical limits on both axes.
joint_ax = grid.ax_joint
joint_ax.plot([ax_min, ax_max], [ax_min, ax_max], "k--", lw=0.3)
joint_ax.set_xlim(ax_min, ax_max)
joint_ax.set_ylim(ax_min, ax_max)

labels = [
    grid.ax_joint.text(row["gexp_corr"],
                       row["prot_corr"],
                       i,
                       color="k",
                       fontsize=4)
    dsets = ["crispr", "gexp", "prot"]

    # Naturally sorted distinct "n_ppi" values, each mapped to a "Blues_d"
    # hex colour.
    order = natsorted(set(df_corr_ppi["n_ppi"]))
    blues_hex = sns.color_palette("Blues_d", n_colors=len(order)).as_hex()
    pal = pd.Series(blues_hex, index=order)

    # One panel per data set, laid out in a single row.
    n_panels = len(dsets)
    _, axs = plt.subplots(1, n_panels, figsize=(2 * n_panels, 2), dpi=600)

    # (x-axis label, title) per data set; any other key uses the protein text.
    panel_text = {
        "crispr": ("Gene essentiality\n(mean scaled FC)", "CRISPR-Cas9"),
        "gexp": ("Gene expression\n(mean voom)", "RNA-Seq"),
    }
    default_text = ("Protein abundance\n(mean intensities)", "SWATH-MS")

    for i, dt in enumerate(dsets):
        ax = GIPlot.gi_classification(
            dt,
            "n_ppi",
            df_corr_ppi,
            orient="h",
            palette=pal.to_dict(),
            order=order,
            ax=axs[i],
        )

        xlabel, title = panel_text.get(dt, default_text)

        ax.set_xlabel(xlabel)
        # Only the first panel carries the y-axis label.
        ax.set_ylabel("Number of protein interactions" if i == 0 else None)
        ax.set_title(title)
        cbar_pos=None,
        figsize=np.array(plot_df.shape) * 0.275,
    )

    # Save the current figure with a transparent background, then release
    # every open figure.
    clustermap_pdf = f"{RPATH}/ProteinTranscriptSample_cfeatures_clustermap.pdf"
    plt.savefig(clustermap_pdf, bbox_inches="tight", transparent=True)
    plt.close("all")

    # One continuous-colour scatter per annotation column, each saved to its
    # own PDF.
    for z_var in ("CopyNumberInstability", "ploidy"):
        ax = GIPlot.gi_continuous_plot(
            "prot_corr",
            "gexp_prot_corr",
            z_var,
            satt_corr,
            mid_point_norm=False,
        )
        ax.set_xlabel("Sanger&CMRI\nProtein ~ Copy number (Pearson's R)")
        ax.set_ylabel("Sanger&CMRI\nProtein ~ Transcript (Pearson's R)")

        scatter_pdf = (
            f"{RPATH}/ProteinTranscriptSample_prot_gexp_regression_{z_var}.pdf"
        )
        plt.savefig(scatter_pdf, bbox_inches="tight")
        plt.close("all")

    #
    x_var, y_var, z_var = "ploidy", "CopyNumberInstability", "size"
    ax = GIPlot.gi_continuous_plot(x_var,
                                   y_var,
                                   z_var,
Ejemplo n.º 8
0
# Column-wise join of the two factor columns from mofa.factors, the CDH1/VIM
# rows of gexp and prot (transposed and suffixed), and ss["Tissue_type"].
marker_genes = ["CDH1", "VIM"]
plot_frames = [
    mofa.factors[[f_x, f_y]],
    gexp.loc[marker_genes].T.add_suffix("_transcriptomics"),
    prot.loc[marker_genes].T.add_suffix("_proteomics"),
    ss["Tissue_type"],
]
plot_df = pd.concat(plot_frames, axis=1, sort=False)

# Tissue plot
ax = GIPlot.gi_tissue_plot(
    f_x, f_y, plot_df, plot_reg=False, pal=PALETTE_TTYPE
)
# Axis labels drop the first character of each factor name.
ax.set_xlabel(f"Factor {f_x[1:]}")
ax.set_ylabel(f"Factor {f_y[1:]}")

# Write the figure as PDF and as a 600-dpi PNG, then release all figures.
pdf_path = f"{RPATH}/MultiOmics_{f_x}_{f_y}_tissue_plot.pdf"
plt.savefig(pdf_path, bbox_inches="tight")
png_path = f"{RPATH}/MultiOmics_{f_x}_{f_y}_tissue_plot.png"
plt.savefig(png_path, bbox_inches="tight", dpi=600)
plt.close("all")

# Continous annotation
for z in ["VIM_proteomics", "CDH1_proteomics"]:
    ax = GIPlot.gi_continuous_plot(f_x,
                                   f_y,
Ejemplo n.º 9
0
    # The response is read from the drug matrix only when dtype is "drug",
    # otherwise from the CRISPR matrix.
    y_source = drespo if dtype == "drug" else crispr

    # Column-wise join of the response, protein, transcript and tissue-type
    # columns; rows missing the response or the protein value are dropped.
    plot_frames = [
        y_source.loc[[c]].T.add_suffix("_y"),
        prot.loc[[p]].T.add_suffix("_prot"),
        gexp.loc[[p]].T.add_suffix("_gexp"),
        ss["Tissue_type"],
    ]
    plot_df = pd.concat(plot_frames, axis=1, sort=False)
    plot_df = plot_df.dropna(subset=[f"{c}_y", f"{p}_prot"])

    # Protein
    ax = GIPlot.gi_tissue_plot(
        f"{p}_prot", f"{c}_y", plot_df, pal=PALETTE_TTYPE
    )

    if dtype == "drug":
        # Dashed horizontal guide at log(dmaxc[c]).
        guide_y = np.log(dmaxc[c])
        ax.axhline(guide_y, ls="--", lw=0.3, color=CrispyPlot.PAL_DTRACE[1])

    ax.set_xlabel(f"{p}\nProtein intensities")
    ax.set_ylabel(
        f"{c}\n{'Drug response IC50' if dtype == 'drug' else 'CRISPR-Cas9 (log2 FC)'}"
    )
    plt.savefig(
        f"{RPATH}/TopHits_{p}_{c}_{dtype}_regression_tissue_plot.pdf",
        bbox_inches="tight",
            crispr.apply(skew, axis=1).rename("skew"),
            crispr.median(1).rename("median"),
            (crispr < -0.5).sum(1).rename("nsamples"),
            crispr[crispr < -0.5].median(1).rename("dependency").abs(),
            ml_scores,
        ],
        axis=1,
    )

    # Index labels of the rows whose skew is below -3.
    strongly_skewed = crispr_selective.query("skew < -3")
    crispr_selective_set = set(strongly_skewed.index)

    # Scatter
    scatter_opts = dict(size="dependency", plot_reg=False, plot_annot=False)
    grid = GIPlot.gi_regression(
        "skew", "median", crispr_selective, **scatter_opts
    )

    # Dashed vertical guide at the same -3 threshold.
    grid.ax_joint.axvline(-3, c=GIPlot.PAL_DTRACE[1], lw=0.3, ls="--")

    # Label the fifteen rows with the lowest skew among those below -3,
    # using the index label as the text.
    g_highlight_df = crispr_selective.query("skew < -3")
    g_highlight_df = g_highlight_df.sort_values("skew").head(15)
    labels = [
        grid.ax_joint.text(
            rec["skew"], rec["median"], gene, color="k", fontsize=4
        )
        for gene, rec in g_highlight_df.iterrows()
    ]
    adjust_text(