# Top-hit example: plot the abundance of protein `p` against the response
# measured for `c`. `dtype` decides which matrix `c` is looked up in:
# "drug" reads from `drespo`, any other value reads from `crispr`.
p, c, dtype, ctissues = ("UBFD1", "TP63", "crispr", ["Lung"])

# Response row for `c`, transposed and with its column renamed to "<c>_y".
if dtype == "drug":
    response_values = drespo.loc[[c]].T.add_suffix("_y")
else:
    response_values = crispr.loc[[c]].T.add_suffix("_y")

# Column-wise join of response, protein, transcript and tissue annotation.
plot_df = pd.concat(
    [
        response_values,
        prot.loc[[p]].T.add_suffix("_prot"),
        gexp.loc[[p]].T.add_suffix("_gexp"),
        prot_obj.ss["tissue"],
    ],
    axis=1,
    sort=False,
)
# Drop rows missing either the response or the protein measurement.
plot_df = plot_df.dropna(subset=[f"{c}_y", f"{p}_prot"])

# Protein
ax = GIPlot.gi_tissue_plot(f"{p}_prot", f"{c}_y", plot_df)

# Drug responses additionally get a dashed horizontal line at log(drespo_maxc[c]).
if dtype == "drug":
    ax.axhline(
        np.log(drespo_maxc[c]),
        ls="--",
        lw=0.3,
        color=CrispyPlot.PAL_DTRACE[1],
    )

ax.set_xlabel(f"{p}\nProtein intensities")
ax.set_ylabel(
    f"{c}\n{'Drug response IC50' if dtype == 'drug' else 'CRISPR-Cas9 (log2 FC)'}"
)

plt.savefig(
    f"{RPATH}/TopHits_{p}_{c}_{dtype}_regression_tissue_plot.pdf",
    bbox_inches="tight",
)
(drespo.T < dmax[drespo.index]).sum().rename("nsamples"), drespo.T[drespo.T < dmax[drespo.index]].median().rename("dependency"), ml_scores, ], axis=1, ) drug_selective["name"] = [i.split(";")[1] for i in drug_selective.index] drug_selective_set = set(drug_selective.query("skew < -2").index) # Scatter grid = GIPlot.gi_regression( "skew", "median", drug_selective, size="dependency", size_inverse=True, size_legend_title="Median IC50", plot_reg=False, plot_annot=False, ) grid.ax_joint.axvline(-1, c=GIPlot.PAL_DTRACE[1], lw=0.3, ls="--") g_highlight_df = drug_selective.query("skew < -1").sort_values("skew").head(5) labels = [ grid.ax_joint.text( row["skew"], row["median"], row["name"], color="k", fontsize=4 ) for _, row in g_highlight_df.iterrows() ] adjust_text( labels,
drug_pca_df.corr(), cmap="Spectral", annot=True, center=0, fmt=".2f", annot_kws=dict(size=4), lw=0.05, figsize=(3, 3), ) plt.savefig(f"{RPATH}/drug_pca_clustermap.pdf", bbox_inches="tight", dpi=600) plt.close("all") # y_var = "PC1" g = GIPlot.gi_regression("growth", y_var, drug_pca_df, lowess=True) g.set_axis_labels("Growth rate", f"{y_var} ({drug_vexp[y_var]*100:.1f}%)") plt.savefig(f"{RPATH}/drug_pca_regression_growth.pdf", bbox_inches="tight", dpi=600) plt.close("all") # Covariates # # CRISPR covs_crispr = LMModels.define_covariates( institute=crispr_obj.merged_institute, medium=True, cancertype=False, tissuetype=False, mburden=False, ploidy=True,
signature=[i[0] if len(i) > 0 else "All" for i in plot_df["signature"]] ) ax_min = plot_df[["gexp_corr", "prot_corr"]].min().min() * 1.1 ax_max = plot_df[["gexp_corr", "prot_corr"]].max().max() * 1.1 discrete_pal = pd.Series( sns.color_palette("tab10").as_hex()[: len(signatures)], index=signatures ) discrete_pal["All"] = CrispyPlot.PAL_DTRACE[0] grid = GIPlot.gi_regression_marginal( "gexp_corr", "prot_corr", "signature", plot_df, plot_reg=False, plot_annot=False, scatter_kws=dict(edgecolor="w", lw=0.1, s=8), discrete_pal=discrete_pal, ) grid.ax_joint.plot([ax_min, ax_max], [ax_min, ax_max], "k--", lw=0.3) grid.ax_joint.set_xlim(ax_min, ax_max) grid.ax_joint.set_ylim(ax_min, ax_max) labels = [ grid.ax_joint.text(row["gexp_corr"], row["prot_corr"], i, color="k", fontsize=4) for i, row in plot_df.query("signature != 'All'") .sort_values("attenuation", ascending=False) .head(15) .iterrows()
for g in plot_df.index]) plot_df = plot_df.assign( signature=[i[0] if len(i) > 0 else "All" for i in plot_df["signature"]]) ax_min = plot_df[["gexp_corr", "prot_corr"]].min().min() * 1.1 ax_max = plot_df[["gexp_corr", "prot_corr"]].max().max() * 1.1 discrete_pal = pd.Series(sns.color_palette("tab10").as_hex()[:len(signatures)], index=signatures) discrete_pal["All"] = CrispyPlot.PAL_DTRACE[0] grid = GIPlot.gi_regression_marginal( "gexp_corr", "prot_corr", "signature", plot_df, plot_reg=False, plot_annot=False, scatter_kws=dict(edgecolor="w", lw=0.1, s=8), discrete_pal=discrete_pal, ) grid.ax_joint.plot([ax_min, ax_max], [ax_min, ax_max], "k--", lw=0.3) grid.ax_joint.set_xlim(ax_min, ax_max) grid.ax_joint.set_ylim(ax_min, ax_max) labels = [ grid.ax_joint.text(row["gexp_corr"], row["prot_corr"], i, color="k", fontsize=4)
# One horizontal panel per data type, each grouping values of that data type
# by the "n_ppi" column of `df_corr_ppi`.
dsets = ["crispr", "gexp", "prot"]

# Natural-sorted distinct "n_ppi" levels, each mapped to a "Blues_d" hex colour.
order = natsorted(set(df_corr_ppi["n_ppi"]))
pal = pd.Series(
    sns.color_palette("Blues_d", n_colors=len(order)).as_hex(),
    index=order,
)

# (x-axis label, panel title) for each data type; any key not listed here
# falls back to the proteomics text, as the original else-branch did.
panel_text = {
    "crispr": ("Gene essentiality\n(mean scaled FC)", "CRISPR-Cas9"),
    "gexp": ("Gene expression\n(mean voom)", "RNA-Seq"),
}
fallback_text = ("Protein abundance\n(mean intensities)", "SWATH-MS")

_, axs = plt.subplots(1, len(dsets), figsize=(2 * len(dsets), 2), dpi=600)

for i, dt in enumerate(dsets):
    ax = GIPlot.gi_classification(
        dt,
        "n_ppi",
        df_corr_ppi,
        orient="h",
        palette=pal.to_dict(),
        order=order,
        ax=axs[i],
    )

    xlabel, title = panel_text.get(dt, fallback_text)

    ax.set_xlabel(xlabel)
    # Only the left-most panel carries the shared y-axis label.
    ax.set_ylabel("Number of protein interactions" if i == 0 else None)
    ax.set_title(title)
cbar_pos=None, figsize=np.array(plot_df.shape) * 0.275, ) plt.savefig( f"{RPATH}/ProteinTranscriptSample_cfeatures_clustermap.pdf", bbox_inches="tight", transparent=True, ) plt.close("all") # for z_var in ["CopyNumberInstability", "ploidy"]: ax = GIPlot.gi_continuous_plot("prot_corr", "gexp_prot_corr", z_var, satt_corr, mid_point_norm=False) ax.set_xlabel("Sanger&CMRI\nProtein ~ Copy number (Pearson's R)") ax.set_ylabel("Sanger&CMRI\nProtein ~ Transcript (Pearson's R)") plt.savefig( f"{RPATH}/ProteinTranscriptSample_prot_gexp_regression_{z_var}.pdf", bbox_inches="tight", ) plt.close("all") # x_var, y_var, z_var = "ploidy", "CopyNumberInstability", "size" ax = GIPlot.gi_continuous_plot(x_var, y_var, z_var,
plot_df = pd.concat( [ mofa.factors[[f_x, f_y]], gexp.loc[["CDH1", "VIM"]].T.add_suffix("_transcriptomics"), prot.loc[["CDH1", "VIM"]].T.add_suffix("_proteomics"), ss["Tissue_type"], ], axis=1, sort=False, ) # Tissue plot ax = GIPlot.gi_tissue_plot(f_x, f_y, plot_df, plot_reg=False, pal=PALETTE_TTYPE) ax.set_xlabel(f"Factor {f_x[1:]}") ax.set_ylabel(f"Factor {f_y[1:]}") plt.savefig(f"{RPATH}/MultiOmics_{f_x}_{f_y}_tissue_plot.pdf", bbox_inches="tight") plt.savefig(f"{RPATH}/MultiOmics_{f_x}_{f_y}_tissue_plot.png", bbox_inches="tight", dpi=600) plt.close("all") # Continuous annotation for z in ["VIM_proteomics", "CDH1_proteomics"]: ax = GIPlot.gi_continuous_plot(f_x, f_y,
plot_df = pd.concat( [ drespo.loc[[c]].T.add_suffix("_y") if dtype == "drug" else crispr.loc[[c]].T.add_suffix("_y"), prot.loc[[p]].T.add_suffix("_prot"), gexp.loc[[p]].T.add_suffix("_gexp"), ss["Tissue_type"], ], axis=1, sort=False, ).dropna(subset=[f"{c}_y", f"{p}_prot"]) # Protein ax = GIPlot.gi_tissue_plot(f"{p}_prot", f"{c}_y", plot_df, pal=PALETTE_TTYPE) if dtype == "drug": ax.axhline(np.log(dmaxc[c]), ls="--", lw=0.3, color=CrispyPlot.PAL_DTRACE[1]) ax.set_xlabel(f"{p}\nProtein intensities") ax.set_ylabel( f"{c}\n{'Drug response IC50' if dtype == 'drug' else 'CRISPR-Cas9 (log2 FC)'}" ) plt.savefig( f"{RPATH}/TopHits_{p}_{c}_{dtype}_regression_tissue_plot.pdf", bbox_inches="tight",
crispr.apply(skew, axis=1).rename("skew"), crispr.median(1).rename("median"), (crispr < -0.5).sum(1).rename("nsamples"), crispr[crispr < -0.5].median(1).rename("dependency").abs(), ml_scores, ], axis=1, ) crispr_selective_set = set(crispr_selective.query("skew < -3").index) # Scatter grid = GIPlot.gi_regression( "skew", "median", crispr_selective, size="dependency", plot_reg=False, plot_annot=False, ) grid.ax_joint.axvline(-3, c=GIPlot.PAL_DTRACE[1], lw=0.3, ls="--") g_highlight_df = crispr_selective.query("skew < -3").sort_values( "skew").head(15) labels = [ grid.ax_joint.text(row["skew"], row["median"], i, color="k", fontsize=4) for i, row in g_highlight_df.iterrows() ] adjust_text(