(drespo.T < dmax[drespo.index]).sum().rename("nsamples"),
            drespo.T[drespo.T < dmax[drespo.index]].median().rename("dependency"),
            ml_scores,
        ],
        axis=1,
    )
    # Index labels of every entry whose skew is below -2.
    drug_selective_set = set(drug_selective.query("skew < -2").index)

    # Display name: the second ";"-separated field of each index label.
    drug_selective["name"] = [
        label.split(";")[1] for label in drug_selective.index
    ]

    # Scatter of the "skew" and "median" columns, with "dependency" passed as
    # the size variable (presumably inverted marker scaling, no regression
    # line and no annotation, judging by the keyword names -- confirm against
    # GIPlot.gi_regression).
    grid = GIPlot.gi_regression(
        "skew",
        "median",
        drug_selective,
        size="dependency",
        size_inverse=True,
        size_legend_title="Median IC50",
        plot_reg=False,
        plot_annot=False,
    )

    # Dashed vertical guide at skew = -1 on the joint axes.
    grid.ax_joint.axvline(-1, c=GIPlot.PAL_DTRACE[1], lw=0.3, ls="--")

    # At most five rows with skew below -1, lowest skew first, get a text
    # label placed at their (skew, median) position.
    g_highlight_df = (
        drug_selective.query("skew < -1")
        .sort_values("skew")
        .head(5)
    )
    labels = [
        grid.ax_joint.text(
            entry["skew"],
            entry["median"],
            entry["name"],
            color="k",
            fontsize=4,
        )
        for _, entry in g_highlight_df.iterrows()
    ]
    adjust_text(
        labels,
Ejemplo n.º 2
0
        drug_pca_df.corr(),
        cmap="Spectral",
        annot=True,
        center=0,
        fmt=".2f",
        annot_kws=dict(size=4),
        lw=0.05,
        figsize=(3, 3),
    )

    # Write the current figure to <RPATH>/drug_pca_clustermap.pdf, then close
    # every open figure so the next plot starts clean (plt is presumably
    # matplotlib.pyplot -- its import is outside this view).
    plt.savefig(f"{RPATH}/drug_pca_clustermap.pdf", bbox_inches="tight", dpi=600)
    plt.close("all")

    # Regression plot of the "growth" column against one principal component
    # of drug_pca_df.
    y_var = "PC1"  # column passed to the plot; also the key looked up in drug_vexp
    g = GIPlot.gi_regression("growth", y_var, drug_pca_df, lowess=True)
    # The y-axis label shows drug_vexp[y_var] as a percentage with one decimal
    # (presumably the variance explained by that component -- confirm where
    # drug_vexp is built).
    g.set_axis_labels("Growth rate", f"{y_var} ({drug_vexp[y_var]*100:.1f}%)")
    plt.savefig(f"{RPATH}/drug_pca_regression_growth.pdf", bbox_inches="tight", dpi=600)
    plt.close("all")

    # Covariates
    #

    # CRISPR
    covs_crispr = LMModels.define_covariates(
        institute=crispr_obj.merged_institute,
        medium=True,
        cancertype=False,
        tissuetype=False,
        mburden=False,
        ploidy=True,
            crispr.apply(skew, axis=1).rename("skew"),
            crispr.median(1).rename("median"),
            (crispr < -0.5).sum(1).rename("nsamples"),
            crispr[crispr < -0.5].median(1).rename("dependency").abs(),
            ml_scores,
        ],
        axis=1,
    )

    # Index labels of every entry whose skew is below -3.
    crispr_selective_set = set(crispr_selective.query("skew < -3").index)

    # Scatter of the "skew" and "median" columns, with "dependency" passed as
    # the size variable (presumably no regression line and no annotation,
    # judging by the keyword names -- confirm against GIPlot.gi_regression).
    grid = GIPlot.gi_regression(
        "skew",
        "median",
        crispr_selective,
        size="dependency",
        plot_reg=False,
        plot_annot=False,
    )

    # Dashed vertical guide at skew = -3, the same cut-off used for the set
    # above and for the highlighted rows below.
    grid.ax_joint.axvline(-3, c=GIPlot.PAL_DTRACE[1], lw=0.3, ls="--")

    # At most fifteen rows with skew below -3, lowest skew first, are
    # labelled with their index value at their (skew, median) position.
    g_highlight_df = (
        crispr_selective.query("skew < -3")
        .sort_values("skew")
        .head(15)
    )
    labels = [
        grid.ax_joint.text(
            entry["skew"],
            entry["median"],
            label,
            color="k",
            fontsize=4,
        )
        for label, entry in g_highlight_df.iterrows()
    ]
    adjust_text(