Beispiel #1
0
    def pathway_enrichment(self,
                           factor,
                           views=None,
                           genesets=None,
                           nprocesses=4,
                           permutation_num=0):
        """Run ssGSEA on one factor's weights for every view / gene-set pair.

        Args:
            factor: Key used to select the weight vector from
                ``self.weights[view]``.
            views: Views to analyse. When ``None``, methylation,
                transcriptomics and proteomics are used.
            genesets: GMT file names, resolved under ``{DPATH}/pathways/``.
                When ``None``, the c6, c5, hallmark and c2 v7.1 symbol
                collections are used.
            nprocesses: Forwarded to ``gseapy.ssgsea`` as ``processes``.
            permutation_num: Forwarded to ``gseapy.ssgsea`` unchanged.

        Returns:
            A single DataFrame stacking the ``res2d`` table of every run,
            with added ``geneset`` and ``view`` columns, the ``sample1``
            column renamed to ``nes``, sorted ascending by ``nes``.
        """
        gmt_files = [
            "c6.all.v7.1.symbols.gmt",
            "c5.all.v7.1.symbols.gmt",
            "h.all.v7.1.symbols.gmt",
            "c2.all.v7.1.symbols.gmt",
        ] if genesets is None else genesets

        selected_views = (["methylation", "transcriptomics", "proteomics"]
                          if views is None else views)

        # One result table per (view, gene-set file), views in the outer loop.
        tables = []
        for view_name in selected_views:
            for g in gmt_files:
                run = gseapy.ssgsea(
                    self.weights[view_name][factor],
                    processes=nprocesses,
                    permutation_num=permutation_num,
                    gene_sets=Enrichment.read_gmt(f"{DPATH}/pathways/{g}"),
                    no_plot=True,
                )
                # Tag each row with its origin before stacking.
                tagged = run.res2d.assign(geneset=g)
                tagged = tagged.assign(view=view_name)
                tables.append(tagged.reset_index())

        stacked = pd.concat(tables, ignore_index=True)
        renamed = stacked.rename(columns={"sample1": "nes"})
        return renamed.sort_values("nes")
Beispiel #2
0
        ("prot_culture_reps", "PC1"),
        ("prot_culture_reps", "PC2"),
        ("prot_broad_culture", "PC1"),
        ("prot_culture_reps_emt", "PC1"),
        ("prot_culture_reps_emt", "PC2"),
        ("prot_broad_culture_emt", "PC1"),
        ("prot_broad_culture_emt", "PC2"),
        ("prot_broad_culture_emt", "PC4"),
    ]

    # Run ssGSEA for every (dtype, dtype_pc) pair currently held in enr_pcs
    # against every gene-set file in genesets. The input to each run is the
    # dtype_pc entry of that dataset's "loadings" table, selected with .loc.
    # Each result table is tagged with its gene set, dataset and component,
    # and all tables are stacked into one frame.
    # NOTE: enr_pcs is rebound here from the list of pairs to the result frame.
    enr_pcs = pd.concat(
        [
            gseapy.ssgsea(
                dsets_dred[dtype]["loadings"].loc[dtype_pc],
                processes=4,
                gene_sets=Enrichment.read_gmt(f"{DPATH}/pathways/{g}"),
                no_plot=True,
            ).res2d.assign(geneset=g).assign(dtype=dtype).assign(
                dtype_pc=dtype_pc).reset_index() for dtype, dtype_pc in enr_pcs
            for g in genesets
        ],
        ignore_index=True,
    )
    # Rename the "sample1" column to "nes" and sort ascending by it.
    enr_pcs = enr_pcs.rename(columns={"sample1": "nes"}).sort_values("nes")
    # Export as a gzip-compressed CSV without the index column.
    enr_pcs.to_csv(f"{RPATH}/DimRed_pcs_enr.csv.gz",
                   compression="gzip",
                   index=False)

    # Plot
    enr_pcs_plt = [
        ("prot", "PC1", "prot_broad", "PC1", 0.5),
    "Transcriptomics ~ Copy number\n(Pearson's R)",
    "Protein ~ Copy number\n(Pearson's R)",
)

# Save the current figure under RPATH as both PDF and PNG with a tight
# bounding box, then close every open figure.
plt.savefig(f"{RPATH}/ProteinAttenuation_attenuation_scatter.pdf",
            bbox_inches="tight")
plt.savefig(f"{RPATH}/ProteinAttenuation_attenuation_scatter.png",
            bbox_inches="tight")
plt.close("all")

# ### Pathway enrichment analysis of attenuated proteins
# Background is every index entry of patt_corr; the tested sub-list is the
# subset whose "cluster" value is 'High'.
background = set(patt_corr.index)
sublist = set(patt_corr.query("cluster == 'High'").index)

# Enrichment helper loaded with the c5 v7.1 symbol collection; p-values are
# adjusted with the "fdr_bh" method.
# NOTE(review): sig_min_len=15 presumably sets a minimum signature size -
# confirm against the Enrichment class.
enr_obj = Enrichment(gmts=["c5.all.v7.1.symbols.gmt"],
                     sig_min_len=15,
                     padj_method="fdr_bh")

# Hypergeometric test of the sub-list against the background for that GMT.
enr = enr_obj.hypergeom_enrichments(sublist, background,
                                    "c5.all.v7.1.symbols.gmt")
# Keep rows with adjusted p-value below 0.01 and take the first 30 of them.
enr = enr[enr["adj.p_value"] < 0.01].head(30).reset_index()
# Display name: drop the first three characters of the gene-set id
# (presumably a "GO_" prefix - confirm), lower-case it and replace
# underscores with spaces.
enr["name"] = [i[3:].lower().replace("_", " ") for i in enr["gset"]]

_, ax = plt.subplots(1, 1, figsize=(2.0, 5.0), dpi=600)

sns.barplot(
    -np.log10(enr["adj.p_value"]),
    enr["name"],
    orient="h",
    color=CrispyPlot.PAL_DTRACE[2],
    ax=ax,
    # Discretise attenuated samples: fit a two-component Gaussian mixture to
    # the "attenuation" column, with the component means initialised at 0 and
    # 0.4.
    gmm = GaussianMixture(n_components=2,
                          means_init=[[0],
                                      [0.4]]).fit(satt_corr[["attenuation"]])
    # s_type: predicted component for each row, indexed like satt_corr.
    # clusters: fitted mean of each component, indexed by component number.
    s_type, clusters = (
        pd.Series(gmm.predict(satt_corr[["attenuation"]]),
                  index=satt_corr.index),
        pd.Series(gmm.means_[:, 0], index=range(2)),
    )
    # Rows assigned to the component with the larger fitted mean are labelled
    # "High"; all others are labelled "Low".
    satt_corr["cluster"] = [
        "High" if s_type[p] == clusters.argmax() else "Low"
        for p in satt_corr.index
    ]

    # Pathway enrichment
    # EMT score: for each key s of gexp (columns, if gexp is a DataFrame -
    # confirm), call SSGSEA.gsea on gexp[s] with the
    # HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION signature and keep the first
    # element of the result.
    # NOTE(review): min_size=0 presumably disables the minimum gene-set size
    # filter in read_gmt - confirm.
    emt_sig = Enrichment.read_gmt(f"{DPATH}/pathways/emt.symbols.gmt",
                                  min_size=0)
    emt_enr = pd.Series({
        s:
        SSGSEA.gsea(gexp[s],
                    emt_sig["HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION"])[0]
        for s in gexp
    })

    # Proteasome score: same procedure on prot with the
    # BIOCARTA_PROTEASOME_PATHWAY signature; missing values are dropped from
    # each prot[s] before scoring.
    proteasome_sig = Enrichment.read_gmt(
        f"{DPATH}/pathways/proteasome.symbols.gmt", min_size=0)
    proteasome_enr = pd.Series({
        s: SSGSEA.gsea(prot[s].dropna(),
                       proteasome_sig["BIOCARTA_PROTEASOME_PATHWAY"])[0]
        for s in prot
    })
    proteasome_enr_broad = pd.Series({