Beispiel #1
0
def test_fnc():
    """Test Grouper.get_sections_2d using the significant GOEA results.

    Only GOEA records whose ``p_fdr_bh`` is below 0.05 are grouped. The
    resulting 2-D sections are handed to ``chk_results`` for checking.
    """
    go2nt_sig = {}
    for ntgoea in goea_results:
        if ntgoea.p_fdr_bh < 0.05:
            go2nt_sig[ntgoea.GO] = ntgoea
    goids_sig = go2nt_sig.keys()
    dflts = _get_grprdflt()
    hdrs = HdrgosSections(dflts.gosubdag, dflts.hdrgos_dflt, sections=SECTIONS, hdrgos=None)
    grouper = Grouper("test", goids_sig, hdrs, dflts.gosubdag, go2nt=go2nt_sig)
    # The Grouper must hold exactly the user GO IDs it was given
    assert set(goids_sig) == grouper.usrgos
    chk_results(grouper.get_sections_2d(), grouper)
Beispiel #2
0
 def get_sortobj(self, goea_results, **kws):
     """Return a Sorter object, given a list of GOEnrichmentRecord.

     The GOEA namedtuples obtained from ``MgrNtGOEAs`` are placed in a
     Grouper, the Grouper summary is printed to stdout, and a Sorter is
     created whose ``section_sortby`` key reads the field named by
     ``self.pval_fld``.
     """
     ntobjs = MgrNtGOEAs(goea_results).get_goea_nts_prt(**kws)
     usrgos = {ntobj.GO for ntobj in ntobjs}
     goid2nt = {ntobj.GO: ntobj for ntobj in ntobjs}
     grouper = Grouper("GOEA", usrgos, self.hdrobj, self.grprdflt.gosubdag, go2nt=goid2nt)
     grouper.prt_summary(sys.stdout)

     # Original note (keyword names): "hdrgo_prt", "section_prt", "top_n", "use_sections"
     def _by_pval(ntgo):
         """Sort key: value of the p-value field on one namedtuple."""
         return getattr(ntgo, self.pval_fld)

     return Sorter(grouper, section_sortby=_by_pval)
Beispiel #3
0
 def get_sortobj(self, goea_results, **kws):
     """Return a Sorter object, given a list of GOEnrichmentRecord.

     Steps: convert the GOEA results to namedtuples, group their GO IDs
     with a Grouper (printing its summary to stdout), then wrap the
     Grouper in a Sorter keyed on the field named by ``self.pval_fld``.
     """
     mgr = MgrNtGOEAs(goea_results)
     goea_nts = mgr.get_goea_nts_prt(**kws)
     go_set = {goea_nt.GO for goea_nt in goea_nts}
     nt_by_go = {goea_nt.GO: goea_nt for goea_nt in goea_nts}
     gosubdag = self.grprdflt.gosubdag
     grprobj = Grouper("GOEA", go_set, self.hdrobj, gosubdag, go2nt=nt_by_go)
     grprobj.prt_summary(sys.stdout)

     def _pval_key(goea_nt):
         """Return the p-value field used to order GO terms in a section."""
         return getattr(goea_nt, self.pval_fld)

     return Sorter(grprobj, section_sortby=_pval_key)
def test_wr_sections_all():
    """Write sections to a text file and a Python file, then read both back.

    NOTE(review): the visible code stops after re-reading the two written
    files; no comparison of their contents is performed here.
    """
    # Input sections file and the two output files (Travis-CI path is cwd)
    fin_sections = "data/gjoneska_pfenning/sections_in.txt"
    fout_txt = "tmp_test_sections_out.txt"
    fout_py = "tmp_test_sections.py"

    # User GO IDs and the sections read from the input file
    goids = [ntgoea.GO for ntgoea in goea_results]
    sections_in = _read_sections(fin_sections)

    # Preliminaries: GO DAG, sub-DAG for the user GO IDs, and grouping defaults
    dag = get_godag("go-basic.obo", prt=None, loading_bar=False, optional_attrs=['relationship'])
    subdag = GoSubDag(goids, dag, relationships=True, tcntobj=None)
    dflts = GrouperDflts(subdag)

    # The last section read is the ungrouped "Misc." section: leave it out
    hdrs = HdrgosSections(subdag, dflts.hdrgos_dflt, sections_in[:-1])
    name_last = sections_in[-1][0]
    assert name_last == hdrs.secdflt, name_last
    grouper = Grouper("test", goids, hdrs, subdag)

    # Create the text sections file
    wr_txt = WrSectionsTxt(grouper)
    wr_txt.wr_txt_section_hdrgos(os.path.join(REPO, fout_txt))
    # Create the Python sections file
    wr_py = WrSectionsPy(grouper)
    wr_py.wr_py_sections(os.path.join(REPO, fout_py), sections_in, doc=dag.version)

    # Read back the text and Python sections files
    _read_sections(fout_txt)
    _read_sections(fout_py)
Beispiel #5
0
 def cli(self, prt=sys.stdout):
     """Command-line interface for go_draw script.

     Loads the GO DAG and user GO IDs named by the parsed arguments, writes
     the version strings to ``prt``, groups the GO IDs into sections, and
     writes the sections files and the sorted GO IDs text file.
     """
     args = self.objdoc.get_docargs(prt=None)
     dag = get_godag(args['obo'], prt=None, loading_bar=False, optional_attrs=['relationship'])
     goids = GetGOs(dag, max_gos=200).get_usrgos(args.get('GO_FILE'), prt)
     # TermCounts object or None
     termcounts = self._get_tcntobj(goids, dag, **args)
     self.gosubdag = GoSubDag(goids, dag, relationships=True, tcntobj=termcounts, prt=None)
     dflts = GrouperDflts(self.gosubdag, args['slims'])
     versions = [dag.version, dflts.ver_goslims]
     prt.write("{VER}\n".format(VER="\n".join(versions)))

     # Group the user GO IDs using the sections read from the input file
     sections = self._read_sections(args['ifile'])
     hdrs = HdrgosSections(self.gosubdag, dflts.hdrgos_dflt, sections)
     grouper = Grouper("init", goids, hdrs, self.gosubdag)

     # Write sections: the input file is only created when it does not exist yet
     wr_txt = WrSectionsTxt(grouper, versions)
     if not os.path.exists(args['ifile']):
         wr_txt.wr_txt_section_hdrgos(args['ifile'])
     wr_txt.wr_txt_section_hdrgos(args['ofile'])
     wr_py = WrSectionsPy(grouper, versions)
     if 'py' in args:
         wr_py.wr_py_sections(args['py'], sections, doc=dag.version)

     # Write user GO IDs in sections
     sorter = Sorter(grouper)
     wr_gos = WrXlsxSortedGos("init", sorter, versions)
     wr_gos.wr_txt_gos(args['txt'], sortby=wr_py.fncsortnt)
     self._prt_cnt_usrgos(goids, sys.stdout)
def _get_grprobj():
    """Return a Grouper for USER_GOS, grouped with SECTIONS.

    The GO DAG is loaded from ``go-basic.obo`` under REPO with the
    optional 'relationship' attribute.
    """
    dag = get_godag(
        os.path.join(REPO, "go-basic.obo"),
        prt=None,
        loading_bar=False,
        optional_attrs=['relationship'])
    subdag = GoSubDag(USER_GOS, dag, relationships=True, tcntobj=None)
    dflts = GrouperDflts(subdag)
    hdrs = HdrgosSections(subdag, dflts.hdrgos_dflt, SECTIONS)
    grouper = Grouper("wrusrgos", USER_GOS, hdrs, subdag)
    return grouper
Beispiel #7
0
 def get_go2txt(grprobj_cur, grp_go2color, grp_go2bordercolor):
     """Return a dict mapping GO IDs to the section text to show for them.

     A Grouper covering the current user GO IDs plus every GO object ID in
     the sub-DAG is built. When the header object has sections, a GO ID
     keeps its section text unless the text is 'broad', or the text is the
     default section name and the GO ID is in neither color dict.
     """
     dag_goids = {goobj.id for goobj in grprobj_cur.gosubdag.go2obj.values()}
     hdrs = grprobj_cur.hdrobj
     # Adds section text to all GO terms in plot (misses middle GO terms)
     grouper_all = Grouper("all", grprobj_cur.usrgos.union(dag_goids),
                           hdrs, grprobj_cur.gosubdag)
     sec_default = hdrs.secdflt
     highlighted = set(grp_go2color.keys()) | set(grp_go2bordercolor)
     if not hdrs.sections:
         return {}
     # Keep sections text only if GO header, GO user, or not Misc.
     return {
         goid: sectxt
         for goid, sectxt in grouper_all.get_go2sectiontxt().items()
         if sectxt != 'broad' and (sectxt != sec_default or goid in highlighted)
     }
def _wr_sections_txt(fout_txt, usrgos, sections, grprdflt):
    """Write a sections text file under REPO for the given usrgos and sections.

    A RuntimeError raised while grouping or writing is not propagated:
    its message is reported on stdout with a FATAL banner instead.
    """
    try:
        subdag = grprdflt.gosubdag
        hdrs = HdrgosSections(subdag, grprdflt.hdrgos_dflt, sections=sections)
        grouper = Grouper(fout_txt, usrgos, hdrs, subdag, go2nt=None)
        fout_full = os.path.join(REPO, fout_txt)
        writer = WrSectionsTxt(grouper)
        writer.wr_txt_section_hdrgos(fout_full, sortby=None, prt_section=True)
        # The file must exist once it has been written
        assert os.path.exists(fout_full)
    except RuntimeError as errobj:
        sys.stdout.write("\n  **FATAL: {MSG}\n\n".format(MSG=str(errobj)))
Beispiel #9
0
def _get_grprobj():
    """Return a Grouper built from the module's user GO IDs and sections.

    The GO DAG is loaded from ``go-basic.obo`` with the optional
    'relationship' attribute.
    """
    goids = _get_usrgos()
    secs = _get_sections()
    dag = get_godag("go-basic.obo", prt=None, loading_bar=False, optional_attrs=['relationship'])
    subdag = GoSubDag(goids, dag, relationships=True, tcntobj=None)
    hdrgos_dflt = GrouperDflts(subdag).hdrgos_dflt
    hdrs = HdrgosSections(subdag, hdrgos_dflt, secs)
    return Grouper("wrusrgos", goids, hdrs, subdag)
 def __init__(self, name, goea_results, obj):
     """Store lookups derived from GOEA results using the helper ``_Init``.

     Attributes set here: ``go2nt``, ``sortobj``, ``sec2gos``, ``sec2chr``,
     ``go2chrs``, ``gene2gos``, ``gene2section2gos`` and ``gene2aart``.
     Each is computed from ``obj`` and the ones before it, so the
     assignment order matters.
     """
     self.name = name
     self.datobj = obj  # AArtGeneProductSetsAll
     helper = _Init(obj)
     self.go2nt = helper.get_go2nt(goea_results)
     # Group and sort the GO IDs found in the GOEA results
     subdag = obj.grprdflt.gosubdag
     grouper = Grouper("grp", self.go2nt, obj.hdrobj, subdag, go2nt=self.go2nt)
     self.sortobj = Sorter(grouper)
     self.sec2gos = helper.get_sec2gos(self.sortobj)
     # Section-to-character mapping, in the order of sec2gos
     sec_chr_pairs = [(sec, obj.sec2chr[sec]) for sec in self.sec2gos.keys()]
     self.sec2chr = cx.OrderedDict(sec_chr_pairs)
     self.go2chrs = helper.get_go2chrs(self.sec2gos, self.sec2chr)
     self.gene2gos = helper.get_gene2gos(self.go2nt)
     self.gene2section2gos = helper.get_gene2section2gos(self.gene2gos, self.sec2gos)
     self.gene2aart = helper.get_gene2aart(self.gene2section2gos, self.sec2chr)
Beispiel #11
0
def test_alt_id():
    """Ensure that alternate GO IDs are all among the keys of the GO DAG.

    A Grouper is also created over the full GO DAG to exercise its
    construction; the Grouper object itself is not inspected.
    """
    godag = get_godag("go-basic.obo")
    usrgos = _get_data0()
    # Create/Initialize GoSubDag over every GO ID in the DAG
    dflts = GrouperDflts(GoSubDag(godag.keys(), godag))
    # Create/Initialize Grouper
    hdrs = HdrgosSections(dflts.gosubdag, dflts.hdrgos_dflt, sections=None, hdrgos=None)
    Grouper("test_altid_gosubdag", usrgos, hdrs, dflts.gosubdag, go2nt=None)
    altids = _get_altids(godag)
    # Every alternate GO ID must be a key in the GO DAG
    assert altids.issubset(set(godag.keys()))
Beispiel #12
0
 def __init__(self, gosubdag, godag_version, **kws):
     """Create grouping defaults, sections, headers and a Grouper named "all".

     Only keyword arguments whose names appear in ``self.kws_dict`` are
     used. The 'slims' keyword is read unconditionally; 'go2nt' is optional.
     """
     known = {key: val for key, val in kws.items() if key in self.kws_dict}
     self.grprdflt = GrouperDflts(gosubdag, known['slims'])
     self.ver_list = [godag_version, self.grprdflt.ver_goslims]
     secstr = self._get_secstr(**known)
     self.sections = read_sections(secstr, exclude_ungrouped=False)
     self.hdrobj = HdrgosSections(gosubdag, self.grprdflt.hdrgos_dflt, self.sections)
     nt_by_go = known.get('go2nt')
     # Group the source GO IDs of the sub-DAG
     self.grprobj = Grouper("all", gosubdag.go_sources, self.hdrobj, gosubdag, go2nt=nt_by_go)
Beispiel #13
0
def test_grouper_d2(do_plot=False):
    """Group depth-02 GO terms under their most specific depth-01 GO parent(s).

    Writes the section header GOs and the grouping GOs, optionally plots
    the groups, and finishes by checking the headers with ``chk_hdrs``.
    """
    # No "Grouping GOs" are provided, so depth-01 GOs are used for grouping.
    dflts = GrouperDflts()
    subdag = dflts.gosubdag
    hdrs = HdrgosSections(subdag, dflts.hdrgos_dflt, sections=None, hdrgos=None)
    # Get GOs to be grouped and group them
    grouper = Grouper("Transient Increase", get_data0(), hdrs, subdag, go2nt=None)
    writer = WrSections(grouper)
    writer.wr_txt_section_hdrgos("transient_increase_hdrgos.txt")
    writer.wr_txt_grouping_gos()
    if do_plot:
        # Don't run in Travis-CI because it does not contain 'dot'
        from goatools.grouper.grprplt import GrouperPlot
        plotter = GrouperPlot(grouper)
        plotter.plot_groups_unplaced()
    chk_hdrs(grouper)
Beispiel #14
0
 def _get_grprobj(self, goids, sections):
     """Return a Grouper named "sections" for the given GO IDs and sections.

     Default header GOs come from GrouperDflts created with
     "goslim_generic.obo" on ``self.gosubdag``.
     """
     subdag = self.gosubdag
     hdrgos_dflt = GrouperDflts(subdag, "goslim_generic.obo").hdrgos_dflt
     hdrs = HdrgosSections(subdag, hdrgos_dflt, sections)
     grouper = Grouper("sections", goids, hdrs, subdag)
     return grouper
Beispiel #15
0
def test_dflthdrs(prt=sys.stdout, do_plt=False):
    """Group depth-02 GO terms under their most specific depth-01 GO parent(s).

    Two groupings of the same GO IDs are compared: run 0 uses only the
    default header GOs, run 1 adds user-provided header GOs. The test then
    checks which GO IDs and headers differ between the runs, and that the
    flat list printed without header GOs equals the input GO IDs.
    """
    # Initialize GoSubDag for grouping use once, since it takes a few seconds to initialize
    dflts = _get_grprdflt()
    subdag = dflts.gosubdag

    # Get GOs to be grouped
    usrgos = get_data0()

    # This may need to be updated if default hdrgos are changed
    hdrs_exp0 = {
        "GO:0050789",  # BP 11,095 L01 D01 B     regulation of biological process
        "GO:0044848",  # BP     62 L01 D01 S     biological phase
        "GO:0050794",  # BP  8,031 L02 D02 AB    regulation of cellular process
        "GO:0019222",  # BP  3,227 L02 D02 AB    regulation of metabolic process
        "GO:0048583",  # BP  2,377 L02 D02 AB    regulation of response to stimulus
        "GO:0050793",  # BP  1,789 L02 D02 AB    regulation of developmental process
        "GO:0023051",  # BP  1,364 L02 D02 AB    regulation of signaling
        "GO:0002682",  # BP  1,183 L02 D02 AB    regulation of immune system process
        "GO:0007155",  # BP    165 L02 D02 P     cell adhesion
        "GO:0080134",  # BP    940 L03 D03 AB    regulation of response to stress
        "GO:0007165",  # BP    717 L03 D03 AB    signal transduction
        "GO:0050877",  # BP     96 L03 D03 K     neurological system process
        "GO:0007267",  # BP     99 L03 D04 CDR   cell-cell signaling
    }

    # Run 0: no "GO group headers" (None) are provided, so the defaults are used for grouping.
    hdrs_run0 = HdrgosSections(subdag, dflts.hdrgos_dflt, sections=None, hdrgos=None)
    grouper0 = Grouper("dflt", usrgos, hdrs_run0, subdag, go2nt=None)
    _, _, gos_run0, hdrs_act0 = run(grouper0, hdrs_run0, hdrs_exp0)

    # Run 1: grouping GOs are provided; these are added to the default GOs used for grouping.
    hdrgos_usr = {
        "GO:0099536",  # BP     40 L04 D05 CDR   regulation of response to stimulus
        "GO:0051239",  # BP  2,532 L02 D02 AB    regulation of multicellular organismal process
        "GO:0048519",  # BP  3,293 L02 D02 AB    negative regulation of biological process
        "GO:0048518",  # BP  3,353 L02 D02 AB    positive regulation of biological process
    }
    hdrs_exp1 = hdrs_exp0 | hdrgos_usr
    grpname = "usrhdrs4"
    hdrs_run1 = HdrgosSections(subdag, dflts.hdrgos_dflt, sections=None, hdrgos=hdrgos_usr)
    grouper1 = Grouper(grpname, usrgos, hdrs_run1, subdag, go2nt=None)
    sorter1, _, gos_run1, hdrs_act1 = run(grouper1, hdrs_run1, hdrs_exp1)

    if do_plt:
        from goatools.grouper.grprplt import GrouperPlot
        prt.write("\nPLOT DAG\n")
        GrouperPlot(grouper1).plot_grouped_gos()

    # GO:0099536 was a "middle" term (neither usrgo, not hdrgo) in run0, but is a hdrgo in run1
    assert gos_run1.difference(gos_run0) == {"GO:0099536"}
    assert hdrs_act1.difference(hdrs_act0) == set(hdrgos_usr)

    # Print and collect the GO terms with the header GOs left out
    show_hdrgos = False
    sys.stdout.write("\n{NAME}: PRINT GOs hdrgo_prt({H}):\n".format(
        H=show_hdrgos, NAME=grpname))
    sorter1.prt_gos(hdrgo_prt=show_hdrgos)
    gos_flat = {ntgo.GO for ntgo in sorter1.get_nts_flat(show_hdrgos)}

    # Without header GOs, the flat list holds exactly the user GO IDs
    assert len(gos_run1) > len(gos_flat)
    assert gos_run1.intersection(usrgos) == gos_flat
    assert gos_flat == usrgos