def test_wr_sections_all():
    """Write grouped sections as a text file and as a Python file, then read both names back.

    The GO IDs of goea_results are grouped using every section read from
    sections_in.txt except the last one, which must be named like the
    default section of HdrgosSections.
    """
    fin_sections = "data/gjoneska_pfenning/sections_in.txt"
    # Travis-CI path is cwd
    fout_txt = "tmp_test_sections_out.txt"
    fout_py = "tmp_test_sections.py"

    # User GO IDs and the sections used to group them
    goids = [nt.GO for nt in goea_results]
    sections_in = _read_sections(fin_sections)

    # Preliminaries: GO DAG (with relationships) and the sub-DAG of the user GO IDs
    dag = get_godag(
        "go-basic.obo",
        prt=None,
        loading_bar=False,
        optional_attrs=['relationship'])
    subdag = GoSubDag(goids, dag, relationships=True, tcntobj=None)
    dflts = GrouperDflts(subdag)

    # The final section read from the file is left out of the grouping input ...
    hdrs = HdrgosSections(subdag, dflts.hdrgos_dflt, sections_in[:-1])
    # ... and its name has to match the default section name
    last_section_name = sections_in[-1][0]
    assert last_section_name == hdrs.secdflt, last_section_name
    grouper = Grouper("test", goids, hdrs, subdag)

    # Create text and Python sections files under REPO
    WrSectionsTxt(grouper).wr_txt_section_hdrgos(os.path.join(REPO, fout_txt))
    WrSectionsPy(grouper).wr_py_sections(
        os.path.join(REPO, fout_py), sections_in, doc=dag.version)

    # Read text and Python sections files (the results are not compared here)
    _read_sections(fout_txt)
    _read_sections(fout_py)
def test_wr_sections_txt():
    """Write the user GO IDs to text files, without sections and with sections from 3 sources.

    Sections are read from a text file, from a Python file given by path and
    from the same Python module given by dotted name; the last two are
    checked against each other with _chk_sections.
    """
    # GO IDs to be grouped
    goids = [nt.GO for nt in goea_results]
    # Created once and shared by every write below
    dflts = GrouperDflts()

    # 1) No sections: usrgos are written under the grouping hdrgos only
    _wr_sections_txt("a_sec0_hdr1.txt", goids, sections=None, grprdflt=dflts)

    # 2) Sections read from a text file
    sections_txt = _read_sections("./data/gjoneska_pfenning/sections_in.txt")
    _wr_sections_txt("a_sec1_hdr1.txt", goids, sections_txt, grprdflt=dflts)

    # 3) Sections read from a Python file, given as a file path
    sections_pyfile = _read_sections("goatools/test_data/sections/gjoneska_pfenning.py")
    _wr_sections_txt("b_sec1_hdr1.txt", goids, sections_pyfile, grprdflt=dflts)

    # 4) Sections read from a Python module, given as a dotted module name
    sections_pymod = _read_sections("goatools.test_data.sections.gjoneska_pfenning")
    _wr_sections_txt("c_sec1_hdr1.txt", goids, sections_pymod, grprdflt=dflts)

    # Path and module name must yield matching sections
    _chk_sections(sections_pyfile, sections_pymod)
Esempio n. 3
0
 def cli(self, prt=sys.stdout):
     """Run the command line: group the user GO IDs and write sections and GO ID files.

     Arguments come from self.objdoc.get_docargs. The version lines are
     written to prt; the final user-GO count is written to sys.stdout.
     """
     args = self.objdoc.get_docargs(prt=None)
     dag = get_godag(
         args['obo'],
         prt=None,
         loading_bar=False,
         optional_attrs=['relationship'])
     goids = GetGOs(dag, max_gos=200).get_usrgos(args.get('GO_FILE'), prt)
     # TermCounts object or None
     termcounts = self._get_tcntobj(goids, dag, **args)
     self.gosubdag = GoSubDag(
         goids, dag, relationships=True, tcntobj=termcounts, prt=None)
     dflts = GrouperDflts(self.gosubdag, args['slims'])
     versions = [dag.version, dflts.ver_goslims]
     prt.write("{VER}\n".format(VER="\n".join(versions)))

     # Group the user GO IDs under the sections read from the input file
     sections = self._read_sections(args['ifile'])
     hdrs = HdrgosSections(self.gosubdag, dflts.hdrgos_dflt, sections)
     grouper = Grouper("init", goids, hdrs, self.gosubdag)

     # Write sections: the input file is only created when it does not exist yet
     txt_writer = WrSectionsTxt(grouper, versions)
     if not os.path.exists(args['ifile']):
         txt_writer.wr_txt_section_hdrgos(args['ifile'])
     txt_writer.wr_txt_section_hdrgos(args['ofile'])
     py_writer = WrSectionsPy(grouper, versions)
     if 'py' in args:
         py_writer.wr_py_sections(args['py'], sections, doc=dag.version)

     # Write user GO IDs in sections
     gos_writer = WrXlsxSortedGos("init", Sorter(grouper), versions)
     gos_writer.wr_txt_gos(args['txt'], sortby=py_writer.fncsortnt)
     self._prt_cnt_usrgos(goids, sys.stdout)
Esempio n. 4
0
def _get_grprobj():
    """Return the "wrusrgos" Grouper for USER_GOS, grouped using SECTIONS."""
    dag = get_godag(
        os.path.join(REPO, "go-basic.obo"),
        prt=None,
        loading_bar=False,
        optional_attrs=['relationship'])
    subdag = GoSubDag(USER_GOS, dag, relationships=True, tcntobj=None)
    # Default header GOs come from GrouperDflts; SECTIONS supplies the sections
    hdrs = HdrgosSections(subdag, GrouperDflts(subdag).hdrgos_dflt, SECTIONS)
    return Grouper("wrusrgos", USER_GOS, hdrs, subdag)
Esempio n. 5
0
def _get_grprobj():
    """Return the "wrusrgos" Grouper built from _get_usrgos() and _get_sections()."""
    goids = _get_usrgos()
    user_sections = _get_sections()
    dag = get_godag(
        "go-basic.obo", prt=None, loading_bar=False, optional_attrs=['relationship'])
    subdag = GoSubDag(goids, dag, relationships=True, tcntobj=None)
    # Default header GOs come from GrouperDflts; the sections come from the helper above
    hdrs = HdrgosSections(subdag, GrouperDflts(subdag).hdrgos_dflt, user_sections)
    return Grouper("wrusrgos", goids, hdrs, subdag)
Esempio n. 6
0
 def __init__(self, gene2gos, objcli, godag_version):
     """Set up the GoSubDag, grouper defaults, header object and version list.

     gene2gos is used with objcli.godag to build the TermCounts; the GO IDs
     are taken from objcli.results_all. godag_version is the first entry
     of self.ver_list.
     """
     # GO IDs of all results held by the command-line object
     result_goids = {res.GO for res in objcli.results_all}
     termcounts = TermCounts(objcli.godag, gene2gos)
     self.gosubdag = GoSubDag(
         result_goids,
         objcli.godag,
         relationships=True,
         tcntobj=termcounts,
         prt=sys.stdout)
     self.grprdflt = GrouperDflts(self.gosubdag, objcli.args.goslim)
     self.hdrobj = HdrgosSections(
         self.grprdflt.gosubdag,
         self.grprdflt.hdrgos_dflt,
         objcli.sections)
     # primary pvalue of interest
     self.pval_fld = objcli.get_pval_field()
     sections_line = "Sections: {S}".format(S=objcli.args.sections)
     self.ver_list = [godag_version, self.grprdflt.ver_goslims, sections_line]
Esempio n. 7
0
def test_alt_id():
    """Ensure that every alternate GO ID is also a key of the loaded GO DAG."""
    dag = get_godag("go-basic.obo")
    user_goids = _get_data0()
    # GoSubDag created over every GO ID key in the DAG
    subdag = GoSubDag(dag.keys(), dag)
    dflts = GrouperDflts(subdag)
    # Grouper is created only to exercise its initialization; it is not used afterwards
    hdrs = HdrgosSections(
        dflts.gosubdag, dflts.hdrgos_dflt, sections=None, hdrgos=None)
    Grouper("test_altid_gosubdag", user_goids, hdrs, dflts.gosubdag, go2nt=None)
    # All alternate IDs must be found among the DAG keys
    altids = _get_altids(dag)
    dag_goids = set(dag.keys())
    num_found = len(altids.intersection(dag_goids))
    assert num_found == len(altids)
Esempio n. 8
0
 def __init__(self, gosubdag, godag_version, **kws):
     """Create grouper defaults, sections, header object and an "all" Grouper.

     Only keyword arguments whose names are in self.kws_dict are used;
     'slims' must be among them. The Grouper is built over
     gosubdag.go_sources.
     """
     known = {key: val for key, val in kws.items() if key in self.kws_dict}
     self.grprdflt = GrouperDflts(gosubdag, known['slims'])
     self.ver_list = [godag_version, self.grprdflt.ver_goslims]
     secstr = self._get_secstr(**known)
     self.sections = read_sections(secstr, exclude_ungrouped=False)
     self.hdrobj = HdrgosSections(
         gosubdag, self.grprdflt.hdrgos_dflt, self.sections)
     # Optional go2nt mapping; None when the caller did not pass one
     go2nt = known.get('go2nt')
     self.grprobj = Grouper(
         "all", gosubdag.go_sources, self.hdrobj, gosubdag, go2nt=go2nt)
Esempio n. 9
0
def test_fnc():
    """Test get_sections_2d of the Grouper class on results with p_fdr_bh < 0.05."""
    # Map GO ID -> result namedtuple, keeping only results below the FDR cutoff
    go2nt = {nt.GO: nt for nt in goea_results if nt.p_fdr_bh < 0.05}
    goids = go2nt.keys()
    dflts = GrouperDflts()
    hdrs = HdrgosSections(
        dflts.gosubdag, dflts.hdrgos_dflt, sections=SECTIONS, hdrgos=None)
    grouper = Grouper("test", goids, hdrs, dflts.gosubdag, go2nt=go2nt)
    # The Grouper must hold exactly the user GO IDs it was given
    assert set(goids) == grouper.usrgos
    chk_results(grouper.get_sections_2d(), grouper)
Esempio n. 10
0
def test_wr_sections_txt():
    """Write the user GO IDs to text files, first without sections and then with them."""
    # GO IDs to be grouped
    goids = [nt.GO for nt in goea_results]
    # Created once and shared by both writes below
    dflts = GrouperDflts()

    # 1) No sections: usrgos are written under the grouping hdrgos only
    _wr_sections_txt("a0_hdr1.txt", goids, sections_file=None, grprdflt=dflts)

    # 2) Sections read from a text file: usrgos are written in those sections
    sections = _read_sections("./data/gjoneska/sections_in.txt")
    _wr_sections_txt("a_ec0_hdr1.txt", goids, sections, grprdflt=dflts)
Esempio n. 11
0
def test_grouper_d2(do_plot=False):
    """Group the GO IDs from get_data0 using only the default header GOs.

    Writes the section header GOs and the grouping GOs, optionally plots
    the groups, then checks the headers with chk_hdrs.
    """
    dflts = GrouperDflts()
    # No sections and no user header GOs are given: only the defaults are passed in
    hdrs = HdrgosSections(
        dflts.gosubdag, dflts.hdrgos_dflt, sections=None, hdrgos=None)
    grouper = Grouper(
        "Transient Increase", get_data0(), hdrs, dflts.gosubdag, go2nt=None)
    writer = WrSections(grouper)
    writer.wr_txt_section_hdrgos("transient_increase_hdrgos.txt")
    writer.wr_txt_grouping_gos()
    if do_plot:
        # Don't run in Travis-CI because it does not contain 'dot'
        from goatools.grouper.grprplt import GrouperPlot
        GrouperPlot(grouper).plot_groups_unplaced()
    chk_hdrs(grouper)
Esempio n. 12
0
 def _get_grprobj(self, goids, sections):
     """Return a "sections" Grouper for the given GO IDs and sections."""
     # Default header GOs are derived using the generic GO slim file
     dflts = GrouperDflts(self.gosubdag, "goslim_generic.obo")
     return Grouper(
         "sections",
         goids,
         HdrgosSections(self.gosubdag, dflts.hdrgos_dflt, sections),
         self.gosubdag)
Esempio n. 13
0
def _get_grprdflt():
    """Return GrouperDflts built from _get_gosubdag() and REPO's goslim_generic.obo."""
    return GrouperDflts(
        _get_gosubdag(),
        os.path.join(REPO, 'goslim_generic.obo'))
Esempio n. 14
0
def test_dflthdrs(prt=sys.stdout, do_plt=False):
    """Compare grouping with default header GOs against grouping with extra user header GOs.

    Run 0 groups the data with the default header GOs only. Run 1 adds
    four user header GOs. The test checks which GO IDs and header GOs the
    second run gains, then checks the flat list printed without header GOs.
    """
    # Created once and reused by both runs
    dflts = GrouperDflts()

    # GO IDs to be grouped
    goids = get_data0()

    # This may need to be updated if default hdrgos are changed
    hdrs_exp_dflt = {
        "GO:0050789",  # BP 11,095 L01 D01 B     regulation of biological process
        "GO:0044848",  # BP     62 L01 D01 S     biological phase
        "GO:0050794",  # BP  8,031 L02 D02 AB    regulation of cellular process
        "GO:0019222",  # BP  3,227 L02 D02 AB    regulation of metabolic process
        "GO:0048583",  # BP  2,377 L02 D02 AB    regulation of response to stimulus
        "GO:0050793",  # BP  1,789 L02 D02 AB    regulation of developmental process
        "GO:0023051",  # BP  1,364 L02 D02 AB    regulation of signaling
        "GO:0002682",  # BP  1,183 L02 D02 AB    regulation of immune system process
        "GO:0007155",  # BP    165 L02 D02 P     cell adhesion
        "GO:0080134",  # BP    940 L03 D03 AB    regulation of response to stress
        "GO:0007165",  # BP    717 L03 D03 AB    signal transduction
        "GO:0050877",  # BP     96 L03 D03 K     neurological system process
        "GO:0007267",  # BP     99 L03 D04 CDR   cell-cell signaling
    }

    # Run 0: no user "GO group headers" (None), so only the default hdrgos are passed in
    hdrobj_dflt = HdrgosSections(
        dflts.gosubdag, dflts.hdrgos_dflt, sections=None, hdrgos=None)
    grouper_dflt = Grouper("dflt", goids, hdrobj_dflt, dflts.gosubdag, go2nt=None)
    _, _, gos_dflt, hdrs_act_dflt = run(grouper_dflt, hdrobj_dflt, hdrs_exp_dflt)

    # Run 1: user grouping GOs are provided in addition to the default hdrgos
    usr_hdrgos = {
        "GO:0099536",  # BP     40 L04 D05 CDR   regulation of response to stimulus
        "GO:0051239",  # BP  2,532 L02 D02 AB    regulation of multicellular organismal process
        "GO:0048519",  # BP  3,293 L02 D02 AB    negative regulation of biological process
        "GO:0048518",  # BP  3,353 L02 D02 AB    positive regulation of biological process
    }
    hdrs_exp_usr = hdrs_exp_dflt.union(usr_hdrgos)
    name = "usrhdrs4"
    hdrobj_usr = HdrgosSections(
        dflts.gosubdag, dflts.hdrgos_dflt, sections=None, hdrgos=usr_hdrgos)
    grouper_usr = Grouper(name, goids, hdrobj_usr, dflts.gosubdag, go2nt=None)
    sorter_usr, _, gos_usr, hdrs_act_usr = run(grouper_usr, hdrobj_usr, hdrs_exp_usr)

    if do_plt:
        from goatools.grouper.grprplt import GrouperPlot
        prt.write("\nPLOT DAG\n")
        GrouperPlot(grouper_usr).plot_grouped_gos()

    # GO:0099536 was a "middle" term (neither usrgo, not hdrgo) in run0, but is a hdrgo in run1
    assert gos_usr.difference(gos_dflt) == {"GO:0099536"}
    assert hdrs_act_usr.difference(hdrs_act_dflt) == set(usr_hdrgos)

    # Print and collect the GO IDs with header GOs turned off
    show_hdrgos = False
    banner = "\n{NAME}: PRINT GOs hdrgo_prt({H}):\n".format(H=show_hdrgos, NAME=name)
    sys.stdout.write(banner)
    sorter_usr.prt_gos(hdrgo_prt=show_hdrgos)
    gos_flat = {nt.GO for nt in sorter_usr.get_nts_flat(show_hdrgos)}

    # Without header GOs the flat list is smaller and holds exactly the input GO IDs
    assert len(gos_usr) > len(gos_flat)
    assert gos_usr.intersection(goids) == gos_flat
    assert gos_flat == goids