def test_wr_sections_all():
    """Write grouped sections to a text file and a Python file, then read both back.

    Sections are read from the gjoneska_pfenning input file and the user GO IDs
    are taken from ``goea_results``. The last section read must be the default
    (ungrouped) section reported by ``HdrgosSections``.
    """
    fin_sections = "data/gjoneska_pfenning/sections_in.txt"
    fout_txt = "tmp_test_sections_out.txt"
    fout_py = "tmp_test_sections.py"

    # User GO IDs and the sections they are grouped into
    usrgos = [rec.GO for rec in goea_results]
    sections_in = _read_sections(fin_sections)

    # GO DAG restricted to the user GO IDs, loaded with relationships
    godag = get_godag(
        "go-basic.obo",
        prt=None,
        loading_bar=False,
        optional_attrs=['relationship'])
    gosubdag = GoSubDag(usrgos, godag, relationships=True, tcntobj=None)
    grprdflt = GrouperDflts(gosubdag)

    # The final entry of the input sections is left out of the header object;
    # its name must match the header object's default section name.
    hdrobj = HdrgosSections(gosubdag, grprdflt.hdrgos_dflt, sections_in[:-1])
    last_name = sections_in[-1][0]
    assert last_name == hdrobj.secdflt, last_name
    grprobj = Grouper("test", usrgos, hdrobj, gosubdag)

    # Write the sections as text, then as Python
    path_txt = os.path.join(REPO, fout_txt)
    WrSectionsTxt(grprobj).wr_txt_section_hdrgos(path_txt)
    path_py = os.path.join(REPO, fout_py)
    WrSectionsPy(grprobj).wr_py_sections(path_py, sections_in, doc=godag.version)

    # Read both files back using their relative names (original note:
    # "Travis-CI path is cwd").
    # NOTE(review): the files are written under REPO but read relative to the
    # current directory, and the two results are never compared -- confirm
    # whether a content check was intended here.
    sec_wr = _read_sections(fout_txt)
    sec_py = _read_sections(fout_py)
def test_wr_sections_txt():
    """Write user GO IDs to text files without sections and with sections.

    Sections are read from three sources (a text file, a Python file path and
    a dotted module name); the last two are checked against each other.
    """
    usrgos = [rec.GO for rec in goea_results]
    # Created once and shared by every write below
    grprdflt = GrouperDflts()

    # No sections: the output shows how usrgos are grouped under hdrgos
    _wr_sections_txt("a_sec0_hdr1.txt", usrgos, sections=None, grprdflt=grprdflt)

    # (output file, sections source) pairs, processed in this order
    outputs_and_sources = (
        ("a_sec1_hdr1.txt", "./data/gjoneska_pfenning/sections_in.txt"),
        ("b_sec1_hdr1.txt", "goatools/test_data/sections/gjoneska_pfenning.py"),
        ("c_sec1_hdr1.txt", "goatools.test_data.sections.gjoneska_pfenning"),
    )
    loaded = []
    for fout_txt, source in outputs_and_sources:
        sections = _read_sections(source)
        _wr_sections_txt(fout_txt, usrgos, sections, grprdflt=grprdflt)
        loaded.append(sections)

    # Sections from the Python file path and from the module name are compared
    _chk_sections(loaded[1], loaded[2])
def cli(self, prt=sys.stdout):
    """Command-line interface: group user GO IDs into sections and write them.

    Args:
        prt: Stream that receives the version lines and is passed to the
            user GO ID reader.
    """
    args = self.objdoc.get_docargs(prt=None)
    godag = get_godag(
        args['obo'],
        prt=None,
        loading_bar=False,
        optional_attrs=['relationship'])
    usrgos = GetGOs(godag, max_gos=200).get_usrgos(args.get('GO_FILE'), prt)
    # TermCounts object or None
    tcntobj = self._get_tcntobj(usrgos, godag, **args)
    self.gosubdag = GoSubDag(
        usrgos, godag, relationships=True, tcntobj=tcntobj, prt=None)
    grprdflt = GrouperDflts(self.gosubdag, args['slims'])

    # Report the versions of the GO DAG and the GO slims
    ver_list = [godag.version, grprdflt.ver_goslims]
    prt.write("{VER}\n".format(VER="\n".join(ver_list)))

    fin_sections = args['ifile']
    sections = self._read_sections(fin_sections)
    hdrobj = HdrgosSections(self.gosubdag, grprdflt.hdrgos_dflt, sections)
    grprobj = Grouper("init", usrgos, hdrobj, self.gosubdag)

    # Sections as text: the input file is created only if it is missing,
    # the output file is always written.
    wr_txt = WrSectionsTxt(grprobj, ver_list)
    if not os.path.exists(fin_sections):
        wr_txt.wr_txt_section_hdrgos(fin_sections)
    wr_txt.wr_txt_section_hdrgos(args['ofile'])

    # Sections as Python, when a 'py' argument is present
    wr_py = WrSectionsPy(grprobj, ver_list)
    if 'py' in args:
        wr_py.wr_py_sections(args['py'], sections, doc=godag.version)

    # User GO IDs, sorted into their sections
    gos_writer = WrXlsxSortedGos("init", Sorter(grprobj), ver_list)
    gos_writer.wr_txt_gos(args['txt'], sortby=wr_py.fncsortnt)

    # NOTE(review): the count is always written to sys.stdout, not to `prt`
    # -- confirm this is intentional.
    self._prt_cnt_usrgos(usrgos, sys.stdout)
def _get_grprobj():
    """Return a Grouper for USER_GOS, grouped under SECTIONS.

    The GO DAG is loaded from ``go-basic.obo`` inside ``REPO``, including
    relationships.
    """
    godag = get_godag(
        os.path.join(REPO, "go-basic.obo"),
        prt=None,
        loading_bar=False,
        optional_attrs=['relationship'])
    gosubdag = GoSubDag(USER_GOS, godag, relationships=True, tcntobj=None)
    dflt_hdrgos = GrouperDflts(gosubdag).hdrgos_dflt
    hdrobj = HdrgosSections(gosubdag, dflt_hdrgos, SECTIONS)
    return Grouper("wrusrgos", USER_GOS, hdrobj, gosubdag)
def _get_grprobj():
    """Return a Grouper for the user GO IDs and sections supplied by the helpers.

    User GO IDs come from ``_get_usrgos()`` and sections from
    ``_get_sections()``; the GO DAG is loaded from ``go-basic.obo`` including
    relationships.
    """
    goids = _get_usrgos()
    secs = _get_sections()
    godag = get_godag(
        "go-basic.obo",
        prt=None,
        loading_bar=False,
        optional_attrs=['relationship'])
    gosubdag = GoSubDag(goids, godag, relationships=True, tcntobj=None)
    dflt_hdrgos = GrouperDflts(gosubdag).hdrgos_dflt
    hdrobj = HdrgosSections(gosubdag, dflt_hdrgos, secs)
    return Grouper("wrusrgos", goids, hdrobj, gosubdag)
def __init__(self, gene2gos, objcli, godag_version):
    """Set up the objects used to group the GO IDs found in the CLI results.

    Args:
        gene2gos: Associations handed to ``TermCounts`` with the GO DAG.
        objcli: CLI object supplying ``results_all``, ``godag``, ``sections``,
            ``args.goslim``, ``args.sections`` and ``get_pval_field()``.
        godag_version: Version text stored first in ``ver_list``.
    """
    # GO IDs of every result record
    result_goids = {rec.GO for rec in objcli.results_all}
    termcounts = TermCounts(objcli.godag, gene2gos)
    self.gosubdag = GoSubDag(
        result_goids,
        objcli.godag,
        relationships=True,
        tcntobj=termcounts,
        prt=sys.stdout)
    self.grprdflt = GrouperDflts(self.gosubdag, objcli.args.goslim)
    self.hdrobj = HdrgosSections(
        self.grprdflt.gosubdag,
        self.grprdflt.hdrgos_dflt,
        objcli.sections)
    # Primary p-value field of interest
    self.pval_fld = objcli.get_pval_field()
    sections_txt = "Sections: {S}".format(S=objcli.args.sections)
    self.ver_list = [godag_version, self.grprdflt.ver_goslims, sections_txt]
def test_alt_id():
    """Ensure that every alternate GO ID is also a key of the GO DAG.

    A Grouper is also built over a GoSubDag containing all GO DAG keys; its
    construction is expected to complete without raising.
    """
    obo_dag = get_godag("go-basic.obo")

    # GoSubDag over every key in the GO DAG
    goids = _get_data0()
    gosubdag = GoSubDag(obo_dag.keys(), obo_dag)
    grprdflt = GrouperDflts(gosubdag)

    # Grouper with default headers only; the instance itself is not used
    hdrobj = HdrgosSections(
        grprdflt.gosubdag,
        grprdflt.hdrgos_dflt,
        sections=None,
        hdrgos=None)
    Grouper("test_altid_gosubdag", goids, hdrobj, grprdflt.gosubdag, go2nt=None)

    # All alternate IDs must be found among the GO DAG keys
    alt_ids = _get_altids(obo_dag)
    dag_goids = set(obo_dag.keys())
    num_found = len(alt_ids.intersection(dag_goids))
    assert num_found == len(alt_ids)
def __init__(self, gosubdag, godag_version, **kws):
    """Create the sections, header object and Grouper for all GO sources.

    Args:
        gosubdag: GoSubDag whose ``go_sources`` are grouped.
        godag_version: Version text stored first in ``ver_list``.
        **kws: Options; only keys present in ``self.kws_dict`` are kept.
            ``slims`` is read unconditionally (KeyError if absent) and
            ``go2nt`` is optional.
    """
    # Keep only the recognized keyword arguments
    opts = {key: kws[key] for key in kws if key in self.kws_dict}
    self.grprdflt = GrouperDflts(gosubdag, opts['slims'])
    self.ver_list = [godag_version, self.grprdflt.ver_goslims]
    secstr = self._get_secstr(**opts)
    self.sections = read_sections(secstr, exclude_ungrouped=False)
    self.hdrobj = HdrgosSections(
        gosubdag, self.grprdflt.hdrgos_dflt, self.sections)
    self.grprobj = Grouper(
        "all",
        gosubdag.go_sources,
        self.hdrobj,
        gosubdag,
        go2nt=opts.get('go2nt'))
def test_fnc():
    """Test the Grouper method ``get_sections_2d`` on significant GOEA results.

    Only records from ``goea_results`` with ``p_fdr_bh`` below 0.05 are used.
    """
    # Map GO ID -> record for the significant results
    usrgo2nt = {}
    for rec in goea_results:
        if rec.p_fdr_bh < 0.05:
            usrgo2nt[rec.GO] = rec
    usrgos = usrgo2nt.keys()

    grprdflt = GrouperDflts()
    hdrobj = HdrgosSections(
        grprdflt.gosubdag,
        grprdflt.hdrgos_dflt,
        sections=SECTIONS,
        hdrgos=None)
    grprobj = Grouper("test", usrgos, hdrobj, grprdflt.gosubdag, go2nt=usrgo2nt)

    # The Grouper must hold exactly the GO IDs it was given
    assert set(usrgos) == grprobj.usrgos
    chk_results(grprobj.get_sections_2d(), grprobj)
def test_wr_sections_txt():
    """Write user GO IDs to text files, first without and then with sections."""
    usrgos = [rec.GO for rec in goea_results]
    # Created once and shared by both writes
    grprdflt = GrouperDflts()

    # No sections: the output shows how usrgos are grouped under hdrgos
    _wr_sections_txt(
        "a0_hdr1.txt", usrgos, sections_file=None, grprdflt=grprdflt)

    # With sections read from a file: usrgos are written within their sections
    sections = _read_sections("./data/gjoneska/sections_in.txt")
    _wr_sections_txt("a_ec0_hdr1.txt", usrgos, sections, grprdflt=grprdflt)
def test_grouper_d2(do_plot=False):
    """Group the GO IDs from ``get_data0()`` using only the default header GOs.

    Args:
        do_plot: When true, also plot the groups. Off by default; the
            original note says Travis-CI does not provide 'dot'.
    """
    grprdflt = GrouperDflts()
    subdag = grprdflt.gosubdag
    # No sections and no extra header GOs are supplied
    hdrobj = HdrgosSections(
        subdag, grprdflt.hdrgos_dflt, sections=None, hdrgos=None)
    grprobj = Grouper(
        "Transient Increase", get_data0(), hdrobj, subdag, go2nt=None)

    writer = WrSections(grprobj)
    writer.wr_txt_section_hdrgos("transient_increase_hdrgos.txt")
    writer.wr_txt_grouping_gos()

    if do_plot:
        # Imported here so the plotting module is loaded only when requested
        from goatools.grouper.grprplt import GrouperPlot
        GrouperPlot(grprobj).plot_groups_unplaced()

    chk_hdrs(grprobj)
def _get_grprobj(self, goids, sections):
    """Return a Grouper named "sections" for the given GO IDs and sections.

    Default header GOs are taken from a ``GrouperDflts`` built on
    ``self.gosubdag`` and "goslim_generic.obo".
    """
    subdag = self.gosubdag
    dflt_hdrgos = GrouperDflts(subdag, "goslim_generic.obo").hdrgos_dflt
    hdrobj = HdrgosSections(subdag, dflt_hdrgos, sections)
    return Grouper("sections", goids, hdrobj, subdag)
def _get_grprdflt():
    """Return GrouperDflts built from the GoSubDag and REPO's goslim_generic.obo."""
    subdag = _get_gosubdag()
    return GrouperDflts(subdag, os.path.join(REPO, 'goslim_generic.obo'))
def test_dflthdrs(prt=sys.stdout, do_plt=False):
    """Check header GOs when grouping with default and with user-added headers.

    Run 0 groups the data with the default header GOs only; run 1 adds four
    user-provided header GOs. The two runs are then compared.

    Args:
        prt: Stream that receives the "PLOT DAG" message when plotting.
        do_plt: When true, plot the grouped GO IDs of run 1.
    """
    # Created once and shared by both runs (original note: initialization
    # takes a few seconds)
    grprdflt = GrouperDflts()
    subdag = grprdflt.gosubdag
    # GO IDs to be grouped
    data = get_data0()

    # Expected headers for run 0.
    # This may need to be updated if the default hdrgos are changed.
    exp_hdrs0 = {
        "GO:0050789",  # BP 11,095 L01 D01 B regulation of biological process
        "GO:0044848",  # BP     62 L01 D01 S biological phase
        "GO:0050794",  # BP  8,031 L02 D02 AB regulation of cellular process
        "GO:0019222",  # BP  3,227 L02 D02 AB regulation of metabolic process
        "GO:0048583",  # BP  2,377 L02 D02 AB regulation of response to stimulus
        "GO:0050793",  # BP  1,789 L02 D02 AB regulation of developmental process
        "GO:0023051",  # BP  1,364 L02 D02 AB regulation of signaling
        "GO:0002682",  # BP  1,183 L02 D02 AB regulation of immune system process
        "GO:0007155",  # BP    165 L02 D02 P cell adhesion
        "GO:0080134",  # BP    940 L03 D03 AB regulation of response to stress
        "GO:0007165",  # BP    717 L03 D03 AB signal transduction
        "GO:0050877",  # BP     96 L03 D03 K neurological system process
        "GO:0007267",  # BP     99 L03 D04 CDR cell-cell signaling
    }

    # Run 0: no user header GOs (None), so only the defaults are used
    hdrobj0 = HdrgosSections(
        subdag, grprdflt.hdrgos_dflt, sections=None, hdrgos=None)
    grprobj0 = Grouper("dflt", data, hdrobj0, subdag, go2nt=None)
    _unused_a, _unused_b, nts0_go, act_hdrs0 = run(grprobj0, hdrobj0, exp_hdrs0)

    # Run 1: these header GOs are added to the defaults.
    # NOTE(review): the name given for GO:0099536 is copied from the original
    # comment -- verify it against the ontology.
    hdrgos = {
        "GO:0099536",  # BP    40 L04 D05 CDR regulation of response to stimulus
        "GO:0051239",  # BP 2,532 L02 D02 AB regulation of multicellular organismal process
        "GO:0048519",  # BP 3,293 L02 D02 AB negative regulation of biological process
        "GO:0048518",  # BP 3,353 L02 D02 AB positive regulation of biological process
    }
    exp_hdrs1 = exp_hdrs0.union(hdrgos)
    name = "usrhdrs4"
    hdrobj1 = HdrgosSections(
        subdag, grprdflt.hdrgos_dflt, sections=None, hdrgos=hdrgos)
    grprobj1 = Grouper(name, data, hdrobj1, subdag, go2nt=None)
    sortobj1, _unused_c, nts1_go, act_hdrs1 = run(grprobj1, hdrobj1, exp_hdrs1)

    if do_plt:
        # Imported here so the plotting module is loaded only when requested
        from goatools.grouper.grprplt import GrouperPlot
        prt.write("\nPLOT DAG\n")
        GrouperPlot(grprobj1).plot_grouped_gos()

    # GO:0099536 was a "middle" term (neither usrgo nor hdrgo) in run 0,
    # but is a hdrgo in run 1
    added_gos = nts1_go.difference(nts0_go)
    assert added_gos == {"GO:0099536"}
    added_hdrs = act_hdrs1.difference(act_hdrs0)
    assert added_hdrs == set(hdrgos)

    # Print and flatten run 1 with header GOs turned off
    hdrgo_prt = False
    banner = "\n{NAME}: PRINT GOs hdrgo_prt({H}):\n".format(
        H=hdrgo_prt, NAME=name)
    sys.stdout.write(banner)
    sortobj1.prt_gos(hdrgo_prt=hdrgo_prt)
    nts2_go = {rec.GO for rec in sortobj1.get_nts_flat(hdrgo_prt)}

    # Without header GOs, exactly the input GO IDs remain
    assert len(nts1_go) > len(nts2_go)
    assert nts1_go.intersection(data) == nts2_go
    assert nts2_go == data