예제 #1
0
def test_gosubdag_relationships(wr_new_obo_subset=False):
    """Check GoSubDag ancestors for one leaf GO, with and without relationships.

    Two GO DAGs are loaded from the same obo file: one plain and one with the
    optional 'relationship' attribute. A GoSubDag is built from each for a
    single leaf GO ID, and the results are checked against baselines.

    Args:
        wr_new_obo_subset: When true, first write a subset obo file for the
            chosen GO ID using ``_wr_sub_obo``.
    """
    # Leaf GO: viral triggering of virus induced gene silencing
    leaf_goid = 'GO:0060150'

    obo_path = os.path.join(REPO, "go-basic.obo")
    subset_path = os.path.join(REPO, "tests/data/viral_gene_silence.obo")

    # Same obo file loaded twice: without and with the 'relationship' attribute
    dag_plain = get_godag(obo_path, loading_bar=None)
    dag_relat = get_godag(
        obo_path, loading_bar=None, optional_attrs=['relationship'])

    # Optionally regenerate the small obo subset from the relationship-aware DAG
    if wr_new_obo_subset:
        _wr_sub_obo(subset_path, leaf_goid, dag_relat, obo_path)

    subdag_plain = GoSubDag({leaf_goid}, dag_plain)
    subdag_relat = GoSubDag({leaf_goid}, dag_relat, relationships=True)

    _run_baseline_r0(subdag_plain, subdag_relat)

    # BASELINE r1: each term's get_all_upper() must equal the GoSubDag's
    # rcntobj.go2parents entry for that GO ID
    go2parents = subdag_relat.rcntobj.go2parents
    for goid, goterm in subdag_relat.go2obj.items():
        assert go2parents[goid] == goterm.get_all_upper()
예제 #2
0
 def run(self, go_sources, exp_gos, **kws):
     """Build a GoSubDag from the GO sources and verify the GO IDs it holds.

     Prints the sources and the GoSubDag's GO IDs, then asserts that the
     keys of ``go2nt`` equal ``exp_gos``. Extra keyword arguments are passed
     through to ``GoSubDag``.
     """
     print("\nSRCS: {GOs}".format(GOs=go_sources))
     subdag = GoSubDag(go_sources, self.go2obj_all, **kws)
     go2nt = subdag.go2nt
     subdag.prt_goids(go2nt)
     actual_gos = set(go2nt)
     assert actual_gos == exp_gos, "ACT({}) != EXP({})\n{} {}".format(
         sorted(go2nt), sorted(exp_gos), go_sources, kws)
예제 #3
0
class Run(object):
    """Print and plot GO IDs using GoSubDag objects built from one obo file."""

    def __init__(self, obo):
        # GO DAG read from the obo file located under REPO
        obo_path = os.path.join(REPO, obo)
        self.go2obj_all = get_godag(obo_path)
        # GoSubDag created with no explicit GO sources (None)
        self.gosubdag_all = GoSubDag(None, self.go2obj_all)
        self.prtfmt = self.gosubdag_all.prt_attr['fmta']

    def prt_goids_all(self, prt):
        """Print the GO IDs of the all-GO GoSubDag to `prt` using the 'fmta' format."""
        self.gosubdag_all.prt_goids(prtfmt=self.prtfmt, prt=prt)

    def plt_goids(self, fout_img, go_sources):
        """Plot the GoSubDag for `go_sources` into REPO/`fout_img`, marking alt IDs."""
        # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
        subdag = GoSubDag(go_sources, self.go2obj_all)
        plotter = GoSubDagPlot(subdag, mark_alt_id=True)
        img_path = os.path.join(REPO, fout_img)
        plotter.plt_dag(img_path)

    def run(self, go_sources, exp_gos, **kws):
        """Build a GoSubDag from `go_sources` and assert its GO IDs equal `exp_gos`."""
        print("\nSRCS: {GOs}".format(GOs=go_sources))
        subdag = GoSubDag(go_sources, self.go2obj_all, **kws)
        go2nt = subdag.go2nt
        subdag.prt_goids(go2nt)
        actual_gos = set(go2nt)
        assert actual_gos == exp_gos, "ACT({}) != EXP({})\n{} {}".format(
            sorted(go2nt), sorted(exp_gos), go_sources, kws)
예제 #4
0
 def __init__(self, go2obj, annots, relationships=None, **kws):
     """Initialise per-GO gene sets and gene counts from annotations.

     Args:
         go2obj: Full GO DAG.
         annots: Annotations; cleaned with ``clean_anno`` before use.
         relationships: Passed to ``RelationshipCombos.get_set`` to choose
             the relationship set used for gene sets and the GoSubDag.
         **kws: Only 'prt' is read; when truthy, the object description is
             printed to it at the end.
     """
     prt_stream = kws.get('prt')
     # Keep a handle on the full GODag
     self.go2obj = go2obj
     cleaned = clean_anno(annots, go2obj, prt_stream)
     self.annots, alt_goids = cleaned[:2]
     # Gene sets per GO ID, built for the chosen relationship set
     rel_set = RelationshipCombos(go2obj).get_set(relationships)
     self.go2genes = self._init_go2genes(rel_set, go2obj)
     self.gene2gos = get_b2aset(self.go2genes)
     # GO IDs that have gene sets, and the size of each gene set
     self.goids = set(self.go2genes)
     self.gocnts = Counter(
         {goid: len(genes) for goid, genes in self.go2genes.items()})
     # Total count for each branch: BP MF CC
     namespaces = ('biological_process', 'molecular_function', 'cellular_component')
     self.aspect_counts = {
         nspc: self.gocnts.get(NAMESPACE2GO[nspc], 0) for nspc in namespaces}
     self._init_add_goid_alt(alt_goids)
     # The GO sources are taken after the alternate GO IDs have been handled
     self.gosubdag = GoSubDag(
         set(self.gocnts),
         go2obj,
         tcntobj=self,
         relationships=rel_set,
         prt=None)
     if prt_stream:
         self.prt_objdesc(prt_stream)
예제 #5
0
class Run(object):
    """Helper for plotting tests: holds a GO DAG, annotations and a full GoSubDag."""

    def __init__(self, obo, gaf, prt):
        self.prt = prt
        self.cwd = os.getcwd()
        # Gene Ontologies
        obo_path = os.path.join(REPO, "../goatools/", obo)
        self.go2obj_all = get_godag(obo_path)
        # Annotations: download the GAF, report its path, then read it
        gaf_path = dnld_gaf(gaf)
        print("GAF: {GAF}\n".format(GAF=gaf_path))
        self.gene2gos = read_gaf(gaf_path)
        self.tcntobj = TermCounts(self.go2obj_all, self.gene2gos)
        # GoSubDag created with no explicit GO sources (None)
        self.gosubdag_all = GoSubDag(
            None, self.go2obj_all, tcntobj=self.tcntobj, prt=prt)
        self.prtfmt = self.gosubdag_all.prt_attr['fmta']

    def prt_goids_all(self, prt):
        """Print the GO IDs of the all-GO GoSubDag to `prt` using the 'fmta' format."""
        self.gosubdag_all.prt_goids(prtfmt=self.prtfmt, prt=prt)

    def plt_goids(self, fout_img, go_sources):
        """Print, then plot, the GO IDs selected for plotting from `go_sources`."""
        # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
        full = self.gosubdag_all
        # Reuse the rcntobj and go2nt already held by the full GoSubDag
        subdag = GoSubDag(
            go_sources,
            full.go2obj,
            prt=self.prt,
            rcntobj=full.rcntobj,
            go2nt=full.go2nt)
        fmt = subdag.prt_attr['fmta']
        plot_goids = GoSubDagPlot(subdag).get_goids_plt()
        self.prt.write("\n{N} GO IDs\n".format(N=len(plot_goids)))
        subdag.prt_goids(plot_goids, prtfmt=fmt, prt=self.prt)
        plotter = GoSubDagPlot(subdag, mark_alt_id=True)
        plotter.plt_dag(os.path.join(self.cwd, fout_img))
예제 #6
0
 def plt_goids(self, fout_img, go_sources):
     """Print, then plot, the GO IDs selected for plotting from `go_sources`.

     The image is written to `fout_img` under the working directory stored
     in ``self.cwd``.
     """
     # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
     full = self.gosubdag_all
     # Reuse the rcntobj and go2nt already held by the full GoSubDag
     subdag = GoSubDag(
         go_sources,
         full.go2obj,
         prt=self.prt,
         rcntobj=full.rcntobj,
         go2nt=full.go2nt)
     fmt = subdag.prt_attr['fmta']
     plot_goids = GoSubDagPlot(subdag).get_goids_plt()
     self.prt.write("\n{N} GO IDs\n".format(N=len(plot_goids)))
     subdag.prt_goids(plot_goids, prtfmt=fmt, prt=self.prt)
     plotter = GoSubDagPlot(subdag, mark_alt_id=True)
     plotter.plt_dag(os.path.join(self.cwd, fout_img))
예제 #7
0
 def get_go2desc(goids, go2obj, go2genes):
     """Return an ordered GO ID -> description mapping including gene counts.

     Each description is the number of genes in ``go2genes[goid]`` followed
     by the GO term formatted with the GoSubDag's 'fmt' pattern. Entries are
     ordered by namespace, then depth, then descending ``dcnt``.
     """
     subdag = GoSubDag(goids, go2obj)
     go2nt = subdag.get_go2nt(goids)
     fmt_go = subdag.prt_attr['fmt']

     def _sort_key(item):
         """Sort key for a (goid, namedtuple) pair."""
         nt_go = item[1]
         return [nt_go.NS, nt_go.depth, -1 * nt_go.dcnt]

     go2desc = cx.OrderedDict()
     for goid, nt_go in sorted(go2nt.items(), key=_sort_key):
         go_text = fmt_go.format(**nt_go._asdict())
         num_genes = len(go2genes[goid])
         go2desc[goid] = "{G:6,} genes {DESC}".format(G=num_genes, DESC=go_text)
     return go2desc
def test_gosubdag_relationships(prt=sys.stdout):
    """Check how the `relationships` argument changes the GO IDs in a GoSubDag.

    Four GoSubDags are built from the same GO sources:
      1. DAG loaded without relationships, relationships=False
      2. DAG loaded with relationships,    relationships=False
      3. DAG loaded with relationships,    relationships=['part_of']
      4. DAG loaded with relationships,    relationships=True
    Cases 1 and 2 must give equal GO ID sets; each later case must contain
    the GO IDs of the case before it.
    """
    goids = {
        "GO:0032501",
        "GO:0044707",  # alt_id: GO:0032501  # BP  1011 L01 D01 B multicellular organismal process
        "GO:0050874",
        "GO:0007608",  # sensory perception of smell
        "GO:0050911",  # detection of chemical stimulus involved in sensory perception of smell
    }

    # Load the GO DAG twice: plain, and with the optional 'relationship' attribute
    fin_obo = os.path.join(REPO, "go-basic.obo")
    download_go_basic_obo(fin_obo, prt, loading_bar=None)
    dag_plain = GODag(fin_obo)
    dag_relat = GODag(fin_obo, optional_attrs=['relationship'])

    print("\nCreate GoSubDag with GO DAG containing no relationships.")
    tic = timeit.default_timer()
    subdag = GoSubDag(goids, dag_plain, relationships=False, prt=prt)
    goids_plain = set(subdag.go2obj)
    tic = _rpt_hms(tic, len(subdag.go2obj))

    print("\nCreate GoSubDag while IGNORING relationships")
    subdag = GoSubDag(goids, dag_relat, relationships=False, prt=prt)
    goids_false = set(subdag.go2obj)
    tic = _rpt_hms(tic, len(subdag.go2obj))
    # Ignoring relationships must match a DAG that never loaded them
    assert goids_plain == goids_false

    print("\nCreate GoSubDag while loading only the 'part_of' relationship")
    subdag = GoSubDag(
        goids, dag_relat, relationships=['part_of'], prt=prt)
    goids_part_of = set(subdag.go2obj)
    tic = _rpt_hms(tic, len(subdag.go2obj))
    # Adding 'part_of' keeps every is_a GO ID and adds at least one more
    assert goids_plain <= goids_part_of
    assert len(goids_part_of) > len(goids_plain)

    print("\nCreate GoSubDag while loading all relationships")
    subdag = GoSubDag(goids, dag_relat, relationships=True, prt=prt)
    goids_true = set(subdag.go2obj)
    tic = _rpt_hms(tic, len(subdag.go2obj))
    # All relationships keep every 'part_of' GO ID
    assert goids_part_of <= goids_true
    assert len(goids_true) >= len(goids_part_of)
예제 #9
0
 def plt_goids(self, fout_img, go_sources):
     """Print, then plot, the GO IDs selected for plotting from `go_sources`.

     The image is written to `fout_img` under the working directory stored
     in ``self.cwd``.
     """
     # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
     full = self.gosubdag_all
     # Reuse the rcntobj and go2nt already held by the full GoSubDag
     subdag = GoSubDag(go_sources, full.go2obj, prt=self.prt,
                       rcntobj=full.rcntobj,
                       go2nt=full.go2nt)
     fmt = subdag.prt_attr['fmta']
     plot_goids = GoSubDagPlot(subdag).get_goids_plt()
     self.prt.write("\n{N} GO IDs\n".format(N=len(plot_goids)))
     subdag.prt_goids(plot_goids, prtfmt=fmt, prt=self.prt)
     img_path = os.path.join(self.cwd, fout_img)
     GoSubDagPlot(subdag, mark_alt_id=True).plt_dag(img_path)
예제 #10
0
def build_hierarcy():
    """Extract the GO hierarchy and store high-scoring leaf-to-leaf PPI edges in Neo4j.

    Steps:
      1. Fetch the PPI edges and the GO <-> gene ID mappings.
      2. Load the GO DAG (with relationships) and extract the hierarchy
         under GO:0005575, writing it to ``go_hierarcy.txt``.
      3. For every PPI edge "SRC=DST" whose endpoints are both leaf vertices
         of that hierarchy and whose score exceeds 100000, MERGE the two GO
         nodes and the scored edge into the local Neo4j database.

    Prints the number of edges written. Returns None.
    """
    print("fetching ppi")
    go_edges = fetch_string_ppi_edges()

    go2geneids, _geneids2go = fetch_go_hierarcy()

    dag_fin = os.path.join(constants.GO_DIR, constants.GO_FILE_NAME)
    godag = GODag(dag_fin, optional_attrs=['relationship'])
    gosubdag = GoSubDag(godag.keys(), godag)

    root_term = 'GO:0005575'
    fout_hier = os.path.join(constants.BASE_PROFILE, "output", "go_hierarcy.txt")
    # The context manager closes the file; the original leaked the handle
    with open(fout_hier, "w+") as out:
        vertices, _edges = extract_hier_all(gosubdag, out, root_term, go2geneids)

    # NOTE(review): connection URI and credentials are hard-coded in source;
    # consider moving them to configuration.
    driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "Hh123456"))

    def add_edge(tx, src, dst, score):
        """MERGE both GO nodes and the scored SCR relationship between them."""
        # Values travel as query parameters, so GO IDs are never spliced into
        # the Cypher text. The score is sent as-is; assumes it is already
        # numeric, as the threshold comparison below implies.
        tx.run("MERGE (n1:GO {term: $src}) "
               "MERGE (n2:GO {term: $dst}) "
               "MERGE (n1)-[r:SCR {score: $score}]->(n2)",
               src=src, dst=dst, score=score)

    try:
        with driver.session() as session:
            count = 0
            for cur_edges, score in go_edges.items():
                ends = cur_edges.split("=")
                # Keep only edges between two known leaf vertices with a high score
                if ends[0] in vertices and ends[1] in vertices and score > 100000 \
                        and vertices[ends[0]]['isleaf'] and vertices[ends[1]]['isleaf']:
                    count += 1
                    session.write_transaction(add_edge, ends[0], ends[1], score)
            print("total edges: {}".format(count))
    finally:
        # Release the driver's connections even if a transaction fails
        driver.close()
예제 #11
0
 def wr_xlsx(self, fout_xlsx, goids, sortby=None, **kws_usr):
     """Write the GO IDs to an xlsx table.

     User keyword arguments are forwarded to ``wr_xlsx_tbl``. When the user
     gives no 'fld2col_widths', this object's ``fld2col_widths`` is used.
     """
     subdag = GoSubDag(goids, self.go2obj)
     nts = subdag.get_nts(goids, sortby)
     # Work on a copy so the caller's keyword dict is left untouched
     kws_wr = dict(kws_usr)
     if 'fld2col_widths' not in kws_wr:
         kws_wr['fld2col_widths'] = self.fld2col_widths
     wr_xlsx_tbl(fout_xlsx, nts, **kws_wr)
예제 #12
0
def build_hierarcy(go_folder, roots=None, ev_exclude=None):
    """Extract the GO hierarchy under each root term.

    Args:
        go_folder: Passed to ``fetch_go_hierarcy``.
        roots: Iterable of root GO IDs. Defaults to ['GO:0008150'].
            (Other roots seen in the original comment: GO:0005575, GO:0003674.)
        ev_exclude: Passed to ``fetch_go_hierarcy``. Defaults to a new empty set.

    Returns:
        A 4-tuple ``(dict_result, go2geneids, geneids2go, entrez2ensembl)``
        where ``dict_result`` maps each root GO ID to
        ``{"vertices": ..., "edges": ...}`` as returned by ``extract_hier_all``.
    """
    # Fresh defaults on every call: mutable default arguments would be shared
    # between calls and could leak changes from one call into the next.
    if roots is None:
        roots = ['GO:0008150']
    if ev_exclude is None:
        ev_exclude = set()

    go2geneids, geneids2go = fetch_go_hierarcy(go_folder, ev_exclude)

    dag_fin = os.path.join(constants.GO_DIR, constants.GO_FILE_NAME)
    tic = timeit.default_timer()
    godag = GODag(dag_fin, optional_attrs=['relationship'])
    gosubdag = GoSubDag(godag.keys(), godag)
    toc = timeit.default_timer()

    # The elapsed time covers loading the DAG and building the GoSubDag only;
    # it is the same for every root, so format it once.
    msg = "Elapsed HMS: {}\n\n".format(
        str(datetime.timedelta(seconds=(toc - tic))))

    dict_result = {}
    for cur_term in roots:
        vertices, edges = extract_hier_all(gosubdag, cur_term, go2geneids)
        sys.stdout.write(msg)
        dict_result[cur_term] = {"vertices": vertices, "edges": edges}
    return dict_result, go2geneids, geneids2go, get_entrez2ensembl_dictionary()
예제 #13
0
def test_go_parents():
    """Run both GO parent checks on one GoSubDag built from go-basic.obo."""
    godag = get_godag("go-basic.obo", prt=None)
    # No explicit GO sources (None); rcntobj=True is requested
    gosubdag_all = GoSubDag(None, godag, rcntobj=True)
    run_1(gosubdag_all)
    run_2(gosubdag_all)
예제 #14
0
def test_wr_sections_all():
    """Write sections as a text file and a Python file, then read both back.

    The sections are read from an input file, grouped with the user GO IDs
    taken from ``goea_results``, written with ``WrSectionsTxt`` and
    ``WrSectionsPy``, and finally re-read with ``_read_sections``.
    NOTE(review): the visible code does not compare the re-read sections.
    """
    fin_sections = "data/gjoneska_pfenning/sections_in.txt"
    fout_txt = "tmp_test_sections_out.txt"
    # Travis-CI path is cwd
    fout_py = "tmp_test_sections.py"
    # User GO IDs and the sections read from the input file
    usrgos = [nt.GO for nt in goea_results]
    sections_in = _read_sections(fin_sections)
    # Preliminaries: GO DAG with relationships, GoSubDag and default groupings
    godag = get_godag(
        "go-basic.obo", prt=None, loading_bar=False, optional_attrs=['relationship'])
    gosubdag = GoSubDag(usrgos, godag, relationships=True, tcntobj=None)
    grprdflt = GrouperDflts(gosubdag)
    # The last entry of sections_in is left out of the header sections; its
    # name must equal the default section name
    last_section = sections_in[-1]
    hdrobj = HdrgosSections(gosubdag, grprdflt.hdrgos_dflt, sections_in[:-1])
    assert last_section[0] == hdrobj.secdflt, last_section[0]
    grprobj = Grouper("test", usrgos, hdrobj, gosubdag)
    # Create the text and Python sections files under REPO
    WrSectionsTxt(grprobj).wr_txt_section_hdrgos(os.path.join(REPO, fout_txt))
    objsecpy = WrSectionsPy(grprobj)
    objsecpy.wr_py_sections(
        os.path.join(REPO, fout_py), sections_in, doc=godag.version)
    # Read the text and Python sections files
    sec_wr = _read_sections(fout_txt)
    sec_py = _read_sections(fout_py)
예제 #15
0
def test_write_hier_bp_mf_cc():
    """Check that the hierarchy writer emits enough items for BP, MF and CC.

    Builds a GoSubDag over every GO ID in go-basic.obo (children included,
    relationships off) and asserts a minimum number of written items for
    all three namespaces together and for each one alone.
    """
    fin_anno = os.path.join(REPO, 'gene2go')
    fin_dag = os.path.join(REPO, "go-basic.obo")
    _dnld_anno(fin_anno)

    print('\nTEST STORING ONLY ONE SPECIES')
    godag = get_godag(fin_dag)
    gene2gos = read_annotations(namespace='ALL')
    # Term counts are only built when annotations were read
    tcntobj = TermCounts(godag, gene2gos) if gene2gos else None
    gosubdag = GoSubDag(
        godag.keys(),
        godag,
        relationships=False,
        tcntobj=tcntobj,
        children=True,
        prt=sys.stdout)
    objwr = WrHierGO(gosubdag)

    # 2020 11:
    #     594,748 GO lines under GO:0008150
    #      23,199 GO lines under GO:0003674
    #       6,259 GO lines under GO:0005575
    #     624,206 items WROTE: tmp_test_wr_hier_BP_MF_CC.txt
    minimum_items = (
        (['BP', 'MF', 'CC'], 600000),
        (['BP'], 500000),
        (['MF'], 20000),
        (['CC'], 5000),
    )
    for namespaces, min_items in minimum_items:
        assert len(_wr_hier(namespaces, gosubdag.go2nt, objwr)) > min_items
예제 #16
0
 def _get_tcntobj(goids, go2obj, **kws):
     """Get a TermCounts object if the user provides an annotation file, otherwise None."""
     # kws: gaf (gene2go taxid)
     if 'gaf' in kws or 'gene2go' in kws:
         # Get a reduced go2obj set for TermCounts
         _gosubdag = GoSubDag(goids, go2obj, rcntobj=False, prt=None)
         return get_tcntobj(_gosubdag.go2obj, **kws)  # TermCounts
예제 #17
0
파일: metrics.py 프로젝트: hreinwal/MegaGO
def get_ic_of_most_informative_ancestor(id, term_counts, go_dag):
    """Return the highest information content among the ancestors of a GO term.

    Parameters
    ----------
    id : str
        GO term
    term_counts : dict
        dictionary: key: GO terms, values: number of occurrences of GO term and its children in body of evidence
    go_dag : GODag object
        GODag object from the goatools package

    Returns
    -------
    float
        0 when the term itself has a positive count in `term_counts`, when
        no ancestors are recorded for it, or when no ancestor has an
        information content above 0; otherwise the largest ancestor value.

    """
    # A term with a positive count of its own short-circuits to 0
    if term_counts.get(id, 0) > 0:
        return 0

    go2ancestors = GoSubDag([id], go_dag, prt=None).rcntobj.go2ancestors
    if id not in go2ancestors:
        return 0

    # Seeding with 0 keeps the result from dropping below 0
    ancestor_ics = [
        get_info_content(ancestor, term_counts, go_dag)
        for ancestor in go2ancestors[id]
    ]
    return max([0] + ancestor_ics)
예제 #18
0
def build_hierarcy():
    """Extract the GO hierarchy and write the filtered leaf-to-leaf PPI edges to a file.

    Steps:
      1. Fetch the PPI edges and the GO <-> gene ID mappings.
      2. Load the GO DAG (with relationships) and extract the hierarchy
         under GO:0005575, writing it to ``go_hierarcy.txt``.
      3. Keep every PPI edge "SRC=DST" whose endpoints are both leaf vertices
         of that hierarchy and whose score exceeds 1000, and write them as
         tab-separated "edge<TAB>score" lines to ``GO_edges_ppi_filtered.txt``.

    Returns None.
    """
    print("fetching ppi")
    go_edges = fetch_string_ppi_edges()

    go2geneids, _geneids2go = fetch_go_hierarcy()

    dag_fin = os.path.join(constants.GO_DIR, constants.GO_FILE_NAME)
    godag = GODag(dag_fin, optional_attrs=['relationship'])
    gosubdag = GoSubDag(godag.keys(), godag)

    root_term = 'GO:0005575'
    fout_hier = os.path.join(constants.BASE_PROFILE, "output", "go_hierarcy.txt")
    # The context manager closes the file; the original leaked the handle
    with open(fout_hier, "w+") as out:
        vertices, _edges = extract_hier_all(gosubdag, out, root_term, go2geneids)

    lines = []
    for cur_edges, score in go_edges.items():
        ends = cur_edges.split("=")
        # Keep only edges between two known leaf vertices with a high enough score
        if ends[0] in vertices and ends[1] in vertices and score > 1000 \
                and vertices[ends[0]]['isleaf'] and vertices[ends[1]]['isleaf']:
            lines.append("{}\t{}\n".format(cur_edges, score))

    print("about to write filtered ppi go edges to file ({} lines)".format(
        len(lines)))
    fout_edges = os.path.join(constants.OUTPUT_GLOBAL_DIR,
                              "GO_edges_ppi_filtered.txt")
    with open(fout_edges, "w+") as f:
        f.writelines(lines)
예제 #19
0
 def cli(self, prt=sys.stdout):
     """Run the command-line flow: load GOs, group them, write sections and GO files.

     Reads the arguments from ``self.objdoc``, builds the GoSubDag and the
     grouper, writes the sections text file(s), optionally the Python
     sections file, and the sorted user GO IDs. Versions are written to `prt`.
     """
     kws = self.objdoc.get_docargs(prt=None)
     godag = get_godag(
         kws['obo'], prt=None, loading_bar=False, optional_attrs=['relationship'])
     go_reader = GetGOs(godag, max_gos=200)
     usrgos = go_reader.get_usrgos(kws.get('GO_FILE'), prt)
     tcntobj = self._get_tcntobj(usrgos, godag, **kws)  # Gets TermCounts or None
     self.gosubdag = GoSubDag(
         usrgos, godag, relationships=True, tcntobj=tcntobj, prt=None)
     grprdflt = GrouperDflts(self.gosubdag, kws['slims'])
     # Report the GO DAG version and the GO slims version
     versions = [godag.version, grprdflt.ver_goslims]
     prt.write("{VER}\n".format(VER="\n".join(versions)))
     sections = self._read_sections(kws['ifile'])
     hdrobj = HdrgosSections(self.gosubdag, grprdflt.hdrgos_dflt, sections)
     grprobj = Grouper("init", usrgos, hdrobj, self.gosubdag)
     # Write sections: create the input sections file if it does not exist
     # yet, and always write the output sections file
     txt_writer = WrSectionsTxt(grprobj, versions)
     if not os.path.exists(kws['ifile']):
         txt_writer.wr_txt_section_hdrgos(kws['ifile'])
     txt_writer.wr_txt_section_hdrgos(kws['ofile'])
     py_writer = WrSectionsPy(grprobj, versions)
     if 'py' in kws:
         py_writer.wr_py_sections(kws['py'], sections, doc=godag.version)
     # Write user GO IDs in sections
     sorter = Sorter(grprobj)
     go_writer = WrXlsxSortedGos("init", sorter, versions)
     go_writer.wr_txt_gos(kws['txt'], sortby=py_writer.fncsortnt)
     self._prt_cnt_usrgos(usrgos, sys.stdout)
예제 #20
0
def show_go_dag_for_terms(terms, add_relationships=True):
    """Plot the GO DAG for the given GO terms and return it as an image.

    Args:
        terms: GO IDs, given as a list or as a pandas Series (any Series
            subclass is accepted and converted to a list).
        add_relationships: When true, the GO DAG is loaded with the optional
            'relationship' attribute and the GoSubDag uses relationships.

    Returns:
        An ``Image`` of 'geneinfo_cache/plot.png', or None when `terms` is empty.
    """
    # isinstance also covers Series subclasses; an exact type check would let
    # them reach `not terms`, which raises ValueError for a Series
    if isinstance(terms, pd.Series):
        terms = terms.tolist()

    if not terms:
        return None

    # Silence everything written to stdout while downloading, loading and plotting
    with open(os.devnull, 'w') as null, redirect_stdout(null):

        # Called for their download side effects; the return values are not used
        download_and_move_go_basic_obo(prt=null)
        download_ncbi_associations(prt=null)

        if add_relationships:
            optional_attrs = ['relationship', 'def']
        else:
            optional_attrs = ['def']
        # NOTE(review): the obo path is hard-coded here rather than taken from
        # download_and_move_go_basic_obo() - confirm they agree
        obodag = GODag("geneinfo_cache/go-basic.obo",
                       optional_attrs=optional_attrs,
                       prt=null)

        gosubdag = GoSubDag(terms, obodag, relationships=add_relationships)
        GoSubDagPlot(gosubdag).plt_dag('geneinfo_cache/plot.png')

    return Image('geneinfo_cache/plot.png')
예제 #21
0
def plt_goea_results(fout_img, goea_results, **kws):
    """Plot the GO terms of the GOEA results into a single image file.

    The GO sources and the GO ID -> term mapping are both taken from the
    results; extra keyword arguments are forwarded to ``GoSubDagPlot``.
    """
    go_sources = []
    go2obj = {}
    for rec in goea_results:
        go_sources.append(rec.GO)
        go2obj[rec.GO] = rec.goterm
    subdag = GoSubDag(go_sources, go2obj, rcntobj=True)
    plotter = GoSubDagPlot(subdag, goea_results=goea_results, **kws)
    plotter.plt_dag(fout_img)
예제 #22
0
def test_example():
    """Run a GOEA on a mouse study and plot the significant results."""
    # --------------------------------------------------------------------
    # Gene Ontology Enrichment Analysis (GOEA)
    # --------------------------------------------------------------------
    taxid = 10090  # Mouse study
    # Population IDs, study IDs (with symbols) and the GOEA object
    geneids_pop = GeneID2nt_mus.keys()
    geneids2symbol_study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    geneids_study = geneids2symbol_study.keys()
    goeaobj = get_goeaobj("fdr_bh", geneids_pop, taxid)
    go2obj = goeaobj.obo_dag
    # Run the GOEA and keep results with p_fdr_bh below 0.05
    results_all = goeaobj.run_study(geneids_study)
    results_sig = [res for res in results_all if res.p_fdr_bh < 0.05]
    results_nt = MgrNtGOEAs(results_sig).get_goea_nts_all()
    assert results_nt
    ns2gos = get_ns2gos(results_sig)
    # Test plotting GOEA results
    sig_goids = set(res.GO for res in results_sig)
    gosubdag = GoSubDag(sig_goids, go2obj)
    plot_results(
        "test_plot_goids_a_goea_{NS}.png",
        results_sig,
        id2symbol=geneids2symbol_study,
        parentcnt=True,
        childcnt=True)
    # One pair of plots per namespace
    for nss, goids in ns2gos.items():
        fout_b = "test_plot_goids_b_{NS}.png".format(NS=nss)
        plt_goids(gosubdag, fout_b, goids)
        fout_c = "test_plot_goids_c_{NS}.png".format(NS=nss)
        plot_gos(fout_c, goids, go2obj)
예제 #23
0
def _get_grprobj():
    """Build a Grouper for USER_GOS using SECTIONS and the default header GOs."""
    obo_path = os.path.join(REPO, "go-basic.obo")
    godag = get_godag(
        obo_path, prt=None, loading_bar=False, optional_attrs=['relationship'])
    subdag = GoSubDag(USER_GOS, godag, relationships=True, tcntobj=None)
    defaults = GrouperDflts(subdag)
    hdrobj = HdrgosSections(subdag, defaults.hdrgos_dflt, SECTIONS)
    grprobj = Grouper("wrusrgos", USER_GOS, hdrobj, subdag)
    return grprobj
예제 #24
0
def test_i177():
    """Run the code from issue #177, which reported a recursion error."""
    go_id = 'GO:0050807'
    godag = get_godag('go.obo', optional_attrs='relationship')
    subdag = GoSubDag([go_id], godag, prt=None)
    # Looking up the ancestors is the step taken from the issue report
    ancestors = subdag.rcntobj.go2ancestors[go_id]
    print('{GO} ancestors: {P}'.format(GO=go_id, P=ancestors))
예제 #25
0
def test_full(out=sys.stdout, opt_fields=None):
    """Load go-basic.obo and run both write-hierarchy tests from the leaf GO IDs."""
    dag = _load_dag("./go-basic.obo", opt_fields, out)
    # Leaves are the terms that have no children
    leaf_goids = {term.id for term in dag.values() if not term.children}
    subdag = GoSubDag(leaf_goids, dag)
    test_write_hier_all("FULL", "GO:0000009", subdag, out)
    test_write_hier_norep("FULL", "GO:0000010", subdag, out)
def _get_enriched_goids(top, godag):
    """Return `top`, if present, plus every GO ID whose descendant set contains `top`.

    The descendant sets come from a GoSubDag built over the whole DAG with
    all relationships enabled.
    """
    go2descendants = GoSubDag(None, godag, relationships=True).rcntobj.go2descendants
    selected = set()
    for goid, descendants in go2descendants.items():
        if top in descendants or top == goid:
            selected.add(goid)
    return selected
예제 #27
0
def _get_gosubdag():
    """Load go-basic.obo with relationships and return a GoSubDag built from it."""
    obo_path = os.path.join(REPO, 'go-basic.obo')
    godag = get_godag(
        obo_path,
        prt=sys.stdout,
        loading_bar=False,
        optional_attrs=['relationship'])
    # No explicit GO sources (None)
    gosubdag = GoSubDag(None, godag)
    return gosubdag
예제 #28
0
 def __init__(self, obo, gaf, prt):
     """Load the GO DAG and annotations, then build term counts and a full GoSubDag."""
     self.prt = prt
     self.cwd = os.getcwd()
     # Gene Ontologies
     obo_path = os.path.join(REPO, "../goatools/", obo)
     self.go2obj_all = get_godag(obo_path)
     # Annotations: download the GAF, report its path, then read it
     gaf_path = dnld_gaf(gaf)
     print("GAF: {GAF}\n".format(GAF=gaf_path))
     self.gene2gos = read_gaf(gaf_path)
     self.tcntobj = TermCounts(self.go2obj_all, self.gene2gos)
     # GoSubDag created with no explicit GO sources (None)
     self.gosubdag_all = GoSubDag(
         None, self.go2obj_all, tcntobj=self.tcntobj, prt=prt)
     self.prtfmt = self.gosubdag_all.prt_attr['fmta']
예제 #29
0
 def get_gosubdag_all(self, prt=sys.stdout):
     """Return a GoSubDag whose sources are all GO IDs found in the descendant sets.

     The sources are the union of every value in this object's
     ``gosubdag.rcntobj.go2descendants``; the new GoSubDag reuses this
     object's GO DAG and relationships and takes this object as `tcntobj`.
     """
     descendant_sets = self.gosubdag.rcntobj.go2descendants.values()
     goids = set().union(*descendant_sets)
     return GoSubDag(goids, self.go2obj, self.gosubdag.relationships, tcntobj=self, prt=prt)
def test_nb():
    """Run the notebook code: virion dcnt under different relationship settings."""
    godag = get_godag("go-basic.obo", optional_attrs={'relationship'})
    go_leafs = set(term.item_id for term in godag.values() if not term.children)
    virion = 'GO:0019012'

    def _show_dcnt(subdag, msg_fmt):
        """Print the virion namedtuple and its dcnt using the given message."""
        nt_virion = subdag.go2nt[virion]
        print(nt_virion)
        print(msg_fmt.format(dcnt=nt_virion.dcnt))

    # No relationships argument
    _show_dcnt(GoSubDag(go_leafs, godag), 'r0 THE VALUE OF dcnt IS: {dcnt}')

    # All relationships
    _show_dcnt(
        GoSubDag(go_leafs, godag, relationships=True),
        'r1 THE VALUE OF dcnt IS: {dcnt}')

    # Only the 'part_of' relationship
    subdag_partof = GoSubDag(go_leafs, godag, relationships={'part_of'})
    _show_dcnt(subdag_partof, 'THE VALUE OF dcnt IS: {dcnt}')
    descendants = subdag_partof.rcntobj.go2descendants[virion]
    print('{N} descendants of virion were found'.format(N=len(descendants)))

    # Limit plot of descendants to get a smaller plot
    virion_capsid_fiber = {'GO:0098033', 'GO:0098032'}
    subdag_partof.prt_goids(
        virion_capsid_fiber,
        '{NS} {GO} dcnt({dcnt}) D-{depth:02} {GO_name}')

    # Limit plot size by choosing just two virion descendants:
    # build a GoSubDag from those two GO IDs only and plot it
    plot_subdag = GoSubDag(virion_capsid_fiber, godag, relationships={'part_of'})
    GoSubDagPlot(plot_subdag).plt_dag('virion_capsid_fiber.png')
예제 #31
0
def plt_goids(gosubdag, fout_img, goids, **kws_plt):
    """Plot the given GO IDs as a DAG and return the plot object.

    A new GoSubDag is built for `goids` from the given GoSubDag's ``go2obj``
    and ``rcntobj``. The keyword arguments are forwarded to both the new
    GoSubDag and the ``GoSubDagPlot``.
    """
    subdag = GoSubDag(
        goids, gosubdag.go2obj, rcntobj=gosubdag.rcntobj, **kws_plt)
    plotter = GoSubDagPlot(subdag, **kws_plt)
    plotter.plt_dag(fout_img)
    return plotter
예제 #32
0
 def _plot_grouped_gos(self, fout_img, pltgosusr, kws_plt, kws_dag):
     """Plot the user GO IDs with a GoSubDag derived from the grouper's GoSubDag.

     `kws_dag` is forwarded to ``GoSubDag`` and `kws_plt` to ``GoSubDagPlot``.
     """
     grouped = self.grprobj.gosubdag
     # Reuse the relationships, rcntobj and go2nt of the grouper's GoSubDag
     subdag = GoSubDag(
         pltgosusr,
         grouped.get_go2obj(pltgosusr),
         grouped.relationships,
         rcntobj=grouped.rcntobj,
         go2nt=grouped.go2nt,
         **kws_dag)
     GoSubDagPlot(subdag, **kws_plt).plt_dag(fout_img)
예제 #33
0
class Run(object):
    """Print and plot GO IDs using GoSubDag objects built from one obo file."""

    def __init__(self, obo):
        self.cwd = os.getcwd()
        # GO DAG read from the obo file located relative to REPO
        obo_path = os.path.join(REPO, "../goatools/", obo)
        self.go2obj_all = get_godag(obo_path)
        # GoSubDag created with no explicit GO sources (None)
        self.gosubdag_all = GoSubDag(None, self.go2obj_all)
        self.prtfmt = self.gosubdag_all.prt_attr['fmta']

    def prt_goids_all(self, prt):
        """Print the GO IDs of the all-GO GoSubDag to `prt` using the 'fmta' format."""
        self.gosubdag_all.prt_goids(prtfmt=self.prtfmt, prt=prt)

    def plt_goids(self, fout_img, go_sources):
        """Plot the GoSubDag for `go_sources` into cwd/`fout_img`, marking alt IDs."""
        # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
        subdag = GoSubDag(go_sources, self.go2obj_all)
        plotter = GoSubDagPlot(subdag, mark_alt_id=True)
        img_path = os.path.join(self.cwd, fout_img)
        plotter.plt_dag(img_path)
예제 #34
0
def test_semantic_i88():
    """Compute basic semantic similarities between GO terms.

    Loads the GO DAG and the Arabidopsis (TAIR) annotations, then prints the
    semantic similarity, the information content, the Resnik score and the
    Lin score for a fixed pair of GO terms.
    """
    # NOTE(review): this path is relative to the current working directory,
    # unlike the GAF path below, which is anchored at REPO -- confirm intended.
    godag = obo_parser.GODag("go-basic.obo")
    # Use every key of the GODag as a GO source.
    # (A filtered set, keeping only keys equal to the term's own id, used to be
    # computed here and was immediately overwritten; that dead store is removed.)
    goids = set(godag.keys())
    # Get all the annotations from arabidopsis.
    fin_gaf = os.path.join(REPO, "tair.gaf")
    # dnld_assc includes read_gaf
    associations = dnld_assc(fin_gaf, godag, prt=None)

    # First get the counts and information content for each GO term.
    termcounts = TermCounts(godag, associations)
    gosubdag = GoSubDag(goids, godag, tcntobj=termcounts)

    # Now we can calculate the semantic distance and semantic similarity, as so:
    #       "The semantic similarity between terms GO:0048364 and GO:0044707 is 0.25.
    go_id3 = 'GO:0048364' # BP level-03 depth-04 root development
    go_id4 = 'GO:0044707' # BP level-02 depth-02 single-multicellular organism process
    go_root = deepest_common_ancestor([go_id3, go_id4], godag)
    sim = semantic_similarity(go_id3, go_id4, godag)
    print('\nThe semantic similarity between terms {GO1} and {GO2} is {VAL}.'.format(
        GO1=go_id3, GO2=go_id4, VAL=sim))
    gosubdag.prt_goids([go_root, go_id3, go_id4])

    # Calculate the information content of the first term (same ID as go_id3).
    infocontent = get_info_content(go_id3, termcounts)
    print('\nInformation content ({GO}) = {INFO}\n'.format(GO=go_id3, INFO=infocontent))

    # Resnik's similarity measure is defined as the information content of the most
    # informative common ancestor. That is, the most specific common parent-term in
    # the GO. Then we can calculate this as follows:
    #       "Resnik similarity score (GO:0048364, GO:0044707) = 4.0540784252
    sim_r = resnik_sim(go_id3, go_id4, godag, termcounts)
    print('Resnik similarity score ({GO1}, {GO2}) = {VAL}'.format(
        GO1=go_id3, GO2=go_id4, VAL=sim_r))

    # Lin similarity score (GO:0048364, GO:0044707) = -0.607721957763
    sim_l = lin_sim(go_id3, go_id4, godag, termcounts)
    print('Lin similarity score ({GO1}, {GO2}) = {VAL}'.format(
        GO1=go_id3, GO2=go_id4, VAL=sim_l))
예제 #35
0
 def __init__(self, obo, gaf, prt):
     """Load the GO DAG and GAF annotations, then build term counts and a GoSubDag.

     Args:
         obo: obo filename, joined onto ``REPO/../goatools/``.
         gaf: GAF argument handed to ``dnld_gaf`` exactly as given.
         prt: Stored on the instance and forwarded to ``GoSubDag`` as ``prt``.
     """
     self.prt = prt
     self.cwd = os.getcwd()
     # Gene Ontologies
     fin_obo = os.path.join(REPO, "../goatools/", obo)
     self.go2obj_all = get_godag(fin_obo)
     # Annotations: gaf is passed through as-is, not joined onto REPO
     gaf_path = dnld_gaf(gaf)
     print("GAF: {GAF}\n".format(GAF=gaf_path))
     self.gene2gos = read_gaf(gaf_path)
     self.tcntobj = TermCounts(self.go2obj_all, self.gene2gos)
     # GoSubDag with None as the GO sources, carrying the term counts
     all_dag = GoSubDag(None, self.go2obj_all, tcntobj=self.tcntobj, prt=prt)
     self.gosubdag_all = all_dag
     self.prtfmt = all_dag.prt_attr['fmta']
예제 #36
0
 def get_nts_sections(self, sections, sortby=None):
     """Convert sections of GO IDs into sections of GO namedtuples.

     Args:
         sections: Iterable of ``(section_name, goids)`` pairs.
         sortby: Sort argument forwarded to ``GoSubDag.get_nts``.

     Returns:
         A list of ``(section_name, nts)`` pairs in the order of ``sections``.
     """
     all_goids = self.get_goids_sections(sections)
     subdag = GoSubDag(all_goids, self.go2obj)
     sections_nts = []
     for section_name, section_goids in sections:
         sections_nts.append((section_name, subdag.get_nts(section_goids, sortby)))
     return sections_nts
    def plot_all(self, goids, name, prt=sys.stdout):
        """Plot the same GO IDs three times, loading more relationships each time.

        Writes three images under REPO: one with relationships disabled, one
        with only 'part_of', and one with all relationships. In the last
        image, GO IDs absent from the first sub-DAG are given their own color.

        Args:
            goids: GO IDs used as the sources of each GoSubDag.
            name: Tag inserted into the three output image filenames.
            prt: Stream that receives the progress text and GO ID listings.
        """
        go2obj = self.go2obj

        # Pass 1: relationships turned off
        prt.write("\nCreate GoSubDag not loading any relationship")
        subdag_r0 = GoSubDag(goids, go2obj, relationships=False, prt=prt)
        subdag_r0.prt_goids(subdag_r0.go2obj, prt=prt)
        prt.write("{N} GO IDS".format(N=len(subdag_r0.go2obj)))
        plot_r0 = GoSubDagPlot(subdag_r0, mark_alt_id=True)
        png_r0 = "a_relationship_{NAME}_r0.png".format(NAME=name)
        plot_r0.plt_dag(os.path.join(REPO, png_r0))

        # Pass 2: only the 'part_of' relationship
        prt.write("\nCreate GoSubDag while loading only the 'part_of' relationship")
        subdag_partof = GoSubDag(goids, go2obj, relationships=['part_of'], prt=prt)
        subdag_partof.prt_goids(subdag_partof.go2obj, prt=prt)
        prt.write("{N} GO IDS".format(N=len(subdag_partof.go2obj)))
        plot_partof = GoSubDagPlot(subdag_partof, mark_alt_id=True)
        prt.write("GO SOURCES:")
        subdag_partof.prt_goids(subdag_partof.go_sources, prt=prt)
        png_partof = "a_relationship_{NAME}_partof.png".format(NAME=name)
        plot_partof.plt_dag(os.path.join(REPO, png_partof))

        # Pass 3: all relationships
        prt.write("\nCreate GoSubDag while loading all relationships")
        subdag_r1 = GoSubDag(goids, go2obj, relationships=True, prt=prt)
        prt.write("ALL {N} GO IDS:".format(N=len(subdag_r1.go2obj)))
        subdag_r1.prt_goids(subdag_r1.go2obj, prt=prt)
        prt.write("2 GO SOURCES:")
        subdag_r1.prt_goids(subdag_r1.go_sources, prt=prt)
        # GO IDs in this sub-DAG that the first (relationships=False) one lacks
        added_goids = set(subdag_r1.go2obj) - set(subdag_r0.go2obj)
        added_colors = dict.fromkeys(added_goids, '#d5ffff')
        prt.write("{N} NEW GO IDS:".format(N=len(added_goids)))
        subdag_r1.prt_goids(added_goids, prt=prt)
        prt.write("{N} GO IDS".format(N=len(subdag_r1.go2obj)))
        plot_r1 = GoSubDagPlot(subdag_r1, mark_alt_id=True, go2color=added_colors)
        png_r1 = "a_relationship_{NAME}_r1.png".format(NAME=name)
        plot_r1.plt_dag(os.path.join(REPO, png_r1))
예제 #38
0
 def __init__(self, obo):
     """Load the GODag from ``REPO/<obo>`` and wrap all of it in a GoSubDag."""
     fin_obo = os.path.join(REPO, obo)
     self.go2obj_all = get_godag(fin_obo)
     # None is passed as the GO sources argument.
     all_dag = GoSubDag(None, self.go2obj_all)
     self.gosubdag_all = all_dag
     self.prtfmt = all_dag.prt_attr['fmta']
예제 #39
0
 def __init__(self, obo):
     """Record the working directory, load the GODag, and wrap it in a GoSubDag.

     Args:
         obo: obo filename, joined onto ``REPO/../goatools/``.
     """
     self.cwd = os.getcwd()
     fin_obo = os.path.join(REPO, "../goatools/", obo)
     self.go2obj_all = get_godag(fin_obo)
     # None is passed as the GO sources argument.
     all_dag = GoSubDag(None, self.go2obj_all)
     self.gosubdag_all = all_dag
     self.prtfmt = all_dag.prt_attr['fmta']