Esempio n. 1
0
def test_gosubdag_relationships(wr_new_obo_subset=False):
    """Compare a GoSubDag built on 'is_a' only with one that also follows relationships.

    When wr_new_obo_subset is true, the small OBO subset file for the chosen
    GO term is written before the sub-DAGs are built.
    """
    # Leaf GO: viral triggering of virus induced gene silencing
    leaf_goid = 'GO:0060150'

    # Load the full DAG twice: without and with the optional 'relationship' attribute
    obo_path = os.path.join(REPO, "go-basic.obo")
    dag_isa = get_godag(obo_path, loading_bar=None)
    dag_rel = get_godag(obo_path, loading_bar=None, optional_attrs=['relationship'])

    subset_path = os.path.join(REPO, "tests/data/viral_gene_silence.obo")
    if wr_new_obo_subset:
        _wr_sub_obo(subset_path, leaf_goid, dag_rel, obo_path)

    subdag_isa = GoSubDag({leaf_goid}, dag_isa)
    subdag_rel = GoSubDag({leaf_goid}, dag_rel, relationships=True)

    _run_baseline_r0(subdag_isa, subdag_rel)

    # BASELINE r1: GOTerm.get_all_upper() must agree with what the GoSubDag recorded
    for goid, term in subdag_rel.go2obj.items():
        assert subdag_rel.rcntobj.go2parents[goid] == term.get_all_upper()
def test_pc_w_rels(prt=sys.stdout):
    """Compare propagate-counts results computed without and with relationships."""
    obo_path = os.path.join(REPO, "go-basic.obo")
    # The relationship-free DAG is loaded but not used by the comparisons below
    get_godag(obo_path, prt, loading_bar=None)
    dag_rel = get_godag(obo_path, prt, loading_bar=None, optional_attrs=['relationship'])

    # Both result sets come from the DAG loaded with relationships
    shared = {'propagate_counts': True, 'prt': prt}
    res_without = _get_results(dag_rel, relationships=False, **shared)
    res_with = _get_results(dag_rel, relationships=True, **shared)
    _chk_results(res_without, res_with, prt)
Esempio n. 3
0
def get_goeaobj(methods=None):
    """Build a GOEnrichmentStudy from the small test association and population files.

    Args:
        methods: value forwarded unchanged as the ``methods`` argument of
            GOEnrichmentStudy.

    Returns:
        The initialized GOEnrichmentStudy object.
    """
    obo_fin = "{REPO}/go-basic.obo".format(REPO=REPO)
    if not os.path.isfile(obo_fin):
        # NOTE(review): this call names the file relative to the current directory,
        # not obo_fin -- confirm REPO and the working directory coincide.
        get_godag("go-basic.obo")
    obo_dag = GODag(obo_fin)
    assoc = read_associations(
        "{REPO}/tests/data/small_association".format(REPO=REPO), no_top=True)
    popul_fin = "{REPO}/tests/data/small_population".format(REPO=REPO)
    # Read the population IDs inside a context manager so the file handle is closed
    with open(popul_fin) as ifstrm:
        popul_ids = [line.rstrip() for line in ifstrm]
    return GOEnrichmentStudy(popul_ids, assoc, obo_dag, methods=methods)
def test_semsim_wang(prt=stdout):
    """Check the edge weights (w_e) SsWang ends up with for various arguments.

    The checks run first against a GODag loaded without relationships and then
    against one loaded with the optional 'relationship' attribute.
    """
    obo_path = join(REPO, 'go-basic.obo')

    # ---- GODag loaded WITHOUT relationships ----
    dag_isa = get_godag(obo_path, prt=prt)

    # Asking for a relationship on this DAG must raise RuntimeError
    saw_error = False
    try:
        SsWang({}, dag_isa, {'part_of',})
    except RuntimeError as err:
        assert str(err) == '**ERROR: SsWang GODag not loaded with relationships', '({})'.format(err)
        saw_error = True
    assert saw_error

    assert SsWang({}, dag_isa).w_e == {'is_a': 0.8}
    assert SsWang({}, dag_isa, rel2scf={'is_a': 0.9, 'part_of': 0.7}).w_e == {'is_a': 0.9}

    # ---- GODag loaded WITH relationships ----
    dag_rel = get_godag(obo_path, optional_attrs=['relationship'], prt=prt)
    _chk_relationships(dag_rel)

    # 'part_of' explicitly requested
    rels = {'part_of'}
    assert SsWang({}, dag_rel, rels, rel2scf={}).w_e == {'is_a': 0.8, 'part_of': 0.6}

    sim = SsWang({}, dag_rel, rels, rel2scf={'is_a': 0.9, 'part_of': 0.7})
    assert sim.w_e == {'is_a': 0.9, 'part_of': 0.7}

    # pylint: disable=line-too-long
    sim = SsWang({}, dag_rel, rels, rel2scf={'is_a': 0.9, 'part_of': 0.7, 'regulates':0.2})
    assert sim.w_e == {'is_a': 0.9, 'part_of': 0.7}

    # No relationships requested: only 'is_a' is weighted
    assert SsWang({}, dag_rel).w_e == {'is_a': 0.8}
    assert SsWang({}, dag_rel, rel2scf={'is_a': 0.9, 'part_of': 0.7}).w_e == {'is_a': 0.9}

    sim = SsWang({}, dag_rel, rel2scf={'is_a': 0.9, 'part_of': 0.7, 'regulates':0.2})
    assert sim.w_e == {'is_a': 0.9}

    # Unknown relationship names leave the weights at 'is_a' only
    assert SsWang({}, dag_rel, relationships={'mock_rel'}).w_e == {'is_a': 0.8}
    print('**PASSED: Properly reported ERROR in relationship, mock_rel')

    assert SsWang({}, dag_rel, rel2scf={'mock_rel':.7}).w_e == {'is_a': 0.8}
Esempio n. 5
0
def test_find_enrichment():
    """Run the enrichment CLI function with 'indent' set and check the result counts."""
    # Set params
    objtest = ArgsDict()
    get_godag(objtest.namespace['obo'], loading_bar=None)
    objtest.namespace['indent'] = True

    # Run test
    objcli = GoeaCliFnc(objtest.ntobj(**objtest.namespace))

    # Check results
    # Earlier expected counts: fdr_bh 17, sidak 5, holm 5, bonferroni 5
    expected_cnts = dict(fdr_bh=19, sidak=9, holm=9, bonferroni=9)
    _chk_results(objcli.results_all, expected_cnts, objcli)
    print("TEST PASSED")
def test_find_enrichment():
    """Run the enrichment CLI function with 'indent' set and check the result counts."""
    tester = ArgsDict()
    # Load the ontology file named in the argument namespace
    get_godag(tester.namespace['obo'], loading_bar=None)
    tester.namespace['indent'] = True
    cli_args = tester.ntobj(**tester.namespace)

    cli_run = GoeaCliFnc(cli_args)

    # Number of expected results per method
    # (earlier values were fdr_bh 17, sidak 5, holm 5, bonferroni 5)
    expected_cnts = {
        'fdr_bh': 19,
        'sidak': 9,
        'holm': 9,
        'bonferroni': 9,
    }
    _chk_results(cli_run.results_all, expected_cnts, cli_run)
    print("TEST PASSED")
Esempio n. 7
0
def test_find_enrichment(run_all=False):
    """Run every command from _get_cmds() and require a zero exit status from each.

    Nothing is executed unless run_all is true.
    """
    if not run_all:
        print('RUN THIS TEST WITH AN ARGUMENT')
        return

    # Prepare the ontology and the annotation file before running the commands
    obo_path = join(REPO, 'go-basic.obo')
    get_godag(obo_path, optional_attrs={'relationship'}, loading_bar=None)
    gaf_path = join(REPO, 'goa_human.gaf')
    dnld_annotation(gaf_path)

    for num, command in enumerate(_get_cmds()):
        print('------------------- TEST {I} ------------------------------------'.format(I=num))
        print('CMD: {CMD}'.format(CMD=command))
        assert system(command) == 0
    print("TEST PASSED")
Esempio n. 8
0
def test_update_association():
    """Check that three ways of propagating counts give the same associations.

    Compares GODag.update_association, the stand-alone update_association
    function, and get_id2gos(..., propagate_counts=True) on the annotation object.
    """
    print('\n1) READ GODAG:')
    assc_name = "goa_human.gaf" # gene_association.fb gene_association.mgi
    obo = os.path.join(REPO, "go-basic.obo")
    tic = timeit.default_timer()
    godag = get_godag(obo)
    tic = prt_hms(tic, "Created two GODags: One for original and one for new propagate counts")

    print('\n2) READ ANNOTATIONS:')
    assc_path = os.path.join(REPO, assc_name)
    assc_orig = dnld_assc(assc_path, godag)
    tic = prt_hms(tic, "Associations Read")
    objanno = get_objanno(assc_path, 'gaf', godag=godag)
    tic = prt_hms(tic, "Associations Read")

    print('\n3) MAKE COPIES OF ASSOCIATIONS:')
    # Independent copies, so each update below mutates its own data
    assc_via_dag = {gene: set(goids) for gene, goids in assc_orig.items()}
    assc_via_func = {gene: set(goids) for gene, goids in assc_orig.items()}
    tic = prt_hms(tic, "Associations Copied: One for original and one for new")

    print('\n4) UPDATE ASSOCIATIONS (PROPAGATE COUNTS):')
    godag.update_association(assc_via_dag)
    tic = prt_hms(tic, "ORIG: godag.update_association(assc)")
    update_association(assc_via_func, godag)
    tic = prt_hms(tic, "NEW SA:    update_association(go2obj, assc_orig)")
    assc_via_anno = objanno.get_id2gos(namespace='BP', propagate_counts=True)
    tic = prt_hms(tic, "NEW BASE:  update_association(go2obj, assc_orig)")

    print('\n5) RUN CHECKS')
    _chk_assc(assc_via_dag, assc_via_func)
    _chk_assc(assc_via_dag, assc_via_anno)
    _chk_godag(godag, obo)
Esempio n. 9
0
def test_all(prt=sys.stdout):
    """Test initialization and operation of CountRelatives for GO term branch(s) visualization.

    Args:
        prt: stream handed to the DAG loader and to _run_get_letters_d2.
    """
    # Honor the caller's stream for the DAG load instead of hard-coding sys.stdout
    godag = get_godag(os.path.join(REPO, "go-basic.obo"), prt=prt)
    rcntobj = CountRelatives(godag)
    _wr_xlsx_d1(rcntobj)
    _run_get_letters_d1(rcntobj)
    _run_get_letters_d2(godag, rcntobj, prt)
Esempio n. 10
0
def test_go_parents():
    """Run both GO parent checks on a GoSubDag built with source GO IDs set to None."""
    godag = get_godag("go-basic.obo", prt=None)
    full_subdag = GoSubDag(None, godag, rcntobj=True)
    run_1(full_subdag)
    run_2(full_subdag)
Esempio n. 11
0
def main(prt=sys.stdout):
    """Print GO annotation statistics for the protein-coding mouse gene association."""
    godag = get_godag()
    # Population genes are the keys of ensm2nt
    params = {
        'association_file': os.path.join(REPO, 'gene_association.mgi'),
        'genes_population': ensm2nt.keys(),
    }
    objassc = DataAssc(params, godag)

    # Number of genes per GO ID
    genes_per_go = {goid: len(geneids) for goid, geneids in objassc.go2genes.items()}
    describer = StatsDescribe("GOs", "{:>5.0f}")
    describer.prt_hdr(prt, name="\nname      ")
    describer.prt_data("# genes/GO", genes_per_go.values(), prt)

    # Number of GO IDs per gene
    gos_per_gene = {
        geneid: len(goids)
        for geneid, goids in objassc.objassc_all.assc_geneid2gos.items()
    }
    describer = StatsDescribe("genes", "{:>5.0f}")
    describer.prt_hdr(prt, name="\nname      ")
    describer.prt_data("# GOs/gene", gos_per_gene.values(), prt)

    # Share of the population genes that carry at least one GO annotation
    num_pc = len(params['genes_population'])
    num_assc = len(objassc.objassc_all.assc_geneid2gos)
    summary = "{PERC:2.0f}% of {A} of {P} Mouse protein-coding genes are annotated by GO IDs.\n"
    prt.write(summary.format(PERC=100.0 * num_assc / num_pc, P=num_pc, A=num_assc))
Esempio n. 12
0
    def __init__(self, taxid, fin_gene2go, fin_gobasic):
        """Fetch and read the gene2go annotations for one taxid, then load the GO DAG.

        Both file names are resolved relative to REPO.
        """
        gene2go_path = os.path.join(REPO, fin_gene2go)
        dnld_ncbi_gene_file(gene2go_path, loading_bar=None)
        self.gene2go = read_ncbi_gene2go(gene2go_path, [taxid])

        obo_path = os.path.join(REPO, fin_gobasic)
        self.godag = get_godag(obo_path, loading_bar=None)
Esempio n. 13
0
    def __init__(self, taxid, fin_gene2go, fin_gobasic):
        """Set self.gene2go (annotations for taxid) and self.godag (GO DAG).

        fin_gene2go and fin_gobasic are file names joined onto REPO.
        """
        # Annotations: make sure the file is present, then read only this taxid
        anno_file = os.path.join(REPO, fin_gene2go)
        dnld_ncbi_gene_file(anno_file, loading_bar=None)
        self.gene2go = read_ncbi_gene2go(anno_file, [taxid])

        # Ontology
        self.godag = get_godag(os.path.join(REPO, fin_gobasic), loading_bar=None)
Esempio n. 14
0
def test_get_children(prt=sys.stdout):
    """Check that get_id2children matches GOTerm.get_all_children (Issue #86)."""
    # Load GO-DAG from the directory above this file
    repo = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
    godag = get_godag(os.path.join(repo, "go-basic.obo"))
    # Keep only entries whose key is the term's own id
    go2obj = {goid: term for goid, term in godag.items() if goid == term.id}

    # Reference: ask every term for all of its children, one term at a time
    tic = timeit.default_timer()
    go2children_orig = {}
    go2children_empty = set()
    for term in go2obj.values():
        descendants = term.get_all_children()
        if not descendants:
            go2children_empty.add(term.id)
            continue
        go2children_orig[term.id] = descendants
    tic = prt_hms(tic,
                  "Get all goobj's children using GOTerm.get_all_children()",
                  prt)

    # Candidate: compute the children of all terms in a single call
    go2children_fast = get_id2children(go2obj.values())
    prt_hms(tic, "Get all goobj's children using go_tasks::get_id2children",
            prt)

    # Compare children sets
    checker = CheckGOs('test_get_children', go2obj)
    checker.chk_a2bset(go2children_orig, go2children_fast)
Esempio n. 15
0
def test_gpad_read(run_desc="mouse", prt=sys.stdout):
    """Test reading GPAD files from GOA source http://www.ebi.ac.uk/GOA.

    For every downloaded GPAD file, report how many associations carry an
    Annotation Extension, then print the relation counts over all files.
    """
    objdnld = DnldGoa()
    species2gpad = _dnld_gpad(objdnld, run_desc)
    # Annotation Extension relation counts accumulated across all species
    relations = cx.Counter()
    godag = get_godag()
    pat = "{N:8,} of {M:8,} {P:5.2f}% associations have Annotation Extensions in {ORG}\n"
    for org, gpad_file in sorted(species2gpad.items()):
        header = "{ORG} {GPAD}".format(ORG=org, GPAD=os.path.basename(gpad_file))
        prt.write("\n{GPAD}\n".format(GPAD=header))
        objgpad = GpadReader(gpad_file, godag=godag)
        for ntgpad in objgpad.associations:
            # Assertions are present in the GPAD reader class
            if ntgpad.Extension:
                relations += ntgpad.Extension.get_relations_cnt()
        # The Extensions field is new in GPAD
        num_ext = sum(1 for nt in objgpad.associations if nt.Extension is not None)
        percent = 100. * num_ext / objgpad.qty
        prt.write(pat.format(N=num_ext, M=objgpad.qty, P=percent, ORG=org))
        for rel, cnt in objgpad.get_relation_cnt().most_common():
            prt.write("    {C:6,} {R}\n".format(C=cnt, R=rel))

    summary = "\n{N} Annotation Extensions Relations found among all species:\n"
    prt.write(summary.format(N=len(relations)))
    for rel, cnt in relations.most_common():
        prt.write("{C:10,} {R}\n".format(C=cnt, R=rel))
Esempio n. 16
0
def test_wr_sections_all():
    """Write the sections as a text file and a Python file, then read both back."""
    sections_in = "data/gjoneska_pfenning/sections_in.txt"
    sections_txt = "tmp_test_sections_out.txt"
    # Travis-CI path is cwd
    sections_py = "tmp_test_sections.py"

    # User GO IDs come from the GOEA results; sections come from the input file
    usrgos = [nt.GO for nt in goea_results]
    sec_rd = _read_sections(sections_in)

    # Do preliminaries
    godag = get_godag("go-basic.obo", prt=None, loading_bar=False, optional_attrs=['relationship'])
    gosubdag = GoSubDag(usrgos, godag, relationships=True, tcntobj=None)
    grprdflt = GrouperDflts(gosubdag)
    # The last entry of sec_rd is excluded here and must be the default section
    hdrobj = HdrgosSections(gosubdag, grprdflt.hdrgos_dflt, sec_rd[:-1])
    last_name = sec_rd[-1][0]
    assert last_name == hdrobj.secdflt, sec_rd[-1][0]
    grprobj = Grouper("test", usrgos, hdrobj, gosubdag)

    # Create text and Python sections files
    WrSectionsTxt(grprobj).wr_txt_section_hdrgos(os.path.join(REPO, sections_txt))
    WrSectionsPy(grprobj).wr_py_sections(
        os.path.join(REPO, sections_py), sec_rd, doc=godag.version)

    # Read text and Python sections files
    sec_wr = _read_sections(sections_txt)
    sec_py = _read_sections(sections_py)
def test_find_enrichment():
    """Run enrichment studies on annotations read from each of several file formats."""
    godag = get_godag("go-basic.obo", optional_attrs=['relationship'])
    # GO IDs related to humoral response
    enriched_goids = _get_enriched_goids('GO:0006959', godag)

    # pylint: disable=superfluous-parens
    print('- DOWNLOAD AND LOAD -----------------------------------------------')
    anno_readers = [
        _get_objanno('gene2go', taxid=10090),
        _get_objanno('gene2go', taxid=9606),
        _get_objanno('goa_human.gaf'),
        _get_objanno('goa_human.gpad', godag=godag),
        _get_objanno('data/association', anno_type='id2gos', godag=godag),
    ]

    for reader in anno_readers:
        ns2assc = reader.get_ns2assc()
        population = list(itertools.chain.from_iterable(ns2assc.values()))
        print('{N:6,} population IDs'.format(N=len(population)))
        # Study IDs: every DB_ID annotated to one of the chosen GO IDs
        study_ids = {nt.DB_ID for nt in reader.associations if nt.GO_ID in enriched_goids}

        # All namespaces
        goea_all = _get_objgoeans(population, ns2assc, godag)
        res_all = goea_all.run_study(study_ids)
        print('{N} results'.format(N=len(res_all)))

        # Run one branch
        goea_bp = _get_objgoeans(population, {'BP': ns2assc['BP']}, godag)
        res_bp = goea_bp.run_study(study_ids)
        print('{N} results'.format(N=len(res_bp)))
    print("TEST PASSED")
def test_find_enrichment():
    """Run an enrichment for each annotation source, over all namespaces and over BP alone."""
    godag = get_godag("go-basic.obo", optional_attrs=['relationship'])
    gos = _get_enriched_goids('GO:0006959', godag)  # GO IDs related to humoral response

    # pylint: disable=superfluous-parens
    print('- DOWNLOAD AND LOAD -----------------------------------------------')
    sources = [
        _get_objanno('gene2go', taxid=10090),
        _get_objanno('gene2go', taxid=9606),
        _get_objanno('goa_human.gaf'),
        _get_objanno('goa_human.gpad', godag=godag),
        _get_objanno('data/association', anno_type='id2gos', godag=godag),
    ]

    result_line = '{N} results'
    for anno in sources:
        ns2assc = anno.get_ns2assc()
        pop_ids = list(itertools.chain.from_iterable(ns2assc.values()))
        print('{N:6,} population IDs'.format(N=len(pop_ids)))
        enriched = {nt.DB_ID for nt in anno.associations if nt.GO_ID in gos}
        # Full study across every namespace in ns2assc
        results = _get_objgoeans(pop_ids, ns2assc, godag).run_study(enriched)
        print(result_line.format(N=len(results)))
        # Same study restricted to the 'BP' branch
        bp_only = {'BP': ns2assc['BP']}
        results_bp = _get_objgoeans(pop_ids, bp_only, godag).run_study(enriched)
        print(result_line.format(N=len(results_bp)))
    print("TEST PASSED")
Esempio n. 19
0
def _get_godag():
    """Load REPO's go-basic.obo with the optional 'relationship' attribute, without output."""
    obo_path = os.path.join(REPO, 'go-basic.obo')
    load_opts = {
        'prt': None,
        'loading_bar': False,
        'optional_attrs': ['relationship'],
    }
    return get_godag(obo_path, **load_opts)
def test_get_lowerselect(prt=sys.stdout):
    """Compare get_all_lowerselect with get_id2lowerselect for every relationship combination."""
    # Load GO-DAG from the directory above this file
    repo = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
    godag = get_godag(os.path.join(repo, 'go-basic.obo'), optional_attrs='relationship')
    run = RelationshipCombos(godag)
    run.chk_relationships_all()
    rels_combo = run.get_relationship_combos()
    print('{N} COMBINATIONS OF RELATIONSHIPS'.format(N=len(rels_combo)))

    for relidx, rels_set in enumerate(rels_combo, 1):
        num_rels = len(rels_set)
        rels_txt = ' '.join(sorted(rels_set))
        print('{I}) RELATIONSHIPS[{N}]: {Rs}'.format(I=relidx, N=num_rels, Rs=rels_txt))

        # Reference: one get_all_lowerselect call per GO term
        tic = timeit.default_timer()
        go2lowerselect_orig = {}
        for term in run.go2obj.values():
            go2lowerselect_orig[term.item_id] = get_all_lowerselect(term, rels_set)
        tic = prt_hms(tic, "Get all goobj's parents using get_all_lowerselect(GOTerm)", prt)

        # Candidate: a single get_id2lowerselect call over all GO terms
        go2lowerselect_fast = get_id2lowerselect(run.go2obj.values(), rels_set)
        tic = prt_hms(tic, "Get all goobj's parents using go_tasks::get_id2lowerselect", prt)

        # Compare the two mappings: EXPECTED, ACTUAL
        CheckGOs('test_get_lower_select', godag).chk_a2bset(
            go2lowerselect_orig, go2lowerselect_fast)
        print("PASSED: get_lowerselect RELATIONSHIPS[{N}]: {Rs}".format(N=num_rels, Rs=rels_txt))
Esempio n. 21
0
def test_alt_id():
    """Ensure that every alternate GO ID is also a key of the loaded GO DAG."""
    godag = get_godag("go-basic.obo", loading_bar=None)
    alt_ids = get_altids(godag)
    dag_keys = set(godag.keys())
    # The intersection is as large as alt_ids only when no alternate ID is missing
    shared = alt_ids.intersection(dag_keys)
    assert len(shared) == len(alt_ids)
Esempio n. 22
0
def test_write_hier_bp_mf_cc():
    """Test that write hierarchy writes all: BP, MF, CC"""
    anno_path = os.path.join(REPO, 'gene2go')
    obo_path = os.path.join(REPO, "go-basic.obo")
    _dnld_anno(anno_path)

    print('\nTEST STORING ONLY ONE SPECIES')
    godag = get_godag(obo_path)
    gene2gos = read_annotations(namespace='ALL')
    # Term counts are only built when annotations were actually read
    if gene2gos:
        tcntobj = TermCounts(godag, gene2gos)
    else:
        tcntobj = None
    gosubdag = GoSubDag(godag.keys(),
                        godag,
                        relationships=False,
                        tcntobj=tcntobj,
                        children=True,
                        prt=sys.stdout)
    objwr = WrHierGO(gosubdag)

    # 2020 11:
    #     594,748 GO lines under GO:0008150
    #      23,199 GO lines under GO:0003674
    #       6,259 GO lines under GO:0005575
    #     624,206 items WROTE: tmp_test_wr_hier_BP_MF_CC.txt
    # Each namespace selection must produce more than its minimum number of items
    minimums = [
        (['BP', 'MF', 'CC'], 600000),
        (['BP'], 500000),
        (['MF'], 20000),
        (['CC'], 5000),
    ]
    for namespaces, min_items in minimums:
        assert len(_wr_hier(namespaces, gosubdag.go2nt, objwr)) > min_items
Esempio n. 23
0
def test_all(prt=sys.stdout):
    """Exercise CountRelatives: write the depth-01 xlsx and run both get-letters checks."""
    godag = get_godag("go-basic.obo", prt=None)
    relatives = CountRelatives(godag)
    _wr_xlsx_d1(relatives)
    _run_get_letters_d1(relatives)
    # Only the depth-02 check receives the DAG and the output stream
    _run_get_letters_d2(godag, relatives, prt)
Esempio n. 24
0
 def cli(self, prt=sys.stdout):
     """Command-line interface for go_draw script.

     Reads the documented arguments, loads the GO DAG with relationships,
     groups the user GO IDs under the sections read from kws['ifile'], and
     writes the sections and the sorted GO IDs to the requested output files.

     Args:
         prt: stream that receives the version lines; it is also passed to
             GetGOs.get_usrgos.
     """
     kws = self.objdoc.get_docargs(prt=None)
     godag = get_godag(kws['obo'], prt=None, loading_bar=False, optional_attrs=['relationship'])
     # User GO IDs come from the optional GO_FILE argument
     usrgos = GetGOs(godag, max_gos=200).get_usrgos(kws.get('GO_FILE'), prt)
     tcntobj = self._get_tcntobj(usrgos, godag, **kws)  # Gets TermCounts or None
     self.gosubdag = GoSubDag(usrgos, godag, relationships=True, tcntobj=tcntobj, prt=None)
     grprdflt = GrouperDflts(self.gosubdag, kws['slims'])
     # Versions of the GO DAG and of the GO slims, printed and passed to the writers
     ver_list = [godag.version, grprdflt.ver_goslims]
     prt.write("{VER}\n".format(VER="\n".join(ver_list)))
     # Sections are read BEFORE anything below may write kws['ifile']
     sections = self._read_sections(kws['ifile'])
     # print("SECSECSEC", sections)
     hdrobj = HdrgosSections(self.gosubdag, grprdflt.hdrgos_dflt, sections)
     grprobj = Grouper("init", usrgos, hdrobj, self.gosubdag)
     # Write sections
     objsecwr = WrSectionsTxt(grprobj, ver_list)
     # The input sections file is only written when it does not exist yet
     if not os.path.exists(kws['ifile']):
         objsecwr.wr_txt_section_hdrgos(kws['ifile'])
     objsecwr.wr_txt_section_hdrgos(kws['ofile'])
     objsecpy = WrSectionsPy(grprobj, ver_list)
     # The Python sections file is optional
     if 'py' in kws:
         objsecpy.wr_py_sections(kws['py'], sections, doc=godag.version)
     # Write user GO IDs in sections
     sortobj = Sorter(grprobj)
     objgowr = WrXlsxSortedGos("init", sortobj, ver_list)
     objgowr.wr_txt_gos(kws['txt'], sortby=objsecpy.fncsortnt)
     #objwr.wr_txt_section_hdrgos(kws['ofile'], sortby=objwr.fncsortnt)
     # NOTE(review): this goes to sys.stdout rather than prt -- confirm that is intended
     self._prt_cnt_usrgos(usrgos, sys.stdout)
def test_nb():
    """Run the notebook code: print the virion dcnt under different relationship settings.

    Ends by plotting a small sub-DAG for two GO IDs to virion_capsid_fiber.png.
    """
    godag = get_godag("go-basic.obo", optional_attrs={'relationship'})
    # Leaf terms are the terms without children
    go_leafs = {term.item_id for term in godag.values() if not term.children}
    virion = 'GO:0019012'

    # 'is_a' only
    subdag_isa = GoSubDag(go_leafs, godag)
    nt_virion = subdag_isa.go2nt[virion]
    print(nt_virion)
    print('r0 THE VALUE OF dcnt IS: {dcnt}'.format(dcnt=nt_virion.dcnt))

    # All relationships
    subdag_all = GoSubDag(go_leafs, godag, relationships=True)
    nt_virion = subdag_all.go2nt[virion]
    print(nt_virion)
    print('r1 THE VALUE OF dcnt IS: {dcnt}'.format(dcnt=nt_virion.dcnt))

    # 'part_of' only
    subdag_partof = GoSubDag(go_leafs, godag, relationships={'part_of'})
    nt_virion = subdag_partof.go2nt[virion]
    print(nt_virion)
    print('THE VALUE OF dcnt IS: {dcnt}'.format(dcnt=nt_virion.dcnt))
    descendants = subdag_partof.rcntobj.go2descendants[virion]
    print('{N} descendants of virion were found'.format(N=len(descendants)))

    # Limit plot of descendants to get a smaller plot
    virion_capsid_fiber = {'GO:0098033', 'GO:0098032'}
    prt_fmt = '{NS} {GO} dcnt({dcnt}) D-{depth:02} {GO_name}'
    subdag_partof.prt_goids(virion_capsid_fiber, prt_fmt)

    # Build the plotted sub-DAG from just those two GO IDs
    plot_subdag = GoSubDag(virion_capsid_fiber, godag, relationships={'part_of'})
    GoSubDagPlot(plot_subdag).plt_dag('virion_capsid_fiber.png')
Esempio n. 26
0
def test_gpad_read(run_desc="mouse", prt=sys.stdout):
    """Test reading GPAD files from GOA source http://www.ebi.ac.uk/GOA.

    Writes, per GPAD file, the share of associations that have an Annotation
    Extension, followed by the relation counts summed over all files.
    """
    downloader = DnldGoa()
    species2gpad = _dnld_gpad(downloader, run_desc)
    # Count Annotation Extension Relations across all species
    relations = cx.Counter()
    godag = get_godag()
    pat = "{N:8,} of {M:8,} {P:5.2f}% associations have Annotation Extensions in {ORG}\n"
    for org, gpad_file in sorted(species2gpad.items()):
        gpad_name = os.path.basename(gpad_file)
        prt.write("\n{GPAD}\n".format(GPAD="{ORG} {GPAD}".format(ORG=org, GPAD=gpad_name)))
        reader = GpadReader(gpad_file, godag=godag)
        for assoc in reader.associations:
            # Assertions are present in the GPAD reader class
            if assoc.Extension:
                relations += assoc.Extension.get_relations_cnt()
        with_ext = [assoc for assoc in reader.associations if assoc.Extension is not None]
        num_ext = len(with_ext)
        # The Extensions field is new in GPAD
        prt.write(pat.format(N=num_ext, M=reader.qty, P=100.*num_ext/reader.qty, ORG=org))
        for rel, cnt in reader.get_relation_cnt().most_common():
            prt.write("    {C:6,} {R}\n".format(C=cnt, R=rel))

    num_relations = len(relations)
    prt.write("\n{N} Annotation Extensions Relations found among all species:\n".format(
        N=num_relations))
    for rel, cnt in relations.most_common():
        prt.write("{C:10,} {R}\n".format(C=cnt, R=rel))
Esempio n. 27
0
def test_i177():
    """Run code from issue #177, which is reporting a recursion error"""
    go_id = 'GO:0050807'
    godag = get_godag('go.obo', optional_attrs='relationship')
    subdag = GoSubDag([go_id], godag, prt=None)
    # Looking up the ancestors is the step the issue reports on
    ancestors = subdag.rcntobj.go2ancestors[go_id]
    print('{GO} ancestors: {P}'.format(GO=go_id, P=ancestors))
Esempio n. 28
0
def _get_gosubdag():
    """Return a GoSubDag (source GO IDs None) over REPO's go-basic.obo with relationships."""
    obo_path = os.path.join(REPO, 'go-basic.obo')
    load_opts = dict(prt=sys.stdout, loading_bar=False, optional_attrs=['relationship'])
    return GoSubDag(None, get_godag(obo_path, **load_opts))
Esempio n. 29
0
def _get_grprobj():
    """Return a Grouper for USER_GOS, with headers taken from SECTIONS."""
    godag = get_godag(os.path.join(REPO, "go-basic.obo"),
                      prt=None,
                      loading_bar=False,
                      optional_attrs=['relationship'])
    subdag = GoSubDag(USER_GOS, godag, relationships=True, tcntobj=None)
    # Default header GO IDs come from GrouperDflts
    dflt_hdrgos = GrouperDflts(subdag).hdrgos_dflt
    headers = HdrgosSections(subdag, dflt_hdrgos, SECTIONS)
    return Grouper("wrusrgos", USER_GOS, headers, subdag)
Esempio n. 30
0
def test_godag(prt=sys.stdout):
    """Test downloading GO DAG.

    Each OBO file is removed from the current directory first, so that
    get_godag has to obtain it again before loading.

    Args:
        prt: stream passed to get_godag.
    """
    cwd = os.getcwd()
    for fin_obo in ['go-basic.obo', 'goslim_generic.obo']:
        fin_full = os.path.join(cwd, fin_obo)
        # Remove any existing copy without shelling out to 'rm'; a removal that
        # fails now raises instead of being silently ignored.
        if os.path.exists(fin_full):
            os.remove(fin_full)
        godag = get_godag(fin_full, prt, loading_bar=None)  # Get GODag object
        assert godag, "GO-DAG({OBO}) NOT PROPERLY LOADED".format(OBO=fin_obo)
Esempio n. 31
0
def test_godag(prt=sys.stdout):
    """Test downloading GO DAG.

    For each OBO file name, delete the copy in the current directory (if any)
    and require get_godag to return a non-empty DAG for that path.

    Args:
        prt: stream passed to get_godag.
    """
    cwd = os.getcwd()
    for fin_obo in ['go-basic.obo', 'goslim_generic.obo']:
        fin_full = os.path.join(cwd, fin_obo)
        # Portable replacement for os.system("rm -f ..."): no shell involved,
        # and a failed removal surfaces as an exception.
        if os.path.exists(fin_full):
            os.remove(fin_full)
        godag = get_godag(fin_full, prt, loading_bar=None) # Get GODag object
        assert godag, "GO-DAG({OBO}) NOT PROPERLY LOADED".format(OBO=fin_obo)
def test_pc_w_rels(prt=sys.stdout):
    """Check propagate-counts results with relationships off against relationships on."""
    file_obo = os.path.join(REPO, "go-basic.obo")
    # Loaded without optional attributes; not used by the comparisons below
    godag_r0 = get_godag(file_obo, prt, loading_bar=None)
    godag_r1 = get_godag(file_obo, prt, loading_bar=None, optional_attrs=['relationship'])

    # Same DAG for both runs; only the 'relationships' flag differs
    results_r0 = _get_results(godag_r1, propagate_counts=True, relationships=False, prt=prt)
    results_r1 = _get_results(godag_r1, propagate_counts=True, relationships=True, prt=prt)
    _chk_results(results_r0, results_r1, prt)
Esempio n. 33
0
def init_goea(**kws):
    """Initialize GODag and GOEnrichmentStudy.

    Args:
        **kws: optional 'methods' entry forwarded to GOEnrichmentStudy;
            defaults to ['not_bonferroni'] when absent.

    Returns:
        A tuple (GOEnrichmentStudy object, list of study IDs).
    """
    godag = get_godag(os.path.join(os.getcwd(), "go-basic.obo"), loading_bar=None)
    fin_assc = ROOT + "association"
    assoc = read_associations(fin_assc, 'id2gos', no_top=True)
    # Context managers close both ID files instead of leaking their handles
    with open(ROOT + "population") as ifstrm:
        popul_ids = [line.rstrip() for line in ifstrm]
    methods = kws.get('methods', ['not_bonferroni'])
    with open(ROOT + "study") as ifstrm:
        study_ids = [line.rstrip() for line in ifstrm]
    return GOEnrichmentStudy(popul_ids, assoc, godag, methods=methods), study_ids
Esempio n. 34
0
def get_goeaobj(methods=None):
    """Build a GOEnrichmentStudy from the small test association and population files.

    Args:
        methods: value forwarded unchanged as the ``methods`` argument of
            GOEnrichmentStudy.

    Returns:
        The initialized GOEnrichmentStudy object.
    """
    obo_fin = os.path.join(REPO, "go-basic.obo")
    obo_dag = get_godag(obo_fin, loading_bar=None)
    fin_assc = "{REPO}/tests/data/small_association".format(REPO=REPO)
    assoc = read_associations(fin_assc, 'id2gos', no_top=True)
    popul_fin = "{REPO}/tests/data/small_population".format(REPO=REPO)
    # Read the population IDs inside a context manager so the file handle is closed
    with open(popul_fin) as ifstrm:
        popul_ids = [line.rstrip() for line in ifstrm]
    return GOEnrichmentStudy(popul_ids, assoc, obo_dag, methods=methods)
Esempio n. 35
0
 def __init__(self, gosubdag=None, goslim_filename="goslim_generic.obo", hdrgos=None):
     """Store the GoSubDag, the GO slim version and terms, and the default header GO IDs.

     When hdrgos is None the header GO IDs come from self._init_hdrgos().
     """
     self.gosubdag = self.get_gosubdag(gosubdag)
     slim_dag = get_godag(goslim_filename, prt=None, loading_bar=False)
     self.ver_goslims = slim_dag.version
     self.goslims = self._init_goslims(slim_dag)
     if hdrgos is None:
         hdrgos = self._init_hdrgos()
     self.hdrgos_dflt = hdrgos  # goid set
def test_find_enrichment():
    """Recreate run in run.sh.

    For several min_overlap settings, build a study gene set from the
    population, print its overlap with the population, and let the CLI
    object check the genes.
    """
    fin_genes = os.path.join(REPO, "data/study")
    # Read the non-blank gene lines; the context manager closes the file handle
    with open(fin_genes) as ifstrm:
        pop = set(line.strip() for line in ifstrm if line.strip())
    stu_orig = pop
    num_pop = len(pop)
    objtest = ArgsDict()
    get_godag(objtest.namespace['obo'], loading_bar=None)
    for min_overlap in [.25, .50, .75]:
        objtest.namespace['min_overlap'] = min_overlap
        args = objtest.ntobj(**objtest.namespace)
        objcli = GoeaCliFnc(args)
        # Ask for ten more study genes than the min_overlap fraction of the population
        num_stu_in_pop = int(round(min_overlap*num_pop)) + 10
        study = _get_studygenes(stu_orig, num_stu_in_pop)
        overlap = objcli.get_overlap(study, pop)
        print("{N:3} of {M} ({OL}%) in study in pop".format(
            N=num_stu_in_pop, M=num_pop, OL=100.0*overlap))
        objcli.chk_genes(study, pop)
    print("TEST PASSED")
Esempio n. 37
0
def main():
    """Write Table of depth-01 GO terms w/child count"""
    godag = get_godag('go-basic.obo', optional_attrs='relationship')
    relatives = CountRelatives(godag, relationships=True, dcnt=True)
    # The LaTeX table goes to a fixed file name
    GoDepth1LettersWr(relatives).wr_tex("gos_depth01.tex")
def test_find_enrichment():
    """Recreate run in run.sh.

    Runs the gene checks of the CLI object for min_overlap values of
    0.25, 0.50 and 0.75, printing the study/population overlap each time.
    """
    fin_genes = os.path.join(REPO, "data/study")
    # Use a context manager so the gene file is closed once it has been read
    with open(fin_genes) as ifstrm:
        pop = set(line.strip() for line in ifstrm if line.strip())
    stu_orig = pop
    num_pop = len(pop)
    objtest = ArgsDict()
    get_godag(objtest.namespace['obo'], loading_bar=None)
    for min_overlap in [.25, .50, .75]:
        objtest.namespace['min_overlap'] = min_overlap
        args = objtest.ntobj(**objtest.namespace)
        objcli = GoeaCliFnc(args)
        # Study size: the min_overlap fraction of the population plus ten genes
        num_stu_in_pop = int(round(min_overlap*num_pop)) + 10
        study = _get_studygenes(stu_orig, num_stu_in_pop)
        overlap = objcli.get_overlap(study, pop)
        print("{N:3} of {M} ({OL}%) in study in pop".format(
            N=num_stu_in_pop, M=num_pop, OL=100.0*overlap))
        objcli.chk_genes(study, pop)
    print("TEST PASSED")
Esempio n. 39
0
 def get_gosubdag(gosubdag=None):
     """Gets a GoSubDag initialized for use by a Grouper object.

     A missing GoSubDag is built from the default GO DAG; an existing one whose
     rcntobj is None gets its auxiliary objects initialized before it is returned.
     """
     if gosubdag is None:
         return GoSubDag(None, get_godag(), rcntobj=True)
     if gosubdag.rcntobj is None:
         gosubdag.init_auxobjs()
     return gosubdag
Esempio n. 40
0
 def __init__(self, **kws):
     """Load the GO DAG, read the GO ID sets, and build the GoSubDag and grouped object.

     Keyword arguments, when given, replace the arguments parsed from the docstring.
     """
     doc_parser = DocOptParse(__doc__, self.kws_dict, self.kws_set)
     self.kws = kws if kws else doc_parser.get_docargs(prt=None)
     self.godag = get_godag(
         self.kws.get('obo'),
         prt=sys.stdout,
         loading_bar=False,
         optional_attrs=['relationship'])
     initializer = _Init(self.godag)
     self.go_ntsets = initializer.get_go_ntsets(self.kws.get('GO_FILE'))
     # Union of the GO IDs of every set that was read
     self.go_all = set.union(*[nt.go_set for nt in self.go_ntsets])
     # Gets TermCounts or None
     term_counts = initializer.get_tcntobj(self.go_all, **self.kws)
     self.gosubdag = GoSubDag(self.go_all, self.godag, True, tcntobj=term_counts, prt=sys.stdout)
     self.objgrpd = initializer.get_grouped(
         self.go_ntsets, self.go_all, self.gosubdag, **self.kws)
Esempio n. 41
0
def test_assc_stats(prt=sys.stdout):
    """Describe the GO DAG, then describe three association files against it."""
    fin_obo = os.path.join(REPO, "go-basic.obo")
    godag = get_godag(fin_obo, loading_bar=None)
    describe_go2obj(godag, prt)
    statsobj = StatsDescribe('Assc', "{:6,}")
    statsobj.prt_hdr(prt, "Assc.")
    # Organism code paired with the annotation file name joined onto REPO
    org2gaf = (
        ('hsa', 'goa_human.gaf'),  # human
        ('mus', 'mgi.gaf'),        # mouse
        ('dme', 'fb.gaf'),         # fly
    )
    for org, gaf in org2gaf:
        describe_assc(org, os.path.join(REPO, gaf), godag, statsobj, prt)
Esempio n. 42
0
def get_goeaobj(method, geneids_pop, taxid):
    """Return a GOEnrichmentStudy for one taxid using a single multiple-test method.

    Loads the GO DAG with get_godag() and the associations for `taxid` with
    get_assoc_ncbi_taxids, then builds the study over `geneids_pop` with
    propagate_counts=False and alpha=0.05.
    """
    godag = get_godag()
    geneid2gos = get_assoc_ncbi_taxids([taxid])
    study_kws = {
        'propagate_counts': False,
        'alpha': 0.05,
        'methods': [method],
    }
    # The GO DAG passed here is also found in goeaobj.obo_dag
    return GOEnrichmentStudy(geneids_pop, geneid2gos, godag, **study_kws)
def test_find_enrichment():
    """Run enrichments using all annotation file formats.

    For each annotation object, population and study ID files are written
    under REPO, scripts/find_enrichment.py is run on them through os.system,
    and a zero exit status is asserted. The commands are also saved to a
    shell script in the current directory.
    """
    godag = get_godag("go-basic.obo", optional_attrs=['relationship'])
    # GO IDs related to humoral response
    e_goids = _get_enriched_e_goids('GO:0006959', godag)

    # pylint: disable=superfluous-parens
    print('- DOWNLOAD AND LOAD -----------------------------------------------')
    annoobjs = [
        _get_objanno('gene2go', taxid=10090),
        _get_objanno('gene2go', taxid=9606),
        _get_objanno('goa_human.gaf'),
        _get_objanno('goa_human.gpad', godag=godag),
        _get_objanno('data/association', anno_type='id2gos', godag=godag),
    ]

    pat = ('python3 scripts/find_enrichment.py {STU} {POP} {ASSC} '
           '--pval=0.05 --method=fdr_bh --pval_field=fdr_bh '
           '--taxid={TAXID} {INC} {EXC} --outfile=results_{NAME}.xlsx')
    cmds = []
    for annoobj in annoobjs:
        # Population: every ID that has an association in any namespace
        ns2assc = annoobj.get_ns2assc()
        pop = {geneid for id2gos in ns2assc.values() for geneid, _ in id2gos.items()}
        # TODO: 20,263 pop IDs      6,847 stu IDs      2,884 int IDs
        # Study: IDs annotated to one of the chosen GO IDs that are also in the population
        enriched = {nt.DB_ID for nt in annoobj.get_associations() if nt.GO_ID in e_goids}
        stu = enriched & pop
        print('{N:6,} pop IDs: {ID}'.format(N=len(pop), ID=list(pop)[:4]))
        print('{N:6,} enr IDs: {ID}'.format(N=len(enriched), ID=list(enriched)[:4]))
        print('{N:6,} int IDs: {ID}'.format(N=len(stu), ID=list(stu)[:4]))
        name = annoobj.get_name()
        fout_pop = os.path.join(REPO, 'ids_pop_{BASE}.txt'.format(BASE=name))
        fout_stu = os.path.join(REPO, 'ids_stu_{BASE}.txt'.format(BASE=name))
        _wr(fout_pop, pop)
        # Only the first 100 study IDs are written
        _wr(fout_stu, list(stu)[:100])
        cmd = pat.format(
            STU=fout_stu,
            POP=fout_pop,
            ASSC=annoobj.filename,
            TAXID=annoobj.get_taxid(),
            NAME=name,
            INC='',
            EXC='')
        cmds.append(cmd)
        print('\nRUNNING {NAME}: {CMD}\n'.format(CMD=cmd, NAME=name))
        assert os.system(cmd) == 0

    # Save every command that was run so the session can be replayed
    fout_scr = 'test_find_enrichment_script.sh'
    with open(fout_scr, 'w') as prt:
        print("COMANDS RUN:")
        for cmd in cmds:
            print(cmd)
            prt.write('{CMD}\n'.format(CMD=cmd))
        print('  WROTE: {SCRIPT}'.format(SCRIPT=fout_scr))

    print("TEST PASSED")
Esempio n. 44
0
 def __init__(self, obo, gaf, prt):
     """Load the GO DAG and GAF annotations, then build TermCounts and a GoSubDag.

     Args:
         obo: obo file name, joined onto REPO/../goatools/.
         gaf: value passed to dnld_gaf to obtain the annotation file.
         prt: stream stored on the instance and handed to GoSubDag.
     """
     self.prt = prt
     self.cwd = os.getcwd()
     # Gene Ontologies
     fin_obo = os.path.join(REPO, "../goatools/", obo)
     self.go2obj_all = get_godag(fin_obo)
     # Annotations
     fin_gaf = dnld_gaf(gaf)
     print("GAF: {GAF}\n".format(GAF=fin_gaf))
     self.gene2gos = read_gaf(fin_gaf)
     self.tcntobj = TermCounts(self.go2obj_all, self.gene2gos)
     # GoSubDag built with no source GO IDs (None) and the term counts above
     subdag_kws = {'tcntobj': self.tcntobj, 'prt': prt}
     self.gosubdag_all = GoSubDag(None, self.go2obj_all, **subdag_kws)
     self.prtfmt = self.gosubdag_all.prt_attr['fmta']
Esempio n. 45
0
def test_alt_id():
    """Ensure that every alternate GO ID is also a key in the GO DAG."""
    godag = get_godag(os.path.join(REPO, "go-basic.obo"))
    goids = _get_data0()
    # Create/Initialize a GoSubDag over every key of the GO DAG
    grprdflt = _get_grprdflt(GoSubDag(godag.keys(), godag))
    # Create/Initialize a Grouper; this checks only that construction succeeds
    hdrobj = HdrgosSections(
        grprdflt.gosubdag,
        grprdflt.hdrgos_dflt,
        sections=None,
        hdrgos=None)
    Grouper("test_altid_gosubdag", goids, hdrobj, grprdflt.gosubdag, go2nt=None)
    # All alternate IDs must be found among the GO DAG's keys
    alt_ids = _get_altids(godag)
    num_found = len(alt_ids.intersection(set(godag.keys())))
    assert num_found == len(alt_ids)
Esempio n. 46
0
def get_goeaobj(method, geneids_pop, taxid):
    """Return a GOEnrichmentStudy for one taxid using a single multiple-test method.

    The GO DAG is read from go-basic.obo in the current working directory and
    the associations for `taxid` come from get_assoc_ncbi_taxids. The study is
    built over `geneids_pop` with propagate_counts=False and alpha=0.05.
    """
    godag = get_godag(os.path.join(os.getcwd(), "go-basic.obo"), loading_bar=None)
    geneid2gos = get_assoc_ncbi_taxids([taxid], loading_bar=None)
    study_kws = {
        'propagate_counts': False,
        'alpha': 0.05,
        'methods': [method],
    }
    # godag is also found in goeaobj.godag
    return GOEnrichmentStudy(geneids_pop, geneid2gos, godag, **study_kws)
def run_bonferroni():
    """Do Gene Ontology Enrichment Analysis w/Bonferroni multipletest.

    Returns:
        A 2-tuple (results_nt, goea): the value returned by
        GOEnrichmentStudy.run_study for the study IDs, and the
        GOEnrichmentStudy object itself.
    """
    # ---------------------------------------------------------------------
    # Run Gene Ontology Analysis (GOEA)
    #
    # 1. Initialize
    godag = get_godag(os.path.join(os.getcwd(), "go-basic.obo"), loading_bar=None)
    fin_assc = os.path.join(REPO, "data/association")
    assoc = read_associations(fin_assc, 'id2gos', no_top=True)
    # Read the ID files inside context managers so both handles are closed
    # promptly instead of being left open until garbage collection.
    with open(os.path.join(REPO, "data/population")) as ifstrm:
        popul_ids = [line.rstrip() for line in ifstrm]
    with open(os.path.join(REPO, "data/study")) as ifstrm:
        study_ids = [line.rstrip() for line in ifstrm]
    # 2. Run enrichment analysis
    goea = GOEnrichmentStudy(popul_ids, assoc, godag, alpha=0.05, methods=['bonferroni'])
    results_nt = goea.run_study(study_ids)
    return results_nt, goea
Esempio n. 48
0
 def __init__(self, args=None, prt=sys.stdout):
     """Parse the options, load the GO DAG and annotations, and build the GoSubDag.

     Args:
         args: Passed to DocOptParse.get_docargs.
         prt: Stream handed to get_godag and GoSubDag.
     """
     # __doc__ here resolves to the module-level docstring, not this method's;
     # 'max_indent' and 'dash_len' are named as the integer-valued options
     self.kws = DocOptParse(__doc__, self.kws_dct_all, self.kws_set_all).get_docargs(
         args, intvals=set(['max_indent', 'dash_len']))
     # Optional obo attributes to load: those that also appear as option keys
     opt_attrs = OboOptionalAttrs.attributes.intersection(self.kws.keys())
     godag = get_godag(self.kws['dag'], prt, optional_attrs=opt_attrs)
     self.gene2gos = read_annotations(**self.kws)
     # Term counts exist only when read_annotations returned something other than None
     self.tcntobj = TermCounts(godag, self.gene2gos) if self.gene2gos is not None else None
     # Relationships are turned on only when 'relationship' is among the loaded attributes
     self.gosubdag = GoSubDag(godag.keys(), godag,
                              relationships='relationship' in opt_attrs,
                              tcntobj=self.tcntobj,
                              children=True,
                              prt=prt)
     self.goids = self._init_goids()
     # NOTE(review): these adjustments presumably depend on the attributes set
     # above and on running in this order -- confirm before reordering
     self._adj_item_marks()
     self._adj_include_only()
     self._adj_for_assc()
def test_semantic_similarity():
    """Print information content plus Resnik and Lin similarities for a few GO terms.

    Runs once per species listed below, loading that species' annotations
    with namespace='MF'. Every pairwise Resnik and Lin score must be truthy.
    """
    # GO IDs to report on, sorted once for both report sections
    goids = sorted([
        "GO:0140101",
        "GO:0140097",
        "GO:0140096",
        "GO:0140098",
        "GO:0015318",
        "GO:0140110",
    ])
    # Species label and the annotation file loaded for it (limited for speed)
    species2gaf = (
        ('human', 'goa_human.gaf'),
        ('yeast', 'sgd.gaf'),
    )
    fmt_info = '{SPECIES} Information content {INFO:8.6f} {NS} {GO} {NAME}'
    fmt_sim = '{GO1} {GO2} {RESNIK:6.4f} {LIN:7.4f}'

    godag = get_godag(os.path.join(REPO, "go-basic.obo"), loading_bar=None)
    for species, gaf in species2gaf:
        print()
        # Get the annotations for the current species and count the GO terms
        gene2gos = dnld_assc(os.path.join(REPO, gaf), godag, namespace='MF', prt=None)
        termcounts = TermCounts(godag, gene2gos)

        # Print the information content of each GO term
        for goid in goids:
            infocontent = get_info_content(goid, termcounts)
            goterm = godag[goid]
            print(fmt_info.format(
                SPECIES=species,
                GO=goid,
                INFO=infocontent,
                NS=goterm.namespace,
                NAME=goterm.name))

        # Print semantic similarities between each pair of GO terms
        print("GO #1      GO #2      Resnik Lin")
        print("---------- ---------- ------ -------")
        for go_a, go_b in itertools.combinations(goids, 2):
            # Resnik's similarity measure is defined as the information content of the
            # most informative common ancestor: the most specific common parent term.
            sim_r = resnik_sim(go_a, go_b, godag, termcounts)
            sim_l = lin_sim(go_a, go_b, godag, termcounts)
            print(fmt_sim.format(GO1=go_a, GO2=go_b, RESNIK=sim_r, LIN=sim_l))
            assert sim_r, "FATAL RESNIK SCORE"
            assert sim_l, "FATAL LIN SCORE"
Esempio n. 50
0
def test_get_children(prt=sys.stdout):
    """Check get_id2children against GOTerm.get_all_children (Issue #86).

    Both ways of collecting all children are timed with prt_hms and the two
    results are compared with _chk_a2bset.
    """
    # Load GO-DAG from the directory above this file
    repo = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
    godag = get_godag(os.path.join(repo, "go-basic.obo"))
    # Keep only the entries whose key is the term's own ID
    go2obj = {goid: goterm for goid, goterm in godag.items() if goid == goterm.id}
    goterms = go2obj.values()
    # Get all children for all GO IDs using get_all_children in GOTerm class
    tic = timeit.default_timer()
    go2children_orig = {goterm.id: goterm.get_all_children() for goterm in goterms}
    tic = prt_hms(tic, "Get all goobj's children using GOTerm.get_all_children()", prt)
    # Get all children for all GO IDs using get_id2children
    go2children_fast = get_id2children(goterms)
    prt_hms(tic, "Get all goobj's children using go_tasks::get_go3children", prt)
    # Compare children lists
    _chk_a2bset(go2children_orig, go2children_fast)
Esempio n. 51
0
def test_i96():
    """Test to re-produce issue#96: Passes currently.

    The issue being duplicated is:
    ValueError("All values in table must be nonnegative.
    """
    # Get genes
    print('CWD', os.getcwd())
    study_ids = _get_geneids()
    population_ids = GENEID2NT.keys()

    # Get databases: download gene2go, then read the taxid 9606 associations
    print(os.getcwd())
    fin_gene2go = os.path.join(REPO, 'gene2go')
    dnld_ncbi_gene_file(fin_gene2go, loading_bar=None)
    gene2go = read_ncbi_gene2go(fin_gene2go, [9606])

    godag = get_godag(os.path.join(REPO, "go-basic.obo"), loading_bar=None)
    study_kws = {'methods': ['fdr_bh']}
    goeaobj = GOEnrichmentStudy(population_ids, gene2go, godag, **study_kws)
    # Run GOEA Gene Ontology Enrichment Analysis; the test passes if this does not raise
    goeaobj.run_study(study_ids)
def test_semantic_similarity():
    """Compute semantic similarity, information content, Resnik and Lin scores.

    Uses two GO terms and the annotations loaded from tair.gaf. The
    information content and both similarity scores must be truthy.
    """
    godag = get_godag(os.path.join(REPO, "go-basic.obo"), loading_bar=None)
    fin_assc = os.path.join(REPO, 'tair.gaf')
    associations = dnld_assc(fin_assc, godag)

    go_a = 'GO:0048364'  # BP level-03 depth-04 root development
    go_b = 'GO:0044707'  # BP level-02 depth-02 single-multicellular organism process

    # Semantic similarity between the two terms, followed by the terms themselves
    sim = semantic_similarity(go_a, go_b, godag)
    print('\nThe semantic similarity between terms {GO1} and {GO2} is {VAL}.'.format(
        GO1=go_a, GO2=go_b, VAL=sim))
    for goid in (go_a, go_b):
        print(godag[goid])

    # Counts of each GO term are needed for the measures below
    termcounts = TermCounts(godag, associations)

    # Information content of the single term GO:0048364
    infocontent = get_info_content(go_a, termcounts)
    print('\nInformation content ({GO}) = {INFO}\n'.format(GO=go_a, INFO=infocontent))
    assert infocontent, "FATAL INFORMATION CONTENT"

    # Resnik's similarity measure is defined as the information content of the
    # most informative common ancestor: the most specific common parent term.
    sim_r = resnik_sim(go_a, go_b, godag, termcounts)
    print('Resnik similarity score ({GO1}, {GO2}) = {VAL}'.format(
        GO1=go_a, GO2=go_b, VAL=sim_r))
    assert sim_r, "FATAL RESNIK SCORE"

    # Lin similarity score for the same pair of terms
    sim_l = lin_sim(go_a, go_b, godag, termcounts)
    print('Lin similarity score ({GO1}, {GO2}) = {VAL}'.format(
        GO1=go_a, GO2=go_b, VAL=sim_l))
    assert sim_l, "FATAL LIN SCORE"
def test_write_summary_cnts(log=sys.stdout):
    """Write level/depth summaries for the whole GO DAG and for three taxids."""
    godag = get_godag(os.path.join(os.getcwd(), "go-basic.obo"), loading_bar=None)
    rptobj = RptLevDepth(godag, log)

    # Level/depth summary for all GOs in the DAG
    log.write("\nSummary for all Ontologies:\n")
    rptobj.write_summary_cnts_all()

    # Associations for human, fly, and mouse, collected per taxid in a nested dictionary
    taxid2asscs = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
    get_assoc_ncbi_taxids([9606, 7227, 10090], taxid2asscs=taxid2asscs, loading_bar=None)
    assert taxid2asscs, 'taxid2asscs EMPTY'

    # The same header precedes both the GO ID report and the GO object report
    hdr = "\nSummary for Ontologies for taxid({T}):\n"
    for taxid, assc in taxid2asscs.items():
        log.write(hdr.format(T=taxid))
        go_ids = assc['GO2IDs'].keys()
        rptobj.write_summary_cnts(go_ids)
        log.write(hdr.format(T=taxid))
        rptobj.write_summary_cnts_goobjs([godag.get(goid) for goid in go_ids])

    # GO depth count table for the full GO DAG in LaTeX format
    rptobj.prttex_summary_cnts_all(prt=log)
Esempio n. 54
0
def test_typedef():
    """Print the 'negatively_regulates' typedef of the GO DAG read from go-basic.obo."""
    typedefs = get_godag("go-basic.obo", loading_bar=None).typedefs
    print(typedefs['negatively_regulates'])
Esempio n. 55
0
def get_go2obj():
    """Read the GODag and return its entries whose term is not obsolete."""
    fin_obo = os.path.join(os.getcwd(), "go-basic.obo")
    go2obj = {}
    for goid, goterm in get_godag(fin_obo, loading_bar=None).items():
        if goterm.is_obsolete:
            continue
        go2obj[goid] = goterm
    return go2obj
Esempio n. 56
0
def _get_gosubdag():
    """Return a GoSubDag built from REPO's go-basic.obo, loaded with relationships."""
    godag_kws = {
        'prt': sys.stdout,
        'loading_bar': False,
        'optional_attrs': ['relationship'],
    }
    godag = get_godag(os.path.join(REPO, 'go-basic.obo'), **godag_kws)
    # No source GO IDs are given (None)
    return GoSubDag(None, godag)
Esempio n. 57
0
 def __init__(self, obo):
     """Load the GO DAG named by `obo` and build a GoSubDag from it.

     Args:
         obo: obo file name, joined onto REPO/../goatools/.
     """
     # Remember the working directory at construction time
     self.cwd = os.getcwd()
     self.go2obj_all = get_godag(os.path.join(REPO, "../goatools/", obo))
     # No source GO IDs are given (None)
     self.gosubdag_all = GoSubDag(None, self.go2obj_all)
     # Print format taken from the GoSubDag's 'fmta' print attribute
     self.prtfmt = self.gosubdag_all.prt_attr['fmta']