예제 #1
0
def test_get_children(prt=sys.stdout):
    """Compare two ways of collecting all children of every GO term.

    The result of calling ``get_all_children()`` on each GO term is used as
    the expected value and is checked against the mapping returned by
    ``get_id2children``. The elapsed time of each approach is reported
    through ``prt_hms``. (Originally labelled as a test for Issue #86.)

    Args:
        prt: Stream handed to ``prt_hms`` for the timing messages.
    """
    # Load GO-DAG from the obo file located one directory above this file
    fin_obo = "go-basic.obo"
    repo = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
    godag = get_godag(os.path.join(repo, fin_obo))
    # Keep only the entries whose key is the term's own id
    # (presumably this drops alternate-id aliases -- confirm against GODag)
    go2obj = {go: o for go, o in godag.items() if go == o.id}
    # Expected: get all children for all GO IDs using GOTerm.get_all_children
    tic = timeit.default_timer()
    go2children_orig = {}
    for goobj in go2obj.values():
        children = goobj.get_all_children()
        # GO terms without children are left out of the expected mapping
        if children:
            go2children_orig[goobj.id] = children
    tic = prt_hms(tic,
                  "Get all goobj's children using GOTerm.get_all_children()",
                  prt)
    # Actual: get all children for all GO IDs using get_id2children
    go2children_fast = get_id2children(go2obj.values())
    prt_hms(tic, "Get all goobj's children using go_tasks::get_id2children",
            prt)
    # Compare children sets: expected first, actual second
    CheckGOs('test_get_children', go2obj).chk_a2bset(go2children_orig,
                                                     go2children_fast)
예제 #2
0
def test_get_upperselect(prt=sys.stdout):
    """Test getting parents and user-specified ancestor relationships.

    For every relationship combination produced by ``RelationshipCombos``,
    the per-term result of ``get_all_upperselect`` (expected) is compared
    with the mapping returned by ``get_id2upperselect`` (actual).

    Args:
        prt: Stream handed to ``prt_hms`` for the timing messages.
    """
    # Load GO-DAG, including the optional 'relationship' attribute
    repo = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
    fin_obo = os.path.join(repo, 'go-basic.obo')
    godag = get_godag(fin_obo, optional_attrs='relationship')
    run = RelationshipCombos(godag)
    run.chk_relationships_all()
    rels_combo = run.get_relationship_combos()
    print('{N} COMBINATIONS OF RELATIONSHIPS'.format(N=len(rels_combo)))

    for relidx, rels_set in enumerate(rels_combo, 1):
        header = '{I}) RELATIONSHIPS[{N}]: {Rs}'.format(
            I=relidx, N=len(rels_set), Rs=' '.join(sorted(rels_set)))
        print(header)
        # Expected: one get_all_upperselect call per GO term
        tic = timeit.default_timer()
        go2upperselect_orig = {}
        for goterm in run.go2obj.values():
            upper_ids = get_all_upperselect(goterm, rels_set)
            go2upperselect_orig[goterm.item_id] = upper_ids
        tic = prt_hms(
            tic, "Get all goobj's parents using get_all_upperselect(GOTerm)", prt)
        # Actual: a single get_id2upperselect call over all GO terms
        go2upperselect_fast = get_id2upperselect(run.go2obj.values(), rels_set)
        prt_hms(
            tic, "Get all goobj's parents using go_tasks::get_id2upperselect", prt)
        # Compare the two mappings: EXPECTED, ACTUAL
        checker = CheckGOs('test_get_upper_select', godag)
        checker.chk_a2bset(go2upperselect_orig, go2upperselect_fast)
        passed = "PASSED: get_upperselect RELATIONSHIPS[{N}]: {Rs}".format(
            N=len(rels_set), Rs=' '.join(sorted(rels_set)))
        print(passed)
예제 #3
0
def test_david_chart():
    """Check the significance counts read from six DAVID chart files.

    Each file under ``data/gjoneska_pfenning`` is read with
    ``DavidChartReader``. The number of records (``nts``) must equal the
    expected TOTAL, and every count reported by ``get_num_sig()`` must equal
    the expected value stored under the same field name.
    """
    repo = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
    david_dir = "{REPO}/data/gjoneska_pfenning".format(REPO=repo)
    ntobj = cx.namedtuple("david6p8", "TOTAL FDR Bonferroni Benjamini PValue")
    # Expected counts per input file, in namedtuple field order:
    #   TOTAL, FDR, Bonferroni, Benjamini, PValue
    fin2exp = {
        "david_chart6p8_Consistent_Decrease.txt": ntobj(1773, 259, 249, 432, 1316),
        "david_chart6p8_Transient_Decrease.txt": ntobj(423, 0, 2, 2, 246),
        "david_chart6p8_Consistent_Increase.txt": ntobj(2359, 353, 308, 781, 1868),
        "david_chart6p8_Transient_Increase.txt": ntobj(2191, 658, 652, 1105, 1786),
        "david_chart6p8_Late_Decrease.txt": ntobj(2752, 591, 568, 1153, 2187),
        "david_chart6p8_Late_Increase.txt": ntobj(4597, 708, 616, 1715, 3603),
    }
    # Read every chart file and time the whole batch
    tic = timeit.default_timer()
    fin2obj = {}
    for fin in fin2exp:
        fin2obj[fin] = DavidChartReader(os.path.join(david_dir, fin))
    prt_hms(tic, "Created DavidChartReader objects")
    # Compare actual counts against the expected counts
    for fin, obj in fin2obj.items():
        ntexp = fin2exp[fin]
        assert ntexp.TOTAL == len(obj.nts)
        obj.prt_num_sig()
        for fld, cnt_actual in obj.get_num_sig().most_common():
            cnt_expected = getattr(ntexp, fld)
            assert cnt_actual == cnt_expected, \
                "{FIN}: {FLD} Act({ACT}) Exp({EXP})".format(
                    FIN=fin, FLD=fld, ACT=cnt_actual, EXP=getattr(ntexp, fld))
예제 #4
0
 def chk_get_goterms_upper(self):
     """Check each GOTerm's get_goterms_upper() against the expected GO IDs.

     For every GO term in ``self.go2obj``, the item IDs of the objects
     returned by ``get_goterms_upper()`` must equal the set returned by
     ``self._get_goterms_upper`` for that term's ID.

     Raises:
         AssertionError: If the actual and expected GO ID sets differ; the
             message names the GO ID and shows both sets.
     """
     tic = timeit.default_timer()
     for goterm in self.go2obj.values():
         goids_act = set(o.item_id for o in goterm.get_goterms_upper())
         goids_exp = self._get_goterms_upper(goterm.item_id)
         # Same diagnostic message format as chk_get_goterms_lower
         assert goids_act == goids_exp, "{GO} EXP({E}) ACT({A})".format(
             GO=goterm.item_id, E=goids_exp, A=goids_act)
     prt_hms(tic, "get_goterms_upper")
예제 #5
0
 def chk_get_goterms_lower(self):
     """Check each GOTerm's get_goterms_lower() against the expected GO IDs.

     For every GO term in ``self.go2obj``, the item IDs of the objects
     returned by ``get_goterms_lower()`` must equal the value returned by
     ``self._get_goterms_lower`` for that term's ID.
     """
     start = timeit.default_timer()
     for term in self.go2obj.values():
         goids_act = {lower.item_id for lower in term.get_goterms_lower()}
         goids_exp = self._get_goterms_lower(term.item_id)
         assert goids_act == goids_exp, "{GO} EXP({E}) ACT({A})".format(
             GO=term.item_id, E=goids_exp, A=goids_act)
     prt_hms(start, "get_goterms_lower")
예제 #6
0
 def get_gosubdag_r0(self, goids):
     """Return a GoSubDag built from ``goids`` and ``self.godag_r0``.

     The GoSubDag is created with ``relationships=None`` and ``prt=None``.
     The creation time is reported through ``prt_hms`` together with the
     number of GO terms and GO sources in the new GoSubDag.
     """
     start = timeit.default_timer()
     gosubdag = GoSubDag(goids, self.godag_r0, relationships=None, prt=None)
     num_gos = len(gosubdag.go2obj)
     num_srcs = len(gosubdag.go_sources)
     msg = "GoSubDag r0 {N:4} GOs {S:3} srcs".format(N=num_gos, S=num_srcs)
     prt_hms(start, msg)
     return gosubdag
 def _randoms(self, prt):
     """Compare GOATOOLS and pygosemsim Wang similarity on GO ID pairs.

     ``self.goids`` is consumed two at a time: (goids[0], goids[1]),
     (goids[2], goids[3]), and so on. For each pair, the value from
     ``self.wang.get_sim`` (actual) is compared with the value from
     ``similarity.wang`` (expected). When they differ by more than 0.02,
     details are written to both ``prt`` and stdout and the ancestors of
     both GO IDs are printed; otherwise a PASS line is written to ``prt``.

     Args:
         prt: Writable stream that receives the PASS lines and the details
             of mismatching pairs.

     Raises:
         AssertionError: If either implementation returns None for a pair.
     """
     #pylint: disable=line-too-long
     goids = self.goids
     # Stop at len - 1 so that an odd-length list drops its unpaired last
     # GO ID instead of raising IndexError on goids[i + 1]
     go_pairs = [(goids[i], goids[i + 1])
                 for i in range(0, len(goids) - 1, 2)]
     tic = timeit.default_timer()
     # Information on Python's round, which is used in 2 spots in pygosemsim:
     #     https://stackoverflow.com/questions/13479163/round-float-to-x-decimals
     #     from decimal import Decimal
     #     >>> Decimal('66.66666666666').quantize(Decimal('1e-4'))
     #     Decimal('66.6667')
     #     >>> Decimal('1.29578293').quantize(Decimal('1e-6'))
     #     Decimal('1.295783')
     # In issue, https://github.com/micropython/micropython/issues/3516,
     # https://github.com/mdickinson dreams of deprecating the two-argument form of round in Python....
     #     https://github.com/micropython/micropython/issues/3516#issuecomment-625298591
     # Use the decimal type instead: https://docs.python.org/3.10/library/decimal.html
     acts = [self.wang.get_sim(a, b) for a, b in go_pairs]
     tic = prt_hms(tic, 'GOATOOLS wang calc')
     exps = [similarity.wang(self.graph, a, b) for a, b in go_pairs]
     prt_hms(tic, 'pysemsim wang')
     assert len(acts) == len(exps)
     for idx, (act, exp, (go_a,
                          go_b)) in enumerate(zip(acts, exps, go_pairs)):
         # The assert message expression is evaluated only on failure, so
         # _prt_ab is called (writing to stdout) only for a None result
         assert act is not None, self._prt_ab(idx, go_a, go_b, act, exp,
                                              stdout)
         assert exp is not None, self._prt_ab(idx, go_a, go_b, act, exp,
                                              stdout)
         if abs(exp - act) > 0.02:
             # Mismatch: report the pair on both streams, then show ancestors
             for strm in (prt, stdout):
                 self._prt_ab(idx, go_a, go_b, act, exp, strm)
             stdout.flush()
             prt.flush()
             self.prt_ancestors(go_a, True)
             self.prt_ancestors(go_b, True)
         else:
             prt.write('{i} PASS {A} {B} pygosemsim={b:f} GOATOOLS={a:f}\n'.
                       format(i=idx,
                              A=go_a,
                              B=go_b,
                              a=act,
                              b=exp))
 def __init__(self, fin_godag, num_calcs, relationships, w_e, seed, prt):
     """Load the GO DAG for both libraries and set up the Wang calculator.

     Args:
         fin_godag: Path of the GO DAG file given to ``get_godag``; the same
             path without its extension is given to ``graph.from_resource``.
         num_calcs: Passed to ``self._init_goids`` to choose the GO IDs.
         relationships: Passed to ``SsWang`` and ``get_go2reldepth``.
         w_e: Passed to ``SsWang``.
         seed: Passed to ``RandomSeed32``.
         prt: Stream passed to ``get_godag``.
     """
     start = timeit.default_timer()
     self.godag = get_godag(
         fin_godag, optional_attrs=['relationship'], prt=prt)
     start = prt_hms(start, 'GOATOOLS read godag')
     # The extension is stripped because pygosemsim does not understand
     # cygwin paths
     resource = splitext(fin_godag)[0]
     self.graph = graph.from_resource(resource)
     prt_hms(start, 'pygosemsim read godag')
     # seedobj is set before _init_goids runs
     # (presumably _init_goids reads it -- confirm)
     self.seedobj = RandomSeed32(seed)
     self.goids = self._init_goids(num_calcs)
     # Restart the timer so that only the Wang setup is measured
     start = timeit.default_timer()
     self.wang = SsWang(self.goids, self.godag, relationships, w_e)
     goterms = {self.godag[goid] for goid in self.godag}
     self.go2reldepth = get_go2reldepth(goterms, relationships)
     prt_hms(start, 'GOATOOLS wang setup')
예제 #9
0
def test_i154_semsim_lin():
    """Check that GODag loads the 'consider' and 'replaced_by' attributes.

    The GO DAG is loaded twice with ``load_obsolete=True``: first with both
    optional attributes given as a set, then with 'consider' alone given as
    a string. After each load the attribute values of specific GO IDs are
    asserted.

    NOTE(review): the previous docstring referred to issue 148 and Lin
    similarity, while the function name says i154 -- confirm the issue number.
    """
    fin_dag = download_go_basic_obo()
    tic = timeit.default_timer()

    optional_attrs = {'consider', 'replaced_by'}
    load_obsolete = True
    prt = sys.stdout

    # Load with a set of optional attributes
    godag = GODag(fin_dag, optional_attrs, load_obsolete, prt)
    # Keep the value returned by prt_hms so the second report measures only
    # the second load (the rest of this file reuses it the same way)
    tic = prt_hms(tic, 'Loaded GO DAG')
    assert godag['GO:0000067'].consider
    assert godag['GO:0003734'].replaced_by == 'GO:0030532'

    # Load with a single optional attribute given as a string
    godag = GODag(fin_dag, 'consider', load_obsolete, prt)
    prt_hms(tic, 'Loaded GO DAG')
    assert godag['GO:0000067'].consider
예제 #10
0
def test_update_association():
    """Compare three ways of propagating counts through associations.

    Two independent copies of the annotations read by ``dnld_assc`` are
    updated, one by ``godag.update_association`` and one by the stand-alone
    ``update_association``. A third mapping comes from
    ``objanno.get_id2gos(namespace='BP', propagate_counts=True)``. The first
    result is then checked against the other two with ``_chk_assc``, and the
    GO DAG is checked with ``_chk_godag``.
    """

    def _copy_assc(assc):
        """Return a copy of the mapping in which each value is a new set."""
        return {gene: set(goids) for gene, goids in assc.items()}

    print('\n1) READ GODAG:')
    assc_name = "goa_human.gaf"  # gene_association.fb gene_association.mgi
    obo = join(REPO, "go-basic.obo")
    tic = timeit.default_timer()
    godag = get_godag(obo)
    tic = prt_hms(tic, "Created two GODags: One for original and one for new propagate counts")

    print('\n2) READ ANNOTATIONS:')
    fin_assc = join(REPO, assc_name)
    assc_orig = dnld_assc(fin_assc, godag, prt=stdout)
    tic = prt_hms(tic, "Associations Read")
    objanno = get_objanno(fin_assc, 'gaf', godag=godag)
    tic = prt_hms(tic, "Associations Read")

    print('\n3) MAKE COPIES OF ASSOCIATIONS:')
    assc1 = _copy_assc(assc_orig)
    assc2 = _copy_assc(assc_orig)
    tic = prt_hms(tic, "Associations Copied: One for original and one for new")

    print('\n4) UPDATE ASSOCIATIONS (PROPAGATE COUNTS):')
    # Original: method on the GO DAG
    godag.update_association(assc1)
    tic = prt_hms(tic, "ORIG: godag.update_association(assc)")
    # New: stand-alone function
    update_association(assc2, godag)
    tic = prt_hms(tic, "NEW SA:    update_association(go2obj, assc_orig)")
    # New: through the annotation object
    assc3 = objanno.get_id2gos(namespace='BP', propagate_counts=True)
    prt_hms(tic, "NEW BASE:  update_association(go2obj, assc_orig)")

    print('\n5) RUN CHECKS')
    _chk_assc(assc1, assc2)
    _chk_assc(assc1, assc3)
    _chk_godag(godag, obo)
예제 #11
0
 def __init__(self):
     """Download the GO DAG and build GoSubDags with and without relationships.

     Attributes set:
         godag_r0: GODag loaded without optional attributes.
         godag_r1: GODag loaded with the optional 'relationship' attribute.
         goids: List of the unique ``id`` values of the terms in godag_r0.
         gosubdag_r0: GoSubDag of all goids over godag_r0.
         gosubdag_r1: GoSubDag of all goids over godag_r1 with
             ``relationships=True``.
     """
     # self.obo is read before anything assigns it here
     # (presumably a class attribute -- confirm)
     download_go_basic_obo(self.obo, sys.stdout, loading_bar=None)
     self.godag_r0 = GODag(self.obo)
     self.godag_r1 = GODag(self.obo, optional_attrs={'relationship'})
     self.goids = list({o.id for o in self.godag_r0.values()})
     # GoSubDag (plain)
     tic = timeit.default_timer()
     self.gosubdag_r0 = GoSubDag(self.goids, self.godag_r0, prt=None)
     # Keep the value returned by prt_hms so the r1 report below measures
     # only the r1 build instead of r0 + r1
     tic = prt_hms(
         tic, "GoSubDag r0 {N:4} GOs {S:3} srcs".format(
             N=len(self.gosubdag_r0.go2obj),
             S=len(self.gosubdag_r0.go_sources)))
     # GoSubDag with relationships
     self.gosubdag_r1 = GoSubDag(self.goids,
                                 self.godag_r1,
                                 prt=None,
                                 relationships=True)
     prt_hms(
         tic, "GoSubDag r1 {N:4} GOs {S:3} srcs".format(
             N=len(self.gosubdag_r1.go2obj),
             S=len(self.gosubdag_r1.go_sources)))
예제 #12
0
def test_get_parent(prt=sys.stdout):
    """Compare three ways of collecting all parents of every GO term.

    The result of calling ``get_all_parents()`` on each GO term is used as
    the expected value and is checked against the mappings returned by
    ``get_id2parents`` and ``get_id2parents2``. (Originally labelled as a
    test for Issue #86.)

    Args:
        prt: Stream handed to ``prt_hms`` for the timing messages.
    """
    # Load GO-DAG from the obo file located one directory above this file
    repo = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
    godag = get_godag(os.path.join(repo, "go-basic.obo"))
    # Keep only the entries whose key is the term's own id
    go2obj = {goid: term for goid, term in godag.items() if goid == term.id}
    goterms = go2obj.values()
    # Expected: one get_all_parents call per GO term; every term gets an
    # entry, whatever get_all_parents returns for it
    tic = timeit.default_timer()
    go2parents_orig = {term.id: term.get_all_parents() for term in goterms}
    tic = prt_hms(
        tic, "Get all goobj's parents using GOTerm.get_all_parents()", prt)
    # Actual #1: get_id2parents over all GO terms
    go2parents_fast = get_id2parents(goterms)
    tic = prt_hms(
        tic, "Get all goobj's parents using go_tasks::get_id2parents", prt)
    # Actual #2: get_id2parents2 over all GO terms
    go2parents_fast2 = get_id2parents2(goterms)
    prt_hms(
        tic, "Get all goobj's parents using go_tasks::get_id2parents2", prt)
    # Compare each actual mapping with the expected mapping
    checker = CheckGOs('test_get_parents', go2obj)
    for go2parents_act in (go2parents_fast, go2parents_fast2):
        checker.chk_a2bset_equiv(go2parents_orig, go2parents_act)
    print("PASSED: get_parent test")