Ejemplo n.º 1
0
def test_semsim_wang(seed=None, num_calcs=1000, prt=stdout):
    """Run random Wang similarity comparisons, then check a main/alt GO ID pair.

    seed      -- passed to Run
    num_calcs -- passed to Run
    prt       -- passed to Run
    """
    fin_godag = join(REPO, 'go-basic.obo')
    # '{SEED}' is left as a placeholder here; it is handed to run.randoms as-is
    logfile = join(REPO, 'test_semsim_wang_termwise_{SEED}.log')

    # All 'regulates'-type relationships are loaded, but given an edge weight of 0.
    # This is not the same as loading only 'is_a' and 'part_of':
    # pygosemsim loads all relationships, then sets 'regulates' relationships
    # to 0 edge_weight.
    regulates_rels = ('regulates', 'negatively_regulates', 'positively_regulates')
    edge_weights = {'is_a': 0.8, 'part_of': 0.6}
    edge_weights.update(dict.fromkeys(regulates_rels, 0.0))
    relationships = {'part_of'}.union(regulates_rels)

    # Random GO term pairs
    run = Run(fin_godag, num_calcs, relationships, edge_weights, seed, prt)
    run.randoms(logfile)

    # The main GO ID and its alt GO ID must compare identically to a third GO ID
    go_main = 'GO:0008150'
    go_alt = 'GO:0000004'
    go_ref = 'GO:0008152'
    wang = SsWang({go_main, go_alt, go_ref}, run.godag, relationships, edge_weights)
    assert wang.get_sim(go_main, go_ref) == wang.get_sim(go_alt, go_ref)
Ejemplo n.º 2
0
def test_nb_semantic_similarity_wang():
    """Run Jupyter notebook: semantic_similarity_wang

    For each (relationships, edge weights) configuration, build an SsWang
    object for three GO terms, print its configuration, plot the terms, and
    compute the three pairwise similarities through run.get_sim.
    The (relationships, similarities) results are collected and printed.
    """
    go_a = 'GO:0007608'
    go_b = 'GO:0050911'
    go_c = 'GO:0042221'
    relationships = {'part_of'}

    go2txt = {go_a: 'GO TERM A', go_b: 'GO TERM B', go_c: 'GO TERM C'}
    goids = {go_a, go_b, go_c}
    run = Run()

    # Each entry: (relationships, relationship-to-edge-weight overrides)
    # pylint: disable=bad-whitespace
    rel_list = [
        (relationships, {}),
        (relationships, {
            'part_of': .9,
            'is_a': .9
        }),
        ({}, {}),
    ]
    rel_vals = []
    for rels, edge2weight in rel_list:
        wang = SsWang(goids, run.godag, rels, edge2weight)
        wang.prt_cfg()
        # The plot file name encodes the number of relationships used
        fout_png = 'smell_r{N}.png'.format(N=len(rels))
        run.plt(fout_png, goids, rels, go2txt)
        vals = [
            run.get_sim(wang, go_a, go_b),
            run.get_sim(wang, go_a, go_c),
            run.get_sim(wang, go_b, go_c)
        ]
        # Record the results so the final print shows them rather than an empty list
        rel_vals.append((rels, vals))
    print(rel_vals)
Ejemplo n.º 3
0
def createMatrix(goTerms, background, method):
    """Return a semantic similarity matrix as a list of rows.

    Keyword arguments:
    goTerms -- list of go terms
    background -- flattened background: lists of genes and GO Terms
    method -- semantic similarity method, either "Lin", "Resnik", "Wang" or "Edge-based"

    Only the cells whose row index is smaller than their column index are
    computed; every other cell (diagonal included) holds -1.
    """
    termcounts = TermCounts(godag, background)
    # The Wang helper object is only built when that method is requested
    wang_r1 = SsWang(goTerms, godag) if method == "Wang" else None

    def _pair_similarity(term_row, term_col):
        """Similarity of one pair of GO terms using the requested method."""
        if method == "Lin":
            return lin_sim(term_row, term_col, godag, termcounts)
        if method == "Resnik":
            return resnik_sim(term_row, term_col, godag, termcounts)
        if method == "Wang":
            return wang_r1.get_sim(term_row, term_col)
        # Any other method name falls through to semantic_similarity
        return semantic_similarity(term_row, term_col, godag)

    return [
        [
            _pair_similarity(term_row, term_col) if row_idx < col_idx else -1
            for col_idx, term_col in enumerate(goTerms)
        ]
        for row_idx, term_row in enumerate(goTerms)
    ]
Ejemplo n.º 4
0
    def run_semsim_wang_tbl1(self, go_a, go_b, relationships):
        """Test S-value for Table 1 in Wang_2007 (Alt ID)

        Builds an SsWang object for the two GO IDs, checks the S-values of
        go_a when relationships is exactly ['part_of'], then prints and
        returns the similarity of go_a and go_b.
        """
        ssobj = SsWang({go_a, go_b}, self.godag, relationships)
        dag_go_a = ssobj.go2dag[go_a]
        # S-values are only checked for the 'part_of'-only list
        only_part_of = relationships == ['part_of']
        if only_part_of:
            self._chk_svalues_a(dag_go_a)

        # Wang 2.2 Test Semantic similarity of GO terms
        sim_ab = ssobj.get_sim(go_a, go_b)
        msg = 'RUN: {A} {B} rels={R} SS={S:6.4f}'.format(
            A=go_a, B=go_b, R=relationships, S=sim_ab)
        print(msg)
        return sim_ab
Ejemplo n.º 5
0
 def __init__(self, fin_godag, num_calcs, relationships, w_e, seed, prt):
     """Load the GO DAG and graph, choose GO IDs, and set up SsWang.

     fin_godag     -- GO DAG file; passed to get_godag, and (minus its
                      extension) to graph.from_resource
     num_calcs     -- passed to self._init_goids to choose the GO IDs
     relationships -- passed to SsWang and get_go2reldepth
     w_e           -- passed to SsWang as its fourth argument
     seed          -- passed to RandomSeed32
     prt           -- passed to get_godag
     """
     tic = timeit.default_timer()
     # Load the GO DAG with the optional 'relationship' attribute
     self.godag = get_godag(fin_godag,
                            optional_attrs=['relationship'],
                            prt=prt)
     tic = prt_hms(tic, 'GOATOOLS read godag')
     # Needed because pysemsim does not understand cygwin paths
     self.graph = graph.from_resource(splitext(fin_godag)[0])
     tic = prt_hms(tic, 'pygosemsim read godag')
     # The seed object is created before the GO IDs are chosen
     # NOTE(review): presumably RandomSeed32 seeds the random generator
     # used when picking GO IDs -- confirm
     self.seedobj = RandomSeed32(seed)
     self.goids = self._init_goids(num_calcs)
     # Restart the timer (discarding the value returned by prt_hms above)
     # right before the SsWang setup
     tic = timeit.default_timer()
     self.wang = SsWang(self.goids, self.godag, relationships, w_e)
     # Built from every value stored in the GO DAG
     self.go2reldepth = get_go2reldepth(
         {self.godag[go]
          for go in self.godag}, relationships)
     tic = prt_hms(tic, 'GOATOOLS wang setup')
def test_semsim_wang(prt=stdout):
    """Check the edge weights (w_e) that SsWang ends up with for various arguments"""
    fin_godag = join(REPO, 'go-basic.obo')

    def _w_e(godag, *args, **kws):
        """Build an SsWang with no GO IDs and return its edge weights"""
        return SsWang({}, godag, *args, **kws).w_e

    # ---- GO DAG loaded without the optional 'relationship' attribute ----
    godag_r0 = get_godag(fin_godag, prt=prt)

    # Asking for a relationship must raise a RuntimeError
    raised = False
    try:
        SsWang({}, godag_r0, {'part_of',})
    except RuntimeError as err:
        assert str(err) == '**ERROR: SsWang GODag not loaded with relationships', '({})'.format(err)
        raised = True
    assert raised

    assert _w_e(godag_r0) == {'is_a': 0.8}
    assert _w_e(godag_r0, rel2scf={'is_a': 0.9, 'part_of': 0.7}) == {'is_a': 0.9}

    # ---- GO DAG loaded with the optional 'relationship' attribute ----
    godag_r1 = get_godag(fin_godag, optional_attrs=['relationship'], prt=prt)
    # Check that all relationships seen in the DAG are expected by SsWang
    _chk_relationships(godag_r1)

    # Relationship 'part_of' requested
    relationships = {'part_of'}
    assert _w_e(godag_r1, relationships, rel2scf={}) == {'is_a': 0.8, 'part_of': 0.6}
    assert _w_e(godag_r1, relationships,
                rel2scf={'is_a': 0.9, 'part_of': 0.7}) == {'is_a': 0.9, 'part_of': 0.7}
    assert _w_e(godag_r1, relationships,
                rel2scf={'is_a': 0.9, 'part_of': 0.7, 'regulates':0.2}) == {'is_a': 0.9, 'part_of': 0.7}

    # No relationships requested
    assert _w_e(godag_r1) == {'is_a': 0.8}
    assert _w_e(godag_r1, rel2scf={'is_a': 0.9, 'part_of': 0.7}) == {'is_a': 0.9}
    assert _w_e(godag_r1,
                rel2scf={'is_a': 0.9, 'part_of': 0.7, 'regulates':0.2}) == {'is_a': 0.9}

    # Unknown relationship names
    assert _w_e(godag_r1, relationships={'mock_rel'}) == {'is_a': 0.8}
    print('**PASSED: Properly reported ERROR in relationship, mock_rel')

    assert _w_e(godag_r1, rel2scf={'mock_rel':.7}) == {'is_a': 0.8}
Ejemplo n.º 7
0
class Run:
    """Wang Semantic Similarity tests: compare SsWang in GOATOOLS to pygosemsim"""

    def __init__(self, fin_godag, num_calcs, relationships, w_e, seed, prt):
        """Load the GO DAG and graph, choose GO IDs, and set up SsWang.

        fin_godag     -- GO DAG file; passed to get_godag, and (minus its
                         extension) to graph.from_resource
        num_calcs     -- number of GO ID pairs to choose (see _init_goids)
        relationships -- passed to SsWang and get_go2reldepth
        w_e           -- passed to SsWang as its fourth argument
        seed          -- passed to RandomSeed32
        prt           -- passed to get_godag
        """
        tic = timeit.default_timer()
        self.godag = get_godag(fin_godag,
                               optional_attrs=['relationship'],
                               prt=prt)
        tic = prt_hms(tic, 'GOATOOLS read godag')
        # Needed because pysemsim does not understand cygwin paths
        self.graph = graph.from_resource(splitext(fin_godag)[0])
        prt_hms(tic, 'pygosemsim read godag')
        # The seed object is created before the GO IDs are shuffled
        self.seedobj = RandomSeed32(seed)
        self.goids = self._init_goids(num_calcs)
        # Restart the timer right before the SsWang setup
        tic = timeit.default_timer()
        self.wang = SsWang(self.goids, self.godag, relationships, w_e)
        self.go2reldepth = get_go2reldepth(
            {self.godag[go]
             for go in self.godag}, relationships)
        prt_hms(tic, 'GOATOOLS wang setup')

    def prt_ancestors(self, goid, prt_if_diff=False):
        """Print ancestor counts for both Wang and GOATOOLS.

        Output is only produced when prt_if_diff is True and the two
        ancestor sets differ.
        """
        a_w = nx.ancestors(self.graph, goid)
        a_g = self.wang.go2dag[goid].ancestors
        if prt_if_diff and a_w != a_g:
            print('{GO} {w:2} Wang  {g:2} GOATOOLS'.format(GO=goid,
                                                           w=len(a_w),
                                                           g=len(a_g)))

    def randoms(self, logfile):
        """Run random simulations. Compare SsWang in GOATOOLS to pygosemsim

        logfile -- log file name pattern containing a '{SEED}' field, which
                   is filled with the seed's hex string
        """
        logfile = logfile.format(SEED=self.seedobj.get_seed_hexstr())
        with open(logfile, 'w') as prt:
            self.seedobj.prt(prt)
            self.seedobj.prt(stdout)
            self._randoms(prt)
        # Report only after the log file has been closed
        print('  **WROTE: {LOG}'.format(LOG=logfile))

    def _randoms(self, prt):
        """Randomly select GO terms for semantic similarity calculations.

        Consecutive GO IDs in self.goids are paired up and each pair's
        similarity is calculated by both GOATOOLS and pygosemsim.
        Every comparison is written to prt; mismatches also go to stdout.

        Returns the number of pairs whose similarities differ by more than 0.02.
        """
        #pylint: disable=line-too-long
        goids = self.goids
        # Stop at len - 1 so that a trailing unpaired GO ID is ignored
        go_pairs = [(goids[i], goids[i + 1])
                    for i in range(0, len(goids) - 1, 2)]
        tic = timeit.default_timer()
        # Information on Python's round, which is used in 2 spots in pygosemsim:
        #     https://stackoverflow.com/questions/13479163/round-float-to-x-decimals
        #     from decimal import Decimal
        #     >>> Decimal('66.66666666666').quantize(Decimal('1e-4'))
        #     Decimal('66.6667')
        #     >>> Decimal('1.29578293').quantize(Decimal('1e-6'))
        #     Decimal('1.295783')
        # In issue, https://github.com/micropython/micropython/issues/3516,
        # https://github.com/mdickinson dreams of deprecating the two-argument form of round in Python....
        #     https://github.com/micropython/micropython/issues/3516#issuecomment-625298591
        # Use the decimal type instead: https://docs.python.org/3.10/library/decimal.html
        acts = [self.wang.get_sim(a, b) for a, b in go_pairs]
        tic = prt_hms(tic, 'GOATOOLS wang calc')
        exps = [similarity.wang(self.graph, a, b) for a, b in go_pairs]
        prt_hms(tic, 'pysemsim wang')
        assert len(acts) == len(exps)
        failures = 0
        for idx, (act, exp, (go_a, go_b)) in enumerate(zip(acts, exps, go_pairs)):
            # _prt_ab prints the offending pair before the assertion fires
            assert act is not None, self._prt_ab(idx, go_a, go_b, act, exp,
                                                 stdout)
            assert exp is not None, self._prt_ab(idx, go_a, go_b, act, exp,
                                                 stdout)
            if abs(exp - act) > 0.02:
                for strm in [prt, stdout]:
                    self._prt_ab(idx, go_a, go_b, act, exp, strm)
                stdout.flush()
                prt.flush()
                failures += 1
                self.prt_ancestors(go_a, True)
                self.prt_ancestors(go_b, True)
            else:
                prt.write('{i} PASS {A} {B} pygosemsim={b:f} GOATOOLS={a:f}\n'.
                          format(i=idx,
                                 A=go_a,
                                 B=go_b,
                                 a=act,
                                 b=exp))
        return failures

    def _prt_ab(self, idx, go_a, go_b, act, exp, strm):
        """Print GO IDs and similarities.

        act -- GOATOOLS similarity, or None
        exp -- pygosemsim similarity, or None

        A missing similarity is written as 'None' and the difference as
        'XXX', so this can be called while reporting a None result.
        """
        if act is None or exp is None:
            diff = 'XXX'
        else:
            diff = '{:f}'.format(abs(exp - act))
        strm.write(
            '{i:3}) FAIL {x:2} {A}   {y:2} {B} pygosemsim={b} GOATOOLS={a} DIFF={ab}\n'
            .format(i=idx,
                    A=go_a,
                    x=self.go2reldepth[go_a],
                    B=go_b,
                    y=self.go2reldepth[go_b],
                    a=self._fmt_sim(act),
                    b=self._fmt_sim(exp),
                    ab=diff))

    @staticmethod
    def _fmt_sim(val):
        """Format one similarity value; None is written as 'None'"""
        return 'None' if val is None else '{:f}'.format(val)

    def _init_goids(self, num_calcs):
        """Pick random GO IDs: up to 2*num_calcs biological_process IDs"""
        # Sorted before shuffling so the starting order is always the same
        goids = sorted([
            o.item_id for o in set(self.godag.values())
            if o.namespace == 'biological_process'
        ])
        shuffle(goids)
        return goids[:2 * num_calcs]