Exemple #1
0
    def update_beliefs(self, corpus_id):
        """Return updated belief scores for a given corpus.

        Prior beliefs are first set on every Statement of the corpus with a
        BeliefEngine built from this object's scorer. The belief of each
        curated Statement is then overwritten with its curation value.
        Curations whose UUID is not found among the corpus Statements are
        logged and ignored.

        Parameters
        ----------
        corpus_id : str
            The ID of the corpus for which beliefs are to be updated.

        Returns
        -------
        dict
            A dictionary of belief scores with keys corresponding to Statement
            UUIDs and values to new belief scores.
        """
        corpus = self.get_corpus(corpus_id)
        be = BeliefEngine(self.scorer)
        stmts = list(corpus.statements.values())
        be.set_prior_probs(stmts)
        # Here we set beliefs based on actual curation
        for uuid, correct in corpus.curations.items():
            stmt = corpus.statements.get(uuid)
            if stmt is None:
                # Hand the UUID to the logger as an argument so that the
                # message is only formatted if the record is emitted.
                logger.warning('%s is not in the corpus.', uuid)
                continue
            stmt.belief = correct
        return {st.uuid: st.belief for st in stmts}
Exemple #2
0
    def update_beliefs(self, corpus_id):
        """Return updated belief scores for a given corpus.

        The scorer-based prior belief is computed for all Statements in the
        corpus first; afterwards the belief of every curated Statement is
        replaced by the value stored in the corpus curations.

        Parameters
        ----------
        corpus_id : str
            The ID of the corpus for which beliefs are to be updated.

        Returns
        -------
        dict
            A dictionary of belief scores with keys corresponding to Statement
            UUIDs and values to new belief scores.
        """
        corpus = self.get_corpus(corpus_id)
        stmts = list(corpus.statements.values())
        BeliefEngine(self.scorer).set_prior_probs(stmts)
        # Curated values take precedence over the computed priors
        for uuid, correct in corpus.curations.items():
            stmt = corpus.statements.get(uuid)
            if stmt is not None:
                stmt.belief = correct
            else:
                # Lazy logger arguments: no string formatting happens unless
                # warnings are actually being recorded.
                logger.warning('%s is not in the corpus.', uuid)
        return {st.uuid: st.belief for st in stmts}
def test_prior_prob_assertion():
    """Check that prior scoring leaves the belief of this statement at 1.

    The statement carries ev1, a deep copy of ev1, ev2 and ev3 as evidence.
    """
    engine = BeliefEngine()
    evidences = [ev1, deepcopy(ev1), ev2, ev3]
    stmt = Phosphorylation(None, Agent('a'), evidence=evidences)
    # The belief is expected to be 1 both before and after scoring
    assert stmt.belief == 1
    engine.set_prior_probs([stmt])
    assert stmt.belief == 1
def test_prior_prob_one():
    """Check the prior belief of a statement backed only by ev1.

    The expected value is one minus the sum of the default random and
    systematic error probabilities listed under 'reach'.
    """
    engine = BeliefEngine()
    reach_rand = default_probs['rand']['reach']
    reach_syst = default_probs['syst']['reach']
    expected = 1 - (reach_rand + reach_syst)
    stmt = Phosphorylation(None, Agent('a'), evidence=[ev1])
    # Before scoring the statement is expected to carry a belief of 1
    assert stmt.belief == 1
    engine.set_prior_probs([stmt])
    assert stmt.belief == expected
def test_prior_prob_one():
    """Check the prior belief of a statement backed only by ev1.

    The expected value is derived from the 'reach' entries of the engine's
    own prior probability tables.
    """
    engine = BeliefEngine()
    rand_err = engine.prior_probs['rand']['reach']
    syst_err = engine.prior_probs['syst']['reach']
    expected = 1 - (rand_err + syst_err)
    stmt = Phosphorylation(None, Agent('a'), evidence=[ev1])
    # Belief is expected to be 1 until the engine has scored the statement
    assert stmt.belief == 1
    engine.set_prior_probs([stmt])
    assert stmt.belief == expected
Exemple #6
0
def test_prior_prob_two_same():
    """Check the prior belief when ev1 is listed twice as evidence.

    The expected value squares the 'reach' random error taken from the
    engine's prior tables and adds the 'reach' systematic error once.
    """
    engine = BeliefEngine()
    rand_err = engine.prior_probs['rand']['reach']
    syst_err = engine.prior_probs['syst']['reach']
    expected = 1 - (rand_err**2 + syst_err)
    stmt = Phosphorylation(None, Agent('a'), evidence=[ev1, ev1])
    # Belief is expected to be 1 until the engine has scored the statement
    assert stmt.belief == 1
    engine.set_prior_probs([stmt])
    assert stmt.belief == expected
Exemple #7
0
def test_prior_prob_two_same():
    """Check the prior belief when ev1 is listed twice as evidence.

    The expected value squares the default 'reach' random error and adds
    the default 'reach' systematic error once.
    """
    engine = BeliefEngine()
    reach_rand = default_probs['rand']['reach']
    reach_syst = default_probs['syst']['reach']
    expected = 1 - (reach_rand**2 + reach_syst)
    duplicated_evidence = [ev1, ev1]
    stmt = Phosphorylation(None, Agent('a'), evidence=duplicated_evidence)
    # Before scoring the statement is expected to carry a belief of 1
    assert stmt.belief == 1
    engine.set_prior_probs([stmt])
    assert stmt.belief == expected
Exemple #8
0
def test_cycle():
    """Run hierarchy scoring on two statements that support each other.

    The test makes no assertions; it only requires that the call to
    set_hierarchy_probs returns without raising.
    """
    with_enzyme = Phosphorylation(Agent('B'), Agent('A1'))
    without_enzyme = Phosphorylation(None, Agent('A1'))
    # Link the two statements to each other in both directions
    pairs = ((with_enzyme, without_enzyme), (without_enzyme, with_enzyme))
    for stmt, other in pairs:
        stmt.supports = [other]
        stmt.supported_by = [other]
    BeliefEngine().set_hierarchy_probs([with_enzyme, without_enzyme])
def test_hierarchy_probs1():
    """Check hierarchy beliefs for one statement supporting another.

    The statement backed by ev2 is set to support the one backed by ev1.
    """
    engine = BeliefEngine()
    supported = Phosphorylation(None, Agent('a'), evidence=[ev1])
    supporter = Phosphorylation(None, Agent('b'), evidence=[ev2])
    supporter.supports = [supported]
    supported.supported_by = [supporter]
    engine.set_hierarchy_probs([supported, supporter])
    # 0.35 is presumably the combined error of one evidence -- TODO confirm
    expected_beliefs = (
        (supported, 1-0.35),
        (supporter, 1-0.35*0.35),
    )
    for stmt, expected in expected_beliefs:
        assert_close_enough(stmt.belief, expected)
Exemple #10
0
def test_hierarchy_probs1():
    """Check hierarchy beliefs on a two-statement support chain.

    stmt_b (evidence ev2) lists stmt_a (evidence ev1) in its supports, and
    stmt_a lists stmt_b in its supported_by.
    """
    engine = BeliefEngine()
    stmt_a = Phosphorylation(None, Agent('a'), evidence=[ev1])
    stmt_b = Phosphorylation(None, Agent('b'), evidence=[ev2])
    stmt_b.supports = [stmt_a]
    stmt_a.supported_by = [stmt_b]
    stmts = [stmt_a, stmt_b]
    engine.set_hierarchy_probs(stmts)
    # stmt_a is expected to keep a single-evidence belief, while stmt_b is
    # expected to combine two error terms of 0.35 each
    assert_close_enough(stmt_a.belief, 1 - 0.35)
    assert_close_enough(stmt_b.belief, 1 - 0.35 * 0.35)
def test_cycle():
    """Exercise hierarchy scoring on a two-statement support cycle.

    No belief values are asserted; completing the call to
    set_hierarchy_probs without an error is the whole test.
    """
    first = Phosphorylation(Agent('B'), Agent('A1'))
    second = Phosphorylation(None, Agent('A1'))
    # Each statement both supports and is supported by the other one
    first.supports, first.supported_by = [second], [second]
    second.supports, second.supported_by = [first], [first]
    belief_engine = BeliefEngine()
    belief_engine.set_hierarchy_probs([first, second])
def test_wm_scorer():
    """Check the Eidos scorer's prior tables and use it for prior scoring.

    Besides verifying that 'hume' and 'biopax' entries are present, the
    test only requires that set_prior_probs runs on an Eidos-sourced
    Influence statement.
    """
    eidos_scorer = wm_scorer.get_eidos_scorer()
    eidos_evidence = [Evidence(source_api='eidos')]
    influence = Influence(Concept('a'), Concept('b'),
                          evidence=eidos_evidence)
    # Entries for other sources must still be available in the tables
    priors = eidos_scorer.prior_probs
    assert 'hume' in priors['rand']
    assert 'biopax' in priors['syst']
    BeliefEngine(eidos_scorer).set_prior_probs([influence])
Exemple #13
0
def test_evidence_random_noise_prior():
    """Check random noise priors with source- and subtype-level values.

    The priors are checked twice: once by calling
    evidence_random_noise_prior directly and once through a BeliefEngine
    that uses a SimpleScorer configured with the same probabilities and
    zero systematic error.
    """
    type_probs = {'biopax': 0.9, 'geneways': 0.2}
    biopax_subtype_probs = {'reactome': 0.4, 'biogrid': 0.2}
    geneways_subtype_probs = {'phosphorylate': 0.5, 'bind': 0.7}
    subtype_probs = {
        'biopax': biopax_subtype_probs,
        'geneways': geneways_subtype_probs
    }

    ev_geneways_bind = Evidence(source_api='geneways',
                                source_id=0,
                                pmid=0,
                                text=None,
                                epistemics={},
                                annotations={'actiontype': 'bind'})
    ev_biopax_reactome = Evidence(source_api='biopax',
                                  source_id=0,
                                  pmid=0,
                                  text=None,
                                  epistemics={},
                                  annotations={'source_sub_id': 'reactome'})
    ev_biopax_pid = Evidence(source_api='biopax',
                             source_id=0,
                             pmid=0,
                             text=None,
                             epistemics={},
                             annotations={'source_sub_id': 'pid'})

    # Random noise prior for geneways bind evidence is the subtype prior,
    # since we specified it
    assert evidence_random_noise_prior(ev_geneways_bind, type_probs,
                                       subtype_probs) == 0.7

    # Random noise prior for reactome biopax evidence is the subtype prior,
    # since we specified it
    assert evidence_random_noise_prior(ev_biopax_reactome, type_probs,
                                       subtype_probs) == 0.4

    # No subtype prior is given for pid, so the random noise prior for pid
    # evidence is expected to be the source-level biopax prior
    assert evidence_random_noise_prior(ev_biopax_pid, type_probs,
                                       subtype_probs) == 0.9

    # Make sure this all still works when we go through the belief engine
    statements = []
    members = [Agent('a'), Agent('b')]
    statements.append(Complex(members, evidence=ev_geneways_bind))
    statements.append(Complex(members, evidence=ev_biopax_reactome))
    statements.append(Complex(members, evidence=ev_biopax_pid))
    # Systematic errors are zeroed so only the random noise prior matters
    p = {'rand': type_probs, 'syst': {'biopax': 0, 'geneways': 0}}

    scorer = SimpleScorer(p, subtype_probs)
    engine = BeliefEngine(scorer)
    engine.set_prior_probs(statements)
    assert statements[0].belief == 1 - 0.7
    assert statements[1].belief == 1 - 0.4
    assert statements[2].belief == 1 - 0.9
Exemple #14
0
def test_wm_scorer():
    """Use the Eidos scorer from wm_scorer to set a prior belief.

    The scorer's tables must contain 'hume' among the random errors and
    'biopax' among the systematic errors; the scoring call itself is only
    required to complete.
    """
    scorer = wm_scorer.get_eidos_scorer()
    subj = Concept('a')
    obj = Concept('b')
    stmt = Influence(subj, obj, evidence=[Evidence(source_api='eidos')])
    # Sources other than eidos have to remain in the probability maps
    for err_type, source in (('rand', 'hume'), ('syst', 'biopax')):
        assert source in scorer.prior_probs[err_type]
    belief_engine = BeliefEngine(scorer)
    belief_engine.set_prior_probs([stmt])
def test_prior_prob_one_two():
    """Check the prior belief for evidence ev1, a deep copy of ev1, and ev2.

    The expected value multiplies an error term built from the default
    'reach' probabilities (random error squared plus systematic error) by
    one built from the default 'trips' probabilities (random plus
    systematic error).
    """
    engine = BeliefEngine()
    reach_err = (default_probs['rand']['reach']**2 +
                 default_probs['syst']['reach'])
    trips_err = (default_probs['rand']['trips'] +
                 default_probs['syst']['trips'])
    expected = 1 - reach_err * trips_err
    stmt = Phosphorylation(None, Agent('a'),
                           evidence=[ev1, deepcopy(ev1), ev2])
    # Before scoring the statement is expected to carry a belief of 1
    assert stmt.belief == 1
    engine.set_prior_probs([stmt])
    assert stmt.belief == expected
Exemple #16
0
def test_hierarchy_probs1():
    """Check hierarchy beliefs starting from manually assigned beliefs.

    The statement with belief 0.8 supports the one with belief 0.5; after
    hierarchy scoring the former is expected at 0.9 and the latter is
    expected to be unchanged.
    """
    engine = BeliefEngine()
    supported = Phosphorylation(None, Agent('a'), evidence=[ev1])
    supporter = Phosphorylation(None, Agent('b'), evidence=[ev2])
    supporter.supports = [supported]
    supported.supported_by = [supporter]
    # Assign the starting beliefs by hand instead of scoring the evidence
    supported.belief = 0.5
    supporter.belief = 0.8
    engine.set_hierarchy_probs([supported, supporter])
    assert supported.belief == 0.5
    assert supporter.belief == 0.9
def test_evidence_random_noise_prior():
    """Check random noise priors with source- and subtype-level values.

    evidence_random_noise_prior is first called directly on three Evidence
    objects; the same Evidence objects are then scored through a
    BeliefEngine whose SimpleScorer uses the same probabilities and zero
    systematic error.
    """
    type_probs = {'biopax': 0.9, 'geneways': 0.2}
    biopax_subtype_probs = {
            'reactome': 0.4,
            'biogrid': 0.2}
    geneways_subtype_probs = {
            'phosphorylate': 0.5,
            'bind': 0.7}
    subtype_probs = {'biopax': biopax_subtype_probs,
                     'geneways': geneways_subtype_probs}

    ev_geneways_bind = Evidence(source_api='geneways', source_id=0,
                                pmid=0, text=None, epistemics={},
                                annotations={'actiontype': 'bind'})
    ev_biopax_reactome = Evidence(source_api='biopax', source_id=0,
                                  pmid=0, text=None, epistemics={},
                                  annotations={'source_sub_id': 'reactome'})
    ev_biopax_pid = Evidence(source_api='biopax', source_id=0,
                             pmid=0, text=None, epistemics={},
                             annotations={'source_sub_id': 'pid'})

    # Random noise prior for geneways bind evidence is the subtype prior,
    # since we specified it
    assert evidence_random_noise_prior(ev_geneways_bind,
                                       type_probs, subtype_probs) == 0.7

    # Random noise prior for reactome biopax evidence is the subtype prior,
    # since we specified it
    assert evidence_random_noise_prior(ev_biopax_reactome,
                                       type_probs, subtype_probs) == 0.4

    # pid has no entry among the biopax subtype priors, so the expected
    # random noise prior is the source-level biopax value
    assert evidence_random_noise_prior(ev_biopax_pid,
                                       type_probs, subtype_probs) == 0.9

    # Make sure this all still works when we go through the belief engine
    statements = []
    members = [Agent('a'), Agent('b')]
    statements.append(Complex(members, evidence=ev_geneways_bind))
    statements.append(Complex(members, evidence=ev_biopax_reactome))
    statements.append(Complex(members, evidence=ev_biopax_pid))
    # With zero systematic error only the random noise prior contributes
    p = {'rand': type_probs, 'syst': {'biopax': 0, 'geneways': 0}}

    scorer = SimpleScorer(p, subtype_probs)
    engine = BeliefEngine(scorer)
    engine.set_prior_probs(statements)
    assert statements[0].belief == 1 - 0.7
    assert statements[1].belief == 1 - 0.4
    assert statements[2].belief == 1 - 0.9
Exemple #18
0
def test_belief_calc_up_to_prior():
    """Score six mock statements and sanity-check the resulting priors.

    Every statement must yield a distinct matches_key, and every belief
    must lie strictly between 0 and 1.
    """
    engine = BeliefEngine()
    mixed_first = [MockEvidence('sparser'), MockEvidence('reach')]
    stmt_1 = MockStatement(1, mixed_first)
    stmt_2 = MockStatement(2, MockEvidence('biopax'))
    stmt_3 = MockStatement(3, MockEvidence('signor'))
    stmt_4 = MockStatement(4, MockEvidence('biogrid'))
    stmt_5 = MockStatement(5, MockEvidence('bel'))
    mixed_last = [MockEvidence('phosphosite'), MockEvidence('trips')]
    stmt_6 = MockStatement(6, mixed_last)
    test_stmts = [stmt_1, stmt_2, stmt_3, stmt_4, stmt_5, stmt_6]
    engine.set_prior_probs(test_stmts)
    beliefs_by_key = {}
    for stmt in test_stmts:
        key = stmt.matches_key()
        beliefs_by_key[key] = stmt.belief
    print(beliefs_by_key)
    n_results, n_stmts = len(beliefs_by_key), len(test_stmts)
    assert n_results == n_stmts, (n_results, n_stmts)
    in_range = all(0 < belief < 1 for belief in beliefs_by_key.values())
    assert in_range, 'Beliefs out of range.'
Exemple #19
0
def setup_belief(include_more_specific=False):
    """Fit a CountsScorer on the curated statements and wrap it in an engine.

    Parameters
    ----------
    include_more_specific : Optional[bool]
        Passed on to the CountsScorer. If True, the evidences of each
        statement's supports are additionally collected and handed to
        the fit call as extra evidence. Default: False

    Returns
    -------
    tuple
        The BeliefEngine using the fitted scorer, a copy of the
        test_stmts_cur list, and the second column of the scorer's
        predict_proba output for test_stmts_cur.
    """
    # Logistic regression model over all sources found in the statements
    model = LogisticRegression()
    sources = CountsScorer.get_all_sources(test_stmts_cur)
    scorer = CountsScorer(model, sources,
                          include_more_specific=include_more_specific)
    # Collect, per statement, the evidences of all of its supports
    extra_evidence = None
    if include_more_specific:
        extra_evidence = []
        for stmt in test_stmts_cur:
            supporting_evidence = []
            for supp in stmt.supports:
                supporting_evidence.extend(supp.evidence)
            extra_evidence.append(supporting_evidence)
    # Train on the curated data; extra_evidence stays None when unused
    scorer.fit(test_stmts_cur, y_arr_stmts_cur, extra_evidence)
    # Predictions made without extra evidence serve as reference priors
    probs = scorer.predict_proba(test_stmts_cur)[:, 1]
    # The returned engine is meant to reproduce these probabilities
    engine = BeliefEngine(scorer=scorer)
    # NOTE(review): assuming `copy` is copy.copy, only the list is copied
    # and the statement objects remain shared with the global list --
    # confirm that this is sufficient for the callers.
    stmts_copy = copy(test_stmts_cur)
    return engine, stmts_copy, probs
Exemple #20
0
def test_hierarchy_probs4():
    """Check hierarchy beliefs on a diamond of four statements.

    The top statement supports the two middle ones and the bottom one,
    and each middle statement supports the bottom one.
    """
    engine = BeliefEngine()
    bottom = Phosphorylation(None, Agent('a'), evidence=[ev1])
    middle_b = Phosphorylation(None, Agent('b'), evidence=[ev2])
    middle_c = Phosphorylation(None, Agent('c'), evidence=[ev1])
    top = Phosphorylation(None, Agent('d'), evidence=[ev1])
    # Downward links
    top.supports = [bottom, middle_b, middle_c]
    middle_c.supports = [bottom]
    middle_b.supports = [bottom]
    # Matching upward links
    bottom.supported_by = [middle_b, middle_c, top]
    middle_b.supported_by = [top]
    middle_c.supported_by = [top]
    engine.set_hierarchy_probs([bottom, middle_b, middle_c, top])
    expected_beliefs = [
        (bottom, 1 - 0.35),
        (middle_b, 1 - 0.35 * 0.35),
        (middle_c, 1 - (0.05 + 0.3 * 0.3)),
        (top, 1 - 0.35 * (0.05 + 0.3 * 0.3 * 0.3)),
    ]
    for stmt, expected in expected_beliefs:
        assert_close_enough(stmt.belief, expected)
def test_hierarchy_probs4():
    """Check hierarchy beliefs when one statement supports three others.

    stmt_d supports stmt_a, stmt_b and stmt_c, while stmt_b and stmt_c
    each support stmt_a as well.
    """
    engine = BeliefEngine()
    stmt_a = Phosphorylation(None, Agent('a'), evidence=[ev1])
    stmt_b = Phosphorylation(None, Agent('b'), evidence=[ev2])
    stmt_c = Phosphorylation(None, Agent('c'), evidence=[ev1])
    stmt_d = Phosphorylation(None, Agent('d'), evidence=[ev1])
    stmt_d.supports = [stmt_a, stmt_b, stmt_c]
    stmt_c.supports = [stmt_a]
    stmt_b.supports = [stmt_a]
    stmt_a.supported_by = [stmt_b, stmt_c, stmt_d]
    stmt_b.supported_by = [stmt_d]
    stmt_c.supported_by = [stmt_d]
    all_stmts = [stmt_a, stmt_b, stmt_c, stmt_d]
    engine.set_hierarchy_probs(all_stmts)
    # Expected beliefs in the same order as all_stmts
    expected = [
        1-0.35,
        1-0.35*0.35,
        1-(0.05 + 0.3*0.3),
        1-0.35*(0.05 + 0.3*0.3*0.3),
    ]
    for stmt, value in zip(all_stmts, expected):
        assert_close_enough(stmt.belief, value)
Exemple #22
0
def test_default_probs_override():
    """Make sure default probs are overridden by the constructor argument."""
    engine = BeliefEngine(prior_probs={'rand': {'assertion': 0.5}})
    for err_type in ('rand', 'syst'):
        for source, prob in engine.prior_probs[err_type].items():
            overridden = (err_type, source) == ('rand', 'assertion')
            if not overridden:
                # Every other entry must still equal the default value
                assert default_probs[err_type][source] == prob
                continue
            assert prob == 0.5
def test_negative_evidence():
    """Check prior beliefs for statements with negated evidence.

    Three statements are scored: one with two negated and one regular
    evidence, one with three regular evidences, and one with three
    negated evidences, all from the same custom source.
    """
    prior_probs = {'rand': {'new_source': 0.1},
                   'syst': {'new_source': 0.05}}

    def getev(negated):
        return Evidence(source_api='new_source',
                        epistemics={'negated': negated})

    negation_patterns = [
        [True, True, False],
        [False, False, False],
        [True, True, True],
    ]
    evidence_sets = [[getev(flag) for flag in pattern]
                     for pattern in negation_patterns]
    stmts = [Phosphorylation(None, Agent('a'), evidence=evs)
             for evs in evidence_sets]
    engine = BeliefEngine(SimpleScorer(prior_probs))
    engine.set_prior_probs(stmts)
    pr = prior_probs['rand']['new_source']
    ps = prior_probs['syst']['new_source']
    mixed, all_positive, all_negated = stmts
    assert_close_enough(mixed.belief,
                        ((1-pr)-ps)*(1-((1-pr*pr)-ps)))
    assert_close_enough(all_positive.belief, (1-pr*pr*pr)-ps)
    # Only negated evidence is expected to give a belief of exactly zero
    assert all_negated.belief == 0
def test_negative_evidence():
    """Check how negated evidence enters the prior belief.

    All evidences come from 'new_source', whose random and systematic
    error probabilities are given to the scorer explicitly.
    """
    pr = 0.1
    ps = 0.05
    prior_probs = {'rand': {'new_source': pr},
                   'syst': {'new_source': ps}}

    def getev(x):
        return Evidence(source_api='new_source', epistemics={'negated': x})

    two_negated = [getev(x) for x in (True, True, False)]
    none_negated = [getev(x) for x in (False, False, False)]
    all_negated = [getev(x) for x in (True, True, True)]
    stmts = []
    for evs in (two_negated, none_negated, all_negated):
        stmts.append(Phosphorylation(None, Agent('a'), evidence=evs))
    scorer = SimpleScorer(prior_probs)
    BeliefEngine(scorer).set_prior_probs(stmts)
    # Expected values written in terms of the probabilities given above
    expected_mixed = ((1-pr)-ps)*(1-((1-pr*pr)-ps))
    expected_positive = (1-pr*pr*pr)-ps
    assert_close_enough(stmts[0].belief, expected_mixed)
    assert_close_enough(stmts[1].belief, expected_positive)
    assert stmts[2].belief == 0
Exemple #25
0
def test_hierarchy_probs4():
    """Check hierarchy beliefs starting from manually assigned beliefs.

    Four statements are linked so that the top one supports the other
    three and the two middle ones support the bottom one. Only the first
    three statements are passed to set_hierarchy_probs, yet the belief of
    the top statement is asserted as well.
    """
    engine = BeliefEngine()
    bottom = Phosphorylation(None, Agent('a'), evidence=[ev1])
    middle_b = Phosphorylation(None, Agent('b'), evidence=[ev2])
    middle_c = Phosphorylation(None, Agent('c'), evidence=[ev3])
    top = Phosphorylation(None, Agent('d'), evidence=[ev1])
    top.supports = [bottom, middle_b, middle_c]
    middle_c.supports = [bottom]
    middle_b.supports = [bottom]
    bottom.supported_by = [middle_b, middle_c, top]
    middle_b.supported_by = [top]
    middle_c.supported_by = [top]
    # Starting beliefs are assigned by hand rather than from evidence
    bottom.belief = 0.5
    middle_b.belief = 0.8
    middle_c.belief = 0.2
    top.belief = 0.6
    # The top statement is reachable only via the supported_by links
    engine.set_hierarchy_probs([bottom, middle_b, middle_c])
    assert bottom.belief == 0.5
    assert middle_b.belief == 0.9
    assert middle_c.belief == 0.6
    assert top.belief == 0.968
Exemple #26
0
def calculate_belief(stmts):
    """Set prior and hierarchy beliefs on statements and return them.

    Parameters
    ----------
    stmts : list
        The statements to score; their belief attribute is set in place.

    Returns
    -------
    dict
        Belief values keyed by the string form of each statement's hash.
    """
    # Subtype-specific probabilities for two biopax sub-sources
    biopax_subtypes = {'pc11': 0.2, 'phosphosite': 0.01}
    scorer = SimpleScorer(subtype_probs={
        'biopax': biopax_subtypes,
    })
    engine = BeliefEngine(scorer=scorer)
    engine.set_prior_probs(stmts)
    engine.set_hierarchy_probs(stmts)
    beliefs = {}
    for stmt in stmts:
        key = str(stmt.get_hash())
        beliefs[key] = stmt.belief
    return beliefs
Exemple #27
0
def test_default_probs_extend():
    """Make sure default probs are extended by the constructor argument."""
    prior_probs = {'rand': {'new_source': 0.1}, 'syst': {'new_source': 0.05}}
    scorer = SimpleScorer(prior_probs)

    be = BeliefEngine(scorer)
    # Values that are expected to come from the constructor argument
    added = {('rand', 'new_source'): 0.1, ('syst', 'new_source'): 0.05}
    for err_type in ('rand', 'syst'):
        table = scorer.prior_probs[err_type]
        assert 'new_source' in table
        for source, prob in table.items():
            if (err_type, source) in added:
                assert prob == added[(err_type, source)]
            else:
                # All remaining entries must match the defaults
                assert default_probs[err_type][source] == prob
Exemple #28
0
def run_preassembly(stmts_in, **kwargs):
    """Run duplicate and related preassembly on a list of statements.

    A single BeliefEngine and Preassembler are shared between the
    run_preassembly_duplicate and run_preassembly_related steps.

    Parameters
    ----------
    stmts_in : list[indra.statements.Statement]
        A list of statements to preassemble.
    return_toplevel : Optional[bool]
        Forwarded to run_preassembly_related. If True, only the top-level
        statements are returned. If False, all statements are returned
        irrespective of level of specificity. Default: True
    poolsize : Optional[int]
        Forwarded to run_preassembly_related. The number of worker
        processes to use to parallelize the comparisons. If None
        (default), no parallelization is performed. NOTE: Parallelization
        is only available on Python 3.4 and above.
    size_cutoff : Optional[int]
        Forwarded to run_preassembly_related. Groups with size_cutoff or
        more statements are sent to worker processes, while smaller groups
        are compared in the parent process. Default value is 100. Not
        relevant when parallelization is not used.
    save : Optional[str]
        The name of a pickle file to save the results (stmts_out) into.
    save_unique : Optional[str]
        The name of a pickle file to save the unique statements into.

    Returns
    -------
    stmts_out : list[indra.statements.Statement]
        The statements returned by run_preassembly_related.
    """
    belief_engine = BeliefEngine()
    preassembler = Preassembler(hierarchies, stmts_in)
    # First stage: combine duplicates, optionally dumping the result
    run_preassembly_duplicate(preassembler, belief_engine,
                              save=kwargs.get('save_unique'))

    # Second stage: combine related statements with the remaining options
    related_options = {
        'save': kwargs.get('save'),
        'return_toplevel': kwargs.get('return_toplevel', True),
        'poolsize': kwargs.get('poolsize', None),
        'size_cutoff': kwargs.get('size_cutoff', 100),
    }
    return run_preassembly_related(preassembler, belief_engine,
                                   **related_options)
def setup_belief():
    """Fit a CountsScorer on the curated statements and wrap it in an engine.

    Returns
    -------
    tuple
        The BeliefEngine using the fitted scorer, a copy of the
        test_stmts_cur list, and the second column of the scorer's
        predict_proba output for test_stmts_cur.
    """
    # Logistic regression over every source present in the statements
    sources = CountsScorer.get_all_sources(test_stmts_cur)
    scorer = CountsScorer(LogisticRegression(), sources)
    # Fit against the curation labels, then predict on the same statements
    scorer.fit(test_stmts_cur, y_arr_stmts_cur)
    predictions = scorer.predict_proba(test_stmts_cur)
    probs = predictions[:, 1]
    # The engine built on the trained scorer is meant to set these same
    # values as beliefs on the statements
    engine = BeliefEngine(scorer=scorer)
    # NOTE(review): assuming `copy` is copy.copy, this duplicates only the
    # list; the statement objects are shared with the global instances --
    # confirm that this is sufficient for the callers.
    stmts_copy = copy(test_stmts_cur)
    return engine, stmts_copy, probs
Exemple #30
0
def test_belief_calc_up_to_hierarchy():
    """Check how hierarchy scoring changes beliefs relative to the priors.

    After prior scoring, support links are added and hierarchy scoring is
    run. Statements with a non-empty supports list must gain belief, and
    all others must keep exactly their prior belief.
    """
    engine = BeliefEngine()
    mixed_first = [MockEvidence('sparser'), MockEvidence('reach')]
    stmt_1 = MockStatement(1, mixed_first)
    stmt_2 = MockStatement(2, MockEvidence('biopax'))
    stmt_3 = MockStatement(3, MockEvidence('signor'))
    stmt_4 = MockStatement(4, MockEvidence('biogrid'))
    stmt_5 = MockStatement(5, MockEvidence('bel'))
    mixed_last = [MockEvidence('phosphosite'), MockEvidence('trips')]
    stmt_6 = MockStatement(6, mixed_last)
    test_stmts = [stmt_1, stmt_2, stmt_3, stmt_4, stmt_5, stmt_6]

    # Record the priors before any support information exists
    engine.set_prior_probs(test_stmts)
    init_results = {}
    for stmt in test_stmts:
        key = stmt.matches_key()
        init_results[key] = stmt.belief
    print(init_results)

    supp_links = [(1, 2), (1, 3), (2, 3), (1, 5), (4, 3)]
    populate_support(test_stmts, supp_links)
    engine.set_hierarchy_probs(test_stmts)
    results = {}
    for stmt in test_stmts:
        key = stmt.matches_key()
        results[key] = stmt.belief
    print(results)

    # Test a couple very simple properties.
    n_results, n_stmts = len(results), len(test_stmts)
    assert n_results == n_stmts, (n_results, n_stmts)
    in_range = all(0 < belief < 1 for belief in results.values())
    assert in_range, 'Beliefs out of range.'

    # Compare every belief against the prior recorded earlier.
    all_deltas_correct = True
    deltas_dict = {}
    for stmt in test_stmts:
        key = stmt.matches_key()
        delta = stmt.belief - init_results[key]
        if stmt.supports:
            expectation = 'increase'
            violated = delta <= 0
        else:
            expectation = 'no change'
            violated = delta != 0
        if violated:
            all_deltas_correct = False
        deltas_dict[key] = {'actual': delta, 'expected': expectation}
    assert all_deltas_correct, deltas_dict
Exemple #31
0
def run_preassembly(stmts_in, **kwargs):
    """Run preassembly on a list of statements.

    Duplicates are combined first, then related statements are combined;
    the wall-clock time of the second step is logged at debug level.

    Parameters
    ----------
    stmts_in : list[indra.statements.Statement]
        A list of statements to preassemble.
    return_toplevel : Optional[bool]
        If True, only the top-level statements are returned. If False,
        all statements are returned irrespective of level of specificity.
        Default: True
    save : Optional[str]
        The name of a pickle file to save the results (stmts_out) into.
    save_unique : Optional[str]
        The name of a pickle file to save the unique statements into.

    Returns
    -------
    stmts_out : list[indra.statements.Statement]
        The statements returned by run_preassembly_related.
    """
    dump_pkl = kwargs.get('save')
    dump_pkl_unique = kwargs.get('save_unique')
    be = BeliefEngine()
    pa = Preassembler(hierarchies, stmts_in)

    # Both steps below share the same preassembler and belief engine
    options = {'save': dump_pkl_unique}
    run_preassembly_duplicate(pa, be, **options)

    return_toplevel = kwargs.get('return_toplevel', True)
    options = {'save': dump_pkl, 'return_toplevel': return_toplevel}
    start = time.time()
    stmts_out = run_preassembly_related(pa, be, **options)
    end = time.time()
    elapsed = end - start
    # Pass the value as a logger argument so the message is only formatted
    # when debug logging is enabled
    logger.debug("Time elapsed, run_preassembly_related: %s", elapsed)
    return stmts_out
Exemple #32
0
def run_preassembly(statements, hierarchies):
    """Filter, preassemble and score statements; return the top-level ones.

    Parameters
    ----------
    statements : list
        The statements to preassemble.
    hierarchies : dict
        The hierarchies handed to the Preassembler.

    Returns
    -------
    list
        The top-level statements after combining duplicates and related
        statements.
    """
    print('%d total statements' % len(statements))
    # Keep only grounded statements, using a grounding score threshold
    grounded = ac.filter_grounded_only(statements, score_threshold=0.4)
    preassembler = Preassembler(hierarchies, grounded)
    belief_engine = BeliefEngine()
    # Duplicates are combined first and given prior beliefs
    unique_stmts = preassembler.combine_duplicates()
    print('%d unique statements' % len(unique_stmts))
    belief_engine.set_prior_probs(unique_stmts)
    # All levels are requested so that hierarchy beliefs can be set
    related_stmts = preassembler.combine_related(return_toplevel=False)
    belief_engine.set_hierarchy_probs(related_stmts)
    # Finally reduce to the top-level statements
    top_stmts = ac.filter_top_level(related_stmts)
    print('%d top-level statements' % len(top_stmts))
    return top_stmts
Exemple #33
0
def run_preassembly(statements, hierarchies):
    """Map, filter, preassemble and score statements for the demo model.

    The unfiltered mapped statements are dumped to
    'pi_mtg_demo_unfiltered.pkl' and the final statements to
    'pi_mtg_demo.pkl'.

    Parameters
    ----------
    statements : list
        The statements to preassemble.
    hierarchies : dict
        The hierarchies handed to the Preassembler.

    Returns
    -------
    list
        The top-level statements that remain after remove_contradicts.
    """
    print('%d total statements' % len(statements))
    # Map the statements and keep a dump of the unfiltered result
    mapped = map_onto(statements)
    ac.dump_statements(mapped, 'pi_mtg_demo_unfiltered.pkl')
    # Filter to grounded only
    grounded = ac.filter_grounded_only(mapped, score_threshold=0.7)

    # An earlier variant of the filter below used only
    # ['conflict', 'food_security', 'precipitation'] with policy='one'
    un_concepts = [
        'conflict', 'food_security', 'flooding', 'food_production',
        'human_migration', 'drought', 'food_availability', 'market',
        'food_insecurity'
    ]
    relevant = ac.filter_by_db_refs(grounded, 'UN', un_concepts,
                                    policy='all', match_suffix=True)
    assume_polarity(relevant)
    polarized = filter_has_polarity(relevant)

    # Combine duplicates and set prior beliefs on the unique statements
    preassembler = Preassembler(hierarchies, polarized)
    belief_engine = BeliefEngine()
    unique_stmts = preassembler.combine_duplicates()
    print('%d unique statements' % len(unique_stmts))
    belief_engine.set_prior_probs(unique_stmts)
    # Combine related statements on all levels and set hierarchy beliefs
    related_stmts = preassembler.combine_related(return_toplevel=False)
    belief_engine.set_hierarchy_probs(related_stmts)
    # (a belief filter at 0.8 is disabled at this point)
    # Filter to top-level Statements
    top_stmts = ac.filter_top_level(related_stmts)

    # Contradictions are searched among the top-level statements only
    preassembler.stmts = top_stmts
    print('%d top-level statements' % len(top_stmts))
    conflicts = preassembler.find_contradicts()
    top_stmts = remove_contradicts(top_stmts, conflicts)

    ac.dump_statements(top_stmts, 'pi_mtg_demo.pkl')

    return top_stmts
Exemple #34
0
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.

    The statements are grounding-mapped, filtered, preassembled, scored
    with a BeliefEngine and extended with linked (inferred) statements.
    Index cards, a pickle, a statement graph and TSV diagnostics are then
    written under `folder`.

    Parameters
    ----------
    stmts : list
        The statements to assemble.
    folder : str
        The output folder; files are written under its 'index_cards' and
        'other_outputs' sub-folders, which are not created here.
    pmcid : str
        The PMCID, used as file name prefix and as pmc_override for the
        index cards.
    background_assertions : Optional[list]
        If given, top-level statements contained in this collection are
        excluded from the outputs. Default: None
    '''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    # NOTE(review): this updates default_grounding_map in place; if that is
    # a shared module-level dict the change outlives this call -- confirm.
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)

    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding: keep statements whose agents all pass
    # is_protein_or_chemical
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Link statements by calling the MechLinker inference methods directly
    # on the class (no MechLinker instance is created here)
    linked_stmts = MechLinker.infer_active_forms(related_stmts)
    linked_stmts += MechLinker.infer_modifications(related_stmts)
    linked_stmts += MechLinker.infer_activations(related_stmts)
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = related_stmts + [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [stmt for stmt in related_stmts
                       if stmt not in background_assertions]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Go through the statements in order of decreasing belief
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief, reverse=True):
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        # NOTE(review): statements below the cutoff are only reported as
        # 'SKIP' here; there is no `continue`, so they are still processed
        # below -- confirm whether skipping was intended.
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)

        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards, so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model().encode('utf-8'))
    print('=======================')

    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
Exemple #35
0
if __name__ == '__main__':
    # Command-line entry point: load pickled statements, map grounding and
    # sequences, then run duplicate and related preassembly, caching every
    # intermediate result as a pickle file in the output folder.
    if len(sys.argv) < 3:
        logger.error('Usage: assemble_corpus.py <pickle_file> <output_folder>')
        # Exit with a non-zero status so that calling scripts can detect
        # the usage error
        sys.exit(1)
    stmts_fname = sys.argv[1]
    out_folder = sys.argv[2]

    stmts = load_statements(stmts_fname)

    logger.info('All statements: %d', len(stmts))

    # Grounding mapping, with agent renaming enabled
    cache_pkl = os.path.join(out_folder, 'mapped_stmts.pkl')
    options = {'save': cache_pkl, 'do_rename': True}
    stmts = map_grounding(stmts, **options)

    # Sequence mapping
    cache_pkl = os.path.join(out_folder, 'sequence_valid_stmts.pkl')
    options = {'save': cache_pkl}
    mapped_stmts = map_sequence(stmts, **options)

    # The same engine and preassembler are used for both preassembly steps
    be = BeliefEngine()
    pa = Preassembler(hierarchies, mapped_stmts)

    cache_pkl = os.path.join(out_folder, 'unique_stmts.pkl')
    options = {'save': cache_pkl}
    unique_stmts = run_preassembly_duplicate(pa, be, **options)

    cache_pkl = os.path.join(out_folder, 'top_stmts.pkl')
    options = {'save': cache_pkl}
    stmts = run_preassembly_related(pa, be, **options)
Exemple #36
0
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    """Run assembly on a list of statements, for a given PMCID.

    The statements are grounding-mapped, filtered for grounding,
    preassembled, scored with the BeliefEngine and linked with the
    MechLinker. The resulting top-level statements are pickled, index
    cards are saved for them, and an English summary, a statement graph
    and TSV diagnostics are produced.

    Parameters
    ----------
    stmts : list
        The statements to assemble.
    folder : str
        The output folder. Files are written under its 'index_cards' and
        'other_outputs' subfolders; this function does not create them.
    pmcid : str
        The PMCID, used as the file name prefix of all outputs and passed
        to the IndexCardAssembler as pmc_override.
    background_assertions : Optional[list]
        If given, top-level statements that are contained in this
        collection are removed from the output. Default: None
    """
    # Prefix for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Prefix for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and layer it on top of a copy
    # of the default (REACH-oriented) grounding map. A copy is used so that
    # the shared module-level default map is not modified as a side effect
    # of calling this function.
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    grounding_map = dict(default_grounding_map)
    grounding_map.update(trips_gm)
    gm = GroundingMapper(grounding_map)

    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding: keep only statements all of whose agents pass
    # the is_protein_or_chemical check
    grounded_stmts = [
        st for st in renamed_agent_stmts
        if all(is_protein_or_chemical(a) for a in st.agent_list())
    ]

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Instantiate the mechanism linker
    ml = MechLinker(related_stmts)
    # Link statements
    linked_stmts = ml.link_statements()
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = ml.statements + [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler; from here on `pa` refers to this
    # second instance, including in the diagnostics printed at the end
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [
            stmt for stmt in related_stmts if stmt not in background_assertions
        ]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh, protocol=2)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Go through the statements in order of decreasing belief
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief,
                     reverse=True):
        # NOTE(review): statements below the cutoff are only reported as
        # 'SKIP' here; they are still assembled into index cards below.
        # Confirm whether they were meant to be skipped.
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        elif st.belief < belief_cutoff:
            print('SKIP', st.belief, st)

        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # Only save if an index card was actually made (not all statements
        # can be assembled into index cards)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model())
    print('=======================')

    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
Exemple #37
0
def test_prior_prob_assertion():
    """Check a statement's belief before and after setting prior probs."""
    engine = BeliefEngine()
    evidence_list = [ev1, ev1, ev2, ev3]
    stmt = Phosphorylation(None, Agent('a'), evidence=evidence_list)
    # Belief of the freshly constructed statement
    assert stmt.belief == 1
    engine.set_prior_probs([stmt])
    # Belief after the engine has processed the statement
    assert stmt.belief == 1
Exemple #38
0
def calculate_belief(stmts):
    """Return the belief of each statement keyed by its matches key.

    A new BeliefEngine is run on the statements, first setting the prior
    and then the hierarchy probabilities, after which the belief attribute
    of each statement is read.

    Parameters
    ----------
    stmts : list
        The statements to score.

    Returns
    -------
    dict
        A dict mapping each statement's matches_key() to its belief.
    """
    engine = BeliefEngine()
    engine.set_prior_probs(stmts)
    engine.set_hierarchy_probs(stmts)
    beliefs = {}
    for stmt in stmts:
        beliefs[stmt.matches_key()] = stmt.belief
    return beliefs
def test_check_prior_probs():
    """Set prior probs on a statement whose evidence has source 'xxx'."""
    engine = BeliefEngine()
    evidence = Evidence(source_api='xxx')
    stmt = Phosphorylation(None, Agent('ERK'), evidence=[evidence])
    engine.set_prior_probs([stmt])
    def preassemble(self, filters=None):
        """Run the preassembly pipeline on the model's Statements.

        The Statements returned by get_statements are passed through
        grounding mapping, site mapping and the requested filters, and are
        then processed with INDRA's Preassembler and BeliefEngine. The
        results are stored in the unique_stmts and toplevel_stmts
        attributes.

        The following filter options are handled, in this order:
        - grounding: require that all Agents in statements are grounded
        - human_only: filter out non-human proteins
        - prior_one: require that at least one Agent is in the prior model
        - model_one: require that at least one Agent is in the incremental
          model
        - prior_all: require that all Agents are in the prior model
        - model_all: require that all Agents are in the incremental model

        Parameters
        ----------
        filters : Optional[list[str]]
            A list of filter options to apply when choosing the statements.
            See description above for more details. Default: None
        """
        statements = self.get_statements()
        logger.info("%d raw Statements in total", len(statements))

        # Fix grounding: a protein map built from the agent texts is merged
        # into gm.default_grounding_map (updated in place) before mapping
        logger.info("Running grounding map")
        texts_with_grounding = gm.agent_texts_with_grounding(statements)
        gm.default_grounding_map.update(
            gm.protein_map_from_twg(texts_with_grounding))
        mapper = gm.GroundingMapper(gm.default_grounding_map)
        statements = mapper.map_agents(statements, do_rename=True)
        logger.info("%d Statements after grounding map", len(statements))

        # Fix sites; the second return value of map_sites is not used
        statements, _ = SiteMapper(default_site_map).map_sites(statements)
        logger.info("%d Statements with valid sequence", len(statements))

        if filters:
            # (filter key, log message) pairs in the order they are applied
            stages = [
                ("grounding", "Running grounding filter"),
                ("human_only", "Running non-human protein filter"),
            ]
            stages.extend(
                (key, "Running %s relevance filter" % key)
                for key in ("prior_one", "model_one", "prior_all", "model_all")
            )
            for key, message in stages:
                if key not in filters:
                    continue
                logger.info(message)
                statements = self._relevance_filter(statements, [key])
                logger.info("%s Statements after filter", len(statements))

        # Combine duplicates
        logger.info("Preassembling %d Statements", len(statements))
        preassembler = Preassembler(hierarchies, statements)
        self.unique_stmts = preassembler.combine_duplicates()
        logger.info("%d unique Statements", len(self.unique_stmts))

        # Run BeliefEngine on unique statements
        belief_engine = BeliefEngine()
        belief_engine.set_prior_probs(self.unique_stmts)

        # Build statement hierarchy; the top-level statements are those
        # with an empty supports attribute
        self.unique_stmts = preassembler.combine_related(return_toplevel=False)
        self.toplevel_stmts = [
            stmt for stmt in self.unique_stmts if not stmt.supports
        ]
        logger.info("%d top-level Statements", len(self.toplevel_stmts))
        # Run BeliefEngine on hierarchy
        belief_engine.set_hierarchy_probs(self.unique_stmts)
Exemple #41
0
    generate belief scores for each of the statements and output a new pickle
    file containing a list of statements with beliefs. Script throws away the
    dictionaries keys. Input statements should contain evidence, otherwise
    their belief scores will be set to 0."""
    parser = ArgumentParser(description=description, epilog=epilog)

    parser.add_argument("-d",
                        action="store_true",
                        help="set if input is a dictionary of statements")
    help_text = ("path to a pickle file containing a list or dict of"
                 "statements.")
    parser.add_argument("infile", help=help_text)
    args = parser.parse_args()
    infile = args.infile
    filename, file_extension = path.splitext(infile)
    outfile = filename + "_with_beliefs" + file_extension
    with open(args.infile, 'rb') as f:
        stmts = pickle.load(f)
    if args.d:
        stmts = [stmt for _, stmt in stmts.items()]

    # get belief scores
    for stmt in stmts:
        stmt.belief = 1
    be = BeliefEngine()
    be.set_prior_probs(stmts)

    # using pickle instead of assemble_corpus to avoid printing logging
    with open(outfile, 'wb') as f:
        pickle.dump(stmts, f)
Exemple #42
0
def test_check_prior_probs():
    """Run set_prior_probs on a statement with an 'xxx' evidence source."""
    belief_engine = BeliefEngine()
    agent = Agent('ERK')
    unknown_source_ev = Evidence(source_api='xxx')
    stmt = Phosphorylation(None, agent, evidence=[unknown_source_ev])
    belief_engine.set_prior_probs([stmt])
Exemple #43
0
def test_default_probs():
    """Make sure default probs are set with empty constructor."""
    # Engine built with no arguments
    be = BeliefEngine()
    # Walk both error types of the default probability table
    for err_type in ('rand', 'syst'):
        for k, v in default_probs[err_type].items():
            # NOTE(review): as visible here, both sides of this comparison
            # are read from default_probs, so the assertion cannot fail and
            # `be` is never inspected. Presumably the engine's own
            # probabilities were meant to be compared - confirm.
            assert default_probs[err_type][k] == v