Esempio n. 1
0
def test_prior_prob_one_two():
    """Check the prior belief of a statement with evidence [ev1, ev1, ev2].

    The expected value is derived from the module-level ``default_probs``:
    the random 'reach' error enters squared and the 'trips' errors enter
    once. Presumably ev1 is a REACH evidence and ev2 a TRIPS evidence; both
    fixtures are defined outside this function -- TODO confirm.
    """
    engine = BeliefEngine()
    rand_probs = default_probs['rand']
    syst_probs = default_probs['syst']
    reach_error = rand_probs['reach']**2 + syst_probs['reach']
    trips_error = rand_probs['trips'] + syst_probs['trips']
    expected = 1 - reach_error * trips_error

    stmt = Phosphorylation(None, Agent('a'), evidence=[ev1, ev1, ev2])
    # Before priors are applied the statement carries a belief of 1.
    assert stmt.belief == 1
    engine.set_prior_probs([stmt])
    assert stmt.belief == expected
Esempio n. 2
0
def test_prior_prob_two_different():
    """Check the prior belief of a statement with evidence [ev1, ev2].

    The expected value multiplies the summed 'reach' errors by the summed
    'trips' errors, both read from the engine's own ``prior_probs``.
    Presumably ev1 is a REACH evidence and ev2 a TRIPS evidence; both
    fixtures are defined outside this function -- TODO confirm.
    """
    engine = BeliefEngine()
    rand_probs = engine.prior_probs['rand']
    syst_probs = engine.prior_probs['syst']
    reach_error = rand_probs['reach'] + syst_probs['reach']
    trips_error = rand_probs['trips'] + syst_probs['trips']
    expected = 1 - reach_error * trips_error

    stmt = Phosphorylation(None, Agent('a'), evidence=[ev1, ev2])
    # Before priors are applied the statement carries a belief of 1.
    assert stmt.belief == 1
    engine.set_prior_probs([stmt])
    assert stmt.belief == expected
Esempio n. 3
0
def test_wm_scorer():
    """Smoke-test the Eidos scorer from ``wm_scorer`` with a BeliefEngine.

    Verifies that the scorer's prior maps still contain entries for other
    sources ('hume', 'biopax') and that priors can be set on a statement
    backed by a single 'eidos' evidence.
    """
    eidos_scorer = wm_scorer.get_eidos_scorer()
    influence = Influence(Concept('a'),
                          Concept('b'),
                          evidence=[Evidence(source_api='eidos')])
    # Entries for non-Eidos sources must still be present in the maps.
    assert 'hume' in eidos_scorer.prior_probs['rand']
    assert 'biopax' in eidos_scorer.prior_probs['syst']
    # No belief value is asserted here; the call only has to complete.
    BeliefEngine(eidos_scorer).set_prior_probs([influence])
Esempio n. 4
0
def test_evidence_random_noise_prior():
    """Check subtype-aware random-noise priors, directly and via the engine.

    Three evidences are scored: a geneways 'bind' and a biopax 'reactome'
    evidence (both have a configured subtype prior) and a biopax 'pid'
    evidence (no configured subtype prior).
    """
    type_probs = {'biopax': 0.9, 'geneways': 0.2}
    subtype_probs = {
        'biopax': {'reactome': 0.4, 'biogrid': 0.2},
        'geneways': {'phosphorylate': 0.5, 'bind': 0.7},
    }

    def make_evidence(api, annotations):
        # All evidences share the same boilerplate fields.
        return Evidence(source_api=api,
                        source_id=0,
                        pmid=0,
                        text=None,
                        epistemics={},
                        annotations=annotations)

    ev_geneways_bind = make_evidence('geneways', {'actiontype': 'bind'})
    ev_biopax_reactome = make_evidence('biopax',
                                       {'source_sub_id': 'reactome'})
    ev_biopax_pid = make_evidence('biopax', {'source_sub_id': 'pid'})

    # geneways/bind has an explicit subtype prior, so that value is used.
    noise = evidence_random_noise_prior(ev_geneways_bind, type_probs,
                                        subtype_probs)
    assert (noise == 0.7)

    # biopax/reactome has an explicit subtype prior, so that value is used.
    noise = evidence_random_noise_prior(ev_biopax_reactome, type_probs,
                                        subtype_probs)
    assert (noise == 0.4)

    # biopax/pid has no subtype prior configured, so the source-level
    # (type) prior for biopax is expected instead.
    noise = evidence_random_noise_prior(ev_biopax_pid, type_probs,
                                        subtype_probs)
    assert (noise == 0.9)

    # The same values must come out when going through the belief engine.
    members = [Agent('a'), Agent('b')]
    statements = [
        Complex(members, evidence=ev)
        for ev in (ev_geneways_bind, ev_biopax_reactome, ev_biopax_pid)
    ]
    # Systematic error is zeroed so belief is 1 minus the random noise.
    p = {'rand': type_probs, 'syst': {'biopax': 0, 'geneways': 0}}
    engine = BeliefEngine(p, subtype_probs)
    engine.set_prior_probs(statements)
    for stmt, expected_noise in zip(statements, (0.7, 0.4, 0.9)):
        assert (stmt.belief == 1 - expected_noise)
Esempio n. 5
0
def test_hierarchy_probs1():
    """Check hierarchy belief propagation across one support link.

    st2 is linked as supporting st1. With preset beliefs of 0.5 (st1) and
    0.8 (st2), st1 is expected to stay at 0.5 and st2 to become 0.9.
    """
    engine = BeliefEngine()
    supported = Phosphorylation(None, Agent('a'), evidence=[ev1])
    supporter = Phosphorylation(None, Agent('b'), evidence=[ev2])
    # Wire the link in both directions.
    supporter.supports = [supported]
    supported.supported_by = [supporter]
    supported.belief, supporter.belief = 0.5, 0.8
    engine.set_hierarchy_probs([supported, supporter])
    assert supported.belief == 0.5
    assert supporter.belief == 0.9
Esempio n. 6
0
def calculate_belief(stmts):
    """Set prior and hierarchy beliefs on stmts and return them by hash.

    A SimpleScorer with custom 'biopax' subtype probabilities ('pc11' and
    'phosphosite') drives the BeliefEngine.

    Parameters
    ----------
    stmts : list
        Statements whose ``belief`` attribute is updated in place.

    Returns
    -------
    dict
        Maps ``str(stmt.get_hash())`` to the statement's belief.
    """
    biopax_subtypes = {'pc11': 0.2, 'phosphosite': 0.01}
    scorer = SimpleScorer(subtype_probs={'biopax': biopax_subtypes})
    engine = BeliefEngine(scorer=scorer)
    # Priors first, then propagation over the support hierarchy.
    engine.set_prior_probs(stmts)
    engine.set_hierarchy_probs(stmts)
    beliefs = {}
    for stmt in stmts:
        beliefs[str(stmt.get_hash())] = stmt.belief
    return beliefs
Esempio n. 7
0
def test_default_probs_override():
    """Make sure default probs are overridden by the constructor argument."""
    overrides = {'rand': {'assertion': 0.5}}
    scorer = SimpleScorer(overrides)

    # The engine is only constructed here; the checks inspect the scorer.
    be = BeliefEngine(scorer)
    for err_type in ('rand', 'syst'):
        for source, prob in scorer.prior_probs[err_type].items():
            if (err_type, source) == ('rand', 'assertion'):
                # The one overridden entry takes the supplied value.
                assert prob == 0.5
                continue
            # Every other entry must still equal the default.
            assert default_probs[err_type][source] == prob
Esempio n. 8
0
def test_hierarchy_probs3():
    """Check hierarchy beliefs when one statement supports two others.

    st3 is linked as supporting both st1 and st2; no beliefs are preset
    and only ``set_hierarchy_probs`` is called.
    """
    engine = BeliefEngine()
    first = Phosphorylation(None, Agent('a'), evidence=[ev1])
    second = Phosphorylation(None, Agent('b'), evidence=[ev2])
    third = Phosphorylation(None, Agent('c'), evidence=[ev4])
    third.supports = [first, second]
    for supported in (first, second):
        supported.supported_by = [third]
    engine.set_hierarchy_probs([first, second, third])
    # The two supported statements share the same expected belief.
    assert_close_enough(first.belief, 1 - 0.35)
    assert_close_enough(second.belief, 1 - 0.35)
    assert_close_enough(third.belief, 1 - 0.35 * 0.35 * 0.21)
Esempio n. 9
0
def test_belief_calc_up_to_prior():
    """Check that prior beliefs for mock statements lie strictly in (0, 1).

    Also checks that every statement yields a distinct ``matches_key``, so
    the result dict has one entry per statement.
    """
    engine = BeliefEngine()
    mock_stmts = [
        MockStatement(1, [MockEvidence('sparser'), MockEvidence('reach')]),
        MockStatement(2, MockEvidence('biopax')),
        MockStatement(3, MockEvidence('signor')),
        MockStatement(4, MockEvidence('biogrid')),
        MockStatement(5, MockEvidence('bel')),
        MockStatement(6, [MockEvidence('phosphosite'), MockEvidence('trips')]),
    ]
    engine.set_prior_probs(mock_stmts)
    results = {}
    for stmt in mock_stmts:
        results[stmt.matches_key()] = stmt.belief
    print(results)
    n_results, n_stmts = len(results), len(mock_stmts)
    assert n_results == n_stmts, (n_results, n_stmts)
    in_range = all(0 < belief < 1 for belief in results.values())
    assert in_range, 'Beliefs out of range.'
Esempio n. 10
0
def test_default_probs_extend():
    """Make sure default probs are extended by the constructor argument."""
    additions = {'rand': {'new_source': 0.1}, 'syst': {'new_source': 0.05}}
    scorer = SimpleScorer(additions)

    # The engine is only constructed here; the checks inspect the scorer.
    be = BeliefEngine(scorer)
    expected_new = {'rand': 0.1, 'syst': 0.05}
    for err_type in ('rand', 'syst'):
        probs = scorer.prior_probs[err_type]
        assert 'new_source' in probs
        for source, prob in probs.items():
            if source == 'new_source':
                # The added source carries the value supplied above.
                assert prob == expected_new[err_type]
            else:
                # Pre-existing sources keep their default values.
                assert default_probs[err_type][source] == prob
Esempio n. 11
0
def run_preassembly(stmts_in, **kwargs):
    """Run duplicate and related preassembly on a list of statements.

    Parameters
    ----------
    stmts_in : list[indra.statements.Statement]
        The statements to preassemble.
    return_toplevel : Optional[bool]
        When True, return only top-level statements; when False, return
        all statements regardless of their level of specificity.
        Default: True
    poolsize : Optional[int]
        Number of worker processes used to parallelize the comparisons.
        With None (default) no parallelization is performed. NOTE:
        Parallelization is only available on Python 3.4 and above.
    size_cutoff : Optional[int]
        Groups with at least this many statements go to worker processes;
        smaller groups are compared in the parent process. Default: 100.
        Irrelevant when parallelization is not used.
    save : Optional[str]
        Name of a pickle file to save the results (stmts_out) into.
    save_unique : Optional[str]
        Name of a pickle file to save the unique statements into.

    Returns
    -------
    stmts_out : list[indra.statements.Statement]
        The preassembled statements returned by the related step.
    """
    be = BeliefEngine()
    pa = Preassembler(hierarchies, stmts_in)

    # Step 1: combine duplicates, optionally pickling the unique set.
    run_preassembly_duplicate(pa, be, save=kwargs.get('save_unique'))

    # Step 2: combine related statements, forwarding the remaining options.
    return run_preassembly_related(
        pa,
        be,
        save=kwargs.get('save'),
        return_toplevel=kwargs.get('return_toplevel', True),
        poolsize=kwargs.get('poolsize', None),
        size_cutoff=kwargs.get('size_cutoff', 100),
    )
Esempio n. 12
0
def test_belief_calc_up_to_hierarchy():
    """Check how hierarchy propagation changes beliefs relative to priors.

    After priors are set, support links are populated and hierarchy
    beliefs computed. Statements with a non-empty ``supports`` list must
    see their belief increase; all others must be unchanged.
    """
    engine = BeliefEngine()
    mock_stmts = [
        MockStatement(1, [MockEvidence('sparser'),
                          MockEvidence('reach')]),
        MockStatement(2, MockEvidence('biopax')),
        MockStatement(3, MockEvidence('signor')),
        MockStatement(4, MockEvidence('biogrid')),
        MockStatement(5, MockEvidence('bel')),
        MockStatement(6, [MockEvidence('phosphosite'),
                          MockEvidence('trips')]),
    ]
    engine.set_prior_probs(mock_stmts)
    prior_beliefs = {stmt.matches_key(): stmt.belief for stmt in mock_stmts}
    print(prior_beliefs)

    populate_support(mock_stmts, [(1, 2), (1, 3), (2, 3), (1, 5), (4, 3)])
    engine.set_hierarchy_probs(mock_stmts)
    results = {stmt.matches_key(): stmt.belief for stmt in mock_stmts}
    print(results)

    # Basic sanity: one result per statement, all strictly within (0, 1).
    n_results, n_stmts = len(results), len(mock_stmts)
    assert n_results == n_stmts, (n_results, n_stmts)
    in_range = all(0 < belief < 1 for belief in results.values())
    assert in_range, 'Beliefs out of range.'

    # Compare each belief with its prior and record the outcome per key so
    # a failure message shows every delta, not just the first bad one.
    deltas_dict = {}
    deltas_ok = []
    for stmt in mock_stmts:
        key = stmt.matches_key()
        delta = stmt.belief - prior_beliefs[key]
        if stmt.supports:
            expectation = 'increase'
            deltas_ok.append(not (delta <= 0))
        else:
            expectation = 'no change'
            deltas_ok.append(not (delta != 0))
        deltas_dict[key] = {'actual': delta, 'expected': expectation}
    assert all(deltas_ok), deltas_dict
Esempio n. 13
0
def test_hierarchy_probs4():
    """Check hierarchy beliefs on a four-statement diamond of support links.

    st4 supports st1, st2 and st3, while st2 and st3 each support st1.
    st3 and st4 use deep copies of ev1 so evidence objects are not shared
    between statements.
    """
    engine = BeliefEngine()
    st1 = Phosphorylation(None, Agent('a'), evidence=[ev1])
    st2 = Phosphorylation(None, Agent('b'), evidence=[ev2])
    st3 = Phosphorylation(None, Agent('c'), evidence=[deepcopy(ev1)])
    st4 = Phosphorylation(None, Agent('d'), evidence=[deepcopy(ev1)])

    # (statement, supports, supported_by); attributes that the original
    # setup leaves untouched are marked with None.
    links = (
        (st4, [st1, st2, st3], None),
        (st3, [st1], [st4]),
        (st2, [st1], [st4]),
        (st1, None, [st2, st3, st4]),
    )
    for stmt, supports, supported_by in links:
        if supports is not None:
            stmt.supports = supports
        if supported_by is not None:
            stmt.supported_by = supported_by

    engine.set_hierarchy_probs([st1, st2, st3, st4])
    assert_close_enough(st1.belief, 1-0.35)
    assert_close_enough(st2.belief, 1-0.35*0.35)
    assert_close_enough(st3.belief, 1-(0.05 + 0.3*0.3))
    assert_close_enough(st4.belief, 1-0.35*(0.05 + 0.3*0.3*0.3))
Esempio n. 14
0
def test_negative_evidence():
    """Check prior beliefs when some or all evidences are negated.

    Three statements are scored with a custom 'new_source' prior: one with
    mixed negated/non-negated evidence, one with only non-negated evidence,
    and one with only negated evidence (expected belief exactly 0).
    """
    prior_probs = {'rand': {'new_source': 0.1},
                   'syst': {'new_source': 0.05}}

    def make_evidence(negated):
        return Evidence(source_api='new_source',
                        epistemics={'negated': negated})

    negation_patterns = (
        [True, True, False],
        [False, False, False],
        [True, True, True],
    )
    evidence_lists = [[make_evidence(flag) for flag in pattern]
                      for pattern in negation_patterns]
    stmts = [Phosphorylation(None, Agent('a'), evidence=evs)
             for evs in evidence_lists]

    engine = BeliefEngine(SimpleScorer(prior_probs))
    engine.set_prior_probs(stmts)

    pr = prior_probs['rand']['new_source']
    ps = prior_probs['syst']['new_source']
    mixed, all_positive, all_negative = stmts
    assert_close_enough(mixed.belief, ((1-pr)-ps)*(1-((1-pr*pr)-ps)))
    assert_close_enough(all_positive.belief, (1-pr*pr*pr)-ps)
    assert all_negative.belief == 0
Esempio n. 15
0
def setup_belief():
    """Train a CountsScorer and build a BeliefEngine around it.

    Returns
    -------
    tuple
        ``(be, test_stmts_copy, probs)``: the engine using the trained
        scorer, a copy of the module-level ``test_stmts_cur`` made with
        ``copy``, and column 1 of the scorer's ``predict_proba`` output for
        those statements.
    """
    # Logistic regression model over counts of all sources seen in the
    # curated statements.
    model = LogisticRegression()
    sources = CountsScorer.get_all_sources(test_stmts_cur)
    trained_scorer = CountsScorer(model, sources)
    # Fit on the curated data, then predict on the same statements.
    trained_scorer.fit(test_stmts_cur, y_arr_stmts_cur)
    probs = trained_scorer.predict_proba(test_stmts_cur)[:, 1]
    # Callers can compare these predictions with the beliefs the engine
    # sets when it is run with the trained scorer.
    engine = BeliefEngine(scorer=trained_scorer)
    # Copy the statement collection so the global is not handed out
    # directly. NOTE(review): if ``copy`` is ``copy.copy`` this is shallow,
    # so the statement objects themselves are still shared -- confirm that
    # is sufficient to avoid belief side effects.
    stmts_copy = copy(test_stmts_cur)
    return engine, stmts_copy, probs
Esempio n. 16
0
def run_preassembly(statements, hierarchies):
    """Filter, deduplicate and relate statements; return the top level.

    Parameters
    ----------
    statements : list
        Input statements; only those passing the grounded-only filter
        (score threshold 0.4) are preassembled.
    hierarchies : object
        Hierarchies handed to the Preassembler.

    Returns
    -------
    list
        The top-level statements, with prior and hierarchy beliefs set.
    """
    print('%d total statements' % len(statements))
    grounded = ac.filter_grounded_only(statements, score_threshold=0.4)
    preassembler = Preassembler(hierarchies, grounded)
    engine = BeliefEngine()

    # Duplicates are merged first and receive prior beliefs.
    unique = preassembler.combine_duplicates()
    print('%d unique statements' % len(unique))
    engine.set_prior_probs(unique)

    # All related statements are kept so beliefs propagate through the
    # whole hierarchy before reducing to the top level.
    related = preassembler.combine_related(return_toplevel=False)
    engine.set_hierarchy_probs(related)

    top_level = ac.filter_top_level(related)
    print('%d top-level statements' % len(top_level))
    return top_level
Esempio n. 17
0
def run_preassembly(statements, hierarchies):
    """Map, filter and preassemble statements; return the top level.

    The mapped, unfiltered statements are dumped to
    'pi_mtg_demo_unfiltered.pkl' and the final statements to
    'pi_mtg_demo.pkl'.

    Parameters
    ----------
    statements : list
        Input statements.
    hierarchies : object
        Hierarchies handed to the Preassembler.

    Returns
    -------
    list
        Top-level statements after ``remove_contradicts`` has been applied.
    """
    print('%d total statements' % len(statements))
    # Map first, keep a dump of the unfiltered set, then filter by
    # grounding score.
    statements = map_onto(statements)
    ac.dump_statements(statements, 'pi_mtg_demo_unfiltered.pkl')
    statements = ac.filter_grounded_only(statements, score_threshold=0.7)

    # Alternative (disabled) filter kept for reference:
    #statements = ac.filter_by_db_refs(statements, 'UN',
    #    ['conflict', 'food_security', 'precipitation'], policy='one',
    #    match_suffix=True)
    un_concepts = [
        'conflict', 'food_security', 'flooding', 'food_production',
        'human_migration', 'drought', 'food_availability', 'market',
        'food_insecurity'
    ]
    statements = ac.filter_by_db_refs(statements,
                                      'UN',
                                      un_concepts,
                                      policy='all',
                                      match_suffix=True)
    assume_polarity(statements)
    statements = filter_has_polarity(statements)

    preassembler = Preassembler(hierarchies, statements)
    engine = BeliefEngine()

    # Duplicates are merged first and receive prior beliefs.
    unique = preassembler.combine_duplicates()
    print('%d unique statements' % len(unique))
    engine.set_prior_probs(unique)

    # Keep all related statements so beliefs propagate over the hierarchy.
    related = preassembler.combine_related(return_toplevel=False)
    engine.set_hierarchy_probs(related)
    # Optional (disabled) belief filter:
    #related_stmts = ac.filter_belief(related_stmts, 0.8)
    top_level = ac.filter_top_level(related)

    # The preassembler's statements are replaced with the top level before
    # find_contradicts() is called.
    preassembler.stmts = top_level
    print('%d top-level statements' % len(top_level))
    contradictions = preassembler.find_contradicts()
    top_level = remove_contradicts(top_level, contradictions)

    ac.dump_statements(top_level, 'pi_mtg_demo.pkl')

    return top_level
Esempio n. 18
0
def test_hierarchy_probs4():
    """Check hierarchy beliefs on a four-statement diamond, preset beliefs.

    st4 supports st1, st2 and st3, while st2 and st3 each support st1.
    Only st1..st3 are passed to ``set_hierarchy_probs``; st4 is reachable
    solely through the support links, yet its belief is asserted as well.
    """
    engine = BeliefEngine()
    st1 = Phosphorylation(None, Agent('a'), evidence=[ev1])
    st2 = Phosphorylation(None, Agent('b'), evidence=[ev2])
    st3 = Phosphorylation(None, Agent('c'), evidence=[ev3])
    st4 = Phosphorylation(None, Agent('d'), evidence=[ev1])

    # Support links, wired in both directions.
    st4.supports = [st1, st2, st3]
    st3.supports = [st1]
    st2.supports = [st1]
    st1.supported_by = [st2, st3, st4]
    st2.supported_by = [st4]
    st3.supported_by = [st4]

    # Preset beliefs that the propagation starts from.
    initial_beliefs = ((st1, 0.5), (st2, 0.8), (st3, 0.2), (st4, 0.6))
    for stmt, belief in initial_beliefs:
        stmt.belief = belief

    engine.set_hierarchy_probs([st1, st2, st3])
    assert st1.belief == 0.5
    assert st2.belief == 0.9
    assert st3.belief == 0.6
    assert st4.belief == 0.968
Esempio n. 19
0
def run_preassembly(stmts_in, **kwargs):
    """Run duplicate and related preassembly on a list of statements.

    The time spent in the related-statements step is logged at debug
    level.

    Parameters
    ----------
    stmts_in : list[indra.statements.Statement]
        The statements to preassemble.
    return_toplevel : Optional[bool]
        When True, return only top-level statements; when False, return
        all statements regardless of their level of specificity.
        Default: True
    save : Optional[str]
        Name of a pickle file to save the results (stmts_out) into.
    save_unique : Optional[str]
        Name of a pickle file to save the unique statements into.

    Returns
    -------
    stmts_out : list[indra.statements.Statement]
        The preassembled statements returned by the related step.
    """
    be = BeliefEngine()
    pa = Preassembler(hierarchies, stmts_in)

    # Step 1: combine duplicates, optionally pickling the unique set.
    run_preassembly_duplicate(pa, be, save=kwargs.get('save_unique'))

    # Step 2: combine related statements, timing the call.
    started = time.time()
    stmts_out = run_preassembly_related(
        pa,
        be,
        save=kwargs.get('save'),
        return_toplevel=kwargs.get('return_toplevel', True),
    )
    elapsed = time.time() - started
    logger.debug("Time elapsed, run_preassembly_related: %s" % elapsed)
    return stmts_out
Esempio n. 20
0
if __name__ == '__main__':
    # Command-line entry point. Expects a statements pickle file and an
    # output folder; each pipeline stage caches its result as a pickle in
    # that folder.
    if len(sys.argv) < 3:
        logger.error('Usage: assemble_corpus.py <pickle_file> <output_folder>')
        # Exit with a non-zero status so that shells and calling scripts
        # can detect the usage error (a bare sys.exit() reports success).
        sys.exit(1)
    stmts_fname = sys.argv[1]
    out_folder = sys.argv[2]

    stmts = load_statements(stmts_fname)

    logger.info('All statements: %d' % len(stmts))

    # Grounding mapping, with agent renaming enabled
    cache_pkl = os.path.join(out_folder, 'mapped_stmts.pkl')
    options = {'save': cache_pkl, 'do_rename': True}
    stmts = map_grounding(stmts, **options)

    # Sequence mapping
    cache_pkl = os.path.join(out_folder, 'sequence_valid_stmts.pkl')
    options = {'save': cache_pkl}
    mapped_stmts = map_sequence(stmts, **options)

    # The same engine and preassembler are shared by both preassembly steps
    be = BeliefEngine()
    pa = Preassembler(hierarchies, mapped_stmts)

    # Combine duplicates
    cache_pkl = os.path.join(out_folder, 'unique_stmts.pkl')
    options = {'save': cache_pkl}
    unique_stmts = run_preassembly_duplicate(pa, be, **options)

    # Combine related statements
    cache_pkl = os.path.join(out_folder, 'top_stmts.pkl')
    options = {'save': cache_pkl}
    stmts = run_preassembly_related(pa, be, **options)
Esempio n. 21
0
def test_default_probs():
    """Make sure default probs are set with empty constructor.

    Every entry of the module-level ``default_probs`` must be present,
    with the same value, in the priors held by a default-constructed
    BeliefEngine. Comparing ``default_probs`` with itself, as was done
    previously, can never fail and does not exercise the engine.
    """
    be = BeliefEngine()
    # NOTE(review): depending on the engine version the priors live either
    # on the engine itself (``be.prior_probs``) or on its scorer
    # (``be.scorer.prior_probs``) -- confirm which applies to this module.
    engine_probs = getattr(be, 'prior_probs', None)
    if engine_probs is None:
        engine_probs = be.scorer.prior_probs
    for err_type in ('rand', 'syst'):
        for k, v in default_probs[err_type].items():
            assert engine_probs[err_type][k] == v
Esempio n. 22
0
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.

    The pipeline maps and renames agent groundings, keeps only statements
    whose agents are all proteins or chemicals, preassembles them, sets
    beliefs, links mechanisms, preassembles again, and finally writes
    index cards and diagnostic outputs.

    Parameters
    ----------
    stmts : list
        The statements to assemble.
    folder : str
        Base output folder; files are written under its 'index_cards' and
        'other_outputs' sub-folders (assumed to exist already -- this
        function does not create them).
    pmcid : str
        Identifier used as the file-name prefix of all outputs and passed
        to the IndexCardAssembler as ``pmc_override``.
    background_assertions : Optional[list]
        If given, top-level statements contained in this collection are
        removed before output.

    Returns
    -------
    None
        All results are written to files or printed.
    '''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    # NOTE(review): update() modifies default_grounding_map in place, so
    # the TRIPS entries remain in that shared object after this call.
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)

    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding: keep a statement only if every one of its
    # agents is a protein or chemical
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Instantiate the mechanism linker
    ml = MechLinker(related_stmts)
    # Link statements
    linked_stmts = ml.link_statements()
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = ml.statements + [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [
            stmt for stmt in related_stmts if stmt not in background_assertions
        ]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh, protocol=2)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Iterate over statements in order of decreasing belief. The cutoff
    # only decides which message is printed; statements below it are
    # still processed further down.
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief,
                     reverse=True):
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)

        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards, so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model())
    print('=======================')

    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
Esempio n. 23
0
def calculate_belief(stmts):
    """Set prior and hierarchy beliefs on stmts and return them by key.

    Parameters
    ----------
    stmts : list
        Statements whose ``belief`` attribute is updated in place by a
        default-constructed BeliefEngine.

    Returns
    -------
    dict
        Maps each statement's ``matches_key()`` to its belief.
    """
    engine = BeliefEngine()
    # Priors first, then propagation over the support hierarchy.
    engine.set_prior_probs(stmts)
    engine.set_hierarchy_probs(stmts)
    beliefs = {}
    for stmt in stmts:
        beliefs[stmt.matches_key()] = stmt.belief
    return beliefs
Esempio n. 24
0
def test_prior_prob_assertion():
    """Check that belief stays at 1 for evidence [ev1, ev1, ev2, ev3].

    Presumably ev3 is an 'assertion'-type evidence that pins the belief to
    1; the fixtures are defined outside this function -- TODO confirm.
    """
    engine = BeliefEngine()
    evidences = [ev1, ev1, ev2, ev3]
    stmt = Phosphorylation(None, Agent('a'), evidence=evidences)
    assert stmt.belief == 1
    engine.set_prior_probs([stmt])
    # Setting priors must leave the belief unchanged.
    assert stmt.belief == 1
Esempio n. 25
0
def test_check_prior_probs():
    """Set priors on a statement whose evidence names the source 'xxx'.

    NOTE(review): the body contains no assertion; presumably the call is
    expected to raise for an unrecognized source and a decorator asserting
    that sits outside the visible code -- confirm.
    """
    engine = BeliefEngine()
    odd_evidence = Evidence(source_api='xxx')
    stmt = Phosphorylation(None, Agent('ERK'), evidence=[odd_evidence])
    engine.set_prior_probs([stmt])
Esempio n. 26
0
def update_beliefs():
    """Record curations for a corpus and optionally return updated beliefs.

    Reads the JSON body of the current request:

    - ``corpus_id``: key into the module-level ``corpora``.
    - ``curations``: mapping of statement UUID to a correctness value
      (default: empty).
    - ``return_beliefs``: whether to recompute and return beliefs
      (default: False).

    Each curation is stored on the corpus and tallied per evidence source,
    or per extraction rule when the evidence has a 'found_by' annotation.
    The module-level ``scorer`` is then updated with the tallies.

    Returns
    -------
    A JSON response: an empty object when ``return_beliefs`` is false,
    otherwise a mapping of statement UUID to belief for the whole corpus.

    Aborts with 415 if the request has no JSON body and with 400 if the
    corpus ID is unknown.
    """
    payload = request.json
    if payload is None:
        abort(Response('Missing application/json header.', 415))

    # Input parameters
    corpus_id = payload.get('corpus_id')
    curations = payload.get('curations', {})
    return_beliefs = payload.get('return_beliefs', False)

    # Look up the requested corpus
    try:
        corpus = corpora[corpus_id]
    except KeyError:
        abort(Response('The corpus_id "%s" is unknown.' % corpus_id, 400))
        return

    # Tallies are [n_correct, n_incorrect] lists, keyed by source for
    # prior_counts and by source then extraction rule for subtype_counts.
    prior_counts = {}
    subtype_counts = {}
    for uuid, correct in curations.items():
        # TODO: handle already existing curation
        stmt = corpus.statements.get(uuid)
        if stmt is None:
            logger.warning('%s is not in the corpus.' % uuid)
            continue
        corpus.curations[uuid] = correct
        # Position in the tally: 0 for correct, 1 for incorrect
        idx = 0 if correct else 1
        # Every evidence of the statement is assumed to share the
        # correctness of the curation.
        for ev in stmt.evidence:
            extraction_rule = ev.annotations.get('found_by')
            if extraction_rule:
                # Score the specific extraction rule of the source
                rule_counts = subtype_counts.setdefault(ev.source_api, {})
                tally = rule_counts.setdefault(extraction_rule, [0, 0])
            else:
                # No extraction rule: score the source as a whole
                tally = prior_counts.setdefault(ev.source_api, [0, 0])
            tally[idx] += 1

    # Feed the new curation counts into the scorer
    scorer.update_counts(prior_counts, subtype_counts)

    # Nothing more to do unless beliefs were requested
    if not return_beliefs:
        return jsonify({})

    # Recompute priors for the whole corpus with the updated scorer
    engine = BeliefEngine(scorer)
    stmts = list(corpus.statements.values())
    engine.set_prior_probs(stmts)
    # Curated statements get their belief set directly from the curation
    for uuid, correct in corpus.curations.items():
        stmt = corpus.statements.get(uuid)
        if stmt is None:
            logger.warning('%s is not in the corpus.' % uuid)
            continue
        stmt.belief = correct
    return jsonify({st.uuid: st.belief for st in stmts})