def get_eidos_scorer() -> SimpleScorer:
    """Build a SimpleScorer from Eidos curated precision estimates.

    The prior probabilities are first read from the
    ``default_belief_probs.json`` resource file; the ``eidos`` entries of the
    ``rand`` and ``syst`` tables are then overwritten with values derived
    from the Eidos curation table.

    Returns
    -------
    scorer :
        A SimpleScorer instance loaded with default prior probabilities as
        well as prior probabilities derived from curation-based counts.
    """
    defaults_path = get_resource_file('default_belief_probs.json')
    with open(defaults_path, 'r') as defaults_file:
        prior_probs = json.load(defaults_file)

    curation = load_eidos_curation_table()
    rule_counts = curation['COUNT of RULE']
    rule_precisions = curation['% correct']

    # Overall precision is the count-weighted mean of the per-rule values
    total_count = rule_counts.sum()
    precision = rule_counts.dot(rule_precisions) / total_count

    # The overall error is divided into a random and a systematic component;
    # for now the split is ad hoc, with a fixed systematic share.
    syst_error = 0.05
    rand_error = 1 - precision - syst_error
    prior_probs['rand']['eidos'] = rand_error
    prior_probs['syst']['eidos'] = syst_error

    # Rule-specific random errors; the per-rule precision is capped at 0.95
    # before the systematic share is subtracted.
    rule_errors = {}
    for rule, rule_precision in zip(curation['RULE'], rule_precisions):
        rule_errors[rule] = 1.0 - min(rule_precision, 0.95) - syst_error

    return SimpleScorer(prior_probs, {'eidos': rule_errors})
def calculate_belief(stmts):
    """Score the given statements and return their beliefs keyed by hash.

    A SimpleScorer with custom ``biopax`` subtype probabilities is used to
    drive a BeliefEngine, which sets prior and then hierarchy probabilities
    on the statements.

    Parameters
    ----------
    stmts :
        The statements to score. Each one must provide ``get_hash()`` and a
        ``belief`` attribute; the collection is passed to the engine twice
        and then iterated once more, so it must be re-iterable.

    Returns
    -------
    dict
        Maps ``str(stmt.get_hash())`` to ``stmt.belief`` for every statement.
    """
    biopax_subtype_probs = {'pc11': 0.2, 'phosphosite': 0.01}
    scorer = SimpleScorer(subtype_probs={
        'biopax': biopax_subtype_probs,
    })
    engine = BeliefEngine(scorer=scorer)
    engine.set_prior_probs(stmts)
    engine.set_hierarchy_probs(stmts)

    beliefs_by_hash = {}
    for stmt in stmts:
        beliefs_by_hash[str(stmt.get_hash())] = stmt.belief
    return beliefs_by_hash
def test_evidence_random_noise_prior():
    """Check subtype random noise priors and the type prior fallback."""
    import math

    type_probs = {'biopax': 0.9, 'geneways': 0.2}
    biopax_subtype_probs = {'reactome': 0.4, 'biogrid': 0.2}
    geneways_subtype_probs = {'phosphorylate': 0.5, 'bind': 0.7}
    subtype_probs = {
        'biopax': biopax_subtype_probs,
        'geneways': geneways_subtype_probs
    }

    ev_geneways_bind = Evidence(source_api='geneways', source_id=0,
                                pmid=0, text=None, epistemics={},
                                annotations={'actiontype': 'bind'})
    ev_biopax_reactome = Evidence(source_api='biopax', source_id=0,
                                  pmid=0, text=None, epistemics={},
                                  annotations={'source_sub_id': 'reactome'})
    ev_biopax_pid = Evidence(source_api='biopax', source_id=0,
                             pmid=0, text=None, epistemics={},
                             annotations={'source_sub_id': 'pid'})

    # Random noise prior for geneways bind evidence is the subtype prior,
    # since we specified it
    assert evidence_random_noise_prior(
        ev_geneways_bind, type_probs, subtype_probs) == 0.7

    # Random noise prior for reactome biopax evidence is the subtype prior,
    # since we specified it
    assert evidence_random_noise_prior(
        ev_biopax_reactome, type_probs, subtype_probs) == 0.4

    # No subtype prior is specified for pid, so the expected random noise
    # prior is the biopax type-level prior
    assert evidence_random_noise_prior(
        ev_biopax_pid, type_probs, subtype_probs) == 0.9

    # Make sure this all still works when we go through the belief engine
    statements = []
    members = [Agent('a'), Agent('b')]
    statements.append(Complex(members, evidence=ev_geneways_bind))
    statements.append(Complex(members, evidence=ev_biopax_reactome))
    statements.append(Complex(members, evidence=ev_biopax_pid))
    p = {'rand': type_probs, 'syst': {'biopax': 0, 'geneways': 0}}

    scorer = SimpleScorer(p, subtype_probs)
    engine = BeliefEngine(scorer)
    engine.set_prior_probs(statements)

    # Beliefs are computed floats, so compare with a tolerance rather than
    # relying on the engine reproducing the exact rounding of ``1 - x``.
    assert math.isclose(statements[0].belief, 1 - 0.7)
    assert math.isclose(statements[1].belief, 1 - 0.4)
    assert math.isclose(statements[2].belief, 1 - 0.9)
def test_default_probs_override():
    """Make sure default probs are overridden by constructor argument."""
    overrides = {'rand': {'assertion': 0.5}}
    scorer = SimpleScorer(overrides)
    # The engine is constructed as in the original test; it is not used
    # afterwards.
    _engine = BeliefEngine(scorer)

    for error_kind in ('rand', 'syst'):
        for source, prob in scorer.prior_probs[error_kind].items():
            is_override = (error_kind, source) == ('rand', 'assertion')
            if not is_override:
                # Everything that was not overridden must match the default
                assert default_probs[error_kind][source] == prob
                continue
            assert prob == 0.5
def test_default_probs_extend():
    """Make sure default probs are extended by constructor argument."""
    prior_probs = {'rand': {'new_source': 0.1},
                   'syst': {'new_source': 0.05}}
    scorer = SimpleScorer(prior_probs)
    # The engine is constructed as in the original test; it is not used
    # afterwards.
    _engine = BeliefEngine(scorer)

    # Expected value of the added source for each error type
    for error_kind, new_source_prob in (('rand', 0.1), ('syst', 0.05)):
        assert 'new_source' in scorer.prior_probs[error_kind]
        for source, prob in scorer.prior_probs[error_kind].items():
            if source == 'new_source':
                assert prob == new_source_prob
            else:
                # All other sources must keep their default value
                assert default_probs[error_kind][source] == prob
def test_score_statement():
    """Check that we can correctly score a single statement."""
    import math

    prior_probs = {'rand': {'reach': 0.1, 'trips': 0.2},
                   'syst': {'reach': 0, 'trips': 0}}
    scorer = SimpleScorer(prior_probs)

    # ev1 is from "reach"
    st1 = Phosphorylation(None, Agent('a'), evidence=[ev1])

    # Scores are computed floats, so they are compared with a tolerance
    # instead of exact equality.
    belief = scorer.score_statement(st1)
    assert math.isclose(belief, 0.9)

    # try extra_evidence empty list:
    belief = scorer.score_statement(st1, extra_evidence=[])
    assert math.isclose(belief, 0.9)

    # Now we try extra_evidence from trips.
    # Expected result is 1 - (0.1 * 0.2) = 0.98
    belief = scorer.score_statement(st1, extra_evidence=[ev2])
    assert math.isclose(belief, 0.98)
def test_negative_evidence():
    """Check prior beliefs when some or all evidence is marked as negated.

    Three statements are scored: one with two negated and one non-negated
    evidence, one with three non-negated evidences, and one with three
    negated evidences.
    """
    prior_probs = {'rand': {'new_source': 0.1},
                   'syst': {'new_source': 0.05}}

    def make_evidence(negated):
        # Evidence from 'new_source' carrying the given negation flag
        return Evidence(source_api='new_source',
                        epistemics={'negated': negated})

    negation_patterns = (
        [True, True, False],
        [False, False, False],
        [True, True, True],
    )
    evidence_lists = [[make_evidence(flag) for flag in pattern]
                      for pattern in negation_patterns]
    stmts = [Phosphorylation(None, Agent('a'), evidence=ev_list)
             for ev_list in evidence_lists]

    scorer = SimpleScorer(prior_probs)
    engine = BeliefEngine(scorer)
    engine.set_prior_probs(stmts)

    pr = prior_probs['rand']['new_source']
    ps = prior_probs['syst']['new_source']

    # Mixed case: expected value combines the one non-negated evidence
    # with the two negated ones
    one_positive = (1 - pr) - ps
    two_negated = (1 - pr * pr) - ps
    assert_close_enough(stmts[0].belief, one_positive * (1 - two_negated))

    # All three evidences non-negated
    assert_close_enough(stmts[1].belief, (1 - pr * pr * pr) - ps)

    # All three evidences negated
    assert stmts[2].belief == 0
import json import logging from os import path import numpy as np import pandas as pd import networkx as nx from decimal import Decimal import indra from indra.belief import SimpleScorer from indra.statements import Evidence from indra.statements import Statement logger = logging.getLogger(__name__) simple_scorer = SimpleScorer() np.seterr(all='raise') NP_PRECISION = 10 ** -np.finfo(np.longfloat).precision # Numpy precision default_sign_dict = {'Activation': 0, 'Inhibition': 1, 'IncreaseAmount': 0, 'DecreaseAmount': 1} INDRA_ROOT = path.abspath(path.dirname(path.abspath(indra.__file__))) INDRA_RESOURCES = path.join(INDRA_ROOT, 'resources') with open(path.join(INDRA_RESOURCES, 'source_mapping.json'), 'r') as f: db_source_mapping = json.load(f) class IndraNet(nx.MultiDiGraph):