def update_beliefs(self, corpus_id):
    """Recompute and return the belief scores of one corpus.

    Parameters
    ----------
    corpus_id : str
        The ID of the corpus whose beliefs should be recomputed.

    Returns
    -------
    dict
        A dictionary mapping the UUID of each Statement in the corpus to
        its new belief score.
    """
    corpus = self.get_corpus(corpus_id)
    statements = list(corpus.statements.values())
    engine = BeliefEngine(self.scorer)
    engine.set_prior_probs(statements)
    # A recorded curation overrides the prior computed for its Statement
    for stmt_uuid, curated_value in corpus.curations.items():
        curated_stmt = corpus.statements.get(stmt_uuid)
        if curated_stmt is not None:
            curated_stmt.belief = curated_value
        else:
            logger.warning('%s is not in the corpus.' % stmt_uuid)
    return {stmt.uuid: stmt.belief for stmt in statements}
def test_prior_prob_assertion():
    """Check that belief is 1 both before and after setting priors.

    The statement carries ev1, a deep copy of ev1, ev2 and ev3 (all defined
    elsewhere in the module).
    """
    evidence = [ev1, deepcopy(ev1), ev2, ev3]
    stmt = Phosphorylation(None, Agent('a'), evidence=evidence)
    engine = BeliefEngine()
    # A freshly built statement has the default belief of 1
    assert stmt.belief == 1
    engine.set_prior_probs([stmt])
    assert stmt.belief == 1
def calculate_belief(stmts):
    """Return beliefs keyed by stringified statement hash.

    Prior and hierarchy probabilities are set on `stmts` in place using a
    SimpleScorer configured with custom BioPAX subtype probabilities.
    """
    biopax_subtypes = {'pc11': 0.2, 'phosphosite': 0.01}
    custom_scorer = SimpleScorer(subtype_probs={'biopax': biopax_subtypes})
    engine = BeliefEngine(scorer=custom_scorer)
    engine.set_prior_probs(stmts)
    engine.set_hierarchy_probs(stmts)
    return {str(stmt.get_hash()): stmt.belief for stmt in stmts}
def test_prior_prob_one():
    """Check the prior belief of a statement that has ev1 as sole evidence.

    The expected value is built from the 'reach' entries of `default_probs`
    (ev1 is defined elsewhere; presumably a REACH evidence).
    """
    reach_rand = default_probs['rand']['reach']
    reach_syst = default_probs['syst']['reach']
    expected = 1 - (reach_rand + reach_syst)
    stmt = Phosphorylation(None, Agent('a'), evidence=[ev1])
    engine = BeliefEngine()
    assert stmt.belief == 1
    engine.set_prior_probs([stmt])
    assert stmt.belief == expected
def test_prior_prob_one():
    """Check the prior belief of a statement that has ev1 as sole evidence.

    The expected value is built from the 'reach' entries of the engine's
    own `prior_probs` (ev1 is defined elsewhere; presumably a REACH
    evidence).
    """
    engine = BeliefEngine()
    reach_rand = engine.prior_probs['rand']['reach']
    reach_syst = engine.prior_probs['syst']['reach']
    expected = 1 - (reach_rand + reach_syst)
    stmt = Phosphorylation(None, Agent('a'), evidence=[ev1])
    assert stmt.belief == 1
    engine.set_prior_probs([stmt])
    assert stmt.belief == expected
def test_prior_prob_two_same():
    """Check the prior belief of a statement that lists ev1 twice.

    In the expected value the random term is squared while the systematic
    term enters only once; both come from the engine's `prior_probs`.
    """
    engine = BeliefEngine()
    reach_rand = engine.prior_probs['rand']['reach']
    reach_syst = engine.prior_probs['syst']['reach']
    expected = 1 - (reach_rand**2 + reach_syst)
    stmt = Phosphorylation(None, Agent('a'), evidence=[ev1, ev1])
    assert stmt.belief == 1
    engine.set_prior_probs([stmt])
    assert stmt.belief == expected
def test_prior_prob_two_same():
    """Check the prior belief of a statement that lists ev1 twice.

    In the expected value the random term is squared while the systematic
    term enters only once; both come from `default_probs`.
    """
    reach_rand = default_probs['rand']['reach']
    reach_syst = default_probs['syst']['reach']
    expected = 1 - (reach_rand**2 + reach_syst)
    stmt = Phosphorylation(None, Agent('a'), evidence=[ev1, ev1])
    engine = BeliefEngine()
    assert stmt.belief == 1
    engine.set_prior_probs([stmt])
    assert stmt.belief == expected
def test_wm_scorer():
    """Smoke-test the Eidos scorer from `wm_scorer` with the BeliefEngine."""
    eidos_scorer = wm_scorer.get_eidos_scorer()
    eidos_evidence = Evidence(source_api='eidos')
    influence = Influence(Concept('a'), Concept('b'),
                          evidence=[eidos_evidence])
    # Entries for other sources must still be present in the prior maps
    rand_probs = eidos_scorer.prior_probs['rand']
    syst_probs = eidos_scorer.prior_probs['syst']
    assert 'hume' in rand_probs
    assert 'biopax' in syst_probs
    # Setting priors with this scorer should run without raising
    BeliefEngine(eidos_scorer).set_prior_probs([influence])
def test_evidence_random_noise_prior():
    """Check subtype-aware random-noise priors, directly and via the engine.

    Three evidences are scored: a Geneways 'bind' evidence and a BioPAX
    'reactome' evidence, which both have a subtype prior, and a BioPAX
    'pid' evidence, which has none and is expected to get the BioPAX
    type-level prior.
    """
    type_probs = {'biopax': 0.9, 'geneways': 0.2}
    subtype_probs = {
        'biopax': {'reactome': 0.4, 'biogrid': 0.2},
        'geneways': {'phosphorylate': 0.5, 'bind': 0.7},
    }

    def make_evidence(source_api, annotations):
        # All evidences in this test differ only in source and annotations
        return Evidence(source_api=source_api, source_id=0, pmid=0,
                        text=None, epistemics={}, annotations=annotations)

    ev_geneways_bind = make_evidence('geneways', {'actiontype': 'bind'})
    ev_biopax_reactome = make_evidence('biopax',
                                       {'source_sub_id': 'reactome'})
    ev_biopax_pid = make_evidence('biopax', {'source_sub_id': 'pid'})

    # Geneways 'bind' has a subtype prior, so that is what is returned
    assert evidence_random_noise_prior(ev_geneways_bind, type_probs,
                                       subtype_probs) == 0.7
    # BioPAX 'reactome' has a subtype prior, so that is what is returned
    assert evidence_random_noise_prior(ev_biopax_reactome, type_probs,
                                       subtype_probs) == 0.4
    # BioPAX 'pid' has no subtype prior, so the type-level prior is returned
    assert evidence_random_noise_prior(ev_biopax_pid, type_probs,
                                       subtype_probs) == 0.9

    # Make sure this all still works when we go through the belief engine.
    # Systematic error is set to 0 so belief is 1 minus the random prior.
    members = [Agent('a'), Agent('b')]
    statements = [
        Complex(members, evidence=ev_geneways_bind),
        Complex(members, evidence=ev_biopax_reactome),
        Complex(members, evidence=ev_biopax_pid),
    ]
    prior_probs = {'rand': type_probs,
                   'syst': {'biopax': 0, 'geneways': 0}}
    engine = BeliefEngine(SimpleScorer(prior_probs, subtype_probs))
    engine.set_prior_probs(statements)
    assert statements[0].belief == 1 - 0.7
    assert statements[1].belief == 1 - 0.4
    assert statements[2].belief == 1 - 0.9
def test_prior_prob_one_two():
    """Check the prior belief with ev1, a deep copy of ev1, and ev2.

    The expected value combines the 'reach' and 'trips' entries of
    `default_probs` (ev1 and ev2 are defined elsewhere; presumably REACH
    and TRIPS evidences respectively).
    """
    rand_probs = default_probs['rand']
    syst_probs = default_probs['syst']
    expected = 1 - (rand_probs['reach']**2 + syst_probs['reach']) * \
        (rand_probs['trips'] + syst_probs['trips'])
    stmt = Phosphorylation(None, Agent('a'),
                           evidence=[ev1, deepcopy(ev1), ev2])
    engine = BeliefEngine()
    assert stmt.belief == 1
    engine.set_prior_probs([stmt])
    assert stmt.belief == expected
def test_evidence_random_noise_prior():
    """Check subtype-aware random-noise priors, directly and via the engine.

    Geneways 'bind' and BioPAX 'reactome' evidences have a subtype prior;
    the BioPAX 'pid' evidence does not and is expected to receive the
    BioPAX type-level prior instead.
    """
    type_probs = {'biopax': 0.9, 'geneways': 0.2}
    biopax_subtype_probs = {'reactome': 0.4, 'biogrid': 0.2}
    geneways_subtype_probs = {'phosphorylate': 0.5, 'bind': 0.7}
    subtype_probs = {'biopax': biopax_subtype_probs,
                     'geneways': geneways_subtype_probs}

    # Keyword arguments shared by every evidence built in this test
    common = dict(source_id=0, pmid=0, text=None, epistemics={})
    ev_geneways_bind = Evidence(source_api='geneways',
                                annotations={'actiontype': 'bind'},
                                **common)
    ev_biopax_reactome = Evidence(source_api='biopax',
                                  annotations={'source_sub_id': 'reactome'},
                                  **common)
    ev_biopax_pid = Evidence(source_api='biopax',
                             annotations={'source_sub_id': 'pid'},
                             **common)

    # Subtype prior exists for Geneways 'bind'
    assert evidence_random_noise_prior(ev_geneways_bind, type_probs,
                                       subtype_probs) == 0.7
    # Subtype prior exists for BioPAX 'reactome'
    assert evidence_random_noise_prior(ev_biopax_reactome, type_probs,
                                       subtype_probs) == 0.4
    # No subtype prior for BioPAX 'pid': the type-level prior is expected
    assert evidence_random_noise_prior(ev_biopax_pid, type_probs,
                                       subtype_probs) == 0.9

    # Make sure this all still works when we go through the belief engine.
    # With zero systematic error, belief is 1 minus the random prior.
    members = [Agent('a'), Agent('b')]
    statements = [Complex(members, evidence=ev)
                  for ev in (ev_geneways_bind, ev_biopax_reactome,
                             ev_biopax_pid)]
    p = {'rand': type_probs, 'syst': {'biopax': 0, 'geneways': 0}}
    engine = BeliefEngine(SimpleScorer(p, subtype_probs))
    engine.set_prior_probs(statements)
    assert statements[0].belief == 1 - 0.7
    assert statements[1].belief == 1 - 0.4
    assert statements[2].belief == 1 - 0.9
def test_belief_calc_up_to_prior():
    """Check that prior beliefs of mock statements lie strictly in (0, 1)."""
    # (statement key, evidence) pairs; single evidences are deliberately
    # passed bare rather than wrapped in a list
    specs = [
        (1, [MockEvidence('sparser'), MockEvidence('reach')]),
        (2, MockEvidence('biopax')),
        (3, MockEvidence('signor')),
        (4, MockEvidence('biogrid')),
        (5, MockEvidence('bel')),
        (6, [MockEvidence('phosphosite'), MockEvidence('trips')]),
    ]
    test_stmts = [MockStatement(key, evidence) for key, evidence in specs]
    engine = BeliefEngine()
    engine.set_prior_probs(test_stmts)
    results = {stmt.matches_key(): stmt.belief for stmt in test_stmts}
    print(results)
    n_results = len(results)
    n_stmts = len(test_stmts)
    # Every statement must yield a distinct key
    assert n_results == n_stmts, (n_results, n_stmts)
    in_range = [0 < belief < 1 for belief in results.values()]
    assert all(in_range), 'Beliefs out of range.'
def test_belief_calc_up_to_hierarchy():
    """Check how hierarchy probabilities change beliefs relative to priors.

    Statements with a non-empty `supports` list after `populate_support`
    must gain belief; all others must keep exactly their prior belief.
    """
    specs = [
        (1, [MockEvidence('sparser'), MockEvidence('reach')]),
        (2, MockEvidence('biopax')),
        (3, MockEvidence('signor')),
        (4, MockEvidence('biogrid')),
        (5, MockEvidence('bel')),
        (6, [MockEvidence('phosphosite'), MockEvidence('trips')]),
    ]
    test_stmts = [MockStatement(key, evidence) for key, evidence in specs]
    engine = BeliefEngine()

    # Beliefs from priors only
    engine.set_prior_probs(test_stmts)
    init_results = {stmt.matches_key(): stmt.belief for stmt in test_stmts}
    print(init_results)

    # Beliefs after wiring up support links and propagating them
    supp_links = [(1, 2), (1, 3), (2, 3), (1, 5), (4, 3)]
    populate_support(test_stmts, supp_links)
    engine.set_hierarchy_probs(test_stmts)
    results = {stmt.matches_key(): stmt.belief for stmt in test_stmts}
    print(results)

    # Test a couple very simple properties.
    n_results = len(results)
    n_stmts = len(test_stmts)
    assert n_results == n_stmts, (n_results, n_stmts)
    in_range = [0 < belief < 1 for belief in results.values()]
    assert all(in_range), 'Beliefs out of range.'

    # Test the change from the initial.
    all_deltas_correct = True
    deltas_dict = {}
    for stmt in test_stmts:
        key = stmt.matches_key()
        delta = stmt.belief - init_results[key]
        if stmt.supports:
            expected = 'increase'
            delta_ok = delta > 0
        else:
            expected = 'no change'
            delta_ok = delta == 0
        if not delta_ok:
            all_deltas_correct = False
        deltas_dict[key] = {'actual': delta, 'expected': expected}
    assert all_deltas_correct, deltas_dict
def run_preassembly(statements, hierarchies):
    """Filter, deduplicate and score statements; return the top-level ones.

    Progress counts are printed to stdout along the way.
    """
    print('%d total statements' % len(statements))
    # Keep only statements passing the grounding filter at this threshold
    grounded = ac.filter_grounded_only(statements, score_threshold=0.4)
    preassembler = Preassembler(hierarchies, grounded)
    belief_engine = BeliefEngine()

    # Duplicates are merged first so that priors are set on unique statements
    unique_stmts = preassembler.combine_duplicates()
    print('%d unique statements' % len(unique_stmts))
    belief_engine.set_prior_probs(unique_stmts)

    # Then the hierarchy is built over all statements and beliefs propagated
    related_stmts = preassembler.combine_related(return_toplevel=False)
    belief_engine.set_hierarchy_probs(related_stmts)

    top_stmts = ac.filter_top_level(related_stmts)
    print('%d top-level statements' % len(top_stmts))
    return top_stmts
def test_negative_evidence():
    """Check prior beliefs when some or all evidences are negated."""
    prior_probs = {'rand': {'new_source': 0.1},
                   'syst': {'new_source': 0.05}}

    def make_evidence(negated):
        return Evidence(source_api='new_source',
                        epistemics={'negated': negated})

    # One negation pattern per statement: mixed, none negated, all negated
    negation_patterns = [
        [True, True, False],
        [False, False, False],
        [True, True, True],
    ]
    evidence_lists = [[make_evidence(flag) for flag in pattern]
                      for pattern in negation_patterns]
    stmts = [Phosphorylation(None, Agent('a'), evidence=evidences)
             for evidences in evidence_lists]

    engine = BeliefEngine(SimpleScorer(prior_probs))
    engine.set_prior_probs(stmts)

    pr = prior_probs['rand']['new_source']
    ps = prior_probs['syst']['new_source']
    mixed, all_positive, all_negative = stmts
    assert_close_enough(mixed.belief, ((1-pr)-ps)*(1-((1-pr*pr)-ps)))
    assert_close_enough(all_positive.belief, (1-pr*pr*pr)-ps)
    assert all_negative.belief == 0
def run_preassembly(statements, hierarchies):
    """Run the demo preassembly pipeline and return top-level statements.

    Statements are mapped, filtered, deduplicated, scored and stripped of
    contradictions. Intermediate and final statements are dumped to
    'pi_mtg_demo_unfiltered.pkl' and 'pi_mtg_demo.pkl'.
    """
    print('%d total statements' % len(statements))

    # Map first and keep an unfiltered dump before any filtering is applied
    statements = map_onto(statements)
    ac.dump_statements(statements, 'pi_mtg_demo_unfiltered.pkl')

    # Filter to grounded only, then to the UN concepts of interest
    statements = ac.filter_grounded_only(statements, score_threshold=0.7)
    un_concepts = [
        'conflict', 'food_security', 'flooding', 'food_production',
        'human_migration', 'drought', 'food_availability', 'market',
        'food_insecurity',
    ]
    statements = ac.filter_by_db_refs(statements, 'UN', un_concepts,
                                      policy='all', match_suffix=True)

    # Polarity is assumed where needed, then statements without it are
    # dropped
    assume_polarity(statements)
    statements = filter_has_polarity(statements)

    preassembler = Preassembler(hierarchies, statements)
    belief_engine = BeliefEngine()

    # Priors are set on the deduplicated statements
    unique_stmts = preassembler.combine_duplicates()
    print('%d unique statements' % len(unique_stmts))
    belief_engine.set_prior_probs(unique_stmts)

    # Beliefs are then propagated over the full hierarchy
    related_stmts = preassembler.combine_related(return_toplevel=False)
    belief_engine.set_hierarchy_probs(related_stmts)

    # Contradictions are searched among the top-level statements only, so
    # the preassembler's statement list is replaced before the search
    top_stmts = ac.filter_top_level(related_stmts)
    preassembler.stmts = top_stmts
    print('%d top-level statements' % len(top_stmts))
    conflicts = preassembler.find_contradicts()
    top_stmts = remove_contradicts(top_stmts, conflicts)

    ac.dump_statements(top_stmts, 'pi_mtg_demo.pkl')
    return top_stmts
def preassemble(self, filters=None):
    """Preassemble the Statements collected in the model.

    Use INDRA's GroundingMapper, SiteMapper, Preassembler and BeliefEngine
    on the IncrementalModel and save the unique statements and the top
    level statements in the `unique_stmts` and `toplevel_stmts` attributes.

    Currently the following filter options are implemented:

    - grounding: require that all Agents in statements are grounded
    - human_only: filter out non-human proteins
    - model_one: require that at least one Agent is in the incremental
      model
    - model_all: require that all Agents are in the incremental model
    - prior_one: require that at least one Agent is in the prior model
    - prior_all: require that all Agents are in the prior model

    Note that model_one -> prior_all are increasingly more restrictive
    options. Filters are applied in a fixed order (grounding, human_only,
    prior_one, model_one, prior_all, model_all) regardless of their order
    in `filters`.

    Parameters
    ----------
    filters : Optional[list[str]]
        A list of filter options to apply when choosing the statements.
        See description above for more details. Default: None
    """
    stmts = self.get_statements()
    logger.info("%d raw Statements in total" % len(stmts))

    # Fix grounding
    # NOTE(review): this updates gm.default_grounding_map in place, so the
    # added protein mappings persist beyond this call.
    logger.info("Running grounding map")
    twg = gm.agent_texts_with_grounding(stmts)
    prot_map = gm.protein_map_from_twg(twg)
    gm.default_grounding_map.update(prot_map)
    gmap = gm.GroundingMapper(gm.default_grounding_map)
    stmts = gmap.map_agents(stmts, do_rename=True)
    logger.info("%d Statements after grounding map" % len(stmts))

    # Fix sites; the second return value of map_sites is not used here
    sm = SiteMapper(default_site_map)
    stmts, _ = sm.map_sites(stmts)
    logger.info("%d Statements with valid sequence" % len(stmts))

    if filters:
        # (filter key, log message) pairs in the order they are applied
        filter_steps = [
            ("grounding", "Running grounding filter"),
            ("human_only", "Running non-human protein filter"),
        ]
        for rel_key in ("prior_one", "model_one", "prior_all", "model_all"):
            filter_steps.append(
                (rel_key, "Running %s relevance filter" % rel_key))
        for filter_key, message in filter_steps:
            if filter_key not in filters:
                continue
            logger.info(message)
            stmts = self._relevance_filter(stmts, [filter_key])
            logger.info("%s Statements after filter" % len(stmts))

    # Combine duplicates
    logger.info("Preassembling %d Statements" % len(stmts))
    pa = Preassembler(hierarchies, stmts)
    self.unique_stmts = pa.combine_duplicates()
    logger.info("%d unique Statements" % len(self.unique_stmts))

    # Run BeliefEngine on unique statements
    be = BeliefEngine()
    be.set_prior_probs(self.unique_stmts)

    # Build statement hierarchy; top-level statements are those that do
    # not support any other statement
    self.unique_stmts = pa.combine_related(return_toplevel=False)
    self.toplevel_stmts = [st for st in self.unique_stmts
                           if not st.supports]
    logger.info("%d top-level Statements" % len(self.toplevel_stmts))

    # Run BeliefEngine on hierarchy
    be.set_hierarchy_probs(self.unique_stmts)
def update_beliefs():
    """Record curations for a corpus and optionally return new beliefs.

    Reads `corpus_id`, `curations` (default: empty) and `return_beliefs`
    (default: False) from the JSON request body. Curations are stored on
    the corpus and tallied per source, or per extraction rule when the
    evidence has a 'found_by' annotation; the tallies are passed to the
    module-level scorer. Responds with an empty JSON object, or with a
    dict of beliefs keyed by Statement UUID if `return_beliefs` is set.
    """
    payload = request.json
    if payload is None:
        abort(Response('Missing application/json header.', 415))

    # Get input parameters
    corpus_id = request.json.get('corpus_id')
    curations = request.json.get('curations', {})
    return_beliefs = request.json.get('return_beliefs', False)

    # Get the right corpus
    try:
        corpus = corpora[corpus_id]
    except KeyError:
        abort(Response('The corpus_id "%s" is unknown.' % corpus_id, 400))
        return

    # Curation counts are [correct, incorrect] pairs, keyed by source for
    # prior_counts and by source then extraction rule for subtype_counts
    prior_counts = {}
    subtype_counts = {}
    for uuid, correct in curations.items():
        # TODO: handle already existing curation
        stmt = corpus.statements.get(uuid)
        if stmt is None:
            logger.warning('%s is not in the corpus.' % uuid)
            continue
        corpus.curations[uuid] = correct
        # All evidences of the statement are assumed to share the
        # correctness of the curation
        idx = 0 if correct else 1
        for ev in stmt.evidence:
            extraction_rule = ev.annotations.get('found_by')
            if extraction_rule:
                # Score the specific extraction rule
                rule_counts = subtype_counts.setdefault(ev.source_api, {})
                counts = rule_counts.setdefault(extraction_rule, [0, 0])
            else:
                # Without an extraction rule only the source is scored
                counts = prior_counts.setdefault(ev.source_api, [0, 0])
            counts[idx] += 1

    # Finally, we update the scorer with the new curation counts
    scorer.update_counts(prior_counts, subtype_counts)

    # If no belief return is needed, we just stop here
    if not return_beliefs:
        return jsonify({})

    # Otherwise the belief calculation is rerun on the corpus with the
    # updated scorer
    stmts = list(corpus.statements.values())
    engine = BeliefEngine(scorer)
    engine.set_prior_probs(stmts)
    # Beliefs of curated statements are set from the actual curation
    for uuid, correct in corpus.curations.items():
        curated_stmt = corpus.statements.get(uuid)
        if curated_stmt is None:
            logger.warning('%s is not in the corpus.' % uuid)
            continue
        curated_stmt.belief = correct
    return jsonify({st.uuid: st.belief for st in stmts})
def test_check_prior_probs():
    """Call set_prior_probs on a statement whose evidence source is 'xxx'.

    The test body makes no assertion; any expected-exception handling
    would have to come from a decorator outside this view.
    """
    unknown_source_evidence = Evidence(source_api='xxx')
    stmt = Phosphorylation(None, Agent('ERK'),
                           evidence=[unknown_source_evidence])
    engine = BeliefEngine()
    engine.set_prior_probs([stmt])
def test_prior_prob_assertion():
    """Check that belief is 1 both before and after setting priors.

    The statement carries ev1 twice (the same object), plus ev2 and ev3,
    all defined elsewhere in the module.
    """
    evidence = [ev1, ev1, ev2, ev3]
    stmt = Phosphorylation(None, Agent('a'), evidence=evidence)
    engine = BeliefEngine()
    assert stmt.belief == 1
    engine.set_prior_probs([stmt])
    assert stmt.belief == 1
def calculate_belief(stmts):
    """Return beliefs keyed by each statement's matches key.

    Prior and hierarchy probabilities are set on `stmts` in place using a
    default BeliefEngine.
    """
    engine = BeliefEngine()
    engine.set_prior_probs(stmts)
    engine.set_hierarchy_probs(stmts)
    beliefs_by_key = {stmt.matches_key(): stmt.belief for stmt in stmts}
    return beliefs_by_key
generate belief scores for each of the statements and output a new pickle file containing a list of statements with beliefs. Script throws away the dictionaries keys. Input statements should contain evidence, otherwise their belief scores will be set to 0.""" parser = ArgumentParser(description=description, epilog=epilog) parser.add_argument("-d", action="store_true", help="set if input is a dictionary of statements") help_text = ("path to a pickle file containing a list or dict of" "statements.") parser.add_argument("infile", help=help_text) args = parser.parse_args() infile = args.infile filename, file_extension = path.splitext(infile) outfile = filename + "_with_beliefs" + file_extension with open(args.infile, 'rb') as f: stmts = pickle.load(f) if args.d: stmts = [stmt for _, stmt in stmts.items()] # get belief scores for stmt in stmts: stmt.belief = 1 be = BeliefEngine() be.set_prior_probs(stmts) # using pickle instead of assemble_corpus to avoid printing logging with open(outfile, 'wb') as f: pickle.dump(stmts, f)
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    """Run assembly on a list of statements, for a given PMCID.

    Parameters
    ----------
    stmts : list
        The statements to assemble.
    folder : str
        Output root. Index cards are written under
        ``<folder>/index_cards/`` and all other outputs under
        ``<folder>/other_outputs/``, with file names prefixed by `pmcid`.
    pmcid : str
        The PMCID the statements belong to; also passed to the index card
        assembler as `pmc_override`.
    background_assertions : Optional[list]
        If given, top-level statements contained in it are removed before
        any output is produced. Default: None
    """
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Load the TRIPS-specific grounding map and add it to the default
    # (REACH-oriented) grounding map before mapping and renaming agents.
    # NOTE(review): default_grounding_map is updated in place.
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)
    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Keep statements whose agents are all proteins or chemicals
    grounded_stmts = [
        st for st in renamed_agent_stmts
        if all(is_protein_or_chemical(a) for a in st.agent_list())
    ]

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy and run BeliefEngine on it
    related_stmts = pa.combine_related()
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Link statements with the mechanism linker and score the results
    ml = MechLinker(related_stmts)
    linked_stmts = ml.link_statements()
    epe.set_linked_probs(linked_stmts)

    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    inferred_stmts = [ls.inferred_stmt for ls in linked_stmts]
    all_statements = ml.statements + inferred_stmts

    # Build the hierarchy again with a new preassembler and choose the
    # top-level statements
    pa = Preassembler(hierarchies, all_statements)
    pa.combine_duplicates()
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is None:
        nonbg_stmts = related_stmts
    else:
        nonbg_stmts = [stmt for stmt in related_stmts
                       if stmt not in background_assertions]

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh, protocol=2)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Go through the statements from highest to lowest belief
    ranked_stmts = sorted(flattened_evidence_stmts,
                          key=lambda x: x.belief, reverse=True)
    for st in ranked_stmts:
        # NOTE(review): statements below the cutoff are only reported as
        # 'SKIP' here; they are still processed below. Confirm whether a
        # `continue` was intended.
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)

        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # Not all statements can be assembled into index cards, so a card
        # is only saved and counted if one was actually made
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model())
    print('=======================')

    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    """Run assembly on a list of statements, for a given PMCID.

    Parameters
    ----------
    stmts : list
        The statements to assemble.
    folder : str
        Output root. Index cards are written under
        ``<folder>/index_cards/`` and all other outputs under
        ``<folder>/other_outputs/``, with file names prefixed by `pmcid`.
    pmcid : str
        The PMCID the statements belong to; also passed to the index card
        assembler as `pmc_override`.
    background_assertions : Optional[list]
        If given, top-level statements contained in it are removed before
        any output is produced. Default: None
    """
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Load the TRIPS-specific grounding map and add it to the default
    # (REACH-oriented) grounding map before mapping and renaming agents.
    # NOTE(review): default_grounding_map is updated in place.
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)
    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Keep statements whose agents are all proteins or chemicals
    grounded_stmts = [
        st for st in renamed_agent_stmts
        if all(is_protein_or_chemical(a) for a in st.agent_list())
    ]

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy and run BeliefEngine on it
    related_stmts = pa.combine_related()
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Link statements: each inference step contributes its own linked
    # statements, concatenated in this order
    linked_stmts = MechLinker.infer_active_forms(related_stmts)
    linked_stmts += MechLinker.infer_modifications(related_stmts)
    linked_stmts += MechLinker.infer_activations(related_stmts)

    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)

    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    inferred_stmts = [ls.inferred_stmt for ls in linked_stmts]
    all_statements = related_stmts + inferred_stmts

    # Build the hierarchy again with a new preassembler and choose the
    # top-level statements
    pa = Preassembler(hierarchies, all_statements)
    pa.combine_duplicates()
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is None:
        nonbg_stmts = related_stmts
    else:
        nonbg_stmts = [stmt for stmt in related_stmts
                       if stmt not in background_assertions]

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Go through the statements from highest to lowest belief
    ranked_stmts = sorted(flattened_evidence_stmts,
                          key=lambda x: x.belief, reverse=True)
    for st in ranked_stmts:
        # NOTE(review): statements below the cutoff are only reported as
        # 'SKIP' here; they are still processed below. Confirm whether a
        # `continue` was intended.
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)

        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # Not all statements can be assembled into index cards, so a card
        # is only saved and counted if one was actually made
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes; the text
    # is encoded to UTF-8 bytes before printing
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model().encode('utf-8'))
    print('=======================')

    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')