def assemble_english(stmts):
    txts = []
    for stmt in stmts:
        ea = EnglishAssembler([stmt])
        txt = ea.make_model()
        if txt and txt[-1] == '.':
            txt = txt[:-1]
        txts.append(txt)
    return txts

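# A minimal usage sketch for assemble_english above (hedged: assumes INDRA
# is installed; in recent versions EnglishAssembler is imported from
# indra.assemblers.english):
def _example_assemble_english():
    from indra.statements import Agent, Phosphorylation
    stmts = [Phosphorylation(Agent('MAP2K1'), Agent('MAPK1'))]
    # Expected output along the lines of: ['MAP2K1 phosphorylates MAPK1']
    print(assemble_english(stmts))
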
def make_english_output(results, model, stmts):
    citations = {}
    citation_count = 1
    for source, target, polarity, value, found_path, paths, flag in results:
        cond = 'How does treatment with %s %s %s?' % \
            (source, 'increase' if polarity == 'positive' else 'decrease',
             target)
        print(cond)
        print('=' * len(cond))
        if paths:
            path = paths[0]
            sentences = []
            for i, (path_rule, sign) in enumerate(path):
                for rule in model.rules:
                    if rule.name == path_rule:
                        stmt = _stmt_from_rule(model, path_rule, stmts)
                        if i == 0:
                            sentences.append('%s is a target of %s.' %
                                             (stmt.agent_list()[0].name,
                                              source))
                        # Make citations
                        pmids = [ev.pmid for ev in stmt.evidence if ev.pmid]
                        cit_nums = []
                        for pmid in pmids:
                            cit_num = citations.get(pmid)
                            if cit_num is None:
                                citations[pmid] = citation_count
                                cit_num = citation_count
                                citation_count += 1
                            cit_nums.append(cit_num)
                        if cit_nums:
                            cit_nums = sorted(list(set(cit_nums)))
                            cit_str = ' [%s]' % \
                                (','.join([str(c) for c in cit_nums]))
                        else:
                            cit_str = ''
                        ea = EnglishAssembler([stmt])
                        sentence = ea.make_model()
                        sentence = sentence[:-1] + cit_str + '.'
                        sentences.append(sentence)
            sentences[-1] = sentences[-1][:-1] + \
                ', which is measured by %s.' % target
            text = ' '.join(sentences)
            print('INDRA\'s hypothesis: ' + text)
        elif found_path:
            print('INDRA determined that there exists an explanation but'
                  ' it is intractable to reconstruct.')
        else:
            print('INDRA couldn\'t find an explanation for this observation.')
        print('\n')
    references = 'References\n==========\n'
    for k, v in sorted(citations.items(), key=lambda x: x[1]):
        references += '[%d] https://www.ncbi.nlm.nih.gov/pubmed/%s\n' % (v, k)
    print(references)

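# Hedged sketch of the input make_english_output expects: each entry of
# `results` is a 7-tuple, and `paths` holds lists of (rule name, sign)
# pairs referring to rules of the given PySB model. All values below are
# illustrative, not real model-checking output.
_example_results = [
    # (source, target, polarity, value, found_path, paths, flag)
    ('vemurafenib', 'pJUN', 'positive', 1.0, True,
     [[('BRAF_phospho_MAP2K1', 1), ('MAP2K1_phospho_MAPK1', 1)]], None),
]
# make_english_output(_example_results, model, stmts)
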
def report_paths(scored_paths, model, stmts, cell_line):
    """Report paths for a specific cell line."""
    citations = {}
    citation_count = 1
    ab_name = 'Total c-Jun'
    if cell_line == 'C32':
        pol = 'decreased'
    else:
        pol = 'increased'
    for drug in scored_paths.keys():
        paths = scored_paths[drug]
        for path, score in paths[:1]:
            title = 'How does %s treatment result in %s %s' % \
                (drug, pol, ab_name)
            title += ' in %s cells?' % cell_line
            print(title)
            print('=' * len(title))
            path_stmts = stmts_from_path(path, model, stmts)
            sentences = []
            for i, stmt in enumerate(path_stmts):
                if i == 0:
                    target = stmt.agent_list()[0].name
                    sentences.append('%s is a target of %s.' %
                                     (target, drug))
                # Make citations
                pmids = [ev.pmid for ev in stmt.evidence if ev.pmid]
                cit_nums = []
                for pmid in pmids:
                    cit_num = citations.get(pmid)
                    if cit_num is None:
                        citations[pmid] = citation_count
                        cit_num = citation_count
                        citation_count += 1
                    cit_nums.append(cit_num)
                if cit_nums:
                    cit_nums = sorted(list(set(cit_nums)))
                    cit_str = ' [%s]' % (','.join([str(c) for c in cit_nums]))
                else:
                    cit_str = ''
                ea = EnglishAssembler([stmt])
                sentence = ea.make_model()
                sentence = sentence[:-1] + cit_str + '.'
                sentences.append(sentence)
            sentences[-1] = sentences[-1][:-1] + \
                ', which is measured by %s.' % ab_name
            print(' '.join(sentences))
            print()
    references = 'References\n==========\n'
    for k, v in sorted(citations.items(), key=lambda x: x[1]):
        references += '[%d] https://www.ncbi.nlm.nih.gov/pubmed/%s\n' % (v, k)
    print(references)

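# Hedged sketch of the `scored_paths` structure report_paths consumes:
# a dict mapping each drug name to (path, score) pairs, best-scoring
# first. The path encoding is whatever stmts_from_path expects; it is
# shown here as a tuple of rule names purely for illustration.
_example_scored_paths = {
    'vemurafenib': [(('BRAF_phospho_MAP2K1', 'MAP2K1_phospho_MAPK1'), 0.92)],
}
# report_paths(_example_scored_paths, model, stmts, cell_line='C32')
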
def print_linked_stmt(stmt):
    source_txts = []
    for source_stmt in stmt.source_stmts:
        source_txt = EnglishAssembler([source_stmt]).make_model()
        source_txts.append(source_txt)
    query_txt = EnglishAssembler([stmt.inferred_stmt]).make_model()
    final_txt = 'I know that '
    for i, t in enumerate(source_txts):
        final_txt += '(%d) %s ' % (i + 1, t)
        if i < len(source_txts) - 1:
            # Replace the sentence-final '. ' with ', and ' between sources
            final_txt = final_txt[:-2] + ', and '
    final_txt += 'Is it therefore true that ' + query_txt[:-1] + '?'
    print(final_txt)
    return final_txt

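# A minimal sketch exercising print_linked_stmt (hedged: assumes INDRA's
# indra.mechlinker.LinkedStatement, which pairs source statements with an
# inferred statement; the statements here are illustrative):
def _example_print_linked_stmt():
    from indra.statements import Agent, Phosphorylation, Activation
    from indra.mechlinker import LinkedStatement
    sources = [Phosphorylation(Agent('MAP2K1'), Agent('MAPK1'))]
    inferred = Activation(Agent('MAP2K1'), Agent('MAPK1'))
    print_linked_stmt(LinkedStatement(sources, inferred))
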
def assemble_english():
    """Assemble each statement into an English sentence."""
    if request.method == 'OPTIONS':
        return {}
    response = request.body.read().decode('utf-8')
    body = json.loads(response)
    stmts_json = body.get('statements')
    stmts = stmts_from_json(stmts_json)
    sentences = {}
    for st in stmts:
        enga = EnglishAssembler()
        enga.add_statements([st])
        model_str = enga.make_model()
        sentences[st.uuid] = model_str
    res = {'sentences': sentences}
    return res

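# Hedged client-side sketch for the endpoint above (the host, port and
# route are illustrative; `requests` and INDRA are assumed installed):
def _example_call_assemble_english():
    import requests
    from indra.statements import Agent, Phosphorylation, stmts_to_json
    stmts_json = stmts_to_json(
        [Phosphorylation(Agent('MAP2K1'), Agent('MAPK1'))])
    resp = requests.post('http://localhost:8080/assemble_english',
                         json={'statements': stmts_json})
    # The response maps each statement's UUID to its English sentence
    print(resp.json()['sentences'])
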
def respond_find_qca_path(self, content):
    """Respond to a find-qca-path request."""
    if self.qca.ndex is None:
        reply = self.make_failure('SERVICE_UNAVAILABLE')
        return reply
    source_arg = content.gets('SOURCE')
    target_arg = content.gets('TARGET')
    reltype_arg = content.get('RELTYPE')
    if not source_arg:
        raise ValueError("Source list is empty")
    if not target_arg:
        raise ValueError("Target list is empty")
    target = self._get_term_name(target_arg)
    if target is None:
        reply = self.make_failure('NO_PATH_FOUND')
        # NOTE: use the one below if it's handled by NLG
        #reply = self.make_failure('TARGET_MISSING')
        return reply
    source = self._get_term_name(source_arg)
    if source is None:
        reply = self.make_failure('NO_PATH_FOUND')
        # NOTE: use the one below if it's handled by NLG
        #reply = self.make_failure('SOURCE_MISSING')
        return reply
    if reltype_arg is None or len(reltype_arg) == 0:
        relation_types = None
    else:
        relation_types = [str(k.data) for k in reltype_arg.data]
    results_list = self.qca.find_causal_path([source], [target],
                                             relation_types=relation_types)
    if not results_list:
        reply = self.make_failure('NO_PATH_FOUND')
        return reply
    # Take the first path; its edges sit at the odd indices of the
    # node-edge-node list
    first_result = results_list[0]
    first_edges = first_result[1::2]
    indra_edges = [fe[0]['INDRA json'] for fe in first_edges]
    indra_edges = [json.loads(e) for e in indra_edges]
    indra_edges = _fix_indra_edges(indra_edges)
    indra_edge_stmts = stmts_from_json(indra_edges)
    for stmt in indra_edge_stmts:
        txt = EnglishAssembler([stmt]).make_model()
        self.send_provenance_for_stmts(
            [stmt], "the path from %s to %s (%s)" % (source, target, txt))
    indra_edges_str = json.dumps(indra_edges)
    ks = KQMLString(indra_edges_str)
    reply = KQMLList('SUCCESS')
    reply.set('paths', KQMLList([ks]))
    return reply

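# Hedged sketch of building a find-qca-path request content with pykqml
# (illustrative only: real TRIPS requests carry EKB term XML in the
# SOURCE/TARGET slots, which _get_term_name then resolves to names):
def _example_qca_content():
    from kqml import KQMLList
    content = KQMLList('FIND-QCA-PATH')
    content.sets('SOURCE', '<ekb term for BRAF>')
    content.sets('TARGET', '<ekb term for MAPK1>')
    return content
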
def get_ev_desc(ev, stmt):
    """Get a description of the evidence."""
    if ev.text:
        entry = "<i>'%s'</i>" % ev.text
    # If the entry at least has a source ID in a database
    elif ev.source_id:
        entry = "Database entry in '%s': %s" % \
            (ev.source_api, ev.source_id)
    # Otherwise turn it into English
    else:
        txt = EnglishAssembler([stmt]).make_model()
        entry = "Database entry in '%s' representing: %s" % \
            (ev.source_api, txt)
    return entry

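# A minimal sketch of get_ev_desc on a text-bearing Evidence object
# (hedged: assumes INDRA's Evidence and Phosphorylation classes; the
# source, PMID and sentence are illustrative):
def _example_get_ev_desc():
    from indra.statements import Agent, Phosphorylation, Evidence
    ev = Evidence(source_api='reach', pmid='12345',
                  text='MAP2K1 phosphorylates MAPK1.')
    stmt = Phosphorylation(Agent('MAP2K1'), Agent('MAPK1'), evidence=[ev])
    # With text present, this yields "<i>'MAP2K1 phosphorylates MAPK1.'</i>"
    print(get_ev_desc(ev, stmt))
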
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.'''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)

    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Instantiate the mechanism linker
    ml = MechLinker(related_stmts)
    # Link statements
    linked_stmts = ml.link_statements()
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = ml.statements + \
        [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [stmt for stmt in related_stmts
                       if stmt not in background_assertions]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh, protocol=2)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Sort by belief score
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief, reverse=True):
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)
            # Skip statements below the belief cutoff
            continue
        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue
        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model())
    print('=======================')
    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')

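# Hedged invocation sketch for run_assembly (the pickle file name, output
# folder and PMCID below are illustrative):
def _example_run_assembly():
    with open('reach_stmts.pkl', 'rb') as fh:
        stmts = pickle.load(fh)
    run_assembly(stmts, 'output', 'PMC1234567')
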
def run_assembly(stmts, folder, pmcid):
    indexcard_prefix = folder + '/index_cards/' + pmcid
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Filter for grounding
    grounded_stmts = []
    for st in stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(eh, mh)
    pa.add_statements(grounded_stmts)
    print('%d statements collected in total.' % len(pa.stmts))

    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    ml = MechLinker(unique_stmts)
    ml.link_statements()

    pa = Preassembler(eh, mh, ml.statements)
    pa.combine_duplicates()
    related_stmts = pa.combine_related()
    print('%d statements after combining related.' % len(related_stmts))

    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(related_stmts, fh)

    flattened_evidence_stmts = flatten_evidence(related_stmts)

    card_counter = 1
    card_lim = float('inf')
    top_stmts = []
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: len(x.evidence), reverse=True):
        print(len(st.evidence), st)
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue
        # Assemble IndexCards
        ia = IndexCardAssembler([st])
        ia.make_model()
        if ia.cards:
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model())
    print('=======================')

    # Print the statement graph
    graph = render_stmt_graph(related_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')

    pya = PysbAssembler()
    pya.add_statements(related_stmts)
    model = pya.make_model()
    print('PySB model has %d monomers and %d rules' %
          (len(model.monomers), len(model.rules)))

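# Hedged follow-on sketch: exporting the assembled PySB model, e.g. to
# SBML, via pysb.export (assumed available with a PySB installation; the
# output file name is illustrative):
def _example_export_model(model):
    from pysb.export import export
    sbml_str = export(model, 'sbml')
    with open('model.sbml', 'w') as fh:
        fh.write(sbml_str)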