def assemble_english(stmts):
    txts = []
    for stmt in stmts:
        ea = EnglishAssembler([stmt])
        txt = ea.make_model()
        # Strip the trailing period from each assembled sentence
        if txt and txt[-1] == '.':
            txt = txt[:-1]
        txts.append(txt)
    return txts
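A minimal usage sketch for the helper above (assumes INDRA is installed; the example Statements are illustrative and the exact sentences depend on the INDRA version):

from indra.statements import Agent, Phosphorylation, Activation

stmts = [Phosphorylation(Agent('BRAF'), Agent('MAP2K1')),
         Activation(Agent('MAP2K1'), Agent('MAPK1'))]
print(assemble_english(stmts))
# Trailing periods are stripped, so the output is roughly:
# ['BRAF phosphorylates MAP2K1', 'MAP2K1 activates MAPK1']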
def print_linked_stmt(stmt):
    # Assemble each source statement into English
    source_txts = []
    for source_stmt in stmt.source_stmts:
        source_txt = EnglishAssembler([source_stmt]).make_model()
        source_txts.append(source_txt)
    query_txt = EnglishAssembler([stmt.inferred_stmt]).make_model()
    # Enumerate the source statements, then pose the inferred statement
    # as a question
    final_txt = 'I know that '
    for i, t in enumerate(source_txts):
        final_txt += '(%d) %s ' % (i + 1, t)
        if i < len(source_txts) - 1:
            # Replace the sentence-final '. ' with ', and '
            final_txt = final_txt[:-2] + ', and '
    final_txt += 'Is it therefore true that ' + query_txt[:-1] + '?'
    print(final_txt)
    return final_txt
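A hedged sketch of calling the function above; it assumes LinkedStatement from indra.mechlinker with a (source_stmts, inferred_stmt) constructor, which is how MechLinker pairs source statements with an inference:

from indra.mechlinker import LinkedStatement
from indra.statements import Agent, Activation, Phosphorylation

sources = [Phosphorylation(Agent('MEK'), Agent('ERK'))]
inferred = Activation(Agent('MEK'), Agent('ERK'))
print_linked_stmt(LinkedStatement(sources, inferred))
# Prints something like:
# I know that (1) MEK phosphorylates ERK. Is it therefore true that
# MEK activates ERK?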
def respond_describe_model(self, content):
    """Convert the model to natural language."""
    # Get the model.
    model_id = self._get_model_id(content)
    model = self.mra.get_model_by_id(model_id)
    # Turn the model into a text description.
    english_assembler = EnglishAssembler(model)
    desc = english_assembler.make_model()
    # Respond to the BA.
    resp = KQMLList('SUCCESS')
    resp.sets('description', desc)
    return resp
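For reference, a sketch of what the reply above looks like when serialized (pykqml renders a KQMLList as an s-expression; the description text and exact casing here are assumptions):

from kqml import KQMLList

resp = KQMLList('SUCCESS')
resp.sets('description', 'BRAF phosphorylates MAP2K1.')
print(resp)
# Roughly: (SUCCESS :description "BRAF phosphorylates MAP2K1.")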
def assemble_english():
    """Assemble each statement into an English sentence."""
    if request.method == 'OPTIONS':
        return {}
    response = request.body.read().decode('utf-8')
    body = json.loads(response)
    stmts_json = body.get('statements')
    stmts = stmts_from_json(stmts_json)
    sentences = {}
    for st in stmts:
        enga = EnglishAssembler()
        enga.add_statements([st])
        model_str = enga.make_model()
        sentences[st.uuid] = model_str
    res = {'sentences': sentences}
    return res
def report_paths(scored_paths, model, stmts, cell_line):
    citations = {}
    citation_count = 1
    ab_name = 'p-S6(S235/236)'
    for drug in scored_paths.keys():
        paths = scored_paths[drug]
        for path, score in paths[:1]:
            title = 'How does %s treatment result in decreased %s' % \
                (drug, ab_name)
            title += ' in %s cells?' % cell_line
            print(title)
            print('=' * len(title))
            path_stmts = stmts_from_path(path, model, stmts)
            sentences = []
            for i, stmt in enumerate(path_stmts):
                if i == 0:
                    target = stmt.agent_list()[0].name
                    sentences.append('%s is a target of %s.' % (target, drug))
                # Make citations
                pmids = [ev.pmid for ev in stmt.evidence if ev.pmid]
                cit_nums = []
                for pmid in pmids:
                    cit_num = citations.get(pmid)
                    if cit_num is None:
                        citations[pmid] = citation_count
                        cit_num = citation_count
                        citation_count += 1
                    cit_nums.append(cit_num)
                if cit_nums:
                    cit_nums = sorted(list(set(cit_nums)))
                    cit_str = ' [%s]' % (','.join([str(c) for c in cit_nums]))
                else:
                    cit_str = ''
                ea = EnglishAssembler([stmt])
                sentence = ea.make_model()
                sentence = sentence[:-1] + cit_str + '.'
                sentences.append(sentence)
            sentences[-1] = sentences[-1][:-1] + \
                ', which is measured by %s.' % ab_name
            print(' '.join(sentences))
            print()
    references = 'References\n==========\n'
    for k, v in sorted(citations.items(), key=lambda x: x[1]):
        references += '[%d] https://www.ncbi.nlm.nih.gov/pubmed/%s\n' % (v, k)
    print(references)
def send_null_provenance(self, stmt, for_what, reason=''):
    """Send out that no provenance could be found for a given Statement."""
    content_fmt = ('<h4>No supporting evidence found for {statement} from '
                   '{cause}{reason}.</h4>')
    content = KQMLList('add-provenance')
    stmt_txt = EnglishAssembler([stmt]).make_model()
    content.sets('html', content_fmt.format(statement=stmt_txt,
                                            cause=for_what,
                                            reason=reason))
    return self.tell(content)
def _english_from_agents_type(agA_name, agB_name, stmt_type):
    agA = Agent(agA_name)
    agB = Agent(agB_name)
    StmtClass = get_statement_by_name(stmt_type)
    # Complex takes a single list of members rather than two arguments
    if stmt_type.lower() == 'complex':
        stmt = StmtClass([agA, agB])
    else:
        stmt = StmtClass(agA, agB)
    return EnglishAssembler([stmt]).make_model()
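Example calls (a sketch; assumes get_statement_by_name resolves names like 'Phosphorylation' and 'Complex' case-insensitively, as in indra.statements):

print(_english_from_agents_type('BRAF', 'MAP2K1', 'Phosphorylation'))
# e.g. 'BRAF phosphorylates MAP2K1.'
print(_english_from_agents_type('BRAF', 'RAF1', 'Complex'))
# e.g. 'BRAF binds RAF1.'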
def send_model_diagnoses(self, res):
    # SUGGESTIONS
    # If there is an explanation, english assemble it
    expl_path = res.get('explanation_path')
    if expl_path:
        # Only send this if we haven't already sent an explanation
        if not self.have_explanation:
            ea_path = EnglishAssembler(expl_path)
            path_str = ea_path.make_model()
            ea_goal = EnglishAssembler([self.mra.explain])
            goal_str = ea_goal.make_model()
            if path_str and goal_str:
                explanation_str = (
                    'Our model can now explain how %s: <i>%s</i>' %
                    (goal_str[:-1], path_str))
                content = KQMLList('SPOKEN')
                content.sets('WHAT', explanation_str)
                self.tell(content)
    # If there is a suggestion, say it
    suggs = res.get('stmt_suggestions')
    if suggs:
        say = 'I have some suggestions on how to complete our model.'
        say += ' We could try modeling one of:<br>'
        stmt_str = '<ul>%s</ul>' % \
            ''.join([('<li>%s</li>' % EnglishAssembler([stmt]).make_model())
                     for stmt in suggs])
        say += stmt_str
        content = KQMLList('SPOKEN')
        content.sets('WHAT', say)
        self.tell(content)
    # If there are corrections
    corrs = res.get('stmt_corrections')
    if corrs:
        stmt = corrs[0]
        say = 'It looks like a required activity is missing,'
        say += ' consider revising to <i>%s</i>' % \
            (EnglishAssembler([stmt]).make_model())
        content = KQMLList('SPOKEN')
        content.sets('WHAT', say)
        self.tell(content)
def send_model_diagnoses(self, res):
    diagnostic_tells = []
    # SUGGESTIONS
    # If there is an explanation, english assemble it
    expl_path = res.get('explanation_path')
    if expl_path:
        # Only send this if we haven't already sent an explanation
        if not self.have_explanation:
            ea_path = EnglishAssembler(expl_path)
            path_str = ea_path.make_model()
            ea_goal = EnglishAssembler([self.mra.explain])
            goal_str = ea_goal.make_model()
            if path_str and goal_str:
                explanation_str = (
                    'Our model can now explain how %s: <i>%s</i>' %
                    (goal_str[:-1], path_str))
                diagnostic_tells.append(explanation_str)
    # If there is a suggestion, say it
    suggs = res.get('stmt_suggestions')
    if suggs:
        say = 'I have some suggestions on how to complete our model.'
        say += ' We could try modeling one of:<br>'
        stmt_str = '<ul>%s</ul>' % \
            ''.join([('<li>%s</li>' % EnglishAssembler([stmt]).make_model())
                     for stmt in suggs])
        say += stmt_str
        diagnostic_tells.append(say)
    # If there are corrections
    corrs = res.get('stmt_corrections')
    if corrs:
        stmt = corrs[0]
        say = 'It looks like a required activity may be missing,'
        say += ' say \'%s\' to add it.' % \
            (EnglishAssembler([stmt]).make_model())
        diagnostic_tells.append(say)
    # Finally, say all we have to say
    for text in diagnostic_tells:
        content = KQMLList('SPOKEN')
        content.sets('WHAT', text)
        # TELLING DIRECTLY HERE IS CURRENTLY INACTIVATED,
        # IT'S THE BA's RESPONSIBILITY TO DO THIS
        # self.tell(content)
    return diagnostic_tells
def _format_stmt_text(stmt):
    # Get the English assembled statement
    ea = EnglishAssembler([stmt])
    english = ea.make_model()
    if not english:
        english = str(stmt)
    indices = []
    for ag in stmt.agent_list():
        if ag is None or not ag.name:
            continue
        url = id_url(ag)
        if url is None:
            continue
        # Build up a set of indices
        tag_start = "<a href='%s'>" % url
        tag_close = "</a>"
        # FIXME: the EnglishAssembler capitalizes the first letter of
        # each sentence. In some cases this means the agent name does not
        # match here and no agent link is produced.
        indices += [(m.start(), m.start() + len(ag.name), ag.name,
                     tag_start, tag_close)
                    for m in re.finditer(re.escape(ag.name), english)]
    return tag_text(english, indices)
def get_annotation_text(stmt, annotate_agents=True):
    ea = EnglishAssembler(stmts=[stmt])
    annotation_text = ea.make_model()
    if annotate_agents:
        # Track (start, length) of each insertion so the coordinates of
        # later agents can be shifted accordingly
        inserts = []
        for agent_wc in ea.stmt_agents[0]:
            for insert_begin, insert_len in inserts:
                if insert_begin < agent_wc.coords[0]:
                    agent_wc.update_coords(insert_len)
            db_ns, db_id = get_grounding(agent_wc.db_refs, grounding_ns)
            if not db_ns:
                continue
            identifiers_url = identifiers.get_identifiers_url(db_ns, db_id)
            # Replace the agent name with a Markdown link to its grounding
            grounding_text = '[%s](%s)' % (agent_wc.name, identifiers_url)
            insert_len = len(grounding_text) - agent_wc.coords[1] + \
                agent_wc.coords[0]
            inserts.append((agent_wc.coords[0], insert_len))
            before_part = annotation_text[:agent_wc.coords[0]]
            after_part = annotation_text[agent_wc.coords[1]:]
            annotation_text = ''.join(
                [before_part, grounding_text, after_part])
    return annotation_text
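A hedged usage sketch; get_grounding and grounding_ns are module-level helpers not shown here, so this assumes they resolve the HGNC groundings below:

from indra.statements import Agent, Phosphorylation

stmt = Phosphorylation(Agent('BRAF', db_refs={'HGNC': '1097'}),
                       Agent('MAP2K1', db_refs={'HGNC': '6840'}))
print(get_annotation_text(stmt))
# Expected Markdown, roughly:
# [BRAF](https://identifiers.org/hgnc:1097) phosphorylates
# [MAP2K1](https://identifiers.org/hgnc:6840).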
def post(self):
    """Assemble each statement into an English sentence.

    Parameters
    ----------
    statements : list[indra.statements.Statement.to_json()]
        A list of INDRA Statements to assemble.

    Returns
    -------
    sentences : dict
        Dictionary mapping Statement UUIDs to English sentences.
    """
    args = request.json
    stmts_json = args.get('statements')
    stmts = stmts_from_json(stmts_json)
    sentences = {}
    for st in stmts:
        enga = EnglishAssembler()
        enga.add_statements([st])
        model_str = enga.make_model()
        sentences[st.uuid] = model_str
    res = {'sentences': sentences}
    return res
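A hypothetical client call for the resource above; the host, port, and route are assumptions that depend on where the service is mounted:

import requests
from indra.statements import Agent, Phosphorylation, stmts_to_json

stmts = [Phosphorylation(Agent('BRAF'), Agent('MAP2K1'))]
resp = requests.post('http://localhost:8080/assemblers/english',  # route assumed
                     json={'statements': stmts_to_json(stmts)})
print(resp.json())
# {'sentences': {'<uuid>': 'BRAF phosphorylates MAP2K1.'}}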
def format_stmts(stmts, output_format, ev_counts=None, source_counts=None):
    if output_format == 'tsv':
        msg = ''
        for stmt in stmts:
            if not stmt.evidence:
                logger.warning('Statement %s without evidence' % stmt.uuid)
                txt = ''
                pmid = ''
            else:
                txt = '"%s"' % stmt.evidence[0].text if \
                    stmt.evidence[0].text else ''
                pmid = stmt.evidence[0].pmid if stmt.evidence[0].pmid else ''
            try:
                ea_txt = EnglishAssembler([stmt]).make_model()
            except Exception as e:
                ea_txt = ''
                logger.error('English assembly failed for %s' % stmt)
                logger.error(e)
            line = '%s\t%s\t%s\tPMID%s\n' % (stmt, ea_txt, txt, pmid)
            msg += line
        return msg
    elif output_format == 'pkl':
        fname = 'indrabot.pkl'
        with open(fname, 'wb') as fh:
            pickle.dump(stmts, fh)
        return fname
    elif output_format == 'pdf':
        fname = 'indrabot.pdf'
        ga = GraphAssembler(stmts)
        ga.make_model()
        ga.save_pdf(fname)
        return fname
    elif output_format == 'json':
        msg = json.dumps(stmts_to_json(stmts), indent=1)
        return msg
    elif output_format == 'html':
        ev_counts = {} if not ev_counts else ev_counts
        ha = HtmlAssembler(stmts, ev_totals=ev_counts,
                           source_counts=source_counts)
        fname = 'indrabot.html'
        ha.save_model(fname)
        return fname
    return None
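A sketch of the 'tsv' branch (the other formats need GraphAssembler/HtmlAssembler and write files to the working directory; the PMID below is a placeholder):

from indra.statements import Agent, Evidence, Phosphorylation

stmt = Phosphorylation(Agent('BRAF'), Agent('MAP2K1'),
                       evidence=[Evidence(pmid='12345',  # placeholder PMID
                                          text='BRAF phosphorylates MEK1.')])
print(format_stmts([stmt], 'tsv'))
# One tab-separated row per statement: str(stmt), the English sentence,
# the quoted evidence text, and 'PMID12345'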
stmt_freq = [(_stmt_from_rule(model, r[0], statements), r[1])
             for r in dist_filt]
# Sum the frequencies of rules that map to the same statement
combined_freq = {}
for stmt, freq in stmt_freq:
    if stmt.uuid not in combined_freq:
        combined_freq[stmt.uuid] = (stmt, freq)
    else:
        _, old_freq = combined_freq[stmt.uuid]
        combined_freq[stmt.uuid] = (stmt, freq + old_freq)
top_stmts = list(combined_freq.values())
top_stmts.sort(key=lambda x: x[1], reverse=True)
# English-assemble each statement along with its frequency
desc = []
for s, freq in top_stmts:
    ea = EnglishAssembler([s])
    text = ea.make_model()
    desc.append((text, freq))
for t, f in desc[:30]:
    print('%s,%s' % (t, f))

# Plotting of the frequency distribution, currently disabled
"""
str_names, freqs = zip(*dist_filt)
num_genes = 30
plt.ion()
plt.figure(figsize=(5, 2), dpi=150)
ypos = np.array(range(num_genes)) * 1.0
plt.bar(ypos, freqs[:num_genes], align='center')
plt.xticks(ypos, str_names[:num_genes], rotation='vertical')
ax = plt.gca()
plt.ylabel('Frequency')
plt.subplots_adjust(bottom=0.3)
"""
def stmt_to_english(stmt):
    """Return an English assembled Statement as a sentence."""
    ea = EnglishAssembler([stmt])
    # Strip the trailing period
    return ea.make_model()[:-1]
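Usage is a one-liner (a sketch; assumes INDRA's English output ends with a period, which the slice removes):

from indra.statements import Agent, Activation
print(stmt_to_english(Activation(Agent('MAP2K1'), Agent('MAPK1'))))
# e.g. 'MAP2K1 activates MAPK1'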
def render_stmt_graph(statements, reduce=True, english=False, rankdir=None,
                      agent_style=None):
    """Render the statement hierarchy as a pygraphviz graph.

    Parameters
    ----------
    stmts : list of :py:class:`indra.statements.Statement`
        A list of top-level statements with associated supporting statements
        resulting from building a statement hierarchy with
        :py:meth:`combine_related`.
    reduce : bool
        Whether to perform a transitive reduction of the edges in the graph.
        Default is True.
    english : bool
        If True, the statements in the graph are represented by their
        English-assembled equivalent; otherwise they are represented as
        text-formatted Statements.
    rank_dir : str or None
        Argument to pass through to the pygraphviz `AGraph` constructor
        specifying graph layout direction. In particular, a value of 'LR'
        specifies a left-to-right direction. If None, the pygraphviz default
        is used.
    agent_style : dict or None
        Dict of attributes specifying the visual properties of nodes. If
        None, the following default attributes are used::

            agent_style = {'color': 'lightgray', 'style': 'filled',
                           'fontname': 'arial'}

    Returns
    -------
    pygraphviz.AGraph
        Pygraphviz graph with nodes representing statements and edges
        pointing from supported statements to supported_by statements.

    Examples
    --------
    Pattern for getting statements and rendering as a Graphviz graph:

    >>> from indra.preassembler.hierarchy_manager import hierarchies
    >>> braf = Agent('BRAF')
    >>> map2k1 = Agent('MAP2K1')
    >>> st1 = Phosphorylation(braf, map2k1)
    >>> st2 = Phosphorylation(braf, map2k1, residue='S')
    >>> pa = Preassembler(hierarchies, [st1, st2])
    >>> pa.combine_related() # doctest:+ELLIPSIS
    [Phosphorylation(BRAF(), MAP2K1(), S)]
    >>> graph = render_stmt_graph(pa.related_stmts)
    >>> graph.write('example_graph.dot') # To make the DOT file
    >>> graph.draw('example_graph.png', prog='dot') # To make an image

    Resulting graph:

    .. image:: /images/example_graph.png
        :align: center
        :alt: Example statement graph rendered by Graphviz
    """
    from indra.assemblers.english import EnglishAssembler
    # Set the default agent formatting properties
    if agent_style is None:
        agent_style = {'color': 'lightgray', 'style': 'filled',
                       'fontname': 'arial'}
    # Sets to store all of the nodes and edges as we recursively process
    # all of the statements
    nodes = set([])
    edges = set([])
    stmt_dict = {}

    # Recursive function for processing all statements
    def process_stmt(stmt):
        nodes.add(str(stmt.matches_key()))
        stmt_dict[str(stmt.matches_key())] = stmt
        for sby_ix, sby_stmt in enumerate(stmt.supported_by):
            edges.add((str(stmt.matches_key()), str(sby_stmt.matches_key())))
            process_stmt(sby_stmt)

    # Process all of the top-level statements, getting the supporting
    # statements recursively
    for stmt in statements:
        process_stmt(stmt)
    # Create a networkx graph from the nodes
    nx_graph = nx.DiGraph()
    nx_graph.add_edges_from(edges)
    # Perform transitive reduction if desired
    if reduce:
        nx_graph = nx.algorithms.dag.transitive_reduction(nx_graph)
    # Create a pygraphviz graph from the nx graph
    try:
        pgv_graph = pgv.AGraph(name='statements', directed=True,
                               rankdir=rankdir)
    except NameError:
        logger.error('Cannot generate graph because '
                     'pygraphviz could not be imported.')
        return None
    for node in nx_graph.nodes():
        stmt = stmt_dict[node]
        if english:
            ea = EnglishAssembler([stmt])
            stmt_str = ea.make_model()
        else:
            stmt_str = str(stmt)
        pgv_graph.add_node(node,
                           label='%s (%d)' % (stmt_str, len(stmt.evidence)),
                           **agent_style)
    pgv_graph.add_edges_from(nx_graph.edges())
    return pgv_graph
def respond_find_qca_path(self, content):
    """Respond to a find-qca-path request."""
    if self.qca.ndex is None:
        reply = self.make_failure('SERVICE_UNAVAILABLE')
        return reply
    source_arg = content.get('SOURCE')
    target_arg = content.get('TARGET')
    reltype_arg = content.get('RELTYPE')
    if not source_arg:
        raise ValueError("Source list is empty")
    if not target_arg:
        raise ValueError("Target list is empty")
    target = self.get_agent(target_arg)
    if target is None:
        reply = self.make_failure('NO_PATH_FOUND')
        # NOTE: use the one below if it's handled by NLG
        #reply = self.make_failure('TARGET_MISSING')
        return reply
    source = self.get_agent(source_arg)
    if source is None:
        reply = self.make_failure('NO_PATH_FOUND')
        # NOTE: use the one below if it's handled by NLG
        #reply = self.make_failure('SOURCE_MISSING')
        return reply
    if reltype_arg is None or len(reltype_arg) == 0:
        relation_types = None
    else:
        relation_types = [str(k.data) for k in reltype_arg.data]
    results_list = self.qca.find_causal_path(
        [source.name], [target.name], relation_types=relation_types)
    if not results_list:
        reply = self.make_failure('NO_PATH_FOUND')
        return reply

    def get_path_statements(results_list):
        stmts_list = []
        for res in results_list:
            # Edges of the first result
            edges = res[1::2]
            # INDRA JSON of the edges of the result
            try:
                indra_edges = [fe[0]['__INDRA json'] for fe in edges]
            except Exception:
                indra_edges = [fe[0]['INDRA json'] for fe in edges]
            # Make the JSONs dicts from strings
            indra_edges = [json.loads(e) for e in indra_edges]
            # Now fix the edges if needed due to INDRA Statement changes
            indra_edges = _fix_indra_edges(indra_edges)
            stmts_list.append(indra_edges)
        return stmts_list

    paths_list = get_path_statements(results_list)
    self.report_paths_graph(paths_list)
    # Take the first one to report
    indra_edges = paths_list[0]
    # Get the INDRA Statement objects
    indra_edge_stmts = stmts_from_json(indra_edges)
    # Assemble into English
    for stmt in indra_edge_stmts:
        txt = EnglishAssembler([stmt]).make_model()
        self.send_provenance_for_stmts(
            [stmt], "the path from %s to %s (%s)" % (source, target, txt))
    edges_cl_json = self.make_cljson(indra_edge_stmts)
    paths = KQMLList()
    paths.append(edges_cl_json)
    reply = KQMLList('SUCCESS')
    reply.set('paths', paths)
    return reply
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.'''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)
    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(bio_ontology)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Instantiate the mechanism linker
    # Link statements
    linked_stmts = MechLinker.infer_active_forms(related_stmts)
    linked_stmts += MechLinker.infer_modifications(related_stmts)
    linked_stmts += MechLinker.infer_activations(related_stmts)
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = related_stmts + [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(bio_ontology, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [stmt for stmt in related_stmts
                       if stmt not in background_assertions]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Sort by amount of evidence
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief, reverse=True):
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)
        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue
        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made (not all statements can be
        # assembled into index cards, so this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model().encode('utf-8'))
    print('=======================')
    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
def render_stmt_graph(statements, reduce=True, english=False, rankdir=None,
                      agent_style=None):
    """Render the statement hierarchy as a pygraphviz graph.

    Parameters
    ----------
    stmts : list of :py:class:`indra.statements.Statement`
        A list of top-level statements with associated supporting statements
        resulting from building a statement hierarchy with
        :py:meth:`combine_related`.
    reduce : bool
        Whether to perform a transitive reduction of the edges in the graph.
        Default is True.
    english : bool
        If True, the statements in the graph are represented by their
        English-assembled equivalent; otherwise they are represented as
        text-formatted Statements.
    rank_dir : str or None
        Argument to pass through to the pygraphviz `AGraph` constructor
        specifying graph layout direction. In particular, a value of 'LR'
        specifies a left-to-right direction. If None, the pygraphviz default
        is used.
    agent_style : dict or None
        Dict of attributes specifying the visual properties of nodes. If
        None, the following default attributes are used::

            agent_style = {'color': 'lightgray', 'style': 'filled',
                           'fontname': 'arial'}

    Returns
    -------
    pygraphviz.AGraph
        Pygraphviz graph with nodes representing statements and edges
        pointing from supported statements to supported_by statements.

    Examples
    --------
    Pattern for getting statements and rendering as a Graphviz graph:

    >>> from indra.ontology.bio import bio_ontology
    >>> braf = Agent('BRAF')
    >>> map2k1 = Agent('MAP2K1')
    >>> st1 = Phosphorylation(braf, map2k1)
    >>> st2 = Phosphorylation(braf, map2k1, residue='S')
    >>> pa = Preassembler(bio_ontology, [st1, st2])
    >>> pa.combine_related() # doctest:+ELLIPSIS
    [Phosphorylation(BRAF(), MAP2K1(), S)]
    >>> graph = render_stmt_graph(pa.related_stmts)
    >>> graph.write('example_graph.dot') # To make the DOT file
    >>> graph.draw('example_graph.png', prog='dot') # To make an image

    Resulting graph:

    .. image:: /images/example_graph.png
        :align: center
        :alt: Example statement graph rendered by Graphviz
    """
    import pygraphviz as pgv
    from indra.assemblers.english import EnglishAssembler
    # Set the default agent formatting properties
    if agent_style is None:
        agent_style = {'color': 'lightgray', 'style': 'filled',
                       'fontname': 'arial'}
    # Sets to store all of the nodes and edges as we recursively process
    # all of the statements
    nodes = set([])
    edges = set([])
    stmt_dict = {}

    # Recursive function for processing all statements
    def process_stmt(stmt):
        nodes.add(str(stmt.matches_key()))
        stmt_dict[str(stmt.matches_key())] = stmt
        for sby_ix, sby_stmt in enumerate(stmt.supported_by):
            edges.add((str(stmt.matches_key()), str(sby_stmt.matches_key())))
            process_stmt(sby_stmt)

    # Process all of the top-level statements, getting the supporting
    # statements recursively
    for stmt in statements:
        process_stmt(stmt)
    # Create a networkx graph from the nodes
    nx_graph = nx.DiGraph()
    nx_graph.add_edges_from(edges)
    # Perform transitive reduction if desired
    if reduce:
        nx_graph = nx.algorithms.dag.transitive_reduction(nx_graph)
    # Create a pygraphviz graph from the nx graph
    try:
        pgv_graph = pgv.AGraph(name='statements', directed=True,
                               rankdir=rankdir)
    except NameError:
        logger.error('Cannot generate graph because '
                     'pygraphviz could not be imported.')
        return None
    for node in nx_graph.nodes():
        stmt = stmt_dict[node]
        if english:
            ea = EnglishAssembler([stmt])
            stmt_str = ea.make_model()
        else:
            stmt_str = str(stmt)
        pgv_graph.add_node(node,
                           label='%s (%d)' % (stmt_str, len(stmt.evidence)),
                           **agent_style)
    pgv_graph.add_edges_from(nx_graph.edges())
    return pgv_graph
def path_to_english(path, model, stmts):
    path_stmts = stmts_from_path(path, model, stmts)
    ea = EnglishAssembler(path_stmts)
    return ea.make_model()
write_unicode_csv('reach_rule_frequencies.tsv', frequencies, delimiter='\t')

sample_rows = []
max_sample_size = 20
for rule, freq in frequencies:
    stmts = stmts_by_rule[rule]
    if max_sample_size < len(stmts):
        sample_stmts = np.random.choice(stmts, max_sample_size,
                                        replace=False)
    else:
        sample_stmts = stmts
    for stmt in sample_stmts:
        for ag in stmt.agent_list():
            if ag is not None:
                ag.name = ag.db_refs.get('TEXT')
        is_hypothesis = stmt.evidence[0].epistemics.get('hypothesis', '')
        is_direct = stmt.evidence[0].epistemics.get('direct', '')
        # Get the English assembly of the statement
        eng = EnglishAssembler([stmt])
        eng_sentence = eng.make_model()
        if eng_sentence == '':
            eng_sentence = str(stmt)
        sample_rows.append([eng_sentence, is_hypothesis, '', '', '',
                            stmt.evidence[0].pmid, stmt.evidence[0].text,
                            rule, freq, stmt, is_direct])
write_unicode_csv('stmts_by_rule_to_curate.tsv', sample_rows,
                  delimiter='\t')
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.'''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)
    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Instantiate the mechanism linker
    # Link statements
    linked_stmts = MechLinker.infer_active_forms(related_stmts)
    linked_stmts += MechLinker.infer_modifications(related_stmts)
    linked_stmts += MechLinker.infer_activations(related_stmts)
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = related_stmts + [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [stmt for stmt in related_stmts
                       if stmt not in background_assertions]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Sort by amount of evidence
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief, reverse=True):
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)
        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue
        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made (not all statements can be
        # assembled into index cards, so this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model().encode('utf-8'))
    print('=======================')
    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
def get_metadata(level):
    start = datetime.utcnow()
    query = request.args.copy()

    # Figure out authorization.
    has = dict.fromkeys(['elsevier', 'medscan'], False)
    user, roles = resolve_auth(query)
    for role in roles:
        for resource in has.keys():
            has[resource] |= role.permissions.get(resource, False)
    logger.info('Auths: %s' % str(has))

    w_curations = _pop(query, 'with_cur_counts', False)
    kwargs = dict(limit=_pop(query, 'limit', type_cast=int),
                  offset=_pop(query, 'offset', type_cast=int),
                  best_first=_pop(query, 'best_first', True))
    try:
        db_query = _db_query_from_web_query(query, {'HasAgent'}, True)
    except Exception as e:
        abort(Response(f'Problem forming query: {e}', 400))
        return
    if not has['medscan']:
        db_query -= HasOnlySource('medscan')

    if level == 'hashes':
        res = db_query.get_interactions(**kwargs)
    elif level == 'relations':
        res = db_query.get_relations(with_hashes=w_curations, **kwargs)
    elif level == 'agents':
        res = db_query.get_agents(with_hashes=w_curations, **kwargs)
    else:
        abort(Response(f'Invalid level: {level}'))
        return
    dt = (datetime.utcnow() - start).total_seconds()
    logger.info("Got %s results after %.2f." % (len(res.results), dt))

    ret = res.json()
    res_list = []
    for key, entry in ret.pop('results').items():
        # Filter medscan from source counts.
        if not has['medscan']:
            res.evidence_totals[key] -= entry['source_counts'].pop(
                'medscan', 0)
            entry['total_count'] = res.evidence_totals[key]
            if not entry['source_counts']:
                logger.warning("Censored content present.")
                continue
        # Create english
        if level == 'agents':
            ag_dict = entry['agents']
            if len(ag_dict) == 0:
                eng = ''
            else:
                ag_list = list(ag_dict.values())
                eng = ag_list[0]
                if len(ag_dict) > 1:
                    eng += ' interacts with ' + ag_list[1]
                    if len(ag_dict) > 3:
                        eng += ', ' + ', '.join(ag_list[2:-1])
                    if len(ag_dict) > 2:
                        eng += ', and ' + ag_list[-1]
        else:
            eng = EnglishAssembler(
                [stmt_from_interaction(entry)]).make_model()
        entry['english'] = eng
        res_list.append(entry)

    # Look up curations, if with_cur_counts was set.
    if w_curations:
        rel_hash_lookup = {}
        if level == 'hashes':
            for rel in res_list:
                rel['cur_count'] = 0
                rel_hash_lookup[rel['hash']] = rel
        else:
            for rel in res_list:
                for h in rel['hashes']:
                    rel['cur_count'] = 0
                    rel_hash_lookup[h] = rel
        curations = get_curations(pa_hash=set(rel_hash_lookup.keys()))
        for cur in curations:
            rel_hash_lookup[cur.pa_hash]['cur_count'] += 1

    # Finish up the query.
    dt = (datetime.utcnow() - start).total_seconds()
    logger.info("Returning with %s results after %.2f seconds."
                % (len(res_list), dt))

    ret['relations'] = res_list
    resp = Response(json.dumps(ret), mimetype='application/json')
    dt = (datetime.utcnow() - start).total_seconds()
    logger.info("Result prepared after %.2f seconds." % dt)
    return resp
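To illustrate the agent-joining branch above, here is how the string builds for different agent counts (a standalone trace of that logic, not part of the endpoint):

# ['BRAF']                      -> 'BRAF'
# ['BRAF', 'MAP2K1']            -> 'BRAF interacts with MAP2K1'
# ['BRAF', 'MAP2K1', 'MAPK1']   -> 'BRAF interacts with MAP2K1, and MAPK1'
# ['A', 'B', 'C', 'D']          -> 'A interacts with B, C, and D'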