def check_response_to_message(self, output):
    """Check that every returned path is bracketed by the expected agents."""
    assert output.head() == 'SUCCESS', output
    for kqml_path in output.get('paths'):
        path_stmts = stmts_from_json(json.loads(kqml_path.string_value()))
        first_agent = path_stmts[0].agent_list()[0].name
        last_agent = path_stmts[-1].agent_list()[1].name
        assert first_agent == self.agents[0]
        assert last_agent == self.agents[1]
def get_statements_by_hash(hash_list, ev_limit=100, best_first=True, tries=2):
    """Get fully formed statements from a list of hashes.

    Parameters
    ----------
    hash_list : list[int or str]
        A list of statement hashes.
    ev_limit : int or None
        Limit the amount of evidence returned per Statement. Default is 100.
    best_first : bool
        If True, the preassembled statements will be sorted by the amount of
        evidence they have, and those with the most evidence will be
        prioritized. When using `max_stmts`, this means you will get the
        "best" statements. If False, statements will be queried in arbitrary
        order.
    tries : int > 0
        Set the number of times to try the query. The database often caches
        results, so if a query times out the first time, trying again after a
        timeout will often succeed fast enough to avoid a timeout. This can
        also help gracefully handle an unreliable connection, if you're
        willing to wait. Default is 2.

    Returns
    -------
    list[:py:class:`indra.statements.Statement`]
        The Statements corresponding to the given hashes.

    Raises
    ------
    ValueError
        If `hash_list` is not a list, or its elements cannot be interpreted
        as integer hashes.
    """
    if not isinstance(hash_list, list):
        # Fixed message: the original said the input *is* a list, which is
        # the opposite of the condition being reported.
        raise ValueError("The `hash_list` input must be a list, not %s."
                         % type(hash_list))
    if not hash_list:
        # Nothing to query; avoid a pointless round trip.
        return []
    # String hashes are accepted and converted; assume homogeneity based on
    # the first element.
    if isinstance(hash_list[0], str):
        hash_list = [int(h) for h in hash_list]
    if not all(isinstance(h, int) for h in hash_list):
        raise ValueError("Hashes must be ints or strings that can be "
                         "converted into ints.")
    resp = submit_statement_request('post', 'from_hashes', ev_limit=ev_limit,
                                    data={'hashes': hash_list},
                                    best_first=best_first, tries=tries)
    return stmts_from_json(resp.json()['statements'].values())
def check_response_to_message(self, output):
    """Check that the reply contains exactly one KRAS->SOS1 Gef path."""
    assert output.head() == 'SUCCESS', output
    paths = output.get('paths')
    assert len(paths) == 1, len(paths)
    path = paths[0].string_value()
    path_json = json.loads(path)
    stmts = stmts_from_json(path_json)
    assert len(stmts) == 1, stmts
    assert isinstance(stmts[0], Gef), stmts[0]
    assert stmts[0].ras.name == 'KRAS', stmts[0].ras.name
    # Bug fix: the assert message read `stmts[0].get.name`, which would
    # raise AttributeError instead of showing the offending gef name.
    assert stmts[0].gef.name == 'SOS1', stmts[0].gef.name
def build_model_from_json(self, model_json):
    """Build a model using INDRA JSON."""
    model_stmts = stmts_from_json(json.loads(model_json))
    new_id = self.new_model(model_stmts)
    res = {'model_id': new_id, 'model': model_stmts}
    # An empty model has nothing to assemble or draw.
    if not model_stmts:
        return res
    executable = self.assemble_pysb(model_stmts)
    res['model_exec'] = executable
    res['diagrams'] = make_diagrams(executable, new_id,
                                    self.models[new_id], self.context)
    return res
def report_paths_graph(self, paths_list):
    """Render the path statements as a graph image and tell a display-image."""
    from indra.assemblers.graph import GraphAssembler
    from indra.util import flatten
    stmts_per_path = [stmts_from_json(path_json) for path_json in paths_list]
    assembler = GraphAssembler(flatten(stmts_per_path))
    assembler.make_model()
    resource = get_img_path('qca_paths.png')
    assembler.save_pdf(resource)
    content = KQMLList('display-image')
    content.set('type', 'simulation')
    content.sets('path', resource)
    self.tell(content)
def _merge_json(self, stmt_json, ev_counts): """Merge these statement jsons with new jsons.""" # Where there is overlap, there _should_ be agreement. self.__evidence_counts.update(ev_counts) for k, sj in stmt_json.items(): if k not in self.__statement_jsons: self.__statement_jsons[k] = sj # This should be most of them else: # This should only happen rarely. for evj in sj['evidence']: self.__statement_jsons[k]['evidence'].append(evj) if not self.__started: self.statements_sample = stmts_from_json( self.__statement_jsons.values()) self.__started = True return
def expand_model_from_json(self, model_json, model_id):
    """Expand a model using INDRA JSON."""
    incoming_stmts = stmts_from_json(json.loads(model_json))
    new_model_id, new_stmts = self.extend_model(incoming_stmts, model_id)
    logger.info('Old model id: %s, New model id: %s'
                % (model_id, new_model_id))
    model_stmts = self.models[new_model_id]
    res = {'model_id': new_model_id, 'model': model_stmts}
    # With no statements there is nothing to assemble or diagram.
    if not model_stmts:
        return res
    res['model_new'] = new_stmts
    executable = self.assemble_pysb(model_stmts)
    res['model_exec'] = executable
    res['diagrams'] = make_diagrams(executable, new_model_id,
                                    self.models[new_model_id], self.context)
    return res
def get_statements_for_paper(ids, ev_limit=10, best_first=True, tries=2,
                             max_stmts=None):
    """Get the set of raw Statements extracted from a paper given by the id.

    Parameters
    ----------
    ids : list[(<id type>, <id value>)]
        A list of tuples with ids and their type. The type can be any one of
        'pmid', 'pmcid', 'doi', 'pii', 'manuscript id', or 'trid', which is
        the primary key id of the text references in the database.
    ev_limit : int or None
        Limit the amount of evidence returned per Statement. Default is 10.
    best_first : bool
        If True, sort the preassembled statements by their amount of
        evidence, returning the best-supported first; combined with
        `max_stmts` this yields the "best" statements. If False, the query
        order is arbitrary.
    tries : int > 0
        Number of attempts for the query. The database often caches results,
        so retrying after a timeout frequently succeeds; this also helps
        with flaky connections. Default is 2.
    max_stmts : int or None
        Maximum number of statements to return. Default is None.

    Returns
    -------
    stmts : list[:py:class:`indra.statements.Statement`]
        A list of INDRA Statement instances.
    """
    # The service expects a list of {'id': ..., 'type': ...} dicts.
    id_dicts = [{'id': id_val, 'type': id_type} for id_type, id_val in ids]
    resp = submit_statement_request('post', 'from_papers',
                                    data={'ids': id_dicts},
                                    ev_limit=ev_limit, best_first=best_first,
                                    tries=tries, max_stmts=max_stmts)
    return stmts_from_json(resp.json()['statements'].values())
def decode_indra_stmts(stmts_json_str):
    """Deserialize a JSON string into a list of INDRA Statements."""
    return stmts_from_json(json.loads(stmts_json_str))
def insert_agents(db, stmt_tbl_obj, agent_tbl_obj, *other_stmt_clauses,
                  **kwargs):
    """Insert agents for statements that don't have any agents.

    Note: This method currently works for both Statements and PAStatements
    and their corresponding agents (Agents and PAAgents).

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database into which you are adding agents.
    stmt_tbl_obj : :py:class:`sqlalchemy.Base` table object
        For example, `db.Statements`. The object corresponding to the
        statements column you are creating agents for.
    agent_tbl_obj : :py:class:`sqlalchemy.Base` table object
        That agent table corresponding to the statement table above.
    *other_stmt_clauses : sqlalchemy clauses
        Further arguments, such as `db.Statements.db_ref == 1' are used to
        restrict the scope of statements whose agents may be added.
    verbose : bool
        If True, print extra information and a status bar while compiling
        agents for insert from statements. Default False.
    num_per_yield : int
        To conserve memory, statements are loaded in batches of
        `num_per_yield` using the `yield_per` feature of sqlalchemy queries.
    """
    verbose = kwargs.pop('verbose', False)
    num_per_yield = kwargs.pop('num_per_yield', 100)
    override_default_query = kwargs.pop('override_default_query', False)
    if len(kwargs):
        raise IndraDatabaseError("Unrecognized keyword argument(s): %s."
                                 % kwargs)
    # Build a dict mapping stmt UUIDs to statement IDs
    logger.info("Getting %s that lack %s in the database."
                % (stmt_tbl_obj.__tablename__, agent_tbl_obj.__tablename__))
    if not override_default_query:
        # Default: select statements that have no row in the agent table
        # (set difference via EXCEPT).
        stmts_w_agents_q = db.filter_query(
            stmt_tbl_obj, stmt_tbl_obj.id == agent_tbl_obj.stmt_id)
        stmts_wo_agents_q = (db.filter_query(stmt_tbl_obj,
                                             *other_stmt_clauses)
                             .except_(stmts_w_agents_q))
    else:
        # Caller supplied the full restriction via other_stmt_clauses.
        stmts_wo_agents_q = db.filter_query(stmt_tbl_obj,
                                            *other_stmt_clauses)
    logger.debug("Getting stmts with query:\n%s" % str(stmts_wo_agents_q))
    if verbose:
        num_stmts = stmts_wo_agents_q.count()
        print("Adding agents for %d statements." % num_stmts)
    # Stream results in batches to limit memory usage.
    stmts_wo_agents = stmts_wo_agents_q.yield_per(num_per_yield)

    # Now assemble agent records
    logger.info("Building agent data for insert...")
    if verbose:
        print("Loading:", end='', flush=True)
    agent_data = []
    for i, db_stmt in enumerate(stmts_wo_agents):
        # Convert the database statement entry object into an indra statement.
        stmt = stmts_from_json([json.loads(db_stmt.json.decode())])[0]

        # Figure out how the agents are structured and assign roles.
        ag_list = stmt.agent_list()
        nary_stmt_types = [Complex, SelfModification, ActiveForm, Conversion,
                           Translocation]
        if any([isinstance(stmt, tp) for tp in nary_stmt_types]):
            # N-ary statements have no subject/object distinction.
            agents = {('OTHER', ag) for ag in ag_list}
        elif len(ag_list) == 2:
            agents = {('SUBJECT', ag_list[0]), ('OBJECT', ag_list[1])}
        else:
            raise IndraDatabaseError("Unhandled agent structure for stmt %s "
                                     "with agents: %s."
                                     % (str(stmt), str(stmt.agent_list())))

        # Prep the agents for copy into the database.
        for role, ag in agents:
            # If no agent, or no db_refs for the agent, skip the insert
            # that follows.
            if ag is None or ag.db_refs is None:
                continue
            for ns, ag_id in ag.db_refs.items():
                # Some db_refs values are lists of ids; insert one row each.
                if isinstance(ag_id, list):
                    for sub_id in ag_id:
                        agent_data.append((db_stmt.id, ns, sub_id, role))
                else:
                    agent_data.append((db_stmt.id, ns, ag_id, role))

        # Optionally print another tick on the progress bar.
        if verbose and num_stmts > 25 and i % (num_stmts // 25) == 0:
            print('|', end='', flush=True)
    if verbose and num_stmts > 25:
        print()
    cols = ('stmt_id', 'db_name', 'db_id', 'role')
    db.copy(agent_tbl_obj.__tablename__, agent_data, cols)
    return
def make_stmts_from_db_list(db_stmt_objs):
    """Convert database statement rows into INDRA Statement objects."""
    stmt_jsons = [json.loads(db_obj.json.decode('utf8'))
                  for db_obj in db_stmt_objs]
    return stmts_from_json(stmt_jsons)
def process_entries(self, result):
    """Post-process the entries of a query result in place.

    Depending on the requested format and the user's permissions this
    builds English representations, strips medscan-sourced counts,
    redacts elsevier evidence text, and stringifies hashes for JS
    consumption. Hash-type results need no processing at all.
    """
    if result.result_type == 'hashes':
        # There is really nothing to do for hashes.
        return
    elsevier_redactions = 0
    # Only walk the entries if some censoring/formatting is needed.
    if not all(self.has.values()) or self.fmt == 'json-js' \
            or self.w_english:
        # Iterate over a copy so entries can be popped during the loop.
        for key, entry in result.results.copy().items():
            # Build english reps of each result (unless they're just hashes)
            if self.w_english and result.result_type != 'hashes':
                stmt = None
                # Fix the agent order
                if self.strict:
                    if result.result_type == 'statements':
                        stmt = stmts_from_json([entry])[0]
                        if type(stmt) == Complex:
                            # Sort Complex members into the query's agent
                            # order; unknown names sort last (key 10).
                            id_lookup = {v: int(k)
                                         for k, v in self.agent_dict.items()}
                            stmt.members.sort(
                                key=lambda ag: id_lookup.get(ag.name, 10)
                            )
                        agent_set = {ag.name for ag in stmt.agent_list()
                                     if ag is not None}
                    else:
                        agent_set = set(entry['agents'].values())
                        if result.result_type == 'relations' \
                                and entry['type'] == 'Complex':
                            entry['agents'] = self.agent_dict
                    # Drop entries that do not cover all queried agents.
                    if agent_set < self.agent_set:
                        result.results.pop(key, None)
                        continue
                # Construct the english.
                if result.result_type == 'statements':
                    if stmt is None:
                        stmt = stmts_from_json([entry])[0]
                    eng = _format_stmt_text(stmt)
                    entry['evidence'] = _format_evidence_text(stmt)
                else:
                    eng = _make_english_from_meta(entry['agents'],
                                                  entry.get('type'))
                if not eng:
                    logger.warning(f"English not formed for {key}:\n"
                                   f"{entry}")
                entry['english'] = eng

            # Filter out medscan if user does not have medscan privileges.
            if not self.has['medscan']:
                if result.result_type == 'statements':
                    result.source_counts[key].pop('medscan', 0)
                else:
                    # Deduct the removed medscan evidence from the totals.
                    result.evidence_counts[key] -= \
                        entry['source_counts'].pop('medscan', 0)
                    entry['total_count'] = result.evidence_counts[key]
                    if not entry['source_counts']:
                        logger.warning("Censored content present.")

            # In most cases we can stop here
            if self.has['elsevier'] and self.fmt != 'json-js' \
                    and not self.w_english:
                continue

            if result.result_type == 'statements':
                # If there is evidence, loop through it if necessary.
                for ev_json in entry['evidence'][:]:
                    if self.fmt == 'json-js':
                        # JS cannot hold 64-bit ints precisely.
                        ev_json['source_hash'] = str(ev_json['source_hash'])
                    # Check for elsevier and redact if necessary
                    if not self.has['elsevier'] and \
                            get_source(ev_json) == 'elsevier':
                        text = ev_json['text']
                        if len(text) > 200:
                            ev_json['text'] = text[:200] + REDACT_MESSAGE
                            elsevier_redactions += 1
            elif result.result_type != 'hashes' and self.fmt == 'json-js':
                # Stringify lists of hashes.
                if 'hashes' in entry and entry['hashes'] is not None:
                    entry['hashes'] = [str(h) for h in entry['hashes']]
                elif 'hash' in entry:
                    entry['hash'] = str(entry['hash'])

    if result.result_type == 'statements':
        logger.info(f"Redacted {elsevier_redactions} pieces of elsevier "
                    f"evidence.")

    logger.info(f"Process entries for {self.__class__.__name__} after "
                f"{sec_since(self.start_time)} seconds.")
    return
def produce_response(self, result):
    """Build the Flask Response for a query result.

    Statement results are returned as HTML or JSON depending on
    ``self.fmt``; relation/interaction/agent results get optional
    curation counts and are always JSON; hash results fall through to
    the parent implementation.
    """
    if result.result_type == 'statements':
        res_json = result.json()

        # Add derived values to the res_json.
        if self.w_cur_counts:
            res_json['num_curations'] = self.get_curation_counts(result)
        res_json['statement_limit'] = MAX_STMTS
        res_json['statements_returned'] = len(result.results)
        res_json['end_of_statements'] = \
            (len(result.results) < MAX_STMTS)
        res_json['statements_removed'] = 0
        res_json['evidence_returned'] = result.returned_evidence

        # Build the HTML if HTML, else just tweak the JSON.
        stmts_json = result.results
        if self.fmt == 'html':
            title = TITLE
            ev_counts = res_json.pop('evidence_counts')
            beliefs = res_json.pop('belief_scores')
            stmts = stmts_from_json(stmts_json.values())
            db_rest_url = request.url_root[:-1] \
                + self._env.globals['url_for']('root')[:-1]
            html_assembler = \
                HtmlAssembler(stmts, summary_metadata=res_json,
                              ev_counts=ev_counts, beliefs=beliefs,
                              sort_by=self.sort_by, title=title,
                              source_counts=result.source_counts,
                              db_rest_url=db_rest_url)
            idbr_template = \
                self._env.get_template('idbr_statements_view.html')
            # Tests run without a real user/session.
            if not TESTING['status']:
                identity = self.user.identity() if self.user else None
            else:
                identity = None
            source_info, source_colors = get_html_source_info()
            resp_content = html_assembler.make_model(
                idbr_template, identity=identity, source_info=source_info,
                source_colors=source_colors, simple=False
            )
            if self.tracker.get_messages():
                # Summarize captured log levels as a warning banner.
                level_stats = ['%d %ss' % (n, lvl.lower())
                               for lvl, n
                               in self.tracker.get_level_stats().items()]
                msg = ' '.join(level_stats)
                resp_content = html_assembler.append_warning(msg)
            mimetype = 'text/html'
        else:
            # Return JSON for all other values of the format argument
            res_json.update(self.tracker.get_level_stats())
            res_json['statements'] = stmts_json
            resp_content = json.dumps(res_json)
            mimetype = 'application/json'
        resp = Response(resp_content, mimetype=mimetype)
        logger.info("Exiting with %d statements with %d/%d evidence of "
                    "size %f MB after %s seconds."
                    % (res_json['statements_returned'],
                       res_json['evidence_returned'],
                       res_json['total_evidence'],
                       sys.getsizeof(resp.data) / 1e6,
                       sec_since(self.start_time)))
    elif result.result_type != 'hashes':
        # Look up curations, if result with_curations was set.
        if self.w_cur_counts:
            rel_hash_lookup = defaultdict(list)
            if result.result_type == 'interactions':
                for h, rel in result.results.items():
                    rel['cur_count'] = 0
                    rel_hash_lookup[int(h)].append(rel)
            else:
                for rel in result.results.values():
                    for h in rel['hashes']:
                        rel['cur_count'] = 0
                        rel_hash_lookup[int(h)].append(rel)
                    if not self.special['with_hashes']:
                        rel['hashes'] = None
            curations = get_curations(pa_hash=set(rel_hash_lookup.keys()))
            # Count curations per related entry.
            for cur in curations:
                for rel in rel_hash_lookup[cur['pa_hash']]:
                    rel['cur_count'] += 1

        logger.info("Returning with %s results after %.2f seconds."
                    % (len(result.results), sec_since(self.start_time)))

        res_json = result.json()
        res_json['relations'] = list(res_json['results'].values())
        if result.result_type == 'agents' and self.fmt == 'json-js':
            # Stringify hashes for JS (64-bit int precision).
            res_json['complexes_covered'] = \
                [str(h) for h in res_json['complexes_covered']]
        res_json.pop('results')
        res_json['query_str'] = str(self.db_query)
        resp = Response(json.dumps(res_json), mimetype='application/json')

        logger.info("Result prepared after %.2f seconds."
                    % sec_since(self.start_time))
    else:
        return super(StatementApiCall, self).produce_response(result)
    return resp
def insert_agents(db, prefix, stmts_wo_agents=None, **kwargs):
    """Insert agents for statements that don't have any agents.

    Note: This method currently works for both Statements and PAStatements
    and their corresponding agents (Agents and PAAgents).

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database into which you are adding agents.
    prefix : str
        Select which stage of statements for which you wish to insert
        agents. The choices are 'pa' for preassembled statements or 'raw'
        for raw statements.
    stmts_wo_agents : iterable or None
        Statements lacking agents; if None, they are looked up with
        `get_statements_without_agents`.
    verbose : bool
        If True, print extra information and a status bar while compiling
        agents for insert from statements. Default False.
    """
    verbose = kwargs.pop('verbose', False)
    if len(kwargs):
        raise IndraDatabaseError("Unrecognized keyword argument(s): %s."
                                 % kwargs)
    # The agent table name is derived from the stage prefix.
    agent_tbl_obj = db.tables[prefix + '_agents']
    if stmts_wo_agents is None:
        stmts_wo_agents = get_statements_without_agents(db, prefix)

    if verbose:
        num_stmts = len(stmts_wo_agents)

    # Construct the agent records
    logger.info("Building agent data for insert...")
    if verbose:
        print("Loading:", end='', flush=True)
    agent_data = []
    for i, db_stmt in enumerate(stmts_wo_agents):
        # Convert the database statement entry object into an indra statement.
        stmt = stmts_from_json([json.loads(db_stmt.json.decode())])[0]

        # Figure out how the agents are structured and assign roles.
        ag_list = stmt.agent_list()
        nary_stmt_types = [Complex, SelfModification, ActiveForm, Conversion,
                           Translocation]
        if any([isinstance(stmt, tp) for tp in nary_stmt_types]):
            # N-ary statements have no subject/object distinction.
            agents = {('OTHER', ag) for ag in ag_list}
        elif len(ag_list) == 2:
            agents = {('SUBJECT', ag_list[0]), ('OBJECT', ag_list[1])}
        else:
            raise IndraDatabaseError("Unhandled agent structure for stmt %s "
                                     "with agents: %s."
                                     % (str(stmt), str(stmt.agent_list())))

        # Prep the agents for copy into the database.
        for role, ag in agents:
            # If no agent, or no db_refs for the agent, skip the insert
            # that follows.
            if ag is None or ag.db_refs is None:
                continue
            for ns, ag_id in ag.db_refs.items():
                # Preassembled statements key on mk_hash, raw on id.
                if prefix == 'pa':
                    stmt_id = db_stmt.mk_hash
                else:  # prefix == 'raw'
                    stmt_id = db_stmt.id
                if isinstance(ag_id, list):
                    for sub_id in ag_id:
                        agent_data.append((stmt_id, ns, sub_id, role))
                else:
                    agent_data.append((stmt_id, ns, ag_id, role))

        # Optionally print another tick on the progress bar.
        if verbose and num_stmts > 25 and i % (num_stmts // 25) == 0:
            print('|', end='', flush=True)
    if verbose and num_stmts > 25:
        print()
    if prefix == 'pa':
        cols = ('stmt_mk_hash', 'db_name', 'db_id', 'role')
    else:  # prefix == 'raw'
        cols = ('stmt_id', 'db_name', 'db_id', 'role')
    db.copy(agent_tbl_obj.__tablename__, agent_data, cols)
    return
def remove_mechanism(self, mech_js, model_id):
    """Return a new model with the given mechanism having been removed."""
    stmts_to_remove = stmts_from_json(json.loads(mech_js))
    return self.remove_mechanism_from_stmts(stmts_to_remove, model_id)
def decorator(*args, **kwargs):
    """Wrapper around the decorated endpoint function `f`.

    Parses common query parameters from the Flask request, calls `f`,
    redacts elsevier evidence for unauthorized users, and renders the
    result as HTML or JSON (optionally streamed).
    """
    tracker = LogTracker()
    start_time = datetime.now()
    logger.info("Got query for %s at %s!"
                % (f.__name__, start_time))

    # Pull the standard options off the query string.
    query = request.args.copy()
    offs = query.pop('offset', None)
    ev_lim = query.pop('ev_limit', None)
    best_first_str = query.pop('best_first', 'true')
    # NOTE(review): because of `or best_first_str`, any non-empty value
    # (including 'false') yields True; only an empty string gives False.
    # Confirm this is intended.
    best_first = True if best_first_str.lower() == 'true' \
        or best_first_str else False
    do_stream_str = query.pop('stream', 'false')
    do_stream = True if do_stream_str == 'true' else False
    # Cap the requested number of statements at the server maximum.
    max_stmts = min(int(query.pop('max_stmts', MAX_STATEMENTS)),
                    MAX_STATEMENTS)
    format = query.pop('format', 'json')
    api_key = query.pop('api_key', None)

    logger.info("Running function %s after %s seconds."
                % (f.__name__, sec_since(start_time)))
    result = f(query, offs, max_stmts, ev_lim, best_first, *args, **kwargs)
    logger.info("Finished function %s after %s seconds."
                % (f.__name__, sec_since(start_time)))

    # Redact elsevier content for those without permission.
    if api_key is None or not _has_elsevier_auth(api_key):
        for stmt_json in result['statements'].values():
            for ev_json in stmt_json['evidence']:
                if get_source(ev_json) == 'elsevier':
                    text = ev_json['text']
                    if len(text) > 200:
                        ev_json['text'] = text[:200] + REDACT_MESSAGE
    logger.info("Finished redacting evidence for %s after %s seconds."
                % (f.__name__, sec_since(start_time)))

    # Echo the effective query options back in the result.
    result['offset'] = offs
    result['evidence_limit'] = ev_lim
    result['statement_limit'] = MAX_STATEMENTS
    result['statements_returned'] = len(result['statements'])

    if format == 'html':
        stmts_json = result.pop('statements')
        ev_totals = result.pop('evidence_totals')
        stmts = stmts_from_json(stmts_json.values())
        html_assembler = HtmlAssembler(stmts, result, ev_totals,
                                       title='INDRA DB REST Results',
                                       db_rest_url=request.url_root[:-1])
        content = html_assembler.make_model()
        if tracker.get_messages():
            # Summarize captured log levels as a warning banner.
            level_stats = [
                '%d %ss' % (n, lvl.lower())
                for lvl, n in tracker.get_level_stats().items()
            ]
            msg = ' '.join(level_stats)
            content = html_assembler.append_warning(msg)
        mimetype = 'text/html'
    else:
        # Return JSON for all other values of the format argument
        result.update(tracker.get_level_stats())
        content = json.dumps(result)
        mimetype = 'application/json'

    if do_stream:
        # Returning a generator should stream the data.
        resp_json_bts = content
        gen = batch_iter(resp_json_bts, 10000)
        resp = Response(gen, mimetype=mimetype)
    else:
        resp = Response(content, mimetype=mimetype)
    logger.info("Exiting with %d statements with %d evidence of size "
                "%f MB after %s seconds."
                % (result['statements_returned'],
                   result['total_evidence'],
                   sys.getsizeof(resp.data) / 1e6,
                   sec_since(start_time)))
    return resp
def _compile_results(self):
    """Generate statements from the jsons.

    Also derives source and evidence counts from the loaded statements
    when `use_obtained_counts` is set.
    """
    self.statements = stmts_from_json(self.__statement_jsons.values())
    if self.use_obtained_counts:
        self.__source_counts = get_available_source_counts(self.statements)
        self.__evidence_counts = get_available_ev_counts(self.statements)
def statements(self) -> list:
    """Return the query results converted to INDRA Statement objects."""
    assert isinstance(self.results, dict), "Results must be a dictionary."
    stmt_jsons = list(self.results.values())
    return stmts_from_json(stmt_jsons)
def _compile_statements(self):
    """Generate statements from the jsons.

    Converts the accumulated statement JSONs into Statement objects and
    stores them on `self.statements`.
    """
    self.statements = stmts_from_json(self.__statement_jsons.values())
The input files are JSON files containing JSON-serialized INDRA statements. This script was prepared for ISI. """ import json import pickle from glob import glob from tqdm import tqdm from indra.statements import stmts_from_json, Statement files = glob("../data/UN_stmt_jsons/*") d = {} for file in tqdm(files): with open(file, "r") as f: sts = stmts_from_json(json.load(f)["statements"]) for s in sts: for x in (s.subj, s.obj): db_refs = x.db_refs text = db_refs["TEXT"] if db_refs.get("UN") is not None: top_un_grounding = db_refs["UN"][0][0].split("/")[-1] if top_un_grounding not in d: d[top_un_grounding] = [text] else: d[top_un_grounding].append(text) for k, v in d.items(): d[k] = list(set(v)) with open("groundings.json", "w") as f:
def respond_find_qca_path(self, content):
    """Response content to find-qca-path request"""
    # The QCA needs a live NDEx connection to search for paths.
    if self.qca.ndex is None:
        reply = self.make_failure('SERVICE_UNAVAILABLE')
        return reply

    source_arg = content.get('SOURCE')
    target_arg = content.get('TARGET')
    reltype_arg = content.get('RELTYPE')

    if not source_arg:
        raise ValueError("Source list is empty")
    if not target_arg:
        raise ValueError("Target list is empty")

    target = self.get_agent(target_arg)
    if target is None:
        reply = self.make_failure('NO_PATH_FOUND')
        # NOTE: use the one below if it's handled by NLG
        #reply = self.make_failure('TARGET_MISSING')
        return reply

    source = self.get_agent(source_arg)
    if source is None:
        reply = self.make_failure('NO_PATH_FOUND')
        # NOTE: use the one below if it's handled by NLG
        #reply = self.make_failure('SOURCE_MISSING')
        return reply

    if reltype_arg is None or len(reltype_arg) == 0:
        relation_types = None
    else:
        relation_types = [str(k.data) for k in reltype_arg.data]

    results_list = self.qca.find_causal_path([source.name], [target.name],
                                             relation_types=relation_types)
    if not results_list:
        reply = self.make_failure('NO_PATH_FOUND')
        return reply

    def get_path_statements(results_list):
        # Extract the per-path INDRA statement JSONs from the raw results.
        stmts_list = []
        for res in results_list:
            # Edges of the first result
            edges = res[1::2]
            # INDRA JSON of the edges of the result
            try:
                indra_edges = [fe[0]['__INDRA json'] for fe in edges]
            except Exception:
                # Older key spelling without the leading underscores.
                indra_edges = [fe[0]['INDRA json'] for fe in edges]
            # Make the JSONs dicts from strings
            indra_edges = [json.loads(e) for e in indra_edges]
            # Now fix the edges if needed due to INDRA Statement changes
            indra_edges = _fix_indra_edges(indra_edges)
            stmts_list.append(indra_edges)
        return stmts_list

    paths_list = get_path_statements(results_list)

    self.report_paths_graph(paths_list)

    # Take the first one to report
    indra_edges = paths_list[0]
    # Get the INDRA Statement objects
    indra_edge_stmts = stmts_from_json(indra_edges)
    # Assemble into English
    for stmt in indra_edge_stmts:
        txt = EnglishAssembler([stmt]).make_model()
        self.send_provenance_for_stmts(
            [stmt], "the path from %s to %s (%s)" % (source, target, txt))
    # Reply with the path statements encoded as CL-JSON.
    edges_cl_json = self.make_cljson(indra_edge_stmts)
    paths = KQMLList()
    paths.append(edges_cl_json)
    reply = KQMLList('SUCCESS')
    reply.set('paths', paths)
    return reply
def build_model_from_json(self, model_json):
    """Build a model using INDRA JSON."""
    parsed_stmts = stmts_from_json(json.loads(model_json))
    return self.build_model_from_stmts(parsed_stmts)
def post(self):
    """Apply the configured pipeline function to the posted statements."""
    args = self.process_args(request.json)
    input_stmts = stmts_from_json(args.pop('statements'))
    pipeline_func = pipeline_functions[self.func_name]
    output_stmts = pipeline_func(input_stmts, **args)
    return _return_stmts(output_stmts)
def get_hash_statements_dict(self):
    """Return a dict of Statements keyed by hashes.

    Each stored statement JSON is converted individually so the mapping
    from hash to Statement object is preserved.
    """
    res = {stmt_hash: stmts_from_json([stmt])[0]
           for stmt_hash, stmt in self.__statement_jsons.items()}
    return res
def respond_find_qca_path(self, content):
    """Response content to find-qca-path request"""
    # The QCA needs a live NDEx connection to search for paths.
    if self.qca.ndex is None:
        reply = self.make_failure('SERVICE_UNAVAILABLE')
        return reply

    source_arg = content.gets('SOURCE')
    target_arg = content.gets('TARGET')
    reltype_arg = content.get('RELTYPE')

    if not source_arg:
        raise ValueError("Source list is empty")
    if not target_arg:
        raise ValueError("Target list is empty")

    target = self._get_term_name(target_arg)
    if target is None:
        reply = self.make_failure('NO_PATH_FOUND')
        # NOTE: use the one below if it's handled by NLG
        #reply = self.make_failure('TARGET_MISSING')
        return reply

    source = self._get_term_name(source_arg)
    if source is None:
        reply = self.make_failure('NO_PATH_FOUND')
        # NOTE: use the one below if it's handled by NLG
        #reply = self.make_failure('SOURCE_MISSING')
        return reply

    if reltype_arg is None or len(reltype_arg) == 0:
        relation_types = None
    else:
        relation_types = [str(k.data) for k in reltype_arg.data]

    results_list = self.qca.find_causal_path([source], [target],
                                             relation_types=relation_types)
    if not results_list:
        reply = self.make_failure('NO_PATH_FOUND')
        return reply

    def get_path_statements(results_list):
        # Extract the per-path INDRA statement JSONs from the raw results.
        stmts_list = []
        for res in results_list:
            # Edges of the first result
            edges = res[1::2]
            # INDRA JSON of the edges of the result
            try:
                indra_edges = [fe[0]['__INDRA json'] for fe in edges]
            except Exception:
                # Older key spelling without the leading underscores.
                indra_edges = [fe[0]['INDRA json'] for fe in edges]
            # Make the JSONs dicts from strings
            indra_edges = [json.loads(e) for e in indra_edges]
            # Now fix the edges if needed due to INDRA Statement changes
            indra_edges = _fix_indra_edges(indra_edges)
            stmts_list.append(indra_edges)
        return stmts_list

    paths_list = get_path_statements(results_list)

    self.report_paths_graph(paths_list)

    # Take the first one to report
    indra_edges = paths_list[0]
    # Get the INDRA Statement objects
    indra_edge_stmts = stmts_from_json(indra_edges)
    # Assemble into English
    for stmt in indra_edge_stmts:
        txt = EnglishAssembler([stmt]).make_model()
        self.send_provenance_for_stmts(
            [stmt], "the path from %s to %s (%s)" % (source, target, txt))
    # Reply with the first path's statement JSON as a KQML string.
    indra_edges_str = json.dumps(indra_edges)
    ks = KQMLString(indra_edges_str)
    reply = KQMLList('SUCCESS')
    reply.set('paths', KQMLList([ks]))
    return reply
def expand_model_from_json(self, model_json, model_id):
    """Expand a model using INDRA JSON."""
    parsed_stmts = stmts_from_json(json.loads(model_json))
    return self.expand_model_from_stmts(parsed_stmts, model_id)
def test_eidos_json():
    """Exercise the sources/eidos/process_jsonld endpoint.

    Covers the default call plus the `grounding_ns`, `extract_filter` and
    `grounding_mode` options against a fresh in-memory database.
    """
    from indra_world.tests.test_eidos import test_jsonld, _get_data_file
    # Give the service a clean in-memory DB for this test.
    sc.db = DbManager(url='sqlite:///:memory:')
    sc.db.create_all()
    with open(test_jsonld, 'r') as fh:
        jsonld = fh.read()
    # Default processing: one statement, richly grounded on both ends.
    res_json = _call_api('post', 'sources/eidos/process_jsonld',
                         json={'jsonld': jsonld})
    stmts_json = res_json.get('statements')
    stmts = stmts_from_json(stmts_json)
    assert len(stmts) == 1
    stmt = stmts[0]
    assert len(stmt.subj.concept.db_refs) > 2
    assert len(stmt.obj.concept.db_refs) > 2

    # Grounding NS: restricting to UN leaves only TEXT and UN refs.
    res_json = _call_api('post', 'sources/eidos/process_jsonld',
                         json={
                             'jsonld': jsonld,
                             'grounding_ns': ['UN']
                         })
    stmts_json = res_json.get('statements')
    stmts = stmts_from_json(stmts_json)
    assert len(stmts) == 1
    stmt = stmts[0]
    assert set(stmt.subj.concept.db_refs.keys()) == {'TEXT', 'UN'}
    assert set(stmt.obj.concept.db_refs.keys()) == {'TEXT', 'UN'}

    # Extract filter: 'influence' keeps the statement, 'event' drops it.
    res_json = _call_api('post', 'sources/eidos/process_jsonld',
                         json={
                             'jsonld': jsonld,
                             'extract_filter': ['influence']
                         })
    stmts_json = res_json.get('statements')
    stmts = stmts_from_json(stmts_json)
    assert len(stmts) == 1
    res_json = _call_api('post', 'sources/eidos/process_jsonld',
                         json={
                             'jsonld': jsonld,
                             'extract_filter': ['event']
                         })
    stmts_json = res_json.get('statements')
    stmts = stmts_from_json(stmts_json)
    assert len(stmts) == 0

    # Grounding mode: compositional input still yields one statement.
    with open(_get_data_file('eidos_compositional.jsonld'), 'r') as fh:
        jsonld = fh.read()
    res_json = _call_api('post', 'sources/eidos/process_jsonld',
                         json={
                             'jsonld': jsonld,
                             'grounding_mode': 'compositional'
                         })
    stmts_json = res_json.get('statements')
    stmts = stmts_from_json(stmts_json)
    assert len(stmts) == 1
def get_statements_for_record(self, record_key):
    """Return prepared statements for given record."""
    key_filter = wms_schema.PreparedStatements.record_key.like(record_key)
    rows = self.query(wms_schema.PreparedStatements.stmt) \
        .filter(key_filter).all()
    return stmts_from_json([row[0] for row in rows])
def get_statements(self):
    """Return all prepared statements in the DB."""
    rows = self.query(wms_schema.PreparedStatements.stmt).all()
    return stmts_from_json([row[0] for row in rows])
def insert_agents(db, prefix, stmts_wo_agents=None, **kwargs):
    """Insert agents for statements that don't have any agents.

    Note: This method currently works for both Statements and PAStatements
    and their corresponding agents (Agents and PAAgents). However, if you
    already have preassembled INDRA Statement objects that you know don't
    have agents in the database, you can use `insert_pa_agents_directly`
    to insert the agents much faster.

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database into which you are adding agents.
    prefix : str
        Select which stage of statements for which you wish to insert
        agents. The choices are 'pa' for preassembled statements or 'raw'
        for raw statements.
    stmts_wo_agents : iterable or None
        Statements lacking agents; if None, they are looked up with
        `get_statements_without_agents`.
    verbose : bool
        If True, print extra information and a status bar while compiling
        agents for insert from statements. Default False.
    """
    verbose = kwargs.pop('verbose', False)
    if len(kwargs):
        raise IndraDatabaseError("Unrecognized keyword argument(s): %s."
                                 % kwargs)
    # The agent table name is derived from the stage prefix.
    agent_tbl_obj = db.tables[prefix + '_agents']
    if stmts_wo_agents is None:
        stmts_wo_agents, num_stmts = \
            get_statements_without_agents(db, prefix, verbose=verbose)
    else:
        num_stmts = None

    if verbose:
        if num_stmts is None:
            try:
                num_stmts = len(stmts_wo_agents)
            except TypeError:
                # Generators have no len; progress reporting is impossible.
                logger.info("Could not get length from type: %s. Turning off "
                            "verbose messaging." % type(stmts_wo_agents))
                verbose = False

    # Construct the agent records
    logger.info("Building agent data for insert...")
    if verbose:
        print("Loading:", end='', flush=True)
    agent_data = []
    for i, db_stmt in enumerate(stmts_wo_agents):
        # Convert the database statement entry object into an indra statement.
        stmt = stmts_from_json([json.loads(db_stmt.json.decode())])[0]
        # Preassembled statements key on mk_hash, raw on id.
        if prefix == 'pa':
            stmt_id = db_stmt.mk_hash
        else:  # prefix == 'raw'
            stmt_id = db_stmt.id
        agent_data.extend(_get_agent_tuples(stmt, stmt_id))

        # Optionally print another tick on the progress bar.
        if verbose and num_stmts > 25 and i % (num_stmts // 25) == 0:
            print('|', end='', flush=True)
    if verbose and num_stmts > 25:
        print()
    if prefix == 'pa':
        cols = ('stmt_mk_hash', 'db_name', 'db_id', 'role')
    else:  # prefix == 'raw'
        cols = ('stmt_id', 'db_name', 'db_id', 'role')
    db.copy(agent_tbl_obj.__tablename__, agent_data, cols)
    return
def get_statements(self, model_id, date=None, offset=0, limit=None,
                   sort_by=None, stmt_types=None, min_belief=None,
                   max_belief=None):
    """Load the statements by model and date.

    Parameters
    ----------
    model_id : str
        The standard name of the model to get statements for.
    date : str
        The date when the model was generated. Defaults to the latest
        date available for the model.
    offset : int
        The offset to start at.
    limit : int
        The number of statements to return.
    sort_by : str
        One of 'evidence', 'belief' or 'paths'; any other value leaves
        the query order unspecified.
    stmt_types : list[str]
        Statement type names to filter by (matched case-insensitively).
    min_belief : float
        Only return statements with belief at or above this value.
    max_belief : float
        Only return statements with belief at or below this value.

    Returns
    -------
    list[indra.statements.Statement]
        A list of statements corresponding to the model and date.
    """
    if not date:
        # Fall back to the most recent model run.
        date = self.get_latest_date(model_id)
    logger.info(f'Got request to get statements for model {model_id} '
                f'on date {date} with offset {offset} and limit {limit} '
                f'and sort by {sort_by}')
    with self.get_session() as sess:
        q = sess.query(Statement.statement_json).filter(
            Statement.model_id == model_id, Statement.date == date)
        if stmt_types:
            # Lower-case both sides for a case-insensitive match.
            stmt_types = [stmt_type.lower() for stmt_type in stmt_types]
            q = q.filter(
                func.lower(Statement.statement_json['type'].astext).in_(
                    stmt_types))
        if min_belief:
            q = q.filter(Statement.statement_json['belief'].astext.cast(
                Float) >= float(min_belief))
        if max_belief:
            q = q.filter(Statement.statement_json['belief'].astext.cast(
                Float) <= float(max_belief))
        if sort_by == 'evidence':
            # Most evidence first; rows lacking evidence sort last.
            q = q.order_by(
                nullslast(
                    jsonb_array_length(
                        Statement.statement_json["evidence"]).desc()))
        elif sort_by == 'belief':
            q = q.order_by(
                nullslast(Statement.statement_json['belief'].desc()))
        elif sort_by == 'paths':
            q = q.order_by(Statement.path_count.desc())
        if offset:
            q = q.offset(offset)
        if limit:
            q = q.limit(limit)
        # Each row is a one-tuple holding the statement JSON.
        stmts = stmts_from_json([s for s, in q.all()])
    logger.info(f'Got {len(stmts)} statements')
    return stmts