def _type_query_from_web_query(self, db_query):
    """Constrain ``db_query`` by the statement type given in the web query.

    The ``'type'`` option is popped through ``self._pop``. When it is absent,
    ``db_query`` is returned untouched. Otherwise the raw type name is
    normalized with ``make_statement_camel`` and AND-ed onto ``db_query`` as
    a ``HasType`` constraint.

    Raises
    ------
    AssertionError
        If the type option is a list that does not hold exactly one entry.
    """
    raw_type = self._pop('type', None)

    # Nothing requested: leave the query as it is.
    if raw_type is None:
        return db_query

    # The option may arrive wrapped in a list; only a single entry is valid.
    if isinstance(raw_type, list):
        assert len(raw_type) == 1, \
            f"Got multiple entries for statement type: {raw_type}."
        raw_type = raw_type[0]

    # Fix variations in the spelling/case of the statement type name.
    stmt_type = make_statement_camel(raw_type)
    db_query &= HasType([stmt_type])
    return db_query
def get_statements():
    """Get some statements constrained by query.

    The constraints are read from the query string of the current request:

    - ``agent`` (may be repeated): agents with no specified role.
    - ``subject`` / ``object``: agents bound to that role.
    - ``type``: statement type name; variations in case are tolerated.
    - ``on_limit``: what to do when more than ``MAX_STATEMENTS`` statements
      are found. One of ``'sample'`` (default), ``'truncate'`` or ``'error'``.

    Any other query option is rejected with a 400 response, as is a query
    with no agents at all or an agent that cannot be processed.

    Returns
    -------
    A JSON response with keys ``'limited'`` and ``'statements'``. When the
    limit is exceeded and ``on_limit`` is ``'error'``, the return value is a
    ``(response, 413)`` pair whose ``'statements'`` entry holds per-type
    counts instead of the statements themselves.
    """
    logger.info("Got query for statements!")

    query_dict = request.args.copy()

    limit_behavior = query_dict.pop('on_limit', 'sample')
    if limit_behavior not in ['sample', 'truncate', 'error']:
        abort(Response('Unrecognized value for on_limit: %s' % limit_behavior,
                       400))

    logger.info("Getting query details.")
    try:
        # Get the agents without specified locations (subject or object).
        free_agents = [__process_agent(ag)
                       for ag in query_dict.poplist('agent')]

        # Get the agents with specified roles.
        roled_agents = {role: __process_agent(query_dict.pop(role))
                        for role in ['subject', 'object']
                        if query_dict.get(role) is not None}
    except DbAPIError as e:
        logger.exception(e)
        abort(Response('Failed to make agents from names: %s\n' % str(e), 400))
        return

    # Get the raw name of the statement type (we allow for variation in case)
    # and fix the case, if we got one.
    act_uncamelled = query_dict.pop('type', None)
    if act_uncamelled is not None:
        act = make_statement_camel(act_uncamelled)
    else:
        act = None

    # Every recognized option has been popped by now, so anything left over
    # means someone is probably confused.
    if query_dict:
        abort(Response("Unrecognized query options; %s."
                       % list(query_dict.keys()), 400))
        return

    # Make sure we got SOME agents. We will not simply return all
    # phosphorylations, or all activations.
    if not any(roled_agents.values()) and not free_agents:
        logger.error("No agents.")
        # The role-free agents are passed as 'agent'; name that option here
        # so that the hint matches what this handler actually accepts.
        abort(Response(("No agents. Must have 'subject', 'object', or "
                        "'agent'!\n"), 400))

    # Check to make sure none of the agents are None.
    assert None not in roled_agents.values() and None not in free_agents, \
        "None agents found. No agents should be None."

    # Now find the statements.
    stmts = []
    logger.info("Getting statements...")

    # First look for statements matching the role'd agents. The running
    # result is passed to each call; an empty result ends the search.
    for role, (ag_id, ns) in roled_agents.items():
        stmts = _get_relevant_statements(stmts, ag_id, ns, act, role)
        if not len(stmts):
            break
    else:
        # Second (if we're still looking, i.e. the loop above did not break),
        # look for statements from free agents.
        for ag_id, ns in free_agents:
            stmts = _get_relevant_statements(stmts, ag_id, ns, act)
            if not len(stmts):
                break

    # Check to make sure we didn't get too many statements.
    hit_limit = (len(stmts) > MAX_STATEMENTS)
    if hit_limit:
        if limit_behavior == 'error':
            # Divide the statements up by type, and get counts of each type.
            stmt_counts = {}
            for stmt in stmts:
                stmt_type = type(stmt).__name__
                stmt_counts.setdefault(stmt_type, {'count': 0})
                stmt_counts[stmt_type]['count'] += 1
            return jsonify({'limited': True, 'statements': stmt_counts}), 413
        elif limit_behavior == 'truncate':
            stmts = stmts[:MAX_STATEMENTS]
        elif limit_behavior == 'sample':
            from random import sample
            stmts = sample(stmts, MAX_STATEMENTS)

    # TODO: This is a temporary patch. Remove ASAP.
    # Fix the names from BE to FPLX
    for s in stmts:
        for ag in s.agent_list():
            if ag is not None and 'BE' in ag.db_refs:
                ag.db_refs['FPLX'] = ag.db_refs.pop('BE')

    # Create the json response, and send off.
    resp = jsonify({'limited': hit_limit,
                    'statements': [stmt.to_json() for stmt in stmts]})
    logger.info("Exiting with %d statements of nominal size %f MB.",
                len(stmts), sys.getsizeof(resp.data) / 1e6)
    return resp
def _db_query_from_web_query(query_dict, require=None, empty_web_query=False):
    """Translate the options of a web query into a database query.

    Recognized options are popped from ``query_dict`` (which is therefore
    modified in place) and AND-ed together into a single query object.

    Parameters
    ----------
    query_dict : dict-like
        The web query options. Handled keys: ``filter_ev``, the free agent
        keys yielded by ``iter_free_agents``, ``subject``, ``object``,
        ``type``, ``strict``, ``hashes``, ``paper_ids`` and ``mesh_ids``.
    require : iterable or None
        Query elements that must all appear among the component queries of
        the result, as reported by ``db_query.get_component_queries()``.
    empty_web_query : bool
        If True, raise when any option is left in ``query_dict`` after all
        recognized options have been popped.

    Returns
    -------
    The combined database query.

    Raises
    ------
    DbAPIError
        If no option mapped to a query, if a required element is missing, or
        if ``empty_web_query`` is set and unrecognized options remain.
    AssertionError
        If a role agent or the statement type is a list that does not hold
        exactly one entry.
    """
    db_query = EmptyQuery()
    num_agents = 0

    filter_ev = query_dict.pop('filter_ev', 'false').lower() == 'true'
    # NOTE(review): ev_filter is accumulated below but is not returned from
    # this function -- confirm whether callers are meant to receive it.
    ev_filter = EvidenceFilter()

    # Get the agents without specified locations (subject or object).
    for raw_ag in iter_free_agents(query_dict):
        ag, ns = process_agent(raw_ag)
        db_query &= HasAgent(ag, namespace=ns)

    # Get the agents with specified roles.
    for role in ['subject', 'object']:
        raw_ag = query_dict.pop(role, None)
        if raw_ag is None:
            continue
        if isinstance(raw_ag, list):
            assert len(raw_ag) == 1, f'Malformed agent for {role}: {raw_ag}'
            raw_ag = raw_ag[0]
        # NOTE(review): only role-bound agents are counted here; free agents
        # above do not contribute to num_agents -- confirm this is intended.
        num_agents += 1
        ag, ns = process_agent(raw_ag)
        db_query &= HasAgent(ag, namespace=ns, role=role.upper())

    # Get the raw name of the statement type (we allow for variation in case).
    act_raw = query_dict.pop('type', None)
    if act_raw is not None:
        if isinstance(act_raw, list):
            assert len(act_raw) == 1, \
                f"Got multiple entries for statement type: {act_raw}."
            act_raw = act_raw[0]
        act = make_statement_camel(act_raw)
        db_query &= HasType([act])

    # Get whether the user wants a strict match.
    if query_dict.pop('strict', 'false').lower() == 'true':
        db_query &= HasNumAgents((num_agents,))

    # Unpack hashes, if present.
    hashes = query_dict.pop('hashes', None)
    if hashes:
        db_query &= HasHash(hashes)

    # Unpack paper ids, if present.
    id_tpls = set()
    for id_dict in query_dict.pop('paper_ids', []):
        val = id_dict['id']
        typ = id_dict['type']

        # Turn tcids and trids into integers.
        id_val = int(val) if typ in ['tcid', 'trid'] else val
        id_tpls.add((typ, id_val))

    if id_tpls:
        paper_q = FromPapers(id_tpls)
        db_query &= paper_q
        if filter_ev:
            ev_filter &= paper_q.ev_filter()

    # Unpack mesh ids.
    for mesh_id in query_dict.pop('mesh_ids', []):
        mesh_q = FromMeshId(mesh_id)
        db_query &= mesh_q
        if filter_ev:
            ev_filter &= mesh_q.ev_filter()

    # Check for health of the resulting query, and some other things.
    if isinstance(db_query, EmptyQuery):
        raise DbAPIError(f"No arguments from web query {query_dict} mapped to "
                         f"db query.")
    assert isinstance(db_query, QueryCore), "Somehow db_query is not QueryCore."

    # Every required element must be among the components of the query. A
    # strict-superset comparison would miss the case where a required element
    # is absent but the query holds some other, non-required component, so
    # test for "not a subset" instead.
    if require and (db_query is None
                    or not set(require)
                    <= set(db_query.get_component_queries())):
        raise DbAPIError(f"Required query elements not found: {require}")

    # All recognized options were popped above; leftovers are unrecognized.
    if query_dict and empty_web_query:
        raise DbAPIError(f"Invalid query options: {query_dict.keys()}.")

    return db_query
def get_statements(query_dict, offs, max_stmts, ev_limit, best_first):
    """Get some statements constrained by query.

    Parameters
    ----------
    query_dict : multi-dict
        The query options; recognized options are popped from it. Handled
        keys: ``agent`` (may be repeated), any other key starting with
        ``agent``, ``subject``, ``object`` and ``type``. Anything else
        results in a 400 response.
    offs, max_stmts, best_first
        Passed through to ``get_statement_jsons_from_agents`` as ``offset``,
        ``max_stmts`` and ``best_first``.
    ev_limit : int or None
        Passed through as ``ev_limit``; ``None`` is replaced by 10.

    Returns
    -------
    Whatever ``get_statement_jsons_from_agents`` returns for the agents and
    statement type extracted from the query.
    """
    logger.info("Getting query details.")
    if ev_limit is None:
        ev_limit = 10

    try:
        # Get the agents without specified locations (subject or object).
        free_agents = [__process_agent(ag)
                       for ag in query_dict.poplist('agent')]

        # Other free-agent keys: anything still starting with 'agent' once
        # the plain 'agent' entries have been removed.
        ofaks = {k for k in query_dict.keys() if k.startswith('agent')}
        free_agents += [__process_agent(query_dict.pop(k)) for k in ofaks]

        # Get the agents with specified roles.
        roled_agents = {role: __process_agent(query_dict.pop(role))
                        for role in ['subject', 'object']
                        if query_dict.get(role) is not None}
    except DbAPIError as e:
        logger.exception(e)
        abort(Response('Failed to make agents from names: %s\n' % str(e), 400))
        return

    # Get the raw name of the statement type (we allow for variation in case)
    # and fix the case, if we got a statement type.
    act_raw = query_dict.pop('type', None)
    act = None if act_raw is None else make_statement_camel(act_raw)

    # Every recognized option has been popped by now, so anything left over
    # means someone is probably confused.
    if query_dict:
        abort(Response("Unrecognized query options; %s."
                       % list(query_dict.keys()), 400))
        return

    # Make sure we got SOME agents. We will not simply return all
    # phosphorylations, or all activations.
    if not any(roled_agents.values()) and not free_agents:
        logger.error("No agents.")
        # The role-free agents are passed as 'agent'; name that option here
        # so that the hint matches what this function actually accepts.
        abort(Response(("No agents. Must have 'subject', 'object', or "
                        "'agent'!\n"), 400))

    # Check to make sure none of the agents are None.
    assert None not in roled_agents.values() and None not in free_agents, \
        "None agents found. No agents should be None."

    # Now find the statements. Role-bound agents carry their role name;
    # free agents carry None in its place.
    logger.info("Getting statements...")
    agent_iter = [(role, ag_dbid, ns)
                  for role, (ag_dbid, ns) in roled_agents.items()]
    agent_iter += [(None, ag_dbid, ns) for ag_dbid, ns in free_agents]

    result = get_statement_jsons_from_agents(agent_iter, stmt_type=act,
                                             offset=offs, max_stmts=max_stmts,
                                             ev_limit=ev_limit,
                                             best_first=best_first)
    return result