Beispiel #1
0
 def _type_query_from_web_query(self, db_query):
     """Constrain ``db_query`` by the statement type of the web query.

     The ``type`` option is popped via ``self._pop``. If it is absent,
     ``db_query`` is returned unchanged. Otherwise the raw type name is
     normalized with ``make_statement_camel`` and a ``HasType`` constraint
     is AND-ed onto ``db_query``.

     A list value is accepted only if it holds exactly one entry.
     """
     raw_type = self._pop('type', None)
     if raw_type is None:
         # No type was requested, so there is nothing to add.
         return db_query

     # Unwrap a single-element list into the bare type name.
     if isinstance(raw_type, list):
         assert len(raw_type) == 1, \
             f"Got multiple entries for statement type: {raw_type}."
         raw_type = raw_type[0]

     db_query &= HasType([make_statement_camel(raw_type)])
     return db_query
Beispiel #2
0
def get_statements():
    """Get some statements constrained by query.

    The query is read from ``request.args``. Recognized options are:

    - ``agent`` (may be repeated): agents with no specified role.
    - ``subject`` / ``object``: agents with the given role.
    - ``type``: the statement type; its case is fixed with
      ``make_statement_camel``.
    - ``on_limit``: what to do when more than ``MAX_STATEMENTS`` statements
      are found: ``'sample'`` (default), ``'truncate'`` or ``'error'``.

    Any other option, an invalid ``on_limit``, a failure to process an agent,
    or a query without any agent aborts the request with a 400 response.

    Returns
    -------
    A JSON response with the keys ``limited`` (whether the limit was hit) and
    ``statements`` (the statement JSONs). If the limit was hit and
    ``on_limit`` is ``'error'``, a ``(response, 413)`` tuple is returned
    instead, where ``statements`` maps each statement type name to
    ``{'count': <number found>}``.
    """
    # NOTE: `abort` is assumed to be flask.abort, which always raises, so no
    # `return` is needed after it.
    logger.info("Got query for statements!")
    query_dict = request.args.copy()
    limit_behavior = query_dict.pop('on_limit', 'sample')
    if limit_behavior not in ['sample', 'truncate', 'error']:
        abort(
            Response('Unrecognized value for on_limit: %s' % limit_behavior,
                     400))

    logger.info("Getting query details.")
    try:
        # Get the agents without specified locations (subject or object).
        free_agents = [
            __process_agent(ag) for ag in query_dict.poplist('agent')
        ]

        # Get the agents with specified roles.
        roled_agents = {
            role: __process_agent(query_dict.pop(role))
            for role in ['subject', 'object']
            if query_dict.get(role) is not None
        }
    except DbAPIError as e:
        logger.exception(e)
        abort(Response('Failed to make agents from names: %s\n' % str(e), 400))

    # Get the raw name of the statement type (we allow for variation in
    # case), and fix the case if we got one.
    act_uncamelled = query_dict.pop('type', None)
    if act_uncamelled is None:
        act = None
    else:
        act = make_statement_camel(act_uncamelled)

    # Every recognized option has been popped by now. If there is something
    # else in the query, someone's probably confused.
    if query_dict:
        abort(
            Response(
                "Unrecognized query options; %s." % list(query_dict.keys()),
                400))

    # Make sure we got SOME agents. We will not simply return all
    # phosphorylations, or all activations.
    if not any(roled_agents.values()) and not free_agents:
        logger.error("No agents.")
        abort(
            Response(("No agents. Must have 'subject', 'object', or "
                      "'agent'!\n"), 400))

    # Check to make sure none of the agents are None.
    assert None not in roled_agents.values() and None not in free_agents, \
        "None agents found. No agents should be None."

    # Now find the statements.
    stmts = []
    logger.info("Getting statements...")

    # First look for statements matching the role'd agents. Each call is
    # handed the statements found so far; once nothing is left we stop.
    for role, (ag_id, ns) in roled_agents.items():
        stmts = _get_relevant_statements(stmts, ag_id, ns, act, role)
        if not stmts:
            break
    else:
        # Second (only if the loop above did not break), look for statements
        # from free agents.
        for ag_id, ns in free_agents:
            stmts = _get_relevant_statements(stmts, ag_id, ns, act)
            if not stmts:
                break

    # Check to make sure we didn't get too many statements.
    hit_limit = (len(stmts) > MAX_STATEMENTS)
    if hit_limit:
        if limit_behavior == 'error':
            # Divide the statements up by type, and get counts of each type.
            stmt_counts = {}
            for stmt in stmts:
                entry = stmt_counts.setdefault(type(stmt).__name__,
                                               {'count': 0})
                entry['count'] += 1

            return jsonify({'limited': True, 'statements': stmt_counts}), 413
        elif limit_behavior == 'truncate':
            stmts = stmts[:MAX_STATEMENTS]
        elif limit_behavior == 'sample':
            from random import sample
            stmts = sample(stmts, MAX_STATEMENTS)

    # TODO: This is a temporary patch. Remove ASAP.
    # Fix the names from BE to FPLX
    for s in stmts:
        for ag in s.agent_list():
            if ag is not None and 'BE' in ag.db_refs:
                ag.db_refs['FPLX'] = ag.db_refs.pop('BE')

    # Create the json response, and send off.
    resp = jsonify({
        'limited': hit_limit,
        'statements': [stmt.to_json() for stmt in stmts]
    })
    logger.info("Exiting with %d statements of nominal size %f MB.",
                len(stmts), sys.getsizeof(resp.data) / 1e6)
    return resp
Beispiel #3
0
def _db_query_from_web_query(query_dict, require=None, empty_web_query=False):
    """Translate the options of a web query into a database query.

    Recognized options are popped from ``query_dict`` (which is therefore
    modified in place), and each one that is present is AND-ed onto the
    query being built:

    - free agents (as yielded by ``iter_free_agents(query_dict)``) and the
      role'd agents ``subject`` and ``object`` become ``HasAgent``
      constraints,
    - ``type`` becomes a ``HasType`` constraint,
    - ``strict`` set to ``'true'`` adds a ``HasNumAgents`` constraint on the
      total number of agents given in the query,
    - ``hashes`` becomes a ``HasHash`` constraint,
    - ``paper_ids`` (dicts with the keys ``id`` and ``type``) become a single
      ``FromPapers`` constraint,
    - each entry of ``mesh_ids`` becomes a ``FromMeshId`` constraint.

    Parameters
    ----------
    query_dict : dict-like
        The web query options. Options that are used are removed from it.
    require : iterable or None
        Query elements that must all appear in
        ``db_query.get_component_queries()``.
    empty_web_query : bool
        If True, any option left over in ``query_dict`` is an error.

    Returns
    -------
    The resulting query, an instance of ``QueryCore``.

    Raises
    ------
    DbAPIError
        If no option mapped to a query constraint, if a required query
        element is missing, or if ``empty_web_query`` is True and unused
        options remain.
    """
    db_query = EmptyQuery()
    num_agents = 0

    filter_ev = query_dict.pop('filter_ev', 'false').lower() == 'true'
    # NOTE(review): ev_filter is accumulated below but never returned or
    # otherwise used in this function. Confirm whether it should be.
    ev_filter = EvidenceFilter()

    # Get the agents without specified locations (subject or object). These
    # count toward the number of agents used for a strict match as well.
    for raw_ag in iter_free_agents(query_dict):
        num_agents += 1
        ag, ns = process_agent(raw_ag)
        db_query &= HasAgent(ag, namespace=ns)

    # Get the agents with specified roles.
    for role in ['subject', 'object']:
        raw_ag = query_dict.pop(role, None)
        if raw_ag is None:
            continue
        if isinstance(raw_ag, list):
            assert len(raw_ag) == 1, f'Malformed agent for {role}: {raw_ag}'
            raw_ag = raw_ag[0]
        num_agents += 1
        ag, ns = process_agent(raw_ag)
        db_query &= HasAgent(ag, namespace=ns, role=role.upper())

    # Get the raw name of the statement type (we allow for variation in case).
    act_raw = query_dict.pop('type', None)
    if act_raw is not None:
        if isinstance(act_raw, list):
            assert len(act_raw) == 1, \
                f"Got multiple entries for statement type: {act_raw}."
            act_raw = act_raw[0]
        act = make_statement_camel(act_raw)
        db_query &= HasType([act])

    # Get whether the user wants a strict match, i.e. statements with
    # exactly as many agents as were given in the query.
    if query_dict.pop('strict', 'false').lower() == 'true':
        db_query &= HasNumAgents((num_agents, ))

    # Unpack hashes, if present.
    hashes = query_dict.pop('hashes', None)
    if hashes:
        db_query &= HasHash(hashes)

    # Unpack paper ids, if present:
    id_tpls = set()
    for id_dict in query_dict.pop('paper_ids', []):
        val = id_dict['id']
        typ = id_dict['type']

        # Turn tcids and trids into integers.
        id_val = int(val) if typ in ['tcid', 'trid'] else val
        id_tpls.add((typ, id_val))

    if id_tpls:
        paper_q = FromPapers(id_tpls)
        db_query &= paper_q
        if filter_ev:
            ev_filter &= paper_q.ev_filter()

    # Unpack mesh ids.
    for mesh_id in query_dict.pop('mesh_ids', []):
        mesh_q = FromMeshId(mesh_id)
        db_query &= mesh_q
        if filter_ev:
            ev_filter &= mesh_q.ev_filter()

    # Check for health of the resulting query, and some other things.
    if isinstance(db_query, EmptyQuery):
        raise DbAPIError(f"No arguments from web query {query_dict} mapped to "
                         f"db query.")
    assert isinstance(db_query,
                      QueryCore), "Somehow db_query is not QueryCore."

    # Every required element must be among the query's components. A plain
    # superset comparison would miss the case where the two sets merely
    # differ, so test explicitly that `require` is a subset.
    if require and (
            db_query is None
            or not set(require).issubset(db_query.get_component_queries())):
        raise DbAPIError(f"Required query elements not found: {require}")

    # Anything still in query_dict was not recognized above.
    if query_dict and empty_web_query:
        raise DbAPIError(f"Invalid query options: {query_dict.keys()}.")

    return db_query
Beispiel #4
0
def get_statements(query_dict, offs, max_stmts, ev_limit, best_first):
    """Get some statements constrained by query.

    Recognized options are popped from ``query_dict``:

    - ``agent`` (may be repeated) and any other key starting with ``agent``:
      agents with no specified role.
    - ``subject`` / ``object``: agents with the given role.
    - ``type``: the statement type; its case is fixed with
      ``make_statement_camel``.

    Any other option, a failure to process an agent, or a query without any
    agent aborts the request with a 400 response.

    Parameters
    ----------
    query_dict
        The query options; must support ``poplist`` (presumably a werkzeug
        ``MultiDict``; confirm against the caller).
    offs
        Passed on as ``offset``.
    max_stmts
        Passed on as ``max_stmts``.
    ev_limit
        Passed on as ``ev_limit``; 10 is used if it is None.
    best_first
        Passed on as ``best_first``.

    Returns
    -------
    The result of ``get_statement_jsons_from_agents``.
    """
    # NOTE: `abort` is assumed to be flask.abort, which always raises, so no
    # `return` is needed after it.
    logger.info("Getting query details.")
    if ev_limit is None:
        ev_limit = 10
    try:
        # Get the agents without specified locations (subject or object).
        free_agents = [
            __process_agent(ag) for ag in query_dict.poplist('agent')
        ]

        # Also accept other keys that start with 'agent'. The keys are
        # collected before popping, and sorted so that the order of the
        # agents is the same from run to run.
        ofaks = sorted(k for k in query_dict.keys() if k.startswith('agent'))
        free_agents += [__process_agent(query_dict.pop(k)) for k in ofaks]

        # Get the agents with specified roles.
        roled_agents = {
            role: __process_agent(query_dict.pop(role))
            for role in ['subject', 'object']
            if query_dict.get(role) is not None
        }
    except DbAPIError as e:
        logger.exception(e)
        abort(Response('Failed to make agents from names: %s\n' % str(e), 400))

    # Get the raw name of the statement type (we allow for variation in case).
    act_raw = query_dict.pop('type', None)

    # Fix the case, if we got a statement type.
    act = None if act_raw is None else make_statement_camel(act_raw)

    # Every recognized option has been popped by now. If there is something
    # else in the query, someone's probably confused.
    if query_dict:
        abort(
            Response(
                "Unrecognized query options; %s." % list(query_dict.keys()),
                400))

    # Make sure we got SOME agents. We will not simply return all
    # phosphorylations, or all activations.
    if not any(roled_agents.values()) and not free_agents:
        logger.error("No agents.")
        abort(
            Response(("No agents. Must have 'subject', 'object', or "
                      "'agent'!\n"), 400))

    # Check to make sure none of the agents are None.
    assert None not in roled_agents.values() and None not in free_agents, \
        "None agents found. No agents should be None."

    # Now find the statements. Each agent is given as (role, id, namespace),
    # with a role of None for the free agents.
    logger.info("Getting statements...")
    agent_iter = [(role, ag_dbid, ns)
                  for role, (ag_dbid, ns) in roled_agents.items()]
    agent_iter += [(None, ag_dbid, ns) for ag_dbid, ns in free_agents]

    result = \
        get_statement_jsons_from_agents(agent_iter, stmt_type=act, offset=offs,
                                        max_stmts=max_stmts, ev_limit=ev_limit,
                                        best_first=best_first)
    return result