예제 #1
0
def get_concept_details(concept_id):  # noqa: E501
    """Look up a single concept by CURIE and return it with its details.

    The identifier is compared case-insensitively against ``n.id`` and the
    query is limited to one node.

    :param concept_id: (url-encoded) CURIE identifier of the concept of interest
    :type concept_id: str

    :return: the populated concept for the first row returned by the query,
        or an empty ``BeaconConceptWithDetails`` when the query yields no rows
    :rtype: BeaconConceptWithDetails
    """
    cypher = """
    MATCH (n) WHERE LOWER(n.id)=LOWER({conceptId})
    RETURN
        n.id AS id,
        n.uri AS uri,
        n.iri AS iri,
        n.name AS name,
        n.category AS category,
        n.symbol AS symbol,
        n.description AS description,
        n.synonym AS synonyms,
        n.clique AS clique,
        n.xrefs AS xrefs,
        n AS node
    LIMIT 1
    """

    for row in db.query(cypher, conceptId=concept_id):
        # Fall back to the IRI when the node carries no URI.
        uri = row['uri']
        if uri is None:
            uri = row['iri']

        # Exact matches are the clique members plus the xrefs, with the
        # concept's own id removed.
        exact_matches = utils.remove_all(
            utils.listify(row['clique']) + utils.listify(row['xrefs']),
            row['id'])

        # Every entry of the details dict becomes one tag/value detail.
        details = [
            BeaconConceptDetail(tag=tag, value=utils.stringify(raw))
            for tag, raw in create_details_dict(row['node']).items()
        ]

        return BeaconConceptWithDetails(
            id=row['id'],
            uri=utils.stringify(uri),
            name=utils.stringify(row['name']),
            categories=utils.standardize(row['category']),
            symbol=utils.stringify(row['symbol']),
            description=utils.stringify(row['description']),
            synonyms=utils.listify(row['synonyms']),
            exact_matches=exact_matches,
            details=details
        )

    # Only reached when the query produced no rows.
    return BeaconConceptWithDetails()
def populate_dict(d, db_dict, prefix=None):
    """Copy the entries of ``db_dict`` into ``d``, stringifying every value.

    ``d`` is modified in place and nothing is returned. When ``prefix`` is
    given, each key is stored as ``'{prefix}_{key}'``; otherwise the key is
    used unchanged. Existing keys in ``d`` are overwritten.

    :param d: destination dictionary, updated in place
    :type d: dict
    :param db_dict: source mapping; only its ``items()`` method is used
    :param prefix: optional prefix joined to each key with an underscore
    :type prefix: str
    """
    for key, value in db_dict.items():
        # Identity test: only an omitted (None) prefix means "no prefix".
        target_key = key if prefix is None else '{}_{}'.format(prefix, key)
        d[target_key] = utils.stringify(value)
예제 #3
0
def get_statement_details(statementId, keywords=None, size=None):
    """Fetch one statement (edge) with its provenance, annotations and evidence.

    ``statementId`` is split on ``':'`` and must have one of two shapes:

    * two components -- matched against the ``id`` property of a relationship;
    * five components (``s_prefix:s_num:edge_label:o_prefix:o_num``) -- matched
      by the two node ids, plus a case-insensitive comparison of the edge
      label with the relationship type or its ``edge_label`` property.

    :param statementId: identifier of the statement
    :type statementId: str
    :param keywords: accepted but not used by this implementation
    :param size: accepted but not used by this implementation

    :return: details built from the first row of the query, or ``None`` when
        the query yields no rows
    :rtype: BeaconStatementWithDetails
    :raises ValueError: if ``statementId`` has neither two nor five components
    """
    statement_components = statementId.split(':')

    if len(statement_components) == 2:
        q = """
        MATCH (s)-[r {id: {statement_id}}]-(o)
        RETURN s AS subject, r AS relation, o AS object
        LIMIT 1;
        """
        results = db.query(q, statement_id=statementId)

    elif len(statement_components) == 5:
        s_prefix, s_num, edge_label, o_prefix, o_num = statement_components
        subject_id = '{}:{}'.format(s_prefix, s_num)
        object_id = '{}:{}'.format(o_prefix, o_num)
        q = """
        MATCH (s {id: {subject_id}})-[r]-(o {id: {object_id}})
        WHERE
            TOLOWER(type(r)) = TOLOWER({edge_label}) OR
            TOLOWER(r.edge_label) = TOLOWER({edge_label})
        RETURN
            s AS subject,
            r AS relation,
            o AS object
        LIMIT 1;
        """
        results = db.query(q,
                           subject_id=subject_id,
                           object_id=object_id,
                           edge_label=edge_label)
    else:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError(
            '{} must either be a curie, or curie:edge_label:curie'.format(
                statementId))

    for result in results:
        s = result['subject']
        r = result['relation']
        o = result['object']

        # Flatten subject, object and relation properties into one dict;
        # node properties are prefixed, relation properties are not.
        d = {'relationship_type': r.type}
        populate_dict(d, s, 'subject')
        populate_dict(d, o, 'object')
        populate_dict(d, r)

        evidences = []
        for prop in ('evidence', 'publications'):
            if prop not in r:
                continue
            uris = r[prop]
            # The property may hold one string instead of a list; wrap it so
            # a lone URI is not iterated character by character.
            if isinstance(uris, str):
                uris = [uris]
            evidences.extend(BeaconStatementCitation(uri=uri) for uri in uris)

        annotations = [
            BeaconStatementAnnotation(tag=key, value=utils.stringify(value))
            for key, value in d.items()
        ]

        return BeaconStatementWithDetails(
            id=statementId,
            is_defined_by=utils.stringify(r.get('is_defined_by', None)),
            provided_by=utils.stringify(r.get('provided_by', None)),
            qualifiers=r.get('qualifiers', None),
            annotation=annotations,
            evidence=evidences)

    # No matching statement.
    return None
def get_statement_details(statement_id,
                          keywords=None,
                          offset=None,
                          size=None):  # noqa: E501
    """Retrieve the details of one concept-relationship statement.

    The result carries the ``is_defined_by`` / ``provided_by`` provenance,
    the subject, object and edge properties exported as tag/value
    annotations, and the citations listed in the edge's ``evidence`` and
    ``publications`` properties.

    ``statement_id`` is split on ``':'`` and must have one of two shapes:

    * two components -- matched against the ``id`` property of a relationship;
    * five components (``s_prefix:s_num:edge_label:o_prefix:o_num``) -- matched
      by the two node ids, plus a case-insensitive comparison of the edge
      label with the relationship type or its ``edge_label`` property.

    :param statement_id: (url-encoded) CURIE identifier of the statement
        ("assertion", "claim") for which associated evidence is sought
    :type statement_id: str
    :param keywords: keywords to filter annotation names; accepted but not
        used by this implementation
    :type keywords: List[str]
    :param offset: cursor position of the next batch; accepted but not used
        by this implementation
    :type offset: int
    :param size: maximum number of citation entries; accepted but not used
        by this implementation
    :type size: int

    :return: details built from the first row of the query, or ``None`` when
        the query yields no rows
    :rtype: BeaconStatementWithDetails
    :raises ValueError: if ``statement_id`` has neither two nor five components
    """
    statement_components = statement_id.split(':')

    if len(statement_components) == 2:
        q = """
        MATCH (s)-[r {id: {statement_id}}]-(o)
        RETURN s AS subject, r AS relation, o AS object
        LIMIT 1;
        """
        results = db.query(q, statement_id=statement_id)

    elif len(statement_components) == 5:
        s_prefix, s_num, edge_label, o_prefix, o_num = statement_components
        subject_id = '{}:{}'.format(s_prefix, s_num)
        object_id = '{}:{}'.format(o_prefix, o_num)
        q = """
        MATCH (s {id: {subject_id}})-[r]-(o {id: {object_id}})
        WHERE
            TOLOWER(type(r)) = TOLOWER({edge_label}) OR
            TOLOWER(r.edge_label) = TOLOWER({edge_label})
        RETURN
            s AS subject,
            r AS relation,
            o AS object
        LIMIT 1;
        """
        results = db.query(q,
                           subject_id=subject_id,
                           object_id=object_id,
                           edge_label=edge_label)
    else:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError(
            '{} must either be a curie, or curie:edge_label:curie'.format(
                statement_id))

    for result in results:
        s = result['subject']
        r = result['relation']
        o = result['object']

        # Flatten subject, object and relation properties into one dict;
        # node properties are prefixed, relation properties are not.
        d = {'relationship_type': r.type}
        populate_dict(d, s, 'subject')
        populate_dict(d, o, 'object')
        populate_dict(d, r)

        evidences = []
        if 'evidence' in r:
            evidence_uris = r['evidence']
            # Like 'publications' below, the property may hold one string
            # instead of a list; wrap it so a lone URI is not iterated
            # character by character.
            if isinstance(evidence_uris, str):
                evidence_uris = [evidence_uris]
            evidences.extend(
                BeaconStatementCitation(uri=utils.stringify(uri))
                for uri in evidence_uris)
        if 'publications' in r:
            publications = r['publications']
            if isinstance(publications, list):
                evidences.extend(
                    build_evidence(publication)
                    for publication in publications)
            else:
                evidences.append(build_evidence(publications))

        annotations = [
            BeaconStatementAnnotation(tag=key, value=utils.stringify(value))
            for key, value in d.items()
        ]

        return BeaconStatementWithDetails(
            id=statement_id,
            is_defined_by=utils.stringify(r.get('is_defined_by', None)),
            provided_by=utils.stringify(r.get('provided_by', None)),
            qualifiers=r.get('qualifiers', None),
            annotation=annotations,
            evidence=evidences)

    # No matching statement.
    return None
def _build_statements_query(s, s_keywords, s_categories, edge_label, relation,
                            t, t_keywords, t_categories, offset, size):
    """Assemble the Cypher text and parameter dict used by ``get_statements``.

    Every list-valued filter that is not ``None`` is lower-cased and unwound
    in front of the ``MATCH``; every supplied filter adds one parenthesised
    condition, and the conditions are joined with ``AND``.

    :return: ``(query, params)`` -- the Cypher string and the parameters to
        pass alongside it
    :rtype: tuple
    """
    conjuncts = []
    unwinds = []
    data = {}

    if s is not None:
        unwinds.append("[x IN {sources} | toLower(x)] AS s")
        conjuncts.append("toLower(n.id) = s")
        data['sources'] = s

    if t is not None:
        unwinds.append("[x IN {targets} | toLower(x)] AS t")
        conjuncts.append("toLower(m.id) = t")
        data['targets'] = t

    if s_keywords is not None:
        unwinds.append("[x IN {s_keywords} | toLower(x)] AS s_keyword")
        disjuncts = [
            "toLower(n.name) CONTAINS s_keyword",
            "ANY(syn IN n.synonym WHERE toLower(syn) CONTAINS s_keyword)"
        ]
        conjuncts.append(" OR ".join(disjuncts))
        data['s_keywords'] = s_keywords

    if t_keywords is not None:
        unwinds.append("[x IN {t_keywords} | toLower(x)] AS t_keyword")
        disjuncts = [
            "toLower(m.name) CONTAINS t_keyword",
            "ANY(syn IN m.synonym WHERE toLower(syn) CONTAINS t_keyword)"
        ]
        conjuncts.append(" OR ".join(disjuncts))
        data['t_keywords'] = t_keywords

    if edge_label is not None:
        conjuncts.append("type(r) = {edge_label}")
        data['edge_label'] = edge_label

    if relation is not None:
        conjuncts.append("r.relation = {relation}")
        data['relation'] = relation

    if s_categories is not None:
        unwinds.append("[x IN {s_categories} | toLower(x)] AS s_category")
        # Parentheses must balance: each conjunct is wrapped in "(...)" when
        # the WHERE clause is joined below.
        conjuncts.append("s_category IN labels(n)")
        data['s_categories'] = s_categories

    if t_categories is not None:
        unwinds.append("[x IN {t_categories} | toLower(x)] AS t_category")
        conjuncts.append("t_category IN labels(m)")
        data['t_categories'] = t_categories

    q = "MATCH (n)-[r]->(m)"

    if unwinds:
        q = "UNWIND " + ' UNWIND '.join(unwinds) + " " + q

    if conjuncts:
        # Wrapping each conjunct keeps the OR-ed keyword tests grouped.
        q = q + " WHERE (" + ') AND ('.join(conjuncts) + ")"

    q += """
    RETURN
        n AS subject,
        m AS object,
        type(r) AS edge_type,
        r.edge_label AS edge_label,
        r.relation AS relation,
        r.negated AS negated,
        r.id AS statement_id
    """

    # Only validated integers are interpolated into the query text.
    if isinstance(offset, int) and offset >= 0:
        q += f' SKIP {offset}'
    if isinstance(size, int) and size >= 1:
        q += f' LIMIT {size}'

    return q, data


def get_statements(s=None,
                   s_keywords=None,
                   s_categories=None,
                   edge_label=None,
                   relation=None,
                   t=None,
                   t_keywords=None,
                   t_categories=None,
                   offset=None,
                   size=None):  # noqa: E501
    """Return the statements (edges) that satisfy the supplied filters.

    The query matches directed edges ``(n)-[r]->(m)``. The ``s*`` filters
    constrain the subject node ``n`` and the ``t*`` filters constrain the
    object node ``m``. All supplied filters must hold together; within a
    list-valued filter each element is tried in turn.

    :param s: CURIE identifiers compared case-insensitively with the
        subject's ``id``
    :type s: List[str]
    :param s_keywords: substrings searched case-insensitively in the
        subject's name and synonyms
    :type s_keywords: List[str]
    :param s_categories: categories tested (after lower-casing) for
        membership in the subject's labels
    :type s_categories: List[str]
    :param edge_label: value compared with the relationship type
    :type edge_label: str
    :param relation: value compared with the edge's ``relation`` property
    :type relation: str
    :param t: CURIE identifiers compared case-insensitively with the
        object's ``id``
    :type t: List[str]
    :param t_keywords: substrings searched case-insensitively in the
        object's name and synonyms
    :type t_keywords: List[str]
    :param t_categories: categories tested (after lower-casing) for
        membership in the object's labels
    :type t_categories: List[str]
    :param offset: number of rows to skip; applied only when it is a
        non-negative int
    :type offset: int
    :param size: maximum number of rows; defaults to 100 when omitted and is
        applied only when it is an int of at least 1
    :type size: int

    :return: one ``BeaconStatement`` per row returned by the query
    :rtype: List[BeaconStatement]
    """
    if size is None:
        size = 100

    q, data = _build_statements_query(s, s_keywords, s_categories, edge_label,
                                      relation, t, t_keywords, t_categories,
                                      offset, size)

    results = db.query(q, **data)

    statements = []

    for result in results:
        subject_node, object_node = result['subject'], result['object']

        subject_categories = utils.standardize(subject_node['category'])
        object_categories = utils.standardize(object_node['category'])

        # Prefer the edge_label property; fall back to the relationship type.
        raw_label = result['edge_label']
        if raw_label is None:
            raw_label = result['edge_type']
        statement_label = utils.stringify(raw_label)

        beacon_subject = BeaconStatementSubject(
            id=subject_node['id'],
            name=utils.stringify(subject_node['name']),
            categories=subject_categories)

        beacon_predicate = BeaconStatementPredicate(
            edge_label=statement_label,
            relation=utils.stringify(result['relation']),
            negated=bool(result['negated']))

        beacon_object = BeaconStatementObject(
            id=object_node['id'],
            name=utils.stringify(object_node['name']),
            categories=object_categories)

        # Edges without an id get a synthetic subject:edge_label:object one.
        statement_id = result['statement_id']
        if statement_id is None:
            statement_id = '{}:{}:{}'.format(subject_node['id'],
                                             statement_label,
                                             object_node['id'])

        statements.append(
            BeaconStatement(id=statement_id,
                            subject=beacon_subject,
                            predicate=beacon_predicate,
                            object=beacon_object))

    return statements