def get_concept_details(concept_id):  # noqa: E501
    """get_concept_details

    Fetch one concept node by its (url-encoded) CURIE identifier and wrap
    it, together with its detail tags, in a ``BeaconConceptWithDetails``.

    The identifier is compared case-insensitively (both sides of the match
    are lower-cased inside the query) and at most one node is returned.

    :param concept_id: (url-encoded) CURIE identifier of concept of interest
    :type concept_id: str

    :rtype: BeaconConceptWithDetails (an empty instance when the query
        yields no rows)
    """
    cypher = (
        " MATCH (n) WHERE LOWER(n.id)=LOWER({conceptId}) "
        "RETURN n.id AS id, n.uri AS uri, n.iri AS iri, n.name AS name, "
        "n.category AS category, n.symbol AS symbol, "
        "n.description AS description, n.synonym AS synonyms, "
        "n.clique AS clique, n.xrefs AS xrefs, n AS node LIMIT 1 "
    )
    rows = db.query(cypher, conceptId=concept_id)

    for row in rows:
        # Prefer the node's ``uri``; fall back to ``iri`` when it is unset.
        uri = row['uri']
        if uri is None:
            uri = row['iri']

        # Exact matches are the clique plus the xrefs, minus the concept's
        # own identifier.
        exact_matches = utils.remove_all(
            utils.listify(row['clique']) + utils.listify(row['xrefs']),
            row['id'],
        )

        details = [
            BeaconConceptDetail(tag=tag, value=utils.stringify(raw))
            for tag, raw in create_details_dict(row['node']).items()
        ]

        # Only the first row is ever used (the query is LIMIT 1 anyway).
        return BeaconConceptWithDetails(
            id=row['id'],
            uri=utils.stringify(uri),
            name=utils.stringify(row['name']),
            categories=utils.standardize(row['category']),
            symbol=utils.stringify(row['symbol']),
            description=utils.stringify(row['description']),
            synonyms=utils.listify(row['synonyms']),
            exact_matches=exact_matches,
            details=details,
        )

    # No matching node: hand back an empty model object.
    return BeaconConceptWithDetails()
def populate_dict(d, db_dict, prefix=None):
    """Copy every entry of ``db_dict`` into ``d`` with stringified values.

    Each value is passed through ``utils.stringify`` before being stored.
    ``d`` is mutated in place; an existing entry under the same key is
    overwritten.

    :param d: destination dictionary, updated in place
    :param db_dict: source object exposing ``items()`` (key/value pairs)
    :param prefix: when not ``None``, every key is stored as
        ``'<prefix>_<key>'``; otherwise the key is used unchanged
    :rtype: None
    """
    for key, value in db_dict.items():
        # Identity test: an empty-string prefix is still a prefix.
        target_key = key if prefix is None else '{}_{}'.format(prefix, key)
        d[target_key] = utils.stringify(value)
def get_statement_details(statementId, keywords=None, size=None):
    """Return the details of a single concept-relationship statement.

    ``statementId`` is split on ``':'`` and interpreted by component count:

    * 2 components - the identifier of the relationship itself, matched
      against the relationship's ``id`` property;
    * 5 components - ``<subject curie>:<edge_label>:<object curie>``; the
      edge label is compared case-insensitively against both the
      relationship type and its ``edge_label`` property.

    Both queries match the relationship in either direction and return at
    most one row.

    :param statementId: identifier of the statement, in one of the two
        forms described above
    :type statementId: str
    :param keywords: accepted for interface compatibility; not used here
    :param size: accepted for interface compatibility; not used here
    :raises ValueError: if ``statementId`` has neither 2 nor 5 components
    :rtype: BeaconStatementWithDetails, or ``None`` when nothing matches
    """
    statement_components = statementId.split(':')

    if len(statement_components) == 2:
        q = (
            " MATCH (s)-[r {id: {statement_id}}]-(o) "
            "RETURN s AS subject, r AS relation, o AS object LIMIT 1; "
        )
        results = db.query(q, statement_id=statementId)
    elif len(statement_components) == 5:
        s_prefix, s_num, edge_label, o_prefix, o_num = statement_components
        subject_id = '{}:{}'.format(s_prefix, s_num)
        object_id = '{}:{}'.format(o_prefix, o_num)
        q = (
            " MATCH (s {id: {subject_id}})-[r]-(o {id: {object_id}}) "
            "WHERE TOLOWER(type(r)) = TOLOWER({edge_label}) "
            "OR TOLOWER(r.edge_label) = TOLOWER({edge_label}) "
            "RETURN s AS subject, r AS relation, o AS object LIMIT 1; "
        )
        results = db.query(q, subject_id=subject_id, object_id=object_id,
                           edge_label=edge_label)
    else:
        # ValueError is an Exception subclass, so handlers written for the
        # previous bare Exception keep working.
        raise ValueError(
            '{} must either be a curie, or curie:edge_label:curie'.format(
                statementId))

    for result in results:
        s = result['subject']
        r = result['relation']
        o = result['object']

        # Flatten subject, object and relationship properties into one
        # tag -> value mapping that becomes the annotation list.
        d = {'relationship_type': r.type}
        populate_dict(d, s, 'subject')
        populate_dict(d, o, 'object')
        populate_dict(d, r)

        evidences = []
        for prop in ('evidence', 'publications'):
            if prop not in r:
                continue
            uris = r[prop]
            # A scalar property (e.g. a single string) must yield one
            # citation, not one citation per character.
            if not isinstance(uris, (list, tuple)):
                uris = [uris]
            evidences.extend(BeaconStatementCitation(uri=uri) for uri in uris)

        annotations = [
            BeaconStatementAnnotation(tag=key, value=utils.stringify(value))
            for key, value in d.items()
        ]

        # Only the first row is used (the queries are LIMIT 1).
        return BeaconStatementWithDetails(
            id=statementId,
            is_defined_by=utils.stringify(r.get('is_defined_by')),
            provided_by=utils.stringify(r.get('provided_by')),
            qualifiers=r.get('qualifiers'),
            annotation=annotations,
            evidence=evidences)

    # No matching statement.
    return None
def get_statement_details(statement_id, keywords=None, offset=None,
                          size=None):
    """get_statement_details

    Retrieves the details relating to a specified concept-relationship
    statement, including 'is_defined_by' and 'provided_by' provenance;
    extended edge properties exported as tag = value; and any associated
    annotations (publications, etc.) cited as evidence for the given
    statement.

    ``statement_id`` is split on ``':'`` and interpreted by component
    count:

    * 2 components - the identifier of the relationship itself, matched
      against the relationship's ``id`` property;
    * 5 components - ``<subject curie>:<edge_label>:<object curie>``; the
      edge label is compared case-insensitively against both the
      relationship type and its ``edge_label`` property.

    Both queries match the relationship in either direction and return at
    most one row.

    :param statement_id: (url-encoded) CURIE identifier of the
        concept-relationship statement ("assertion", "claim") for which
        associated evidence is sought
    :type statement_id: str
    :param keywords: an array of keywords or substrings against which to
        filter annotation names (e.g. publication titles). Not used by
        this implementation.
    :type keywords: List[str]
    :param offset: offset (cursor position) to next batch of annotation
        entries of amount 'size' to return. Not used by this
        implementation.
    :type offset: int
    :param size: maximum number of evidence citation entries requested by
        the client. Not used by this implementation: all evidence found on
        the statement is returned.
    :type size: int
    :raises ValueError: if ``statement_id`` has neither 2 nor 5 components

    :rtype: BeaconStatementWithDetails, or ``None`` when nothing matches
    """
    statement_components = statement_id.split(':')

    if len(statement_components) == 2:
        q = (
            " MATCH (s)-[r {id: {statement_id}}]-(o) "
            "RETURN s AS subject, r AS relation, o AS object LIMIT 1; "
        )
        results = db.query(q, statement_id=statement_id)
    elif len(statement_components) == 5:
        s_prefix, s_num, edge_label, o_prefix, o_num = statement_components
        subject_id = '{}:{}'.format(s_prefix, s_num)
        object_id = '{}:{}'.format(o_prefix, o_num)
        q = (
            " MATCH (s {id: {subject_id}})-[r]-(o {id: {object_id}}) "
            "WHERE TOLOWER(type(r)) = TOLOWER({edge_label}) "
            "OR TOLOWER(r.edge_label) = TOLOWER({edge_label}) "
            "RETURN s AS subject, r AS relation, o AS object LIMIT 1; "
        )
        results = db.query(q, subject_id=subject_id, object_id=object_id,
                           edge_label=edge_label)
    else:
        # ValueError is an Exception subclass, so handlers written for the
        # previous bare Exception keep working.
        raise ValueError(
            '{} must either be a curie, or curie:edge_label:curie'.format(
                statement_id))

    for result in results:
        s = result['subject']
        r = result['relation']
        o = result['object']

        # Flatten subject, object and relationship properties into one
        # tag -> value mapping that becomes the annotation list.
        d = {'relationship_type': r.type}
        populate_dict(d, s, 'subject')
        populate_dict(d, o, 'object')
        populate_dict(d, r)

        evidences = []

        if 'evidence' in r:
            evidence_uris = r['evidence']
            # A scalar property (e.g. a single string) must yield one
            # citation, not one citation per character.
            if not isinstance(evidence_uris, (list, tuple)):
                evidence_uris = [evidence_uris]
            evidences.extend(
                BeaconStatementCitation(uri=utils.stringify(uri))
                for uri in evidence_uris)

        if 'publications' in r:
            publications = r['publications']
            # Same scalar-vs-list normalisation for publications.
            if not isinstance(publications, list):
                publications = [publications]
            evidences.extend(
                build_evidence(publication) for publication in publications)

        annotations = [
            BeaconStatementAnnotation(tag=key, value=utils.stringify(value))
            for key, value in d.items()
        ]

        # Only the first row is used (the queries are LIMIT 1).
        return BeaconStatementWithDetails(
            id=statement_id,
            is_defined_by=utils.stringify(r.get('is_defined_by')),
            provided_by=utils.stringify(r.get('provided_by')),
            qualifiers=r.get('qualifiers'),
            annotation=annotations,
            evidence=evidences)

    # No matching statement.
    return None
def get_statements(s=None, s_keywords=None, s_categories=None,
                   edge_label=None, relation=None, t=None, t_keywords=None,
                   t_categories=None, offset=None, size=None):
    """get_statements

    Given a constrained set of some
    [CURIE-encoded](https://www.w3.org/TR/curie/) 's' ('source') concept
    identifiers, categories and/or keywords (to match in the concept name
    or synonyms), retrieves a list of relationship statements. Optionally,
    a set of some 't' ('target') concept identifiers, categories and/or
    keywords may also be given, in which case a member of the 't' concept
    set should match the concept opposite an 's' concept in the statement.

    Implementation notes:

    * The query pattern is ``(n)-[r]->(m)``: 's' filters are applied to
      the start node ``n`` and 't' filters to the end node ``m``.
    * Every supplied list is lower-cased and unwound, so a statement
      qualifies when it matches any one value of each supplied list.
      NOTE(review): because each list is unwound into separate rows, a
      statement matching several values of one list is presumably
      returned once per matching value - confirm whether de-duplication
      is wanted.
    * All supplied filters are combined with AND.

    :param s: An (optional) array set of
        [CURIE-encoded](https://www.w3.org/TR/curie/) identifiers of
        'source' ('start') concepts possibly known to the beacon. Compared
        case-insensitively with the node ``id``. Unknown CURIEs are simply
        ignored (silent match failure).
    :type s: List[str]
    :param s_keywords: An (optional) array of keywords or substrings
        against which to filter 'source' concept names and synonyms
        (case-insensitive containment).
    :type s_keywords: List[str]
    :param s_categories: An (optional) array set of 'source' concept
        categories (specified as Biolink name labels codes gene, pathway,
        etc.); each lower-cased value is tested for membership in the
        node's labels (see
        [Biolink Model](https://biolink.github.io/biolink-model) for the
        full list of codes).
    :type s_categories: List[str]
    :param edge_label: (Optional) predicate edge label against which to
        constrain the search for statements ('edges'); compared for
        equality with the relationship type. The predicate edge_names for
        this parameter should be as published by the /predicates API
        endpoint and must be taken from the minimal predicate ('slot')
        list of the
        [Biolink Model](https://biolink.github.io/biolink-model).
    :type edge_label: str
    :param relation: (Optional) predicate relation against which to
        constrain the search for statements ('edges'); compared for
        equality with the relationship's ``relation`` property. The
        preferred format is a CURIE where one exists, but strings/labels
        are acceptable. This relation may be equivalent to the edge_label
        (e.g. edge_label: has_phenotype, relation: RO:0002200), or a more
        specific relation in cases where the source provides more
        granularity (e.g. edge_label: molecularly_interacts_with,
        relation: RO:0002447).
    :type relation: str
    :param t: An (optional) array set of
        [CURIE-encoded](https://www.w3.org/TR/curie/) identifiers of
        'target' ('opposite' or 'end') concepts possibly known to the
        beacon. Unknown CURIEs are simply ignored (silent match failure).
    :type t: List[str]
    :param t_keywords: An (optional) array of keywords or substrings
        against which to filter 'target' concept names and synonyms.
    :type t_keywords: List[str]
    :param t_categories: An (optional) array set of 'target' concept
        categories, handled like ``s_categories`` but for the end node.
    :type t_categories: List[str]
    :param offset: offset (cursor position) to next batch of statements of
        amount 'size' to return. Applied only when it is a non-negative
        int.
    :type offset: int
    :param size: maximum number of statements requested by the client;
        defaults to 100 when omitted. Applied only when it is an int >= 1.
    :type size: int

    :rtype: List[BeaconStatement]
    """
    if size is None:
        size = 100

    conjuncts = []  # WHERE fragments, AND-ed together
    unwinds = []    # UNWIND fragments, one per supplied list parameter
    data = {}       # query parameters

    if s is not None:
        unwinds.append("[x IN {sources} | toLower(x)] AS s")
        conjuncts.append("toLower(n.id) = s")
        data['sources'] = s

    if t is not None:
        unwinds.append("[x IN {targets} | toLower(x)] AS t")
        conjuncts.append("toLower(m.id) = t")
        data['targets'] = t

    if s_keywords is not None:
        unwinds.append("[x IN {s_keywords} | toLower(x)] AS s_keyword")
        disjuncts = [
            "toLower(n.name) CONTAINS s_keyword",
            "ANY(syn IN n.synonym WHERE toLower(syn) CONTAINS s_keyword)"
        ]
        conjuncts.append(" OR ".join(disjuncts))
        data['s_keywords'] = s_keywords

    if t_keywords is not None:
        unwinds.append("[x IN {t_keywords} | toLower(x)] AS t_keyword")
        disjuncts = [
            "toLower(m.name) CONTAINS t_keyword",
            "ANY(syn IN m.synonym WHERE toLower(syn) CONTAINS t_keyword)"
        ]
        conjuncts.append(" OR ".join(disjuncts))
        data['t_keywords'] = t_keywords

    if edge_label is not None:
        conjuncts.append("type(r) = {edge_label}")
        data['edge_label'] = edge_label

    if relation is not None:
        conjuncts.append("r.relation = {relation}")
        data['relation'] = relation

    if s_categories is not None:
        unwinds.append("[x IN {s_categories} | toLower(x)] AS s_category")
        # Fixed: the fragment previously carried a stray ')' that made the
        # generated WHERE clause unbalanced.
        conjuncts.append("s_category IN labels(n)")
        data['s_categories'] = s_categories

    if t_categories is not None:
        unwinds.append("[x IN {t_categories} | toLower(x)] AS t_category")
        conjuncts.append("t_category IN labels(m)")
        data['t_categories'] = t_categories

    q = "MATCH (n)-[r]->(m)"

    if unwinds:
        q = "UNWIND " + ' UNWIND '.join(unwinds) + " " + q

    if conjuncts:
        # Each fragment is parenthesised so the OR inside the keyword
        # fragments cannot leak across the AND.
        q = q + " WHERE (" + ') AND ('.join(conjuncts) + ")"

    q += (
        " RETURN n AS subject, m AS object, type(r) AS edge_type, "
        "r.edge_label AS edge_label, r.relation AS relation, "
        "r.negated AS negated, r.id AS statement_id "
    )

    # offset/size are interpolated only after being verified as ints, so
    # no untrusted text reaches the query string.
    if isinstance(offset, int) and offset >= 0:
        q += f' SKIP {offset}'
    if isinstance(size, int) and size >= 1:
        q += f' LIMIT {size}'

    results = db.query(q, **data)

    statements = []
    for result in results:
        subject_node, object_node = result['subject'], result['object']

        # Prefer the relationship's edge_label property; fall back to the
        # relationship type when the property is absent.
        if result['edge_label'] is not None:
            predicate_label = utils.stringify(result['edge_label'])
        else:
            predicate_label = utils.stringify(result['edge_type'])

        beacon_subject = BeaconStatementSubject(
            id=subject_node['id'],
            name=utils.stringify(subject_node['name']),
            categories=utils.standardize(subject_node['category']))

        beacon_predicate = BeaconStatementPredicate(
            edge_label=predicate_label,
            relation=utils.stringify(result['relation']),
            negated=bool(result['negated']))

        beacon_object = BeaconStatementObject(
            id=object_node['id'],
            name=utils.stringify(object_node['name']),
            categories=utils.standardize(object_node['category']))

        statement_id = result['statement_id']
        if statement_id is None:
            # Synthesise the subject:edge_label:object form of identifier
            # when the relationship carries no id of its own.
            statement_id = '{}:{}:{}'.format(
                subject_node['id'], predicate_label, object_node['id'])

        statements.append(
            BeaconStatement(id=statement_id,
                            subject=beacon_subject,
                            predicate=beacon_predicate,
                            object=beacon_object))

    return statements