def get_subgraph(seedNodes):
    """Get a GNBR subgraph.

    Runs a Cypher query for the relationships on paths between two nodes
    whose ``id`` values are both contained in ``seedNodes``; the query is
    capped at 25 rows.

    NOTE: turning the query rows into subgraph content is still a TODO. The
    ``GnbrSubgraph`` is currently returned exactly as it was constructed.

    :param seedNodes: gnbrIDs for subgraph
    :type seedNodes: List[str]

    :rtype: GnbrSubgraph
    """
    # Adjacent literals are concatenated into the exact query text.
    cypher = (
        " MATCH (n) WHERE n.id IN {seedNodes}"
        " MATCH p=(n)--(m) WHERE m.id IN {seedNodes}"
        " RETURN relationships(p) LIMIT 25 "
    )
    rows = neo4j.run(query=cypher, param={"seedNodes": seedNodes})

    induced = GnbrSubgraph()
    for _row in rows:
        # TODO: fill an edge from the row and attach it to the subgraph.
        # For now an empty edge is created per row and then discarded.
        GNBR_edge()
    return induced
def get_node_neighbors(entity1):
    """Get all neighbors of a particular node.

    Runs a Cypher query for the relationships on every path ``(n)--(m)``
    where ``m.id`` equals ``entity1``; the query is capped at 25 rows.

    NOTE: turning the query rows into subgraph content is still a TODO. The
    ``GnbrSubgraph`` is currently returned exactly as it was constructed.

    :param entity1: GNBR-ID for first entity
    :type entity1: str

    :rtype: GnbrSubgraph
    """
    # Adjacent literals are concatenated into the exact query text.
    cypher = (
        " MATCH p=(n)--(m) WHERE m.id = {entity1}"
        " RETURN relationships(p) LIMIT 25 "
    )
    rows = neo4j.run(query=cypher, param={"entity1": entity1})

    neighborhood = GnbrSubgraph()
    for _row in rows:
        # TODO: fill an edge from the row and attach it to the subgraph.
        # For now an empty edge is created per row and then discarded.
        GNBR_edge()
    return neighborhood
def get_concept_details(conceptId):
    """Retrieve details for the concept identified by ``conceptId``.

    Looks up the ``Protein`` node whose ``stringId`` equals ``conceptId``
    (the query is limited to one row) and converts each returned row into a
    ``ConceptDetail``.

    :param conceptId: (url-encoded) CURIE identifier of concept of interest
    :type conceptId: str

    :rtype: List[ConceptDetail]
    """
    # Adjacent literals are concatenated into the exact query text.
    cypher = (
        " MATCH (protein:Protein {stringId: {conceptId}})"
        " RETURN protein.stringId as stringId,"
        " protein.alias as alias,"
        " protein.source as source"
        " LIMIT 1 "
    )

    def _row_to_detail(row):
        # Map one result row onto the API model.
        detail = ConceptDetail()
        detail.id = row["stringId"]
        detail.name = row["alias"]
        # Every concept served here is reported under the "CHEM" group.
        detail.semantic_group = "CHEM"
        detail.details = [
            ConceptsconceptIdDetails(tag="source", value=row["source"])
        ]
        return detail

    rows = neo4j.run(query=cypher, param={"conceptId": conceptId})
    return [_row_to_detail(row) for row in rows]
def linked_types():
    """List the types in the knowledge source with their instance counts.

    Counts the ``Protein`` nodes and reports the count as the ``frequency``
    of a ``DataType`` with id ``"CHEM"`` (one ``DataType`` per result row).

    :rtype: List[DataType]
    """
    count_query = " MATCH (p:Protein) RETURN COUNT(p) as frequency "

    data_types = []
    for row in neo4j.run(count_query):
        data_types.append(DataType(id="CHEM", frequency=row["frequency"]))
    return data_types
def get_concepts(keywords, semgroups=None, pageNumber=None, pageSize=None):
    """Retrieve a (paged) list of concepts in the system.

    A ``Protein`` node matches when its lower-cased ``alias`` contains at
    least one of the lower-cased keywords and it has at least one ``ACTION``
    relationship. Results are ordered by number of matching keywords, then by
    degree, both descending.

    :param keywords: a (urlencoded) space delimited set of keywords or
        substrings against which to match concept names and synonyms.
        ``None``, empty or whitespace-only input yields an empty list.
    :type keywords: str
    :param semgroups: a (url-encoded) space-delimited set of semantic groups
        (specified as codes CHEM, GENE, ANAT, etc.), see
        [SemGroups](https://metamap.nlm.nih.gov/Docs/SemGroups_2013.txt).
        NOTE: accepted for API compatibility but not used by the query.
    :type semgroups: str
    :param pageNumber: (1-based) number of the page to be returned in a paged
        set of query results; missing or non-positive values fall back to 1
    :type pageNumber: int
    :param pageSize: number of concepts per page to be returned in a paged
        set of query results; missing or non-positive values fall back to 10
    :type pageSize: int

    :rtype: List[Concept]
    """
    terms = (keywords or "").split()
    if not terms:
        # With no keywords, num_matches is 0 for every node and the query
        # cannot return rows, so skip the full Protein scan.
        return []

    # Adjacent literals are concatenated into the exact query text.
    q = (
        " MATCH (protein:Protein)"
        " WITH SIZE(FILTER(x IN {filter} WHERE LOWER(protein.alias) CONTAINS LOWER(x))) AS num_matches,"
        " SIZE((protein)-[:ACTION]-()) as degree,"
        " protein as protein"
        " WHERE num_matches > 0 AND degree > 0"
        " RETURN protein.stringId as stringId,"
        " protein.alias as alias,"
        " protein.source as source"
        " ORDER BY num_matches DESC, degree DESC"
        " SKIP ({pageNumber} - 1) * {pageSize}"
        " LIMIT {pageSize} "
    )

    page_number = pageNumber if pageNumber is not None and pageNumber > 0 else 1
    page_size = pageSize if pageSize is not None and pageSize > 0 else 10

    results = neo4j.run(
        query=q,
        param={
            "pageNumber": page_number,
            "pageSize": page_size,
            "filter": terms,
        },
    )

    # Every concept served here is reported under the "CHEM" group.
    return [
        Concept(id=d["stringId"], name=d["alias"], semantic_group="CHEM")
        for d in results
    ]
def get_identifier(searchString, limit=None):
    """Find GNBR identifier.

    Searches for nodes whose ``formatted`` property contains
    ``searchString``.

    NOTE: this information should probably be found using the PubTator
    dictionaries (simple lookup) instead of a graph query.

    NOTE(review): the declared return type is ``IdMapping``, but building the
    mapping from the query rows is still a TODO and the function currently
    returns an unpopulated ``GnbrSubgraph``. That return value is kept
    unchanged here.

    :param searchString: pass a search string to find matching identifiers
    :type searchString: str
    :param limit: maximum number of records to return; missing or
        non-positive values fall back to 25
    :type limit: int

    :rtype: IdMapping
    """
    # Honor the caller-supplied limit; 25 was the previously hard-coded cap.
    max_records = limit if limit is not None and limit > 0 else 25

    q = (
        " MATCH (m) WHERE m.formatted contains {searchString}"
        " RETURN (m) LIMIT {limit} "
    )
    results = neo4j.run(
        query=q,
        param={"searchString": searchString, "limit": max_records},
    )

    subgraph = GnbrSubgraph()
    for _row in results:
        # TODO: fill an IdMapping from the row. For now an empty mapping is
        # created per row and then discarded.
        IdMapping()

    # Example node output looks like:
    # {
    #   "raw": "ace|acetylcholinesterase|acetylcholinesterase enzyme|ach-e|ache|ache-r|anti-acetylcholinesterase|as-ache|mache",
    #   "formatted": "ace|acetylcholinesterase|acetylcholinesterase_enzyme|ach-e|ache|ache-r|anti-acetylcholinesterase|as-ache|mache",
    #   "id": "11423"
    # }
    return subgraph
def get_statements(c, pageNumber=None, pageSize=None, keywords=None, semgroups=None):
    """Retrieve a paged list of concept-relations for the given concepts.

    Given a list of [CURIE-encoded](https://www.w3.org/TR/curie/) identifiers
    of exactly matching concepts, returns the ``ACTION`` relationships
    between two ``Protein`` nodes (both with a non-null ``alias``) where one
    end's ``stringId`` matches an input identifier, compared
    case-insensitively.

    NOTE(review): the query has no ORDER BY, so which rows land on which page
    depends on the order the database returns them in.

    NOTE(review): the node matched against the input identifiers is stored as
    the statement *object* and the other node as the *subject*. Confirm this
    orientation is intended.

    :param c: set of [CURIE-encoded](https://www.w3.org/TR/curie/)
        identifiers of exactly matching concepts to be used in a search for
        associated concept-relation statements. ``None`` or an empty list
        yields an empty result.
    :type c: List[str]
    :param pageNumber: (1-based) number of the page to be returned in a paged
        set of query results; missing or non-positive values fall back to 1
    :type pageNumber: int
    :param pageSize: number of concepts per page to be returned in a paged
        set of query results; missing or non-positive values fall back to 10
    :type pageSize: int
    :param keywords: a (url-encoded, space-delimited) string of keywords or
        substrings against which to match the subject, predicate or object
        names. NOTE: accepted for API compatibility but not used by the
        query.
    :type keywords: str
    :param semgroups: a (url-encoded, space-delimited) string of semantic
        groups (specified as codes CHEM, GENE, ANAT, etc.), see
        [SemGroups](https://metamap.nlm.nih.gov/Docs/SemGroups_2013.txt).
        NOTE: accepted for API compatibility but not used by the query.
    :type semgroups: str

    :rtype: List[Statement]
    """
    if not c:
        # ANY(...) over an empty list is false for every row, so the query
        # cannot return anything; skip scanning all ACTION relationships.
        return []

    # The predicate used to repeat the same `a.stringId` comparison on both
    # sides of an OR. One copy is enough: the pattern is undirected, so each
    # relationship is matched in both orientations and testing `a` already
    # covers a seed concept at either end.
    query = (
        " MATCH (a:Protein)-[r:ACTION]-(b:Protein)"
        " WHERE (NOT a.alias IS NULL)"
        " AND (NOT b.alias IS NULL)"
        " AND ANY (x in {conceptIds} WHERE LOWER(a.stringId) = LOWER(x) )"
        " RETURN a.stringId as id_a,"
        " a.alias as alias_a,"
        " b.stringId as id_b,"
        " b.alias as alias_b,"
        " r.mode as relation,"
        " ID(r) as relation_id"
        " SKIP ({pageNumber} - 1) * {pageSize}"
        " LIMIT {pageSize} "
    )

    page_number = pageNumber if pageNumber is not None and pageNumber > 0 else 1
    page_size = pageSize if pageSize is not None and pageSize > 0 else 10

    results = neo4j.run(
        query,
        {
            "pageNumber": page_number,
            "pageSize": page_size,
            "conceptIds": c,
        },
    )

    statements = []
    for row in results:
        statement_object = StatementObject()
        statement_object.name = row["alias_a"]
        statement_object.id = row["id_a"]

        statement_subject = StatementSubject()
        statement_subject.name = row["alias_b"]
        statement_subject.id = row["id_b"]

        # The relationship's internal id doubles as predicate id and
        # statement id.
        relation_id = str(row["relation_id"])
        statement_predicate = StatementPredicate()
        statement_predicate.name = row["relation"]
        statement_predicate.id = relation_id

        statement = Statement()
        statement.subject = statement_subject
        statement.object = statement_object
        statement.predicate = statement_predicate
        statement.id = relation_id
        statements.append(statement)

    return statements
def get_edge(entity1, entity2):
    """Query for an edge.

    Runs a Cypher query for the relationships on a path between the node with
    id ``entity1`` and the node with id ``entity2`` (limited to one row).

    NOTE: the query result is not used yet (TODO). The ``GNBR_edge`` is
    returned exactly as it was constructed.

    :param entity1: GNBR-ID for first entity
    :type entity1: str
    :param entity2: GNBR-ID for second entity
    :type entity2: str

    :rtype: GnbrEdge
    """
    # Adjacent literals are concatenated into the exact query text.
    cypher = (
        " MATCH p=(m)--(n)"
        " WHERE m.id={entity1} AND n.id={entity2}"
        " RETURN relationships(p) LIMIT 1 "
    )
    params = {"entity1": entity1, "entity2": entity2}

    # TODO: do something with the first result row.
    neo4j.run(query=cypher, param=params)

    # Typical d in result would look like:
    # {
    #   "first_entity_name_loc_char": "1428,1431",
    #   "a-.ind": 0,
    #   "first_entity_type": "Chemical",
    #   "o.ind": 0,
    #   "second_entity_name_raw": "Myc",
    #   "sentence_number": "9",
    #   "e+.ind": 0,
    #   "first_entity_name": "TSA",
    #   "dependency_path": "presence|nmod|START_ENTITY END_ENTITY|nmod|presence",
    #   "a+.ind": 0,
    #   "b": 164,
    #   "first_entity_name_raw": "TSA",
    #   "e": 639,
    #   "e+": 56,
    #   "k.ind": 0,
    #   "z.ind": 0,
    #   "b.ind": 0,
    #   "e-": 66,
    #   "a+": 47,
    #   "k": 536,
    #   "a-": 38,
    #   "second_entity_type": "Gene",
    #   "tax_id": "9606",
    #   "n": 100,
    #   "o": 154,
    #   "second_entity_name_loc_char": "1405,1408",
    #   "e-.ind": 0,
    #   "e.ind": 0,
    #   "sentence_tokenized": "TSA treatment induced a similar epidermal phenotype to activation of Myc , and activation of Myc in the presence of TSA resulted in massive stimulation of terminal differentiation .",
    #   "z": 18,
    #   "n.ind": 0,
    #   "PMID": "17712411",
    #   "second_entity_name": "Myc"
    # }
    return GNBR_edge()