Beispiel #1
0
def get_subgraph(seedNodes):  # noqa: E501
    """Fetch the GNBR subgraph induced by a set of seed nodes.

    Runs a Cypher query that returns the relationships of every
    single-relationship path whose two end nodes both have an ``id``
    contained in ``seedNodes``; the query itself caps the result at 25 rows.

    NOTE: turning the returned rows into the response model is not
    implemented yet, so the ``GnbrSubgraph`` is returned exactly as it was
    constructed.

    :param seedNodes: gnbrIDs for subgraph
    :type seedNodes: List[str]

    :rtype: GnbrSubgraph
    """
    cypher = """
    MATCH (n) WHERE n.id IN {seedNodes}
    MATCH p=(n)--(m) 
    WHERE m.id IN  {seedNodes}
    RETURN relationships(p)
    LIMIT 25

    """
    rows = neo4j.run(query=cypher, param={"seedNodes": seedNodes})

    induced = GnbrSubgraph()
    for _row in rows:
        # TODO: fill this edge from the row and attach it to the subgraph;
        # for now it is only a placeholder and is discarded.
        _edge = GNBR_edge()
    return induced
Beispiel #2
0
def get_node_neighbors(entity1):  # noqa: E501
    """Get all neighbors of a particular node.

    Runs a Cypher query for single-relationship paths that have a node with
    ``id`` equal to ``entity1`` at one end and returns their relationships;
    the query itself caps the result at 25 rows.

    NOTE: turning the returned rows into the response model is not
    implemented yet, so the ``GnbrSubgraph`` is returned exactly as it was
    constructed.

    :param entity1: GNBR-ID for first entity
    :type entity1: str

    :rtype: GnbrSubgraph
    """
    cypher = """
    MATCH p=(n)--(m) 
    WHERE m.id = {entity1}
    RETURN relationships(p)
    LIMIT 25
    """
    rows = neo4j.run(query=cypher, param={"entity1": entity1})

    neighborhood = GnbrSubgraph()
    for _row in rows:
        # TODO: fill this edge from the row and attach it to the subgraph;
        # for now it is only a placeholder and is discarded.
        _edge = GNBR_edge()
    return neighborhood
def get_concept_details(conceptId):
    """
    get_concept_details
    Retrieve the details of one concept, identified by a (url-encoded)
    CURIE identifier known to this knowledge source.

    Looks up the ``Protein`` node whose ``stringId`` equals ``conceptId``
    (the query returns at most one row) and wraps every returned row in a
    ``ConceptDetail`` carrying the protein's id, alias and a single
    ``source`` tag. The semantic group is always reported as ``"CHEM"``.

    :param conceptId: (url-encoded) CURIE identifier of concept of interest
    :type conceptId: str

    :rtype: List[ConceptDetail]
    """
    cypher = """
    MATCH (protein:Protein {stringId: {conceptId}})
    RETURN
        protein.stringId as stringId,
        protein.alias as alias,
        protein.source as source
    LIMIT 1
    """

    def _as_detail(row):
        # Map one result row onto the response model.
        detail = ConceptDetail()
        detail.id = row["stringId"]
        detail.name = row["alias"]
        detail.semantic_group = "CHEM"
        source_tag = ConceptsconceptIdDetails(tag="source", value=row["source"])
        detail.details = [source_tag]
        return detail

    rows = neo4j.run(query=cypher, param={"conceptId": conceptId})
    return [_as_detail(row) for row in rows]
Beispiel #4
0
def linked_types():
    """
    linked_types
    List the types available in the knowledge source together with their
    number of instances.

    Counts all ``Protein`` nodes and reports that count as the frequency of
    a ``DataType`` whose id is ``"CHEM"`` (one entry per returned row).

    :rtype: List[DataType]
    """
    count_query = """
        MATCH (p:Protein)
        RETURN COUNT(p) as frequency
        """

    data_types = []
    for row in neo4j.run(count_query):
        data_types.append(DataType(id="CHEM", frequency=row["frequency"]))
    return data_types
def get_concepts(keywords, semgroups=None, pageNumber=None, pageSize=None):
    """
    get_concepts
    Retrieves a (paged) list of concepts in the system.

    ``keywords`` is split on whitespace and each ``Protein`` node is scored
    by how many of those keywords occur (case-insensitively) in its
    ``alias``. Only proteins that match at least one keyword and have at
    least one ``ACTION`` relationship are returned, ordered by number of
    keyword matches and then by number of ``ACTION`` relationships, both
    descending.

    A missing or non-positive ``pageNumber`` falls back to 1, and a missing
    or non-positive ``pageSize`` falls back to 10.

    NOTE: ``semgroups`` is accepted for interface compatibility but is not
    used by the query; every returned concept is reported with the semantic
    group ``"CHEM"``.

    :param keywords: a (urlencoded) space delimited set of keywords or
        substrings against which to match concept names and synonyms
    :type keywords: str
    :param semgroups: a (url-encoded) space-delimited set of semantic groups
        (specified as codes CHEM, GENE, ANAT, etc.) to which to constrain
        concepts matched by the main keyword search (see
        [SemGroups](https://metamap.nlm.nih.gov/Docs/SemGroups_2013.txt)
        for the full list of codes)
    :type semgroups: str
    :param pageNumber: (1-based) number of the page to be returned in a
        paged set of query results
    :type pageNumber: int
    :param pageSize: number of concepts per page to be returned in a paged
        set of query results
    :type pageSize: int

    :rtype: List[Concept]
    """

    q = """
    MATCH (protein:Protein)
    WITH
        SIZE(FILTER(x IN {filter} WHERE LOWER(protein.alias) CONTAINS LOWER(x))) AS num_matches,
        SIZE((protein)-[:ACTION]-()) as degree,
        protein as protein
    WHERE num_matches > 0 AND degree > 0
    RETURN
        protein.stringId as stringId,
        protein.alias as alias,
        protein.source as source
    ORDER BY num_matches DESC, degree DESC
    SKIP ({pageNumber} - 1) * {pageSize} LIMIT {pageSize}
    """

    # Normalise paging: compare against None by identity (PEP 8) and fall
    # back to the defaults for missing or non-positive values.
    page_number = pageNumber if pageNumber is not None and pageNumber > 0 else 1
    page_size = pageSize if pageSize is not None and pageSize > 0 else 10

    results = neo4j.run(
        query=q,
        param={
            "pageNumber": page_number,
            "pageSize": page_size,
            "filter": keywords.split(),
        },
    )

    return [
        Concept(id=d["stringId"], name=d["alias"], semantic_group="CHEM")
        for d in results
    ]
Beispiel #6
0
def get_identifier(searchString, limit=None):  # noqa: E501
    """Find GNBR identifier.

    Searches GNBR for nodes whose ``formatted`` property contains
    ``searchString``; the query itself caps the result at 25 rows.

    NOTE: mapping the returned nodes onto the response model is not
    implemented yet. ``limit`` is currently not used, and the value returned
    today is an unpopulated ``GnbrSubgraph`` rather than the documented
    ``IdMapping``.

    :param searchString: pass a search string to find matching identifiers
    :type searchString: str
    :param limit: maximum number of records to return
    :type limit: int

    :rtype: IdMapping
    """
    # NOTE: this information should probably be found using the PubTator
    # dictionaries (simple lookup) instead of a graph query.
    cypher = """
    MATCH (m)
    WHERE m.formatted contains {searchString}
    RETURN (m)
    LIMIT 25
    """
    rows = neo4j.run(query=cypher, param={"searchString": searchString})

    placeholder = GnbrSubgraph()
    for _row in rows:
        # TODO: fill this mapping from the row and add it to the response;
        # for now it is only a placeholder and is discarded.
        _mapping = IdMapping()

    # Example node output looks like:
    # {
    #   "raw": "ace|acetylcholinesterase|acetylcholinesterase enzyme|ach-e|ache|ache-r|anti-acetylcholinesterase|as-ache|mache",
    #   "formatted": "ace|acetylcholinesterase|acetylcholinesterase_enzyme|ach-e|ache|ache-r|anti-acetylcholinesterase|as-ache|mache",
    #   "id": "11423"
    # }
    return placeholder
def get_statements(c,
                   pageNumber=None,
                   pageSize=None,
                   keywords=None,
                   semgroups=None):
    """
    get_statements
    Given a list of [CURIE-encoded](https://www.w3.org/TR/curie/)
    identifiers of exactly matching concepts, retrieves a paged list of
    concept-relations where either the subject or object concept matches at
    least one concept in the input list.

    A statement is produced for every ``ACTION`` relationship between two
    ``Protein`` nodes that both have an ``alias`` and of which one has a
    ``stringId`` equal (case-insensitively) to an entry of ``c``. In each
    statement the matching protein is reported as the object, the protein at
    the other end as the subject, and the relationship's ``mode`` as the
    predicate name. The internal relationship id is used, as a string, for
    both the statement id and the predicate id.

    A missing or non-positive ``pageNumber`` falls back to 1, a missing or
    non-positive ``pageSize`` falls back to 10, and a missing ``c`` is
    treated as an empty list (which matches nothing).

    NOTE: ``keywords`` and ``semgroups`` are accepted for interface
    compatibility but are not used by the query. The query has no
    ``ORDER BY``, so the assignment of rows to pages relies on whatever
    order the database happens to return.

    :param c: set of [CURIE-encoded](https://www.w3.org/TR/curie/)
        identifiers of exactly matching concepts to be used in a search for
        associated concept-relation statements
    :type c: List[str]
    :param pageNumber: (1-based) number of the page to be returned in a
        paged set of query results
    :type pageNumber: int
    :param pageSize: number of concepts per page to be returned in a paged
        set of query results
    :type pageSize: int
    :param keywords: a (url-encoded, space-delimited) string of keywords or
        substrings against which to match the subject, predicate or object
        names of the set of concept-relations matched by any of the input
        exact matching concepts
    :type keywords: str
    :param semgroups: a (url-encoded, space-delimited) string of semantic
        groups (specified as codes CHEM, GENE, ANAT, etc.) to which to
        constrain the subject or object concepts associated with the query
        seed concept (see
        [SemGroups](https://metamap.nlm.nih.gov/Docs/SemGroups_2013.txt)
        for the full list of codes)
    :type semgroups: str

    :rtype: List[Statement]
    """

    # The relationship pattern is undirected, so every ACTION relationship
    # is considered in both orientations. Filtering on `a` alone therefore
    # already yields each relationship that touches a requested concept,
    # with that concept bound to `a`; additionally filtering on `b` would
    # return every such relationship a second time with the roles swapped.
    query = """
    MATCH (a:Protein)-[r:ACTION]-(b:Protein)
    WHERE
        (NOT a.alias IS NULL) AND
        (NOT b.alias IS NULL) AND
        ANY (x in {conceptIds} WHERE LOWER(a.stringId) = LOWER(x))
    RETURN
        a.stringId as id_a,
        a.alias as alias_a,
        b.stringId as id_b,
        b.alias as alias_b,
        r.mode as relation,
        ID(r) as relation_id
    SKIP ({pageNumber} - 1) * {pageSize} LIMIT {pageSize}
    """

    # Normalise inputs: compare against None by identity (PEP 8) and fall
    # back to the defaults for missing or non-positive paging values.
    params = {
        "pageNumber": pageNumber if pageNumber is not None and pageNumber > 0 else 1,
        "pageSize": pageSize if pageSize is not None and pageSize > 0 else 10,
        "conceptIds": c if c is not None else [],
    }
    results = neo4j.run(query, params)

    statements = []

    for row in results:
        # The relationship id doubles as statement id and predicate id.
        relation_id = str(row["relation_id"])

        statement_object = StatementObject()
        statement_object.name = row["alias_a"]
        statement_object.id = row["id_a"]

        statement_subject = StatementSubject()
        statement_subject.name = row["alias_b"]
        statement_subject.id = row["id_b"]

        statement_predicate = StatementPredicate()
        statement_predicate.name = row["relation"]
        statement_predicate.id = relation_id

        statement = Statement()
        statement.subject = statement_subject
        statement.object = statement_object
        statement.predicate = statement_predicate
        statement.id = relation_id

        statements.append(statement)

    return statements
Beispiel #8
0
def get_edge(entity1, entity2):  # noqa: E501
    """Query for an edge.

    Runs a Cypher query for a single-relationship path between the node
    whose ``id`` is ``entity1`` and the node whose ``id`` is ``entity2``
    and asks for its relationships; the query returns at most one row.

    NOTE: the query result is not consumed yet, so the ``GNBR_edge`` is
    returned exactly as it was constructed.

    :param entity1: GNBR-ID for first entity
    :type entity1: str
    :param entity2: GNBR-ID for second entity
    :type entity2: str

    :rtype: GnbrEdge
    """
    cypher = """
    MATCH p=(m)--(n)
    WHERE m.id={entity1} AND n.id={entity2}
    RETURN relationships(p)
    LIMIT 1
    """
    endpoints = {
        "entity1": entity1,
        "entity2": entity2
    }
    rows = neo4j.run(query=cypher, param=endpoints)

    edge = GNBR_edge()
    # TODO: populate `edge` from the first entry of `rows`.
    #
    # A typical entry in the result would look like:
    # {
    #   "first_entity_name_loc_char": "1428,1431",
    #   "a-.ind": 0,
    #   "first_entity_type": "Chemical",
    #   "o.ind": 0,
    #   "second_entity_name_raw": "Myc",
    #   "sentence_number": "9",
    #   "e+.ind": 0,
    #   "first_entity_name": "TSA",
    #   "dependency_path": "presence|nmod|START_ENTITY END_ENTITY|nmod|presence",
    #   "a+.ind": 0,
    #   "b": 164,
    #   "first_entity_name_raw": "TSA",
    #   "e": 639,
    #   "e+": 56,
    #   "k.ind": 0,
    #   "z.ind": 0,
    #   "b.ind": 0,
    #   "e-": 66,
    #   "a+": 47,
    #   "k": 536,
    #   "a-": 38,
    #   "second_entity_type": "Gene",
    #   "tax_id": "9606",
    #   "n": 100,
    #   "o": 154,
    #   "second_entity_name_loc_char": "1405,1408",
    #   "e-.ind": 0,
    #   "e.ind": 0,
    #   "sentence_tokenized": "TSA treatment induced a similar epidermal phenotype to activation of Myc , and activation of Myc in the presence of TSA resulted in massive stimulation of terminal differentiation .",
    #   "z": 18,
    #   "n.ind": 0,
    #   "PMID": "17712411",
    #   "second_entity_name": "Myc"
    # }
    return edge