Python standardizeの例、beacon_controller.utils.standardize Pythonの例

コード例 #1

0

ファイルを表示

ファイル: concepts_controller.py プロジェクト: lhannest/tkg-beacon

def get_concepts(keywords, categories=None, size=None):
    """Find concepts whose name contains any of the given keywords.

    Keyword matching against node names is a case-insensitive substring
    test; category matching is a case-insensitive equality test (both are
    done inside the Cypher query).

    :param keywords: keywords/substrings to look for in node names
    :param categories: categories to restrict the search to; ``None`` is
        treated as an empty list, which disables the category filter
    :param size: maximum number of nodes to fetch; ``None`` or a value that
        is not greater than zero falls back to 100
    :return: a list with one BeaconConcept per node returned by the query
    """
    if size is None or not (size > 0):
        size = 100
    if categories is None:
        categories = []

    q = """
    MATCH (n)
    WHERE
        (ANY (keyword IN {keywords} WHERE
            (ANY (name IN n.name WHERE LOWER(name) CONTAINS LOWER(keyword))))) AND
        (SIZE({categories}) = 0 OR
            ANY (category IN {categories} WHERE
            (ANY (name IN n.category WHERE LOWER(name) = LOWER(category)))))
    RETURN n
    LIMIT {limit}
    """

    nodes = db.query(q, Node, keywords=keywords, categories=categories, limit=size)

    def _to_concept(node):
        # If every element of node.category has length 1, glue the elements
        # into one string (presumably a category name that was stored or
        # iterated character by character -- TODO confirm).
        if all(len(piece) == 1 for piece in node.category):
            node.category = [''.join(node.category)]
        standardized = utils.standardize(node.category)
        return BeaconConcept(
            id=node.curie,
            name=node.name,
            categories=standardized,
            description=node.description
        )

    return [_to_concept(node) for node in nodes]

コード例 #2

0

ファイルを表示

ファイル: metadata_controller.py プロジェクト: lhannest/tkg-beacon

def get_concept_categories():
    """List every concept category in the graph with its node count.

    The query returns each distinct ``category`` property value together
    with how many nodes carry it. Each value is passed through
    ``utils.standardize`` and the frequencies of identical standardized
    names are summed.

    :return: a list of BeaconConceptCategory, most frequent category first
    """
    q = 'MATCH (x) RETURN DISTINCT x.category AS category, COUNT(*) AS frequency;'
    rows = db.query(q)

    # Standardized category name -> accumulated frequency.
    totals = {}
    for row in rows:
        for name in utils.standardize(row['category']):
            totals[name] = totals.get(name, 0) + row['frequency']

    def _describe(name, count):
        uri = 'http://bioentity.io/vocab/{}'.format(camel_case(name))
        identifier = 'BLM:{}'.format(camel_case(name))
        return BeaconConceptCategory(id=identifier,
                                     uri=uri,
                                     frequency=count,
                                     category=name)

    ranked = sorted(totals.items(), key=lambda pair: pair[1], reverse=True)
    return [_describe(name, count) for name, count in ranked]

コード例 #3

0

ファイルを表示

ファイル: metadata_controller.py プロジェクト: lhannest/tkg-beacon

 def split_up_by_key(dicts, key):
     """Expand each dict into one copy per standardized value of ``key``.

     For every input dict, ``utils.standardize`` is applied to its ``key``
     entry; one shallow copy of the dict is produced for each resulting
     value, with ``key`` set to that single value.

     :param dicts: iterable of dicts that all contain ``key``
     :param key: the entry to split on
     :return: a flat list of the expanded copies, in input order
     """
     def _copy_with(record, value):
         clone = dict(record)
         clone[key] = value
         return clone

     return [
         _copy_with(record, value)
         for record in dicts
         for value in utils.standardize(record[key])
     ]

コード例 #4

0

ファイルを表示

def get_concept_details(concept_id):  # noqa: E501
    """get_concept_details

    Retrieve the details of one concept, selected by its (url-encoded)
    CURIE identifier. The identifier is compared case-insensitively and at
    most one node is read.

    :param concept_id: (url-encoded) CURIE identifier of concept of interest
    :type concept_id: str

    :rtype: BeaconConceptWithDetails
    """
    q = """
    MATCH (n) WHERE LOWER(n.id)=LOWER({conceptId})
    RETURN
        n.id AS id,
        n.uri AS uri,
        n.iri AS iri,
        n.name AS name,
        n.category AS category,
        n.symbol AS symbol,
        n.description AS description,
        n.synonym AS synonyms,
        n.clique AS clique,
        n.xrefs AS xrefs,
        n AS node
    LIMIT 1
    """

    records = db.query(q, conceptId=concept_id)

    for record in records:
        # Prefer the uri property; use iri only when uri is missing.
        uri = record['iri'] if record['uri'] is None else record['uri']

        # Clique members and xrefs together form the exact matches, minus
        # the concept's own id.
        exact_matches = utils.remove_all(
            utils.listify(record['clique']) + utils.listify(record['xrefs']),
            record['id']
        )

        details = [
            BeaconConceptDetail(tag=tag, value=utils.stringify(raw))
            for tag, raw in create_details_dict(record['node']).items()
        ]

        # The query is limited to one row, so the first record is the answer.
        return BeaconConceptWithDetails(
            id=record['id'],
            uri=utils.stringify(uri),
            name=utils.stringify(record['name']),
            categories=utils.standardize(record['category']),
            symbol=utils.stringify(record['symbol']),
            description=utils.stringify(record['description']),
            synonyms=utils.listify(record['synonyms']),
            exact_matches=exact_matches,
            details=details
        )

    # The query produced no rows: answer with an empty model object.
    return BeaconConceptWithDetails()

コード例 #5

0

ファイルを表示

ファイル: concepts_controller.py プロジェクト: lhannest/tkg-beacon

def get_concept_details(conceptId):
    """Return the details of the concept whose id matches ``conceptId``.

    The id is compared case-insensitively and at most one node is read.

    :param conceptId: identifier of the concept to look up
    :type conceptId: str
    :return: the populated concept, or ``None`` when the query yields no rows
    :rtype: BeaconConceptWithDetails
    """
    q = """
    MATCH (n) WHERE LOWER(n.id)=LOWER({conceptId})
    RETURN
        n.id AS id,
        n.uri AS uri,
        n.iri AS iri,
        n.name AS name,
        n.category AS category,
        n.symbol AS symbol,
        n.description AS description,
        n.synonym AS synonyms,
        n.clique AS clique,
        n.xrefs AS xrefs
    LIMIT 1
    """

    results = db.query(q, conceptId=conceptId)

    for result in results:
        # Prefer the uri property; fall back to iri when uri is missing.
        uri = result['uri'] if result['uri'] is not None else result['iri']

        # Missing list-valued properties are normalised to empty lists.
        synonyms = result['synonyms'] if result['synonyms'] is not None else []
        clique = result['clique'] if result['clique'] is not None else []
        xrefs = result['xrefs'] if result['xrefs'] is not None else []

        # De-duplicate while keeping first-seen order, so the response is
        # deterministic (a plain set() would yield an arbitrary order).
        exact_matches = list(dict.fromkeys(clique + xrefs))
        exact_matches = utils.remove_all(exact_matches, result['id'])

        categories = utils.standardize(result['category'])

        return BeaconConceptWithDetails(
            id=result['id'],
            uri=uri,
            name=result['name'],
            categories=categories,
            symbol=result['symbol'],
            description=result['description'],
            # Pass the normalised list, not the raw (possibly None) property.
            synonyms=synonyms,
            exact_matches=exact_matches
        )

    # No matching node was found.
    return None

コード例 #6

0

ファイルを表示

def get_statements(s,
                   edge_label=None,
                   relation=None,
                   t=None,
                   keywords=None,
                   categories=None,
                   size=None):
    """Return statements (edges) that touch any of the source concepts.

    Edges are matched in either direction; for each hit the subject and
    object are assigned according to the stored direction of the edge.

    :param s: ids of the source concepts (compared case-insensitively)
    :param edge_label: if given, only edges whose relationship type equals it
    :param relation: if given, only edges whose ``relation`` property equals it
    :param t: if given, ids the node on the other end must match
        (compared case-insensitively)
    :param keywords: forwarded to the query as a parameter; the Cypher text
        does not reference it
    :param categories: forwarded to the query as a parameter; the Cypher text
        does not reference it
    :param size: maximum number of rows; ``None`` or a value below 1 means 100
    :return: list of BeaconStatement
    """
    size = 100 if size is None or size < 1 else size

    q = """
    MATCH (n)-[r]-(m)
    WHERE
        ANY(id IN {sources} WHERE TOLOWER(n.id) = TOLOWER(id)) AND
        ({targets} IS NULL OR ANY(id IN {targets} WHERE TOLOWER(m.id) = TOLOWER(id))) AND
        ({edge_label} IS NULL OR type(r) = {edge_label}) AND
        ({relation} IS NULL OR r.relation = {relation})
    RETURN
        n AS source,
        m AS target,
        EXISTS((n)-[r]->(m)) AS source_is_subject,
        type(r) AS type,
        r.edge_label AS edge_label,
        r.relation AS relation,
        r.negated AS negated,
        r.id AS statement_id
    LIMIT {limit}
    """

    # NOTE(review): keywords and categories are accepted and passed along,
    # but no clause of the query uses them -- confirm whether filtering on
    # them is still intended.
    results = db.query(q,
                       sources=s,
                       targets=t,
                       edge_label=edge_label,
                       relation=relation,
                       keywords=keywords,
                       categories=categories,
                       limit=size)

    statements = []

    for result in results:
        # The pattern is undirected, so the "source" node may be either end
        # of the stored edge; orient subject/object by the real direction.
        if result['source_is_subject']:
            subject_node, object_node = result['source'], result['target']
        else:
            object_node, subject_node = result['source'], result['target']

        subject_categories = utils.standardize(subject_node['category'])
        object_categories = utils.standardize(object_node['category'])

        # Prefer the edge_label property; fall back to the relationship type.
        if result['edge_label'] is not None:
            label = result['edge_label']
        else:
            label = result['type']

        beacon_subject = BeaconStatementSubject(id=subject_node['id'],
                                                name=subject_node['name'],
                                                categories=subject_categories)

        beacon_predicate = BeaconStatementPredicate(
            edge_label=label,
            relation=result['relation'],
            negated=result['negated'])

        beacon_object = BeaconStatementObject(id=object_node['id'],
                                              name=object_node['name'],
                                              categories=object_categories)

        # Edges without a stored id get a synthetic subject:label:object id.
        statement_id = result['statement_id']
        if statement_id is None:
            statement_id = '{}:{}:{}'.format(
                subject_node['id'], label, object_node['id'])

        statements.append(
            BeaconStatement(id=statement_id,
                            subject=beacon_subject,
                            predicate=beacon_predicate,
                            object=beacon_object))

    return statements

コード例 #7

0

ファイルを表示

ファイル: statements_controller.py プロジェクト: NCATS-Tangerine/tkg-beacon

def get_statements(s=None,
                   s_keywords=None,
                   s_categories=None,
                   edge_label=None,
                   relation=None,
                   t=None,
                   t_keywords=None,
                   t_categories=None,
                   offset=None,
                   size=None):  # noqa: E501
    """get_statements

    Given a constrained set of some [CURIE-encoded](https://www.w3.org/TR/curie/) 's' ('source') concept identifiers, categories and/or keywords (to match in the concept name or description), retrieves a list of relationship statements where either the subject or the object concept matches any of the input source concepts provided.  Optionally, a set of some 't' ('target') concept identifiers, categories and/or keywords (to match in the concept name or description) may also be given, in which case a member of the 't' concept set should match the concept opposite an 's' concept in the statement. That is, if the 's' concept matches a subject, then the 't' concept should match the object of a given statement (or vice versa).  # noqa: E501

    NOTE(review): the query built below matches only directed edges
    ``(n)-[r]->(m)`` with 's' constraints applied to ``n`` and 't'
    constraints applied to ``m``, and keywords are tested against names and
    synonyms only -- confirm against the contract described above.

    :param s: An (optional) array set of [CURIE-encoded](https://www.w3.org/TR/curie/) identifiers of 'source' ('start') concepts possibly known to the beacon. Unknown CURIEs should simply be ignored (silent match failure).
    :type s: List[str]
    :param s_keywords: An (optional) array of keywords or substrings against which to filter 'source' concept names and synonyms
    :type s_keywords: List[str]
    :param s_categories: An (optional) array set of 'source' concept categories (specified as Biolink name labels codes gene, pathway, etc.) to which to constrain concepts matched by the main keyword search (see [Biolink Model](https://biolink.github.io/biolink-model) for the full list of codes)
    :type s_categories: List[str]
    :param edge_label: (Optional) predicate edge label against which to constrain the search for statements ('edges') associated with the given query seed concept. The predicate edge_names for this parameter should be as published by the /predicates API endpoint and must be taken from the minimal predicate ('slot') list of the [Biolink Model](https://biolink.github.io/biolink-model).
    :type edge_label: str
    :param relation: (Optional) predicate relation against which to constrain the search for statements ('edges') associated with the given query seed concept. The predicate relations for this parameter should be as published by the /predicates API endpoint and the preferred format is a CURIE where one exists, but strings/labels acceptable. This relation may be equivalent to the edge_label (e.g. edge_label: has_phenotype, relation: RO:0002200), or a more specific relation in cases where the source provides more granularity (e.g. edge_label: molecularly_interacts_with, relation: RO:0002447)
    :type relation: str
    :param t: An (optional) array set of [CURIE-encoded](https://www.w3.org/TR/curie/) identifiers of 'target' ('opposite' or 'end') concepts possibly known to the beacon. Unknown CURIEs should simply be ignored (silent match failure).
    :type t: List[str]
    :param t_keywords: An (optional) array of keywords or substrings against which to filter 'target' concept names and synonyms
    :type t_keywords: List[str]
    :param t_categories: An (optional) array set of 'target' concept categories (specified as Biolink name labels codes gene, pathway, etc.) to which to constrain concepts matched by the main keyword search (see [Biolink Model](https://biolink.github.io/biolink-model) for the full list of codes)
    :type t_categories: List[str]
    :param offset: offset (cursor position) to next batch of statements of amount 'size' to return.
    :type offset: int
    :param size: maximum number of statements requested by the client; when omitted, this implementation applies a limit of 100
    :type size: int

    :rtype: List[BeaconStatement]
    """
    if size is None:
        size = 100

    # Each filter contributes an UNWIND over its (lower-cased) input list
    # and/or a WHERE conjunct, plus the query parameters they reference.
    conjuncts = []
    unwinds = []
    data = {}

    if s is not None:
        unwinds.append("[x IN {sources} | toLower(x)] AS s")
        conjuncts.append("toLower(n.id) = s")
        data['sources'] = s

    if t is not None:
        unwinds.append("[x IN {targets} | toLower(x)] AS t")
        conjuncts.append("toLower(m.id) = t")
        data['targets'] = t

    if s_keywords is not None:
        unwinds.append("[x IN {s_keywords} | toLower(x)] AS s_keyword")
        disjuncts = [
            "toLower(n.name) CONTAINS s_keyword",
            "ANY(syn IN n.synonym WHERE toLower(syn) CONTAINS s_keyword)"
        ]
        conjuncts.append(" OR ".join(disjuncts))
        data['s_keywords'] = s_keywords

    if t_keywords is not None:
        unwinds.append("[x IN {t_keywords} | toLower(x)] AS t_keyword")
        disjuncts = [
            "toLower(m.name) CONTAINS t_keyword",
            "ANY(syn IN m.synonym WHERE toLower(syn) CONTAINS t_keyword)"
        ]
        conjuncts.append(" OR ".join(disjuncts))
        data['t_keywords'] = t_keywords

    if edge_label is not None:
        conjuncts.append("type(r) = {edge_label}")
        data['edge_label'] = edge_label

    if relation is not None:
        conjuncts.append("r.relation = {relation}")
        data['relation'] = relation

    if s_categories is not None:
        unwinds.append("[x IN {s_categories} | toLower(x)] AS s_category")
        # No trailing ")" here: every conjunct is wrapped in parentheses
        # when the WHERE clause is assembled below.
        conjuncts.append("s_category IN labels(n)")
        data['s_categories'] = s_categories

    if t_categories is not None:
        unwinds.append("[x IN {t_categories} | toLower(x)] AS t_category")
        conjuncts.append("t_category IN labels(m)")
        data['t_categories'] = t_categories

    q = "MATCH (n)-[r]->(m)"

    if unwinds:
        q = "UNWIND " + ' UNWIND '.join(unwinds) + " " + q

    if conjuncts:
        q = q + " WHERE (" + ') AND ('.join(conjuncts) + ")"

    q += """
    RETURN
        n AS subject,
        m AS object,
        type(r) AS edge_type,
        r.edge_label AS edge_label,
        r.relation AS relation,
        r.negated AS negated,
        r.id AS statement_id
    """

    # SKIP/LIMIT are inlined only after an int check, so nothing but a
    # number can reach the query text.
    if isinstance(offset, int) and offset >= 0:
        q += f' SKIP {offset}'
    if isinstance(size, int) and size >= 1:
        q += f' LIMIT {size}'

    results = db.query(q, **data)

    statements = []

    for result in results:
        subject_node, object_node = result['subject'], result['object']

        subject_categories = utils.standardize(subject_node['category'])
        object_categories = utils.standardize(object_node['category'])

        # Prefer the edge_label property; fall back to the relationship type.
        if result['edge_label'] is not None:
            label = utils.stringify(result['edge_label'])
        else:
            label = utils.stringify(result['edge_type'])

        beacon_subject = BeaconStatementSubject(
            id=subject_node['id'],
            name=utils.stringify(subject_node['name']),
            categories=subject_categories)

        beacon_predicate = BeaconStatementPredicate(
            edge_label=label,
            relation=utils.stringify(result['relation']),
            negated=bool(result['negated']))

        beacon_object = BeaconStatementObject(
            id=object_node['id'],
            name=utils.stringify(object_node['name']),
            categories=object_categories)

        # Edges without a stored id get a synthetic subject:label:object id.
        statement_id = result['statement_id']
        if statement_id is None:
            statement_id = '{}:{}:{}'.format(
                subject_node['id'], label, object_node['id'])

        statements.append(
            BeaconStatement(id=statement_id,
                            subject=beacon_subject,
                            predicate=beacon_predicate,
                            object=beacon_object))

    return statements

コード例 #8

0

ファイルを表示

def get_concepts(keywords=None, categories=None, offset=None, size=None):  # noqa: E501
    """get_concepts

    Retrieves a list of concepts in the beacon knowledge base with names and/or synonyms matching a set of keywords or substrings. The results returned should generally be returned in order of the quality of the match, that is, the highest ranked concepts should exactly match the most keywords, in the same order as the keywords were given. Lower quality hits with fewer keyword matches or out-of-order keyword matches, should be returned lower in the list.  # noqa: E501

    NOTE(review): the query built below applies no ORDER BY, so the ranking
    described above is not enforced here.

    :param keywords: (Optional) array of keywords or substrings against which to match concept names and synonyms
    :type keywords: List[str]
    :param categories: (Optional) array set of concept categories - specified as Biolink name labels codes gene, pathway, etc. - to which to constrain concepts matched by the main keyword search (see [Biolink Model](https://biolink.github.io/biolink-model) for the full list of terms)
    :type categories: List[str]
    :param offset: offset (cursor position) to next batch of concepts of amount 'size' to return.
    :type offset: int
    :param size: maximum number of concept entries requested by the client; when omitted, this implementation applies a limit of 100
    :type size: int

    :rtype: List[BeaconConcept]
    """
    if size is None:
        size = 100

    conjuncts = []
    unwinds = []
    data = {}

    if keywords is not None:
        unwinds.append("[x IN {keywords} | toLower(x)] AS keyword")
        disjuncts = [
            "toLower(n.name) CONTAINS keyword",
            "ANY(syn IN n.synonym WHERE toLower(syn) CONTAINS keyword)"
        ]
        conjuncts.append(" OR ".join(disjuncts))
        data['keywords'] = keywords

    if categories is not None:
        # The ANY(...) predicate already tests the whole parameter list, so
        # no UNWIND is added: an unwound variable would be shadowed by the
        # predicate's own `category` and would only repeat every matching
        # node once per requested category.
        conjuncts.append("ANY(category IN {categories} WHERE category IN labels(n))")
        data['categories'] = categories

    q = "MATCH (n)"

    if unwinds:
        q = "UNWIND " + ' UNWIND '.join(unwinds) + " " + q

    if conjuncts:
        q = q + " WHERE (" + ') AND ('.join(conjuncts) + ")"

    q += " RETURN n"

    # SKIP/LIMIT are inlined only after an int check, so nothing but a
    # number can reach the query text.
    if isinstance(offset, int) and offset >= 0:
        q += f' SKIP {offset}'
    if isinstance(size, int) and size >= 1:
        q += f' LIMIT {size}'

    # Send exactly the parameters the assembled query references.
    nodes = db.query(q, Node, **data)

    concepts = []

    for node in nodes:
        # If every element of node.category has length 1, glue the elements
        # into one string (presumably a category name that was stored or
        # iterated character by character -- TODO confirm).
        if all(len(piece) == 1 for piece in node.category):
            node.category = [''.join(node.category)]
        node_categories = utils.standardize(node.category)
        concepts.append(
            BeaconConcept(
                id=node.curie,
                name=node.name,
                categories=node_categories,
                description=node.description
            )
        )

    return concepts