Example #1
0
def update(
    index,
    input_prefix,
    output_prefix,
    input_value=None,
    subject_category=None,
    object_category=None,
    predicate=None
):
    """Fetch the mappings for a prefix pair and describe the one at ``index``.

    The request URI is built from the module-level ``_uri_pattern`` using
    ``input_prefix``, ``output_prefix`` and ``input_value``. The JSON response
    is expected to carry a list of mappings under the ``'data'`` key.

    :param index: position of the mapping to use; anything ``int()`` accepts.
        Negative values index from the end, as with a normal list.
    :param input_prefix: substituted into the URI pattern.
    :param output_prefix: substituted into the URI pattern.
    :param input_value: substituted into the URI pattern.
    :param subject_category: accepted for interface compatibility; not used.
    :param object_category: accepted for interface compatibility; not used.
    :param predicate: copied verbatim into the returned example.
    :returns: dict with ``'apis'`` (the selected mapping), ``'example'``
        (subject id, object id and predicate) and ``'uri'``.
    :raises Exception: if ``index`` is not an integer, the request fails,
        ``index`` is out of range, or the selected mapping is empty.
    """
    try:
        index = int(index)
    except (TypeError, ValueError, OverflowError) as err:
        # The previous message said "is not a string", which is the opposite
        # of what this conversion checks.
        raise Exception('Index "{}" is not an integer'.format(index)) from err

    uri = _uri_pattern.format(
        input_prefix=input_prefix,
        output_prefix=output_prefix,
        input_value=input_value
    )

    response = requests.get(uri)

    if not response.ok:
        raise Exception('URI {} responsed with status code {}'.format(uri, response.status_code))

    mappings = response.json()['data']

    # Check both ends of the valid range so an out-of-range negative index
    # gets the same descriptive error instead of a bare IndexError.
    if not -len(mappings) <= index < len(mappings):
        raise Exception('Tried to access index {} when there are only {} many'.format(index, len(mappings)))

    mapping = mappings[index]

    if len(mapping) < 1:
        raise Exception('The semantic query at {} is empty'.format(index))

    # The subject comes from the first step of the mapping, the object from
    # the last step.
    example = {
        'subject_id': safe_get(mapping[0], 'input'),
        'object_id': safe_get(mapping[-1], 'output', 'object', 'id'),
        'predicate': predicate
    }

    return {'apis': mapping, 'example': example, 'uri': uri}
Example #2
0
def get_statement_details(statementId, keywords=None, offset=None, size=None):
    """Return the details of the statement identified by ``statementId``.

    The id must consist of exactly five colon-separated parts: the first two
    form the subject id, the third is the predicate and the last two form the
    object id. The subject is crawled and the first association whose object
    matches, and whose predicate or edge label matches, supplies the details.

    :param statementId: five-part statement identifier.
    :param keywords: accepted for interface compatibility; not used.
    :param offset: accepted for interface compatibility; not used.
    :param size: accepted for interface compatibility; not used.
    :returns: a ``BeaconStatementWithDetails``; an empty one when the id is
        malformed or no association matches.
    """
    parts = statementId.split(':')

    # An id without any colon splits into one part, so this single check also
    # covers the "no colon at all" case.
    if len(parts) != 5:
        return BeaconStatementWithDetails()

    subject_id = '{}:{}'.format(parts[0], parts[1])
    predicate = parts[2]
    object_id = '{}:{}'.format(parts[3], parts[4])

    # Edge labels are compared with spaces replaced by underscores. Keep the
    # normalised form in its own variable: overwriting ``predicate`` inside
    # the loop made the exact-predicate comparison depend on which
    # associations had been visited earlier.
    normalized_predicate = predicate.replace(' ', '_')

    data = crawler.crawl(subject_id)

    for associations in data.values():
        for a in associations:
            if object_id != simplify_curie(safe_get(a, 'object', 'id')):
                continue

            association_predicate = safe_get(a, 'predicate')

            edge_label = safe_get(a, 'edge', 'label')
            # get_statements treats a list-valued label as "use the first
            # entry"; mirror that here instead of failing on ``.replace``.
            if isinstance(edge_label, list):
                edge_label = edge_label[0] if edge_label else None

            label_match = (
                isinstance(edge_label, str)
                and normalized_predicate == edge_label.replace(' ', '_')
            )

            if not (label_match or predicate == association_predicate):
                continue

            probability = safe_get(a, 'edge', 'probability')
            endpoint = safe_get(a, 'endpoint')

            annotations = []

            if probability is not None:
                annotations.append(
                    BeaconStatementAnnotation(tag='probability',
                                              value=probability))
            if association_predicate is not None:
                annotations.append(
                    BeaconStatementAnnotation(tag='predicate',
                                              value=association_predicate))
            if endpoint is not None:
                annotations.append(
                    BeaconStatementAnnotation(tag='endpoint',
                                              value=endpoint))
                # The endpoint input is only reported alongside an endpoint.
                annotations.append(
                    BeaconStatementAnnotation(tag='endpoint_input',
                                              value=subject_id))

            return BeaconStatementWithDetails(
                provided_by=safe_get(a, 'edge', 'provided_by'),
                is_defined_by=safe_get(a, 'api'),
                annotation=annotations)

    return BeaconStatementWithDetails()
def get_predicates():  # noqa: E501
    """get_predicates

    Get a list of predicates used in statements issued by the knowledge source  # noqa: E501

    The predicates are read from the ``'associations'`` list of the remote
    knowledge map. A predicate that is entirely lower case is reported as an
    ``edge_label``; any other predicate is reported as a ``relation``. If the
    request does not succeed an empty list is returned.

    :rtype: List[BeaconPredicate]
    """
    response = requests.get('http://biothings.io/explorer/api/v2/knowledgemap')

    if not response.ok:
        return []

    names = {safe_get(a, 'predicate') for a in response.json()['associations']}
    # An association without a predicate yields None, which has no ``lower``
    # method and previously crashed the whole listing.
    names.discard(None)

    predicates = []
    # Sorted so that repeated calls return the predicates in the same order
    # (set iteration order is arbitrary).
    for name in sorted(names):
        if name == name.lower():
            predicates.append(BeaconPredicate(edge_label=name))
        else:
            predicates.append(BeaconPredicate(relation=name))

    return predicates
def get_exact_matches_to_concept_list(c):
    """Return one ``ExactMatchResponse`` per usable CURIE in ``c``.

    Entries that are not strings or contain no ``':'`` are skipped. Every
    other entry is crawled; the object ids of its ``'EquivalentAssociation'``
    associations are collected as exact matches, except those whose prefix
    contains ``'name'`` (case-insensitive).

    :param c: iterable of CURIE strings; ``None`` is treated as empty.
    :returns: list of ``ExactMatchResponse``; ``within_domain`` is true when
        the crawl returned anything for the CURIE.
    """
    response = []

    for curie in c or []:
        if not isinstance(curie, str) or ':' not in curie:
            continue

        data = crawler.crawl(curie)
        exact_matches = []

        for associations in data.values():
            for a in associations:
                if a.get('predicate') != 'EquivalentAssociation':
                    continue

                object_id = safe_get(a, 'object', 'id')

                # A missing or colon-less id has no prefix to inspect; skip
                # it rather than failing on split/unpack.
                if not isinstance(object_id, str) or ':' not in object_id:
                    continue

                prefix = object_id.split(':', 1)[0]
                if 'name' not in prefix.lower():
                    exact_matches.append(object_id)

        response.append(
            ExactMatchResponse(id=curie,
                               within_domain=data != {},
                               has_exact_matches=exact_matches))

    return response
Example #5
0
def find_subject_name(data: dict):
    """Pick a display name for a subject from its crawled associations.

    Only ``'EquivalentAssociation'`` associations are considered. The object's
    label is used when present; otherwise the local part of the object id is
    used if the id's prefix contains ``'name'`` or ``'symbol'``
    (case-insensitive).

    :param data: mapping of category to a list of association dicts.
    :returns: the longest candidate of at most 60 characters; if every
        candidate is longer than 60 characters, the longest of those; ``None``
        when there are no candidates.
    """
    candidates = []

    for associations in data.values():
        for a in associations:
            if a.get('predicate') != 'EquivalentAssociation':
                continue

            object_name = safe_get(a, 'object', 'label')
            if object_name is not None:
                candidates.append(object_name)
                continue

            object_id = safe_get(a, 'object', 'id')
            # Without a label the id is the only source of a name; a missing
            # or colon-less id cannot be split into prefix and local part.
            if not isinstance(object_id, str) or ':' not in object_id:
                continue

            prefix, local_id = object_id.split(':', 1)
            prefix = prefix.lower()
            if 'name' in prefix or 'symbol' in prefix:
                candidates.append(local_id)

    # Names of 60 characters or fewer rank first, longer names within a group
    # rank ahead of shorter ones. ``min`` returns the first best candidate,
    # which is what a stable sort followed by taking element 0 would yield.
    return min(candidates, key=lambda n: (len(n) > 60, -len(n)), default=None)
def get_knowledge_map():  # noqa: E501
    """get_knowledge_map

    Get a high level knowledge map of the all the beacons by subject semantic type, predicate and semantic object type  # noqa: E501

    Associations from the remote knowledge map are grouped by subject
    category, predicate and object category; each group becomes one statement
    listing every subject and object prefix seen for it. A predicate that is
    entirely lower case is reported as an ``edge_label``, any other value as a
    ``relation``. If the request does not succeed an empty list is returned.

    :rtype: List[BeaconKnowledgeMapStatement]
    """
    response = requests.get('http://biothings.io/explorer/api/v2/knowledgemap')

    if not response.ok:
        return []

    groups = {}

    for a in response.json()['associations']:
        subject_category = safe_get(a, 'subject', 'semantic_type')
        predicate = safe_get(a, 'predicate')
        object_category = safe_get(a, 'object', 'semantic_type')

        # Key on a tuple rather than on the three values concatenated into
        # one string: without a separator, different triples could produce
        # the same text and be merged into a single statement.
        key = (str(subject_category), str(predicate), str(object_category))

        group = groups.get(key)
        if group is None:
            group = groups[key] = {
                'subject_prefixes': set(),
                'object_prefixes': set(),
            }

        group['subject_category'] = subject_category
        group['predicate'] = predicate
        group['object_category'] = object_category
        group['subject_prefixes'].add(safe_get(a, 'subject', 'prefix'))
        group['object_prefixes'].add(safe_get(a, 'object', 'prefix'))

    statements = []

    for group in groups.values():
        predicate = group['predicate']

        # A missing predicate (None) has no ``lower`` method; report it
        # through ``relation`` instead of crashing.
        if isinstance(predicate, str) and predicate == predicate.lower():
            predicate_args = {'edge_label': predicate}
        else:
            predicate_args = {'relation': predicate}

        statements.append(
            BeaconKnowledgeMapStatement(
                subject=BeaconKnowledgeMapSubject(
                    category=group['subject_category'],
                    prefixes=list(group['subject_prefixes'])),
                object=BeaconKnowledgeMapObject(
                    category=group['object_category'],
                    prefixes=list(group['object_prefixes'])),
                predicate=BeaconKnowledgeMapPredicate(negated=False,
                                                      **predicate_args)))

    return statements
Example #7
0
def get_predicates():  # noqa: E501
    """get_predicates

    Get a list of predicates used in statements issued by the knowledge source  # noqa: E501

    The predicates are read from the ``'associations'`` list returned by
    ``utils.load_kmap()``. A predicate that, with underscores replaced by
    spaces, is a slot of the ``blm`` schema is reported under its own
    (underscored) name as both edge label and relation. Any other predicate is
    reported with ``blm.DEFAULT_EDGE_LABEL`` as edge label and the original
    text as relation. The description is taken from the schema slot of the
    edge label when one is available.

    :rtype: List[BeaconPredicate]
    """
    data = utils.load_kmap()

    names = {safe_get(a, 'predicate') for a in data['associations']}
    # An association without a predicate yields None, which cannot be
    # normalised with ``replace``.
    names.discard(None)

    # Fetch the schema slots once instead of once or twice per predicate.
    slots = blm.schema().slots

    predicates = []

    # Sorted so that repeated calls return the predicates in the same order
    # (set iteration order is arbitrary).
    for name in sorted(names):
        if name.replace('_', ' ') in slots:
            edge_label = name.replace(' ', '_')
            relation = edge_label
        else:
            edge_label = blm.DEFAULT_EDGE_LABEL
            relation = name

        try:
            description = slots[edge_label.replace('_', ' ')].description
        except (KeyError, AttributeError):
            # No slot for this label, or the slot carries no description.
            description = None

        predicates.append(
            BeaconPredicate(description=description,
                            edge_label=edge_label,
                            relation=relation,
                            frequency=None))

    return predicates
def get_concept_details(conceptId):
    """Build a ``BeaconConceptWithDetails`` for ``conceptId`` from crawled data.

    Only ``'EquivalentAssociation'`` and ``'HasDescriptionAssociation'``
    associations are used. Each object id is classified by its prefix
    (case-insensitive):

    * contains ``'name'`` or ``'symbol'``: the local id is a name;
    * contains ``'description'``: the local id is a description;
    * contains neither ``'name'`` nor ``'description'``: the full id is an
      exact match (so ``'symbol'`` ids count as both a name and a match).

    The preferred name is the longest name of at most 60 characters; the
    remaining names become synonyms.

    :param conceptId: CURIE of the concept to describe.
    :returns: a populated ``BeaconConceptWithDetails``, or an empty one when
        the crawl returns no data.
    """
    data = crawler.crawl(conceptId)

    if data == {}:
        return BeaconConceptWithDetails()

    relevant_predicates = ('EquivalentAssociation', 'HasDescriptionAssociation')
    names, descriptions, xrefs = set(), [], []

    for associations in data.values():
        for a in associations:
            if a.get('predicate') not in relevant_predicates:
                continue

            object_id = safe_get(a, 'object', 'id')
            # A missing or colon-less id cannot be split into prefix and
            # local part; skip it rather than failing on split/unpack.
            if not isinstance(object_id, str) or ':' not in object_id:
                continue

            prefix, local_id = object_id.split(':', 1)
            prefix = prefix.lower()
            is_name = 'name' in prefix
            is_description = 'description' in prefix

            if is_name or 'symbol' in prefix:
                names.add(local_id)
            if is_description:
                descriptions.append(local_id)
            if not is_name and not is_description:
                xrefs.append(object_id)

    # Names of 60 characters or fewer rank first, longer names within a group
    # rank ahead of shorter ones. The name itself breaks ties so the chosen
    # name and the synonym order do not depend on set iteration order.
    ordered_names = sorted(names, key=lambda n: (len(n) > 60, -len(n), n))

    # ``partition`` tolerates an id without a colon (the whole id is then
    # used as the prefix) where tuple-unpacking a split would raise.
    concept_prefix = conceptId.partition(':')[0]

    return BeaconConceptWithDetails(
        id=conceptId,
        name=ordered_names[0] if ordered_names else None,
        synonyms=ordered_names[1:],
        exact_matches=xrefs,
        categories=[lookup_category(concept_prefix)],
        description='; '.join(descriptions))
Example #9
0
def get_knowledge_map():  # noqa: E501
    """get_knowledge_map

    Get a high level knowledge map of the all the beacons by subject semantic type, predicate and semantic object type  # noqa: E501

    Associations returned by ``utils.load_kmap()`` are grouped by subject
    category, predicate and object category; each group becomes one statement
    listing every subject and object prefix seen for it. Categories that are
    not classes of the ``blm`` schema are replaced by
    ``blm.DEFAULT_CATEGORY``. A predicate that, with underscores replaced by
    spaces, is a schema slot is reported under its underscored name as both
    edge label and relation; any other predicate gets
    ``blm.DEFAULT_EDGE_LABEL`` as edge label and keeps its original value as
    relation.

    :rtype: List[BeaconKnowledgeMapStatement]
    """
    data = utils.load_kmap()

    groups = {}

    for a in data['associations']:
        subject_category = safe_get(a, 'subject', 'semantic_type')
        predicate = safe_get(a, 'predicate')
        object_category = safe_get(a, 'object', 'semantic_type')

        # Key on a tuple rather than on the three values run together in one
        # string: without a separator, different triples could produce the
        # same text and be merged into a single statement.
        key = (str(subject_category), str(predicate), str(object_category))

        group = groups.get(key)
        if group is None:
            group = groups[key] = {
                'subject_prefixes': set(),
                'object_prefixes': set(),
            }

        group['subject_category'] = subject_category
        group['predicate'] = predicate
        group['object_category'] = object_category
        group['subject_prefixes'].add(safe_get(a, 'subject', 'prefix'))
        group['object_prefixes'].add(safe_get(a, 'object', 'prefix'))

    # Fetch the schema once instead of up to three times per group.
    schema = blm.schema()
    known_classes = schema.classes
    known_slots = schema.slots

    statements = []

    for group in groups.values():
        object_category = group['object_category']
        if object_category not in known_classes:
            object_category = blm.DEFAULT_CATEGORY

        subject_category = group['subject_category']
        if subject_category not in known_classes:
            subject_category = blm.DEFAULT_CATEGORY

        predicate = group['predicate']
        # A missing predicate (None) cannot be normalised with ``replace``;
        # it falls through to the default edge label like any unknown one.
        if isinstance(predicate, str) and predicate.replace('_', ' ') in known_slots:
            edge_label = predicate.replace(' ', '_')
            relation = edge_label
        else:
            edge_label = blm.DEFAULT_EDGE_LABEL
            relation = predicate

        statements.append(
            BeaconKnowledgeMapStatement(
                subject=BeaconKnowledgeMapSubject(
                    category=subject_category,
                    prefixes=list(group['subject_prefixes'])),
                object=BeaconKnowledgeMapObject(
                    category=object_category,
                    prefixes=list(group['object_prefixes'])),
                predicate=BeaconKnowledgeMapPredicate(edge_label=edge_label,
                                                      relation=relation,
                                                      negated=False)))

    return statements
Example #10
0
def _object_display_name(association):
    """Return a display name for the association's object, or None.

    The object's label is preferred. Failing that, the local part of the
    object's ``'secondary-id'`` is used; when that id's prefix contains
    ``'symbol'`` and a taxonomy is present, the colon-free taxonomy entries
    are appended in parentheses.
    """
    label = safe_get(association, 'object', 'label')
    if label is not None:
        return label

    secondary_id = safe_get(association, 'object', 'secondary-id')
    if secondary_id is None or ':' not in secondary_id:
        return None

    secondary_prefix, symbol = secondary_id.split(':', 1)
    if 'symbol' not in secondary_prefix.lower():
        return symbol

    taxonomy = safe_get(association, 'object', 'taxonomy')
    if taxonomy is None:
        return symbol

    taxonomy_names = ', '.join(entry for entry in taxonomy
                               if ':' not in entry)
    return symbol + ' (taxonomy: {})'.format(taxonomy_names)


def _predicate_label(association):
    """Return the underscore-separated predicate name for an association.

    The edge label is preferred over the predicate, ``'EquivalentAssociation'``
    is reported as ``'same_as'``, and a list value contributes its first
    entry. ``blm.DEFAULT_EDGE_LABEL`` is used when nothing else is available.
    """
    name = safe_get(association, 'edge', 'label')
    if name is None:
        name = safe_get(association, 'predicate')
    if name == 'EquivalentAssociation':
        name = 'same_as'

    if isinstance(name, list):
        name = name[0] if name else None

    # Neither an edge label nor a predicate: fall back to the default label
    # rather than failing on ``None.replace``.
    if name is None:
        name = blm.DEFAULT_EDGE_LABEL

    return name.replace(' ', '_')


def get_statements(s=None,
                   s_keywords=None,
                   s_categories=None,
                   edge_label=None,
                   relation=None,
                   t=None,
                   t_keywords=None,
                   t_categories=None,
                   offset=None,
                   size=None):
    """Return the statements found by crawling each subject id in ``s``.

    Subject ids without a ``':'`` and subjects for which the crawl returns
    nothing are skipped. Associations whose object id prefix contains
    ``'name'`` or ``'description'`` are not reported as statements. The
    collected statements are then filtered with ``build_filter`` using all of
    the filter arguments, de-duplicated, and finally sliced by ``offset`` and
    ``size``.

    :param s: list of subject CURIEs; the request is aborted with status 400
        when it is ``None``.
    :param s_keywords: passed through to ``build_filter``.
    :param s_categories: passed through to ``build_filter``.
    :param edge_label: passed through to ``build_filter``.
    :param relation: passed through to ``build_filter``.
    :param t: passed through to ``build_filter``.
    :param t_keywords: passed through to ``build_filter``.
    :param t_categories: passed through to ``build_filter``.
    :param offset: number of leading statements to drop, if given.
    :param size: maximum number of statements to return, if given.
    :returns: list of statements as produced by ``build_statement``.
    """
    if s is None:
        abort(400,
              'Cannot search for statements without providing a subject ID')

    statements = []

    for raw_subject_id in s:
        if ':' not in raw_subject_id:
            continue

        data = crawler.crawl(raw_subject_id)

        if data == {}:
            continue

        subject_name = find_subject_name(data)

        # Derive everything subject-related once, from the id as supplied.
        # Previously the loop variable itself was overwritten with its
        # simplified form inside the inner loop, so the second and later
        # associations computed the prefix from the simplified id.
        subject_prefix = raw_subject_id.split(':', 1)[0]
        subject_category = utils.lookup_category(subject_prefix)
        subject_id = simplify_curie(raw_subject_id)

        for category, associations in data.items():
            if category == 'null':
                category = None

            for a in associations:
                object_id = safe_get(a, 'object', 'id')
                if object_id is not None and ':' in object_id:
                    object_prefix = object_id.split(':', 1)[0].lower()
                    # Names and descriptions are attributes of the subject,
                    # not statements about other concepts.
                    if 'name' in object_prefix or 'description' in object_prefix:
                        continue

                statements.append(
                    build_statement(
                        object_id=simplify_curie(object_id),
                        object_name=_object_display_name(a),
                        object_category=category,
                        subject_id=subject_id,
                        subject_name=subject_name,
                        subject_category=subject_category,
                        predicate_id=safe_get(a, 'edge', 'id'),
                        predicate_name=_predicate_label(a)))

    is_valid = build_filter(s, s_keywords, s_categories, edge_label, relation,
                            t, t_keywords, t_categories)

    statements = [statement for statement in statements if is_valid(statement)]

    statements = remove_duplicates(statements)

    if offset is not None:
        statements = statements[offset:]
    if size is not None:
        statements = statements[:size]

    return statements