Beispiel #1
0
    def get(self, category):
        """
        Summarize a set of objects

        Reads ``slim`` and ``subject`` from the request parser and passes the
        remaining parsed arguments through to ``map2slim``.

        Before the query, ``WormBase:`` prefixes are rewritten to ``WB:``.
        When ``category`` is the function category, subjects containing
        ``HGNC:``, ``NCBIGene:`` or ``ENSEMBL:`` are replaced by the UniProt
        proteins SciGraph reports for them; the original ID is kept when no
        protein is returned.

        After the query, subject IDs of human (``NCBITaxon:9606``)
        ``UniProtKB:`` associations are replaced with an HGNC ID where
        SciGraph provides one.

        :param category: object category to summarize over
        :return: the ``map2slim`` results, with subject IDs rewritten in place
        """
        args = parser.parse_args()
        slim = args.pop('slim')
        subjects = args.pop('subject')
        # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
        # https://github.com/monarch-initiative/dipper/issues/461

        sg_dev = SciGraph(
            url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')

        # str.replace is a no-op when the prefix is absent, so no pre-check is needed
        subjects = [s.replace('WormBase:', 'WB:') for s in subjects]
        if category == FUNCTION_CATEGORY:
            # get proteins for a gene only when the category is 'function'
            slimmer_subjects = []
            for s in subjects:
                if 'HGNC:' in s or 'NCBIGene:' in s or 'ENSEMBL:' in s:
                    # fall back to the gene ID itself when no protein is known
                    slimmer_subjects += sg_dev.gene_to_uniprot_proteins(s) or [s]
                else:
                    slimmer_subjects.append(s)
        else:
            slimmer_subjects = subjects

        if category == ANATOMY_CATEGORY:
            category = 'anatomical entity'

        results = map2slim(subjects=slimmer_subjects,
                           slim=slim,
                           object_category=category,
                           user_agent=USER_AGENT,
                           **args)

        # To the fullest extent possible return HGNC ids
        # protein ID -> HGNC ID, or None when the lookup found no HGNC gene;
        # caching the misses too avoids repeating the remote lookup for every
        # association that shares the same protein.
        hgnc_by_protein = {}
        for result in results:
            for association in result['assocs']:
                subject = association['subject']
                taxon = subject['taxon']['id']
                protein_id = subject['id']
                if taxon != 'NCBITaxon:9606' or not protein_id.startswith(
                        'UniProtKB:'):
                    continue
                if protein_id not in hgnc_by_protein:
                    hgnc = None
                    for gene in sg_dev.uniprot_protein_to_genes(protein_id):
                        if gene.startswith('HGNC'):
                            # the last HGNC gene wins when several are returned
                            hgnc = gene
                    hgnc_by_protein[protein_id] = hgnc
                hgnc = hgnc_by_protein[protein_id]
                if hgnc is not None:
                    subject['id'] = hgnc
        return results
Beispiel #2
0
    def get(self, id):
        """
        Returns function associations for a gene.

        IMPLEMENTATION DETAILS
        ----------------------

        Note: currently this is implemented as a query to the GO/AmiGO solr instance.
        This directly supports IDs such as:

         - ZFIN e.g. ZFIN:ZDB-GENE-050417-357

        Note that the AmiGO GOlr natively stores MGI annotations to MGI:MGI:nn. However,
        the standard for biolink is MGI:nnnn, so you should use this (will be transparently
        mapped to legacy ID)

        Additionally, for some species such as Human, GO has the annotation attached to the UniProt ID.
        Again, this should be transparently handled; e.g. you can use NCBIGene:6469, and this will be
        mapped behind the scenes for querying.

        When the direct query yields no associations, the ID is mapped to
        UniProt proteins via SciGraph and the associations found for each
        protein are appended to the response.
        """
        # Parse the request once; the same arguments are reused for every query.
        query_args = core_parser.parse_args()

        assocs = search_associations(object_category='function',
                                     subject=id,
                                     **query_args)

        # If there are no associations for the given ID, try other IDs.
        # Note the AmiGO instance does *not* support equivalent IDs
        if not assocs['associations']:
            # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
            # https://github.com/monarch-initiative/dipper/issues/461
            logging.debug(
                "Found no associations using %s - will try mapping to other IDs",
                id)
            sg_dev = SciGraph(
                url='https://scigraph-data-dev.monarchinitiative.org/scigraph/'
            )
            for prot in sg_dev.gene_to_uniprot_proteins(id):
                pr_assocs = search_associations(object_category='function',
                                                subject=prot,
                                                **query_args)
                assocs['associations'] += pr_assocs['associations']
        return assocs
Beispiel #3
0
    def get(self):
        """
        For a given gene(s), summarize its annotations over a defined set of slim

        Reads ``slim`` and ``subject`` from ``self.function_parser`` and passes
        the remaining parsed arguments through to ``map2slim`` with the
        ``function`` object category.

        Before the query, ``WormBase:`` prefixes are rewritten to ``WB:`` and
        subjects containing ``HGNC:``, ``NCBIGene:`` or ``ENSEMBL:`` are
        replaced by the proteins returned by
        ``identifier_converter.convert_gene_to_protein``; the original ID is
        kept when nothing is returned.

        After the query, subject IDs of human (``NCBITaxon:9606``)
        ``UniProtKB:`` associations are replaced with an HGNC ID where
        ``identifier_converter.convert_protein_to_gene`` provides one.

        :return: the ``map2slim`` results, with subject IDs rewritten in place
        """
        args = self.function_parser.parse_args()
        slim = args.pop('slim')
        subjects = args.pop('subject')

        # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
        # https://github.com/monarch-initiative/dipper/issues/461

        slimmer_subjects = []
        for s in subjects:
            # str.replace is a no-op when the prefix is absent
            s = s.replace('WormBase:', 'WB:')
            if 'HGNC:' in s or 'NCBIGene:' in s or 'ENSEMBL:' in s:
                # fall back to the gene ID itself when no protein is known
                slimmer_subjects += (
                    identifier_converter.convert_gene_to_protein(s) or [s])
            else:
                slimmer_subjects.append(s)

        results = map2slim(subjects=slimmer_subjects,
                           slim=slim,
                           object_category='function',
                           user_agent=USER_AGENT,
                           **args)

        # To the fullest extent possible return HGNC ids
        # protein ID -> HGNC ID, or None when the lookup found no HGNC gene;
        # caching the misses too avoids repeating the lookup for every
        # association that shares the same protein.
        hgnc_by_protein = {}
        for result in results:
            for association in result['assocs']:
                subject = association['subject']
                taxon = subject['taxon']['id']
                protein_id = subject['id']
                if taxon != 'NCBITaxon:9606' or not protein_id.startswith(
                        'UniProtKB:'):
                    continue
                if protein_id not in hgnc_by_protein:
                    hgnc = None
                    for gene in identifier_converter.convert_protein_to_gene(
                            protein_id):
                        if gene.startswith('HGNC'):
                            # the last HGNC gene wins when several are returned
                            hgnc = gene
                    hgnc_by_protein[protein_id] = hgnc
                hgnc = hgnc_by_protein[protein_id]
                if hgnc is not None:
                    subject['id'] = hgnc

        return results
Beispiel #4
0
class SciGraphIdentifierConverter(object):
    """
    Identifier conversion backed by a SciGraph client.

    The client is built from the ``scigraph_data`` URL of the biolink
    configuration and stored on ``self.scigraph``.
    """
    def __init__(self):
        data_url = get_biolink_config()['scigraph_data']['url']
        self.scigraph = SciGraph(data_url)

    def convert_gene_to_protein(self, identifier):
        """
        Look up the UniProtKB ID(s) SciGraph associates with a gene ID.

        Returns whatever ``gene_to_uniprot_proteins`` returns, unchanged.
        """
        return self.scigraph.gene_to_uniprot_proteins(identifier)

    def convert_protein_to_gene(self, identifier):
        """
        Look up the gene ID(s) SciGraph associates with a UniProtKB ID.

        Returns whatever ``uniprot_protein_to_genes`` returns, unchanged.
        """
        return self.scigraph.uniprot_protein_to_genes(identifier)
Beispiel #5
0
 def get(self, category):
     """
     Summarize a set of objects

     Runs ``map2slim`` for the requested subjects. If that yields no
     associations at all and exactly one subject was given, the subject is
     mapped to UniProt proteins via SciGraph and, when any are returned, the
     query is repeated with those proteins.
     """
     args = parser.parse_args()
     logging.info("category is {}".format(category))
     slim = args.pop('slim')
     subjects = args.pop('subject')

     summary = map2slim(subjects=subjects,
                        slim=slim,
                        rows=200,
                        object_category=category,
                        **args)

     # If there are no associations for the given ID, try other IDs.
     # Note the AmiGO instance does *not* support equivalent IDs
     total_assocs = sum(len(entry['assocs']) for entry in summary)
     if total_assocs != 0 or len(subjects) != 1:
         return summary

     # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
     # https://github.com/monarch-initiative/dipper/issues/461
     # nota bene:
     # currently incomplete because code is not checking for the possibility of >1 subjects
     only_subject = subjects[0]
     logging.info(
         "Found no associations using {} - will try mapping to other IDs"
         .format(only_subject))
     sg_dev = SciGraph(
         url='https://scigraph-data-dev.monarchinitiative.org/scigraph/'
     )
     proteins = sg_dev.gene_to_uniprot_proteins(only_subject)
     if len(proteins) > 0:
         summary = map2slim(subjects=proteins,
                            slim=slim,
                            rows=200,
                            object_category=category,
                            **args)
     return summary
Beispiel #6
0
    def get(self, category):
        """
        Summarize a set of objects

        Reads ``slim`` and ``subject`` from the request parser and passes the
        remaining parsed arguments through to ``map2slim`` (with ``rows=200``
        and ``exclude_automatic_assertions=True``).

        Every subject has a leading ``WormBase:`` prefix rewritten to ``WB:``.
        Subjects starting with ``HGNC``, ``NCBIGene`` or ``ENSEMBL:`` are
        replaced by the UniProt proteins SciGraph reports for them; the
        original ID is kept when no protein is returned.

        After the query, subject IDs of human (``NCBITaxon:9606``)
        ``UniProtKB:`` associations are replaced with an HGNC ID where
        SciGraph provides one.

        :param category: object category to summarize over
        :return: the ``map2slim`` results, with subject IDs rewritten in place
        """
        args = parser.parse_args()
        slim = args.pop('slim')
        subjects = args.pop('subject')
        # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
        # https://github.com/monarch-initiative/dipper/issues/461

        # One client serves both the gene->protein and protein->gene lookups.
        sg_dev = SciGraph(
            url='https://scigraph-data-dev.monarchinitiative.org/scigraph/'
        )

        gene_prefixes = ('HGNC', 'NCBIGene', 'ENSEMBL:')
        prots = []
        # Map every subject, not just the first, so that additional subjects
        # are neither dropped nor left unnormalised.
        for subject in subjects:
            subject = subject.replace('WormBase:', 'WB:', 1)
            if subject.startswith(gene_prefixes):
                # fall back to the gene ID itself when no protein is known
                prots.extend(
                    sg_dev.gene_to_uniprot_proteins(subject) or [subject])
            else:
                prots.append(subject)

        results = map2slim(subjects=prots,
                           slim=slim,
                           rows=200,
                           exclude_automatic_assertions=True,
                           object_category=category,
                           **args)
        # To the fullest extent possible return HGNC ids
        # protein ID -> HGNC ID, or None when the lookup found no HGNC gene;
        # caching the misses too avoids repeating the remote lookup for every
        # association that shares the same protein.
        hgnc_by_protein = {}
        for result in results:
            for association in result['assocs']:
                subject = association['subject']
                taxon = subject['taxon']['id']
                protein_id = subject['id']
                if taxon != 'NCBITaxon:9606' or not protein_id.startswith(
                        'UniProtKB:'):
                    continue
                if protein_id not in hgnc_by_protein:
                    hgnc = None
                    for gene in sg_dev.uniprot_protein_to_genes(protein_id):
                        if gene.startswith('HGNC'):
                            # the last HGNC gene wins when several are returned
                            hgnc = gene
                    hgnc_by_protein[protein_id] = hgnc
                hgnc = hgnc_by_protein[protein_id]
                if hgnc is not None:
                    subject['id'] = hgnc
        return results
Beispiel #7
0
import logging

from flask import request
from flask_restplus import Resource, inputs
from biolink.datamodel.serializers import bbop_graph, bio_object
from biolink.error_handlers import NoResultFoundException, UnhandledException
from scigraph.scigraph_util import SciGraph
from scigraph.model.BBOPGraph import BBOPGraph
from biolink.api.restplus import api
from biolink.settings import get_biolink_config

# Module-level logger named after this module.
log = logging.getLogger(__name__)

# SciGraph clients shared by the resources in this module, built from the
# 'scigraph_data' and 'scigraph_ontology' URLs of the biolink configuration.
sg_data = SciGraph(get_biolink_config()['scigraph_data']['url'])
sg_ont = SciGraph(get_biolink_config()['scigraph_ontology']['url'])


@api.doc(params={'id': 'CURIE e.g. HP:0000465'})
class NodeResource(Resource):
    @api.marshal_list_with(bio_object)
    def get(self, id):
        """
        Returns a graph node.

        A node is an abstract representation of some kind of entity. The entity may be a physical thing such as a patient,
        a molecular entity such as a gene or protein, or a conceptual entity such as a class from an ontology.
        """
        # Delegate straight to the data SciGraph client; the result is
        # marshalled with the bio_object serializer by the decorator above.
        return sg_data.bioobject(id)

Beispiel #8
0
from scigraph.scigraph_util import SciGraph
import json

# SciGraph client shared by the tests in this module; constructed with no
# arguments, so it uses whatever defaults SciGraph() provides.
sg = SciGraph()

def test_node():
    """Fetch node MP:0000272 and check its label."""
    node = sg.node(id="MP:0000272")
    expected_label = "abnormal aorta morphology"
    assert node.lbl == expected_label

def test_bio_operations():
    """Check that phenotype_to_entity_list returns a non-empty result for ZP:0004204."""
    phenotype_id = "ZP:0004204"
    entities = sg.phenotype_to_entity_list(id=phenotype_id)
    assert len(entities) > 0
Beispiel #9
0
import logging

from flask import request
from flask_restplus import Resource, inputs
from biolink.datamodel.serializers import association, bbop_graph, bio_object
from biolink.error_handlers import NoResultFoundException
from scigraph.scigraph_util import SciGraph
from scigraph.model.BBOPGraph import BBOPGraph
from biolink.api.restplus import api
from biolink.settings import get_biolink_config

# Module-level logger named after this module.
log = logging.getLogger(__name__)

# SciGraph client shared by the resources in this module, built from the
# 'scigraph_data' URL of the biolink configuration.
sg = SciGraph(get_biolink_config()['scigraph_data']['url'])


@api.doc(params={'id': 'CURIE e.g. HP:0000465'})
class NodeResource(Resource):
    @api.marshal_list_with(bio_object)
    def get(self, id):
        """
        Returns a graph node.

        A node is an abstract representation of some kind of entity. The entity may be a physical thing such as a patient,
        a molecular entity such as a gene or protein, or a conceptual entity such as a class from an ontology.
        """
        # Delegate straight to the SciGraph client; the result is marshalled
        # with the bio_object serializer by the decorator above.
        return sg.bioobject(id)


@api.doc(params={'id': 'CURIE e.g. HP:0000465'})
Beispiel #10
0
# API namespace under which the bioentity routes are registered.
ns = api.namespace('bioentity', description='Retrieval of domain entities plus associations')

# Request parser holding the query arguments shared by the association endpoints.
# NOTE(review): several flags below use type=bool; presumably any non-empty
# query value (including 'false') would then be parsed as True - confirm
# whether a dedicated boolean input type is intended here.
core_parser = api.parser()
core_parser.add_argument('rows', type=int, required=False, default=100, help='number of rows')
core_parser.add_argument('start', type=int, required=False, default=1, help='row to start at')
core_parser.add_argument('unselect_evidence', type=bool, help='If set, excludes evidence objects in response')
core_parser.add_argument('exclude_automatic_assertions', default=False, type=bool, help='If set, excludes associations that involve IEAs (ECO:0000501)')
core_parser.add_argument('fetch_objects', type=bool, default=True, help='If true, returns a distinct set of association.objects (typically ontology terms). This appears at the top level of the results payload')
core_parser.add_argument('use_compact_associations', type=bool, default=False, help='If true, returns results in compact associations format')
# action='append' lets 'slim' be supplied more than once in a request.
core_parser.add_argument('slim', action='append', help='Map objects up (slim) to a higher level category. Value can be ontology class ID or subset ID')
core_parser.add_argument('evidence', help="""Object id, e.g. ECO:0000501 (for IEA; Includes inferred by default)
                    or a specific publication or other supporting ibject, e.g. ZFIN:ZDB-PUB-060503-2.
                    """)

# SciGraph client used by the get_object_* helpers; the URL is hard-coded here.
scigraph = SciGraph('https://scigraph-data.monarchinitiative.org/scigraph/')

# Relation value passed as `rel` when searching for homology associations.
homol_rel = HomologyTypes.Homolog.value

def get_object_gene(id, **args):
    """
    Build a gene object with its associations attached.

    Fetches the 'Gene' bio-object for ``id`` from SciGraph, then attaches the
    phenotype, homology, disease and genotype associations found by
    ``search_associations``. Extra keyword arguments are forwarded to every
    ``search_associations`` call.
    """
    gene = scigraph.bioobject(id, 'Gene')

    phenotypes = search_associations(subject=id, object_category='phenotype', **args)
    gene.phenotype_associations = phenotypes['associations']

    homologs = search_associations(subject=id, rel=homol_rel, object_category='gene', **args)
    gene.homology_associations = homologs['associations']

    diseases = search_associations(subject=id, object_category='disease', **args)
    gene.disease_associations = diseases['associations']

    # genotype associations are queried with subject and object inverted
    genotypes = search_associations(subject=id, invert_subject_object=True, object_category='genotype', **args)
    gene.genotype_associations = genotypes['associations']

    return gene

def get_object_genotype(id, **args):
        obj = scigraph.bioobject(id, 'Genotype')
        obj.phenotype_associations = search_associations(subject=id, object_category='phenotype', **args)['associations']
Beispiel #11
0
 def __init__(self):
     """Create the SciGraph client from the configured 'scigraph_data' URL."""
     data_url = get_biolink_config()['scigraph_data']['url']
     self.scigraph = SciGraph(data_url)
Beispiel #12
0
    default=False,
    help='Should only the longest entity be returned for an overlapping group')
# Boolean switches for the annotator; each defaults to False and is parsed
# with inputs.boolean.
parser.add_argument('include_abbreviation',
                    type=inputs.boolean,
                    default=False,
                    help='Should abbreviations be included')
parser.add_argument('include_acronym',
                    type=inputs.boolean,
                    default=False,
                    help='Should acronyms be included')
parser.add_argument('include_numbers',
                    type=inputs.boolean,
                    default=False,
                    help='Should numbers be included')

# SciGraph client built from the 'scigraph_ontology' URL of the biolink
# configuration.
scigraph = SciGraph(get_biolink_config()['scigraph_ontology']['url'])


def parse_args_for_annotator(parser):
    """
    Convenience method for parsing and preparing parameters for SciGraph annotator
    """
    args = parser.parse_args()
    if 'include_category' in args:
        val = args.pop('include_category')
        args['includeCat'] = val
    if 'exclude_category' in args:
        val = args.pop('exclude_category')
        args['excludeCat'] = val
    if 'min_length' in args:
        val = args.pop('min_length')