Example #1
0
    def get(self, category):
        """
        Summarize a set of objects

        Maps the requested subjects onto the requested slim terms with
        ``map2slim``. For the function category, gene IDs are first expanded
        to their UniProt proteins via SciGraph. Afterwards, human UniProtKB
        subject IDs in the results are rewritten to HGNC gene IDs where
        SciGraph provides one.

        Args:
            category: association object category to summarize.

        Returns:
            The ``map2slim`` results, with subject IDs rewritten in place.
        """
        args = parser.parse_args()
        # 'slim' and 'subject' are passed to map2slim explicitly, so they must
        # not also travel in the **args pass-through.
        slim = args.pop('slim', None)
        subjects = args.pop('subject', None)
        # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
        # https://github.com/monarch-initiative/dipper/issues/461

        sg_dev = SciGraph(
            url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')

        # str.replace is a no-op when the prefix is absent, so no guard needed.
        subjects = [s.replace('WormBase:', 'WB:') for s in subjects]

        if category == FUNCTION_CATEGORY:
            # get proteins for a gene only when the category is 'function'
            gene_markers = ('HGNC:', 'NCBIGene:', 'ENSEMBL:')
            slimmer_subjects = []
            for s in subjects:
                if any(marker in s for marker in gene_markers):
                    prots = sg_dev.gene_to_uniprot_proteins(s)
                    if not prots:
                        # No protein mapping: query with the gene ID itself.
                        prots = [s]
                    slimmer_subjects += prots
                else:
                    slimmer_subjects.append(s)
        else:
            slimmer_subjects = subjects

        if category == ANATOMY_CATEGORY:
            category = 'anatomical entity'

        results = map2slim(subjects=slimmer_subjects,
                           slim=slim,
                           object_category=category,
                           user_agent=USER_AGENT,
                           **args)

        # To the fullest extent possible return HGNC ids
        # Cache of protein ID -> replacement subject ID. Proteins with no HGNC
        # gene map to themselves so they are only looked up once.
        checked = {}
        for result in results:
            for association in result['assocs']:
                subject = association['subject']
                taxon = subject['taxon']['id']
                protein_id = subject['id']
                if taxon != 'NCBITaxon:9606' or not protein_id.startswith(
                        'UniProtKB:'):
                    continue
                if protein_id not in checked:
                    replacement = protein_id
                    for gene in sg_dev.uniprot_protein_to_genes(protein_id):
                        if gene.startswith('HGNC'):
                            # When several HGNC genes match, the last one wins.
                            replacement = gene
                    checked[protein_id] = replacement
                subject['id'] = checked[protein_id]
        return results
Example #2
0
    def get(self, category):
        """
        Summarize a set of objects

        Maps the requested subjects onto the requested slim terms with
        ``map2slim``. Every gene subject (HGNC, NCBIGene, ENSEMBL) is first
        expanded to its UniProt proteins via SciGraph. Afterwards, human
        UniProtKB subject IDs in the results are rewritten to HGNC gene IDs
        where SciGraph provides one.

        Args:
            category: association object category to summarize.

        Returns:
            The ``map2slim`` results, with subject IDs rewritten in place.
        """
        args = parser.parse_args()
        # 'slim' and 'subject' are passed to map2slim explicitly, so they must
        # not also travel in the **args pass-through.
        slim = args.pop('slim', None)
        subjects = args.pop('subject', None)
        # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
        # https://github.com/monarch-initiative/dipper/issues/461

        scigraph_url = 'https://scigraph-data-dev.monarchinitiative.org/scigraph/'
        # Created on first use and then shared by both mapping phases below.
        sg_dev = None

        gene_prefixes = ('HGNC', 'NCBIGene', 'ENSEMBL:')
        prots = []
        # Every subject is normalised and mapped, not only the first one, so
        # no subject is dropped when several are supplied.
        for subject in subjects:
            subject = subject.replace('WormBase:', 'WB:', 1)
            if subject.startswith(gene_prefixes):
                if sg_dev is None:
                    sg_dev = SciGraph(url=scigraph_url)
                mapped = sg_dev.gene_to_uniprot_proteins(subject)
                if not mapped:
                    # No protein mapping: query with the gene ID itself.
                    mapped = [subject]
                prots += mapped
            else:
                prots.append(subject)

        results = map2slim(subjects=prots,
                           slim=slim,
                           rows=200,
                           exclude_automatic_assertions=True,
                           object_category=category,
                           **args)
        # To the fullest extent possible return HGNC ids
        # Cache of protein ID -> replacement subject ID. Proteins with no HGNC
        # gene map to themselves so they are only looked up once.
        checked = {}
        for result in results:
            for association in result['assocs']:
                assoc_subject = association['subject']
                taxon = assoc_subject['taxon']['id']
                protein_id = assoc_subject['id']
                if taxon != 'NCBITaxon:9606' or not protein_id.startswith(
                        'UniProtKB:'):
                    continue
                if protein_id not in checked:
                    if sg_dev is None:
                        sg_dev = SciGraph(url=scigraph_url)
                    replacement = protein_id
                    for gene in sg_dev.uniprot_protein_to_genes(protein_id):
                        if gene.startswith('HGNC'):
                            # When several HGNC genes match, the last one wins.
                            replacement = gene
                    checked[protein_id] = replacement
                assoc_subject['id'] = checked[protein_id]
        return results
Example #3
0
    def get(self, id):
        """
        Returns function associations for a gene.

        IMPLEMENTATION DETAILS
        ----------------------

        Note: currently this is implemented as a query to the GO/AmiGO solr instance.
        This directly supports IDs such as:

         - ZFIN e.g. ZFIN:ZDB-GENE-050417-357

        Note that the AmiGO GOlr natively stores MGI annotations to MGI:MGI:nn. However,
        the standard for biolink is MGI:nnnn, so you should use this (will be transparently
        mapped to legacy ID)

        Additionally, for some species such as Human, GO has the annotation attached to the UniProt ID.
        Again, this should be transparently handled; e.g. you can use NCBIGene:6469, and this will be
        mapped behind the scenes for querying.
        """
        # Parse the request arguments once; the same options are reused for
        # the direct query and for every per-protein fallback query.
        query_args = core_parser.parse_args()

        assocs = search_associations(object_category='function',
                                     subject=id,
                                     **query_args)

        # If there are no associations for the given ID, try other IDs.
        # Note the AmiGO instance does *not* support equivalent IDs
        if not assocs['associations']:
            # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
            # https://github.com/monarch-initiative/dipper/issues/461
            logging.debug(
                "Found no associations using %s - will try mapping to other IDs",
                id)
            sg_dev = SciGraph(
                url='https://scigraph-data-dev.monarchinitiative.org/scigraph/'
            )
            # Merge the associations found for each mapped protein into the
            # (empty) result of the direct query.
            for prot in sg_dev.gene_to_uniprot_proteins(id):
                pr_assocs = search_associations(object_category='function',
                                                subject=prot,
                                                **query_args)
                assocs['associations'] += pr_assocs['associations']
        return assocs
Example #4
0
class SciGraphIdentifierConverter(object):
    """
    Converts between gene and protein identifiers by querying SciGraph.
    """
    def __init__(self):
        # The SciGraph endpoint comes from the 'scigraph_data' section of the
        # biolink configuration.
        scigraph_url = get_biolink_config()['scigraph_data']['url']
        self.scigraph = SciGraph(scigraph_url)

    def convert_gene_to_protein(self, identifier):
        """
        Look up a gene ID in SciGraph and return the matching UniProtKB IDs.
        """
        return self.scigraph.gene_to_uniprot_proteins(identifier)

    def convert_protein_to_gene(self, identifier):
        """
        Look up a UniProtKB ID in SciGraph and return the matching gene IDs.
        """
        return self.scigraph.uniprot_protein_to_genes(identifier)
Example #5
0
 def get(self, category):
     """
     Summarize a set of objects

     Runs ``map2slim`` for the requested subjects. If that yields no
     associations and exactly one subject was given, the subject is mapped
     to UniProt proteins via SciGraph and the query is retried with those.
     """
     args = parser.parse_args()
     logging.info("category is {}".format(category))
     # 'slim' and 'subject' are passed explicitly, so take them out of the
     # keyword pass-through.
     slim = args.pop('slim')
     subjects = args.pop('subject')

     def summarize(ids):
         # One place for the map2slim call shared by both query attempts.
         return map2slim(subjects=ids,
                         slim=slim,
                         rows=200,
                         object_category=category,
                         **args)

     results = summarize(subjects)

     # If there are no associations for the given ID, try other IDs.
     # Note the AmiGO instance does *not* support equivalent IDs
     total_assocs = sum(len(entry['assocs']) for entry in results)
     if total_assocs != 0 or len(subjects) != 1:
         return results

     # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
     # https://github.com/monarch-initiative/dipper/issues/461
     # nota bene:
     # currently incomplete because code is not checking for the possibility of >1 subjects
     only_subject = subjects[0]
     logging.info(
         "Found no associations using {} - will try mapping to other IDs"
         .format(only_subject))
     scigraph = SciGraph(
         url='https://scigraph-data-dev.monarchinitiative.org/scigraph/'
     )
     proteins = scigraph.gene_to_uniprot_proteins(only_subject)
     if len(proteins) > 0:
         results = summarize(proteins)
     return results