def get(self, category):
    """
    Summarize a set of objects
    """
    # Flow:
    #   1. Read 'slim' and 'subject' from the request; everything else in
    #      ``args`` is forwarded untouched to map2slim.
    #   2. Normalise 'WormBase:' prefixes to 'WB:'.
    #   3. For the function category only, expand gene IDs into UniProt
    #      protein IDs through SciGraph.
    #   4. Call map2slim, then rewrite human UniProtKB subject IDs back to
    #      HGNC IDs wherever SciGraph knows a mapping.
    # Returns the (possibly rewritten) map2slim results.
    args = parser.parse_args()
    # pop() reads and removes the key in one step so that it is not passed
    # a second time through **args below.
    slim = args.pop('slim')
    # A request without any subject yields None; treat it as "no subjects"
    # instead of failing while iterating.
    subjects = args.pop('subject') or []

    # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
    # https://github.com/monarch-initiative/dipper/issues/461
    sg_dev = SciGraph(url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')

    # str.replace is a no-op when the prefix is absent, so no membership
    # test is needed.
    subjects = [x.replace('WormBase:', 'WB:') for x in subjects]

    if category == FUNCTION_CATEGORY:
        # get proteins for a gene only when the category is 'function'
        gene_prefixes = ('HGNC:', 'NCBIGene:', 'ENSEMBL:')
        slimmer_subjects = []
        for s in subjects:
            if any(prefix in s for prefix in gene_prefixes):
                prots = sg_dev.gene_to_uniprot_proteins(s)
                # Fall back to the gene ID itself when nothing is mapped.
                slimmer_subjects.extend(prots if len(prots) > 0 else [s])
            else:
                slimmer_subjects.append(s)
    else:
        slimmer_subjects = subjects

    if category == ANATOMY_CATEGORY:
        category = 'anatomical entity'

    results = map2slim(
        subjects=slimmer_subjects,
        slim=slim,
        object_category=category,
        user_agent=USER_AGENT,
        **args
    )

    # To the fullest extent possible return HGNC ids
    # ``checked`` maps a UniProtKB ID to its HGNC ID, or to None when the
    # lookup found no HGNC gene. Recording the misses as well means each
    # protein is looked up at most once per request.
    checked = {}
    for result in results:
        for association in result['assocs']:
            subject = association['subject']
            taxon = subject['taxon']['id']
            protein_id = subject['id']
            if taxon != 'NCBITaxon:9606' or not protein_id.startswith('UniProtKB:'):
                continue
            if protein_id not in checked:
                hgnc_id = None
                for gene in sg_dev.uniprot_protein_to_genes(protein_id):
                    if gene.startswith('HGNC'):
                        # When several HGNC genes come back the last one wins.
                        hgnc_id = gene
                checked[protein_id] = hgnc_id
            if checked[protein_id] is not None:
                subject['id'] = checked[protein_id]

    return results
def get(self, category):
    """
    Summarize a set of objects
    """
    # Flow:
    #   1. Read 'slim' and 'subject' from the request; everything else in
    #      ``args`` is forwarded untouched to map2slim.
    #   2. If the FIRST subject looks like a gene ID, replace the subject
    #      list with the UniProt proteins SciGraph maps it to.
    #   3. Call map2slim, then rewrite human UniProtKB subject IDs back to
    #      HGNC IDs wherever SciGraph knows a mapping.
    # Returns the (possibly rewritten) map2slim results.
    args = parser.parse_args()
    # pop() reads and removes the key in one step so that it is not passed
    # a second time through **args below.
    slim = args.pop('slim')
    # A request without any subject yields None; treat it as "no subjects"
    # instead of failing on subjects[0].
    subjects = args.pop('subject') or []

    # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
    # https://github.com/monarch-initiative/dipper/issues/461
    # One client serves both the gene->protein and protein->gene lookups,
    # rather than building a new one for every protein in the loop below.
    sg_dev = SciGraph(url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')

    # nota bene:
    # currently incomplete because code is not checking for the possibility of >1 subjects
    prots = subjects
    if subjects:
        subjects[0] = subjects[0].replace('WormBase:', 'WB:', 1)
        if subjects[0].startswith(('HGNC', 'NCBIGene', 'ENSEMBL:')):
            mapped = sg_dev.gene_to_uniprot_proteins(subjects[0])
            # Keep the original subjects when nothing is mapped.
            if len(mapped) > 0:
                prots = mapped

    results = map2slim(
        subjects=prots,
        slim=slim,
        rows=200,
        exclude_automatic_assertions=True,
        object_category=category,
        **args
    )

    # To the fullest extent possible return HGNC ids
    # ``checked`` maps a UniProtKB ID to its HGNC ID, or to None when the
    # lookup found no HGNC gene. Recording the misses as well means each
    # protein is looked up at most once per request.
    checked = {}
    for result in results:
        for association in result['assocs']:
            subject = association['subject']
            taxon = subject['taxon']['id']
            protein_id = subject['id']
            if taxon != 'NCBITaxon:9606' or not protein_id.startswith('UniProtKB:'):
                continue
            if protein_id not in checked:
                hgnc_id = None
                for gene in sg_dev.uniprot_protein_to_genes(protein_id):
                    if gene.startswith('HGNC'):
                        # When several HGNC genes come back the last one wins.
                        hgnc_id = gene
                checked[protein_id] = hgnc_id
            if checked[protein_id] is not None:
                subject['id'] = checked[protein_id]

    return results
def get(self, id):
    """
    Returns function associations for a gene.

    IMPLEMENTATION DETAILS
    ----------------------

    Note: currently this is implemented as a query to the GO/AmiGO solr instance.
    This directly supports IDs such as:

     - ZFIN e.g. ZFIN:ZDB-GENE-050417-357

    Note that the AmiGO GOlr natively stores MGI annotations to MGI:MGI:nn. However,
    the standard for biolink is MGI:nnnn, so you should use this (will be transparently
    mapped to legacy ID)

    Additionally, for some species such as Human, GO has the annotation attached to the UniProt ID.
    Again, this should be transparently handled; e.g. you can use NCBIGene:6469, and this will be
    mapped behind the scenes for querying.
    """
    # First attempt: query with the identifier exactly as supplied.
    direct = search_associations(
        object_category='function',
        subject=id,
        **core_parser.parse_args()
    )
    if len(direct['associations']) != 0:
        return direct

    # Nothing came back for the given ID, so try other IDs.
    # Note the AmiGO instance does *not* support equivalent IDs
    # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
    # https://github.com/monarch-initiative/dipper/issues/461
    message = "Found no associations using {} - will try mapping to other IDs".format(id)
    logging.debug(message)

    scigraph = SciGraph(url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')
    for protein in scigraph.gene_to_uniprot_proteins(id):
        # Each mapped protein is queried separately and its associations are
        # merged into the (so far empty) response for the original ID.
        protein_hits = search_associations(
            object_category='function',
            subject=protein,
            **core_parser.parse_args()
        )
        direct['associations'] += protein_hits['associations']

    return direct
class SciGraphIdentifierConverter(object):
    """
    Identifier converter that delegates every lookup to a SciGraph client
    """

    def __init__(self):
        # The endpoint comes from the 'scigraph_data' -> 'url' entry of the
        # biolink configuration.
        config = get_biolink_config()
        self.scigraph = SciGraph(config['scigraph_data']['url'])

    def convert_gene_to_protein(self, identifier):
        """
        Look up a gene ID in SciGraph and return the matching UniProtKB ID(s),
        exactly as returned by ``gene_to_uniprot_proteins``
        """
        return self.scigraph.gene_to_uniprot_proteins(identifier)

    def convert_protein_to_gene(self, identifier):
        """
        Look up a UniProtKB ID in SciGraph and return the matching gene ID(s),
        exactly as returned by ``uniprot_protein_to_genes``
        """
        return self.scigraph.uniprot_protein_to_genes(identifier)
def get(self, category):
    """
    Summarize a set of objects
    """
    # Flow:
    #   1. Read 'slim' and 'subject' from the request; everything else in
    #      ``args`` is forwarded untouched to map2slim.
    #   2. Call map2slim with the subjects exactly as supplied.
    #   3. If that produced no associations at all and exactly one subject
    #      was given, map that subject to UniProt proteins through SciGraph
    #      and retry with the proteins.
    # Returns the map2slim results of the last call made.
    args = parser.parse_args()
    logging.info("category is {}".format(category))
    # pop() reads and removes the key in one step so that it is not passed
    # a second time through **args below.
    slim = args.pop('slim')
    subjects = args.pop('subject')

    results = map2slim(
        subjects=subjects,
        slim=slim,
        rows=200,
        object_category=category,
        **args
    )

    # If there are no associations for the given ID, try other IDs.
    # Note the AmiGO instance does *not* support equivalent IDs
    assoc_count = sum(len(result['assocs']) for result in results)

    # ``subjects`` is None when the request carried no subject at all; check
    # for that before calling len() so the fallback is skipped, not crashed.
    if assoc_count == 0 and subjects is not None and len(subjects) == 1:
        # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
        # https://github.com/monarch-initiative/dipper/issues/461
        # nota bene:
        # currently incomplete because code is not checking for the possibility of >1 subjects
        logging.info(
            "Found no associations using {} - will try mapping to other IDs".format(subjects[0])
        )
        sg_dev = SciGraph(url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')
        prots = sg_dev.gene_to_uniprot_proteins(subjects[0])
        if len(prots) > 0:
            results = map2slim(
                subjects=prots,
                slim=slim,
                rows=200,
                object_category=category,
                **args
            )

    return results