def get(self, category):
    """
    Summarize a set of objects

    Reads the ``slim`` and ``subject`` request arguments, removes them from
    the parsed args (everything left over is forwarded to ``map2slim`` as
    keyword arguments) and returns the ``map2slim`` results.

    When ``category`` equals ``FUNCTION_CATEGORY``, every subject containing
    an ``HGNC:``, ``NCBIGene:`` or ``ENSEMBL:`` prefix is replaced by the
    UniProt proteins SciGraph reports for it; the subject itself is kept when
    no protein is reported. After slimming, human (``NCBITaxon:9606``)
    ``UniProtKB:`` subject ids in the results are rewritten in place to an
    HGNC id whenever SciGraph reports one.

    :param category: object category from the route; ``ANATOMY_CATEGORY`` is
        passed to ``map2slim`` as ``'anatomical entity'``.
    :return: the ``map2slim`` results, with subject ids rewritten as above.
    """
    args = parser.parse_args()
    slim = args.get('slim')
    del args['slim']
    subjects = args.get('subject')
    del args['subject']

    # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
    # https://github.com/monarch-initiative/dipper/issues/461

    sg_dev = SciGraph(url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')

    # str.replace is already a no-op when the prefix is absent.
    subjects = [x.replace('WormBase:', 'WB:') for x in subjects]

    if category == FUNCTION_CATEGORY:
        # get proteins for a gene only when the category is 'function'
        slimmer_subjects = []
        for s in subjects:
            if 'HGNC:' in s or 'NCBIGene:' in s or 'ENSEMBL:' in s:
                prots = sg_dev.gene_to_uniprot_proteins(s)
                # Fall back to the gene id itself when no protein is known.
                slimmer_subjects.extend(prots if prots else [s])
            else:
                slimmer_subjects.append(s)
    else:
        slimmer_subjects = subjects

    if category == ANATOMY_CATEGORY:
        category = 'anatomical entity'

    results = map2slim(
        subjects=slimmer_subjects,
        slim=slim,
        object_category=category,
        user_agent=USER_AGENT,
        **args
    )

    # To the fullest extent possible return HGNC ids
    # Maps each UniProt id already looked up to its HGNC id, or to None when
    # no HGNC gene was found, so a miss is not queried again for every
    # association that shares the protein.
    checked = {}
    for result in results:
        for association in result['assocs']:
            subject = association['subject']
            taxon = subject['taxon']['id']
            protein_id = subject['id']
            if taxon != 'NCBITaxon:9606' or not protein_id.startswith('UniProtKB:'):
                continue
            if protein_id not in checked:
                hgnc_id = None
                for gene in sg_dev.uniprot_protein_to_genes(protein_id):
                    if gene.startswith('HGNC'):
                        # When several HGNC genes are reported the last one
                        # wins, as before.
                        hgnc_id = gene
                checked[protein_id] = hgnc_id
            if checked[protein_id] is not None:
                subject['id'] = checked[protein_id]

    return results
def get(self, category):
    """
    Summarize a set of objects

    Reads the ``slim`` and ``subject`` request arguments, removes them from
    the parsed args (everything left over is forwarded to ``map2slim`` as
    keyword arguments) and returns the ``map2slim`` results.

    Every subject is normalised (a leading ``WormBase:`` prefix becomes
    ``WB:``) and, when it starts with ``HGNC``, ``NCBIGene`` or ``ENSEMBL:``,
    replaced by the UniProt proteins SciGraph reports for it; the subject
    itself is kept when no protein is reported. Previously only the first
    subject was handled and the remaining subjects were dropped whenever the
    first one resolved to proteins. After slimming, human
    (``NCBITaxon:9606``) ``UniProtKB:`` subject ids in the results are
    rewritten in place to an HGNC id whenever SciGraph reports one.

    :param category: object category, passed to ``map2slim`` as
        ``object_category``.
    :return: the ``map2slim`` results, with subject ids rewritten as above.
    """
    args = parser.parse_args()
    slim = args.get('slim')
    del args['slim']
    subjects = args.get('subject')
    del args['subject']

    # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
    # https://github.com/monarch-initiative/dipper/issues/461

    # One client serves both the gene -> protein and protein -> gene lookups.
    sg_dev = SciGraph(url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')

    gene_prefixes = ('HGNC', 'NCBIGene', 'ENSEMBL:')
    prots = []
    for subject_id in subjects:
        subject_id = subject_id.replace('WormBase:', 'WB:', 1)
        if subject_id.startswith(gene_prefixes):
            gene_prots = sg_dev.gene_to_uniprot_proteins(subject_id)
            # Fall back to the gene id itself when no protein is known.
            prots.extend(gene_prots if gene_prots else [subject_id])
        else:
            prots.append(subject_id)

    results = map2slim(
        subjects=prots,
        slim=slim,
        rows=200,
        exclude_automatic_assertions=True,
        object_category=category,
        **args
    )

    # To the fullest extent possible return HGNC ids
    # Maps each UniProt id already looked up to its HGNC id, or to None when
    # no HGNC gene was found, so a miss is not queried again for every
    # association that shares the protein.
    checked = {}
    for result in results:
        for association in result['assocs']:
            subject = association['subject']
            taxon = subject['taxon']['id']
            protein_id = subject['id']
            if taxon != 'NCBITaxon:9606' or not protein_id.startswith('UniProtKB:'):
                continue
            if protein_id not in checked:
                hgnc_id = None
                for gene in sg_dev.uniprot_protein_to_genes(protein_id):
                    if gene.startswith('HGNC'):
                        # When several HGNC genes are reported the last one
                        # wins, as before.
                        hgnc_id = gene
                checked[protein_id] = hgnc_id
            if checked[protein_id] is not None:
                subject['id'] = checked[protein_id]

    return results
class SciGraphIdentifierConverter(object):
    """
    Identifier converter backed by a SciGraph client.

    The client is built from the ``url`` entry of the ``scigraph_data``
    section of the biolink configuration.
    """

    def __init__(self):
        scigraph_url = get_biolink_config()['scigraph_data']['url']
        self.scigraph = SciGraph(scigraph_url)

    def convert_gene_to_protein(self, identifier):
        """
        Look up the UniProtKB protein ID(s) for a gene ID.

        Delegates to ``gene_to_uniprot_proteins`` on the SciGraph client and
        returns its result unchanged.
        """
        return self.scigraph.gene_to_uniprot_proteins(identifier)

    def convert_protein_to_gene(self, identifier):
        """
        Look up the gene ID(s) for a UniProtKB protein ID.

        Delegates to ``uniprot_protein_to_genes`` on the SciGraph client and
        returns its result unchanged.
        """
        return self.scigraph.uniprot_protein_to_genes(identifier)