def __init__(self, filename=None):
    """Create the HGNC service handle and populate the instance data.

    :param filename: optional value forwarded to ``self.read_csv``. When it
        is ``None`` (the default) the data is obtained with
        ``self.load_all_hgnc()`` and ``self.df`` is built with
        ``self.build_dataframe()`` instead.
    """
    self._hgnc_service = HGNC()
    # Identity test: ``== None`` can be fooled by objects overriding __eq__.
    if filename is None:
        self.alldata = self.load_all_hgnc()
        self.df = self.build_dataframe()
    else:
        self.read_csv(filename)
def _lookup_hgnc_id(self):
    """Fill in this object from the HGNC record for ``self.external_id``.

    HGNC is queried on the ``hgnc_id`` field:

    * exactly one hit: ``self.name`` is set to the record's symbol and
      ``self.description`` to its name; if ``self.get_synonyms`` is truthy
      the record's alias symbols are added to ``self.synonyms``;
    * no hit: ``self.error`` is set to an explanatory message;
    * any other hit count: nothing is modified.

    :return: ``self``
    """
    response = HGNC().fetch('hgnc_id', self.external_id)['response']
    num_found = response['numFound']

    if num_found == 1:
        record = response['docs'][0]
        self.name = record['symbol']
        self.description = record['name']

        # Get synonyms if requested.
        if self.get_synonyms:
            # A record without aliases may not carry an 'alias_symbol'
            # field at all, so fall back to an empty list rather than
            # raising KeyError.
            self.synonyms.extend(record.get('alias_symbol', []))
    elif num_found == 0:
        self.error = "No results found when querying HGNC for {}".format(
            self.external_id)

    return self
def test_hgnc():
    """Smoke-test the HGNC client.

    Calls ``get_info`` once, then a series of ``fetch`` and ``search``
    requests. Nothing is asserted on the results; the test only checks
    that the calls complete without raising.
    """
    fetch_queries = (
        ('symbol', 'ZNF3'),
        ('alias_name', 'A-kinase anchor protein, 350kDa'),
    )
    # The first search uses the single-argument form; the rest pass a field
    # name followed by a query using wildcards or boolean operators.
    search_queries = (
        ('BRAF',),
        ('symbol', 'ZNF*'),
        ('symbol', 'ZNF?'),
        ('symbol', 'ZNF*+AND+status:Approved'),
        ('symbol', 'ZNF3+OR+ZNF12'),
        ('symbol', 'ZNF*+NOT+status:Approved'),
    )

    h = HGNC()
    h.get_info()
    for field, value in fetch_queries:
        h.fetch(field, value)
    for arguments in search_queries:
        h.search(*arguments)
def __init__(self, verbosity="INFO"):
    """Initialise the base class and create one client per web service.

    :param verbosity: forwarded to the parent constructor as ``level``.
    """
    super(Mapper, self).__init__(level=verbosity)

    info = self.logging.info
    info("Initialising the services")

    # Each service is announced in the log just before it is instantiated.
    info("... uniprots")
    self._uniprot_service = UniProt()

    info("... KEGG")
    self._kegg_service = KeggParser(verbose=False)

    info("... HGNC")
    self._hgnc_service = HGNC()

    info("... UniChem")
    self._unichem_service = UniChem()

    info("...BioDBNet")
    self._biodbnet = BioDBNet()
def add_sequence_to_nodes(n: str, d: Dict[str, Any]) -> None:
    """
    Looks up the UniProt IDs of a node through HGNC and adds the
    corresponding sequences to the node as features.

    The HGNC record is fetched by symbol using ``d["protein_id"]``. Its
    ``uniprot_ids`` are stored under ``d["uniprot_ids"]`` and, for every
    such ID, the result of ``UniProt.get_fasta_sequence`` is stored under
    ``d["sequence_<uniprot id>"]``. If HGNC returns no record, or the
    record lists no UniProt IDs, ``d["uniprot_ids"]`` is set to an empty
    list and no sequence is added.

    :param n: Graph node (not used by this function).
    :type n: str
    :param d: Graph attribute dictionary; modified in place.
    :type d: Dict[str, Any]
    """
    h = HGNC(verbose=False)
    u = UniProt(verbose=False)

    docs = h.fetch("symbol", d["protein_id"])["response"]["docs"]
    # Guard against an unknown symbol (no docs) and against a record that
    # carries no "uniprot_ids" field, instead of raising IndexError/KeyError.
    d["uniprot_ids"] = docs[0].get("uniprot_ids", []) if docs else []

    # Todo these API calls should probably be batched
    # Todo mapping with bioservices to support other protein IDs?
    for uniprot_id in d["uniprot_ids"]:
        d[f"sequence_{uniprot_id}"] = u.get_fasta_sequence(uniprot_id)
def kegg_to_hugo(genes, species='hsa'):
    """
    Converts all KEGG names to HGNC

    Each entry of `genes` has the leading ``"<species>:"`` prefix removed
    (when present) and is searched in HGNC. The symbol of the first hit is
    used as the mapping. Every gene that cannot be mapped (no usable
    response, no hit, or a first hit without a symbol) is collected in the
    returned set, so no input gene is silently dropped.

    Parameters
    ----------
    genes : list
        KEGG gene identifiers, e.g. ``'hsa:673'``.
    species : str
        KEGG organism code used to build the prefix to remove.

    Returns
    -------
    tuple of (dict, set)
        The dict maps each original entry of `genes` to its HGNC symbol;
        the set holds the entries for which no symbol was found.

    """
    prefix = species + ':'
    hugo = HGNC(verbose=True)
    hugo_dict = {}
    not_found = set()

    for gene in genes:
        # str.lstrip() removes any leading characters contained in its
        # argument, not a literal prefix, and could therefore eat the start
        # of the identifier itself. Slice the prefix off explicitly.
        query = gene[len(prefix):] if gene.startswith(prefix) else gene
        mapping = hugo.search(query)

        response = mapping['response'] if 'response' in mapping else {}
        symbol = None
        if response.get('numFound', 0) >= 1 and response.get('docs'):
            # With several hits the first one is taken, as for a single hit.
            symbol = response['docs'][0].get('symbol')

        if symbol is None:
            not_found.add(gene)
        else:
            hugo_dict[gene] = symbol

    # Report only when something is actually missing (a set never compares
    # equal to 0, so the former check always printed).
    if not_found:
        print("{} not found after HGNC mapping".format(len(not_found)))
        print("{} ".format(not_found))
    return hugo_dict, not_found
def __init__(self):
    """Create the HGNC client used by this object and keep it in ``self.s``."""
    # verbose=False is passed to the HGNC constructor.
    self.s = HGNC(verbose=False)