def __init__(self, drug_decode):
    """Load the drug decode table and the remote chemistry services.

    Parameters
    ----------
    drug_decode :
        Passed unchanged to ``DrugDecode`` (twice: once for ``self.dd``
        and once for ``self.dd_filled``).

    Notes
    -----
    PubChem support is optional: when ``bioservices.pubchem`` cannot be
    imported the ``puchem`` attribute is simply not set.
    """
    print("ChemSpiderSearch is still in progress, please do not use")
    self.dd = DrugDecode(drug_decode)
    self.dd_filled = DrugDecode(drug_decode)

    from bioservices.chemspider import ChemSpider
    from bioservices import ChEMBL
    from bioservices import UniChem

    try:
        print('Loading PubChem')
        from bioservices.pubchem import PubChem
    except ImportError:
        # Pubchem was introduced only in dec 2015
        pass
    else:
        # NOTE: the attribute name "puchem" (sic) is kept as-is because
        # other code may already read it.
        self.puchem = PubChem()

    print('Loading ChEMBL service')
    self.chembl = ChEMBL(cache=True)

    print('Loading ChemSpider service')
    self.chemspider = ChemSpider(cache=True)

    print('Loading UniChem service')
    # in unichem db number is 22 and chembl is 1
    self.unichem = UniChem()

    print('Settings some data aliases')
    self._cs_find = self.chemspider.find
    self._cs_get = self.chemspider.GetExtendedCompoundInfo
    self.drug_ids = sorted(self.dd.df.index.values)
    self.drug_names = sorted(self.dd.df.DRUG_NAME.values)
def get_compound_id(smiles):
    """ returns kegg id for compound with given smiles

    The SMILES is canonicalised with Indigo, candidate ids are fetched
    from the ``rest.genome.jp/subcomp`` service, each candidate is mapped
    through UniChem (``kegg_ligand`` -> ``chebi``) and ChEBI to its own
    SMILES, and the candidate whose canonical SMILES equals the query's
    is returned.

    Raises
    ------
    RuntimeError
        If the HTTP request fails, or if no candidate has a matching
        canonical SMILES.
    """
    indigo = Indigo()

    # convert smiles to standard format
    mol = indigo.loadMolecule(smiles)
    mol.aromatize()
    moi_smiles = mol.canonicalSmiles()

    # Get list of possible kegg IDs
    # NOTE(review): ``smiles`` is interpolated without URL-escaping;
    # characters such as '#' or '+' may not reach the server intact.
    url = "http://rest.genome.jp/subcomp/?smiles=%s&cutoff=1.0" % smiles
    http_client = HTTPClient()
    try:
        response = http_client.fetch(url).body
    except HTTPError as e:
        raise RuntimeError("Error: %s" % e)
    finally:
        # release the client whether or not the request succeeded
        http_client.close()

    subcomp_results = response.split("\n")
    # the last element is dropped (text after the final newline)
    subcomp_results.pop()
    subcomp_results = [i.split('\t')[0] for i in subcomp_results]

    # get smiles for all compound IDs found
    uni = UniChem()
    mapping = uni.get_mapping("kegg_ligand", "chebi")
    ch = ChEBI()
    all_smiles = [ch.getCompleteEntity(mapping[x]).smiles
                  for x in subcomp_results]

    # convert smiles to a standard format
    for pos, mol in enumerate(all_smiles):
        m = indigo.loadMolecule(mol)
        m.aromatize()
        all_smiles[pos] = m.canonicalSmiles()

    # check if smiles matches given and, if so, use that compound ID
    # if not, errors out
    try:
        index = all_smiles.index(moi_smiles)
    except ValueError:
        raise RuntimeError("SMILES unmatchable to: %s" % str(all_smiles))
    return subcomp_results[index]
def test_get_source_id():
    """Check ``UniChem.get_source_id`` on valid and invalid identifiers.

    The negative checks use ``try/except/else`` so that a call which
    unexpectedly succeeds fails the test; an ``assert False`` inside the
    ``try`` would be swallowed by the handler.
    """
    uni = UniChem()
    assert uni.get_source_id("chembl") == 1
    assert uni.get_source_id("1") == 1
    assert uni.get_source_id(1) == 1

    for invalid in ("wrong", "20000"):
        try:
            uni.get_source_id(invalid)
        except Exception:
            # expected: the identifier is rejected
            pass
        else:
            raise AssertionError(
                "get_source_id(%r) should have raised" % invalid)
def __init__(self, verbosity="INFO"):
    """Create the mapper and instantiate every remote service it uses.

    Parameters
    ----------
    verbosity : str
        Forwarded to the parent constructor as ``level``.
    """
    super(Mapper, self).__init__(level=verbosity)

    log = self.logging.info
    log("Initialising the services")

    log("... uniprots")
    self._uniprot_service = UniProt()

    log("... KEGG")
    self._kegg_service = KeggParser(verbose=False)

    log("... HGNC")
    self._hgnc_service = HGNC()

    log("... UniChem")
    self._unichem_service = UniChem()

    log("...BioDBNet")
    self._biodbnet = BioDBNet()
def convert_kegg_nodes(self, network):
    """
    Maps KEGG compound nodes of a network to HMDB ids and chemical names

    Only nodes whose name starts with ``cpd:`` are considered. Each one is
    looked up in ``self.kegg_to_hmdb``, then in ``compound_manual`` and, as
    a last resort, in the UniChem ``kegg_ligand`` -> ``hmdb`` mapping.

    Parameters
    ----------
    network : nx.DiGraph

    Returns
    -------
    tuple of (dict, dict, dict)
        ``net_cpd_to_hmdb`` : node -> HMDB value(s) found for the node
        ``net_kegg_names`` : node -> node name without the ``cpd:`` prefix
        ``net_chem_names`` : node -> chemical name(s) found for the node
    """
    prefix = 'cpd:'
    still_unknown = []
    hits = [i for i in set(network.nodes) if i.startswith(prefix)]
    net_kegg_names = dict()
    net_chem_names = dict()
    net_cpd_to_hmdb = dict()

    for i in hits:
        # slice the prefix off: str.lstrip('cpd:') would strip any leading
        # run of the characters c, p, d and ':' rather than the prefix
        name_stripped = i[len(prefix):]
        net_kegg_names[i] = name_stripped

        if name_stripped in self.kegg_to_hmdb:
            mapping = self.kegg_to_hmdb[name_stripped]
            if isinstance(mapping, (list, set, SortedSet)):
                names = '|'.join(set(mapping))
                chem_names = set()
                for name in mapping:
                    try:
                        chem_names.update(self.hmdb_to_chem_name[name])
                    except (KeyError, TypeError):
                        # no usable chemical name for this id; skip it
                        continue
                net_cpd_to_hmdb[i] = names
                net_chem_names[i] = order_merge(chem_names)
            elif isinstance(mapping, basestring):
                chem_n = self.hmdb_to_chem_name[mapping]
                net_cpd_to_hmdb[i] = mapping
                # NOTE(review): joining an encoded string puts '|' between
                # its characters; kept as-is, confirm the intended type
                # of ``chem_n``.
                net_chem_names[i] = '|'.join(chem_n.encode('ascii', 'ignore'))
            else:
                print('Returned something else...', mapping)
        elif i in compound_manual:
            loc = compound_manual[i]
            net_cpd_to_hmdb[i] = loc
            if loc in self.hmdb_to_chem_name:
                net_chem_names[i] = order_merge(
                    self.hmdb_to_chem_name[loc])
        else:
            still_unknown.append(i)

    if still_unknown:
        # the UniChem service is only created when a fallback lookup is
        # actually required
        chem = UniChem()
        kegg_hmdb = chem.get_mapping("kegg_ligand", "hmdb")
        for i in still_unknown:
            name_stripped = i[len(prefix):]
            if name_stripped in kegg_hmdb:
                net_cpd_to_hmdb[i] = kegg_hmdb[name_stripped]
            # else:
            #     print("Cannot find a HMDB mapping for %s " % i)

    return net_cpd_to_hmdb, net_kegg_names, net_chem_names
""" from argparse import ArgumentParser import mysql.connector as mysqlc from bioservices import UniChem #Define command line options and defaults parser = ArgumentParser() parser.add_argument("-c", "--mysqlconf", dest="mysql_conf", default="/home/kkmattil/Documents/DDCB/mysql_write.conf", help="MySQL_conf", metavar="MYSQL_CONF") args = parser.parse_args() uniC = UniChem() #Open mysql connection cnx = mysqlc.connect(option_files=args.mysql_conf) cursor = cnx.cursor() SQL_drop_table = ('DROP TABLE unichem_links;') cursor.execute(SQL_drop_table) cnx.commit() cursor = cnx.cursor() SQL_create_table_if_needed = ( 'CREATE TABLE IF NOT EXISTS unichem_links(comp_num INT, source_id INT, source VARCHAR(200), id_in_db VARCHAR(200)) ENGINE=INNODB;' ) cursor.execute(SQL_create_table_if_needed)
def test_get_auxiliary_mapping():
    """Smoke test: ``get_auxiliary_mappings(1)`` can be called."""
    # this does nothing (behaviour of the function)
    service = UniChem()
    service.get_auxiliary_mappings(1)
def test_get_src_compoundid_url():
    """Smoke test: ``get_src_compound_id_url`` runs for CHEMBL12."""
    service = UniChem()
    compound, source, target = "CHEMBL12", "chembl", "drugbank"
    service.get_src_compound_id_url(compound, source, target)
def test_unichem_src_compound_from_inchikey():
    """Smoke test: both InChIKey lookups run for one fixed key."""
    inchikey = "AAOVKJBEBIDNHE-UHFFFAOYSA-N"
    service = UniChem()
    service.get_src_compound_ids_from_inchikey(inchikey)
    service.get_src_compound_ids_all_from_inchikey(inchikey)
def test_get_src_compound_ids_all_from_obsolete():
    """Smoke test: the obsolete-id lookup runs with and without a target."""
    service = UniChem()
    obsolete_id = "DB07699"
    source = 2  # presumably the UniChem source number for DB ids - confirm
    service.get_src_compound_ids_all_from_obsolete(obsolete_id, source)
    service.get_src_compound_ids_all_from_obsolete(
        obsolete_id, source, "chembl")
def test_unichem_src_compound():
    """Smoke test: map CHEMBL12 from chembl to chebi and to drugbank."""
    compound = "CHEMBL12"
    service = UniChem()
    service.get_src_compound_ids_from_src_compound_id(
        compound, "chembl", "chebi")
    service.get_src_compound_ids_all_from_src_compound_id(
        compound, "chembl", "drugbank")
def test_structure():
    """Smoke test: both structure lookups run for CHEMBL12."""
    service = UniChem()
    query = ("CHEMBL12", "chembl")
    service.get_structure(*query)
    service.get_structure_all(*query)
def test_src_ids():
    """Smoke test: list all source ids and fetch the chembl source info."""
    service = UniChem()
    service.get_all_src_ids()
    service.get_source_information("chembl")
def test_mapping():
    """The kegg_ligand -> chembl mapping must not be empty."""
    service = UniChem()
    kegg_to_chembl = service.get_mapping("kegg_ligand", "chembl")
    assert len(kegg_to_chembl) > 0
def unichem():
    """Return a ``UniChem`` instance created with ``verbose=False``."""
    return UniChem(verbose=False)
def test_get_verbose_src_compound_ids_fron_inchikey():
    """Smoke test: the verbose InChIKey lookup runs for one fixed key."""
    inchikey = "GZUITABIAKMVPG-UHFFFAOYSA-N"
    service = UniChem()
    service.get_verbose_src_compound_ids_from_inchikey(inchikey)
def convert_kegg_nodes(self, network):
    """
    Maps KEGG compound nodes of a network to HMDB ids and chemical names

    Only nodes whose name starts with ``cpd:`` are considered. Each one is
    looked up in ``self.kegg_to_hmdb``, then in ``compound_manual`` and, as
    a last resort, in the UniChem ``kegg_ligand`` -> ``hmdb`` mapping.

    Parameters
    ----------
    network : nx.DiGraph

    Returns
    -------
    tuple of (dict, dict, dict)
        ``net_cpd_to_hmdb`` : node -> HMDB value(s) found for the node
        ``net_kegg_names`` : node -> node name without the ``cpd:`` prefix
        ``net_chem_names`` : node -> chemical name(s) found for the node
    """
    prefix = 'cpd:'
    still_unknown = []
    hits = [i for i in set(network.nodes) if i.startswith(prefix)]
    net_kegg_names = dict()
    net_chem_names = dict()
    net_cpd_to_hmdb = dict()

    for i in hits:
        # slice the prefix off: str.lstrip('cpd:') would strip any leading
        # run of the characters c, p, d and ':' rather than the prefix
        name_stripped = i[len(prefix):]
        net_kegg_names[i] = name_stripped

        if name_stripped in self.kegg_to_hmdb:
            mapping = self.kegg_to_hmdb[name_stripped]
            if isinstance(mapping, (list, set, SortedSet)):
                names = '|'.join(set(mapping))
                chem_names = set()
                for name in mapping:
                    try:
                        chem_names.update(self.hmdb_to_chem_name[name])
                    except (KeyError, TypeError):
                        # no usable chemical name for this id; skip it
                        continue
                net_cpd_to_hmdb[i] = names
                net_chem_names[i] = order_merge(chem_names)
            elif isinstance(mapping, basestring):
                chem_n = self.hmdb_to_chem_name[mapping]
                net_cpd_to_hmdb[i] = mapping
                # NOTE(review): joining an encoded string puts '|' between
                # its characters; kept as-is, confirm the intended type
                # of ``chem_n``.
                net_chem_names[i] = '|'.join(
                    chem_n.encode('ascii', 'ignore'))
            else:
                print('Returned something else...', mapping)
        elif i in compound_manual:
            loc = compound_manual[i]
            net_cpd_to_hmdb[i] = loc
            if loc in self.hmdb_to_chem_name:
                net_chem_names[i] = order_merge(
                    self.hmdb_to_chem_name[loc])
        else:
            still_unknown.append(i)

    if still_unknown:
        # the UniChem service is only created when a fallback lookup is
        # actually required
        chem = UniChem()
        kegg_hmdb = chem.get_mapping("kegg_ligand", "hmdb")
        for i in still_unknown:
            name_stripped = i[len(prefix):]
            if name_stripped in kegg_hmdb:
                net_cpd_to_hmdb[i] = kegg_hmdb[name_stripped]
            # else:
            #     print("Cannot find a HMDB mapping for %s " % i)

    return net_cpd_to_hmdb, net_kegg_names, net_chem_names