def chebi2name(self, name):
    """Look up a ChEBI identifier and return its ASCII name.

    The identifier is passed to the ChEBI lite-entity search; the
    ``chebiAsciiName`` field of the first hit is returned.
    """
    from bioservices import ChEBI

    service = ChEBI()
    first_hit = service.getLiteEntity(name)[0]
    return dict(first_hit)["chebiAsciiName"]
def _lookup_chebi_id(self):
    """Populate this record from the ChEBI entry for ``self.external_id``.

    The identifier is prefixed with ``CHEBI:`` when it does not already start
    with it, and the complete entity is fetched. On success ``self.name`` and
    ``self.inchikey`` are filled in, plus ``self.synonyms`` when
    ``self.get_synonyms`` is truthy. Problems are reported through
    ``self.error`` instead of being raised.

    Returns:
        ``self`` in every case.
    """
    service = ChEBI()

    query_id = str(self.external_id)
    if not query_id.startswith("CHEBI:"):
        query_id = "CHEBI:{}".format(self.external_id)

    entity = service.getCompleteEntity(query_id)
    if not entity:
        self.error = "No results found when querying ChEBI for {}".format(
            self.external_id)
        return self

    # Synonyms: duplicates come back from the service, so only the first
    # occurrence of each one is kept.
    if self.get_synonyms:
        already_added = set()
        try:
            for synonym in entity.Synonyms:
                text = synonym.data
                if text in already_added:
                    continue
                self.synonyms.append(text)
                already_added.add(text)
        except AttributeError:
            # Some entries apparently have no synonyms.
            pass

    # Name
    self.name = entity.chebiAsciiName

    # InChIKey
    try:
        self.inchikey = entity.inchiKey
    except AttributeError:
        self.error = 'No InChIKey found for entry: {}'.format(
            self.external_id)
    return self
def test_completelist():
    """Check the ASCII names returned by a two-identifier batch lookup."""
    chebi_ids = ["CHEBI:27732", "CHEBI:36707"]
    expected_names = [
        "caffeine",
        "2-acetyl-1-alkyl-sn-glycero-3-phosphocholine",
    ]

    service = ChEBI()
    entities = service.getCompleteEntityByList(chebi_ids)
    names = [entity.chebiAsciiName for entity in entities]
    assert names == expected_names
def test_chebi_mass():
    """Check the mass of CHEBI:27732 and the mass-based lite-entity search."""
    service = ChEBI()

    entity = service.getCompleteEntity("CHEBI:27732")
    assert float(entity.mass) == 194.19076

    hits = service.getLiteEntity("194.19076", "MASS", 5, 2)
    assert hits[0]["chebiId"] == "CHEBI:27732"

    # should return nothing
    service.getLiteEntity("194.19076", "SMILES", 5, 2)
def get_single_compound_metadata_online(compound_id):
    """Fetch metadata for a single compound from KEGG or ChEBI.

    Identifiers that start with ``C`` (case-insensitively) are fetched from
    KEGG and returned in parsed form. Any other identifier is prefixed with
    ``CHEBI:`` and returned as the complete ChEBI entity.
    """
    looks_like_kegg = compound_id.upper().startswith('C')
    if not looks_like_kegg:
        return ChEBI().getCompleteEntity('CHEBI:' + compound_id)

    kegg = KEGG()
    return kegg.parse(kegg.get(compound_id))
def test_polymer():
    """Smoke-test the updated-polymer call by reading its result fields."""
    service = ChEBI()
    polymer = service.serv.getUpdatedPolymer("CHEBI:27732")

    # Values noted alongside the original accesses: chebiId 27732,
    # globalCharge 0, globalFormula C8H10N4O2.
    for field in ("chebiId", "globalCharge", "globalFormula",
                  "updatedStructure"):
        getattr(polymer, field)
def kegg_to_chebi(compound_ids):
    """Map each compound id to a ChEBI number where a lookup succeeds.

    Ids starting with ``C`` are searched in ChEBI and mapped to the numeric
    part of the first hit's ``chebiId``. Other ids map to themselves. If
    anything goes wrong for an id, it is also mapped to itself. Every outcome
    is printed.

    Returns:
        dict keyed by the input ids.
    """
    chebi = ChEBI()
    mapping = {}
    for cid in compound_ids:
        try:
            if not cid.startswith('C'):
                print('ChEBI %s --> ChEBI %s' % (cid, cid))
                mapping[cid] = cid
                continue

            hits = chebi.getLiteEntity(cid)
            # we should always be able to convert from KEGG -> ChEBI ids
            assert len(hits) > 0
            chebi_number = hits[0].chebiId.split(':')[1]
            print('KEGG %s --> ChEBI %s' % (cid, chebi_number))
            mapping[cid] = chebi_number
        except Exception:
            # Best effort: fall back to the identifier we were given.
            print('KEGG %s --> KEGG %s' % (cid, cid))
            mapping[cid] = cid
    return mapping
def get_compound_id(smiles):
    """Return the KEGG id of the compound whose structure matches ``smiles``.

    The query SMILES is canonicalised with Indigo. Candidate KEGG ids are
    requested from the ``rest.genome.jp/subcomp`` service, each candidate is
    translated to ChEBI through the UniChem ``kegg_ligand`` -> ``chebi``
    mapping, and its ChEBI SMILES is canonicalised the same way. The first
    candidate whose canonical SMILES equals the query's is returned.

    Raises:
        RuntimeError: if the HTTP request fails, or if no candidate's
            canonical SMILES equals the query's.
    """
    indigo = Indigo()

    # convert smiles to standard format
    mol = indigo.loadMolecule(smiles)
    mol.aromatize()
    moi_smiles = mol.canonicalSmiles()

    # Get list of possible kegg IDs
    # NOTE(review): ``smiles`` is interpolated into the query string without
    # percent-encoding; characters such as '#' or '+' may need escaping --
    # confirm against the service before changing.
    url = "http://rest.genome.jp/subcomp/?smiles=%s&cutoff=1.0" % smiles
    http_client = HTTPClient()
    try:
        response = http_client.fetch(url).body
    except HTTPError as e:
        raise RuntimeError("Error: %s" % e)
    finally:
        # Release the client whether or not the request succeeded.
        http_client.close()

    # One result per line, id in the first tab-separated column; the entry
    # after the final newline is dropped.
    subcomp_results = response.split("\n")
    subcomp_results.pop()
    subcomp_results = [line.split('\t')[0] for line in subcomp_results]

    # get smiles for all compound IDs found
    # NOTE(review): presumably a candidate missing from the mapping raises
    # KeyError here -- confirm whether that should be skipped instead.
    uni = UniChem()
    mapping = uni.get_mapping("kegg_ligand", "chebi")
    ch = ChEBI()
    all_smiles = [ch.getCompleteEntity(mapping[x]).smiles
                  for x in subcomp_results]

    # convert smiles to a standard format
    for pos, candidate_smiles in enumerate(all_smiles):
        m = indigo.loadMolecule(candidate_smiles)
        m.aromatize()
        all_smiles[pos] = m.canonicalSmiles()

    # check if smiles matches given and, if so, use that compound ID
    # if not, errors out
    try:
        index = all_smiles.index(moi_smiles)
    except ValueError:
        raise RuntimeError("SMILES unmatchable to: %s" % str(all_smiles))
    return subcomp_results[index]
'''
import os
import click
import json
import requests
import time
import xmltodict
import bioservices
from bioservices import KEGG, ChEBI
from zeep import Client
from tqdm import tqdm

# Service clients and the KEGG conversion table are created once at import
# time and shared by the rest of the module.
k = KEGG(verbose=False)
map_kegg_chebi = k.conv("chebi", "compound")
c = ChEBI(verbose=False)
chebi_client = Client(
    "https://www.ebi.ac.uk/webservices/chebi/2.0/webservice?wsdl")
chemspider_client = Client("https://www.chemspider.com/InChI.asmx?WSDL")

# For compounds that can't be found at all.
not_founds = []

# Need to create a global dictionary for these annotations, as I don't
# want to abuse the web services these wonderful people provide to us
# free of charge.
global CONVERTED_COMPOUNDS
CONVERTED_COMPOUNDS = {}
def chebi2name(self, name):
    """Resolve a ChEBI identifier to its ASCII name.

    Runs a ChEBI lite-entity search for the identifier and returns the
    ``chebiAsciiName`` field of the first result.
    """
    from bioservices import ChEBI

    service = ChEBI()
    hits = service.getLiteEntity(name)
    top_hit = dict(hits[0])
    return top_hit['chebiAsciiName']
def test_structure():
    """Smoke-test a similarity structure search seeded with a ChEBI SMILES."""
    service = ChEBI()
    entity = service.getCompleteEntity("CHEBI:27732")
    service.getStructureSearch(entity.smiles, "SMILES", "SIMILARITY", 3, 0.25)
def test_chebi():
    """Exercise entity lookup, database conversion and ontology calls.

    Checks that CHEBI:10102 converts to the expected ChEMBL compound id, that
    converting to an unknown database raises, and that the ontology and
    polymer calls run without error.
    """
    ch = ChEBI()
    ch.getCompleteEntity("CHEBI:10102")

    res = ch.conv("CHEBI:10102", "ChEMBL COMPOUND")
    assert res == ["521332"]

    # An unknown database name must raise. The failure is raised from the
    # ``else`` branch so the ``except`` clause cannot swallow it (an
    # ``assert False`` inside the ``try`` body would be caught and the check
    # could never fail).
    try:
        ch.conv("CHEBI:10102", "wrong db")
    except Exception:
        pass
    else:
        raise AssertionError("conv() accepted an unknown database name")

    ch.getOntologyChildren("CHEBI:27732")
    ch.getOntologyParents("CHEBI:27732")
    ch.getUpdatedPolymer("CHEBI:27732")
def test_chebi():
    """Exercise entity lookup, database conversion and ontology calls.

    Checks that CHEBI:10102 converts to the expected KEGG compound accession,
    that converting to an unknown database raises, and that the ontology and
    polymer calls run without error.
    """
    ch = ChEBI()
    ch.getCompleteEntity("CHEBI:10102")

    res = ch.conv("CHEBI:10102", "KEGG COMPOUND accession")
    assert res == ['C07484']

    # An unknown database name must raise. The failure is raised from the
    # ``else`` branch so the ``except`` clause cannot swallow it (an
    # ``assert False`` inside the ``try`` body would be caught and the check
    # could never fail).
    try:
        ch.conv("CHEBI:10102", "wrong db")
    except Exception:
        pass
    else:
        raise AssertionError("conv() accepted an unknown database name")

    ch.getOntologyChildren("CHEBI:27732")
    ch.getOntologyParents("CHEBI:27732")
    ch.getUpdatedPolymer("CHEBI:27732")
def test_ontology():
    """Smoke-test the ``has part`` all-children-in-path ontology query."""
    entity_id = "CHEBI:27732"
    relation = "has part"
    service = ChEBI()
    service.getAllOntologyChildrenInPath(entity_id, relation)
def test_chebi_mass():
    """Check the mass reported for CHEBI:27732."""
    service = ChEBI()
    entity = service.getCompleteEntity("CHEBI:27732")
    assert float(entity.mass) == 194.19076
def test_completelist():
    """Check the stringified ASCII names from a two-identifier batch lookup."""
    service = ChEBI()
    entities = service.getCompleteEntityByList(["CHEBI:27732","CHEBI:36707"])
    names = [str(entity.chebiAsciiName) for entity in entities]
    assert names == ["caffeine", "2-acetyl-1-alkyl-sn-glycero-3-phosphocholine"]