def GetCompoundInfo(self, mm, csid):
    """
    take a csid, get a mask holding the identifiers reported for it

    parameters:
    -`mm`: a metmask database (only `mm.idpatterns` is read here)
    -`csid`: a chemspider identifier

    returns the freshly created mask untouched when the request gives no
    result; otherwise the mask holds the csid plus every InChI, SMILES and
    InChIKey value found in the response.
    """
    tmpmask = mask({}, mm.idpatterns)
    csid = self.parent.urlSafe(csid)
    url = (self.GetCompoundInfoURL + "CSID=" + str(csid) +
           "&token=" + self.token)
    qRes = self.parent.getUrl(url)
    if not qRes:
        return tmpmask

    confid = self.parent.confid
    sourceid = self.parent.sourceid
    # NOTE(review): the value stored is the one returned by urlSafe, not
    # the raw argument -- confirm that this is intended.
    tmpmask.append('chemspider', csid, confid, sourceid)

    searchResults = xml.dom.minidom.parse(qRes)
    # (mask table, response tag) pairs, in the order they are added
    wanted = (('inchi', 'InChI'),
              ('smiles', 'SMILES'),
              ('inchikey', 'InChIKey'))
    for table, tag in wanted:
        nodes = searchResults.getElementsByTagName(tag)
        for ide in mmquery.nodecontents(nodes):
            tmpmask.append(table, ide, confid, sourceid)
    return tmpmask
def getChebiChildren(self, chebiId):
    """
    get the is_enantiomer children

    parameters:
    -`chebiId`: the chebi identifier to look up; converted with str() so
     numeric identifiers are accepted as well

    returns a list with the chebi ids (without the "CHEBI:" prefix) of all
    list elements whose type is 'is enantiomer of'; an empty list when the
    request gives no result.
    """
    url = self.getOntologyChildren + "chebiId=" + str(chebiId)
    qRes = self.parent.getUrl(url)
    if not qRes:
        return []
    searchResults = xml.dom.minidom.parse(qRes)
    res = []
    for it in searchResults.getElementsByTagName('ns1:ListElement'):
        relation = list(
            query.nodecontents(it.getElementsByTagName("ns1:type")))
        # elements without a readable type cannot be enantiomer links
        if not relation or relation[0] != 'is enantiomer of':
            continue
        child = list(
            query.nodecontents(it.getElementsByTagName("ns1:chebiId")))
        # skip links that carry no identifier instead of raising
        if child:
            res.append(child[0].replace("CHEBI:", ""))
    return res
def InChItool(self, start, goal, query):
    """
    ask one of the conversion endpoints below `self.InChI`

    parameters:
    -`start`: type of the identifier in `query`
     ('inchikey', 'inchi' or 'smiles')
    -`goal`: type of identifier wanted; must be listed for `start`
    -`query`: the identifier to send

    returns the contents of the 'string' elements of the response as a
    list, or an empty list when the request gives no result. Raises
    Exception("unsupported query") for a start/goal pair that is not in
    the table.
    """
    # start type -> goal type -> endpoint path incl. query-string prefix
    urls = {
        'inchikey': {
            'chemspider': '/InChIKeyToCSID?inchi_key=',
            'inchi': '/InChIKeyToInChI?inchi_key=',
        },
        'inchi': {
            'chemspider': '/InChIToCSID?inchi=',
            'inchikey': '/InChIToInChIKey?inchi=',
            'smiles': '/InChIToSMILES?inchi=',
        },
        'smiles': {
            'inchi': '/SMILESToInChI?smiles=',
        },
    }
    endpoint = urls.get(start, {}).get(goal)
    if endpoint is None:
        raise Exception("unsupported query")

    safe_query = self.parent.urlSafe(query)
    response = self.parent.getUrl(self.InChI + endpoint + safe_query)
    if not response:
        return []
    dom = xml.dom.minidom.parse(response)
    return list(mmquery.nodecontents(dom.getElementsByTagName('string')))
def getChebiChildren(self, chebiId):
    """
    get the is_enantiomer children

    parameters:
    -`chebiId`: the chebi identifier to look up; converted with str() so
     numeric identifiers are accepted as well

    returns a list with the chebi ids (without the "CHEBI:" prefix) of all
    list elements whose type is 'is enantiomer of'; an empty list when the
    request gives no result.
    """
    url = self.getOntologyChildren + "chebiId=" + str(chebiId)
    qRes = self.parent.getUrl(url)
    if not qRes:
        return []
    searchResults = xml.dom.minidom.parse(qRes)
    res = []
    for it in searchResults.getElementsByTagName('ns1:ListElement'):
        relation = list(
            query.nodecontents(it.getElementsByTagName("ns1:type")))
        # elements without a readable type cannot be enantiomer links
        if not relation or relation[0] != 'is enantiomer of':
            continue
        child = list(
            query.nodecontents(it.getElementsByTagName("ns1:chebiId")))
        # skip links that carry no identifier instead of raising
        if child:
            res.append(child[0].replace("CHEBI:", ""))
    return res
def ExtRefs(self, csid, ds):
    """
    take a csid, get the external ids reported for it

    parameters:
    -`csid`: a chemspider identifier (converted with str())
    -`ds`: value sent as the `datasources` parameter

    returns the contents of the 'ext_id' elements of the response as a
    list, or an empty list when the request gives no result.
    """
    request = "".join([
        self.ExtRefsURL,
        "CSID=", str(csid),
        "&datasources=", ds,
        "&token=", self.token,
    ])
    response = self.parent.getUrl(request)
    if not response:
        return []
    dom = xml.dom.minidom.parse(response)
    return list(mmquery.nodecontents(dom.getElementsByTagName('ext_id')))
def url2ids(self, url):
    """
    take an url, get the chebi ids

    returns the contents of the 'ns1:chebiId' elements of the response
    with the "CHEBI:" prefix removed, or an empty list when the request
    gives no result.
    """
    response = self.parent.getUrl(url)
    if not response:
        return []
    dom = xml.dom.minidom.parse(response)
    id_nodes = dom.getElementsByTagName('ns1:chebiId')
    return [found.replace('CHEBI:', '')
            for found in query.nodecontents(id_nodes)]
def SimpleSearch(self, query):
    """
    take a query string, get the csids

    the query is passed through `self.parent.urlSafe` before it is sent.
    returns the contents of the 'int' elements of the response as a list,
    or an empty list when the request gives no result.
    """
    safe_query = self.parent.urlSafe(query)
    request = "".join([
        self.SimpleSearchURL,
        "query=", str(safe_query),
        "&token=", self.token,
    ])
    response = self.parent.getUrl(request)
    if not response:
        return []
    dom = xml.dom.minidom.parse(response)
    return list(mmquery.nodecontents(dom.getElementsByTagName('int')))
def ExtRefs(self, csid, ds):
    """
    take a csid, get the external ids reported for it

    parameters:
    -`csid`: a chemspider identifier (converted with str())
    -`ds`: value sent as the `datasources` parameter

    returns the contents of the 'ext_id' elements of the response as a
    list, or an empty list when the request gives no result.
    """
    request = "".join([
        self.ExtRefsURL,
        "CSID=", str(csid),
        "&datasources=", ds,
        "&token=", self.token,
    ])
    response = self.parent.getUrl(request)
    if not response:
        return []
    dom = xml.dom.minidom.parse(response)
    return list(mmquery.nodecontents(dom.getElementsByTagName('ext_id')))
def url2ids(self, url):
    """
    take an url, get the chebi ids

    returns the contents of the 'ns1:chebiId' elements of the response
    with the "CHEBI:" prefix removed, or an empty list when the request
    gives no result.
    """
    response = self.parent.getUrl(url)
    if not response:
        return []
    dom = xml.dom.minidom.parse(response)
    id_nodes = dom.getElementsByTagName('ns1:chebiId')
    return [found.replace('CHEBI:', '')
            for found in query.nodecontents(id_nodes)]
def GetCompoundInfo(self, mm, csid):
    """
    take a csid, get a mask holding the identifiers reported for it

    parameters:
    -`mm`: a metmask database (only `mm.idpatterns` is read here)
    -`csid`: a chemspider identifier

    returns the freshly created mask untouched when the request gives no
    result; otherwise the mask holds the csid plus every InChI, SMILES and
    InChIKey value found in the response.
    """
    tmpmask = mask({}, mm.idpatterns)
    csid = self.parent.urlSafe(csid)
    url = (self.GetCompoundInfoURL + "CSID=" + str(csid) +
           "&token=" + self.token)
    qRes = self.parent.getUrl(url)
    if not qRes:
        return tmpmask

    confid = self.parent.confid
    sourceid = self.parent.sourceid
    # NOTE(review): the value stored is the one returned by urlSafe, not
    # the raw argument -- confirm that this is intended.
    tmpmask.append('chemspider', csid, confid, sourceid)

    searchResults = xml.dom.minidom.parse(qRes)
    # (mask table, response tag) pairs, in the order they are added
    wanted = (('inchi', 'InChI'),
              ('smiles', 'SMILES'),
              ('inchikey', 'InChIKey'))
    for table, tag in wanted:
        nodes = searchResults.getElementsByTagName(tag)
        for ide in mmquery.nodecontents(nodes):
            tmpmask.append(table, ide, confid, sourceid)
    return tmpmask
def SimpleSearch(self, query):
    """
    take a query string, get the csids

    the query is passed through `self.parent.urlSafe` before it is sent.
    returns the contents of the 'int' elements of the response as a list,
    or an empty list when the request gives no result.
    """
    safe_query = self.parent.urlSafe(query)
    request = "".join([
        self.SimpleSearchURL,
        "query=", str(safe_query),
        "&token=", self.token,
    ])
    response = self.parent.getUrl(request)
    if not response:
        return []
    dom = xml.dom.minidom.parse(response)
    return list(mmquery.nodecontents(dom.getElementsByTagName('int')))
def InChItool(self, start, goal, query):
    """
    ask one of the conversion endpoints below `self.InChI`

    parameters:
    -`start`: type of the identifier in `query`
     ('inchikey', 'inchi' or 'smiles')
    -`goal`: type of identifier wanted; must be listed for `start`
    -`query`: the identifier to send

    returns the contents of the 'string' elements of the response as a
    list, or an empty list when the request gives no result. Raises
    Exception("unsupported query") for a start/goal pair that is not in
    the table.
    """
    # start type -> goal type -> endpoint path incl. query-string prefix
    urls = {
        'inchikey': {
            'chemspider': '/InChIKeyToCSID?inchi_key=',
            'inchi': '/InChIKeyToInChI?inchi_key=',
        },
        'inchi': {
            'chemspider': '/InChIToCSID?inchi=',
            'inchikey': '/InChIToInChIKey?inchi=',
            'smiles': '/InChIToSMILES?inchi=',
        },
        'smiles': {
            'inchi': '/SMILESToInChI?smiles=',
        },
    }
    endpoint = urls.get(start, {}).get(goal)
    if endpoint is None:
        raise Exception("unsupported query")

    safe_query = self.parent.urlSafe(query)
    response = self.parent.getUrl(self.InChI + endpoint + safe_query)
    if not response:
        return []
    dom = xml.dom.minidom.parse(response)
    return list(mmquery.nodecontents(dom.getElementsByTagName('string')))
def pubchem2mask(self, docSum):
    """
    turn a docsum node in to a mask

    parameters:
    -`docSum`: a DOM node holding an 'Id' element and 'Item' elements
     that carry a 'Name' attribute

    the first 'Id' value is stored as 'cid'. Each entry of the
    'SynonymList' item is cleaned and stored in the first of the tables
    kegg, cas, chebi, inchi that guessTable reports for it, or as a weak
    'synonym' when none matches. The other recognised items are stored
    under the table listed in `simple_items`; items without child nodes
    and items with an unknown name are ignored.
    """
    un = mask({}, self.parent.mm.idpatterns)
    cid = next(mmquery.nodecontents(docSum.getElementsByTagName("Id")))
    un.append('cid', cid, self.parent.confid, self.parent.sourceid)
    # link markup, the word 'ligand' and commas are stripped from synonyms
    p = re.compile('(<a href[^>]*>)|(</a>)|(ligand)|(,)')
    cnf = self.parent.confid
    src = self.parent.sourceid
    weak = self.parent.mm.confidence['weak']
    idpatterns = self.parent.mm.idpatterns

    # identifier tables a synonym may belong to, in order of precedence
    id_tables = ('kegg', 'cas', 'chebi', 'inchi')
    # item name -> (mask table, confidence)
    # NOTE(review): 'CanonicalSmile' is kept exactly as it was; confirm
    # the spelling against the item names PubChem actually returns.
    simple_items = {
        'IUPACName': ('iupac', cnf),
        'CanonicalSmile': ('smiles', cnf),
        'InChIKey': ('inchikey', cnf),
        # annotation section
        'MolecularFormula': ('formula', weak),
        'MolecularWeight': ('weight', weak),
        'TotalFormalCharge': ('totalcharge', weak),
        'XLogP': ('xlogp', weak),
        'HydrogenBondDonorCount': ('hbonddonor', weak),
        'HydrogenBondAcceptorCount': ('hbondacceptor', weak),
        'HeavyAtomCount': ('heavyatom', weak),
        'TPSA': ('tpsa', weak),
    }

    for item in docSum.getElementsByTagName('Item'):
        if len(item.childNodes) == 0:
            continue
        name = item.getAttribute('Name')
        if name == 'SynonymList':
            for s in mmquery.nodecontents(item.childNodes):
                s = p.sub('', s.strip())
                # classify once, then take the first matching table
                # (assumes guessTable has no side effects)
                guessed = guessTable(s, idpatterns)
                for table in id_tables:
                    if table in guessed:
                        un.append(table, s, cnf, src)
                        break
                else:
                    un.append('synonym', s, weak, src)
        elif name in simple_items:
            table, confidence = simple_items[name]
            un.append(table, item.childNodes[0].nodeValue, confidence, src)
    return un
def chebi2mask(self, mm, chebiId):
    """
    get a mask containing the info associated with a chebi id

    uses the `self.getComplete` url to fetch the entry and copies the
    chebi id, smiles, synonyms, inchi, iupac names, kegg ids, cas numbers
    and formulae found in the response into the mask.

    parameters:
    -`mm`: a metmask database (only `mm.idpatterns` is read here)
    -`chebiId`: the chebi identifier to fetch (converted with str())

    returns `mask({})` when the request gives no result or the response
    has no 'ns1:return' element.
    """
    def contents(node, tag):
        """ text contents of all `tag` elements below `node`, as a list """
        return list(query.nodecontents(node.getElementsByTagName(tag)))

    un = mask({}, mm.idpatterns)
    qUrl = self.getComplete + "chebiId=" + str(chebiId)
    qRes = self.parent.getUrl(qUrl)
    if not qRes:
        return mask({})
    searchResults = xml.dom.minidom.parse(qRes)
    if not searchResults:
        return mask({})
    returned = searchResults.getElementsByTagName('ns1:return')
    if not returned:
        # found non-existent chebiId. Removing the id from the database
        # would have been reasonable, but chebi has problems so that some
        # entries exist but cant be fetched; hence, skip completely
        return mask({})
    retList = returned[0]

    confid = self.parent.confid
    sourceid = self.parent.sourceid

    # chebiid; fields whose element carries no text are skipped
    found = contents(retList, "ns1:chebiId")
    if found:
        un.append('chebi', found[0].replace("CHEBI:", ""), confid, sourceid)
    # smiles
    found = contents(retList, "ns1:smiles")
    if found:
        un.append('smiles', found[0], confid, sourceid)
    # synonym
    groups = searchResults.getElementsByTagName('ns1:Synonyms')
    if groups:
        for sy in contents(groups[0], "ns1:data"):
            un.append('synonym', sy, self.parent.mm.confidence['weak'],
                      sourceid)
    # inchi
    found = contents(retList, "ns1:inchi")
    if found:
        un.append('inchi', found[0], confid, sourceid)
    # iupac
    groups = searchResults.getElementsByTagName('ns1:IupacNames')
    if groups:
        for sy in contents(groups[0], "ns1:data"):
            un.append('iupac', sy, confid, sourceid)
    # kegg
    for ll in searchResults.getElementsByTagName('ns1:DatabaseLinks'):
        kind = contents(ll, "ns1:type")
        if kind and kind[0] == 'KEGG COMPOUND accession':
            for sy in contents(ll, "ns1:data"):
                un.append('kegg', sy, confid, sourceid)
    # cas
    for ll in searchResults.getElementsByTagName('ns1:RegistryNumbers'):
        kind = contents(ll, "ns1:type")
        if kind and kind[0] == 'CAS Registry Number':
            for sy in contents(ll, "ns1:data"):
                un.append('cas', sy, confid, sourceid)
    # formula
    groups = searchResults.getElementsByTagName('ns1:Formulae')
    if groups:
        for sy in contents(groups[0], "ns1:data"):
            un.append('formula', sy, self.parent.mm.confidence['weak'],
                      sourceid)
    return un
def chebi2mask(self, mm, chebiId):
    """
    get a mask containing the info associated with a chebi id

    uses the `self.getComplete` url to fetch the entry and copies the
    chebi id, smiles, synonyms, inchi, iupac names, kegg ids, cas numbers
    and formulae found in the response into the mask.

    parameters:
    -`mm`: a metmask database (only `mm.idpatterns` is read here)
    -`chebiId`: the chebi identifier to fetch (converted with str())

    returns `mask({})` when the request gives no result or the response
    has no 'ns1:return' element.
    """
    def contents(node, tag):
        """ text contents of all `tag` elements below `node`, as a list """
        return list(query.nodecontents(node.getElementsByTagName(tag)))

    un = mask({}, mm.idpatterns)
    qUrl = self.getComplete + "chebiId=" + str(chebiId)
    qRes = self.parent.getUrl(qUrl)
    if not qRes:
        return mask({})
    searchResults = xml.dom.minidom.parse(qRes)
    if not searchResults:
        return mask({})
    returned = searchResults.getElementsByTagName('ns1:return')
    if not returned:
        # found non-existent chebiId. Removing the id from the database
        # would have been reasonable, but chebi has problems so that some
        # entries exist but cant be fetched; hence, skip completely
        return mask({})
    retList = returned[0]

    confid = self.parent.confid
    sourceid = self.parent.sourceid

    # chebiid; fields whose element carries no text are skipped
    found = contents(retList, "ns1:chebiId")
    if found:
        un.append('chebi', found[0].replace("CHEBI:", ""), confid, sourceid)
    # smiles
    found = contents(retList, "ns1:smiles")
    if found:
        un.append('smiles', found[0], confid, sourceid)
    # synonym
    groups = searchResults.getElementsByTagName('ns1:Synonyms')
    if groups:
        for sy in contents(groups[0], "ns1:data"):
            un.append('synonym', sy, self.parent.mm.confidence['weak'],
                      sourceid)
    # inchi
    found = contents(retList, "ns1:inchi")
    if found:
        un.append('inchi', found[0], confid, sourceid)
    # iupac
    groups = searchResults.getElementsByTagName('ns1:IupacNames')
    if groups:
        for sy in contents(groups[0], "ns1:data"):
            un.append('iupac', sy, confid, sourceid)
    # kegg
    for ll in searchResults.getElementsByTagName('ns1:DatabaseLinks'):
        kind = contents(ll, "ns1:type")
        if kind and kind[0] == 'KEGG COMPOUND accession':
            for sy in contents(ll, "ns1:data"):
                un.append('kegg', sy, confid, sourceid)
    # cas
    for ll in searchResults.getElementsByTagName('ns1:RegistryNumbers'):
        kind = contents(ll, "ns1:type")
        if kind and kind[0] == 'CAS Registry Number':
            for sy in contents(ll, "ns1:data"):
                un.append('cas', sy, confid, sourceid)
    # formula
    groups = searchResults.getElementsByTagName('ns1:Formulae')
    if groups:
        for sy in contents(groups[0], "ns1:data"):
            un.append('formula', sy, self.parent.mm.confidence['weak'],
                      sourceid)
    return un