Beispiel #1
0
 def GetCompoundInfo(self, mm, csid):
     """
     take an csid, get a mask
     parameters:
     -`mm`: a metmask database; only its `idpatterns` is read here
     -`csid`: a chemspider identifier

     returns a mask. It is empty when getUrl gives back a false value;
     otherwise it holds the csid plus the contents of every InChI, SMILES
     and InChIKey element of the response.
     """
     tmpmask = mask({}, mm.idpatterns)
     csid = self.parent.urlSafe(csid)
     url = self.GetCompoundInfoURL + "CSID=" + str(csid) + "&token=" + self.token
     qRes = self.parent.getUrl(url)
     if not qRes:
         return tmpmask
     confid = self.parent.confid
     sourceid = self.parent.sourceid
     # note that the url-safe form of the csid is what gets stored
     tmpmask.append('chemspider', csid, confid, sourceid)
     searchResults = xml.dom.minidom.parse(qRes)
     # (table in the mask, element name in the response), in storing order
     wanted = (('inchi', 'InChI'),
               ('smiles', 'SMILES'),
               ('inchikey', 'InChIKey'))
     for table, tag in wanted:
         nodes = searchResults.getElementsByTagName(tag)
         for ide in mmquery.nodecontents(nodes):
             tmpmask.append(table, ide, confid, sourceid)
     return tmpmask
Beispiel #2
0
 def getChebiChildren(self, chebiId):
     """ get the is_enantiomer children
     parameters:
     -`chebiId`: the chebi identifier to look up; passed through str() so
       that integer identifiers work as well

     returns a list with the chebi id (every "CHEBI:" removed) of each list
     element whose type is 'is enantiomer of'. The list is empty when
     getUrl gives back a false value.
     """
     url = self.getOntologyChildren + "chebiId=" + str(chebiId)
     qRes = self.parent.getUrl(url)
     if not qRes:
         return []
     searchResults = xml.dom.minidom.parse(qRes)
     res = []
     for it in searchResults.getElementsByTagName('ns1:ListElement'):
         relation = list(query.nodecontents(it.getElementsByTagName("ns1:type")))
         # an element without type content is skipped instead of failing on [0]
         if not relation or relation[0] != 'is enantiomer of':
             continue
         child = list(query.nodecontents(it.getElementsByTagName("ns1:chebiId")))
         if child:
             res.append(child[0].replace("CHEBI:", ""))
     return res
Beispiel #3
0
 def InChItool(self, start, goal, query):
     """
     convert an identifier with the InChI service
     parameters:
     -`start`: the type of `query`: 'inchikey', 'inchi' or 'smiles'
     -`goal`: the wanted type; must be a conversion listed for `start`
     -`query`: the identifier to convert

     returns the contents of the 'string' elements of the response as a
     list, or an empty list when getUrl gives back a false value.
     raises Exception("unsupported query") when there is no endpoint for
     the start/goal pair.
     """
     endpoints = {
         'inchikey': {
             'chemspider': '/InChIKeyToCSID?inchi_key=',
             'inchi': '/InChIKeyToInChI?inchi_key=',
         },
         'inchi': {
             'chemspider': '/InChIToCSID?inchi=',
             'inchikey': '/InChIToInChIKey?inchi=',
             'smiles': '/InChIToSMILES?inchi=',
         },
         'smiles': {
             'inchi': '/SMILESToInChI?smiles=',
         },
     }
     endpoint = endpoints.get(start, {}).get(goal)
     if endpoint is None:
         raise Exception("unsupported query")
     safe_query = self.parent.urlSafe(query)
     response = self.parent.getUrl(self.InChI + endpoint + safe_query)
     if not response:
         return []
     document = xml.dom.minidom.parse(response)
     return list(mmquery.nodecontents(document.getElementsByTagName('string')))
Beispiel #4
0
 def getChebiChildren(self, chebiId):
     """ get the is_enantiomer children
     parameters:
     -`chebiId`: the chebi identifier to look up; passed through str() so
       that integer identifiers work as well

     returns a list with the chebi id (every "CHEBI:" removed) of each list
     element whose type is 'is enantiomer of'. The list is empty when
     getUrl gives back a false value.
     """
     url = self.getOntologyChildren + "chebiId=" + str(chebiId)
     qRes = self.parent.getUrl(url)
     if not qRes:
         return []
     searchResults = xml.dom.minidom.parse(qRes)
     res = []
     for it in searchResults.getElementsByTagName('ns1:ListElement'):
         relation = list(
             query.nodecontents(it.getElementsByTagName("ns1:type")))
         # an element without type content is skipped instead of failing on [0]
         if not relation or relation[0] != 'is enantiomer of':
             continue
         child = list(
             query.nodecontents(it.getElementsByTagName("ns1:chebiId")))
         if child:
             res.append(child[0].replace("CHEBI:", ""))
     return res
Beispiel #5
0
 def ExtRefs(self, csid, ds):
     """
     take a csid, get its external references
     parameters:
     -`csid`: a chemspider identifier
     -`ds`: the value sent as the `datasources` request parameter

     returns the contents of the 'ext_id' elements of the response as a
     list, or an empty list when getUrl gives back a false value.
     """
     pieces = (self.ExtRefsURL, "CSID=", str(csid),
               "&datasources=", ds, "&token=", self.token)
     response = self.parent.getUrl("".join(pieces))
     if not response:
         return []
     document = xml.dom.minidom.parse(response)
     return list(mmquery.nodecontents(document.getElementsByTagName('ext_id')))
Beispiel #6
0
 def url2ids(self, url):
     """
     take an url, get the chebi ids

     returns the contents of the 'ns1:chebiId' elements of the response
     with every 'CHEBI:' removed, or an empty list when getUrl gives back a
     false value.
     """
     response = self.parent.getUrl(url)
     if not response:
         return []
     document = xml.dom.minidom.parse(response)
     id_nodes = document.getElementsByTagName('ns1:chebiId')
     return [raw.replace('CHEBI:', '') for raw in query.nodecontents(id_nodes)]
Beispiel #7
0
 def SimpleSearch(self, query):
     """
     take a query string, get the csids
     parameters:
     -`query`: the search text; made url-safe before it is sent

     returns the contents of the 'int' elements of the response as a list,
     or an empty list when getUrl gives back a false value.
     """
     safe_query = self.parent.urlSafe(query)
     pieces = (self.SimpleSearchURL, "query=", str(safe_query),
               "&token=", self.token)
     response = self.parent.getUrl("".join(pieces))
     if not response:
         return []
     document = xml.dom.minidom.parse(response)
     return list(mmquery.nodecontents(document.getElementsByTagName('int')))
Beispiel #8
0
 def ExtRefs(self, csid, ds):
     """
     take a csid, get its external references
     parameters:
     -`csid`: a chemspider identifier
     -`ds`: the value sent as the `datasources` request parameter

     returns the contents of the 'ext_id' elements of the response as a
     list, or an empty list when getUrl gives back a false value.
     """
     request = self.ExtRefsURL + "CSID=" + str(csid)
     request = request + "&datasources=" + ds
     request = request + "&token=" + self.token
     answer = self.parent.getUrl(request)
     if answer:
         tree = xml.dom.minidom.parse(answer)
         found = tree.getElementsByTagName('ext_id')
         return list(mmquery.nodecontents(found))
     return []
Beispiel #9
0
 def url2ids(self, url):
     """
     take an url, get the chebi ids

     returns the contents of the 'ns1:chebiId' elements of the response
     with every 'CHEBI:' removed, or an empty list when getUrl gives back a
     false value.
     """
     answer = self.parent.getUrl(url)
     stripped = []
     if answer:
         tree = xml.dom.minidom.parse(answer)
         found = tree.getElementsByTagName('ns1:chebiId')
         for raw in list(query.nodecontents(found)):
             stripped.append(raw.replace('CHEBI:', ''))
     return stripped
Beispiel #10
0
 def GetCompoundInfo(self, mm, csid):
     """
     take an csid, get a mask
     parameters:
     -`mm`: a metmask database; only its `idpatterns` is read here
     -`csid`: a chemspider identifier

     returns a mask. It is empty when getUrl gives back a false value;
     otherwise it holds the csid plus the contents of every InChI, SMILES
     and InChIKey element of the response.
     """
     tmpmask = mask({}, mm.idpatterns)
     csid = self.parent.urlSafe(csid)
     url = self.GetCompoundInfoURL + "CSID=" + str(
         csid) + "&token=" + self.token
     qRes = self.parent.getUrl(url)
     if not qRes:
         return tmpmask
     confid = self.parent.confid
     sourceid = self.parent.sourceid
     # note that the url-safe form of the csid is what gets stored
     tmpmask.append('chemspider', csid, confid, sourceid)
     searchResults = xml.dom.minidom.parse(qRes)
     # (table in the mask, element name in the response), in storing order
     wanted = (('inchi', 'InChI'),
               ('smiles', 'SMILES'),
               ('inchikey', 'InChIKey'))
     for table, tag in wanted:
         nodes = searchResults.getElementsByTagName(tag)
         for ide in mmquery.nodecontents(nodes):
             tmpmask.append(table, ide, confid, sourceid)
     return tmpmask
Beispiel #11
0
 def SimpleSearch(self, query):
     """
     take a query string, get the csids
     parameters:
     -`query`: the search text; made url-safe before it is sent

     returns the contents of the 'int' elements of the response as a list,
     or an empty list when getUrl gives back a false value.
     """
     escaped = self.parent.urlSafe(query)
     request = self.SimpleSearchURL + "query=" + str(escaped)
     request = request + "&token=" + self.token
     answer = self.parent.getUrl(request)
     if answer:
         tree = xml.dom.minidom.parse(answer)
         found = tree.getElementsByTagName('int')
         return list(mmquery.nodecontents(found))
     return []
Beispiel #12
0
 def InChItool(self, start, goal, query):
     """
     convert an identifier with the InChI service
     parameters:
     -`start`: the type of `query`: 'inchikey', 'inchi' or 'smiles'
     -`goal`: the wanted type; must be a conversion listed for `start`
     -`query`: the identifier to convert

     returns the contents of the 'string' elements of the response as a
     list, or an empty list when getUrl gives back a false value.
     raises Exception("unsupported query") when there is no endpoint for
     the start/goal pair.
     """
     from_inchikey = {'chemspider': '/InChIKeyToCSID?inchi_key=',
                      'inchi': '/InChIKeyToInChI?inchi_key='}
     from_inchi = {'chemspider': '/InChIToCSID?inchi=',
                   'inchikey': '/InChIToInChIKey?inchi=',
                   'smiles': '/InChIToSMILES?inchi='}
     from_smiles = {'inchi': '/SMILESToInChI?smiles='}
     routes = {'inchikey': from_inchikey,
               'inchi': from_inchi,
               'smiles': from_smiles}
     if not (start in routes and goal in routes[start]):
         raise Exception("unsupported query")
     escaped = self.parent.urlSafe(query)
     answer = self.parent.getUrl(self.InChI + routes[start][goal] + escaped)
     if answer:
         tree = xml.dom.minidom.parse(answer)
         return list(mmquery.nodecontents(tree.getElementsByTagName('string')))
     return []
Beispiel #13
0
    def pubchem2mask(self, docSum):
        """ turn a docsum node in to a mask
        parameters:
        -`docSum`: a DOM node with an 'Id' element and 'Item' elements that
          carry a 'Name' attribute

        returns a mask with the cid plus whatever the items provide.
        Synonyms for which guessTable reports kegg, cas, chebi or inchi
        (tried in that order) are stored in that table with the parser
        confidence; every other synonym is stored as a weak 'synonym'.
        Items with other names than the ones handled below are ignored.
        """
        idpatterns = self.parent.mm.idpatterns
        un = mask({}, idpatterns)
        cid = next(mmquery.nodecontents(docSum.getElementsByTagName("Id")))
        un.append('cid', cid, self.parent.confid, self.parent.sourceid)
        # removed from every synonym: link tags, the word 'ligand', commas
        p = re.compile('(<a href[^>]*>)|(</a>)|(ligand)|(,)')
        cnf = self.parent.confid
        src = self.parent.sourceid
        weak = self.parent.mm.confidence['weak']

        # item name -> (table, confidence) for the items whose first child
        # node holds the value
        single_valued = {
            'IUPACName': ('iupac', cnf),
            'CanonicalSmile': ('smiles', cnf),
            'InChIKey': ('inchikey', cnf),
            # annotation section
            'MolecularFormula': ('formula', weak),
            'MolecularWeight': ('weight', weak),
            'TotalFormalCharge': ('totalcharge', weak),
            'XLogP': ('xlogp', weak),
            'HydrogenBondDonorCount': ('hbonddonor', weak),
            'HydrogenBondAcceptorCount': ('hbondacceptor', weak),
            'HeavyAtomCount': ('heavyatom', weak),
            'TPSA': ('tpsa', weak),
        }

        for item in docSum.getElementsByTagName('Item'):
            if not item.childNodes:
                continue
            name = item.getAttribute('Name')
            if name == 'SynonymList':
                for s in mmquery.nodecontents(item.childNodes):
                    s = p.sub('', s.strip())
                    # one guess per synonym; assumes guessTable returns a
                    # container that can be tested several times (not a
                    # one-shot iterator) -- TODO confirm
                    guessed = guessTable(s, idpatterns)
                    for table in ('kegg', 'cas', 'chebi', 'inchi'):
                        if table in guessed:
                            un.append(table, s, cnf, src)
                            break
                    else:
                        un.append('synonym', s, weak, src)
            elif name in single_valued:
                table, confidence = single_valued[name]
                un.append(table, item.childNodes[0].nodeValue, confidence, src)
        return un
Beispiel #14
0
    def chebi2mask(self, mm, chebiId):
        """
        get a mask containing the info associated with a chebi id

        uses getComplete to fetch the contents of the relevant entry
        parameters:
        -`mm`: a metmask database; only its `idpatterns` is read here
        -`chebiId`: the chebi identifier to fetch

        returns a mask with the chebi id, smiles, synonyms, inchi, iupac
        names, kegg ids, cas numbers and formulae found in the response. A
        new empty mask is returned when getUrl gives back a false value or
        when the response has no 'ns1:return' element.
        """
        un = mask({}, mm.idpatterns)
        qUrl = self.getComplete + "chebiId=" + str(chebiId)
        qRes = self.parent.getUrl(qUrl)
        if not qRes:
            return mask({})
        searchResults = xml.dom.minidom.parse(qRes)
        returned = searchResults.getElementsByTagName('ns1:return')
        if not returned:
            # found non-existent chebiId. Removing the id from the database
            # would have been reasonable, but chebi has problems so that
            # some entries exist and still cant be fetched; hence, skip
            # completely
            return mask({})
        retList = returned[0]
        confid = self.parent.confid
        sourceid = self.parent.sourceid

        def texts(node, tag):
            # contents of every `tag` element below `node`
            return list(query.nodecontents(node.getElementsByTagName(tag)))

        def first_section_data(tag):
            # 'ns1:data' contents below the first `tag` element, if any
            sections = searchResults.getElementsByTagName(tag)
            return texts(sections[0], "ns1:data") if sections else []

        def typed_data(tag, wanted):
            # 'ns1:data' contents of each `tag` element whose first
            # 'ns1:type' is `wanted`; elements without a type are skipped
            found = []
            for section in searchResults.getElementsByTagName(tag):
                if texts(section, "ns1:type")[:1] == [wanted]:
                    found.extend(texts(section, "ns1:data"))
            return found

        # chebiid
        ids = texts(retList, "ns1:chebiId")
        if ids:
            un.append('chebi', ids[0].replace("CHEBI:", ""), confid, sourceid)

        # smiles
        smiles = texts(retList, "ns1:smiles")
        if smiles:
            un.append('smiles', smiles[0], confid, sourceid)

        # synonym
        for sy in first_section_data('ns1:Synonyms'):
            un.append('synonym', sy, self.parent.mm.confidence['weak'],
                      sourceid)

        # inchi
        inchi = texts(retList, "ns1:inchi")
        if inchi:
            un.append('inchi', inchi[0], confid, sourceid)

        # iupac
        for sy in first_section_data('ns1:IupacNames'):
            un.append('iupac', sy, confid, sourceid)

        # kegg
        for sy in typed_data('ns1:DatabaseLinks', 'KEGG COMPOUND accession'):
            un.append('kegg', sy, confid, sourceid)

        # cas
        for sy in typed_data('ns1:RegistryNumbers', 'CAS Registry Number'):
            un.append('cas', sy, confid, sourceid)

        # formula
        for sy in first_section_data('ns1:Formulae'):
            un.append('formula', sy, self.parent.mm.confidence['weak'],
                      sourceid)

        return un
Beispiel #15
0
    def pubchem2mask(self, docSum):
        """ turn a docsum node in to a mask
        parameters:
        -`docSum`: a DOM node with an 'Id' element and 'Item' elements that
          carry a 'Name' attribute

        returns a mask with the cid plus whatever the items provide.
        Synonyms for which guessTable reports kegg, cas, chebi or inchi
        (tried in that order) are stored in that table with the parser
        confidence; every other synonym is stored as a weak 'synonym'.
        Items with other names than the ones handled below are ignored.
        """
        idpatterns = self.parent.mm.idpatterns
        un = mask({}, idpatterns)
        cid = next(mmquery.nodecontents(docSum.getElementsByTagName("Id")))
        un.append('cid', cid, self.parent.confid, self.parent.sourceid)
        # removed from every synonym: link tags, the word 'ligand', commas
        p = re.compile('(<a href[^>]*>)|(</a>)|(ligand)|(,)')
        cnf = self.parent.confid
        src = self.parent.sourceid
        weak = self.parent.mm.confidence['weak']

        # item name -> (table, confidence) for the items whose first child
        # node holds the value
        single_valued = {
            'IUPACName': ('iupac', cnf),
            'CanonicalSmile': ('smiles', cnf),
            'InChIKey': ('inchikey', cnf),
            # annotation section
            'MolecularFormula': ('formula', weak),
            'MolecularWeight': ('weight', weak),
            'TotalFormalCharge': ('totalcharge', weak),
            'XLogP': ('xlogp', weak),
            'HydrogenBondDonorCount': ('hbonddonor', weak),
            'HydrogenBondAcceptorCount': ('hbondacceptor', weak),
            'HeavyAtomCount': ('heavyatom', weak),
            'TPSA': ('tpsa', weak),
        }

        for item in docSum.getElementsByTagName('Item'):
            if not item.childNodes:
                continue
            name = item.getAttribute('Name')
            if name == 'SynonymList':
                for s in mmquery.nodecontents(item.childNodes):
                    s = p.sub('', s.strip())
                    # one guess per synonym; assumes guessTable returns a
                    # container that can be tested several times (not a
                    # one-shot iterator) -- TODO confirm
                    guessed = guessTable(s, idpatterns)
                    for table in ('kegg', 'cas', 'chebi', 'inchi'):
                        if table in guessed:
                            un.append(table, s, cnf, src)
                            break
                    else:
                        un.append('synonym', s, weak, src)
            elif name in single_valued:
                table, confidence = single_valued[name]
                un.append(table, item.childNodes[0].nodeValue, confidence, src)
        return un
Beispiel #16
0
    def chebi2mask(self, mm, chebiId):
        """
        get a mask containing the info associated with a chebi id

        uses getComplete to fetch the contents of the relevant entry
        parameters:
        -`mm`: a metmask database; only its `idpatterns` is read here
        -`chebiId`: the chebi identifier to fetch

        returns a mask with the chebi id, smiles, synonyms, inchi, iupac
        names, kegg ids, cas numbers and formulae found in the response. A
        new empty mask is returned when getUrl gives back a false value or
        when the response has no 'ns1:return' element.
        """
        un = mask({}, mm.idpatterns)
        qUrl = self.getComplete + "chebiId=" + str(chebiId)
        qRes = self.parent.getUrl(qUrl)
        if not qRes:
            return mask({})
        searchResults = xml.dom.minidom.parse(qRes)
        returned = searchResults.getElementsByTagName('ns1:return')
        if not returned:
            # found non-existent chebiId. Removing the id from the database
            # would have been reasonable, but chebi has problems so that
            # some entries exist and still cant be fetched; hence, skip
            # completely
            return mask({})
        retList = returned[0]
        confid = self.parent.confid
        sourceid = self.parent.sourceid

        def texts(node, tag):
            # contents of every `tag` element below `node`
            return list(query.nodecontents(node.getElementsByTagName(tag)))

        def first_section_data(tag):
            # 'ns1:data' contents below the first `tag` element, if any
            sections = searchResults.getElementsByTagName(tag)
            return texts(sections[0], "ns1:data") if sections else []

        def typed_data(tag, wanted):
            # 'ns1:data' contents of each `tag` element whose first
            # 'ns1:type' is `wanted`; elements without a type are skipped
            found = []
            for section in searchResults.getElementsByTagName(tag):
                if texts(section, "ns1:type")[:1] == [wanted]:
                    found.extend(texts(section, "ns1:data"))
            return found

        # chebiid
        ids = texts(retList, "ns1:chebiId")
        if ids:
            un.append('chebi', ids[0].replace("CHEBI:", ""), confid, sourceid)

        # smiles
        smiles = texts(retList, "ns1:smiles")
        if smiles:
            un.append('smiles', smiles[0], confid, sourceid)

        # synonym
        for sy in first_section_data('ns1:Synonyms'):
            un.append('synonym', sy, self.parent.mm.confidence['weak'],
                      sourceid)

        # inchi
        inchi = texts(retList, "ns1:inchi")
        if inchi:
            un.append('inchi', inchi[0], confid, sourceid)

        # iupac
        for sy in first_section_data('ns1:IupacNames'):
            un.append('iupac', sy, confid, sourceid)

        # kegg
        for sy in typed_data('ns1:DatabaseLinks', 'KEGG COMPOUND accession'):
            un.append('kegg', sy, confid, sourceid)

        # cas
        for sy in typed_data('ns1:RegistryNumbers', 'CAS Registry Number'):
            un.append('cas', sy, confid, sourceid)

        # formula
        for sy in first_section_data('ns1:Formulae'):
            un.append('formula', sy, self.parent.mm.confidence['weak'],
                      sourceid)

        return un