def testPubChemFetch(self):
    """PubChem fetch test.

    Looks up compound records by InChIKey at the PubChem PUG REST endpoint
    using both GET and POST requests. For every request the returned status
    code is compared with the expected one, and for status 200 the compound
    identifier found at ``PC_Compounds[0].id.id.cid`` is compared with the
    expected CID.
    """
    # (identifier, expected status code, expected PubChem CID or None)
    idTupList = [
        ("JTOKYIBTLUQVQV-FGHQGBLESA-N", 404, None),
        ("CXHHBNMLPJOKQD-UHFFFAOYSA-N", 200, 78579),
    ]
    nameSpace = "inchikey"
    domain = "compound"
    searchType = "lookup"
    returnType = "record"
    outputType = "JSON"
    baseUrl = "https://pubchem.ncbi.nlm.nih.gov"
    # 404 is an expected outcome for the first identifier above.
    httpCodesCatch = [404]
    try:
        for (identifier, testRetCode, testPcId) in idTupList:
            for requestType in ["GET", "POST"]:
                ret, retCode = None, None
                pD = {}
                hL = {}
                ureq = UrlRequestUtil()
                if (
                    nameSpace in ["cid", "name", "inchikey"]
                    and returnType in ["record"]
                    and searchType in ["lookup"]
                    and requestType == "GET"
                ):
                    # GET: the identifier is part of the URL path, so it is URL-quoted.
                    uId = quote(identifier.encode("utf8"))
                    endPoint = "/".join(["rest", "pug", domain, nameSpace, uId, outputType])
                    ret, retCode = ureq.getUnWrapped(
                        baseUrl,
                        endPoint,
                        pD,
                        headers=hL,
                        httpCodesCatch=httpCodesCatch,
                        returnContentType="JSON",
                        sslCert="enable",
                    )
                elif (
                    nameSpace in ["cid", "name", "inchikey"]
                    and returnType in ["record"]
                    and searchType in ["lookup"]
                    and requestType == "POST"
                ):
                    # POST: the identifier travels in the request parameters instead.
                    endPoint = "/".join(["rest", "pug", domain, nameSpace, outputType])
                    pD = {nameSpace: identifier}
                    ret, retCode = ureq.postUnWrapped(
                        baseUrl,
                        endPoint,
                        pD,
                        headers=hL,
                        httpCodesCatch=httpCodesCatch,
                        returnContentType="JSON",
                        sslCert="enable",
                    )
                #
                elif (
                    nameSpace in ["cid"]
                    and returnType in ["classification"]
                    and searchType in ["lookup"]
                    and requestType == "GET"
                ):
                    # Needs to be specifically targeted on a particular compound ...
                    uId = quote(identifier.encode("utf8"))
                    endPoint = "/".join(["rest", "pug", domain, nameSpace, uId, returnType, outputType])
                    pD = {"classification_type": "simple"}
                    ret, retCode = ureq.getUnWrapped(
                        baseUrl,
                        endPoint,
                        pD,
                        headers=hL,
                        httpCodesCatch=httpCodesCatch,
                        returnContentType="JSON",
                        sslCert="enable",
                    )
                #
                elif (
                    nameSpace in ["cid"]
                    and returnType in ["classification"]
                    and searchType in ["lookup"]
                    and requestType == "POST"
                ):
                    # Needs to be specifically targeted on a particular compound ...
                    endPoint = "/".join(["rest", "pug", domain, nameSpace, returnType, outputType])
                    # This is a long request; server status code 500 may be observed.
                    pD = {nameSpace: identifier, "classification_type": "simple"}
                    ret, retCode = ureq.postUnWrapped(
                        baseUrl,
                        endPoint,
                        pD,
                        headers=hL,
                        httpCodesCatch=httpCodesCatch,
                        returnContentType="JSON",
                        sslCert="enable",
                    )
                #
                logger.debug("Result status code %r", retCode)
                self.assertEqual(retCode, testRetCode)
                if retCode == 200:
                    pcId = ret["PC_Compounds"][0]["id"]["id"]["cid"]
                    self.assertEqual(pcId, testPcId)
    except Exception as e:
        # This handler also receives assertion failures raised above, so the
        # message is forwarded to keep the test report informative.
        logger.exception("Failing with %s", str(e))
        self.fail("Failing with %s" % str(e))
def testPubChemFetchClassification(self):
    """PubChem fetch classification test - can timeout.

    Requests both "record" and "classification" results for compounds by CID
    from the PubChem PUG REST endpoint, using GET and POST requests. The
    returned status code is checked for every request; the CID in the result
    is checked only for successful "record" requests.
    """
    # (identifier, expected status code, expected CID as a string, return type)
    idTupList = [
        ("2244", 200, "2244", "record"),
        ("123631", 200, "123631", "record"),
        ("2244", 200, "2244", "classification"),
        ("123631", 200, "123631", "classification"),
    ]
    nameSpace = "cid"
    domain = "compound"
    searchType = "lookup"
    outputType = "JSON"
    baseUrl = "https://pubchem.ncbi.nlm.nih.gov"
    httpCodesCatch = [404]
    try:
        for (identifier, testRetCode, testPcId, returnType) in idTupList:
            for requestType in ["GET", "POST"]:
                logger.info(
                    "namespace %r identifier %r returnType %r requestType %r",
                    nameSpace,
                    identifier,
                    returnType,
                    requestType,
                )
                ret, retCode = None, None
                pD = {}
                # NOTE(review): headers are an empty list for the record requests
                # but an empty dict for the classification requests below -
                # confirm which type UrlRequestUtil expects.
                hL = []
                ureq = UrlRequestUtil()
                if (
                    nameSpace in ["cid", "name", "inchikey"]
                    and returnType in ["record"]
                    and searchType in ["lookup"]
                    and requestType == "GET"
                ):
                    # GET: the identifier is part of the URL path, so it is URL-quoted.
                    uId = quote(identifier.encode("utf8"))
                    endPoint = "/".join(["rest", "pug", domain, nameSpace, uId, outputType])
                    ret, retCode = ureq.get(
                        baseUrl,
                        endPoint,
                        pD,
                        headers=hL,
                        httpCodesCatch=httpCodesCatch,
                        returnContentType="JSON",
                    )
                elif (
                    nameSpace in ["cid", "name", "inchikey"]
                    and returnType in ["record"]
                    and searchType in ["lookup"]
                    and requestType == "POST"
                ):
                    # POST: the identifier travels in the request parameters instead.
                    endPoint = "/".join(["rest", "pug", domain, nameSpace, outputType])
                    pD = {nameSpace: identifier}
                    ret, retCode = ureq.post(
                        baseUrl,
                        endPoint,
                        pD,
                        headers=hL,
                        httpCodesCatch=httpCodesCatch,
                        returnContentType="JSON",
                    )
                #
                elif (
                    nameSpace in ["cid"]
                    and returnType in ["classification"]
                    and searchType in ["lookup"]
                    and requestType == "GET"
                ):
                    # Needs to be specifically targeted on a particular compound ...
                    uId = quote(identifier.encode("utf8"))
                    endPoint = "/".join(["rest", "pug", domain, nameSpace, uId, returnType, outputType])
                    pD = {}
                    ret, retCode = ureq.getUnWrapped(
                        baseUrl,
                        endPoint,
                        pD,
                        headers={},
                        httpCodesCatch=httpCodesCatch,
                        returnContentType="JSON",
                    )
                #
                elif (
                    nameSpace in ["cid"]
                    and returnType in ["classification"]
                    and searchType in ["lookup"]
                    and requestType == "POST"
                ):
                    # Needs to be specifically targeted on a particular compound ...
                    endPoint = "/".join(["rest", "pug", domain, nameSpace, returnType, outputType])
                    # This is a long request; server status code 500 may be observed.
                    pD = {nameSpace: identifier}
                    ret, retCode = ureq.postUnWrapped(
                        baseUrl,
                        endPoint,
                        pD,
                        headers={},
                        httpCodesCatch=httpCodesCatch,
                        returnContentType="JSON",
                    )
                #
                logger.debug("Result status code %r", retCode)
                self.assertEqual(retCode, testRetCode)
                # Only "record" results are inspected for the compound identifier.
                if retCode == 200 and returnType == "record":
                    pcId = str(ret["PC_Compounds"][0]["id"]["id"]["cid"])
                    self.assertEqual(pcId, testPcId)
    except Exception as e:
        # This handler also receives assertion failures raised above, so the
        # message is forwarded to keep the test report informative.
        logger.exception("Failing with %s", str(e))
        self.fail("Failing with %s" % str(e))
def __fetchDescriptors(self, ccIdList, ccidxP, chunkSize=100):
    """Fetch transformed SMILES descriptors from the ChemAxon webservice.

    The unique SMILES of the input components are posted in chunks to the
    molconvert batch endpoint. A converted SMILES is recorded for a component
    only when the conversion is flagged successful, the result differs from
    the submitted SMILES, and the result is not already among the SMILES
    obtained for that component from ccidxP.

    Args:
        ccIdList (list, str): chemical component identifier list
        ccidxP (object): instance of the ChemCompIndexProvider()
        chunkSize (int, optional): number of SMILES per request. Defaults to 100.

    Returns:
        (dict): dictionary {<ccId>: [<transformed SMILES>, ...], ...}
            On an exception the results accumulated so far are returned.

    Example API parameter data:
        {
          "errorHandlingMode": "FAIL_ON_ERROR",
          "inputParams": "smiles",
          "outputParams": "smiles",
          "structures": [
            "CC(C)[C@H](N)C=O",
            "CC[C@H](C)[C@H](N)C=O",
            "CC(C)C[C@H](N)C=O"
          ]
        }

    Example query:
        curl -X POST "https://jchem-microservices.chemaxon.com/jwsio/rest-v1/molconvert/batch"
             -H "accept: */*" -H "Content-Type: application/json"
             -d '{ "errorHandlingMode": "FAIL_ON_ERROR", "inputParams": "smiles", "outputParams": "mrv",
                   "structures": [ "CC(C)[C@H](N)C=O", "CC[C@H](C)[C@H](N)C=O", "CC(C)C[C@H](N)C=O" ]}'
    """
    descrD = {}
    smilesCcIdD = {}  # ccId -> SMILES obtained for the component
    smilesD = {}  # SMILES -> ccIds sharing that SMILES
    smiTypeList = ["oe-iso-smiles", "oe-smiles", "cactvs-iso-smiles", "cactvs-smiles"]
    for ccId in ccIdList:
        smiL = list(set(ccidxP.getSMILES(ccId, smiTypeList=smiTypeList)))
        smilesCcIdD.setdefault(ccId, []).extend(smiL)
        for smi in smiL:
            smilesD.setdefault(smi, []).append(ccId)
    #
    logger.info("Translating (%d) SMILES for components (%d)", len(smilesD), len(smilesCcIdD))
    # ---- Materialize the unique SMILES once, then slice into request-size chunks
    uniqSmiL = list(smilesD)
    smiLL = [uniqSmiL[i : i + chunkSize] for i in range(0, len(uniqSmiL), chunkSize)]
    # ---
    baseUrl = "https://jchem-microservices.chemaxon.com"
    endPoint = "jwsio/rest-v1/molconvert/batch"
    hD = {"Accept": "application/json", "Content-Type": "application/json"}
    try:
        pD = {"errorHandlingMode": "SKIP_ERROR", "inputParams": "smiles", "outputParams": "smiles"}
        #
        for iCount, smiL in enumerate(smiLL, start=1):
            ureq = UrlRequestUtil()
            pD["structures"] = smiL
            logger.debug("pD %r", pD)
            rDL, retCode = ureq.postUnWrapped(
                baseUrl,
                endPoint,
                pD,
                headers=hD,
                sendContentType="application/json",
                returnContentType="application/json",
            )
            logger.debug("API result (%r) %r", retCode, rDL)
            # Results are matched to inputs by position, so a chunk is usable
            # only when one result came back for every submitted SMILES.
            if rDL and len(rDL) == len(smiL):
                for inSmi, rD in zip(smiL, rDL):
                    if not ("structure" in rD and "successful" in rD and rD["successful"]):
                        continue
                    outSmi = rD["structure"]
                    if inSmi == outSmi:
                        continue
                    for ccId in smilesD[inSmi]:
                        if outSmi in descrD.get(ccId, []):
                            continue
                        if outSmi in smilesCcIdD[ccId]:
                            continue
                        descrD.setdefault(ccId, []).append(outSmi)
            else:
                # rDL may be None here; calling len() on it would raise and
                # abandon every remaining chunk via the handler below.
                logger.info("Chunk %d failed (%d)", iCount, len(rDL) if rDL else 0)
            if iCount % 10 == 0:
                logger.info("Completed processing chunk (%d/%d)", iCount, len(smiLL))
        #
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return descrD