def setUp(self):
    """Prepare the test fixture: seed the cache directory with the canned entry-info details file."""
    self.__cachePath = os.path.join(HERE, "test-output", "CACHE")
    self.__dataPath = os.path.join(HERE, "test-data")
    fileName = "entry_info_details.json"
    targetDir = os.path.join(self.__cachePath, "rcsb_entry_info")
    srcPath = os.path.join(self.__dataPath, fileName)
    dstPath = os.path.join(targetDir, fileName)
    # Copy the fixture JSON into the cache layout the tests expect.
    FileUtil().put(srcPath, dstPath)
def __modelFixture(self):
    """Populate the computed-models cache from mock AF model files.

    Each *.cif.gz file is filed under a three-level directory derived from
    the trailing six characters of the identifier embedded in its name.
    """
    fileU = FileUtil()
    srcDir = os.path.join(self.__mockTopPath, "AF")
    for srcPath in glob.iglob(os.path.join(srcDir, "*.cif.gz")):
        baseName = os.path.basename(srcPath)
        # File names look like <tag>-<id>-... ; the identifier is field 1.
        accession = baseName.split("-")[1]
        # Shard by the last six characters, two at a time (outermost first).
        lvl1 = accession[-6:-4]
        lvl2 = accession[-4:-2]
        lvl3 = accession[-2:]
        dstPath = os.path.join(self.__cachePath, "computed-models", lvl1, lvl2, lvl3, baseName)
        fileU.put(srcPath, dstPath)
def storeBundle(self, url, remoteDirPath, remoteStashPrefix="A", userName=None, password=None):
    """Store a copy of the bundled search dependencies remotely.

    Args:
        url (str): URL string for the destination host (e.g. sftp://myserver.net,
            or None for a local file)
        remoteDirPath (str): remote directory path on the remote resource
        remoteStashPrefix (str, optional): label prepended to the stashed dependency
            bundle artifact. Defaults to "A".
        userName (str, optional): optional access information. Defaults to None.
        password (str, optional): optional access information. Defaults to None.

    Returns:
        bool: True for success or False otherwise
    """
    try:
        ok = False
        bundleFileName = self.__makeBundleFileName(self.__baseBundleFileName, remoteStashPrefix=remoteStashPrefix)
        if url and url.startswith("sftp://"):
            # Push the bundle over SFTP to an absolute path on the remote host.
            sftpU = SftpUtil()
            hostName = url[7:]
            ok = sftpU.connect(hostName, userName, pw=password, port=22)
            if ok:
                destPath = os.path.join("/", remoteDirPath, bundleFileName)
                ok = sftpU.put(self.__localStashTarFilePath, destPath)
        elif not url:
            # No URL means a plain filesystem copy.
            destPath = os.path.join(remoteDirPath, bundleFileName)
            ok = FileUtil().put(self.__localStashTarFilePath, destPath)
        else:
            logger.error("Unsupported stash protocol %r", url)
        return ok
    except Exception as e:
        logger.exception("For %r %r failing with %s", url, remoteDirPath, str(e))
    return False
def __pharosFixture(self):
    """Stage Pharos target test data into the cache directory.

    Fetches and uncompresses the canned .tdd.gz tables, then installs the
    accompanying readme file.

    Returns:
        bool: True for success or False otherwise
    """
    ok = False
    try:
        fileU = FileUtil()
        sourceDir = os.path.join(self.__dataPath, "Pharos")
        targetDir = os.path.join(self.__cachePath, "Pharos-targets")
        for tableName in ("drug_activity", "cmpd_activity", "protein"):
            gzName = tableName + ".tdd.gz"
            fetchedPath = os.path.join(targetDir, gzName)
            fileU.get(os.path.join(sourceDir, gzName), fetchedPath)
            fileU.uncompress(fetchedPath, outputDir=targetDir)
            # Keep only the uncompressed table.
            fileU.remove(fetchedPath)
        fileU.put(os.path.join(sourceDir, "pharos-readme.txt"), os.path.join(targetDir, "pharos-readme.txt"))
        ok = True
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        ok = False
    return ok
class FileUtilTests(unittest.TestCase):
    """Tests for FileUtil covering local file operations, remote downloads, and archive bundling."""

    def setUp(self):
        self.__verbose = True
        self.__pathPdbxDictionaryFile = os.path.join(TOPDIR, "rcsb", "mock-data", "dictionaries", "mmcif_pdbx_v5_next.dic")
        self.__pathTaxonomyFile = os.path.join(TOPDIR, "rcsb", "mock-data", "NCBI", "names.dmp.gz")
        self.__zipFileUrl = "https://inventory.data.gov/dataset/794cd3d7-4d28-4408-8f7d-84b820dbf7f2/resource/6b78ec0c-4980-4ad8-9cbd-2d6eb9eda8e7/download/myfoodapediadata.zip"
        self.__xzFile = os.path.join(TOPDIR, "rcsb", "mock-data", "MOCK_MODBASE_MODELS", "NP_001030614.1_1.pdb.xz")
        # Restored assignment: testFtpUrl() dereferences self.__ftpFileUrl; with this
        # line commented out that test always failed with AttributeError.
        self.__ftpFileUrl = "ftp://ftp.wwpdb.org/pub/pdb/data/component-models/complete/chem_comp_model.cif.gz"
        self.__httpsFileUrl = "https://ftp.wwpdb.org/pub/pdb/data/component-models/complete/chem_comp_model.cif.gz"
        #
        self.__workPath = os.path.join(HERE, "test-output")
        self.__inpDirPath = os.path.join(HERE, "test-data")
        self.__fileU = FileUtil()
        self.__startTime = time.time()
        logger.debug("Running tests on version %s", __version__)
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        endTime = time.time()
        logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testTarBundling(self):
        """Test case for tarfile bundling and unbundling"""
        try:
            # Gzipped tar of a single directory tree
            tP = os.path.join(self.__workPath, "t0.tar.gz")
            dirPath = os.path.join(self.__inpDirPath, "topdir")
            ok = self.__fileU.bundleTarfile(tP, [dirPath], mode="w:gz", recursive=True)
            self.assertTrue(ok)
            numBytes = self.__fileU.size(tP)
            self.assertGreaterEqual(numBytes, 250)
            #
            md5 = self.__fileU.hash(tP, hashType="md5")
            self.assertTrue(md5 is not None)
            #
            ok = self.__fileU.unbundleTarfile(tP, dirPath=self.__workPath)
            self.assertTrue(ok)
            #
            # Gzipped tar of multiple directories
            tP = os.path.join(self.__workPath, "t1.tar.gz")
            dirPathList = [os.path.join(self.__inpDirPath, "topdir", "subdirA"), os.path.join(self.__inpDirPath, "topdir", "subdirB")]
            ok = self.__fileU.bundleTarfile(tP, dirPathList, mode="w:gz", recursive=True)
            self.assertTrue(ok)
            #
            ok = self.__fileU.unbundleTarfile(tP, dirPath=self.__workPath)
            self.assertTrue(ok)
            # Uncompressed tar of multiple directories
            tP = os.path.join(self.__workPath, "t2.tar")
            dirPathList = [os.path.join(self.__inpDirPath, "topdir", "subdirA"), os.path.join(self.__inpDirPath, "topdir", "subdirB")]
            ok = self.__fileU.bundleTarfile(tP, dirPathList, mode="w", recursive=True)
            self.assertTrue(ok)
            #
            ok = self.__fileU.unbundleTarfile(tP, dirPath=self.__workPath)
            self.assertTrue(ok)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testGetFile(self):
        """Test case for a local files and directories"""
        try:
            remoteLocator = self.__pathPdbxDictionaryFile
            fn = self.__fileU.getFileName(remoteLocator)
            # _, fn = os.path.split(remoteLocator)
            lPath = os.path.join(self.__workPath, fn)
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            ok = self.__fileU.remove(lPath)
            self.assertTrue(ok)
            dPath = os.path.join(self.__workPath, "tdir")
            ok = self.__fileU.mkdir(dPath)
            self.assertTrue(ok)
            ok = self.__fileU.remove(dPath)
            self.assertTrue(ok)
            # Removing a nonsense path is treated as success
            ok = self.__fileU.remove(";lakdjf")
            self.assertTrue(ok)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testMoveAndCopyFile(self):
        """Test case for copying ("put") and moving ("replace") local files"""
        try:
            remoteLocator = self.__pathPdbxDictionaryFile
            fn = self.__fileU.getFileName(remoteLocator)
            # _, fn = os.path.split(remoteLocator)
            lPath = os.path.join(self.__workPath, fn)
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            # Test copy file
            dPath2 = os.path.join(self.__workPath, "tdir")
            ok = self.__fileU.mkdir(dPath2)
            self.assertTrue(ok)
            lPath2 = os.path.join(dPath2, fn)
            ok = self.__fileU.put(lPath, lPath2)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath2)
            self.assertTrue(ok)
            # Remove copied file (to test moving file next)
            ok = self.__fileU.remove(lPath2)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath2)
            self.assertFalse(ok)
            # Test move file
            ok = self.__fileU.replace(lPath, lPath2)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertFalse(ok)
            ok = self.__fileU.exists(lPath2)
            self.assertTrue(ok)
            # Now clean up files and dirs
            ok = self.__fileU.remove(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.remove(dPath2)
            self.assertTrue(ok)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testZipUrl(self):
        """Test case for downloading remote zip file and extracting contents."""
        try:
            remoteLocator = self.__zipFileUrl
            # fn = self.__fileU.getFileName(remoteLocator)
            ok = self.__fileU.isLocal(remoteLocator)
            self.assertFalse(ok)
            #
            lPath = os.path.join(self.__workPath, self.__fileU.getFileName(self.__zipFileUrl))
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            fp = self.__fileU.uncompress(lPath, outputDir=self.__workPath)
            ok = fp.endswith("Food_Display_Table.xlsx")
            self.assertTrue(ok)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testFtpUrl(self):
        """Test case for downloading remote file ftp protocol and extracting contents."""
        try:
            remoteLocator = self.__ftpFileUrl
            # fn = self.__fileU.getFileName(remoteLocator)
            ok = self.__fileU.isLocal(remoteLocator)
            self.assertFalse(ok)
            #
            dirPath = os.path.join(self.__workPath, "chem_comp_models")
            lPath = os.path.join(dirPath, self.__fileU.getFileName(self.__ftpFileUrl))
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            fp = self.__fileU.uncompress(lPath, outputDir=dirPath)
            ok = fp.endswith("chem_comp_model.cif")
            self.assertTrue(ok)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testRemote(self):
        """Test case remote status"""
        try:
            remoteLocator = self.__httpsFileUrl
            ok = self.__fileU.isLocal(remoteLocator)
            self.assertFalse(ok)
            #
            ok = self.__fileU.exists(remoteLocator)
            self.assertTrue(ok)
            size = self.__fileU.size(remoteLocator)
            self.assertGreaterEqual(size, 1000)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    @unittest.skip("DrugBank example -- skipping")
    def testGetDrugBankUrl(self):
        """Test case for downloading drugbank master xml file"""
        try:
            remoteLocator = "https://www.drugbank.ca/releases/latest/downloads/all-full-database"
            un = "username"
            pw = "password"
            # fn = self.__fileU.getFileName(remoteLocator)
            ok = self.__fileU.isLocal(remoteLocator)
            self.assertFalse(ok)
            #
            lPath = os.path.join(self.__workPath, "db-download.zip")
            ok = self.__fileU.get(remoteLocator, lPath, username=un, password=pw)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            self.__fileU.uncompress(lPath, outputDir=self.__workPath)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testXzFile(self):
        """Test case for extracting contents from xz file"""
        try:
            remoteLocator = self.__xzFile
            fn = self.__fileU.getFileName(remoteLocator)
            lPath = os.path.join(self.__workPath, fn)
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            fp = self.__fileU.uncompress(lPath, outputDir=self.__workPath)
            ok = fp.endswith(".pdb")
            self.assertTrue(ok)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
class MarshalUtil(object):
    """Wrapper for serialization and deserialization methods."""

    def __init__(self, **kwargs):
        self.__workPath = kwargs.get("workPath", ".")
        self.__workDirSuffix = kwargs.get("workDirSuffix", "marshall_")
        # Fixed: this previously read the "workDirSuffix" key again, so a
        # caller-supplied "workDirPrefix" keyword was silently ignored.
        self.__workDirPrefix = kwargs.get("workDirPrefix", "_tempdir")
        #
        self.__fileU = FileUtil(workPath=self.__workPath)
        self.__ioU = IoUtil()

    def doExport(self, locator, obj, fmt="list", marshalHelper=None, numParts=None, **kwargs):
        """Serialize the input object at locator path in specified format.

        The input object is optionally preprocessed by the helper method.

        Args:
            locator (str): target path or URI
            obj (object): data to be serialized
            fmt (str, optional): format for serialization (mmcif, tdd, csv, list). Defaults to "list".
            marshalHelper (method, optional): pre-processor method applied to input data object. Defaults to None.
            numParts (int, optional): serialize the data in parts. Defaults to None. (json and pickle formats)

        Returns:
            bool: True for success or False otherwise
        """
        try:
            ret = False
            localFlag = self.__fileU.isLocal(locator)
            if marshalHelper:
                myObj = marshalHelper(obj, **kwargs)
            else:
                myObj = obj
            #
            if localFlag and numParts and fmt in ["json", "pickle"]:
                # Partitioned serialization is only supported for local json/pickle targets.
                localFilePath = self.__fileU.getFilePath(locator)
                ret = self.__ioU.serializeInParts(localFilePath, myObj, numParts, fmt=fmt, **kwargs)
            elif localFlag:
                localFilePath = self.__fileU.getFilePath(locator)
                ret = self.__ioU.serialize(localFilePath, myObj, fmt=fmt, workPath=self.__workPath, **kwargs)
            else:
                with tempfile.TemporaryDirectory(suffix=self.__workDirSuffix, prefix=self.__workDirPrefix, dir=self.__workPath) as tmpDirName:
                    # write a local copy then copy to destination -
                    #
                    localFilePath = os.path.join(self.__workPath, tmpDirName, self.__fileU.getFileName(locator))
                    ok1 = self.__ioU.serialize(localFilePath, myObj, fmt=fmt, workPath=self.__workPath, **kwargs)
                    ok2 = True
                    if ok1:
                        ok2 = self.__fileU.put(localFilePath, locator, **kwargs)
                ret = ok1 and ok2
        except Exception as e:
            logger.exception("Exporting locator %r failing with %s", locator, str(e))
        return ret

    def doImport(self, locator, fmt="list", marshalHelper=None, numParts=None, **kwargs):
        """Deserialize data at the target locator in specified format.

        The deserialized data is optionally post-processed by the input helper method.

        Args:
            locator (str): path or URI to input data
            fmt (str, optional): format for deserialization (mmcif, tdd, csv, list). Defaults to "list".
            marshalHelper (method, optional): post-processor method applied to deserialized data object. Defaults to None.
            numParts (int, optional): deserialize the data in parts. Defaults to None. (json and pickle formats)
            tarMember (str, optional): name of a member of tar file bundle. Defaults to None. (tar file format)

        Returns:
            Any: format specific return type
        """
        try:
            tarMember = kwargs.get("tarMember", None)
            # A tar member must always be fetched/extracted through the remote path below.
            localFlag = self.__fileU.isLocal(locator) and not tarMember
            #
            if localFlag and numParts and fmt in ["json", "pickle"]:
                filePath = self.__fileU.getFilePath(locator)
                ret = self.__ioU.deserializeInParts(filePath, numParts, fmt=fmt, **kwargs)
            elif localFlag:
                filePath = self.__fileU.getFilePath(locator)
                ret = self.__ioU.deserialize(filePath, fmt=fmt, workPath=self.__workPath, **kwargs)
            else:
                #
                if fmt == "mmcif":
                    # The mmcif reader handles remote locators directly.
                    ret = self.__ioU.deserialize(locator, fmt=fmt, workPath=self.__workPath, **kwargs)
                else:
                    with tempfile.TemporaryDirectory(suffix=self.__workDirSuffix, prefix=self.__workDirPrefix, dir=self.__workPath) as tmpDirName:
                        #
                        # Fetch first then read a local copy -
                        #
                        if tarMember:
                            localFilePath = os.path.join(self.__workPath, tmpDirName, tarMember)
                        else:
                            localFilePath = os.path.join(self.__workPath, tmpDirName, self.__fileU.getFileName(locator))
                        # --- Local copy approach ---
                        self.__fileU.get(locator, localFilePath, **kwargs)
                        ret = self.__ioU.deserialize(localFilePath, fmt=fmt, workPath=self.__workPath, **kwargs)
            if marshalHelper:
                ret = marshalHelper(ret, **kwargs)
        except Exception as e:
            logger.exception("Importing locator %r failing with %s", locator, str(e))
            ret = None
        return ret

    def exists(self, filePath, mode=os.R_OK):
        """Return True if filePath exists with the given access mode."""
        return self.__fileU.exists(filePath, mode=mode)

    def mkdir(self, dirPath, mode=0o755):
        """Create directory dirPath (with mode) if needed; return status."""
        return self.__fileU.mkdir(dirPath, mode=mode)

    def remove(self, pth):
        """Remove the file or directory at pth; return status."""
        return self.__fileU.remove(pth)
def __reloadFasta(self, dirPath, **kwargs):
    """Reload DrugBank target FASTA data files.

    Args:
        dirPath (str): path to DrugBank cache directory
        useCache (bool, optional): flag to use cached files. Defaults to True.
        username (str, optional): DrugBank download credential. Defaults to None.
        password (str, optional): DrugBank download credential. Defaults to None.

    Returns:
        list: paths of the extracted FASTA files (empty when credentials are
        missing or nothing could be loaded)
    """
    startTime = time.time()
    logger.info("Starting db reload at %s", time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
    retFilePathList = []
    urlTargetL = [
        "https://go.drugbank.com/releases/latest/downloads/target-all-polypeptide-sequences",
        "https://go.drugbank.com/releases/latest/downloads/enzyme-all-polypeptide-sequences",
        "https://go.drugbank.com/releases/latest/downloads/carrier-all-polypeptide-sequences",
        "https://go.drugbank.com/releases/latest/downloads/transporter-all-polypeptide-sequences",
    ]
    useCache = kwargs.get("useCache", True)
    username = kwargs.get("username", None)
    password = kwargs.get("password", None)
    #
    # Credentials are required to (re)build the cache; warn here at the only
    # reachable point (previously this warning sat after an earlier return
    # and was dead code).
    if not username or not password:
        logger.warning("Missing credentials for DrugBank file download...")
        return retFilePathList
    #
    fU = FileUtil()
    fU.mkdir(dirPath)
    #
    if not useCache:
        # Clear any cached files
        for urlTarget in urlTargetL:
            baseFileName = fU.getFileName(urlTarget)
            zipFileName = baseFileName + ".fasta.zip"
            retFileName = baseFileName + ".fa"
            for fn in [baseFileName, zipFileName, retFileName]:
                try:
                    fp = os.path.join(dirPath, fn)
                    os.remove(fp)
                except Exception:
                    pass
    #
    ok = False
    if useCache:
        ok = True
        for urlTarget in urlTargetL:
            baseFileName = fU.getFileName(urlTarget)
            retFileName = baseFileName + ".fa"
            retFilePath = os.path.join(dirPath, retFileName)
            ok = fU.exists(retFilePath)
            if not ok:
                break
            retFilePathList.append(retFilePath)
    #
    logger.info("Using cached files %r", ok)
    if not useCache or not ok:
        # Discard partial results from a failed cache scan; otherwise files
        # found before the first miss would be appended twice below.
        retFilePathList = []
        for urlTarget in urlTargetL:
            baseFileName = fU.getFileName(urlTarget)
            zipFileName = baseFileName + ".fasta.zip"
            retFileName = baseFileName + ".fa"
            zipFilePath = os.path.join(dirPath, zipFileName)
            retFilePath = os.path.join(dirPath, retFileName)
            basePath = os.path.join(dirPath, baseFileName)
            logger.info("Fetching url %s for FASTA target file %s", urlTarget, baseFileName)
            ok = fU.get(urlTarget, zipFilePath, username=username, password=password)
            endTime = time.time()
            logger.info("Completed db fetch at %s (%.4f seconds)", time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - startTime)
            #
            ok = fU.unbundleZipfile(zipFilePath, dirPath=basePath)
            fU.put(os.path.join(basePath, "protein.fasta"), retFilePath)
            endTime = time.time()
            logger.info("Completed unzip at %s (%.4f seconds)", time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - startTime)
            retFilePathList.append(retFilePath)
    return retFilePathList