def __setup(self, siteId=None):
    """Initialize per-site state: record the site id and build config/path helpers."""
    self.__siteId = siteId
    # No session directory is associated at setup time.
    self.__sessionPath = None
    # Site-specific configuration accessor.
    self.__cI = ConfigInfo(self.__siteId)
    # Path resolver for archive/deposit/session file locations.
    self.__pI = PathInfo(siteId=self.__siteId, sessionPath=self.__sessionPath, verbose=self.__verbose, log=self.__lfh)
def get_model_file(depid, version_id, mileStone=None, siteId=None):
    """Return the archive path of the PDBx/mmCIF model file for a deposition.

    :param depid: deposition data set identifier (e.g. ``D_1000000000``).
    :param version_id: version selector passed through to PathInfo.
    :param mileStone: optional milestone qualifier (e.g. ``"deposit"``).
    :param siteId: wwPDB site id; resolved via ``getSiteId()`` when omitted.
    """
    effective_site = getSiteId() if siteId is None else siteId
    path_info = PathInfo(effective_site, sessionPath=".", verbose=True, log=sys.stderr)
    model_path = path_info.getModelPdbxFilePath(dataSetId=depid, fileSource="archive", versionId=version_id, mileStone=mileStone)
    logging.debug("mmcif file path: %s", model_path)
    return model_path
def testSessionPath(self):
    """Verify that session-style file sources resolve to a directory path."""
    tests = [("archive", "D_1000000000", "session_test/12345")]
    for (fs, dataSetId, session_dir) in tests:
        # Lazy %-style args: the message is only formatted when DEBUG is enabled.
        logger.debug("File source %s dataSetId %s session dir %s", fs, dataSetId, session_dir)
        # Distinct loop variable: the original reused ``fs`` and silently
        # clobbered the outer tuple's value.
        for sessionSource in ("session", "wf-session", "session-download"):
            pI = PathInfo(siteId=self.__siteId, sessionPath=session_dir)
            fp = pI.getDirPath(dataSetId=dataSetId, fileSource=sessionSource)
            logger.debug("session path %s", fp)
            self.assertIsNotNone(fp, "Failed to get session path")
def __setup(self, siteId=None):
    """Resolve the site id, bind the current session, and build config/path helpers."""
    # An explicitly supplied site id wins; otherwise fall back to the request object.
    self.__siteId = siteId if siteId is not None else self.__reqObj.getValue("WWPDB_SITE_ID")
    # self.__lfh.write("+FileUtils.__setup() starting with entryId %r adjusted WWPDB_SITE_ID %r\n" % (self.__entryId, self.__siteId))
    #
    self.__sObj = self.__reqObj.getSessionObj()
    self.__sessionId = self.__sObj.getId()
    self.__sessionPath = self.__sObj.getPath()
    self.__pI = PathInfo(siteId=self.__siteId, sessionPath=self.__sessionPath, verbose=self.__verbose, log=self.__lfh)
    self.__cI = ConfigInfo(self.__siteId)
    # Recognized milestone suffixes for content types at this site.
    self.__msL = self.__cI.get("CONTENT_MILESTONE_LIST")
def __init__(self, siteId, verbose=False, log=sys.stderr):
    """Constructor.

    :param siteId: wwPDB site identifier used for all path lookups.
    :param verbose: boolean flag to activate verbose logging.
    :param log: stream for logging.
    """
    self.__siteId = siteId
    self.__verbose = verbose
    self.__lfh = log
    #
    # Path resolver anchored at the working directory for session-sourced files.
    self.__pI = PathInfo(siteId=self.__siteId, sessionPath=".", verbose=self.__verbose, log=self.__lfh)
def __init__(self, reqObj=None, verbose=False, log=sys.stderr):
    """Bind the request object, resolve the session, and build a path resolver.

    :param reqObj: web request object providing session and parameter access.
    :param verbose: boolean flag to activate verbose logging.
    :param log: stream for logging.
    """
    self.__verbose = verbose
    self.__lfh = log
    self.__reqObj = reqObj
    #
    self.__sessionObj = self.__reqObj.getSessionObj()
    self.__sessionPath = self.__sessionObj.getPath()
    self.__siteId = self.__reqObj.getValue("WWPDB_SITE_ID")
    self.__pI = PathInfo(siteId=self.__siteId, sessionPath=self.__sessionPath, verbose=self.__verbose, log=self.__lfh)
    #
    if self.__verbose:
        self.__lfh.write("+WebDownloadUtils.__setup() - session id %s\n" % (self.__sessionObj.getId()))
        self.__lfh.write("+WebDownloadUtils.__setup() - session path %s\n" % (self.__sessionPath))
def __setup(self):
    """Build the path resolver and reset per-entry status-history state."""
    # Only pass sessionPath through when one has actually been configured.
    pathArgs = {"siteId": self.__siteId, "verbose": self.__verbose, "log": self.__lfh}
    if self.__sessionPath is not None:
        pathArgs["sessionPath"] = self.__sessionPath
    self.__pI = PathInfo(**pathArgs)
    #
    self.__inpFilePath = None
    self.__entryId = None
    self.__pdbId = None
    # Reader/writer for the status-history PDBx category file.
    self.__pio = PdbxStatusHistoryIo(verbose=self.__verbose, log=self.__lfh)
    self.__statusCategory = "pdbx_database_status_history"
    self.__timeFormat = "%Y-%m-%d:%H:%M"
def __setup(self, siteId=None):
    """Resolve the site id and session context; construct config and path helpers."""
    # Prefer an explicitly supplied site id over the request's WWPDB_SITE_ID.
    if siteId is None:
        self.__siteId = self.__reqObj.getValue("WWPDB_SITE_ID")
    else:
        self.__siteId = siteId
    self.__sessionObj = self.__reqObj.getSessionObj()
    self.__sessionPath = self.__sessionObj.getPath()
    self.__cI = ConfigInfo(self.__siteId)
    self.__pI = PathInfo(siteId=self.__siteId, sessionPath=self.__sessionPath, verbose=self.__verbose, log=self.__lfh)
    #
    if self.__debug:
        self.__lfh.write("+DataExchange.__setup() - session id %s\n" % (self.__sessionObj.getId()))
        self.__lfh.write("+DataExchange.__setup() - session path %s\n" % (self.__sessionObj.getPath()))
        self.__lfh.write("+DataExchange.__setup() - data set %s instance %s file source %s\n" % (self.__depDataSetId, self.__wfInstanceId, self.__fileSource))
    # Propagate the debug setting into the path resolver.
    self.__pI.setDebugFlag(flag=self.__debug)
class StatusLoadWrapper(object):
    """Update release status items.

    Thin wrapper that resolves the archive model file for a data set and
    delegates the status load to DbLoadingApi.
    """

    def __init__(self, siteId, verbose=False, log=sys.stderr):
        """
        :param siteId: wwPDB site identifier used for path resolution.
        :param `verbose`: boolean flag to activate verbose logging.
        :param `log`: stream for logging.
        """
        self.__verbose = verbose
        self.__lfh = log
        self.__siteId = siteId
        #
        # Path resolver anchored at the working directory for session files.
        self.__pI = PathInfo(siteId=self.__siteId, sessionPath=".", verbose=self.__verbose, log=self.__lfh)

    def dbLoad(self, depSetId, fileSource="deposit", versionId="latest", mileStone="deposit"):
        """Load status information for *depSetId* into the database.

        Resolves the PDBx model file via PathInfo, then delegates to
        DbLoadingApi.doLoadStatus().  Returns the loader's result, or
        False on any failure (the exception is logged, not propagated).
        """
        try:
            self.__lfh.write(
                "+StatusLoadWrapper.dbload() site %s loading data set %s %s %s %s\n" %
                (self.__siteId, depSetId, fileSource, mileStone, versionId))
            pdbxFilePath = self.__pI.getModelPdbxFilePath(
                dataSetId=depSetId, fileSource=fileSource, versionId=versionId, mileStone=mileStone)
            # Only the directory part is needed by the loader; fN is unused.
            fD, fN = os.path.split(pdbxFilePath)  # pylint: disable=unused-variable
            dbLd = DbLoadingApi(log=self.__lfh, verbose=self.__verbose)
            return dbLd.doLoadStatus(pdbxFilePath, fD)
        except Exception as e:
            # Best-effort operation: report and return False rather than raise.
            if self.__verbose:
                self.__lfh.write(
                    "+StatusLoadWrapper.dbload() dbload failed for %s %s\n" %
                    (depSetId, str(e)))
            traceback.print_exc(file=self.__lfh)
            return False
class DataMaintenance(object):
    """Collection of data maintenance utilities supporting purge and recovery of data files post release.

    This class duplicates some methods from class DataExchange for anticipated future use.
    """

    def __init__(self, siteId=None, testMode=False, verbose=False, log=sys.stderr):
        self.__verbose = verbose
        self.__lfh = log
        self.__siteId = siteId
        # In test mode no deletions are performed -
        self.__testMode = testMode
        self.__debug = False
        self.__sessionPath = None
        #
        self.__setup(siteId=siteId)

    def __setup(self, siteId=None):
        # Build site configuration and path-resolution helpers.
        self.__siteId = siteId
        self.__cI = ConfigInfo(self.__siteId)
        self.__sessionPath = None
        self.__pI = PathInfo(siteId=self.__siteId, sessionPath=self.__sessionPath, verbose=self.__verbose, log=self.__lfh)

    def setSessionPath(self, inputSessionPath=None):
        """Override the path to files with fileSource="session" """
        self.__sessionPath = inputSessionPath

    def purgeLogs(self, dataSetId):
        """Remove "*log" files from the archive log directory of *dataSetId*.

        In test mode the removals are logged but not performed.  Returns the
        list of candidate paths.
        """
        archivePath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
        dirPath = os.path.join(archivePath, "archive", dataSetId, "log")
        if self.__verbose:
            self.__lfh.write("+DataMaintenance.purgeLogs() - purging logs in directory %s\n" % (dirPath))
        if os.access(dirPath, os.W_OK):
            fpattern = os.path.join(dirPath, "*log")
            if self.__verbose:
                self.__lfh.write("+DataMaintenance.purgeLogs() - purging pattern is %s\n" % (fpattern))
            pthList = glob.glob(fpattern)
            if self.__verbose:
                self.__lfh.write("+DataMaintenance.purgeLogs() candidate path length is %d\n" % len(pthList))
            #
            for pth in pthList:
                try:
                    if self.__testMode:
                        self.__lfh.write("+DataMaintenance.purgeLogs() TEST MODE skip remove %s\n" % pth)
                    else:
                        os.remove(pth)
                except:  # noqa: E722 pylint: disable=bare-except
                    pass
        #
        # NOTE(review): pthList is unbound when dirPath is not writable —
        # confirm callers guarantee the log directory exists and is writable.
        return pthList

    def reversePurge(self, dataSetId, contentType, formatType="pdbx", partitionNumber=1):
        """Remove all versions EXCEPT .V1 of the given content object in the
        archive directory.  Returns the list of removal candidates.
        """
        fn = self.__getArchiveFileName(dataSetId, contentType=contentType, formatType=formatType, version="none", partitionNumber=partitionNumber)
        archivePath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
        dirPath = os.path.join(archivePath, "archive", dataSetId)
        if self.__verbose:
            self.__lfh.write("+DataMaintenance.__setup() - purging in directory %s\n" % (dirPath))
        # Guard against an effectively empty directory path.
        if len(dirPath) < 2:
            return []
        fpattern = os.path.join(dirPath, fn + ".V*")
        if self.__verbose:
            self.__lfh.write("+DataMaintenance.__setup() - purging pattern is %s\n" % (fpattern))
        pthList = glob.glob(fpattern)
        if self.__verbose:
            self.__lfh.write("+DataMaintenance.__reversePurge() candidate length is %d\n" % len(pthList))
        #
        # Keep version 1; everything else is a removal candidate.
        fList = []
        for pth in pthList:
            if not pth.endswith(".V1"):
                fList.append(pth)
        for pth in fList:
            try:
                if self.__testMode:
                    self.__lfh.write("+DataMaintenance.reversePurge() TEST MODE skip remove %s\n" % pth)
                else:
                    os.remove(pth)
            except:  # noqa: E722 pylint: disable=bare-except
                pass
        #
        return fList

    def removeWorkflowDir(self, dataSetId):
        """Remove the workflow directory for *dataSetId* (honors test mode).

        The id must look like a deposition id ("D_" prefix, length > 10) as a
        safety check before any recursive delete.
        """
        if (dataSetId is not None) and dataSetId.startswith("D_") and (len(dataSetId) > 10):
            workflowPath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
            dirPath = os.path.join(workflowPath, "workflow", dataSetId)
            if os.access(dirPath, os.W_OK):
                if self.__testMode:
                    self.__lfh.write("+DataMaintenance.removeWorkflowDir() TEST MODE skip remove %s\n" % dirPath)
                else:
                    shutil.rmtree(dirPath)
                return True
            else:
                return False
        else:
            return False

    def getLogFiles(self, dataSetId, fileSource="archive"):
        """Return "*.log" paths in the archive or deposit directory of *dataSetId*."""
        pL = []
        if fileSource in ["archive"]:
            dirPath = self.__pI.getArchivePath(dataSetId)
        elif fileSource in ["deposit"]:
            dirPath = self.__pI.getDepositPath(dataSetId)
        else:
            return pL
        fpattern = os.path.join(dirPath, "*.log")
        pthList = glob.glob(fpattern)
        return pthList

    def getPurgeCandidates(self, dataSetId, wfInstanceId=None, fileSource="archive", contentType="model", formatType="pdbx", partitionNumber="1", mileStone=None, purgeType="exp"):
        """Return the latest version, and candidates for removal and compression.

        purgeType =  'exp'    use strategy for experimental and model fileSource V<last>, V2, V1
                     'other'  use strategy for other file types -- V<last> & V1
        """
        latestV = None
        rmL = []
        gzL = []
        # vtL is sorted most-recent-version first (see getVersionFileList).
        vtL = self.getVersionFileList(dataSetId, wfInstanceId=wfInstanceId, fileSource=fileSource, contentType=contentType, formatType=formatType, partitionNumber=partitionNumber, mileStone=mileStone)
        n = len(vtL)
        if n > 0:
            latestV = vtL[0][0]
        if purgeType in ["exp"]:
            # Keep latest; compress the two oldest; remove everything between.
            if n < 2:
                return latestV, rmL, gzL
            elif n == 2:
                gzL.append(vtL[1][0])
            elif n == 3:
                gzL.append(vtL[1][0])
                gzL.append(vtL[2][0])
            elif n > 3:
                gzL.append(vtL[n - 2][0])
                gzL.append(vtL[n - 1][0])
                for i in range(1, n - 2):
                    rmL.append(vtL[i][0])
            else:
                pass
        elif purgeType in ["report", "other"]:
            # Keep latest; compress only the oldest; remove the rest.
            if n < 2:
                return latestV, rmL, gzL
            elif n == 2:
                gzL.append(vtL[1][0])
            elif n > 2:
                gzL.append(vtL[n - 1][0])
                for i in range(1, n - 1):
                    rmL.append(vtL[i][0])
            else:
                pass
        return latestV, rmL, gzL

    def getVersionFileListSnapshot(self, basePath, dataSetId, wfInstanceId=None, fileSource="archive", contentType="model", formatType="pdbx", partitionNumber="1", mileStone=None):
        """
        For the input content object return a list of file versions in a snapshot directory (recovery mode).

        Returns (src, dst) pairs for snapshot files that are missing from the
        live archive/deposit directory.

        Return:
              List of [(file path, modification date string,size),...]
        """
        pairL = []
        # basePath = '/net/wwpdb_da_data_archive/.snapshot/nightly.1/data'
        try:
            if fileSource == "archive":
                pth = self.__pI.getArchivePath(dataSetId)
                snPth = os.path.join(basePath, "archive", dataSetId)
            elif fileSource == "deposit":
                pth = self.__pI.getDepositPath(dataSetId)
                snPth = os.path.join(basePath, "deposit", dataSetId)
            # NOTE(review): pth/snPth are unbound for other fileSource values;
            # the resulting NameError is swallowed by the handler below.
            fPattern = self.__pI.getFilePathVersionTemplate(
                dataSetId=dataSetId,
                wfInstanceId=wfInstanceId,
                contentType=contentType,
                formatType=formatType,
                fileSource=fileSource,
                partNumber=partitionNumber,
                mileStone=mileStone,
            )
            _dir, fn = os.path.split(fPattern)
            altPattern = os.path.join(snPth, fn)
            srcL = self.__getFileListWithVersion([altPattern], sortFlag=True)
            for src in srcL:
                _d, f = os.path.split(src[0])
                dst = os.path.join(pth, f)
                # Only recover files absent from the live directory.
                if not os.access(dst, os.F_OK):
                    pairL.append((src[0], dst))
            return pairL
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataMaintenance.getVersionFileList() failing for data set %s instance %s file source %s err %s\n" %
                    (dataSetId, wfInstanceId, fileSource, str(e)))
            traceback.print_exc(file=self.__lfh)
            return []

    ##
    def getVersionFileList(self, dataSetId, wfInstanceId=None, fileSource="archive", contentType="model", formatType="pdbx", partitionNumber="1", mileStone=None):
        """
        For the input content object return a list of file versions sorted by modification time.

        Return:
              List of [(file path, modification date string,size),...]
        """
        try:
            if fileSource == "session" and self.__sessionPath is not None:
                self.__pI.setSessionPath(self.__sessionPath)
            fPattern = self.__pI.getFilePathVersionTemplate(
                dataSetId=dataSetId,
                wfInstanceId=wfInstanceId,
                contentType=contentType,
                formatType=formatType,
                fileSource=fileSource,
                partNumber=partitionNumber,
                mileStone=mileStone,
            )
            return self.__getFileListWithVersion([fPattern], sortFlag=True)
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataMaintenance.getVersionFileList() failing for data set %s instance %s file source %s err %r\n" %
                    (dataSetId, wfInstanceId, fileSource, str(e)))
            traceback.print_exc(file=self.__lfh)
            return []

    def getContentTypeFileList(self, dataSetId, wfInstanceId, fileSource="archive", contentTypeList=None):
        """
        For the input content object return a list of file versions sorted by modification time.

        Return:
              List of [(file path, modification date string,size),...]
        """
        if contentTypeList is None:
            contentTypeList = ["model"]
        try:
            if fileSource == "session" and self.__sessionPath is not None:
                self.__pI.setSessionPath(self.__sessionPath)
            fPatternList = []
            for contentType in contentTypeList:
                fPattern = self.__pI.getFilePathContentTypeTemplate(dataSetId=dataSetId, wfInstanceId=wfInstanceId, contentType=contentType, fileSource=fileSource)
                fPatternList.append(fPattern)
            if self.__debug:
                self.__lfh.write("+DataMaintenance.getContentTypeFileList() patterns %r\n" % fPatternList)
            return self.__getFileListWithVersion(fPatternList, sortFlag=True)
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataMaintenance.getVersionFileList() failing for data set %s instance %s file source %s error %r\n" %
                    (dataSetId, wfInstanceId, fileSource, str(e)))
            traceback.print_exc(file=self.__lfh)
            return []

    def getMiscFileList(self, fPatternList=None, sortFlag=True):
        # Convenience wrapper over __getFileList with a default match-all pattern.
        if fPatternList is None:
            fPatternList = ["*"]
        return self.__getFileList(fPatternList=fPatternList, sortFlag=sortFlag)

    def getLogFileList(self, entryId, fileSource="archive"):
        """Return log files (top-level "*log" and log/ subdirectory) for *entryId*."""
        if fileSource in ["archive", "wf-archive"]:
            pth = self.__pI.getArchivePath(entryId)
            fpat1 = os.path.join(pth, "*log")
            fpat2 = os.path.join(pth, "log", "*")
            patList = [fpat1, fpat2]
        elif fileSource in ["deposit"]:
            pth = self.__pI.getDepositPath(entryId)
            fpat1 = os.path.join(pth, "*log")
            fpat2 = os.path.join(pth, "log", "*")
            patList = [fpat1, fpat2]
        else:
            return []
        return self.__getFileList(fPatternList=patList, sortFlag=True)

    def __getFileListWithVersion(self, fPatternList=None, sortFlag=False):
        """
        For the input glob compatible file pattern produce a file list sorted by modification date.

        If sortFlag is set then file list is sorted by modification date (e.g. recently changes first)

        Return:
              List of [(file path, modification date string, KBytes),...]
        """
        if fPatternList is None:
            fPatternList = ["*"]
        try:
            files = []
            for fPattern in fPatternList:
                if fPattern is not None and len(fPattern) > 0:
                    files.extend(filter(os.path.isfile, glob.glob(fPattern)))
            file_ver_tuple_list = []
            for f in files:
                # Version id is the last dotted component, e.g. ".V12".
                tL = f.split(".")
                vId = tL[-1]
                if vId.startswith("V"):
                    # A trailing non-digit (e.g. partial copy suffix) is stripped
                    # before the numeric conversion.
                    if vId[-1] not in ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]:
                        file_ver_tuple_list.append((f, int(vId[1:-1])))
                    else:
                        file_ver_tuple_list.append((f, int(vId[1:])))
            # Sort the tuple list by version id
            #
            if sortFlag:
                file_ver_tuple_list.sort(key=lambda x: x[1], reverse=True)
            return file_ver_tuple_list
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataMaintenance.getVersionFileList() failing for pattern %r error %r\n" %
                    (fPatternList, str(e)))
            traceback.print_exc(file=self.__lfh)
            return []

    def __getFileList(self, fPatternList=None, sortFlag=True):
        """
        For the input glob compatible file pattern produce a file list sorted by modification date.

        If sortFlag is set then file list is sorted by modification date (e.g. recently changes first)

        Return:
              List of [(file path, modification date string, KBytes),...]
        """
        if fPatternList is None:
            fPatternList = ["*"]
        try:
            files = []
            for fPattern in fPatternList:
                if fPattern is not None and len(fPattern) > 0:
                    files.extend(filter(os.path.isfile, glob.glob(fPattern)))
            file_date_tuple_list = []
            for x in files:
                d = os.path.getmtime(x)
                # Size reported in KBytes.
                s = float(os.path.getsize(x)) / 1000.0
                file_date_tuple = (x, d, s)
                file_date_tuple_list.append(file_date_tuple)
            # Sort the tuple list by the modification time (recent changes first)
            if sortFlag:
                file_date_tuple_list.sort(key=lambda x: x[1], reverse=True)
            rTup = []
            for fP, mT, sZ in file_date_tuple_list:
                tS = datetime.fromtimestamp(mT).strftime("%Y-%b-%d %H:%M:%S")
                rTup.append((fP, tS, sZ))
            return rTup
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataMaintenance.getVersionFileList() failing for patter %r error %r\n" %
                    (fPatternList, str(e)))
            traceback.print_exc(file=self.__lfh)
            return []

    ##
    def __getArchiveFileName(self, dataSetId, wfInstanceId=None, contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
        # Archive-scoped convenience wrapper returning only the file name.
        (_fp, _d, f) = self.__targetFilePath(
            dataSetId=dataSetId,
            wfInstanceId=wfInstanceId,
            fileSource="archive",
            contentType=contentType,
            formatType=formatType,
            version=version,
            partitionNumber=partitionNumber,
            mileStone=mileStone,
        )
        return f

    # def __getInstanceFileName(self, dataSetId, wfInstanceId=None, contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
    #     (_fp, _d, f) = self.__targetFilePath(
    #         dataSetId=dataSetId,
    #         wfInstanceId=wfInstanceId,
    #         fileSource="wf-instance",
    #         contentType=contentType,
    #         formatType=formatType,
    #         version=version,
    #         partitionNumber=partitionNumber,
    #         mileStone=mileStone,
    #     )
    #     return f

    # def __getFilePath(self, dataSetId, wfInstanceId=None, fileSource="archive", contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
    #     (fp, _d, _f) = self.__targetFilePath(
    #         dataSetId=dataSetId,
    #         wfInstanceId=wfInstanceId,
    #         fileSource=fileSource,
    #         contentType=contentType,
    #         formatType=formatType,
    #         version=version,
    #         partitionNumber=partitionNumber,
    #         mileStone=mileStone,
    #     )
    #     return fp

    def __targetFilePath(self, dataSetId, wfInstanceId=None, fileSource="archive", contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
        """Return the file path, directory path, and file name for the input content object if this object is valid.

        If the file path cannot be verified return None for all values
        """
        try:
            if fileSource == "session" and self.__sessionPath is not None:
                self.__pI.setSessionPath(self.__sessionPath)
            fP = self.__pI.getFilePath(
                dataSetId=dataSetId,
                wfInstanceId=wfInstanceId,
                contentType=contentType,
                formatType=formatType,
                fileSource=fileSource,
                versionId=version,
                partNumber=partitionNumber,
                mileStone=mileStone,
            )
            dN, fN = os.path.split(fP)
            return fP, dN, fN
        except Exception as e:
            if self.__debug:
                self.__lfh.write(
                    "+DataMaintenance.__targetFilePath() failing for data set %s instance %s file source %s error %r\n" %
                    (dataSetId, wfInstanceId, fileSource, str(e)))
            traceback.print_exc(file=self.__lfh)
            return (None, None, None)
class WebDownloadUtils(object):
    """
    This class encapsulates handling download requests for workflow data files -

    """

    def __init__(self, reqObj=None, verbose=False, log=sys.stderr):
        # :param reqObj: web request object providing session and parameter access.
        # :param verbose: boolean flag to activate verbose logging.
        # :param log: stream for logging.
        self.__reqObj = reqObj
        self.__verbose = verbose
        self.__lfh = log
        #
        self.__sessionObj = self.__reqObj.getSessionObj()
        self.__sessionPath = self.__sessionObj.getPath()
        self.__siteId = self.__reqObj.getValue("WWPDB_SITE_ID")
        self.__pI = PathInfo(siteId=self.__siteId, sessionPath=self.__sessionPath, verbose=self.__verbose, log=self.__lfh)
        #
        if self.__verbose:
            self.__lfh.write("+WebDownloadUtils.__setup() - session id %s\n" % (self.__sessionObj.getId()))
            self.__lfh.write("+WebDownloadUtils.__setup() - session path %s\n" % (self.__sessionPath))

    def makeDownloadResponse(self):
        """Return a response object corresponding to a download action for data file described by
        the parameter content in the request object.
        """
        if self.__verbose:
            self.__lfh.write("+WebDownloadUtils.makeResponse() starting with session path %s\n" % self.__sessionPath)
        filePath = self.__getDownloadFileInfo()
        if self.__verbose:
            self.__lfh.write("+WebDownloadUtils.makeResponse() target file path is %s\n" % filePath)
        return self.__makeResponseContentObject(filePath=filePath)

    def __getDownloadFileInfo(self):
        """Extract target file details and return file path or None."""
        retPath = None
        #
        # Required parameters: data_set_id and content_type; a recognized
        # file_source.  All other parameters have defaults.
        dataSetId = self.__reqObj.getValue("data_set_id")
        if len(dataSetId) < 1:
            return retPath
        fileSource = self.__reqObj.getValueOrDefault("file_source", default="archive")
        if fileSource not in ["archive", "wf-archive", "wf-instance", "session", "wf-session"]:
            return retPath
        wfInstanceId = self.__reqObj.getValueOrDefault("wf_instance", default=None)
        contentType = self.__reqObj.getValue("content_type")
        if len(contentType) < 1:
            return retPath
        formatType = self.__reqObj.getValueOrDefault("format", default="pdbx")
        versionId = self.__reqObj.getValueOrDefault("version", default="latest")
        partNumber = self.__reqObj.getValueOrDefault("part", "1")
        #
        retPath = self.__pI.getFilePath(
            dataSetId, wfInstanceId=wfInstanceId, contentType=contentType, formatType=formatType, fileSource=fileSource, versionId=versionId, partNumber=partNumber
        )
        return retPath

    def __makeResponseContentObject(self, filePath, attachmentFlag=True, compressFlag=False):
        """Create a response content object for the input file"""
        if self.__verbose:
            self.__lfh.write("+WebDownloadUtils.__makeResponseContentObject() starting with file path %s\n" % filePath)
        #
        rC = ResponseContent(reqObj=self.__reqObj, verbose=self.__verbose, log=self.__lfh)
        if filePath is not None and os.access(filePath, os.F_OK):
            # File exists: serve it as a binary (optionally compressed) attachment.
            rC.setReturnFormat("binary")
            rC.setBinaryFile(filePath, attachmentFlag=attachmentFlag, serveCompressed=compressFlag)
        else:
            rC.setReturnFormat("json")
            rC.setError(errMsg="Download failure for %s" % filePath)
        return rC
def testGetStandardPaths(self):
    """Test getting standard file names within session paths."""
    # NOTE(review): ``ok`` is never reassigned; the final assertEqual is vestigial.
    ok = True
    # fileSource, id, partionId, versionId
    tests = [
        ("archive", "D_1000000000", None, 1, "latest"),
        ("archive", "D_1000000000", None, "latest", "latest"),
        ("archive", "D_1000000000", None, "next", "latest"),
        ("archive", "D_1000000000", None, "previous", "latest"),
        ("deposit", "D_1000000000", None, 1, "latest"),
    ]
    eId = "1"
    for (fs, dataSetId, wfInst, pId, vId) in tests:
        logger.debug("File source %s dataSetId %s partno %s wfInst %s version %s" % (fs, dataSetId, pId, wfInst, vId))
        pI = PathInfo(siteId=self.__siteId)
        pI.setDebugFlag(False)
        #
        # Model file paths: default, then with "deposit" and "upload" milestones.
        fp = pI.getModelPdbxFilePath(dataSetId, wfInstanceId=wfInst, fileSource=fs, versionId=vId)
        logger.debug("Model path (PDBx): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find model file - default")
        fp = pI.getModelPdbxFilePath(dataSetId, wfInstanceId=wfInst, fileSource=fs, versionId=vId, mileStone="deposit")
        logger.debug("Model path (deposit) (PDBx): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find model file - deposit")
        fp = pI.getModelPdbxFilePath(dataSetId, wfInstanceId=wfInst, fileSource=fs, versionId=vId, mileStone="upload")
        logger.debug("Model path (upload) (PDBx): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find model file - upload")
        fp = pI.getModelPdbFilePath(dataSetId, wfInstanceId=wfInst, fileSource=fs, versionId=vId)
        logger.debug("Model path (PDB): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find PDB model file")
        # Experimental data and derived report files.
        fp = pI.getStructureFactorsPdbxFilePath(dataSetId, wfInstanceId=wfInst, fileSource=fs, versionId=vId)
        logger.debug("SF path (pdbx): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find SF file")
        fp = pI.getPolyLinkFilePath(dataSetId, wfInstanceId=wfInst, fileSource=fs, versionId=vId)
        logger.debug("Link dist (PDBx): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find PDBx model file")
        fp = pI.getPolyLinkReportFilePath(dataSetId, wfInstanceId=wfInst, fileSource=fs, versionId=vId)
        logger.debug("Link Report dist (PDBx): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find link report file")
        # Sequence processing artifacts.
        fp = pI.getSequenceStatsFilePath(dataSetId, wfInstanceId=wfInst, fileSource=fs, versionId=vId)
        logger.debug("Sequence stats (PIC): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find sequence stats file")
        fp = pI.getSequenceAlignFilePath(dataSetId, wfInstanceId=wfInst, fileSource=fs, versionId=vId)
        logger.debug("Sequence align (PIC): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find sequence align file")
        fp = pI.getReferenceSequenceFilePath(dataSetId, entityId=eId, wfInstanceId=wfInst, fileSource=fs, versionId=vId)
        logger.debug("Reference match entity %s (PDBx): %s" % (eId, fp))
        self.assertIsNotNone(fp, "Failed to find reference sequence file")
        fp = pI.getSequenceAssignmentFilePath(dataSetId, wfInstanceId=wfInst, fileSource=fs, versionId=vId)
        logger.debug("Sequence assignment (PDBx): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find sequence assignment")
        fp = pI.getAssemblyAssignmentFilePath(dataSetId, wfInstanceId=wfInst, fileSource=fs, versionId=vId)
        logger.debug("Assembly assignment (PDBx): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find assembly assignment")
        fp = pI.getBlastMatchFilePath(dataSetId, wfInstanceId=wfInst, fileSource=fs, versionId=vId)
        logger.debug("Blast match (xml): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find blast match file")
        fp = pI.getFilePath(dataSetId, wfInstanceId=wfInst, contentType="seqdb-match", formatType="pdbx", fileSource=fs, versionId=vId, partNumber=pId, mileStone=None)
        logger.debug("Sequence match (getFilePath) (PDBx): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find seq-db match")
        #
        # Template and directory-level path queries.
        fp = pI.getFilePathContentTypeTemplate(dataSetId, wfInstanceId=wfInst, contentType="model", fileSource=fs)
        logger.debug("Model template: %s" % fp)
        self.assertIsNotNone(fp, "Failed to find model template")
        fp = pI.getArchivePath(dataSetId)
        logger.debug("getArchivePath (PDBx): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find dir path")
        fp = pI.getDepositPath(dataSetId)
        logger.debug("getDepositPath (PDBx): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find deposit path")
        fp = pI.getInstancePath(dataSetId, wfInstanceId="W_099")
        logger.debug("getWfInstancePath (PDBx): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find wf instance path")
        fp = pI.getInstanceTopPath(
            dataSetId,
        )
        logger.debug("getWfInstanceTopPath (PDBx): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find wf Top instance path")
        fp = pI.getTempDepPath(dataSetId)
        logger.debug("getTempDepPath): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find TempDep path")
        #
        fp = pI.getDirPath(dataSetId, wfInstanceId=wfInst, fileSource=fs, versionId=vId, partNumber=pId, mileStone=None)
        logger.debug("Sequence match (getDirPath) (PDBx): %s" % fp)
        self.assertIsNotNone(fp, "Failed to find dir path")
        # EM map templates.
        # NOTE(review): the em-volume template result is logged but never asserted.
        ft = pI.getFilePathVersionTemplate(dataSetId, wfInstanceId=wfInst, contentType="em-volume", formatType="map", fileSource="archive", partNumber=pId, mileStone=None)
        logger.debug("EM volume version template: %r" % ft)
        ft = pI.getFilePathPartitionTemplate(dataSetId, wfInstanceId=wfInst, contentType="em-mask-volume", formatType="map", fileSource="archive", mileStone=None)
        logger.debug("EM mask partition template: %r" % ft)
        self.assertIsNotNone(ft, "Failed to mask model file")
    self.assertEqual(ok, True)
def testFileNames(self):
    """Exercise file-name validity checks and parsing/splitting helpers."""
    # (file name, expected validity) — version suffix required.
    tests = [("D_000001_model_P1.cif.V1", True), ("D_000001_model_P1.cif", False), ("D_000001_P1.cif.V1", False), ("D_000001_model.cif.V1", False), ("D_000001.cif", False)]
    # Matches w/o version number
    tests2 = [("D_000001_model_P1.cif.V1", True), ("D_000001_model_P1.cif", True), ("D_000001_P1.cif.V1", False), ("D_000001_model.cif.V1", False), ("D_000001.cif", False)]
    for fileName, expected in tests:
        pI = PathInfo(siteId=self.__siteId)
        self.assertEqual(pI.isValidFileName(fileName), expected, "Parsing mismatch %s" % fileName)
    # Without requiring a version suffix
    for fileName, expected in tests2:
        pI = PathInfo(siteId=self.__siteId)
        self.assertEqual(pI.isValidFileName(fileName, False), expected, "Parsing mismatch %s" % fileName)
    pI = PathInfo(siteId=self.__siteId)
    # parseFileName is all-or-nothing; splitFileName yields partial results.
    self.assertEqual(pI.parseFileName("D_000001_model_P1.cif.V1"), ("D_000001", "model", "pdbx", 1, 1))
    self.assertEqual(pI.parseFileName("D_000001_model.cif.V1"), (None, None, None, None, None))
    self.assertEqual(pI.splitFileName("D_000001_model_P1.cif.V1"), ("D_000001", "model", "pdbx", 1, 1))
    self.assertEqual(pI.splitFileName("D_000001_model.cif.V1"), ("D_000001", "model", None, None, 1))
    # Format-name to file-extension mapping.
    self.assertEqual(pI.getFileExtension("gif"), "gif", "Getting file extension")
    self.assertEqual(pI.getFileExtension("pdbx"), "cif", "Getting file extension")
    self.assertEqual(pI.getFileExtension("unk"), None, "Getting file extension")
class StatusHistory(object):
    """Manage updates to an entry's status history data file.

    Wraps a PdbxStatusHistoryIo instance to read/append rows of the
    "pdbx_database_status_history" category and persist them via paths
    resolved through PathInfo.
    """

    def __init__(self, siteId=None, fileSource="archive", sessionPath=None, verbose=False, log=sys.stderr):
        self.__verbose = verbose
        self.__lfh = log
        self.__debug = False
        self.__fileSource = fileSource
        self.__sessionPath = sessionPath
        self.__siteId = siteId
        #
        self.__inpFilePath = None
        self.__entryId = None
        self.__pdbId = None
        #
        self.__setup()

    def __setup(self):
        # PathInfo only accepts a sessionPath argument when one was supplied.
        if self.__sessionPath is not None:
            self.__pI = PathInfo(siteId=self.__siteId, sessionPath=self.__sessionPath, verbose=self.__verbose, log=self.__lfh)
        else:
            self.__pI = PathInfo(siteId=self.__siteId, verbose=self.__verbose, log=self.__lfh)
        #
        self.__inpFilePath = None
        self.__entryId = None
        self.__pdbId = None
        self.__pio = PdbxStatusHistoryIo(verbose=self.__verbose, log=self.__lfh)
        self.__statusCategory = "pdbx_database_status_history"
        # CIF-style timestamp format used throughout this class.
        self.__timeFormat = "%Y-%m-%d:%H:%M"

    #
    def __setEntryId(self, entryId, pdbId):
        """Set the file path of the status history file and read any existing content --"""
        self.__entryId = entryId
        self.__pdbId = pdbId
        self.__inpFilePath = self.__pI.getStatusHistoryFilePath(dataSetId=entryId, fileSource=self.__fileSource, versionId="latest")
        if self.__exists():
            return self.__pio.setFilePath(filePath=self.__inpFilePath, idCode=entryId)
        else:
            return False

    def __setInpPath(self, inpPath, entryId, pdbId):
        """Set the file path of the status history file and read any existing content --"""
        self.__entryId = entryId
        self.__pdbId = pdbId
        self.__inpFilePath = inpPath
        if self.__exists():
            return self.__pio.setFilePath(filePath=self.__inpFilePath, idCode=entryId)
        else:
            return False

    def __new(self, entryId):
        """Create a new status history category using base category style content definition --"""
        return self.__pio.newContainer(containerName=entryId, overWrite=True)

    def setEntryId(self, entryId, pdbId, inpPath=None, overWrite=False):
        """Open an existing status history file from the archive directory and read the container corresponding to
        the input entry id.  An alternate file path can be provided to override reading input from the
        archive directory.    overWrite = True to rewrite any existing history file -

        Return:  True for an existing file or for the creation of a new empty container initialized
                 with an empty status history data category, or False otherwise.

        NOTE(review): the code actually returns self.__getRowCount() (an int), not the
        boolean described above -- truthy/falsy semantics match, but confirm callers.
        """
        if inpPath is not None:
            ok = self.__setInpPath(inpPath, entryId, pdbId)
        else:
            ok = self.__setEntryId(entryId, pdbId)
        if not ok or overWrite:
            ok = self.__new(entryId)
        return self.__getRowCount()

    def store(self, entryId, outPath=None, versionId="latest"):
        # Persist accumulated history rows; refuses to write an empty category.
        if self.__getRowCount() < 1:
            return False
        if outPath is None:
            outFilePath = self.__pI.getStatusHistoryFilePath(dataSetId=entryId, fileSource=self.__fileSource, versionId=versionId)
        else:
            outFilePath = outPath
        #
        if self.__verbose:
            self.__lfh.write("+StatusHistory.store() %s storing %d history records in file path %s\n" % (entryId, self.__getRowCount(), outFilePath))
        #
        return self.__pio.write(outFilePath)

    def __exists(self):
        """Return True if a status history file exists or false otherwise."""
        if os.access(self.__inpFilePath, os.R_OK):
            return True
        else:
            return False

    def getNow(self):
        return self.__getNow()

    def __getNow(self):
        """Return a CIF style date-timestamp value for current local time -"""
        today = datetime.datetime.today()
        return str(today.strftime(self.__timeFormat))

    def dateTimeOk(self, dateTime):
        # Validate a timestamp string: normalize, then confirm it parses.
        try:
            tS = self.__makeTimeStamp(dateTime)
            if (tS is not None) and (len(tS) < 16):
                return False
            else:
                datetime.datetime.strptime(tS, self.__timeFormat)
                return True
        except:  # noqa: E722 pylint: disable=bare-except
            return False

    def __makeTimeStamp(self, inpTimeStamp):
        """Normalize an input timestamp to %Y-%m-%d:%H:%M form.

        Date-only input (len 10) is padded with ":00:00"; longer input is
        truncated to 16 chars; shorter input returns "".
        NOTE(review): lengths 11-15 fall through with inpT == "" so strptime
        raises and the original input is returned unchanged -- confirm intended.
        """
        try:
            inpT = ""
            if len(inpTimeStamp) < 10:
                return inpT
            elif len(inpTimeStamp) == 10:
                inpT = inpTimeStamp + ":00:00"
            elif len(inpTimeStamp) >= 16:
                inpT = inpTimeStamp[:16]
            #
            t = datetime.datetime.strptime(inpT, self.__timeFormat)
            return str(t.strftime(self.__timeFormat))
        except Exception as e:
            self.__lfh.write("+StatusHistory.__makeTimeStamp() fails for inpTimeStamp %r inpT %r err %r\n" % (inpTimeStamp, inpT, str(e)))
            if self.__debug:
                traceback.print_exc(file=self.__lfh)
            return inpTimeStamp

    def __getRowCount(self):
        return self.__pio.getRowCount(catName=self.__statusCategory)

    # def __updatePriorEndDate(self, dateEnd=None):
    #     """Update the 'end-date' value for the previous status history record."""
    #     nRows = self.__getRowCount()
    #     if nRows > 0 and dateEnd is not None:
    #         ok = self.__pio.updateAttribute(catName=self.__statusCategory, attribName="date_end", value=dateEnd, iRow=nRows - 1)
    #         return ok
    #     else:
    #         return False

    def get(self):
        # Return the full history as a list of attribute dictionaries.
        return self.__pio.getAttribDictList(catName=self.__statusCategory)

    def getLastStatusAndDate(self):
        # Also opportunistically capture the pdb_id from the last record.
        tup = self.__lastStatusAndDate()
        if (self.__pdbId is None) and (len(tup) > 3):
            self.__pdbId = tup[3]
        return (tup[0], tup[1])

    def __lastStatusAndDate(self):
        """Return the last status code, time stamp, and ordinal index in the current data context."""
        try:
            nRows = self.__getRowCount()
            if nRows > 0:
                dList = self.__pio.getAttribDictList(catName=self.__statusCategory)
                # -- Get the row with the last ordinal --
                tOrd = (-1, -1)
                for ii, d in enumerate(dList):
                    if int(str(d["ordinal"])) > tOrd[1]:
                        tOrd = (ii, int(d["ordinal"]))
                dA = dList[tOrd[0]]
                return (dA["status_code_end"], dA["date_end"], int(str(dA["ordinal"])), dA["pdb_id"])
            else:
                return (None, None, None, None)
        except:  # noqa: E722 pylint: disable=bare-except
            traceback.print_exc(file=self.__lfh)
            return (None, None, None, None)

    # def __testValueExists(self, value, key="status_code_begin"):
    #     try:
    #         dList = self.__pio.getAttribDictList(catName=self.__statusCategory)
    #         for _ii, d in enumerate(dList):
    #             if d[key] == value:
    #                 return True
    #     except:  # noqa: E722 pylint: disable=bare-except
    #         return False

    def nextRecord(self, statusCodeNext="AUTH", dateNext=None, annotator=None, details=None):
        """Append a new record spanning from the last recorded status to statusCodeNext.

        No-op (returns False) when the status is unchanged; dateNext defaults to now.
        """
        try:
            statusLast, dateLast, _ordinalLast, pdbId = self.__lastStatusAndDate()
            if statusCodeNext == statusLast:
                return False
            if self.__pdbId is None or len(self.__pdbId) < 4:
                self.__pdbId = pdbId
            if dateNext is None:
                dateNext = self.getNow()
            ok = self.add(statusCodeBegin=statusLast, dateBegin=dateLast, statusCodeEnd=statusCodeNext, dateEnd=dateNext, annotator=annotator, details=details)
            return ok
        except:  # noqa: E722 pylint: disable=bare-except
            return False

    def add(self, statusCodeBegin="PROC", dateBegin=None, statusCodeEnd="PROC", dateEnd=None, annotator=None, details=None):
        # Convenience wrapper: normalize timestamps and append using stored ids.
        return self.__appendRow(
            entryId=self.__entryId,
            pdbId=self.__pdbId,
            statusCodeBegin=statusCodeBegin,
            dateBegin=self.__makeTimeStamp(dateBegin),
            statusCodeEnd=statusCodeEnd,
            dateEnd=self.__makeTimeStamp(dateEnd),
            annotator=annotator,
            details=details,
        )

    def __appendRow(self, entryId, pdbId, statusCodeBegin="PROC", dateBegin=None, statusCodeEnd="PROC", dateEnd=None, annotator=None, details=None):
        """Append a row to the status history list --

        if - dateEnd is not specified then the current date-time is used.

        return True for success or false otherwise
        """
        uD = {}
        nRows = self.__getRowCount()
        if self.__verbose:
            self.__lfh.write(
                "+StatusHistory.__appendRow() %s begins with nRows %r pdbId %r statusBegin %r dateBegin %r statusEnd %r dateEnd %r\n"
                % (entryId, nRows, pdbId, statusCodeBegin, dateBegin, statusCodeEnd, dateEnd)
            )
        if nRows < 0:
            return False
        #
        # Required fields: missing/empty values abort the append.
        if entryId is not None and len(entryId) > 0:
            uD["entry_id"] = str(entryId)
        else:
            return False
        if pdbId is not None and len(pdbId) > 0:
            uD["pdb_id"] = str(pdbId)
        else:
            return False
        if statusCodeBegin is not None and len(statusCodeBegin) > 0:
            uD["status_code_begin"] = str(statusCodeBegin)
        else:
            return False
        if statusCodeEnd is not None and len(statusCodeEnd) > 0:
            uD["status_code_end"] = str(statusCodeEnd)
        else:
            return False
        if dateBegin is not None and len(dateBegin) > 0:
            uD["date_begin"] = str(dateBegin)
        else:
            return False
        # Optional fields with defaults.
        if dateEnd is not None and len(dateEnd) > 0:
            uD["date_end"] = str(dateEnd)
        else:
            uD["date_end"] = self.__getNow()
        if details is not None:
            uD["details"] = str(details)
        if annotator is not None and len(annotator) > 0:
            uD["annotator"] = str(annotator)
        else:
            uD["annotator"] = "UNASSIGNED"
        # Next ordinal continues from the last recorded row.
        if nRows == 0:
            iOrdinal = 0
        else:
            _t, tt, iOrdinal, _ttt = self.__lastStatusAndDate()
        uD["ordinal"] = str(iOrdinal + 1)
        #
        # Compute the time delta -
        #
        tt = self.__deltaDate(uD["date_end"], uD["date_begin"])
        if tt > 0:
            uD["delta_days"] = "%.4f" % tt
        else:
            uD["delta_days"] = "0.0000"
        #
        ok = self.__pio.appendRowByAttribute(rowAttribDict=uD, catName=self.__statusCategory)
        return ok

    def __deltaDate(self, dateTimeEnd, dateTimeBegin, fail=-1):
        """Return the elapsed time in (fractional) days between the two
        timestamps, or `fail` if either does not parse."""
        try:
            tEnd = datetime.datetime.strptime(dateTimeEnd, self.__timeFormat)
            tBegin = datetime.datetime.strptime(dateTimeBegin, self.__timeFormat)
            tDelta = tEnd - tBegin
            days = float(tDelta.total_seconds()) / 86400.0
            # print " dateTimeBegin=", dateTimeBegin, " dateTimeEnd=", dateTimeEnd, " tDelta=", tDelta.total_seconds(), " days=", days
            return days
        except:  # noqa: E722 pylint: disable=bare-except
            return fail
class FileUtils(FileUtilsBase):
    """Manage the presentation of project files for download.

    Renders HTML tables of archive/deposit/workflow files for an entry,
    with download links served through the review_v2 download service.
    """

    def __init__(self, entryId, reqObj=None, verbose=False, log=sys.stderr):
        self.__verbose = verbose
        self.__lfh = log
        self.__reqObj = reqObj
        # Reassign siteId for the following special case --
        self.__entryId = entryId
        siteId = self.__reqObj.getValue("WWPDB_SITE_ID")
        # This is for viewing the entries from the standalone validation server from annotation --
        if siteId in ["WWPDB_DEPLOY_PRODUCTION_RU", "WWPDB_DEPLOY_VALSRV_RU", "WWPDB_DEPLOY_TEST", "WWPDB_DEPLOY_INTERNAL_RU"] and entryId.startswith("D_90"):
            siteId = "WWPDB_DEPLOY_VALSRV_RU"
        #
        # Get inventory of file types
        super(FileUtils, self).__init__()
        #
        self.__setup(siteId=siteId)

    def __setup(self, siteId=None):
        # Resolve the effective site id, then bind session, path, and config helpers.
        if siteId is not None:
            self.__siteId = siteId
        else:
            self.__siteId = self.__reqObj.getValue("WWPDB_SITE_ID")
        # self.__lfh.write("+FileUtils.__setup() starting with entryId %r adjusted WWPDB_SITE_ID %r\n" % (self.__entryId, self.__siteId))
        #
        self.__sObj = self.__reqObj.getSessionObj()
        self.__sessionId = self.__sObj.getId()
        self.__sessionPath = self.__sObj.getPath()
        self.__pI = PathInfo(siteId=self.__siteId, sessionPath=self.__sessionPath, verbose=self.__verbose, log=self.__lfh)
        self.__cI = ConfigInfo(self.__siteId)
        self.__msL = self.__cI.get("CONTENT_MILESTONE_LIST")

    #
    def renderFileList(self, fileSource="archive", rDList=None, titlePrefix="", titleSuffix="", displayImageFlag=False):
        """Render HTML file listings for the entry from the given file source.

        :param fileSource: one of archive/deposit/wf-archive/wf-instance/instance
        :param rDList: keys into the inherited self._rD content-type inventory
                       (defaults to self._rDList from FileUtilsBase)
        :return: (total file count, list of HTML fragment strings)
        """
        if rDList is None:
            rDList = self._rDList
        htmlList = []
        nTot = 0
        if fileSource in ["archive", "deposit", "wf-archive"]:
            for ky in rDList:
                if ky not in self._rD:
                    continue
                ctList = self._rD[ky]
                title = titlePrefix + ky + titleSuffix
                # Expand each content type with its milestone variants (e.g. model-upload).
                fList = []
                fList.extend(ctList)
                for ct in ctList:
                    for ms in self.__msL:
                        mt = ct + "-" + ms
                        fList.append(mt)
                nF, oL = self.__renderContentTypeFileList(
                    self.__entryId, fileSource=fileSource, wfInstanceId=None, contentTypeList=fList, title=title, displayImageFlag=displayImageFlag
                )
                if nF > 0:
                    htmlList.extend(oL)
                    nTot += nF
        if fileSource in ["archive", "wf-archive"]:
            nF, oL = self.__renderLogFileList(self.__entryId, fileSource="archive", title="Archive Log Files")
            if nF > 0:
                htmlList.extend(oL)
                nTot += nF
        if fileSource in ["deposit"]:
            nF, oL = self.__renderLogFileList(self.__entryId, fileSource="deposit", title="Deposit Log Files")
            if nF > 0:
                htmlList.extend(oL)
                nTot += nF
        #
        if fileSource in ["wf-instance", "instance"]:
            # One listing per workflow instance directory under the instance top path.
            iTopPath = self.__pI.getInstanceTopPath(self.__entryId)
            fPattern = os.path.join(iTopPath, "*")
            wfInstancePathList = filter(os.path.isdir, glob.glob(fPattern))
            for wfInstancePath in wfInstancePathList:
                (_pth, wfInstId) = os.path.split(wfInstancePath)
                title = "Files in " + wfInstId
                nF, oL = self.__renderWfInstanceFileList(self.__entryId, wfInstancePath, title=title)
                if nF > 0:
                    htmlList.extend(oL)
                    nTot += nF
        #
        return nTot, htmlList

    def __renderContentTypeFileList(self, entryId, fileSource="archive", wfInstanceId=None, contentTypeList=None, title=None, displayImageFlag=False):
        """Render one HTML table of files for the given content types.

        When displayImageFlag is set, EMDB image files are symlinked into the
        session directory and rendered inline as an <img> row.
        """
        if contentTypeList is None:
            contentTypeList = ["model"]
        if self.__verbose:
            self.__lfh.write(
                "+FileUtils.renderContentTypeFileList() entryId %r fileSource %r wfInstanceId %r contentTypeList %r \n" % (entryId, fileSource, wfInstanceId, contentTypeList)
            )
        de = DataExchange(
            reqObj=self.__reqObj, depDataSetId=entryId, wfInstanceId=wfInstanceId, fileSource=fileSource, siteId=self.__siteId, verbose=self.__verbose, log=self.__lfh
        )
        tupL = de.getContentTypeFileList(fileSource=fileSource, contentTypeList=contentTypeList)
        #
        rTupL = []
        for tup in tupL:
            href, fN = self.__makeDownloadHref(tup[0])
            # Sizes >= ~1 KB rendered as integers; smaller sizes with 3 decimals.
            if tup[2] > 1:
                sz = "%d" % int(tup[2])
            else:
                sz = "%.3f" % tup[2]
            rTup = (href, tup[1], sz)
            rTupL.append(rTup)
            if displayImageFlag and fN.startswith(entryId + "_img-emdb"):
                imgFile = os.path.join(self.__sessionPath, fN)
                if os.access(imgFile, os.F_OK):
                    os.remove(imgFile)
                #
                os.symlink(tup[0], imgFile)
                imgHtml = '<img src="/sessions/' + self.__sessionId + "/" + fN + '" border="0" alt="Image" width="400" height="400">'
                rTupL.append(("displayImage", imgHtml, ""))
        #
        #
        if title is None:
            cS = ",".join(contentTypeList)
            title = "File Source %s (%s)" % (fileSource, cS)
        nF, htmlList = self.__renderFileList(rTupL, title)
        return nF, htmlList

    def __renderWfInstanceFileList(self, entryId, wfPath, title=None):
        # Render all files found directly under one workflow instance directory.
        if self.__verbose:
            self.__lfh.write("+FileUtils.renderWfInstanceFileList() wfPath %s\n" % wfPath)
        wfPattern = os.path.join(wfPath, "*")
        de = DataExchange(reqObj=self.__reqObj, depDataSetId=entryId, wfInstanceId=None, fileSource=None, siteId=self.__siteId, verbose=self.__verbose, log=self.__lfh)
        tupL = de.getMiscFileList(fPatternList=[wfPattern], sortFlag=True)
        #
        rTupL = []
        for tup in tupL:
            href, _fN = self.__makeDownloadHref(tup[0])
            if tup[2] > 1:
                sz = "%d" % int(tup[2])
            else:
                sz = "%.3f" % tup[2]
            rTup = (href, tup[1], sz)
            rTupL.append(rTup)
        if title is None:
            title = "Workflow instance files for %s" % entryId
        nF, htmlList = self.__renderFileList(rTupL, title)
        return nF, htmlList

    def __renderLogFileList(self, entryId, fileSource="archive", title=None):
        # Render log files for the entry from archive or deposit storage.
        if self.__verbose:
            self.__lfh.write("+FileUtils.renderLogFileList() entryId %r fileSource %r\n" % (entryId, fileSource))
        de = DataExchange(reqObj=self.__reqObj, depDataSetId=entryId, wfInstanceId=None, fileSource=fileSource, siteId=self.__siteId, verbose=self.__verbose, log=self.__lfh)
        tupL = de.getLogFileList(entryId, fileSource=fileSource)
        #
        rTupL = []
        for tup in tupL:
            href, _fN = self.__makeDownloadHref(tup[0])
            if tup[2] > 1:
                sz = "%d" % int(tup[2])
            else:
                sz = "%.3f" % tup[2]
            rTup = (href, tup[1], sz)
            rTupL.append(rTup)
        if title is None:
            title = "Log Files in Source %s" % fileSource
        nF, htmlList = self.__renderFileList(rTupL, title)
        return nF, htmlList

    def __renderFileList(self, fileTupleList, title, embeddedTitle=True):
        """Render a (href, mtime, size) tuple list as an HTML table.

        "displayImage" rows span all three columns.  Returns
        (len(fileTupleList), list of HTML lines); empty input yields no markup.
        """
        #
        oL = []
        if len(fileTupleList) > 0:
            if embeddedTitle:
                oL.append('<table class="table table-bordered table-striped table-condensed">')
                oL.append('<tr><th class="width50">%s</th><th class="width30">Modification Time</th><th class="width20">Size (KBytes)</th></tr>' % title)
            else:
                oL.append("<h4>%s</h4>" % title)
                oL.append('<table class="table table-bordered table-striped table-condensed">')
                oL.append('<tr><th class="width50">Files</th><th class="width30">Modification Time</th><th class="width20">Size (KBytes)</th></tr>')
            for tup in fileTupleList:
                oL.append("<tr>")
                if tup[0] == "displayImage":
                    oL.append('<td align="center" colspan="3">%s</td>' % tup[1])
                else:
                    oL.append("<td>%s</td>" % tup[0])
                    oL.append("<td>%s</td>" % tup[1])
                    oL.append("<td>%s</td>" % tup[2])
                #
                oL.append("</tr>")
            #
            oL.append("</table>")
        #
        return len(fileTupleList), oL

    def __makeDownloadHref(self, filePath):
        # Build a session-scoped download anchor for the given file path.
        _dP, fN = os.path.split(filePath)
        tS = "/service/review_v2/download_file?sessionid=" + self.__sessionId + "&file_path=" + filePath
        href = "<a class='my-file-downloadable' href='" + tS + "'>" + fN + "</a>"
        return href, fN
class DataExchange(object):
    """Implements common data exchange operations including:

    moving annotation data files between session and workflow storage,
    accessing files in workflow directories, and routine file maintenance
    operations.
    """

    def __init__(self, reqObj=None, depDataSetId=None, wfInstanceId=None, fileSource="archive", siteId=None, verbose=False, log=sys.stderr):
        self.__reqObj = reqObj
        self.__depDataSetId = depDataSetId
        self.__wfInstanceId = wfInstanceId
        self.__fileSource = fileSource
        self.__verbose = verbose
        self.__lfh = log
        #
        self.__debug = False
        self.__inputSessionPath = None
        #
        self.__setup(siteId=siteId)

    def __setup(self, siteId=None):
        # Site id falls back to the request object when not given explicitly.
        if siteId is not None:
            self.__siteId = siteId
        else:
            self.__siteId = self.__reqObj.getValue("WWPDB_SITE_ID")
        self.__sessionObj = self.__reqObj.getSessionObj()
        self.__sessionPath = self.__sessionObj.getPath()
        self.__cI = ConfigInfo(self.__siteId)
        self.__pI = PathInfo(siteId=self.__siteId, sessionPath=self.__sessionPath, verbose=self.__verbose, log=self.__lfh)
        #
        if self.__debug:
            self.__lfh.write("+DataExchange.__setup() - session id %s\n" % (self.__sessionObj.getId()))
            self.__lfh.write("+DataExchange.__setup() - session path %s\n" % (self.__sessionObj.getPath()))
            self.__lfh.write("+DataExchange.__setup() - data set %s instance %s file source %s\n" % (self.__depDataSetId, self.__wfInstanceId, self.__fileSource))
            self.__pI.setDebugFlag(flag=self.__debug)

    def setFileSource(self, fileSource):
        """Override fileSource="archive" """
        self.__fileSource = fileSource

    def setInputSessionPath(self, inputSessionPath=None):
        """Override the path to files with fileSource="session" """
        self.__inputSessionPath = inputSessionPath

    def purgeLogs(self):
        """Remove *log files from the entry's archive log directory.

        NOTE(review): if the directory is not writable, pthList is never
        assigned and the final return raises UnboundLocalError -- confirm.
        """
        archivePath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
        dirPath = os.path.join(archivePath, "archive", self.__depDataSetId, "log")
        if self.__verbose:
            self.__lfh.write("+DataExchange.purgeLogs() - purging logs in directory %s\n" % (dirPath))
        if os.access(dirPath, os.W_OK):
            fpattern = os.path.join(dirPath, "*log")
            if self.__verbose:
                self.__lfh.write("+DataExchange.purgeLogs() - purging pattern is %s\n" % (fpattern))
            pthList = glob.glob(fpattern)
            if self.__verbose:
                self.__lfh.write("+DataExchange.purgeLogs() candidate path length is %d\n" % len(pthList))
            #
            for pth in pthList:
                try:
                    os.remove(pth)
                except:  # noqa: E722 pylint: disable=bare-except
                    pass
            #
        return pthList

    def reversePurge(self, contentType, formatType="pdbx", partitionNumber=1):
        """Remove all versions of the given content object EXCEPT version 1."""
        fn = self.__getArchiveFileName(contentType=contentType, formatType=formatType, version="none", partitionNumber=partitionNumber)
        archivePath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
        dirPath = os.path.join(archivePath, "archive", self.__depDataSetId)
        if self.__verbose:
            self.__lfh.write("+DataExchange.__setup() - purging in directory %s\n" % (dirPath))
        # Guard against purging from a degenerate/empty path.
        if len(dirPath) < 2:
            return []
        fpattern = os.path.join(dirPath, fn + ".V*")
        if self.__verbose:
            self.__lfh.write("+DataExchange.__setup() - purging pattern is %s\n" % (fpattern))
        pthList = glob.glob(fpattern)
        if self.__verbose:
            self.__lfh.write("+DataExchange.__reversePurge() candidate length is %d\n" % len(pthList))
        #
        fList = []
        for pth in pthList:
            if not pth.endswith(".V1"):
                fList.append(pth)
        for pth in fList:
            try:
                os.remove(pth)
            except:  # noqa: E722 pylint: disable=bare-except
                pass
        #
        return fList

    def removeWorkflowDir(self):
        # Delete the entry's workflow directory tree; id sanity-checked first.
        if (self.__depDataSetId is not None) and self.__depDataSetId.startswith("D_") and (len(self.__depDataSetId) > 7):
            workflowPath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
            dirPath = os.path.join(workflowPath, "workflow", self.__depDataSetId)
            if os.access(dirPath, os.W_OK):
                shutil.rmtree(dirPath)
                return True
            else:
                return False
        else:
            return False

    def createArchiveDir(self, purgeFlag=True):
        """Create new the archive directory if this is needed.

        With purgeFlag, an existing directory is removed and recreated.
        Returns True when a (new) directory was created, False otherwise.
        """
        if self.__verbose:
            self.__lfh.write("+DataExchange.export() creating archive directory for data set %s\n" % self.__depDataSetId)
        try:
            archivePath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
            dirPath = os.path.join(archivePath, "archive", self.__depDataSetId)
            if not os.access(dirPath, os.W_OK):
                if self.__verbose:
                    self.__lfh.write("+DataExchange.createArchiveDir() creating archive directory path %s\n" % dirPath)
                os.makedirs(dirPath)
                return True
            else:
                if purgeFlag:
                    if self.__verbose:
                        self.__lfh.write("+DataExchange.export() existing archive directory path purged: %s\n" % dirPath)
                    shutil.rmtree(dirPath)
                    os.makedirs(dirPath)
                    return True
                else:
                    if self.__verbose:
                        self.__lfh.write("+DataExchange.export() archive directory exists: %s\n" % dirPath)
                    return False
        except:  # noqa: E722 pylint: disable=bare-except
            if self.__verbose:
                traceback.print_exc(file=self.__lfh)
            return False

    def fetch(self, contentType, formatType, version="latest", partitionNumber=1):
        """Copy the input content object into the current session directory (session naming semantics follow source file object)

        Return the full path of the copied file or None
        """
        inpFilePath = self.__getFilePath(fileSource=self.__fileSource, contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber)
        if self.__verbose:
            self.__lfh.write("+DataExchange.fetch() source type %s format %s version %s path %s\n" % (contentType, formatType, version, inpFilePath))
        try:
            if os.access(inpFilePath, os.R_OK):
                (_dirPath, fileName) = os.path.split(inpFilePath)
                # trim of the trailing version -
                # lastIdx=tfileName.rfind(".V")
                # if lastIdx > 0:
                #     fileName=tfileName[:lastIdx]
                # else:
                #     fileName=tfileName
                outFilePath = os.path.join(self.__sessionPath, fileName)
                if self.__verbose:
                    self.__lfh.write("+DataExchange.fetch() destination file path %s\n" % outFilePath)
                shutil.copyfile(inpFilePath, outFilePath)
                return outFilePath
            else:
                if self.__verbose:
                    self.__lfh.write("+DataExchange.fetch() missing input file at path %s\n" % inpFilePath)
                return None
        except:  # noqa: E722 pylint: disable=bare-except
            if self.__verbose:
                traceback.print_exc(file=self.__lfh)
            return None

    def export(self, inpFilePath, contentType, formatType, version="latest", partitionNumber=1):
        """Copy input file to workflow instance or archival storage.

        Return True on success or False otherwise.
        """
        outFilePath = self.__getFilePath(fileSource=self.__fileSource, contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber)
        if self.__verbose:
            self.__lfh.write("+DataExchange.export() destination type %s format %s version %s path %s\n" % (contentType, formatType, version, outFilePath))
        try:
            # Zero-length input is treated as missing.
            if os.access(inpFilePath, os.R_OK) and (os.path.getsize(inpFilePath) > 0):
                if self.__verbose:
                    self.__lfh.write("+DataExchange.export() destination file path %s\n" % outFilePath)
                # Gzipped input is decompressed into place rather than copied.
                if inpFilePath.endswith(".gz"):
                    self.__copyGzip(inpFilePath, outFilePath)
                else:
                    shutil.copyfile(inpFilePath, outFilePath)
                return True
            else:
                if self.__verbose:
                    self.__lfh.write("+DataExchange.export() missing or zero length input file at path %s\n" % inpFilePath)
                return False
        except:  # noqa: E722 pylint: disable=bare-except
            if self.__verbose:
                traceback.print_exc(file=self.__lfh)
            return False

    def __copyGzip(self, inpFilePath, outFilePath):
        """Decompress inpFilePath into outFilePath via a shell gzip pipeline."""
        try:
            cmd = " gzip -cd %s > %s " % (inpFilePath, outFilePath)
            os.system(cmd)
            return True
        except:  # noqa: E722 pylint: disable=bare-except
            if self.__verbose:
                traceback.print_exc(file=self.__lfh)
            return False

    def copyDirToSession(self, dirName):
        """Replicate the input diretory in the session directory -"""
        try:
            # Resolve the source directory root from the configured file source.
            if self.__fileSource in ["archive", "wf-archive"]:
                pth = self.__pI.getArchivePath(self.__depDataSetId)
            elif self.__fileSource in ["deposit"]:
                pth = self.__pI.getDepositPath(self.__depDataSetId)
            elif self.__fileSource in ["wf-instance"]:
                pth = self.__pI.getInstancePath(self.__depDataSetId, self.__wfInstanceId)
            else:
                return False
            srcPath = os.path.join(pth, dirName)
            if not os.access(srcPath, os.R_OK):
                return False
            dstPath = os.path.join(self.__sessionPath, dirName)
            if not os.path.isdir(dstPath):
                os.makedirs(dstPath, 0o755)
            #
            # Copy only regular files directly under srcPath (not recursive).
            fPattern = os.path.join(srcPath, "*")
            fpL = filter(os.path.isfile, glob.glob(fPattern))
            for fp in fpL:
                _dN, fN = os.path.split(fp)
                oP = os.path.join(dstPath, fN)
                shutil.copyfile(fp, oP)
            if self.__verbose:
                self.__lfh.write("+DataExchange.copyDirToSession() successful session copy of dirName %s\n" % (dirName))
            return True
        except:  # noqa: E722 pylint: disable=bare-except
            if self.__verbose:
                self.__lfh.write("+DataExchange.copyDirToSession() fails for dirName %s\n" % (dirName))
                traceback.print_exc(file=self.__lfh)
            return False
        return True  # NOTE(review): unreachable -- both branches above return first.

    def copyToSession(self, contentType, formatType, version="latest", partitionNumber=1):
        """Copy the input content object into the session directory using archive naming conventions less version details.

        Return the full path of the session file or None
        """
        inpFilePath = self.__getFilePath(fileSource=self.__fileSource, contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber)
        if self.__debug:
            self.__lfh.write("+DataExchange.copyToSession() source file type %s format %s version %s path %s\n" % (contentType, formatType, version, inpFilePath))
        try:
            outFilePath = None
            if os.access(inpFilePath, os.R_OK):
                fn = self.__getArchiveFileName(contentType, formatType, version="none", partitionNumber=partitionNumber)
                outFilePath = os.path.join(self.__sessionPath, fn)
                if self.__verbose:
                    self.__lfh.write(
                        "+DataExchange.copyToSession() content type %s format %s copied to session path %s\n" % (contentType, formatType, outFilePath)
                    )
                shutil.copyfile(inpFilePath, outFilePath)
                return outFilePath
            else:
                if self.__debug:
                    self.__lfh.write("+DataExchange.copyToSession() missing input file at path %s\n" % inpFilePath)
                return None
        except:  # noqa: E722 pylint: disable=bare-except
            if self.__verbose:
                # NOTE(review): duplicated verbose guard preserved from original.
                if self.__verbose:
                    self.__lfh.write(
                        "+DataExchange.copyToSession() Failing for content type %s format %s with session path %s\n" % (contentType, formatType, outFilePath)
                    )
                traceback.print_exc(file=self.__lfh)
            return None

    def updateArchiveFromSession(self, contentType, formatType, version="next", partitionNumber=1):
        """Copy the input content object from the session directory stored using archive naming conventions less version details
        to archive storage.

        Return the full path of the archive file or None
        """
        fn = self.__getArchiveFileName(contentType, formatType, version="none", partitionNumber=partitionNumber)
        inpFilePath = os.path.join(self.__sessionPath, fn)
        if self.__verbose:
            self.__lfh.write("+DataExchange.updateArchiveDromSession() source file type %s format %s path %s\n" % (contentType, formatType, inpFilePath))
        try:
            if os.access(inpFilePath, os.R_OK):
                outFilePath = self.__getFilePath(fileSource="archive", contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber)
                if self.__verbose:
                    self.__lfh.write("+DataExchange.updateArchiveFromSession() archive destination file path %s\n" % outFilePath)
                shutil.copyfile(inpFilePath, outFilePath)
                return outFilePath
            else:
                if self.__verbose:
                    self.__lfh.write("+DataExchange.updateArchiveFrom() missing session input file at path %s\n" % inpFilePath)
                return None
        except:  # noqa: E722 pylint: disable=bare-except
            if self.__verbose:
                traceback.print_exc(file=self.__lfh)
            return None

    ##
    def getVersionFileList(self, fileSource="archive", contentType="model", formatType="pdbx", partitionNumber="1", mileStone=None):
        """For the input content object return a list of file versions sorted by modification time.

        Return:
              List of [(file path, modification date string,size),...]
        """
        try:
            if fileSource == "session" and self.__inputSessionPath is not None:
                self.__pI.setSessionPath(self.__inputSessionPath)
            fPattern = self.__pI.getFilePathVersionTemplate(
                dataSetId=self.__depDataSetId,
                wfInstanceId=self.__wfInstanceId,
                contentType=contentType,
                formatType=formatType,
                fileSource=fileSource,
                partNumber=partitionNumber,
                mileStone=mileStone,
            )
            return self.__getFileList([fPattern], sortFlag=True)
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataExchange.getVersionFileList() failing for data set %s instance %s file source %s error %r\n"
                    % (self.__depDataSetId, self.__wfInstanceId, self.__fileSource, str(e))
                )
                traceback.print_exc(file=self.__lfh)
            return []

    def getPartitionFileList(self, fileSource="archive", contentType="model", formatType="pdbx", mileStone=None):
        """For the input content object return a list of file partitions sorted by modification time.

        Return:
              List of [(file path, modification date string,size),...]
        """
        try:
            if fileSource == "session" and self.__inputSessionPath is not None:
                self.__pI.setSessionPath(self.__inputSessionPath)
            fPattern = self.__pI.getFilePathPartitionTemplate(
                dataSetId=self.__depDataSetId, wfInstanceId=self.__wfInstanceId, contentType=contentType, formatType=formatType, fileSource=fileSource, mileStone=mileStone
            )
            tL = self.__getFileList([fPattern], sortFlag=True)
            if self.__debug:
                self.__lfh.write("+DataExchange.getPartionFileList() pattern %r\n" % fPattern)
                self.__lfh.write("+DataExchange.getPartionFileList() file list %r\n" % tL)
            #
            return tL
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataExchange.getVersionFileList() failing for data set %s instance %s file source %s error %r\n"
                    % (self.__depDataSetId, self.__wfInstanceId, self.__fileSource, str(e))
                )
                traceback.print_exc(file=self.__lfh)
            return []

    def getContentTypeFileList(self, fileSource="archive", contentTypeList=None):
        """For the input content object return a list of file versions sorted by modification time.

        Return:
              List of [(file path, modification date string,size),...]
        """
        if contentTypeList is None:
            contentTypeList = ["model"]
        try:
            if fileSource == "session" and self.__inputSessionPath is not None:
                self.__pI.setSessionPath(self.__inputSessionPath)
            # One glob pattern per requested content type.
            fPatternList = []
            for contentType in contentTypeList:
                fPattern = self.__pI.getFilePathContentTypeTemplate(dataSetId=self.__depDataSetId, wfInstanceId=self.__wfInstanceId, contentType=contentType, fileSource=fileSource)
                fPatternList.append(fPattern)
            if self.__debug:
                self.__lfh.write("+DataExchange.getContentTypeFileList() patterns %r\n" % fPatternList)
            return self.__getFileList(fPatternList, sortFlag=True)
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataExchange.getVersionFileList() failing for data set %s instance %s file source %s error %r\n"
                    % (self.__depDataSetId, self.__wfInstanceId, self.__fileSource, str(e))
                )
                traceback.print_exc(file=self.__lfh)
            return []

    def getMiscFileList(self, fPatternList=None, sortFlag=True):
        # List arbitrary files matching the glob pattern list.
        if fPatternList is None:
            fPatternList = ["*"]
        return self.__getFileList(fPatternList=fPatternList, sortFlag=sortFlag)

    def getLogFileList(self, entryId, fileSource="archive"):
        # Log files live both as *log next to the data and under a log/ subdir.
        if fileSource in ["archive", "wf-archive"]:
            pth = self.__pI.getArchivePath(entryId)
            fpat1 = os.path.join(pth, "*log")
            fpat2 = os.path.join(pth, "log", "*")
            patList = [fpat1, fpat2]
        elif fileSource in ["deposit"]:
            pth = self.__pI.getDepositPath(entryId)
            fpat1 = os.path.join(pth, "*log")
            fpat2 = os.path.join(pth, "log", "*")
            patList = [fpat1, fpat2]
        else:
            return []
        return self.__getFileList(fPatternList=patList, sortFlag=True)

    def __getFileList(self, fPatternList=None, sortFlag=True):
        """For the input glob compatible file pattern produce a file list sorted by modification date.

        If sortFlag is set then file list is sorted by modification date (e.g. recently changes first)

        Return:
              List of [(file path, modification date string, KBytes),...]
        """
        if fPatternList is None:
            fPatternList = ["*"]
        rTup = []
        try:
            files = []
            for fPattern in fPatternList:
                if fPattern is not None and len(fPattern) > 0:
                    files.extend(filter(os.path.isfile, glob.glob(fPattern)))
            file_date_tuple_list = []
            for x in files:
                d = os.path.getmtime(x)
                s = float(os.path.getsize(x)) / 1000.0
                file_date_tuple = (x, d, s)
                file_date_tuple_list.append(file_date_tuple)
            # Sort the tuple list by the modification time (recent changes first)
            if sortFlag:
                file_date_tuple_list.sort(key=lambda x: x[1], reverse=True)
            for fP, mT, sZ in file_date_tuple_list:
                # NOTE(review): `datetime.fromtimestamp` -- elsewhere this file uses
                # `datetime.datetime.today()`, which implies module-level `import datetime`;
                # if so this call should be `datetime.datetime.fromtimestamp` -- confirm imports.
                tS = datetime.fromtimestamp(mT).strftime("%Y-%b-%d %H:%M:%S")
                rTup.append((fP, tS, sZ))
            return rTup
        except Exception as e:
            if self.__verbose:
                self.__lfh.write("+DataExchange.__getFileList() failing for patternlist %r error %r\n" % (fPatternList, str(e)))
                traceback.print_exc(file=self.__lfh)
        return rTup

    ##
    def __getArchiveFileName(self, contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
        # Archive-convention file name (no directory) for the content object.
        (_fp, _d, f) = self.__targetFilePath(
            fileSource="archive", contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber, mileStone=mileStone
        )
        return f

    # def __getInstanceFileName(self, contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
    #     (_fp, _d, f) = self.__targetFilePath(
    #         fileSource="wf-instance", contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber, mileStone=mileStone
    #     )
    #     return f

    def __getFilePath(self, fileSource="archive", contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
        # Full resolved file path for the content object.
        (fp, _d, _f) = self.__targetFilePath(
            fileSource=fileSource, contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber, mileStone=mileStone
        )
        return fp

    def __targetFilePath(self, fileSource="archive", contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
        """Return the file path, directory path, and filename  for the input content object if this object is valid.

        If the file path cannot be verified return None for all values
        """
        try:
            if fileSource == "session" and self.__inputSessionPath is not None:
                self.__pI.setSessionPath(self.__inputSessionPath)
            fP = self.__pI.getFilePath(
                dataSetId=self.__depDataSetId,
                wfInstanceId=self.__wfInstanceId,
                contentType=contentType,
                formatType=formatType,
                fileSource=fileSource,
                versionId=version,
                partNumber=partitionNumber,
                mileStone=mileStone,
            )
            dN, fN = os.path.split(fP)
            return fP, dN, fN
        except Exception as e:
            if self.__debug:
                self.__lfh.write(
                    "+DataExchange.__targetFilePath() failing for data set %s instance %s file source %s error %r\n"
                    % (self.__depDataSetId, self.__wfInstanceId, self.__fileSource, str(e))
                )
                traceback.print_exc(file=self.__lfh)
            return (None, None, None)