def updateBundles(self):
    """Refresh the in-memory bundle cache.

    Bundle ids no longer returned by __getDirsToBundle are dropped from
    self.__bundles; every remaining id is re-packed as an in-memory gzipped
    tar, stored as (md5-of-members, tar bytes), or (None, None) when the
    bundle's glob matches no files.
    """
    dirsToBundle = self.__getDirsToBundle()
    # Delete bundles that don't have to be updated.
    # BUGFIX: iterate over a snapshot of the keys -- deleting entries while
    # iterating the dict itself raises RuntimeError.
    for bId in list(self.__bundles):
        if bId not in dirsToBundle:
            gLogger.info("Deleting old bundle %s" % bId)
            del self.__bundles[bId]
    for bId in dirsToBundle:
        bundlePaths = dirsToBundle[bId]
        gLogger.info("Updating %s bundle %s" % (bId, bundlePaths))
        buffer_ = cStringIO.StringIO()
        filesToBundle = sorted(File.getGlobbedFiles(bundlePaths))
        if filesToBundle:
            commonPath = File.getCommonPath(filesToBundle)
            commonEnd = len(commonPath)
            gLogger.info("Bundle will have %s files with common path %s" % (len(filesToBundle), commonPath))
            # 'dummy' name is unused: the archive is written into buffer_
            tarBuffer = tarfile.open('dummy', "w:gz", buffer_)
            for filePath in filesToBundle:
                # Store members relative to the common path prefix
                tarBuffer.add(filePath, filePath[commonEnd:])
            tarBuffer.close()
            zippedData = buffer_.getvalue()
            buffer_.close()
            hash_ = File.getMD5ForFiles(filesToBundle)
            gLogger.info("Bundled %s : %s bytes (%s)" % (bId, len(zippedData), hash_))
            self.__bundles[bId] = (hash_, zippedData)
        else:
            self.__bundles[bId] = (None, None)
def updateBundles( self ):
    """Refresh the in-memory bundle cache.

    Bundle ids no longer returned by __getDirsToBundle are dropped from
    self.__bundles; every remaining id is re-packed as an in-memory gzipped
    tar, stored as ( md5-of-members, tar bytes ), or ( None, None ) when the
    bundle's glob matches no files.
    """
    dirsToBundle = self.__getDirsToBundle()
    # Delete bundles that don't have to be updated.
    # BUGFIX: iterate over a snapshot of the keys -- deleting entries while
    # iterating the dict itself raises RuntimeError.
    for bId in list( self.__bundles ):
      if bId not in dirsToBundle:
        gLogger.info( "Deleting old bundle %s" % bId )
        del self.__bundles[ bId ]
    for bId in dirsToBundle:
      bundlePaths = dirsToBundle[ bId ]
      gLogger.info( "Updating %s bundle %s" % ( bId, bundlePaths ) )
      buffer_ = cStringIO.StringIO()
      filesToBundle = sorted( File.getGlobbedFiles( bundlePaths ) )
      if filesToBundle:
        commonPath = File.getCommonPath( filesToBundle )
        commonEnd = len( commonPath )
        gLogger.info( "Bundle will have %s files with common path %s" % ( len( filesToBundle ), commonPath ) )
        # 'dummy' name is unused: the archive is written into buffer_
        tarBuffer = tarfile.open( 'dummy', "w:gz", buffer_ )
        for filePath in filesToBundle:
          # Store members relative to the common path prefix
          tarBuffer.add( filePath, filePath[ commonEnd: ] )
        tarBuffer.close()
        zippedData = buffer_.getvalue()
        buffer_.close()
        hash_ = File.getMD5ForFiles( filesToBundle )
        gLogger.info( "Bundled %s : %s bytes (%s)" % ( bId, len( zippedData ), hash_ ) )
        self.__bundles[ bId ] = ( hash_, zippedData )
      else:
        self.__bundles[ bId ] = ( None, None )
def sendFile(self, filename, fileId, token=""):
    """ Send a file to server

        @type filename : string / file descriptor / file object
        @param filename : File to send to server
        @type fileId : any
        @param fileId : Identification of the file being sent
        @type token : string
        @param token : Optional token for the file
        @return : S_OK/S_ERROR
    """
    helper = FileHelper()
    result = helper.getFileDescriptor(filename, "r")
    if not result['OK']:
        return result
    fd = result['Value']
    try:
        # Announce the transfer (file id, token and size) before streaming
        result = self._sendTransferHeader(
            "FromClient", (fileId, token, File.getSize(filename)))
        if not result['OK']:
            return result
        transport = result['Value']
        helper.setTransport(transport)
        result = helper.FDToNetwork(fd)
        if not result['OK']:
            return result
        return transport.receiveData()
    finally:
        # Always tear down the connection, whatever happened above
        self._disconnect()
def createTarball(tarballPath, directoryToTar, additionalDirectoriesToTar=None):
    """Create a gzipped tarball of a directory and write its MD5 alongside.

    :param tarballPath: destination archive path; must end in .tar.gz or .gz
    :param directoryToTar: directory added (recursively) under its basename
    :param additionalDirectoriesToTar: optional directory or list of
        directories also added to the archive; non-directories are skipped
    :return: S_OK / S_ERROR
    """
    # Accept a single string as well as a list of extra directories
    if isinstance(additionalDirectoriesToTar, (types.StringType, types.UnicodeType)):
        additionalDirectoriesToTar = [additionalDirectoriesToTar]
    tf = tarfile.open(tarballPath, "w:gz")
    try:
        tf.add(directoryToTar, os.path.basename(os.path.abspath(directoryToTar)), recursive=True)
        if additionalDirectoriesToTar:
            for dirToTar in additionalDirectoriesToTar:
                # Silently skip entries that are not directories
                if os.path.isdir(dirToTar):
                    tf.add(dirToTar, os.path.basename(os.path.abspath(dirToTar)), recursive=True)
    finally:
        # Close the archive even if adding a member raised
        tf.close()
    md5FilePath = False
    for suffix in (".tar.gz", ".gz"):
        if tarballPath.endswith(suffix):
            md5FilePath = "%s.md5" % tarballPath[:-len(suffix)]
            break
    if not md5FilePath:
        return S_ERROR("Could not generate md5 filename")
    md5str = File.getMD5ForFiles([tarballPath])
    fd = open(md5FilePath, "w")
    try:
        fd.write(md5str)
    finally:
        fd.close()
    return S_OK()
def sendFile( self, filename, fileId, token = "" ):
    """ Send a file to server

        @type filename : string / file descriptor / file object
        @param filename : File to send to server
        @type fileId : any
        @param fileId : Identification of the file being sent
        @type token : string
        @param token : Optional token for the file
        @return : S_OK/S_ERROR
    """
    helper = FileHelper()
    result = helper.getFileDescriptor( filename, "r" )
    if not result[ 'OK' ]:
      return result
    fd = result[ 'Value' ]
    # Announce the transfer (file id, token and size) before streaming
    result = self._sendTransferHeader( "FromClient", ( fileId, token, File.getSize( filename ) ) )
    if not result[ 'OK' ]:
      return result
    trid, transport = result[ 'Value' ]
    try:
      helper.setTransport( transport )
      result = helper.FDToNetwork( fd )
      if not result[ 'OK' ]:
        return result
      return transport.receiveData()
    finally:
      # Always release the transport id, whatever happened above
      self._disconnect( trid )
def __uploadInputSandbox( self, classAdJob, jobDescriptionObject = None ):
    """Checks the validity of the job Input Sandbox.
    The function returns the list of Input Sandbox files.
    The total volume of the input sandbox is evaluated

    :param classAdJob: job ClassAd; its InputSandbox entries are read and updated
    :param jobDescriptionObject: optional StringIO.StringIO with the job description
    :return: S_OK / S_ERROR ('BadFile' and 'TotalSize' are set on an invalid sandbox)
    """
    inputSandbox = self.__getInputSandboxEntries( classAdJob )
    realFiles = []
    badFiles = []
    diskFiles = []
    for isFile in inputSandbox:
      # Skip catalogue/sandbox references; in case of parametric input
      # sandbox, there is %s passed, so have to ignore it also
      if not isFile.startswith( ( 'lfn:', 'LFN:', 'SB:', '%s' ) ):
        realFiles.append( isFile )
    stringIOFiles = []
    stringIOFilesSize = 0
    if jobDescriptionObject is not None:
      if isinstance( jobDescriptionObject, StringIO.StringIO ):
        stringIOFiles = [jobDescriptionObject]
        stringIOFilesSize = len( jobDescriptionObject.buf )
        gLogger.debug( "Size of the stringIOFiles: " + str( stringIOFilesSize ) )
      else:
        return S_ERROR( "jobDescriptionObject is not a StringIO object" )
    # Check real files
    for isFile in realFiles:
      if not os.path.exists( isFile ):
        # we are passing in real files, we expect them to be on disk
        badFiles.append( isFile )
        gLogger.warn( "inputSandbox file/directory " + isFile + " not found. Keep looking for the others" )
        continue
      diskFiles.append( isFile )
    diskFilesSize = File.getGlobbedTotalSize( diskFiles )
    gLogger.debug( "Size of the diskFiles: " + str( diskFilesSize ) )
    totalSize = diskFilesSize + stringIOFilesSize
    gLogger.verbose( "Total size of the inputSandbox: " + str( totalSize ) )
    okFiles = stringIOFiles + diskFiles
    if badFiles:
      result = S_ERROR( 'Input Sandbox is not valid' )
      result['BadFile'] = badFiles
      result['TotalSize'] = totalSize
      return result
    if okFiles:
      # Lazily create the sandbox client on first use
      if not self.sandboxClient:
        self.sandboxClient = SandboxStoreClient( useCertificates = self.useCertificates )
      result = self.sandboxClient.uploadFilesAsSandbox( okFiles )
      if not result[ 'OK' ]:
        return result
      inputSandbox.append( result[ 'Value' ] )
      classAdJob.insertAttributeVectorString( "InputSandbox", inputSandbox )
    return S_OK()
def sendFile(self, filename, fileId, token=""):
    """ Send a file to server

    :type filename: string / file descriptor / file object
    :param filename: File to send to server
    :type fileId: any
    :param fileId: Identification of the file being sent
    :type token: string
    :param token: Optional token for the file
    :return: S_OK/S_ERROR
    """
    helper = FileHelper()
    # Token may carry transfer options; honour the checksum opt-out
    if "NoCheckSum" in token:
        helper.disableCheckSum()
    result = helper.getFileDescriptor(filename, "r")
    if not result["OK"]:
        return result
    fd = result["Value"]
    # Announce the transfer (file id, token and size) before streaming
    result = self._sendTransferHeader(
        "FromClient", (fileId, token, File.getSize(filename)))
    if not result["OK"]:
        return result
    trid, transport = result["Value"]
    try:
        helper.setTransport(transport)
        result = helper.FDToNetwork(fd)
        if not result["OK"]:
            return result
        return transport.receiveData()
    finally:
        # Always release the transport id, whatever happened above
        self._disconnect(trid)
def __uploadInputSandbox(self, classAdJob, jobDescriptionObject=None):
    """Checks the validity of the job Input Sandbox.
    The function returns the list of Input Sandbox files.
    The total volume of the input sandbox is evaluated
    """
    inputSandbox = self.__getInputSandboxEntries(classAdJob)
    # Keep only plain entries: catalogue/sandbox references and the '%s'/'%('
    # placeholders of parametric sandboxes are not local files
    realFiles = [entry for entry in inputSandbox if not entry.startswith(("lfn:", "LFN:", "SB:", "%s", "%("))]
    badFiles = []
    diskFiles = []

    stringIOFiles = []
    stringIOFilesSize = 0
    if jobDescriptionObject is not None:
        if not isinstance(jobDescriptionObject, StringIO):
            return S_ERROR(EWMSJDL, "jobDescriptionObject is not a StringIO object")
        stringIOFiles = [jobDescriptionObject]
        stringIOFilesSize = len(jobDescriptionObject.getvalue())
        gLogger.debug("Size of the stringIOFiles: " + str(stringIOFilesSize))

    # Check real files: they are expected to exist on disk
    for entry in realFiles:
        if os.path.exists(entry):
            diskFiles.append(entry)
        else:
            badFiles.append(entry)
            gLogger.warn("inputSandbox file/directory " + entry + " not found. Keep looking for the others")

    diskFilesSize = File.getGlobbedTotalSize(diskFiles)
    gLogger.debug("Size of the diskFiles: " + str(diskFilesSize))
    totalSize = diskFilesSize + stringIOFilesSize
    gLogger.verbose("Total size of the inputSandbox: " + str(totalSize))

    okFiles = stringIOFiles + diskFiles
    if badFiles:
        result = S_ERROR(EWMSJDL, "Input Sandbox is not valid")
        result["BadFile"] = badFiles
        result["TotalSize"] = totalSize
        return result

    if okFiles:
        # Lazily create the sandbox client on first use
        if not self.sandboxClient:
            self.sandboxClient = SandboxStoreClient(
                useCertificates=self.useCertificates,
                delegatedDN=self.delegatedDN,
                delegatedGroup=self.delegatedGroup,
            )
        result = self.sandboxClient.uploadFilesAsSandbox(okFiles)
        if not result["OK"]:
            return result
        inputSandbox.append(result["Value"])
        classAdJob.insertAttributeVectorString("InputSandbox", inputSandbox)
    return S_OK()
def __uploadInputSandbox(self, classAdJob): """Checks the validity of the job Input Sandbox. The function returns the list of Input Sandbox files. The total volume of the input sandbox is evaluated """ sandboxClient = SandboxStoreClient( useCertificates=self.useCertificates, rpcClient=self.sbRPCClient, transferClient=self.sbTransferClient ) inputSandbox = self.__getInputSandboxEntries(classAdJob) realFiles = [] badFiles = [] okFiles = [] realFiles = [] for file in inputSandbox: valid = True for tag in ( "lfn:", "LFN:", "SB:", "%s", ): # in case of parametric input sandbox, there is %s passed, so have to ignore it also if file.find(tag) == 0: valid = False break if valid: realFiles.append(file) # If there are no files, skip! if not realFiles: return S_OK() # Check real files for file in realFiles: if not os.path.exists(file): badFiles.append(file) print "inputSandbox file/directory " + file + " not found" continue okFiles.append(file) # print "Total size of the inputSandbox: "+str(totalSize) totalSize = File.getGlobbedTotalSize(okFiles) if badFiles: result = S_ERROR("Input Sandbox is not valid") result["BadFile"] = badFiles result["TotalSize"] = totalSize return result if okFiles: result = sandboxClient.uploadFilesAsSandbox(okFiles) if not result["OK"]: return result inputSandbox.append(result["Value"]) classAdJob.insertAttributeVectorString("InputSandbox", inputSandbox) return S_OK()
def __uploadInputSandbox(self, classAdJob): """Checks the validity of the job Input Sandbox. The function returns the list of Input Sandbox files. The total volume of the input sandbox is evaluated """ sandboxClient = SandboxStoreClient( useCertificates=self.useCertificates, rpcClient=self.sbRPCClient, transferClient=self.sbTransferClient) inputSandbox = self.__getInputSandboxEntries(classAdJob) realFiles = [] badFiles = [] okFiles = [] realFiles = [] for file in inputSandbox: valid = True for tag in ( 'lfn:', 'LFN:', 'SB:', '%s' ): #in case of parametric input sandbox, there is %s passed, so have to ignore it also if file.find(tag) == 0: valid = False break if valid: realFiles.append(file) #If there are no files, skip! if not realFiles: return S_OK() #Check real files for file in realFiles: if not os.path.exists(file): badFiles.append(file) print "inputSandbox file/directory " + file + " not found" continue okFiles.append(file) #print "Total size of the inputSandbox: "+str(totalSize) totalSize = File.getGlobbedTotalSize(okFiles) if badFiles: result = S_ERROR('Input Sandbox is not valid') result['BadFile'] = badFiles result['TotalSize'] = totalSize return result if okFiles: result = sandboxClient.uploadFilesAsSandbox(okFiles) if not result['OK']: return result inputSandbox.append(result['Value']) classAdJob.insertAttributeVectorString("InputSandbox", inputSandbox) return S_OK()
def __uploadInputSandbox( self, classAdJob ):
    """Checks the validity of the job Input Sandbox.
    The function returns the list of Input Sandbox files.
    The total volume of the input sandbox is evaluated

    :param classAdJob: job ClassAd; its InputSandbox entries are read and updated
    :return: S_OK / S_ERROR ( 'BadFile' and 'TotalSize' are set on an invalid sandbox )
    """
    inputSandbox = self.__getInputSandboxEntries( classAdJob )
    badFiles = []
    okFiles = []
    realFiles = []
    for isFile in inputSandbox:
      # Skip catalogue/sandbox references; in case of parametric input
      # sandbox, there is %s passed, so have to ignore it also
      if not isFile.startswith( ( 'lfn:', 'LFN:', 'SB:', '%s' ) ):
        realFiles.append( isFile )
    # If there are no files, skip!
    if not realFiles:
      return S_OK()
    # Check real files: they are expected to exist on disk
    for isFile in realFiles:
      if not os.path.exists( isFile ):
        badFiles.append( isFile )
        gLogger.warn( "inputSandbox file/directory " + isFile + " not found. Keep looking for the others" )
        continue
      okFiles.append( isFile )
    totalSize = File.getGlobbedTotalSize( okFiles )
    gLogger.verbose( "Total size of the inputSandbox: " + str( totalSize ) )
    if badFiles:
      result = S_ERROR( 'Input Sandbox is not valid' )
      result['BadFile'] = badFiles
      result['TotalSize'] = totalSize
      return result
    if okFiles:
      # Lazily create the sandbox client on first use
      if not self.sandboxClient:
        self.sandboxClient = SandboxStoreClient( useCertificates = self.useCertificates )
      result = self.sandboxClient.uploadFilesAsSandbox( okFiles )
      if not result[ 'OK' ]:
        return result
      inputSandbox.append( result[ 'Value' ] )
      classAdJob.insertAttributeVectorString( "InputSandbox", inputSandbox )
    return S_OK()
def createTarball( tarballPath, directoryToTar, additionalDirectoriesToTar = None ):
    """Create a gzipped tarball of a directory and write its MD5 alongside.

    :param tarballPath: destination archive path; must end in .tar.gz or .gz
    :param directoryToTar: directory added (recursively) under its basename
    :param additionalDirectoriesToTar: optional directory or list of
        directories also added to the archive; non-directories are skipped
    :return: S_OK / S_ERROR
    """
    # Accept a single string as well as a list of extra directories
    if isinstance( additionalDirectoriesToTar, ( types.StringType, types.UnicodeType ) ):
      additionalDirectoriesToTar = [ additionalDirectoriesToTar ]
    tf = tarfile.open( tarballPath, "w:gz" )
    try:
      tf.add( directoryToTar, os.path.basename( os.path.abspath( directoryToTar ) ), recursive = True )
      if additionalDirectoriesToTar:
        for dirToTar in additionalDirectoriesToTar:
          # Silently skip entries that are not directories
          if os.path.isdir( dirToTar ):
            tf.add( dirToTar, os.path.basename( os.path.abspath( dirToTar ) ), recursive = True )
    finally:
      # Close the archive even if adding a member raised
      tf.close()
    md5FilePath = False
    for suffix in ( ".tar.gz", ".gz" ):
      if tarballPath.endswith( suffix ):
        md5FilePath = "%s.md5" % tarballPath[ :-len( suffix ) ]
        break
    if not md5FilePath:
      return S_ERROR( "Could not generate md5 filename" )
    md5str = File.getMD5ForFiles( [ tarballPath ] )
    fd = open( md5FilePath, "w" )
    try:
      fd.write( md5str )
    finally:
      fd.close()
    return S_OK()
def __uploadInputSandbox(self, classAdJob, jobDescriptionObject=None):
    """Checks the validity of the job Input Sandbox.
    The function returns the list of Input Sandbox files.
    The total volume of the input sandbox is evaluated

    :param classAdJob: job ClassAd; its InputSandbox entries are read and updated
    :param jobDescriptionObject: optional StringIO.StringIO with the job description
    :return: S_OK / S_ERROR ('BadFile' and 'TotalSize' are set on an invalid sandbox)
    """
    inputSandbox = self.__getInputSandboxEntries(classAdJob)
    realFiles = []
    badFiles = []
    diskFiles = []
    for isFile in inputSandbox:
        # Skip catalogue/sandbox references; in case of parametric input
        # sandbox, there is %s passed, so have to ignore it also
        if not isFile.startswith(('lfn:', 'LFN:', 'SB:', '%s')):
            realFiles.append(isFile)
    stringIOFiles = []
    stringIOFilesSize = 0
    if jobDescriptionObject is not None:
        if isinstance(jobDescriptionObject, StringIO.StringIO):
            stringIOFiles = [jobDescriptionObject]
            stringIOFilesSize = len(jobDescriptionObject.buf)
            gLogger.debug("Size of the stringIOFiles: " + str(stringIOFilesSize))
        else:
            return S_ERROR("jobDescriptionObject is not a StringIO object")
    # Check real files
    for isFile in realFiles:
        if not os.path.exists(isFile):
            # we are passing in real files, we expect them to be on disk
            badFiles.append(isFile)
            gLogger.warn("inputSandbox file/directory " + isFile + " not found. Keep looking for the others")
            continue
        diskFiles.append(isFile)
    diskFilesSize = File.getGlobbedTotalSize(diskFiles)
    gLogger.debug("Size of the diskFiles: " + str(diskFilesSize))
    totalSize = diskFilesSize + stringIOFilesSize
    gLogger.verbose("Total size of the inputSandbox: " + str(totalSize))
    okFiles = stringIOFiles + diskFiles
    if badFiles:
        result = S_ERROR('Input Sandbox is not valid')
        result['BadFile'] = badFiles
        result['TotalSize'] = totalSize
        return result
    if okFiles:
        # Lazily create the sandbox client on first use
        if not self.sandboxClient:
            self.sandboxClient = SandboxStoreClient(
                useCertificates=self.useCertificates)
        result = self.sandboxClient.uploadFilesAsSandbox(okFiles)
        if not result['OK']:
            return result
        inputSandbox.append(result['Value'])
        classAdJob.insertAttributeVectorString("InputSandbox", inputSandbox)
    return S_OK()