def testAccessors(self):
    """
    Compare each ScramEnvironment accessor with the value kept on the test case
    """
    env = ScramEnvironment(logger=self.testLogger)

    cmsswVersion = env.getCmsswVersion()
    self.assertEqual(cmsswVersion, self.version)

    scramArch = env.getScramArch()
    self.assertEqual(scramArch, self.arch)

    cmsswBase = env.getCmsswBase()
    self.assertEqual(cmsswBase, self.base)
def testScram(self):
    """
    Check that the Scram environment reports a version, an architecture and a base
    """
    failureHint = "You must set up a CMSSW environment first"
    env = ScramEnvironment(logger=self.logger)

    # Every accessor must return something other than None
    for accessor in (env.getCmsswVersion, env.getScramArch, env.getCmsswBase):
        self.assertNotEqual(accessor(), None, failureHint)
def testScram(self):
    """
    Check that the Scram environment reports a version, an architecture and a base
    """
    failureHint = "You must set up a CMSSW environment first"
    env = ScramEnvironment(logger=self.logger)

    # Every accessor must return something other than None
    for accessor in (env.getCmsswVersion, env.getScramArch, env.getCmsswBase):
        self.assertNotEqual(accessor(), None, failureHint)
class UserTarball(object):
    """
    _UserTarball_

    A wrapper around a TarFile for the user code tarballs; attributes that are
    not defined here are forwarded to the wrapped TarFile.
    By default creates a new tarball with the user libraries from lib, module,
    and the data/ and interface/ sections of the src/ area.
    Also adds user specified files in the right place.
    """

    def __init__(self, name=None, mode='w:gz', config=None, logger=None):
        self.config = config
        self.logger = logger
        self.scram = ScramEnvironment(logger=self.logger)
        self.logger.debug("Making tarball in %s" % name)
        self.tarfile = tarfile.open(name=name, mode=mode, dereference=True)
        # Filled in by calculateChecksum(), which close() calls
        self.checksum = None

    def addFiles(self, userFiles=None, cfgOutputName=None):
        """
        Add the necessary files to the tarball

        userFiles:     glob patterns of extra files; every match is stored
                       under its base name
        cfgOutputName: path of the pset file, stored as PSet.py together with
                       the '.pkl' file of the same stem, stored as PSet.pkl

        Raises InputFileNotFoundException when a pattern matches no file.
        """
        directories = ['lib', 'biglib', 'module']
        if getattr(self.config.JobType, 'sendPythonFolder', configParametersInfo['JobType.sendPythonFolder']['default']):
            directories.append('python')

        # /data/ subdirs contain data files needed by the code
        # /interface/ subdirs contain C++ header files needed e.g. by ROOT6
        dataDirs = ['data', 'interface']
        userFiles = userFiles or []

        # Tar up whole directories
        for directory in directories:
            fullPath = os.path.join(self.scram.getCmsswBase(), directory)
            self.logger.debug(" checking directory %s" % fullPath)
            if os.path.exists(fullPath):
                self.logger.debug(" adding directory %s to tarball" % fullPath)
                self.checkdirectory(fullPath)
                self.tarfile.add(fullPath, directory, recursive=True)

        # Search for and tar up "data" directories in src/
        srcPath = os.path.join(self.scram.getCmsswBase(), 'src')
        for root, _, _ in os.walk(srcPath):
            if os.path.basename(root) in dataDirs:
                directory = root.replace(srcPath, 'src')
                self.logger.debug(" adding data directory %s to tarball" % root)
                self.checkdirectory(root)
                self.tarfile.add(root, directory, recursive=True)

        # Tar up extra files the user needs
        for globName in userFiles:
            fileNames = glob.glob(globName)
            if not fileNames:
                raise InputFileNotFoundException('The input file "%s" taken from parameter config.JobType.inputFiles cannot be found' % globName)

            for filename in fileNames:
                self.logger.debug(" adding file %s to tarball" % filename)
                self.checkdirectory(filename)
                self.tarfile.add(filename, os.path.basename(filename), recursive=True)

        scriptExe = getattr(self.config.JobType, 'scriptExe', None)
        if scriptExe:
            self.tarfile.add(scriptExe, arcname=os.path.basename(scriptExe))

        # Adding the pset and crabconfig file to the tarfile
        if cfgOutputName:
            self.tarfile.add(cfgOutputName, arcname='PSet.py')
            self.tarfile.add(os.path.splitext(cfgOutputName)[0] + '.pkl', arcname='PSet.pkl')

        self._addDebugFiles()

    def _addDebugFiles(self):
        """
        Store the original pset (when configured) and a dump of the CRAB
        configuration under debug/ in the tarball
        """
        # Text mode is required: str(self.config) is text and the default
        # 'w+b' mode rejects it on Python 3. The with-block removes the
        # temporary file even when tarfile.add() raises.
        with tempfile.NamedTemporaryFile(mode='w', delete=True) as configtmp:
            configtmp.write(str(self.config))
            configtmp.flush()

            psetfilename = getattr(self.config.JobType, 'psetName', None)
            if psetfilename is not None:
                self.tarfile.add(psetfilename, '/debug/originalPSet.py')
            else:
                self.logger.debug('Failed to add pset to tarball')

            self.tarfile.add(configtmp.name, '/debug/crabConfig.py')

    def close(self):
        """
        Calculate the checksum and close the tarball
        """
        self.calculateChecksum()
        return self.tarfile.close()

    def upload(self, filecacheurl=None):
        """
        Upload the tarball to the File Cache

        Closes the tarball first. Returns a tuple made of the cache file name
        (hashkey + '.tar.gz') and the checksum of the tarball content.
        Raises CachefileNotFoundException when the reply has no 'hashkey'.
        """
        self.close()
        archiveName = self.tarfile.name
        self.logger.debug(" uploading archive to cache %s " % archiveName)
        ufc = CRABClient.Emulator.getEmulator('ufc')({'endpoint' : filecacheurl})
        result = ufc.upload(archiveName)
        if 'hashkey' not in result:
            self.logger.error("Failed to upload source files: %s" % str(result))
            raise CachefileNotFoundException
        return str(result['hashkey']) + '.tar.gz', self.checksum

    def calculateChecksum(self):
        """
        Calculate a checksum that doesn't depend on the tgz creation data

        The digest is taken over the (name, size, mtime, uname) listing of the
        members and stored in self.checksum.
        """
        lsl = [(x.name, int(x.size), int(x.mtime), x.uname) for x in self.tarfile.getmembers()]
        listing = str(lsl)
        # hashlib only accepts bytes; on Python 3 str is text and must be encoded
        if not isinstance(listing, bytes):
            listing = listing.encode('utf-8')
        hasher = hashlib.md5(listing)
        self.logger.debug('tgz contents: %s' % lsl)
        self.checksum = hasher.hexdigest()
        self.logger.debug('MD5 checksum: %s' % self.checksum)
        # The old way read the archive file again and hashed its bytes in
        # chunks (sha256). May be used for non-tar files if needed.

    def checkdirectory(self, dir_):
        """
        Stat every file below dir_ (following symbolic links) and turn an
        OSError into an EnvironmentException
        """
        # checking for infinite symbolic link loop
        try:
            for root, _, files in os.walk(dir_, followlinks=True):
                for file_ in files:
                    os.stat(os.path.join(root, file_))
        except OSError as msg:
            err = '%sError%s: Infinite directory loop found in: %s \nStderr: %s' % \
                    (colors.RED, colors.NORMAL, dir_, msg)
            raise EnvironmentException(err)

    def __getattr__(self, *args):
        """
        Pass any unknown functions or attribute requests on to the TarFile object
        """
        # __getattr__ only runs after normal lookup failed. If 'logger' or
        # 'tarfile' themselves are missing (partially built instance), using
        # them below would re-enter this method forever.
        if args and args[0] in ('logger', 'tarfile'):
            raise AttributeError(args[0])
        self.logger.debug("Passing getattr %s on to TarFile" % args)
        return self.tarfile.__getattribute__(*args)

    def __enter__(self):
        """
        Allow use as context manager
        """
        return self

    def __exit__(self, excType, excValue, excTrace):
        """
        Allow use as context manager

        Closes the wrapped TarFile; exceptions are never suppressed.
        """
        self.tarfile.close()
        if excType:
            return False
class UserTarball(object):
    """
    _UserTarball_

    A wrapper around a TarFile for the user code tarballs; attributes that are
    not defined here are forwarded to the wrapped TarFile.
    By default creates a new tarball with the user libraries from lib, module,
    and the data/ and interface/ sections of the src/ area.
    Also adds user specified files in the right place.
    """

    def __init__(self, name=None, mode='w:bz2', config=None, logger=None, crabserver=None, s3tester=None):
        self.config = config
        self.logger = logger
        self.scram = ScramEnvironment(logger=self.logger)
        self.logger.debug("Making tarball in %s" % name)
        self.tarfile = tarfile.open(name=name, mode=mode, dereference=True)
        self.checksum = None
        # List of (size, name) tuples, filled in by writeContent()
        self.content = None
        self.crabserver = crabserver
        self.s3tester = s3tester

    def addFiles(self, userFiles=None, cfgOutputName=None):
        """
        Add the necessary files to the tarball

        userFiles:     glob patterns of extra files; every match is stored
                       under its base name
        cfgOutputName: path of the pset file; it is stored as BOOTSTRAP_CFGFILE
                       and the pickle and dump files are taken from the same
                       directory

        Raises InputFileNotFoundException when a pattern matches no file.
        """
        directories = ['lib', 'biglib', 'module']
        if getattr(self.config.JobType, 'sendPythonFolder', configParametersInfo['JobType.sendPythonFolder']['default']):
            directories.append('python')
            directories.append('cfipython')
        if getattr(self.config.JobType, 'sendExternalFolder', configParametersInfo['JobType.sendExternalFolder']['default']):
            externalDirPath = os.path.join(self.scram.getCmsswBase(), 'external')
            if os.path.exists(externalDirPath) and os.listdir(externalDirPath):
                directories.append('external')
            else:
                self.logger.info("The config.JobType.sendExternalFolder parameter is set to True but the external directory "\
                                 "doesn't exist or is empty, not adding to tarball. Path: %s" % externalDirPath)

        # Note that dataDirs are only looked-for and added under the src/ folder.
        # /data/ subdirs contain data files needed by the code
        # /interface/ subdirs contain C++ header files needed e.g. by ROOT6
        dataDirs = ['data', 'interface']
        userFiles = userFiles or []

        # Tar up whole directories
        for directory in directories:
            fullPath = os.path.join(self.scram.getCmsswBase(), directory)
            self.logger.debug("Checking directory %s" % fullPath)
            if os.path.exists(fullPath):
                self.logger.debug("Adding directory %s to tarball" % fullPath)
                self.checkdirectory(fullPath)
                self.tarfile.add(fullPath, directory, recursive=True)

        # Search for and tar up "data" directories in src/
        srcPath = os.path.join(self.scram.getCmsswBase(), 'src')
        for root, _, _ in os.walk(srcPath):
            if os.path.basename(root) in dataDirs:
                directory = root.replace(srcPath, 'src')
                self.logger.debug("Adding data directory %s to tarball" % root)
                self.checkdirectory(root)
                self.tarfile.add(root, directory, recursive=True)

        # Tar up extra files the user needs
        for globName in userFiles:
            fileNames = glob.glob(globName)
            if not fileNames:
                raise InputFileNotFoundException("The input file '%s' taken from parameter config.JobType.inputFiles cannot be found." % globName)

            for filename in fileNames:
                self.logger.debug("Adding file %s to tarball" % filename)
                self.checkdirectory(filename)
                self.tarfile.add(filename, os.path.basename(filename), recursive=True)

        scriptExe = getattr(self.config.JobType, 'scriptExe', None)
        if scriptExe:
            self.tarfile.add(scriptExe, arcname=os.path.basename(scriptExe))

        # Adding the pset files to the tarfile
        if cfgOutputName:
            basedir = os.path.dirname(cfgOutputName)
            self.tarfile.add(cfgOutputName, arcname=BOOTSTRAP_CFGFILE)
            self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_PKL), arcname=BOOTSTRAP_CFGFILE_PKL)
            self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_DUMP), arcname=BOOTSTRAP_CFGFILE_DUMP)

    def addMonFiles(self):
        """
        Add monitoring files to the debug tarball: the original pset (when
        configured), a dump of the CRAB configuration and the scriptExe
        (when configured).
        """
        # The with-block removes the temporary file even when tarfile.add() raises
        with tempfile.NamedTemporaryFile(mode='w', delete=True) as configtmp:
            configtmp.write(str(self.config))
            configtmp.flush()

            psetfilename = getattr(self.config.JobType, 'psetName', None)
            if psetfilename:
                self.tarfile.add(psetfilename, '/debug/originalPSet.py')
            else:
                self.logger.debug('Failed to add pset to debug_files.tar.gz')

            self.tarfile.add(configtmp.name, '/debug/crabConfig.py')

            scriptExe = getattr(self.config.JobType, 'scriptExe', None)
            if scriptExe:
                self.tarfile.add(scriptExe, arcname=os.path.basename(scriptExe))

    def writeContent(self):
        """Save the content of the tarball"""
        self.content = [(int(x.size), x.name) for x in self.tarfile.getmembers()]

    def close(self):
        """
        Record the content of the tarball and close it
        """
        self.writeContent()
        return self.tarfile.close()

    def printSortedContent(self):
        """
        To be used for diagnostic printouts
        returns a string containing tarball content as a list of files sorted by size
        already formatted for use in a print statement
        (only the header line when no content has been recorded)
        """
        contentList = "\nsandbox content sorted by size[Bytes]:"
        # Nothing recorded yet (writeContent() not called) or an empty tarball
        if not self.content:
            return contentList

        sortedContent = sorted(self.content, reverse=True)
        biggestFileSize = sortedContent[0][0]
        # Column width: number of digits of the biggest size
        ndigits = int(math.ceil(math.log(biggestFileSize+1, 10)))
        lineFormat = "\n%" + str(ndigits) + "s\t%s"
        contentList += "".join(lineFormat % (size, name) for (size, name) in sortedContent)
        return contentList

    @staticmethod
    def _formatArchiveSize(archiveSizeBytes):
        """
        Turn a size in bytes into a short human readable string (KB or MB)
        """
        archiveSizeKB = archiveSizeBytes // 1024
        if archiveSizeKB <= 512:
            return "%d KB" % archiveSizeKB
        if archiveSizeKB < 1024 * 10:
            # Float literal keeps this a floating point division on python2 as well
            return "%.1f MB" % (archiveSizeKB / 1024.0)
        return "%d MB" % (archiveSizeKB // 1024)

    def upload(self, filecacheurl=None):
        """
        Upload the tarball to the File Cache

        Closes the tarball first and returns the hashkey of the uploaded file.
        Raises SandboxTooBigException when the archive exceeds FILE_SIZE_LIMIT
        and CachefileNotFoundException when the UFC reply has no 'hashkey'.
        """
        self.close()
        archiveName = self.tarfile.name
        archiveSizeBytes = os.path.getsize(archiveName)
        archiveSize = self._formatArchiveSize(archiveSizeBytes)

        if archiveSizeBytes > FILE_SIZE_LIMIT:
            msg = ("%sError%s: input tarball size %s exceeds maximum allowed limit of %d MB" %
                   (colors.RED, colors.NORMAL, archiveSize, FILE_SIZE_LIMIT//1024//1024))
            msg += self.printSortedContent()
            raise SandboxTooBigException(msg)

        msg = ("Uploading archive %s (%s) to the CRAB cache. Using URI %s" %
               (archiveName, archiveSize, filecacheurl))
        self.logger.debug(msg)

        # filecacheurl defaults to None: do not call .upper() on it in that case
        if filecacheurl and 'S3' in filecacheurl.upper():
            # use S3
            # generate a 32char hash like UserFileCache used to do
            hashkey = calculateChecksum(archiveName, exclude=NEW_USER_SANDBOX_EXCLUSIONS)
            # the ".tar.gz" suffix here is forced by other places in the client which add it when
            # storing tarball name in task table. Not very elegant to need to hardcode in several places.
            cachename = "%s.tar.gz" % hashkey
            # current code requires a taskname to extract username. Any dummy one will do
            # next version of RESTCache will get username from cmsweb FE headers
            uploadToS3(crabserver=self.crabserver, objecttype='sandbox', filepath=archiveName,
                       tarballname=cachename, logger=self.logger)
        else:
            # old way using UFC
            ufc = CRABClient.Emulator.getEmulator('ufc')({'endpoint' : filecacheurl, "pycurl": True})
            t1 = time.time()
            result = ufc.upload(archiveName, excludeList=NEW_USER_SANDBOX_EXCLUSIONS)
            ufcSeconds = int(time.time()-t1)
            if 'hashkey' not in result:
                self.logger.error("Failed to upload archive: %s" % str(result))
                raise CachefileNotFoundException
            hashkey = str(result['hashkey'])
            # upload a copy to S3 dev as well, just to stress it a bit, this never raises
            s3report = testS3upload(self.s3tester, archiveName, hashkey, self.logger)
            # report also how long it took uploading to UFC (which surely worked if we are here)
            s3report['ufcseconds'] = ufcSeconds
            # upload S3 test report to crabcache
            reportFile = '/tmp/crabs3report.' + uuid.uuid4().hex
            try:
                with open(reportFile, 'w') as fp:
                    json.dump(s3report, fp)
                reportName = 'S3-' + s3report['timestamp'] + ':s3report.json'
                try:
                    ufc.uploadLog(reportFile, reportName)
                    self.logger.debug('Report of S3 upload stored on CrabCache as %s', reportName)
                except Exception as e:
                    # best effort: a failed report upload must not fail the sandbox upload
                    self.logger.debug(str(e))
            finally:
                # do not leave the report behind, whatever happened above
                if os.path.exists(reportFile):
                    os.remove(reportFile)
        return hashkey

    def checkdirectory(self, dir_):
        """
        Stat every file below dir_ (following symbolic links) and turn an
        OSError into an EnvironmentException
        """
        # checking for infinite symbolic link loop
        try:
            for root, _, files in os.walk(dir_, followlinks=True):
                for file_ in files:
                    os.stat(os.path.join(root, file_))
        except OSError as msg:
            err = '%sError%s: Infinite directory loop found in: %s \nStderr: %s' % \
                    (colors.RED, colors.NORMAL, dir_, msg)
            raise EnvironmentException(err)

    def __getattr__(self, *args):
        """
        Pass any unknown functions or attribute requests on to the TarFile object
        """
        # __getattr__ only runs after normal lookup failed. If 'logger' or
        # 'tarfile' themselves are missing (partially built instance), using
        # them below would re-enter this method forever.
        if args and args[0] in ('logger', 'tarfile'):
            raise AttributeError(args[0])
        self.logger.debug("Passing getattr %s on to TarFile" % args)
        return self.tarfile.__getattribute__(*args)

    def __enter__(self):
        """
        Allow use as context manager
        """
        return self

    def __exit__(self, excType, excValue, excTrace):
        """
        Allow use as context manager

        Closes the wrapped TarFile; exceptions are never suppressed.
        """
        self.tarfile.close()
        if excType:
            return False
class UserTarball(object):
    """
    _UserTarball_

    A wrapper around a TarFile for the user code tarballs; attributes that are
    not defined here are forwarded to the wrapped TarFile.
    By default creates a new tarball with the user libraries from lib, module,
    and the data/ and interface/ sections of the src/ area.
    Also adds user specified files in the right place.
    """

    def __init__(self, name=None, mode='w:gz', config=None, logger=None):
        self.config = config
        self.logger = logger
        self.scram = ScramEnvironment(logger=self.logger)
        self.logger.debug("Making tarball in %s" % name)
        self.tarfile = tarfile.open(name=name, mode=mode, dereference=True)
        self.checksum = None
        # List of (size, name) tuples, filled in by writeContent(). Defined
        # here so that reading it before close() does not fall through to
        # the TarFile object.
        self.content = None

    def addFiles(self, userFiles=None, cfgOutputName=None):
        """
        Add the necessary files to the tarball

        userFiles:     glob patterns of extra files; every match is stored
                       under its base name
        cfgOutputName: path of the pset file; it is stored as BOOTSTRAP_CFGFILE
                       and the pickle and dump files are taken from the same
                       directory

        Raises InputFileNotFoundException when a pattern matches no file.
        """
        directories = ['lib', 'biglib', 'module']
        if getattr(self.config.JobType, 'sendPythonFolder', configParametersInfo['JobType.sendPythonFolder']['default']):
            directories.append('python')

        # /data/ subdirs contain data files needed by the code
        # /interface/ subdirs contain C++ header files needed e.g. by ROOT6
        dataDirs = ['data', 'interface']
        userFiles = userFiles or []

        # Tar up whole directories
        for directory in directories:
            fullPath = os.path.join(self.scram.getCmsswBase(), directory)
            self.logger.debug("Checking directory %s" % fullPath)
            if os.path.exists(fullPath):
                self.logger.debug("Adding directory %s to tarball" % fullPath)
                self.checkdirectory(fullPath)
                self.tarfile.add(fullPath, directory, recursive=True)

        # Search for and tar up "data" directories in src/
        srcPath = os.path.join(self.scram.getCmsswBase(), 'src')
        for root, _, _ in os.walk(srcPath):
            if os.path.basename(root) in dataDirs:
                directory = root.replace(srcPath, 'src')
                self.logger.debug("Adding data directory %s to tarball" % root)
                self.checkdirectory(root)
                self.tarfile.add(root, directory, recursive=True)

        # Tar up extra files the user needs
        for globName in userFiles:
            fileNames = glob.glob(globName)
            if not fileNames:
                raise InputFileNotFoundException("The input file '%s' taken from parameter config.JobType.inputFiles cannot be found." % globName)

            for filename in fileNames:
                self.logger.debug("Adding file %s to tarball" % filename)
                self.checkdirectory(filename)
                self.tarfile.add(filename, os.path.basename(filename), recursive=True)

        scriptExe = getattr(self.config.JobType, 'scriptExe', None)
        if scriptExe:
            self.tarfile.add(scriptExe, arcname=os.path.basename(scriptExe))

        # Adding the pset files to the tarfile
        if cfgOutputName:
            basedir = os.path.dirname(cfgOutputName)
            self.tarfile.add(cfgOutputName, arcname=BOOTSTRAP_CFGFILE)
            self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_PKL), arcname=BOOTSTRAP_CFGFILE_PKL)
            self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_DUMP), arcname=BOOTSTRAP_CFGFILE_DUMP)

        # debug directory
        self._addDebugFiles()

    def _addDebugFiles(self):
        """
        Store the original pset (when configured) and a dump of the CRAB
        configuration under debug/ in the tarball
        """
        # Text mode is required: str(self.config) is text and the default
        # 'w+b' mode rejects it on Python 3. The with-block removes the
        # temporary file even when tarfile.add() raises.
        with tempfile.NamedTemporaryFile(mode='w', delete=True) as configtmp:
            configtmp.write(str(self.config))
            configtmp.flush()

            psetfilename = getattr(self.config.JobType, 'psetName', None)
            if psetfilename is not None:
                self.tarfile.add(psetfilename, '/debug/originalPSet.py')
            else:
                self.logger.debug('Failed to add pset to tarball')

            self.tarfile.add(configtmp.name, '/debug/crabConfig.py')

    def writeContent(self):
        """Save the content of the tarball"""
        self.content = [(int(x.size), x.name) for x in self.tarfile.getmembers()]

    def close(self):
        """
        Record the content of the tarball and close it
        """
        self.writeContent()
        return self.tarfile.close()

    def upload(self, filecacheurl=None):
        """
        Upload the tarball to the File Cache

        Closes the tarball first and returns the hashkey of the uploaded file.
        Raises CachefileNotFoundException when the reply has no 'hashkey'.
        """
        self.close()
        archiveName = self.tarfile.name
        self.logger.debug("Uploading archive %s to the CRAB cache. Using URI %s" % (archiveName, filecacheurl))
        ufc = CRABClient.Emulator.getEmulator('ufc')({'endpoint' : filecacheurl, "pycurl": True})
        result = ufc.upload(archiveName, excludeList=USER_SANDBOX_EXCLUSIONS)
        if 'hashkey' not in result:
            self.logger.error("Failed to upload source files: %s" % str(result))
            raise CachefileNotFoundException
        return str(result['hashkey'])

    def checkdirectory(self, dir_):
        """
        Stat every file below dir_ (following symbolic links) and turn an
        OSError into an EnvironmentException
        """
        # checking for infinite symbolic link loop
        try:
            for root, _, files in os.walk(dir_, followlinks=True):
                for file_ in files:
                    os.stat(os.path.join(root, file_))
        except OSError as msg:
            err = '%sError%s: Infinite directory loop found in: %s \nStderr: %s' % \
                    (colors.RED, colors.NORMAL, dir_, msg)
            raise EnvironmentException(err)

    def __getattr__(self, *args):
        """
        Pass any unknown functions or attribute requests on to the TarFile object
        """
        # __getattr__ only runs after normal lookup failed. If 'logger' or
        # 'tarfile' themselves are missing (partially built instance), using
        # them below would re-enter this method forever.
        if args and args[0] in ('logger', 'tarfile'):
            raise AttributeError(args[0])
        self.logger.debug("Passing getattr %s on to TarFile" % args)
        return self.tarfile.__getattribute__(*args)

    def __enter__(self):
        """
        Allow use as context manager
        """
        return self

    def __exit__(self, excType, excValue, excTrace):
        """
        Allow use as context manager

        Closes the wrapped TarFile; exceptions are never suppressed.
        """
        self.tarfile.close()
        if excType:
            return False
class UserTarball(object):
    """
    _UserTarball_

    A wrapper around a TarFile for the user code tarballs; attributes that are
    not defined here are forwarded to the wrapped TarFile.
    By default creates a new tarball with the user libraries from lib, module,
    and the data/ and interface/ sections of the src/ area.
    Also adds user specified files in the right place.
    """

    def __init__(self, name=None, mode='w:bz2', config=None, logger=None):
        self.config = config
        self.logger = logger
        self.scram = ScramEnvironment(logger=self.logger)
        self.logger.debug("Making tarball in %s" % name)
        self.tarfile = tarfile.open(name=name, mode=mode, dereference=True)
        self.checksum = None
        # List of (size, name) tuples, filled in by writeContent()
        self.content = None

    def addFiles(self, userFiles=None, cfgOutputName=None):
        """
        Add the necessary files to the tarball

        userFiles:     glob patterns of extra files; every match is stored
                       under its base name
        cfgOutputName: path of the pset file; it is stored as BOOTSTRAP_CFGFILE
                       and the pickle and dump files are taken from the same
                       directory

        Raises InputFileNotFoundException when a pattern matches no file.
        """
        directories = ['lib', 'biglib', 'module']
        if getattr(self.config.JobType, 'sendPythonFolder', configParametersInfo['JobType.sendPythonFolder']['default']):
            directories.append('python')
            directories.append('cfipython')
        if getattr(self.config.JobType, 'sendExternalFolder', configParametersInfo['JobType.sendExternalFolder']['default']):
            externalDirPath = os.path.join(self.scram.getCmsswBase(), 'external')
            if os.path.exists(externalDirPath) and os.listdir(externalDirPath):
                directories.append('external')
            else:
                self.logger.info("The config.JobType.sendExternalFolder parameter is set to True but the external directory "\
                                 "doesn't exist or is empty, not adding to tarball. Path: %s" % externalDirPath)

        # Note that dataDirs are only looked-for and added under the src/ folder.
        # /data/ subdirs contain data files needed by the code
        # /interface/ subdirs contain C++ header files needed e.g. by ROOT6
        dataDirs = ['data', 'interface']
        userFiles = userFiles or []

        # Tar up whole directories
        for directory in directories:
            fullPath = os.path.join(self.scram.getCmsswBase(), directory)
            self.logger.debug("Checking directory %s" % fullPath)
            if os.path.exists(fullPath):
                self.logger.debug("Adding directory %s to tarball" % fullPath)
                self.checkdirectory(fullPath)
                self.tarfile.add(fullPath, directory, recursive=True)

        # Search for and tar up "data" directories in src/
        srcPath = os.path.join(self.scram.getCmsswBase(), 'src')
        for root, _, _ in os.walk(srcPath):
            if os.path.basename(root) in dataDirs:
                directory = root.replace(srcPath, 'src')
                self.logger.debug("Adding data directory %s to tarball" % root)
                self.checkdirectory(root)
                self.tarfile.add(root, directory, recursive=True)

        # Tar up extra files the user needs
        for globName in userFiles:
            fileNames = glob.glob(globName)
            if not fileNames:
                raise InputFileNotFoundException("The input file '%s' taken from parameter config.JobType.inputFiles cannot be found." % globName)

            for filename in fileNames:
                self.logger.debug("Adding file %s to tarball" % filename)
                self.checkdirectory(filename)
                self.tarfile.add(filename, os.path.basename(filename), recursive=True)

        scriptExe = getattr(self.config.JobType, 'scriptExe', None)
        if scriptExe:
            self.tarfile.add(scriptExe, arcname=os.path.basename(scriptExe))

        # Adding the pset files to the tarfile
        if cfgOutputName:
            basedir = os.path.dirname(cfgOutputName)
            self.tarfile.add(cfgOutputName, arcname=BOOTSTRAP_CFGFILE)
            self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_PKL), arcname=BOOTSTRAP_CFGFILE_PKL)
            self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_DUMP), arcname=BOOTSTRAP_CFGFILE_DUMP)

    def addMonFiles(self):
        """
        Add monitoring files to the debug tarball: the original pset (when
        configured), a dump of the CRAB configuration and the scriptExe
        (when configured).
        """
        # Text mode is required: str(self.config) is text and the default
        # 'w+b' mode rejects it on Python 3. The with-block removes the
        # temporary file even when tarfile.add() raises.
        with tempfile.NamedTemporaryFile(mode='w', delete=True) as configtmp:
            configtmp.write(str(self.config))
            configtmp.flush()

            psetfilename = getattr(self.config.JobType, 'psetName', None)
            if psetfilename is not None:
                self.tarfile.add(psetfilename, '/debug/originalPSet.py')
            else:
                self.logger.debug('Failed to add pset to debug_files.tar.gz')

            self.tarfile.add(configtmp.name, '/debug/crabConfig.py')

            scriptExe = getattr(self.config.JobType, 'scriptExe', None)
            if scriptExe:
                self.tarfile.add(scriptExe, arcname=os.path.basename(scriptExe))

    def writeContent(self):
        """Save the content of the tarball"""
        self.content = [(int(x.size), x.name) for x in self.tarfile.getmembers()]

    def close(self):
        """
        Record the content of the tarball and close it
        """
        self.writeContent()
        return self.tarfile.close()

    def printSortedContent(self):
        """
        To be used for diagnostic printouts
        returns a string containing tarball content as a list of files sorted by size
        already formatted for use in a print statement
        (only the header line when no content has been recorded)
        """
        contentList = "\nsandbox content sorted by size[Bytes]:"
        # Nothing recorded yet (writeContent() not called) or an empty tarball
        if not self.content:
            return contentList

        sortedContent = sorted(self.content, reverse=True)
        biggestFileSize = sortedContent[0][0]
        # Column width: number of digits of the biggest size
        ndigits = int(math.ceil(math.log(biggestFileSize+1, 10)))
        lineFormat = "\n%" + str(ndigits) + "s\t%s"
        contentList += "".join(lineFormat % (size, name) for (size, name) in sortedContent)
        return contentList

    @staticmethod
    def _formatArchiveSize(archiveSizeBytes):
        """
        Turn a size in bytes into a short human readable string (KB or MB)
        """
        archiveSizeKB = archiveSizeBytes // 1024
        if archiveSizeKB <= 512:
            return "%d KB" % archiveSizeKB
        if archiveSizeKB < 1024 * 10:
            # Float literal keeps this a floating point division on python2 as well
            return "%.1f MB" % (archiveSizeKB / 1024.0)
        return "%d MB" % (archiveSizeKB // 1024)

    def upload(self, filecacheurl=None):
        """
        Upload the tarball to the File Cache

        Closes the tarball first and returns the hashkey of the uploaded file.
        Raises SandboxTooBigException when the archive exceeds FILE_SIZE_LIMIT
        and CachefileNotFoundException when the reply has no 'hashkey'.
        """
        self.close()
        archiveName = self.tarfile.name
        archiveSizeBytes = os.path.getsize(archiveName)
        archiveSize = self._formatArchiveSize(archiveSizeBytes)

        if archiveSizeBytes > FILE_SIZE_LIMIT:
            msg = ("%sError%s: input tarball size %s exceeds maximum allowed limit of %d MB" %
                   (colors.RED, colors.NORMAL, archiveSize, FILE_SIZE_LIMIT//1024//1024))
            msg += self.printSortedContent()
            raise SandboxTooBigException(msg)

        msg = ("Uploading archive %s (%s) to the CRAB cache. Using URI %s" %
               (archiveName, archiveSize, filecacheurl))
        self.logger.debug(msg)

        ufc = CRABClient.Emulator.getEmulator('ufc')({'endpoint' : filecacheurl, "pycurl": True})
        result = ufc.upload(archiveName, excludeList=NEW_USER_SANDBOX_EXCLUSIONS)
        if 'hashkey' not in result:
            self.logger.error("Failed to upload archive: %s" % str(result))
            raise CachefileNotFoundException
        return str(result['hashkey'])

    def checkdirectory(self, dir_):
        """
        Stat every file below dir_ (following symbolic links) and turn an
        OSError into an EnvironmentException
        """
        # checking for infinite symbolic link loop
        try:
            for root, _, files in os.walk(dir_, followlinks=True):
                for file_ in files:
                    os.stat(os.path.join(root, file_))
        except OSError as msg:
            err = '%sError%s: Infinite directory loop found in: %s \nStderr: %s' % \
                    (colors.RED, colors.NORMAL, dir_, msg)
            raise EnvironmentException(err)

    def __getattr__(self, *args):
        """
        Pass any unknown functions or attribute requests on to the TarFile object
        """
        # __getattr__ only runs after normal lookup failed. If 'logger' or
        # 'tarfile' themselves are missing (partially built instance), using
        # them below would re-enter this method forever.
        if args and args[0] in ('logger', 'tarfile'):
            raise AttributeError(args[0])
        self.logger.debug("Passing getattr %s on to TarFile" % args)
        return self.tarfile.__getattribute__(*args)

    def __enter__(self):
        """
        Allow use as context manager
        """
        return self

    def __exit__(self, excType, excValue, excTrace):
        """
        Allow use as context manager

        Closes the wrapped TarFile; exceptions are never suppressed.
        """
        self.tarfile.close()
        if excType:
            return False
class UserTarball(object):
    """
    _UserTarball_

    A wrapper around a TarFile for the user code tarballs; attributes that are
    not defined here are forwarded to the wrapped TarFile.
    By default creates a new tarball with the user libraries from lib, module,
    and the data/ and interface/ sections of the src/ area.
    Also adds user specified files in the right place.
    """

    def __init__(self, name=None, mode='w:gz', config=None, logger=None):
        self.config = config
        self.logger = logger
        self.scram = ScramEnvironment(logger=self.logger)
        self.logger.debug("Making tarball in %s" % name)
        self.tarfile = tarfile.open(name=name, mode=mode, dereference=True)
        self.checksum = None
        # List of (size, name) tuples, filled in by writeContent(). Defined
        # here so that reading it before close() does not fall through to
        # the TarFile object.
        self.content = None

    def addFiles(self, userFiles=None, cfgOutputName=None):
        """
        Add the necessary files to the tarball

        userFiles:     glob patterns of extra files; every match is stored
                       under its base name
        cfgOutputName: path of the pset file; it is stored as BOOTSTRAP_CFGFILE
                       and the pickle and dump files are taken from the same
                       directory

        Raises InputFileNotFoundException when a pattern matches no file.
        """
        directories = ['lib', 'biglib', 'module']
        if getattr(
                self.config.JobType, 'sendPythonFolder',
                configParametersInfo['JobType.sendPythonFolder']['default']):
            directories.append('python')

        # /data/ subdirs contain data files needed by the code
        # /interface/ subdirs contain C++ header files needed e.g. by ROOT6
        dataDirs = ['data', 'interface']
        userFiles = userFiles or []

        # Tar up whole directories
        for directory in directories:
            fullPath = os.path.join(self.scram.getCmsswBase(), directory)
            self.logger.debug("Checking directory %s" % fullPath)
            if os.path.exists(fullPath):
                self.logger.debug("Adding directory %s to tarball" % fullPath)
                self.checkdirectory(fullPath)
                self.tarfile.add(fullPath, directory, recursive=True)

        # Search for and tar up "data" directories in src/
        srcPath = os.path.join(self.scram.getCmsswBase(), 'src')
        for root, _, _ in os.walk(srcPath):
            if os.path.basename(root) in dataDirs:
                directory = root.replace(srcPath, 'src')
                self.logger.debug("Adding data directory %s to tarball" % root)
                self.checkdirectory(root)
                self.tarfile.add(root, directory, recursive=True)

        # Tar up extra files the user needs
        for globName in userFiles:
            fileNames = glob.glob(globName)
            if not fileNames:
                raise InputFileNotFoundException(
                    "The input file '%s' taken from parameter config.JobType.inputFiles cannot be found." % globName)

            for filename in fileNames:
                self.logger.debug("Adding file %s to tarball" % filename)
                self.checkdirectory(filename)
                self.tarfile.add(filename, os.path.basename(filename), recursive=True)

        scriptExe = getattr(self.config.JobType, 'scriptExe', None)
        if scriptExe:
            self.tarfile.add(scriptExe, arcname=os.path.basename(scriptExe))

        # Adding the pset files to the tarfile
        if cfgOutputName:
            basedir = os.path.dirname(cfgOutputName)
            self.tarfile.add(cfgOutputName, arcname=BOOTSTRAP_CFGFILE)
            self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_PKL), arcname=BOOTSTRAP_CFGFILE_PKL)
            self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_DUMP), arcname=BOOTSTRAP_CFGFILE_DUMP)

        # debug directory
        self._addDebugFiles()

    def _addDebugFiles(self):
        """
        Store the original pset (when configured) and a dump of the CRAB
        configuration under debug/ in the tarball
        """
        # Text mode is required: str(self.config) is text and the default
        # 'w+b' mode rejects it on Python 3. The with-block removes the
        # temporary file even when tarfile.add() raises.
        with tempfile.NamedTemporaryFile(mode='w', delete=True) as configtmp:
            configtmp.write(str(self.config))
            configtmp.flush()

            psetfilename = getattr(self.config.JobType, 'psetName', None)
            if psetfilename is not None:
                self.tarfile.add(psetfilename, '/debug/originalPSet.py')
            else:
                self.logger.debug('Failed to add pset to tarball')

            self.tarfile.add(configtmp.name, '/debug/crabConfig.py')

    def writeContent(self):
        """Save the content of the tarball"""
        self.content = [(int(x.size), x.name) for x in self.tarfile.getmembers()]

    def close(self):
        """
        Record the content of the tarball and close it
        """
        self.writeContent()
        return self.tarfile.close()

    def upload(self, filecacheurl=None):
        """
        Upload the tarball to the File Cache

        Closes the tarball first and returns the hashkey of the uploaded file.
        Raises CachefileNotFoundException when the reply has no 'hashkey'.
        """
        self.close()
        archiveName = self.tarfile.name
        self.logger.debug(
            "Uploading archive %s to the CRAB cache. Using URI %s" % (archiveName, filecacheurl))
        ufc = CRABClient.Emulator.getEmulator('ufc')({
            'endpoint': filecacheurl,
            "pycurl": True
        })
        result = ufc.upload(archiveName, excludeList=USER_SANDBOX_EXCLUSIONS)
        if 'hashkey' not in result:
            self.logger.error("Failed to upload source files: %s" % str(result))
            raise CachefileNotFoundException
        return str(result['hashkey'])

    def checkdirectory(self, dir_):
        """
        Stat every file below dir_ (following symbolic links) and turn an
        OSError into an EnvironmentException
        """
        # checking for infinite symbolic link loop
        try:
            for root, _, files in os.walk(dir_, followlinks=True):
                for file_ in files:
                    os.stat(os.path.join(root, file_))
        except OSError as msg:
            err = '%sError%s: Infinite directory loop found in: %s \nStderr: %s' % \
                    (colors.RED, colors.NORMAL, dir_, msg)
            raise EnvironmentException(err)

    def __getattr__(self, *args):
        """
        Pass any unknown functions or attribute requests on to the TarFile object
        """
        # __getattr__ only runs after normal lookup failed. If 'logger' or
        # 'tarfile' themselves are missing (partially built instance), using
        # them below would re-enter this method forever.
        if args and args[0] in ('logger', 'tarfile'):
            raise AttributeError(args[0])
        self.logger.debug("Passing getattr %s on to TarFile" % args)
        return self.tarfile.__getattribute__(*args)

    def __enter__(self):
        """
        Allow use as context manager
        """
        return self

    def __exit__(self, excType, excValue, excTrace):
        """
        Allow use as context manager

        Closes the wrapped TarFile; exceptions are never suppressed.
        """
        self.tarfile.close()
        if excType:
            return False
class UserTarball(object):
    """
        _UserTarball_

            A thin wrapper around a TarFile for the user code tarballs.
            (It is not a TarFile subclass: unknown attributes are forwarded to
            the wrapped TarFile through __getattr__.)

            By default creates a new tarball with the user libraries from lib,
            module, and the data/ sections of the src/ area.

            Also adds user specified files in the right place.
    """

    def __init__(self, name=None, mode='w:gz', config=None, logger=None):
        """
        Open the tarball `name` with `mode` and keep `config` and `logger`
        for later use. Symbolic links are dereferenced when files are added.
        """
        self.config = config
        self.logger = logger
        self.scram = ScramEnvironment(logger=self.logger)
        self.logger.debug("Making tarball in %s" % name)
        self.tarfile = tarfile.open(name=name, mode=mode, dereference=True)
        self.checksum = None

    def addFiles(self, userFiles=None, cfgOutputName=None):
        """
        Add the necessary files to the tarball

        userFiles:     iterable of glob patterns; every match is stored under
                       its base name at the top of the tarball.
        cfgOutputName: path of the generated configuration; when given it is
                       stored as 'PSet.py'.

        Raises InputFileNotFoundException when a pattern in userFiles matches
        nothing.
        """
        directories = ['lib', 'module']
        dataDirs = ['data']
        userFiles = userFiles or []
        cmsswBase = self.scram.getCmsswBase()

        # Tar up whole directories
        for directory in directories:
            fullPath = os.path.join(cmsswBase, directory)
            self.logger.debug(" checking directory %s" % fullPath)
            if os.path.exists(fullPath):
                self.logger.debug(" adding directory %s to tarball" % fullPath)
                self.tarfile.add(fullPath, directory, recursive=True)

        # Search for and tar up "data" directories in src/
        srcPath = os.path.join(cmsswBase, 'src')
        for root, subdirs, _files in os.walk(srcPath):
            if os.path.basename(root) in dataDirs:
                # os.walk yields paths that start with srcPath, so only that
                # leading part is swapped for 'src' (str.replace would also
                # rewrite any later occurrence of the same text).
                directory = 'src' + root[len(srcPath):]
                self.logger.debug(" adding data directory %s to tarball" % root)
                self.tarfile.add(root, directory, recursive=True)
                # The directory was added recursively: do not descend into it,
                # otherwise a nested data/ directory is archived a second time.
                subdirs[:] = []

        # Tar up extra files the user needs
        for globName in userFiles:
            fileNames = glob.glob(globName)
            if not fileNames:
                raise InputFileNotFoundException('The input file "%s" taken from parameter config.JobType.inputFiles cannot be found' % globName)
            for filename in fileNames:
                self.logger.debug(" adding file %s to tarball" % filename)
                self.tarfile.add(filename, os.path.basename(filename), recursive=True)

        # Adding the pset file to the tarfile
        if cfgOutputName:
            self.tarfile.add(cfgOutputName, arcname='PSet.py')

    def close(self):
        """
        Calculate the checksum and close the tarball
        """
        self.calculateChecksum()
        return self.tarfile.close()

    def upload(self):
        """
        Upload the tarball to the Panda Cache

        Closes the tarball first. Returns the tuple
        (serverUrl, archiveName, checksum), where serverUrl and archiveName
        are parsed from the colon separated reply of PandaInterface.putFile.

        Raises CachefileNotFoundException when the reply starts with neither
        'NewFileName:' nor 'True'.
        """
        self.close()
        archiveName = self.tarfile.name
        serverUrl = ""

        self.logger.debug(" uploading archive to cache %s " % archiveName)
        status, out = PandaInterface.putFile(archiveName, verbose=False, useCacheSrv=True, reuseSandbox=True)

        # The reply is a colon separated record; split it once.
        fields = out.split(':')
        if out.startswith('NewFileName:'):
            # found the same input sandbox to reuse
            self.logger.debug("out: %s" % out)
            self.logger.debug("status: %s" % status)
            self.logger.debug("found the same input sandbox to reuse")
            archiveName = fields[-1]
            serverUrl = "https://%s:%s" % (fields[-2], '25443')
            self.logger.debug("archiveName: %s" %archiveName)
        elif out.startswith('True'):
            archiveName = fields[-1]
            serverUrl = "%s:%s:%s" % (fields[-4], fields[-3], fields[-2])
        else:
            self.logger.error( str(out) )
            self.logger.error("failed to upload source files with %s" % status)
            raise CachefileNotFoundException

        return serverUrl, archiveName, self.checksum

    def calculateChecksum(self):
        """
        Calculate a checksum from the name, size, mtime and owner name of
        every member, so it does not depend on when the tgz itself was created.
        The hex digest is stored in self.checksum.
        """
        lsl = [(x.name, int(x.size), int(x.mtime), x.uname) for x in self.tarfile.getmembers()]
        # md5 needs bytes on Python 3; on Python 2 str(lsl) is plain ASCII, so
        # encoding it leaves the digest unchanged.
        hasher = hashlib.md5(str(lsl).encode('utf-8'))
        self.logger.debug('tgz contents: %s' % lsl)
        self.checksum = hasher.hexdigest()
        self.logger.debug('MD5 checksum: %s' % self.checksum)
        # NOTE: an earlier approach hashed the tarball file itself in chunks
        # (sha256); that may still be useful for non-tar files if needed.

    def __getattr__(self, *args):
        """
        Pass any unknown functions or attribute requests on to the TarFile object
        """
        # __getattr__ only runs when normal lookup failed. If 'logger' or
        # 'tarfile' themselves are missing (object not initialised yet), the
        # lines below would look them up again and recurse without end.
        if args[0] in ('logger', 'tarfile'):
            raise AttributeError(args[0])
        self.logger.debug("Passing getattr %s on to TarFile" % args)
        return self.tarfile.__getattribute__(*args)

    def __enter__(self):
        """
        Allow use as context manager
        """
        return self

    def __exit__(self, excType, excValue, excTrace):
        """
        Allow use as context manager

        Closes the wrapped TarFile directly (no checksum is calculated here)
        and never suppresses an exception raised inside the with block.
        """
        self.tarfile.close()
        if excType:
            return False
class UserTarball(object):
    """
        _UserTarball_

            A thin wrapper around a TarFile for the user code tarballs.

            By default creates a new tarball with the user libraries from lib,
            module, and the data/ sections of the src/ area (plus python when
            JobType.sendPythonFolder is set).

            Also adds user specified files in the right place.
    """

    def __init__(self, name=None, mode='w:gz', config=None, logger=None):
        """
        Open the tarball `name` with `mode` and keep `config` and `logger`
        for later use. Symbolic links are dereferenced when files are added.
        """
        self.config = config
        self.logger = logger
        self.scram = ScramEnvironment(logger=self.logger)
        self.logger.debug("Making tarball in %s" % name)
        self.tarfile = tarfile.open(name=name, mode=mode, dereference=True)
        self.checksum = None

    def addFiles(self, userFiles=None, cfgOutputName=None):
        """
        Add the necessary files to the tarball

        userFiles:     iterable of glob patterns; every match is stored under
                       its base name at the top of the tarball.
        cfgOutputName: path of the generated configuration; when given it is
                       stored as 'PSet.py' and its '.pkl' sibling as 'PSet.pkl'.

        Raises InputFileNotFoundException when a pattern in userFiles matches
        nothing, and EnvironmentException (from checkdirectory) when a path to
        be added cannot be walked.
        """
        directories = ['lib', 'module']
        if getattr(self.config.JobType, 'sendPythonFolder',
                   configParametersInfo['JobType.sendPythonFolder']['default']):
            directories.append('python')
        dataDirs = ['data']
        userFiles = userFiles or []
        cmsswBase = self.scram.getCmsswBase()

        # Tar up whole directories
        for directory in directories:
            fullPath = os.path.join(cmsswBase, directory)
            self.logger.debug(" checking directory %s" % fullPath)
            if os.path.exists(fullPath):
                self.logger.debug(" adding directory %s to tarball" % fullPath)
                self.checkdirectory(fullPath)
                self.tarfile.add(fullPath, directory, recursive=True)

        # Search for and tar up "data" directories in src/
        srcPath = os.path.join(cmsswBase, 'src')
        for root, subdirs, _files in os.walk(srcPath):
            if os.path.basename(root) in dataDirs:
                # os.walk yields paths that start with srcPath, so only that
                # leading part is swapped for 'src' (str.replace would also
                # rewrite any later occurrence of the same text).
                directory = 'src' + root[len(srcPath):]
                self.logger.debug(" adding data directory %s to tarball" % root)
                self.checkdirectory(root)
                self.tarfile.add(root, directory, recursive=True)
                # The directory was added recursively: do not descend into it,
                # otherwise a nested data/ directory is archived a second time.
                subdirs[:] = []

        # Tar up extra files the user needs
        for globName in userFiles:
            fileNames = glob.glob(globName)
            if not fileNames:
                raise InputFileNotFoundException(
                    'The input file "%s" taken from parameter config.JobType.inputFiles cannot be found' % globName)
            for filename in fileNames:
                self.logger.debug(" adding file %s to tarball" % filename)
                self.checkdirectory(filename)
                self.tarfile.add(filename, os.path.basename(filename), recursive=True)

        scriptExe = getattr(self.config.JobType, 'scriptExe', None)
        if scriptExe:
            self.tarfile.add(scriptExe, arcname=os.path.basename(scriptExe))

        # Adding the pset and crabconfig file to the tarfile
        if cfgOutputName:
            self.tarfile.add(cfgOutputName, arcname='PSet.py')
            self.tarfile.add(os.path.splitext(cfgOutputName)[0] + '.pkl', arcname='PSet.pkl')

        # NOTE(review): the debug files below are assumed to be added whether
        # or not cfgOutputName is given - confirm against the original layout.
        # Text mode so that writing str(self.config) also works on Python 3.
        configtmp = tempfile.NamedTemporaryFile(mode='w', delete=True)
        try:
            configtmp.write(str(self.config))
            configtmp.flush()
            psetfilename = getattr(self.config.JobType, 'psetName', None)
            if psetfilename is not None:
                self.tarfile.add(psetfilename, '/debug/originalPSet.py')
            else:
                self.logger.debug('Failed to add pset to tarball')
            self.tarfile.add(configtmp.name, '/debug/crabConfig.py')
        finally:
            # Closing deletes the temporary file, also when adding failed.
            configtmp.close()

    def close(self):
        """
        Calculate the checksum and close the tarball
        """
        self.calculateChecksum()
        return self.tarfile.close()

    def upload(self, filecacheurl=None):
        """
        Upload the tarball to the File Cache

        Closes the tarball first. Returns the tuple
        (hashkey + '.tar.gz', checksum).

        Raises CachefileNotFoundException when the reply has no 'hashkey'.
        """
        self.close()
        archiveName = self.tarfile.name
        self.logger.debug(" uploading archive to cache %s " % archiveName)
        ufc = CRABClient.Emulator.getEmulator('ufc')({'endpoint': filecacheurl})
        result = ufc.upload(archiveName)
        if 'hashkey' not in result:
            self.logger.error("Failed to upload source files: %s" % str(result))
            raise CachefileNotFoundException
        return str(result['hashkey']) + '.tar.gz', self.checksum

    def calculateChecksum(self):
        """
        Calculate a checksum from the name, size, mtime and owner name of
        every member, so it does not depend on when the tgz itself was created.
        The hex digest is stored in self.checksum.
        """
        lsl = [(x.name, int(x.size), int(x.mtime), x.uname) for x in self.tarfile.getmembers()]
        # md5 needs bytes on Python 3; on Python 2 str(lsl) is plain ASCII, so
        # encoding it leaves the digest unchanged.
        hasher = hashlib.md5(str(lsl).encode('utf-8'))
        self.logger.debug('tgz contents: %s' % lsl)
        self.checksum = hasher.hexdigest()
        self.logger.debug('MD5 checksum: %s' % self.checksum)
        # NOTE: an earlier approach hashed the tarball file itself in chunks
        # (sha256); that may still be useful for non-tar files if needed.

    def checkdirectory(self, dir_):
        """
        Stat every file below dir_, following symbolic links, and raise
        EnvironmentException if that fails with an OSError (the case this
        guards against is an infinite symbolic link loop).
        """
        try:
            for root, _, files in os.walk(dir_, followlinks=True):
                for file_ in files:
                    os.stat(os.path.join(root, file_))
        except OSError as msg:
            err = '%sError %s:Infinite directory loop found in: %s \nStderr: %s' % \
                    (colors.RED, colors.NORMAL, dir_ , msg)
            raise EnvironmentException(err)
class UserTarball(object):
    """
        _UserTarball_

            A thin wrapper around a TarFile for the user code tarballs.

            By default creates a new tarball with the user libraries from lib,
            module, and the data/ sections of the src/ area.

            Also adds user specified files in the right place.
    """

    def __init__(self, name=None, mode='w:gz', config=None, logger=None):
        """
        Open the tarball `name` with `mode` and keep `config` and `logger`
        for later use. Symbolic links are dereferenced when files are added.
        Also points PandaInterface.LOGGER at the 'CRAB3:traceback' logger.
        """
        self.config = config
        self.logger = logger
        self.scram = ScramEnvironment(logger=self.logger)
        self.logger.debug("Making tarball in %s" % name)
        self.tarfile = tarfile.open(name=name, mode=mode, dereference=True)
        self.checksum = None
        PandaInterface.LOGGER = logging.getLogger('CRAB3:traceback')

    def addFiles(self, userFiles=None, cfgOutputName=None):
        """
        Add the necessary files to the tarball

        userFiles:     iterable of glob patterns; every match is stored under
                       its base name at the top of the tarball.
        cfgOutputName: path of the generated configuration; when given it is
                       stored as 'PSet.py'.

        Raises InputFileNotFoundException when a pattern in userFiles matches
        nothing, and EnvironmentException (from checkdirectory) when a path to
        be added cannot be walked.
        """
        directories = ['lib', 'module']
        dataDirs = ['data']
        userFiles = userFiles or []
        cmsswBase = self.scram.getCmsswBase()

        # Tar up whole directories
        for directory in directories:
            fullPath = os.path.join(cmsswBase, directory)
            self.logger.debug(" checking directory %s" % fullPath)
            if os.path.exists(fullPath):
                self.logger.debug(" adding directory %s to tarball" % fullPath)
                self.checkdirectory(fullPath)
                self.tarfile.add(fullPath, directory, recursive=True)

        # Search for and tar up "data" directories in src/
        srcPath = os.path.join(cmsswBase, 'src')
        for root, subdirs, _files in os.walk(srcPath):
            if os.path.basename(root) in dataDirs:
                # os.walk yields paths that start with srcPath, so only that
                # leading part is swapped for 'src' (str.replace would also
                # rewrite any later occurrence of the same text).
                directory = 'src' + root[len(srcPath):]
                self.logger.debug(" adding data directory %s to tarball" % root)
                self.checkdirectory(root)
                self.tarfile.add(root, directory, recursive=True)
                # The directory was added recursively: do not descend into it,
                # otherwise a nested data/ directory is archived a second time.
                subdirs[:] = []

        # Tar up extra files the user needs
        for globName in userFiles:
            fileNames = glob.glob(globName)
            if not fileNames:
                raise InputFileNotFoundException('The input file "%s" taken from parameter config.JobType.inputFiles cannot be found' % globName)
            for filename in fileNames:
                self.logger.debug(" adding file %s to tarball" % filename)
                self.checkdirectory(filename)
                self.tarfile.add(filename, os.path.basename(filename), recursive=True)

        # Adding the pset file to the tarfile
        if cfgOutputName:
            self.tarfile.add(cfgOutputName, arcname='PSet.py')

    def close(self):
        """
        Calculate the checksum and close the tarball
        """
        self.calculateChecksum()
        return self.tarfile.close()

    def upload(self):
        """
        Upload the tarball to the File Cache

        Closes the tarball first. Returns the tuple
        (filecacheurl, hashkey + '.tar.gz', checksum), with filecacheurl taken
        from config.JobType.

        Raises CachefileNotFoundException when the reply has no 'hashkey'.
        """
        self.close()
        archiveName = self.tarfile.name

        self.logger.debug(" uploading archive to cache %s " % archiveName)
        ufc = UserFileCache({'endpoint' : self.config.JobType.filecacheurl})
        result = ufc.upload(archiveName)
        if 'hashkey' not in result:
            self.logger.error("Failed to upload source files: %s" % str(result))
            raise CachefileNotFoundException
        return self.config.JobType.filecacheurl, str(result['hashkey']) + '.tar.gz', self.checksum

    def calculateChecksum(self):
        """
        Calculate a checksum from the name, size, mtime and owner name of
        every member, so it does not depend on when the tgz itself was created.
        The hex digest is stored in self.checksum.
        """
        lsl = [(x.name, int(x.size), int(x.mtime), x.uname) for x in self.tarfile.getmembers()]
        # md5 needs bytes on Python 3; on Python 2 str(lsl) is plain ASCII, so
        # encoding it leaves the digest unchanged.
        hasher = hashlib.md5(str(lsl).encode('utf-8'))
        self.logger.debug('tgz contents: %s' % lsl)
        self.checksum = hasher.hexdigest()
        self.logger.debug('MD5 checksum: %s' % self.checksum)
        # NOTE: an earlier approach hashed the tarball file itself in chunks
        # (sha256); that may still be useful for non-tar files if needed.

    def checkdirectory(self, dir):
        """
        Stat every file below dir, following symbolic links, and raise
        EnvironmentException if that fails with an OSError (the case this
        guards against is an infinite symbolic link loop).
        """
        # The parameter keeps its original name 'dir' so keyword callers
        # continue to work, even though it shadows the builtin.
        try:
            for root, _, files in os.walk(dir, followlinks=True):
                for file_ in files:
                    os.stat(os.path.join(root, file_))
        except OSError as msg:
            err = '%sError %s:Infinite directory loop found in: %s \nStderr: %s' % \
                    (colors.RED, colors.NORMAL, dir , msg)
            raise EnvironmentException(err)