Ejemplo n.º 1
0
    def testAccessors(self):
        """
        Check that each ScramEnvironment accessor returns the value
        stored on the test case (version, arch, base).
        """
        env = ScramEnvironment(logger=self.testLogger)

        # (accessor on the environment, attribute holding the expected value)
        checks = (
            ('getCmsswVersion', 'version'),
            ('getScramArch', 'arch'),
            ('getCmsswBase', 'base'),
        )
        for accessorName, expectedAttr in checks:
            self.assertEqual(getattr(env, accessorName)(), getattr(self, expectedAttr))
Ejemplo n.º 2
0
    def testScram(self):
        """
        Check that a ScramEnvironment reports a CMSSW version, a SCRAM
        architecture and a CMSSW base, i.e. none of them is None.
        """
        env = ScramEnvironment(logger=self.logger)
        failureHint = "You must set up a CMSSW environment first"

        for accessor in (env.getCmsswVersion, env.getScramArch, env.getCmsswBase):
            self.assertNotEqual(accessor(), None, failureHint)
Ejemplo n.º 3
0
    def testScram(self):
        """
        Verify that the Scram environment exposes non-None values for the
        CMSSW version, the SCRAM architecture and the CMSSW base.
        """
        hint = "You must set up a CMSSW environment first"
        scramEnv = ScramEnvironment(logger=self.logger)

        for accessorName in ('getCmsswVersion', 'getScramArch', 'getCmsswBase'):
            value = getattr(scramEnv, accessorName)()
            self.assertNotEqual(value, None, hint)
Ejemplo n.º 4
0
class UserTarball(object):
    """
        _UserTarball_

            A wrapper around a TarFile for the user code tarballs (attributes
            not found on this object are forwarded to the wrapped TarFile).
            By default creates a new tarball with the user libraries from
            lib, biglib, module, and the data/ and interface/ sections of the
            src/ area.

            Also adds user specified files in the right place.
    """

    def __init__(self, name=None, mode='w:gz', config=None, logger=None):
        """
        Open the archive.

        name   -- path of the tarball to create
        mode   -- mode string handed to tarfile.open
        config -- configuration object; its JobType section is read by addFiles
        logger -- logger for diagnostics; it is used immediately, so despite
                  the default it must not be None
        """
        self.config = config
        self.logger = logger
        self.scram = ScramEnvironment(logger=self.logger)
        self.logger.debug("Making tarball in %s" % name)
        # dereference=True: archive the content symlinks point to, not the links
        self.tarfile = tarfile.open(name=name, mode=mode, dereference=True)
        self.checksum = None

    def addFiles(self, userFiles=None, cfgOutputName=None):
        """
        Add the necessary files to the tarball

        userFiles     -- glob patterns of extra files; every pattern must
                         match at least one path
        cfgOutputName -- path of the pset file; stored as PSet.py, and the
                         file with the same stem and a .pkl suffix as PSet.pkl

        Raises InputFileNotFoundException when a pattern matches nothing and
        EnvironmentException (via checkdirectory) when a path cannot be stat-ed.
        """
        directories = ['lib', 'biglib', 'module']
        if getattr(self.config.JobType, 'sendPythonFolder', configParametersInfo['JobType.sendPythonFolder']['default']):
            directories.append('python')
        # /data/ subdirs contain data files needed by the code
        # /interface/ subdirs contain C++ header files needed e.g. by ROOT6
        dataDirs = ['data', 'interface']
        userFiles = userFiles or []

        # Tar up whole directories
        for directory in directories:
            fullPath = os.path.join(self.scram.getCmsswBase(), directory)
            self.logger.debug(" checking directory %s" % fullPath)
            if os.path.exists(fullPath):
                self.logger.debug(" adding directory %s to tarball" % fullPath)
                self.checkdirectory(fullPath)
                self.tarfile.add(fullPath, directory, recursive=True)

        # Search for and tar up "data" directories in src/
        srcPath = os.path.join(self.scram.getCmsswBase(), 'src')
        for root, _dirs, _files in os.walk(srcPath):
            if os.path.basename(root) in dataDirs:
                directory = root.replace(srcPath, 'src')
                self.logger.debug(" adding data directory %s to tarball" % root)
                self.checkdirectory(root)
                self.tarfile.add(root, directory, recursive=True)

        # Tar up extra files the user needs
        for globName in userFiles:
            fileNames = glob.glob(globName)
            if not fileNames:
                raise InputFileNotFoundException('The input file "%s" taken from parameter config.JobType.inputFiles cannot be found' % globName)
            for filename in fileNames:
                self.logger.debug(" adding file %s to tarball" % filename)
                self.checkdirectory(filename)
                self.tarfile.add(filename, os.path.basename(filename), recursive=True)

        scriptExe = getattr(self.config.JobType, 'scriptExe', None)
        if scriptExe:
            self.tarfile.add(scriptExe, arcname=os.path.basename(scriptExe))

        # Adding the pset and crabconfig file to the tarfile
        if cfgOutputName:
            self.tarfile.add(cfgOutputName, arcname='PSet.py')
            self.tarfile.add(os.path.splitext(cfgOutputName)[0]+'.pkl', arcname='PSet.pkl')

        psetfilename = getattr(self.config.JobType, 'psetName', None)
        if psetfilename is not None:
            self.tarfile.add(psetfilename, '/debug/originalPSet.py')
        else:
            self.logger.debug('Failed to add pset to tarball')

        # Text mode so that writing a str works on Python 3 as well; the
        # with-block removes the temporary file even if tarfile.add raises.
        with tempfile.NamedTemporaryFile(mode='w', delete=True) as configtmp:
            configtmp.write(str(self.config))
            configtmp.flush()
            self.tarfile.add(configtmp.name, '/debug/crabConfig.py')

    def close(self):
        """
        Calculate the checksum and close the tarball.

        Calling it again on an already closed tarball is a no-op (the member
        list of a closed archive can no longer be read).
        """
        if self.tarfile.closed:
            return None
        self.calculateChecksum()
        return self.tarfile.close()

    def upload(self, filecacheurl=None):
        """
        Upload the tarball to the File Cache

        Closes the tarball first. Returns a tuple
        (hashkey + '.tar.gz', checksum); raises CachefileNotFoundException
        when the upload result carries no 'hashkey'.
        """
        self.close()
        archiveName = self.tarfile.name
        self.logger.debug(" uploading archive to cache %s " % archiveName)
        ufc = CRABClient.Emulator.getEmulator('ufc')({'endpoint' : filecacheurl})
        result = ufc.upload(archiveName)
        if 'hashkey' not in result:
            self.logger.error("Failed to upload source files: %s" % str(result))
            raise CachefileNotFoundException
        return str(result['hashkey']) + '.tar.gz', self.checksum

    def calculateChecksum(self):
        """
        Calculate a checksum that doesn't depend on the tgz
        creation date: it is the MD5 of the (name, size, mtime, uname)
        list of the archive members. The result is stored in self.checksum.
        """
        lsl = [(x.name, int(x.size), int(x.mtime), x.uname) for x in self.tarfile.getmembers()]
        # hashlib needs bytes on Python 3; encoding the str form works on 2 and 3
        hasher = hashlib.md5(str(lsl).encode('utf-8'))
        self.logger.debug('tgz contents: %s' % lsl)
        self.checksum = hasher.hexdigest()
        self.logger.debug('MD5 checksum: %s' % self.checksum)

    def checkdirectory(self, dir_):
        """
        Walk dir_ (following symlinks) and stat every file in it.

        Any OSError raised while doing so is reported as an infinite symbolic
        link loop through an EnvironmentException.
        """
        try:
            for root, _, files in os.walk(dir_, followlinks=True):
                for file_ in files:
                    os.stat(os.path.join(root, file_))
        except OSError as msg:
            err = '%sError%s: Infinite directory loop found in: %s \nStderr: %s' % \
                    (colors.RED, colors.NORMAL, dir_, msg)
            raise EnvironmentException(err)

    def __getattr__(self, name):
        """
        Pass any unknown functions or attribute requests on to the TarFile object
        """
        # __getattr__ only runs when normal lookup failed. If 'tarfile' or
        # 'logger' themselves are missing (e.g. __init__ did not complete),
        # reading them below would re-enter this method without end.
        if name in ('tarfile', 'logger'):
            raise AttributeError(name)
        self.logger.debug("Passing getattr %s on to TarFile" % (name,))
        return getattr(self.tarfile, name)

    def __enter__(self):
        """
        Allow use as context manager
        """
        return self

    def __exit__(self, excType, excValue, excTrace):
        """
        Allow use as context manager: closes the underlying TarFile and
        never suppresses an exception raised inside the with-block.
        """
        self.tarfile.close()
        return False
Ejemplo n.º 5
0
class UserTarball(object):
    """
        _UserTarball_

            A wrapper around a TarFile for the user code tarballs (attributes
            not found on this object are forwarded to the wrapped TarFile).
            By default creates a new tarball with the user libraries from
            lib, biglib, module, and the data/ and interface/ sections of the
            src/ area.

            Also adds user specified files in the right place.
    """

    def __init__(self, name=None, mode='w:bz2', config=None, logger=None, crabserver=None, s3tester=None):
        """
        Open the archive.

        name       -- path of the tarball to create
        mode       -- mode string handed to tarfile.open
        config     -- configuration object; its JobType section is read when adding files
        logger     -- logger for diagnostics; it is used immediately, so
                      despite the default it must not be None
        crabserver -- handed to uploadToS3 by upload()
        s3tester   -- handed to testS3upload by upload()
        """
        self.config = config
        self.logger = logger
        self.scram = ScramEnvironment(logger=self.logger)
        self.logger.debug("Making tarball in %s" % name)
        # dereference=True: archive the content symlinks point to, not the links
        self.tarfile = tarfile.open(name=name, mode=mode, dereference=True)
        self.checksum = None
        self.content = None
        self.crabserver = crabserver
        self.s3tester = s3tester

    def addFiles(self, userFiles=None, cfgOutputName=None):
        """
        Add the necessary files to the tarball

        userFiles     -- glob patterns of extra files; every pattern must
                         match at least one path
        cfgOutputName -- path of the pset file; stored under BOOTSTRAP_CFGFILE,
                         together with the BOOTSTRAP_CFGFILE_PKL and
                         BOOTSTRAP_CFGFILE_DUMP files found in its directory

        Raises InputFileNotFoundException when a pattern matches nothing and
        EnvironmentException (via checkdirectory) when a path cannot be stat-ed.
        """
        directories = ['lib', 'biglib', 'module']
        if getattr(self.config.JobType, 'sendPythonFolder', configParametersInfo['JobType.sendPythonFolder']['default']):
            directories.append('python')
            directories.append('cfipython')
        if getattr(self.config.JobType, 'sendExternalFolder', configParametersInfo['JobType.sendExternalFolder']['default']):
            externalDirPath = os.path.join(self.scram.getCmsswBase(), 'external')
            if os.path.exists(externalDirPath) and os.listdir(externalDirPath):
                directories.append('external')
            else:
                self.logger.info("The config.JobType.sendExternalFolder parameter is set to True but the external directory "\
                                  "doesn't exist or is empty, not adding to tarball. Path: %s" % externalDirPath)

        # Note that dataDirs are only looked-for and added under the src/ folder.
        # /data/ subdirs contain data files needed by the code
        # /interface/ subdirs contain C++ header files needed e.g. by ROOT6
        dataDirs = ['data', 'interface']
        userFiles = userFiles or []

        # Tar up whole directories
        for directory in directories:
            fullPath = os.path.join(self.scram.getCmsswBase(), directory)
            self.logger.debug("Checking directory %s" % fullPath)
            if os.path.exists(fullPath):
                self.logger.debug("Adding directory %s to tarball" % fullPath)
                self.checkdirectory(fullPath)
                self.tarfile.add(fullPath, directory, recursive=True)

        # Search for and tar up "data" directories in src/
        srcPath = os.path.join(self.scram.getCmsswBase(), 'src')
        for root, _, _ in os.walk(srcPath):
            if os.path.basename(root) in dataDirs:
                directory = root.replace(srcPath, 'src')
                self.logger.debug("Adding data directory %s to tarball" % root)
                self.checkdirectory(root)
                self.tarfile.add(root, directory, recursive=True)

        # Tar up extra files the user needs
        for globName in userFiles:
            fileNames = glob.glob(globName)
            if not fileNames:
                raise InputFileNotFoundException("The input file '%s' taken from parameter config.JobType.inputFiles cannot be found." % globName)
            for filename in fileNames:
                self.logger.debug("Adding file %s to tarball" % filename)
                self.checkdirectory(filename)
                self.tarfile.add(filename, os.path.basename(filename), recursive=True)

        scriptExe = getattr(self.config.JobType, 'scriptExe', None)
        if scriptExe:
            self.tarfile.add(scriptExe, arcname=os.path.basename(scriptExe))

        # Adding the pset files to the tarfile
        if cfgOutputName:
            basedir = os.path.dirname(cfgOutputName)
            self.tarfile.add(cfgOutputName, arcname=BOOTSTRAP_CFGFILE)
            self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_PKL), arcname=BOOTSTRAP_CFGFILE_PKL)
            self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_DUMP), arcname=BOOTSTRAP_CFGFILE_DUMP)

    def addMonFiles(self):
        """
        Add monitoring files to the debug tarball: the original pset (when
        config.JobType.psetName is set), a dump of the configuration and the
        scriptExe (when set).
        """
        psetfilename = getattr(self.config.JobType, 'psetName', None)
        if psetfilename:
            self.tarfile.add(psetfilename, '/debug/originalPSet.py')
        else:
            self.logger.debug('Failed to add pset to debug_files.tar.gz')

        # The with-block removes the temporary file even if tarfile.add raises
        with tempfile.NamedTemporaryFile(mode='w', delete=True) as configtmp:
            configtmp.write(str(self.config))
            configtmp.flush()
            self.tarfile.add(configtmp.name, '/debug/crabConfig.py')

        scriptExe = getattr(self.config.JobType, 'scriptExe', None)
        if scriptExe:
            self.tarfile.add(scriptExe, arcname=os.path.basename(scriptExe))

    def writeContent(self):
        """Save the content of the tarball as a list of (size, name) tuples"""
        self.content = [(int(x.size), x.name) for x in self.tarfile.getmembers()]

    def close(self):
        """
        Record the tarball content and close the tarball.

        Calling it again on an already closed tarball is a no-op (the member
        list of a closed archive can no longer be read).
        """
        if self.tarfile.closed:
            return None
        self.writeContent()
        return self.tarfile.close()

    def printSortedContent(self):
        """
        To be used for diagnostic printouts
        returns a string containing tarball content as a list of files sorted by size
        already formatted for use in a print statement.
        Only the header line is returned when no content has been recorded.
        """
        sortedContent = sorted(self.content or [], reverse=True)
        lines = ["\nsandbox content sorted by size[Bytes]:"]
        if sortedContent:
            # width of the size column = number of digits of the biggest size
            ndigits = len(str(sortedContent[0][0]))
            rowFormat = "\n%" + str(ndigits) + "s\t%s"
            lines.extend(rowFormat % (size, name) for (size, name) in sortedContent)
        return "".join(lines)

    @staticmethod
    def _formatArchiveSize(archiveSizeBytes):
        """Return a human readable size: KB up to 512 KB, MB with one decimal below 10 MB, whole MB above."""
        archiveSizeKB = archiveSizeBytes // 1024
        if archiveSizeKB <= 512:
            return "%d KB" % archiveSizeKB
        if archiveSizeKB < 1024 * 10:
            # divide by a float so the decimal survives on Python 2 as well
            return "%3.1f MB" % (archiveSizeKB / 1024.0)
        return "%d MB" % (archiveSizeKB // 1024)

    def upload(self, filecacheurl=None):
        """
        Upload the tarball to the File Cache

        Closes the tarball first. Uses S3 when filecacheurl contains 'S3'
        (case-insensitive), the UserFileCache otherwise. Returns the hashkey
        of the uploaded archive.

        Raises SandboxTooBigException when the archive exceeds FILE_SIZE_LIMIT
        and CachefileNotFoundException when the UserFileCache result carries
        no 'hashkey'.
        """
        self.close()
        archiveName = self.tarfile.name
        archiveSizeBytes = os.path.getsize(archiveName)
        archiveSize = self._formatArchiveSize(archiveSizeBytes)

        if archiveSizeBytes > FILE_SIZE_LIMIT:
            msg = ("%sError%s: input tarball size %s exceeds maximum allowed limit of %d MB" %
                   (colors.RED, colors.NORMAL, archiveSize, FILE_SIZE_LIMIT//1024//1024))
            msg += self.printSortedContent()
            raise SandboxTooBigException(msg)

        msg = ("Uploading archive %s (%s) to the CRAB cache. Using URI %s" %
               (archiveName, archiveSize, filecacheurl))
        self.logger.debug(msg)

        # filecacheurl defaults to None: only inspect it when it is set
        if filecacheurl and 'S3' in filecacheurl.upper():
            # use S3
            # generate a 32char hash like UserFileCache used to do
            hashkey = calculateChecksum(archiveName, exclude=NEW_USER_SANDBOX_EXCLUSIONS)
            # the ".tar.gz" suffix here is forced by other places in the client which add it when
            # storing tarball name in task table. Not very elegant to need to hardcode in several places.
            cachename = "%s.tar.gz" % hashkey
            uploadToS3(crabserver=self.crabserver, objecttype='sandbox', filepath=archiveName,
                       tarballname=cachename, logger=self.logger)
        else:
            # old way using UFC
            ufc = CRABClient.Emulator.getEmulator('ufc')({'endpoint' : filecacheurl, "pycurl": True})
            t1 = time.time()
            result = ufc.upload(archiveName, excludeList=NEW_USER_SANDBOX_EXCLUSIONS)
            ufcSeconds = int(time.time()-t1)
            if 'hashkey' not in result:
                self.logger.error("Failed to upload archive: %s" % str(result))
                raise CachefileNotFoundException
            hashkey = str(result['hashkey'])
            # upload a copy to S3 dev as well, just to stress it a bit, this never raises
            s3report = testS3upload(self.s3tester, archiveName, hashkey, self.logger)
            # report also how long it took uploading to UFC (which surely worked if we are here)
            s3report['ufcseconds'] = ufcSeconds
            # upload S3 test report to crabcache
            reportName = 'S3-' + s3report['timestamp'] + ':s3report.json'
            reportFile = '/tmp/crabs3report.' + uuid.uuid4().hex
            try:
                with open(reportFile, 'w') as fp:
                    json.dump(s3report, fp)
                try:
                    ufc.uploadLog(reportFile, reportName)
                    self.logger.debug('Report of S3 upload stored on CrabCache as %s', reportName)
                except Exception as e:
                    # storing the report is best effort: never fail the upload for it
                    self.logger.debug(str(e))
            finally:
                # do not leave the report behind, whatever happened above
                if os.path.exists(reportFile):
                    os.remove(reportFile)
        return hashkey

    def checkdirectory(self, dir_):
        """
        Walk dir_ (following symlinks) and stat every file in it.

        Any OSError raised while doing so is reported as an infinite symbolic
        link loop through an EnvironmentException.
        """
        try:
            for root, _, files in os.walk(dir_, followlinks=True):
                for file_ in files:
                    os.stat(os.path.join(root, file_))
        except OSError as msg:
            err = '%sError%s: Infinite directory loop found in: %s \nStderr: %s' % \
                    (colors.RED, colors.NORMAL, dir_, msg)
            raise EnvironmentException(err)

    def __getattr__(self, name):
        """
        Pass any unknown functions or attribute requests on to the TarFile object
        """
        # __getattr__ only runs when normal lookup failed. If 'tarfile' or
        # 'logger' themselves are missing (e.g. __init__ did not complete),
        # reading them below would re-enter this method without end.
        if name in ('tarfile', 'logger'):
            raise AttributeError(name)
        self.logger.debug("Passing getattr %s on to TarFile" % (name,))
        return getattr(self.tarfile, name)

    def __enter__(self):
        """
        Allow use as context manager
        """
        return self

    def __exit__(self, excType, excValue, excTrace):
        """
        Allow use as context manager: closes the underlying TarFile and
        never suppresses an exception raised inside the with-block.
        """
        self.tarfile.close()
        return False
Ejemplo n.º 6
0
class UserTarball(object):
    """
        _UserTarball_

            A wrapper around a TarFile for the user code tarballs (attributes
            not found on this object are forwarded to the wrapped TarFile).
            By default creates a new tarball with the user libraries from
            lib, biglib, module, and the data/ and interface/ sections of the
            src/ area.

            Also adds user specified files in the right place.
    """

    def __init__(self, name=None, mode='w:gz', config=None, logger=None):
        """
        Open the archive.

        name   -- path of the tarball to create
        mode   -- mode string handed to tarfile.open
        config -- configuration object; its JobType section is read by addFiles
        logger -- logger for diagnostics; it is used immediately, so despite
                  the default it must not be None
        """
        self.config = config
        self.logger = logger
        self.scram = ScramEnvironment(logger=self.logger)
        self.logger.debug("Making tarball in %s" % name)
        # dereference=True: archive the content symlinks point to, not the links
        self.tarfile = tarfile.open(name=name, mode=mode, dereference=True)
        self.checksum = None
        # filled by writeContent(); defined here so that reading it before
        # close() does not fall through to the TarFile attribute forwarding
        self.content = None

    def addFiles(self, userFiles=None, cfgOutputName=None):
        """
        Add the necessary files to the tarball

        userFiles     -- glob patterns of extra files; every pattern must
                         match at least one path
        cfgOutputName -- path of the pset file; stored under BOOTSTRAP_CFGFILE,
                         together with the BOOTSTRAP_CFGFILE_PKL and
                         BOOTSTRAP_CFGFILE_DUMP files found in its directory

        Raises InputFileNotFoundException when a pattern matches nothing and
        EnvironmentException (via checkdirectory) when a path cannot be stat-ed.
        """
        directories = ['lib', 'biglib', 'module']
        if getattr(self.config.JobType, 'sendPythonFolder', configParametersInfo['JobType.sendPythonFolder']['default']):
            directories.append('python')
        # /data/ subdirs contain data files needed by the code
        # /interface/ subdirs contain C++ header files needed e.g. by ROOT6
        dataDirs = ['data', 'interface']
        userFiles = userFiles or []

        # Tar up whole directories
        for directory in directories:
            fullPath = os.path.join(self.scram.getCmsswBase(), directory)
            self.logger.debug("Checking directory %s" % fullPath)
            if os.path.exists(fullPath):
                self.logger.debug("Adding directory %s to tarball" % fullPath)
                self.checkdirectory(fullPath)
                self.tarfile.add(fullPath, directory, recursive=True)

        # Search for and tar up "data" directories in src/
        srcPath = os.path.join(self.scram.getCmsswBase(), 'src')
        for root, _dirs, _files in os.walk(srcPath):
            if os.path.basename(root) in dataDirs:
                directory = root.replace(srcPath, 'src')
                self.logger.debug("Adding data directory %s to tarball" % root)
                self.checkdirectory(root)
                self.tarfile.add(root, directory, recursive=True)

        # Tar up extra files the user needs
        for globName in userFiles:
            fileNames = glob.glob(globName)
            if not fileNames:
                raise InputFileNotFoundException("The input file '%s' taken from parameter config.JobType.inputFiles cannot be found." % globName)
            for filename in fileNames:
                self.logger.debug("Adding file %s to tarball" % filename)
                self.checkdirectory(filename)
                self.tarfile.add(filename, os.path.basename(filename), recursive=True)

        scriptExe = getattr(self.config.JobType, 'scriptExe', None)
        if scriptExe:
            self.tarfile.add(scriptExe, arcname=os.path.basename(scriptExe))

        # Adding the pset files to the tarfile
        if cfgOutputName:
            basedir = os.path.dirname(cfgOutputName)
            self.tarfile.add(cfgOutputName, arcname=BOOTSTRAP_CFGFILE)
            self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_PKL), arcname=BOOTSTRAP_CFGFILE_PKL)
            self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_DUMP), arcname=BOOTSTRAP_CFGFILE_DUMP)

        # debug directory
        psetfilename = getattr(self.config.JobType, 'psetName', None)
        if psetfilename is not None:
            self.tarfile.add(psetfilename, '/debug/originalPSet.py')
        else:
            self.logger.debug('Failed to add pset to tarball')

        # Text mode so that writing a str works on Python 3 as well; the
        # with-block removes the temporary file even if tarfile.add raises.
        with tempfile.NamedTemporaryFile(mode='w', delete=True) as configtmp:
            configtmp.write(str(self.config))
            configtmp.flush()
            self.tarfile.add(configtmp.name, '/debug/crabConfig.py')

    def writeContent(self):
        """Save the content of the tarball as a list of (size, name) tuples"""
        self.content = [(int(x.size), x.name) for x in self.tarfile.getmembers()]

    def close(self):
        """
        Record the tarball content and close the tarball.

        Calling it again on an already closed tarball is a no-op (the member
        list of a closed archive can no longer be read).
        """
        if self.tarfile.closed:
            return None
        self.writeContent()
        return self.tarfile.close()

    def upload(self, filecacheurl=None):
        """
        Upload the tarball to the File Cache

        Closes the tarball first. Returns the hashkey of the uploaded
        archive; raises CachefileNotFoundException when the upload result
        carries no 'hashkey'.
        """
        self.close()
        archiveName = self.tarfile.name
        self.logger.debug("Uploading archive %s to the CRAB cache. Using URI %s" % (archiveName, filecacheurl))
        ufc = CRABClient.Emulator.getEmulator('ufc')({'endpoint' : filecacheurl, "pycurl": True})
        result = ufc.upload(archiveName, excludeList=USER_SANDBOX_EXCLUSIONS)
        if 'hashkey' not in result:
            self.logger.error("Failed to upload source files: %s" % str(result))
            raise CachefileNotFoundException
        return str(result['hashkey'])

    def checkdirectory(self, dir_):
        """
        Walk dir_ (following symlinks) and stat every file in it.

        Any OSError raised while doing so is reported as an infinite symbolic
        link loop through an EnvironmentException.
        """
        try:
            for root, _, files in os.walk(dir_, followlinks=True):
                for file_ in files:
                    os.stat(os.path.join(root, file_))
        except OSError as msg:
            err = '%sError%s: Infinite directory loop found in: %s \nStderr: %s' % \
                    (colors.RED, colors.NORMAL, dir_, msg)
            raise EnvironmentException(err)

    def __getattr__(self, name):
        """
        Pass any unknown functions or attribute requests on to the TarFile object
        """
        # __getattr__ only runs when normal lookup failed. If 'tarfile' or
        # 'logger' themselves are missing (e.g. __init__ did not complete),
        # reading them below would re-enter this method without end.
        if name in ('tarfile', 'logger'):
            raise AttributeError(name)
        self.logger.debug("Passing getattr %s on to TarFile" % (name,))
        return getattr(self.tarfile, name)

    def __enter__(self):
        """
        Allow use as context manager
        """
        return self

    def __exit__(self, excType, excValue, excTrace):
        """
        Allow use as context manager: closes the underlying TarFile and
        never suppresses an exception raised inside the with-block.
        """
        self.tarfile.close()
        return False
Ejemplo n.º 7
0
class UserTarball(object):
    """
        _UserTarball_

            A wrapper around a TarFile for the user code tarballs (attributes
            not found on this object are forwarded to the wrapped TarFile).
            By default creates a new tarball with the user libraries from
            lib, biglib, module, and the data/ and interface/ sections of the
            src/ area.

            Also adds user specified files in the right place.
    """

    def __init__(self, name=None, mode='w:bz2', config=None, logger=None):
        """
        Open the archive.

        name   -- path of the tarball to create
        mode   -- mode string handed to tarfile.open
        config -- configuration object; its JobType section is read when adding files
        logger -- logger for diagnostics; it is used immediately, so despite
                  the default it must not be None
        """
        self.config = config
        self.logger = logger
        self.scram = ScramEnvironment(logger=self.logger)
        self.logger.debug("Making tarball in %s" % name)
        # dereference=True: archive the content symlinks point to, not the links
        self.tarfile = tarfile.open(name=name, mode=mode, dereference=True)
        self.checksum = None
        self.content = None

    def addFiles(self, userFiles=None, cfgOutputName=None):
        """
        Add the necessary files to the tarball

        userFiles     -- glob patterns of extra files; every pattern must
                         match at least one path
        cfgOutputName -- path of the pset file; stored under BOOTSTRAP_CFGFILE,
                         together with the BOOTSTRAP_CFGFILE_PKL and
                         BOOTSTRAP_CFGFILE_DUMP files found in its directory

        Raises InputFileNotFoundException when a pattern matches nothing and
        EnvironmentException (via checkdirectory) when a path cannot be stat-ed.
        """
        directories = ['lib', 'biglib', 'module']
        if getattr(self.config.JobType, 'sendPythonFolder', configParametersInfo['JobType.sendPythonFolder']['default']):
            directories.append('python')
            directories.append('cfipython')
        if getattr(self.config.JobType, 'sendExternalFolder', configParametersInfo['JobType.sendExternalFolder']['default']):
            externalDirPath = os.path.join(self.scram.getCmsswBase(), 'external')
            if os.path.exists(externalDirPath) and os.listdir(externalDirPath):
                directories.append('external')
            else:
                self.logger.info("The config.JobType.sendExternalFolder parameter is set to True but the external directory "\
                                  "doesn't exist or is empty, not adding to tarball. Path: %s" % externalDirPath)

        # Note that dataDirs are only looked-for and added under the src/ folder.
        # /data/ subdirs contain data files needed by the code
        # /interface/ subdirs contain C++ header files needed e.g. by ROOT6
        dataDirs = ['data', 'interface']
        userFiles = userFiles or []

        # Tar up whole directories
        for directory in directories:
            fullPath = os.path.join(self.scram.getCmsswBase(), directory)
            self.logger.debug("Checking directory %s" % fullPath)
            if os.path.exists(fullPath):
                self.logger.debug("Adding directory %s to tarball" % fullPath)
                self.checkdirectory(fullPath)
                self.tarfile.add(fullPath, directory, recursive=True)

        # Search for and tar up "data" directories in src/
        srcPath = os.path.join(self.scram.getCmsswBase(), 'src')
        for root, _, _ in os.walk(srcPath):
            if os.path.basename(root) in dataDirs:
                directory = root.replace(srcPath, 'src')
                self.logger.debug("Adding data directory %s to tarball" % root)
                self.checkdirectory(root)
                self.tarfile.add(root, directory, recursive=True)

        # Tar up extra files the user needs
        for globName in userFiles:
            fileNames = glob.glob(globName)
            if not fileNames:
                raise InputFileNotFoundException("The input file '%s' taken from parameter config.JobType.inputFiles cannot be found." % globName)
            for filename in fileNames:
                self.logger.debug("Adding file %s to tarball" % filename)
                self.checkdirectory(filename)
                self.tarfile.add(filename, os.path.basename(filename), recursive=True)

        scriptExe = getattr(self.config.JobType, 'scriptExe', None)
        if scriptExe:
            self.tarfile.add(scriptExe, arcname=os.path.basename(scriptExe))

        # Adding the pset files to the tarfile
        if cfgOutputName:
            basedir = os.path.dirname(cfgOutputName)
            self.tarfile.add(cfgOutputName, arcname=BOOTSTRAP_CFGFILE)
            self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_PKL), arcname=BOOTSTRAP_CFGFILE_PKL)
            self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_DUMP), arcname=BOOTSTRAP_CFGFILE_DUMP)

    def addMonFiles(self):
        """
        Add monitoring files to the debug tarball: the original pset (when
        config.JobType.psetName is not None), a dump of the configuration and
        the scriptExe (when set).
        """
        psetfilename = getattr(self.config.JobType, 'psetName', None)
        if psetfilename is not None:
            self.tarfile.add(psetfilename, '/debug/originalPSet.py')
        else:
            self.logger.debug('Failed to add pset to debug_files.tar.gz')

        # Text mode so that writing a str works on Python 3 as well; the
        # with-block removes the temporary file even if tarfile.add raises.
        with tempfile.NamedTemporaryFile(mode='w', delete=True) as configtmp:
            configtmp.write(str(self.config))
            configtmp.flush()
            self.tarfile.add(configtmp.name, '/debug/crabConfig.py')

        scriptExe = getattr(self.config.JobType, 'scriptExe', None)
        if scriptExe:
            self.tarfile.add(scriptExe, arcname=os.path.basename(scriptExe))

    def writeContent(self):
        """Save the content of the tarball as a list of (size, name) tuples"""
        self.content = [(int(x.size), x.name) for x in self.tarfile.getmembers()]

    def close(self):
        """
        Record the tarball content and close the tarball.

        Calling it again on an already closed tarball is a no-op (the member
        list of a closed archive can no longer be read).
        """
        if self.tarfile.closed:
            return None
        self.writeContent()
        return self.tarfile.close()

    def printSortedContent(self):
        """
        To be used for diagnostic printouts
        returns a string containing tarball content as a list of files sorted by size
        already formatted for use in a print statement.
        Only the header line is returned when no content has been recorded.
        """
        sortedContent = sorted(self.content or [], reverse=True)
        lines = ["\nsandbox content sorted by size[Bytes]:"]
        if sortedContent:
            # width of the size column = number of digits of the biggest size
            ndigits = len(str(sortedContent[0][0]))
            rowFormat = "\n%" + str(ndigits) + "s\t%s"
            lines.extend(rowFormat % (size, name) for (size, name) in sortedContent)
        return "".join(lines)

    @staticmethod
    def _formatArchiveSize(archiveSizeBytes):
        """Return a human readable size: KB up to 512 KB, MB with one decimal below 10 MB, whole MB above."""
        archiveSizeKB = archiveSizeBytes // 1024
        if archiveSizeKB <= 512:
            return "%d KB" % archiveSizeKB
        if archiveSizeKB < 1024 * 10:
            # divide by a float so the decimal survives on Python 2 as well
            return "%3.1f MB" % (archiveSizeKB / 1024.0)
        return "%d MB" % (archiveSizeKB // 1024)

    def upload(self, filecacheurl=None):
        """
        Upload the tarball to the File Cache

        Closes the tarball first. Returns the hashkey of the uploaded archive.

        Raises SandboxTooBigException when the archive exceeds FILE_SIZE_LIMIT
        and CachefileNotFoundException when the upload result carries no
        'hashkey'.
        """
        self.close()
        archiveName = self.tarfile.name
        archiveSizeBytes = os.path.getsize(archiveName)
        archiveSize = self._formatArchiveSize(archiveSizeBytes)

        if archiveSizeBytes > FILE_SIZE_LIMIT:
            msg = ("%sError%s: input tarball size %s exceeds maximum allowed limit of %d MB" %
                   (colors.RED, colors.NORMAL, archiveSize, FILE_SIZE_LIMIT//1024//1024))
            msg += self.printSortedContent()
            raise SandboxTooBigException(msg)

        msg = ("Uploading archive %s (%s) to the CRAB cache. Using URI %s" %
               (archiveName, archiveSize, filecacheurl))
        self.logger.debug(msg)

        ufc = CRABClient.Emulator.getEmulator('ufc')({'endpoint' : filecacheurl, "pycurl": True})
        result = ufc.upload(archiveName, excludeList=NEW_USER_SANDBOX_EXCLUSIONS)
        if 'hashkey' not in result:
            self.logger.error("Failed to upload archive: %s" % str(result))
            raise CachefileNotFoundException
        return str(result['hashkey'])

    def checkdirectory(self, dir_):
        """
        Walk dir_ (following symlinks) and stat every file in it.

        Any OSError raised while doing so is reported as an infinite symbolic
        link loop through an EnvironmentException.
        """
        try:
            for root, _, files in os.walk(dir_, followlinks=True):
                for file_ in files:
                    os.stat(os.path.join(root, file_))
        except OSError as msg:
            err = '%sError%s: Infinite directory loop found in: %s \nStderr: %s' % \
                    (colors.RED, colors.NORMAL, dir_, msg)
            raise EnvironmentException(err)

    def __getattr__(self, name):
        """
        Pass any unknown functions or attribute requests on to the TarFile object
        """
        # __getattr__ only runs when normal lookup failed. If 'tarfile' or
        # 'logger' themselves are missing (e.g. __init__ did not complete),
        # reading them below would re-enter this method without end.
        if name in ('tarfile', 'logger'):
            raise AttributeError(name)
        self.logger.debug("Passing getattr %s on to TarFile" % (name,))
        return getattr(self.tarfile, name)

    def __enter__(self):
        """
        Allow use as context manager
        """
        return self

    def __exit__(self, excType, excValue, excTrace):
        """
        Allow use as context manager: closes the underlying TarFile and
        never suppresses an exception raised inside the with-block.
        """
        self.tarfile.close()
        return False
Ejemplo n.º 8
0
class UserTarball(object):
    """
        _UserTarball_

            A wrapper around a TarFile for the user code tarballs (attribute
            requests this class does not define are forwarded to the wrapped
            TarFile). By default fills the tarball with the user libraries
            from lib, biglib and module (plus python when requested), and
            the data/ and interface/ sections of the src/ area.

            Also adds user specified files in the right place.
    """
    def __init__(self, name=None, mode='w:gz', config=None, logger=None):
        """
        Open the tarball and set up the SCRAM environment helper.

        name   -- path of the archive, handed to tarfile.open
        mode   -- tarfile open mode
        config -- configuration object; addFiles reads its JobType section
        logger -- logger used for every message of this object
        """
        self.config = config
        self.logger = logger
        self.scram = ScramEnvironment(logger=self.logger)
        self.logger.debug("Making tarball in %s" % name)
        self.tarfile = tarfile.open(name=name, mode=mode, dereference=True)
        self.checksum = None
        # Filled by writeContent(); defined up front so that reading it early
        # gives None instead of being forwarded to the TarFile.
        self.content = None

    def addFiles(self, userFiles=None, cfgOutputName=None):
        """
        Add the necessary files to the tarball

        userFiles     -- iterable of glob patterns for extra user files; each
                         match is stored under its base name
        cfgOutputName -- path of the generated pset file; when given it is
                         stored as BOOTSTRAP_CFGFILE together with the
                         BOOTSTRAP_CFGFILE_PKL and BOOTSTRAP_CFGFILE_DUMP
                         files found in the same directory

        Raises InputFileNotFoundException when a pattern in userFiles matches
        nothing, and EnvironmentException (via checkdirectory) when a
        directory cannot be walked.
        """
        directories = ['lib', 'biglib', 'module']
        if getattr(
                self.config.JobType, 'sendPythonFolder',
                configParametersInfo['JobType.sendPythonFolder']['default']):
            directories.append('python')
        # /data/ subdirs contain data files needed by the code
        # /interface/ subdirs contain C++ header files needed e.g. by ROOT6
        dataDirs = ['data', 'interface']
        userFiles = userFiles or []

        # Tar up whole directories
        for directory in directories:
            fullPath = os.path.join(self.scram.getCmsswBase(), directory)
            self.logger.debug("Checking directory %s" % fullPath)
            if os.path.exists(fullPath):
                self.logger.debug("Adding directory %s to tarball" % fullPath)
                self.checkdirectory(fullPath)
                self.tarfile.add(fullPath, directory, recursive=True)

        # Search for and tar up "data" directories in src/
        srcPath = os.path.join(self.scram.getCmsswBase(), 'src')
        for root, _dirs, _files in os.walk(srcPath):
            if os.path.basename(root) in dataDirs:
                directory = root.replace(srcPath, 'src')
                self.logger.debug("Adding data directory %s to tarball" % root)
                self.checkdirectory(root)
                self.tarfile.add(root, directory, recursive=True)

        # Tar up extra files the user needs
        for globName in userFiles:
            fileNames = glob.glob(globName)
            if not fileNames:
                raise InputFileNotFoundException(
                    "The input file '%s' taken from parameter config.JobType.inputFiles cannot be found."
                    % globName)
            for filename in fileNames:
                self.logger.debug("Adding file %s to tarball" % filename)
                self.checkdirectory(filename)
                self.tarfile.add(filename,
                                 os.path.basename(filename),
                                 recursive=True)

        scriptExe = getattr(self.config.JobType, 'scriptExe', None)
        if scriptExe:
            self.tarfile.add(scriptExe, arcname=os.path.basename(scriptExe))

        # Adding the pset files to the tarfile
        if cfgOutputName:
            basedir = os.path.dirname(cfgOutputName)
            self.tarfile.add(cfgOutputName, arcname=BOOTSTRAP_CFGFILE)
            self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_PKL),
                             arcname=BOOTSTRAP_CFGFILE_PKL)
            self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_DUMP),
                             arcname=BOOTSTRAP_CFGFILE_DUMP)

        self._addDebugFiles()

    def _addDebugFiles(self):
        """
        Add the debug directory: the original pset (when
        config.JobType.psetName is set) and a dump of the configuration.
        """
        # Text mode is required: str(self.config) is text, which the default
        # binary mode of NamedTemporaryFile rejects on Python 3. The with
        # block removes the temporary file even if adding to the tarball fails.
        with tempfile.NamedTemporaryFile(mode='w', delete=True) as configtmp:
            configtmp.write(str(self.config))
            # Make sure the dump is on disk before the tarfile reads it by name
            configtmp.flush()
            psetfilename = getattr(self.config.JobType, 'psetName', None)
            if psetfilename is not None:
                self.tarfile.add(psetfilename, '/debug/originalPSet.py')
            else:
                self.logger.debug('Failed to add pset to tarball')
            self.tarfile.add(configtmp.name, '/debug/crabConfig.py')

    def writeContent(self):
        """Save the content of the tarball as a list of (size, name) pairs in self.content"""
        self.content = [(int(x.size), x.name)
                        for x in self.tarfile.getmembers()]

    def close(self):
        """
        Record the content listing of the tarball and close it
        """
        self.writeContent()
        return self.tarfile.close()

    def upload(self, filecacheurl=None):
        """
        Upload the tarball to the File Cache

        filecacheurl -- endpoint handed to the 'ufc' emulator

        The tarball is closed first. Returns the 'hashkey' of the upload
        result as a string; raises CachefileNotFoundException when the result
        carries no 'hashkey'.
        """
        self.close()
        archiveName = self.tarfile.name
        self.logger.debug(
            "Uploading archive %s to the CRAB cache. Using URI %s" %
            (archiveName, filecacheurl))
        ufc = CRABClient.Emulator.getEmulator('ufc')({
            'endpoint': filecacheurl,
            "pycurl": True
        })
        result = ufc.upload(archiveName, excludeList=USER_SANDBOX_EXCLUSIONS)
        if 'hashkey' not in result:
            self.logger.error("Failed to upload source files: %s" %
                              str(result))
            raise CachefileNotFoundException
        return str(result['hashkey'])

    def checkdirectory(self, dir_):
        """
        Check dir_ for infinite symbolic link loops.

        Walks dir_ following symbolic links and stats every file; any
        OSError is turned into an EnvironmentException.
        """
        try:
            for root, _, files in os.walk(dir_, followlinks=True):
                for file_ in files:
                    os.stat(os.path.join(root, file_))
        except OSError as msg:
            err = '%sError%s: Infinite directory loop found in: %s \nStderr: %s' % \
                    (colors.RED, colors.NORMAL, dir_ , msg)
            raise EnvironmentException(err)

    def __getattr__(self, *args):
        """
        Pass any unknown functions or attribute requests on to the TarFile object

        Raises AttributeError if the TarFile does not have the attribute, or
        if the missing attribute is 'logger' or 'tarfile' itself.
        """
        name = args[0]
        # This hook reads self.logger and self.tarfile; if one of them is not
        # set yet, looking it up here would recurse without end.
        if name in ('logger', 'tarfile'):
            raise AttributeError(name)
        self.logger.debug("Passing getattr %s on to TarFile" % (name,))
        return getattr(self.tarfile, name)

    def __enter__(self):
        """
        Allow use as context manager
        """
        return self

    def __exit__(self, excType, excValue, excTrace):
        """
        Allow use as context manager: closes the wrapped tarfile and lets any
        exception propagate
        """
        self.tarfile.close()
        if excType:
            return False
Ejemplo n.º 9
0
class UserTarball(object):
    """
        _UserTarball_

            A wrapper around a TarFile for the user code tarballs (attribute
            requests this class does not define are forwarded to the wrapped
            TarFile). By default fills the tarball with the user libraries
            from lib, module, and the data/ sections of the src/ area.

            Also adds user specified files in the right place.
    """

    def __init__(self, name=None, mode='w:gz', config=None, logger=None):
        """
        Open the tarball and set up the SCRAM environment helper.

        name   -- path of the archive, handed to tarfile.open
        mode   -- tarfile open mode
        config -- configuration object, stored as self.config
        logger -- logger used for every message of this object
        """
        self.config = config
        self.logger = logger
        self.scram = ScramEnvironment(logger=self.logger)
        self.logger.debug("Making tarball in %s" % name)
        self.tarfile = tarfile.open(name=name , mode=mode, dereference=True)
        self.checksum = None

    def addFiles(self, userFiles=None, cfgOutputName=None):
        """
        Add the necessary files to the tarball

        userFiles     -- iterable of glob patterns for extra user files; each
                         match is stored under its base name
        cfgOutputName -- path of the pset file; when given it is stored as
                         PSet.py

        Raises InputFileNotFoundException when a pattern in userFiles matches
        nothing.
        """
        directories = ['lib', 'module']
        dataDirs    = ['data']
        userFiles = userFiles or []

        # Tar up whole directories
        for directory in  directories:
            fullPath = os.path.join(self.scram.getCmsswBase(), directory)
            self.logger.debug(" checking directory %s" % fullPath)
            if os.path.exists(fullPath):
                self.logger.debug(" adding directory %s to tarball" % fullPath)
                self.tarfile.add(fullPath, directory, recursive=True)

        # Search for and tar up "data" directories in src/
        srcPath = os.path.join(self.scram.getCmsswBase(), 'src')
        for root, _dirs, _files in os.walk(srcPath):
            if os.path.basename(root) in dataDirs:
                directory = root.replace(srcPath,'src')
                self.logger.debug(" adding data directory %s to tarball" % root)
                self.tarfile.add(root, directory, recursive=True)

        # Tar up extra files the user needs
        for globName in userFiles:
            fileNames = glob.glob(globName)
            if not fileNames:
                raise InputFileNotFoundException('The input file "%s" taken from parameter config.JobType.inputFiles cannot be found' % globName)
            for filename in fileNames:
                self.logger.debug(" adding file %s to tarball" % filename)
                self.tarfile.add(filename, os.path.basename(filename), recursive=True)

        # Adding the pset file to the tarfile
        if cfgOutputName:
            self.tarfile.add(cfgOutputName, arcname='PSet.py')

    def close(self):
        """
        Calculate the checksum and close
        """

        self.calculateChecksum()
        return self.tarfile.close()

    def upload(self):
        """
        Upload the tarball to the Panda Cache

        The tarball is closed first. Returns a (serverUrl, archiveName,
        checksum) tuple built from the colon-separated answer of
        PandaInterface.putFile; raises CachefileNotFoundException when that
        answer starts with neither 'NewFileName:' nor 'True'.
        """
        self.close()
        archiveName = self.tarfile.name
        self.logger.debug(" uploading archive to cache %s " % archiveName)
        status,out = PandaInterface.putFile(archiveName, verbose=False, useCacheSrv=True, reuseSandbox=True)

        if out.startswith('NewFileName:'):
            # found the same input sandbox to reuse
            self.logger.debug("out: %s" % out)
            self.logger.debug("status: %s" % status)
            self.logger.debug("found the same input sandbox to reuse")
            fields = out.split(':')
            archiveName = fields[-1]
            serverUrl = "https://%s:%s" % (fields[-2], '25443')
            self.logger.debug("archiveName: %s" %archiveName)
        elif out.startswith('True'):
            fields = out.split(':')
            archiveName = fields[-1]
            serverUrl = "%s:%s:%s" % (fields[-4], fields[-3], fields[-2])
        else:
            self.logger.error( str(out) )
            self.logger.error("failed to upload source files with %s" % status)
            raise CachefileNotFoundException

        return serverUrl, archiveName, self.checksum

    def calculateChecksum(self):
        """
        Calculate a checksum that doesn't depend on the tgz
        creation date

        The MD5 is taken over the textual listing of (name, size, mtime,
        uname) of every member and stored in self.checksum.
        """

        lsl = [(x.name, int(x.size), int(x.mtime), x.uname) for x in self.tarfile.getmembers()]
        # hashlib needs bytes on Python 3; encoding the listing keeps the
        # digest unchanged for ASCII listings on Python 2.
        hasher = hashlib.md5(str(lsl).encode('utf-8'))
        self.logger.debug('tgz contents: %s' % lsl)
        self.checksum = hasher.hexdigest()
        self.logger.debug('MD5 checksum: %s' % self.checksum)

        # An older approach re-read the archive and hashed its bytes (sha256
        # over 8192-byte chunks); it may still be useful for non-tar files.


    def __getattr__(self, *args):
        """
        Pass any unknown functions or attribute requests on to the TarFile object

        Raises AttributeError if the TarFile does not have the attribute, or
        if the missing attribute is 'logger' or 'tarfile' itself.
        """
        name = args[0]
        # This hook reads self.logger and self.tarfile; if one of them is not
        # set yet, looking it up here would recurse without end.
        if name in ('logger', 'tarfile'):
            raise AttributeError(name)
        self.logger.debug("Passing getattr %s on to TarFile" % (name,))
        return getattr(self.tarfile, name)


    def __enter__(self):
        """
        Allow use as context manager
        """
        return self


    def __exit__(self, excType, excValue, excTrace):
        """
        Allow use as context manager: closes the wrapped tarfile and lets any
        exception propagate
        """
        self.tarfile.close()
        if excType:
            return False
Ejemplo n.º 10
0
class UserTarball(object):
    """
        _UserTarball_

            A wrapper around a TarFile for the user code tarballs. By
            default fills the tarball with the user libraries from lib,
            module (plus python when requested), and the data/ sections of
            the src/ area.

            Also adds user specified files in the right place.
    """
    def __init__(self, name=None, mode='w:gz', config=None, logger=None):
        """
        Open the tarball and set up the SCRAM environment helper.

        name   -- path of the archive, handed to tarfile.open
        mode   -- tarfile open mode
        config -- configuration object; addFiles reads its JobType section
        logger -- logger used for every message of this object
        """
        self.config = config
        self.logger = logger
        self.scram = ScramEnvironment(logger=self.logger)
        self.logger.debug("Making tarball in %s" % name)
        self.tarfile = tarfile.open(name=name, mode=mode, dereference=True)
        self.checksum = None

    def addFiles(self, userFiles=None, cfgOutputName=None):
        """
        Add the necessary files to the tarball

        userFiles     -- iterable of glob patterns for extra user files; each
                         match is stored under its base name
        cfgOutputName -- path of the pset file; when given it is stored as
                         PSet.py, and the file with the same stem and a .pkl
                         extension as PSet.pkl

        Raises InputFileNotFoundException when a pattern in userFiles matches
        nothing, and EnvironmentException (via checkdirectory) when a
        directory cannot be walked.
        """
        directories = ['lib', 'module']
        if getattr(
                self.config.JobType, 'sendPythonFolder',
                configParametersInfo['JobType.sendPythonFolder']['default']):
            directories.append('python')
        dataDirs = ['data']
        userFiles = userFiles or []

        # Tar up whole directories
        for directory in directories:
            fullPath = os.path.join(self.scram.getCmsswBase(), directory)
            self.logger.debug(" checking directory %s" % fullPath)
            if os.path.exists(fullPath):
                self.logger.debug(" adding directory %s to tarball" % fullPath)
                self.checkdirectory(fullPath)
                self.tarfile.add(fullPath, directory, recursive=True)

        # Search for and tar up "data" directories in src/
        srcPath = os.path.join(self.scram.getCmsswBase(), 'src')
        for root, _dirs, _files in os.walk(srcPath):
            if os.path.basename(root) in dataDirs:
                directory = root.replace(srcPath, 'src')
                self.logger.debug(" adding data directory %s to tarball" %
                                  root)
                self.checkdirectory(root)
                self.tarfile.add(root, directory, recursive=True)

        # Tar up extra files the user needs
        for globName in userFiles:
            fileNames = glob.glob(globName)
            if not fileNames:
                raise InputFileNotFoundException(
                    'The input file "%s" taken from parameter config.JobType.inputFiles cannot be found'
                    % globName)
            for filename in fileNames:
                self.logger.debug(" adding file %s to tarball" % filename)
                self.checkdirectory(filename)
                self.tarfile.add(filename,
                                 os.path.basename(filename),
                                 recursive=True)

        scriptExe = getattr(self.config.JobType, 'scriptExe', None)
        if scriptExe:
            self.tarfile.add(scriptExe, arcname=os.path.basename(scriptExe))

        # Adding the pset and crabconfig file to the tarfile
        if cfgOutputName:
            self.tarfile.add(cfgOutputName, arcname='PSet.py')
            self.tarfile.add(os.path.splitext(cfgOutputName)[0] + '.pkl',
                             arcname='PSet.pkl')

        self._addDebugFiles()

    def _addDebugFiles(self):
        """
        Add the debug directory: the original pset (when
        config.JobType.psetName is set) and a dump of the configuration.
        """
        # Text mode is required: str(self.config) is text, which the default
        # binary mode of NamedTemporaryFile rejects on Python 3. The with
        # block removes the temporary file even if adding to the tarball fails.
        with tempfile.NamedTemporaryFile(mode='w', delete=True) as configtmp:
            configtmp.write(str(self.config))
            # Make sure the dump is on disk before the tarfile reads it by name
            configtmp.flush()
            psetfilename = getattr(self.config.JobType, 'psetName', None)
            if psetfilename is not None:
                self.tarfile.add(psetfilename, '/debug/originalPSet.py')
            else:
                self.logger.debug('Failed to add pset to tarball')
            self.tarfile.add(configtmp.name, '/debug/crabConfig.py')

    def close(self):
        """
        Calculate the checksum and close
        """

        self.calculateChecksum()
        return self.tarfile.close()

    def upload(self, filecacheurl=None):
        """
        Upload the tarball to the File Cache

        filecacheurl -- endpoint handed to the 'ufc' emulator

        The tarball is closed first. Returns a tuple of the upload 'hashkey'
        with '.tar.gz' appended and the checksum; raises
        CachefileNotFoundException when the result carries no 'hashkey'.
        """
        self.close()
        archiveName = self.tarfile.name
        self.logger.debug(" uploading archive to cache %s " % archiveName)
        ufc = CRABClient.Emulator.getEmulator('ufc')({
            'endpoint': filecacheurl
        })
        result = ufc.upload(archiveName)
        if 'hashkey' not in result:
            self.logger.error("Failed to upload source files: %s" %
                              str(result))
            raise CachefileNotFoundException
        return str(result['hashkey']) + '.tar.gz', self.checksum

    def calculateChecksum(self):
        """
        Calculate a checksum that doesn't depend on the tgz
        creation date

        The MD5 is taken over the textual listing of (name, size, mtime,
        uname) of every member and stored in self.checksum.
        """
        lsl = [(x.name, int(x.size), int(x.mtime), x.uname)
               for x in self.tarfile.getmembers()]
        # hashlib needs bytes on Python 3; encoding the listing keeps the
        # digest unchanged for ASCII listings on Python 2.
        hasher = hashlib.md5(str(lsl).encode('utf-8'))
        self.logger.debug('tgz contents: %s' % lsl)
        self.checksum = hasher.hexdigest()
        self.logger.debug('MD5 checksum: %s' % self.checksum)

        # An older approach re-read the archive and hashed its bytes (sha256
        # over 8192-byte chunks); it may still be useful for non-tar files.

    def checkdirectory(self, dir_):
        """
        Check dir_ for infinite symbolic link loops.

        Walks dir_ following symbolic links and stats every file; any
        OSError is turned into an EnvironmentException.
        """
        try:
            for root, _, files in os.walk(dir_, followlinks=True):
                for file_ in files:
                    os.stat(os.path.join(root, file_))

        # "except X as name" is accepted by Python 2.6+ and Python 3 alike
        except OSError as msg:
            err = '%sError%s: Infinite directory loop found in: %s \nStderr: %s' % \
                    (colors.RED, colors.NORMAL, dir_ , msg)
            raise EnvironmentException(err)
Ejemplo n.º 11
0
class UserTarball(object):
    """
        _UserTarball_

            A wrapper around a TarFile for the user code tarballs. By
            default fills the tarball with the user libraries from lib,
            module, and the data/ sections of the src/ area.

            Also adds user specified files in the right place.
    """

    def __init__(self, name=None, mode='w:gz', config=None, logger=None):
        """
        Open the tarball and set up the SCRAM environment helper.

        name   -- path of the archive, handed to tarfile.open
        mode   -- tarfile open mode
        config -- configuration object; upload reads JobType.filecacheurl
        logger -- logger used for every message of this object

        Also points PandaInterface.LOGGER at the 'CRAB3:traceback' logger.
        """
        self.config = config
        self.logger = logger
        self.scram = ScramEnvironment(logger=self.logger)
        self.logger.debug("Making tarball in %s" % name)
        self.tarfile = tarfile.open(name=name , mode=mode, dereference=True)
        self.checksum = None
        PandaInterface.LOGGER = logging.getLogger('CRAB3:traceback')

    def addFiles(self, userFiles=None, cfgOutputName=None):
        """
        Add the necessary files to the tarball

        userFiles     -- iterable of glob patterns for extra user files; each
                         match is stored under its base name
        cfgOutputName -- path of the pset file; when given it is stored as
                         PSet.py

        Raises InputFileNotFoundException when a pattern in userFiles matches
        nothing, and EnvironmentException (via checkdirectory) when a
        directory cannot be walked.
        """
        directories = ['lib', 'module']
        dataDirs    = ['data']
        userFiles = userFiles or []

        # Tar up whole directories
        for directory in  directories:
            fullPath = os.path.join(self.scram.getCmsswBase(), directory)
            self.logger.debug(" checking directory %s" % fullPath)
            if os.path.exists(fullPath):
                self.logger.debug(" adding directory %s to tarball" % fullPath)
                self.checkdirectory(fullPath)
                self.tarfile.add(fullPath, directory, recursive=True)

        # Search for and tar up "data" directories in src/
        srcPath = os.path.join(self.scram.getCmsswBase(), 'src')
        for root, _dirs, _files in os.walk(srcPath):
            if os.path.basename(root) in dataDirs:
                directory = root.replace(srcPath,'src')
                self.logger.debug(" adding data directory %s to tarball" % root)
                self.checkdirectory(root)
                self.tarfile.add(root, directory, recursive=True)

        # Tar up extra files the user needs
        for globName in userFiles:
            fileNames = glob.glob(globName)
            if not fileNames:
                raise InputFileNotFoundException('The input file "%s" taken from parameter config.JobType.inputFiles cannot be found' % globName)
            for filename in fileNames:
                self.logger.debug(" adding file %s to tarball" % filename)
                self.checkdirectory(filename)
                self.tarfile.add(filename, os.path.basename(filename), recursive=True)

        # Adding the pset file to the tarfile
        if cfgOutputName:
            self.tarfile.add(cfgOutputName, arcname='PSet.py')

    def close(self):
        """
        Calculate the checksum and close
        """

        self.calculateChecksum()
        return self.tarfile.close()

    def upload(self):
        """
        Upload the tarball to the File Cache

        The tarball is closed first and sent to the endpoint given by
        config.JobType.filecacheurl. Returns a tuple of that URL, the upload
        'hashkey' with '.tar.gz' appended, and the checksum; raises
        CachefileNotFoundException when the result carries no 'hashkey'.
        """
        self.close()
        archiveName = self.tarfile.name
        self.logger.debug(" uploading archive to cache %s " % archiveName)
        ufc = UserFileCache({'endpoint' : self.config.JobType.filecacheurl})
        result = ufc.upload(archiveName)
        if 'hashkey' not in result:
            self.logger.error("Failed to upload source files: %s" % str(result))
            raise CachefileNotFoundException
        return self.config.JobType.filecacheurl, str(result['hashkey']) + '.tar.gz', self.checksum


    def calculateChecksum(self):
        """
        Calculate a checksum that doesn't depend on the tgz
        creation date

        The MD5 is taken over the textual listing of (name, size, mtime,
        uname) of every member and stored in self.checksum.
        """
        lsl = [(x.name, int(x.size), int(x.mtime), x.uname) for x in self.tarfile.getmembers()]
        # hashlib needs bytes on Python 3; encoding the listing keeps the
        # digest unchanged for ASCII listings on Python 2.
        hasher = hashlib.md5(str(lsl).encode('utf-8'))
        self.logger.debug('tgz contents: %s' % lsl)
        self.checksum = hasher.hexdigest()
        self.logger.debug('MD5 checksum: %s' % self.checksum)

        # An older approach re-read the archive and hashed its bytes (sha256
        # over 8192-byte chunks); it may still be useful for non-tar files.

    def checkdirectory(self,dir):
        """
        Check dir for infinite symbolic link loops.

        Walks dir following symbolic links and stats every file; any OSError
        is turned into an EnvironmentException.
        """
        # NOTE: the parameter name shadows the builtin dir(); it is kept
        # unchanged so that keyword callers keep working.
        try:
            for root, _, files in os.walk(dir, followlinks=True):
                for fileName in files:
                    os.stat(os.path.join(root, fileName))

        # "except X as name" is accepted by Python 2.6+ and Python 3 alike
        except OSError as msg:
            err = '%sError%s: Infinite directory loop found in: %s \nStderr: %s' % \
                    (colors.RED, colors.NORMAL, dir , msg)
            raise EnvironmentException(err)