Exemple #1
0
def AddChecksums(report):
    """
    Add size and checksum information to the cmsRun output files of a report.

    Every file entry found under report['steps']['cmsRun']['output'] that has
    no 'checksums' key yet gets its on-disk size stored under 'size' and its
    adler32/cksum pair stored under 'checksums'.  The report is updated in
    place and nothing is returned.

    Entries lacking a 'pfn' fall back to 'fileName' (which is then copied into
    'pfn'); entries with neither key are skipped.  If any level of the
    steps/cmsRun/output hierarchy is missing the function does nothing.
    """
    # Walk down the hierarchy one level at a time, bailing out on the first gap
    section = report
    for level in ('steps', 'cmsRun', 'output'):
        if level not in section:
            return
        section = section[level]

    for moduleFiles in section.values():
        for entry in moduleFiles:
            if 'checksums' in entry:
                # Nothing to do, this file was already handled
                continue
            if 'pfn' not in entry:
                if 'fileName' not in entry:
                    # No way to locate the file on disk
                    continue
                entry['pfn'] = entry['fileName']

            pfn = entry['pfn']
            entry['size'] = os.stat(pfn).st_size

            startStamp = time.asctime(time.gmtime())
            print("==== Checksum computation STARTING at %s ====" % startStamp)
            (adler32, cksum) = calculateChecksums(pfn)
            endStamp = time.asctime(time.gmtime())
            print("==== Checksum FINISHED at %s ====" % endStamp)

            sizeInMB = float(entry['size']) / (1024 * 1024)
            summary = "== FileName: %s  -  FileAdler32: %s  - FileSize: %.3f MBytes" % (
                entry['pfn'], adler32, sizeInMB)
            print(summary)

            entry['checksums'] = dict(adler32=adler32, cksum=cksum)
Exemple #2
0
    def processFile(self, filename, fileReport, step, outputModule):
        """
        Attach checksum, size, dataset and (possibly rewritten) LFN information
        to a file report entry.

        :param filename: path of the output file; it is handed to
            calculateChecksums and stat'ed for its size
        :param fileReport: report object for the file.  Must expose an ``lfn``
            attribute and may expose ``guid``, ``MergedLFNBase`` and
            ``MergedBySize``.  It is modified in place.
        :param step: step object providing ``output.modules.<outputModule>``
            and ``application.setup.cmsswVersion``
        :param outputModule: name of the output module that produced the file
        :return: the same ``fileReport`` instance, after modification
        :raises Exception: if a GUID based LFN is required but the report
            carries no GUID
        """
        # Get checksum
        (adler32, cksum) = calculateChecksums(filename)

        # Get info from spec
        output = getattr(step.output.modules, outputModule)
        disableGUID = getattr(output, 'disableGUID', False)
        fixedLFN = getattr(output, 'fixedLFN', False)
        primaryDataset = output.primaryDataset
        processedDataset = output.processedDataset
        dataTier = output.dataTier

        # Get other file information
        size = os.stat(filename).st_size

        # Get info from file
        mergedLFNBase = getattr(fileReport, 'MergedLFNBase', None)
        mergedBySize = getattr(fileReport, 'MergedBySize', False)
        lfn = fileReport.lfn

        # Do LFN manipulation; a fixed LFN is never touched
        if not fixedLFN:
            if not disableGUID:
                # Standard case: the LFN file name has to match the GUID
                guid = getattr(fileReport, 'guid', None)
                if not guid:
                    msg = "No GUID for file %s" % (lfn,)
                    logging.error(msg)
                    raise Exception(msg)
                guidFileName = '%s.root' % (str(guid))
                fileReport.lfn = os.path.join(os.path.dirname(lfn), guidFileName)
            elif mergedBySize and mergedLFNBase:
                # Merge case (flagged as not tested by the original author).
                # str.rstrip returns a new string, so the result has to be
                # kept; fall back to '/' so a base of only slashes stays rooted.
                baseDir = mergedLFNBase.rstrip('/') or '/'
                fileReport.lfn = os.path.join(baseDir, os.path.basename(lfn))

        # Attach values
        fileReport.checksums = {'adler32': adler32, 'cksum': cksum}
        fileReport.size = size
        fileReport.dataset = {"applicationName": "cmsRun",
                              "applicationVersion": step.application.setup.cmsswVersion,
                              "primaryDataset": primaryDataset,
                              "processedDataset": processedDataset,
                              "dataTier": dataTier}

        return fileReport
def AddChecksums(report):
    """
    Complete the cmsRun output file entries of a report with size and checksums.

    The function looks at report['steps']['cmsRun']['output'], a mapping whose
    values are iterables of per-file dictionaries.  Each dictionary that has
    no 'checksums' key is given a 'size' (taken from os.stat) and a
    'checksums' dictionary holding 'adler32' and 'cksum'.  When 'pfn' is
    missing, 'fileName' is used and copied into 'pfn'; files with neither key
    are ignored.  The report is changed in place; the return value is None.
    """
    if 'steps' not in report:
        return
    allSteps = report['steps']
    if 'cmsRun' not in allSteps:
        return
    cmsRunStep = allSteps['cmsRun']
    if 'output' not in cmsRunStep:
        return

    for fileList in cmsRunStep['output'].values():
        for oneFile in fileList:
            alreadyDone = 'checksums' in oneFile
            if alreadyDone:
                continue

            if 'pfn' not in oneFile:
                if 'fileName' in oneFile:
                    # Use the plain file name as the physical file name
                    oneFile['pfn'] = oneFile['fileName']
                else:
                    continue

            physicalName = oneFile['pfn']
            oneFile['size'] = os.stat(physicalName).st_size

            print("==== Checksum computation STARTING at %s ====" % time.asctime(time.gmtime()))
            (adler32, cksum) = calculateChecksums(physicalName)
            print("==== Checksum FINISHED at %s ====" % time.asctime(time.gmtime()))

            megaBytes = float(oneFile['size']) / (1024 * 1024)
            print("== FileName: %s  -  FileAdler32: %s  - FileSize: %.3f MBytes" %
                  (oneFile['pfn'], adler32, megaBytes))

            oneFile['checksums'] = {'adler32': adler32, 'cksum': cksum}
Exemple #4
0
    def execute(self, emulator=None):
        """
        _execute_

        Collect the job's log files into logArchive.tar.gz, stage the tarball
        out and record it in the job report.

        :param emulator: optional emulator object; when given the call is
            delegated to ``emulator.emulate(self.step, self.job)`` and its
            result is returned
        :return: the emulator result when emulating; the empty collection
            returned by findFilesInDirectory when no log file was found;
            None otherwise
        :raises WMExecutionFailure: (60404) when the stage out does not finish
            before the wait time expires
        :raises WMException: when the stage out fails for any other reason
        """
        # Are we using emulators again?
        if emulator is not None:
            return emulator.emulate(self.step, self.job)

        logging.info("Steps.Executors.%s.execute called", self.__class__.__name__)

        overrides = {}
        if hasattr(self.step, 'override'):
            overrides = self.step.override.dictionary_()
        logging.info("Using the following overrides: %s ", overrides)
        # Find alternate stageout location
        self.altLFN = overrides.get('altLFN', None)
        self.failedPreviousStep = overrides.get('previousCmsRunFailure', False)

        logging.info("Step configuration is: %s", self.step)
        # Wait timeout for stageOut: one hour plus the total retry delay by default
        waitTime = overrides.get('waitTime', 3600 + (self.step.retryDelay * self.step.retryCount))

        # NOTE(review): these look like regular expressions consumed by
        # findFilesInDirectory - confirm against that helper
        matchFiles = [
            ".log$",  # matches the scram, wmagent and cmsRun logs
            "FrameworkJobReport.xml",
            "Report.pkl",
            "^PSet.py$",
            "^PSet.pkl$",
            "_condor_std*",  # condor wrapper logs at the pilot top level
        ]

        ignoredDirs = ['Utils', 'WMCore', 'WMSandbox']

        # Okay, we need a stageOut Manager
        # (the step attribute and the override key are spelled differently:
        # 'newStageout' vs 'newStageOut')
        useNewStageOutCode = bool(getattr(self.step, 'newStageout', False) or
                                  overrides.get('newStageOut'))
        if useNewStageOutCode:
            # new style
            logging.info("LOGARCHIVE IS USING NEW STAGEOUT CODE")
            manager = WMCore.Storage.FileManager.StageOutMgr(retryPauseTime=self.step.retryDelay,
                                                             numberOfRetries=self.step.retryCount,
                                                             **overrides)
        else:
            # old style
            manager = StageOutMgr.StageOutMgr(**overrides)
            manager.numberOfRetries = self.step.retryCount
            manager.retryPauseTime = self.step.retryDelay

        # Now we need to find all the reports
        # The log search follows this structure: ~pilotArea/jobArea/WMTaskSpaceArea/StepsArea
        # Start looking at the pilot scratch area first, such that we find the condor logs
        # Then look at the job area in order to find the wmagentJob log
        # Finally, at the taskspace area to find the cmsRun/FWJR/PSet files
        pilotScratchDir = os.path.join(self.stepSpace.taskSpace.location, '../../')
        logFilesToArchive = self.findFilesInDirectory(pilotScratchDir, matchFiles, ignoredDirs)

        # What if it's empty?
        if not logFilesToArchive:
            msg = "Couldn't find any log files in the job"
            logging.error(msg)
            return logFilesToArchive

        # Now that we've gone through all the steps, we have to tar it out
        tarName = 'logArchive.tar.gz'
        tarBallLocation = os.path.join(self.stepSpace.location, tarName)
        with tarfile.open(tarBallLocation, 'w:gz') as tarBall:
            for fName in logFilesToArchive:
                # Store the file relative to the pilot scratch area
                altName = fName.replace(pilotScratchDir, '', 1)
                tarBall.add(name=fName, arcname=altName)

        fileInfo = {'LFN': self.getLFN(tarName),
                    'PFN': tarBallLocation,
                    'PNN': None,
                    'GUID': None
                    }
        signal.signal(signal.SIGALRM, alarmHandler)
        signal.alarm(waitTime)

        try:
            try:
                manager(fileInfo)
                self.report.addOutputModule(moduleName="logArchive")
                (adler32, cksum) = calculateChecksums(tarBallLocation)
                reportFile = {"lfn": fileInfo["LFN"], "pfn": fileInfo["PFN"],
                              "location": fileInfo["PNN"], "module_label": "logArchive",
                              "events": 0, "size": 0, "merged": False,
                              "checksums": {'adler32': adler32, 'cksum': cksum}}
                self.report.addOutputFile(outputModule="logArchive", aFile=reportFile)
            finally:
                # Disarm the timer on every exit path, so that a pending
                # SIGALRM cannot fire while the failure is being reported
                # or after this method has been left
                signal.alarm(0)
        except Alarm:
            msg = "Indefinite hang during stageOut of logArchive"
            logging.error(msg)
            self.report.addError(self.stepName, 60404, "LogArchiveTimeout", msg)
            self.saveReport()
            raise WMExecutionFailure(60404, "LogArchiveTimeout", msg)
        except WMException as ex:
            self.report.addError(self.stepName, 60307, "LogArchiveFailure", str(ex))
            self.saveReport()
            raise
        except Exception as ex:
            self.report.addError(self.stepName, 60405, "LogArchiveFailure", str(ex))
            self.saveReport()
            msg = "Failure in transferring logArchive tarball\n"
            logging.exception(msg)
            raise WMException("LogArchiveFailure", message=str(ex))

        # The copy to EOS is guarded by the same timeout
        signal.alarm(waitTime)
        try:
            self.sendLogToEOS(overrides, tarBallLocation, useNewStageOutCode)
        finally:
            signal.alarm(0)

        return
Exemple #5
0
    def execute(self, emulator=None, **overrides):
        """
        _execute_

        Collect the job's log files into logArchive.tar.gz, stage the tarball
        out and record it in the job report.

        :param emulator: optional emulator object; when given the call is
            delegated to ``emulator.emulate(self.step, self.job)`` and its
            result is returned
        :param overrides: accepted for interface compatibility only; the
            keyword arguments are discarded and the overrides are always read
            from ``self.step.override`` (empty when the step has none)
        :return: the emulator result when emulating; the empty collection
            returned by findFilesInDirectory when no log file was found;
            None otherwise
        :raises WMExecutionFailure: (60404) when the stage out does not finish
            before the wait time expires
        :raises WMException: when the stage out fails for any other reason
        """
        # Are we using emulators again?
        if emulator is not None:
            return emulator.emulate(self.step, self.job)

        overrides = {}
        # TODO need to set override using addOverride method in WMStep
        if hasattr(self.step, 'override'):
            overrides = self.step.override.dictionary_()

        # Find alternate stageout location
        self.altLFN = overrides.get('altLFN', None)

        logging.info("Beginning Steps.Executors.LogArchive.Execute")
        logging.info("Using the following overrides: %s ", overrides)
        logging.info("Step is: %s", self.step)
        # Wait timeout for stageOut: one hour plus the total retry delay by default
        waitTime = overrides.get('waitTime', 3600 + (self.step.retryDelay * self.step.retryCount))

        # NOTE(review): these look like regular expressions consumed by
        # findFilesInDirectory - confirm against that helper
        matchFiles = [
            ".log$",  # matches the scram, wmagent and cmsRun logs
            "FrameworkJobReport.xml",
            "Report.pkl",
            "^PSet.py$",
            "^PSet.pkl$",
            "_condor_std*",  # condor wrapper logs at the pilot top level
            ]

        ignoredDirs = ['Utils', 'WMCore', 'WMSandbox']

        # Okay, we need a stageOut Manager
        # (the step attribute and the override key are spelled differently:
        # 'newStageout' vs 'newStageOut')
        useNewStageOutCode = bool(getattr(self.step, 'newStageout', False) or
                                  overrides.get('newStageOut'))
        if useNewStageOutCode:
            # new style
            logging.info("LOGARCHIVE IS USING NEW STAGEOUT CODE")
            manager = WMCore.Storage.FileManager.StageOutMgr(
                retryPauseTime=self.step.retryDelay,
                numberOfRetries=self.step.retryCount,
                **overrides)
        else:
            # old style
            manager = StageOutMgr.StageOutMgr(**overrides)
            manager.numberOfRetries = self.step.retryCount
            manager.retryPauseTime = self.step.retryDelay

        # Now we need to find all the reports
        # The log search follows this structure: ~pilotArea/jobArea/WMTaskSpaceArea/StepsArea
        # Start looking at the pilot scratch area first, such that we find the condor logs
        # Then look at the job area in order to find the wmagentJob log
        # Finally, at the taskspace area to find the cmsRun/FWJR/PSet files
        pilotScratchDir = os.path.join(self.stepSpace.taskSpace.location, '../../')
        logFilesToArchive = self.findFilesInDirectory(pilotScratchDir, matchFiles, ignoredDirs)

        # What if it's empty?
        if not logFilesToArchive:
            msg = "Couldn't find any log files in the job"
            logging.error(msg)
            return logFilesToArchive

        # Now that we've gone through all the steps, we have to tar it out
        tarName = 'logArchive.tar.gz'
        tarBallLocation = os.path.join(self.stepSpace.location, tarName)
        taskSpaceDir = self.stepSpace.taskSpace.location
        # The context manager closes the tarball even if adding a file fails
        with tarfile.open(tarBallLocation, 'w:gz') as tarBall:
            for logFile in logFilesToArchive:
                # Store the file relative to the task space area
                tarBall.add(name=logFile,
                            arcname=logFile.replace(taskSpaceDir, '', 1).lstrip('/'))

        fileInfo = {'LFN': self.getLFN(tarName),
                    'PFN': tarBallLocation,
                    'PNN': None,
                    'GUID': None
                    }
        signal.signal(signal.SIGALRM, alarmHandler)
        signal.alarm(waitTime)

        try:
            try:
                manager(fileInfo)
                self.report.addOutputModule(moduleName="logArchive")
                (adler32, cksum) = calculateChecksums(tarBallLocation)
                reportFile = {"lfn": fileInfo["LFN"], "pfn": fileInfo["PFN"],
                              "location": fileInfo["PNN"], "module_label": "logArchive",
                              "events": 0, "size": 0, "merged": False,
                              "checksums": {'adler32': adler32, 'cksum': cksum}}
                self.report.addOutputFile(outputModule="logArchive", aFile=reportFile)
            finally:
                # Disarm the timer on every exit path, so that a pending
                # SIGALRM cannot fire while the failure is being reported
                # or after this method has been left
                signal.alarm(0)
        except Alarm:
            msg = "Indefinite hang during stageOut of logArchive"
            logging.error(msg)
            self.report.addError(self.stepName, 60404, "LogArchiveTimeout", msg)
            self.saveReport()
            raise WMExecutionFailure(60404, "LogArchiveTimeout", msg)
        except WMException as ex:
            self.report.addError(self.stepName, 60307, "LogArchiveFailure", str(ex))
            self.saveReport()
            raise
        except Exception as ex:
            self.report.addError(self.stepName, 60405, "LogArchiveFailure", str(ex))
            self.saveReport()
            msg = "Failure in transferring logArchive tarball\n"
            logging.exception(msg)
            raise WMException("LogArchiveFailure", message=str(ex))

        # The copy to EOS is guarded by the same timeout
        signal.alarm(waitTime)
        try:
            self.sendLogToEOS(overrides, tarBallLocation, useNewStageOutCode)
        finally:
            signal.alarm(0)

        return