def AddChecksums(report):
    """
    Add checksum and size information to the cmsRun output files of a report.

    The function walks report['steps']['cmsRun']['output'], whose values are
    iterables of per-file dictionaries.  Every file entry that does not yet
    carry a 'checksums' key gets:

      * 'checksums': {'adler32': ..., 'cksum': ...} as returned by
        calculateChecksums() for the entry's 'pfn'
      * 'size': the st_size reported by os.stat() for the same path

    An entry without 'pfn' falls back to its 'fileName' (which is then also
    stored under 'pfn'); an entry with neither key is skipped.  If any of the
    'steps' / 'cmsRun' / 'output' levels is missing nothing is done.

    The report is modified in place and None is returned.
    """
    # Walk down to the output section, giving up as soon as a level is absent
    section = report
    for level in ('steps', 'cmsRun', 'output'):
        if level not in section:
            return
        section = section[level]

    for moduleFiles in section.values():
        for entry in moduleFiles:
            if 'checksums' in entry:
                # Already processed
                continue
            if 'pfn' not in entry:
                if 'fileName' not in entry:
                    # No way to locate the file on disk
                    continue
                entry['pfn'] = entry['fileName']

            startedAt = time.asctime(time.gmtime())
            print("==== Checksum STARTING at %s ====" % startedAt)
            print("== Filename: %s" % entry['pfn'])
            adler32, cksum = calculateChecksums(entry['pfn'])
            finishedAt = time.asctime(time.gmtime())
            print("==== Checksum FINISHING at %s ====" % finishedAt)

            entry['checksums'] = {'adler32': adler32, 'cksum': cksum}
            entry['size'] = os.stat(entry['pfn']).st_size
Exemple #2
0
    def processFile(self, filename, fileReport, step, outputModule):
        """
        Attach checksum, size, LFN and dataset information to a file report.

        filename     -- local path of the file; it is checksummed with
                        calculateChecksums() and stat'ed for its size
        fileReport   -- report object for the file.  'lfn' is read and may be
                        rewritten; 'guid', 'MergedLFNBase' and 'MergedBySize'
                        are read when present
        step         -- step object; step.output.modules.<outputModule> and
                        step.application.setup.cmsswVersion are read
        outputModule -- name of the attribute to look up under
                        step.output.modules

        Returns the same fileReport instance with 'checksums', 'size' and
        'dataset' set (and 'lfn' possibly changed).

        Raises Exception when a GUID based LFN is required (neither fixedLFN
        nor disableGUID is set on the output module) but the report has no
        GUID.
        """
        # Get checksum
        (adler32, cksum) = calculateChecksums(filename)

        # Get info from spec
        output = getattr(step.output.modules, outputModule)
        disableGUID = getattr(output, 'disableGUID', False)
        fixedLFN = getattr(output, 'fixedLFN', False)
        primaryDataset = output.primaryDataset
        processedDataset = output.processedDataset
        dataTier = output.dataTier

        # Get other file information
        size = os.stat(filename).st_size

        # Get info from file
        mergedLFNBase = getattr(fileReport, 'MergedLFNBase', None)
        mergedBySize = getattr(fileReport, 'MergedBySize', False)
        lfn = fileReport.lfn

        # Do LFN manipulation
        # First in the standard case
        if not fixedLFN and not disableGUID:
            guid = getattr(fileReport, 'guid', None)
            if not guid:
                msg = "No GUID for file %s" % (lfn)
                logging.error(msg)
                raise Exception(msg)
            # Then we have to change the LFN to match the GUID
            dirname = os.path.dirname(lfn)
            filelfn = '%s.root' % (str(guid))
            fileReport.lfn = os.path.join(dirname, filelfn)
        elif not fixedLFN and mergedBySize and mergedLFNBase:
            # Then we better do the merge stuff
            # Not tested for now
            # str.rstrip() returns a new string, so the result has to be kept.
            # A base made only of slashes is left alone, otherwise the join
            # below would lose its leading '/'.
            lfnBase = mergedLFNBase.rstrip('/') or mergedLFNBase
            fileReport.lfn = os.path.join(lfnBase, os.path.basename(lfn))

        # Attach values
        fileReport.checksums = {'adler32': adler32, 'cksum': cksum}
        fileReport.size = size
        fileReport.dataset = {
            "applicationName": "cmsRun",
            "applicationVersion": step.application.setup.cmsswVersion,
            "primaryDataset": primaryDataset,
            "processedDataset": processedDataset,
            "dataTier": dataTier
        }

        return fileReport
Exemple #3
0
    def processFile(self, filename, fileReport, step, outputModule):
        """
        Attach checksum, size, LFN and dataset information to a file report.

        filename     -- local path of the file; it is checksummed with
                        calculateChecksums() and stat'ed for its size
        fileReport   -- report object for the file.  'lfn' is read and may be
                        rewritten; 'guid', 'MergedLFNBase' and 'MergedBySize'
                        are read when present
        step         -- step object; step.output.modules.<outputModule> and
                        step.application.setup.cmsswVersion are read
        outputModule -- name of the attribute to look up under
                        step.output.modules

        Returns the same fileReport instance with 'checksums', 'size' and
        'dataset' set (and 'lfn' possibly changed).

        Raises Exception when a GUID based LFN is required (neither fixedLFN
        nor disableGUID is set on the output module) but the report has no
        GUID.
        """
        # Get checksum
        (adler32, cksum) = calculateChecksums(filename)

        # Get info from spec
        output = getattr(step.output.modules, outputModule)
        disableGUID      = getattr(output, 'disableGUID', False)
        fixedLFN         = getattr(output, 'fixedLFN', False)
        primaryDataset   = output.primaryDataset
        processedDataset = output.processedDataset
        dataTier         = output.dataTier

        # Get other file information
        size = os.stat(filename).st_size

        # Get info from file
        mergedLFNBase    = getattr(fileReport, 'MergedLFNBase', None)
        mergedBySize     = getattr(fileReport, 'MergedBySize', False)
        lfn              = fileReport.lfn

        # Do LFN manipulation
        # First in the standard case
        if not fixedLFN and not disableGUID:
            guid = getattr(fileReport, 'guid', None)
            if not guid:
                msg = "No GUID for file %s" %(lfn)
                logging.error(msg)
                raise Exception(msg)
            # Then we have to change the LFN to match the GUID
            dirname = os.path.dirname(lfn)
            filelfn = '%s.root' %(str(guid))
            fileReport.lfn = os.path.join(dirname, filelfn)
        elif not fixedLFN and mergedBySize and mergedLFNBase:
            # Then we better do the merge stuff
            # Not tested for now
            # Strings are immutable: rstrip() hands back the stripped copy, it
            # does not modify mergedLFNBase.  Keep the original value if the
            # base is nothing but slashes so the joined LFN stays rooted.
            strippedBase = mergedLFNBase.rstrip('/') or mergedLFNBase
            fileReport.lfn = os.path.join(strippedBase, os.path.basename(lfn))

        # Attach values
        fileReport.checksums = {'adler32': adler32, 'cksum': cksum}
        fileReport.size = size
        fileReport.dataset = {"applicationName": "cmsRun",
                              "applicationVersion": step.application.setup.cmsswVersion,
                              "primaryDataset": primaryDataset,
                              "processedDataset": processedDataset,
                              "dataTier": dataTier}

        return fileReport
def AddChecksums(report):
    """
    Compute missing checksums and sizes for the cmsRun output files.

    Looks at every file dictionary found under
    report['steps']['cmsRun']['output'].  Files which already have a
    'checksums' entry are left alone.  For the others the 'pfn' (or, when
    absent, the 'fileName', which is copied into 'pfn') is passed to
    calculateChecksums() and the resulting adler32/cksum pair is stored under
    'checksums'; the file size from os.stat() is stored under 'size'.
    Files with neither 'pfn' nor 'fileName' are ignored.

    Progress is printed to stdout.  The report is updated in place; the
    function returns None, also when the report lacks the expected sections.
    """
    if 'steps' not in report or 'cmsRun' not in report['steps']:
        return
    cmsRunStep = report['steps']['cmsRun']
    if 'output' not in cmsRunStep:
        return

    for fileList in cmsRunStep['output'].values():
        for info in fileList:
            if 'checksums' in info:
                continue
            if 'pfn' not in info and 'fileName' not in info:
                # Nothing that points at a file on disk
                continue
            if 'pfn' not in info:
                info['pfn'] = info['fileName']

            print("==== Checksum STARTING at %s ====" % time.asctime(time.gmtime()))
            print("== Filename: %s" % info['pfn'])
            adler32, cksum = calculateChecksums(info['pfn'])
            print("==== Checksum FINISHING at %s ====" % time.asctime(time.gmtime()))

            info['checksums'] = dict(adler32=adler32, cksum=cksum)
            info['size'] = os.stat(info['pfn']).st_size
def AddChecksums(report):
    """
    Add size and checksum information to the cmsRun output files of a report.

    For each file dictionary under report['steps']['cmsRun']['output'] that
    has no 'checksums' key yet:

      1. 'pfn' is taken from 'fileName' when missing (entries with neither
         key are skipped)
      2. 'size' is set from os.stat() of the pfn
      3. calculateChecksums() is run on the pfn and the adler32/cksum pair is
         stored under 'checksums'

    Start/finish timestamps and a one line summary (name, adler32, size in
    MBytes) are printed for every processed file.  The report is modified in
    place; None is returned, also when one of the 'steps', 'cmsRun' or
    'output' levels is missing.
    """
    # Descend to the output section; stop quietly if a level is not there
    node = report
    for key in ('steps', 'cmsRun', 'output'):
        if key not in node:
            return
        node = node[key]

    bytesPerMB = 1024 * 1024
    for outputFiles in node.values():
        for item in outputFiles:
            if 'checksums' in item:
                continue
            if 'pfn' not in item:
                if 'fileName' not in item:
                    continue
                item['pfn'] = item['fileName']

            # Size first, so it is available for the summary line below
            item['size'] = os.stat(item['pfn']).st_size
            print("==== Checksum computation STARTING at %s ====" % time.asctime(time.gmtime()))
            adler32, cksum = calculateChecksums(item['pfn'])
            print("==== Checksum FINISHED at %s ====" % time.asctime(time.gmtime()))
            sizeInMB = float(item['size']) / bytesPerMB
            print("== FileName: %s  -  FileAdler32: %s  - FileSize: %.3f MBytes" %
                  (item['pfn'], adler32, sizeInMB))
            item['checksums'] = {'adler32': adler32, 'cksum': cksum}
    def execute(self, emulator = None, **overrides):
        """
        _execute_

        Pack the job's log files into logArchive.tar.gz, stage the tarball out
        and record it in the job report.

        emulator  -- when not None, emulator.emulate(self.step, self.job) is
                     returned and nothing else is done
        overrides -- accepted but not used: the overrides that are applied
                     come from self.step.override, when the step has one

        Returns None after a successful stage out.  If no log file is found in
        the task space an error is logged and the (empty) file list is
        returned instead.

        The stage out is guarded by a SIGALRM timer of 'waitTime' seconds.
        Failures are added to the report, which is persisted to Report.pkl,
        and then raised:
          Alarm           -> WMExecutionFailure, error code 60404
          WMException     -> re-raised as is, error code 60307
          other Exception -> WMException, error code 60405
        """
        # Are we using emulators again?
        if emulator is not None:
            return emulator.emulate( self.step, self.job )

        # NOTE: whatever was passed in as keyword arguments is dropped here
        overrides = {}
        if hasattr(self.step, 'override'):
            overrides = self.step.override.dictionary_()

        # Find alternate stageout location
        self.altLFN = overrides.get('altLFN', None)

        logging.info("Beginning Steps.Executors.LogArchive.Execute")
        logging.info("Using the following overrides: %s ", overrides)
        logging.info("Step is: %s", self.step)
        # Wait timeout for stageOut
        waitTime = overrides.get('waitTime', 3600 + (self.step.retryDelay * self.step.retryCount))

        matchFiles = [
            ".log$",
            "FrameworkJobReport",
            "Report.pkl",
            "Report.pcl",
            "^PSet.py$",
            "^PSet.pkl$"
            ]

        # Okay, we need a stageOut Manager
        # (the step attribute and the override key really are spelled
        # differently: 'newStageout' vs 'newStageOut')
        useNewStageOutCode = bool(getattr(self.step, 'newStageout', False) or
                                  overrides.get('newStageOut'))
        if not useNewStageOutCode:
            # old style
            manager = StageOutMgr.StageOutMgr(**overrides)
            manager.numberOfRetries = self.step.retryCount
            manager.retryPauseTime  = self.step.retryDelay
        else:
            # new style
            logging.info("LOGARCHIVE IS USING NEW STAGEOUT CODE")
            manager = WMCore.Storage.FileManager.StageOutMgr(
                                retryPauseTime  = self.step.retryDelay,
                                numberOfRetries = self.step.retryCount,
                                **overrides)

        # Now we need to find all the reports
        logFilesForTransfer = []
        # Look in the taskSpace first
        taskSpaceLocation = self.stepSpace.taskSpace.location
        logFilesForTransfer.extend(self.findFilesInDirectory(taskSpaceLocation, matchFiles))

        # What if it's empty?
        if not logFilesForTransfer:
            msg = "Could find no log files in job"
            logging.error(msg)
            return logFilesForTransfer

        # Now that we've gone through all the steps, we have to tar it out
        tarName         = 'logArchive.tar.gz'
        tarBallLocation = os.path.join(self.stepSpace.location, tarName)
        # The context manager closes the archive even if adding a file fails
        with tarfile.open(tarBallLocation, 'w:gz') as tarBall:
            for f in logFilesForTransfer:
                # Store the files relative to the task space
                tarBall.add(name  = f,
                            arcname = f.replace(taskSpaceLocation, '', 1).lstrip('/'))

        fileInfo = {'LFN': self.getLFN(tarName),
            'PFN' : tarBallLocation,
            'PNN' : None,
            'GUID' : None
            }

        signal.signal(signal.SIGALRM, alarmHandler)
        signal.alarm(waitTime)
        try:
            manager(fileInfo)
            self.report.addOutputModule(moduleName = "logArchive")
            (adler32, cksum) = calculateChecksums(tarBallLocation)
            reportFile = {"lfn": fileInfo["LFN"], "pfn": fileInfo["PFN"],
                          "location": fileInfo["PNN"], "module_label": "logArchive",
                          "events": 0, "size": 0, "merged": False,
                          "checksums": {'adler32': adler32, 'cksum' : cksum}}
            self.report.addOutputFile(outputModule = "logArchive", aFile= reportFile)
        except Alarm:
            msg = "Indefinite hang during stageOut of logArchive"
            logging.error(msg)
            self.report.addError(self.stepName, 60404, "LogArchiveTimeout", msg)
            self.report.persist("Report.pkl")
            raise WMExecutionFailure(60404, "LogArchiveTimeout", msg)
        except WMException as ex:
            self.report.addError(self.stepName, 60307, "LogArchiveFailure", str(ex))
            self.report.setStepStatus(self.stepName, 0)
            self.report.persist("Report.pkl")
            raise
        except Exception as ex:
            self.report.addError(self.stepName, 60405, "LogArchiveFailure", str(ex))
            self.report.setStepStatus(self.stepName, 0)
            self.report.persist("Report.pkl")
            msg = "Failure in transferring logArchive tarball\n"
            msg += str(ex) + "\n"
            msg += traceback.format_exc()
            logging.error(msg)
            raise WMException("LogArchiveFailure", message = str(ex))
        finally:
            # Disarm the timer on every exit path; otherwise a failed stage
            # out would leave a SIGALRM pending that fires later on
            signal.alarm(0)

        return
Exemple #7
0
    def execute(self, emulator=None, **overrides):
        """
        _execute_

        Pack the job's log files into logArchive.tar.gz, stage the tarball out
        and record it in the job report.

        emulator  -- when not None, emulator.emulate(self.step, self.job) is
                     returned and nothing else is done
        overrides -- accepted but not used: the overrides that are applied
                     come from self.step.override, when the step has one

        Returns None after a successful stage out.  If no log file is found in
        the task space an error is logged and the (empty) file list is
        returned instead.

        The stage out is guarded by a SIGALRM timer of 'waitTime' seconds.
        Failures are added to the report, which is persisted to Report.pkl,
        and then raised:
          Alarm           -> WMExecutionFailure, error code 60404
          WMException     -> re-raised as is, error code 60307
          other Exception -> WMException, error code 60405
        """
        # Are we using emulators again?
        if emulator is not None:
            return emulator.emulate(self.step, self.job)

        # NOTE: whatever was passed in as keyword arguments is dropped here
        overrides = {}
        if hasattr(self.step, 'override'):
            overrides = self.step.override.dictionary_()

        # Find alternate stageout location
        self.altLFN = overrides.get('altLFN', None)

        logging.info("Beginning Steps.Executors.LogArchive.Execute")
        logging.info("Using the following overrides: %s ", overrides)
        logging.info("Step is: %s", self.step)
        # Wait timeout for stageOut
        waitTime = overrides.get(
            'waitTime', 3600 + (self.step.retryDelay * self.step.retryCount))

        matchFiles = [
            ".log$", "FrameworkJobReport", "Report.pkl", "Report.pcl",
            "^PSet.py$", "^PSet.pkl$"
        ]

        # Okay, we need a stageOut Manager
        # (the step attribute and the override key really are spelled
        # differently: 'newStageout' vs 'newStageOut')
        useNewStageOutCode = bool(
            getattr(self.step, 'newStageout', False)
            or overrides.get('newStageOut'))
        if not useNewStageOutCode:
            # old style
            manager = StageOutMgr.StageOutMgr(**overrides)
            manager.numberOfRetries = self.step.retryCount
            manager.retryPauseTime = self.step.retryDelay
        else:
            # new style
            logging.info("LOGARCHIVE IS USING NEW STAGEOUT CODE")
            manager = WMCore.Storage.FileManager.StageOutMgr(
                retryPauseTime=self.step.retryDelay,
                numberOfRetries=self.step.retryCount,
                **overrides)

        # Now we need to find all the reports
        logFilesForTransfer = []
        # Look in the taskSpace first
        taskSpaceLocation = self.stepSpace.taskSpace.location
        logFilesForTransfer.extend(
            self.findFilesInDirectory(taskSpaceLocation, matchFiles))

        # What if it's empty?
        if not logFilesForTransfer:
            msg = "Could find no log files in job"
            logging.error(msg)
            return logFilesForTransfer

        # Now that we've gone through all the steps, we have to tar it out
        tarName = 'logArchive.tar.gz'
        tarBallLocation = os.path.join(self.stepSpace.location, tarName)
        # The context manager closes the archive even if adding a file fails
        with tarfile.open(tarBallLocation, 'w:gz') as tarBall:
            for f in logFilesForTransfer:
                # Store the files relative to the task space
                tarBall.add(name=f,
                            arcname=f.replace(taskSpaceLocation, '',
                                              1).lstrip('/'))

        fileInfo = {
            'LFN': self.getLFN(tarName),
            'PFN': tarBallLocation,
            'PNN': None,
            'GUID': None
        }

        signal.signal(signal.SIGALRM, alarmHandler)
        signal.alarm(waitTime)
        try:
            manager(fileInfo)
            self.report.addOutputModule(moduleName="logArchive")
            (adler32, cksum) = calculateChecksums(tarBallLocation)
            reportFile = {
                "lfn": fileInfo["LFN"],
                "pfn": fileInfo["PFN"],
                "location": fileInfo["PNN"],
                "module_label": "logArchive",
                "events": 0,
                "size": 0,
                "merged": False,
                "checksums": {
                    'adler32': adler32,
                    'cksum': cksum
                }
            }
            self.report.addOutputFile(outputModule="logArchive",
                                      file=reportFile)
        except Alarm:
            msg = "Indefinite hang during stageOut of logArchive"
            logging.error(msg)
            self.report.addError(self.stepName, 60404, "LogArchiveTimeout",
                                 msg)
            self.report.persist("Report.pkl")
            raise WMExecutionFailure(60404, "LogArchiveTimeout", msg)
        except WMException as ex:
            self.report.addError(self.stepName, 60307, "LogArchiveFailure",
                                 str(ex))
            self.report.setStepStatus(self.stepName, 0)
            self.report.persist("Report.pkl")
            raise
        except Exception as ex:
            self.report.addError(self.stepName, 60405, "LogArchiveFailure",
                                 str(ex))
            self.report.setStepStatus(self.stepName, 0)
            self.report.persist("Report.pkl")
            msg = "Failure in transferring logArchive tarball\n"
            msg += str(ex) + "\n"
            msg += traceback.format_exc()
            logging.error(msg)
            raise WMException("LogArchiveFailure", message=str(ex))
        finally:
            # Disarm the timer on every exit path; otherwise a failed stage
            # out would leave a SIGALRM pending that fires later on
            signal.alarm(0)

        return
Exemple #8
0
    def execute(self, emulator=None, **overrides):
        """
        _execute_

        Pack the job's log files into logArchive.tar.gz, stage the tarball out
        and record it in the job report.

        emulator  -- when not None, emulator.emulate(self.step, self.job) is
                     returned and nothing else is done
        overrides -- accepted but not used: the overrides that are applied
                     come from self.step.override, when the step has one

        Returns None after a successful stage out.  If no log file is found an
        error is logged and the (empty) result of the file search is returned
        instead.

        The stage out is guarded by a SIGALRM timer of 'waitTime' seconds.
        Failures are added to the report, which is persisted to Report.pkl,
        and then raised:
          Alarm           -> WMExecutionFailure, error code 60404
          WMException     -> re-raised as is, error code 60307
          other Exception -> WMException, error code 60405
        """
        # Are we using emulators again?
        if emulator is not None:
            return emulator.emulate(self.step, self.job)

        # NOTE: whatever was passed in as keyword arguments is dropped here
        overrides = {}
        if hasattr(self.step, 'override'):
            overrides = self.step.override.dictionary_()

        # Find alternate stageout location
        self.altLFN = overrides.get('altLFN', None)

        logging.info("Beginning Steps.Executors.LogArchive.Execute")
        logging.info("Using the following overrides: %s ", overrides)
        logging.info("Step is: %s", self.step)
        # Wait timeout for stageOut
        waitTime = overrides.get(
            'waitTime', 3600 + (self.step.retryDelay * self.step.retryCount))

        # NOTE(review): if findFilesInDirectory treats these as regular
        # expressions, "_condor_std*" means "_condor_st" followed by any
        # number of "d" rather than a glob - confirm the intent.
        matchFiles = [
            ".log$",  # matches the scram, wmagent and cmsRun logs
            "FrameworkJobReport.xml",
            "Report.pkl",
            "^PSet.py$",
            "^PSet.pkl$",
            "_condor_std*",  # condor wrapper logs at the pilot top level
        ]
        ignoredDirs = ['Utils', 'WMCore', 'WMSandbox']

        # Okay, we need a stageOut Manager
        # (the step attribute and the override key really are spelled
        # differently: 'newStageout' vs 'newStageOut')
        useNewStageOutCode = bool(
            getattr(self.step, 'newStageout', False)
            or overrides.get('newStageOut'))
        if not useNewStageOutCode:
            # old style
            manager = StageOutMgr.StageOutMgr(**overrides)
            manager.numberOfRetries = self.step.retryCount
            manager.retryPauseTime = self.step.retryDelay
        else:
            # new style
            logging.info("LOGARCHIVE IS USING NEW STAGEOUT CODE")
            manager = WMCore.Storage.FileManager.StageOutMgr(
                retryPauseTime=self.step.retryDelay,
                numberOfRetries=self.step.retryCount,
                **overrides)

        # Now we need to find all the reports
        # The log search follows this structure: ~pilotArea/jobArea/WMTaskSpaceArea/StepsArea
        # Start looking at the pilot scratch area first, such that we find the condor logs
        # Then look at the job area in order to find the wmagentJob log
        # Finally, at the taskspace area to find the cmsRun/FWJR/PSet files
        taskSpaceLocation = self.stepSpace.taskSpace.location
        pilotScratchDir = os.path.join(taskSpaceLocation, '../../')
        logFilesToArchive = self.findFilesInDirectory(pilotScratchDir,
                                                      matchFiles, ignoredDirs)

        # What if it's empty?
        if len(logFilesToArchive) == 0:
            msg = "Couldn't find any log files in the job"
            logging.error(msg)
            return logFilesToArchive

        # Now that we've gone through all the steps, we have to tar it out
        tarName = 'logArchive.tar.gz'
        tarBallLocation = os.path.join(self.stepSpace.location, tarName)
        # The context manager closes the archive even if adding a file fails
        with tarfile.open(tarBallLocation, 'w:gz') as tarBall:
            for f in logFilesToArchive:
                # The task space prefix is removed from the archive name.
                # NOTE(review): files found above the task space may keep
                # '../' components in their archive name - verify against
                # the paths findFilesInDirectory returns.
                tarBall.add(name=f,
                            arcname=f.replace(taskSpaceLocation, '',
                                              1).lstrip('/'))

        fileInfo = {
            'LFN': self.getLFN(tarName),
            'PFN': tarBallLocation,
            'PNN': None,
            'GUID': None
        }

        signal.signal(signal.SIGALRM, alarmHandler)
        signal.alarm(waitTime)
        try:
            manager(fileInfo)
            self.report.addOutputModule(moduleName="logArchive")
            (adler32, cksum) = calculateChecksums(tarBallLocation)
            reportFile = {
                "lfn": fileInfo["LFN"],
                "pfn": fileInfo["PFN"],
                "location": fileInfo["PNN"],
                "module_label": "logArchive",
                "events": 0,
                "size": 0,
                "merged": False,
                "checksums": {
                    'adler32': adler32,
                    'cksum': cksum
                }
            }
            self.report.addOutputFile(outputModule="logArchive",
                                      aFile=reportFile)
        except Alarm:
            msg = "Indefinite hang during stageOut of logArchive"
            logging.error(msg)
            self.report.addError(self.stepName, 60404, "LogArchiveTimeout",
                                 msg)
            self.report.persist("Report.pkl")
            raise WMExecutionFailure(60404, "LogArchiveTimeout", msg)
        except WMException as ex:
            self.report.addError(self.stepName, 60307, "LogArchiveFailure",
                                 str(ex))
            self.report.persist("Report.pkl")
            raise
        except Exception as ex:
            self.report.addError(self.stepName, 60405, "LogArchiveFailure",
                                 str(ex))
            self.report.persist("Report.pkl")
            msg = "Failure in transferring logArchive tarball\n"
            msg += str(ex) + "\n"
            msg += traceback.format_exc()
            logging.error(msg)
            raise WMException("LogArchiveFailure", message=str(ex))
        finally:
            # Disarm the timer on every exit path; otherwise a failed stage
            # out would leave a SIGALRM pending that fires later on
            signal.alarm(0)

        return