Example #1
    def testCheckLumiInformation(self):
        """
        _testCheckLumiInformation_

        Test the function that checks if all files
        have run lumi information
        """

        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        myReport.checkForRunLumiInformation(stepName="cmsRun1")

        self.assertNotEqual(myReport.getExitCode(), 70452)

        # Remove the lumi information on purpose
        myReport2 = Report("cmsRun1")
        myReport2.parse(self.xmlPath)
        fRefs = myReport2.getAllFileRefsFromStep(step="cmsRun1")
        for fRef in fRefs:
            fRef.runs = ConfigSection()
        myReport2.checkForRunLumiInformation(stepName="cmsRun1")
        self.assertFalse(myReport2.stepSuccessful(stepName="cmsRun1"))
        self.assertEqual(myReport2.getExitCode(), 70452)

        return
Example #2
    def testCheckLumiInformation(self):
        """
        _testCheckLumiInformation_

        Test the function that checks if all files
        have run lumi information
        """

        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        myReport.checkForRunLumiInformation(stepName = "cmsRun1")

        self.assertNotEqual(myReport.getExitCode(), 60452)

        # Remove the lumi information on purpose
        myReport2 = Report("cmsRun1")
        myReport2.parse(self.xmlPath)
        fRefs = myReport2.getAllFileRefsFromStep(step = "cmsRun1")
        for fRef in fRefs:
            fRef.runs = ConfigSection()
        myReport2.checkForRunLumiInformation(stepName = "cmsRun1")
        self.assertFalse(myReport2.stepSuccessful(stepName = "cmsRun1"))
        self.assertEqual(myReport2.getExitCode(), 60452)

        return
Example #3
    def testASONoNameChange(self):
        AsyncStageOut_t.FakeTransferWorker.setFailProbability(0)
        testJob = self.roundtripHelper(preserveLFN=True)
        stepReport = Report('cmsRun1')
        stepReport.unpersist(testJob['fwjr_path'])
        files = stepReport.getAllFileRefsFromStep(step='cmsRun1')
        for file in files:
            self.assertNotEqual(file.lfn.find('store/temp'), -1,
                                "The lfn should still have store/temp: %s" % file.lfn)
Example #4
    def testGetAdlerChecksum(self):
        """
        _testGetAdlerChecksum_

        Test the function that sees if all files
        have an adler checksum.

        For some reason, our default XML report doesn't have checksums;
        therefore it should fail.
        """

        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        myReport.checkForAdlerChecksum(stepName="cmsRun1")

        self.assertFalse(myReport.stepSuccessful(stepName="cmsRun1"))
        self.assertEqual(myReport.getExitCode(), 60451)

        # Now see what happens if the adler32 is set to None
        myReport2 = Report("cmsRun1")
        myReport2.parse(self.xmlPath)
        fRefs = myReport2.getAllFileRefsFromStep(step="cmsRun1")
        for fRef in fRefs:
            fRef.checksums = {'adler32': None}
        myReport2.checkForAdlerChecksum(stepName="cmsRun1")
        self.assertFalse(myReport2.stepSuccessful(stepName="cmsRun1"))
        self.assertEqual(myReport2.getExitCode(), 60451)

        myReport3 = Report("cmsRun1")
        myReport3.parse(self.xmlPath)
        fRefs = myReport3.getAllFileRefsFromStep(step="cmsRun1")
        for fRef in fRefs:
            fRef.checksums = {'adler32': 100}

        myReport3.checkForAdlerChecksum(stepName="cmsRun1")
        self.assertTrue(myReport3.getExitCode() != 60451)

        return
Example #5
    def testGetAdlerChecksum(self):
        """
        _testGetAdlerChecksum_

        Test the function that sees if all files
        have an adler checksum.

        For some reason, our default XML report doesn't have checksums;
        therefore it should fail.
        """

        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        myReport.checkForAdlerChecksum(stepName = "cmsRun1")

        self.assertFalse(myReport.stepSuccessful(stepName = "cmsRun1"))
        self.assertEqual(myReport.getExitCode(), 60451)

        # Now see what happens if the adler32 is set to None
        myReport2 = Report("cmsRun1")
        myReport2.parse(self.xmlPath)
        fRefs = myReport2.getAllFileRefsFromStep(step = "cmsRun1")
        for fRef in fRefs:
            fRef.checksums = {'adler32': None}
        myReport2.checkForAdlerChecksum(stepName = "cmsRun1")
        self.assertFalse(myReport2.stepSuccessful(stepName = "cmsRun1"))
        self.assertEqual(myReport2.getExitCode(), 60451)

        myReport3 = Report("cmsRun1")
        myReport3.parse(self.xmlPath)
        fRefs = myReport3.getAllFileRefsFromStep(step = "cmsRun1")
        for fRef in fRefs:
            fRef.checksums = {'adler32': 100}

        myReport3.checkForAdlerChecksum(stepName = "cmsRun1")
        self.assertTrue(myReport3.getExitCode() != 60451)

        return
Example #6
    def post(self, emulator=None):
        """
        _post_

        Post execution checkpointing

        """
        # Another emulator check
        if emulator is not None:
            return emulator.emulatePost(self.step)

        logging.info("Steps.Executors.%s.post called", self.__class__.__name__)

        for step in self.stepSpace.taskSpace.stepSpaces():

            if step == self.stepName:
                # Don't try to parse your own report; it's not there yet
                continue

            stepLocation = os.path.join(self.stepSpace.taskSpace.location,
                                        step)
            logging.info("Beginning report processing for step %s", step)

            reportLocation = os.path.join(stepLocation, 'Report.pkl')
            if not os.path.isfile(reportLocation):
                logging.error("Cannot find report for step %s in space %s",
                              step, stepLocation)
                continue

            # First, get everything from a file and 'unpersist' it
            stepReport = Report(step)
            stepReport.unpersist(reportLocation)

            # Don't stage out files from bad steps.
            if not stepReport.stepSuccessful(step):
                continue

            files = stepReport.getAllFileRefsFromStep(step=step)
            for fileInfo in files:
                if hasattr(fileInfo, 'lfn') and hasattr(
                        fileInfo, 'location') and hasattr(fileInfo, 'guid'):
                    fileInfo.user_dn = getattr(self.step, "userDN", None)
                    fileInfo.async_dest = getattr(self.step, "asyncDest", None)
                    fileInfo.user_vogroup = getattr(self.step, "owner_vogroup",
                                                    '')
                    fileInfo.user_vorole = getattr(self.step, "owner_vorole",
                                                   '')

            stepReport.persist(reportLocation)

        return None
Example #7
    def post(self, emulator = None):
        """
        _post_

        Post execution checkpointing

        """
        # Another emulator check
        if emulator is not None:
            return emulator.emulatePost(self.step)

        for step in self.stepSpace.taskSpace.stepSpaces():

            if step == self.stepName:
                #Don't try to parse your own report; it's not there yet
                continue

            stepLocation = os.path.join(self.stepSpace.taskSpace.location, step)
            logging.info("Beginning report processing for step %s" % step)

            reportLocation = os.path.join(stepLocation, 'Report.pkl')
            if not os.path.isfile(reportLocation):
                logging.error("Cannot find report for step %s in space %s" \
                              % (step, stepLocation))
                continue

            # First, get everything from a file and 'unpersist' it
            stepReport = Report(step)
            stepReport.unpersist(reportLocation)

            # Don't stage out files from bad steps.
            if not stepReport.stepSuccessful(step):
                continue

            files = stepReport.getAllFileRefsFromStep(step = step)
            for file in files:

                if not hasattr(file, 'lfn') or not hasattr(file, 'location') or \
                       not hasattr(file, 'guid'):
                    continue

                file.user_dn = getattr(self.step, "userDN", None)
                file.async_dest = getattr(self.step, "asyncDest", None)
                file.user_vogroup = getattr(self.step, "owner_vogroup", '')
                file.user_vorole = getattr(self.step, "owner_vorole", '')

            stepReport.persist(reportLocation)

        print "Steps.Executors.StageOut.post called"
        return None
Example #8
    def setJobWantsASO(self, filename, preserveLFN=True):
        stepReport = Report('cmsRun1')
        stepReport.unpersist(filename)
        files = stepReport.getAllFileRefsFromStep(step='cmsRun1')
        for file in files:

            if not hasattr(file, 'lfn') or not hasattr(file, 'location') or \
                   not hasattr(file, 'guid'):
                continue

            file.user_dn = "/CN=dummy-name/O=melopartydotcom"
            file.async_dest = "T2_US_Vanderbilt"
            file.user_vogroup = ''
            file.user_vorole = ''
            file.preserve_lfn = preserveLFN

        stepReport.persist(filename)
Example #9
    def execute(self, emulator = None):
        """
        _execute_


        """
        # Are we using emulators again?
        if emulator is not None:
            return emulator.emulate(self.step, self.job)


        overrides = {}
        if hasattr(self.step, 'override'):
            overrides = self.step.override.dictionary_()

        # Set wait to over an hour
        waitTime = overrides.get('waitTime', 3600 + (self.step.retryDelay * self.step.retryCount))

        logging.info("StageOut override is: %s " % self.step)

        # Pull out StageOutMgr Overrides

        # switch between old stageOut behavior and new, fancy stage out behavior
        useNewStageOutCode = False
        if 'newStageOut' in overrides and overrides.get('newStageOut'):
            useNewStageOutCode = True


        stageOutCall = {}
        if overrides.has_key("command") and overrides.has_key("option") \
               and overrides.has_key("se-name") and overrides.has_key("lfn-prefix"):
            logging.critical('using override in StageOut')
            stageOutCall['command']    = overrides.get('command')
            stageOutCall['option']     = overrides.get('option')
            stageOutCall['se-name']    = overrides.get('se-name')
            stageOutCall['lfn-prefix'] = overrides.get('lfn-prefix')

        # naw man, this is real
        # iterate over all the incoming files
        if not useNewStageOutCode:
            # old style
            manager = StageOutMgr.StageOutMgr(**stageOutCall)
            manager.numberOfRetries = self.step.retryCount
            manager.retryPauseTime  = self.step.retryDelay
        else:
            # new style
            logging.critical("STAGEOUT IS USING NEW STAGEOUT CODE")
            print "STAGEOUT IS USING NEW STAGEOUT CODE"
            manager = WMCore.Storage.FileManager.StageOutMgr(
                                retryPauseTime  = self.step.retryDelay,
                                numberOfRetries = self.step.retryCount,
                                **stageOutCall)

        # We need to find a list of steps in our task
        # And eventually a list of jobReports for our steps

        # Search through steps for report files
        filesTransferred = []

        for step in self.stepSpace.taskSpace.stepSpaces():
            if step == self.stepName:
                #Don't try to parse your own report; it's not there yet
                continue
            stepLocation = os.path.join(self.stepSpace.taskSpace.location, step)
            logging.info("Beginning report processing for step %s" % (step))
            reportLocation = os.path.join(stepLocation, 'Report.pkl')
            if not os.path.isfile(reportLocation):
                logging.error("Cannot find report for step %s in space %s" \
                              % (step, stepLocation))
                continue
            # First, get everything from a file and 'unpersist' it
            stepReport = Report()
            stepReport.unpersist(reportLocation, step)
            taskID = getattr(stepReport.data, 'id', None)

            # Don't stage out files from bad steps.
            if not stepReport.stepSuccessful(step):
                continue

            # Okay, time to start using stuff
            # Now I'm a bit confused about this; each report should ONLY
            # Have the results of that particular step in it,
            # So getting all the files should get ONLY the files
            # for that step; or so I hope
            files = stepReport.getAllFileRefsFromStep(step = step)
            for file in files:
                if not hasattr(file, 'lfn') and hasattr(file, 'pfn'):
                    # Then we're truly hosed on this file; ignore it
                    msg = "Not a file: %s" % file
                    logging.error(msg)
                    continue
                # Support direct-to-merge
                # This requires pulling a bunch of stuff from everywhere
                # First check if it's needed
                if hasattr(self.step.output, 'minMergeSize') \
                       and hasattr(file, 'size') \
                       and not getattr(file, 'merged', False):

                    # We need both of those to continue, and we don't
                    # direct-to-merge
                    if getattr(self.step.output, 'doNotDirectMerge', False):
                        # Then we've been told explicitly not to do direct-to-merge
                        continue
                    if file.size >= self.step.output.minMergeSize:
                        # Then this goes direct to merge
                        try:
                            file = self.handleLFNForMerge(mergefile = file, step = step)
                        except Exception as ex:
                            logging.error("Encountered error while handling LFN for merge due to size.\n")
                            logging.error(str(ex))
                            logging.debug(file)
                            logging.debug("minMergeSize: %s" % self.step.output.minMergeSize)
                            manager.cleanSuccessfulStageOuts()
                            stepReport.addError(self.stepName, 60401,
                                                "DirectToMergeFailure", str(ex))
                    elif getattr(self.step.output, 'maxMergeEvents', None) != None\
                             and getattr(file, 'events', None) != None\
                             and not getattr(file, 'merged', False):
                        # Then direct-to-merge due to events if
                        # the file is large enough:
                        if file.events >= self.step.output.maxMergeEvents:
                            # straight to merge
                            try:
                                file = self.handleLFNForMerge(mergefile = file, step = step)
                            except Exception as ex:
                                logging.error("Encountered error while handling LFN for merge due to events.\n")
                                logging.error(str(ex))
                                logging.debug(file)
                                logging.debug("maxMergeEvents: %s" % self.step.output.maxMergeEvents)
                                manager.cleanSuccessfulStageOuts()
                                stepReport.addError(self.stepName, 60402,
                                                    "DirectToMergeFailure", str(ex))

                # Save the input PFN in case we need it
                # Undecided whether to move file.pfn to the output PFN
                file.InputPFN   = file.pfn
                lfn = getattr(file, 'lfn')
                fileSource = getattr(file, 'Source', None)
                if fileSource in ['TFileService', 'UserDefined']:
                    userLfnRegEx(lfn)
                else:
                    lfnRegEx(lfn)
                fileForTransfer = {'LFN': lfn,
                                   'PFN': getattr(file, 'pfn'),
                                   'SEName' : None,
                                   'StageOutCommand': None}
                signal.signal(signal.SIGALRM, alarmHandler)
                signal.alarm(waitTime)
                try:
                    manager(fileForTransfer)
                    #Afterwards, the file should have updated info.
                    filesTransferred.append(fileForTransfer)
                    file.StageOutCommand = fileForTransfer['StageOutCommand']
                    file.location        = fileForTransfer['SEName']
                    file.OutputPFN       = fileForTransfer['PFN']
                except Alarm:
                    msg = "Indefinite hang during stageOut of logArchive"
                    logging.error(msg)
                    manager.cleanSuccessfulStageOuts()
                    stepReport.addError(self.stepName, 60403,
                                        "StageOutTimeout", msg)
                    stepReport.persist("Report.pkl")
                except Exception as ex:
                    manager.cleanSuccessfulStageOuts()
                    stepReport.addError(self.stepName, 60307,
                                        "StageOutFailure", str(ex))
                    stepReport.setStepStatus(self.stepName, 1)
                    stepReport.persist("Report.pkl")
                    raise
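Note: the execute() examples rely on an alarmHandler function and an Alarm exception that are defined elsewhere in the executor module and are not shown in these snippets. Below is a minimal, self-contained sketch of that SIGALRM timeout guard; guardedCall is a helper name invented here for illustration, while the real code arms and disarms the alarm inline around manager(fileForTransfer).

import signal
import time


class Alarm(Exception):
    """Raised by the SIGALRM handler when the watchdog timer fires."""
    pass


def alarmHandler(signum, frame):
    # Convert the SIGALRM signal into an exception so the blocked call
    # can be aborted from an except clause.
    raise Alarm


def guardedCall(func, waitTime):
    """Run func() but give up after waitTime seconds (Unix only)."""
    signal.signal(signal.SIGALRM, alarmHandler)
    signal.alarm(waitTime)
    try:
        return func()
    except Alarm:
        print("call timed out after %i seconds" % waitTime)
        return None
    finally:
        signal.alarm(0)  # always disarm the timer


if __name__ == "__main__":
    guardedCall(lambda: time.sleep(5), waitTime=2)  # prints the timeout message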
Example #10
    def testReportHandling(self):
        """
        _testReportHandling_

        Verify that we're able to parse a CMSSW report, convert it to a Report()
        style report, pickle it and then have the accountant process it.
        """
        self.procPath = os.path.join(WMCore.WMBase.getTestBase(),
                                    "WMCore_t/FwkJobReport_t/CMSSWProcessingReport.xml")

        myReport = Report("cmsRun1")
        myReport.parse(self.procPath)

        # Fake some metadata that should be added by the stageout scripts.
        for fileRef in myReport.getAllFileRefsFromStep("cmsRun1"):
            fileRef.size = 1024
            fileRef.location = "cmssrm.fnal.gov"

        fwjrPath = os.path.join(self.tempDir, "ProcReport.pkl")
        cmsRunStep = myReport.retrieveStep("cmsRun1")
        cmsRunStep.status = 0
        myReport.setTaskName('/TestWF/None')
        myReport.persist(fwjrPath)

        self.setFWJRAction.execute(jobID = self.testJob["id"], fwjrPath = fwjrPath)

        pFile = DBSBufferFile(lfn = "/path/to/some/lfn", size = 600000, events = 60000)
        pFile.setAlgorithm(appName = "cmsRun", appVer = "UNKNOWN",
                           appFam = "RECO", psetHash = "GIBBERISH",
                           configContent = "MOREGIBBERISH")
        pFile.setDatasetPath("/bogus/dataset/path")
        #pFile.addRun(Run(1, *[45]))
        pFile.create()

        config = self.createConfig(workerThreads = 1)
        accountant = JobAccountantPoller(config)
        accountant.setup()
        accountant.algorithm()

        self.verifyJobSuccess(self.testJob["id"])
        self.verifyFileMetaData(self.testJob["id"], myReport.getAllFilesFromStep("cmsRun1"))

        inputFile = File(lfn = "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR09_R_34X_V5_All_v1/0000/outputRECORECO.root")
        inputFile.load()
        self.testMergeJob = Job(name = "testMergeJob", files = [inputFile])
        self.testMergeJob.create(group = self.mergeJobGroup)
        self.testMergeJob["state"] = "complete"
        self.stateChangeAction.execute(jobs = [self.testMergeJob])

        self.mergePath = os.path.join(WMCore.WMBase.getTestBase(),
                                         "WMCore_t/FwkJobReport_t/CMSSWMergeReport.xml")

        myReport = Report("mergeReco")
        myReport.parse(self.mergePath)

        # Fake some metadata that should be added by the stageout scripts.
        for fileRef in myReport.getAllFileRefsFromStep("mergeReco"):
            fileRef.size = 1024
            fileRef.location = "cmssrm.fnal.gov"
            fileRef.dataset = {"applicationName": "cmsRun", "applicationVersion": "CMSSW_3_4_2_patch1",
                               "primaryDataset": "MinimumBias", "processedDataset": "Rereco-v1",
                               "dataTier": "RECO"}

        fwjrPath = os.path.join(self.tempDir, "MergeReport.pkl")
        myReport.setTaskName('/MergeWF/None')
        cmsRunStep = myReport.retrieveStep("mergeReco")
        cmsRunStep.status = 0
        myReport.persist(fwjrPath)

        self.setFWJRAction.execute(jobID = self.testMergeJob["id"], fwjrPath = fwjrPath)
        accountant.algorithm()

        self.verifyJobSuccess(self.testMergeJob["id"])
        self.verifyFileMetaData(self.testMergeJob["id"], myReport.getAllFilesFromStep("mergeReco"))

        return
Example #11
    def execute(self, emulator = None):
        """
        _execute_


        """
        # Are we using emulators again?
        if emulator is not None:
            return emulator.emulate(self.step, self.job)


        overrides = {}
        if hasattr(self.step, 'override'):
            overrides = self.step.override.dictionary_()

        # Set wait to two hours per retry
        # this alarm leaves a subprocess behind that may cause trouble, see #6273
        waitTime = overrides.get('waitTime', 7200 * self.step.retryCount)

        logging.info("StageOut override is: %s ", self.step)

        # Pull out StageOutMgr Overrides

        # switch between old stageOut behavior and new, fancy stage out behavior
        useNewStageOutCode = False
        if getattr(self.step, 'newStageout', False) or \
            ('newStageOut' in overrides and overrides.get('newStageOut')):
            useNewStageOutCode = True


        stageOutCall = {}
        if "command" in overrides and "option" in overrides \
               and "phedex-node" in overrides \
               and"lfn-prefix" in overrides:
            logging.critical('using override in StageOut')
            stageOutCall['command']    = overrides.get('command')
            stageOutCall['option']     = overrides.get('option')
            stageOutCall['phedex-node'] = overrides.get('phedex-node')
            stageOutCall['lfn-prefix'] = overrides.get('lfn-prefix')

        # naw man, this is real
        # iterate over all the incoming files
        if not useNewStageOutCode:
            # old style
            manager = StageOutMgr(**stageOutCall)
            manager.numberOfRetries = self.step.retryCount
            manager.retryPauseTime  = self.step.retryDelay
        else:
            # new style
            logging.critical("STAGEOUT IS USING NEW STAGEOUT CODE")
            print("STAGEOUT IS USING NEW STAGEOUT CODE")
            manager = FMStageOutMgr(retryPauseTime  = self.step.retryDelay,
                                    numberOfRetries = self.step.retryCount,
                                    **stageOutCall)

        # We need to find a list of steps in our task
        # And eventually a list of jobReports for our steps

        # Search through steps for report files
        filesTransferred = []

        for step in self.stepSpace.taskSpace.stepSpaces():
            if step == self.stepName:
                #Don't try to parse your own report; it's not there yet
                continue
            stepLocation = os.path.join(self.stepSpace.taskSpace.location, step)
            logging.info("Beginning report processing for step %s", step)
            reportLocation = os.path.join(stepLocation, 'Report.pkl')
            if not os.path.isfile(reportLocation):
                logging.error("Cannot find report for step %s in space %s", step, stepLocation)
                continue
            # First, get everything from a file and 'unpersist' it
            stepReport = Report()
            stepReport.unpersist(reportLocation, step)

            # Don't stage out files from bad steps.
            if not stepReport.stepSuccessful(step):
                continue

            # Okay, time to start using stuff
            # Now I'm a bit confused about this; each report should ONLY
            # Have the results of that particular step in it,
            # So getting all the files should get ONLY the files
            # for that step; or so I hope
            files = stepReport.getAllFileRefsFromStep(step = step)
            for fileName in files:

                # make sure the file information is consistent
                if hasattr(fileName, 'pfn') and ( not hasattr(fileName, 'lfn') or not hasattr(fileName, 'module_label') ):
                    msg = "Not a valid file: %s" % fileName
                    logging.error(msg)
                    continue

                # Figuring out if we should do straight to merge
                #  - should we do straight to merge at all ?
                #  - is straight to merge disabled for this output ?
                #  - are we over the size threshold
                #  - are we over the event threshold ?
                straightToMerge = False
                if not getattr(fileName, 'merged', False) and hasattr(self.step.output, 'minMergeSize'):
                    if fileName.module_label not in getattr(self.step.output, 'forceUnmergedOutputs', []):
                        if getattr(fileName, 'size', 0) >= self.step.output.minMergeSize:
                            straightToMerge = True
                        if getattr(fileName, 'events', 0) >= getattr(self.step.output, 'maxMergeEvents', sys.maxsize):
                            straightToMerge = True

                if straightToMerge:

                    try:
                        fileName = self.handleLFNForMerge(mergefile = fileName,
                                                          step = step)
                    except Exception as ex:
                        logging.info("minMergeSize: %s", getattr(self.step.output, 'minMergeSize', None))
                        logging.info("maxMergeEvents: %s", getattr(self.step.output, 'maxMergeEvents', None))
                        logging.error("Encountered error while handling LFN for merge %s", fileName)
                        logging.error(str(ex))
                        manager.cleanSuccessfulStageOuts()
                        stepReport.addError(self.stepName, 60401, "DirectToMergeFailure", str(ex))

                # Save the input PFN in case we need it
                # Undecided whether to move fileName.pfn to the output PFN
                fileName.InputPFN = fileName.pfn
                lfn = getattr(fileName, 'lfn')
                fileSource = getattr(fileName, 'Source', None)
                if fileSource in ['TFileService', 'UserDefined']:
                    userLfnRegEx(lfn)
                else:
                    lfnRegEx(lfn)

                fileForTransfer = {'LFN': lfn,
                                   'PFN': getattr(fileName, 'pfn'),
                                   'PNN' : None,
                                   'StageOutCommand': None,
                                   'Checksums' : getattr(fileName, 'checksums', None)}

                signal.signal(signal.SIGALRM, alarmHandler)
                signal.alarm(waitTime)
                try:
                    manager(fileForTransfer)
                    #Afterwards, the file should have updated info.
                    filesTransferred.append(fileForTransfer)
                    fileName.StageOutCommand = fileForTransfer['StageOutCommand']
                    fileName.location        = fileForTransfer['PNN']
                    fileName.OutputPFN       = fileForTransfer['PFN']
                except Alarm:
                    msg = "Indefinite hang during stageOut of logArchive"
                    logging.error(msg)
                    manager.cleanSuccessfulStageOuts()
                    stepReport.addError(self.stepName, 60403, "StageOutTimeout", msg)
                    stepReport.setStepStatus(self.stepName, 1)
                    # well, if it fails for one file, it fails for the whole job...
                    break
                except Exception as ex:
                    manager.cleanSuccessfulStageOuts()
                    stepReport.addError(self.stepName, 60307, "StageOutFailure", str(ex))
                    stepReport.setStepStatus(self.stepName, 1)
                    stepReport.persist(reportLocation)
                    raise

                signal.alarm(0)

            # Am DONE with report. Persist it
            stepReport.persist(reportLocation)

        #Done with all steps, and should have a list of
        #stagedOut files in fileForTransfer
        logging.info("Transferred %i files", len(filesTransferred))
        return
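The straight-to-merge decision in the example above reduces to a couple of threshold checks on an unmerged output file. Below is a standalone restatement of that logic; shouldGoStraightToMerge and the SimpleNamespace stand-ins are illustrative names, not part of WMCore.

import sys
from types import SimpleNamespace


def shouldGoStraightToMerge(fileRef, output):
    """Mirror of the checks in execute(): an unmerged file goes straight
    to merge when it is over the size or event threshold, unless its
    output module has been forced to stay unmerged."""
    if getattr(fileRef, 'merged', False):
        return False
    if not hasattr(output, 'minMergeSize'):
        return False
    if fileRef.module_label in getattr(output, 'forceUnmergedOutputs', []):
        return False
    overSize = getattr(fileRef, 'size', 0) >= output.minMergeSize
    overEvents = getattr(fileRef, 'events', 0) >= getattr(output, 'maxMergeEvents', sys.maxsize)
    return overSize or overEvents


if __name__ == "__main__":
    aFile = SimpleNamespace(merged=False, module_label='RECOoutput',
                            size=3 * 1024 ** 3, events=1000)
    stepOutput = SimpleNamespace(minMergeSize=2 * 1024 ** 3, maxMergeEvents=100000)
    print(shouldGoStraightToMerge(aFile, stepOutput))  # True: over the size threshold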
Example #12
    def execute(self, emulator=None):
        """
        _execute_


        """
        # Are we using emulators again?
        if emulator is not None:
            return emulator.emulate(self.step, self.job)

        overrides = {}
        if hasattr(self.step, 'override'):
            overrides = self.step.override.dictionary_()

        # Set wait to two hours per retry
        # this alarm leaves a subprocess behind that may cause trouble, see #6273
        waitTime = overrides.get('waitTime', 7200 * self.step.retryCount)

        logging.info("StageOut override is: %s ", self.step)

        # Pull out StageOutMgr Overrides

        # switch between old stageOut behavior and new, fancy stage out behavior
        useNewStageOutCode = False
        if getattr(self.step, 'newStageout', False) or \
            ('newStageOut' in overrides and overrides.get('newStageOut')):
            useNewStageOutCode = True

        stageOutCall = {}
        if "command" in overrides and "option" in overrides \
               and "phedex-node" in overrides \
               and"lfn-prefix" in overrides:
            logging.critical('using override in StageOut')
            stageOutCall['command'] = overrides.get('command')
            stageOutCall['option'] = overrides.get('option')
            stageOutCall['phedex-node'] = overrides.get('phedex-node')
            stageOutCall['lfn-prefix'] = overrides.get('lfn-prefix')

        # naw man, this is real
        # iterate over all the incoming files
        if not useNewStageOutCode:
            # old style
            manager = StageOutMgr(**stageOutCall)
            manager.numberOfRetries = self.step.retryCount
            manager.retryPauseTime = self.step.retryDelay
        else:
            # new style
            logging.critical("STAGEOUT IS USING NEW STAGEOUT CODE")
            print("STAGEOUT IS USING NEW STAGEOUT CODE")
            manager = FMStageOutMgr(retryPauseTime=self.step.retryDelay,
                                    numberOfRetries=self.step.retryCount,
                                    **stageOutCall)

        # We need to find a list of steps in our task
        # And eventually a list of jobReports for our steps

        # Search through steps for report files
        filesTransferred = []

        for step in self.stepSpace.taskSpace.stepSpaces():
            if step == self.stepName:
                #Don't try to parse your own report; it's not there yet
                continue
            stepLocation = os.path.join(self.stepSpace.taskSpace.location,
                                        step)
            logging.info("Beginning report processing for step %s", step)
            reportLocation = os.path.join(stepLocation, 'Report.pkl')
            if not os.path.isfile(reportLocation):
                logging.error("Cannot find report for step %s in space %s",
                              step, stepLocation)
                continue
            # First, get everything from a file and 'unpersist' it
            stepReport = Report()
            stepReport.unpersist(reportLocation, step)

            # Don't stage out files from bad steps.
            if not stepReport.stepSuccessful(step):
                continue

            # Okay, time to start using stuff
            # Now I'm a bit confused about this; each report should ONLY
            # Have the results of that particular step in it,
            # So getting all the files should get ONLY the files
            # for that step; or so I hope
            files = stepReport.getAllFileRefsFromStep(step=step)
            for fileName in files:

                # make sure the file information is consistent
                if hasattr(fileName,
                           'pfn') and (not hasattr(fileName, 'lfn') or
                                       not hasattr(fileName, 'module_label')):
                    msg = "Not a valid file: %s" % fileName
                    logging.error(msg)
                    continue

                # Figuring out if we should do straight to merge
                #  - should we do straight to merge at all ?
                #  - is straight to merge disabled for this output ?
                #  - are we over the size threshold
                #  - are we over the event threshold ?
                straightToMerge = False
                if not getattr(fileName, 'merged', False) and hasattr(
                        self.step.output, 'minMergeSize'):
                    if fileName.module_label not in getattr(
                            self.step.output, 'forceUnmergedOutputs', []):
                        if getattr(fileName, 'size',
                                   0) >= self.step.output.minMergeSize:
                            straightToMerge = True
                        if getattr(fileName, 'events', 0) >= getattr(
                                self.step.output, 'maxMergeEvents',
                                sys.maxsize):
                            straightToMerge = True

                if straightToMerge:

                    try:
                        fileName = self.handleLFNForMerge(mergefile=fileName,
                                                          step=step)
                    except Exception as ex:
                        logging.info(
                            "minMergeSize: %s",
                            getattr(self.step.output, 'minMergeSize', None))
                        logging.info(
                            "maxMergeEvents: %s",
                            getattr(self.step.output, 'maxMergeEvents', None))
                        logging.error(
                            "Encountered error while handling LFN for merge %s",
                            fileName)
                        logging.error(str(ex))
                        manager.cleanSuccessfulStageOuts()
                        stepReport.addError(self.stepName, 60401,
                                            "DirectToMergeFailure", str(ex))

                # Save the input PFN in case we need it
                # Undecided whether to move fileName.pfn to the output PFN
                fileName.InputPFN = fileName.pfn
                lfn = getattr(fileName, 'lfn')
                fileSource = getattr(fileName, 'Source', None)
                if fileSource in ['TFileService', 'UserDefined']:
                    userLfnRegEx(lfn)
                else:
                    lfnRegEx(lfn)

                fileForTransfer = {
                    'LFN': lfn,
                    'PFN': getattr(fileName, 'pfn'),
                    'PNN': None,
                    'StageOutCommand': None,
                    'Checksums': getattr(fileName, 'checksums', None)
                }

                signal.signal(signal.SIGALRM, alarmHandler)
                signal.alarm(waitTime)
                try:
                    manager(fileForTransfer)
                    #Afterwards, the file should have updated info.
                    filesTransferred.append(fileForTransfer)
                    fileName.StageOutCommand = fileForTransfer[
                        'StageOutCommand']
                    fileName.location = fileForTransfer['PNN']
                    fileName.OutputPFN = fileForTransfer['PFN']
                except Alarm:
                    msg = "Indefinite hang during stageOut of logArchive"
                    logging.error(msg)
                    manager.cleanSuccessfulStageOuts()
                    stepReport.addError(self.stepName, 60403,
                                        "StageOutTimeout", msg)
                    stepReport.setStepStatus(self.stepName, 1)
                    # well, if it fails for one file, it fails for the whole job...
                    break
                except Exception as ex:
                    manager.cleanSuccessfulStageOuts()
                    stepReport.addError(self.stepName, 60307,
                                        "StageOutFailure", str(ex))
                    stepReport.setStepStatus(self.stepName, 1)
                    stepReport.persist(reportLocation)
                    raise

                signal.alarm(0)

            # Am DONE with report. Persist it
            stepReport.persist(reportLocation)

        #Done with all steps, and should have a list of
        #stagedOut files in fileForTransfer
        logging.info("Transferred %i files", len(filesTransferred))
        return
Example #13
    def execute(self, emulator = None):
        """
        _execute_


        """
        # Are we using emulators again?
        if emulator is not None:
            return emulator.emulate(self.step, self.job)


        overrides = {}
        if hasattr(self.step, 'override'):
            overrides = self.step.override.dictionary_()

        # Set wait to over an hour
        waitTime = overrides.get('waitTime', 3600 + (self.step.retryDelay * self.step.retryCount))

        logging.info("StageOut override is: %s " % self.step)

        # Pull out StageOutMgr Overrides

        # switch between old stageOut behavior and new, fancy stage out behavior
        useNewStageOutCode = False
        if getattr(self.step, 'newStageout', False) or \
            ('newStageOut' in overrides and overrides.get('newStageOut')):
            useNewStageOutCode = True


        stageOutCall = {}
        if "command" in overrides and "option" in overrides \
               and "se-name" in overrides and "phedex-node" in overrides \
               and"lfn-prefix" in overrides:
            logging.critical('using override in StageOut')
            stageOutCall['command']    = overrides.get('command')
            stageOutCall['option']     = overrides.get('option')
            stageOutCall['se-name']    = overrides.get('se-name')
            stageOutCall['phedex-node'] = overrides.get('phedex-node')
            stageOutCall['lfn-prefix'] = overrides.get('lfn-prefix')

        # naw man, this is real
        # iterate over all the incoming files
        if not useNewStageOutCode:
            # old style
            manager = StageOutMgr.StageOutMgr(**stageOutCall)
            manager.numberOfRetries = self.step.retryCount
            manager.retryPauseTime  = self.step.retryDelay
        else:
            # new style
            logging.critical("STAGEOUT IS USING NEW STAGEOUT CODE")
            print "STAGEOUT IS USING NEW STAGEOUT CODE"
            manager = WMCore.Storage.FileManager.StageOutMgr(
                                retryPauseTime  = self.step.retryDelay,
                                numberOfRetries = self.step.retryCount,
                                **stageOutCall)

        # We need to find a list of steps in our task
        # And eventually a list of jobReports for our steps

        # Search through steps for report files
        filesTransferred = []

        for step in self.stepSpace.taskSpace.stepSpaces():
            if step == self.stepName:
                #Don't try to parse your own report; it's not there yet
                continue
            stepLocation = os.path.join(self.stepSpace.taskSpace.location, step)
            logging.info("Beginning report processing for step %s" % (step))
            reportLocation = os.path.join(stepLocation, 'Report.pkl')
            if not os.path.isfile(reportLocation):
                logging.error("Cannot find report for step %s in space %s" \
                              % (step, stepLocation))
                continue
            # First, get everything from a file and 'unpersist' it
            stepReport = Report()
            stepReport.unpersist(reportLocation, step)
            taskID = getattr(stepReport.data, 'id', None)

            # Don't stage out files from bad steps.
            if not stepReport.stepSuccessful(step):
                continue

            # Okay, time to start using stuff
            # Now I'm a bit confused about this; each report should ONLY
            # Have the results of that particular step in it,
            # So getting all the files should get ONLY the files
            # for that step; or so I hope
            files = stepReport.getAllFileRefsFromStep(step = step)
            for file in files:
                if not hasattr(file, 'lfn') and hasattr(file, 'pfn'):
                    # Then we're truly hosed on this file; ignore it
                    msg = "Not a file: %s" % file
                    logging.error(msg)
                    continue
                # Support direct-to-merge
                # This requires pulling a bunch of stuff from everywhere
                # First check if it's needed
                if hasattr(self.step.output, 'minMergeSize') \
                       and hasattr(file, 'size') \
                       and not getattr(file, 'merged', False):

                    # We need both of those to continue, and we don't
                    # direct-to-merge
                    if getattr(self.step.output, 'doNotDirectMerge', False):
                        # Then we've been told explicitly not to do direct-to-merge
                        continue
                    if file.size >= self.step.output.minMergeSize:
                        # Then this goes direct to merge
                        try:
                            file = self.handleLFNForMerge(mergefile = file, step = step)
                        except Exception as ex:
                            logging.error("Encountered error while handling LFN for merge due to size.\n")
                            logging.error(str(ex))
                            logging.debug(file)
                            logging.debug("minMergeSize: %s" % self.step.output.minMergeSize)
                            manager.cleanSuccessfulStageOuts()
                            stepReport.addError(self.stepName, 60401,
                                                "DirectToMergeFailure", str(ex))
                    elif getattr(self.step.output, 'maxMergeEvents', None) != None\
                             and getattr(file, 'events', None) != None\
                             and not getattr(file, 'merged', False):
                        # Then direct-to-merge due to events if
                        # the file is large enough:
                        if file.events >= self.step.output.maxMergeEvents:
                            # straight to merge
                            try:
                                file = self.handleLFNForMerge(mergefile = file, step = step)
                            except Exception as ex:
                                logging.error("Encountered error while handling LFN for merge due to events.\n")
                                logging.error(str(ex))
                                logging.debug(file)
                                logging.debug("maxMergeEvents: %s" % self.step.output.maxMergeEvents)
                                manager.cleanSuccessfulStageOuts()
                                stepReport.addError(self.stepName, 60402,
                                                    "DirectToMergeFailure", str(ex))

                # Save the input PFN in case we need it
                # Undecided whether to move file.pfn to the output PFN
                file.InputPFN   = file.pfn
                lfn = getattr(file, 'lfn')
                fileSource = getattr(file, 'Source', None)
                if fileSource in ['TFileService', 'UserDefined']:
                    userLfnRegEx(lfn)
                else:
                    lfnRegEx(lfn)
                fileForTransfer = {'LFN': lfn,
                                   'PFN': getattr(file, 'pfn'),
                                   'SEName' : None,
                                   'PNN' : None,
                                   'StageOutCommand': None,
                                   'Checksums' : getattr(file, 'checksums', None)}
                signal.signal(signal.SIGALRM, alarmHandler)
                signal.alarm(waitTime)
                try:
                    manager(fileForTransfer)
                    #Afterwards, the file should have updated info.
                    filesTransferred.append(fileForTransfer)
                    file.StageOutCommand = fileForTransfer['StageOutCommand']
#                    file.location        = fileForTransfer['SEName']
                    file.location        = fileForTransfer['PNN']
                    file.OutputPFN       = fileForTransfer['PFN']
                except Alarm:
                    msg = "Indefinite hang during stageOut of logArchive"
                    logging.error(msg)
                    manager.cleanSuccessfulStageOuts()
                    stepReport.addError(self.stepName, 60403,
                                        "StageOutTimeout", msg)
                    stepReport.persist("Report.pkl")
                except Exception as ex:
                    manager.cleanSuccessfulStageOuts()
                    stepReport.addError(self.stepName, 60307,
                                        "StageOutFailure", str(ex))
                    stepReport.setStepStatus(self.stepName, 1)
                    stepReport.persist("Report.pkl")
                    raise

                signal.alarm(0)



            # Am DONE with report
            # Persist it
            stepReport.persist(reportLocation)



        #Done with all steps, and should have a list of
        #stagedOut files in fileForTransfer
        logging.info("Transferred %i files" %(len(filesTransferred)))
        return
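All of these examples follow the same report-handling pattern around getAllFileRefsFromStep: load a pickled framework job report, walk the file references for one step, annotate them, and write the report back. Below is a minimal sketch of that pattern; the annotation values are illustrative, and the import path for Report is assumed from WMCore's layout (WMCore.FwkJobReport.Report).

from WMCore.FwkJobReport.Report import Report


def annotateStepReport(reportPath, step="cmsRun1"):
    """Unpersist a step report, decorate its output file references,
    and persist it again, as the executors and tests above do."""
    stepReport = Report(step)
    stepReport.unpersist(reportPath)

    for fileRef in stepReport.getAllFileRefsFromStep(step=step):
        # Skip anything that does not look like a real output file.
        if not hasattr(fileRef, 'lfn') or not hasattr(fileRef, 'pfn'):
            continue
        # Illustrative metadata, normally filled in by the stage-out code.
        fileRef.location = "cmssrm.fnal.gov"
        fileRef.size = getattr(fileRef, 'size', 0)

    stepReport.persist(reportPath)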