Example #1
    def setUp(self):
        """
        _setUp_

        """

        self.manager = SeederManager()

        self.seedlistForRandom = {
            "simMuonRPCDigis.initialSeed": None,
            "simEcalUnsuppressedDigis.initialSeed": None,
            "simCastorDigis.initialSeed": None,
            "generator.initialSeed": None,
            "simSiStripDigis.initialSeed": None,
            "LHCTransport.initialSeed": None,
            "mix.initialSeed": None,
            "simHcalUnsuppressedDigis.initialSeed": None,
            "theSource.initialSeed": None,
            "simMuonCSCDigis.initialSeed": None,
            "VtxSmeared.initialSeed": None,
            "g4SimHits.initialSeed": None,
            "simSiPixelDigis.initialSeed": None,
            "simMuonDTDigis.initialSeed": None,
            "evtgenproducer.initialSeed": None,
        }

        return
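Each key in the dictionary above names a "<module>.initialSeed" parameter that the RandomSeeder is expected to fill with a freshly generated seed. A minimal sketch of how such a dictionary is consumed, assuming the SeederManager API used in Example #4 below (the import path is an assumption, since these examples omit their imports):

from WMCore.JobSplitting.SeederManager import SeederManager  # path assumed

# Two representative entries; the test above lists one per CMSSW module.
seedlist = {
    "generator.initialSeed": None,
    "mix.initialSeed": None,
}

manager = SeederManager()
# Register a RandomSeeder that should assign a seed to each listed parameter.
manager.addSeeder("RandomSeeder", **seedlist)

# Calling manager(jobGroup) on a WMBS job group then attaches a RandomSeeder
# section with one initialSeed per entry to each job's baggage, which is what
# Example #4 checks via job.getBaggage().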
Example #2
    def __call__(self, parameters):
        """
        Poller for looking in all active subscriptions for jobs that need to be made.

        """

        logging.info("In JobCreatorWorker.__call__")

        myThread = threading.currentThread()

        for entry in parameters:
            # This retrieves a single subscription
            subscriptionID = entry.get('subscription')

            if subscriptionID < 0:
                logging.error("Got non-existant subscription")
                logging.error("Assuming parameters in error: returning")
                return subscriptionID

            myThread.transaction.begin()

            logging.info("About to call subscription %i" %subscriptionID)

            wmbsSubscription = Subscription(id = subscriptionID)
            wmbsSubscription.load()
            wmbsSubscription["workflow"].load()
            workflow         = wmbsSubscription["workflow"]

            wmWorkload       = retrieveWMSpec(wmbsSubscription)

            if not workflow.task or not wmWorkload:
                # Then we have a problem
                # We have no sandbox
                # We NEED a sandbox
                # Abort this subscription!
                # But do NOT fail
                # We have no way of marking a subscription as bad per se
                # We'll have to just keep skipping it
                wmTask = None
                seederList = []
                logging.error("Have no task for workflow %i" % (workflow.id))
                logging.error("Aborting Subscription %i" % (subscriptionID))
                continue

            else:
                wmTask = wmWorkload.getTaskByPath(workflow.task)
                if hasattr(wmTask.data, 'seeders'):
                    manager    = SeederManager(wmTask)
                    seederList = manager.getSeederList()
                else:
                    seederList = []

            logging.info("About to enter JobFactory")
            logging.debug("Going to call wmbsJobFactory with limit %i" % (self.limit))

            # My hope is that the job factory is smart enough only to split un-split jobs
            wmbsJobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                                  subscription = wmbsSubscription,
                                                  generators=seederList,
                                                  limit = self.limit)
            splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
            logging.debug("Split Params: %s" % splitParams)

            continueSubscription = True
            myThread.transaction.commit()

            # Turn on the jobFactory
            myThread.transaction.begin()
            wmbsJobFactory.open()

            # Create a function to hold it
            jobSplittingFunction = runSplitter(jobFactory = wmbsJobFactory,
                                               splitParams = splitParams)
            while continueSubscription:
                # This loop runs over the jobFactory,
                # using yield statements and a pre-existing proxy to
                # generate and process new jobs

                # First we need the jobs.

                try:
                    wmbsJobGroups = jobSplittingFunction.next()
                    logging.info("Retrieved %i jobGroups from jobSplitter" % (len(wmbsJobGroups)))
                except StopIteration:
                    # If you receive a stopIteration, we're done
                    logging.info("Completed iteration over subscription %i" % (subscriptionID))
                    continueSubscription = False
                    continue

                # Now we get to find out what job they are.
                countJobs = self.daoFactory(classname = "Jobs.GetNumberOfJobsPerWorkflow")
                jobNumber = countJobs.execute(workflow = workflow.id,
                                              conn = myThread.transaction.conn,
                                              transaction = True)
                logging.debug("Have %i jobs for this workflow already" % (jobNumber))



                for wmbsJobGroup in wmbsJobGroups:

                    logging.debug("Processing jobGroup %i" % (wmbsJobGroup.exists()))
                    logging.debug("Processing %i jobs" % (len(wmbsJobGroup.jobs)) )

                    # Create a directory
                    self.createWorkArea.processJobs(jobGroup = wmbsJobGroup,
                                                    startDir = self.jobCacheDir,
                                                    workflow = workflow,
                                                    wmWorkload = wmWorkload,
                                                    transaction = myThread.transaction,
                                                    conn = myThread.transaction.conn)


                    for job in wmbsJobGroup.jobs:
                        jobNumber += 1
                        self.saveJob(job = job, workflow = workflow,
                                     wmTask = wmTask, jobNumber = jobNumber)


                    self.advanceJobGroup(wmbsJobGroup)

                    logging.debug("Finished call for jobGroup %i" \
                                 % (wmbsJobGroup.exists()))


            # END: while loop over jobSplitter
            myThread.transaction.commit()



            # About to reset everything
            wmbsJobGroups  = None
            wmTask         = None
            wmWorkload     = None
            splitParams    = None
            wmbsJobFactory = None
            gc.collect()



            # About to check memory
            doMemoryCheck("About to get memory references: End of subscription loop")


        # Final memory check
        doMemoryCheck("About to get memory references: End of __call__()")


        logging.debug("About to return from JobCreatorWorker.__call__()")

        return parameters
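Each subscription in the worker above is handled inside explicit myThread.transaction.begin()/commit() brackets: a short transaction that loads the subscription and builds the job factory, then a second transaction spanning the whole splitting loop. A hypothetical context-manager form of the same bracketing (transaction_scope and its rollback-on-error behaviour are assumptions, not part of WMCore; the worker itself calls begin()/commit() directly and has no rollback path of its own):

from contextlib import contextmanager

@contextmanager
def transaction_scope(transaction):
    # Begin a unit of work, commit on success, roll back if the body raises.
    transaction.begin()
    try:
        yield transaction
        transaction.commit()
    except Exception:
        transaction.rollback()
        raise

With such a helper, each begin()/commit() pair in __call__ would collapse into a single "with transaction_scope(myThread.transaction):" block.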
Example #3
    def __call__(self, parameters):
        """
        Poller for looking in all active subscriptions for jobs that need to be made.

        """

        logging.info("In JobCreatorWorker.__call__")

        myThread = threading.currentThread()

        for entry in parameters:
            # This retrieves a single subscription
            subscriptionID = entry.get('subscription')

            if subscriptionID < 0:
                logging.error("Got non-existant subscription")
                logging.error("Assuming parameters in error: returning")
                return subscriptionID

            myThread.transaction.begin()

            logging.info("About to call subscription %i" %subscriptionID)

            wmbsSubscription = Subscription(id = subscriptionID)
            wmbsSubscription.load()
            wmbsSubscription["workflow"].load()
            workflow         = wmbsSubscription["workflow"]

            wmWorkload       = retrieveWMSpec(wmbsSubscription)

            if not workflow.task or not wmWorkload:
                # Then we have a problem
                # We have no sandbox
                # We NEED a sandbox
                # Abort this subscription!
                # But do NOT fail
                # We have no way of marking a subscription as bad per se
                # We'll have to just keep skipping it
                wmTask = None
                seederList = []
                logging.error("Have no task for workflow %i" % (workflow.id))
                logging.error("Aborting Subscription %i" % (subscriptionID))
                continue

            else:
                wmTask = wmWorkload.getTaskByPath(workflow.task)
                if hasattr(wmTask.data, 'seeders'):
                    manager    = SeederManager(wmTask)
                    seederList = manager.getSeederList()
                else:
                    seederList = []

            logging.info("About to enter JobFactory")
            logging.debug("Going to call wmbsJobFactory with limit %i" % (self.limit))

            # My hope is that the job factory is smart enough only to split un-split jobs
            wmbsJobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                                  subscription = wmbsSubscription,
                                                  generators=seederList,
                                                  limit = self.limit)
            splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
            logging.debug("Split Params: %s" % splitParams)

            continueSubscription = True
            myThread.transaction.commit()

            # Turn on the jobFactory
            myThread.transaction.begin()
            wmbsJobFactory.open()

            # Create a function to hold it
            jobSplittingFunction = runSplitter(jobFactory = wmbsJobFactory,
                                               splitParams = splitParams)
            while continueSubscription:
                # This loop runs over the jobFactory,
                # using yield statements and a pre-existing proxy to
                # generate and process new jobs

                # First we need the jobs.

                try:
                    wmbsJobGroups = next(jobSplittingFunction)
                    logging.info("Retrieved %i jobGroups from jobSplitter" % (len(wmbsJobGroups)))
                except StopIteration:
                    # If you receive a stopIteration, we're done
                    logging.info("Completed iteration over subscription %i" % (subscriptionID))
                    continueSubscription = False
                    continue

                # Now we get to find out what job they are.
                countJobs = self.daoFactory(classname = "Jobs.GetNumberOfJobsPerWorkflow")
                jobNumber = countJobs.execute(workflow = workflow.id,
                                              conn = myThread.transaction.conn,
                                              transaction = True)
                logging.debug("Have %i jobs for this workflow already" % (jobNumber))



                for wmbsJobGroup in wmbsJobGroups:

                    logging.debug("Processing jobGroup %i" % (wmbsJobGroup.exists()))
                    logging.debug("Processing %i jobs" % (len(wmbsJobGroup.jobs)) )

                    # Create a directory
                    self.createWorkArea.processJobs(jobGroup = wmbsJobGroup,
                                                    startDir = self.jobCacheDir,
                                                    workflow = workflow,
                                                    wmWorkload = wmWorkload,
                                                    transaction = myThread.transaction,
                                                    conn = myThread.transaction.conn)


                    for job in wmbsJobGroup.jobs:
                        jobNumber += 1
                        self.saveJob(job = job, workflow = workflow,
                                     wmTask = wmTask, jobNumber = jobNumber)


                    self.advanceJobGroup(wmbsJobGroup)

                    logging.debug("Finished call for jobGroup %i" \
                                 % (wmbsJobGroup.exists()))


            # END: while loop over jobSplitter
            myThread.transaction.commit()



            # About to reset everything
            wmbsJobGroups  = None
            wmTask         = None
            wmWorkload     = None
            splitParams    = None
            wmbsJobFactory = None
            gc.collect()



            # About to check memory
            doMemoryCheck("About to get memory references: End of subscription loop")


        # Final memory check
        doMemoryCheck("About to get memory references: End of __call__()")


        logging.debug("About to return from JobCreatorWorker.__call__()")

        return parameters
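Example #3 is the same worker as Example #2 except for a single line: it advances the splitter with the built-in next(jobSplittingFunction) rather than the Python 2-only jobSplittingFunction.next() method. A self-contained illustration of the two spellings (batches() is a stand-in generator, not a WMCore function):

def batches():
    # Stand-in for runSplitter(): a generator yielding batches of job groups.
    yield ["group-1"]
    yield ["group-2"]

gen = batches()
print(next(gen))   # portable spelling used in Example #3 (Python 2.6+ and 3)
# gen.next()       # Python 2-only spelling used in Example #2; removed in Python 3
try:
    while True:
        print(next(gen))
except StopIteration:
    # Both workers end the per-subscription loop when the generator is exhausted.
    print("done")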
Example #4
class SeederTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        """

        self.manager = SeederManager()

        self.seedlistForRandom = {
            "simMuonRPCDigis.initialSeed": None,
            "simEcalUnsuppressedDigis.initialSeed": None,
            "simCastorDigis.initialSeed": None,
            "generator.initialSeed": None,
            "simSiStripDigis.initialSeed": None,
            "LHCTransport.initialSeed": None,
            "mix.initialSeed": None,
            "simHcalUnsuppressedDigis.initialSeed": None,
            "theSource.initialSeed": None,
            "simMuonCSCDigis.initialSeed": None,
            "VtxSmeared.initialSeed": None,
            "g4SimHits.initialSeed": None,
            "simSiPixelDigis.initialSeed": None,
            "simMuonDTDigis.initialSeed": None,
            "evtgenproducer.initialSeed": None,
        }

        return

    def tearDown(self):
        """
        _tearDown_

        """
        # Do nothing

        return

    def oneHundredFiles(self, splittingAlgo="EventBased", jobType="Processing"):
        """
        _oneHundredFiles_
        
        Generate a WMBS data stack representing 100 files for job splitter
        testing
        
        """
        fileset1 = Fileset(name="EventBasedFiles1")
        for i in range(0, 100):
            f = File("/store/MultipleFileSplit%s.root" % i,  # lfn
                     1000,                                    # size
                     100,                                     # events
                     10 + i,                                  # run
                     12312)                                   # lumi
            f["locations"].add("BULLSHIT")

            fileset1.addFile(f)

        work = Workflow()
        subscription1 = Subscription(fileset=fileset1, workflow=work, split_algo=splittingAlgo, type=jobType)
        splitter = SplitterFactory()
        jobfactory = splitter(subscription1)
        jobs = jobfactory(events_per_job=100)
        # for jobGroup in jobs:
        #    yield jobGroup

        self.manager.addSeeder("RandomSeeder", **self.seedlistForRandom)
        self.manager.addSeeder("RunAndLumiSeeder")

        return jobs

    def testSimpleFiles(self):
        """
        _testSimpleFiles_

        
        test using one hundred files that we can save the attributes in a job
        """
        jobs = self.oneHundredFiles()

        for jobGrp in jobs:
            self.manager(jobGrp)

        for jobGrp in jobs:
            count = 0
            for job in jobGrp.jobs:
                conf = job.getBaggage()
                self.assertEqual(hasattr(conf.RandomSeeder.evtgenproducer, "initialSeed"), True)
                self.assertEqual(hasattr(conf.RandomSeeder.generator, "initialSeed"), True)
                self.assertEqual(job["mask"]["FirstLumi"], count % 11)
                self.assertEqual(job["mask"]["FirstRun"], (count / 11) + 1)
                count += 1

        return

    def testWMTask(self):
        """
        _testWMTask_
        
        Test whether or not we can read the seeder parameters out of a WMTask.
        Also tests RandomSeeder and RunAndLumiSeeder
        """

        task1 = makeWMTask("task1")

        randomDict = {"generator.initialSeed": None, "evtgenproducer.initialSeed": None, "MAXINT": 10000}
        lumiDict = {"lumi_per_run": 5}

        task1.addGenerator("RandomSeeder", **randomDict)
        task1.addGenerator("RunAndLumiSeeder", **lumiDict)

        manager = SeederManager(task=task1)

        jobs = self.oneHundredFiles()

        for jobGrp in jobs:
            manager(jobGrp)

        for jobGrp in jobs:
            count = 0
            for job in jobGrp.jobs:
                conf = job.getBaggage()
                self.assertEqual(hasattr(conf.RandomSeeder.evtgenproducer, "initialSeed"), True)
                self.assertEqual(hasattr(conf.RandomSeeder.generator, "initialSeed"), True)
                self.assertEqual(job["mask"]["FirstLumi"], count % 6)
                self.assertEqual(job["mask"]["FirstRun"], (count / 6) + 1)
                x = conf.RandomSeeder.generator.initialSeed
                assert x > 0, "ERROR: producing negative random numbers"
                assert x < 10000, "ERROR: MAXINT tag failed; producing bad random number %i" % (x)
                count += 1

        return

    def testPresetSeeder(self):
        """
        _testPresetSeeder_
        
        Test whether the PresetSeeder works
        """

        task1 = makeWMTask("task2")

        seederDict = {"generator.initialSeed": 1001, "evtgenproducer.initialSeed": 1001}
        task1.addGenerator("PresetSeeder", **seederDict)

        manager = SeederManager(task=task1)

        jobs = self.oneHundredFiles()

        for jobGrp in jobs:
            manager(jobGrp)

        for jobGrp in jobs:
            count = 0
            for job in jobGrp.jobs:
                conf = job.getBaggage()
                self.assertEqual(conf.PresetSeeder.evtgenproducer.initialSeed, 1001)
                self.assertEqual(conf.PresetSeeder.generator.initialSeed, 1001)

        return
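The mask assertions in testSimpleFiles and testWMTask encode the RunAndLumiSeeder arithmetic the tests expect: with lumi_per_run = 5, FirstLumi cycles modulo 6 and FirstRun advances every 6 jobs, while the default configuration in testSimpleFiles behaves the same way with a period of 11. A small mirror of that arithmetic, included only to make the asserted values explicit (run_and_lumi is illustrative, not a WMCore helper):

def run_and_lumi(job_index, lumi_per_run):
    # Mirrors the values asserted on job["mask"] above: lumis 0..lumi_per_run
    # within each run, then the run number advances.
    period = lumi_per_run + 1
    return job_index // period + 1, job_index % period  # (FirstRun, FirstLumi)

assert run_and_lumi(0, 5) == (1, 0)
assert run_and_lumi(6, 5) == (2, 0)    # seventh job starts run 2
assert run_and_lumi(13, 5) == (3, 1)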
Example #5
    def pollSubscriptions(self):
        """
        Poller for looking in all active subscriptions for jobs that need to be made.

        """
        logging.info("Beginning JobCreator.pollSubscriptions() cycle.")
        myThread = threading.currentThread()

        #First, get list of Subscriptions
        subscriptions    = self.subscriptionList.execute()

        # Okay, now we have a list of subscriptions
        for subscriptionID in subscriptions:
            wmbsSubscription = Subscription(id = subscriptionID)
            try:
                wmbsSubscription.load()
            except IndexError:
                # This happens when the subscription no longer exists
                # i.e., someone executed a kill() function on the database
                # while the JobCreator was in cycle
                # Ignore this subscription
                msg = "JobCreator cannot load subscription %i" % subscriptionID
                logging.error(msg)
                self.sendAlert(6, msg = msg)
                continue

            workflow         = Workflow(id = wmbsSubscription["workflow"].id)
            workflow.load()
            wmbsSubscription['workflow'] = workflow
            wmWorkload       = retrieveWMSpec(workflow = workflow)

            if not workflow.task or not wmWorkload:
                # Then we have a problem
                # We NEED a sandbox
                # Abort this subscription!
                # But do NOT fail
                # We have no way of marking a subscription as bad per se
                # We'll have to just keep skipping it
                msg = "Have no task for workflow %i\n" % (workflow.id)
                msg += "Aborting Subscription %i" % (subscriptionID)
                logging.error(msg)
                self.sendAlert(1, msg = msg)
                continue

            logging.debug("Have loaded subscription %i with workflow %i\n" % (subscriptionID, workflow.id))

            # Set task object
            wmTask = wmWorkload.getTaskByPath(workflow.task)
            if hasattr(wmTask.data, 'seeders'):
                manager    = SeederManager(wmTask)
                seederList = manager.getSeederList()
            else:
                seederList = []

            logging.debug("Going to call wmbsJobFactory for sub %i with limit %i" % (subscriptionID, self.limit))
            
            # My hope is that the job factory is smart enough only to split un-split jobs
            wmbsJobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                                  subscription = wmbsSubscription,
                                                  generators=seederList,
                                                  limit = self.limit)
            splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
            logging.debug("Split Params: %s" % splitParams)

            # Turn on the jobFactory
            wmbsJobFactory.open()

            # Create a function to hold it
            jobSplittingFunction = runSplitter(jobFactory = wmbsJobFactory,
                                               splitParams = splitParams)

            # Now we get to find out how many jobs there are.
            jobNumber = self.countJobs.execute(workflow = workflow.id,
                                               conn = myThread.transaction.conn, 
                                               transaction = True)
            jobNumber += splitParams.get('initial_lfn_counter', 0)
            logging.debug("Have %i jobs for this workflow already" % (jobNumber))

            continueSubscription = True
            while continueSubscription:
                # This loop runs over the jobFactory,
                # using yield statements and a pre-existing proxy to
                # generate and process new jobs

                # First we need the jobs.
                myThread.transaction.begin()
                try:
                    wmbsJobGroups = jobSplittingFunction.next()
                    logging.info("Retrieved %i jobGroups from jobSplitter" % (len(wmbsJobGroups)))
                except StopIteration:
                    # If you receive a stopIteration, we're done
                    logging.info("Completed iteration over subscription %i" % (subscriptionID))
                    continueSubscription = False
                    myThread.transaction.commit()
                    break

                # If we have no jobGroups, we're done
                if len(wmbsJobGroups) == 0:
                    logging.info("Found end in iteration over subscription %i" % (subscriptionID))
                    continueSubscription = False
                    myThread.transaction.commit()
                    break

                            
                # Assemble a dict of all the info
                processDict = {'workflow': workflow,
                               'wmWorkload': wmWorkload, 'wmTaskName': wmTask.getPathName(),
                               'jobNumber': jobNumber, 'sandbox': wmTask.data.input.sandbox,
                               'wmTaskPrio': wmTask.getTaskPriority(),
                               'owner': wmWorkload.getOwner().get('name', None),
                               'ownerDN': wmWorkload.getOwner().get('dn', None)}
                tempSubscription = Subscription(id = wmbsSubscription['id'])

                nameDictList = []
                for wmbsJobGroup in wmbsJobGroups:
                    # For each jobGroup, put a dictionary
                    # together and run it with creatorProcess
                    jobsInGroup               = len(wmbsJobGroup.jobs)
                    wmbsJobGroup.subscription = tempSubscription
                    tempDict = {}
                    tempDict.update(processDict)
                    tempDict['jobGroup']  = wmbsJobGroup
                    tempDict['swVersion'] = wmTask.getSwVersion()
                    tempDict['scramArch'] = wmTask.getScramArch()

                    jobGroup = creatorProcess(work = tempDict,
                                              jobCacheDir = self.jobCacheDir)
                    jobNumber += jobsInGroup

                    # Set jobCache for group
                    for job in jobGroup.jobs:
                        nameDictList.append({'jobid':job['id'],
                                             'cacheDir':job['cache_dir']})
                        job["user"] = wmWorkload.getOwner()["name"]
                        job["group"] = wmWorkload.getOwner()["group"]
                # Set the caches in the database
                try:
                    if len(nameDictList) > 0:
                        self.setBulkCache.execute(jobDictList = nameDictList,
                                                  conn = myThread.transaction.conn, 
                                                  transaction = True)
                except WMException:
                    raise
                except Exception, ex:
                    msg =  "Unknown exception while setting the bulk cache:\n"
                    msg += str(ex)
                    logging.error(msg)
                    self.sendAlert(6, msg = msg)
                    logging.debug("Error while setting bulkCache with following values: %s\n" % nameDictList)
                    raise JobCreatorException(msg)

                # Advance the jobGroup in changeState
                for wmbsJobGroup in wmbsJobGroups:
                    self.advanceJobGroup(wmbsJobGroup = wmbsJobGroup)

                # Now end the transaction so that everything is wrapped
                # in a single rollback
                myThread.transaction.commit()


            # END: While loop over jobFactory

            # Close the jobFactory
            wmbsJobFactory.close()
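All of these pollers drive runSplitter() the same way: open the job factory, then pull batches of WMBS job groups out of a generator until it raises StopIteration (or, in this example, yields an empty batch). A rough sketch of that generator contract as implied by the call sites, offered as an assumption about its shape rather than the actual WMCore implementation:

def runSplitter(jobFactory, splitParams):
    # Assumed contract, inferred from the callers above: invoke the opened job
    # factory repeatedly with the split parameters and yield each batch of job
    # groups; callers stop on StopIteration or on an empty batch.
    while True:
        jobGroups = jobFactory(**splitParams)
        yield jobGroups
        if not jobGroups:
            return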