def setUp(self):
    """
    _setUp_

    Build the GeneratorManager under test plus the mapping of
    "<module>.initialSeed" names (all unset, i.e. None) that is later
    handed to the RandomSeeder generator.
    """
    self.manager = GeneratorManager()
    seedNames = (
        "simMuonRPCDigis.initialSeed",
        "simEcalUnsuppressedDigis.initialSeed",
        "simCastorDigis.initialSeed",
        "generator.initialSeed",
        "simSiStripDigis.initialSeed",
        "LHCTransport.initialSeed",
        "mix.initialSeed",
        "simHcalUnsuppressedDigis.initialSeed",
        "theSource.initialSeed",
        "simMuonCSCDigis.initialSeed",
        "VtxSmeared.initialSeed",
        "g4SimHits.initialSeed",
        "simSiPixelDigis.initialSeed",
        "simMuonDTDigis.initialSeed",
        "evtgenproducer.initialSeed",
    )
    # dict.fromkeys defaults every value to None, matching the original
    # literal {"...initialSeed": None, ...} mapping exactly.
    self.seedlistForRandom = dict.fromkeys(seedNames)
    return
def testPresetSeeder(self):
    """
    _testPresetSeeder_

    Verify that seeds configured through the PresetSeeder generator are
    copied verbatim into the baggage of every created job.
    """
    presetTask = makeWMTask("task2")
    presetSeeds = {"generator.initialSeed": 1001,
                   "evtgenproducer.initialSeed": 1001}
    presetTask.addGenerator("PresetSeeder", **presetSeeds)
    manager = GeneratorManager(task=presetTask)

    jobGroups = self.oneHundredFiles()

    # First run the manager over every job group ...
    for group in jobGroups:
        manager(group)

    # ... then confirm each job carries the exact preset seed values.
    for group in jobGroups:
        for job in group.jobs:
            baggage = job.getBaggage()
            self.assertEqual(baggage.PresetSeeder.evtgenproducer.initialSeed, 1001)
            self.assertEqual(baggage.PresetSeeder.generator.initialSeed, 1001)
    return
def testWMTask(self):
    """
    _testWMTask_

    Read the seeder parameters out of a WMTask and exercise both the
    RandomSeeder and the RunAndLumiSeeder through a GeneratorManager.
    """
    seededTask = makeWMTask("task1")
    seededTask.addGenerator("RandomSeeder",
                            **{"generator.initialSeed": None,
                               "evtgenproducer.initialSeed": None,
                               "MAXINT": 1})
    seededTask.addGenerator("RunAndLumiSeeder", **{"lumi_per_run": 5})
    manager = GeneratorManager(task=seededTask)

    jobGroups = self.oneHundredFiles()
    for group in jobGroups:
        manager(group)

    for group in jobGroups:
        count = 0
        for job in group.jobs:
            baggage = job.getBaggage()
            # Every job must have been given a seed for both modules.
            self.assertTrue(
                hasattr(baggage.RandomSeeder.evtgenproducer, 'initialSeed'))
            self.assertTrue(
                hasattr(baggage.RandomSeeder.generator, 'initialSeed'))
            seed = baggage.RandomSeeder.generator.initialSeed
            # MAXINT == 1 forces the drawn seed into the range (0, 1].
            self.assertTrue(seed > 0, "ERROR: producing negative random numbers")
            self.assertTrue(
                seed <= 1,
                "ERROR: MAXINT tag failed; producing bad random number %i" % (seed))
            count += 1
    return
def pollSubscriptions(self):
    """
    _pollSubscriptions_

    Poller for looking in all active subscriptions for jobs that need to be made.

    For every loadable subscription this:
      1. loads its workflow and WMWorkload spec, skipping (with an alert)
         when either is missing,
      2. builds the generator/seeder list declared on the task, if any,
      3. runs the configured job-splitting algorithm in batches of at most
         self.limit, creating jobs via creatorProcess inside one database
         transaction per batch,
      4. bulk-records each job's cache directory and advances the job
         groups through changeState.

    A failure while writing the bulk cache raises JobCreatorException;
    all other per-subscription failures are logged/alerted and skipped.
    """
    logging.info("Beginning JobCreator.pollSubscriptions() cycle.")
    myThread = threading.currentThread()

    #First, get list of Subscriptions
    subscriptions = self.subscriptionList.execute()

    # Okay, now we have a list of subscriptions
    for subscriptionID in subscriptions:
        wmbsSubscription = Subscription(id = subscriptionID)
        try:
            wmbsSubscription.load()
        except IndexError:
            # This happens when the subscription no longer exists
            # i.e., someone executed a kill() function on the database
            # while the JobCreator was in cycle
            # Ignore this subscription
            msg = "JobCreator cannot load subscription %i" % subscriptionID
            logging.error(msg)
            self.sendAlert(6, msg = msg)
            continue

        workflow = Workflow(id = wmbsSubscription["workflow"].id)
        workflow.load()
        wmbsSubscription['workflow'] = workflow
        wmWorkload = retrieveWMSpec(workflow = workflow)

        if not workflow.task or not wmWorkload:
            # Then we have a problem
            # We NEED a sandbox
            # Abort this subscription!
            # But do NOT fail
            # We have no way of marking a subscription as bad per se
            # We'll have to just keep skipping it
            msg = "Have no task for workflow %i\n" % (workflow.id)
            msg += "Aborting Subscription %i" % (subscriptionID)
            logging.error(msg)
            self.sendAlert(1, msg = msg)
            continue

        logging.debug("Have loaded subscription %i with workflow %i\n" % (subscriptionID, workflow.id))

        # Set task object
        wmTask = wmWorkload.getTaskByPath(workflow.task)

        # Get generators
        # If you fail to load the generators, pass on the job
        try:
            if hasattr(wmTask.data, 'generators'):
                manager = GeneratorManager(wmTask)
                seederList = manager.getGeneratorList()
            else:
                seederList = []
        except Exception, ex:
            msg = "Had failure loading generators for subscription %i\n" % (subscriptionID)
            msg += "Exception: %s\n" % str(ex)
            msg += "Passing over this error. It will reoccur next interation!\n"
            msg += "Please check or remove this subscription!\n"
            logging.error(msg)
            self.sendAlert(6, msg = msg)
            continue

        logging.debug("Going to call wmbsJobFactory for sub %i with limit %i" % (subscriptionID, self.limit))
        splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
        logging.debug("Split Params: %s" % splitParams)

        # My hope is that the job factory is smart enough only to split un-split jobs
        splitterFactory = SplitterFactory(splitParams.get('algo_package', "WMCore.JobSplitting"))
        wmbsJobFactory = splitterFactory(package = "WMCore.WMBS",
                                         subscription = wmbsSubscription,
                                         generators=seederList,
                                         limit = self.limit)

        # Turn on the jobFactory
        wmbsJobFactory.open()

        # Create a function to hold it
        jobSplittingFunction = runSplitter(jobFactory = wmbsJobFactory,
                                           splitParams = splitParams)

        # Now we get to find out how many jobs there are.
        jobNumber = self.countJobs.execute(workflow = workflow.id,
                                           conn = myThread.transaction.conn,
                                           transaction = True)
        # initial_lfn_counter offsets the numbering so new job LFNs do not
        # collide with pre-existing ones.
        jobNumber += splitParams.get('initial_lfn_counter', 0)
        logging.debug("Have %i jobs for workflow %s already in database." % (jobNumber, workflow.name))

        continueSubscription = True
        while continueSubscription:
            # This loop runs over the jobFactory,
            # using yield statements and a pre-existing proxy to
            # generate and process new jobs

            # First we need the jobs.
            # NOTE: each batch is wrapped in its own transaction so that a
            # failure rolls back only the current batch.
            myThread.transaction.begin()
            try:
                wmbsJobGroups = jobSplittingFunction.next()
                logging.info("Retrieved %i jobGroups from jobSplitter" % (len(wmbsJobGroups)))
            except StopIteration:
                # If you receive a stopIteration, we're done
                logging.info("Completed iteration over subscription %i" % (subscriptionID))
                continueSubscription = False
                myThread.transaction.commit()
                break

            # If we have no jobGroups, we're done
            if len(wmbsJobGroups) == 0:
                logging.info("Found end in iteration over subscription %i" % (subscriptionID))
                continueSubscription = False
                myThread.transaction.commit()
                break

            # Assemble a dict of all the info
            processDict = {'workflow': workflow,
                           'wmWorkload': wmWorkload, 'wmTaskName': wmTask.getPathName(),
                           'jobNumber': jobNumber, 'sandbox': wmTask.data.input.sandbox,
                           'owner': wmWorkload.getOwner().get('name', None),
                           'ownerDN': wmWorkload.getOwner().get('dn', None),
                           'ownerGroup': wmWorkload.getOwner().get('vogroup', ''),
                           'ownerRole': wmWorkload.getOwner().get('vorole', '')}

            tempSubscription = Subscription(id = wmbsSubscription['id'])

            nameDictList = []
            for wmbsJobGroup in wmbsJobGroups:
                # For each jobGroup, put a dictionary
                # together and run it with creatorProcess
                jobsInGroup = len(wmbsJobGroup.jobs)
                wmbsJobGroup.subscription = tempSubscription
                tempDict = {}
                tempDict.update(processDict)
                tempDict['jobGroup'] = wmbsJobGroup
                tempDict['swVersion'] = wmTask.getSwVersion()
                tempDict['scramArch'] = wmTask.getScramArch()
                tempDict['jobNumber'] = jobNumber
                tempDict['agentNumber'] = self.agentNumber

                jobGroup = creatorProcess(work = tempDict,
                                          jobCacheDir = self.jobCacheDir)
                jobNumber += jobsInGroup

                # Set jobCache for group
                for job in jobGroup.jobs:
                    nameDictList.append({'jobid':job['id'], 'cacheDir':job['cache_dir']})
                    job["user"] = wmWorkload.getOwner()["name"]
                    job["group"] = wmWorkload.getOwner()["group"]

            # Set the caches in the database
            try:
                if len(nameDictList) > 0:
                    self.setBulkCache.execute(jobDictList = nameDictList,
                                              conn = myThread.transaction.conn,
                                              transaction = True)
            except WMException:
                raise
            except Exception, ex:
                msg = "Unknown exception while setting the bulk cache:\n"
                msg += str(ex)
                logging.error(msg)
                self.sendAlert(6, msg = msg)
                logging.debug("Error while setting bulkCache with following values: %s\n" % nameDictList)
                raise JobCreatorException(msg)

            # Advance the jobGroup in changeState
            for wmbsJobGroup in wmbsJobGroups:
                self.advanceJobGroup(wmbsJobGroup = wmbsJobGroup)

            # Now end the transaction so that everything is wrapped
            # in a single rollback
            myThread.transaction.commit()
class SeederTest(unittest.TestCase):
    """
    Unit tests for the GeneratorManager seeder machinery:
    RandomSeeder, RunAndLumiSeeder and PresetSeeder.
    """

    def setUp(self):
        """
        _setUp_

        Build a GeneratorManager and the mapping of seed names (all unset)
        handed to the RandomSeeder in oneHundredFiles().
        """
        self.manager = GeneratorManager()
        self.seedlistForRandom = {
            "simMuonRPCDigis.initialSeed": None,
            "simEcalUnsuppressedDigis.initialSeed": None,
            "simCastorDigis.initialSeed": None,
            "generator.initialSeed": None,
            "simSiStripDigis.initialSeed": None,
            "LHCTransport.initialSeed": None,
            "mix.initialSeed": None,
            "simHcalUnsuppressedDigis.initialSeed": None,
            "theSource.initialSeed": None,
            "simMuonCSCDigis.initialSeed": None,
            "VtxSmeared.initialSeed": None,
            "g4SimHits.initialSeed": None,
            "simSiPixelDigis.initialSeed": None,
            "simMuonDTDigis.initialSeed": None,
            "evtgenproducer.initialSeed": None
            }
        return

    def tearDown(self):
        """
        _tearDown_
        """
        #Do nothing
        return

    def oneHundredFiles(self, splittingAlgo = "EventBased", jobType = "Processing"):
        """
        _oneHundredFiles_

        Generate a WMBS data stack representing 100 files for job splitter testing

        Builds a fileset of 100 single-lumi files, splits it with the given
        algorithm (100 events per job) and attaches the RandomSeeder and
        RunAndLumiSeeder generators to self.manager before returning the
        resulting job groups.
        """
        fileset1 = Fileset(name='EventBasedFiles1')
        for i in range(0, 100):
            f = File("/store/MultipleFileSplit%s.root" % i, # lfn
                     1000,   # size
                     100,    # events
                     10 + i, # run
                     12312   # lumi
                     )
            f['locations'].add("BULLSHIT")
            fileset1.addFile(f)

        work = Workflow()
        subscription1 = Subscription(fileset = fileset1,
                                     workflow = work,
                                     split_algo = splittingAlgo,
                                     type = jobType)
        splitter = SplitterFactory()
        jobfactory = splitter(subscription1)
        jobs = jobfactory(events_per_job = 100)
        #for jobGroup in jobs:
        #    yield jobGroup

        self.manager.addGenerator("RandomSeeder", **self.seedlistForRandom)
        self.manager.addGenerator("RunAndLumiSeeder")

        return jobs

    def testSimpleFiles(self):
        """
        _testSimpleFiles_

        test using one hundred files that we can save the attributes in a job
        """
        jobs = self.oneHundredFiles()

        for jobGrp in jobs:
            self.manager(jobGrp)

        for jobGrp in jobs:
            count = 0
            for job in jobGrp.jobs:
                conf = job.getBaggage()
                self.assertEqual(hasattr(conf.RandomSeeder.evtgenproducer, 'initialSeed'), True)
                self.assertEqual(hasattr(conf.RandomSeeder.generator, 'initialSeed'), True)
                # RunAndLumiSeeder rolls lumi over every 11 jobs (default);
                # the run number increments on each rollover.
                # NOTE(review): (count/11) relies on integer division - Python 2 semantics.
                self.assertEqual(job["mask"]["FirstLumi"], count%11)
                self.assertEqual(job["mask"]["FirstRun"], (count/11) + 1)
                count += 1
        return

    def testWMTask(self):
        """
        _testWMTask_

        Test whether or not we can read the seeder parameters out of a WMTask.
        Also tests RandomSeeder and RunAndLumiSeeder
        """
        task1 = makeWMTask("task1")
        randomDict = {"generator.initialSeed": None, "evtgenproducer.initialSeed": None, "MAXINT": 1}
        lumiDict = {"lumi_per_run": 5}
        task1.addGenerator("RandomSeeder", **randomDict)
        task1.addGenerator("RunAndLumiSeeder", **lumiDict)
        manager = GeneratorManager(task = task1)

        jobs = self.oneHundredFiles()

        for jobGrp in jobs:
            manager(jobGrp)

        for jobGrp in jobs:
            count = 0
            for job in jobGrp.jobs:
                conf = job.getBaggage()
                self.assertTrue(hasattr(conf.RandomSeeder.evtgenproducer, 'initialSeed'))
                self.assertTrue(hasattr(conf.RandomSeeder.generator, 'initialSeed'))
                #self.assertEqual(job["mask"]["FirstLumi"], count%6)
                #self.assertEqual(job["mask"]["FirstRun"], (count/6) + 1)
                # MAXINT == 1 forces the drawn seed into the range (0, 1].
                x = conf.RandomSeeder.generator.initialSeed
                self.assertTrue(x > 0, "ERROR: producing negative random numbers")
                self.assertTrue(x <= 1, "ERROR: MAXINT tag failed; producing bad random number %i" %(x))
                count += 1
        return

    def testPresetSeeder(self):
        """
        _testPresetSeeder_

        Test whether the PresetSeeder works: configured seeds must be copied
        verbatim into every job's baggage.
        """
        task1 = makeWMTask("task2")
        seederDict = {"generator.initialSeed": 1001, "evtgenproducer.initialSeed": 1001}
        task1.addGenerator("PresetSeeder", **seederDict)
        manager = GeneratorManager(task = task1)

        jobs = self.oneHundredFiles()

        for jobGrp in jobs:
            manager(jobGrp)

        for jobGrp in jobs:
            count = 0
            for job in jobGrp.jobs:
                conf = job.getBaggage()
                self.assertEqual(conf.PresetSeeder.evtgenproducer.initialSeed, 1001)
                self.assertEqual(conf.PresetSeeder.generator.initialSeed, 1001)
        return
def pollSubscriptions(self):
    """
    _pollSubscriptions_

    Poller for looking in all active subscriptions for jobs that need to be made.

    For every loadable subscription this loads workflow and workload spec
    (skipping when either is missing), builds the task's generator list,
    then drives the configured job-splitting algorithm in batches of at
    most self.limit. Each batch is created inside its own transaction via
    creatorProcess, the job cache directories are stored in bulk, and the
    job groups are advanced through changeState. A bulk-cache failure
    raises JobCreatorException; other per-subscription failures are
    logged and skipped.
    """
    logging.info("Beginning JobCreator.pollSubscriptions() cycle.")
    myThread = threading.currentThread()

    # First, get list of Subscriptions
    subscriptions = self.subscriptionList.execute()

    # Okay, now we have a list of subscriptions
    for subscriptionID in subscriptions:
        wmbsSubscription = Subscription(id=subscriptionID)
        try:
            wmbsSubscription.load()
        except IndexError:
            # This happens when the subscription no longer exists
            # i.e., someone executed a kill() function on the database
            # while the JobCreator was in cycle
            # Ignore this subscription
            msg = "JobCreator cannot load subscription %i" % subscriptionID
            logging.error(msg)
            continue

        workflow = Workflow(id=wmbsSubscription["workflow"].id)
        workflow.load()
        wmbsSubscription['workflow'] = workflow
        wmWorkload = retrieveWMSpec(workflow=workflow)

        if not workflow.task or not wmWorkload:
            # Then we have a problem
            # We NEED a sandbox
            # Abort this subscription!
            # But do NOT fail
            # We have no way of marking a subscription as bad per se
            # We'll have to just keep skipping it
            msg = "Have no task for workflow %i\n" % (workflow.id)
            msg += "Aborting Subscription %i" % (subscriptionID)
            logging.error(msg)
            continue

        logging.debug("Have loaded subscription %i with workflow %i\n", subscriptionID, workflow.id)

        # retrieve information from the workload to propagate down to the job configuration
        allowOpport = wmWorkload.getAllowOpportunistic()

        # Set task object
        wmTask = wmWorkload.getTaskByPath(workflow.task)

        # Get generators
        # If you fail to load the generators, pass on the job
        try:
            if hasattr(wmTask.data, 'generators'):
                manager = GeneratorManager(wmTask)
                seederList = manager.getGeneratorList()
            else:
                seederList = []
        except Exception as ex:
            msg = "Had failure loading generators for subscription %i\n" % (subscriptionID)
            msg += "Exception: %s\n" % str(ex)
            msg += "Passing over this error. It will reoccur next interation!\n"
            msg += "Please check or remove this subscription!\n"
            logging.error(msg)
            continue

        logging.debug("Going to call wmbsJobFactory for sub %i with limit %i", subscriptionID, self.limit)
        splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
        logging.debug("Split Params: %s", splitParams)

        # Load the proper job splitting module
        splitterFactory = SplitterFactory(splitParams.get('algo_package', "WMCore.JobSplitting"))
        # and return an instance of the splitting algorithm
        wmbsJobFactory = splitterFactory(package="WMCore.WMBS",
                                         subscription=wmbsSubscription,
                                         generators=seederList,
                                         limit=self.limit)

        # Turn on the jobFactory --> get available files for that subscription, keep result proxies
        wmbsJobFactory.open()

        # Create a function to hold it, calling __call__ from the JobFactory
        # which then calls algorithm method of the job splitting algo instance
        jobSplittingFunction = runSplitter(jobFactory=wmbsJobFactory,
                                           splitParams=splitParams)

        # Now we get to find out how many jobs there are.
        jobNumber = self.countJobs.execute(workflow=workflow.id,
                                           conn=myThread.transaction.conn,
                                           transaction=True)
        # initial_lfn_counter offsets the numbering so new job LFNs do not
        # collide with pre-existing ones.
        jobNumber += splitParams.get('initial_lfn_counter', 0)
        logging.debug("Have %i jobs for workflow %s already in database.", jobNumber, workflow.name)

        continueSubscription = True
        while continueSubscription:
            # This loop runs over the jobFactory,
            # using yield statements and a pre-existing proxy to
            # generate and process new jobs

            # First we need the jobs.
            # Each batch gets its own transaction so a failure rolls back
            # only the current batch.
            myThread.transaction.begin()
            try:
                wmbsJobGroups = next(jobSplittingFunction)
                logging.info("Retrieved %i jobGroups from jobSplitter", len(wmbsJobGroups))
            except StopIteration:
                # If you receive a stopIteration, we're done
                logging.info("Completed iteration over subscription %i", subscriptionID)
                continueSubscription = False
                myThread.transaction.commit()
                break

            # If we have no jobGroups, we're done
            if len(wmbsJobGroups) == 0:
                logging.info("Found end in iteration over subscription %i", subscriptionID)
                continueSubscription = False
                myThread.transaction.commit()
                break

            # Assemble a dict of all the info
            processDict = {'workflow': workflow,
                           'wmWorkload': wmWorkload, 'wmTaskName': wmTask.getPathName(),
                           'jobNumber': jobNumber, 'sandbox': wmTask.data.input.sandbox,
                           'owner': wmWorkload.getOwner().get('name', None),
                           'ownerDN': wmWorkload.getOwner().get('dn', None),
                           'ownerGroup': wmWorkload.getOwner().get('vogroup', ''),
                           'ownerRole': wmWorkload.getOwner().get('vorole', ''),
                           'numberOfCores': 1,
                           'inputDataset': wmTask.getInputDatasetPath(),
                           'inputPileup': wmTask.getInputPileupDatasets()}

            # Use the maximum core count declared by any step of the task;
            # falls back to 1 if the step helpers lack the accessor.
            try:
                maxCores = 1
                stepNames = wmTask.listAllStepNames()
                for stepName in stepNames:
                    sh = wmTask.getStep(stepName)
                    maxCores = max(maxCores, sh.getNumberOfCores())
                processDict.update({'numberOfCores': maxCores})
            except AttributeError:
                logging.info("Failed to read multicore settings from task %s", wmTask.getPathName())

            tempSubscription = Subscription(id=wmbsSubscription['id'])

            # if we have glideinWMS constraints, then adapt all jobs
            if self.glideinLimits:
                capResourceEstimates(wmbsJobGroups, self.glideinLimits)

            nameDictList = []
            for wmbsJobGroup in wmbsJobGroups:
                # For each jobGroup, put a dictionary
                # together and run it with creatorProcess
                jobsInGroup = len(wmbsJobGroup.jobs)
                wmbsJobGroup.subscription = tempSubscription
                tempDict = {}
                tempDict.update(processDict)
                tempDict['jobGroup'] = wmbsJobGroup
                tempDict['swVersion'] = wmTask.getSwVersion(allSteps=True)
                tempDict['scramArch'] = wmTask.getScramArch()
                tempDict['jobNumber'] = jobNumber
                tempDict['agentNumber'] = self.agentNumber
                tempDict['agentName'] = self.agentName
                tempDict['inputDatasetLocations'] = wmbsJobGroup.getLocationsForJobs()
                tempDict['allowOpportunistic'] = allowOpport

                jobGroup = creatorProcess(work=tempDict,
                                          jobCacheDir=self.jobCacheDir)
                jobNumber += jobsInGroup

                # Set jobCache for group
                for job in jobGroup.jobs:
                    nameDictList.append({'jobid': job['id'],
                                         'cacheDir': job['cache_dir']})
                    job["user"] = wmWorkload.getOwner()["name"]
                    job["group"] = wmWorkload.getOwner()["group"]

            # Set the caches in the database
            try:
                if len(nameDictList) > 0:
                    self.setBulkCache.execute(jobDictList=nameDictList,
                                              conn=myThread.transaction.conn,
                                              transaction=True)
            except WMException:
                raise
            except Exception as ex:
                msg = "Unknown exception while setting the bulk cache:\n"
                msg += str(ex)
                logging.error(msg)
                logging.debug("Error while setting bulkCache with following values: %s\n", nameDictList)
                raise JobCreatorException(msg)

            # Advance the jobGroup in changeState
            for wmbsJobGroup in wmbsJobGroups:
                self.advanceJobGroup(wmbsJobGroup=wmbsJobGroup)

            # Now end the transaction so that everything is wrapped
            # in a single rollback
            myThread.transaction.commit()
        # END: While loop over jobFactory

        # Close the jobFactory
        wmbsJobFactory.close()
    return
class SeederTest(unittest.TestCase):
    """
    Unit tests for the GeneratorManager seeder machinery:
    RandomSeeder, RunAndLumiSeeder and PresetSeeder.
    """

    def setUp(self):
        """
        _setUp_

        Build a GeneratorManager and the mapping of seed names (all unset)
        handed to the RandomSeeder in oneHundredFiles().
        """
        self.manager = GeneratorManager()
        self.seedlistForRandom = {
            "simMuonRPCDigis.initialSeed": None,
            "simEcalUnsuppressedDigis.initialSeed": None,
            "simCastorDigis.initialSeed": None,
            "generator.initialSeed": None,
            "simSiStripDigis.initialSeed": None,
            "LHCTransport.initialSeed": None,
            "mix.initialSeed": None,
            "simHcalUnsuppressedDigis.initialSeed": None,
            "theSource.initialSeed": None,
            "simMuonCSCDigis.initialSeed": None,
            "VtxSmeared.initialSeed": None,
            "g4SimHits.initialSeed": None,
            "simSiPixelDigis.initialSeed": None,
            "simMuonDTDigis.initialSeed": None,
            "evtgenproducer.initialSeed": None
        }
        return

    def tearDown(self):
        """
        _tearDown_
        """
        #Do nothing
        return

    def oneHundredFiles(self, splittingAlgo="EventBased", jobType="Processing"):
        """
        _oneHundredFiles_

        Generate a WMBS data stack representing 100 files for job splitter testing

        Builds a fileset of 100 single-lumi files, splits it with the given
        algorithm (100 events per job) and attaches the RandomSeeder and
        RunAndLumiSeeder generators to self.manager before returning the
        resulting job groups.
        """
        fileset1 = Fileset(name='EventBasedFiles1')
        for i in range(0, 100):
            f = File(
                "/store/MultipleFileSplit%s.root" % i,  # lfn
                1000,    # size
                100,     # events
                10 + i,  # run
                12312    # lumi
            )
            f['locations'].add("BULLSHIT")
            fileset1.addFile(f)

        work = Workflow()
        subscription1 = Subscription(fileset=fileset1,
                                     workflow=work,
                                     split_algo=splittingAlgo,
                                     type=jobType)
        splitter = SplitterFactory()
        jobfactory = splitter(subscription1)
        jobs = jobfactory(events_per_job=100)
        #for jobGroup in jobs:
        #    yield jobGroup

        self.manager.addGenerator("RandomSeeder", **self.seedlistForRandom)
        self.manager.addGenerator("RunAndLumiSeeder")

        return jobs

    def testSimpleFiles(self):
        """
        _testSimpleFiles_

        test using one hundred files that we can save the attributes in a job
        """
        jobs = self.oneHundredFiles()

        for jobGrp in jobs:
            self.manager(jobGrp)

        for jobGrp in jobs:
            count = 0
            for job in jobGrp.jobs:
                conf = job.getBaggage()
                self.assertEqual(
                    hasattr(conf.RandomSeeder.evtgenproducer, 'initialSeed'), True)
                self.assertEqual(
                    hasattr(conf.RandomSeeder.generator, 'initialSeed'), True)
                # RunAndLumiSeeder rolls lumi over every 11 jobs (default);
                # the run number increments on each rollover.
                # NOTE(review): (count / 11) relies on integer division - Python 2 semantics.
                self.assertEqual(job["mask"]["FirstLumi"], count % 11)
                self.assertEqual(job["mask"]["FirstRun"], (count / 11) + 1)
                count += 1
        return

    def testWMTask(self):
        """
        _testWMTask_

        Test whether or not we can read the seeder parameters out of a WMTask.
        Also tests RandomSeeder and RunAndLumiSeeder
        """
        task1 = makeWMTask("task1")
        randomDict = {
            "generator.initialSeed": None,
            "evtgenproducer.initialSeed": None,
            "MAXINT": 1
        }
        lumiDict = {"lumi_per_run": 5}
        task1.addGenerator("RandomSeeder", **randomDict)
        task1.addGenerator("RunAndLumiSeeder", **lumiDict)
        manager = GeneratorManager(task=task1)

        jobs = self.oneHundredFiles()

        for jobGrp in jobs:
            manager(jobGrp)

        for jobGrp in jobs:
            count = 0
            for job in jobGrp.jobs:
                conf = job.getBaggage()
                self.assertTrue(
                    hasattr(conf.RandomSeeder.evtgenproducer, 'initialSeed'))
                self.assertTrue(
                    hasattr(conf.RandomSeeder.generator, 'initialSeed'))
                #self.assertEqual(job["mask"]["FirstLumi"], count%6)
                #self.assertEqual(job["mask"]["FirstRun"], (count/6) + 1)
                # MAXINT == 1 forces the drawn seed into the range (0, 1].
                x = conf.RandomSeeder.generator.initialSeed
                self.assertTrue(x > 0,
                                "ERROR: producing negative random numbers")
                self.assertTrue(
                    x <= 1,
                    "ERROR: MAXINT tag failed; producing bad random number %i"
                    % (x))
                count += 1
        return

    def testPresetSeeder(self):
        """
        _testPresetSeeder_

        Test whether the PresetSeeder works: configured seeds must be copied
        verbatim into every job's baggage.
        """
        task1 = makeWMTask("task2")
        seederDict = {
            "generator.initialSeed": 1001,
            "evtgenproducer.initialSeed": 1001
        }
        task1.addGenerator("PresetSeeder", **seederDict)
        manager = GeneratorManager(task=task1)

        jobs = self.oneHundredFiles()

        for jobGrp in jobs:
            manager(jobGrp)

        for jobGrp in jobs:
            count = 0
            for job in jobGrp.jobs:
                conf = job.getBaggage()
                self.assertEqual(conf.PresetSeeder.evtgenproducer.initialSeed, 1001)
                self.assertEqual(conf.PresetSeeder.generator.initialSeed, 1001)
        return
def __call__(self, parameters):
    """
    Poller for looking in all active subscriptions for jobs that need to be made.

    `parameters` is a list of dicts, each carrying a 'subscription' id.
    For each subscription this loads workflow and spec, builds the seeder
    list, runs the job splitter in batches, creates the per-job work
    areas and saves the jobs, then aggressively drops references and
    garbage-collects between subscriptions to keep memory bounded.

    Returns the (negative) subscription id immediately if one is found,
    otherwise returns `parameters` when all entries are processed.
    """
    logging.info("In JobCreatorWorker.__call__")

    myThread = threading.currentThread()

    for entry in parameters:
        # This retrieves a single subscription
        subscriptionID = entry.get('subscription')

        if subscriptionID < 0:
            logging.error("Got non-existant subscription")
            logging.error("Assuming parameters in error: returning")
            return subscriptionID

        myThread.transaction.begin()

        logging.info("About to call subscription %i", subscriptionID)

        wmbsSubscription = Subscription(id=subscriptionID)
        wmbsSubscription.load()
        wmbsSubscription["workflow"].load()
        workflow = wmbsSubscription["workflow"]

        wmWorkload = retrieveWMSpec(wmbsSubscription)

        if not workflow.task or not wmWorkload:
            # Then we have a problem
            # We have no sandbox
            # We NEED a sandbox
            # Abort this subscription!
            # But do NOT fail
            # We have no way of marking a subscription as bad per se
            # We'll have to just keep skipping it
            # NOTE(review): the two assignments below are dead code - the
            # continue statement skips the rest of this loop iteration.
            wmTask = None
            seederList = []
            logging.error("Have no task for workflow %i", workflow.id)
            logging.error("Aborting Subscription %i", subscriptionID)
            continue
        else:
            wmTask = wmWorkload.getTaskByPath(workflow.task)
            if hasattr(wmTask.data, 'seeders'):
                manager = GeneratorManager(wmTask)
                seederList = manager.getGeneratorList()
            else:
                seederList = []

        logging.info("About to enter JobFactory")
        logging.debug("Going to call wmbsJobFactory with limit %i", self.limit)

        # My hope is that the job factory is smart enough only to split un-split jobs
        wmbsJobFactory = self.splitterFactory(
            package="WMCore.WMBS",
            subscription=wmbsSubscription,
            generators=seederList,
            limit=self.limit)
        splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
        logging.debug("Split Params: %s", splitParams)

        continueSubscription = True
        myThread.transaction.commit()

        # Turn on the jobFactory
        myThread.transaction.begin()
        wmbsJobFactory.open()

        # Create a function to hold it
        jobSplittingFunction = runSplitter(jobFactory=wmbsJobFactory,
                                           splitParams=splitParams)
        while continueSubscription:
            # This loop runs over the jobFactory,
            # using yield statements and a pre-existing proxy to
            # generate and process new jobs

            # First we need the jobs.
            try:
                wmbsJobGroups = next(jobSplittingFunction)
                logging.info("Retrieved %i jobGroups from jobSplitter",
                             len(wmbsJobGroups))
            except StopIteration:
                # If you receive a stopIteration, we're done
                logging.info("Completed iteration over subscription %i",
                             subscriptionID)
                continueSubscription = False
                continue

            # Now we get to find out what job they are.
            countJobs = self.daoFactory(
                classname="Jobs.GetNumberOfJobsPerWorkflow")
            jobNumber = countJobs.execute(workflow=workflow.id,
                                          conn=myThread.transaction.conn,
                                          transaction=True)
            logging.debug("Have %i jobs for this workflow already", jobNumber)

            for wmbsJobGroup in wmbsJobGroups:
                logging.debug("Processing jobGroup %i", wmbsJobGroup.exists())
                logging.debug("Processing %i jobs", len(wmbsJobGroup.jobs))

                # Create a directory
                self.createWorkArea.processJobs(
                    jobGroup=wmbsJobGroup,
                    startDir=self.jobCacheDir,
                    workflow=workflow,
                    wmWorkload=wmWorkload,
                    transaction=myThread.transaction,
                    conn=myThread.transaction.conn)

                for job in wmbsJobGroup.jobs:
                    jobNumber += 1
                    self.saveJob(job=job, workflow=workflow,
                                 wmTask=wmTask, jobNumber=jobNumber)

                self.advanceJobGroup(wmbsJobGroup)

                logging.debug("Finished call for jobGroup %i",
                              wmbsJobGroup.exists())
        # END: while loop over jobSplitter
        myThread.transaction.commit()

        # About to reset everything
        wmbsJobGroups = None
        wmTask = None
        wmWorkload = None
        splitParams = None
        wmbsJobFactory = None
        gc.collect()

        # About to check memory
        doMemoryCheck(
            "About to get memory references: End of subscription loop")

    # Final memory check
    doMemoryCheck("About to get memory references: End of __call__()")

    logging.debug("About to return from JobCreatorWorker.__call__()")
    return parameters
def __call__(self, parameters):
    """
    Poller for looking in all active subscriptions for jobs that need to be made.

    `parameters` is a list of dicts, each carrying a 'subscription' id.
    For each subscription this loads workflow and spec, builds the seeder
    list, runs the job splitter in batches, creates the per-job work
    areas and saves the jobs, then aggressively drops references and
    garbage-collects between subscriptions to keep memory bounded.

    Returns the (negative) subscription id immediately if one is found,
    otherwise returns `parameters` when all entries are processed.
    """
    logging.info("In JobCreatorWorker.__call__")

    myThread = threading.currentThread()

    for entry in parameters:
        # This retrieves a single subscription
        subscriptionID = entry.get('subscription')

        if subscriptionID < 0:
            logging.error("Got non-existant subscription")
            logging.error("Assuming parameters in error: returning")
            return subscriptionID

        myThread.transaction.begin()

        logging.info("About to call subscription %i", subscriptionID)

        wmbsSubscription = Subscription(id=subscriptionID)
        wmbsSubscription.load()
        wmbsSubscription["workflow"].load()
        workflow = wmbsSubscription["workflow"]

        wmWorkload = retrieveWMSpec(wmbsSubscription)

        if not workflow.task or not wmWorkload:
            # Then we have a problem
            # We have no sandbox
            # We NEED a sandbox
            # Abort this subscription!
            # But do NOT fail
            # We have no way of marking a subscription as bad per se
            # We'll have to just keep skipping it
            # NOTE(review): the two assignments below are dead code - the
            # continue statement skips the rest of this loop iteration.
            wmTask = None
            seederList = []
            logging.error("Have no task for workflow %i", workflow.id)
            logging.error("Aborting Subscription %i", subscriptionID)
            continue
        else:
            wmTask = wmWorkload.getTaskByPath(workflow.task)
            if hasattr(wmTask.data, 'seeders'):
                manager = GeneratorManager(wmTask)
                seederList = manager.getGeneratorList()
            else:
                seederList = []

        logging.info("About to enter JobFactory")
        logging.debug("Going to call wmbsJobFactory with limit %i", self.limit)

        # My hope is that the job factory is smart enough only to split un-split jobs
        wmbsJobFactory = self.splitterFactory(package="WMCore.WMBS",
                                              subscription=wmbsSubscription,
                                              generators=seederList,
                                              limit=self.limit)
        splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
        logging.debug("Split Params: %s", splitParams)

        continueSubscription = True
        myThread.transaction.commit()

        # Turn on the jobFactory
        myThread.transaction.begin()
        wmbsJobFactory.open()

        # Create a function to hold it
        jobSplittingFunction = runSplitter(jobFactory=wmbsJobFactory,
                                           splitParams=splitParams)
        while continueSubscription:
            # This loop runs over the jobFactory,
            # using yield statements and a pre-existing proxy to
            # generate and process new jobs

            # First we need the jobs.
            try:
                wmbsJobGroups = next(jobSplittingFunction)
                logging.info("Retrieved %i jobGroups from jobSplitter", len(wmbsJobGroups))
            except StopIteration:
                # If you receive a stopIteration, we're done
                logging.info("Completed iteration over subscription %i", subscriptionID)
                continueSubscription = False
                continue

            # Now we get to find out what job they are.
            countJobs = self.daoFactory(classname="Jobs.GetNumberOfJobsPerWorkflow")
            jobNumber = countJobs.execute(workflow=workflow.id,
                                          conn=myThread.transaction.conn,
                                          transaction=True)
            logging.debug("Have %i jobs for this workflow already", jobNumber)

            for wmbsJobGroup in wmbsJobGroups:
                logging.debug("Processing jobGroup %i", wmbsJobGroup.exists())
                logging.debug("Processing %i jobs", len(wmbsJobGroup.jobs))

                # Create a directory
                self.createWorkArea.processJobs(jobGroup=wmbsJobGroup,
                                                startDir=self.jobCacheDir,
                                                workflow=workflow,
                                                wmWorkload=wmWorkload,
                                                transaction=myThread.transaction,
                                                conn=myThread.transaction.conn)

                for job in wmbsJobGroup.jobs:
                    jobNumber += 1
                    self.saveJob(job=job, workflow=workflow,
                                 wmTask=wmTask, jobNumber=jobNumber)

                self.advanceJobGroup(wmbsJobGroup)

                logging.debug("Finished call for jobGroup %i",
                              wmbsJobGroup.exists())
        # END: while loop over jobSplitter
        myThread.transaction.commit()

        # About to reset everything
        wmbsJobGroups = None
        wmTask = None
        wmWorkload = None
        splitParams = None
        wmbsJobFactory = None
        gc.collect()

        # About to check memory
        doMemoryCheck("About to get memory references: End of subscription loop")

    # Final memory check
    doMemoryCheck("About to get memory references: End of __call__()")

    logging.debug("About to return from JobCreatorWorker.__call__()")
    return parameters
def pollSubscriptions(self):
    """
    _pollSubscriptions_

    Poller for looking in all active subscriptions for jobs that need to be made.

    For every loadable subscription this loads workflow and workload spec
    (skipping, with an alert, when either is missing), builds the task's
    generator list, then drives the configured job-splitting algorithm in
    batches of at most self.limit. Each batch is created inside its own
    transaction via creatorProcess, cache directories are stored in bulk,
    and the job groups are advanced through changeState. A bulk-cache
    failure raises JobCreatorException.
    """
    logging.info("Beginning JobCreator.pollSubscriptions() cycle.")
    myThread = threading.currentThread()

    #First, get list of Subscriptions
    subscriptions = self.subscriptionList.execute()

    # Okay, now we have a list of subscriptions
    for subscriptionID in subscriptions:
        wmbsSubscription = Subscription(id=subscriptionID)
        try:
            wmbsSubscription.load()
        except IndexError:
            # This happens when the subscription no longer exists
            # i.e., someone executed a kill() function on the database
            # while the JobCreator was in cycle
            # Ignore this subscription
            msg = "JobCreator cannot load subscription %i" % subscriptionID
            logging.error(msg)
            self.sendAlert(6, msg=msg)
            continue

        workflow = Workflow(id=wmbsSubscription["workflow"].id)
        workflow.load()
        wmbsSubscription['workflow'] = workflow
        wmWorkload = retrieveWMSpec(workflow=workflow)

        if not workflow.task or not wmWorkload:
            # Then we have a problem
            # We NEED a sandbox
            # Abort this subscription!
            # But do NOT fail
            # We have no way of marking a subscription as bad per se
            # We'll have to just keep skipping it
            msg = "Have no task for workflow %i\n" % (workflow.id)
            msg += "Aborting Subscription %i" % (subscriptionID)
            logging.error(msg)
            self.sendAlert(1, msg=msg)
            continue

        logging.debug("Have loaded subscription %i with workflow %i\n" %
                      (subscriptionID, workflow.id))

        # Set task object
        wmTask = wmWorkload.getTaskByPath(workflow.task)

        # Get generators
        # If you fail to load the generators, pass on the job
        try:
            if hasattr(wmTask.data, 'generators'):
                manager = GeneratorManager(wmTask)
                seederList = manager.getGeneratorList()
            else:
                seederList = []
        except Exception, ex:
            msg = "Had failure loading generators for subscription %i\n" % (subscriptionID)
            msg += "Exception: %s\n" % str(ex)
            msg += "Passing over this error. It will reoccur next interation!\n"
            msg += "Please check or remove this subscription!\n"
            logging.error(msg)
            self.sendAlert(6, msg=msg)
            continue

        logging.debug("Going to call wmbsJobFactory for sub %i with limit %i" %
                      (subscriptionID, self.limit))
        splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
        logging.debug("Split Params: %s" % splitParams)

        # My hope is that the job factory is smart enough only to split un-split jobs
        splitterFactory = SplitterFactory(
            splitParams.get('algo_package', "WMCore.JobSplitting"))
        wmbsJobFactory = splitterFactory(package="WMCore.WMBS",
                                         subscription=wmbsSubscription,
                                         generators=seederList,
                                         limit=self.limit)

        # Turn on the jobFactory
        wmbsJobFactory.open()

        # Create a function to hold it
        jobSplittingFunction = runSplitter(jobFactory=wmbsJobFactory,
                                           splitParams=splitParams)

        # Now we get to find out how many jobs there are.
        jobNumber = self.countJobs.execute(workflow=workflow.id,
                                           conn=myThread.transaction.conn,
                                           transaction=True)
        # initial_lfn_counter offsets the numbering so new job LFNs do not
        # collide with pre-existing ones.
        jobNumber += splitParams.get('initial_lfn_counter', 0)
        logging.debug("Have %i jobs for workflow %s already in database." %
                      (jobNumber, workflow.name))

        continueSubscription = True
        while continueSubscription:
            # This loop runs over the jobFactory,
            # using yield statements and a pre-existing proxy to
            # generate and process new jobs

            # First we need the jobs.
            # Each batch gets its own transaction so a failure rolls back
            # only the current batch.
            myThread.transaction.begin()
            try:
                wmbsJobGroups = jobSplittingFunction.next()
                logging.info("Retrieved %i jobGroups from jobSplitter" %
                             (len(wmbsJobGroups)))
            except StopIteration:
                # If you receive a stopIteration, we're done
                logging.info("Completed iteration over subscription %i" %
                             (subscriptionID))
                continueSubscription = False
                myThread.transaction.commit()
                break

            # If we have no jobGroups, we're done
            if len(wmbsJobGroups) == 0:
                logging.info("Found end in iteration over subscription %i" %
                             (subscriptionID))
                continueSubscription = False
                myThread.transaction.commit()
                break

            # Assemble a dict of all the info
            processDict = {
                'workflow': workflow,
                'wmWorkload': wmWorkload,
                'wmTaskName': wmTask.getPathName(),
                'jobNumber': jobNumber,
                'sandbox': wmTask.data.input.sandbox,
                'wmTaskPrio': wmTask.getTaskPriority(),
                'owner': wmWorkload.getOwner().get('name', None),
                'ownerDN': wmWorkload.getOwner().get('dn', None),
                'ownerGroup': wmWorkload.getOwner().get('vogroup', ''),
                'ownerRole': wmWorkload.getOwner().get('vorole', '')
            }

            tempSubscription = Subscription(id=wmbsSubscription['id'])

            nameDictList = []
            for wmbsJobGroup in wmbsJobGroups:
                # For each jobGroup, put a dictionary
                # together and run it with creatorProcess
                jobsInGroup = len(wmbsJobGroup.jobs)
                wmbsJobGroup.subscription = tempSubscription
                tempDict = {}
                tempDict.update(processDict)
                tempDict['jobGroup'] = wmbsJobGroup
                tempDict['swVersion'] = wmTask.getSwVersion()
                tempDict['scramArch'] = wmTask.getScramArch()
                tempDict['jobNumber'] = jobNumber
                tempDict['agentNumber'] = self.agentNumber

                jobGroup = creatorProcess(work=tempDict,
                                          jobCacheDir=self.jobCacheDir)
                jobNumber += jobsInGroup

                # Set jobCache for group
                for job in jobGroup.jobs:
                    nameDictList.append({
                        'jobid': job['id'],
                        'cacheDir': job['cache_dir']
                    })
                    job["user"] = wmWorkload.getOwner()["name"]
                    job["group"] = wmWorkload.getOwner()["group"]

            # Set the caches in the database
            try:
                if len(nameDictList) > 0:
                    self.setBulkCache.execute(
                        jobDictList=nameDictList,
                        conn=myThread.transaction.conn,
                        transaction=True)
            except WMException:
                raise
            except Exception, ex:
                msg = "Unknown exception while setting the bulk cache:\n"
                msg += str(ex)
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                logging.debug(
                    "Error while setting bulkCache with following values: %s\n"
                    % nameDictList)
                raise JobCreatorException(msg)

            # Advance the jobGroup in changeState
            for wmbsJobGroup in wmbsJobGroups:
                self.advanceJobGroup(wmbsJobGroup=wmbsJobGroup)

            # Now end the transaction so that everything is wrapped
            # in a single rollback
            myThread.transaction.commit()