def testGetMaxEvents(self):
    """
    Test getMaxEvents() in Mask.py.

    This implementation depends on setMaxAndSkipEvents(), so a failure
    there will also surface here; unittest may run them in reverse
    order, so keep that in mind. -mnorman
    """
    testMask = Mask()
    nEvents = 100
    nSkipped = 0

    # A fresh mask has no maximum configured
    self.assertEqual(testMask.getMaxEvents(), None)

    testMask.setMaxAndSkipEvents(nEvents, nSkipped)
    self.assertEqual(testMask.getMaxEvents(), nEvents + nSkipped)
def testCreateFilesetFromDBS(self):
    """Test creating an analysis fileset from DBS"""
    lumiRanges = [[1, 9], [12, 12], [31, 31], [38, 39], [49, 49],
                  [51, 52], [64, 65], [82, 82], [92, 98]]
    rangesMask = Mask()
    rangesMask.addRunWithLumiRanges(run=1, lumiList=lumiRanges)

    fs, fl = self.acService.createFilesetFromDBS(self.collection,
                                                 filesetName='test_fs',
                                                 dbsURL=self.dbsURL,
                                                 dataset=self.dataset,
                                                 mask=rangesMask)
    # The fileset document must exist and carry the expected file count
    self.assertTrue(fl['_id'])
    self.assertEqual(len(fl['files']), 21)
def testGetMax(self):
    """
    Test the getMax() routine added to Mask.py.
    """
    testMask = Mask()
    skipRuns = 200
    maxRuns = 1000
    testMask.setMaxAndSkipRuns(maxRuns, skipRuns)

    # Only 'Run' was configured; other (and unknown) types yield None
    self.assertEqual(testMask.getMax('Lumi'), None)
    self.assertEqual(testMask.getMax('junk'), None)
    self.assertEqual(testMask.getMax('Run'), 1000)
def testMask3(self):
    """Round-trip a Job carrying a mask whose LastEvent < FirstEvent."""
    jobMask = Mask()
    jobMask['FirstEvent'] = 9999
    jobMask['LastEvent'] = 999

    testJob = Job()
    testJob["mask"] = jobMask
    self.roundTrip(testJob)
def testSetMaxAndSkipRuns(self):
    """
    Test setMaxAndSkipRuns() in Mask.py: FirstRun becomes the skip
    count and LastRun becomes skip + max.
    """
    testMask = Mask()
    skipRuns = 200
    maxRuns = 1000
    testMask.setMaxAndSkipRuns(maxRuns, skipRuns)

    self.assertEqual(testMask['FirstRun'], skipRuns)
    self.assertEqual(testMask['LastRun'], skipRuns + maxRuns)
    return
def testMCFakeFileInjection(self):
    """
    Inject fake Monte Carlo files into WMBS.

    Builds an MC workload constrained by a Mask and verifies the
    top-level subscription, its fileset, and the single fake file
    (event count, merge flag, locations, run/lumi content).
    """
    self.setupMCWMSpec()

    mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345,
                LastEvent=999995, LastLumi=12345, LastRun=12)

    wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask)
    subscription = wmbs.topLevelSubscription
    self.assertEqual(1, subscription.exists())
    fileset = subscription['fileset']
    self.assertEqual(1, fileset.exists())
    fileset.loadData()  # need to refresh from database

    self.assertEqual(len(fileset.files), 1)
    self.assertEqual(len(fileset.parents), 0)
    self.assertFalse(fileset.open)

    # renamed from 'file' so the builtin isn't shadowed
    fakeFile = list(fileset.files)[0]
    self.assertEqual(fakeFile['events'], mask['LastEvent'] - mask['FirstEvent'] + 1)  # inclusive range
    self.assertEqual(fakeFile['merged'], False)  # merged files get added to dbs
    self.assertEqual(len(fakeFile['parents']), 0)
    # fakeFile.loadData()
    self.assertEqual(sorted(fakeFile['locations']), sorted(self.ses))
    self.assertEqual(len(fakeFile.getParentLFNs()), 0)

    self.assertEqual(len(fakeFile.getRuns()), 1)
    run = fakeFile.getRuns()[0]
    self.assertEqual(run.run, mask['FirstRun'])
    self.assertEqual(run.lumis[0], mask['FirstLumi'])
    self.assertEqual(run.lumis[-1], mask['LastLumi'])
    self.assertEqual(len(run.lumis), mask['LastLumi'] - mask['FirstLumi'] + 1)
def testSetMaxAndSkipEvents(self):
    """
    Test setMaxAndSkipEvents() in Mask.py: FirstEvent becomes the skip
    count and LastEvent becomes skip + max.
    """
    testMask = Mask()
    skipEvents = 10
    maxEvents = 100
    testMask.setMaxAndSkipEvents(maxEvents, skipEvents)

    self.assertEqual(testMask['FirstEvent'], skipEvents)
    self.assertEqual(testMask['LastEvent'], skipEvents + maxEvents)
    return
def testSetMaxAndSkipLumis(self):
    """
    Test setMaxAndSkipLumis() in Mask.py: FirstLumi becomes the skip
    count and LastLumi becomes skip + max.
    """
    testMask = Mask()
    skipLumis = 2
    maxLumis = 10
    testMask.setMaxAndSkipLumis(maxLumis, skipLumis)

    self.assertEqual(testMask['FirstLumi'], skipLumis)
    self.assertEqual(testMask['LastLumi'], skipLumis + maxLumis)
    return
def testMask5(self):
    """Lax round-trip of a DataStructsJob whose mask has LastEvent < FirstEvent."""
    jobMask = Mask()
    jobMask['FirstEvent'] = 9999
    jobMask['LastEvent'] = 999

    testJob = DataStructsJob()
    testJob["mask"] = jobMask
    self.roundTripLax(testJob)
def testFirstEvent(self):
    """
    _testFirstEvent_

    For a non-production job with real input files: an empty mask must
    produce no skipEvents/firstEvent tweaks, while setting FirstEvent
    (here to 0) must produce a skipEvents tweak.
    """
    job = Job()
    job["input_files"] = [{"lfn": "bogusFile", "parents": []}]
    job["mask"] = Mask()

    # Empty mask: no source tweaks expected
    tweak = PSetTweak()
    WMTweaks.makeJobTweak(job, tweak)
    self.assertFalse(hasattr(tweak.process.source, "skipEvents"),
                     "Error: There should be no skipEvents tweak.")
    self.assertFalse(hasattr(tweak.process.source, "firstEvent"),
                     "Error: There should be no firstEvent tweak.")

    # FirstEvent set: skipEvents tweak expected
    job["mask"]["FirstEvent"] = 0
    tweak = PSetTweak()
    WMTweaks.makeJobTweak(job, tweak)
    self.assertTrue(hasattr(tweak.process.source, "skipEvents"),
                    "Error: There should be a skipEvents tweak.")
    self.assertEqual(tweak.process.source.skipEvents,
                     'customTypeCms.untracked.uint32(0)',
                     "Error: The skipEvents tweak should be 0.")
    return
def testFirstRunMC(self):
    """
    _testFirstRunMC_

    For an MC job the firstRun tweak defaults to 1 when FirstRun is not
    in the mask, and follows the mask value when it is.
    """
    job = Job()
    job["input_files"] = [{"lfn": "MCFakeFile", "parents": []}]
    job["mask"] = Mask()
    job["mask"]["FirstLumi"] = 200
    job["mask"]["FirstEvent"] = 100
    job["counter"] = 5

    # No FirstRun in the mask -> defaults to run 1
    tweak = PSetTweak()
    WMTweaks.makeJobTweak(job, tweak)
    self.assertTrue(hasattr(tweak.process.source, "firstRun"),
                    "Error: There should be a first run tweak")
    self.assertEqual(tweak.process.source.firstRun,
                     'customTypeCms.untracked.uint32(1)',
                     "Error: The first run should be 1")

    # Explicit FirstRun -> tweak follows the mask
    job["mask"]["FirstRun"] = 5
    tweak = PSetTweak()
    WMTweaks.makeJobTweak(job, tweak)
    self.assertTrue(hasattr(tweak.process.source, "firstRun"),
                    "Error: There should be a first run tweak")
    self.assertEqual(tweak.process.source.firstRun,
                     'customTypeCms.untracked.uint32(5)',
                     "Error: The first run should be 5")
def testFirstLumiMC(self):
    """
    _testFirstLumiMC_

    Verify that we set the lumi in a MC job and it gets into
    process.source.firstLuminosityBlock; without a FirstLumi the tweak
    must raise WMTweakMaskError.
    """
    job = Job()
    job["input_files"] = [{"lfn": "MCFakeFile", "parents": []}]
    job["mask"] = Mask()
    job["counter"] = 5
    job["mask"]["FirstEvent"] = 100

    # Without FirstLumi the tweak is expected to fail
    # NOTE(review): the first makeJobTweak call may raise before
    # assertRaises runs; the except clause keeps the test green either way
    try:
        tweak = WMTweaks.makeJobTweak(job)
        self.assertRaises(WMTweakMaskError, WMTweaks.makeJobTweak, job)
    except WMTweakMaskError:
        pass

    job["mask"]["FirstLumi"] = 200
    tweak = WMTweaks.makeJobTweak(job)
    self.assertTrue(hasattr(tweak.process.source, "firstLuminosityBlock"),
                    "Error: There should be a first lumi tweak")
    # bug fix: the failure message used to claim the expected value was 5
    self.assertEqual(tweak.process.source.firstLuminosityBlock, 200,
                     "Error: The first luminosity block should be 200")

    job["mask"]["FirstLumi"] = 10
    tweak = WMTweaks.makeJobTweak(job)
    self.assertTrue(hasattr(tweak.process.source, "firstLuminosityBlock"),
                    "Error: There should be a first lumi tweak")
    self.assertEqual(tweak.process.source.firstLuminosityBlock, 10,
                     "Error: The first luminosity block should be 10")
def testFirstRun(self):
    """
    _testFirstRun_

    For jobs with real input files: no firstRun tweak appears until the
    mask's FirstRun is set, after which the tweak carries that value.
    """
    job = Job()
    job["input_files"] = [{"lfn": "bogusFile", "parents": []}]
    job["mask"] = Mask()

    # Mask without FirstRun -> no firstRun tweak
    tweak = PSetTweak()
    WMTweaks.makeJobTweak(job, tweak)
    self.assertFalse(hasattr(tweak.process.source, "firstRun"),
                     "Error: There should be no firstRun tweak.")

    # FirstRun set -> tweak carries it
    job["mask"]["FirstRun"] = 93
    tweak = WMTweaks.makeJobTweak(job, tweak)
    self.assertTrue(hasattr(tweak.process.source, "firstRun"),
                    "Error: There should be a firstRun tweak.")
    self.assertEqual(tweak.process.source.firstRun,
                     'customTypeCms.untracked.uint32(93)',
                     "Error: The firstRun tweak should be 93.")
    return
def testFirstEventMC(self):
    """
    _testFirstEventMC_

    For a production (MC) job, FirstEvent in the mask becomes a
    firstEvent tweak (never skipEvents); without FirstLumi the tweak
    must raise WMTweakMaskError.
    """
    job = Job()
    job["input_files"] = [{"lfn": "MCFakeFile", "parents": []}]
    job["mask"] = Mask()
    job["counter"] = 5
    job["mask"]["FirstLumi"] = 200

    # Without FirstEvent the tweak is expected to fail
    try:
        tweak = PSetTweak()
        WMTweaks.makeJobTweak(job, tweak)
        self.assertRaises(WMTweakMaskError, WMTweaks.makeJobTweak, job)
    except WMTweakMaskError:
        pass

    job["mask"]["FirstEvent"] = 100
    tweak = PSetTweak()
    WMTweaks.makeJobTweak(job, tweak)
    self.assertFalse(hasattr(tweak.process.source, "skipEvents"),
                     "Error: There should be no skipEvents tweak, it's MC.")
    self.assertTrue(hasattr(tweak.process.source, "firstEvent"),
                    "Error: There should be a first event tweak")
    self.assertEqual(tweak.process.source.firstEvent,
                     'customTypeCms.untracked.uint32(100)',
                     "Error: The firstEvent tweak should be 100.")
    return
def __init__(self, name=None, files=None):
    """
    A job has a jobgroup which gives it its subscription and workflow.
    inputFiles is a list containing files associated to a job
    last_update is the time the job last changed

    :param name: optional job name
    :param files: optional list of input files; a fresh list is created
        when not given so instances never share a mutable default
    """
    dict.__init__(self)
    self.baggage = ConfigSection("baggage")

    # identity comparison per PEP 8 (was 'files == None')
    if files is None:
        self["input_files"] = []
    else:
        self["input_files"] = files

    self["id"] = None
    self["jobgroup"] = None
    self["name"] = name
    self["state"] = 'new'
    self["state_time"] = int(time.time())
    self["outcome"] = "failure"
    self["retry_count"] = 0
    self["location"] = None
    self["mask"] = Mask()
    self["task"] = None
    self["fwjr"] = None
    self["fwjr_path"] = None
    self["workflow"] = None
    self["owner"] = None
    return
def testDuplicateSubscription(self):
    """Can't duplicate subscriptions"""
    siteWhitelist = ["T2_XX_SiteA", "T2_XX_SiteB"]

    # using default wmspec
    block = self.dataset + "#" + BLOCK1
    wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
    wmbs.topLevelFileset.loadData()
    numOfFiles = len(wmbs.topLevelFileset.files)
    filesetId = wmbs.topLevelFileset.id
    subId = wmbs.topLevelSubscription['id']

    # check initially inserted files.
    dbsFiles = self.dbs.getFileBlock(block)['Files']
    self.assertEqual(numOfFiles, len(dbsFiles))
    # Not clear what's supposed to happen here, 2nd test is completely redundant
    dummyFirstFileset = wmbs.topLevelFileset
    self.assertEqual(numOfFiles, len(dbsFiles))

    # reinsert subscription - shouldn't create anything new
    wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
    wmbs.topLevelFileset.loadData()
    # fileset/subscription ids unchanged proves nothing was duplicated
    self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files))
    self.assertEqual(filesetId, wmbs.topLevelFileset.id)
    self.assertEqual(subId, wmbs.topLevelSubscription['id'])

    # now do a montecarlo workflow
    self.setupMCWMSpec()
    mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345,
                LastEvent=999995, LastLumi=12345, LastRun=12)

    wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask, commonLocation=siteWhitelist)
    wmbs.topLevelFileset.loadData()
    numOfFiles = len(wmbs.topLevelFileset.files)
    filesetId = wmbs.topLevelFileset.id
    subId = wmbs.topLevelSubscription['id']

    # check initially inserted files.
    # Not clear what's supposed to happen here, 2nd test is completely redundant
    numDbsFiles = 1  # MC injection fabricates exactly one fake file
    self.assertEqual(numOfFiles, numDbsFiles)
    dummyFirstFileset = wmbs.topLevelFileset
    self.assertEqual(numOfFiles, numDbsFiles)

    # reinsert subscription - shouldn't create anything new
    wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask, commonLocation=siteWhitelist)
    wmbs.topLevelFileset.loadData()
    self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files))
    self.assertEqual(filesetId, wmbs.topLevelFileset.id)
    self.assertEqual(subId, wmbs.topLevelSubscription['id'])
def testFilter(self):
    """
    Test filtering of a set(run) object
    """
    mask = Mask()
    mask.addRunWithLumiRanges(run=1,
                              lumiList=[[1, 9], [12, 12], [31, 31], [38, 39], [49, 49]])

    # None of run 1's lumis are inside the mask -> nothing survives
    runSet = set()
    runSet.add(Run(1, 148, 166, 185, 195, 203, 212))
    self.assertEqual(len(mask.filterRunLumisByMask(runs=runSet)), 0)

    # Run 2 is absent from the mask entirely; run 1 keeps lumi 2
    runSet = set()
    runSet.add(Run(1, 2, 148, 166, 185, 195, 203, 212))
    runSet.add(Run(2, 148, 166, 185, 195, 203, 212))
    self.assertEqual(len(mask.filterRunLumisByMask(runs=runSet)), 1)

    # Lumis 2 and 9 both fall in the [1, 9] range
    runSet = set()
    runSet.add(Run(1, 2, 9, 148, 166, 185, 195, 203, 212))
    survivors = mask.filterRunLumisByMask(runs=runSet)
    self.assertEqual(len(survivors), 1)
    survivor = survivors.pop()
    self.assertEqual(survivor.run, 1)
    self.assertEqual(survivor.lumis, [2, 9])
def testCreateFilesetFromDBS(self):
    """
    Test creating an analysis fileset from DBS; every file in the
    resulting fileset must be marked merged.
    """
    rangesMask = Mask()
    rangesMask.addRunWithLumiRanges(run=1,
                                    lumiList=[[1, 9], [12, 12], [31, 31], [38, 39], [49, 49],
                                              [51, 52], [64, 65], [82, 82], [92, 98]])
    fs, fl = self.acService.createFilesetFromDBS(self.collection,
                                                 filesetName='test_fs',
                                                 dbsURL=self.dbsURL,
                                                 dataset=self.dataset,
                                                 mask=rangesMask)
    self.assertTrue(fl['_id'])
    self.assertEqual(len(fl['files']), 21)
    # renamed from 'file' so the builtin isn't shadowed
    for fileName in fl['files']:
        self.assertTrue(fl['files'][fileName]['merged'])
def testFilter(self):
    """
    Test filtering of a set(run) object
    """
    mask = Mask()
    mask.addRunWithLumiRanges(run=1,
                              lumiList=[[1, 9], [12, 12], [31, 31], [38, 39], [49, 49]])

    # No lumi of run 1 falls inside any masked range
    runSet = set()
    runSet.add(Run(1, 148, 166, 185, 195, 203, 212))
    self.assertEqual(len(mask.filterRunLumisByMask(runs=runSet)), 0)

    # Run 2 is not masked at all; run 1 keeps only lumi 2
    runSet = set()
    runSet.add(Run(1, 2, 148, 166, 185, 195, 203, 212))
    runSet.add(Run(2, 148, 166, 185, 195, 203, 212))
    self.assertEqual(len(mask.filterRunLumisByMask(runs=runSet)), 1)

    runSet = set()
    runSet.add(Run(1, 2, 148, 166, 185, 195, 203, 212))
    survivors = mask.filterRunLumisByMask(runs=runSet)
    self.assertEqual(len(survivors), 1)
    survivor = survivors.pop()
    self.assertEqual(survivor.run, 1)
    self.assertEqual(survivor.lumis, [2])
def testFilterRealCase(self):
    """
    Test filtering of a set(run) object based on real cases from production
    """
    mask = Mask()
    # Deliberately unordered lumi ranges, plus max/skip settings on top
    mask.addRunWithLumiRanges(run=1, lumiList=[[9, 9], [8, 8], [3, 4], [7, 7]])
    mask.setMaxAndSkipLumis(0, 7)
    mask.setMaxAndSkipRuns(0, 1)

    # Three Run objects for the same run number with (lumi, events) pairs
    runSet = set()
    runSet.add(Run(1, *[(9, 500), (10, 500)]))
    runSet.add(Run(1, *[(3, 500), (4, 500)]))
    runSet.add(Run(1, *[(7, 500), (8, 500)]))

    filtered = mask.filterRunLumisByMask(runs=runSet)
    self.assertEqual(len(filtered), 1)
    survivor = filtered.pop()
    self.assertEqual(survivor.run, 1)
    # Lumi 10 is dropped; the rest are merged and sorted
    self.assertEqual(survivor.lumis, [3, 4, 7, 8, 9])
def testDuplicateSubscription(self): """Can't duplicate subscriptions""" # using default wmspec block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) firstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, len(dbsFiles)) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id']) # now do a montecarlo workflow self.setupMCWMSpec() mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345, LastEvent=999995, LastLumi=12345, LastRun=12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. numDbsFiles = 1 self.assertEqual(numOfFiles, numDbsFiles) firstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, numDbsFiles) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id'])
def testMCFakeFileInjection(self): """Inject fake Monte Carlo files into WMBS""" # This test is failing because the name of the couch DB is set to None # in BasicProductionWorkload.getProdArgs() but changing it to # "reqmgr_config_cache_t" from StdBase test arguments does not fix the # situation. testDuplicateSubscription probably has the same issue siteWhitelist = ["T2_XX_SiteA", "T2_XX_SiteB"] self.setupMCWMSpec() mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345, LastEvent=999995, LastLumi=12345, LastRun=12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask, commonLocation=siteWhitelist) subscription = wmbs.topLevelSubscription self.assertEqual(1, subscription.exists()) fileset = subscription['fileset'] self.assertEqual(1, fileset.exists()) fileset.loadData() # need to refresh from database self.assertEqual(len(fileset.files), 1) self.assertEqual(len(fileset.parents), 0) self.assertFalse(fileset.open) firstFile = list(fileset.files)[0] self.assertEqual(firstFile['events'], mask['LastEvent'] - mask['FirstEvent'] + 1) # inclusive range self.assertEqual(firstFile['merged'], False) # merged files get added to dbs self.assertEqual(len(firstFile['parents']), 0) # firstFile.loadData() self.assertEqual(sorted(firstFile['locations']), sorted(self.pnns)) self.assertEqual(len(firstFile.getParentLFNs()), 0) self.assertEqual(len(firstFile.getRuns()), 1) run = firstFile.getRuns()[0] self.assertEqual(run.run, mask['FirstRun']) self.assertEqual(run.lumis[0], mask['FirstLumi']) self.assertEqual(run.lumis[-1], mask['LastLumi']) self.assertEqual(len(run.lumis), mask['LastLumi'] - mask['FirstLumi'] + 1)
def testGetMax(self):
    """
    Test the getMax() routine added to Mask.py.
    """
    testMask = Mask()
    skipRuns = 201
    maxRuns = 999
    testMask.setMaxAndSkipRuns(maxRuns, skipRuns)

    # Types never configured (and unknown types) yield None
    self.assertEqual(testMask.getMax('Event'), None)
    self.assertEqual(testMask.getMax('Lumi'), None)
    self.assertEqual(testMask.getMax('junk'), None)
    # NOTE(review): expected 1000 with maxRuns=999/skipRuns=201 —
    # presumably getMax('Run') is LastRun - FirstRun + 1; confirm in Mask.py
    self.assertEqual(testMask.getMax('Run'), 1000)
def __init__(self, location, inputFiles, runAndLumis, agentNumber, lfnBase, outputMods,
             firstEvent=0, lastEvent=-1, firstLumi=None,
             firstRun=None, seeding=None, lheInputFiles=False, oneEventMode=False,
             eventsPerLumi=None, maxRuntime=None):
    # Build a CRAB-flavoured PSet setup: fabricate the cmsRun step
    # configuration WMCore expects, then populate a job dict with the
    # input files and an event/run/lumi mask from the arguments.
    SetupCMSSWPset.__init__(self, crabPSet=True)
    self.stepSpace = ConfigSection()
    self.stepSpace.location = location
    self.step = StepConfiguration(lfnBase, outputMods)
    self.step.section_("data")
    self.step.data._internal_name = "cmsRun"
    self.step.data.section_("application")
    self.step.data.application.section_("configuration")
    self.step.data.application.section_("command")
    self.step.data.application.section_("multicore")
    self.step.data.application.command.configuration = "PSet.py"
    # oneEventMode may arrive as a string flag or a real bool
    self.step.data.application.command.oneEventMode = oneEventMode in [ "1", "True", True ]
    self.step.data.application.command.memoryCheck = False
    self.step.data.application.command.silentMemoryCheck = True
    # self.step.data.application.configuration.pickledarguments.globalTag/globalTagTransaction
    if eventsPerLumi:
        self.step.data.application.configuration.eventsPerLumi = eventsPerLumi
    if maxRuntime:
        self.step.data.application.configuration.maxSecondsUntilRampdown = maxRuntime
    self.step.data.section_("input")
    self.job = jobDict(lheInputFiles, seeding)
    self.job["input_files"] = []
    # accept either plain LFN strings or pre-built file dicts
    # NOTE: 'basestring' implies this code targets Python 2
    for inputF in inputFiles:
        if isinstance(inputF, basestring):
            self.job["input_files"].append({"lfn": inputF, "parents": ""})
        else:
            self.job["input_files"].append(inputF)
    self.job['mask'] = Mask()
    self.job['mask']["FirstEvent"] = firstEvent
    self.job['mask']["LastEvent"] = lastEvent
    self.job['mask']["FirstRun"] = firstRun
    self.job['mask']["FirstLumi"] = firstLumi
    self.job['mask']["runAndLumis"] = runAndLumis
    self.job['agentNumber'] = agentNumber
    self.job['counter'] = 0
def __init__(self):
    # Initialise both bases: WMMask for the mask data structure,
    # WMBSBase for the database-helper machinery.
    WMMask.__init__(self)
    WMBSBase.__init__(self)
    return
def split(self):
    """Apply policy to spec"""
    # if not specified take standard defaults
    self.args.setdefault('SliceType', 'NumberOfEvents')
    self.args.setdefault('SliceSize', 1000)  # events per job
    self.args.setdefault('SubSliceType', 'NumberOfEventsPerLumi')
    self.args.setdefault('SubSliceSize', self.args['SliceSize'])  # events per lumi
    self.args.setdefault('MaxJobsPerElement', 250)  # jobs per WQE

    # Default mask covers the whole requested event range of the task
    if not self.mask:
        self.mask = Mask(FirstRun=1,
                         FirstLumi=self.initialTask.getFirstLumi(),
                         FirstEvent=self.initialTask.getFirstEvent(),
                         LastRun=1,
                         LastEvent=self.initialTask.getFirstEvent() +
                                   self.initialTask.totalEvents() - 1)
    mask = Mask(**self.mask)

    # First let's initialize some parameters
    stepSize = int(self.args['SliceSize']) * int(self.args['MaxJobsPerElement'])
    total = mask['LastEvent'] - mask['FirstEvent'] + 1
    lastAllowedEvent = mask['LastEvent']
    eventsAccounted = 0

    # Walk the event range in stepSize chunks, emitting one WQE per chunk
    while eventsAccounted < total:
        current = mask['FirstEvent'] + stepSize - 1  # inclusive range
        if current > lastAllowedEvent:
            current = lastAllowedEvent
        mask['LastEvent'] = current

        # Calculate the job splitting without actually doing it
        nEvents = mask['LastEvent'] - mask['FirstEvent'] + 1
        lumis_per_job = ceil(self.args['SliceSize'] / float(self.args['SubSliceSize']))
        remainingEvents = nEvents % self.args['SliceSize']
        lumis = (nEvents / self.args['SliceSize']) * lumis_per_job
        lumis += ceil(remainingEvents / float(self.args['SubSliceSize']))
        jobs = ceil(lumis / lumis_per_job)

        mask['LastLumi'] = mask['FirstLumi'] + int(lumis) - 1  # inclusive range
        nLumis = mask['LastLumi'] - mask['FirstLumi'] + 1
        self.newQueueElement(WMSpec=self.wmspec,
                             NumberOfLumis=nLumis,
                             NumberOfEvents=nEvents,
                             Jobs=jobs,
                             Mask=copy(mask))

        if mask['LastEvent'] > (2**32 - 1):
            # This is getting tricky, to ensure consecutive
            # events numbers we must calculate where the jobSplitter
            # will restart the firstEvent to 1 for the last time
            # in the newly created unit
            internalEvents = mask['FirstEvent']
            accumulatedEvents = internalEvents
            breakPoint = internalEvents
            while accumulatedEvents < mask['LastEvent']:
                if (internalEvents + self.args['SliceSize'] - 1) > (2**32 - 1):
                    internalEvents = 1
                    breakPoint = accumulatedEvents
                else:
                    internalEvents += self.args['SliceSize']
                accumulatedEvents += self.args['SliceSize']
            leftoverEvents = mask['LastEvent'] - breakPoint + 1
            mask['FirstEvent'] = leftoverEvents + 1
        else:
            mask['FirstEvent'] = mask['LastEvent'] + 1

        mask['FirstLumi'] = mask['LastLumi'] + 1
        eventsAccounted += stepSize
        lastAllowedEvent = (total - eventsAccounted) + mask['FirstEvent'] - 1
def testRunsAndLumis(self):
    """
    Test several different ways of creating the same list of runs and lumis
    """
    runMask = Mask()
    rangesMask = Mask()
    runAndLumisMask = Mask()

    # Method 1: whole Run objects
    runMask.addRun(Run(100, 1, 2, 3, 4, 5, 6, 8, 9, 10))
    runMask.addRun(Run(200, 6, 7, 8))
    runMask.addRun(Run(300, 12))

    # Method 2: explicit [first, last] lumi ranges
    rangesMask.addRunWithLumiRanges(run=100, lumiList=[[1, 6], [8, 10]])
    rangesMask.addRunWithLumiRanges(run=200, lumiList=[[6, 8]])
    rangesMask.addRunWithLumiRanges(run=300, lumiList=[[12, 12]])

    # Method 3: addRunAndLumis with range endpoints
    runAndLumisMask.addRunAndLumis(run=100, lumis=[1, 6])
    runAndLumisMask.addRunAndLumis(run=100, lumis=[8, 10])
    runAndLumisMask.addRunAndLumis(run=200, lumis=[6, 8])
    runAndLumisMask.addRunAndLumis(run=300, lumis=[12, 12])

    self.assertEqual(runMask.getRunAndLumis(), rangesMask.getRunAndLumis())
    # Note, this may break if the TODO in Mask.addRunAndLumis() is addressed
    self.assertEqual(runMask.getRunAndLumis(), runAndLumisMask.getRunAndLumis())
# No runs in this input file # Ignore it pass maskA = job['mask'] # Have to transform this because JSON is too stupid to understand ints # Also for some reason we're getting a strange problem where the mask # isn't being loaded at all. I'm not sure what to do there except drop it. try: for key in maskA['runAndLumis'].keys(): maskA['runAndLumis'][int(key)] = maskA['runAndLumis'][key] del maskA['runAndLumis'][key] except KeyError: # We don't have a mask. Not much we can do about this maskA = Mask() mask = Mask() mask.update(maskA) runs = [] # Turn arbitrary format into real runs for r in runsA: run = Run(runNumber = r['run_number']) run.lumis = r.get('lumis', []) runs.append(run) # Get rid of runs that aren't in the mask runs = mask.filterRunLumisByMask(runs = runs) for err in errorCouch: task = err['value']['task'] step = err['value']['step'] errors = err['value']['error'] logs = err['value']['logs'] start = err['value']['start']
def split(self): """Apply policy to spec""" # if not specified take standard defaults self.args.setdefault('SliceType', 'NumberOfEvents') self.args.setdefault('SliceSize', 1000) # events per job self.args.setdefault('SubSliceType', 'NumberOfEventsPerLumi') self.args.setdefault('SubSliceSize', self.args['SliceSize']) # events per lumi self.args.setdefault('MaxJobsPerElement', 1000) # jobs per WQE self.args.setdefault('MaxLumisPerElement', os.environ.get('MAX_LUMIS_PER_WQE')) self.args.setdefault( 'blowupFactor', 1.0) # Estimate of additional jobs following tasks. # Total WQE tasks will be Jobs*(1+blowupFactor) noInputUpdate = self.initialTask.getTrustSitelists().get('trustlists') noPileupUpdate = self.initialTask.getTrustSitelists().get( 'trustPUlists') if not self.mask: self.mask = Mask(FirstRun=1, FirstLumi=self.initialTask.getFirstLumi(), FirstEvent=self.initialTask.getFirstEvent(), LastRun=1, LastEvent=self.initialTask.getFirstEvent() + self.initialTask.totalEvents() - 1) mask = Mask(**self.mask) #First let's initialize some parameters stepSize = int(self.args['SliceSize']) * int( self.args['MaxJobsPerElement']) total = mask['LastEvent'] - mask['FirstEvent'] + 1 lastAllowedEvent = mask['LastEvent'] eventsAccounted = 0 while eventsAccounted < total: current = mask['FirstEvent'] + stepSize - 1 # inclusive range if current > lastAllowedEvent: current = lastAllowedEvent mask['LastEvent'] = current #Calculate the job splitting without actually doing it # number of lumis is calculated by events number and SubSliceSize which is events per lumi # So if there no exact division between events per job and events per lumi # it takes the ceiling of the value. 
# Therefore total lumis can't be calculated from total events / SubSliceSize # It has to be caluated by adding the lumis_per_job * number of jobs nEvents = mask['LastEvent'] - mask['FirstEvent'] + 1 lumis_per_job = ceil(self.args['SliceSize'] / self.args['SubSliceSize']) nLumis = floor(nEvents / self.args['SliceSize']) * lumis_per_job remainingLumis = ceil(nEvents % self.args['SliceSize'] / self.args['SubSliceSize']) nLumis += remainingLumis jobs = ceil(nEvents / self.args['SliceSize']) if self.args['MaxLumisPerElement'] and nLumis > int( self.args['MaxLumisPerElement']): raise WorkQueueWMSpecError( self.wmspec, "Too many lumis in WQE: %s" % nLumis) mask['LastLumi'] = mask['FirstLumi'] + int( nLumis) - 1 # inclusive range self.newQueueElement(WMSpec=self.wmspec, NumberOfLumis=nLumis, NumberOfEvents=nEvents, Jobs=jobs, Mask=copy(mask), NoInputUpdate=noInputUpdate, NoPileupUpdate=noPileupUpdate, blowupFactor=self.args['blowupFactor']) if mask['LastEvent'] > (2**32 - 1): #This is getting tricky, to ensure consecutive #events numbers we must calculate where the jobSplitter #will restart the firstEvent to 1 for the last time #in the newly created unit internalEvents = mask['FirstEvent'] accumulatedEvents = internalEvents breakPoint = internalEvents while accumulatedEvents < mask['LastEvent']: if (internalEvents + self.args['SliceSize'] - 1) > (2**32 - 1): internalEvents = 1 breakPoint = accumulatedEvents else: internalEvents += self.args['SliceSize'] accumulatedEvents += self.args['SliceSize'] leftoverEvents = mask['LastEvent'] - breakPoint + 1 mask['FirstEvent'] = leftoverEvents + 1 else: mask['FirstEvent'] = mask['LastEvent'] + 1 mask['FirstLumi'] = mask['LastLumi'] + 1 eventsAccounted += stepSize lastAllowedEvent = (total - eventsAccounted) + mask['FirstEvent'] - 1
def testMask6(self):
    """Lax round-trip of a DataStructsJob carrying a default (empty) mask."""
    emptyMask = Mask()
    testJob = DataStructsJob()
    testJob["mask"] = emptyMask
    self.roundTripLax(testJob)
def testRunsAndLumis(self):
    """
    Test several different ways of creating the same list of runs and lumis
    """
    runMask = Mask()
    rangesMask = Mask()
    runAndLumisMask = Mask()

    # Build with whole Run objects...
    for runObj in (Run(100, 1, 2, 3, 4, 5, 6, 8, 9, 10),
                   Run(200, 6, 7, 8),
                   Run(300, 12)):
        runMask.addRun(runObj)

    # ...with explicit [first, last] lumi ranges...
    rangesMask.addRunWithLumiRanges(run=100, lumiList=[[1, 6], [8, 10]])
    rangesMask.addRunWithLumiRanges(run=200, lumiList=[[6, 8]])
    rangesMask.addRunWithLumiRanges(run=300, lumiList=[[12, 12]])

    # ...and with addRunAndLumis range endpoints
    runAndLumisMask.addRunAndLumis(run=100, lumis=[1, 6])
    runAndLumisMask.addRunAndLumis(run=100, lumis=[8, 10])
    runAndLumisMask.addRunAndLumis(run=200, lumis=[6, 8])
    runAndLumisMask.addRunAndLumis(run=300, lumis=[12, 12])

    self.assertEqual(runMask.getRunAndLumis(), rangesMask.getRunAndLumis())
    # Note, this may break if the TODO in Mask.addRunAndLumis() is addressed
    self.assertEqual(runMask.getRunAndLumis(), runAndLumisMask.getRunAndLumis())
def testMask2(self):
    """Round-trip a bare Mask whose LastEvent < FirstEvent."""
    testMask = Mask()
    testMask['FirstEvent'] = 9999
    testMask['LastEvent'] = 999
    self.roundTrip(testMask)
def testFilesets(self):
    """
    Test workflow tasks, filesets and subscriptions creation
    """
    # expected tasks, filesets, subscriptions, etc
    expOutTasks = ['/TestWorkload/Production',
                   '/TestWorkload/Production/ProductionMergeOutputB',
                   '/TestWorkload/Production/ProductionMergeOutputA']
    expWfTasks = ['/TestWorkload/Production',
                  '/TestWorkload/Production/LogCollect',
                  '/TestWorkload/Production/ProductionCleanupUnmergedOutputA',
                  '/TestWorkload/Production/ProductionCleanupUnmergedOutputB',
                  '/TestWorkload/Production/ProductionMergeOutputA',
                  '/TestWorkload/Production/ProductionMergeOutputA/ProductionOutputAMergeLogCollect',
                  '/TestWorkload/Production/ProductionMergeOutputB',
                  '/TestWorkload/Production/ProductionMergeOutputB/ProductionOutputBMergeLogCollect']
    # first entry is a placeholder filled in once the mask hash is known
    expFsets = ['FILESET_DEFINED_DURING_RUNTIME',
                '/TestWorkload/Production/unmerged-OutputBUSER',
                '/TestWorkload/Production/ProductionMergeOutputA/merged-logArchive',
                '/TestWorkload/Production/ProductionMergeOutputA/merged-MergedRECO',
                '/TestWorkload/Production/ProductionMergeOutputB/merged-logArchive',
                '/TestWorkload/Production/ProductionMergeOutputB/merged-MergedUSER',
                '/TestWorkload/Production/unmerged-logArchive',
                '/TestWorkload/Production/unmerged-OutputARECO']
    subMaps = ['FILESET_DEFINED_DURING_RUNTIME',
               (6, '/TestWorkload/Production/ProductionMergeOutputA/merged-logArchive',
                '/TestWorkload/Production/ProductionMergeOutputA/ProductionOutputAMergeLogCollect',
                'MinFileBased', 'LogCollect'),
               (3, '/TestWorkload/Production/ProductionMergeOutputB/merged-logArchive',
                '/TestWorkload/Production/ProductionMergeOutputB/ProductionOutputBMergeLogCollect',
                'MinFileBased', 'LogCollect'),
               (8, '/TestWorkload/Production/unmerged-logArchive',
                '/TestWorkload/Production/LogCollect',
                'MinFileBased', 'LogCollect'),
               (7, '/TestWorkload/Production/unmerged-OutputARECO',
                '/TestWorkload/Production/ProductionCleanupUnmergedOutputA',
                'SiblingProcessingBased', 'Cleanup'),
               (5, '/TestWorkload/Production/unmerged-OutputARECO',
                '/TestWorkload/Production/ProductionMergeOutputA',
                'ParentlessMergeBySize', 'Merge'),
               (4, '/TestWorkload/Production/unmerged-OutputBUSER',
                '/TestWorkload/Production/ProductionCleanupUnmergedOutputB',
                'SiblingProcessingBased', 'Cleanup'),
               (2, '/TestWorkload/Production/unmerged-OutputBUSER',
                '/TestWorkload/Production/ProductionMergeOutputB',
                'ParentlessMergeBySize', 'Merge')]

    testArguments = MonteCarloWorkloadFactory.getTestArguments()
    testArguments["CouchURL"] = os.environ["COUCHURL"]
    testArguments["CouchDBName"] = TEST_DB_NAME
    testArguments["ConfigCacheID"] = self.injectMonteCarloConfig()

    factory = MonteCarloWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)

    # first top-level subscription, masked to lumis 1-10 / events 1-1000
    myMask = Mask(FirstRun=1, FirstLumi=1, FirstEvent=1,
                  LastRun=1, LastLumi=10, LastEvent=1000)
    testWMBSHelper = WMBSHelper(testWorkload, "Production", mask=myMask,
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    self.assertItemsEqual(testWorkload.listOutputProducingTasks(), expOutTasks)

    workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload")
    self.assertItemsEqual([item['task'] for item in workflows], expWfTasks)

    # same function as in WMBSHelper, otherwise we cannot know which fileset name is
    maskString = ",".join(["%s=%s" % (x, myMask[x]) for x in sorted(myMask)])
    topFilesetName = 'TestWorkload-Production-%s' % md5(maskString).hexdigest()
    expFsets[0] = topFilesetName
    # returns a tuple of id, name, open and last_update
    filesets = self.listFilesets.execute()
    self.assertItemsEqual([item[1] for item in filesets], expFsets)

    subMaps[0] = (1, topFilesetName, '/TestWorkload/Production', 'EventBased', 'Production')
    subscriptions = self.listSubsMapping.execute(workflow="TestWorkload", returnTuple=True)
    self.assertItemsEqual(subscriptions, subMaps)

    ### create another top level subscription
    myMask = Mask(FirstRun=1, FirstLumi=11, FirstEvent=1001,
                  LastRun=1, LastLumi=20, LastEvent=2000)
    testWMBSHelper = WMBSHelper(testWorkload, "Production", mask=myMask,
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload")
    self.assertItemsEqual([item['task'] for item in workflows], expWfTasks)

    # same function as in WMBSHelper, otherwise we cannot know which fileset name is
    maskString = ",".join(["%s=%s" % (x, myMask[x]) for x in sorted(myMask)])
    topFilesetName = 'TestWorkload-Production-%s' % md5(maskString).hexdigest()
    expFsets.append(topFilesetName)
    # returns a tuple of id, name, open and last_update
    filesets = self.listFilesets.execute()
    self.assertItemsEqual([item[1] for item in filesets], expFsets)

    subMaps.append((9, topFilesetName, '/TestWorkload/Production', 'EventBased', 'Production'))
    subscriptions = self.listSubsMapping.execute(workflow="TestWorkload", returnTuple=True)
    self.assertItemsEqual(subscriptions, subMaps)
def testMask1(self):
    """Round-trip a freshly constructed, empty Mask."""
    emptyMask = Mask()
    self.roundTrip(emptyMask)