def testGetMaskedBlocks(self):
    """
    _testGetMaskedBlocks_

    Check that getMaskedBlocks is returning the correct information:
    every file lumi list must equal the intersection of the available
    input lumis with the task lumi mask.
    """
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.data.request.priority = 69
    task = getFirstTask(Tier1ReRecoWorkload)
    inputDataset = task.inputDataset()
    inputDataset.primary = 'SingleElectron'
    inputDataset.processed = 'StoreResults-Run2011A-WElectron-PromptSkim-v4-ALCARECO-NOLC-36cfce5a1d3f3ab4df5bd2aa0a4fa380'
    inputDataset.tier = 'USER'
    task.data.input.splitting.runs = [166921, 166429, 166911]
    task.data.input.splitting.lumis = ['40,70', '1,50', '1,5,16,20']
    lumiMask = LumiList(compactList={'166921': [[40, 70]], '166429': [[1, 50]], '166911': [[1, 5], [16, 20]], })
    inputLumis = LumiList(compactList={'166921': [[1, 67]], '166429': [[1, 91]], '166911': [[1, 104]], })
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = DBSReader(inputDataset.dbsurl)
    maskedBlocks = Block(**self.splitArgs).getMaskedBlocks(task, dbs, dataset)
    # items() behaves identically to the Python-2-only iteritems() here
    # and keeps the test portable to Python 3.
    for dummyBlock, files in maskedBlocks.items():
        for dummyFile, lumiList in files.items():
            self.assertEqual(str(lumiList), str(inputLumis & lumiMask))
def getMaskedBlocks(self, task, dbs, datasetPath):
    """
    Get the blocks which pass the lumi mask restrictions. For each block
    return the list of lumis which were ok (given the lumi mask). The data
    structure returned is the following:
    {
     "block1" : {"file1" : LumiList(), "file5" : LumiList(), ...}
     "block2" : {"file2" : LumiList(), "file7" : LumiList(), ...}
    }
    """
    maskedBlocks = {}
    # The task mask is already a LumiList, which makes the set algebra cheap.
    taskMask = task.getLumiMask()
    # For performance reasons fetch only block names first, then query
    # the file/lumi details block by block.
    blockNames = [entry['block_name'] for entry in dbs.dbs.listBlocks(dataset=datasetPath)]
    for blockName in blockNames:
        for info in dbs.dbs.listFileLumis(block_name=blockName, validFileOnly=1):
            lfn = info['logical_file_name']
            fileMask = LumiList(runsAndLumis={str(info['run_num']): info['lumi_section_num']})
            overlap = taskMask & fileMask
            if overlap:
                blockDict = maskedBlocks.setdefault(blockName, {})
                blockDict.setdefault(lfn, LumiList())
                blockDict[lfn] += overlap
    return maskedBlocks
def getLumiList(lumi_mask_name, logger=None):
    """
    Takes a lumi-mask and returns a LumiList object.
    lumi-mask: either an http address or a json file on disk.
    Raises ConfigurationException when the mask cannot be fetched or read.
    """
    scheme = urlparse.urlparse(lumi_mask_name)[0]
    if scheme in ('http', 'https'):
        if logger:
            logger.debug('Downloading lumi-mask from %s' % lumi_mask_name)
        try:
            return LumiList(url=lumi_mask_name)
        except urllib2.HTTPError as err:
            raise ConfigurationException(
                "Problem downloading lumi-mask file; %s %s" % (err.code, err.msg))
    if logger:
        logger.debug('Reading lumi-mask from %s' % lumi_mask_name)
    try:
        return LumiList(filename=lumi_mask_name)
    except IOError as err:
        raise ConfigurationException("Problem loading lumi-mask file; %s" % str(err))
def testWithMaskedBlocks(self):
    """
    _testWithMaskedBlocks_

    Test job splitting with masked blocks: all lumis in the mask must be
    accounted for by the returned work units.
    """
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs,
                                         assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
    Tier1ReRecoWorkload.data.request.priority = 69
    task = getFirstTask(Tier1ReRecoWorkload)
    dummyDataset = task.inputDataset()
    task.data.input.splitting.runs = [181061, 180899]
    task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
    lumiMask = LumiList(compactList={'206371': [[1, 50], [60, 70]], '180899': [[1, 1]]})
    units, dummyRejectedWork = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)
    nLumis = sum(unit['NumberOfLumis'] for unit in units)
    self.assertEqual(len(lumiMask.getLumis()), nLumis)
def testWithMaskedBlocks(self):
    """
    _testWithMaskedBlocks_

    Test job splitting with masked blocks.
    """
    Globals.GlobalParams.setNumOfRunsPerFile(3)
    Globals.GlobalParams.setNumOfLumisPerBlock(5)
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.data.request.priority = 69
    task = getFirstTask(Tier1ReRecoWorkload)
    inputDataset = task.inputDataset()
    task.data.input.splitting.runs = [181061, 180899]
    task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
    lumiMask = LumiList(compactList={'206371': [[1, 50], [60, 70]],
                                     '180899': [[1, 1]],
                                     })
    # NOTE(review): dataset and dbs are never read afterwards; kept in case
    # DBSReader construction matters for the emulated environment -- confirm.
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    # Renamed unused unpack target to dummyRejectedWork, following the
    # dummy-prefix convention used by the sibling tests.
    units, dummyRejectedWork = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)
    nLumis = 0
    for unit in units:
        nLumis += unit['NumberOfLumis']
    self.assertEqual(len(lumiMask.getLumis()), nLumis)
def testFilter(self):
    """
    Test filtering of a list of lumis against a constructed LumiList.
    """
    runsAndLumis = {
        1: range(1, 34) + [35] + range(37, 48),
        2: range(49, 76) + range(77, 131) + range(133, 137)
    }
    # A superset of the constructed list ...
    completeList = zip([1] * 150, range(1, 150)) + \
                   zip([2] * 150, range(1, 150)) + \
                   zip([3] * 150, range(1, 150))
    # ... a subset of it ...
    smallList = zip([1] * 50, range(1, 10)) + zip([2] * 50, range(50, 70))
    # ... and a partially overlapping list with its expected filtered result.
    overlapList = zip([1] * 150, range(30, 40)) + \
                  zip([2] * 150, range(60, 80))
    overlapRes = zip([1] * 9, range(30, 34)) + [(1, 35)] + \
                 zip([1] * 9, range(37, 40)) + \
                 zip([2] * 30, range(60, 76)) + \
                 zip([2] * 9, range(77, 80))
    runLister = LumiList(runsAndLumis=runsAndLumis)
    filterComplete = runLister.filterLumis(completeList)
    filterSmall = runLister.filterLumis(smallList)
    filterOverlap = runLister.filterLumis(overlapList)
    self.assertTrue(filterComplete == runLister.getLumis())
    self.assertTrue(filterSmall == smallList)
    self.assertTrue(filterOverlap == overlapRes)
def testOr(self):
    """
    a|b for lots of cases, plus construction from a list of dicts.
    """
    alumis = {'1': range(2, 20) + range(31, 39) + range(45, 49),
              '2': range(6, 20) + range(30, 40),
              '3': range(10, 20) + range(30, 40) + range(50, 60),
              }
    blumis = {'1': range(1, 6) + range(12, 13) + range(16, 30) + range(40, 50) + range(39, 80),
              '2': range(10, 35),
              '3': range(10, 15) + range(35, 40) + range(45, 51) + range(59, 70),
              }
    clumis = {'1': range(1, 6) + range(12, 13) + range(16, 30) + range(40, 50) + range(39, 80),
              '2': range(10, 35),
              }
    result = {'1': range(2, 20) + range(31, 39) + range(45, 49) + range(1, 6) + range(12, 13) + range(16, 30) + range(40, 50) + range(39, 80),
              '2': range(6, 20) + range(30, 40) + range(10, 35),
              '3': range(10, 20) + range(30, 40) + range(50, 60) + range(10, 15) + range(35, 40) + range(45, 51) + range(59, 70),
              }
    a = LumiList(runsAndLumis=alumis)
    b = LumiList(runsAndLumis=blumis)
    # Fixed: c was built from blumis while the 'multiple' list below uses
    # clumis, so the hard/easy comparison compared different inputs
    # (it only passed because clumis is a subset of blumis).
    c = LumiList(runsAndLumis=clumis)
    r = LumiList(runsAndLumis=result)

    self.assertTrue((a | b).getCMSSWString() == r.getCMSSWString())
    self.assertTrue((a | b).getCMSSWString() == (b | a).getCMSSWString())
    self.assertTrue((a | b).getCMSSWString() == (a + b).getCMSSWString())

    # Test list constuction (faster)

    multiple = [alumis, blumis, clumis]
    easy = LumiList(runsAndLumis=multiple)
    hard = a + b
    hard += c
    self.assertTrue(hard.getCMSSWString() == easy.getCMSSWString())
def testWithMaskedBlocks(self):
    """
    _testWithMaskedBlocks_

    Test job splitting with masked blocks: the work units must cover
    exactly the lumis present in the mask.
    """
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.data.request.priority = 69
    task = getFirstTask(Tier1ReRecoWorkload)
    dummyDataset = task.inputDataset()
    task.data.input.splitting.runs = [181061, 180899]
    task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
    lumiMask = LumiList(compactList={'206371': [[1, 50], [60, 70]], '180899': [[1, 1]]})
    units, dummyRejectedWork = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)
    nLumis = sum(unit['NumberOfLumis'] for unit in units)
    self.assertEqual(len(lumiMask.getLumis()), nLumis)
def fast_getDoubleLumis(lumisDict):
    """
    Return, in compact-list form, the (run, lumi) pairs that appear more
    than once in the input {run: [lumis]} dictionary.
    """
    doubleLumis = set()
    # items() works on Python 2 and 3; the previous version relied on the
    # cryptic "x in seen or seen.add(x)" side-effect idiom.
    for run, lumis in lumisDict.items():
        seen = set()
        for lumi in lumis:
            pair = (run, lumi)
            if pair in seen:
                doubleLumis.add(pair)
            else:
                seen.add(pair)
    doubleLumis = LumiList(lumis=doubleLumis)
    return doubleLumis.getCompactList()
def addJobs(self, jobs):
    """
    Accumulate per-job lumi, event and file statistics for the given jobs.
    The statistic source depends on the splitting algorithm in use:
    file counts for FileBased, mask ranges for EventBased, and the
    run/lumi mask combined with the average events per lumi otherwise.
    """
    if self.algo == 'FileBased':
        for job in jobs:
            infiles = job['input_files']
            self.lumisPerJob.append(sum(f.get('lumiCount', 0) for f in infiles))
            self.eventsPerJob.append(sum(f['events'] for f in infiles))
            self.filesPerJob.append(len(infiles))
    elif self.algo == 'EventBased':
        for job in jobs:
            mask = job['mask']
            self.lumisPerJob.append(mask['LastLumi'] - mask['FirstLumi'])
            self.eventsPerJob.append(mask['LastEvent'] - mask['FirstEvent'])
    else:
        for job in jobs:
            infiles = job['input_files']
            avgEventsPerLumi = sum(f['avgEvtsPerLumi'] for f in infiles) / float(len(infiles))
            lumis = LumiList(compactList=job['mask']['runAndLumis'])
            self.lumisPerJob.append(len(lumis.getLumis()))
            self.eventsPerJob.append(avgEventsPerLumi * self.lumisPerJob[-1])
def makeLumiList(lumiString):
    """
    Parse a JSON-encoded lumi-mask string and return its compact-list form.

    Raises WMWorkloadToolsException when the string cannot be parsed.
    """
    try:
        compactList = json.loads(lumiString)
        ll = LumiList(compactList=compactList)
        return ll.getCompactList()
    # A bare "except:" would also swallow SystemExit/KeyboardInterrupt;
    # catch Exception so only real errors are converted.
    except Exception:
        raise WMWorkloadToolsException("Could not parse LumiList")
def testAnd(self):
    """
    a&b for lots of cases, including runs present in only one operand.
    """
    alumis = {'1': range(2, 20) + range(31, 39) + range(45, 49),
              '2': range(6, 20) + range(30, 40),
              '3': range(10, 20) + range(30, 40) + range(50, 60),
              '4': range(1, 100),
              }
    blumis = {'1': range(1, 6) + range(12, 13) + range(16, 25) + range(25, 40) + range(40, 50) + range(33, 36),
              '2': range(10, 35),
              '3': range(10, 15) + range(35, 40) + range(45, 51) + range(59, 70),
              '5': range(1, 100),
              }
    # Runs 4 and 5 exist in only one operand each, so they drop out.
    result = {'1': range(2, 6) + range(12, 13) + range(16, 20) + range(31, 39) + range(45, 49),
              '2': range(10, 20) + range(30, 35),
              '3': range(10, 15) + range(35, 40) + range(50, 51) + range(59, 60),
              }
    a = LumiList(runsAndLumis=alumis)
    b = LumiList(runsAndLumis=blumis)
    r = LumiList(runsAndLumis=result)
    self.assertTrue((a & b).getCMSSWString() == r.getCMSSWString())
    self.assertTrue((a & b).getCMSSWString() == (b & a).getCMSSWString())
    self.assertTrue((a | b).getCMSSWString() != r.getCMSSWString())
def testWithMaskedBlocks(self):
    """
    _testWithMaskedBlocks_

    Test job splitting with masked blocks.
    """
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.data.request.priority = 69
    task = getFirstTask(Tier1ReRecoWorkload)
    dummyDataset = task.inputDataset()
    task.data.input.splitting.runs = [181061, 180899]
    task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
    lumiMask = LumiList(compactList={'206371': [[1, 50], [60, 70]],
                                     '180899': [[1, 1]]})
    units, dummyRejectedWork = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)
    # Every lumi in the mask should appear in exactly one unit.
    nLumis = 0
    for unit in units:
        nLumis += unit['NumberOfLumis']
    self.assertEqual(len(lumiMask.getLumis()), nLumis)
def mergeLumis(inputdata, lumimask):
    """
    Computes the processed lumis, merges if needed and returns the compacted list
    (called when usedbs=no).

    Returns a 3-tuple of compact lists: (merged, lumimask - merged, duplicates).
    """
    doubleLumis = set()
    mergedLumis = set()
    #merge the lumis from single files
    for reports in inputdata.values():
        for report in reports:
            # items() works on Python 2 and 3 (iteritems() is Py2-only)
            for run, lumis in literal_eval(report['runlumi']).items():
                for lumi in lumis:
                    if (run, lumi) in mergedLumis:
                        doubleLumis.add((run, lumi))
                    mergedLumis.add((run, lumi))
    #convert the runlumis from list of pairs to dict: [(123,3), (123,4), (123,5), (123,7), (234,6)] => {123 : [3,4,5,7], 234 : [6]}
    dLumisDict = {}
    mLumisDict = {}
    for k, v in doubleLumis:
        dLumisDict.setdefault(k, []).append(int(v))
    for k, v in mergedLumis:
        mLumisDict.setdefault(k, []).append(int(v))
    doubleLumis = LumiList(runsAndLumis=dLumisDict)
    mergedLumis = LumiList(runsAndLumis=mLumisDict)
    #get the compact list using CMSSW framework
    return mergedLumis.getCompactList(), (LumiList(compactList=lumimask) - mergedLumis).getCompactList(), doubleLumis.getCompactList()
def getDoubleLumis(lumisDict):
    """
    Calculate lumis counted twice: return, in compact-list form, the
    (run, lumi) pairs that appear more than once in {run: [lumis]}.
    """
    doubleLumis = set()
    # items() is Python 2/3 compatible; the explicit loop replaces the
    # opaque "x in seen or seen.add(x)" side-effect idiom.
    for run, lumis in lumisDict.items():
        seen = set()
        for lumi in lumis:
            pair = (run, lumi)
            if pair in seen:
                doubleLumis.add(pair)
            else:
                seen.add(pair)
    doubleLumis = LumiList(lumis=doubleLumis)
    return doubleLumis.getCompactList()
def testWrite(self):
    """
    Smoke test: a LumiList built from runs/lumis can be written out as JSON.
    """
    alumis = {'1': range(2, 20) + range(31, 39) + range(45, 49),
              '2': range(6, 20) + range(30, 40),
              '3': range(10, 20) + range(30, 40) + range(50, 60),
              '4': range(1, 100),
              }
    lumiListObj = LumiList(runsAndLumis=alumis)
    lumiListObj.writeJSON('newFile.json')
def makeLumiList(lumiDict):
    """
    Normalize a lumi mask (dict or JSON string) into compact-list form.

    Raises WMSpecFactoryException when the input cannot be parsed.
    """
    try:
        if isinstance(lumiDict, basestring):
            lumiDict = JsonWrapper.loads(lumiDict)
        ll = LumiList(compactList=lumiDict)
        return ll.getCompactList()
    # A bare "except:" would also swallow SystemExit/KeyboardInterrupt.
    except Exception:
        raise WMSpecFactoryException("Could not parse LumiList, %s: %s" % (type(lumiDict), lumiDict))
def subtractLumis(input, output):
    """
    Computes the processed lumis, merges from the DBS reuslts
    (called when usedbs=yes).

    Returns (processed compact list, input-minus-output compact list).
    """
    processed = LumiList(runsAndLumis=output)
    requested = LumiList(runsAndLumis=input)
    missing = requested - processed
    return processed.getCompactList(), missing.getCompactList()
def testWrite(self):
    """
    Smoke test: write a runs/lumis-constructed LumiList out to JSON.
    """
    alumis = {
        "1": range(2, 20) + range(31, 39) + range(45, 49),
        "2": range(6, 20) + range(30, 40),
        "3": range(10, 20) + range(30, 40) + range(50, 60),
        "4": range(1, 100),
    }
    lumiListObj = LumiList(runsAndLumis=alumis)
    lumiListObj.writeJSON("newFile.json")
def testDuplicates(self):
    """
    Test a list with lots of duplicates: duplicated (run, lumi) pairs
    must be collapsed away by the constructor.
    """
    expected = list(zip([1] * 100, range(1, 34) + range(37, 48)))
    # The input repeats lumis 5-24 on top of the expected set.
    duplicated = list(zip([1] * 100, range(1, 34) + range(37, 48) + range(5, 25)))
    lister = LumiList(lumis=duplicated)
    self.assertTrue(lister.getLumis() == expected)
def getDoubleLumis(lumisDict):
    """
    Calculate lumis counted twice: return, in compact-list form, the
    (run, lumi) pairs that appear more than once in {run: [lumis]}.
    """
    doubleLumis = set()
    # items() is Python 2/3 compatible. A seen-set makes this O(n);
    # the previous list.count() per element was O(n^2) per run.
    for run, lumis in lumisDict.items():
        seen = set()
        for lumi in lumis:
            if lumi in seen:
                doubleLumis.add((run, lumi))
            else:
                seen.add(lumi)
    doubleLumis = LumiList(lumis=doubleLumis)
    return doubleLumis.getCompactList()
def testDuplicates(self):
    """
    Test a list with lots of duplicates: repeated pairs are collapsed.
    """
    expected = zip([1] * 100, range(1, 34) + range(37, 48))
    duplicated = zip([1] * 100, range(1, 34) + range(37, 48) + range(5, 25))
    lister = LumiList(lumis=duplicated)
    self.assertTrue(lister.getLumis() == expected)
def fast_getDoubleLumis(lumisDict):
    """
    Return, in compact-list form, the (run, lumi) pairs appearing more
    than once in the input {run: [lumis]} dictionary.
    """
    doubleLumis = set()
    # items() works on Python 2 and 3; explicit loop replaces the
    # "x in seen or seen.add(x)" side-effect idiom.
    for run, lumis in lumisDict.items():
        seen = set()
        for lumi in lumis:
            pair = (run, lumi)
            if pair in seen:
                doubleLumis.add(pair)
            else:
                seen.add(pair)
    doubleLumis = LumiList(lumis=doubleLumis)
    return doubleLumis.getCompactList()
def makeLumiList(lumiDict):
    """
    Normalize a lumi mask (dict or JSON string/bytes) into compact-list form.

    Raises WMSpecFactoryException when the input cannot be parsed.
    """
    try:
        if isinstance(lumiDict, (str, bytes)):
            lumiDict = json.loads(lumiDict)
        ll = LumiList(compactList=lumiDict)
        return ll.getCompactList()
    # A bare "except:" would also swallow SystemExit/KeyboardInterrupt.
    except Exception:
        raise WMSpecFactoryException("Could not parse LumiList, %s: %s" % (type(lumiDict), lumiDict))
def removeLumiList(self, lumiList):
    """
    Remove a lumi list from this data structure

    This requires conversion to LumiList to do the lumi algebra
    an may be computationally expensive for a large number of lumis.
    """
    current = LumiList(compactList=self['runAndLumis'])
    self['runAndLumis'] = (current - lumiList).getCompactList()
def testNull(self):
    """
    Test a null list: all accessors must return empty results.
    """
    emptyLister = LumiList(lumis=None)
    self.assertTrue(emptyLister.getCMSSWString() == '')
    self.assertTrue(emptyLister.getLumis() == [])
    self.assertTrue(emptyLister.getCompactList() == {})
def adjustLumisForCompletion(self, task, unprocessed):
    """Sets the run, lumi information in the task information for the
    completion jobs. Returns True if completion jobs are needed,
    otherwise False.

    Lumis are gathered from two sources: per-job "missing lumis" files
    under missingDir, and per-job JSON files extracted from the
    run_and_lumis.tar.gz archive for failed jobs.
    """
    missingDir = "automatic_splitting/missing_lumis/" #TODO in ServerUtilities to be shared with PJ
    try:
        # Only consider files for the jobs the caller marked unprocessed.
        available = set(os.listdir(missingDir)) & unprocessed
    except OSError:
        # Directory may not exist yet; treat as no missing-lumi files.
        available = set()
    failed = set(self.failedJobs) & unprocessed
    if len(available) == 0 and len(failed) == 0:
        return False
    missing = LumiList()
    for missingFile in available:
        with open(os.path.join(missingDir, missingFile)) as fd:
            self.logger.info("Adding missing lumis from job %s", missingFile)
            # File content is a Python/JSON literal of a compact lumi list.
            missing = missing + LumiList(compactList=literal_eval(fd.read()))
    for failedId in failed:
        f = None
        try:
            tmpdir = tempfile.mkdtemp()
            # NOTE(review): archive path is relative -- assumes the current
            # working directory contains run_and_lumis.tar.gz; confirm.
            f = tarfile.open("run_and_lumis.tar.gz")
            fn = "job_lumis_{0}.format".format(failedId)
            f.extract(fn, path=tmpdir)
            with open(os.path.join(tmpdir, fn)) as fd:
                injson = json.load(fd)
                missing = missing + LumiList(compactList=injson)
                self.logger.info("Adding lumis from failed job %s", failedId)
        finally:
            if f:
                f.close()
            shutil.rmtree(tmpdir)
    missing_compact = missing.getCompactList()
    runs = missing.getRuns()
    # Compact list is like
    # {
    # '1': [[1, 33], [35, 35], [37, 47], [49, 75], [77, 130], [133, 136]],
    # '2':[[1,45],[50,80]]
    # }
    # Now we turn the lumis into something like:
    # lumis=['1, 33, 35, 35, 37, 47, 49, 75, 77, 130, 133, 136','1,45,50,80']
    # which is the format expected by buildLumiMask in the splitting algorithm
    lumis = [",".join(str(l) for l in functools.reduce(lambda x, y:x + y, missing_compact[run])) for run in runs]
    task['tm_split_args']['runs'] = runs
    task['tm_split_args']['lumis'] = lumis
    return True
def adjust(self, parameters, inputs, outputs, se):
    """
    Populate job parameters and the input/output transfer lists for this
    task, using the storage element helper `se` to map to local paths.

    NOTE(review): mutates `parameters`, `inputs` and `outputs` in place;
    the exact `se` contract (transfer_inputs/transfer_outputs/local) is
    defined elsewhere -- confirm against the StorageElement class.
    """
    local = self._local
    if local and se.transfer_inputs():
        # (local path, basename, False) triples for every non-empty file entry.
        inputs += [(se.local(f), os.path.basename(f), False) for id, f in self._files if f]
    if se.transfer_outputs():
        # Map each (logical, remote) output pair to its local counterpart.
        outputs += [(se.local(rf), os.path.basename(lf)) for lf, rf in self.outputs]
    parameters['mask']['files'] = self.input_files
    parameters['output files'] = self.outputs
    if not self._file_based:
        # Lumi-based splitting: publish the unit lumis as a compact mask.
        ls = LumiList(lumis=set([(run, lumi) for (id, file, run, lumi) in self._units]))
        parameters['mask']['lumis'] = ls.getCompactList()
def addJobs(self, jobs):
    """
    Accumulate per-job lumi and event statistics for the given jobs,
    based on the splitting algorithm currently configured.
    """
    if self.algo == 'FileBased':
        for job in jobs:
            infiles = job['input_files']
            self.lumisPerJob.append(sum(f.get('lumiCount', 0) for f in infiles))
            self.eventsPerJob.append(sum(f['events'] for f in infiles))
    elif self.algo == 'EventBased':
        for job in jobs:
            mask = job['mask']
            self.lumisPerJob.append(mask['LastLumi'] - mask['FirstLumi'])
            self.eventsPerJob.append(mask['LastEvent'] - mask['FirstEvent'])
    else:
        for job in jobs:
            infiles = job['input_files']
            avgEventsPerLumi = sum(f['avgEvtsPerLumi'] for f in infiles) / float(len(infiles))
            lumis = LumiList(compactList=job['mask']['runAndLumis'])
            self.lumisPerJob.append(len(lumis.getLumis()))
            self.eventsPerJob.append(avgEventsPerLumi * self.lumisPerJob[-1])
def mergeLumis(inputdata):
    """
    Computes the processed lumis, merges if needed and returns the compacted list.
    """
    mergedLumis = set()
    #merge the lumis from single files
    for reports in inputdata.values():
        for report in reports:
            # items() works on Python 2 and 3 (iteritems() is Py2-only)
            for run, lumis in literal_eval(report['runlumi']).items():
                for lumi in lumis:
                    mergedLumis.add((run, int(lumi)))  # lumi is str, but need int
    mergedLumis = LumiList(lumis=mergedLumis)
    return mergedLumis.getCompactList()
def getMaskedBlocks(self, task, dbs, datasetPath):
    """
    Get the blocks which pass the lumi mask restrictions. For each block
    return the list of lumis which were ok (given the lumi mask). The data
    structure returned is the following:
    {
     "block1" : {"file1" : LumiList(), "file5" : LumiList(), ...}
     "block2" : {"file2" : LumiList(), "file7" : LumiList(), ...}
    }
    """
    # Get mask and convert to LumiList to make operations easier
    maskedBlocks = {}
    lumiMask = task.getLumiMask()
    taskMask = LumiList(compactList=lumiMask)

    # Find all the files that have runs and lumis we are interested in,
    # fill block/lfn part of maskedBlocks. Lumi lists are queried in
    # slices of 50 to keep each DBS call bounded.
    for run, lumis in lumiMask.items():
        files = []
        for slumis in Lexicon.slicedIterator(lumis, 50):
            slicedFiles = dbs.dbs.listFileArray(dataset=datasetPath, run_num=run,
                                                lumi_list=slumis, detail=True)
            files.extend(slicedFiles)
        # Renamed loop variable: "file" shadowed the builtin.
        for fileInfo in files:
            blockName = fileInfo['block_name']
            fileName = fileInfo['logical_file_name']
            if blockName not in maskedBlocks:
                maskedBlocks[blockName] = {}
            if fileName not in maskedBlocks[blockName]:
                maskedBlocks[blockName][fileName] = LumiList()

    # Fill maskedLumis part of maskedBlocks
    for block in maskedBlocks:
        fileLumis = dbs.dbs.listFileLumis(block_name=block, validFileOnly=1)
        for fileLumi in fileLumis:
            lfn = fileLumi['logical_file_name']
            # For each run : [lumis] mask by needed lumis, append to maskedBlocks
            if maskedBlocks[block].get(lfn, None) is not None:
                lumiList = LumiList(runsAndLumis={fileLumi['run_num']: fileLumi['lumi_section_num']})
                maskedBlocks[block][lfn] += (lumiList & taskMask)

    return maskedBlocks
def testRuns(self):
    """
    Test constucting from run and list of lumis; int and string run keys
    must produce the same compact list, and an all-empty mapping is empty.
    """
    runsAndLumis = {
        1: range(1, 34) + [35] + range(37, 48),
        2: range(49, 76) + range(77, 131) + range(133, 137)
    }
    runsAndLumis2 = {
        '1': range(1, 34) + [35] + range(37, 48),
        '2': range(49, 76) + range(77, 131) + range(133, 137)
    }
    blank = {
        '1': [],
        '2': []
    }
    jsonLister = LumiList(filename='lumiTest.json')
    jsonString = jsonLister.getCMSSWString()
    jsonList = jsonLister.getCompactList()

    runLister = LumiList(runsAndLumis=runsAndLumis)
    runString = runLister.getCMSSWString()
    runList = runLister.getCompactList()
    self.assertTrue(jsonString == runString)
    self.assertTrue(jsonList == runList)

    runLister2 = LumiList(runsAndLumis=runsAndLumis2)
    self.assertTrue(runLister2.getCompactList() == runList)

    runLister3 = LumiList(runsAndLumis=blank)
    self.assertTrue(len(runLister3) == 0)
def makeNewJobByWork(self, reason='', failedJob=False):
    """
    Make a new job given the passed in parameters.

    :param reason: Why are we making a new job (debugging only)
    :param failedJob: Make the job as already failed
    :return: nothing
    """
    events = self.eventsInJob
    lumis = self.jobLumis
    files = self.jobFiles

    self.maxLumis = max(self.maxLumis, len(lumis))

    # Transform the lumi list into something compact and usable
    lumiList = LumiList(lumis=lumis).getCompactList()
    logging.debug(
        "Because %s new job with events: %s, lumis: %s, and files: %s",
        reason, events, lumiList, [f['lfn'] for f in files])
    if failedJob:
        logging.debug("    This job will be made failed")
        self.newJob(failedJob=failedJob, failedReason=reason)
    else:
        self.newJob()
    # Calculate and add performance information
    timePerEvent, sizePerEvent, memoryRequirement = self.getPerformanceParameters(
        self.perfParameters)
    self.currentJob.addResourceEstimates(jobTime=events * timePerEvent,
                                         disk=events * sizePerEvent,
                                         memory=memoryRequirement)
    # Add job mask information; items() works on Python 2 and 3
    # (iteritems() is Py2-only).
    for run, lumiRanges in lumiList.items():
        for lumiRange in lumiRanges:
            self.currentJob['mask'].addRunAndLumis(run=int(run), lumis=lumiRange)
    # Add files
    for f in files:
        self.currentJob.addFile(f)
    # Add pileup info if needed
    if self.deterministicPU:
        eventsToSkip = (self.nJobs - 1) * self.maxEvents * self.maxLumis
        logging.debug('Adding baggage to skip %s events', eventsToSkip)
        self.currentJob.addBaggageParameter("skipPileupEvents", eventsToSkip)

    return
def validFiles(self, files):
    """
    Apply lumi mask and or run white/black list and return files which have
    one or more of the requested lumis.

    Files that are plain strings or carry no "LumiList" key are passed
    through unfiltered.
    """
    runWhiteList = self.topLevelTask.inputRunWhitelist()
    runBlackList = self.topLevelTask.inputRunBlacklist()
    lumiMask = self.topLevelTask.getLumiMask()
    blackMask = None
    if lumiMask:
        # We have a lumiMask, so use it and modify with run white/black list
        if runWhiteList:
            lumiMask.selectRuns(runWhiteList)
        if runBlackList:
            lumiMask.removeRuns(runBlackList)
    elif runWhiteList:
        # We have a run whitelist: build a mask from it, subtract off blacklist
        lumiMask = LumiList(runs=runWhiteList)
        if runBlackList:
            lumiMask.removeRuns(runBlackList)
    else:
        # We only have a blacklist (at most), so make a black mask out of it instead
        lumiMask = None
        if runBlackList:
            blackMask = LumiList(runs=runBlackList)

    results = []
    for f in files:
        # Pass through anything we cannot inspect (string LFNs, no lumi info).
        if isinstance(f, basestring) or "LumiList" not in f:
            results.append(f)
            continue

        # Create a LumiList from the WMBS info
        runLumis = {}
        for x in f['LumiList']:
            if x['RunNumber'] in runLumis:
                runLumis[x['RunNumber']].extend(x['LumiSectionNumber'])
            else:
                runLumis[x['RunNumber']] = x['LumiSectionNumber']
        fileLumiList = LumiList(runsAndLumis=runLumis)

        if lumiMask:
            if fileLumiList & lumiMask:
                # At least one lumi from file is in lumiMask
                results.append(f)
        elif blackMask:
            if fileLumiList - blackMask:
                # At least one lumi from file is not in blackMask
                results.append(f)
        else:
            # There is effectively no mask
            results.append(f)

    return results
def getLumiListInValidFiles(dataset, dbsurl='phys03'):
    """
    Get the runs/lumis in the valid files of a given dataset.

    dataset: the dataset name as published in DBS
    dbsurl: the DBS URL or DBS prod instance

    Returns a LumiList object.
    """
    dbsurl = DBSURLS['reader'].get(dbsurl, dbsurl)
    dbs3api = DbsApi(url=dbsurl)
    try:
        files = dbs3api.listFileArray(dataset=dataset, validFileOnly=0, detail=True)
    except Exception as ex:
        msg = "Got DBS client error requesting details of dataset '%s' on DBS URL '%s': %s" % (dataset, dbsurl, ex)
        msg += "\n%s" % (traceback.format_exc())
        raise ClientException(msg)
    if not files:
        msg = "Dataset '%s' not found in DBS URL '%s'." % (dataset, dbsurl)
        raise ClientException(msg)
    # Use a set so the membership test in the loop below is O(1) instead of
    # scanning a list once per file/lumi record.
    validFiles = set(f['logical_file_name'] for f in files if f['is_file_valid'])
    blocks = set([f['block_name'] for f in files])
    runLumiPairs = []
    for blockName in blocks:
        fileLumis = dbs3api.listFileLumis(block_name=blockName)
        for f in fileLumis:
            if f['logical_file_name'] in validFiles:
                run = f['run_num']
                lumis = f['lumi_section_num']
                for lumi in lumis:
                    runLumiPairs.append((run, lumi))
    lumiList = LumiList(lumis=runLumiPairs)

    return lumiList
def getDuplicateLumis(lumisDict):
    """
    Get the run-lumis appearing more than once in the input dictionary of
    runs and lumis, which is assumed to have the following format:
        {
        '1': [1,2,3,4,6,7,8,9,10],
        '2': [1,4,5,20]
        }
    Returns the duplicates in compact-list form.
    """
    doubleLumis = set()
    # items() works on Python 2 and 3; the explicit loop replaces the
    # opaque "x in seen or seen.add(x)" side-effect idiom.
    for run, lumis in lumisDict.items():
        seen = set()
        for lumi in lumis:
            pair = (run, lumi)
            if pair in seen:
                doubleLumis.add(pair)
            else:
                seen.add(pair)
    doubleLumis = LumiList(lumis=doubleLumis)
    return doubleLumis.getCompactList()
def notestRead(self):
    """
    Test reading from JSON (disabled: name does not start with 'test').
    """
    exString = "1:1-1:33,1:35,1:37-1:47,2:49-2:75,2:77-2:130,2:133-2:136"
    exDict = {"1": [[1, 33], [35, 35], [37, 47]],
              "2": [[49, 75], [77, 130], [133, 136]]}
    exVLBR = cms.VLuminosityBlockRange("1:1-1:33", "1:35", "1:37-1:47",
                                       "2:49-2:75", "2:77-2:130", "2:133-2:136")

    jsonList = LumiList(filename="lumiTest.json")
    self.assertTrue(jsonList.getCMSSWString() == exString)
    self.assertTrue(jsonList.getCompactList() == exDict)
    self.assertTrue(jsonList.getVLuminosityBlockRange(True) == exVLBR)
def getLumiList(lumi_mask_name, logger=None):
    """
    Takes a lumi-mask and returns a LumiList object.
    lumi-mask: either an http address or a json file on disk.
    """
    scheme = urlparse.urlparse(lumi_mask_name)[0]
    if scheme in ('http', 'https'):
        if logger:
            logger.debug('Downloading lumi-mask from %s' % lumi_mask_name)
        return LumiList(url=lumi_mask_name)
    if logger:
        logger.debug('Reading lumi-mask from %s' % lumi_mask_name)
    return LumiList(filename=lumi_mask_name)
def notestRead(self):
    """
    Test reading from JSON (disabled: name does not start with 'test').
    """
    exString = "1:1-1:33,1:35,1:37-1:47,2:49-2:75,2:77-2:130,2:133-2:136"
    exDict = {'1': [[1, 33], [35, 35], [37, 47]],
              '2': [[49, 75], [77, 130], [133, 136]]}
    exVLBR = cms.VLuminosityBlockRange('1:1-1:33', '1:35', '1:37-1:47',
                                       '2:49-2:75', '2:77-2:130', '2:133-2:136')

    jsonList = LumiList(filename='lumiTest.json')
    self.assertTrue(jsonList.getCMSSWString() == exString)
    self.assertTrue(jsonList.getCompactList() == exDict)
    self.assertTrue(jsonList.getVLuminosityBlockRange(True) == exVLBR)
def testSubtract(self):
    """
    a-b for lots of cases, including operands missing runs and the
    empty-result case.
    """
    alumis = {
        '1': range(2, 20) + range(31, 39) + range(45, 49),
        '2': range(6, 20) + range(30, 40),
        '3': range(10, 20) + range(30, 40) + range(50, 60),
    }
    blumis = {
        '1': range(1, 6) + range(12, 13) + range(16, 30) + range(40, 50) + range(33, 36),
        '2': range(10, 35),
        '3': range(10, 15) + range(35, 40) + range(45, 51) + range(59, 70),
    }
    # c is blumis without run '3'
    clumis = {
        '1': range(1, 6) + range(12, 13) + range(16, 30) + range(40, 50) + range(33, 36),
        '2': range(10, 35),
    }
    result = {
        '1': range(6, 12) + range(13, 16) + range(31, 33) + range(36, 39),
        '2': range(6, 10) + range(35, 40),
        '3': range(15, 20) + range(30, 35) + range(51, 59),
    }
    result2 = {
        '1': range(6, 12) + range(13, 16) + range(31, 33) + range(36, 39),
        '2': range(6, 10) + range(35, 40),
        '3': range(10, 20) + range(30, 40) + range(50, 60),
    }
    a = LumiList(runsAndLumis=alumis)
    b = LumiList(runsAndLumis=blumis)
    c = LumiList(runsAndLumis=clumis)
    r = LumiList(runsAndLumis=result)
    r2 = LumiList(runsAndLumis=result2)

    self.assertTrue((a - b).getCMSSWString() == r.getCMSSWString())
    self.assertTrue((a - b).getCMSSWString() != (b - a).getCMSSWString())
    # Test where c is missing runs from a
    self.assertTrue((a - c).getCMSSWString() == r2.getCMSSWString())
    self.assertTrue((a - c).getCMSSWString() != (c - a).getCMSSWString())
    # Test empty lists
    self.assertTrue(str(a - a) == '{}')
    self.assertTrue(len(a - a) == 0)
def mergeLumis(inputdata, lumimask):
    """
    Computes the processed lumis, merges if needed and returns the compacted list
    (called when usedbs=no).

    Returns a 3-tuple of compact lists: (merged, lumimask - merged, duplicates).
    """
    doubleLumis = set()
    mergedLumis = set()
    #merge the lumis from single files
    for reports in inputdata.values():
        for report in reports:
            # items() works on Python 2 and 3 (iteritems() is Py2-only)
            for run, lumis in literal_eval(report['runlumi']).items():
                for lumi in lumis:
                    if (run, lumi) in mergedLumis:
                        doubleLumis.add((run, lumi))
                    mergedLumis.add((run, lumi))
    #convert the runlumis from list of pairs to dict: [(123,3), (123,4), (123,5), (123,7), (234,6)] => {123 : [3,4,5,7], 234 : [6]}
    dLumisDict = {}
    mLumisDict = {}
    for k, v in doubleLumis:
        dLumisDict.setdefault(k, []).append(int(v))
    for k, v in mergedLumis:
        mLumisDict.setdefault(k, []).append(int(v))
    doubleLumis = LumiList(runsAndLumis=dLumisDict)
    mergedLumis = LumiList(runsAndLumis=mLumisDict)
    #get the compact list using CMSSW framework
    return mergedLumis.getCompactList(), (
        LumiList(compactList=lumimask) - mergedLumis).getCompactList(), doubleLumis.getCompactList()
def getLumilist(self):
    """
    Get the LumiList parameter and return a LumiList object,
    in case the LumiList is not empty.

    NOTE(review): returns an empty dict when the parameter is unset/empty
    but a LumiList instance otherwise, so callers must handle both types.
    Confirm whether returning an empty LumiList() would be safe before
    unifying the return type.
    """
    lumiDict = self._getValue('LumiList', {})
    if not lumiDict:
        return {}
    return LumiList(compactList=lumiDict)
def makeNewJobByWork(self, reason='', failedJob=False):
    """
    Make a new job given the passed in parameters.

    :param reason: Why are we making a new job (debugging only)
    :param failedJob: Make the job as already failed
    :return: nothing
    """
    events = self.eventsInJob
    lumis = self.jobLumis
    files = self.jobFiles

    self.maxLumis = max(self.maxLumis, len(lumis))

    # Transform the lumi list into something compact and usable
    lumiList = LumiList(lumis=lumis).getCompactList()
    logging.debug("Because %s new job with events: %s, lumis: %s, and files: %s",
                  reason, events, lumiList, [f['lfn'] for f in files])
    if failedJob:
        logging.debug("    This job will be made failed")
        self.newJob(failedJob=failedJob, failedReason=reason)
    else:
        self.newJob()
    # Calculate and add performance information
    timePerEvent, sizePerEvent, memoryRequirement = self.getPerformanceParameters(self.perfParameters)
    self.currentJob.addResourceEstimates(jobTime=events * timePerEvent,
                                         disk=events * sizePerEvent,
                                         memory=memoryRequirement)
    # Add job mask information; items() works on Python 2 and 3
    # (iteritems() is Py2-only).
    for run, lumiRanges in lumiList.items():
        for lumiRange in lumiRanges:
            self.currentJob['mask'].addRunAndLumis(run=int(run), lumis=lumiRange)
    # Add files
    for f in files:
        self.currentJob.addFile(f)
    # Add pileup info if needed
    if self.deterministicPU:
        eventsToSkip = (self.nJobs - 1) * self.maxEvents * self.maxLumis
        logging.debug('Adding baggage to skip %s events', eventsToSkip)
        self.currentJob.addBaggageParameter("skipPileupEvents", eventsToSkip)

    return
def getLumilistWhitelist(self, collectionID, taskName):
    """
    Args:
        collectionID, taskName: Parameters for getLumiWhitelist

    Returns: a LumiList object describing the lumi list from the collection
    """
    compact = self.getLumiWhitelist(collectionID, taskName)
    return LumiList(compactList=compact)
def testAddLumiMask(self):
    """
    _testAddLumiMask_

    Round-trip a typical lumi mask through a task's set/get accessors
    and verify the mask comes back unchanged.
    """
    task = makeWMTask("TestTask")
    compact = {
        '1': [[1, 33], [35, 35], [37, 47], [49, 75], [77, 130], [133, 136]],
        '2': [[1, 45]],
        '3': [[1, 45], [50, 80]],
    }
    inMask = LumiList(compactList=compact)
    task.setLumiMask(lumiMask=inMask.getCompactList())
    outMask = task.getLumiMask()
    # Compare via the CMSSW string representation, which normalizes both sides
    self.assertEqual(inMask.getCMSSWString(), outMask.getCMSSWString())
    return
def subtractLumis(input, output):
    """
    Compute the processed lumis, merged from the DBS results (called when usedbs=yes).

    :param input: dict of run -> list of lumis that were expected to be processed
    :param output: dict of run -> list of lumis actually reported processed;
        a lumi may appear more than once if several jobs reported it
    :return: three compact lumi lists (dicts of run -> [[first, last], ...]):
        (processed lumis, lumis still missing, lumis counted more than once)

    NOTE: the parameter name 'input' shadows the builtin; it is kept for
    backward compatibility with callers passing it by keyword.
    """
    out = LumiList(runsAndLumis=output)
    in_ = LumiList(runsAndLumis=input)
    # Lumis that were expected but never reported
    diff = in_ - out

    # Calculate lumis counted twice.  A single pass per run with a 'seen'
    # set replaces the previous list.count() call per lumi, which was
    # quadratic in the number of lumis of a run.
    dLumisDict = {}
    for run, lumis in output.iteritems():
        seen = set()
        dupes = set()
        for lumi in lumis:
            if lumi in seen:
                dupes.add(lumi)
            else:
                seen.add(lumi)
        if dupes:
            dLumisDict[run] = list(dupes)
    double = LumiList(runsAndLumis=dLumisDict)

    return out.getCompactList(), diff.getCompactList(), double.getCompactList()
def testAddLumiMask(self):
    """
    _testAddLumiMask_

    Verify that setting and getting the lumiMask objects for a task works
    correctly by round-tripping a typical lumi mask.
    """
    testTask = makeWMTask("TestTask")
    maskData = {
        '1': [[1, 33], [35, 35], [37, 47], [49, 75], [77, 130], [133, 136]],
        '2': [[1, 45]],
        '3': [[1, 45], [50, 80]],
    }
    original = LumiList(compactList=maskData)
    testTask.setLumiMask(lumiMask=original.getCompactList())
    # Here the getter returns a compact dict, so rebuild a LumiList from it
    roundTripped = LumiList(compactList=testTask.getLumiMask())
    self.assertEqual(original.getCMSSWString(), roundTripped.getCMSSWString())
    return
def testOr(self):
    """
    Exercise the union operator (a | b) for lots of overlapping cases,
    checking commutativity and equivalence with '+'.
    """
    alumis = {"1": range(2, 20) + range(31, 39) + range(45, 49),
              "2": range(6, 20) + range(30, 40),
              "3": range(10, 20) + range(30, 40) + range(50, 60)}
    blumis = {"1": range(1, 6) + range(12, 13) + range(16, 30) + range(40, 50) + range(39, 80),
              "2": range(10, 35),
              "3": range(10, 15) + range(35, 40) + range(45, 51) + range(59, 70)}
    # Kept from the original fixture although not asserted against
    clumis = {"1": range(1, 6) + range(12, 13) + range(16, 30) + range(40, 50) + range(39, 80),
              "2": range(10, 35)}
    # The expected union input is simply the concatenation of both fixtures;
    # LumiList normalizes duplicates and overlaps internally
    result = dict((run, alumis[run] + blumis[run]) for run in alumis)

    a = LumiList(runsAndLumis=alumis)
    b = LumiList(runsAndLumis=blumis)
    c = LumiList(runsAndLumis=blumis)
    r = LumiList(runsAndLumis=result)

    expected = r.getCMSSWString()
    union = (a | b).getCMSSWString()
    self.assertTrue(union == expected)
    self.assertTrue(union == (b | a).getCMSSWString())
    self.assertTrue(union == (a + b).getCMSSWString())
def edit_process_source(pset, config):
    """Edit parameter set for task.

    Appends input files, lumi mask, gridpack, summary, runtime, seeding,
    MC-production and core-count settings to the pset file, assembled from
    the module-level fragment templates.
    """
    mask = config['mask']
    files = mask['files']
    lumis = LumiList(compactList=mask['lumis']).getVLuminosityBlockRange()
    want_summary = config['want summary']
    runtime = config.get('task runtime')
    cores = config.get('cores')

    # MC production settings
    run_first = mask.get('first run')
    lumi_first = mask.get('first lumi')
    lumi_events = mask.get('events per lumi')
    seeding = config.get('randomize seeds', False)

    with open(pset, 'a') as handle:
        # Assemble the fragment piecewise, then join once at the end
        pieces = [fragment.format(events=mask['events'])]
        if any(files) and not config['gridpack']:
            pieces.append("\nprocess.source.fileNames = cms.untracked.vstring({0})".format(
                repr([str(f) for f in files])))
        if config['gridpack']:
            # ExternalLHEProducer only understands local files and does
            # not expect the `file:` prefix. Also, there can never be
            # more than one gridpack, so take the first element.
            pieces.append(fragment_gridpack.format(
                gridpack=os.path.abspath(files[0].replace('file:', ''))))
        if lumis:
            pieces.append("\nprocess.source.lumisToProcess = cms.untracked.VLuminosityBlockRange({0})".format(
                [str(l) for l in lumis]))
        if want_summary:
            pieces.append(fragment_sum)
        if runtime:
            pieces.append(fragment_runtime.format(time=runtime))
        if seeding:
            pieces.append(fragment_seeding)
        if lumi_events:
            pieces.append(fragment_lumi.format(events=lumi_events))
        if lumi_first:
            pieces.append(fragment_first_lumi.format(lumi=lumi_first))
        if run_first:
            pieces.append(fragment_first_run.format(run=run_first))
        if cores:
            pieces.append(fragment_cores.format(cores=cores))
        frag = "".join(pieces)

        logger.info("config file fragment")
        with mangler.output('pset'):
            for line in frag.splitlines():
                logger.debug(line)
        handle.write(frag)
def validFiles(self, files):
    """
    Apply the lumi mask and/or run white/black list and return the files
    which contain one or more of the requested lumis.

    :param files: list of file dicts with an optional 'LumiList' key;
        plain strings are passed through untouched (no lumi info to filter on)
    :return: the subset of `files` passing the mask/lists
    """
    runWhiteList = self.topLevelTask.inputRunWhitelist()
    runBlackList = self.topLevelTask.inputRunBlacklist()
    taskLumiMask = self.topLevelTask.getLumiMask()

    blackMask = None
    if taskLumiMask:
        # We have a lumiMask, so use it and modify with run white/black list
        if isinstance(taskLumiMask, LumiList):
            # For a possible future where we use LumiList more prevalently
            lumiMask = copy.deepcopy(taskLumiMask)
        else:
            lumiMask = LumiList(compactList=taskLumiMask)
        if runWhiteList:
            lumiMask.selectRuns(runWhiteList)
        if runBlackList:
            lumiMask.removeRuns(runBlackList)
    elif runWhiteList:
        # We have a run whitelist, subtract off the blacklist
        lumiMask = LumiList(runs=runWhiteList)
        if runBlackList:
            lumiMask.removeRuns(runBlackList)
    else:
        lumiMask = None
        if runBlackList:
            # We only have a blacklist, so make a black mask out of it instead.
            # Bug fix: this used to be built from the (empty here) runWhiteList,
            # which silently disabled the blacklist in this branch.
            blackMask = LumiList(runs=runBlackList)

    results = []
    for f in files:
        # Strings and files lacking lumi info cannot be filtered: keep them.
        # isinstance/basestring also covers unicode LFNs, which the old
        # type(f) == type("") check missed (then crashed on has_key).
        if isinstance(f, basestring) or "LumiList" not in f:
            results.append(f)
            continue

        # Create a LumiList from the WMBS info
        fileRunsAndLumis = {}
        for x in f['LumiList']:
            fileRunsAndLumis[str(x['RunNumber'])] = x['LumiSectionNumber']
        fileLumiList = LumiList(runsAndLumis=fileRunsAndLumis)

        if lumiMask:
            if fileLumiList & lumiMask:
                # At least one lumi from file is in lumiMask
                results.append(f)
        elif blackMask:
            if fileLumiList - blackMask:
                # At least one lumi from file is not in blackMask
                results.append(f)
        else:
            # There is effectively no mask
            results.append(f)

    return results
def adjustLumisForCompletion(self, task, unprocessed):
    """Sets the run, lumi information in the task information for the
    completion jobs. Returns True if completion jobs are needed,
    otherwise False.

    :param task: task dict; 'tm_split_args' gets its 'runs'/'lumis' updated
    :param unprocessed: set of job identifiers whose lumis were not processed
    :return: True when there are missing/failed lumis to reprocess, else False
    """
    missingDir = "automatic_splitting/missing_lumis/" #TODO in ServerUtilities to be shared with PJ

    # Jobs that reported missing lumis via files in missingDir
    try:
        available = set(os.listdir(missingDir)) & unprocessed
    except OSError:
        # Directory absent: no missing-lumi reports
        available = set()

    failed = set(self.failedJobs) & unprocessed

    if len(available) == 0 and len(failed) == 0:
        return False

    # Accumulate all missing lumis into one LumiList
    missing = LumiList()
    for missingFile in available:
        with open(os.path.join(missingDir, missingFile)) as fd:
            self.logger.info("Adding missing lumis from job %s", missingFile)
            # File content is a Python-literal compact lumi dict
            missing = missing + LumiList(compactList=literal_eval(fd.read()))
    for failedId in failed:
        # Each failed job's lumis live in a JSON member of this tarball
        # (NOTE(review): path is relative — presumably the cwd is the task
        # working directory; confirm against the caller)
        f = None
        try:
            tmpdir = tempfile.mkdtemp()
            f = tarfile.open("run_and_lumis.tar.gz")
            fn = "job_lumis_{0}.json".format(failedId)
            f.extract(fn, path=tmpdir)
            with open(os.path.join(tmpdir, fn)) as fd:
                injson = json.load(fd)
                missing = missing + LumiList(compactList=injson)
                self.logger.info("Adding lumis from failed job %s", failedId)
        finally:
            if f:
                f.close()
            shutil.rmtree(tmpdir)

    missing_compact = missing.getCompactList()
    runs = missing.getRuns()
    # Compact list is like
    # {
    # '1': [[1, 33], [35, 35], [37, 47], [49, 75], [77, 130], [133, 136]],
    # '2':[[1,45],[50,80]]
    # }
    # Now we turn lumis it into something like:
    # lumis=['1, 33, 35, 35, 37, 47, 49, 75, 77, 130, 133, 136','1,45,50,80']
    # which is the format expected by buildLumiMask in the splitting algorithm
    lumis = [",".join(str(l) for l in functools.reduce(lambda x, y: x + y, missing_compact[run])) for run in runs]

    task['tm_split_args']['runs'] = runs
    task['tm_split_args']['lumis'] = lumis

    return True
def testFilter(self):
    """
    Test filtering of a list of (run, lumi) pairs against a lumi mask.
    """
    def pairs(run, lumiRange):
        # Build (run, lumi) tuples for every lumi in the range
        return [(run, lumi) for lumi in lumiRange]

    runsAndLumis = {1: range(1, 34) + [35] + range(37, 48),
                    2: range(49, 76) + range(77, 131) + range(133, 137)}

    completeList = pairs(1, range(1, 150)) + pairs(2, range(1, 150)) + pairs(3, range(1, 150))
    smallList = pairs(1, range(1, 10)) + pairs(2, range(50, 70))
    overlapList = pairs(1, range(30, 40)) + pairs(2, range(60, 80))
    overlapRes = (pairs(1, range(30, 34)) + [(1, 35)] + pairs(1, range(37, 40)) +
                  pairs(2, range(60, 76)) + pairs(2, range(77, 80)))

    runLister = LumiList(runsAndLumis=runsAndLumis)

    # A superset of the constructed list filters down to exactly the mask's lumis
    filterComplete = runLister.filterLumis(completeList)
    # A subset of the constructed list passes through unchanged
    filterSmall = runLister.filterLumis(smallList)
    # A partially overlapping list keeps only the overlap
    filterOverlap = runLister.filterLumis(overlapList)

    self.assertTrue(filterComplete == runLister.getLumis())
    self.assertTrue(filterSmall == smallList)
    self.assertTrue(filterOverlap == overlapRes)