Example #1
0
    def testGetMaskedBlocks(self):
        """
        _testGetMaskedBlocks_

        Verify that, for every file of every block returned by
        getMaskedBlocks, the reported lumis equal the intersection of the
        input lumis and the lumi mask defined in this test.
        """
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        workload = ReRecoWorkloadFactory().factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        workload.data.request.priority = 69

        task = getFirstTask(workload)
        inputDataset = task.inputDataset()
        inputDataset.primary = 'SingleElectron'
        inputDataset.processed = 'StoreResults-Run2011A-WElectron-PromptSkim-v4-ALCARECO-NOLC-36cfce5a1d3f3ab4df5bd2aa0a4fa380'
        inputDataset.tier = 'USER'

        # Run/lumi restrictions attached to the task ...
        task.data.input.splitting.runs = [166921, 166429, 166911]
        task.data.input.splitting.lumis = ['40,70', '1,50', '1,5,16,20']
        # ... and the same restrictions expressed as a LumiList
        lumiMask = LumiList(compactList={
            '166921': [[40, 70]],
            '166429': [[1, 50]],
            '166911': [[1, 5], [16, 20]],
        })
        inputLumis = LumiList(compactList={
            '166921': [[1, 67]],
            '166429': [[1, 91]],
            '166911': [[1, 104]],
        })

        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
        reader = DBSReader(inputDataset.dbsurl)
        maskedBlocks = Block(**self.splitArgs).getMaskedBlocks(task, reader, dataset)

        for filesInBlock in maskedBlocks.itervalues():
            for fileLumis in filesInBlock.itervalues():
                self.assertEqual(str(fileLumis), str(inputLumis & lumiMask))
Example #2
0
    def getMaskedBlocks(self, task, dbs, datasetPath):
        """ Collect, per block and per file, the lumis allowed by the task lumi mask.

            Every block listed by DBS for ``datasetPath`` is inspected; the lumis of each
            file entry (queried with validFileOnly=1) are intersected with the task mask.
            Files, and therefore blocks, with an empty intersection are left out.
            The data structure returned is the following:

            {
                "block1" : {"file1" : LumiList(), "file5" : LumiList(), ...}
                "block2" : {"file2" : LumiList(), "file7" : LumiList(), ...}
            }

        """
        # The task mask is used as a LumiList object to make operations easier
        taskMask = task.getLumiMask()
        selected = {}

        # for performance reasons, all the block names are fetched first
        blockNames = [entry['block_name'] for entry in dbs.dbs.listBlocks(dataset=datasetPath)]

        for blockName in blockNames:
            for entry in dbs.dbs.listFileLumis(block_name=blockName, validFileOnly=1):
                lfn = entry['logical_file_name']
                fileMask = LumiList(runsAndLumis={str(entry['run_num']): entry['lumi_section_num']})
                overlap = taskMask & fileMask
                if not overlap:
                    continue
                # Accumulate the overlap; a file may show up in several entries
                filesInBlock = selected.setdefault(blockName, {})
                if lfn not in filesInBlock:
                    filesInBlock[lfn] = LumiList()
                filesInBlock[lfn] += overlap

        return selected
Example #3
0
def getLumiList(lumi_mask_name, logger=None):
    """
    Build a LumiList object from a lumi-mask location.

    lumi_mask_name: either an http(s) address or a json file on disk.
    logger: optional logger; when given, the origin of the mask is
            reported at debug level.

    Raises ConfigurationException when the download fails with an HTTP
    error or when the local file cannot be read.
    """
    scheme = urlparse.urlparse(lumi_mask_name)[0]

    if scheme not in ('http', 'https'):
        # Anything that is not an http(s) URL is treated as a local file
        if logger:
            logger.debug('Reading lumi-mask from %s' % lumi_mask_name)
        try:
            return LumiList(filename=lumi_mask_name)
        except IOError as err:
            raise ConfigurationException("Problem loading lumi-mask file; %s" % str(err))

    if logger:
        logger.debug('Downloading lumi-mask from %s' % lumi_mask_name)
    try:
        return LumiList(url=lumi_mask_name)
    except urllib2.HTTPError as err:
        raise ConfigurationException(
            "Problem downloading lumi-mask file; %s %s" % (err.code, err.msg))
Example #4
0
    def testWithMaskedBlocks(self):
        """
        _testWithMaskedBlocks_

        Split a workload whose task carries run/lumi restrictions and check
        that the lumis summed over the work units match the lumi mask.
        """
        workload = rerecoWorkload('ReRecoWorkload',
                                  rerecoArgs,
                                  assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        workload.data.request.priority = 69
        task = getFirstTask(workload)
        unusedDataset = task.inputDataset()

        task.data.input.splitting.runs = [181061, 180899]
        task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
        lumiMask = LumiList(compactList={'206371': [[1, 50], [60, 70]], '180899': [[1, 1]]})

        splitter = Block(**self.splitArgs)
        units, unusedRejectedWork = splitter(workload, task)

        nLumis = sum(unit['NumberOfLumis'] for unit in units)

        self.assertEqual(len(lumiMask.getLumis()), nLumis)
Example #5
0
    def testWithMaskedBlocks(self):
        """
        _testWithMaskedBlocks_

        Test job splitting with masked blocks: the lumis summed over the
        produced work units must equal the number of lumis in the mask.
        """
        Globals.GlobalParams.setNumOfRunsPerFile(3)
        Globals.GlobalParams.setNumOfLumisPerBlock(5)
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])

        workload = ReRecoWorkloadFactory().factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        workload.data.request.priority = 69
        task = getFirstTask(workload)
        inputDataset = task.inputDataset()

        task.data.input.splitting.runs = [181061, 180899]
        task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
        lumiMask = LumiList(compactList={
            '206371': [[1, 50], [60, 70]],
            '180899': [[1, 1]],
        })

        # NOTE(review): dataset and dbs are never read below; they are kept
        # because building them reads the dataset attributes and constructs
        # a DBSReader.
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        units, rejectedWork = Block(**self.splitArgs)(workload, task)

        nLumis = sum(unit['NumberOfLumis'] for unit in units)

        self.assertEqual(len(lumiMask.getLumis()), nLumis)
Example #6
0
    def testFilter(self):
        """
        Test filtering of a list of lumis

        Three (run, lumi) lists are passed through filterLumis of a LumiList
        built from runsAndLumis: a superset, a subset and a partially
        overlapping list.
        """
        runsAndLumis = {
            1: range(1, 34) + [35] + range(37, 48),
            2: range(49, 76) + range(77, 131) + range(133, 137)
        }

        completeList = zip([1]*150, range(1, 150)) + \
                       zip([2]*150, range(1, 150)) + \
                       zip([3]*150, range(1, 150))

        smallList    = zip([1]*50,  range(1, 10)) + zip([2]*50, range(50, 70))
        overlapList  = zip([1]*150, range(30, 40)) + \
                       zip([2]*150, range(60, 80))
        overlapRes   = zip([1]*9,   range(30, 34)) + [(1, 35)] + \
                       zip([1]*9,   range(37, 40)) + \
                       zip([2]*30,  range(60, 76)) + \
                       zip([2]*9,   range(77, 80))

        runLister = LumiList(runsAndLumis = runsAndLumis)

        # Test a list to be filtered which is a superset of constructed list
        filterComplete = runLister.filterLumis(completeList)
        # Test a list to be filtered which is a subset of constructed list
        filterSmall    = runLister.filterLumis(smallList)
        # Test a list to be filtered which is neither
        filterOverlap  = runLister.filterLumis(overlapList)

        # assertEqual reports both operands when the comparison fails
        self.assertEqual(filterComplete, runLister.getLumis())
        self.assertEqual(filterSmall, smallList)
        self.assertEqual(filterOverlap, overlapRes)
Example #7
0
    def testOr(self):
        """
        a|b for lots of cases

        Also checks that a|b, b|a and a+b agree, and that building a
        LumiList from a list of run/lumi dictionaries matches adding the
        individual LumiLists together.
        """

        alumis = {'1' : range(2,20) + range(31,39) + range(45,49),
                  '2' : range(6,20) + range (30,40),
                  '3' : range(10,20) + range (30,40) + range(50,60),
                 }
        blumis = {'1' : range(1,6) + range(12,13) + range(16,30) + range(40,50) + range(39,80),
                  '2' : range(10,35),
                  '3' : range(10,15) + range(35,40) + range(45,51) + range(59,70),
                 }
        clumis = {'1' : range(1,6) + range(12,13) + range(16,30) + range(40,50) + range(39,80),
                  '2' : range(10,35),
                 }
        result = {'1' : range(2,20) + range(31,39) + range(45,49) + range(1,6) + range(12,13) + range(16,30) + range(40,50) + range(39,80),
                  '2' : range(6,20) + range (30,40) + range(10,35),
                  '3' : range(10,20) + range (30,40) + range(50,60) + range(10,15) + range(35,40) + range(45,51) + range(59,70),
                 }
        a = LumiList(runsAndLumis = alumis)
        b = LumiList(runsAndLumis = blumis)
        # c must come from clumis so that "hard" is built from the same three
        # inputs as "easy" below
        c = LumiList(runsAndLumis = clumis)
        r = LumiList(runsAndLumis = result)
        self.assertEqual((a|b).getCMSSWString(), r.getCMSSWString())
        self.assertEqual((a|b).getCMSSWString(), (b|a).getCMSSWString())
        self.assertEqual((a|b).getCMSSWString(), (a+b).getCMSSWString())

        # Test list construction (faster)

        multiple = [alumis, blumis, clumis]
        easy = LumiList(runsAndLumis = multiple)
        hard = a + b
        hard += c
        self.assertEqual(hard.getCMSSWString(), easy.getCMSSWString())
Example #8
0
    def testWithMaskedBlocks(self):
        """
        _testWithMaskedBlocks_

        Run the Block splitter on a task with run/lumi restrictions and
        compare the total lumis of the work units with the lumi mask.
        """
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])

        workloadFactory = ReRecoWorkloadFactory()
        workload = workloadFactory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        workload.data.request.priority = 69

        task = getFirstTask(workload)
        ignoredDataset = task.inputDataset()

        task.data.input.splitting.runs = [181061, 180899]
        task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
        maskRanges = {'206371': [[1, 50], [60, 70]], '180899': [[1, 1]]}
        lumiMask = LumiList(compactList=maskRanges)

        units, ignoredRejectedWork = Block(**self.splitArgs)(workload, task)

        lumisPerUnit = [unit['NumberOfLumis'] for unit in units]
        nLumis = sum(lumisPerUnit)

        self.assertEqual(len(lumiMask.getLumis()), nLumis)
Example #9
0
def fast_getDoubleLumis(lumisDict):
    """
    Return, as a LumiList compact list, the (run, lumi) pairs that occur
    more than once in the lumi sequence of a run in lumisDict.
    """
    repeated = set()
    for run, lumis in lumisDict.iteritems():
        alreadySeen = set()
        for lumi in lumis:
            pair = (run, lumi)
            if pair in alreadySeen:
                repeated.add(pair)
            else:
                alreadySeen.add(pair)
    return LumiList(lumis=repeated).getCompactList()
Example #10
0
 def addJobs(self, jobs):
     """
     Record per-job lumi and event counts for the given jobs.

     What is counted depends on self.algo:
       * 'FileBased':  sums 'lumiCount' (0 when missing) and 'events' over
                       the input files and also records the number of files.
       * 'EventBased': uses the Last/First lumi and event differences of the
                       job mask.
       * otherwise:    counts the lumis in the mask's 'runAndLumis' and
                       multiplies them by the mean 'avgEvtsPerLumi' of the
                       input files.
     """
     algo = self.algo

     if algo == 'FileBased':
         self.lumisPerJob += [
             sum(f.get('lumiCount', 0) for f in job['input_files'])
             for job in jobs
         ]
         self.eventsPerJob += [
             sum(f['events'] for f in job['input_files'])
             for job in jobs
         ]
         self.filesPerJob += [len(job['input_files']) for job in jobs]
         return

     if algo == 'EventBased':
         self.lumisPerJob += [
             job['mask']['LastLumi'] - job['mask']['FirstLumi'] for job in jobs
         ]
         self.eventsPerJob += [
             job['mask']['LastEvent'] - job['mask']['FirstEvent'] for job in jobs
         ]
         return

     for job in jobs:
         inputFiles = job['input_files']
         summedAverages = sum(f['avgEvtsPerLumi'] for f in inputFiles)
         avgEventsPerLumi = summedAverages / float(len(inputFiles))
         nLumis = len(LumiList(compactList=job['mask']['runAndLumis']).getLumis())
         self.lumisPerJob.append(nLumis)
         self.eventsPerJob.append(avgEventsPerLumi * nLumis)
Example #11
0
def makeLumiList(lumiString):
    """
    Parse a JSON string holding a compact lumi list and return it in the
    form produced by LumiList.getCompactList().

    Raises WMWorkloadToolsException if the string cannot be decoded or
    turned into a LumiList.
    """
    try:
        compactList = json.loads(lumiString)
        ll = LumiList(compactList = compactList)
        return ll.getCompactList()
    except Exception:
        # Only real errors are translated; KeyboardInterrupt and SystemExit
        # are allowed to propagate.
        raise WMWorkloadToolsException("Could not parse LumiList")
Example #12
0
    def testAnd(self):
        """
        a&b for lots of cases

        Also checks that the intersection is symmetric (a&b vs b&a) and that
        the union a|b differs from the expected intersection.
        """

        alumis = {'1' : range(2,20) + range(31,39) + range(45,49),
                  '2' : range(6,20) + range (30,40),
                  '3' : range(10,20) + range (30,40) + range(50,60),
                  '4' : range(1,100),
                 }
        blumis = {'1' : range(1,6) + range(12,13) + range(16,25) + range(25,40) + range(40,50) + range(33,36),
                  '2' : range(10,35),
                  '3' : range(10,15) + range(35,40) + range(45,51) + range(59,70),
                  '5' : range(1,100),
                 }
        result = {'1' : range(2,6) + range(12,13) + range(16,20) + range(31,39) + range(45,49),
                  '2' : range(10,20) + range(30,35),
                  '3' : range(10,15) + range(35,40) + range(50,51)+ range(59,60),
                 }
        a = LumiList(runsAndLumis = alumis)
        b = LumiList(runsAndLumis = blumis)
        r = LumiList(runsAndLumis = result)
        # assertEqual/assertNotEqual show the compared strings on failure
        self.assertEqual((a&b).getCMSSWString(), r.getCMSSWString())
        self.assertEqual((a&b).getCMSSWString(), (b&a).getCMSSWString())
        self.assertNotEqual((a|b).getCMSSWString(), r.getCMSSWString())
Example #13
0
    def testWithMaskedBlocks(self):
        """
        _testWithMaskedBlocks_

        Job splitting with masked blocks: the work units returned by the
        Block splitter must carry as many lumis as the lumi mask holds.
        """
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        reRecoFactory = ReRecoWorkloadFactory()
        tier1Workload = reRecoFactory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        tier1Workload.data.request.priority = 69

        firstTask = getFirstTask(tier1Workload)
        dummyDataset = firstTask.inputDataset()

        firstTask.data.input.splitting.runs = [181061, 180899]
        firstTask.data.input.splitting.lumis = ['1,50,60,70', '1,1']
        lumiMask = LumiList(compactList={'206371': [[1, 50], [60, 70]],
                                         '180899': [[1, 1]]})

        units, dummyRejectedWork = Block(**self.splitArgs)(tier1Workload, firstTask)

        expectedLumis = len(lumiMask.getLumis())
        self.assertEqual(expectedLumis, sum(unit['NumberOfLumis'] for unit in units))
    def mergeLumis(inputdata, lumimask):
        """
        Merge the lumis reported in inputdata (called when usedbs=no).

        Returns three compact lists: the merged lumis, the lumis of lumimask
        that are not among the merged ones, and the lumis that were reported
        more than once.
        """
        def groupByRun(pairs):
            """[(123,3), (123,4), (234,6)] => {123 : [3,4], 234 : [6]}, lumis cast to int."""
            grouped = {}
            for run, lumi in pairs:
                grouped.setdefault(run, []).append(int(lumi))
            return grouped

        allPairs = set()
        repeatedPairs = set()

        #merge the lumis from single files
        for reports in inputdata.values():
            for report in reports:
                runLumi = literal_eval(report['runlumi'])
                for run, lumis in runLumi.iteritems():
                    for lumi in lumis:
                        pair = (run, lumi)
                        if pair in allPairs:
                            repeatedPairs.add(pair)
                        allPairs.add(pair)

        doubleLumis = LumiList(runsAndLumis=groupByRun(repeatedPairs))
        mergedLumis = LumiList(runsAndLumis=groupByRun(allPairs))

        #get the compact lists
        mergedCompact = mergedLumis.getCompactList()
        missingCompact = (LumiList(compactList=lumimask) - mergedLumis).getCompactList()
        doubleCompact = doubleLumis.getCompactList()
        return mergedCompact, missingCompact, doubleCompact
Example #15
0
 def getDoubleLumis(lumisDict):
     """
     Return the compact list of the lumis counted twice, i.e. those that
     appear more than once in the lumi sequence of their run.
     """
     repeatedPairs = set()
     for run, lumis in lumisDict.iteritems():
         firstSeen = set()
         for lumi in lumis:
             if lumi in firstSeen:
                 repeatedPairs.add((run, lumi))
             else:
                 firstSeen.add(lumi)
     return LumiList(lumis=repeatedPairs).getCompactList()
Example #16
0
 def testWrite(self):
     """Build a LumiList covering four runs and write it to 'newFile.json'."""
     runOne = range(2,20) + range(31,39) + range(45,49)
     runTwo = range(6,20) + range (30,40)
     runThree = range(10,20) + range (30,40) + range(50,60)
     runFour = range(1,100)
     lumiList = LumiList(runsAndLumis = {'1' : runOne, '2' : runTwo, '3' : runThree, '4' : runFour})
     lumiList.writeJSON('newFile.json')
Example #17
0
def makeLumiList(lumiDict):
    """
    Normalise a compact lumi list through LumiList.

    lumiDict: either a compact-list dictionary or a string, which is first
              decoded with JsonWrapper.loads.

    Returns the result of LumiList.getCompactList().
    Raises WMSpecFactoryException if decoding or LumiList construction fails.
    """
    try:
        if isinstance(lumiDict, basestring):
            lumiDict = JsonWrapper.loads(lumiDict)
        ll = LumiList(compactList=lumiDict)
        return ll.getCompactList()
    except Exception:
        # Only real errors are translated; KeyboardInterrupt and SystemExit
        # are allowed to propagate.
        raise WMSpecFactoryException("Could not parse LumiList, %s: %s" % (type(lumiDict), lumiDict))
Example #18
0
 def testWrite(self):
     """Write a LumiList built from four runs of lumis to 'newFile.json'."""
     alumis = {}
     alumis['1'] = range(2,20) + range(31,39) + range(45,49)
     alumis['2'] = range(6,20) + range (30,40)
     alumis['3'] = range(10,20) + range (30,40) + range(50,60)
     alumis['4'] = range(1,100)
     LumiList(runsAndLumis = alumis).writeJSON('newFile.json')
Example #19
0
 def subtractLumis(input, output):
     """
     Compute the processed lumis from the DBS results (called when usedbs=yes).

     Returns two compact lists: the lumis of output, and the lumis of input
     that are not in output.
     """
     processed = LumiList(runsAndLumis=output)
     requested = LumiList(runsAndLumis=input)
     remaining = requested - processed
     return processed.getCompactList(), remaining.getCompactList()
Example #20
0
 def testWrite(self):
     """Dump a four-run LumiList to "newFile.json" via writeJSON."""
     lumisByRun = dict([
         ("1", range(2, 20) + range(31, 39) + range(45, 49)),
         ("2", range(6, 20) + range(30, 40)),
         ("3", range(10, 20) + range(30, 40) + range(50, 60)),
         ("4", range(1, 100)),
     ])
     writer = LumiList(runsAndLumis=lumisByRun)
     writer.writeJSON("newFile.json")
Example #21
0
    def testDuplicates(self):
        """
        Test a list with lots of duplicates

        The input repeats lumis 5-24 of run 1; getLumis() is expected to
        return each (run, lumi) pair only once.
        """
        result = list(zip([1]*100, range(1, 34) + range(37, 48)))
        lumis  = list(zip([1]*100, range(1, 34) + range(37, 48) + range(5, 25)))

        lister = LumiList(lumis = lumis)
        # assertEqual shows the differing elements when the lists do not match
        self.assertEqual(lister.getLumis(), result)
Example #22
0
 def getDoubleLumis(lumisDict):
     """
     Calculate the lumis counted twice.

     lumisDict maps each run to a sequence of lumis. A (run, lumi) pair is
     reported when the lumi appears more than once in the sequence of its
     run. Returns the result as a LumiList compact list.
     """
     doubleLumis = set()
     for run, lumis in lumisDict.iteritems():
         # Track the lumis already met for this run instead of calling
         # list.count() for every element, which is quadratic in the
         # number of lumis.
         seen = set()
         for lumi in lumis:
             if lumi in seen:
                 doubleLumis.add((run, lumi))
             seen.add(lumi)
     doubleLumis = LumiList(lumis=doubleLumis)
     return doubleLumis.getCompactList()
Example #23
0
    def testDuplicates(self):
        """
        Test a list with lots of duplicates

        Lumis 5-24 of run 1 are passed in twice; getLumis() must match the
        list without the repetitions.
        """
        result = zip([1]*100, range(1, 34) + range(37, 48))
        lumis  = zip([1]*100, range(1, 34) + range(37, 48) + range(5, 25))

        lister = LumiList(lumis = lumis)
        # assertEqual reports both lists when they differ
        self.assertEqual(lister.getLumis(), result)
Example #24
0
def fast_getDoubleLumis(lumisDict):
    """
    Find the lumis listed more than once for a run in lumisDict and return
    them as a LumiList compact list.
    """
    duplicates = set()
    for run, lumis in lumisDict.iteritems():
        once = set()
        again = set()
        for lumi in lumis:
            # A lumi goes to "once" on first sight and to "again" afterwards
            bucket = again if lumi in once else once
            bucket.add(lumi)
        duplicates.update((run, lumi) for lumi in again)
    duplicateList = LumiList(lumis=duplicates)
    return duplicateList.getCompactList()
Example #25
0
def makeLumiList(lumiDict):
    """
    Normalise a compact lumi list through LumiList.

    lumiDict: either a compact-list dictionary or a str/bytes value, which
              is first decoded with json.loads.

    Returns the result of LumiList.getCompactList().
    Raises WMSpecFactoryException if decoding or LumiList construction fails.
    """
    try:
        if isinstance(lumiDict, (str, bytes)):
            lumiDict = json.loads(lumiDict)
        ll = LumiList(compactList=lumiDict)
        return ll.getCompactList()
    except Exception:
        # Only real errors are translated; KeyboardInterrupt and SystemExit
        # are allowed to propagate.
        raise WMSpecFactoryException("Could not parse LumiList, %s: %s" %
                                     (type(lumiDict), lumiDict))
Example #26
0
    def removeLumiList(self, lumiList):
        """
        Subtract lumiList from the lumis stored under 'runAndLumis'.

        The stored compact list is converted to a LumiList to do the lumi
        algebra, which may be computationally expensive for a large number
        of lumis.
        """
        remaining = LumiList(compactList=self['runAndLumis']) - lumiList
        self['runAndLumis'] = remaining.getCompactList()
Example #27
0
    def removeLumiList(self, lumiList):
        """
        Remove a lumi list from this data structure

        The 'runAndLumis' entry is turned into a LumiList so the
        subtraction can be done with lumi algebra; this may be
        computationally expensive for a large number of lumis.
        """
        current = LumiList(compactList=self['runAndLumis'])
        afterRemoval = current - lumiList
        compact = afterRemoval.getCompactList()
        self['runAndLumis'] = compact
Example #28
0
    def testNull(self):
        """
        Test a null list

        A LumiList built with lumis=None must produce an empty CMSSW
        string, an empty lumi list and an empty compact list.
        """

        runLister = LumiList(lumis = None)

        # assertEqual shows the unexpected value when a check fails
        self.assertEqual(runLister.getCMSSWString(), '')
        self.assertEqual(runLister.getLumis(), [])
        self.assertEqual(runLister.getCompactList(), {})
Example #29
0
    def testNull(self):
        """
        Test a null list

        Constructing LumiList with lumis=None is expected to give empty
        results from getCMSSWString, getLumis and getCompactList.
        """
        emptyLister = LumiList(lumis = None)

        # assertEqual reports the actual value on failure, unlike
        # assertTrue(x == y)
        self.assertEqual(emptyLister.getCMSSWString(), '')
        self.assertEqual(emptyLister.getLumis(), [])
        self.assertEqual(emptyLister.getCompactList(), {})
Example #30
0
    def testOr(self):
        """
        a|b for lots of cases

        Checks the union against the expected result, its symmetry, its
        agreement with a+b, and that constructing a LumiList from several
        run/lumi dictionaries equals adding the separate LumiLists.
        """

        alumis = {'1' : range(2,20) + range(31,39) + range(45,49),
                  '2' : range(6,20) + range (30,40),
                  '3' : range(10,20) + range (30,40) + range(50,60),
                 }
        blumis = {'1' : range(1,6) + range(12,13) + range(16,30) + range(40,50) + range(39,80),
                  '2' : range(10,35),
                  '3' : range(10,15) + range(35,40) + range(45,51) + range(59,70),
                 }
        clumis = {'1' : range(1,6) + range(12,13) + range(16,30) + range(40,50) + range(39,80),
                  '2' : range(10,35),
                 }
        result = {'1' : range(2,20) + range(31,39) + range(45,49) + range(1,6) + range(12,13) + range(16,30) + range(40,50) + range(39,80),
                  '2' : range(6,20) + range (30,40) + range(10,35),
                  '3' : range(10,20) + range (30,40) + range(50,60) + range(10,15) + range(35,40) + range(45,51) + range(59,70),
                 }
        a = LumiList(runsAndLumis = alumis)
        b = LumiList(runsAndLumis = blumis)
        # Built from clumis (not blumis) so the incremental sum below uses
        # the same three inputs as the list-based construction
        c = LumiList(runsAndLumis = clumis)
        r = LumiList(runsAndLumis = result)

        union = (a|b).getCMSSWString()
        self.assertEqual(union, r.getCMSSWString())
        self.assertEqual(union, (b|a).getCMSSWString())
        self.assertEqual(union, (a+b).getCMSSWString())

        # Test list construction (faster)

        multiple = [alumis, blumis, clumis]
        easy = LumiList(runsAndLumis = multiple)
        hard = a + b
        hard += c
        self.assertEqual(hard.getCMSSWString(), easy.getCMSSWString())
Example #31
0
    def adjustLumisForCompletion(self, task, unprocessed):
        """Sets the run, lumi information in the task information for the
        completion jobs.  Returns True if completion jobs are needed,
        otherwise False.

        :param task: dictionary whose 'tm_split_args' entry receives the
            'runs' and 'lumis' values computed here
        :param unprocessed: set of job ids; only missing-lumi files and
            failed jobs whose id is in this set are considered

        The lumis are gathered from the files found in the missing-lumis
        directory and from the per-job json files stored in
        run_and_lumis.tar.gz for the failed jobs.
        """
        missingDir = "automatic_splitting/missing_lumis/" #TODO in ServerUtilities to be shared with PJ

        try:
            available = set(os.listdir(missingDir)) & unprocessed
        except OSError:
            # The directory cannot be listed: nothing is available from it
            available = set()

        failed = set(self.failedJobs) & unprocessed

        if not available and not failed:
            return False

        missing = LumiList()
        for missingFile in available:
            with open(os.path.join(missingDir, missingFile)) as fd:
                self.logger.info("Adding missing lumis from job %s", missingFile)
                missing = missing + LumiList(compactList=literal_eval(fd.read()))
        for failedId in failed:
            fn = "job_lumis_{0}.json".format(failedId)
            # Created before the try block so that the cleanup below always
            # refers to the directory of this iteration.
            tmpdir = tempfile.mkdtemp()
            try:
                f = tarfile.open("run_and_lumis.tar.gz")
                try:
                    f.extract(fn, path=tmpdir)
                finally:
                    f.close()
                with open(os.path.join(tmpdir, fn)) as fd:
                    injson = json.load(fd)
                missing = missing + LumiList(compactList=injson)
                self.logger.info("Adding lumis from failed job %s", failedId)
            finally:
                shutil.rmtree(tmpdir)
        missing_compact = missing.getCompactList()
        runs = missing.getRuns()
        # Compact list is like
        # {
        # '1': [[1, 33], [35, 35], [37, 47], [49, 75], [77, 130], [133, 136]],
        # '2':[[1,45],[50,80]]
        # }
        # Now we turn lumis it into something like:
        # lumis=['1, 33, 35, 35, 37, 47, 49, 75, 77, 130, 133, 136','1,45,50,80']
        # which is the format expected by buildLumiMask in the splitting algorithm
        lumis = [
            ",".join(str(l) for lumiRange in missing_compact[run] for l in lumiRange)
            for run in runs
        ]

        task['tm_split_args']['runs'] = runs
        task['tm_split_args']['lumis'] = lumis

        return True
Example #32
0
    def adjust(self, parameters, inputs, outputs, se):
        """
        Fill in the file and lumi information of a job.

        Extends ``inputs`` with the local input files when self._local is set
        and ``se`` transfers inputs, extends ``outputs`` when ``se``
        transfers outputs, and stores the input files, the output files and,
        unless the job is file based, the compact lumi list in ``parameters``.
        """
        if self._local and se.transfer_inputs():
            inputs += [
                (se.local(path), os.path.basename(path), False)
                for _, path in self._files
                if path
            ]
        if se.transfer_outputs():
            outputs += [
                (se.local(remoteName), os.path.basename(localName))
                for localName, remoteName in self.outputs
            ]

        parameters['mask']['files'] = self.input_files
        parameters['output files'] = self.outputs
        if self._file_based:
            return

        runLumiPairs = set((run, lumi) for (_, _, run, lumi) in self._units)
        parameters['mask']['lumis'] = LumiList(lumis=runLumiPairs).getCompactList()
Example #33
0
 def addJobs(self, jobs):
     """
     Append the lumi and event counts of each job to self.lumisPerJob and
     self.eventsPerJob.

     For the 'FileBased' algorithm the counts are summed over the input
     files ('lumiCount', 0 when missing, and 'events'); for 'EventBased'
     they are the Last minus First lumi/event of the job mask; for any
     other algorithm the lumis come from the mask's 'runAndLumis' and the
     events from the mean 'avgEvtsPerLumi' of the input files.
     """
     algorithm = self.algo

     if algorithm not in ('FileBased', 'EventBased'):
         for job in jobs:
             inputFiles = job['input_files']
             avgEventsPerLumi = sum(f['avgEvtsPerLumi'] for f in inputFiles) / float(len(inputFiles))
             jobLumis = LumiList(compactList=job['mask']['runAndLumis'])
             self.lumisPerJob.append(len(jobLumis.getLumis()))
             self.eventsPerJob.append(avgEventsPerLumi * self.lumisPerJob[-1])
         return

     if algorithm == 'FileBased':
         lumiCounts = [sum(f.get('lumiCount', 0) for f in job['input_files']) for job in jobs]
         self.lumisPerJob += lumiCounts
         eventCounts = [sum(f['events'] for f in job['input_files']) for job in jobs]
         self.eventsPerJob += eventCounts
     else:
         lumiSpans = [job['mask']['LastLumi'] - job['mask']['FirstLumi'] for job in jobs]
         self.lumisPerJob += lumiSpans
         eventSpans = [job['mask']['LastEvent'] - job['mask']['FirstEvent'] for job in jobs]
         self.eventsPerJob += eventSpans
Example #34
0
 def mergeLumis(inputdata):
     """
     Merge the lumis of all the reports in inputdata and return them as a
     compact list.
     """
     pairs = set()
     #merge the lumis from single files
     for reports in inputdata.values():
         for report in reports:
             runLumi = literal_eval(report['runlumi'])
             for run, lumis in runLumi.iteritems():
                 #lumi is str, but need int
                 pairs.update((run, int(lumi)) for lumi in lumis)
     return LumiList(lumis=pairs).getCompactList()
Example #35
0
 def mergeLumis(inputdata):
     """
     Compute the processed lumis: every (run, lumi) found in the 'runlumi'
     field of the reports is collected once and the compacted list is
     returned.
     """
     def iterRunLumiPairs():
         """Yield a (run, lumi) pair for each lumi of each report."""
         for reports in inputdata.values():
             for report in reports:
                 for run, lumis in literal_eval(report['runlumi']).iteritems():
                     for lumi in lumis:
                         yield (run, int(lumi)) #lumi is str, but need int

     merged = LumiList(lumis=set(iterRunLumiPairs()))
     return merged.getCompactList()
Example #36
0
    def getMaskedBlocks(self, task, dbs, datasetPath):
        """ Find the blocks and files selected by the task lumi mask and, for each file,
            the lumis it shares with the mask. The data structure returned is the following:

            {
                "block1" : {"file1" : LumiList(), "file5" : LumiList(), ...}
                "block2" : {"file2" : LumiList(), "file7" : LumiList(), ...}
            }

        """
        # The mask is kept both in its raw form (for the DBS queries) and as
        # a LumiList (for the intersections)
        lumiMask = task.getLumiMask()
        taskMask = LumiList(compactList=lumiMask)
        maskedBlocks = {}

        # First pass: ask DBS, run by run and in slices of 50 lumis, for the
        # files holding the requested lumis and register their block and lfn
        for run, lumis in lumiMask.items():
            matchingFiles = []
            for lumiSlice in Lexicon.slicedIterator(lumis, 50):
                matchingFiles.extend(dbs.dbs.listFileArray(dataset=datasetPath,
                                                           run_num=run,
                                                           lumi_list=lumiSlice,
                                                           detail=True))
            for fileInfo in matchingFiles:
                blockName = fileInfo['block_name']
                fileName = fileInfo['logical_file_name']
                filesOfBlock = maskedBlocks.setdefault(blockName, {})
                if fileName not in filesOfBlock:
                    filesOfBlock[fileName] = LumiList()

        # Second pass: add to every registered file the lumis it has in
        # common with the mask
        for blockName, filesOfBlock in maskedBlocks.items():
            for fileLumi in dbs.dbs.listFileLumis(block_name=blockName, validFileOnly=1):
                lfn = fileLumi['logical_file_name']
                if lfn not in filesOfBlock:
                    # File was not selected in the first pass
                    continue
                fileMask = LumiList(runsAndLumis={fileLumi['run_num']: fileLumi['lumi_section_num']})
                filesOfBlock[lfn] += (fileMask & taskMask)

        return maskedBlocks
Example #37
0
    def testRuns(self):
        """
        Test constructing from run and list of lumis

        The same lumis are given with integer run keys, with string run keys
        and through the 'lumiTest.json' file; all must produce the same
        output. Runs with empty lumi lists must give a LumiList of length 0.
        """
        runsAndLumis = {
            1: range(1, 34) + [35] + range(37, 48),
            2: range(49, 76) + range(77, 131) + range(133, 137)
        }
        runsAndLumis2 = {
            '1': range(1, 34) + [35] + range(37, 48),
            '2': range(49, 76) + range(77, 131) + range(133, 137)
        }
        blank = {
            '1': [],
            '2': []
        }

        jsonLister = LumiList(filename = 'lumiTest.json')
        jsonString = jsonLister.getCMSSWString()
        jsonList   = jsonLister.getCompactList()

        runLister = LumiList(runsAndLumis = runsAndLumis)
        runString = runLister.getCMSSWString()
        runList   = runLister.getCompactList()

        runLister2 = LumiList(runsAndLumis = runsAndLumis2)
        runList2 = runLister2.getCompactList()

        runLister3 = LumiList(runsAndLumis = blank)

        # assertEqual reports the mismatching values on failure
        self.assertEqual(jsonString, runString)
        self.assertEqual(jsonList, runList)
        self.assertEqual(runList2, runList)
        self.assertEqual(len(runLister3), 0)
Example #38
0
    def makeNewJobByWork(self, reason='', failedJob=False):
        """
        Create a new job from the events, lumis and files accumulated on
        this object.

        :param reason: Why are we making a new job (debugging only)
        :param failedJob: Make the job as already failed

        :return: nothing
        """
        jobEvents = self.eventsInJob
        jobLumis = self.jobLumis
        jobFiles = self.jobFiles

        # Track the largest number of lumis put into a single job
        self.maxLumis = max(self.maxLumis, len(jobLumis))

        # Transform the lumi list into something compact and usable
        compactLumis = LumiList(lumis=jobLumis).getCompactList()
        lfns = [f['lfn'] for f in jobFiles]
        logging.debug("Because %s new job with events: %s, lumis: %s, and files: %s",
                      reason, jobEvents, compactLumis, lfns)
        if not failedJob:
            self.newJob()
        else:
            logging.debug(" This job will be made failed")
            self.newJob(failedJob=failedJob, failedReason=reason)

        # Calculate and add performance information
        perf = self.getPerformanceParameters(self.perfParameters)
        timePerEvent, sizePerEvent, memoryRequirement = perf
        self.currentJob.addResourceEstimates(jobTime=jobEvents * timePerEvent,
                                             disk=jobEvents * sizePerEvent,
                                             memory=memoryRequirement)

        # Add job mask information, one call per lumi range of each run
        for run, lumiRanges in compactLumis.iteritems():
            runNumber = int(run)
            for lumiRange in lumiRanges:
                self.currentJob['mask'].addRunAndLumis(run=runNumber, lumis=lumiRange)

        # Add files
        for jobFile in jobFiles:
            self.currentJob.addFile(jobFile)

        # Add pileup info if needed
        if self.deterministicPU:
            eventsToSkip = (self.nJobs - 1) * self.maxEvents * self.maxLumis
            logging.debug('Adding baggage to skip %s events', eventsToSkip)
            self.currentJob.addBaggageParameter("skipPileupEvents", eventsToSkip)
Example #39
0
    def validFiles(self, files):
        """
        Apply lumi mask and or run white/black list and return files which have
        one or more of the requested lumis

        :param files: iterable of file entries; entries that are plain strings
                      or that carry no "LumiList" key are always kept
        :return: list with the entries of `files` that pass the selection,
                 in their original order
        """
        runWhiteList = self.topLevelTask.inputRunWhitelist()
        runBlackList = self.topLevelTask.inputRunBlacklist()
        lumiMask = self.topLevelTask.getLumiMask()

        blackMask = None
        if lumiMask:  # We have a lumiMask, so use it and modify with run white/black list
            if runWhiteList:
                lumiMask.selectRuns(runWhiteList)
            if runBlackList:
                lumiMask.removeRuns(runBlackList)
        elif runWhiteList:  # We have a run whitelist, subtract off blacklist
            lumiMask = LumiList(runs=runWhiteList)
            if runBlackList:
                lumiMask.removeRuns(runBlackList)
        else:
            lumiMask = None
            if runBlackList:  # We only have a blacklist, so make a black mask out of it instead
                blackMask = LumiList(runs=runBlackList)

        results = []
        for f in files:
            if isinstance(f, basestring) or "LumiList" not in f:
                results.append(f)
                continue

            # Create a LumiList from the WMBS info.  The lumis are collected
            # into fresh lists so that merging several entries of the same run
            # never extends (mutates) the lists stored in the file record.
            runLumis = {}
            for x in f['LumiList']:
                runLumis.setdefault(x['RunNumber'], []).extend(x['LumiSectionNumber'])
            fileLumiList = LumiList(runsAndLumis=runLumis)

            if lumiMask:
                if fileLumiList & lumiMask:  # At least one lumi from file is in lumiMask
                    results.append(f)
            elif blackMask:
                if fileLumiList - blackMask:  # At least one lumi from file is not in blackMask
                    results.append(f)
            else:  # There is effectively no mask
                results.append(f)

        return results
Example #40
0
    def adjust(self, parameters, inputs, outputs, se):
        """
        Register file transfers and fill in the mask of `parameters`.

        :param parameters: dict updated in place; ``parameters['mask']`` must
                           already be a dict
        :param inputs: list extended with ``(source, basename, False)`` tuples
                       when this object is local and `se` transfers inputs
        :param outputs: list extended with ``(target, basename)`` tuples when
                        `se` transfers outputs
        :param se: object providing ``transfer_inputs()``,
                   ``transfer_outputs()`` and ``local(path)``
        """
        if self._local and se.transfer_inputs():
            staged = []
            for _, path in self._files:
                if path:
                    staged.append((se.local(path), os.path.basename(path), False))
            inputs += staged

        if se.transfer_outputs():
            collected = [(se.local(remote), os.path.basename(local))
                         for local, remote in self.outputs]
            outputs += collected

        parameters['mask']['files'] = self.input_files
        parameters['output files'] = self.outputs

        if self._file_based:
            return

        # Units are (id, file, run, lumi); only the run/lumi pair is kept
        pairs = set((run, lumi) for (_, _, run, lumi) in self._units)
        parameters['mask']['lumis'] = LumiList(lumis=pairs).getCompactList()
Example #41
0
def getLumiListInValidFiles(dataset, dbsurl = 'phys03'):
    """
    Get the runs/lumis in the valid files of a given dataset.

    dataset: the dataset name as published in DBS
    dbsurl: the DBS URL or DBS prod instance

    Returns a LumiList object.

    Raises ClientException when the DBS file query fails or when DBS returns
    no files for the dataset.
    """
    dbsurl = DBSURLS['reader'].get(dbsurl, dbsurl)
    dbs3api = DbsApi(url=dbsurl)
    try:
        files = dbs3api.listFileArray(dataset=dataset, validFileOnly=0, detail=True)
    except Exception as ex:
        msg  = "Got DBS client error requesting details of dataset '%s' on DBS URL '%s': %s" % (dataset, dbsurl, ex)
        msg += "\n%s" % (traceback.format_exc())
        raise ClientException(msg)
    if not files:
        msg = "Dataset '%s' not found in DBS URL '%s'." % (dataset, dbsurl)
        raise ClientException(msg)
    # A set keeps the per-file membership test below constant-time; with a
    # list the nested loop was quadratic in the number of files.
    validFiles = set(f['logical_file_name'] for f in files if f['is_file_valid'])
    blocks = set(f['block_name'] for f in files)
    runLumiPairs = []
    for blockName in blocks:
        for f in dbs3api.listFileLumis(block_name=blockName):
            if f['logical_file_name'] not in validFiles:
                continue
            run = f['run_num']
            runLumiPairs.extend((run, lumi) for lumi in f['lumi_section_num'])

    return LumiList(lumis=runLumiPairs)
Example #42
0
 def getDuplicateLumis(lumisDict):
     """
     Return, in compact form, the run-lumis that are listed more than
     once in the input dictionary of runs and lumis. The input is
     assumed to have the following format:
         {
         '1': [1,2,3,4,6,7,8,9,10],
         '2': [1,4,5,20]
         }
     """
     repeated = set()
     for run, lumis in lumisDict.iteritems():
         alreadySeen = set()
         for lumi in lumis:
             pair = (run, lumi)
             if pair in alreadySeen:
                 # Second or later occurrence of this lumi within the run
                 repeated.add(pair)
             else:
                 alreadySeen.add(pair)
     return LumiList(lumis=repeated).getCompactList()
Example #43
0
 def getDuplicateLumis(lumisDict):
     """
     Find the lumis that occur more than once within a run.

     The input maps each run to its list of lumis, e.g.
         {
         '1': [1,2,3,4,6,7,8,9,10],
         '2': [1,4,5,20]
         }
     and the repeated run-lumis are returned as a compact list.
     """
     duplicates = []
     for run, lumis in lumisDict.iteritems():
         firstTime = set()
         for lumi in lumis:
             if lumi in firstTime:
                 duplicates.append((run, lumi))
             firstTime.add(lumi)
     # Collapse repeats of the same pair before building the LumiList
     doubleLumis = LumiList(lumis=set(duplicates))
     return doubleLumis.getCompactList()
Example #44
0
    def notestRead(self):
        """
        Test reading from JSON

        Compares the CMSSW string, the compact list and the
        VLuminosityBlockRange obtained from lumiTest.json with their expected
        values.  NOTE(review): the "no" prefix presumably keeps the test
        runner from collecting this method - confirm it is disabled on purpose.
        """
        ranges = ["1:1-1:33", "1:35", "1:37-1:47", "2:49-2:75", "2:77-2:130", "2:133-2:136"]
        exString = ",".join(ranges)
        exVLBR = cms.VLuminosityBlockRange(*ranges)
        exDict = {
            "1": [[1, 33], [35, 35], [37, 47]],
            "2": [[49, 75], [77, 130], [133, 136]],
        }

        jsonList = LumiList(filename="lumiTest.json")

        self.assertEqual(jsonList.getCMSSWString(), exString)
        self.assertEqual(jsonList.getCompactList(), exDict)
        self.assertEqual(jsonList.getVLuminosityBlockRange(True), exVLBR)
Example #45
0
def getLumiList(lumi_mask_name, logger = None):
    """
    Build a LumiList object from a lumi-mask.
    lumi_mask_name: either an http(s) address or a json file on disk.
    logger: optional logger; when given, the source of the mask is
            reported at debug level.
    """
    scheme = urlparse.urlparse(lumi_mask_name)[0]
    if scheme not in ['http', 'https']:
        # Anything that is not an http(s) URL is treated as a local file
        if logger:
            logger.debug('Reading lumi-mask from %s' % lumi_mask_name)
        return LumiList(filename = lumi_mask_name)

    if logger:
        logger.debug('Downloading lumi-mask from %s' % lumi_mask_name)
    return LumiList(url = lumi_mask_name)
Example #46
0
    def notestRead(self):
        """
        Test reading from JSON

        The list read from lumiTest.json is checked in its three output
        formats: CMSSW string, compact list and VLuminosityBlockRange.
        """
        expectedString = "1:1-1:33,1:35,1:37-1:47,2:49-2:75,2:77-2:130,2:133-2:136"
        expectedCompact = {
            '1': [[1, 33], [35, 35], [37, 47]],
            '2': [[49, 75], [77, 130], [133, 136]],
        }
        expectedVLBR = cms.VLuminosityBlockRange(
            '1:1-1:33', '1:35', '1:37-1:47',
            '2:49-2:75', '2:77-2:130', '2:133-2:136')

        fromJson = LumiList(filename = 'lumiTest.json')
        actualString = fromJson.getCMSSWString()
        actualCompact = fromJson.getCompactList()
        actualVLBR = fromJson.getVLuminosityBlockRange(True)

        self.assertEqual(actualString, expectedString)
        self.assertEqual(actualCompact, expectedCompact)
        self.assertEqual(actualVLBR, expectedVLBR)
Example #47
0
    def testSubtract(self):
        """
        a-b for lots of cases

        Covers overlapping ranges, a subtrahend that lacks one of the runs,
        the asymmetry of the operation and the empty result of a - a.
        """
        minuend = {
            '1': range(2, 20) + range(31, 39) + range(45, 49),
            '2': range(6, 20) + range(30, 40),
            '3': range(10, 20) + range(30, 40) + range(50, 60),
        }
        subtrahend = {
            '1': range(1, 6) + range(12, 13) + range(16, 30) + range(40, 50) + range(33, 36),
            '2': range(10, 35),
            '3': range(10, 15) + range(35, 40) + range(45, 51) + range(59, 70),
        }
        # Same lumis as the subtrahend for runs 1 and 2, but no run 3
        withoutRun3 = {
            '1': range(1, 6) + range(12, 13) + range(16, 30) + range(40, 50) + range(33, 36),
            '2': range(10, 35),
        }
        expected = {
            '1': range(6, 12) + range(13, 16) + range(31, 33) + range(36, 39),
            '2': range(6, 10) + range(35, 40),
            '3': range(15, 20) + range(30, 35) + range(51, 59),
        }
        # Run 3 is left untouched when the subtrahend does not contain it
        expectedWithoutRun3 = {
            '1': range(6, 12) + range(13, 16) + range(31, 33) + range(36, 39),
            '2': range(6, 10) + range(35, 40),
            '3': range(10, 20) + range(30, 40) + range(50, 60),
        }

        a = LumiList(runsAndLumis=minuend)
        b = LumiList(runsAndLumis=subtrahend)
        c = LumiList(runsAndLumis=withoutRun3)
        r = LumiList(runsAndLumis=expected)
        r2 = LumiList(runsAndLumis=expectedWithoutRun3)

        self.assertEqual((a - b).getCMSSWString(), r.getCMSSWString())
        self.assertNotEqual((a - b).getCMSSWString(), (b - a).getCMSSWString())
        # Test where c is missing runs from a
        self.assertEqual((a - c).getCMSSWString(), r2.getCMSSWString())
        self.assertNotEqual((a - c).getCMSSWString(), (c - a).getCMSSWString())
        # Test empty lists
        self.assertEqual(str(a - a), '{}')
        self.assertEqual(len(a - a), 0)
Example #48
0
    def mergeLumis(inputdata, lumimask):
        """
        Merge the lumis reported by the individual files (called when usedbs=no).

        Returns a tuple of three compact lists: the processed lumis, the
        lumis of `lumimask` that were not processed, and the lumis that were
        reported more than once.
        """
        seenPairs = set()
        repeatedPairs = set()

        # Walk every report and remember which (run, lumi) pairs show up again
        for reports in inputdata.values():
            for report in reports:
                runLumis = literal_eval(report['runlumi'])
                for run, lumis in runLumis.iteritems():
                    for lumi in lumis:
                        pair = (run, lumi)
                        if pair in seenPairs:
                            repeatedPairs.add(pair)
                        else:
                            seenPairs.add(pair)

        def groupByRun(pairs):
            # [(123,3), (123,4), (234,6)] => {123 : [3,4], 234 : [6]}
            grouped = {}
            for run, lumi in pairs:
                if run not in grouped:
                    grouped[run] = []
                grouped[run].append(int(lumi))
            return grouped

        dLumisDict = groupByRun(repeatedPairs)
        mLumisDict = groupByRun(seenPairs)

        doubleLumis = LumiList(runsAndLumis=dLumisDict)
        mergedLumis = LumiList(runsAndLumis=mLumisDict)

        processed = mergedLumis.getCompactList()
        notProcessed = (LumiList(compactList=lumimask) - mergedLumis).getCompactList()
        duplicated = doubleLumis.getCompactList()
        return processed, notProcessed, duplicated
Example #49
0
 def getLumilist(self):
     """
     Read the 'LumiList' value and wrap it in a LumiList object.

     When the stored value is empty an empty dictionary is returned
     instead of a LumiList.
     """
     compact = self._getValue('LumiList', {})
     return LumiList(compactList=compact) if compact else {}
Example #50
0
    def makeNewJobByWork(self, reason='', failedJob=False):
        """
        Turn the currently accumulated events, lumis and files into a new job.

        :param reason: Explanation for creating the job; only used for the
                       debug log and as the failure reason of a failed job
        :param failedJob: When true the job is created as already failed

        :return: nothing
        """
        events, lumis, files = self.eventsInJob, self.jobLumis, self.jobFiles

        self.maxLumis = max(self.maxLumis, len(lumis))

        # Compact representation of the lumis, grouped by run
        lumiList = LumiList(lumis=lumis).getCompactList()
        logging.debug("Because %s new job with events: %s, lumis: %s, and files: %s",
                      reason, events, lumiList, [f['lfn'] for f in files])

        newJobArgs = {}
        if failedJob:
            logging.debug(" This job will be made failed")
            newJobArgs = {'failedJob': failedJob, 'failedReason': reason}
        self.newJob(**newJobArgs)

        # Performance estimates are per event, so scale them by the job size
        timePerEvent, sizePerEvent, memoryRequirement = self.getPerformanceParameters(self.perfParameters)
        estimates = {
            'jobTime': events * timePerEvent,
            'disk': events * sizePerEvent,
            'memory': memoryRequirement,
        }
        self.currentJob.addResourceEstimates(**estimates)

        # Job mask: every lumi range of every run
        for run, lumiRanges in lumiList.iteritems():
            for lumiRange in lumiRanges:
                self.currentJob['mask'].addRunAndLumis(run=int(run), lumis=lumiRange)

        # Input files
        for f in files:
            self.currentJob.addFile(f)

        # Pileup bookkeeping, only for deterministic pileup
        if not self.deterministicPU:
            return

        eventsToSkip = (self.nJobs - 1) * self.maxEvents * self.maxLumis
        logging.debug('Adding baggage to skip %s events', eventsToSkip)
        self.currentJob.addBaggageParameter("skipPileupEvents", eventsToSkip)

        return
Example #51
0
    def getLumilistWhitelist(self, collectionID, taskName):
        """
        Wrap the result of getLumiWhitelist in a LumiList.

        Args:
            collectionID, taskName: Passed unchanged to getLumiWhitelist

        Returns: a LumiList object built from the compact list that
                 getLumiWhitelist returns
        """
        compactWhitelist = self.getLumiWhitelist(collectionID, taskName)
        return LumiList(compactList=compactWhitelist)
Example #52
0
    def testAddLumiMask(self):
        """
        _testAddLumiMask_

        Round trip of a typical lumi mask: the mask stored on a task with
        setLumiMask must come back unchanged from getLumiMask.
        """
        testTask = makeWMTask("TestTask")

        compactMask = {
            '1': [[1, 33], [35, 35], [37, 47], [49, 75], [77, 130], [133, 136]],
            '2': [[1, 45]],
            '3': [[1, 45], [50, 80]],
        }
        lumiMask = LumiList(compactList=compactMask)

        testTask.setLumiMask(lumiMask=lumiMask.getCompactList())
        retrieved = testTask.getLumiMask()

        self.assertEqual(lumiMask.getCMSSWString(), retrieved.getCMSSWString())

        return
    def subtractLumis(input, output):
        """
        Computes the processed lumis from the DBS results (called when usedbs=yes).

        :param input: mapping of run to the list of lumis that were requested
        :param output: mapping of run to the list of lumis that were processed;
                       a lumi listed more than once for a run counts as duplicated

        :return: tuple of three compact lists: the processed lumis, the
                 requested lumis that were not processed, and the lumis that
                 were processed more than once
        """
        out = LumiList(runsAndLumis=output)
        in_ = LumiList(runsAndLumis=input)
        diff = in_ - out

        # Calculate lumis counted twice.  A single pass with a "seen" set
        # replaces the former list.count() call per lumi, which was quadratic
        # in the number of lumis of a run.
        dLumisDict = {}
        for run, lumis in output.items():
            seen = set()
            repeated = set()
            for lumi in lumis:
                if lumi in seen:
                    repeated.add(lumi)
                else:
                    seen.add(lumi)
            if repeated:
                dLumisDict[run] = list(repeated)
        double = LumiList(runsAndLumis=dLumisDict)

        return out.getCompactList(), diff.getCompactList(), double.getCompactList()
Example #54
0
    def testAddLumiMask(self):
        """
        _testAddLumiMask_

        Store a typical lumi mask on a task and read it back; the value
        returned by getLumiMask is wrapped in a LumiList and must describe
        the same lumis as the original mask.
        """
        testTask = makeWMTask("TestTask")

        original = LumiList(compactList = {
            '1': [[1, 33], [35, 35], [37, 47], [49, 75], [77, 130], [133, 136]],
            '2': [[1, 45]],
            '3': [[1, 45], [50, 80]],
        })

        testTask.setLumiMask(lumiMask = original.getCompactList())
        roundTripped = LumiList(compactList = testTask.getLumiMask())

        self.assertEqual(original.getCMSSWString(), roundTripped.getCMSSWString())

        return
Example #55
0
    def testOr(self):
        """
        a|b for lots of cases

        Checks the union against a hand-built expectation, that the union is
        commutative (also when one operand lacks a run) and that + gives the
        same result as |.
        """

        alumis = {
            "1": range(2, 20) + range(31, 39) + range(45, 49),
            "2": range(6, 20) + range(30, 40),
            "3": range(10, 20) + range(30, 40) + range(50, 60),
        }
        blumis = {
            "1": range(1, 6) + range(12, 13) + range(16, 30) + range(40, 50) + range(39, 80),
            "2": range(10, 35),
            "3": range(10, 15) + range(35, 40) + range(45, 51) + range(59, 70),
        }
        # Same as blumis but without run 3
        clumis = {"1": range(1, 6) + range(12, 13) + range(16, 30) + range(40, 50) + range(39, 80), "2": range(10, 35)}
        result = {
            "1": range(2, 20)
            + range(31, 39)
            + range(45, 49)
            + range(1, 6)
            + range(12, 13)
            + range(16, 30)
            + range(40, 50)
            + range(39, 80),
            "2": range(6, 20) + range(30, 40) + range(10, 35),
            "3": range(10, 20)
            + range(30, 40)
            + range(50, 60)
            + range(10, 15)
            + range(35, 40)
            + range(45, 51)
            + range(59, 70),
        }
        a = LumiList(runsAndLumis=alumis)
        b = LumiList(runsAndLumis=blumis)
        # Previously built from blumis, which left clumis unused
        c = LumiList(runsAndLumis=clumis)
        r = LumiList(runsAndLumis=result)
        self.assertTrue((a | b).getCMSSWString() == r.getCMSSWString())
        self.assertTrue((a | b).getCMSSWString() == (b | a).getCMSSWString())
        self.assertTrue((a | b).getCMSSWString() == (a + b).getCMSSWString())
        # Test where c is missing runs from a
        self.assertTrue((a | c).getCMSSWString() == (c | a).getCMSSWString())
Example #56
0
def edit_process_source(pset, config):
    """Append a task-specific fragment to a parameter set file.

    The fragment sets the event count and, depending on `config`, the input
    files or gridpack, the lumi mask, the process summary, runtime limit,
    seed randomization, MC production settings and core count.  It is logged
    line by line at debug level and then appended to the file `pset`.
    """
    mask = config['mask']
    files = mask['files']
    lumis = LumiList(compactList=mask['lumis']).getVLuminosityBlockRange()
    want_summary = config['want summary']
    runtime = config.get('task runtime')
    cores = config.get('cores')

    # MC production settings
    run_first = mask.get('first run')
    lumi_first = mask.get('first lumi')
    lumi_events = mask.get('events per lumi')
    seeding = config.get('randomize seeds', False)

    with open(pset, 'a') as fp:
        pieces = [fragment.format(events=mask['events'])]

        gridpack = config['gridpack']
        if any(files) and not gridpack:
            file_names = repr([str(f) for f in files])
            pieces.append(
                "\nprocess.source.fileNames = cms.untracked.vstring({0})".format(file_names))
        if gridpack:
            # ExternalLHEProducer only understands local files and does
            # not expect the `file:` prefix. Also, there can never be
            # more than one gridpack, so take the first element.
            local_path = os.path.abspath(files[0].replace('file:', ''))
            pieces.append(fragment_gridpack.format(gridpack=local_path))
        if lumis:
            lumi_ranges = [str(l) for l in lumis]
            pieces.append(
                "\nprocess.source.lumisToProcess = cms.untracked.VLuminosityBlockRange({0})".format(lumi_ranges))
        if want_summary:
            pieces.append(fragment_sum)
        if runtime:
            pieces.append(fragment_runtime.format(time=runtime))
        if seeding:
            pieces.append(fragment_seeding)
        if lumi_events:
            pieces.append(fragment_lumi.format(events=lumi_events))
        if lumi_first:
            pieces.append(fragment_first_lumi.format(lumi=lumi_first))
        if run_first:
            pieces.append(fragment_first_run.format(run=run_first))
        if cores:
            pieces.append(fragment_cores.format(cores=cores))

        frag = "".join(pieces)

        logger.info("config file fragment")
        with mangler.output('pset'):
            for line in frag.splitlines():
                logger.debug(line)
        fp.write(frag)
Example #57
0
    def validFiles(self, files):
        """
        Apply lumi mask and or run white/black list and return files which have
        one or more of the requested lumis

        :param files: iterable of file entries; entries that are plain strings
                      or that carry no "LumiList" key are always kept
        :return: list with the entries of `files` that pass the selection,
                 in their original order
        """
        runWhiteList = self.topLevelTask.inputRunWhitelist()
        runBlackList = self.topLevelTask.inputRunBlacklist()
        taskLumiMask = self.topLevelTask.getLumiMask()

        blackMask = None
        if taskLumiMask:       # We have a lumiMask, so use it and modify with run white/black list
            if isinstance(taskLumiMask, LumiList):  # For a possible future where we use LumiList more prevalently
                # Work on a copy so the task's own mask is not modified below
                lumiMask = copy.deepcopy(taskLumiMask)
            else:
                lumiMask = LumiList(compactList = taskLumiMask)
            if runWhiteList:
                lumiMask.selectRuns(runWhiteList)
            if runBlackList:
                lumiMask.removeRuns(runBlackList)
        elif runWhiteList:    # We have a run whitelist, subtract off blacklist
            lumiMask = LumiList(runs = runWhiteList)
            if runBlackList:
                lumiMask.removeRuns(runBlackList)
        else:
            lumiMask = None
            if runBlackList:  # We only have a blacklist, so make a black mask out of it instead
                # The whitelist is empty in this branch; the mask must be
                # built from the blacklisted runs.
                blackMask = LumiList(runs = runBlackList)

        results = []
        for f in files:
            if isinstance(f, str) or "LumiList" not in f:
                results.append(f)
                continue

            # Create a LumiList from the WMBS info.  Lumis of entries that
            # share a run number are merged rather than overwritten.
            fileRunsAndLumis = {}
            for x in f['LumiList']:
                fileRunsAndLumis.setdefault(str(x['RunNumber']), []).extend(x['LumiSectionNumber'])
            fileLumiList = LumiList(runsAndLumis = fileRunsAndLumis)

            if lumiMask:
                if fileLumiList & lumiMask:  # At least one lumi from file is in lumiMask
                    results.append(f)
            elif blackMask:
                if fileLumiList - blackMask: # At least one lumi from file is not in blackMask
                    results.append(f)
            else:                            # There is effectively no mask
                results.append(f)

        return results
Example #58
0
    def adjustLumisForCompletion(self, task, unprocessed):
        """Sets the run, lumi information in the task information for the
        completion jobs.  Returns True if completion jobs are needed,
        otherwise False.

        :param task: dict whose ``task['tm_split_args']`` receives the
                     ``'runs'`` and ``'lumis'`` entries
        :param unprocessed: set of job identifiers to consider; it is
                            intersected with the file names found in the
                            missing-lumis directory and with ``self.failedJobs``
        """
        missingDir = "automatic_splitting/missing_lumis/"  #TODO in ServerUtilities to be shared with PJ

        try:
            available = set(os.listdir(missingDir)) & unprocessed
        except OSError:
            # Directory absent or unreadable: no missing-lumi files to use
            available = set()

        failed = set(self.failedJobs) & unprocessed

        if not available and not failed:
            return False

        missing = LumiList()
        for missingFile in available:
            with open(os.path.join(missingDir, missingFile)) as fd:
                self.logger.info("Adding missing lumis from job %s",
                                 missingFile)
                missing = missing + LumiList(
                    compactList=literal_eval(fd.read()))
        for failedId in failed:
            # Create the scratch directory before entering the try block so
            # the cleanup in `finally` always refers to this iteration's
            # directory and a failing mkdtemp is not masked by a NameError.
            tmpdir = tempfile.mkdtemp()
            f = None
            try:
                f = tarfile.open("run_and_lumis.tar.gz")
                fn = "job_lumis_{0}.json".format(failedId)
                f.extract(fn, path=tmpdir)
                with open(os.path.join(tmpdir, fn)) as fd:
                    injson = json.load(fd)
                    missing = missing + LumiList(compactList=injson)
                    self.logger.info("Adding lumis from failed job %s",
                                     failedId)
            finally:
                if f is not None:
                    f.close()
                shutil.rmtree(tmpdir)
        missing_compact = missing.getCompactList()
        runs = missing.getRuns()
        # Compact list is like
        # {
        # '1': [[1, 33], [35, 35], [37, 47], [49, 75], [77, 130], [133, 136]],
        # '2':[[1,45],[50,80]]
        # }
        # Now we turn lumis it into something like:
        # lumis=['1, 33, 35, 35, 37, 47, 49, 75, 77, 130, 133, 136','1,45,50,80']
        # which is the format expected by buildLumiMask in the splitting algorithm
        lumis = [
            ",".join(
                str(l) for l in functools.reduce(
                    lambda x, y: x + y, missing_compact[run])) for run in runs
        ]

        task['tm_split_args']['runs'] = runs
        task['tm_split_args']['lumis'] = lumis

        return True
Example #59
0
    def testFilter(self):
        """
        Test filtering of a list of lumis

        A LumiList is built from runsAndLumis and its filterLumis method is
        applied to three lists of (run, lumi) pairs: a superset of the
        constructed list, a subset of it, and a list that only partly
        overlaps it.
        """
        runsAndLumis = {
            1: range(1, 34) + [35] + range(37, 48),
            2: range(49, 76) + range(77, 131) + range(133, 137)
        }

        # zip() stops at its shorter argument, so the repeat counts below only
        # have to be at least as long as the lumi ranges they are paired with.
        # Runs 1, 2 and 3, each with lumis 1..149
        completeList = list(zip([1]*150, range(1, 150))) + \
                       list(zip([2]*150, range(1, 150))) + \
                       list(zip([3]*150, range(1, 150)))

        # Run 1 lumis 1..9 and run 2 lumis 50..69, all present in runsAndLumis
        smallList    = list(zip([1]*50,  range(1, 10))) + list(zip([2]*50, range(50, 70)))
        # Run 1 lumis 30..39 and run 2 lumis 60..79
        overlapList  = list(zip([1]*150, range(30, 40))) + \
                       list(zip([2]*150, range(60, 80)))
        # The pairs of overlapList that are also in runsAndLumis
        overlapRes   = list(zip([1]*9,   range(30, 34))) + [(1, 35)] + \
                       list(zip([1]*9,   range(37, 40))) + \
                       list(zip([2]*30,  range(60, 76))) + \
                       list(zip([2]*9,   range(77, 80)))

        runLister = LumiList(runsAndLumis = runsAndLumis)

        # Test a list to be filtered which is a superset of constructed list
        filterComplete = runLister.filterLumis(completeList)
        # Test a list to be filtered which is a subset of constructed list
        filterSmall    = runLister.filterLumis(smallList)
        # Test a list to be filtered which is neither
        filterOverlap  = runLister.filterLumis(overlapList)

        # Superset: exactly the lumis of the constructed list come back
        self.assertTrue(filterComplete == runLister.getLumis())
        # Subset: the input comes back unchanged
        self.assertTrue(filterSmall    == smallList)
        self.assertTrue(filterOverlap  == overlapRes)