Python LumiList.getCompactList 예제들, WMCore.DataStructs.LumiList.LumiList.getCompactList Python 예제들

예제 #1

0

파일 보기

파일: LumiList_t.py 프로젝트: AndrewLevin/WMCore

    def testRuns(self):
        """
        Test constucting from run and list of lumis
        """
        runsAndLumis = {
            1: range(1, 34) + [35] + range(37, 48),
            2: range(49, 76) + range(77, 131) + range(133, 137)
        }
        runsAndLumis2 = {
            '1': range(1, 34) + [35] + range(37, 48),
            '2': range(49, 76) + range(77, 131) + range(133, 137)
        }
        blank = {
            '1': [],
            '2': []
        }

        jsonLister = LumiList(filename = 'lumiTest.json')
        jsonString = jsonLister.getCMSSWString()
        jsonList   = jsonLister.getCompactList()

        runLister = LumiList(runsAndLumis = runsAndLumis)
        runString = runLister.getCMSSWString()
        runList   = runLister.getCompactList()

        runLister2 = LumiList(runsAndLumis = runsAndLumis2)
        runList2 = runLister2.getCompactList()

        runLister3 = LumiList(runsAndLumis = blank)


        self.assertTrue(jsonString == runString)
        self.assertTrue(jsonList   == runList)
        self.assertTrue(runList2   == runList)
        self.assertTrue(len(runLister3) == 0)

예제 #2

0

파일 보기

    def testRuns(self):
        """
        Test constucting from run and list of lumis
        """
        runsAndLumis = {
            1: range(1, 34) + [35] + range(37, 48),
            2: range(49, 76) + range(77, 131) + range(133, 137)
        }
        runsAndLumis2 = {
            '1': range(1, 34) + [35] + range(37, 48),
            '2': range(49, 76) + range(77, 131) + range(133, 137)
        }
        blank = {
            '1': [],
            '2': []
        }

        jsonLister = LumiList(filename = 'lumiTest.json')
        jsonString = jsonLister.getCMSSWString()
        jsonList   = jsonLister.getCompactList()

        runLister = LumiList(runsAndLumis = runsAndLumis)
        runString = runLister.getCMSSWString()
        runList   = runLister.getCompactList()

        runLister2 = LumiList(runsAndLumis = runsAndLumis2)
        runList2 = runLister2.getCompactList()

        runLister3 = LumiList(runsAndLumis = blank)


        self.assertTrue(jsonString == runString)
        self.assertTrue(jsonList   == runList)
        self.assertTrue(runList2   == runList)
        self.assertTrue(len(runLister3) == 0)

예제 #3

0

파일 보기

    def mergeLumis(inputdata, lumimask):
        """
        Computes the processed lumis, merges if needed and returns the compacted list (called when usedbs=no).
        """
        doubleLumis = set()
        mergedLumis = set()

        #merge the lumis from single files
        for reports in inputdata.values():
            for report in reports:
                for run, lumis in literal_eval(report['runlumi']).iteritems():
                    for lumi in lumis:
                        if (run, lumi) in mergedLumis:
                            doubleLumis.add((run, lumi))
                        mergedLumis.add((run, lumi))

        #convert the runlumis from list of pairs to dict: [(123,3), (123,4), (123,5), (123,7), (234,6)] => {123 : [3,4,5,7], 234 : [6]}
        dLumisDict = {}
        mLumisDict = {}
        for k, v in doubleLumis:
            dLumisDict.setdefault(k, []).append(int(v))
        for k, v in mergedLumis:
            mLumisDict.setdefault(k, []).append(int(v))

        doubleLumis = LumiList(runsAndLumis=dLumisDict)
        mergedLumis = LumiList(runsAndLumis=mLumisDict)

        #get the compact list using CMSSW framework
        return mergedLumis.getCompactList(), (
            LumiList(compactList=lumimask) -
            mergedLumis).getCompactList(), doubleLumis.getCompactList()

예제 #4

0

파일 보기

파일: BasicJobType.py 프로젝트: nizamyusli/CRABClient

 def mergeLumis(inputdata, lumimask):
     """
     Computes the processed lumis, merges if needed and returns the compacted list (called when usedbs=no).
     """
     mergedLumis = set()
     #merge the lumis from single files
     for reports in inputdata.values():
         for report in reports:
             for run, lumis in literal_eval(report['runlumi']).iteritems():
                 for lumi in lumis:
                     mergedLumis.add((run,int(lumi))) #lumi is str, but need int
     mergedLumis = LumiList(lumis=mergedLumis)
     diff = LumiList(compactList=lumimask) - mergedLumis
     return mergedLumis.getCompactList(), diff.getCompactList()

예제 #5

0

파일 보기

파일: PreDAG.py 프로젝트: sharmaprajesh/CRABServer

    def adjustLumisForCompletion(self, task, unprocessed):
        """Sets the run, lumi information in the task information for the
        completion jobs.  Returns True if completion jobs are needed,
        otherwise False.
        """
        missingDir = "automatic_splitting/missing_lumis/"  #TODO in ServerUtilities to be shared with PJ

        try:
            available = set(os.listdir(missingDir)) & unprocessed
        except OSError:
            available = set()

        failed = set(self.failedJobs) & unprocessed

        if len(available) == 0 and len(failed) == 0:
            return False

        missing = LumiList()
        for missingFile in available:
            with open(os.path.join(missingDir, missingFile)) as fd:
                self.logger.info("Adding missing lumis from job %s",
                                 missingFile)
                missing = missing + LumiList(
                    compactList=literal_eval(fd.read()))
        for failedId in failed:
            f = None
            try:
                tmpdir = tempfile.mkdtemp()
                f = tarfile.open("run_and_lumis.tar.gz")
                fn = "job_lumis_{0}.json".format(failedId)
                f.extract(fn, path=tmpdir)
                with open(os.path.join(tmpdir, fn)) as fd:
                    injson = json.load(fd)
                    missing = missing + LumiList(compactList=injson)
                    self.logger.info("Adding lumis from failed job %s",
                                     failedId)
            finally:
                if f:
                    f.close()
                shutil.rmtree(tmpdir)
        missing_compact = missing.getCompactList()
        runs = missing.getRuns()
        # Compact list is like
        # {
        # '1': [[1, 33], [35, 35], [37, 47], [49, 75], [77, 130], [133, 136]],
        # '2':[[1,45],[50,80]]
        # }
        # Now we turn lumis it into something like:
        # lumis=['1, 33, 35, 35, 37, 47, 49, 75, 77, 130, 133, 136','1,45,50,80']
        # which is the format expected by buildLumiMask in the splitting algorithm
        lumis = [
            ",".join(
                str(l) for l in functools.reduce(
                    lambda x, y: x + y, missing_compact[run])) for run in runs
        ]

        task['tm_split_args']['runs'] = runs
        task['tm_split_args']['lumis'] = lumis

        return True

예제 #6

0

파일 보기

def makeLumiList(lumiString):
    try:
        compactList = json.loads(lumiString)
        ll = LumiList(compactList = compactList)
        return ll.getCompactList()
    except:
        raise WMWorkloadToolsException("Could not parse LumiList")

예제 #7

0

파일 보기

파일: runPostCrab.py 프로젝트: BrieucF/GridIn

def fast_getDoubleLumis(lumisDict):
    doubleLumis = set()
    for run, lumis in lumisDict.iteritems():
        seen = set()
        doubleLumis.update(set((run, lumi) for lumi in lumis if (run, lumi) in seen or seen.add((run, lumi))))
    doubleLumis = LumiList(lumis=doubleLumis)
    return doubleLumis.getCompactList()

예제 #8

0

파일 보기

파일: BasicJobType.py 프로젝트: PerilousApricot/CRABClient

    def mergeLumis(inputdata, lumimask):
        """
        Computes the processed lumis, merges if needed and returns the compacted list (called when usedbs=no).
        """
        doubleLumis = set()
        mergedLumis = set()

        #merge the lumis from single files
        for reports in inputdata.values():
            for report in reports:
                for run, lumis in literal_eval(report['runlumi']).iteritems():
                    for lumi in lumis:
                        if (run,lumi) in mergedLumis:
                            doubleLumis.add((run,lumi))
                        mergedLumis.add((run,lumi))

        #convert the runlumis from list of pairs to dict: [(123,3), (123,4), (123,5), (123,7), (234,6)] => {123 : [3,4,5,7], 234 : [6]}
        dLumisDict = {}
        mLumisDict = {}
        for k, v in doubleLumis:
            dLumisDict.setdefault(k, []).append(int(v))
        for k, v in mergedLumis:
            mLumisDict.setdefault(k, []).append(int(v))

        doubleLumis = LumiList(runsAndLumis=dLumisDict)
        mergedLumis = LumiList(runsAndLumis=mLumisDict)

        #get the compact list using CMSSW framework
        return mergedLumis.getCompactList(), (LumiList(compactList=lumimask) - mergedLumis).getCompactList(), doubleLumis.getCompactList()

예제 #9

0

파일 보기

파일: BasicJobType.py 프로젝트: blinkseb/CRABClient

 def getDoubleLumis(lumisDict):
     #calculate lumis counted twice
     doubleLumis = set()
     for run, lumis in lumisDict.iteritems():
         seen = set()
         doubleLumis.update(set((run, lumi) for lumi in lumis if (run, lumi) in seen or seen.add((run, lumi))))
     doubleLumis = LumiList(lumis=doubleLumis)
     return doubleLumis.getCompactList()

예제 #10

0

파일 보기

def makeLumiList(lumiDict):
    try:
        if isinstance(lumiDict, basestring):
            lumiDict = JsonWrapper.loads(lumiDict)
        ll = LumiList(compactList=lumiDict)
        return ll.getCompactList()
    except:
        raise WMSpecFactoryException("Could not parse LumiList, %s: %s" % (type(lumiDict), lumiDict))

예제 #11

0

파일 보기

파일: BasicJobType.py 프로젝트: nizamyusli/CRABClient

 def subtractLumis(input, output):
     """
     Computes the processed lumis, merges from the DBS reuslts (called when usedbs=yes).
     """
     out = LumiList(runsAndLumis=output)
     in_ = LumiList(runsAndLumis=input)
     diff = in_ - out
     return out.getCompactList(), diff.getCompactList()

예제 #12

0

파일 보기

파일: BasicJobType.py 프로젝트: nizamyusli/CRABClient

 def getDoubleLumis(lumisDict):
     #calculate lumis counted twice
     doubleLumis = set()
     for run, lumis in lumisDict.iteritems():
         for lumi in lumis:
             if lumisDict[run].count(lumi) > 1:
                 doubleLumis.add((run,lumi))
     doubleLumis = LumiList(lumis=doubleLumis)
     return doubleLumis.getCompactList()

예제 #13

0

파일 보기

def makeLumiList(lumiDict):
    try:
        if isinstance(lumiDict, (str, bytes)):
            lumiDict = json.loads(lumiDict)
        ll = LumiList(compactList=lumiDict)
        return ll.getCompactList()
    except:
        raise WMSpecFactoryException("Could not parse LumiList, %s: %s" %
                                     (type(lumiDict), lumiDict))

예제 #14

0

파일 보기

def fast_getDoubleLumis(lumisDict):
    doubleLumis = set()
    for run, lumis in lumisDict.iteritems():
        seen = set()
        doubleLumis.update(
            set((run, lumi) for lumi in lumis
                if (run, lumi) in seen or seen.add((run, lumi))))
    doubleLumis = LumiList(lumis=doubleLumis)
    return doubleLumis.getCompactList()

예제 #15

0

파일 보기

    def removeLumiList(self, lumiList):
        """
        Remove a lumi list from this data structure

        This requires conversion to LumiList to do the lumi algebra an
        may be computationally expensive for a large number of lumis.
        """
        myLumis = LumiList(compactList=self['runAndLumis'])
        myLumis = myLumis - lumiList
        self['runAndLumis'] = myLumis.getCompactList()

예제 #16

0

파일 보기

파일: Mask.py 프로젝트: AndrewLevin/WMCore

    def removeLumiList(self, lumiList):
        """
        Remove a lumi list from this data structure

        This requires conversion to LumiList to do the lumi algebra an
        may be computationally expensive for a large number of lumis.
        """
        myLumis = LumiList(compactList=self['runAndLumis'])
        myLumis = myLumis - lumiList
        self['runAndLumis'] = myLumis.getCompactList()

예제 #17

0

파일 보기

    def testNull(self):
        """
        Test a null list
        """

        runLister = LumiList(lumis = None)

        self.assertTrue(runLister.getCMSSWString() == '')
        self.assertTrue(runLister.getLumis() == [])
        self.assertTrue(runLister.getCompactList() == {})

예제 #18

0

파일 보기

    def testList(self):
        """
        Test constucting from list of pairs
        """

        listLs1 = range(1, 34) + [35] + range(37, 48)
        listLs2 = range(49, 76) + range(77, 131) + range(133, 137)
        lumis = list(zip([1]*100, listLs1)) + list(zip([2]*100, listLs2))

        jsonLister = LumiList(filename = 'lumiTest.json')
        jsonString = jsonLister.getCMSSWString()
        jsonList = jsonLister.getCompactList()

        pairLister = LumiList(lumis = lumis)
        pairString = pairLister.getCMSSWString()
        pairList = pairLister.getCompactList()

        self.assertTrue(jsonString == pairString)
        self.assertTrue(jsonList   == pairList)

예제 #19

0

파일 보기

파일: LumiList_t.py 프로젝트: AndrewLevin/WMCore

    def testList(self):
        """
        Test constucting from list of pairs
        """

        listLs1 = range(1, 34) + [35] + range(37, 48)
        listLs2 = range(49, 76) + range(77, 131) + range(133, 137)
        lumis = zip([1]*100, listLs1) + zip([2]*100, listLs2)

        jsonLister = LumiList(filename = 'lumiTest.json')
        jsonString = jsonLister.getCMSSWString()
        jsonList = jsonLister.getCompactList()

        pairLister = LumiList(lumis = lumis)
        pairString = pairLister.getCMSSWString()
        pairList = pairLister.getCompactList()

        self.assertTrue(jsonString == pairString)
        self.assertTrue(jsonList   == pairList)

예제 #20

0

파일 보기

파일: LumiList_t.py 프로젝트: AndrewLevin/WMCore

    def testNull(self):
        """
        Test a null list
        """

        runLister = LumiList(lumis = None)

        self.assertTrue(runLister.getCMSSWString() == '')
        self.assertTrue(runLister.getLumis() == [])
        self.assertTrue(runLister.getCompactList() == {})

예제 #21

0

파일 보기

파일: BasicJobType.py 프로젝트: PerilousApricot/CRABClient

    def subtractLumis(input, output):
        """
        Computes the processed lumis, merges from the DBS reuslts (called when usedbs=yes).
        """
        out = LumiList(runsAndLumis=output)
        in_ = LumiList(runsAndLumis=input)
        diff = in_ - out

        #calculate lumis counted twice
        doubleLumis = set()
        for run,lumis in output.iteritems():
            for lumi in lumis:
                if output[run].count(lumi) > 1:
                    doubleLumis.add((run,lumi))
        dLumisDict = {}
        for k, v in doubleLumis:
            dLumisDict.setdefault(k, []).append(v)
        double = LumiList(runsAndLumis=dLumisDict)

        return out.getCompactList(), diff.getCompactList(), double.getCompactList()

예제 #22

0

파일 보기

파일: task.py 프로젝트: khurtado/lobster

    def adjust(self, parameters, inputs, outputs, se):
        local = self._local
        if local and se.transfer_inputs():
            inputs += [(se.local(f), os.path.basename(f), False) for id, f in self._files if f]
        if se.transfer_outputs():
            outputs += [(se.local(rf), os.path.basename(lf)) for lf, rf in self.outputs]

        parameters['mask']['files'] = self.input_files
        parameters['output files'] = self.outputs
        if not self._file_based:
            ls = LumiList(lumis=set([(run, lumi) for (id, file, run, lumi) in self._units]))
            parameters['mask']['lumis'] = ls.getCompactList()

예제 #23

0

파일 보기

파일: PreDAG.py 프로젝트: belforte/CRABServer

    def adjustLumisForCompletion(self, task, unprocessed):
        """Sets the run, lumi information in the task information for the
        completion jobs.  Returns True if completion jobs are needed,
        otherwise False.
        """
        missingDir = "automatic_splitting/missing_lumis/" #TODO in ServerUtilities to be shared with PJ

        try:
            available = set(os.listdir(missingDir)) & unprocessed
        except OSError:
            available = set()

        failed = set(self.failedJobs) & unprocessed

        if len(available) == 0 and len(failed) == 0:
            return False

        missing = LumiList()
        for missingFile in available:
            with open(os.path.join(missingDir, missingFile)) as fd:
                self.logger.info("Adding missing lumis from job %s", missingFile)
                missing = missing + LumiList(compactList=literal_eval(fd.read()))
        for failedId in failed:
            f = None
            try:
                tmpdir = tempfile.mkdtemp()
                f = tarfile.open("run_and_lumis.tar.gz")
                fn = "job_lumis_{0}.json".format(failedId)
                f.extract(fn, path=tmpdir)
                with open(os.path.join(tmpdir, fn)) as fd:
                    injson = json.load(fd)
                    missing = missing + LumiList(compactList=injson)
                    self.logger.info("Adding lumis from failed job %s", failedId)
            finally:
                if f:
                    f.close()
                shutil.rmtree(tmpdir)
        missing_compact = missing.getCompactList()
        runs = missing.getRuns()
        # Compact list is like
        # {
        # '1': [[1, 33], [35, 35], [37, 47], [49, 75], [77, 130], [133, 136]],
        # '2':[[1,45],[50,80]]
        # }
        # Now we turn lumis it into something like:
        # lumis=['1, 33, 35, 35, 37, 47, 49, 75, 77, 130, 133, 136','1,45,50,80']
        # which is the format expected by buildLumiMask in the splitting algorithm
        lumis = [",".join(str(l) for l in functools.reduce(lambda x, y:x + y, missing_compact[run])) for run in runs]

        task['tm_split_args']['runs'] = runs
        task['tm_split_args']['lumis'] = lumis

        return True

예제 #24

0

파일 보기

    def subtractLumis(input, output):
        """
        Computes the processed lumis, merges from the DBS reuslts (called when usedbs=yes).
        """
        out = LumiList(runsAndLumis=output)
        in_ = LumiList(runsAndLumis=input)
        diff = in_ - out

        #calculate lumis counted twice
        doubleLumis = set()
        for run, lumis in output.iteritems():
            for lumi in lumis:
                if output[run].count(lumi) > 1:
                    doubleLumis.add((run, lumi))
        dLumisDict = {}
        for k, v in doubleLumis:
            dLumisDict.setdefault(k, []).append(v)
        double = LumiList(runsAndLumis=dLumisDict)

        return out.getCompactList(), diff.getCompactList(
        ), double.getCompactList()

예제 #25

0

파일 보기

파일: BasicJobType.py 프로젝트: emaszs/CRABClient

 def mergeLumis(inputdata):
     """
     Computes the processed lumis, merges if needed and returns the compacted list.
     """
     mergedLumis = set()
     #merge the lumis from single files
     for reports in inputdata.values():
         for report in reports:
             for run, lumis in literal_eval(report['runlumi']).iteritems():
                 for lumi in lumis:
                     mergedLumis.add((run,int(lumi))) #lumi is str, but need int
     mergedLumis = LumiList(lumis=mergedLumis)
     return mergedLumis.getCompactList()

예제 #26

0

파일 보기

파일: task.py 프로젝트: WenzhaoLi/lobster

    def adjust(self, parameters, inputs, outputs, se):
        local = self._local
        if local and se.transfer_inputs():
            inputs += [(se.local(f), os.path.basename(f), False)
                       for id, f in self._files if f]
        if se.transfer_outputs():
            outputs += [(se.local(rf), os.path.basename(lf))
                        for lf, rf in self.outputs]

        parameters['mask']['files'] = self.input_files
        parameters['output files'] = self.outputs
        if not self._file_based:
            ls = LumiList(lumis=set([(run, lumi) for (id, file, run,
                                                      lumi) in self._units]))
            parameters['mask']['lumis'] = ls.getCompactList()

예제 #27

0

파일 보기

파일: BasicJobType.py 프로젝트: emaszs/CRABClient

 def getDuplicateLumis(lumisDict):
     """
     Get the run-lumis appearing more than once in the input
     dictionary of runs and lumis, which is assumed to have
     the following format:
         {
         '1': [1,2,3,4,6,7,8,9,10],
         '2': [1,4,5,20]
         }
     """
     doubleLumis = set()
     for run, lumis in lumisDict.iteritems():
         seen = set()
         doubleLumis.update(set((run, lumi) for lumi in lumis if (run, lumi) in seen or seen.add((run, lumi))))
     doubleLumis = LumiList(lumis=doubleLumis)
     return doubleLumis.getCompactList()

예제 #28

0

파일 보기

파일: LumiList_t.py 프로젝트: bbockelm/WMCore

    def notestRead(self):
        """
        Test reading from JSON
        """
        exString = "1:1-1:33,1:35,1:37-1:47,2:49-2:75,2:77-2:130,2:133-2:136"
        exDict = {"1": [[1, 33], [35, 35], [37, 47]], "2": [[49, 75], [77, 130], [133, 136]]}
        exVLBR = cms.VLuminosityBlockRange("1:1-1:33", "1:35", "1:37-1:47", "2:49-2:75", "2:77-2:130", "2:133-2:136")

        jsonList = LumiList(filename="lumiTest.json")
        lumiString = jsonList.getCMSSWString()
        lumiList = jsonList.getCompactList()
        lumiVLBR = jsonList.getVLuminosityBlockRange(True)

        self.assertTrue(lumiString == exString)
        self.assertTrue(lumiList == exDict)
        self.assertTrue(lumiVLBR == exVLBR)

예제 #29

0

파일 보기

파일: BasicJobType.py 프로젝트: belforte/CRABClient

 def getDuplicateLumis(lumisDict):
     """
     Get the run-lumis appearing more than once in the input
     dictionary of runs and lumis, which is assumed to have
     the following format:
         {
         '1': [1,2,3,4,6,7,8,9,10],
         '2': [1,4,5,20]
         }
     """
     doubleLumis = set()
     for run, lumis in lumisDict.iteritems():
         seen = set()
         doubleLumis.update(set((run, lumi) for lumi in lumis if (run, lumi) in seen or seen.add((run, lumi))))
     doubleLumis = LumiList(lumis=doubleLumis)
     return doubleLumis.getCompactList()

예제 #30

0

파일 보기

    def notestRead(self):
        """
        Test reading from JSON
        """
        exString = "1:1-1:33,1:35,1:37-1:47,2:49-2:75,2:77-2:130,2:133-2:136"
        exDict   = {'1': [[1, 33], [35, 35], [37, 47]],
                    '2': [[49, 75], [77, 130], [133, 136]]}
        exVLBR   = cms.VLuminosityBlockRange('1:1-1:33', '1:35', '1:37-1:47', '2:49-2:75', '2:77-2:130', '2:133-2:136')

        jsonList = LumiList(filename = 'lumiTest.json')
        lumiString = jsonList.getCMSSWString()
        lumiList = jsonList.getCompactList()
        lumiVLBR = jsonList.getVLuminosityBlockRange(True)

        self.assertTrue(lumiString == exString)
        self.assertTrue(lumiList   == exDict)
        self.assertTrue(lumiVLBR   == exVLBR)

예제 #31

0

파일 보기

파일: WMTask_t.py 프로젝트: elasticaso/WMCore

    def testAddLumiMask(self):
        """
        _testAddLumiMask_

        Verify that setting and getting the lumiMask objects for a task works correctly.
        Do a round trip of a typical lumi mask
        """
        testTask = makeWMTask("TestTask")

        lumiMask = LumiList(compactList = {
                '1': [[1, 33], [35, 35], [37, 47], [49, 75], [77, 130], [133, 136]],
                '2':[[1,45]],
                '3':[[1,45],[50,80]],
            })

        testTask.setLumiMask(lumiMask = lumiMask.getCompactList())
        outMask =  LumiList(compactList = testTask.getLumiMask())
        self.assertEqual(lumiMask.getCMSSWString(), outMask.getCMSSWString())

        return

예제 #32

0

파일 보기

    def testAddLumiMask(self):
        """
        _testAddLumiMask_

        Verify that setting and getting the lumiMask objects for a task works correctly.
        Do a round trip of a typical lumi mask
        """
        testTask = makeWMTask("TestTask")

        lumiMask = LumiList(compactList={
            '1': [[1, 33], [35, 35], [37, 47], [49, 75], [77, 130], [133, 136]],
            '2': [[1, 45]],
            '3': [[1, 45], [50, 80]],
        })

        testTask.setLumiMask(lumiMask=lumiMask.getCompactList())
        outMask = testTask.getLumiMask()
        self.assertEqual(lumiMask.getCMSSWString(), outMask.getCMSSWString())

        return

예제 #33

0

파일 보기

파일: DataDiscovery.py 프로젝트: dciangot/CRABServer

    def formatOutput(self, task, requestname, datasetfiles, locations, tempDir):
        """
        Receives as input the result of the data location
        discovery operations and fill up the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")
        # TEMPORARY
        pnn_psn_map = {}
        sbj = SiteDBJSON({"key": self.config.TaskWorker.cmskey, "cert": self.config.TaskWorker.cmscert})

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        uniquelumis = set()
        datasetLumis = {}
        ## Loop over the sorted list of files.
        for lfn, infos in datasetfiles.iteritems():
            ## Skip the file if the block has not been found or has no locations.
            if not infos['BlockName'] in locations or not locations[infos['BlockName']]:
                self.logger.warning("Skipping %s because its block (%s) has no locations" % (lfn, infos['BlockName']))
                continue
            ## Skip the file if it is not in VALID state.
            if not infos.get('ValidFile', True):
                self.logger.warning("Skipping invalid file %s" % lfn)
                continue

            if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                raise TaskWorkerException(
                        "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n" +
                        "because you specified useParents=True but some your files have no" +
                        "parents.\nExample: " + lfn)
            ## Create a WMCore File object.
            try:
                size = infos['FileSize']
                checksums = {'Checksum': infos['Checksum'], 'Adler32': infos['Adler32'], 'Md5': infos['Md5']}
            except:
                #This is so that the task worker does not crash if an old version of WMCore is used (the interface of an API suddenly changed).
                # We may want to remove the try/except and the following two lines eventually, but keeping them for the moment so other devels won't be affected
                #See this WMCore commit: https://github.com/dmwm/WMCore/commit/2afc01ae571390f5fa009dd258be757adac89c28#diff-374b7a6640288184175057234e393e1cL204
                size = infos['Size']
                checksums = infos['Checksums']
            wmfile = File(lfn = lfn, events = infos['NumberOfEvents'], size = size, checksums = checksums, parents = infos['Parents'])
            wmfile['block'] = infos['BlockName']
            wmfile['locations'] = []
            for pnn in locations[infos['BlockName']]:
                if pnn and pnn not in pnn_psn_map:
                    self.logger.debug("Translating PNN %s" %pnn)
                    try:
                        pnn_psn_map[pnn] = sbj.PNNtoPSN(pnn)
                    except KeyError:
                        self.logger.error("Impossible translating %s to a CMS name through SiteDB" %pnn)
                        pnn_psn_map[pnn] = ''
                    except httplib.HTTPException as ex:
                        self.logger.error("Couldn't map SE to site: %s" % pnn)
                        print("Couldn't map SE to site: %s" % pnn)
                        print("got problem: %s" % ex)
                        print("got another problem: %s" % ex.__dict__)
                if pnn and pnn in pnn_psn_map:
                    if isinstance(pnn_psn_map[pnn], list):
                        wmfile['locations'].extend(pnn_psn_map[pnn])
                    else:
                        wmfile['locations'].append(pnn_psn_map[pnn])
            wmfile['workflow'] = requestname
            event_counter += infos['NumberOfEvents']
            for run, lumis in infos['Lumis'].iteritems():
                datasetLumis.setdefault(run, []).extend(lumis)
                wmfile.addRun(Run(run, *lumis))
                for lumi in lumis:
                    uniquelumis.add((run, lumi))
                lumi_counter += len(lumis)
            wmfiles.append(wmfile)

        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d' % event_counter)
        self.logger.debug('Tot lumis found: %d' % uniquelumis)
        self.logger.debug('Duplicate lumis found: %d' % (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d' % len(wmfiles))

        self.logger.debug("Starting to create compact lumilists for input dataset")
        datasetLumiList = LumiList(runsAndLumis=datasetLumis)
        datasetLumis = datasetLumiList.getCompactList()
        datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList()
        self.logger.debug("Finished to create compact lumilists for input dataset")
        with open(os.path.join(tempDir, "input_dataset_lumis.json"), "w") as fd:
            json.dump(datasetLumis, fd)
        with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"), "w") as fd:
            json.dump(datasetDuplicateLumis, fd)

        return Result(task = task, result = Fileset(name = 'FilesToSplit', files = set(wmfiles)))

예제 #34

0

파일 보기

파일: DataDiscovery.py 프로젝트: belforte/CRABServer

    def formatOutput(self, task, requestname, datasetfiles, locations, tempDir):
        """
        Receives as input the result of the data location
        discovery operations and fill up the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        uniquelumis = set()
        datasetLumis = {}
        blocksWithNoLocations = set()
        ## Loop over the sorted list of files.
        configDict = {"cacheduration": 1, "pycurl": True} # cache duration is in hours
        resourceCatalog = CRIC(logger=self.logger, configDict=configDict)
        # can't affort one message from CRIC per file, unless critical !
        with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
            for lfn, infos in datasetfiles.iteritems():
                ## Skip the file if it is not in VALID state.
                if not infos.get('ValidFile', True):
                    self.logger.warning("Skipping invalid file %s", lfn)
                    continue
                ## Skip the file if the block has not been found or has no locations.
                if not infos['BlockName'] in locations or not locations[infos['BlockName']]:
                    self.logger.warning("Skipping %s because its block (%s) has no locations", lfn, infos['BlockName'])
                    blocksWithNoLocations.add(infos['BlockName'])
                    continue

                if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                    raise TaskWorkerException(
                            "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n" +
                            "because you specified useParents=True but some your files have no" +
                            "parents.\nExample: " + lfn)
                ## Create a WMCore File object.
                try:
                    size = infos['FileSize']
                    checksums = {'Checksum': infos['Checksum'], 'Adler32': infos['Adler32'], 'Md5': infos['Md5']}
                except:
                    #This is so that the task worker does not crash if an old version of WMCore is used (the interface of an API suddenly changed).
                    # We may want to remove the try/except and the following two lines eventually, but keeping them for the moment so other devels won't be affected
                    #See this WMCore commit: https://github.com/dmwm/WMCore/commit/2afc01ae571390f5fa009dd258be757adac89c28#diff-374b7a6640288184175057234e393e1cL204
                    size = infos['Size']
                    checksums = infos['Checksums']
                wmfile = File(lfn = lfn, events = infos['NumberOfEvents'], size = size, checksums = checksums, parents = infos['Parents'])
                wmfile['block'] = infos['BlockName']
                try:
                    wmfile['locations'] = resourceCatalog.PNNstoPSNs(locations[wmfile['block']])
                except Exception as ex:
                    self.logger.error("Impossible translating %s to a CMS name through CMS Resource Catalog", locations[wmfile['block']] )
                    self.logger.error("got this exception:\n %s", ex)
                    raise
                wmfile['workflow'] = requestname
                event_counter += infos['NumberOfEvents']
                for run, lumis in infos['Lumis'].iteritems():
                    datasetLumis.setdefault(run, []).extend(lumis)
                    wmfile.addRun(Run(run, *lumis))
                    for lumi in lumis:
                        uniquelumis.add((run, lumi))
                    lumi_counter += len(lumis)
                wmfiles.append(wmfile)

        if blocksWithNoLocations:
            msg = "The locations of some blocks (%d) have not been found: %s" % (len(blocksWithNoLocations), list(blocksWithNoLocations))
            self.logger.warning(msg)
            self.uploadWarning(msg, task['user_proxy'], task['tm_taskname'])

        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d', event_counter)
        self.logger.debug('Tot lumis found: %d', uniquelumis)
        self.logger.debug('Duplicate lumis found: %d', (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d', len(wmfiles))

        self.logger.debug("Starting to create compact lumilists for input dataset")
        datasetLumiList = LumiList(runsAndLumis=datasetLumis)
        datasetLumis = datasetLumiList.getCompactList()
        datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList()
        self.logger.debug("Finished to create compact lumilists for input dataset")
        with open(os.path.join(tempDir, "input_dataset_lumis.json"), "w") as fd:
            json.dump(datasetLumis, fd)
        with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"), "w") as fd:
            json.dump(datasetDuplicateLumis, fd)

        return Result(task = task, result = Fileset(name = 'FilesToSplit', files = set(wmfiles)))

예제 #35

0

파일 보기

    def formatOutput(self, task, requestname, datasetfiles, locations,
                     tempDir):
        """
        Receives as input the result of the data location
        discovery operations and fill up the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")
        # TEMPORARY
        pnn_psn_map = {}
        sbj = SiteDBJSON({
            "key": self.config.TaskWorker.cmskey,
            "cert": self.config.TaskWorker.cmscert
        })

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        uniquelumis = set()
        datasetLumis = {}
        ## Loop over the sorted list of files.
        for lfn, infos in datasetfiles.iteritems():
            ## Skip the file if the block has not been found or has no locations.
            if not infos['BlockName'] in locations or not locations[
                    infos['BlockName']]:
                self.logger.warning(
                    "Skipping %s because its block (%s) has no locations" %
                    (lfn, infos['BlockName']))
                continue
            ## Skip the file if it is not in VALID state.
            if not infos.get('ValidFile', True):
                self.logger.warning("Skipping invalid file %s" % lfn)
                continue

            if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                raise TaskWorkerException(
                    "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n"
                    +
                    "because you specified useParents=True but some your files have no"
                    + "parents.\nExample: " + lfn)
            ## Create a WMCore File object.
            try:
                size = infos['FileSize']
                checksums = {
                    'Checksum': infos['Checksum'],
                    'Adler32': infos['Adler32'],
                    'Md5': infos['Md5']
                }
            except:
                #This is so that the task worker does not crash if an old version of WMCore is used (the interface of an API suddenly changed).
                # We may want to remove the try/except and the following two lines eventually, but keeping them for the moment so other devels won't be affected
                #See this WMCore commit: https://github.com/dmwm/WMCore/commit/2afc01ae571390f5fa009dd258be757adac89c28#diff-374b7a6640288184175057234e393e1cL204
                size = infos['Size']
                checksums = infos['Checksums']
            wmfile = File(lfn=lfn,
                          events=infos['NumberOfEvents'],
                          size=size,
                          checksums=checksums,
                          parents=infos['Parents'])
            wmfile['block'] = infos['BlockName']
            wmfile['locations'] = []
            for pnn in locations[infos['BlockName']]:
                if pnn and pnn not in pnn_psn_map:
                    self.logger.debug("Translating PNN %s" % pnn)
                    try:
                        pnn_psn_map[pnn] = sbj.PNNtoPSN(pnn)
                    except KeyError:
                        self.logger.error(
                            "Impossible translating %s to a CMS name through SiteDB"
                            % pnn)
                        pnn_psn_map[pnn] = ''
                    except httplib.HTTPException as ex:
                        self.logger.error("Couldn't map SE to site: %s" % pnn)
                        print("Couldn't map SE to site: %s" % pnn)
                        print("got problem: %s" % ex)
                        print("got another problem: %s" % ex.__dict__)
                if pnn and pnn in pnn_psn_map:
                    if isinstance(pnn_psn_map[pnn], list):
                        wmfile['locations'].extend(pnn_psn_map[pnn])
                    else:
                        wmfile['locations'].append(pnn_psn_map[pnn])
            wmfile['workflow'] = requestname
            event_counter += infos['NumberOfEvents']
            for run, lumis in infos['Lumis'].iteritems():
                datasetLumis.setdefault(run, []).extend(lumis)
                wmfile.addRun(Run(run, *lumis))
                for lumi in lumis:
                    uniquelumis.add((run, lumi))
                lumi_counter += len(lumis)
            wmfiles.append(wmfile)

        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d' % event_counter)
        self.logger.debug('Tot lumis found: %d' % uniquelumis)
        self.logger.debug('Duplicate lumis found: %d' %
                          (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d' % len(wmfiles))

        self.logger.debug(
            "Starting to create compact lumilists for input dataset")
        datasetLumiList = LumiList(runsAndLumis=datasetLumis)
        datasetLumis = datasetLumiList.getCompactList()
        datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList(
        )
        self.logger.debug(
            "Finished to create compact lumilists for input dataset")
        with open(os.path.join(tempDir, "input_dataset_lumis.json"),
                  "w") as fd:
            json.dump(datasetLumis, fd)
        with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"),
                  "w") as fd:
            json.dump(datasetDuplicateLumis, fd)

        return Result(task=task,
                      result=Fileset(name='FilesToSplit', files=set(wmfiles)))

예제 #36

0

파일 보기

파일: Analysis.py 프로젝트: PerilousApricot/CRABClient

    def run(self, filecacheurl = None):
        """
        Override run() for JobType
        """
        configArguments = {'addoutputfiles'            : [],
                           'adduserfiles'              : [],
                           'tfileoutfiles'             : [],
                           'edmoutfiles'               : [],
                          }

        # Get SCRAM environment
        scram = ScramEnvironment(logger=self.logger)

        configArguments.update({'jobarch'    : scram.scramArch,
                                'jobsw' : scram.cmsswVersion, })

        # Build tarball
        if self.workdir:
            tarUUID =  PandaInterface.wrappedUuidGen()
            self.logger.debug('UNIQUE NAME: tarUUID %s ' % tarUUID)
            if len(tarUUID):
                tarFilename   = os.path.join(self.workdir, tarUUID +'default.tgz')
                cfgOutputName = os.path.join(self.workdir, 'CMSSW_cfg.py')
            else:
                raise EnvironmentException('Problem with uuidgen while preparing for Sandbox upload.')
        else:
            _dummy, tarFilename   = tempfile.mkstemp(suffix='.tgz')
            _dummy, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')

        if getattr(self.config.Data, 'inputDataset', None):
            configArguments['inputdata'] = self.config.Data.inputDataset
#        configArguments['ProcessingVersion'] = getattr(self.config.Data, 'processingVersion', None)

        ## Create CMSSW config.
        self.logger.debug("self.config: %s" % (self.config))
        self.logger.debug("self.config.JobType.psetName: %s" % (self.config.JobType.psetName))
        ## The loading of a CMSSW pset in the CMSSWConfig constructor is not idempotent
        ## in the sense that a second loading of the same pset may not produce the same
        ## result. Therefore there is a cache in CMSSWConfig to avoid loading any CMSSW
        ## pset twice. However, some "complicated" psets seem to evade the caching.
        ## Thus, to be safe, keep the CMSSWConfig instance in a class variable, so that
        ## it can be reused later if wanted (for example, in PrivateMC when checking if
        ## the pset has an LHE source) instead of having to load the pset again.
        ## As for what does "complicated" psets mean, Daniel Riley said that there are
        ## some psets where one module modifies the configuration from another module.
        self.cmsswCfg = CMSSWConfig(config=self.config, logger=self.logger,
                                    userConfig=self.config.JobType.psetName)

        ## Interrogate the CMSSW pset for output files (only output files produced by
        ## PoolOutputModule or TFileService are identified automatically). Do this
        ## automatic detection even if JobType.disableAutomaticOutputCollection = True,
        ## so that we can still classify the output files in EDM, TFile and additional
        ## output files in the Task DB (and the job ad).
        ## TODO: Do we really need this classification at all? cmscp and PostJob read
        ## the FJR to know if an output file is EDM, TFile or other.
        edmfiles, tfiles = self.cmsswCfg.outputFiles()
        ## If JobType.disableAutomaticOutputCollection = True, ignore the EDM and TFile
        ## output files that are not listed in JobType.outputFiles.
        if getattr(self.config.JobType, 'disableAutomaticOutputCollection', getParamDefaultValue('JobType.disableAutomaticOutputCollection')):
            outputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'outputFiles', [])]
            edmfiles = [file for file in edmfiles if file in outputFiles]
            tfiles = [file for file in tfiles if file in outputFiles]
        ## Get the list of additional output files that have to be collected as given
        ## in JobType.outputFiles, but remove duplicates listed already as EDM files or
        ## TFiles.
        addoutputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'outputFiles', []) if re.sub(r'^file:', '', file) not in edmfiles+tfiles]
        self.logger.debug("The following EDM output files will be collected: %s" % edmfiles)
        self.logger.debug("The following TFile output files will be collected: %s" % tfiles)
        self.logger.debug("The following user output files will be collected: %s" % addoutputFiles)
        configArguments['edmoutfiles'] = edmfiles
        configArguments['tfileoutfiles'] = tfiles
        configArguments['addoutputfiles'].extend(addoutputFiles)

        # Write out CMSSW config
        self.cmsswCfg.writeFile(cfgOutputName)

        ## UserTarball calls ScramEnvironment which can raise EnvironmentException.
        ## Since ScramEnvironment is already called above and the exception is not
        ## handled, we are sure that if we reached this point it will not raise EnvironmentException.
        ## But otherwise we should take this into account.
        with UserTarball(name=tarFilename, logger=self.logger, config=self.config) as tb:
            inputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'inputFiles', [])]
            tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
            configArguments['adduserfiles'] = [os.path.basename(f) for f in inputFiles]
            uploadResults = tb.upload(filecacheurl = filecacheurl)

        self.logger.debug("Result uploading input files: %s " % str(uploadResults))
        configArguments['cacheurl'] = filecacheurl
        configArguments['cachefilename'] = uploadResults[0]
        isbchecksum = uploadResults[1]

        # Upload list of user-defined input files to process as the primary input
        userFilesList = getattr(self.config.Data, 'userInputFiles', None)
        if userFilesList:
            self.logger.debug("Attaching list of user-specified primary input files.")
            userFilesList = map(string.strip, userFilesList)
            userFilesList = [file for file in userFilesList if file]
            if len(userFilesList) != len(set(userFilesList)):
                msg  = "%sWarning%s:" % (colors.RED, colors.NORMAL)
                msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
                msg += " Duplicated entries will be removed."    
                self.logger.warning(msg)
            configArguments['userfiles'] = set(userFilesList)
            ## Get the user-specified primary dataset name.
            primaryDataset = getattr(self.config.Data, 'primaryDataset', 'CRAB_UserFiles')
            # Normalizes "foo/bar" and "/foo/bar" to "/foo/bar"
            primaryDataset = "/" + os.path.join(*primaryDataset.split("/"))
            if not re.match("/%(primDS)s.*" % (lfnParts), primaryDataset):
                self.logger.warning("Invalid primary dataset name %s; publication may fail." % (primaryDataset))
            configArguments['inputdata'] = primaryDataset

        lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
        lumi_list = None
        if lumi_mask_name:
            self.logger.debug("Attaching lumi mask %s to the request" % (lumi_mask_name))
            lumi_list = getLumiList(lumi_mask_name, logger = self.logger)
        run_ranges = getattr(self.config.Data, 'runRange', None)
        run_ranges_is_valid = run_ranges is not None and isinstance(run_ranges, str) and re.match('^\d+((?!(-\d+-))(\,|\-)\d+)*$', run_ranges)
        if run_ranges_is_valid:
            run_list = getRunList(run_ranges)
            if lumi_list:
                lumi_list.selectRuns(run_list)
            else:
                if len(run_list) > 50000:
                    msg  = "CRAB configuration parameter Data.runRange includes %s runs." % str(len(run_list))
                    msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                    raise ConfigurationException(msg)
                lumi_list = LumiList(runs = run_list)
        if lumi_list:
            configArguments['runs'] = lumi_list.getRuns()
            ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
            lumi_mask = lumi_list.getCompactList()
            configArguments['lumis'] = [str(reduce(lambda x,y: x+y, lumi_mask[run]))[1:-1].replace(' ','') for run in configArguments['runs']]

        configArguments['jobtype'] = 'Analysis'

        return tarFilename, configArguments, isbchecksum

예제 #37

0

파일 보기

파일: WMWorkloadTools.py 프로젝트: samircury/WMCore

def makeLumiList(lumiDict):
    try:
        ll = LumiList(compactList = lumiDict)
        return ll.getCompactList()
    except:
        raise WMWorkloadToolsException("Could not parse LumiList")

예제 #38

0

파일 보기

    def run(self, requestConfig):
        """
        Override run() for JobType
        """
        configArguments = {
            'addoutputfiles': [],
            'adduserfiles': [],
            'tfileoutfiles': [],
            'edmoutfiles': [],
        }

        # Get SCRAM environment
        scram = ScramEnvironment(logger=self.logger)

        configArguments.update({
            'jobarch': scram.scramArch,
            'jobsw': scram.cmsswVersion,
        })

        # Build tarball
        if self.workdir:
            tarUUID = PandaInterface.wrappedUuidGen()
            self.logger.debug('UNIQUE NAME: tarUUID %s ' % tarUUID)
            if len(tarUUID):
                tarFilename = os.path.join(self.workdir,
                                           tarUUID + 'default.tgz')
                cfgOutputName = os.path.join(self.workdir, 'CMSSW_cfg.py')
            else:
                raise EnvironmentException(
                    'Problem with uuidgen while preparing for Sandbox upload.')
        else:
            _dummy, tarFilename = tempfile.mkstemp(suffix='.tgz')
            _dummy, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')

        #configArguments['userisburl'] = 'https://'+ self.config.General.ufccacheUrl + '/crabcache/file?hashkey=' + uploadResults['hashkey']#XXX hardcoded
        #configArguments['userisburl'] = 'INSERTuserisburl'#XXX hardcoded
        if getattr(self.config.Data, 'inputDataset', None):
            configArguments['inputdata'] = self.config.Data.inputDataset
#        configArguments['ProcessingVersion'] = getattr(self.config.Data, 'processingVersion', None)

# Create CMSSW config
        self.logger.debug("self.config: %s" % self.config)
        self.logger.debug("self.config.JobType.psetName: %s" %
                          self.config.JobType.psetName)
        cmsswCfg = CMSSWConfig(config=self.config,
                               logger=self.logger,
                               userConfig=self.config.JobType.psetName)

        ## Interogate CMSSW config and user config for output file names. For now no use for EDM files or TFiles here.
        edmfiles, tfiles = cmsswCfg.outputFiles()
        addoutputFiles = [
            re.sub(r'^file:', '', file)
            for file in getattr(self.config.JobType, 'outputFiles', [])
            if re.sub(r'^file:', '', file) not in edmfiles + tfiles
        ]
        self.logger.debug(
            "The following EDM output files will be collected: %s" % edmfiles)
        self.logger.debug(
            "The following TFile output files will be collected: %s" % tfiles)
        self.logger.debug(
            "The following user output files will be collected: %s" %
            addoutputFiles)
        configArguments['edmoutfiles'] = edmfiles
        configArguments['tfileoutfiles'] = tfiles
        configArguments['addoutputfiles'].extend(addoutputFiles)

        # Write out CMSSW config
        cmsswCfg.writeFile(cfgOutputName)

        with UserTarball(name=tarFilename,
                         logger=self.logger,
                         config=self.config) as tb:
            inputFiles = [
                re.sub(r'^file:', '', file)
                for file in getattr(self.config.JobType, 'inputFiles', [])
            ]
            tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
            configArguments['adduserfiles'] = [
                os.path.basename(f) for f in inputFiles
            ]
            uploadResults = tb.upload()

        self.logger.debug("Result uploading input files: %s " %
                          str(uploadResults))
        configArguments['cachefilename'] = uploadResults[1]
        configArguments['cacheurl'] = uploadResults[0]
        isbchecksum = uploadResults[2]

        # Upload list of user-defined input files to process as the primary input
        userFileName = getattr(self.config.Data, 'userInputFile', None)
        if userFileName:
            self.logger.debug(
                "Attaching a list of user-specified primary input files from %s."
                % userFileName)
            fnames = []
            for fname in open(userFileName).readlines():
                fnames.append(fname.strip())
            configArguments['userfiles'] = filter(
                lambda x: x, fnames)  #removing whitelines and empty objects

            primDS = getattr(self.config.Data, 'primaryDataset', None)
            if primDS:
                # Normalizes "foo/bar" and "/foo/bar" to "/foo/bar"
                primDS = "/" + os.path.join(*primDS.split("/"))
                if not re.match("/%(primDS)s.*" % lfnParts, primDS):
                    self.logger.warning(
                        "Invalid primary dataset name %s for private MC; publishing may fail"
                        % primDS)
                configArguments['inputdata'] = primDS
            else:
                configArguments['inputdata'] = getattr(self.config.Data,
                                                       'inputDataset',
                                                       '/CRAB_UserFiles')

        lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
        lumi_list = None
        if lumi_mask_name:
            self.logger.debug("Attaching lumi mask %s to the request" %
                              lumi_mask_name)
            lumi_list = getLumiList(lumi_mask_name, logger=self.logger)
        run_ranges = getattr(self.config.Data, 'runRange', None)
        run_ranges_is_valid = run_ranges is not None and isinstance(
            run_ranges, str) and re.match('^\d+((?!(-\d+-))(\,|\-)\d+)*$',
                                          run_ranges)
        if run_ranges_is_valid:
            run_list = getRunList(run_ranges)
            if lumi_list:
                lumi_list.selectRuns(run_list)
            else:
                if len(run_list) > 50000:
                    msg = "Data.runRange includes %s runs." % str(
                        len(run_list))
                    msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                    raise ConfigurationException(msg)
                lumi_list = LumiList(runs=run_list)
        if lumi_list:
            configArguments['runs'] = lumi_list.getRuns()
            ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
            lumi_mask = lumi_list.getCompactList()
            configArguments['lumis'] = [
                str(reduce(lambda x, y: x + y,
                           lumi_mask[run]))[1:-1].replace(' ', '')
                for run in configArguments['runs']
            ]

        configArguments['jobtype'] = 'Analysis'

        return tarFilename, configArguments, isbchecksum

예제 #39

0

파일 보기

파일: BasicJobType.py 프로젝트: belforte/CRABClient

 def intersectLumis(lumisA, lumisB):
     result = LumiList(compactList=lumisA) & LumiList(compactList=lumisB)
     return result.getCompactList()

예제 #40

0

파일 보기

파일: Analysis.py 프로젝트: nizamyusli/CRABClient

    def run(self, filecacheurl=None):
        """
        Override run() for JobType
        """
        configArguments = {"addoutputfiles": [], "adduserfiles": [], "tfileoutfiles": [], "edmoutfiles": []}

        # Get SCRAM environment
        scram = ScramEnvironment(logger=self.logger)

        configArguments.update({"jobarch": scram.getScramArch(), "jobsw": scram.getCmsswVersion()})

        # Build tarball
        if self.workdir:
            tarUUID = PandaInterface.wrappedUuidGen()
            self.logger.debug("UNIQUE NAME: tarUUID %s " % tarUUID)
            if len(tarUUID):
                tarFilename = os.path.join(self.workdir, tarUUID + "default.tgz")
                cfgOutputName = os.path.join(self.workdir, BOOTSTRAP_CFGFILE)
            else:
                raise EnvironmentException("Problem with uuidgen while preparing for Sandbox upload.")
        else:
            _dummy, tarFilename = tempfile.mkstemp(suffix=".tgz")
            _dummy, cfgOutputName = tempfile.mkstemp(suffix="_cfg.py")

        if getattr(self.config.Data, "inputDataset", None):
            configArguments["inputdata"] = self.config.Data.inputDataset

        ## Create CMSSW config.
        self.logger.debug("self.config: %s" % (self.config))
        self.logger.debug("self.config.JobType.psetName: %s" % (self.config.JobType.psetName))
        ## The loading of a CMSSW pset in the CMSSWConfig constructor is not idempotent
        ## in the sense that a second loading of the same pset may not produce the same
        ## result. Therefore there is a cache in CMSSWConfig to avoid loading any CMSSW
        ## pset twice. However, some "complicated" psets seem to evade the caching.
        ## Thus, to be safe, keep the CMSSWConfig instance in a class variable, so that
        ## it can be reused later if wanted (for example, in PrivateMC when checking if
        ## the pset has an LHE source) instead of having to load the pset again.
        ## As for what does "complicated" psets mean, Daniel Riley said that there are
        ## some psets where one module modifies the configuration from another module.
        self.cmsswCfg = CMSSWConfig(config=self.config, logger=self.logger, userConfig=self.config.JobType.psetName)

        ## If there is a CMSSW pset, do a basic validation of it.
        if not bootstrapDone() and self.config.JobType.psetName:
            valid, msg = self.cmsswCfg.validateConfig()
            if not valid:
                raise ConfigurationException(msg)

        ## We need to put the pickled CMSSW configuration in the right place.
        ## Here, we determine if the bootstrap script already run and prepared everything
        ## for us. In such case we move the file, otherwise we pickle.dump the pset
        if not bootstrapDone():
            # Write out CMSSW config
            self.cmsswCfg.writeFile(cfgOutputName)
        else:
            # Move the pickled configuration file created by the bootstrap script
            self.moveCfgFile(cfgOutputName)

        ## Interrogate the CMSSW pset for output files (only output files produced by
        ## PoolOutputModule or TFileService are identified automatically). Do this
        ## automatic detection even if JobType.disableAutomaticOutputCollection = True,
        ## so that we can still classify the output files in EDM, TFile and additional
        ## output files in the Task DB (and the job ad).
        ## TODO: Do we really need this classification at all? cmscp and PostJob read
        ## the FJR to know if an output file is EDM, TFile or other.
        edmfiles, tfiles = self.cmsswCfg.outputFiles()
        ## If JobType.disableAutomaticOutputCollection = True, ignore the EDM and TFile
        ## output files that are not listed in JobType.outputFiles.
        if getattr(
            self.config.JobType,
            "disableAutomaticOutputCollection",
            getParamDefaultValue("JobType.disableAutomaticOutputCollection"),
        ):
            outputFiles = [re.sub(r"^file:", "", file) for file in getattr(self.config.JobType, "outputFiles", [])]
            edmfiles = [file for file in edmfiles if file in outputFiles]
            tfiles = [file for file in tfiles if file in outputFiles]
        ## Get the list of additional output files that have to be collected as given
        ## in JobType.outputFiles, but remove duplicates listed already as EDM files or
        ## TFiles.
        addoutputFiles = [
            re.sub(r"^file:", "", file)
            for file in getattr(self.config.JobType, "outputFiles", [])
            if re.sub(r"^file:", "", file) not in edmfiles + tfiles
        ]
        self.logger.debug("The following EDM output files will be collected: %s" % edmfiles)
        self.logger.debug("The following TFile output files will be collected: %s" % tfiles)
        self.logger.debug("The following user output files will be collected: %s" % addoutputFiles)
        configArguments["edmoutfiles"] = edmfiles
        configArguments["tfileoutfiles"] = tfiles
        configArguments["addoutputfiles"].extend(addoutputFiles)
        ## Give warning message in case no output file was detected in the CMSSW pset
        ## nor was any specified in the CRAB configuration.
        if (
            not configArguments["edmoutfiles"]
            and not configArguments["tfileoutfiles"]
            and not configArguments["addoutputfiles"]
        ):
            msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
            if getattr(
                self.config.JobType,
                "disableAutomaticOutputCollection",
                getParamDefaultValue("JobType.disableAutomaticOutputCollection"),
            ):
                msg += " Automatic detection of output files in the CMSSW configuration is disabled from the CRAB configuration"
                msg += " and no output file was explicitly specified in the CRAB configuration."
            else:
                msg += " CRAB could not detect any output file in the CMSSW configuration"
                msg += " nor was any explicitly specified in the CRAB configuration."
            msg += " Hence CRAB will not collect any output file from this task."
            self.logger.warning(msg)

        ## UserTarball calls ScramEnvironment which can raise EnvironmentException.
        ## Since ScramEnvironment is already called above and the exception is not
        ## handled, we are sure that if we reached this point it will not raise EnvironmentException.
        ## But otherwise we should take this into account.
        with UserTarball(name=tarFilename, logger=self.logger, config=self.config) as tb:
            inputFiles = [re.sub(r"^file:", "", file) for file in getattr(self.config.JobType, "inputFiles", [])]
            tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
            configArguments["adduserfiles"] = [os.path.basename(f) for f in inputFiles]
            uploadResults = tb.upload(filecacheurl=filecacheurl)

        self.logger.debug("Result uploading input files: %s " % str(uploadResults))
        configArguments["cacheurl"] = filecacheurl
        configArguments["cachefilename"] = uploadResults[0]
        isbchecksum = uploadResults[1]

        # Upload list of user-defined input files to process as the primary input
        userFilesList = getattr(self.config.Data, "userInputFiles", None)
        if userFilesList:
            self.logger.debug("Attaching list of user-specified primary input files.")
            userFilesList = map(string.strip, userFilesList)
            userFilesList = [file for file in userFilesList if file]
            if len(userFilesList) != len(set(userFilesList)):
                msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
                msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
                msg += " Duplicated entries will be removed."
                self.logger.warning(msg)
            configArguments["userfiles"] = set(userFilesList)
            ## Get the user-specified primary dataset name.
            primaryDataset = getattr(self.config.Data, "primaryDataset", "CRAB_UserFiles")
            # Normalizes "foo/bar" and "/foo/bar" to "/foo/bar"
            primaryDataset = "/" + os.path.join(*primaryDataset.split("/"))
            if not re.match("/%(primDS)s.*" % (lfnParts), primaryDataset):
                self.logger.warning("Invalid primary dataset name %s; publication may fail." % (primaryDataset))
            configArguments["inputdata"] = primaryDataset

        lumi_mask_name = getattr(self.config.Data, "lumiMask", None)
        lumi_list = None
        if lumi_mask_name:
            self.logger.debug("Attaching lumi mask %s to the request" % (lumi_mask_name))
            try:
                lumi_list = getLumiList(lumi_mask_name, logger=self.logger)
            except ValueError as ex:
                msg = "%sError%s:" % (colors.RED, colors.NORMAL)
                msg += " Failed to load lumi mask %s : %s" % (lumi_mask_name, ex)
                raise ConfigurationException(msg)
        run_ranges = getattr(self.config.Data, "runRange", None)
        if run_ranges:
            run_ranges_is_valid = re.match("^\d+((?!(-\d+-))(\,|\-)\d+)*$", run_ranges)
            if run_ranges_is_valid:
                run_list = getRunList(run_ranges)
                if lumi_list:
                    lumi_list.selectRuns(run_list)
                    if not lumi_list:
                        msg = "Invalid CRAB configuration: The intersection between the lumi mask and the run range is null."
                        raise ConfigurationException(msg)
                else:
                    if len(run_list) > 50000:
                        msg = "CRAB configuration parameter Data.runRange includes %s runs." % str(len(run_list))
                        msg += (
                            " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                        )
                        raise ConfigurationException(msg)
                    lumi_list = LumiList(runs=run_list)
            else:
                msg = "Invalid CRAB configuration: Parameter Data.runRange should be a comma separated list of integers or (inclusive) ranges. Example: '12345,99900-99910'"
                raise ConfigurationException(msg)
        if lumi_list:
            configArguments["runs"] = lumi_list.getRuns()
            ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
            lumi_mask = lumi_list.getCompactList()
            configArguments["lumis"] = [
                str(reduce(lambda x, y: x + y, lumi_mask[run]))[1:-1].replace(" ", "")
                for run in configArguments["runs"]
            ]

        configArguments["jobtype"] = "Analysis"

        return tarFilename, configArguments, isbchecksum

예제 #41

0

파일 보기

파일: BasicJobType.py 프로젝트: todor-ivanov/CRABClient

 def subtractLumis(lumisA, lumisB):
     result = LumiList(compactList=lumisA) - LumiList(compactList=lumisB)
     return result.getCompactList()

예제 #42

0

파일 보기

파일: BasicJobType.py 프로젝트: todor-ivanov/CRABClient

 def intersectLumis(lumisA, lumisB):
     result = LumiList(compactList=lumisA) & LumiList(compactList=lumisB)
     return result.getCompactList()

예제 #43

0

파일 보기

파일: Analysis.py 프로젝트: khurtado/CRABClient

    def run(self, requestConfig):
        """
        Override run() for JobType
        """
        configArguments = {'addoutputfiles'            : [],
                           'adduserfiles'              : [],
                           'tfileoutfiles'             : [],
                           'edmoutfiles'               : [],
                          }

        # Get SCRAM environment
        scram = ScramEnvironment(logger=self.logger)

        configArguments.update({'jobarch'    : scram.scramArch,
                                'jobsw' : scram.cmsswVersion, })

        # Build tarball
        if self.workdir:
            tarUUID =  PandaInterface.wrappedUuidGen()
            self.logger.debug('UNIQUE NAME: tarUUID %s ' % tarUUID)
            if len(tarUUID):
                tarFilename   = os.path.join(self.workdir, tarUUID +'default.tgz')
                cfgOutputName = os.path.join(self.workdir, 'CMSSW_cfg.py')
            else:
                raise EnvironmentException('Problem with uuidgen while preparing for Sandbox upload.')
        else:
            _dummy, tarFilename   = tempfile.mkstemp(suffix='.tgz')
            _dummy, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')

        #configArguments['userisburl'] = 'https://'+ self.config.General.ufccacheUrl + '/crabcache/file?hashkey=' + uploadResults['hashkey']#XXX hardcoded
        #configArguments['userisburl'] = 'INSERTuserisburl'#XXX hardcoded
        if getattr(self.config.Data, 'inputDataset', None):
            configArguments['inputdata'] = self.config.Data.inputDataset
#        configArguments['ProcessingVersion'] = getattr(self.config.Data, 'processingVersion', None)

        # Create CMSSW config
        self.logger.debug("self.config: %s" % self.config)
        self.logger.debug("self.config.JobType.psetName: %s" % self.config.JobType.psetName)
        cmsswCfg = CMSSWConfig(config=self.config, logger=self.logger,
                               userConfig=self.config.JobType.psetName)

        ## Interogate CMSSW config and user config for output file names. For now no use for EDM files or TFiles here.
        edmfiles, tfiles = cmsswCfg.outputFiles()
        addoutputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'outputFiles', []) if re.sub(r'^file:', '', file) not in edmfiles+tfiles]
        self.logger.debug("The following EDM output files will be collected: %s" % edmfiles)
        self.logger.debug("The following TFile output files will be collected: %s" % tfiles)
        self.logger.debug("The following user output files will be collected: %s" % addoutputFiles)
        configArguments['edmoutfiles'] = edmfiles
        configArguments['tfileoutfiles'] = tfiles
        configArguments['addoutputfiles'].extend(addoutputFiles)

        # Write out CMSSW config
        cmsswCfg.writeFile(cfgOutputName)

        with UserTarball(name=tarFilename, logger=self.logger, config=self.config) as tb:
            inputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'inputFiles', [])]
            tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
            configArguments['adduserfiles'] = [os.path.basename(f) for f in inputFiles]
            uploadResults = tb.upload()

        self.logger.debug("Result uploading input files: %s " % str(uploadResults))
        configArguments['cachefilename'] = uploadResults[1]
        configArguments['cacheurl'] = uploadResults[0]
        isbchecksum = uploadResults[2]

        # Upload list of user-defined input files to process as the primary input
        userFileName = getattr(self.config.Data, 'userInputFile', None)
        if userFileName:
            self.logger.debug("Attaching a list of user-specified primary input files from %s." % userFileName)
            fnames = []
            for fname in open(userFileName).readlines():
                fnames.append(fname.strip())
            configArguments['userfiles'] = filter(lambda x: x, fnames) #removing whitelines and empty objects

            primDS = getattr(self.config.Data, 'primaryDataset', None)
            if primDS:
                # Normalizes "foo/bar" and "/foo/bar" to "/foo/bar"
                primDS = "/" + os.path.join(*primDS.split("/"))
                if not re.match("/%(primDS)s.*" % lfnParts, primDS):
                    self.logger.warning("Invalid primary dataset name %s for private MC; publishing may fail" % primDS)
                configArguments['inputdata'] = primDS
            else:
                configArguments['inputdata'] = getattr(self.config.Data, 'inputDataset', '/CRAB_UserFiles')

        lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
        lumi_list = None
        if lumi_mask_name:
            self.logger.debug("Attaching lumi mask %s to the request" % lumi_mask_name)
            lumi_list = getLumiList(lumi_mask_name, logger = self.logger)
        run_ranges = getattr(self.config.Data, 'runRange', None)
        run_ranges_is_valid = run_ranges is not None and isinstance(run_ranges, str) and re.match('^\d+((?!(-\d+-))(\,|\-)\d+)*$', run_ranges)
        if run_ranges_is_valid:
            run_list = getRunList(run_ranges)
            if lumi_list:
                lumi_list.selectRuns(run_list)
            else:
                if len(run_list) > 50000:
                    msg  = "Data.runRange includes %s runs." % str(len(run_list))
                    msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                    raise ConfigurationException(msg)
                lumi_list = LumiList(runs = run_list)
        if lumi_list:
            configArguments['runs'] = lumi_list.getRuns()
            ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
            lumi_mask = lumi_list.getCompactList()
            configArguments['lumis'] = [str(reduce(lambda x,y: x+y, lumi_mask[run]))[1:-1].replace(' ','') for run in configArguments['runs']]

        configArguments['jobtype'] = 'Analysis'

        return tarFilename, configArguments, isbchecksum

예제 #44

0

파일 보기

파일: BasicJobType.py 프로젝트: belforte/CRABClient

 def subtractLumis(lumisA, lumisB):
     result = LumiList(compactList=lumisA) - LumiList(compactList=lumisB)
     return result.getCompactList()

예제 #45

0

파일 보기

파일: DataDiscovery.py 프로젝트: sharmaprajesh/CRABServer

    def formatOutput(self, task, requestname, datasetfiles, locations,
                     tempDir):
        """
        Receives as input the result of the data location
        discovery operations and fill up the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        uniquelumis = set()
        datasetLumis = {}
        blocksWithNoLocations = set()
        ## Loop over the sorted list of files.
        configDict = {
            "cacheduration": 1,
            "pycurl": True
        }  # cache duration is in hours
        with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
            resourceCatalog = CRIC(logger=self.logger, configDict=configDict)
        # can't affort one message from CRIC per file, unless critical !
        with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
            for lfn, infos in datasetfiles.iteritems():
                ## Skip the file if it is not in VALID state.
                if not infos.get('ValidFile', True):
                    self.logger.warning("Skipping invalid file %s", lfn)
                    continue
                ## Skip the file if the block has not been found or has no locations.
                if not infos['BlockName'] in locations or not locations[
                        infos['BlockName']]:
                    self.logger.warning(
                        "Skipping %s because its block (%s) has no locations",
                        lfn, infos['BlockName'])
                    blocksWithNoLocations.add(infos['BlockName'])
                    continue

                if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                    raise TaskWorkerException(
                        "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n"
                        +
                        "because you specified useParents=True but some your files have no"
                        + "parents.\nExample: " + lfn)
                ## Create a WMCore File object.
                try:
                    size = infos['FileSize']
                    checksums = {
                        'Checksum': infos['Checksum'],
                        'Adler32': infos['Adler32'],
                        'Md5': infos['Md5']
                    }
                except:
                    #This is so that the task worker does not crash if an old version of WMCore is used (the interface of an API suddenly changed).
                    # We may want to remove the try/except and the following two lines eventually, but keeping them for the moment so other devels won't be affected
                    #See this WMCore commit: https://github.com/dmwm/WMCore/commit/2afc01ae571390f5fa009dd258be757adac89c28#diff-374b7a6640288184175057234e393e1cL204
                    size = infos['Size']
                    checksums = infos['Checksums']
                wmfile = File(lfn=lfn,
                              events=infos['NumberOfEvents'],
                              size=size,
                              checksums=checksums,
                              parents=infos['Parents'])
                wmfile['block'] = infos['BlockName']
                try:
                    wmfile['locations'] = resourceCatalog.PNNstoPSNs(
                        locations[wmfile['block']])
                except Exception as ex:
                    self.logger.error(
                        "Impossible translating %s to a CMS name through CMS Resource Catalog",
                        locations[wmfile['block']])
                    self.logger.error("got this exception:\n %s", ex)
                    raise
                wmfile['workflow'] = requestname
                event_counter += infos['NumberOfEvents']
                for run, lumis in infos['Lumis'].iteritems():
                    datasetLumis.setdefault(run, []).extend(lumis)
                    wmfile.addRun(Run(run, *lumis))
                    for lumi in lumis:
                        uniquelumis.add((run, lumi))
                    lumi_counter += len(lumis)
                wmfiles.append(wmfile)

        if blocksWithNoLocations:
            msg = "%d blocks will be skipped because are not completely replicated on DISK: %s" % (
                len(blocksWithNoLocations), list(blocksWithNoLocations))
            self.logger.warning(msg)
            self.uploadWarning(msg, task['user_proxy'], task['tm_taskname'])

        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d', event_counter)
        self.logger.debug('Tot lumis found: %d', uniquelumis)
        self.logger.debug('Duplicate lumis found: %d',
                          (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d', len(wmfiles))

        self.logger.debug(
            "Starting to create compact lumilists for input dataset")
        datasetLumiList = LumiList(runsAndLumis=datasetLumis)
        datasetLumis = datasetLumiList.getCompactList()
        datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList(
        )
        self.logger.debug(
            "Finished to create compact lumilists for input dataset")
        with open(os.path.join(tempDir, "input_dataset_lumis.json"),
                  "w") as fd:
            json.dump(datasetLumis, fd)
        with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"),
                  "w") as fd:
            json.dump(datasetDuplicateLumis, fd)

        return Result(task=task,
                      result=Fileset(name='FilesToSplit', files=set(wmfiles)))

예제 #46

0

파일 보기

파일: Analysis.py 프로젝트: matz-e/CRABClient

    def run(self, filecacheurl = None):
        """
        Override run() for JobType
        """
        configArguments = {'addoutputfiles'            : [],
                           'adduserfiles'              : [],
                           'tfileoutfiles'             : [],
                           'edmoutfiles'               : [],
                          }

        # Get SCRAM environment
        scram = ScramEnvironment(logger=self.logger)

        configArguments.update({'jobarch'    : scram.scramArch,
                                'jobsw' : scram.cmsswVersion, })

        # Build tarball
        if self.workdir:
            tarUUID =  PandaInterface.wrappedUuidGen()
            self.logger.debug('UNIQUE NAME: tarUUID %s ' % tarUUID)
            if len(tarUUID):
                tarFilename   = os.path.join(self.workdir, tarUUID +'default.tgz')
                cfgOutputName = os.path.join(self.workdir, 'CMSSW_cfg.py')
            else:
                raise EnvironmentException('Problem with uuidgen while preparing for Sandbox upload.')
        else:
            _dummy, tarFilename   = tempfile.mkstemp(suffix='.tgz')
            _dummy, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')

        if getattr(self.config.Data, 'inputDataset', None):
            configArguments['inputdata'] = self.config.Data.inputDataset
#        configArguments['ProcessingVersion'] = getattr(self.config.Data, 'processingVersion', None)

        # Create CMSSW config
        self.logger.debug("self.config: %s" % self.config)
        self.logger.debug("self.config.JobType.psetName: %s" % self.config.JobType.psetName)
        cmsswCfg = CMSSWConfig(config=self.config, logger=self.logger,
                               userConfig=self.config.JobType.psetName)

        ## Interogate CMSSW config and user config for output file names. For now no use for EDM files or TFiles here.
        edmfiles, tfiles = cmsswCfg.outputFiles()
        addoutputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'outputFiles', []) if re.sub(r'^file:', '', file) not in edmfiles+tfiles]
        self.logger.debug("The following EDM output files will be collected: %s" % edmfiles)
        self.logger.debug("The following TFile output files will be collected: %s" % tfiles)
        self.logger.debug("The following user output files will be collected: %s" % addoutputFiles)
        configArguments['edmoutfiles'] = edmfiles
        configArguments['tfileoutfiles'] = tfiles
        configArguments['addoutputfiles'].extend(addoutputFiles)

        # Write out CMSSW config
        cmsswCfg.writeFile(cfgOutputName)

        ## UserTarball calls ScramEnvironment which can raise EnvironmentException.
        ## Since ScramEnvironment is already called above and the exception is not
        ## handled, we are sure that if we reached this point it will not raise EnvironmentException.
        ## But otherwise we should take this into account.
        with UserTarball(name=tarFilename, logger=self.logger, config=self.config) as tb:
            inputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'inputFiles', [])]
            tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
            configArguments['adduserfiles'] = [os.path.basename(f) for f in inputFiles]
            uploadResults = tb.upload(filecacheurl = filecacheurl)

        self.logger.debug("Result uploading input files: %s " % str(uploadResults))
        configArguments['cacheurl'] = filecacheurl
        configArguments['cachefilename'] = uploadResults[0]
        isbchecksum = uploadResults[1]

        # Upload list of user-defined input files to process as the primary input
        userFilesList = getattr(self.config.Data, 'userInputFiles', None)
        if userFilesList:
            self.logger.debug("Attaching list of user-specified primary input files.")
            userFilesList = map(string.strip, userFilesList)
            userFilesList = [file for file in userFilesList if file]
            if len(userFilesList) != len(set(userFilesList)):
                msg  = "%sWarning%s: CRAB configuration parameter Data.userInputFiles contains duplicated entries." % (colors.RED, colors.NORMAL)
                msg += " Duplicated entries will be removed."    
                self.logger.warning(msg)
            configArguments['userfiles'] = set(userFilesList)
            ## Get the user-specified primary dataset name.
            primaryDataset = getattr(self.config.Data, 'primaryDataset', 'CRAB_UserFiles')
            # Normalizes "foo/bar" and "/foo/bar" to "/foo/bar"
            primaryDataset = "/" + os.path.join(*primaryDataset.split("/"))
            if not re.match("/%(primDS)s.*" % (lfnParts), primaryDataset):
                self.logger.warning("Invalid primary dataset name %s; publication may fail." % (primaryDataset))
            configArguments['inputdata'] = primaryDataset

        lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
        lumi_list = None
        if lumi_mask_name:
            self.logger.debug("Attaching lumi mask %s to the request" % lumi_mask_name)
            lumi_list = getLumiList(lumi_mask_name, logger = self.logger)
        run_ranges = getattr(self.config.Data, 'runRange', None)
        run_ranges_is_valid = run_ranges is not None and isinstance(run_ranges, str) and re.match('^\d+((?!(-\d+-))(\,|\-)\d+)*$', run_ranges)
        if run_ranges_is_valid:
            run_list = getRunList(run_ranges)
            if lumi_list:
                lumi_list.selectRuns(run_list)
            else:
                if len(run_list) > 50000:
                    msg  = "Data.runRange includes %s runs." % str(len(run_list))
                    msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                    raise ConfigurationException(msg)
                lumi_list = LumiList(runs = run_list)
        if lumi_list:
            configArguments['runs'] = lumi_list.getRuns()
            ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
            lumi_mask = lumi_list.getCompactList()
            configArguments['lumis'] = [str(reduce(lambda x,y: x+y, lumi_mask[run]))[1:-1].replace(' ','') for run in configArguments['runs']]

        configArguments['jobtype'] = 'Analysis'

        return tarFilename, configArguments, isbchecksum

예제 #47

0

파일 보기

파일: DataDiscovery.py 프로젝트: ddaina/CRABServer

    def formatOutput(self, task, requestname, datasetfiles, locations,
                     tempDir):
        """
        Receives as input the result of the data location
        discovery operations and fill up the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        uniquelumis = set()
        datasetLumis = {}
        blocksWithNoLocations = set()
        ## Loop over the sorted list of files.
        configDict = {
            "cacheduration": 1,
            "pycurl": True
        }  # cache duration is in hours
        with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
            resourceCatalog = CRIC(logger=self.logger, configDict=configDict)
        # can't affort one message from CRIC per file, unless critical !
        with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
            for lfn, infos in datasetfiles.iteritems():
                ## Skip the file if it is not in VALID state.
                if not infos.get('ValidFile', True):
                    self.logger.warning("Skipping invalid file %s", lfn)
                    continue
                ## Skip the file if the block has not been found or has no locations.
                if not infos['BlockName'] in locations or not locations[
                        infos['BlockName']]:
                    self.logger.warning(
                        "Skipping %s because its block (%s) has no locations",
                        lfn, infos['BlockName'])
                    blocksWithNoLocations.add(infos['BlockName'])
                    continue
                if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                    self.logger.warning(
                        "Skipping %s because it has no parents")
                    continue
                ## Create a WMCore File object.
                size = infos['FileSize']
                checksums = {
                    'Checksum': infos['Checksum'],
                    'Adler32': infos['Adler32'],
                    'Md5': infos['Md5']
                }
                wmfile = File(lfn=lfn,
                              events=infos['NumberOfEvents'],
                              size=size,
                              checksums=checksums,
                              parents=infos['Parents'])
                wmfile['block'] = infos['BlockName']
                try:
                    wmfile['locations'] = resourceCatalog.PNNstoPSNs(
                        locations[wmfile['block']])
                except Exception as ex:
                    self.logger.error(
                        "Impossible translating %s to a CMS name through CMS Resource Catalog",
                        locations[wmfile['block']])
                    self.logger.error("got this exception:\n %s", ex)
                    raise
                wmfile['workflow'] = requestname
                event_counter += infos['NumberOfEvents']
                for run, lumis in infos['Lumis'].iteritems():
                    datasetLumis.setdefault(run, []).extend(lumis)
                    wmfile.addRun(Run(run, *lumis))
                    for lumi in lumis:
                        uniquelumis.add((run, lumi))
                    lumi_counter += len(lumis)
                wmfiles.append(wmfile)

        if blocksWithNoLocations:
            msg = "%d blocks will be skipped because are not completely replicated on DISK: %s" % (
                len(blocksWithNoLocations), list(blocksWithNoLocations))
            self.logger.warning(msg)
            self.uploadWarning(msg, task['user_proxy'], task['tm_taskname'])

        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d', event_counter)
        self.logger.debug('Tot lumis found: %d', uniquelumis)
        self.logger.debug('Duplicate lumis found: %d',
                          (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d', len(wmfiles))

        self.logger.debug(
            "Starting to create compact lumilists for input dataset")
        datasetLumiList = LumiList(runsAndLumis=datasetLumis)
        datasetLumis = datasetLumiList.getCompactList()
        datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList(
        )
        self.logger.debug(
            "Finished to create compact lumilists for input dataset")
        with open(os.path.join(tempDir, "input_dataset_lumis.json"),
                  "w") as fd:
            json.dump(datasetLumis, fd)
        with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"),
                  "w") as fd:
            json.dump(datasetDuplicateLumis, fd)

        return Result(task=task,
                      result=Fileset(name='FilesToSplit', files=set(wmfiles)))

예제 #48

0

파일 보기

def makeLumiList(lumiDict):
    try:
        ll = LumiList(compactList=lumiDict)
        return ll.getCompactList()
    except:
        raise WMWorkloadToolsException("Could not parse LumiList")

예제 #49

0

파일 보기

파일: Analysis.py 프로젝트: cirkovic/CRABClient

    def run(self, filecacheurl=None):
        """
        Override run() for JobType
        """
        configArguments = {
            'addoutputfiles': [],
            'adduserfiles': [],
            'tfileoutfiles': [],
            'edmoutfiles': [],
        }

        if getattr(self.config.Data, 'useParent', False) and getattr(
                self.config.Data, 'secondaryInputDataset', None):
            msg = "Invalid CRAB configuration: Parameters Data.useParent and Data.secondaryInputDataset cannot be used together."
            raise ConfigurationException(msg)

        # Get SCRAM environment
        scram = ScramEnvironment(logger=self.logger)

        configArguments.update({
            'jobarch': scram.getScramArch(),
            'jobsw': scram.getCmsswVersion()
        })

        # Build tarball
        if self.workdir:
            tarUUID = PandaInterface.wrappedUuidGen()
            self.logger.debug('UNIQUE NAME: tarUUID %s ' % tarUUID)
            if len(tarUUID):
                tarFilename = os.path.join(self.workdir,
                                           tarUUID + 'default.tgz')
                cfgOutputName = os.path.join(self.workdir, BOOTSTRAP_CFGFILE)
            else:
                raise EnvironmentException(
                    'Problem with uuidgen while preparing for Sandbox upload.')
        else:
            _dummy, tarFilename = tempfile.mkstemp(suffix='.tgz')
            _dummy, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')

        if getattr(self.config.Data, 'inputDataset', None):
            configArguments['inputdata'] = self.config.Data.inputDataset

        ## Create CMSSW config.
        self.logger.debug("self.config: %s" % (self.config))
        self.logger.debug("self.config.JobType.psetName: %s" %
                          (self.config.JobType.psetName))
        ## The loading of a CMSSW pset in the CMSSWConfig constructor is not idempotent
        ## in the sense that a second loading of the same pset may not produce the same
        ## result. Therefore there is a cache in CMSSWConfig to avoid loading any CMSSW
        ## pset twice. However, some "complicated" psets seem to evade the caching.
        ## Thus, to be safe, keep the CMSSWConfig instance in a class variable, so that
        ## it can be reused later if wanted (for example, in PrivateMC when checking if
        ## the pset has an LHE source) instead of having to load the pset again.
        ## As for what does "complicated" psets mean, Daniel Riley said that there are
        ## some psets where one module modifies the configuration from another module.
        self.cmsswCfg = CMSSWConfig(config=self.config,
                                    logger=self.logger,
                                    userConfig=self.config.JobType.psetName)

        ## If there is a CMSSW pset, do a basic validation of it.
        if not bootstrapDone() and self.config.JobType.psetName:
            valid, msg = self.cmsswCfg.validateConfig()
            if not valid:
                raise ConfigurationException(msg)

        ## We need to put the pickled CMSSW configuration in the right place.
        ## Here, we determine if the bootstrap script already run and prepared everything
        ## for us. In such case we move the file, otherwise we pickle.dump the pset
        if not bootstrapDone():
            # Write out CMSSW config
            self.cmsswCfg.writeFile(cfgOutputName)
        else:
            # Move the pickled and the configuration files created by the bootstrap script
            self.moveCfgFile(cfgOutputName)

        ## Interrogate the CMSSW pset for output files (only output files produced by
        ## PoolOutputModule or TFileService are identified automatically). Do this
        ## automatic detection even if JobType.disableAutomaticOutputCollection = True,
        ## so that we can still classify the output files in EDM, TFile and additional
        ## output files in the Task DB (and the job ad).
        ## TODO: Do we really need this classification at all? cmscp and PostJob read
        ## the FJR to know if an output file is EDM, TFile or other.
        edmfiles, tfiles = self.cmsswCfg.outputFiles()
        ## If JobType.disableAutomaticOutputCollection = True, ignore the EDM and TFile
        ## output files that are not listed in JobType.outputFiles.
        if getattr(
                self.config.JobType, 'disableAutomaticOutputCollection',
                getParamDefaultValue(
                    'JobType.disableAutomaticOutputCollection')):
            outputFiles = [
                re.sub(r'^file:', '', file)
                for file in getattr(self.config.JobType, 'outputFiles', [])
            ]
            edmfiles = [file for file in edmfiles if file in outputFiles]
            tfiles = [file for file in tfiles if file in outputFiles]
        ## Get the list of additional output files that have to be collected as given
        ## in JobType.outputFiles, but remove duplicates listed already as EDM files or
        ## TFiles.
        addoutputFiles = [
            re.sub(r'^file:', '', file)
            for file in getattr(self.config.JobType, 'outputFiles', [])
            if re.sub(r'^file:', '', file) not in edmfiles + tfiles
        ]
        self.logger.debug(
            "The following EDM output files will be collected: %s" % edmfiles)
        self.logger.debug(
            "The following TFile output files will be collected: %s" % tfiles)
        self.logger.debug(
            "The following user output files will be collected: %s" %
            addoutputFiles)
        configArguments['edmoutfiles'] = edmfiles
        configArguments['tfileoutfiles'] = tfiles
        configArguments['addoutputfiles'].extend(addoutputFiles)
        ## Give warning message in case no output file was detected in the CMSSW pset
        ## nor was any specified in the CRAB configuration.
        if not configArguments['edmoutfiles'] and not configArguments[
                'tfileoutfiles'] and not configArguments['addoutputfiles']:
            msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
            if getattr(
                    self.config.JobType, 'disableAutomaticOutputCollection',
                    getParamDefaultValue(
                        'JobType.disableAutomaticOutputCollection')):
                msg += " Automatic detection of output files in the CMSSW configuration is disabled from the CRAB configuration"
                msg += " and no output file was explicitly specified in the CRAB configuration."
            else:
                msg += " CRAB could not detect any output file in the CMSSW configuration"
                msg += " nor was any explicitly specified in the CRAB configuration."
            msg += " Hence CRAB will not collect any output file from this task."
            self.logger.warning(msg)

        ## UserTarball calls ScramEnvironment which can raise EnvironmentException.
        ## Since ScramEnvironment is already called above and the exception is not
        ## handled, we are sure that if we reached this point it will not raise EnvironmentException.
        ## But otherwise we should take this into account.
        with UserTarball(name=tarFilename,
                         logger=self.logger,
                         config=self.config) as tb:
            inputFiles = [
                re.sub(r'^file:', '', file)
                for file in getattr(self.config.JobType, 'inputFiles', [])
            ]
            tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
            configArguments['adduserfiles'] = [
                os.path.basename(f) for f in inputFiles
            ]
            try:
                uploadResult = tb.upload(filecacheurl=filecacheurl)
            except HTTPException as hte:
                if 'X-Error-Info' in hte.headers:
                    reason = hte.headers['X-Error-Info']
                    reason_re = re.compile(
                        r'\AFile size is ([0-9]*)B\. This is bigger than the maximum allowed size of ([0-9]*)B\.$'
                    )
                    re_match = reason_re.match(reason)
                    if re_match:
                        ISBSize = int(re_match.group(1))
                        ISBSizeLimit = int(re_match.group(2))
                        reason = "%sError%s:" % (colors.RED, colors.NORMAL)
                        reason += " Input sanbox size is ~%sMB. This is bigger than the maximum allowed size of %sMB." % (
                            ISBSize / 1024 / 1024, ISBSizeLimit / 1024 / 1024)
                        ISBContent = sorted(tb.content, reverse=True)
                        biggestFileSize = ISBContent[0][0]
                        ndigits = int(
                            math.ceil(math.log(biggestFileSize + 1, 10)))
                        reason += "\nInput sanbox content sorted by size[Bytes]:"
                        for (size, name) in ISBContent:
                            reason += ("\n%" + str(ndigits) + "s\t%s") % (size,
                                                                          name)
                        raise ClientException(reason)
                raise hte
            except Exception as e:
                msg = (
                    "Impossible to calculate the checksum of the sandbox tarball.\nError message: %s.\n"
                    "More details can be found in %s" %
                    (e, self.logger.logfile))
                LOGGERS['CRAB3'].exception(
                    msg)  #the traceback is only printed into the logfile
                raise ClientException(msg)

        configArguments['cacheurl'] = filecacheurl
        configArguments['cachefilename'] = "%s.tar.gz" % uploadResult
        self.logger.debug("Result uploading input files: %(cachefilename)s " %
                          configArguments)

        # Upload list of user-defined input files to process as the primary input
        userFilesList = getattr(self.config.Data, 'userInputFiles', None)
        if userFilesList:
            self.logger.debug(
                "Attaching list of user-specified primary input files.")
            userFilesList = map(string.strip, userFilesList)
            userFilesList = [file for file in userFilesList if file]
            if len(userFilesList) != len(set(userFilesList)):
                msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
                msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
                msg += " Duplicated entries will be removed."
                self.logger.warning(msg)
            configArguments['userfiles'] = set(userFilesList)
            configArguments['primarydataset'] = getattr(
                self.config.Data, 'outputPrimaryDataset', 'CRAB_UserFiles')

        lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
        lumi_list = None
        if lumi_mask_name:
            self.logger.debug("Attaching lumi mask %s to the request" %
                              (lumi_mask_name))
            try:
                lumi_list = getLumiList(lumi_mask_name, logger=self.logger)
            except ValueError as ex:
                msg = "%sError%s:" % (colors.RED, colors.NORMAL)
                msg += " Failed to load lumi mask %s : %s" % (lumi_mask_name,
                                                              ex)
                raise ConfigurationException(msg)
        run_ranges = getattr(self.config.Data, 'runRange', None)
        if run_ranges:
            run_ranges_is_valid = re.match('^\d+((?!(-\d+-))(\,|\-)\d+)*$',
                                           run_ranges)
            if run_ranges_is_valid:
                run_list = getRunList(run_ranges)
                if lumi_list:
                    lumi_list.selectRuns(run_list)
                    if not lumi_list:
                        msg = "Invalid CRAB configuration: The intersection between the lumi mask and the run range is null."
                        raise ConfigurationException(msg)
                else:
                    if len(run_list) > 50000:
                        msg = "CRAB configuration parameter Data.runRange includes %s runs." % str(
                            len(run_list))
                        msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                        raise ConfigurationException(msg)
                    lumi_list = LumiList(runs=run_list)
            else:
                msg = "Invalid CRAB configuration: Parameter Data.runRange should be a comma separated list of integers or (inclusive) ranges. Example: '12345,99900-99910'"
                raise ConfigurationException(msg)
        if lumi_list:
            configArguments['runs'] = lumi_list.getRuns()
            ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
            lumi_mask = lumi_list.getCompactList()
            configArguments['lumis'] = [
                str(reduce(lambda x, y: x + y,
                           lumi_mask[run]))[1:-1].replace(' ', '')
                for run in configArguments['runs']
            ]

        configArguments['jobtype'] = 'Analysis'

        return tarFilename, configArguments

예제 #50

0

파일 보기

파일: Analysis.py 프로젝트: jmarra13/CRABClient

    def run(self, filecacheurl = None):
        """
        Override run() for JobType
        """
        configArguments = {'addoutputfiles'            : [],
                           'adduserfiles'              : [],
                           'tfileoutfiles'             : [],
                           'edmoutfiles'               : [],
                          }

        if getattr(self.config.Data, 'useParent', False) and getattr(self.config.Data, 'secondaryInputDataset', None):
            msg = "Invalid CRAB configuration: Parameters Data.useParent and Data.secondaryInputDataset cannot be used together."
            raise ConfigurationException(msg)

        # Get SCRAM environment
        scram = ScramEnvironment(logger=self.logger)

        configArguments.update({'jobarch': scram.getScramArch(),
                                'jobsw': scram.getCmsswVersion()})

        # Build tarball
        if self.workdir:
            tarUUID =  PandaInterface.wrappedUuidGen()
            self.logger.debug('UNIQUE NAME: tarUUID %s ' % tarUUID)
            if len(tarUUID):
                tarFilename   = os.path.join(self.workdir, tarUUID + 'default.tgz')
                debugTarFilename = os.path.join(self.workdir, 'debugFiles.tgz')
                cfgOutputName = os.path.join(self.workdir, BOOTSTRAP_CFGFILE)
            else:
                raise EnvironmentException('Problem with uuidgen while preparing for Sandbox upload.')
        else:
            _, tarFilename   = tempfile.mkstemp(suffix='.tgz')
            _, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')

        if getattr(self.config.Data, 'inputDataset', None):
            configArguments['inputdata'] = self.config.Data.inputDataset

        ## Create CMSSW config.
        self.logger.debug("self.config: %s" % (self.config))
        self.logger.debug("self.config.JobType.psetName: %s" % (self.config.JobType.psetName))
        ## The loading of a CMSSW pset in the CMSSWConfig constructor is not idempotent
        ## in the sense that a second loading of the same pset may not produce the same
        ## result. Therefore there is a cache in CMSSWConfig to avoid loading any CMSSW
        ## pset twice. However, some "complicated" psets seem to evade the caching.
        ## Thus, to be safe, keep the CMSSWConfig instance in a class variable, so that
        ## it can be reused later if wanted (for example, in PrivateMC when checking if
        ## the pset has an LHE source) instead of having to load the pset again.
        ## As for what does "complicated" psets mean, Daniel Riley said that there are
        ## some psets where one module modifies the configuration from another module.
        self.cmsswCfg = CMSSWConfig(config=self.config, logger=self.logger,
                                    userConfig=self.config.JobType.psetName)

        ## If there is a CMSSW pset, do a basic validation of it.
        if not bootstrapDone() and self.config.JobType.psetName:
            valid, msg = self.cmsswCfg.validateConfig()
            if not valid:
                raise ConfigurationException(msg)

        ## We need to put the pickled CMSSW configuration in the right place.
        ## Here, we determine if the bootstrap script already run and prepared everything
        ## for us. In such case we move the file, otherwise we pickle.dump the pset
        if not bootstrapDone():
            # Write out CMSSW config
            self.cmsswCfg.writeFile(cfgOutputName)
        else:
            # Move the pickled and the configuration files created by the bootstrap script
            self.moveCfgFile(cfgOutputName)

        ## Interrogate the CMSSW pset for output files (only output files produced by
        ## PoolOutputModule or TFileService are identified automatically). Do this
        ## automatic detection even if JobType.disableAutomaticOutputCollection = True,
        ## so that we can still classify the output files in EDM, TFile and additional
        ## output files in the Task DB (and the job ad).
        ## TODO: Do we really need this classification at all? cmscp and PostJob read
        ## the FJR to know if an output file is EDM, TFile or other.
        edmfiles, tfiles = self.cmsswCfg.outputFiles()
        ## If JobType.disableAutomaticOutputCollection = True, ignore the EDM and TFile
        ## output files that are not listed in JobType.outputFiles.
        if getattr(self.config.JobType, 'disableAutomaticOutputCollection', getParamDefaultValue('JobType.disableAutomaticOutputCollection')):
            outputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'outputFiles', [])]
            edmfiles = [file for file in edmfiles if file in outputFiles]
            tfiles = [file for file in tfiles if file in outputFiles]
        ## Get the list of additional output files that have to be collected as given
        ## in JobType.outputFiles, but remove duplicates listed already as EDM files or
        ## TFiles.
        addoutputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'outputFiles', []) if re.sub(r'^file:', '', file) not in edmfiles+tfiles]
        self.logger.debug("The following EDM output files will be collected: %s" % edmfiles)
        self.logger.debug("The following TFile output files will be collected: %s" % tfiles)
        self.logger.debug("The following user output files will be collected: %s" % addoutputFiles)
        configArguments['edmoutfiles'] = edmfiles
        configArguments['tfileoutfiles'] = tfiles
        configArguments['addoutputfiles'].extend(addoutputFiles)
        ## Give warning message in case no output file was detected in the CMSSW pset
        ## nor was any specified in the CRAB configuration.
        if not configArguments['edmoutfiles'] and not configArguments['tfileoutfiles'] and not configArguments['addoutputfiles']:
            msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
            if getattr(self.config.JobType, 'disableAutomaticOutputCollection', getParamDefaultValue('JobType.disableAutomaticOutputCollection')):
                msg += " Automatic detection of output files in the CMSSW configuration is disabled from the CRAB configuration"
                msg += " and no output file was explicitly specified in the CRAB configuration."
            else:
                msg += " CRAB could not detect any output file in the CMSSW configuration"
                msg += " nor was any explicitly specified in the CRAB configuration."
            msg += " Hence CRAB will not collect any output file from this task."
            self.logger.warning(msg)

        ## UserTarball calls ScramEnvironment which can raise EnvironmentException.
        ## Since ScramEnvironment is already called above and the exception is not
        ## handled, we are sure that if we reached this point it will not raise EnvironmentException.
        ## But otherwise we should take this into account.
        with UserTarball(name=tarFilename, logger=self.logger, config=self.config) as tb:
            inputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'inputFiles', [])]
            tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
            configArguments['adduserfiles'] = [os.path.basename(f) for f in inputFiles]
            try:
                uploadResult = tb.upload(filecacheurl = filecacheurl)
            except HTTPException as hte:
                if 'X-Error-Info' in hte.headers:
                    reason = hte.headers['X-Error-Info']
                    reason_re = re.compile(r'\AFile size is ([0-9]*)B\. This is bigger than the maximum allowed size of ([0-9]*)B\.$')
                    re_match = reason_re.match(reason)
                    if re_match:
                        ISBSize = int(re_match.group(1))
                        ISBSizeLimit = int(re_match.group(2))
                        reason  = "%sError%s:" % (colors.RED, colors.NORMAL)
                        reason += " Input sanbox size is ~%sMB. This is bigger than the maximum allowed size of %sMB." % (ISBSize/1024/1024, ISBSizeLimit/1024/1024)
                        ISBContent = sorted(tb.content, reverse=True)
                        biggestFileSize = ISBContent[0][0]
                        ndigits = int(math.ceil(math.log(biggestFileSize+1, 10)))
                        reason += "\nInput sanbox content sorted by size[Bytes]:"
                        for (size, name) in ISBContent:
                            reason += ("\n%" + str(ndigits) + "s\t%s") % (size, name)
                        raise ClientException(reason)
                raise hte
            except Exception as e:
                msg = ("Impossible to calculate the checksum of the sandbox tarball.\nError message: %s.\n"
                       "More details can be found in %s" % (e, self.logger.logfile))
                LOGGERS['CRAB3'].exception(msg) #the traceback is only printed into the logfile
                raise ClientException(msg)

        debugFilesUploadResult = None
        with UserTarball(name=debugTarFilename, logger=self.logger, config=self.config) as dtb:
            dtb.addMonFiles()
            try:
                debugFilesUploadResult = dtb.upload(filecacheurl = filecacheurl)
            except Exception as e:
                msg = ("Problem uploading debug_files.tar.gz.\nError message: %s.\n"
                       "More details can be found in %s" % (e, self.logger.logfile))
                LOGGERS['CRAB3'].exception(msg) #the traceback is only printed into the logfile

        configArguments['cacheurl'] = filecacheurl
        configArguments['cachefilename'] = "%s.tar.gz" % uploadResult
        if debugFilesUploadResult is not None:
            configArguments['debugfilename'] = "%s.tar.gz" % debugFilesUploadResult
        self.logger.debug("Result uploading input files: %(cachefilename)s " % configArguments)

        # Upload list of user-defined input files to process as the primary input
        userFilesList = getattr(self.config.Data, 'userInputFiles', None)
        if userFilesList:
            self.logger.debug("Attaching list of user-specified primary input files.")
            userFilesList = map(string.strip, userFilesList)
            userFilesList = [file for file in userFilesList if file]
            if len(userFilesList) != len(set(userFilesList)):
                msg  = "%sWarning%s:" % (colors.RED, colors.NORMAL)
                msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
                msg += " Duplicated entries will be removed."
                self.logger.warning(msg)
            configArguments['userfiles'] = set(userFilesList)
            configArguments['primarydataset'] = getattr(self.config.Data, 'outputPrimaryDataset', 'CRAB_UserFiles')

        lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
        lumi_list = None
        if lumi_mask_name:
            self.logger.debug("Attaching lumi mask %s to the request" % (lumi_mask_name))
            try:
                lumi_list = getLumiList(lumi_mask_name, logger = self.logger)
            except ValueError as ex:
                msg  = "%sError%s:" % (colors.RED, colors.NORMAL)
                msg += " Failed to load lumi mask %s : %s" % (lumi_mask_name, ex)
                raise ConfigurationException(msg)
        run_ranges = getattr(self.config.Data, 'runRange', None)
        if run_ranges:
            run_ranges_is_valid = re.match('^\d+((?!(-\d+-))(\,|\-)\d+)*$', run_ranges)
            if run_ranges_is_valid:
                run_list = getRunList(run_ranges)
                if lumi_list:
                    lumi_list.selectRuns(run_list)
                    if not lumi_list:
                        msg = "Invalid CRAB configuration: The intersection between the lumi mask and the run range is null."
                        raise ConfigurationException(msg)
                else:
                    if len(run_list) > 50000:
                        msg  = "CRAB configuration parameter Data.runRange includes %s runs." % str(len(run_list))
                        msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                        raise ConfigurationException(msg)
                    lumi_list = LumiList(runs = run_list)
            else:
                msg = "Invalid CRAB configuration: Parameter Data.runRange should be a comma separated list of integers or (inclusive) ranges. Example: '12345,99900-99910'"
                raise ConfigurationException(msg)
        if lumi_list:
            configArguments['runs'] = lumi_list.getRuns()
            ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
            lumi_mask = lumi_list.getCompactList()
            configArguments['lumis'] = [str(reduce(lambda x,y: x+y, lumi_mask[run]))[1:-1].replace(' ','') for run in configArguments['runs']]

        configArguments['jobtype'] = 'Analysis'

        return tarFilename, configArguments

예제 #51

0

파일 보기

    def run(self, filecacheurl = None):
        """
        Override run() for JobType
        """

        taskDict, webdir = self.getTaskDict()
        addoutputfiles = literal_eval(getColumn(taskDict, 'tm_outfiles'))
        tfileoutfiles = literal_eval(getColumn(taskDict, 'tm_tfile_outfiles'))
        edmoutfiles = literal_eval(getColumn(taskDict, 'tm_edm_outfiles'))
        jobarch = getColumn(taskDict, 'tm_job_arch')
        jobsw = getColumn(taskDict, 'tm_job_sw')

        sandboxFilename = os.path.join(self.workdir, 'sandbox.tar.gz')
        getFileFromURL(webdir + '/sandbox.tar.gz', sandboxFilename, self.proxyfilename)

        configArguments = {'addoutputfiles' : addoutputfiles,
                           'tfileoutfiles' : tfileoutfiles,
                           'edmoutfiles' : edmoutfiles,
                           'jobarch' : jobarch,
                           'jobsw' : jobsw,
                          }

        # Maybe the user wnat to change the dataset
        if getattr(self.config.Data, 'inputDataset', None):
            configArguments['inputdata'] = self.config.Data.inputDataset

        ufc = CRABClient.Emulator.getEmulator('ufc')({'endpoint' : filecacheurl, "pycurl": True})
        result = ufc.upload(sandboxFilename, excludeList = NEW_USER_SANDBOX_EXCLUSIONS)
        if 'hashkey' not in result:
            self.logger.error("Failed to upload source files: %s" % str(result))
            raise CachefileNotFoundException

        configArguments['cacheurl'] = filecacheurl
        configArguments['cachefilename'] = "%s.tar.gz" % str(result['hashkey'])

        # Upload list of user-defined input files to process as the primary input
        userFilesList = getattr(self.config.Data, 'userInputFiles', None)
        if userFilesList:
            self.logger.debug("Attaching list of user-specified primary input files.")
            userFilesList = map(string.strip, userFilesList)
            userFilesList = [file for file in userFilesList if file]
            if len(userFilesList) != len(set(userFilesList)):
                msg  = "%sWarning%s:" % (colors.RED, colors.NORMAL)
                msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
                msg += " Duplicated entries will be removed."
                self.logger.warning(msg)
            configArguments['userfiles'] = set(userFilesList)
            configArguments['primarydataset'] = getattr(self.config.Data, 'outputPrimaryDataset', 'CRAB_UserFiles')

        lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
        lumi_list = None
        if lumi_mask_name:
            self.logger.debug("Attaching lumi mask %s to the request" % (lumi_mask_name))
            try:
                lumi_list = getLumiList(lumi_mask_name, logger = self.logger)
            except ValueError as ex:
                msg  = "%sError%s:" % (colors.RED, colors.NORMAL)
                msg += " Failed to load lumi mask %s : %s" % (lumi_mask_name, ex)
                raise ConfigurationException(msg)
        run_ranges = getattr(self.config.Data, 'runRange', None)
        if run_ranges:
            run_ranges_is_valid = re.match('^\d+((?!(-\d+-))(\,|\-)\d+)*$', run_ranges)
            if run_ranges_is_valid:
                run_list = getRunList(run_ranges)
                if lumi_list:
                    lumi_list.selectRuns(run_list)
                    if not lumi_list:
                        msg = "Invalid CRAB configuration: The intersection between the lumi mask and the run range is null."
                        raise ConfigurationException(msg)
                else:
                    if len(run_list) > 50000:
                        msg  = "CRAB configuration parameter Data.runRange includes %s runs." % str(len(run_list))
                        msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                        raise ConfigurationException(msg)
                    lumi_list = LumiList(runs = run_list)
            else:
                msg = "Invalid CRAB configuration: Parameter Data.runRange should be a comma separated list of integers or (inclusive) ranges. Example: '12345,99900-99910'"
                raise ConfigurationException(msg)
        if lumi_list:
            configArguments['runs'] = lumi_list.getRuns()
            ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
            lumi_mask = lumi_list.getCompactList()
            configArguments['lumis'] = [str(reduce(lambda x,y: x+y, lumi_mask[run]))[1:-1].replace(' ','') for run in configArguments['runs']]

        configArguments['jobtype'] = 'Analysis'

        return sandboxFilename, configArguments