Code example #1
File: Fileset_t.py Project: stuartw/WMCore
    def setUp(self):
        """
        Create a dummy fileset and populate it with random files,
        in order to use it for the testcase methods
        
        """
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                            datefmt='%m-%d %H:%M',
                            filename=__file__.replace('.py', '.log'),
                            filemode='w')
        self.logger = logging.getLogger('FilesetClassTest')
        
        # Set up the initial testcase environment:
        initialfile = File('/tmp/lfn1',1000,1,1,1)
        self.initialSet = set()
        self.initialSet.add(initialfile)
        
        # Create a Fileset containing the initial file.
        self.fileset = Fileset(name = 'self.fileset', files = self.initialSet)
        #Populate the fileset with random files
        for i in range(1,1000):
            lfn = '/store/data/%s/%s/file.root' % (random.randint(1000, 9999),
                                              random.randint(1000, 9999))
            size = random.randint(1000, 2000)
            events = 1000
            run = random.randint(0, 2000)
            lumi = random.randint(0, 8)

            file = File(lfn=lfn, size=size, events=events, checksums = {"cksum": "1"})
            file.addRun(Run(run, *[lumi]))
            self.fileset.addFile(file)
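
A minimal sketch of the pattern this setup relies on, assuming the usual WMCore DataStructs import paths (the LFN and values here are illustrative):

    from WMCore.DataStructs.File import File
    from WMCore.DataStructs.Fileset import Fileset
    from WMCore.DataStructs.Run import Run

    fileset = Fileset(name='ExampleFileset')
    f = File(lfn='/store/data/1234/5678/file.root', size=1500, events=1000,
             checksums={"cksum": "1"})
    f.addRun(Run(2000, 5))      # attach run 2000 with a single lumi, 5
    fileset.addFile(f)          # newly added files show up in listNewFiles()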
Code example #2
File: Feeder.py Project: stuartw/WMCore
 def __call__(self, fileset):
     """
     Add a randomly sized list of new files (DataStructs.File), placed at
     random locations, to the fileset; the files will always be new.
     """
     num_files = random.randint(0, self.max)
     for f in self.makelist(fileset):
         for i in range(0, num_files):
             # Decide where the file is: pick a random subset of locations
             locs = []
             for j in range(0, len(self.locations)):
                 if random.randint(0, 1):
                     locs.append(self.locations[j])
             lfn = '/store/data/fake-feeder-files/notreal/%s.root' % uuid(i)
             size = 2000 + ((i - 5) * 50)
             events = 1000 + ((i - 3) * 150)
             run = random.randint(0, int(3.14159265 * i * self.max))
             lumi = random.randint(0, 10)
             file = File(lfn, size, events, run, lumi)
             file.setLocation(locs)
             f.addFile(file)

     return fileset
Code example #3
    def execute(self, *args, **kwargs): #pylint: disable=unused-argument

        # since https://github.com/dmwm/CRABServer/issues/5633 totalunits can be a float
        # but that would confuse WMCore, therefore cast to int
        totalevents = int(kwargs['task']['tm_totalunits'])
        firstEvent = 1
        lastEvent = totalevents
        firstLumi = 1
        lastLumi = 10

        # Set a default of 100 events per lumi.  This is set as a task
        # property, as the splitting considers it independently of the file
        # information provided by the fake dataset.
        if not kwargs['task']['tm_events_per_lumi']:
            kwargs['task']['tm_events_per_lumi'] = 100

        #MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name = "MCFakeFileSet")
        newFile = File("MCFakeFile", size = 1000, events = totalevents)
        newFile.setLocation(self.getListOfSites())
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["block"] = 'MCFakeBlock'
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        singleMCFileset.addFile(newFile)

        return Result(task=kwargs['task'], result=singleMCFileset)
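
Downstream, a fileset like this is typically wrapped in a Subscription so a splitter can act on it; a hedged sketch mirroring the test pattern of code example #15 below (the Workflow and split parameters are illustrative):

    # Illustrative only: feed the fake MC fileset to an EventBased splitter
    testWorkflow = Workflow()
    mcSubscription = Subscription(fileset=singleMCFileset,
                                  workflow=testWorkflow,
                                  split_algo="EventBased",
                                  type="Production")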
Code example #4
File: Fileset_t.py Project: stuartw/WMCore
 def testAddFile(self):
     """
     Testcase for the addFile method of the Fileset class
     """
     # First test - add a file and check that it is there
     testfile = File('/tmp/lfntest', 9999, 9, 9)
     self.fileset.addFile(testfile)
     assert testfile in self.fileset.listNewFiles(), \
         "Couldn't add file to fileset - fileset.addFile method not working"
     # Second test - add a file that was already in Fileset.files,
     # and check that it gets updated
     testFileSame = File('/tmp/lfntest', 9999, 9, 9)
     testFileSame.setLocation(set(['dummyse.dummy.com']))
     self.fileset.addFile(testFileSame)
     assert testFileSame in self.fileset.getFiles(), \
         'Same file copy failed - fileset.addFile not updating location ' \
         'of already existing files'
     assert testfile in self.fileset.getFiles(), \
         'Same file copy failed - fileset.addFile unable to remove ' \
         'previous file from list'
     # Third test - add a file that was already in Fileset.newfiles,
     # and check that it gets updated
     assert testFileSame in self.fileset.listNewFiles(), \
         'Same file copy failed - fileset.addFile not adding file to ' \
         'fileset.newFiles'
Code example #5
    def getChunkFiles(self, collectionName, filesetName, chunkOffset, chunkSize = 100,
                      user = "******", group = "cmsdataops"):
        """
        _getChunkFiles_

        Retrieve a chunk of files from the given collection and task.
        """
        chunkFiles = []
        result = self.couchdb.loadView("ACDC", "owner_coll_fileset_files",
                                       {"startkey": [group, user,
                                                     collectionName, filesetName],
                                        "endkey": [group, user,
                                                   collectionName, filesetName, {}],
                                        "limit": chunkSize,
                                        "skip": chunkOffset,
                                        }, [])

        for row in result["rows"]:
            resultRow = row['value']
            newFile = File(lfn = resultRow["lfn"], size = resultRow["size"],
                           events = resultRow["events"], parents = set(resultRow["parents"]),
                           locations = set(resultRow["locations"]), merged = resultRow["merged"])
            for run in resultRow["runs"]:
                newRun = Run(run["run_number"])
                newRun.extend(run["lumis"])
                newFile.addRun(newRun)

            chunkFiles.append(newFile)

        return chunkFiles
Code example #6
    def doBlock(self, entity, fileset):

        connection = urlopen(self.nodeURL + "&block=%s" % quote(entity))
        aString = connection.read()
        connection.close()

        if aString[2:8] != "phedex":
            print "PhEDExNotifier: bad string from server follows."
            print "%s" % aString

        phedex = eval(aString.replace("null", "None"), {}, {})

        blocks = phedex["phedex"]["block"]
        if len(blocks) != 1:
            print "PhEDExNotifier: Found %d blocks, expected 1, will only consider first block" % len(blocks)

        files = blocks[0]["file"]
        for file in files:
            lfn = file["name"]
            events = self.getEvents(lfn)
            (runs, lumis) = self.getRunLumi(lfn)
            fileToAdd = File(lfn, file["bytes"], events, runs[0], lumis[0])
            replicas = file["replica"]
            if len(replicas) > 0:
                locations = []
                for replica in replicas:
                    locations.append(replica["node"])
                fileToAdd.setLocation(locations)
                fileset.addFile(fileToAdd)
Code example #7
    def execute(self, *args, **kwargs):

        totalevents = kwargs['task']['tm_totalunits']
        firstEvent = 1
        lastEvent = totalevents
        firstLumi = 1
        lastLumi = 10

        # Set a default of 100 events per lumi.  This is set as a task
        # property, as the splitting considers it independently of the file
        # information provided by the fake dataset.
        if not kwargs['task']['tm_events_per_lumi']:
            kwargs['task']['tm_events_per_lumi'] = 100

        #MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name = "MCFakeFileSet")
        newFile = File("MCFakeFile", size = 1000, events = totalevents)
        sbj = SiteDBJSON({"key":self.config.TaskWorker.cmskey,
                          "cert":self.config.TaskWorker.cmscert})
        newFile.setLocation(sbj.getAllCMSNames())
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["block"] = 'MCFakeBlock'
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        singleMCFileset.addFile(newFile)

        return Result(task=kwargs['task'], result=singleMCFileset)
Code example #8
    def __init__(self, lfn = None, id = -1, size = None,
                 events = None, checksums = None, parents = None, locations = None,
                 status = "NOTUPLOADED"):
        # Guard against a shared mutable default argument
        checksums = checksums or {}
        WMBSBase.__init__(self)
        WMFile.__init__(self, lfn = lfn, size = size, events = events,
                        checksums = checksums, parents = parents, merged = True)
        self.setdefault("status", status)
        self.setdefault("id", id)

        # Parameters for the algorithm
        self.setdefault("appName", None)
        self.setdefault("appVer", None)
        self.setdefault("appFam", None)
        self.setdefault("psetHash", None)
        self.setdefault("configContent", None)
        self.setdefault("datasetPath", None)
        self.setdefault("valid_status", None)

        if locations is None:
            self.setdefault("newlocations", set())
        else:
            self.setdefault("newlocations", self.makeset(locations))

        # The WMBS base class creates a DAO factory for WMBS, we'll need to
        # overwrite that so we can use the factory for DBSBuffer objects.
        self.daoFactory = DAOFactory(package = "WMComponent.DBSBuffer.Database",
                                     logger = self.logger,
                                     dbinterface = self.dbi)

        #Remove reference to WMBS daofactory to prevent confusion
        self.daofactory = self.daoFactory
        return
Code example #9
File: EventBased_t.py Project: stuartw/WMCore
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation("se01")
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name="TestFileset2")
        newFile = File("/some/file/name", size=1000, events=100)
        newFile.setLocation("se02")
        self.singleFileFileset.addFile(newFile)

        self.emptyFileFileset = Fileset(name="TestFileset3")
        newFile = File("/some/file/name", size=1000, events=0)
        newFile.setLocation("se03")
        self.emptyFileFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset, workflow=testWorkflow, split_algo="EventBased", type="Processing"
        )
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset, workflow=testWorkflow, split_algo="EventBased", type="Processing"
        )
        self.emptyFileSubscription = Subscription(
            fileset=self.emptyFileFileset, workflow=testWorkflow, split_algo="EventBased", type="Processing"
        )

        return
Code example #10
    def jobConfig(self, wf, task, jobid, lfn):
        """
        Create a fake job dict to upload to the ACDC server
        """
        testFile = File(lfn=lfn, size=1024, events=1024)
        testFile.setLocation(["T2_CH_CERN", "T2_CH_CERN_HLT"])
        testFile.addRun(Run(jobid, 1, 2))  # run = jobid
        testJob = self.getMinimalJob(wf, task)
        testJob.addFile(testFile)

        return testJob
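
Jobs built this way are then typically reported to the ACDC data collection service, as in code examples #12 and #36; a hedged sketch (the dcs instance and arguments are hypothetical):

    # Hypothetical: record the fake job as failed so ACDC stores its files
    testJob = self.jobConfig(wf, task, jobid=1, lfn="/store/test/file.root")
    dcs.failedJobs([testJob])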
Code example #11
File: Report.py Project: belforte/WMCore
    def getOutputFile(self, fileName, outputModule, step):
        """
        _getOutputFile_

        Takes a fileRef object and returns a DataStructs/File object as output
        """

        outputMod = self.getOutputModule(step=step, outputModule=outputModule)

        if not outputMod:
            return None

        fileRef = getattr(outputMod.files, fileName, None)
        newFile = File(locations=set())

        # Locations
        newFile.setLocation(getattr(fileRef, "location", None))

        # Runs
        runList = fileRef.runs.listSections_()
        for run in runList:
            lumis = getattr(fileRef.runs, run)
            if isinstance(lumis, dict):
                newRun = Run(int(run), *lumis.items())
            else:
                newRun = Run(int(run), *lumis)
            newFile.addRun(newRun)

        newFile["lfn"] = getattr(fileRef, "lfn", None)
        newFile["pfn"] = getattr(fileRef, "pfn", None)
        newFile["events"] = int(getattr(fileRef, "events", 0))
        newFile["size"] = int(getattr(fileRef, "size", 0))
        newFile["branches"] = getattr(fileRef, "branches", [])
        newFile["input"] = getattr(fileRef, "input", [])
        newFile["inputpfns"] = getattr(fileRef, "inputpfns", [])
        newFile["branch_hash"] = getattr(fileRef, "branch_hash", None)
        newFile["catalog"] = getattr(fileRef, "catalog", "")
        newFile["guid"] = getattr(fileRef, "guid", "")
        newFile["module_label"] = getattr(fileRef, "module_label", "")
        newFile["checksums"] = getattr(fileRef, "checksums", {})
        newFile["merged"] = bool(getattr(fileRef, "merged", False))
        newFile["dataset"] = getattr(fileRef, "dataset", {})
        newFile["acquisitionEra"] = getattr(fileRef, 'acquisitionEra', None)
        newFile["processingVer"] = getattr(fileRef, 'processingVer', None)
        newFile["validStatus"] = getattr(fileRef, 'validStatus', None)
        newFile["globalTag"] = getattr(fileRef, 'globalTag', None)
        newFile["prep_id"] = getattr(fileRef, 'prep_id', None)
        newFile['configURL'] = getattr(fileRef, 'configURL', None)
        newFile['inputPath'] = getattr(fileRef, 'inputPath', None)
        newFile["outputModule"] = outputModule
        newFile["fileRef"] = fileRef

        return newFile
Code example #12
File: WorkQueue_t.py Project: zhiwenuil/WMCore
    def createResubmitSpec(self, serverUrl, couchDB):
        """
        _createResubmitSpec_
        Create a bogus resubmit workload.
        """
        self.site = "cmssrm.fnal.gov"
        workload = WMWorkloadHelper(WMWorkload("TestWorkload"))
        reco = workload.newTask("reco")
        workload.setOwnerDetails(name = "evansde77", group = "DMWM")

        # first task uses the input dataset
        reco.addInputDataset(primary = "PRIMARY", processed = "processed-v1", tier = "TIER1")
        reco.data.input.splitting.algorithm = "File"
        reco.setTaskType("Processing")
        cmsRunReco = reco.makeStep("cmsRun1")
        cmsRunReco.setStepType("CMSSW")
        reco.applyTemplates()
        cmsRunRecoHelper = cmsRunReco.getTypeHelper()
        cmsRunRecoHelper.addOutputModule("outputRECO",
                                        primaryDataset = "PRIMARY",
                                        processedDataset = "processed-v2",
                                        dataTier = "TIER2",
                                        lfnBase = "/store/dunkindonuts",
                                        mergedLFNBase = "/store/kfc")
        
        dcs = DataCollectionService(url = serverUrl, database = couchDB)

        def getJob(workload):
            job = Job()
            job["task"] = workload.getTask("reco").getPathName()
            job["workflow"] = workload.name()
            job["location"] = self.site
            job["owner"] = "evansde77"
            job["group"] = "DMWM"
            return job

        testFileA = WMFile(lfn = makeUUID(), size = 1024, events = 1024)
        testFileA.setLocation([self.site])
        testFileA.addRun(Run(1, 1, 2))
        testFileB = WMFile(lfn = makeUUID(), size = 1024, events = 1024)
        testFileB.setLocation([self.site])
        testFileB.addRun(Run(1, 3, 4))
        testJobA = getJob(workload)
        testJobA.addFile(testFileA)
        testJobA.addFile(testFileB)
        
        dcs.failedJobs([testJobA])
        topLevelTask = workload.getTopLevelTask()[0]
        workload.truncate("Resubmit_TestWorkload", topLevelTask.getPathName(), 
                          serverUrl, couchDB)
                                  
        return workload
Code example #13
    def createFile(lfn, events, run, lumis, location):
        """
        _createFile_

        Create a file for testing
        """
        newFile = File(lfn=lfn, size=1000, events=events)
        lumiList = []
        for lumi in range(lumis):
            lumiList.append((run * lumis) + lumi)
        newFile.addRun(Run(run, *lumiList))
        newFile.setLocation(location)
        return newFile
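
A hypothetical call, matching how the splitting tests below (code example #33) use this helper:

    # A 250-event file carrying run 0 with lumis 0-4, located at "blenheim"
    testFile = createFile("/this/is/file1", 250, 0, 5, "blenheim")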
Code example #14
File: Report.py Project: sofian86/WMCore
    def getInputFilesFromStep(self, stepName, inputSource = None):
        """
        _getInputFilesFromStep_

        Retrieve a list of input files from the given step.
        """
        step = self.retrieveStep(stepName)

        inputSources = []
        if inputSource is None:
            inputSources = step.input.listSections_()
        else:
            inputSources = [inputSource]

        inputFiles = []
        for inputSource in inputSources:
            source = getattr(step.input, inputSource)
            for fileNum in range(source.files.fileCount):
                fwjrFile = getattr(source.files, "file%d" % fileNum)

                lfn = getattr(fwjrFile, "lfn", None)
                pfn = getattr(fwjrFile, "pfn", None)
                size = getattr(fwjrFile, "size", 0)
                events = getattr(fwjrFile, "events", 0)
                branches = getattr(fwjrFile, "branches", [])
                catalog = getattr(fwjrFile, "catalog", None)
                guid = getattr(fwjrFile, "guid", None)
                inputSourceClass = getattr(fwjrFile, "input_source_class", None)
                moduleLabel = getattr(fwjrFile, "module_label", None)
                inputType = getattr(fwjrFile, "input_type", None)

                inputFile = File(lfn = lfn, size = size, events = events)
                inputFile["pfn"] = pfn
                inputFile["branches"] = branches
                inputFile["catalog"] = catalog
                inputFile["guid"] = guid
                inputFile["input_source_class"] = inputSourceClass
                inputFile["module_label"] = moduleLabel
                inputFile["input_type"] = inputType

                runSection = getattr(fwjrFile, "runs")
                runNumbers = runSection.listSections_()

                for runNumber in runNumbers:
                    lumiTuple = getattr(runSection, str(runNumber))
                    inputFile.addRun(Run(int(runNumber), *lumiTuple))

                inputFiles.append(inputFile)

        return inputFiles
Code example #15
File: EventBased_t.py Project: stuartw/WMCore
 def generateFakeMCFile(self, numEvents=100, firstEvent=1, lastEvent=100, firstLumi=1, lastLumi=10):
     # MC comes with only one MCFakeFile
     singleMCFileset = Fileset(name="MCTestFileset")
     newFile = File("MCFakeFileTest", size=1000, events=numEvents)
     newFile.setLocation("se01")
     newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
     newFile["first_event"] = firstEvent
     newFile["last_event"] = lastEvent
     testWorkflow = Workflow()
     singleMCFileset.addFile(newFile)
     singleMCFileSubscription = Subscription(
         fileset=singleMCFileset, workflow=testWorkflow, split_algo="EventBased", type="Production"
     )
     return singleMCFileSubscription
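
A sketch of how such a subscription is usually handed to the splitter in these tests; the values are illustrative, with the factory pattern following code example #33 and the performance dict code example #35:

    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.DataStructs",
                          subscription=self.generateFakeMCFile(numEvents=500))
    jobGroups = jobFactory(events_per_job=100,
                           performance={'timePerEvent': 12,
                                        'memoryRequirement': 2300,
                                        'sizePerEvent': 400})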
Code example #16
File: FileBased_t.py Project: ticoann/WMCore
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name = "TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.setLocation('blenheim')
            newFile.setLocation('malpaquet')
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name = "TestFileset2")
        newFile = File("/some/file/name", size = 1000, events = 100)
        newFile.setLocation('blenheim')
        self.singleFileFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "FileBased",
                                                     type = "Processing")
        self.singleFileSubscription = Subscription(fileset = self.singleFileFileset,
                                                   workflow = testWorkflow,
                                                   split_algo = "FileBased",
                                                   type = "Processing")

        #self.multipleFileSubscription.create()
        #self.singleFileSubscription.create()

        return
Code example #17
    def execute(self, *args, **kwargs):
        self.logger.info("Data discovery and splitting for %s using user-provided files" % kwargs['task']['tm_taskname'])

        if 'tm_user_files' in kwargs['task'] and kwargs['task']['tm_user_files']:
            userfiles = kwargs['task']['tm_user_files']
        else: ## For backward compatibility only.
            userfiles = kwargs['task']['tm_arguments'].get('userfiles')
        splitting = kwargs['task']['tm_split_algo']
        total_units = kwargs['task']['tm_totalunits']
        if not userfiles or splitting != 'FileBased':
            if not userfiles:
                msg = "No files specified to process for task %s." % kwargs['task']['tm_taskname']
            if splitting != 'FileBased':
                msg = "Data.splitting must be set to 'FileBased' when using a custom set of files."
            self.logger.error("Setting %s as failed: %s" % (kwargs['task']['tm_taskname'], msg))
            configreq = {'workflow': kwargs['task']['tm_taskname'],
                         'status': "FAILED",
                         'subresource': 'failure',
                         'failure': b64encode(msg)}
            self.server.post(self.resturi, data = urllib.urlencode(configreq))
            raise StopHandler(msg)

        if hasattr(self.config.Sites, 'available'):
            locations = self.config.Sites.available
        else:
            sbj = SiteDBJSON({"key":self.config.TaskWorker.cmskey,
                              "cert":self.config.TaskWorker.cmscert})
            locations = sbj.getAllCMSNames()

        userFileset = Fileset(name = kwargs['task']['tm_taskname'])
        self.logger.info("There are %d files specified by the user." % len(userfiles))
        if total_units > 0:
            self.logger.info("Will run over the first %d files." % total_units)
        file_counter = 0
        for userfile, idx in zip(userfiles, range(len(userfiles))):
            newFile = File(userfile, size = 1000, events = 1)
            newFile.setLocation(locations)
            newFile.addRun(Run(1, idx))
            newFile["block"] = 'UserFilesFakeBlock'
            newFile["first_event"] = 1
            newFile["last_event"] = 2
            userFileset.addFile(newFile)
            file_counter += 1
            if total_units > 0 and file_counter >= total_units:
                break

        return Result(task = kwargs['task'], result = userFileset)
Code example #18
    def createFilesetFromDBS(self, collection, filesetName, dbsURL, dataset, mask=None):
        """
        _createFilesetFromDBS_

        Get info from DBS, apply mask (filter) and create a fileset
        """
        fileSet = CouchFileset(database=self.database, url=self.url, name=filesetName)
        fileSet.setCollection(collection)

        files = []
        blockLocations = {}

        dbsReader = DBSReader(dbsURL, version="DBS_2_0_9", mode="GET")

        dbsResults = dbsReader.dbs.listFiles(path=dataset, retriveList=["retrive_lumi", "retrive_run"])
        logging.info("Found %s files from DBS" % len(dbsResults))

        for dbsResult in dbsResults:
            blockName = dbsResult["Block"]["Name"]
            if blockName not in blockLocations:
                blockLocations[blockName] = dbsReader.listFileBlockLocation(blockName)

            file = File(
                lfn=dbsResult["LogicalFileName"],
                size=dbsResult["FileSize"],
                merged=True,
                events=dbsResult["NumberOfEvents"],
                locations=blockLocations[blockName],
            )
            runs = {}
            for lumi in dbsResult["LumiList"]:
                runNumber = lumi["RunNumber"]
                runString = str(runNumber)
                lumiNumber = lumi["LumiSectionNumber"]
                if runString in runs:
                    runs[runString].lumis.append(lumiNumber)
                else:
                    runs[runString] = Run(runNumber, lumiNumber)

            for run in runs.values():
                file.addRun(run)
            files.append(file)

        logging.info("Uploading %s files in fileset" % len(files))
        fileList = fileSet.add(files, mask)

        return fileSet, fileList
Code example #19
File: Runtime_t.py Project: vkuznet/WMCore
    def getFileset(self):
        """
        Get a fileset based on the task

        """

        fileset = Fileset(name='Merge%s' % (type))

        for i in range(0, random.randint(15, 25)):
            # Use the testDir to generate a random lfn
            inpFile = File(lfn="%s/%s.root" % (self.testDir, makeUUID()),
                           size=random.randint(200000, 1000000),
                           events=random.randint(1000, 2000))
            inpFile.setLocation('Megiddo')
            fileset.addFile(inpFile)

        return fileset
Code example #20
File: File_t.py Project: stuartw/WMCore
    def testDataStructsFile(self):
        """
        _testDataStructsFile_

        Tests our ability to create a WMBS file from a DataStructs File and vice versa
        """

        myThread = threading.currentThread()
        
        testLFN     = "lfn1"
        testSize    = 1024
        testEvents  = 100
        testCksum   = {"cksum": '1'}
        testParents = set(["lfn2"])
        testRun     = Run( 1, *[45])
        testSE      = "se1.cern.ch"

        parentFile = File(lfn= "lfn2")
        parentFile.create()

        testFile = File()

        inputFile = WMFile(lfn = testLFN, size = testSize, events = testEvents, checksums = testCksum, parents = testParents)
        inputFile.addRun(testRun)
        inputFile.setLocation(se = testSE)

        testFile.loadFromDataStructsFile(file = inputFile)
        testFile.create()
        testFile.save()

        
        loadFile = File(lfn = "lfn1")
        loadFile.loadData(parentage = 1)

        self.assertEqual(loadFile['size'],   testSize)
        self.assertEqual(loadFile['events'], testEvents)
        self.assertEqual(loadFile['checksums'], testCksum)
        self.assertEqual(loadFile['locations'], set([testSE]))
        #self.assertEqual(loadFile['parents'].pop()['lfn'], 'lfn2')

        wmFile = loadFile.returnDataStructsFile()
        self.assertEqual(wmFile == inputFile, True)

        return
Code example #21
    def execute(self, *args, **kwargs):

        totalevents = kwargs['task']['tm_totalunits']
        firstEvent = 1
        lastEvent = totalevents
        firstLumi = 1
        lastLumi = 10

        #MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name = "MCFakeFileSet")
        newFile = File("MCFakeFile", size = 1000, events = totalevents)
        newFile.setLocation(self.config.Sites.available)
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["block"] = 'MCFackBlock'
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        singleMCFileset.addFile(newFile)

        return Result(task=kwargs['task'], result=singleMCFileset)
Code example #22
File: File.py Project: AndrewLevin/WMCore
    def __init__(self, lfn = "", id = -1, size = 0, events = 0, checksums = {},
                 parents = None, locations = None, first_event = 0,
                 last_event = 0, merged = True):
        WMBSBase.__init__(self)
        WMFile.__init__(self, lfn = lfn, size = size, events = events,
                        checksums = checksums, parents = parents, merged = merged)

        if locations == None:
            self.setdefault("newlocations", set())
        else:
            if type(locations) == str:
                self.setdefault("newlocations", set())
                self['newlocations'].add(locations)
            else:
                self.setdefault("newlocations", locations)

        # overwrite the default value set from the WMFile
        self["first_event"] =  first_event
        self["last_event"] = last_event
        self.setdefault("id", id)
        self['locations'] = set()
Code example #23
    def execute(self, *args, **kwargs):
        self.logger.info("Data discovery and splitting for %s using user-provided files" % kwargs['task']['tm_taskname'])

        userfiles = kwargs['task']['tm_user_files']
        splitting = kwargs['task']['tm_split_algo']
        total_units = kwargs['task']['tm_totalunits']
        if not userfiles or splitting != 'FileBased':
            if not userfiles:
                msg = "No files specified to process for task %s." % kwargs['task']['tm_taskname']
            if splitting != 'FileBased':
                msg = "Data.splitting must be set to 'FileBased' when using a custom set of files."
            raise TaskWorkerException(msg)

        if hasattr(self.config.Sites, 'available'):
            locations = self.config.Sites.available
        else:
            with self.config.TaskWorker.envForCMSWEB :
                configDict = {"cacheduration": 1, "pycurl": True} # cache duration is in hours
                resourceCatalog = CRIC(logger=self.logger, configDict=configDict)
                locations = resourceCatalog.getAllPSNs()

        userFileset = Fileset(name = kwargs['task']['tm_taskname'])
        self.logger.info("There are %d files specified by the user." % len(userfiles))
        if total_units > 0:
            self.logger.info("Will run over the first %d files." % total_units)
        file_counter = 0
        for userfile, idx in zip(userfiles, range(len(userfiles))):
            newFile = File(userfile, size = 1000, events = 1)
            newFile.setLocation(locations)
            newFile.addRun(Run(1, idx))
            newFile["block"] = 'UserFilesFakeBlock'
            newFile["first_event"] = 1
            newFile["last_event"] = 2
            userFileset.addFile(newFile)
            file_counter += 1
            if total_units > 0 and file_counter >= total_units:
                break

        return Result(task = kwargs['task'], result = userFileset)
Code example #24
File: DBSBufferFile.py Project: DAMason/WMCore
    def __init__(self, lfn=None, id=-1, size=None,
                 events=None, checksums=None, parents=None, locations=None,
                 status="NOTUPLOADED", inPhedex=0, workflowId=None, prep_id=None):

        checksums = checksums or {}
        WMBSBase.__init__(self)
        WMFile.__init__(self, lfn=lfn, size=size, events=events,
                        checksums=checksums, parents=parents, merged=True)
        self.setdefault("status", status)
        self.setdefault("in_phedex", inPhedex)
        self.setdefault("id", id)
        self.setdefault("workflowId", workflowId)

        # Parameters for the algorithm
        self.setdefault("appName", None)
        self.setdefault("appVer", None)
        self.setdefault("appFam", None)
        self.setdefault("psetHash", None)
        self.setdefault("configContent", None)
        self.setdefault("datasetPath", None)
        self.setdefault("processingVer", None)
        self.setdefault("acquisitionEra", None)
        self.setdefault("validStatus", None)
        self.setdefault("globalTag", None)
        self.setdefault("datasetParent", None)
        self.setdefault("prep_id", None)

        if locations is None:
            self.setdefault("newlocations", set())
        else:
            self.setdefault("newlocations", self.makeset(locations))

        # The WMBS base class creates a DAO factory for WMBS, we'll need to
        # overwrite that so we can use the factory for DBSBuffer objects.
        self.daofactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                     logger=self.logger,
                                     dbinterface=self.dbi)

        return
Code example #25
    def getChunkFiles(self, collectionName, filesetName, chunkOffset, chunkSize=100):
        """
        _getChunkFiles_

        Retrieve a chunk of files from the given collection and task.
        """
        chunkFiles = []
        files = self._getFilesetInfo(collectionName, filesetName, chunkOffset, chunkSize)

        files = mergeFakeFiles(files)
        for fileInfo in files:
            newFile = File(lfn=fileInfo["lfn"], size=fileInfo["size"],
                           events=fileInfo["events"], parents=set(fileInfo["parents"]),
                           locations=set(fileInfo["locations"]), merged=fileInfo["merged"])
            for run in fileInfo["runs"]:
                newRun = Run(run["run_number"])
                newRun.extend(run["lumis"])
                newFile.addRun(newRun)

            chunkFiles.append(newFile)

        return chunkFiles
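
A hedged usage sketch, assuming a DataCollectionService instance (dcs) and reusing the collection/fileset names from the chunking test in code example #36:

    # Page through an ACDC fileset five files at a time
    chunk0 = dcs.getChunkFiles("ACDCTest", "/ACDCTest/reco",
                               chunkOffset=0, chunkSize=5)
    chunk1 = dcs.getChunkFiles("ACDCTest", "/ACDCTest/reco",
                               chunkOffset=5, chunkSize=5)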
Code example #26
File: Job_t.py Project: alexanderrichards/WMCore
    def setUp(self):
        """
        _setUp_

        Initial Setup for the Job Testcase
        """
        self.inputFiles = []

        for i in range(1,1000):
            lfn = "/store/data/%s/%s/file.root" % (random.randint(1000, 9999),
                                                   random.randint(1000, 9999))
            size = random.randint(1000, 2000)
            events = 1000
            run = random.randint(0, 2000)
            lumi = random.randint(0, 8)

            file = File(lfn = lfn, size = size, events = events, checksums = {"cksum": "1"})
            file.addRun(Run(run, *[lumi]))
            self.inputFiles.append(file)

        self.dummyJob = Job(files = self.inputFiles)
        return
Code example #27
File: ReportEmu.py Project: PerilousApricot/WMCore
    def addOutputFilesToReport(self, report):
        """
        _addOutputFilesToReport_

        Add output files to every output module in the step.  Scale the size
        and number of events in the output files appropriately.
        """
        (outputSize, outputEvents) = self.determineOutputSize()

        if not os.path.exists('ReportEmuTestFile.txt'):
            f = open('ReportEmuTestFile.txt', 'w')
            f.write('A Shubbery')
            f.close()

        for outputModuleName in self.step.listOutputModules():
            outputModuleSection = self.step.getOutputModule(outputModuleName)
            outputModuleSection.fixedLFN    = False
            outputModuleSection.disableGUID = False

            outputLFN = "%s/%s.root" % (outputModuleSection.lfnBase,
                                        str(makeUUID()))
            outputFile = File(lfn = outputLFN, size = outputSize, events = outputEvents,
                              merged = False)
            outputFile.setLocation(self.job["location"])
            outputFile['pfn'] = "ReportEmuTestFile.txt"
            outputFile['guid'] = "ThisIsGUID"
            outputFile["checksums"] = {"adler32": "1234", "cksum": "5678"}
            outputFile["dataset"] = {"primaryDataset": outputModuleSection.primaryDataset,
                                     "processedDataset": outputModuleSection.processedDataset,
                                     "dataTier": outputModuleSection.dataTier,
                                     "applicationName": "cmsRun",
                                     "applicationVersion": self.step.getCMSSWVersion()}
            outputFile["module_label"] = outputModuleName

            outputFileSection = report.addOutputFile(outputModuleName, outputFile)
            for inputFile in self.job["input_files"]:
                Report.addRunInfoToFile(outputFileSection, inputFile["runs"])

        return
Code example #28
File: EventBased_t.py Project: vkuznet/WMCore
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation('se01')
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name="TestFileset2")
        newFile = File("/some/file/name", size=1000, events=100)
        newFile.setLocation('se02')
        self.singleFileFileset.addFile(newFile)

        self.emptyFileFileset = Fileset(name="TestFileset3")
        newFile = File("/some/file/name", size=1000, events=0)
        newFile.setLocation('se03')
        self.emptyFileFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(fileset=self.multipleFileFileset,
                                                     workflow=testWorkflow,
                                                     split_algo="EventBased",
                                                     type="Processing")
        self.singleFileSubscription = Subscription(fileset=self.singleFileFileset,
                                                   workflow=testWorkflow,
                                                   split_algo="EventBased",
                                                   type="Processing")
        self.emptyFileSubscription = Subscription(fileset=self.emptyFileFileset,
                                                  workflow=testWorkflow,
                                                  split_algo="EventBased",
                                                  type="Processing")

        self.eventsPerJob = 100
        self.performanceParams = {'timePerEvent': None,
                                  'memoryRequirement': 2300,
                                  'sizePerEvent': 400}

        return
Code example #29
File: File_t.py Project: AndresTanasijczuk/WMCore
    def testAddRun(self):
        """
        This tests the addRun() function of a DataStructs File object

        """

        testLFN     = "lfn"
        testSize    = "1024"
        testEvents  = "100"
        testCksum   = "1"
        testParents = "parent"

        testLumi      = 1
        testRunNumber = 1000000

        testFile = File(lfn = testLFN, size = testSize, events = testEvents, checksums = testCksum, parents = testParents)
        testRun = Run(testRunNumber, testLumi)

        testFile.addRun(testRun)

        assert testRun in testFile['runs'], "Run not added properly to run in File.addRun()"

        return
Code example #30
File: File.py Project: AndrewLevin/WMCore
    def returnDataStructsFile(self):
        """
        _returnDataStructsFile_

        Creates a dataStruct file out of this file
        """
        parents = set()
        for parent in self["parents"]:
            parents.add(WMFile(lfn = parent['lfn'], size = parent['size'],
                               events = parent['events'], checksums = parent['checksums'],
                               parents = parent['parents'], merged = parent['merged']))

        file = WMFile(lfn = self['lfn'], size = self['size'],
                      events = self['events'], checksums = self['checksums'],
                      parents = parents, merged = self['merged'])

        for run in self['runs']:
            file.addRun(run)

        for location in self['locations']:
            file.setLocation(se = location)

        return file
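
A short usage note: the round-trip test in code example #20 exercises exactly this method, converting a WMBS File back into a DataStructs file and checking equality:

    wmFile = loadFile.returnDataStructsFile()
    self.assertEqual(wmFile == inputFile, True)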
Code example #31
File: SizeBased_t.py Project: tsarangi/WMCore
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name = "TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name = "TestFileset2")
        newFile = File("/some/file/name", size = 1000, events = 100, locations = set(["somese.cern.ch"]))
        self.singleFileFileset.addFile(newFile)

        self.multipleSiteFileset = Fileset(name = "TestFileset3")
        for i in range(5):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
            newFile.setLocation("somese.cern.ch")
            self.multipleSiteFileset.addFile(newFile)
        for i in range(5):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.setLocation(["somese.cern.ch","otherse.cern.ch"])
            self.multipleSiteFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "SizeBased",
                                                     type = "Processing")
        self.singleFileSubscription = Subscription(fileset = self.singleFileFileset,
                                                   workflow = testWorkflow,
                                                   split_algo = "SizeBased",
                                                   type = "Processing")
        self.multipleSiteSubscription = Subscription(fileset = self.multipleSiteFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "EventBased",
                                                     type = "Processing")
        return
Code example #32
    def testDropCount(self):
        """
        _testDropCount_

        Verify that dropping a fileset and counting the files in a fileset works
        correctly.
        """
        testCollectionA = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Thunderstruck")
        testCollectionB = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="StruckThunder")
        testCollectionA.setOwner(self.owner)
        testCollectionB.setOwner(self.owner)

        testFiles = []
        for i in range(5):
            testFile = File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
            testFiles.append(testFile)

        testFilesetA = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetA")
        testFilesetB = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetB")
        testFilesetC = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetC")
        testCollectionA.addFileset(testFilesetA)
        testCollectionB.addFileset(testFilesetB)
        testCollectionB.addFileset(testFilesetC)
        testFilesetA.add(testFiles)
        testFilesetB.add(testFiles)
        testFilesetC.add(testFiles)

        testFilesetC.drop()

        testCollectionC = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="StruckThunder")
        testCollectionC.setOwner(self.owner)
        testCollectionC.populate()

        self.assertEqual(
            len(testCollectionC["filesets"]), 1,
            "Error: There should be one fileset in this collection.")
        self.assertEqual(testCollectionC["filesets"][0].fileCount(), 5,
                         "Error: Wrong number of files in fileset.")

        testCollectionD = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Thunderstruck")
        testCollectionD.setOwner(self.owner)
        testCollectionD.populate()

        self.assertEqual(
            len(testCollectionD["filesets"]), 1,
            "Error: There should be one fileset in this collection.")
        self.assertEqual(testCollectionD["filesets"][0].fileCount(), 5,
                         "Error: Wrong number of files in fileset.")
        return
Code example #33
    def testD_NoFileSplitNoHardLimit(self):
        """
        _testD_NoFileSplitNoHardLimit_

        In this case we don't split on file boundaries, check different combination of files
        make sure we make the most of the splitting, e.g. include many zero event files in
        a single job.
        """
        splitter = SplitterFactory()

        # Create 100 files with 7 lumi per file and 0 events per lumi on average.
        testSubscription = self.createSubscription(nFiles=100,
                                                   lumisPerFile=7,
                                                   twoSites=False,
                                                   nEventsPerFile=0)
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        # First test, the optimal settings are 360 events per job
        # As we have files with 0 events per lumi, this will configure the splitting to
        # a single job containing all files
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=360,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 1, "There should be 1 job")
        self.assertEqual(len(jobs[0]['input_files']), 100,
                         "All 100 files must be in the job")

        # Create 7 files, each one with different lumi/event distributions
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 250, 0, 5,
                                    "blenheim")  # job1, job2
        testFileB = self.createFile("/this/is/file2", 600, 1, 1,
                                    "blenheim")  # job3
        testFileC = self.createFile("/this/is/file3", 1200, 2, 2,
                                    "blenheim")  # job4, job5
        testFileD = self.createFile("/this/is/file4", 100, 3, 1,
                                    "blenheim")  # job6
        testFileE = self.createFile("/this/is/file5", 30, 4, 1,
                                    "blenheim")  # job6
        testFileF = self.createFile("/this/is/file6", 10, 5, 1,
                                    "blenheim")  # job6
        testFileG = self.createFile("/this/is/file7", 151, 6, 3,
                                    "blenheim")  # job7
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.addFile(testFileD)
        testFileset.addFile(testFileE)
        testFileset.addFile(testFileF)
        testFileset.addFile(testFileG)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")

        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        # Optimal settings are: jobs with 150 events per job
        # This means the first file must be split at 3 lumis per job, which
        # would leave room for another lumi in the second job, but the second
        # file has a lumi too big for that. The 3rd job only contains the
        # second file; the fourth and fifth jobs split the third file.
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=150,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 7, "Seven jobs must be in the jobgroup")
        self.assertEqual(jobs[0]["mask"].getRunAndLumis(), {0: [[0, 2]]},
                         "Wrong mask for the first job")
        self.assertEqual(jobs[1]["mask"].getRunAndLumis(), {0: [[3, 4]]},
                         "Wrong mask for the second job")
        self.assertEqual(jobs[2]["mask"].getRunAndLumis(), {1: [[1, 1]]},
                         "Wrong mask for the third job")
        self.assertEqual(jobs[3]["mask"].getRunAndLumis(), {2: [[4, 4]]},
                         "Wrong mask for the fourth job")
        self.assertEqual(jobs[4]["mask"].getRunAndLumis(), {2: [[5, 5]]},
                         "Wrong mask for the fifth job")
        self.assertEqual(jobs[5]["mask"].getRunAndLumis(), {
            3: [[3, 3]],
            4: [[4, 4]],
            5: [[5, 5]]
        }, "Wrong mask for the sixth job")
        self.assertEqual(jobs[6]["mask"].getRunAndLumis(), {6: [[18, 20]]},
                         "Wrong mask for the seventh job")
        # Test interactions of this algorithm with splitOnRun = True
        # Make 2 files, one with 3 runs and a second one with the last run of the first
        fileA = File(lfn="/this/is/file1", size=1000, events=2400)
        lumiListA = []
        lumiListB = []
        lumiListC = []
        for lumi in range(8):
            lumiListA.append(1 + lumi)
            lumiListB.append(1 + lumi)
            lumiListC.append(1 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.addRun(Run(2, *lumiListA))
        fileA.addRun(Run(3, *lumiListA))
        fileA.setLocation("malpaquet")

        fileB = self.createFile('/this/is/file2', 200, 3, 5, "malpaquet")

        testFileset = Fileset(name='FilesetB')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")

        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        # The settings for this splitting are 700 events per job
        jobGroups = jobFactory(splitOnRun=True,
                               halt_job_on_file_boundaries=False,
                               events_per_job=700,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")
Code example #34
File: LumiBased_t.py Project: ticoann/WMCore
    def createSubscription(self, nFiles, lumisPerFile, twoSites=False):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name=baseName)
        for i in range(nFiles):
            newFile = File(lfn='%s_%i' % (baseName, i), size=1000, events=100)
            lumis = []
            for lumi in range(lumisPerFile):
                lumis.append((i * 100) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.setLocation('blenheim')
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = File(lfn='%s_%i_2' % (baseName, i),
                               size=1000,
                               events=100)
                lumis = []
                for lumi in range(lumisPerFile):
                    lumis.append((i * 100) + lumi)
                newFile.addRun(Run(i, *lumis))
                newFile.setLocation('malpaquet')
                testFileset.addFile(newFile)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="LumiBased",
                                        type="Processing")

        return testSubscription
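
A hypothetical call, driving the splitter the same way code example #33 does:

    splitter = SplitterFactory()
    testSubscription = self.createSubscription(nFiles=5, lumisPerFile=2,
                                               twoSites=True)
    jobFactory = splitter(package="WMCore.DataStructs",
                          subscription=testSubscription)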
Code example #35
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation('blenheim')
            newFile.setLocation('malpaquet')
            lumis = []
            for lumi in range(20):
                lumis.append((i * 100) + lumi)
            newFile.addRun(Run(i, *lumis))
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name="TestFileset2")
        newFile = File("/some/file/name", size=1000, events=100)
        newFile.setLocation('blenheim')
        lumis = list(range(50, 60)) + list(range(70, 80))
        newFile.addRun(Run(13, *lumis))
        self.singleFileFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")

        #self.multipleFileSubscription.create()
        #self.singleFileSubscription.create()

        self.performanceParams = {
            'timePerEvent': 12,
            'memoryRequirement': 2300,
            'sizePerEvent': 400
        }

        return
Code example #36
    def testChunking(self):
        """
        _testChunking_

        Insert a workload and files that have several distinct sets of
        locations.  Verify that the chunks are created correctly and that they
        only groups files that have the same set of locations.  Also verify that
        the chunks are pulled out of ACDC correctly.
        """
        dcs = DataCollectionService(url=self.testInit.couchUrl,
                                    database="wmcore-acdc-datacollectionsvc")

        testFileA = File(lfn=makeUUID(), size=1024, events=1024)
        testFileA.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileA.addRun(Run(1, 1, 2))
        testFileB = File(lfn=makeUUID(), size=1024, events=1024)
        testFileB.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileB.addRun(Run(1, 3, 4))
        testFileC = File(lfn=makeUUID(), size=1024, events=1024)
        testFileC.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileC.addRun(Run(1, 5, 6))
        testJobA = self.getMinimalJob()
        testJobA.addFile(testFileA)
        testJobA.addFile(testFileB)
        testJobA.addFile(testFileC)

        testFileD = File(lfn=makeUUID(), size=1024, events=1024)
        testFileD.setLocation(["cmssrm.fnal.gov"])
        testFileD.addRun(Run(2, 1, 2))
        testFileE = File(lfn=makeUUID(), size=1024, events=1024)
        testFileE.setLocation(["cmssrm.fnal.gov"])
        testFileE.addRun(Run(2, 3, 4))
        testJobB = self.getMinimalJob()
        testJobB.addFile(testFileD)
        testJobB.addFile(testFileE)

        testFileF = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         parents={"/some/parent/F"})
        testFileF.setLocation(
            ["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
        testFileF.addRun(Run(3, 1, 2))
        testFileG = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         parents={"/some/parent/G"})
        testFileG.setLocation(
            ["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
        testFileG.addRun(Run(3, 3, 4))
        testFileH = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         parents={"/some/parent/H"})
        testFileH.setLocation(
            ["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
        testFileH.addRun(Run(3, 5, 6))
        testJobC = self.getMinimalJob()
        testJobC.addFile(testFileF)
        testJobC.addFile(testFileG)
        testJobC.addFile(testFileH)

        testFileI = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
        testFileI.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileI.addRun(Run(4, 1, 2))
        testFileJ = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
        testFileJ.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileJ.addRun(Run(4, 3, 4))
        testFileK = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
        testFileK.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileK.addRun(Run(4, 5, 6))
        testJobD = self.getMinimalJob()
        testJobD.addFile(testFileI)
        testJobD.addFile(testFileJ)
        testJobD.addFile(testFileK)

        dcs.failedJobs([testJobA, testJobB, testJobC, testJobD])
        chunks = dcs.chunkFileset("ACDCTest", "/ACDCTest/reco", chunkSize=5)

        self.assertEqual(
            len(chunks), 4,
            "Error: There should be four chunks: %s" % len(chunks))

        goldenMetaData = {
            1: {
                "lumis": 2,
                "locations": ["castor.cern.ch", "cmssrm.fnal.gov"],
                "events": 1024
            },
            2: {
                "lumis": 4,
                "locations": ["cmssrm.fnal.gov"],
                "events": 2048
            },
            3: {
                "lumis": 6,
                "locations":
                ["castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"],
                "events": 3072
            },
            5: {
                "lumis": 10,
                "locations": ["castor.cern.ch", "cmssrm.fnal.gov"],
                "events": 5120
            }
        }

        testFiles = [
            testFileA, testFileB, testFileC, testFileI, testFileJ, testFileK
        ]
        lastFile = testFileA
        for testFile in testFiles:
            if lastFile["lfn"] < testFile["lfn"]:
                lastFile = testFile

        testFiles.remove(lastFile)

        goldenFiles = {
            1: [lastFile],
            2: [testFileD, testFileE],
            3: [testFileF, testFileG, testFileH],
            5: testFiles
        }

        for chunk in chunks:
            chunkMetaData = dcs.getChunkInfo("ACDCTest", "/ACDCTest/reco",
                                             chunk["offset"], chunk["files"])

            self.assertEqual(chunkMetaData["files"], chunk["files"])
            self.assertEqual(chunkMetaData["lumis"], chunk["lumis"])
            self.assertEqual(chunkMetaData["events"], chunk["events"])
            self.assertEqual(chunkMetaData["locations"], chunk["locations"])

            self.assertTrue(chunk["files"] in goldenMetaData,
                            "Error: Extra chunk found.")
            self.assertEqual(chunk["lumis"],
                             goldenMetaData[chunk["files"]]["lumis"],
                             "Error: Lumis in chunk is wrong.")
            self.assertEqual(chunk["locations"],
                             goldenMetaData[chunk["files"]]["locations"],
                             "Error: Locations in chunk is wrong.")
            self.assertEqual(chunk["events"],
                             goldenMetaData[chunk["files"]]["events"],
                             "Error: Events in chunk is wrong.")
            del goldenMetaData[chunk["files"]]

            chunkFiles = dcs.getChunkFiles("ACDCTest", "/ACDCTest/reco",
                                           chunk["offset"], chunk["files"])

            self.assertTrue(chunk["files"] in goldenFiles,
                            "Error: Extra chunk found.")
            goldenChunkFiles = goldenFiles[chunk["files"]]
            self.assertEqual(len(chunkFiles), len(goldenChunkFiles))

            for chunkFile in chunkFiles:
                foundFile = None
                for goldenChunkFile in goldenChunkFiles:
                    if chunkFile["lfn"] == goldenChunkFile["lfn"]:
                        foundFile = goldenChunkFile
                        break

                self.assertIsNotNone(
                    foundFile, "Error: Missing chunk file: %s, %s" %
                    (chunkFiles, goldenChunkFiles))
                self.assertEqual(set(foundFile["parents"]),
                                 chunkFile["parents"],
                                 "Error: File parents should match.")
                self.assertEqual(foundFile["merged"], chunkFile["merged"],
                                 "Error: File merged status should match.")
                self.assertEqual(foundFile["locations"],
                                 chunkFile["locations"],
                                 "Error: File locations should match.")
                self.assertEqual(foundFile["events"], chunkFile["events"])
                self.assertEqual(foundFile["size"], chunkFile["size"])
                self.assertEqual(len(foundFile["runs"]),
                                 len(chunkFile["runs"]),
                                 "Error: Wrong number of runs.")
                for run in foundFile["runs"]:
                    runMatch = False
                    for chunkRun in chunkFile["runs"]:
                        if chunkRun.run == run.run and chunkRun.lumis == run.lumis:
                            runMatch = True
                            break

                    self.assertTrue(runMatch,
                                    "Error: Run information is wrong.")

            del goldenFiles[chunk["files"]]

        singleChunk = dcs.singleChunkFileset("ACDCTest", "/ACDCTest/reco")
        self.assertEqual(
            singleChunk, {
                "offset": 0,
                "files": 11,
                "events": 11264,
                "lumis": 22,
                "locations":
                {"castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"}
            }, "Error: Single chunk metadata is wrong")

        return
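The grouping rule this test exercises is small enough to state on its own; a stand-alone sketch in plain Python (not the ACDC implementation): a chunk only ever combines files whose location sets match exactly.

def groupByLocations(files):
    """Group File-like dicts by their exact set of locations."""
    groups = {}
    for f in files:
        key = frozenset(f["locations"])  # identical location sets share a group
        groups.setdefault(key, []).append(f)
    return groups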
Code example #37
File: Subscription_t.py Project: vytjan/WMCore
    def testFailFiles(self):
        """
        Testcase for the failFiles method of the Subscription Class

        """
        # Cleaning possible files already occupying the available set
        self.dummySubscription.failFiles([])

        # First test - Test if initial file (on available set) is inserted in the
        # failed set - no arguments

        dummyFile2 = File('/tmp/dummyfile2,8888', 1, 1, 1)
        # Insert dummyFile2 into the available files Set at dummySubscription
        self.dummySubscription.available.addFile(dummyFile2)

        S = self.dummySubscription.availableFiles()
        # Fail all files
        self.dummySubscription.failFiles(S)

        assert len(self.dummySubscription.availableFiles()) == 0, \
            "failed subscription still has %s files, what's up with that?" % \
            len(self.dummySubscription.availableFiles())

        # Second test - Test if target files are inserted at the failed set

        dummyFileList = []
        # Populating the dummy List with a random number of files
        for i in range(1, random.randint(100, 1000)):
            lfn = '/store/data/%s/%s/file.root' % (random.randint(1000, 9999),
                                                   random.randint(1000, 9999))
            size = random.randint(1000, 2000)
            events = 1000
            run = random.randint(0, 2000)
            lumi = random.randint(0, 8)

            file = File(lfn=lfn, size=size, events=events,
                        checksums={"cksum": "1"})
            file.addRun(Run(run, *[lumi]))
            dummyFileList.append(file)
        # Add the new files
        self.dummySubscription.available.addFile(dummyFileList)
        # and fail them
        self.dummySubscription.failFiles(files=dummyFileList)
        # Check there are no files available - everything should be failed
        assert len(self.dummySubscription.availableFiles()) == 0, \
            "failed subscription still has %s files, what's up with that?" % \
            len(self.dummySubscription.availableFiles())

        # Check if all files were inserted at subscription's failed files Set
        for x in dummyFileList:
            assert x in self.dummySubscription.failed.getFiles(type='set'), \
                'Couldn\'t make file failed %s' % x['lfn']

        # Third test - Test if a replicate file is erased from the other Sets,
        # when a file is considered failed

        dummyFile3 = File('/tmp/dummyfile3,5555', 1, 1, 1)
        dummyFileList = []
        dummyFileList.append(dummyFile3)

        # Inserting dummyFile3 to be used as an argument, into each of the other
        # file sets
        self.dummySubscription.acquired.addFile(dummyFile3)
        self.dummySubscription.available.addFile(dummyFile3)
        self.dummySubscription.completed.addFile(dummyFile3)

        # Run the method failFiles
        self.dummySubscription.failFiles(files=dummyFileList)

        # Check if dummyFile3 was inserted at the failed Set
        assert dummyFile3 in self.dummySubscription.failed.getFiles(type='set'), \
            'Replicated file couldn\'t be inserted at failed Set'

        # Check if dummyFile3 was erased from all the other Sets
        assert dummyFile3 not in self.dummySubscription.acquired.getFiles(type='set'), \
            'Failed file still present at acquired Set'
        assert dummyFile3 not in self.dummySubscription.completed.getFiles(type='set'), \
            'Failed file still present at completed Set'
        assert dummyFile3 not in self.dummySubscription.available.getFiles(type='set'), \
            'Failed file still present at available Set'
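A tiny stand-alone model (not WMCore code) of the invariant this test checks: a file lives in at most one of the four subscription sets, so failing it removes it from the other three.

class MiniSubscription(object):
    def __init__(self):
        self.available, self.acquired = set(), set()
        self.completed, self.failed = set(), set()

    def failFiles(self, files):
        for fileSet in (self.available, self.acquired, self.completed):
            fileSet.difference_update(files)  # drop from every other set
        self.failed.update(files)             # then record as failed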
Code example #38
    def formatOutput(self, task, requestname, datasetfiles, locations):
        """
        Receives as input the result of the data location
        discovery operations and fill up the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")
        # TEMPORARY
        pnn_psn_map = {}
        sbj = SiteDBJSON({
            "key": self.config.TaskWorker.cmskey,
            "cert": self.config.TaskWorker.cmscert
        })

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        file_counter = 0
        uniquelumis = set()
        ## Loop over the sorted list of files.
        for lfn, infos in datasetfiles.iteritems():
            ## Skip the file if the block has not been found or has no locations.
            if infos['BlockName'] not in locations or not locations[infos['BlockName']]:
                self.logger.warning(
                    "Skipping %s because its block (%s) has no locations" %
                    (lfn, infos['BlockName']))
                continue
            ## Skip the file if it is not in VALID state.
            if not infos.get('ValidFile', True):
                self.logger.warning("Skipping invalid file %s" % lfn)
                continue

            if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                raise TaskWorkerException(
                    "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n"
                    "because you specified useParents=True but some of your files "
                    "have no parents.\nExample: " + lfn)
            ## Create a WMCore File object.
            wmfile = File(lfn=lfn,
                          events=infos['NumberOfEvents'],
                          size=infos['Size'],
                          checksums=infos['Checksums'],
                          parents=infos['Parents'])
            wmfile['block'] = infos['BlockName']
            wmfile['locations'] = []
            for pnn in locations[infos['BlockName']]:
                if pnn and pnn not in pnn_psn_map:
                    self.logger.debug("Translating PNN %s" % pnn)
                    try:
                        pnn_psn_map[pnn] = sbj.PNNtoPSN(pnn)
                    except KeyError as ke:
                        self.logger.error(
                            "Impossible translating %s to a CMS name through SiteDB"
                            % pnn)
                        pnn_psn_map[pnn] = ''
                    except httplib.HTTPException as ex:
                        self.logger.error("Couldn't map SE to site: %s" % pnn)
                        print("Couldn't map SE to site: %s" % pnn)
                        print("got problem: %s" % ex)
                        print("got another problem: %s" % ex.__dict__)
                if pnn and pnn in pnn_psn_map:
                    if isinstance(pnn_psn_map[pnn], list):
                        wmfile['locations'].extend(pnn_psn_map[pnn])
                    else:
                        wmfile['locations'].append(pnn_psn_map[pnn])
Code example #39
    def formatOutput(self, task, requestname, datasetfiles, locations):
        """
        Receives as input the result of the data location
        discovery operations and fill up the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")
        # TEMPORARY
        pnn_psn_map = {}
        sbj = SiteDBJSON({"key": self.config.TaskWorker.cmskey, "cert": self.config.TaskWorker.cmscert})

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        file_counter = 0
        uniquelumis = set()
        ## Loop over the sorted list of files.
        for lfn, infos in datasetfiles.iteritems():
            ## Skip the file if the block has not been found or has no locations.
            if not infos['BlockName'] in locations or not locations[infos['BlockName']]:
                self.logger.warning("Skipping %s because its block (%s) has no locations" % (lfn, infos['BlockName']))
                continue
            ## Skip the file if it is not in VALID state.
            if not infos.get('ValidFile', True):
                self.logger.warning("Skipping invalid file %s" % lfn)
                continue

            if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                raise TaskWorkerException(
                        "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n" +
                        "because you specified useParents=True but some your files have no" +
                        "parents.\nExample: " + lfn)
            ## Create a WMCore File object.
            try:
                size = infos['FileSize']
                checksums = {'Checksum': infos['Checksum'], 'Adler32': infos['Adler32'], 'Md5': infos['Md5']}
            except:
                #This is so that the task worker does not crash if an old version of WMCore is used (the interface of an API suddenly changed).
                # We may want to remove the try/except and the following two lines eventually, but keeping them for the moment so other devels won't be affected
                #See this WMCore commit: https://github.com/dmwm/WMCore/commit/2afc01ae571390f5fa009dd258be757adac89c28#diff-374b7a6640288184175057234e393e1cL204
                size = infos['Size']
                checksums = infos['Checksums']
            wmfile = File(lfn = lfn, events = infos['NumberOfEvents'], size = size, checksums = checksums, parents = infos['Parents'])
            wmfile['block'] = infos['BlockName']
            wmfile['locations'] = []
            for pnn in locations[infos['BlockName']]:
                if pnn and pnn not in pnn_psn_map:
                    self.logger.debug("Translating PNN %s" %pnn)
                    try:
                        pnn_psn_map[pnn] = sbj.PNNtoPSN(pnn)
                    except KeyError as ke:
                        self.logger.error("Impossible translating %s to a CMS name through SiteDB" %pnn)
                        pnn_psn_map[pnn] = ''
                    except httplib.HTTPException as ex:
                        self.logger.error("Couldn't map SE to site: %s" % pnn)
                        print("Couldn't map SE to site: %s" % pnn)
                        print("got problem: %s" % ex)
                        print("got another problem: %s" % ex.__dict__)
                if pnn and pnn in pnn_psn_map:
                    if isinstance(pnn_psn_map[pnn], list):
                        wmfile['locations'].extend(pnn_psn_map[pnn])
                    else:
                        wmfile['locations'].append(pnn_psn_map[pnn])
            wmfile['workflow'] = requestname
            event_counter += infos['NumberOfEvents']
            for run, lumis in infos['Lumis'].iteritems():
                wmfile.addRun(Run(run, *lumis))
                for lumi in lumis:
                    uniquelumis.add((run, lumi))
                lumi_counter += len(lumis)
            wmfiles.append(wmfile)
            file_counter += 1

        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d' % event_counter)
        self.logger.debug('Tot lumis found: %d' % uniquelumis)
        self.logger.debug('Duplicate lumis found: %d' % (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d' % len(wmfiles))

        return Result(task = task, result = Fileset(name = 'FilesToSplit', files = set(wmfiles)))
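The PNN-to-PSN lookup above is a cache-on-first-use pattern; a generic sketch of just that pattern (the translate callable is a hypothetical stand-in for sbj.PNNtoPSN):

def cachedTranslate(pnn, cache, translate):
    """Translate a PNN once, remembering the result (or '' on failure)."""
    if pnn not in cache:
        try:
            cache[pnn] = translate(pnn)
        except KeyError:
            cache[pnn] = ''  # remember the failure so it is never retried
    return cache[pnn]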
Code example #40
    def testGetLumiWhitelist(self):
        """
        _testGetLumiWhitelist_

        Verify that the ACDC whitelist generation code works correctly.  We'll
        add jobs with the following lumi info:
          # Run 1, lumis [1, 2, 3], [4, 6], [7], [9], [11, 12]
          # Run 2, lumis [5, 6, 7], [10, 11, 12], [15]
          # Run 3, lumis [20]

        And should get out a whitelist that looks like this:
          {"1": [[1, 4], [6, 7], [9, 9], [11, 12]],
           "2": [[5, 7], [10, 12], [15, 15]],
           "3": [[20, 20]]}
        """
        dcs = DataCollectionService(url = self.testInit.couchUrl,
                                    database = "wmcore-acdc-datacollectionsvc")

        def getJob():
            job = Job()
            job["task"] = "/ACDCTest/reco"
            job["workflow"] = "ACDCTest"
            job["location"] = "cmssrm.fnal.gov"
            job["owner"] = "cmsdataops"
            job["group"] = "cmsdataops"
            return job

        testFileA = File(lfn = makeUUID(), size = 1024, events = 1024)
        testFileA.addRun(Run(1, 1, 2))
        testFileB = File(lfn = makeUUID(), size = 1024, events = 1024)
        testFileB.addRun(Run(1, 3))
        testJobA = getJob()
        testJobA.addFile(testFileA)
        testJobA.addFile(testFileB)

        testFileC = File(lfn = makeUUID(), size = 1024, events = 1024)
        testFileC.addRun(Run(1, 4, 6))
        testJobB = getJob()
        testJobB.addFile(testFileC)

        testFileD = File(lfn = makeUUID(), size = 1024, events = 1024)
        testFileD.addRun(Run(1, 7))
        testJobC = getJob()
        testJobC.addFile(testFileD)

        testFileE = File(lfn = makeUUID(), size = 1024, events = 1024)
        testFileE.addRun(Run(1, 11, 12))
        testJobD = getJob()
        testJobD.addFile(testFileE)

        testFileF = File(lfn = makeUUID(), size = 1024, events = 1024)
        testFileF.addRun(Run(2, 5, 6, 7))
        testJobE = getJob()
        testJobE.addFile(testFileF)

        testFileG = File(lfn = makeUUID(), size = 1024, events = 1024)
        testFileG.addRun(Run(2, 10, 11, 12))
        testJobF = getJob()
        testJobF.addFile(testFileG)

        testFileH = File(lfn = makeUUID(), size = 1024, events = 1024)
        testFileH.addRun(Run(2, 15))
        testJobG = getJob()
        testJobG.addFile(testFileH)

        testFileI = File(lfn = makeUUID(), size = 1024, events = 1024)
        testFileI.addRun(Run(3, 20))
        testJobH = getJob()
        testJobH.addFile(testFileI)

        testFileJ = File(lfn = makeUUID(), size = 1024, events = 1024)
        testFileJ.addRun(Run(1, 9))
        testJobI = getJob()
        testJobI.addFile(testFileJ)

        dcs.failedJobs([testJobA, testJobB, testJobC, testJobD, testJobE,
                        testJobF, testJobG, testJobH, testJobI])
        whiteList = dcs.getLumiWhitelist("ACDCTest", "/ACDCTest/reco")

        self.assertEqual(len(whiteList.keys()), 3,
                         "Error: There should be 3 runs.")
        self.assertEqual(whiteList["1"], [[1, 4], [6, 7], [9, 9], [11, 12]],
                         "Error: Whitelist for run 1 is wrong.")
        self.assertEqual(whiteList["2"], [[5, 7], [10, 12], [15, 15]],
                         "Error: Whitelist for run 2 is wrong.")
        self.assertEqual(whiteList["3"], [[20, 20]],
                         "Error: Whitelist for run 3 is wrong.")
        return
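How a flat collection of lumis turns into the inclusive [first, last] ranges shown in the docstring is easy to demonstrate in isolation; a sketch, not the ACDC implementation:

def collapseLumis(lumis):
    """Collapse lumi numbers into sorted, inclusive [first, last] ranges."""
    ranges = []
    for lumi in sorted(set(lumis)):
        if ranges and lumi == ranges[-1][1] + 1:
            ranges[-1][1] = lumi         # extend the open range
        else:
            ranges.append([lumi, lumi])  # start a new range
    return ranges

# Run 1 from the docstring above:
assert collapseLumis([1, 2, 3, 4, 6, 7, 9, 11, 12]) == [[1, 4], [6, 7], [9, 9], [11, 12]]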
Code example #41
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.addRun(Run(i, *[45 + i]))
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name="TestFileset2")
        newFile = File("/some/file/name", size=1000, events=100)
        newFile.addRun(Run(1, *[45]))
        self.singleFileFileset.addFile(newFile)

        self.multipleFileLumiset = Fileset(name="TestFileset3")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.addRun(Run(1, *[45 + i // 3]))
            self.multipleFileLumiset.addFile(newFile)

        self.singleLumiFileset = Fileset(name="TestFileset4")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.addRun(Run(1, *[45]))
            self.singleLumiFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.multipleLumiSubscription = Subscription(
            fileset=self.multipleFileLumiset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.singleLumiSubscription = Subscription(
            fileset=self.singleLumiFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")

        return
Code example #42
    def testNoFileSplitNoHardLimit(self):
        """
        _testNoFileSplitNoHardLimit_

        In this case we don't split on file boundaries, check different combination of files
        make sure we make the most of the splitting, e.g. include many zero event files in
        a single job.
        """
        splitter = SplitterFactory()

        # Create 100 files with 7 lumi per file and 0 events per lumi on average.
        testSubscription = self.createSubscription(nFiles=100, lumisPerFile=7, twoSites=False, nEventsPerFile=0)
        jobFactory = splitter(package="WMCore.DataStructs", subscription=testSubscription)

        # First test, the optimal settings are 360 events per job. As we have files with 0 events per lumi, this will
        # configure the splitting to a single job containing all files
        jobGroups = jobFactory(halt_job_on_file_boundaries=False, splitOnRun=False, events_per_job=360,
                               performance=self.performanceParams)

        # One job in one job group with 100 files
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 1)
        self.assertEqual(len(jobs[0]['input_files']), 100)

        # Create 7 files, each one with different lumi/event distributions
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 250, 0, 5, "blenheim")
        testFileB = self.createFile("/this/is/file2", 600, 1, 1, "blenheim")
        testFileC = self.createFile("/this/is/file3", 1200, 2, 2, "blenheim")
        testFileD = self.createFile("/this/is/file4", 100, 3, 1, "blenheim")
        testFileE = self.createFile("/this/is/file5", 30, 4, 1, "blenheim")
        testFileF = self.createFile("/this/is/file6", 10, 5, 1, "blenheim")
        testFileG = self.createFile("/this/is/file7", 153, 6, 3, "blenheim")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.addFile(testFileD)
        testFileset.addFile(testFileE)
        testFileset.addFile(testFileF)
        testFileset.addFile(testFileG)

        testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork", type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs", subscription=testSubscription)
        # Split the work targeting 150 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=False, splitOnRun=False, events_per_job=150,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 7)

        # Test interactions of this algorithm with splitOnRun = True
        # Make 2 files, one with 3 runs and a second one with the last run of the first
        fileA = File(lfn="/this/is/file1", size=1000, events=2400)
        lumiListA = []
        lumiListB = []
        lumiListC = []
        for lumi in range(8):
            lumiListA.append(1 + lumi)
            lumiListB.append(1 + lumi)
            lumiListC.append(1 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.addRun(Run(2, *lumiListB))
        fileA.addRun(Run(3, *lumiListC))
        fileA.setLocation("malpaquet")

        fileB = self.createFile('/this/is/file2', 200, 3, 5, "malpaquet")

        testFileset = Fileset(name='FilesetB')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork", type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs", subscription=testSubscription)
        # The settings for this splitting are 700 events per job
        jobGroups = jobFactory(splitOnRun=True, halt_job_on_file_boundaries=False, events_per_job=700,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6)
        # Make sure each job has one run
        for job in jobs:
            self.assertEqual(len(job['mask'].getRunAndLumis()), 1)
Code example #43
    def processDataset(self):
        """
        _processDataset_

        Import the Dataset contents and create a set of jobs from it

        """

        #  //
        # // Now create the job definitions
        #//
        logging.debug("MergeSize = %s" % self.mergeSize)
        logging.debug("AllowedSites = %s" % self.allowedSites)
        logging.debug("Connection to DBS at: %s" % self.dbsUrl)

        reader = DBSReader(self.dbsUrl)
        blockList = reader.dbs.listBlocks(dataset=self.inputDataset())
        jobDefs = []

        for block in blockList:
            blockName = block['Name']
            logging.debug("Getting files for block %s" % blockName)
            locations = reader.listFileBlockLocation(blockName)
            fileList = reader.dbs.listFiles(blockName=blockName)
            if not fileList:  # Skip empty blocks
                continue

            thefiles = Fileset(name='FilesToSplit')
            for f in fileList:
                f['Block']['StorageElementList'].extend(locations)
                wmbsFile = File(f['LogicalFileName'])
                [wmbsFile['locations'].add(x) for x in locations]
                wmbsFile['block'] = blockName
                wmbsFile['size'] = f['FileSize']
                thefiles.addFile(wmbsFile)

            work = Workflow()
            subs = Subscription(fileset=thefiles,
                                workflow=work,
                                split_algo='MergeBySize',
                                type="Merge")
            logging.debug("Info for Subscription %s" % subs)
            splitter = SplitterFactory()
            jobfactory = splitter(subs)

            jobGroups = jobfactory(
                merge_size=self.mergeSize,  # min in Bytes
                all_files=True  # merge all files
            )
            if not jobGroups:
                raise SyntaxError
            for jobGroup in jobGroups:
                for job in jobGroup.getJobs():
                    jobDef = JobDefinition()
                    jobDef['LFNS'].extend(job.getFiles(type='lfn'))
                    jobDef['SkipEvents'] = 0
                    jobDef['MaxEvents'] = -1
                    [
                        jobDef['SENames'].extend(list(x['locations']))
                        for x in job.getFiles()
                    ]
                    jobDefs.append(jobDef)

        return jobDefs
Code example #44
File: CouchCollection_t.py Project: vytjan/WMCore
    def testCreatePopulateDrop(self):
        """
        _testCreatePopulateDrop_

        Test creating, populating and dropping a collection.
        """
        testCollectionA = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Thunderstruck")
        testCollectionB = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="StruckThunder")
        testCollectionA.create()
        testCollectionB.create()

        # There should be nothing in couch.  Documents are only added for
        # filesets and files.

        testFilesA = []
        for i in range(5):
            testFile = File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
            testFilesA.append(testFile)
        testFilesB = []
        for i in range(10):
            testFile = File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
            testFilesB.append(testFile)

        testFilesetA = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetA")
        testFilesetB = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetB")
        testFilesetC = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetC")
        testCollectionA.addFileset(testFilesetA)
        testCollectionB.addFileset(testFilesetB)
        testCollectionB.addFileset(testFilesetC)
        testFilesetA.add(testFilesA)
        testFilesetB.add(testFilesA)
        testFilesetC.add(testFilesA)
        testFilesetC.add(testFilesB)

        # Drop testCollectionA
        testCollectionA.drop()

        # Try to populate testFilesetA
        testCollectionC = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="ThunderStruck")
        testCollectionC.populate()

        self.assertEqual(
            len(testCollectionC["filesets"]), 0,
            "Error: There should be no filesets in this collect.")

        # Try to populate testFilesetB
        testCollectionD = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="StruckThunder")
        testCollectionD.populate()

        for fileset in testCollectionD["filesets"]:
            testFiles = list(testFilesA)
            if fileset["name"] == "TestFilesetC":
                testFiles.extend(testFilesB)

            self.assertEqual(len(testFiles), len(fileset.files.keys()),
                             "Error: Wrong number of files in fileset.")
            for testFile in testFiles:
                self.assertTrue(testFile["lfn"] in fileset.files.keys(),
                                "Error: File is missing.")
                self.assertEqual(testFile["events"],
                                 fileset.files[testFile["lfn"]]["events"],
                                 "Error: Wrong number of events.")
                self.assertEqual(testFile["size"],
                                 fileset.files[testFile["lfn"]]["size"],
                                 "Error: Wrong file size.")

        return
Code example #45
    def testRunWhiteList(self):
        """
        _testRunWhiteList_

        Test that we can use a run white list to filter good runs/lumis.
        """
        splitter = SplitterFactory()

        # Create 3 files with 100 events per lumi:
        # - file1 with 1 run  of 8 lumis
        # - file2 with 2 runs of 2 lumis each
        # - file3 with 1 run  of 5 lumis
        fileA = File(lfn="/this/is/file1", size=1000, events=800)
        fileB = File(lfn="/this/is/file2", size=1000, events=400)
        fileC = File(lfn="/this/is/file3", size=1000, events=500)

        lumiListA = []
        for lumi in range(8):
            lumiListA.append(10 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.setLocation("somese.cern.ch")
        lumiListB1 = []
        lumiListB2 = []
        for lumi in range(2):
            lumiListB1.append(20 + lumi)
            lumiListB2.append(30 + lumi)
        fileB.addRun(Run(2, *lumiListB1))
        fileB.addRun(Run(3, *lumiListB2))
        fileB.setLocation("somese.cern.ch")
        lumiListC = []
        for lumi in range(5):
            lumiListC.append(40 + lumi)
        fileC.addRun(Run(4, *lumiListC))
        fileC.setLocation("somese.cern.ch")

        testFileset = Fileset(name='Fileset')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.addFile(fileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        # Split with no breaks
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=725,
                               runWhitelist=[1, 4],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 2)
        for job in jobs:
            for run in job['mask'].getRunAndLumis():
                self.assertIn(run, [1, 4])

        # Re-split with a break on runs
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=True,
                               events_per_job=595,
                               runWhitelist=[1, 3, 4],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 4)
        self.enforceLimits(jobs=jobs, runsPerJob=1)
        for job in jobs:
            for run in job['mask'].getRunAndLumis():
                self.assertIn(run, [1, 3, 4])

        # Re-split with a break on files
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=False,
                               events_per_job=595,
                               runWhitelist=[1, 2, 3],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3)
        self.enforceLimits(jobs=jobs, filesPerJob=1)
        for job in jobs:
            for run in job['mask'].getRunAndLumis():
                self.assertIn(run, [1, 2, 3])
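Conceptually the run whitelist is a run-level selection applied before any splitting happens; a minimal sketch of the idea (not the splitter's internals), using the fact that a File carries Run objects whose run attribute is the run number:

def filterByRunWhitelist(wmfile, runWhitelist):
    """Keep only the Run objects whose run number is whitelisted."""
    return [run for run in wmfile['runs'] if run.run in runWhitelist]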
Code example #46
File: Subscription_t.py Project: vytjan/WMCore
    def testAcquireFiles(self):
        """
        Testcase for the acquireFiles method of the Subscription Class

        """
        # Cleaning possible files already occupying the available set
        self.dummySubscription.acquireFiles()

        # First test - Test if initial file (on available set) is inserted in the
        # acquired set - no arguments

        dummyFile2 = File('/tmp/dummyfile2,8888', 1, 1, 1)
        # Insert dummyFile2 into the available files Set at dummySubscription
        self.dummySubscription.available.addFile(dummyFile2)

        S = self.dummySubscription.available.listNewFiles()
        # Check if Set returned by method is the same that was at the previous
        # available FileSet
        assert S == self.dummySubscription.acquireFiles(), \
            'Couldn\'t acquire file using method acquireFiles - (no arguments test)'

        # Second test - Test if target files are inserted at the acquired set

        dummyFileList = set()
        # Populating the dummy List with a random number of files
        for i in range(1, random.randint(100, 1000)):
            lfn = '/store/data/%s/%s/file.root' % (random.randint(1000, 9999),
                                                   random.randint(1000, 9999))
            size = random.randint(1000, 2000)
            events = 1000
            run = random.randint(0, 2000)
            lumi = random.randint(0, 8)

            file = File(lfn=lfn, size=size, events=events,
                        checksums={"cksum": "1"})
            file.addRun(Run(run, *[lumi]))
            dummyFileList.add(file)

        # Check if return value is correct - with parameters
        acqFiles = self.dummySubscription.acquireFiles(files=dummyFileList)
        assert acqFiles == dummyFileList, \
            'Return value for acquireFiles method not the acquired files'
        # Check if all files were inserted at subscription acquired files Set
        for x in dummyFileList:
            assert x in self.dummySubscription.acquired.getFiles(type='set'), \
                'Couldn\'t acquire File %s' % x['lfn']

        # Third test - Test if a replicate file is erased from the other Sets,
        # when a file is acquired

        dummyFile3 = File('/tmp/dummyfile3,5555', 1, 1, 1)
        dummyFileList = []
        dummyFileList.append(dummyFile3)

        # Inserting dummyFile3 to be used as an argument, into each of the other file sets
        self.dummySubscription.available.addFile(dummyFile3)
        self.dummySubscription.failed.addFile(dummyFile3)
        self.dummySubscription.completed.addFile(dummyFile3)

        # Run the method acquireFiles
        self.dummySubscription.acquireFiles(files=dummyFileList, size=1)

        # Check if dummyFile3 was inserted at the acquired Set
        assert dummyFile3 in self.dummySubscription.acquired.getFiles(type='set'), \
            'Replicated file couldn\'t be inserted at acquired Set'

        # Check if dummyFile3 was erased from all the other Sets
        assert dummyFile3 not in self.dummySubscription.available.getFiles(type='set'), \
            'Acquired file still present at available Set'
        assert dummyFile3 not in self.dummySubscription.failed.getFiles(type='set'), \
            'Acquired file still present at failed Set'
        assert dummyFile3 not in self.dummySubscription.completed.getFiles(type='set'), \
            'Acquired file still present at completed Set'

        # Fourth test - Test if the method works properly if a wrong size number
        # is given as an argument

        # Case 1: size < number of files given as an argument

        dummyFileList = []
        for i in range(90, 100):
            dummyFileSize = File('/tmp/dummyfile' + str(i), 7656, 1, 1, 1)
            dummyFileList.append(dummyFileSize)

        # Run the method:
        self.dummySubscription.acquireFiles(files=dummyFileList, size=1)
        # Check each file of the List
        for x in dummyFileList:
            assert x in self.dummySubscription.acquired.getFiles(type='set'), \
                'File wasn\'t acquired (lower Size argument test)'

        # Case 2: size = 0

        # Run the method:
        self.dummySubscription.acquireFiles(files=dummyFileList, size=0)
        # Check each file of the List
        for x in dummyFileList:
            assert x in self.dummySubscription.acquired.getFiles(type='set'), \
                'File wasn\'t acquired (zero size argument test)'
Code example #47
File: ReportEmu_t.py Project: menglu21/WMCore
    def testProcessing(self):
        """
        _testProcessing_

        Setup a processing workflow and job and verify that the FWJR produced
        by the emulator is reasonable.
        """
        rerecoTask = self.workload.getTask("DataProcessing")
        cmsRunStep = rerecoTask.getStep("cmsRun1")

        inputFile = File(lfn="/path/to/test/lfn",
                         size=1048576,
                         events=1000,
                         merged=True)
        inputFile.addRun(Run(1, *[1, 2, 3, 4, 5]))
        inputFile.addRun(Run(2, *[1, 2, 3, 4, 5, 6]))

        processingJob = Job(name="ProcessingJob", files=[inputFile])
        processingJob["task"] = "/Tier1ReReco/ReReco"
        processingJob["mask"].setMaxAndSkipEvents(500, 0)
        processingJob["id"] = 1
        processingJob["location"] = "cmssrm.fnal.gov"

        emu = ReportEmu(WMStep=cmsRunStep.getTypeHelper(), Job=processingJob)
        report = emu()

        reportInputFiles = report.getInputFilesFromStep("cmsRun1")

        assert len(reportInputFiles) == 1, \
               "Error: Wrong number of input files for the job."
        assert reportInputFiles[0]["lfn"] == inputFile["lfn"], \
               "Error: Input LFNs do not match: %s" % reportInputFiles[0]["lfn"]
        assert reportInputFiles[0]["size"] == inputFile["size"], \
               "Error: Input file sizes do not match."
        assert reportInputFiles[0]["events"] == inputFile["events"], \
               "Error: Input file events do not match."

        goldenRuns = [Run(1, *[1, 2, 3, 4, 5]), Run(2, *[1, 2, 3, 4, 5, 6])]

        assert len(reportInputFiles[0]["runs"]) == len(goldenRuns), \
                   "Error: Wrong number of runs in input file."

        for inputRun in reportInputFiles[0]["runs"]:
            for goldenRun in goldenRuns:
                if inputRun.run == goldenRun.run:
                    goldenRun.lumis.sort()
                    inputRun.lumis.sort()

                    if goldenRun.lumis == inputRun.lumis:
                        goldenRuns.remove(goldenRun)
                        break

        assert len(goldenRuns) == 0, \
               "Error: Run information wrong on input file."

        recoOutputFiles = report.getFilesFromOutputModule(
            "cmsRun1", "RECOoutput")
        alcaOutputFiles = report.getFilesFromOutputModule(
            "cmsRun1", "ALCARECOoutput")

        assert len(recoOutputFiles) == 1, \
               "Error: There should only be one RECO output file."
        assert len(alcaOutputFiles) == 1, \
               "Error: There should only be one ALCA output file."

        assert recoOutputFiles[0]["module_label"] == "RECOoutput", \
               "Error: RECO file has wrong output module."
        assert alcaOutputFiles[0]["module_label"] == "ALCARECOoutput", \
               "Error: ALCA file has wrong output module."

        self.verifyOutputMetaData(recoOutputFiles[0], processingJob)
        self.verifyOutputMetaData(alcaOutputFiles[0], processingJob)

        dataTierMap = {"RECOoutput": "RECO", "ALCARECOoutput": "ALCARECO"}
        for outputFile in [recoOutputFiles[0], alcaOutputFiles[0]]:
            assert outputFile["dataset"]["applicationName"] == "cmsRun", \
                   "Error: Application name is incorrect."
            assert outputFile["dataset"]["primaryDataset"] == self.primaryDataset, \
                   "Error: Primary dataset is incorrect."
            assert outputFile["dataset"]["dataTier"] == dataTierMap[outputFile["module_label"]], \
                   "Error: Data tier is incorrect."

        return
Code example #48
    def formatOutput(self, task, requestname, datasetfiles, locations,
                     tempDir):
        """
        Receives as input the result of the data location
        discovery operations and fill up the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        uniquelumis = set()
        datasetLumis = {}
        ## Loop over the sorted list of files.
        # can't afford one message from CRIC per file, unless critical!
        previousLogLevel = self.logger.getEffectiveLevel()
        resourceCatalog = CRIC(logger=self.logger)
        self.logger.setLevel(logging.ERROR)
        for lfn, infos in datasetfiles.iteritems():
            ## Skip the file if the block has not been found or has no locations.
            if infos['BlockName'] not in locations or not locations[infos['BlockName']]:
                self.logger.warning(
                    "Skipping %s because its block (%s) has no locations" %
                    (lfn, infos['BlockName']))
                continue
            ## Skip the file if it is not in VALID state.
            if not infos.get('ValidFile', True):
                self.logger.warning("Skipping invalid file %s" % lfn)
                continue

            if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                raise TaskWorkerException(
                    "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n"
                    "because you specified useParents=True but some of your files "
                    "have no parents.\nExample: " + lfn)
            ## Create a WMCore File object.
            try:
                size = infos['FileSize']
                checksums = {
                    'Checksum': infos['Checksum'],
                    'Adler32': infos['Adler32'],
                    'Md5': infos['Md5']
                }
            except:
                #This is so that the task worker does not crash if an old version of WMCore is used (the interface of an API suddenly changed).
                # We may want to remove the try/except and the following two lines eventually, but keeping them for the moment so other devels won't be affected
                #See this WMCore commit: https://github.com/dmwm/WMCore/commit/2afc01ae571390f5fa009dd258be757adac89c28#diff-374b7a6640288184175057234e393e1cL204
                size = infos['Size']
                checksums = infos['Checksums']
            wmfile = File(lfn=lfn,
                          events=infos['NumberOfEvents'],
                          size=size,
                          checksums=checksums,
                          parents=infos['Parents'])
            wmfile['block'] = infos['BlockName']
            try:
                wmfile['locations'] = resourceCatalog.PNNstoPSNs(
                    locations[wmfile['block']])
            except Exception as ex:
                self.logger.error(
                    "Impossible translating %s to a CMS name through CMS Resource Catalog"
                    % locations[wmfile['block']])
                self.logger.error("got this exception:\n %s" % ex)
                raise
            wmfile['workflow'] = requestname
            event_counter += infos['NumberOfEvents']
            for run, lumis in infos['Lumis'].iteritems():
                datasetLumis.setdefault(run, []).extend(lumis)
                wmfile.addRun(Run(run, *lumis))
                for lumi in lumis:
                    uniquelumis.add((run, lumi))
                lumi_counter += len(lumis)
            wmfiles.append(wmfile)

        self.logger.setLevel(previousLogLevel)
        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d' % event_counter)
        self.logger.debug('Tot lumis found: %d' % uniquelumis)
        self.logger.debug('Duplicate lumis found: %d' %
                          (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d' % len(wmfiles))

        self.logger.debug(
            "Starting to create compact lumilists for input dataset")
        datasetLumiList = LumiList(runsAndLumis=datasetLumis)
        datasetLumis = datasetLumiList.getCompactList()
        datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList()
        self.logger.debug(
            "Finished to create compact lumilists for input dataset")
        with open(os.path.join(tempDir, "input_dataset_lumis.json"), "w") as fd:
            json.dump(datasetLumis, fd)
        with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"), "w") as fd:
            json.dump(datasetDuplicateLumis, fd)

        return Result(task=task,
                      result=Fileset(name='FilesToSplit', files=set(wmfiles)))
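The LumiList calls at the end of this function can be tried on their own; a hedged usage sketch (import path as in WMCore, output shapes inferred from the code above):

from WMCore.DataStructs.LumiList import LumiList

lumiList = LumiList(runsAndLumis={1: [1, 2, 3, 4, 6], 2: [5, 5, 6]})
compact = lumiList.getCompactList()    # runs keyed as strings, e.g. {'1': [[1, 4], [6, 6]], '2': [[5, 6]]}
duplicates = lumiList.getDuplicates()  # lumis listed more than once, here (2, 5)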
Code example #49
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name = "TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.setLocation('se01')
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name = "TestFileset2")
        newFile = File("/some/file/name", size = 1000, events = 100)
        newFile.setLocation('se02')
        self.singleFileFileset.addFile(newFile)

        self.emptyFileFileset = Fileset(name = "TestFileset3")
        newFile = File("/some/file/name", size = 1000, events = 0)
        newFile.setLocation('se03')
        self.emptyFileFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "EventBased",
                                                     type = "Processing")
        self.singleFileSubscription = Subscription(fileset = self.singleFileFileset,
                                                   workflow = testWorkflow,
                                                   split_algo = "EventBased",
                                                   type = "Processing")
        self.emptyFileSubscription = Subscription(fileset = self.emptyFileFileset,
                                                  workflow = testWorkflow,
                                                  split_algo = "EventBased",
                                                  type = "Processing")

        self.performanceParams = {'timePerEvent' : None,
                                  'memoryRequirement' : 2300,
                                  'sizePerEvent' : 400}

        return
Code example #50
    def testG_LumiMask(self):
        """
        _testG_LumiMask_

        Test that we can use a lumi-mask to filter good runs/lumis.
        """
        splitter = SplitterFactory()

        # Create 3 files with 100 events per lumi:
        # - file1 with 1 run  of 8 lumis
        # - file2 with 2 runs of 2 lumis each
        # - file3 with 1 run  of 5 lumis
        fileA = File(lfn="/this/is/file1", size=1000, events=800)
        fileB = File(lfn="/this/is/file2", size=1000, events=400)
        fileC = File(lfn="/this/is/file3", size=1000, events=500)

        lumiListA = []
        for lumi in range(8):
            lumiListA.append(10 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.setLocation("somese.cern.ch")
        lumiListB1 = []
        lumiListB2 = []
        for lumi in range(2):
            lumiListB1.append(20 + lumi)
            lumiListB2.append(30 + lumi)
        fileB.addRun(Run(2, *lumiListB1))
        fileB.addRun(Run(3, *lumiListB2))
        fileB.setLocation("somese.cern.ch")
        lumiListC = []
        for lumi in range(5):
            lumiListC.append(40 + lumi)
        fileC.addRun(Run(4, *lumiListC))
        fileC.setLocation("somese.cern.ch")

        testFileset = Fileset(name='Fileset')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.addFile(fileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        # Use a lumi-mask = {1: [[10,14]], 2: [[20,21]], 4: [[40,41]]}
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=850,
                               runs=['1', '2', '4'],
                               lumis=['10,14', '20,21', '40,41'],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 2, "Two jobs must be in the jobgroup")
        self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {
            1: [[10, 14]],
            2: [[20, 21]],
            4: [[40, 40]]
        })
        self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {4: [[41, 41]]})
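The runs/lumis string arguments map onto the expected mask dict mechanically; a hypothetical helper (not part of WMCore) makes the correspondence explicit:

def buildLumiMask(runs, lumis):
    """Pair run strings with 'first,last' lumi strings into a mask dict."""
    mask = {}
    for run, lumiRange in zip(runs, lumis):
        first, last = (int(lumi) for lumi in lumiRange.split(','))
        mask[int(run)] = [[first, last]]
    return mask

assert buildLumiMask(['1', '2', '4'], ['10,14', '20,21', '40,41']) == \
    {1: [[10, 14]], 2: [[20, 21]], 4: [[40, 41]]}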
Code example #51
    def testListCollectionsFilesets(self):
        """
        _testListCollectionsFilesets_

        Verify that collections and filesets in ACDC can be listed.
        """
        svc = CouchService(url=self.testInit.couchUrl,
                           database=self.testInit.couchDbName)

        ownerA = svc.newOwner("somegroup", "someuserA")
        ownerB = svc.newOwner("somegroup", "someuserB")

        testCollectionA = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Thunderstruck")
        testCollectionA.setOwner(ownerA)
        testCollectionB = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Struckthunder")
        testCollectionB.setOwner(ownerA)
        testCollectionC = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Thunderstruck")
        testCollectionC.setOwner(ownerB)
        testCollectionD = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Thunderstruck")
        testCollectionD.setOwner(ownerB)

        testFilesetA = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetA")
        testCollectionA.addFileset(testFilesetA)
        testFilesetB = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetB")
        testCollectionB.addFileset(testFilesetB)
        testFilesetC = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetC")
        testCollectionC.addFileset(testFilesetC)
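        # Note: testCollectionD shares its name and owner with testCollectionC,
        # so in ACDC they resolve to the same collection; the fileset attached
        # through testCollectionC below is therefore expected to appear when
        # listing filesets via testCollectionD at the end of this test.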
        testFilesetD = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetD")
        testCollectionC.addFileset(testFilesetD)

        testFiles = []
        for _ in range(5):
            testFile = File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
            testFiles.append(testFile)

        testFilesetA.add(testFiles)
        testFilesetB.add(testFiles)
        testFilesetC.add(testFiles)
        testFilesetD.add(testFiles)

        goldenCollectionNames = ["Thunderstruck", "Struckthunder"]
        for collection in svc.listCollections(ownerA):
            self.assertTrue(collection["name"] in goldenCollectionNames,
                            "Error: Missing collection name.")
            goldenCollectionNames.remove(collection["name"])
        self.assertEqual(len(goldenCollectionNames), 0,
                         "Error: Missing collections.")

        goldenFilesetNames = ["TestFilesetC", "TestFilesetD"]
        for fileset in svc.listFilesets(testCollectionD):
            self.assertTrue(fileset["name"] in goldenFilesetNames,
                            "Error: Missing fileset.")
            goldenFilesetNames.remove(fileset["name"])
        self.assertEqual(len(goldenFilesetNames), 0,
                         "Error: Missing filesets.")

        return
Code example #52
    def testGetLumiWhitelist(self):
        """
        _testGetLumiWhitelist_

        Verify that the ACDC whitelist generation code works correctly.  We'll
        add jobs with the following lumi info:
          # Run 1, lumis [1, 2, 3], [4, 6], [7], [9], [11, 12]
          # Run 2, lumis [5, 6, 7], [10, 11, 12], [15]
          # Run 3, lumis [20]

        The whitelist we get back should look like this:
          {"1": [[1, 4], [6, 7], [9, 9], [11, 12]],
           "2": [[5, 7], [10, 12], [15, 15]],
           "3": [[20, 20]]}
        """
        dcs = DataCollectionService(url=self.testInit.couchUrl,
                                    database="wmcore-acdc-datacollectionsvc")

        testFileA = File(lfn=makeUUID(), size=1024, events=1024)
        testFileA.addRun(Run(1, 1, 2))
        testFileB = File(lfn=makeUUID(), size=1024, events=1024)
        testFileB.addRun(Run(1, 3))
        testJobA = self.getMinimalJob()
        testJobA.addFile(testFileA)
        testJobA.addFile(testFileB)

        testFileC = File(lfn=makeUUID(), size=1024, events=1024)
        testFileC.addRun(Run(1, 4, 6))
        testJobB = self.getMinimalJob()
        testJobB.addFile(testFileC)

        testFileD = File(lfn=makeUUID(), size=1024, events=1024)
        testFileD.addRun(Run(1, 7))
        testJobC = self.getMinimalJob()
        testJobC.addFile(testFileD)

        testFileE = File(lfn=makeUUID(), size=1024, events=1024)
        testFileE.addRun(Run(1, 11, 12))
        testJobD = self.getMinimalJob()
        testJobD.addFile(testFileE)

        testFileF = File(lfn=makeUUID(), size=1024, events=1024)
        testFileF.addRun(Run(2, 5, 6, 7))
        testJobE = self.getMinimalJob()
        testJobE.addFile(testFileF)

        testFileG = File(lfn=makeUUID(), size=1024, events=1024)
        testFileG.addRun(Run(2, 10, 11, 12))
        testJobF = self.getMinimalJob()
        testJobF.addFile(testFileG)

        testFileH = File(lfn=makeUUID(), size=1024, events=1024)
        testFileH.addRun(Run(2, 15))
        testJobG = self.getMinimalJob()
        testJobG.addFile(testFileH)

        testFileI = File(lfn=makeUUID(), size=1024, events=1024)
        testFileI.addRun(Run(3, 20))
        testJobH = self.getMinimalJob()
        testJobH.addFile(testFileI)

        testFileJ = File(lfn=makeUUID(), size=1024, events=1024)
        testFileJ.addRun(Run(1, 9))
        testJobI = self.getMinimalJob()
        testJobI.addFile(testFileJ)

        dcs.failedJobs([
            testJobA, testJobB, testJobC, testJobD, testJobE, testJobF,
            testJobG, testJobH, testJobI
        ])
        whiteList = dcs.getLumiWhitelist("ACDCTest", "/ACDCTest/reco")

        self.assertEqual(len(whiteList), 3, "Error: There should be 3 runs.")
        self.assertEqual(whiteList["1"], [[1, 4], [6, 7], [9, 9], [11, 12]],
                         "Error: Whitelist for run 1 is wrong.")
        self.assertEqual(whiteList["2"], [[5, 7], [10, 12], [15, 15]],
                         "Error: Whitelist for run 2 is wrong.")
        self.assertEqual(whiteList["3"], [[20, 20]],
                         "Error: Whitelist for run 3 is wrong.")

        correctLumiList = LumiList(
            compactList={
                "1": [[1, 4], [6, 7], [9, 9], [11, 12]],
                "2": [[5, 7], [10, 12], [15, 15]],
                "3": [[20, 20]]
            })
        testLumiList = dcs.getLumilistWhitelist("ACDCTest", "/ACDCTest/reco")
        self.assertEqual(correctLumiList.getCMSSWString(),
                         testLumiList.getCMSSWString())

        return
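The core of what getLumiWhitelist does above is collapsing the observed lumis of each run into inclusive ranges. A minimal standalone sketch of that collapsing step (collapseLumis is a hypothetical helper, not part of WMCore):

def collapseLumis(lumis):
    """Collapse lumi numbers into inclusive [start, end] ranges."""
    ranges = []
    for lumi in sorted(set(lumis)):
        if ranges and lumi == ranges[-1][1] + 1:
            ranges[-1][1] = lumi          # extend the open range
        else:
            ranges.append([lumi, lumi])   # start a new range
    return ranges

# Run 1 lumis from the files above: 1, 2, 3, 4, 6, 7, 9, 11, 12
assert collapseLumis([1, 2, 3, 4, 6, 7, 9, 11, 12]) == \
    [[1, 4], [6, 7], [9, 9], [11, 12]]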
Code example #53
File: ResubmitBlock_t.py Project: tsarangi/WMCore
    def stuffACDCDatabase(self,
                          numFiles=50,
                          lumisPerFile=20,
                          lumisPerACDCRecord=2):
        """
        _stuffACDCDatabase_

        Fill the ACDC database with ACDC records for both the processing
        and the merge filesets.
        """
        filesetName = '/%s/DataProcessing' % self.workflowName
        owner = 'unknown'
        group = 'unknown'

        for i in range(numFiles):
            for j in range(1, lumisPerFile + 1, lumisPerACDCRecord):
                lfn = '/store/data/a/%d' % i
                acdcFile = File(lfn=lfn,
                                size=100,
                                events=250,
                                locations=self.validLocations,
                                merged=1)
                run = Run(
                    i + 1,
                    *range(j, min(j + lumisPerACDCRecord, lumisPerFile + 1)))
                acdcFile.addRun(run)
                acdcDoc = {
                    'collection_name': self.workflowName,
                    'collection_type': 'ACDC.CollectionTypes.DataCollection',
                    'files': {
                        lfn: acdcFile
                    },
                    'fileset_name': filesetName,
                    'owner': {
                        'user': owner,
                        'group': group
                    }
                }
                self.acdcDB.queue(acdcDoc)
        filesetName = '/%s/DataProcessing/DataProcessingMergeRECOoutput' % self.workflowName

        for i in range(numFiles):
            for j in range(1, lumisPerFile + 1, lumisPerACDCRecord):
                lfn = '/store/unmerged/b/%d' % i
                acdcFile = File(lfn=lfn,
                                size=100,
                                events=250,
                                locations=set([choice(self.validLocations)]),
                                merged=0)
                run = Run(
                    i + 1,
                    *range(j, min(j + lumisPerACDCRecord, lumisPerFile + 1)))
                acdcFile.addRun(run)
                acdcDoc = {
                    'collection_name': self.workflowName,
                    'collection_type': 'ACDC.CollectionTypes.DataCollection',
                    'files': {
                        lfn: acdcFile
                    },
                    'fileset_name': filesetName,
                    'owner': {
                        'user': owner,
                        'group': group
                    }
                }
                self.acdcDB.queue(acdcDoc)
        self.acdcDB.commit()

        return
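The inner loop's chunking of lumis into ACDC records can be checked in isolation: with lumisPerFile=20 and lumisPerACDCRecord=2, each file yields ten records of two lumis each, covering [1, 2] through [19, 20]. A quick sketch of that arithmetic, mirroring the range expression used above:

lumisPerFile = 20
lumisPerACDCRecord = 2

# Same chunking as the Run(...) construction in stuffACDCDatabase
chunks = [list(range(j, min(j + lumisPerACDCRecord, lumisPerFile + 1)))
          for j in range(1, lumisPerFile + 1, lumisPerACDCRecord)]

assert len(chunks) == 10
assert chunks[0] == [1, 2]
assert chunks[-1] == [19, 20]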