Example #1
    def loadData(self): 
        """
        _loadData_

        Load all the files that belong to this fileset.   
        """
        existingTransaction = self.beginTransaction()

        if self.name is None or self.id < 0:
            self.load()

        action = self.daofactory(classname = "Files.InFileset")
        results = action.execute(fileset = self.id,
                                 conn = self.getDBConn(),
                                 transaction = self.existingTransaction())

        self.files = set()
        self.newfiles = set()

        for result in results:
            file = File(id = result["fileid"])
            file.loadData(parentage = 1)
            self.files.add(file)

        self.commitTransaction(existingTransaction)
        return
Example #2
    def createFileFromDataStructsFile(self, file, jobID):
        """
        _createFileFromDataStructsFile_

        This function will create a WMBS File given a DataStructs file
        """
        wmbsFile = File()
        wmbsFile.update(file)

        if isinstance(file["locations"], set):
            seName = list(file["locations"])[0]
        elif isinstance(file["locations"], list):
            if len(file['locations']) > 1:
                logging.error("Have more than one location for a file in job %i" % (jobID))
                logging.error("Choosing location %s" % (file['locations'][0]))
            seName = file["locations"][0]
        else:
            seName = file["locations"]

        wmbsFile["locations"] = set()

        if seName is not None:
            wmbsFile.setLocation(se = seName, immediateSave = False)
        wmbsFile['jid'] = jobID
        self.wmbsFilesToBuild.append(wmbsFile)

        return wmbsFile
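The branching above normalizes the three shapes "locations" can take; a short trace under assumed inputs (not part of the original code):

    # file["locations"] = set(["se1", "se2"])  ->  seName is "se1" or "se2" (set order is arbitrary)
    # file["locations"] = ["se1", "se2"]       ->  two errors logged, seName = "se1"
    # file["locations"] = "se1"                ->  seName = "se1"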
Example #3
    def populateACDCCouch(self, numFiles=3, lumisPerJob=4, eventsPerJob=100, numberOfJobs=250):
        """
        _populateACDCCouch_

        Create production files in couchDB to test the creation
        of ACDC jobs for the EventBased algorithm.
        """
        # Define some constants
        workflowName = "ACDC_TestEventBased"
        filesetName = "/%s/Production" % workflowName

        lumisPerFile = lumisPerJob * numberOfJobs
        for i in range(numFiles):
            lfn = "MCFakeFile-some-hash-%s" % str(i).zfill(5)
            for j in range(numberOfJobs):
                firstEvent = j * eventsPerJob + 1
                acdcFile = File(lfn=lfn, size=1024, events=eventsPerJob, locations=self.validLocations,
                                merged=False, first_event=firstEvent)
                run = Run(1, *range(1 + j * lumisPerJob + (i * lumisPerFile),
                                    1 + (j + 1) * lumisPerJob + (i * lumisPerFile)))

                acdcFile.addRun(run)
                acdcDoc = {"collection_name": workflowName,
                           "collection_type": "ACDC.CollectionTypes.DataCollection",
                           "files": {lfn: acdcFile},
                           "fileset_name": filesetName}
                self.couchDB.queue(acdcDoc)

        self.couchDB.commit()
        return
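A quick trace of the run/lumi partitioning above with the default arguments (an assumed check, not part of the original test):

    # lumisPerJob=4, numberOfJobs=250  ->  lumisPerFile = 1000
    # file i=0, job j=0: Run(1, 1..4)    file i=0, job j=1: Run(1, 5..8)
    # file i=1, job j=0: Run(1, 1001..1004)
    # so no lumi number is ever shared across files or jobs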
Example #4
    def testAddToFileset(self):
        """
        _AddToFileset_

        Test to see if we can add to a fileset using the DAO
        """
        testFileset = Fileset(name = "inputFileset")
        testFileset.create()

        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10)
        testFileA.addRun(Run( 1, *[45]))
        testFileA.create()
        testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10)
        testFileB.addRun(Run( 1, *[45]))
        testFileB.create()

        addToFileset = self.daofactory(classname = "Files.AddToFileset")
        addToFileset.execute(file = [testFileA['lfn'], testFileB['lfn']],
                             fileset = testFileset.id)

        testFileset2 = Fileset(name = "inputFileset")
        testFileset2.loadData()

        self.assertEqual(len(testFileset2.files), 2)
        for file in testFileset2.files:
            self.assertTrue(file in [testFileA, testFileB])

        # Check that adding twice doesn't crash
        addToFileset.execute(file = [testFileA['lfn'], testFileB['lfn']],
                             fileset = testFileset.id)
Example #5
    def createTestSubscription(self, nFiles, nSites=1, closeFileset=False):
        """
        _createTestSubscription_
        
        Create a set of test subscriptions for testing purposes.
        """

        if nSites > self.nSites:
            nSites = self.nSites

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        # Create a testWorkflow
        testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test")
        testWorkflow.create()

        # Create the files for each site
        for s in range(nSites):
            for i in range(nFiles):
                newFile = File(makeUUID(), size=1024, events=100, locations=set(["site%i.cern.ch" % s]))
                newFile.create()
                testFileset.addFile(newFile)
        testFileset.commit()

        testSubscription = Subscription(
            fileset=testFileset, workflow=testWorkflow, split_algo="MinFileBased", type="Processing"
        )
        testSubscription.create()

        # Close the fileset
        if closeFileset:
            testFileset.markOpen(isOpen=False)

        return testSubscription
Example #6
from functools import cmp_to_key

def sortedFilesFromMergeUnits(mergeUnits):
    """
    _sortedFilesFromMergeUnits_

    Given a list of merge units sort them and the files that they contain.
    Return a list of sorted WMBS File structures.
    """
    mergeUnits.sort(key=cmp_to_key(mergeUnitCompare))

    sortedFiles = []
    for mergeUnit in mergeUnits:
        mergeUnit["files"].sort(fileCompare)

        for file in mergeUnit["files"]:
            newFile = File(id=file["file_id"], lfn=file["file_lfn"],
                           events=file["file_events"])
            newFile.addRun(Run(file["file_run"], file["file_lumi"]))

            # The WMBS data structure puts locations that are passed in through
            # the constructor in the "newlocations" attribute.  We want these to
            # be in the "locations" attribute so that they get picked up by the
            # job submitter.
            newFile["locations"] = set([file["pnn"]])
            sortedFiles.append(newFile)

    return sortedFiles
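A minimal illustration of the constructor quirk described in the comment above (assumed behaviour, values hypothetical):

    # f = File(lfn="/store/foo.root", locations=set(["T1_US_FNAL_Disk"]))
    # f["newlocations"]  ->  set(["T1_US_FNAL_Disk"])   # where the constructor puts them
    # f["locations"]     ->  set()                      # what the job submitter reads,
    #                                                   # hence the direct assignment above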
Example #7
    def createTestJob(subscriptionType="Merge"):
        """
        _createTestJob_

        Create a test job with two files as input.  This will also create the
        appropriate workflow, jobgroup and subscription.
        """

        testWorkflow = Workflow(spec=makeUUID(), owner="Simon", name=makeUUID(), task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow, type=subscriptionType)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(1, *[45]))
        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(1, *[46]))
        testFileA.create()
        testFileB.create()

        testJob = Job(name=makeUUID(), files=[testFileA, testFileB])
        testJob["couch_record"] = "somecouchrecord"
        testJob["location"] = "test.site.ch"
        testJob.create(group=testJobGroup)
        testJob.associateFiles()

        return testJob
Example #8
    def populateACDCCouch(self, numFiles = 2, lumisPerJob = 35,
                          eventsPerJob = 20000):
        """
        _populateACDCCouch_

        Create production files in couchDB to test the creation
        of ACDC jobs for the EventBased algorithm
        """
        # Define some constants
        workflowName = "ACDC_TestEventBased"
        filesetName = "/%s/Production" % workflowName
        owner = "*****@*****.**"
        group = "unknown"

        lumisPerFile = lumisPerJob * 250
        for i in range(numFiles):
            for j in range(250):
                lfn = "MCFakeFile-some-hash-%s" % str(i).zfill(5)
                acdcFile = File(lfn = lfn, size = 100, events = eventsPerJob, locations = self.validLocations,
                                merged = False, first_event = 1)
                run = Run(1, *range(1 + (i * lumisPerFile) + j * lumisPerJob,
                                    (j + 1) * lumisPerJob + (i * lumisPerFile) + 2))
                acdcFile.addRun(run)
                acdcDoc = {"collection_name" : workflowName,
                           "collection_type" : "ACDC.CollectionTypes.DataCollection",
                           "files" : {lfn : acdcFile},
                           "fileset_name" : filesetName,
                           "owner" : {"user": owner,
                                      "group" : group}}
                self.couchDB.queue(acdcDoc)

        self.couchDB.commit()
        return
Example #9
    def loadData(self):
        """
        _loadData_

        Load all information about the job, including the mask and all input
        files.  Either the ID or the name must be specified before this is
        called.
        """
        existingTransaction = self.beginTransaction()

        self.load()
        self.getMask()

        fileAction = self.daofactory(classname = "Jobs.LoadFiles")
        files = fileAction.execute(self["id"], conn = self.getDBConn(),
                                   transaction = self.existingTransaction())

        self["input_files"] = []
        for file in files:
            newFile = File(id = file["id"])
            newFile.loadData(parentage = 0)
            self.addFile(newFile)

        self.commitTransaction(existingTransaction)
        return
Example #10
    def generateFakeMCFile(self, numEvents = 100, firstEvent = 1,
                           lastEvent = 100, firstLumi = 1, lastLumi = 10,
                           index = 1):
        #MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name = "MCTestFileset %i" % index)
        singleMCFileset.create()
        newFile = File("MCFakeFileTest %i" % index, size = 1000,
                       events = numEvents,
                       locations = set(["somese.cern.ch"]))
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        newFile.create()
        singleMCFileset.addFile(newFile)
        singleMCFileset.commit()
        testWorkflow = Workflow(spec = "spec.xml", owner = "Steve",
                                name = "wf001", task="Test")
        testWorkflow.create()

        singleMCFileSubscription = Subscription(fileset = singleMCFileset,
                                                workflow = testWorkflow,
                                                split_algo = "EventBased",
                                                type = "Production")
        singleMCFileSubscription.create()
        return singleMCFileSubscription
Example #11
    def createFileFromDataStructsFile(self, file, jobID):
        """
        _createFileFromDataStructsFile_

        This function will create a WMBS File given a DataStructs file
        """
        wmbsFile = File()
        wmbsFile.update(file)

        if isinstance(file["locations"], set):
            pnn = list(file["locations"])[0]
        elif isinstance(file["locations"], list):
            if len(file['locations']) > 1:
                logging.error("Have more then one location for a file in job %i" % (jobID))
                logging.error("Choosing location %s" % (file['locations'][0]))
            pnn = file["locations"][0]
        else:
            pnn = file["locations"]

        wmbsFile["locations"] = set()

        if pnn is not None:
            wmbsFile.setLocation(pnn = pnn, immediateSave = False)
        wmbsFile['jid'] = jobID
        
        return wmbsFile
Example #12
    def createJob(self, streamerList, jobEvents, jobSize, memoryRequirement, numberOfCores = 1):
        """
        _createJob_

        """
        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

        if numberOfCores > 1:
            self.currentJob.addBaggageParameter("numberOfCores", numberOfCores)

        for streamer in streamerList:
            f = File(id = streamer['id'],
                     lfn = streamer['lfn'])
            f.setLocation(streamer['location'], immediateSave = False)
            self.currentJob.addFile(f)

        # job time based on
        #   - 5 min initialization
        #   - 0.5MB/s repack speed
        #   - checksum calculation at 5MB/s
        #   - stageout at 5MB/s
        # job disk based on
        #   - RAW on local disk (factor 1)
        jobTime = 300 + jobSize/500000 + (jobSize*2)/5000000
        self.currentJob.addResourceEstimates(jobTime = jobTime, disk = jobSize/1024, memory = memoryRequirement)

        return
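Plugging an assumed 5 GB input into the resource estimate above (a sanity check, not part of the original code):

    jobSize = 5 * 10**9                                   # bytes, assumed
    jobTime = 300 + jobSize/500000 + (jobSize*2)/5000000
    # = 300 s init + 10000 s repack + 2000 s checksum/stageout = 12300 s (~3.4 h)
    # disk = jobSize/1024 ~ 4.9e6 KB, i.e. the full RAW kept on local disk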
Example #13
def injectFilesFromDBS(inputFileset, datasetPath, runsWhiteList=None):
    """
    _injectFilesFromDBS_

    """
    print "injecting files from %s into %s, please wait..." % (datasetPath, inputFileset.name)
    args={}
    args["url"] = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
    args["version"] = "DBS_2_1_1"
    args["mode"] = "GET"
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFiles(path = datasetPath, retriveList = ["retrive_lumi", "retrive_run"])
    print "  found %d files, inserting into wmbs..." % (len(dbsResults))

    for dbsResult in dbsResults:
        if runsWhiteList and str(dbsResult["LumiList"][0]["RunNumber"]) not in runsWhiteList:
            continue
        myFile = File(lfn = dbsResult["LogicalFileName"], size = dbsResult["FileSize"],
                      events = dbsResult["NumberOfEvents"], checksums = {"cksum": dbsResult["Checksum"]},
                      locations = "cmssrm.fnal.gov", merged = True)
        myRun = Run(runNumber = dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.lumis.append(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

    if len(inputFileset) < 1:
        raise Exception("No files were selected!")

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
Example #14
def injectFilesFromDBS(inputFileset, datasetPath):
    """
    _injectFilesFromDBS_

    """
    print "injecting files from %s into %s, please wait..." % (datasetPath, inputFileset.name)
    args={}
    args["url"] = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
    args["version"] = "DBS_2_0_9"
    args["mode"] = "GET"
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFiles(path = datasetPath, retriveList = ["retrive_block","retrive_lumi", "retrive_run"])

    # NOTE: limit the number of jobs to create by simply using the first two files of the dataset
    dbsResults = dbsResults[0:2]

    print("  found %d files, inserting into wmbs..." % (len(dbsResults)))

    for dbsResult in dbsResults:
        myFile = File(lfn = dbsResult["LogicalFileName"], size = dbsResult["FileSize"],
                      events = dbsResult["NumberOfEvents"], checksums = {"cksum": dbsResult["Checksum"]},
                      locations = set(['srm.ciemat.es','storm-se-01.ba.infn.it','storage01.lcg.cscs.ch']))

        myRun = Run(runNumber = dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.lumis.append(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
Example #15
    def testFilesWithoutOtherSubscriptions(self):
        """
        _testFilesWithoutOtherSubscriptions_

        Test the case where files only in the delete subscription
        can happen if cleanup of the other subscriptions is fast

        """
        testWorkflowA = Workflow(spec="specA.xml", owner="Steve", name="wfA", task="Test")
        testWorkflowA.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        allFiles = []
        for i in range(500):
            testFile = File(str(i), size=1000, events=100, locations=set(["somese.cern.ch"]))
            testFile.create()
            allFiles.append(testFile)
            testFileset.addFile(testFile)
        testFileset.commit()

        testSubscriptionA = Subscription(
            fileset=testFileset, workflow=testWorkflowA, split_algo="SiblingProcessingBased", type="Processing"
        )
        testSubscriptionA.create()

        splitter = SplitterFactory()
        deleteFactoryA = splitter(package="WMCore.WMBS", subscription=testSubscriptionA)

        result = deleteFactoryA(files_per_job=50)
        self.assertEqual(len(result), 1, "Error: Wrong number of job groups returned.")
        self.assertEqual(len(result[0].jobs), 10, "Error: Wrong number of jobs returned.")

        return
Example #16
    def createJob(self, streamerList, jobEvents, jobSize):
        """
        _createJob_

        create an express job processing
        the passed in list of streamers

        """
        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

        for streamer in streamerList:
            f = File(id = streamer['id'],
                     lfn = streamer['lfn'])
            f.setLocation(streamer['location'], immediateSave = False)
            self.currentJob.addFile(f)

        # job time based on
        #   - 5 min initialization
        #   - 0.5MB/s repack speed
        #   - 45s/evt reco speed
        #   - checksum calculation at 5MB/s (twice)
        #   - stageout at 5MB/s
        # job disk based on
        #   - streamer on local disk (factor 1)
        #   - RAW on local disk (factor 1)
        #   - FEVT/ALCARECO/DQM on local disk (factor 4)
        jobTime = 300 + jobSize/500000 + jobEvents*45 + (jobSize*4*3)/5000000
        self.currentJob.addResourceEstimates(jobTime = jobTime, disk = (jobSize*6)/1024)

        return
Example #17
    def _addACDCFileToWMBSFile(self, acdcFile, inFileset = True):
        """
        """
        wmbsParents = []
        #pass empty check sum since it won't be updated to dbs anyway
        checksums = {}
        wmbsFile = File(lfn = str(acdcFile["lfn"]),
                        size = acdcFile["size"],
                        events = acdcFile["events"],
                        checksums = checksums,
                        #TODO: need to get list of parent lfn
                        parents = acdcFile["parents"],
                        locations = acdcFile["locations"],
                        merged = acdcFile.get('merged', True))

        ## TODO need to get the lumi lists
        for run in acdcFile['runs']:
            wmbsFile.addRun(run)
        

        dbsFile = self._convertACDCFileToDBSFile(acdcFile)
        self._addToDBSBuffer(dbsFile, checksums, acdcFile["locations"])
            
        logging.info("WMBS File: %s\n on Location: %s" 
                     % (wmbsFile['lfn'], wmbsFile['locations']))

        if inFileset:
            wmbsFile['inFileset'] = True
        else:
            wmbsFile['inFileset'] = False
            
        self.wmbsFilesToCreate.append(wmbsFile)
        
        return wmbsFile
Example #18
    def createLargeFileBlock(self):
        """
        _createLargeFileBlock_
        
        Creates a large group of files for testing
        """
        testFileset = Fileset(name = "TestFilesetX")
        testFileset.create()
        for i in range(5000):
            newFile = File(makeUUID(), size = 1000, events = 100,
                           locations = set(["somese.cern.ch"]))
            newFile.create()
            testFileset.addFile(newFile)
        testFileset.commit()
            
        testWorkflow = Workflow(spec = "spec.xml", owner = "mnorman",
                                name = "wf003", task="Test" )
        testWorkflow.create()

        largeSubscription = Subscription(fileset = testFileset,
                                         workflow = testWorkflow,
                                         split_algo = "FileBased",
                                         type = "Processing")
        largeSubscription.create()

        return largeSubscription
Example #19
    def createSingleJobWorkflow(self):
        """
        Create a workflow with one job and two files and store the results in instance variables
        """

        self.testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test")
        self.testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset, workflow=self.testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        self.testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        self.testFileA.addRun(Run(1, *[45]))
        self.testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        self.testFileB.addRun(Run(1, *[46]))
        self.testFileA.create()
        self.testFileB.create()

        self.testJob = Job(name="TestJob", files=[self.testFileA, self.testFileB])

        self.testJob.create(group=testJobGroup)
        self.testJob.associateFiles()
Example #20
    def createJob(self, streamerList, jobEvents, jobSize, timePerEvent, sizePerEvent, memoryRequirement):
        """
        _createJob_

        create an express job processing
        the passed in list of streamers

        """
        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

        for streamer in streamerList:
            f = File(id = streamer['id'],
                     lfn = streamer['lfn'])
            f.setLocation(streamer['location'], immediateSave = False)
            self.currentJob.addFile(f)

        # job time based on
        #   - 5 min initialization (twice)
        #   - 0.5MB/s repack speed
        #   - reco with timePerEvent
        #   - checksum calculation at 5MB/s
        #   - stageout at 5MB/s
        # job disk based on
        #   - streamer or RAW on local disk (factor 1)
        #   - FEVT/ALCARECO/DQM on local disk (sizePerEvent)
        jobTime = 600 + jobSize/500000 + jobEvents*timePerEvent + (jobEvents*sizePerEvent*2)/5000000
        self.currentJob.addResourceEstimates(jobTime = min(jobTime, 47*3600),
                                             disk = min(jobSize/1024 + jobEvents*sizePerEvent, 20000000),
                                             memory = memoryRequirement)

        return
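With assumed inputs the min() caps above become visible (illustrative only):

    # jobEvents=20000, timePerEvent=10 s  ->  reco alone is 200000 s,
    # so jobTime = min(..., 47*3600) = 169200 s, the 47-hour ceiling
    # disk is likewise capped at 20000000 KB (~20 TB) of local scratch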
Example #21
    def createJob(self, fileList, jobSize):
        """
        _createJob_

        create an express merge job for
        the passed in list of files

        """
        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

        largestFile = 0
        for fileInfo in fileList:
            largestFile = max(largestFile, fileInfo['filesize'])
            f = File(id = fileInfo['id'],
                     lfn = fileInfo['lfn'])
            f.setLocation(fileInfo['location'], immediateSave = False)
            self.currentJob.addFile(f)

        # job time based on
        #   - 5 min initialization
        #   - 5MB/s merge speed
        #   - checksum calculation at 5MB/s (twice)
        #   - stageout at 5MB/s
        # job disk based on
        #  - input for largest file on local disk
        #  - output on local disk (factor 1)
        jobTime = 300 + (jobSize*4)/5000000
        self.currentJob.addResourceEstimates(jobTime = jobTime, disk = (jobSize+largestFile)/1024)

        return
Example #22
    def testD_NonContinuousLumis(self):
        """
        _NonContinuousLumis_

        Test and see if LumiBased can work when the lumis are non continuous
        """


        baseName = makeUUID()
        nFiles = 10

        testFileset = Fileset(name = baseName)
        testFileset.create()
        for i in range(nFiles):
            newFile = File(lfn = '%s_%i' % (baseName, i), size = 1000,
                           events = 100, locations = "somese.cern.ch")
            # Set to two non-continuous lumi numbers
            lumis = [100 + i, 200 + i]
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            testFileset.addFile(newFile)

        testFileset.commit()


        testSubscription = Subscription(fileset = testFileset,
                                         workflow = self.testWorkflow,
                                         split_algo = "LumiBased",
                                         type = "Processing")
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = testSubscription)

        jobGroups = jobFactory(lumis_per_job = 2,
                               halt_job_on_file_boundaries = False,
                               splitOnRun = False,
                               performance = self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 10)
        for j in jobs:
            runs = j['mask'].getRunAndLumis()
            for r in runs.keys():
                self.assertEqual(len(runs[r]), 2)
                for l in runs[r]:
                    # Each run should have two lumis
                    # Each lumi should be of form [x, x]
                    # meaning that the first and last lumis are the same
                    self.assertEqual(len(l), 2)
                    self.assertEqual(l[0], l[1])
            self.assertEqual(j['estimatedJobTime'], 100 * 12)
            self.assertEqual(j['estimatedDiskUsage'], 100 * 400)
            self.assertEqual(j['estimatedMemoryUsage'], 2300)


        return
Example #23
    def loadFiles(self, size=10):
        """
        _loadFiles_

        Grab some files from the resultProxy
        Should handle multiple proxies.  Not really sure about that
        """

        if len(self.proxies) < 1:
            # Well, you don't have any proxies.
            # This is what happens when you ran out of files last time
            logging.info("No additional files found; Ending.")
            return set()

        resultProxy = self.proxies[0]
        rawResults = []
        if isinstance(resultProxy.keys, list):
            keys = resultProxy.keys
        else:
            keys = resultProxy.keys()
            if isinstance(keys, set):
                # If it's a set, handle it
                keys = list(keys)
        files = set()

        while len(rawResults) < size and len(self.proxies) > 0:
            length = size - len(rawResults)
            newResults = resultProxy.fetchmany(size=length)
            if len(newResults) < length:
                # Assume we're all out
                # Eliminate this proxy
                self.proxies.remove(resultProxy)
            rawResults.extend(newResults)

        if not rawResults:
            # Nothing to do
            return set()

        fileList = self.formatDict(results=rawResults, keys=keys)
        fileIDs = list(set([x["fileid"] for x in fileList]))

        myThread = threading.currentThread()
        fileInfoAct = self.daoFactory(classname="Files.GetForJobSplittingByID")
        fileInfoDict = fileInfoAct.execute(file=fileIDs, conn=myThread.transaction.conn, transaction=True)

        getLocAction = self.daoFactory(classname="Files.GetLocationBulk")
        getLocDict = getLocAction.execute(files=fileIDs, conn=myThread.transaction.conn, transaction=True)

        for fID in fileIDs:
            fl = WMBSFile(id=fID)
            fl.update(fileInfoDict[fID])
            locations = getLocDict.get((fID), [])
            for loc in locations:
                fl.setLocation(loc, immediateSave=False)
            files.add(fl)

        return files
Example #24
    def testAddDupsToFileset(self):
        """
        _AddToDupsFileset_

        Verify that the dups version of the AddToFileset DAO will not add files
        to a fileset if they're already associated to another fileset with the
        same workflow.
        """
        testWorkflowA = Workflow(spec = 'hello', owner = "mnorman",
                                 name = "wf001", task="basicWorkload/Production")
        testWorkflowA.create()
        testWorkflowB = Workflow(spec = 'hello', owner = "mnorman",
                                 name = "wf001", task="basicWorkload/Production2")
        testWorkflowB.create()        

        testFilesetA = Fileset(name = "inputFilesetA")
        testFilesetA.create()
        testFilesetB = Fileset(name = "inputFilesetB")
        testFilesetB.create()        

        testSubscriptionA = Subscription(workflow = testWorkflowA, fileset = testFilesetA)
        testSubscriptionA.create()
        testSubscriptionB = Subscription(workflow = testWorkflowB, fileset = testFilesetB)
        testSubscriptionB.create()        

        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10)
        testFileA.addRun(Run( 1, *[45]))
        testFileA.create()
        testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10)
        testFileB.addRun(Run( 1, *[45]))
        testFileB.create()

        addToFileset = self.daofactory(classname = "Files.AddDupsToFileset")
        addToFileset.execute(file = [testFileA['lfn'], testFileB['lfn']],
                             fileset = testFilesetA.id, workflow = "wf001")

        testFileset2 = Fileset(name = "inputFilesetA")
        testFileset2.loadData()

        self.assertEqual(len(testFileset2.files), 2)
        for file in testFileset2.files:
            self.assertTrue(file in [testFileA, testFileB])

        # Check that adding twice doesn't crash
        addToFileset.execute(file = [testFileA['lfn'], testFileB['lfn']],
                             fileset = testFilesetA.id, workflow = "wf001")

        # Files should not get added to fileset B because fileset A is associated
        # with wf001.
        addToFileset.execute(file = [testFileA['lfn'], testFileB['lfn']],
                             fileset = testFilesetB.id, workflow = "wf001")

        testFileset2 = Fileset(name = "inputFilesetB")
        testFileset2.loadData()

        self.assertEqual(len(testFileset2.files), 0)
        return
Example #25
    def testMultipleLocations(self):
        """
        _testMultipleLocations_

        Verify that the sibling processing based algorithm doesn't create jobs
        that run over files at multiple sites.
        """
        testFile1 = File("testFile1", size=1000, events=100, locations=set(["somese2.cern.ch"]))
        testFile1.create()
        testFile2 = File("testFile2", size=1000, events=100, locations=set(["somese2.cern.ch"]))
        testFile2.create()
        testFile3 = File("testFile3", size=1000, events=100, locations=set(["somese2.cern.ch"]))
        testFile3.create()

        self.testFilesetA.addFile(testFile1)
        self.testFilesetA.addFile(testFile2)
        self.testFilesetA.addFile(testFile3)
        self.testFilesetA.commit()
        self.testFilesetA.markOpen(False)

        self.testSubscriptionA.completeFiles([testFile1, testFile2, testFile3])
        self.testSubscriptionA.completeFiles([self.testFileA, self.testFileB, self.testFileC])

        splitter = SplitterFactory()
        deleteFactoryA = splitter(package="WMCore.WMBS", subscription=self.deleteSubscriptionA)

        result = deleteFactoryA(files_per_job=50)

        assert len(result) == 2, "Error: Wrong number of jobgroups returned."

        goldenFilesA = ["testFileA", "testFileB", "testFileC"]
        goldenFilesB = ["testFile1", "testFile2", "testFile3"]

        locations = {
            "testFileA": "somese.cern.ch",
            "testFileB": "somese.cern.ch",
            "testFileC": "somese.cern.ch",
            "testFile1": "somese2.cern.ch",
            "testFile2": "somese2.cern.ch",
            "testFile3": "somese2.cern.ch",
        }

        for jobGroup in result:
            assert len(jobGroup.jobs) == 1, "Error: Wrong number of jobs in jobgroup."
            assert len(jobGroup.jobs[0]["input_files"]) == 3, "Error: Wrong number of input files in job."

            jobSite = list(jobGroup.jobs[0]["input_files"][0]["locations"])[0]

            if jobSite == "somese.cern.ch":
                goldenFiles = goldenFilesA
            else:
                goldenFiles = goldenFilesB

            for jobFile in jobGroup.jobs[0]["input_files"]:
                assert list(jobFile["locations"])[0] == locations[jobFile["lfn"]], "Error: Wrong site for file."
                goldenFiles.remove(jobFile["lfn"])

            assert len(goldenFiles) == 0, "Error: Files are missing."

        return
Example #26
    def injectFile(self):
        """
        _injectFile_

        Inject a file into the periodic splitting input fileset.
        """
        testFile = File(lfn = "/this/is/a/lfn%s" % time.time(), size = 1000,
                        events = 100, locations = set(["somese.cern.ch"]))
        testFile.create()
        self.testFileset.addFile(testFile)    
        self.testFileset.commit()

        return
Example #27
    def createFile(self, lfn, events, run, lumis, location):
        """
        _createFile_

        Create a file for testing
        """
        newFile = File(lfn=lfn, size=1000, events=events)
        lumiList = []
        for lumi in range(lumis):
            lumiList.append((run * lumis) + lumi)
        newFile.addRun(Run(run, *lumiList))
        newFile.setLocation(location)
        return newFile
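The (run * lumis) + lumi scheme above gives each run a disjoint block of lumi numbers: with assumed arguments run=2, lumis=3 the lumiList is [6, 7, 8], and run=3 gives [9, 10, 11], so no two runs share a lumi.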
Example #28
    def testCreateTransaction(self):
        """
        _testCreateTransaction_

        Begin a transaction and then create a file in the database.  Afterwards,
        rollback the transaction.  Use the File class's exists() method to
        verify that the file doesn't exist before it was created, exists
        after it was created and doesn't exist after the transaction was rolled
        back.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()
        
        testFile = File(lfn = "/this/is/a/lfn", size = 1024, events = 10, checksums={'cksum':1111})

        assert testFile.exists() == False, \
               "ERROR: File exists before it was created"

        testFile.addRun(Run(1, *[45]))
        testFile.create()

        assert testFile.exists() > 0, \
               "ERROR: File does not exist after it was created"

        myThread.transaction.rollback()

        assert testFile.exists() == False, \
               "ERROR: File exists after transaction was rolled back."
        return    
Example #29
    def testLargeNumberOfFiles(self):
        """
        _testLargeNumberOfFiles_

        Setup a subscription with 500 files and verify that the splitting algo
        works correctly.
        """
        testWorkflowA = Workflow(spec = "specA.xml", owner = "Steve",
                                 name = "wfA", task = "Test")
        testWorkflowA.create()
        testWorkflowB = Workflow(spec = "specB.xml", owner = "Steve",
                                 name = "wfB", task = "Test")
        testWorkflowB.create()

        testFileset = Fileset(name = "TestFileset")
        testFileset.create()

        allFiles = []
        for i in range(500):
            testFile = File(str(i), size = 1000, events = 100,
                            locations = set(["T2_CH_CERN"]))
            testFile.create()
            allFiles.append(testFile)
            testFileset.addFile(testFile)
        testFileset.commit()

        testSubscriptionA = Subscription(fileset = testFileset,
                                         workflow = testWorkflowA,
                                         split_algo = "FileBased",
                                         type = "Processing")
        testSubscriptionA.create()
        testSubscriptionB = Subscription(fileset = testFileset,
                                         workflow = testWorkflowB,
                                         split_algo = "SiblingProcessingBased",
                                         type = "Processing")
        testSubscriptionB.create()

        testSubscriptionA.completeFiles(allFiles)

        splitter = SplitterFactory()
        deleteFactoryA = splitter(package = "WMCore.WMBS",
                                  subscription = testSubscriptionB)

        result = deleteFactoryA(files_per_job = 50)
        self.assertEqual(len(result), 1,
                         "Error: Wrong number of job groups returned.")
        self.assertEqual(len(result[0].jobs), 10,
                         "Error: Wrong number of jobs returned.")

        return
Example #30
    def testCreateWithLocation(self):
        """
        _testCreateWithLocation_

        Create a file and add a couple locations.  Load the file from the
        database to make sure that the locations were set correctly.
        """
        testFileA = File(lfn = "/this/is/a/lfn", size = 1024, events = 10,
                         checksums = {'cksum':1},
                         locations = set(["se1.fnal.gov", "se1.cern.ch"]))
        testFileA.addRun(Run( 1, *[45]))
        testFileA.create()

        
        testFileB = File(id = testFileA["id"])
        testFileB.loadData()

        goldenLocations = ["se1.fnal.gov", "se1.cern.ch"]

        for location in testFileB["locations"]:
            assert location in goldenLocations, \
                   "ERROR: Unknown file location"
            goldenLocations.remove(location)

        assert len(goldenLocations) == 0, \
              "ERROR: Some locations are missing"    
        return
Example #31
    def testLoadOutputID(self):
        """
        _testLoadOutputID_

        Test whether we can load an output ID for a job
        """

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")

        testWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)

        testSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="test.site.ch")
        testFileB = File(lfn=makeUUID(), locations="test.site.ch")
        testFileA.create()
        testFileB.create()

        testFileset.addFile([testFileA, testFileB])
        testFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB])

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testJob = Job()
        testJob.create(group=testJobGroup)

        self.assertEqual(testJob.loadOutputID(), testJobGroup.output.id)

        return
Example #32
    def filesOfStatus(self, status, limit = 0, loadChecksums = True, doingJobSplitting = False):
        """
        _filesOfStatus_

        Return a Set of File objects that have the given status with respect
        to this subscription.
        """
        existingTransaction = self.beginTransaction()

        status = status.title()
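        # e.g. status "available" -> "Available", selecting the DAO class
        # "Subscriptions.GetAvailableFiles" (or its ByLimit variant below)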
        files  = set()
        if limit > 0:
            action = self.daofactory(classname = "Subscriptions.Get%sFilesByLimit" % status)
            fileList = action.execute(self["id"], limit, conn = self.getDBConn(),
                                      transaction = self.existingTransaction())
        else:
            action = self.daofactory(classname = "Subscriptions.Get%sFiles" % status)
            fileList = action.execute(self["id"], conn = self.getDBConn(),
                                      transaction = self.existingTransaction())

        if doingJobSplitting:
            fileInfoAct  = self.daofactory(classname = "Files.GetForJobSplittingByID")
        else:
            fileInfoAct  = self.daofactory(classname = "Files.GetByID")

        fileInfoDict = fileInfoAct.execute(file = [x["file"] for x in fileList],
                                           conn = self.getDBConn(),
                                           transaction = self.existingTransaction())

        #Run through all files
        for f in fileList:
            fl = File(id = f['file'])
            if loadChecksums:
                fl.loadChecksum()
            fl.update(fileInfoDict[f['file']])
            if 'locations' in f:
                fl.setLocation(f['locations'], immediateSave = False)
            files.add(fl)

        self.commitTransaction(existingTransaction)
        return files
Example #33
    def test01(self):
        """
        _test01_

        Test size and event triggers for single lumis (they are ignored)
        Test latency trigger (timed out)

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1]:
            for i in range(2):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave=False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription2)

        mySplitArgs['maxInputSize'] = 1
        mySplitArgs['maxInputFiles'] = 1
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        time.sleep(1)

        mySplitArgs['maxLatency'] = 1
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        return
Example #34
    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works
        Test latency trigger (wait and 0)

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1]:
            for i in range(2):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave=False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxLatency'] = 0
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("ExpressMerge-"),
                        "ERROR: Job has wrong name")

        return
Example #35
    def test02(self):
        """
        _test02_

        Test multi lumis

        """
        insertClosedLumiBinds = []
        for lumi in [1,2]:
            filecount = 1
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "Express",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        self.releaseExpressDAO.execute(binds = { 'RUN' : 1 }, transaction = False)

        jobGroups = jobFactory(maxInputEvents = 100)

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return
Example #36
    def createFile(self, lfn, events, run, lumis, location):
        """
        _createFile_

        Create a file for testing
        """
        newFile = File(lfn = lfn, size = 1000,
                       events = events)
        lumiList = []
        for lumi in range(lumis):
            lumiList.append((run * lumis) + lumi)
        newFile.addRun(Run(run, *lumiList))
        newFile.setLocation(location)
        return newFile
Example #37
    def testLocationMerging(self):
        """
        _testLocationMerging_

        Verify that files residing on different SEs are not merged together in
        the same job.
        """
        self.stuffWMBS()

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="s2", seName="somese2.cern.ch")

        fileSite2 = File(lfn="fileSite2",
                         size=4098,
                         events=1024,
                         first_event=0,
                         locations=set(["somese2.cern.ch"]))
        fileSite2.addRun(Run(1, *[46]))
        fileSite2.create()

        self.mergeFileset.addFile(fileSite2)
        self.mergeFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=4097,
                            max_merge_size=99999999,
                            max_merge_events=999999999,
                            merge_across_runs=False)

        assert len(result) == 1, \
               "ERROR: More than one JobGroup returned."

        assert len(result[0].jobs) == 3, \
               "ERROR: Three jobs should have been returned."

        for job in result[0].jobs:
            firstInputFile = job.getFiles()[0]
            baseLocation = list(firstInputFile["locations"])[0]

            for inputFile in job.getFiles():
                assert len(inputFile["locations"]) == 1, \
                       "Error: Wrong number of locations"

                assert list(inputFile["locations"])[0] == baseLocation, \
                       "Error: Wrong location."

        return
Example #38
    def testLocationMerging(self):
        """
        _testLocationMerging_

        Verify that files residing on different SEs are not merged together in
        the same job.
        """
        self.stuffWMBS()

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T1_UK_RAL", pnn="T1_UK_RAL_Disk")

        fileSite2 = File(lfn="fileRAL",
                         size=4098,
                         events=1024,
                         first_event=0,
                         locations=set(["T1_UK_RAL_Disk"]))
        fileSite2.addRun(Run(1, *[46]))
        fileSite2.create()

        self.mergeFileset.addFile(fileSite2)
        self.mergeFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=4097,
                            max_merge_size=99999999,
                            max_merge_events=999999999,
                            merge_across_runs=False)

        assert len(result) == 1, \
            "ERROR: More than one JobGroup returned."

        assert len(result[0].jobs) == 3, \
            "ERROR: Three jobs should have been returned."

        ralJobs = 0
        fnalJobs = 0
        for job in result[0].jobs:
            if job["possiblePSN"] == set(["T1_UK_RAL"]):
                ralJobs += 1
            elif job["possiblePSN"] == set(["T1_US_FNAL"]):
                fnalJobs += 1

        self.assertEqual(ralJobs, 1)
        self.assertEqual(fnalJobs, 2)

        return
Example #39
def injectFilesFromDBS(inputFileset, datasetPath):
    """
    _injectFilesFromDBS_

    """
    print "injecting files from %s into %s, please wait..." % (
        datasetPath, inputFileset.name)
    args = {}
    args[
        "url"] = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
    args["version"] = "DBS_2_0_9"
    args["mode"] = "GET"
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFiles(path=datasetPath,
                                  retriveList=["retrive_lumi", "retrive_run"])
    dbsResults = dbsResults[0:10]
    print "  found %d files, inserting into wmbs..." % (len(dbsResults))

    for dbsResult in dbsResults:
        myFile = File(lfn=dbsResult["LogicalFileName"],
                      size=dbsResult["FileSize"],
                      events=dbsResult["NumberOfEvents"],
                      checksums={"cksum": dbsResult["Checksum"]},
                      locations="cmssrm.fnal.gov",
                      merged=True)
        myRun = Run(runNumber=dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.lumis.append(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

        dbsFile = DBSBufferFile(lfn=dbsResult["LogicalFileName"],
                                size=dbsResult["FileSize"],
                                events=dbsResult["NumberOfEvents"],
                                checksums={"cksum": dbsResult["Checksum"]},
                                locations="cmssrm.fnal.gov",
                                status="LOCAL")
        dbsFile.setDatasetPath(datasetPath)
        dbsFile.setAlgorithm(appName="cmsRun",
                             appVer="Unknown",
                             appFam="Unknown",
                             psetHash="Unknown",
                             configContent="Unknown")
        dbsFile.create()

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
Example #40
    def addMCFakeFile(self):
        """Add a fake file for wmbs to run production over"""
        needed = ['FirstEvent', 'FirstLumi', 'FirstRun', 'LastEvent', 'LastLumi', 'LastRun']
        for key in needed:
            if self.mask and self.mask.get(key) is None:
                msg = 'Invalid value "%s" for %s' % (self.mask.get(key), key)
                raise WorkQueueWMBSException(msg)
        locations = set()
        for site in self.getLocations.execute(conn=self.getDBConn(),
                                              transaction=self.existingTransaction()):
            try:
                siteInfo = self.getLocationInfo.execute(site, conn=self.getDBConn(),
                                                        transaction=self.existingTransaction())
                if not siteInfo:
                    self.logger.info('Skipping MonteCarlo injection to site "%s" as unknown to wmbs' % site)
                    continue
                locations.add(siteInfo[0]['pnn'])
            except Exception as ex:
                self.logger.error('Error getting storage element for "%s": %s' % (site, str(ex)))
        if not locations:
            msg = 'No locations to inject Monte Carlo work to, unable to proceed'
            raise WorkQueueWMBSException(msg)
        mcFakeFileName = ("MCFakeFile-%s" % self.topLevelFileset.name).encode('ascii', 'ignore')
        wmbsFile = File(lfn=mcFakeFileName,
                        first_event=self.mask['FirstEvent'],
                        last_event=self.mask['LastEvent'],
                        events=self.mask['LastEvent'] - self.mask['FirstEvent'] + 1,  # inclusive range
                        locations=locations,
                        merged=False,  # merged causes dbs parentage relation
                        )

        if self.mask:
            lumis = range(self.mask['FirstLumi'], self.mask['LastLumi'] + 1)  # inclusive range
            wmbsFile.addRun(Run(self.mask['FirstRun'], *lumis))  # assume run number static
        else:
            wmbsFile.addRun(Run(1, 1))

        wmbsFile['inFileset'] = True  # file is not a parent

        logging.info("WMBS MC Fake File: %s on Location: %s", wmbsFile['lfn'], wmbsFile['newlocations'])

        self.wmbsFilesToCreate.append(wmbsFile)

        totalFiles = self.topLevelFileset.addFilesToWMBSInBulk(self.wmbsFilesToCreate,
                                                               self.wmSpec.name(),
                                                               isDBS=self.isDBS)

        self.topLevelFileset.markOpen(False)
        return totalFiles
Example #41
    def test07(self):
        """
        _test07_

        Test over merge

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return
Example #42
    def createFileCollection(self,
                             name,
                             nSubs,
                             nFiles,
                             workflowURL='test',
                             site=None):
        """
        _createFileCollection_

        Create a collection of files for splitting into jobs
        """

        myThread = threading.currentThread()

        testWorkflow = Workflow(spec=workflowURL,
                                owner="mnorman",
                                name=name,
                                task="/TestWorkload/ReReco")
        testWorkflow.create()

        for sub in range(nSubs):

            nameStr = '%s-%i' % (name, sub)

            testFileset = Fileset(name=nameStr)
            testFileset.create()

            for f in range(nFiles):
                # pick a random site
                if not site:
                    tmpSite = 'se.%s' % (random.choice(self.sites))
                else:
                    tmpSite = 'se.%s' % (site)
                testFile = File(lfn="/lfn/%s/%i" % (nameStr, f),
                                size=1024,
                                events=10)
                testFile.setLocation(tmpSite)
                testFile.create()
                testFileset.addFile(testFile)

            testFileset.commit()
            testFileset.markOpen(isOpen=0)
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type="Processing",
                                            split_algo="FileBased")
            testSubscription.create()

        return
Example no. 43
    def test03(self):
        """
        _test03_

        Test input size threshold on multi lumis

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave=False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription2)

        mySplitArgs['maxInputSize'] = 1
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        time.sleep(1)

        mySplitArgs['maxLatency'] = 1
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        return
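The two factory calls trip different thresholds: with maxInputSize = 1 every 1000-byte file is over the size limit, so nothing merges; after the one-second sleep, maxLatency = 1 forces the pending data out regardless of size, presumably split along lumi boundaries (two lumis, hence two jobs). A sketch of that reading, assuming latency is counted in seconds:

# Four files of 1000 bytes, two per lumi (lumis 1 and 2).
fileSize = 1000
maxInputSize = 1   # every file exceeds this -> no JobGroup at first
maxLatency = 1     # after time.sleep(1) the input counts as late
# On latency expiry the splitter flushes what it holds while keeping
# lumi boundaries intact: 2 jobs, one per lumi.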
Example no. 44
    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules=["WMCore.WMBS"])

        self.splitterFactory = SplitterFactory(package="WMCore.JobSplitting")

        myThread = threading.currentThread()
        self.myThread = myThread
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)
        self.WMBSFactory = daoFactory

        config = self.getConfig()
        self.changer = ChangeState(config)

        myResourceControl = ResourceControl()
        myResourceControl.insertSite("T1_US_FNAL", 10, 20, "T1_US_FNAL_Disk", "T1_US_FNAL")
        myResourceControl.insertSite("T1_US_FNAL", 10, 20, "T3_US_FNALLPC", "T1_US_FNAL")
        myResourceControl.insertSite("T2_CH_CERN", 10, 20, "T2_CH_CERN", "T2_CH_CERN")

        self.fileset1 = Fileset(name="TestFileset1")
        for fileNum in range(11):
            newFile = File("/some/file/name%d" % fileNum, size=1000, events=100)
            newFile.addRun(Run(1, *[1]))
            newFile.setLocation('T1_US_FNAL_Disk')
            self.fileset1.addFile(newFile)

        self.fileset1.create()

        workflow1 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow1", task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Harvest",
                                          type="Harvesting")

        self.subscription1.create()
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        return
Example no. 45
def injectFilesFromDBS(inputFileset, datasetPath):
    """
    _injectFilesFromDBS_

    """
    print("injecting files from %s into %s, please wait..." %
          (datasetPath, inputFileset.name))
    args = {}
    args["url"] = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
    args["mode"] = "GET"
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFileArray(
        path=datasetPath, retriveList=["retrive_lumi", "retrive_run"])
    # Limiter on number of files
    dbsResults = dbsResults[0:20]
    print("  found %d files, inserting into wmbs..." % (len(dbsResults)))

    for dbsResult in dbsResults:
        myFile = File(lfn=dbsResult["LogicalFileName"],
                      size=dbsResult["FileSize"],
                      events=dbsResult["NumberOfEvents"],
                      checksums={"cksum": dbsResult["Checksum"]},
                      locations="cmssrm.fnal.gov",
                      merged=True)
        myRun = Run(runNumber=dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.appendLumi(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

        dbsFile = DBSBufferFile(lfn=dbsResult["LogicalFileName"],
                                size=dbsResult["FileSize"],
                                events=dbsResult["NumberOfEvents"],
                                checksums={"cksum": dbsResult["Checksum"]},
                                locations="cmssrm.fnal.gov",
                                status="NOTUPLOADED")
        dbsFile.setDatasetPath(datasetPath)
        dbsFile.setAlgorithm(appName="cmsRun",
                             appVer="Unknown",
                             appFam="Unknown",
                             psetHash="Unknown",
                             configContent="Unknown")
        dbsFile.create()

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
Example no. 46
    def createFile(lfn, events, run, lumis, location, lumiMultiplier=None):
        """
        _createFile_

        Create a file for testing
        """
        if lumiMultiplier is None:
            lumiMultiplier = run

        newFile = File(lfn=lfn, size=1000, events=events)
        lumiList = []
        for lumi in range(lumis):
            lumiList.append((lumiMultiplier * lumis) + lumi)
        newFile.addRun(Run(run, *lumiList))
        newFile.setLocation(location)
        return newFile
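A hypothetical call, to make the lumi numbering concrete: lumiMultiplier defaults to the run number, so five lumis for run 2 start at 2 * 5 = 10.

# Hypothetical usage of the helper above.
f = createFile(lfn="/store/test/file0.root", events=100,
               run=2, lumis=5, location="T1_US_FNAL_Disk")
# lumiList = [(2 * 5) + 0, ..., (2 * 5) + 4] == [10, 11, 12, 13, 14]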
Example no. 47
    def fileLumiMaps(self, filesAtLocation=None, getParents=False, lumiMask=None):
        """
        Args:
            filesAtLocation: the list of file objects at a particular location
            getParents: Attach a list of parents files to the file
            lumiMask: the lumiMask for the step. Files excluded by the lumiMask are dropped

        Returns:
            lumisByFile: LumiList for each LFN
            eventsByLumi: Estimate (for now) of how many events are in each lumi
        """
        lumisByFile = {}
        eventsByLumi = defaultdict(lambda: defaultdict(float))
        self.filesByLumi = defaultdict(lambda: defaultdict(list))

        for fileObj in filesAtLocation:
            lfn = fileObj['lfn']
            eventsInFile = fileObj['events']
            lumisInFile = self.countLumis(fileObj['runs'])
            if getParents:
                parentLFNs = self.findParent(lfn=lfn)
                # use a distinct loop variable so the outer lfn is not
                # clobbered before it keys lumisByFile below
                for parentLfn in parentLFNs:
                    parent = File(lfn=parentLfn)
                    fileObj['parents'].add(parent)
            runsAndLumis = {str(runLumi.run): runLumi.lumis for runLumi in fileObj['runs']}
            lumiList = LumiList(runsAndLumis=runsAndLumis)
            if lumiMask:  # Apply the mask if there is one
                lumiList &= lumiMask
            if lumiList:  # Skip files with no lumis of interest
                lumisByFile[lfn] = lumiList

                for runLumi in fileObj['runs']:
                    run = runLumi.run
                    for lumi in runLumi.lumis:
                        if (runLumi.run, lumi) in lumiList:
                            self.filesByLumi[run][lumi].append(fileObj)
                            eventsByLumi[run][lumi] = round(eventsInFile / lumisInFile)

        return lumisByFile, eventsByLumi
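The masking above relies on LumiList supporting in-place intersection (&=) and (run, lumi) membership tests. A minimal standalone sketch of that behaviour, assuming the WMCore.DataStructs.LumiList import path:

from WMCore.DataStructs.LumiList import LumiList

fileLumis = LumiList(runsAndLumis={'1': [1, 2, 3, 4]})
mask = LumiList(runsAndLumis={'1': [3, 4, 5]})

fileLumis &= mask            # keep only lumis 3 and 4 of run 1
print((1, 3) in fileLumis)   # True: survives the mask
print((1, 1) in fileLumis)   # False: masked out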
Example no. 48
    def addMCFakeFile(self):
        """Add a fake file for wmbs to run production over"""
        needed = ['FirstEvent', 'FirstLumi', 'FirstRun', 'LastEvent', 'LastLumi', 'LastRun']
        for key in needed:
            if self.mask and self.mask.get(key) is None:
                msg = 'Invalid value "%s" for %s' % (self.mask.get(key), key)
                raise WorkQueueWMBSException(msg)

        locations = set()
        siteInfo = self.getLocationInfo.execute(conn=self.getDBConn(),
                                                transaction=self.existingTransaction())
        for site in siteInfo:
            if site['pnn'] in self.commonLocation:
                locations.add(site['pnn'])

        if not locations:
            msg = 'No locations to inject Monte Carlo work to, unable to proceed'
            raise WorkQueueWMBSException(msg)
        mcFakeFileName = "MCFakeFile-%s" % self.topLevelFileset.name
        wmbsFile = File(lfn=mcFakeFileName,
                        first_event=self.mask['FirstEvent'],
                        last_event=self.mask['LastEvent'],
                        events=self.mask['LastEvent'] - self.mask['FirstEvent'] + 1,  # inclusive range
                        locations=locations,
                        merged=False,  # merged causes dbs parentage relation
                        )

        if self.mask:
            lumis = list(range(self.mask['FirstLumi'], self.mask['LastLumi'] + 1))  # inclusive range
            wmbsFile.addRun(Run(self.mask['FirstRun'], *lumis))  # assume run number static
        else:
            wmbsFile.addRun(Run(1, 1))

        wmbsFile['inFileset'] = True  # file is not a parent

        logging.debug("WMBS MC Fake File: %s on Location: %s", wmbsFile['lfn'], wmbsFile['newlocations'])

        self.wmbsFilesToCreate.add(wmbsFile)

        totalFiles = self.topLevelFileset.addFilesToWMBSInBulk(self.wmbsFilesToCreate,
                                                               self.wmSpec.name(),
                                                               isDBS=self.isDBS)

        self.topLevelFileset.markOpen(False)
        return totalFiles
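Both ends of the mask are inclusive, so the fake file's event count and lumi list follow directly from it; a small check of the arithmetic with hypothetical mask values:

# Hypothetical mask; both the event and the lumi range are inclusive.
mask = {'FirstRun': 1, 'LastRun': 1,
        'FirstLumi': 1, 'LastLumi': 5,
        'FirstEvent': 1, 'LastEvent': 500}

events = mask['LastEvent'] - mask['FirstEvent'] + 1           # 500
lumis = list(range(mask['FirstLumi'], mask['LastLumi'] + 1))  # [1, 2, 3, 4, 5]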
Example no. 49
    def generateFakeMCFile(self,
                           numEvents=100,
                           firstEvent=1,
                           lastEvent=100,
                           firstLumi=1,
                           lastLumi=10,
                           index=1,
                           existingSub=None):
        """
        _generateFakeMCFile_

        Generates a fake MC file for testing production EventBased
        creation of jobs, it creates a single file subscription if no
        existing subscription is provided.
        """
        # MC comes with MCFakeFile(s)
        newFile = File("MCFakeFile-some-hash-%s" % str(index).zfill(5),
                       size=1000,
                       events=numEvents,
                       locations=set(["somese.cern.ch"]))
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        newFile.create()
        if existingSub is None:
            singleMCFileset = Fileset(name="MCTestFileset-%i" % index)
            singleMCFileset.create()
            singleMCFileset.addFile(newFile)
            singleMCFileset.commit()
            testWorkflow = Workflow(spec="spec.xml",
                                    owner="Steve",
                                    name="wf001",
                                    task="Test")
            testWorkflow.create()
            singleMCFileSubscription = Subscription(fileset=singleMCFileset,
                                                    workflow=testWorkflow,
                                                    split_algo="EventBased",
                                                    type="Production")
            singleMCFileSubscription.create()
            return singleMCFileSubscription
        else:
            existingSub['fileset'].addFile(newFile)
            existingSub['fileset'].commit()
            return existingSub
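Called again with the returned subscription, the helper extends the same fileset rather than building a new single-file subscription; a hypothetical two-file setup inside a test:

# First call creates fileset, workflow and subscription; the second
# reuses them, so the fileset ends up holding two MCFakeFiles.
sub = self.generateFakeMCFile(numEvents=100, firstEvent=1, lastEvent=100,
                              firstLumi=1, lastLumi=10, index=1)
sub = self.generateFakeMCFile(numEvents=100, firstEvent=101, lastEvent=200,
                              firstLumi=11, lastLumi=20, index=2,
                              existingSub=sub)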
Example no. 50
    def createJobCollection(self, name, nSubs, nFiles, workflowURL='test'):
        """
        _createJobCollection_

        Create a collection of jobs
        """

        myThread = threading.currentThread()

        testWorkflow = Workflow(spec=workflowURL,
                                owner="mnorman",
                                name=name,
                                task="/TestWorkload/ReReco")
        testWorkflow.create()

        for sub in range(nSubs):

            nameStr = '%s-%i' % (name, sub)

            myThread.transaction.begin()

            testFileset = Fileset(name=nameStr)
            testFileset.create()

            for f in range(nFiles):
                # pick a random site
                site = random.choice(self.sites)
                testFile = File(lfn="/lfn/%s/%i" % (nameStr, f),
                                size=1024,
                                events=10)
                testFile.setLocation(site)
                testFile.create()
                testFileset.addFile(testFile)

            testFileset.commit()
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type="Processing",
                                            split_algo="FileBased")
            testSubscription.create()

            myThread.transaction.commit()

        return
Example no. 51
def injectFilesFromDBS(inputFileset, datasetPath):
    """
    _injectFilesFromDBS_

    """
    print "injecting files from %s into %s, please wait..." % (
        datasetPath, inputFileset.name)
    args = {}
    args[
        "url"] = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
    args["version"] = "DBS_2_0_9"
    args["mode"] = "GET"
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFiles(
        path=datasetPath,
        retriveList=["retrive_block", "retrive_lumi", "retrive_run"])

    # NOTE: limit the number of jobs to create by simply using the first
    # 2 files retrieved for the needed dataset
    dbsResults = dbsResults[0:2]

    print "  found %d files, inserting into wmbs..." % (len(dbsResults))

    for dbsResult in dbsResults:
        myFile = File(lfn=dbsResult["LogicalFileName"],
                      size=dbsResult["FileSize"],
                      events=dbsResult["NumberOfEvents"],
                      checksums={"cksum": dbsResult["Checksum"]},
                      locations=set([
                          'srm.ciemat.es', 'storm-se-01.ba.infn.it',
                          'storage01.lcg.cscs.ch'
                      ]))

        myRun = Run(runNumber=dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.lumis.append(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
Example no. 52
    def testAddRunSet(self):
        """
        _testAddRunSet_

        Test the ability to add run and lumi information to a file.
        """
        testFile = File(lfn="/this/is/a/lfn",
                        size=1024,
                        events=10,
                        checksums={'cksum': 1},
                        locations="se1.fnal.gov")
        testFile.create()
        runSet = set()
        runSet.add(Run(1, *[45]))
        runSet.add(Run(2, *[67, 68]))
        testFile.addRunSet(runSet)

        assert (runSet - testFile["runs"]) == set(), \
            "Error: addRunSet is not updating set correctly"

        return
Example no. 53
def injectFilesFromDBS(inputFileset, datasetPath, runsWhiteList=[]):
    """
    _injectFilesFromDBS_

    """
    print "injecting files from %s into %s, please wait..." % (
        datasetPath, inputFileset.name)
    args = {}
    args[
        "url"] = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
    args["version"] = "DBS_2_1_1"
    args["mode"] = "GET"
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFiles(path=datasetPath,
                                  retriveList=["retrive_lumi", "retrive_run"])
    print "  found %d files, inserting into wmbs..." % (len(dbsResults))

    for dbsResult in dbsResults:
        runNumber = str(dbsResult["LumiList"][0]["RunNumber"])
        if runsWhiteList and runNumber not in runsWhiteList:
            continue
        myFile = File(lfn=dbsResult["LogicalFileName"],
                      size=dbsResult["FileSize"],
                      events=dbsResult["NumberOfEvents"],
                      checksums={"cksum": dbsResult["Checksum"]},
                      locations="cmssrm.fnal.gov",
                      merged=True)
        myRun = Run(runNumber=dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.lumis.append(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

    if len(inputFileset) < 1:
        raise Exception("No files were selected!")

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
Example no. 54
    def test06(self):
        """
        _test06_

        Test repacking of 3 lumis
        2 small lumis (single job), followed by a big one (multiple jobs)

        files for lumi 1 and 2 are below multi-lumi thresholds
        files for lumi 3 are above single-lumi threshold

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 3]:
            filecount = 2
            for i in range(filecount):
                if lumi == 3:
                    nevents = 500
                else:
                    nevents = 100
                newFile = File(makeUUID(), size=1000, events=nevents)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append({
                    'RUN': 1,
                    'LUMI': lumi,
                    'STREAM': "A",
                    'FILECOUNT': filecount,
                    'INSERT_TIME': self.currentTime,
                    'CLOSE_TIME': self.currentTime
                })
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        mySplitArgs['maxInputEvents'] = 900
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")

        self.assertEqual(len(jobGroups[0].jobs[1].getFiles()), 1,
                         "ERROR: second job does not process 1 file")

        self.assertEqual(len(jobGroups[0].jobs[2].getFiles()), 1,
                         "ERROR: third job does not process 1 file")

        return
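The three jobs follow from the maxInputEvents = 900 threshold and the event counts created above:

# Lumis 1 and 2: 2 files x 100 events each -> 400 events combined,
# under 900, so all 4 files merge into a single multi-lumi job.
smallLumisEvents = 2 * 2 * 100   # 400
# Lumi 3: 2 files x 500 events -> 1000 events, over 900, so the lumi
# is split and each file becomes its own single-file job.
bigLumiEvents = 2 * 500          # 1000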
Example no. 55
    def test05(self):
        """
        _test05_

        Test repacking of multiple lumis with holes in the lumi sequence
        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append({
                    'RUN': 1,
                    'LUMI': lumi,
                    'STREAM': "A",
                    'FILECOUNT': filecount,
                    'INSERT_TIME': self.currentTime,
                    'CLOSE_TIME': self.currentTime
                })
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds={'RUN': 1,
                                                'LUMI': 3,
                                                'STREAM': "A",
                                                'FILECOUNT': 0,
                                                'INSERT_TIME': self.currentTime,
                                                'CLOSE_TIME': self.currentTime},
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")

        return
Example no. 56
    def test03(self):
        """
        _test03_

        Test single lumi event threshold
        Single lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 8
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append({
                    'RUN': 1,
                    'LUMI': lumi,
                    'STREAM': "A",
                    'FILECOUNT': filecount,
                    'INSERT_TIME': self.currentTime,
                    'CLOSE_TIME': self.currentTime
                })
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 650
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 6,
                         "ERROR: Job does not process 6 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Split lumis were not created")

        return
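The 6 + 2 split follows from the threshold: 8 files of 100 events hold 800 events, over the 650 limit, and only whole files are packed into the first job; splitting inside a single lumi is what leaves one active split lumi.

maxInputEvents = 650
eventsPerFile = 100
filesFirstJob = maxInputEvents // eventsPerFile   # 6 files, 600 events
filesSecondJob = 8 - filesFirstJob                # 2 files, 200 events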
Example no. 57
    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works
        Test multi lumi size threshold
        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)

        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        mySplitArgs['maxSizeMultiLumi'] = self.splitArgs['maxSizeMultiLumi']
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxSizeMultiLumi'] = 5000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("Repack-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset1.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("Repack-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return
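The numbers behind the two passes, given 1000-byte files: the fileset holds 4 lumis x 2 files = 8000 bytes, and with maxSizeMultiLumi = 5000 only two whole lumis (4 files, 4000 bytes) fit under the cap. While the fileset is open the splitter presumably holds the trailing lumis back in case more data arrives; closing it releases the remaining 4 files as the second job.

bytesPerLumi = 2 * 1000
lumisPerJob = 5000 // bytesPerLumi   # 2 whole lumis fit under the cap
filesPerJob = lumisPerJob * 2        # 4 files per multi-lumi job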
Example no. 58
    def makeNJobs(self,
                  name,
                  task,
                  nJobs,
                  jobGroup,
                  fileset,
                  sub,
                  site=None,
                  bl=[],
                  wl=[]):
        """
        _makeNJobs_

        Make and return a WMBS Job and File
        This handles all those damn add-ons

        """
        # Set the CacheDir
        cacheDir = os.path.join(self.testDir, 'CacheDir')

        for n in range(nJobs):
            # First make a file
            #site = self.sites[0]
            testFile = File(lfn="/singleLfn/%s/%s" % (name, n),
                            size=1024,
                            events=10)
            if site:
                testFile.setLocation(site)
            else:
                for tmpSite in self.sites:
                    testFile.setLocation('se.%s' % (tmpSite))
            testFile.create()
            fileset.addFile(testFile)

        fileset.commit()

        index = 0
        for f in fileset.files:
            index += 1
            testJob = Job(name='%s-%i' % (name, index))
            testJob.addFile(f)
            testJob["location"] = f.getLocations()[0]
            testJob['custom']['location'] = f.getLocations()[0]
            testJob['task'] = task.getPathName()
            testJob['sandbox'] = task.data.input.sandbox
            testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl')
            testJob['mask']['FirstEvent'] = 101
            testJob['owner'] = 'tapas'
            testJob["siteBlacklist"] = bl
            testJob["siteWhitelist"] = wl
            testJob['ownerDN'] = 'tapas'
            testJob['ownerRole'] = 'cmsrole'
            testJob['ownerGroup'] = 'phgroup'

            jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub),
                                    'Job_%i' % (index))
            os.makedirs(jobCache)
            testJob.create(jobGroup)
            testJob['cache_dir'] = jobCache
            testJob.save()
            jobGroup.add(testJob)
            # pickle needs a binary-mode file handle
            with open(os.path.join(jobCache, 'job.pkl'), 'wb') as output:
                pickle.dump(testJob, output)

        return testJob, testFile
Example no. 59
    def createTestJobGroup(self, nJobs, subType="Processing", retryOnce=False):
        """
        _createTestJobGroup_

        Creates a group of several jobs
        """
        testWorkflow = Workflow(spec="spec.xml",
                                owner="Simon",
                                name=makeUUID(),
                                task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()
        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow,
                                        type=subType)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(10, *[12312]))
        testFileB.setLocation('malpaquet')
        testFileA.create()
        testFileB.create()

        for _ in range(0, nJobs):
            testJob = Job(name=makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['cache_dir'] = os.path.join(self.testDir, testJob['name'])
            os.mkdir(testJob['cache_dir'])
            testJobGroup.add(testJob)

        testJobGroup.commit()
        if retryOnce:
            self.increaseRetry.execute(testJobGroup.jobs)

        return testJobGroup
Example no. 60
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split up all the available files such that each job will process a
        maximum of 'files_per_job'.  If the 'files_per_job' parameters is not
        passed in jobs will process a maximum of 10 files.
        """

        filesPerJob   = int(kwargs.get("files_per_job", 10))
        jobsPerGroup  = int(kwargs.get("jobs_per_group", 0))
        runBoundaries = kwargs.get("respect_run_boundaries", False)
        getParents    = kwargs.get("include_parents", False)
        filesInJob    = 0
        listOfFiles   = []

        # Grab the fileset
        fileset = self.subscription.getFileset()
        try:
            fileset.load()
        except AttributeError:
            # not every fileset implementation provides load()
            pass

        #Get a dictionary of sites, files
        locationDict = self.sortByLocation()

        for location in locationDict:
            #Now we have all the files in a certain location
            fileList    = locationDict[location]
            filesInJob  = 0
            jobsInGroup = 0
            fileCounter = 0
            if len(fileList) == 0:
                #No files for this location
                #This isn't supposed to happen, but better safe than sorry
                continue
            if len(fileList) < filesPerJob and fileset.open:
                continue
            self.newGroup()
            jobRun = None
            for f in fileList:
                if getParents:
                    parentLFNs = self.findParent(lfn = f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn = lfn)
                        f['parents'].add(parent)
                fileRun = f.get('minrun', None)
                if filesInJob == 0 or filesInJob == filesPerJob or (runBoundaries and fileRun != jobRun):
                    if jobsPerGroup:
                        if jobsInGroup > jobsPerGroup:
                            self.newGroup()
                            jobsInGroup = 0

                    filesInJob   = 0
                    # We've finished a job.  Should we create another one?
                    if len(fileList) - fileCounter < filesPerJob and fileset.open:
                        # If we don't have the full job's worth of files
                        # and the fileset is still open
                        # then we shouldn't create a new job
                        continue
                    self.newJob(name = self.getJobName())

                    jobsInGroup += 1
                    jobRun       = fileRun

                filesInJob += 1
                self.currentJob.addFile(f)
                fileCounter += 1

                listOfFiles.append(f)

        return
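Stripped of the WMBS bookkeeping (locations, run boundaries, parentage, open filesets), the core of the algorithm above is plain chunking of each location's file list; a minimal standalone sketch with illustrative names:

def chunkFiles(fileList, filesPerJob=10):
    """Yield successive chunks of at most filesPerJob files,
    mirroring the files_per_job behaviour of the splitter above."""
    for i in range(0, len(fileList), filesPerJob):
        yield fileList[i:i + filesPerJob]

# 25 files with files_per_job=10 -> jobs of 10, 10 and 5 files
jobs = list(chunkFiles(list(range(25)), filesPerJob=10))
print([len(job) for job in jobs])   # [10, 10, 5]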