Example #1
    def testCreateDeleteExists(self):
        """
        _testCreateDeleteExists_

        Test the create(), delete() and exists() methods of the file class
        by creating and deleting a file.  The exists() method will be
        called before and after creation and after deletion.
        """
        testFile = File(lfn="/this/is/a/lfn",
                        size=1024,
                        events=10,
                        checksums={'cksum': 1111})

        assert testFile.exists() == False, \
               "ERROR: File exists before it was created"

        testFile.addRun(Run(1, *[45]))
        testFile.create()

        assert testFile.exists() > 0, \
               "ERROR: File does not exist after it was created"

        testFile.delete()

        assert testFile.exists() == False, \
               "ERROR: File exists after it has been deleted"
        return
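A note on the Run(1, *[45]) idiom used throughout these examples: the lumi list is unpacked into Run's varargs constructor, so Run(1, *[45]) is equivalent to Run(1, 45). A minimal illustration, assuming WMCore's Run(runNumber, *lumis) signature:

    from WMCore.DataStructs.Run import Run

    lumis = [45, 46, 47]
    run = Run(1, *lumis)   # equivalent to Run(1, 45, 46, 47)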
Example #2
    def testSetLocation(self):
        """
        _testSetLocation_

        Create a file and add a couple locations.  Load the file from the
        database to make sure that the locations were set correctly.
        """
        testFileA = File(lfn="/this/is/a/lfn",
                         size=1024,
                         events=10,
                         checksums={'cksum': 1})
        testFileA.addRun(Run(1, *[45]))
        testFileA.create()

        testFileA.setLocation(["se1.fnal.gov", "se1.cern.ch"])
        testFileA.setLocation(["bunkse1.fnal.gov", "bunkse1.cern.ch"],
                              immediateSave=False)

        testFileB = File(id=testFileA["id"])
        testFileB.loadData()

        goldenLocations = ["se1.fnal.gov", "se1.cern.ch"]

        for location in testFileB["locations"]:
            assert location in goldenLocations, \
                   "ERROR: Unknown file location"
            goldenLocations.remove(location)

        assert len(goldenLocations) == 0, \
              "ERROR: Some locations are missing"
        return
Example #3
    def testCreateTransaction(self):
        """
        _testCreateTransaction_

        Begin a transaction and then create a file in the database.  Afterwards,
        roll back the transaction.  Use the File class's exists() method to
        verify that the file doesn't exist before it was created, exists
        after it was created and doesn't exist after the transaction was rolled
        back.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        testFile = File(lfn="/this/is/a/lfn",
                        size=1024,
                        events=10,
                        checksums={'cksum': 1111})

        assert testFile.exists() == False, \
               "ERROR: File exists before it was created"

        testFile.addRun(Run(1, *[45]))
        testFile.create()

        assert testFile.exists() > 0, \
               "ERROR: File does not exist after it was created"

        myThread.transaction.rollback()

        assert testFile.exists() == False, \
               "ERROR: File exists after transaction was rolled back."
        return
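The transaction idiom above relies on a WMCore Transaction object attached to the current thread by the test harness. A hedged sketch of the full begin/commit/rollback pattern, assuming that setup (testFile as created in the example):

    import threading

    myThread = threading.currentThread()
    myThread.transaction.begin()
    try:
        testFile.create()              # runs inside the open transaction
        myThread.transaction.commit()  # persist the create()
    except Exception:
        myThread.transaction.rollback()  # undo the create() on failure
        raise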
Example #4
    def testAddToFileset(self):
        """
        _AddToFileset_

        Test to see if we can add to a fileset using the DAO
        """
        testFileset = Fileset(name="inputFileset")
        testFileset.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(1, *[45]))
        testFileA.create()
        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(1, *[45]))
        testFileB.create()

        addToFileset = self.daofactory(classname="Files.AddToFileset")
        addToFileset.execute(file=[testFileA['lfn'], testFileB['lfn']],
                             fileset=testFileset.id)

        testFileset2 = Fileset(name="inputFileset")
        testFileset2.loadData()

        self.assertEqual(len(testFileset2.files), 2)
        for file in testFileset2.files:
            self.assertTrue(file in [testFileA, testFileB])

        # Check that adding twice doesn't crash
        addToFileset.execute(file=[testFileA['lfn'], testFileB['lfn']],
                             fileset=testFileset.id)
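The addToFileset DAO above comes out of a DAOFactory lookup, which resolves "Files.AddToFileset" to a database-flavour-specific class. A sketch of the setup behind self.daofactory, following the pattern shown in the setUp() example further below:

    import logging
    import threading

    from WMCore.DAOFactory import DAOFactory

    myThread = threading.currentThread()
    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)
    addToFileset = daofactory(classname="Files.AddToFileset")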
Example #5
def sortedFilesFromMergeUnits(mergeUnits):
    """
    _sortedFilesFromMergeUnits_

    Given a list of merge units, sort them and the files they contain.
    Return a list of sorted WMBS File structures.
    """
    mergeUnits.sort(mergeUnitCompare)

    sortedFiles = []
    for mergeUnit in mergeUnits:
        mergeUnit["files"].sort(fileCompare)

        for file in mergeUnit["files"]:
            newFile = File(id=file["file_id"], lfn=file["file_lfn"],
                           events=file["file_events"])
            newFile.addRun(Run(file["file_run"], file["file_lumi"]))

            # The WMBS data structure puts locations that are passed in through
            # the constructor in the "newlocations" attribute.  We want these to
            # be in the "locations" attribute so that they get picked up by the
            # job submitter.
            newFile["locations"] = set([file["pnn"]])
            sortedFiles.append(newFile)

    return sortedFiles
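The comment block above describes a WMBS quirk worth spelling out: locations passed to the File constructor are staged in the "newlocations" attribute until the file is saved, so code that hands files straight to the job submitter assigns "locations" itself. A minimal sketch under that assumption:

    # Sketch only; assumes the WMBS File staging behaviour described above.
    newFile = File(id=42, lfn="/store/fake.root", events=100)
    newFile["locations"] = set(["T1_US_FNAL_Disk"])  # read by the job submitter
    newFile.addRun(Run(1, 45))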
Example #6
    def createMergeJob(self, mergeableFiles):
        """
        _createMergeJob_

        Create a merge job for the given merge units.  All the files contained
        in the merge units will be associated to the job.
        """
        if self.currentGroup == None:
            self.newGroup()

        self.newJob(name=self.getJobName())
        mergeableFiles.sort(fileCompare)

        for file in mergeableFiles:
            newFile = File(id=file["file_id"],
                           lfn=file["file_lfn"],
                           events=file["file_events"])

            # The WMBS data structure puts locations that are passed in through
            # the constructor in the "newlocations" attribute.  We want these to
            # be in the "locations" attribute so that they get picked up by the
            # job submitter.
            newFile["locations"] = set([file["se_name"]])
            newFile.addRun(Run(file["file_run"], file["file_lumi"]))
            self.currentJob.addFile(newFile)
            self.currentJob.addResourceEstimates(
                disk=float(file["file_size"]) / 1024)
Example #7
    def test06(self):
        """
        _test06_

        Test repacking of 3 lumis
        2 small lumis (single job), followed by a big one (multiple jobs)

        files for lumi 1 and 2 are below multi-lumi thresholds
        files for lumi 3 are above single-lumi threshold

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 3]:
            filecount = 2
            for i in range(filecount):
                if lumi == 3:
                    nevents = 500
                else:
                    nevents = 100
                newFile = File(makeUUID(), size = 1000, events = nevents)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "A",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        mySplitArgs['maxLatency'] = 50000
        mySplitArgs['maxInputEvents'] = 900
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")

        self.assertEqual(len(jobGroups[0].jobs[1].getFiles()), 1,
                         "ERROR: second job does not process 1 file")

        self.assertEqual(len(jobGroups[0].jobs[2].getFiles()), 1,
                         "ERROR: third job does not process 1 file")

        return
Example #8
def injectFilesFromDBS(inputFileset, datasetPath):
    """
    _injectFilesFromDBS_

    """
    print "injecting files from %s into %s, please wait..." % (datasetPath, inputFileset.name)
    args={}
    args["url"] = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
    args["version"] = "DBS_2_0_9"
    args["mode"] = "GET"
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFiles(path = datasetPath, retriveList = ["retrive_block","retrive_lumi", "retrive_run"])

    # NOTE: limit the number of jobs created by using only the first two files
    # returned for the dataset
    dbsResults = dbsResults[0:2]


    print "  found %d files, inserting into wmbs..." % (len(dbsResults))


    for dbsResult in dbsResults:
        myFile = File(lfn = dbsResult["LogicalFileName"], size = dbsResult["FileSize"],
                      events = dbsResult["NumberOfEvents"], checksums = {"cksum": dbsResult["Checksum"]},
                      locations = set(['srm.ciemat.es','storm-se-01.ba.infn.it','storage01.lcg.cscs.ch']))

        myRun = Run(runNumber = dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.lumis.append(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
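A hypothetical driver for injectFilesFromDBS (the fileset name and dataset path are illustrative):

    inputFileset = Fileset(name="TestFileset")
    inputFileset.create()
    injectFilesFromDBS(inputFileset, "/SomePrimary/SomeProcessed-v1/RAW")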
Example #9
    def testSetLocation(self):
        """
        _testSetLocation_

        Create a file and add a couple locations.  Load the file from the
        database to make sure that the locations were set correctly.
        """
        testFileA = File(lfn = "/this/is/a/lfn", size = 1024, events = 10,
                        checksums = {'cksum':1})
        testFileA.addRun(Run( 1, *[45]))
        testFileA.create()

        testFileA.setLocation(["se1.fnal.gov", "se1.cern.ch"])
        testFileA.setLocation(["bunkse1.fnal.gov", "bunkse1.cern.ch"],
                              immediateSave = False)

        testFileB = File(id = testFileA["id"])
        testFileB.loadData()

        goldenLocations = ["se1.fnal.gov", "se1.cern.ch"]

        for location in testFileB["locations"]:
            assert location in goldenLocations, \
                   "ERROR: Unknown file location"
            goldenLocations.remove(location)

        assert len(goldenLocations) == 0, \
              "ERROR: Some locations are missing"    
        return
Example #10
    def test03(self):
        """
        _test03_

        Test single lumi event threshold
        Single lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 8
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "A",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        mySplitArgs['maxLatency'] = 50000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 650
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 6,
                         "ERROR: Job does not process 6 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Split lumis were not created")

        return
Example #11
    def testAddToFileset(self):
        """
        _AddToFileset_

        Test to see if we can add to a fileset using the DAO
        """
        testFileset = Fileset(name = "inputFileset")
        testFileset.create()

        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10)
        testFileA.addRun(Run( 1, *[45]))
        testFileA.create()
        testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10)
        testFileB.addRun(Run( 1, *[45]))
        testFileB.create()

        addToFileset = self.daofactory(classname = "Files.AddToFileset")
        addToFileset.execute(file = [testFileA['lfn'], testFileB['lfn']],
                             fileset = testFileset.id)

        testFileset2 = Fileset(name = "inputFileset")
        testFileset2.loadData()

        self.assertEqual(len(testFileset2.files), 2)
        for file in testFileset2.files:
            self.assertTrue(file in [testFileA, testFileB])

        # Check that adding twice doesn't crash
        addToFileset.execute(file = [testFileA['lfn'], testFileB['lfn']],
                             fileset = testFileset.id)
Example #12
def injectFilesFromDBS(inputFileset, datasetPath, runsWhiteList=[]):
    """
    _injectFilesFromDBS_

    """
    print("injecting files from %s into %s, please wait..." % (datasetPath, inputFileset.name))
    args = {}
    args["url"] = "https://cmsweb-prod.cern.ch/dbs/prod/global/DBSReader"
    args["version"] = "DBS_2_1_1"
    args["mode"] = "GET"
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFileArray(path=datasetPath, retriveList=["retrive_lumi", "retrive_run"])
    print("  found %d files, inserting into wmbs..." % (len(dbsResults)))

    for dbsResult in dbsResults:
        if runsWhiteList and str(dbsResult["LumiList"][0]["RunNumber"]) not in runsWhiteList:
            continue
        myFile = File(lfn=dbsResult["LogicalFileName"], size=dbsResult["FileSize"],
                      events=dbsResult["NumberOfEvents"], checksums={"cksum": dbsResult["Checksum"]},
                      locations="cmssrm.fnal.gov", merged=True)
        myRun = Run(runNumber=dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.appendLumi(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

    if len(inputFileset) < 1:
        raise Exception("No files were selected!")

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
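Note the string comparison in the whitelist check above: run numbers in runsWhiteList must be strings, not integers. A hypothetical call (run numbers illustrative):

    injectFilesFromDBS(inputFileset, "/SomePrimary/SomeProcessed-v1/RAW",
                       runsWhiteList=["138937", "138939"])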
Example #13
    def test03(self):
        """
        _test03_

        Test input size threshold on multi lumis

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription2)

        mySplitArgs["maxInputSize"] = 1
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup")

        time.sleep(1)

        mySplitArgs["maxLatency"] = 1
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2, "ERROR: JobFactory didn't create two jobs")

        return
Example #14
    def _addACDCFileToWMBSFile(self, acdcFile, inFileset = True):
        """
        """
        wmbsParents = []
        # Pass an empty checksum since it won't be uploaded to DBS anyway
        checksums = {}
        wmbsFile = File(lfn = str(acdcFile["lfn"]),
                        size = acdcFile["size"],
                        events = acdcFile["events"],
                        checksums = checksums,
                        #TODO: need to get list of parent lfn
                        parents = acdcFile["parents"],
                        locations = acdcFile["locations"],
                        merged = acdcFile.get('merged', True))

        # TODO: need to get the lumi lists
        for run in acdcFile['runs']:
            wmbsFile.addRun(run)
        

        dbsFile = self._convertACDCFileToDBSFile(acdcFile)
        self._addToDBSBuffer(dbsFile, checksums, acdcFile["locations"])
            
        logging.info("WMBS File: %s\n on Location: %s" 
                     % (wmbsFile['lfn'], wmbsFile['locations']))

        wmbsFile['inFileset'] = bool(inFileset)
            
        self.wmbsFilesToCreate.append(wmbsFile)
        
        return wmbsFile
Example #15
    def generateFakeMCFile(self, numEvents = 100, firstEvent = 1,
                           lastEvent = 100, firstLumi = 1, lastLumi = 10,
                           index = 1):
        #MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name = "MCTestFileset %i" % index)
        singleMCFileset.create()
        newFile = File("MCFakeFileTest %i" % index, size = 1000,
                       events = numEvents,
                       locations = set(["somese.cern.ch"]))
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        newFile.create()
        singleMCFileset.addFile(newFile)
        singleMCFileset.commit()
        testWorkflow = Workflow(spec = "spec.xml", owner = "Steve",
                                name = "wf001", task="Test")
        testWorkflow.create()

        singleMCFileSubscription = Subscription(fileset = singleMCFileset,
                                                workflow = testWorkflow,
                                                split_algo = "EventBased",
                                                type = "Production")
        singleMCFileSubscription.create()
        return singleMCFileSubscription
Example #17
    def populateACDCCouch(self, numFiles = 2, lumisPerJob = 35,
                          eventsPerJob = 20000):
        """
        _populateACDCCouch_

        Create production files in couchDB to test the creation
        of ACDC jobs for the EventBased algorithm
        """
        # Define some constants
        workflowName = "ACDC_TestEventBased"
        filesetName = "/%s/Production" % workflowName
        owner = "*****@*****.**"
        group = "unknown"

        lumisPerFile = lumisPerJob * 250
        for i in range(numFiles):
            for j in range(250):
                lfn = "MCFakeFile-some-hash-%s" % str(i).zfill(5)
                acdcFile = File(lfn = lfn, size = 100, events = eventsPerJob, locations = self.validLocations,
                                merged = False, first_event = 1)
                run = Run(1, *range(1 + (i * lumisPerFile) + j * lumisPerJob,
                                    (j + 1) * lumisPerJob + (i * lumisPerFile) + 2))
                acdcFile.addRun(run)
                acdcDoc = {"collection_name" : workflowName,
                           "collection_type" : "ACDC.CollectionTypes.DataCollection",
                           "files" : {lfn : acdcFile},
                           "fileset_name" : filesetName,
                           "owner" : {"user": owner,
                                      "group" : group}}
                self.couchDB.queue(acdcDoc)

        self.couchDB.commit()
        return
Example #18
    def test05(self):
        """
        _test05_
        Test multi lumis express merges with holes
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 4]:
            for i in range(2):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave=False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription2)

        time.sleep(1)

        mySplitArgs['maxLatency'] = 1
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        return
Example #19
def injectFilesFromDBS(inputFileset, datasetPath, runsWhiteList=[]):
    """
    _injectFilesFromDBS_

    """
    print "injecting files from %s into %s, please wait..." % (datasetPath, inputFileset.name)
    args={}
    args["url"] = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
    args["version"] = "DBS_2_1_1"
    args["mode"] = "GET"
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFiles(path = datasetPath, retriveList = ["retrive_lumi", "retrive_run"])
    print "  found %d files, inserting into wmbs..." % (len(dbsResults))

    for dbsResult in dbsResults:
        if runsWhiteList and str(dbsResult["LumiList"][0]["RunNumber"]) not in runsWhiteList:
            continue
        myFile = File(lfn = dbsResult["LogicalFileName"], size = dbsResult["FileSize"],
                      events = dbsResult["NumberOfEvents"], checksums = {"cksum": dbsResult["Checksum"]},
                      locations = "cmssrm.fnal.gov", merged = True)
        myRun = Run(runNumber = dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.lumis.append(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

    if len(inputFileset) < 1:
        raise Exception, "No files were selected!"

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
Example #20
def createCommonFileset():
    """
    Create a simple fileset with 2 files at the same location
    """
    multipleFilesFileset = Fileset(name="TestFileset")

    newFile = File("/some/file/test1", size=1000, events=100)
    newFile.addRun(Run(1, *[1, 3, 4, 5, 6, 7]))
    newFile.addRun(Run(2, *[1, 2, 4, 5, 6, 7]))
    newFile.setLocation('T2_CH_CERN')
    multipleFilesFileset.addFile(newFile)

    newFile = File("/some/file/test2", size=2000, events=200)
    newFile.addRun(Run(3, *[2, 8]))
    newFile.addRun(Run(4, *[3, 8]))
    newFile.setLocation('T2_CH_CERN')
    multipleFilesFileset.addFile(newFile)

    newFile = File("/some/file/test3", size=3000, events=300)
    newFile.addRun(Run(5, *[10, 11, 12]))
    newFile.addRun(Run(6, *[10, 11, 12]))
    newFile.setLocation('T2_CH_CERN')
    multipleFilesFileset.addFile(newFile)

    newFile = File("/some/file/test4", size=4000, events=400)
    newFile.addRun(Run(2, *[3, 8, 9]))
    newFile.addRun(Run(3, *[3, 4, 5, 6]))
    newFile.setLocation('T2_CH_CERN')
    multipleFilesFileset.addFile(newFile)

    multipleFilesFileset.create()
    return multipleFilesFileset
Example #21
    def test05(self):
        """
        _test05_

        Test repacking of multiple lumis with holes in the lumi sequence
        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "A",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 3,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 0,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")


        return
Example #22
    def testDeleteTransaction(self):
        """
        _testDeleteTransaction_

        Create a file and commit it to the database.  Start a new transaction
        and delete the file.  Roll back the transaction after the file has been
        deleted.  Use the file class's exists() method to verify that the file
        does not exist after it has been deleted but does exist after the
        transaction is rolled back.
        """
        testFile = File(lfn = "/this/is/a/lfn", size = 1024, events = 10,
                        checksums={'cksum': 1111})

        assert testFile.exists() == False, \
               "ERROR: File exists before it was created"

        testFile.addRun(Run(1, *[45]))
        testFile.create()

        assert testFile.exists() > 0, \
               "ERROR: File does not exist after it was created"

        myThread = threading.currentThread()
        myThread.transaction.begin()
        
        testFile.delete()

        assert testFile.exists() == False, \
               "ERROR: File exists after it has been deleted"

        myThread.transaction.rollback()

        assert testFile.exists() > 0, \
               "ERROR: File does not exist after transaction was rolled back."
        return
Example #23
    def populateACDCCouch(self, numFiles=3, lumisPerJob=4, eventsPerJob=100, numberOfJobs=250):
        """
        _populateACDCCouch_

        Create production files in couchDB to test the creation
        of ACDC jobs for the EventBased algorithm.
        """
        # Define some constants
        workflowName = "ACDC_TestEventBased"
        filesetName = "/%s/Production" % workflowName

        lumisPerFile = lumisPerJob * numberOfJobs
        for i in range(numFiles):
            lfn = "MCFakeFile-some-hash-%s" % str(i).zfill(5)
            for j in range(numberOfJobs):
                firstEvent = j * eventsPerJob + 1
                acdcFile = File(lfn=lfn, size=1024, events=eventsPerJob, locations=self.validLocations,
                                merged=False, first_event=firstEvent)
                run = Run(1, *range(1 + j * lumisPerJob + (i * lumisPerFile),
                                    1 + (j + 1) * lumisPerJob + (i * lumisPerFile)))

                acdcFile.addRun(run)
                acdcDoc = {"collection_name": workflowName,
                           "collection_type": "ACDC.CollectionTypes.DataCollection",
                           "files": {lfn: acdcFile},
                           "fileset_name": filesetName}
                self.couchDB.queue(acdcDoc)

        self.couchDB.commit()
        return
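A worked example of the lumi window computed above, for i = 0, j = 1 with the default lumisPerJob = 4 and numberOfJobs = 250 (so lumisPerFile = 1000): each job gets exactly lumisPerJob consecutive lumis.

    i, j, lumisPerJob, lumisPerFile = 0, 1, 4, 1000
    window = range(1 + j * lumisPerJob + i * lumisPerFile,
                   1 + (j + 1) * lumisPerJob + i * lumisPerFile)
    assert list(window) == [5, 6, 7, 8]   # lumis 5..8 for the second job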
Example #24
    def test05(self):
        """
        _test05_

        Test multi lumis express merges with holes

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 4]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        time.sleep(1)

        mySplitArgs['maxLatency'] = 1
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        return
Example #25
    def testCreateWithLocation(self):
        """
        _testCreateWithLocation_

        Create a file and add a couple locations.  Load the file from the
        database to make sure that the locations were set correctly.
        """
        testFileA = File(lfn = "/this/is/a/lfn", size = 1024, events = 10,
                        checksums = {'cksum':1},
                        locations = set(["T1_US_FNAL_Disk", "T2_CH_CERN"]))
        testFileA.addRun(Run( 1, *[45]))
        testFileA.create()


        testFileB = File(id = testFileA["id"])
        testFileB.loadData()

        goldenLocations = ["T1_US_FNAL_Disk", "T2_CH_CERN"]

        for location in testFileB["locations"]:
            assert location in goldenLocations, \
                   "ERROR: Unknown file location"
            goldenLocations.remove(location)

        assert len(goldenLocations) == 0, \
              "ERROR: Some locations are missing"
        return
Example #26
    def testSetLocationByLFN(self):
        """
        _testSetLocationByLFN_

        Create a file and add a couple locations.  Load the file from the
        database to make sure that the locations were set correctly.
        """
        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10,
                        checksums = {'cksum':1})
        testFileA.addRun(Run( 1, *[45]))
        testFileA.create()
        testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10,
                        checksums = {'cksum':1})
        testFileB.addRun(Run( 1, *[45]))
        testFileB.create()

        parentAction = self.daofactory(classname = "Files.SetLocationByLFN")
        binds = [{'lfn': "/this/is/a/lfnA", 'location': 'se1.fnal.gov'},
                 {'lfn': "/this/is/a/lfnB", 'location': 'se1.fnal.gov'}]
        parentAction.execute(lfn = binds)

        testFileC = File(id = testFileA["id"])
        testFileC.loadData()
        testFileD = File(id = testFileB["id"])
        testFileD.loadData()

        self.assertEqual(testFileC['locations'], set(['se1.fnal.gov']))
        self.assertEqual(testFileD['locations'], set(['se1.fnal.gov']))

        
        return
Example #27
    def testCreateTransaction(self):
        """
        _testCreateTransaction_

        Begin a transaction and then create a file in the database.  Afterwards,
        roll back the transaction.  Use the File class's exists() method to
        verify that the file doesn't exist before it was created, exists
        after it was created and doesn't exist after the transaction was rolled
        back.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()
        
        testFile = File(lfn = "/this/is/a/lfn", size = 1024, events = 10, checksums={'cksum':1111})

        assert testFile.exists() == False, \
               "ERROR: File exists before it was created"

        testFile.addRun(Run(1, *[45]))
        testFile.create()

        assert testFile.exists() > 0, \
               "ERROR: File does not exist after it was created"

        myThread.transaction.rollback()

        assert testFile.exists() == False, \
               "ERROR: File exists after transaction was rolled back."
        return    
Example #28
    def createSubscription(self,
                           nFiles,
                           lumisPerFile,
                           twoSites=False,
                           rand=False):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name=baseName)
        testFileset.create()
        parentFile = File('%s_parent' % baseName,
                          size=1000,
                          events=100,
                          locations=set(["T1_US_FNAL_Disk"]))
        parentFile.create()
        for i in range(nFiles):
            newFile = File(lfn='%s_%i' % (baseName, i),
                           size=1000,
                           events=100,
                           locations="T1_US_FNAL_Disk")
            lumis = []
            for lumi in range(lumisPerFile):
                if rand:
                    lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                else:
                    lumis.append((100 * i) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            newFile.addParent(parentFile['lfn'])
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = File(lfn='%s_%i_2' % (baseName, i),
                               size=1000,
                               events=100,
                               locations="T2_CH_CERN")
                lumis = []
                for lumi in range(lumisPerFile):
                    if rand:
                        lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                    else:
                        lumis.append((100 * i) + lumi)
                newFile.addRun(Run(i, *lumis))
                newFile.create()
                newFile.addParent(parentFile['lfn'])
                testFileset.addFile(newFile)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="LumiBased",
                                        type="Processing")
        testSubscription.create()

        return testSubscription
Example #29
    def createTestJob(subscriptionType="Merge"):
        """
        _createTestJob_

        Create a test job with two files as input.  This will also create the
        appropriate workflow, jobgroup and subscription.
        """

        testWorkflow = Workflow(spec=makeUUID(), owner="Simon", name=makeUUID(), task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow, type=subscriptionType)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(1, *[45]))
        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(1, *[46]))
        testFileA.create()
        testFileB.create()

        testJob = Job(name=makeUUID(), files=[testFileA, testFileB])
        testJob["couch_record"] = "somecouchrecord"
        testJob["location"] = "test.site.ch"
        testJob.create(group=testJobGroup)
        testJob.associateFiles()

        return testJob
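A usage sketch for the factory above (hypothetical test body):

    testJob = createTestJob(subscriptionType="Processing")
    assert len(testJob.getFiles()) == 2   # the two input files created above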
Example #30
    def testD_NonContinuousLumis(self):
        """
        _NonContinuousLumis_

        Test whether LumiBased splitting works when the lumis are non-continuous
        """

        baseName = makeUUID()
        nFiles = 10

        testFileset = Fileset(name = baseName)
        testFileset.create()
        for i in range(nFiles):
            newFile = File(lfn = '%s_%i' % (baseName, i), size = 1000,
                           events = 100, locations = "somese.cern.ch")
            # Set to two non-continuous lumi numbers
            lumis = [100 + i, 200 + i]
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            testFileset.addFile(newFile)

        testFileset.commit()


        testSubscription = Subscription(fileset = testFileset,
                                         workflow = self.testWorkflow,
                                         split_algo = "LumiBased",
                                         type = "Processing")
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = testSubscription)

        jobGroups = jobFactory(lumis_per_job = 2,
                               halt_job_on_file_boundaries = False,
                               splitOnRun = False,
                               performance = self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 10)
        for j in jobs:
            runs = j['mask'].getRunAndLumis()
            for r in runs.keys():
                self.assertEqual(len(runs[r]), 2)
                for l in runs[r]:
                    # Each run should have two lumis
                    # Each lumi should be of form [x, x]
                    # meaning that the first and last lumis are the same
                    self.assertEqual(len(l), 2)
                    self.assertEqual(l[0], l[1])
            self.assertEqual(j['estimatedJobTime'], 100 * 12)
            self.assertEqual(j['estimatedDiskUsage'], 100 * 400)
            self.assertEqual(j['estimatedMemoryUsage'], 2300)


        return
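For reference, the shape the assertions above rely on: Job['mask'].getRunAndLumis() maps run number to a list of [firstLumi, lastLumi] pairs, and with non-continuous single lumis each pair collapses to [x, x]. Illustrative values:

    runs = {3: [[103, 103], [203, 203]]}   # run 3, lumis 103 and 203
    for r, ranges in runs.items():
        for first, last in ranges:
            assert first == last   # each range covers a single lumi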
Example #31
    def testAddDupsToFilesetBulk(self):
        """
        _AddToDupsFilesetBulk_

        Same as testAddDupsToFileset() but faster
        """
        testWorkflowA = Workflow(spec = 'hello', owner = "mnorman",
                                 name = "wf001", task="basicWorkload/Production")
        testWorkflowA.create()
        testWorkflowB = Workflow(spec = 'hello', owner = "mnorman",
                                 name = "wf001", task="basicWorkload/Production2")
        testWorkflowB.create()

        testFilesetA = Fileset(name = "inputFilesetA")
        testFilesetA.create()
        testFilesetB = Fileset(name = "inputFilesetB")
        testFilesetB.create()

        testSubscriptionA = Subscription(workflow = testWorkflowA, fileset = testFilesetA)
        testSubscriptionA.create()
        testSubscriptionB = Subscription(workflow = testWorkflowB, fileset = testFilesetB)
        testSubscriptionB.create()

        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10, locations = ['SiteA'])
        testFileA.addRun(Run( 1, *[45]))
        testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10, locations = ['SiteB'])
        testFileB.addRun(Run( 1, *[45]))

        addFilesToWMBSInBulk(testFilesetA.id, "wf001",
                             [testFileA, testFileB],
                             conn = testFileA.getDBConn(),
                             transaction = testFileA.existingTransaction())

        testFileset2 = Fileset(name = "inputFilesetA")
        testFileset2.loadData()

        self.assertEqual(len(testFileset2.files), 2)
        for file in testFileset2.files:
            self.assertTrue(file in [testFileA, testFileB])

        # Check that adding twice doesn't crash
        addFilesToWMBSInBulk(testFilesetA.id, "wf001",
                             [testFileA, testFileB],
                             conn = testFileA.getDBConn(),
                             transaction = testFileA.existingTransaction())

        # Files should not get added to fileset B because fileset A is associated
        # with wf001.
        addFilesToWMBSInBulk(testFilesetB.id, "wf001",
                             [testFileA, testFileB],
                             conn = testFileA.getDBConn(),
                             transaction = testFileA.existingTransaction())

        testFileset2 = Fileset(name = "inputFilesetB")
        testFileset2.loadData()

        self.assertEqual(len(testFileset2.files), 0)
        return
Example #32
    def notestTwoJobGroups(self):
        """
        Test two job groups with a shared fileset. (Minimal part of testGetLocations which was failing)
        """

        testWorkflow1 = Workflow(spec="spec.xml",
                                 owner="Simon",
                                 name="wf001",
                                 task="Test1")
        testWorkflow1.create()

        testWMBSFileset1 = WMBSFileset(name="TestFileset1")
        testWMBSFileset1.create()

        testSubscription1 = Subscription(fileset=testWMBSFileset1,
                                         workflow=testWorkflow1)
        testSubscription1.create()

        testJobGroup1 = JobGroup(subscription=testSubscription1)
        testJobGroup1.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.create()

        testJobA = Job(name="TestJobA")
        testJobA.addFile(testFileA)
        testJobGroup1.add(testJobA)

        testJobGroup1.commit()

        testWorkflow2 = Workflow(spec="spec.xml",
                                 owner="Simon",
                                 name="wf002",
                                 task="Test2")
        testWorkflow2.create()

        testWMBSFileset2 = WMBSFileset(name="TestFileset1")
        testWMBSFileset2.create()

        testSubscription2 = Subscription(fileset=testWMBSFileset2,
                                         workflow=testWorkflow2)
        testSubscription2.create()

        testJobGroup2 = JobGroup(subscription=testSubscription2)
        testJobGroup2.create()

        testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10)
        testFileC.addRun(Run(10, *[12312]))

        testFileC.create()

        testJobA1 = Job(name="TestJobA1")
        testJobA1.addFile(testFileC)

        testJobGroup2.add(testJobA1)
        testJobGroup2.commit()
Example #33
    def testAddDupsToFileset(self):
        """
        _AddToDupsFileset_

        Verify that the dups version of the AddToFileset DAO will not add files
        to a fileset if they're already associated to another fileset with the
        same workflow.
        """
        testWorkflowA = Workflow(spec = 'hello', owner = "mnorman",
                                 name = "wf001", task="basicWorkload/Production")
        testWorkflowA.create()
        testWorkflowB = Workflow(spec = 'hello', owner = "mnorman",
                                 name = "wf001", task="basicWorkload/Production2")
        testWorkflowB.create()        

        testFilesetA = Fileset(name = "inputFilesetA")
        testFilesetA.create()
        testFilesetB = Fileset(name = "inputFilesetB")
        testFilesetB.create()        

        testSubscriptionA = Subscription(workflow = testWorkflowA, fileset = testFilesetA)
        testSubscriptionA.create()
        testSubscriptionB = Subscription(workflow = testWorkflowB, fileset = testFilesetB)
        testSubscriptionB.create()        

        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10)
        testFileA.addRun(Run( 1, *[45]))
        testFileA.create()
        testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10)
        testFileB.addRun(Run( 1, *[45]))
        testFileB.create()

        addToFileset = self.daofactory(classname = "Files.AddDupsToFileset")
        addToFileset.execute(file = [testFileA['lfn'], testFileB['lfn']],
                             fileset = testFilesetA.id, workflow = "wf001")

        testFileset2 = Fileset(name = "inputFilesetA")
        testFileset2.loadData()

        self.assertEqual(len(testFileset2.files), 2)
        for file in testFileset2.files:
            self.assertTrue(file in [testFileA, testFileB])

        # Check that adding twice doesn't crash
        addToFileset.execute(file = [testFileA['lfn'], testFileB['lfn']],
                             fileset = testFilesetA.id, workflow = "wf001")

        # Files should not get added to fileset B because fileset A is associated
        # with wf001.
        addToFileset.execute(file = [testFileA['lfn'], testFileB['lfn']],
                             fileset = testFilesetB.id, workflow = "wf001")

        testFileset2 = Fileset(name = "inputFilesetB")
        testFileset2.loadData()

        self.assertEqual(len(testFileset2.files), 0)
        return
Example #34
    def test06(self):
        """
        _test06_

        Test max input files threshold for multi lumi

        3 same size lumis

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return
Example #35
    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules=["WMCore.WMBS"])

        self.splitterFactory = SplitterFactory(package="WMCore.JobSplitting")

        myThread = threading.currentThread()
        self.myThread = myThread
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)
        self.WMBSFactory = daoFactory

        config = self.getConfig()
        self.changer = ChangeState(config)

        myResourceControl = ResourceControl()
        myResourceControl.insertSite("T1_US_FNAL", 10, 20, "T1_US_FNAL_Disk",
                                     "T1_US_FNAL")
        myResourceControl.insertSite("T1_US_FNAL", 10, 20, "T3_US_FNALLPC",
                                     "T1_US_FNAL")
        myResourceControl.insertSite("T2_CH_CERN", 10, 20, "T2_CH_CERN",
                                     "T2_CH_CERN")

        self.fileset1 = Fileset(name="TestFileset1")
        for fileNum in range(11):
            newFile = File("/some/file/name%d" % fileNum,
                           size=1000,
                           events=100)
            newFile.addRun(Run(1, *[1]))
            newFile.setLocation('T1_US_FNAL_Disk')
            self.fileset1.addFile(newFile)

        self.fileset1.create()

        workflow1 = Workflow(spec="spec.xml",
                             owner="hufnagel",
                             name="TestWorkflow1",
                             task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Harvest",
                                          type="Harvesting")

        self.subscription1.create()
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        return
Example #37
    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works

        Test max edm size threshold for single lumi

        small lumi, followed by over-large lumi
        expect 1 job for the small lumi and 2 jobs for the over-large one

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2 * lumi):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxEdmSize'] = 13000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("RepackMerge-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 3,
                         "ERROR: Job does not process 3 files")

        job = jobGroups[0].jobs[2]
        self.assertEqual(len(job.getFiles()), 1,
                         "ERROR: Job does not process 1 file")

        return
Beispiel #39
0
    def _addDBSFileToWMBSFile(self, dbsFile, storageElements, inFileset=True):
        """
        Two assumptions are made for this method to behave properly:
        1. DBS returns only one level of ParentList.
           If DBS returns multiple levels of parentage, they will still get handled,
           but that might not be what we want. In that case, restrict to one level.
        2. Parent files are in the same location as child files.
           This is not true in the general case, but WorkQueue should only select
           work where child and parent files are co-located.
        """
        # TODO get dbsFile with lumi event information
        wmbsParents = []
        dbsFile.setdefault("ParentList", [])
        for parent in dbsFile["ParentList"]:
            wmbsParents.append(
                self._addDBSFileToWMBSFile(parent,
                                           storageElements,
                                           inFileset=False))

        checksums = {}
        if dbsFile.get('Checksum'):
            checksums['cksum'] = dbsFile['Checksum']
        if dbsFile.get('Adler32'):
            checksums['adler32'] = dbsFile['Adler32']

        wmbsFile = File(
            lfn=dbsFile["LogicalFileName"],
            size=dbsFile["FileSize"],
            events=dbsFile["NumberOfEvents"],
            checksums=checksums,
            # TODO: need to get list of parent lfn
            parents=wmbsParents,
            locations=set(storageElements))

        for lumi in dbsFile['LumiList']:
            if isinstance(lumi['LumiSectionNumber'], list):
                lumiSecList = (
                    list(zip(lumi['LumiSectionNumber'], lumi['EventCount']))
                    if 'EventCount' in lumi else lumi['LumiSectionNumber'])
                run = Run(lumi['RunNumber'], lumiSecList)
            else:
                lumiSecTuple = ((lumi['LumiSectionNumber'],
                                 lumi['EventCount']) if 'EventCount' in lumi
                                else lumi['LumiSectionNumber'])
                run = Run(lumi['RunNumber'], lumiSecTuple)
            wmbsFile.addRun(run)

        self._addToDBSBuffer(dbsFile, checksums, storageElements)

        logging.debug("WMBS File: %s on Location: %s", wmbsFile['lfn'],
                      wmbsFile['newlocations'])

        wmbsFile['inFileset'] = bool(inFileset)
        self.wmbsFilesToCreate.add(wmbsFile)

        return wmbsFile
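
The lumi loop above accepts both a flat and a list form of LumiSectionNumber, zipping in per-lumi event counts when DBS provides them. A hypothetical record in the shape this method expects (field names taken from the accessors above, all concrete values invented):

    dbsFile = {
        "LogicalFileName": "/store/data/example.root",
        "FileSize": 1024,
        "NumberOfEvents": 100,
        "Checksum": 1111,
        "Adler32": "deadbeef",
        "ParentList": [],
        # flat form: one lumi per entry
        "LumiList": [{"RunNumber": 1, "LumiSectionNumber": 45, "EventCount": 100}],
        # list form, zipped into (lumi, events) pairs when EventCount is present:
        # "LumiList": [{"RunNumber": 1, "LumiSectionNumber": [45, 46], "EventCount": [50, 50]}],
    }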
Beispiel #40
0
    def test10(self):
        """
        _test10_
        Test merging of multiple lumis with holes in the lumi sequence.
        A hole occurs when a lumi produced no streamer files. Multi-lumi input.

        This only works with a single hole, since a merged file is created even
        when it is smaller than minInputSize.

        It was changed because maxInputEvents is no longer used.
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 4]:
            for i in range(2):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave=False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription2)

        mySplitArgs['minInputSize'] = 100000
        mySplitArgs['maxInputSize'] = 200000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds={
            'RUN': 1,
            'LUMI': 3,
            'STREAM': "A",
            'FILECOUNT': 0,
            'INSERT_TIME': self.currentTime,
            'CLOSE_TIME': self.currentTime
        },
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return
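
The InsertClosedLumi record is what lets the merge cross the hole: lumi 3 has no streamer files, but once it is recorded as closed with FILECOUNT 0 the splitter can treat the run as contiguous. A sketch of that contiguity reasoning, not the splitter's actual code:

    presentLumis = {1, 2, 4}     # lumis with streamer files
    closedEmptyLumis = {3}       # closed via InsertClosedLumi with FILECOUNT 0
    mergeable = all(l in presentLumis or l in closedEmptyLumis
                    for l in range(min(presentLumis), max(presentLumis) + 1))
    assert mergeable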
Beispiel #41
0
    def test09(self):
        """
        _test09_

        Test under-merge (over the merge event threshold):

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size).

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        mySplitArgs['maxInputEvents'] = 300
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return
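
The two phases above follow from the size thresholds: lumi 2 (two 4000-byte files) sits inside [minInputSize, maxInputSize] and is merged while the fileset is still open, while lumi 1 (two 1000-byte files) stays below minInputSize and is only flushed into its own job once the fileset closes. A quick check of that arithmetic:

    lumiSizes = {1: 2 * 1000, 2: 2 * 4000}
    minInputSize, maxInputSize = 3000, 9000
    assert minInputSize <= lumiSizes[2] <= maxInputSize   # merged immediately
    assert lumiSizes[1] < minInputSize                    # waits for fileset close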
Beispiel #43
0
    def addMCFakeFile(self):
        """Add a fake file for wmbs to run production over"""
        needed = [
            'FirstEvent', 'FirstLumi', 'FirstRun', 'LastEvent', 'LastLumi',
            'LastRun'
        ]
        for key in needed:
            if self.mask and self.mask.get(key) is None:
                msg = 'Invalid value "%s" for %s' % (self.mask.get(key), key)
                raise WorkQueueWMBSException(msg)

        locations = set()
        siteInfo = self.getLocationInfo.execute(
            conn=self.getDBConn(), transaction=self.existingTransaction())
        for site in siteInfo:
            if site['pnn'] in self.commonLocation:
                locations.add(site['pnn'])

        if not locations:
            msg = 'No locations to inject Monte Carlo work to, unable to proceed'
            raise WorkQueueWMBSException(msg)
        mcFakeFileName = ("MCFakeFile-%s" % self.topLevelFileset.name).encode(
            'ascii', 'ignore')
        wmbsFile = File(
            lfn=mcFakeFileName,
            first_event=self.mask['FirstEvent'],
            last_event=self.mask['LastEvent'],
            events=self.mask['LastEvent'] - self.mask['FirstEvent'] +
            1,  # inclusive range
            locations=locations,
            merged=False,  # merged causes dbs parentage relation
        )

        if self.mask:
            lumis = list(
                range(self.mask['FirstLumi'],
                      self.mask['LastLumi'] + 1))  # inclusive range
            wmbsFile.addRun(Run(self.mask['FirstRun'],
                                *lumis))  # assume run number static
        else:
            wmbsFile.addRun(Run(1, 1))

        wmbsFile['inFileset'] = True  # file is not a parent

        logging.debug("WMBS MC Fake File: %s on Location: %s", wmbsFile['lfn'],
                      wmbsFile['newlocations'])

        self.wmbsFilesToCreate.add(wmbsFile)

        totalFiles = self.topLevelFileset.addFilesToWMBSInBulk(
            self.wmbsFilesToCreate, self.wmSpec.name(), isDBS=self.isDBS)

        self.topLevelFileset.markOpen(False)
        return totalFiles
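
The event and lumi ranges above are inclusive on both ends, which is why 1 is added when computing the event count and the lumi list. Restated with a hypothetical mask:

    mask = {'FirstEvent': 1, 'LastEvent': 1000,
            'FirstLumi': 1, 'LastLumi': 10,
            'FirstRun': 1, 'LastRun': 1}
    events = mask['LastEvent'] - mask['FirstEvent'] + 1           # 1000 events
    lumis = list(range(mask['FirstLumi'], mask['LastLumi'] + 1))  # lumis 1..10
    assert events == 1000 and len(lumis) == 10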
Beispiel #44
0
    def _addACDCFileToWMBSFile(self, acdcFile, inFileset=True):
        """
        Adds the ACDC files into the WMBS database.
        """
        wmbsParents = []
        # TODO: this check can be removed once ErrorHandler filters parent files for unmerged data
        if acdcFile["parents"]:
            firstParent = next(iter(acdcFile["parents"]))
            # If the file is merged and has unmerged parents, skip the WMBS parentage
            if acdcFile.get("merged", 0) and ("/store/unmerged/" in firstParent
                                              or "MCFakeFile" in firstParent):
                # don't set the parents
                pass
            else:
                # set the parentage for all the unmerged parents
                for parent in acdcFile["parents"]:
                    logging.debug("WMBS ACDC Parent File: %s", parent)
                    parent = self._addACDCFileToWMBSFile(DatastructFile(
                        lfn=parent,
                        locations=acdcFile["locations"],
                        merged=True),
                                                         inFileset=False)
                    wmbsParents.append(parent)

        # pass an empty checksum since it won't be uploaded to DBS anyway
        checksums = {}
        wmbsFile = File(lfn=str(acdcFile["lfn"]),
                        size=acdcFile["size"],
                        events=acdcFile["events"],
                        first_event=acdcFile.get('first_event', 0),
                        last_event=acdcFile.get('last_event', 0),
                        checksums=checksums,
                        parents=wmbsParents,
                        locations=acdcFile["locations"],
                        merged=acdcFile.get('merged', True))

        ## TODO need to get the lumi lists
        for run in acdcFile['runs']:
            wmbsFile.addRun(run)

        if not acdcFile["lfn"].startswith("/store/unmerged") or wmbsParents:
            # only add to DBSBuffer if is not unmerged file or it has parents.
            dbsFile = self._convertACDCFileToDBSFile(acdcFile)
            self._addToDBSBuffer(dbsFile, checksums, acdcFile["locations"])

        logging.debug("WMBS ACDC File: %s on Location: %s", wmbsFile['lfn'],
                      wmbsFile['newlocations'])

        wmbsFile['inFileset'] = bool(inFileset)

        self.wmbsFilesToCreate.add(wmbsFile)

        return wmbsFile
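
The parentage rule above can be stated compactly: a merged file whose first parent is unmerged (or an MC fake) gets no WMBS parents; everything else gets the full parent list. A sketch with hypothetical records:

    skipCase = {"lfn": "/store/data/child_a.root", "merged": 1,
                "parents": ["/store/unmerged/parent.root"]}
    keepCase = {"lfn": "/store/data/child_b.root", "merged": 1,
                "parents": ["/store/data/parent.root"]}
    for acdc in (skipCase, keepCase):
        first = next(iter(acdc["parents"]))
        skip = bool(acdc.get("merged", 0)) and (
            "/store/unmerged/" in first or "MCFakeFile" in first)
        print(acdc["lfn"], "-> skip parents" if skip else "-> set parents")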
Beispiel #46
0
    def createLargerTestJobGroup(self, commitFlag=True):
        """
        _createLargerTestJobGroup_

        """
        testWorkflow = Workflow(spec="spec.xml",
                                owner="Simon",
                                name="wf001",
                                task="Test")
        testWorkflow.create()

        testWMBSFileset = WMBSFileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10)
        testFileC.addRun(Run(10, *[12312]))
        testFileC.setLocation("goodse.cern.ch")
        testFileC.setLocation("malpaquet")

        testFileD = File(lfn="/this/is/a/lfnD", size=1024, events=10)
        testFileD.addRun(Run(10, *[12312]))
        testFileD.setLocation("goodse.cern.ch")
        testFileD.setLocation("malpaquet")

        testFileC.create()
        testFileD.create()

        testJobA = Job(name="TestJobA1")
        testJobA.addFile(testFileC)

        testJobB = Job(name="TestJobB1")
        testJobB.addFile(testFileD)

        testJobGroup.add(testJobA)
        testJobGroup.add(testJobB)

        for i in range(0, 100):
            testJob = Job(name="TestJob%i" % (i))
            testJob.addFile(testFileC)
            testJobGroup.add(testJob)

        if commitFlag:
            testJobGroup.commit()

        return testJobGroup
Beispiel #47
0
    def createFile(self, lfn, events, run, lumis, location):
        """
        _createFile_

        Create a file for testing
        """
        newFile = File(lfn=lfn, size=1000, events=events)
        lumiList = []
        for lumi in range(lumis):
            lumiList.append((run * lumis) + lumi)
        newFile.addRun(Run(run, *lumiList))
        newFile.setLocation(location)
        return newFile
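
The helper assigns file i the lumi numbers (run * lumis) + lumi, which keeps the lumi ranges of different runs disjoint. A quick check with hypothetical values:

    run, lumis = 2, 3
    assert [(run * lumis) + lumi for lumi in range(lumis)] == [6, 7, 8]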
Beispiel #49
0
    def testLocationMerging(self):
        """
        _testLocationMerging_

        Verify that files residing on different SEs are not merged together in
        the same job.
        """
        self.stuffWMBS()

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="s2", seName="somese3.cern.ch")

        fileSite2 = File(lfn="fileSite2",
                         size=4098,
                         events=1024,
                         first_event=0,
                         locations=set(["somese3.cern.ch"]))
        fileSite2.addRun(Run(1, *[46]))
        fileSite2.create()
        fileSite2.addParent(self.parentFileSite2["lfn"])

        self.mergeFileset.addFile(fileSite2)
        self.mergeFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=4097,
                            max_merge_size=99999999,
                            max_merge_events=999999999)

        assert len(result) == 1, \
               "ERROR: More than one JobGroup returned."

        assert len(result[0].jobs) == 2, \
               "ERROR: Two jobs should have been returned."

        for job in result[0].jobs:
            firstInputFile = job.getFiles()[0]
            baseLocation = list(firstInputFile["locations"])[0]

            for inputFile in job.getFiles():
                assert inputFile["locations"] == set(["somese.cern.ch", "somese2.cern.ch"]) or \
                       inputFile["locations"] == set(["somese3.cern.ch"]), \
                       "Error: Wrong number of locations"

                assert list(inputFile["locations"])[0] == baseLocation, \
                       "Error: Wrong location."

        return
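
The loop above encodes the invariant under test: every file in a job shares the location of the job's first file, so merge groups are effectively keyed by location set. A sketch of that grouping (the grouping code is hypothetical, not the merge algorithm itself):

    files = [("fileA", frozenset({"somese.cern.ch", "somese2.cern.ch"})),
             ("fileSite2", frozenset({"somese3.cern.ch"}))]
    groups = {}
    for lfn, locations in files:
        groups.setdefault(locations, []).append(lfn)
    assert len(groups) == 2   # one merge group per distinct location set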
Beispiel #50
0
    def createSubscription(self, nFiles, lumisPerFile, twoSites = False, rand = False):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name = baseName)
        testFileset.create()
        parentFile = File('%s_parent' % (baseName), size = 1000, events = 100,
                          locations = set(["somese.cern.ch"]))
        parentFile.create()
        for i in range(nFiles):
            newFile = File(lfn = '%s_%i' % (baseName, i), size = 1000,
                           events = 100, locations = "somese.cern.ch")
            lumis = []
            for lumi in range(lumisPerFile):
                if rand:
                    lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                else:
                    lumis.append((100 * i) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            newFile.addParent(parentFile['lfn'])
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = File(lfn = '%s_%i_2' % (baseName, i), size = 1000,
                               events = 100, locations = "otherse.cern.ch")
                lumis = []
                for lumi in range(lumisPerFile):
                    if rand:
                        lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                    else:
                        lumis.append((100 * i) + lumi)
                newFile.addRun(Run(i, *lumis))
                newFile.create()
                newFile.addParent(parentFile['lfn'])
                testFileset.addFile(newFile)
        testFileset.commit()


        testSubscription = Subscription(fileset = testFileset,
                                         workflow = self.testWorkflow,
                                         split_algo = "LumiBased",
                                         type = "Processing")
        testSubscription.create()

        return testSubscription
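
File i of the subscription receives either the sequential lumis (100 * i) + lumi or, with rand=True, random lumis drawn from [1000 * i, 1000 * (i + 1)]. A quick check of the sequential scheme:

    nFiles, lumisPerFile = 3, 4
    sequential = [[(100 * i) + l for l in range(lumisPerFile)]
                  for i in range(nFiles)]
    assert sequential[1] == [100, 101, 102, 103]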
Beispiel #51
0
    def createTestJobs(self, nJobs, cacheDir):
        """
        _createTestJobs_

        Create several jobs
        """

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Simon",
                                name="wf001",
                                task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow,
                                        type="Processing",
                                        split_algo="FileBased")
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        # Create a file
        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')
        testFileA.create()

        baseName = makeUUID()

        # Now create a job
        for i in range(nJobs):
            testJob = Job(name='%s-%i' % (baseName, i))
            testJob.addFile(testFileA)
            testJob['location'] = 'malpaquet'
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob.create(testJobGroup)
            testJob.save()
            testJobGroup.add(testJob)

        testJobGroup.commit()

        # Set test job caches
        for job in testJobGroup.jobs:
            job.setCache(cacheDir)

        return testJobGroup
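Beispiel #52
0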
    def testLocationMerging(self):
        """
        _testLocationMerging_

        Verify that files residing on different SEs are not merged together in
        the same job.
        """
        self.stuffWMBS()

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T1_UK_RAL", pnn="T1_UK_RAL_Disk")

        fileSite2 = File(lfn="fileRAL",
                         size=4098,
                         events=1024,
                         first_event=0,
                         locations=set(["T1_UK_RAL_Disk"]))
        fileSite2.addRun(Run(1, *[46]))
        fileSite2.create()

        self.mergeFileset.addFile(fileSite2)
        self.mergeFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=4097,
                            max_merge_size=99999999,
                            max_merge_events=999999999,
                            merge_across_runs=False)

        assert len(result) == 1, \
            "ERROR: More than one JobGroup returned."

        assert len(result[0].jobs) == 3, \
            "ERROR: Three jobs should have been returned."

        ralJobs = 0
        fnalJobs = 0
        for job in result[0].jobs:
            if job["possiblePSN"] == set(["T1_UK_RAL"]):
                ralJobs += 1
            elif job["possiblePSN"] == set(["T1_US_FNAL"]):
                fnalJobs += 1

        self.assertEqual(ralJobs, 1)
        self.assertEqual(fnalJobs, 2)

        return
Beispiel #53
0
    def _addDBSFileToWMBSFile(self, dbsFile, storageElements, inFileset = True):
        """
        Two assumptions are made for this method to behave properly:
        1. DBS returns only one level of ParentList.
           If DBS returns multiple levels of parentage, they will still get handled,
           but that might not be what we want. In that case, restrict to one level.
        2. Parent files are in the same location as child files.
           This is not true in the general case, but WorkQueue should only select
           work where child and parent files are co-located.
        """
        wmbsParents = []
        dbsFile.setdefault("ParentList", [])
        for parent in dbsFile["ParentList"]:
            wmbsParents.append(self._addDBSFileToWMBSFile(parent,
                                            storageElements, inFileset = False))

        checksums = {}
        if dbsFile.get('Checksum'):
            checksums['cksum'] = dbsFile['Checksum']
        if dbsFile.get('Adler32'):
            checksums['adler32'] = dbsFile['Adler32']

        wmbsFile = File(lfn = dbsFile["LogicalFileName"],
                        size = dbsFile["FileSize"],
                        events = dbsFile["NumberOfEvents"],
                        checksums = checksums,
                        #TODO: need to get list of parent lfn
                        parents = wmbsParents,
                        locations = set(storageElements))

        for lumi in dbsFile['LumiList']:
            if isinstance(lumi['LumiSectionNumber'], list):
                run = Run(lumi['RunNumber'], *lumi['LumiSectionNumber'])
            else:
                run = Run(lumi['RunNumber'], lumi['LumiSectionNumber'])
            wmbsFile.addRun(run)

        self._addToDBSBuffer(dbsFile, checksums, storageElements)

        logging.info("WMBS File: %s on Location: %s",
                     wmbsFile['lfn'], wmbsFile['newlocations'])

        wmbsFile['inFileset'] = bool(inFileset)

        self.wmbsFilesToCreate.append(wmbsFile)

        return wmbsFile