Python DataCollectionService.getChunkInfo Examples

Programming Language: Python

Namespace/Package Name: WMCore.ACDC.DataCollectionService

Method/Function: getChunkInfo

Examples at hotexamples.com: 8

Python DataCollectionService.getChunkInfo - 8 examples found. These are the top rated real world Python examples of WMCore.ACDC.DataCollectionService.DataCollectionService.getChunkInfo extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

DataCollectionService(27)

failedJobs(8)

getLumiWhitelist(6)

getChunkInfo(4)

getChunkFiles(3)

getLumilistWhitelist(3)

getProductionACDCInfo(3)

chunkFileset(2)

getDataCollection(2)

_getFilesetInfo(1)

createCollection(1)

singleChunkFileset(1)

Example #1

Show file

    def validBlocks(self, task):
        """Return blocks that pass the input data restriction"""
        validBlocks = []
        # TODO take the chunk size from parameter
        chunkSize = 200

        acdcInfo = task.getInputACDC()
        if not acdcInfo:
            raise WorkQueueWMSpecError(
                self.wmspec, 'No acdc section for %s' % task.getPathName())
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        if self.data:
            acdcBlockSplit = ACDCBlock.splitBlockName(self.data.keys()[0])
        else:
            #if self.data is not passed, assume the the data is input dataset
            # from the spec
            acdcBlockSplit = False

        if acdcBlockSplit:
            dbsBlock = {}
            dbsBlock['Name'] = self.data.keys()[0]
            block = acdc.getChunkInfo(
                acdcInfo['collection'],
                acdcBlockSplit['TaskName'],
                acdcBlockSplit['Offset'],
                acdcBlockSplit['NumOfFiles'],
                user=self.wmspec.getOwner().get("name"),
                group=self.wmspec.getOwner().get("group"))
            dbsBlock['NumberOfFiles'] = block['files']
            dbsBlock['NumberOfEvents'] = block['events']
            dbsBlock['NumberOfLumis'] = block['lumis']
            dbsBlock['ACDC'] = acdcInfo
            dbsBlock["Sites"] = sitesFromStorageEelements(block["locations"])
            validBlocks.append(dbsBlock)
        else:
            acdcBlocks = acdc.chunkFileset(
                acdcInfo['collection'],
                acdcInfo['fileset'],
                chunkSize,
                user=self.wmspec.getOwner().get("name"),
                group=self.wmspec.getOwner().get("group"))
            for block in acdcBlocks:
                dbsBlock = {}
                dbsBlock['Name'] = ACDCBlock.name(self.wmspec.name(),
                                                  acdcInfo["fileset"],
                                                  block['offset'],
                                                  block['files'])
                dbsBlock['NumberOfFiles'] = block['files']
                dbsBlock['NumberOfEvents'] = block['events']
                dbsBlock['NumberOfLumis'] = block['lumis']
                dbsBlock["Sites"] = sitesFromStorageEelements(
                    block["locations"])
                dbsBlock['ACDC'] = acdcInfo
                validBlocks.append(dbsBlock)

        return validBlocks

Example #2

Show file

File: ResubmitBlock.py Project: ticoann/WMCore

    def validBlocks(self, task):
        """Return blocks that pass the input data restriction"""
        validBlocks = []
        # TODO take the chunk size from parameter
        chunkSize = 200

        acdcInfo = task.getInputACDC()
        if not acdcInfo:
            raise WorkQueueWMSpecError(self.wmspec, 'No acdc section for %s' % task.getPathName())
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        if self.data:
            acdcBlockSplit = ACDCBlock.splitBlockName(self.data.keys()[0])
        else:
            #if self.data is not passed, assume the the data is input dataset
            # from the spec
            acdcBlockSplit = False

        if acdcBlockSplit:
            dbsBlock = {}
            dbsBlock['Name'] = self.data.keys()[0]
            block = acdc.getChunkInfo(acdcInfo['collection'],
                                      acdcBlockSplit['TaskName'],
                                      acdcBlockSplit['Offset'],
                                      acdcBlockSplit['NumOfFiles'],
                                      user = self.wmspec.getOwner().get("name"),
                                      group = self.wmspec.getOwner().get("group"))
            dbsBlock['NumberOfFiles'] = block['files']
            dbsBlock['NumberOfEvents'] = block['events']
            dbsBlock['NumberOfLumis'] = block['lumis']
            dbsBlock['ACDC'] = acdcInfo
            dbsBlock["Sites"] = sitesFromStorageEelements(block["locations"])
            validBlocks.append(dbsBlock)
        else:
            acdcBlocks = acdc.chunkFileset(acdcInfo['collection'],
                                           acdcInfo['fileset'],
                                           chunkSize,
                                           user = self.wmspec.getOwner().get("name"),
                                           group = self.wmspec.getOwner().get("group"))
            for block in acdcBlocks:
                dbsBlock = {}
                dbsBlock['Name'] = ACDCBlock.name(self.wmspec.name(),
                                                  acdcInfo["fileset"],
                                                  block['offset'], block['files'])
                dbsBlock['NumberOfFiles'] = block['files']
                dbsBlock['NumberOfEvents'] = block['events']
                dbsBlock['NumberOfLumis'] = block['lumis']
                dbsBlock["Sites"] = sitesFromStorageEelements(block["locations"])
                dbsBlock['ACDC'] = acdcInfo
                validBlocks.append(dbsBlock)

        return validBlocks

Example #3

Show file

    def validBlocks(self, task):
        """Return blocks that pass the input data restriction according
           to the splitting algorithm"""
        validBlocks = []

        acdcInfo = task.getInputACDC()
        if not acdcInfo:
            raise WorkQueueWMSpecError(
                self.wmspec, 'No acdc section for %s' % task.getPathName())
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        if self.data:
            acdcBlockSplit = ACDCBlock.splitBlockName(self.data.keys()[0])
        else:
            # if self.data is not passed, assume the the data is input dataset
            # from the spec
            acdcBlockSplit = False

        if acdcBlockSplit:
            dbsBlock = {}
            dbsBlock['Name'] = self.data.keys()[0]
            block = acdc.getChunkInfo(
                acdcInfo['collection'],
                acdcBlockSplit['TaskName'],
                acdcBlockSplit['Offset'],
                acdcBlockSplit['NumOfFiles'],
                user=self.wmspec.getOwner().get("name"),
                group=self.wmspec.getOwner().get("group"))
            dbsBlock['NumberOfFiles'] = block['files']
            dbsBlock['NumberOfEvents'] = block['events']
            dbsBlock['NumberOfLumis'] = block['lumis']
            dbsBlock['ACDC'] = acdcInfo
            if task.getTrustSitelists():
                dbsBlock["Sites"] = self.sites
            else:
                # TODO remove this line when all DBS origin_site_name is converted to PNN
                block["locations"] = self.siteDB.checkAndConvertSENameToPNN(
                    block["locations"])
                # upto this
                dbsBlock["Sites"] = self.siteDB.PNNstoPSNs(block["locations"])
            validBlocks.append(dbsBlock)
        else:
            if self.args['SplittingAlgo'] in self.unsupportedAlgos:
                raise WorkQueueWMSpecError(
                    self.wmspec, 'ACDC is not supported for %s' %
                    self.args['SplittingAlgo'])
            splittingFunc = self.defaultAlgo
            if self.args['SplittingAlgo'] in self.algoMapping:
                splittingFunc = self.algoMapping[self.args['SplittingAlgo']]
            validBlocks = splittingFunc(acdc, acdcInfo, task)

        return validBlocks

Example #4

Show file

File: ResubmitBlock.py Project: HassenRiahi/WMCore

    def validBlocks(self, task):
        """Return blocks that pass the input data restriction according
           to the splitting algorithm"""
        validBlocks = []

        acdcInfo = task.getInputACDC()
        if not acdcInfo:
            raise WorkQueueWMSpecError(self.wmspec, 'No acdc section for %s' % task.getPathName())
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        if self.data:
            acdcBlockSplit = ACDCBlock.splitBlockName(self.data.keys()[0])
        else:
            # if self.data is not passed, assume the the data is input dataset
            # from the spec
            acdcBlockSplit = False

        if acdcBlockSplit:
            dbsBlock = {}
            dbsBlock['Name'] = self.data.keys()[0]
            block = acdc.getChunkInfo(acdcInfo['collection'],
                                      acdcBlockSplit['TaskName'],
                                      acdcBlockSplit['Offset'],
                                      acdcBlockSplit['NumOfFiles'],
                                      user = self.wmspec.getOwner().get("name"),
                                      group = self.wmspec.getOwner().get("group"))
            dbsBlock['NumberOfFiles'] = block['files']
            dbsBlock['NumberOfEvents'] = block['events']
            dbsBlock['NumberOfLumis'] = block['lumis']
            dbsBlock['ACDC'] = acdcInfo
            if task.inputLocationFlag():
                dbsBlock["Sites"] = self.sites
            else:
                #TODO remove this line when all DBS origin_site_name is converted to PNN
                block["locations"] = self.siteDB.checkAndConvertSENameToPNN(block["locations"])
                #upto this
                dbsBlock["Sites"] = self.siteDB.PNNstoPSNs(block["locations"])
            validBlocks.append(dbsBlock)
        else:
            if self.args['SplittingAlgo'] in self.unsupportedAlgos:
                raise WorkQueueWMSpecError(self.wmspec, 'ACDC is not supported for %s' % self.args['SplittingAlgo'])
            splittingFunc = self.defaultAlgo
            if self.args['SplittingAlgo'] in self.algoMapping:
                splittingFunc = self.algoMapping[self.args['SplittingAlgo']]
            validBlocks = splittingFunc(acdc, acdcInfo, task)

        return validBlocks

Example #5

Show file

    def validBlocks(self, task):
        """Return blocks that pass the input data restriction according
           to the splitting algorithm"""
        validBlocks = []

        acdcInfo = task.getInputACDC()
        if not acdcInfo:
            raise WorkQueueWMSpecError(
                self.wmspec, 'No acdc section for %s' % task.getPathName())
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        if self.data:
            acdcBlockSplit = ACDCBlock.splitBlockName(next(iter(self.data)))
        else:
            # if self.data is not passed, assume the the data is input dataset
            # from the spec
            acdcBlockSplit = False

        if acdcBlockSplit:
            dbsBlock = {}
            dbsBlock['Name'] = next(iter(self.data))
            block = acdc.getChunkInfo(acdcInfo['collection'],
                                      acdcBlockSplit['TaskName'],
                                      acdcBlockSplit['Offset'],
                                      acdcBlockSplit['NumOfFiles'])
            dbsBlock['NumberOfFiles'] = block['files']
            dbsBlock['NumberOfEvents'] = block['events']
            dbsBlock['NumberOfLumis'] = block['lumis']
            dbsBlock['ACDC'] = acdcInfo
            if task.getTrustSitelists().get('trustlists'):
                dbsBlock["Sites"] = self.sites
            else:
                dbsBlock["Sites"] = self.cric.PNNstoPSNs(block["locations"])
            validBlocks.append(dbsBlock)
        else:
            if self.args['SplittingAlgo'] in self.unsupportedAlgos:
                raise WorkQueueWMSpecError(
                    self.wmspec, 'ACDC is not supported for %s' %
                    self.args['SplittingAlgo'])
            splittingFunc = self.defaultAlgo
            if self.args['SplittingAlgo'] in self.algoMapping:
                splittingFunc = self.algoMapping[self.args['SplittingAlgo']]
            validBlocks = splittingFunc(acdc, acdcInfo, task)

        return validBlocks

Example #6

Show file

    def validBlocks(self, task):
        """Return blocks that pass the input data restriction according
           to the splitting algorithm"""
        validBlocks = []

        acdcInfo = task.getInputACDC()
        if not acdcInfo:
            raise WorkQueueWMSpecError(self.wmspec, 'No acdc section for %s' % task.getPathName())
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        if self.data:
            acdcBlockSplit = ACDCBlock.splitBlockName(self.data.keys()[0])
        else:
            # if self.data is not passed, assume the the data is input dataset
            # from the spec
            acdcBlockSplit = False

        if acdcBlockSplit:
            dbsBlock = {}
            dbsBlock['Name'] = self.data.keys()[0]
            block = acdc.getChunkInfo(acdcInfo['collection'],
                                      acdcBlockSplit['TaskName'],
                                      acdcBlockSplit['Offset'],
                                      acdcBlockSplit['NumOfFiles'])
            dbsBlock['NumberOfFiles'] = block['files']
            dbsBlock['NumberOfEvents'] = block['events']
            dbsBlock['NumberOfLumis'] = block['lumis']
            dbsBlock['ACDC'] = acdcInfo
            if task.getTrustSitelists().get('trustlists'):
                dbsBlock["Sites"] = self.sites
            else:
                dbsBlock["Sites"] = self.siteDB.PNNstoPSNs(block["locations"])
            validBlocks.append(dbsBlock)
        else:
            if self.args['SplittingAlgo'] in self.unsupportedAlgos:
                raise WorkQueueWMSpecError(self.wmspec, 'ACDC is not supported for %s' % self.args['SplittingAlgo'])
            splittingFunc = self.defaultAlgo
            if self.args['SplittingAlgo'] in self.algoMapping:
                splittingFunc = self.algoMapping[self.args['SplittingAlgo']]
            validBlocks = splittingFunc(acdc, acdcInfo, task)

        return validBlocks

Example #7

Show file

File: DataCollectionService_t.py Project: cinquo/WMCore

    def testChunking(self):
        """
        _testChunking_

        Insert a workload and files that have several distinct sets of
        locations.  Verify that the chunks are created correctly and that they
        only groups files that have the same set of locations.  Also verify that
        the chunks are pulled out of ACDC correctly.
        """
        dcs = DataCollectionService(url = self.testInit.couchUrl,
                                    database = "wmcore-acdc-datacollectionsvc")

        def getJob():
            job = Job()
            job["task"] = "/ACDCTest/reco"
            job["workflow"] = "ACDCTest"
            job["location"] = "cmssrm.fnal.gov"
            job["owner"] = "cmsdataops"
            job["group"] = "cmsdataops"
            return job

        testFileA = File(lfn = makeUUID(), size = 1024, events = 1024)
        testFileA.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileA.addRun(Run(1, 1, 2))
        testFileB = File(lfn = makeUUID(), size = 1024, events = 1024)
        testFileB.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileB.addRun(Run(1, 3, 4))
        testFileC = File(lfn = makeUUID(), size = 1024, events = 1024)
        testFileC.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileC.addRun(Run(1, 5, 6))
        testJobA = getJob()
        testJobA.addFile(testFileA)
        testJobA.addFile(testFileB)
        testJobA.addFile(testFileC)

        testFileD = File(lfn = makeUUID(), size = 1024, events = 1024)
        testFileD.setLocation(["cmssrm.fnal.gov"])
        testFileD.addRun(Run(2, 1, 2))
        testFileE = File(lfn = makeUUID(), size = 1024, events = 1024)
        testFileE.setLocation(["cmssrm.fnal.gov"])
        testFileE.addRun(Run(2, 3, 4))
        testJobB = getJob()
        testJobB.addFile(testFileD)
        testJobB.addFile(testFileE)

        testFileF = File(lfn = makeUUID(), size = 1024, events = 1024,
                         parents = set(["/some/parent/F"]))
        testFileF.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
        testFileF.addRun(Run(3, 1, 2))
        testFileG = File(lfn = makeUUID(), size = 1024, events = 1024,
                         parents = set(["/some/parent/G"]))
        testFileG.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"] )
        testFileG.addRun(Run(3, 3, 4))
        testFileH = File(lfn = makeUUID(), size = 1024, events = 1024,
                         parents = set(["/some/parent/H"]))
        testFileH.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
        testFileH.addRun(Run(3, 5, 6))
        testJobC = getJob()
        testJobC.addFile(testFileF)
        testJobC.addFile(testFileG)
        testJobC.addFile(testFileH)

        testFileI = File(lfn = makeUUID(), size = 1024, events = 1024, merged = True)
        testFileI.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileI.addRun(Run(4, 1, 2))
        testFileJ = File(lfn = makeUUID(), size = 1024, events = 1024, merged = True)
        testFileJ.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"] )
        testFileJ.addRun(Run(4, 3, 4))
        testFileK = File(lfn = makeUUID(), size = 1024, events = 1024, merged = True)
        testFileK.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileK.addRun(Run(4, 5, 6))
        testJobD = getJob()
        testJobD.addFile(testFileI)
        testJobD.addFile(testFileJ)
        testJobD.addFile(testFileK)

        dcs.failedJobs([testJobA, testJobB, testJobC, testJobD])
        chunks = dcs.chunkFileset("ACDCTest", "/ACDCTest/reco",
                                  chunkSize = 5)

        self.assertEqual(len(chunks), 4, "Error: There should be four chunks: %s" % len(chunks))

        goldenMetaData = {1: {"lumis": 2, "locations": ["castor.cern.ch", "cmssrm.fnal.gov"], "events": 1024},
                          2: {"lumis": 4, "locations": ["cmssrm.fnal.gov"], "events": 2048},
                          3: {"lumis": 6, "locations": ["castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"], "events": 3072},
                          5: {"lumis": 10, "locations": ["castor.cern.ch", "cmssrm.fnal.gov"], "events": 5120}}

        testFiles =[testFileA, testFileB, testFileC, testFileI, testFileJ, testFileK]
        lastFile = testFileA
        for testFile in testFiles:
            if lastFile["lfn"] < testFile["lfn"]:
                lastFile = testFile

        testFiles.remove(lastFile)

        goldenFiles = {1: [lastFile],
                       2: [testFileD, testFileE],
                       3: [testFileF, testFileG, testFileH],
                       5: testFiles}

        for chunk in chunks:
            chunkMetaData = dcs.getChunkInfo("ACDCTest", "/ACDCTest/reco",
                                             chunk["offset"], chunk["files"])

            self.assertEqual(chunkMetaData["files"], chunk["files"],
                             "Error: Metadata doesn't match.")
            self.assertEqual(chunkMetaData["lumis"], chunk["lumis"],
                             "Error: Metadata doesn't match.")
            self.assertEqual(chunkMetaData["events"], chunk["events"],
                             "Error: Metadata doesn't match.")
            self.assertEqual(chunkMetaData["locations"], chunk["locations"],
                             "Error: Metadata doesn't match.")

            self.assertTrue(chunk["files"] in goldenMetaData.keys(),
                            "Error: Extra chunk found.")
            self.assertEqual(chunk["lumis"], goldenMetaData[chunk["files"]]["lumis"],
                             "Error: Lumis in chunk is wrong.")
            self.assertEqual(chunk["locations"], goldenMetaData[chunk["files"]]["locations"],
                             "Error: Locations in chunk is wrong.")
            self.assertEqual(chunk["events"], goldenMetaData[chunk["files"]]["events"],
                             "Error: Events in chunk is wrong.")
            del goldenMetaData[chunk["files"]]

            chunkFiles = dcs.getChunkFiles("ACDCTest", "/ACDCTest/reco",
                                           chunk["offset"], chunk["files"])

            self.assertTrue(chunk["files"] in goldenFiles.keys(),
                            "Error: Extra chunk found.")
            goldenChunkFiles = goldenFiles[chunk["files"]]
            self.assertEqual(len(chunkFiles), len(goldenChunkFiles))

            for chunkFile in chunkFiles:
                foundFile = None
                for goldenChunkFile in goldenChunkFiles:
                    if chunkFile["lfn"] == goldenChunkFile["lfn"]:
                        foundFile = goldenChunkFile
                        break

                self.assertTrue(foundFile != None,
                                "Error: Missing chunk file: %s, %s" % (chunkFiles, goldenChunkFiles))
                self.assertEqual(foundFile["parents"], chunkFile["parents"],
                                 "Error: File parents should match.")
                self.assertEqual(foundFile["merged"], chunkFile["merged"],
                                 "Error: File merged status should match.")
                self.assertEqual(foundFile["locations"], chunkFile["locations"],
                                 "Error: File locations should match.")
                self.assertEqual(foundFile["events"], chunkFile["events"],
                                 "Error: File locations should match: %s" % chunk["files"])
                self.assertEqual(foundFile["size"], chunkFile["size"],
                                 "Error: File locations should match.")
                self.assertEqual(len(foundFile["runs"]), len(chunkFile["runs"]),
                                 "Error: Wrong number of runs.")
                for run in foundFile["runs"]:
                    runMatch = False
                    for chunkRun in chunkFile["runs"]:
                        if chunkRun.run == run.run and chunkRun.lumis == run.lumis:
                            runMatch = True
                            break

                    self.assertTrue(runMatch, "Error: Run information is wrong.")

            del goldenFiles[chunk["files"]]

        singleChunk = dcs.singleChunkFileset("ACDCTest", "/ACDCTest/reco")
        self.assertEqual(singleChunk, {"offset" : 0,
                                       "files" : 11,
                                       "events" : 11264,
                                       "lumis" : 22,
                                       "locations" : set(["castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"])},
                         "Error: Single chunk metadata is wrong")

        return

Example #8

Show file

    def testChunking(self):
        """
        _testChunking_

        Insert a workload and files that have several distinct sets of
        locations.  Verify that the chunks are created correctly and that they
        only groups files that have the same set of locations.  Also verify that
        the chunks are pulled out of ACDC correctly.
        """
        dcs = DataCollectionService(url=self.testInit.couchUrl,
                                    database="wmcore-acdc-datacollectionsvc")

        testFileA = File(lfn=makeUUID(), size=1024, events=1024)
        testFileA.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileA.addRun(Run(1, 1, 2))
        testFileB = File(lfn=makeUUID(), size=1024, events=1024)
        testFileB.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileB.addRun(Run(1, 3, 4))
        testFileC = File(lfn=makeUUID(), size=1024, events=1024)
        testFileC.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileC.addRun(Run(1, 5, 6))
        testJobA = self.getMinimalJob()
        testJobA.addFile(testFileA)
        testJobA.addFile(testFileB)
        testJobA.addFile(testFileC)

        testFileD = File(lfn=makeUUID(), size=1024, events=1024)
        testFileD.setLocation(["cmssrm.fnal.gov"])
        testFileD.addRun(Run(2, 1, 2))
        testFileE = File(lfn=makeUUID(), size=1024, events=1024)
        testFileE.setLocation(["cmssrm.fnal.gov"])
        testFileE.addRun(Run(2, 3, 4))
        testJobB = self.getMinimalJob()
        testJobB.addFile(testFileD)
        testJobB.addFile(testFileE)

        testFileF = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         parents={"/some/parent/F"})
        testFileF.setLocation(
            ["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
        testFileF.addRun(Run(3, 1, 2))
        testFileG = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         parents={"/some/parent/G"})
        testFileG.setLocation(
            ["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
        testFileG.addRun(Run(3, 3, 4))
        testFileH = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         parents={"/some/parent/H"})
        testFileH.setLocation(
            ["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
        testFileH.addRun(Run(3, 5, 6))
        testJobC = self.getMinimalJob()
        testJobC.addFile(testFileF)
        testJobC.addFile(testFileG)
        testJobC.addFile(testFileH)

        testFileI = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
        testFileI.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileI.addRun(Run(4, 1, 2))
        testFileJ = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
        testFileJ.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileJ.addRun(Run(4, 3, 4))
        testFileK = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
        testFileK.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileK.addRun(Run(4, 5, 6))
        testJobD = self.getMinimalJob()
        testJobD.addFile(testFileI)
        testJobD.addFile(testFileJ)
        testJobD.addFile(testFileK)

        dcs.failedJobs([testJobA, testJobB, testJobC, testJobD])
        chunks = dcs.chunkFileset("ACDCTest", "/ACDCTest/reco", chunkSize=5)

        self.assertEqual(
            len(chunks), 4,
            "Error: There should be four chunks: %s" % len(chunks))

        goldenMetaData = {
            1: {
                "lumis": 2,
                "locations": ["castor.cern.ch", "cmssrm.fnal.gov"],
                "events": 1024
            },
            2: {
                "lumis": 4,
                "locations": ["cmssrm.fnal.gov"],
                "events": 2048
            },
            3: {
                "lumis": 6,
                "locations":
                ["castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"],
                "events": 3072
            },
            5: {
                "lumis": 10,
                "locations": ["castor.cern.ch", "cmssrm.fnal.gov"],
                "events": 5120
            }
        }

        testFiles = [
            testFileA, testFileB, testFileC, testFileI, testFileJ, testFileK
        ]
        lastFile = testFileA
        for testFile in testFiles:
            if lastFile["lfn"] < testFile["lfn"]:
                lastFile = testFile

        testFiles.remove(lastFile)

        goldenFiles = {
            1: [lastFile],
            2: [testFileD, testFileE],
            3: [testFileF, testFileG, testFileH],
            5: testFiles
        }

        for chunk in chunks:
            chunkMetaData = dcs.getChunkInfo("ACDCTest", "/ACDCTest/reco",
                                             chunk["offset"], chunk["files"])

            self.assertEqual(chunkMetaData["files"], chunk["files"])
            self.assertEqual(chunkMetaData["lumis"], chunk["lumis"])
            self.assertEqual(chunkMetaData["events"], chunk["events"])
            self.assertEqual(chunkMetaData["locations"], chunk["locations"])

            self.assertTrue(chunk["files"] in goldenMetaData.keys(),
                            "Error: Extra chunk found.")
            self.assertEqual(chunk["lumis"],
                             goldenMetaData[chunk["files"]]["lumis"],
                             "Error: Lumis in chunk is wrong.")
            self.assertEqual(chunk["locations"],
                             goldenMetaData[chunk["files"]]["locations"],
                             "Error: Locations in chunk is wrong.")
            self.assertEqual(chunk["events"],
                             goldenMetaData[chunk["files"]]["events"],
                             "Error: Events in chunk is wrong.")
            del goldenMetaData[chunk["files"]]

            chunkFiles = dcs.getChunkFiles("ACDCTest", "/ACDCTest/reco",
                                           chunk["offset"], chunk["files"])

            self.assertTrue(chunk["files"] in goldenFiles.keys(),
                            "Error: Extra chunk found.")
            goldenChunkFiles = goldenFiles[chunk["files"]]
            self.assertEqual(len(chunkFiles), len(goldenChunkFiles))

            for chunkFile in chunkFiles:
                foundFile = None
                for goldenChunkFile in goldenChunkFiles:
                    if chunkFile["lfn"] == goldenChunkFile["lfn"]:
                        foundFile = goldenChunkFile
                        break

                self.assertIsNotNone(
                    foundFile, "Error: Missing chunk file: %s, %s" %
                    (chunkFiles, goldenChunkFiles))
                self.assertEqual(set(foundFile["parents"]),
                                 chunkFile["parents"],
                                 "Error: File parents should match.")
                self.assertEqual(foundFile["merged"], chunkFile["merged"],
                                 "Error: File merged status should match.")
                self.assertEqual(foundFile["locations"],
                                 chunkFile["locations"],
                                 "Error: File locations should match.")
                self.assertEqual(foundFile["events"], chunkFile["events"])
                self.assertEqual(foundFile["size"], chunkFile["size"])
                self.assertEqual(len(foundFile["runs"]),
                                 len(chunkFile["runs"]),
                                 "Error: Wrong number of runs.")
                for run in foundFile["runs"]:
                    runMatch = False
                    for chunkRun in chunkFile["runs"]:
                        if chunkRun.run == run.run and chunkRun.lumis == run.lumis:
                            runMatch = True
                            break

                    self.assertTrue(runMatch,
                                    "Error: Run information is wrong.")

            del goldenFiles[chunk["files"]]

        singleChunk = dcs.singleChunkFileset("ACDCTest", "/ACDCTest/reco")
        self.assertEqual(
            singleChunk, {
                "offset": 0,
                "files": 11,
                "events": 11264,
                "lumis": 22,
                "locations":
                {"castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"}
            }, "Error: Single chunk metadata is wrong")

        return