def getFiles(datasetName, runBlacklist, runWhitelist, blockBlacklist,
             blockWhitelist, dbsUrl, fakeLocation=False):
    """
    _getFiles_

    Get the full information of a dataset including files, blocks, runs and lumis.
    Filter it using run and block white/black lists.

    It can receive and optional DBSUrl.
    """
    dbsReader = DBSReader(endpoint = dbsUrl)
    phedexReader = PhEDEx()
    siteDB = SiteDBJSON()

    files = {}
    outputDatasetParts = datasetName.split("/")
    print "dataset",datasetName,"parts",outputDatasetParts   
    try:
        #retrieve list of blocks from dataset
        blockNames = dbsReader.listFileBlocks(datasetName)
    except:
        raise RuntimeError("Dataset %s doesn't exist in given DBS instance" % datasetName)
    
    has_parent = False
    try:
        parents = dbsReader.listDatasetParents( datasetName )
        if parents: has_parent=True
    except:
        print "Dataset with no parent"
        pass

    #traverse each block
    for blockName in blockNames:
        #deal with white and black list.
        if blockBlacklist and blockName in blockBlacklist:
            continue
        if blockWhitelist and blockName not in blockWhitelist:
            continue
        
        #existing blocks in phedex
        replicaInfo = phedexReader.getReplicaInfoForBlocks(block = blockName,
                                                           subscribed = 'y')
        blockFiles = dbsReader.listFilesInBlock(blockName, lumis=True)
        #has_parent = dbsReader.listBlockParents(blockName)
        if has_parent:
            try:
                blockFileParents = dbsReader.listFilesInBlockWithParents(blockName)
            except:
                print blockName,"does not appear to have a parent, even though it should. Very suspicious"
                blockFileParents = dbsReader.listFilesInBlock(blockName)
        else:
            blockFileParents = dbsReader.listFilesInBlock(blockName)

        blockLocations = set()
        #load block locations
        if len(replicaInfo["phedex"]["block"]) > 0:
            for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                node = replica["node"]
                cmsSites = siteDB.PNNtoPSN(node)
                if type(cmsSites) != list:
                    cmsSites = [cmsSites]
                for cmsName in cmsSites:
                    se = siteDB.cmsNametoSE(cmsName)
                    blockLocations.update(se)
                    logging.debug("cmsName %s mapped to se %s", cmsName, se)
                logging.debug("PhEDEx node %s, cmsSites %s, blockLocations %s", node, cmsSites, blockLocations)

        # We cannot upload docs without location, so force it in case it's empty
        if not blockLocations:
            if fakeLocation:
                logging.info("\t\t %s\tno location", blockName)
                blockLocations.update([u'cmssrmdisk.fnal.gov', u'srm-eoscms.cern.ch'])
            elif not has_parent: ## this should be the source
                logging.info("Blockname: %s\tno location, ABORT", blockName)
                sys.exit(1)
        
        logging.info("Blockname: %s\tLocations: %s", blockName, blockLocations)
 
        #for each file on the block
        for blockFile in blockFiles:
            parentLFNs = []
            #populate parent information
            if blockFileParents and "ParentList" in blockFileParents[0]:
                for fileParent in blockFileParents[0]["ParentList"]:
                    parentLFNs.append(fileParent["LogicalFileName"])
            ## remove when https://github.com/dmwm/WMCore/issues/7128 gets fixed
            #elif not 'RAW' in blockName:
            #    print "no parent info"
            runInfo = {}
            #Lumis not included in file
            for lumiSection in blockFile["LumiList"]:
                if runBlacklist and lumiSection["RunNumber"] in runBlacklist:
                    continue
                if runWhitelist and lumiSection["RunNumber"] not in runWhitelist:
                    continue

                if lumiSection["RunNumber"] not in runInfo.keys():
                    runInfo[lumiSection["RunNumber"]] = []

                runInfo[lumiSection["RunNumber"]].append(lumiSection["LumiSectionNumber"])
            if len(runInfo.keys()) > 0:
                files[blockFile["LogicalFileName"]] = {"runs": runInfo,
                                                       "events": blockFile["NumberOfEvents"],
                                                       "size": blockFile["FileSize"],
                                                       "locations": list(blockLocations),
                                                       "parents": parentLFNs}
    return files
Beispiel #2
0
class DBSReaderTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        #self.endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs = None
        return

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
        return

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(
            self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    @attr("integration")
    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
        self.assertEqual([173657], runs)

    @attr("integration")
    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(173692 in runs)
        self.assertEqual(runs[173692], 2782)
        runs = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
        self.assertEqual({173657: 94}, runs)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    @attr("integration")
    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    @attr("integrtion")
    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], '22075')
        self.assertEqual(dataset['NumberOfBlocks'], '46')
        self.assertEqual(dataset['total_size'], '4001680824')
        self.assertEqual(dataset['NumberOfFiles'], '49')
        self.assertEqual(dataset['NumberOfLumis'], '7223')

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], '')
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], '377')
        self.assertEqual(block['NumberOfBlocks'], '1')
        self.assertEqual(block['total_size'], '150780132')
        self.assertEqual(block['NumberOfFiles'], '2')
        self.assertEqual(block['NumberOfLumis'], '94')

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo,
                          DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET,
                          BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        #self.assertEqual(377, block['NumberOfEvents'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        sites = [
            x['Name'] for x in block['StorageElementList']
            if x['Name'].find('cern.ch') > -1
        ]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo,
                          DATASET + 'blah')
        self.assertFalse(
            self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK + 'asas'))

    @attr("integration")
    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.listFileBlocks(DATASET)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET,
                                        blockName=BLOCK,
                                        onlyClosedBlocks=True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    @attr("integration")
    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    @attr("integration")
    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertFalse(self.dbs.blockExists(DATASET + '#somethingelse'))

    @attr("integration")
    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(
            FILE in
            [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock,
                          DATASET + '#blah')

    @attr("integration")
    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        # hope PromptReco doesn't get deleted
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents(
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        )
        self.assertEqual(1, len(files))
        self.assertEqual(
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60',
            files[0]['Block']['Name'])
        self.assertEqual(
            '/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
            files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents,
                          BLOCK + 'asas')

    @attr("integration")
    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in self.dbs.lfnsInBlock(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        self.dbs = DBSReader(self.endpoint)
        # assume one site is cern
        sites = [
            x for x in self.dbs.listFileBlockLocation(BLOCK)
            if x.find('cern.ch') > -1
        ]
        self.assertTrue(sites)
        # doesn't raise on non-existant block
        self.assertFalse(self.dbs.listFileBlockLocation(BLOCK + 'blah'))

    @attr("integration")
    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock,
                          BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents(
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        )
        self.assertEqual(len(block), 1)
        block = block[
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60']
        self.assertEqual(
            '/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
            block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents,
                          BLOCK + 'asas')

    @attr("integration")
    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    @attr("integration")
    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents(
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        )
        self.assertEqual(1, len(parents))
        self.assertEqual(
            '/Jet/Run2011A-v1/RAW#37cf2a40-4e0e-11e0-9833-00151755cb60',
            parents[0]['Name'])
        sites = [
            x for x in parents[0]['StorageElementList']
            if x.find("cern.ch") > -1
        ]
        self.assertTrue(sites)

        self.assertFalse(
            self.dbs.listBlockParents(
                '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60dsl'
            ))

    @attr("integration")
    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    @attr("integration")
    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertFalse(self.dbs.blockToDatasetPath(BLOCK + 'asas'))
Beispiel #3
0
class DBSReaderTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        #self.endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs = None
        return

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
        return

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(
            self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    @attr("integration")
    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
        self.assertEqual([173657], runs)

    @attr("integration")
    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(173692 in runs)
        self.assertEqual(runs[173692], 2782)
        runs = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
        self.assertEqual({173657: 94}, runs)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    @attr("integration")
    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    @attr("integration")
    def testlistDatasetFileDetails(self):
        """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
        TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
        for endpoint in [
                self.endpoint,
                'test/python/WMCore_t/Services_t/DBS_t/DBSReader_t.py:'
        ]:
            self.dbs = DBSReader(endpoint)
            details = self.dbs.listDatasetFileDetails(DATASET)
            self.assertEqual(len(details), 49)
            self.assertTrue(TESTFILE in details)
            self.assertEqual(details[TESTFILE]['NumberOfEvents'], 545)
            self.assertEqual(details[TESTFILE]['Size'], 286021145)
            self.assertEqual(
                details[TESTFILE]['BlockName'],
                '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace'
            )
            self.assertEqual(details[TESTFILE]['Checksums'], {
                'Checksum': '22218315',
                'Adler32': 'a41a1446',
                'Md5': 'NOTSET'
            })
            self.assertTrue(173658 in details[TESTFILE]['Lumis'])
            self.assertEqual( sorted(details[TESTFILE]['Lumis'][173658]),
                sorted( map( long, [8, 12, 9, 14, 10, 6, 2, 1, 4, 3, 36, 49, 16, 11, 27, 35, 46, 39, 20, 24, 52, 23, 40, 42, 45, 21, 32, 37,  \
                                    25, 22, 5, 33, 17, 15, 26, 50, 18, 29, 51, 44, 69, 43, 30, 73, 19, 41, 13, 38, 7, 31, 75, 48, 59, 65, 55, \
                                    57, 34, 28, 74, 47, 64, 61, 68, 77, 66, 71, 60, 76, 70, 67, 62, 78, 82, 79, 88, 56, 101, 92, 58, 72, 54,  \
                                    63, 96, 53, 84, 95, 89, 85, 99, 81, 91, 102, 80, 100, 107, 94, 93, 90, 86, 87, 83, 97, 104, 110, 111, 106,\
                                    108, 98, 103, 109, 105]))
            )

    @attr("integration")
    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], '22075')
        self.assertEqual(dataset['NumberOfBlocks'], '46')
        self.assertEqual(dataset['total_size'], '4001680824')
        self.assertEqual(dataset['NumberOfFiles'], '49')
        self.assertEqual(dataset['NumberOfLumis'], '7223')

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], '')
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], '377')
        self.assertEqual(block['NumberOfBlocks'], '1')
        self.assertEqual(block['total_size'], '150780132')
        self.assertEqual(block['NumberOfFiles'], '2')
        self.assertEqual(block['NumberOfLumis'], '94')

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo,
                          DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET,
                          BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        #self.assertEqual(377, block['NumberOfEvents'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        sites = [
            x['Name'] for x in block['StorageElementList']
            if x['Name'].find('cern.ch') > -1
        ]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo,
                          DATASET + 'blah')
        self.assertFalse(
            self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK + 'asas'))

    @attr("integration")
    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.listFileBlocks(DATASET)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET,
                                        blockName=BLOCK,
                                        onlyClosedBlocks=True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    @attr("integration")
    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    @attr("integration")
    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertFalse(self.dbs.blockExists(DATASET + '#somethingelse'))

    @attr("integration")
    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(
            FILE in
            [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock,
                          DATASET + '#blah')

    @attr("integration")
    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        # hope PromptReco doesn't get deleted
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents(
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        )
        self.assertEqual(1, len(files))
        self.assertEqual(
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60',
            files[0]['Block']['Name'])
        self.assertEqual(
            '/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
            files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents,
                          BLOCK + 'asas')

    @attr("integration")
    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in self.dbs.lfnsInBlock(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        WRONG_BLOCK = BLOCK[:-4] + 'abcd'
        BLOCK2 = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
        DBS_BLOCK = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                                    'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e'
        DBS_BLOCK2 = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                                    'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab'
        self.dbs = DBSReader(
            'https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')
        # assume one site is cern
        sites = [
            x for x in self.dbs.listFileBlockLocation(BLOCK)
            if x and x.find('cern.ch') > -1
        ]
        self.assertTrue(sites)
        #This block is only found on DBS
        self.assertTrue(self.dbs.listFileBlockLocation(DBS_BLOCK))
        # doesn't raise on non-existant block
        self.assertFalse(self.dbs.listFileBlockLocation(WRONG_BLOCK))
        #test bulk call:
        ## two blocks in phedex
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK,
                                                                BLOCK2])))
        ## one block in phedex one does not exist
        self.assertEqual(
            1, len(self.dbs.listFileBlockLocation([BLOCK, WRONG_BLOCK])))
        ## one in phedex one in dbs
        self.assertEqual(
            2, len(self.dbs.listFileBlockLocation([BLOCK, DBS_BLOCK])))
        ## two in dbs
        self.assertEqual(
            2, len(self.dbs.listFileBlockLocation([DBS_BLOCK, DBS_BLOCK2])))
        ## one in DBS and one does not exist
        self.assertEqual(
            1, len(self.dbs.listFileBlockLocation([DBS_BLOCK, WRONG_BLOCK])))

    @attr("integration")
    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock,
                          BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents(
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        )
        self.assertEqual(len(block), 1)
        block = block[
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60']
        self.assertEqual(
            '/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
            block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents,
                          BLOCK + 'asas')

    @attr("integration")
    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    @attr("integration")
    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents(
            '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        )
        self.assertEqual(1, len(parents))
        self.assertEqual(
            '/Jet/Run2011A-v1/RAW#37cf2a40-4e0e-11e0-9833-00151755cb60',
            parents[0]['Name'])
        sites = [
            x for x in parents[0]['StorageElementList']
            if x.find("cern.ch") > -1
        ]
        self.assertTrue(sites)

        self.assertFalse(
            self.dbs.listBlockParents(
                '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60dsl'
            ))

    @attr("integration")
    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    @attr("integration")
    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertFalse(self.dbs.blockToDatasetPath(BLOCK + 'asas'))
Beispiel #4
0
class DBSReaderTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        #self.endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs = None
        return

    @attr("integration")
    def testListDatatiers(self):
        """
        listDatatiers returns all datatiers available
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listDatatiers()
        self.assertTrue('RAW' in results)
        self.assertTrue('GEN-SIM-RECO' in results)
        self.assertTrue('GEN-SIM' in results)
        self.assertFalse('RAW-ALAN' in results)
        return

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
        return

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(
            self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(block=BLOCK)
        self.assertEqual(1, len(runs))
        self.assertEqual([173657], runs)

    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts (None for DBS3)"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(173692 in runs)
        self.assertEqual(runs[173692], None)
        runs = self.dbs.listRunLumis(block=BLOCK)
        self.assertEqual(1, len(runs))
        self.assertTrue(173657 in runs)
        self.assertEqual(runs[173657], None)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    def testlistDatasetFileDetails(self):
        """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
        TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
        self.dbs = DBSReader(self.endpoint)
        details = self.dbs.listDatasetFileDetails(DATASET)
        self.assertEqual(len(details), 49)
        self.assertTrue(TESTFILE in details)
        self.assertEqual(details[TESTFILE]['NumberOfEvents'], 545)
        self.assertEqual(details[TESTFILE]['file_size'], 286021145)
        self.assertEqual(
            details[TESTFILE]['BlockName'],
            '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
        self.assertEqual(details[TESTFILE]['Md5'], 'NOTSET')
        self.assertEqual(details[TESTFILE]['md5'], 'NOTSET')
        self.assertEqual(details[TESTFILE]['Adler32'], 'a41a1446')
        self.assertEqual(details[TESTFILE]['adler32'], 'a41a1446')
        self.assertEqual(details[TESTFILE]['Checksum'], '22218315')
        self.assertEqual(details[TESTFILE]['check_sum'], '22218315')
        self.assertTrue(173658 in details[TESTFILE]['Lumis'])
        self.assertEqual(sorted(details[TESTFILE]['Lumis'][173658]), [
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
            20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
            37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
            54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
            71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
            88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
            104, 105, 106, 107, 108, 109, 110, 111
        ])

    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], 22075)
        self.assertEqual(dataset['NumberOfBlocks'], 46)
        self.assertEqual(dataset['FileSize'], 4001680824)
        self.assertEqual(dataset['file_size'], 4001680824)
        self.assertEqual(dataset['NumberOfFiles'], 49)
        self.assertEqual(dataset['NumberOfLumis'], 7223)

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], '')
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], 377)
        self.assertEqual(block['NumberOfBlocks'], 1)
        self.assertEqual(block['FileSize'], 150780132)
        self.assertEqual(block['file_size'], 150780132)
        self.assertEqual(block['NumberOfFiles'], 2)
        self.assertEqual(block['NumberOfLumis'], 94)

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo,
                          DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET,
                          BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        self.assertEqual(0, block['OpenForWriting'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        sites = [
            x['Name'] for x in block['PhEDExNodeList']
            if x['Name'].find('CH_CERN') > -1
        ]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo,
                          DATASET + 'blah')
        self.assertRaises(DBSReaderError,
                          self.dbs.getFileBlocksInfo,
                          DATASET,
                          blockName=BLOCK + 'asas')

    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.listFileBlocks(DATASET)
        self.assertTrue(BLOCK in blocks)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET,
                                        blockName=BLOCK,
                                        onlyClosedBlocks=True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.blockExists,
                          DATASET + '#somethingelse')

    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(
            FILE in
            [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock,
                          DATASET + '#blah')

    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents(
            '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0'
        )
        self.assertEqual(4, len(files))
        self.assertEqual(
            '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0',
            files[0]['block_name'])
        self.assertEqual(
            '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0',
            files[0]['BlockName'])
        self.assertEqual(
            '/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/1043E89F-2DCF-E411-9CAE-02163E013751.root',
            files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents,
                          BLOCK + 'asas')

    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(
            FILE in
            [x['logical_file_name'] for x in self.dbs.lfnsInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        WRONG_BLOCK = BLOCK[:-4] + 'abcd'
        BLOCK2 = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
        DBS_BLOCK = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                                    'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e'
        DBS_BLOCK2 = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                                    'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab'
        self.dbs = DBSReader(
            'https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')
        # assume one site is cern
        sites = [
            x for x in self.dbs.listFileBlockLocation(BLOCK)
            if x and x.find('CH_CERN') > -1
        ]
        self.assertTrue(sites)
        #This block is only found on DBS
        self.assertTrue(self.dbs.listFileBlockLocation(DBS_BLOCK))
        # doesn't raise on non-existant block
        self.assertFalse(self.dbs.listFileBlockLocation(WRONG_BLOCK))
        #test bulk call:
        ## two blocks in phedex
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK,
                                                                BLOCK2])))
        ## one block in phedex one does not exist
        self.assertEqual(
            1, len(self.dbs.listFileBlockLocation([BLOCK, WRONG_BLOCK])))
        ## one in phedex one in dbs
        self.assertEqual(
            2, len(self.dbs.listFileBlockLocation([BLOCK, DBS_BLOCK])))
        ## two in dbs
        self.assertEqual(
            2, len(self.dbs.listFileBlockLocation([DBS_BLOCK, DBS_BLOCK2])))
        ## one in DBS and one does not exist
        self.assertEqual(
            1, len(self.dbs.listFileBlockLocation([DBS_BLOCK, WRONG_BLOCK])))

    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock,
                          BLOCK + 'asas')

    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents(
            '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0'
        )
        self.assertEqual(len(block), 1)
        block = block[
            '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0']
        self.assertEqual(
            '/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/1043E89F-2DCF-E411-9CAE-02163E013751.root',
            block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents,
                          BLOCK + 'asas')

    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents(
            '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0'
        )
        self.assertEqual(1, len(parents))
        self.assertEqual(
            '/Cosmics/Commissioning2015-v1/RAW#942d76fe-cf0e-11e4-afad-001e67ac06a0',
            parents[0]['Name'])
        sites = [
            x for x in parents[0]['PhEDExNodeList'] if x.find("CH_CERN") > -1
        ]
        self.assertTrue(sites)

        self.assertFalse(
            self.dbs.listBlockParents(
                '/Cosmics/Commissioning2015-v1/RAW#942d76fe-cf0e-11e4-afad-001e67ac06a0'
            ))

    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertRaises(DBSReaderError, self.dbs.blockToDatasetPath,
                          BLOCK + 'asas')
def getFiles(datasetName, runBlacklist, runWhitelist, blockBlacklist,
             blockWhitelist, dbsUrl):
    """
    _getFiles_

    Get the full information of a dataset including files, blocks, runs and lumis.
    Filter it using run and block white/black lists.

    It can receive and optional DBSUrl.
    """
    dbsReader = DBSReader(endpoint = dbsUrl)
    phedexReader = PhEDEx()
    siteDB = SiteDBJSON()

    files = {}
    outputDatasetParts = datasetName.split("/")
    datasets = dbsReader.matchProcessedDatasets(outputDatasetParts[1],
                                                outputDatasetParts[3],
                                                outputDatasetParts[2])

    if len(datasets) == 0:
        raise RuntimeError("Dataset %s doesn't exist in given DBS instance" % datasetName)

    blockNames = dbsReader.listFileBlocks(datasetName)
    for blockName in blockNames:
        if blockBlacklist and blockName in blockBlacklist:
            continue
        if blockWhitelist and blockName not in blockWhitelist:
            continue

        replicaInfo = phedexReader.getReplicaInfoForBlocks(block = blockName,
                                                           subscribed = 'y')
        block = dbsReader.listFilesInBlockWithParents(blockName)
        blockLocations = set()
        if len(replicaInfo["phedex"]["block"]) > 0:
            for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                node = replica["node"]
                cmsSites = siteDB.phEDExNodetocmsName(node)
                if type(cmsSites) != list:
                    cmsSites = [cmsSites]
                for cmsName in cmsSites:
                    blockLocations.update(siteDB.cmsNametoSE(cmsName))
        for blockFile in block:
            parentLFNs = []
            for fileParent in blockFile["ParentList"]:
                parentLFNs.append(fileParent["LogicalFileName"])
            runInfo = {}
            for lumiSection in blockFile["LumiList"]:
                if runBlacklist and lumiSection["RunNumber"] in runBlacklist:
                    continue
                if runWhitelist and lumiSection["RunNumber"] not in runWhitelist:
                    continue

                if lumiSection["RunNumber"] not in runInfo.keys():
                    runInfo[lumiSection["RunNumber"]] = []

                runInfo[lumiSection["RunNumber"]].append(lumiSection["LumiSectionNumber"])
            if len(runInfo.keys()) > 0:
                files[blockFile["LogicalFileName"]] = {"runs": runInfo,
                                                       "events": blockFile["NumberOfEvents"],
                                                       "size": blockFile["FileSize"],
                                                       "locations": list(blockLocations),
                                                       "parents": parentLFNs}
    return files
Beispiel #6
0
class DBSReaderTest(unittest.TestCase):

    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        #endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs = DBSReader(endpoint)
        return

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
        return

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    @attr("integration")
    def testlistRuns(self):
        """listRuns returns known runs"""
        runs = self.dbs.listRuns(dataset = DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(dataset = DATASET, block = BLOCK)
        self.assertEqual([173657], runs)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    @attr("integration")
    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    @attr("integrtion")
    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], '22075')
        self.assertEqual(dataset['NumberOfBlocks'], '46')
        self.assertEqual(dataset['total_size'], '4001680824')
        self.assertEqual(dataset['NumberOfFiles'], '49')
        self.assertEqual(dataset['NumberOfLumis'], '7223')

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], '')
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], '377')
        self.assertEqual(block['NumberOfBlocks'], '1')
        self.assertEqual(block['total_size'], '150780132')
        self.assertEqual(block['NumberOfFiles'], '2')
        self.assertEqual(block['NumberOfLumis'], '94')

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName = BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        #self.assertEqual(377, block['NumberOfEvents'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        sites = [x['Name'] for x in block['StorageElementList'] if x['Name'].find('cern.ch') > -1]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET + 'blah')
        self.assertFalse(self.dbs.getFileBlocksInfo(DATASET, blockName = BLOCK + 'asas'))

    @attr("integration")
    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        blocks = self.dbs.listFileBlocks(DATASET)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET, blockName = BLOCK, onlyClosedBlocks = True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    @attr("integration")
    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    @attr("integration")
    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertFalse(self.dbs.blockExists(DATASET + '#somethingelse'))

    @attr("integration")
    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.assertTrue(FILE in [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + '#blah')

    @attr("integration")
    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        # hope PromptReco doesn't get deleted
        files = self.dbs.listFilesInBlockWithParents('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60')
        self.assertEqual(1, len(files))
        self.assertEqual('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60', files[0]['Block']['Name'])
        self.assertEqual('/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
                         files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + 'asas')

    @attr("integration")
    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.assertTrue(FILE in self.dbs.lfnsInBlock(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        # assume one site is cern
        sites = [x for x in self.dbs.listFileBlockLocation(BLOCK) if x.find('cern.ch') > -1]
        self.assertTrue(sites)
        # doesn't raise on non-existant block
        self.assertFalse(self.dbs.listFileBlockLocation(BLOCK + 'blah'))

    @attr("integration")
    def testGetFileBlock(self):
        """getFileBlock returns block"""
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        block = self.dbs.getFileBlockWithParents('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60')
        self.assertEqual(len(block), 1)
        block = block['/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60']
        self.assertEqual('/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
                         block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas')

    @attr("integration")
    def testGetFiles(self):
        """getFiles returns files in dataset"""
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    @attr("integration")
    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        parents = self.dbs.listBlockParents('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60')
        self.assertEqual(1, len(parents))
        self.assertEqual('/Jet/Run2011A-v1/RAW#37cf2a40-4e0e-11e0-9833-00151755cb60',
                         parents[0]['Name'])
        sites = [x for x in parents[0]['StorageElementList'] if x.find("cern.ch") > -1]
        self.assertTrue(sites)

        self.assertFalse(self.dbs.listBlockParents('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60dsl'))

    @attr("integration")
    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    @attr("integration")
    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertFalse(self.dbs.blockToDatasetPath(BLOCK + 'asas'))
Beispiel #7
0
        def run(self):
            self.files = {}
            logging = self.l
            has_parent = self.hp
            fakeLocation = self.fl
            blockName = self.bn
            blockBlacklist = self.bbl
            blockWhitelist = self.bwl

            if blockBlacklist and blockName in blockBlacklist:
                return
            if blockWhitelist and blockName not in blockWhitelist:
                return

            phedexReader = PhEDEx()
            siteDB = SiteDBJSON()
            dbsReader = DBSReader(endpoint=self.dbs)
            replicaInfo = phedexReader.getReplicaInfoForBlocks(block=blockName,
                                                               subscribed='y')
            blockFiles = dbsReader.listFilesInBlock(blockName, lumis=True)
            if has_parent:
                try:
                    blockFileParents = dbsReader.listFilesInBlockWithParents(
                        blockName)
                except:
                    print blockName, "does not appear to have a parent, even though it should. Very suspicious"
                    blockFileParents = dbsReader.listFilesInBlock(blockName)
            else:
                blockFileParents = dbsReader.listFilesInBlock(blockName)

            blockLocations = set()
            # load block locations
            if len(replicaInfo["phedex"]["block"]) > 0:
                for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                    PNN = replica["node"]
                    PSNs = siteDB.PNNtoPSN(PNN)
                    blockLocations.add(PNN)
                    #logging.debug("PhEDEx Node Name: %s\tPSNs: %s", PNN, PSNs)

            # We cannot upload docs without location, so force it in case it's empty
            if not blockLocations:
                if fakeLocation:
                    #logging.info("\t\t %s\tno location", blockName)
                    blockLocations.update([u'T1_US_FNAL_Disk', u'T2_CH_CERN'])
                elif not has_parent:  ## this should be the source
                    logging.info("Blockname: %s\tno location, ABORT",
                                 blockName)
                    self.major_failure = True
                    #sys.exit(1)

            #logging.info("Blockname: %s\tLocations: %s", blockName, blockLocations)

            # for each file on the block
            for blockFile in blockFiles:
                parentLFNs = []
                # populate parent information
                if blockFileParents and "ParentList" in blockFileParents[0]:
                    for fileParent in blockFileParents[0]["ParentList"]:
                        parentLFNs.append(fileParent["LogicalFileName"])
                runInfo = {}
                # Lumis not included in file
                for lumiSection in blockFile["LumiList"]:
                    if runBlacklist and lumiSection[
                            "RunNumber"] in runBlacklist:
                        continue
                    if runWhitelist and lumiSection[
                            "RunNumber"] not in runWhitelist:
                        continue

                    if lumiSection["RunNumber"] not in runInfo.keys():
                        runInfo[lumiSection["RunNumber"]] = []

                    runInfo[lumiSection["RunNumber"]].append(
                        lumiSection["LumiSectionNumber"])
                if len(runInfo.keys()) > 0:
                    self.files[blockFile["LogicalFileName"]] = {
                        "runs": runInfo,
                        "events": blockFile["NumberOfEvents"],
                        "size": blockFile["FileSize"],
                        "locations": list(blockLocations),
                        "parents": parentLFNs
                    }
            return
Beispiel #8
0
def getFiles(datasetName, runBlacklist, runWhitelist, blockBlacklist,
             blockWhitelist, dbsUrl):
    """
    _getFiles_

    Get the full information of a dataset including files, blocks, runs and lumis.
    Filter it using run and block white/black lists.

    It can receive and optional DBSUrl.
    """
    dbsReader = DBSReader(endpoint=dbsUrl)
    phedexReader = PhEDEx()
    siteDB = SiteDBJSON()

    files = {}
    outputDatasetParts = datasetName.split("/")
    print "dataset", datasetName, "parts", outputDatasetParts
    try:
        #retrieve list of blocks from dataset
        blockNames = dbsReader.listFileBlocks(datasetName)
    except:
        raise RuntimeError("Dataset %s doesn't exist in given DBS instance" %
                           datasetName)

    #traverse each block
    for blockName in blockNames:
        #deal with white and black list.
        if blockBlacklist and blockName in blockBlacklist:
            continue
        if blockWhitelist and blockName not in blockWhitelist:
            continue

        #existing blocks in phedex
        replicaInfo = phedexReader.getReplicaInfoForBlocks(block=blockName,
                                                           subscribed='y')
        blockFiles = dbsReader.listFilesInBlock(blockName, lumis=True)

        blockLocations = set()
        #load block locations
        if len(replicaInfo["phedex"]["block"]) > 0:
            for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                node = replica["node"]
                cmsSites = siteDB.phEDExNodetocmsName(node)
                if type(cmsSites) != list:
                    cmsSites = [cmsSites]
                for cmsName in cmsSites:
                    blockLocations.update(siteDB.cmsNametoSE(cmsName))
        #for each file on the block
        for blockFile in blockFiles:
            parentLFNs = []
            #get parent information about file
            blockFileParents = dbsReader.listFilesInBlockWithParents(blockName)
            #populate parent information
            if blockFileParents and "ParentList" in blockFileParents[0]:
                for fileParent in blockFileParents[0]["ParentList"]:
                    parentLFNs.append(fileParent["LogicalFileName"])
            runInfo = {}
            #Lumis not included in file
            for lumiSection in blockFile["LumiList"]:
                if runBlacklist and lumiSection["RunNumber"] in runBlacklist:
                    continue
                if runWhitelist and lumiSection[
                        "RunNumber"] not in runWhitelist:
                    continue

                if lumiSection["RunNumber"] not in runInfo.keys():
                    runInfo[lumiSection["RunNumber"]] = []

                runInfo[lumiSection["RunNumber"]].append(
                    lumiSection["LumiSectionNumber"])
            if len(runInfo.keys()) > 0:
                files[blockFile["LogicalFileName"]] = {
                    "runs": runInfo,
                    "events": blockFile["NumberOfEvents"],
                    "size": blockFile["FileSize"],
                    "locations": list(blockLocations),
                    "parents": parentLFNs
                }
    return files
Beispiel #9
0
class DBSReaderTest(EmulatedUnitTestCase):

    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        #self.endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs = None
        super(DBSReaderTest, self).setUp()
        return

    def tearDown(self):
        """
        _tearDown_


        :return:
        """

        super(DBSReaderTest, self).tearDown()
        return

    @attr("integration")
    def testListDatatiers(self):
        """
        listDatatiers returns all datatiers available
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listDatatiers()
        self.assertTrue('RAW' in results)
        self.assertTrue('GEN-SIM-RECO' in results)
        self.assertTrue('GEN-SIM' in results)
        self.assertFalse('RAW-ALAN' in results)
        return

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
        return

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(block=BLOCK)
        self.assertEqual(1, len(runs))
        self.assertEqual([173657], runs)

    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts (None for DBS3)"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(173692 in runs)
        self.assertEqual(runs[173692], None)
        runs = self.dbs.listRunLumis(block=BLOCK)
        self.assertEqual(1, len(runs))
        self.assertTrue(173657 in runs)
        self.assertEqual(runs[173657], None)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    def testlistDatasetFileDetails(self):
        """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
        TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
        self.dbs = DBSReader(self.endpoint)
        details = self.dbs.listDatasetFileDetails(DATASET)
        self.assertEqual(len(details), 49)
        self.assertTrue(TESTFILE in details)
        self.assertEqual(details[TESTFILE]['NumberOfEvents'], 545)
        self.assertEqual(details[TESTFILE]['file_size'], 286021145)
        self.assertEqual(details[TESTFILE]['BlockName'], '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
        self.assertEqual(details[TESTFILE]['Md5'], 'NOTSET')
        self.assertEqual(details[TESTFILE]['md5'], 'NOTSET')
        self.assertEqual(details[TESTFILE]['Adler32'], 'a41a1446')
        self.assertEqual(details[TESTFILE]['adler32'], 'a41a1446')
        self.assertEqual(details[TESTFILE]['Checksum'], '22218315')
        self.assertEqual(details[TESTFILE]['check_sum'], '22218315')
        self.assertTrue(173658 in details[TESTFILE]['Lumis'])
        self.assertEqual(sorted(details[TESTFILE]['Lumis'][173658]),
                         [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
                          27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
                          51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
                          75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
                          99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111])

    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], 22075)
        self.assertEqual(dataset['NumberOfBlocks'], 46)
        self.assertEqual(dataset['FileSize'], 4001680824)
        self.assertEqual(dataset['file_size'], 4001680824)
        self.assertEqual(dataset['NumberOfFiles'], 49)
        self.assertEqual(dataset['NumberOfLumis'], 7223)

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], '')
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], 377)
        self.assertEqual(block['NumberOfBlocks'], 1)
        self.assertEqual(block['FileSize'], 150780132)
        self.assertEqual(block['file_size'], 150780132)
        self.assertEqual(block['NumberOfFiles'], 2)
        self.assertEqual(block['NumberOfLumis'], 94)

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        self.assertEqual(0, block['OpenForWriting'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        sites = [x['Name'] for x in block['PhEDExNodeList'] if x['Name'].find('CH_CERN') > -1]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET, blockName=BLOCK + 'asas')

    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.listFileBlocks(DATASET)
        self.assertTrue(BLOCK in blocks)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET, blockName=BLOCK, onlyClosedBlocks=True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.blockExists, DATASET + '#somethingelse')

    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + '#blah')

    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents('/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0')
        self.assertEqual(4, len(files))
        self.assertEqual('/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0', files[0]['block_name'])
        self.assertEqual('/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0', files[0]['BlockName'])
        self.assertEqual('/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/1043E89F-2DCF-E411-9CAE-02163E013751.root',
                         files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + 'asas')

    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in [x['logical_file_name'] for x in self.dbs.lfnsInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        WRONG_BLOCK = BLOCK[:-4]+'abcd'
        BLOCK2 = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
        DBS_BLOCK = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                                    'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e'
        DBS_BLOCK2 = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                                    'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab'
        self.dbs = DBSReader('https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')
        # assume one site is cern
        sites = [x for x in self.dbs.listFileBlockLocation(BLOCK) if x and x.find('CH_CERN') > -1]
        self.assertTrue(sites)
        #This block is only found on DBS
        self.assertTrue(self.dbs.listFileBlockLocation(DBS_BLOCK))
        # doesn't raise on non-existant block
        self.assertFalse(self.dbs.listFileBlockLocation(WRONG_BLOCK))
        #test bulk call:
        ## two blocks in phedex
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, BLOCK2])))
        ## one block in phedex one does not exist
        self.assertEqual(1, len(self.dbs.listFileBlockLocation([BLOCK, WRONG_BLOCK])))
        ## one in phedex one in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, DBS_BLOCK])))
        ## two in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([DBS_BLOCK, DBS_BLOCK2])))
        ## one in DBS and one does not exist
        self.assertEqual(1, len(self.dbs.listFileBlockLocation([DBS_BLOCK, WRONG_BLOCK])))

    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + 'asas')

    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents('/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0')
        self.assertEqual(len(block), 1)
        block = block['/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0']
        self.assertEqual('/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/1043E89F-2DCF-E411-9CAE-02163E013751.root',
                         block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas')

    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents('/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0')
        self.assertEqual(1, len(parents))
        self.assertEqual('/Cosmics/Commissioning2015-v1/RAW#942d76fe-cf0e-11e4-afad-001e67ac06a0', parents[0]['Name'])
        sites = [x for x in parents[0]['PhEDExNodeList'] if x.find("CH_CERN") > -1]
        self.assertTrue(sites)

        self.assertFalse(self.dbs.listBlockParents('/Cosmics/Commissioning2015-v1/RAW#942d76fe-cf0e-11e4-afad-001e67ac06a0'))

    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertRaises(DBSReaderError, self.dbs.blockToDatasetPath, BLOCK + 'asas')
Beispiel #10
0
class DBSReaderTest(unittest.TestCase):

    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        #self.endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs      = None
        return

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
        return

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    @attr("integration")
    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset = DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(dataset = DATASET, block = BLOCK)
        self.assertEqual([173657], runs)

    @attr("integration")
    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset = DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(173692 in runs)
        self.assertEqual(runs[173692], 2782)
        runs = self.dbs.listRuns(dataset = DATASET, block = BLOCK)
        self.assertEqual({173657 : 94}, runs)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    @attr("integration")
    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    @attr("integration")
    def testlistDatasetFileDetails(self):
        """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
        TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
        for endpoint in [self.endpoint, 'test/python/WMCore_t/Services_t/DBS_t/DBSReader_t.py:']:
            self.dbs = DBSReader(endpoint)
            details = self.dbs.listDatasetFileDetails(DATASET)
            self.assertEqual(len(details), 49)
            self.assertTrue(TESTFILE in details)
            self.assertEqual(details[TESTFILE]['NumberOfEvents'], 545)
            self.assertEqual(details[TESTFILE]['Size'], 286021145)
            self.assertEqual(details[TESTFILE]['BlockName'], '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
            self.assertEqual(details[TESTFILE]['Checksums'],
                {'Checksum': '22218315', 'Adler32': 'a41a1446', 'Md5': 'NOTSET'}
            )
            self.assertTrue( 173658 in details[TESTFILE]['Lumis'])
            self.assertEqual( sorted(details[TESTFILE]['Lumis'][173658]),
                sorted( map( long, [8, 12, 9, 14, 10, 6, 2, 1, 4, 3, 36, 49, 16, 11, 27, 35, 46, 39, 20, 24, 52, 23, 40, 42, 45, 21, 32, 37,  \
                                    25, 22, 5, 33, 17, 15, 26, 50, 18, 29, 51, 44, 69, 43, 30, 73, 19, 41, 13, 38, 7, 31, 75, 48, 59, 65, 55, \
                                    57, 34, 28, 74, 47, 64, 61, 68, 77, 66, 71, 60, 76, 70, 67, 62, 78, 82, 79, 88, 56, 101, 92, 58, 72, 54,  \
                                    63, 96, 53, 84, 95, 89, 85, 99, 81, 91, 102, 80, 100, 107, 94, 93, 90, 86, 87, 83, 97, 104, 110, 111, 106,\
                                    108, 98, 103, 109, 105]))
            )

    @attr("integration")
    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], '22075')
        self.assertEqual(dataset['NumberOfBlocks'], '46')
        self.assertEqual(dataset['total_size'], '4001680824')
        self.assertEqual(dataset['NumberOfFiles'], '49')
        self.assertEqual(dataset['NumberOfLumis'], '7223')

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], '')
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], '377')
        self.assertEqual(block['NumberOfBlocks'], '1')
        self.assertEqual(block['total_size'], '150780132')
        self.assertEqual(block['NumberOfFiles'], '2')
        self.assertEqual(block['NumberOfLumis'], '94')

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName = BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        #self.assertEqual(377, block['NumberOfEvents'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        sites = [x['Name'] for x in block['StorageElementList'] if x['Name'].find('cern.ch') > -1]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET + 'blah')
        self.assertFalse(self.dbs.getFileBlocksInfo(DATASET, blockName = BLOCK + 'asas'))

    @attr("integration")
    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.listFileBlocks(DATASET)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET, blockName = BLOCK, onlyClosedBlocks = True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    @attr("integration")
    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    @attr("integration")
    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertFalse(self.dbs.blockExists(DATASET + '#somethingelse'))

    @attr("integration")
    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + '#blah')

    @attr("integration")
    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        # hope PromptReco doesn't get deleted
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60')
        self.assertEqual(1, len(files))
        self.assertEqual('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60', files[0]['Block']['Name'])
        self.assertEqual('/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
                         files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + 'asas')

    @attr("integration")
    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in self.dbs.lfnsInBlock(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        WRONG_BLOCK = BLOCK[:-4]+'abcd'
        BLOCK2 = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
        DBS_BLOCK = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                                    'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e'
        DBS_BLOCK2 = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                                    'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab'
        self.dbs = DBSReader('https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')
        # assume one site is cern
        sites = [x for x in self.dbs.listFileBlockLocation(BLOCK) if x and x.find('cern.ch') > -1]
        self.assertTrue(sites)
        #This block is only found on DBS
        self.assertTrue(self.dbs.listFileBlockLocation(DBS_BLOCK))
        # doesn't raise on non-existant block
        self.assertFalse(self.dbs.listFileBlockLocation(WRONG_BLOCK))
        #test bulk call:
        ## two blocks in phedex
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, BLOCK2])))
        ## one block in phedex one does not exist
        self.assertEqual(1, len(self.dbs.listFileBlockLocation([BLOCK, WRONG_BLOCK])))
        ## one in phedex one in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, DBS_BLOCK])))
        ## two in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([DBS_BLOCK, DBS_BLOCK2])))
        ## one in DBS and one does not exist
        self.assertEqual(1, len(self.dbs.listFileBlockLocation([DBS_BLOCK, WRONG_BLOCK])))

    @attr("integration")
    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60')
        self.assertEqual(len(block), 1)
        block = block['/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60']
        self.assertEqual('/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
                         block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas')

    @attr("integration")
    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    @attr("integration")
    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60')
        self.assertEqual(1, len(parents))
        self.assertEqual('/Jet/Run2011A-v1/RAW#37cf2a40-4e0e-11e0-9833-00151755cb60',
                         parents[0]['Name'])
        sites = [x for x in parents[0]['StorageElementList'] if x.find("cern.ch") > -1]
        self.assertTrue(sites)

        self.assertFalse(self.dbs.listBlockParents('/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60dsl'))

    @attr("integration")
    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    @attr("integration")
    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertFalse(self.dbs.blockToDatasetPath(BLOCK + 'asas'))
        def run(self):
            self.files = {}
            logging = self.l
            has_parent = self.hp
            fakeLocation = self.fl
            blockName = self.bn
            blockBlacklist = self.bbl
            blockWhitelist = self.bwl


            if blockBlacklist and blockName in blockBlacklist:
                return
            if blockWhitelist and blockName not in blockWhitelist:
                return

            phedexReader = PhEDEx()
            siteDB = SiteDBJSON()
            dbsReader = DBSReader(endpoint=self.dbs)
            replicaInfo = phedexReader.getReplicaInfoForBlocks(block=blockName,
                                                                    subscribed='y')
            blockFiles = dbsReader.listFilesInBlock(blockName, lumis=True)
            if has_parent:
                try:
                    blockFileParents = dbsReader.listFilesInBlockWithParents(blockName)
                except:
                    print blockName, "does not appear to have a parent, even though it should. Very suspicious"
                    blockFileParents = dbsReader.listFilesInBlock(blockName)
            else:
                blockFileParents = dbsReader.listFilesInBlock(blockName)

            blockLocations = set()
            # load block locations
            if len(replicaInfo["phedex"]["block"]) > 0:
                for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                    PNN = replica["node"]
                    PSNs = siteDB.PNNtoPSN(PNN)
                    blockLocations.add(PNN)
                    #logging.debug("PhEDEx Node Name: %s\tPSNs: %s", PNN, PSNs)

            # We cannot upload docs without location, so force it in case it's empty
            if not blockLocations:
                if fakeLocation:
                    #logging.info("\t\t %s\tno location", blockName)
                    blockLocations.update([u'T1_US_FNAL_Disk', u'T2_CH_CERN'])
                elif not has_parent:  ## this should be the source
                    logging.info("Blockname: %s\tno location, ABORT", blockName)
                    self.major_failure = True
                    #sys.exit(1)
                
            #logging.info("Blockname: %s\tLocations: %s", blockName, blockLocations)

            # for each file on the block
            for blockFile in blockFiles:
                parentLFNs = []
                # populate parent information
                if blockFileParents and "ParentList" in blockFileParents[0]:
                    for fileParent in blockFileParents[0]["ParentList"]:
                        parentLFNs.append(fileParent["LogicalFileName"])
                runInfo = {}
                # Lumis not included in file
                for lumiSection in blockFile["LumiList"]:
                    if runBlacklist and lumiSection["RunNumber"] in runBlacklist:
                        continue
                    if runWhitelist and lumiSection["RunNumber"] not in runWhitelist:
                        continue

                    if lumiSection["RunNumber"] not in runInfo.keys():
                        runInfo[lumiSection["RunNumber"]] = []

                    runInfo[lumiSection["RunNumber"]].append(lumiSection["LumiSectionNumber"])
                if len(runInfo.keys()) > 0:
                    self.files[blockFile["LogicalFileName"]] = {"runs": runInfo,
                                                                "events": blockFile["NumberOfEvents"],
                                                                "size": blockFile["FileSize"],
                                                                "locations": list(blockLocations),
                                                                "parents": parentLFNs}
            return
def getFiles(datasetName, runBlacklist, runWhitelist, blockBlacklist,
             blockWhitelist, dbsUrl):
    """
    _getFiles_

    Get the full information of a dataset including files, blocks, runs and lumis.
    Filter it using run and block white/black lists.

    It can receive and optional DBSUrl.
    """
    dbsReader = DBSReader(endpoint = dbsUrl)
    phedexReader = PhEDEx()
    siteDB = SiteDBJSON()

    files = {}
    outputDatasetParts = datasetName.split("/")
    print "dataset",datasetName,"parts",outputDatasetParts   
    try:
        #retrieve list of blocks from dataset
        blockNames = dbsReader.listFileBlocks(datasetName)
    except:
        raise RuntimeError("Dataset %s doesn't exist in given DBS instance" % datasetName)
    
    #traverse each block
    for blockName in blockNames:
        #deal with white and black list.
        if blockBlacklist and blockName in blockBlacklist:
            continue
        if blockWhitelist and blockName not in blockWhitelist:
            continue
        
        #existing blocks in phedex
        replicaInfo = phedexReader.getReplicaInfoForBlocks(block = blockName,
                                                           subscribed = 'y')
        blockFiles = dbsReader.listFilesInBlock(blockName, lumis=True)

        blockLocations = set()
        #load block locations
        if len(replicaInfo["phedex"]["block"]) > 0:
            for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                node = replica["node"]
                cmsSites = siteDB.phEDExNodetocmsName(node)
                if type(cmsSites) != list:
                    cmsSites = [cmsSites]
                for cmsName in cmsSites:
                    blockLocations.update(siteDB.cmsNametoSE(cmsName))
        #for each file on the block
        for blockFile in blockFiles:
            parentLFNs = []
            #get parent information about file
            blockFileParents = dbsReader.listFilesInBlockWithParents(blockName)
            #populate parent information
            if blockFileParents and "ParentList" in blockFileParents[0]:
                for fileParent in blockFileParents[0]["ParentList"]:
                    parentLFNs.append(fileParent["LogicalFileName"])
            runInfo = {}
            #Lumis not included in file
            for lumiSection in blockFile["LumiList"]:
                if runBlacklist and lumiSection["RunNumber"] in runBlacklist:
                    continue
                if runWhitelist and lumiSection["RunNumber"] not in runWhitelist:
                    continue

                if lumiSection["RunNumber"] not in runInfo.keys():
                    runInfo[lumiSection["RunNumber"]] = []

                runInfo[lumiSection["RunNumber"]].append(lumiSection["LumiSectionNumber"])
            if len(runInfo.keys()) > 0:
                files[blockFile["LogicalFileName"]] = {"runs": runInfo,
                                                       "events": blockFile["NumberOfEvents"],
                                                       "size": blockFile["FileSize"],
                                                       "locations": list(blockLocations),
                                                       "parents": parentLFNs}
    return files
Beispiel #13
0
def getFiles(datasetName, runBlacklist, runWhitelist, blockBlacklist,
             blockWhitelist, dbsUrl):
    """
    _getFiles_

    Get the full information of a dataset including files, blocks, runs and lumis.
    Filter it using run and block white/black lists.

    It can receive and optional DBSUrl.
    """
    dbsReader = DBSReader(endpoint = dbsUrl)
    phedexReader = PhEDEx()
    siteDB = SiteDBJSON()

    files = {}
    outputDatasetParts = datasetName.split("/")
    datasets = dbsReader.matchProcessedDatasets(outputDatasetParts[1],
                                                outputDatasetParts[3],
                                                outputDatasetParts[2])

    if len(datasets) == 0:
        raise RuntimeError("Dataset %s doesn't exist in given DBS instance" % datasetName)

    blockNames = dbsReader.listFileBlocks(datasetName)
    for blockName in blockNames:
        if blockBlacklist and blockName in blockBlacklist:
            continue
        if blockWhitelist and blockName not in blockWhitelist:
            continue

        replicaInfo = phedexReader.getReplicaInfoForBlocks(block = blockName,
                                                           subscribed = 'y')
        block = dbsReader.listFilesInBlockWithParents(blockName)
        blockLocations = set()
        if len(replicaInfo["phedex"]["block"]) > 0:
            for replica in replicaInfo["phedex"]["block"][0]["replica"]:
                node = replica["node"]
                cmsSites = siteDB.phEDExNodetocmsName(node)
                if type(cmsSites) != list:
                    cmsSites = [cmsSites]
                for cmsName in cmsSites:
                    blockLocations.update(siteDB.cmsNametoSE(cmsName))
        for blockFile in block:
            parentLFNs = []
            for fileParent in blockFile["ParentList"]:
                parentLFNs.append(fileParent["LogicalFileName"])
            runInfo = {}
            for lumiSection in blockFile["LumiList"]:
                if runBlacklist and lumiSection["RunNumber"] in runBlacklist:
                    continue
                if runWhitelist and lumiSection["RunNumber"] not in runWhitelist:
                    continue

                if lumiSection["RunNumber"] not in runInfo.keys():
                    runInfo[lumiSection["RunNumber"]] = []

                runInfo[lumiSection["RunNumber"]].append(lumiSection["LumiSectionNumber"])
            if len(runInfo.keys()) > 0:
                files[blockFile["LogicalFileName"]] = {"runs": runInfo,
                                                       "events": blockFile["NumberOfEvents"],
                                                       "size": blockFile["FileSize"],
                                                       "locations": list(blockLocations),
                                                       "parents": parentLFNs}
    return files
Beispiel #14
0
class DBSReaderTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        # self.endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.endpoint = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
        self.dbs = None
        return

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets("Jet*")
        self.assertTrue("Jet" in results)
        self.assertTrue("JetMET" in results)
        self.assertTrue("JetMETTau" in results)
        self.assertFalse(self.dbs.listPrimaryDatasets("DoesntExist"))
        return

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets("Jet", "RAW", "Run2011A-v1")
        self.assertEqual(1, len(dataset))
        self.assertEqual(["/Jet/Run2011A-v1/RAW"], dataset[0]["PathList"])
        self.assertEqual("Run2011A-v1", dataset[0]["Name"])
        self.assertFalse(self.dbs.matchProcessedDatasets("Jet", "RAW", "Run2011A-v666"))

    @attr("integration")
    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
        self.assertEqual([173657], runs)

    @attr("integration")
    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(173692 in runs)
        self.assertEqual(runs[173692], 2782)
        runs = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
        self.assertEqual({173657: 94}, runs)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets("Jet", "RAW")
        self.assertTrue("Run2011A-v1" in datasets)
        self.assertTrue("Run2011B-v1" in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets("Jet", "blah"))
        self.assertFalse(self.dbs.listProcessedDatasets("blah", "RAW"))

    @attr("integration")
    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    @attr("integrtion")
    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset["path"], DATASET)
        self.assertEqual(dataset["block"], "")
        self.assertEqual(dataset["NumberOfEvents"], "22075")
        self.assertEqual(dataset["NumberOfBlocks"], "46")
        self.assertEqual(dataset["total_size"], "4001680824")
        self.assertEqual(dataset["NumberOfFiles"], "49")
        self.assertEqual(dataset["NumberOfLumis"], "7223")

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block["path"], "")
        self.assertEqual(block["block"], BLOCK)
        self.assertEqual(block["NumberOfEvents"], "377")
        self.assertEqual(block["NumberOfBlocks"], "1")
        self.assertEqual(block["total_size"], "150780132")
        self.assertEqual(block["NumberOfFiles"], "2")
        self.assertEqual(block["NumberOfLumis"], "94")

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET + "blah")
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET, BLOCK + "asas")

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block["Name"] in [x["Name"] for x in blocks])
        self.assertEqual(BLOCK, block["Name"])
        # self.assertEqual(377, block['NumberOfEvents'])
        self.assertEqual(150780132, block["BlockSize"])
        self.assertEqual(2, block["NumberOfFiles"])
        # possibly fragile but assume block located at least at cern
        sites = [x["Name"] for x in block["StorageElementList"] if x["Name"].find("cern.ch") > -1]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET + "blah")
        self.assertFalse(self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK + "asas"))

    @attr("integration")
    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.listFileBlocks(DATASET)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET, blockName=BLOCK, onlyClosedBlocks=True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    @attr("integration")
    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    @attr("integration")
    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertFalse(self.dbs.blockExists(DATASET + "#somethingelse"))

    @attr("integration")
    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in [x["LogicalFileName"] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + "#blah")

    @attr("integration")
    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        # hope PromptReco doesn't get deleted
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents(
            "/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60"
        )
        self.assertEqual(1, len(files))
        self.assertEqual(
            "/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60", files[0]["Block"]["Name"]
        )
        self.assertEqual(
            "/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root",
            files[0]["ParentList"][0]["LogicalFileName"],
        )

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + "asas")

    @attr("integration")
    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in self.dbs.lfnsInBlock(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + "asas")

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        self.dbs = DBSReader(self.endpoint)
        # assume one site is cern
        sites = [x for x in self.dbs.listFileBlockLocation(BLOCK) if x.find("cern.ch") > -1]
        self.assertTrue(sites)
        # doesn't raise on non-existant block
        self.assertFalse(self.dbs.listFileBlockLocation(BLOCK + "blah"))

    @attr("integration")
    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block["Files"]))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + "asas")

    @attr("integration")
    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents(
            "/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60"
        )
        self.assertEqual(len(block), 1)
        block = block["/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60"]
        self.assertEqual(
            "/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root",
            block["Files"][0]["ParentList"][0]["LogicalFileName"],
        )

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + "asas")

    @attr("integration")
    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    @attr("integration")
    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents("/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60")
        self.assertEqual(1, len(parents))
        self.assertEqual("/Jet/Run2011A-v1/RAW#37cf2a40-4e0e-11e0-9833-00151755cb60", parents[0]["Name"])
        sites = [x for x in parents[0]["StorageElementList"] if x.find("cern.ch") > -1]
        self.assertTrue(sites)

        self.assertFalse(
            self.dbs.listBlockParents("/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60dsl")
        )

    @attr("integration")
    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    @attr("integration")
    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertFalse(self.dbs.blockToDatasetPath(BLOCK + "asas"))