Example 1
class PhEDExTest(unittest.TestCase):

    def setUp(self):
        """
        _setUp_
        
        Initialize the PhEDEx API to point at the test server.
        """
        phedexTestDS = "https://cmsweb.cern.ch/phedex/datasvc/json/test"
        self.dbsTestUrl = "http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet"
        self.phedexApi = PhEDEx({"endpoint": phedexTestDS,
                                 "method": "POST"})
        return
        
    @attr("integration")
    def testInjection(self):
        """
        _testInjection_

        Verify that we can inject data into PhEDEx.
        """
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, makeUUID())
        result = self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        self.assertEqual(result["phedex"]["injected"],
                         {"stats": {"closed_datasets": 0, "closed_blocks": 0,
                                    "new_blocks": 0, "new_datasets": 1,
                                    "new_files": 0}})
        return

    @attr("integration")
    def testSubscription(self):
        """
        _testSubscription_

        Verify that the subscription API works.
        """
        datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
        datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetA)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetB)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        
        testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS",
                                      "Saturn")
        xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsTestUrl, 
                                                   testSub.getDatasetPaths())
        result = self.phedexApi.subscribe(testSub, xmlData)
        requestIDs = result["phedex"]["request_created"]

        self.assertEqual(len(requestIDs), 1,
                         "Error: Wrong number of request IDs")
        self.assertTrue("id" in requestIDs[0],
                        "Error: Missing request ID")
        return

    @attr("integration")
    def testNodeMap(self):
        """
        _testNodeMap_

        Verify that the node map can be retrieved from PhEDEx and that the
        getNodeSE() and getNodeNames() methods work correctly.
        """
        self.assertEqual(self.phedexApi.getNodeSE("T2_FR_GRIF_LLR"), "polgrid4.in2p3.fr")
        self.assertEqual(self.phedexApi.getNodeNames("cmssrm.fnal.gov"), ["T1_US_FNAL_Buffer",
                                                                          "T1_US_FNAL_MSS"])
        return

    @attr('integration')
    def testGetSubscriptionMapping(self):
        """
        _testGetSubscriptionMapping_

        Verify that the subscription mapping API works correctly.
        """
        testDataset = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        blockA = "%s#%s" % (testDataset, makeUUID())
        blockB = "%s#%s" % (testDataset, makeUUID())

        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsTestUrl)
        datasetSpec = injectionSpec.getDataset(testDataset)
        datasetSpec.getFileblock(blockA, 'y')
        datasetSpec.getFileblock(blockB, 'y')
        blockSpec = injectionSpec.save()
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", blockSpec)

        # Create a dataset level subscription to a node
        testDatasetSub = PhEDExSubscription([testDataset], "T1_UK_RAL_MSS",
                                            "Saturn", requestOnly = "n")
        datasetSpec = XMLDrop.makePhEDExXMLForDatasets(self.dbsTestUrl, 
                                                       testDatasetSub.getDatasetPaths())
        self.phedexApi.subscribe(testDatasetSub, datasetSpec)

        # Create a block level subscription to a different node
        testBlockSub = PhEDExSubscription([testDataset], "T1_DE_KIT_MSS", "Saturn",
                                          level = "block", requestOnly = "n")
        self.phedexApi.subscribe(testBlockSub, blockSpec)

        subs = self.phedexApi.getSubscriptionMapping(testDataset)
        self.assertEqual(subs[testDataset], set(["T1_UK_RAL_MSS"]),
                         "Error: Dataset subscription is wrong.")

        subs = self.phedexApi.getSubscriptionMapping(blockA)
        self.assertEqual(len(subs[blockA]), 2,
                         "Error: Wrong number of nodes in block subscription.")
        self.assertTrue("T1_UK_RAL_MSS" in subs[blockA],
                        "Error: RAL missing from block sub.")
        self.assertTrue("T1_DE_KIT_MSS" in subs[blockA],
                        "Error: KIT missing from block sub.")
        return

    def testPFNLookup(self):
        """
        _testPFNLookup_

        Verify that the PFN lookup in PhEDEx works correctly.
        """
        call1 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol'], ['/store/user/metson/file'])

        # Should get one mapping back (one lfn, one node)
        self.assertTrue(len(call1.keys()) == 1)
        call1_key = call1.keys()[0]

        call2 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol', 'T1_US_FNAL_Buffer'], ['/store/user/metson/file'])
        # Should get back two mappings (two nodes)
        self.assertTrue(call1_key in call2.keys())

        # and one of the mappings should be the same as from the previous call
        self.assertTrue(call1[call1_key] == call2[call1_key])
        return

    @attr('integration')
    def testXMLJSON(self):
        """
        Test XML and JSON in the same scope
        """
        site = 'T1_US_FNAL_Buffer'
        jsonDict = {'endpoint': "https://cmsweb.cern.ch/phedex/datasvc/json/test"}
        phedexJSON = PhEDEx(responseType='json', dict=jsonDict)
        xmlDict = {'endpoint': "https://cmsweb.cern.ch/phedex/datasvc/xml/test"}
        phedexXML = PhEDEx(responseType='xml', dict=xmlDict)

        phedexXML.getNodeTFC(site)
        tfc_file = phedexXML.cacheFileName('tfc', inputdata={'node' : site})
        tfc_map = {}
        tfc_map[site] = readTFC(tfc_file)
        pfn = tfc_map[site].matchLFN('srmv2', '/store/user/jblow/dir/test.root')

        self.assertEqual(pfn, 'srm://cmssrm.fnal.gov:8443/srm/managerv2?SFN=/11/store/user/jblow/dir/test.root')

        self.assertEqual(phedexJSON.getNodeSE('T1_US_FNAL_Buffer'), 'cmssrm.fnal.gov')

    @attr('integration')
    def testAuth(self):
        """
        _testAuth_

        Verify that the auth method works correctly.
        """
        self.assertFalse(self.phedexApi.getAuth("datasvc_whatever"))
        self.assertTrue(self.phedexApi.getAuth("datasvc_subscribe"))
        self.assertTrue(self.phedexApi.getAuth("datasvc_inject"))

        return
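
Taken out of the test harness, the subscription flow exercised above reduces to the
following sketch (a non-authoritative example, assuming the same WMCore imports the
test uses: PhEDEx, PhEDExSubscription, XMLDrop and makeUUID; the endpoint, DBS URL
and node names are the test values from setUp()):

    phedexApi = PhEDEx({"endpoint": "https://cmsweb.cern.ch/phedex/datasvc/json/test",
                        "method": "POST"})
    dbsUrl = "http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet"
    dataset = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
    # inject the dataset, then subscribe it to a node
    phedexApi.injectBlocks("T1_US_FNAL_MSS", XMLDrop.makePhEDExDrop(dbsUrl, dataset))
    sub = PhEDExSubscription([dataset], "T1_UK_RAL_MSS", "Saturn")
    xmlData = XMLDrop.makePhEDExXMLForDatasets(dbsUrl, sub.getDatasetPaths())
    result = phedexApi.subscribe(sub, xmlData)
    requestIDs = result["phedex"]["request_created"]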
Example 2
class PhEDExTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        phedexTestDS = "https://cmsweb.cern.ch/phedex/datasvc/json/test"
        self.dbsTestUrl = "http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet"
        self.phedexApi = PhEDEx({"endpoint": phedexTestDS, "method": "POST"})
        return

    @attr("integration")
    def testInjection(self):
        """
        _testInjection_

        Verify that we can inject data into PhEDEx.
        """
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, makeUUID())
        result = self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        self.assertEqual(
            result["phedex"]["injected"], {
                "stats": {
                    "closed_datasets": 0,
                    "closed_blocks": 0,
                    "new_blocks": 0,
                    "new_datasets": 1,
                    "new_files": 0
                }
            })
        return

    @attr("integration")
    def testSubscription(self):
        """
        _testSubscription_

        Verify that the subscription API works.
        """
        datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
        datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetA)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetB)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)

        testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS",
                                     "Saturn")
        result = self.phedexApi.subscribe(testSub)
        requestIDs = result["phedex"]["request_created"]

        self.assertEqual(len(requestIDs), 1,
                         "Error: Wrong number of request IDs")
        self.assertTrue("id" in requestIDs[0], "Error: Missing request ID")
        return

    @attr("integration")
    def testBestNodeName(self):
        """
        _testBestNodeName_

        Verify that the node name is Buffer first
        """
        self.assertTrue(
            self.phedexApi.getBestNodeName("cmssrm.fnal.gov") ==
            "T1_US_FNAL_Buffer")
        return

    @attr("integration")
    def testNodeMap(self):
        """
        _testNodeMap_

        Verify that the node map can be retrieved from PhEDEx and that the
        getNodeSE() and getNodeNames() methods work correctly.
        """
        self.assertTrue(
            self.phedexApi.getNodeSE("T2_FR_GRIF_LLR") == "polgrid4.in2p3.fr")
        self.assertTrue(
            self.phedexApi.getNodeNames("cmssrm.fnal.gov") ==
            ["T1_US_FNAL_Buffer", "T1_US_FNAL_MSS"])
        return

    @attr('integration')
    def testGetSubscriptionMapping(self):
        """
        _testGetSubscriptionMapping_

        Verify that the subscription mapping API works correctly.
        """
        testDataset = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        blockA = "%s#%s" % (testDataset, makeUUID())
        blockB = "%s#%s" % (testDataset, makeUUID())

        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsTestUrl)
        datasetSpec = injectionSpec.getDataset(testDataset)
        datasetSpec.getFileblock(blockA, 'y')
        datasetSpec.getFileblock(blockB, 'y')
        blockSpec = injectionSpec.save()
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", blockSpec)

        # Create a dataset level subscription to a node
        testDatasetSub = PhEDExSubscription([testDataset],
                                            "T1_UK_RAL_MSS",
                                            "Saturn",
                                            request_only="n")
        self.phedexApi.subscribe(testDatasetSub)

        # Create a block level subscription to a different node
        testBlockSub = PhEDExSubscription([testDataset],
                                          "T1_DE_KIT_MSS",
                                          "Saturn",
                                          level="block",
                                          request_only="n")
        self.phedexApi.subscribe(testBlockSub)

        subs = self.phedexApi.getSubscriptionMapping(testDataset)
        self.assertEqual(subs[testDataset], {"T1_UK_RAL_MSS"},
                         "Error: Dataset subscription is wrong.")

        subs = self.phedexApi.getSubscriptionMapping(blockA)
        self.assertEqual(
            len(subs[blockA]), 2,
            "Error: Wrong number of nodes in block subscription.")
        self.assertTrue("T1_UK_RAL_MSS" in subs[blockA],
                        "Error: RAL missing from block sub.")
        self.assertTrue("T1_DE_KIT_MSS" in subs[blockA],
                        "Error: KIT missing from block sub.")
        return

    def testPFNLookup(self):
        """
        _testPFNLookup_

        Verify that the PFN lookup in PhEDEx works correctly.
        """
        call1 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol'],
                                      ['/store/user/metson/file'])

        # Should get one mapping back (one lfn, one node)
        self.assertTrue(len(call1.keys()) == 1)
        call1_key = call1.keys()[0]

        call2 = self.phedexApi.getPFN(
            ['T2_UK_SGrid_Bristol', 'T1_US_FNAL_Buffer'],
            ['/store/user/metson/file'])
        # Should get back two mappings (two nodes)
        self.assertTrue(call1_key in call2.keys())

        # and one of the mappings should be the same as from the previous call
        self.assertTrue(call1[call1_key] == call2[call1_key])
        return

    @attr('integration')
    def testXMLJSON(self):
        """
        Test XML and JSON in the same scope
        """
        site = 'T1_US_FNAL_Buffer'
        httpDict = {
            'endpoint': "https://cmsweb.cern.ch/phedex/datasvc/json/test"
        }
        phedexJSON = PhEDEx(responseType='json', httpDict=httpDict)
        httpDict = {
            'endpoint': "https://cmsweb.cern.ch/phedex/datasvc/xml/test"
        }
        phedexXML = PhEDEx(responseType='xml', httpDict=httpDict)

        phedexXML.getNodeTFC(site)
        tfc_file = phedexXML.cacheFileName('tfc', inputdata={'node': site})
        tfc_map = {}
        tfc_map[site] = readTFC(tfc_file)
        pfn = tfc_map[site].matchLFN('srmv2',
                                     '/store/user/jblow/dir/test.root')

        self.assertTrue(
            pfn ==
            'srm://cmssrm.fnal.gov:8443/srm/managerv2?SFN=/11/store/user/jblow/dir/test.root'
        )

        self.assertTrue(
            phedexJSON.getNodeSE('T1_US_FNAL_Buffer') == 'cmssrm.fnal.gov')

    @attr('integration')
    def testAuth(self):
        """
        _testAuth_

        Verify that the auth method works correctly.
        """
        self.assertFalse(self.phedexApi.getAuth("datasvc_whatever"))
        self.assertTrue(self.phedexApi.getAuth("datasvc_subscribe"))
        self.assertTrue(self.phedexApi.getAuth("datasvc_inject"))

        return
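
The @attr("integration") markers above come from nose.plugins.attrib, so the
integration tests can be selected or excluded when running the suite. A hedged
sketch using nose's programmatic entry point (the module name PhEDEx_t.py is
hypothetical):

    import nose
    # run only the tests tagged @attr("integration")
    nose.run(argv=["nosetests", "-a", "integration", "PhEDEx_t.py"])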
Example 3
class DBS3Reader:
    """
    _DBSReader_

    General API for reading data from DBS


    """
    def __init__(self, url, **contact):

        # instantiate dbs api object
        try:
            self.dbs = DbsApi(url, **contact)
        except dbsClientException as ex:
            msg = "Error in DBSReader with DbsApi\n"
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        # connection to PhEDEx (Use default endpoint url)
        self.phedex = PhEDEx(responseType = "json")

    def _getLumiList(self, blockName = None, lfns = None):
        """
        Currently takes only one LFN, but the DBS API needs to be updated.
        """
        try:
            if blockName:
                lumiLists = self.dbs.listFileLumis(block_name=blockName, validFileOnly = 1)
            elif lfns:
                lumiLists = []
                for slfn in slicedIterator(lfns, 50):
                    lumiLists.extend(self.dbs.listFileLumiArray(logical_file_name = slfn))
        except dbsClientException as ex:
            msg = "Error in "
            msg += "DBSReader.listFileLumiArray(%s)\n" % lfns
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        lumiDict = {}
        for lumisItem in lumiLists:
            lumiDict.setdefault(lumisItem['logical_file_name'], [])
            item = {}
            item["RunNumber"] = lumisItem['run_num']
            item['LumiSectionNumber'] = lumisItem['lumi_section_num']
            lumiDict[lumisItem['logical_file_name']].append(item)
        return lumiDict
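
    # A sketch of the mapping returned above (hypothetical values): each LFN
    # maps to a list of per-run dicts, e.g.
    #   {"/store/data/.../file.root": [{"RunNumber": 173658,
    #                                   "LumiSectionNumber": [8, 12, 9]}]}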

    def listPrimaryDatasets(self, match = '*'):
        """
        _listPrimaryDatasets_

        Return a list of primary datasets. The full dataset name must be
        provided; pattern-based matching is no longer supported.
        If no expression is provided, all datasets are returned.
        """
        try:
            result = self.dbs.listPrimaryDatasets(primary_ds_name = match)
        except dbsClientException as ex:
            msg = "Error in DBSReader.listPrimaryDataset(%s)\n" % match
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        result = [ x['primary_ds_name'] for x in result ]
        return result

    def matchProcessedDatasets(self, primary, tier, process):
        """
        _matchProcessedDatasets_

        return a list of Processed datasets
        """
        result = []
        try:
            datasets = self.dbs.listDatasets(primary_ds_name = primary, data_tier_name = tier, detail = True)
        except dbsClientException as ex:
            msg = "Error in DBSReader.listProcessedDatasets(%s)\n" % primary
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        for dataset in datasets:
            dataset = remapDBS3Keys(dataset, processed_ds_name = 'Name')
            dataset['PathList'] = [dataset['dataset']]
            if dataset['Name'] == process:
                result.append(dataset)
        return result

    def listRuns(self, dataset = None, block = None):
        """
        It gets a list of DbsRun objects, but for our purpose
        only the list of run numbers is collected.
        DbsRun (RunNumber,
                NumberOfEvents,
                NumberOfLumiSections,
                TotalLuminosity,
                StoreNumber,
                StartOfRun,
                EndOfRun,
                CreationDate,
                CreatedBy,
                LastModificationDate,
                LastModifiedBy
                )
        """
        runs = []
        try:
            if block:
                results = self.dbs.listRuns(block_name = block)
            else:
                results = self.dbs.listRuns(dataset = dataset)
        except dbsClientException as ex:
            msg = "Error in DBSReader.listRuns(%s, %s)\n" % (dataset, block)
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)
        for x in results:
            runs.extend(x['run_num'])
        return runs

    def listRunLumis(self, dataset = None, block = None):
        """
        It gets a list of DBSRun objects and returns the number of lumisections per run
        DbsRun (RunNumber,
                NumberOfEvents,
                NumberOfLumiSections,
                TotalLuminosity,
                StoreNumber,
                StartOfRun,
                EndOfRun,
                CreationDate,
                CreatedBy,
                LastModificationDate,
                LastModifiedBy
                )
        """
        try:
            if block:
                results = self.dbs.listRuns(block_name = block)
            else:
                results = self.dbs.listRuns(dataset = dataset)
        except dbsClientException as ex:
            msg = "Error in DBSReader.listRuns(%s, %s)\n" % (dataset, block)
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        # Return a runDict for consistency with the DBS2 call, which returned
        # {run_number: num_lumis}. The DBS3 call doesn't return the number of
        # lumis, so this returns {run_number: None}.
        # TODO: After DBS2 is completely removed, change the return format to a more sensible one

        runDict = {}
        for x in results:
            for runNumber in x["run_num"]:
                runDict[runNumber] = None
        return runDict

    def listProcessedDatasets(self, primary, dataTier = '*'):
        """
        _listProcessedDatasets_

        return a list of Processed datasets for the primary and optional
        data tier value

        """
        try:
            result = self.dbs.listDatasets(primary_ds_name = primary, data_tier_name = dataTier)
        except dbsClientException as ex:
            msg = "Error in DBSReader.listProcessedDatasets(%s)\n" % primary
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        result = [ x['dataset'].split('/')[2] for x in result ]
        return result


    def listDatasetFiles(self, datasetPath):
        """
        _listDatasetFiles_

        Get list of files for dataset

        """
        return [ x['logical_file_name'] for x in self.dbs.listFileArray(dataset = datasetPath)]

    def listDatatiers(self):
        """
        _listDatatiers_

        Get a list of datatiers known by DBS.
        """
        return [ tier['data_tier_name'] for tier in self.dbs.listDataTiers() ]

    def listDatasetFileDetails(self, datasetPath, getParents=False, validFileOnly=1):
        """
        TODO: This is completely wrong and needs to be redone or removed - getting
        the dataset altogether might be too costly

        _listDatasetFileDetails_

        Get list of lumis, events, and parents for each file in a dataset
        Return a dict where the keys are the files, and for each file we have something like:
            { 'NumberOfEvents': 545,
              'BlockName': '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace',
              'Lumis': {173658: [8, 12, 9, 14, 19, 109, 105]},
              'Parents': [],
              'Checksum': '22218315',
              'Adler32': 'a41a1446',
              'FileSize': 286021145,
              'ValidFile': 1
            }

        """
        fileDetails = self.dbs.listFileArray(dataset = datasetPath, validFileOnly = validFileOnly, detail=True)
        blocks = set() #the set of blocks of the dataset
        #Iterate over the files and prepare the set of blocks and a dict where the keys are the files
        files = {}
        for f in fileDetails:
            blocks.add(f['block_name'])
            files[f['logical_file_name']] = remapDBS3Keys(f, stringify = True)
            files[f['logical_file_name']]['ValidFile'] = f['is_file_valid']
            files[f['logical_file_name']]['Lumis'] = {}
            files[f['logical_file_name']]['Parents'] = []

        #Iterate over the blocks and get parents and lumis
        for blockName in blocks:
            #get the parents
            if getParents:
                parents = self.dbs.listFileParents(block_name=blockName)
                for p in parents:
                    if p['logical_file_name'] in files: #invalid files are not there if validFileOnly=1
                        files[p['logical_file_name']]['Parents'].extend(p['parent_logical_file_name'])
            #get the lumis
            file_lumis = self.dbs.listFileLumis(block_name=blockName)
            for f in file_lumis:
                if f['logical_file_name'] in files: #invalid files are not there if validFileOnly=1
                    if f['run_num'] in files[f['logical_file_name']]['Lumis']:
                        files[f['logical_file_name']]['Lumis'][f['run_num']].extend(f['lumi_section_num'])
                    else:
                        files[f['logical_file_name']]['Lumis'][f['run_num']] = f['lumi_section_num']

        return files


    def crossCheck(self, datasetPath, *lfns):
        """
        _crossCheck_

        For the dataset provided, check that the lfns listed all exist
        in the dataset.

        Return the list of lfns that are in the dataset

        """
        allLfns = self.dbs.listFileArray(dataset = datasetPath, validFileOnly = 1, detail = False)
        setOfAllLfns = set(allLfns)
        setOfKnownLfns = set(lfns)
        return list(setOfAllLfns.intersection(setOfKnownLfns))

    def crossCheckMissing(self, datasetPath, *lfns):
        """
        _crossCheckMissing_

        As cross check, but return value is a list of files that
        are *not* known by DBS

        """
        allLfns = self.dbs.listFileArray(dataset = datasetPath, validFileOnly = 1, detail = False)
        setOfAllLfns = set(allLfns)
        setOfKnownLfns = set(lfns)
        knownFiles = setOfAllLfns.intersection(setOfKnownLfns)
        unknownFiles = setOfKnownLfns.difference(knownFiles)
        return list(unknownFiles)
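
    # Worked sketch (hypothetical LFNs): if the dataset contains files {a, b, c},
    # then crossCheck(path, a, x) returns [a] (the LFNs DBS knows), while
    # crossCheckMissing(path, a, x) returns [x] (the LFNs DBS does not know).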


    def getDBSSummaryInfo(self, dataset = None, block = None):
        """
        Get dataset summary includes # of files, events, blocks and total size
        """
        #FIXME: Doesn't raise exceptions on missing data as the old API did
        if dataset:
            self.checkDatasetPath(dataset)
        try:
            if block:
                summary = self.dbs.listFileSummaries(block_name = block, validFileOnly = 1)
            else: # dataset case; dataset shouldn't be None
                summary = self.dbs.listFileSummaries(dataset = dataset, validFileOnly = 1)
        except Exception as ex:
            msg = "Error in DBSReader.getDBSSummaryInfo(%s, %s)\n" % (dataset, block)
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)
        if not summary or summary[0].get('file_size') is None: # appears to indicate missing dataset
            msg = "DBSReader.listDatasetSummary(%s, %s): No matching data"
            raise DBSReaderError(msg % (dataset, block))
        result = remapDBS3Keys(summary[0], stringify = True)
        result['path'] = dataset if not block else ''
        result['block'] = block if block else ''
        return result

    def getFileBlocksInfo(self, dataset, onlyClosedBlocks = False,
                          blockName = None, locations = True):
        """
        """
        self.checkDatasetPath(dataset)
        args = {'dataset' : dataset, 'detail' : True}
        if blockName:
            args['block_name'] = blockName
        try:
            blocks = self.dbs.listBlocks(**args)
        except Exception as ex:
            msg = "Error in DBSReader.getFileBlocksInfo(%s)\n" % dataset
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        blocks = [remapDBS3Keys(block, stringify = True, block_name = 'Name') for block in blocks]
        # only raise if blockName not specified - mimic dbs2 error handling
        if not blocks and not blockName:
            msg = "DBSReader.getFileBlocksInfo(%s, %s): No matching data"
            raise DBSReaderError(msg % (dataset, blockName))
        if locations:
            for block in blocks:
                block['StorageElementList'] = [{'Name' : x} for x in self.listFileBlockLocation(block['Name'])]

        if onlyClosedBlocks:
            return [x for x in blocks if str(x['OpenForWriting']) != "1"]

        return blocks

    def listFileBlocks(self, dataset, onlyClosedBlocks = False,
                       blockName = None):
        """
        _listFileBlocks_

        Retrieve a list of fileblock names for a dataset

        """
        self.checkDatasetPath(dataset)
        args = {'dataset' : dataset, 'detail' : False}
        if blockName:
            args['block_name'] = blockName
        if onlyClosedBlocks:
            args['detail'] = True
        try:
            blocks = self.dbs.listBlocks(**args)
        except dbsClientException as ex:
            msg = "Error in DBSReader.listFileBlocks(%s)\n" % dataset
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        if onlyClosedBlocks:
            result = [
                x['block_name'] for x in blocks \
                  if str(x['open_for_writing']) != "1"
                ]

        else:
            result = [ x['block_name'] for x in blocks ]

        return result

    def listOpenFileBlocks(self, dataset):
        """
        _listOpenFileBlocks_

        Retrieve a list of open fileblock names for a dataset

        """
        self.checkDatasetPath(dataset)
        try:
            blocks = self.dbs.listBlocks(dataset = dataset, detail = True)
        except dbsClientException as ex:
            msg = "Error in DBSReader.listFileBlocks(%s)\n" % dataset
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)


        result = [
            x['block_name'] for x in blocks \
            if str(x['open_for_writing']) == "1"
        ]


        return result

    def blockExists(self, fileBlockName):
        """
        _blockExists_

        Check to see if block with name provided exists in the DBS
        Instance.

        Return True if exists, False if not

        """
        self.checkBlockName(fileBlockName)
        try:

            blocks = self.dbs.listBlocks(block_name = fileBlockName)
        except Exception as ex:
            msg = "Error in "
            msg += "DBSReader.blockExists(%s)\n" % fileBlockName
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        if len(blocks) == 0:
            return False
        return True


    def listFilesInBlock(self, fileBlockName, lumis = True):
        """
        _listFilesInBlock_

        Get a list of files in the named fileblock
        TODO: lumis can be false when lumi splitting is not required
        However WMBSHelper expect file['LumiList'] to get the run number
        so for now it will be always true.
        We need to clean code up when dbs2 is completely deprecated.
        calling lumis for run number is expensive.
        """
        if not self.blockExists(fileBlockName):
            msg = "DBSReader.listFilesInBlock(%s): No matching data"
            raise DBSReaderError(msg % fileBlockName)

        try:
            files = self.dbs.listFileArray(block_name = fileBlockName, validFileOnly = 1, detail = True)
        except dbsClientException as ex:
            msg = "Error in "
            msg += "DBSReader.listFilesInBlock(%s)\n" % fileBlockName
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        if lumis:
            lumiDict = self._getLumiList(blockName = fileBlockName)

        result = []
        for fileInfo in files:
            if lumis:
                fileInfo["LumiList"] = lumiDict[fileInfo['logical_file_name']]
            result.append(remapDBS3Keys(fileInfo, stringify = True))
        return result

    def listFilesInBlockWithParents(self, fileBlockName, lumis = True):
        """
        _listFilesInBlockWithParents_

        Get a list of files in the named fileblock including
        the parents of that file.
        TODO: lumis can be false when lumi splitting is not required
        However WMBSHelper expect file['LumiList'] to get the run number
        so for now it will be always true.

        """
        if not self.blockExists(fileBlockName):
            msg = "DBSReader.listFilesInBlockWithParents(%s): No matching data"
            raise DBSReaderError(msg % fileBlockName)

        try:
            #TODO: should we get only valid block for this?
            files = self.dbs.listFileParents(block_name = fileBlockName)
            fileDetails = self.listFilesInBlock(fileBlockName, lumis)

        except dbsClientException as ex:
            msg = "Error in "
            msg += "DBSReader.listFilesInBlockWithParents(%s)\n" % (
                fileBlockName,)
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        childByParents = defaultdict(list)
        for f in files:
            # Probably a child can have more than 1 parent file
            for fp in f['parent_logical_file_name']:
                childByParents[fp].append(f['logical_file_name'])
        parentsLFNs = childByParents.keys()

        parentFilesDetail = []
        #TODO: slice parentsLFNs until the DBS API handles that.
        #Remove the slicing once the DBS API handles it
        for pLFNs in slicedIterator(parentsLFNs, 50):
            parentFilesDetail.extend(self.dbs.listFileArray(logical_file_name = pLFNs, detail = True))

        if lumis:
            parentLumis = self._getLumiList(lfns = parentsLFNs)

        parentsByLFN = defaultdict(list)

        for pf in parentFilesDetail:
            parentLFN = pf['logical_file_name']
            dbsFile = remapDBS3Keys(pf, stringify = True)
            if lumis:
                dbsFile["LumiList"] = parentLumis[parentLFN]

            for childLFN in childByParents[parentLFN]:
                parentsByLFN[childLFN].append(dbsFile)

        for fileInfo in fileDetails:
            fileInfo["ParentList"] = parentsByLFN[fileInfo['logical_file_name']]

        return fileDetails
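
    # Shape of the result above (informal): the listFilesInBlock() records, each
    # extended with a "ParentList" of remapped parent-file dicts, where every
    # parent carries its own "LumiList" when lumis=True.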

    def lfnsInBlock(self, fileBlockName):
        """
        _lfnsInBlock_

        LFN list only for block, details = False => faster query

        """
        if not self.blockExists(fileBlockName):
            msg = "DBSReader.lfnsInBlock(%s): No matching data"
            raise DBSReaderError(msg % fileBlockName)

        try:
            lfns = self.dbs.listFileArray(block_name = fileBlockName, validFileOnly = 1, detail = False)
            return lfns
        except dbsClientException as ex:
            msg = "Error in "
            msg += "DBSReader.listFilesInBlock(%s)\n" % fileBlockName
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)


    def listFileBlockLocation(self, fileBlockName, dbsOnly = False, phedexNodes=False):
        """
        _listFileBlockLocation_

        Get origin_site_name of a block

        """
        blockNames = [fileBlockName] if isinstance(fileBlockName, basestring) else fileBlockName
        for block in blockNames:
            self.checkBlockName(block)

        blockInfo = {}
        if not dbsOnly:
            try:
                blockInfo = self.phedex.getReplicaSEForBlocks(phedexNodes=phedexNodes, block=blockNames, complete='y')
            except Exception as ex:
                msg = "Error while getting block location from PhEDEx for block_name=%s)\n" % fileBlockName
                msg += "%s\n" % str(ex)
                raise Exception(msg)

            if not blockInfo or len(blockInfo) != len(blockNames): #if we couldn't get the data location from PhEDEx, fall back to the origin site location from DBS
                dbsOnly = True
                blockNames = set(blockNames) - set(blockInfo) #the blocks for which we found no information in PhEDEx

        if dbsOnly:
            try:
                for block in blockNames:
                    res = self.dbs.listBlockOrigin(block_name = block)
                    if res:
                        blockInfo[block] = [res[0]['origin_site_name']]
            except dbsClientException as ex:
                msg = "Error in DBS3Reader: self.dbs.listBlockOrigin(block_name=%s)\n" % fileBlockName
                msg += "%s\n" % formatEx3(ex)
                raise DBSReaderError(msg)

            if not any(blockInfo.values()): # no data location from dbs
                return list()

        #removing duplicates and 'UNKNOWN' entries
        locations = {}
        node_filter_list = set(['UNKNOWN', None])
        for name, nodes in blockInfo.iteritems():
            final_nodes = set()
            for n in nodes:
                if n in node_filter_list:
                    continue
                try:
                    cmsname(n)
                except AssertionError: ## is SE
                    n = self.phedex.getNodeNames(n) if phedexNodes else [n]
                else:  ## not SE i.e. phedexNode
                    n = [self.phedex.getNodeSE(n)] if not phedexNodes else [n]
                final_nodes = final_nodes.union(n)
            locations[name] = list(final_nodes - node_filter_list)

        #returning single list if a single block is passed
        if isinstance(fileBlockName, basestring):
            locations = locations[fileBlockName]

        return locations
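
    # Informal examples (hypothetical names): passing a single block name returns
    # a plain list of locations, e.g. ["cmssrm.fnal.gov"]; passing a list of
    # block names returns {blockName: [locations]} instead.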

    def getFileBlock(self, fileBlockName):
        """
        _getFileBlock_

        return a dictionary:
        { blockName: {
             "StorageElements" : [<se list>],
             "Files" : { LFN : Events },
             }
        }


        """
        if not self.blockExists(fileBlockName):
            msg = "DBSReader.getFileBlock(%s): No matching data"
            raise DBSReaderError(msg % fileBlockName)

        result = {fileBlockName: {
            "StorageElements" : self.listFileBlockLocation(fileBlockName),
            "Files" : self.listFilesInBlock(fileBlockName),
            "IsOpen" : self.blockIsOpen(fileBlockName)
                                 }
                 }
        return result

    def getFileBlockWithParents(self, fileBlockName):
        """
        _getFileBlockWithParents_

        return a dictionary:
        { blockName: {
             "StorageElements" : [<se list>],
             "Files" : dictionaries representing each file
             }
        }

        """
        if not self.blockExists(fileBlockName):
            msg = "DBSReader.getFileBlockWithParents(%s): No matching data"
            raise DBSReaderError(msg % fileBlockName)

        result = {fileBlockName: {
            "StorageElements" : self.listFileBlockLocation(fileBlockName),
            "Files" : self.listFilesInBlockWithParents(fileBlockName),
            "IsOpen" : self.blockIsOpen(fileBlockName)
                                 }
                 }
        return result



    def getFiles(self, dataset, onlyClosedBlocks = False):
        """
        _getFiles_

        Returns a dictionary of block names for the dataset where
        each block consists of a dictionary containing the StorageElements
        for that block and the files in that block by LFN mapped to NEvents

        """
        result = {}
        blocks = self.listFileBlocks(dataset, onlyClosedBlocks)

        for x in blocks:
            result.update(self.getFileBlock(x))

        return result


    def listBlockParents(self, blockName):
        """Get parent blocks for block"""
        result = []
        self.checkBlockName(blockName)
        blocks = self.dbs.listBlockParents(block_name = blockName)
        for block in blocks:
            toreturn = {'Name' : block['parent_block_name']}
            toreturn['StorageElementList'] = self.listFileBlockLocation(toreturn['Name'])
            result.append(toreturn)
        return result


    def blockIsOpen(self, blockName):
        """
        _blockIsOpen_

        Return True if the named block is open, False if not or if the
        block doesn't exist

        """
        self.checkBlockName(blockName)
        blockInstance = self.dbs.listBlocks(block_name = blockName, detail = True)
        if len(blockInstance) == 0:
            return False
        blockInstance = blockInstance[0]
        isOpen = blockInstance.get('open_for_writing', 1)
        if isOpen == 0:
            return False
        return True



    def blockToDatasetPath(self, blockName):
        """
        _blockToDatasetPath_

        Given a block name, get the dataset Path associated with that
        Block.

        Returns the dataset path, or None if not found

        """
        self.checkBlockName(blockName)
        try:
            blocks = self.dbs.listBlocks(block_name = blockName, detail = True)
        except Exception as ex:
            msg = "Error in "
            msg += "DBSReader.blockToDatasetPath(%s)\n" % blockName
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        if blocks == []:
            return None

        pathname = blocks[-1].get('dataset', None)
        return pathname

    def listDatasetLocation(self, datasetName, dbsOnly = False):
        """
        _listDatasetLocation_

        List the origin SEs where there is at least a block of the given
        dataset.
        """
        self.checkDatasetPath(datasetName)

        if not dbsOnly:
            try:
                blocksInfo = self.phedex.getReplicaSEForBlocks(dataset=[datasetName], complete='y')
            except Exception as ex:
                msg = "Error while getting block location from PhEDEx for dataset=%s)\n" % datasetName
                msg += "%s\n" % str(ex)
                raise Exception(msg)

            if not blocksInfo: # if we couldn't get the data location from PhEDEx, fall back to the origin site location from DBS
                dbsOnly = True
            else:
                locations = set(blocksInfo.values()[0])
                for blockSites in blocksInfo.values():
                    locations.intersection_update(blockSites)

        if dbsOnly:
            try:
                blocksInfo = self.dbs.listBlockOrigin(dataset = datasetName)
            except dbsClientException as ex:
                msg = "Error in DBSReader: dbsApi.listBlocks(dataset=%s)\n" % datasetName
                msg += "%s\n" % formatEx3(ex)
                raise DBSReaderError(msg)

            if not blocksInfo: # no data location from dbs
                return list()

            locations = set()
            for blockInfo in blocksInfo:
                locations.update([blockInfo['origin_site_name']])

            locations.difference_update(['UNKNOWN']) # remove entry when SE name is 'UNKNOWN'

        return list(locations)

    def checkDatasetPath(self, pathName):
        """
         _checkDatasetPath_
        """
        if pathName in ("", None):
            raise DBSReaderError("Invalid Dataset Path name: => %s <=" % pathName)

    def checkBlockName(self, blockName):
        """
         _checkBlockName_
        """
        if blockName in ("", "*", None):
            raise DBSReaderError("Invalid Block name: => %s <=" % blockName)
Example 4
class DBS3Reader:
    """
    _DBSReader_

    General API for reading data from DBS


    """
    def __init__(self, url, **contact):

        # instantiate dbs api object
        try:
            self.dbs = DbsApi(url, **contact)
        except dbsClientException as ex:
            msg = "Error in DBSReader with DbsApi\n"
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        # connection to PhEDEx (Use default endpoint url)
        self.phedex = PhEDEx(responseType="json")

    def _getLumiList(self, blockName=None, lfns=None):
        """
        Currently takes only one LFN, but the DBS API needs to be updated.
        """
        try:
            if blockName:
                lumiLists = self.dbs.listFileLumis(block_name=blockName,
                                                   validFileOnly=1)
            elif lfns:
                lumiLists = []
                for slfn in slicedIterator(lfns, 50):
                    lumiLists.extend(
                        self.dbs.listFileLumiArray(logical_file_name=slfn))
        except dbsClientException as ex:
            msg = "Error in "
            msg += "DBSReader.listFileLumiArray(%s)\n" % lfns
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        lumiDict = {}
        for lumisItem in lumiLists:
            lumiDict.setdefault(lumisItem['logical_file_name'], [])
            item = {}
            item["RunNumber"] = lumisItem['run_num']
            item['LumiSectionNumber'] = lumisItem['lumi_section_num']
            lumiDict[lumisItem['logical_file_name']].append(item)
        return lumiDict

    def listPrimaryDatasets(self, match='*'):
        """
        _listPrimaryDatasets_

        Return a list of primary datasets. The full dataset name must be
        provided; pattern-based matching is no longer supported.
        If no expression is provided, all datasets are returned.
        """
        try:
            result = self.dbs.listPrimaryDatasets(primary_ds_name=match)
        except dbsClientException as ex:
            msg = "Error in DBSReader.listPrimaryDataset(%s)\n" % match
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        result = [x['primary_ds_name'] for x in result]
        return result

    def matchProcessedDatasets(self, primary, tier, process):
        """
        _matchProcessedDatasets_

        return a list of Processed datasets
        """
        result = []
        try:
            datasets = self.dbs.listDatasets(primary_ds_name=primary,
                                             data_tier_name=tier,
                                             detail=True)
        except dbsClientException as ex:
            msg = "Error in DBSReader.listProcessedDatasets(%s)\n" % primary
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        for dataset in datasets:
            dataset = remapDBS3Keys(dataset, processed_ds_name='Name')
            dataset['PathList'] = [dataset['dataset']]
            if dataset['Name'] == process:
                result.append(dataset)
        return result

    def listRuns(self, dataset=None, block=None):
        """
        It gets a list of DbsRun objects, but for our purpose
        only the list of run numbers is collected.
        DbsRun (RunNumber,
                NumberOfEvents,
                NumberOfLumiSections,
                TotalLuminosity,
                StoreNumber,
                StartOfRun,
                EndOfRun,
                CreationDate,
                CreatedBy,
                LastModificationDate,
                LastModifiedBy
                )
        """
        runs = []
        try:
            if block:
                results = self.dbs.listRuns(block_name=block)
            else:
                results = self.dbs.listRuns(dataset=dataset)
        except dbsClientException as ex:
            msg = "Error in DBSReader.listRuns(%s, %s)\n" % (dataset, block)
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)
        for x in results:
            runs.extend(x['run_num'])
        return runs

    def listRunLumis(self, dataset=None, block=None):
        """
        It gets a list of DBSRun objects and returns the number of lumisections per run
        DbsRun (RunNumber,
                NumberOfEvents,
                NumberOfLumiSections,
                TotalLuminosity,
                StoreNumber,
                StartOfRun,
                EndOfRun,
                CreationDate,
                CreatedBy,
                LastModificationDate,
                LastModifiedBy
                )
        """
        try:
            if block:
                results = self.dbs.listRuns(block_name=block)
            else:
                results = self.dbs.listRuns(dataset=dataset)
        except dbsClientException as ex:
            msg = "Error in DBSReader.listRuns(%s, %s)\n" % (dataset, block)
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        # Return a runDict for consistency with the DBS2 call, which returned
        # {run_number: num_lumis}. The DBS3 call doesn't return the number of
        # lumis, so this returns {run_number: None}.
        # TODO: After DBS2 is completely removed, change the return format to a more sensible one

        runDict = {}
        for x in results:
            for runNumber in x["run_num"]:
                runDict[runNumber] = None
        return runDict

    def listProcessedDatasets(self, primary, dataTier='*'):
        """
        _listProcessedDatasets_

        return a list of Processed datasets for the primary and optional
        data tier value

        """
        try:
            result = self.dbs.listDatasets(primary_ds_name=primary,
                                           data_tier_name=dataTier)
        except dbsClientException as ex:
            msg = "Error in DBSReader.listProcessedDatasets(%s)\n" % primary
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        result = [x['dataset'].split('/')[2] for x in result]
        return result

    def listDatasetFiles(self, datasetPath):
        """
        _listDatasetFiles_

        Get list of files for dataset

        """
        return [
            x['logical_file_name']
            for x in self.dbs.listFileArray(dataset=datasetPath)
        ]

    def listDatasetFileDetails(self, datasetPath, getParents=False):
        """
        TODO: This is completely wrong and needs to be redone or removed - getting
        the dataset altogether might be too costly

        _listDatasetFileDetails_

        Get list of lumis, events, and parents for each file in a dataset
        Return a dict where the keys are the files, and for each file we have something like:
            { 'NumberOfEvents': 545,
              'BlockName': '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace',
              'Lumis': {173658: [8, 12, 9, 14, 19, 109, 105]},
              'Parents': [],
              'Checksum': '22218315',
              'Adler32': 'a41a1446',
              'FileSize': 286021145
            }

        """
        fileDetails = self.dbs.listFileArray(dataset=datasetPath,
                                             validFileOnly=1,
                                             detail=True)
        blocks = set()  #the set of blocks of the dataset
        #Iterate over the files and prepare the set of blocks and a dict where the keys are the files
        files = {}
        for f in fileDetails:
            blocks.add(f['block_name'])
            files[f['logical_file_name']] = remapDBS3Keys(f, stringify=True)
            files[f['logical_file_name']]['Lumis'] = {}
            files[f['logical_file_name']]['Parents'] = []

        #Iterate over the blocks and get parents and lumis
        #TODO: this part is completely wrong and needs to be redone
        for blockName in blocks:
            #get the parents
            if getParents:
                parents = self.dbs.listFileParents(block_name=blockName)
                for p in parents:
                    if p['logical_file_name'] in files:  #invalid files are not there
                        files[p['logical_file_name']]['Parents'].extend(
                            p['parent_logical_file_name'])
            #get the lumis
            file_lumis = self.dbs.listFileLumis(block_name=blockName,
                                                validFileOnly=1)
            for f in file_lumis:
                if f['run_num'] in files[f['logical_file_name']]['Lumis']:
                    files[f['logical_file_name']]['Lumis'][
                        f['run_num']].extend(f['lumi_section_num'])
                else:
                    files[f['logical_file_name']]['Lumis'][
                        f['run_num']] = f['lumi_section_num']

        return files

    def crossCheck(self, datasetPath, *lfns):
        """
        _crossCheck_

        For the dataset provided, check that the lfns listed all exist
        in the dataset.

        Return the list of lfns that are in the dataset

        """
        allLfns = self.dbs.listFileArray(dataset=datasetPath,
                                         validFileOnly=1,
                                         detail=False)
        setOfAllLfns = set(allLfns)
        setOfKnownLfns = set(lfns)
        return list(setOfAllLfns.intersection(setOfKnownLfns))

    def crossCheckMissing(self, datasetPath, *lfns):
        """
        _crossCheckMissing_

        As cross check, but return value is a list of files that
        are *not* known by DBS

        """
        allLfns = self.dbs.listFileArray(dataset=datasetPath,
                                         validFileOnly=1,
                                         detail=False)
        setOfAllLfns = set(allLfns)
        setOfKnownLfns = set(lfns)
        knownFiles = setOfAllLfns.intersection(setOfKnownLfns)
        unknownFiles = setOfKnownLfns.difference(knownFiles)
        return list(unknownFiles)

    def getDBSSummaryInfo(self, dataset=None, block=None):
        """
        Get dataset summary includes # of files, events, blocks and total size
        """
        #FIXME: Doesn't raise exceptions on missing data as the old API did
        if dataset:
            self.checkDatasetPath(dataset)
        try:
            if block:
                summary = self.dbs.listFileSummaries(block_name=block,
                                                     validFileOnly=1)
            else:  # dataset case; dataset shouldn't be None
                summary = self.dbs.listFileSummaries(dataset=dataset,
                                                     validFileOnly=1)
        except dbsClientException as ex:
            msg = "Error in DBSReader.listDatasetSummary(%s, %s)\n" % (dataset,
                                                                       block)
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)
        if not summary or summary[0].get(
                'file_size') is None:  # appears to indicate missing dataset
            msg = "DBSReader.listDatasetSummary(%s, %s): No matching data"
            raise DBSReaderError(msg % (dataset, block))
        result = remapDBS3Keys(summary[0], stringify=True)
        result['path'] = dataset if not block else ''
        result['block'] = block if block else ''
        return result

    def getFileBlocksInfo(self,
                          dataset,
                          onlyClosedBlocks=False,
                          blockName=None,
                          locations=True):
        """
        """
        self.checkDatasetPath(dataset)
        args = {'dataset': dataset, 'detail': True}
        if blockName:
            args['block_name'] = blockName
        try:
            blocks = self.dbs.listBlocks(**args)
        except dbsClientException as ex:
            msg = "Error in DBSReader.getFileBlocksInfo(%s)\n" % dataset
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        blocks = [
            remapDBS3Keys(block, stringify=True, block_name='Name')
            for block in blocks
        ]
        # only raise if blockName not specified - mimic dbs2 error handling
        if not blocks and not blockName:
            msg = "DBSReader.getFileBlocksInfo(%s, %s): No matching data"
            raise DBSReaderError(msg % (dataset, blockName))
        if locations:
            for block in blocks:
                block['StorageElementList'] = [{
                    'Name': x
                } for x in self.listFileBlockLocation(block['Name'])]

        if onlyClosedBlocks:
            return [x for x in blocks if str(x['OpenForWriting']) != "1"]

        return blocks

    def listFileBlocks(self, dataset, onlyClosedBlocks=False, blockName=None):
        """
        _listFileBlocks_

        Retrieve a list of fileblock names for a dataset

        """
        self.checkDatasetPath(dataset)
        args = {'dataset': dataset, 'detail': False}
        if blockName:
            args['block_name'] = blockName
        if onlyClosedBlocks:
            args['detail'] = True
        try:
            blocks = self.dbs.listBlocks(**args)
        except dbsClientException as ex:
            msg = "Error in DBSReader.listFileBlocks(%s)\n" % dataset
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        if onlyClosedBlocks:
            result = [
                x['block_name'] for x in blocks \
                  if str(x['open_for_writing']) != "1"
                ]

        else:
            result = [x['block_name'] for x in blocks]

        return result

    def listOpenFileBlocks(self, dataset):
        """
        _listOpenFileBlocks_

        Retrieve a list of open fileblock names for a dataset

        """
        self.checkDatasetPath(dataset)
        try:
            blocks = self.dbs.listBlocks(dataset=dataset, detail=True)
        except dbsClientException as ex:
            msg = "Error in DBSReader.listFileBlocks(%s)\n" % dataset
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)


        result = [
            x['block_name'] for x in blocks \
            if str(x['open_for_writing']) == "1"
        ]

        return result

    def blockExists(self, fileBlockName):
        """
        _blockExists_

        Check to see if block with name provided exists in the DBS
        Instance.

        Return True if exists, False if not

        """
        self.checkBlockName(fileBlockName)
        try:

            blocks = self.dbs.listBlocks(block_name=fileBlockName)
        except dbsClientException as ex:
            msg = "Error in "
            msg += "DBSReader.blockExists(%s)\n" % fileBlockName
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        if len(blocks) == 0:
            return False
        return True

    def listFilesInBlock(self, fileBlockName, lumis=True):
        """
        _listFilesInBlock_

        Get a list of files in the named fileblock
        TODO: lumis can be false when lumi splitting is not required
        However WMBSHelper expect file['LumiList'] to get the run number
        so for now it will be always true.
        We need to clean code up when dbs2 is completely deprecated.
        calling lumis for run number is expensive.
        """
        if not self.blockExists(fileBlockName):
            msg = "DBSReader.listFilesInBlock(%s): No matching data"
            raise DBSReaderError(msg % fileBlockName)

        try:
            files = self.dbs.listFileArray(block_name=fileBlockName,
                                           validFileOnly=1,
                                           detail=True)
        except dbsClientException as ex:
            msg = "Error in "
            msg += "DBSReader.listFilesInBlock(%s)\n" % fileBlockName
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        if lumis:
            lumiDict = self._getLumiList(blockName=fileBlockName)

        result = []
        for fileInfo in files:
            if lumis:
                fileInfo["LumiList"] = lumiDict[fileInfo['logical_file_name']]
            result.append(remapDBS3Keys(fileInfo, stringify=True))
        return result
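    # Sketch of how the result is typically consumed (block name is
    # illustrative; the "LumiList" key is attached above when lumis=True):
    #   for f in reader.listFilesInBlock("/Cosmics/Run2012A-v1/RAW#abc-123"):
    #       runLumis = f["LumiList"]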

    def listFilesInBlockWithParents(self, fileBlockName, lumis=True):
        """
        _listFilesInBlockWithParents_

        Get a list of files in the named fileblock including
        the parents of that file.
        TODO: lumis can be false when lumi splitting is not required
        However WMBSHelper expect file['LumiList'] to get the run number
        so for now it will be always true.

        """
        if not self.blockExists(fileBlockName):
            msg = "DBSReader.listFilesInBlockWithParents(%s): No matching data"
            raise DBSReaderError(msg % fileBlockName)

        try:
            # TODO: should we restrict this call to valid files only?
            files = self.dbs.listFileParents(block_name=fileBlockName)
            fileDetails = self.listFilesInBlock(fileBlockName, lumis)

        except dbsClientException as ex:
            msg = "Error in "
            msg += "DBSReader.listFilesInBlockWithParents(%s)\n" % (
                fileBlockName, )
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        childByParents = defaultdict(list)
        for f in files:
            # a child file can have more than one parent file
            for fp in f['parent_logical_file_name']:
                childByParents[fp].append(f['logical_file_name'])
        parentsLFNs = childByParents.keys()

        parentFilesDetail = []
        # TODO: slice parentsLFNs until the DBS API can handle long lists
        # itself; remove the slicing once it does
        for pLFNs in slicedIterator(parentsLFNs, 50):
            parentFilesDetail.extend(
                self.dbs.listFileArray(logical_file_name=pLFNs, detail=True))

        if lumis:
            parentLumis = self._getLumiList(lfns=parentsLFNs)

        parentsByLFN = defaultdict(list)

        for pf in parentFilesDetail:
            parentLFN = pf['logical_file_name']
            dbsFile = remapDBS3Keys(pf, stringify=True)
            if lumis:
                dbsFile["LumiList"] = parentLumis[parentLFN]

            for childLFN in childByParents[parentLFN]:
                parentsByLFN[childLFN].append(dbsFile)

        for fileInfo in fileDetails:
            fileInfo["ParentList"] = parentsByLFN[
                fileInfo['logical_file_name']]

        return fileDetails
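    # The parentage handling above inverts the DBS parent records twice:
    # first parent -> [children], then child -> [parent detail dicts].
    # A toy sketch of the same defaultdict pattern with made-up LFNs:
    #   from collections import defaultdict
    #   childByParents = defaultdict(list)
    #   for rec in [{'logical_file_name': 'child1',
    #                'parent_logical_file_name': ['parentA', 'parentB']}]:
    #       for p in rec['parent_logical_file_name']:
    #           childByParents[p].append(rec['logical_file_name'])
    #   # childByParents == {'parentA': ['child1'], 'parentB': ['child1']}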

    def lfnsInBlock(self, fileBlockName):
        """
        _lfnsInBlock_

        LFN list only for block, details = False => faster query

        """
        if not self.blockExists(fileBlockName):
            msg = "DBSReader.lfnsInBlock(%s): No matching data"
            raise DBSReaderError(msg % fileBlockName)

        try:
            lfns = self.dbs.listFileArray(block_name=fileBlockName,
                                          validFileOnly=1,
                                          detail=False)
            return lfns
        except dbsClientException as ex:
            msg = "Error in "
            msg += "DBSReader.listFilesInBlock(%s)\n" % fileBlockName
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

    def listFileBlockLocation(self,
                              fileBlockName,
                              dbsOnly=False,
                              phedexNodes=False):
        """
        _listFileBlockLocation_

        Get origin_site_name of a block

        """
        blockNames = [fileBlockName] if isinstance(
            fileBlockName, basestring) else fileBlockName
        for block in blockNames:
            self.checkBlockName(block)

        blockInfo = {}
        if not dbsOnly:
            try:
                blockInfo = self.phedex.getReplicaSEForBlocks(
                    phedexNodes=phedexNodes, block=blockNames, complete='y')
            except Exception as ex:
                msg = "Error while getting block location from PhEDEx for block_name=%s)\n" % fileBlockName
                msg += "%s\n" % str(ex)
                raise Exception(msg)

            if not blockInfo or len(blockInfo) != len(blockNames):
                # we couldn't get the location of every block from PhEDEx,
                # so fall back to the origin site recorded in DBS for the
                # blocks that are still missing
                dbsOnly = True
                blockNames = set(blockNames) - set(blockInfo)

        if dbsOnly:
            try:
                for block in blockNames:
                    res = self.dbs.listBlockOrigin(block_name=block)
                    if res:
                        blockInfo[block] = [res[0]['origin_site_name']]
            except dbsClientException as ex:
                msg = "Error in DBS3Reader: self.dbs.listBlockOrigin(block_name=%s)\n" % fileBlockName
                msg += "%s\n" % formatEx3(ex)
                raise DBSReaderError(msg)

            if not any(blockInfo.values()):  # no data location from dbs
                return list()

        # remove duplicates and 'UNKNOWN' entries
        locations = {}
        node_filter_list = set(['UNKNOWN', None])
        for name, nodes in blockInfo.iteritems():
            final_nodes = set()
            for n in nodes:
                if n in node_filter_list:
                    continue
                try:
                    cmsname(n)
                except AssertionError:  # not a CMS node name, i.e. an SE
                    n = self.phedex.getNodeNames(n) if phedexNodes else [n]
                else:  # a CMS node name, i.e. a PhEDEx node
                    n = [self.phedex.getNodeSE(n)] if not phedexNodes else [n]
                final_nodes = final_nodes.union(n)
            locations[name] = list(final_nodes - node_filter_list)

        #returning single list if a single block is passed
        if isinstance(fileBlockName, basestring):
            locations = locations[fileBlockName]

        return locations
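    # Usage sketch (illustrative block names; PhEDEx replicas are queried
    # first, with the DBS origin site as fallback, as implemented above):
    #   # a single block name -> a plain list of locations
    #   sites = reader.listFileBlockLocation(blockA, phedexNodes=True)
    #   # a list of block names -> a dict {blockName: [locations]}
    #   siteMap = reader.listFileBlockLocation([blockA, blockB])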

    def getFileBlock(self, fileBlockName):
        """
        _getFileBlock_

        return a dictionary:
        { blockName: {
             "StorageElements" : [<se list>],
             "Files" : [<file info dictionaries>],
             "IsOpen" : <bool>,
             }
        }

        """
        if not self.blockExists(fileBlockName):
            msg = "DBSReader.getFileBlock(%s): No matching data"
            raise DBSReaderError(msg % fileBlockName)

        result = {
            fileBlockName: {
                "StorageElements": self.listFileBlockLocation(fileBlockName),
                "Files": self.listFilesInBlock(fileBlockName),
                "IsOpen": self.blockIsOpen(fileBlockName),
            }
        }
        return result
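    # Usage sketch (hypothetical block name held in `block`):
    #   info = reader.getFileBlock(block)[block]
    #   print info["IsOpen"], len(info["Files"]), info["StorageElements"]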

    def getFileBlockWithParents(self, fileBlockName):
        """
        _getFileBlockWithParents_

        return a dictionary:
        { blockName: {
             "StorageElements" : [<se list>],
             "Files" : [<file info dictionaries, each with a "ParentList">],
             "IsOpen" : <bool>,
             }
        }

        """
        if not self.blockExists(fileBlockName):
            msg = "DBSReader.getFileBlockWithParents(%s): No matching data"
            raise DBSReaderError(msg % fileBlockName)

        result = {
            fileBlockName: {
                "StorageElements": self.listFileBlockLocation(fileBlockName),
                "Files": self.listFilesInBlockWithParents(fileBlockName),
                "IsOpen": self.blockIsOpen(fileBlockName),
            }
        }
        return result

    def getFiles(self, dataset, onlyClosedBlocks=False):
        """
        _getFiles_

        Returns a dictionary keyed by block name for the dataset, where
        each block entry contains the StorageElements for that block, the
        file information for that block and its open status

        """
        result = {}
        blocks = self.listFileBlocks(dataset, onlyClosedBlocks)

        for block in blocks:
            result.update(self.getFileBlock(block))

        return result
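    # Usage sketch (illustrative dataset path):
    #   blockMap = reader.getFiles("/Cosmics/Run2012A-v1/RAW",
    #                              onlyClosedBlocks=True)
    #   # -> {blockName: {"StorageElements": [...], "Files": [...],
    #   #                 "IsOpen": <bool>}, ...}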

    def listBlockParents(self, blockName):
        """Get parent blocks for block"""
        result = []
        self.checkBlockName(blockName)
        blocks = self.dbs.listBlockParents(block_name=blockName)
        for block in blocks:
            toreturn = {'Name': block['parent_block_name']}
            toreturn['StorageElementList'] = self.listFileBlockLocation(
                toreturn['Name'])
            result.append(toreturn)
        return result

    def blockIsOpen(self, blockName):
        """
        _blockIsOpen_

        Return True if the named block is open, False if it is not or if
        the block doesn't exist

        """
        self.checkBlockName(blockName)
        blockInstance = self.dbs.listBlocks(block_name=blockName, detail=True)
        if len(blockInstance) == 0:
            return False
        blockInstance = blockInstance[0]
        isOpen = blockInstance.get('open_for_writing', 1)
        return isOpen != 0

    def blockToDatasetPath(self, blockName):
        """
        _blockToDatasetPath_

        Given a block name, get the dataset Path associated with that
        Block.

        Returns the dataset path, or None if not found

        """
        self.checkBlockName(blockName)
        try:
            blocks = self.dbs.listBlocks(block_name=blockName, detail=True)
        except dbsClientException as ex:
            msg = "Error in "
            msg += "DBSReader.blockToDataset(%s)\n" % blockName
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        if not blocks:
            return None

        pathname = blocks[-1].get('dataset', None)
        return pathname
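    # Usage sketch (hypothetical block name):
    #   path = reader.blockToDatasetPath("/Cosmics/Run2012A-v1/RAW#abc-123")
    #   # -> "/Cosmics/Run2012A-v1/RAW", or None if the block is unknown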

    def listDatasetLocation(self, datasetName, dbsOnly=False):
        """
        _listDatasetLocation_

        List the origin SEs that host at least one block of the given
        dataset.
        """
        self.checkDatasetPath(datasetName)

        if not dbsOnly:
            try:
                blocksInfo = self.phedex.getReplicaSEForBlocks(
                    dataset=[datasetName], complete='y')
            except Exception as ex:
                msg = "Error while getting block location from PhEDEx for dataset=%s)\n" % datasetName
                msg += "%s\n" % str(ex)
                raise Exception(msg)

            if not blocksInfo:
                # we couldn't get the data location from PhEDEx, so try the
                # origin site recorded in DBS instead
                dbsOnly = True
            else:
                locations = set(blocksInfo.values()[0])
                for blockSites in blocksInfo.values():
                    locations.intersection_update(blockSites)

        if dbsOnly:
            try:
                blocksInfo = self.dbs.listBlockOrigin(dataset=datasetName)
            except dbsClientException as ex:
                msg = "Error in DBSReader: dbsApi.listBlocks(dataset=%s)\n" % datasetName
                msg += "%s\n" % formatEx3(ex)
                raise DBSReaderError(msg)

            if not blocksInfo:  # no data location from dbs
                return list()

            locations = set()
            for blockInfo in blocksInfo:
                locations.update([blockInfo['origin_site_name']])

            locations.difference_update(
                ['UNKNOWN'])  # remove entry when SE name is 'UNKNOWN'

        return list(locations)
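    # Usage sketch (illustrative dataset path; PhEDEx replicas are used
    # first, falling back to DBS origin sites as implemented above):
    #   sites = reader.listDatasetLocation("/Cosmics/Run2012A-v1/RAW")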

    def checkDatasetPath(self, pathName):
        """
         _checkDatasetPath_
        """
        if pathName in ("", None):
            raise DBSReaderError("Invalid Dataset Path name: => %s <=" %
                                 pathName)

    def checkBlockName(self, blockName):
        """
         _checkBlockName_
        """
        if blockName in ("", "*", None):
            raise DBSReaderError("Invalid Block name: => %s <=" % blockName)