Example #1
    def getPileupLockedAndAvailable(self, container, account, scope="cms"):
        """
        Mock method to resolve where the pileup container (and all its blocks)
        is locked and available.
        """
        logging.info("%s: calling mock getPileupLockedAndAvailable",
                     self.__class__.__name__)
        result = dict()
        if not self.isContainer(container):
            raise WMRucioException(
                "Pileup location needs to be resolved for a container DID type"
            )

        kwargs = dict(name=container, account=account, scope=scope)

        try:
            DBS3Reader(PROD_DBS).checkDatasetPath(kwargs['name'])
            blocks = DBS3Reader(PROD_DBS).listFileBlocks(
                dataset=kwargs['name'])
            for block in blocks:
                result[block] = self.sitesByBlock(block)
        except DBSReaderError:
            logging.error("%s: Failed to fetch blocks from DBS",
                          self.__class__.__name__)
        return result
Example #2
 def testListDatatiers(self):
     """
     listDatatiers returns all datatiers available
     """
     results = DBSReader.listDatatiers(self.endpoint)
     self.assertTrue('RAW' in results)
     self.assertTrue('GEN-SIM-RECO' in results)
     self.assertTrue('GEN-SIM' in results)
     self.assertFalse('RAW-ALAN' in results)
     # dbsUrl is mandatory
     with self.assertRaises(DBSReaderError):
         _ = DBSReader.listDatatiers()
     return
Example #3
 def testLfnsInBlock(self):
     """lfnsInBlock returns lfns in block"""
     self.dbs = DBSReader(self.endpoint)
     self.assertTrue(
         FILE in
         [x['logical_file_name'] for x in self.dbs.lfnsInBlock(BLOCK)])
     self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')
Example #4
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        WRONG_BLOCK = BLOCK[:-4] + 'abcd'
        BLOCK2 = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
        DBS_BLOCK = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-' + \
                    'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e'
        DBS_BLOCK2 = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-' + \
                     'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab'
        self.dbs = DBSReader('https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')

        self.assertTrue(self.dbs.listFileBlockLocation(BLOCK))
        # This block is only found on DBS
        self.assertTrue(self.dbs.listFileBlockLocation(DBS_BLOCK))
        # doesn't raise on a non-existent block
        self.assertTrue(self.dbs.listFileBlockLocation(WRONG_BLOCK))
        # test bulk call:
        ## two blocks in phedex
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, BLOCK2])))
        ## one block in phedex one does not exist
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, WRONG_BLOCK])))
        ## one in phedex one in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, DBS_BLOCK])))
        ## two in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([DBS_BLOCK, DBS_BLOCK2])))
        ## one in DBS and one does not exist
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([DBS_BLOCK, WRONG_BLOCK])))
Example #5
def makePhEDExDrop(dbsUrl, datasetPath, *blockNames):
    """
    _makePhEDExDrop_

    Given a DBS URL, a dataset name and a list of block names,
    generate an XML structure for PhEDEx injection

    """
    from WMCore.Services.DBS.DBS3Reader import DBS3Reader
    reader = DBS3Reader(dbsUrl)

    spec = XMLInjectionSpec(dbsUrl)

    dataset = spec.getDataset(datasetPath)

    for block in blockNames:
        blockContent = reader.getFileBlock(block)
        if blockContent['IsOpen']:
            xmlBlock = dataset.getFileblock(block, "y")
        else:
            xmlBlock = dataset.getFileblock(block, "n")

        # Any Checksum from DBS is type cksum
        for x in blockContent[block]['Files']:
            checksums = {'cksum': x['Checksum']}
            if x.get('Adler32') not in (None, ''):
                checksums['adler32'] = x['Adler32']
            xmlBlock.addFile(x['LogicalFileName'], checksums, x['FileSize'])

    xml = spec.save()
    return xml
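A minimal usage sketch for makePhEDExDrop; the DBS URL, dataset path and block names below are placeholders rather than values taken from the surrounding code, and it assumes spec.save() returns the XML as a string.

# Hypothetical inputs; substitute a real DBS instance, dataset and block names.
dbsUrl = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
datasetPath = '/Primary/Processed-v1/TIER'
blockNames = [datasetPath + '#block-uuid-1', datasetPath + '#block-uuid-2']
xmlSpec = makePhEDExDrop(dbsUrl, datasetPath, *blockNames)
with open('injection.xml', 'w') as fileObj:
    fileObj.write(xmlSpec)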
Example #6
def getFromDBS(dataset, logger):
    """
    Uses the WMCore DBS3Reader object to fetch all the blocks and files
    for a given container.
    Returns a dictionary keyed by the block name, where each value is a
    counter of valid and invalid files. It also returns a total counter
    for the number of valid and invalid files in the dataset.
    """
    dbsReader = DBS3Reader(DBS_URL, logger)

    result = dict()
    dbsFilesCounter = Counter({'valid': 0, 'invalid': 0})
    blocks = dbsReader.listFileBlocks(dataset)
    for block in blocks:
        data = dbsReader.dbs.listFileArray(block_name=block,
                                           validFileOnly=0,
                                           detail=True)
        result.setdefault(block, Counter({'valid': 0, 'invalid': 0}))
        for fileInfo in data:
            if fileInfo['is_file_valid'] == 1:
                result[block]['valid'] += 1
                dbsFilesCounter['valid'] += 1
            else:
                result[block]['invalid'] += 1
                dbsFilesCounter['invalid'] += 1
    return result, dbsFilesCounter
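A usage sketch for getFromDBS, assuming DBS_URL is already configured in the module; the dataset name is a placeholder.

import logging

logger = logging.getLogger(__name__)
# Hypothetical dataset name; replace with a real container DID.
blockInfo, totals = getFromDBS('/Primary/Processed-v1/TIER', logger)
for blockName, counts in blockInfo.items():
    logger.info("%s: %d valid, %d invalid files",
                blockName, counts['valid'], counts['invalid'])
logger.info("Dataset totals: %s", dict(totals))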
Example #7
    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], 22075)
        self.assertEqual(dataset['NumberOfBlocks'], 46)
        self.assertEqual(dataset['FileSize'], 4001680824)
        self.assertEqual(dataset['file_size'], 4001680824)
        self.assertEqual(dataset['NumberOfFiles'], 49)
        self.assertEqual(dataset['NumberOfLumis'], 7223)

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], DATASET)
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], 377)
        self.assertEqual(block['NumberOfBlocks'], 1)
        self.assertEqual(block['FileSize'], 150780132)
        self.assertEqual(block['file_size'], 150780132)
        self.assertEqual(block['NumberOfFiles'], 2)
        self.assertEqual(block['NumberOfLumis'], 94)

        with self.assertRaises(DBSReaderError):
            self.dbs.getDBSSummaryInfo(DATASET + 'blah')
        with self.assertRaises(DBSReaderError):
            self.dbs.getDBSSummaryInfo(DATASET, BLOCK + 'asas')
Example #8
 def testlistDatasetFileDetails(self):
     """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
     TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
     self.dbs = DBSReader(self.endpoint)
     details = self.dbs.listDatasetFileDetails(DATASET)
     self.assertEqual(len(details), 49)
     self.assertTrue(TESTFILE in details)
     self.assertEqual(details[TESTFILE]['NumberOfEvents'], 545)
     self.assertEqual(details[TESTFILE]['file_size'], 286021145)
     self.assertEqual(
         details[TESTFILE]['BlockName'],
         '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
     self.assertEqual(details[TESTFILE]['Md5'], 'NOTSET')
     self.assertEqual(details[TESTFILE]['md5'], 'NOTSET')
     self.assertEqual(details[TESTFILE]['Adler32'], 'a41a1446')
     self.assertEqual(details[TESTFILE]['adler32'], 'a41a1446')
     self.assertEqual(details[TESTFILE]['Checksum'], '22218315')
     self.assertEqual(details[TESTFILE]['check_sum'], '22218315')
     self.assertTrue(173658 in details[TESTFILE]['Lumis'])
     self.assertEqual(sorted(details[TESTFILE]['Lumis'][173658]), [
         1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
         20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
         37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
         54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
         71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
         88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
         104, 105, 106, 107, 108, 109, 110, 111
     ])
Example #9
    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents(BLOCK_WITH_PARENTS)
        self.assertItemsEqual([PARENT_BLOCK], parents)

        self.assertFalse(self.dbs.listBlockParents(PARENT_BLOCK))
Example #10
    def getReplicaPhEDExNodesForBlocks(self,
                                       block=None,
                                       dataset=None,
                                       complete='y'):
        """

        Args:
            block: the name of the block
            dataset: the name of the dataset
            complete: replica completeness flag (not used by this mock)

        Returns:
            a fake mapping of blocks to the fake sites they are at
        """
        if isinstance(dataset, list):
            dataset = dataset[0]  # Dataset is a list in these tests
        if dataset:
            # TODO: Generalize this and maybe move dataset detection into sitesByBlock
            if dataset == PILEUP_DATASET:
                return {
                    '%s#0fcb2b12-d27e-11e0-91b1-003048caaace' % dataset:
                    ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']
                }
            else:
                try:
                    DBS3Reader(PROD_DBS).checkDatasetPath(dataset)
                    blocks = DBS3Reader(PROD_DBS).dbs.listBlocks(
                        dataset=dataset)
                    singleBlock = blocks[0]['block_name']
                    return {singleBlock: self.sitesByBlock(singleBlock)}
                except DBSReaderError:
                    return {
                        '%s#0fcb2b12-d27e-11e0-91b1-003048caaace' % dataset:
                        []
                    }

        replicas = {}
        for oneBlock in block:
            if oneBlock.split('#')[0] == PILEUP_DATASET:
                # Pileup is at a single site
                sites = ['T2_XX_SiteC']
                _BLOCK_LOCATIONS[oneBlock] = sites
            else:
                sites = self.sitesByBlock(block=oneBlock)
                _BLOCK_LOCATIONS[oneBlock] = sites
            replicas.update({oneBlock: sites})
        return replicas
Example #11
    def _queryAndCompareWithDBS(self, pileupDict, defaultArguments, dbsUrl):
        """
        pileupDict is a Python dictionary containing a particular pileup
        configuration. Query DBS for each dataset present in both the
        input defaultArguments and the pileupDict, and compare the values.

        """
        reader = DBS3Reader(dbsUrl)
        phedex = PhEDEx()

        inputArgs = defaultArguments["PileupConfig"]

        self.assertEqual(len(inputArgs), len(pileupDict),
                         "Number of pileup types different.")
        for pileupType in inputArgs:
            m = ("pileup type '%s' not in PileupFetcher-produced pileup "
                 "configuration: '%s'" % (pileupType, pileupDict))
            self.assertTrue(pileupType in pileupDict, m)

        # now query DBS to compare the actual file lists for each
        # pileup type, dataset and location (storage element names)
        # pileupDict was saved to a file; compare its items against the
        # actual DBS results. The structure of pileupDict is:
        #    {"pileupTypeA": {"BlockA": {"FileList": [], "PhEDExNodeNames": []},
        #                     "BlockB": {"FileList": [], "PhEDExNodeNames": []}, ....}
        for pileupType, datasets in inputArgs.items():
            # this is from the pileup configuration produced by PileupFetcher
            blockDict = pileupDict[pileupType]

            for dataset in datasets:
                dbsFileBlocks = reader.listFileBlocks(dataset=dataset)
                blocksLocation = phedex.getReplicaPhEDExNodesForBlocks(
                    dataset=dataset, complete='y')
                for dbsFileBlockName in dbsFileBlocks:
                    fileList = []
                    pnns = set()
                    for pnn in blocksLocation[dbsFileBlockName]:
                        pnns.add(pnn)
                    # now get list of files in the block
                    dbsFiles = reader.listFilesInBlock(dbsFileBlockName)
                    for dbsFile in dbsFiles:
                        fileList.append(dbsFile["LogicalFileName"])
                    # now compare the sets:
                    m = ("PNNs don't agree for pileup type '%s', "
                         "dataset '%s' in configuration: '%s'" %
                         (pileupType, dataset, pileupDict))
                    self.assertEqual(
                        set(blockDict[dbsFileBlockName]["PhEDExNodeNames"]),
                        pnns, m)
                    m = (
                        "FileList don't agree for pileup type '%s', dataset '%s' "
                        " in configuration: '%s'" %
                        (pileupType, dataset, pileupDict))
                    storedFileList = [
                        item['logical_file_name']
                        for item in blockDict[dbsFileBlockName]["FileList"]
                    ]
                    self.assertItemsEqual(storedFileList, fileList, m)
Example #12
 def testListFilesInBlock(self):
     """listFilesInBlock returns files in block"""
     self.dbs = DBSReader(self.endpoint)
     self.assertTrue(
         FILE in
         [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
     self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock,
                       DATASET + '#blah')
Example #13
 def testListProcessedDatasets(self):
     """listProcessedDatasets returns known processed datasets"""
     self.dbs = DBSReader(self.endpoint)
     datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
     self.assertTrue('Run2011A-v1' in datasets)
     self.assertTrue('Run2011B-v1' in datasets)
     self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
     self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))
Example #14
    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + 'asas')
Example #15
    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents(BLOCK_WITH_PARENTS)
        self.assertEqual(len(block), 1)
        block = block[BLOCK_WITH_PARENTS]
        self.assertEqual(PARENT_FILE, block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas')
Example #16
    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents(BLOCK_WITH_PARENTS)
        self.assertEqual(1, len(parents))
        self.assertEqual(PARENT_BLOCK, parents[0]['Name'])
        self.assertTrue(parents[0]['PhEDExNodeList'])

        self.assertFalse(self.dbs.listBlockParents(PARENT_BLOCK))
Example #17
def checkDBSUrl(dbsUrl):
    if dbsUrl:
        try:
            DBS3Reader(dbsUrl).dbs.serverinfo()
        except Exception:
            raise WMWorkloadToolsException("DBS is not responding: %s" %
                                           dbsUrl)

    return True
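A sketch of how checkDBSUrl might be called during spec validation; the endpoint is the standard production DBS reader URL, and WMWorkloadToolsException is assumed to be importable from the module that defines it.

try:
    checkDBSUrl('https://cmsweb.cern.ch/dbs/prod/global/DBSReader')
except WMWorkloadToolsException as ex:
    # DBS did not respond; surface the validation failure to the caller
    print("DBS validation failed: %s" % str(ex))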
Example #18
 def testListFileBlocks(self):
     """listFileBlocks returns block names in dataset"""
     self.dbs = DBSReader(self.endpoint)
     blocks = self.dbs.listFileBlocks(DATASET)
     self.assertTrue(BLOCK in blocks)
     # block is closed
     block = self.dbs.listFileBlocks(DATASET, blockName=BLOCK, onlyClosedBlocks=True)[0]
     self.assertEqual(block, BLOCK)
     self.assertTrue(BLOCK in block)
Example #19
 def testlistRuns(self):
     """listRuns returns known runs"""
     self.dbs = DBSReader(self.endpoint)
     runs = self.dbs.listRuns(dataset=DATASET)
     self.assertEqual(46, len(runs))
     self.assertTrue(174074 in runs)
     runs = self.dbs.listRuns(block=BLOCK)
     self.assertEqual(1, len(runs))
     self.assertEqual([173657], runs)
Example #20
 def testMatchProcessedDatasets(self):
     """
     matchProcessedDatasets returns known processed datasets
     """
     self.dbs = DBSReader(self.endpoint)
     dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
     self.assertEqual(1, len(dataset))
     self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
     self.assertEqual('Run2011A-v1', dataset[0]['Name'])
     self.assertFalse(self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))
Example #21
 def testListPrimaryDatasets(self):
     """
     listPrimaryDatasets returns known primary datasets
     """
     self.dbs = DBSReader(self.endpoint)
     results = self.dbs.listPrimaryDatasets('Jet*')
     self.assertTrue('Jet' in results)
     self.assertTrue('JetMET' in results)
     self.assertTrue('JetMETTau' in results)
     self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
     return
Example #22
def _validateDatatier(datatier, dbsUrl):
    """
    _validateDatatier_

    Provided a list of datatiers extracted from the outputDatasets, checks
    whether they all exist in DBS already.
    """
    dbsTiers = DBSReader.listDatatiers(dbsUrl)
    badTiers = list(set(datatier) - set(dbsTiers))
    if badTiers:
        raise InvalidSpecParameterValue("Bad datatier(s): %s not available in DBS." % badTiers)
Example #23
def _validateDatatier(datatier, dbsUrl):
    """
    _validateDatatier_

    Provided a list of datatiers extracted from the outputDatasets, checks
    whether they all exist in DBS already.
    """
    dbsTiers = DBSReader.listDatatiers(dbsUrl)
    badTiers = list(set(datatier) - set(dbsTiers))
    if badTiers:
        raise InvalidSpecParameterValue("Bad datatier(s): %s not available in DBS." % badTiers)
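A sketch of calling the datatier validator; the tier list is a placeholder and InvalidSpecParameterValue is assumed to be importable from the module that defines it.

# Hypothetical datatiers extracted from the output dataset names.
outputTiers = ['GEN-SIM', 'AODSIM']
try:
    _validateDatatier(outputTiers, 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader')
except InvalidSpecParameterValue as ex:
    print("Datatier validation failed: %s" % str(ex))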
Example #24
 def testListDatatiers(self):
     """
     listDatatiers returns all datatiers available
     """
     self.dbs = DBSReader(self.endpoint)
     results = self.dbs.listDatatiers()
     self.assertTrue('RAW' in results)
     self.assertTrue('GEN-SIM-RECO' in results)
     self.assertTrue('GEN-SIM' in results)
     self.assertFalse('RAW-ALAN' in results)
     return
Example #25
 def testlistRunLumis(self):
     """listRunLumis returns known runs and lumicounts (None for DBS3)"""
     self.dbs = DBSReader(self.endpoint)
     runs = self.dbs.listRunLumis(dataset=DATASET)
     self.assertEqual(46, len(runs))
     self.assertTrue(173692 in runs)
     self.assertEqual(runs[173692], None)
     runs = self.dbs.listRunLumis(block=BLOCK)
     self.assertEqual(1, len(runs))
     self.assertTrue(173657 in runs)
     self.assertEqual(runs[173657], None)
Example #26
    def validateDatatier(self, datatier, dbsUrl):
        """
        _validateDatatier_

        Provided a list of datatiers extracted from the outputDatasets, checks
        whether they all exist in DBS already.
        """
        dbsTiers = DBSReader.listDatatiers(dbsUrl)
        badTiers = list(set(datatier) - set(dbsTiers))
        if badTiers:
            raise cherrypy.HTTPError(400, "Bad datatier(s): %s not available in DBS." % badTiers)
Example #27
def DBSReader(endpoint, **kwargs):
    """Function to find and instantiate desired DBSReader object"""

    try:
        dbs = DBS3Reader(endpoint, **kwargs)
        # if this doesn't throw, the endpoint is a valid DBS3 server
        dbs.dbs.serverinfo()
        return dbs
    except Exception as ex:
        msg = 'Instantiating DBS3Reader failed with %s\n' % str(ex)
        raise DBSReaderError("Can't contact DBS at %s, got errors %s" %
                             (endpoint, msg))
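A usage sketch for the DBSReader factory; the endpoint and the listPrimaryDatasets call mirror the tests above.

try:
    dbs = DBSReader('https://cmsweb.cern.ch/dbs/prod/global/DBSReader')
    print('Jet' in dbs.listPrimaryDatasets('Jet*'))  # sanity check against a known primary dataset
except DBSReaderError as ex:
    print("Could not contact DBS: %s" % str(ex))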
Example #28
def _validateInputDataset(arguments):
    
    inputdataset = arguments.get("InputDataset", None)
    dbsURL = arguments.get("DbsUrl", None)
    if inputdataset is not None and dbsURL is not None:
        # import DBS3Reader here, since Runtime code imports this module and the worker node doesn't have the dbs3 client
        from WMCore.Services.DBS.DBS3Reader import DBS3Reader
        from WMCore.Services.DBS.DBSErrors import DBSReaderError
        try:
            DBS3Reader(dbsURL).checkDatasetPath(inputdataset)
        except DBSReaderError as ex:
            # we need to Wrap the exception to WMSpecFactoryException to be caught in reqmgr validation
            raise WMSpecFactoryException(str(ex))
    return
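A sketch of the arguments dictionary this validator expects; the dataset and URL are placeholders, and validation is skipped when either key is missing.

# Hypothetical request arguments.
requestArgs = {
    "InputDataset": "/Primary/Processed-v1/TIER",
    "DbsUrl": "https://cmsweb.cern.ch/dbs/prod/global/DBSReader",
}
try:
    _validateInputDataset(requestArgs)
except WMSpecFactoryException as ex:
    print("Invalid input dataset: %s" % str(ex))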
Example #29
    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents(
            '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0')
        self.assertEqual(4, len(files))
        self.assertEqual('/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0',
                         files[0]['block_name'])
        self.assertEqual('/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0',
                         files[0]['BlockName'])
        self.assertEqual(
            '/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/1043E89F-2DCF-E411-9CAE-02163E013751.root',
            files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + 'asas')
Example #30
    def getContainerLockedAndAvailable(self, **kwargs):
        """
        Mock the method to discover where container data is locked and available.
        Note that, by default, it will not return any Tape RSEs.
        :return: a unique list of RSEs
        """
        logging.info("%s: Calling mock getContainerLockedAndAvailable",
                     self.__class__.__name__)
        if 'name' not in kwargs:
            raise WMRucioException(
                "A DID name must be provided to the getContainerLockedAndAvailable API"
            )
        kwargs.setdefault("scope", "cms")

        if kwargs['name'] == PILEUP_DATASET:
            return ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC']
        try:
            DBS3Reader(PROD_DBS).checkDatasetPath(kwargs['name'])
            blocks = DBS3Reader(PROD_DBS).dbs.listBlocks(
                dataset=kwargs['name'])
            singleBlock = blocks[0]['block_name']
            return self.sitesByBlock(singleBlock)
        except DBSReaderError:
            return []
Example #31
def validateInputDatasSetAndParentFlag(arguments):
    inputdataset = arguments.get("InputDataset", None)
    if strToBool(arguments.get("IncludeParents", False)):
        if inputdataset is None:
            msg = "IncludeParent flag is True but there is no inputdataset"
            raise WMSpecFactoryException(msg)
        else:
            dbsURL = arguments.get("DbsUrl", None)
            if dbsURL is not None:
                # import DBS3Reader here, since Runtime code imports this module and the worker node doesn't have the dbs3 client
                from WMCore.Services.DBS.DBS3Reader import DBS3Reader
                result = DBS3Reader(dbsURL).listDatasetParents(inputdataset)
                if len(result) == 0:
                    msg = "IncludeParent flag is True but inputdataset %s doesn't have parents" % (inputdataset)
                    raise WMSpecFactoryException(msg)
    else:
        _validateInputDataset(arguments)
    return
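A sketch of exercising the parent-flag validator; IncludeParents routes the request through the DBS parentage lookup, otherwise it falls back to _validateInputDataset above. All values are placeholders.

# Hypothetical request arguments with parent validation enabled.
requestArgs = {
    "InputDataset": "/Primary/Processed-v1/TIER",
    "DbsUrl": "https://cmsweb.cern.ch/dbs/prod/global/DBSReader",
    "IncludeParents": True,
}
try:
    validateInputDatasSetAndParentFlag(requestArgs)
except WMSpecFactoryException as ex:
    print("Parentage validation failed: %s" % str(ex))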
Example #32
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        self.assertEqual(0, block['OpenForWriting'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile, but assume the block is located at least at CERN
        self.assertTrue(block['PhEDExNodeList'])

        # weird error handling - depends on whether block or dataset is missing
        with self.assertRaises(DBSReaderError):
            self.dbs.getFileBlocksInfo(DATASET + 'blah')

        with self.assertRaises(DBSReaderError):
            self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK + 'asas')
Example #33
 def testlistDatasetFileDetails(self):
     """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
     TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
     self.dbs = DBSReader(self.endpoint)
     details = self.dbs.listDatasetFileDetails(DATASET)
     self.assertEqual(len(details), 49)
     self.assertTrue(TESTFILE in details)
     self.assertEqual(details[TESTFILE]['NumberOfEvents'], 545)
     self.assertEqual(details[TESTFILE]['file_size'], 286021145)
     self.assertEqual(details[TESTFILE]['BlockName'],
                      '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
     self.assertEqual(details[TESTFILE]['Md5'], 'NOTSET')
     self.assertEqual(details[TESTFILE]['md5'], 'NOTSET')
     self.assertEqual(details[TESTFILE]['Adler32'], 'a41a1446')
     self.assertEqual(details[TESTFILE]['adler32'], 'a41a1446')
     self.assertEqual(details[TESTFILE]['Checksum'], '22218315')
     self.assertEqual(details[TESTFILE]['check_sum'], '22218315')
     self.assertTrue(173658 in details[TESTFILE]['Lumis'])
     self.assertEqual(sorted(details[TESTFILE]['Lumis'][173658]),
                      [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
                       27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
                       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
                       75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
                       99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111])
Example #34
 def testlistDatasetFiles(self):
     """listDatasetFiles returns files in dataset"""
     self.dbs = DBSReader(self.endpoint)
     files = self.dbs.listDatasetFiles(DATASET)
     self.assertEqual(49, len(files))
     self.assertTrue(FILE in files)
Example #35
 def testListOpenFileBlocks(self):
     """listOpenFileBlocks finds open blocks"""
     # hard to find a dataset with open blocks, so don't bother
     self.dbs = DBSReader(self.endpoint)
     self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))
Example #36
 def testBlockToDatasetPath(self):
     """blockToDatasetPath extracts path from block name"""
     self.dbs = DBSReader(self.endpoint)
     self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
     self.assertRaises(DBSReaderError, self.dbs.blockToDatasetPath, BLOCK + 'asas')
Example #37
 def testBlockIsOpen(self):
     """blockIsOpen checks if a block is open"""
     self.dbs = DBSReader(self.endpoint)
     self.assertFalse(self.dbs.blockIsOpen(BLOCK))
Example #38
 def testBlockExists(self):
     """blockExists returns existence of blocks"""
     self.dbs = DBSReader(self.endpoint)
     self.assertTrue(self.dbs.blockExists(BLOCK))
     self.assertRaises(DBSReaderError, self.dbs.blockExists, DATASET + '#somethingelse')
Example #39
 def testGetFiles(self):
     """getFiles returns files in dataset"""
     self.dbs = DBSReader(self.endpoint)
     files = self.dbs.getFiles(DATASET)
     self.assertEqual(len(files), 46)
Example #40
 def testListFilesInBlock(self):
     """listFilesInBlock returns files in block"""
     self.dbs = DBSReader(self.endpoint)
     self.assertTrue(FILE in [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
     self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + '#blah')
Example #41
class DBSReaderTest(EmulatedUnitTestCase):
    def setUp(self):
        """
        _setUp_

        Set the DBS endpoint used by these tests.
        """

        self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs = None
        super(DBSReaderTest, self).setUp()
        return

    def tearDown(self):
        """
        _tearDown_

        """

        super(DBSReaderTest, self).tearDown()
        return

    def testListDatatiers(self):
        """
        listDatatiers returns all datatiers available
        """
        results = DBSReader.listDatatiers(self.endpoint)
        self.assertTrue('RAW' in results)
        self.assertTrue('GEN-SIM-RECO' in results)
        self.assertTrue('GEN-SIM' in results)
        self.assertFalse('RAW-ALAN' in results)
        # dbsUrl is mandatory
        with self.assertRaises(DBSReaderError):
            _ = DBSReader.listDatatiers()
        return

    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
        return

    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(block=BLOCK)
        self.assertEqual(1, len(runs))
        self.assertEqual([173657], runs)

    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts (None for DBS3)"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(173692 in runs)
        self.assertEqual(runs[173692], None)
        runs = self.dbs.listRunLumis(block=BLOCK)
        self.assertEqual(1, len(runs))
        self.assertTrue(173657 in runs)
        self.assertEqual(runs[173657], None)

    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    def testlistDatasetFileDetails(self):
        """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
        TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
        self.dbs = DBSReader(self.endpoint)
        details = self.dbs.listDatasetFileDetails(DATASET)
        self.assertEqual(len(details), 49)
        self.assertTrue(TESTFILE in details)
        self.assertEqual(details[TESTFILE]['NumberOfEvents'], 545)
        self.assertEqual(details[TESTFILE]['file_size'], 286021145)
        self.assertEqual(details[TESTFILE]['BlockName'],
                         '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
        self.assertEqual(details[TESTFILE]['Md5'], 'NOTSET')
        self.assertEqual(details[TESTFILE]['md5'], 'NOTSET')
        self.assertEqual(details[TESTFILE]['Adler32'], 'a41a1446')
        self.assertEqual(details[TESTFILE]['adler32'], 'a41a1446')
        self.assertEqual(details[TESTFILE]['Checksum'], '22218315')
        self.assertEqual(details[TESTFILE]['check_sum'], '22218315')
        self.assertTrue(173658 in details[TESTFILE]['Lumis'])
        self.assertEqual(sorted(details[TESTFILE]['Lumis'][173658]),
                         [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
                          27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
                          51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
                          75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
                          99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111])

    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], 22075)
        self.assertEqual(dataset['NumberOfBlocks'], 46)
        self.assertEqual(dataset['FileSize'], 4001680824)
        self.assertEqual(dataset['file_size'], 4001680824)
        self.assertEqual(dataset['NumberOfFiles'], 49)
        self.assertEqual(dataset['NumberOfLumis'], 7223)

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], DATASET)
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], 377)
        self.assertEqual(block['NumberOfBlocks'], 1)
        self.assertEqual(block['FileSize'], 150780132)
        self.assertEqual(block['file_size'], 150780132)
        self.assertEqual(block['NumberOfFiles'], 2)
        self.assertEqual(block['NumberOfLumis'], 94)

        with self.assertRaises(DBSReaderError):
            self.dbs.getDBSSummaryInfo(DATASET + 'blah')
        with self.assertRaises(DBSReaderError):
            self.dbs.getDBSSummaryInfo(DATASET, BLOCK + 'asas')

    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        self.assertEqual(0, block['OpenForWriting'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile, but assume the block is located at least at CERN
        self.assertTrue(block['PhEDExNodeList'])

        # weird error handling - depends on whether block or dataset is missing
        with self.assertRaises(DBSReaderError):
            self.dbs.getFileBlocksInfo(DATASET + 'blah')

        with self.assertRaises(DBSReaderError):
            self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK + 'asas')

    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.listFileBlocks(DATASET)
        self.assertTrue(BLOCK in blocks)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET, blockName=BLOCK, onlyClosedBlocks=True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.blockExists, DATASET + '#somethingelse')

    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + '#blah')

    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents(
            '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0')
        self.assertEqual(4, len(files))
        self.assertEqual('/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0',
                         files[0]['block_name'])
        self.assertEqual('/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0',
                         files[0]['BlockName'])
        self.assertEqual(
            '/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/1043E89F-2DCF-E411-9CAE-02163E013751.root',
            files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + 'asas')

    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in [x['logical_file_name'] for x in self.dbs.lfnsInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        WRONG_BLOCK = BLOCK[:-4] + 'abcd'
        BLOCK2 = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
        DBS_BLOCK = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-' + \
                    'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e'
        DBS_BLOCK2 = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-' + \
                     'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab'
        self.dbs = DBSReader('https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')

        self.assertTrue(self.dbs.listFileBlockLocation(BLOCK))
        # This block is only found on DBS
        self.assertTrue(self.dbs.listFileBlockLocation(DBS_BLOCK))
        # doesn't raise on a non-existent block
        self.assertTrue(self.dbs.listFileBlockLocation(WRONG_BLOCK))
        # test bulk call:
        ## two blocks in phedex
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, BLOCK2])))
        ## one block in phedex one does not exist
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, WRONG_BLOCK])))
        ## one in phedex one in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, DBS_BLOCK])))
        ## two in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([DBS_BLOCK, DBS_BLOCK2])))
        ## one in DBS and one does not exist
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([DBS_BLOCK, WRONG_BLOCK])))

    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + 'asas')

    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents(BLOCK_WITH_PARENTS)
        self.assertEqual(len(block), 1)
        block = block[BLOCK_WITH_PARENTS]
        self.assertEqual(PARENT_FILE, block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas')

    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents(BLOCK_WITH_PARENTS)
        self.assertEqual(1, len(parents))
        self.assertEqual(PARENT_BLOCK, parents[0]['Name'])
        self.assertTrue(parents[0]['PhEDExNodeList'])

        self.assertFalse(self.dbs.listBlockParents(PARENT_BLOCK))

    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertRaises(DBSReaderError, self.dbs.blockToDatasetPath, BLOCK + 'asas')
Example #42
 def testLfnsInBlock(self):
     """lfnsInBlock returns lfns in block"""
     self.dbs = DBSReader(self.endpoint)
     self.assertTrue(FILE in [x['logical_file_name'] for x in self.dbs.lfnsInBlock(BLOCK)])
     self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')