def _getLumiList(self, blockName=None, lfns=None, validFileOnly=1): """ currently only take one lfn but dbs api need be updated """ try: if blockName: lumiLists = self.dbs.listFileLumis(block_name=blockName, validFileOnly=validFileOnly) elif lfns: lumiLists = [] for slfn in grouper(lfns, 50): lumiLists.extend( self.dbs.listFileLumiArray(logical_file_name=slfn)) else: # shouldn't call this with both blockName and lfns empty # but still returns empty dict for that case return {} except dbsClientException as ex: msg = "Error in " msg += "DBSReader.listFileLumiArray(%s)\n" % lfns msg += "%s\n" % formatEx3(ex) raise DBSReaderError(msg) lumiDict = {} for lumisItem in lumiLists: lumiDict.setdefault(lumisItem['logical_file_name'], []) item = {} item["RunNumber"] = lumisItem['run_num'] item['LumiSectionNumber'] = lumisItem['lumi_section_num'] if lumisItem.get('event_count', None) is not None: item['EventCount'] = lumisItem['event_count'] lumiDict[lumisItem['logical_file_name']].append(item) # TODO: add key for lumi and event pair. return lumiDict
def listFilesInBlockWithParents(self, fileBlockName, lumis=True):
    """
    _listFilesInBlockWithParents_

    Return the files of the named block (as produced by listFilesInBlock),
    each extended with a "ParentList" entry holding the remapped DBS
    records of its parent files.

    When lumis is true every parent record also gets a "LumiList" taken
    from _getLumiList.

    TODO: lumis can be false when lumi splitting is not required
    However WMBSHelper expect file['LumiList'] to get the run number
    so for now it will be always true.

    Raises DBSReaderError when the block does not exist or when the
    parentage / file listing query fails in the DBS client.
    """
    if not self.blockExists(fileBlockName):
        msg = "DBSReader.listFilesInBlockWithParents(%s): No matching data"
        raise DBSReaderError(msg % fileBlockName)

    try:
        # TODO: should we get only valid block for this?
        parentLinks = self.dbs.listFileParents(block_name=fileBlockName)
        fileDetails = self.listFilesInBlock(fileBlockName, lumis)
    except dbsClientException as ex:
        msg = "Error in "
        msg += "DBSReader.listFilesInBlockWithParents(%s)\n" % (fileBlockName, )
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    # parent LFN -> LFNs of its children in this block
    childrenOf = defaultdict(list)
    for link in parentLinks:
        # a child can list more than one parent file
        for parentName in link['parent_logical_file_name']:
            childrenOf[parentName].append(link['logical_file_name'])

    parentsLFNs = childrenOf.keys()

    # TODO: slicing parentLFNs until the DBS api is handling that.
    # Remove slicing if the DBS api handles it.
    parentFilesDetail = []
    for lfnSlice in slicedIterator(parentsLFNs, 50):
        parentFilesDetail.extend(
            self.dbs.listFileArray(logical_file_name=lfnSlice, detail=True))

    parentLumis = self._getLumiList(lfns=parentsLFNs) if lumis else None

    # child LFN -> remapped records of its parents
    parentsByChild = defaultdict(list)
    for parentInfo in parentFilesDetail:
        parentLFN = parentInfo['logical_file_name']
        dbsFile = remapDBS3Keys(parentInfo, stringify=True)
        if lumis:
            dbsFile["LumiList"] = parentLumis[parentLFN]
        for childLFN in childrenOf[parentLFN]:
            parentsByChild[childLFN].append(dbsFile)

    for fileInfo in fileDetails:
        fileInfo["ParentList"] = parentsByChild[fileInfo['logical_file_name']]
    return fileDetails
def listFilesInBlock(self, fileBlockName, lumis=True, validFileOnly=1):
    """
    _listFilesInBlock_

    Return the detailed file records of the named block, each passed
    through remapDBS3Keys(..., stringify=True).

    When lumis is true, every record first receives a "LumiList" entry
    looked up by logical file name in the result of _getLumiList.

    TODO: lumis can be false when lumi splitting is not required
    However WMBSHelper expect file['LumiList'] to get the run number
    so for now it will be always true.
    We need to clean code up when dbs2 is completely deprecated.
    calling lumis for run number is expensive.

    Raises DBSReaderError when the block does not exist or the DBS client
    reports an error.
    """
    if not self.blockExists(fileBlockName):
        msg = "DBSReader.listFilesInBlock(%s): No matching data"
        raise DBSReaderError(msg % fileBlockName)

    try:
        files = self.dbs.listFileArray(block_name=fileBlockName,
                                       validFileOnly=validFileOnly,
                                       detail=True)
    except dbsClientException as ex:
        msg = "Error in "
        msg += "DBSReader.listFilesInBlock(%s)\n" % fileBlockName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    if not lumis:
        return [remapDBS3Keys(fileInfo, stringify=True) for fileInfo in files]

    lumiDict = self._getLumiList(blockName=fileBlockName,
                                 validFileOnly=validFileOnly)
    remapped = []
    for fileInfo in files:
        fileInfo["LumiList"] = lumiDict[fileInfo['logical_file_name']]
        remapped.append(remapDBS3Keys(fileInfo, stringify=True))
    return remapped
def getFileBlocksInfo(self, dataset, onlyClosedBlocks=False, blockName=None, locations=True):
    """
    _getFileBlocksInfo_

    Return the detailed block records of a dataset, remapped with
    remapDBS3Keys (block_name becomes 'Name').

    dataset          -- dataset path, validated with checkDatasetPath
    onlyClosedBlocks -- drop blocks whose 'OpenForWriting' stringifies to "1"
    blockName        -- restrict the DBS query to this block
    locations        -- add a 'PhEDExNodeList' entry built from
                        listFileBlockLocation for every block

    Raises DBSReaderError when the DBS query fails, or when nothing is
    found and no blockName was given.
    """
    self.checkDatasetPath(dataset)

    query = {'dataset': dataset, 'detail': True}
    if blockName:
        query['block_name'] = blockName

    try:
        rawBlocks = self.dbs.listBlocks(**query)
    except Exception as ex:
        msg = "Error in DBSReader.getFileBlocksInfo(%s)\n" % dataset
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    blocks = []
    for rawBlock in rawBlocks:
        blocks.append(remapDBS3Keys(rawBlock, stringify=True, block_name='Name'))

    # only raise if blockName not specified - mimic dbs2 error handling
    if not (blocks or blockName):
        msg = "DBSReader.getFileBlocksInfo(%s, %s): No matching data"
        raise DBSReaderError(msg % (dataset, blockName))

    if locations:
        for block in blocks:
            nodeNames = self.listFileBlockLocation(block['Name'])
            block['PhEDExNodeList'] = [{'Name': node} for node in nodeNames]

    if not onlyClosedBlocks:
        return blocks
    return [block for block in blocks if str(block['OpenForWriting']) != "1"]
def listFileBlocks(self, dataset, onlyClosedBlocks=False, blockName=None):
    """
    _listFileBlocks_

    Return the names of the blocks of a dataset.

    dataset          -- dataset path, validated with checkDatasetPath
    onlyClosedBlocks -- keep only blocks whose 'open_for_writing' does not
                        stringify to "1" (the query then asks for detail)
    blockName        -- restrict the DBS query to this block

    Raises DBSReaderError when the DBS client reports an error.
    """
    self.checkDatasetPath(dataset)

    query = {'dataset': dataset, 'detail': False}
    if blockName:
        query['block_name'] = blockName
    if onlyClosedBlocks:
        # open_for_writing is only read when filtering, so ask for detail then
        query['detail'] = True

    try:
        blockRecords = self.dbs.listBlocks(**query)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listFileBlocks(%s)\n" % dataset
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    if not onlyClosedBlocks:
        return [record['block_name'] for record in blockRecords]
    return [record['block_name'] for record in blockRecords
            if str(record['open_for_writing']) != "1"]
def getDBSSummaryInfo(self, dataset=None, block=None):
    """
    _getDBSSummaryInfo_

    Get the DBS file summary of a block (when block is given) or of a
    dataset, counting valid files only.

    Returns the first summary record remapped with remapDBS3Keys, extended
    with 'path' (the dataset or '') and 'block' (the block or ''). An
    empty dict is returned when DBS has no summary (missing data or all
    files invalid).

    Raises DBSReaderError when the DBS query fails.
    """
    if dataset:
        self.checkDatasetPath(dataset)

    query = {'block_name': block} if block else {'dataset': dataset}
    try:
        summary = self.dbs.listFileSummaries(validFileOnly=1, **query)
    except Exception as ex:
        msg = "Error in DBSReader.getDBSSummaryInfo(%s, %s)\n" % (dataset, block)
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    if not summary:
        # missing data or all files invalid
        return {}

    result = remapDBS3Keys(summary[0], stringify=True)
    result['path'] = dataset or ''
    result['block'] = block or ''
    return result
def listDatasetLocation(self, datasetName):
    """
    _listDatasetLocation_

    List the origin sites where there is at least one block of the given
    dataset, as reported by DBS listBlockOrigin.

    datasetName -- dataset path, validated with checkDatasetPath

    Returns a list of distinct site names with 'UNKNOWN' and None removed;
    an empty list when DBS reports no block origin.

    Raises DBSReaderError when the DBS client reports an error.
    """
    self.checkDatasetPath(datasetName)

    try:
        blocksInfo = self.dbs.listBlockOrigin(dataset=datasetName)
    except dbsClientException as ex:
        msg = "Error in DBSReader: dbsApi.listBlocks(dataset=%s)\n" % datasetName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    if not blocksInfo:  # no data location from dbs
        return list()

    locations = set()
    for blockInfo in blocksInfo:
        site = blockInfo['origin_site_name']
        if isinstance(site, (list, tuple, set, frozenset)):
            locations.update(site)
        else:
            # a plain site name must be added whole: set.update() on a
            # string would insert its individual characters instead
            locations.add(site)

    # remove entry when SE name is 'UNKNOWN'
    locations.difference_update(['UNKNOWN', None])
    return list(locations)
def listRunLumis(self, dataset=None, block=None):
    """
    _listRunLumis_

    Query DBS listRuns for a block (when block is given) or for a dataset.

    The original description: it gets a list of DBSRun objects and returns
    the number of lumisections per run.
    DbsRun (RunNumber, NumberOfEvents, NumberOfLumiSections,
            TotalLuminosity, StoreNumber, StartOfRungetLong, EndOfRun,
            CreationDate, CreatedBy, LastModificationDate, LastModifiedBy)

    Raises DBSReaderError when the DBS client reports an error.
    """
    try:
        if block:
            results = self.dbs.listRuns(block_name=block)
        else:
            results = self.dbs.listRuns(dataset=dataset)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listRuns(%s, %s)\n" % (dataset, block)
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling, so `results` is not returned - confirm against the full file.
def listRuns(self, dataset=None, block=None):
    """
    _listRuns_

    Return the run numbers DBS knows for a block (when block is given) or
    for a dataset.

    DBS answers with DbsRun-like records; only their 'run_num' entries are
    collected, flattened into one list in the order DBS returned them.

    Raises DBSReaderError when the DBS client reports an error.
    """
    query = {'block_name': block} if block else {'dataset': dataset}
    try:
        runRecords = self.dbs.listRuns(**query)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listRuns(%s, %s)\n" % (dataset, block)
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    return [run for record in runRecords for run in record['run_num']]
def _getLumiList(self, blockName=None, lfns=None, validFileOnly=1): """ currently only take one lfn but dbs api need be updated """ try: if blockName: lumiLists = self.dbs.listFileLumis(block_name=blockName, validFileOnly=validFileOnly) elif lfns: lumiLists = [] for slfn in grouper(lfns, 50): lumiLists.extend(self.dbs.listFileLumiArray(logical_file_name = slfn)) else: # shouldn't call this with both blockName and lfns empty # but still returns empty dict for that case return {} except dbsClientException as ex: msg = "Error in " msg += "DBSReader.listFileLumiArray(%s)\n" % lfns msg += "%s\n" % formatEx3(ex) raise DBSReaderError(msg) lumiDict = {} for lumisItem in lumiLists: lumiDict.setdefault(lumisItem['logical_file_name'], []) item = {} item["RunNumber"] = lumisItem['run_num'] item['LumiSectionNumber'] = lumisItem['lumi_section_num'] lumiDict[lumisItem['logical_file_name']].append(item) return lumiDict
def listFilesInBlockWithParents(self, fileBlockName, lumis=True):
    """
    _listFilesInBlockWithParents_

    Get a list of files in the named fileblock including
    the parents of that file.

    TODO: lumis can be false when lumi splitting is not required
    However WMBSHelper expect file['LumiList'] to get the run number
    so for now it will be always true.

    Raises DBSReaderError when the block does not exist or the DBS client
    reports an error.
    """
    if not self.blockExists(fileBlockName):
        msg = "DBSReader.listFilesInBlockWithParents(%s): No matching data"
        raise DBSReaderError(msg % fileBlockName)

    try:
        # TODO: should we get only valid block for this?
        files = self.dbs.listFileParents(block_name=fileBlockName)
        fileDetails = self.listFilesInBlock(fileBlockName, lumis)
    except dbsClientException as ex:
        msg = "Error in "
        msg += "DBSReader.listFilesInBlockWithParents(%s)\n" % (fileBlockName,)
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling, so `files` / `fileDetails` are not combined or returned -
    # confirm against the full file.
def listRuns(self, dataset=None, block=None):
    """
    _listRuns_

    Query DBS listRuns for a block (when block is given) or for a dataset.

    The original description: it gets list of DbsRun object but for our
    purpose only list of number is collected.
    DbsRun (RunNumber, NumberOfEvents, NumberOfLumiSections,
            TotalLuminosity, StoreNumber, StartOfRungetLong, EndOfRun,
            CreationDate, CreatedBy, LastModificationDate, LastModifiedBy)

    Raises DBSReaderError when the DBS client reports an error.
    """
    runs = []
    try:
        if block:
            results = self.dbs.listRuns(block_name=block)
        else:
            results = self.dbs.listRuns(dataset=dataset)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listRuns(%s, %s)\n" % (dataset, block)
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling, so `runs` is never filled from `results` nor returned -
    # confirm against the full file.
def getDBSSummaryInfo(self, dataset=None, block=None):
    """
    _getDBSSummaryInfo_

    Get the DBS file summary of a block (when block is given) or of a
    dataset, counting valid files only.

    Returns the first summary record remapped with remapDBS3Keys, extended
    with 'path' (the dataset or '') and 'block' (the block or '').

    Raises DBSReaderError when the DBS query fails, when no summary comes
    back, or when the first summary has no 'file_size' (which appears to
    indicate a missing dataset).
    """
    # FIXME: Doesnt raise exceptions on missing data as old api did
    if dataset:
        self.checkDatasetPath(dataset)

    # in the dataset case dataset shouldn't be None
    query = {'block_name': block} if block else {'dataset': dataset}
    try:
        summary = self.dbs.listFileSummaries(validFileOnly=1, **query)
    except Exception as ex:
        msg = "Error in DBSReader.getDBSSummaryInfo(%s, %s)\n" % (dataset, block)
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    hasData = bool(summary) and summary[0].get('file_size') is not None
    if not hasData:
        msg = "DBSReader.listDatasetSummary(%s, %s): No matching data"
        raise DBSReaderError(msg % (dataset, block))

    result = remapDBS3Keys(summary[0], stringify=True)
    result['path'] = dataset or ''
    result['block'] = block or ''
    return result
def _getLumiList(self, blockName=None, lfns=None, validFileOnly=1): """ currently only take one lfn but dbs api need be updated """ try: if blockName: lumiLists = self.dbs.listFileLumis(block_name=blockName, validFileOnly=validFileOnly) elif lfns: lumiLists = [] for slfn in grouper(lfns, 50): lumiLists.extend( self.dbs.listFileLumiArray(logical_file_name=slfn)) except dbsClientException as ex: msg = "Error in " msg += "DBSReader.listFileLumiArray(%s)\n" % lfns msg += "%s\n" % formatEx3(ex) raise DBSReaderError(msg) lumiDict = {} for lumisItem in lumiLists: lumiDict.setdefault(lumisItem['logical_file_name'], []) item = {} item["RunNumber"] = lumisItem['run_num'] item['LumiSectionNumber'] = lumisItem['lumi_section_num'] lumiDict[lumisItem['logical_file_name']].append(item) return lumiDict
def listFileBlockLocation(self, fileBlockName, dbsOnly = False, phedexNodes=False):
    """
    _listFileBlockLocation_

    Get the locations of one block or of several blocks.

    fileBlockName -- a single block name (basestring) or a collection of
                     block names; every name is validated with
                     checkBlockName
    dbsOnly       -- skip PhEDEx and read the origin_site_name from DBS
    phedexNodes   -- forwarded to PhEDEx and used to choose between
                     getNodeNames / getNodeSE when normalising each node

    PhEDEx (getReplicaSEForBlocks) is asked first unless dbsOnly is set.
    When it returns nothing, or a different number of entries than block
    names, the blocks missing from its answer are looked up in DBS with
    listBlockOrigin and the first record's origin_site_name is used.

    Returns an empty list when no block has any location. Otherwise
    returns a dict {block name: list of nodes}, or just the list of nodes
    when a single block name was passed. 'UNKNOWN' and None entries are
    dropped.

    Raises Exception when the PhEDEx call fails and DBSReaderError when
    the DBS client reports an error.
    """
    blockNames = [fileBlockName] if isinstance(fileBlockName, basestring) else fileBlockName
    for block in blockNames:
        self.checkBlockName(block)

    blockInfo = {}
    if not dbsOnly:
        try:
            blockInfo = self.phedex.getReplicaSEForBlocks(phedexNodes=phedexNodes, block=blockNames, complete='y')
        except Exception as ex:
            msg = "Error while getting block location from PhEDEx for block_name=%s)\n" % fileBlockName
            msg += "%s\n" % str(ex)
            raise Exception(msg)

        # if we could not get every data location from PhEDEx, fall back to
        # the origin site location from dbs
        if not blockInfo or len(blockInfo) != len(blockNames):
            dbsOnly = True
            # only the blocks PhEDEx had no information about
            blockNames = set(blockNames) - set(blockInfo)

    if dbsOnly:
        try:
            for block in blockNames:
                res = self.dbs.listBlockOrigin(block_name = block)
                if res:
                    # only the first origin record is used
                    blockInfo[block] = [res[0]['origin_site_name']]
        except dbsClientException as ex:
            msg = "Error in DBS3Reader: self.dbs.listBlockOrigin(block_name=%s)\n" % fileBlockName
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

    if not any(blockInfo.values()):
        # no data location from dbs
        return list()

    # removing duplicates and 'UNKNOWN' entries
    locations = {}
    node_filter_list = set(['UNKNOWN', None])
    for name, nodes in blockInfo.iteritems():
        final_nodes = set()
        for n in nodes:
            if n in node_filter_list:
                continue
            # NOTE(review): cmsname() is defined outside this block; per the
            # original comments an AssertionError means n is an SE name,
            # otherwise n is a PhEDEx node name - confirm against cmsname.
            try:
                cmsname(n)
            except AssertionError:
                # is SE
                n = self.phedex.getNodeNames(n) if phedexNodes else [n]
            else:
                # not SE i.e. phedexNode
                n = [self.phedex.getNodeSE(n)] if not phedexNodes else [n]
            final_nodes = final_nodes.union(n)
        # the conversion above may have reintroduced filtered values
        locations[name] = list(final_nodes - node_filter_list)

    # returning single list if a single block is passed
    if isinstance(fileBlockName, basestring):
        locations = locations[fileBlockName]

    return locations
def listRunLumis(self, dataset=None, block=None):
    """
    _listRunLumis_

    Query DBS listRuns for a block (when block is given) or for a dataset.

    The original description: it gets a list of DBSRun objects and returns
    the number of lumisections per run.
    DbsRun (RunNumber, NumberOfEvents, NumberOfLumiSections,
            TotalLuminosity, StoreNumber, StartOfRungetLong, EndOfRun,
            CreationDate, CreatedBy, LastModificationDate, LastModifiedBy)

    Raises DBSReaderError when the DBS client reports an error.
    """
    query = {'block_name': block} if block else {'dataset': dataset}
    try:
        results = self.dbs.listRuns(**query)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listRuns(%s, %s)\n" % (dataset, block)
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling, so `results` is not returned - confirm against the full file.
def listFileBlocks(self, dataset, onlyClosedBlocks=False, blockName=None):
    """
    _listFileBlocks_

    Retrieve the fileblock names of a dataset.

    dataset          -- dataset path, validated with checkDatasetPath
    onlyClosedBlocks -- keep only blocks whose 'open_for_writing' does not
                        stringify to "1" (the query then asks for detail)
    blockName        -- restrict the DBS query to this block

    Raises DBSReaderError when the DBS client reports an error.
    """
    self.checkDatasetPath(dataset)

    listArgs = dict(dataset=dataset, detail=False)
    if blockName:
        listArgs['block_name'] = blockName
    if onlyClosedBlocks:
        listArgs['detail'] = True

    try:
        found = self.dbs.listBlocks(**listArgs)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listFileBlocks(%s)\n" % dataset
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    names = []
    for entry in found:
        if onlyClosedBlocks and str(entry['open_for_writing']) == "1":
            # still open for writing: skip when only closed ones are wanted
            continue
        names.append(entry['block_name'])
    return names
def listFileBlockLocation(self, fileBlockNames, dbsOnly=False):
    """
    _listFileBlockLocation_

    Get the locations of one block or of several blocks.

    fileBlockNames -- a single block name (basestring) or a collection of
                      block names; every name is validated with
                      checkBlockName
    dbsOnly        -- read the origin_site_name from DBS (converted with
                      siteDB.checkAndConvertSENameToPNN) instead of asking
                      PhEDEx for replica nodes

    Returns a dict {block name: list of nodes} holding only blocks with at
    least one node other than 'UNKNOWN' / None. When a single block name
    was passed, its list of nodes is returned instead (an empty list when
    it has no valid node). With dbsOnly, an empty list is returned when
    DBS reports no origin at all.

    Raises DBSReaderError when the DBS client reports an error and
    Exception when the PhEDEx call fails.
    """
    singleBlockName = None
    if isinstance(fileBlockNames, basestring):
        singleBlockName = fileBlockNames
        fileBlockNames = [fileBlockNames]

    for block in fileBlockNames:
        self.checkBlockName(block)

    if dbsOnly:
        blocksInfo = {}
        try:
            for block in fileBlockNames:
                for blockInfo in self.dbs.listBlockOrigin(block_name=block):
                    if blockInfo:
                        # TODO remove this line when all DBS origin_site_name is converted to PNN
                        blockInfo['origin_site_name'] = self.siteDB.checkAndConvertSENameToPNN(
                            blockInfo['origin_site_name'])
                        # upto this
                        blocksInfo[block] = blockInfo['origin_site_name']
        except dbsClientException as ex:
            msg = "Error in DBS3Reader: self.dbs.listBlockOrigin(block_name=%s)\n" % fileBlockNames
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

        if not blocksInfo:  # no data location from dbs
            return list()
    else:
        try:
            blocksInfo = self.phedex.getReplicaPhEDExNodesForBlocks(block=fileBlockNames, complete='y')
        except Exception as ex:
            msg = "Error while getting block location from PhEDEx for block_name=%s)\n" % fileBlockNames
            msg += "%s\n" % str(ex)
            raise Exception(msg)

    # both sources are filtered the same way
    node_filter = set(['UNKNOWN', None])
    locations = {}
    for name, nodes in blocksInfo.items():
        valid_nodes = set(nodes) - node_filter
        if valid_nodes:  # dont add if only 'UNKNOWN' or None
            locations[name] = list(valid_nodes)

    # returning single list if a single block is passed
    if singleBlockName is not None:
        # the block is absent from locations when it had no valid node;
        # answer with an empty list rather than a KeyError
        return locations.get(singleBlockName, [])
    return locations
def listFilesInBlockWithParents(self, fileBlockName, lumis=True):
    """
    _listFilesInBlockWithParents_

    Get the files of the named fileblock (see listFilesInBlock) and attach
    to each one a "ParentList" with the remapped DBS records of its
    parents. With lumis true the parent records carry a "LumiList" too.

    TODO: lumis can be false when lumi splitting is not required
    However WMBSHelper expect file['LumiList'] to get the run number
    so for now it will be always true.

    Raises DBSReaderError when the block does not exist or when the
    parentage / file listing query fails in the DBS client.
    """
    if not self.blockExists(fileBlockName):
        raise DBSReaderError(
            "DBSReader.listFilesInBlockWithParents(%s): No matching data" % fileBlockName)

    try:
        # TODO: should we get only valid block for this?
        parentage = self.dbs.listFileParents(block_name=fileBlockName)
        fileDetails = self.listFilesInBlock(fileBlockName, lumis)
    except dbsClientException as ex:
        msg = "Error in "
        msg += "DBSReader.listFilesInBlockWithParents(%s)\n" % (fileBlockName,)
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    # index: parent LFN -> children LFNs (a child may have several parents)
    childByParents = defaultdict(list)
    for relation in parentage:
        for parentName in relation['parent_logical_file_name']:
            childByParents[parentName].append(relation['logical_file_name'])

    parentsLFNs = childByParents.keys()

    # TODO: slicing parentLFNs until the DBS api is handling that.
    # Remove slicing if the DBS api handles it.
    parentRecords = []
    for chunk in slicedIterator(parentsLFNs, 50):
        parentRecords.extend(self.dbs.listFileArray(logical_file_name=chunk, detail=True))

    if lumis:
        parentLumis = self._getLumiList(lfns=parentsLFNs)

    # index: child LFN -> remapped parent records
    parentsByLFN = defaultdict(list)
    for record in parentRecords:
        recordLFN = record['logical_file_name']
        remapped = remapDBS3Keys(record, stringify=True)
        if lumis:
            remapped["LumiList"] = parentLumis[recordLFN]
        for childLFN in childByParents[recordLFN]:
            parentsByLFN[childLFN].append(remapped)

    for detail in fileDetails:
        detail["ParentList"] = parentsByLFN[detail['logical_file_name']]

    return fileDetails
def __init__(self, url, **contact):
    """
    Create the reader and its DBS client.

    url     -- DBS endpoint handed to DbsApi
    contact -- extra keyword arguments forwarded to DbsApi

    Raises DBSReaderError when DbsApi raises dbsClientException.
    """
    # instantiate dbs api object
    try:
        self.dbs = DbsApi(url, **contact)
    except dbsClientException as ex:
        msg = "Error in DBSReader with DbsApi\n"
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
def __init__(self, url, logger=None, **contact):
    """
    Create the reader and its DBS client.

    url     -- DBS endpoint; "cmsweb.cern.ch" in it is replaced with
               "cmsweb-prod.cern.ch" and the result is kept as self.dbsURL
    logger  -- logger to use; defaults to one named after the class
    contact -- extra keyword arguments forwarded to DbsApi

    Raises DBSReaderError when DbsApi raises dbsClientException.
    """
    self.dbsURL = url.replace("cmsweb.cern.ch", "cmsweb-prod.cern.ch")

    # instantiate dbs api object
    try:
        self.dbs = DbsApi(self.dbsURL, **contact)
    except dbsClientException as ex:
        raise DBSReaderError("Error in DBSReader with DbsApi\n" + "%s\n" % formatEx3(ex))

    if logger:
        self.logger = logger
    else:
        self.logger = logging.getLogger(self.__class__.__name__)
def __init__(self, url, **contact):
    """
    Create the reader with its DBS and PhEDEx clients.

    url     -- DBS endpoint handed to DbsApi
    contact -- extra keyword arguments forwarded to DbsApi

    Raises DBSReaderError when DbsApi raises dbsClientException.
    """
    # instantiate dbs api object
    try:
        dbsClient = DbsApi(url, **contact)
    except dbsClientException as ex:
        raise DBSReaderError("Error in DBSReader with DbsApi\n" + "%s\n" % formatEx3(ex))
    self.dbs = dbsClient

    # connection to PhEDEx (Use default endpoint url)
    self.phedex = PhEDEx(responseType = "json")
def checkDBSServer(self):
    """
    _checkDBSServer_

    Check whether the dbs server is up and running by asking it for its
    server info, which is returned as is, e.g.
    {"dbs_instance": "prod/global", "dbs_version": "3.3.144"}

    Raises DBSReaderError when the DBS client reports an error.
    """
    try:
        serverInfo = self.dbs.serverinfo()
    except dbsClientException as ex:
        parts = ["Error in ",
                 "DBS server is not up: %s" % self.dbsURL,
                 "%s\n" % formatEx3(ex)]
        raise DBSReaderError("".join(parts))
    return serverInfo
def __init__(self, url, **contact):
    """
    Set up the DBS client and the PhEDEx connection of the reader.

    url     -- DBS endpoint handed to DbsApi
    contact -- extra keyword arguments forwarded to DbsApi

    Raises DBSReaderError when DbsApi raises dbsClientException.
    """
    # instantiate dbs api object
    try:
        self.dbs = DbsApi(url, **contact)
    except dbsClientException as ex:
        errorLines = ["Error in DBSReader with DbsApi\n", "%s\n" % formatEx3(ex)]
        raise DBSReaderError("".join(errorLines))

    # connection to PhEDEx (Use default endpoint url)
    self.phedex = PhEDEx(responseType="json")
def listDatasetParents(self, childDataset):
    """
    _listDatasetParents_

    Return what DBS listDatasetParents reports as the parents of the
    given child dataset path.

    Raises DBSReaderError when the DBS client reports an error.
    """
    try:
        parents = self.dbs.listDatasetParents(dataset=childDataset)
    except dbsClientException as ex:
        msg = "Error in "
        msg += "DBSReader.listDatasetParents(%s)\n" % childDataset
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    return parents
def matchProcessedDatasets(self, primary, tier, process):
    """
    _matchProcessedDatasets_

    Query DBS listDatasets (with detail) for the datasets of the given
    primary dataset name and data tier.

    Raises DBSReaderError when the DBS client reports an error.
    """
    result = []
    try:
        datasets = self.dbs.listDatasets(primary_ds_name=primary, data_tier_name=tier, detail=True)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listProcessedDatasets(%s)\n" % primary
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling: `process` is unused and neither `datasets` nor `result` is
    # returned - confirm against the full file.
def __init__(self, url, logger=None, **contact):
    """
    Create the reader with its DBS and PhEDEx clients.

    url     -- DBS endpoint, kept as self.dbsURL and handed to DbsApi and
               (as dbsUrl) to PhEDEx
    logger  -- logger to use; defaults to one named after the class
    contact -- extra keyword arguments forwarded to DbsApi

    Raises DBSReaderError when DbsApi raises dbsClientException.
    """
    self.dbsURL = url

    # instantiate dbs api object
    try:
        self.dbs = DbsApi(url, **contact)
    except dbsClientException as ex:
        raise DBSReaderError("Error in DBSReader with DbsApi\n" + "%s\n" % formatEx3(ex))

    if logger:
        self.logger = logger
    else:
        self.logger = logging.getLogger(self.__class__.__name__)

    # connection to PhEDEx (Use default endpoint url)
    self.phedex = PhEDEx(responseType="json", dbsUrl=self.dbsURL)
def getFileBlocksInfo(self, dataset, onlyClosedBlocks=False, blockName=None, locations=True):
    """
    _getFileBlocksInfo_

    Query DBS listBlocks (with detail) for the blocks of a dataset,
    optionally restricted to blockName. The dataset path is validated
    with checkDatasetPath first.

    Raises DBSReaderError when the DBS client reports an error.
    """
    self.checkDatasetPath(dataset)

    args = {'dataset': dataset, 'detail': True}
    if blockName:
        args['block_name'] = blockName

    try:
        blocks = self.dbs.listBlocks(**args)
    except dbsClientException as ex:
        msg = "Error in DBSReader.getFileBlocksInfo(%s)\n" % dataset
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling: onlyClosedBlocks / locations are unused and `blocks` is not
    # returned - confirm against the full file.
def listProcessedDatasets(self, primary, dataTier='*'):
    """
    _listProcessedDatasets_

    Query DBS listDatasets for the datasets of the given primary dataset
    name and optional data tier value.

    Raises DBSReaderError when the DBS client reports an error.
    """
    try:
        result = self.dbs.listDatasets(primary_ds_name=primary, data_tier_name=dataTier)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listProcessedDatasets(%s)\n" % primary
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling, so `result` is not returned - confirm against the full file.
def listPrimaryDatasets(self, match='*'):
    """
    _listPrimaryDatasets_

    Query DBS listPrimaryDatasets for the given primary dataset name.
    The full dataset name must be provided; pattern based matching is no
    longer supported. If no expression is provided, all datasets are
    requested.

    Raises DBSReaderError when the DBS client reports an error.
    """
    try:
        result = self.dbs.listPrimaryDatasets(primary_ds_name=match)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listPrimaryDataset(%s)\n" % match
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling, so `result` is not returned - confirm against the full file.
def listOpenFileBlocks(self, dataset):
    """
    _listOpenFileBlocks_

    Query DBS listBlocks (with detail) for the blocks of a dataset, as the
    first step of retrieving its open fileblock names. The dataset path is
    validated with checkDatasetPath first.

    Raises DBSReaderError when the DBS client reports an error.
    """
    self.checkDatasetPath(dataset)
    try:
        blocks = self.dbs.listBlocks(dataset=dataset, detail=True)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listFileBlocks(%s)\n" % dataset
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling, so `blocks` is neither filtered nor returned - confirm
    # against the full file.
def listFileBlockLocation(self, fileBlockNames, dbsOnly=False):
    """
    _listFileBlockLocation_

    Get the locations of one block or of several blocks.

    fileBlockNames -- a single block name (basestring) or a collection of
                      block names; every name is validated with
                      checkBlockName
    dbsOnly        -- use the origin_site_name recorded in DBS instead of
                      the replica nodes known to PhEDEx

    Returns a dict {block name: list of nodes} with one entry per
    requested block ('UNKNOWN' and None are dropped, so a list may be
    empty). When a single, non-empty block name was passed, only its list
    of nodes is returned.

    Raises DBSReaderError when the DBS client reports an error and
    Exception when the PhEDEx call fails.
    """
    passedSingleName = isinstance(fileBlockNames, basestring)
    singleBlockName = fileBlockNames if passedSingleName else None
    if passedSingleName:
        fileBlockNames = [fileBlockNames]

    for block in fileBlockNames:
        self.checkBlockName(block)

    unwanted = set(['UNKNOWN', None])

    if not dbsOnly:
        try:
            blocksInfo = self.phedex.getReplicaPhEDExNodesForBlocks(
                block=fileBlockNames, complete='y')
        except Exception as ex:
            msg = "Error while getting block location from PhEDEx for block_name=%s)\n" % fileBlockNames
            msg += "%s\n" % str(ex)
            raise Exception(msg)
    else:
        blocksInfo = {}
        try:
            for block in fileBlockNames:
                origins = blocksInfo.setdefault(block, [])
                # there should be only one element with a single origin site string ...
                for originRecord in self.dbs.listBlockOrigin(block_name=block):
                    origins.append(originRecord['origin_site_name'])
        except dbsClientException as ex:
            msg = "Error in DBS3Reader: self.dbs.listBlockOrigin(block_name=%s)\n" % fileBlockNames
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)

    locations = {}
    for block in fileBlockNames:
        locations[block] = list(set(blocksInfo.get(block, [])) - unwanted)

    # returning single list if a single block is passed
    return locations[singleBlockName] if singleBlockName else locations
def listOpenFileBlocks(self, dataset):
    """
    _listOpenFileBlocks_

    Query DBS listBlocks (with detail) for the blocks of a dataset, as the
    first step of retrieving its open fileblock names. The dataset path is
    validated with checkDatasetPath first.

    Raises DBSReaderError when the DBS client reports an error.
    """
    self.checkDatasetPath(dataset)

    try:
        blocks = self.dbs.listBlocks(dataset=dataset, detail=True)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listFileBlocks(%s)\n" % dataset
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling, so `blocks` is neither filtered nor returned - confirm
    # against the full file.
def listPrimaryDatasets(self, match='*'):
    """
    _listPrimaryDatasets_

    Query DBS listPrimaryDatasets for the given primary dataset name.
    The full dataset name must be provided; pattern based matching is no
    longer supported. If no expression is provided, all datasets are
    requested.

    Raises DBSReaderError when the DBS client reports an error.
    """
    try:
        result = self.dbs.listPrimaryDatasets(primary_ds_name=match)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listPrimaryDataset(%s)\n" % match
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling, so `result` is not returned - confirm against the full file.
def matchProcessedDatasets(self, primary, tier, process):
    """
    _matchProcessedDatasets_

    Query DBS listDatasets (with detail) for the datasets of the given
    primary dataset name and data tier.

    Raises DBSReaderError when the DBS client reports an error.
    """
    result = []
    try:
        datasets = self.dbs.listDatasets(primary_ds_name=primary,
                                         data_tier_name=tier,
                                         detail=True)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listProcessedDatasets(%s)\n" % primary
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling: `process` is unused and neither `datasets` nor `result` is
    # returned - confirm against the full file.
def listProcessedDatasets(self, primary, dataTier='*'):
    """
    _listProcessedDatasets_

    Query DBS listDatasets for the datasets of the given primary dataset
    name and optional data tier value.

    Raises DBSReaderError when the DBS client reports an error.
    """
    try:
        result = self.dbs.listDatasets(primary_ds_name=primary,
                                       data_tier_name=dataTier)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listProcessedDatasets(%s)\n" % primary
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling, so `result` is not returned - confirm against the full file.
def listFileBlockLocation(self, fileBlockNames, dbsOnly=False):
    """
    _listFileBlockLocation_

    Get the locations of one block or of several blocks.

    fileBlockNames -- a single block name (basestring) or a collection of
                      block names; every name is validated with
                      checkBlockName
    dbsOnly        -- use the origin_site_name recorded in DBS instead of
                      the replica nodes known to PhEDEx

    Returns a dict {block name: list of nodes} with one entry per
    requested block ('UNKNOWN' and None are dropped, so a list may be
    empty). When a single, non-empty block name was passed, only its list
    of nodes is returned.

    Raises DBSReaderError when the DBS client reports an error and
    Exception when the PhEDEx call fails.
    """
    singleBlockName = None
    if isinstance(fileBlockNames, basestring):
        singleBlockName, fileBlockNames = fileBlockNames, [fileBlockNames]

    for blockName in fileBlockNames:
        self.checkBlockName(blockName)

    if dbsOnly:
        blocksInfo = {}
        try:
            for blockName in fileBlockNames:
                blocksInfo.setdefault(blockName, [])
                # there should be only one element with a single origin site string ...
                for origin in self.dbs.listBlockOrigin(block_name=blockName):
                    blocksInfo[blockName].append(origin['origin_site_name'])
        except dbsClientException as ex:
            msg = "Error in DBS3Reader: self.dbs.listBlockOrigin(block_name=%s)\n" % fileBlockNames
            msg += "%s\n" % formatEx3(ex)
            raise DBSReaderError(msg)
    else:
        try:
            blocksInfo = self.phedex.getReplicaPhEDExNodesForBlocks(block=fileBlockNames,
                                                                    complete='y')
        except Exception as ex:
            msg = "Error while getting block location from PhEDEx for block_name=%s)\n" % fileBlockNames
            msg += "%s\n" % str(ex)
            raise Exception(msg)

    node_filter = set(['UNKNOWN', None])
    locations = {}
    for blockName in fileBlockNames:
        reported = blocksInfo.get(blockName, [])
        locations[blockName] = list(set(reported) - node_filter)

    if not singleBlockName:
        return locations
    # returning single list if a single block is passed
    return locations[singleBlockName]
def _getLumiList(self, blockName = None, lfns = None): """ currently only take one lfn but dbs api need be updated """ try: if blockName: lumiLists = self.dbs.listFileLumis(block_name=blockName, validFileOnly = 1) elif lfns: lumiLists = [] for slfn in slicedIterator(lfns, 50): lumiLists.extend(self.dbs.listFileLumiArray(logical_file_name = slfn)) except dbsClientException, ex: msg = "Error in " msg += "DBSReader.listFileLumiArray(%s)\n" % lfns msg += "%s\n" % formatEx3(ex) raise DBSReaderError(msg)
def getDBSSummaryInfo(self, dataset=None, block=None):
    """
    _getDBSSummaryInfo_

    Query the DBS file summary (# of files, events, blocks and total size)
    of a block (when block is given) or of a dataset, counting valid files
    only. A given dataset path is validated with checkDatasetPath first.

    Raises DBSReaderError when the DBS client reports an error.
    """
    # FIXME: Doesnt raise exceptions on missing data as old api did
    if dataset:
        self.checkDatasetPath(dataset)
    try:
        if block:
            summary = self.dbs.listFileSummaries(block_name=block, validFileOnly=1)
        else:
            # dataset case: dataset shouldn't be None
            summary = self.dbs.listFileSummaries(dataset=dataset, validFileOnly=1)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listDatasetSummary(%s, %s)\n" % (dataset, block)
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling, so `summary` is not returned - confirm against the full file.
def checkDatasetPath(self, pathName):
    """
    _checkDatasetPath_

    Make sure pathName is a dataset path known to DBS (any access type).

    Returns None when the dataset exists.

    Raises DBSReaderError when pathName is "" or None, when DBS returns no
    dataset for it, or when the lookup fails with dbsClientException or
    HTTPError.
    """
    if pathName in ("", None):
        raise DBSReaderError("Invalid Dataset Path name: => %s <=" % pathName)

    try:
        matches = self.dbs.listDatasets(dataset=pathName, dataset_access_type='*')
        if len(matches) == 0:
            raise DBSReaderError("Dataset %s doesn't exist in DBS %s" % (pathName, self.dbsURL))
    except (dbsClientException, HTTPError) as ex:
        parts = ["Error in ",
                 "DBSReader.checkDatasetPath(%s)\n" % pathName,
                 "%s\n" % formatEx3(ex)]
        raise DBSReaderError("".join(parts))
    return
def listDatasetLocation(self, datasetName, dbsOnly=False):
    """
    _listDatasetLocation_

    List the sites where there is at least one block of the given dataset.

    datasetName -- dataset path, validated with checkDatasetPath
    dbsOnly     -- use the block origin sites recorded in DBS (converted
                   with siteDB.checkAndConvertSENameToPNN, 'UNKNOWN' and
                   None removed) instead of the PhEDEx replica nodes

    Returns a list of distinct locations, empty when nothing is known.

    Raises DBSReaderError when the DBS client reports an error and
    Exception when the PhEDEx call fails.
    """
    self.checkDatasetPath(datasetName)

    locations = set()

    if not dbsOnly:
        try:
            blocksInfo = self.phedex.getReplicaPhEDExNodesForBlocks(
                dataset=[datasetName], complete='y')
        except Exception as ex:
            msg = "Error while getting block location from PhEDEx for dataset=%s)\n" % datasetName
            msg += "%s\n" % str(ex)
            raise Exception(msg)

        if blocksInfo:
            for blockSites in blocksInfo.values():
                locations.update(blockSites)
        return list(locations)

    try:
        blocksInfo = self.dbs.listBlockOrigin(dataset=datasetName)
    except dbsClientException as ex:
        msg = "Error in DBSReader: dbsApi.listBlocks(dataset=%s)\n" % datasetName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    if not blocksInfo:  # no data location from dbs
        return list()

    for blockInfo in blocksInfo:
        # TODO remove this line when all DBS origin_site_name is converted to PNN
        convertedSites = self.siteDB.checkAndConvertSENameToPNN(blockInfo['origin_site_name'])
        blockInfo['origin_site_name'] = convertedSites
        # upto this
        locations.update(convertedSites)

    # remove entry when SE name is 'UNKNOWN'
    locations.difference_update(['UNKNOWN', None])
    return list(locations)
def getFileBlocksInfo(self, dataset, onlyClosedBlocks=False, blockName=None, locations=True):
    """
    _getFileBlocksInfo_

    Query DBS listBlocks (with detail) for the blocks of a dataset,
    optionally restricted to blockName. The dataset path is validated
    with checkDatasetPath first.

    Raises DBSReaderError when the DBS client reports an error.
    """
    self.checkDatasetPath(dataset)
    args = {'dataset': dataset, 'detail': True}
    if blockName:
        args['block_name'] = blockName
    try:
        blocks = self.dbs.listBlocks(**args)
    except dbsClientException as ex:
        msg = "Error in DBSReader.getFileBlocksInfo(%s)\n" % dataset
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling: onlyClosedBlocks / locations are unused and `blocks` is not
    # returned - confirm against the full file.
def getDBSSummaryInfo(self, dataset=None, block=None):
    """
    _getDBSSummaryInfo_

    Query the DBS file summary (# of files, events, blocks and total size)
    of a block (when block is given) or of a dataset. A given dataset path
    is validated with checkDatasetPath first.

    Raises DBSReaderError when the DBS client reports an error.
    """
    # FIXME: Doesnt raise exceptions on missing data as old api did
    if dataset:
        self.checkDatasetPath(dataset)
    try:
        if block:
            summary = self.dbs.listFileSummaries(block_name=block)
        else:
            # dataset case: dataset shouldn't be None
            summary = self.dbs.listFileSummaries(dataset=dataset)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listDatasetSummary(%s, %s)\n" % (dataset, block)
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling, so `summary` is not returned - confirm against the full file.
def listOpenFileBlocks(self, dataset):
    """
    _listOpenFileBlocks_

    Retrieve the names of the blocks of a dataset that are still open,
    i.e. whose 'open_for_writing' stringifies to "1". The dataset path is
    validated with checkDatasetPath first.

    Raises DBSReaderError when the DBS client reports an error.
    """
    self.checkDatasetPath(dataset)

    try:
        blockRecords = self.dbs.listBlocks(dataset=dataset, detail=True)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listFileBlocks(%s)\n" % dataset
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    openBlocks = []
    for record in blockRecords:
        if str(record['open_for_writing']) == "1":
            openBlocks.append(record['block_name'])
    return openBlocks
def getFileListByDataset(self, dataset, validFileOnly=1, detail=True):
    """
    _getFileListByDataset_

    Given a dataset, return what DBS listFileArray reports for it: its
    files with their blocks, lfns and number of events (among other not
    really important info), as a list of dicts.

    validFileOnly and detail are forwarded to the DBS query unchanged.

    Raises DBSReaderError when the DBS client reports an error.
    """
    query = dict(dataset=dataset, validFileOnly=validFileOnly, detail=detail)
    try:
        return self.dbs.listFileArray(**query)
    except dbsClientException as ex:
        msg = "Error in "
        msg += "DBSReader.getFileListByDataset(%s)\n" % dataset
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
def lfnsInBlock(self, fileBlockName):
    """
    _lfnsInBlock_

    Query DBS listFiles for the named block.
    LFN list only for block, details = False => faster query

    Raises DBSReaderError when the block does not exist or the DBS client
    reports an error.
    """
    if not self.blockExists(fileBlockName):
        msg = "DBSReader.lfnsInBlock(%s): No matching data"
        raise DBSReaderError(msg % fileBlockName)

    try:
        files = self.dbs.listFiles(block_name=fileBlockName, detail=False)
    except dbsClientException as ex:
        msg = "Error in "
        msg += "DBSReader.listFilesInBlock(%s)\n" % fileBlockName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling, so `files` is not returned - confirm against the full file.
def lfnsInBlock(self, fileBlockName):
    """
    _lfnsInBlock_

    Query DBS listFiles for the named block.
    LFN list only for block, details = False => faster query

    Raises DBSReaderError when the block does not exist or the DBS client
    reports an error.
    """
    if not self.blockExists(fileBlockName):
        raise DBSReaderError("DBSReader.lfnsInBlock(%s): No matching data" % fileBlockName)

    try:
        files = self.dbs.listFiles(block_name=fileBlockName, detail=False)
    except dbsClientException as ex:
        msg = "Error in "
        msg += "DBSReader.listFilesInBlock(%s)\n" % fileBlockName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): as visible here the definition ends after the error
    # handling, so `files` is not returned - confirm against the full file.
def blockToDatasetPath(self, blockName):
    """
    _blockToDatasetPath_

    Given a block name, query DBS listBlocks (with detail) for it, as the
    first step of finding the dataset path associated with that block.
    The block name is validated with checkBlockName first.

    Raises DBSReaderError when the DBS client reports an error.
    """
    self.checkBlockName(blockName)
    try:
        blocks = self.dbs.listBlocks(block_name=blockName, detail=True)
    except dbsClientException as ex:
        msg = "Error in "
        msg += "DBSReader.blockToDataset(%s)\n" % blockName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): the original docstring promises the dataset path (or
    # None if not found), but as visible here the definition ends after the
    # error handling and `blocks` is not used - confirm against the full file.
def blockToDatasetPath(self, blockName):
    """
    _blockToDatasetPath_

    Given a block name, query DBS listBlocks (with detail) for it, as the
    first step of finding the dataset path associated with that block.
    The block name is validated with checkBlockName first.

    Raises DBSReaderError when the DBS client reports an error.
    """
    self.checkBlockName(blockName)

    try:
        blocks = self.dbs.listBlocks(block_name=blockName, detail=True)
    except dbsClientException as ex:
        msg = "Error in "
        msg += "DBSReader.blockToDataset(%s)\n" % blockName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): the original docstring promises the dataset path (or
    # None if not found), but as visible here the definition ends after the
    # error handling and `blocks` is not used - confirm against the full file.
def blockExists(self, fileBlockName):
    """
    _blockExists_

    Query DBS listBlocks for the given block name, as the first step of
    checking whether the block exists in the DBS instance. The block name
    is validated with checkBlockName first.

    Raises DBSReaderError when the DBS client reports an error.
    """
    self.checkBlockName(fileBlockName)
    try:
        blocks = self.dbs.listBlocks(block_name=fileBlockName)
    except dbsClientException as ex:
        msg = "Error in "
        msg += "DBSReader.blockExists(%s)\n" % fileBlockName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)
    # NOTE(review): the original docstring promises True / False, but as
    # visible here the definition ends after the error handling and `blocks`
    # is not inspected - confirm against the full file.
def listRunLumis(self, dataset=None, block=None):
    """
    _listRunLumis_

    Return the runs DBS knows for a block (when block is given) or for a
    dataset, as a dict {run_number: None}.

    The dict shape is kept for sync with the dbs2 call, which returned
    {run_number: num_lumis}; the dbs3 call doesn't return the number of
    lumis, hence the None values.
    TODO: After DBS2 is completely removed change the return format to a
    more sensible one.

    Raises DBSReaderError when the DBS client reports an error.
    """
    # Hand text (not byte) strings to the DBS client. On Python 2 this is
    # what unicode(value) did for str; on Python 3 str values are already
    # text, and calling unicode() there would raise NameError.
    if isinstance(block, bytes):
        block = block.decode()
    if isinstance(dataset, bytes):
        dataset = dataset.decode()

    try:
        if block:
            results = self.dbs.listRuns(block_name=block)
        else:
            results = self.dbs.listRuns(dataset=dataset)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listRuns(%s, %s)\n" % (dataset, block)
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    runDict = {}
    for record in results:
        for runNumber in record["run_num"]:
            runDict[runNumber] = None
    return runDict