# NOTE(review): this chunk appears torn from a larger script. The names
# jobNum, jobDB, selected, opts, args, config, utils, sys and DataProvider
# are all defined outside the visible range; the reset sequence below very
# likely sits inside an enclosing function/loop whose header is not shown —
# TODO confirm against the full file.

# --- Reset the retry state of a single job -------------------------------
print 'Resetting attempts', jobNum
jobinfo = jobDB.get(jobNum)
jobinfo.attempt = 0
jobinfo.history = {}
# Drop all 'history*' entries; in Python 2, dict.keys() returns a list
# copy, so popping while iterating is safe here.
for key in jobinfo.dict.keys():
	if key.startswith('history'):
		jobinfo.dict.pop(key)
jobDB.commit(jobNum, jobinfo)
# Print the job numbers matching the current selection on one line.
print str.join(' ', map(str, jobDB.getJobsIter(selected)))

# --- Compare two dataset listings ----------------------------------------
if opts.diff:
	if len(args) != 2:
		utils.exitWithUsage('%s <dataset source 1> <dataset source 2>' % sys.argv[0])
	# Silence error output from the providers while resyncing.
	utils.eprint = lambda *x: {}
	a = DataProvider.getInstance('ListProvider', config, args[0], None)
	b = DataProvider.getInstance('ListProvider', config, args[1], None)
	(blocksAdded, blocksMissing, blocksChanged) = DataProvider.resyncSources(a.getBlocks(), b.getBlocks())
	# Only the blocks missing from source 2 are reported.
	utils.printTabular([(DataProvider.Dataset, 'Dataset'), (DataProvider.BlockName, 'Block')], blocksMissing)

# --- Find blocks removed relative to a reference listing -----------------
if opts.findrm:
	removed = []
	# Silence error output from the providers while resyncing.
	utils.eprint = lambda *x: {}
	oldDP = DataProvider.getInstance('ListProvider', config, args[0], None)
	for new in args[1:]:
		newDP = DataProvider.getInstance('ListProvider', config, new, None)
		(blocksAdded, blocksMissing, blocksChanged) = DataProvider.resyncSources(oldDP.getBlocks(), newDP.getBlocks())
		for block in blocksMissing:
			# Copy the block and tag it (key -1) with the source file in
			# which it went missing; 'removed' is presumably printed later,
			# outside the visible range — TODO confirm.
			tmp = dict(block)
			tmp[-1] = new
			removed.append(tmp)
def main():
	"""Print information about a dataset and optionally save its state.

	Reads the dataset specification from args[0] and the report selection
	from the module-level 'opts' option object; output goes to stdout.
	Raises DatasetError when the dataset source yields no blocks.
	"""
	dataset = args[0].strip()
	# Baseline dataset-provider settings; the string values are config-file
	# literals consumed by the grid-control config layer.
	cfgSettings = {'dbs blacklist T1 *': 'False', 'remove empty blocks *': 'False',
		'remove empty files *': 'False', 'location format *': opts.locationfmt,
		'nickname check collision *': 'False'}
	if opts.metadata or opts.blockmetadata:
		# Metadata listings need the lumi information kept by the provider.
		cfgSettings['lumi filter *'] = '-'
		cfgSettings['keep lumi metadata *'] = 'True'
	config = getConfig(configFile = opts.settings, configDict = {'dataset': cfgSettings})

	# An existing path is treated as a saved dataset file; anything else is
	# handed to the provider factory (e.g. a DBS dataset path).
	if os.path.exists(dataset):
		provider = DataProvider.getInstance('ListProvider', config, dataset, None)
	else:
		provider = DataProvider.create(config, dataset, opts.provider)
	blocks = provider.getBlocks()
	if len(blocks) == 0:
		raise DatasetError('No blocks!')

	# With multiple datasets (or --info) every table carries a dataset
	# column; otherwise the single dataset name is printed once up front.
	datasets = set(map(lambda x: x[DataProvider.Dataset], blocks))
	if len(datasets) > 1 or opts.info:
		headerbase = [(DataProvider.Dataset, 'Dataset')]
	else:
		print('Dataset: %s' % blocks[0][DataProvider.Dataset])
		headerbase = []

	# --- Emit a ready-to-paste 'dataset =' config entry ------------------
	if opts.configentry:
		print('')
		print('dataset =')
		infos = {}
		order = []
		maxnick = 5  # minimum nickname column width
		for block in blocks:
			dsName = block[DataProvider.Dataset]
			if not infos.get(dsName, None):
				order.append(dsName)
				infos[dsName] = dict([(DataProvider.Dataset, dsName)])
				if DataProvider.Nickname not in block and opts.confignick:
					# Derive a nickname from the dataset path (second '/'
					# separated component) when possible.
					try:
						if '/' in dsName:
							block[DataProvider.Nickname] = dsName.lstrip('/').split('/')[1]
						else:
							block[DataProvider.Nickname] = dsName
					except Exception:
						pass
				if DataProvider.Nickname not in block and opts.confignick:
					# Fallback nickname producer; 'np' is defined outside the
					# visible range — presumably a NickNameProducer instance;
					# TODO confirm. Note the guard duplicates the one above,
					# so this only fires if the path-based derivation failed.
					block[DataProvider.Nickname] = np.getName(None, dsName, block)
				if DataProvider.Nickname in block:
					nick = block[DataProvider.Nickname]
					infos[dsName][DataProvider.Nickname] = nick
					maxnick = max(maxnick, len(nick))
				if len(block[DataProvider.FileList]):
					infos[dsName][DataProvider.URL] = block[DataProvider.FileList][0][DataProvider.URL]
		for dsID, dsName in enumerate(order):
			info = infos[dsName]
			# Map the provider class to its short config alias, if any.
			short = DataProvider.providers.get(provider.__class__.__name__, provider.__class__.__name__)
			nickname = info.get(DataProvider.Nickname, 'nick%d' % dsID).rjust(maxnick)
			# 'list' providers get a '%% <dataset>' filter suffix (escaped
			# percent sign for the config parser).
			filterExpr = utils.QM(short == 'list', ' %% %s' % info[DataProvider.Dataset], '')
			print('\t%s : %s : %s%s' % (nickname, short, provider._datasetExpr, filterExpr))

	# --- Per-dataset summary table (#events / #blocks / #files) ----------
	if opts.listdatasets:
		# Add some enums for consistent access to info dicts
		DataProvider.NFiles = -1
		DataProvider.NBlocks = -2

		print('')
		infos = {}
		order = []
		infosum = {DataProvider.Dataset : 'Sum'}
		for block in blocks:
			dsName = block.get(DataProvider.Dataset, '')
			if not infos.get(dsName, None):
				order.append(dsName)
				infos[dsName] = {DataProvider.Dataset: block[DataProvider.Dataset]}
			# Accumulate counts for both the per-dataset row and the sum
			# row; closes over the current 'block'.
			def updateInfos(target):
				target[DataProvider.NBlocks] = target.get(DataProvider.NBlocks, 0) + 1
				target[DataProvider.NFiles] = target.get(DataProvider.NFiles, 0) + len(block[DataProvider.FileList])
				target[DataProvider.NEntries] = target.get(DataProvider.NEntries, 0) + block[DataProvider.NEntries]
			updateInfos(infos[dsName])
			updateInfos(infosum)
		head = [(DataProvider.Dataset, 'Dataset'), (DataProvider.NEntries, '#Events'),
			(DataProvider.NBlocks, '#Blocks'), (DataProvider.NFiles, '#Files')]
		# Python 2: map() returns a list, so '+' appends the separator row
		# and the sum row.
		utils.printTabular(head, map(lambda x: infos[x], order) + ['=', infosum])

	# --- Block table ------------------------------------------------------
	if opts.listblocks:
		print('')
		utils.printTabular(headerbase + [(DataProvider.BlockName, 'Block'), (DataProvider.NEntries, 'Events')], blocks)

	# --- File table per block --------------------------------------------
	if opts.listfiles:
		print('')
		for block in blocks:
			if len(datasets) > 1:
				print('Dataset: %s' % block[DataProvider.Dataset])
			print('Blockname: %s' % block[DataProvider.BlockName])
			utils.printTabular([(DataProvider.URL, 'Filename'), (DataProvider.NEntries, 'Events')], block[DataProvider.FileList])
			print('')

	def printMetadata(src, maxlen):
		# Print '(key, value)' pairs, right-aligning keys to 'maxlen';
		# oversized values are truncated to a 200-char repr preview.
		for (mk, mv) in src:
			if len(str(mv)) > 200:
				mv = '<metadata entry size: %s> %s...' % (len(str(mv)), repr(mv)[:200])
			print('\t%s: %s' % (mk.rjust(maxlen), mv))
		if src:
			print('')

	# --- Per-file metadata -----------------------------------------------
	if opts.metadata and not opts.save:
		print('')
		for block in blocks:
			if len(datasets) > 1:
				print('Dataset: %s' % block[DataProvider.Dataset])
			print('Blockname: %s' % block[DataProvider.BlockName])
			mk_len = max(map(len, block.get(DataProvider.Metadata, [''])))
			for f in block[DataProvider.FileList]:
				print('%s [%d events]' % (f[DataProvider.URL], f[DataProvider.NEntries]))
				# Metadata keys live on the block; values live on each file.
				printMetadata(zip(block.get(DataProvider.Metadata, []), f.get(DataProvider.Metadata, [])), mk_len)
			print('')

	# --- Metadata common to all files of a block -------------------------
	if opts.blockmetadata and not opts.save:
		for block in blocks:
			if len(datasets) > 1:
				print('Dataset: %s' % block[DataProvider.Dataset])
			print('Blockname: %s' % block[DataProvider.BlockName])
			mkdict = lambda x: dict(zip(block[DataProvider.Metadata], x[DataProvider.Metadata]))
			# Start from the first file's metadata and intersect with every
			# file, leaving only entries identical across the whole block.
			metadata = utils.QM(block[DataProvider.FileList], mkdict(block[DataProvider.FileList][0]), {})
			for fileInfo in block[DataProvider.FileList]:
				utils.intersectDict(metadata, mkdict(fileInfo))
			printMetadata(metadata.items(), max(map(len, metadata.keys())))

	# --- Storage element / location listing ------------------------------
	if opts.liststorage:
		print('')
		infos = {}
		print('Storage elements:')
		for block in blocks:
			dsName = block[DataProvider.Dataset]
			if len(headerbase) > 0:
				print('Dataset: %s' % dsName)
			if block.get(DataProvider.BlockName, None):
				print('Blockname: %s' % block[DataProvider.BlockName])
			# None = unconstrained, [] = located nowhere, list = SE names.
			if block[DataProvider.Locations] == None:
				print('\tNo location contraint specified')
			elif block[DataProvider.Locations] == []:
				print('\tNot located at anywhere')
			else:
				for se in block[DataProvider.Locations]:
					print('\t%s' % se)
			print('')

	# --- Machine-readable per-block summary with running event total -----
	if opts.info:
		evSum = 0
		for block in blocks:
			blockId = '%s %s' % (block.get(DataProvider.Dataset, '-'), block.get(DataProvider.BlockName, '-'))
			blockStorage = '-'
			if block.get(DataProvider.Locations, None):
				blockStorage = str.join(',', block.get(DataProvider.Locations, '-'))
			evSum += block.get(DataProvider.NEntries, 0)
			print('%s %s %d %d' % (blockId, blockStorage,
				block.get(DataProvider.NEntries, 0), evSum))

	# --- Save the (optionally sorted) dataset state to a file ------------
	if opts.save:
		print('')
		blocks = provider.getBlocks()
		if opts.sort:
			blocks.sort(key = lambda b: b[DataProvider.Dataset] + '#' + b[DataProvider.BlockName])
			for b in blocks:
				b[DataProvider.FileList].sort(key = lambda fi: fi[DataProvider.URL])
		provider.saveState(opts.save, blocks)
		print('Dataset information saved to ./%s' % opts.save)