import os, time

def resync(self):
	(result_redo, result_disable, result_sizeChange) = ParameterSource.resync(self)
	if self.resyncEnabled() and self.dataProvider:
		# Get old and new dataset information
		old = DataProvider.loadState(self.getDataPath('cache.dat')).getBlocks()
		self.dataProvider.clearCache()
		new = self.dataProvider.getBlocks()
		self.dataProvider.saveState(self.getDataPath('cache-new.dat'))

		# Use old splitting information to synchronize with new dataset infos
		jobChanges = self.dataSplitter.resyncMapping(self.getDataPath('map-new.tar'), old, new)
		if jobChanges:
			# Move current splitting to backup and use the new splitting from now on
			def backupRename(backup, cur, new):
				if self.keepOld:
					os.rename(self.getDataPath(cur), self.getDataPath(backup))
				os.rename(self.getDataPath(new), self.getDataPath(cur))
			backupRename('map-old-%d.tar' % time.time(), 'map.tar', 'map-new.tar')
			backupRename('cache-old-%d.dat' % time.time(), 'cache.dat', 'cache-new.dat')
			# Reload the new splitting and propagate the job changes
			old_maxN = self.dataSplitter.getMaxJobs()
			self.dataSplitter.importState(self.getDataPath('map.tar'))
			self.maxN = self.dataSplitter.getMaxJobs()
			result_redo.update(jobChanges[0])
			result_disable.update(jobChanges[1])
			result_sizeChange = result_sizeChange or (old_maxN != self.maxN)
		self.resyncFinished()
	return (result_redo, result_disable, result_sizeChange)
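# --- Illustrative sketch (not part of the original module) ---
# The timestamped backup-and-swap pattern used by resync() above, in isolation:
# the fresh file replaces the current one, and the superseded version is
# optionally kept under a unique name. The file names and the keep_old flag
# below are made up for this example.
import os, time

def swap_in(new_path, cur_path, keep_old = True):
	if keep_old and os.path.exists(cur_path):
		# Preserve the current file under a timestamped name before replacing it
		os.rename(cur_path, '%s.old-%d' % (cur_path, time.time()))
	os.rename(new_path, cur_path)  # rename is atomic on POSIX filesystems

if __name__ == '__main__':
	open('state.dat', 'w').write('old')
	open('state-new.dat', 'w').write('new')
	swap_in('state-new.dat', 'state.dat')
	print open('state.dat').read()  # -> new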
print "Resetting attempts", jobNum jobinfo = jobDB.get(jobNum) jobinfo.attempt = 0 jobinfo.history = {} for key in jobinfo.dict.keys(): if key.startswith('history'): jobinfo.dict.pop(key) jobDB.commit(jobNum, jobinfo) print str.join(' ', map(str, jobDB.getJobsIter(selected))) if opts.diff: if len(args) != 2: utils.exitWithUsage("%s <dataset source 1> <dataset source 2>" % sys.argv[0]) utils.eprint = lambda *x: {} a = DataProvider.loadState(args[0]) b = DataProvider.loadState(args[1]) (blocksAdded, blocksMissing, blocksChanged) = DataProvider.resyncSources(a.getBlocks(), b.getBlocks()) utils.printTabular([(DataProvider.Dataset, "Dataset"), (DataProvider.BlockName, "Block")], blocksMissing) if opts.findrm: removed = [] utils.eprint = lambda *x: {} oldDP = DataProvider.loadState(args[0]) for new in args[1:]: newDP = DataProvider.loadState(new) (blocksAdded, blocksMissing, blocksChanged) = DataProvider.resyncSources(oldDP.getBlocks(), newDP.getBlocks()) for block in blocksMissing: tmp = dict(block) tmp[-1] = new removed.append(tmp)
def main():
	dataset = args[0].strip()
	cfgSettings = {'dbs blacklist T1': 'False', 'remove empty blocks': 'False',
		'remove empty files': 'False', 'location format': opts.locationfmt,
		'nickname check collision': 'False'}
	if opts.metadata or opts.blockmetadata:
		cfgSettings['lumi filter'] = '-'
		cfgSettings['keep lumi metadata'] = 'True'

	# Assemble a minimal configuration for the dataset provider
	section = 'dataset'
	fillerList = [DefaultFilesConfigFiller()]
	if opts.settings:
		fillerList.append(FileConfigFiller([opts.settings]))
		tmpCfg = Config(fillerList, opts.settings)
		section = tmpCfg.get('global', ['task', 'module'])
	dummyConfig = Config(fillerList + [DictConfigFiller({section: cfgSettings})], opts.settings)
	dummyConfig.opts = opts
	dummyConfig = dummyConfig.addSections(['dataset'])

	# Load the dataset information from a cache file or query the configured provider
	if os.path.exists(dataset):
		provider = DataProvider.loadState(dataset, dummyConfig)
	else:
		provider = DataProvider.create(dummyConfig, dataset, opts.provider)
	blocks = provider.getBlocks()
	if len(blocks) == 0:
		raise DatasetError('No blocks!')

	datasets = set(map(lambda x: x[DataProvider.Dataset], blocks))
	if (len(datasets) > 1) or opts.info:
		headerbase = [(DataProvider.Dataset, 'Dataset')]
	else:
		print 'Dataset: %s' % blocks[0][DataProvider.Dataset]
		headerbase = []

	if opts.configentry:
		print
		print 'dataset ='
		infos = {}
		order = []
		maxnick = 5
		for block in blocks:
			dsName = block[DataProvider.Dataset]
			if not infos.get(dsName, None):
				order.append(dsName)
				infos[dsName] = dict([(DataProvider.Dataset, dsName)])
			# Derive a nickname from the dataset path, then fall back to the nickname producer
			if (DataProvider.Nickname not in block) and opts.confignick:
				try:
					if '/' in dsName:
						block[DataProvider.Nickname] = dsName.lstrip('/').split('/')[1]
					else:
						block[DataProvider.Nickname] = dsName
				except Exception:
					pass
			if (DataProvider.Nickname not in block) and opts.confignick:
				block[DataProvider.Nickname] = np.getName(None, dsName, block)
			if DataProvider.Nickname in block:
				nick = block[DataProvider.Nickname]
				infos[dsName][DataProvider.Nickname] = nick
				maxnick = max(maxnick, len(nick))
			if len(block[DataProvider.FileList]):
				infos[dsName][DataProvider.URL] = block[DataProvider.FileList][0][DataProvider.URL]
		for dsID, dsName in enumerate(order):
			info = infos[dsName]
			short = DataProvider.providers.get(provider.__class__.__name__, provider.__class__.__name__)
			print '', info.get(DataProvider.Nickname, 'nick%d' % dsID).rjust(maxnick), ':', short, ':',
			print '%s%s' % (provider._datasetExpr, QM(short == 'list', ' %% %s' % info[DataProvider.Dataset], ''))

	if opts.listdatasets:
		# Add some enums for consistent access to info dicts
		DataProvider.NFiles = -1
		DataProvider.NBlocks = -2
		print
		infos = {}
		order = []
		infosum = {DataProvider.Dataset: 'Sum'}
		for block in blocks:
			dsName = block.get(DataProvider.Dataset, '')
			if not infos.get(dsName, None):
				order.append(dsName)
				infos[dsName] = {DataProvider.Dataset: block[DataProvider.Dataset]}
			def updateInfos(target):
				target[DataProvider.NBlocks] = target.get(DataProvider.NBlocks, 0) + 1
				target[DataProvider.NFiles] = target.get(DataProvider.NFiles, 0) + len(block[DataProvider.FileList])
				target[DataProvider.NEntries] = target.get(DataProvider.NEntries, 0) + block[DataProvider.NEntries]
			updateInfos(infos[dsName])
			updateInfos(infosum)
		head = [(DataProvider.Dataset, 'Dataset'), (DataProvider.NEntries, '#Events'),
			(DataProvider.NBlocks, '#Blocks'), (DataProvider.NFiles, '#Files')]
		utils.printTabular(head, map(lambda x: infos[x], order) + ["=", infosum])

	if opts.listblocks:
		print
		utils.printTabular(headerbase + [(DataProvider.BlockName, 'Block'), (DataProvider.NEntries, 'Events')], blocks)

	if opts.listfiles:
		print
		for block in blocks:
			if len(datasets) > 1:
				print 'Dataset: %s' % block[DataProvider.Dataset]
			print 'Blockname: %s' % block[DataProvider.BlockName]
			utils.printTabular([(DataProvider.URL, 'Filename'), (DataProvider.NEntries, 'Events')], block[DataProvider.FileList])
			print

	def printMetadata(src, maxlen):
		for (mk, mv) in src:
			if len(str(mv)) > 200:  # Truncate overly long metadata values
				mv = '<metadata entry size: %s> %s...' % (len(str(mv)), repr(mv)[:200])
			print '\t%s: %s' % (mk.rjust(maxlen), mv)
		if src:
			print

	if opts.metadata and not opts.save:
		print
		for block in blocks:
			if len(datasets) > 1:
				print 'Dataset: %s' % block[DataProvider.Dataset]
			print 'Blockname: %s' % block[DataProvider.BlockName]
			mk_len = max(map(len, block.get(DataProvider.Metadata, [''])))
			for f in block[DataProvider.FileList]:
				print '%s [%d events]' % (f[DataProvider.URL], f[DataProvider.NEntries])
				printMetadata(zip(block.get(DataProvider.Metadata, []), f.get(DataProvider.Metadata, [])), mk_len)
			print

	if opts.blockmetadata and not opts.save:
		for block in blocks:
			if len(datasets) > 1:
				print 'Dataset: %s' % block[DataProvider.Dataset]
			print 'Blockname: %s' % block[DataProvider.BlockName]
			# Keep only the metadata entries that are identical for all files in the block
			mkdict = lambda x: dict(zip(block[DataProvider.Metadata], x[DataProvider.Metadata]))
			metadata = QM(block[DataProvider.FileList], mkdict(block[DataProvider.FileList][0]), {})
			for fileInfo in block[DataProvider.FileList]:
				utils.intersectDict(metadata, mkdict(fileInfo))
			printMetadata(metadata.items(), max(map(len, metadata.keys())))

	if opts.liststorage:
		print
		print 'Storage elements:'
		for block in blocks:
			dsName = block[DataProvider.Dataset]
			if len(headerbase) > 0:
				print 'Dataset: %s' % dsName
			if block.get(DataProvider.BlockName, None):
				print 'Blockname: %s' % block[DataProvider.BlockName]
			if block[DataProvider.Locations] is None:
				print '\tNo location constraint specified'
			elif block[DataProvider.Locations] == []:
				print '\tNot located anywhere'
			else:
				for se in block[DataProvider.Locations]:
					print '\t%s' % se
			print

	if opts.info:
		evSum = 0
		for block in blocks:
			print block.get(DataProvider.Dataset, '-'),
			print block.get(DataProvider.BlockName, '-'),
			if block.get(DataProvider.Locations, None):
				print str.join(',', block.get(DataProvider.Locations, '-')),
			else:
				print '-',
			print block.get(DataProvider.NEntries, 0)
			evSum += block.get(DataProvider.NEntries, 0)
		print evSum

	if opts.save:
		print
		blocks = provider.getBlocks()
		if opts.sort:
			# Sort blocks and their file lists for reproducible output
			blocks.sort(key = lambda b: b[DataProvider.Dataset] + '#' + b[DataProvider.BlockName])
			for b in blocks:
				b[DataProvider.FileList].sort(key = lambda fi: fi[DataProvider.URL])
		provider.saveState(opts.save, blocks)
		print 'Dataset information saved to ./%s' % opts.save
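# --- Illustrative sketches (not part of the original script) ---
# 1) The nickname fallback in main() above takes the second path component of a
#    DBS-style dataset path. The dataset name below is made up for this example:
dsName = '/MinimumBias/Run2011A-v1/RAW'
print dsName.lstrip('/').split('/')[1]  # -> Run2011A-v1

# 2) The block metadata output keeps only those entries that are identical for
#    every file in a block. A minimal stand-in for utils.intersectDict, assuming
#    plain dicts (the real helper may differ):
def intersectPlainDict(target, other):
	for key in list(target.keys()):
		if (key not in other) or (other[key] != target[key]):
			del target[key]

fileMeta = [{'run': 1, 'tag': 'v1'}, {'run': 2, 'tag': 'v1'}]
common = dict(fileMeta[0])
for fi in fileMeta[1:]:
	intersectPlainDict(common, fi)
print common  # -> {'tag': 'v1'}; 'run' differs between the files and is dropped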