def resync(self):
	"""Resynchronize this parameter source with the current dataset content.

	Extends the base ParameterSource.resync result: reloads the dataset
	blocks from the provider, writes the refreshed cache, and - if the
	partition mapping changed - swaps in the newly written splitting files
	(keeping timestamped backups when self._keepOld is set).

	Returns:
		Tuple (result_redo, result_disable, result_sizeChange) where the
		first two are sets of job numbers and the last is a boolean.
	"""
	# Start from the base class result and extend it below.
	(result_redo, result_disable, result_sizeChange) = ParameterSource.resync(self)
	if self.resyncEnabled() and self._dataProvider:
		# Get old and new dataset information
		old = DataProvider.loadFromFile(self.getDataPath('cache.dat')).getBlocks()
		self._dataProvider.clearCache()  # force a fresh query of the data source
		new = self._dataProvider.getBlocks()
		self._dataProvider.saveToFile(self.getDataPath('cache-new.dat'), new)

		# Use old splitting information to synchronize with new dataset infos
		jobChanges = self._dataSplitter.resyncMapping(self.getDataPath('map-new.tar'), old, new)
		if jobChanges:
			# Move current splitting to backup and use the new splitting from now on
			def backupRename(old, cur, new):
				# Keep a timestamped copy of the current file only when requested
				if self._keepOld:
					os.rename(self.getDataPath(cur), self.getDataPath(old))
				os.rename(self.getDataPath(new), self.getDataPath(cur))
			backupRename('map-old-%d.tar' % time.time(), 'map.tar', 'map-new.tar')
			backupRename('cache-old-%d.dat' % time.time(), 'cache.dat', 'cache-new.dat')
			# Re-import the activated partition file and record the size change
			old_maxN = self._dataSplitter.getMaxJobs()
			self._dataSplitter.importPartitions(self.getDataPath('map.tar'))
			self._maxN = self._dataSplitter.getMaxJobs()
			result_redo.update(jobChanges[0])
			result_disable.update(jobChanges[1])
			result_sizeChange = result_sizeChange or (old_maxN != self._maxN)
		self.resyncFinished()
	return (result_redo, result_disable, result_sizeChange)
def _displaySetup(self, dsPath, head):
	"""Print a table mapping each dataset nickname to its settings."""
	if not os.path.exists(dsPath):
		return
	# Collect all nicknames found in the dataset file
	blockList = DataProvider.loadFromFile(dsPath).getBlocks()
	nickList = sorted(set(imap(lambda blockInfo: blockInfo[DataProvider.Nickname], blockList)))
	utils.vprint('Mapping between nickname and other settings:\n', -1)
	rows = []
	for nickname in nickList:
		lumiEntries = formatLumi(self._nmLumi.lookup(nickname, '', is_selector=False))
		# Abbreviate long lumi filter lists to first/last entry plus a count
		if len(lumiEntries) <= 4:
			lumiSummary = str.join(', ', lumiEntries)
		else:
			lumiSummary = '%s ... %s (%d entries)' % (lumiEntries[0], lumiEntries[-1], len(lumiEntries))
		cfgList = self._nmCfg.lookup(nickname, '', is_selector=False)
		row = {0: nickname, 1: str.join(', ', imap(os.path.basename, cfgList)), 2: lumiSummary}
		# Merge in the lookup variables provided by the parameter sources
		variables = {'DATASETNICK': nickname}
		for source in self._pm.lookupSources:
			source.fillParameterInfo(None, variables)
		row.update(variables)
		rows.append(row)
	utils.printTabular(head, rows, 'cl')
	utils.vprint(level=-1)
def _displaySetup(self, dsPath, head):
	"""Log a table mapping each dataset nickname to its settings."""
	if not os.path.exists(dsPath):
		return
	nickSet = set(imap(lambda blockInfo: blockInfo[DataProvider.Nickname],
		DataProvider.loadFromFile(dsPath).getBlocks()))
	userLog = logging.getLogger('user')
	userLog.info('Mapping between nickname and other settings:')
	(basicSources, nestedSources) = self._pfactory.getLookupSources()
	if nestedSources:
		# Nested sources cannot be summarized in a single table cell
		userLog.info('This list doesn\'t show "nickname constants" with multiple values!')
	rows = []
	for nickname in sorted(nickSet):
		# Row starts with the lookup variables and gains config/lumi columns
		row = {'DATASETNICK': nickname}
		for basicSource in basicSources:
			basicSource.fillParameterInfo(None, row)
		row[1] = str.join(', ', imap(os.path.basename,
			self._nmCfg.lookup(nickname, '', is_selector=False)))
		row[2] = formatLumiNice(self._nmLumi.lookup(nickname, '', is_selector=False))
		rows.append(row)
	utils.printTabular(head, rows, 'cl')
def _resync(self):
	"""Resynchronize the datasource partitions with the current dataset.

	Reloads the dataset blocks from the provider, compares them against the
	cached state and, when the partition mapping changed, activates the new
	splitting files (keeping timestamped backups when self._keepOld is set).

	Returns:
		(redo_jobs, disable_jobs, size_changed) when the mapping changed,
		otherwise None (implicitly, when nothing changed or no provider).
	"""
	if self._data_provider:
		activity = Activity('Performing resync of datasource %r' % self._name)
		# Get old and new dataset information
		ds_old = DataProvider.loadFromFile(self._getDataPath('cache.dat')).getBlocks(show_stats = False)
		self._data_provider.clearCache()  # force a fresh query of the data source
		ds_new = self._data_provider.getBlocks(show_stats = False)
		self._data_provider.saveToFile(self._getDataPath('cache-new.dat'), ds_new)

		# Use old splitting information to synchronize with new dataset infos
		old_maxN = self._data_splitter.getMaxJobs()
		jobChanges = self._data_splitter.resyncMapping(self._getDataPath('map-new.tar'), ds_old, ds_new)
		activity.finish()
		if jobChanges is not None:
			# Move current splitting to backup and use the new splitting from now on
			def backupRename(old, cur, new):
				# Keep a timestamped copy of the current file only when requested
				if self._keepOld:
					os.rename(self._getDataPath(cur), self._getDataPath(old))
				os.rename(self._getDataPath(new), self._getDataPath(cur))
			backupRename('map-old-%d.tar' % time.time(), 'map.tar', 'map-new.tar')
			backupRename('cache-old-%d.dat' % time.time(), 'cache.dat', 'cache-new.dat')
			# Re-import the activated partition file and report the size change
			self._data_splitter.importPartitions(self._getDataPath('map.tar'))
			self._maxN = self._data_splitter.getMaxJobs()
			self._log.debug('Dataset resync finished: %d -> %d partitions', old_maxN, self._maxN)
			return (set(jobChanges[0]), set(jobChanges[1]), old_maxN != self._maxN)
def _displaySetup(self, dsPath, head):
	"""Log the nickname -> settings mapping table for the dataset file."""
	if not os.path.exists(dsPath):
		return
	nickname_set = set()
	for block_info in DataProvider.loadFromFile(dsPath).getBlocks(show_stats = False):
		nickname_set.add(block_info[DataProvider.Nickname])
	user_log = logging.getLogger('user')
	user_log.info('Mapping between nickname and other settings:')
	(lookup_basic, lookup_nested) = self._pfactory.getLookupSources()
	if lookup_nested:
		# Nested sources cannot be summarized in a single table cell
		user_log.info('This list doesn\'t show "nickname constants" with multiple values!')

	def _row_for(nickname):
		# Row starts with the lookup variables and gains config/lumi columns
		entry = {'DATASETNICK': nickname}
		for lookup_src in lookup_basic:
			lookup_src.fillParameterInfo(None, entry)
		cfg_names = imap(os.path.basename, self._nmCfg.lookup(nickname, '', is_selector = False))
		entry[1] = str.join(', ', cfg_names)
		entry[2] = formatLumiNice(self._nmLumi.lookup(nickname, '', is_selector = False))
		return entry

	utils.printTabular(head, [_row_for(nickname) for nickname in sorted(nickname_set)], 'cl')
def _displaySetup(self, dsPath, head):
	"""Show a table relating each dataset nickname to its settings."""
	if not os.path.exists(dsPath):
		return

	def _summarizeLumi(nick):
		# Compact display of the lumi filter for one nickname
		entries = formatLumi(self._nmLumi.lookup(nick, '', is_selector=False))
		if len(entries) > 4:
			return '%s ... %s (%d entries)' % (entries[0], entries[-1], len(entries))
		return str.join(', ', entries)

	def _buildRow(nick):
		# One table row: nickname, config files, lumi summary + lookup variables
		cfgFiles = self._nmCfg.lookup(nick, '', is_selector=False)
		row = {0: nick, 1: str.join(', ', imap(os.path.basename, cfgFiles)), 2: _summarizeLumi(nick)}
		lookupVars = {'DATASETNICK': nick}
		for lookupSource in self._pm.lookupSources:
			lookupSource.fillParameterInfo(None, lookupVars)
		row.update(lookupVars)
		return row

	allNicks = set()
	for blockInfo in DataProvider.loadFromFile(dsPath).getBlocks():
		allNicks.add(blockInfo[DataProvider.Nickname])
	utils.vprint('Mapping between nickname and other settings:\n', -1)
	utils.printTabular(head, [_buildRow(nick) for nick in sorted(allNicks)], 'cl')
	utils.vprint(level=-1)
def _resync(self):
	"""Resynchronize the datasource partitions with the current dataset.

	Reloads the dataset blocks from the provider, compares them against the
	cached state and, when the partition mapping changed, activates the new
	splitting files (keeping timestamped backups when self._keepOld is set).

	Returns:
		(redo_jobs, disable_jobs, size_changed) when the mapping changed,
		otherwise None (implicitly, when nothing changed or no provider).
	"""
	if self._data_provider:
		activity = Activity('Performing resync of datasource %r' % self._name)
		# Get old and new dataset information
		ds_old = DataProvider.loadFromFile(
			self._getDataPath('cache.dat')).getBlocks(show_stats=False)
		self._data_provider.clearCache()  # force a fresh query of the data source
		ds_new = self._data_provider.getBlocks(show_stats=False)
		self._data_provider.saveToFile(self._getDataPath('cache-new.dat'), ds_new)

		# Use old splitting information to synchronize with new dataset infos
		old_maxN = self._data_splitter.getMaxJobs()
		jobChanges = self._data_splitter.resyncMapping(
			self._getDataPath('map-new.tar'), ds_old, ds_new)
		activity.finish()
		if jobChanges is not None:
			# Move current splitting to backup and use the new splitting from now on
			def backupRename(old, cur, new):
				# Keep a timestamped copy of the current file only when requested
				if self._keepOld:
					os.rename(self._getDataPath(cur), self._getDataPath(old))
				os.rename(self._getDataPath(new), self._getDataPath(cur))
			backupRename('map-old-%d.tar' % time.time(), 'map.tar', 'map-new.tar')
			backupRename('cache-old-%d.dat' % time.time(), 'cache.dat', 'cache-new.dat')
			# Re-import the activated partition file and report the size change
			self._data_splitter.importPartitions(
				self._getDataPath('map.tar'))
			self._maxN = self._data_splitter.getMaxJobs()
			self._log.debug('Dataset resync finished: %d -> %d partitions', old_maxN, self._maxN)
			return (set(jobChanges[0]), set(jobChanges[1]), old_maxN != self._maxN)
def resync(self):
	"""Resynchronize this parameter source with the current dataset content.

	Extends the base ParameterSource.resync result: reloads the dataset
	blocks from the provider, writes the refreshed cache, and - if the
	partition mapping changed - swaps in the newly written splitting files
	(keeping timestamped backups when self._keepOld is set).

	Returns:
		Tuple (result_redo, result_disable, result_sizeChange) where the
		first two are sets of job numbers and the last is a boolean.
	"""
	# Start from the base class result and extend it below.
	(result_redo, result_disable, result_sizeChange) = ParameterSource.resync(self)
	if self.resyncEnabled() and self._dataProvider:
		# Get old and new dataset information
		old = DataProvider.loadFromFile(
			self.getDataPath('cache.dat')).getBlocks()
		self._dataProvider.clearCache()  # force a fresh query of the data source
		new = self._dataProvider.getBlocks()
		self._dataProvider.saveToFile(self.getDataPath('cache-new.dat'), new)

		# Use old splitting information to synchronize with new dataset infos
		jobChanges = self._dataSplitter.resyncMapping(
			self.getDataPath('map-new.tar'), old, new)
		if jobChanges:
			# Move current splitting to backup and use the new splitting from now on
			def backupRename(old, cur, new):
				# Keep a timestamped copy of the current file only when requested
				if self._keepOld:
					os.rename(self.getDataPath(cur), self.getDataPath(old))
				os.rename(self.getDataPath(new), self.getDataPath(cur))
			backupRename('map-old-%d.tar' % time.time(), 'map.tar', 'map-new.tar')
			backupRename('cache-old-%d.dat' % time.time(), 'cache.dat', 'cache-new.dat')
			# Re-import the activated partition file and record the size change
			old_maxN = self._dataSplitter.getMaxJobs()
			self._dataSplitter.importPartitions(
				self.getDataPath('map.tar'))
			self._maxN = self._dataSplitter.getMaxJobs()
			result_redo.update(jobChanges[0])
			result_disable.update(jobChanges[1])
			result_sizeChange = result_sizeChange or (old_maxN != self._maxN)
		self.resyncFinished()
	return (result_redo, result_disable, result_sizeChange)