def _resync_partitions(self, path, block_list_old, block_list_new):
	partition_resync_handler = self._splitter.get_resync_handler()
	progress = ProgressActivity(progress_max=self.get_parameter_len(),
		msg='Writing resynchronized dataset partitions (progress is estimated)')
	path_tmp = path + '.tmp'
	try:
		resync_result = partition_resync_handler.resync(
			self._splitter, self._reader, block_list_old, block_list_new)
		DataSplitter.save_partitions(path_tmp, resync_result.partition_iter, progress)
	except Exception:
		raise DatasetError('Unable to resync %r' % self.get_datasource_name())
	os.rename(path_tmp, path)
	return (resync_result.pnum_list_redo, resync_result.pnum_list_disable)
def __init__(self, config, datasource_name, repository, keep_old=True):
	BaseDataParameterSource.__init__(self, config, datasource_name, repository)

	# hide provider property set by __new__
	self._provider = self.provider
	del self.provider

	if self._provider.need_init_query():
		self._provider.get_block_list_cached(show_stats=False)

	data_src_text = 'Dataset source %r' % datasource_name
	# Select dataset refresh rate
	data_refresh = config.get_time('%s refresh' % datasource_name, -1, on_change=None)
	if data_refresh >= 0:
		data_refresh = max(data_refresh, self._provider.get_query_interval())
		self._log.info('%s will be queried every %s', data_src_text, str_time_long(data_refresh))
	self.setup_resync(interval=data_refresh, force=config.get_state('resync', detail='datasets'))

	splitter_name = config.get('%s splitter' % datasource_name, 'FileBoundarySplitter')
	splitter_cls = self._provider.check_splitter(DataSplitter.get_class(splitter_name))
	self._splitter = splitter_cls(config, datasource_name)

	# Settings:
	(self._dn, self._keep_old) = (config.get_work_path(), keep_old)
	ensure_dir_exists(self._dn, 'partition map directory', DatasetError)

	self._set_reader(self._init_reader())
	if not self.get_parameter_len():
		if data_refresh < 0:
			raise UserError('%s does not provide jobs to process' % data_src_text)
		self._log.warning('%s does not provide jobs to process', data_src_text)
def _resync_psrc(self):
	activity = Activity('Performing resync of datasource %r' % self.get_datasource_name())
	# Get old and new dataset information
	provider_old = DataProvider.load_from_file(self._get_data_path('cache.dat'))
	block_list_old = provider_old.get_block_list_cached(show_stats=False)
	self._provider.clear_cache()
	block_list_new = self._provider.get_block_list_cached(show_stats=False)
	self._provider.save_to_file(self._get_data_path('cache-new.dat'), block_list_new)

	# Use old splitting information to synchronize with new dataset infos
	partition_len_old = self.get_parameter_len()
	partition_changes = self._resync_partitions(
		self._get_data_path('map-new.tar'), block_list_old, block_list_new)
	activity.finish()
	if partition_changes is not None:
		# Move current splitting to backup and use the new splitting from now on
		def _rename_with_backup(new, cur, old):
			if self._keep_old:
				os.rename(self._get_data_path(cur), self._get_data_path(old))
			os.rename(self._get_data_path(new), self._get_data_path(cur))
		_rename_with_backup('map-new.tar', 'map.tar', 'map-old-%d.tar' % time.time())
		_rename_with_backup('cache-new.dat', 'cache.dat', 'cache-old-%d.dat' % time.time())
		self._set_reader(DataSplitter.load_partitions(self._get_data_path('map.tar')))
		self._log.debug('Dataset resync finished: %d -> %d partitions', partition_len_old, self._len)
		(pnum_list_redo, pnum_list_disable) = partition_changes
		return (set(pnum_list_redo), set(pnum_list_disable), partition_len_old != self._len)
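# The rename choreography used by _resync_psrc, in isolation. This is a hedged
# sketch (promote() is a made-up helper, not grid-control API): the freshly
# written '*-new' file replaces the live file only after the live file has been
# moved aside, so a crash between the two renames loses no state.
import os
import time

def promote(new_path, cur_path, keep_old=True):
	if keep_old and os.path.exists(cur_path):
		# keep a timestamped backup of the current state, as _rename_with_backup does
		os.rename(cur_path, '%s.old-%d' % (cur_path, time.time()))
	os.rename(new_path, cur_path)  # atomic on POSIX within one filesystem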
def setupJobParameters(self, config, pm):
	config = config.addSections(['dataset']).addTags([self])
	self.dataSplitter = None
	self.dataRefresh = None
	self.dataset = config.get('dataset', '').strip()
	if self.dataset == '':
		return
	config.set('se output pattern', '@NICK@_job_@MY_JOBID@_@X@', override = False)
	config.set('default lookup', 'DATASETNICK', override = False)

	defaultProvider = config.get('dataset provider', 'ListProvider')
	dataProvider = DataProvider.create(config, self.dataset, defaultProvider)
	splitterName = config.get('dataset splitter', 'FileBoundarySplitter')
	splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
	self.dataSplitter = splitterClass(config)
	self.checkSE = config.getBool('dataset storage check', True, onChange = None)

	# Create and register dataset parameter plugin
	paramSource = DataParameterSource(config.getWorkPath(), 'data',
		dataProvider, self.dataSplitter, self.initDataProcessor())
	DataParameterSource.datasetsAvailable['data'] = paramSource

	# Select dataset refresh rate
	self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
	if self.dataRefresh > 0:
		paramSource.resyncSetup(interval = max(self.dataRefresh, dataProvider.queryLimit()))
		utils.vprint('Dataset source will be queried every %s' % utils.strTime(self.dataRefresh), -1)
	else:
		paramSource.resyncSetup(interval = 0)

	def externalRefresh(sig, frame):
		paramSource.resyncSetup(force = True)
	signal.signal(signal.SIGUSR2, externalRefresh)

	if self.dataSplitter.getMaxJobs() == 0:
		raise UserError('There are no events to process')
def partition_display(opts, partition_iter):
	def _iter_partitions():
		for partition_num, partition in enumerate(partition_iter):
			partition['partition_num'] = partition_num
			yield partition
	header_list = lmap(lambda key: (key, DataSplitter.enum2str(key)), opts.partition_key_list)
	ConsoleTable.create([('partition_num', 'Partition')] + header_list, _iter_partitions())
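# Hedged usage sketch for partition_display: _FakeOpts and the literal
# partition dicts below are invented for illustration (they mirror the dummy
# partitions built by setup_dataset elsewhere in this collection); only
# partition_display itself and the DataSplitter enums come from the code above.
class _FakeOpts(object):
	partition_key_list = [DataSplitter.Dataset, DataSplitter.NEntries]

partition_display(_FakeOpts(), iter([
	{DataSplitter.Dataset: 'ds1', DataSplitter.NEntries: 23},
	{DataSplitter.Dataset: 'ds2', DataSplitter.NEntries: 42},
]))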
class RunSplitter(DataSplitter.getClass('MetadataSplitter')):
	def _initConfig(self, config):
		self._run_range = self._configQuery(config.getInt, 'run range', 1)

	def metaKey(self, metadataNames, block, fi):
		selRunRange = self._setup(self._run_range, block)
		mdIdx = metadataNames.index('Runs')
		return lmap(lambda r: int(r / selRunRange), fi[DataProvider.Metadata][mdIdx])
def setup_dataset(config, dataset):
	if dataset.lower() == 'true':
		utils.vprint('Registering dummy data provider data')
		dataSplitter = DummySplitter()
	else:
		dataSplitter = DataSplitter.loadStateForScript(dataset)
	DataParameterSource = ParameterSource.getClass('DataParameterSource')
	DataParameterSource.datasetsAvailable['data'] = DataParameterSource(
		config.getWorkPath(), 'data', None, dataSplitter, DataSplitProcessorTest())
def main(opts, args):
	if opts.gc or opts.json or opts.full:
		return lumi_expr(opts, args)
	if opts.job_json or opts.job_gc or opts.job_events:
		(config, jobDB) = initGC(args)
		workDir = config.getWorkPath()
		splitter = None
		try:
			splitter = DataSplitter.loadStateForScript(os.path.join(workDir, 'datamap.tar'))
		except Exception:
			pass
		return lumi_calc(opts, workDir,
			sorted(jobDB.getJobs(ClassSelector(JobClass.SUCCESS))), splitter)
def setup_dataset(config, dataset):
	if dataset.lower() == 'true':
		log.info('Registering dummy data provider data')
		dataSplitter = DummySplitter()
	else:
		dataSplitter = DataSplitter.loadPartitionsForScript(dataset)
	config = config.changeView(setSections = None)
	partProcessor = config.getCompositePlugin('partition processor',
		'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor BasicPartitionProcessor',
		'MultiPartitionProcessor', cls = 'PartitionProcessor', onChange = None)
	ParameterSource.createInstance('DataParameterSource', config.getWorkPath(),
		'data', None, dataSplitter, partProcessor, repository)
def setupJobParameters(self, config, pm):
	config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['dataset'])
	self.dataSplitter = None
	self.dataRefresh = -1

	def userRefresh(config, old_obj, cur_obj, cur_entry, obj2str):
		if (old_obj == '') and (cur_obj != ''):
			raise UserError('It is currently not possible to attach a dataset to a non-dataset task!')
		self._log.info('Dataset setup was changed - forcing resync...')
		config.setState(True, 'resync', detail = 'dataset')
		config.setState(True, 'init', detail = 'config')  # This will trigger a write of the new options
		return cur_obj

	dataProvider = config.getCompositePlugin('dataset', '', ':MultiDatasetProvider:',
		cls = DataProvider, requirePlugin = False, onChange = userRefresh)
	self._forceRefresh = config.getState('resync', detail = 'dataset')
	config.setState(False, 'resync', detail = 'dataset')
	if not dataProvider:
		return

	tmp_config = config.changeView(viewClass = 'TaggedConfigView',
		setClasses = None, setNames = None, setTags = [], addSections = ['storage'])
	tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
	tmp_config = config.changeView(viewClass = 'TaggedConfigView',
		setClasses = None, setNames = None, setTags = [], addSections = ['parameters'])
	tmp_config.set('default lookup', 'DATASETNICK')

	splitterName = config.get('dataset splitter', 'FileBoundarySplitter')
	splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
	self.dataSplitter = splitterClass(config)

	# Create and register dataset parameter source
	partProcessor = config.getCompositePlugin('partition processor',
		'BasicPartitionProcessor LocationPartitionProcessor',
		'MultiPartitionProcessor', cls = PartitionProcessor)
	DataParameterSource = ParameterSource.getClass('DataParameterSource')
	self._dataPS = DataParameterSource(config.getWorkPath(), 'data',
		dataProvider, self.dataSplitter, partProcessor)
	DataParameterSource.datasetsAvailable['data'] = self._dataPS

	# Select dataset refresh rate
	self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
	if self.dataRefresh > 0:
		self._dataPS.resyncSetup(interval = max(self.dataRefresh, dataProvider.queryLimit()))
		utils.vprint('Dataset source will be queried every %s' % strTime(self.dataRefresh), -1)
	else:
		self._dataPS.resyncSetup(interval = 0)
	if self._forceRefresh:
		self._dataPS.resyncSetup(force = True)

	def externalRefresh(sig, frame):
		self._dataPS.resyncSetup(force = True)
	signal.signal(signal.SIGUSR2, externalRefresh)

	if self.dataSplitter.getMaxJobs() == 0:
		raise UserError('There are no events to process')
def _setupJobParameters(self, config):
	data_config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['dataset'])
	self.dataSplitter = None
	self._data_refresh = -1

	def userRefresh(config, old_obj, cur_obj, cur_entry, obj2str):
		if (old_obj == '') and (cur_obj != ''):
			raise UserError('It is currently not possible to attach a dataset to a non-dataset task!')
		self._log.info('Dataset setup was changed - forcing resync...')
		config.setState(True, 'resync', detail = 'dataset')
		config.setState(True, 'init', detail = 'config')  # This will trigger a write of the new options
		return cur_obj

	dataProvider = data_config.getCompositePlugin('dataset', '', ':MultiDatasetProvider:',
		cls = DataProvider, requirePlugin = False, onChange = userRefresh)
	self._forceRefresh = config.getState('resync', detail = 'dataset')
	config.setState(False, 'resync', detail = 'dataset')
	if not dataProvider:
		return

	tmp_config = data_config.changeView(viewClass = 'TaggedConfigView',
		setClasses = None, setNames = None, setTags = [], addSections = ['storage'])
	tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
	tmp_config = data_config.changeView(viewClass = 'TaggedConfigView',
		setClasses = None, setNames = None, setTags = [], addSections = ['parameters'])
	tmp_config.set('default lookup', 'DATASETNICK')

	splitterName = data_config.get('dataset splitter', 'FileBoundarySplitter')
	splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
	self.dataSplitter = splitterClass(data_config)

	# Create and register dataset parameter source
	partProcessor = data_config.getCompositePlugin('partition processor',
		'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor BasicPartitionProcessor',
		'MultiPartitionProcessor', cls = PartitionProcessor, onChange = triggerResync(['parameters']))
	DataParameterSource = ParameterSource.getClass('DataParameterSource')
	self._dataPS = DataParameterSource(data_config.getWorkPath(), 'data',
		dataProvider, self.dataSplitter, partProcessor)
	DataParameterSource.datasetsAvailable['data'] = self._dataPS

	# Select dataset refresh rate
	self._data_refresh = data_config.getTime('dataset refresh', -1, onChange = None)
	if self._data_refresh > 0:
		self._dataPS.resyncSetup(interval = max(self._data_refresh, dataProvider.queryLimit()))
		utils.vprint('Dataset source will be queried every %s' % strTime(self._data_refresh), -1)
	else:
		self._dataPS.resyncSetup(interval = 0)
	if self._forceRefresh:
		self._dataPS.resyncSetup(force = True)

	def externalRefresh(sig, frame):
		self._dataPS.resyncSetup(force = True)
	signal.signal(signal.SIGUSR2, externalRefresh)

	if self.dataSplitter.getMaxJobs() == 0:
		raise UserError('There are no events to process')
def _init_reader(self):
	# look for aborted inits / resyncs - and try to restore old state if possible
	if self._exists_data_path('map.tar.resync') and self._exists_data_path('cache.dat.resync'):
		rename_file(self._get_data_path('cache.dat.resync'), self._get_data_path('cache.dat'))
		rename_file(self._get_data_path('map.tar.resync'), self._get_data_path('map.tar'))
	elif self._exists_data_path('map.tar.resync') or self._exists_data_path('cache.dat.resync'):
		raise DatasetError('Found broken dataset partition resync state in work directory')

	if self._exists_data_path('map.tar') and not self._exists_data_path('cache.dat'):
		raise DatasetError('Found broken dataset partition in work directory')
	elif not self._exists_data_path('map.tar'):
		# create initial partition map file
		if not self._exists_data_path('cache.dat'):
			provider = self._provider
		else:
			provider = DataProvider.load_from_file(self._get_data_path('cache.dat'))
		block_iter = DataProvider.save_to_file_iter(self._get_data_path('cache.dat.init'),
			provider.get_block_list_cached(show_stats=True))
		partition_iter = self._splitter.split_partitions(block_iter)
		DataSplitter.save_partitions(self._get_data_path('map.tar.init'), partition_iter)
		rename_file(self._get_data_path('cache.dat.init'), self._get_data_path('cache.dat'))
		rename_file(self._get_data_path('map.tar.init'), self._get_data_path('map.tar'))
	return DataSplitter.load_partitions(self._get_data_path('map.tar'))
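# The crash-safety pattern behind _init_reader, shown standalone (a sketch,
# not grid-control API; atomic_write() is a made-up name): state is written
# under a temporary '.init' name and only renamed into place once complete,
# so an aborted init leaves either no file or a recognizable leftover -
# never a half-written 'map.tar' or 'cache.dat'.
import os

def atomic_write(path, data):
	path_tmp = path + '.init'
	with open(path_tmp, 'w') as fp:  # partial writes only ever touch path_tmp
		fp.write(data)
	os.rename(path_tmp, path)  # atomic on POSIX within one filesystem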
def setup_dataset(config, dataset, repository):
	if dataset.lower() == 'true':
		logging.info('Registering dummy data provider data')

		def _create_partition(ds_name, nick, n_events, fn_list):
			return {DataSplitter.Dataset: ds_name, DataSplitter.Nickname: nick,
				DataSplitter.FileList: fn_list, DataSplitter.NEntries: n_events}
		reader = PartitionReader.create_instance('TrivialPartitionReader', [
			_create_partition('ds1', 'data_1', 23, ['a', 'b']),
			_create_partition('ds1', 'data_1', 42, ['1']),
			_create_partition('ds2', 'data_2', 123, ['m', 'n']),
			_create_partition('ds2', 'data_3', 987, ['x', 'y', 'z'])
		])
	else:
		reader = DataSplitter.load_partitions(dataset)
	config = config.change_view(set_sections=None, default_on_change=None)
	ParameterSource.create_instance('BaseDataParameterSource', config, 'dataset', repository, reader)
def setup_dataset(config, dataset):
	if dataset.lower() == 'true':
		utils.vprint('Registering dummy data provider data')
		dataSplitter = DummySplitter()
	else:
		dataSplitter = DataSplitter.loadPartitionsForScript(dataset)
	config = config.changeView(setSections=None)
	partProcessor = config.getCompositePlugin('partition processor',
		'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor BasicPartitionProcessor',
		'MultiPartitionProcessor', cls='PartitionProcessor', onChange=None)
	DataParameterSource = ParameterSource.getClass('DataParameterSource')
	DataParameterSource.datasetsAvailable['data'] = DataParameterSource(
		config.getWorkPath(), 'data', None, dataSplitter, partProcessor)
def _main():
	signal.signal(signal.SIGINT, handle_abort_interrupt)

	parser = ScriptOptions()
	parser.section('expr', 'Manipulate lumi filter expressions', '%s <lumi filter expression>')
	parser.add_bool('expr', 'G', 'gc', default=False,
		help='Output grid-control compatible lumi expression')
	parser.add_bool('expr', 'J', 'json', default=False,
		help='Output JSON file with lumi expression')
	parser.add_bool('expr', 'F', 'full', default=False,
		help='Output JSON file with full expression')

	parser.section('calc', 'Options which allow luminosity related calculations',
		'%s <config file> [<job selector>]')
	parser.add_text('calc', 'O', 'output-dir', default=None,
		help='Set output directory (default: work directory)')
	parser.add_bool('calc', 'g', 'job-gc', default=False,
		help='Output grid-control compatible lumi expression for processed lumi sections')
	parser.add_bool('calc', 'j', 'job-json', default=False,
		help='Output JSON file with processed lumi sections')
	parser.add_bool('calc', 'e', 'job-events', default=False,
		help='Get number of events processed')
	parser.add_bool('calc', 'p', 'parameterized', default=False,
		help='Use output file name to categorize output (useful for parameterized tasks)')
	parser.add_bool('calc', ' ', 'replace', default='job_%d_',
		help='Pattern to replace for parameterized jobs (default: job_%%d_)')
	options = parser.script_parse()

	if options.opts.gc or options.opts.json or options.opts.full:
		if not options.args:
			options.parser.exit_with_usage(options.parser.usage('expr'))
		return convert_lumi_expr(options.opts, options.args)

	if options.opts.job_json or options.opts.job_gc or options.opts.job_events:
		if not options.args:
			options.parser.exit_with_usage(options.parser.usage('calc'))
		script_obj = get_script_object_cmdline(options.args, only_success=True)
		work_dn = script_obj.config.get_work_path()
		reader = None
		try:
			reader = DataSplitter.load_partitions(os.path.join(work_dn, 'datamap.tar'))
		except Exception:
			clear_current_exception()
		jobnum_list = sorted(script_obj.job_db.get_job_list(ClassSelector(JobClass.SUCCESS)))
		return lumi_calc(options.opts, work_dn, jobnum_list, reader)
def setupJobParameters(self, config, pm):
	config = config.changeView(viewClass = TaggedConfigView, addSections = ['dataset'], addTags = [self])
	self.dataSplitter = None
	self.dataRefresh = None
	self._forceRefresh = config.getState('resync', detail = 'dataset', default = False)

	def userRefresh(config, old_obj, cur_obj, cur_entry, obj2str):
		if (old_obj == '') and (cur_obj != ''):
			raise UserError('It is currently not possible to attach a dataset to a non-dataset task!')
		self._forceRefresh = True
		return cur_obj

	self.dataset = config.get('dataset', '', onChange = userRefresh).strip()
	if self.dataset == '':
		return
	config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
	config.set('default lookup', 'DATASETNICK')

	defaultProvider = config.get('dataset provider', 'ListProvider')
	dataProvider = DataProvider.create(config, self.dataset, defaultProvider)
	splitterName = config.get('dataset splitter', 'FileBoundarySplitter')
	splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
	self.dataSplitter = splitterClass(config)

	# Create and register dataset parameter source
	paramSplitProcessor = config.getCompositePlugin('dataset processor',
		'BasicDataSplitProcessor SECheckSplitProcessor', 'MultiDataSplitProcessor',
		cls = DataSplitProcessor).getInstance(config)
	paramSource = DataParameterSource(config.getWorkPath(), 'data',
		dataProvider, self.dataSplitter, paramSplitProcessor)
	DataParameterSource.datasetsAvailable['data'] = paramSource

	# Select dataset refresh rate
	self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
	if self.dataRefresh > 0:
		paramSource.resyncSetup(interval = max(self.dataRefresh, dataProvider.queryLimit()))
		utils.vprint('Dataset source will be queried every %s' % utils.strTime(self.dataRefresh), -1)
	else:
		paramSource.resyncSetup(interval = 0)
	if self._forceRefresh:
		paramSource.resyncSetup(force = True)

	def externalRefresh(sig, frame):
		paramSource.resyncSetup(force = True)
	signal.signal(signal.SIGUSR2, externalRefresh)

	if self.dataSplitter.getMaxJobs() == 0:
		raise UserError('There are no events to process')
def _setupJobParameters(self, config, psrc_repository):
	TaskModule._setupJobParameters(self, config, psrc_repository)
	data_config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['dataset'])
	self._dataSplitter = None
	dataProvider = data_config.getCompositePlugin('dataset', '', ':MultiDatasetProvider:',
		cls = DataProvider, requirePlugin = False, onChange = triggerResync(['datasets', 'parameters']))
	self._forceRefresh = config.getState('resync', detail = 'datasets')
	config.setState(False, 'resync', detail = 'datasets')
	if not dataProvider:
		return

	tmp_config = data_config.changeView(viewClass = 'TaggedConfigView',
		setClasses = None, setNames = None, setTags = [], addSections = ['storage'])
	tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
	tmp_config = data_config.changeView(viewClass = 'TaggedConfigView',
		setClasses = None, setNames = None, setTags = [], addSections = ['parameters'])
	tmp_config.set('default lookup', 'DATASETNICK')

	splitterName = data_config.get('dataset splitter', 'FileBoundarySplitter')
	splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
	self._dataSplitter = splitterClass(data_config)

	# Create and register dataset parameter source
	self._partProcessor = data_config.getCompositePlugin('partition processor',
		'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor BasicPartitionProcessor',
		'MultiPartitionProcessor', cls = PartitionProcessor, onChange = triggerResync(['parameters']))
	dataPS = ParameterSource.createInstance('DataParameterSource', data_config.getWorkPath(),
		'data', dataProvider, self._dataSplitter, self._partProcessor, psrc_repository)

	# Select dataset refresh rate
	data_refresh = data_config.getTime('dataset refresh', -1, onChange = None)
	if data_refresh >= 0:
		data_refresh = max(data_refresh, dataProvider.queryLimit())
		self._log.info('Dataset source will be queried every %s', strTime(data_refresh))
	dataPS.resyncSetup(interval = data_refresh, force = self._forceRefresh)

	def externalRefresh(sig, frame):
		self._log.info('External signal triggered resync of dataset source')
		dataPS.resyncSetup(force = True)
	signal.signal(signal.SIGUSR2, externalRefresh)

	if self._dataSplitter.getMaxJobs() == 0:
		if data_refresh < 0:
			raise UserError('Currently used dataset does not provide jobs to process')
		self._log.warning('Currently used dataset does not provide jobs to process')
def get_partition_reader(options):
	if len(options.args) != 1:
		options.parser.exit_with_usage(options.parser.usage('part'))
	return DataSplitter.load_partitions(options.args[0])
			def printError(curJ, curS, msg):
				if curJ != curS:
					logging.warning('%s in job %d (j:%s != s:%s)', msg, jobNum, curJ, curS)
					fail.add(jobNum)
			printError(events, splitInfo[DataSplitter.NEntries], 'Inconsistent number of events')
			printError(skip, splitInfo[DataSplitter.Skipped], 'Inconsistent number of skipped events')
			printError(files, splitInfo[DataSplitter.FileList], 'Inconsistent list of files')
		except Exception:
			logging.warning('Job %d was never initialized!', jobNum)
	if fail:
		logging.warning('Failed: ' + str.join('\n', imap(str, fail)))

if (opts.partition_list is not None) or opts.partition_list_invalid or opts.partition_check:
	if len(args) != 1:
		utils.exitWithUsage(parser.usage('part'))
	splitter = DataSplitter.loadPartitionsForScript(args[0])
	if opts.partition_list_invalid:
		utils.printTabular([(0, 'Job')], partition_invalid(splitter))
	if opts.partition_list is not None:
		if opts.partition_list in ('', 'all'):
			keyStrings = DataSplitter.enumNames
		else:
			keyStrings = opts.partition_list.split(',')
		keyList = lmap(DataSplitter.str2enum, keyStrings)
		if None in keyList:
			logging.warning('Available keys: %r', DataSplitter.enumNames)
		utils.printTabular([('jobNum', 'Job')] + lzip(keyList, keyStrings),
			partition_list(splitter, keyList))
	if opts.partition_check:
# | Licensed under the Apache License, Version 2.0 (the "License");
# | you may not use this file except in compliance with the License.
# | You may obtain a copy of the License at
# |
# |     http://www.apache.org/licenses/LICENSE-2.0
# |
# | Unless required by applicable law or agreed to in writing, software
# | distributed under the License is distributed on an "AS IS" BASIS,
# | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# | See the License for the specific language governing permissions and
# | limitations under the License.

from grid_control.datasets import DataProvider, DataSplitter
from python_compat import imap


FileClassSplitter = DataSplitter.get_class('FileClassSplitter')  # pylint:disable=invalid-name


class RunSplitter(FileClassSplitter):
	alias_list = ['runs']

	def __init__(self, config, datasource_name):
		FileClassSplitter.__init__(self, config, datasource_name)
		self._run_range = config.get_lookup(self._get_part_opt('run range'),
			{None: 1}, parser=int, strfun=int.__str__)

	def _get_fi_class(self, fi, block):
		run_range = self._run_range.lookup(DataProvider.get_block_id(block))
		metadata_idx = block[DataProvider.Metadata].index('Runs')
		return tuple(imap(lambda r: int(r / run_range), fi[DataProvider.Metadata][metadata_idx]))
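# Worked illustration of the run-range bucketing in _get_fi_class (plain
# Python, no grid-control needed): with a run range of 10, runs 0-9 map to
# file class 0, runs 10-19 to class 1, and so on, so files from nearby runs
# end up in the same partition.
run_range = 10
print([int(run / run_range) for run in [1, 9, 10, 25]])  # -> [0, 0, 1, 2]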
def main():
	# Set config based on settings from config file or command line
	configFile = None
	if os.path.exists(args[0]):
		configFile = args[0]
	config = getConfig(configFile, section = 'global')
	config.changeView(setSections = ['jobs']).set('nseeds', '1', '?=')
	configParameters = config.changeView(setSections = ['parameters'])
	if opts.parameters:
		utils.vprint('Provided options:')
		for p in opts.parameters:
			k, v = p.split('=', 1)
			configParameters.set(k.strip(), v.strip().replace('\\n', '\n'), '=')
			utils.vprint('\t%s: %s' % (k.strip(), v.strip()))
		utils.vprint('')
	if not os.path.exists(args[0]):
		configParameters.set('parameters', str.join(' ', args).replace('\\n', '\n'))
	if opts.dataset:
		configParameters.set('default lookup', 'DATASETNICK')
#	configParameters.set('parameter adapter', 'BasicParameterAdapter', '=')  # Don't track parameter changes
	if opts.verbosity > 2:
		config.changeView(setSections = None).write(sys.stdout)

	# Initialize ParameterFactory
	configTask = config.changeView(setSections = [config.get(['task', 'module'], 'DummyTask')])
	pm = config.getPlugin('parameter factory', 'SimpleParameterFactory', cls = ParameterFactory).getInstance()

	# Create dataset parameter source
	class DummySplitter:
		def getMaxJobs(self):
			return 3

		def getSplitInfo(self, pNum):
			mkEntry = lambda ds, fl, n, nick: {DataSplitter.Dataset: ds, DataSplitter.Nickname: nick,
				DataSplitter.FileList: fl, DataSplitter.NEntries: n}
			rndStr = lambda: md5(str(random.random())).hexdigest()[:10]
			tmp = [
				mkEntry('ds1', ['a', 'b'], 23, 'data_1'),
				mkEntry('ds1', ['1'], 42, 'data_1'),
				mkEntry('ds2', ['m', 'n'], 123, 'data_2'),
				mkEntry('ds2', ['x', 'y', 'z'], 987, 'data_3')
			]
			return tmp[pNum]

	class DataSplitProcessorTest:
		def getKeys(self):
			return map(lambda k: ParameterMetadata(k, untracked=True),
				['DATASETINFO', 'DATASETID', 'DATASETPATH', 'DATASETBLOCK', 'DATASETNICK'])

		def process(self, pNum, splitInfo, result):
			result.update({
				'DATASETINFO': '',
				'DATASETID': splitInfo.get(DataSplitter.DatasetID, None),
				'DATASETPATH': splitInfo.get(DataSplitter.Dataset, None),
				'DATASETBLOCK': splitInfo.get(DataSplitter.BlockName, None),
				'DATASETNICK': splitInfo.get(DataSplitter.Nickname, None),
				'DATASETSPLIT': pNum,
			})

	if opts.dataset.lower() == 'true':
		utils.vprint('Registering dummy data provider data')
		dataSplitter = DummySplitter()
	elif opts.dataset:
		dataSplitter = DataSplitter.loadState(opts.dataset)
	if opts.dataset:
		DataParameterSource.datasetsAvailable['data'] = DataParameterSource(
			config.getWorkPath(), 'data', None, dataSplitter, DataSplitProcessorTest())

	psource = pm.getSource(config)
	if opts.forceiv:
		for dp in DataParameterSource.datasetSources:
			dp.intervention = (set([1]), set([0]), True)

	if opts.listparams:
		result = []
		needGCParam = False
		if psource.getMaxJobs() != None:
			countActive = 0
			for jobNum in range(psource.getMaxJobs()):
				info = psource.getJobInfo(jobNum)
				if info[ParameterInfo.ACTIVE]:
					countActive += 1
				if opts.inactive or info[ParameterInfo.ACTIVE]:
					if not info[ParameterInfo.ACTIVE]:
						info['GC_PARAM'] = 'N/A'
					if str(info['GC_PARAM']) != str(jobNum):
						needGCParam = True
					result.append(info)
			if opts.displaymode == 'parseable':
				utils.vprint('Count,%d,%d' % (countActive, psource.getMaxJobs()))
			else:
				utils.vprint('Number of parameter points: %d' % psource.getMaxJobs())
				if countActive != psource.getMaxJobs():
					utils.vprint('Number of active parameter points: %d' % countActive)
		else:
			result.append(psource.getJobInfo(123))
		enabledOutput = opts.output.split(',')
		output = filter(lambda k: not opts.output or k in enabledOutput, psource.getJobKeys())
		stored = filter(lambda k: k.untracked == False, output)
		untracked = filter(lambda k: k.untracked == True, output)

		if opts.collapse > 0:
			result_old = result
			result = {}
			result_nicks = {}
			head = [('COLLATE_JOBS', '# of jobs')]
			if 'DATASETSPLIT' in stored:
				stored.remove('DATASETSPLIT')
				if opts.collapse == 1:
					stored.append('DATASETNICK')
					head.append(('DATASETNICK', 'DATASETNICK'))
				elif opts.collapse == 2:
					head.append(('COLLATE_NICK', '# of nicks'))
			for pset in result_old:
				if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
					pset.pop('DATASETSPLIT')
				nickname = None
				if ('DATASETNICK' in pset) and (opts.collapse == 2):
					nickname = pset.pop('DATASETNICK')
				h = md5(repr(map(lambda key: pset.get(key), stored))).hexdigest()
				result.setdefault(h, []).append(pset)
				result_nicks.setdefault(h, set()).add(nickname)

			def doCollate(h):
				tmp = result[h][0]
				tmp['COLLATE_JOBS'] = len(result[h])
				tmp['COLLATE_NICK'] = len(result_nicks[h])
				return tmp
			result = map(doCollate, result)
		else:
			head = [('GC_JOB_ID', '#')]
			if needGCParam:
				head.append(('GC_PARAM', 'GC_PARAM'))
		if opts.active:
			head.append((ParameterInfo.ACTIVE, 'ACTIVE'))
		if opts.visible:
			stored = opts.visible.split(',')
		head.extend(sorted(zip(stored, stored)))
		if opts.untracked:
			head.extend(sorted(map(lambda n: (n, '(%s)' % n),
				filter(lambda n: n not in ['GC_PARAM', 'GC_JOB_ID'], untracked))))
		utils.vprint('')
		utils.printTabular(head, result)

	if opts.save:
		utils.vprint('')
		ParameterSource.getClass('GCDumpParameterSource').write(opts.save, psource)
		utils.vprint('Parameter information saved to ./%s' % opts.save)

	if opts.intervention:
		utils.vprint('')
		tmp = psource.getJobIntervention()
		if tmp:
			if opts.displaymode == 'parseable':
				utils.vprint('R: %s' % str.join(',', map(str, tmp[0])))
				utils.vprint('D: %s' % str.join(',', map(str, tmp[1])))
			else:
				utils.vprint(' Redo: %r' % tmp[0])
				utils.vprint('Disable: %r' % tmp[1])
		else:
			if opts.displaymode == 'parseable':
				utils.vprint('NOINT')
			else:
				utils.vprint('No intervention')
def fillParameterInfo(self, pNum, result):
	splitInfo = self.dataSplitter.getSplitInfo(pNum)
	if utils.verbosity() > 2:
		utils.vprint('Dataset task number: %d' % pNum)
		DataSplitter.printInfoForJob(splitInfo)
	self.dataProc.process(pNum, splitInfo, result)
			def printError(curJ, curS, msg):
				if curJ != curS:
					logging.warning('%s in job %d (j:%s != s:%s)', msg, jobNum, curJ, curS)
					fail.add(jobNum)
			printError(events, splitInfo[DataSplitter.NEntries], 'Inconsistent number of events')
			printError(skip, splitInfo[DataSplitter.Skipped], 'Inconsistent number of skipped events')
			printError(files, splitInfo[DataSplitter.FileList], 'Inconsistent list of files')
		except Exception:
			logging.warning('Job %d was never initialized!', jobNum)
	if fail:
		logging.warning('Failed: ' + str.join('\n', imap(str, fail)))

if (opts.partition_list is not None) or opts.partition_list_invalid or opts.partition_check:
	if len(args) != 1:
		utils.exitWithUsage(parser.usage('part'))
	splitter = DataSplitter.loadStateForScript(args[0])
	if opts.partition_list_invalid:
		utils.printTabular([(0, 'Job')], partition_invalid(splitter))
	if opts.partition_list is not None:
		if opts.partition_list:
			keyStrings = opts.partition_list.split(',')
		else:
			keyStrings = DataSplitter.enumNames
		keyList = lmap(DataSplitter.str2enum, keyStrings)
		if None in keyList:
			logging.warning('Available keys: %r', DataSplitter.enumNames)
		utils.printTabular([('jobNum', 'Job')] + lzip(keyList, keyStrings),
			partition_list(splitter, keyList))
	if opts.partition_check:
if opts.findrm:
	removed = []
	utils.eprint = lambda *x: {}
	oldDP = DataProvider.loadState(args[0])
	for new in args[1:]:
		newDP = DataProvider.loadState(new)
		(blocksAdded, blocksMissing, blocksChanged) = DataProvider.resyncSources(
			oldDP.getBlocks(), newDP.getBlocks())
		for block in blocksMissing:
			tmp = dict(block)
			tmp[-1] = new
			removed.append(tmp)
		oldDP = newDP
	utils.printTabular([(DataProvider.Dataset, "Dataset"),
		(DataProvider.BlockName, "Block"), (-1, "Removed in file")], removed)

if opts.invalid:
	splitter = DataSplitter.loadState(opts.invalid)
	def getInvalid():
		for jobNum in range(splitter.getMaxJobs()):
			splitInfo = splitter.getSplitInfo(jobNum)
			if splitInfo.get(DataSplitter.Invalid, False):
				yield str(jobNum)
	print str.join(",", getInvalid())

if opts.jdl:
	print job.get("jdl")

if opts.state:
	try:
		newState = getattr(Job, opts.state)
	except Exception:
		print "Invalid state: %s" % opts.state
def main():
	if opts.save_jobjson or opts.save_jobgc or opts.get_events:
		(workDir, nJobs, jobList) = getWorkJobs(args)
		(log, incomplete, splitter, splitInfo) = (None, False, None, {})
		(lumiDict, readDict, writeDict) = ({}, {}, {})
		try:
			splitter = DataSplitter.loadState(os.path.join(workDir, 'datamap.tar'))
		except Exception:
			pass
		jobList = sorted(jobList)

		for jobNum in jobList:
			del log
			log = utils.ActivityLog('Reading job logs - [%d / %d]' % (jobNum, jobList[-1]))
			jobInfo = getJobInfo(workDir, jobNum, lambda retCode: retCode == 0)
			if not jobInfo:
				if not incomplete:
					print 'WARNING: Not all jobs have finished - results will be incomplete!'
					incomplete = True
				continue

			if not parameterized:
				if splitter:
					splitInfo = splitter.getSplitInfo(jobNum)
				outputName = splitInfo.get(DataSplitter.Nickname, splitInfo.get(DataSplitter.DatasetID, 0))
			else:
				outputName = jobInfo['file'].split()[2].replace("_%d_" % jobNum, '_').replace('/', '_').replace('__', '_')

			# Read framework report files to get number of events
			try:
				outputDir = os.path.join(workDir, 'output', 'job_' + str(jobNum))
				for fwkXML in getCMSSWInfo(os.path.join(outputDir, 'cmssw.dbs.tar.gz')):
					for run in fwkXML.getElementsByTagName('Run'):
						for lumi in run.getElementsByTagName('LumiSection'):
							run_id = int(run.getAttribute('ID'))
							lumi_id = int(lumi.getAttribute('ID'))
							lumiDict.setdefault(outputName, {}).setdefault(run_id, set()).add(lumi_id)
					for outFile in fwkXML.getElementsByTagName('File'):
						pfn = outFile.getElementsByTagName('PFN')[0].childNodes[0].data
						if pfn not in writeDict.setdefault(outputName, {}):
							writeDict[outputName][pfn] = 0
						writeDict[outputName][pfn] += int(outFile.getElementsByTagName('TotalEvents')[0].childNodes[0].data)
					for inFile in fwkXML.getElementsByTagName('InputFile'):
						if outputName not in readDict:
							readDict[outputName] = 0
						readDict[outputName] += int(inFile.getElementsByTagName('EventsRead')[0].childNodes[0].data)
			except KeyboardInterrupt:
				sys.exit(os.EX_OK)
			except Exception:
				print 'Error while parsing framework output of job %s!' % jobNum
				continue

		del log
		log = utils.ActivityLog('Simplifying lumi sections')
		lumis = {}
		for sample in lumiDict:
			for run in lumiDict[sample]:
				for lumi in lumiDict[sample][run]:
					lumis.setdefault(sample, []).append(([run, lumi], [run, lumi]))
		for sample in lumiDict:
			lumis[sample] = mergeLumi(lumis[sample])
		del log

		for sample, lumis in lumis.items():
			print 'Sample:', sample
			print '========================================='
			print 'Number of events processed: %12d' % readDict[sample]
			print ' Number of events written: %12d' % sum(writeDict.get(sample, {}).values())
			if writeDict.get(sample, None):
				print
				head = [(0, ' Output filename'), (1, 'Events')]
				utils.printTabular(head, map(lambda pfn: {0: pfn, 1: writeDict[sample][pfn]}, writeDict[sample]))
			if opts.save_jobjson:
				outputJSON(lumis, open(os.path.join(workDir, 'processed_%s.json' % sample), 'w'))
				print 'Saved processed lumi sections in', os.path.join(workDir, 'processed_%s.json' % sample)
			if opts.save_jobgc:
				print
				print 'List of processed lumisections:'
				print '-----------------------------------------'
				outputGC(lumis)
			print

	###########################
	# Lumi filter manipulation
	###########################
	if opts.save_exprgc or opts.save_exprjson or opts.save_exprfull:
		if len(args) == 0:
			raise Exception('No arguments given!')
		try:
			lumis = parseLumiFilter(str.join(' ', args))
		except Exception:
			raise Exception('Could not parse: %s' % str.join(' ', args))

		if opts.save_exprgc:
			outputGC(lumis)
		if opts.save_exprjson:
			outputJSON(lumis)
		if opts.save_exprfull:
			result = {}
			for rlrange in lumis:
				start, end = rlrange
				assert(start[0] == end[0])
				result.setdefault(start[0], []).extend(range(start[1], end[1] + 1))
			print result