def setupJobParameters(self, config, pm):
	config = config.addSections(['dataset']).addTags([self])
	self.dataSplitter = None
	self.dataRefresh = None
	self.dataset = config.get('dataset', '').strip()
	if self.dataset == '':
		return
	config.set('se output pattern', '@NICK@_job_@MY_JOBID@_@X@', override = False)
	config.set('default lookup', 'DATASETNICK', override = False)

	defaultProvider = config.get('dataset provider', 'ListProvider')
	dataProvider = DataProvider.create(config, self.dataset, defaultProvider)
	splitterName = config.get('dataset splitter', 'FileBoundarySplitter')
	splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
	self.dataSplitter = splitterClass(config)
	self.checkSE = config.getBool('dataset storage check', True, onChange = None)

	# Create and register dataset parameter plugin
	paramSource = DataParameterSource(config.getWorkPath(), 'data',
		dataProvider, self.dataSplitter, self.initDataProcessor())
	DataParameterSource.datasetsAvailable['data'] = paramSource

	# Select dataset refresh rate
	self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
	if self.dataRefresh > 0:
		paramSource.resyncSetup(interval = max(self.dataRefresh, dataProvider.queryLimit()))
		utils.vprint('Dataset source will be queried every %s' % utils.strTime(self.dataRefresh), -1)
	else:
		paramSource.resyncSetup(interval = 0)

	def externalRefresh(sig, frame):
		paramSource.resyncSetup(force = True)
	signal.signal(signal.SIGUSR2, externalRefresh)

	if self.dataSplitter.getMaxJobs() == 0:
		raise UserError('There are no events to process')

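# --- Standalone sketch (hypothetical stub, not grid-control API) of the SIGUSR2
# --- hook used above: on Unix, 'kill -USR2 <pid>' forces a dataset resync on the
# --- next cycle without restarting the process.
import os
import signal

class ParamSourceStub(object):
	def __init__(self):
		self.force_resync = False
	def resyncSetup(self, interval = None, force = False):
		self.force_resync = self.force_resync or force

paramSource = ParamSourceStub()

def externalRefresh(sig, frame):
	paramSource.resyncSetup(force = True)
signal.signal(signal.SIGUSR2, externalRefresh)

os.kill(os.getpid(), signal.SIGUSR2)  # simulate the external signal
assert paramSource.force_resync
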
class RunSplitter(DataSplitter.getClass('MetadataSplitter')):
	# Partition files by run number: files whose 'Runs' metadata falls into the
	# same bucket (bucket width set by the 'run range' option) share a partition.
	def _initConfig(self, config):
		self._run_range = self._configQuery(config.getInt, 'run range', 1)

	def metaKey(self, metadataNames, block, fi):
		selRunRange = self._setup(self._run_range, block)
		mdIdx = metadataNames.index('Runs')
		return lmap(lambda r: int(r / selRunRange), fi[DataProvider.Metadata][mdIdx])

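# --- Standalone sketch of the bucketing done by RunSplitter.metaKey: with
# --- 'run range' = N, run numbers map to int(run / N), so nearby runs end up in
# --- the same partition. Values below are illustrative.
run_range = 10
runs = [190456, 190462, 190470, 190503]
print([int(r / run_range) for r in runs])  # [19045, 19046, 19047, 19050]
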
def _setupJobParameters(self, config):
	data_config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['dataset'])
	self.dataSplitter = None
	self._data_refresh = -1

	def userRefresh(config, old_obj, cur_obj, cur_entry, obj2str):
		if (old_obj == '') and (cur_obj != ''):
			raise UserError('It is currently not possible to attach a dataset to a non-dataset task!')
		self._log.info('Dataset setup was changed - forcing resync...')
		config.setState(True, 'resync', detail = 'dataset')
		config.setState(True, 'init', detail = 'config')  # This will trigger a write of the new options
		return cur_obj
	dataProvider = data_config.getCompositePlugin('dataset', '', ':MultiDatasetProvider:',
		cls = DataProvider, requirePlugin = False, onChange = userRefresh)
	self._forceRefresh = config.getState('resync', detail = 'dataset')
	config.setState(False, 'resync', detail = 'dataset')
	if not dataProvider:
		return

	tmp_config = data_config.changeView(viewClass = 'TaggedConfigView',
		setClasses = None, setNames = None, setTags = [], addSections = ['storage'])
	tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
	tmp_config = data_config.changeView(viewClass = 'TaggedConfigView',
		setClasses = None, setNames = None, setTags = [], addSections = ['parameters'])
	tmp_config.set('default lookup', 'DATASETNICK')

	splitterName = data_config.get('dataset splitter', 'FileBoundarySplitter')
	splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
	self.dataSplitter = splitterClass(data_config)

	# Create and register dataset parameter source
	partProcessor = data_config.getCompositePlugin('partition processor',
		'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor BasicPartitionProcessor',
		'MultiPartitionProcessor', cls = PartitionProcessor, onChange = triggerResync(['parameters']))
	DataParameterSource = ParameterSource.getClass('DataParameterSource')
	self._dataPS = DataParameterSource(data_config.getWorkPath(), 'data',
		dataProvider, self.dataSplitter, partProcessor)
	DataParameterSource.datasetsAvailable['data'] = self._dataPS

	# Select dataset refresh rate
	self._data_refresh = data_config.getTime('dataset refresh', -1, onChange = None)
	if self._data_refresh > 0:
		self._dataPS.resyncSetup(interval = max(self._data_refresh, dataProvider.queryLimit()))
		utils.vprint('Dataset source will be queried every %s' % strTime(self._data_refresh), -1)
	else:
		self._dataPS.resyncSetup(interval = 0)
	if self._forceRefresh:
		self._dataPS.resyncSetup(force = True)

	def externalRefresh(sig, frame):
		self._dataPS.resyncSetup(force = True)
	signal.signal(signal.SIGUSR2, externalRefresh)

	if self.dataSplitter.getMaxJobs() == 0:
		raise UserError('There are no events to process')

def setupJobParameters(self, config, pm):
	config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['dataset'])
	self.dataSplitter = None
	self.dataRefresh = -1

	def userRefresh(config, old_obj, cur_obj, cur_entry, obj2str):
		if (old_obj == '') and (cur_obj != ''):
			raise UserError('It is currently not possible to attach a dataset to a non-dataset task!')
		self._log.info('Dataset setup was changed - forcing resync...')
		config.setState(True, 'resync', detail = 'dataset')
		config.setState(True, 'init', detail = 'config')  # This will trigger a write of the new options
		return cur_obj
	dataProvider = config.getCompositePlugin('dataset', '', ':MultiDatasetProvider:',
		cls = DataProvider, requirePlugin = False, onChange = userRefresh)
	self._forceRefresh = config.getState('resync', detail = 'dataset')
	config.setState(False, 'resync', detail = 'dataset')
	if not dataProvider:
		return

	tmp_config = config.changeView(viewClass = 'TaggedConfigView',
		setClasses = None, setNames = None, setTags = [], addSections = ['storage'])
	tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
	tmp_config = config.changeView(viewClass = 'TaggedConfigView',
		setClasses = None, setNames = None, setTags = [], addSections = ['parameters'])
	tmp_config.set('default lookup', 'DATASETNICK')

	splitterName = config.get('dataset splitter', 'FileBoundarySplitter')
	splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
	self.dataSplitter = splitterClass(config)

	# Create and register dataset parameter source
	partProcessor = config.getCompositePlugin('partition processor',
		'BasicPartitionProcessor LocationPartitionProcessor', 'MultiPartitionProcessor',
		cls = PartitionProcessor)
	DataParameterSource = ParameterSource.getClass('DataParameterSource')
	self._dataPS = DataParameterSource(config.getWorkPath(), 'data',
		dataProvider, self.dataSplitter, partProcessor)
	DataParameterSource.datasetsAvailable['data'] = self._dataPS

	# Select dataset refresh rate
	self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
	if self.dataRefresh > 0:
		self._dataPS.resyncSetup(interval = max(self.dataRefresh, dataProvider.queryLimit()))
		utils.vprint('Dataset source will be queried every %s' % strTime(self.dataRefresh), -1)
	else:
		self._dataPS.resyncSetup(interval = 0)
	if self._forceRefresh:
		self._dataPS.resyncSetup(force = True)

	def externalRefresh(sig, frame):
		self._dataPS.resyncSetup(force = True)
	signal.signal(signal.SIGUSR2, externalRefresh)

	if self.dataSplitter.getMaxJobs() == 0:
		raise UserError('There are no events to process')

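# --- Standalone sketch (hypothetical stubs, not grid-control API) of the
# --- userRefresh onChange hook above: the hook sees the old and new option
# --- values, rejects the unsupported no-dataset -> dataset transition, and
# --- flags a resync plus config rewrite for every other change.
class UserError(Exception):
	pass

class ConfigStub(object):
	def __init__(self):
		self.state = {}
	def setState(self, value, option, detail = None):
		self.state[(option, detail)] = value

def userRefresh(config, old_obj, cur_obj):
	if (old_obj == '') and (cur_obj != ''):
		raise UserError('It is currently not possible to attach a dataset to a non-dataset task!')
	config.setState(True, 'resync', detail = 'dataset')
	config.setState(True, 'init', detail = 'config')
	return cur_obj

config = ConfigStub()
userRefresh(config, 'old.dbs', 'new.dbs')  # dataset -> dataset change is allowed
assert config.state[('resync', 'dataset')]
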
def setupJobParameters(self, config, pm):
	config = config.changeView(viewClass = TaggedConfigView, addSections = ['dataset'], addTags = [self])
	self.dataSplitter = None
	self.dataRefresh = None
	self._forceRefresh = config.getState('resync', detail = 'dataset', default = False)

	def userRefresh(config, old_obj, cur_obj, cur_entry, obj2str):
		if ((old_obj == '') and (cur_obj != '')):
			raise UserError('It is currently not possible to attach a dataset to a non-dataset task!')
		self._forceRefresh = True
		return cur_obj
	self.dataset = config.get('dataset', '', onChange = userRefresh).strip()
	if self.dataset == '':
		return
	config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
	config.set('default lookup', 'DATASETNICK')

	defaultProvider = config.get('dataset provider', 'ListProvider')
	dataProvider = DataProvider.create(config, self.dataset, defaultProvider)
	splitterName = config.get('dataset splitter', 'FileBoundarySplitter')
	splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
	self.dataSplitter = splitterClass(config)

	# Create and register dataset parameter source
	paramSplitProcessor = config.getCompositePlugin('dataset processor',
		'BasicDataSplitProcessor SECheckSplitProcessor', 'MultiDataSplitProcessor',
		cls = DataSplitProcessor).getInstance(config)
	paramSource = DataParameterSource(config.getWorkPath(), 'data',
		dataProvider, self.dataSplitter, paramSplitProcessor)
	DataParameterSource.datasetsAvailable['data'] = paramSource

	# Select dataset refresh rate
	self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
	if self.dataRefresh > 0:
		paramSource.resyncSetup(interval = max(self.dataRefresh, dataProvider.queryLimit()))
		utils.vprint('Dataset source will be queried every %s' % utils.strTime(self.dataRefresh), -1)
	else:
		paramSource.resyncSetup(interval = 0)
	if self._forceRefresh:
		paramSource.resyncSetup(force = True)

	def externalRefresh(sig, frame):
		paramSource.resyncSetup(force = True)
	signal.signal(signal.SIGUSR2, externalRefresh)

	if self.dataSplitter.getMaxJobs() == 0:
		raise UserError('There are no events to process')

def _setupJobParameters(self, config, psrc_repository):
	TaskModule._setupJobParameters(self, config, psrc_repository)
	data_config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['dataset'])
	self._dataSplitter = None
	dataProvider = data_config.getCompositePlugin('dataset', '', ':MultiDatasetProvider:',
		cls = DataProvider, requirePlugin = False, onChange = triggerResync(['datasets', 'parameters']))
	self._forceRefresh = config.getState('resync', detail = 'datasets')
	config.setState(False, 'resync', detail = 'datasets')
	if not dataProvider:
		return

	tmp_config = data_config.changeView(viewClass = 'TaggedConfigView',
		setClasses = None, setNames = None, setTags = [], addSections = ['storage'])
	tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
	tmp_config = data_config.changeView(viewClass = 'TaggedConfigView',
		setClasses = None, setNames = None, setTags = [], addSections = ['parameters'])
	tmp_config.set('default lookup', 'DATASETNICK')

	splitterName = data_config.get('dataset splitter', 'FileBoundarySplitter')
	splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
	self._dataSplitter = splitterClass(data_config)

	# Create and register dataset parameter source
	self._partProcessor = data_config.getCompositePlugin('partition processor',
		'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor BasicPartitionProcessor',
		'MultiPartitionProcessor', cls = PartitionProcessor, onChange = triggerResync(['parameters']))
	dataPS = ParameterSource.createInstance('DataParameterSource', data_config.getWorkPath(),
		'data', dataProvider, self._dataSplitter, self._partProcessor, psrc_repository)

	# Select dataset refresh rate
	data_refresh = data_config.getTime('dataset refresh', -1, onChange = None)
	if data_refresh >= 0:
		data_refresh = max(data_refresh, dataProvider.queryLimit())
		self._log.info('Dataset source will be queried every %s', strTime(data_refresh))
	dataPS.resyncSetup(interval = data_refresh, force = self._forceRefresh)

	def externalRefresh(sig, frame):
		self._log.info('External signal triggered resync of dataset source')
		dataPS.resyncSetup(force = True)
	signal.signal(signal.SIGUSR2, externalRefresh)

	if self._dataSplitter.getMaxJobs() == 0:
		if data_refresh < 0:
			raise UserError('Currently used dataset does not provide jobs to process')
		self._log.warning('Currently used dataset does not provide jobs to process')

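# --- Sketch (hypothetical helper, not grid-control API) of the refresh interval
# --- selection used above: a negative 'dataset refresh' disables periodic
# --- queries, otherwise the interval is clamped from below by the provider's
# --- query limit so the data source is never polled faster than it allows.
def select_refresh_interval(configured, query_limit):
	if configured < 0:
		return configured  # periodic refresh disabled
	return max(configured, query_limit)

assert select_refresh_interval(-1, 60) == -1
assert select_refresh_interval(30, 60) == 60
assert select_refresh_interval(3600, 60) == 3600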