def getVarMapping(self):
	if self._dataSplitter:
		return utils.mergeDicts([TaskModule.getVarMapping(self), {'NICK': 'DATASETNICK'}])
	return TaskModule.getVarMapping(self)
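# Illustrative note (not part of the original module): with a dataset splitter
# configured, the mapping above adds the alias NICK for the job variable
# DATASETNICK, so config strings can use @NICK@ - e.g. in the
# 'se output pattern' set in _setupJobParameters below.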
def getSubmitInfo(self, jobNum):
	jobInfo = self.source.getJobInfo(jobNum)
	submitInfo = {'nevtJob': jobInfo.get('MAX_EVENTS', 0),
		'datasetFull': jobInfo.get('DATASETPATH', 'none')}
	return utils.mergeDicts([TaskModule.getSubmitInfo(self, jobNum), submitInfo])
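# Assumption for context: MAX_EVENTS and DATASETPATH are per-job variables
# filled from the dataset partition information; the fallbacks 0 and 'none'
# cover jobs that carry no dataset information.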
def _setup_repository(self, config, psrc_repository):
	TaskModule._setup_repository(self, config, psrc_repository)
	psrc_list = []
	for datasource_name in config.get_list('datasource names', ['dataset'],
			on_change=TriggerResync(['datasets', 'parameters'])):
		data_config = config.change_view(view_class='TaggedConfigView', add_sections=[datasource_name])
		self._create_datasource(data_config, datasource_name, psrc_repository, psrc_list)
	self._has_dataset = (psrc_list != [])

	# Register signal handler for manual dataset refresh
	def _external_refresh(sig, frame):
		for psrc in psrc_list:
			self._log.info('External signal triggered resync of datasource %r',
				psrc.get_datasource_name())
			psrc.setup_resync(force=True)
	signal.signal(signal.SIGUSR2, _external_refresh)

	config.set_state(False, 'resync', detail='datasets')
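# Usage sketch (shell command, an assumption about the runtime environment,
# not part of this module): a manual resync of all registered datasources can
# be triggered by sending SIGUSR2 to the running grid-control process:
#   kill -USR2 <pid>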
def _setupJobParameters(self, config, psrc_repository):
	TaskModule._setupJobParameters(self, config, psrc_repository)
	data_config = config.changeView(viewClass='TaggedConfigView', addSections=['dataset'])
	self._dataSplitter = None
	dataProvider = data_config.getCompositePlugin('dataset', '', ':MultiDatasetProvider:',
		cls=DataProvider, requirePlugin=False, onChange=triggerResync(['datasets', 'parameters']))
	self._forceRefresh = config.getState('resync', detail='datasets')
	config.setState(False, 'resync', detail='datasets')
	if not dataProvider:
		return

	tmp_config = data_config.changeView(viewClass='TaggedConfigView',
		setClasses=None, setNames=None, setTags=[], addSections=['storage'])
	tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
	tmp_config = data_config.changeView(viewClass='TaggedConfigView',
		setClasses=None, setNames=None, setTags=[], addSections=['parameters'])
	tmp_config.set('default lookup', 'DATASETNICK')

	splitterName = data_config.get('dataset splitter', 'FileBoundarySplitter')
	splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
	self._dataSplitter = splitterClass(data_config)

	# Create and register dataset parameter source
	self._partProcessor = data_config.getCompositePlugin('partition processor',
		'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor BasicPartitionProcessor',
		'MultiPartitionProcessor', cls=PartitionProcessor, onChange=triggerResync(['parameters']))
	dataPS = ParameterSource.createInstance('DataParameterSource', data_config.getWorkPath(),
		'data', dataProvider, self._dataSplitter, self._partProcessor, psrc_repository)

	# Select dataset refresh rate
	data_refresh = data_config.getTime('dataset refresh', -1, onChange=None)
	if data_refresh >= 0:
		data_refresh = max(data_refresh, dataProvider.queryLimit())
		self._log.info('Dataset source will be queried every %s', strTime(data_refresh))
	dataPS.resyncSetup(interval=data_refresh, force=self._forceRefresh)

	def externalRefresh(sig, frame):
		self._log.info('External signal triggered resync of dataset source')
		dataPS.resyncSetup(force=True)
	signal.signal(signal.SIGUSR2, externalRefresh)

	if self._dataSplitter.getMaxJobs() == 0:
		if data_refresh < 0:
			raise UserError('Currently used dataset does not provide jobs to process')
		self._log.warning('Currently used dataset does not provide jobs to process')
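# Illustrative config excerpt (placeholder values, assuming the usual
# grid-control INI syntax; the option names are the ones read above):
#   [dataset]
#   dataset = <dataset specification>
#   dataset splitter = FileBoundarySplitter
#   dataset refresh = 1:00
# With 'dataset refresh' left at -1, the dataset is never re-queried, and an
# empty dataset raises a UserError instead of only logging a warning.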
def get_var_alias_map(self):
	if self._has_dataset:  # create alias NICK for DATASETNICK
		return dict_union(TaskModule.get_var_alias_map(self), {'NICK': 'DATASETNICK'})
	return TaskModule.get_var_alias_map(self)