Example #1
	def setupJobParameters(self, config, pm):
		config = config.addSections(['dataset']).addTags([self])
		self.dataSplitter = None
		self.dataRefresh = None
		self.dataset = config.get('dataset', '').strip()
		if self.dataset == '':
			return
		config.set('se output pattern', '@NICK@_job_@MY_JOBID@_@X@', override = False)
		config.set('default lookup', 'DATASETNICK', override = False)

		# Instantiate the dataset provider and a compatible splitter
		defaultProvider = config.get('dataset provider', 'ListProvider')
		dataProvider = DataProvider.create(config, self.dataset, defaultProvider)
		splitterName = config.get('dataset splitter', 'FileBoundarySplitter')
		splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
		self.dataSplitter = splitterClass(config)
		self.checkSE = config.getBool('dataset storage check', True, onChange = None)

		# Create and register dataset parameter plugin
		paramSource = DataParameterSource(config.getWorkPath(), 'data',
			dataProvider, self.dataSplitter, self.initDataProcessor())
		DataParameterSource.datasetsAvailable['data'] = paramSource

		# Select dataset refresh rate
		self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
		if self.dataRefresh > 0:
			paramSource.resyncSetup(interval = max(self.dataRefresh, dataProvider.queryLimit()))
			utils.vprint('Dataset source will be queried every %s' % utils.strTime(self.dataRefresh), -1)
		else:
			paramSource.resyncSetup(interval = 0)
		def externalRefresh(sig, frame):
			paramSource.resyncSetup(force = True)
		signal.signal(signal.SIGUSR2, externalRefresh)

		if self.dataSplitter.getMaxJobs() == 0:
			raise UserError('There are no events to process')
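
Every variant in this section installs a SIGUSR2 handler so a running task can be told to resync its dataset source from the outside. A minimal sketch of triggering that handler, assuming the process id of the running grid-control instance is known (the pid below is hypothetical):

import os
import signal

pid = 12345  # hypothetical: in practice read from a pid file or process listing
os.kill(pid, signal.SIGUSR2)  # delivers the signal that invokes externalRefresh(sig, frame)

From a shell, kill -USR2 <pid> does the same.
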
Example #2
class RunSplitter(DataSplitter.getClass('MetadataSplitter')):
    def _initConfig(self, config):
        # Width of the run number buckets used to build partition keys
        self._run_range = self._configQuery(config.getInt, 'run range', 1)

    def metaKey(self, metadataNames, block, fi):
        # Resolve the (possibly block-specific) run range for this block
        selRunRange = self._setup(self._run_range, block)
        mdIdx = metadataNames.index('Runs')
        # Map each run number of the file into its bucket index
        return lmap(lambda r: int(r / selRunRange),
                    fi[DataProvider.Metadata][mdIdx])
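
metaKey buckets every run number of a file by integer division, so files whose runs fall into the same range share a partition key. The same arithmetic as a standalone sketch, with a hypothetical 'run range' of 1000:

run_range = 1000                  # hypothetical value of the 'run range' option
runs = [123001, 123999, 124500]   # example run numbers from a file's metadata
buckets = [int(r / run_range) for r in runs]
print(buckets)  # [123, 123, 124] - the first two runs share a partition key
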
Example #3
	def _setupJobParameters(self, config):
		data_config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['dataset'])
		self.dataSplitter = None
		self._data_refresh = -1
		def userRefresh(config, old_obj, cur_obj, cur_entry, obj2str):
			if (old_obj == '') and (cur_obj != ''):
				raise UserError('It is currently not possible to attach a dataset to a non-dataset task!')
			self._log.info('Dataset setup was changed - forcing resync...')
			config.setState(True, 'resync', detail = 'dataset')
			config.setState(True, 'init', detail = 'config') # This will trigger a write of the new options
			return cur_obj
		dataProvider = data_config.getCompositePlugin('dataset', '', ':MultiDatasetProvider:',
			cls = DataProvider, requirePlugin = False, onChange = userRefresh)
		self._forceRefresh = config.getState('resync', detail = 'dataset')
		config.setState(False, 'resync', detail = 'dataset')
		if not dataProvider:
			return

		tmp_config = data_config.changeView(viewClass = 'TaggedConfigView', setClasses = None, setNames = None, setTags = [], addSections = ['storage'])
		tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
		tmp_config = data_config.changeView(viewClass = 'TaggedConfigView', setClasses = None, setNames = None, setTags = [], addSections = ['parameters'])
		tmp_config.set('default lookup', 'DATASETNICK')

		splitterName = data_config.get('dataset splitter', 'FileBoundarySplitter')
		splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
		self.dataSplitter = splitterClass(data_config)

		# Create and register dataset parameter source
		partProcessor = data_config.getCompositePlugin('partition processor',
			'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor BasicPartitionProcessor',
			'MultiPartitionProcessor', cls = PartitionProcessor, onChange = triggerResync(['parameters']))
		DataParameterSource = ParameterSource.getClass('DataParameterSource')
		self._dataPS = DataParameterSource(data_config.getWorkPath(), 'data',
			dataProvider, self.dataSplitter, partProcessor)
		DataParameterSource.datasetsAvailable['data'] = self._dataPS

		# Select dataset refresh rate
		self._data_refresh = data_config.getTime('dataset refresh', -1, onChange = None)
		if self._data_refresh > 0:
			self._dataPS.resyncSetup(interval = max(self._data_refresh, dataProvider.queryLimit()))
			utils.vprint('Dataset source will be queried every %s' % strTime(self._data_refresh), -1)
		else:
			self._dataPS.resyncSetup(interval = 0)
		if self._forceRefresh:
			self._dataPS.resyncSetup(force = True)
		def externalRefresh(sig, frame):
			self._dataPS.resyncSetup(force = True)
		signal.signal(signal.SIGUSR2, externalRefresh)

		if self.dataSplitter.getMaxJobs() == 0:
			raise UserError('There are no events to process')
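
Examples #1 and #3-#5 only apply the provider's query limit when a positive refresh interval was configured, which reduces to the following rule (function and parameter names are illustrative, not grid-control API):

def effective_interval(user_interval, provider_query_limit):
    if user_interval <= 0:
        return 0  # disable periodic resyncs entirely
    # never query the data provider more often than it permits
    return max(user_interval, provider_query_limit)

assert effective_interval(-1, 60) == 0
assert effective_interval(30, 60) == 60   # clamped up to the provider limit
assert effective_interval(600, 60) == 600
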
Example #4
	def setupJobParameters(self, config, pm):
		config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['dataset'])
		self.dataSplitter = None
		self.dataRefresh = -1
		def userRefresh(config, old_obj, cur_obj, cur_entry, obj2str):
			if (old_obj == '') and (cur_obj != ''):
				raise UserError('It is currently not possible to attach a dataset to a non-dataset task!')
			self._log.info('Dataset setup was changed - forcing resync...')
			config.setState(True, 'resync', detail = 'dataset')
			config.setState(True, 'init', detail = 'config') # This will trigger a write of the new options
			return cur_obj
		dataProvider = config.getCompositePlugin('dataset', '', ':MultiDatasetProvider:',
			cls = DataProvider, requirePlugin = False, onChange = userRefresh)
		self._forceRefresh = config.getState('resync', detail = 'dataset')
		config.setState(False, 'resync', detail = 'dataset')
		if not dataProvider:
			return

		tmp_config = config.changeView(viewClass = 'TaggedConfigView', setClasses = None, setNames = None, setTags = [], addSections = ['storage'])
		tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
		tmp_config = config.changeView(viewClass = 'TaggedConfigView', setClasses = None, setNames = None, setTags = [], addSections = ['parameters'])
		tmp_config.set('default lookup', 'DATASETNICK')

		splitterName = config.get('dataset splitter', 'FileBoundarySplitter')
		splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
		self.dataSplitter = splitterClass(config)

		# Create and register dataset parameter source
		partProcessor = config.getCompositePlugin('partition processor',
			'BasicPartitionProcessor LocationPartitionProcessor', 'MultiPartitionProcessor',
			cls = PartitionProcessor)
		DataParameterSource = ParameterSource.getClass('DataParameterSource')
		self._dataPS = DataParameterSource(config.getWorkPath(), 'data',
			dataProvider, self.dataSplitter, partProcessor)
		DataParameterSource.datasetsAvailable['data'] = self._dataPS

		# Select dataset refresh rate
		self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
		if self.dataRefresh > 0:
			self._dataPS.resyncSetup(interval = max(self.dataRefresh, dataProvider.queryLimit()))
			utils.vprint('Dataset source will be queried every %s' % strTime(self.dataRefresh), -1)
		else:
			self._dataPS.resyncSetup(interval = 0)
		if self._forceRefresh:
			self._dataPS.resyncSetup(force = True)
		def externalRefresh(sig, frame):
			self._dataPS.resyncSetup(force = True)
		signal.signal(signal.SIGUSR2, externalRefresh)

		if self.dataSplitter.getMaxJobs() == 0:
			raise UserError('There are no events to process')
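
Examples #3, #4, and #6 read the persisted 'resync' state and clear it immediately, so a forced refresh fires exactly once per dataset change. Stripped of the config machinery, the read-and-clear latch looks like this (a plain dict stands in for the actual setState/getState API):

flags = {'dataset': True}  # a config change has requested a resync

force_refresh = flags.get('dataset', False)  # read the pending flag ...
flags['dataset'] = False                     # ... and clear it right away

assert force_refresh
assert not flags.get('dataset', False)  # a later read sees no pending resync
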
Example #5
	def setupJobParameters(self, config, pm):
		config = config.changeView(viewClass = TaggedConfigView, addSections = ['dataset'], addTags = [self])
		self.dataSplitter = None
		self.dataRefresh = None
		self._forceRefresh = config.getState('resync', detail = 'dataset', default = False)
		def userRefresh(config, old_obj, cur_obj, cur_entry, obj2str):
			if ((old_obj == '') and (cur_obj != '')):
				raise UserError('It is currently not possible to attach a dataset to a non-dataset task!')
			self._forceRefresh = True
			return cur_obj
		self.dataset = config.get('dataset', '', onChange = userRefresh).strip()
		if self.dataset == '':
			return
		config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
		config.set('default lookup', 'DATASETNICK')

		defaultProvider = config.get('dataset provider', 'ListProvider')
		dataProvider = DataProvider.create(config, self.dataset, defaultProvider)
		splitterName = config.get('dataset splitter', 'FileBoundarySplitter')
		splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
		self.dataSplitter = splitterClass(config)

		# Create and register dataset parameter source
		paramSplitProcessor = config.getCompositePlugin('dataset processor',
			'BasicDataSplitProcessor SECheckSplitProcessor', 'MultiDataSplitProcessor',
			cls = DataSplitProcessor).getInstance(config)
		paramSource = DataParameterSource(config.getWorkPath(), 'data',
			dataProvider, self.dataSplitter, paramSplitProcessor)
		DataParameterSource.datasetsAvailable['data'] = paramSource

		# Select dataset refresh rate
		self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
		if self.dataRefresh > 0:
			paramSource.resyncSetup(interval = max(self.dataRefresh, dataProvider.queryLimit()))
			utils.vprint('Dataset source will be queried every %s' % utils.strTime(self.dataRefresh), -1)
		else:
			paramSource.resyncSetup(interval = 0)
		if self._forceRefresh:
			paramSource.resyncSetup(force = True)
		def externalRefresh(sig, frame):
			paramSource.resyncSetup(force = True)
		signal.signal(signal.SIGUSR2, externalRefresh)

		if self.dataSplitter.getMaxJobs() == 0:
			raise UserError('There are no events to process')
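
Examples #1 and #3-#5 register the freshly created parameter source under the key 'data' in the class-level dictionary DataParameterSource.datasetsAvailable, so other components can later resolve the dataset source by name. The registry pattern in isolation (class and attribute names mirror the examples but are illustrative):

class DataSource:
    datasets_available = {}  # shared, class-level registry

    def __init__(self, name):
        self.name = name

src = DataSource('data')
DataSource.datasets_available['data'] = src  # register after construction
assert DataSource.datasets_available['data'] is src  # lookup by key elsewhere
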
Example #6
	def _setupJobParameters(self, config, psrc_repository):
		TaskModule._setupJobParameters(self, config, psrc_repository)
		data_config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['dataset'])
		self._dataSplitter = None
		dataProvider = data_config.getCompositePlugin('dataset', '', ':MultiDatasetProvider:',
			cls = DataProvider, requirePlugin = False, onChange = triggerResync(['datasets', 'parameters']))
		self._forceRefresh = config.getState('resync', detail = 'datasets')
		config.setState(False, 'resync', detail = 'datasets')
		if not dataProvider:
			return

		tmp_config = data_config.changeView(viewClass = 'TaggedConfigView', setClasses = None, setNames = None, setTags = [], addSections = ['storage'])
		tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
		tmp_config = data_config.changeView(viewClass = 'TaggedConfigView', setClasses = None, setNames = None, setTags = [], addSections = ['parameters'])
		tmp_config.set('default lookup', 'DATASETNICK')

		splitterName = data_config.get('dataset splitter', 'FileBoundarySplitter')
		splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
		self._dataSplitter = splitterClass(data_config)

		# Create and register dataset parameter source
		self._partProcessor = data_config.getCompositePlugin('partition processor',
			'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor BasicPartitionProcessor',
			'MultiPartitionProcessor', cls = PartitionProcessor, onChange = triggerResync(['parameters']))
		dataPS = ParameterSource.createInstance('DataParameterSource', data_config.getWorkPath(), 'data',
			dataProvider, self._dataSplitter, self._partProcessor, psrc_repository)

		# Select dataset refresh rate
		data_refresh = data_config.getTime('dataset refresh', -1, onChange = None)
		if data_refresh >= 0:
			data_refresh = max(data_refresh, dataProvider.queryLimit())
			self._log.info('Dataset source will be queried every %s', strTime(data_refresh))
		dataPS.resyncSetup(interval = data_refresh, force = self._forceRefresh)
		def externalRefresh(sig, frame):
			self._log.info('External signal triggered resync of dataset source')
			dataPS.resyncSetup(force = True)
		signal.signal(signal.SIGUSR2, externalRefresh)

		if self._dataSplitter.getMaxJobs() == 0:
			if data_refresh < 0:
				raise UserError('Currently used dataset does not provide jobs to process')
			self._log.warning('Currently used dataset does not provide jobs to process')
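
Examples #6 and #7 soften the empty-dataset check: with periodic refresh enabled, an empty splitter only warns, because a later resync may still deliver jobs; without refresh it remains a hard error. The branch as a standalone sketch (function name is illustrative):

def check_jobs(max_jobs, refresh_interval):
    if max_jobs == 0:
        if refresh_interval < 0:
            # no refresh configured: the dataset can never grow, so fail hard
            raise RuntimeError('Currently used dataset does not provide jobs to process')
        # refresh configured: a future resync may still add jobs
        print('warning: currently used dataset does not provide jobs to process')
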
Example #7
    def _setupJobParameters(self, config, psrc_repository):
        TaskModule._setupJobParameters(self, config, psrc_repository)
        data_config = config.changeView(viewClass='TaggedConfigView',
                                        addSections=['dataset'])
        self._dataSplitter = None
        dataProvider = data_config.getCompositePlugin(
            'dataset',
            '',
            ':MultiDatasetProvider:',
            cls=DataProvider,
            requirePlugin=False,
            onChange=triggerResync(['datasets', 'parameters']))
        self._forceRefresh = config.getState('resync', detail='datasets')
        config.setState(False, 'resync', detail='datasets')
        if not dataProvider:
            return

        tmp_config = data_config.changeView(viewClass='TaggedConfigView',
                                            setClasses=None,
                                            setNames=None,
                                            setTags=[],
                                            addSections=['storage'])
        tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
        tmp_config = data_config.changeView(viewClass='TaggedConfigView',
                                            setClasses=None,
                                            setNames=None,
                                            setTags=[],
                                            addSections=['parameters'])
        tmp_config.set('default lookup', 'DATASETNICK')

        splitterName = data_config.get('dataset splitter',
                                       'FileBoundarySplitter')
        splitterClass = dataProvider.checkSplitter(
            DataSplitter.getClass(splitterName))
        self._dataSplitter = splitterClass(data_config)

        # Create and register dataset parameter source
        self._partProcessor = data_config.getCompositePlugin(
            'partition processor',
            'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor BasicPartitionProcessor',
            'MultiPartitionProcessor',
            cls=PartitionProcessor,
            onChange=triggerResync(['parameters']))
        dataPS = ParameterSource.createInstance('DataParameterSource',
                                                data_config.getWorkPath(),
                                                'data', dataProvider,
                                                self._dataSplitter,
                                                self._partProcessor,
                                                psrc_repository)

        # Select dataset refresh rate
        data_refresh = data_config.getTime('dataset refresh',
                                           -1,
                                           onChange=None)
        if data_refresh >= 0:
            data_refresh = max(data_refresh, dataProvider.queryLimit())
            self._log.info('Dataset source will be queried every %s',
                           strTime(data_refresh))
        dataPS.resyncSetup(interval=data_refresh, force=self._forceRefresh)

        def externalRefresh(sig, frame):
            self._log.info(
                'External signal triggered resync of dataset source')
            dataPS.resyncSetup(force=True)

        signal.signal(signal.SIGUSR2, externalRefresh)

        if self._dataSplitter.getMaxJobs() == 0:
            if data_refresh < 0:
                raise UserError(
                    'Currently used dataset does not provide jobs to process')
            self._log.warning(
                'Currently used dataset does not provide jobs to process')