Example #1
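	# InfoScanner for CMSSW output: both config reads below register
	# triggerResync(['datasets', 'parameters']) as their onChange handler, and
	# 'merge config infos' switches the merge key from config file name to config hash.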
	def __init__(self, config):
		InfoScanner.__init__(self, config)
		self._importParents = config.getBool('include parent infos', False, onChange = triggerResync(['datasets', 'parameters']))
		self._mergeKey = 'CMSSW_CONFIG_FILE'
		if config.getBool('merge config infos', True, onChange = triggerResync(['datasets', 'parameters'])):
			self._mergeKey = 'CMSSW_CONFIG_HASH'
		self._cfgStore = {}
		self._gtStore = {}
		self._regexAnnotation = re.compile(r'.*annotation.*=.*cms.untracked.string.*\((.*)\)')
		self._regexDataTier = re.compile(r'.*dataTier.*=.*cms.untracked.string.*\((.*)\)')
Example #2
 def __init__(self, config):
     PartitionProcessor.__init__(self, config)
     changeTrigger = triggerResync(['datasets', 'parameters'])
     self._lumi_filter = config.getLookup('lumi filter', {},
                                          parser=parseLumiFilter,
                                          strfun=strLumi,
                                          onChange=changeTrigger)
Example #3
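	# CMS dataset provider: a shared resync trigger guards the lumi filter, PhEDEx site
	# filter and DBS instance options; a non-empty lumi filter appends LumiDataProcessor
	# to the 'dataset processor' pipeline before DataProvider.__init__ runs.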
	def __init__(self, config, datasetExpr, datasetNick = None):
		self._changeTrigger = triggerResync(['datasets', 'parameters'])
		self._lumi_filter = config.getLookup('lumi filter', {}, parser = parseLumiFilter, strfun = strLumi, onChange = self._changeTrigger)
		if not self._lumi_filter.empty():
			config.set('dataset processor', 'LumiDataProcessor', '+=')
		DataProvider.__init__(self, config, datasetExpr, datasetNick)
		# LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
		self._lumi_query = config.getBool('lumi metadata', not self._lumi_filter.empty(), onChange = self._changeTrigger)
		config.set('phedex sites matcher mode', 'shell', '?=')
		# PhEDEx blacklist: T1_*_Disk nodes allow user jobs - other T1s don't!
		self._phedexFilter = config.getFilter('phedex sites', '-* T1_*_Disk T2_* T3_*',
			defaultMatcher = 'blackwhite', defaultFilter = 'strict', onChange = self._changeTrigger)
		self._onlyComplete = config.getBool('only complete sites', True, onChange = self._changeTrigger)
		self._locationFormat = config.getEnum('location format', CMSLocationFormat, CMSLocationFormat.hostname, onChange = self._changeTrigger)
		self._pjrc = JSONRestClient(url = 'https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas')
		self._sitedb = SiteDB()

		(self._datasetPath, self._datasetInstance, self._datasetBlock) = optSplit(datasetExpr, '@#')
		instance_default = config.get('dbs instance', '', onChange = self._changeTrigger)
		self._datasetInstance = self._datasetInstance or instance_default
		if not self._datasetInstance:
			self._datasetInstance = 'prod/global'
		elif '/' not in self._datasetInstance:
			self._datasetInstance = 'prod/%s' % self._datasetInstance
		self._datasetBlock = self._datasetBlock or 'all'
		self.onlyValid = config.getBool('only valid', True, onChange = self._changeTrigger)
Example #4
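    # Base DataProvider: the stats processor, nickname producer and composite dataset
    # processor all share the same triggerDataResync handler.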
    def __init__(self, config, datasetExpr, datasetNick=None):
        ConfigurablePlugin.__init__(self, config)
        self._log = logging.getLogger('dataset.provider')
        (self._datasetExpr, self._datasetNick) = (datasetExpr, datasetNick)
        (self._cache_block, self._cache_dataset) = (None, None)
        self._dataset_query_interval = config.getTime(
            'dataset default query interval', 60, onChange=None)

        triggerDataResync = triggerResync(['datasets', 'parameters'])
        self._stats = DataProcessor.createInstance(
            'SimpleStatsDataProcessor', config, triggerDataResync, self._log,
            ' * Dataset %s:\n\tcontains ' % repr(datasetNick or datasetExpr))
        self._nickProducer = config.getPlugin('nickname source',
                                              'SimpleNickNameProducer',
                                              cls=DataProcessor,
                                              pargs=(triggerDataResync, ),
                                              onChange=triggerDataResync)
        self._datasetProcessor = config.getCompositePlugin(
            'dataset processor',
            'NickNameConsistencyProcessor EntriesConsistencyDataProcessor URLDataProcessor URLCountDataProcessor '
            +
            'EntriesCountDataProcessor EmptyDataProcessor UniqueDataProcessor LocationDataProcessor',
            'MultiDataProcessor',
            cls=DataProcessor,
            pargs=(triggerDataResync, ),
            onChange=triggerDataResync)
Example #5
 def __init__(self, config):
     InfoScanner.__init__(self, config)
     self._importParents = config.getBool('include parent infos',
                                          False,
                                          onChange=triggerResync(
                                              ['datasets', 'parameters']))
     self._mergeKey = 'CMSSW_CONFIG_FILE'
     if config.getBool('merge config infos',
                       True,
                       onChange=triggerResync(['datasets', 'parameters'])):
         self._mergeKey = 'CMSSW_CONFIG_HASH'
     self._cfgStore = {}
     self._gtStore = {}
     self._regexAnnotation = re.compile(
         r'.*annotation.*=.*cms.untracked.string.*\((.*)\)')
     self._regexDataTier = re.compile(
         r'.*dataTier.*=.*cms.untracked.string.*\((.*)\)')
Example #6
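	# Wires the dataset provider, splitter and partition processor into a
	# DataParameterSource; 'dataset refresh' controls the query interval and
	# SIGUSR2 forces an external resync of the dataset source.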
	def _setupJobParameters(self, config, psrc_repository):
		TaskModule._setupJobParameters(self, config, psrc_repository)
		data_config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['dataset'])
		self._dataSplitter = None
		dataProvider = data_config.getCompositePlugin('dataset', '', ':MultiDatasetProvider:',
			cls = DataProvider, requirePlugin = False, onChange = triggerResync(['datasets', 'parameters']))
		self._forceRefresh = config.getState('resync', detail = 'datasets')
		config.setState(False, 'resync', detail = 'datasets')
		if not dataProvider:
			return

		tmp_config = data_config.changeView(viewClass = 'TaggedConfigView', setClasses = None, setNames = None, setTags = [], addSections = ['storage'])
		tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
		tmp_config = data_config.changeView(viewClass = 'TaggedConfigView', setClasses = None, setNames = None, setTags = [], addSections = ['parameters'])
		tmp_config.set('default lookup', 'DATASETNICK')

		splitterName = data_config.get('dataset splitter', 'FileBoundarySplitter')
		splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
		self._dataSplitter = splitterClass(data_config)

		# Create and register dataset parameter source
		self._partProcessor = data_config.getCompositePlugin('partition processor',
			'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor BasicPartitionProcessor',
			'MultiPartitionProcessor', cls = PartitionProcessor, onChange = triggerResync(['parameters']))
		dataPS = ParameterSource.createInstance('DataParameterSource', data_config.getWorkPath(), 'data',
			dataProvider, self._dataSplitter, self._partProcessor, psrc_repository)

		# Select dataset refresh rate
		data_refresh = data_config.getTime('dataset refresh', -1, onChange = None)
		if data_refresh >= 0:
			data_refresh = max(data_refresh, dataProvider.queryLimit())
			self._log.info('Dataset source will be queried every %s', strTime(data_refresh))
		dataPS.resyncSetup(interval = data_refresh, force = self._forceRefresh)
		def externalRefresh(sig, frame):
			self._log.info('External signal triggered resync of dataset source')
			dataPS.resyncSetup(force = True)
		signal.signal(signal.SIGUSR2, externalRefresh)

		if self._dataSplitter.getMaxJobs() == 0:
			if data_refresh < 0:
				raise UserError('Currently used dataset does not provide jobs to process')
			self._log.warning('Currently used dataset does not provide jobs to process')
Example #7
	def __init__(self, config):
		DataProcessor.__init__(self, config)
		changeTrigger = triggerResync(['datasets', 'parameters'])
		self._lumi_filter = config.getLookup('lumi filter', {}, parser = parseLumiFilter, strfun = strLumi, onChange = changeTrigger)
		if self._lumi_filter.empty():
			lumi_keep_default = LumiKeep.none
		else:
			lumi_keep_default = LumiKeep.Run
			config.setBool('lumi metadata', True)
			logging.getLogger('user.once').info('Runs/lumi section filter enabled!')
		self._lumi_keep = config.getEnum('lumi keep', LumiKeep, lumi_keep_default, onChange = changeTrigger)
		self._lumi_strict = config.getBool('strict lumi filter', True, onChange = changeTrigger)
Example #8
	def __init__(self, config, datasetExpr, datasetNick = None, datasetID = 0):
		ConfigurablePlugin.__init__(self, config)
		self._log = logging.getLogger('user.dataprovider')
		(self._datasetExpr, self._datasetNick, self._datasetID) = (datasetExpr, datasetNick, datasetID)
		(self._cache_block, self._cache_dataset, self._passthrough) = (None, None, False)

		self._stats = DataProcessor.createInstance('StatsDataProcessor', config)
		self._nickProducer = config.getPlugin('nickname source', 'SimpleNickNameProducer', cls = DataProcessor)
		self._datasetProcessor = config.getCompositePlugin('dataset processor',
			'EntriesConsistencyDataProcessor URLDataProcessor URLCountDataProcessor ' +
			'EntriesCountDataProcessor EmptyDataProcessor UniqueDataProcessor LocationDataProcessor',
			'MultiDataProcessor', cls = DataProcessor, onChange = triggerResync(['datasets', 'parameters']))
Example #9
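	# Variant without triggerResync on the 'dataset' option: a local userRefresh
	# callback flags the 'resync' and 'init' config state instead, while the
	# partition processor still registers triggerResync(['parameters']).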
	def _setupJobParameters(self, config):
		data_config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['dataset'])
		self.dataSplitter = None
		self._data_refresh = -1
		def userRefresh(config, old_obj, cur_obj, cur_entry, obj2str):
			if (old_obj == '') and (cur_obj != ''):
				raise UserError('It is currently not possible to attach a dataset to a non-dataset task!')
			self._log.info('Dataset setup was changed - forcing resync...')
			config.setState(True, 'resync', detail = 'dataset')
			config.setState(True, 'init', detail = 'config') # This will trigger a write of the new options
			return cur_obj
		dataProvider = data_config.getCompositePlugin('dataset', '', ':MultiDatasetProvider:',
			cls = DataProvider, requirePlugin = False, onChange = userRefresh)
		self._forceRefresh = config.getState('resync', detail = 'dataset')
		config.setState(False, 'resync', detail = 'dataset')
		if not dataProvider:
			return

		tmp_config = data_config.changeView(viewClass = 'TaggedConfigView', setClasses = None, setNames = None, setTags = [], addSections = ['storage'])
		tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
		tmp_config = data_config.changeView(viewClass = 'TaggedConfigView', setClasses = None, setNames = None, setTags = [], addSections = ['parameters'])
		tmp_config.set('default lookup', 'DATASETNICK')

		splitterName = data_config.get('dataset splitter', 'FileBoundarySplitter')
		splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
		self.dataSplitter = splitterClass(data_config)

		# Create and register dataset parameter source
		partProcessor = data_config.getCompositePlugin('partition processor',
			'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor BasicPartitionProcessor',
			'MultiPartitionProcessor', cls = PartitionProcessor, onChange = triggerResync(['parameters']))
		DataParameterSource = ParameterSource.getClass('DataParameterSource')
		self._dataPS = DataParameterSource(data_config.getWorkPath(), 'data',
			dataProvider, self.dataSplitter, partProcessor)
		DataParameterSource.datasetsAvailable['data'] = self._dataPS

		# Select dataset refresh rate
		self._data_refresh = data_config.getTime('dataset refresh', -1, onChange = None)
		if self._data_refresh > 0:
			self._dataPS.resyncSetup(interval = max(self._data_refresh, dataProvider.queryLimit()))
			utils.vprint('Dataset source will be queried every %s' % strTime(self._data_refresh), -1)
		else:
			self._dataPS.resyncSetup(interval = 0)
		if self._forceRefresh:
			self._dataPS.resyncSetup(force = True)
		def externalRefresh(sig, frame):
			self._dataPS.resyncSetup(force = True)
		signal.signal(signal.SIGUSR2, externalRefresh)

		if self.dataSplitter.getMaxJobs() == 0:
			raise UserError('There are no events to process')
Example #10
	def __init__(self, config, datasetExpr, datasetNick = None):
		ConfigurablePlugin.__init__(self, config)
		self._log = logging.getLogger('dataset.provider')
		(self._datasetExpr, self._datasetNick) = (datasetExpr, datasetNick)
		(self._cache_block, self._cache_dataset) = (None, None)
		self._dataset_query_interval = config.getTime('dataset default query interval', 60, onChange = None)

		triggerDataResync = triggerResync(['datasets', 'parameters'])
		self._stats = DataProcessor.createInstance('SimpleStatsDataProcessor', config, triggerDataResync, self._log,
			' * Dataset %s:\n\tcontains ' % repr(datasetNick or datasetExpr))
		self._nickProducer = config.getPlugin('nickname source', 'SimpleNickNameProducer',
			cls = DataProcessor, pargs = (triggerDataResync,), onChange = triggerDataResync)
		self._datasetProcessor = config.getCompositePlugin('dataset processor',
			'NickNameConsistencyProcessor EntriesConsistencyDataProcessor URLDataProcessor URLCountDataProcessor ' +
			'EntriesCountDataProcessor EmptyDataProcessor UniqueDataProcessor LocationDataProcessor', 'MultiDataProcessor',
			cls = DataProcessor, pargs = (triggerDataResync,), onChange = triggerDataResync)
Example #11
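    # DBS provider: lumi filter, PhEDEx site/T1 filters and location options all
    # share one changeTrigger built from triggerResync(['datasets', 'parameters']).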
    def __init__(self, config, datasetExpr, datasetNick=None, datasetID=0):
        changeTrigger = triggerResync(['datasets', 'parameters'])
        self._lumi_filter = config.getLookup('lumi filter', {},
                                             parser=parseLumiFilter,
                                             strfun=strLumi,
                                             onChange=changeTrigger)
        if not self._lumi_filter.empty():
            config.set('dataset processor', 'LumiDataProcessor', '+=')
        DataProvider.__init__(self, config, datasetExpr, datasetNick,
                              datasetID)
        # LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
        self._lumi_query = config.getBool('lumi metadata',
                                          not self._lumi_filter.empty(),
                                          onChange=changeTrigger)
        # PhEDEx blacklist: 'T1_DE_KIT', 'T1_US_FNAL' and '*_Disk' allow user jobs - other T1s don't!
        self._phedexFilter = config.getFilter('phedex sites',
                                              '-T3_US_FNALLPC',
                                              defaultMatcher='blackwhite',
                                              defaultFilter='weak',
                                              onChange=changeTrigger)
        self._phedexT1Filter = config.getFilter('phedex t1 accept',
                                                'T1_DE_KIT T1_US_FNAL',
                                                defaultMatcher='blackwhite',
                                                defaultFilter='weak',
                                                onChange=changeTrigger)
        self._phedexT1Mode = config.getEnum('phedex t1 mode',
                                            PhedexT1Mode,
                                            PhedexT1Mode.disk,
                                            onChange=changeTrigger)
        self.onlyComplete = config.getBool('only complete sites',
                                           True,
                                           onChange=changeTrigger)
        self._locationFormat = config.getEnum('location format',
                                              CMSLocationFormat,
                                              CMSLocationFormat.hostname,
                                              onChange=changeTrigger)
        self._pjrc = JSONRestClient(
            url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas'
        )

        (self._datasetPath, self._url,
         self._datasetBlock) = optSplit(datasetExpr, '@#')
        self._url = self._url or config.get('dbs instance', '')
        self._datasetBlock = self._datasetBlock or 'all'
        self.onlyValid = config.getBool('only valid',
                                        True,
                                        onChange=changeTrigger)
Example #12
    def __init__(self, config, datasetExpr, datasetNick=None):
        self._changeTrigger = triggerResync(['datasets', 'parameters'])
        self._lumi_filter = config.getLookup('lumi filter', {},
                                             parser=parseLumiFilter,
                                             strfun=strLumi,
                                             onChange=self._changeTrigger)
        if not self._lumi_filter.empty():
            config.set('dataset processor', 'LumiDataProcessor', '+=')
        DataProvider.__init__(self, config, datasetExpr, datasetNick)
        # LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
        self._lumi_query = config.getBool('lumi metadata',
                                          not self._lumi_filter.empty(),
                                          onChange=self._changeTrigger)
        config.set('phedex sites matcher mode', 'shell', '?=')
        # PhEDEx blacklist: T1_*_Disk nodes allow user jobs - other T1s don't!
        self._phedexFilter = config.getFilter('phedex sites',
                                              '-* T1_*_Disk T2_* T3_*',
                                              defaultMatcher='blackwhite',
                                              defaultFilter='strict',
                                              onChange=self._changeTrigger)
        self._onlyComplete = config.getBool('only complete sites',
                                            True,
                                            onChange=self._changeTrigger)
        self._locationFormat = config.getEnum('location format',
                                              CMSLocationFormat,
                                              CMSLocationFormat.hostname,
                                              onChange=self._changeTrigger)
        self._pjrc = JSONRestClient(
            url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas'
        )
        self._sitedb = SiteDB()

        (self._datasetPath, self._datasetInstance,
         self._datasetBlock) = optSplit(datasetExpr, '@#')
        instance_default = config.get('dbs instance',
                                      '',
                                      onChange=self._changeTrigger)
        self._datasetInstance = self._datasetInstance or instance_default
        if not self._datasetInstance:
            self._datasetInstance = 'prod/global'
        elif '/' not in self._datasetInstance:
            self._datasetInstance = 'prod/%s' % self._datasetInstance
        self._datasetBlock = self._datasetBlock or 'all'
        self.onlyValid = config.getBool('only valid',
                                        True,
                                        onChange=self._changeTrigger)
Example #13
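    # DataProvider variant: only the composite 'dataset processor' option registers
    # the resync trigger; the stats and nickname plugins are created without one.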
    def __init__(self, config, datasetExpr, datasetNick=None, datasetID=0):
        ConfigurablePlugin.__init__(self, config)
        self._log = logging.getLogger('user.dataprovider')
        (self._datasetExpr, self._datasetNick,
         self._datasetID) = (datasetExpr, datasetNick, datasetID)
        (self._cache_block, self._cache_dataset,
         self._passthrough) = (None, None, False)

        self._stats = DataProcessor.createInstance('StatsDataProcessor',
                                                   config)
        self._nickProducer = config.getPlugin('nickname source',
                                              'SimpleNickNameProducer',
                                              cls=DataProcessor)
        self._datasetProcessor = config.getCompositePlugin(
            'dataset processor',
            'EntriesConsistencyDataProcessor URLDataProcessor URLCountDataProcessor '
            +
            'EntriesCountDataProcessor EmptyDataProcessor UniqueDataProcessor LocationDataProcessor',
            'MultiDataProcessor',
            cls=DataProcessor,
            onChange=triggerResync(['datasets', 'parameters']))
Example #14
 def __init__(self, config):
     DataProcessor.__init__(self, config)
     changeTrigger = triggerResync(['datasets', 'parameters'])
     self._lumi_filter = config.getLookup('lumi filter', {},
                                          parser=parseLumiFilter,
                                          strfun=strLumi,
                                          onChange=changeTrigger)
     if self._lumi_filter.empty():
         lumi_keep_default = LumiKeep.none
     else:
         lumi_keep_default = LumiKeep.Run
         config.setBool('lumi metadata', True)
         logging.getLogger('user.once').info(
             'Runs/lumi section filter enabled!')
     self._lumi_keep = config.getEnum('lumi keep',
                                      LumiKeep,
                                      lumi_keep_default,
                                      onChange=changeTrigger)
     self._lumi_strict = config.getBool('strict lumi filter',
                                        True,
                                        onChange=changeTrigger)
Example #15
	def __init__(self, config, datasetExpr, datasetNick = None, datasetID = 0):
		changeTrigger = triggerResync(['datasets', 'parameters'])
		self._lumi_filter = config.getLookup('lumi filter', {}, parser = parseLumiFilter, strfun = strLumi, onChange = changeTrigger)
		if not self._lumi_filter.empty():
			config.set('dataset processor', 'LumiDataProcessor', '+=')
		DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)
		# LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
		self._lumi_query = config.getBool('lumi metadata', not self._lumi_filter.empty(), onChange = changeTrigger)
		# PhEDEx blacklist: 'T1_DE_KIT', 'T1_US_FNAL' and '*_Disk' allow user jobs - other T1s don't!
		self._phedexFilter = config.getFilter('phedex sites', '-T3_US_FNALLPC',
			defaultMatcher = 'blackwhite', defaultFilter = 'weak', onChange = changeTrigger)
		self._phedexT1Filter = config.getFilter('phedex t1 accept', 'T1_DE_KIT T1_US_FNAL',
			defaultMatcher = 'blackwhite', defaultFilter = 'weak', onChange = changeTrigger)
		self._phedexT1Mode = config.getEnum('phedex t1 mode', PhedexT1Mode, PhedexT1Mode.disk, onChange = changeTrigger)
		self.onlyComplete = config.getBool('only complete sites', True, onChange = changeTrigger)
		self._locationFormat = config.getEnum('location format', CMSLocationFormat, CMSLocationFormat.hostname, onChange = changeTrigger)
		self._pjrc = JSONRestClient(url = 'https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas')

		(self._datasetPath, self._url, self._datasetBlock) = optSplit(datasetExpr, '@#')
		self._url = self._url or config.get('dbs instance', '')
		self._datasetBlock = self._datasetBlock or 'all'
		self.onlyValid = config.getBool('only valid', True, onChange = changeTrigger)
Example #16
	def __init__(self, config):
		PartitionProcessor.__init__(self, config)
		changeTrigger = triggerResync(['datasets', 'parameters'])
		self._lumi_filter = config.getLookup('lumi filter', {}, parser = parseLumiFilter, strfun = strLumi, onChange = changeTrigger)
Example #17
# |
# | Unless required by applicable law or agreed to in writing, software
# | distributed under the License is distributed on an "AS IS" BASIS,
# | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# | See the License for the specific language governing permissions and
# | limitations under the License.

import os, re, xml.dom.minidom
from grid_control import utils
from grid_control.config import triggerResync
from grid_control.datasets import DatasetError
from grid_control.datasets.provider_scan import GCProviderSetup
from grid_control.datasets.scanner_base import InfoScanner
from python_compat import all, bytes2str, ifilter, imap, lfilter, tarfile

triggerDataResync = triggerResync(['datasets', 'parameters'])

class GCProviderSetup_CMSSW(GCProviderSetup):
	scan_pipeline = ['ObjectsFromCMSSW', 'JobInfoFromOutputDir', 'FilesFromJobInfo',
		'MatchOnFilename', 'MatchDelimeter', 'MetadataFromCMSSW', 'SEListFromPath',
		'LFNFromPath', 'DetermineEvents', 'AddFilePrefix']


def readTag(base, tag, default = None):
	try:
		return str(base.getElementsByTagName(tag)[0].childNodes[0].data)
	except Exception:
		return default


def readList(base, container, items):
Example #18
    def _setupJobParameters(self, config, psrc_repository):
        TaskModule._setupJobParameters(self, config, psrc_repository)
        data_config = config.changeView(viewClass='TaggedConfigView',
                                        addSections=['dataset'])
        self._dataSplitter = None
        dataProvider = data_config.getCompositePlugin(
            'dataset',
            '',
            ':MultiDatasetProvider:',
            cls=DataProvider,
            requirePlugin=False,
            onChange=triggerResync(['datasets', 'parameters']))
        self._forceRefresh = config.getState('resync', detail='datasets')
        config.setState(False, 'resync', detail='datasets')
        if not dataProvider:
            return

        tmp_config = data_config.changeView(viewClass='TaggedConfigView',
                                            setClasses=None,
                                            setNames=None,
                                            setTags=[],
                                            addSections=['storage'])
        tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
        tmp_config = data_config.changeView(viewClass='TaggedConfigView',
                                            setClasses=None,
                                            setNames=None,
                                            setTags=[],
                                            addSections=['parameters'])
        tmp_config.set('default lookup', 'DATASETNICK')

        splitterName = data_config.get('dataset splitter',
                                       'FileBoundarySplitter')
        splitterClass = dataProvider.checkSplitter(
            DataSplitter.getClass(splitterName))
        self._dataSplitter = splitterClass(data_config)

        # Create and register dataset parameter source
        self._partProcessor = data_config.getCompositePlugin(
            'partition processor',
            'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor BasicPartitionProcessor',
            'MultiPartitionProcessor',
            cls=PartitionProcessor,
            onChange=triggerResync(['parameters']))
        dataPS = ParameterSource.createInstance('DataParameterSource',
                                                data_config.getWorkPath(),
                                                'data', dataProvider,
                                                self._dataSplitter,
                                                self._partProcessor,
                                                psrc_repository)

        # Select dataset refresh rate
        data_refresh = data_config.getTime('dataset refresh',
                                           -1,
                                           onChange=None)
        if data_refresh >= 0:
            data_refresh = max(data_refresh, dataProvider.queryLimit())
            self._log.info('Dataset source will be queried every %s',
                           strTime(data_refresh))
        dataPS.resyncSetup(interval=data_refresh, force=self._forceRefresh)

        def externalRefresh(sig, frame):
            self._log.info(
                'External signal triggered resync of dataset source')
            dataPS.resyncSetup(force=True)

        signal.signal(signal.SIGUSR2, externalRefresh)

        if self._dataSplitter.getMaxJobs() == 0:
            if data_refresh < 0:
                raise UserError(
                    'Currently used dataset does not provide jobs to process')
            self._log.warning(
                'Currently used dataset does not provide jobs to process')
Example #19
	def __init__(self, config):
		InfoScanner.__init__(self, config)
		self.includeConfig = config.getBool('include config infos', False, onChange = triggerResync(['datasets', 'parameters']))
Example #20
 def __init__(self, config):
     InfoScanner.__init__(self, config)
     self.stripPath = config.get('lfn marker',
                                 '/store/',
                                 onChange=triggerResync(
                                     ['datasets', 'parameters']))
Example #21
 def __init__(self, config):
     InfoScanner.__init__(self, config)
     self.includeConfig = config.getBool('include config infos',
                                         False,
                                         onChange=triggerResync(
                                             ['datasets', 'parameters']))
Example #22
# |
# | Unless required by applicable law or agreed to in writing, software
# | distributed under the License is distributed on an "AS IS" BASIS,
# | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# | See the License for the specific language governing permissions and
# | limitations under the License.

import os, re, xml.dom.minidom
from grid_control import utils
from grid_control.config import triggerResync
from grid_control.datasets import DatasetError
from grid_control.datasets.provider_scan import GCProviderSetup
from grid_control.datasets.scanner_base import InfoScanner
from python_compat import all, bytes2str, ifilter, imap, lfilter, tarfile

triggerDataResync = triggerResync(['datasets', 'parameters'])


class GCProviderSetup_CMSSW(GCProviderSetup):
    scan_pipeline = [
        'ObjectsFromCMSSW', 'JobInfoFromOutputDir', 'FilesFromJobInfo',
        'MatchOnFilename', 'MatchDelimeter', 'MetadataFromCMSSW',
        'SEListFromPath', 'LFNFromPath', 'DetermineEvents', 'AddFilePrefix'
    ]


def readTag(base, tag, default=None):
    try:
        return str(base.getElementsByTagName(tag)[0].childNodes[0].data)
    except Exception:
        return default
Example #23
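 # Custom onChange handler: log which dataset expression changed, then delegate to
 # the standard triggerResync(['datasets', 'parameters']) callback.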
 def onChange(config, old_obj, cur_obj, cur_entry, obj2str):
     self._log.critical('Dataset %r changed', datasetExpr)
     return triggerResync(['datasets',
                           'parameters'])(config, old_obj, cur_obj,
                                          cur_entry, obj2str)
Example #24
	def __init__(self, config):
		InfoScanner.__init__(self, config)
		self.stripPath = config.get('lfn marker', '/store/', onChange = triggerResync(['datasets', 'parameters']))