Esempio n. 1
0
	def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
		# CMS dataset provider setup: configures lumi filtering, PhEDEx site
		# filtering and parses dataset_expr into path / DBS instance / block selector.
		dataset_config = config.change_view(default_on_change=TriggerResync(['datasets', 'parameters']))
		# Lumi filter: generic 'lumi filter' option or the datasource-specific variant
		self._lumi_filter = dataset_config.get_lookup(['lumi filter', '%s lumi filter' % datasource_name],
			default={}, parser=parse_lumi_filter, strfun=str_lumi)
		if not self._lumi_filter.empty():
			# An active lumi filter requires the LumiDataProcessor in the processing chain
			config.set('%s processor' % datasource_name, 'LumiDataProcessor', '+=')
		DataProvider.__init__(self, config, datasource_name, dataset_expr, dataset_nick, dataset_proc)
		# LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
		self._lumi_query = dataset_config.get_bool(
			['lumi metadata', '%s lumi metadata' % datasource_name], default=not self._lumi_filter.empty())
		config.set('phedex sites matcher mode', 'ShellStyleMatcher', '?=')
		# PhEDex blacklist: 'T1_*_Disk nodes allow user jobs - other T1's dont!
		self._phedex_filter = dataset_config.get_filter('phedex sites', '-* T1_*_Disk T2_* T3_*',
			default_matcher='BlackWhiteMatcher', default_filter='StrictListFilter')
		self._only_complete = dataset_config.get_bool('only complete sites', True)
		self._only_valid = dataset_config.get_bool('only valid', True)
		self._location_format = dataset_config.get_enum('location format',
			CMSLocationFormat, CMSLocationFormat.hostname)
		# REST client used to query PhEDEx for block replica information
		self._pjrc = JSONRestClient(url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas')
		self._sitedb = SiteDB()

		# dataset_expr format (from the separators used): <path>[@<dbs instance>][#<block selector>]
		dataset_expr_parts = split_opt(dataset_expr, '@#')
		(self._dataset_path, self._dataset_instance, self._dataset_block_selector) = dataset_expr_parts
		instance_default = dataset_config.get('dbs instance', '')
		self._dataset_instance = self._dataset_instance or instance_default
		if not self._dataset_instance:
			self._dataset_instance = 'prod/global'
		elif '/' not in self._dataset_instance:
			# A bare instance name (no '/') is interpreted relative to 'prod'
			self._dataset_instance = 'prod/%s' % self._dataset_instance
		self._dataset_block_selector = self._dataset_block_selector or 'all'
	def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
		# Expected dataset expression: /local/path/to/file|events[@SE1,SE2]
		DataProvider.__init__(self, config, datasource_name, dataset_expr, dataset_nick, dataset_proc)

		(self._path, self._events, se_str) = split_opt(dataset_expr, '|@')
		self._selist = parse_list(se_str, ',') or None
		if self._path and self._events:
			return
		raise ConfigError('Invalid dataset expression!\nCorrect: /local/path/to/file|events[@SE1,SE2]')
Esempio n. 3
0
    def __init__(self, config, datasource_name, dataset_expr,
                 dataset_nick=None, dataset_proc=None):
        """Provider reading block information from a dataset listing file.

        dataset_expr format: <path to dataset file>[@<forced prefix>][%<filter>]
        """
        DataProvider.__init__(self, config, datasource_name, dataset_expr,
                              dataset_nick, dataset_proc)
        # Reserve two fresh slots past the DataProvider enum values for
        # information shared by all entries in the file (prefix / metadata)
        last_enum_value = max(DataProvider.enum_value_list)
        self._common_prefix = last_enum_value + 1
        self._common_metadata = last_enum_value + 2

        # Maps dataset file key -> (target enum value, value parser, description);
        # a None target marks a key that is recognized but skipped
        self._entry_handler_info = {
            'events': (DataProvider.NEntries, int, 'block entry counter'),
            'id': (None, None, 'dataset ID'),  # legacy key - skip
            'metadata': (DataProvider.Metadata, parse_json, 'metadata description'),
            'metadata common': (self._common_metadata, parse_json, 'common metadata'),
            'nickname': (DataProvider.Nickname, str, 'dataset nickname'),
            'prefix': (self._common_prefix, str, 'common prefix'),
            'se list': (DataProvider.Locations,
                        lambda value: parse_list(value, ','), 'block location'),
        }

        (path, self._forced_prefix, self._filter) = split_opt(dataset_expr, '@%')
        self._filename = config.resolve_path(
            path, True, 'Error resolving dataset file: %s' % path)
Esempio n. 4
0
	def _setup(self, setup_vn, setup_key, setup_mod):
		# Register a delimiter-based setup rule for variable setup_vn and return
		# the list of handled variable names ([] when no setup_key is given).
		# setup_key format (presumably): <delimiter>[::<start index>[::<end index>]] - TODO confirm against callers
		# setup_mod: optional python expression applied per value; empty or 'value' keeps values unchanged.
		if not setup_key:
			return []
		(delim, delim_start_str, delim_end_str) = split_opt(setup_key, '::')
		modifier = identity
		if setup_mod and (setup_mod.strip() != 'value'):
			try:
				# NOTE(review): eval of a config-supplied expression - only safe for trusted config files
				modifier = eval('lambda value: ' + setup_mod)  # pylint:disable=eval-used
			except Exception:
				# fixed typo in user-facing message: 'delimeter' -> 'delimiter'
				raise ConfigError('Unable to parse delimiter modifier %r' % setup_mod)
		(delim_start, delim_end) = (parse_str(delim_start_str, int), parse_str(delim_end_str, int))
		self._setup_arg_list.append((setup_vn, delim, delim_start, delim_end, modifier))
		return [setup_vn]
Esempio n. 5
0
    def __init__(self,
                 config,
                 datasource_name,
                 dataset_expr,
                 dataset_nick=None,
                 dataset_proc=None):
        # CMS dataset provider setup: configures lumi filtering, PhEDEx site
        # filtering and parses dataset_expr into path / DBS instance / block selector.
        dataset_config = config.change_view(
            default_on_change=TriggerResync(['datasets', 'parameters']))
        # Lumi filter: generic 'lumi filter' option or the datasource-specific variant
        self._lumi_filter = dataset_config.get_lookup(
            ['lumi filter', '%s lumi filter' % datasource_name],
            default={},
            parser=parse_lumi_filter,
            strfun=str_lumi)
        if not self._lumi_filter.empty():
            # An active lumi filter requires the LumiDataProcessor in the processing chain
            config.set('%s processor' % datasource_name, 'LumiDataProcessor',
                       '+=')
        DataProvider.__init__(self, config, datasource_name, dataset_expr,
                              dataset_nick, dataset_proc)
        # LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
        self._lumi_query = dataset_config.get_bool(
            ['lumi metadata',
             '%s lumi metadata' % datasource_name],
            default=not self._lumi_filter.empty())
        config.set('phedex sites matcher mode', 'ShellStyleMatcher', '?=')
        # PhEDex blacklist: 'T1_*_Disk nodes allow user jobs - other T1's dont!
        self._phedex_filter = dataset_config.get_filter(
            'phedex sites',
            '-* T1_*_Disk T2_* T3_*',
            default_matcher='BlackWhiteMatcher',
            default_filter='StrictListFilter')
        self._only_complete = dataset_config.get_bool('only complete sites',
                                                      True)
        self._only_valid = dataset_config.get_bool('only valid', True)
        self._allow_phedex = dataset_config.get_bool('allow phedex', True)
        self._location_format = dataset_config.get_enum(
            'location format', CMSLocationFormat, CMSLocationFormat.hostname)
        # REST client used to query PhEDEx for block replica information
        self._pjrc = JSONRestClient(
            url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas'
        )
        self._sitedb = SiteDB()

        # dataset_expr format (from the separators used): <path>[@<dbs instance>][#<block selector>]
        dataset_expr_parts = split_opt(dataset_expr, '@#')
        (self._dataset_path, self._dataset_instance,
         self._dataset_block_selector) = dataset_expr_parts
        instance_default = dataset_config.get('dbs instance', '')
        self._dataset_instance = self._dataset_instance or instance_default
        if not self._dataset_instance:
            self._dataset_instance = 'prod/global'
        elif '/' not in self._dataset_instance:
            # A bare instance name (no '/') is interpreted relative to 'prod'
            self._dataset_instance = 'prod/%s' % self._dataset_instance
        self._dataset_block_selector = self._dataset_block_selector or 'all'
Esempio n. 6
0
    def __init__(self, config, datasource_name, dataset_expr,
                 dataset_nick=None, dataset_proc=None):
        # dataset_expr syntax: /local/path/to/file|events[@SE1,SE2]
        DataProvider.__init__(self, config, datasource_name, dataset_expr,
                              dataset_nick, dataset_proc)

        (self._path, self._events, location_str) = split_opt(dataset_expr, '|@')
        self._selist = parse_list(location_str, ',') or None
        has_required_parts = bool(self._path and self._events)
        if not has_required_parts:
            raise ConfigError(
                'Invalid dataset expression!\nCorrect: /local/path/to/file|events[@SE1,SE2]'
            )
	def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
		"""Provider reading block information from a dataset listing file.

		dataset_expr format: <path to dataset file>[@<forced prefix>][%<filter>]
		"""
		DataProvider.__init__(self, config, datasource_name, dataset_expr, dataset_nick, dataset_proc)
		# Reserve two fresh slots past the DataProvider enum values for
		# information shared by all entries in the file (prefix / metadata)
		self._common_prefix = max(DataProvider.enum_value_list) + 1
		self._common_metadata = max(DataProvider.enum_value_list) + 2

		# Maps dataset file key -> (target enum value, value parser, description);
		# a None target marks a key that is recognized but skipped
		self._entry_handler_info = {
			'events': (DataProvider.NEntries, int, 'block entry counter'),
			'id': (None, None, 'dataset ID'),  # legacy key - skip
			'metadata': (DataProvider.Metadata, parse_json, 'metadata description'),
			'metadata common': (self._common_metadata, parse_json, 'common metadata'),
			'nickname': (DataProvider.Nickname, str, 'dataset nickname'),
			'prefix': (self._common_prefix, str, 'common prefix'),
			'se list': (DataProvider.Locations, lambda value: parse_list(value, ','), 'block location'),
		}

		(path, self._forced_prefix, self._filter) = split_opt(dataset_expr, '@%')
		self._filename = config.resolve_path(path, True, 'Error resolving dataset file: %s' % path)
Esempio n. 8
0
 def _setup(self, setup_vn, setup_key, setup_mod):
     # Register a delimiter-based setup rule for variable setup_vn and return
     # the list of handled variable names ([] when no setup_key is given).
     # setup_key format (presumably): <delimiter>[::<start>[::<end>]] - TODO confirm against callers
     # setup_mod: optional python expression applied per value;
     # empty or 'value' keeps values unchanged.
     if not setup_key:
         return []
     (delim, delim_start_str,
      delim_end_str) = split_opt(setup_key, '::')
     modifier = identity
     if setup_mod and (setup_mod.strip() != 'value'):
         try:
             # NOTE(review): eval of a config-supplied expression - trusted config input only
             modifier = eval('lambda value: ' + setup_mod)  # pylint:disable=eval-used
         except Exception:
             # fixed typo in user-facing message: 'delimeter' -> 'delimiter'
             raise ConfigError('Unable to parse delimiter modifier %r' %
                               setup_mod)
     (delim_start, delim_end) = (parse_str(delim_start_str, int),
                                 parse_str(delim_end_str, int))
     self._setup_arg_list.append(
         (setup_vn, delim, delim_start, delim_end, modifier))
     return [setup_vn]
Esempio n. 9
0
	def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
		# Provider that scans the output of another grid-control task.
		# dataset_expr is either a work directory or a task config file
		# (optionally '<config>%<job selector>').
		ds_config = config.change_view(view_class='TaggedConfigView', add_names=[md5_hex(dataset_expr)])
		if os.path.isdir(dataset_expr):
			scanner_list = ['OutputDirsFromWork']
			ds_config.set('source directory', dataset_expr)
			dataset_expr = os.path.join(dataset_expr, 'work.conf')
		else:
			scanner_list = ['OutputDirsFromConfig', 'MetadataFromTask']
			dataset_expr, selector = split_opt(dataset_expr, '%')
			ds_config.set('source config', dataset_expr)
			ds_config.set('source job selector', selector)
		ext_config = create_config(dataset_expr)
		ext_task_name = ext_config.change_view(set_sections=['global']).get(['module', 'task'])
		# Walk the external task's class hierarchy until a matching GCProviderSetup
		# is found (fix: removed a dead 'ext_task_cls = Plugin.get_class(...)'
		# assignment that was immediately overwritten by the loop variable)
		for ext_task_cls in Plugin.get_class(ext_task_name).iter_class_bases():
			scan_setup_name = 'GCProviderSetup_' + ext_task_cls.__name__
			scan_setup_cls = GCProviderSetup.get_class(scan_setup_name, ignore_missing=True)
			if scan_setup_cls:
				scanner_list += scan_setup_cls.scanner_list
				break
		ScanProviderBase.__init__(self, ds_config, datasource_name, dataset_expr,
			dataset_nick, dataset_proc, scanner_list)