def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
	dataset_config = config.change_view(default_on_change=TriggerResync(['datasets', 'parameters']))
	self._lumi_filter = dataset_config.get_lookup(['lumi filter', '%s lumi filter' % datasource_name],
		default={}, parser=parse_lumi_filter, strfun=str_lumi)
	if not self._lumi_filter.empty():
		config.set('%s processor' % datasource_name, 'LumiDataProcessor', '+=')
	DataProvider.__init__(self, config, datasource_name, dataset_expr, dataset_nick, dataset_proc)
	# LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
	self._lumi_query = dataset_config.get_bool(['lumi metadata', '%s lumi metadata' % datasource_name],
		default=not self._lumi_filter.empty())
	config.set('phedex sites matcher mode', 'ShellStyleMatcher', '?=')
	# PhEDex blacklist: T1_*_Disk nodes allow user jobs - other T1 nodes don't!
	self._phedex_filter = dataset_config.get_filter('phedex sites', '-* T1_*_Disk T2_* T3_*',
		default_matcher='BlackWhiteMatcher', default_filter='StrictListFilter')
	self._only_complete = dataset_config.get_bool('only complete sites', True)
	self._only_valid = dataset_config.get_bool('only valid', True)
	self._allow_phedex = dataset_config.get_bool('allow phedex', True)
	self._location_format = dataset_config.get_enum('location format',
		CMSLocationFormat, CMSLocationFormat.hostname)
	self._pjrc = JSONRestClient(url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas')
	self._sitedb = SiteDB()

	# dataset expression: <dataset path>[@<dbs instance>][#<block selector>]
	dataset_expr_parts = split_opt(dataset_expr, '@#')
	(self._dataset_path, self._dataset_instance, self._dataset_block_selector) = dataset_expr_parts
	instance_default = dataset_config.get('dbs instance', '')
	self._dataset_instance = self._dataset_instance or instance_default
	if not self._dataset_instance:
		self._dataset_instance = 'prod/global'
	elif '/' not in self._dataset_instance:
		self._dataset_instance = 'prod/%s' % self._dataset_instance
	self._dataset_block_selector = self._dataset_block_selector or 'all'
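# --- Illustration only --------------------------------------------------------
# A minimal standalone sketch of the DBS instance fallback chain implemented
# above (expression value -> 'dbs instance' option -> 'prod/global'); the helper
# name is hypothetical and not part of the provider:
def _example_normalize_dbs_instance(instance, instance_default=''):
	instance = instance or instance_default
	if not instance:
		return 'prod/global'  # no instance given anywhere
	if '/' not in instance:
		return 'prod/%s' % instance  # bare name, e.g. 'phys03'
	return instance  # already fully qualified, e.g. 'int/global'

assert _example_normalize_dbs_instance('') == 'prod/global'
assert _example_normalize_dbs_instance('phys03') == 'prod/phys03'
assert _example_normalize_dbs_instance('int/global') == 'int/global'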
def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
	DataProvider.__init__(self, config, datasource_name, dataset_expr, dataset_nick, dataset_proc)
	# dataset expression: <path>|<events>[@<se list>]
	(self._path, self._events, selist) = split_opt(dataset_expr, '|@')
	self._selist = parse_list(selist, ',') or None
	if not (self._path and self._events):
		raise ConfigError('Invalid dataset expression!\nCorrect: /local/path/to/file|events[@SE1,SE2]')
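# --- Illustration only --------------------------------------------------------
# The error message above documents the expected expression shape. This sketch
# mimics how split_opt is used at both call sites (base string first, then one
# entry per delimiter, '' when absent) - an assumption about split_opt, whose
# real implementation is not shown here:
def _example_split_opt(expr, delims):
	parts = []
	for delim in reversed(delims):
		if delim in expr:
			(expr, part) = expr.rsplit(delim, 1)
		else:
			part = ''
		parts.append(part)
	return tuple([expr] + list(reversed(parts)))

assert _example_split_opt('/data/files|5000@SE1,SE2', '|@') == ('/data/files', '5000', 'SE1,SE2')
assert _example_split_opt('/data/files|5000', '|@') == ('/data/files', '5000', '')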
def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
	DataProvider.__init__(self, config, datasource_name, dataset_expr, dataset_nick, dataset_proc)
	self._common_prefix = max(DataProvider.enum_value_list) + 1
	self._common_metadata = max(DataProvider.enum_value_list) + 2
	self._entry_handler_info = {
		'events': (DataProvider.NEntries, int, 'block entry counter'),
		'id': (None, None, 'dataset ID'),  # legacy key - skip
		'metadata': (DataProvider.Metadata, parse_json, 'metadata description'),
		'metadata common': (self._common_metadata, parse_json, 'common metadata'),
		'nickname': (DataProvider.Nickname, str, 'dataset nickname'),
		'prefix': (self._common_prefix, str, 'common prefix'),
		'se list': (DataProvider.Locations, lambda value: parse_list(value, ','), 'block location'),
	}
	# dataset expression: <dataset file>[@<forced prefix>][%<filter>]
	(path, self._forced_prefix, self._filter) = split_opt(dataset_expr, '@%')
	self._filename = config.resolve_path(path, True, 'Error resolving dataset file: %s' % path)
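# --- Illustration only --------------------------------------------------------
# A dataset file entry that the handler map above could process. Only the keys
# and value formats follow from _entry_handler_info; the INI-like block layout
# shown here is an assumption about the file format:
#
#   [/PRIVATE/MyDataset#block1]
#   nickname = my_nick
#   events = 12000
#   prefix = /store/user/me
#   se list = SE1,SE2
#   metadata = ["KEY1", "KEY2"]
#   metadata common = ["shared", 42]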
def _setup(self, setup_vn, setup_key, setup_mod):
	if setup_key:
		(delim, delim_start_str, delim_end_str) = split_opt(setup_key, '::')
		modifier = identity
		if setup_mod and (setup_mod.strip() != 'value'):
			try:
				modifier = eval('lambda value: ' + setup_mod)  # pylint:disable=eval-used
			except Exception:
				raise ConfigError('Unable to parse delimiter modifier %r' % setup_mod)
		(delim_start, delim_end) = (parse_str(delim_start_str, int), parse_str(delim_end_str, int))
		self._setup_arg_list.append((setup_vn, delim, delim_start, delim_end, modifier))
		return [setup_vn]
	return []
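# --- Illustration only --------------------------------------------------------
# The eval above turns a config fragment into a callable by prepending
# 'lambda value: '. The modifier string used here is made up for demonstration:
_example_modifier = eval('lambda value: ' + 'value.split("_")[0]')  # pylint:disable=eval-used
assert _example_modifier('T2_DE_DESY') == 'T2'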
def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
	ds_config = config.change_view(view_class='TaggedConfigView', add_names=[md5_hex(dataset_expr)])
	if os.path.isdir(dataset_expr):
		scanner_list = ['OutputDirsFromWork']
		ds_config.set('source directory', dataset_expr)
		dataset_expr = os.path.join(dataset_expr, 'work.conf')
	else:
		scanner_list = ['OutputDirsFromConfig', 'MetadataFromTask']
		(dataset_expr, selector) = split_opt(dataset_expr, '%')
		ds_config.set('source config', dataset_expr)
		ds_config.set('source job selector', selector)
	ext_config = create_config(dataset_expr)
	ext_task_name = ext_config.change_view(set_sections=['global']).get(['module', 'task'])
	# walk the task class hierarchy and use the first matching GCProviderSetup
	for ext_task_cls in Plugin.get_class(ext_task_name).iter_class_bases():
		scan_setup_name = 'GCProviderSetup_' + ext_task_cls.__name__
		scan_setup_cls = GCProviderSetup.get_class(scan_setup_name, ignore_missing=True)
		if scan_setup_cls:
			scanner_list += scan_setup_cls.scanner_list
			break
	ScanProviderBase.__init__(self, ds_config, datasource_name, dataset_expr,
		dataset_nick, dataset_proc, scanner_list)
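# --- Illustration only --------------------------------------------------------
# A sketch of the lookup pattern in the loop above: walk a task class and its
# bases, stop at the first one with a matching 'GCProviderSetup_<classname>'
# entry. Plain classes and a dict stand in for the plugin registry; all names
# and scanner values below are made up:
_example_registry = {'GCProviderSetup_ExampleUserTask': ['ExampleScanner']}

class ExampleTask(object):
	pass

class ExampleUserTask(ExampleTask):
	pass

class ExampleDerivedTask(ExampleUserTask):
	pass  # no setup of its own - inherits the one from ExampleUserTask

def _example_find_scanners(task_cls):
	for base_cls in task_cls.__mro__:  # stand-in for iter_class_bases()
		scanners = _example_registry.get('GCProviderSetup_' + base_cls.__name__)
		if scanners:
			return scanners  # first (most derived) match wins
	return []

assert _example_find_scanners(ExampleDerivedTask) == ['ExampleScanner']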