def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
	# Dataset provider for expressions of the form /local/path/to/file|events[@SE1,SE2]
	DataProvider.__init__(self, config, datasource_name, dataset_expr, dataset_nick, dataset_proc)
	(self._path, self._events, selist) = split_opt(dataset_expr, '|@')
	self._selist = parse_list(selist, ',') or None
	if not (self._path and self._events):
		raise ConfigError('Invalid dataset expression!\nCorrect: /local/path/to/file|events[@SE1,SE2]')
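
# Standalone sketch (not grid-control code) of how a '/local/path/to/file|events[@SE1,SE2]'
# expression decomposes; the provider above does this via the split_opt and parse_list
# helpers, whose behaviour is assumed here from the error message and the ',' delimiter.
def _split_dataset_expr_sketch(expr):
	path, _, rest = expr.partition('|')
	events, _, se_str = rest.partition('@')
	se_list = [se.strip() for se in se_str.split(',') if se.strip()] or None
	return (path, events, se_list)

assert _split_dataset_expr_sketch('/local/path/to/file|10000@SE1,SE2') == \
	('/local/path/to/file', '10000', ['SE1', 'SE2'])
assert _split_dataset_expr_sketch('/local/path/to/file|10000') == \
	('/local/path/to/file', '10000', None)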
def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
	DataProvider.__init__(self, config, datasource_name, dataset_expr, dataset_nick, dataset_proc)
	self._common_prefix = max(DataProvider.enum_value_list) + 1
	self._common_metadata = max(DataProvider.enum_value_list) + 2
	# Maps dataset config keys to (target DataProvider key, value parser, description)
	self._entry_handler_info = {
		'events': (DataProvider.NEntries, int, 'block entry counter'),
		'id': (None, None, 'dataset ID'),  # legacy key - skip
		'metadata': (DataProvider.Metadata, parse_json, 'metadata description'),
		'metadata common': (self._common_metadata, parse_json, 'common metadata'),
		'nickname': (DataProvider.Nickname, str, 'dataset nickname'),
		'prefix': (self._common_prefix, str, 'common prefix'),
		'se list': (DataProvider.Locations, lambda value: parse_list(value, ','), 'block location'),
	}
	(path, self._forced_prefix, self._filter) = split_opt(dataset_expr, '@%')
	self._filename = config.resolve_path(path, True, 'Error resolving dataset file: %s' % path)
def _read_block(self, ds_config, dataset_expr, dataset_nick):
	metadata_name_list = parse_json(ds_config.get('metadata', '[]', on_change=None))
	common_metadata = parse_json(ds_config.get('metadata common', '[]', on_change=None))
	if len(common_metadata) > len(metadata_name_list):
		raise DatasetError('Unable to set %d common metadata items ' % len(common_metadata) +
			'with %d metadata keys' % len(metadata_name_list))
	common_prefix = ds_config.get('prefix', '', on_change=None)
	fn_list = []
	has_events = False
	has_se_list = False
	for url in ds_config.get_option_list():
		if url == 'se list':
			has_se_list = True
		elif url == 'events':
			has_events = True
		elif url not in ['dataset hash', 'metadata', 'metadata common', 'nickname', 'prefix']:
			fi = self._read_fi(ds_config, url, metadata_name_list, common_metadata, common_prefix)
			fn_list.append(fi)
	if not fn_list:
		raise DatasetError('There are no dataset files specified for dataset %r' % dataset_expr)
	result = {
		DataProvider.Nickname: ds_config.get('nickname', dataset_nick or '', on_change=None),
		DataProvider.FileList: sorted(fn_list, key=lambda fi: fi[DataProvider.URL])
	}
	result.update(DataProvider.parse_block_id(dataset_expr))
	if metadata_name_list:
		result[DataProvider.Metadata] = metadata_name_list
	if has_events:
		result[DataProvider.NEntries] = ds_config.get_int('events', -1, on_change=None)
	if has_se_list:
		result[DataProvider.Locations] = parse_list(ds_config.get('se list', '', on_change=None), ',')
	return result
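
# Example (assumed concrete syntax, for illustration only) of a dataset section that
# _read_block can consume; only the option names handled above are taken from the code,
# every other option key is treated as a file URL and handed to _read_fi (not shown):
#
#   [/PRIVATE/my_dataset]
#   nickname = my_dataset
#   prefix = /local/path/to
#   metadata = ["CYCLE", "RUN"]
#   metadata common = ["2017A"]
#   events = 10000
#   se list = SE1, SE2
#   file_a.root = <file entry - value format defined by _read_fi>
#   file_b.root = <file entry - value format defined by _read_fi>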
def get_path_list(self, option, default=unspecified, must_exist=True, **kwargs):
	# Return multiple resolved paths (each line processed same as get_path)
	def _patlist2pathlist(value, must_exist):
		exc = ExceptionCollector()
		search_path_list = self._config_view.config_vault.get('path:search', [])
		for pattern in value:
			try:
				for fn in resolve_paths(pattern, search_path_list, must_exist, ConfigError):
					yield fn
			except Exception:
				exc.collect()
		exc.raise_any(ConfigError('Error resolving paths'))

	return self._get_internal('paths',
		obj2str=lambda value: '\n' + str.join('\n', _patlist2pathlist(value, False)),
		str2obj=lambda value: list(_patlist2pathlist(parse_list(value, None), must_exist)),
		def2obj=None, option=option, default_obj=default, **kwargs)
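
# Usage sketch (hypothetical option name, assuming a config object `config` is at hand):
# each entry of the option value is a path pattern that resolve_paths expands against
# the 'path:search' directories from the config vault; failures are collected and
# raised as a single ConfigError after all patterns have been tried.
#   lookup_files = config.get_path_list('lookup files', [], must_exist=False)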
def get_list(self, option, default=unspecified, parse_item=identity, **kwargs):
	# Get whitespace separated list (space, tab, newline)
	return self._get_internal('list',
		obj2str=lambda value: '\n' + str.join('\n', imap(str, value)),
		str2obj=lambda value: lmap(parse_item, parse_list(value, None)),
		def2obj=None, option=option, default_obj=default, **kwargs)
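
# Standalone sketch (not the real parse_list) of the whitespace splitting that get_list
# relies on when parse_list is called with a None delimiter, plus hypothetical calls
# showing the optional per-item conversion via parse_item:
def _parse_list_sketch(value):
	return [item for item in value.split() if item]

assert _parse_list_sketch(' a.root\tb.root\nc.root ') == ['a.root', 'b.root', 'c.root']
# e.g. config.get_list('dataset files', [])               -> ['a.root', 'b.root']
#      config.get_list('job numbers', [], parse_item=int) -> [1, 2, 3]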
def __init__(self, path):
	activity = Activity('Reading dataset partition file')
	self._fmt = DictFormat()
	try:
		self._tar = tarfile.open(path, 'r:')
		metadata = self._fmt.parse(self._tar.extractfile('Metadata').readlines(),
			key_parser={None: str})
		FilePartitionReader.__init__(self, path, metadata.pop('MaxJobs'))
		self._metadata = metadata
		activity.finish()
	except Exception:
		raise PartitionReaderError('No valid dataset splitting found in %s' % path)
	self._map_enum2parser = {
		None: str,
		DataSplitter.NEntries: int,
		DataSplitter.Skipped: int,
		DataSplitter.Invalid: parse_bool,
		DataSplitter.Locations: lambda x: parse_list(x, ','),
		DataSplitter.MetadataHeader: parse_json,
		DataSplitter.Metadata: lambda x: parse_json(x.strip("'"))
	}
	(self._cache_nested_fn, self._cache_nested_tar) = (None, None)
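
# Expected archive layout (inferred from the reader above; everything beyond the
# 'Metadata' member is an assumption): an uncompressed tar ('r:') containing a
# 'Metadata' member in DictFormat with at least a 'MaxJobs' entry; per-partition
# data is read later through the nested-tar cache fields initialised at the end.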
def _get_list_shallow(section, option):
	for (opt, value, _) in tmp_content_configfile.get(section, []):
		if opt == option:
			for entry in parse_list(value, None):
				yield entry
def __init__(self, config, name, broker_prefix, item_name, discover_fun):
	Broker.__init__(self, config, name, broker_prefix, item_name, discover_fun)
	self._storage_lookup = config.get_lookup('%s storage access' % broker_prefix, {},
		on_change=None, parser=lambda x: parse_list(x, ' '), strfun=lambda x: str.join(' ', x))
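
# Usage sketch (assumed config syntax): the '<broker_prefix> storage access' lookup
# holds space separated storage element lists; the parser above turns each raw value
# into a Python list and strfun serialises it back, e.g.
#   parse_list('SE1 SE2', ' ')  ->  ['SE1', 'SE2']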