	def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
		DataProvider.__init__(self, config, datasource_name, dataset_expr, dataset_nick, dataset_proc)

		(self._path, self._events, selist) = split_opt(dataset_expr, '|@')
		self._selist = parse_list(selist, ',') or None
		if not (self._path and self._events):
			raise ConfigError('Invalid dataset expression!\nCorrect: /local/path/to/file|events[@SE1,SE2]')
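# Illustrative sketch (not part of the provider above): how a dataset
# expression of the form '/local/path/to/file|events@SE1,SE2' could be
# split into path, event count and storage element list. The helper is a
# hypothetical stand-in for split_opt/parse_list, written with plain
# string methods to show the assumed expression layout only.
def _split_dataset_expr(dataset_expr):
	(path, _, rest) = dataset_expr.partition('|')
	(events, _, selist) = rest.partition('@')
	se_list = [se.strip() for se in selist.split(',') if se.strip()] or None
	return (path, events, se_list)

# _split_dataset_expr('/local/path/to/file|5000@SE1,SE2')
# -> ('/local/path/to/file', '5000', ['SE1', 'SE2'])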
Example #2
	def _read_block(self, ds_config, dataset_expr, dataset_nick):
		metadata_name_list = parse_json(ds_config.get('metadata', '[]', on_change=None))
		common_metadata = parse_json(ds_config.get('metadata common', '[]', on_change=None))
		if len(common_metadata) > len(metadata_name_list):
			raise DatasetError('Unable to set %d common metadata items ' % len(common_metadata) +
				'with %d metadata keys' % len(metadata_name_list))
		common_prefix = ds_config.get('prefix', '', on_change=None)
		fn_list = []
		has_events = False
		has_se_list = False
		for url in ds_config.get_option_list():
			if url == 'se list':
				has_se_list = True
			elif url == 'events':
				has_events = True
			elif url not in ['dataset hash', 'metadata', 'metadata common', 'nickname', 'prefix']:
				fi = self._read_fi(ds_config, url, metadata_name_list, common_metadata, common_prefix)
				fn_list.append(fi)
		if not fn_list:
			raise DatasetError('There are no dataset files specified for dataset %r' % dataset_expr)

		result = {
			DataProvider.Nickname: ds_config.get('nickname', dataset_nick or '', on_change=None),
			DataProvider.FileList: sorted(fn_list, key=lambda fi: fi[DataProvider.URL])
		}
		result.update(DataProvider.parse_block_id(dataset_expr))
		if metadata_name_list:
			result[DataProvider.Metadata] = metadata_name_list
		if has_events:
			result[DataProvider.NEntries] = ds_config.get_int('events', -1, on_change=None)
		if has_se_list:
			result[DataProvider.Locations] = parse_list(ds_config.get('se list', '', on_change=None), ',')
		return result
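# Illustrative sketch: the check above requires that the JSON list of
# common metadata values is never longer than the JSON list of metadata
# key names (every common value needs a declared key). A standalone
# version of that check using the standard json module instead of
# parse_json:
import json

def _check_common_metadata(metadata_expr, common_expr):
	metadata_name_list = json.loads(metadata_expr or '[]')
	common_metadata = json.loads(common_expr or '[]')
	if len(common_metadata) > len(metadata_name_list):
		raise ValueError('Unable to set %d common metadata items with %d metadata keys' % (
			len(common_metadata), len(metadata_name_list)))
	return (metadata_name_list, common_metadata)

# _check_common_metadata('["KEY1", "KEY2"]', '["value1"]')
# -> (['KEY1', 'KEY2'], ['value1'])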
Example #6
	def get_path_list(self, option, default=unspecified, must_exist=True, **kwargs):
		# Return multiple resolved paths (each line processed same as get_path)
		def _patlist2pathlist(value, must_exist):
			exc = ExceptionCollector()
			search_path_list = self._config_view.config_vault.get('path:search', [])
			for pattern in value:
				try:
					for fn in resolve_paths(pattern, search_path_list, must_exist, ConfigError):
						yield fn
				except Exception:
					exc.collect()
			exc.raise_any(ConfigError('Error resolving paths'))
		return self._get_internal('paths',
			obj2str=lambda value: '\n' + str.join('\n', _patlist2pathlist(value, False)),
			str2obj=lambda value: list(_patlist2pathlist(parse_list(value, None), must_exist)),
			def2obj=None, option=option, default_obj=default, **kwargs)
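# Illustrative sketch: resolving a list of path patterns against a list of
# search directories, similar in spirit to _patlist2pathlist above. This
# uses only glob/os from the standard library; the behaviour of
# resolve_paths in the example is assumed here, not quoted.
import glob
import os

def _resolve_patterns(pattern_list, search_path_list, must_exist=True):
	for pattern in pattern_list:
		matches = []
		for base in [''] + list(search_path_list):
			matches.extend(glob.glob(os.path.join(base, pattern)))
		if must_exist and not matches:
			raise OSError('Unable to resolve path pattern %r' % pattern)
		for fn in matches:
			yield fn

# list(_resolve_patterns(['*.conf'], ['/etc'], must_exist=False))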
Example #8
	def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
		DataProvider.__init__(self, config, datasource_name, dataset_expr, dataset_nick, dataset_proc)
		self._common_prefix = max(DataProvider.enum_value_list) + 1
		self._common_metadata = max(DataProvider.enum_value_list) + 2

		self._entry_handler_info = {
			'events': (DataProvider.NEntries, int, 'block entry counter'),
			'id': (None, None, 'dataset ID'),  # legacy key - skip
			'metadata': (DataProvider.Metadata, parse_json, 'metadata description'),
			'metadata common': (self._common_metadata, parse_json, 'common metadata'),
			'nickname': (DataProvider.Nickname, str, 'dataset nickname'),
			'prefix': (self._common_prefix, str, 'common prefix'),
			'se list': (DataProvider.Locations, lambda value: parse_list(value, ','), 'block location'),
		}

		(path, self._forced_prefix, self._filter) = split_opt(dataset_expr, '@%')
		self._filename = config.resolve_path(path, True, 'Error resolving dataset file: %s' % path)
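# Illustrative sketch: how a handler table like _entry_handler_info can
# drive the parsing of key/value entries read from a dataset file. The
# target keys and parsers below are simplified stand-ins, not the actual
# DataProvider enums, and the fallback behaviour is an assumption made
# for this example:
import json

ENTRY_HANDLER_INFO = {
	'events': ('NEntries', int),
	'metadata': ('Metadata', json.loads),
	'nickname': ('Nickname', str),
	'se list': ('Locations', lambda value: [x.strip() for x in value.split(',')]),
}

def _handle_entry(block, key, value):
	if key in ENTRY_HANDLER_INFO:
		(target, parser) = ENTRY_HANDLER_INFO[key]
		block[target] = parser(value)
	else:
		# anything else is assumed here to be a file entry: <path> = <entries>
		block.setdefault('FileList', []).append((key, int(value)))

# block = {}
# _handle_entry(block, 'events', '5000')      # -> block['NEntries'] == 5000
# _handle_entry(block, 'se list', 'SE1, SE2') # -> block['Locations'] == ['SE1', 'SE2']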
Example #10
	def __init__(self, path):
		activity = Activity('Reading dataset partition file')
		self._fmt = DictFormat()
		try:
			self._tar = tarfile.open(path, 'r:')

			metadata = self._fmt.parse(self._tar.extractfile('Metadata').readlines(), key_parser={None: str})
			FilePartitionReader.__init__(self, path, metadata.pop('MaxJobs'))
			self._metadata = metadata
			activity.finish()
		except Exception:
			raise PartitionReaderError('No valid dataset splitting found in %s' % path)

		self._map_enum2parser = {
			None: str,
			DataSplitter.NEntries: int, DataSplitter.Skipped: int,
			DataSplitter.Invalid: parse_bool,
			DataSplitter.Locations: lambda x: parse_list(x, ','),
			DataSplitter.MetadataHeader: parse_json,
			DataSplitter.Metadata: lambda x: parse_json(x.strip("'"))
		}
		(self._cache_nested_fn, self._cache_nested_tar) = (None, None)
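# Illustrative sketch: reading a small text member ('Metadata') from a tar
# archive with the standard tarfile module, as the reader above does before
# handing the lines to DictFormat. The member name and the simple
# 'key = value' layout are assumptions made for this example:
import tarfile

def _read_tar_metadata(path):
	with tarfile.open(path, 'r:') as tar:
		raw = tar.extractfile('Metadata').read().decode('utf-8')
	metadata = {}
	for line in raw.splitlines():
		if '=' in line:
			(key, value) = line.split('=', 1)
			metadata[key.strip()] = value.strip()
	return metadata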
Example #11
	def _get_list_shallow(section, option):
		for (opt, value, _) in tmp_content_configfile.get(section, []):
			if opt == option:
				for entry in parse_list(value, None):
					yield entry
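# Illustrative sketch: the generator above iterates over the
# (option, value, source) tuples stored for a section and yields the
# whitespace separated entries of every matching option. A standalone
# version over a plain dict:
def _iter_option_entries(content, section, option):
	for (opt, value, _) in content.get(section, []):
		if opt == option:
			for entry in value.split():
				yield entry

# content = {'global': [('workdir', '/tmp/a /tmp/b', 'example.conf')]}
# list(_iter_option_entries(content, 'global', 'workdir')) -> ['/tmp/a', '/tmp/b']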
Example #12
	def __init__(self, config, name, broker_prefix, item_name, discover_fun):
		Broker.__init__(self, config, name, broker_prefix, item_name, discover_fun)
		self._storage_lookup = config.get_lookup('%s storage access' % broker_prefix, {}, on_change=None,
			parser=lambda x: parse_list(x, ' '), strfun=lambda x: str.join(' ', x))
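# Illustrative sketch: the parser/strfun pair passed to get_lookup above
# converts between the stored string and a list of storage elements. The
# round trip below assumes that parse_list with a space delimiter behaves
# like a whitespace-tolerant str.split:
def _parse_se_list(value):
	return [entry for entry in value.split(' ') if entry]

def _format_se_list(value_list):
	return str.join(' ', value_list)

# _parse_se_list('SE1 SE2 SE3')          -> ['SE1', 'SE2', 'SE3']
# _format_se_list(['SE1', 'SE2', 'SE3']) -> 'SE1 SE2 SE3'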
	def get_list(self, option, default=unspecified, parse_item=identity, **kwargs):
		# Get whitespace separated list (space, tab, newline)
		return self._get_internal('list',
			obj2str=lambda value: '\n' + str.join('\n', imap(str, value)),
			str2obj=lambda value: lmap(parse_item, parse_list(value, None)),
			def2obj=None, option=option, default_obj=default, **kwargs)
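# Illustrative sketch: obj2str/str2obj above serialize a list with one
# entry per line and parse it back from any whitespace separated string,
# applying parse_item to every entry. A standalone equivalent:
def _list2str(value):
	return '\n' + str.join('\n', map(str, value))

def _str2list(value, parse_item=lambda entry: entry):
	return [parse_item(entry) for entry in value.split()]

# _str2list('1 2\n3', int) -> [1, 2, 3]
# _list2str([1, 2, 3])     -> '\n1\n2\n3'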