Example #1
	def _read_block(self, ds_config, dataset_expr, dataset_nick):
		# Declared metadata key names and the metadata values shared by all files in the block
		metadata_name_list = parse_json(ds_config.get('metadata', '[]', on_change=None))
		common_metadata = parse_json(ds_config.get('metadata common', '[]', on_change=None))
		if len(common_metadata) > len(metadata_name_list):
			raise DatasetError('Unable to set %d common metadata items ' % len(common_metadata) +
				'with %d metadata keys' % len(metadata_name_list))
		common_prefix = ds_config.get('prefix', '', on_change=None)
		fn_list = []
		has_events = False
		has_se_list = False
		# Every config option that is not a reserved keyword is treated as a file entry
		for url in ds_config.get_option_list():
			if url == 'se list':
				has_se_list = True
			elif url == 'events':
				has_events = True
			elif url not in ['dataset hash', 'metadata', 'metadata common', 'nickname', 'prefix']:
				fi = self._read_fi(ds_config, url, metadata_name_list, common_metadata, common_prefix)
				fn_list.append(fi)
		if not fn_list:
			raise DatasetError('There are no dataset files specified for dataset %r' % dataset_expr)

		# Assemble the block description; the file list is sorted by URL for stable output
		result = {
			DataProvider.Nickname: ds_config.get('nickname', dataset_nick or '', on_change=None),
			DataProvider.FileList: sorted(fn_list, key=lambda fi: fi[DataProvider.URL])
		}
		result.update(DataProvider.parse_block_id(dataset_expr))
		if metadata_name_list:
			result[DataProvider.Metadata] = metadata_name_list
		if has_events:
			result[DataProvider.NEntries] = ds_config.get_int('events', -1, on_change=None)
		if has_se_list:
			result[DataProvider.Locations] = parse_list(ds_config.get('se list', '', on_change=None), ',')
		return result
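For reference, a sketch of a config section this method could parse. The reserved option names ('metadata', 'metadata common', 'nickname', 'prefix', 'events', 'se list') come from the code above; the section name, file names, and values are purely illustrative. Every non-reserved option is read as a file entry via _read_fi (Example #3 below):

[dataset]
nickname = test_dataset
prefix = /store/data
metadata = ["RUN", "LUMI"]
metadata common = [2012]
events = 5000
se list = site-a.example.org, site-b.example.org
/file1.root = 1000 [42]
/file2.root = 4000 [43]

Here the common metadata value 2012 fills the first declared key (RUN) for every file, while each file's JSON list fills the remaining keys.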
Example #2
	def _process_json_result(self, value):
		if not value:
			raise RestError('Received empty reply')
		try:
			return parse_json(value)
		except Exception:
			raise RestError('Received invalid JSON reply: %r' % value)
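The same guard pattern can be reproduced with only the standard library; a minimal, self-contained sketch (this RestError is a stand-in for the exception class used above, and json.loads for parse_json):

import json

class RestError(Exception):
	pass

def process_json_result(value):
	# Reject empty replies outright instead of passing them to the parser
	if not value:
		raise RestError('Received empty reply')
	try:
		return json.loads(value)
	except ValueError:
		# Quote the raw reply so malformed responses are easy to debug
		raise RestError('Received invalid JSON reply: %r' % value)

print(process_json_result('{"status": "ok"}'))  # {'status': 'ok'}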
Example #3
	def _read_fi(self, ds_config, url, metadata_name_list, common_metadata, common_prefix):
		# Each file option holds "<entry count> [<JSON list of per-file metadata>]"
		info = ds_config.get(url, on_change=None)
		tmp = info.split(' ', 1)
		fi = {DataProvider.URL: common_prefix + url, DataProvider.NEntries: int(tmp[0])}
		if common_metadata:
			fi[DataProvider.Metadata] = common_metadata
		if len(tmp) == 2:
			file_metadata = parse_json(tmp[1])
			# Combined metadata must not exceed the number of declared metadata keys
			if len(common_metadata) + len(file_metadata) > len(metadata_name_list):
				raise DatasetError('Unable to set %d file metadata items ' % len(file_metadata) +
					'with %d metadata keys ' % len(metadata_name_list) +
					'(%d common metadata items)' % len(common_metadata))
			fi[DataProvider.Metadata] = fi.get(DataProvider.Metadata, []) + file_metadata
		return fi
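A standalone sketch of the same parsing logic, assuming the value format '<entry count> [<JSON list>]' implied by the split above; plain dict keys stand in for the DataProvider constants, and the metadata-count check is omitted for brevity:

import json

def read_file_entry(url, info, common_prefix='', common_metadata=None):
	# info holds the entry count, optionally followed by per-file JSON metadata
	tmp = info.split(' ', 1)
	fi = {'url': common_prefix + url, 'entries': int(tmp[0])}
	fi['metadata'] = list(common_metadata or [])
	if len(tmp) == 2:
		fi['metadata'] += json.loads(tmp[1])
	return fi

print(read_file_entry('/file1.root', '1000 [42]', common_prefix='/store/data'))
# {'url': '/store/data/file1.root', 'entries': 1000, 'metadata': [42]}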
Example #4
	def __init__(self, fn):
		ParameterSource.__init__(self)
		fp = GZipTextFile(fn, 'r')
		try:
			# The first line holds a '#'-prefixed JSON list with the output variable names
			header = fp.readline().lstrip('#').strip()
			self._output_vn_list = []
			if header:
				self._output_vn_list = parse_json(header)

			def _parse_line(line):
				# Data lines look like "<pnum>\t<json>[\t<json>...]";
				# a '!' in the number column marks the entry as invalidated
				if not line.startswith('#'):
					pnum_str, stored_json = line.split('\t', 1)
					is_invalid = '!' in pnum_str
					pnum = int(pnum_str.replace('!', ' '))
					return (is_invalid, pnum, lmap(parse_json, stored_json.strip().split('\t')))
			self._values = lmap(_parse_line, fp.readlines())
		finally:
			fp.close()
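The line format handled by _parse_line can be exercised in isolation; a minimal sketch using json.loads in place of parse_json (the sample line is made up, and whether the '!' marker precedes or follows the number is an assumption; the replace-based parsing tolerates both):

import json

def parse_line(line):
	# Data lines: "<pnum>\t<json>[\t<json>...]"; '!' flags an invalidated entry
	if line.startswith('#'):
		return None  # comment lines carry no parameter data
	pnum_str, stored_json = line.split('\t', 1)
	is_invalid = '!' in pnum_str
	pnum = int(pnum_str.replace('!', ' '))
	return (is_invalid, pnum, [json.loads(x) for x in stored_json.strip().split('\t')])

print(parse_line('12!\t"value"\t[1, 2]'))
# (True, 12, ['value', [1, 2]])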
Example #5
	def __init__(self, path):
		activity = Activity('Reading dataset partition file')
		self._fmt = DictFormat()
		try:
			self._tar = tarfile.open(path, 'r:')
			# The 'Metadata' member stores global information, including the partition count ('MaxJobs')
			metadata = self._fmt.parse(self._tar.extractfile('Metadata').readlines(), key_parser={None: str})
			FilePartitionReader.__init__(self, path, metadata.pop('MaxJobs'))
			self._metadata = metadata
			activity.finish()
		except Exception:
			raise PartitionReaderError('No valid dataset splitting found in %s' % path)

		# Map partition keys to parsers for the string values stored in the tar file
		self._map_enum2parser = {
			None: str,
			DataSplitter.NEntries: int, DataSplitter.Skipped: int,
			DataSplitter.Invalid: parse_bool,
			DataSplitter.Locations: lambda x: parse_list(x, ','),
			DataSplitter.MetadataHeader: parse_json,
			DataSplitter.Metadata: lambda x: parse_json(x.strip("'"))
		}
		(self._cache_nested_fn, self._cache_nested_tar) = (None, None)
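The _map_enum2parser table is a small dispatch mechanism: each stored value is a string, and the key decides which parser turns it back into a Python object. A minimal sketch of the idea with plain string keys standing in for the DataSplitter constants (all names illustrative):

import json

map_key2parser = {
	None: str,  # fallback parser for keys without a dedicated entry
	'NEntries': int,
	'Locations': lambda value: value.split(','),
	'Metadata': lambda value: json.loads(value.strip("'")),
}

def parse_value(key, raw_value):
	# Pick the parser registered for this key, falling back to str
	return map_key2parser.get(key, map_key2parser[None])(raw_value)

print(parse_value('NEntries', '5000'))            # 5000
print(parse_value('Locations', 'a.site,b.site'))  # ['a.site', 'b.site']
print(parse_value('Comment', 'free text'))        # 'free text' (fallback)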