def _readBlockFromConfig(self, config, datasetExpr, datasetNick, datasetID):
	common_metadata = parseJSON(config.get('metadata common', '[]', onChange = None))
	common_prefix = config.get('prefix', '', onChange = None)
	file_list = []
	has_events = False
	has_se_list = False
	for url in config.getOptions():
		if url == 'se list':
			has_se_list = True
		elif url == 'events':
			has_events = True
		elif url not in ['dataset hash', 'id', 'metadata', 'metadata common', 'nickname', 'prefix']:
			info = config.get(url, onChange = None)
			tmp = info.split(' ', 1)
			fi = {DataProvider.URL: common_prefix + url, DataProvider.NEntries: int(tmp[0])}
			if common_metadata:
				fi[DataProvider.Metadata] = common_metadata
			if len(tmp) == 2:
				fi[DataProvider.Metadata] = fi.get(DataProvider.Metadata, []) + parseJSON(tmp[1])
			file_list.append(fi)
	if not file_list:
		raise DatasetError('There are no dataset files specified for dataset %r' % datasetExpr)
	result = {
		DataProvider.Nickname: config.get('nickname', datasetNick, onChange = None),
		DataProvider.DatasetID: config.getInt('id', datasetID, onChange = None),
		DataProvider.Dataset: datasetExpr,
		DataProvider.Metadata: parseJSON(config.get('metadata', '[]', onChange = None)),
		DataProvider.FileList: sorted(file_list, key = lambda fi: fi[DataProvider.URL]),
	}
	if has_events:
		result[DataProvider.NEntries] = config.getInt('events', -1, onChange = None)
	if has_se_list:
		result[DataProvider.Locations] = parseList(config.get('se list', '', onChange = None), ',')
	return result

def _readBlockFromConfig(self, ds_config, datasetExpr, datasetNick, datasetID):
	metadata_keys = parseJSON(ds_config.get('metadata', '[]', onChange = None))
	common_metadata = parseJSON(ds_config.get('metadata common', '[]', onChange = None))
	if len(common_metadata) > len(metadata_keys):
		raise DatasetError('Unable to set %d common metadata items with %d metadata keys' %
			(len(common_metadata), len(metadata_keys)))
	common_prefix = ds_config.get('prefix', '', onChange = None)
	file_list = []
	has_events = False
	has_se_list = False
	for url in ds_config.getOptions():
		if url == 'se list':
			has_se_list = True
		elif url == 'events':
			has_events = True
		elif url not in ['dataset hash', 'id', 'metadata', 'metadata common', 'nickname', 'prefix']:
			# Every option that is not a reserved keyword is a file entry
			file_list.append(self._readFileFromConfig(ds_config, url, metadata_keys, common_metadata, common_prefix))
	if not file_list:
		raise DatasetError('There are no dataset files specified for dataset %r' % datasetExpr)
	result = {
		DataProvider.Nickname: ds_config.get('nickname', datasetNick, onChange = None),
		DataProvider.DatasetID: ds_config.getInt('id', datasetID, onChange = None),
		DataProvider.Dataset: datasetExpr,
		DataProvider.FileList: sorted(file_list, key = lambda fi: fi[DataProvider.URL]),
	}
	if metadata_keys:
		result[DataProvider.Metadata] = metadata_keys
	if has_events:
		result[DataProvider.NEntries] = ds_config.getInt('events', -1, onChange = None)
	if has_se_list:
		result[DataProvider.Locations] = parseList(ds_config.get('se list', '', onChange = None), ',')
	return result

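# For illustration only: a config section consumed by _readBlockFromConfig might
# look like the sketch below (file names and values are hypothetical; the reserved
# option names match the keyword list filtered in the loop above). Each
# non-reserved option is a file entry of the form
# '<number of entries> [<JSON list of metadata values>]':
#
#   nickname = my_sample
#   id = 42
#   prefix = /store/data/
#   metadata = ["RUN", "LUMI"]
#   metadata common = [2012]
#   se list = se1.example.org, se2.example.org
#   file_a.root = 600 [[1, 10]]
#   file_b.root = 400 [[11, 20]]
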
def _process_json_result(self, value):
	if not value:
		raise RestError('Received empty reply')
	try:
		return parseJSON(value)
	except Exception:
		raise RestError('Received invalid JSON reply: %r' % value)

def queryDAS(self, query):
	if self._instance:
		query += ' instance=%s' % self._instance
	(start, sleep) = (time.time(), 0.4)
	while time.time() - start < 60:
		tmp = readURL(self._url, {'input': query}, {'Accept': 'application/json'})
		if len(tmp) != 32:  # a 32 character reply means the query is still being processed
			return parseJSON(tmp.replace('\'', '"'))['data']
		time.sleep(sleep)
		sleep += 0.4

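# Standalone sketch of the polling pattern used in queryDAS above (poll_until_ready
# and its arguments are hypothetical names, not grid-control API): retry with a
# linearly growing delay until the service returns a usable reply or the deadline
# passes.
import time

def poll_until_ready(fetch, is_ready, timeout = 60, step = 0.4):
	(start, delay) = (time.time(), step)
	while time.time() - start < timeout:
		reply = fetch()
		if is_ready(reply):
			return reply
		time.sleep(delay)
		delay += step

# Usage sketch: poll_until_ready(lambda: readURL(...), lambda reply: len(reply) != 32)
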
def _readFileFromConfig(self, ds_config, url, metadata_keys, common_metadata, common_prefix):
	info = ds_config.get(url, onChange = None)
	tmp = info.split(' ', 1)  # format: '<number of entries> [<JSON metadata>]'
	fi = {DataProvider.URL: common_prefix + url, DataProvider.NEntries: int(tmp[0])}
	if common_metadata:
		fi[DataProvider.Metadata] = common_metadata
	if len(tmp) == 2:
		file_metadata = parseJSON(tmp[1])
		if len(common_metadata) + len(file_metadata) > len(metadata_keys):
			raise DatasetError('Unable to set %d file metadata items with %d metadata keys (%d common metadata items)' %
				(len(file_metadata), len(metadata_keys), len(common_metadata)))
		fi[DataProvider.Metadata] = fi.get(DataProvider.Metadata, []) + file_metadata
	return fi

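# Standalone sketch of the file entry parsing and validation performed by
# _readFileFromConfig above (parse_file_entry is a hypothetical name and
# json.loads stands in for parseJSON):
import json

def parse_file_entry(option, value, metadata_keys, common_metadata, prefix = ''):
	(entries_str, _, metadata_str) = value.partition(' ')
	entry = {'url': prefix + option, 'entries': int(entries_str), 'metadata': list(common_metadata)}
	if metadata_str:
		file_metadata = json.loads(metadata_str)
		if len(common_metadata) + len(file_metadata) > len(metadata_keys):
			raise ValueError('More metadata items than declared metadata keys')
		entry['metadata'] += file_metadata
	return entry

# parse_file_entry('file_a.root', '600 [[1, 10]]', ['RUN', 'LUMI'], [2012], '/store/data/')
# -> {'url': '/store/data/file_a.root', 'entries': 600, 'metadata': [2012, [1, 10]]}
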
def __init__(self, fn):
	ParameterSource.__init__(self)
	fp = ZipFile(fn, 'r')
	try:
		# The first line holds the JSON encoded list of parameter keys
		keyline = fp.readline().lstrip('#').strip()
		self._keys = []
		if keyline:
			self._keys = parseJSON(keyline)
		def parseLine(line):
			# Data lines: '<pnum>[!]\t<JSON value>\t...' - comment lines map to None
			if not line.startswith('#'):
				pNumStr, stored = lmap(str.strip, line.split('\t', 1))
				return ('!' in pNumStr, int(pNumStr.rstrip('!')), lmap(parseJSON, stored.split('\t')))
		self._values = lmap(parseLine, fp.readlines())
	finally:
		fp.close()

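# Standalone sketch of the dump file format read above (hypothetical names,
# json.loads in place of parseJSON): a '#'-prefixed header line carries the JSON
# key list, followed by tab-separated rows '<pnum>[!]\t<JSON value>\t...' where
# the '!' marker is passed through as a boolean flag:
import json

def parse_dump_line(line):
	if line.startswith('#'):
		return None  # comment / header line
	(pnum_str, stored) = [s.strip() for s in line.split('\t', 1)]
	return ('!' in pnum_str, int(pnum_str.rstrip('!')), [json.loads(v) for v in stored.split('\t')])

# parse_dump_line('2!\t"value"\t[1, 2]') -> (True, 2, ['value', [1, 2]])
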
def __init__(self, path):
	activity = Activity('Reading dataset partition file')
	self._lock = GCLock()
	self._fmt = utils.DictFormat()
	self._tar = tarfile.open(path, 'r:')
	(self._cacheKey, self._cacheTar) = (None, None)

	metadata = self._fmt.parse(self._tar.extractfile('Metadata').readlines(), keyParser = {None: str})
	self.maxJobs = metadata.pop('MaxJobs')
	self.classname = metadata.pop('ClassName')
	# Plain keys go into the 'dataset' section, '[section] key' entries into 'dataset section'
	self.metadata = {'dataset': dict(ifilter(lambda k_v: not k_v[0].startswith('['), metadata.items()))}
	for (k, v) in ifilter(lambda k_v: k_v[0].startswith('['), metadata.items()):
		self.metadata.setdefault('dataset %s' % k.split(']')[0].lstrip('['), {})[k.split(']')[1].strip()] = v
	activity.finish()

	self._parserMap = {
		None: str,
		DataSplitter.NEntries: int,
		DataSplitter.Skipped: int,
		DataSplitter.DatasetID: int,
		DataSplitter.Invalid: parseBool,
		DataSplitter.Locations: lambda x: parseList(x, ','),
		DataSplitter.MetadataHeader: parseJSON,
		DataSplitter.Metadata: lambda x: parseJSON(x.strip("'")),
	}

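# Standalone sketch of the metadata grouping applied above (group_metadata is a
# hypothetical name): plain keys land in the 'dataset' section, '[section] key'
# entries land in 'dataset section':
def group_metadata(metadata):
	result = {'dataset': {}}
	for (key, value) in metadata.items():
		if key.startswith('['):
			(section, _, name) = key.lstrip('[').partition(']')
			result.setdefault('dataset %s' % section, {})[name.strip()] = value
		else:
			result['dataset'][key] = value
	return result

# group_metadata({'files': 10, '[storage] se list': 'se1'})
# -> {'dataset': {'files': 10}, 'dataset storage': {'se list': 'se1'}}
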
def readJSON(url, params=None, headers={}, cert=None):
	return parseJSON(readURL(url, params, headers, cert))