def show_config(self):
	buffer = StringBuffer()
	try:
		self._config.write(buffer)
		return _tag('pre', _tag('code', buffer.getvalue()))
	finally:
		buffer.close()
def saveToStream(stream, dataBlocks, stripMetadata=False):
	writer = StringBuffer()
	for block in dataBlocks:
		# Block header '[<dataset>#<block>]' followed by optional block-level settings
		writer.write('[%s#%s]\n' % (block[DataProvider.Dataset], block[DataProvider.BlockName]))
		if DataProvider.Nickname in block:
			writer.write('nickname = %s\n' % block[DataProvider.Nickname])
		if DataProvider.DatasetID in block:
			writer.write('id = %d\n' % block[DataProvider.DatasetID])
		if DataProvider.NEntries in block:
			writer.write('events = %d\n' % block[DataProvider.NEntries])
		if block.get(DataProvider.Locations) is not None:
			writer.write('se list = %s\n' % str.join(',', block[DataProvider.Locations]))
		# Factor out the common directory prefix of all file URLs in the block
		cPrefix = os.path.commonprefix(lmap(lambda x: x[DataProvider.URL], block[DataProvider.FileList]))
		cPrefix = str.join('/', cPrefix.split('/')[:-1])
		if len(cPrefix) > 6:
			writer.write('prefix = %s\n' % cPrefix)
			formatter = lambda x: x.replace(cPrefix + '/', '')
		else:
			formatter = identity
		writeMetadata = (DataProvider.Metadata in block) and not stripMetadata
		if writeMetadata:
			# Separate metadata keys shared by all files from per-file keys
			(idxListBlock, idxListFile) = DataProvider.classifyMetadataKeys(block)

			def getMetadata(fi, idxList):
				return json.dumps(lmap(lambda idx: fi[DataProvider.Metadata][idx], idxList))
			writer.write('metadata = %s\n' % json.dumps(lmap(lambda idx: block[DataProvider.Metadata][idx],
				idxListBlock + idxListFile)))
			if idxListBlock:
				writer.write('metadata common = %s\n' % getMetadata(block[DataProvider.FileList][0], idxListBlock))
		# One line per file: '<URL or suffix> = <entries> [<per-file metadata>]'
		for fi in block[DataProvider.FileList]:
			writer.write('%s = %d' % (formatter(fi[DataProvider.URL]), fi[DataProvider.NEntries]))
			if writeMetadata and idxListFile:
				writer.write(' %s' % getMetadata(fi, idxListFile))
			writer.write('\n')
		writer.write('\n')
	stream.write(writer.getvalue())
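# For illustration: a hedged sketch of the file format the writer above emits.
# All concrete names below (dataset path, nickname, storage elements, files,
# metadata keys and values) are hypothetical; only the layout mirrors the
# write() calls in saveToStream:
#
#   [/PRIVATE/my_dataset#block0]
#   nickname = my_nick
#   events = 30
#   se list = SE_SITE_A,SE_SITE_B
#   prefix = /store/user/someone
#   metadata = ["CMSSW_VERSION", "LUMI"]
#   metadata common = ["CMSSW_9_4_0"]
#   file_a.root = 10 [100]
#   file_b.root = 20 [200]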
def discover_dataset(provider_name, config_dict):
	buffer = StringBuffer()
	config = gc_create_config(config_dict={'dataset': config_dict})
	config = config.change_view(set_sections=['dataset'])
	provider = Plugin.get_class('DataProvider').create_instance(
		provider_name, config, 'dataset', config_dict['dataset'], None)
	if config_dict['dump config'] == 'True':
		config.write(buffer, print_default=True, print_minimal=True)
		return logging.getLogger('script').info(buffer.getvalue().rstrip())
	strip_metadata = config_dict['strip'] == 'True'
	block_iter = provider.get_block_list_cached(show_stats=False)
	if config_dict['output']:
		return provider.save_to_file(config_dict['output'], block_iter, strip_metadata)
	for _ in provider.save_to_stream(buffer, block_iter, strip_metadata):
		pass
	logging.getLogger('script').info(buffer.getvalue().rstrip())
def setup_config(opts, args):
	# Set config based on settings from config file or command line
	config_fn = None
	if os.path.exists(args[0]):
		config_fn = args[0]
	config = gc_create_config(config_file=config_fn).change_view(set_sections=['global'])
	if os.path.exists(config.get_work_path('datamap.tar')):
		opts.dataset = config.get_work_path('datamap.tar')
	config.change_view(set_sections=['jobs']).set('nseeds', '1', '?=')
	param_config = config.change_view(set_sections=['parameters'])
	if opts.parameter:
		logging.info('Provided options:')
		for param in opts.parameter:
			key, value = param.split('=', 1)
			param_config.set(key.strip(), value.strip().replace('\\n', '\n'), '=')
			logging.info('\t%s: %s', key.strip(), value.strip())
		logging.info('')
	if config_fn is None:
		param_config.set('parameters', str.join(' ', args).replace('\\n', '\n'))
	if opts.dataset:
		param_config.set('default lookup', 'DATASETNICK')
	if opts.verbose > 2:
		buffer = StringBuffer()
		config.change_view(set_sections=None).write(buffer)
		logging.getLogger('script').info(buffer.getvalue().rstrip())
	return config
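# A minimal usage sketch for setup_config, assuming a hypothetical stand-in for
# the parsed command-line options; the attribute names are exactly the ones the
# function reads (parameter, dataset, verbose), everything else is made up.
class _FakeOpts(object):
	parameter = ['NICK=data_2016', 'SEED=42']  # entries from repeated -p KEY=VALUE options
	dataset = None
	verbose = 0

# 'work.conf' stands for an existing config file; each KEY=VALUE pair above
# ends up in the [parameters] section of the returned config view.
config = setup_config(_FakeOpts(), ['work.conf'])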
def getHash(self):
	buffer = StringBuffer()
	for _ in DataProvider.saveToStream(buffer, self._datasetProcessor.process(self.getBlocksNormed())):
		pass
	return md5_hex(buffer.getvalue())
def _get_dataset_hash(self):
	buffer = StringBuffer()
	for _ in DataProvider.save_to_stream(buffer, self.iter_blocks_normed()):
		pass
	value = buffer.getvalue()
	buffer.close()
	return md5_hex(value)
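# Standalone sketch of the same fingerprinting pattern, assuming only the names
# used above (DataProvider.save_to_stream, StringBuffer, md5_hex); the helper
# itself is hypothetical. Draining the generator serializes the blocks into the
# buffer, and the md5 of that text identifies the dataset content, so two
# providers yielding equivalent normalized blocks produce equal hashes.
def hash_block_list(block_iter):
	buffer = StringBuffer()
	for _ in DataProvider.save_to_stream(buffer, block_iter):
		pass
	try:
		return md5_hex(buffer.getvalue())
	finally:
		buffer.close()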
def saveStateRaw(stream, dataBlocks, stripMetadata=False):
	# Note: legacy code - relies on Python 2 semantics (map/filter/range return lists)
	writer = StringBuffer()
	for block in dataBlocks:
		writer.write('[%s#%s]\n' % (block[DataProvider.Dataset], block[DataProvider.BlockName]))
		if DataProvider.Nickname in block:
			writer.write('nickname = %s\n' % block[DataProvider.Nickname])
		if DataProvider.DatasetID in block:
			writer.write('id = %d\n' % block[DataProvider.DatasetID])
		if DataProvider.NEntries in block:
			writer.write('events = %d\n' % block[DataProvider.NEntries])
		if block.get(DataProvider.Locations) is not None:
			writer.write('se list = %s\n' % str.join(',', block[DataProvider.Locations]))
		cPrefix = os.path.commonprefix(map(lambda x: x[DataProvider.URL], block[DataProvider.FileList]))
		cPrefix = str.join('/', cPrefix.split('/')[:-1])
		if len(cPrefix) > 6:
			writer.write('prefix = %s\n' % cPrefix)
			formatter = lambda x: x.replace(cPrefix + '/', '')
		else:
			formatter = lambda x: x
		writeMetadata = (DataProvider.Metadata in block) and not stripMetadata
		if writeMetadata:
			getMetadata = lambda fi, idxList: map(lambda idx: fi[DataProvider.Metadata][idx], idxList)
			metadataHash = lambda fi, idx: utils.md5(repr(fi[DataProvider.Metadata][idx])).digest()
			cMetadataIdx = range(len(block[DataProvider.Metadata]))
			cMetadataHash = map(lambda idx: metadataHash(block[DataProvider.FileList][0], idx), cMetadataIdx)
			for fi in block[DataProvider.FileList]:
				# Identify common metadata - drop indices where a file disagrees with the first file
				for idx in filter(lambda idx: metadataHash(fi, idx) != cMetadataHash[idx], cMetadataIdx):
					cMetadataIdx.remove(idx)

			def filterC(common):
				# Select metadata key indices that are (or are not) common to all files
				idxList = filter(lambda idx: (idx in cMetadataIdx) == common, range(len(block[DataProvider.Metadata])))
				return utils.sorted(idxList, key=lambda idx: block[DataProvider.Metadata][idx])
			writer.write('metadata = %s\n' % map(lambda idx: block[DataProvider.Metadata][idx], filterC(True) + filterC(False)))
			if cMetadataIdx:
				writer.write('metadata common = %s\n' % getMetadata(block[DataProvider.FileList][0], filterC(True)))
				writeMetadata = len(cMetadataIdx) != len(block[DataProvider.Metadata])
		for fi in block[DataProvider.FileList]:
			writer.write('%s = %d' % (formatter(fi[DataProvider.URL]), fi[DataProvider.NEntries]))
			if writeMetadata:
				writer.write(' %s' % getMetadata(fi, filterC(False)))
			writer.write('\n')
		writer.write('\n')
	stream.write(writer.getvalue())
def saveToStream(stream, dataBlocks, stripMetadata=False):
	writer = StringBuffer()
	write_separator = False
	for block in dataBlocks:
		if write_separator:
			writer.write('\n')
		writer.write('[%s]\n' % DataProvider.bName(block))
		if DataProvider.Nickname in block:
			writer.write('nickname = %s\n' % block[DataProvider.Nickname])
		if DataProvider.NEntries in block:
			writer.write('events = %d\n' % block[DataProvider.NEntries])
		if block.get(DataProvider.Locations) is not None:
			writer.write('se list = %s\n' % str.join(',', block[DataProvider.Locations]))
		cPrefix = os.path.commonprefix(lmap(lambda x: x[DataProvider.URL], block[DataProvider.FileList]))
		cPrefix = str.join('/', cPrefix.split('/')[:-1])
		if len(cPrefix) > 6:
			writer.write('prefix = %s\n' % cPrefix)
			formatter = lambda x: x.replace(cPrefix + '/', '')
		else:
			formatter = identity
		writeMetadata = (DataProvider.Metadata in block) and not stripMetadata
		if writeMetadata:
			(idxListBlock, idxListFile) = DataProvider.classifyMetadataKeys(block)

			def getMetadata(fi, idxList):
				idxList = ifilter(lambda idx: idx < len(fi[DataProvider.Metadata]), idxList)
				return json.dumps(lmap(lambda idx: fi[DataProvider.Metadata][idx], idxList))
			writer.write('metadata = %s\n' % json.dumps(lmap(lambda idx: block[DataProvider.Metadata][idx],
				idxListBlock + idxListFile)))
			if idxListBlock:
				writer.write('metadata common = %s\n' % getMetadata(block[DataProvider.FileList][0], idxListBlock))
		for fi in block[DataProvider.FileList]:
			writer.write('%s = %d' % (formatter(fi[DataProvider.URL]), fi[DataProvider.NEntries]))
			if writeMetadata and idxListFile:
				writer.write(' %s' % getMetadata(fi, idxListFile))
			writer.write('\n')
		# Flush the serialized block to the stream, reset the buffer and yield
		stream.write(writer.getvalue())
		writer.seek(0)
		writer.truncate(0)
		write_separator = True
		yield block
def save_to_stream(stream, block_iter, strip_metadata=False):
	writer = StringBuffer()
	write_separator = False
	for block in block_iter:
		if write_separator:
			writer.write('\n')
		writer.write('[%s]\n' % DataProvider.get_block_id(block))
		if DataProvider.Nickname in block:
			writer.write('nickname = %s\n' % block[DataProvider.Nickname])
		if DataProvider.NEntries in block:
			writer.write('events = %d\n' % block[DataProvider.NEntries])
		if block.get(DataProvider.Locations) is not None:
			writer.write('se list = %s\n' % str.join(',', block[DataProvider.Locations]))
		# Factor out the common directory prefix of all file URLs in the block
		common_prefix = os.path.commonprefix(
			lmap(itemgetter(DataProvider.URL), block[DataProvider.FileList]))
		common_prefix = str.join('/', common_prefix.split('/')[:-1])
		if len(common_prefix) > 6:
			def _formatter(value):
				return value.replace(common_prefix + '/', '')
			writer.write('prefix = %s\n' % common_prefix)
		else:
			_formatter = identity
		do_write_metadata = (DataProvider.Metadata in block) and not strip_metadata
		if do_write_metadata:
			def _get_metadata_str(fi, idx_list):
				idx_list = ifilter(lambda idx: idx < len(fi[DataProvider.Metadata]), idx_list)
				return json.dumps(lmap(lambda idx: fi[DataProvider.Metadata][idx], idx_list))
			# Split metadata keys into block-level (common) and per-file groups
			(metadata_idx_list_block, metadata_idx_list_file) = _split_metadata_idx_list(block)
			metadata_header_str = json.dumps(lmap(lambda idx: block[DataProvider.Metadata][idx],
				metadata_idx_list_block + metadata_idx_list_file))
			writer.write('metadata = %s\n' % metadata_header_str)
			if metadata_idx_list_block:
				metadata_str = _get_metadata_str(block[DataProvider.FileList][0], metadata_idx_list_block)
				writer.write('metadata common = %s\n' % metadata_str)
		for fi in block[DataProvider.FileList]:
			writer.write('%s = %d' % (_formatter(fi[DataProvider.URL]), fi[DataProvider.NEntries]))
			if do_write_metadata and metadata_idx_list_file:
				writer.write(' %s' % _get_metadata_str(fi, metadata_idx_list_file))
			writer.write('\n')
		# Flush the serialized block to the stream, reset the buffer and yield
		stream.write(writer.getvalue())
		erase_content(writer)
		write_separator = True
		yield block
	writer.close()
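# Usage sketch: save_to_stream is a generator that writes each block to the
# given stream as a side effect and yields the block afterwards, so a caller
# can persist and keep processing blocks in a single pass. `provider` is a
# hypothetical DataProvider instance as created in discover_dataset above.
import sys

for block in DataProvider.save_to_stream(sys.stdout,
		provider.get_block_list_cached(show_stats=False)):
	pass  # the serialized text has already been written to stdout at this point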