Example #1
 def show_config(self):
     buffer = StringBuffer()
     try:
         self._config.write(buffer)
         return _tag('pre', _tag('code', buffer.getvalue()))
     finally:
         buffer.close()
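All of the snippets on this page treat `StringBuffer` as an in-memory text buffer. Judging only from the calls that appear here (`write`, `getvalue`, `seek`, `truncate`, `close`), `io.StringIO` is a workable stand-in for trying the pattern outside the original project; this is an assumption about the helper, not its actual definition.

# Assumed stand-in: StringBuffer behaves like an in-memory text stream.
from io import StringIO as StringBuffer

buf = StringBuffer()
try:
    buf.write('[global]\nkey = value\n')  # placeholder for self._config.write(buf)
    # Rough equivalent of _tag('pre', _tag('code', ...)), assuming _tag wraps
    # its content in the named HTML tag.
    print('<pre><code>%s</code></pre>' % buf.getvalue())
finally:
    buf.close()  # release the buffer even if writing fails, as in show_config()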
Example #2
	def saveToStream(stream, dataBlocks, stripMetadata = False):
		# Serialize every dataset block into the text format below and write the
		# complete result to 'stream' in one go at the end.
		writer = StringBuffer()
		for block in dataBlocks:
			writer.write('[%s#%s]\n' % (block[DataProvider.Dataset], block[DataProvider.BlockName]))
			if DataProvider.Nickname in block:
				writer.write('nickname = %s\n' % block[DataProvider.Nickname])
			if DataProvider.DatasetID in block:
				writer.write('id = %d\n' % block[DataProvider.DatasetID])
			if DataProvider.NEntries in block:
				writer.write('events = %d\n' % block[DataProvider.NEntries])
			if block.get(DataProvider.Locations) is not None:
				writer.write('se list = %s\n' % str.join(',', block[DataProvider.Locations]))
			cPrefix = os.path.commonprefix(lmap(lambda x: x[DataProvider.URL], block[DataProvider.FileList]))
			cPrefix = str.join('/', cPrefix.split('/')[:-1])
			if len(cPrefix) > 6:
				writer.write('prefix = %s\n' % cPrefix)
				formatter = lambda x: x.replace(cPrefix + '/', '')
			else:
				formatter = identity

			writeMetadata = (DataProvider.Metadata in block) and not stripMetadata
			if writeMetadata:
				(idxListBlock, idxListFile) = DataProvider.classifyMetadataKeys(block)
				def getMetadata(fi, idxList):
					return json.dumps(lmap(lambda idx: fi[DataProvider.Metadata][idx], idxList))
				writer.write('metadata = %s\n' % json.dumps(lmap(lambda idx: block[DataProvider.Metadata][idx], idxListBlock + idxListFile)))
				if idxListBlock:
					writer.write('metadata common = %s\n' % getMetadata(block[DataProvider.FileList][0], idxListBlock))
			for fi in block[DataProvider.FileList]:
				writer.write('%s = %d' % (formatter(fi[DataProvider.URL]), fi[DataProvider.NEntries]))
				if writeMetadata and idxListFile:
					writer.write(' %s' % getMetadata(fi, idxListFile))
				writer.write('\n')
			writer.write('\n')
		stream.write(writer.getvalue())
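For orientation, a single serialized block (judging purely from the `write` calls above) comes out roughly like the following; every value is made up:

[/made/up/dataset#block0]
nickname = made_up_nick
id = 42
events = 1000
se list = site-a,site-b
prefix = /store/made/up
metadata = ["KEY_A", "KEY_B"]
metadata common = ["common_a"]
file_1.root = 500 ["file_value_1"]
file_2.root = 500 ["file_value_2"]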
Example #3
def discover_dataset(provider_name, config_dict):
	# Create the requested DataProvider instance and either dump the resolved
	# config, write the discovered blocks to a file, or log them.
	buffer = StringBuffer()
	config = gc_create_config(config_dict={'dataset': config_dict})
	config = config.change_view(set_sections=['dataset'])
	provider = Plugin.get_class('DataProvider').create_instance(provider_name,
		config, 'dataset', config_dict['dataset'], None)
	if config_dict['dump config'] == 'True':
		config.write(buffer, print_default=True, print_minimal=True)
		return logging.getLogger('script').info(buffer.getvalue().rstrip())
	strip_metadata = config_dict['strip'] == 'True'
	block_iter = provider.get_block_list_cached(show_stats=False)
	if config_dict['output']:
		return provider.save_to_file(config_dict['output'], block_iter, strip_metadata)
	for _ in provider.save_to_stream(buffer, block_iter, strip_metadata):
		pass
	logging.getLogger('script').info(buffer.getvalue().rstrip())
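A hedged usage sketch for `discover_dataset`: the keys read inside the function ('dataset', 'dump config', 'strip', 'output') suggest a call shaped roughly like the one below. The provider name and dataset value are illustrative assumptions, not taken from the example.

# Illustrative call only -- provider name and dataset value are assumptions.
config_dict = {
    'dataset': '/path/to/dataset_spec',  # handed to the DataProvider instance
    'dump config': 'False',              # 'True' would log the resolved config instead
    'strip': 'False',                    # 'True' would drop per-file metadata
    'output': '',                        # a non-empty path would write to a file
}
discover_dataset('ListProvider', config_dict)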
Example #4
def setup_config(opts, args):
    # Set config based on settings from config file or command line
    config_fn = None
    if os.path.exists(args[0]):
        config_fn = args[0]
    config = gc_create_config(config_file=config_fn).change_view(
        set_sections=['global'])
    if os.path.exists(config.get_work_path('datamap.tar')):
        opts.dataset = config.get_work_path('datamap.tar')
    config.change_view(set_sections=['jobs']).set('nseeds', '1', '?=')
    param_config = config.change_view(set_sections=['parameters'])
    if opts.parameter:
        logging.info('Provided options:')
        for param in opts.parameter:
            key, value = param.split('=', 1)
            param_config.set(key.strip(),
                             value.strip().replace('\\n', '\n'), '=')
            logging.info('\t%s: %s', key.strip(), value.strip())
        logging.info('')

    if config_fn is None:
        param_config.set('parameters',
                         str.join(' ', args).replace('\\n', '\n'))
        if opts.dataset:
            param_config.set('default lookup', 'DATASETNICK')
        if opts.verbose > 2:
            buffer = StringBuffer()
            config.change_view(set_sections=None).write(buffer)
            logging.getLogger('script').info(buffer.getvalue().rstrip())
    return config
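A sketch of what `setup_config` expects from its caller, based only on the attributes read above (`opts.parameter`, `opts.dataset`, `opts.verbose`) and on `args[0]` being treated as an optional config file path. The option parser, file name, and parameter value are illustrative; the function itself still needs the surrounding project (`gc_create_config` and friends) to run.

# Illustrative driver only: attribute names follow the usage inside setup_config.
import optparse

parser = optparse.OptionParser()
parser.add_option('-p', '--parameter', action='append', default=[])
parser.add_option('-d', '--dataset', default='')
parser.add_option('-v', '--verbose', action='count', default=0)
opts, args = parser.parse_args(['task.conf', '-p', 'NICK=test'])  # made-up values
config = setup_config(opts, args)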
Example #5
 def getHash(self):
     buffer = StringBuffer()
     for _ in DataProvider.saveToStream(
             buffer,
             self._datasetProcessor.process(self.getBlocksNormed())):
         pass
     return md5_hex(buffer.getvalue())
Example #6
def setup_config(opts, args):
	# Set config based on settings from config file or command line
	config_fn = None
	if os.path.exists(args[0]):
		config_fn = args[0]
	config = gc_create_config(config_file=config_fn).change_view(set_sections=['global'])
	if os.path.exists(config.get_work_path('datamap.tar')):
		opts.dataset = config.get_work_path('datamap.tar')
	config.change_view(set_sections=['jobs']).set('nseeds', '1', '?=')
	param_config = config.change_view(set_sections=['parameters'])
	if opts.parameter:
		logging.info('Provided options:')
		for param in opts.parameter:
			key, value = param.split('=', 1)
			param_config.set(key.strip(), value.strip().replace('\\n', '\n'), '=')
			logging.info('\t%s: %s', key.strip(), value.strip())
		logging.info('')

	if config_fn is None:
		param_config.set('parameters', str.join(' ', args).replace('\\n', '\n'))
		if opts.dataset:
			param_config.set('default lookup', 'DATASETNICK')
		if opts.verbose > 2:
			buffer = StringBuffer()
			config.change_view(set_sections=None).write(buffer)
			logging.getLogger('script').info(buffer.getvalue().rstrip())
	return config
Example #7
def discover_dataset(provider_name, config_dict):
    buffer = StringBuffer()
    config = gc_create_config(config_dict={'dataset': config_dict})
    config = config.change_view(set_sections=['dataset'])
    provider = Plugin.get_class('DataProvider').create_instance(
        provider_name, config, 'dataset', config_dict['dataset'], None)
    if config_dict['dump config'] == 'True':
        config.write(buffer, print_default=True, print_minimal=True)
        return logging.getLogger('script').info(buffer.getvalue().rstrip())
    strip_metadata = config_dict['strip'] == 'True'
    block_iter = provider.get_block_list_cached(show_stats=False)
    if config_dict['output']:
        return provider.save_to_file(config_dict['output'], block_iter,
                                     strip_metadata)
    for _ in provider.save_to_stream(buffer, block_iter, strip_metadata):
        pass
    logging.getLogger('script').info(buffer.getvalue().rstrip())
Example #8
 def _get_dataset_hash(self):
     buffer = StringBuffer()
     for _ in DataProvider.save_to_stream(buffer,
                                          self.iter_blocks_normed()):
         pass
     value = buffer.getvalue()
     buffer.close()
     return md5_hex(value)
Example #9
	def saveStateRaw(stream, dataBlocks, stripMetadata = False):
		# Older Python 2 style variant: map()/filter()/range() are used as lists and
		# common metadata is detected by comparing per-file metadata hashes.
		writer = StringBuffer()
		for block in dataBlocks:
			writer.write('[%s#%s]\n' % (block[DataProvider.Dataset], block[DataProvider.BlockName]))
			if DataProvider.Nickname in block:
				writer.write('nickname = %s\n' % block[DataProvider.Nickname])
			if DataProvider.DatasetID in block:
				writer.write('id = %d\n' % block[DataProvider.DatasetID])
			if DataProvider.NEntries in block:
				writer.write('events = %d\n' % block[DataProvider.NEntries])
			if block.get(DataProvider.Locations) is not None:
				writer.write('se list = %s\n' % str.join(',', block[DataProvider.Locations]))
			cPrefix = os.path.commonprefix(map(lambda x: x[DataProvider.URL], block[DataProvider.FileList]))
			cPrefix = str.join('/', cPrefix.split('/')[:-1])
			if len(cPrefix) > 6:
				writer.write('prefix = %s\n' % cPrefix)
				formatter = lambda x: x.replace(cPrefix + '/', '')
			else:
				formatter = lambda x: x

			writeMetadata = (DataProvider.Metadata in block) and not stripMetadata
			if writeMetadata:
				getMetadata = lambda fi, idxList: map(lambda idx: fi[DataProvider.Metadata][idx], idxList)
				metadataHash = lambda fi, idx: utils.md5(repr(fi[DataProvider.Metadata][idx])).digest()
				cMetadataIdx = range(len(block[DataProvider.Metadata]))
				cMetadataHash = map(lambda idx: metadataHash(block[DataProvider.FileList][0], idx), cMetadataIdx)
				for fi in block[DataProvider.FileList]: # Identify common metadata
					for idx in filter(lambda idx: metadataHash(fi, idx) != cMetadataHash[idx], cMetadataIdx):
						cMetadataIdx.remove(idx)
				def filterC(common):
					idxList = filter(lambda idx: (idx in cMetadataIdx) == common, range(len(block[DataProvider.Metadata])))
					return utils.sorted(idxList, key = lambda idx: block[DataProvider.Metadata][idx])
				writer.write('metadata = %s\n' % map(lambda idx: block[DataProvider.Metadata][idx], filterC(True) + filterC(False)))
				if cMetadataIdx:
					writer.write('metadata common = %s\n' % getMetadata(block[DataProvider.FileList][0], filterC(True)))
					writeMetadata = len(cMetadataIdx) != len(block[DataProvider.Metadata])
			for fi in block[DataProvider.FileList]:
				writer.write('%s = %d' % (formatter(fi[DataProvider.URL]), fi[DataProvider.NEntries]))
				if writeMetadata:
					writer.write(' %s' % getMetadata(fi, filterC(False)))
				writer.write('\n')
			writer.write('\n')
		stream.write(writer.getvalue())
Example #10
	def saveToStream(stream, dataBlocks, stripMetadata = False):
		# Generator variant: each block is flushed to 'stream' and then yielded,
		# so the caller must iterate over the result to get any output.
		writer = StringBuffer()
		write_separator = False
		for block in dataBlocks:
			if write_separator:
				writer.write('\n')
			writer.write('[%s]\n' % DataProvider.bName(block))
			if DataProvider.Nickname in block:
				writer.write('nickname = %s\n' % block[DataProvider.Nickname])
			if DataProvider.NEntries in block:
				writer.write('events = %d\n' % block[DataProvider.NEntries])
			if block.get(DataProvider.Locations) is not None:
				writer.write('se list = %s\n' % str.join(',', block[DataProvider.Locations]))
			cPrefix = os.path.commonprefix(lmap(lambda x: x[DataProvider.URL], block[DataProvider.FileList]))
			cPrefix = str.join('/', cPrefix.split('/')[:-1])
			if len(cPrefix) > 6:
				writer.write('prefix = %s\n' % cPrefix)
				formatter = lambda x: x.replace(cPrefix + '/', '')
			else:
				formatter = identity

			writeMetadata = (DataProvider.Metadata in block) and not stripMetadata
			if writeMetadata:
				(idxListBlock, idxListFile) = DataProvider.classifyMetadataKeys(block)
				def getMetadata(fi, idxList):
					idxList = ifilter(lambda idx: idx < len(fi[DataProvider.Metadata]), idxList)
					return json.dumps(lmap(lambda idx: fi[DataProvider.Metadata][idx], idxList))
				writer.write('metadata = %s\n' % json.dumps(lmap(lambda idx: block[DataProvider.Metadata][idx], idxListBlock + idxListFile)))
				if idxListBlock:
					writer.write('metadata common = %s\n' % getMetadata(block[DataProvider.FileList][0], idxListBlock))
			for fi in block[DataProvider.FileList]:
				writer.write('%s = %d' % (formatter(fi[DataProvider.URL]), fi[DataProvider.NEntries]))
				if writeMetadata and idxListFile:
					writer.write(' %s' % getMetadata(fi, idxListFile))
				writer.write('\n')
			stream.write(writer.getvalue())
			writer.seek(0)
			writer.truncate(0)
			write_separator = True
			yield block
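Because this version of `saveToStream` is a generator (it flushes one block to `stream` and then yields it), callers have to exhaust it before the output is complete, which is why the hash and dump examples above loop over it with `for _ in ...: pass`. A minimal consumption sketch, where `blocks` stands for any iterable of dataset block dictionaries:

# Illustrative only: 'blocks' is a placeholder for an iterable of block dicts.
buffer = StringBuffer()
for _ in DataProvider.saveToStream(buffer, blocks):
    pass  # each iteration has already flushed one block into 'buffer'
print(buffer.getvalue())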
Example #11
	def getHash(self):
		buffer = StringBuffer()
		for _ in DataProvider.saveToStream(buffer, self._datasetProcessor.process(self.getBlocksNormed())):
			pass
		return md5_hex(buffer.getvalue())
Example #12
    def save_to_stream(stream, block_iter, strip_metadata=False):
        # Generator: serialize one block at a time, flush it to 'stream', reset
        # the buffer via erase_content() and yield the block to the caller.
        writer = StringBuffer()
        write_separator = False
        for block in block_iter:
            if write_separator:
                writer.write('\n')
            writer.write('[%s]\n' % DataProvider.get_block_id(block))
            if DataProvider.Nickname in block:
                writer.write('nickname = %s\n' % block[DataProvider.Nickname])
            if DataProvider.NEntries in block:
                writer.write('events = %d\n' % block[DataProvider.NEntries])
            if block.get(DataProvider.Locations) is not None:
                writer.write('se list = %s\n' %
                             str.join(',', block[DataProvider.Locations]))
            common_prefix = os.path.commonprefix(
                lmap(itemgetter(DataProvider.URL),
                     block[DataProvider.FileList]))
            common_prefix = str.join('/', common_prefix.split('/')[:-1])
            if len(common_prefix) > 6:

                def _formatter(value):
                    return value.replace(common_prefix + '/', '')

                writer.write('prefix = %s\n' % common_prefix)
            else:
                _formatter = identity

            do_write_metadata = (DataProvider.Metadata
                                 in block) and not strip_metadata
            if do_write_metadata:

                def _get_metadata_str(fi, idx_list):
                    idx_list = ifilter(
                        lambda idx: idx < len(fi[DataProvider.Metadata]),
                        idx_list)
                    return json.dumps(
                        lmap(lambda idx: fi[DataProvider.Metadata][idx],
                             idx_list))

                (metadata_idx_list_block,
                 metadata_idx_list_file) = _split_metadata_idx_list(block)
                metadata_header_str = json.dumps(
                    lmap(lambda idx: block[DataProvider.Metadata][idx],
                         metadata_idx_list_block + metadata_idx_list_file))
                writer.write('metadata = %s\n' % metadata_header_str)
                if metadata_idx_list_block:
                    metadata_str = _get_metadata_str(
                        block[DataProvider.FileList][0],
                        metadata_idx_list_block)
                    writer.write('metadata common = %s\n' % metadata_str)
            for fi in block[DataProvider.FileList]:
                writer.write('%s = %d' % (_formatter(
                    fi[DataProvider.URL]), fi[DataProvider.NEntries]))
                if do_write_metadata and metadata_idx_list_file:
                    writer.write(' %s' %
                                 _get_metadata_str(fi, metadata_idx_list_file))
                writer.write('\n')
            stream.write(writer.getvalue())
            erase_content(writer)
            write_separator = True
            yield block
        writer.close()
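The helper `erase_content` is not part of this listing; comparing Example #12 with the `seek(0)`/`truncate(0)` pair in Example #10, a compatible stand-in (an assumption, not the project's actual implementation) would be:

def erase_content(buffer):
    # Reset an in-memory text buffer so it can be reused for the next block,
    # mirroring the seek(0)/truncate(0) pair in Example #10.
    buffer.seek(0)
    buffer.truncate(0)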