예제 #1
0
def discover_blocks(options):
    """Query the dataset provider for blocks and dump them to ``dbs.dat``.

    ``options.args[0]`` is either a work directory or a config file from
    which the work path is taken.  If ``options.opts.tempdir`` is unset it
    becomes ``<work dir>/dbs``; the directory is created when missing.

    The blocks come from a 'ListProvider' given ``options.opts.input_file``
    when that is set, otherwise from a 'DBSInfoProvider' configured with
    ``options.config_dict``.

    Returns the result of ``provider.getBlocks``.  With
    ``options.opts.discovery`` set, the process exits with ``os.EX_OK``
    after the dump file was written.
    """
    # Get work directory, create dbs dump directory
    if os.path.isdir(options.args[0]):
        workDir = os.path.abspath(os.path.normpath(options.args[0]))
    else:
        workDir = getConfig(configFile=options.args[0]).getWorkPath()
    if not options.opts.tempdir:
        options.opts.tempdir = os.path.join(workDir, 'dbs')
    if not os.path.exists(options.opts.tempdir):
        # makedirs (unlike mkdir) also creates missing parent directories,
        # e.g. for a nested temp dir or a work directory that is not there yet
        os.makedirs(options.opts.tempdir)

    # get provider with dataset information
    if options.opts.input_file:
        provider = DataProvider.createInstance('ListProvider', getConfig(),
                                               options.opts.input_file, None)
    else:
        config = getConfig(configDict={'dataset': options.config_dict})
        provider = DataProvider.createInstance('DBSInfoProvider', config,
                                               options.args[0], None)

    blocks = provider.getBlocks(show_stats=False)
    DataProvider.saveToFile(os.path.join(options.opts.tempdir, 'dbs.dat'),
                            blocks)
    if options.opts.discovery:
        # discovery-only mode: stop after the dump was written
        sys.exit(os.EX_OK)
    return blocks
예제 #2
0
def main(opts, args):
	"""Print a table that maps dataset paths to their generated nicknames.

	args[0] is the dataset path; a path containing '*' is expanded through
	the 'DBS3Provider' plugin. opts.producer names the NickNameProducer
	plugin used to derive the nicknames.
	"""
	if not args:
		utils.exitWithUsage('Dataset path not specified!')
	pattern = args[0]
	if '*' not in pattern:
		datasets = [pattern]
	else:
		expander = Plugin.createInstance('DBS3Provider', getConfig(), pattern, None)
		datasets = expander.getCMSDatasetsImpl(pattern)

	producer = Plugin.getClass('NickNameProducer').createInstance(opts.producer, getConfig())

	def make_row(dataset):
		# table row: column 0 holds the nickname, column 1 the dataset path
		return {0: producer.getName('', dataset, None), 1: dataset}

	header = [(0, 'Nickname'), (1, 'Dataset')]
	utils.printTabular(header, lmap(make_row, datasets), 'll')
예제 #3
0
def setup_config(opts, args):
    """Create the config from a config file or from the command line.

    If ``args[0]`` is an existing path it is used as config file; otherwise
    all ``args`` are joined into the 'parameters' option.  Each entry of
    ``opts.parameter`` ('key=value') is written to the 'parameters'
    section.  ``opts.dataset`` is overwritten when 'datamap.tar' exists in
    the work path.  Returns the config object.
    """
    first_arg = args[0]
    configFile = first_arg if os.path.exists(first_arg) else None
    config = getConfig(configFile, section='global')

    datamap_path = config.getWorkPath('datamap.tar')
    if os.path.exists(datamap_path):
        opts.dataset = datamap_path

    config.changeView(setSections=['jobs']).set('nseeds', '1', '?=')
    param_view = config.changeView(setSections=['parameters'])

    if opts.parameter:
        utils.vprint('Provided options:')
        for assignment in opts.parameter:
            (raw_key, raw_value) = assignment.split('=', 1)
            (key, value) = (raw_key.strip(), raw_value.strip())
            # a literal backslash-n in the value becomes a real newline
            param_view.set(key, value.replace('\\n', '\n'), '=')
            utils.vprint('\t%s: %s' % (key, value))
        utils.vprint('')

    if configFile is not None:
        return config

    # No config file: the command line arguments form the parameter setting
    param_view.set('parameters', ' '.join(args).replace('\\n', '\n'))
    if opts.dataset:
        param_view.set('default lookup', 'DATASETNICK')
    if utils.verbosity() > 2:
        config.changeView(setSections=None).write(sys.stdout)
    return config
def discoverDataset(providerName, config_dict):
	"""Create the named dataset provider and save its blocks.

	config_dict is used as the 'dataset' config section and supplies the keys
	'dataset' (passed to the provider), 'output' (target file; falsy selects
	sys.stdout) and 'strip' (handed to the save function unchanged).
	Returns whatever the save function returns.
	"""
	config = getConfig(configDict = {'dataset': config_dict})
	provider_cls = Plugin.getClass('DataProvider')
	provider = provider_cls.createInstance(providerName, config, config_dict['dataset'], None)
	target = config_dict['output']
	if not target:
		return provider_cls.saveToStream(sys.stdout, provider.getBlocks(), config_dict['strip'])
	return provider_cls.saveToFile(target, provider.getBlocks(), config_dict['strip'])
예제 #5
0
def setup_config(opts, args):
	"""Set up the config based on a config file or the command line.

	args[0] is taken as config file if it exists, otherwise the joined args
	become the 'parameters' setting. opts.parameter entries ('key=value')
	are stored in the 'parameters' section and logged. opts.dataset is
	replaced by the 'datamap.tar' work path if that file exists.
	Returns the config object.
	"""
	def unescape(text):
		# literal backslash-n sequences are turned into newlines
		return text.replace('\\n', '\n')

	configFile = None
	if os.path.exists(args[0]):
		configFile = args[0]
	config = getConfig(configFile, section = 'global')

	datamap = config.getWorkPath('datamap.tar')
	if os.path.exists(datamap):
		opts.dataset = datamap

	config.changeView(setSections = ['jobs']).set('nseeds', '1', '?=')
	parameter_section = config.changeView(setSections = ['parameters'])

	if opts.parameter:
		log.info('Provided options:')
		for entry in opts.parameter:
			(name, value) = [part.strip() for part in entry.split('=', 1)]
			parameter_section.set(name, unescape(value), '=')
			log.info('\t%s: %s', name, value)
		log.info('')

	if configFile is None:
		parameter_section.set('parameters', unescape(' '.join(args)))
		if opts.dataset:
			parameter_section.set('default lookup', 'DATASETNICK')
		if opts.verbose > 2:
			config.changeView(setSections = None).write(sys.stdout)
	return config
예제 #6
0
def main():
	"""Select jobs from the job database and display the configured report.

	Reads the module-level names ``opts`` and ``args``; ``args[0]`` is the
	config file. The process is terminated via ``sys.exit()`` after the
	report was displayed.
	"""
	# try to open config file
	config = getConfig(args[0], section = 'global')

	# Initialise task module
	task = config.getClass(['task', 'module'], cls = TaskModule).getInstance()

	# Initialise job database
	jobManagerCls = config.getClass('job manager', 'SimpleJobManager', cls = JobManager, tags = [task])
	jobDB = jobManagerCls.getInstance(task, None).jobDB
	log = utils.ActivityLog('Filtering job entries')
	selected = jobDB.getJobs(JobSelector.create(opts.selector, task = task))
	del log # release the ActivityLog object once the selection is done

	report = Report.open(opts.reportClass, jobDB, task, selected, opts.string)
	report.display()

	# The legacy report code that used to follow this call could never run
	# (and referenced the undefined name 'jobs'), so it was dropped.
	sys.exit()
	def main():
		"""Create the dataset provider and write its state to a file or stdout.

		Uses parser, opts, providerName and datasetExpr from the enclosing scope.
		With opts.output set the provider saves its state to that file,
		otherwise the blocks are written to sys.stdout.
		"""
		# Stringify all parsed option values for the 'dataset' config section.
		# (A comprehension replaces the tuple-parameter lambda, which is a
		# syntax error on Python 3.)
		configEntries = [(k, str(v)) for (k, v) in parser.values.__dict__.items()]
		config = gcSupport.getConfig(configDict = {'dataset': dict(configEntries)})
		provider = gcSupport.datasets.DataProvider.getInstance(providerName, config, datasetExpr, None)
		if opts.output:
			provider.saveState(opts.output, None, opts.strip)
		else:
			gcSupport.datasets.DataProvider.saveStateRaw(sys.stdout, provider.getBlocks(), opts.strip)
예제 #8
0
def main(opts, args):
    """Show the nickname generated for each selected dataset.

    ``args[0]`` is the dataset path; when it contains '*', the list of
    datasets is obtained from the 'DBS3Provider' plugin.  ``opts.producer``
    selects the NickNameProducer plugin.  The result is printed as a
    two-column table (Nickname, Dataset).
    """
    if not args:
        utils.exitWithUsage('Dataset path not specified!')
    datasetPath = args[0]

    toProcess = [datasetPath]
    if '*' in datasetPath:
        # wildcard: let the DBS3 provider expand the dataset expression
        resolver = Plugin.createInstance('DBS3Provider', getConfig(),
                                         datasetPath, None)
        toProcess = resolver.getCMSDatasetsImpl(datasetPath)

    nick_producer_cls = Plugin.getClass('NickNameProducer')
    nick_producer = nick_producer_cls.createInstance(opts.producer,
                                                     getConfig())

    def to_row(path):
        return {0: nick_producer.getName('', path, None), 1: path}

    columns = [(0, 'Nickname'), (1, 'Dataset')]
    utils.printTabular(columns, lmap(to_row, toProcess), 'll')
def discoverDataset(providerName, config_dict):
    """Instantiate the given dataset provider and store its blocks.

    ``config_dict`` forms the 'dataset' config section.  Its 'dataset'
    entry is passed to the provider, 'output' selects the target file
    (falsy: write to ``sys.stdout``) and 'strip' is forwarded to the save
    function.  Returns the save function's result.
    """
    config = getConfig(configDict={'dataset': config_dict})
    provider_cls = Plugin.getClass('DataProvider')
    provider = provider_cls.createInstance(providerName, config,
                                           config_dict['dataset'], None)
    # Pick the writer and its sink first, then fetch the blocks
    if config_dict['output']:
        (save, sink) = (provider_cls.saveToFile, config_dict['output'])
    else:
        (save, sink) = (provider_cls.saveToStream, sys.stdout)
    return save(sink, provider.getBlocks(), config_dict['strip'])
def discoverDataset(providerName, config_dict):
	"""Dump the dataset config or save the blocks found by the provider.

	config_dict forms the 'dataset' config section. If its 'dump config'
	entry equals the string 'True', only the config is written to sys.stdout
	and None is returned. Otherwise the provider's blocks are saved to the
	file named by 'output' (or to sys.stdout if that entry is falsy);
	metadata stripping is requested when 'strip' equals the string 'True'.
	"""
	config = getConfig(configDict = {'dataset': config_dict})
	if config_dict['dump config'] == 'True':
		config.write(sys.stdout, printDefault = False, printMinimal = True)
		return None

	provider_cls = Plugin.getClass('DataProvider')
	provider = provider_cls.createInstance(providerName, config, config_dict['dataset'], None)
	strip_flag = (config_dict['strip'] == 'True')
	target = config_dict['output']
	if not target:
		return provider_cls.saveToStream(sys.stdout, provider.getBlocks(), strip_flag)
	return provider_cls.saveToFile(target, provider.getBlocks(), strip_flag)
예제 #11
0
def get_dataset_config(opts, args):
	"""Build the config used to query the dataset given in args[0].

	Sets opts.provider to 'ListProvider' if the (stripped) argument is an
	existing path and to 'DBS3Provider' otherwise. opts.location,
	opts.metadata, opts.block_metadata and opts.settings are read as well.
	Returns the config created from opts.settings plus the 'dataset' section.
	"""
	dataset = args[0].strip()
	opts.provider = 'ListProvider' if os.path.exists(dataset) else 'DBS3Provider'

	cfgSettings = {}
	cfgSettings['dbs blacklist T1 *'] = 'False'
	cfgSettings['remove empty blocks *'] = 'False'
	cfgSettings['remove empty files *'] = 'False'
	cfgSettings['location format *'] = opts.location
	cfgSettings['nickname check collision *'] = 'False'
	cfgSettings['dataset *'] = dataset
	cfgSettings['dataset provider *'] = opts.provider

	wants_metadata = opts.metadata or opts.block_metadata
	if wants_metadata:
		cfgSettings['lumi filter *'] = '-'
		cfgSettings['keep lumi metadata *'] = 'True'
	return getConfig(configFile = opts.settings, configDict = {'dataset': cfgSettings})
예제 #12
0
def discoverDataset(providerName, config_dict):
    """Write the dataset config or the discovered blocks.

    ``config_dict`` is used as the 'dataset' config section.  When its
    'dump config' entry is the string 'True', the config is written to
    ``sys.stdout`` and nothing else happens (returns None).  Otherwise the
    blocks of the named provider are saved to the 'output' file, or to
    ``sys.stdout`` when 'output' is falsy.  Stripping is enabled when the
    'strip' entry is the string 'True'.
    """
    config = getConfig(configDict={'dataset': config_dict})
    dump_only = (config_dict['dump config'] == 'True')
    if dump_only:
        config.write(sys.stdout, printDefault=False, printMinimal=True)
        return

    provider_cls = Plugin.getClass('DataProvider')
    provider = provider_cls.createInstance(providerName, config,
                                           config_dict['dataset'], None)
    stripMetadata = config_dict['strip'] == 'True'
    # Choose writer and destination, then fetch the blocks
    if config_dict['output']:
        (writer, destination) = (provider_cls.saveToFile,
                                 config_dict['output'])
    else:
        (writer, destination) = (provider_cls.saveToStream, sys.stdout)
    return writer(destination, provider.getBlocks(), stripMetadata)
예제 #13
0
def discover_blocks(options):
	"""Get the dataset blocks from the provider and dump them to 'dbs.dat'.

	options.args[0] is a work directory or a config file providing the work
	path. options.opts.tempdir defaults to '<work dir>/dbs' and is created
	if it does not exist. Blocks are read via 'ListProvider' from
	options.opts.input_file if set, otherwise via 'DBSInfoProvider' using
	options.config_dict.

	Returns the result of provider.getBlocks; exits with os.EX_OK after
	writing the dump when options.opts.discovery is set.
	"""
	# Get work directory, create dbs dump directory
	if os.path.isdir(options.args[0]):
		workDir = os.path.abspath(os.path.normpath(options.args[0]))
	else:
		workDir = getConfig(configFile = options.args[0]).getWorkPath()
	if not options.opts.tempdir:
		options.opts.tempdir = os.path.join(workDir, 'dbs')
	if not os.path.exists(options.opts.tempdir):
		# os.mkdir raises if a parent directory is missing - makedirs creates the whole path
		os.makedirs(options.opts.tempdir)

	# get provider with dataset information
	if options.opts.input_file:
		provider = DataProvider.createInstance('ListProvider', getConfig(), options.opts.input_file, None)
	else:
		config = getConfig(configDict = {'dataset': options.config_dict})
		provider = DataProvider.createInstance('DBSInfoProvider', config, options.args[0], None)

	blocks = provider.getBlocks(show_stats = False)
	DataProvider.saveToFile(os.path.join(options.opts.tempdir, 'dbs.dat'), blocks)
	if options.opts.discovery:
		# discovery-only mode ends here
		sys.exit(os.EX_OK)
	return blocks
예제 #14
0
def main(opts, args):
	"""Display a report about the jobs matched by opts.job_selector.

	args[0] is the config file. The task object is only created when
	opts.use_task is set. opts.report names the report plugin and
	opts.string is passed on to it.
	"""
	# try to open config file
	config = getConfig(args[0], section = 'global')

	# Initialise task module
	task = None
	if opts.use_task:
		task = config.getPlugin('workflow', 'Workflow:global', cls = 'Workflow', pargs = ('task',)).task

	# Initialise job database
	jobDB = config.getPlugin('job database', 'TextFileJobDB', cls = 'JobDB')
	activity = Activity('Filtering job entries')
	try:
		selected = jobDB.getJobs(JobSelector.create(opts.job_selector, task = task))
	finally:
		# finish the activity even when the job selection raises
		activity.finish()

	report = Report.createInstance(opts.report, jobDB, task, selected, opts.string)
	report.display()
예제 #15
0
def main(opts, args):
	"""Display a report about the jobs matched by opts.job_selector.

	args[0] is the config file. The task plugin is only created when
	opts.use_task is set. opts.report names the report plugin and
	opts.string is passed on to it.
	"""
	# try to open config file
	config = getConfig(args[0], section = 'global')

	# Initialise task module
	task = None
	if opts.use_task:
		task = config.getPlugin(['task', 'module'], cls = 'TaskModule')

	# Initialise job database
	jobDB = config.getPlugin('job database', 'JobDB', cls = 'JobDB')
	activity = utils.ActivityLog('Filtering job entries')
	try:
		selected = jobDB.getJobs(JobSelector.create(opts.job_selector, task = task))
	finally:
		# make sure the activity is closed, also if the selection fails
		activity.finish()

	report = Report.createInstance(opts.report, jobDB, task, selected, opts.string)
	report.display()
예제 #16
0
def main(opts, args):
    """Display a report about the jobs matched by ``opts.job_selector``.

    ``args[0]`` is the config file.  The task plugin is only created when
    ``opts.use_task`` is set.  ``opts.report`` names the report plugin and
    ``opts.string`` is passed on to it.
    """
    # try to open config file
    config = getConfig(args[0], section='global')

    # Initialise task module
    task = None
    if opts.use_task:
        task = config.getPlugin(['task', 'module'], cls='TaskModule')

    # Initialise job database
    jobDB = config.getPlugin('job database', 'JobDB', cls='JobDB')
    activity = utils.ActivityLog('Filtering job entries')
    try:
        selected = jobDB.getJobs(
            JobSelector.create(opts.job_selector, task=task))
    finally:
        # close the activity even if selecting the jobs raises
        activity.finish()

    report = Report.createInstance(opts.report, jobDB, task, selected,
                                   opts.string)
    report.display()
예제 #17
0
def main():
	"""Display the configured report for the jobs matched by opts.selector.

	Uses the module-level names opts and args; args[0] is the config file.
	The task instance is only created when opts.useTask is set and is then
	also passed as tag to the job manager plugin lookup.
	"""
	# try to open config file
	config = getConfig(args[0], section = 'global')

	# Initialise task module (optional)
	(task, tags) = (None, [])
	if opts.useTask:
		task = config.getPlugin(['task', 'module'], cls = TaskModule).getInstance()
		tags = [task]

	# Initialise job database through the job manager
	manager_cls = config.getPlugin('job manager', 'SimpleJobManager', cls = JobManager, tags = tags)
	jobDB = manager_cls.getInstance(task, None).jobDB

	activity = utils.ActivityLog('Filtering job entries')
	selector = JobSelector.create(opts.selector, task = task)
	selected = jobDB.getJobs(selector)
	del activity # release the ActivityLog object after the selection

	report = Report.getInstance(opts.reportClass, jobDB, task, selected, opts.string)
	report.display()
예제 #18
0
def main(opts, args):
    """Display a report about the jobs matched by ``opts.job_selector``.

    ``args[0]`` is the config file.  The task object is only created when
    ``opts.use_task`` is set.  ``opts.report`` names the report plugin and
    ``opts.string`` is passed on to it.
    """
    # try to open config file
    config = getConfig(args[0], section='global')

    # Initialise task module
    task = None
    if opts.use_task:
        task = config.getPlugin('workflow',
                                'Workflow:global',
                                cls='Workflow',
                                pargs=('task', )).task

    # Initialise job database
    jobDB = config.getPlugin('job database', 'TextFileJobDB', cls='JobDB')
    activity = Activity('Filtering job entries')
    try:
        selected = jobDB.getJobs(
            JobSelector.create(opts.job_selector, task=task))
    finally:
        # finish the activity regardless of whether the selection succeeded
        activity.finish()

    report = Report.createInstance(opts.report, jobDB, task, selected,
                                   opts.string)
    report.display()
예제 #19
0
def setup_config(opts, args):
	"""Create the config from a config file or from command line arguments.

	args[0] is used as config file if it exists; otherwise the joined args
	are stored as 'parameters' setting. opts.parameter entries ('key=value')
	go to the 'parameters' section. opts.dataset enables the 'DATASETNICK'
	default lookup and a falsy opts.persistent selects the
	'BasicParameterAdapter'. Returns the config object.
	"""
	have_config_file = os.path.exists(args[0])
	config = getConfig(args[0] if have_config_file else None, section = 'global')
	config.changeView(setSections = ['jobs']).set('nseeds', '1', '?=')
	param_view = config.changeView(setSections = ['parameters'])

	if opts.parameter:
		utils.vprint('Provided options:')
		for setting in opts.parameter:
			(key, value) = setting.split('=', 1)
			(key, value) = (key.strip(), value.strip())
			# literal backslash-n sequences in the value become newlines
			param_view.set(key, value.replace('\\n', '\n'), '=')
			utils.vprint('\t%s: %s' % (key, value))
		utils.vprint('')

	if not have_config_file:
		param_view.set('parameters', ' '.join(args).replace('\\n', '\n'))
	if opts.dataset:
		param_view.set('default lookup', 'DATASETNICK')
	if not opts.persistent:
		param_view.set('parameter adapter', 'BasicParameterAdapter', '=')
	if utils.verbosity() > 2:
		config.changeView(setSections = None).write(sys.stdout)
	return config
예제 #20
0
# | limitations under the License.

from gcSupport import Options, Plugin, getConfig, scriptOptions
from grid_control.utils.webservice import JSONRestClient
from grid_control_cms.sitedb import SiteDB

def lfn2pfn(node, lfn, prot = 'srmv2'):
	"""Query the PhEDEx lfn2pfn data service and return the 'mapping' entry of the reply."""
	client = JSONRestClient()
	query = {'node': node, 'protocol': prot, 'lfn': lfn}
	reply = client.get(url = 'https://cmsweb.cern.ch/phedex/datasvc/json/prod/lfn2pfn', params = query)
	return reply['phedex']['mapping']

# Command line options of the script
parser = Options()
parser.addText(None, 's', 'se',      default = None,    help = 'Resolve LFN on CMS SE into PFN')
parser.addText(None, ' ', 'se-prot', default = 'srmv2', help = 'Name of default SE protocol')
parser.addText(None, ' ', 'lfn',     default = '/store/user/<hypernews name>', help = 'Name of default LFN')
options = scriptOptions(parser)

if options.opts.se:
	# Replace the '<hypernews name>' placeholder by the user name that SiteDB
	# returns for the DN of the VOMS proxy
	if '<hypernews name>' in options.opts.lfn:
		token = Plugin.getClass('AccessToken').createInstance('VomsProxy', getConfig(), 'token')
		site_db = SiteDB()
		hnName = site_db.dn_to_username(dn=token.getFQUsername())
		if not hnName:
			raise Exception('Unable to map grid certificate to hypernews name!')
		options.opts.lfn = options.opts.lfn.replace('<hypernews name>', hnName)

	tmp = lfn2pfn(node = options.opts.se, prot = options.opts.se_prot, lfn = options.opts.lfn)
	for entry in tmp:
		# Print each PFN exactly once - prefixed by the node name if there
		# are several entries (previously the PFN was printed a second time)
		if len(tmp) > 1:
			print(entry['node'] + ' ' + entry['pfn'])
		else:
			print(entry['pfn'])
예제 #21
0
# Further options registered under the 'jobs' section
parser.addText('jobs', '', 'job-force-state',        default='',    help='Force new job state')
parser.addText('jobs', '', 'job-show-jdl',           default='',    help='Show JDL file if available')

# Options of the 'data' section - its usage string asks for dataset files as arguments
parser.section('data', 'Dataset debugging', '%s <dataset file> <dataset file> ...')
parser.addText('data', '', 'dataset-show-diff',      default='',    help='Show difference between datasets')
parser.addText('data', '', 'dataset-show-removed',   default='',    help='Find removed dataset blocks')

# Option that is not assigned to a section (section argument is None)
parser.addText(None,  'd', 'logfile-decode',         default='',    help='Decode log files')
# The object returned by scriptOptions provides the option values (.opts)
# and the remaining arguments (.args) used by the rest of the script
options = scriptOptions(parser)
(opts, args) = (options.opts, options.args)

########################################################
# BACKEND

if opts.backend_list_nodes or opts.backend_list_queues:
	# The backend name is given by the joined arguments ('local' if there are none)
	backend = ' '.join(args) or 'local'
	config = getConfig()
	wms = Plugin.getClass('WMS').createInstance(backend, config, backend)
	# Log the repr of the requested backend information
	if opts.backend_list_nodes:
		nodes = wms.getNodes()
		logging.info(repr(nodes))
	if opts.backend_list_queues:
		queues = wms.getQueues()
		logging.info(repr(queues))

########################################################
# DATASET PARTITION

def partition_invalid(splitter):
	"""Yield a row {0: jobNum} for every partition whose split info is flagged DataSplitter.Invalid."""
	for partition_num in irange(splitter.getMaxJobs()):
		is_invalid = splitter.getSplitInfo(partition_num).get(DataSplitter.Invalid, False)
		if not is_invalid:
			continue
		yield {0: partition_num}
예제 #22
0
def main():
	"""Build the parameter source for a config / parameter expression and show its content.

	Uses the module-level names ``opts`` and ``args``. ``args[0]`` is taken as
	config file if it exists, otherwise the joined arguments are stored as the
	'parameters' setting. Depending on the options, the parameter points are
	listed as a table (``opts.listparams``), the parameter source is dumped to
	a file (``opts.save``) and the job intervention is printed
	(``opts.intervention``).
	"""
	# Set config based on settings from config file or command line
	configFile = None
	if os.path.exists(args[0]):
		configFile = args[0]
	config = getConfig(configFile, section = 'global')
	config.changeView(setSections = ['jobs']).set('nseeds', '1', '?=')
	configParameters = config.changeView(setSections = ['parameters'])
	if opts.parameters:
		utils.vprint('Provided options:')
		for p in opts.parameters:
			# each entry has the form 'key=value'; literal '\n' sequences become newlines
			k, v = p.split('=', 1)
			configParameters.set(k.strip(), v.strip().replace('\\n', '\n'), '=')
			utils.vprint('\t%s: %s' % (k.strip(), v.strip()))
		utils.vprint('')
	if not os.path.exists(args[0]):
		# no config file: the command line arguments form the parameter setting
		configParameters.set('parameters', str.join(' ', args).replace('\\n', '\n'))
	if opts.dataset:
		configParameters.set('default lookup', 'DATASETNICK')
#	configParameters.set('parameter adapter', 'BasicParameterAdapter', '=') # Don't track parameter changes
	if opts.verbosity > 2:
		config.changeView(setSections = None).write(sys.stdout)

	# Initialize ParameterFactory
	# NOTE(review): configTask is not used anywhere below in this function
	configTask = config.changeView(setSections = [config.get(['task', 'module'], 'DummyTask')])
	pm = config.getPlugin('parameter factory', 'SimpleParameterFactory', cls = ParameterFactory).getInstance()

	# Create dataset parameter source
	class DummySplitter:
		# Stand-in splitter that serves hard-coded partition entries
		def getMaxJobs(self):
			return 3
		def getSplitInfo(self, pNum):
			mkEntry = lambda ds, fl, n, nick: { DataSplitter.Dataset: ds, DataSplitter.Nickname: nick,
				DataSplitter.FileList: fl, DataSplitter.NEntries: n }
			# NOTE(review): rndStr is defined but not used
			rndStr = lambda: md5(str(random.random())).hexdigest()[:10]
			tmp = [ mkEntry('ds1', ['a', 'b'], 23, 'data_1'), mkEntry('ds1', ['1'], 42, 'data_1'),
				mkEntry('ds2', ['m', 'n'], 123, 'data_2'), mkEntry('ds2', ['x', 'y', 'z'], 987, 'data_3') ]
			return tmp[pNum]

	class DataSplitProcessorTest:
		# Copies selected partition entries into the DATASET* parameters
		def getKeys(self):
			return map(lambda k: ParameterMetadata(k, untracked=True),
				['DATASETINFO', 'DATASETID', 'DATASETPATH', 'DATASETBLOCK', 'DATASETNICK'])

		def process(self, pNum, splitInfo, result):
			result.update({
				'DATASETINFO': '',
				'DATASETID': splitInfo.get(DataSplitter.DatasetID, None),
				'DATASETPATH': splitInfo.get(DataSplitter.Dataset, None),
				'DATASETBLOCK': splitInfo.get(DataSplitter.BlockName, None),
				'DATASETNICK': splitInfo.get(DataSplitter.Nickname, None),
				'DATASETSPLIT': pNum,
			})

	# opts.dataset == 'true' (any case) selects the dummy splitter, any other
	# non-empty value is loaded as splitter state
	# NOTE(review): assumes opts.dataset is a string (.lower() is called unconditionally) - confirm the option default
	if opts.dataset.lower() == 'true':
		utils.vprint('Registering dummy data provider data')
		dataSplitter = DummySplitter()
	elif opts.dataset:
		dataSplitter = DataSplitter.loadState(opts.dataset)

	if opts.dataset:
		# register the splitter as dataset source 'data'
		DataParameterSource.datasetsAvailable['data'] = DataParameterSource(
			config.getWorkPath(), 'data', None, dataSplitter, DataSplitProcessorTest())

	psource = pm.getSource(config)

	if opts.forceiv:
		# set a fixed intervention value on all dataset sources
		for dp in DataParameterSource.datasetSources:
			dp.intervention = (set([1]), set([0]), True)

	if opts.listparams:
		result = []
		needGCParam = False
		if psource.getMaxJobs() != None:
			# collect the job infos and count the active parameter points
			countActive = 0
			for jobNum in range(psource.getMaxJobs()):
				info = psource.getJobInfo(jobNum)
				if info[ParameterInfo.ACTIVE]:
					countActive += 1
				if opts.inactive or info[ParameterInfo.ACTIVE]:
					if not info[ParameterInfo.ACTIVE]:
						info['GC_PARAM'] = 'N/A'
					# the GC_PARAM column is only shown if it differs from the job number
					if str(info['GC_PARAM']) != str(jobNum):
						needGCParam = True
					result.append(info)
			if opts.displaymode == 'parseable':
				utils.vprint('Count,%d,%d' % (countActive, psource.getMaxJobs()))
			else:
				utils.vprint('Number of parameter points: %d' % psource.getMaxJobs())
				if countActive != psource.getMaxJobs():
					utils.vprint('Number of active parameter points: %d' % countActive)
		else:
			# no job limit available: show the info of a single fixed job number
			result.append(psource.getJobInfo(123))
		# select the keys to display (all keys if opts.output is empty) and
		# split them into tracked ('stored') and untracked ones
		# NOTE(review): stored.remove / stored.append below require filter() to return a list
		enabledOutput = opts.output.split(',')
		output = filter(lambda k: not opts.output or k in enabledOutput, psource.getJobKeys())
		stored = filter(lambda k: k.untracked == False, output)
		untracked = filter(lambda k: k.untracked == True, output)

		if opts.collapse > 0:
			# group parameter sets with identical values of the stored keys
			result_old = result
			result = {}
			result_nicks = {}
			head = [('COLLATE_JOBS', '# of jobs')]
			if 'DATASETSPLIT' in stored:
				stored.remove('DATASETSPLIT')
				if (opts.collapse == 1):
					stored.append('DATASETNICK')
					head.append(('DATASETNICK', 'DATASETNICK'))
				elif opts.collapse == 2:
					head.append(('COLLATE_NICK', '# of nicks'))
			for pset in result_old:
				if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
					pset.pop('DATASETSPLIT')
				nickname = None
				if ('DATASETNICK' in pset) and (opts.collapse == 2):
					nickname = pset.pop('DATASETNICK')
				# the md5 of the stored values is used as group key
				h = md5(repr(map(lambda key: pset.get(key), stored))).hexdigest()
				result.setdefault(h, []).append(pset)
				result_nicks.setdefault(h, set()).add(nickname)

			def doCollate(h):
				# represent each group by its first entry plus the group counters
				tmp = result[h][0]
				tmp['COLLATE_JOBS'] = len(result[h])
				tmp['COLLATE_NICK'] = len(result_nicks[h])
				return tmp
			result = map(doCollate, result)
		else:
			head = [('GC_JOB_ID', '#')]
			if needGCParam:
				head.append(('GC_PARAM', 'GC_PARAM'))
		if opts.active:
			head.append((ParameterInfo.ACTIVE, 'ACTIVE'))
		if opts.visible:
			# explicit list of columns replaces the stored keys
			stored = opts.visible.split(',')
		head.extend(sorted(zip(stored, stored)))
		if opts.untracked:
			# untracked keys are shown with their name in parentheses
			head.extend(sorted(map(lambda n: (n, '(%s)' % n), filter(lambda n: n not in ['GC_PARAM', 'GC_JOB_ID'], untracked))))
		utils.vprint('')
		utils.printTabular(head, result)

	if opts.save:
		utils.vprint('')
		ParameterSource.getClass('GCDumpParameterSource').write(opts.save, psource)
		utils.vprint('Parameter information saved to ./%s' % opts.save)

	if opts.intervention:
		utils.vprint('')
		tmp = psource.getJobIntervention()
		if tmp:
			# tmp[0] is printed as redo list, tmp[1] as disable list
			if opts.displaymode == 'parseable':
				utils.vprint('R: %s' % str.join(',', map(str, tmp[0])))
				utils.vprint('D: %s' % str.join(',', map(str, tmp[1])))
			else:
				utils.vprint('   Redo: %r' % tmp[0])
				utils.vprint('Disable: %r' % tmp[1])
		else:
			if opts.displaymode == 'parseable':
				utils.vprint('NOINT')
			else:
				utils.vprint('No intervention')
예제 #23
0
def realmain(opts, args):
	config = gcSupport.getConfig(configDict = {'access': {'ignore warnings': 'True'}})
	token = AccessToken.getInstance(opts.token, config, 'access', OSLayer.create(config))
	(workDir, config, jobDB) = gcSupport.initGC(args)
	jobList = jobDB.getJobs(ClassSelector(JobClass.SUCCESS))

	# Create SE output dir
	if not opts.output:
		opts.output = os.path.join(workDir, 'se_output')
	if '://' not in opts.output:
		opts.output = 'file:///%s' % os.path.abspath(opts.output)

	infos = {}
	def incInfo(x):
		infos[x] = infos.get(x, 0) + 1

	def processSingleJob(jobNum, output):
		output.init(jobNum)
		job = jobDB.get(jobNum)
		# Only run over finished and not yet downloaded jobs
		if job.state != Job.SUCCESS:
			output.error('Job has not yet finished successfully!')
			return incInfo('Processing')
		if job.get('download') == 'True' and not opts.markIgnoreDL:
			if not opts.threads:
				output.error('All files already downloaded!')
			return incInfo('Downloaded')
		retry = int(job.get('download attempt', 0))
		failJob = False

		if not token.canSubmit(20*60, True):
			sys.stderr.write('Please renew access token!')
			sys.exit(os.EX_UNAVAILABLE)

		# Read the file hash entries from job info file
		files = FileInfoProcessor().process(os.path.join(workDir, 'output', 'job_%d' % jobNum))
		if files:
			files = map(lambda fi: (fi[FileInfoProcessor.Hash], fi[FileInfoProcessor.NameLocal],
				fi[FileInfoProcessor.NameDest], fi[FileInfoProcessor.Path]), files)
		output.files(files)
		if not files:
			if opts.markEmptyFailed:
				failJob = True
			else:
				return incInfo('Job without output files')

		for (fileIdx, fileInfo) in enumerate(files):
			(hash, name_local, name_dest, pathSE) = fileInfo
			output.file(fileIdx)

			# Copy files to local folder
			outFilePath = os.path.join(opts.output, name_dest)
			if opts.selectSE:
				if not (True in map(lambda s: s in pathSE, opts.selectSE)):
					output.error('skip file because it is not located on selected SE!')
					return
			if opts.skipExisting and (storage.se_exists(outFilePath) == 0):
				output.error('skip file as it already exists!')
				return
			if storage.se_exists(os.path.dirname(outFilePath)).wait() != 0:
				storage.se_mkdir(os.path.dirname(outFilePath)).wait()

			checkPath = 'file:///tmp/dlfs.%s' % name_dest
			if 'file://' in outFilePath:
				checkPath = outFilePath

			def monitorFile(path, lock, abort):
				path = path.replace('file://', '')
				(csize, osize, stime, otime, lttime) = (0, 0, time.time(), time.time(), time.time())
				while not lock.acquire(False): # Loop until monitor lock is available
					if csize != osize:
						lttime = time.time()
					if time.time() - lttime > 5*60: # No size change in the last 5min!
						output.error('Transfer timeout!')
						abort.acquire()
						break
					if os.path.exists(path):
						csize = os.path.getsize(path)
						output.file(fileIdx, csize, osize, stime, otime)
						(osize, otime) = (csize, time.time())
					else:
						stime = time.time()
					time.sleep(0.1)
				lock.release()

			copyAbortLock = threading.Lock()
			monitorLock = threading.Lock()
			monitorLock.acquire()
			monitor = utils.gcStartThread('Download monitor %s' % jobNum,
				monitorFile, checkPath, monitorLock, copyAbortLock)
			result = -1
			procCP = storage.se_copy(os.path.join(pathSE, name_dest), outFilePath, tmp = checkPath)
			while True:
				if not copyAbortLock.acquire(False):
					monitor.join()
					break
				copyAbortLock.release()
				result = procCP.poll()
				if result != -1:
					monitorLock.release()
					monitor.join()
					break
				time.sleep(0.02)

			if result != 0:
				output.error('Unable to copy file from SE!')
				output.error(procCP.getMessage())
				failJob = True
				break

			# Verify => compute md5hash
			if opts.verify:
				try:
					hashLocal = md5sum(checkPath.replace('file://', ''))
					if not ('file://' in outFilePath):
						dlfs_rm('file://%s' % checkPath, 'SE file')
				except KeyboardInterrupt:
					raise
				except Exception:
					hashLocal = None
				output.hash(fileIdx, hashLocal)
				if hash != hashLocal:
					failJob = True
			else:
				output.hash(fileIdx)

		# Ignore the first opts.retry number of failed jobs
		if failJob and opts.retry and (retry < opts.retry):
			output.error('Download attempt #%d failed!' % (retry + 1))
			job.set('download attempt', str(retry + 1))
			jobDB.commit(jobNum, job)
			return incInfo('Download attempts')

		for (fileIdx, fileInfo) in enumerate(files):
			(hash, name_local, name_dest, pathSE) = fileInfo
			# Remove downloaded files in case of failure
			if (failJob and opts.rmLocalFail) or (not failJob and opts.rmLocalOK):
				output.status(fileIdx, 'Deleting file %s from local...' % name_dest)
				outFilePath = os.path.join(opts.output, name_dest)
				if storage.se_exists(outFilePath).wait() == 0:
					dlfs_rm(outFilePath, 'local file')
			# Remove SE files in case of failure
			if (failJob and opts.rmSEFail)    or (not failJob and opts.rmSEOK):
				output.status(fileIdx, 'Deleting file %s...' % name_dest)
				dlfs_rm(os.path.join(pathSE, name_dest), 'SE file')
			output.status(fileIdx, None)

		if failJob:
			incInfo('Failed downloads')
			if opts.markFailed:
				# Mark job as failed to trigger resubmission
				job.state = Job.FAILED
		else:
			incInfo('Successful download')
			if opts.markDL:
				# Mark as downloaded
				job.set('download', 'True')

		# Save new job status infos
		jobDB.commit(jobNum, job)
		output.finish()
		time.sleep(float(opts.slowdown))

	if opts.shuffle:
		random.shuffle(jobList)
	else:
		jobList.sort()

	if opts.threads:
		from grid_control_gui import ansi
		errorOutput = []
		class ThreadDisplay:
			def __init__(self):
				self.output = []
			def init(self, jobNum):
				self.jobNum = jobNum
				self.output = ['Job %5d' % jobNum, '']
			def infoline(self, fileIdx, msg = ''):
				return 'Job %5d [%i/%i] %s %s' % (self.jobNum, fileIdx + 1, len(self.files), self.files[fileIdx][2], msg)
			def files(self, files):
				(self.files, self.output, self.tr) = (files, self.output[1:], ['']*len(files))
				for x in range(len(files)):
					self.output.insert(2*x, self.infoline(x))
					self.output.insert(2*x+1, '')
			def file(self, idx, csize = None, osize = None, stime = None, otime = None):
				(hash, name_local, name_dest, pathSE) = self.files[idx]
				if otime:
					trfun = lambda sref, tref: gcSupport.prettySize(((csize - sref) / max(1, time.time() - tref)))
					self.tr[idx] = '%7s avg. - %7s/s inst.' % (gcSupport.prettySize(csize), trfun(0, stime))
					self.output[2*idx] = self.infoline(idx, '(%s - %7s/s)' % (self.tr[idx], trfun(osize, otime)))
			def hash(self, idx, hashLocal = None):
				(hash, name_local, name_dest, pathSE) = self.files[idx]
				if hashLocal:
					if hash == hashLocal:
						result = ansi.Console.fmt('MATCH', [ansi.Console.COLOR_GREEN])
					else:
						result = ansi.Console.fmt('FAIL', [ansi.Console.COLOR_RED])
					msg = '(R:%s L:%s) => %s' % (hash, hashLocal, result)
				else:
					msg = ''
				self.output[2*idx] = self.infoline(idx, '(%s)' % self.tr[idx])
				self.output[2*idx+1] = msg
				print self, repr(msg)
			def error(self, msg):
				errorOutput.append(msg)
			def write(self, msg):
				self.output.append(msg)
			def status(self, idx, msg):
				if msg:
					self.output[2*idx] = self.infoline(idx, '(%s)' % self.tr[idx]) + ' ' + msg
				else:
					self.output[2*idx] = self.infoline(idx, '(%s)' % self.tr[idx])
			def finish(self):
#				self.output.append(str(self.jobNum) + 'FINISHED')
				pass

		(active, todo) = ([], list(jobList))
		todo.reverse()
		screen = ansi.Console()
		screen.move(0, 0)
		screen.savePos()
		while True:
			screen.erase()
			screen.loadPos()
			active = filter(lambda (t, d): t.isAlive(), active)
			while len(active) < opts.threads and len(todo):
				display = ThreadDisplay()
				active.append((utils.gcStartThread('Download %s' % todo[-1],
					processSingleJob, todo.pop(), display), display))
			for (t, d) in active:
				sys.stdout.write(str.join('\n', d.output))
			sys.stdout.write(str.join('\n', ['=' * 50] + errorOutput))
			sys.stdout.flush()
			if len(active) == 0:
				break
			time.sleep(0.01)
	else:
		class DefaultDisplay:
			def init(self, jobNum):
				sys.stdout.write('Job %d: ' % jobNum)
			def files(self, files):
				self.files = files
				sys.stdout.write('The job wrote %d file%s to the SE\n' % (len(files), ('s', '')[len(files) == 1]))
			def file(self, idx, csize = None, osize = None, stime = None, otime = None):
				(hash, name_local, name_dest, pathSE) = self.files[idx]
				if otime:
					tr = lambda sref, tref: gcSupport.prettySize(((csize - sref) / max(1, time.time() - tref)))
					tmp = name_dest
					if opts.showHost:
						tmp += ' [%s]' % pathSE.split('//')[-1].split('/')[0].split(':')[0]
					self.write('\r\t%s (%7s - %7s/s avg. - %7s/s inst.)' % (tmp,
						gcSupport.prettySize(csize), tr(0, stime), tr(osize, otime)))
					sys.stdout.flush()
				else:
					self.write('\t%s' % name_dest)
					sys.stdout.flush()
			def hash(self, idx, hashLocal = None):
				(hash, name_local, name_dest, pathSE) = self.files[idx]
				self.write(' => %s\n' % ('\33[0;91mFAIL\33[0m', '\33[0;92mMATCH\33[0m')[hash == hashLocal])
				self.write('\t\tRemote site: %s\n' % hash)
				self.write('\t\t Local site: %s\n' % hashLocal)
			def error(self, msg):
				sys.stdout.write('\nJob %d: %s' % (jobNum, msg.strip()))
			def status(self, idx, msg):
				if msg:
					self.write('\t' + msg + '\r')
				else:
					self.write(' ' * len('\tDeleting file %s from SE...\r' % self.files[idx][2]) + '\r')
			def write(self, msg):
				sys.stdout.write(msg)
			def finish(self):
				sys.stdout.write('\n')

		for jobNum in jobList:
			processSingleJob(jobNum, DefaultDisplay())

	# Print overview
	if infos:
		print '\nStatus overview:'
		for (state, num) in infos.items():
			if num > 0:
				print '\t%20s: [%d/%d]' % (state, num, len(jobList))
		print

	if ('Downloaded' in infos) and (infos['Downloaded'] == len(jobDB)):
		return os.EX_OK
	return os.EX_NOINPUT
예제 #24
0
def main():
    """Register the datasets of a grid-control task in a DBS3 instance.

    Parses the command line, collects block information either from a dbs
    input file (-F) or through the 'DBSInfoProvider' for the given config
    file / work directory, and saves it as 'dbs.dat' in the temp directory.
    In discovery mode (-d) the program exits right after that.  Otherwise,
    for every generated block dump, the parent blocks missing at the target
    instance are queued for migration (or a previously saved queue is loaded
    with -c), the migration is run and the block is inserted at the target.

    Raises:
        APIError: if one of the display options (-D / -C) is given.
        IOError: if -c is given but the saved migration queue cannot be read.
    """
    usage = '%s [OPTIONS] <config file / work directory>' % sys.argv[0]
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-G', '--globaltag', dest='globaltag', default='crab2_tag', help='Specify global tag')
    parser.add_option('-F', '--input', dest='inputFile', default=None,
                      help='Specify dbs input file to use instead of scanning job output')
#    parser.add_option('-k', '--key-select',      dest='dataset key select', default='',
#        help='Specify dataset keys to process')
    parser.add_option('-c', '--continue-migration', dest='continue_migration', default=False, action='store_true',
                      help='Continue an already started migration')

    ogDiscover = optparse.OptionGroup(parser, 'Discovery options - ignored in case dbs input file is specified', '')
    ogDiscover.add_option('-n', '--name',        dest='dataset name pattern', default='',
        help='Specify dbs path name - Example: DataSet_@NICK@_@VAR@')
    ogDiscover.add_option('-T', '--datatype',    dest='datatype',      default=None,
        help='Supply dataset type in case cmssw report did not specify it - valid values: "mc" or "data"')
    ogDiscover.add_option('-m', '--merge',       dest='merge parents', default=False,  action='store_true',
        help='Merge output files from different parent blocks into a single block [Default: Keep boundaries]')
    ogDiscover.add_option('-j', '--jobhash',     dest='useJobHash',    default=False,  action='store_true',
        help='Use hash of all config files in job for dataset key calculation')
    ogDiscover.add_option('-u', '--unique-cfg',  dest='uniqueCfg',     default=False,  action='store_true',
        help='Circumvent edmConfigHash collisions so each dataset is stored with unique config information')
    ogDiscover.add_option('-P', '--parent',      dest='parent source', default='',
        help='Override parent information source - to bootstrap a reprocessing on local files')
    ogDiscover.add_option('-H', '--hash-keys',   dest='dataset hash keys', default='',
        help='Included additional variables in dataset hash calculation')
    parser.add_option_group(ogDiscover)

    ogDiscover2 = optparse.OptionGroup(parser, 'Discovery options II - only available when config file is used', '')
    ogDiscover2.add_option('-J', '--job-selector',    dest='selected',      default=None,
        help='Specify dataset(s) to process')
    parser.add_option_group(ogDiscover2)

    ogMode = optparse.OptionGroup(parser, 'Processing mode', '')
    ogMode.add_option('-b', '--batch',           dest='batch',         default=False, action='store_true',
        help='Enable non-interactive batch mode [Default: Interactive mode]')
    ogMode.add_option('-d', '--discovery',       dest='discovery',     default=False, action='store_true',
        help='Enable discovery mode - just collect file information and exit')
    ogMode.add_option('',   '--tempdir',         dest='tmpDir',        default='',
        help='Override temp directory')
    ogMode.add_option('-i', '--no-import',       dest='doImport',      default=True,  action='store_false',
        help='Disable import of new datasets into target DBS instance - only temporary xml files are created, ' +
            'which can be added later via datasetDBSTool.py [Default: Import datasets]')
    parser.add_option_group(ogMode)

    ogInc = optparse.OptionGroup(parser, 'Incremental adding of files to DBS', '')
    ogInc.add_option('-I', '--incremental',     dest='incremental',   default=False,  action='store_true',
        help='Skip import of existing files - Warning: this destroys coherent block structure!')
    # The option below is disabled, so 'opts.closeBlock' is never set by the parser;
    # the incremental code further down falls back to its documented default (True).
#    ogInc.add_option('-o', '--open-blocks',     dest='closeBlock',    default=True,   action='store_false',
#        help='Keep blocks open for addition of further files [Default: Close blocks]')
    parser.add_option_group(ogInc)

    ogInst = optparse.OptionGroup(parser, 'DBS instance handling', '')
    ogInst.add_option('-t', '--target-instance', dest='dbsTarget',
                      default='https://cmsweb.cern.ch/dbs/prod/phys03',
                      help='Specify target dbs instance url')
    ogInst.add_option('-s', '--source-instance', dest='dbsSource',
                      default='https://cmsweb.cern.ch/dbs/prod/global',
                      help='Specify source dbs instance url(s), where parent datasets are taken from')
    parser.add_option_group(ogInst)

    ogDbg = optparse.OptionGroup(parser, 'Display options', '')
    ogDbg.add_option('-D', '--display-dataset', dest='display_data',  default=None,
        help='Display information associated with dataset key(s) (accepts "all")')
    ogDbg.add_option('-C', '--display-config',  dest='display_cfg',   default=None,
        help='Display information associated with config hash(es) (accepts "all")')
    ogDbg.add_option('-v', '--verbose',         dest='verbosity',     default=0, action='count',
        help='Increase verbosity')
    parser.add_option_group(ogDbg)

    (opts, args) = parser.parse_args()
    utils.verbosity(opts.verbosity)
    # Several option names contain spaces because the complete option dict is handed
    # over as the 'dataset' config section below - hence setattr / getattr.
    setattr(opts, 'include parent infos', True)
    setattr(opts, 'importLumi', True)
    setattr(opts, 'dataset hash keys', getattr(opts, 'dataset hash keys').replace(',', ' '))
    if opts.useJobHash:
        setattr(opts, 'dataset hash keys', getattr(opts, 'dataset hash keys') + ' CMSSW_CONFIG_JOBHASH')

    # 0) Get work directory, create dbs dump directory
    if len(args) != 1:
        utils.exitWithUsage(usage, 'Neither work directory nor config file specified!')
    if os.path.isdir(args[0]):
        opts.workDir = os.path.abspath(os.path.normpath(args[0]))
    else:
        opts.workDir = getConfig(configFile=args[0]).getWorkPath()
    if not opts.tmpDir:
        opts.tmpDir = os.path.join(opts.workDir, 'dbs')
    if not os.path.exists(opts.tmpDir):
        os.mkdir(opts.tmpDir)
    # Lock file in case several instances of this program are running
    # NOTE(review): the mutex object is not used again in this function - presumably
    # it acquires the lock on construction; confirm against FileMutex.
    mutex = FileMutex(os.path.join(opts.tmpDir, 'datasetDBSAdd.lock'))

    # 1) Get dataset information
    if opts.inputFile:
        provider = DataProvider.getInstance('ListProvider', getConfig(), opts.inputFile, None)
    else:
        config = getConfig(configDict = {'dataset': dict(parser.values.__dict__)})
        if opts.discovery:
            config.set('dataset name pattern', '@DS_KEY@')
        provider = DataProvider.getInstance('DBSInfoProvider', config, args[0], None)

    provider.saveState(os.path.join(opts.tmpDir, 'dbs.dat'))
    if opts.discovery:
        sys.exit(os.EX_OK)
    blocks = provider.getBlocks()

    # 2) Filter datasets
    if opts.incremental:
        # Query target DBS for all found datasets and perform dataset resync with "supposed" state
        dNames = set(b[DataProvider.Dataset] for b in blocks)
        dNames = [ds for ds in dNames if hasDataset(opts.dbsTarget, ds)]
        config = getConfig(configDict = {None: {'dbs instance': opts.dbsTarget}})
        oldBlocks = []
        for ds in dNames:
            oldBlocks.extend(DBSApiv2(config, None, ds, None).getBlocks())
        (blocksAdded, blocksMissing, blocksChanged) = DataProvider.resyncSources(oldBlocks, blocks)
        if len(blocksMissing) or len(blocksChanged):
            if not utils.getUserBool(' * WARNING: Block structure has changed! Continue?', False):
                sys.exit(os.EX_OK)
        # Search for blocks which were partially added and generate "pseudo"-blocks with left over files
        setOldBlocks = set(b[DataProvider.BlockName] for b in oldBlocks)
        setAddedBlocks = set(b[DataProvider.BlockName] for b in blocksAdded)
        blockCollision = setOldBlocks.intersection(setAddedBlocks)
        # 'closeBlock' is not defined by any active option - use the default of the
        # disabled '--open-blocks' option instead of failing with an AttributeError
        closeBlock = getattr(opts, 'closeBlock', True)
        if blockCollision and closeBlock: # Block are closed and contents have changed
            for block in blocksAdded:
                if block[DataProvider.BlockName] in blockCollision:
                    block[DataProvider.BlockName] = utils.strGuid(md5(str(time.time())).hexdigest())
        blocks = blocksAdded

    # 3) Display dataset properties
    if opts.display_data or opts.display_cfg:
        raise APIError('Not yet reimplemented')

    # set-up logging
    logging.basicConfig(format='%(levelname)s: %(message)s')
    logger = logging.getLogger('dbs3-migration')
    logger.addHandler(NullHandler())
    logger.setLevel(logging.DEBUG)

    # set-up dbs clients
    dbs3_target_client = DBS3LiteClient(url=opts.dbsTarget)
    dbs3_source_client = DBS3LiteClient(url=opts.dbsSource)

    dbs3_migration_queue = DBS3MigrationQueue()
    migration_file = os.path.join(opts.tmpDir, 'dbs3_migration.pkl')

    for blockDump in generateDBS3BlockDumps(opts, blocks):
        if not opts.continue_migration:
            # initiate the dbs3 to dbs3 migration of parent blocks
            logger.debug('Checking parentage for block: %s', blockDump['block']['block_name'])
            unique_parent_lfns = set(parent[u'parent_logical_file_name']
                                     for parent in blockDump[u'file_parent_list'])
            unique_blocks = set(block['block_name'] for parent_lfn in unique_parent_lfns
                                for block in dbs3_source_client.listBlocks(logical_file_name=parent_lfn))
            for block_to_migrate in unique_blocks:
                if dbs3_target_client.listBlocks(block_name=block_to_migrate):
                    # block already at destination
                    logger.debug('Block %s is already at destination', block_to_migrate)
                    continue
                # NOTE(review): the migration url is fixed and does not follow --source-instance
                migration_task = MigrationTask(block_name=block_to_migrate,
                                               migration_url='https://cmsweb.cern.ch/dbs/prod/global/DBSReader',
                                               dbs_client=dbs3_target_client)
                try:
                    dbs3_migration_queue.add_migration_task(migration_task)
                except AlreadyQueued as aq:
                    logger.debug(aq.message)

            dbs3_migration_queue.save_to_disk(migration_file)
        else:
            try:
                dbs3_migration_queue = DBS3MigrationQueue.read_from_disk(migration_file)
            except IOError as io_err:
                msg = "Probably, there is no DBS 3 migration for this dataset ongoing, Dude!"
                # Log the exception itself: '.message' is empty for errno-style IOErrors
                logger.exception('%s\n%s', io_err, msg)
                raise

        # wait for all parent blocks migrated to dbs3
        do_migration(dbs3_migration_queue)

        # insert block into dbs3
        dbs3_target_client.insertBulkBlock(blockDump)
예제 #25
0
from grid_control.gc_exceptions import RuntimeError
from grid_control.utils.webservice import readJSON
from grid_control_cms.provider_sitedb import SiteDB

def lfn2pfn(node, lfn, protocol = 'srmv2'):
	"""Resolve a logical file name via the PhEDEx 'lfn2pfn' data service.

	node     - value sent as the 'node' parameter of the query
	lfn      - logical file name to resolve
	protocol - value sent as the 'protocol' parameter (default: 'srmv2')

	Returns the 'pfn' entry of the first mapping in the reply; an empty
	mapping list raises an IndexError.
	"""
	mapping = readJSON('https://cmsweb.cern.ch/phedex/datasvc/json/prod/lfn2pfn',
		{'node': node, 'protocol': protocol, 'lfn': lfn})['phedex']['mapping']
	return mapping[0]['pfn']


# Command line: resolve the LFN given by --lfn into PFN(s) on the storage element given by --SE
parser = optparse.OptionParser()
parser.add_option('-s', '--SE', dest='SE', default=None, help='Resolve LFN on CMS SE into PFN')
parser.add_option('', '--lfn', dest='lfn', default='/store/user/<hypernews name>', help='Name of default LFN')
parser.add_option('', '--se-prot', dest='seprot', default='srmv2', help='Name of default SE protocol')
(opts, args) = parseOptions(parser)

if opts.SE:
	if '<hypernews name>' in opts.lfn:
		# Fill the placeholder with the user name that SiteDB maps to the grid certificate
		token = AccessToken.getInstance('VomsProxy', getConfig(), None)
		site_db = SiteDB()
		hnName = site_db.dn_to_username(dn=token.getFQUsername())
		if not hnName:
			raise RuntimeError('Unable to map grid certificate to hypernews name!')
		opts.lfn = opts.lfn.replace('<hypernews name>', hnName)

	tmp = readJSON('https://cmsweb.cern.ch/phedex/datasvc/json/prod/lfn2pfn',
		{'node': opts.SE, 'protocol': opts.seprot, 'lfn': opts.lfn})['phedex']['mapping']
	# The node name is only shown when the reply contains more than one mapping
	showNode = len(tmp) > 1
	for entry in tmp:
		if showNode:
			print('%s %s' % (entry['node'], entry['pfn']))
		else:
			print(entry['pfn'])
예제 #26
0
def main():
	"""Show or save information about the dataset named by the first argument.

	Works on the module-level `args` / `opts`.  `args[0]` is either an existing
	file (read via 'ListProvider') or a dataset expression handed to
	`DataProvider.create`.  The `opts` flags select the reports, which are
	printed in this order: configentry, listdatasets, listblocks, listfiles,
	metadata, blockmetadata, liststorage, info; `opts.save` finally writes the
	(optionally sorted) block list to a file.

	Raises:
		DatasetError: if the provider returns no blocks.
	"""
	dataset = args[0].strip()
	cfgSettings = {'dbs blacklist T1 *': 'False', 'remove empty blocks *': 'False',
		'remove empty files *': 'False', 'location format *': opts.locationfmt,
		'nickname check collision *': 'False'}
	if opts.metadata or opts.blockmetadata:
		cfgSettings['lumi filter *'] = '-'
		cfgSettings['keep lumi metadata *'] = 'True'

	config = getConfig(configFile = opts.settings, configDict = {'dataset': cfgSettings})

	if os.path.exists(dataset):
		provider = DataProvider.getInstance('ListProvider', config, dataset, None)
	else:
		provider = DataProvider.create(config, dataset, opts.provider)
	blocks = provider.getBlocks()
	if len(blocks) == 0:
		raise DatasetError('No blocks!')

	datasets = set(b[DataProvider.Dataset] for b in blocks)
	if len(datasets) > 1 or opts.info:
		headerbase = [(DataProvider.Dataset, 'Dataset')]
	else:
		print('Dataset: %s' % blocks[0][DataProvider.Dataset])
		headerbase = []

	if opts.configentry:
		print('')
		print('dataset =')
		infos = {}
		order = []
		maxnick = 5
		for block in blocks:
			dsName = block[DataProvider.Dataset]
			if not infos.get(dsName, None):
				order.append(dsName)
				infos[dsName] = {DataProvider.Dataset: dsName}
				if DataProvider.Nickname not in block and opts.confignick:
					try:
						if '/' in dsName:
							block[DataProvider.Nickname] = dsName.lstrip('/').split('/')[1]
						else:
							block[DataProvider.Nickname] = dsName
					except IndexError:
						# Name has no second path component - the fallback below sets the nickname
						pass
				if DataProvider.Nickname not in block and opts.confignick:
					# NOTE(review): 'np' is not defined in this function - presumably a
					# module-level nickname producer; confirm
					block[DataProvider.Nickname] = np.getName(None, dsName, block)
				if DataProvider.Nickname in block:
					nick = block[DataProvider.Nickname]
					infos[dsName][DataProvider.Nickname] = nick
					maxnick = max(maxnick, len(nick))
				if len(block[DataProvider.FileList]):
					infos[dsName][DataProvider.URL] = block[DataProvider.FileList][0][DataProvider.URL]
		for dsID, dsName in enumerate(order):
			info = infos[dsName]
			short = DataProvider.providers.get(provider.__class__.__name__, provider.__class__.__name__)
			nickname = info.get(DataProvider.Nickname, 'nick%d' % dsID).rjust(maxnick)
			filterExpr = utils.QM(short == 'list', ' %% %s' % info[DataProvider.Dataset], '')
			print('\t%s : %s : %s%s' % (nickname, short, provider._datasetExpr, filterExpr))


	if opts.listdatasets:
		# Add some enums for consistent access to info dicts
		DataProvider.NFiles = -1
		DataProvider.NBlocks = -2

		def updateInfos(target, block):
			# Add the block, file and entry counts of one block to the given summary dict
			target[DataProvider.NBlocks] = target.get(DataProvider.NBlocks, 0) + 1
			target[DataProvider.NFiles] = target.get(DataProvider.NFiles, 0) + len(block[DataProvider.FileList])
			target[DataProvider.NEntries] = target.get(DataProvider.NEntries, 0) + block[DataProvider.NEntries]

		print('')
		infos = {}
		order = []
		infosum = {DataProvider.Dataset : 'Sum'}
		for block in blocks:
			dsName = block.get(DataProvider.Dataset, '')
			if not infos.get(dsName, None):
				order.append(dsName)
				infos[dsName] = {DataProvider.Dataset: dsName}
			updateInfos(infos[dsName], block)
			updateInfos(infosum, block)
		head = [(DataProvider.Dataset, 'Dataset'), (DataProvider.NEntries, '#Events'),
			(DataProvider.NBlocks, '#Blocks'), (DataProvider.NFiles, '#Files')]
		# List comprehension instead of map(): the result must be a list to be concatenated
		utils.printTabular(head, [infos[dsName] for dsName in order] + ['=', infosum])

	if opts.listblocks:
		print('')
		utils.printTabular(headerbase + [(DataProvider.BlockName, 'Block'), (DataProvider.NEntries, 'Events')], blocks)

	if opts.listfiles:
		print('')
		for block in blocks:
			if len(datasets) > 1:
				print('Dataset: %s' % block[DataProvider.Dataset])
			print('Blockname: %s' % block[DataProvider.BlockName])
			utils.printTabular([(DataProvider.URL, 'Filename'), (DataProvider.NEntries, 'Events')], block[DataProvider.FileList])
			print('')

	def printMetadata(src, maxlen):
		# Print (key, value) pairs with right-aligned keys; long values are abbreviated
		for (mk, mv) in src:
			if len(str(mv)) > 200:
				mv = '<metadata entry size: %s> %s...' % (len(str(mv)), repr(mv)[:200])
			print('\t%s: %s' % (mk.rjust(maxlen), mv))
		if src:
			print('')

	if opts.metadata and not opts.save:
		print('')
		for block in blocks:
			if len(datasets) > 1:
				print('Dataset: %s' % block[DataProvider.Dataset])
			print('Blockname: %s' % block[DataProvider.BlockName])
			metaKeys = block.get(DataProvider.Metadata, [])
			# '+ [0]' keeps max() from failing when the block has no metadata keys
			mk_len = max([len(key) for key in metaKeys] + [0])
			for f in block[DataProvider.FileList]:
				print('%s [%d events]' % (f[DataProvider.URL], f[DataProvider.NEntries]))
				printMetadata(zip(metaKeys, f.get(DataProvider.Metadata, [])), mk_len)
			print('')

	if opts.blockmetadata and not opts.save:
		for block in blocks:
			if len(datasets) > 1:
				print('Dataset: %s' % block[DataProvider.Dataset])
			print('Blockname: %s' % block[DataProvider.BlockName])
			fileList = block[DataProvider.FileList]
			metaKeys = block.get(DataProvider.Metadata, [])
			mkdict = lambda fi: dict(zip(metaKeys, fi.get(DataProvider.Metadata, [])))
			# Only touch the first file if there is one - blocks without files are
			# not removed ('remove empty blocks' is disabled above)
			if fileList:
				metadata = mkdict(fileList[0])
			else:
				metadata = {}
			for fileInfo in fileList:
				utils.intersectDict(metadata, mkdict(fileInfo))
			printMetadata(metadata.items(), max([len(key) for key in metadata] + [0]))

	if opts.liststorage:
		print('')
		infos = {}
		print('Storage elements:')
		for block in blocks:
			dsName = block[DataProvider.Dataset]
			if len(headerbase) > 0:
				print('Dataset: %s' % dsName)
			if block.get(DataProvider.BlockName, None):
				print('Blockname: %s' % block[DataProvider.BlockName])
			if block[DataProvider.Locations] is None:
				print('\tNo location contraint specified')
			elif block[DataProvider.Locations] == []:
				print('\tNot located at anywhere')
			else:
				for se in block[DataProvider.Locations]:
					print('\t%s' % se)
			print('')

	if opts.info:
		evSum = 0
		for block in blocks:
			blockId = '%s %s' % (block.get(DataProvider.Dataset, '-'), block.get(DataProvider.BlockName, '-'))
			blockStorage = '-'
			if block.get(DataProvider.Locations, None):
				blockStorage = str.join(',', block.get(DataProvider.Locations, '-'))
			evSum += block.get(DataProvider.NEntries, 0)
			print('%s %s %d %d' % (blockId, blockStorage, block.get(DataProvider.NEntries, 0), evSum))

	if opts.save:
		print('')
		blocks = provider.getBlocks()
		if opts.sort:
			blocks.sort(key = lambda b: b[DataProvider.Dataset] + '#' + b[DataProvider.BlockName])
			for b in blocks:
				b[DataProvider.FileList].sort(key = lambda fi: fi[DataProvider.URL])
		provider.saveState(opts.save, blocks)
		print('Dataset information saved to ./%s' % opts.save)