Example #1
0
def main():
    # Handle command line options
    options = setup_parser()
    options.config_dict['include parent infos'] = True
    options.config_dict['dataset hash keys'] = options.config_dict[
        'dataset hash keys'].replace(',', ' ')
    if options.opts.jobhash:
        options.config_dict['dataset hash keys'] = options.config_dict[
            'dataset hash keys'] + ' CMSSW_CONFIG_JOBHASH'
    if options.opts.discovery:
        options.config_dict['dataset name pattern'] = '@DS_KEY@'
    if len(options.args) != 1:
        utils.exitWithUsage(
            options.parser.usage(),
            'Neither work directory nor config file specified!')
    # Lock file in case several instances of this program are running
    mutex = FileMutex(os.path.join(options.opts.tempdir, 'datasetDBSAdd.lock'))
    try:
        # 1) Get dataset information
        blocks = discover_blocks(options)
        # 2) Filter datasets
        blocks = filter_blocks(options.opts, blocks)
        # 3) Process datasets (migrate parents and register)
        process_dbs3_json_blocks(
            options.opts,
            create_dbs3_json_blocks(options.opts, sort_dataset_blocks(blocks)))
    finally:
        mutex.release()
Example #2
0
def main(opts, args):
	if len(args) == 0:
		utils.exitWithUsage('Dataset path not specified!')
	datasetPath = args[0]
	if '*' in datasetPath:
		dbs3 = Plugin.createInstance('DBS3Provider', getConfig(), datasetPath, None)
		toProcess = dbs3.getCMSDatasetsImpl(datasetPath)
	else:
		toProcess = [datasetPath]

	nProd = Plugin.getClass('NickNameProducer').createInstance(opts.producer, getConfig())
	utils.printTabular(
		[(0, 'Nickname'), (1, 'Dataset')],
		lmap(lambda ds: {0: nProd.getName('', ds, None), 1: ds}, toProcess), 'll')
Example #3
0
def main(opts, args):
    if len(args) == 0:
        utils.exitWithUsage('Dataset path not specified!')
    datasetPath = args[0]
    if '*' in datasetPath:
        dbs3 = Plugin.createInstance('DBS3Provider', getConfig(), datasetPath,
                                     None)
        toProcess = dbs3.getCMSDatasetsImpl(datasetPath)
    else:
        toProcess = [datasetPath]

    nProd = Plugin.getClass('NickNameProducer').createInstance(
        opts.producer, getConfig())
    utils.printTabular([(0, 'Nickname'), (1, 'Dataset')],
                       lmap(lambda ds: {
                           0: nProd.getName('', ds, None),
                           1: ds
                       }, toProcess), 'll')
Example #4
0
def main():
	# Handle command line options
	options = setup_parser()
	options.config_dict['include parent infos'] = True
	options.config_dict['dataset hash keys'] = options.config_dict['dataset hash keys'].replace(',', ' ')
	if options.opts.jobhash:
		options.config_dict['dataset hash keys'] = options.config_dict['dataset hash keys'] + ' CMSSW_CONFIG_JOBHASH'
	if options.opts.discovery:
		options.config_dict['dataset name pattern'] = '@DS_KEY@'
	if len(options.args) != 1:
		utils.exitWithUsage(options.parser.usage(), 'Neither work directory nor config file specified!')
	# Lock file in case several instances of this program are running
	mutex = FileMutex(os.path.join(options.opts.tempdir, 'datasetDBSAdd.lock'))
	try:
		# 1) Get dataset information
		blocks = discover_blocks(options)
		# 2) Filter datasets
		blocks = filter_blocks(options.opts, blocks)
	# 3) Process datasets (migrate parents and register)
		process_dbs3_json_blocks(options.opts, create_dbs3_json_blocks(options.opts, sort_dataset_blocks(blocks)))
	finally:
		mutex.release()
Example #5
0
import sys, optparse
from gcSupport import utils, Config, TaskModule, JobManager, JobSelector, Report, GCError, parseOptions, handleException, getConfig

parser = optparse.OptionParser()
parser.add_option('', '--report', dest='reportClass', default='GUIReport')
parser.add_option('-J', '--job-selector', dest='selector', default=None)
parser.add_option('', '--str', dest='string', default=None)
#parser.add_option('-m', '--map', dest='showMap', default=False, action='store_true',
#	help='Draw map of sites')
#parser.add_option('-C', '--cpu', dest='showCPU', default=False, action='store_true',
#	help='Display time overview')
#Report.addOptions(parser)
(opts, args) = parseOptions(parser)

if len(args) != 1:
	utils.exitWithUsage('%s [options] <config file>' % sys.argv[0])

def main():
	# try to open config file
	config = getConfig(args[0], section = 'global')

	# Initialise task module
	task = config.getClass(['task', 'module'], cls = TaskModule).getInstance()

	# Initialise job database
	jobManagerCls = config.getClass('job manager', 'SimpleJobManager', cls = JobManager, tags = [task])
	jobDB = jobManagerCls.getInstance(task, None).jobDB
	log = utils.ActivityLog('Filtering job entries')
	selected = jobDB.getJobs(JobSelector.create(opts.selector, task = task))
	# removing the ActivityLog reference ends the 'Filtering job entries' progress display
	del log
Example #6
0

import sys
from datasetListFromX import addDatasetListOptions, discoverDataset
from gcSupport import Options, scriptOptions, utils

parser = Options(usage = '%s [OPTIONS] <config file / work directory>')
parser.addText(None, 'J', 'job-selector', dest = 'external job selector', default = '',
	help = 'Specify which jobs to process')
parser.addText(None, 'i', 'info-scanner',
	help = 'Specify which info scanner to run')
parser.addText(None, 'm', 'event-mode',   dest = 'mode',                  default = 'CMSSW-Out',
	help = 'Specify how to determine events - available: [CMSSW-Out], CMSSW-In, DataMod')
parser.addText(None, 'l', 'lfn',          dest = 'lfn marker',            default = '/store/',
	help = 'Assume everything starting with marker to be a logical file name')
parser.addBool(None, 'c', 'config',       dest = 'include config infos',  default = False,
	help = 'CMSSW specific: Add configuration data to metadata')
parser.addBool(None, 'p', 'parents',      dest = 'include parent infos',  default = False,
	help = 'CMSSW specific: Add parent infos to metadata')
addDatasetListOptions(parser)
options = scriptOptions(parser, arg_keys = ['dataset'])

# Positional parameters override options
if len(options.args) == 0:
	utils.exitWithUsage(parser.usage())
tmp = {'cmssw-out': 'CMSSW_EVENTS_WRITE', 'cmssw-in': 'CMSSW_EVENTS_READ', 'datamod': 'MAX_EVENTS'}
if options.opts.info_scanner:
	options.config_dict['scanner'] = options.opts.info_scanner.replace(',', ' ')
options.config_dict['events key'] = tmp.get(options.config_dict['mode'].lower(), '')
sys.exit(discoverDataset('GCProvider', options.config_dict))
Example #7
0
parser = Options(usage = '%s [OPTIONS] <config file>')
parser.addBool(None, 'L', 'report-list',  default = False, help = 'List available report classes')
parser.addBool(None, 'T', 'use-task',     default = False, help = 'Forward task information to report')
parser.addText(None, 'R', 'report',       default = 'GUIReport')
parser.addText(None, 'J', 'job-selector', default = None)
parser.addText(None, ' ', 'string',       default = '')
options = scriptOptions(parser)

Report = Plugin.getClass('Report')

if options.opts.report_list:
	sys.stderr.write('Available report classes:\n')
	displayPluginList(getPluginList('Report'))

if len(options.args) != 1:
	utils.exitWithUsage(parser.usage())

def main(opts, args):
	# try to open config file
	config = getConfig(args[0], section = 'global')

	# Initialise task module
	task = None
	if opts.use_task:
		task = config.getPlugin('workflow', 'Workflow:global', cls = 'Workflow', pargs = ('task',)).task

	# Initialise job database
	jobDB = config.getPlugin('job database', 'TextFileJobDB', cls = 'JobDB')
	activity = Activity('Filtering job entries')
	selected = jobDB.getJobs(JobSelector.create(opts.job_selector, task = task))
	activity.finish()
parser.addBool(None, 's', 'list-storage',   default = False, help = 'Show list of locations where data is stored')
parser.addBool(None, 'm', 'metadata',       default = False, help = 'Get metadata information of dataset files')
parser.addBool(None, 'M', 'block-metadata', default = False, help = 'Get common metadata information of dataset blocks')
parser.addBool(None, 'O', 'ordered',        default = False, help = 'Sort dataset blocks and files')
parser.addText(None, 'p', 'provider',       default = '',    help = 'Default dataset provider')
parser.addText(None, 'C', 'settings',       default = '',    help = 'Specify config file as source of detailed dataset settings')
parser.addText(None, 'S', 'save',           default = '',    help = 'Saves dataset information to specified file')
parser.addBool(None, 'i', 'info',           default = False, help = 'Gives machine readable info of given dataset(s)')
parser.addBool(None, 'c', 'config-entry',   default = False, help = 'Gives config file entries to run over given dataset(s)')
parser.addBool(None, 'n', 'config-nick',    default = False, help = 'Use dataset path to derive nickname in case it is undefined')
parser.addText(None, 'L', 'location',  default = 'hostname', help = 'Format of location information')
options = scriptOptions(parser)

# we need exactly one positional argument (dataset path)
if len(options.args) != 1:
	utils.exitWithUsage(usage)

# Disable threaded queries
def noThread(desc, fun, *args, **kargs):
	fun(*args, **kargs)
	return type('DummyThread', (), {'join': lambda self: None})()
thread_tools.start_thread = noThread
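# With start_thread patched, callers run their work synchronously and receive a dummy handle whose join() is a no-op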

def get_dataset_config(opts, args):
	dataset = args[0].strip()
	if os.path.exists(dataset):
		opts.provider = 'ListProvider'
	else:
		opts.provider = 'DBS3Provider'
	cfgSettings = {'dbs blacklist T1 *': 'False', 'remove empty blocks *': 'False',
		'remove empty files *': 'False', 'location format *': opts.location,
def main():
    usage = '%s [OPTIONS] <config file / work directory>' % sys.argv[0]
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-G', '--globaltag', dest='globaltag', default='crab2_tag', help='Specify global tag')
    parser.add_option('-F', '--input', dest='inputFile', default=None,
                      help='Specify dbs input file to use instead of scanning job output')
#    parser.add_option('-k', '--key-select',      dest='dataset key select', default='',
#        help='Specify dataset keys to process')
    parser.add_option('-c', '--continue-migration', dest='continue_migration', default=False, action='store_true',
                      help='Continue an already started migration')

    ogDiscover = optparse.OptionGroup(parser, 'Discovery options - ignored in case dbs input file is specified', '')
    ogDiscover.add_option('-n', '--name',        dest='dataset name pattern', default='',
        help='Specify dbs path name - Example: DataSet_@NICK@_@VAR@')
    ogDiscover.add_option('-T', '--datatype',    dest='datatype',      default=None,
        help='Supply dataset type in case cmssw report did not specify it - valid values: "mc" or "data"')
    ogDiscover.add_option('-m', '--merge',       dest='merge parents', default=False,  action='store_true',
        help='Merge output files from different parent blocks into a single block [Default: Keep boundaries]')
    ogDiscover.add_option('-j', '--jobhash',     dest='useJobHash',    default=False,  action='store_true',
        help='Use hash of all config files in job for dataset key calculation')
    ogDiscover.add_option('-u', '--unique-cfg',  dest='uniqueCfg',     default=False,  action='store_true',
        help='Circumvent edmConfigHash collisions so each dataset is stored with unique config information')
    ogDiscover.add_option('-P', '--parent',      dest='parent source', default='',
        help='Override parent information source - to bootstrap a reprocessing on local files')
    ogDiscover.add_option('-H', '--hash-keys',   dest='dataset hash keys', default='',
        help='Include additional variables in dataset hash calculation')
    parser.add_option_group(ogDiscover)

    ogDiscover2 = optparse.OptionGroup(parser, 'Discovery options II - only available when config file is used', '')
    ogDiscover2.add_option('-J', '--job-selector',    dest='selected',      default=None,
        help='Specify dataset(s) to process')
    parser.add_option_group(ogDiscover2)

    ogMode = optparse.OptionGroup(parser, 'Processing mode', '')
    ogMode.add_option('-b', '--batch',           dest='batch',         default=False, action='store_true',
        help='Enable non-interactive batch mode [Default: Interactive mode]')
    ogMode.add_option('-d', '--discovery',       dest='discovery',     default=False, action='store_true',
        help='Enable discovery mode - just collect file information and exit')
    ogMode.add_option('',   '--tempdir',         dest='tmpDir',        default='',
        help='Override temp directory')
    ogMode.add_option('-i', '--no-import',       dest='doImport',      default=True,  action='store_false',
        help='Disable import of new datasets into target DBS instance - only temporary xml files are created, ' +
            'which can be added later via datasetDBSTool.py [Default: Import datasets]')
    parser.add_option_group(ogMode)

    ogInc = optparse.OptionGroup(parser, 'Incremental adding of files to DBS', '')
    ogInc.add_option('-I', '--incremental',     dest='incremental',   default=False,  action='store_true',
        help='Skip import of existing files - Warning: this destroys coherent block structure!')
#	ogInc.add_option('-o', '--open-blocks',     dest='closeBlock',    default=True,   action='store_false',
#		help='Keep blocks open for addition of further files [Default: Close blocks]')
    parser.add_option_group(ogInc)

    ogInst = optparse.OptionGroup(parser, 'DBS instance handling', '')
    ogInst.add_option('-t', '--target-instance', dest='dbsTarget',
                      default='https://cmsweb.cern.ch/dbs/prod/phys03',
                      help='Specify target dbs instance url')
    ogInst.add_option('-s', '--source-instance', dest='dbsSource',
                      default='https://cmsweb.cern.ch/dbs/prod/global',
                      help='Specify source dbs instance url(s), where parent datasets are taken from')
    parser.add_option_group(ogInst)

    ogDbg = optparse.OptionGroup(parser, 'Display options', '')
    ogDbg.add_option('-D', '--display-dataset', dest='display_data',  default=None,
        help='Display information associated with dataset key(s) (accepts "all")')
    ogDbg.add_option('-C', '--display-config',  dest='display_cfg',   default=None,
        help='Display information associated with config hash(es) (accepts "all")')
    ogDbg.add_option('-v', '--verbose',         dest='verbosity',     default=0, action='count',
        help='Increase verbosity')
    parser.add_option_group(ogDbg)

    (opts, args) = parser.parse_args()
    utils.verbosity(opts.verbosity)
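    # Several option dest names contain spaces, so they are accessed via setattr/getattr rather than attribute syntax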
    setattr(opts, 'include parent infos', True)
    setattr(opts, 'importLumi', True)
    setattr(opts, 'dataset hash keys', getattr(opts, 'dataset hash keys').replace(',', ' '))
    if opts.useJobHash:
        setattr(opts, 'dataset hash keys', getattr(opts, 'dataset hash keys') + ' CMSSW_CONFIG_JOBHASH')

    # 0) Get work directory, create dbs dump directory
    if len(args) != 1:
        utils.exitWithUsage(usage, 'Neither work directory nor config file specified!')
    if os.path.isdir(args[0]):
        opts.workDir = os.path.abspath(os.path.normpath(args[0]))
    else:
        opts.workDir = getConfig(configFile=args[0]).getWorkPath()
    if not opts.tmpDir:
        opts.tmpDir = os.path.join(opts.workDir, 'dbs')
    if not os.path.exists(opts.tmpDir):
        os.mkdir(opts.tmpDir)
    # Lock file in case several instances of this program are running
    mutex = FileMutex(os.path.join(opts.tmpDir, 'datasetDBSAdd.lock'))

    # 1) Get dataset information
    if opts.inputFile:
        provider = DataProvider.getInstance('ListProvider', getConfig(), opts.inputFile, None)
    else:
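        # Build a discovery config from the parsed command line values (the option dest names double as config keys)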
        config = getConfig(configDict = {'dataset': dict(parser.values.__dict__)})
        if opts.discovery:
            config.set('dataset name pattern', '@DS_KEY@')
        provider = DataProvider.getInstance('DBSInfoProvider', config, args[0], None)

    provider.saveState(os.path.join(opts.tmpDir, 'dbs.dat'))
    if opts.discovery:
        sys.exit(os.EX_OK)
    blocks = provider.getBlocks()

    # 2) Filter datasets
    if opts.incremental:
        # Query target DBS for all found datasets and perform dataset resync with "supposed" state
        dNames = set(map(lambda b: b[DataProvider.Dataset], blocks))
        dNames = filter(lambda ds: hasDataset(opts.dbsTarget, ds), dNames)
        config = getConfig(configDict = {None: {'dbs instance': opts.dbsTarget}})
        oldBlocks = reduce(operator.add, map(lambda ds: DBSApiv2(config, None, ds, None).getBlocks(), dNames), [])
        (blocksAdded, blocksMissing, blocksChanged) = DataProvider.resyncSources(oldBlocks, blocks)
        if len(blocksMissing) or len(blocksChanged):
            if not utils.getUserBool(' * WARNING: Block structure has changed! Continue?', False):
                sys.exit(os.EX_OK)
        # Search for blocks which were partially added and generate "pseudo"-blocks with left over files
        setOldBlocks = set(map(lambda x: x[DataProvider.BlockName], oldBlocks))
        setAddedBlocks = set(map(lambda x: x[DataProvider.BlockName], blocksAdded))
        blockCollision = set.intersection(setOldBlocks, setAddedBlocks)
        if blockCollision and getattr(opts, 'closeBlock', True): # Blocks are closed and contents have changed (the '--open-blocks' option is commented out above, so blocks default to closed)
            for block in blocksAdded:
                if block[DataProvider.BlockName] in blockCollision:
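                    # Give the partially added block a fresh unique name so the leftover files can be registered as a new block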
                    block[DataProvider.BlockName] = utils.strGuid(md5(str(time.time())).hexdigest())
        blocks = blocksAdded

    # 3) Display dataset properties
    if opts.display_data or opts.display_cfg:
        raise APIError('Not yet reimplemented')

    #set-up logging
    logging.basicConfig(format='%(levelname)s: %(message)s')
    logger = logging.getLogger('dbs3-migration')
    logger.addHandler(NullHandler())
    logger.setLevel(logging.DEBUG)

    #set-up dbs clients
    dbs3_target_client = DBS3LiteClient(url=opts.dbsTarget)
    dbs3_source_client = DBS3LiteClient(url=opts.dbsSource)

    # Queue of pending parent-block migrations; it is filled for each block dump below and drained by do_migration()
    dbs3_migration_queue = DBS3MigrationQueue()

    for blockDump in generateDBS3BlockDumps(opts, blocks):
        if not opts.continue_migration:
            ###initiate the dbs3 to dbs3 migration of parent blocks
            logger.debug('Checking parentage for block: %s' % blockDump['block']['block_name'])
            unique_parent_lfns = set((parent[u'parent_logical_file_name'] for parent in blockDump[u'file_parent_list']))
            unique_blocks = set((block['block_name'] for parent_lfn in unique_parent_lfns
                                 for block in dbs3_source_client.listBlocks(logical_file_name=parent_lfn)))
            for block_to_migrate in unique_blocks:
                if dbs3_target_client.listBlocks(block_name=block_to_migrate):
                    #block already at destination
                    logger.debug('Block %s is already at destination' % block_to_migrate)
                    continue
                migration_task = MigrationTask(block_name=block_to_migrate,
                                               migration_url='https://cmsweb.cern.ch/dbs/prod/global/DBSReader',
                                               dbs_client=dbs3_target_client)
                try:
                    dbs3_migration_queue.add_migration_task(migration_task)
                except AlreadyQueued as aq:
                    logger.debug(aq.message)

            dbs3_migration_queue.save_to_disk(os.path.join(opts.tmpDir, 'dbs3_migration.pkl'))
        else:
            try:
                dbs3_migration_queue = DBS3MigrationQueue.read_from_disk(os.path.join(opts.tmpDir,
                                                                                      'dbs3_migration.pkl'))
            except IOError as io_err:
                msg = "Probably, there is no DBS 3 migration for this dataset ongoing, Dude!"
                logger.exception('%s\n%s' % (io_err.message, msg))
                raise

        #wait until all parent blocks have been migrated to dbs3
        do_migration(dbs3_migration_queue)

        #insert block into dbs3
        dbs3_target_client.insertBulkBlock(blockDump)