def main():
	# Handle command line options
	options = setup_parser()
	options.config_dict['include parent infos'] = True
	options.config_dict['dataset hash keys'] = options.config_dict['dataset hash keys'].replace(',', ' ')
	if options.opts.jobhash:
		options.config_dict['dataset hash keys'] = options.config_dict['dataset hash keys'] + ' CMSSW_CONFIG_JOBHASH'
	if options.opts.discovery:
		options.config_dict['dataset name pattern'] = '@DS_KEY@'
	if len(options.args) != 1:
		utils.exitWithUsage(options.parser.usage(), 'Neither work directory nor config file specified!')

	# Lock file in case several instances of this program are running
	mutex = FileMutex(os.path.join(options.opts.tempdir, 'datasetDBSAdd.lock'))
	try:
		# 1) Get dataset information
		blocks = discover_blocks(options)
		# 2) Filter datasets
		blocks = filter_blocks(options.opts, blocks)
		# 3) Process datasets (migrate parents and register)
		process_dbs3_json_blocks(options.opts, create_dbs3_json_blocks(options.opts, sort_dataset_blocks(blocks)))
	finally:
		mutex.release()
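
# A minimal sketch (stdlib only; SimpleFileMutex is a hypothetical stand-in, not
# grid-control's FileMutex) of the lock-file pattern used above: the lock file is
# created atomically, and the try/finally block guarantees it is released even if
# the protected work raises an exception.
import os

class SimpleFileMutex:
	def __init__(self, path):
		self._path = path
		# O_CREAT | O_EXCL makes creation fail if another instance already holds the lock
		self._fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)

	def release(self):
		os.close(self._fd)
		os.remove(self._path)

lock = SimpleFileMutex('/tmp/datasetDBSAdd.lock')
try:
	pass  # protected section: discover, filter and register the dataset blocks
finally:
	lock.release()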
def main(opts, args):
	if len(args) == 0:
		utils.exitWithUsage('Dataset path not specified!')
	datasetPath = args[0]
	if '*' in datasetPath:
		dbs3 = Plugin.createInstance('DBS3Provider', getConfig(), datasetPath, None)
		toProcess = dbs3.getCMSDatasetsImpl(datasetPath)
	else:
		toProcess = [datasetPath]
	nProd = Plugin.getClass('NickNameProducer').createInstance(opts.producer, getConfig())
	utils.printTabular([(0, 'Nickname'), (1, 'Dataset')],
		lmap(lambda ds: {0: nProd.getName('', ds, None), 1: ds}, toProcess), 'll')
import sys, optparse
from gcSupport import utils, Config, TaskModule, JobManager, JobSelector, Report, GCError, parseOptions, handleException, getConfig

parser = optparse.OptionParser()
parser.add_option('', '--report', dest='reportClass', default='GUIReport')
parser.add_option('-J', '--job-selector', dest='selector', default=None)
parser.add_option('', '--str', dest='string', default=None)
#parser.add_option('-m', '--map', dest='showMap', default=False, action='store_true',
#	help='Draw map of sites')
#parser.add_option('-C', '--cpu', dest='showCPU', default=False, action='store_true',
#	help='Display time overview')
#Report.addOptions(parser)
(opts, args) = parseOptions(parser)

if len(args) != 1:
	utils.exitWithUsage('%s [options] <config file>' % sys.argv[0])

def main():
	# try to open config file
	config = getConfig(args[0], section = 'global')

	# Initialise task module
	task = config.getClass(['task', 'module'], cls = TaskModule).getInstance()

	# Initialise job database
	jobManagerCls = config.getClass('job manager', 'SimpleJobManager', cls = JobManager, tags = [task])
	jobDB = jobManagerCls.getInstance(task, None).jobDB
	log = utils.ActivityLog('Filtering job entries')
	selected = jobDB.getJobs(JobSelector.create(opts.selector, task = task))
	del log
# | limitations under the License.

import sys
from datasetListFromX import addDatasetListOptions, discoverDataset
from gcSupport import Options, scriptOptions, utils

parser = Options(usage = '%s [OPTIONS] <config file / work directory>')
parser.addText(None, 'J', 'job-selector', dest = 'external job selector', default = '',
	help = 'Specify which jobs to process')
parser.addText(None, 'i', 'info-scanner',
	help = 'Specify which info scanner to run')
parser.addText(None, 'm', 'event-mode', dest = 'mode', default = 'CMSSW-Out',
	help = 'Specify how to determine events - available: [CMSSW-Out], CMSSW-In, DataMod')
parser.addText(None, 'l', 'lfn', dest = 'lfn marker', default = '/store/',
	help = 'Assume everything starting with marker to be a logical file name')
parser.addBool(None, 'c', 'config', dest = 'include config infos', default = False,
	help = 'CMSSW specific: Add configuration data to metadata')
parser.addBool(None, 'p', 'parents', dest = 'include parent infos', default = False,
	help = 'CMSSW specific: Add parent infos to metadata')
addDatasetListOptions(parser)
options = scriptOptions(parser, arg_keys = ['dataset'])

# Positional parameters override options
if len(options.args) == 0:
	utils.exitWithUsage(parser.usage())

tmp = {'cmssw-out': 'CMSSW_EVENTS_WRITE', 'cmssw-in': 'CMSSW_EVENTS_READ', 'datamod': 'MAX_EVENTS'}
if options.opts.info_scanner:
	options.config_dict['scanner'] = options.opts.info_scanner.replace(',', ' ')
options.config_dict['events key'] = tmp.get(options.config_dict['mode'].lower(), '')
sys.exit(discoverDataset('GCProvider', options.config_dict))
parser = Options(usage = '%s [OPTIONS] <config file>')
parser.addBool(None, 'L', 'report-list', default = False, help = 'List available report classes')
parser.addBool(None, 'T', 'use-task', default = False, help = 'Forward task information to report')
parser.addText(None, 'R', 'report', default = 'GUIReport')
parser.addText(None, 'J', 'job-selector', default = None)
parser.addText(None, ' ', 'string', default = '')
options = scriptOptions(parser)

Report = Plugin.getClass('Report')

if options.opts.report_list:
	sys.stderr.write('Available report classes:\n')
	displayPluginList(getPluginList('Report'))

if len(options.args) != 1:
	utils.exitWithUsage(parser.usage())

def main(opts, args):
	# try to open config file
	config = getConfig(args[0], section = 'global')

	# Initialise task module
	task = None
	if opts.use_task:
		task = config.getPlugin('workflow', 'Workflow:global', cls = 'Workflow', pargs = ('task',)).task

	# Initialise job database
	jobDB = config.getPlugin('job database', 'TextFileJobDB', cls = 'JobDB')
	activity = Activity('Filtering job entries')
	selected = jobDB.getJobs(JobSelector.create(opts.job_selector, task = task))
	activity.finish()
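
# A minimal sketch (hypothetical helper, not the real JobSelector/JobDB API) of the
# selection idea used above: a selector is just a predicate over (job number, job info),
# and filtering the job database keeps only the matching job numbers.
def select_jobs(job_db, selector):
	# job_db: dict mapping job number -> job info dict
	return [jobnum for (jobnum, jobinfo) in sorted(job_db.items()) if selector(jobnum, jobinfo)]

# Example: keep only jobs in state 'SUCCESS'
done = select_jobs({0: {'state': 'SUCCESS'}, 1: {'state': 'FAILED'}},
	lambda jobnum, jobinfo: jobinfo['state'] == 'SUCCESS')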
parser.addBool(None, 's', 'list-storage', default = False,
	help = 'Show list of locations where data is stored')
parser.addBool(None, 'm', 'metadata', default = False,
	help = 'Get metadata information of dataset files')
parser.addBool(None, 'M', 'block-metadata', default = False,
	help = 'Get common metadata information of dataset blocks')
parser.addBool(None, 'O', 'ordered', default = False,
	help = 'Sort dataset blocks and files')
parser.addText(None, 'p', 'provider', default = '',
	help = 'Default dataset provider')
parser.addText(None, 'C', 'settings', default = '',
	help = 'Specify config file as source of detailed dataset settings')
parser.addText(None, 'S', 'save', default = '',
	help = 'Saves dataset information to specified file')
parser.addBool(None, 'i', 'info', default = False,
	help = 'Gives machine readable info of given dataset(s)')
parser.addBool(None, 'c', 'config-entry', default = False,
	help = 'Gives config file entries to run over given dataset(s)')
parser.addBool(None, 'n', 'config-nick', default = False,
	help = 'Use dataset path to derive nickname in case it is undefined')
parser.addText(None, 'L', 'location', default = 'hostname',
	help = 'Format of location information')
options = scriptOptions(parser)

# we need exactly one positional argument (dataset path)
if len(options.args) != 1:
	utils.exitWithUsage(usage)

# Disable threaded queries
def noThread(desc, fun, *args, **kargs):
	fun(*args, **kargs)
	return type('DummyThread', (), {'join': lambda self: None})()
thread_tools.start_thread = noThread

def get_dataset_config(opts, args):
	dataset = args[0].strip()
	if os.path.exists(dataset):
		opts.provider = 'ListProvider'
	else:
		opts.provider = 'DBS3Provider'
	cfgSettings = {'dbs blacklist T1 *': 'False', 'remove empty blocks *': 'False',
		'remove empty files *': 'False', 'location format *': opts.location,
def main():
	usage = '%s [OPTIONS] <config file / work directory>' % sys.argv[0]
	parser = optparse.OptionParser(usage=usage)
	parser.add_option('-G', '--globaltag', dest='globaltag', default='crab2_tag',
		help='Specify global tag')
	parser.add_option('-F', '--input', dest='inputFile', default=None,
		help='Specify dbs input file to use instead of scanning job output')
#	parser.add_option('-k', '--key-select', dest='dataset key select', default='',
#		help='Specify dataset keys to process')
	parser.add_option('-c', '--continue-migration', dest='continue_migration', default=False, action='store_true',
		help='Continue an already started migration')

	ogDiscover = optparse.OptionGroup(parser, 'Discovery options - ignored in case dbs input file is specified', '')
	ogDiscover.add_option('-n', '--name', dest='dataset name pattern', default='',
		help='Specify dbs path name - Example: DataSet_@NICK@_@VAR@')
	ogDiscover.add_option('-T', '--datatype', dest='datatype', default=None,
		help='Supply dataset type in case cmssw report did not specify it - valid values: "mc" or "data"')
	ogDiscover.add_option('-m', '--merge', dest='merge parents', default=False, action='store_true',
		help='Merge output files from different parent blocks into a single block [Default: Keep boundaries]')
	ogDiscover.add_option('-j', '--jobhash', dest='useJobHash', default=False, action='store_true',
		help='Use hash of all config files in job for dataset key calculation')
	ogDiscover.add_option('-u', '--unique-cfg', dest='uniqueCfg', default=False, action='store_true',
		help='Circumvent edmConfigHash collisions so each dataset is stored with unique config information')
	ogDiscover.add_option('-P', '--parent', dest='parent source', default='',
		help='Override parent information source - to bootstrap a reprocessing on local files')
	ogDiscover.add_option('-H', '--hash-keys', dest='dataset hash keys', default='',
		help='Include additional variables in dataset hash calculation')
	parser.add_option_group(ogDiscover)

	ogDiscover2 = optparse.OptionGroup(parser, 'Discovery options II - only available when config file is used', '')
	ogDiscover2.add_option('-J', '--job-selector', dest='selected', default=None,
		help='Specify dataset(s) to process')
	parser.add_option_group(ogDiscover2)

	ogMode = optparse.OptionGroup(parser, 'Processing mode', '')
	ogMode.add_option('-b', '--batch', dest='batch', default=False, action='store_true',
		help='Enable non-interactive batch mode [Default: Interactive mode]')
	ogMode.add_option('-d', '--discovery', dest='discovery', default=False, action='store_true',
		help='Enable discovery mode - just collect file information and exit')
	ogMode.add_option('', '--tempdir', dest='tmpDir', default='',
		help='Override temp directory')
	ogMode.add_option('-i', '--no-import', dest='doImport', default=True, action='store_false',
		help='Disable import of new datasets into target DBS instance - only temporary xml files are created, ' +
			'which can be added later via datasetDBSTool.py [Default: Import datasets]')
	parser.add_option_group(ogMode)

	ogInc = optparse.OptionGroup(parser, 'Incremental adding of files to DBS', '')
	ogInc.add_option('-I', '--incremental', dest='incremental', default=False, action='store_true',
		help='Skip import of existing files - Warning: this destroys coherent block structure!')
#	ogInc.add_option('-o', '--open-blocks', dest='closeBlock', default=True, action='store_false',
#		help='Keep blocks open for addition of further files [Default: Close blocks]')
	parser.add_option_group(ogInc)

	ogInst = optparse.OptionGroup(parser, 'DBS instance handling', '')
	ogInst.add_option('-t', '--target-instance', dest='dbsTarget', default='https://cmsweb.cern.ch/dbs/prod/phys03',
		help='Specify target dbs instance url')
	ogInst.add_option('-s', '--source-instance', dest='dbsSource', default='https://cmsweb.cern.ch/dbs/prod/global',
		help='Specify source dbs instance url(s), where parent datasets are taken from')
	parser.add_option_group(ogInst)

	ogDbg = optparse.OptionGroup(parser, 'Display options', '')
	ogDbg.add_option('-D', '--display-dataset', dest='display_data', default=None,
		help='Display information associated with dataset key(s) (accepts "all")')
	ogDbg.add_option('-C', '--display-config', dest='display_cfg', default=None,
		help='Display information associated with config hash(es) (accepts "all")')
	ogDbg.add_option('-v', '--verbose', dest='verbosity', default=0, action='count',
		help='Increase verbosity')
	parser.add_option_group(ogDbg)

	(opts, args) = parser.parse_args()
	utils.verbosity(opts.verbosity)
	setattr(opts, 'include parent infos', True)
	setattr(opts, 'importLumi', True)
	setattr(opts, 'dataset hash keys', getattr(opts, 'dataset hash keys').replace(',', ' '))
	if opts.useJobHash:
		setattr(opts, 'dataset hash keys', getattr(opts, 'dataset hash keys') + ' CMSSW_CONFIG_JOBHASH')

	# 0) Get work directory, create dbs dump directory
	if len(args) != 1:
		utils.exitWithUsage(usage, 'Neither work directory nor config file specified!')
	if os.path.isdir(args[0]):
		opts.workDir = os.path.abspath(os.path.normpath(args[0]))
	else:
		opts.workDir = getConfig(configFile=args[0]).getWorkPath()
	if not opts.tmpDir:
		opts.tmpDir = os.path.join(opts.workDir, 'dbs')
	if not os.path.exists(opts.tmpDir):
		os.mkdir(opts.tmpDir)

	# Lock file in case several instances of this program are running
	mutex = FileMutex(os.path.join(opts.tmpDir, 'datasetDBSAdd.lock'))

	# 1) Get dataset information
	if opts.inputFile:
		provider = DataProvider.getInstance('ListProvider', getConfig(), opts.inputFile, None)
	else:
		config = getConfig(configDict = {'dataset': dict(parser.values.__dict__)})
		if opts.discovery:
			config.set('dataset name pattern', '@DS_KEY@')
		provider = DataProvider.getInstance('DBSInfoProvider', config, args[0], None)
	provider.saveState(os.path.join(opts.tmpDir, 'dbs.dat'))
	if opts.discovery:
		sys.exit(os.EX_OK)
	blocks = provider.getBlocks()

	# 2) Filter datasets
	if opts.incremental:
		# Query target DBS for all found datasets and perform dataset resync with "supposed" state
		dNames = set(map(lambda b: b[DataProvider.Dataset], blocks))
		dNames = filter(lambda ds: hasDataset(opts.dbsTarget, ds), dNames)
		config = getConfig(configDict = {None: {'dbs instance': opts.dbsTarget}})
		oldBlocks = reduce(operator.add, map(lambda ds: DBSApiv2(config, None, ds, None).getBlocks(), dNames), [])
		(blocksAdded, blocksMissing, blocksChanged) = DataProvider.resyncSources(oldBlocks, blocks)
		if len(blocksMissing) or len(blocksChanged):
			if not utils.getUserBool(' * WARNING: Block structure has changed! Continue?', False):
				sys.exit(os.EX_OK)
		# Search for blocks which were partially added and generate "pseudo"-blocks with left over files
		setOldBlocks = set(map(lambda x: x[DataProvider.BlockName], oldBlocks))
		setAddedBlocks = set(map(lambda x: x[DataProvider.BlockName], blocksAdded))
		blockCollision = set.intersection(setOldBlocks, setAddedBlocks)
		if blockCollision and opts.closeBlock:
			# Blocks are closed and contents have changed
			for block in blocksAdded:
				if block[DataProvider.BlockName] in blockCollision:
					block[DataProvider.BlockName] = utils.strGuid(md5(str(time.time())).hexdigest())
		blocks = blocksAdded

	# 3) Display dataset properties
	if opts.display_data or opts.display_cfg:
		raise APIError('Not yet reimplemented')

	# set-up logging
	logging.basicConfig(format='%(levelname)s: %(message)s')
	logger = logging.getLogger('dbs3-migration')
	logger.addHandler(NullHandler())
	logger.setLevel(logging.DEBUG)

	# set-up dbs clients
	dbs3_target_client = DBS3LiteClient(url=opts.dbsTarget)
	dbs3_source_client = DBS3LiteClient(url=opts.dbsSource)

	dbs3_migration_queue = DBS3MigrationQueue()

	for blockDump in generateDBS3BlockDumps(opts, blocks):
		if not opts.continue_migration:
			# initiate the dbs3 to dbs3 migration of parent blocks
			logger.debug('Checking parentage for block: %s' % blockDump['block']['block_name'])
			unique_parent_lfns = set((parent[u'parent_logical_file_name'] for parent in blockDump[u'file_parent_list']))
			unique_blocks = set((block['block_name'] for parent_lfn in unique_parent_lfns
				for block in dbs3_source_client.listBlocks(logical_file_name=parent_lfn)))
			for block_to_migrate in unique_blocks:
				if dbs3_target_client.listBlocks(block_name=block_to_migrate):
					# block already at destination
					logger.debug('Block %s is already at destination' % block_to_migrate)
					continue
				migration_task = MigrationTask(block_name=block_to_migrate,
					migration_url='https://cmsweb.cern.ch/dbs/prod/global/DBSReader', dbs_client=dbs3_target_client)
				try:
					dbs3_migration_queue.add_migration_task(migration_task)
				except AlreadyQueued as aq:
					logger.debug(aq.message)
			dbs3_migration_queue.save_to_disk(os.path.join(opts.tmpDir, 'dbs3_migration.pkl'))
		else:
			try:
				dbs3_migration_queue = DBS3MigrationQueue.read_from_disk(os.path.join(opts.tmpDir, 'dbs3_migration.pkl'))
			except IOError as io_err:
				msg = "Probably, there is no DBS 3 migration for this dataset ongoing, Dude!"
				logger.exception('%s\n%s' % (io_err.message, msg))
				raise

		# wait for all parent blocks migrated to dbs3
		do_migration(dbs3_migration_queue)

		# insert block into dbs3
		dbs3_target_client.insertBulkBlock(blockDump)
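
# A minimal sketch (hypothetical str_guid helper, not the real utils.strGuid) of the
# renaming step in the incremental branch above: colliding, already-closed blocks get a
# fresh block name derived from an md5 hash of the current time, formatted like a GUID.
import time
from hashlib import md5

def str_guid(digest):
	# 32 hex characters -> 8-4-4-4-12 layout
	return '-'.join([digest[:8], digest[8:12], digest[12:16], digest[16:20], digest[20:]])

new_block_name = str_guid(md5(str(time.time()).encode()).hexdigest())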