def discover_blocks(options):
    # Get work directory, create dbs dump directory
    if os.path.isdir(options.args[0]):
        workDir = os.path.abspath(os.path.normpath(options.args[0]))
    else:
        workDir = getConfig(configFile=options.args[0]).getWorkPath()
    if not options.opts.tempdir:
        options.opts.tempdir = os.path.join(workDir, 'dbs')
    if not os.path.exists(options.opts.tempdir):
        os.mkdir(options.opts.tempdir)

    # Get provider with dataset information
    if options.opts.input_file:
        provider = DataProvider.createInstance('ListProvider', getConfig(), options.opts.input_file, None)
    else:
        config = getConfig(configDict={'dataset': options.config_dict})
        provider = DataProvider.createInstance('DBSInfoProvider', config, options.args[0], None)

    blocks = provider.getBlocks(show_stats=False)
    DataProvider.saveToFile(os.path.join(options.opts.tempdir, 'dbs.dat'), blocks)
    if options.opts.discovery:
        sys.exit(os.EX_OK)
    return blocks

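
# Usage sketch (added for illustration, not part of the original script): discover_blocks()
# expects the object returned by scriptOptions(), i.e. something exposing .args, .opts and
# .config_dict. All attribute values below are hypothetical placeholders that only show the
# expected shape of that object; the function is defined here but never called.
def _example_discover_blocks():
    class _Opts(object):
        tempdir = ''          # empty -> discover_blocks() creates <workdir>/dbs itself
        input_file = None     # no pre-made dataset file -> fall back to DBSInfoProvider
        discovery = False     # keep running after dbs.dat has been written
    class _Options(object):
        opts = _Opts()
        args = ['work.myTask']                               # hypothetical work directory
        config_dict = {'dataset name pattern': '@DS_KEY@'}   # assumed DBSInfoProvider setting
    return discover_blocks(_Options())
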
def main(opts, args):
    if len(args) == 0:
        utils.exitWithUsage('Dataset path not specified!')
    datasetPath = args[0]
    if '*' in datasetPath:
        dbs3 = Plugin.createInstance('DBS3Provider', getConfig(), datasetPath, None)
        toProcess = dbs3.getCMSDatasetsImpl(datasetPath)
    else:
        toProcess = [datasetPath]
    nProd = Plugin.getClass('NickNameProducer').createInstance(opts.producer, getConfig())
    utils.printTabular([(0, 'Nickname'), (1, 'Dataset')],
        lmap(lambda ds: {0: nProd.getName('', ds, None), 1: ds}, toProcess), 'll')

def setup_config(opts, args):
    # Set config based on settings from config file or command line
    configFile = None
    if os.path.exists(args[0]):
        configFile = args[0]
    config = getConfig(configFile, section='global')
    if os.path.exists(config.getWorkPath('datamap.tar')):
        opts.dataset = config.getWorkPath('datamap.tar')
    config.changeView(setSections=['jobs']).set('nseeds', '1', '?=')
    configParameters = config.changeView(setSections=['parameters'])
    if opts.parameter:
        utils.vprint('Provided options:')
        for p in opts.parameter:
            k, v = p.split('=', 1)
            configParameters.set(k.strip(), v.strip().replace('\\n', '\n'), '=')
            utils.vprint('\t%s: %s' % (k.strip(), v.strip()))
        utils.vprint('')
    if configFile is None:
        configParameters.set('parameters', str.join(' ', args).replace('\\n', '\n'))
    if opts.dataset:
        configParameters.set('default lookup', 'DATASETNICK')
    if utils.verbosity() > 2:
        config.changeView(setSections=None).write(sys.stdout)
    return config

def discoverDataset(providerName, config_dict):
    config = getConfig(configDict={'dataset': config_dict})
    DataProvider = Plugin.getClass('DataProvider')
    provider = DataProvider.createInstance(providerName, config, config_dict['dataset'], None)
    if config_dict['output']:
        return DataProvider.saveToFile(config_dict['output'], provider.getBlocks(), config_dict['strip'])
    return DataProvider.saveToStream(sys.stdout, provider.getBlocks(), config_dict['strip'])

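
# Usage sketch (illustrative only): discoverDataset() reads everything from the passed
# dictionary, which also becomes the 'dataset' config section. The keys mirror the lookups
# above; the provider name and dataset path are hypothetical examples. Never called here.
def _example_discover_dataset():
    settings = {
        'dataset': '/SingleMuon/Run2016B-v1/AOD',  # hypothetical dataset path
        'output': '',                              # empty -> blocks are written to stdout
        'strip': 'True',                           # forwarded as the strip argument
    }
    return discoverDataset('DBS3Provider', settings)
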
def setup_config(opts, args):
    # Set config based on settings from config file or command line
    configFile = None
    if os.path.exists(args[0]):
        configFile = args[0]
    config = getConfig(configFile, section='global')
    if os.path.exists(config.getWorkPath('datamap.tar')):
        opts.dataset = config.getWorkPath('datamap.tar')
    config.changeView(setSections=['jobs']).set('nseeds', '1', '?=')
    configParameters = config.changeView(setSections=['parameters'])
    if opts.parameter:
        log.info('Provided options:')
        for p in opts.parameter:
            k, v = p.split('=', 1)
            configParameters.set(k.strip(), v.strip().replace('\\n', '\n'), '=')
            log.info('\t%s: %s', k.strip(), v.strip())
        log.info('')
    if configFile is None:
        configParameters.set('parameters', str.join(' ', args).replace('\\n', '\n'))
    if opts.dataset:
        configParameters.set('default lookup', 'DATASETNICK')
    if opts.verbose > 2:
        config.changeView(setSections=None).write(sys.stdout)
    return config

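
# Usage sketch (illustrative only): setup_config() accepts either an existing config file in
# args[0] or a bare parameter expression joined from args, plus 'KEY=VALUE' overrides in
# opts.parameter (e.g. collected from repeated -p options). The values shown are hypothetical.
# opts.parameter = ['SEED=1234', 'MESSAGE=hello\\nworld']
# opts.dataset = None
# config = setup_config(opts, ['example.conf'])
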
def main():
    # try to open config file
    config = getConfig(args[0], section='global')
    # Initialise task module
    task = config.getClass(['task', 'module'], cls=TaskModule).getInstance()
    # Initialise job database
    jobManagerCls = config.getClass('job manager', 'SimpleJobManager', cls=JobManager, tags=[task])
    jobDB = jobManagerCls.getInstance(task, None).jobDB
    log = utils.ActivityLog('Filtering job entries')
    selected = jobDB.getJobs(JobSelector.create(opts.selector, task=task))
    del log
    report = Report.open(opts.reportClass, jobDB, task, selected, opts.string)
    report.display()
    sys.exit()

    # Show reports
    report = Report(jobs, selected)
    if opts.showCPU:
        cpuTime = 0
        for jobNum in selected:
            jobObj = jobs.get(jobNum)
            cpuTime += jobObj.get('runtime', 0)
        print 'Used wall time:', utils.strTime(cpuTime)
        print 'Estimated cost: $%.2f' % ((cpuTime / 60 / 60) * 0.1)
    elif opts.showMap:
        from grid_control_gui import geomap
        geomap.drawMap(report)
    else:
        report.show(opts, task)

def main():
    configEntries = map(lambda (k, v): (k, str(v)), parser.values.__dict__.items())
    config = gcSupport.getConfig(configDict={'dataset': dict(configEntries)})
    provider = gcSupport.datasets.DataProvider.getInstance(providerName, config, datasetExpr, None)
    if opts.output:
        provider.saveState(opts.output, None, opts.strip)
    else:
        gcSupport.datasets.DataProvider.saveStateRaw(sys.stdout, provider.getBlocks(), opts.strip)

def discoverDataset(providerName, config_dict):
    config = getConfig(configDict={'dataset': config_dict})
    if config_dict['dump config'] == 'True':
        config.write(sys.stdout, printDefault=False, printMinimal=True)
        return
    DataProvider = Plugin.getClass('DataProvider')
    provider = DataProvider.createInstance(providerName, config, config_dict['dataset'], None)
    stripMetadata = config_dict['strip'] == 'True'
    if config_dict['output']:
        return DataProvider.saveToFile(config_dict['output'], provider.getBlocks(), stripMetadata)
    return DataProvider.saveToStream(sys.stdout, provider.getBlocks(), stripMetadata)

def get_dataset_config(opts, args):
    dataset = args[0].strip()
    if os.path.exists(dataset):
        opts.provider = 'ListProvider'
    else:
        opts.provider = 'DBS3Provider'
    cfgSettings = {
        'dbs blacklist T1 *': 'False',
        'remove empty blocks *': 'False',
        'remove empty files *': 'False',
        'location format *': opts.location,
        'nickname check collision *': 'False',
        'dataset *': dataset,
        'dataset provider *': opts.provider,
    }
    if opts.metadata or opts.block_metadata:
        cfgSettings['lumi filter *'] = '-'
        cfgSettings['keep lumi metadata *'] = 'True'
    return getConfig(configFile=opts.settings, configDict={'dataset': cfgSettings})

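
# Usage sketch (illustrative only): get_dataset_config() needs the dataset expression in
# args[0] and a few option attributes; ListProvider vs. DBS3Provider is chosen automatically
# depending on whether args[0] is an existing file. All values below are hypothetical and the
# function is never called here.
def _example_get_dataset_config():
    class _Opts(object):
        location = 'hostname'   # hypothetical value for 'location format *'
        metadata = False        # no per-file metadata requested
        block_metadata = False  # no per-block metadata requested
        settings = None         # no additional settings file
    return get_dataset_config(_Opts(), ['/SingleMuon/Run2016B-v1/AOD'])
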
def main(opts, args):
    # try to open config file
    config = getConfig(args[0], section='global')
    # Initialise task module
    task = None
    if opts.use_task:
        task = config.getPlugin('workflow', 'Workflow:global', cls='Workflow', pargs=('task',)).task
    # Initialise job database
    jobDB = config.getPlugin('job database', 'TextFileJobDB', cls='JobDB')
    activity = Activity('Filtering job entries')
    selected = jobDB.getJobs(JobSelector.create(opts.job_selector, task=task))
    activity.finish()
    report = Report.createInstance(opts.report, jobDB, task, selected, opts.string)
    report.display()

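
# Usage sketch (illustrative only): main() takes the parsed script options and the positional
# arguments; the attribute values below are hypothetical and merely show which options the
# function reads (use_task, job_selector, report, string).
# class _Opts(object):
#     use_task = False                   # do not instantiate the workflow/task
#     job_selector = 'state:SUCCESS'     # hypothetical job selector expression
#     report = 'BasicReport'             # hypothetical report plugin name
#     string = ''                        # extra argument forwarded to the report
# main(_Opts(), ['example.conf'])
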
def main(opts, args):
    # try to open config file
    config = getConfig(args[0], section='global')
    # Initialise task module
    task = None
    if opts.use_task:
        task = config.getPlugin(['task', 'module'], cls='TaskModule')
    # Initialise job database
    jobDB = config.getPlugin('job database', 'JobDB', cls='JobDB')
    activity = utils.ActivityLog('Filtering job entries')
    selected = jobDB.getJobs(JobSelector.create(opts.job_selector, task=task))
    activity.finish()
    report = Report.createInstance(opts.report, jobDB, task, selected, opts.string)
    report.display()

def main():
    # try to open config file
    config = getConfig(args[0], section='global')
    # Initialise task module
    task = None
    tags = []
    if opts.useTask:
        task = config.getPlugin(['task', 'module'], cls=TaskModule).getInstance()
        tags = [task]
    # Initialise job database
    jobManagerCls = config.getPlugin('job manager', 'SimpleJobManager', cls=JobManager, tags=tags)
    jobDB = jobManagerCls.getInstance(task, None).jobDB
    log = utils.ActivityLog('Filtering job entries')
    selected = jobDB.getJobs(JobSelector.create(opts.selector, task=task))
    del log
    report = Report.getInstance(opts.reportClass, jobDB, task, selected, opts.string)
    report.display()

def setup_config(opts, args):
    # Set config based on settings from config file or command line
    configFile = None
    if os.path.exists(args[0]):
        configFile = args[0]
    config = getConfig(configFile, section='global')
    config.changeView(setSections=['jobs']).set('nseeds', '1', '?=')
    configParameters = config.changeView(setSections=['parameters'])
    if opts.parameter:
        utils.vprint('Provided options:')
        for p in opts.parameter:
            k, v = p.split('=', 1)
            configParameters.set(k.strip(), v.strip().replace('\\n', '\n'), '=')
            utils.vprint('\t%s: %s' % (k.strip(), v.strip()))
        utils.vprint('')
    if not os.path.exists(args[0]):
        configParameters.set('parameters', str.join(' ', args).replace('\\n', '\n'))
    if opts.dataset:
        configParameters.set('default lookup', 'DATASETNICK')
    if not opts.persistent:
        configParameters.set('parameter adapter', 'BasicParameterAdapter', '=')
    if utils.verbosity() > 2:
        config.changeView(setSections=None).write(sys.stdout)
    return config

# | limitations under the License.

from gcSupport import Options, Plugin, getConfig, scriptOptions
from grid_control.utils.webservice import JSONRestClient
from grid_control_cms.sitedb import SiteDB


def lfn2pfn(node, lfn, prot='srmv2'):
    return JSONRestClient().get(url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/lfn2pfn',
        params={'node': node, 'protocol': prot, 'lfn': lfn})['phedex']['mapping']


parser = Options()
parser.addText(None, 's', 'se', default=None, help='Resolve LFN on CMS SE into PFN')
parser.addText(None, ' ', 'se-prot', default='srmv2', help='Name of default SE protocol')
parser.addText(None, ' ', 'lfn', default='/store/user/<hypernews name>', help='Name of default LFN')
options = scriptOptions(parser)

if options.opts.se:
    if '<hypernews name>' in options.opts.lfn:
        token = Plugin.getClass('AccessToken').createInstance('VomsProxy', getConfig(), 'token')
        site_db = SiteDB()
        hnName = site_db.dn_to_username(dn=token.getFQUsername())
        if not hnName:
            raise Exception('Unable to map grid certificate to hypernews name!')
        options.opts.lfn = options.opts.lfn.replace('<hypernews name>', hnName)
    tmp = lfn2pfn(node=options.opts.se, prot=options.opts.se_prot, lfn=options.opts.lfn)
    for entry in tmp:
        if len(tmp) > 1:
            print(entry['node'] + ' ' + entry['pfn'])
        print(entry['pfn'])

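
# Usage sketch (illustrative only): lfn2pfn() queries the PhEDEx data service and returns the
# 'mapping' list of the JSON reply; the node name and LFN below are hypothetical examples.
# for mapping in lfn2pfn('T2_DE_DESY', '/store/user/someuser/file.root'):
#     print(mapping['node'], mapping['pfn'])
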
parser.addText('jobs', '', 'job-force-state', default='', help='Force new job state')
parser.addText('jobs', '', 'job-show-jdl', default='', help='Show JDL file if available')
parser.section('data', 'Dataset debugging', '%s <dataset file> <dataset file> ...')
parser.addText('data', '', 'dataset-show-diff', default='', help='Show difference between datasets')
parser.addText('data', '', 'dataset-show-removed', default='', help='Find removed dataset blocks')
parser.addText(None, 'd', 'logfile-decode', default='', help='Decode log files')
options = scriptOptions(parser)
(opts, args) = (options.opts, options.args)

########################################################
# BACKEND

if opts.backend_list_nodes or opts.backend_list_queues:
    config = getConfig()
    backend = str.join(' ', args) or 'local'
    wms = Plugin.getClass('WMS').createInstance(backend, config, backend)
    if opts.backend_list_nodes:
        logging.info(repr(wms.getNodes()))
    if opts.backend_list_queues:
        logging.info(repr(wms.getQueues()))

########################################################
# DATASET PARTITION

def partition_invalid(splitter):
    for jobNum in irange(splitter.getMaxJobs()):
        splitInfo = splitter.getSplitInfo(jobNum)
        if splitInfo.get(DataSplitter.Invalid, False):
            yield {0: jobNum}

def main():
    # Set config based on settings from config file or command line
    configFile = None
    if os.path.exists(args[0]):
        configFile = args[0]
    config = getConfig(configFile, section='global')
    config.changeView(setSections=['jobs']).set('nseeds', '1', '?=')
    configParameters = config.changeView(setSections=['parameters'])
    if opts.parameters:
        utils.vprint('Provided options:')
        for p in opts.parameters:
            k, v = p.split('=', 1)
            configParameters.set(k.strip(), v.strip().replace('\\n', '\n'), '=')
            utils.vprint('\t%s: %s' % (k.strip(), v.strip()))
        utils.vprint('')
    if not os.path.exists(args[0]):
        configParameters.set('parameters', str.join(' ', args).replace('\\n', '\n'))
    if opts.dataset:
        configParameters.set('default lookup', 'DATASETNICK')
    # configParameters.set('parameter adapter', 'BasicParameterAdapter', '=')  # Don't track parameter changes
    if opts.verbosity > 2:
        config.changeView(setSections=None).write(sys.stdout)

    # Initialize ParameterFactory
    configTask = config.changeView(setSections=[config.get(['task', 'module'], 'DummyTask')])
    pm = config.getPlugin('parameter factory', 'SimpleParameterFactory', cls=ParameterFactory).getInstance()

    # Create dataset parameter source
    class DummySplitter:
        def getMaxJobs(self):
            return 3

        def getSplitInfo(self, pNum):
            mkEntry = lambda ds, fl, n, nick: {DataSplitter.Dataset: ds, DataSplitter.Nickname: nick,
                DataSplitter.FileList: fl, DataSplitter.NEntries: n}
            rndStr = lambda: md5(str(random.random())).hexdigest()[:10]
            tmp = [mkEntry('ds1', ['a', 'b'], 23, 'data_1'), mkEntry('ds1', ['1'], 42, 'data_1'),
                mkEntry('ds2', ['m', 'n'], 123, 'data_2'), mkEntry('ds2', ['x', 'y', 'z'], 987, 'data_3')]
            return tmp[pNum]

    class DataSplitProcessorTest:
        def getKeys(self):
            return map(lambda k: ParameterMetadata(k, untracked=True),
                ['DATASETINFO', 'DATASETID', 'DATASETPATH', 'DATASETBLOCK', 'DATASETNICK'])

        def process(self, pNum, splitInfo, result):
            result.update({
                'DATASETINFO': '',
                'DATASETID': splitInfo.get(DataSplitter.DatasetID, None),
                'DATASETPATH': splitInfo.get(DataSplitter.Dataset, None),
                'DATASETBLOCK': splitInfo.get(DataSplitter.BlockName, None),
                'DATASETNICK': splitInfo.get(DataSplitter.Nickname, None),
                'DATASETSPLIT': pNum,
            })

    if opts.dataset.lower() == 'true':
        utils.vprint('Registering dummy data provider data')
        dataSplitter = DummySplitter()
    elif opts.dataset:
        dataSplitter = DataSplitter.loadState(opts.dataset)
    if opts.dataset:
        DataParameterSource.datasetsAvailable['data'] = DataParameterSource(
            config.getWorkPath(), 'data', None, dataSplitter, DataSplitProcessorTest())

    psource = pm.getSource(config)

    if opts.forceiv:
        for dp in DataParameterSource.datasetSources:
            dp.intervention = (set([1]), set([0]), True)

    if opts.listparams:
        result = []
        needGCParam = False
        if psource.getMaxJobs() != None:
            countActive = 0
            for jobNum in range(psource.getMaxJobs()):
                info = psource.getJobInfo(jobNum)
                if info[ParameterInfo.ACTIVE]:
                    countActive += 1
                if opts.inactive or info[ParameterInfo.ACTIVE]:
                    if not info[ParameterInfo.ACTIVE]:
                        info['GC_PARAM'] = 'N/A'
                    if str(info['GC_PARAM']) != str(jobNum):
                        needGCParam = True
                    result.append(info)
            if opts.displaymode == 'parseable':
                utils.vprint('Count,%d,%d' % (countActive, psource.getMaxJobs()))
            else:
                utils.vprint('Number of parameter points: %d' % psource.getMaxJobs())
                if countActive != psource.getMaxJobs():
                    utils.vprint('Number of active parameter points: %d' % countActive)
        else:
            result.append(psource.getJobInfo(123))

        enabledOutput = opts.output.split(',')
        output = filter(lambda k: not opts.output or k in enabledOutput, psource.getJobKeys())
        stored = filter(lambda k: k.untracked == False, output)
        untracked = filter(lambda k: k.untracked == True, output)

        if opts.collapse > 0:
            result_old = result
            result = {}
            result_nicks = {}
            head = [('COLLATE_JOBS', '# of jobs')]
            if 'DATASETSPLIT' in stored:
                stored.remove('DATASETSPLIT')
                if (opts.collapse == 1):
                    stored.append('DATASETNICK')
                    head.append(('DATASETNICK', 'DATASETNICK'))
                elif opts.collapse == 2:
                    head.append(('COLLATE_NICK', '# of nicks'))
            for pset in result_old:
                if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
                    pset.pop('DATASETSPLIT')
                nickname = None
                if ('DATASETNICK' in pset) and (opts.collapse == 2):
                    nickname = pset.pop('DATASETNICK')
                h = md5(repr(map(lambda key: pset.get(key), stored))).hexdigest()
                result.setdefault(h, []).append(pset)
                result_nicks.setdefault(h, set()).add(nickname)

            def doCollate(h):
                tmp = result[h][0]
                tmp['COLLATE_JOBS'] = len(result[h])
                tmp['COLLATE_NICK'] = len(result_nicks[h])
                return tmp
            result = map(doCollate, result)
        else:
            head = [('GC_JOB_ID', '#')]
            if needGCParam:
                head.append(('GC_PARAM', 'GC_PARAM'))
        if opts.active:
            head.append((ParameterInfo.ACTIVE, 'ACTIVE'))
        if opts.visible:
            stored = opts.visible.split(',')
        head.extend(sorted(zip(stored, stored)))
        if opts.untracked:
            head.extend(sorted(map(lambda n: (n, '(%s)' % n),
                filter(lambda n: n not in ['GC_PARAM', 'GC_JOB_ID'], untracked))))
        utils.vprint('')
        utils.printTabular(head, result)

    if opts.save:
        utils.vprint('')
        ParameterSource.getClass('GCDumpParameterSource').write(opts.save, psource)
        utils.vprint('Parameter information saved to ./%s' % opts.save)

    if opts.intervention:
        utils.vprint('')
        tmp = psource.getJobIntervention()
        if tmp:
            if opts.displaymode == 'parseable':
                utils.vprint('R: %s' % str.join(',', map(str, tmp[0])))
                utils.vprint('D: %s' % str.join(',', map(str, tmp[1])))
            else:
                utils.vprint('   Redo: %r' % tmp[0])
                utils.vprint('Disable: %r' % tmp[1])
        else:
            if opts.displaymode == 'parseable':
                utils.vprint('NOINT')
            else:
                utils.vprint('No intervention')

def realmain(opts, args):
    config = gcSupport.getConfig(configDict={'access': {'ignore warnings': 'True'}})
    token = AccessToken.getInstance(opts.token, config, 'access', OSLayer.create(config))
    (workDir, config, jobDB) = gcSupport.initGC(args)
    jobList = jobDB.getJobs(ClassSelector(JobClass.SUCCESS))

    # Create SE output dir
    if not opts.output:
        opts.output = os.path.join(workDir, 'se_output')
    if '://' not in opts.output:
        opts.output = 'file:///%s' % os.path.abspath(opts.output)

    infos = {}
    def incInfo(x):
        infos[x] = infos.get(x, 0) + 1

    def processSingleJob(jobNum, output):
        output.init(jobNum)
        job = jobDB.get(jobNum)
        # Only run over finished and not yet downloaded jobs
        if job.state != Job.SUCCESS:
            output.error('Job has not yet finished successfully!')
            return incInfo('Processing')
        if job.get('download') == 'True' and not opts.markIgnoreDL:
            if not opts.threads:
                output.error('All files already downloaded!')
            return incInfo('Downloaded')
        retry = int(job.get('download attempt', 0))
        failJob = False

        if not token.canSubmit(20*60, True):
            sys.stderr.write('Please renew access token!')
            sys.exit(os.EX_UNAVAILABLE)

        # Read the file hash entries from job info file
        files = FileInfoProcessor().process(os.path.join(workDir, 'output', 'job_%d' % jobNum))
        if files:
            files = map(lambda fi: (fi[FileInfoProcessor.Hash], fi[FileInfoProcessor.NameLocal],
                fi[FileInfoProcessor.NameDest], fi[FileInfoProcessor.Path]), files)
        output.files(files)
        if not files:
            if opts.markEmptyFailed:
                failJob = True
            else:
                return incInfo('Job without output files')

        for (fileIdx, fileInfo) in enumerate(files):
            (hash, name_local, name_dest, pathSE) = fileInfo
            output.file(fileIdx)

            # Copy files to local folder
            outFilePath = os.path.join(opts.output, name_dest)
            if opts.selectSE:
                if not (True in map(lambda s: s in pathSE, opts.selectSE)):
                    output.error('skip file because it is not located on selected SE!')
                    return
            if opts.skipExisting and (storage.se_exists(outFilePath) == 0):
                output.error('skip file as it already exists!')
                return
            if storage.se_exists(os.path.dirname(outFilePath)).wait() != 0:
                storage.se_mkdir(os.path.dirname(outFilePath)).wait()

            checkPath = 'file:///tmp/dlfs.%s' % name_dest
            if 'file://' in outFilePath:
                checkPath = outFilePath

            def monitorFile(path, lock, abort):
                path = path.replace('file://', '')
                (csize, osize, stime, otime, lttime) = (0, 0, time.time(), time.time(), time.time())
                while not lock.acquire(False):  # Loop until monitor lock is available
                    if csize != osize:
                        lttime = time.time()
                    if time.time() - lttime > 5*60:  # No size change in the last 5min!
                        output.error('Transfer timeout!')
                        abort.acquire()
                        break
                    if os.path.exists(path):
                        csize = os.path.getsize(path)
                        output.file(fileIdx, csize, osize, stime, otime)
                        (osize, otime) = (csize, time.time())
                    else:
                        stime = time.time()
                    time.sleep(0.1)
                lock.release()

            copyAbortLock = threading.Lock()
            monitorLock = threading.Lock()
            monitorLock.acquire()
            monitor = utils.gcStartThread('Download monitor %s' % jobNum,
                monitorFile, checkPath, monitorLock, copyAbortLock)

            result = -1
            procCP = storage.se_copy(os.path.join(pathSE, name_dest), outFilePath, tmp=checkPath)
            while True:
                if not copyAbortLock.acquire(False):
                    monitor.join()
                    break
                copyAbortLock.release()
                result = procCP.poll()
                if result != -1:
                    monitorLock.release()
                    monitor.join()
                    break
                time.sleep(0.02)

            if result != 0:
                output.error('Unable to copy file from SE!')
                output.error(procCP.getMessage())
                failJob = True
                break

            # Verify => compute md5hash
            if opts.verify:
                try:
                    hashLocal = md5sum(checkPath.replace('file://', ''))
                    if not ('file://' in outFilePath):
                        dlfs_rm('file://%s' % checkPath, 'SE file')
                except KeyboardInterrupt:
                    raise
                except Exception:
                    hashLocal = None
                output.hash(fileIdx, hashLocal)
                if hash != hashLocal:
                    failJob = True
            else:
                output.hash(fileIdx)

        # Ignore the first opts.retry number of failed jobs
        if failJob and opts.retry and (retry < opts.retry):
            output.error('Download attempt #%d failed!' % (retry + 1))
            job.set('download attempt', str(retry + 1))
            jobDB.commit(jobNum, job)
            return incInfo('Download attempts')

        for (fileIdx, fileInfo) in enumerate(files):
            (hash, name_local, name_dest, pathSE) = fileInfo
            # Remove downloaded files in case of failure
            if (failJob and opts.rmLocalFail) or (not failJob and opts.rmLocalOK):
                output.status(fileIdx, 'Deleting file %s from local...' % name_dest)
                outFilePath = os.path.join(opts.output, name_dest)
                if storage.se_exists(outFilePath).wait() == 0:
                    dlfs_rm(outFilePath, 'local file')
            # Remove SE files in case of failure
            if (failJob and opts.rmSEFail) or (not failJob and opts.rmSEOK):
                output.status(fileIdx, 'Deleting file %s...' % name_dest)
                dlfs_rm(os.path.join(pathSE, name_dest), 'SE file')
            output.status(fileIdx, None)

        if failJob:
            incInfo('Failed downloads')
            if opts.markFailed:
                # Mark job as failed to trigger resubmission
                job.state = Job.FAILED
        else:
            incInfo('Successful download')
            if opts.markDL:
                # Mark as downloaded
                job.set('download', 'True')

        # Save new job status infos
        jobDB.commit(jobNum, job)
        output.finish()
        time.sleep(float(opts.slowdown))

    if opts.shuffle:
        random.shuffle(jobList)
    else:
        jobList.sort()

    if opts.threads:
        from grid_control_gui import ansi
        errorOutput = []

        class ThreadDisplay:
            def __init__(self):
                self.output = []

            def init(self, jobNum):
                self.jobNum = jobNum
                self.output = ['Job %5d' % jobNum, '']

            def infoline(self, fileIdx, msg=''):
                return 'Job %5d [%i/%i] %s %s' % (self.jobNum, fileIdx + 1,
                    len(self.files), self.files[fileIdx][2], msg)

            def files(self, files):
                (self.files, self.output, self.tr) = (files, self.output[1:], [''] * len(files))
                for x in range(len(files)):
                    self.output.insert(2*x, self.infoline(x))
                    self.output.insert(2*x+1, '')

            def file(self, idx, csize=None, osize=None, stime=None, otime=None):
                (hash, name_local, name_dest, pathSE) = self.files[idx]
                if otime:
                    trfun = lambda sref, tref: gcSupport.prettySize(((csize - sref) / max(1, time.time() - tref)))
                    self.tr[idx] = '%7s avg. - %7s/s inst.' % (gcSupport.prettySize(csize), trfun(0, stime))
                    self.output[2*idx] = self.infoline(idx, '(%s - %7s/s)' % (self.tr[idx], trfun(osize, otime)))

            def hash(self, idx, hashLocal=None):
                (hash, name_local, name_dest, pathSE) = self.files[idx]
                if hashLocal:
                    if hash == hashLocal:
                        result = ansi.Console.fmt('MATCH', [ansi.Console.COLOR_GREEN])
                    else:
                        result = ansi.Console.fmt('FAIL', [ansi.Console.COLOR_RED])
                    msg = '(R:%s L:%s) => %s' % (hash, hashLocal, result)
                else:
                    msg = ''
                self.output[2*idx] = self.infoline(idx, '(%s)' % self.tr[idx])
                self.output[2*idx+1] = msg
                print self, repr(msg)

            def error(self, msg):
                errorOutput.append(msg)

            def write(self, msg):
                self.output.append(msg)

            def status(self, idx, msg):
                if msg:
                    self.output[2*idx] = self.infoline(idx, '(%s)' % self.tr[idx]) + ' ' + msg
                else:
                    self.output[2*idx] = self.infoline(idx, '(%s)' % self.tr[idx])

            def finish(self):
                # self.output.append(str(self.jobNum) + 'FINISHED')
                pass

        (active, todo) = ([], list(jobList))
        todo.reverse()
        screen = ansi.Console()
        screen.move(0, 0)
        screen.savePos()
        while True:
            screen.erase()
            screen.loadPos()
            active = filter(lambda (t, d): t.isAlive(), active)
            while len(active) < opts.threads and len(todo):
                display = ThreadDisplay()
                active.append((utils.gcStartThread('Download %s' % todo[-1],
                    processSingleJob, todo.pop(), display), display))
            for (t, d) in active:
                sys.stdout.write(str.join('\n', d.output))
            sys.stdout.write(str.join('\n', ['=' * 50] + errorOutput))
            sys.stdout.flush()
            if len(active) == 0:
                break
            time.sleep(0.01)
    else:
        class DefaultDisplay:
            def init(self, jobNum):
                sys.stdout.write('Job %d: ' % jobNum)

            def files(self, files):
                self.files = files
                sys.stdout.write('The job wrote %d file%s to the SE\n' % (len(files), ('s', '')[len(files) == 1]))

            def file(self, idx, csize=None, osize=None, stime=None, otime=None):
                (hash, name_local, name_dest, pathSE) = self.files[idx]
                if otime:
                    tr = lambda sref, tref: gcSupport.prettySize(((csize - sref) / max(1, time.time() - tref)))
                    tmp = name_dest
                    if opts.showHost:
                        tmp += ' [%s]' % pathSE.split('//')[-1].split('/')[0].split(':')[0]
                    self.write('\r\t%s (%7s - %7s/s avg. - %7s/s inst.)' % (tmp,
                        gcSupport.prettySize(csize), tr(0, stime), tr(osize, otime)))
                    sys.stdout.flush()
                else:
                    self.write('\t%s' % name_dest)
                    sys.stdout.flush()

            def hash(self, idx, hashLocal=None):
                (hash, name_local, name_dest, pathSE) = self.files[idx]
                self.write(' => %s\n' % ('\33[0;91mFAIL\33[0m', '\33[0;92mMATCH\33[0m')[hash == hashLocal])
                self.write('\t\tRemote site: %s\n' % hash)
                self.write('\t\t Local site: %s\n' % hashLocal)

            def error(self, msg):
                sys.stdout.write('\nJob %d: %s' % (jobNum, msg.strip()))

            def status(self, idx, msg):
                if msg:
                    self.write('\t' + msg + '\r')
                else:
                    self.write(' ' * len('\tDeleting file %s from SE...\r' % self.files[idx][2]) + '\r')

            def write(self, msg):
                sys.stdout.write(msg)

            def finish(self):
                sys.stdout.write('\n')

        for jobNum in jobList:
            processSingleJob(jobNum, DefaultDisplay())

    # Print overview
    if infos:
        print '\nStatus overview:'
        for (state, num) in infos.items():
            if num > 0:
                print '\t%20s: [%d/%d]' % (state, num, len(jobList))
        print
    if ('Downloaded' in infos) and (infos['Downloaded'] == len(jobDB)):
        return os.EX_OK
    return os.EX_NOINPUT

def main():
    usage = '%s [OPTIONS] <config file / work directory>' % sys.argv[0]
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-G', '--globaltag', dest='globaltag', default='crab2_tag', help='Specify global tag')
    parser.add_option('-F', '--input', dest='inputFile', default=None,
        help='Specify dbs input file to use instead of scanning job output')
    # parser.add_option('-k', '--key-select', dest='dataset key select', default='',
    #     help='Specify dataset keys to process')
    parser.add_option('-c', '--continue-migration', dest='continue_migration', default=False,
        action='store_true', help='Continue an already started migration')

    ogDiscover = optparse.OptionGroup(parser, 'Discovery options - ignored in case dbs input file is specified', '')
    ogDiscover.add_option('-n', '--name', dest='dataset name pattern', default='',
        help='Specify dbs path name - Example: DataSet_@NICK@_@VAR@')
    ogDiscover.add_option('-T', '--datatype', dest='datatype', default=None,
        help='Supply dataset type in case cmssw report did not specify it - valid values: "mc" or "data"')
    ogDiscover.add_option('-m', '--merge', dest='merge parents', default=False, action='store_true',
        help='Merge output files from different parent blocks into a single block [Default: Keep boundaries]')
    ogDiscover.add_option('-j', '--jobhash', dest='useJobHash', default=False, action='store_true',
        help='Use hash of all config files in job for dataset key calculation')
    ogDiscover.add_option('-u', '--unique-cfg', dest='uniqueCfg', default=False, action='store_true',
        help='Circumvent edmConfigHash collisions so each dataset is stored with unique config information')
    ogDiscover.add_option('-P', '--parent', dest='parent source', default='',
        help='Override parent information source - to bootstrap a reprocessing on local files')
    ogDiscover.add_option('-H', '--hash-keys', dest='dataset hash keys', default='',
        help='Included additional variables in dataset hash calculation')
    parser.add_option_group(ogDiscover)

    ogDiscover2 = optparse.OptionGroup(parser, 'Discovery options II - only available when config file is used', '')
    ogDiscover2.add_option('-J', '--job-selector', dest='selected', default=None, help='Specify dataset(s) to process')
    parser.add_option_group(ogDiscover2)

    ogMode = optparse.OptionGroup(parser, 'Processing mode', '')
    ogMode.add_option('-b', '--batch', dest='batch', default=False, action='store_true',
        help='Enable non-interactive batch mode [Default: Interactive mode]')
    ogMode.add_option('-d', '--discovery', dest='discovery', default=False, action='store_true',
        help='Enable discovery mode - just collect file information and exit')
    ogMode.add_option('', '--tempdir', dest='tmpDir', default='', help='Override temp directory')
    ogMode.add_option('-i', '--no-import', dest='doImport', default=True, action='store_false',
        help='Disable import of new datasets into target DBS instance - only temporary xml files are created, ' +
            'which can be added later via datasetDBSTool.py [Default: Import datasets]')
    parser.add_option_group(ogMode)

    ogInc = optparse.OptionGroup(parser, 'Incremental adding of files to DBS', '')
    ogInc.add_option('-I', '--incremental', dest='incremental', default=False, action='store_true',
        help='Skip import of existing files - Warning: this destroys coherent block structure!')
    # ogInc.add_option('-o', '--open-blocks', dest='closeBlock', default=True, action='store_false',
    #     help='Keep blocks open for addition of further files [Default: Close blocks]')
    parser.add_option_group(ogInc)

    ogInst = optparse.OptionGroup(parser, 'DBS instance handling', '')
    ogInst.add_option('-t', '--target-instance', dest='dbsTarget',
        default='https://cmsweb.cern.ch/dbs/prod/phys03', help='Specify target dbs instance url')
    ogInst.add_option('-s', '--source-instance', dest='dbsSource',
        default='https://cmsweb.cern.ch/dbs/prod/global',
        help='Specify source dbs instance url(s), where parent datasets are taken from')
    parser.add_option_group(ogInst)

    ogDbg = optparse.OptionGroup(parser, 'Display options', '')
    ogDbg.add_option('-D', '--display-dataset', dest='display_data', default=None,
        help='Display information associated with dataset key(s) (accepts "all")')
    ogDbg.add_option('-C', '--display-config', dest='display_cfg', default=None,
        help='Display information associated with config hash(es) (accepts "all")')
    ogDbg.add_option('-v', '--verbose', dest='verbosity', default=0, action='count', help='Increase verbosity')
    parser.add_option_group(ogDbg)

    (opts, args) = parser.parse_args()
    utils.verbosity(opts.verbosity)
    setattr(opts, 'include parent infos', True)
    setattr(opts, 'importLumi', True)
    setattr(opts, 'dataset hash keys', getattr(opts, 'dataset hash keys').replace(',', ' '))
    if opts.useJobHash:
        setattr(opts, 'dataset hash keys', getattr(opts, 'dataset hash keys') + ' CMSSW_CONFIG_JOBHASH')

    # 0) Get work directory, create dbs dump directory
    if len(args) != 1:
        utils.exitWithUsage(usage, 'Neither work directory nor config file specified!')
    if os.path.isdir(args[0]):
        opts.workDir = os.path.abspath(os.path.normpath(args[0]))
    else:
        opts.workDir = getConfig(configFile=args[0]).getWorkPath()
    if not opts.tmpDir:
        opts.tmpDir = os.path.join(opts.workDir, 'dbs')
    if not os.path.exists(opts.tmpDir):
        os.mkdir(opts.tmpDir)
    # Lock file in case several instances of this program are running
    mutex = FileMutex(os.path.join(opts.tmpDir, 'datasetDBSAdd.lock'))

    # 1) Get dataset information
    if opts.inputFile:
        provider = DataProvider.getInstance('ListProvider', getConfig(), opts.inputFile, None)
    else:
        config = getConfig(configDict={'dataset': dict(parser.values.__dict__)})
        if opts.discovery:
            config.set('dataset name pattern', '@DS_KEY@')
        provider = DataProvider.getInstance('DBSInfoProvider', config, args[0], None)

    provider.saveState(os.path.join(opts.tmpDir, 'dbs.dat'))
    if opts.discovery:
        sys.exit(os.EX_OK)
    blocks = provider.getBlocks()

    # 2) Filter datasets
    if opts.incremental:
        # Query target DBS for all found datasets and perform dataset resync with "supposed" state
        dNames = set(map(lambda b: b[DataProvider.Dataset], blocks))
        dNames = filter(lambda ds: hasDataset(opts.dbsTarget, ds), dNames)
        config = getConfig(configDict={None: {'dbs instance': opts.dbsTarget}})
        oldBlocks = reduce(operator.add, map(lambda ds: DBSApiv2(config, None, ds, None).getBlocks(), dNames), [])
        (blocksAdded, blocksMissing, blocksChanged) = DataProvider.resyncSources(oldBlocks, blocks)
        if len(blocksMissing) or len(blocksChanged):
            if not utils.getUserBool(' * WARNING: Block structure has changed! Continue?', False):
                sys.exit(os.EX_OK)
        # Search for blocks which were partially added and generate "pseudo"-blocks with left over files
        setOldBlocks = set(map(lambda x: x[DataProvider.BlockName], oldBlocks))
        setAddedBlocks = set(map(lambda x: x[DataProvider.BlockName], blocksAdded))
        blockCollision = set.intersection(setOldBlocks, setAddedBlocks)
        if blockCollision and opts.closeBlock:  # Block are closed and contents have changed
            for block in blocksAdded:
                if block[DataProvider.BlockName] in blockCollision:
                    block[DataProvider.BlockName] = utils.strGuid(md5(str(time.time())).hexdigest())
        blocks = blocksAdded

    # 3) Display dataset properties
    if opts.display_data or opts.display_cfg:
        raise APIError('Not yet reimplemented')

    # set-up logging
    logging.basicConfig(format='%(levelname)s: %(message)s')
    logger = logging.getLogger('dbs3-migration')
    logger.addHandler(NullHandler())
    logger.setLevel(logging.DEBUG)

    # set-up dbs clients
    dbs3_target_client = DBS3LiteClient(url=opts.dbsTarget)
    dbs3_source_client = DBS3LiteClient(url=opts.dbsSource)

    dbs3_migration_queue = DBS3MigrationQueue()

    for blockDump in generateDBS3BlockDumps(opts, blocks):
        if not opts.continue_migration:
            # initiate the dbs3 to dbs3 migration of parent blocks
            logger.debug('Checking parentage for block: %s' % blockDump['block']['block_name'])
            unique_parent_lfns = set((parent[u'parent_logical_file_name'] for parent in blockDump[u'file_parent_list']))
            unique_blocks = set((block['block_name'] for parent_lfn in unique_parent_lfns
                for block in dbs3_source_client.listBlocks(logical_file_name=parent_lfn)))
            for block_to_migrate in unique_blocks:
                if dbs3_target_client.listBlocks(block_name=block_to_migrate):
                    # block already at destination
                    logger.debug('Block %s is already at destination' % block_to_migrate)
                    continue
                migration_task = MigrationTask(block_name=block_to_migrate,
                    migration_url='https://cmsweb.cern.ch/dbs/prod/global/DBSReader', dbs_client=dbs3_target_client)
                try:
                    dbs3_migration_queue.add_migration_task(migration_task)
                except AlreadyQueued as aq:
                    logger.debug(aq.message)
            dbs3_migration_queue.save_to_disk(os.path.join(opts.tmpDir, 'dbs3_migration.pkl'))
        else:
            try:
                dbs3_migration_queue = DBS3MigrationQueue.read_from_disk(os.path.join(opts.tmpDir, 'dbs3_migration.pkl'))
            except IOError as io_err:
                msg = "Probably, there is no DBS 3 migration for this dataset ongoing, Dude!"
                logger.exception('%s\n%s' % (io_err.message, msg))
                raise

        # wait for all parent blocks migrated to dbs3
        do_migration(dbs3_migration_queue)

        # insert block into dbs3
        dbs3_target_client.insertBulkBlock(blockDump)

from grid_control.gc_exceptions import RuntimeError
from grid_control.utils.webservice import readJSON
from grid_control_cms.provider_sitedb import SiteDB


def lfn2pfn(node, lfn):
    return readJSON('https://cmsweb.cern.ch/phedex/datasvc/json/prod/lfn2pfn',
        {'node': node, 'protocol': 'srmv2', 'lfn': lfn})['phedex']['mapping'][0]['pfn']


parser = optparse.OptionParser()
parser.add_option('-s', '--SE', dest='SE', default=None, help='Resolve LFN on CMS SE into PFN')
parser.add_option('', '--lfn', dest='lfn', default='/store/user/<hypernews name>', help='Name of default LFN')
parser.add_option('', '--se-prot', dest='seprot', default='srmv2', help='Name of default SE protocol')
(opts, args) = parseOptions(parser)

if opts.SE:
    if '<hypernews name>' in opts.lfn:
        token = AccessToken.getInstance('VomsProxy', getConfig(), None)
        site_db = SiteDB()
        hnName = site_db.dn_to_username(dn=token.getFQUsername())
        if not hnName:
            raise RuntimeError('Unable to map grid certificate to hypernews name!')
        opts.lfn = opts.lfn.replace('<hypernews name>', hnName)
    tmp = readJSON('https://cmsweb.cern.ch/phedex/datasvc/json/prod/lfn2pfn',
        {'node': opts.SE, 'protocol': opts.seprot, 'lfn': opts.lfn})['phedex']['mapping']
    for entry in tmp:
        if len(tmp) > 1:
            print entry['node'],
        print entry['pfn']

def main():
    dataset = args[0].strip()
    cfgSettings = {'dbs blacklist T1 *': 'False', 'remove empty blocks *': 'False',
        'remove empty files *': 'False', 'location format *': opts.locationfmt,
        'nickname check collision *': 'False'}
    if opts.metadata or opts.blockmetadata:
        cfgSettings['lumi filter *'] = '-'
        cfgSettings['keep lumi metadata *'] = 'True'
    config = getConfig(configFile=opts.settings, configDict={'dataset': cfgSettings})

    if os.path.exists(dataset):
        provider = DataProvider.getInstance('ListProvider', config, dataset, None)
    else:
        provider = DataProvider.create(config, dataset, opts.provider)
    blocks = provider.getBlocks()
    if len(blocks) == 0:
        raise DatasetError('No blocks!')

    datasets = set(map(lambda x: x[DataProvider.Dataset], blocks))
    if len(datasets) > 1 or opts.info:
        headerbase = [(DataProvider.Dataset, 'Dataset')]
    else:
        print('Dataset: %s' % blocks[0][DataProvider.Dataset])
        headerbase = []

    if opts.configentry:
        print('')
        print('dataset =')
        infos = {}
        order = []
        maxnick = 5
        for block in blocks:
            dsName = block[DataProvider.Dataset]
            if not infos.get(dsName, None):
                order.append(dsName)
                infos[dsName] = dict([(DataProvider.Dataset, dsName)])
                if DataProvider.Nickname not in block and opts.confignick:
                    try:
                        if '/' in dsName:
                            block[DataProvider.Nickname] = dsName.lstrip('/').split('/')[1]
                        else:
                            block[DataProvider.Nickname] = dsName
                    except Exception:
                        pass
                if DataProvider.Nickname not in block and opts.confignick:
                    block[DataProvider.Nickname] = np.getName(None, dsName, block)
                if DataProvider.Nickname in block:
                    nick = block[DataProvider.Nickname]
                    infos[dsName][DataProvider.Nickname] = nick
                    maxnick = max(maxnick, len(nick))
                if len(block[DataProvider.FileList]):
                    infos[dsName][DataProvider.URL] = block[DataProvider.FileList][0][DataProvider.URL]
        for dsID, dsName in enumerate(order):
            info = infos[dsName]
            short = DataProvider.providers.get(provider.__class__.__name__, provider.__class__.__name__)
            nickname = info.get(DataProvider.Nickname, 'nick%d' % dsID).rjust(maxnick)
            filterExpr = utils.QM(short == 'list', ' %% %s' % info[DataProvider.Dataset], '')
            print('\t%s : %s : %s%s' % (nickname, short, provider._datasetExpr, filterExpr))

    if opts.listdatasets:
        # Add some enums for consistent access to info dicts
        DataProvider.NFiles = -1
        DataProvider.NBlocks = -2

        print('')
        infos = {}
        order = []
        infosum = {DataProvider.Dataset: 'Sum'}
        for block in blocks:
            dsName = block.get(DataProvider.Dataset, '')
            if not infos.get(dsName, None):
                order.append(dsName)
                infos[dsName] = {DataProvider.Dataset: block[DataProvider.Dataset]}
            def updateInfos(target):
                target[DataProvider.NBlocks] = target.get(DataProvider.NBlocks, 0) + 1
                target[DataProvider.NFiles] = target.get(DataProvider.NFiles, 0) + len(block[DataProvider.FileList])
                target[DataProvider.NEntries] = target.get(DataProvider.NEntries, 0) + block[DataProvider.NEntries]
            updateInfos(infos[dsName])
            updateInfos(infosum)
        head = [(DataProvider.Dataset, 'Dataset'), (DataProvider.NEntries, '#Events'),
            (DataProvider.NBlocks, '#Blocks'), (DataProvider.NFiles, '#Files')]
        utils.printTabular(head, map(lambda x: infos[x], order) + ['=', infosum])

    if opts.listblocks:
        print('')
        utils.printTabular(headerbase + [(DataProvider.BlockName, 'Block'), (DataProvider.NEntries, 'Events')], blocks)

    if opts.listfiles:
        print('')
        for block in blocks:
            if len(datasets) > 1:
                print('Dataset: %s' % block[DataProvider.Dataset])
            print('Blockname: %s' % block[DataProvider.BlockName])
            utils.printTabular([(DataProvider.URL, 'Filename'), (DataProvider.NEntries, 'Events')], block[DataProvider.FileList])
            print('')

    def printMetadata(src, maxlen):
        for (mk, mv) in src:
            if len(str(mv)) > 200:
                mv = '<metadata entry size: %s> %s...' % (len(str(mv)), repr(mv)[:200])
            print('\t%s: %s' % (mk.rjust(maxlen), mv))
        if src:
            print('')

    if opts.metadata and not opts.save:
        print('')
        for block in blocks:
            if len(datasets) > 1:
                print('Dataset: %s' % block[DataProvider.Dataset])
            print('Blockname: %s' % block[DataProvider.BlockName])
            mk_len = max(map(len, block.get(DataProvider.Metadata, [''])))
            for f in block[DataProvider.FileList]:
                print('%s [%d events]' % (f[DataProvider.URL], f[DataProvider.NEntries]))
                printMetadata(zip(block.get(DataProvider.Metadata, []), f.get(DataProvider.Metadata, [])), mk_len)
            print('')

    if opts.blockmetadata and not opts.save:
        for block in blocks:
            if len(datasets) > 1:
                print('Dataset: %s' % block[DataProvider.Dataset])
            print('Blockname: %s' % block[DataProvider.BlockName])
            mkdict = lambda x: dict(zip(block[DataProvider.Metadata], x[DataProvider.Metadata]))
            metadata = utils.QM(block[DataProvider.FileList], mkdict(block[DataProvider.FileList][0]), {})
            for fileInfo in block[DataProvider.FileList]:
                utils.intersectDict(metadata, mkdict(fileInfo))
            printMetadata(metadata.items(), max(map(len, metadata.keys())))

    if opts.liststorage:
        print('')
        infos = {}
        print('Storage elements:')
        for block in blocks:
            dsName = block[DataProvider.Dataset]
            if len(headerbase) > 0:
                print('Dataset: %s' % dsName)
            if block.get(DataProvider.BlockName, None):
                print('Blockname: %s' % block[DataProvider.BlockName])
            if block[DataProvider.Locations] == None:
                print('\tNo location contraint specified')
            elif block[DataProvider.Locations] == []:
                print('\tNot located at anywhere')
            else:
                for se in block[DataProvider.Locations]:
                    print('\t%s' % se)
            print('')

    if opts.info:
        evSum = 0
        for block in blocks:
            blockId = '%s %s' % (block.get(DataProvider.Dataset, '-'), block.get(DataProvider.BlockName, '-'))
            blockStorage = '-'
            if block.get(DataProvider.Locations, None):
                blockStorage = str.join(',', block.get(DataProvider.Locations, '-'))
            evSum += block.get(DataProvider.NEntries, 0)
            print('%s %s %d %d' % (blockId, blockStorage, block.get(DataProvider.NEntries, 0), evSum))

    if opts.save:
        print('')
        blocks = provider.getBlocks()
        if opts.sort:
            blocks.sort(key=lambda b: b[DataProvider.Dataset] + '#' + b[DataProvider.BlockName])
            for b in blocks:
                b[DataProvider.FileList].sort(key=lambda fi: fi[DataProvider.URL])
        provider.saveState(opts.save, blocks)
        print('Dataset information saved to ./%s' % opts.save)