def execRaw(command, args):
    """
    execRaw - executes a given command with certain arguments and returns
        the raw result back from the client. args is a python list,
        the same python list parsed by the optparse module
    """
    tblogger, logger, memhandler = initLoggers()
    try:
        mod = __import__('CRABClient.Commands.%s' % command, fromlist=command)
    except ImportError:
        raise CRABAPI.BadArgumentException(
            'Could not find command "%s"' % command)
    try:
        cmdobj = getattr(mod, command)(logger, args)
        res = cmdobj()
    except SystemExit as se:
        # most likely an error from the OptionParser in Subcommand.
        # CRABClient #4283 should make this less ugly
        if se.code == 2:
            raise CRABAPI.BadArgumentException
        else:
            # We can reach here if the PSet raises a SystemExit exception.
            # Without this, CRAB raises a confusing UnboundLocalError.
            logger.error('PSet raised a SystemExit. Traceback follows:')
            logger.error(traceback.format_exc())
            raise
    finally:
        flushMemoryLogger(tblogger, memhandler, logger.logfile)
        removeLoggerHandlers(tblogger)
        removeLoggerHandlers(logger)
    return res
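
# A minimal usage sketch, not part of the original source: execRaw can drive any
# CRABClient command programmatically. The command name and the task directory
# below are illustrative assumptions.
if __name__ == '__main__':
    try:
        result = execRaw('status', ['--dir', 'crab_projects/crab_myTask'])
        print result
    except CRABAPI.BadArgumentException:
        print 'Unknown command or bad arguments'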
def status_crab(args):
    '''Check jobs'''
    if not crabLoaded:
        logging.error('You must source a crab environment to submit to crab.\nsource /cvmfs/cms.cern.ch/crab3/crab.sh')
        return
    crab_dirs = []
    if args.jobName:
        workArea = get_crab_workArea(args)
        crab_dirs += sorted(glob.glob('{0}/*'.format(workArea)))
    elif args.directories:
        for d in args.directories:
            crab_dirs += glob.glob(d)
    else:
        log.error("Shouldn't be possible to get here")

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.WARNING)
    logger.setLevel(logging.WARNING)
    memhandler.setLevel(logging.WARNING)

    statusMap = {}
    for d in crab_dirs:
        if os.path.exists(d):
            statusArgs = ['--dir', d]
            #if args.verbose: statusArgs += ['--long']
            try:
                log.info('Retrieving status of {0}'.format(d))
                statusMap[d] = crabClientStatus.status(logger, statusArgs)()
                if args.verbose:
                    print_single_status(args, statusMap[d])
            except HTTPException as hte:
                log.warning("Status for input directory {0} failed: {1}".format(d, hte.headers))
            except ClientException as cle:
                log.warning("Status for input directory {0} failed: {1}".format(d, cle))

    parse_crab_status(args, statusMap)
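
# crabLoaded, used as a guard above, is not defined in this listing. A plausible
# sketch of how it is typically set at import time (an assumption, not the
# original source): the CRABClient imports succeed only inside a CRAB environment.
try:
    from CRABClient.ClientUtilities import initLoggers
    from CRABClient.ClientExceptions import ClientException
    import CRABClient.Commands.status as crabClientStatus
    crabLoaded = True
except ImportError:
    crabLoaded = False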
def status_crab(args):
    '''Check jobs'''
    crab_dirs = []
    if args.jobName:
        workArea = get_crab_workArea(args)
        crab_dirs += sorted(glob.glob('{0}/*'.format(workArea)))
    elif args.crabDirectories:
        for d in args.crabDirectories:
            crab_dirs += glob.glob(d)
    else:
        log.error("Shouldn't be possible to get here")

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.WARNING)
    logger.setLevel(logging.WARNING)
    memhandler.setLevel(logging.WARNING)

    statusMap = {}
    for d in crab_dirs:
        if os.path.exists(d):
            statusArgs = ['--dir', d]
            #if args.verbose: statusArgs += ['--long']
            try:
                log.info('Retrieving status of {0}'.format(d))
                statusMap[d] = crabClientStatus.status(logger, statusArgs)()
            except HTTPException as hte:
                log.warning("Status for input directory {0} failed: {1}".format(d, hte.headers))
            except ClientException as cle:
                log.warning("Status for input directory {0} failed: {1}".format(d, cle))

    parse_crab_status(args, statusMap)
def execRaw(command, args):
    """
    execRaw - executes a given command with certain arguments and returns
        the raw result back from the client. args is a python list,
        the same python list parsed by the optparse module
    """
    tblogger, logger, memhandler = initLoggers()
    try:
        mod = __import__('CRABClient.Commands.%s' % command, fromlist=command)
    except ImportError:
        raise CRABAPI.BadArgumentException(
            'Could not find command "%s"' % command)
    try:
        cmdobj = getattr(mod, command)(logger, args)
        res = cmdobj()
    except SystemExit as se:
        # most likely an error from the OptionParser in Subcommand.
        # CRABClient #4283 should make this less ugly
        if se.code == 2:
            raise CRABAPI.BadArgumentException
    finally:
        flushMemoryLogger(tblogger, memhandler, logger.logfile)
        removeLoggerHandlers(tblogger)
        removeLoggerHandlers(logger)
    return res
def submit_untracked_crab(args):
    '''Submit jobs from an inputDirectory'''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)
    config.Site.whitelist = ['T2_US_Wisconsin']  # whitelist wisconsin so it only runs there

    # get samples
    sampleList = hdfs_ls_directory(args.inputDirectory)

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        primaryDataset = sample
        config.General.requestName = '{0}'.format(primaryDataset)
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99]  # Warning: may not be unique now
        config.Data.outputPrimaryDataset = primaryDataset
        # get file list
        config.Data.userInputFiles = get_hdfs_root_files(args.inputDirectory, sample)
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
def purge_crab(args):
    '''Purge completed jobs'''
    if not crabLoaded:
        logging.error('You must source a crab environment to submit to crab.\nsource /cvmfs/cms.cern.ch/crab3/crab.sh')
        return
    crab_dirs = []
    if args.jobName:
        workArea = get_crab_workArea(args)
        crab_dirs += sorted(glob.glob('{0}/*'.format(workArea)))
    elif args.directories:
        for d in args.directories:
            crab_dirs += glob.glob(d)
    else:
        log.error("Shouldn't be possible to get here")

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.WARNING)
    logger.setLevel(logging.WARNING)
    memhandler.setLevel(logging.WARNING)

    purgeMap = {}
    for d in crab_dirs:
        if os.path.exists(d):
            statusArgs = ['--dir', d]
            purgeArgs = ['--cache', '--dir', d]
            try:
                summary = crabClientStatus.status(logger, statusArgs)()
                purge = False
                total = 0
                finished = 0
                allJobStatus = {}
                if 'jobs' in summary:
                    for j, job in summary['jobs'].iteritems():
                        total += 1
                        if job['State'] not in allJobStatus:
                            allJobStatus[job['State']] = 0
                        allJobStatus[job['State']] += 1
                        if job['State'] in ['finished']:
                            finished += 1
                # only purge a task once every one of its jobs has finished
                if total and finished == total:
                    purge = True
                if purge:
                    log.info('Purging {0}'.format(d))
                    log.info(' '.join(['{0}: {1}'.format(state, allJobStatus[state]) for state in allowedStates if state in allJobStatus]))
                    purgeMap[d] = crabClientPurge.purge(logger, purgeArgs)()
            except HTTPException as hte:
                log.warning("Purge for input directory {0} failed: {1}".format(d, hte.headers))
            except ClientException as cle:
                log.warning("Purge for input directory {0} failed: {1}".format(d, cle))
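
# allowedStates, referenced by purge_crab and resubmit_crab, is not defined in
# this listing. A plausible definition based on the CRAB job states these
# functions inspect (an assumption; the full CRAB state list may differ):
allowedStates = ['unsubmitted', 'idle', 'running', 'transferring', 'cooloff', 'failed', 'finished']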
def submit_das_crab(args):
    '''Submit samples using DAS'''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)

    # get samples
    sampleList = []
    if args.samples:
        sampleList += args.samples
    elif os.path.isfile(args.sampleList):
        with open(args.sampleList, 'r') as f:
            sampleList = [line.strip() for line in f]
    else:
        log.error('Sample input list {0} does not exist.'.format(args.sampleList))

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        # lookup reasonable sites
        if args.ignoreLocality:
            sites = get_sites(sample)
            if sites:  # if we found an ignoreLocality site list
                config.Data.ignoreLocality = True
                config.Site.whitelist = sites
            else:
                logging.warning('Not enabling ignoreLocality, no sites found')
        _, primaryDataset, datasetTag, dataFormat = sample.split('/')
        config.General.requestName = '{0}'.format(primaryDataset)
        maxDatasetTagSize = 97 - len(primaryDataset)
        config.General.requestName += '_' + datasetTag[-maxDatasetTagSize:]
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99]  # Warning: may not be unique now
        config.Data.inputDataset = sample
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs += ['--dryrun']
        print 'Will submit with args:'
        print submitArgs
        print config.__str__()
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
            # save config file text
            outdir = os.path.join(config.General.workArea, 'crab_{0}'.format(config.General.requestName), 'inputs/crabConfig.py')
            with open(outdir, 'w') as f:
                f.write(config.__str__())
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
def resubmit_crab(args):
    '''Resubmit jobs'''
    if not crabLoaded:
        logging.error('You must source a crab environment to submit to crab.\nsource /cvmfs/cms.cern.ch/crab3/crab.sh')
        return
    crab_dirs = []
    if args.jobName:
        workArea = get_crab_workArea(args)
        crab_dirs += sorted(glob.glob('{0}/*'.format(workArea)))
    elif args.directories:
        for d in args.directories:
            crab_dirs += glob.glob(d)
    else:
        log.error("Shouldn't be possible to get here")

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.WARNING)
    logger.setLevel(logging.WARNING)
    memhandler.setLevel(logging.WARNING)

    resubmitMap = {}
    for d in crab_dirs:
        if os.path.exists(d):
            statusArgs = ['--dir', d]
            resubmitArgs = ['--dir', d]
            try:
                summary = crabClientStatus.status(logger, statusArgs)()
                resubmit = False
                total = 0
                failed = 0
                allJobStatus = {}
                if 'jobs' in summary:
                    for j, job in summary['jobs'].iteritems():
                        total += 1
                        if job['State'] not in allJobStatus:
                            allJobStatus[job['State']] = 0
                        allJobStatus[job['State']] += 1
                        if job['State'] in ['failed']:
                            failed += 1
                            resubmit = True
                if resubmit:
                    log.info('Resubmitting {0}'.format(d))
                    log.info('{0} of {1} jobs failed'.format(failed, total))
                    log.info(' '.join(['{0}: {1}'.format(state, allJobStatus[state]) for state in allowedStates if state in allJobStatus]))
                    resubmitMap[d] = crabClientResubmit.resubmit(logger, resubmitArgs)()
            except HTTPException as hte:
                log.warning("Resubmission for input directory {0} failed: {1}".format(d, hte.headers))
            except ClientException as cle:
                log.warning("Resubmission for input directory {0} failed: {1}".format(d, cle))

    for d, statMap in resubmitMap.iteritems():
        if statMap['status'] != 'SUCCESS':
            log.info('Status: {0} - {1}'.format(statMap['status'], d))
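
# Illustrative sketch of the status-summary shape assumed by resubmit_crab and
# purge_crab above: a 'jobs' dict keyed by job id, each entry carrying a 'State'
# string. The sample values here are made up.
example_summary = {'jobs': {'1': {'State': 'finished'},
                            '2': {'State': 'failed'}}}
# With this shape, the loop above counts 1 failed job out of 2 and triggers a resubmit.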
def resubmit_crab(args):
    '''Resubmit jobs'''
    crab_dirs = []
    if args.jobName:
        workArea = get_crab_workArea(args)
        crab_dirs += sorted(glob.glob('{0}/*'.format(workArea)))
    elif args.crabDirectories:
        for d in args.crabDirectories:
            crab_dirs += glob.glob(d)
    else:
        log.error("Shouldn't be possible to get here")

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.WARNING)
    logger.setLevel(logging.WARNING)
    memhandler.setLevel(logging.WARNING)

    resubmitMap = {}
    for d in crab_dirs:
        if os.path.exists(d):
            statusArgs = ['--dir', d]
            resubmitArgs = ['--dir', d]
            try:
                summary = crabClientStatus.status(logger, statusArgs)()
                resubmit = False
                total = 0
                failed = 0
                if 'jobs' in summary:
                    for j, job in summary['jobs'].iteritems():
                        total += 1
                        if job['State'] in ['failed']:
                            failed += 1
                            resubmit = True
                if resubmit:
                    log.info('Resubmitting {0}'.format(d))
                    log.info('{0} of {1} jobs failed'.format(failed, total))
                    resubmitMap[d] = crabClientResubmit.resubmit(logger, resubmitArgs)()
            except HTTPException as hte:
                log.warning("Resubmission for input directory {0} failed: {1}".format(d, hte.headers))
            except ClientException as cle:
                log.warning("Resubmission for input directory {0} failed: {1}".format(d, cle))

    for d, statMap in resubmitMap.iteritems():
        if statMap['status'] != 'SUCCESS':
            log.info('Status: {0} - {1}'.format(statMap['status'], d))
def submit_das_crab(args):
    '''Submit samples using DAS'''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)

    # get samples
    sampleList = []
    if args.samples:
        sampleList += args.samples
    elif os.path.isfile(args.sampleList):
        with open(args.sampleList, 'r') as f:
            sampleList = [line.strip() for line in f]
    else:
        log.error('Sample input list {0} does not exist.'.format(args.sampleList))

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        _, primaryDataset, datasetTag, dataFormat = sample.split('/')
        config.General.requestName = '{0}'.format(primaryDataset)
        maxDatasetTagSize = 97 - len(primaryDataset)
        config.General.requestName += '_' + datasetTag[-maxDatasetTagSize:]
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99]  # Warning: may not be unique now
        config.Data.inputDataset = sample
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
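
# get_config, used by all the submit_* functions here, is not shown in this
# listing. A minimal sketch of what it plausibly builds, using the standard
# CRABClient config object seen in submitLimitCrab below (attribute values are
# illustrative placeholders, not the original settings):
def get_config_sketch(args):
    from CRABClient.UserUtilities import config
    cfg = config()
    cfg.General.workArea = get_crab_workArea(args)  # helper used elsewhere in this module
    cfg.General.transferOutputs = True
    cfg.JobType.pluginName = 'Analysis'
    cfg.Data.splitting = 'FileBased'
    cfg.Data.unitsPerJob = 1
    cfg.Site.storageSite = 'T2_US_Wisconsin'  # matches the site used elsewhere here
    return cfg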
def submit_untracked_crab(args):
    '''Submit jobs from an inputDirectory'''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)
    config.Site.whitelist = [args.site]  # whitelist site, run on same site as files located

    # get samples
    sampleList = hdfs_ls_directory(args.inputDirectory)

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        primaryDataset = sample
        config.General.requestName = '{0}'.format(primaryDataset)
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99]  # Warning: may not be unique now
        config.Data.outputPrimaryDataset = primaryDataset
        # get file list
        config.Data.userInputFiles = get_hdfs_root_files(args.inputDirectory, sample)
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
def submitLimitCrab(tag, h, amasses, **kwargs):
    dryrun = kwargs.get('dryrun', False)
    jobName = kwargs.get('jobName', None)
    pointsPerJob = kwargs.get('pointsPerJob', 10)
    parametric = kwargs.get('parametric', False)

    a = '${A}'
    datacard = 'datacards_shape/MuMuTauTau/mmmt_{}_HToAAH{}A{}.txt'.format(tag, h, 'X' if parametric else '${A}')

    combineCommands = getCommands(**kwargs)

    sample_dir = '/{}/{}/crab_projects/{}/{}/{}'.format(scratchDir, pwd.getpwuid(os.getuid())[0], jobName, tag, h)
    python_mkdir(sample_dir)

    # create submit dir
    submit_dir = '{}/crab'.format(sample_dir)
    if os.path.exists(submit_dir):
        logging.warning('Submission directory exists for {0}.'.format(jobName))
        return

    # create bash script
    bash_name = '{}/script.sh'.format(sample_dir)
    bashScript = '#!/bin/bash\n'
    bashScript += 'eval `scramv1 runtime -sh`\n'
    bashScript += 'ls\n'
    bashScript += 'printenv\n'
    bashScript += 'mkdir datacards_shape\n'
    bashScript += 'mv MuMuTauTau datacards_shape/MuMuTauTau\n'
    bashScript += 'files=`python -c "import PSet; print \' \'.join(list(PSet.process.source.fileNames))"`\n'
    bashScript += 'echo $files\n'
    bashScript += 'for A in $files; do\n'
    for cc in combineCommands:
        bashScript += cc.format(datacard=datacard, h=h, a=a, tag=tag) + '\n'
    bashScript += 'done\n'
    bashScript += """echo '''<FrameworkJobReport>\
<ReadBranches>\n </ReadBranches>\n <PerformanceReport>\n <PerformanceSummary Metric="StorageStatistics">\n <Metric Name="Parameter-untracked-bool-enabled" Value="true"/>\n <Metric Name="Parameter-untracked-bool-stats" Value="true"/>\n <Metric Name="Parameter-untracked-string-cacheHint" Value="application-only"/>\n <Metric Name="Parameter-untracked-string-readHint" Value="auto-detect"/>\n <Metric Name="ROOT-tfile-read-totalMegabytes" Value="0"/>\n <Metric Name="ROOT-tfile-write-totalMegabytes" Value="0"/>\n </PerformanceSummary>\n </PerformanceReport>\n <GeneratorInfo>\n </GeneratorInfo>\n </FrameworkJobReport>''' > FrameworkJobReport.xml\n"""
    with open(bash_name, 'w') as file:
        file.write(bashScript)
    os.system('chmod +x {0}'.format(bash_name))

    # setup crab config
    from CRABClient.UserUtilities import config
    config = config()
    config.General.workArea = submit_dir
    config.General.transferOutputs = True
    config.JobType.pluginName = 'Analysis'
    config.JobType.psetName = '{0}/src/DevTools/Utilities/test/PSet.py'.format(os.environ['CMSSW_BASE'])
    config.JobType.scriptExe = bash_name
    config.JobType.outputFiles = []
    config.JobType.inputFiles = ['datacards_shape/MuMuTauTau']
    config.Data.outLFNDirBase = '/store/user/{}/{}/{}/{}'.format(UNAME, jobName, tag, h)
    config.Data.outputDatasetTag = jobName
    config.Data.userInputFiles = [str(a) for a in amasses]
    config.Data.splitting = 'FileBased'
    config.Data.unitsPerJob = pointsPerJob
    config.Data.outputPrimaryDataset = 'Limits'
    config.Site.storageSite = 'T2_US_Wisconsin'

    # submit
    submitArgs = ['--config', config]
    if dryrun:
        submitArgs += ['--dryrun']

    from CRABClient.ClientExceptions import ClientException
    from CRABClient.ClientUtilities import initLoggers
    from httplib import HTTPException
    import CRABClient.Commands.submit as crabClientSubmit

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)
    try:
        logging.info('Submitting {}/{}/{}'.format(jobName, tag, h))
        crabClientSubmit.submit(logger, submitArgs)()
    except HTTPException as hte:
        logging.info("Submission failed: {}".format(hte.headers))
    except ClientException as cle:
        logging.info("Submission failed: {}".format(cle))
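
# Hypothetical invocation of submitLimitCrab (all argument values are
# illustrative; the dry run avoids an actual submission):
#     submitLimitCrab('myTag', '125', [5, 7, 9],
#                     jobName='limits_v1', pointsPerJob=10,
#                     parametric=False, dryrun=True)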
            success.append(filename)
            retry += 1  # To retrieve retried job log, if there is any.
        except HTTPException as hte:
            succeded = False
            # Ignore the exception if the HTTP status code is 404. Status 404 means file
            # not found (see http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html). File
            # not found error is expected, since we try all the job retries.
            if hasattr(hte.args[0], 'status') and hte.args[0].status != 404:
                self.logger.debug(str(hte))
                failed.append(filename)

    return failed, success


class nil:
    def __getattr__(self, attr):
        return self

    def __call__(self, *args, **kwargs):
        return None


import sys
import trace
from CRABClient.ClientUtilities import initLoggers, flushMemoryLogger, removeLoggerHandlers

tblogger, logger, memhandler = initLoggers()
try:
    trace = trace.Trace()
    #trace.runfunc(getlog(logger, sys.argv[1:]))
    getlog(logger, sys.argv[1:])()
finally:
    flushMemoryLogger(tblogger, memhandler, logger.logfile)
    removeLoggerHandlers(tblogger)
    removeLoggerHandlers(logger)
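
# The nil class above is a null object: attribute access returns the instance
# itself and calling it returns None, so arbitrarily chained lookups never
# raise. A short demonstration (illustrative, not from the original source):
_n = nil()
assert _n.anything.chained.further is _n
assert _n('ignored', key='value') is None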
def submit_untracked_crab(args):
    '''Submit jobs from an inputDirectory'''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)
    config.Site.whitelist = [args.site]  # whitelist site, run on same site as files located

    # get samples
    sampleList = hdfs_ls_directory(args.inputDirectory)

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        if hasattr(args, 'sampleFilter'):
            submitSample = False
            for sampleFilter in args.sampleFilter:
                if fnmatch.fnmatch(sample, sampleFilter):
                    submitSample = True
            if not submitSample:
                continue
        primaryDataset = sample
        config.General.requestName = '{0}'.format(primaryDataset)
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99]  # Warning: may not be unique now
        config.Data.outputPrimaryDataset = primaryDataset
        # get file list
        inputFiles = get_hdfs_root_files(args.inputDirectory, sample)
        config.Data.userInputFiles = inputFiles
        totalFiles = len(inputFiles)
        if totalFiles == 0:
            logging.warning('{0} {1} has no files.'.format(args.inputDirectory, sample))
            continue
        filesPerJob = args.filesPerJob
        if args.gigabytesPerJob:
            totalSize = get_hdfs_directory_size(os.path.join(args.inputDirectory, sample))
            if totalSize:
                averageSize = totalSize / totalFiles
                GB = 1024. * 1024. * 1024.
                filesPerJob = int(math.ceil(args.gigabytesPerJob * GB / averageSize))
        if hasattr(args, 'jsonFilesPerJob') and args.jsonFilesPerJob:
            if os.path.isfile(args.jsonFilesPerJob):
                with open(args.jsonFilesPerJob) as f:
                    data = json.load(f)
                if sample in data:
                    filesPerJob = data[sample]
            else:
                logging.error('JSON map {0} for jobs does not exist'.format(args.jsonFilesPerJob))
                return
        config.Data.unitsPerJob = filesPerJob
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
def submitLimitCrab(tag, h, amasses, **kwargs):
    dryrun = kwargs.get('dryrun', False)
    jobName = kwargs.get('jobName', None)
    pointsPerJob = kwargs.get('pointsPerJob', 10)
    parametric = kwargs.get('parametric', False)
    postscript = kwargs.get('postscript', '')

    a = '${A}'
    datacard = 'datacards_shape/MuMuTauTau/{}_HToAAH{}A{}{}.txt'.format(tag, h, 'X' if parametric else '${A}', postscript)

    combineCommands = getCommands(**kwargs)

    sample_dir = '/{}/{}/crab_projects/{}/{}{}/{}'.format(scratchDir, pwd.getpwuid(os.getuid())[0], jobName, tag, postscript, h)
    python_mkdir(sample_dir)

    # create submit dir
    submit_dir = '{}/crab'.format(sample_dir)
    if os.path.exists(submit_dir):
        logging.warning('Submission directory exists for {0}.'.format(jobName))
        return

    # create bash script
    bash_name = '{}/script.sh'.format(sample_dir)
    bashScript = '#!/bin/bash\n'
    bashScript += 'eval `scramv1 runtime -sh`\n'
    bashScript += 'ls\n'
    bashScript += 'printenv\n'
    bashScript += 'mkdir datacards_shape\n'
    bashScript += 'mv MuMuTauTau datacards_shape/MuMuTauTau\n'
    bashScript += 'files=`python -c "import PSet; print \' \'.join(list(PSet.process.source.fileNames))"`\n'
    bashScript += 'echo $files\n'
    bashScript += 'for A in $files; do\n'
    for cc in combineCommands:
        bashScript += cc.format(datacard=datacard, h=h, a=a, tag=tag, postscript=postscript) + '\n'
    bashScript += 'done\n'
    bashScript += """echo '''<FrameworkJobReport>\
<ReadBranches>\n </ReadBranches>\n <PerformanceReport>\n <PerformanceSummary Metric="StorageStatistics">\n <Metric Name="Parameter-untracked-bool-enabled" Value="true"/>\n <Metric Name="Parameter-untracked-bool-stats" Value="true"/>\n <Metric Name="Parameter-untracked-string-cacheHint" Value="application-only"/>\n <Metric Name="Parameter-untracked-string-readHint" Value="auto-detect"/>\n <Metric Name="ROOT-tfile-read-totalMegabytes" Value="0"/>\n <Metric Name="ROOT-tfile-write-totalMegabytes" Value="0"/>\n </PerformanceSummary>\n </PerformanceReport>\n <GeneratorInfo>\n </GeneratorInfo>\n </FrameworkJobReport>''' > FrameworkJobReport.xml\n"""
    with open(bash_name, 'w') as file:
        file.write(bashScript)
    os.system('chmod +x {0}'.format(bash_name))

    # setup crab config
    from CRABClient.UserUtilities import config
    config = config()
    config.General.workArea = submit_dir
    config.General.transferOutputs = True
    config.JobType.pluginName = 'Analysis'
    config.JobType.psetName = '{0}/src/DevTools/Utilities/test/PSet.py'.format(os.environ['CMSSW_BASE'])
    config.JobType.scriptExe = bash_name
    config.JobType.outputFiles = []
    config.JobType.inputFiles = ['datacards_shape/MuMuTauTau']
    config.Data.outLFNDirBase = '/store/user/{}/{}/{}/{}'.format(UNAME, jobName, tag, h)
    config.Data.outputDatasetTag = jobName
    config.Data.userInputFiles = [str(a) for a in amasses]
    config.Data.splitting = 'FileBased'
    config.Data.unitsPerJob = pointsPerJob
    config.Data.outputPrimaryDataset = 'Limits'
    config.Site.storageSite = 'T2_US_Wisconsin'

    # submit
    submitArgs = ['--config', config]
    if dryrun:
        submitArgs += ['--dryrun']

    from CRABClient.ClientExceptions import ClientException
    from CRABClient.ClientUtilities import initLoggers
    from httplib import HTTPException
    import CRABClient.Commands.submit as crabClientSubmit

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)
    try:
        logging.info('Submitting {}/{}/{}'.format(jobName, tag, h))
        crabClientSubmit.submit(logger, submitArgs)()
    except HTTPException as hte:
        logging.info("Submission failed: {}".format(hte.headers))
    except ClientException as cle:
        logging.info("Submission failed: {}".format(cle))
def __call__(self):
    (options, args) = self.parser.parse_args()

    ## The default logfile destination is ./crab.log. It will be changed once we
    ## know/create the CRAB project directory.
    if options.quiet:
        setConsoleLogLevelVar(logging.WARNING)
    elif options.debug:
        setConsoleLogLevelVar(logging.DEBUG)
    self.tblogger, self.logger, self.memhandler = initLoggers()

    #Instructions needed in case of early failures: sometimes the traceback logger
    #has not been set yet.
    ## Will replace Python's sys.excepthook default function with the next function.
    ## This function is used for handling uncaught exceptions (in a Python program
    ## this happens just before the program exits).
    ## In this function:
    ## - make sure everything is logged to the crab.log file;
    ## However, we already have a `finally' clause where we make sure everything is
    ## logged to the crab log file.
    def log_exception(exc_type, exc_value, tback):
        """
        Send a short version of the exception to the console,
        a long version to the log.

        Adapted from Doug Hellmann.

        This might help sometimes:
        import traceback, pprint; pprint.pprint(traceback.format_tb(tback))
        """
        ## Add to the CRAB3 logger a file handler to the log file (if it doesn't
        ## have it already).
        tbLogger = logging.getLogger('CRAB3')
        hasFileHandler = False
        for h in tbLogger.handlers:
            if isinstance(h, logging.FileHandler) and h.stream.name == client.logger.logfile:
                hasFileHandler = True
        if not hasFileHandler:
            filehandler = logging.FileHandler(client.logger.logfile)
            filehandler.setFormatter(LOGFORMATTER)
            tbLogger.addHandler(filehandler)
        ## This goes to the log file.
        tbLogger.error("Unhandled Exception!")
        tbLogger.error(exc_value, exc_info=(exc_type, exc_value, tback))
        ## This goes to the console (via the CRAB3.all logger) and to the log file
        ## (via the parent CRAB3 logger).
        logger = logging.getLogger('CRAB3.all')
        logger.error("ERROR: %s: %s", exc_type.__name__, exc_value)
        logger.error("\n\tPlease email %s for support with the crab.log file or crab.log URL.", FEEDBACKMAIL)
        logger.error("\tClient Version: %s", client_version)
        logger.error("\tPlease use 'crab uploadlog' to upload the log file %s to the CRAB cache.", client.logger.logfile)

    sys.excepthook = log_exception

    # check that the command is valid
    if len(args) == 0:
        print("You have not specified a command.")
        # The valid commands are described in the epilog; reuse it here.
        print(self.parser.epilog)
        sys.exit(-1)

    sub_cmd = None
    try:
        sub_cmd = next(v for k, v in self.subCommands.items() if args[0] in v.shortnames or args[0] == v.name)
    except StopIteration:
        print("'" + str(args[0]) + "' is not a valid command.")
        self.parser.print_help()
        sys.exit(-1)
    self.cmd = sub_cmd(self.logger, args[1:])

    self.cmd()
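
# Sketch of how this __call__ is typically reached (an assumption: the enclosing
# class name and construction follow the usual CRAB client entry point):
#     if __name__ == '__main__':
#         client = CRABCmd()      # illustrative name for the class owning __call__
#         client()                # parses sys.argv and dispatches the chosen subcommand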