def submit_untracked_crab(args):
    '''Submit jobs from an inputDirectory.

    args must provide: inputDirectory, dryrun, and whatever get_config
    reads; optionally site (whitelist destination).
    '''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)
    # Whitelist a single site so jobs run where the files live.
    # Generalized: honor args.site when provided; fall back to the
    # previously hard-coded T2_US_Wisconsin for backward compatibility.
    config.Site.whitelist = [getattr(args, 'site', None) or 'T2_US_Wisconsin']

    # get samples
    sampleList = hdfs_ls_directory(args.inputDirectory)

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        primaryDataset = sample
        config.General.requestName = '{0}'.format(primaryDataset)
        # CRAB caps requestName at 100 characters; truncation means the
        # name may no longer be unique.
        config.General.requestName = config.General.requestName[:99]
        config.Data.outputPrimaryDataset = primaryDataset
        # get file list
        config.Data.userInputFiles = get_hdfs_root_files(args.inputDirectory, sample)
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
def testLumiSubmit(self):
    """Test submission with the lumiMask parameter."""
    sections = ["General", "User", "Data", "Site", "JobType"]
    lumiConf = Configuration()
    for sec in sections:
        lumiConf.section_(sec)
    lumiConf.General.serverUrl = "localhost:8518"
    lumiConf.JobType.externalPluginFile = os.path.join(
        os.path.dirname(__file__), "TestPlugin.py")
    lumiConf.Site.storageSite = 'T2_XXX'
    # NOTE(review): this absolute path is computed but unused — the config
    # below passes the bare filename 'lumiInput.json'; confirm which one
    # the lumi-mask handling expects.
    lumiInput = os.path.join(os.path.dirname(__file__),
                             "../../../data/lumiInput.json")
    lumiConf.Data.splitting = 'LumiBased'
    lumiConf.Data.lumiMask = 'lumiInput.json'
    sub = submit(self.logger, self.maplistopt + [
        "-c", lumiConf,
        "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni",
        "-s", "127.0.0.1:8518"])
    res = sub()
    expRes = CommandResult(0, None)
    # assertEqual: assertEquals is a deprecated alias
    self.assertEqual(res, expRes)
def testLumiSubmit(self):
    """Test submission with the lumiMask parameter."""
    sections = ["General", "User", "Data", "Site", "JobType"]
    lumiConf = Configuration()
    for sec in sections:
        lumiConf.section_(sec)
    lumiConf.General.serverUrl = "localhost:8518"
    lumiConf.JobType.externalPluginFile = os.path.join(
        os.path.dirname(__file__), "TestPlugin.py")
    lumiConf.Site.storageSite = 'T2_XXX'
    # NOTE(review): computed but unused — the config below passes the bare
    # filename 'lumiInput.json'; confirm which one is intended.
    lumiInput = os.path.join(
        os.path.dirname(__file__), "../../../data/lumiInput.json")
    lumiConf.Data.splitting = 'LumiBased'
    lumiConf.Data.lumiMask = 'lumiInput.json'
    sub = submit(self.logger, self.maplistopt +
                 ["-c", lumiConf,
                  "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni",
                  "-s", "127.0.0.1:8518"])
    res = sub()
    expRes = CommandResult(0, None)
    # assertEqual: assertEquals is a deprecated alias
    self.assertEqual(res, expRes)
def submit_das_crab(args):
    '''Submit samples using DAS.

    Samples come either from args.samples directly or one-per-line from
    the file args.sampleList. Each sample is a DAS dataset path of the
    form /primary/tag/format.
    '''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)

    # get samples
    sampleList = []
    if args.samples:
        sampleList += args.samples
    elif os.path.isfile(args.sampleList):
        with open(args.sampleList, 'r') as f:
            sampleList = [line.strip() for line in f]
    else:
        log.error('Sample input list {0} does not exist.'.format(args.sampleList))

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        # lookup reasonable sites
        if args.ignoreLocality:
            sites = get_sites(sample)
            if sites:  # if we found an ignoreLocality site list
                config.Data.ignoreLocality = True
                config.Site.whitelist = sites
            else:
                logging.warning('Not enabling ignoreLocality, no sites found')
        _, primaryDataset, datasetTag, dataFormat = sample.split('/')
        config.General.requestName = '{0}'.format(primaryDataset)
        maxDatasetTagSize = 97 - len(primaryDataset)
        config.General.requestName += '_' + datasetTag[-maxDatasetTagSize:]
        # make it only 100 characters; warning: may not be unique now
        config.General.requestName = config.General.requestName[:99]
        config.Data.inputDataset = sample
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs += ['--dryrun']
        # FIX: use the module logger instead of bare Python-2 print
        # statements so output goes through the configured handlers
        log.info('Will submit with args: {0}'.format(submitArgs))
        log.info(config.__str__())
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
            # save config file text
            outdir = os.path.join(config.General.workArea,
                                  'crab_{0}'.format(config.General.requestName),
                                  'inputs/crabConfig.py')
            with open(outdir, 'w') as f:
                f.write(config.__str__())
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
def testSubmit(self):
    """Exercise the submit command: missing config file, incomplete
    configurations, and a complete configuration with run lists."""
    #Delete workdir
    if os.path.isdir("crab_" + self.TestConfig.config.General.requestName):
        shutil.rmtree("crab_" + self.TestConfig.config.General.requestName)
    #2) The config file is not found
    sub = submit(self.logger, self.maplistopt + ["-c", "asdf", "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni", \
                 "-s", "127.0.0.1:8518"])
    res = sub()
    self.assertEqual(res[0], 1)
    #3) Is the client checking the configurations?
    #If a mandatory section is not there => fail!
    sections = ["General", "User", "Data", "Site", "JobType"]  #mandatory sections
    emptyConf = Configuration()
    for sec in sections:
        # NOTE(review): this resubmits with "-c asdf" (the missing-file
        # config) each iteration rather than the partially built
        # emptyConf, so the loop re-tests the missing-file failure, not
        # the missing-section check — confirm whether emptyConf was
        # intended here.
        sub = submit(self.logger, self.maplistopt + ["-c", "asdf", "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni", \
                     "-s", "127.0.0.1:8518"])
        res = sub()
        self.assertEqual(res[0], 1)
        emptyConf.section_(sec)
    # complete the configuration so submission can succeed
    emptyConf.General.serverUrl = "localhost:8518"
    emptyConf.General.requestName = 'TestAnalysisSubmit'
    emptyConf.JobType.externalPluginFile = os.path.join( os.path.dirname(__file__), "TestPlugin.py")
    emptyConf.Site.storageSite = 'T2_XXX'
    expRes = CommandResult(0, None)
    sub = submit(self.logger, self.maplistopt + ["-c", emptyConf, "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni", \
                 "-s", "127.0.0.1:8518"])
    res = sub()
    self.assertEquals(res, expRes)
    shutil.rmtree("./crab_TestAnalysisSubmit")
    # resubmit with run white/black lists set
    emptyConf.Data.runWhitelist = '1,3,9-13'
    emptyConf.Data.runBlacklist = '1,3,9-13'
    expRes = CommandResult(0, None)
    sub = submit(self.logger, self.maplistopt + ["-c", emptyConf, "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni", \
                 "-s", "127.0.0.1:8518"])
    res = sub()
    self.assertEquals(res, expRes)
def testSubmit(self):
    """Exercise the submit command: missing config file, incomplete
    configurations, and a complete configuration with run lists."""
    #Delete workdir
    if os.path.isdir("crab_" + self.TestConfig.config.General.requestName):
        shutil.rmtree("crab_" + self.TestConfig.config.General.requestName)
    #2) The config file is not found
    sub = submit(self.logger, self.maplistopt + ["-c", "asdf", "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni", \
                 "-s", "127.0.0.1:8518"])
    res = sub()
    self.assertEqual(res[0], 1)
    #3) Is the client checking the configurations?
    #If a mandatory section is not there => fail!
    sections = ["General", "User", "Data", "Site", "JobType"]#mandatory sections
    emptyConf = Configuration()
    for sec in sections:
        # NOTE(review): this resubmits with "-c asdf" (the missing-file
        # config) each iteration rather than the partially built
        # emptyConf, so the loop re-tests the missing-file failure, not
        # the missing-section check — confirm whether emptyConf was
        # intended here.
        sub = submit(self.logger, self.maplistopt + ["-c", "asdf", "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni", \
                     "-s", "127.0.0.1:8518"])
        res = sub()
        self.assertEqual(res[0], 1)
        emptyConf.section_(sec)
    # complete the configuration so submission can succeed
    emptyConf.General.serverUrl = "localhost:8518"
    emptyConf.General.requestName = 'TestAnalysisSubmit'
    emptyConf.JobType.externalPluginFile = os.path.join( os.path.dirname(__file__), "TestPlugin.py")
    emptyConf.Site.storageSite = 'T2_XXX'
    expRes = CommandResult(0, None)
    sub = submit(self.logger, self.maplistopt + ["-c", emptyConf, "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni", \
                 "-s", "127.0.0.1:8518"])
    res = sub()
    self.assertEquals(res, expRes)
    shutil.rmtree("./crab_TestAnalysisSubmit")
    # resubmit with run white/black lists set
    emptyConf.Data.runWhitelist = '1,3,9-13'
    emptyConf.Data.runBlacklist = '1,3,9-13'
    expRes = CommandResult(0, None)
    sub = submit(self.logger, self.maplistopt + ["-c", emptyConf, "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni", \
                 "-s", "127.0.0.1:8518"])
    res = sub()
    self.assertEquals(res, expRes)
def submit_das_crab(args):
    '''Submit samples using DAS'''
    tblogger, logger, memhandler = initLoggers()
    for handler in (tblogger, logger, memhandler):
        handler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)

    # assemble the sample list: command line first, then the list file
    sampleList = []
    if args.samples:
        sampleList += args.samples
    elif os.path.isfile(args.sampleList):
        with open(args.sampleList, 'r') as listFile:
            sampleList = [entry.strip() for entry in listFile]
    else:
        log.error('Sample input list {0} does not exist.'.format(args.sampleList))

    submitMap = {}
    for sample in sampleList:
        # DAS dataset names look like /primary/tag/format
        _, primaryDataset, datasetTag, dataFormat = sample.split('/')
        requestName = '{0}'.format(primaryDataset)
        maxDatasetTagSize = 97 - len(primaryDataset)
        requestName += '_' + datasetTag[-maxDatasetTagSize:]
        # cap at 100 characters; the truncated name may no longer be unique
        config.General.requestName = requestName[:99]
        config.Data.inputDataset = sample

        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs.append('--dryrun')
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
def submit_das_crab(args):
    '''Submit samples using DAS'''

    def _request_name(dataset):
        # build a request name from the dataset pieces, capped at 100
        # characters (the truncated name may not be unique)
        _, primaryDataset, datasetTag, dataFormat = dataset.split('/')
        name = '{0}'.format(primaryDataset)
        maxDatasetTagSize = 97 - len(primaryDataset)
        name += '_' + datasetTag[-maxDatasetTagSize:]
        return name[:99]

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)

    # get samples from the command line, or a one-per-line list file
    if args.samples:
        sampleList = list(args.samples)
    elif os.path.isfile(args.sampleList):
        with open(args.sampleList, 'r') as f:
            sampleList = [line.strip() for line in f]
    else:
        sampleList = []
        log.error('Sample input list {0} does not exist.'.format(args.sampleList))

    submitMap = {}
    for sample in sampleList:
        config.General.requestName = _request_name(sample)
        config.Data.inputDataset = sample
        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
def submit_untracked_crab(args):
    '''Submit jobs from an inputDirectory'''
    tblogger, logger, memhandler = initLoggers()
    for h in (tblogger, logger, memhandler):
        h.setLevel(logging.INFO)

    # crab config; whitelist site, run on same site as files located
    config = get_config(args)
    config.Site.whitelist = [args.site]

    submitMap = {}
    # one submission per sample directory found under inputDirectory
    for sample in hdfs_ls_directory(args.inputDirectory):
        primaryDataset = sample
        # request names are capped at 100 chars, so may not be unique
        config.General.requestName = '{0}'.format(primaryDataset)[:99]
        config.Data.outputPrimaryDataset = primaryDataset
        # collect the ROOT files for this sample
        config.Data.userInputFiles = get_hdfs_root_files(args.inputDirectory, sample)

        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs.append('--dryrun')
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
def submitLimitCrab(tag, h, amasses, **kwargs):
    '''Submit combine limit jobs via CRAB.

    tag/h identify the datacard; amasses are the A-mass points, abused as
    the "input files" so CRAB's FileBased splitting distributes points
    across jobs. Recognized kwargs: dryrun, jobName, pointsPerJob,
    parametric, postscript (plus whatever getCommands consumes).
    '''
    dryrun = kwargs.get('dryrun', False)
    jobName = kwargs.get('jobName', None)
    pointsPerJob = kwargs.get('pointsPerJob', 10)
    parametric = kwargs.get('parametric', False)
    postscript = kwargs.get('postscript', '')
    a = '${A}'  # expanded by the bash loop on the worker node
    datacard = 'datacards_shape/MuMuTauTau/{}_HToAAH{}A{}{}.txt'.format(
        tag, h, 'X' if parametric else '${A}', postscript)
    combineCommands = getCommands(**kwargs)

    sample_dir = '/{}/{}/crab_projects/{}/{}{}/{}'.format(
        scratchDir, pwd.getpwuid(os.getuid())[0], jobName, tag, postscript, h)
    python_mkdir(sample_dir)

    # create submit dir; refuse to double-submit
    submit_dir = '{}/crab'.format(sample_dir)
    if os.path.exists(submit_dir):
        logging.warning('Submission directory exists for {0}.'.format(jobName))
        return

    # create bash script executed on the worker node: loop over the mass
    # points handed to the job and run combine for each
    bash_name = '{}/script.sh'.format(sample_dir)
    bashScript = '#!/bin/bash\n'
    bashScript += 'eval `scramv1 runtime -sh`\n'
    bashScript += 'ls\n'
    bashScript += 'printenv\n'
    bashScript += 'mkdir datacards_shape\n'
    bashScript += 'mv MuMuTauTau datacards_shape/MuMuTauTau\n'
    bashScript += 'files=`python -c "import PSet; print \' \'.join(list(PSet.process.source.fileNames))"`\n'
    bashScript += 'echo $files\n'
    bashScript += 'for A in $files; do\n'
    for cc in combineCommands:
        bashScript += cc.format(datacard=datacard, h=h, a=a, tag=tag, postscript=postscript) + '\n'
    bashScript += 'done\n'
    # dummy framework job report so CRAB considers the job successful
    bashScript += """echo '''<FrameworkJobReport>\
 <ReadBranches>\n </ReadBranches>\n <PerformanceReport>\n <PerformanceSummary Metric="StorageStatistics">\n <Metric Name="Parameter-untracked-bool-enabled" Value="true"/>\n <Metric Name="Parameter-untracked-bool-stats" Value="true"/>\n <Metric Name="Parameter-untracked-string-cacheHint" Value="application-only"/>\n <Metric Name="Parameter-untracked-string-readHint" Value="auto-detect"/>\n <Metric Name="ROOT-tfile-read-totalMegabytes" Value="0"/>\n <Metric Name="ROOT-tfile-write-totalMegabytes" Value="0"/>\n </PerformanceSummary>\n </PerformanceReport>\n <GeneratorInfo>\n </GeneratorInfo>\n </FrameworkJobReport>''' > FrameworkJobReport.xml\n"""
    # FIX: don't shadow the builtin 'file'
    with open(bash_name, 'w') as f:
        f.write(bashScript)
    # FIX: set the executable bit directly instead of shelling out
    os.chmod(bash_name, 0o755)

    # setup crab config
    from CRABClient.UserUtilities import config
    config = config()
    config.General.workArea = submit_dir
    config.General.transferOutputs = True
    config.JobType.pluginName = 'Analysis'
    config.JobType.psetName = '{0}/src/DevTools/Utilities/test/PSet.py'.format(os.environ['CMSSW_BASE'])
    config.JobType.scriptExe = bash_name
    config.JobType.outputFiles = []
    config.JobType.inputFiles = ['datacards_shape/MuMuTauTau']
    config.Data.outLFNDirBase = '/store/user/{}/{}/{}/{}'.format(UNAME, jobName, tag, h)
    config.Data.outputDatasetTag = jobName
    config.Data.userInputFiles = [str(a) for a in amasses]
    config.Data.splitting = 'FileBased'
    config.Data.unitsPerJob = pointsPerJob
    config.Data.outputPrimaryDataset = 'Limits'
    config.Site.storageSite = 'T2_US_Wisconsin'

    # submit
    submitArgs = ['--config', config]
    if dryrun:
        submitArgs += ['--dryrun']
    from CRABClient.ClientExceptions import ClientException
    from CRABClient.ClientUtilities import initLoggers
    from httplib import HTTPException
    import CRABClient.Commands.submit as crabClientSubmit
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)
    try:
        logging.info('Submitting {}/{}/{}'.format(jobName, tag, h))
        crabClientSubmit.submit(logger, submitArgs)()
    except HTTPException as hte:
        logging.info("Submission failed: {}".format(hte.headers))
    except ClientException as cle:
        logging.info("Submission failed: {}".format(cle))
def submit_untracked_crab(args):
    '''Submit jobs from an inputDirectory.

    Supports optional glob filtering (args.sampleFilter), size-based job
    splitting (args.gigabytesPerJob) and a per-sample override map
    (args.jsonFilesPerJob).
    '''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config; whitelist site, run on same site as files located
    config = get_config(args)
    config.Site.whitelist = [args.site]

    # get samples
    sampleList = hdfs_ls_directory(args.inputDirectory)

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        # optional glob-style filtering of samples
        if hasattr(args, 'sampleFilter'):
            submitSample = False
            for sampleFilter in args.sampleFilter:
                if fnmatch.fnmatch(sample, sampleFilter):
                    submitSample = True
            if not submitSample:
                continue
        primaryDataset = sample
        config.General.requestName = '{0}'.format(primaryDataset)
        # make it only 100 characters; warning: may not be unique now
        config.General.requestName = config.General.requestName[:99]
        config.Data.outputPrimaryDataset = primaryDataset
        # get file list
        inputFiles = get_hdfs_root_files(args.inputDirectory, sample)
        config.Data.userInputFiles = inputFiles
        totalFiles = len(inputFiles)
        if totalFiles == 0:
            # FIX: 'inputDirectory' was an undefined name here (NameError
            # whenever a sample had no files); use args.inputDirectory
            logging.warning('{0} {1} has no files.'.format(args.inputDirectory, sample))
            continue
        filesPerJob = args.filesPerJob
        if args.gigabytesPerJob:
            totalSize = get_hdfs_directory_size(os.path.join(args.inputDirectory, sample))
            if totalSize:
                # FIX: float() avoids Python 2 integer truncation of the
                # average file size
                averageSize = float(totalSize) / totalFiles
                GB = 1024. * 1024. * 1024.
                filesPerJob = int(math.ceil(args.gigabytesPerJob * GB / averageSize))
        # optional per-sample override from a JSON map
        if hasattr(args, 'jsonFilesPerJob') and args.jsonFilesPerJob:
            if os.path.isfile(args.jsonFilesPerJob):
                with open(args.jsonFilesPerJob) as f:
                    data = json.load(f)
                if sample in data:
                    filesPerJob = data[sample]
            else:
                logging.error('JSON map {0} for jobs does not exist'.format(args.jsonFilesPerJob))
                return
        config.Data.unitsPerJob = filesPerJob
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
def submitLimitCrab(tag, h, amasses, **kwargs):
    '''Submit combine limit jobs via CRAB.

    tag/h identify the datacard; amasses are the A-mass points, abused as
    the "input files" so CRAB's FileBased splitting distributes points
    across jobs. Recognized kwargs: dryrun, jobName, pointsPerJob,
    parametric (plus whatever getCommands consumes).
    '''
    dryrun = kwargs.get('dryrun', False)
    jobName = kwargs.get('jobName', None)
    pointsPerJob = kwargs.get('pointsPerJob', 10)
    parametric = kwargs.get('parametric', False)
    a = '${A}'  # expanded by the bash loop on the worker node
    datacard = 'datacards_shape/MuMuTauTau/mmmt_{}_HToAAH{}A{}.txt'.format(tag, h, 'X' if parametric else '${A}')
    combineCommands = getCommands(**kwargs)

    sample_dir = '/{}/{}/crab_projects/{}/{}/{}'.format(scratchDir, pwd.getpwuid(os.getuid())[0], jobName, tag, h)
    python_mkdir(sample_dir)

    # create submit dir; refuse to double-submit
    submit_dir = '{}/crab'.format(sample_dir)
    if os.path.exists(submit_dir):
        logging.warning('Submission directory exists for {0}.'.format(jobName))
        return

    # create bash script executed on the worker node: loop over the mass
    # points handed to the job and run combine for each
    bash_name = '{}/script.sh'.format(sample_dir)
    bashScript = '#!/bin/bash\n'
    bashScript += 'eval `scramv1 runtime -sh`\n'
    bashScript += 'ls\n'
    bashScript += 'printenv\n'
    bashScript += 'mkdir datacards_shape\n'
    bashScript += 'mv MuMuTauTau datacards_shape/MuMuTauTau\n'
    bashScript += 'files=`python -c "import PSet; print \' \'.join(list(PSet.process.source.fileNames))"`\n'
    bashScript += 'echo $files\n'
    bashScript += 'for A in $files; do\n'
    for cc in combineCommands:
        bashScript += cc.format(datacard=datacard, h=h, a=a, tag=tag) + '\n'
    bashScript += 'done\n'
    # dummy framework job report so CRAB considers the job successful
    bashScript += """echo '''<FrameworkJobReport>\
 <ReadBranches>\n </ReadBranches>\n <PerformanceReport>\n <PerformanceSummary Metric="StorageStatistics">\n <Metric Name="Parameter-untracked-bool-enabled" Value="true"/>\n <Metric Name="Parameter-untracked-bool-stats" Value="true"/>\n <Metric Name="Parameter-untracked-string-cacheHint" Value="application-only"/>\n <Metric Name="Parameter-untracked-string-readHint" Value="auto-detect"/>\n <Metric Name="ROOT-tfile-read-totalMegabytes" Value="0"/>\n <Metric Name="ROOT-tfile-write-totalMegabytes" Value="0"/>\n </PerformanceSummary>\n </PerformanceReport>\n <GeneratorInfo>\n </GeneratorInfo>\n </FrameworkJobReport>''' > FrameworkJobReport.xml\n"""
    # FIX: don't shadow the builtin 'file'
    with open(bash_name, 'w') as f:
        f.write(bashScript)
    # FIX: set the executable bit directly instead of shelling out
    os.chmod(bash_name, 0o755)

    # setup crab config
    from CRABClient.UserUtilities import config
    config = config()
    config.General.workArea = submit_dir
    config.General.transferOutputs = True
    config.JobType.pluginName = 'Analysis'
    config.JobType.psetName = '{0}/src/DevTools/Utilities/test/PSet.py'.format(os.environ['CMSSW_BASE'])
    config.JobType.scriptExe = bash_name
    config.JobType.outputFiles = []
    config.JobType.inputFiles = ['datacards_shape/MuMuTauTau']
    config.Data.outLFNDirBase = '/store/user/{}/{}/{}/{}'.format(UNAME, jobName, tag, h)
    config.Data.outputDatasetTag = jobName
    config.Data.userInputFiles = [str(a) for a in amasses]
    config.Data.splitting = 'FileBased'
    config.Data.unitsPerJob = pointsPerJob
    config.Data.outputPrimaryDataset = 'Limits'
    config.Site.storageSite = 'T2_US_Wisconsin'

    # submit
    submitArgs = ['--config', config]
    if dryrun:
        submitArgs += ['--dryrun']
    from CRABClient.ClientExceptions import ClientException
    from CRABClient.ClientUtilities import initLoggers
    from httplib import HTTPException
    import CRABClient.Commands.submit as crabClientSubmit
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)
    try:
        logging.info('Submitting {}/{}/{}'.format(jobName, tag, h))
        crabClientSubmit.submit(logger, submitArgs)()
    except HTTPException as hte:
        logging.info("Submission failed: {}".format(hte.headers))
    except ClientException as cle:
        logging.info("Submission failed: {}".format(cle))
def submit_untracked_crab(args):
    '''Submit jobs from an inputDirectory.

    Supports optional glob filtering (args.sampleFilter), size-based job
    splitting (args.gigabytesPerJob) and a per-sample override map
    (args.jsonFilesPerJob).
    '''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config; whitelist site, run on same site as files located
    config = get_config(args)
    config.Site.whitelist = [args.site]

    # get samples
    sampleList = hdfs_ls_directory(args.inputDirectory)

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        # optional glob-style filtering of samples
        if hasattr(args, 'sampleFilter'):
            submitSample = False
            for sampleFilter in args.sampleFilter:
                if fnmatch.fnmatch(sample, sampleFilter):
                    submitSample = True
            if not submitSample:
                continue
        primaryDataset = sample
        config.General.requestName = '{0}'.format(primaryDataset)
        # make it only 100 characters; warning: may not be unique now
        config.General.requestName = config.General.requestName[:99]
        config.Data.outputPrimaryDataset = primaryDataset
        # get file list
        inputFiles = get_hdfs_root_files(args.inputDirectory, sample)
        config.Data.userInputFiles = inputFiles
        totalFiles = len(inputFiles)
        if totalFiles == 0:
            # FIX: 'inputDirectory' was an undefined name here (NameError
            # whenever a sample had no files); use args.inputDirectory
            logging.warning('{0} {1} has no files.'.format(
                args.inputDirectory, sample))
            continue
        filesPerJob = args.filesPerJob
        if args.gigabytesPerJob:
            totalSize = get_hdfs_directory_size(
                os.path.join(args.inputDirectory, sample))
            if totalSize:
                # FIX: float() avoids Python 2 integer truncation of the
                # average file size
                averageSize = float(totalSize) / totalFiles
                GB = 1024. * 1024. * 1024.
                filesPerJob = int(
                    math.ceil(args.gigabytesPerJob * GB / averageSize))
        # optional per-sample override from a JSON map
        if hasattr(args, 'jsonFilesPerJob') and args.jsonFilesPerJob:
            if os.path.isfile(args.jsonFilesPerJob):
                with open(args.jsonFilesPerJob) as f:
                    data = json.load(f)
                if sample in data:
                    filesPerJob = data[sample]
            else:
                logging.error('JSON map {0} for jobs does not exist'.format(
                    args.jsonFilesPerJob))
                return
        config.Data.unitsPerJob = filesPerJob
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, cle))
def submit_das_crab(args):
    '''Submit samples using DAS.

    Samples come either from args.samples directly or one-per-line from
    the file args.sampleList. Each sample is a DAS dataset path of the
    form /primary/tag/format.
    '''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)

    # get samples
    sampleList = []
    if args.samples:
        sampleList += args.samples
    elif os.path.isfile(args.sampleList):
        with open(args.sampleList, 'r') as f:
            sampleList = [line.strip() for line in f]
    else:
        log.error('Sample input list {0} does not exist.'.format(
            args.sampleList))

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        # lookup reasonable sites
        if args.ignoreLocality:
            sites = get_sites(sample)
            if sites:  # if we found an ignoreLocality site list
                config.Data.ignoreLocality = True
                config.Site.whitelist = sites
            else:
                logging.warning('Not enabling ignoreLocality, no sites found')
        _, primaryDataset, datasetTag, dataFormat = sample.split('/')
        config.General.requestName = '{0}'.format(primaryDataset)
        maxDatasetTagSize = 97 - len(primaryDataset)
        config.General.requestName += '_' + datasetTag[-maxDatasetTagSize:]
        # make it only 100 characters; warning: may not be unique now
        config.General.requestName = config.General.requestName[:99]
        config.Data.inputDataset = sample
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs += ['--dryrun']
        # FIX: use the module logger instead of bare Python-2 print
        # statements so output goes through the configured handlers
        log.info('Will submit with args: {0}'.format(submitArgs))
        log.info(config.__str__())
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
            # save config file text
            outdir = os.path.join(
                config.General.workArea,
                'crab_{0}'.format(config.General.requestName),
                'inputs/crabConfig.py')
            with open(outdir, 'w') as f:
                f.write(config.__str__())
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, cle))