def submission():
    """Submit one CRAB3 skimming task per sample nickname (Fall15 / 76X campaign).

    Each task is submitted from a separate process so repeated CRAB
    submissions do not share client state.
    """
    from CRABClient.UserUtilities import config
    # NOTE(review): getUsernameFromSiteDB, date, check_path, read_grid_control_includes,
    # isData, get_sample_by_nick, Process and submit come from module scope -- confirm.
    config = config()
    config.General.workArea = '/nfs/dust/cms/user/%s/crab_kappa_skim-%s'%(getUsernameFromSiteDB(), date)
    check_path(config.General.workArea)
    config.General.transferOutputs = True
    config.General.transferLogs = True
    # dcms VO group: better fair share at German grid sites
    config.User.voGroup = 'dcms'
    config.JobType.pluginName = 'Analysis'
    config.JobType.psetName = 'kSkimming_run2_cfg.py'
    #config.JobType.inputFiles = ['Summer15_V5_MC.db']
    config.JobType.allowUndistributedCMSSW = True
    config.Site.blacklist = ["T2_BR_SPRACE"]
    config.Data.inputDBS = 'global'
    config.Data.splitting = 'FileBased'
    config.Data.unitsPerJob = 1
    config.Data.outLFNDirBase = '/store/user/%s/higgs-kit/skimming/%s'%(getUsernameFromSiteDB(), date)
    config.Data.publication = False
    config.Site.storageSite = "T2_DE_DESY"

    # load nicknames from gc-style config files and write them to a flat nicknames list
    nicknames = read_grid_control_includes(["samples/13TeV/Fall15_SM_Analysis.conf"])
    #nicknames = ['SUSYGluGluToHToTauTauM160_RunIIFall15MiniAODv2_76X_13TeV_MINIAOD_pythia8']

    # loop over datasets and get respective nicks
    for nickname in nicknames:
        config.General.requestName = nickname
        # data vs. MC decides the global tag passed to the cmsRun config
        config.JobType.pyCfgParams = ['globalTag=76X_dataRun2_16Dec2015_v0' if isData(nickname) else 'globalTag=76X_mcRun2_asymptotic_RunIIFall15DR76_v1' ,'kappaTag=KAPPA_2_1_0','nickname=%s'%(nickname),'outputfilename=kappa_%s.root'%(nickname),'testsuite=False']
        config.JobType.outputFiles = ['kappa_%s.root'%(nickname)]
        config.Data.inputDataset = get_sample_by_nick(nickname)
        # submit from a child process
        p = Process(target=submit, args=(config,))
        p.start()
        p.join()
def submission(events_per_job): from CRABClient.UserUtilities import config config = config() config.General.workArea = '/nfs/dust/cms/user/%s/kappa/crab_kappa_skim80X-%s'%(getUsernameFromSiteDB(), date) #config.General.workArea = '/net/scratch_cms/institut_3b/%s/kappa/crab_kappa_skim-%s'%(getUsernameFromSiteDB(), date) #config.General.workArea = '/nfs/dust/cms/user/<your-NAF-username>/kappa/crab_kappa_skim80X-%s'% date #if CERN-username != NAF-username check_path(config.General.workArea) config.General.transferOutputs = True config.General.transferLogs = True config.User.voGroup = 'dcms' config.JobType.pluginName = 'Analysis' config.JobType.psetName = 'kSkimming_run2_cfg.py' #config.JobType.inputFiles = ['Spring16_25nsV6_DATA.db', 'Spring16_25nsV6_MC.db'] config.JobType.allowUndistributedCMSSW = True config.Site.blacklist = ["T2_BR_SPRACE"] config.Data.splitting = 'FileBased' config.Data.unitsPerJob = 1 config.Data.outLFNDirBase = '/store/user/%s/higgs-kit/skimming/80X_%s'%(getUsernameFromSiteDB(), date) config.Data.publication = False config.Site.storageSite = "T2_DE_DESY" # load nicknames form gc-style config files and write them to a flat nicknames list nicknames = read_grid_control_includes(["samples/13TeV/Summer16_SM_Analysis.conf"]) #nicknames = read_grid_control_includes(["samples/13TeV/Spring16_SM_Higgs_CPmixing_2.conf"]) #nicknames = read_grid_control_includes(["samples/13TeV/2016B_Data.conf"]) #nicknames = ['SUSYGluGluToHToTauTauM160_RunIIFall15MiniAODv2_76X_13TeV_MINIAOD_pythia8'] # loop over datasets and get repsective nicks for nickname in nicknames: config.General.requestName = nickname[:100] config.Data.inputDBS = get_inputDBS_by_nick(nickname) config.Data.unitsPerJob = 1 nfiles = get_n_files_from_nick(nickname) if events_per_job: nevents = get_n_generated_events_from_nick(nickname) try: if int(nfiles) > 0 and int(nevents) > 0: files_per_job = int(events_per_job) * int(nfiles) / int(nevents) if files_per_job > 1: config.Data.unitsPerJob = 
int(files_per_job) except: print "Its not possilbe to make ",events_per_job," events/job for ",nickname," which has Nevents:",nevents," and Nfiles",nfiles," in the database. Just make one file per job" if float(config.Data.unitsPerJob) > 0 and float(nfiles)/float(config.Data.unitsPerJob) >= job_submission_limit: files_per_job = ceil(float(nfiles)/job_submission_limit) if files_per_job > 1: config.Data.unitsPerJob = int(files_per_job) config.JobType.pyCfgParams = ['globalTag=80X_dataRun2_2016SeptRepro_v7' if isData(nickname) else 'globalTag=80X_mcRun2_asymptotic_2016_TrancheIV_v8' if "PUMoriond17" in getScenario(nickname) else 'globalTag=80X_mcRun2_asymptotic_2016_miniAODv2_v1' ,'kappaTag=KAPPA_2_1_0','nickname=%s'%(nickname),'outputfilename=kappa_%s.root'%(nickname),'testsuite=False'] config.JobType.outputFiles = ['kappa_%s.root'%(nickname)] config.Data.inputDataset = get_sample_by_nick(nickname) #config.Data.lumiMask = '/nfs/dust/cms/user/<NAF-username>/kappa/crab_kappa_skim80X-<campaign-date>/results/missingLumis.json' # for running of a subset of lumi sections p = Process(target=submit, args=(config,)) p.start() p.join()
def create_config(): """ Create a default CRAB configuration :return: """ from CRABClient.UserUtilities import config, getUsernameFromSiteDB config = config() config.General.workArea = 'tasks' config.General.transferOutputs = True config.General.transferLogs = True config.JobType.pluginName = 'Analysis' config.JobType.disableAutomaticOutputCollection = True config.JobType.outputFiles = [] config.JobType.allowUndistributedCMSSW = True config.Data.inputDBS = 'global' config.Data.splitting = 'LumiBased' config.Data.outLFNDirBase = '/store/user/%s/' % (getUsernameFromSiteDB()) config.Data.publication = False config.Site.storageSite = 'T2_BE_UCL' return config
def crab_command(command): for dir in glob('/nfs/dust/cms/user/%s/kappa/crab_kappa_skim76X-%s/*'%(getUsernameFromSiteDB(), date)): #for dir in glob('/net/scratch_cms/institut_3b/%s/kappa/crab_kappa_skim-%s/*'%(getUsernameFromSiteDB(), date)): try: crabCommand(command, dir = dir) except HTTPException as hte: print hte
def create_config(is_mc):
    """Build a default CRAB configuration suitable to run the framework.

    :param is_mc: True for MC (file-based splitting), False for data
        (lumi-based splitting).
    :return: a CRABClient configuration object
    """
    from CRABClient.UserUtilities import config as crab_config, getUsernameFromSiteDB

    cfg = crab_config()

    cfg.General.workArea = 'tasks'
    cfg.General.transferOutputs = True
    cfg.General.transferLogs = True

    # The framework runs through a wrapper script around a dummy pset.
    cfg.JobType.pluginName = 'Analysis'
    cfg.JobType.psetName = '../python/dummy_pset.py'
    cfg.JobType.scriptExe = '../bin/runFrameworkOnGrid.sh'
    cfg.JobType.sendPythonFolder = True
    cfg.JobType.disableAutomaticOutputCollection = True
    cfg.JobType.allowUndistributedCMSSW = True
    cfg.JobType.inputFiles = ['../python/runFrameworkOnGrid.py']
    cfg.JobType.outputFiles = ['output.root']

    cfg.Data.inputDBS = 'global'
    # MC splits by file, data by lumi section.
    cfg.Data.splitting = 'FileBased' if is_mc else 'LumiBased'
    cfg.Data.outLFNDirBase = '/store/user/%s/' % (getUsernameFromSiteDB())
    cfg.Data.publication = False

    cfg.Site.storageSite = 'T2_BE_UCL'
    return cfg
def getUsernameFromSiteDB_cache(self):
    """Return the SiteDB username, querying SiteDB only on the first call.

    The result is memoized on self.UsernameFromSiteDB.
    """
    cached = self.UsernameFromSiteDB
    if not cached:
        # first call: ask SiteDB and remember the answer
        from CRABClient.UserUtilities import getUsernameFromSiteDB
        cached = getUsernameFromSiteDB()
        self.UsernameFromSiteDB = cached
    return cached
def fill_config_defaults(self):
    """Fill self.config with the default CRAB settings for this skim task."""
    ## here are the default values for running crab. Feel free to test some options
    # Task identity comes from the instance (name, workdir).
    self.config.General.requestName = self.name
    self.config.General.workArea = self.workdir
    self.config.General.transferOutputs = True
    self.config.General.transferLogs = True
    # dcms VO group: better fair share at German grid sites
    self.config.User.voGroup = 'dcms'
    self.config.JobType.pluginName = 'Analysis'
    self.config.Data.inputDBS = 'global'
    self.config.Data.splitting = 'FileBased'
    self.config.Data.unitsPerJob = 3
    self.config.Data.totalUnits = -1
    self.config.Data.publication = False
    # NOTE(review): getUsernameFromSiteDB is expected from module scope -- confirm.
    self.config.Data.outLFNDirBase = '/store/user/%s/zjets/skimming/%s'%(getUsernameFromSiteDB(), self.name)
    self.config.Site.storageSite = "T2_DE_DESY"
    # self.config.Site.storageSite = "T1_DE_KIT"
    #self.config.JobType.disableAutomaticOutputCollection = True
    self.config.JobType.outputFiles = ['skim76.root']
    #self.config.JobType.sendPythonFolder = True
    # ignoreLocality lets jobs run away from the data's host site
    self.config.Data.ignoreLocality = True
    # switch off xrd access for now
    # self.config.Site.whitelist = ['T2_CH_CERN','T2_DE_DESY','T1_DE_KIT','T2_DE_RWTH']
    self.config.Site.whitelist = ['T2_CH_CERN','T2_DE_DESY','T1_DE_KIT','T2_DE_RWTH','T2_US_*']
def __init__(self, proxy=None, user=None):
    """Store a grid proxy path and a SiteDB username, resolving defaults.

    proxy -- path to the grid proxy file; defaults to $X509_USER_PROXY.
    user  -- grid username; defaults to the SiteDB lookup.
    Raises CRABToolsException when either cannot be resolved.
    """
    if proxy is None:
        proxy = os.getenv("X509_USER_PROXY")
    # the proxy must point at an existing file
    if not proxy or not os.path.isfile(proxy):
        raise CRABToolsException("X509_USER_PROXY is %r, get grid proxy first" % proxy)
    self.proxy = proxy
    if user is None:
        user = getUsernameFromSiteDB()
    if not user:
        raise CRABToolsException("could not get username from sitedb, returned %r" % user)
    self.user = user
def __init__(self, proxy=None, user=None):
    """Store a grid proxy path and a SiteDB username, resolving defaults.

    proxy -- path to the grid proxy file; defaults to $X509_USER_PROXY.
    user  -- grid username; defaults to the SiteDB lookup.
    Raises Crab3ToolsException when either cannot be resolved.
    """
    if proxy is None:
        proxy = os.getenv('X509_USER_PROXY')
    # the proxy must point at an existing file
    if not proxy or not os.path.isfile(proxy):
        raise Crab3ToolsException('X509_USER_PROXY is %r, get grid proxy first with grid-proxy-init' % proxy)
    self.proxy = proxy
    if user is None:
        user = getUsernameFromSiteDB()
    if not user:
        raise Crab3ToolsException('could not get username from sitedb, returned %r' % user)
    self.user = user
def build_configs(filenames_per_sample_per_pipeline):
    """Build CRAB PrivateMC configs and matching job scripts for SvFit cache production.

    filenames_per_sample_per_pipeline -- nested dict {sample: {pipeline: [filenames]}}.
    Returns (configs, jobfiles): one CRAB config and one shell script per
    chunk of at most 8000 input files.
    """
    today = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
    # cap on the number of files (= jobs) per CRAB task
    max_n_files_per_task = 8000
    # flatten the nested dict into one list of filenames
    filenames = []
    for sample, filenames_per_pipeline in filenames_per_sample_per_pipeline.iteritems():
        for pipeline, tmp_filenames in filenames_per_pipeline.iteritems():
            filenames.extend(tmp_filenames)
    configs = []
    jobfiles = []
    # split the flat list into chunks of at most max_n_files_per_task files
    filenames_chunks = [filenames[index:index+max_n_files_per_task] for index in xrange(0, len(filenames), max_n_files_per_task)]
    for index, filenames_chunk in enumerate(filenames_chunks):

        # create job scripts: common prefix template + per-chunk file list
        jobfiles.append(str("svfit_%s_%d.sh" % (today, index)))
        with open(jobfiles[-1], "w+") as jobfile:
            jobfile.write(read_file(os.path.expandvars("$CMSSW_BASE/src/HiggsAnalysis/KITHiggsToTauTau/data/templates/crab_userjob_prefix.sh")))
            svfit_code = string.Template(read_file(os.path.expandvars("$CMSSW_BASE/src/HiggsAnalysis/KITHiggsToTauTau/data/templates/crab_svfit.sh")))
            jobfile.write(svfit_code.safe_substitute(
                input_files = "\n".join("arr[%d,0]=%s" % (i+1, f) for i, f in enumerate(filenames_chunk)),
                cwd=os.getcwd()
            ))
            jobfile.close()  # redundant inside "with"; kept as in the original

        # crab configuration
        configs.append(CRABClient.UserUtilities.config())
        configs[-1].General.workArea = os.path.abspath(os.path.expandvars("$ARTUS_WORK_BASE/../svfit_caches/%s/" % (today)))
        configs[-1].General.transferOutputs = True
        configs[-1].General.transferLogs = True
        # CRAB limits request names to 100 characters
        configs[-1].General.requestName = ("svfit_%s_%d" % (today, index))[:100]
        log.debug("Job name: " + configs[-1].General.requestName)
        configs[-1].Data.outputPrimaryDataset = "Svfit"
        # one unit per job; total units = number of files in this chunk
        configs[-1].Data.splitting = "EventBased"
        configs[-1].Data.unitsPerJob = 1
        configs[-1].Data.totalUnits = len(filenames_chunk)
        configs[-1].Data.publication = False
        configs[-1].Data.outputDatasetTag = configs[-1].General.requestName
        configs[-1].Data.outLFNDirBase = "/store/user/%s/higgs-kit/Svfit/%s/"%(getUsernameFromSiteDB(), today)
        log.debug("Output directory: " + configs[-1].Data.outLFNDirBase)
        configs[-1].Data.publication = False
        configs[-1].User.voGroup = "dcms"
        # PrivateMC with a do-nothing pset; the real work happens in the scriptExe
        configs[-1].JobType.pluginName = "PrivateMC"
        configs[-1].JobType.psetName = os.environ["CMSSW_BASE"]+"/src/CombineHarvester/CombineTools/scripts/do_nothing_cfg.py"
        configs[-1].JobType.inputFiles = [os.path.expandvars("$CMSSW_BASE/bin/$SCRAM_ARCH/ComputeSvfit"), jobfiles[-1]]
        configs[-1].JobType.allowUndistributedCMSSW = True
        configs[-1].JobType.scriptExe = jobfiles[-1]
        configs[-1].JobType.outputFiles = ["SvfitCache.tar"]
        configs[-1].Site.storageSite = "T2_DE_RWTH"
    return configs, jobfiles
def submission():
    """Submit a single CRAB3 task that runs AOD -> MiniAOD -> Kappa in one job."""
    from CRABClient.UserUtilities import config, getUsernameFromSiteDB
    config = config()
    ##-- Your name of the crab project
    config.General.requestName = 'KAPPA_FROM_AOD_SUSYGluGlu_Sync2015'
    #config.General.workArea = 'crab_projects'
    # NOTE(review): date and check_path are expected from module scope -- confirm.
    config.General.workArea = '/net/scratch_cms/institut_3b/%s/kappa/crab_kappa_skim-%s'%(getUsernameFromSiteDB(), date)
    ##-- Transfer root files as well as log files "cmsRun -j FrameworkJobReport.xml" (log file = FrameworkJobReport.xml)
    check_path(config.General.workArea)
    config.General.transferOutputs = True
    config.General.transferLogs = True
    ##-- We want to have the special dcms role (better fair share at german grid sites).
    config.User.voGroup = 'dcms'
    ##-- the scripts (Analysis means with EDM input) which are executed. psetName is the cmsRun config and scriptExe is a shell
    ##   config which should include "cmsRun -j FrameworkJobReport.xml -p PSet.py" (PSet.py is the renamed config.JobType.psetName)
    config.JobType.pluginName = 'Analysis'
    config.JobType.sendPythonFolder = True
    config.JobType.psetName = 'AODtoMiniAOD_cfg.py'
    config.JobType.scriptExe = 'kappaWorkflow_privateMiniAOD.sh'
    #config.JobType.maxJobRuntimeMin = 2750
    #config.JobType.maxMemoryMB = 6000
    ##-- instead of taking the outputfile per hand use the result of pset.py and renamed it, which cheat on the test of
    ##   is-an-EDM-file and allows to publish the data
    config.JobType.disableAutomaticOutputCollection = True
    config.JobType.outputFiles = ['kappaTuple.root']
    ##-- The dataset you want to process:
    config.Data.inputDataset = '/SUSYGluGluToHToTauTau_M-160_TuneCUETP8M1_13TeV-pythia8/RunIIFall15DR76-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/AODSIM'
    #'/DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIIFall15DR76-PU25nsData2015v1_76X_mcRun2_asymptotic_v12_ext1-v1/AODSIM'
    config.Data.inputDBS = 'global'
    config.Data.splitting = 'FileBased'
    config.Data.unitsPerJob = 1
    ##-- If you want to run test jobs set totalUnits to a small number and publication to false
    #config.Data.totalUnits = 10
    config.Data.publication = False
    ##-- the output storage element
    config.Site.storageSite = 'T2_DE_DESY'
    config.Data.outLFNDirBase = '/store/user/%s/higgs-kit/skimming/80X_%s'%(getUsernameFromSiteDB(), date)
    ##-- Run in xrootd mode (which allows you to run the jobs on all possible sites)
    #config.Data.ignoreLocality = True
    #config.Site.whitelist = ['T2_CH_CERN','T2_DE_DESY','T1_DE_KIT','T2_DE_RWTH','T2_UK_London_IC', 'T2_US_MIT']
    # submit from a child process
    p = Process(target=submit, args=(config,))
    p.start()
    p.join()
def getUsernameFromSiteDB_wrapped(logger, quiet = False):
    """
    Wrapper function for getUsernameFromSiteDB, catching exceptions and printing messages.

    logger -- logger used for all messages.
    quiet  -- when True, everything (including errors) is logged at debug level.
    Returns the username, or None if the lookup failed.
    """
    from CRABClient.UserUtilities import getUsernameFromSiteDB
    username = None
    msg = "Retrieving username from SiteDB..."
    if quiet:
        logger.debug(msg)
    else:
        logger.info(msg)
    # extra hint, appended only to username-mapping failures below
    infomsg = "\n%sNote%s: Make sure you have the correct certificate mapped in SiteDB" % (colors.BOLD, colors.NORMAL)
    infomsg += " (you can check what is the certificate you currently have mapped in SiteDB"
    infomsg += " by searching for your name in https://cmsweb.cern.ch/sitedb/prod/people)."
    infomsg += " For instructions on how to map a certificate in SiteDB, see https://twiki.cern.ch/twiki/bin/viewauth/CMS/SiteDBForCRAB."
    try:
        username = getUsernameFromSiteDB()
    except ProxyException as ex:
        # proxy problem: report without the mapping hint
        msg = "%sError%s: %s" % (colors.RED, colors.NORMAL, ex)
        if quiet:
            logger.debug(msg)
        else:
            logger.error(msg)
    except UsernameException as ex:
        # certificate not mapped to a username: add the hint
        msg = "%sError%s: %s" % (colors.RED, colors.NORMAL, ex)
        msg += infomsg
        if quiet:
            logger.debug(msg)
        else:
            logger.error(msg)
    except Exception:
        # unexpected failure: include the full traceback
        msg = "%sError%s: Failed to retrieve username from SiteDB." % (colors.RED, colors.NORMAL)
        msg += "\n%s" % (traceback.format_exc())
        if quiet:
            logger.debug(msg)
        else:
            logger.error(msg)
    else:
        msg = "Username is: %s" % (username)
        if quiet:
            logger.debug(msg)
        else:
            logger.info(msg)
    return username
def get_user():
    """Return the CMS grid user name.

    Tries SiteDB first; on any failure the problem is logged and the
    local login name is returned instead.
    """
    from CRABClient.UserUtilities import getUsernameFromSiteDB
    log = logging.getLogger(__name__)
    try:
        return getUsernameFromSiteDB()
    except Exception as e:
        import traceback
        log.error(
            'Could not get user name from https://cmsweb.cern.ch/sitedb/data/prod/whoami')
        log.error(traceback.format_exc(e))
        # fall back to the local account name
        import getpass
        return getpass.getuser()
def getUsernameFromSiteDB_wrapped(logger, quiet = False):
    """
    Wrapper function for getUsernameFromSiteDB, catching exceptions and printing messages.

    logger -- logger used for all messages.
    quiet  -- when True, everything (including errors) is logged at debug level.
    Returns the username, or None if the lookup failed.
    """
    from CRABClient.UserUtilities import getUsernameFromSiteDB
    username = None
    msg = "Retrieving username from SiteDB..."
    if quiet:
        logger.debug(msg)
    else:
        logger.info(msg)
    try:
        username = getUsernameFromSiteDB()
    except ProxyException as ex:
        msg = "%sError%s: %s" % (colors.RED, colors.NORMAL, ex)
        if quiet:
            logger.debug(msg)
        else:
            logger.error(msg)
    except UsernameException as ex:
        msg = "%sError%s: %s" % (colors.RED, colors.NORMAL, ex)
        if quiet:
            logger.debug(msg)
        else:
            logger.error(msg)
    except Exception:
        # unexpected failure: include the full traceback
        msg = "%sError%s: Failed to retrieve username from SiteDB." % (colors.RED, colors.NORMAL)
        msg += "\n%s" % (traceback.format_exc())
        if quiet:
            logger.debug(msg)
        else:
            logger.error(msg)
    else:
        msg = "Username is: %s" % (username)
        if quiet:
            logger.debug(msg)
        else:
            logger.info(msg)
    return username
# CRAB3 configuration fragment for the inclBtoJPsiMuMu pA skim.
# NOTE(review): "config" is created earlier in the file (outside this view) -- confirm.
config.General.requestName = 'inclBtoJPsiMuMu_pa_2nd_run_SKIM_STARTHI53_V27_ext1_v1'
config.General.workArea = 'crab_projects'
config.General.transferOutputs = True
config.General.transferLogs = True
config.section_('JobType')
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = 'inclBtoJPsiMuMu_pa_2nd_run_SKIM_STARTHI53_V27_cfg.py'
config.JobType.outputFiles = ['inclBtoJPsiMuMu_pa_2nd_run_SKIM_STARTHI53_V27_ext1.root']
config.section_('Data')
config.Data.inputDataset = '/inclBtoJPsiMuMu_5TeV02/pAWinter13DR53X-pa_2nd_run_STARTHI53_V27_ext1-v1/GEN-SIM-RECO'
config.Data.inputDBS = 'global'
config.Data.unitsPerJob = 1
#NJOBS = 10 # This is not a configuration parameter, but an auxiliary variable that we use in the next line.
#config.Data.totalUnits = config.Data.unitsPerJob * NJOBS
config.Data.splitting = 'FileBased'
#config.Data.outLFNDirBase = '/store/user/%s/pAWinter13/%s' % (getUsernameFromSiteDB(), config.General.requestName)
config.Data.outLFNDirBase = '/store/user/%s/pAWinter13ext1' % (getUsernameFromSiteDB())
#config.Data.publication = False
config.Data.publication = True
config.Data.outputDatasetTag = config.General.requestName
config.section_('Site')
#config.Site.whitelist = ['T2_KR_KNU']
config.Site.storageSite = 'T2_KR_KNU'
# If your site is blacklisted by crab, use:
# config.Data.ignoreLocality = True
# config.Site.whitelist = ["T2_FR*"]
#config.JobType.scriptArgs #config.JobType.sendPythonFolde #config.JobType.externalPluginFile #================================================================================================ # Data Section: Contains all parameters related to the data to be analyzed (incl. splitting params) #================================================================================================ config.section_("Data") config.Data.inputDataset = dataset config.Data.inputDBS = 'global' #'phys03' config.Data.splitting = 'FileBased' #config.Data.totalUnits = 10 config.Data.unitsPerJob = 5 config.Data.publication = False config.Data.outLFNDirBase = '/store/user/%s/CRAB3_TransferData' % (getUsernameFromSiteDB()) # testing: # config.Data.totalUnits = 100000 # config.Data.unitsPerJob = 10000 # options: # config.Data.allowNonValidInputDatase # config.Data.outputPrimaryDataset # config.Data.inputDBS # config.Data.unitsPerJob # config.Data.useParent # config.Data.secondaryInputDataset # config.Data.lumiMask # config.Data.runRange # config.Data.outLFNDirBase # config.Data.publication # config.Data.publishDBS
def submission(base_path):
    """Create and submit SvFit-cache CRAB tasks for every sample found below the base path."""
    today = datetime.date.today().strftime("%Y-%m-%d")
    # cap on the number of files (= jobs) per CRAB task
    max_n_files_per_task = 8000
    # translate SRM paths into xrootd URLs the jobs can read
    filename_replacements = {
        "srm://grid-srm.physik.rwth-aachen.de:8443/srm/managerv2?SFN=/pnfs/physik.rwth-aachen.de/cms/store/user/" : "root://grid-vo-cms.physik.rwth-aachen.de:1094//store/user/"
    }

    # retrieve and prepare input files
    # NOTE(review): this uses the global "args.base_path" rather than the
    # "base_path" parameter -- presumably identical at the call site; confirm.
    stdout_directories, stderr_directories = tools.subprocessCall(shlex.split("gfal-ls " + args.base_path))
    for sample in stdout_directories.decode().strip().split("\n"):
        stdout_files, stderr_files = tools.subprocessCall(shlex.split("gfal-ls " + os.path.join(args.base_path, sample)))
        filenames = [filename for filename in stdout_files.decode().strip().split("\n") if (("SvfitCache" in filename) and filename.endswith(".root"))]
        if len(filenames) > 0:
            filenames = [os.path.join(args.base_path, sample, filename) for filename in filenames]
            # group files by pipeline, parsed from the SvfitCache<pipeline><number>.root name
            # NOTE(review): the regex uses an unescaped "." and a greedy ".*" -- works for
            # the expected names, but worth tightening; confirm before changing.
            pipelines_filenames = {}
            for filename in filenames:
                for src, dst in filename_replacements.iteritems():
                    filename = filename.replace(src, dst)
                pipeline = re.search("SvfitCache(?P<pipeline>.*)\d+.root", filename).groupdict()["pipeline"]
                pipelines_filenames.setdefault(pipeline, []).append(filename)

            for pipeline, filenames in pipelines_filenames.iteritems():
                # split into chunks of at most max_n_files_per_task files
                filenames_chunks = [filenames[index:index+max_n_files_per_task] for index in xrange(0, len(filenames), max_n_files_per_task)]
                for index, filenames_chunk in enumerate(filenames_chunks):

                    # create job scripts: common prefix template + per-chunk file list
                    jobfile_name = str("svfit_%s_%s_%s_%d.sh" % (today, sample, pipeline, index))
                    with open(jobfile_name, "w+") as jobfile:
                        jobfile.write(read_file(os.path.expandvars("$CMSSW_BASE/src/HiggsAnalysis/KITHiggsToTauTau/data/templates/crab_userjob_prefix.sh")))
                        svfit_code = string.Template(read_file(os.path.expandvars("$CMSSW_BASE/src/HiggsAnalysis/KITHiggsToTauTau/data/templates/crab_svfit.sh")))
                        jobfile.write(svfit_code.safe_substitute(
                            input_files = "\n".join("arr[%d,0]=%s" % (i+1, f) for i, f in enumerate(filenames_chunk)),
                            cwd=os.getcwd()
                        ))
                        jobfile.close()  # redundant inside "with"; kept as in the original

                    # crab configuration
                    config = CRABClient.UserUtilities.config()
                    config.General.workArea = os.path.abspath(os.path.expandvars("$ARTUS_WORK_BASE/../svfit_caches/%s/" % (today)))
                    config.General.transferOutputs = True
                    config.General.transferLogs = True
                    # CRAB limits request names to 100 characters
                    config.General.requestName = ("%s_%s_%d" % (sample, pipeline, index))[:100]
                    log.info("Job name: " + config.General.requestName)
                    config.Data.outputPrimaryDataset = "Svfit"
                    # one unit per job; total units = number of files in this chunk
                    config.Data.splitting = "EventBased"
                    config.Data.unitsPerJob = 1
                    config.Data.totalUnits = len(filenames_chunk)
                    config.Data.publication = False
                    config.Data.outputDatasetTag = config.General.requestName
                    config.Data.outLFNDirBase = "/store/user/%s/higgs-kit/Svfit/%s/"%(getUsernameFromSiteDB(), today)
                    log.info("Output directory: " + config.Data.outLFNDirBase)
                    config.Data.publication = False
                    config.User.voGroup = "dcms"
                    # PrivateMC with a do-nothing pset; the real work happens in the scriptExe
                    config.JobType.pluginName = "PrivateMC"
                    config.JobType.psetName = os.environ["CMSSW_BASE"]+"/src/CombineHarvester/CombineTools/scripts/do_nothing_cfg.py"
                    # config.JobType.inputFiles = ["Kappa/lib/libKappa.so", os.environ["CMSSW_BASE"]+"/bin/"+os.environ["SCRAM_ARCH"]+"/ComputeSvfit", jobfile_name]
                    config.JobType.inputFiles = [os.path.expandvars("$CMSSW_BASE/bin/$SCRAM_ARCH/ComputeSvfit"), jobfile_name]
                    config.JobType.allowUndistributedCMSSW = True
                    config.JobType.scriptExe = jobfile_name
                    config.JobType.outputFiles = ["SvfitCache.tar"]
                    config.Site.storageSite = "T2_DE_DESY"
                    # config.Site.blacklist = ["T3_US_PuertoRico", "T2_ES_CIEMAT", "T2_DE_RWTH", "T3_US_Colorado", "T2_BR_UERJ", "T2_ES_IFCA", "T2_RU_JINR", "T2_UA_KIPT", "T2_EE_Estonia", "T2_FR_GRIF_LLR", "T2_CH_CERN", "T2_FR_GRIF_LLR", "T3_IT_Bologna", "T2_US_Nebraska", "T2_US_Nebraska", "T3_TW_NTU_HEP", "T2_US_Caltech", "T3_US_Cornell", "T2_IT_Legnaro", "T2_HU_Budapest", "T2_IT_Pisa", "T2_US_Florida", "T2_IT_Bari", "T2_FR_GRIF_IRFU", "T2_IT_Rome", "T2_FR_GRIF_IRFU", "T2_CH_CSCS", "T3_TW_NCU"]
                    # submit from a child process, then remove the temporary job script
                    p = Process(target=submit, args=(config,))
                    p.start()
                    p.join()
                    os.remove(jobfile_name)
def crab_command(command): for dir in glob('/nfs/dust/cms/user/%s/crab_kappa_skim-%s/*'%(getUsernameFromSiteDB(), date)): try: crabCommand(command, dir = dir) except HTTPException as hte: print hte
# CRAB3 configuration: bprimeKit ntuplizing of a TTJets MiniAOD sample.
from CRABClient.UserUtilities import config, getUsernameFromSiteDB
config = config()
config.General.requestName = 'name_2'
config.General.workArea = 'projects_2'
config.General.transferOutputs = True
config.General.transferLogs = True
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = 'bprimeKit_miniAOD.py'
# extra payload shipped with the job
config.JobType.inputFiles = [ 'dataEIDMVA' ]
config.Data.inputDataset = '/TTJets_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring15DR74-Asympt25ns_MCRUN2_74_V9-v2/MINIAODSIM'
config.Data.inputDBS = 'global'
config.Data.splitting = 'FileBased'
config.Data.unitsPerJob = 1
# config.Data.totalUnits = 50 ## Disabled to run all
config.Data.outLFNDirBase = '/store/user/%s/BPRIMEKIT_PERSONAL_TESTING' % (getUsernameFromSiteDB())
#config.Data.publication = True
#config.Data.publishDataName = 'CRAB3_tutorial_May2015_MC_analysis'
config.Site.storageSite = 'T3_TW_NTU_HEP'
### DATA configuration config.Data.inputDataset = '/HplusToTauNu-M500/amarini-amarini_PrivateMC_HPlusToTauNu_June2015-16aa19d591b8b49c55c4508e7a7c9233/USER' #config.Data.inputDBS = 'phys03' config.Data.inputDBS = 'global' config.Data.ignoreLocality = False config.Data.splitting = 'FileBased' config.Data.unitsPerJob = 10 config.Data.totalUnits = -1 tag = check_output( "git describe --tags | cut -d'-' -f 1 | tr -d '\n' ", shell=True) print "-> current tag is '" + tag + "'" config.Data.outLFNDirBase = '/store/user/%s/Nero/%s/' % ( getUsernameFromSiteDB(), tag) config.Data.publication = False config.Data.outputDatasetTag = 'NeroNtuples' config.Site.storageSite = 'T2_CH_CERN' #config.Site.blacklist = [ 'T2_US_Florida','T2_US_Vanderbilt'] if __name__ == '__main__': from CRABAPI.RawCommand import crabCommand from CRABClient.ClientExceptions import ClientException from httplib import HTTPException # We want to put all the CRAB project directories from the tasks we submit here into one common directory. # That's why we need to set this parameter (here or above in the configuration file, it does not matter, we will not overwrite it). config.General.workArea = 'NeroSubmission2'
# For information on config parameters, see # https://twiki.cern.ch/twiki/bin/view/CMSPublic/CRAB3ConfigurationFile # section General config.General.requestName = name config.General.workArea = 'crab_test_' + name + '_Run' + str( runNom ) config.General.transferOutputs = True config.General.transferLogs = True # section JobType config.JobType.pluginName = 'Analysis' config.JobType.psetName = 'hlt.py' config.JobType.outputFiles = ['hltbits.root'] config.JobType.numCores = 16 # sorry but we have to be dicks # section Data config.Data.inputDataset = '/HLTPhysics/Run2016B-v2/RAW' config.Data.splitting = 'LumiBased' config.Data.unitsPerJob = 71 # use crab submit --dryrun *.py to find optimal splitting config.Data.lumiMask = 'lumimask_Run274998.txt' # specifes good lumi sections to be used config.Data.totalUnits = -1 # analyze all events after applying the lumi mask config.Data.runRange = str( runNom ) config.Data.outLFNDirBase = '/store/user/%s/' % (getUsernameFromSiteDB()) + '/' + name + '_Run' + str( runNom ) config.Data.publication = False # no need to publish the results config.Data.outputDatasetTag = name config.Data.ignoreLocality = True # section Site config.Site.storageSite = 'T3_US_FNALLPC'
# CRAB3 configuration fragment: onia skim of the 2015 PbPb prompt-reco data.
# NOTE(review): "config" is created earlier in the file (outside this view) -- confirm.
config.General.transferOutputs = True
config.General.transferLogs = True
config.section_('JobType')
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = 'onia2MuMuPATHI_7xy_PbPbPrompt_cfg.py'
config.section_('Data')
config.Data.inputDataset ='/HIOniaL1DoubleMu0/HIRun2015-PromptReco-v1/AOD'
config.Data.inputDBS = 'global'
config.Data.unitsPerJob = 10
config.Data.splitting = 'LumiBased'
config.Data.runRange = '262548-263757'
### Use when running first time
config.Data.lumiMask = '/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions15/HI/DCSOnly/json_DCSONLY.txt'
### When submitting the jobs again, please use:
#config.Data.lumiMask = '<NAME OF MISSING LUMI MASK FROM PREVIOUS CRAB JOB>'
# The missing lumimask can be obtained after using crab report -d <path to crab job dir>
config.Data.publication = True
config.Data.outputDatasetTag = 'HIOniaL1DoubleMu0_HIRun2015-PromptReco-v1_Run_262548_263757_ONIASKIM'
config.Data.outLFNDirBase = '/store/user/%s/HIPromptReco/%s' % (getUsernameFromSiteDB(), config.Data.outputDatasetTag)
config.section_('Site')
config.Site.storageSite = 'T2_FR_GRIF_LLR'
# CRAB3 configuration: private MC production of GluGlu H->MuMu (M125), Moriond18.
from CRABClient.UserUtilities import config, getUsernameFromSiteDB
config = config()
config.General.requestName = 'amarini_Moriond18_GluGluMH125'
config.General.workArea = 'crab_privateMCProduction'
config.General.transferOutputs = True
config.General.transferLogs = False
# PrivateMC: generation chain driven by scriptExe, pset is a placeholder
config.JobType.pluginName = 'PrivateMC'
#config.JobType.disableAutomaticOutputCollection = True
config.JobType.maxMemoryMB = 2500
config.JobType.psetName = 'fake.py' ## fake the last step -> step4 + empty source
config.JobType.inputFiles = ['scriptExe.sh', 'step1.py','step2.py','step3.py','step4.py','pu.py']
config.JobType.scriptExe='scriptExe.sh'
config.JobType.numCores=1
config.Data.splitting = 'EventBased'
config.Data.unitsPerJob = 500
config.Data.totalUnits = 200000
config.Data.outLFNDirBase = '/store/group/phys_higgs/cmshmm/%s/' % (getUsernameFromSiteDB())
config.Data.publication = True
config.Data.outputPrimaryDataset = 'GluGlu_HToMuMu_M125_13TeV_amcatnloFXFX_pythia8'
config.Data.outputDatasetTag ='Fall17_94X-MINIAODSIM'
config.Site.storageSite = 'T2_CH_CERN'
# CRAB3 configuration: private GEN production of a PYTHIA8 Higgs M900 sample.
from CRABClient.UserUtilities import config, getUsernameFromSiteDB
config = config()
config.General.requestName = 'PYTHIA8_MC_Higgs_M900_GEN'
config.General.workArea = 'crab_projects'
config.General.transferOutputs = True
config.General.transferLogs = False
config.JobType.pluginName = 'PrivateMC'
config.JobType.psetName = 'step1_m900.py'
config.JobType.maxMemoryMB = 2500
#/MinBias_TuneCUETP8M1_13TeV-pythia8/RunIIWinter15GS-MCRUN2_71_V1-v1/GEN-SIM
#config.Data.inputDataset = '/MinBias_TuneCUETP8M1_13TeV-pythia8/RunIIWinter15GS-MCRUN2_71_V1-v1/GEN-SIM'
config.Data.primaryDataset = 'HplusToTauNu-M900'
config.Data.splitting = 'EventBased'
config.Data.unitsPerJob = 1000
#NJOBS = 20 # This is not a configuration parameter, but an auxiliary variable that we use in the next line.
config.Data.totalUnits = 100000
config.Data.outLFNDirBase = '/store/user/%s/mc/' % (getUsernameFromSiteDB())
config.Data.publication = True
config.Data.publishDataName ='%s_PrivateMC_HPlusToTauNu_June2015_GENSIMRAW'% (getUsernameFromSiteDB())
config.Site.storageSite = 'T2_CH_CERN'
#config.Site.blacklist = ['T2_US_Florida', 'T2_BR_*', 'T2_RU_*']
# CRAB3 configuration: onia skim of a user-published PbPb J/psi MC sample.
from CRABClient.UserUtilities import config, getUsernameFromSiteDB
config = config()
config.section_("General")
config.General.requestName = "JpsiMM_5p02TeV_TuneCUETP8M1_ptJpsi69_ONIASKIM_20151209"
config.General.workArea = "crab_projects"
config.General.transferOutputs = True
config.General.transferLogs = True
config.section_("JobType")
config.JobType.pluginName = "Analysis"
config.JobType.psetName = "onia2MuMuPATHI_7xy_PbPb_MC_cfg.py"
config.section_("Data")
# user-published dataset, hence phys03 DBS instance
config.Data.inputDataset = "/JpsiMM_5p02TeV_TuneCUETP8M1_ptJpsi69/echapon-JpsiMM_5p02TeV_TuneCUETP8M1_ptJpsi69_step3_20151208-c5e5b4508236081d2fa5bf691a689da0/USER"
config.Data.inputDBS = "phys03"
config.Data.unitsPerJob = 1
config.Data.splitting = "FileBased"
config.Data.outLFNDirBase = "/store/user/%s/PbPbMC2015/%s" % (getUsernameFromSiteDB(), config.General.requestName)
config.Data.publication = True
config.Data.outputDatasetTag = config.General.requestName
config.section_("Site")
config.Site.whitelist = ["T2_FR_GRIF_LLR"]
config.Site.storageSite = "T2_FR_GRIF_LLR"
config.JobType.pluginName = 'Analysis' # feed in any additional input files config.JobType.inputFiles = [] config.JobType.inputFiles.extend(additionalInputFiles) config.JobType.psetName = '' # overridden per dataset # need to execute the user_script #config.JobType.scriptExe = 'user_script.sh' config.Data.inputDataset = '' # overridden per dataset config.Data.inputDBS = 'global' config.Data.splitting = 'FileBased' #LumiBased for data config.Data.unitsPerJob = 1 # overridden per dataset config.Data.totalUnits = -1 # overridden per dataset # no publishing config.Data.publication = False config.Data.outputDatasetTag = 'LQ' #overridden for data config.Data.outLFNDirBase = '/store/group/phys_exotica/leptonsPlusJets/RootNtuple/RunII/%s/' % (getUsernameFromSiteDB()) + options.tagName + '/' #config.Data.outLFNDirBase = '/store/user/%s/' % (getUsernameFromSiteDB()) + topDirName + '/' if options.eosDir is not None: # split of /eos/cms if it is there if options.eosDir.startswith('/eos/cms'): options.eosDir = options.eosDir.split('/eos/cms')[-1] if not options.eosDir.startswith('/store'): print 'eosDir must start with /eos/cms/store or /store and you specified:',options.eosDir print 'quit' exit(-1) outputLFN=options.eosDir if not outputLFN[-1]=='/': outputLFN+='/' outputLFN+=options.tagName+'/' if not getUsernameFromSiteDB() in outputLFN: outputLFN.rstrip('/')
# Command-line driven CRAB submission: builds one shared config and submits
# every job collection listed on the command line.
# NOTE(review): "parser" (argparse) is created earlier in the file (outside
# this view) -- confirm.
parser.add_argument('--output', required=True, dest='output', type=str, help="output path after /store/user/USERNAME")
parser.add_argument('job_file', type=str, nargs='+', help="text file with jobs descriptions")
args = parser.parse_args()
from CRABClient.UserUtilities import config, ClientException, getUsernameFromSiteDB
from CRABAPI.RawCommand import crabCommand
from httplib import HTTPException
config = config()
config.General.workArea = 'work_area'
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = args.cfg
config.Data.inputDBS = 'global'
config.General.transferOutputs = True
config.General.transferLogs = True
config.Data.publication = False
config.Site.storageSite = args.site
config.Data.outLFNDirBase = "/store/user/{}/{}".format(getUsernameFromSiteDB(), args.output)
from crab_tools import JobCollection
for job_file in args.job_file:
    job_collection = JobCollection(job_file)
    print job_file
    print job_collection
    job_collection.submit(config,args.dryrun)
from CRABClient.UserUtilities import config, getUsernameFromSiteDB

# CRAB configuration for a private single-muon gun production
# (GEN through RECO in one pset).
config = config()

config.General.requestName = "noBMuMinusIdeal"
config.General.workArea = "crab_projects"
config.General.transferOutputs = True
config.General.transferLogs = False

config.User.voGroup = "dcms"

config.JobType.pluginName = "PrivateMC"
config.JobType.psetName = "SingleMuPt5to200_cfi_GEN_SIM_DIGI_DIGI2RAW_RAW2DIGI_L1_L1Reco_Reco.py"
config.JobType.maxMemoryMB = 2500

# Event-based splitting: NUMBER_OF_JOBS jobs of EVENTS_PER_JOB events each.
# NUMBER_OF_JOBS is an auxiliary variable, not a CRAB parameter.
EVENTS_PER_JOB = 10000
NUMBER_OF_JOBS = 100
config.Data.splitting = "EventBased"
config.Data.unitsPerJob = EVENTS_PER_JOB
config.Data.totalUnits = config.Data.unitsPerJob * NUMBER_OF_JOBS
config.Data.outLFNDirBase = "/store/user/%s/SingleMuMinus" % (getUsernameFromSiteDB())
config.Data.publication = False

config.Site.storageSite = "T2_DE_RWTH"
# CRAB requestName is limited to 100 characters; if the dataset-derived name is
# longer, truncate it and splice in a short sha256 suffix so two names that
# collide after truncation stay distinct.
MAX_NAME_LEN = 100
HASH_LEN = 5
request_name = config.General.requestName
if len(request_name) > MAX_NAME_LEN:
    digest = hashlib.sha256(request_name).hexdigest()
    config.General.requestName = request_name[:MAX_NAME_LEN - HASH_LEN] + digest[:HASH_LEN]

config.JobType.pyCfgParams = configParams

# Things that don't change with dataset
config.General.workArea = '.'
config.General.transferOutputs = True
config.General.transferLogs = True
# NOTE(review): other CRAB configs use 'Analysis' (mixed case) — confirm the
# uppercase spelling is accepted by the client version in use here.
config.JobType.pluginName = 'ANALYSIS'
config.JobType.psetName = '%s/src/UWVV/Ntuplizer/test/ntuplize_cfg.py' % os.environ["CMSSW_BASE"]
config.JobType.numCores = 1
config.JobType.inputFiles = ["%s/src/UWVV/data" % os.environ["CMSSW_BASE"]]

# USER datasets live in the phys03 DBS instance, everything else in global.
config.Data.inputDBS = 'phys03' if 'USER' in dataset else 'global'
config.Data.useParent = False
config.Data.publication = False

# Expand the $USER/$DATE placeholders from the local settings file.
outdir = localSettings.get("local", "outLFNDirBase").replace(
    "$USER", getUsernameFromSiteDB()).replace("$DATE", today)
# Useful for VBFNLO samples
#config.Site.whitelist = ['T2_DE_DESY']
config.Data.outLFNDirBase = outdir
config.Data.ignoreLocality = False
config.Site.storageSite = localSettings.get("local", "storageSite")
config = Configuration()

# CRAB3 configuration for ntuplizing privately produced
# BuToJpsiK GEN-only events (8 TeV, PYTHIA6 user dataset).
config.section_('General')
config.General.requestName = 'BuToJpsiK_MC_GENOnly_8TeV_Ntuples_v5'
config.General.transferOutputs = True
config.General.transferLogs = True

config.section_('JobType')
config.JobType.pluginName = 'Analysis'
#config.JobType.pluginName = 'privateMC'
config.JobType.psetName = './BuToJpsiK_MC_GENOnly.py'
#config.JobType.outputFiles = ['BToJpsiK_GENOnly_8TeV_Ntuple.root']

config.section_('Data')
# Input is a user (phys03) dataset; ignoreLocality lets jobs run anywhere.
config.Data.inputDataset = '/PYTHIA6_BuToJpsiK_GENOnly_8TeV/gechen-crab_BuToJpsiKMuMu_MC_GENOnly_8TeV-387bf2b3df13ffa8b4f3dd9f3950e077/USER'
config.Data.inputDBS = 'phys03'
config.Data.splitting = 'FileBased'
config.Data.unitsPerJob = 2
config.Data.ignoreLocality = True
# Publish the resulting ntuples back to phys03 under a versioned tag.
#config.Data.outputPrimaryDataset = 'PYTHIA6_BuToJpsiK_GENOnly_8TeV_Ntuple_v4'
config.Data.outputDatasetTag = 'PYTHIA6_BuToJpsiK_GENOnly_8TeV_Ntuple_v5'
config.Data.publishDBS = 'phys03'
config.Data.publication = True
config.Data.outLFNDirBase = '/store/user/%s/' % (getUsernameFromSiteDB())
#config.Data.outLFNDirBase = '/store/user/gechen/'

config.section_('User')

config.section_('Site')
config.Site.storageSite = 'T2_CH_CERN'
#config.Site.whitelist = ["T2_CH*"]
### this is an example for running on RECO
### the name must be changed crab.cfg for actual running

from CRABClient.UserUtilities import config, getUsernameFromSiteDB

# Tracking-performance study on a privately produced PbPb RECODEBUG sample.
config = config()

config.General.requestName = 'PbPb_eff_v5'
config.General.workArea = 'PbPb_eff_v5'
config.General.transferOutputs = True
config.General.transferLogs = True

config.JobType.allowUndistributedCMSSW = True
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = 'run_TrackPerformance_cfg.py'

# User dataset from the phys03 DBS instance; alternate pp input kept for reference.
config.Data.inputDBS = 'phys03'
config.Data.inputDataset = '/Hydjet_Quenched_MinBias_5020GeV_750/velicanu-Hydjet_Quenched_MinBias_5020GeV_750_RECODEBUG_v0-eb8cf5655150b59e96d879ea397567ad/USER'
#config.Data.inputDataset = '/PYTHIA_QCD_TuneCUETP8M1_cfi_GEN_SIM_5020GeV/velicanu-Pythia8_Dijet80_pp_TuneCUETP8M1_Hydjet_MinBias_5020GeV_PrivMC-282baa650b1997daa0dd8689f6a69785/USER'
config.Data.splitting = 'FileBased'
config.Data.unitsPerJob = 10
config.Data.ignoreLocality = False
config.Data.outLFNDirBase = '/store/user/%s/' % (getUsernameFromSiteDB())
config.Data.publication = False

# Run and store at the site hosting the input.
config.Site.storageSite = 'T2_US_MIT'
config.Site.whitelist = ['T2_US_MIT']