Example #1
def submission():
	from CRABClient.UserUtilities import config, getUsernameFromSiteDB
	config = config()
	config.General.workArea = '/nfs/dust/cms/user/%s/crab_kappa_skim-%s'%(getUsernameFromSiteDB(), date)
	check_path(config.General.workArea)
	config.General.transferOutputs = True
	config.General.transferLogs = True
	config.User.voGroup = 'dcms'
	
	config.JobType.pluginName = 'Analysis'
	config.JobType.psetName = 'kSkimming_run2_cfg.py'
	#config.JobType.inputFiles = ['Summer15_V5_MC.db']
	config.JobType.allowUndistributedCMSSW = True
	config.Site.blacklist = ["T2_BR_SPRACE"]
	config.Data.inputDBS = 'global'
	config.Data.splitting = 'FileBased'
	config.Data.unitsPerJob = 1
	config.Data.outLFNDirBase = '/store/user/%s/higgs-kit/skimming/%s'%(getUsernameFromSiteDB(), date)
	config.Data.publication = False
	
	config.Site.storageSite = "T2_DE_DESY"
	# load nicknames from gc-style config files and write them to a flat nicknames list
	nicknames = read_grid_control_includes(["samples/13TeV/Fall15_SM_Analysis.conf"])
	#nicknames = ['SUSYGluGluToHToTauTauM160_RunIIFall15MiniAODv2_76X_13TeV_MINIAOD_pythia8']

	# loop over datasets and get respective nicks
	for nickname in nicknames:
		config.General.requestName = nickname
		config.JobType.pyCfgParams = [
			'globalTag=76X_dataRun2_16Dec2015_v0' if isData(nickname) else 'globalTag=76X_mcRun2_asymptotic_RunIIFall15DR76_v1',
			'kappaTag=KAPPA_2_1_0',
			'nickname=%s' % (nickname),
			'outputfilename=kappa_%s.root' % (nickname),
			'testsuite=False',
		]
		config.JobType.outputFiles = ['kappa_%s.root'%(nickname)]
		config.Data.inputDataset = get_sample_by_nick(nickname)
		p = Process(target=submit, args=(config,))
		p.start()
		p.join()
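A note on the submit() helper used here and in the later examples: it is not part of the snippet. Below is a minimal sketch, assuming the standard CRABAPI entry point; the Process wrapper is used because the CRAB client keeps global state and cannot submit several tasks from one Python process.

# Sketch of the submit() helper assumed by these examples (not shown in the original).
from CRABAPI.RawCommand import crabCommand
from CRABClient.ClientExceptions import ClientException
from httplib import HTTPException

def submit(config):
	try:
		crabCommand('submit', config=config)
	except HTTPException as hte:
		print "Failed submitting task: %s" % (hte.headers)
	except ClientException as cle:
		print "Failed submitting task: %s" % (cle)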
Example #2
def submission(events_per_job):
	from CRABClient.UserUtilities import config, getUsernameFromSiteDB
	config = config()
	config.General.workArea = '/nfs/dust/cms/user/%s/kappa/crab_kappa_skim80X-%s'%(getUsernameFromSiteDB(), date)
	#config.General.workArea = '/net/scratch_cms/institut_3b/%s/kappa/crab_kappa_skim-%s'%(getUsernameFromSiteDB(), date)
	#config.General.workArea = '/nfs/dust/cms/user/<your-NAF-username>/kappa/crab_kappa_skim80X-%s'% date  #if CERN-username != NAF-username
	check_path(config.General.workArea)
	config.General.transferOutputs = True
	config.General.transferLogs = True
	config.User.voGroup = 'dcms'
	
	config.JobType.pluginName = 'Analysis'
	config.JobType.psetName = 'kSkimming_run2_cfg.py'
	#config.JobType.inputFiles = ['Spring16_25nsV6_DATA.db', 'Spring16_25nsV6_MC.db']
	config.JobType.allowUndistributedCMSSW = True
	config.Site.blacklist = ["T2_BR_SPRACE"]
	config.Data.splitting = 'FileBased'
	config.Data.unitsPerJob = 1
	config.Data.outLFNDirBase = '/store/user/%s/higgs-kit/skimming/80X_%s'%(getUsernameFromSiteDB(), date)
	config.Data.publication = False
	
	config.Site.storageSite = "T2_DE_DESY"
	# load nicknames from gc-style config files and write them to a flat nicknames list
	nicknames = read_grid_control_includes(["samples/13TeV/Summer16_SM_Analysis.conf"])
	#nicknames = read_grid_control_includes(["samples/13TeV/Spring16_SM_Higgs_CPmixing_2.conf"])
	#nicknames = read_grid_control_includes(["samples/13TeV/2016B_Data.conf"])
	#nicknames = ['SUSYGluGluToHToTauTauM160_RunIIFall15MiniAODv2_76X_13TeV_MINIAOD_pythia8']

	# loop over datasets and get respective nicks
	for nickname in nicknames:
		config.General.requestName = nickname[:100]
		config.Data.inputDBS = get_inputDBS_by_nick(nickname)
		config.Data.unitsPerJob = 1
		nfiles = get_n_files_from_nick(nickname)
		if events_per_job:
			nevents = get_n_generated_events_from_nick(nickname)
			try:
				if int(nfiles) > 0 and int(nevents) > 0:
					files_per_job = int(events_per_job) * int(nfiles) / int(nevents)
					if files_per_job > 1:
						config.Data.unitsPerJob = int(files_per_job)
			except Exception:
				print "It is not possible to make", events_per_job, "events/job for", nickname, "which has Nevents:", nevents, "and Nfiles:", nfiles, "in the database. Falling back to one file per job."
		if float(config.Data.unitsPerJob) > 0 and float(nfiles)/float(config.Data.unitsPerJob) >= job_submission_limit:
			files_per_job = ceil(float(nfiles)/job_submission_limit)
			if files_per_job > 1:
				config.Data.unitsPerJob = int(files_per_job)

		config.JobType.pyCfgParams = [
			'globalTag=80X_dataRun2_2016SeptRepro_v7' if isData(nickname)
				else 'globalTag=80X_mcRun2_asymptotic_2016_TrancheIV_v8' if "PUMoriond17" in getScenario(nickname)
				else 'globalTag=80X_mcRun2_asymptotic_2016_miniAODv2_v1',
			'kappaTag=KAPPA_2_1_0',
			'nickname=%s' % (nickname),
			'outputfilename=kappa_%s.root' % (nickname),
			'testsuite=False',
		]
		config.JobType.outputFiles = ['kappa_%s.root'%(nickname)]
		config.Data.inputDataset = get_sample_by_nick(nickname)
		#config.Data.lumiMask = '/nfs/dust/cms/user/<NAF-username>/kappa/crab_kappa_skim80X-<campaign-date>/results/missingLumis.json' # for running of a subset of lumi sections
		p = Process(target=submit, args=(config,))
		p.start()
		p.join()
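The splitting logic above targets a fixed number of events per job: with nfiles files containing nevents generated events in total, files_per_job = events_per_job * nfiles / nevents, then capped so that no task exceeds job_submission_limit jobs. A worked example with made-up numbers:

# Made-up numbers to illustrate the arithmetic above:
# nfiles = 200, nevents = 1000000  ->  5000 events per file on average
# events_per_job = 50000           ->  files_per_job = 50000 * 200 / 1000000 = 10
# 200 files / 10 files per job = 20 jobs, well below job_submission_limit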
def create_config():
    """
    Create a default CRAB configuration
    :return:
    """

    from CRABClient.UserUtilities import config, getUsernameFromSiteDB
    config = config()

    config.General.workArea = 'tasks'
    config.General.transferOutputs = True
    config.General.transferLogs = True

    config.JobType.pluginName = 'Analysis'
    config.JobType.disableAutomaticOutputCollection = True
    config.JobType.outputFiles = []
    config.JobType.allowUndistributedCMSSW = True

    config.Data.inputDBS = 'global'

    config.Data.splitting = 'LumiBased'

    config.Data.outLFNDirBase = '/store/user/%s/' % (getUsernameFromSiteDB())
    config.Data.publication = False

    config.Site.storageSite = 'T2_BE_UCL'

    return config
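A possible way to use this factory, filling in the per-task fields it leaves unset (the request name and dataset below are placeholders):

config = create_config()
config.General.requestName = 'my_task'                                  # placeholder
config.Data.inputDataset = '/SomePrimaryDataset/SomeCampaign/MINIAOD'   # placeholder
config.Data.unitsPerJob = 10

from CRABAPI.RawCommand import crabCommand
crabCommand('submit', config=config)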
def crab_command(command):
	for dir in glob('/nfs/dust/cms/user/%s/kappa/crab_kappa_skim76X-%s/*'%(getUsernameFromSiteDB(), date)):
	#for dir in glob('/net/scratch_cms/institut_3b/%s/kappa/crab_kappa_skim-%s/*'%(getUsernameFromSiteDB(), date)):
		try:
			crabCommand(command, dir = dir)
		except HTTPException as hte:
			print hte
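Since crab_command() forwards a single CRAB subcommand to every task directory in the work area, bulk operations reduce to one call each, e.g. (a sketch):

crab_command('status')
crab_command('resubmit')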
def create_config(is_mc):
    """
    Create a default CRAB configuration suitable to run the framework
    :return:
    """

    from CRABClient.UserUtilities import config, getUsernameFromSiteDB
    config = config()

    config.General.workArea = 'tasks'
    config.General.transferOutputs = True
    config.General.transferLogs = True

    config.JobType.pluginName = 'Analysis'
    config.JobType.psetName = '../python/dummy_pset.py'
    config.JobType.scriptExe = '../bin/runFrameworkOnGrid.sh'
    config.JobType.sendPythonFolder = True
    config.JobType.disableAutomaticOutputCollection = True
    config.JobType.allowUndistributedCMSSW = True
    config.JobType.inputFiles = ['../python/runFrameworkOnGrid.py']
    config.JobType.outputFiles = ['output.root']

    config.Data.inputDBS = 'global'

    if is_mc:
        config.Data.splitting = 'FileBased'
    else:
        config.Data.splitting = 'LumiBased'

    config.Data.outLFNDirBase = '/store/user/%s/' % (getUsernameFromSiteDB())
    config.Data.publication = False

    config.Site.storageSite = 'T2_BE_UCL'

    return config
Example #6
	def getUsernameFromSiteDB_cache(self):
		if self.UsernameFromSiteDB:
			return self.UsernameFromSiteDB
		else:
			from CRABClient.UserUtilities import getUsernameFromSiteDB
			self.UsernameFromSiteDB = getUsernameFromSiteDB()
			return self.UsernameFromSiteDB
Example #7
    def fill_config_defaults(self):  ## here are the default values for running CRAB; feel free to test some options
        self.config.General.requestName = self.name
        self.config.General.workArea = self.workdir
        self.config.General.transferOutputs = True
        self.config.General.transferLogs = True
        self.config.User.voGroup = 'dcms'

        self.config.JobType.pluginName = 'Analysis'

        self.config.Data.inputDBS = 'global'
        self.config.Data.splitting = 'FileBased'
        self.config.Data.unitsPerJob = 3
        self.config.Data.totalUnits = -1
        self.config.Data.publication = False

        self.config.Data.outLFNDirBase = '/store/user/%s/zjets/skimming/%s' % (getUsernameFromSiteDB(), self.name)
        self.config.Site.storageSite = "T2_DE_DESY"
        #self.config.Site.storageSite = "T1_DE_KIT"

        #self.config.JobType.disableAutomaticOutputCollection = True
        self.config.JobType.outputFiles = ['skim76.root']
        #self.config.JobType.sendPythonFolder = True

        self.config.Data.ignoreLocality = True  # ignore data locality (read input via XRootD) for now
        #self.config.Site.whitelist = ['T2_CH_CERN','T2_DE_DESY','T1_DE_KIT','T2_DE_RWTH']
        self.config.Site.whitelist = ['T2_CH_CERN','T2_DE_DESY','T1_DE_KIT','T2_DE_RWTH','T2_US_*']
    def __init__(self, proxy=None, user=None):
        if proxy is None:
            proxy = os.getenv("X509_USER_PROXY")
            if not proxy or not os.path.isfile(proxy):
                raise CRABToolsException("X509_USER_PROXY is %r, get grid proxy first" % proxy)
        self.proxy = proxy

        if user is None:
            user = getUsernameFromSiteDB()
            if not user:
                raise CRABToolsException("could not get username from sitedb, returned %r" % user)
        self.user = user
    def __init__(self, proxy=None, user=None):
        if proxy is None:
            proxy = os.getenv('X509_USER_PROXY')
        if not proxy or not os.path.isfile(proxy):
            raise Crab3ToolsException('X509_USER_PROXY is %r, get grid proxy first with grid-proxy-init' % proxy)
        self.proxy = proxy

        if user is None:
            user = getUsernameFromSiteDB()
        if not user:
            raise Crab3ToolsException('could not get username from sitedb, returned %r' % user)
        self.user = user
def build_configs(filenames_per_sample_per_pipeline):
	today = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
	max_n_files_per_task = 8000
	
	filenames = []
	for sample, filenames_per_pipeline in filenames_per_sample_per_pipeline.iteritems():
		for pipeline, tmp_filenames in filenames_per_pipeline.iteritems():
			filenames.extend(tmp_filenames)
	
	configs = []
	jobfiles = []
	filenames_chunks = [filenames[index:index+max_n_files_per_task] for index in xrange(0, len(filenames), max_n_files_per_task)]
	for index, filenames_chunk in enumerate(filenames_chunks):
		
		# create job scripts
		jobfiles.append(str("svfit_%s_%d.sh" % (today, index)))
		with open(jobfiles[-1], "w+") as jobfile:
			jobfile.write(read_file(os.path.expandvars("$CMSSW_BASE/src/HiggsAnalysis/KITHiggsToTauTau/data/templates/crab_userjob_prefix.sh")))
			
			svfit_code = string.Template(read_file(os.path.expandvars("$CMSSW_BASE/src/HiggsAnalysis/KITHiggsToTauTau/data/templates/crab_svfit.sh")))
			jobfile.write(svfit_code.safe_substitute(
					input_files = "\n".join("arr[%d,0]=%s" % (i+1, f) for i, f in enumerate(filenames_chunk)),
					cwd=os.getcwd()
			))
			
			jobfile.close()
		
		# crab configuration
		configs.append(CRABClient.UserUtilities.config())
		configs[-1].General.workArea = os.path.abspath(os.path.expandvars("$ARTUS_WORK_BASE/../svfit_caches/%s/" % (today)))
		configs[-1].General.transferOutputs = True
		configs[-1].General.transferLogs = True
		configs[-1].General.requestName = ("svfit_%s_%d" % (today, index))[:100]
		log.debug("Job name: " + configs[-1].General.requestName)
		configs[-1].Data.outputPrimaryDataset = "Svfit"
		configs[-1].Data.splitting = "EventBased"
		configs[-1].Data.unitsPerJob = 1
		configs[-1].Data.totalUnits = len(filenames_chunk)
		configs[-1].Data.publication = False
		configs[-1].Data.outputDatasetTag = configs[-1].General.requestName
		configs[-1].Data.outLFNDirBase = "/store/user/%s/higgs-kit/Svfit/%s/"%(getUsernameFromSiteDB(), today)
		log.debug("Output directory: " + configs[-1].Data.outLFNDirBase)
		configs[-1].Data.publication = False
		configs[-1].User.voGroup = "dcms"
		configs[-1].JobType.pluginName = "PrivateMC"
		configs[-1].JobType.psetName = os.environ["CMSSW_BASE"]+"/src/CombineHarvester/CombineTools/scripts/do_nothing_cfg.py"
		configs[-1].JobType.inputFiles = [os.path.expandvars("$CMSSW_BASE/bin/$SCRAM_ARCH/ComputeSvfit"), jobfiles[-1]]
		configs[-1].JobType.allowUndistributedCMSSW = True
		configs[-1].JobType.scriptExe = jobfiles[-1]
		configs[-1].JobType.outputFiles = ["SvfitCache.tar"]
		configs[-1].Site.storageSite = "T2_DE_RWTH"
	
	return configs, jobfiles
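A sketch of a driver for build_configs(), mirroring the Process-based submission pattern used in the other examples; removing the generated job scripts afterwards is an assumption.

configs, jobfiles = build_configs(filenames_per_sample_per_pipeline)
for config, jobfile in zip(configs, jobfiles):
	p = Process(target=submit, args=(config,))
	p.start()
	p.join()
	os.remove(jobfile)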
def submission():
	from CRABClient.UserUtilities import config, getUsernameFromSiteDB
	config = config()

	##-- The name of your CRAB project
	config.General.requestName = 'KAPPA_FROM_AOD_SUSYGluGlu_Sync2015'
	#config.General.workArea = 'crab_projects'
	config.General.workArea = '/net/scratch_cms/institut_3b/%s/kappa/crab_kappa_skim-%s'%(getUsernameFromSiteDB(), date)

	##-- Transfer output ROOT files as well as log files (the log file is the FrameworkJobReport.xml produced by "cmsRun -j FrameworkJobReport.xml")
	check_path(config.General.workArea)	
	config.General.transferOutputs = True
	config.General.transferLogs = True

	##-- We want the special dcms role (better fair share at German grid sites).
	config.User.voGroup = 'dcms'

	##-- The scripts that are executed ('Analysis' means EDM input). psetName is the cmsRun config; scriptExe is a shell script that should include "cmsRun -j FrameworkJobReport.xml -p PSet.py" (PSet.py is the renamed config.JobType.psetName).
	config.JobType.pluginName = 'Analysis'
	config.JobType.sendPythonFolder = True
	config.JobType.psetName = 'AODtoMiniAOD_cfg.py'
	config.JobType.scriptExe = 'kappaWorkflow_privateMiniAOD.sh'
	#config.JobType.maxJobRuntimeMin = 2750
	#config.JobType.maxMemoryMB = 6000

	##-- Instead of collecting the output file automatically, take the result of the pset and rename it; this bypasses the EDM-file check and still allows the data to be published.
	config.JobType.disableAutomaticOutputCollection = True
	config.JobType.outputFiles = ['kappaTuple.root']

	##-- The dataset you want to process:

	config.Data.inputDataset = '/SUSYGluGluToHToTauTau_M-160_TuneCUETP8M1_13TeV-pythia8/RunIIFall15DR76-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/AODSIM'
	#'/DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIIFall15DR76-PU25nsData2015v1_76X_mcRun2_asymptotic_v12_ext1-v1/AODSIM'
	config.Data.inputDBS = 'global'
	config.Data.splitting = 'FileBased'
	config.Data.unitsPerJob = 1
	##-- If you want to run test jobs, set totalUnits to a small number and publication to False
	#config.Data.totalUnits = 10
	config.Data.publication = False
	
	##-- the output storage element
	config.Site.storageSite = 'T2_DE_DESY'
	config.Data.outLFNDirBase = '/store/user/%s/higgs-kit/skimming/80X_%s'%(getUsernameFromSiteDB(), date)
	
	##-- Run in xrootd mode (which allows you to run the jobs on all possible sites) 
	#config.Data.ignoreLocality = True
	#config.Site.whitelist = ['T2_CH_CERN','T2_DE_DESY','T1_DE_KIT','T2_DE_RWTH','T2_UK_London_IC', 'T2_US_MIT']
	

	p = Process(target=submit, args=(config,))
	p.start()
	p.join()
def getUsernameFromSiteDB_wrapped(logger, quiet = False):
    """
    Wrapper function for getUsernameFromSiteDB,
    catching exceptions and printing messages.
    """
    from CRABClient.UserUtilities import getUsernameFromSiteDB
    username = None
    msg = "Retrieving username from SiteDB..."
    if quiet:
        logger.debug(msg)
    else:
        logger.info(msg)
    infomsg  = "\n%sNote%s: Make sure you have the correct certificate mapped in SiteDB" % (colors.BOLD, colors.NORMAL)
    infomsg += " (you can check what is the certificate you currently have mapped in SiteDB"
    infomsg += " by searching for your name in https://cmsweb.cern.ch/sitedb/prod/people)."
    infomsg += " For instructions on how to map a certificate in SiteDB, see https://twiki.cern.ch/twiki/bin/viewauth/CMS/SiteDBForCRAB."
    try:
        username = getUsernameFromSiteDB()
    except ProxyException as ex:
        msg = "%sError%s: %s" % (colors.RED, colors.NORMAL, ex)
        if quiet:
            logger.debug(msg)
        else:
            logger.error(msg)
    except UsernameException as ex:
        msg  = "%sError%s: %s" % (colors.RED, colors.NORMAL, ex)
        msg += infomsg
        if quiet:
            logger.debug(msg)
        else:
            logger.error(msg)
    except Exception:
        msg  = "%sError%s: Failed to retrieve username from SiteDB." % (colors.RED, colors.NORMAL)
        msg += "\n%s" % (traceback.format_exc()) 
        if quiet:
            logger.debug(msg)
        else:
            logger.error(msg)
    else:
        msg = "Username is: %s" % (username)
        if quiet:
            logger.debug(msg)
        else:
            logger.info(msg)
    return username
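Typical use of the wrapper, assuming a standard logging setup:

import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
username = getUsernameFromSiteDB_wrapped(logger)
if username is None:
    raise SystemExit(1)  # the wrapper already logged the reason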
def get_user():
    from CRABClient.UserUtilities import getUsernameFromSiteDB

    LOG = logging.getLogger(__name__)
    user = '******'
    try:
        user = getUsernameFromSiteDB()
    except Exception as e:
        import traceback
        LOG.error(
            'Could not get user name from https://cmsweb.cern.ch/sitedb/data/prod/whoami')
        LOG.error(traceback.format_exc(e))
        import getpass
        user = getpass.getuser()
#         LOG.info('Guessing user from cert')
#         import subprocess
#         p = subprocess.Popen('voms-proxy-info -identity', stdout = subprocess.PIPE, shell = True)
#         result, _ = p.communicate()
#         USER = result.split(' ')[-1]
#         LOG.info('Found {0}'.format(USER))
    return user
Example #14
def getUsernameFromSiteDB_wrapped(logger, quiet = False):
    """
    Wrapper function for getUsernameFromSiteDB,
    catching exceptions and printing messages.
    """
    from CRABClient.UserUtilities import getUsernameFromSiteDB
    username = None
    msg = "Retrieving username from SiteDB..."
    if quiet:
        logger.debug(msg)
    else:
        logger.info(msg)
    try:
        username = getUsernameFromSiteDB()
    except ProxyException as ex:
        msg = "%sError%s: %s" % (colors.RED, colors.NORMAL, ex)
        if quiet:
            logger.debug(msg)
        else:
            logger.error(msg)
    except UsernameException as ex:
        msg = "%sError%s: %s" % (colors.RED, colors.NORMAL, ex)
        if quiet:
            logger.debug(msg)
        else:
            logger.error(msg)
    except Exception:
        msg  = "%sError%s: Failed to retrieve username from SiteDB." % (colors.RED, colors.NORMAL)
        msg += "\n%s" % (traceback.format_exc()) 
        if quiet:
            logger.debug(msg)
        else:
            logger.error(msg)
    else:
        msg = "Username is: %s" % (username)
        if quiet:
            logger.debug(msg)
        else:
            logger.info(msg)
    return username
config.General.requestName = 'inclBtoJPsiMuMu_pa_2nd_run_SKIM_STARTHI53_V27_ext1_v1'
config.General.workArea = 'crab_projects'
config.General.transferOutputs = True
config.General.transferLogs = True

config.section_('JobType')
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = 'inclBtoJPsiMuMu_pa_2nd_run_SKIM_STARTHI53_V27_cfg.py'
config.JobType.outputFiles = ['inclBtoJPsiMuMu_pa_2nd_run_SKIM_STARTHI53_V27_ext1.root']

config.section_('Data')
config.Data.inputDataset = '/inclBtoJPsiMuMu_5TeV02/pAWinter13DR53X-pa_2nd_run_STARTHI53_V27_ext1-v1/GEN-SIM-RECO'
config.Data.inputDBS = 'global'
config.Data.unitsPerJob = 1
#NJOBS = 10  # This is not a configuration parameter, but an auxiliary variable that we use in the next line.
#config.Data.totalUnits = config.Data.unitsPerJob * NJOBS
config.Data.splitting = 'FileBased'
#config.Data.outLFNDirBase = '/store/user/%s/pAWinter13/%s' % (getUsernameFromSiteDB(), config.General.requestName)
config.Data.outLFNDirBase = '/store/user/%s/pAWinter13ext1' % (getUsernameFromSiteDB())
#config.Data.publication = False
config.Data.publication = True
config.Data.outputDatasetTag =  config.General.requestName

config.section_('Site')
#config.Site.whitelist = ['T2_KR_KNU']
config.Site.storageSite = 'T2_KR_KNU'

# If your site is blacklisted by crab, use:
# config.Data.ignoreLocality = True
# config.Site.whitelist = ["T2_FR*"]
#config.JobType.scriptArgs
#config.JobType.sendPythonFolder
#config.JobType.externalPluginFile


#================================================================================================
# Data Section: Contains all parameters related to the data to be analyzed (incl. splitting params)
#================================================================================================
config.section_("Data")
config.Data.inputDataset = dataset
config.Data.inputDBS = 'global' #'phys03'
config.Data.splitting = 'FileBased'
#config.Data.totalUnits  = 10
config.Data.unitsPerJob = 5
config.Data.publication = False
config.Data.outLFNDirBase = '/store/user/%s/CRAB3_TransferData' % (getUsernameFromSiteDB())
# testing:
# config.Data.totalUnits    = 100000
# config.Data.unitsPerJob   = 10000 
# options:
# config.Data.allowNonValidInputDataset
# config.Data.outputPrimaryDataset
# config.Data.inputDBS
# config.Data.unitsPerJob
# config.Data.useParent
# config.Data.secondaryInputDataset
# config.Data.lumiMask
# config.Data.runRange
# config.Data.outLFNDirBase
# config.Data.publication
# config.Data.publishDBS
def submission(base_path):

	today = datetime.date.today().strftime("%Y-%m-%d")
	max_n_files_per_task = 8000
	filename_replacements = {
		"srm://grid-srm.physik.rwth-aachen.de:8443/srm/managerv2?SFN=/pnfs/physik.rwth-aachen.de/cms/store/user/" : "root://grid-vo-cms.physik.rwth-aachen.de:1094//store/user/"
	}
	
	# retrieve and prepare input files
	stdout_directories, stderr_directories = tools.subprocessCall(shlex.split("gfal-ls " + base_path))
	for sample in stdout_directories.decode().strip().split("\n"):
		stdout_files, stderr_files = tools.subprocessCall(shlex.split("gfal-ls " + os.path.join(base_path, sample)))
		filenames = [filename for filename in stdout_files.decode().strip().split("\n") if (("SvfitCache" in filename) and filename.endswith(".root"))]
		if len(filenames) > 0:
			filenames = [os.path.join(base_path, sample, filename) for filename in filenames]
			pipelines_filenames = {}
			for filename in filenames:
				for src, dst in filename_replacements.iteritems():
					filename = filename.replace(src, dst)
				pipeline = re.search(r"SvfitCache(?P<pipeline>.*)\d+\.root", filename).groupdict()["pipeline"]
				pipelines_filenames.setdefault(pipeline, []).append(filename)
			
			for pipeline, filenames in pipelines_filenames.iteritems():
				filenames_chunks = [filenames[index:index+max_n_files_per_task] for index in xrange(0, len(filenames), max_n_files_per_task)]
				for index, filenames_chunk in enumerate(filenames_chunks):
					
					# create job scripts
					jobfile_name = str("svfit_%s_%s_%s_%d.sh" % (today, sample, pipeline, index))
					with open(jobfile_name, "w+") as jobfile:
						jobfile.write(read_file(os.path.expandvars("$CMSSW_BASE/src/HiggsAnalysis/KITHiggsToTauTau/data/templates/crab_userjob_prefix.sh")))
						
						svfit_code = string.Template(read_file(os.path.expandvars("$CMSSW_BASE/src/HiggsAnalysis/KITHiggsToTauTau/data/templates/crab_svfit.sh")))
						jobfile.write(svfit_code.safe_substitute(
								input_files = "\n".join("arr[%d,0]=%s" % (i+1, f) for i, f in enumerate(filenames_chunk)),
								cwd=os.getcwd()
						))
						
						jobfile.close()
					
					# crab configuration
					config = CRABClient.UserUtilities.config()
					config.General.workArea = os.path.abspath(os.path.expandvars("$ARTUS_WORK_BASE/../svfit_caches/%s/" % (today)))
					config.General.transferOutputs = True
					config.General.transferLogs = True
					config.General.requestName = ("%s_%s_%d" % (sample, pipeline, index))[:100]
					log.info("Job name: " + config.General.requestName)
					config.Data.outputPrimaryDataset = "Svfit"
					config.Data.splitting = "EventBased"
					config.Data.unitsPerJob = 1
					config.Data.totalUnits = len(filenames_chunk)
					config.Data.publication = False
					config.Data.outputDatasetTag = config.General.requestName
					config.Data.outLFNDirBase = "/store/user/%s/higgs-kit/Svfit/%s/"%(getUsernameFromSiteDB(), today)
					log.info("Output directory: " + config.Data.outLFNDirBase)
					config.Data.publication = False
		
					config.User.voGroup = "dcms"
		
					config.JobType.pluginName = "PrivateMC"
					config.JobType.psetName = os.environ["CMSSW_BASE"]+"/src/CombineHarvester/CombineTools/scripts/do_nothing_cfg.py"
					# config.JobType.inputFiles = ["Kappa/lib/libKappa.so", os.environ["CMSSW_BASE"]+"/bin/"+os.environ["SCRAM_ARCH"]+"/ComputeSvfit", jobfile_name]
					config.JobType.inputFiles = [os.path.expandvars("$CMSSW_BASE/bin/$SCRAM_ARCH/ComputeSvfit"), jobfile_name]
					config.JobType.allowUndistributedCMSSW = True
					config.JobType.scriptExe = jobfile_name
					config.JobType.outputFiles = ["SvfitCache.tar"]
		
					config.Site.storageSite = "T2_DE_DESY"
					# config.Site.blacklist = ["T3_US_PuertoRico", "T2_ES_CIEMAT", "T2_DE_RWTH", "T3_US_Colorado", "T2_BR_UERJ", "T2_ES_IFCA", "T2_RU_JINR", "T2_UA_KIPT", "T2_EE_Estonia", "T2_FR_GRIF_LLR", "T2_CH_CERN", "T2_FR_GRIF_LLR", "T3_IT_Bologna", "T2_US_Nebraska", "T2_US_Nebraska", "T3_TW_NTU_HEP", "T2_US_Caltech", "T3_US_Cornell", "T2_IT_Legnaro", "T2_HU_Budapest", "T2_IT_Pisa", "T2_US_Florida", "T2_IT_Bari", "T2_FR_GRIF_IRFU", "T2_IT_Rome", "T2_FR_GRIF_IRFU", "T2_CH_CSCS", "T3_TW_NCU"]
					p = Process(target=submit, args=(config,))
					p.start()
					p.join()
					
					os.remove(jobfile_name)
Example #18
def crab_command(command):
	for dir in glob('/nfs/dust/cms/user/%s/crab_kappa_skim-%s/*'%(getUsernameFromSiteDB(), date)):
		try:
			crabCommand(command, dir = dir)
		except HTTPException as hte:
			print hte
from CRABClient.UserUtilities import config, getUsernameFromSiteDB
config = config()

config.General.requestName = 'name_2'
config.General.workArea = 'projects_2'
config.General.transferOutputs = True
config.General.transferLogs = True

config.JobType.pluginName = 'Analysis'
config.JobType.psetName = 'bprimeKit_miniAOD.py'
config.JobType.inputFiles = ['dataEIDMVA']

config.Data.inputDataset = '/TTJets_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring15DR74-Asympt25ns_MCRUN2_74_V9-v2/MINIAODSIM'

config.Data.inputDBS = 'global'
config.Data.splitting = 'FileBased'
config.Data.unitsPerJob = 1
# config.Data.totalUnits  =  50 ## Disabled to run all
config.Data.outLFNDirBase = '/store/user/%s/BPRIMEKIT_PERSONAL_TESTING' % (getUsernameFromSiteDB())
#config.Data.publication = True
#config.Data.publishDataName = 'CRAB3_tutorial_May2015_MC_analysis'

config.Site.storageSite = 'T3_TW_NTU_HEP'
Example #20
### DATA configuration
config.Data.inputDataset = '/HplusToTauNu-M500/amarini-amarini_PrivateMC_HPlusToTauNu_June2015-16aa19d591b8b49c55c4508e7a7c9233/USER'
#config.Data.inputDBS = 'phys03'
config.Data.inputDBS = 'global'
config.Data.ignoreLocality = False

config.Data.splitting = 'FileBased'
config.Data.unitsPerJob = 10
config.Data.totalUnits = -1

tag = check_output(
    "git describe --tags | cut -d'-' -f 1 | tr -d '\n' ", shell=True)
print "-> current tag is '" + tag + "'"
config.Data.outLFNDirBase = '/store/user/%s/Nero/%s/' % (
    getUsernameFromSiteDB(), tag)
config.Data.publication = False
config.Data.outputDatasetTag = 'NeroNtuples'

config.Site.storageSite = 'T2_CH_CERN'
#config.Site.blacklist = [ 'T2_US_Florida','T2_US_Vanderbilt']

if __name__ == '__main__':

    from CRABAPI.RawCommand import crabCommand
    from CRABClient.ClientExceptions import ClientException
    from httplib import HTTPException

    # We want to put all the CRAB project directories from the tasks we submit here into one common directory.
    # That's why we need to set this parameter (here or above in the configuration file, it does not matter, we will not overwrite it).
    config.General.workArea = 'NeroSubmission2'
# For information on config parameters, see
# https://twiki.cern.ch/twiki/bin/view/CMSPublic/CRAB3ConfigurationFile

# section General
config.General.requestName = name
config.General.workArea = 'crab_test_' + name + '_Run' + str( runNom )
config.General.transferOutputs = True
config.General.transferLogs = True

# section JobType
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = 'hlt.py'
config.JobType.outputFiles = ['hltbits.root']
config.JobType.numCores = 16 # sorry, but the HLT step needs this many cores


# section Data
config.Data.inputDataset = '/HLTPhysics/Run2016B-v2/RAW'
config.Data.splitting = 'LumiBased'
config.Data.unitsPerJob = 71  # use crab submit --dryrun *.py to find optimal splitting 
config.Data.lumiMask = 'lumimask_Run274998.txt' # specifies good lumi sections to be used
config.Data.totalUnits = -1 # analyze all events after applying the lumi mask
config.Data.runRange = str( runNom )
config.Data.outLFNDirBase = '/store/user/%s/' % (getUsernameFromSiteDB()) + '/' + name + '_Run' + str( runNom )
config.Data.publication = False # no need to publish the results
config.Data.outputDatasetTag = name
config.Data.ignoreLocality = True

# section Site
config.Site.storageSite = 'T3_US_FNALLPC'
config.General.transferOutputs = True
config.General.transferLogs = True

config.section_('JobType')
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = 'onia2MuMuPATHI_7xy_PbPbPrompt_cfg.py'

config.section_('Data')
config.Data.inputDataset ='/HIOniaL1DoubleMu0/HIRun2015-PromptReco-v1/AOD'
config.Data.inputDBS = 'global'
config.Data.unitsPerJob = 10
config.Data.splitting = 'LumiBased'
config.Data.runRange = '262548-263757'


### Use when running the first time
config.Data.lumiMask = '/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions15/HI/DCSOnly/json_DCSONLY.txt'
### When submitting the jobs again, please use:
#config.Data.lumiMask = '<NAME OF MISSING LUMI MASK FROM PREVIOUS CRAB JOB>'
# The missing lumi mask can be obtained by running crab report -d <path to crab job dir>
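# The same JSON can also be produced from the API instead of the command line
# (a sketch; the path is a placeholder):
#   from CRABAPI.RawCommand import crabCommand
#   crabCommand('report', dir='<path to crab job dir>')
# which writes the missing-lumi JSON files under the task's results/ directory.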


config.Data.publication = True
config.Data.outputDatasetTag = 'HIOniaL1DoubleMu0_HIRun2015-PromptReco-v1_Run_262548_263757_ONIASKIM'
config.Data.outLFNDirBase = '/store/user/%s/HIPromptReco/%s' % (getUsernameFromSiteDB(), config.Data.outputDatasetTag)


config.section_('Site')
config.Site.storageSite = 'T2_FR_GRIF_LLR'

Example #23
from CRABClient.UserUtilities import config, getUsernameFromSiteDB
config = config()

config.General.requestName = 'amarini_Moriond18_GluGluMH125'
config.General.workArea = 'crab_privateMCProduction'
config.General.transferOutputs = True
config.General.transferLogs = False

config.JobType.pluginName = 'PrivateMC'
#config.JobType.disableAutomaticOutputCollection = True
config.JobType.maxMemoryMB = 2500
config.JobType.psetName = 'fake.py' ## fake the last step -> step4 + empty source
config.JobType.inputFiles = ['scriptExe.sh', 'step1.py','step2.py','step3.py','step4.py','pu.py']
config.JobType.scriptExe='scriptExe.sh'
config.JobType.numCores=1

config.Data.splitting = 'EventBased'
config.Data.unitsPerJob = 500
config.Data.totalUnits = 200000
config.Data.outLFNDirBase = '/store/group/phys_higgs/cmshmm/%s/' % (getUsernameFromSiteDB())
config.Data.publication = True
config.Data.outputPrimaryDataset = 'GluGlu_HToMuMu_M125_13TeV_amcatnloFXFX_pythia8'
config.Data.outputDatasetTag ='Fall17_94X-MINIAODSIM'

config.Site.storageSite = 'T2_CH_CERN'

Example #24
from CRABClient.UserUtilities import config, getUsernameFromSiteDB
config = config()

config.General.requestName = 'PYTHIA8_MC_Higgs_M900_GEN'
config.General.workArea = 'crab_projects'
config.General.transferOutputs = True
config.General.transferLogs = False

config.JobType.pluginName = 'PrivateMC'
config.JobType.psetName = 'step1_m900.py'
config.JobType.maxMemoryMB = 2500

#/MinBias_TuneCUETP8M1_13TeV-pythia8/RunIIWinter15GS-MCRUN2_71_V1-v1/GEN-SIM
#config.Data.inputDataset = '/MinBias_TuneCUETP8M1_13TeV-pythia8/RunIIWinter15GS-MCRUN2_71_V1-v1/GEN-SIM'
config.Data.primaryDataset = 'HplusToTauNu-M900'
config.Data.splitting = 'EventBased'
config.Data.unitsPerJob = 1000
#NJOBS = 20  # This is not a configuration parameter, but an auxiliary variable that we use in the next line.
config.Data.totalUnits = 100000
config.Data.outLFNDirBase = '/store/user/%s/mc/' % (getUsernameFromSiteDB())
config.Data.publication = True
config.Data.publishDataName ='%s_PrivateMC_HPlusToTauNu_June2015_GENSIMRAW'% (getUsernameFromSiteDB())

config.Site.storageSite = 'T2_CH_CERN'
#config.Site.blacklist = ['T2_US_Florida', 'T2_BR_*', 'T2_RU_*']

from CRABClient.UserUtilities import config, getUsernameFromSiteDB

config = config()

config.section_("General")
config.General.requestName = "JpsiMM_5p02TeV_TuneCUETP8M1_ptJpsi69_ONIASKIM_20151209"
config.General.workArea = "crab_projects"
config.General.transferOutputs = True
config.General.transferLogs = True

config.section_("JobType")
config.JobType.pluginName = "Analysis"
config.JobType.psetName = "onia2MuMuPATHI_7xy_PbPb_MC_cfg.py"

config.section_("Data")
config.Data.inputDataset = "/JpsiMM_5p02TeV_TuneCUETP8M1_ptJpsi69/echapon-JpsiMM_5p02TeV_TuneCUETP8M1_ptJpsi69_step3_20151208-c5e5b4508236081d2fa5bf691a689da0/USER"
config.Data.inputDBS = "phys03"
config.Data.unitsPerJob = 1
config.Data.splitting = "FileBased"
config.Data.outLFNDirBase = "/store/user/%s/PbPbMC2015/%s" % (getUsernameFromSiteDB(), config.General.requestName)
config.Data.publication = True
config.Data.outputDatasetTag = config.General.requestName

config.section_("Site")
config.Site.whitelist = ["T2_FR_GRIF_LLR"]
config.Site.storageSite = "T2_FR_GRIF_LLR"
Example #26
config.JobType.pluginName  = 'Analysis'
# feed in any additional input files
config.JobType.inputFiles = []
config.JobType.inputFiles.extend(additionalInputFiles)
config.JobType.psetName    = '' # overridden per dataset
# need to execute the user_script
#config.JobType.scriptExe = 'user_script.sh'
config.Data.inputDataset = '' # overridden per dataset
config.Data.inputDBS = 'global'
config.Data.splitting = 'FileBased' #LumiBased for data
config.Data.unitsPerJob = 1 # overridden per dataset
config.Data.totalUnits = -1 # overridden per dataset
# no publishing
config.Data.publication = False
config.Data.outputDatasetTag = 'LQ' #overridden for data
config.Data.outLFNDirBase = '/store/group/phys_exotica/leptonsPlusJets/RootNtuple/RunII/%s/' % (getUsernameFromSiteDB()) + options.tagName + '/'
#config.Data.outLFNDirBase = '/store/user/%s/' % (getUsernameFromSiteDB()) + topDirName + '/'
if options.eosDir is not None:
  # split off /eos/cms if it is there
  if options.eosDir.startswith('/eos/cms'):
    options.eosDir = options.eosDir.split('/eos/cms')[-1]
  if not options.eosDir.startswith('/store'):
    print 'eosDir must start with /eos/cms/store or /store and you specified:',options.eosDir
    print 'quit'
    exit(-1)
  outputLFN=options.eosDir
  if not outputLFN[-1]=='/':
    outputLFN+='/'
  outputLFN+=options.tagName+'/'
  if getUsernameFromSiteDB() not in outputLFN:
    outputLFN = outputLFN.rstrip('/')  # rstrip returns a new string, so reassign it
Example #27
parser.add_argument('--output', required=True, dest='output', type=str,
                    help="output path after /store/user/USERNAME")
parser.add_argument('job_file', type=str, nargs='+', help="text file with jobs descriptions")
args = parser.parse_args()

from CRABClient.UserUtilities import config, ClientException, getUsernameFromSiteDB
from CRABAPI.RawCommand import crabCommand
from httplib import HTTPException

config = config()

config.General.workArea = 'work_area'

config.JobType.pluginName = 'Analysis'
config.JobType.psetName = args.cfg

config.Data.inputDBS = 'global'
config.General.transferOutputs = True
config.General.transferLogs = True
config.Data.publication = False

config.Site.storageSite = args.site
config.Data.outLFNDirBase = "/store/user/{}/{}".format(getUsernameFromSiteDB(), args.output)

from crab_tools import JobCollection
for job_file in args.job_file:
    job_collection = JobCollection(job_file)
    print job_file
    print job_collection
    job_collection.submit(config,args.dryrun)
Example #28
from CRABClient.UserUtilities import config, getUsernameFromSiteDB

config = config()

config.General.requestName = "noBMuMinusIdeal"
config.General.workArea = "crab_projects"
config.General.transferOutputs = True
config.General.transferLogs = False

config.User.voGroup = "dcms"

config.JobType.pluginName = "PrivateMC"
config.JobType.psetName = "SingleMuPt5to200_cfi_GEN_SIM_DIGI_DIGI2RAW_RAW2DIGI_L1_L1Reco_Reco.py"
config.JobType.maxMemoryMB = 2500

config.Data.splitting = "EventBased"
config.Data.unitsPerJob = 10000
NJOBS = 100  # This is not a configuration parameter, but an auxiliary variable that we use in the next line.
config.Data.totalUnits = config.Data.unitsPerJob * NJOBS
config.Data.outLFNDirBase = "/store/user/%s/SingleMuMinus" % (getUsernameFromSiteDB())
config.Data.publication = False
config.Site.storageSite = "T2_DE_RWTH"
Example #29
# Max requestName is 100 characters
if len(config.General.requestName) > 100:
    bits = 5
    h = hashlib.sha256(config.General.requestName).hexdigest()
    # Replace last 5 characters with hash in case of duplicates after truncation
    config.General.requestName = config.General.requestName[:(100-bits)] + h[:bits]
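# The truncation above could equally be wrapped in a small helper (a sketch, not
# part of the original):
def shorten_request_name(name, limit=100, bits=5):
    # Truncate to the CRAB limit, replacing the tail with a short hash to avoid
    # collisions between names that only differ after the cut.
    if len(name) <= limit:
        return name
    return name[:limit - bits] + hashlib.sha256(name).hexdigest()[:bits]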

config.JobType.pyCfgParams = configParams

# Things that don't change with dataset
config.General.workArea = '.'
config.General.transferOutputs = True
config.General.transferLogs = True

config.JobType.pluginName = 'ANALYSIS'
config.JobType.psetName = '%s/src/UWVV/Ntuplizer/test/ntuplize_cfg.py' % os.environ["CMSSW_BASE"]
config.JobType.numCores = 1
config.JobType.inputFiles = ["%s/src/UWVV/data" % os.environ["CMSSW_BASE"]]

config.Data.inputDBS = 'global' if 'USER' not in dataset else 'phys03'
config.Data.useParent = False
config.Data.publication = False
outdir = localSettings.get("local", "outLFNDirBase").replace(
    "$USER", getUsernameFromSiteDB()).replace("$DATE", today)
# Useful for VBFNLO samples
#config.Site.whitelist = ['T2_DE_DESY']
config.Data.outLFNDirBase = outdir 
config.Data.ignoreLocality = False

config.Site.storageSite = localSettings.get("local", "storageSite")
from WMCore.Configuration import Configuration

config = Configuration()
config.section_('General')
config.General.transferOutputs = True
config.General.transferLogs = True
config.General.requestName = 'BuToJpsiK_MC_GENOnly_8TeV_Ntuples_v5'
config.section_('JobType')
config.JobType.psetName = './BuToJpsiK_MC_GENOnly.py'
config.JobType.pluginName = 'Analysis'
#config.JobType.pluginName = 'privateMC'
#config.JobType.outputFiles = ['BToJpsiK_GENOnly_8TeV_Ntuple.root']
config.section_('Data')
config.Data.inputDataset = '/PYTHIA6_BuToJpsiK_GENOnly_8TeV/gechen-crab_BuToJpsiKMuMu_MC_GENOnly_8TeV-387bf2b3df13ffa8b4f3dd9f3950e077/USER'
#config.Data.outputPrimaryDataset = 'PYTHIA6_BuToJpsiK_GENOnly_8TeV_Ntuple_v4'
config.Data.outputDatasetTag = 'PYTHIA6_BuToJpsiK_GENOnly_8TeV_Ntuple_v5'
config.Data.unitsPerJob = 2
config.Data.inputDBS = 'phys03'
config.Data.splitting = 'FileBased'
config.Data.ignoreLocality = True
config.Data.publishDBS = 'phys03'
config.Data.publication = True
config.Data.outLFNDirBase = '/store/user/%s/' % (getUsernameFromSiteDB())
#config.Data.outLFNDirBase = '/store/user/gechen/'
config.section_('User')
config.section_('Site')
config.Site.storageSite = 'T2_CH_CERN'
#config.Site.whitelist = ["T2_CH*"]




Example #31
### this is an example for running on RECO
### the file must be renamed to crab.cfg for actual running

from CRABClient.UserUtilities import config, getUsernameFromSiteDB

config = config()
config.General.requestName = 'PbPb_eff_v5'
config.General.workArea = 'PbPb_eff_v5'
config.General.transferOutputs = True
config.General.transferLogs = True
config.JobType.allowUndistributedCMSSW = True

config.JobType.pluginName = 'Analysis'
config.JobType.psetName = 'run_TrackPerformance_cfg.py'

config.Data.inputDBS = 'phys03'
config.Data.inputDataset = '/Hydjet_Quenched_MinBias_5020GeV_750/velicanu-Hydjet_Quenched_MinBias_5020GeV_750_RECODEBUG_v0-eb8cf5655150b59e96d879ea397567ad/USER'
#config.Data.inputDataset = '/PYTHIA_QCD_TuneCUETP8M1_cfi_GEN_SIM_5020GeV/velicanu-Pythia8_Dijet80_pp_TuneCUETP8M1_Hydjet_MinBias_5020GeV_PrivMC-282baa650b1997daa0dd8689f6a69785/USER'
config.Data.splitting = 'FileBased'
config.Data.ignoreLocality = False
config.Data.unitsPerJob = 10
config.Data.outLFNDirBase = '/store/user/%s/' % (getUsernameFromSiteDB())
config.Data.publication = False
config.Site.storageSite = 'T2_US_MIT'
config.Site.whitelist = ['T2_US_MIT']