def submit_jobs(output_directory, tag, samples, events_total, events_per_output, tarfile):
    # Long loop to monitor jobs and resubmit failed jobs
    for i in range(10000):
        total_summary = {}

        # Loop through samples
        for sample in samples:
            config_info = samples_config[sample]

            executable = create_executable(tag, sample, config_info)

            task = CondorTask(
                sample=DummySample(
                    N=int(float(events_total) / float(events_per_output)),
                    nevents=events_total,
                    dataset="/" + sample + "_NANO"),
                tag=tag,
                special_dir=output_directory,
                events_per_output=events_per_output,
                total_nevents=events_total,
                split_within_files=True,
                executable=executable,
                open_dataset=False,
                tarfile=tarfile,
                condor_submit_params=CONDOR_SUBMIT_PARAMS)

            task.process()
            total_summary[task.get_sample().get_datasetname()] = task.get_task_summary()

        StatsParser(data=total_summary,
                    webdir="~/public_html/dump/Hgg-MC-Production/").do()

        time.sleep(300)  # power nap
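# A minimal sketch of how submit_jobs might be driven from a top-level script.
# Every argument value below is a placeholder: the sample name is assumed to be
# a key in the module-level samples_config dict that submit_jobs reads, and the
# tarball name is hypothetical.
if __name__ == "__main__":
    submit_jobs(
        output_directory="Hgg-MC-Production",
        tag="v1",
        samples=["GluGluHToGG_M125"],   # hypothetical samples_config key
        events_total=1000000,
        events_per_output=2000,
        tarfile="package.tar.gz",       # hypothetical tarball shipped to workers
    )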
def submit(which):
    total_summary = {}

    extra_requirements = "true"

    tag = "v0_noFilter"
    pdname = "HVNoFilter"
    events_per_point = int(1E4)
    events_per_job = int(100)
    cfgsDir = "psets_gensim_noFilter/"
    modelsFile = cfgsDir + "/models.txt"
    df = pd.read_csv(modelsFile)

    for year in ["2016", "2017", "2018"]:
        for iterr, row in df.iterrows():
            # fmass = float(mass)
            # mass = str(mass).replace(".","p")
            epp = int(events_per_point)

            reqname = "noFilter_m{}{}_ctau{}_xi_{}_{}_{}".format(
                row.portal, row.mass, row.ctau, row.xi, tag, year)
            njobs = epp // events_per_job
            sample = DummySample(
                dataset="/{}/params_{}_m_{}_ctau_{}mm_xi_{}_{}/LLPNTUPLE".format(
                    pdname, row.portal, row.mass, row.ctau * 10, row.xi, year),
                N=njobs,
                nevents=epp)
            task = CondorTask(
                sample=sample,
                output_name="output.root",
                executable="executables/condor_executable_{}.sh".format(which),
                tarfile="package_{}.tar.xz".format(year),
                open_dataset=True,
                files_per_output=1,
                condor_submit_params={
                    "classads": [
                        ["param_mass", row.mass],
                        ["param_ctau", row.ctau],
                        ["param_xi", str(row.xi).replace(".", "p")],
                        ["param_portal", row.portal],
                        ["param_year", year],
                        ["param_nevents", events_per_job],
                        ["metis_extraargs", ""],
                        ["JobBatchName", reqname],
                    ],
                    "requirements_line":
                        'Requirements = ((HAS_SINGULARITY=?=True) && (HAS_CVMFS_cms_cern_ch =?= true) && {extra_requirements})'.format(
                            extra_requirements=extra_requirements),
                },
                tag=tag,
                recopy_inputs=True)

            task.process()
            total_summary[task.get_sample().get_datasetname()] = task.get_task_summary()

    StatsParser(data=total_summary,
                webdir="~/public_html/dump/metis_test/").do()
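# The loop above assumes models.txt is a CSV with one row per signal point and
# (at least) the columns portal, mass, ctau, and xi, accessed as row.portal etc.
# The rows below are purely illustrative; a minimal, self-contained check of
# that assumed layout:
import io

import pandas as pd

example_models_csv = io.StringIO(
    "portal,mass,ctau,xi\n"     # header matching the row attributes used above
    "vector,1.5,10,1\n"         # hypothetical signal point
    "vector,4.0,100,0.5\n"      # hypothetical signal point
)
example_df = pd.read_csv(example_models_csv)
for _, row in example_df.iterrows():
    print(row.portal, row.mass, row.ctau, row.xi)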
def submit():
    requests = {
        #'TTWJetsToLNuEWK_5f_NLO': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWJetsToLNuEWK_5f_NLO_slc6_amd64_gcc630_CMSSW_9_3_16_tarball_retired.tar.xz',  # that's the SM point, but using the SMEFT model. No lepton filtering, so name is actually confusing
        #'TTWJetsToLNuEWK_5f_NLO_v2': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWJetsToLNuEWK_5f_NLO_slc7_amd64_gcc730_CMSSW_9_3_16_tarball.tar.xz',  # that's the actual SM
        #'TTWplusJetsToLNuEWK_5f_NLO_v2': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWplusJetsToLNuEWK_5f_NLO_slc7_amd64_gcc730_CMSSW_9_3_16_tarball.tar.xz',  # that's the actual SM
        #'TTWminusJetsToLNuEWK_5f_NLO_v2': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWminusJetsToLNuEWK_5f_NLO_slc7_amd64_gcc730_CMSSW_9_3_16_tarball.tar.xz',  # that's the actual SM
        #'TTWJetsToLNuEWK_5f_EFT_myNLO_full': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWJetsToLNuEWK_5f_EFT_myNLO_cpt8_slc6_amd64_gcc630_CMSSW_9_3_16_tarball.tar.xz',  # one of the BSM points
        #'TTWJetsToLNuEWK_5f_EFT_mix_myNLO_full': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWJetsToLNuEWK_5f_EFT_myNLO_slc6_amd64_gcc630_CMSSW_9_3_16_tarball.tar.xz',  # EFT mix
        'TTWJetsToLNuEWK_5f_EFT_cpq3_4_myNLO_full': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks//TTWJetsToLNuEWK_5f_EFT_myNLO_cpq3_4_slc7_amd64_gcc730_CMSSW_9_3_16_tarball.tar.xz',  # C_pq3 = 4
    }

    total_summary = {}

    extra_requirements = "true"

    tag = "v4"
    events_per_point = 500000  # produced 500k events before
    events_per_job = 2000  # up to 2000 works
    #events_per_point = 50
    #events_per_job = 10
    njobs = int(events_per_point)//events_per_job

    for reqname in requests:
        gridpack = requests[reqname]

        #reqname = "TTWJetsToLNuEWK_5f_EFT_myNLO"
        #gridpack = '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWJetsToLNuEWK_5f_EFT_myNLO_slc6_amd64_gcc630_CMSSW_9_3_16_tarball.tar.xz'

        task = CondorTask(
            sample = DummySample(dataset="/%s/RunIIAutumn18/NANO"%reqname, N=njobs, nevents=int(events_per_point)),
            output_name = "nanoAOD.root",
            executable = "executables/condor_executable_Autumn18.sh",
            tarfile = "package.tar.gz",
            #scram_arch = "slc7_amd64_gcc630",
            open_dataset = False,
            files_per_output = 1,
            arguments = gridpack,
            condor_submit_params = {
                "sites":"T2_US_UCSD",  #
                "classads": [
                    ["param_nevents",events_per_job],
                    ["metis_extraargs",""],
                    ["JobBatchName",reqname],
                    #["SingularityImage", "/cvmfs/singularity.opensciencegrid.org/cmssw/cms:rhel6-m202006"],
                ],
                "requirements_line": 'Requirements = (HAS_SINGULARITY=?=True)'  # && (HAS_CVMFS_cms_cern_ch =?= true) && {extra_requirements})'.format(extra_requirements=extra_requirements),
            },
            tag = tag,
            min_completion_fraction = 0.90,
        )

        task.process()
        total_summary[task.get_sample().get_datasetname()] = task.get_task_summary()

    StatsParser(data=total_summary, webdir="~/public_html/dump/tW_gen/").do()
def submit():
    requests = {
        'TTWJetsToLNuEWK_5f_EFT_myNLO': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWJetsToLNuEWK_5f_EFT_myNLO_slc6_amd64_gcc630_CMSSW_9_3_16_tarball.tar.xz',  # that's EFT
        'TTWJetsToLNuEWK_5f_EFT_myNLO_cpt8': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWJetsToLNuEWK_5f_EFT_myNLO_cpt8_slc6_amd64_gcc630_CMSSW_9_3_16_tarball.tar.xz',  # that's EFT
        'TTWJetsToLNuEWK_5f_NLO': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWJetsToLNuEWK_5f_NLO_slc6_amd64_gcc630_CMSSW_9_3_16_tarball.tar.xz',  # that's the SM
    }

    total_summary = {}

    extra_requirements = "true"

    tag = "v1"
    events_per_point = 500000
    events_per_job = 10000
    #events_per_point = 500
    #events_per_job = 100
    njobs = int(events_per_point)//events_per_job

    for reqname in requests:
        gridpack = requests[reqname]

        #reqname = "TTWJetsToLNuEWK_5f_EFT_myNLO"
        #gridpack = '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWJetsToLNuEWK_5f_EFT_myNLO_slc6_amd64_gcc630_CMSSW_9_3_16_tarball.tar.xz'

        task = CondorTask(
            sample = DummySample(dataset="/%s/RunIIAutumn18/NANOGEN"%reqname, N=njobs, nevents=int(events_per_point)),
            output_name = "output.root",
            executable = "executables/condor_executable.sh",
            tarfile = "package.tar.gz",
            open_dataset = False,
            files_per_output = 1,
            arguments = gridpack,
            condor_submit_params = {
                "sites":"T2_US_UCSD",  #
                "classads": [
                    ["param_nevents",events_per_job],
                    ["metis_extraargs",""],
                    ["JobBatchName",reqname],
                ],
                "requirements_line": 'Requirements = ((HAS_SINGULARITY=?=True) && (HAS_CVMFS_cms_cern_ch =?= true) && {extra_requirements})'.format(extra_requirements=extra_requirements),
            },
            tag = tag,
        )

        task.process()
        total_summary[task.get_sample().get_datasetname()] = task.get_task_summary()

    StatsParser(data=total_summary, webdir="~/public_html/dump/tW_gen/").do()
def main():
    main_dir = os.path.dirname(os.path.abspath(__file__))
    metis_path = os.path.dirname(os.path.dirname(metis.__file__))
    exec_path = main_dir + "/metis.sh"
    hadoop_path = "metis/"
    metis_dashboard_path = os.path.join(metis_path, "dashboard")
    job_tag = ""

    total_summary = {}

    os.chdir(metis_path)

    while True:
        masspoints = [125]

        tasks = []

        for mass in masspoints:
            miniaod = CondorTask(
                ## Dummy sample as no input is needed in generating the events
                #sample = DummySample(
                #    N=3000,
                #    dataset="/VHToNonbb_M125_13TeV_amcatnloFXFX_madspin_pythia8/PRIVATE-RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_ext1-v1/MINIAODSIM"
                #    ),
                # Dummy sample as no input is needed in generating the events
                sample = DummySample(
                    N=3500,
                    dataset="/WWW_4F_TuneCP5_13TeV-amcatnlo-pythia8/PRIVATE-RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14-v1/MINIAODSIM"
                ),
                tag = job_tag,
                executable = exec_path,
                special_dir = hadoop_path + "/private_miniaod",
                output_name = "output.root",
                files_per_output = 1,
                condor_submit_params = {"sites" : "T2_US_UCSD"},
                open_dataset = True,
                flush = True,
                #no_load_from_backup = True,
            )

            tasks.extend([miniaod])

        all_tasks_complete = True
        for task in tasks:
            task.process()
            summary = task.get_task_summary()
            total_summary[task.get_sample().get_datasetname()] = summary
            all_tasks_complete = all_tasks_complete and task.complete()

        # Parse the total summary and write out the dashboard
        StatsParser(data=total_summary, webdir=metis_dashboard_path).do()

        # Print the msummary table so I don't have to load up the website
        os.system("msummary -v0 -p ext2 | tee summary.txt")
        os.system("chmod -R 755 {}".format(metis_dashboard_path))

        # If all tasks are done, exit the loop
        if all_tasks_complete:
            print ""
            print "Job={} finished".format(job_tag)
            print ""
            break

        # Neat trick to not exit the script while still allowing a forced update
        print 'Press Ctrl-C to force update, otherwise will sleep for 300 seconds'
        try:
            for i in range(0, 300):
                sleep(1)  # could use a backward counter to be pretty :)
        except KeyboardInterrupt:
            raw_input("Press Enter to force update, or Ctrl-C to quit.")
            print "Force updating..."
from metis.CMSSWTask import CMSSWTask
from metis.Sample import DirectorySample, DummySample
from metis.Path import Path
from metis.StatsParser import StatsParser
import time

for _ in range(1000):
    proc_tag = "v1"
    #special_dir = "workflowtest/ProjectMetis"
    special_dir = "miniaod_runII"

    step1 = CMSSWTask(
        # Change dataset to something more meaningful (but keep STEP1, as we use this
        # for string replacement later); keep N=1
        sample=DummySample(N=1, dataset="/JHUSample_tHq_cpodd_2017_20200520/STEP1"),
        # A unique identifier
        tag=proc_tag,
        special_dir=special_dir,
        # Probably want to beef up the below two numbers to control splitting,
        # but note that step2 is the bottleneck, so don't put too many events
        # in one output file here
        events_per_output=200,
        total_nevents=500000,
        # We have one input dummy file, so this must be True
        split_within_files=True,
        pset="psets/HIG-RunIIFall17wmLHEGS-01343_1_cfg_jhu.py",
        #cmssw_version = "CMSSW_9_3_6_patch2",
        cmssw_version="CMSSW_9_4_6_patch1",
        condor_submit_params={"sites": "T2_US_UCSD"},
        scram_arch="slc6_amd64_gcc630",
def runall(special_dir, tag, total_nevents, events_per_output, config):
    for _ in range(25):
        proc_tag = "v1"
        #special_dir = "workflowtest/ProjectMetis"
        #special_dir = "miniaod_runII/JHUSample_ttH"

        cmssw_v_gensim = config["cmssw_v_gensim"]
        pset_gensim = config["pset_gensim"]
        scram_arch_gensim = config["scram_arch_gensim"]
        cmssw_v_aodsim = config["cmssw_v_aodsim"]
        pset_aodsim = config["pset_aodsim"]
        scram_arch_aodsim = config["scram_arch_aodsim"]
        pset_aodsim2 = config["pset_aodsim2"]
        cmssw_v_aodsim2 = cmssw_v_aodsim  #config["pset_aodsim2"]
        scram_arch_aodsim2 = scram_arch_aodsim  #config["scram_arch_aodsim2"]
        cmssw_v_miniaodsim = config["cmssw_v_miniaodsim"]
        pset_miniaodsim = config["pset_miniaodsim"]
        scram_arch_miniaodsim = config["scram_arch_miniaodsim"]

        step1 = CMSSWTask(
            # Change dataset to something more meaningful (but keep STEP1, as we use this
            # for string replacement later); keep N=1
            sample=DummySample(N=1, dataset="/" + tag + "_STEP1"),
            # A unique identifier
            tag=proc_tag,
            special_dir=special_dir,
            # Probably want to beef up the below two numbers to control splitting,
            # but note that step2 is the bottleneck, so don't put too many events
            # in one output file here
            events_per_output=events_per_output,
            total_nevents=total_nevents,
            #events_per_output = 50,
            #total_nevents = 1000,
            # We have one input dummy file, so this must be True
            split_within_files=True,
            pset="psets/" + pset_gensim,
            cmssw_version=cmssw_v_gensim,
            scram_arch=scram_arch_gensim,
        )
        step2 = CMSSWTask(
            sample=DirectorySample(
                location=step1.get_outputdir(),
                dataset=step1.get_sample().get_datasetname().replace("STEP1", "STEP2"),
            ),
            tag=proc_tag,
            special_dir=special_dir,
            open_dataset=True,
            files_per_output=1,
            pset="psets/" + pset_aodsim,
            cmssw_version=cmssw_v_aodsim,
            scram_arch=scram_arch_aodsim,
        )
        step3 = CMSSWTask(
            sample=DirectorySample(
                location=step2.get_outputdir(),
                dataset=step2.get_sample().get_datasetname().replace("STEP2", "STEP3"),
            ),
            tag=proc_tag,
            special_dir=special_dir,
            open_dataset=True,
            files_per_output=1,
            pset="psets/" + pset_aodsim2,
            cmssw_version=cmssw_v_aodsim2,
            scram_arch=scram_arch_aodsim2,
        )
        step4 = CMSSWTask(
            sample=DirectorySample(
                location=step3.get_outputdir(),
                dataset=step3.get_sample().get_datasetname().replace("STEP3", "STEP4"),
            ),
            tag=proc_tag,
            special_dir=special_dir,
            open_dataset=True,
            files_per_output=1,
            output_name="step4.root",
            pset="psets/" + pset_miniaodsim,
            cmssw_version=cmssw_v_miniaodsim,
            scram_arch=scram_arch_miniaodsim,
            # condor_submit_params = {"sites":"UAF,UCSD"},
        )

        '''
        step5 = CMSSWTask(
            sample = DirectorySample(
                location = step4.get_outputdir(),
                dataset = step4.get_sample().get_datasetname().replace("STEP4","STEP5"),
            ),
            tag = proc_tag,
            special_dir = special_dir,
            open_dataset = True,
            files_per_output = 1,
            pset = "psets/TOP-RunIIFall17NanoAODv7-00001_1_cfg.py",
            # The below two lines should match output file names in the pset
            output_name = "step5.root",
            #other_outputs = ["step3_inMINIAODSIM.root","step3_inDQM.root"],
            cmssw_version = "CMSSW_10_2_22",
            scram_arch = "slc6_amd64_gcc700",
            # condor_submit_params = {"sites":"UAF,UCSD"},
        )
        '''

        #for _ in range(25):
        total_summary = {}
        for task in [step1, step2, step3, step4]:
            task.process()
            summary = task.get_task_summary()
            total_summary[task.get_sample().get_datasetname()] = summary

        StatsParser(data=total_summary, webdir="~/public_html/dump/metis/").do()

        time.sleep(600)
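# A hypothetical way to call runall(); every key below is one that the function
# reads from its config argument. The pset file names and CMSSW releases are
# placeholders and would need to exist in psets/ and be valid releases,
# respectively (the versions shown are borrowed from other snippets here).
example_config = {
    "cmssw_v_gensim": "CMSSW_9_3_4",
    "pset_gensim": "gensim_cfg.py",
    "scram_arch_gensim": "slc6_amd64_gcc630",
    "cmssw_v_aodsim": "CMSSW_9_4_6_patch1",
    "pset_aodsim": "aodsim_cfg.py",
    "scram_arch_aodsim": "slc6_amd64_gcc630",
    "pset_aodsim2": "aodsim2_cfg.py",
    "cmssw_v_miniaodsim": "CMSSW_9_4_6_patch1",
    "pset_miniaodsim": "miniaodsim_cfg.py",
    "scram_arch_miniaodsim": "slc6_amd64_gcc630",
}

runall(
    special_dir="miniaod_runII/example",
    tag="example_sample",          # becomes the "/<tag>_STEP1" dummy dataset name
    total_nevents=1000,
    events_per_output=50,
    config=example_config,
)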
from metis.CMSSWTask import CMSSWTask
from metis.Sample import DirectorySample, DummySample
from metis.StatsParser import StatsParser
import time
import numpy as np

tag = "v1"
total_summary = {}

for _ in range(10000):
    # The first task has no input files; it just makes GENSIM from a fragment with pythia commands
    gen = CMSSWTask(
        sample=DummySample(
            N=1, dataset="/WH_HtoRhoGammaPhiGamma/privateMC_102x/GENSIM"),
        events_per_output=1000,
        total_nevents=1000000,
        pset="gensim_cfg.py",
        cmssw_version="CMSSW_10_2_5",
        scram_arch="slc6_amd64_gcc700",
        tag=tag,
        split_within_files=True,
    )
    raw = CMSSWTask(
        sample=DirectorySample(
            location=gen.get_outputdir(),
            dataset=gen.get_sample().get_datasetname().replace("GENSIM", "RAWSIM"),
        ),
        open_dataset=True,
from metis.CMSSWTask import CMSSWTask
from metis.Sample import DirectorySample, DummySample
from metis.StatsParser import StatsParser

#lhe = CMSSWTask(
#    sample = DirectorySample(
#        location="/hadoop/cms/store/user/dspitzba/tW_scattering/test/",
#        #globber="*seed6*.lhe",
#        #dataset="/stop-stop/procv2/LHE",
#        ),
#    events_per_output = 20,
#    total_nevents = 100,
#    pset = "cfgs/pset_gensim.py",
#    cmssw_version = "CMSSW_10_2_7",
#    scram_arch = "slc6_amd64_gcc700",
#    #split_within_files = True,
#    )

gen = CMSSWTask(
    sample=DummySample(N=1, dataset="/ttWq/privateMC_102x/GENSIM"),
    events_per_output=10,
    total_nevents=100,
    pset="cfgs/pset_gensim.py",
    cmssw_version="CMSSW_10_2_7",
    scram_arch="slc6_amd64_gcc700",
    tag='v0',
    split_within_files=True,
)

# Collect the active tasks (only gen here; the LHE task above is commented out)
tasks = [gen]
total_summary = {}

for task in tasks:
    task.process()
    summary = task.get_task_summary()
    total_summary[task.get_sample().get_datasetname()] = summary

StatsParser(data=total_summary, webdir="~/public_html/dump/metis/").do()
def submit():
    requests = {
        #'TTWJetsToLNuEWK_5f_NLO': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWJetsToLNuEWK_5f_NLO_slc6_amd64_gcc630_CMSSW_9_3_16_tarball_retired.tar.xz',  # that's the SM point, but using the SMEFT model. No lepton filtering, so name is actually confusing
        #'TTWJetsToLNuEWK_5f_NLO_v2': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWJetsToLNuEWK_5f_NLO_slc7_amd64_gcc730_CMSSW_9_3_16_tarball.tar.xz',  # that's the actual SM
        #'TTWplusJetsToLNuEWK_5f_NLO_v2': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWplusJetsToLNuEWK_5f_NLO_slc7_amd64_gcc730_CMSSW_9_3_16_tarball.tar.xz',  # that's the actual SM
        #'TTWminusJetsToLNuEWK_5f_NLO_v2': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWminusJetsToLNuEWK_5f_NLO_slc7_amd64_gcc730_CMSSW_9_3_16_tarball.tar.xz',  # that's the actual SM
        #'TTWJetsToLNuEWK_5f_EFT_myNLO_full': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWJetsToLNuEWK_5f_EFT_myNLO_cpt8_slc6_amd64_gcc630_CMSSW_9_3_16_tarball.tar.xz',  # one of the BSM points
        #'TTWJetsToLNuEWK_5f_EFT_mix_myNLO_full': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWJetsToLNuEWK_5f_EFT_myNLO_slc6_amd64_gcc630_CMSSW_9_3_16_tarball.tar.xz',  # EFT mix
        #'TTWJetsToLNuEWK_5f_EFT_cpq3_4_myNLO_full': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks//TTWJetsToLNuEWK_5f_EFT_myNLO_cpq3_4_slc7_amd64_gcc730_CMSSW_9_3_16_tarball.tar.xz',  # C_pq3 = 4
        #'TTWJetsToLNuEWK_5f_NLO': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWJetsToLNuEWK_5f_NLO_slc7_amd64_gcc730_CMSSW_9_3_16_tarball.tar.xz',
        'TTWJetsToLNuEWK_5f_SMEFTatNLO_weight': '/hadoop/cms/store/user/dspitzba/tW_scattering/gridpacks/TTWJetsToLNuEWK_5f_EFT_myNLO_slc7_amd64_gcc700_CMSSW_10_6_19_tarball.tar.xz',
    }

    total_summary = {}

    extra_requirements = "true"

    # v6+ is UL
    tag = "UL18_v7"
    #events_per_point = 250000
    #events_per_job = 250
    #events_per_point = 2000000
    events_per_point = 4000000
    events_per_job = 2000
    njobs = int(events_per_point) // events_per_job

    for reqname in requests:
        gridpack = requests[reqname]

        task = CondorTask(
            sample=DummySample(dataset="/%s/RunIIAutumn18/NANO" % reqname,
                               N=njobs,
                               nevents=int(events_per_point)),
            output_name="nanoAOD.root",
            executable="executables/condor_executable_UL18.sh",
            #executable = "executables/condor_executable_UL17.sh",
            #executable = "executables/condor_executable_UL16_postVFP.sh",
            tarfile="package.tar.gz",
            additional_input_files=[gridpack],
            open_dataset=False,
            files_per_output=1,
            arguments=gridpack.split('/')[-1],
            condor_submit_params={
                "sites": "T2_US_UCSD",  #
                #"memory": 1950,
                #"cpus": 1,
                "memory": 15600,
                "cpus": 8,
                "classads": [
                    ["param_nevents", events_per_job],
                    ["metis_extraargs", ""],
                    ["JobBatchName", reqname],
                    ["IS_CLOUD_JOB", "yes"],
                ],
                "requirements_line": 'Requirements = (HAS_SINGULARITY=?=True)',
            },
            tag=tag,
            min_completion_fraction=0.90,
        )

        task.process()
        total_summary[task.get_sample().get_datasetname()] = task.get_task_summary()

    StatsParser(data=total_summary, webdir="~/public_html/dump/tW_gen/").do()
def test_all(self):
    dsname = "/blah/blah/BLAH/"
    nfiles = 15
    s1 = DummySample(N=nfiles, dataset=dsname)
    self.assertEqual(len(s1.get_files()), nfiles)
from metis.CMSSWTask import CMSSWTask
from metis.Sample import DirectorySample, DummySample
from metis.Path import Path
from metis.StatsParser import StatsParser
import time

proc_tag = "v1"
special_dir = "workflowtest/ProjectMetis"

step1 = CMSSWTask(
    # Change dataset to something more meaningful (but keep STEP1, as we use this
    # for string replacement later); keep N=1
    sample=DummySample(N=1, dataset="/test/testv1/STEP1"),
    # A unique identifier
    tag=proc_tag,
    special_dir=special_dir,
    # Probably want to beef up the below two numbers to control splitting,
    # but note that step2 is the bottleneck, so don't put too many events
    # in one output file here
    events_per_output=30,
    total_nevents=120,
    # We have one input dummy file, so this must be True
    split_within_files=True,
    pset="psets/step1.py",
    cmssw_version="CMSSW_10_0_0_pre1",
    scram_arch="slc6_amd64_gcc630",
)
step2 = CMSSWTask(
    sample=DirectorySample(
        location=step1.get_outputdir(),
        dataset=step1.get_sample().get_datasetname().replace("STEP1", "STEP2"),
from metis.CMSSWTask import CMSSWTask
from metis.Sample import DirectorySample, DummySample
from metis.Path import Path
from metis.StatsParser import StatsParser
import time

for _ in range(25):
    proc_tag = "v3"
    special_dir = "workflowtest/ProjectMetis"

    step1 = CMSSWTask(
        # Change dataset to something more meaningful (but keep STEP1, as we use this
        # for string replacement later); keep N=1
        sample=DummySample(N=1, dataset="/test/ttHH_v2/STEP1"),
        # A unique identifier
        tag=proc_tag,
        special_dir=special_dir,
        # Probably want to beef up the below two numbers to control splitting,
        # but note that step2 is the bottleneck, so don't put too many events
        # in one output file here
        events_per_output=50,
        total_nevents=1000,
        # We have one input dummy file, so this must be True
        split_within_files=True,
        pset="psets/TOP-RunIIFall17wmLHEGS-00072_1_cfg.py",
        cmssw_version="CMSSW_9_3_4",
        scram_arch="slc6_amd64_gcc630",
    )
    step2 = CMSSWTask(
        sample=DirectorySample(