def hadd_histograms(folder, runmode, delete_input_files = False, start = False, use_custom_hadd = True):
    """Merge (hadd) all ROOT files in <folder> into one file per sample.

    Sample names are derived from the input file names; merged output is
    written to <folder>_merged/<sample>.root.  The merge commands are run
    through runParallel (project helper imported elsewhere in this file).

    folder             -- directory containing the input *.root files
    runmode            -- execution mode passed through to runParallel
    delete_input_files -- if True, remove the inputs after a successful hadd
    start              -- if True, skip the interactive confirmation
    use_custom_hadd    -- selects the hadd command variant (without -f)
    """
    # strip trailing slash(es); rstrip also copes with an empty string,
    # which would have crashed the old folder[-1] check
    folder = folder.rstrip("/")

    # file-name fragments indicating broken HT binning labels -- skip those files
    ignore_list = ["-100to20_", "-10to200_", "-200to40_", "-20to400_", "-40to600_",
                   "-600to80_", "-20To400_", "-400To60_", "-40To600_", "HT100to1500_",
                   "HT1500to200_", "HT200toInf_", "-200toInf_", "-80to1200_",
                   "-200To40_", "-250toInf_", "-1200to250_", "-800to120_",
                   "-120to2500_", "1000to150_", "-60ToInf_", "400to60_", "100To20_",
                   "HT150to2000_", "HT200to30_", "HT1000to150_", "Run218", "Run217",
                   "Run216"]

    samples = []
    for item in glob.glob(folder + "/*root"):
        # ignore broken HT binning labels
        if any(bad_label in item for bad_label in ignore_list):
            continue

        # derive the sample name from the file name
        if "RunIIFall17MiniAODv2" in item or "RunIISummer16Mini" in item:
            sample_name = item.split("/")[-1].split("AOD_")[0]
        else:
            sample_name = item.split("/")[-1].split("AOD")[0]

        # drop extension-sample suffixes so ext1..ext4 merge into one sample
        for ext_suffix in ("_ext1", "_ext2", "_ext3", "_ext4"):
            sample_name = sample_name.replace(ext_suffix, "")
        sample_name = sample_name.split("_RA2AnalysisTree")[0]
        if sample_name.endswith("-"):
            sample_name = sample_name[:-1]
        if "Run201" in sample_name:
            # drop a trailing digit and a leftover "AOD" tag of data samples
            if sample_name and sample_name[-1].isdigit():
                sample_name = sample_name[:-1]
            if sample_name.endswith("AOD"):
                sample_name = sample_name[:-3]
        samples.append(sample_name)

    # deduplicate
    samples = list(set(samples))

    # parenthesized single-argument print works in Python 2 and 3 alike
    print("Merging samples of folder %s:" % folder)
    for sample in samples:
        print(sample)

    cmds = []
    for sample in samples:
        if use_custom_hadd:
            #command = "./terahadd.py %s_merged/%s.root %s/%s*.root " % (folder, sample, folder, sample)
            command = "hadd %s_merged/%s.root %s/%s*.root " % (folder, sample, folder, sample)
        else:
            command = "hadd -f %s_merged/%s.root %s/%s*.root " % (folder, sample, folder, sample)
        if delete_input_files:
            command += " && rm %s/%s*.root " % (folder, sample)
        # safety net: never allow an unanchored wildcard rm to slip through
        if " *" in command:
            print("Wildcard rm command found, this should never happen!")
            quit()
        cmds.append(command)

    runParallel(cmds, runmode, ncores_percentage=0.5,
                condorDir="%s_merged.condor" % folder,
                dontCheckOnJobs=True, confirm=(not start), use_more_time=False)
    # keep the python configs next to the merged output
    os.system("cp %s/*py %s_merged/" % (folder, folder))
def run_limit_calculation(out_path):
    """Execute every *.sh script found in <out_path> as a grid job.

    Each script is made executable and run from within out_path; submission
    is delegated to runParallel (project helper imported elsewhere).

    out_path -- directory containing the limit-calculation shell scripts
    """
    commands = []
    # the original enumerate index was unused -- iterate the paths directly
    for script_path in glob.glob(out_path + "/*.sh"):
        script_name = script_path.split("/")[-1]
        commands.append("cd %s; chmod +x %s; ./%s" % (out_path, script_name, script_name))
    runParallel(commands, "grid", condorDir=out_path, confirm=True, babysit=False)
def do_submission(commands, output_folder, condorDir="bird", executable="looper.py", runmode="grid", dontCheckOnJobs=False, confirm=True):
    """Prepare <output_folder> and submit <commands> via runParallel.

    Copies the looper executable and the shared utilities into the output
    folder so that grid jobs find them next to their output, then submits
    with a fixed CMSSW base release.

    commands        -- list of shell command strings, one per job
    output_folder   -- directory created for the job payload and output
    condorDir       -- condor working directory passed to runParallel
    executable      -- script copied alongside the output
    runmode         -- runParallel execution mode (e.g. "grid")
    dontCheckOnJobs -- forwarded to runParallel
    confirm         -- forwarded to runParallel (interactive confirmation)
    """
    # parenthesized single-argument print works in Python 2 and 3 alike
    print("Submitting \033[1m%s jobs\033[0m, output folder will be \033[1m%s\033[0m." % (
        len(commands), output_folder))
    os.system("mkdir -p %s" % output_folder)
    os.system("cp %s %s/" % (executable, output_folder))
    os.system("cp ../../tools/shared_utils.py %s/" % (output_folder))
    runParallel(
        commands, runmode, condorDir=condorDir, dontCheckOnJobs=dontCheckOnJobs,
        use_more_mem=False, use_more_time=False, confirm=confirm,
        cmsbase="/afs/desy.de/user/k/kutznerv/cmssw/CMSSW_10_2_4_patch1/src")
def do_submission(commands, output_folder, condorDir="bird", executable="looper.py", runmode="grid", dontCheckOnJobs=True, confirm=True):
    """Prepare <output_folder> and submit <commands> via runParallel.

    Copies the looper executable into the output folder, then submits the
    jobs and returns runParallel's result to the caller.

    commands        -- list of shell command strings, one per job
    output_folder   -- directory created for the job payload and output
    condorDir       -- condor working directory passed to runParallel
    executable      -- script copied alongside the output
    runmode         -- runParallel execution mode (e.g. "grid")
    dontCheckOnJobs -- forwarded to runParallel
    confirm         -- forwarded to runParallel (interactive confirmation)
    """
    # parenthesized single-argument print works in Python 2 and 3 alike
    print("Submitting \033[1m%s jobs\033[0m, output folder will be \033[1m%s\033[0m." % (
        len(commands), output_folder))
    os.system("mkdir -p %s" % output_folder)
    os.system("cp %s %s/" % (executable, output_folder))
    return runParallel(commands, runmode, condorDir=condorDir,
                       dontCheckOnJobs=dontCheckOnJobs, use_more_mem=False,
                       use_more_time=False, confirm=confirm)
#!/bin/env python
import os

from GridEngineTools import runParallel

# refresh the training executable from the shared template
os.system("cp ../trainBDT_template.py trainBDT.py; chmod +x trainBDT.py")

# submit a single long-track BDT training (chi2 + dxy-informed,
# equalized signal cross sections, phase 1) as a grid job
training_command = "./trainBDT.py --category long --use_chi2 --dxyinformed --equalSgXsec --phase 1 --path /afs/desy.de/user/k/kutznerv/dust/shorttrack/analysis/ntupleanalyzer/skim_83_merged"
runParallel([training_command], "grid", use_more_mem=True, confirm=False, babysit=False)
# NOTE(review): fragment of a larger script -- 'parameters', 'cuts',
# 'runParallel', 'os', 'glob' and 'plotter' are defined elsewhere in the
# file, and the final stackedPlot call continues past this view.

# STEP 1: one job per input ROOT file for the short- and medium-track samples
for iFile in glob.glob(
        "/nfs/dust/cms/user/kutznerv/cmsdas/tracks-mini-short/*.root"):
    parameters.append([iFile, "PreSelection", "histos-short", cuts])
for iFile in glob.glob(
        "/nfs/dust/cms/user/kutznerv/cmsdas/tracks-mini-medium/*.root"):
    parameters.append([iFile, "PreSelection", "histos-medium", cuts])

# build the treeToHist.py command lines; the cut string is compacted
# (whitespace removed) and quoted so it survives the shell
commands = []
for parameter in parameters:
    command = ("./treeToHist.py " + parameter[0] + " " + parameter[1] + " " +
               parameter[2] + ' \"' +
               str(parameter[3]).replace(" ", "").replace("'", "\'") + '\"')
    commands.append(command.replace("\\", ""))
runParallel(commands, "multi")

# STEP 2: create stacked histograms from previously created individual histograms:
for folder in ["histos-short", "histos-medium"]:
    os.system("mkdir -p plots/%s" % folder)
    histoPath = "."
    stages = ["loose"]
    for stage in stages:
        print "stage:", stage, folder
        plotter.stackedPlot(
#!/bin/env python
import os

from GridEngineTools import runParallel

# refresh the training executable from the shared template
os.system("cp ../trainBDT_template.py trainBDT.py; chmod +x trainBDT.py")

# submit a single long-track BDT training (chi2 variables) as a grid job
training_command = "./trainBDT.py --category long --use_chi2"
runParallel([training_command], "grid", confirm=False, babysit=False)
#!/bin/env python
import os

from GridEngineTools import runParallel

# refresh the training executable from the shared template
os.system("cp ../trainBDT_template.py trainBDT.py; chmod +x trainBDT.py")

# submit a single short-track BDT training (chi2 + dxy-informed) as a grid job
training_command = "./trainBDT.py --category short --use_chi2 --dxyinformed --path /afs/desy.de/user/k/kutznerv/dust/shorttrack/analysis/ntupleanalyzer/skim_83_merged"
runParallel([training_command], "grid", confirm=False, babysit=False)
path = "../../skimmer/skim_16DataMCv4_merged/" #samples = ["*"] #samples = ["Summer16.WJetsToLNu_HT-1200To2500_TuneCUETP8M1_13TeV-madgraphMLM-pythia8"] samples = ["*Summer16*","Run2016*MET*"] inputfiles=[] for sample in samples : inputlist = glob(path+"/*%s*.root"%sample) inputfiles.extend(inputlist) inputfiles = sorted(inputfiles) commands=[] for inputfile in inputfiles: label = inputfile.split("/")[-1] isData = False isSignal = False if "Run201" in label : isData = True elif "g1800" in label or "SMS" in label : isData = False isSignal = True command = "./HistoMaker %s %s h_%s %s %s"%(inputfile,OUTDIR,label,isData,isSignal) commands.append(command) print "input files : %s, isData :%s, isSignal : %s"%(label,isData,isSignal) runParallel(commands, "grid", condorDir="condor", dontCheckOnJobs=True) #runParallel(commands, "multi", condorDir="condor", dontCheckOnJobs=True)
#!/bin/env python
import os

from GridEngineTools import runParallel

# refresh the training executable from the shared template
os.system("cp ../trainBDT_template.py trainBDT.py; chmod +x trainBDT.py")

# submit a single long-track BDT training (chi2 variables, phase 1/2 skim) as a grid job
training_command = "./trainBDT.py --category long --use_chi2 --phase 1 --path /afs/desy.de/user/k/kutznerv/dust/shorttrack/analysis/ntupleanalyzer/skim_48_phase2_merged"
runParallel([training_command], "grid", confirm=False, babysit=False)
#!/bin/env python
import os

from GridEngineTools import runParallel

# refresh the training executable from the shared template
os.system("cp ../trainBDT_template.py trainBDT.py; chmod +x trainBDT.py")

# submit a single long-track BDT training (dxy-informed + chi2) as a grid job
training_command = "./trainBDT.py --category long --dxyinformed --use_chi2 --path /afs/desy.de/user/k/kutznerv/dust/shorttrack/analysis/ntupleanalyzer/skim_65_p15OptionalJetVetoRevisedJets_merged"
runParallel([training_command], "grid", confirm=False, babysit=False)
# NOTE(review): fragment of the submission branch -- the enclosing loop over
# 'input_file', the outer if whose 'else' appears below, and 'commands',
# 'options', 'TChain', 'runParallel' and 'main' live outside this view.

        # only keep 2016 data streams MET, SingleMuon and SingleElectron
        if "Run2016" in input_file and "MET" in input_file:
            use_file = True
        if "Run2016" in input_file and "SingleMuon" in input_file:
            use_file = True
        if "Run2016" in input_file and "SingleElectron" in input_file:
            use_file = True
        if not use_file:
            continue

        # get nev:
        tree = TChain("Events")
        tree.Add(input_file)
        nev = tree.GetEntries()

        # split the file into jobs_per_file equal event intervals,
        # one ./get_prediction.py job per interval
        nev_per_interval = int(nev/int(options.jobs_per_file))
        for i in range(int(options.jobs_per_file)):
            event_start = i * nev_per_interval
            commands.append("./get_prediction.py --input %s --output %s/%s --nev %s --fakerate_file %s --event_start %s --unweighted %s" % (input_file, options.prediction_folder, input_file.split("/")[-1], nev_per_interval, options.fakerate_file, event_start, options.unweighted))

    runParallel(commands, options.runmode, condorDir = "get_prediction.condor", use_more_mem=False, use_more_time=False, confirm=not options.start)

# otherwise run locally:
else:
    options.inputfiles = options.inputfiles.split(",")
    main(options.inputfiles,
         options.outputfiles,
         nevents = int(options.nev),
         fakerate_file = options.fakerate_file,
         event_start = int(options.event_start),
        )
# NOTE(review): fragment -- these first keyword arguments close a plotting
# call opened outside this view; 'options', 'bg', 'path', 'prefixes',
# 'configurations' and 'runParallel' are defined elsewhere in the file.
        category="long",
        bg=bg,
        path=path,
        prefixes=prefixes,
        include_sg=True,
        include_bg=True,
        suffix="highlowMHT_signal")

elif options.ptplot:
    pass

elif options.submit:
    # yield successive n-sized chunks from list l
    def chunks(l, n):
        for i in range(0, len(l), n):
            yield l[i:i + n]

    # bundle nplotsperjob plot indices into one shell command per job
    commands = []
    configs = range(len(configurations))
    for chunk in list(chunks(configs, options.nplotsperjob)):
        cmd = ""
        for i in chunk:
            cmd += "./check_invariance.py --index %s; " % i
        commands.append(cmd)

    # interactive guard before submitting
    raw_input("running %s jobs!" % len(commands))
    runParallel(commands, options.runmode, condorDir="check_invariance.condor", dontCheckOnJobs=False)
# NOTE(review): script fragment -- 'labels_a', 'labels_b', 'year',
# 'category', 'phase', 'skim_folder', 'mode' and 'runParallel' are
# defined elsewhere in the file.

# one training folder per (label_a, label_b) combination
for label_a in labels_a:
    for label_b in labels_b:
        # an empty label_b is dropped from the folder name
        if label_b == "":
            foldername = "%s-%s-tracks-%s" % (year, category, label_a)
        else:
            foldername = "%s-%s-tracks-%s-%s" % (year, category, label_a, label_b)
        print foldername

        os.system("mkdir -p " + foldername)
        os.system("mkdir -p " + foldername + "/condor")

        cwd = os.getcwd()
        folder_full = os.getcwd() + "/" + foldername

        # copy the training templates into the folder and start the training there
        cmd = "cd %s; cp ../trainBDT_template.py trainBDT.py; cp ../trainall.py trainall.py; chmod +x trainBDT.py; ./trainBDT.py --category %s --phase %s --path %s" % (
            folder_full, category, phase, skim_folder)

        if mode == "grid":
            runParallel([cmd], "grid", condorDir=foldername + "/condor", use_more_mem=True, confirm=False, babysit=False)
        else:
            # run locally in the background, logging to condor/0.sh.o
            os.system("%s &> condor/0.sh.o &" % cmd)
        os.chdir(cwd)
#!/bin/env python
from GridEngineTools import runParallel

# submit the TMVA training wrapper as a single grid job, run inside a
# CMSSW 8 environment with extra memory and time
tmva_command = "./runTMVA.sh"
runParallel([tmva_command], "grid",
            cmsbase="/afs/desy.de/user/k/kutznerv/cmssw/CMSSW_8_0_28/src",
            dontCheckOnJobs=True, use_more_mem=True, use_more_time=True)
#!/bin/env python
import os

from GridEngineTools import runParallel

# refresh the training executable from the shared template
os.system("cp ../trainBDT_template.py trainBDT.py; chmod +x trainBDT.py")

# submit a single short-track BDT training (dxy-informed) as a grid job
training_command = "./trainBDT.py --category short --dxyinformed"
runParallel([training_command], "grid", confirm=False, babysit=False)
# NOTE(review): script fragment -- 'os', 'glob', 'runParallel' and
# 'choose_bdt' are provided elsewhere in the file.

runmode = "grid"
os.system("mkdir -p output")
commands = []

# number of input files handed to each grid job
files_per_job = 5
file_list = glob.glob(
    "/pnfs/desy.de/cms/tier2/store/user/sbein/NtupleHub/Production2016v2/Summer16.*"
)
#file_list = glob.glob("/pnfs/desy.de/cms/tier2/store/user/sbein/NtupleHub/Production2016v2/Summer16.ttHJetTobb_M125_13TeV_amcatnloFXFX_madspin_pythia8_ext3_4*_RA2AnalysisTree.root")

# split the file list into chunks of files_per_job
file_segments = [
    file_list[x:x + files_per_job]
    for x in range(0, len(file_list), files_per_job)
]
for inFile_segment in file_segments:
    # output name is derived from the first file of the segment
    out_tree = "output/" + inFile_segment[0].split("/")[-1].split(
        ".root")[0] + "_fakes.root"
    commands.append("./fakes_analyzer.py %s %s" %
                    (str(inFile_segment).replace(", ", ","), out_tree))
print commands[0]
runParallel(commands, runmode)

# optional post-merge step (currently disabled)
if False:
    print "running hadd..."
    os.system("hadd -f fakes_%s.root output_%s/Summer*.root" %
              (choose_bdt, choose_bdt))
]
# NOTE(review): fragment -- the bracket above closes the cmssw8_samples list
# opened outside this view; 'ntuples_folder', 'files_per_sample',
# 'files_per_job', 'commands', 'output_folder', 'runmode' and 'runParallel'
# are defined elsewhere in the file.

for sample in cmssw8_samples:
    ifile_list = sorted(glob.glob(ntuples_folder + "/" + sample + "*.root"))
    # optionally limit the number of files per sample (-1 = no limit)
    if files_per_sample != -1:
        ifile_list = ifile_list[:files_per_sample]
    if len(ifile_list) == 0:
        continue
    print "Looping over %s files (%s)" % (len(ifile_list), sample)

    # chunk the file list into segments of files_per_job
    file_segments = [
        ifile_list[x:x + files_per_job]
        for x in range(0, len(ifile_list), files_per_job)
    ]
    for inFile_segment in file_segments:
        # output tree named after the first file in the segment
        out_tree = output_folder + "/" + inFile_segment[0].split(
            "/")[-1].split(".root")[0] + "_fakes.root"
        commands.append("./fakerate_loop.py %s %s" %
                        (str(inFile_segment).replace(", ", ",").replace(
                            "[", "").replace("]", ""), out_tree))

# interactive confirmation before submitting
raw_input("submit %s jobs?" % len(commands))
os.system("cp fakerate_loop.py %s/" % output_folder)
runParallel(commands, runmode, dontCheckOnJobs=True, burst_mode=False)
# NOTE(review): fragment -- the enclosing loop over 'input_file', the outer
# if whose 'else' appears below, and 'commands', 'options', 'args', 'TChain',
# 'runParallel', 'hadd_everything' and 'event_loop' live outside this view;
# the final event_loop call also continues past it.

        # count events in the input file to split it into equal intervals
        tree = TChain("Events")
        tree.Add(input_file)
        nev = tree.GetEntries()

        # one ./nonprompt.py job per event interval
        nev_per_interval = int(nev / int(options.jobs_per_file))
        for i in range(int(options.jobs_per_file)):
            event_start = i * nev_per_interval
            commands.append(
                "./nonprompt.py --input %s --output %s/%s --nev %s --fakerate_file %s --event_start %s"
                % (input_file, options.prediction_folder,
                   input_file.split("/")[-1], nev_per_interval,
                   options.fakerate_file, event_start))

    runParallel(commands,
                options.runmode,
                condorDir="nonprompt.condor",
                use_more_mem=False,
                use_more_time=False,
                confirm=not options.start)
    hadd_everything(options)

# otherwise run locally:
else:
    # fall back to positional arguments when --inputfiles is not given
    if not options.inputfiles and args:
        inputfiles_list = args
    else:
        inputfiles_list = options.inputfiles.split(",")
    event_loop(
        inputfiles_list,