def main():
    """Run the TinyNtuple analysis on the M=3000 RSG signal sample and time it.

    Rebinds the module-level ``ops``, ``inputpath`` and ``outputpath`` from
    the parsed options, passes the ``tempplot`` output path to
    ``helpers.checkpath`` and then calls ``TinyAnalysis`` on the signal
    histogram file. The elapsed wall time is printed at the end.
    """
    global ops, inputpath, outputpath
    t_begin = time.time()

    ops = options()
    inputpath = CONF.inputpath + ops.inputdir + "/"
    outputpath = CONF.outputpath + ops.outputdir + "/tempplot/"
    helpers.checkpath(outputpath)

    # glob prefixes on EOS; only the disabled MiniAnalysis calls below use them
    eosmcpath = CONF.toppath + "/eos/atlas/user/b/btong/bb/mc/v02-00-00/gridOutput/MiniNTuple/*mc15_13TeV"
    eosdatapath = CONF.toppath + "/eos/atlas/user/b/btong/bb/data/v02-00-00/gridOutput/MiniNTuple/*16_*periodB.*.root_skim"

    # --- TinyNtuple analysis ---
    mass = 3000
    signal_file = inputpath + "signal_G_hh_c10_M" + str(mass) + "/" + "hist-MiniNTuple.root"
    signal_tag = "signal_M" + str(mass)
    TinyAnalysis(signal_file, signal_tag)  # MC
    # alternative inputs (data):
    #TinyAnalysis(inputpath + "data_test/" + "hist-MiniNTuple.root", "data")
    #TinyAnalysis(inputpath + "data_test16/" + "hist-MiniNTuple.root", "data")

    # --- MiniNtuple analysis (disabled) ---
    #MiniAnalysis(glob.glob(eosmcpath + "*G_hh_bbbb_c10*" + str(mass) + ".hh4b*.root_skim")[0], "signal_M" + str(mass)) #MC
    #MiniAnalysis(glob.glob(eosdatapath)[0], "data16") #data
    #MiniAnalysis(glob.glob("../test_mini/data-MiniNTuple/*.root_skim")[0], "signal_M" + str(mass)) #MC

    elapsed = time.time() - t_begin
    print("--- %s seconds ---" % elapsed)
def main():
    """Run the MiniNtuple analysis on the M=3000 RSG signal sample and time it.

    Rebinds the module-level ``ops``, ``inputpath`` and ``outputpath`` from
    the parsed options, passes the ``tempplot`` output path to
    ``helpers.checkpath`` and then calls ``MiniAnalysis`` on the first file
    matching the EOS signal glob. The elapsed wall time is printed at the end.
    """
    global ops, inputpath, outputpath
    t_begin = time.time()

    ops = options()
    inputpath = CONF.inputpath + ops.inputdir + "/"
    outputpath = CONF.outputpath + ops.outputdir + "/tempplot/"
    helpers.checkpath(outputpath)

    # glob prefixes on EOS
    eosmcpath = "/eos/atlas/user/b/btong/bb/mc/v02-03-04/gridOutput/MiniNTuple/*mc15_13TeV"
    eosdatapath = "/eos/atlas/user/b/btong/bb/data/v02-03-04/gridOutput/MiniNTuple/*16_*periodB.*.root"

    mass = 3000
    # --- TinyNtuple analysis (disabled) ---
    #TinyAnalysis(inputpath + "signal_G_hh_c10_M" + str(mass) + "/" + "hist-MiniNTuple.root", "signal_M" + str(mass)) #MC
    #TinyAnalysis(inputpath + "data_test/" + "hist-MiniNTuple.root", "data") #data
    #TinyAnalysis(inputpath + "data_test16/" + "hist-MiniNTuple.root", "data") #data

    # --- MiniNtuple analysis ---
    signal_pattern = eosmcpath + "*G_hh_bbbb_c10*" + str(mass) + ".hh4b*.root"
    signal_matches = glob.glob(signal_pattern)
    # the first match is used; an empty match list raises IndexError here
    MiniAnalysis(signal_matches[0], outname="signal_M" + str(mass))  # MC
    # alternative inputs:
    #MiniAnalysis(glob.glob(eosmcpath + "*410007*ttbar*_allhad.*v1*.root")[0], outname="tthadv1") #MC
    #MiniAnalysis(glob.glob(eosmcpath + "*410007*ttbar*_allhad.*v3*.root")[0], outname="tthadv3") #MC
    #MiniAnalysis(glob.glob("/afs/cern.ch/work/b/btong/bbbb/MoriondAnalysis/test_mini/data-MiniNTuple/mc15_13TeV.root")[0], outname="tthadv3") #MC
    #MiniAnalysis(glob.glob(eosdatapath)[0], "data16") #data
    #MiniAnalysis(glob.glob("../test_mini/data-MiniNTuple/*.root_skim")[0], "signal_M" + str(mass)) #MC

    elapsed = time.time() - t_begin
    print("--- %s seconds ---" % elapsed)
def split(targetpath="data_test"): start_time = time.time() ops = options() nfiles = ops.nfiles inputdir = ops.inputdir global inputpath inputpath = CONF.inputpath + inputdir + "/" + targetpath global outputpath outputpath = CONF.inputpath + inputdir + "/" + targetpath helpers.checkpath(outputpath) print "split! target: ", targetpath f = ROOT.TFile(inputpath + "/" + "hist-MiniNTuple.root", "read") #load the target tree t = f.Get("TinyTree") #load the histograms hist_list = [ "CutFlowWeight", "CutFlowNoWeight", "h_leadHCand_pT_pre_trig", "h_leadHCand_pT_aft_trig" ] temp_hist_list = [] for j, hist in enumerate(hist_list): temp_hist_list.append(f.Get(hist).Clone()) temp_hist_list[j].Scale(1.0 / (nfiles * 1.0)) ##correct for the sqrt N error here; this is really stupid for x_bin in range(0, temp_hist_list[j].GetXaxis().GetNbins() + 1): temp_hist_list[j].SetBinError( x_bin, temp_hist_list[j].GetBinError(x_bin) * ROOT.TMath.Sqrt(nfiles)) outfile = [] outtree = [] for i in range(nfiles): outfile.append( ROOT.TFile(inputpath + "/" + "hist-MiniNTuple_%s.root" % (str(i)), "recreate")) outtree.append(t.CloneTree(0)) #open and copy nentries = t.GetEntries() for n in range(nentries): t.GetEntry(n) #print n%nfiles outtree[n % nfiles].Fill() for i in range(nfiles): outfile[i].cd() outtree[i].Write() for j, hist in enumerate(temp_hist_list): hist.Write() outfile[i].Close() f.Close() del (t) del (outtree) del (temp_hist_list) print("--- %s seconds ---" % (time.time() - start_time)) print "Finish!"
def pack_input(inputfile, inputsplit=-1):
    """Build the job dictionary for one input sample.

    inputfile  -- sample directory name; also appended to the module-level
                  ``outputpath`` and passed to ``helpers.checkpath``.
    inputsplit -- split index; when >= 0 the root file name gets the suffix
                  ``_<inputsplit>``, otherwise no suffix is added.

    Returns a dict with the keys ``inputfile``, ``inputroot`` and
    ``outputroot`` (the last two hold the same file name).
    """
    if inputsplit >= 0:
        suffix = "_" + str(inputsplit)
    else:
        suffix = ""
    rootname = "hist-MiniNTuple" + suffix + ".root"

    #make sure the output directory exist here; resolve the conflicts
    helpers.checkpath(outputpath + inputfile)

    return {
        "inputfile": inputfile,
        "inputroot": rootname,
        "outputroot": rootname,
    }
def print_image_checks(img_fpath):
    """Print a diagnostic line about an image path and return whether it exists.

    img_fpath -- path of the image file to check.

    Returns the result of ``helpers.checkpath(img_fpath, verbose=True)``.
    """
    hasimg = helpers.checkpath(img_fpath, verbose=True)
    if hasimg:
        _tup = (img_fpath, filesize_str(img_fpath))
        print('[io] Image %r (%s) exists. Is it corrupted?' % _tup)
    else:
        # the '%' operator was missing here, so the string literal was being
        # *called* with the tuple and raised TypeError instead of printing
        print('[io] Image %r does not exists ' % (img_fpath,))
    return hasimg
def print_image_checks(img_fpath):
    """Report on an image path and return whether it exists.

    img_fpath -- path of the image file to check.

    Returns the result of ``helpers.checkpath(img_fpath, verbose=True)``.
    """
    hasimg = helpers.checkpath(img_fpath, verbose=True)
    if hasimg:
        _tup = (img_fpath, filesize_str(img_fpath))
        print('[io] Image %r (%s) exists. Is it corrupted?' % _tup)
    else:
        # format with '%'; without it the string literal was called like a
        # function, which raises TypeError on the missing-image path
        print('[io] Image %r does not exists ' % (img_fpath,))
    return hasimg
def debug_compute_ap_exe(compute_ap_exe, ground_truth_query, ranked_list_fpath):
    """Print diagnostics for a ``compute_ap`` invocation and run it.

    Checks both paths with ``helpers.checkpath``, prints the command string
    (the three arguments joined by single spaces), then runs the executable
    once without arguments via ``execute`` and once with the full command
    string via ``popen_communicate``. The results of both runs are discarded.
    """
    separator = '-----------'

    print('================================')
    print('Debugging compute_ap executable:')
    print(separator)
    print('Path checks: ')
    for fpath in (ranked_list_fpath, compute_ap_exe):
        helpers.checkpath(fpath, True)

    print(separator)
    print('Command string check:')
    cmdstr = ' '.join([compute_ap_exe, ground_truth_query, ranked_list_fpath])
    print(cmdstr)

    print(separator)
    print('Noargs check:')
    # both helpers are unpacked as (out, err, return_code) triples
    _out, _err, _code = execute(compute_ap_exe)
    _out, _err, _code = popen_communicate(cmdstr)
def skim(targetpath=""): start_time = time.time() ops = options() inputpath = targetpath outputpath = targetpath helpers.checkpath(outputpath) print targetpath #setup files files = glob.glob(targetpath + "*.MiniNTuple.root") config = [] #setup the dictionary for file in files: #print file temp_dic = {} temp_dic["file"] = file #add skimming selection now if ops.file not in file: continue #only do skimming once for now! if not os.path.isfile(temp_dic["file"] + "_skim"): config.append(temp_dic) print config print " Running %s jobs on %s cores" % (len(config), mp.cpu_count() - 1) npool = min(len(config), mp.cpu_count() - 1) pool = mp.Pool(npool) pool.map(selection, config) ##for debugging #selection(config[0]) # for conf in config: # print conf # selection(conf) print("--- %s seconds ---" % (time.time() - start_time)) print "Finish!"
def skim(targetpath=""): start_time = time.time() ops = options() inputpath = targetpath outputpath = targetpath helpers.checkpath(outputpath) print targetpath #setup files files = glob.glob(targetpath + "*.MiniNTuple.root") config = [] #setup the dictionary for file in files: #print file temp_dic = {} temp_dic["file"] = file #add skimming selection now if ops.file not in file: continue #only do skimming once for now! if not os.path.isfile(temp_dic["file"] + "_skim"): config.append(temp_dic) print config print " Running %s jobs on %s cores" % (len(config), mp.cpu_count()-1) npool = min(len(config), mp.cpu_count()-1) pool = mp.Pool(npool) pool.map(selection, config) ##for debugging #selection(config[0]) # for conf in config: # print conf # selection(conf) print("--- %s seconds ---" % (time.time() - start_time)) print "Finish!"
def syst_pipeline(config):
    """Prepare one systematic-variation directory and run the counting/dump scripts.

    config -- dict; only ``config["inputdir"]`` (the systematic directory
              name) is read here.

    Steps, in order:
      1. symlink the nominal ``data_test/hist-MiniNTuple.root`` (from
         ``ops.inputdir``) into the systematic directory;
      2. for directories whose name contains ``syst_tt_``, also symlink the
         nominal signal files for every mass in ``CONF.mass_lst``;
      3. remove the old ``sum_<dir>.root`` and ``Limitinput`` via ``rm``;
      4. run ``get_count.py`` and ``dump_hists.py`` through ``os.system``.

    Reads the module-level ``ops`` (``inputdir``, ``Xhh``).
    """
    t = config["inputdir"]
    print "the directory is: ", t
    inputpath = CONF.inputpath + t + "/"
    #check if for syst, the data file is there
    helpers.checkpath(inputpath + "data_test")
    #this is a really bad practice and temp fix now! need to watch this very carefully...
    ori_link = CONF.inputpath + ops.inputdir + "/data_test/hist-MiniNTuple.root"
    dst_link = inputpath + "data_test/hist-MiniNTuple.root"
    #print ori_link, dst_link
    # replace a stale link; a regular file at dst_link is not removed and
    # would make os.symlink raise
    if os.path.islink(dst_link):
        os.unlink(dst_link)
    os.symlink(ori_link, dst_link)
    #for ttbar, also need to link the MCs.
    if "syst_tt_" in t:
        for i, mass in enumerate(CONF.mass_lst):
            #print "creating links of signal samples", "signal_G_hh_c10_M" + str(mass)
            #this is a really bad practice and temp fix now! need to watch this very carefully...
            ori_link = CONF.inputpath + ops.inputdir + "/signal_G_hh_c10_M" + str(
                mass) + "/hist-MiniNTuple.root"
            #ori_link = inputpath.replace("TEST", "DS1_cb") + "signal_G_hh_c10_M" + str(mass) + "/hist-MiniNTuple.root"
            dst_link = inputpath + "signal_G_hh_c10_M" + str(
                mass) + "/hist-MiniNTuple.root"
            helpers.checkpath(inputpath + "signal_G_hh_c10_M" + str(mass))
            #print ori_link, dst_link
            if os.path.islink(dst_link):
                os.unlink(dst_link)
            os.symlink(ori_link, dst_link)
            #link the 2HDM samples if necessary
            # NOTE(review): nesting of this block (inside the mass loop) and of
            # the print below (after the unlink) was reconstructed -- confirm
            if (ops.Xhh):
                ori_link = CONF.inputpath + ops.inputdir + "/signal_X_hh_M" + str(
                    mass) + "/hist-MiniNTuple.root"
                dst_link = inputpath + "signal_X_hh_M" + str(
                    mass) + "/hist-MiniNTuple.root"
                helpers.checkpath(inputpath + "signal_X_hh_M" + str(mass))
                if os.path.islink(dst_link):
                    os.unlink(dst_link)
                print "linking: ", dst_link
                os.symlink(ori_link, dst_link)
    #start running programs
    #print (inputpath)
    # clear the outputs of a previous run before regenerating them
    os.system("rm " + inputpath + "sum_" + t + ".root")
    os.system("rm -r " + inputpath + "Limitinput")
    print "done clearing!"
    # " --Xhh " is forwarded only when ops.Xhh is set
    os.system("python get_count.py --dosyst " + " --inputdir " + t + (" --Xhh " if ops.Xhh else ""))
    os.system("python dump_hists.py --dosyst " + " --inputdir " + t + (" --Xhh " if ops.Xhh else ""))
def oxsty_mAP_results(allres):
    """Compute Oxford-style mAP scores for all test queries and format them as CSV.

    allres -- results object; the attributes ``hs``, ``qcx2_res``, ``SV`` and
              ``title_suffix`` are read.

    The ``compute_ap`` executable is looked up first next to the
    ``oxford_style_gt`` directory (``../compute_ap``) and, if that path does
    not check out, inside the directory itself.

    Returns ``(oxsty_map_csv, scalar_mAP_str)``: the CSV table text built by
    ``load_data2.make_csv_table`` and the ``'# mAP score = ...'`` header line.
    """
    print('oxsty_results> Building oxsty results')
    hs = allres.hs
    qcx2_res = allres.qcx2_res
    SV = allres.SV
    # Check directories where ranked lists of image names will be put
    oxsty_qres_dname = 'oxsty_ranked_lists' + allres.title_suffix
    oxsty_qres_dpath = join(hs.dirs.qres_dir, oxsty_qres_dname)
    helpers.ensure_path(oxsty_qres_dpath)
    oxford_gt_dir = join(hs.dirs.db_dir, 'oxford_style_gt')
    helpers.assertpath(oxford_gt_dir)
    compute_ap_exe = normpath(join(oxford_gt_dir, '../compute_ap'))
    if not helpers.checkpath(compute_ap_exe):
        # The fallback component must be relative: join() drops everything
        # before an absolute component, so '/compute_ap' resolved to the
        # filesystem root instead of the ground-truth directory.
        compute_ap_exe = normpath(join(oxford_gt_dir, 'compute_ap'))
    helpers.assertpath(compute_ap_exe)
    # Get the mAP scores using philbin's program
    query_mAP_list = []
    query_mAP_cx = []
    for qcx in hs.test_sample_cx:
        res = qcx2_res[qcx]
        mAP = get_oxsty_mAP_score_from_res(hs, res, SV, oxsty_qres_dpath,
                                           compute_ap_exe, oxford_gt_dir)
        query_mAP_list.append(mAP)
        query_mAP_cx.append(qcx)
    print('')
    # Calculate the scalar mAP score for the experiment
    scalar_mAP = np.mean(np.array(query_mAP_list))
    scalar_mAP_str = '# mAP score = %r\n' % scalar_mAP
    # build a CSV file with the results
    header = '# Oxford Style Map Scores: title_suffix=%r\n' % allres.title_suffix
    header += scalar_mAP_str
    header += helpers.get_timestamp(format='comment') + '\n'
    header += '# Full Parameters: \n#' + params.param_string().replace('\n', '\n#') + '\n\n'
    column_labels = ['QCX', 'mAP']
    column_list = [query_mAP_cx, query_mAP_list]
    oxsty_map_csv = load_data2.make_csv_table(column_labels, column_list, header)
    return oxsty_map_csv, scalar_mAP_str
def DrawSRcomparison(inputname="CR_High", tag="", keyword="totalbkg_hh", prename="", Xrange=[0, 0], Yrange=[0, 0], norm=True, Logy=0):
    """Draw one histogram of a variation against the nominal one, with a ratio panel.

    The histogram ``keyword`` is read from the variation limit-input file
    (directory ``<inputdir>_<inputname>``) and from the reference file
    (directory ``<inputdir><reweightpath>``); both are drawn in the top pad
    and their ratio (variation / reference) in the bottom pad. The canvas is
    saved as a PDF under ``<inputdir>/Plot/Syst/``.

    inputname -- name of the variation; part of the input directory name,
                 the canvas name and the legend label.
    tag       -- region tag; selects the ``_limit_<tag>.root`` file and is
                 drawn as a label.
    keyword   -- name of the histogram fetched from both files.
    Logy      -- passed to ``canv.SetLogy``; also changes the canvas name
                 and the initial y maximum.
    prename, Xrange, Yrange, norm -- not used in this function body.

    Reads the module-level ``inputdir`` and ``reweightpath``.
    """
    #print inputdir, inputname
    histdir = inputdir + "_" + inputname
    inputroot = ROOT.TFile.Open(CONF.inputpath + "/" + histdir + "/Limitinput/" + histdir + "_limit_" + tag + ".root")
    refroot = ROOT.TFile.Open(CONF.inputpath + "/" + inputdir + reweightpath + "/Limitinput/" + inputdir + reweightpath + "_limit_" + tag + ".root")
    tempname = inputname + "_" + "compare" + "_" + tag + "_" + keyword + (
        "" if Logy == 0 else "_" + str(Logy))
    canv = ROOT.TCanvas(tempname, tempname, 800, 800)
    canv.SetLogy(Logy)
    xleg, yleg = 0.5, 0.7
    legend = ROOT.TLegend(xleg, yleg, xleg + 0.15, yleg + 0.2)
    counter = 0  # never read below
    maxbincontent = (0.2 if Logy == 0 else 10)
    print keyword, histdir

    # variation histogram: red, marker style 20
    temp_hist = inputroot.Get(keyword)
    #print temp_hist.GetName()
    temp_hist.SetLineColor(2)
    temp_hist.SetMarkerStyle(20)
    temp_hist.SetMarkerColor(2)
    temp_hist.SetMarkerSize(1)
    # reference (nominal) histogram: black, marker style 21
    ref_hist = refroot.Get(keyword)
    #print temp_hist.GetName()
    ref_hist.SetLineColor(1)
    ref_hist.SetMarkerStyle(21)
    ref_hist.SetMarkerColor(1)
    ref_hist.SetMarkerSize(1)
    #scale to correct normalization diff
    #temp_hist.Scale(ref_hist.Integral()/temp_hist.Integral())
    #continue
    maxbincontent = max(maxbincontent, ref_hist.GetMaximum(), temp_hist.GetMaximum())
    # head-room factor 1.5 * 100 above the largest bin (the top pad is log-y)
    temp_hist.SetMaximum(maxbincontent * 1.5 * 100)
    ref_hist.SetMaximum(maxbincontent * 1.5 * 100)
    legend.AddEntry(temp_hist, inputname.replace("_", " "), "apl")
    legend.AddEntry(ref_hist, "Nominal", "apl")

    # top pad
    pad0 = ROOT.TPad("pad0", "pad0", 0.0, 0.31, 1., 1.)
    pad0.SetLogy(1)  # always logarithmic, independent of the Logy argument
    pad0.SetRightMargin(0.05)
    pad0.SetBottomMargin(0.0001)
    pad0.SetFrameFillColor(0)
    pad0.SetFrameBorderMode(0)
    pad0.SetFrameFillColor(0)
    pad0.SetBorderMode(0)
    pad0.SetBorderSize(0)
    # bottom (ratio) pad
    pad1 = ROOT.TPad("pad1", "pad1", 0.0, 0.0, 1., 0.30)
    pad1.SetRightMargin(0.05)
    pad1.SetBottomMargin(0.38)
    pad1.SetTopMargin(0.0001)
    pad1.SetFrameFillColor(0)
    pad1.SetFillStyle(0)  # transparent
    pad1.SetFrameBorderMode(0)
    pad1.SetFrameFillColor(0)
    pad1.SetBorderMode(0)
    pad1.SetBorderSize(0)

    # draw both histograms and the legend in the top pad
    canv.cd()
    pad0.Draw()
    pad0.cd()
    temp_hist.Draw("")
    ref_hist.Draw("same")
    legend.SetBorderSize(0)
    legend.SetMargin(0.3)
    legend.SetTextSize(0.04)
    legend.Draw()
    # draw watermarks
    xatlas, yatlas = 0.35, 0.87
    atlas = ROOT.TLatex(xatlas, yatlas, "ATLAS Internal")
    hh4b = ROOT.TLatex(xatlas, yatlas - 0.06, tag.replace("_", " "))
    watermarks = [atlas, hh4b]
    for wm in watermarks:
        wm.SetTextAlign(22)
        wm.SetTextSize(0.04)
        wm.SetTextFont(42)
        wm.SetNDC()
        wm.Draw()

    # switch to the bottom pad for the ratio
    canv.cd()
    pad1.Draw()
    pad1.cd()
    #ratio of the two plots
    ratiohist = temp_hist.Clone("ratio")
    ratiohist.Divide(ref_hist)
    ratiohist.GetYaxis().SetRangeUser(0.6, 1.5)  #set range for ratio plot
    # NOTE(review): "Varaition" is misspelled in this axis title
    ratiohist.GetYaxis().SetTitle(
        "Varaition/Nominal")  #y-axis title of the ratio plot
    ratiohist.GetYaxis().SetTitleFont(43)
    ratiohist.GetYaxis().SetTitleSize(28)
    ratiohist.GetYaxis().SetLabelFont(43)
    ratiohist.GetYaxis().SetLabelSize(28)
    ratiohist.GetYaxis().SetNdivisions(405)
    ratiohist.GetXaxis().SetTitleFont(43)
    ratiohist.GetXaxis().SetTitleOffset(3.5)
    ratiohist.GetXaxis().SetTitleSize(28)
    ratiohist.GetXaxis().SetLabelFont(43)
    ratiohist.GetXaxis().SetLabelSize(28)
    ratiohist.Draw("")
    # horizontal reference line at ratio == 1 across the full x range
    xMin = ref_hist.GetXaxis().GetBinLowEdge(1)
    xMax = ref_hist.GetXaxis().GetBinUpEdge(ref_hist.GetXaxis().GetNbins())
    line = ROOT.TLine(xMin, 1.0, xMax, 1.0)
    line.SetLineStyle(1)
    line.Draw()
    #canv.SetLogy(1)
    helpers.checkpath(CONF.inputpath + inputdir + "/Plot/Syst/")
    canv.SaveAs(CONF.inputpath + inputdir + "/Plot/Syst/" + canv.GetName() + ".pdf")
    pad0.Close()
    pad1.Close()
    canv.Close()
    inputroot.Close()
    refroot.Close()
def write_reweight(fname="TEST", reweight_dic={}, region_dic = [("2bs", "TwoTag_split_Sideband"), ("3b","ThreeTag_Sideband"), ("4b","FourTag_Sideband")], split=False, cond=False):
    '''Write the reweighting steering file ``script/<fname>.txt``.

    One line is written (and printed) per iteration, region and variable,
    with space-separated columns:
    iteration, region, ``event.<var>``, input folder, parameter file and,
    when ``cond`` is set and the region name matches, a condition string.

    fname        -- base name of the output text file; also part of the
                    input-folder column for iterations > 0.
    reweight_dic -- maps variable name -> variable file-name tag.
    region_dic   -- sequence of (region, region file-name tag) pairs.
    split        -- if true, "j0_" variables are written only on even
                    iterations and all other variables only on odd ones.
    cond         -- if true, append a b-tag condition for matching regions.

    Reads the module-level ``iteration`` (number of iterations).
    '''
    motherfolder="Moriond"
    helpers.checkpath("script")
    #ready to dump file
    f = open("script/" + fname + ".txt", "w")
    #make sure to overwrite every time!
    f.truncate()
    f.write( "#reweighting script for hh4b analysis \n")
    #columns: iteration; Ntrk; parameter; inputfolder; parameterfile
    for i in range(iteration):
        f.write( "#iteration:" + str(i) + "\n")
        #the separating spaces are very important!!!!
        for region, region_fname in region_dic:
            for var, var_fname in reweight_dic.iteritems():
                if split: #this is to reweight leading pT and trk pT separately
                    if "j0_" in var and i%2 == 1: #odd iteration: skip the j0_ variables
                        continue
                    elif "j0_" not in var and i%2 != 1: #even iteration: skip everything else
                        continue
                    # if "j0_pt" in var and i%4 > 1: #for even skip j0_pt
                    #     continue
                    # elif "j0_pt" not in var and i%4 <= 1: #for odd, skip other
                    #     continue
                ##notice this only puts condition on subl, lead reweighting; reweight lead's subl and subl's lead
                # NOTE(review): this chain is placed outside "if split:" --
                # nesting was reconstructed, confirm against the original layout
                #print region_fname, var_fname, ("lead_subl" in region_fname), ("lead" in var_fname)
                if (("lead_lead" in region_fname) or ("lead_subl" in region_fname)):
                    if "lead" in var_fname:
                        continue
                elif (("subl_lead" in region_fname) or ("subl_subl" in region_fname)):
                    if "subl" in var_fname:
                        continue
                elif "lead" in region_fname:
                    if "lead" in var_fname:
                        continue
                elif "subl" in region_fname:
                    if "subl" in var_fname:
                        continue
                templine = ""
                templine += str(i) + " " #iteration
                templine += region + " " #Ntrk
                templine += "event." + var + " " #parameter
                # iteration 0 reads the plain mother folder, later ones the
                # folder suffixed with "_<fname>_<i-1>"
                templine += motherfolder + ("_" + fname + "_" + str(i-1) if i!= 0 else "") + " " #look for the original iteration
                templine += "r" + str(i) + "_" + region_fname + "_" + var_fname + ".txt" + " " #parameterfile;
                ##add in condition; be very careful, this means the TinyNtuple has to be produced with the correct b-tagging MV2Cut
                ##also the definition of condition needs to agree with the PlotTinyTree region condition!!! be very careful!!!
                if cond:
                    if "2Trk_split_lead_Incl" in region_fname:
                        templine += "((event.j0_nb==1)and(event.j1_nb==0))" + " " #condition
                    if "2Trk_split_subl_Incl" in region_fname:
                        templine += "((event.j0_nb==0)and(event.j1_nb==1))" + " " #condition
                    # if "2Trk_split_lead_lead_Incl" in region_fname:
                    #     templine += "((event.j0_nb==1)and(event.j1_nb==0)and(event.j0_trk0_Mv2>0.6455))" + " " #condition
                    # if "2Trk_split_subl_lead_Incl" in region_fname:
                    #     templine += "((event.j0_nb==0)and(event.j1_nb==1)and(event.j1_trk0_Mv2>0.6455))" + " " #condition
                    # if "2Trk_split_lead_subl_Incl" in region_fname:
                    #     templine += "((event.j0_nb==1)and(event.j1_nb==0)and(event.j0_trk0_Mv2<0.6455))" + " " #condition
                    # if "2Trk_split_subl_subl_Incl" in region_fname:
                    #     templine += "((event.j0_nb==0)and(event.j1_nb==1)and(event.j1_trk0_Mv2<0.6455))" + " " #condition
                    if "3Trk_lead" in region_fname:
                        templine += "((event.j0_nb==2)and(event.j1_nb==0))" + " " #condition
                    if "3Trk_subl" in region_fname:
                        templine += "((event.j0_nb==0)and(event.j1_nb==2))" + " " #condition
                    if "4Trk_lead" in region_fname:
                        templine += "((event.j0_nb==2)and(event.j1_nb==0))" + " " #condition
                    if "4Trk_subl" in region_fname:
                        templine += "((event.j0_nb==0)and(event.j1_nb==2))" + " " #condition
                templine += "\n"
                print templine
                f.write(templine)
    #finish
    f.close()
def analysis_pipeline(config):
    """Run one reweighting iteration end to end and optionally publish the plots.

    config -- dict with the keys ``motherdir`` (input directory holding the
              TinyTree), ``reweight`` (reweight method name), ``iter_re``
              (iteration number) and ``reweightplotdir`` (sub-directory of
              the reweight plots to publish).

    The steps are shell commands run through ``os.system``:
    ``PlotTinyTree.py`` -> ``get_count.py`` -> ``plot.py`` -> ``reweight.py``
    (the latter with iteration ``iter_re + 1``). When ``ops.publish`` is set,
    selected PNGs are copied to the web area under ``homepath`` and
    ``createHtmlOverview.py`` is run there.

    Reads the module-level ``ops`` (``inputdir``, ``publish``).
    """
    #setup the directories
    motherdir = config["motherdir"] #the one with TinyTree
    reweight = config["reweight"] #this is the reweight method
    iter_re = config["iter_re"]
    reweightplotdir = config["reweightplotdir"]
    outputdir = ops.inputdir + "_" + reweight + "_" + str(iter_re) #the output from Plot TinyTree, input for analysis code
    print "the directory is: ", outputdir, " parent dir is: ", motherdir, " reweight: ", reweight, " iteration: ", iter_re
    ##reweight
    #print "python PlotTinyTree.py --inputdir " + motherdir + " --outputdir " + outputdir + " --reweight " + reweight + " --iter " + str(iter_re)
    os.system("python PlotTinyTree.py --inputdir " + motherdir + " --outputdir " + outputdir + " --reweight " + reweight + " --iter " + str(iter_re)) #+ " --debug")
    # if (iter_re == iter_total - 1): ## reweight the dijet MC for the last iteration
    #     os.system("python PlotTinyTree.py --inputdir " + motherdir + " --outputdir " + outputdir + " --reweight " + reweight + " --iter " + str(iter_re) + " --dijet")
    ##fit and produce plot
    os.system("python get_count.py --full --inputdir " + outputdir)
    os.system("python plot.py --inputdir " + outputdir)
    os.system("python reweight.py --inputdir " + outputdir + " --iter " + str(iter_re + 1) + " --var " + reweight) ##+1 because it is really for the next iteration
    if ops.publish:
        ##for publication purpose
        homepath="/afs/cern.ch/user/b/btong/"
        workpath=CONF.outputpath + outputdir
        print "Publish!"
        helpers.checkpath(homepath + "/www/share/hh4b/reweight/" + outputdir)
        helpers.checkpath(homepath + "/www/share/hh4b/plot/" + outputdir)
        helpers.checkpath(homepath + "/www/share/hh4b/express/" + outputdir)
        ##list of plots (name suffixes matched against "*<name>.png" below)
        if CONF.fullstudy:
            plt_lst = ["mHH_l_1",
                "leadHCand_Pt_m_1", "leadHCand_Eta", "leadHCand_Phi", "leadHCand_Mass", "leadHCand_Mass_s", "leadHCand_trk_dr",
                "sublHCand_Pt_m_1", "sublHCand_Eta", "sublHCand_Phi", "sublHCand_Mass", "sublHCand_Mass_s", "sublHCand_trk_dr",
                "leadHCand_trk0_Pt", "leadHCand_trk1_Pt", "sublHCand_trk0_Pt", "sublHCand_trk1_Pt"]
        else:
            plt_lst = ["mHH_l_1",
                "leadHCand_Pt_m_1",
                "sublHCand_Pt_m_1",
                "leadHCand_trk0_Pt", "leadHCand_trk1_Pt", "sublHCand_trk0_Pt", "sublHCand_trk1_Pt"]
            #"hCandDr", "hCandDeta", "hCandDphi",
        ##clean up the current plots
        pubDirs = ["reweight", "express", "plot"]
        for pubdir in pubDirs:
            for pic in glob.glob(homepath + "/www/share/hh4b/" + pubdir + "/" + outputdir +"/*"):
                os.remove(pic)
            ##add description file (one per published directory)
            descript = open(homepath + "/www/share/hh4b/" + pubdir + "/" + outputdir +"/shortdescription.txt", "w")
            descript.write(" reweight: " + reweight + " iteration: " + str(iter_re))
            descript.write(" time: " + time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))
            descript.close()
        ##copy
        for plot in plt_lst:
            # reweight plots of the next iteration -> reweight/
            for pic in glob.glob(workpath + "/Plot_r" + str(iter_re + 1) + "/" + reweightplotdir + "/*" + plot + ".png"):
                os.system("cp " + pic + " " + homepath + "/www/share/hh4b/reweight/" + outputdir +"/.")
            # sideband plots -> plot/, and the mHH ones additionally -> express/
            for pic in glob.glob(workpath + "/Plot" + "/Sideband/*" + plot + ".png"):
                os.system("cp " + pic + " " + homepath + "/www/share/hh4b/plot/" + outputdir +"/.")
                if "mHH_l_1" in pic:
                    os.system("cp " + pic + " " + homepath + "/www/share/hh4b/express/" + outputdir +"/.")
            # control-region plots, same destinations as the sideband plots
            for pic in glob.glob(workpath + "/Plot" + "/Control/*" + plot + ".png"):
                #print pic
                os.system("cp " + pic + " " + homepath + "/www/share/hh4b/plot/" + outputdir +"/.")
                if "mHH_l_1" in pic:
                    os.system("cp " + pic + " " + homepath + "/www/share/hh4b/express/" + outputdir +"/.")
                    #print "cp " + pic + " " + homepath + "/www/share/hh4b/express/" + outputdir +"/."
        ##publish: the overview script is run from the web share directory,
        ##then the working directory is restored
        os.chdir(homepath + "/www/share/")
        os.system("python createHtmlOverview.py")
        os.chdir(CONF.currpath)
    # NOTE(review): placed at function level (printed whether or not
    # publishing ran); nesting was reconstructed -- confirm
    print "Done!"
def DrawSignalTruth(outputroot, dcut_lst, scut_lst, inputdir="", outputname="", normalization=0):
    """Overlay unit-normalised truth distributions of data and all RSG mass points.

    For every (data cut, signal cut) pair one canvas is produced: the
    histogram named by the cut is read from the data file and from each
    signal file, normalised to unit integral, written to ``outputroot``,
    converted with ``helpers.TH1toTAsym`` and drawn. The canvas is saved as
    a PDF under ``<inputdir>/Plot/Truth/``.

    outputroot    -- open ROOT file/directory that receives the normalised
                     histograms.
    dcut_lst      -- histogram paths to read from the data file.
    scut_lst      -- histogram paths to read from the signal files; paired
                     element-wise with ``dcut_lst``.
    inputdir      -- directory below ``CONF.inputpath`` holding the samples.
    outputname    -- part of the canvas (and therefore PDF) name.
    normalization -- only used as part of the canvas name here.

    Reads the module-level ``mass_lst``.
    """
    # setup basic plot parameters
    lowmass = -50    # not used below
    highmass = 3150  # not used below
    # load input MC file
    maxbincontent = .4  # approx
    minbincontent = -0.01
    for i, (dcut, scut) in enumerate(zip(dcut_lst, scut_lst)):
        canv = ROOT.TCanvas(inputdir + "_" + outputname + str(normalization), "Efficiency", 800, 800)
        xleg, yleg = 0.55, 0.7
        legend = ROOT.TLegend(xleg, yleg, xleg + 0.3, yleg + 0.2)
        sfiles = [
            "/signal_G_hh_c10_M%i/hist-MiniNTuple.root" % mass
            for mass in mass_lst
        ]
        sinfos = ["RSG " + str(m) + " GeV" for m in mass_lst]
        dfiles = ["/data_test/hist-MiniNTuple.root"]
        dinfos = ["0Tag data"]
        allmc = []  # keeps references to the drawn graphs
        # the data file comes first (j == 0), followed by the signal files
        for j, (f, info) in enumerate(zip(dfiles + sfiles, dinfos + sinfos)):
            cut = dcut if "data" in f else scut  #here could be changed to have more options
            input_mc = ROOT.TFile.Open(CONF.inputpath + inputdir + f)
            if not input_mc:
                print CONF.inputpath + inputdir + f
            try:
                temp_mc = input_mc.Get(cut).Clone()
                #temp_mc = temp_mc.ProjectionY()
            except:
                # print which file/histogram failed, then re-raise
                print CONF.inputpath + inputdir + f
                print cut
                raise
            # set other options
            temp_mc.GetYaxis().SetTitle("Normalized")
            temp_mc.GetXaxis().SetNdivisions(510)
            temp_mc.GetXaxis().SetLabelSize(0.03)
            temp_mc.GetYaxis().SetNdivisions(505)
            # NOTE(review): extent of the three branches below was
            # reconstructed (dR_type is only defined in the last one) -- confirm
            if cut.split("_")[-1] == "pt":
                # rebin the hist for pt
                temp_mc.Rebin(10)
                temp_mc.GetXaxis().SetTitle(temp_mc.GetXaxis().GetTitle() + " [GeV]")
            elif "deta" in cut.lower():
                temp_mc.Rebin(30)
                maxbincontent = .6 / 1.5
                # draw line at the cut value
                cutval = 1.7  # not read; the line below hard-codes 1.7
                line = ROOT.TLine(1.7, 0, 1.7, 0.4)
                line.SetLineWidth(3)
                line.SetLineStyle(9)
                line.Draw()
            else:
                temp_mc.GetXaxis().SetTitle("#DeltaR between H, child b")
                # first four characters of the histogram name after the "/"
                dR_type = cut.split("/")[1][0:4]
                if dR_type != "h0h1":
                    temp_mc.GetXaxis().SetRangeUser(0, 3)
                    maxbincontent = 1.0 / 1.5
                else:
                    maxbincontent = .5
                if dR_type == "h1b2" or dR_type == "h0b0":
                    dR1_line = ROOT.TLine(1, 0, 1, maxbincontent)
                    dR04_line = ROOT.TLine(.4, 0, .4, maxbincontent)
                    dR1_line.Draw("")
                    dR04_line.Draw("")
                elif dR_type == "b0b1" or dR_type == "b2b3":
                    dR1_line = ROOT.TLine(2, 0, 2, maxbincontent)
                    dR04_line = ROOT.TLine(.8, 0, .8, maxbincontent)
                    dR1_line.Draw("")
                    dR04_line.Draw("")
                if temp_mc.GetNbinsX() == 76:
                    temp_mc.Rebin(4)
                elif temp_mc.GetNbinsX() > 20:
                    temp_mc.Rebin(temp_mc.GetNbinsX() / 20)
            if "data" in f:
                temp_mc.Sumw2(True)
            outputroot.cd()
            # normalise to unit area and store under a unique name
            temp_mc.Scale(1 / temp_mc.Integral())
            temp_mc.SetName("RSG_" + info + "_" + cut.replace("/", "_"))
            temp_mc.Write()
            temp2_mc = outputroot.Get(temp_mc.GetName())
            # NOTE(review): this 'continue' skips input_mc.Close() below
            if temp_mc.Integral() < 1e-4:
                continue
            truth_mc = helpers.TH1toTAsym(temp2_mc, efficiency=False)
            truth_mc.SetLineColor(CONF.clr_lst[j])
            truth_mc.SetMarkerStyle(20 + j)
            truth_mc.SetMarkerColor(CONF.clr_lst[j])
            truth_mc.SetMarkerSize(1)
            truth_mc.SetMaximum(maxbincontent * 1.5)
            truth_mc.SetMinimum(minbincontent)
            truth_mc.GetXaxis().SetTitle(cut.split("/")[1].replace("_", " "))
            canv.cd()
            #reset data style
            if "data" in f:
                truth_mc.SetLineColor(1)
                truth_mc.SetMarkerColor(1)
            # the first graph draws the axes ("A"), the rest are overlaid
            if j == 0:
                truth_mc.Draw("APC")
            else:
                truth_mc.Draw("PC")
            allmc.append(truth_mc)
            legend.AddEntry(truth_mc, info, "apl")
            input_mc.Close()
        legend.SetBorderSize(0)
        legend.SetMargin(0.3)
        legend.SetTextSize(0.04)
        legend.Draw()
        # draw watermarks
        xatlas, yatlas = 0.35, 0.87
        atlas = ROOT.TLatex(xatlas, yatlas, "ATLAS Internal")
        hh4b = ROOT.TLatex(xatlas, yatlas - 0.06, "RSG c=1.0")
        lumi = ROOT.TLatex(xatlas, yatlas - 0.12, "MC #sqrt{s} = 13 TeV")
        watermarks = [atlas, hh4b, lumi]
        for wm in watermarks:
            wm.SetTextAlign(22)
            wm.SetTextSize(0.04)
            wm.SetTextFont(42)
            wm.SetNDC()
            wm.Draw()
        # finish up
        outputroot.cd()
        helpers.checkpath(CONF.inputpath + inputdir + "/Plot/Truth/")
        #print CONF.inputpath + inputdir + "/Plot/Truth/"
        # 'cut' still holds the value from the last file of the inner loop
        canv.SaveAs(CONF.inputpath + inputdir + "/Plot/Truth/" + canv.GetName() + "_" + cut.replace("/", "_") + ".pdf")
        canv.Close()
#Tony: master configuration file, specifies all the necessary paths import ROOT, helpers, os #get current directory currpath = os.path.abspath(os.curdir) #change this to your home directory of everything; should be where the code is checked out toppath = os.path.dirname(currpath) #input top directory for the root files inputpath = toppath + "/Output/" helpers.checkpath(inputpath) #output top directory for the output plots/root files outputpath = toppath +"/Output/" helpers.checkpath(outputpath) #output top directory for only plots outplotpath = toppath +"/Plot/" helpers.checkpath(outplotpath) #put in the working directory name; default b77~ workdir = "Moriond" #"Moriond" #b77 #chosen reweight directory reweightdir = "bkg_9" #"Moriond" #b77 #check if reference folder exists refpath = toppath +"/Output/ref/" #do c20 and 2HDM samples doallsig = True #check if reference folder exists fullstudy = False #check if doing my thesis; if so, disable all the ATLAS labels thesis = True #check if the path exists if not os.path.exists(refpath): print "please copy the directory: /afs/cern.ch/user/b/btong/work/public/RunIIHH4b/ref over to Output/ref!!!" else:
def plot_RSG_syst_detail(masterdic, cut): canv = ROOT.TCanvas(cut + "_" + "RSG" + "_" + "syst_detail", "Sytematics", 800, 800) xleg, yleg = 0.52, 0.7 legend = ROOT.TLegend(xleg, yleg, xleg+0.3, yleg+0.2) # setup basic plot parameters # load input MC file mass_lst = [1000, 1100, 1200, 1300, 1400, 1500, 1600, 1800, 2000, 2250, 2500, 2750, 3000] eff_lst = [] graph_lst = [] maxbincontent = 100.0 minbincontent = -0.001 lowmass = 950 highmass = 3150 #create bsyst list bsyst_lst = [] for i in bsyst: if "up" in i: bsyst_lst.append(i.replace("up", "")) #now loop draw_counter = 0 for i, syst in enumerate(bsyst_lst): eff_lst.append( ROOT.TH1F(syst, "%s; Mass, GeV; Systematic Percentage Diff" %syst, int((highmass-lowmass)/100), lowmass, highmass) ) maxsyst = 0.0 for mass in mass_lst: temp_col_dic = find_syst(masterdic, cut, syst, "RSG1_" + str(mass)) syst_eff = add_syst(temp_col_dic) #this is a tuple! # if (syst_eff[0] * 100 < 3): #if the systematic contribution is less than 3 percent # continue eff_lst[i].SetBinContent(eff_lst[i].GetXaxis().FindBin(mass), syst_eff[0] * 100) eff_lst[i].SetBinError(eff_lst[i].GetXaxis().FindBin(mass), 0) #print syst, syst_eff[0] maxbincontent = max(maxbincontent, syst_eff[0] * 100) maxsyst = max(maxsyst, syst_eff[0] * 100) #print maxsyst, i #start the canvas canv.cd() #convert it to a graph if maxsyst < 3: #don't draw everything print syst continue graph_lst.append(helpers.TH1toTAsym(eff_lst[i])) graph_lst[-1].SetLineColor(CONF.clr_lst[draw_counter]) graph_lst[-1].SetMarkerStyle(20 + draw_counter) graph_lst[-1].SetMarkerColor(CONF.clr_lst[draw_counter]) graph_lst[-1].SetMarkerSize(1) graph_lst[-1].SetMaximum(maxbincontent * 1.5) graph_lst[-1].SetMinimum(minbincontent) legend.AddEntry(graph_lst[-1], syst.replace("_", " "), "apl") if draw_counter==0: graph_lst[-1].Draw("APC") draw_counter += 1 else: graph_lst[-1].Draw("PC") draw_counter += 1 legend.SetBorderSize(0) legend.SetMargin(0.3) legend.SetTextSize(0.04) legend.Draw() # draw reference 
lines # draw watermarks xatlas, yatlas = 0.35, 0.87 atlas = ROOT.TLatex(xatlas, yatlas, "ATLAS Internal") hh4b = ROOT.TLatex(xatlas, yatlas-0.06, "RSG c=1.0") lumi = ROOT.TLatex(xatlas, yatlas-0.12, "MC #sqrt{s} = 13 TeV") watermarks = [atlas, hh4b, lumi] for wm in watermarks: wm.SetTextAlign(22) wm.SetTextSize(0.04) wm.SetTextFont(42) wm.SetNDC() wm.Draw() # finish up helpers.checkpath(CONF.inputpath + ops.inputdir + "/" + "Plot/Syst/") canv.SaveAs(CONF.inputpath + ops.inputdir + "/" + "Plot/Syst/" + canv.GetName() + ".pdf") canv.Close()
def plot_RSG_syst(masterdic, cut):
    """Plot the grouped RSG systematic uncertainties (in percent) versus mass.

    masterdic -- systematics dictionary; ``masterdic[cut][<sample>]`` must
                 provide ``"int"`` and ``"int_err"`` for the "Stat" entry and
                 is handed to ``find_syst`` for the other groups.
    cut       -- region key (2b/3b/4b); also the prefix of the canvas name.

    One graph per group in JER, JMR, Rtrk, EFF and Stat is drawn on a common
    canvas, which is saved under ``<ops.inputdir>/Plot/Syst/``.
    """
    canv = ROOT.TCanvas(cut + "_" + "RSG" + "_" + "syst", "Sytematics", 800, 800)
    xleg, yleg = 0.52, 0.7
    legend = ROOT.TLegend(xleg, yleg, xleg+0.3, yleg+0.2)
    # setup basic plot parameters
    #mass_lst = [1000, 1100, 1200, 1300, 1400, 1500, 1600, 1800, 2000, 2250, 2500, 2750, 3000]
    mass_lst = [1000, 1100, 1200, 1300, 1400, 1500, 1600, 1800, 2000, 2250, 2500, 2750, 3000, 3500, 4000, 4500, 5000]
    systag_lst = ["JER", "JMR", "Rtrk", "EFF", "Stat"]
    # legend label for every group
    systag_dic = {"JER":"JER", "JMR":"JMR", "Rtrk":"JES/JMS", "EFF":"b-tag SF", "Stat":"Stats"}
    eff_lst = []
    graph_lst = []
    maxbincontent = 40.0
    minbincontent = -0.001
    lowmass = 950
    #highmass = 3150
    highmass = 5050
    for i, syst in enumerate(systag_lst):
        eff_lst.append( ROOT.TH1F(syst, "%s; Mass, GeV; Systematic Percentage Diff" %syst, int((highmass-lowmass)/100), lowmass, highmass) )
        for mass in mass_lst:
            # compare by value: 'is' tests object identity and only matched
            # the literal by accident of string interning
            if syst == "Stat":
                # statistical uncertainty: relative error of the yield
                for key2 in masterdic[cut]:
                    if "RSG1_" + str(mass) in key2:
                        eff_lst[i].SetBinContent(eff_lst[i].GetXaxis().FindBin(mass), masterdic[cut][key2]["int_err"]/masterdic[cut][key2]["int"] * 100)
                        eff_lst[i].SetBinError(eff_lst[i].GetXaxis().FindBin(mass), 0)
            else:
                temp_col_dic = find_syst(masterdic, cut, syst, "RSG1_" + str(mass))
                syst_eff = add_syst(temp_col_dic) #this is a tuple!
                eff_lst[i].SetBinContent(eff_lst[i].GetXaxis().FindBin(mass), syst_eff[0] * 100)
                eff_lst[i].SetBinError(eff_lst[i].GetXaxis().FindBin(mass), 0)
                # the bins hold percentages, so the running maximum has to be
                # tracked in percent as well (it was compared as a fraction)
                maxbincontent = max(maxbincontent, syst_eff[0] * 100)
        #start the canvas
        canv.cd()
        #convert it to a TGraph
        graph_lst.append(helpers.TH1toTAsym(eff_lst[i]))
        graph_lst[i].SetLineColor(CONF.clr_lst[i])
        graph_lst[i].SetMarkerStyle(20 + i)
        graph_lst[i].SetMarkerColor(CONF.clr_lst[i])
        graph_lst[i].SetMarkerSize(1)
        graph_lst[i].SetMaximum(maxbincontent * 1.5)
        graph_lst[i].SetMinimum(minbincontent)
        legend.AddEntry(graph_lst[i], systag_dic[syst].replace("_", " "), "apl")
        # the first group also draws the axes
        if syst == systag_lst[0]:
            graph_lst[i].Draw("APC")
        else:
            graph_lst[i].Draw("PC")
    legend.SetBorderSize(0)
    legend.SetMargin(0.3)
    legend.SetTextSize(0.04)
    legend.Draw()
    # draw watermarks
    xatlas, yatlas = 0.35, 0.87
    atlas = ROOT.TLatex(xatlas, yatlas, "ATLAS Internal")
    hh4b = ROOT.TLatex(xatlas, yatlas-0.06, "RSG c=1.0")
    lumi = ROOT.TLatex(xatlas, yatlas-0.12, "MC #sqrt{s} = 13 TeV")
    watermarks = [atlas, hh4b, lumi]
    for wm in watermarks:
        wm.SetTextAlign(22)
        wm.SetTextSize(0.04)
        wm.SetTextFont(42)
        wm.SetNDC()
        wm.Draw()
    # finish up
    helpers.checkpath(CONF.inputpath + ops.inputdir + "/" + "Plot/Syst/")
    canv.SaveAs(CONF.inputpath + ops.inputdir + "/" + "Plot/Syst/" + canv.GetName() + ".pdf")
    canv.Close()
#Tony: master configuration file, specifies all the necessary paths import ROOT, helpers, os #get current directory currpath = os.path.abspath(os.curdir) #change this to your home directory of everything; should be where the code is checked out toppath = os.path.dirname(currpath) #input top directory for the root files inputpath = toppath + "/Output/" helpers.checkpath(inputpath) #output top directory for the output plots/root files outputpath = toppath + "/Output/" helpers.checkpath(outputpath) #output top directory for only plots outplotpath = toppath + "/Plot/" helpers.checkpath(outplotpath) #put in the working directory name; default b77~ workdir = "Moriond" #"Moriond" #b77 #chosen reweight directory reweightdir = "bkg_9" #"Moriond" #b77 #check if reference folder exists refpath = toppath + "/Output/ref/" #do c20 and 2HDM samples doallsig = True #check if reference folder exists fullstudy = False #check if doing my thesis; if so, disable all the ATLAS labels thesis = True #check if the path exists if not os.path.exists(refpath): print "please copy the directory: /afs/cern.ch/user/b/btong/work/public/RunIIHH4b/ref over to Output/ref!!!" else:
def DrawSignalTruth(outputroot, dcut_lst, scut_lst, inputdir="", outputname="", normalization=0):
    """Overlay unit-normalized distributions from the data file and every RSG signal file.

    For each (dcut, scut) pair one canvas is built. The histogram named
    ``dcut`` is read from the data file and the histogram named ``scut`` from
    each signal file (one per mass in the module-level ``mass_lst``). Every
    histogram is cloned, re-binned, scaled to unit integral, written into
    ``outputroot`` and drawn as a graph. The canvas is saved as a pdf under
    ``CONF.inputpath + inputdir + "/Plot/Truth/"``.

    Arguments:
        outputroot: open ROOT file that receives the normalized histograms.
        dcut_lst: histogram paths to read from the data file.
        scut_lst: histogram paths to read from the signal files; paired
            element-wise with ``dcut_lst``.
        inputdir: directory (below ``CONF.inputpath``) holding the samples.
        outputname: prefix used in the canvas name.
        normalization: only appended to the canvas name in this function.
    """
    # setup basic plot parameters
    # (lowmass/highmass are not used further down in this function)
    lowmass = -50
    highmass = 3150
    # load input MC file
    maxbincontent = .4 # approx
    minbincontent = -0.01
    for i, (dcut, scut) in enumerate(zip(dcut_lst, scut_lst)):
        canv = ROOT.TCanvas(inputdir + "_" + outputname + str(normalization), "Efficiency", 800, 800)
        xleg, yleg = 0.55, 0.7
        legend = ROOT.TLegend(xleg, yleg, xleg+0.3, yleg+0.2)
        sfiles = ["/signal_G_hh_c10_M%i/hist-MiniNTuple.root" % mass for mass in mass_lst]
        #sfiles = ["/signal_X_hh_M%i/hist-MiniNTuple.root" % mass for mass in mass_lst]
        sinfos = ["RSG " + str(m) + " GeV" for m in mass_lst]
        dfiles = ["/data_test/hist-MiniNTuple.root"]
        dinfos = ["0Tag data"]
        # graphs drawn on this canvas are collected here
        # (presumably to keep them referenced until the canvas is saved -- TODO confirm)
        allmc = []
        # data comes first, so j == 0 is the data entry
        for j,(f,info) in enumerate(zip(dfiles + sfiles, dinfos + sinfos)):
            # data files use the data cut, signal files the signal cut
            cut = dcut if "data" in f else scut
            #here could be changed to have more options
            input_mc = ROOT.TFile.Open(CONF.inputpath + inputdir + f)
            if not input_mc:
                # only reports the path; the Get() below is still attempted
                print CONF.inputpath + inputdir + f
            try:
                temp_mc = input_mc.Get(cut).Clone()
                #temp_mc = temp_mc.ProjectionY()
            except:
                # print which file/histogram failed, then re-raise
                print CONF.inputpath + inputdir + f
                print cut
                raise
            # rebin the hist for pt
            # set other options
            temp_mc.GetYaxis().SetTitle("Normalized")
            temp_mc.GetXaxis().SetNdivisions(510)
            temp_mc.GetXaxis().SetLabelSize(0.03)
            temp_mc.GetYaxis().SetNdivisions(505)
            # overrides the 510 x-axis divisions set two lines above
            temp_mc.GetXaxis().SetNdivisions(505)
            #print(cut)
            if cut.split("_")[-1] == "pt":
                temp_mc.Rebin(10)
                temp_mc.GetXaxis().SetTitle( temp_mc.GetXaxis().GetTitle() + " [GeV]")
            else:
                temp_mc.GetXaxis().SetTitle( "#DeltaR between H, child b")
                # first four characters of the histogram name after the folder, e.g. "h0b0"
                dR_type = cut.split("/")[1][0:4]
                if dR_type != "h0h1":
                    temp_mc.GetXaxis().SetRangeUser(0, 3)
                    maxbincontent = 1.0/1.5
                else:
                    maxbincontent = .5
                # vertical reference lines
                # NOTE(review): these are drawn before canv.cd() and before the
                # graph's axes exist -- confirm they end up on the saved canvas
                if dR_type == "h1b2" or dR_type == "h0b0":
                    dR1_line = ROOT.TLine(1, 0, 1, maxbincontent)
                    dR04_line = ROOT.TLine(.4, 0, .4, maxbincontent)
                    dR1_line.Draw("")
                    dR04_line.Draw("")
                elif dR_type == "b0b1" or dR_type == "b2b3":
                    dR1_line = ROOT.TLine(2, 0, 2, maxbincontent)
                    dR04_line = ROOT.TLine(.8, 0, .8, maxbincontent)
                    dR1_line.Draw("")
                    dR04_line.Draw("")
                # NOTE(review): confirm this re-binning is meant to apply to
                # the dR histograms only and not to the pt ones as well
                if temp_mc.GetNbinsX() == 76:
                    temp_mc.Rebin(4)
                elif temp_mc.GetNbinsX() > 20:
                    temp_mc.Rebin(temp_mc.GetNbinsX() / 20)
            if "data" in f:
                temp_mc.Sumw2(True)
            outputroot.cd()
            # NOTE(review): the emptiness check further down comes after this
            # division, so an empty histogram presumably fails here -- confirm
            temp_mc.Scale(1/temp_mc.Integral())
            temp_mc.SetName("RSG_" + info + "_" + cut.replace("/", "_"))
            temp_mc.Write()
            # read back the copy that was just written to the output file
            temp2_mc = outputroot.Get(temp_mc.GetName())
            # skipping here also skips the input_mc.Close() at the end of the loop body
            if temp_mc.Integral() < 1e-4:
                continue
            truth_mc = helpers.TH1toTAsym(temp2_mc, efficiency=False)
            truth_mc.SetLineColor(CONF.clr_lst[j])
            truth_mc.SetMarkerStyle(20 + j)
            truth_mc.SetMarkerColor(CONF.clr_lst[j])
            truth_mc.SetMarkerSize(1)
            truth_mc.SetMaximum(maxbincontent * 1.5)
            truth_mc.SetMinimum(minbincontent)
            truth_mc.GetXaxis().SetTitle(cut.split("/")[1].replace("_", " "))
            canv.cd()
            #reset data style
            if "data" in f:
                truth_mc.SetLineColor(1)
                truth_mc.SetMarkerColor(1)
            # the first entry draws the axes ("A"), the rest are overlaid
            if j==0:
                truth_mc.Draw("APC")
            else:
                truth_mc.Draw("PC")
            allmc.append(truth_mc)
            legend.AddEntry(truth_mc, info, "apl")
            input_mc.Close()
        legend.SetBorderSize(0)
        legend.SetMargin(0.3)
        legend.SetTextSize(0.04)
        legend.Draw()
        # draw watermarks
        xatlas, yatlas = 0.35, 0.87
        atlas = ROOT.TLatex(xatlas, yatlas, "ATLAS Internal")
        hh4b = ROOT.TLatex(xatlas, yatlas-0.06, "RSG c=1.0")
        lumi = ROOT.TLatex(xatlas, yatlas-0.12, "MC #sqrt{s} = 13 TeV")
        # text before the first "_" of the signal cut
        taglabel = ROOT.TLatex(xatlas, yatlas-0.18, scut.split("_")[0])
        watermarks = [atlas, hh4b, lumi, taglabel]
        for wm in watermarks:
            wm.SetTextAlign(22)
            wm.SetTextSize(0.04)
            wm.SetTextFont(42)
            wm.SetNDC()
            wm.Draw()
        # finish up
        outputroot.cd()
        helpers.checkpath(CONF.inputpath + inputdir + "/Plot/Truth/")
        #print CONF.inputpath + inputdir + "/Plot/Truth/"
        # "cut" still holds the value from the last file of the inner loop
        canv.SaveAs(CONF.inputpath + inputdir + "/Plot/Truth/" + canv.GetName() + "_" + cut.replace("/", "_") + ".pdf")
        canv.Close()
def main(): start_time = time.time() ops = options() global blinded blinded = True global iter_reweight iter_reweight = int(ops.iter) #setup basics inputdir = ops.inputdir inputroot = ops.inputroot inputpath = CONF.inputpath + inputdir + "/" rootinputpath = inputpath + inputroot + "_" print "input root file is: ", rootinputpath global StatusLabel StatusLabel = "Internal" ##StatusLabel = "Preliminary" global reweightfolder reweightfolder = inputpath + "Reweight/" helpers.checkpath(reweightfolder) global outputroot outputroot = ROOT.TFile(reweightfolder + "reweights.root", "recreate") # plot in the control region # # outputFolder = inputpath + inputroot + "Plot/" + "Sideband" # plotRegion(rootinputpath, inputdir, cut="FourTag" + "_" + "Sideband" + "_" + "mHH_l", xTitle="m_{2J} [GeV]") # plotRegion(rootinputpath, inputdir, cut="FourTag" + "_" + "Sideband" + "_" + "mHH_l", xTitle="m_{2J} [GeV]", Logy=1) ##comp_lst are the distributios we want to look like ##cut_lst are the disbrituions we are changing ##this is the default if "j0pT" in ops.var: region_lst = ["Sideband"] cut_lst = ["TwoTag_split", "ThreeTag", "FourTag"] #this is the one tag rewieght if "bkg" in ops.var: region_lst = ["Incl"] comp_region_lst = ["Incl"] cut_lst = ["NoTag_2Trk_split_lead", "NoTag_2Trk_split_subl", "NoTag_3Trk_lead", "NoTag_3Trk_subl", "NoTag_4Trk_lead", "NoTag_4Trk_subl"] comp_lst = ["OneTag_subl", "OneTag_lead", "OneTag_subl", "OneTag_lead", "TwoTag_subl", "TwoTag_lead"] ##this is to the sidebands rewieght if "bkgsb" in ops.var: region_lst = ["Sideband"] comp_region_lst = ["Sideband"] cut_lst = ["NoTag_2Trk_split_lead", "NoTag_2Trk_split_subl", "NoTag_3Trk_lead", "NoTag_3Trk_subl", "NoTag_4Trk_lead", "NoTag_4Trk_subl"] comp_lst = ["TwoTag_split", "TwoTag_split", "ThreeTag", "ThreeTag", "FourTag", "FourTag"] ##this is the detailed trk reweight # region_lst = ["Incl"] # cut_lst = ["NoTag_2Trk_split_lead_lead", "NoTag_2Trk_split_subl_lead", "NoTag_2Trk_split_lead_subl", 
"NoTag_2Trk_split_subl_subl", "NoTag_3Trk_lead", "NoTag_3Trk_subl", "NoTag_4Trk_lead", "NoTag_4Trk_subl"] # comp_lst = ["OneTag_subl_lead", "OneTag_lead_subl", "OneTag_subl_subl", "OneTag_lead_lead", "OneTag_subl", "OneTag_lead", "TwoTag_subl", "TwoTag_lead"] # comp_region_lst = ["Incl"] #create master list inputtasks = [] #fill the task list for i, region in enumerate(region_lst): if inputroot == "sum": inputroot = "" outputFolder = inputpath + inputroot + "Plot_r" + str(iter_reweight) + "/" + region helpers.checkpath(outputFolder) for j, cut in enumerate(cut_lst): config = {} config["root"] = rootinputpath config["inputdir"] = inputdir config["outputdir"] = outputFolder config["cut"] = cut + "_" + region + "_" try: config["compcut"] = comp_lst[j] + "_" + comp_region_lst[i] + "_" ##change this to be flexiable to regions except NameError: config["compcut"] = "" ##by default this is disabled inputtasks.append(config) #parallel compute! print " Running %s jobs on %s cores" % (len(inputtasks), mp.cpu_count()-1) npool = min(len(inputtasks), mp.cpu_count()-1) pool = mp.Pool(npool) pool.map(dumpRegion, inputtasks) ##for debug # for task in inputtasks: # dumpRegion(task) # dumpRegion(inputtasks[0]) outputroot.Close() print("--- %s seconds ---" % (time.time() - start_time))
def plot_RSG_syst(masterdic, cut):
    """Plot the percentage size of each RSG c=1.0 signal systematic versus mass.

    One curve is drawn per entry of ``systag_lst`` and the canvas is saved as
    ``<CONF.inputpath><ops.inputdir>/Plot/Syst/<cut>_RSG_syst.pdf``.

    Arguments:
        masterdic: nested dictionary indexed as ``masterdic[cut][sample]``. For
            the "Stat" curve every matching sample entry must provide the keys
            "int" and "int_err"; for the other curves the dictionary is handed
            to find_syst()/add_syst().
        cut: region key into ``masterdic``; also used as canvas-name prefix.

    Relies on the module-level ``ops``, ``CONF``, ``helpers``, ``find_syst``
    and ``add_syst``.
    """
    canv = ROOT.TCanvas(cut + "_" + "RSG" + "_" + "syst", "Sytematics", 800, 800)
    xleg, yleg = 0.52, 0.7
    legend = ROOT.TLegend(xleg, yleg, xleg+0.3, yleg+0.2)
    # setup basic plot parameters
    mass_lst = [1000, 1100, 1200, 1300, 1400, 1500, 1600, 1800, 2000, 2250, 2500, 2750, 3000]
    systag_lst = ["JER", "JMR", "Rtrk", "EFF", "Stat"]
    systag_dic = {"JER":"JER", "JMR":"JMR", "Rtrk":"JES/JMS", "EFF":"b-tag SF", "Stat":"Stats"}
    eff_lst = []
    graph_lst = []
    maxbincontent = 40.0 # in percent, like the bin contents
    minbincontent = -0.001
    lowmass = 950
    highmass = 3150

    # Pass 1: fill one histogram per systematic. Nothing is drawn yet, so the
    # y-axis maximum is final before the first graph fixes the axes.
    for syst in systag_lst:
        hist = ROOT.TH1F(syst, "%s; Mass, GeV; Systematic Percentage Diff" %syst, int((highmass-lowmass)/100), lowmass, highmass)
        eff_lst.append(hist)
        for mass in mass_lst:
            signame = "RSG1_" + str(mass)
            massbin = hist.GetXaxis().FindBin(mass)
            # compare by value: "is" on a string literal only works by the
            # accident of interning
            if syst == "Stat":
                for key2 in masterdic[cut]:
                    if signame in key2:
                        hist.SetBinContent(massbin, masterdic[cut][key2]["int_err"]/masterdic[cut][key2]["int"] * 100)
                        hist.SetBinError(massbin, 0)
            else:
                temp_col_dic = find_syst(masterdic, cut, syst, signame)
                syst_eff = add_syst(temp_col_dic) #this is a tuple!
                percent = syst_eff[0] * 100
                hist.SetBinContent(massbin, percent)
                hist.SetBinError(massbin, 0)
                # track the maximum in the same unit (percent) as the contents
                maxbincontent = max(maxbincontent, percent)

    # Pass 2: convert every histogram to a graph and overlay them
    for i, syst in enumerate(systag_lst):
        #start the canvas
        canv.cd()
        #convert it to a TGraph
        graph = helpers.TH1toTAsym(eff_lst[i])
        graph_lst.append(graph)
        graph.SetLineColor(CONF.clr_lst[i])
        graph.SetMarkerStyle(20 + i)
        graph.SetMarkerColor(CONF.clr_lst[i])
        graph.SetMarkerSize(1)
        graph.SetMaximum(maxbincontent * 1.5)
        graph.SetMinimum(minbincontent)
        legend.AddEntry(graph, systag_dic[syst].replace("_", " "), "apl")
        # only the first graph draws the axes ("A")
        if i == 0:
            graph.Draw("APC")
        else:
            graph.Draw("PC")

    legend.SetBorderSize(0)
    legend.SetMargin(0.3)
    legend.SetTextSize(0.04)
    legend.Draw()
    # draw watermarks
    xatlas, yatlas = 0.35, 0.87
    atlas = ROOT.TLatex(xatlas, yatlas, "ATLAS Internal")
    hh4b = ROOT.TLatex(xatlas, yatlas-0.06, "RSG c=1.0")
    lumi = ROOT.TLatex(xatlas, yatlas-0.12, "MC #sqrt{s} = 13 TeV")
    watermarks = [atlas, hh4b, lumi]
    for wm in watermarks:
        wm.SetTextAlign(22)
        wm.SetTextSize(0.04)
        wm.SetTextFont(42)
        wm.SetNDC()
        wm.Draw()
    # finish up
    helpers.checkpath(CONF.inputpath + ops.inputdir + "/" + "Plot/Syst/")
    canv.SaveAs(CONF.inputpath + ops.inputdir + "/" + "Plot/Syst/" + canv.GetName() + ".pdf")
    canv.Close()
def DrawSRcomparison(inputname="CR_High", tag="", keyword="totalbkg_hh", prename="", Xrange=[0, 0], Yrange=[0, 0], norm=True, Logy=0): #print inputdir, inputname histdir = inputdir + "_" + inputname inputroot = ROOT.TFile.Open(CONF.inputpath + "/" + histdir + "/Limitinput/" + histdir + "_limit_" + tag + ".root") refroot = ROOT.TFile.Open(CONF.inputpath + "/" + inputdir + reweightpath + "/Limitinput/" + inputdir + reweightpath + "_limit_" + tag + ".root") tempname = inputname + "_" + "compare" + "_" + tag + "_" + keyword + ("" if Logy == 0 else "_" + str(Logy)) canv = ROOT.TCanvas(tempname, tempname, 800, 800) canv.SetLogy(Logy) xleg, yleg = 0.5, 0.7 legend = ROOT.TLegend(xleg, yleg, xleg+0.15, yleg+0.2) counter = 0 maxbincontent = (0.2 if Logy ==0 else 10) print keyword, histdir temp_hist = inputroot.Get(keyword) #print temp_hist.GetName() temp_hist.SetLineColor(2) temp_hist.SetMarkerStyle(20) temp_hist.SetMarkerColor(2) temp_hist.SetMarkerSize(1) ref_hist = refroot.Get(keyword) #print temp_hist.GetName() ref_hist.SetLineColor(1) ref_hist.SetMarkerStyle(21) ref_hist.SetMarkerColor(1) ref_hist.SetMarkerSize(1) #scale to correct normalization diff #temp_hist.Scale(ref_hist.Integral()/temp_hist.Integral()) #continue maxbincontent = max(maxbincontent, ref_hist.GetMaximum(), temp_hist.GetMaximum()) temp_hist.SetMaximum(maxbincontent * 1.5 * 100) ref_hist.SetMaximum(maxbincontent * 1.5 * 100) legend.AddEntry(temp_hist, inputname.replace("_", " "), "apl") legend.AddEntry(ref_hist, "Nominal", "apl") # top pad pad0 = ROOT.TPad("pad0", "pad0", 0.0, 0.31, 1., 1.) 
pad0.SetLogy(1) pad0.SetRightMargin(0.05) pad0.SetBottomMargin(0.0001) pad0.SetFrameFillColor(0) pad0.SetFrameBorderMode(0) pad0.SetFrameFillColor(0) pad0.SetBorderMode(0) pad0.SetBorderSize(0) pad1 = ROOT.TPad("pad1", "pad1", 0.0, 0.0, 1., 0.30) pad1.SetRightMargin(0.05) pad1.SetBottomMargin(0.38) pad1.SetTopMargin(0.0001) pad1.SetFrameFillColor(0) pad1.SetFillStyle(0) # transparent pad1.SetFrameBorderMode(0) pad1.SetFrameFillColor(0) pad1.SetBorderMode(0) pad1.SetBorderSize(0) canv.cd() pad0.Draw() pad0.cd() temp_hist.Draw("") ref_hist.Draw("same") legend.SetBorderSize(0) legend.SetMargin(0.3) legend.SetTextSize(0.04) legend.Draw() # draw watermarks xatlas, yatlas = 0.35, 0.87 atlas = ROOT.TLatex(xatlas, yatlas, "ATLAS Internal") hh4b = ROOT.TLatex(xatlas, yatlas-0.06, tag.replace("_", " ")) watermarks = [atlas, hh4b] for wm in watermarks: wm.SetTextAlign(22) wm.SetTextSize(0.04) wm.SetTextFont(42) wm.SetNDC() wm.Draw() canv.cd() pad1.Draw() pad1.cd() #ratio of the two plots ratiohist = temp_hist.Clone("ratio") ratiohist.Divide(ref_hist) ratiohist.GetYaxis().SetRangeUser(0.6, 1.5) #set range for ratio plot ratiohist.GetYaxis().SetTitle("Varaition/Nominal") #set range for ratio plot ratiohist.GetYaxis().SetTitleFont(43) ratiohist.GetYaxis().SetTitleSize(28) ratiohist.GetYaxis().SetLabelFont(43) ratiohist.GetYaxis().SetLabelSize(28) ratiohist.GetYaxis().SetNdivisions(405) ratiohist.GetXaxis().SetTitleFont(43) ratiohist.GetXaxis().SetTitleOffset(3.5) ratiohist.GetXaxis().SetTitleSize(28) ratiohist.GetXaxis().SetLabelFont(43) ratiohist.GetXaxis().SetLabelSize(28) ratiohist.Draw("") xMin = ref_hist.GetXaxis().GetBinLowEdge(1) xMax = ref_hist.GetXaxis().GetBinUpEdge(ref_hist.GetXaxis().GetNbins()) line = ROOT.TLine(xMin, 1.0, xMax, 1.0) line.SetLineStyle(1) line.Draw() #canv.SetLogy(1) helpers.checkpath(CONF.inputpath + inputdir + "/Plot/Syst/") canv.SaveAs(CONF.inputpath + inputdir + "/Plot/Syst/" + canv.GetName() + ".pdf") pad0.Close() pad1.Close() canv.Close() 
inputroot.Close() refroot.Close()
def syst_pipeline(config): t = config["inputdir"] print "the directory is: ", t inputpath = CONF.inputpath + t + "/" #check if for syst, the data file is there helpers.checkpath(inputpath + "data_test") #this is a really bad practice and temp fix now! need to watch this very carfully... ori_link = CONF.inputpath + ops.inputdir + "/data_test/hist-MiniNTuple.root" dst_link = inputpath + "data_test/hist-MiniNTuple.root" #print ori_link, dst_link if os.path.islink(dst_link): os.unlink(dst_link) os.symlink(ori_link, dst_link) #for ttbar, also need to link the MCs. if "syst_tt_" in t: ##copy zjets as well helpers.checkpath(inputpath + "zjets_test") ori_link = CONF.inputpath + ops.inputdir + "/zjets_test/hist-MiniNTuple.root" dst_link = inputpath + "zjets_test/hist-MiniNTuple.root" if os.path.islink(dst_link): os.unlink(dst_link) os.symlink(ori_link, dst_link) ##copy SM_hh as well helpers.checkpath(inputpath + "signal_SM_hh") ori_link = CONF.inputpath + ops.inputdir + "/signal_SM_hh/hist-MiniNTuple.root" dst_link = inputpath + "signal_SM_hh/hist-MiniNTuple.root" if os.path.islink(dst_link): os.unlink(dst_link) os.symlink(ori_link, dst_link) ##copy other MC signals sigMClist = ["signal_G_hh_c10_M"] if (ops.Xhh): sigMClist = [ "signal_G_hh_c10_M", "signal_G_hh_c20_M", "signal_X_hh_M" ] for sigMC in sigMClist: for i, mass in enumerate(CONF.mass_lst): if mass == 2750 and sigMC == "signal_G_hh_c20_M": ##no 2750 c20 sample continue #print "creating links of signal samples", "signal_G_hh_c10_M" + str(mass) #this is a really bad practice and temp fix now! need to watch this very carfully... 
ori_link = CONF.inputpath + ops.inputdir + "/" + sigMC + str( mass) + "/hist-MiniNTuple.root" #ori_link = inputpath.replace("TEST", "DS1_cb") + "signal_G_hh_c10_M" + str(mass) + "/hist-MiniNTuple.root" dst_link = inputpath + sigMC + str( mass) + "/hist-MiniNTuple.root" helpers.checkpath(inputpath + sigMC + str(mass)) #print ori_link, dst_link if os.path.islink(dst_link): os.unlink(dst_link) os.symlink(ori_link, dst_link) for i, mass in enumerate([3500, 4000, 4500, 5000, 6000]): if sigMC != "signal_G_hh_c10_M": ##no 2750 c20 sample continue ori_link = CONF.inputpath + ops.inputdir + "/" + sigMC + str( mass) + "/hist-MiniNTuple.root" dst_link = inputpath + sigMC + str( mass) + "/hist-MiniNTuple.root" helpers.checkpath(inputpath + sigMC + str(mass)) if os.path.islink(dst_link): os.unlink(dst_link) os.symlink(ori_link, dst_link) #start running programs #print (inputpath) #os.system("rm " + inputpath + "sum_" + t + ".root") os.system("rm -r " + inputpath + "Limitinput") # print "done clearing!" # ###this is correcting the 3b/4b normalization to 2b. Should only be applied when ttbar stats makes no sense! # if "syst_tt_" in t or "JET_JER" in t or "JET_JMR" in t: ##only for ttbar variations for now # Tophack(inputpath=inputpath) # #Tophack(inputpath=inputpath) #os.system("python get_count.py --dosyst " + " --inputdir " + t) ##ttbar has weird smoothing behaviour, use ttbar + qcd for final distribution now os.system("python dump_hists.py " + " --inputdir " + t + (" --dosyst" if "syst_tt_" in t else ""))
def analysis_pipeline(config): #setup the directories motherdir = config["motherdir"] #the one with TinyTree reweight = config["reweight"] #this is the reweight method iter_re = config["iter_re"] reweightplotdir = config["reweightplotdir"] outputdir = ops.inputdir + "_" + reweight + "_" + str( iter_re) #the output from Plot TinyTree, input for analysis code print "the directory is: ", outputdir, " parent dir is: ", motherdir, " reweight: ", reweight, " iteration: ", iter_re ##reweight #print "python PlotTinyTree.py --inputdir " + motherdir + " --outputdir " + outputdir + " --reweight " + reweight + " --iter " + str(iter_re) os.system("python PlotTinyTree.py --inputdir " + motherdir + " --outputdir " + outputdir + " --reweight " + reweight + " --iter " + str(iter_re)) #+ " --debug") # if (iter_re == iter_total - 1): ## reweight the dijet MC for the last iteration # os.system("python PlotTinyTree.py --inputdir " + motherdir + " --outputdir " + outputdir + " --reweight " + reweight + " --iter " + str(iter_re) + " --dijet") ##fit and produce plot os.system("python get_count.py --full --inputdir " + outputdir) os.system("python plot.py --inputdir " + outputdir) os.system("python reweight.py --inputdir " + outputdir + " --iter " + str(iter_re + 1) + " --var " + reweight) ##+1 because it is really for the next iteration if ops.publish: ##for publication purpose homepath = "/afs/cern.ch/user/b/btong/" workpath = CONF.outputpath + outputdir print "Publish!" 
helpers.checkpath(homepath + "/www/share/hh4b/reweight/" + outputdir) helpers.checkpath(homepath + "/www/share/hh4b/plot/" + outputdir) helpers.checkpath(homepath + "/www/share/hh4b/express/" + outputdir) ##list of plots if CONF.fullstudy: plt_lst = ["mHH_l_1",\ "leadHCand_Pt_m_1", "leadHCand_Eta", "leadHCand_Phi", "leadHCand_Mass", "leadHCand_Mass_s", "leadHCand_trk_dr",\ "sublHCand_Pt_m_1", "sublHCand_Eta", "sublHCand_Phi", "sublHCand_Mass", "sublHCand_Mass_s", "sublHCand_trk_dr",\ "leadHCand_trk0_Pt", "leadHCand_trk1_Pt", "sublHCand_trk0_Pt", "sublHCand_trk1_Pt"] else: plt_lst = ["mHH_l_1", \ "leadHCand_Pt_m_1",\ "sublHCand_Pt_m_1",\ "leadHCand_trk0_Pt", "leadHCand_trk1_Pt", "sublHCand_trk0_Pt", "sublHCand_trk1_Pt"] #"hCandDr", "hCandDeta", "hCandDphi",\ ##clean up the current plots pubDirs = ["reweight", "express", "plot"] for pubdir in pubDirs: for pic in glob.glob(homepath + "/www/share/hh4b/" + pubdir + "/" + outputdir + "/*"): os.remove(pic) ##add description file descript = open( homepath + "/www/share/hh4b/" + pubdir + "/" + outputdir + "/shortdescription.txt", "w") descript.write(" reweight: " + reweight + " iteration: " + str(iter_re)) descript.write(" time: " + time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())) descript.close() ##copy for plot in plt_lst: for pic in glob.glob(workpath + "/Plot_r" + str(iter_re + 1) + "/" + reweightplotdir + "/*" + plot + ".png"): os.system("cp " + pic + " " + homepath + "/www/share/hh4b/reweight/" + outputdir + "/.") for pic in glob.glob(workpath + "/Plot" + "/Sideband/*" + plot + ".png"): os.system("cp " + pic + " " + homepath + "/www/share/hh4b/plot/" + outputdir + "/.") if "mHH_l_1" in pic: os.system("cp " + pic + " " + homepath + "/www/share/hh4b/express/" + outputdir + "/.") for pic in glob.glob(workpath + "/Plot" + "/Control/*" + plot + ".png"): #print pic os.system("cp " + pic + " " + homepath + "/www/share/hh4b/plot/" + outputdir + "/.") if "mHH_l_1" in pic: os.system("cp " + pic + " " + homepath + 
"/www/share/hh4b/express/" + outputdir + "/.") #print "cp " + pic + " " + homepath + "/www/share/hh4b/express/" + outputdir +"/." ##publish os.chdir(homepath + "/www/share/") os.system("python createHtmlOverview.py") os.chdir(CONF.currpath) print "Done!"
def main(): global ops ops = options() inputpath = CONF.inputpath + ops.inputdir + "/" global outputpath outputpath = CONF.outputpath + ops.outputdir + "/tmvaplot/" helpers.checkpath(outputpath) #start analysis on TinyNtuple mass = 1500 fSignal = TFile( inputpath + "signal_G_hh_c10_M" + str(mass) + "/" + "hist-MiniNTuple.root", "read" ) fBackground = TFile( inputpath + "data_test/" + "hist-MiniNTuple.root", "read" ) # Get the signal and background trees for training signal = fSignal.Get( "TinyTree" ) background = fBackground.Get( "TinyTree" ) # Set output paraemters and TMVA methods outfname = "TMVA_" + ops.sel + ".root" methods = DEFAULT_METHODS verbose = False # Print methods mlist = methods.replace(' ',',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile( outputpath + outfname, 'RECREATE' ) # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" 
(not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose( verbose ) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 # gConfig().GetIONames().fWeightFileDir = outputpath + "weights" # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory.AddVariable( "mHH", "mHH", "GeV", 'F' ) #factory.AddVariable( "j0_nb + j1_nb", "Nb", "Nb", 'I' ) factory.AddVariable( "j0_pt", "j0_pt", "GeV", 'F' ) #factory.AddVariable( "j0_eta", "j0_eta", "rad", 'F' ) #factory.AddVariable( "j0_phi", "j0_phi", "rad", 'F' ) factory.AddVariable( "j0_m", "j0_m", "GeV", 'F' ) factory.AddVariable( "j1_pt", "j1_pt", "GeV", 'F' ) #factory.AddVariable( "j1_eta", "j1_eta", "rad", 'F' ) #factory.AddVariable( "j1_phi", "j1_phi", "rad", 'F' ) factory.AddVariable( "j1_m", "j1_m", "GeV", 'F' ) factory.AddVariable( "j0_trk0_pt", "j0_trk0_pt", "GeV", 'F' ) #factory.AddVariable( "j0_trk0_eta", "j0_trk0_eta", "rad", 'F' ) #factory.AddVariable( "j0_trk0_phi", "j0_trk0_phi", "rad", 'F' ) #factory.AddVariable( "j0_trk0_m", "j0_trk0_m", "GeV", 'F' ) #factory.AddVariable( "j0_trk0_Mv2", "j0_trk0_Mv2", "MV2", 'F' ) factory.AddVariable( "j0_trk1_pt", "j0_trk1_pt", "GeV", 'F' ) #factory.AddVariable( "j0_trk1_eta", "j0_trk1_eta", "rad", 'F' ) #factory.AddVariable( "j0_trk1_phi", "j0_trk1_phi", "rad", 'F' ) #factory.AddVariable( "j0_trk1_m", "j0_trk1_m", "GeV", 'F' ) #factory.AddVariable( "j0_trk1_Mv2", "j0_trk1_Mv2", "MV2", 'F' ) factory.AddVariable( "j1_trk0_pt", "j1_trk0_pt", "GeV", 'F' ) #factory.AddVariable( "j1_trk0_eta", 
"j1_trk0_eta", "rad", 'F' ) #factory.AddVariable( "j1_trk0_phi", "j1_trk0_phi", "rad", 'F' ) #factory.AddVariable( "j1_trk0_m", "j1_trk0_m", "GeV", 'F' ) #factory.AddVariable( "j1_trk0_Mv2", "j1_trk0_Mv2", "MV2", 'F' ) factory.AddVariable( "j1_trk1_pt", "j1_trk1_pt", "GeV", 'F' ) #factory.AddVariable( "j1_trk1_eta", "j1_trk1_eta", "rad", 'F' ) #factory.AddVariable( "j1_trk1_phi", "j1_trk1_phi", "rad", 'F' ) #factory.AddVariable( "j1_trk1_m", "j1_trk1_m", "GeV", 'F' ) #factory.AddVariable( "j1_trk1_Mv2", "j1_trk1_Mv2", "MV2", 'F' ) #factory.AddVariable( "myvar1 := var1+var2", 'F' ) #factory.AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ) #factory.AddVariable( "var3", "Variable 3", "units", 'F' ) #factory.AddVariable( "var4", "Variable 4", "units", 'F' ) # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables #factory.AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ) #factory.AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ) # Read input data #if gSystem.AccessPathName( infname ) != 0: gSystem.Exec( "wget http://root.cern.ch/files/" + infname ) #input = TFile.Open( infname ) # Get the signal and background trees for training #signal = input.Get( treeNameSig ) #background = input.Get( treeNameBkg ) # Global event weights (see below for setting event-wise weights) signalWeight = 1.0 backgroundWeight = 1.0 # ====== register trees ==================================================== # # the following method is the prefered one: # you can add an arbitrary number of signal or background trees factory.AddSignalTree ( signal, signalWeight ) factory.AddBackgroundTree( background, backgroundWeight ) # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, 
"Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... *** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : factory.SetSignalWeightExpression ("weight1*weight2"); # for background: factory.SetBackgroundWeightExpression("weight1*weight2"); factory.SetSignalWeightExpression ("weight"); factory.SetBackgroundWeightExpression( "weight" ) # Apply additional cuts on the signal and background sample. 
# example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut( Sigcut_dic[ops.sel] ) mycutBkg = TCut( Bkgcut_dic[ops.sel] ) #TCut( "Rhh < 63 && (j0_nb +j1_nb == 0)" ) # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation if "Cuts" in mlist: factory.BookMethod( TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ) if "CutsD" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) if "CutsGA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ) if "CutsSA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) # Likelihood ("naive Bayes estimator") if "Likelihood" in mlist: 
factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) # Decorrelated likelihood if "LikelihoodD" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) # PCA-transformed likelihood if "LikelihoodPCA" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) # Use a kernel density estimator to approximate the PDFs if "LikelihoodKDE" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) # Use a variable-dependent mix of splines and kernel density estimator if "LikelihoodMIX" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # Test the multi-dimensional probability density estimator # here are the options strings for the MinMax and RMS methods, respectively: # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if "PDERS" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) if "PDERSD" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSD", 
"!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ) if "PDERSPCA" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ) # Multi-dimensional likelihood estimator using self-adapting phase-space binning if "PDEFoam" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) if "PDEFoamBoost" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ) # K-Nearest Neighbour classifier (KNN) if "KNN" in mlist: factory.BookMethod( TMVA.Types.kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ) # H-Matrix (chi2-squared) method if "HMatrix" in mlist: factory.BookMethod( TMVA.Types.kHMatrix, "HMatrix", "!H:!V" ) # Linear discriminant (same as Fisher discriminant) if "LD" in mlist: factory.BookMethod( TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher discriminant (same as LD) if "Fisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher with Gauss-transformed input variables if "FisherG" in mlist: factory.BookMethod( TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" ) # Composite classifier: ensemble (tree) of boosted Fisher classifiers if "BoostedFisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ) # 
Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if "FDA_MC" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if "FDA_GA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if "FDA_SA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if "FDA_MT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if "FDA_GAMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if "FDA_MCMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if "MLP" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ) if "MLPBFGS" in mlist: factory.BookMethod( TMVA.Types.kMLP, 
"MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ) if "MLPBNN" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators # CF(Clermont-Ferrand)ANN if "CFMlpANN" in mlist: factory.BookMethod( TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ) # n_cycles:#nodes:#nodes:... # Tmlp(Root)ANN if "TMlpANN" in mlist: factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) # n_cycles:#nodes:#nodes:... # Support Vector Machine if "SVM" in mlist: factory.BookMethod( TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ) # Boosted Decision Trees if "BDTG" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ) if "BDT" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ) if "BDTB" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ) if "BDTD" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ) # RuleFit -- TMVA implementation of Friedman's method if "RuleFit" in mlist: factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) # 
-------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() fBackground.Close() fSiganl.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n"
def main():
    """Run the TinyTree -> histogram production for the configured samples.

    Reads the command-line options, publishes the run configuration through
    module-level globals (``DEBUG``, ``ops``, ``inputpath``, ``outputpath``,
    ``turnon_reweight``, ``Syst_cut``, ``SR_cut``, ``CR_cut``, ``SB_cut``),
    builds the list of ``pack_input`` tasks, runs ``analysis`` over them in a
    process pool and merges the per-split outputs with ``hadd``.

    When reweighting is on and no systematic is requested, the samples that
    are not reweighted are not reprocessed; symlinks to the corresponding
    files under the "Moriond" input directory are created instead.
    """
    global DEBUG, ops, inputpath, turnon_reweight, outputpath
    global Syst_cut, SR_cut, CR_cut, SB_cut

    print("Start TinyTree--->Plots!")
    start_time = time.time()
    DEBUG = False
    ops = options()
    inputpath = CONF.inputpath + ops.inputdir + "/"

    # Reweighting is switched on by supplying any reweight option.
    turnon_reweight = ops.reweight is not None

    # Output directory of all the hist-files; a systematic gets its own folder.
    syst_suffix = "_" + ops.dosyst if ops.dosyst is not None else ""
    outputpath = CONF.outputpath + ops.outputdir + syst_suffix + "/"
    helpers.checkpath(outputpath)

    # Control region and sideband region sizes and centers.
    # Original notes: CR 36-60 is not bad, but the 4b CR is off; SB 56 is the
    # new default and should stay between 48-58 due to stats.
    CR_size = float(ops.CR)
    SB_size = float(ops.SB)
    CR_X = 124.  # center for control region
    CR_Y = 115.  # center for control region
    SB_X = 124. + float(ops.SBshift)  # center for sideband region; 12 is also ok
    SB_Y = 115. + float(ops.SBshift)  # center for sideband region; 12 is also ok
    Syst_cut = {
        "SR": "event.Xhh < 1.6",
        "CR": GetRhh(RhhCenterX=CR_X, RhhCenterY=CR_Y, RhhCut=CR_size),
        "SB": GetRhh(RhhCenterX=SB_X, RhhCenterY=SB_Y, RhhCut=SB_size),
        "CR_High": GetRhh(RhhCenterX=CR_X+3, RhhCenterY=CR_Y+3, RhhCut=CR_size),
        "CR_Low": GetRhh(RhhCenterX=CR_X-3, RhhCenterY=CR_Y-3, RhhCut=CR_size),
        "CR_Small": "event.Xhh > 2.0 and event.Rhh < %s" % str(CR_size),
        "SB_High": GetRhh(RhhCenterX=SB_X+3, RhhCenterY=SB_Y+3, RhhCut=SB_size),
        "SB_Low": GetRhh(RhhCenterX=SB_X-3, RhhCenterY=SB_Y-3, RhhCut=SB_size),
        "SB_Large": GetRhh(RhhCenterX=SB_X, RhhCenterY=SB_Y, RhhCut=SB_size + 3),
        "SB_Small": GetRhh(RhhCenterX=SB_X, RhhCenterY=SB_Y, RhhCut=SB_size - 3),
        "ZZ": GetXhh(XhhCenterX=103., XhhCenterY=96., XhhCut=1.6),  # used to be 2.1
        "TT": GetXhh(XhhCenterX=164., XhhCenterY=155., XhhCut=1.6),  # used to be 2.1
    }

    # Nominal region definitions: SR, CR = CR minus SR, SB = SB minus CR.
    SR_cut = Syst_cut["SR"]
    CR_cut = "not " + Syst_cut["SR"] + " and " + Syst_cut["CR"]
    SB_cut = "not " + Syst_cut["CR"] + " and " + Syst_cut["SB"]
    if ops.dosyst is not None:
        if "CR" in ops.dosyst:
            CR_cut = "not " + Syst_cut["SR"] + " and " + Syst_cut[ops.dosyst]
            SB_cut = "not " + Syst_cut[ops.dosyst] + " and " + Syst_cut["SB"]  # original note: fix this
            if "Small" in ops.dosyst:
                # CR_Small is a compound "a and b" string, so the sideband
                # veto is written directly in terms of Rhh here.
                # NOTE(review): presumably because a "not " prefix would only
                # negate the first term of the compound cut -- confirm.
                SB_cut = ("event.Rhh > %s " % (str(CR_size))) + " and " + Syst_cut["SB"]
        elif "SB" in ops.dosyst:
            SB_cut = "not " + Syst_cut["CR"] + " and " + Syst_cut[ops.dosyst]
        elif "ZZ" in ops.dosyst or "TT" in ops.dosyst:
            SR_cut = "not " + Syst_cut["SR"] + " and " + Syst_cut[ops.dosyst]
            CR_cut = "not " + Syst_cut["SR"] + " and not " + Syst_cut[ops.dosyst] + " and " + Syst_cut["CR"]
            SB_cut = "not " + Syst_cut["CR"] + " and not " + Syst_cut[ops.dosyst] + " and " + Syst_cut["SB"]

    # Quick single-sample run; DEBUG is hard-wired to False above.
    if DEBUG:
        analysis(pack_input("zjets_test"))
        print("--- %s seconds ---" % (time.time() - start_time))
        return

    # Real job (original timing note: full chain 2 mins, data alone 50 s).
    nsplit = CONF.splits
    split_list = ["data_test", "ttbar_comb_test"]
    if turnon_reweight and ops.dosyst is None:
        split_list = ["data_test"]
    if ops.dijet:
        # Dijet mode always processes only the QCD sample.
        split_list = ["signal_QCD"]

    inputtasks = []
    for split_file in split_list:
        for i in range(nsplit):
            inputtasks.append(pack_input(split_file, inputsplit=i))

    # Other MCs: process them, or (pure reweighting run) link the existing
    # outputs because ttbar and zjets are not reweighted.
    if not turnon_reweight or ops.dosyst is not None:
        if not ops.dijet:
            inputtasks.append(pack_input("zjets_test"))
            inputtasks.append(pack_input("signal_SM_hh"))
    else:
        linklist = ["zjets_test", "ttbar_comb_test", "signal_SM_hh"]
        for target in linklist:
            helpers.checkpath(outputpath + target)
            ori_link = inputpath.replace(ops.inputdir, "Moriond") + target + "/hist-MiniNTuple.root"
            dst_link = outputpath + target + "/hist-MiniNTuple.root"
            try:
                os.remove(dst_link)
            except OSError:
                pass  # nothing to replace
            if os.path.islink(dst_link):
                os.unlink(dst_link)
            print(ori_link + " " + dst_link)
            os.symlink(ori_link, dst_link)

    # Signal samples; only need to be processed once.
    sigMClist = ["signal_G_hh_c10_M"]
    if ops.Xhh:
        sigMClist = ["signal_G_hh_c10_M", "signal_G_hh_c20_M", "signal_X_hh_M"]
    for mass in CONF.mass_lst:
        if ops.dijet:
            # Nothing to do for signal in the dijet case.
            continue
        for sigMC in sigMClist:
            if mass == 2750 and sigMC == "signal_G_hh_c20_M":
                # There is no c20 2750 sample.
                continue
            if not turnon_reweight or ops.dosyst is not None:
                inputtasks.append(pack_input(sigMC + str(mass)))
                continue
            # Reweighting run: signal is not reweighted, so create the folder
            # and a link to the original file instead.
            print("creating links of signal samples " + sigMC + str(mass))
            helpers.checkpath(outputpath + sigMC + str(mass))
            # Original note: replacing the input directory name is a
            # temporary fix and needs to be watched carefully.
            ori_link = inputpath.replace(ops.inputdir, "Moriond") + sigMC + str(mass) + "/hist-MiniNTuple.root"
            dst_link = outputpath + sigMC + str(mass) + "/hist-MiniNTuple.root"
            if os.path.islink(dst_link):
                os.unlink(dst_link)
            print(ori_link + " " + dst_link)
            os.symlink(ori_link, dst_link)

    if ops.debug:
        # Serial run of the first task only.
        analysis(inputtasks[0])
        return

    # Parallel compute. One core is left free, but the pool needs at least
    # one worker: Pool(0) raises ValueError on a single-core host.
    print(" START: Running %s jobs on %s cores" % (len(inputtasks), mp.cpu_count()-1))
    npool = max(1, min(len(inputtasks), mp.cpu_count()-1))
    pool = mp.Pool(npool)
    try:
        pool.map(analysis, inputtasks)
    finally:
        # Always release the worker processes.
        pool.close()
        pool.join()

    # Merge the per-split outputs of every split sample.
    for split_file in split_list:
        targetpath = outputpath + split_file + "/"
        targetfiles = []
        for i in range(nsplit):
            targetfiles += glob.glob(targetpath + ("hist-MiniNTuple_%s" % str(i)) + ".root")
        haddcommand = ["hadd", "-f", targetpath + "hist-MiniNTuple" + ".root"]
        haddcommand += targetfiles
        if subprocess.call(haddcommand) != 0:
            # Keep the split files so a failed merge does not lose them.
            print("hadd failed for %s; keeping the split outputs" % targetpath)
            continue
        # Clean up the sub-process outputs.
        for partial in targetfiles:
            os.remove(partial)

    print("--- %s seconds ---" % (time.time() - start_time))
    print("Finish!")

    # Consistency check on one fixed signal sample.
    f = ROOT.TFile(outputpath + "signal_G_hh_c10_M1000" + "/hist-MiniNTuple.root", "read")
    print(f.Get("FourTag_Signal/mHH_l").GetEntries())
    f.Close()
def main():
    """Dump the reweighting inputs for every configured region/selection pair.

    Reads the command-line options, sets the module-level globals
    (``blinded``, ``iter_reweight``, ``StatusLabel``, ``reweightfolder``,
    ``outputroot``), builds one configuration dict per (region, cut) pair and
    runs ``dumpRegion`` over them in a process pool.

    ``ops.var`` selects the set of distributions: it must contain "j0pT",
    "bkg" or "bkgsb" (later matches override earlier ones; "bkgsb" also
    matches "bkg").

    Raises:
        ValueError: if ``ops.var`` selects none of the known configurations.
    """
    global blinded, iter_reweight, StatusLabel, reweightfolder, outputroot

    start_time = time.time()
    ops = options()
    blinded = True
    iter_reweight = int(ops.iter)

    # Basic paths.
    inputdir = ops.inputdir
    inputroot = ops.inputroot
    inputpath = CONF.inputpath + inputdir + "/"
    rootinputpath = inputpath + inputroot + "_"
    print("input root file is:  " + rootinputpath)

    StatusLabel = "Internal"  # alternative: "Preliminary"
    reweightfolder = inputpath + "Reweight/"
    helpers.checkpath(reweightfolder)
    outputroot = ROOT.TFile(reweightfolder + "reweights.root", "recreate")

    # cut_lst holds the distributions being changed; comp_lst the
    # distributions they should end up looking like. The comparison lists
    # stay None when the selected configuration does not define them.
    region_lst = None
    cut_lst = None
    comp_lst = None
    comp_region_lst = None

    # Default configuration.
    if "j0pT" in ops.var:
        region_lst = ["Sideband"]
        cut_lst = ["TwoTag_split", "ThreeTag", "FourTag"]
    # One-tag reweighting.
    if "bkg" in ops.var:
        region_lst = ["Incl"]
        cut_lst = [
            "NoTag_2Trk_split_lead", "NoTag_2Trk_split_subl",
            "NoTag_3Trk_lead", "NoTag_3Trk_subl",
            "NoTag_4Trk_lead", "NoTag_4Trk_subl",
        ]
        comp_lst = [
            "OneTag_subl", "OneTag_lead",
            "OneTag_subl", "OneTag_lead",
            "TwoTag_subl", "TwoTag_lead",
        ]
        comp_region_lst = ["Incl"]
    # Reweighting to the sidebands.
    if "bkgsb" in ops.var:
        region_lst = ["Sideband"]
        comp_region_lst = ["Sideband"]
        cut_lst = [
            "NoTag_2Trk_split_lead", "NoTag_2Trk_split_subl",
            "NoTag_3Trk_lead", "NoTag_3Trk_subl",
            "NoTag_4Trk_lead", "NoTag_4Trk_subl",
        ]
        comp_lst = [
            "TwoTag_split", "TwoTag_split",
            "ThreeTag", "ThreeTag",
            "FourTag", "FourTag",
        ]

    if region_lst is None or cut_lst is None:
        # Fail with a readable message instead of an unbound-name error
        # further down.
        raise ValueError(
            "unsupported reweight variable %r: expected it to contain "
            "'j0pT', 'bkg' or 'bkgsb'" % (ops.var,))

    # The "sum" input only affects the plot folder name; rootinputpath above
    # deliberately keeps the original prefix.
    if inputroot == "sum":
        inputroot = ""
    has_comparison = comp_lst is not None and comp_region_lst is not None

    # Fill the task list.
    inputtasks = []
    for i, region in enumerate(region_lst):
        outputFolder = inputpath + inputroot + "Plot_r" + str(iter_reweight) + "/" + region
        helpers.checkpath(outputFolder)
        for j, cut in enumerate(cut_lst):
            config = {}
            config["root"] = rootinputpath
            config["inputdir"] = inputdir
            config["outputdir"] = outputFolder
            config["cut"] = cut + "_" + region + "_"
            if has_comparison:
                config["compcut"] = comp_lst[j] + "_" + comp_region_lst[i] + "_"
            else:
                config["compcut"] = ""  # comparison disabled by default
            inputtasks.append(config)

    # Parallel compute. One core is left free, but the pool needs at least
    # one worker: Pool(0) raises ValueError on a single-core host.
    print(" Running %s jobs on %s cores" % (len(inputtasks), mp.cpu_count() - 1))
    npool = max(1, min(len(inputtasks), mp.cpu_count() - 1))
    pool = mp.Pool(npool)
    try:
        pool.map(dumpRegion, inputtasks)
    finally:
        # Always release the worker processes.
        pool.close()
        pool.join()

    outputroot.Close()
    print("--- %s seconds ---" % (time.time() - start_time))
def get_oxsty_mAP_score_from_res(hs, res, SV, oxsty_qres_dpath,
                                 compute_ap_exe, oxford_gt_dir):
    """Score one query result with the external ``compute_ap`` executable.

    Writes the de-duplicated ranked list of image names (``.jpg`` removed) to
    a text file in ``oxsty_qres_dpath``, then runs
    ``compute_ap_exe <ground_truth_query> <ranked_list_file>`` from inside
    ``oxford_gt_dir`` and parses its output as a float.

    Args:
        hs: object whose ``tables`` provides ``cx2_nx``, ``cx2_gx``,
            ``gx2_gname``, ``nx2_name`` and ``prop_dict['oxnum']``.
        res: query result providing ``qcx`` and the score arrays
            ``cx2_score`` / ``cx2_score_V``.
        SV: if true, rank by ``res.cx2_score_V`` instead of ``res.cx2_score``.
        oxsty_qres_dpath: directory that receives the ranked-list file.
        compute_ap_exe: path of the scoring executable.
        oxford_gt_dir: directory the executable is run from.

    Returns:
        The parsed score as a float; ``-1.0`` when the process could not be
        started and no previously saved output is available.
    """
    import errno  # local import: only needed to recognise ENOMEM below

    cwd = os.getcwd()
    # build groundtruth query
    qcx = res.qcx
    qnx = hs.tables.cx2_nx[qcx]
    qoxnum = hs.tables.prop_dict['oxnum'][qcx]
    qname = hs.tables.nx2_name[qnx]
    # rank chips by descending score and map them to image names
    cx2_score = res.cx2_score_V if SV else res.cx2_score
    top_cx = cx2_score.argsort()[::-1]
    top_gx = hs.tables.cx2_gx[top_cx]
    top_gname = hs.tables.gx2_gname[top_gx]
    # build mAP args
    if qoxnum == '':
        print("HACK: Adding a dummy qoxynum")
        qoxnum = '1'
    ground_truth_query = qname + '_' + qoxnum
    # ranked list of image names, keeping only the first occurrence of each
    seen = set()
    ranked_list = []
    for gname in top_gname:
        gname_ = gname.replace('.jpg', '')
        if gname_ not in seen:
            seen.add(gname_)
            ranked_list.append(gname_)
    # write the ranked list of image names
    cx_aug = 'qcx_' + str(qcx)
    ranked_list_fname = 'ranked_list_' + cx_aug + ground_truth_query + '.txt'
    ranked_list_fpath = join(oxsty_qres_dpath, ranked_list_fname)
    helpers.write_to(ranked_list_fpath, '\n'.join(ranked_list))
    # execute external mAP code:
    # ./compute_ap [GROUND_TRUTH] [RANKED_LIST]
    os.chdir(oxford_gt_dir)
    try:
        if OXSTY_VERBOSE:
            printable_cmd = ' '.join((os.path.basename(compute_ap_exe),
                                      ground_truth_query,
                                      os.path.basename(ranked_list_fpath)))
            print('Executing: %r' % printable_cmd)
        else:
            helpers.print_('.')
        args = (compute_ap_exe, ground_truth_query, ranked_list_fpath)
        try:
            out = run_process(args).out
        except OSError as ex:
            # Kept as a string so the parsing below still works and the
            # failure surfaces as a score of -1.0.
            out = '-1'
            if OXSTY_VERBOSE:
                print(repr(ex))
            if ex.errno == errno.ENOMEM:
                # Out of memory while spawning: record the command so it can
                # be run separately, and pick up its saved output if present.
                args_hash = helpers.hashstr(args)
                proc_err_fname = 'proc_err' + args_hash
                proc_err_cmd = proc_err_fname + '.cmd'
                proc_err_out = proc_err_fname + '.out'
                helpers.write_to(proc_err_cmd, repr(args))
                if helpers.checkpath(proc_err_out):
                    out = helpers.read_from(proc_err_out)
        mAP = float(out.strip())
    finally:
        # Restore the caller's working directory even if scoring failed.
        os.chdir(cwd)
    return mAP
def write_reweight(fname="TEST", reweight_dic={},
                   region_dic=[("2bs", "TwoTag_split_Sideband"),
                               ("3b", "ThreeTag_Sideband"),
                               ("4b", "FourTag_Sideband")],
                   split=False, cond=False):
    """Write the reweighting script ``script/<fname>.txt``.

    One line is written per (iteration, region, variable) combination, with
    space-terminated fields in this order: iteration, region label,
    ``event.<var>``, input folder, parameter file, and -- when ``cond`` is
    set -- the b-tag condition(s) matching the region name. The number of
    iterations comes from the module-level ``iteration``.

    Args:
        fname: base name of the script file and of the per-iteration folders.
        reweight_dic: maps an event variable name to the name used in the
            parameter file. Only read, never modified.
        region_dic: sequence of ``(region label, region file name)`` pairs.
            Only read, never modified.
        split: if true, ``j0_`` variables are written only on even
            iterations and all other variables only on odd iterations.
        cond: if true, append the ``event.j0_nb`` / ``event.j1_nb`` condition
            for 2Trk/3Trk/4Trk lead/subl regions.
    """
    motherfolder = "Moriond"
    # Region-name marker -> condition appended when ``cond`` is set. Every
    # matching marker contributes, in this order.
    # Original warning: the TinyNtuple must be produced with the matching
    # b-tagging MV2 cut, and these definitions have to agree with the
    # PlotTinyTree region conditions.
    tag_conditions = (
        ("2Trk_split_lead_Incl", "((event.j0_nb==1)and(event.j1_nb==0))"),
        ("2Trk_split_subl_Incl", "((event.j0_nb==0)and(event.j1_nb==1))"),
        ("3Trk_lead", "((event.j0_nb==2)and(event.j1_nb==0))"),
        ("3Trk_subl", "((event.j0_nb==0)and(event.j1_nb==2))"),
        ("4Trk_lead", "((event.j0_nb==2)and(event.j1_nb==0))"),
        ("4Trk_subl", "((event.j0_nb==0)and(event.j1_nb==2))"),
    )

    helpers.checkpath("script")
    # Mode "w" already truncates, so the file is overwritten on every call.
    with open("script/" + fname + ".txt", "w") as f:
        f.write("#reweighting script for hh4b analysis \n")
        # Columns: iteration; Ntrk; parameter; inputfolder; parameterfile
        for i in range(iteration):
            f.write("#iteration:" + str(i) + "\n")
            for region, region_fname in region_dic:
                # A lead (subl) region is not reweighted with lead (subl)
                # variables: reweight lead's subl and subl's lead.
                if "lead_lead" in region_fname or "lead_subl" in region_fname:
                    vetoed = "lead"
                elif "subl_lead" in region_fname or "subl_subl" in region_fname:
                    vetoed = "subl"
                elif "lead" in region_fname:
                    vetoed = "lead"
                elif "subl" in region_fname:
                    vetoed = "subl"
                else:
                    vetoed = None

                for var, var_fname in reweight_dic.items():
                    # Split mode alternates: leading-jet (j0_) variables on
                    # even iterations, everything else on odd iterations.
                    if split and ("j0_" in var) == (i % 2 == 1):
                        continue
                    if vetoed is not None and vetoed in var_fname:
                        continue

                    # The trailing space after every field is significant.
                    fields = [
                        str(i),  # iteration
                        region,  # Ntrk
                        "event." + var,  # parameter
                        # input folder: output of the previous iteration
                        motherfolder + ("_" + fname + "_" + str(i - 1) if i != 0 else ""),
                        # parameter file
                        "r" + str(i) + "_" + region_fname + "_" + var_fname + ".txt",
                    ]
                    if cond:
                        fields.extend(condition
                                      for marker, condition in tag_conditions
                                      if marker in region_fname)
                    templine = "".join(field + " " for field in fields) + "\n"
                    print(templine)
                    f.write(templine)