# Script head: chain the input trees and walk the events, keeping only
# single-lepton events with exactly 6 jets and 4 CSVM b-tags, then collect
# per-jet quantities.
# NOTE(review): the visible snippet stops inside the per-jet loop; the
# remaining per-jet lists are presumably filled further down -- confirm.
import ROOT
import sys
import os
import json
from TTH.MEAnalysis.samples_base import getSitePrefix
import math

# Job configuration is taken from the environment.
DATASETPATH = os.environ["DATASETPATH"]
FILE_NAMES = os.environ["FILE_NAMES"].split()

# One chain over all input files (names resolved through getSitePrefix).
ch = ROOT.TChain("tree")
for fi in FILE_NAMES:
    ch.AddFile(getSitePrefix(fi))

for ev in ch:
    # Event selection: skip anything that is not SL with 6 jets / 4 b-tags.
    if not ev.is_sl or ev.numJets != 6 or ev.nBCSVM != 4:
        continue

    # Per-jet containers, reset for every selected event.
    jets_p4 = []
    jets_btag = []
    jets_csv = []
    jets_cmva = []
    jets_matchFlag = []
    jets_hadronFlavour = []

    for ijet in range(ev.numJets):
        # Four-vector stored as [pt, eta, phi, mass].
        p4 = [
            ev.jets_pt[ijet],
            ev.jets_eta[ijet],
            ev.jets_phi[ijet],
            ev.jets_mass[ijet],
        ]
        jets_p4.append(p4)
        jets_btag.append(ev.jets_btagFlag[ijet])
v["var_types"], v["tree"], v["float_branches"], datatype = 'float') ######################################## # Event loop ######################################## intree = ROOT.TChain("tree") if "FILE_NAMES" in os.environ.keys(): for fi in os.environ["FILE_NAMES"].split(" "): print "adding", fi intree.AddFile(getSitePrefix(fi)) else: for fi in sys.argv[2:]: print "adding", fi intree.AddFile(getSitePrefix(fi)) #intree.AddFile(fi) n_entries = intree.GetEntries() print "Will process {0} events".format(n_entries) for i_event in range(n_entries): # Progress if not i_event % 1000:
#find integral of csv distribution in this pt/eta bin #int_ij = 0. #for k in range(0, nbinsZ + 2): # int_ij += h3.GetBinContent(i,j,k) int_ij = float(h3.ProjectionZ("asd", i, i, j, j).Integral()) #normalize csv histogram for k in range(0, nbinsZ + 2): unnorm = float(h3.GetBinContent(i, j, k)) if int_ij > 0.0: unnorm = unnorm / int_ij h3.SetBinContent(i, j, k, unnorm) #print i, j, h3.ProjectionZ("", i, i, j, j).Integral() if __name__ == "__main__": tt = ROOT.TChain("vhbb/tree") for inf in INFILES: tt.AddFile(getSitePrefix(inf)) of = ROOT.TFile(OUTFILE, "RECREATE") of.cd() makeControlPlots("btagCSV", "Jet_btagCSV", 0.0, 1.0) makeControlPlots("btagCMVA", "Jet_btagCMVA", -1.0, 1.0) makeControlPlots("btagCMVA_log", "log((1.0 + Jet_btagCMVA)/(1.0 - Jet_btagCMVA))", -15.0, 15.0) of.Write() of.Close()
elif event["is_sl"] and pass_HLT_sl_el(event): return TRIGGERPATH_MAP["e"] elif event["is_dl"] and pass_HLT_dl_mumu(event): return TRIGGERPATH_MAP["mm"] elif event["is_dl"] and pass_HLT_dl_elmu(event): return TRIGGERPATH_MAP["em"] elif event["is_dl"] and pass_HLT_dl_elel(event): return TRIGGERPATH_MAP["ee"] return 0 if __name__ == "__main__": if os.environ.has_key("FILE_NAMES"): file_names = map(getSitePrefix, os.environ["FILE_NAMES"].split()) prefix, sample = get_prefix_sample(os.environ["DATASETPATH"]) else: file_names = [getSitePrefix("/store/user/jpata/tth/pilot_Jul30_v1/ttHTobb_M125_13TeV_powheg_pythia8/pilot_Jul30_v1/160730_115048/0000/tree_{0}.root".format(i)) for i in range(1, 10)] prefix = "" sample = "ttHTobb_M125_13TeV_powheg_pythia8" process = samples_nick[sample] schema = get_schema(sample) #configure systematic scenarios according to MC/Data if schema == "mc": systematics_event = ["nominal"] + SYSTEMATICS_EVENT systematics_weight = [k[0] for k in systematic_weights] elif schema == "data": systematics_event = ["nominal"] systematics_weight = [] dirs = {}
def Make_config(do_subjets=False):
    """Build the outputtree configuration and write it to disk.

    The configuration dictionary is dumped as JSON to 'cfg_outputtree.dat'
    in the current working directory. That file and 'cfg_outputtree.py' are
    then copied into 'runs/<run_name>/', which is created if it is missing.

    Args:
        do_subjets: if True, configure the subjet variant (run name suffix
            '_subjet', httCandidates subjet branches); otherwise configure
            the resolved-jet variant (suffix '_resolved', 'Jet_' branches).

    Returns:
        None. The results are the files written to disk.
    """
    config = {}

    ########################################
    # Information concerning this config file
    ########################################

    ts = time.time()
    config['date'] = datetime.datetime.fromtimestamp(ts).strftime(
        '%Y-%m-%d %H:%M:%S')

    config['info'] = '*** Information on this config.dat ***\n'\
        'This config file contains the configuration data used for outputtree.py,'\
        ' which translates a VHBB Ntuple to a format readable by readtree.py. \n\n'\
        'This config.dat was created on: {0}'.format(config['date'])

    ########################################
    # I/O information
    ########################################

    config['input_tree_name'] = 'vhbb/tree'

    # The path to the root files
    config["root_file_base"] = samples_base.getSitePrefix()
    #config["root_file_base"] = 'file:///hdfs/cms/'
    #config["root_file_base"] = "dcap:///pnfs/psi.ch/cms/trivcat/"

    # The config file will be copied to 'runs/{config['run_name']}'
    config['run_name'] = samples.version
    if do_subjets:
        config['run_name'] += "_subjet"
    else:
        config['run_name'] += "_resolved"

    config['output_root_file_name'] = 'out.root'

    ########################################
    # Program parameters
    ########################################

    # Use only a part of the input root file
    config['Use_limited_entries'] = False

    # Specify the number of entries if only a limited number of entries is
    # used. This number is not used if Use_limited_entries is set to False.
    config['n_entries_limited'] = 10000

    # Specify whether the program should attempt to find MC branches for the
    # jets
    config['Get_MC_for_jets'] = False

    # Specify whether the program should link with quarks.
    # If this is set to True, the program will not save any quark data, and
    # the user should calculate TFs with the MC values of the jets. Make sure
    # config['Get_MC_for_jets'] is set to True if this is set to True.
    config['Dont_Link_Just_Jets'] = False

    # Specify whether the program should look for a closely matched second
    # quark. If two quarks could both be matched to a jet, it is usually
    # better to remove these quarks and this jet altogether, in order to
    # prevent faulty matching.
    config['Remove_double_match'] = True

    ########################################
    # Branch info
    ########################################

    # Specify the names of the particles of which pt, eta, phi and m should
    # be extracted.
    #  - pt, eta, phi and mass are extracted by default. A branch E is created
    #    by default - it is calculated with the use of pt, eta, phi and mass.
    #  - Since the notation '_pt' is common but not *standard*, it is
    #    necessary to add underscores manually where needed.
    config['quarktypes'] = [
        'GenBQuarkFromTop_',
        'GenBQuarkFromH_',
        'GenWZQuark_',
    ]

    # Resolved jets use the plain jet collection; the subjet variant uses the
    # three httCandidates subjet collections instead.
    if not do_subjets:
        config['jettypes'] = ['Jet_']
    else:
        config['jettypes'] = [
            'httCandidates_sjW1',
            'httCandidates_sjW2',
            'httCandidates_sjNonW',
        ]

    # Specify which branches *other* than pt, eta, phi, mass and E should be
    # extracted.
    #  - These should be FULL branch names, e.g. httCandidates_fW
    #  - If the extra variable is particle-specific, write '{particle}' in
    #    front of it. For example: '(a quark)pdgId' can be written as
    #    '{particle}pdgId'
    config['quark_extra_vars'] = [
        '{particle}pdgId',
        #'{particle}charge',
        #'{particle}status',
    ]

    if not do_subjets:
        config['jet_extra_vars'] = [
            '{particle}hadronFlavour',
            '{particle}btagCSV',
            '{particle}btagBDT',
        ]
    else:
        config['jet_extra_vars'] = [
            'httCandidates_pt',
            'httCandidates_eta',
            'httCandidates_phi',
            'httCandidates_mass',
            'httCandidates_fRec',
        ]

    ########################################
    # Cutoff criteria
    ########################################

    # Format of one cutoff criterion: ( varname, operator sign, cutoff value )
    # Note: Only defined variable names can be used here!
    config['jet_cutoff_list'] = [
        ('{particle}pt', '>', 30.0),
        #( 'httCandidates_pt'   , '>' , 200.0 ),
        #( 'httCandidates_mass' , '>' , 120.0 ),
        #( 'httCandidates_mass' , '<' , 220.0 ),
        #( 'httCandidates_fW'   , '<' , 0.175 ),
    ]

    config['quark_cutoff_list'] = [
        ('{particle}pt', '>', 30.0),
    ]

    # Matching dR between gen and reco
    config['max_link_delR'] = 0.3

    # Only used if config['Remove_double_match'] is set to True:
    # if another match is closer than this dR, remove the entire jet
    config['max_sec_delR'] = 0.5

    ########################################
    # Write configuration to file:
    ########################################

    # The context manager closes the file even if json.dump raises. Text mode
    # is used because json.dump emits str, which a binary-mode file rejects
    # on Python 3.
    with open('cfg_outputtree.dat', 'w') as f:
        json.dump(config, f, indent=2)

    run_dir = 'runs/{0}'.format(config['run_name'])
    if not os.path.isdir(run_dir):
        os.makedirs(run_dir)

    # Keep a copy of both the dumped configuration and the script that
    # produced it next to the run output.
    shutil.copyfile('cfg_outputtree.dat',
                    'runs/{0}/cfg_outputtree.dat'.format(config['run_name']))
    shutil.copyfile('cfg_outputtree.py',
                    'runs/{0}/cfg_outputtree.py'.format(config['run_name']))

    print("cfg_outputtree.dat created")
return TRIGGERPATH_MAP["mm"] elif event["is_dl"] and pass_HLT_dl_elmu(event): return TRIGGERPATH_MAP["em"] elif event["is_dl"] and pass_HLT_dl_elel(event): return TRIGGERPATH_MAP["ee"] return 0 if __name__ == "__main__": if os.environ.has_key("FILE_NAMES"): file_names = map(getSitePrefix, os.environ["FILE_NAMES"].split()) prefix, sample = get_prefix_sample(os.environ["DATASETPATH"]) else: file_names = [ getSitePrefix( "/store/user/jpata/tth/pilot_Jul30_v1/ttHTobb_M125_13TeV_powheg_pythia8/pilot_Jul30_v1/160730_115048/0000/tree_{0}.root" .format(i)) for i in range(1, 10) ] prefix = "" sample = "ttHTobb_M125_13TeV_powheg_pythia8" process = samples_nick[sample] schema = get_schema(sample) #configure systematic scenarios according to MC/Data if schema == "mc": systematics_event = ["nominal"] + SYSTEMATICS_EVENT systematics_weight = [k[0] for k in systematic_weights] elif schema == "data": systematics_event = ["nominal"] systematics_weight = []
# Script head: validate and chain the input files given on the command line
# (argv[1] is the output name, argv[2:] the inputs), then restrict the chain
# to a fixed set of enabled branches.
import ROOT
import sys
from TTH.MEAnalysis.samples_base import getSitePrefix

ofname = sys.argv[1]

tt = ROOT.TChain("tree")
for fi in sys.argv[2:]:
    print("adding {0}".format(fi))
    fn = getSitePrefix(fi)

    # Open each file once up front so that an unreadable input aborts the
    # job here rather than later while the chain is being read.
    tf = ROOT.TFile.Open(fn)
    if not tf or tf.IsZombie():
        raise Exception("Could not open file: {0}".format(fn))
    tf.Close()

    tt.AddFile(fn)

# Switch every branch off, then re-enable the selected patterns in order.
tt.SetBranchStatus("*", False)
for enabled_pattern in (
    "mem_tt*",
    "nMatch*",
    "is_*",
    "numJets*",
    "nB*",
    "n_*",
    #"topCand*",
    "btag_LR_4b_2b*",
    "ttCls",
    "run",
    "lumi",
    "evt",
    "cat",
    #"ht",
    #"isotropy",
):
    tt.SetBranchStatus(enabled_pattern, True)
# Skim script: chain the input files (argv[2:]), keep only the enabled
# generator-level and jet/lepton branches, and copy the resulting tree into
# the output file named by argv[1].
import ROOT
import sys
from TTH.MEAnalysis.samples_base import getSitePrefix

ofname = sys.argv[1]

tt = ROOT.TChain("vhbb/tree")
for input_name in sys.argv[2:]:
    print("adding {0}".format(input_name))
    tt.AddFile(getSitePrefix(input_name))

# Switch every branch off, then re-enable the selected patterns in order.
tt.SetBranchStatus("*", False)
for enabled_pattern in (
    "GenBQuarkFromH*",
    "GenBQuarkFromTop*",
    "GenGluon*",
    "GenHiggsBoson*",
    "GenJet*",
    "Jet*",
    "aLeptons*",
    "GenLep*",
    "GenLepFromTop*",
    "GenNuFromTop*",
    "GenTop*",
    "GenWZQuark*",
    "GenStatus2bHad*",
    "ttCls*",
):
    tt.SetBranchStatus(enabled_pattern, True)

# Copy all entries (selection string "1") with only the enabled branches.
of = ROOT.TFile(ofname, "RECREATE")
of.cd()
tt.CopyTree("1")
of.Write()
of.Close()
# NOTE(review): this loop nest uses h3 / nbinsX / nbinsY / nbinsZ, which are
# defined outside the visible snippet; it presumably sits inside a function
# whose header is not shown -- confirm the indentation before applying.
#
# For every (x, y) bin, including under/overflow, rescale the z-axis
# contents by the integral of the z-projection of that bin.
for i in range(nbinsX + 2):
    for j in range(nbinsY + 2):
        # Integral of the csv distribution in this pt/eta bin
        #int_ij = 0.
        #for k in range(0, nbinsZ + 2):
        #    int_ij += h3.GetBinContent(i,j,k)
        slice_integral = float(h3.ProjectionZ("asd", i, i, j, j).Integral())

        # Normalize the csv histogram; bins of an empty slice are written
        # back unchanged.
        for k in range(nbinsZ + 2):
            content = float(h3.GetBinContent(i, j, k))
            scaled = content / slice_integral if slice_integral > 0.0 else content
            h3.SetBinContent(i, j, k, scaled)
        #print i, j, h3.ProjectionZ("", i, i, j, j).Integral()

if __name__ == "__main__":
    # Chain all input files.
    tt = ROOT.TChain("vhbb/tree")
    for inf in INFILES:
        tt.AddFile(getSitePrefix(inf))

    of = ROOT.TFile(OUTFILE, "RECREATE")
    of.cd()

    # (plot name, draw expression, lower edge, upper edge)
    for plot_args in (
        ("btagCSV", "Jet_btagCSV", 0.0, 1.0),
        ("btagCMVA", "Jet_btagCMVA", -1.0, 1.0),
        ("btagCMVA_log", "log((1.0 + Jet_btagCMVA)/(1.0 - Jet_btagCMVA))", -15.0, 15.0),
    ):
        makeControlPlots(*plot_args)

    of.Write()
    of.Close()
# Print the sorted branch names of a tree.
#   argv[1]: input ROOT file (resolved through getSitePrefix)
#   argv[2]: name of the tree inside that file
import ROOT, sys
from TTH.MEAnalysis.samples_base import getSitePrefix

path = getSitePrefix(sys.argv[1])
tf = ROOT.TFile.Open(path)
# A file that cannot be used may come back either as a null handle or as a
# zombie object, so test both (same check as the skim script uses).
if not tf or tf.IsZombie():
    raise Exception("Could not open file: {0}".format(path))

tree_name = sys.argv[2]
tt = tf.Get(tree_name)
# Get() yields a null object for an unknown key; fail with a clear message
# instead of an obscure error on the method call below.
if not tt:
    raise Exception(
        "Could not find tree '{0}' in file: {1}".format(tree_name, path))

branches = sorted(br.GetName() for br in tt.GetListOfBranches())
for br in branches:
    print(br)
# NOTE(review): `v` is defined outside the visible snippet; this call
# presumably belongs to a loop whose header is not shown -- confirm the
# indentation before applying.
AH.addScalarBranches(v["vars"],
                     v["var_types"],
                     v["tree"],
                     v["float_branches"],
                     datatype='float')

########################################
# Event loop
########################################

intree = ROOT.TChain("tree")

# Input files come from the FILE_NAMES environment variable when it is set,
# otherwise from the command line (argv[2:]).
if "FILE_NAMES" in os.environ:
    # split() without an argument ignores repeated/leading/trailing
    # whitespace, so no empty file names are passed on to the chain.
    input_files = os.environ["FILE_NAMES"].split()
else:
    input_files = sys.argv[2:]

for fi in input_files:
    print("adding {0}".format(fi))
    intree.AddFile(getSitePrefix(fi))
    #intree.AddFile(fi)

n_entries = intree.GetEntries()
print("Will process {0} events".format(n_entries))

for i_event in range(n_entries):

    # Progress: report the processed fraction every 1000 events
    if not i_event % 1000:
        print("{0:.1f}%".format(100. * i_event / n_entries))