def lorentz_trans(obt_d):
    """Return a copy of ``obt_d`` whose 'X' array holds Cartesian four-vectors.

    Each row of ``obt_d['X']`` is read as 8 consecutive groups of 5 values.
    The first four values of a group are passed to
    ``LorentzVector.set_pt_eta_phi_e`` and replaced by ``(px, py, pz, e)``;
    the fifth value of each group is copied through unchanged.

    NOTE(review): the previous docstring described the fourth value as the
    mass, but the code hands it to ``set_pt_eta_phi_e`` as the energy --
    confirm which quantity the input really stores.

    Args:
        obt_d: mapping with a 2-D array under the key 'X'.

    Returns:
        A new dict with every key of ``obt_d``; 'X' is replaced by the
        transformed array. The input mapping itself is not modified.
    """
    # After checking the functionality of this function, the matchpattern
    # feature should also be removed.
    n_events, n_features = obt_d['X'].shape
    X_new = np.zeros((n_events, n_features))
    for i, event in enumerate(obt_d['X']):
        for j in range(8):  # 8 objects in the game
            base = 5 * j
            vec = LorentzVector()
            vec.set_pt_eta_phi_e(event[base], event[base + 1],
                                 event[base + 2], event[base + 3])
            X_new[i][base] = vec.px
            X_new[i][base + 1] = vec.py
            X_new[i][base + 2] = vec.pz
            X_new[i][base + 3] = vec.e
            X_new[i][base + 4] = event[base + 4]
    # dict() makes the shallow copy on both Python 2 and Python 3
    # (dict.iteritems() no longer exists on Python 3).
    new_d = dict(obt_d)
    new_d['X'] = X_new
    return new_d
def create_tree():
    """Write a 1000-entry test tree into a temporary ROOT file.

    Every entry gets a random ``a_vect``, random scalars ``a_x``/``a_y``/
    ``a_z``, and between 1 and 5 elements pushed into the ``b_vect``/``b_x``/
    ``b_y`` collections. The collection sizes are checked against ``b_n``
    before each fill. The file and its path are appended to the module-level
    ``FILES`` and ``FILE_PATHS`` lists.
    """
    def _random_vect():
        # four gauss(.5, 1.) draws, evaluated in component order
        return LorentzVector(gauss(.5, 1.), gauss(.5, 1.),
                             gauss(.5, 1.), gauss(.5, 1.))

    # the temporary file is created before the tree, as in the original order
    tmp_file = TemporaryFile()
    tree = Tree("tree", model=create_model())

    # fill the tree
    for entry in xrange(1000):
        # the vector branch is expected to be zeroed at the start of an entry
        assert_equal(tree.a_vect, LorentzVector(0, 0, 0, 0))
        expected = _random_vect()
        tree.a_vect.copy_from(expected)
        assert_equal(tree.a_vect, expected)

        tree.a_x = gauss(.5, 1.)
        tree.a_y = gauss(.3, 2.)
        tree.a_z = gauss(13., 42.)

        tree.b_n = randint(1, 5)
        for _ in xrange(tree.b_n):
            tree.b_vect.push_back(_random_vect())
            tree.b_x.push_back(randint(1, 10))
            tree.b_y.push_back(gauss(.3, 2.))
        tree.i = entry

        # every collection must hold exactly b_n elements
        for collection in (tree.b_vect, tree.b_x, tree.b_y):
            assert_equal(tree.b_n, collection.size())
        tree.fill(reset=True)

    tree.write()
    # TFile.Close the file but keep the underlying
    # tempfile file descriptor open
    ROOT.TFile.Close(tmp_file)
    FILES.append(tmp_file)
    FILE_PATHS.append(tmp_file.GetName())
def fourvect(self):
    """Return a new LorentzVector set from this object's pt, eta, phi and m."""
    four_momentum = LorentzVector()
    # the mass argument used to be self._particle.Mass() * GeV
    kinematics = (self.pt, self.eta, self.phi, self.m)
    four_momentum.SetPtEtaPhiM(*kinematics)
    return four_momentum
def fourvect_vis(self):
    """Return a LorentzVector whose pt comes from the visible Et.

    The pt is ``et2pt(vis_Et, vis_eta, vis_m)``; eta, phi and m are taken
    from ``self.eta``, ``self.phi`` and ``self.m``. If a ValueError is
    raised while setting the vector, a warning is logged and the vector is
    set with pt = 0 instead.

    NOTE(review): only the pt uses the ``vis_*`` quantities; eta/phi/m are
    the non-visible attributes -- confirm this mix is intended.
    """
    four_momentum = LorentzVector()
    try:
        visible_pt = et2pt(self.vis_Et, self.vis_eta, self.vis_m)
        four_momentum.SetPtEtaPhiM(visible_pt, self.eta, self.phi, self.m)
    except ValueError:
        message = ("DOMAIN ERROR ON TRUTH 4-VECT: "
                   "Et: {0} eta: {1} m: {2}")
        log.warning(message.format(self.vis_Et, self.vis_eta, self.vis_m))
        # fall back to a zero-pt vector with the same direction and mass
        four_momentum.SetPtEtaPhiM(0, self.eta, self.phi, self.m)
    return four_momentum
def fourvect(self):
    """Return the electron four-vector built from the cluster energy.

    With fewer than 4 SCT + pixel hits the direction is taken from the
    cluster (``cl_eta``, ``cl_phi``); otherwise from the track
    (``tracketa``, ``trackphi``). In both cases the transverse energy is
    ``cl_E / cosh(eta)`` for the chosen eta and the energy is ``cl_E``.
    """
    few_tracker_hits = (self.nSCTHits + self.nPixHits) < 4
    if few_tracker_hits:
        # electron with low number of tracker hits
        eta, phi = self.cl_eta, self.cl_phi
    else:
        eta, phi = self.tracketa, self.trackphi
    et = self.cl_E / math.cosh(eta)

    four_momentum = LorentzVector()
    four_momentum.SetPtEtaPhiE(et, eta, phi, self.cl_E)
    return four_momentum
def neutrinoPz(lepton_fourVector, neutrino_pt, neutrino_phi):
    """
    Calculate the z-component of the nu momentum by using the W-boson mass
    as the constraint.

    General idea: the W-mass constraint gives a quadratic equation for the
    neutrino pz.
      * discriminant > 0: two solutions; the one with the smaller |pz| is kept.
      * discriminant < 0: no real solution; pz is set to
        ``lepton_pz * scaled_pt / lepton_pt``, where ``scaled_pt`` is the
        value obtained by solving with the discriminant forced to zero.
      * otherwise (exactly zero, or a NaN that compares neither way): the
        single-root expression ``mu * pz_l / pt_l**2`` is used, so the
        result is never left unbound.

    Args:
        lepton_fourVector: object providing Phi(), Pt(), Pz() and E().
        neutrino_pt: transverse momentum assigned to the neutrino.
        neutrino_phi: azimuthal angle assigned to the neutrino.

    Returns:
        A LorentzVector for the neutrino. If the lepton energy is exactly 0
        a default-constructed LorentzVector is returned.
    """
    # mass of the W boson (presumably MeV, matching the inputs -- TODO confirm)
    m_w = 80.4e3

    pz_l = lepton_fourVector.Pz()  # Lepton Pz
    pt_l = lepton_fourVector.Pt()  # Lepton Pt
    e_l = lepton_fourVector.E()  # Lepton energy

    # Nothing can be solved for a lepton without energy.
    if e_l == 0:
        return LorentzVector()

    delta_phi = lepton_fourVector.Phi() - neutrino_phi
    # Simplifying term you get when solving for the neutrino Pz using the
    # transverse mass of the W boson
    mu = (m_w)**2 / 2 + np.cos(delta_phi) * pt_l * neutrino_pt

    e2_minus_pz2 = e_l**2 - pz_l**2
    discriminant = ((mu**2 * pz_l**2) / e2_minus_pz2**2) - (
        (e_l**2 * neutrino_pt**2 - mu**2) / e2_minus_pz2)

    if discriminant > 0:
        centre = mu * pz_l / (pt_l**2)
        root = sqrt(discriminant)
        pZ_nu_A = centre + root
        pZ_nu_B = centre - root
        # keep the solution with the smaller magnitude
        nu_pz = pZ_nu_A if abs(pZ_nu_A) < abs(pZ_nu_B) else pZ_nu_B
    elif discriminant < 0:
        scaled_pt = m_w**2 / (2 * pt_l * (1 - np.cos(delta_phi)))
        nu_pz = (pz_l * scaled_pt) / pt_l
    else:
        nu_pz = mu * pz_l / (pt_l**2)

    nu_px = neutrino_pt * np.cos(neutrino_phi)
    nu_py = neutrino_pt * np.sin(neutrino_phi)

    nu = LorentzVector()
    # NOTE(review): the energy component is set to neutrino_pt, not to
    # sqrt(pt**2 + pz**2); kept as-is because downstream results depend on
    # it -- confirm whether this is intended.
    nu.SetPxPyPzE(nu_px, nu_py, nu_pz, neutrino_pt)
    return nu
def lorentzVecsTop(nom, topIdx0, topIdx1):
    '''
    Build the LorentzVectors of the two jets at indices ``topIdx0`` and
    ``topIdx1`` (the jets identified as b-jets from top decay), using the
    ``jet_pt``/``jet_eta``/``jet_phi``/``jet_e`` collections of ``nom``.

    Returns a tuple ``(top0, top1)`` in the order of the given indices.
    '''
    def _jet_vector(jet_index):
        # one jet -> one (pt, eta, phi, E) four-vector
        jet_vec = LorentzVector()
        jet_vec.SetPtEtaPhiE(nom.jet_pt[jet_index], nom.jet_eta[jet_index],
                             nom.jet_phi[jet_index], nom.jet_e[jet_index])
        return jet_vec

    first = _jet_vector(topIdx0)
    second = _jet_vector(topIdx1)
    return (first, second)
def fourvect(self):
    """Return a new LorentzVector from (pt * GeV, eta, phi, m).

    Only the pt is multiplied by ``GeV``; the mass is passed as stored.
    """
    four_momentum = LorentzVector()
    scaled_pt = self.pt * GeV
    four_momentum.SetPtEtaPhiM(scaled_pt, self.eta, self.phi, self.m)
    return four_momentum
def __init__(self):
    """Attach an empty ``fourvect_boosted`` vector, then run the parent initialiser."""
    # the attribute is assigned before the parent __init__ runs
    boosted = LorentzVector()
    self.fourvect_boosted = boosted
    super(FourMomentum, self).__init__()
def __init__(self):
    """Attach an empty ``fourvect_boosted`` vector to the instance."""
    boosted = LorentzVector()
    self.fourvect_boosted = boosted
### Load data to check ###
### Importing Pyplot ###
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm

plt.rcParams["figure.figsize"] = (7, 6)

fd = f + "anti-kt_test.npy"
X, _ = np.load(fd)

# In[]:
# For every jet, keep the "content" rows whose first "tree" column is -1
# and record one (eta, phi) row plus one pt weight per kept constituent.
a1 = []
w1 = []
for jet in X:
    constituents = jet["content"][jet["tree"][:, 0] == -1]
    # if len(constituents)>1:
    #     constituents = np.delete(constituents,0,0)
    vectors = [LorentzVector(c) for c in constituents]
    # a1 was previously never filled, so np.vstack(a1) below always raised.
    # NOTE(review): eta()/phi() are assumed to exist next to pt() on this
    # LorentzVector class (they match the axis labels below) -- confirm.
    a1.append(
        np.array([[v.eta(), v.phi()] for v in vectors]).reshape(-1, 2))
    w1.extend(v.pt() for v in vectors)

# express each constituent's pt as a percentage of the total pt
w1 = 100 * np.array(w1) / sum(w1)
a1 = np.vstack(a1)

# In[]:
plt.close()
t = plt.hist2d(a1[:, 0], a1[:, 1],
               range=[(-0.5, 0.5), (-0.5, 0.5)],
               bins=200, cmap=plt.cm.jet, weights=w1, norm=LogNorm())
cbar = plt.colorbar()
plt.xlabel(r'$\eta$')
plt.ylabel(r'$\varphi$')
cbar.set_label(r'% of p$_t$')
#plt.savefig('tau_pfd_log_bis.png',dpi=600, transparent=True)
plt.show()
def mass(tau1, tau2, METpx, METpy):
    """
    Calculate and return the visible mass, the collinear mass and the
    collinear momentum fractions of tau1 and tau2.

    The visible tau mass is fixed to 800 (fewer than 3 tracks) or 1200
    (3 or more tracks); the inline comments of the original give the unit
    as MeV. The missing transverse momentum (METpx, METpy) is decomposed
    along the two tau transverse directions by solving a 2x2 linear system.

    Args:
        tau1, tau2: objects providing pt(), eta(), phi() and nTracks().
        METpx, METpy: x and y components of the missing transverse momentum.

    Returns:
        ``(m_vis, m_col, x1, x2)``. When the 2x2 system is singular the
        collinear quantities cannot be computed and
        ``(m_vis, -1., -1111., -1111.)`` is returned. The singular case
        used to return only three values, which broke callers unpacking
        four.
    """
    def _visible_tau(tau):
        # tau 4-vector; synchronize for MMC calculation
        rec = LorentzVector()
        vis_mass = 800. if tau.nTracks() < 3 else 1200.  # MeV
        rec.SetPtEtaPhiM(tau.pt(), tau.eta(), tau.phi(), vis_mass)
        return rec

    recTau1 = _visible_tau(tau1)
    recTau2 = _visible_tau(tau2)
    m_vis = (recTau1 + recTau2).M()

    # Columns of K are the transverse momenta of the two visible taus.
    K = ROOT.TMatrixD(2, 2)
    K[0][0] = recTau1.Px()
    K[0][1] = recTau2.Px()
    K[1][0] = recTau1.Py()
    K[1][1] = recTau2.Py()

    if K.Determinant() == 0:
        # taus are collinear in the transverse plane: no unique solution
        return m_vis, -1., -1111., -1111.

    M = ROOT.TMatrixD(2, 1)
    M[0][0] = METpx
    M[1][0] = METpy

    Kinv = K.Invert()
    X = Kinv * M
    X1 = X(0, 0)
    X2 = X(1, 0)

    # visible momentum fractions
    x1 = 1. / (1. + X1)
    x2 = 1. / (1. + X2)

    # rescale the visible taus to the full tau momenta
    p1 = recTau1 * (1. / x1)
    p2 = recTau2 * (1. / x2)
    m_col = (p1 + p2).M()

    return m_vis, m_col, x1, x2
def fourvect_clbased(self):
    """Return the tau four-vector chosen according to the number of pi0s.

    * ``pi0_n == 0`` with at least one track: the sum of the track
      four-vectors (``track_atTJVA_*``), each built with mass 139.8.
    * ``pi0_n == 0`` without tracks, or ``pi0_n`` of 1 or 2 with
      ``pi0_vistau_pt == 0``: the default ``(pt, eta, phi, m)``.
    * every other case: the ``pi0_vistau_*`` four-vector.
    """
    vect = LorentzVector()
    n_pi0 = self.pi0_n

    if n_pi0 == 0 and self.track_n > 0:
        sum_trk = LorentzVector()
        for trk_ind in range(self.track_n):
            cur_trk = LorentzVector()
            # 139.8 is presumably the charged-pion mass in MeV -- TODO confirm
            cur_trk.SetPtEtaPhiM(self.track_atTJVA_pt[trk_ind],
                                 self.track_atTJVA_eta[trk_ind],
                                 self.track_atTJVA_phi[trk_ind],
                                 139.8)
            sum_trk += cur_trk
        vect.SetPtEtaPhiM(sum_trk.Pt(), sum_trk.Eta(),
                          sum_trk.Phi(), sum_trk.M())
        return vect

    use_default = n_pi0 == 0 or (
        n_pi0 in (1, 2) and self.pi0_vistau_pt == 0)
    if use_default:
        vect.SetPtEtaPhiM(self.pt, self.eta, self.phi, self.m)
    else:
        vect.SetPtEtaPhiM(self.pi0_vistau_pt, self.pi0_vistau_eta,
                          self.pi0_vistau_phi, self.pi0_vistau_m)
    return vect
def run_top_mass(inputPath):
    """Compute a reconstructed top mass per event and append it to the file.

    For every entry of the 'nominal' tree a lepton is chosen (lepton 1 if
    ``Mll02`` is closer to 91.2e3 than ``Mll01``, otherwise lepton 2), the
    neutrino is built with ``neutrinoPz`` from the MET, and the leading jet
    (or a null vector when there are no jets) is added. The invariant mass
    of that sum is stored in a new branch 'topMassReco'.

    Args:
        inputPath: path of the ROOT file to read and then update in place.

    Returns:
        0 if the tree cannot be read, has no ``Mll01``, is empty, or
        already has 'topMassReco'; None after a successful write.
    """
    topMassReco = []

    f = TFile(inputPath, "READ")
    try:
        nom = f.Get('nominal')
        try:
            nEntries = nom.GetEntries()
            nom.Mll01  # attribute access only; fails if the branch is missing
        except Exception:
            print('failed for ' + inputPath)
            return 0

        if nEntries == 0:
            return 0

        if hasattr(nom, "topMassReco"):
            print('already there', inputPath)
            return 0

        for idx in range(nEntries):
            if idx % 10000 == 0:
                print(str(idx) + '/' + str(nEntries))
            nom.GetEntry(idx)

            lep = LorentzVector()
            if abs(nom.Mll02 - 91.2e3) < abs(nom.Mll01 - 91.2e3):
                lep.SetPtEtaPhiE(nom.lep_Pt_1, nom.lep_Eta_1,
                                 nom.lep_Phi_1, nom.lep_E_1)
            else:
                lep.SetPtEtaPhiE(nom.lep_Pt_2, nom.lep_Eta_2,
                                 nom.lep_Phi_2, nom.lep_E_2)

            met = neutrinoPz(lep, nom.met_met, nom.met_phi)
            w = lep + met

            jet = LorentzVector()
            if len(nom.jet_pt):
                jet.SetPtEtaPhiE(nom.jet_pt[0], nom.jet_eta[0],
                                 nom.jet_phi[0], nom.jet_e[0])
            else:
                jet.SetPtEtaPhiE(0, 0, 0, 0)

            top = w + jet
            topMassReco.append(top.M())
    finally:
        # Always release the read handle, including on the early returns,
        # and before the file is reopened for update below.
        f.Close()

    with root_open(inputPath, mode='a') as myfile:
        topMassReco = np.asarray(topMassReco)
        # reinterpret the float array as a one-field record array
        topMassReco.dtype = [('topMassReco', 'float64')]
        topMassReco.dtype.names = ['topMassReco']
        root_numpy.array2tree(topMassReco, tree=myfile.nominal)
        myfile.write()
        myfile.Close()
def transformVars(df):
    '''
    Modify the variables to create the ones that mv2 uses, insert default
    values when needed and save the new variables in the dataframe.

    Args:
    -----
        df: pandas dataframe containing all the interesting variables as
            extracted from the .root file

    Returns:
    --------
        modified mv2-compliant dataframe (the same object, changed in place)
    '''
    from rootpy.vector import LorentzVector, Vector3
    import pandautils as pup

    def _log_ratio(numerator, denominator):
        # log(df[numerator] / df[denominator]); inf/NaN are replaced by -20
        return (df[numerator] / df[denominator]).apply(
            lambda x: np.log(x)).apply(lambda x: _replaceInfNaN(x, -20))

    def _set_default(mask, value, *arrays):
        # write `value` into every array at the positions selected by `mask`
        for arr in arrays:
            arr[mask] = value

    # -- modify features and set default values
    df['abs(jet_eta)'] = abs(df['jet_eta'])

    # -- create new IPxD features
    # The mask must be evaluated ONCE, before pu is overwritten. Rebuilding
    # it from the already-defaulted pu gives an empty mask, so pb and pc
    # were never reset. Jets that are defaulted now have all three values
    # at -1, so their ratios below are 1 and the log is 0.
    for (pu, pb, pc) in zip(df['jet_ip2d_pu'], df['jet_ip2d_pb'],
                            df['jet_ip2d_pc']):
        _set_default(np.logical_or(pu >= 10, pu < -1), -1, pu, pb, pc)

    for (pu, pb, pc) in zip(df['jet_ip3d_pu'], df['jet_ip3d_pb'],
                            df['jet_ip3d_pc']):
        _set_default(pu >= 10, -1, pu, pb, pc)

    df['jet_ip2'] = _log_ratio('jet_ip2d_pb', 'jet_ip2d_pu')
    df['jet_ip2_c'] = _log_ratio('jet_ip2d_pb', 'jet_ip2d_pc')
    df['jet_ip2_cu'] = _log_ratio('jet_ip2d_pc', 'jet_ip2d_pu')
    df['jet_ip3'] = _log_ratio('jet_ip3d_pb', 'jet_ip3d_pu')
    df['jet_ip3_c'] = _log_ratio('jet_ip3d_pb', 'jet_ip3d_pc')
    df['jet_ip3_cu'] = _log_ratio('jet_ip3d_pc', 'jet_ip3d_pu')

    # -- create new IPMP features
    for (pu, pb, pc) in zip(df['jet_ipmp_pu'], df['jet_ipmp_pb'],
                            df['jet_ipmp_pc']):
        _set_default(pu >= 10, -1, pu, pb, pc)

    df['jet_ip'] = _log_ratio('jet_ipmp_pb', 'jet_ipmp_pu')
    df['jet_ip_c'] = _log_ratio('jet_ipmp_pb', 'jet_ipmp_pc')
    df['jet_ip_cu'] = _log_ratio('jet_ipmp_pc', 'jet_ipmp_pu')

    # -- SV1 features
    # displacement of the secondary vertex from the primary vertex
    dx = df['jet_sv1_vtx_x'] - df['PVx']
    dy = df['jet_sv1_vtx_y'] - df['PVy']
    dz = df['jet_sv1_vtx_z'] - df['PVz']

    v_jet = LorentzVector()
    pv2sv = Vector3()
    sv1_L3d = []
    sv1_Lxy = []
    dR = []

    for index, dxi in enumerate(dx):  # loop thru events
        sv1_L3d_ev = []
        sv1L_ev = []
        dR_ev = []
        for jet in range(len(dxi)):  # loop thru jets
            v_jet.SetPtEtaPhiM(df['jet_pt'][index][jet],
                               df['jet_eta'][index][jet],
                               df['jet_phi'][index][jet],
                               df['jet_m'][index][jet])
            if (dxi[jet].size != 0):
                # a vertex exists: 3D and transverse flight lengths, and the
                # dR between the PV->SV direction and the jet axis
                sv1_L3d_ev.append(
                    np.sqrt(
                        pow(dx[index][jet], 2) + pow(dy[index][jet], 2) +
                        pow(dz[index][jet], 2))[0])
                sv1L_ev.append(math.hypot(dx[index][jet], dy[index][jet]))
                pv2sv.SetXYZ(dx[index][jet], dy[index][jet], dz[index][jet])
                jetAxis = Vector3(v_jet.Px(), v_jet.Py(), v_jet.Pz())
                dR_ev.append(pv2sv.DeltaR(jetAxis))
            else:
                # no vertex: default values
                dR_ev.append(-1)
                sv1L_ev.append(-100)
                sv1_L3d_ev.append(-100)
        sv1_Lxy.append(sv1L_ev)
        dR.append(dR_ev)
        sv1_L3d.append(sv1_L3d_ev)

    df['jet_sv1_dR'] = dR
    df['jet_sv1_Lxy'] = sv1_Lxy
    df['jet_sv1_L3d'] = sv1_L3d

    # -- add more default values for sv1 variables
    # number of stored vertex entries per jet, reshaped like jet_pt
    sv1_vtx_ok = pup.match_shape(
        np.asarray([len(el) for event in df['jet_sv1_vtx_x']
                    for el in event]),
        df['jet_pt'])

    for (ok4event, sv1_ntkv4event, sv1_n2t4event, sv1_mass4event,
         sv1_efrc4event, sv1_sig34event) in zip(
             sv1_vtx_ok, df['jet_sv1_ntrkv'], df['jet_sv1_n2t'],
             df['jet_sv1_m'], df['jet_sv1_efc'], df['jet_sv1_sig3d']):
        no_vertex = np.asarray(ok4event) == 0
        sv1_ntkv4event[no_vertex] = -1
        sv1_n2t4event[no_vertex] = -1
        sv1_mass4event[no_vertex] = -1000
        sv1_efrc4event[no_vertex] = -1
        sv1_sig34event[no_vertex] = -100

    # -- JF features
    jf_dR = []
    for (etas, phis, masses) in zip(df['jet_jf_deta'], df['jet_jf_dphi'],
                                    df['jet_jf_m']):  # loop thru events
        jf_dR_ev = []
        for m in range(len(masses)):  # loop thru jets
            if (masses[m] > 0):
                jf_dR_ev.append(
                    np.sqrt(etas[m] * etas[m] + phis[m] * phis[m]))
            else:
                jf_dR_ev.append(-10)
        jf_dR.append(jf_dR_ev)
    df['jet_jf_dR'] = jf_dR

    # -- add more default values for jf variables
    for (jf_mass, jf_n2tv, jf_ntrkv, jf_nvtx, jf_nvtx1t, jf_efrc,
         jf_sig3) in zip(df['jet_jf_m'], df['jet_jf_n2t'],
                         df['jet_jf_ntrkAtVx'], df['jet_jf_nvtx'],
                         df['jet_jf_nvtx1t'], df['jet_jf_efc'],
                         df['jet_jf_sig3d']):
        # mask computed once, so it does not depend on jf_mass being
        # rewritten part-way through
        no_jf = jf_mass <= 0
        jf_n2tv[no_jf] = -1
        jf_ntrkv[no_jf] = -1
        jf_nvtx[no_jf] = -1
        jf_nvtx1t[no_jf] = -1
        jf_mass[no_jf] = -1e3
        jf_efrc[no_jf] = -1
        jf_sig3[no_jf] = -100

    return df
def work(self):
    """Skim the input files into the output tree of di-tau event variables.

    Two modes are selected by ``self.args.local``:

    * local: the inputs are read through a ``TreeChain``; their ``hh_*``
      branches are re-exposed (prefix stripped) on the output tree and the
      per-file cut-flow histograms (and, for data, the GRL fragments) are
      merged and written at the end.
    * non-local: the inputs are read through an ``xAODTree`` with the full
      event filter list, and the output branches carry the ``hh_`` prefix.

    When running locally with no systematic terms and without
    ``redo_selection``, no filters are built and every event is copied
    straight to the output. Otherwise the kinematic, MET, MMC and collinear
    mass variables are computed per event before the tree is filled.

    NOTE(review): the block structure below was reconstructed from a
    whitespace-collapsed source; points where the nesting was ambiguous are
    marked individually.
    """
    # get argument values
    local = self.args.local
    syst_terms = self.args.syst_terms
    datatype = self.metadata.datatype
    year = self.metadata.year
    verbose = self.args.student_verbose
    very_verbose = self.args.student_very_verbose
    redo_selection = self.args.redo_selection
    nominal_values = self.args.nominal_values

    # get the dataset name
    dsname = os.getenv('INPUT_DATASET_NAME', None)
    if dsname is None:
        # attempt to guess dsname from dirname
        if self.files:
            dsname = os.path.basename(os.path.dirname(self.files[0]))

    # is this a signal sample?
    # if so we will also keep some truth information in the output below
    # NOTE(review): dsname can still be None here when the environment
    # variable is unset and self.files is empty; the `in` tests would then
    # raise for MC.
    is_signal = datatype == datasets.MC and (
        '_VBFH' in dsname or
        '_ggH' in dsname or
        '_ZH' in dsname or
        '_WH' in dsname or
        '_ttH' in dsname)
    log.info("DATASET: {0}".format(dsname))
    log.info("IS SIGNAL: {0}".format(is_signal))

    # is this an inclusive signal sample for overlap studies?
    is_inclusive_signal = is_signal and '_inclusive' in dsname

    # is this a BCH-fixed sample? (temporary)
    is_bch_sample = 'r5470_r4540_p1344' in dsname
    if is_bch_sample:
        log.warning("this is a BCH-fixed r5470 sample")

    # onfilechange will contain a list of functions to be called as the
    # chain rolls over to each new file
    onfilechange = []
    count_funcs = {}

    if datatype != datasets.DATA:
        # count the weighted number of events
        if local:
            # skims store the weight in the hh_mc_weight branch
            def mc_weight_count(event):
                return event.hh_mc_weight
        else:
            # otherwise take the first weight of the first truth event
            def mc_weight_count(event):
                return event.TruthEvent[0].weights()[0]

        count_funcs = {
            'mc_weight': mc_weight_count,
        }

    if local:
        # local means running on the skims, the output of this script
        # running on the grid
        if datatype == datasets.DATA:
            # merge the GRL fragments
            merged_grl = goodruns.GRL()

            def update_grl(student, grl, name, file, tree):
                grl |= str(
                    file.Get('Lumi/%s' % student.metadata.treename).GetString())

            onfilechange.append((update_grl, (self, merged_grl,)))

        # data has a single cut-flow bin; other samples get a second bin
        # for the MC-weighted count
        if datatype == datasets.DATA:
            merged_cutflow = Hist(1, 0, 1, name='cutflow', type='D')
        else:
            merged_cutflow = Hist(2, 0, 2, name='cutflow', type='D')

        def update_cutflow(student, cutflow, name, file, tree):
            # record a cut-flow
            year = student.metadata.year
            datatype = student.metadata.datatype
            cutflow[1].value += file.cutflow_event[1].value
            if datatype != datasets.DATA:
                cutflow[2].value += file.cutflow_event_mc_weight[1].value

        onfilechange.append((update_cutflow, (self, merged_cutflow,)))

    else:
        # NEED TO BE CONVERTED TO XAOD
        # if datatype not in (datasets.EMBED, datasets.MCEMBED):
        #     # merge TrigConfTrees
        #     metadirname = '%sMeta' % self.metadata.treename
        #     trigconfchain = ROOT.TChain('%s/TrigConfTree' % metadirname)
        #     map(trigconfchain.Add, self.files)
        #     metadir = self.output.mkdir(metadirname)
        #     metadir.cd()
        #     trigconfchain.Merge(self.output, -1, 'fast keep')
        #     self.output.cd()

        if datatype == datasets.DATA:
            # merge GRL XML strings
            # NOTE(review): with the merging code below disabled, this GRL
            # is created but not filled or written in this branch.
            merged_grl = goodruns.GRL()
            # for fname in self.files:
            #     with root_open(fname) as f:
            #         for key in f.Lumi.keys():
            #             merged_grl |= goodruns.GRL(
            #                 str(key.ReadObj().GetString()),
            #                 from_string=True)
            # lumi_dir = self.output.mkdir('Lumi')
            # lumi_dir.cd()
            # xml_string= ROOT.TObjString(merged_grl.str())
            # xml_string.Write(self.metadata.treename)
            # self.output.cd()

    self.output.cd()

    # create the output tree
    model = get_model(datatype, dsname,
                      prefix=None if local else 'hh_',
                      is_inclusive_signal=is_inclusive_signal)
    log.info("Output Model:\n\n{0}\n\n".format(model))
    outtree = Tree(name=self.metadata.treename, model=model)

    if local:
        tree = outtree
    else:
        tree = outtree.define_object(name='tree', prefix='hh_')

    #tree.define_object(name='tau', prefix='tau_')
    tree.define_object(name='tau1', prefix='tau1_')
    tree.define_object(name='tau2', prefix='tau2_')
    tree.define_object(name='truetau1', prefix='truetau1_')
    tree.define_object(name='truetau2', prefix='truetau2_')
    tree.define_object(name='jet1', prefix='jet1_')
    tree.define_object(name='jet2', prefix='jet2_')
    tree.define_object(name='jet3', prefix='jet3_')

    # one output object per MMC method; each carries a nested resonance
    mmc_objects = [
        tree.define_object(name='mmc0', prefix='mmc0_'),
        tree.define_object(name='mmc1', prefix='mmc1_'),
        tree.define_object(name='mmc2', prefix='mmc2_'),
    ]

    for mmc_obj in mmc_objects:
        mmc_obj.define_object(name='resonance', prefix='resonance_')

    # NEED TO BE CONVERTED TO XAOD
    # trigger_emulation = TauTriggerEmulation(
    #     year=year,
    #     passthrough=local or datatype != datasets.MC or year > 2011,
    #     count_funcs=count_funcs)

    # if not trigger_emulation.passthrough:
    #     onfilechange.append(
    #         (update_trigger_trees, (self, trigger_emulation,)))

    # trigger_config = None

    # if datatype not in (datasets.EMBED, datasets.MCEMBED):
    #     # trigger config tool to read trigger info in the ntuples
    #     trigger_config = get_trigger_config()
    #     # update the trigger config maps on every file change
    #     onfilechange.append((update_trigger_config, (trigger_config,)))

    # define the list of event filters
    if local and syst_terms is None and not redo_selection:
        event_filters = None
    else:
        tau_ntrack_recounted_use_ntup = False
        if year > 2011:
            # peek at first tree to determine if the extended number of
            # tracks is already stored
            with root_open(self.files[0]) as test_file:
                test_tree = test_file.Get(self.metadata.treename)
                tau_ntrack_recounted_use_ntup = (
                    'tau_out_track_n_extended' in test_tree)

        log.info(self.grl)
        # The list order is the order written here; several comments below
        # state ordering constraints between filters.
        event_filters = EventFilterList([
            GRLFilter(
                self.grl,
                passthrough=(local or (datatype not in (datasets.DATA,
                                                        datasets.EMBED))),
                count_funcs=count_funcs),
            CoreFlags(passthrough=local, count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # EmbeddingPileupPatch(
            #     passthrough=(
            #         local or year > 2011 or datatype != datasets.EMBED),
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD (not a priority)
            # PileupTemplates(
            #     year=year,
            #     passthrough=(
            #         local or is_bch_sample or datatype not in (
            #             datasets.MC, datasets.MCEMBED)),
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # RandomSeed(
            #     datatype=datatype,
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # BCHSampleRunNumber(
            #     passthrough=not is_bch_sample,
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # RandomRunNumber(
            #     tree=tree,
            #     datatype=datatype,
            #     pileup_tool=pileup_tool,
            #     passthrough=local,
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # trigger_emulation,
            # NEED TO BE CONVERTED TO XAOD
            # Triggers(
            #     year=year,
            #     tree=tree,
            #     datatype=datatype,
            #     passthrough=datatype in (datasets.EMBED, datasets.MCEMBED),
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            PileupReweight_xAOD(
                tree=tree,
                passthrough=(local or (datatype not in (datasets.MC,
                                                        datasets.MCEMBED))),
                count_funcs=count_funcs),
            PriVertex(passthrough=local, count_funcs=count_funcs),
            LArError(passthrough=local, count_funcs=count_funcs),
            TileError(passthrough=local, count_funcs=count_funcs),
            TileTrips(passthrough=(local or datatype in (datasets.MC,
                                                         datasets.MCEMBED)),
                      count_funcs=count_funcs),
            JetCalibration(datatype=datatype,
                           passthrough=local,
                           count_funcs=count_funcs),
            JetResolution(
                passthrough=(local or (datatype not in (datasets.MC,
                                                        datasets.MCEMBED))),
                count_funcs=count_funcs),
            TauCalibration(datatype,
                           passthrough=local,
                           count_funcs=count_funcs),
            # # truth matching must come before systematics due to
            # # TES_TRUE/FAKE
            # NEED TO BE CONVERTED TO XAOD
            TrueTauSelection(passthrough=datatype == datasets.DATA,
                             count_funcs=count_funcs),
            TruthMatching(passthrough=datatype == datasets.DATA,
                          count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            NvtxJets(tree=tree, count_funcs=count_funcs),
            # # PUT THE SYSTEMATICS "FILTER" BEFORE
            # # ANY FILTERS THAT REFER TO OBJECTS
            # # BUT AFTER CALIBRATIONS
            # # Systematics must also come before anything that refers to
            # # thing.fourvect since fourvect is cached!
            # NEED TO BE CONVERTED TO XAOD
            # Systematics(
            #     terms=syst_terms,
            #     year=year,
            #     datatype=datatype,
            #     tree=tree,
            #     verbose=verbose,
            #     passthrough=not syst_terms,
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # JetIsPileup(
            #     passthrough=(
            #         local or year < 2012 or
            #         datatype not in (datasets.MC, datasets.MCEMBED)),
            #     count_funcs=count_funcs),
            JetCleaning(datatype=datatype,
                        year=year,
                        count_funcs=count_funcs),
            ElectronVeto(el_sel='Medium', count_funcs=count_funcs),
            MuonVeto(count_funcs=count_funcs),
            TauPT(2, thresh=20 * GeV, count_funcs=count_funcs),
            TauHasTrack(2, count_funcs=count_funcs),
            TauEta(2, count_funcs=count_funcs),
            TauElectronVeto(2, count_funcs=count_funcs),
            TauMuonVeto(2, count_funcs=count_funcs),
            TauCrack(2, count_funcs=count_funcs),
            # # before selecting the leading and subleading taus
            # # be sure to only consider good candidates
            TauIDMedium(2, count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # but not used by default
            # #TauTriggerMatchIndex(
            # #    config=trigger_config,
            # #    year=year,
            # #    datatype=datatype,
            # #    passthrough=datatype == datasets.EMBED,
            # #    count_funcs=count_funcs),
            # Select two leading taus at this point
            # 25 and 35 for data
            # 20 and 30 for MC to leave room for TES uncertainty
            TauLeadSublead(lead=(35 * GeV if datatype == datasets.DATA or
                                 local else 30 * GeV),
                           sublead=(25 * GeV if datatype == datasets.DATA or
                                    local else 20 * GeV),
                           count_funcs=count_funcs),
            # taus are sorted (in decreasing order) by pT from here on
            TauIDSelection(count_funcs=count_funcs),
            TaudR(3.2, count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # but not used by default
            # #TauTriggerMatchThreshold(
            # #    datatype=datatype,
            # #    tree=tree,
            # #    count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # TauTriggerEfficiency(
            #     year=year,
            #     datatype=datatype,
            #     tree=tree,
            #     tes_systematic=self.args.syst_terms and (
            #         Systematics.TES_TERMS & self.args.syst_terms),
            #     passthrough=datatype == datasets.DATA,
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            PileupScale(tree=tree,
                        year=year,
                        datatype=datatype,
                        passthrough=local,
                        count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            TauIDScaleFactors(year=year,
                              passthrough=datatype == datasets.DATA,
                              count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # TauFakeRateScaleFactors(
            #     year=year,
            #     datatype=datatype,
            #     tree=tree,
            #     tes_up=(self.args.syst_terms is not None and
            #         (Systematics.TES_FAKE_TOTAL_UP in self.args.syst_terms or
            #          Systematics.TES_FAKE_FINAL_UP in self.args.syst_terms)),
            #     tes_down=(self.args.syst_terms is not None and
            #         (Systematics.TES_FAKE_TOTAL_DOWN in self.args.syst_terms or
            #          Systematics.TES_FAKE_FINAL_DOWN in self.args.syst_terms)),
            #     passthrough=datatype in (datasets.DATA, datasets.EMBED),
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            HiggsPT(year=year,
                    tree=tree,
                    passthrough=not is_signal or local,
                    count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # TauTrackRecounting(
            #     year=year,
            #     use_ntup_value=tau_ntrack_recounted_use_ntup,
            #     passthrough=local,
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # MCWeight(
            #     datatype=datatype,
            #     tree=tree,
            #     passthrough=local or datatype == datasets.DATA,
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # EmbeddingIsolation(
            #     tree=tree,
            #     passthrough=(
            #         local or year < 2012 or
            #         datatype not in (datasets.EMBED, datasets.MCEMBED)),
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # EmbeddingCorrections(
            #     tree=tree,
            #     year=year,
            #     passthrough=(
            #         local or
            #         datatype not in (datasets.EMBED, datasets.MCEMBED)),
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # EmbeddingTauSpinner(
            #     year=year,
            #     tree=tree,
            #     passthrough=(
            #         local or datatype not in (
            #             datasets.EMBED, datasets.MCEMBED)),
            #     count_funcs=count_funcs),
            # # put MET recalculation after tau selection but before tau-jet
            # # overlap removal and jet selection because of the RefAntiTau
            # # MET correction
            # NEED TO BE CONVERTED TO XAOD
            # METRecalculation(
            #     terms=syst_terms,
            #     year=year,
            #     tree=tree,
            #     refantitau=not nominal_values,
            #     verbose=verbose,
            #     very_verbose=very_verbose,
            #     count_funcs=count_funcs),
            TauJetOverlapRemoval(count_funcs=count_funcs),
            JetPreselection(count_funcs=count_funcs),
            NonIsolatedJet(tree=tree, count_funcs=count_funcs),
            JetSelection(year=year, count_funcs=count_funcs),
            RecoJetTrueTauMatching(passthrough=datatype == datasets.DATA or
                                   local,
                                   count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # BCHCleaning(
            #     tree=tree,
            #     passthrough=year == 2011 or local,
            #     datatype=datatype,
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            ClassifyInclusiveHiggsSample(
                tree=tree,
                passthrough=not is_inclusive_signal,
                count_funcs=count_funcs),
        ])

        # set the event filters
        # NOTE(review): placed inside the else branch (filters are only
        # registered when a list was built) -- the collapsed source does
        # not show the nesting; confirm.
        self.filters['event'] = event_filters

    hh_buffer = TreeBuffer()
    if local:
        chain = TreeChain(
            self.metadata.treename,
            files=self.files,
            # ignore_branches=ignore_branches,
            events=self.events,
            onfilechange=onfilechange,
            filters=event_filters,
            cache=True,
            cache_size=50000000,
            learn_entries=100)
        buffer = TreeBuffer()
        # Split the input branches: hh_* go to hh_buffer with the prefix
        # removed, branches listed in `copied` go to a second buffer.
        # NOTE(review): `copied` is not defined anywhere in this function;
        # presumably a module-level collection of branch names -- confirm,
        # otherwise this raises NameError for the first non-hh_ branch.
        for name, value in chain._buffer.items():
            if name.startswith('hh_'):
                hh_buffer[name[3:]] = value
            elif name in copied:
                buffer[name] = value
        outtree.set_buffer(hh_buffer, create_branches=False, visible=True)
        outtree.set_buffer(buffer, create_branches=True, visible=False)
    else:
        root_chain = ROOT.TChain(self.metadata.treename)
        for f in self.files:
            log.info(f)
            root_chain.Add(f)
        # if len(self.files) != 1:
        #     raise RuntimeError('lenght of files has to be 1 for now (no xAOD chaining available)')
        # self.files = self.files[0]
        # root_chain = ROOT.TFile(self.files)
        chain = xAODTree(root_chain,
                         filters=event_filters,
                         events=self.events)
        define_objects(chain, datatype=datatype)
        outtree.set_buffer(hh_buffer, create_branches=True, visible=False)

    # create the MMC
    mmc = mass.MMC(year=year)

    # report which packages have been loaded
    # externaltools.report()

    self.output.cd()

    # The main event loop
    # the event filters above are automatically run for each event and only
    # the surviving events are looped on
    for event in chain:

        # pure copy mode: nothing is recomputed
        if local and syst_terms is None and not redo_selection:
            outtree.Fill()
            continue

        # sort taus and jets in decreasing order by pT
        event.taus.sort(key=lambda tau: tau.pt(), reverse=True)
        event.jets.sort(key=lambda jet: jet.pt(), reverse=True)

        # tau1 is the leading tau
        # tau2 is the subleading tau
        # (the unpacking requires exactly two taus in event.taus)
        tau1, tau2 = event.taus
        tau1.fourvect = asrootpy(tau1.p4())
        tau2.fourvect = asrootpy(tau2.p4())

        # boost both taus by minus the boost vector of the di-tau system
        beta_taus = (tau1.fourvect + tau2.fourvect).BoostVector()
        tau1.fourvect_boosted = LorentzVector()
        tau1.fourvect_boosted.copy_from(tau1.fourvect)
        tau1.fourvect_boosted.Boost(beta_taus * -1)

        tau2.fourvect_boosted = LorentzVector()
        tau2.fourvect_boosted.copy_from(tau2.fourvect)
        tau2.fourvect_boosted.Boost(beta_taus * -1)

        jets = list(event.jets)
        for jet in jets:
            jet.fourvect = asrootpy(jet.p4())

        jet1, jet2, jet3 = None, None, None
        beta = None
        if len(jets) >= 2:
            jet1, jet2 = jets[:2]

            # determine boost of system
            # determine jet CoM frame
            beta = (jet1.fourvect + jet2.fourvect).BoostVector()
            tree.jet_beta.copy_from(beta)

            jet1.fourvect_boosted = LorentzVector()
            jet1.fourvect_boosted.copy_from(jet1.fourvect)
            jet1.fourvect_boosted.Boost(beta * -1)

            jet2.fourvect_boosted = LorentzVector()
            jet2.fourvect_boosted.copy_from(jet2.fourvect)
            jet2.fourvect_boosted.Boost(beta * -1)

            tau1.min_dr_jet = min(tau1.fourvect.DeltaR(jet1.fourvect),
                                  tau1.fourvect.DeltaR(jet2.fourvect))
            tau2.min_dr_jet = min(tau2.fourvect.DeltaR(jet1.fourvect),
                                  tau2.fourvect.DeltaR(jet2.fourvect))

            # tau centrality (degree to which they are between the two jets)
            tau1.centrality = eventshapes.eta_centrality(
                tau1.fourvect.Eta(),
                jet1.fourvect.Eta(),
                jet2.fourvect.Eta())

            tau2.centrality = eventshapes.eta_centrality(
                tau2.fourvect.Eta(),
                jet1.fourvect.Eta(),
                jet2.fourvect.Eta())

            # boosted tau centrality
            # (the taus were boosted with beta_taus, the jets with beta)
            tau1.centrality_boosted = eventshapes.eta_centrality(
                tau1.fourvect_boosted.Eta(),
                jet1.fourvect_boosted.Eta(),
                jet2.fourvect_boosted.Eta())

            tau2.centrality_boosted = eventshapes.eta_centrality(
                tau2.fourvect_boosted.Eta(),
                jet1.fourvect_boosted.Eta(),
                jet2.fourvect_boosted.Eta())

            # 3rd leading jet
            if len(jets) >= 3:
                jet3 = jets[2]
                jet3.fourvect_boosted = LorentzVector()
                jet3.fourvect_boosted.copy_from(jet3.fourvect)
                jet3.fourvect_boosted.Boost(beta * -1)

        elif len(jets) == 1:
            jet1 = jets[0]

            tau1.min_dr_jet = tau1.fourvect.DeltaR(jet1.fourvect)
            tau2.min_dr_jet = tau2.fourvect.DeltaR(jet1.fourvect)

        # jets that are missing are passed as None
        RecoJetBlock.set(tree, jet1, jet2, jet3, local=local)

        # mass of ditau + leading jet system
        if jet1 is not None:
            tree.mass_tau1_tau2_jet1 = (
                tau1.fourvect + tau2.fourvect + jet1.fourvect).M()

        #####################################
        # number of tracks from PV minus taus
        #####################################
        ntrack_pv = 0
        ntrack_nontau_pv = 0
        for vxp in event.vertices:
            # primary vertex
            # only the first vertex with vertexType() == 1 is used
            if vxp.vertexType() == 1:
                ntrack_pv = vxp.nTrackParticles()
                ntrack_nontau_pv = ntrack_pv - tau1.nTracks(
                ) - tau2.nTracks()
                break
        tree.ntrack_pv = ntrack_pv
        tree.ntrack_nontau_pv = ntrack_nontau_pv

        #########################
        # MET variables
        #########################
        MET = event.MET[0]
        METx = MET.mpx()
        METy = MET.mpy()
        METet = MET.met()
        MET_vect = Vector2(METx, METy)
        # MET as a four-vector with pz = 0 and E = met
        MET_4vect = LorentzVector()
        MET_4vect.SetPxPyPzE(METx, METy, 0., METet)
        MET_4vect_boosted = LorentzVector()
        MET_4vect_boosted.copy_from(MET_4vect)
        # beta is only set when there are at least two jets
        if beta is not None:
            MET_4vect_boosted.Boost(beta * -1)

        tree.MET_et = METet
        tree.MET_etx = METx
        tree.MET_ety = METy
        tree.MET_phi = MET.phi()
        dPhi_tau1_tau2 = abs(tau1.fourvect.DeltaPhi(tau2.fourvect))
        dPhi_tau1_MET = abs(tau1.fourvect.DeltaPhi(MET_4vect))
        dPhi_tau2_MET = abs(tau2.fourvect.DeltaPhi(MET_4vect))
        tree.dPhi_tau1_tau2 = dPhi_tau1_tau2
        tree.dPhi_tau1_MET = dPhi_tau1_MET
        tree.dPhi_tau2_MET = dPhi_tau2_MET
        tree.dPhi_min_tau_MET = min(dPhi_tau1_MET, dPhi_tau2_MET)
        tree.MET_bisecting = is_MET_bisecting(dPhi_tau1_tau2,
                                              dPhi_tau1_MET,
                                              dPhi_tau2_MET)

        sumET = MET.sumet()
        tree.MET_sumet = sumET
        # -1 marks an undefined significance (sumET == 0)
        if sumET != 0:
            tree.MET_sig = ((2. * METet / GeV) /
                            (utils.sign(sumET) * sqrt(abs(sumET / GeV))))
        else:
            tree.MET_sig = -1.

        tree.MET_centrality = eventshapes.phi_centrality(
            tau1.fourvect,
            tau2.fourvect,
            MET_vect)
        tree.MET_centrality_boosted = eventshapes.phi_centrality(
            tau1.fourvect_boosted,
            tau2.fourvect_boosted,
            MET_4vect_boosted)

        tree.number_of_good_vertices = len(event.vertices)

        ##########################
        # Jet and sum pt variables
        ##########################
        tree.numJets = len(event.jets)

        # sum pT with only the two leading jets
        tree.sum_pt = sum([tau1.pt(), tau2.pt()] +
                          [jet.pt() for jet in jets[:2]])

        # sum pT with all selected jets
        tree.sum_pt_full = sum([tau1.pt(), tau2.pt()] +
                               [jet.pt() for jet in jets])

        # vector sum pT with two leading jets and MET
        tree.vector_sum_pt = sum([tau1.fourvect, tau2.fourvect] +
                                 [jet.fourvect for jet in jets[:2]] +
                                 [MET_4vect]).Pt()

        # vector sum pT with all selected jets and MET
        tree.vector_sum_pt_full = sum([tau1.fourvect, tau2.fourvect] +
                                      [jet.fourvect for jet in jets] +
                                      [MET_4vect]).Pt()

        # resonance pT
        tree.resonance_pt = sum(
            [tau1.fourvect, tau2.fourvect, MET_4vect]).Pt()

        # #############################
        # # tau <-> vertex association
        # #############################
        tree.tau_same_vertex = (tau1.vertex() == tau2.vertex())

        tau1.vertex_prob = ROOT.TMath.Prob(tau1.vertex().chiSquared(),
                                           int(tau1.vertex().numberDoF()))

        tau2.vertex_prob = ROOT.TMath.Prob(tau2.vertex().chiSquared(),
                                           int(tau2.vertex().numberDoF()))

        # ##########################
        # # MMC Mass
        # ##########################
        mmc_result = mmc.mass(tau1, tau2, METx, METy, sumET,
                              njets=len(event.jets))

        # mmc_result is indexed by method; each entry unpacks to
        # (mass, resonance, met)
        for mmc_method, mmc_object in enumerate(mmc_objects):
            mmc_mass, mmc_resonance, mmc_met = mmc_result[mmc_method]
            if verbose:
                log.info("MMC (method %d): %f" % (mmc_method, mmc_mass))

            mmc_object.mass = mmc_mass
            mmc_object.MET_et = mmc_met.Mod()
            mmc_object.MET_etx = mmc_met.X()
            mmc_object.MET_ety = mmc_met.Y()
            mmc_object.MET_phi = math.pi - mmc_met.Phi()
            # the resonance is only stored for a positive mass
            if mmc_mass > 0:
                FourMomentum.set(mmc_object.resonance, mmc_resonance)

        # ############################
        # # collinear and visible mass
        # ############################
        vis_mass, collin_mass, tau1_x, tau2_x = mass.collinearmass(
            tau1, tau2, METx, METy)

        tree.mass_vis_tau1_tau2 = vis_mass
        tree.mass_collinear_tau1_tau2 = collin_mass
        tau1.collinear_momentum_fraction = tau1_x
        tau2.collinear_momentum_fraction = tau2_x

        # # Fill the tau block
        # # This must come after the RecoJetBlock is filled since
        # # that sets the jet_beta for boosting the taus
        RecoTauBlock.set(event, tree, datatype, tau1, tau2, local=local)
        # NEED TO BE CONVERTED TO XAOD
        if datatype != datasets.DATA:
            TrueTauBlock.set(tree, tau1, tau2)

        # fill the output tree
        outtree.Fill(reset=True)

    # externaltools.report()

    # flush any baskets remaining in memory to disk
    self.output.cd()
    outtree.FlushBaskets()
    outtree.Write()

    if local:
        # the merged GRL (data only) and cut-flow exist only in local mode
        if datatype == datasets.DATA:
            xml_string = ROOT.TObjString(merged_grl.str())
            xml_string.Write('lumi')
        merged_cutflow.Write()
def create_dict(nom):
    """Build one feature dictionary per entry for the two-lepton channel.

    For every entry of ``nom`` the function

    1. builds four-vectors for the MET, the two leptons and every
       particle-flow jet;
    2. scores every jet pair with ``topModel`` and keeps the best pair;
    3. scores every (lepton, jet pair) combination with ``xgbModel`` and
       keeps the best one;
    4. stores the kinematics of the selected objects. Every azimuthal angle
       is stored as ``calc_phi(phi_0, phi)``, where ``phi_0`` is the phi of
       the lepton picked in step 3.

    Args:
        nom: tree-like object providing ``GetEntries()``, ``GetEntry(idx)``
            and the ``lep_*``, ``m_pflow_jet_*`` and ``MET_RefFinal_*``
            branches as attributes.

    Returns:
        list of dict, one per entry, in entry order.

    NOTE(review): an entry with fewer than two jets yields no combination
    and the model evaluation below will presumably fail -- the input looks
    like it is expected to be pre-selected; confirm.
    """
    events = []
    nEntries = nom.GetEntries()
    print(nEntries)

    for idx in range(nEntries):
        if idx % 10000 == 0:
            print(str(idx) + '/' + str(nEntries))
        nom.GetEntry(idx)

        met = LorentzVector()
        met.SetPtEtaPhiE(nom.MET_RefFinal_et, 0, nom.MET_RefFinal_phi,
                         nom.MET_RefFinal_et)

        # (pt, eta, phi, E) of both leptons, read once and reused below
        lepKin = [
            (nom.lep_Pt_0, nom.lep_Eta_0, nom.lep_Phi_0, nom.lep_E_0),
            (nom.lep_Pt_1, nom.lep_Eta_1, nom.lep_Phi_1, nom.lep_E_1),
        ]
        lep4Vecs = []
        for kin in lepKin:
            lepVec = LorentzVector()
            lepVec.SetPtEtaPhiE(*kin)
            lep4Vecs.append(lepVec)

        jet4Vecs = []
        btags = []
        for j in range(len(nom.m_pflow_jet_pt)):
            jetVec = LorentzVector()
            jetVec.SetPtEtaPhiM(nom.m_pflow_jet_pt[j], nom.m_pflow_jet_eta[j],
                                nom.m_pflow_jet_phi[j], nom.m_pflow_jet_m[j])
            jet4Vecs.append(jetVec)
            btags.append(nom.m_pflow_jet_flavor_weight_MV2c10[j])

        # all unordered jet pairs (i < j), shared by both BDT evaluations
        nJets = len(jet4Vecs)
        jetPairs = [(i, j) for i in range(nJets - 1)
                    for j in range(i + 1, nJets)]

        # --- top BDT -------------------------------------------------------
        # topDict does not depend on a lepton choice, so each jet pair is
        # scored exactly once (scoring it once per lepton only duplicated
        # rows and cannot change the argmax).
        combosTop = []
        for i, j in jetPairs:
            t = topDict(jet4Vecs[i], jet4Vecs[j], lep4Vecs[0], lep4Vecs[1],
                        met, btags[i], btags[j],
                        nom.m_pflow_jet_jvt[i], nom.m_pflow_jet_jvt[j],
                        nom.m_pflow_jet_numTrk[i], nom.m_pflow_jet_numTrk[j])
            combosTop.append([t, [i, j]])

        topDF = pd.DataFrame.from_dict([x[0] for x in combosTop])
        topMat = xgb.DMatrix(topDF, feature_names=list(topDF))
        topPred = topModel.predict(topMat)
        topBest = np.argmax(topPred)
        topMatches = combosTop[topBest][1]

        # --- Higgs BDT -----------------------------------------------------
        combos = []
        for l in range(len(lep4Vecs)):
            otherLep = lep4Vecs[1 - l]  # the lepton not tested as Higgs lepton
            for i, j in jetPairs:
                k = higgsDict(jet4Vecs[i], jet4Vecs[j], lep4Vecs[l], met,
                              btags[i], btags[j], otherLep,
                              nom.m_pflow_jet_jvt[i], nom.m_pflow_jet_jvt[j],
                              nom.m_pflow_jet_numTrk[i],
                              nom.m_pflow_jet_numTrk[j])
                combos.append([k, [l, i, j]])

        df = pd.DataFrame.from_dict([x[0] for x in combos])
        xgbMat = xgb.DMatrix(df, feature_names=list(df))
        pred = xgbModel.predict(xgbMat)
        best = np.argmax(pred)

        bestComb = combos[best][1]
        lepMatch = bestComb[0]
        jetMatches = bestComb[1:]

        # --- output dictionary (key insertion order is kept as before) -----
        ptH, etaH, phi_0, eH = lepKin[lepMatch]
        ptO, etaO, phiO, eO = lepKin[1 - lepMatch]

        evt = {}
        evt['comboScore'] = pred[best]
        evt['topScore'] = topPred[topBest]

        evt['lep_Pt_H'] = ptH
        evt['lep_Eta_H'] = etaH
        evt['lep_E_H'] = eH

        evt['lep_Pt_O'] = ptO
        evt['lep_Eta_O'] = etaO
        evt['lep_Phi_O'] = calc_phi(phi_0, phiO)
        evt['lep_E_O'] = eO

        for n, i in enumerate(jetMatches):
            evt['jet_Pt_h' + str(n)] = nom.m_pflow_jet_pt[i]
            evt['jet_Eta_h' + str(n)] = nom.m_pflow_jet_eta[i]
            evt['jet_E_h' + str(n)] = jet4Vecs[i].E()
            evt['jet_Phi_h' + str(n)] = calc_phi(phi_0, nom.m_pflow_jet_phi[i])
            evt['jet_MV2c10_h' + str(n)] = \
                nom.m_pflow_jet_flavor_weight_MV2c10[i]

        for n, i in enumerate(topMatches):
            evt['top_Pt_' + str(n)] = nom.m_pflow_jet_pt[i]
            evt['top_Eta_' + str(n)] = nom.m_pflow_jet_eta[i]
            evt['top_E_' + str(n)] = jet4Vecs[i].E()
            evt['top_Phi_' + str(n)] = calc_phi(phi_0, nom.m_pflow_jet_phi[i])
            evt['top_MV2c10_' + str(n)] = \
                nom.m_pflow_jet_flavor_weight_MV2c10[i]

        evt['MET'] = nom.MET_RefFinal_et
        evt['MET_phi'] = calc_phi(phi_0, nom.MET_RefFinal_phi)

        events.append(evt)

    return events
def calcTopMass(nom, topMasses):
    """Append the reconstructed top mass of every selected event to ``topMasses``.

    An event is kept when ``nJets_OR == 1``, ``nJets_OR_DL1_70 == 1``,
    at least one of ``Mll01`` / ``Mll02`` lies within ``10e3`` of ``91.2e3``
    and ``trilep_type != 0``.  The lepton that is not part of the pair closest
    to ``91.2e3`` is combined with the result of ``neutrinoPz`` and with the
    leading jet; the invariant mass of that sum is appended.

    Args:
        nom: iterable of events exposing the branches read below.
        topMasses: list that is extended in place.

    Returns:
        None.
    """
    # presumably the Z mass and window in MeV -- confirm against the ntuple
    zPole = 91.2e3
    zWindow = 10e3

    for current, e in enumerate(nom, 1):
        if current % 10000 == 0:
            print(current)

        if e.nJets_OR != 1 or e.nJets_OR_DL1_70 != 1:
            continue

        offPole01 = abs(e.Mll01 - zPole)
        offPole02 = abs(e.Mll02 - zPole)
        if offPole01 > zWindow and offPole02 > zWindow:
            continue
        if e.trilep_type == 0:
            continue

        # pick the lepton left over once the pair closest to zPole is removed
        lep = LorentzVector()
        if offPole02 < offPole01:
            lep.SetPtEtaPhiE(e.lep_Pt_1, e.lep_Eta_1, e.lep_Phi_1, e.lep_E_1)
        else:
            lep.SetPtEtaPhiE(e.lep_Pt_2, e.lep_Eta_2, e.lep_Phi_2, e.lep_E_2)

        met = neutrinoPz(lep, e.met_met, e.met_phi)
        w = lep + met

        jet = LorentzVector()
        jet.SetPtEtaPhiE(e.jets_Pt_0, e.jets_Eta_0, e.jets_Phi_0, e.jets_E_0)

        top = w + jet
        mTop = top.M()
        topMasses.append(mTop)

        if mTop < 0:
            print(mTop, w.M())
#newNom.GetEntry(current) current+=1 if current%10000==0: print(current) #if current==200000: # break #if e.nJets_OR_T!=1: # continue #if e.nJets_OR_T_MV2c10_70!=1: # continue #if abs(e.Mll01 - 91.2e3) > 10e3 and abs(e.Mll02 - 91.2e3) > 10e3: # continue #if e.trilep_type==0: continue lep = LorentzVector() if abs(e.Mll02 - 91.2e3) < abs(e.Mll01 - 91.2e3): lep.SetPtEtaPhiE( e.lep_Pt_1, e.lep_Eta_1, e.lep_Phi_1, e.lep_E_1 ) else: lep.SetPtEtaPhiE( e.lep_Pt_2, e.lep_Eta_2, e.lep_Phi_2, e.lep_E_2 ) met = neutrinoPz(lep, e.met_met, e.met_phi) w = lep+met jet = LorentzVector() jet.SetPtEtaPhiE( e.jets_Pt_0, e.jets_Eta_0, e.jets_Phi_0, e.jets_E_0 ) top = LorentzVector()
def create_dict(nom):
    """Build the per-entry feature dictionaries for the three-lepton channel.

    For every entry of ``nom`` the function

    1. builds four-vectors for the MET, the three leptons and every
       particle-flow jet;
    2. scores every jet pair with ``topModel`` and keeps the best pair;
    3. scores the two lepton orderings ``[0, 1, 2]`` and ``[0, 2, 1]`` with
       ``higgs2lModel`` and every (lepton 1 or 2, jet pair) combination with
       ``higgs1lModel``, keeping the best of each;
    4. fills three dictionaries: the ``decayDict`` output extended with jet
       multiplicities and both best scores, the 2l dictionary and the 1l
       dictionary.  Azimuthal angles are stored as ``calc_phi(phi_0, phi)``;
       ``phi_0`` is the phi of lepton 0 in the 2l dictionary and the phi of
       the selected lepton in the 1l dictionary.

    Args:
        nom: tree-like object providing ``GetEntries()``, ``GetEntry(idx)``
            and the ``lep_*``, ``m_pflow_jet_*``, ``MET_RefFinal_*`` and
            ``nJets_OR_T*`` branches as attributes.

    Returns:
        tuple ``(decayDicts, events1l, events2l)`` of three lists with one
        dict per entry each.

    NOTE(review): an entry with fewer than two jets yields no jet pair and
    the model evaluation below will presumably fail -- the input looks like
    it is expected to be pre-selected; confirm.
    """
    decayDicts = []
    events1l = []
    events2l = []

    nEntries = nom.GetEntries()
    for idx in range(nEntries):
        if idx % 10000 == 0:
            print(str(idx) + '/' + str(nEntries))
        nom.GetEntry(idx)

        met = LorentzVector()
        met.SetPtEtaPhiE(nom.MET_RefFinal_et, 0, nom.MET_RefFinal_phi,
                         nom.MET_RefFinal_et)

        # (pt, eta, phi, E) of the three leptons, read once and reused below
        lepKin = [
            (nom.lep_Pt_0, nom.lep_Eta_0, nom.lep_Phi_0, nom.lep_E_0),
            (nom.lep_Pt_1, nom.lep_Eta_1, nom.lep_Phi_1, nom.lep_E_1),
            (nom.lep_Pt_2, nom.lep_Eta_2, nom.lep_Phi_2, nom.lep_E_2),
        ]
        lep4Vecs = []
        for kin in lepKin:
            lepVec = LorentzVector()
            lepVec.SetPtEtaPhiE(*kin)
            lep4Vecs.append(lepVec)

        jet4Vecs = []
        btags = []
        for j in range(len(nom.m_pflow_jet_pt)):
            jetVec = LorentzVector()
            jetVec.SetPtEtaPhiM(nom.m_pflow_jet_pt[j], nom.m_pflow_jet_eta[j],
                                nom.m_pflow_jet_phi[j], nom.m_pflow_jet_m[j])
            jet4Vecs.append(jetVec)
            btags.append(nom.m_pflow_jet_flavor_weight_MV2c10[j])

        # all unordered jet pairs (i < j), shared by the top and 1l scans
        nJets = len(jet4Vecs)
        jetPairs = [(i, j) for i in range(nJets - 1)
                    for j in range(i + 1, nJets)]

        # --- top BDT -------------------------------------------------------
        # topDict does not depend on a lepton choice, so each jet pair is
        # scored exactly once (scoring it once per lepton only duplicated
        # rows and cannot change the argmax).
        combosTop = []
        for i, j in jetPairs:
            t = topDict(jet4Vecs[i], jet4Vecs[j],
                        lep4Vecs[0], lep4Vecs[1], lep4Vecs[2], met,
                        btags[i], btags[j],
                        nom.m_pflow_jet_jvt[i], nom.m_pflow_jet_jvt[j],
                        nom.m_pflow_jet_numTrk[i], nom.m_pflow_jet_numTrk[j])
            combosTop.append([t, [i, j]])

        topDF = pd.DataFrame.from_dict([x[0] for x in combosTop])
        topMat = xgb.DMatrix(topDF, feature_names=list(topDF))
        topPred = topModel.predict(topMat)
        topBest = np.argmax(topPred)
        topMatches = combosTop[topBest][1]

        # --- 1l candidates: lepton 1 or 2 plus a jet pair ------------------
        combos1l = []
        for l in range(1, len(lep4Vecs)):
            spectator = lep4Vecs[3 - l]  # the other one of leptons 1 and 2
            for i, j in jetPairs:
                k = higgs1lDict(jet4Vecs[i], jet4Vecs[j], lep4Vecs[l], met,
                                btags[i], btags[j],
                                lep4Vecs[0], spectator,
                                nom.m_pflow_jet_jvt[i], nom.m_pflow_jet_jvt[j],
                                nom.m_pflow_jet_numTrk[i],
                                nom.m_pflow_jet_numTrk[j])
                combos1l.append([k, [l, i, j]])

        # --- 2l candidates: lepton 0 paired with lepton 1 or lepton 2 ------
        combos2l = []
        for comb in ([0, 1, 2], [0, 2, 1]):
            k = higgs2lDict(lep4Vecs[comb[0]], lep4Vecs[comb[1]],
                            lep4Vecs[comb[2]], met)
            combos2l.append([k, [comb[0], comb[1]]])

        # Run 2l XGB, find best match
        df2l = pd.DataFrame.from_dict([x[0] for x in combos2l])
        xgbMat2l = xgb.DMatrix(df2l, feature_names=list(df2l))
        pred2l = higgs2lModel.predict(xgbMat2l)
        best2l = np.argmax(pred2l)
        lepMatch2l = combos2l[best2l][1][1]

        # Run 1l XGB, find best match
        df1l = pd.DataFrame.from_dict([x[0] for x in combos1l])
        xgbMat1l = xgb.DMatrix(df1l, feature_names=list(df1l))
        pred1l = higgs1lModel.predict(xgbMat1l)
        best1l = np.argmax(pred1l)
        bestComb1l = combos1l[best1l][1]
        lepMatch1l = bestComb1l[0]
        jetMatches1l = bestComb1l[1:]

        # --- decay dictionary ----------------------------------------------
        k = decayDict(lep4Vecs[0], lep4Vecs[1], lep4Vecs[2], met,
                      jet4Vecs[topMatches[0]], jet4Vecs[topMatches[1]])
        k['nJets'] = nom.nJets_OR_T
        k['nJets_MV2c10_70'] = nom.nJets_OR_T_MV2c10_70
        k['higgs2l_score'] = pred2l[best2l]
        k['higgs1l_score'] = pred1l[best1l]
        decayDicts.append(k)

        # --- 2l pt prediction dictionary -----------------------------------
        # slot 0 is always lepton 0, slot 1 the matched lepton, slot 2 the
        # remaining one; phi is stored relative to lepton 0
        q = {}
        q['comboScore'] = pred2l[best2l]
        phi_0 = lepKin[0][2]
        for slot, lepIdx in enumerate([0, lepMatch2l, 3 - lepMatch2l]):
            pt, eta, phi, e = lepKin[lepIdx]
            q['lep_Pt_' + str(slot)] = pt
            q['lep_Eta_' + str(slot)] = eta
            if slot > 0:
                q['lep_Phi_' + str(slot)] = calc_phi(phi_0, phi)
            q['lep_E_' + str(slot)] = e

        for n, i in enumerate(topMatches):
            q['top_Pt_' + str(n)] = nom.m_pflow_jet_pt[i]
            q['top_Eta_' + str(n)] = nom.m_pflow_jet_eta[i]
            q['top_E_' + str(n)] = jet4Vecs[i].E()
            q['top_Phi_' + str(n)] = calc_phi(phi_0, nom.m_pflow_jet_phi[i])
            q['top_MV2c10_' + str(n)] = nom.m_pflow_jet_flavor_weight_MV2c10[i]

        q['MET'] = nom.MET_RefFinal_et
        q['MET_phi'] = calc_phi(phi_0, nom.MET_RefFinal_phi)
        events2l.append(q)

        # --- 1l pt prediction dictionary -----------------------------------
        # "H" is the matched lepton, slot 0 is lepton 0, slot 1 the remaining
        # one; phi is stored relative to the matched lepton
        y = {}
        y['comboScore'] = pred1l[best1l]
        ptH, etaH, phi_0, eH = lepKin[lepMatch1l]
        y['lep_Pt_H'] = ptH
        y['lep_Eta_H'] = etaH
        y['lep_E_H'] = eH
        for slot, lepIdx in enumerate([0, 3 - lepMatch1l]):
            pt, eta, phi, e = lepKin[lepIdx]
            y['lep_Pt_' + str(slot)] = pt
            y['lep_Eta_' + str(slot)] = eta
            y['lep_Phi_' + str(slot)] = calc_phi(phi_0, phi)
            y['lep_E_' + str(slot)] = e

        for n, i in enumerate(jetMatches1l):
            y['jet_Pt_h' + str(n)] = nom.m_pflow_jet_pt[i]
            y['jet_Eta_h' + str(n)] = nom.m_pflow_jet_eta[i]
            y['jet_E_h' + str(n)] = jet4Vecs[i].E()
            y['jet_Phi_h' + str(n)] = calc_phi(phi_0, nom.m_pflow_jet_phi[i])
            y['jet_MV2c10_h' + str(n)] = \
                nom.m_pflow_jet_flavor_weight_MV2c10[i]

        for n, i in enumerate(topMatches):
            y['top_Pt_' + str(n)] = nom.m_pflow_jet_pt[i]
            y['top_Eta_' + str(n)] = nom.m_pflow_jet_eta[i]
            y['top_E_' + str(n)] = jet4Vecs[i].E()
            y['top_Phi_' + str(n)] = calc_phi(phi_0, nom.m_pflow_jet_phi[i])
            y['top_MV2c10_' + str(n)] = nom.m_pflow_jet_flavor_weight_MV2c10[i]

        y['MET'] = nom.MET_RefFinal_et
        y['MET_phi'] = calc_phi(phi_0, nom.MET_RefFinal_phi)
        events1l.append(y)

    return decayDicts, events1l, events2l
def work(self):
    """Fill the ``ditaumass`` tree with truth and reco information per event.

    For each event of the input chain the resonance (PDG id 25 when
    ``args.higgs`` is set, 23 otherwise) is looked up in the MC record, its
    tau and photon children are collected, and each of the two tau decays is
    matched to a reconstructed tau, electron or muon with
    ``closest_reco_object(..., dR=0.2)``.  Events without a resonance,
    without exactly two tau decays, with an invalid decay or with a decay
    that is neither hadronic nor leptonic are skipped.  The tree is flushed
    and written to ``self.output`` at the end.

    Diagnostics are emitted with the single-argument ``print(...)`` form,
    which behaves the same under Python 2 and Python 3.
    """
    year = self.metadata.year
    verbose = self.args.verbose
    draw_decays = self.args.draw_decays
    args = self.args

    # initialize the TreeChain of all input files
    # only enable branches I need
    chain = TreeChain(
        self.metadata.treename,
        files=self.files,
        branches=[
            'tau_*',
            'mc_*',
            'el_*',
            'mu_staco_*',
            'MET_RefFinal_BDTMedium_*',
            'MET_RefFinal_STVF_*',
            'EventNumber',
            'RunNumber',
            'averageIntPerXing',
        ],
        events=self.events,
        read_branches_on_demand=True,
        cache=True,
        verbose=True)

    define_objects(chain, year)

    self.output.cd()

    # this tree will contain info pertaining to true tau decays
    # for possible use in the optimization of a missing mass calculator
    tree = Tree(name="ditaumass", model=DTMEvent)

    tree.define_object(name='resonance', prefix='resonance_')
    tree.define_object(name='radiative', prefix='radiative_')

    truetaus = [
        tree.define_object(name='truetau1', prefix='truetau1_'),
        tree.define_object(name='truetau2', prefix='truetau2_'),
    ]
    taus = [
        tree.define_object(name='tau1', prefix='tau1_'),
        tree.define_object(name='tau2', prefix='tau2_'),
    ]
    electrons = [
        tree.define_object(name='ele1', prefix='ele1_'),
        tree.define_object(name='ele2', prefix='ele2_'),
    ]
    muons = [
        tree.define_object(name='muon1', prefix='muon1_'),
        tree.define_object(name='muon2', prefix='muon2_'),
    ]

    # get the Z or Higgs
    resonance_pdgid = 25 if args.higgs else 23
    collision_energy = 7 if '7TeV' in self.metadata.name else 8

    for event_index, event in enumerate(chain):
        try:
            tree.reset_branch_values()

            # get the Z or Higgs
            resonance = tautools.get_particles(
                event, resonance_pdgid, num_expected=1)
            if not resonance:
                print("could not find resonance")
                continue

            # get the resonance just before the decay
            resonance = resonance[0].last_self

            if draw_decays:
                resonance.export_graphvis(
                    'resonance_%d.dot' % event.EventNumber)

            FourVectModel.set(tree.resonance, resonance)

            # collect decay products (taus and photons)
            tau_decays = []
            mc_photons = []
            for child in resonance.iter_children():
                if abs(child.pdgId) == pdg.tau_minus:
                    # ignore status 3 taus in 2012 (something strange in the
                    # MC record...)
                    if year == 2012 and child.status == 3:
                        continue
                    tau_decays.append(tautools.TauDecay(child))
                elif child.pdgId == pdg.gamma:
                    mc_photons.append(child)
                else:
                    raise TypeError(
                        'unexpected particle after resonance:\n%s' % child)

            # There should be exactly two taus
            if len(tau_decays) != 2:
                print("found %i tau decays in MC record" % len(tau_decays))
                for decay in tau_decays:
                    print(decay)
                # skip this event
                continue

            # check for incomplete tau decays; report the first one only
            invalid = False
            for decay in tau_decays:
                if not decay.valid:
                    print("invalid tau decay:")
                    print(decay)
                    if draw_decays:
                        decay.init.export_graphvis(
                            'decay_invalid_%d.dot' % event.EventNumber)
                    invalid = True
                    break
            if invalid:
                # skip this event
                continue

            radiative_fourvect = LorentzVector()
            for photon in mc_photons:
                radiative_fourvect += photon.fourvect
            # NOTE(review): the self-reference presumably lets
            # FourVectModel.set read `.fourvect` from a bare LorentzVector
            # the same way it does from a particle -- confirm.
            radiative_fourvect.fourvect = radiative_fourvect
            FourVectModel.set(tree.radiative, radiative_fourvect)
            tree.radiative_ngamma = len(mc_photons)
            tree.radiative_ngamma_5 = sum(
                1 for ph in mc_photons if ph.pt > 5)
            tree.radiative_ngamma_10 = sum(
                1 for ph in mc_photons if ph.pt > 10)
            tree.radiative_et_scalarsum = sum(ph.pt for ph in mc_photons)

            all_matched = True
            matched_objects = []

            skip = False
            for i, (decay, truetau, tau, electron, muon) in enumerate(
                    zip(tau_decays, truetaus, taus, electrons, muons)):

                if draw_decays:
                    decay.init.export_graphvis(
                        'decay%d_%d.dot' % (i, event.EventNumber))

                TrueTau.set(truetau, decay, verbose=verbose)

                # match to reco taus, electrons and muons
                if decay.hadronic:
                    recotau, dr = closest_reco_object(
                        event.taus, decay.fourvect_visible, dR=0.2)
                    if recotau is not None:
                        matched_objects.append(recotau)
                        recotau.matched = True
                        recotau.matched_dr = dr
                        RecoTau.set(tau, recotau, verbose=verbose)
                    else:
                        all_matched = False
                elif decay.leptonic_electron:
                    recoele, dr = closest_reco_object(
                        event.electrons, decay.fourvect_visible, dR=0.2)
                    if recoele is not None:
                        matched_objects.append(recoele)
                        recoele.matched = True
                        recoele.matched_dr = dr
                        RecoElectron.set(electron, recoele)
                    else:
                        all_matched = False
                elif decay.leptonic_muon:
                    recomuon, dr = closest_reco_object(
                        event.muons, decay.fourvect_visible, dR=0.2)
                    if recomuon is not None:
                        matched_objects.append(recomuon)
                        recomuon.matched = True
                        recomuon.matched_dr = dr
                        RecoMuon.set(muon, recomuon)
                    else:
                        all_matched = False
                else:
                    print("unhandled invalid tau decay:")
                    print(decay)
                    # with draw_decays the graph was already exported above
                    if not draw_decays:
                        decay.init.export_graphvis(
                            'decay%d_%d.dot' % (i, event.EventNumber))
                    # skip this event
                    skip = True
                    break
            if skip:
                # skip this event
                continue

            # did both decays match a reco object?
            tree.matched = all_matched

            # match collision: decays matched same reco object
            if all_matched:
                tree.match_collision = (
                    matched_objects[0] == matched_objects[1])

            # MET
            tree.met_x = event.MET.etx
            tree.met_y = event.MET.ety
            tree.met_phi = event.MET.phi
            tree.met = event.MET.et
            tree.sum_et = event.MET.sumet

            # set extra event variables
            tree.channel = event.mc_channel_number
            tree.event = event.EventNumber
            tree.run = event.RunNumber
            tree.mu = event.averageIntPerXing
            tree.collision_energy = collision_energy

            tree.Fill()
        except:
            # deliberately catch-all: report where the failure happened,
            # then propagate the original exception unchanged
            print("event index: %d" % event_index)
            print("event number: %d" % event.EventNumber)
            print("file: %s" % chain.file.GetName())
            raise

    self.output.cd()
    tree.FlushBaskets()
    tree.Write()
def lorentzVecs(nom, jetIdx0, jetIdx1, is3l):
    '''
    Build the MET, lepton and jet four-vectors for the current entry.

    The MET vector is filled with eta = 0 and E = ``nom.met_met``; the third
    lepton branches are only read when ``is3l`` is true.

    Returns (jet0, jet1, met, lep0, lep1), with lep2 appended if is3l.
    '''
    def fromPtEtaPhiE(pt, eta, phi, e):
        # one freshly filled vector per call
        vec = LorentzVector()
        vec.SetPtEtaPhiE(pt, eta, phi, e)
        return vec

    met = fromPtEtaPhiE(nom.met_met, 0, nom.met_phi, nom.met_met)

    leptons = [
        fromPtEtaPhiE(nom.lep_Pt_0, nom.lep_Eta_0, nom.lep_Phi_0, nom.lep_E_0),
        fromPtEtaPhiE(nom.lep_Pt_1, nom.lep_Eta_1, nom.lep_Phi_1, nom.lep_E_1),
    ]
    if is3l:
        leptons.append(fromPtEtaPhiE(nom.lep_Pt_2, nom.lep_Eta_2,
                                     nom.lep_Phi_2, nom.lep_E_2))

    jets = tuple(
        fromPtEtaPhiE(nom.jet_pt[jetIdx], nom.jet_eta[jetIdx],
                      nom.jet_phi[jetIdx], nom.jet_e[jetIdx])
        for jetIdx in (jetIdx0, jetIdx1))

    return jets + (met,) + tuple(leptons)
def preprocess(jet, cluster, output="kt", regression=False, R_clustering=0.3):
    """
    Preprocess a jet so that it is usable by the recnn.

    Preprocessing algorithm:
    1. j = the highest pt anti-kt jet (R=1)
    2. run kt (R=R_clustering) on the constituents c of j, resulting in
       subjets sj1, sj2, ..., sjN
    3. phi = sj1.phi(); for all c, do c.rotate_z(-phi)
    4. bv = sj1.boost_vector(); bv.set_perp(0); for all c, do c.boost(-bv)
    5. deltaz = sj1.pz - sj2.pz; deltay = sj1.py - sj2.py;
       alpha = -atan2(deltaz, deltay); for all c, do c.rotate_x(alpha)
    6. if sj3.pz < 0: for all c, do c.set_pz(-c.pz)
    7. finally recluster all transformed constituents c into a single jet

    Args:
        jet: dict with at least the keys "content", "tree", "eta" and "pt"
            (and "genpt" when ``regression`` is true).  It is deep-copied,
            the caller's object is left untouched.
        cluster: callable ``cluster(constituents, R=..., jet_algorithm=...)``
            returning a sequence of 4-tuples whose first two items are the
            tree and content arrays of a subjet.
        output: algorithm of the final reclustering: "kt", "anti-kt" or
            "cambridge".
        regression: when true, "genpt" is carried over to the result.
        R_clustering: radius used for the subjet clustering of step 2.

    Returns:
        The preprocessed copy of ``jet`` with "tree", "content", "phi",
        "eta", "energy", "mass", "pt", "root_id", "oldeta" and "oldpt" set.

    Raises:
        ValueError: if ``output`` is not one of the supported algorithms.
            This is checked before any work is done.
    """
    # jet_algorithm code handed to `cluster` for each supported output name
    final_algorithms = {"kt": 0, "anti-kt": 1, "cambridge": 2}
    if output not in final_algorithms:
        raise ValueError(
            "unknown output clustering %r (expected one of: %s)"
            % (output, ", ".join(sorted(final_algorithms))))

    jet = copy.deepcopy(jet)
    constituents = jet["content"][jet["tree"][:, 0] == -1]
    if regression:
        genpt = jet["genpt"]

    ### run kt (R=R_clustering) on the constituents c of j ###
    subjets = cluster(constituents, R=R_clustering, jet_algorithm=0)
    oldeta = jet["eta"]
    oldpt = jet['pt']

    def transform_all(apply):
        """Apply an in-place LorentzVector operation to every content row."""
        for _, content, _, _ in subjets:
            for i in range(len(content)):
                v = LorentzVector(content[i][:4])
                apply(v)
                content[i, 0] = v[0]
                content[i, 1] = v[1]
                content[i, 2] = v[2]
                content[i, 3] = v[3]

    ### Rot phi ###
    # phi = sj1.phi(); for all c, do c.rotate_z(-phi)
    phi = LorentzVector(subjets[0][1][0]).phi()
    transform_all(lambda v: v.rotate_z(-phi))

    ### boost ###
    # bv = sj1.boost_vector(); bv.set_perp(0); for all c, do c.boost(-bv)
    # (sj1 is re-read here because the rotation above changed it)
    bv = LorentzVector(subjets[0][1][0]).boost_vector()
    bv.set_perp(0)
    transform_all(lambda v: v.boost(-bv))

    ### Rot alpha ###
    # alpha = -atan2(sj1.pz - sj2.pz, sj1.py - sj2.py)
    # for all c, do c.rotate_x(alpha)
    if len(subjets) >= 2:
        deltaz = subjets[0][1][0, 2] - subjets[1][1][0, 2]
        deltay = subjets[0][1][0, 1] - subjets[1][1][0, 1]
        alpha = -np.arctan2(deltaz, deltay)
        transform_all(lambda v: v.rotate_x(alpha))

    ### flip if necessary ###
    # if sj3.pz < 0: for all c, do c.set_pz(-c.pz)
    if len(subjets) >= 3 and subjets[2][1][0, 2] < 0:
        for _, content, _, _ in subjets:
            content[:, 2] *= -1.0

    ### finally recluster all transformed constituents c into a single jet ###
    constituents = np.vstack(
        [content[tree[:, 0] == -1] for tree, content, _, _ in subjets])
    subjets = cluster(constituents, R=100.,
                      jet_algorithm=final_algorithms[output])

    jet["tree"] = subjets[0][0]
    jet["content"] = subjets[0][1]

    v = LorentzVector(jet["content"][0])
    jet["phi"] = v.phi()
    jet["eta"] = v.eta()
    jet["energy"] = v.E()
    jet["mass"] = v.m()
    jet["pt"] = v.pt()
    jet["root_id"] = 0
    jet['oldeta'] = oldeta
    jet['oldpt'] = oldpt
    if regression:
        jet["genpt"] = genpt
    return jet
# In[]:
### Verification of the formatting ###

### Load data to check ###
fd = f + "anti-kt_test.npy"
X, y = np.load(fd)

# In[]:
### Check for signal ###
# a1 collects one (eta, phi) array per signal jet, w1 the matching pt values
a1 = []
w1 = []
for i, j in enumerate(X):
    # rows whose first tree column is -1 (same selection as in preprocess)
    constituents = j["content"][j["tree"][:, 0] == -1]
    if y[i] != 1:
        continue
    vectors = [LorentzVector(c) for c in constituents]
    a1.append(np.array([[v.eta(), v.phi()] for v in vectors]))
    w1.append([v.pt() for v in vectors])

# flatten the per-jet pt lists and express each as a percentage of the total
w1 = [item for sublist in w1 for item in sublist]
w1 = 100 * np.array(w1) / sum(w1)
a1 = np.vstack(a1)

# In[]:
# pt-weighted 2D histogram of the constituents in the (eta, phi) plane
plt.close()
t = plt.hist2d(a1[:, 0], a1[:, 1],
               range=[(-0.5, 0.5), (-0.5, 0.5)],
               bins=200,
               cmap=plt.cm.jet,
               weights=w1,
               norm=LogNorm())
cbar = plt.colorbar()
plt.xlabel(r'$\eta$')
plt.ylabel(r'$\varphi$')
cbar.set_label(r'% of p$_t$')
def getTruthVis4Vector(self):
    """Return a LorentzVector filled from the visible truth-tau attributes.

    The vector is set with ``SetPtEtaPhiM`` from ``vis_Et``, ``vis_eta``,
    ``vis_phi`` and ``vis_m``.

    NOTE(review): ``vis_Et`` is passed in the pt slot as-is; confirm that no
    Et -> pt conversion is intended here.
    """
    vis_p4 = LorentzVector()
    components = (self.vis_Et, self.vis_eta, self.vis_phi, self.vis_m)
    vis_p4.SetPtEtaPhiM(*components)
    return vis_p4
def work(self):
    """Fill the ``ditaumass`` tree with truth and reco information per event.

    For each event of the input chain the resonance (PDG id 25 when
    ``args.higgs`` is set, 23 otherwise) is looked up in the MC record, its
    tau and photon children are collected, and each of the two tau decays is
    matched to a reconstructed tau, electron or muon with
    ``closest_reco_object(..., dR=0.2)``.  Events without a resonance,
    without exactly two tau decays, with an invalid decay or with a decay
    that is neither hadronic nor leptonic are skipped.  The tree is flushed
    and written to ``self.output`` at the end.

    Diagnostics are emitted with the single-argument ``print(...)`` form,
    which behaves the same under Python 2 and Python 3.
    """
    year = self.metadata.year
    verbose = self.args.verbose
    draw_decays = self.args.draw_decays
    args = self.args

    # initialize the TreeChain of all input files
    # only enable branches I need
    chain = TreeChain(
        self.metadata.treename,
        files=self.files,
        branches=[
            'tau_*',
            'mc_*',
            'el_*',
            'mu_staco_*',
            'MET_RefFinal_BDTMedium_*',
            'MET_RefFinal_STVF_*',
            'EventNumber',
            'RunNumber',
            'averageIntPerXing',
        ],
        events=self.events,
        read_branches_on_demand=True,
        cache=True,
        verbose=True)

    define_objects(chain, year)

    self.output.cd()

    # this tree will contain info pertaining to true tau decays
    # for possible use in the optimization of a missing mass calculator
    tree = Tree(name="ditaumass", model=DTMEvent)

    tree.define_object(name='resonance', prefix='resonance_')
    tree.define_object(name='radiative', prefix='radiative_')

    truetaus = [
        tree.define_object(name='truetau1', prefix='truetau1_'),
        tree.define_object(name='truetau2', prefix='truetau2_'),
    ]
    taus = [
        tree.define_object(name='tau1', prefix='tau1_'),
        tree.define_object(name='tau2', prefix='tau2_'),
    ]
    electrons = [
        tree.define_object(name='ele1', prefix='ele1_'),
        tree.define_object(name='ele2', prefix='ele2_'),
    ]
    muons = [
        tree.define_object(name='muon1', prefix='muon1_'),
        tree.define_object(name='muon2', prefix='muon2_'),
    ]

    # get the Z or Higgs
    resonance_pdgid = 25 if args.higgs else 23
    collision_energy = 7 if '7TeV' in self.metadata.name else 8

    for event_index, event in enumerate(chain):
        try:
            tree.reset_branch_values()

            # get the Z or Higgs
            resonance = tautools.get_particles(
                event, resonance_pdgid, num_expected=1)
            if not resonance:
                print("could not find resonance")
                continue

            # get the resonance just before the decay
            resonance = resonance[0].last_self

            if draw_decays:
                resonance.export_graphvis(
                    'resonance_%d.dot' % event.EventNumber)

            FourVectModel.set(tree.resonance, resonance)

            # collect decay products (taus and photons)
            tau_decays = []
            mc_photons = []
            for child in resonance.iter_children():
                if abs(child.pdgId) == pdg.tau_minus:
                    # ignore status 3 taus in 2012 (something strange in the
                    # MC record...)
                    if year == 2012 and child.status == 3:
                        continue
                    tau_decays.append(tautools.TauDecay(child))
                elif child.pdgId == pdg.gamma:
                    mc_photons.append(child)
                else:
                    raise TypeError(
                        'unexpected particle after resonance:\n%s' % child)

            # There should be exactly two taus
            if len(tau_decays) != 2:
                print("found %i tau decays in MC record" % len(tau_decays))
                for decay in tau_decays:
                    print(decay)
                # skip this event
                continue

            # check for incomplete tau decays; report the first one only
            invalid = False
            for decay in tau_decays:
                if not decay.valid:
                    print("invalid tau decay:")
                    print(decay)
                    if draw_decays:
                        decay.init.export_graphvis(
                            'decay_invalid_%d.dot' % event.EventNumber)
                    invalid = True
                    break
            if invalid:
                # skip this event
                continue

            radiative_fourvect = LorentzVector()
            for photon in mc_photons:
                radiative_fourvect += photon.fourvect
            # NOTE(review): the self-reference presumably lets
            # FourVectModel.set read `.fourvect` from a bare LorentzVector
            # the same way it does from a particle -- confirm.
            radiative_fourvect.fourvect = radiative_fourvect
            FourVectModel.set(tree.radiative, radiative_fourvect)
            tree.radiative_ngamma = len(mc_photons)
            tree.radiative_ngamma_5 = sum(
                1 for ph in mc_photons if ph.pt > 5)
            tree.radiative_ngamma_10 = sum(
                1 for ph in mc_photons if ph.pt > 10)
            tree.radiative_et_scalarsum = sum(ph.pt for ph in mc_photons)

            all_matched = True
            matched_objects = []

            skip = False
            for i, (decay, truetau, tau, electron, muon) in enumerate(
                    zip(tau_decays, truetaus, taus, electrons, muons)):

                if draw_decays:
                    decay.init.export_graphvis(
                        'decay%d_%d.dot' % (i, event.EventNumber))

                TrueTau.set(truetau, decay, verbose=verbose)

                # match to reco taus, electrons and muons
                if decay.hadronic:
                    recotau, dr = closest_reco_object(
                        event.taus, decay.fourvect_visible, dR=0.2)
                    if recotau is not None:
                        matched_objects.append(recotau)
                        recotau.matched = True
                        recotau.matched_dr = dr
                        RecoTau.set(tau, recotau, verbose=verbose)
                    else:
                        all_matched = False
                elif decay.leptonic_electron:
                    recoele, dr = closest_reco_object(
                        event.electrons, decay.fourvect_visible, dR=0.2)
                    if recoele is not None:
                        matched_objects.append(recoele)
                        recoele.matched = True
                        recoele.matched_dr = dr
                        RecoElectron.set(electron, recoele)
                    else:
                        all_matched = False
                elif decay.leptonic_muon:
                    recomuon, dr = closest_reco_object(
                        event.muons, decay.fourvect_visible, dR=0.2)
                    if recomuon is not None:
                        matched_objects.append(recomuon)
                        recomuon.matched = True
                        recomuon.matched_dr = dr
                        RecoMuon.set(muon, recomuon)
                    else:
                        all_matched = False
                else:
                    print("unhandled invalid tau decay:")
                    print(decay)
                    # with draw_decays the graph was already exported above
                    if not draw_decays:
                        decay.init.export_graphvis(
                            'decay%d_%d.dot' % (i, event.EventNumber))
                    # skip this event
                    skip = True
                    break
            if skip:
                # skip this event
                continue

            # did both decays match a reco object?
            tree.matched = all_matched

            # match collision: decays matched same reco object
            if all_matched:
                tree.match_collision = (
                    matched_objects[0] == matched_objects[1])

            # MET
            tree.met_x = event.MET.etx
            tree.met_y = event.MET.ety
            tree.met_phi = event.MET.phi
            tree.met = event.MET.et
            tree.sum_et = event.MET.sumet

            # set extra event variables
            tree.channel = event.mc_channel_number
            tree.event = event.EventNumber
            tree.run = event.RunNumber
            tree.mu = event.averageIntPerXing
            tree.collision_energy = collision_energy

            tree.Fill()
        except:
            # deliberately catch-all: report where the failure happened,
            # then propagate the original exception unchanged
            print("event index: %d" % event_index)
            print("event number: %d" % event.EventNumber)
            print("file: %s" % chain.file.GetName())
            raise

    self.output.cd()
    tree.FlushBaskets()
    tree.Write()
def work(self):
    """Build the output tree, run the event filters and fill the tree.

    Options are read from ``self.args`` and ``self.metadata``. The method
    creates the output ``Tree`` from the model returned by ``get_model``,
    sets up the ``EventFilterList`` (unless the local fast path is taken),
    loops over the events that survive the filters, computes tau, jet, MET
    and missing-mass quantities for each event, fills the output tree and
    finally writes it to ``self.output``.

    ``local`` means the input is the skim written by an earlier run of this
    script. In that mode the cut-flow histogram (and, for data, the GRL) is
    merged across input files through ``onfilechange`` callbacks and written
    after the tree. If additionally ``syst_terms`` is None and
    ``redo_selection`` is false, no filters are built and every event is
    filled into the output tree without recomputing any variable.
    """
    # get argument values
    local = self.args.local
    syst_terms = self.args.syst_terms
    datatype = self.metadata.datatype
    year = self.metadata.year
    verbose = self.args.student_verbose
    very_verbose = self.args.student_very_verbose
    redo_selection = self.args.redo_selection
    nominal_values = self.args.nominal_values

    # get the dataset name
    dsname = os.getenv('INPUT_DATASET_NAME', None)
    if dsname is None:
        # attempt to guess dsname from dirname
        if self.files:
            dsname = os.path.basename(os.path.dirname(self.files[0]))
    # NOTE(review): dsname is still None here when the environment variable
    # is unset and self.files is empty; the `in dsname` tests below would
    # then raise TypeError.

    # is this a signal sample?
    # if so we will also keep some truth information in the output below
    is_signal = datatype == datasets.MC and (
        '_VBFH' in dsname or
        '_ggH' in dsname or
        '_ZH' in dsname or
        '_WH' in dsname or
        '_ttH' in dsname)
    log.info("DATASET: {0}".format(dsname))
    log.info("IS SIGNAL: {0}".format(is_signal))

    # is this an inclusive signal sample for overlap studies?
    is_inclusive_signal = is_signal and '_inclusive' in dsname

    # is this a BCH-fixed sample? (temporary)
    is_bch_sample = 'r5470_r4540_p1344' in dsname

    if is_bch_sample:
        log.warning("this is a BCH-fixed r5470 sample")

    # onfilechange will contain a list of functions to be called as the
    # chain rolls over to each new file
    onfilechange = []
    count_funcs = {}

    if datatype != datasets.DATA:
        # count the weighted number of events
        # (the weight is read from a different place on skims than on the
        # original input)
        if local:
            def mc_weight_count(event):
                return event.hh_mc_weight
        else:
            def mc_weight_count(event):
                return event.TruthEvent[0].weights()[0]

        count_funcs = {
            'mc_weight': mc_weight_count,
        }

    if local:
        # local means running on the skims, the output of this script
        # running on the grid
        if datatype == datasets.DATA:
            # merge the GRL fragments
            merged_grl = goodruns.GRL()

            def update_grl(student, grl, name, file, tree):
                grl |= str(file.Get('Lumi/%s' % student.metadata.treename).GetString())

            onfilechange.append((update_grl, (self, merged_grl,)))

        # one bin for the event count, plus one for the MC-weighted count
        # when the input is not data
        if datatype == datasets.DATA:
            merged_cutflow = Hist(1, 0, 1, name='cutflow', type='D')
        else:
            merged_cutflow = Hist(2, 0, 2, name='cutflow', type='D')

        def update_cutflow(student, cutflow, name, file, tree):
            # record a cut-flow
            # NOTE(review): `year` is assigned but never used in this callback
            year = student.metadata.year
            datatype = student.metadata.datatype
            cutflow[1].value += file.cutflow_event[1].value
            if datatype != datasets.DATA:
                cutflow[2].value += file.cutflow_event_mc_weight[1].value

        onfilechange.append((update_cutflow, (self, merged_cutflow,)))

    else:
        # NEED TO BE CONVERTED TO XAOD
        # if datatype not in (datasets.EMBED, datasets.MCEMBED):
        #     # merge TrigConfTrees
        #     metadirname = '%sMeta' % self.metadata.treename
        #     trigconfchain = ROOT.TChain('%s/TrigConfTree' % metadirname)
        #     map(trigconfchain.Add, self.files)
        #     metadir = self.output.mkdir(metadirname)
        #     metadir.cd()
        #     trigconfchain.Merge(self.output, -1, 'fast keep')
        #     self.output.cd()

        if datatype == datasets.DATA:
            # merge GRL XML strings
            # (only the empty GRL is created; the merging code below is
            # still commented out)
            merged_grl = goodruns.GRL()
            # for fname in self.files:
            #     with root_open(fname) as f:
            #         for key in f.Lumi.keys():
            #             merged_grl |= goodruns.GRL(
            #                 str(key.ReadObj().GetString()),
            #                 from_string=True)
            # lumi_dir = self.output.mkdir('Lumi')
            # lumi_dir.cd()
            # xml_string= ROOT.TObjString(merged_grl.str())
            # xml_string.Write(self.metadata.treename)
            # self.output.cd()

    self.output.cd()

    # create the output tree
    model = get_model(datatype, dsname,
                      prefix=None if local else 'hh_',
                      is_inclusive_signal=is_inclusive_signal)
    log.info("Output Model:\n\n{0}\n\n".format(model))
    outtree = Tree(name=self.metadata.treename,
                   model=model)

    # `tree` is the object the event variables are assigned to below: the
    # output tree itself when local, otherwise its 'hh_'-prefixed view
    if local:
        tree = outtree
    else:
        tree = outtree.define_object(name='tree', prefix='hh_')

    #tree.define_object(name='tau', prefix='tau_')
    tree.define_object(name='tau1', prefix='tau1_')
    tree.define_object(name='tau2', prefix='tau2_')
    tree.define_object(name='truetau1', prefix='truetau1_')
    tree.define_object(name='truetau2', prefix='truetau2_')
    tree.define_object(name='jet1', prefix='jet1_')
    tree.define_object(name='jet2', prefix='jet2_')
    tree.define_object(name='jet3', prefix='jet3_')

    mmc_objects = [
        tree.define_object(name='mmc0', prefix='mmc0_'),
        tree.define_object(name='mmc1', prefix='mmc1_'),
        tree.define_object(name='mmc2', prefix='mmc2_'),
    ]

    for mmc_obj in mmc_objects:
        mmc_obj.define_object(name='resonance', prefix='resonance_')

    # NEED TO BE CONVERTED TO XAOD
    # trigger_emulation = TauTriggerEmulation(
    #     year=year,
    #     passthrough=local or datatype != datasets.MC or year > 2011,
    #     count_funcs=count_funcs)

    # if not trigger_emulation.passthrough:
    #     onfilechange.append(
    #         (update_trigger_trees, (self, trigger_emulation,)))

    # trigger_config = None

    # if datatype not in (datasets.EMBED, datasets.MCEMBED):
    #     # trigger config tool to read trigger info in the ntuples
    #     trigger_config = get_trigger_config()
    #     # update the trigger config maps on every file change
    #     onfilechange.append((update_trigger_config, (trigger_config,)))

    # define the list of event filters
    if local and syst_terms is None and not redo_selection:
        # fast path: nothing to re-select, so no filters are needed
        event_filters = None
    else:
        # NOTE(review): within this method this flag is only consumed by the
        # commented-out TauTrackRecounting filter below
        tau_ntrack_recounted_use_ntup = False
        if year > 2011:
            # peek at first tree to determine if the extended number of
            # tracks is already stored
            with root_open(self.files[0]) as test_file:
                test_tree = test_file.Get(self.metadata.treename)
                tau_ntrack_recounted_use_ntup = (
                    'tau_out_track_n_extended' in test_tree)

        log.info(self.grl)
        # The order of the entries below matters; see the ordering remarks
        # placed between the filters.
        event_filters = EventFilterList([
            GRLFilter(
                self.grl,
                passthrough=(
                    local or (
                        datatype not in (datasets.DATA, datasets.EMBED))),
                count_funcs=count_funcs),
            CoreFlags(
                passthrough=local,
                count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # EmbeddingPileupPatch(
            #     passthrough=(
            #         local or year > 2011 or datatype != datasets.EMBED),
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD (not a priority)
            # PileupTemplates(
            #     year=year,
            #     passthrough=(
            #         local or is_bch_sample or datatype not in (
            #             datasets.MC, datasets.MCEMBED)),
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # RandomSeed(
            #     datatype=datatype,
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # BCHSampleRunNumber(
            #     passthrough=not is_bch_sample,
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # RandomRunNumber(
            #     tree=tree,
            #     datatype=datatype,
            #     pileup_tool=pileup_tool,
            #     passthrough=local,
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # trigger_emulation,
            # NEED TO BE CONVERTED TO XAOD
            # Triggers(
            #     year=year,
            #     tree=tree,
            #     datatype=datatype,
            #     passthrough=datatype in (datasets.EMBED, datasets.MCEMBED),
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            PileupReweight_xAOD(
                tree=tree,
                passthrough=(local or (
                    datatype not in (datasets.MC, datasets.MCEMBED))),
                count_funcs=count_funcs),
            PriVertex(
                passthrough=local,
                count_funcs=count_funcs),
            LArError(
                passthrough=local,
                count_funcs=count_funcs),
            TileError(
                passthrough=local,
                count_funcs=count_funcs),
            TileTrips(
                passthrough=(
                    local or datatype in (datasets.MC, datasets.MCEMBED)),
                count_funcs=count_funcs),
            JetCalibration(
                datatype=datatype,
                passthrough=local,
                count_funcs=count_funcs),
            JetResolution(
                passthrough=(local or (
                    datatype not in (datasets.MC, datasets.MCEMBED))),
                count_funcs=count_funcs),
            TauCalibration(
                datatype,
                passthrough=local,
                count_funcs=count_funcs),
            # # truth matching must come before systematics due to
            # # TES_TRUE/FAKE
            # NEED TO BE CONVERTED TO XAOD
            TrueTauSelection(
                passthrough=datatype == datasets.DATA,
                count_funcs=count_funcs),
            TruthMatching(
                passthrough=datatype == datasets.DATA,
                count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            NvtxJets(
                tree=tree,
                count_funcs=count_funcs),
            # # PUT THE SYSTEMATICS "FILTER" BEFORE
            # # ANY FILTERS THAT REFER TO OBJECTS
            # # BUT AFTER CALIBRATIONS
            # # Systematics must also come before anything that refers to
            # # thing.fourvect since fourvect is cached!
            # NEED TO BE CONVERTED TO XAOD
            # Systematics(
            #     terms=syst_terms,
            #     year=year,
            #     datatype=datatype,
            #     tree=tree,
            #     verbose=verbose,
            #     passthrough=not syst_terms,
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # JetIsPileup(
            #     passthrough=(
            #         local or year < 2012 or
            #         datatype not in (datasets.MC, datasets.MCEMBED)),
            #     count_funcs=count_funcs),
            JetCleaning(
                datatype=datatype,
                year=year,
                count_funcs=count_funcs),
            ElectronVeto(
                el_sel='Medium',
                count_funcs=count_funcs),
            MuonVeto(
                count_funcs=count_funcs),
            TauPT(2,
                thresh=20 * GeV,
                count_funcs=count_funcs),
            TauHasTrack(2,
                count_funcs=count_funcs),
            TauEta(2,
                count_funcs=count_funcs),
            TauElectronVeto(2,
                count_funcs=count_funcs),
            TauMuonVeto(2,
                count_funcs=count_funcs),
            TauCrack(2,
                count_funcs=count_funcs),
            # # before selecting the leading and subleading taus
            # # be sure to only consider good candidates
            TauIDMedium(2,
                count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # but not used by default
            # #TauTriggerMatchIndex(
            # #    config=trigger_config,
            # #    year=year,
            # #    datatype=datatype,
            # #    passthrough=datatype == datasets.EMBED,
            # #    count_funcs=count_funcs),
            # Select two leading taus at this point
            # 25 and 35 for data
            # 20 and 30 for MC to leave room for TES uncertainty
            TauLeadSublead(
                lead=(
                    35 * GeV if datatype == datasets.DATA or local
                    else 30 * GeV),
                sublead=(
                    25 * GeV if datatype == datasets.DATA or local
                    else 20 * GeV),
                count_funcs=count_funcs),
            # taus are sorted (in decreasing order) by pT from here on
            TauIDSelection(
                count_funcs=count_funcs),
            TaudR(3.2,
                count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # but not used by default
            # #TauTriggerMatchThreshold(
            # #    datatype=datatype,
            # #    tree=tree,
            # #    count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # TauTriggerEfficiency(
            #     year=year,
            #     datatype=datatype,
            #     tree=tree,
            #     tes_systematic=self.args.syst_terms and (
            #         Systematics.TES_TERMS & self.args.syst_terms),
            #     passthrough=datatype == datasets.DATA,
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            PileupScale(
                tree=tree,
                year=year,
                datatype=datatype,
                passthrough=local,
                count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            TauIDScaleFactors(
                year=year,
                passthrough=datatype == datasets.DATA,
                count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # TauFakeRateScaleFactors(
            #     year=year,
            #     datatype=datatype,
            #     tree=tree,
            #     tes_up=(self.args.syst_terms is not None and
            #         (Systematics.TES_FAKE_TOTAL_UP in self.args.syst_terms or
            #          Systematics.TES_FAKE_FINAL_UP in self.args.syst_terms)),
            #     tes_down=(self.args.syst_terms is not None and
            #         (Systematics.TES_FAKE_TOTAL_DOWN in self.args.syst_terms or
            #          Systematics.TES_FAKE_FINAL_DOWN in self.args.syst_terms)),
            #     passthrough=datatype in (datasets.DATA, datasets.EMBED),
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # HiggsPT(
            #     year=year,
            #     tree=tree,
            #     passthrough=not is_signal or local,
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # TauTrackRecounting(
            #     year=year,
            #     use_ntup_value=tau_ntrack_recounted_use_ntup,
            #     passthrough=local,
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # MCWeight(
            #     datatype=datatype,
            #     tree=tree,
            #     passthrough=local or datatype == datasets.DATA,
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # EmbeddingIsolation(
            #     tree=tree,
            #     passthrough=(
            #         local or year < 2012 or
            #         datatype not in (datasets.EMBED, datasets.MCEMBED)),
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # EmbeddingCorrections(
            #     tree=tree,
            #     year=year,
            #     passthrough=(
            #         local or
            #         datatype not in (datasets.EMBED, datasets.MCEMBED)),
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # EmbeddingTauSpinner(
            #     year=year,
            #     tree=tree,
            #     passthrough=(
            #         local or datatype not in (
            #             datasets.EMBED, datasets.MCEMBED)),
            #     count_funcs=count_funcs),
            # # put MET recalculation after tau selection but before tau-jet
            # # overlap removal and jet selection because of the RefAntiTau
            # # MET correction
            # NEED TO BE CONVERTED TO XAOD
            # METRecalculation(
            #     terms=syst_terms,
            #     year=year,
            #     tree=tree,
            #     refantitau=not nominal_values,
            #     verbose=verbose,
            #     very_verbose=very_verbose,
            #     count_funcs=count_funcs),
            TauJetOverlapRemoval(
                count_funcs=count_funcs),
            JetPreselection(
                count_funcs=count_funcs),
            NonIsolatedJet(
                tree=tree,
                count_funcs=count_funcs),
            JetSelection(
                year=year,
                count_funcs=count_funcs),
            RecoJetTrueTauMatching(
                passthrough=datatype == datasets.DATA or local,
                count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            # BCHCleaning(
            #     tree=tree,
            #     passthrough=year == 2011 or local,
            #     datatype=datatype,
            #     count_funcs=count_funcs),
            # NEED TO BE CONVERTED TO XAOD
            ClassifyInclusiveHiggsSample(
                tree=tree,
                passthrough=not is_inclusive_signal,
                count_funcs=count_funcs),
        ])

        # set the event filters
        self.filters['event'] = event_filters

    hh_buffer = TreeBuffer()
    if local:
        chain = TreeChain(
            self.metadata.treename,
            files=self.files,
            # ignore_branches=ignore_branches,
            events=self.events,
            onfilechange=onfilechange,
            filters=event_filters,
            cache=True,
            cache_size=50000000,
            learn_entries=100)
        buffer = TreeBuffer()
        # split the input branches: 'hh_'-prefixed ones go to hh_buffer with
        # the prefix stripped, the ones listed in `copied` go to buffer
        # NOTE(review): `copied` is not defined in this method; presumably a
        # module-level collection of branch names -- confirm
        for name, value in chain._buffer.items():
            if name.startswith('hh_'):
                hh_buffer[name[3:]] = value
            elif name in copied:
                buffer[name] = value
        outtree.set_buffer(
            hh_buffer,
            create_branches=False,
            visible=True)
        outtree.set_buffer(
            buffer,
            create_branches=True,
            visible=False)

    else:
        root_chain = ROOT.TChain(self.metadata.treename)
        for f in self.files:
            log.info(f)
            root_chain.Add(f)

        # if len(self.files) != 1:
        #     raise RuntimeError('lenght of files has to be 1 for now (no xAOD chaining available)')
        # self.files = self.files[0]
        # root_chain = ROOT.TFile(self.files)
        chain = xAODTree(root_chain, filters=event_filters, events=self.events)
        define_objects(chain, datatype=datatype)
        outtree.set_buffer(
            hh_buffer,
            create_branches=True,
            visible=False)

    # # create the MMC
    # mmc = mass.MMC(year=year)
    from ROOT import MissingMassTool
    mass_tool = MissingMassTool('mass_tool')
    mass_tool.initialize()

    # report which packages have been loaded
    # externaltools.report()

    self.output.cd()

    # The main event loop
    # the event filters above are automatically run for each event and only
    # the surviving events are looped on
    for event in chain:

        if local and syst_terms is None and not redo_selection:
            # fast path: fill the event as read, nothing is recomputed
            outtree.Fill()
            continue

        # sort taus and jets in decreasing order by pT
        event.taus.sort(key=lambda tau: tau.pt(), reverse=True)
        event.jets.sort(key=lambda jet: jet.pt(), reverse=True)

        # tau1 is the leading tau
        # tau2 is the subleading tau
        tau1, tau2 = event.taus
        tau1.fourvect = asrootpy(tau1.p4())
        tau2.fourvect = asrootpy(tau2.p4())

        # copies of the tau four-vectors boosted by minus the boost vector
        # of the di-tau system
        beta_taus = (tau1.fourvect + tau2.fourvect).BoostVector()
        tau1.fourvect_boosted = LorentzVector()
        tau1.fourvect_boosted.copy_from(tau1.fourvect)
        tau1.fourvect_boosted.Boost(beta_taus * -1)

        tau2.fourvect_boosted = LorentzVector()
        tau2.fourvect_boosted.copy_from(tau2.fourvect)
        tau2.fourvect_boosted.Boost(beta_taus * -1)

        jets = list(event.jets)
        for jet in jets:
            jet.fourvect = asrootpy(jet.p4())

        # jet1/jet2/jet3 stay None when the event has fewer jets; beta stays
        # None unless there are at least two jets
        jet1, jet2, jet3 = None, None, None
        beta = None
        if len(jets) >= 2:
            jet1, jet2 = jets[:2]

            # determine boost of system
            # determine jet CoM frame
            beta = (jet1.fourvect + jet2.fourvect).BoostVector()
            tree.jet_beta.copy_from(beta)

            jet1.fourvect_boosted = LorentzVector()
            jet1.fourvect_boosted.copy_from(jet1.fourvect)
            jet1.fourvect_boosted.Boost(beta * -1)

            jet2.fourvect_boosted = LorentzVector()
            jet2.fourvect_boosted.copy_from(jet2.fourvect)
            jet2.fourvect_boosted.Boost(beta * -1)

            tau1.min_dr_jet = min(
                tau1.fourvect.DeltaR(jet1.fourvect),
                tau1.fourvect.DeltaR(jet2.fourvect))
            tau2.min_dr_jet = min(
                tau2.fourvect.DeltaR(jet1.fourvect),
                tau2.fourvect.DeltaR(jet2.fourvect))

            # tau centrality (degree to which they are between the two jets)
            tau1.centrality = eventshapes.eta_centrality(
                tau1.fourvect.Eta(),
                jet1.fourvect.Eta(),
                jet2.fourvect.Eta())

            tau2.centrality = eventshapes.eta_centrality(
                tau2.fourvect.Eta(),
                jet1.fourvect.Eta(),
                jet2.fourvect.Eta())

            # boosted tau centrality
            tau1.centrality_boosted = eventshapes.eta_centrality(
                tau1.fourvect_boosted.Eta(),
                jet1.fourvect_boosted.Eta(),
                jet2.fourvect_boosted.Eta())

            tau2.centrality_boosted = eventshapes.eta_centrality(
                tau2.fourvect_boosted.Eta(),
                jet1.fourvect_boosted.Eta(),
                jet2.fourvect_boosted.Eta())

            # 3rd leading jet
            if len(jets) >= 3:
                jet3 = jets[2]
                jet3.fourvect_boosted = LorentzVector()
                jet3.fourvect_boosted.copy_from(jet3.fourvect)
                jet3.fourvect_boosted.Boost(beta * -1)

        elif len(jets) == 1:
            jet1 = jets[0]

            tau1.min_dr_jet = tau1.fourvect.DeltaR(jet1.fourvect)
            tau2.min_dr_jet = tau2.fourvect.DeltaR(jet1.fourvect)

        RecoJetBlock.set(tree, jet1, jet2, jet3, local=local)

        # mass of ditau + leading jet system
        if jet1 is not None:
            tree.mass_tau1_tau2_jet1 = (
                tau1.fourvect + tau2.fourvect + jet1.fourvect).M()

        #####################################
        # number of tracks from PV minus taus
        #####################################
        ntrack_pv = 0
        ntrack_nontau_pv = 0
        for vxp in event.vertices:
            # primary vertex
            # (only the first vertex with vertexType() == 1 is used)
            if vxp.vertexType() == 1:
                ntrack_pv = vxp.nTrackParticles()
                ntrack_nontau_pv = ntrack_pv - tau1.nTracks() - tau2.nTracks()
                break
        tree.ntrack_pv = ntrack_pv
        tree.ntrack_nontau_pv = ntrack_nontau_pv

        #########################
        # MET variables
        #########################
        MET = event.MET.collection['Final']
        METx = MET.mpx()
        METy = MET.mpy()
        METet = MET.met()
        MET_vect = Vector2(METx, METy)
        MET_4vect = LorentzVector()
        MET_4vect.SetPxPyPzE(METx, METy, 0., METet)
        MET_4vect_boosted = LorentzVector()
        MET_4vect_boosted.copy_from(MET_4vect)
        # the boosted copy is only actually boosted when a di-jet boost
        # vector exists (two or more jets)
        if beta is not None:
            MET_4vect_boosted.Boost(beta * -1)

        tree.MET_et = METet
        tree.MET_etx = METx
        tree.MET_ety = METy
        tree.MET_phi = MET.phi()
        dPhi_tau1_tau2 = abs(tau1.fourvect.DeltaPhi(tau2.fourvect))
        dPhi_tau1_MET = abs(tau1.fourvect.DeltaPhi(MET_4vect))
        dPhi_tau2_MET = abs(tau2.fourvect.DeltaPhi(MET_4vect))
        tree.dPhi_tau1_tau2 = dPhi_tau1_tau2
        tree.dPhi_tau1_MET = dPhi_tau1_MET
        tree.dPhi_tau2_MET = dPhi_tau2_MET
        tree.dPhi_min_tau_MET = min(dPhi_tau1_MET, dPhi_tau2_MET)
        tree.MET_bisecting = is_MET_bisecting(
            dPhi_tau1_tau2,
            dPhi_tau1_MET,
            dPhi_tau2_MET)

        sumET = MET.sumet()
        tree.MET_sumet = sumET
        # -1 marks events where sumET is exactly zero and the ratio below
        # cannot be formed
        if sumET != 0:
            tree.MET_sig = ((2. * METet / GeV) /
                (utils.sign(sumET) * sqrt(abs(sumET / GeV))))
        else:
            tree.MET_sig = -1.

        tree.MET_centrality = eventshapes.phi_centrality(
            tau1.fourvect,
            tau2.fourvect,
            MET_vect)
        tree.MET_centrality_boosted = eventshapes.phi_centrality(
            tau1.fourvect_boosted,
            tau2.fourvect_boosted,
            MET_4vect_boosted)

        tree.number_of_good_vertices = len(event.vertices)

        ##########################
        # Jet and sum pt variables
        ##########################
        tree.numJets = len(event.jets)

        # sum pT with only the two leading jets
        tree.sum_pt = sum(
            [tau1.pt(), tau2.pt()] +
            [jet.pt() for jet in jets[:2]])

        # sum pT with all selected jets
        tree.sum_pt_full = sum(
            [tau1.pt(), tau2.pt()] +
            [jet.pt() for jet in jets])

        # vector sum pT with two leading jets and MET
        tree.vector_sum_pt = sum(
            [tau1.fourvect, tau2.fourvect] +
            [jet.fourvect for jet in jets[:2]] +
            [MET_4vect]).Pt()

        # vector sum pT with all selected jets and MET
        tree.vector_sum_pt_full = sum(
            [tau1.fourvect, tau2.fourvect] +
            [jet.fourvect for jet in jets] +
            [MET_4vect]).Pt()

        # resonance pT
        tree.resonance_pt = sum(
            [tau1.fourvect, tau2.fourvect, MET_4vect]).Pt()

        # #############################
        # # tau <-> vertex association
        # #############################
        tree.tau_same_vertex = (
            tau1.vertex() == tau2.vertex())

        tau1.vertex_prob = ROOT.TMath.Prob(
            tau1.vertex().chiSquared(),
            int(tau1.vertex().numberDoF()))

        tau2.vertex_prob = ROOT.TMath.Prob(
            tau2.vertex().chiSquared(),
            int(tau2.vertex().numberDoF()))

        # ##########################
        # # MMC Mass
        # ##########################
        # OLD USAGE
        # mmc_result = mmc.mass(
        #     tau1, tau2,
        #     METx, METy, sumET,
        #     njets=len(event.jets))

        # for mmc_method, mmc_object in enumerate(mmc_objects):
        #     mmc_mass, mmc_resonance, mmc_met = mmc_result[mmc_method]
        #     if verbose:
        #         log.info("MMC (method %d): %f" % (mmc_method, mmc_mass))

        #     mmc_object.mass = mmc_mass
        #     mmc_object.MET_et = mmc_met.Mod()
        #     mmc_object.MET_etx = mmc_met.X()
        #     mmc_object.MET_ety = mmc_met.Y()
        #     mmc_object.MET_phi = math.pi - mmc_met.Phi()
        #     if mmc_mass > 0:
        #         FourMomentum.set(mmc_object.resonance, mmc_resonance)

        # run the mass tool once per event, then read back one result per
        # mmc object (index i selects 'mmc<i>_mass' and the i-th fitted
        # vectors)
        mass_tool.apply(event.EventInfo, tau1, tau2, MET, len(event.jets))
        for i, mmc_object in enumerate(mmc_objects):
            mmc_object.mass = event.EventInfo.auxdataConst('double')('mmc%s_mass' % i)
            mmc_object.MET_et = mass_tool.GetFittedMetVec(i).Mod()
            mmc_object.MET_etx = mass_tool.GetFittedMetVec(i).X()
            mmc_object.MET_ety = mass_tool.GetFittedMetVec(i).Y()
            mmc_object.MET_phi = math.pi - mass_tool.GetFittedMetVec(i).Phi()
            # the resonance four-momentum is only stored for positive masses
            if mmc_object.mass > 0:
                FourMomentum.set(mmc_object.resonance, mass_tool.GetResonanceVec(i))

        # ############################
        # # collinear and visible mass
        # ############################
        # vis_mass, collin_mass, tau1_x, tau2_x = mass.collinearmass(
        #     tau1, tau2, METx, METy)

        # tree.mass_vis_tau1_tau2 = vis_mass
        # tree.mass_collinear_tau1_tau2 = collin_mass
        # tau1.collinear_momentum_fraction = tau1_x
        # tau2.collinear_momentum_fraction = tau2_x

        # # Fill the tau block
        # # This must come after the RecoJetBlock is filled since
        # # that sets the jet_beta for boosting the taus
        RecoTauBlock.set(event, tree, datatype, tau1, tau2, local=local)
        # if datatype != datasets.DATA:
        #     TrueTauBlock.set(tree, tau1, tau2)

        # fill the output tree
        outtree.Fill(reset=True)

    # externaltools.report()

    # flush any baskets remaining in memory to disk
    self.output.cd()
    outtree.FlushBaskets()
    outtree.Write()

    if local:
        # write the objects merged across input files by the onfilechange
        # callbacks registered above
        if datatype == datasets.DATA:
            xml_string = ROOT.TObjString(merged_grl.str())
            xml_string.Write('lumi')
        merged_cutflow.Write()
from rootpy.io import root_open
from rootpy import stl
from random import gauss

# Example script: write a tree holding a vector of Lorentz vectors and an
# integer counter to "test.root".
# NOTE(review): Tree, TreeModel, IntCol and LorentzVector are not imported in
# this snippet; presumably they are imported earlier in the file -- confirm.


# define the model
class Event(TreeModel):
    # one variable-length vector of TLorentzVector per entry
    x = stl.vector('TLorentzVector')
    # the entry counter
    i = IntCol()


# The context manager closes the file even if filling raises, instead of
# relying on a trailing f.close() that an exception would skip.
with root_open("test.root", "recreate") as f:
    tree = Tree("test", model=Event)

    # fill the tree
    for i in range(100):
        # the vector is cleared by hand because fill() is called without a
        # reset request below
        tree.x.clear()
        for j in range(5):
            vect = LorentzVector(
                gauss(.5, 1.),
                gauss(.5, 1.),
                gauss(.5, 1.),
                gauss(.5, 1.))
            tree.x.push_back(vect)
        tree.i = i
        tree.fill()
    tree.write()
def lorentzVecsLeps(nom, is3l):
    '''
    Build Lorentz vectors for the leptons and the missing transverse
    momentum of one event.

    nom  -- event record; the attributes met_met, met_phi and
            lep_Pt_N, lep_Eta_N, lep_Phi_N, lep_E_N are read from it
            (N = 0, 1, and also 2 when is3l is true)
    is3l -- when true a third lepton vector is built and returned

    The MET vector is set with pt = E = nom.met_met, eta = 0 and
    phi = nom.met_phi.

    Returns (lep0, lep1, lep2, met) when is3l is true,
    otherwise (lep0, lep1, met).
    '''
    def _build(pt, eta, phi, energy):
        # Create an empty vector and set it from (pt, eta, phi, E).
        vec = LorentzVector()
        vec.SetPtEtaPhiE(pt, eta, phi, energy)
        return vec

    met = _build(nom.met_met, 0, nom.met_phi, nom.met_met)
    lep0 = _build(nom.lep_Pt_0, nom.lep_Eta_0, nom.lep_Phi_0, nom.lep_E_0)
    lep1 = _build(nom.lep_Pt_1, nom.lep_Eta_1, nom.lep_Phi_1, nom.lep_E_1)

    if not is3l:
        return (lep0, lep1, met)

    lep2 = _build(nom.lep_Pt_2, nom.lep_Eta_2, nom.lep_Phi_2, nom.lep_E_2)
    return (lep0, lep1, lep2, met)
def preprocessing(jet):
    """Centre and rotate the constituents of one jet.

    ``jet`` is deep-copied and reshaped to rows of four numbers, read as
    (E, px, py, pz); the caller's array is not modified.

    Steps, in order:
      1. Sum all rows to get the jet four-vector; take its phi and its
         boost vector after ``set_perp(0)`` has been applied to it.
      2. Rotate every constituent about z by ``-phi`` and boost it by the
         negated boost vector.
      3. Accumulate phi * E / r and eta * E / r over the constituents,
         with r = sqrt(phi**2 + eta**2). Rows whose energy is below 1e-10
         (zero padding) and rows with r == 0 are skipped.
      4. Rotate every constituent about x by ``-alpha`` where
         ``alpha = arctan2(weighted_eta, weighted_phi)`` (approximately
         aligns at phi; swapping the arctan2 arguments and the sign of
         the rotation would align at eta instead).

    Returns the transformed constituents flattened to one dimension.
    """
    consts = copy.deepcopy(jet).reshape(-1, 4)
    n_rows = len(consts)

    def _vector(row):
        # Rows are stored (E, px, py, pz); the energy is passed last.
        return LorentzVector(consts[row, 1], consts[row, 2],
                             consts[row, 3], consts[row, 0])

    def _store(row, vec):
        # Write the vector back in (E, px, py, pz) order.
        consts[row, 0] = vec[3]  # e
        consts[row, 1] = vec[0]  # px
        consts[row, 2] = vec[1]  # py
        consts[row, 3] = vec[2]  # pz

    # find the jet (eta, phi)
    total = consts.sum(axis=0)
    jet_axis = LorentzVector(total[1], total[2], total[3], total[0])

    # centering parameters
    axis_phi = jet_axis.phi()
    boost = jet_axis.boost_vector()
    boost.set_perp(0)

    for row in range(n_rows):
        vec = _vector(row)
        vec.rotate_z(-axis_phi)
        vec.boost(-boost)
        _store(row, vec)

    # rotating parameters
    sum_phi = 0
    sum_eta = 0
    for row in range(n_rows):
        if consts[row, 0] < 1e-10:
            # pass zero paddings
            continue
        vec = _vector(row)
        phi_i = vec.phi()
        eta_i = vec.eta()
        dist = np.sqrt(phi_i**2 + eta_i**2)
        if dist == 0:
            # in case there is only one component. In fact these data
            # points should generally be invalid.
            continue
        energy = vec.E()
        sum_phi += phi_i * energy/dist
        sum_eta += eta_i * energy/dist

    # approximately align at phi
    angle = np.arctan2(sum_eta, sum_phi)
    for row in range(n_rows):
        vec = _vector(row)
        vec.rotate_x(-angle)
        _store(row, vec)

    return consts.ravel()
def preprocessing(jet):
    """Centre and rotate the constituents of one jet.

    Every entry is a sequence of 4-vecs (E, px, py, pz) of jet
    constituents. ``jet`` is deep-copied and reshaped to rows of four
    numbers; the caller's array is not modified.

    The rotation angle ``alpha`` is ``-arctan2(weighted_phi, weighted_eta)``
    where the weighted sums accumulate phi * E / r and eta * E / r with
    r = sqrt(phi**2 + eta**2). Rows whose energy is below 1e-10 are
    skipped, and so are rows with r == 0, which would otherwise divide by
    zero and spoil ``alpha`` for the whole jet.

    Each constituent is then rotated about z by minus the jet phi, boosted
    by the negated jet boost vector (after ``set_perp(0)``), and rotated
    about x by ``alpha``.

    Returns the transformed constituents with shape (1, 4 * n_constituents).
    """
    jet = copy.deepcopy(jet)
    jet = jet.reshape(-1, 4)
    n_consti = len(jet)

    # find the jet (eta, phi)
    center = jet.sum(axis=0)
    v_jet = LorentzVector(center[1], center[2], center[3], center[0])

    # centering
    phi = v_jet.phi()
    bv = v_jet.boost_vector()
    bv.set_perp(0)

    # rotating
    # NOTE(review): the weights below are computed from the constituents
    # before the centering rotation/boost is applied to them -- confirm
    # that this is intended.
    weighted_phi = 0
    weighted_eta = 0
    for i in range(n_consti):
        if jet[i, 0] < 1e-10:
            # skip low-energy rows (presumably zero padding)
            continue
        v = LorentzVector(jet[i, 1], jet[i, 2], jet[i, 3], jet[i, 0])
        r = np.sqrt(v.phi()**2 + v.eta()**2)
        if r == 0:
            # phi == eta == 0 gives no direction information and would
            # divide by zero below
            continue
        weighted_phi += v.phi() * v.E() / r
        weighted_eta += v.eta() * v.E() / r
    alpha = -np.arctan2(weighted_phi, weighted_eta)

    for i in range(n_consti):
        v = LorentzVector(jet[i, 1], jet[i, 2], jet[i, 3], jet[i, 0])
        v.rotate_z(-phi)
        v.boost(-bv)
        v.rotate_x(alpha)
        # write back in (E, px, py, pz) order
        jet[i, 0] = v[3]
        jet[i, 1] = v[0]
        jet[i, 2] = v[1]
        jet[i, 3] = v[2]

    jet = jet.reshape(1, -1)
    return jet