# matched_jet_spectra.py
# Match offline jets to gTower trigger jets and plot offline pT against trigger ET.
from atlas_jets import *
import ROOT
import numpy as np
import matplotlib.pyplot as pl
import root_numpy as rnp
import pickle
# Input ntuple: the ROOT file, the directory inside it, and the tree to read.
filename = '~/Desktop/PileupSkim_TTbar_14TeV_MU80_10000.root'
directory = 'TTbar_14TeV_MU80'
tree = 'mytree'
rootfile = ROOT.TFile(filename)

# Total number of events = number of entries in <directory>/<tree>.
total_num_events = int(rootfile.Get('/'.join((directory, tree))).GetEntries())

# Module-level jet cuts.
gTower_jetET_threshold = 0.0
offline_jetpT_threshold = 0.0  # [GeV]
# define helper functions - also a source of parallelization!
def compute_jetDistance(jet1, jet2):
    """Return the separation dR between two jets in the (eta, phi) plane.

    dR = sqrt(d_eta**2 + d_phi**2). The phi difference is wrapped into
    [-pi, pi) so that two jets on either side of the phi = +/-pi seam are
    treated as neighbours instead of being almost 2*pi apart.

    Parameters
    ----------
    jet1, jet2 : objects exposing numeric ``eta`` and ``phi`` attributes.
        ``phi`` is assumed to be an azimuthal angle in radians -- TODO confirm
        against the atlas_jets jet classes.

    Returns
    -------
    float
        The wrapped dR between the two jets.
    """
    delta_eta = jet1.eta - jet2.eta
    # Shift by pi, reduce modulo 2*pi, shift back: result lies in [-pi, pi).
    delta_phi = (jet1.phi - jet2.phi + np.pi) % (2. * np.pi) - np.pi
    return (delta_eta**2. + delta_phi**2.)**0.5
def match_jets(oJets=(), tJets=(), dR=1.0):
    """Pair every offline jet with the highest-ET gTower jet inside a cone.

    For each offline jet, the tower jets with ``compute_jetDistance <= dR``
    are the candidates; the candidate with the largest ET (E / cosh(eta)) is
    chosen. If there is no candidate, a default-constructed ``gTowers.Jet()``
    is used as a placeholder.

    Parameters
    ----------
    oJets : sequence of offline jets.
    tJets : sequence of gTower jets (must expose ``E`` and ``eta``).
    dR : float, optional
        Matching cone radius; defaults to 1.0.

    Returns
    -------
    numpy.ndarray
        Array built from the list of ``[offline_jet, tower_jet]`` pairs, one
        pair per offline jet, in the order of ``oJets``.
    """
    if len(tJets) == 0:
        return np.array([[oJet, gTowers.Jet()] for oJet in oJets])

    # Tower-jet ET does not depend on the offline jet, so compute it once.
    energies = np.array([tJet.E / np.cosh(tJet.eta) for tJet in tJets])

    matched_jets = []
    for oJet in oJets:
        distances = np.array([compute_jetDistance(tJet, oJet) for tJet in tJets])
        candidates = np.where(distances <= dR)[0]
        if candidates.size == 0:
            closest_tJet = gTowers.Jet()
        else:
            # Take the arg-max among the candidates only, so a tower jet
            # outside the cone with the same ET can never be selected.
            closest_tJet = tJets[candidates[np.argmax(energies[candidates])]]
        matched_jets.append([oJet, closest_tJet])
    return np.array(matched_jets)
def _save_scatter(pairs, xlabel, title, basename, xlim=(1e2, 5e2), ylim=(0., 1200.)):
    """Scatter-plot column 0 vs column 1 of `pairs`; write <basename>.pkl and <basename>.png."""
    xdata, ydata = pairs[:, 0], pairs[:, 1]
    pl.figure()
    pl.xlabel(xlabel)
    pl.ylabel(r'trigger $E_T^{\mathrm{jet}}$ [GeV]')
    pl.title(title)
    pl.scatter(xdata, ydata)
    pl.grid(True, which='both')
    pl.xlim(xlim)
    pl.ylim(ylim)
    # Binary mode plus a context manager: the pickle is flushed and closed.
    with open('%s.pkl' % basename, 'wb') as handle:
        pickle.dump({'xdata': xdata, 'ydata': ydata}, handle)
    pl.savefig('%s.png' % basename)
    pl.close()


def run_code(offline_jetpT_threshold = 0., gTower_jetET_threshold = 0., seed_ETthresh = 0.):
    """Loop over all events, match offline jets to gTower jets and save plots.

    Reads the tree ``<directory>/<tree>`` of ``filename`` one event at a time,
    skips events with no offline jets or whose first offline jet is below
    ``offline_jetpT_threshold``, matches the remaining offline jets to tower
    jets with ``match_jets`` and collects (offline pT, tower ET) pairs.

    Two scatter plots are written to the working directory, each as a PNG and
    as a pickle of its ``xdata``/``ydata``:
    ``events_all_jet_pairs_<ending>`` and
    ``events_leading_offline_jet_pairs_<ending>``.

    Parameters
    ----------
    offline_jetpT_threshold : float
        Minimum offline jet pT, applied to the event and to every pair.
    gTower_jetET_threshold : float
        NOTE(review): only used in the plot title and output file name; no cut
        is applied with it in this function.
    seed_ETthresh : float
        ET threshold handed to ``gTowers.SeedFilter``.

    Returns
    -------
    None
    """
    # set seed cuts
    seed_filter = gTowers.SeedFilter(ETthresh = seed_ETthresh, numSeeds = 1.0e5)

    # column names to pull from the file; must be in this order to sync with
    # the predefined classes in the atlas_jets package
    offline_column_names = ['jet_AntiKt10LCTopo_%s' % col for col in ['E', 'pt', 'm', 'eta', 'phi']]
    gTower_column_names = ['gTower%s' % col for col in ['E', 'NCells', 'EtaMin', 'EtaMax', 'PhiMin', 'PhiMax']]
    treename = '%s/%s' % (directory, tree)
    branches = offline_column_names + gTower_column_names

    per_event_pairs = []  # one (n, 2) array per event that produced pairs
    num_offlineEvents = 0

    # main loop that goes over the file
    for event_num in range(total_num_events):
        if event_num % 100 == 0 and event_num != 0:
            print("doing event_num=%d for (%d, %d, %d)" % (event_num, offline_jetpT_threshold, gTower_jetET_threshold, seed_ETthresh))

        # pull in data row by row
        data = rnp.root2rec(filename, treename=treename, branches=branches, start=event_num, stop=event_num + 1)
        oEvent = OfflineJets.Event(event=[data[col][0] for col in offline_column_names])

        # skip events with no offline jets or a first jet below threshold
        if len(oEvent.jets) == 0 or oEvent.jets[0].pT < offline_jetpT_threshold:
            continue
        num_offlineEvents += 1

        # The seed filter could instead be built per event, e.g. with
        # numSeeds = len(oEvent.jets); a single shared filter is used here.
        tEvent = gTowers.TowerEvent(event=[data[col][0] for col in gTower_column_names], seed_filter = seed_filter)
        tEvent.get_event()

        # Alternative input for the matching: tEvent.filter_towers().
        paired_jets = match_jets(oJets=oEvent.jets, tJets=tEvent.event.jets)

        # Keep pairs whose offline jet passes the cut and whose tower jet has
        # positive energy (presumably this drops the placeholder jets that
        # match_jets inserts for unmatched offline jets -- verify).
        pairs = [[oJet.pT, tJet.E/np.cosh(tJet.eta)]
                 for oJet, tJet in paired_jets
                 if oJet.pT > offline_jetpT_threshold and tJet.E > 0.]
        if pairs:
            per_event_pairs.append(np.array(pairs))

    # at this point all the data is processed and only the plots remain
    filename_ending = 'offline%d_gTower%d_seed%d_unweighted' % (offline_jetpT_threshold, gTower_jetET_threshold, seed_filter.ETthresh)

    if per_event_pairs:
        all_jet_pairs = np.concatenate(per_event_pairs)
        # NOTE(review): per event this keeps the pair(s) with the largest
        # *trigger* ET (column 1), although the plot is labelled "leading
        # offline"; behaviour kept as in the original -- confirm intent.
        leading_offline_jet_pairs = np.concatenate(
            [event[event[:, 1] == np.amax(event[:, 1])] for event in per_event_pairs])
    else:
        # No pair survived the cuts: plot empty axes instead of failing on
        # the column slicing.
        all_jet_pairs = np.empty((0, 2))
        leading_offline_jet_pairs = np.empty((0, 2))

    title = r'$p_T^{\mathrm{offline jet}}$ > %d GeV, %d events, $E_T^{\mathrm{tower jet}}$ > %d GeV, $E_T^{\mathrm{seed}}$ > %d GeV' % (offline_jetpT_threshold, num_offlineEvents, gTower_jetET_threshold, seed_filter.ETthresh)

    # All jet pairs
    _save_scatter(all_jet_pairs,
                  r'offline $p_T^{\mathrm{jet}}$ [GeV]',
                  title,
                  'events_all_jet_pairs_%s' % filename_ending)

    # Leading offline jet pairs
    _save_scatter(leading_offline_jet_pairs,
                  r'leading offline $p_T^{\mathrm{jet}}$ [GeV]',
                  title,
                  'events_leading_offline_jet_pairs_%s' % filename_ending)
class Copier(object):
    """Callable that fixes the two jet thresholds and varies the seed cut.

    An instance is handed to ``Pool.map`` so that each mapped value is used as
    the ``seed_ETthresh`` of one ``run_code`` call.
    """

    def __init__(self, offline_jetpT_threshold, gTower_jetET_threshold):
        """Remember the offline-jet pT and gTower-jet ET thresholds."""
        self.gTower_jetET_threshold = gTower_jetET_threshold
        self.offline_jetpT_threshold = offline_jetpT_threshold

    def __call__(self, seed_ETthresh):
        """Run ``run_code`` with the stored thresholds and this seed threshold."""
        run_code(
            offline_jetpT_threshold=self.offline_jetpT_threshold,
            gTower_jetET_threshold=self.gTower_jetET_threshold,
            seed_ETthresh=seed_ETthresh,
        )
import multiprocessing

# Run the analysis once per seed ET threshold, one worker process per value.
# The __main__ guard keeps worker processes (which may re-import this module
# under spawn-based start methods) from launching pools of their own.
if __name__ == '__main__':
    p = multiprocessing.Pool(processes=6)
    try:
        p.map(Copier(0., 0.), [10., 15., 20., 25., 30., 35.])
    finally:
        # Always release the workers, even if one of the runs raised.
        p.close()
        p.join()