def ana(files,returnplots=False):
    """Reconstruct H -> AA -> 4b candidates from b-tagged RECO jets and histogram them.

    For every input file the 'Events' tree is opened, the jet branches are
    loaded into a PhysObj, kinematic and DeepB cuts are applied, and events
    are kept only if they contain at least two jet pairs whose summed jet
    mass lies in (25, 65) and at least one jet quadruplet whose summed jet
    mass lies in (90, 150).  The four highest-mass jets are then combined
    into two A candidates and one Higgs candidate, and the histograms are
    filled.  All histograms are plotted once after the file loop.

    Parameters
    ----------
    files : list of str
        Paths of ROOT files to process.  ``files[0]`` is used as the title
        of every histogram.
    returnplots : bool, optional
        When truthy the dictionary of filled histograms is returned.
        Otherwise the function ends by calling ``sys.exit()``.

    Returns
    -------
    dict
        Histogram name -> Hist object (only when ``returnplots`` is truthy).
    """
    from itertools import combinations

    #%%################
    # Plots and Setup #
    ###################

    ## Make a dictionary of histogram objects
    plots = {
        "RjetpT":    Hist(100,(0,100)   ,'RECO matched jet pT','Events','recplots/RjetpT'),
        "Rjeteta":   Hist(66 ,(-3.3,3.3),'RECO matched jet eta','Events','recplots/Rjeteta'),
        "RjetCSVV2": Hist([0,0.1241,0.4184,0.7527,1],None,'RECO matched jet btagCSVV2 score','events','recplots/RjetCSVV2'),
        "RjetDeepFB":Hist([0,0.0494,0.2770,0.7264,1],None,'RECO matched jet btagDeepFlavB score','events','recplots/RjetDeepFB'),
        "RA1pT":     Hist(80 ,(0,160)   ,'pT of RECO A1 objects constructed from matched jets','Events','recplots/RA1pT'),
        "RA2pT":     Hist(80 ,(0,160)   ,'pT of RECO A2 objects constructed from matched jets','Events','recplots/RA2pT'),
        "RA1mass":   Hist(40 ,(0,80)    ,'reconstructed mass of A1 objects from matched jets','Events','recplots/RA1mass'),
        "RA2mass":   Hist(40 ,(0,80)    ,'reconstructed mass of A2 objects from matched jets','Events','recplots/RA2mass'),
        "RA1dR":     Hist(50 ,(0,5)     ,'dR between jet children of reconstructed A1 object','Events','recplots/RA1dR'),
        "RA2dR":     Hist(50 ,(0,5)     ,'dR between jet children of reconstructed A2 object','Events','recplots/RA2dR'),
        "RA1deta":   Hist(33 ,(0,3.3)   ,'|deta| between jet children of reconstructed A1 object','Events','recplots/RA1deta'),
        "RA2deta":   Hist(33 ,(0,3.3)   ,'|deta| between jet children of reconstructed A2 object','Events','recplots/RA2deta'),
        "RA1dphi":   Hist(33 ,(0,3.3)   ,'|dphi| between jet children of reconstructed A1 object','Events','recplots/RA1dphi'),
        "RA2dphi":   Hist(33 ,(0,3.3)   ,'|dphi| between jet children of reconstructed A2 object','Events','recplots/RA2dphi'),
        "RHmass":    Hist(80 ,(0,160)   ,'reconstructed mass of Higgs object from reconstructed As','Events','recplots/RHmass'),
        "RHpT":      Hist(100,(0,200)   ,'pT of reconstructed higgs object from reconstructed As','Events','recplots/RHpT'),
        "RHdR":      Hist(50 ,(0,5)     ,'dR between A children of reconstructed higgs object','Events','recplots/RHdR'),
        "RHdeta":    Hist(33 ,(0,3.3)   ,'|deta| between A children of reconstructed higgs object','Events','recplots/RHdeta'),
        "RHdphi":    Hist(33 ,(0,3.3)   ,'|dphi| between A children of reconstructed higgs object','Events','recplots/RHdphi'),
        ## This entry must exist: it is filled unconditionally in the
        ## "Filling Data" section below.
        "RalljetpT": Hist(100,(0,100),'All RECO jet pT','Events','recplots/RalljetpT'),
        "npassed":   Hist(1  ,(0.5,1.5) ,'','Number of events that passed cuts', 'recplots/npassed')
    }
    for plot in plots:
        plots[plot].title = files[0]

    ## Create an internal figure for pyplot to write to
    plt.figure(1)

    ## (attribute on the jets object, branch name in the Events tree)
    branches = (
        ('eta', 'Jet_eta'), ('phi', 'Jet_phi'), ('pt', 'Jet_pt'),
        ('mass', 'Jet_mass'), ('CSVV2', 'Jet_btagCSVV2'),
        ('DeepB', 'Jet_btagDeepB'), ('DeepFB', 'Jet_btagDeepFlavB'),
    )

    ## Loop over input files
    for fname in files:

        #####################
        # Loading Variables #
        #####################
        print('Opening '+fname)

        ## Open our file and grab the events tree
        events = uproot.open(fname).get('Events')

        jets = PhysObj('jets')
        for attr, branch in branches:
            setattr(jets, attr,
                    pd.DataFrame(events.array(branch)).rename(columns=inc))

        print('Processing ' + str(len(jets.eta)) + ' events')

        ## Number of jet columns considered, capped at 10 to bound the
        ## number of pair / quadruplet combinations built below
        njet = min(jets.eta.shape[1], 10)
        if njet < 4:
            ## The selection needs a jet quadruplet and the RECO section
            ## reads jet columns 1-4, so such a file cannot contribute.
            print('Skipping '+fname+': fewer than 4 jet columns')
            continue

        ev = Event(jets)
        jets.cut(jets.pt>15)
        jets.cut(abs(jets.eta)<2.4)
        jets.cut(jets.DeepB > 0.4184 )
        ev.sync()

        ##############################
        # Processing and Calculation #
        ##############################

        ## Summed jet mass of every jet pair, one column per pair.
        ## NOTE(review): this is a plain sum of jet masses, not an
        ## invariant mass - confirm that is what is intended.
        jjmass = pd.DataFrame({
            "Jet "+str(a)+" x Jet "+str(b): jets.mass[a] + jets.mass[b]
            for a, b in combinations(range(1, njet+1), 2)
        })
        print('jjs done')

        ## Summed jet mass of every jet quadruplet, one column per quadruplet
        j4mass = pd.DataFrame({
            "J"+str(a)+" J"+str(b)+" J"+str(c)+" J"+str(d):
                jets.mass[a]+jets.mass[b]+jets.mass[c]+jets.mass[d]
            for a, b, c, d in combinations(range(1, njet+1), 4)
        })
        print('j4s done')
        print('jlist done')

        ## Count, per event, the pairs / quadruplets inside the mass windows.
        ## Comparisons against NaN (cut jets) are False, so they never count.
        djets = ((jjmass > 25) & (jjmass < 65)).sum(axis=1)
        qjets = ((j4mass > 90) & (j4mass < 150)).sum(axis=1)
        djets = djets[djets>=2]
        qjets = qjets[qjets>=1]
        jets.trimTo(djets)
        jets.trimTo(qjets)
        ev.sync()
        print('trimming done')

        #############################
        # Constructing RECO objects #
        #############################

        ## Pick the four highest-mass jets of each event as the b candidates.
        ## The rank only depends on jets.mass, so compute it once.
        massrank = jets.mass.rank(axis=1,method='first',ascending=False)
        for prop in ['bpt','beta','bphi','bmass']:
            jets[prop] = pd.DataFrame()
            for i in range(1,5):
                jets[prop][i] = jets[prop[1:]][massrank == i].max(axis=1)

        bvec = []
        for i in range(1,5):
            bvec.append(TLorentzVectorArray.from_ptetaphim(jets.bpt[i],jets.beta[i],jets.bphi[i],jets.bmass[i]))

        ## A candidates: b1+b2 and b3+b4
        avec = []
        for i in range(0,4,2):
            avec.append(bvec[i]+bvec[i+1])

        for prop in ['apt','aeta','aphi','amass']:
            jets[prop] = pd.DataFrame()
        for i in range(2):
            jets.apt[i+1]  = avec[i].pt
            jets.aeta[i+1] = avec[i].eta
            jets.aphi[i+1] = avec[i].phi
            jets.amass[i+1]= avec[i].mass
        for prop in ['apt','aeta','aphi','amass']:
            jets[prop].index = jets.pt.index

        ## Higgs candidate: A1+A2
        hvec = [avec[0]+avec[1]]

        for prop in ['hpt','heta','hphi','hmass']:
            jets[prop] = pd.DataFrame()
        jets.hpt[1]  = hvec[0].pt
        jets.heta[1] = hvec[0].eta
        jets.hphi[1] = hvec[0].phi
        jets.hmass[1]= hvec[0].mass
        for prop in ['hpt','heta','hphi','hmass']:
            jets[prop].index = jets.eta.index

        ################
        # Filling Data #
        ################

        plots['RalljetpT'].dfill(jets.pt)
        plots['RjetpT'].dfill(jets.bpt)
        plots['Rjeteta'].dfill(jets.beta)
        ## NOTE(review): the dphi values below are raw phi differences and
        ## are not wrapped into [-pi, pi]; confirm whether that is intended.
        for i in range(1,3):
            plots['RA'+str(i)+'pT'  ].fill(jets.apt[i])
            plots['RA'+str(i)+'mass'].fill(jets.amass[i])
            plots['RA'+str(i)+'dR'  ].fill(np.sqrt(np.power(jets.beta[2*i]-jets.beta[(2*i)-1],2)+np.power(jets.bphi[2*i]-jets.bphi[(2*i)-1],2)))
            plots['RA'+str(i)+'deta'].fill(abs(jets.beta[2*i]-jets.beta[(2*i)-1]))
            plots['RA'+str(i)+'dphi'].fill(abs(jets.bphi[2*i]-jets.bphi[(2*i)-1]))
        plots['RHpT'  ].fill(jets.hpt[1])
        plots['RHmass'].fill(jets.hmass[1])
        plots['RHdR'  ].fill(np.sqrt(np.power(jets.aeta[2]-jets.aeta[1],2)+np.power(jets.aphi[2]-jets.aphi[1],2)))
        plots['RHdeta'].fill(abs(jets.aeta[2]-jets.aeta[1]))
        plots['RHdphi'].fill(abs(jets.aphi[2]-jets.aphi[1]))
        ## x/x is 1 for every surviving event, so this counts passing events
        plots['npassed'].fill(jets.hpt[1]/jets.hpt[1])
        plots['RjetCSVV2'].dfill(jets.CSVV2)
        plots['RjetDeepFB'].dfill(jets.DeepFB)

    ############
    # Plotting #
    ############

    plt.clf()
    for p in plots:
        plots[p].plot()

    #%%
    if returnplots:
        return plots
    sys.exit()
def trig(files):
    """Build a three-step jet cut flow and b-tag score histograms, then exit.

    For each file in ``files`` the jet branches of the 'Events' tree are
    loaded, and the 'cutflow' histogram is filled with
      * 0 for every event,
      * 1 for events with at least four jets passing |eta| < 2.4 and pT > 15,
      * 2 for events with at least two of those jets passing DeepB > 0.4184.
    The CSVV2 and DeepFlavB scores are histogrammed after the kinematic cuts
    and before the DeepB cut.  After all files are processed every histogram
    is plotted, the first two elements of the cutflow histogram are printed,
    and ``sys.exit()`` is called, so this function never returns.
    """
    ## Create a dictionary of histogram objects
    plots = {
        'cutflow':   Hist(4,(-0.5,4.5),'Total / Passed 4jet pT and |eta| cut/ passed DeepB > 0.4184','Events','recplots/datacutflow'),
        "RjetCSVV2": Hist([0,0.1241,0.4184,0.7527,1],None,'RECO matched jet btagCSVV2 score','events','recplots/dataCSVV2'),
        "RjetDeepFB":Hist([0,0.0494,0.2770,0.7264,1],None,'RECO matched jet btagDeepFlavB score','events','recplots/dataDeepFB'),
    }
    for hist in plots.values():
        hist.title = files[0]

    ## Create an internal figure for pyplot to write to
    plt.figure(1)

    ## (attribute on the jets object, branch name in the Events tree)
    jet_branches = (
        ('eta', 'Jet_eta'),
        ('phi', 'Jet_phi'),
        ('pt', 'Jet_pt'),
        ('mass', 'Jet_mass'),
        ('CSVV2', 'Jet_btagCSVV2'),
        ('DeepB', 'Jet_btagDeepB'),
        ('DeepFB', 'Jet_btagDeepFlavB'),
    )

    ## Loop over all input files
    for path in files:
        print('Opening '+path)

        ## Open the file and retrieve our key branches
        tree = uproot.open(path).get('Events')
        jets = PhysObj('jets')
        for attr, branch in jet_branches:
            frame = pd.DataFrame(tree.array(branch))
            setattr(jets, attr, frame.rename(columns=inc))

        print('Processing ' + str(len(jets.eta)) + ' events')

        cutflow = plots['cutflow']

        ## Fill 0 bin of cut flow plots: one zero per event
        cutflow.fill(jets.pt.max(axis=1)*0)

        ev = Event(jets)
        jets.cut(abs(jets.eta)<2.4)
        jets.cut(jets.pt>15)

        ## x/x is 1 for surviving jets, so the row sum counts them
        n_kinematic = (jets.pt/jets.pt).sum(axis=1)
        n_kinematic = n_kinematic[n_kinematic>=4]
        cutflow.fill(n_kinematic/n_kinematic)

        plots['RjetCSVV2'].dfill(jets.CSVV2)
        plots['RjetDeepFB'].dfill(jets.DeepFB)

        jets.cut(jets.DeepB > 0.4184 )
        n_tagged = (jets.DeepB/jets.DeepB).sum(axis=1)
        n_tagged = n_tagged[n_tagged>=2]
        ## x*2/x puts a 2 into the cut flow for every passing event
        cutflow.fill(n_tagged*2/n_tagged)

        ev.sync()

    for hist in plots.values():
        hist.plot()
    print(plots['cutflow'][0],plots['cutflow'][1])
    sys.exit()
def compare(conf, option, stage):
    """Measure trigger efficiencies for one sample and write plots + pickles.

    The JSON file ``conf`` supplies the keys 'islhe', 'isdata', 'files',
    'weight' and 'name'.  Events from every listed file are selected with
    AK8-jet cuts (plus lepton cuts depending on ``option``), merged into one
    frame with one row per event, and histogrammed twice: once for all
    selected events (denominator) and once for those that also fired the
    trigger under study (numerator).  The ratio histograms are plotted and
    pickled.

    Parameters
    ----------
    conf : str
        Path of the JSON configuration file.
    option : str
        Lepton / dataset selection.  'MuonEG' and 'Parked' enable specific
        electron, muon and trigger requirements; other values apply none.
    stage : str
        'A', 'B', 'C' or 'D' produce efficiency plots under ``Effplots/``.
        Any string longer than one character (e.g. 'AB', 'ABC', 'CX')
        produces scale-factor plots under ``Seffplots/`` using the
        ``trig<stage>`` column ('CX' uses ``trigC``).

    Raises
    ------
    ValueError
        If ``stage`` is neither one of 'A'-'D' nor longer than one character.
    """
    #%%
    print(f"Analysing {conf} with {option} cuts")
    with open(conf) as f:
        confd = json.load(f)
    islhe = confd['islhe']
    isdata = confd['isdata']
    files = confd['files']
    if not isinstance(files, list):
        files = [files]
    fweights = confd['weight']
    if not isinstance(fweights, list):
        fweights = [fweights]
    name = confd['name']

    ylabel = f"{option} Ratio"
    if stage == "A":
        numplot = {
            'pt': Hist(27, (150, 1500),
                       'pT of AK8 jet passing cuts + triggers / passing cuts',
                       ylabel, f"Effplots/{name}_pTEfficiencyPlot_{option}_A"),
        }
    elif stage == "B":
        numplot = {
            'pt': Hist(50, (0, 1000),
                       'pT of AK8 jet passing cuts + triggers / passing cuts',
                       ylabel, f"Effplots/{name}_pTEfficiencyPlot_{option}_B"),
            'msoft': Hist(
                11, (90, 200),
                'softdrop mass of AK8 jet above 400GeV passing cuts+triggers / cuts',
                ylabel, f"Effplots/{name}_msoftEfficiencyPlot_{option}_B"),
            'DDBvL': Hist(
                20, (0.8, 1.0),
                'DDBvL of AK8 jet above 400GeV passing cuts+triggers / passing cuts',
                ylabel, f"Effplots/{name}_ddbvlEfficiencyPlot_{option}_B"),
        }
    elif stage == "C":
        numplot = {
            'pt': Hist(27, (150, 1500),
                       'pT of AK8 jet passing cuts + triggers / passing cuts',
                       ylabel, f"Effplots/{name}_pTEfficiencyPlot_{option}_C"),
            's2pt': Hist(
                61, (30, 1050),
                'pT of 2nd highest pT slimjet passing cuts+triggers / passing cuts',
                ylabel, f"Effplots/{name}_s2pTEfficiencyPlot_{option}_C"),
            'lowb': Hist(
                20, (0, 1.0),
                'Lowest deepB of two slimjets passing cuts+triggers / passing cuts',
                ylabel, f"Effplots/{name}_lowbEfficiencyPlot_{option}_C"),
        }
    elif stage == "D":
        numplot = {
            'pt': Hist(27, (150, 1500),
                       'pT of AK8 jet passing cuts + triggers / passing cuts',
                       ylabel, f"Effplots/{name}_pTEfficiencyPlot_{option}_D"),
        }
    elif len(stage) > 1:
        numplot = {
            'pt': Hist(5, (400, 650),
                       'pT of AK8 jet passing cuts+triggers / cuts',
                       ylabel, f"Seffplots/{name}_{stage}_{option}_ptScalePlot"),
            'msoft': Hist(6, (80, 200),
                          'msoft of AK8 jet passing cuts+triggers / cuts',
                          ylabel, f"Seffplots/{name}_{stage}_{option}_msoftScalePlot"),
            'DDBvL': Hist(4, (.80, 1),
                          'DDBvL of AK8 jet passing cuts+triggers / cuts',
                          ylabel, f"Seffplots/{name}_{stage}_{option}_ddbvlScalePlot"),
        }
        if "C" in stage:
            numplot.update({
                's2pt': Hist(
                    4, (140, 220),
                    'pT of 2nd highest pT slimjet passing cuts+triggers / cuts',
                    ylabel, f"Seffplots/{name}_{stage}_{option}_s2ptScalePlot"),
                's2deepb': Hist(
                    [.4184, .5856, .7527, 0.8764, 1], None,
                    'DeepB of 2nd highest DeepB slimjet passing cuts+triggers / cuts',
                    ylabel, f"Seffplots/{name}_{stage}_{option}_s2deepbScalePlot"),
            })
        if stage == "CX":
            ## CX studies the 250-400 GeV region, so it gets its own pT binning
            numplot.update({
                'pt': Hist(3, (250, 400),
                           'pT of AK8 jet passing cuts+triggers / cuts',
                           ylabel, f"Seffplots/{name}_{stage}_{option}_ptScalePlot"),
            })
    else:
        ## Fail before any file is opened instead of hitting an unbound
        ## 'numplot' further down.
        raise ValueError(f"Unrecognised stage {stage!r}")

    for p in numplot:
        numplot[p].title = f"{name} {stage}"
        numplot[p].ylim = (0, 1)
    ## Denominator histograms share the binning of the numerators
    denplot = cp.deepcopy(numplot)
    effplot = {}

    def _fill_eff(key, frame, column, trigcol):
        """Fill denplot[key] with frame[column] and numplot[key] with the rows where trigcol fired."""
        fired = frame[trigcol] == 1
        denplot[key].fill(frame[column], frame['extweight'])
        numplot[key].fill(frame[column][fired], frame['extweight'][fired])

    def _ratio(key):
        """Return numplot[key] divided by denplot[key] as a new histogram."""
        return numplot[key].divideby(denplot[key], split=True, errmethod='effnorm')

    def _both_tagged(frame):
        """Keep rows where both slimjets pass DeepB > 0.4184."""
        return frame[np.logical_and(frame['s1btagDeepB'] > .4184,
                                    frame['s2btagDeepB'] > .4184)]

    def _both_pt_above(frame, threshold):
        """Keep rows where both slimjets have pT above threshold."""
        return frame[np.logical_and(frame['s1pt'] > threshold,
                                    frame['s2pt'] > threshold)]

    def _dump_effplot(path):
        """Pickle effplot to path, closing the file afterwards."""
        with open(path, 'wb') as out:
            pickle.dump(effplot, out)

    elecvars = ['pt', 'eta', 'mvaFall17V2Iso_WP90']
    muvars = [
        'pt', 'eta', 'mediumPromptId', 'miniIsoId', 'softId', 'dxy', 'dxyErr',
        'ip3d'
    ]
    l1vars = [
        'SingleJet180', 'Mu7_EG23er2p5', 'Mu7_LooseIsoEG20er2p5',
        'Mu20_EG10er2p5', 'SingleMu22', 'SingleMu25',
        'DoubleJet112er2p3_dEta_Max1p6', 'DoubleJet150er2p5'
    ]
    hltvars = [
        'AK8PFJet500', 'Mu8_TrkIsoVVL_Ele23_CaloIdL_TrackIdL_IsoVL_DZ',
        'Mu23_TrkIsoVVL_Ele12_CaloIdL_TrackIdL_IsoVL', 'Mu27_Ele37_CaloIdL_MW',
        'Mu37_Ele27_CaloIdL_MW',
        'AK8PFJet330_TrimMass30_PFAK8BoostedDoubleB_np4',
        'DoublePFJets116MaxDeta1p6_DoubleCaloBTagDeepCSV_p71'
    ]
    slimvars = ['pt', 'eta', 'phi', 'btagDeepB', 'puId']

    print("Collecting event information")
    events, jets, elecs, mus, l1s, hlts, sjets = [], [], [], [], [], [], []
    for i, fname in enumerate(files):
        events.append(uproot.open(fname).get('Events'))
        jets.append(loadjets(PhysObj(f"Jets{i}"), events[i], islhe))
        ## Scale the per-event weight by this file's weight from the config
        jets[i].extweight = jets[i].extweight * fweights[i]
        elecs.append(
            PhysObj(f"Electron{i}", fname, *elecvars, varname='Electron'))
        mus.append(PhysObj(f"Muon{i}", fname, *muvars, varname='Muon'))
        mus[i].eta = abs(mus[i].eta)
        mus[i].ip = abs(mus[i].dxy / mus[i].dxyErr)
        l1s.append(PhysObj(f"L1{i}", fname, *l1vars, varname='L1'))
        hlts.append(PhysObj(f"HLT{i}", fname, *hltvars, varname='HLT'))
        sjets.append(PhysObj(f"Slimjet{i}", fname, *slimvars, varname='Jet'))

    evs = []
    for i in range(len(files)):
        evs.append(Event(jets[i], l1s[i], hlts[i], elecs[i], mus[i]))
        if "C" in stage:
            evs[i].register(sjets[i])

    for jet in jets:
        jet.cut(jet.pt > 170)
        jet.cut(abs(jet.eta) < 2.4)
        jet.cut(jet.DDBvL > 0.8)
        jet.cut(jet.DeepB > 0.4184)
        jet.cut(jet.msoft > 90)
        jet.cut(jet.mass > 90)
        jet.cut(jet.msoft < 200)
        jet.cut(jet.npvsG >= 1)
        if "AB" in stage:
            jet.cut(jet.pt >= 400)
        elif stage == "CX":
            jet.cut(jet.pt >= 250)
            jet.cut(jet.pt < 400)

    if option == 'MuonEG':
        for elec in elecs:
            elec.cut(elec.pt > 15)
            elec.cut(abs(elec.eta) < 2.5)
            elec.cut(elec.mvaFall17V2Iso_WP90 > 0.9)
    for mu in mus:
        if option == 'MuonEG':
            mu.cut(mu.pt > 10)
            mu.cut(abs(mu.eta) < 2.4)
            mu.cut(mu.mediumPromptId > 0.9)
            mu.cut(mu.miniIsoId >= 2)
        elif option == 'Parked':
            mu.cut(mu.softId > 0.9)
            mu.cut(abs(mu.eta) < 2.4)
            mu.cut(mu.pt > 7)
            mu.cut(mu.ip > 2)
            mu.cut(mu.ip3d < 0.5)
        #else: raise(NameError("Dataset name does not match expected"))

    for ev in evs:
        ev.sync()

    if option == 'MuonEG':
        for i in range(len(files)):
            l1, hlt = l1s[i], hlts[i]
            ## Require any of the four L1 + HLT muon-electron trigger paths
            l1.cut(
                np.logical_or.reduce(
                    (np.logical_and(
                        np.logical_or(l1.Mu7_EG23er2p5,
                                      l1.Mu7_LooseIsoEG20er2p5),
                        hlt.Mu8_TrkIsoVVL_Ele23_CaloIdL_TrackIdL_IsoVL_DZ),
                     np.logical_and(
                         np.logical_or(l1.Mu20_EG10er2p5, l1.SingleMu22),
                         hlt.Mu23_TrkIsoVVL_Ele12_CaloIdL_TrackIdL_IsoVL),
                     np.logical_and(l1.SingleMu25,
                                    hlt.Mu27_Ele37_CaloIdL_MW),
                     np.logical_and(l1.SingleMu25,
                                    hlt.Mu37_Ele27_CaloIdL_MW))))
            ## Makes a frame whose elements have the highest pt of the muon or electron in that position
            passf = elecs[i].pt.combine(mus[i].pt, np.maximum, fill_value=0)
            ## Drops pt < 25
            passf = passf[passf > 25]
            ## Drops empty rows
            passf = passf.dropna(how='all')
            ## The remaining events must have had an electron or muon with pt > 25 - the rest are removed
            elecs[i].trimTo(passf)
        for ev in evs:
            ev.sync()

    if "C" in stage or stage == "AB":
        for i in range(len(files)):
            print(f"Processing file {i} slimjets")
            ## Keep only the highest-pT AK8 jet of each event
            jets[i].cut(
                jets[i].pt.rank(axis=1, method='first', ascending=False) == 1)
            sjets[i].cut(abs(sjets[i].eta) < 2.4)
            sjets[i].cut(sjets[i].pt > 30)
            sjets[i].cut(sjets[i].puId >= 1)
            sjets[i].cut(sjets[i].pt > 140)
            sjets[i].cut(sjets[i].btagDeepB > 0.4184)
            evs[i].sync()

            ## This entire block is designed to remove any events whose defined a and b jets
            ## have a dR > 0.8 to the highest pT passing jet
            print("Computing dR")
            if sjets[i].pt.shape[0] < 1 or jets[i].pt.shape[0] < 1:
                continue
            sjjdr = computedR(jets[i], sjets[i], ['Fatjet', 'slimjet'])
            jlist = []
            print("Assembling slim frame")
            ## NOTE(review): filter(like="Fatjet 1") is a substring match and
            ## would also pick up "Fatjet 10" and above - confirm the column
            ## naming of computedR if ten or more fatjet columns can occur.
            for j in range(jets[i].pt.shape[1]):
                jlist.append(sjjdr.filter(like=f"Fatjet {j+1}"))
                jlist[j] = jlist[j].rename(columns=lambda x: int(x[-2:]))
            ## Nudge exact zeros so a genuine dR of 0 is not mistaken for the
            ## fillna(0) "no match" marker used below
            jlist[0][jlist[0] == 0] = jlist[0] + 0.001
            sjframe = jlist[0][jlist[0] < 0.8].fillna(0)
            for j in range(1, jets[i].pt.shape[1]):
                jlist[j][jlist[j] == 0] = jlist[j] + 0.001
                sjframe = sjframe + jlist[j][jlist[j] < 0.8].fillna(0)
            sjets[i].cut(sjframe != 0)
            ## Trims the collection of slimjets < dR 0.8 to only events with a 2nd passing jet
            sjets[i].trimTo(sjets[i].cut(sjets[i].pt.rank(
                axis=1, method='first', ascending=False) == 2, split=True).pt)
            if stage == "AB":
                ## For AB, drop the events that still have such slimjets
                for elem in sjets[i]:
                    evs[i].frame = evs[i].frame.loc[
                        evs[i].frame.index.difference(sjets[i][elem].index)]
        for ev in evs:
            ev.sync()

    print("Assembling finished frame")
    framepieces = []
    for i in range(len(files)):
        tempframe = pd.DataFrame()
        ## One value per event: the property of the highest-pT AK8 jet
        leadjet = jets[i]['pt'].rank(
            axis=1, method='first', ascending=False) == 1
        for prop in jets[i]:
            tempframe[prop] = jets[i][prop][leadjet].max(axis=1)
        if option == "MuonEG" or option == "Parked":
            leadmu = mus[i]['pt'].rank(
                axis=1, method='first', ascending=False) == 1
            for prop in mus[i]:
                tempframe[f"m{prop}"] = mus[i][prop][leadmu].max(axis=1)
        if "C" in stage:
            sjrank = sjets[i]['pt'].rank(
                axis=1, method='first', ascending=False)
            for prop in sjets[i]:
                tempframe[f"s1{prop}"] = sjets[i][prop][sjrank == 1].max(axis=1)
                tempframe[f"s2{prop}"] = sjets[i][prop][sjrank == 2].max(axis=1)
        tempframe['trigA'] = np.logical_and(l1s[i].SingleJet180[1],
                                            hlts[i].AK8PFJet500[1])
        tempframe['trigB'] = np.logical_and(
            l1s[i].SingleJet180[1],
            hlts[i].AK8PFJet330_TrimMass30_PFAK8BoostedDoubleB_np4[1])
        tempframe['trigC'] = np.logical_and(
            np.logical_or(l1s[i].DoubleJet112er2p3_dEta_Max1p6[1],
                          l1s[i].DoubleJet150er2p5[1]),
            hlts[i].DoublePFJets116MaxDeta1p6_DoubleCaloBTagDeepCSV_p71[1])
        tempframe['trigAB'] = np.logical_or(tempframe['trigA'],
                                            tempframe['trigB'])
        tempframe['trigABC'] = np.logical_or(tempframe['trigAB'],
                                             tempframe['trigC'])
        framepieces.append(tempframe)
    mergedframe = pd.concat(framepieces, ignore_index=True)
    mergedframe = mergedframe.dropna()

    if conf == "GGH_HPT.json":
        ## pT-dependent signal weight, floored at 0.1
        sigweight = (3.9 - 0.4 * np.log2(mergedframe['pt']))
        sigweight[sigweight < 0.1] = 0.1
        mergedframe['extweight'] = mergedframe['extweight'] * sigweight
    if option == 'Parked' and not isdata:
        mergedframe['extweight'] = lumipucalc(mergedframe)

    print("Producing histograms")
    if stage == "A":
        _fill_eff('pt', mergedframe, 'pt', 'trigA')
        effplot.update({'pt': _ratio('pt')})

    if stage == "B":
        _fill_eff('pt', mergedframe, 'pt', 'trigB')
        effplot.update({'pt': _ratio('pt')})
        ## Trim down events to only passing values after the pt plateau, for further studies
        mergedframe = mergedframe[mergedframe['pt'] > 400]
        _fill_eff('msoft', mergedframe, 'msoft', 'trigB')
        effplot.update({'msoft': _ratio('msoft')})
        _fill_eff('DDBvL', mergedframe, 'DDBvL', 'trigB')
        effplot.update({'DDBvL': _ratio('DDBvL')})

    if stage == "C":
        tempframe = _both_tagged(_both_pt_above(mergedframe, 140))
        _fill_eff('pt', tempframe, 'pt', 'trigC')
        effplot.update({'pt': _ratio('pt')})

        ## 2nd-highest slimjet pT: s2pt where s1 leads, otherwise s1pt
        tempframe = _both_tagged(
            mergedframe[mergedframe['s1pt'] > mergedframe['s2pt']])
        _fill_eff('s2pt', tempframe, 's2pt', 'trigC')
        tempframe = _both_tagged(
            mergedframe[mergedframe['s1pt'] <= mergedframe['s2pt']])
        _fill_eff('s2pt', tempframe, 's1pt', 'trigC')
        effplot.update({'s2pt': _ratio('s2pt')})

        ## Lowest DeepB of the two slimjets, same pattern as above
        tempframe = _both_pt_above(
            mergedframe[
                mergedframe['s1btagDeepB'] > mergedframe['s2btagDeepB']], 150)
        _fill_eff('lowb', tempframe, 's2btagDeepB', 'trigC')
        tempframe = _both_pt_above(
            mergedframe[
                mergedframe['s1btagDeepB'] <= mergedframe['s2btagDeepB']], 150)
        _fill_eff('lowb', tempframe, 's1btagDeepB', 'trigC')
        effplot.update({'lowb': _ratio('lowb')})

    if stage == "D":
        _fill_eff('pt', mergedframe, 'pt', 'trigC')
        effplot.update({'pt': _ratio('pt')})

    if len(stage) > 1:
        ## CX is the low-pT variant of the C trigger study, so it reads trigC
        trigcol = "trigC" if stage == "CX" else f"trig{stage}"
        _fill_eff('pt', mergedframe, 'pt', trigcol)
        _fill_eff('msoft', mergedframe, 'msoft', trigcol)
        _fill_eff('DDBvL', mergedframe, 'DDBvL', trigcol)
        if "C" in stage:
            tempframe = mergedframe[mergedframe['s1pt'] > mergedframe['s2pt']]
            _fill_eff('s2pt', tempframe, 's2pt', trigcol)
            tempframe = mergedframe[mergedframe['s1pt'] <= mergedframe['s2pt']]
            _fill_eff('s2pt', tempframe, 's1pt', trigcol)

            tempframe = mergedframe[
                mergedframe['s1btagDeepB'] > mergedframe['s2btagDeepB']]
            _fill_eff('s2deepb', tempframe, 's2btagDeepB', trigcol)
            tempframe = mergedframe[
                mergedframe['s1btagDeepB'] <= mergedframe['s2btagDeepB']]
            _fill_eff('s2deepb', tempframe, 's1btagDeepB', trigcol)

        for p in denplot:
            effplot.update({p: _ratio(p)})
        _dump_effplot(f"Seffplots/{name}_{stage}_{option}_ScaleFactor.p")
    else:
        _dump_effplot(f"Effplots/{name}_EfficiencyPlot_{option}_{stage}.p")

    for p in effplot:
        effplot[p].plot(htype='err')
    ## Written again after plotting, for every stage, as in the original
    _dump_effplot(f"Effplots/{name}_EfficiencyPlot_{option}_{stage}.p")
def main():
    """Plot leading fat-jet variables for signal, QCD background and data.

    Reads the hard-coded signal, data and QCD background files, applies the
    same fat-jet selection to every sample, keeps only signal events in which
    all four GEN bs have the same fat jet as their nearest jet within
    dR < 0.8, and then overlays the unit-normalised distributions of every
    variable in ``netvars`` (plus LHE_HT) for the three samples.

    Takes no arguments and returns nothing.  Calls ``sys.exit()`` if the GEN
    record does not contain exactly two As per event or if the bs are not
    stored pairwise in the order of their A parents.
    """
    #%%################
    # Plots and Setup #
    ###################

    ## Define what pdgId we expect the A to have
    ## (supersedes the earlier 9000006 setting)
    Aid = 36
    ## Per-jet variables compared between the three samples
    netvars = ['pt', 'eta', 'phi', 'mass', 'CSVV2', 'DeepB', 'msoft', 'DDBvL']

    sigfile = 'GGH1M_Nano.root'
    datafile = 'TestData.root'

    ## QCD background skims, one per HT bin: (HT range, file-name suffix)
    bgdir = '/cms/data/store/user/abrinke1/NanoAOD/2018/MC/QCD'
    bgbins = [
        ('200to300', '18p74M'),
        ('300to500', '17p13M'),
        ('500to700', '8p292M'),
        ('700to1000', '5p845M'),
        ('1000to1500', '1p953M'),
        ('1500to2000', '511p5k'),
        ('2000toInf', '287p3k'),
    ]
    bgfiles = [
        f'{bgdir}/QCD_HT{ht}_BGenFilter_TuneCP5_13TeV-madgraph-pythia8/Nano25Oct2019/SkimsFatJet/'
        f'QCD_HT{ht}_BGen_nFat1_doubB_0p5_massH_60_200_dR_2p4_{tag}.root'
        for ht, tag in bgbins
    ]
    ## PhysObj names and sampling fractions, in the same order as bgfiles
    bgnames = ['300', '500', '700', '1000', '1500', '2000', 'inf']
    bgweights = [1, 0.259, 0.0515, 0.01666, 0.00905, 0.003594, 0.001401]

    ## Make a dictionary of histogram objects: for every variable one labelled
    ## canvas histogram plus one bare histogram per sample (BG, SG, DT).
    ## (key, bins, range, label used in the title, output file name)
    plotspecs = [
        ('pt', 80, (150, 550), 'pT', 'pt'),
        ('eta', 15, (0, 3), '|eta|', 'eta'),
        ('phi', 32, (-3.2, 3.2), 'phi', 'phi'),
        ('mass', 50, (0, 200), 'mass', 'mass'),
        ('CSVV2', 22, (0, 1.1), 'CSVV2', 'CSVV2'),
        ('DeepB', 22, (0, 1.1), 'DeepB', 'DeepB'),
        ('msoft', 50, (0, 200), 'msoft', 'msoft'),
        ('DDBvL', 22, (0, 1.1), 'DDBvL', 'DDBvL'),
        ('LHEHT', 400, (0, 4000), 'LHE_HT', 'LHE_HT'),
    ]
    plots = {}
    for key, nbins, bounds, label, fname in plotspecs:
        plots[key] = Hist(
            nbins, bounds,
            f'{label} for highest pT jet in passing signal (red), BG (blue), and data (black) events',
            '% Distribution', f'netplots/{fname}')
        for sample in ('BG', 'SG', 'DT'):
            plots[sample + key] = Hist(nbins, bounds)

    ## Create an internal figure for pyplot to write to
    plt.figure(1)

    def frame(events, branch, *masks):
        ## Read one branch, apply the masks in order, and return it as a
        ## DataFrame whose columns are renamed through inc
        values = events.array(branch)
        for mask in masks:
            values = values[mask]
        return pd.DataFrame(values).rename(columns=inc)

    def fatjets(name, events, lheevents):
        ## Build a PhysObj holding the FatJet branches of `events`;
        ## LHE_HT is read from `lheevents`
        obj = PhysObj(name)
        obj.eta = np.abs(frame(events, 'FatJet_eta'))
        obj.phi = frame(events, 'FatJet_phi')
        obj.pt = frame(events, 'FatJet_pt')
        obj.mass = frame(events, 'FatJet_mass')
        obj.CSVV2 = frame(events, 'FatJet_btagCSVV2')
        obj.DeepB = frame(events, 'FatJet_btagDeepB')
        obj.DDBvL = frame(events, 'FatJet_btagDDBvL')
        obj.msoft = frame(events, 'FatJet_msoftdrop')
        obj.LHEHT = frame(lheevents, 'LHE_HT')
        return obj

    def leadingjet(jets):
        ## One row per event with the netvars of the highest-pT jet.
        ## ascending=False is required: rank() defaults to ascending order,
        ## where rank 1 would be the lowest-pT jet.
        lead = jets['pt'].rank(axis=1, ascending=False, method='first') == 1
        out = pd.DataFrame()
        for prop in netvars:
            out[prop] = jets[prop][lead].max(axis=1)
        out['LHEHT'] = jets['LHEHT']
        return out

    #####################
    # Loading Variables #
    #####################
    print('Opening ', sigfile, ', ', datafile)

    ## Open our files and grab the events trees
    sigevents = uproot.open(sigfile).get('Events')
    dataevents = uproot.open(datafile).get('Events')
    bgevents = []
    for bfile in bgfiles:
        print('Opening ', bfile)
        bgevents.append(uproot.open(bfile).get('Events'))

    pdgida = sigevents.array('GenPart_pdgId')
    paridxa = sigevents.array('GenPart_genPartIdxMother')
    parida = pdgida[paridxa]

    ## Removes all particles that do not have A parents
    ## from the GenPart arrays, then removes all particles
    ## that are not bs after resizing the pdgid array to be a valid mask
    fromA = abs(parida) == Aid
    isb = abs(pdgida)[fromA] == 5
    bs = PhysObj('bs')
    bs.oeta = frame(sigevents, 'GenPart_eta', fromA, isb)
    bs.ophi = frame(sigevents, 'GenPart_phi', fromA, isb)
    bs.opt = frame(sigevents, 'GenPart_pt', fromA, isb)

    ## Test b order corresponds to As: b4 must have a greater mother idx than
    ## b1, and (b1,b2) and (b3,b4) must each share a mother in every event.
    ## max() is used so that a single unpaired event is enough to halt.
    testbs = frame(sigevents, 'GenPart_genPartIdxMother', fromA, isb)
    unpaired = abs(testbs[2] - testbs[1]) + abs(testbs[4] - testbs[3])
    if ((testbs[4] - testbs[1]).min() <= 0) or (unpaired.max() != 0):
        print('b to A ordering violated - time to do it the hard way')
        sys.exit()

    ## As are the particles with pdgId Aid whose mother is a Higgs (25)
    fromH = abs(parida) == 25
    isA = abs(pdgida)[fromH] == Aid
    As = PhysObj('As')
    As.oeta = frame(sigevents, 'GenPart_eta', fromH, isA)
    As.ophi = frame(sigevents, 'GenPart_phi', fromH, isA)
    As.opt = frame(sigevents, 'GenPart_pt', fromH, isA)
    As.omass = frame(sigevents, 'GenPart_mass', fromH, isA)

    ## The Higgs itself: pdgId 25 whose mother is not another 25
    notfromH = abs(parida) != 25
    isH = abs(pdgida)[notfromH] == 25
    higgs = PhysObj('higgs')
    higgs.eta = frame(sigevents, 'GenPart_eta', notfromH, isH)
    higgs.phi = frame(sigevents, 'GenPart_phi', notfromH, isH)
    higgs.pt = frame(sigevents, 'GenPart_pt', notfromH, isH)

    sigjets = fatjets('sigjets', sigevents, sigevents)

    slimjets = PhysObj('slimjets')
    slimjets.eta = frame(sigevents, 'Jet_eta')
    slimjets.phi = frame(sigevents, 'Jet_phi')
    slimjets.pt = frame(sigevents, 'Jet_pt')
    slimjets.mass = frame(sigevents, 'Jet_mass')
    slimjets.DeepB = frame(sigevents, 'Jet_btagDeepB')
    slimjets.DeepFB = frame(sigevents, 'Jet_btagDeepFlavB')
    slimjets.puid = frame(sigevents, 'Jet_puId')

    ## NOTE(review): the data LHE_HT is filled from the *signal* tree, so the
    ## DT LHEHT histogram does not describe data -- presumably a placeholder
    ## because the data file carries no LHE_HT branch; confirm before relying
    ## on it.
    datajets = fatjets('datajets', dataevents, sigevents)

    bgjets = [fatjets(name, events, events)
              for name, events in zip(bgnames, bgevents)]
    del bgevents

    print('Processing ' + str(len(bs.oeta)) + ' events')

    ## Figure out how many bs and jets there are
    nb = bs.oeta.shape[1]
    njet = sigjets.eta.shape[1]
    na = As.oeta.shape[1]
    if na != 2:
        print("Expected two As per event, found " + str(na) + ", halting")
        sys.exit()

    ## Create sorted versions of A values by pt (1 = highest pt)
    arank = As.opt.rank(axis=1, ascending=False, method='first')
    for prop in ['eta', 'phi', 'pt', 'mass']:
        As[prop] = pd.DataFrame()
        for i in range(1, 3):
            As[prop][i] = As['o' + prop][arank == i].max(axis=1)

    ## Reorder our b dataframes to match sorted A parents: tframe is True in
    ## the columns of the bs whose parent is the highest-pt A
    leadA = arank == 1
    tframe = pd.DataFrame()
    tframe[1] = leadA[1]
    tframe[2] = leadA[1]
    tframe[3] = leadA[2]
    tframe[4] = leadA[2]
    for prop in ['eta', 'phi', 'pt']:
        fromlead = bs['o' + prop][tframe]
        fromsub = bs['o' + prop][~tframe]
        bs[prop] = pd.DataFrame()
        ## pd.concat replaces Series.append, which pandas 2.0 removed
        bs[prop][1] = pd.concat([fromlead[1].dropna(), fromlead[3].dropna()]).sort_index()
        bs[prop][2] = pd.concat([fromlead[2].dropna(), fromlead[4].dropna()]).sort_index()
        bs[prop][3] = pd.concat([fromsub[1].dropna(), fromsub[3].dropna()]).sort_index()
        bs[prop][4] = pd.concat([fromsub[2].dropna(), fromsub[4].dropna()]).sort_index()

    ev = Event(bs, sigjets, As, higgs)

    ## Identical fat-jet selection for signal, data and every background bin
    for jets in [sigjets, datajets] + bgjets:
        jets.cut(jets.pt > 170)
        jets.cut(abs(jets.eta) < 2.4)
        jets.cut(jets.DDBvL > 0.6)
        jets.cut(jets.DeepB > 0.4184)
        jets.cut(jets.msoft > 0.25)

    bs.cut(bs.pt > 5)
    bs.cut(abs(bs.eta) < 2.4)
    ev.sync()

    slimjets.cut(slimjets.DeepB > 0.1241)
    slimjets.cut(slimjets.DeepFB > 0.277)
    slimjets.cut(slimjets.puid > 0)
    ## Trim to the signal fat jets: slimjets come from the signal tree, and
    ## the loop variable `jets` above ends on the last background sample
    slimjets.trimTo(sigjets.eta)

    ##############################
    # Processing and Calculation #
    ##############################

    ## NOTE(review): sigjets.eta already holds |eta| while bs.eta is signed,
    ## and the phi differences below are not wrapped into [-pi, pi]; both
    ## inflate dR for some true matches -- confirm whether this is intended.

    ## Create our dR dataframe by populating its first column and naming it accordingly
    jbdr2 = pd.DataFrame(
        np.power(sigjets.eta[1] - bs.eta[1], 2) +
        np.power(sigjets.phi[1] - bs.phi[1], 2)).rename(columns={1: 'Jet 1 b 1'})
    sjbdr2 = pd.DataFrame(
        np.power(slimjets.eta[1] - bs.eta[1], 2) +
        np.power(slimjets.phi[1] - bs.phi[1], 2)).rename(columns={1: 'Jet 1 b 1'})

    ## Loop over jet x b combinations
    for j in range(1, njet + 1):
        for b in range(1, nb + 1):
            ## The (1,1) column was created above
            if j + b == 2:
                continue
            ## Make our column name
            colname = "Jet " + str(j) + " b " + str(b)
            ## Compute and store the dr of the given b and jet for every event at once
            jbdr2[colname] = pd.DataFrame(
                np.power(sigjets.eta[j] - bs.eta[b], 2) +
                np.power(sigjets.phi[j] - bs.phi[b], 2))
            sjbdr2[colname] = pd.DataFrame(
                np.power(slimjets.eta[j] - bs.eta[b], 2) +
                np.power(slimjets.phi[j] - bs.phi[b], 2))

    def nearest(dr2, b):
        ## dR from b number `b` to its nearest jet only (other jets masked),
        ## with the columns renamed to the integer jet number
        dr = np.sqrt(dr2.filter(like='b ' + str(b)))
        dr = dr[dr.rank(axis=1, method='first') == 1]
        return dr.rename(columns=lambda x: int(x[4:6]))

    ## Create a copy array to collapse in jets instead of bs
    blist = [nearest(jbdr2, b) for b in range(1, nb + 1)]
    sblist = [nearest(sjbdr2, b) for b in range(1, nb + 1)]

    ## Cut our events to only events with 4 bs in one fatjet of dR<0.8.
    ## hit/hit is 1 where a b is matched and NaN elsewhere, so the sum is 4
    ## only for a jet that all four bs are matched to.
    hit = blist[0][blist[0] < 0.8].fillna(0)
    fjets = hit / hit
    for i in range(1, 4):
        hit = blist[i][blist[i] < 0.8].fillna(0)
        fjets = fjets + hit / hit
    fjets = fjets.max(axis=1)
    fjets = fjets[fjets == 4].dropna()
    sigjets.trimTo(fjets)
    ev.sync()

    #######################
    # Building the frames #
    #######################
    datajetframe = leadingjet(datajets)
    sigjetframe = leadingjet(sigjets)

    print('Signal cut to', sigjetframe.shape[0], ' events')
    print('Data has', datajetframe.shape[0], 'events')

    ## Sample every HT bin down by its weight before merging them
    bgpieces = []
    for bgjet, weight in zip(bgjets, bgweights):
        bgpieces.append(leadingjet(bgjet).sample(frac=weight, random_state=6))
    bgjetframe = pd.concat(bgpieces)
    bgjetframe = bgjetframe[bgjetframe != 0]
    bgjetframe = bgjetframe.dropna()
    print('Background has', bgjetframe.shape[0], 'events')
    del bgpieces

    plotvars = netvars + ['LHEHT']
    for col in plotvars:
        plots['BG' + col].fill(bgjetframe[col])
        plots['SG' + col].fill(sigjetframe[col])
        plots['DT' + col].fill(datajetframe[col])

    plt.clf()
    ## Normalise every sample histogram to unit area
    for col in plotvars:
        for sample in ('BG', 'SG', 'DT'):
            hist = plots[sample + col]
            hist[0] = hist[0] / sum(hist[0])

    for col in plotvars:
        plt.clf()
        plots['SG' + col].make(color='red', linestyle='-', htype='step')
        plots['DT' + col].make(color='black', linestyle='--', htype='step')
        plots['BG' + col].make(color='blue', linestyle=':', htype='step')
        plots[col].plot(same=True)
def ana(files,returnplots=False):
    """Match GEN bs from the A decays to RECO jets and plot the result.

    For every input file the GEN bs, As and Higgs are read from the GenPart
    branches, each b is matched to its nearest RECO jet, events are reduced
    to those with ``resjets`` distinct jets matched within dR < 0.4, and A
    and Higgs candidates are rebuilt from the matched jets.  All histograms
    are filled across the files and drawn once at the end.

    Parameters
    ----------
    files : sequence of str
        Paths of the ROOT files to process; ``files[0]`` is also used as the
        title of every histogram.
    returnplots : bool, optional
        If true, return the ``plots`` dictionary after plotting.

    Returns
    -------
    dict
        The ``plots`` dictionary, only when ``returnplots`` is true.
        Otherwise the function ends by calling ``sys.exit()``.

    Notes
    -----
    ``sys.exit()`` is also called if a file does not contain exactly two As
    per event or if the bs are not stored pairwise in the order of their A
    parents.
    """
    #%%################
    # Plots and Setup #
    ###################

    ## Define what pdgId we expect the A to have
    Aid = 9000006
    ## How many resolved jets we want to target with our analysis
    resjets = 4

    ## Make a dictionary of histogram objects
    bjplots = {}
    for i in range(1,5):
        bjplots.update({
        f"s_beta{i}":    Hist(33 ,(-3.3,3.3)   ,f'GEN b {i} Eta (ranked by pT)','Events',f'upplots/s_beta{i}'),
        f"s_bpT{i}":     Hist(60 ,(0,120)      ,f'GEN pT of b {i} (ranked by pT)','Events',f'upplots/s_bpT{i}'),
        f"s_bjetpT{i}":  Hist(60 ,(0,120)      ,f'Matched RECO jet {i} pT (ranked by b pT)','Events',f'upplots/s_RjetpT{i}'),
        f"s_bjeteta{i}": Hist(33 ,(-3.3,3.3)   ,f'Matched RECO jet {i} Eta (ranked by b pT)','Events',f'upplots/s_Rjeteta{i}'),
        f"s_bjdR{i}":    Hist(90 ,(0,3)        ,f'GEN b {i} (ranked by pT) to matched jet dR','Events',f'upplots/s_bjdR{i}')
        })
    plots = {
        "HpT":      Hist(60 ,(0,320)    ,'GEN Higgs pT','Events','upplots/HpT'),
        "A1pT":     Hist(80 ,(0,160)    ,'Highest GEN A pT','Events','upplots/A1pT'),
        "A2pT":     Hist(80 ,(0,160)    ,'Lowest GEN A pT','Events','upplots/A2pT'),
        "AdR":      Hist(50 ,(0,5)      ,'GEN A1 to A2 dR','Events','upplots/AdR'),
        "bdRA1":    Hist(50 ,(0,5)      ,'GEN dR between highest pT A child bs','Events','upplots/bdRA1'),
        "bdRA2":    Hist(50 ,(0,5)      ,'GEN dR between lowest pT A child bs','Events','upplots/bdRA2'),
        "bdetaA1":  Hist(34 ,(0,3.4)    ,'GEN |deta| between highest-A child bs','Events','upplots/bdetaA1'),
        "bdetaA2":  Hist(34 ,(0,3.4)    ,'GEN |deta| between lowest-A child bs','Events','upplots/bdetaA2'),
        "bdphiA1":  Hist(34 ,(0,3.4)    ,'GEN |dphi| between highest-A child bs','Events','upplots/bdphiA1'),
        "bdphiA2":  Hist(34 ,(0,3.4)    ,'GEN |dphi| between lowest-A child bs','Events','upplots/bdphiA2'),
        "bphi":     Hist(66 ,(-3.3,3.3) ,'GEN b Phi','Events','upplots/bphi'),
        "bjdR":     Hist(100,(0,2)      ,'All GEN bs to matched jet dR','Events','upplots/bjdR'),
        "RjetpT":   Hist(100,(0,100)    ,'RECO matched jet pT','Events','upplots/RjetpT'),
        "Rjeteta":  Hist(66 ,(-3.3,3.3) ,'RECO matched jet eta','Events','upplots/Rjeteta'),
        "RjetCSVV2":Hist(140 ,(-12,2)   ,'RECO matched jet btagCSVV2 score','events','upplots/RjetCSVV2'),
        "RjetDeepB":Hist(40 ,(-2.5,1.5) ,'RECO matched jet btagDeepB score','events','upplots/RjetDeepB'),
        "RjetDeepFB":Hist(24 ,(0,1.2)   ,'RECO matched jet btagDeepFlavB score','events','upplots/RjetDeepFB'),
        "RA1pT":    Hist(80 ,(0,160)    ,'pT of RECO A1 objects constructed from matched jets','Events','upplots/RA1pT'),
        "RA2pT":    Hist(80 ,(0,160)    ,'pT of RECO A2 objects constructed from matched jets','Events','upplots/RA2pT'),
        "RA1mass":  Hist(40 ,(0,80)     ,'reconstructed mass of A1 objects from matched jets','Events','upplots/RA1mass'),
        "RA2mass":  Hist(40 ,(0,80)     ,'reconstructed mass of A2 objects from matched jets','Events','upplots/RA2mass'),
        "RA1dR":    Hist(50 ,(0,5)      ,'dR between jet children of reconstructed A1 object','Events','upplots/RA1dR'),
        "RA2dR":    Hist(50 ,(0,5)      ,'dR between jet children of reconstructed A2 object','Events','upplots/RA2dR'),
        "RA1deta":  Hist(33 ,(0,3.3)    ,'|deta| between jet children of reconstructed A1 object','Events','upplots/RA1deta'),
        "RA2deta":  Hist(33 ,(0,3.3)    ,'|deta| between jet children of reconstructed A2 object','Events','upplots/RA2deta'),
        "RA1dphi":  Hist(33 ,(0,3.3)    ,'|dphi| between jet children of reconstructed A1 object','Events','upplots/RA1dphi'),
        "RA2dphi":  Hist(33 ,(0,3.3)    ,'|dphi| between jet children of reconstructed A2 object','Events','upplots/RA2dphi'),
        "RHmass":   Hist(80 ,(0,160)    ,'reconstructed mass of Higgs object from reconstructed As','Events','upplots/RHmass'),
        "RHpT":     Hist(100,(0,200)    ,'pT of reconstructed higgs object from reconstructed As','Events','upplots/RHpT'),
        "RHdR":     Hist(50 ,(0,5)      ,'dR between A children of reconstructed higgs object','Events','upplots/RHdR'),
        "RHdeta":   Hist(33 ,(0,3.3)    ,'|deta| between A children of reconstructed higgs object','Events','upplots/RHdeta'),
        "RHdphi":   Hist(33 ,(0,3.3)    ,'|dphi| between A children of reconstructed higgs object','Events','upplots/RHdphi'),
        ## Disabled; the matching fill below is skipped while this stays commented out
        ## "RalljetpT":    Hist(100,(0,100),'All RECO jet pT','Events','upplots/RalljetpT'),
        "bjdRvlogbpT1":   Hist2d([80,200],[[0,8],[0,2]],'log2(GEN b pT)','dR from 1st pT GEN b to matched RECO jet','upplots/bjdRvlogbpT1'),
        "bjdRvlogbpT2":   Hist2d([80,200],[[0,8],[0,2]],'log2(GEN b pT)','dR from 2nd pT GEN b to matched RECO jet','upplots/bjdRvlogbpT2'),
        "bjdRvlogbpT3":   Hist2d([80,200],[[0,8],[0,2]],'log2(GEN b pT)','dR from 3rd pT GEN b to matched RECO jet','upplots/bjdRvlogbpT3'),
        "bjdRvlogbpT4":   Hist2d([80,200],[[0,8],[0,2]],'log2(GEN b pT)','dR from 4th pT GEN b to matched RECO jet','upplots/bjdRvlogbpT4'),
        "jetoverbpTvlogbpT1": Hist2d([60,40],[[2,8],[0,4]],'log2(GEN b pT)','RECO jet pT / 1st GEN b pT for matched jets','upplots/jetoverbpTvlogbpT1'),
        "jetoverbpTvlogbpT2": Hist2d([60,40],[[2,8],[0,4]],'log2(GEN b pT)','RECO jet pT / 2nd GEN b pT for matched jets','upplots/jetoverbpTvlogbpT2'),
        "jetoverbpTvlogbpT3": Hist2d([60,40],[[2,8],[0,4]],'log2(GEN b pT)','RECO jet pT / 3rd GEN b pT for matched jets','upplots/jetoverbpTvlogbpT3'),
        "jetoverbpTvlogbpT4": Hist2d([60,40],[[2,8],[0,4]],'log2(GEN b pT)','RECO jet pT / 4th GEN b pT for matched jets','upplots/jetoverbpTvlogbpT4'),
        "npassed":  Hist(1  ,(0.5,1.5) ,'','Number of events that passed cuts', 'upplots/npassed'),
        "genAmass": Hist(40 ,(0,80)     ,'GEN mass of A objects','Events','upplots/Amass_g'),
        "cutAmass": Hist(40 ,(0,80)     ,'GEN mass of A objects that pass cuts','Events','upplots/Amass_c')
    }
    for plot in bjplots:
        bjplots[plot].title = files[0]
    for plot in plots:
        plots[plot].title = files[0]

    ## Create an internal figure for pyplot to write to
    plt.figure(1)

    def frame(events, branch, *masks):
        ## Read one branch, apply the masks in order, and return it as a
        ## DataFrame whose columns are renamed through inc
        values = events.array(branch)
        for mask in masks:
            values = values[mask]
        return pd.DataFrame(values).rename(columns=inc)

    def nearest(dr2, b):
        ## dR from b number `b` to its nearest jet only (other jets masked),
        ## with the columns renamed to the integer jet number
        dr = np.sqrt(dr2.filter(like='b '+str(b)))
        dr = dr[dr.rank(axis=1,method='first') == 1]
        return dr.rename(columns=lambda x:int(x[4:6]))

    ## Loop over input files
    for fname in files:

        #####################
        # Loading Variables #
        #####################
        print('Opening '+fname)

        ## Open our file and grab the events tree
        events = uproot.open(fname).get('Events')

        pdgida  = events.array('GenPart_pdgId')
        paridxa = events.array('GenPart_genPartIdxMother')
        parida  = pdgida[paridxa]

        ## Removes all particles that do not have A parents
        ## from the GenPart arrays, then removes all particles
        ## that are not bs after resizing the pdgid array to be a valid mask
        fromA = abs(parida)==Aid
        isb = abs(pdgida)[fromA]==5
        bs = PhysObj('bs')
        bs.oeta = frame(events,'GenPart_eta',fromA,isb)
        bs.ophi = frame(events,'GenPart_phi',fromA,isb)
        bs.opt  = frame(events,'GenPart_pt',fromA,isb)

        ## Test b order corresponds to As: b4 must have a greater mother idx
        ## than b1, and (b1,b2) and (b3,b4) must each share a mother in every
        ## event. max() is used so a single unpaired event is enough to halt.
        testbs = frame(events,'GenPart_genPartIdxMother',fromA,isb)
        unpaired = abs(testbs[2]-testbs[1]) + abs(testbs[4]-testbs[3])
        if ((testbs[4]-testbs[1]).min() <= 0) or (unpaired.max() != 0):
            print('b to A ordering violated - time to do it the hard way')
            sys.exit()

        ## As are the particles with pdgId Aid whose mother is a Higgs (25)
        fromH = abs(parida)==25
        isA = abs(pdgida)[fromH]==Aid
        As = PhysObj('As')
        As.oeta = frame(events,'GenPart_eta',fromH,isA)
        As.ophi = frame(events,'GenPart_phi',fromH,isA)
        As.opt  = frame(events,'GenPart_pt',fromH,isA)
        As.omass = frame(events,'GenPart_mass',fromH,isA)

        ## The Higgs itself: pdgId 25 whose mother is not another 25
        notfromH = abs(parida)!=25
        isH = abs(pdgida)[notfromH]==25
        higgs = PhysObj('higgs')
        higgs.eta = frame(events,'GenPart_eta',notfromH,isH)
        higgs.phi = frame(events,'GenPart_phi',notfromH,isH)
        higgs.pt  = frame(events,'GenPart_pt',notfromH,isH)

        jets = PhysObj('jets')
        jets.eta = frame(events,'Jet_eta')
        jets.phi = frame(events,'Jet_phi')
        jets.pt  = frame(events,'Jet_pt')
        jets.mass = frame(events,'Jet_mass')
        jets.CSVV2 = frame(events,'Jet_btagCSVV2')
        jets.DeepB = frame(events,'Jet_btagDeepB')
        jets.DeepFB = frame(events,'Jet_btagDeepFlavB')

        print('Processing ' + str(len(bs.oeta)) + ' events')

        ## Figure out how many bs and jets there are
        nb = bs.oeta.shape[1]
        njet = jets.eta.shape[1]
        na = As.oeta.shape[1]
        if na != 2:
            print("Expected two As per event, found "+str(na)+", halting")
            sys.exit()

        ## Create sorted versions of A values by pt (1 = highest pt)
        arank = As.opt.rank(axis=1,ascending=False,method='first')
        for prop in ['eta','phi','pt','mass']:
            As[prop] = pd.DataFrame()
            for i in range(1,3):
                As[prop][i] = As['o'+prop][arank==i].max(axis=1)

        ## Reorder our b dataframes to match sorted A parents: tframe is True
        ## in the columns of the bs whose parent is the highest-pt A
        leadA = arank==1
        tframe = pd.DataFrame()
        tframe[1] = leadA[1]
        tframe[2] = leadA[1]
        tframe[3] = leadA[2]
        tframe[4] = leadA[2]
        for prop in ['eta','phi','pt']:
            fromlead = bs['o'+prop][tframe]
            fromsub = bs['o'+prop][~tframe]
            bs[prop] = pd.DataFrame()
            ## pd.concat replaces Series.append, which pandas 2.0 removed
            bs[prop][1] = pd.concat([fromlead[1].dropna(),fromlead[3].dropna()]).sort_index()
            bs[prop][2] = pd.concat([fromlead[2].dropna(),fromlead[4].dropna()]).sort_index()
            bs[prop][3] = pd.concat([fromsub[1].dropna(),fromsub[3].dropna()]).sort_index()
            bs[prop][4] = pd.concat([fromsub[2].dropna(),fromsub[4].dropna()]).sort_index()

        ## Sort our b dataframes in descending order of pt
        brank = bs.pt.rank(axis=1,ascending=False,method='first')
        for prop in ['spt','seta','sphi']:
            bs[prop] = pd.DataFrame()
            for i in range(1,nb+1):
                bs[prop][i] = bs[prop[1:]][brank==i].max(axis=1)

        plots['genAmass'].dfill(As.mass)

        ev = Event(bs,jets,As,higgs)
        jets.cut(jets.pt>0)
        bs.cut(bs.pt>0)
        ev.sync()

        ##############################
        # Processing and Calculation #
        ##############################

        ## NOTE(review): the phi differences used for dR and |dphi| in this
        ## function are not wrapped into [-pi, pi], so pairs straddling the
        ## phi boundary get an inflated separation -- confirm whether this
        ## is intended.

        ## Create our dR dataframe by populating its first column and naming it accordingly
        jbdr2 = pd.DataFrame(np.power(jets.eta[1]-bs.eta[1],2) + np.power(jets.phi[1]-bs.phi[1],2)).rename(columns={1:'Jet 1 b 1'})
        sjbdr2= pd.DataFrame(np.power(jets.eta[1]-bs.seta[1],2) + np.power(jets.phi[1]-bs.sphi[1],2)).rename(columns={1:'Jet 1 b 1'})

        ## Loop over jet x b combinations
        for j in range(1,njet+1):
            for b in range(1,nb+1):
                ## The (1,1) column was created above
                if j+b == 2:
                    continue
                ## Make our column name
                colname = "Jet "+str(j)+" b "+str(b)
                ## Compute and store the dr of the given b and jet for every event at once
                jbdr2[colname] = pd.DataFrame(np.power(jets.eta[j]-bs.eta[b],2) + np.power(jets.phi[j]-bs.phi[b],2))
                sjbdr2[colname]= pd.DataFrame(np.power(jets.eta[j]-bs.seta[b],2) + np.power(jets.phi[j]-bs.sphi[b],2))

        ## Create a copy array to collapse in jets instead of bs
        blist = [nearest(jbdr2,b) for b in range(1,nb+1)]
        sblist = [nearest(sjbdr2,b) for b in range(1,nb+1)]

        ## Trim resolved jet objects
        if resjets==3:
            for i in range(nb):
                for j in range(nb):
                    if i != j:
                        blist[i] = blist[i][np.logical_not(blist[i] > blist[j])]
                blist[i] = blist[i][blist[i]<0.4]

        ## Cut our events to only resolved 4jet events with dR<0.4
        rjets = blist[0][blist[0]<0.4].fillna(0)
        for i in range(1,4):
            rjets = np.logical_or(rjets,blist[i][blist[i]<0.4].fillna(0))
        rjets = rjets.sum(axis=1)
        rjets = rjets[rjets==resjets].dropna()
        jets.trimTo(rjets)
        ev.sync()

        #############################
        # Constructing RECO objects #
        #############################

        ## Column i+1 of each b* frame holds the property of the jet matched to b i+1
        for prop in ['bpt','beta','bphi','bmass']:
            jets[prop] = pd.DataFrame()
            for i in range(nb):
                jets[prop][i+1] = jets[prop[1:]][blist[i]>0].max(axis=1)

        jets.bdr = pd.DataFrame()
        for i in range(nb):
            jets.bdr[i+1] = blist[i][blist[i]>0].max(axis=1)

        ev.sync()

        if resjets==3:
            pidx = [2,1,4,3]
            for prop in ['bpt','beta','bphi','bmass']:
                jets[prop]['merged'], jets[prop]['missing'] = (jets[prop][1]==jets[prop][3]),(jets[prop][1]==jets[prop][3])
                for i in range(1,nb+1):
                    jets[prop]['merged']=jets[prop]['merged']+jets[prop].fillna(0)[i][(jets.bmass[i]>=15) & (jets.bmass[i]+jets.bmass[pidx[i-1]]==jets.bmass[i])]
                    jets[prop]['missing']=jets[prop]['missing']+jets[prop].fillna(0)[i][(jets.bmass[i]<15) & (jets.bmass[i]+jets.bmass[pidx[i-1]]==jets.bmass[i])]

        ## Four-vectors of the matched jets, then of the A candidates built
        ## from jets (1,2) and (3,4)
        bvec = []
        for i in range(1,nb+1):
            bvec.append(TLorentzVectorArray.from_ptetaphim(jets.bpt[i],jets.beta[i],jets.bphi[i],jets.bmass[i]))

        avec = []
        for i in range(0,nb,2):
            avec.append(bvec[i]+bvec[i+1])

        for prop in ['apt','aeta','aphi','amass']:
            jets[prop] = pd.DataFrame()
        for i in range(na):
            jets.apt[i+1]  = avec[i].pt
            jets.aeta[i+1] = avec[i].eta
            jets.aphi[i+1] = avec[i].phi
            jets.amass[i+1]= avec[i].mass
        for prop in ['apt','aeta','aphi','amass']:
            jets[prop].index = jets.pt.index

        ## Higgs candidate from the two A candidates
        hvec = [avec[0]+avec[1]]

        for prop in ['hpt','heta','hphi','hmass']:
            jets[prop] = pd.DataFrame()
        jets.hpt[1]  = hvec[0].pt
        jets.heta[1] = hvec[0].eta
        jets.hphi[1] = hvec[0].phi
        jets.hmass[1]= hvec[0].mass
        for prop in ['hpt','heta','hphi','hmass']:
            jets[prop].index = jets.eta.index

        ################
        # Filling Data #
        ################

        for i in range(4):
            plots['bjdRvlogbpT'+str(i+1)].dfill(np.log2(bs.spt[[i+1]]),bs.trim(sblist[i]))
            plots['RjetCSVV2'].dfill(jets.CSVV2[blist[i]>0])
            plots['RjetDeepB'].dfill(jets.DeepB[blist[i]>0])
            plots['RjetDeepFB'].dfill(jets.DeepFB[blist[i]>0])

            yval = np.divide(jets.pt[sblist[i]>0].melt(value_name=0).drop('variable',axis=1).dropna().reset_index(drop=True)[0],bs.spt[[i+1]].dropna().reset_index(drop=True)[i+1])
            xval = np.log2(bs.spt[[i+1]]).melt(value_name=0).drop('variable',axis=1).dropna().reset_index(drop=True)[0]
            plots['jetoverbpTvlogbpT'+str(i+1)].fill(xval,yval)

            bjplots['s_bpT'+str(i+1)].dfill(bs.spt[[i+1]])
            bjplots['s_beta'+str(i+1)].dfill(bs.seta[[i+1]])
            bjplots['s_bjetpT'+str(i+1)].dfill(jets.pt[sblist[i]>0])
            bjplots['s_bjeteta'+str(i+1)].dfill(jets.eta[sblist[i]>0])
            bjplots['s_bjdR'+str(i+1)].dfill(sblist[i][sblist[i]!=0])

        plots['HpT'].dfill(higgs.pt)
        plots['A1pT'].fill(As.pt[1])
        plots['A2pT'].fill(As.pt[2])
        plots['AdR'].fill(np.sqrt(np.power(As.eta[2]-As.eta[1],2) + np.power(As.phi[2]-As.phi[1],2)))
        plots['bdRA1'].fill(np.sqrt(np.power(bs.eta[2]-bs.eta[1],2) + np.power(bs.phi[2]-bs.phi[1],2)))
        plots['bdRA2'].fill(np.sqrt(np.power(bs.eta[4]-bs.eta[3],2) + np.power(bs.phi[4]-bs.phi[3],2)))
        plots['bdetaA1'].fill(abs(bs.eta[2]-bs.eta[1]))
        plots['bdetaA2'].fill(abs(bs.eta[4]-bs.eta[3]))
        plots['bdphiA1'].fill(abs(bs.phi[2]-bs.phi[1]))
        plots['bdphiA2'].fill(abs(bs.phi[4]-bs.phi[3]))
        plots['bphi'].dfill(bs.phi)
        ## The RalljetpT histogram is disabled in the dictionary above; only
        ## fill it when it exists, otherwise this lookup raises KeyError
        if 'RalljetpT' in plots:
            plots['RalljetpT'].dfill(jets.pt)
        plots['bjdR'].dfill(jets.bdr)
        plots['RjetpT'].dfill(jets.bpt)
        plots['Rjeteta'].dfill(jets.beta)
        for i in range(1,3):
            plots['RA'+str(i)+'pT'  ].fill(jets.apt[i])
            plots['RA'+str(i)+'mass'].fill(jets.amass[i])
            plots['RA'+str(i)+'dR'  ].fill(np.sqrt(np.power(jets.beta[2*i]-jets.beta[(2*i)-1],2)+np.power(jets.bphi[2*i]-jets.bphi[(2*i)-1],2)))
            plots['RA'+str(i)+'deta'].fill(abs(jets.beta[2*i]-jets.beta[(2*i)-1]))
            plots['RA'+str(i)+'dphi'].fill(abs(jets.bphi[2*i]-jets.bphi[(2*i)-1]))
        plots['RHpT'  ].fill(jets.hpt[1])
        plots['RHmass'].fill(jets.hmass[1])
        plots['RHdR'  ].fill(np.sqrt(np.power(jets.aeta[2]-jets.aeta[1],2)+np.power(jets.aphi[2]-jets.aphi[1],2)))
        plots['RHdeta'].fill(abs(jets.aeta[2]-jets.aeta[1]))
        plots['RHdphi'].fill(abs(jets.aphi[2]-jets.aphi[1]))
        ## hpt/hpt is 1 for every surviving event, so this counts them
        plots['npassed'].fill(jets.hpt[1]/jets.hpt[1])
        plots['cutAmass'].dfill(As.mass)

    ############
    # Plotting #
    ############

    ## Runs once, after every file has been filled in: the s_bjdR histograms
    ## are popped so they can be drawn with a log axis and are not drawn
    ## again by the generic loop below
    plt.clf()
    for i in range(1,5):
        bjplots.pop('s_bjdR'+str(i)).plot(logv=True)
    for p in plots:
        plots[p].plot()
    for p in bjplots:
        bjplots[p].plot()

    #%%
    if returnplots:
        return plots
    else:
        sys.exit()
def trig(files):
    """Fill and plot muon-trigger cutflow and ratio histograms, then exit.

    For every file in *files* the 'Events' tree is opened, the HLT branches
    (names suffixed with '_part0') and L1 branches (names suffixed with
    'er1p5') are loaded, and the 1D/2D cutflow histograms are filled.  Muons
    are then restricted to |eta| < 1.5 with mediumId set, split into a
    sip3d > 5 sample and a pt > 10 sample, and the highest-muon pT / SIP
    histograms are filled for all events and for events firing
    HLT_Mu7_IP4_part0.  After the file loop the ratio histograms are built,
    everything is plotted, and the interpreter is terminated via sys.exit().

    files -- sequence of ROOT file names; files[0] is used as the plot title.

    NOTE(review): the statement nesting was reconstructed from a source whose
    indentation had been lost; the ratio/plot/exit tail is placed after the
    file loop because it pops entries from `plots` and calls sys.exit().
    """
    ## Book the histograms
    rptbins = [0,0.5,1,1.5,2,2.5,3,3.5,4,4.5,5,5.5,6,6.5,7,7.5,8,8.5,9,9.5,10,11,12,13,14,15,30,100]
    hlt_axis = 'All // HLT_Mu7/8/9/12_IP4/3,5,6/4,5,6/6'
    l1t_axis = 'All // L1_SingleMu6/7/8/9/10/12/14/16/18'
    plots = {
        'hptplot':      Hist(rptbins,None,'Highest Muon pT','Events passing HLT','upplots/TrigHpTplot'),
        'ptplot':       Hist(rptbins,None,'Highest Muon pT','Events','upplots/TrigpTplot'),
        'ratioptplot':  Hist(rptbins,None,'Highest Muon pT','HLT_Mu7_IP4 / Events with Muons of sip > 5','upplots/TrigRatiopTPlot'),
        'sipplot':      Hist(20,(0,20),'Highest Muon SIP', 'Events', 'upplots/TrigSIPplot'),
        'hsipplot':     Hist(20,(0,20),'Highest Muon SIP', 'Events', 'upplots/TrigHSIPplot'),
        'ratiosipplot': Hist(20,(0,20),'Highest Muon SIP', 'HLT_Mu7_IP4 / Events with muons of pT > 10', 'upplots/TrigRatioSIPplot'),
        'HLTcutflow':   Hist(12,(-0.5,11.5),hlt_axis,'Events','upplots/cutflowHLT'),
        'L1Tcutflow':   Hist(12,(-0.5,11.5),l1t_axis,'Events','upplots/cutflowL1T'),
        'HLTcutflowL':  Hist(12,(-0.5,11.5),hlt_axis,'Events','upplots/cutflowHLT-L'),
        'L1TcutflowL':  Hist(12,(-0.5,11.5),l1t_axis,'Events','upplots/cutflowL1T-L')
    }
    cutflow2d = Hist2d([9,10],[[-0.5,8.5],[-0.5,9.5]],hlt_axis,l1t_axis,'upplots/cutflowHLTvsL1T',files[0])
    for hist in plots.values():
        hist.title = files[0]

    ## Trigger names whose branches are read from every file
    HLTcuts = ['HLT_Mu7_IP4','HLT_Mu8_IP3','HLT_Mu8_IP5','HLT_Mu8_IP6','HLT_Mu9_IP4','HLT_Mu9_IP5','HLT_Mu9_IP6','HLT_Mu12_IP6']
    L1Tcuts = ['L1_SingleMu6','L1_SingleMu7','L1_SingleMu8','L1_SingleMu9','L1_SingleMu10','L1_SingleMu12','L1_SingleMu14','L1_SingleMu16','L1_SingleMu18']

    def branchframe(tree, branch):
        ## Read one branch into a dataframe and relabel its columns with `inc`
        return pd.DataFrame(tree.array(branch)).rename(columns=inc)

    ## Give pyplot an internal figure to draw on
    plt.figure(1)

    for fname in files:
        print('Opening '+fname)
        ## Grab the events tree and build the physics objects
        events = uproot.open(fname).get('Events')

        Muon = PhysObj('Muon',fname,'pt','eta','phi','sip3d','mediumId')
        Trig = PhysObj('trig')
        HLT = PhysObj('HLTrig')
        L1T = PhysObj('L1Trig')
        Trig.vals = branchframe(events, 'HLT_Mu7_IP4_part0')
        for path in HLTcuts:
            HLT[path] = branchframe(events, path+'_part0')
        for seed in L1Tcuts:
            L1T[seed] = branchframe(events, seed+'er1p5')
        ev = Event(Muon,Trig,HLT,L1T)
        print('Processing ' + str(len(Muon.pt)) + ' events')

        ## Bin 0 of every cutflow: one entry per event, whatever the trigger said
        plots['HLTcutflow'].dfill(HLT[HLTcuts[0]]*0)
        plots['L1Tcutflow'].dfill(L1T[L1Tcuts[0]]*0)
        cutflow2d.dfill(HLT[HLTcuts[0]]*0,HLT[HLTcuts[0]]*0)

        ## Bin k (k >= 1): events in which the k-th stored trigger fired
        for ct, hkey in enumerate(HLT, start=1):
            fired = HLT[hkey][HLT[hkey]].dropna()
            plots['HLTcutflow'].dfill(fired*ct)
            cutflow2d.dfill(fired*ct,fired*0)
        for ct, lkey in enumerate(L1T, start=1):
            fired = L1T[lkey][L1T[lkey]].dropna()
            plots['L1Tcutflow'].dfill(fired*ct)
            cutflow2d.dfill(fired*0,fired*ct)

        ## Interior of the 2D cutflow: events where both the HLT path and L1 seed fired
        for ht, hkey in enumerate(HLT, start=1):
            for lt, lkey in enumerate(L1T, start=1):
                hboth = HLT[hkey][HLT[hkey] & L1T[lkey]].dropna()
                lboth = L1T[lkey][L1T[lkey] & HLT[hkey]].dropna()
                cutflow2d.dfill(hboth*ht,lboth*lt)

        ## Global muon selection
        Muon.cut(abs(Muon.eta)<1.5)
        ## Explicit comparison kept on purpose: it yields a boolean mask
        ## (presumably so padded/missing entries compare False -- confirm)
        Muon.cut(Muon.mediumId==True)
        ev.sync()

        ## Split into the two muon samples and trim the trigger to each of them
        MuonP = Muon.cut(Muon.sip3d>5,split=True)
        MuonS = Muon.cut(Muon.pt>10,split=True)
        TrigP = Trig.trimTo(MuonP.pt,split=True)
        TrigS = Trig.trimTo(MuonS.sip3d,split=True)

        ## Copy the per-event trigger decision into one column per muon column
        for col in MuonP.pt.columns:
            TrigP.vals[col] = TrigP.vals[1]
        for col in MuonS.sip3d.columns:
            TrigS.vals[col] = TrigS.vals[1]

        ## Denominator (all) and numerator (trigger fired) histograms
        plt.figure(1)
        plots['ptplot'].fill(MuonP.pt.max(axis=1))
        plots['hptplot'].fill(MuonP.pt[TrigP.vals].max(axis=1).dropna(how='all'))
        plots['sipplot'].fill(MuonS.sip3d.max(axis=1))
        plots['hsipplot'].fill(MuonS.sip3d[TrigS.vals].max(axis=1).dropna(how='all'))

    ## Ratios and the '-L' copies of the cutflows
    plots['ratioptplot'].add(plots['hptplot'].divideby(plots['ptplot'],split=True))
    plots['ratiosipplot'].add(plots['hsipplot'].divideby(plots['sipplot'],split=True))
    plots['HLTcutflowL'].add(plots['HLTcutflow'])
    plots['L1TcutflowL'].add(plots['L1Tcutflow'])

    ## Zero the (0,0) cell of the normalised 2D cutflow before drawing it
    cutflow2d.norm()[0][0][0] = 0
    cutflow2d.plot(text=True,edgecolor='black')

    ## Cutflows are drawn normalised; the '-L' versions with a capped y range
    plots.pop('HLTcutflowL').norm().plot(ylim=(None,.2))
    plots.pop('L1TcutflowL').norm().plot(ylim=(None,.2))
    plots.pop('HLTcutflow').norm().plot()
    plots.pop('L1Tcutflow').norm().plot()
    for hist in plots.values():
        hist.plot()
    sys.exit()
def ana(sigfiles,bgfiles,isLHE=False):
    """Evaluate the saved b-tag and physics networks on signal/background fat jets.

    For each signal/background pairing this loads the FatJet branches,
    applies the fat-jet selection, keeps only signal events in which all
    four generator-level b quarks from A decays lie within dR < 0.8 of a
    single fat jet, builds one row of network inputs per event, runs the two
    models loaded from 'btagfiles/weighted.hdf5' and 'physfiles/weighted.hdf5'
    and fills and plots the 'netplots/*' histograms.

    Parameters
    ----------
    sigfiles : sequence of str
        Signal ROOT files, each holding an 'Events' tree.
    bgfiles : sequence of str
        Background ROOT files.  With ``isLHE`` they are consumed in groups of
        ``nlhe`` consecutive files per loop iteration.
    isLHE : bool, optional
        Treat the background as ``nlhe`` slices combined through
        ``lheweights``.

    Raises
    ------
    ValueError
        If ``isLHE`` is set and ``len(bgfiles)`` is not a multiple of ``nlhe``.

    Notes
    -----
    * Calls ``sys.exit()`` when the generator b quarks are not ordered and
      paired by A parent, or when an event record does not hold exactly two
      A columns.
    * The shorter of the two file lists is padded by repeating entries; this
      is done on private copies, the caller's lists are left untouched.
    * Uses module-level names that are not defined in this function:
      ``nlhe``, ``lheweights``, ``executor``, ``DATANAME``, ``fstrip``,
      ``inc``, ``Hist``, ``Hist2d``, ``PhysObj``, ``Event``.
    * NOTE(review): statement nesting was reconstructed from a source whose
      indentation had been lost.  Everything from file loading through
      plotting is kept inside the file loop -- confirm against the original.
    """
    #%%################
    # Plots and Setup #
    ###################
    Skey = 'Signal'
    Bkey = 'Background'

    ## The b-tag network sees bnetvars and the physics network sees pnetvars:
    ## each scaler input below is X_inputs with the *other* list dropped
    netvars = ['pt','eta','mass','CSVV2','DeepB','msoft','DDBvL','H4qvs']
    bnetvars = ['DeepB','H4qvs','DDBvL','CSVV2']
    pnetvars = ['pt','eta','mass','msoft']
    ## Per-event bookkeeping columns carried next to the network inputs
    extvars = ['event','extweight','npvs','npvsG']

    ## pdgId we expect the A to have (an earlier assignment of 9000006 was
    ## immediately overwritten by this value and has been dropped)
    Aid = 36

    ## Make a dictionary of histogram objects
    plots = {
        "SigDist":    Hist2d([20,20],[[0,1],[0,1]],'b-tag network','phys network','netplots/SigDist'),
        "BGDist":     Hist2d([20,20],[[0,1],[0,1]],'b-tag network','phys network','netplots/BGDist'),
        "SigProfile": Hist(20,(0,1),'b-tag bin','phys network avg','netplots/SigProfile'),
        "BGProfile":  Hist(20,(0,1),'b-tag bin','phys network avg','netplots/BGProfile'),
        "SigRanges":  Hist(20,(0,1),'confidence','events','netplots/SigRanges'),
        "BGRanges":   Hist(20,(0,1),'confidence','events','netplots/BGRanges'),
    }
    for i in range(3):
        plots.update({f"SigRanges{i}":cp.deepcopy(plots['SigRanges']),f"BGRanges{i}":cp.deepcopy(plots['BGRanges'])})

    ## Create an internal figure for pyplot to write to
    plt.figure(1)

    ## Work on copies: the catch-up padding below appends to these lists and
    ## must not grow the lists owned by the caller
    sigfiles = list(sigfiles)
    bgfiles = list(bgfiles)

    if isLHE:
        nbg, leftover = divmod(len(bgfiles), nlhe)
        if leftover:
            raise ValueError('LHE argument specified, but BG files do not divide evenly into '+str(nlhe))
    else:
        nbg = len(bgfiles)
    nsig = len(sigfiles)
    sigmbg = nbg - nsig

    def loadjets(jets, events, wname=''):
        """Fill *jets* with the FatJet and per-event branches of *events*.

        When *wname* is non-empty the per-event weight is multiplied by the
        'genweights', 'PUweights' and 'normweights' entries of the pickle
        'weights/<wname>-<fstrip(DATANAME)>.p'; otherwise it stays event/event.
        Per-event columns are then copied into one column per jet column.
        """
        branches = [
            ('eta',   'FatJet_eta'),
            ('phi',   'FatJet_phi'),
            ('pt',    'FatJet_pt'),
            ('mass',  'FatJet_mass'),
            ('CSVV2', 'FatJet_btagCSVV2'),
            ('DeepB', 'FatJet_btagDeepB'),
            ('DDBvL', 'FatJet_btagDDBvL'),
            ('msoft', 'FatJet_msoftdrop'),
            ('H4qvs', 'FatJet_deepTagMD_H4qvsQCD'),
            ('event', 'event'),
            ('npvs',  'PV_npvs'),
            ('npvsG', 'PV_npvsGood'),
        ]
        for attr, branch in branches:
            jets[attr] = pd.DataFrame(events.array(branch, executor=executor)).rename(columns=inc)
        jets.extweight = jets.event / jets.event
        if wname != '':
            ## NOTE: pickle can execute arbitrary code on load; only point
            ## this at weight files produced by this project
            with open('weights/'+wname+'-'+fstrip(DATANAME)+'.p',"rb") as wfile:
                weights = pickle.load(wfile)
            for prop in ['genweights','PUweights','normweights']:
                jets.extweight[1] = jets.extweight[1] * weights[prop][1]
        for j in range(1,jets.pt.shape[1]):
            jets.event[j+1] = jets.event[1]
            jets.npvs[j+1] = jets.npvs[1]
            jets.npvsG[j+1] = jets.npvsG[1]
            jets.extweight[j+1] = jets.extweight[1]
        return jets

    def jetframe(jets, val):
        """Return one row per event taken from the rank-1 (by pt) jet, labelled *val*."""
        ## NOTE(review): rank() is ascending by default, so rank 1 is the
        ## lowest-pT surviving jet (the A sorting below uses ascending=False)
        ## -- confirm this is intended
        pick = jets['pt'].rank(axis=1,method='first') == 1
        frame = pd.DataFrame()
        for prop in netvars + extvars:
            frame[prop] = jets[prop][pick].max(axis=1)
        if 'eta' in netvars:
            frame['eta'] = abs(frame['eta'])
        frame['val'] = val
        return frame

    ## Loop over input files
    for fnum in range(max(nbg,nsig)):
        print('bg',nbg,'sig',nsig)
        #####################
        # Loading Variables #
        #####################
        if isLHE:
            print('Opening',sigfiles[fnum],'+ LHE Background')
        else:
            print('Opening',sigfiles[fnum],'+',bgfiles[fnum])

        ## Loop some data if the bg/signal files need to be equalized
        if sigmbg > 0:
            print('Catching up signal')
            sigfiles.append(sigfiles[fnum])
            sigmbg = sigmbg - 1
        elif sigmbg < 0:
            print('Catching up background')
            if isLHE:
                ## NOTE(review): files are read below at fnum*nlhe + i, but
                ## repeated here from fnum + i -- confirm for fnum > 0
                for i in range(nlhe):
                    bgfiles.append(bgfiles[fnum+i])
            else:
                bgfiles.append(bgfiles[fnum])
            sigmbg = sigmbg + 1
        print('diff:',sigmbg)

        ## Open our file and grab the events tree
        if isLHE:
            bgevents = []
            for i in range(nlhe):
                idx = fnum*nlhe + i
                print('Opening ',bgfiles[idx])
                bgevents.append(uproot.open(bgfiles[idx]).get('Events'))
        else:
            bgf = uproot.open(bgfiles[fnum])
            bgevents = bgf.get('Events')
        sigf = uproot.open(sigfiles[fnum])
        sigevents = sigf.get('Events')

        sigjets = loadjets(PhysObj('sigjets'),sigevents,fstrip(sigfiles[fnum]))
        if isLHE:
            bgjets = []
            for i in range(nlhe):
                bgjets.append(loadjets(PhysObj(str(i)),bgevents[i],fstrip(bgfiles[(fnum*nlhe)+i])))
        else:
            bgjets = loadjets(PhysObj('bgjets'),bgevents,fstrip(bgfiles[fnum]))

        print(f"Processing {len(sigjets.eta)} {Skey} events")

        pdgida  = sigevents.array('GenPart_pdgId')
        paridxa = sigevents.array('GenPart_genPartIdxMother')
        parida  = pdgida[paridxa]

        ## Keep only particles with an A parent, then keep only the bs among
        ## them (the pdgId array is resized first so it is a valid mask)
        fromA  = abs(parida)==Aid
        bfromA = abs(pdgida)[fromA]==5
        bs = PhysObj('bs')
        bs.oeta = pd.DataFrame(sigevents.array('GenPart_eta')[fromA][bfromA]).rename(columns=inc)
        bs.ophi = pd.DataFrame(sigevents.array('GenPart_phi')[fromA][bfromA]).rename(columns=inc)
        bs.opt  = pd.DataFrame(sigevents.array('GenPart_pt' )[fromA][bfromA]).rename(columns=inc)

        ## Test b order corresponds to As
        testbs = pd.DataFrame(sigevents.array('GenPart_genPartIdxMother')[fromA][bfromA]).rename(columns=inc)
        ## Halt if, in ANY event, b4's mother index is not above b1's, or
        ## b1/b2 or b3/b4 do not share a mother.  Both differences go through
        ## abs() and the worst event (max) is tested, so a violation in one
        ## event can neither be cancelled by a negative term nor hidden by
        ## the other, well-ordered events.
        unpaired = abs(testbs[2]-testbs[1]) + abs(testbs[4]-testbs[3])
        if ((testbs[4]-testbs[1]).min() <= 0) or (unpaired.max() != 0):
            print('b to A ordering violated - time to do it the hard way')
            sys.exit()

        ## As are the particles of pdgId Aid whose parent is a Higgs (25)
        fromH  = abs(parida)==25
        AfromH = abs(pdgida)[fromH]==Aid
        As = PhysObj('As')
        As.oeta = pd.DataFrame(sigevents.array('GenPart_eta', executor=executor)[fromH][AfromH]).rename(columns=inc)
        As.ophi = pd.DataFrame(sigevents.array('GenPart_phi', executor=executor)[fromH][AfromH]).rename(columns=inc)
        As.opt  = pd.DataFrame(sigevents.array('GenPart_pt' , executor=executor)[fromH][AfromH]).rename(columns=inc)
        As.omass =pd.DataFrame(sigevents.array('GenPart_mass', executor=executor)[fromH][AfromH]).rename(columns=inc)

        ## Higgs entries are the pdgId-25 particles whose parent is not a 25
        notfromH = abs(parida)!=25
        isH      = abs(pdgida)[notfromH]==25
        higgs = PhysObj('higgs')
        higgs.eta = pd.DataFrame(sigevents.array('GenPart_eta', executor=executor)[notfromH][isH]).rename(columns=inc)
        higgs.phi = pd.DataFrame(sigevents.array('GenPart_phi', executor=executor)[notfromH][isH]).rename(columns=inc)
        higgs.pt  = pd.DataFrame(sigevents.array('GenPart_pt' , executor=executor)[notfromH][isH]).rename(columns=inc)

        slimjets = PhysObj('slimjets')
        slimjets.eta= pd.DataFrame(sigevents.array('Jet_eta', executor=executor)).rename(columns=inc)
        slimjets.phi= pd.DataFrame(sigevents.array('Jet_phi', executor=executor)).rename(columns=inc)
        slimjets.pt = pd.DataFrame(sigevents.array('Jet_pt' , executor=executor)).rename(columns=inc)
        slimjets.mass=pd.DataFrame(sigevents.array('Jet_mass', executor=executor)).rename(columns=inc)
        slimjets.DeepB = pd.DataFrame(sigevents.array('Jet_btagDeepB', executor=executor)).rename(columns=inc)
        slimjets.DeepFB= pd.DataFrame(sigevents.array('Jet_btagDeepFlavB', executor=executor)).rename(columns=inc)
        slimjets.puid = pd.DataFrame(sigevents.array('Jet_puId', executor=executor)).rename(columns=inc)

        ## Figure out how many bs and jets there are
        nb = bs.oeta.shape[1]
        njet= sigjets.eta.shape[1]
        na = As.oeta.shape[1]
        if na != 2:
            print("More than two As per event, found "+str(na)+", halting")
            sys.exit()

        ## Create sorted versions of A values by pt (column 1 = leading A)
        arank = As.opt.rank(axis=1,ascending=False,method='first')
        for prop in ['eta','phi','pt','mass']:
            As[prop] = pd.DataFrame()
            for i in range(1,3):
                As[prop][i] = As['o'+prop][arank==i].max(axis=1)

        ## Reorder our b dataframes to match sorted A parents: tframe marks,
        ## for every original b column, whether that b's parent A is the
        ## leading one (bs 1,2 belong to the first A, bs 3,4 to the second)
        leading = arank==1
        tframe = pd.DataFrame()
        tframe[1] = leading[1]
        tframe[2] = leading[1]
        tframe[3] = leading[2]
        tframe[4] = leading[2]
        for prop in ['eta','phi','pt']:
            obs = bs['o'+prop]
            bs[prop] = pd.DataFrame()
            ## pd.concat is the equivalent of the former Series.append calls
            bs[prop][1] = pd.concat([obs[tframe][1].dropna(), obs[tframe][3].dropna()]).sort_index()
            bs[prop][2] = pd.concat([obs[tframe][2].dropna(), obs[tframe][4].dropna()]).sort_index()
            bs[prop][3] = pd.concat([obs[~tframe][1].dropna(), obs[~tframe][3].dropna()]).sort_index()
            bs[prop][4] = pd.concat([obs[~tframe][2].dropna(), obs[~tframe][4].dropna()]).sort_index()

        ev = Event(bs,sigjets,As,higgs)

        ## Same fat-jet selection for every background sample and the signal
        jetsets = bgjets+[sigjets] if isLHE else [bgjets, sigjets]
        for jets in jetsets:
            jets.cut(jets.pt > 170)
            jets.cut(abs(jets.eta)<2.4)
            jets.cut(jets.DDBvL > 0.8)
            jets.cut(jets.DeepB > 0.4184)
            jets.cut(jets.msoft > 90)

        bs.cut(bs.pt>5)
        bs.cut(abs(bs.eta)<2.4)
        ev.sync()

        slimjets.cut(slimjets.DeepB > 0.1241)
        slimjets.cut(slimjets.DeepFB > 0.277)
        slimjets.cut(slimjets.puid > 0)
        ## Previously written with the leaked loop variable `jets`, whose last
        ## value is always sigjets; named explicitly here
        slimjets.trimto(sigjets.eta)

        ##############################
        # Processing and Calculation #
        ##############################
        ## Create our dR dataframe by populating its first column and naming it accordingly
        ## NOTE(review): the phi differences are used unwrapped -- confirm
        jbdr2 = pd.DataFrame(np.power(sigjets.eta[1]-bs.eta[1],2) + np.power(sigjets.phi[1]-bs.phi[1],2)).rename(columns={1:'Jet 1 b 1'})
        sjbdr2= pd.DataFrame(np.power(slimjets.eta[1]-bs.eta[1],2) + np.power(slimjets.phi[1]-bs.phi[1],2)).rename(columns={1:'Jet 1 b 1'})
        ## Loop over jet x b combinations
        for j in range(1,njet+1):
            for b in range(1,nb+1):
                ## The (1,1) column already exists from the initialisation above
                if (j+b==2):
                    continue
                ## Make our column name
                colname = "Jet "+str(j)+" b "+str(b)
                ## Compute and store the dr of the given b and jet for every event at once
                jbdr2[colname] = pd.DataFrame(np.power(sigjets.eta[j]-bs.eta[b],2) + np.power(sigjets.phi[j]-bs.phi[b],2))
                sjbdr2[colname]= pd.DataFrame(np.power(slimjets.eta[j]-bs.eta[b],2) + np.power(slimjets.phi[j]-bs.phi[b],2))

        ## For every b keep only the dR to its closest jet, with columns
        ## renamed to the integer jet number parsed from the column label
        blist = []
        sblist = []
        for b in range(nb):
            blist.append(np.sqrt(jbdr2.filter(like='b '+str(b+1))))
            blist[b] = blist[b][blist[b].rank(axis=1,method='first') == 1]
            blist[b] = blist[b].rename(columns=lambda x:int(x[4:6]))

            sblist.append(np.sqrt(sjbdr2.filter(like='b '+str(b+1))))
            sblist[b] = sblist[b][sblist[b].rank(axis=1,method='first') == 1]
            sblist[b] = sblist[b].rename(columns=lambda x:int(x[4:6]))

        ## Cut our events to only events with all four bs in one fatjet of
        ## dR<0.8: x/x is 1 for a match and NaN (0/0) otherwise, so the sum
        ## only reaches 4 for a jet matched to every b
        fjets = blist[0][blist[0]<0.8].fillna(0)/blist[0][blist[0]<0.8].fillna(0)
        for i in range(1,4):
            fjets = fjets + blist[i][blist[i]<0.8].fillna(0)/blist[i][blist[i]<0.8].fillna(0)
        fjets = fjets.max(axis=1)
        fjets = fjets[fjets==4].dropna()
        sigjets.trimto(fjets)
        ev.sync()

        ##################################
        # Preparing Neural Net Variables #
        ##################################
        if isLHE:
            bgpieces = []
            wtpieces = []
            for i in range(nlhe):
                twgtframe = jetframe(bgjets[i], 0)
                ## One copy is subsampled by the slice weight, the other
                ## keeps every event and carries the weight in 'extweight'
                tempframe = twgtframe.sample(frac=lheweights[i],random_state=6)
                twgtframe['extweight'] = twgtframe['extweight'] * lheweights[i]
                bgpieces.append(tempframe)
                wtpieces.append(twgtframe)
            bgjetframe = pd.concat(bgpieces,ignore_index=True)
            bgrawframe = pd.concat(wtpieces,ignore_index=True)
            bgjetframe = bgjetframe.dropna()
            bgrawframe = bgrawframe.dropna()
        else:
            bgjetframe = jetframe(bgjets, 0)
        bgtrnframe = bgjetframe[bgjetframe['event']%2 == 0]
        nbg = bgtrnframe.shape[0]

        sigjetframe = jetframe(sigjets, 1)
        sigtrnframe = sigjetframe[sigjetframe['event']%2 == 0]
        nsig = sigtrnframe.shape[0]

        print(f"{Skey} cut to {sigjetframe.shape[0]} events")
        print(f"{Bkey} has {bgjetframe.shape[0]} intended events")

        #######################
        # Training Neural Net #
        #######################
        ## With LHE slices the weighted, un-subsampled frame is what gets evaluated
        if isLHE:
            bgjetframe=bgrawframe

        X_inputs = pd.concat([bgjetframe,sigjetframe])
        W_inputs = X_inputs['extweight']
        Y_inputs = X_inputs['val']
        X_inputs = X_inputs.drop(extvars + ['val'],axis=1)

        bmodel = keras.models.load_model('btagfiles/weighted.hdf5', compile=False)
        physmodel = keras.models.load_model('physfiles/weighted.hdf5', compile=False)
        ## NOTE: pickle can execute arbitrary code on load; trusted files only
        with open("btagfiles/weightedscaler.p", "rb") as sfile:
            bscaler = pickle.load(sfile)
        with open("physfiles/weightedscaler.p", "rb") as sfile:
            physcaler = pickle.load(sfile)

        Xb_inputs = bscaler.transform(X_inputs.drop(pnetvars,axis=1))
        Xp_inputs = physcaler.transform(X_inputs.drop(bnetvars,axis=1))

        ##################################
        # Analyzing and Plotting Outputs #
        ##################################
        issig = Y_inputs==1
        isbg  = Y_inputs==0
        distsb = bmodel.predict(Xb_inputs[issig])
        distsp = physmodel.predict(Xp_inputs[issig])
        distbb = bmodel.predict(Xb_inputs[isbg])
        distbp = physmodel.predict(Xp_inputs[isbg])

        plots['SigDist'].fill(distsb[:,0],distsp[:,0],weights=W_inputs[issig])
        plots['BGDist' ].fill(distbb[:,0],distbp[:,0],weights=W_inputs[isbg])

        ## Mean and spread of the physics-network output in bins of the b-tag
        ## network output, using element [1] of each 2D histogram as the edges
        Sprofile, Serr, Bprofile, Berr = [],[],[],[]
        sedges = plots['SigDist'][1]
        bedges = plots['BGDist'][1]
        for i in range(plots['SigDist'][0].shape[0]):
            sbin = np.logical_and(distsb > sedges[i], distsb <= sedges[i+1])
            bbin = np.logical_and(distbb > bedges[i], distbb <= bedges[i+1])
            Sprofile.append(np.average(distsp[sbin]))
            Serr.append(np.std(distsp[sbin]))
            Bprofile.append(np.average(distbp[bbin]))
            Berr.append(np.std(distbp[bbin]))
        plots['SigProfile'][0] = Sprofile
        plots['SigProfile'].ser = np.power(Serr,2)
        plots['BGProfile'][0] = Bprofile
        plots['BGProfile'].ser = np.power(Berr,2)

        ## Edges splitting the b-tag output into three equally populated ranges
        sbranges, bbranges = [0.0], [0.0]
        ssorted = np.sort(distsb.ravel())
        bsorted = np.sort(distbb.ravel())
        for i in range(1,4):
            sbranges.append(ssorted[math.floor(distsb.shape[0]*i/3)-1])
            bbranges.append(bsorted[math.floor(distbb.shape[0]*i/3)-1])
        for i in range(3):
            plots[f"SigRanges{i}"].fill(distsp[np.logical_and(distsb > sbranges[i], distsb <= sbranges[i+1])])
            plots[f"BGRanges{i}"].fill(distbp[np.logical_and(distbb > bbranges[i], distbb <= bbranges[i+1])])

        for p in [plots['SigDist'],plots['BGDist']]:
            p.plot()
        for p in [plots['SigProfile'],plots['BGProfile']]:
            p.plot(error=True,htype='err')
        plt.clf()
        ## Overlay the three confidence ranges, then let the parent histogram
        ## draw on the same figure with the legend
        for plot in ['SigRanges','BGRanges']:
            plots[f"{plot}{0}"].make(linestyle='-',color='b',htype='step')
            plots[f"{plot}{1}"].make(linestyle='--',color='r',htype='step')
            plots[f"{plot}{2}"].make(linestyle=':',color='k',htype='step')
            plots[plot].plot(same=True,legend=['Low Confidence','Medium Confidence','High Confidence'])