def _bparking_rate(numer, denom):
    """Return ``numer.sum() / denom.sum()`` as a float, or 0. if the denominator is empty."""
    total = float(denom.sum())
    return float(numer.sum()) / total if total > 0. else 0.


def _bparking_kine(frame, prefix):
    """Return the mask ``has_<prefix> & (<prefix>_pt > 0.5) & (|<prefix>_eta| < 2.5)``."""
    return ((getattr(frame, 'has_' + prefix)) &
            (getattr(frame, prefix + '_pt') > 0.5) &
            (np.abs(getattr(frame, prefix + '_eta')) < 2.5))


def _bparking_eff_and_fr(is_e, has_trk, has_ele):
    """Return (efficiency, mistag rate) of ``has_ele`` w.r.t. ``has_trk``.

    Efficiency uses entries with ``is_e`` set, mistag rate those without.
    """
    signal = has_trk & is_e
    background = has_trk & (~is_e)
    return (_bparking_rate(has_ele & signal, signal),
            _bparking_rate(has_ele & background, background))


def _bparking_roc(truth, score):
    """Return (fpr, tpr, thresholds, auc); auc is 0. when ``truth`` holds a single class."""
    fpr, tpr, thresholds = roc_curve(truth, score)
    # roc_auc_score cannot be evaluated on a single-class sample, hence the guard.
    auc = roc_auc_score(truth, score) if len(set(truth)) > 1 else 0.
    return fpr, tpr, thresholds, auc


def _bparking_curves(test, egamma, attr, masks, signal):
    """Build the curve descriptions for one binned plot.

    ``signal`` selects true electrons (efficiency) when True and everything
    else (mistag rate) when False.
    """
    truth = test.is_e if signal else ~test.is_e
    truth_eg = egamma.is_e if signal else ~egamma.is_e
    denominator = truth & masks["trk"]
    hollow = [
        (masks["ele"] & masks["same_fr"], None),
        (masks["ele_high"] & masks["same_fr_high"], "^"),
        (masks["ele_low"] & masks["same_fr_low"], "v"),
    ]
    curves = [{
        "label": "Low-pT electron",
        "var": test[attr],
        "mask": denominator,
        "condition": masks["ele"],
        "colour": "blue",
        "fill": True,
        "size": 8,
    }]
    for condition, marker in hollow:
        curve = {
            "label": "Same mistag rate",
            "var": test[attr],
            "mask": denominator,
            "condition": condition,
            "colour": "blue",
            "fill": False,
            "size": 8,
        }
        if marker is not None:
            curve["marker"] = marker
        curves.append(curve)
    curves.append({
        "label": "PF electron",
        "var": egamma[attr],
        "mask": truth_eg & masks["trk_eg"],
        "condition": masks["ele_eg"],
        "colour": "purple",
        "fill": True,
        "size": 8,
    })
    return curves


def _bparking_draw_curves(ax, curves, binning, mean_format):
    """Draw one pass/total ratio per bin, with asymmetric errors, for each curve."""
    for curve in curves:
        values = curve["var"]
        his_total, _ = np.histogram(values[curve["mask"]], bins=binning)
        his_passed, _ = np.histogram(
            values[curve["mask"] & curve["condition"]], bins=binning)
        ratios, err_low, err_high = [], [], []
        for n_pass, n_total in zip(his_passed, his_total):
            if n_total > 0:
                ratio = float(n_pass) / float(n_total)
                interval = binomial_hpdr(n_pass, n_total)
                ratios.append(ratio)
                err_low.append(ratio - interval[0])
                err_high.append(interval[1] - ratio)
            else:
                # Empty bins are drawn at zero with no error bar.
                ratios.append(0.)
                err_low.append(0.)
                err_high.append(0.)
        mean = (float(his_passed.sum()) / float(his_total.sum())
                if his_total.sum() > 0 else 0.)
        ax.errorbar(
            x=binning[:-1],  # points sit on the lower edge of each bin
            y=ratios,
            yerr=[err_low, err_high],
            label=mean_format.format(curve["label"], mean),
            marker=curve.get("marker", 'o'),
            color=curve["colour"],
            markerfacecolor=curve["colour"] if curve["fill"] else "white",
            markersize=curve["size"],
            linewidth=0.5,
            elinewidth=0.5)


def bparking_binned(dir, test, egamma, has_pfgsf_branches=True, AxE=True):
    """Plot ROC, efficiency and mistag-rate curves for low-pT and PF electrons.

    Writes ``dir + '/roc.pdf'`` and, for each of ``gen_pt``, ``gsf_pt``,
    ``gsf_mode_pt``, ``gsf_dxy``, ``gsf_dz`` and ``rho``, the files
    ``dir + '/eff_vs_<attr>.pdf'`` and ``dir + '/mistag_vs_<attr>.pdf'``.

    Args:
        dir: Output location; file names are appended with a leading '/'.
        test: Table of low-pT electron candidates. Must provide ``is_e``,
            ``has_trk``/``trk_pt``/``trk_eta``, ``has_ele``/``ele_pt``/
            ``ele_eta``, ``ele_mva_value_depth15`` and the binned attributes,
            via both attribute and item access (presumably a pandas
            DataFrame -- confirm against the caller).
        egamma: Equivalent table for PF electrons; additionally needs
            ``ele_mva_value_retrained`` and ``ele_mva_value``.
        has_pfgsf_branches: Accepted for interface compatibility; unused here.
        AxE: Accepted for interface compatibility; unused here.
    """
    print(
        '##### BPARKING BINNED ##########################################################'
    )

    #############
    # ROC CURVE #
    #############

    plt.figure(figsize=(6, 6))
    ax = plt.subplot(111)
    plt.title('Low-pT electron performance (BParking)')
    plt.xlim(1.e-4, 1.)
    plt.ylim([0., 1.])
    plt.xlabel('Mistag rate (w.r.t. KF tracks, pT > 0.5 GeV)')
    plt.ylabel('Efficiency (w.r.t. KF tracks, pT > 0.5 GeV)')
    ax.tick_params(axis='x', pad=10.)
    plt.gca().set_xscale('log')
    plt.grid(True)

    # "by chance" diagonal, sampled with a step equal to the lower x limit
    step = plt.xlim()[0]
    plt.plot(np.arange(0., 1., step),
             np.arange(0., 1., step),
             ls='dotted',
             lw=0.5,
             label="By chance")

    # Low-pT electrons: inclusive, pT > 2.0 GeV and pT < 2.0 GeV
    id_branch = 'ele_mva_value_depth15'
    has_trk = _bparking_kine(test, 'trk')
    has_ele = _bparking_kine(test, 'ele')
    has_ele_high = has_ele & (test.ele_pt > 2.0)
    has_ele_low = has_ele & (test.ele_pt < 2.0)
    categories = (
        (has_ele, 'o', 'Low-pT electron', 'solid',
         'ID, 2020Feb24 (AUC={:.3f})'),
        (has_ele_high, '^', 'pT > 2.0 GeV', 'dotted', None),
        (has_ele_low, 'v', 'pT < 2.0 GeV', 'dotted', None),
    )
    rocs = []
    for selection, marker, point_label, line_style, roc_label in categories:
        eff, fr = _bparking_eff_and_fr(test.is_e, has_trk, selection)
        plt.plot(
            [fr],
            [eff],
            marker=marker,
            markerfacecolor='blue',
            markeredgecolor='blue',
            markersize=8,
            linestyle='none',
            label=point_label,
        )
        fpr, tpr, thresholds, auc = _bparking_roc(test.is_e[selection],
                                                  test[id_branch][selection])
        line_kwargs = dict(linestyle=line_style, color='blue', linewidth=1.0)
        if roc_label is not None:
            line_kwargs['label'] = roc_label.format(auc)
        # ROC is scaled by the reconstruction eff/mistag of the selection.
        plt.plot(fpr * fr, tpr * eff, **line_kwargs)
        rocs.append((fpr, fr, thresholds))

    # EGamma PF GSF electrons
    has_trk_eg = _bparking_kine(egamma, 'trk')
    has_ele_eg = _bparking_kine(egamma, 'ele')
    pf_eff, pf_fr = _bparking_eff_and_fr(egamma.is_e, has_trk_eg, has_ele_eg)
    plt.plot([pf_fr], [pf_eff],
             marker='o',
             color='purple',
             markersize=8,
             linestyle='none',
             label='PF electron')
    pf_ids = (
        ('ele_mva_value_retrained', 'solid', 'ID, retrain (AUC={:.3f})'),
        ('ele_mva_value', 'dashed', 'ID, EGamma (AUC={:.3f})'),
    )
    for branch, line_style, roc_label in pf_ids:
        fpr, tpr, _, auc = _bparking_roc(egamma.is_e[has_ele_eg],
                                         egamma[branch][has_ele_eg])
        plt.plot(fpr * pf_fr,
                 tpr * pf_eff,
                 linestyle=line_style,
                 color='purple',
                 linewidth=1.0,
                 label=roc_label.format(auc))

    # Working points: ID threshold whose scaled mistag rate is closest to
    # the PF electron mistag rate, per low-pT category.
    same_fr, same_fr_high, same_fr_low = [
        test[id_branch] > thresholds[np.abs(fpr * fr - pf_fr).argmin()]
        for fpr, fr, thresholds in rocs
    ]

    plt.legend(loc='upper left',
               facecolor='white',
               framealpha=None,
               frameon=False)
    plt.tight_layout()
    plt.savefig(dir + '/roc.pdf')
    plt.clf()
    plt.close()

    #############################
    # EFF CURVES, MISTAG CURVES #
    #############################

    # Variable-width pT binning: 0.5 GeV up to 4, 1 GeV up to 8, 2 GeV up to 12
    bin_edges = np.concatenate([
        np.linspace(0., 4., 8, endpoint=False),
        np.linspace(4., 8., 4, endpoint=False),
        np.linspace(8., 12., 3, endpoint=True),
    ])
    variables = [
        ('gen_pt', bin_edges, 'Generator-level transverse momentum (GeV)'),
        ('gsf_pt', bin_edges, 'Transverse momentum (GeV)'),
        ('gsf_mode_pt', bin_edges, 'Mode transverse momentum (GeV)'),
        ('gsf_dxy', np.linspace(0., 3.3, 12),
         'Transverse impact parameter w.r.t. beamspot (cm)'),
        ('gsf_dz', np.linspace(0., 22., 12),
         'Longitudinal impact parameter w.r.t. beamspot (cm)'),
        ('rho', np.linspace(0., 44., 12),
         'Median energy density from UE/pileup (GeV / unit area)'),
    ]
    masks = {
        "trk": has_trk,
        "ele": has_ele,
        "ele_high": has_ele_high,
        "ele_low": has_ele_low,
        "same_fr": same_fr,
        "same_fr_high": same_fr_high,
        "same_fr_low": same_fr_low,
        "trk_eg": has_trk_eg,
        "ele_eg": has_ele_eg,
    }
    passes = (
        ("Efficiency curves ...", True, '{:s} (mean={:5.3f})',
         'Efficiency (w.r.t. KF tracks, pT > 0.5 GeV)', '/eff_vs_{:s}.pdf'),
        ("Mistag curves ...", False, '{:s} (mean={:6.4f})',
         'Mistag rate (w.r.t. KF tracks, pT > 0.5 GeV)',
         '/mistag_vs_{:s}.pdf'),
    )
    for banner, signal, mean_format, ylabel, filename in passes:
        print(banner)
        for attr, binning, xlabel in variables:
            print(attr)
            plt.figure()
            ax = plt.subplot(111)
            curves = _bparking_curves(test, egamma, attr, masks, signal)
            _bparking_draw_curves(ax, curves, binning, mean_format)

            plt.title('Low-pT electron performance (BParking)')
            plt.xlabel(xlabel)
            plt.ylabel(ylabel)
            if signal:
                ax.set_xlim(binning[0], binning[-2])
                plt.ylim([0., 1.])
            else:
                plt.gca().set_yscale('log')
                ax.set_xlim(binning[0], binning[-2])
                ax.set_ylim([1.e-4, 1.])
            plt.legend(loc='lower right',
                       facecolor='white',
                       framealpha=None,
                       frameon=False)
            plt.tight_layout()
            plt.savefig(dir + filename.format(attr))
            plt.clf()
            plt.close()
def _mauro_rate(numer, denom):
    """Return ``numer.sum() / denom.sum()`` as a float, or 0. if the denominator is empty."""
    total = float(denom.sum())
    return float(numer.sum()) / total if total > 0. else 0.


def _mauro_kine(frame, prefix):
    """Return the mask ``has_<prefix> & (<prefix>_pt > 0.5) & (|<prefix>_eta| < 2.5)``."""
    return ((getattr(frame, 'has_' + prefix)) &
            (getattr(frame, prefix + '_pt') > 0.5) &
            (np.abs(getattr(frame, prefix + '_eta')) < 2.5))


def _mauro_eff_and_fr(is_e, has_gsf, passed, AxE):
    """Return (efficiency, mistag rate) of the ``passed`` selection.

    The efficiency denominator is every ``is_e`` entry when ``AxE is True``,
    otherwise only those with ``has_gsf``. The mistag denominator is always
    ``has_gsf & ~is_e``.
    """
    signal = is_e if AxE is True else (has_gsf & is_e)
    background = has_gsf & (~is_e)
    return (_mauro_rate(passed & signal, signal),
            _mauro_rate(passed & background, background))


def _mauro_roc(truth, score):
    """Return (fpr, tpr, thresholds, auc); auc is 0. when ``truth`` holds a single class."""
    fpr, tpr, thresholds = roc_curve(truth, score)
    # roc_auc_score raises on a single-class sample; guard every call.
    auc = roc_auc_score(truth, score) if len(set(truth)) > 1 else 0.
    return fpr, tpr, thresholds, auc


def _mauro_mark_wp(x, y, tag, colour):
    """Draw a small open circle at (x, y) with ``tag`` written just below it."""
    plt.plot([x], [y],
             marker='o',
             markerfacecolor='none',
             markeredgecolor=colour,
             markersize=4)
    plt.text(x,
             y - 0.02,
             tag,
             fontsize=10,
             ha='center',
             va='center',
             color=colour)


def _mauro_curves(test, egamma, masks, signal, unb_cut, id_cut):
    """Build the curve descriptions for the binned-in-GSF-pT plots.

    ``signal`` selects true electrons (efficiency) when True and everything
    else (mistag rate) when False.
    """
    truth = test.is_e if signal else ~test.is_e
    truth_eg = egamma.is_e if signal else ~egamma.is_e
    denominator = truth & masks["gsf"]
    denominator_eg = truth_eg & masks["gsf_eg"]
    return [
        {
            "label": "EG GSF",
            "var": egamma.gsf_pt,
            "mask": denominator_eg,
            "condition": masks["pfgsf_eg"],
            "colour": "green",
            "size": 7,
        },
        {
            "label": "PF ELE",
            "var": egamma.gsf_pt,
            "mask": denominator_eg,
            "condition": masks["ele_eg"],
            "colour": "purple",
            "size": 7,
        },
        {
            "label": "Unbiased",
            "var": test.gsf_pt,
            "mask": denominator,
            "condition": masks["ele"] & (test.gsf_bdtout1 > unb_cut),
            "colour": "blue",
            "size": 7,
        },
        {
            "label": "ID",
            "var": test.gsf_pt,
            "mask": denominator,
            "condition": masks["ele"] & (test['training_out'] > id_cut),
            "colour": "black",
            "size": 7,
        },
        {
            "label": "Open",
            "var": test.gsf_pt,
            "mask": denominator,
            "condition": masks["ele"],
            "colour": "red",
            "size": 7,
        },
    ]


def _mauro_draw_curves(ax, curves, bin_edges):
    """Draw one pass/total ratio per bin, with asymmetric errors, for each curve."""
    for curve in curves:
        values = curve["var"]
        his_total, _ = np.histogram(values[curve["mask"]], bins=bin_edges)
        his_passed, _ = np.histogram(
            values[curve["mask"] & curve["condition"]], bins=bin_edges)
        ratios, err_low, err_high = [], [], []
        for n_pass, n_total in zip(his_passed, his_total):
            if n_total > 0:
                ratio = float(n_pass) / float(n_total)
                interval = binomial_hpdr(n_pass, n_total)
                ratios.append(ratio)
                err_low.append(ratio - interval[0])
                err_high.append(interval[1] - ratio)
            else:
                # Empty bins are drawn at zero with no error bar.
                ratios.append(0.)
                err_low.append(0.)
                err_high.append(0.)
        mean = (float(his_passed.sum()) / float(his_total.sum())
                if his_total.sum() > 0 else 0.)
        ax.errorbar(
            x=bin_edges[:-1],  # points sit on the lower edge of each bin
            y=ratios,
            yerr=[err_low, err_high],
            label='{:s} (mean={:5.3f})'.format(curve["label"], mean),
            marker='o',
            color=curve["colour"],
            markerfacecolor="white",
            markersize=curve["size"],
            linewidth=0.5,
            elinewidth=0.5)


def mauro(dir, test, egamma, has_pfgsf_branches=True, AxE=True):
    """Plot ROC, efficiency and mistag-rate curves w.r.t. GSF tracks.

    Writes ``dir + '/roc.pdf'``, ``dir + '/eff.pdf'`` and
    ``dir + '/mistag.pdf'`` and prints the efficiency / mistag rate of each
    selection and working point.

    Args:
        dir: Output location; file names are appended with a leading '/'.
        test: Table of low-pT candidates. Must provide ``is_e``,
            ``has_gsf``/``gsf_pt``/``gsf_eta``, ``has_ele``/``ele_pt``/
            ``ele_eta``, ``gsf_bdtout1``, ``gsf_bdtout2`` and
            ``training_out`` (presumably a pandas DataFrame -- confirm
            against the caller).
        egamma: Equivalent table for EGamma objects; additionally needs
            ``has_pfgsf``/``pfgsf_pt``/``pfgsf_eta``.
        has_pfgsf_branches: Accepted for interface compatibility; unused here.
        AxE: When ``True`` (identity check), efficiencies are computed
            w.r.t. all ``is_e`` entries and the y axis is limited to 0.6;
            otherwise w.r.t. ``is_e`` entries with a GSF track.
    """
    print(
        '##### MAURO ##########################################################'
    )

    #############
    # ROC CURVE #
    #############

    plt.figure(figsize=(6, 6))
    ax = plt.subplot(111)
    plt.title('Efficiency and mistag rate w.r.t. GSF tracks')
    plt.xlim(1.e-3, 1.1)
    if AxE is True:
        plt.ylim([0., 0.6])
    else:
        plt.ylim([0., 1.03])
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    ax.tick_params(axis='x', pad=10.)
    plt.gca().set_xscale('log')
    plt.grid(True)

    # "by chance" diagonal, sampled with a step equal to the lower x limit
    step = plt.xlim()[0]
    plt.plot(np.arange(0., 1., step), np.arange(0., 1., step), 'k--', lw=0.5)

    # Low-pT GSF electrons + ROC curves
    has_gsf = _mauro_kine(test, 'gsf')
    has_ele = _mauro_kine(test, 'ele')
    ele_eff, ele_fr = _mauro_eff_and_fr(test.is_e, has_gsf, has_ele, AxE)
    plt.plot([ele_fr], [ele_eff],
             marker='o',
             color='blue',
             markersize=8,
             linestyle='None')

    truth_ele = test.is_e[has_ele]
    id_fpr, id_tpr, id_score, id_auc = _mauro_roc(
        truth_ele, test['training_out'][has_ele])
    plt.plot(id_fpr * ele_fr,
             id_tpr * ele_eff,
             linestyle='solid',
             color='black',
             linewidth=1.0,
             label='Low-pT GSF electron + ID, AUC={:.3f}'.format(id_auc))

    # Unbiased seed BDT
    ele_unb_fpr, ele_unb_tpr, ele_unb_score, ele_unb_auc = _mauro_roc(
        truth_ele, test.gsf_bdtout1[has_ele])
    plt.plot(
        ele_unb_fpr * ele_fr,
        ele_unb_tpr * ele_eff,
        linestyle='solid',
        color='blue',
        linewidth=1.0,
        label='Low-pT GSF electron + unbiased seed BDT, AUC={:.3f}'.format(
            ele_unb_auc))

    # Biased seed BDT
    ele_b_fpr, ele_b_tpr, _, ele_b_auc = _mauro_roc(
        truth_ele, test.gsf_bdtout2[has_ele])
    plt.plot(ele_b_fpr * ele_fr,
             ele_b_tpr * ele_eff,
             linestyle='dashed',
             color='blue',
             linewidth=0.5,
             label='Low-pT GSF electron + biased seed BDT, AUC={:.3f}'.format(
                 ele_b_auc))

    # Low-pT GSF tracks + ROC curves
    gsf_eff, gsf_fr = _mauro_eff_and_fr(test.is_e, has_gsf, has_gsf, AxE)
    plt.plot([gsf_fr], [gsf_eff],
             marker='o',
             color='red',
             markersize=8,
             linestyle='None')

    truth_gsf = test.is_e[has_gsf]

    # Unbiased seed BDT
    gsf_unb_fpr, gsf_unb_tpr, gsf_unb_score, gsf_unb_auc = _mauro_roc(
        truth_gsf, test.gsf_bdtout1[has_gsf])
    plt.plot(gsf_unb_fpr * gsf_fr,
             gsf_unb_tpr * gsf_eff,
             linestyle='solid',
             color='red',
             linewidth=1.0,
             label='Low-pT GSF track + unbiased seed BDT, AUC={:.3f}'.format(
                 gsf_unb_auc))

    # Biased seed BDT
    gsf_b_fpr, gsf_b_tpr, _, gsf_b_auc = _mauro_roc(
        truth_gsf, test.gsf_bdtout2[has_gsf])
    plt.plot(gsf_b_fpr * gsf_fr,
             gsf_b_tpr * gsf_eff,
             linestyle='dashed',
             color='red',
             linewidth=0.5,
             label='Low-pT GSF track + biased seed BDT, AUC={:.3f}'.format(
                 gsf_b_auc))

    # EGamma GSF tracks and PF GSF electrons
    has_gsf_eg = _mauro_kine(egamma, 'gsf')
    has_pfgsf_eg = _mauro_kine(egamma, 'pfgsf')
    has_ele_eg = _mauro_kine(egamma, 'ele')

    eg_eff, eg_fr = _mauro_eff_and_fr(egamma.is_e, has_gsf_eg, has_pfgsf_eg,
                                      AxE)
    plt.plot([eg_fr], [eg_eff],
             marker='o',
             color='green',
             markersize=8,
             linestyle='None',
             label='EGamma GSF track')

    pf_eff, pf_fr = _mauro_eff_and_fr(egamma.is_e, has_gsf_eg, has_ele_eg,
                                      AxE)
    plt.plot([pf_fr], [pf_eff],
             marker='o',
             color='purple',
             markersize=8,
             linestyle='None',
             label='PF GSF electron')

    summary = 'eff: {:.3f}, mistag: {:.4f}'
    print(summary.format(ele_eff, ele_fr), 'Low-pT GSF electrons + ID')
    print(summary.format(gsf_eff, gsf_fr),
          'Low-pT GSF electrons + unbiased seed BDT')
    print(summary.format(eg_eff, eg_fr), 'EGamma GSF tracks')
    print(summary.format(pf_eff, pf_fr), 'EGamma GSF electrons')

    # "New" WPs: points on the unbiased-seed track ROC matched to the EGamma
    # GSF track mistag rate (x10, x3, x1) or efficiency, and to the PF
    # electron mistag rate.
    scaled_fr = gsf_unb_fpr * gsf_fr
    unb_L = np.abs(scaled_fr - eg_fr * 10.).argmin()
    unb_M = np.abs(scaled_fr - eg_fr * 3.).argmin()
    unb_T = np.abs(scaled_fr - eg_fr).argmin()
    unb_VT = np.abs(gsf_unb_tpr * gsf_eff - eg_eff).argmin()
    unb_ELE = np.abs(scaled_fr - pf_fr).argmin()  # same FR
    id_ELE = np.abs(id_fpr * ele_fr - pf_fr).argmin()  # same FR

    row = "{:.4f}, {:.3f}, {:5.2f} "
    print("EG GSF track: FR, Eff, score:", row.format(eg_fr, eg_eff, np.nan))
    print("VLoose (10% FR): FR, Eff, score:",
          row.format(gsf_fr, gsf_eff, np.nan))
    track_wps = (
        ("Loose (x10 FR): FR, Eff, score:", "L", unb_L),
        ("Medium (x3 FR): FR, Eff, score:", "M", unb_M),
        ("Tight (same FR): FR, Eff, score:", "T", unb_T),
        ("VTight (same eff): FR, Eff, score:", "VT", unb_VT),
    )
    for heading, _, index in track_wps:
        print(
            heading,
            row.format(gsf_unb_fpr[index] * gsf_fr,
                       gsf_unb_tpr[index] * gsf_eff, gsf_unb_score[index]))
    print("PF GSF electron: FR, Eff, score:",
          row.format(pf_fr, pf_eff, np.nan))
    # NOTE(review): unb_ELE is an index into the gsf_unb_* arrays but is used
    # below to index the ele_unb_* arrays, which come from a different ROC
    # curve and may differ in length. Behaviour kept as-is -- please confirm
    # the intended working point.
    print(
        "Unb/PF (same FR): FR, Eff, score:",
        row.format(ele_unb_fpr[unb_ELE] * ele_fr,
                   ele_unb_tpr[unb_ELE] * ele_eff, ele_unb_score[unb_ELE]))
    print(
        "ID/PF (same FR): FR, Eff, score:",
        row.format(id_fpr[id_ELE] * ele_fr, id_tpr[id_ELE] * ele_eff,
                   id_score[id_ELE]))

    for _, tag, index in track_wps:
        _mauro_mark_wp(gsf_unb_fpr[index] * gsf_fr,
                       gsf_unb_tpr[index] * gsf_eff, tag, 'green')
    _mauro_mark_wp(ele_unb_fpr[unb_ELE] * ele_fr,
                   ele_unb_tpr[unb_ELE] * ele_eff, "E", 'purple')
    _mauro_mark_wp(id_fpr[id_ELE] * ele_fr, id_tpr[id_ELE] * ele_eff, "E",
                   'purple')

    # Original seed-BDT WPs are listed in
    # https://github.com/cms-sw/cmssw/blob/CMSSW_10_2_X/RecoEgamma/EgammaElectronProducers/python/lowPtGsfElectronSeeds_cfi.py

    plt.legend(loc='upper left', framealpha=None, frameon=False)
    plt.tight_layout()
    plt.savefig(dir + '/roc.pdf')
    plt.clf()
    plt.close()

    #############################
    # EFF CURVES, MISTAG CURVES #
    #############################

    # Variable-width pT binning: 0.5 GeV up to 4, 1 GeV up to 8, then 8-10
    bin_edges = np.concatenate([
        np.linspace(0., 4., 8, endpoint=False),
        np.linspace(4., 8., 4, endpoint=False),
        np.linspace(8., 10., 2, endpoint=True),
    ])
    masks = {
        "gsf": has_gsf,
        "ele": has_ele,
        "gsf_eg": has_gsf_eg,
        "pfgsf_eg": has_pfgsf_eg,
        "ele_eg": has_ele_eg,
    }
    unb_cut = gsf_unb_score[unb_ELE]
    id_cut = id_score[id_ELE]

    # Efficiency vs GSF track pT
    plt.figure()
    ax = plt.subplot(111)
    _mauro_draw_curves(
        ax, _mauro_curves(test, egamma, masks, True, unb_cut, id_cut),
        bin_edges)
    plt.title('Efficiency as a function of GSF track pT')
    plt.xlabel('Transverse momentum (GeV)')
    plt.ylabel('Efficiency')
    ax.set_xlim(bin_edges[0], bin_edges[-2])
    plt.ylim([0., 1.])
    plt.legend(loc='best')
    plt.tight_layout()
    plt.savefig(dir + '/eff.pdf')
    plt.clf()
    plt.close()

    # Mistag rate vs GSF track pT
    plt.figure()
    ax = plt.subplot(111)
    _mauro_draw_curves(
        ax, _mauro_curves(test, egamma, masks, False, unb_cut, id_cut),
        bin_edges)
    plt.title('Mistag rate as a function of GSF track pT')
    plt.xlabel('Transverse momentum (GeV)')
    plt.ylabel('Mistag rate')
    plt.gca().set_yscale('log')
    ax.set_xlim(bin_edges[0], bin_edges[-2])
    ax.set_ylim([0.0001, 1.])
    plt.legend(loc='best')
    plt.tight_layout()
    plt.savefig(dir + '/mistag.pdf')
    plt.clf()
    plt.close()
def _miniaod_rate(numer_mask, denom_mask):
    """Return the fraction of entries selected by ``denom_mask`` that also pass ``numer_mask``.

    Returns 0. when ``denom_mask`` selects nothing.
    """
    total = float(denom_mask.sum())
    if total > 0.:
        return float((numer_mask & denom_mask).sum()) / total
    return 0.


def _miniaod_roc(truth, score):
    """Return ``(fpr, tpr, thresholds, auc)`` for ``score`` evaluated against ``truth``.

    ``roc_auc_score`` is only called when ``truth`` holds more than one
    distinct value; otherwise the AUC is reported as 0.
    """
    fpr, tpr, thresholds = roc_curve(truth, score)
    auc = roc_auc_score(truth, score) if len(set(truth)) > 1 else 0.
    return fpr, tpr, thresholds, auc


def _miniaod_plot_ratio_curves(ax, curves, binning):
    """Draw the binned passed/total ratio of every curve on ``ax`` with error bars.

    Each entry of ``curves`` is a dict with the keys "label", "var", "mask",
    "condition", "colour", "fill" and "size". "mask" selects the denominator,
    "mask" & "condition" the numerator. Points are drawn at the lower edge of
    each bin and the legend label carries the overall passed/total ratio.
    """
    for curve in curves:
        his_total, _ = np.histogram(curve["var"][curve["mask"]], bins=binning)
        his_passed, _ = np.histogram(
            curve["var"][curve["mask"] & curve["condition"]], bins=binning)
        ratios, ylow, yhigh = [], [], []
        for passed, total in zip(his_passed, his_total):
            if total > 0:
                ratio = passed / total
                # NOTE(review): single call per bin; assumes binomial_hpdr is
                # deterministic and returns (lower, upper) first -- confirm.
                interval = binomial_hpdr(passed, total)
                ratios.append(ratio)
                ylow.append(ratio - interval[0])
                yhigh.append(interval[1] - ratio)
            else:
                # Empty bins are drawn at zero with no error bar.
                ratios.append(0.)
                ylow.append(0.)
                yhigh.append(0.)
        n_total = his_total.sum()
        mean = float(his_passed.sum()) / float(n_total) if n_total > 0 else 0.
        ax.errorbar(
            x=binning[:-1],
            y=ratios,
            yerr=[ylow, yhigh],
            label='{:s} (mean={:5.3f})'.format(curve["label"], mean),
            marker='o',
            color=curve["colour"],
            markerfacecolor=curve["colour"] if curve["fill"] else "white",
            markersize=curve["size"],
            linewidth=0.5,
            elinewidth=0.5)


def miniaod(dir, test, egamma, has_pfgsf_branches=True, AxE=True):
    """Produce the MINIAOD performance plots and save them as PDFs under ``dir``.

    Writes ``roc.pdf`` plus one ``eff_vs_<var>.pdf`` and one
    ``mistag_vs_<var>.pdf`` per binned variable.

    Args:
        dir: Output directory path (string); file names are appended to it.
        test: Low-pT candidates. Must support attribute access, ``[]`` column
            lookup and boolean-mask indexing (presumably a pandas DataFrame --
            confirm against the caller).
        egamma: EGamma/PF candidates, accessed the same way as ``test``.
        has_pfgsf_branches: Not used by this function; kept for interface
            compatibility.
        AxE: Not used by this function; kept for interface compatibility.
    """
    print(
        '##### MINIAOD ##########################################################'
    )

    ########################################
    # Selections, computed once and shared by every plot below

    # Low-pT collections: pT > 0.5 GeV, |eta| < 2.5
    has_gsf = (test.has_gsf) & (test.gsf_pt > 0.5) & (np.abs(test.gsf_eta) < 2.5)
    has_ele = (test.has_ele) & (test.ele_pt > 0.5) & (np.abs(test.ele_eta) < 2.5)
    # pT > 1.0 GeV, Tight WP for Seed BDT
    tight_seed = (test.gsf_pt > 1.0) & ((test.gsf_bdtout1 > 3.05) |
                                        (test.gsf_bdtout2 > 2.42))
    has_gsf_T = has_gsf & tight_seed
    has_ele_T = has_ele & tight_seed
    sig = has_gsf & test.is_e
    bkg = has_gsf & (~test.is_e)

    # EGamma collections: pT > 0.5 GeV, |eta| < 2.5
    eg_gsf = (egamma.has_gsf) & (egamma.gsf_pt > 0.5) & (np.abs(
        egamma.gsf_eta) < 2.5)
    eg_pfgsf = (egamma.has_pfgsf) & (egamma.pfgsf_pt > 0.5) & (np.abs(
        egamma.pfgsf_eta) < 2.5)
    eg_ele = (egamma.has_ele) & (egamma.ele_pt > 0.5) & (np.abs(
        egamma.ele_eta) < 2.5)
    eg_sig = eg_gsf & egamma.is_e
    eg_bkg = eg_gsf & (~egamma.is_e)

    #############
    # ROC CURVE #
    #############

    plt.figure(figsize=(6, 6))
    ax = plt.subplot(111)
    plt.title('Efficiency and mistag rate w.r.t. GSF tracks')
    plt.xlim(1.e-3, 1.1)
    plt.ylim([0., 1.02])
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    ax.tick_params(axis='x', pad=10.)
    plt.gca().set_xscale('log')
    plt.grid(True)

    ########################################
    # "by chance" line
    chance = np.arange(0., 1., plt.xlim()[0])
    plt.plot(chance, chance, 'k--', lw=0.5)

    ########################################
    # Low-pT GSF tracks

    # pT > 0.5 GeV, VL WP for Seed BDT
    plt.plot(
        [_miniaod_rate(has_gsf, bkg)],
        [_miniaod_rate(has_gsf, sig)],
        marker='o',
        markerfacecolor='none',
        markeredgecolor='red',
        markersize=8,
        linestyle='none',
        label='Low-pT GSF track, pT > 0.5 GeV, VLoose Seed',
    )

    # pT > 1.0 GeV, Tight WP for Seed BDT
    plt.plot(
        [_miniaod_rate(has_gsf_T, bkg)],
        [_miniaod_rate(has_gsf_T, sig)],
        marker='o',
        markerfacecolor='none',
        markeredgecolor='blue',
        markersize=8,
        linestyle='none',
        label='Low-pT GSF track, pT > 1.0 GeV, Tight Seed',
    )

    ########################################
    # Low-pT GSF electrons + ROC curve

    # pT > 0.5 GeV, VL WP for Seed BDT
    plt.plot(
        [_miniaod_rate(has_ele, bkg)],
        [_miniaod_rate(has_ele, sig)],
        marker='o',
        markerfacecolor='red',
        markeredgecolor='red',
        markersize=8,
        linestyle='none',
        label='Low-pT GSF electron, pT > 0.5 GeV, VLoose Seed',
    )

    # pT > 1.0 GeV, Tight WP for Seed BDT
    ele_eff = _miniaod_rate(has_ele_T, sig)
    ele_fr = _miniaod_rate(has_ele_T, bkg)
    plt.plot(
        [ele_fr],
        [ele_eff],
        marker='o',
        markerfacecolor='blue',
        markeredgecolor='blue',
        markersize=8,
        linestyle='none',
        label='Low-pT GSF electron, pT > 1.0 GeV, Tight Seed',
    )

    # The ID ROC curve is scaled so that it ends at the Tight Seed point.
    id_fpr, id_tpr, id_score, id_auc = _miniaod_roc(
        test.is_e[has_ele_T], test['training_out'][has_ele_T])
    plt.plot(id_fpr * ele_fr,
             id_tpr * ele_eff,
             linestyle='solid',
             color='black',
             linewidth=1.0,
             label='Low-pT ID, AUC={:.3f}'.format(id_auc))

    ########################################
    # EGamma GSF tracks and PF GSF electrons

    plt.plot([_miniaod_rate(eg_pfgsf, eg_bkg)],
             [_miniaod_rate(eg_pfgsf, eg_sig)],
             marker='o',
             color='green',
             markersize=8,
             linestyle='none',
             label='EGamma GSF track')

    pf_eff = _miniaod_rate(eg_ele, eg_sig)
    pf_fr = _miniaod_rate(eg_ele, eg_bkg)
    plt.plot([pf_fr], [pf_eff],
             marker='o',
             color='purple',
             markersize=8,
             linestyle='none',
             label='PF GSF electron')

    pf_id_fpr, pf_id_tpr, _, pf_id_auc = _miniaod_roc(
        egamma.is_e[eg_ele], egamma['ele_mva_value'][eg_ele])
    plt.plot(pf_id_fpr * pf_fr,
             pf_id_tpr * pf_eff,
             linestyle='dashed',
             color='purple',
             linewidth=1.0,
             label='PF ID, AUC={:.3f}'.format(pf_id_auc))

    #################
    # Working points

    # "Tight": ID threshold whose scaled mistag rate is closest to the PF one.
    wp = np.abs(id_fpr * ele_fr - pf_fr).argmin()
    same_fr = test['training_out'] > id_score[wp]
    wp_x, wp_y = id_fpr[wp] * ele_fr, id_tpr[wp] * ele_eff
    plt.plot([wp_x], [wp_y],
             marker='o',
             markerfacecolor='black',
             markeredgecolor='black',
             markersize=7)
    plt.text(wp_x,
             wp_y + 0.03,
             "Tight",
             fontsize=8,
             ha='center',
             va='center',
             color='black')

    # "Loose": same, but matched to twice the PF mistag rate.
    wp = np.abs(id_fpr * ele_fr - pf_fr * 2.).argmin()
    double_fr = test['training_out'] > id_score[wp]
    wp_x, wp_y = id_fpr[wp] * ele_fr, id_tpr[wp] * ele_eff
    plt.plot([wp_x], [wp_y],
             marker='o',
             markerfacecolor='none',
             markeredgecolor='black',
             markersize=7)
    plt.text(wp_x,
             wp_y + 0.03,
             "Loose",
             fontsize=8,
             ha='center',
             va='center',
             color='black')

    ##########
    # Finish up ...
    plt.legend(loc='lower right', framealpha=None, frameon=False)
    plt.tight_layout()
    plt.savefig(dir + '/roc.pdf')
    plt.clf()
    plt.close()

    ###########
    # Binning #
    ###########

    bin_edges = np.linspace(0., 4., 8, endpoint=False)
    bin_edges = np.append(bin_edges, np.linspace(4., 8., 4, endpoint=False))
    bin_edges = np.append(bin_edges, np.linspace(8., 12., 3, endpoint=True))

    # (branch name, bin edges, x-axis label) for every per-variable plot
    variables = [
        ('gsf_pt', bin_edges, 'Transverse momentum (GeV)'),
        ('gsf_mode_pt', bin_edges, 'Mode transverse momentum (GeV)'),
        ('gsf_dxy', np.linspace(0., 3.3, 12),
         'Transverse impact parameter w.r.t. beamspot (cm)'),
        ('gsf_dz', np.linspace(0., 22., 12),
         'Longitudinal impact parameter w.r.t. beamspot (cm)'),
        ('rho', np.linspace(0., 44., 12),
         'Median energy density from UE/pileup (GeV / unit area)'),
    ]

    def curves_for(attr, test_denom, egamma_denom):
        """Return the curve definitions for ``attr`` with the given denominators."""
        test_var = test[attr]
        return [
            {
                "label": "Open",
                "var": test_var,
                "mask": test_denom,
                "condition": has_ele,
                "colour": "red",
                "fill": True,
                "size": 7,
            },
            {
                "label": "Tight Seed",
                "var": test_var,
                "mask": test_denom,
                "condition": has_ele_T,
                "colour": "blue",
                "fill": True,
                "size": 7,
            },
            {
                "label": "PF ELE",
                "var": egamma[attr],
                "mask": egamma_denom,
                "condition": eg_ele,
                "colour": "purple",
                "fill": True,
                "size": 7,
            },
            {
                "label": "ID (Tight)",
                "var": test_var,
                "mask": test_denom,
                "condition": has_ele_T & same_fr,
                "colour": "black",
                "fill": True,
                "size": 7,
            },
            {
                "label": "ID (Loose)",
                "var": test_var,
                "mask": test_denom,
                "condition": has_ele_T & double_fr,
                "colour": "black",
                "fill": False,
                "size": 7,
            },
        ]

    ##############
    # EFF CURVES #
    ##############

    print("Efficiency curves ...")
    for attr, binning, xlabel in variables:
        print(attr)

        plt.figure()
        ax = plt.subplot(111)
        _miniaod_plot_ratio_curves(ax, curves_for(attr, sig, eg_sig), binning)

        ##########
        # Finish up ...
        plt.xlabel(xlabel)
        plt.ylabel('Efficiency')
        ax.set_xlim(binning[0], binning[-2])
        plt.ylim([0., 1.])
        plt.legend(loc='best')
        plt.tight_layout()
        plt.savefig(dir + '/eff_vs_{:s}.pdf'.format(attr))
        plt.clf()
        plt.close()

    #################
    # MISTAG CURVES #
    #################

    print("Mistag curves ...")
    for attr, binning, xlabel in variables:
        print(attr)

        plt.figure()
        ax = plt.subplot(111)
        _miniaod_plot_ratio_curves(ax, curves_for(attr, bkg, eg_bkg), binning)

        ##########
        # Finish up ...
        plt.xlabel(xlabel)
        plt.ylabel('Mistag rate')
        plt.gca().set_yscale('log')
        ax.set_xlim(binning[0], binning[-2])
        ax.set_ylim([0.0001, 1.])
        plt.legend(loc='best')
        plt.tight_layout()
        plt.savefig(dir + '/mistag_vs_{:s}.pdf'.format(attr))
        plt.clf()
        plt.close()
def _ul_rate(numer_mask, denom_mask):
    """Return the fraction of entries selected by ``denom_mask`` that also pass ``numer_mask``.

    Returns 0. when ``denom_mask`` selects nothing.
    """
    total = float(denom_mask.sum())
    if total > 0.:
        return float((numer_mask & denom_mask).sum()) / total
    return 0.


def _ul_roc(truth, score):
    """Return ``(fpr, tpr, thresholds, auc)`` for ``score`` evaluated against ``truth``.

    ``roc_auc_score`` is only called when ``truth`` holds more than one
    distinct value; otherwise the AUC is reported as 0.
    """
    fpr, tpr, thresholds = roc_curve(truth, score)
    auc = roc_auc_score(truth, score) if len(set(truth)) > 1 else 0.
    return fpr, tpr, thresholds, auc


def _ul_plot_ratio_curves(ax, curves, binning, label_format):
    """Draw the binned passed/total ratio of every curve on ``ax`` with error bars.

    Each entry of ``curves`` is a dict with the keys "label", "var", "mask",
    "condition", "colour", "fill", "size" and optionally "marker" (default
    'o'). "mask" selects the denominator, "mask" & "condition" the numerator.
    Points are drawn at the lower edge of each bin. ``label_format`` is a
    ``str.format`` template that receives the curve label and the overall
    passed/total ratio.
    """
    for curve in curves:
        his_total, _ = np.histogram(curve["var"][curve["mask"]], bins=binning)
        his_passed, _ = np.histogram(
            curve["var"][curve["mask"] & curve["condition"]], bins=binning)
        ratios, ylow, yhigh = [], [], []
        for passed, total in zip(his_passed, his_total):
            if total > 0:
                ratio = passed / total
                # NOTE(review): single call per bin; assumes binomial_hpdr is
                # deterministic and returns (lower, upper) first -- confirm.
                interval = binomial_hpdr(passed, total)
                ratios.append(ratio)
                ylow.append(ratio - interval[0])
                yhigh.append(interval[1] - ratio)
            else:
                # Empty bins are drawn at zero with no error bar.
                ratios.append(0.)
                ylow.append(0.)
                yhigh.append(0.)
        n_total = his_total.sum()
        mean = float(his_passed.sum()) / float(n_total) if n_total > 0 else 0.
        ax.errorbar(x=binning[:-1],
                    y=ratios,
                    yerr=[ylow, yhigh],
                    label=label_format.format(curve["label"], mean),
                    marker=curve.get("marker", 'o'),
                    color=curve["colour"],
                    markerfacecolor=curve["colour"] if curve["fill"] else "white",
                    markersize=curve["size"],
                    linewidth=0.5,
                    elinewidth=0.5)


def ultralegacy_vloose(dir, test, egamma, has_pfgsf_branches=True, AxE=True):
    """Produce the UltraLegacy (VLoose) performance plots and save them under ``dir``.

    Writes ``roc.pdf``, one ``eff_vs_<var>.pdf`` and one
    ``mistag_vs_<var>.pdf`` per binned variable, and one
    ``denom_vs_<var>.pdf`` denominator histogram per histogram variable.

    Args:
        dir: Output directory path (string); file names are appended to it.
        test: Low-pT candidates. Must support attribute access, ``[]`` column
            lookup and boolean-mask indexing (presumably a pandas DataFrame --
            confirm against the caller).
        egamma: EGamma/PF candidates, accessed the same way as ``test``.
        has_pfgsf_branches: Not used by this function; kept for interface
            compatibility.
        AxE: Not used by this function; kept for interface compatibility.
    """
    print('##### ULTRA LEGACY VLOOSE ##########################################################')

    ########################################
    # Selections, computed once and shared by every plot below

    # Low-pT collections: pT > 0.5 GeV, |eta| < 2.5
    has_gsf = (test.has_gsf) & (test.gsf_pt > 0.5) & (np.abs(test.gsf_eta) < 2.5)
    has_ele = (test.has_ele) & (test.ele_pt > 0.5) & (np.abs(test.ele_eta) < 2.5)
    # pT > 1.0 GeV, Tight WP for Seed BDT
    has_ele_T = has_ele & (test.gsf_pt > 1.0) & (
        (test.gsf_bdtout1 > 3.05) | (test.gsf_bdtout2 > 2.42))
    sig = has_gsf & test.is_e
    bkg = has_gsf & (~test.is_e)

    # EGamma collections: pT > 0.5 GeV, |eta| < 2.5. The PF GSF track mask is
    # not needed by any plot here, so the pfgsf_* branches are not read.
    eg_gsf = (egamma.has_gsf) & (egamma.gsf_pt > 0.5) & (np.abs(egamma.gsf_eta) < 2.5)
    eg_ele = (egamma.has_ele) & (egamma.ele_pt > 0.5) & (np.abs(egamma.ele_eta) < 2.5)
    eg_sig = eg_gsf & egamma.is_e
    eg_bkg = eg_gsf & (~egamma.is_e)

    #############
    # ROC CURVE #
    #############

    plt.figure(figsize=(6, 6))
    ax = plt.subplot(111)
    plt.title('Low-pT electron performance (UltraLegacy)')
    plt.xlim(0.8e-3, 1.)
    plt.ylim([0., 1.])
    plt.xlabel('Mistag rate (w.r.t. GSF tracks, pT > 0.5 GeV)')
    plt.ylabel('Efficiency (w.r.t. GSF tracks, pT > 0.5 GeV)')
    ax.tick_params(axis='x', pad=10.)
    plt.gca().set_xscale('log')
    plt.grid(True)

    ########################################
    # "by chance" line
    chance = np.arange(0., 1., plt.xlim()[0])
    plt.plot(chance, chance, ls='dotted', lw=0.5, label="By chance")

    ########################################
    # Low-pT GSF electrons + ROC curves

    # pT > 0.5 GeV, VL WP for Seed BDT
    plt.plot([_ul_rate(has_ele, bkg)], [_ul_rate(has_ele, sig)],
             marker='o', markerfacecolor='red', markeredgecolor='red',
             markersize=8, linestyle='none',
             label='BParking MINIAOD',
             )

    # pT > 1.0 GeV, Tight WP for Seed BDT
    ele_eff = _ul_rate(has_ele_T, sig)
    ele_fr = _ul_rate(has_ele_T, bkg)
    plt.plot([ele_fr], [ele_eff],
             marker='o', markerfacecolor='blue', markeredgecolor='blue',
             markersize=8, linestyle='none',
             label='UltraLegacy AOD',
             )

    # Both ID ROC curves are scaled so that they end at the Tight Seed point.
    id_branch = 'ele_mva_value_depth15'
    id_fpr, id_tpr, id_score, id_auc = _ul_roc(test.is_e[has_ele_T],
                                               test[id_branch][has_ele_T])
    plt.plot(id_fpr * ele_fr,
             id_tpr * ele_eff,
             linestyle='solid', color='blue', linewidth=1.0,
             label='ID, 2020Feb24 (AUC={:.3f})'.format(id_auc))

    id2_branch = 'training_out'
    id2_fpr, id2_tpr, _, id2_auc = _ul_roc(test.is_e[has_ele_T],
                                           test[id2_branch][has_ele_T])
    plt.plot(id2_fpr * ele_fr,
             id2_tpr * ele_eff,
             linestyle='dashed', color='blue', linewidth=1.0,
             label='ID, 2019Aug07 (AUC={:.3f})'.format(id2_auc))

    ########################################
    # EGamma PF GSF electrons

    pf_eff = _ul_rate(eg_ele, eg_sig)
    pf_fr = _ul_rate(eg_ele, eg_bkg)
    plt.plot([pf_fr], [pf_eff],
             marker='o', color='purple',
             markersize=8, linestyle='none',
             label='PF electron')

    pf_id_fpr, pf_id_tpr, _, pf_id_auc = _ul_roc(
        egamma.is_e[eg_ele], egamma['ele_mva_value_retrained'][eg_ele])
    plt.plot(pf_id_fpr * pf_fr,
             pf_id_tpr * pf_eff,
             linestyle='solid', color='purple', linewidth=1.0,
             label='ID, retrain (AUC={:.3f})'.format(pf_id_auc))

    pf_id2_fpr, pf_id2_tpr, _, pf_id2_auc = _ul_roc(
        egamma.is_e[eg_ele], egamma['ele_mva_value'][eg_ele])
    plt.plot(pf_id2_fpr * pf_fr,
             pf_id2_tpr * pf_eff,
             linestyle='dashed', color='purple', linewidth=1.0,
             label='ID, EGamma (AUC={:.3f})'.format(pf_id2_auc))

    #################
    # Working points

    # ID threshold whose scaled mistag rate is closest to the PF one.
    wp = np.abs(id_fpr * ele_fr - pf_fr).argmin()
    same_fr = test[id_branch] > id_score[wp]
    plt.plot([id_fpr[wp] * ele_fr], [id_tpr[wp] * ele_eff],
             marker='v', markerfacecolor='white', markeredgecolor='blue',
             markersize=8)

    # Same, but matched to twice the PF mistag rate.
    wp = np.abs(id_fpr * ele_fr - pf_fr * 2.).argmin()
    double_fr = test[id_branch] > id_score[wp]
    plt.plot([id_fpr[wp] * ele_fr], [id_tpr[wp] * ele_eff],
             marker='^', markerfacecolor='white', markeredgecolor='blue',
             markersize=8)

    ##########
    # Finish up ...
    plt.legend(loc='lower right', facecolor='white', frameon=False)
    plt.tight_layout()
    plt.savefig(dir + '/roc.pdf')
    plt.clf()
    plt.close()

    ###########
    # Binning #
    ###########

    bin_edges = np.linspace(0., 4., 8, endpoint=False)
    bin_edges = np.append(bin_edges, np.linspace(4., 8., 4, endpoint=False))
    bin_edges = np.append(bin_edges, np.linspace(8., 12., 3, endpoint=True))

    # (branch name, bin edges, x-axis label) for the histogram plots
    histogram_variables = [
        ('gsf_pt', bin_edges, 'Transverse momentum (GeV)'),
        ('gsf_mode_pt', bin_edges, 'Mode transverse momentum (GeV)'),
        ('gsf_dxy', np.linspace(0., 3.3, 12),
         'Transverse impact parameter w.r.t. beamspot (cm)'),
        ('gsf_dz', np.linspace(0., 22., 12),
         'Longitudinal impact parameter w.r.t. beamspot (cm)'),
        ('rho', np.linspace(0., 44., 12),
         'Median energy density from UE/pileup (GeV / unit area)'),
    ]
    # The efficiency and mistag plots additionally cover the generator-level pT.
    ratio_variables = [
        ('gen_pt', bin_edges, 'Generator-level transverse momentum (GeV)'),
    ] + histogram_variables

    def curves_for(attr, test_denom, egamma_denom):
        """Return the curve definitions for ``attr`` with the given denominators."""
        test_var = test[attr]
        return [
            {"label": "BParking MINIAOD", "var": test_var, "mask": test_denom,
             "condition": has_ele,
             "colour": "red", "fill": True, "size": 8},
            {"label": "UltraLegacy AOD", "var": test_var, "mask": test_denom,
             "condition": has_ele_T,
             "colour": "blue", "fill": True, "size": 8},
            {"label": "UL MINIAOD?", "var": test_var, "mask": test_denom,
             "condition": has_ele_T & double_fr,
             "colour": "blue", "fill": False, "size": 8, "marker": "^"},
            {"label": "UL MINIAOD?", "var": test_var, "mask": test_denom,
             "condition": has_ele_T & same_fr,
             "colour": "blue", "fill": False, "size": 8, "marker": "v"},
            {"label": "PF electron", "var": egamma[attr], "mask": egamma_denom,
             "condition": eg_ele,
             "colour": "purple", "fill": True, "size": 8},
        ]

    ##############
    # EFF CURVES #
    ##############

    print("Efficiency curves ...")
    for attr, binning, xlabel in ratio_variables:
        print(attr)

        plt.figure()
        ax = plt.subplot(111)
        _ul_plot_ratio_curves(ax, curves_for(attr, sig, eg_sig), binning,
                              '{:s} (mean={:5.3f})')

        ##########
        # Finish up ...
        plt.title('Low-pT electron performance (UltraLegacy)')
        plt.xlabel(xlabel)
        plt.ylabel('Efficiency (w.r.t. GSF tracks, pT > 0.5 GeV)')
        ax.set_xlim(binning[0], binning[-2])
        plt.ylim([0., 1.])
        plt.legend(loc='lower right', facecolor='white', frameon=False)
        plt.tight_layout()
        plt.savefig(dir + '/eff_vs_{:s}.pdf'.format(attr))
        plt.clf()
        plt.close()

    #################
    # MISTAG CURVES #
    #################

    print("Mistag curves ...")
    for attr, binning, xlabel in ratio_variables:
        print(attr)

        plt.figure()
        ax = plt.subplot(111)
        _ul_plot_ratio_curves(ax, curves_for(attr, bkg, eg_bkg), binning,
                              '{:s} (mean={:6.4f})')

        ##########
        # Finish up ...
        plt.title('Low-pT electron performance (UltraLegacy)')
        plt.xlabel(xlabel)
        plt.ylabel('Mistag rate (w.r.t. GSF tracks, pT > 0.5 GeV)')
        ax.set_xlim(binning[0], binning[-2])
        ax.set_ylim([0., 0.22])
        plt.legend(loc='lower right', facecolor='white', frameon=False)
        plt.tight_layout()
        plt.savefig(dir + '/mistag_vs_{:s}.pdf'.format(attr))
        plt.clf()
        plt.close()

    #############
    # HISTOGRAM #
    #############

    print("Histograms ...")
    for attr, binning, xlabel in histogram_variables:
        print(attr)

        plt.figure()
        ax = plt.subplot(111)

        # Bin widths relative to the first bin of the binning actually used
        # for this variable. Reusing the pT bin widths for every variable
        # would scale down the upper bins of the uniformly binned variables.
        rel_widths = np.diff(binning)
        rel_widths = rel_widths / rel_widths[0]

        test_var = test[attr]
        egamma_var = egamma[attr]
        curves = [
            {"label": "Low-pT (signal)", "var": test_var, "mask": sig,
             "condition": has_ele,
             "colour": "red", "fill": False, "size": 7},
            {"label": "Low-pT (bkgd)", "var": test_var, "mask": bkg,
             "condition": has_ele,
             "colour": "red", "fill": True, "size": 7},
            {"label": "EGamma (signal)", "var": egamma_var, "mask": eg_sig,
             "condition": eg_ele,
             "colour": "purple", "fill": False, "size": 7},
            {"label": "EGamma (bkgd)", "var": egamma_var, "mask": eg_bkg,
             "condition": eg_ele,
             "colour": "purple", "fill": True, "size": 7},
        ]

        for curve in curves:
            his_total, _ = np.histogram(curve["var"][curve["mask"]], bins=binning)
            his_passed, _ = np.histogram(
                curve["var"][curve["mask"] & curve["condition"]], bins=binning)
            # Denominator counts per unit of relative bin width.
            density = [
                count / width if count > 0 else 0.
                for width, count in zip(rel_widths, his_total)
            ]
            no_error = [0.] * len(density)
            n_total = his_total.sum()
            mean = float(his_passed.sum()) / float(n_total) if n_total > 0 else 0.
            ax.errorbar(x=binning[:-1],
                        y=density,
                        yerr=[no_error, no_error],
                        label='{:s} (mean={:5.3f})'.format(curve["label"], mean),
                        marker='o',
                        color=curve["colour"],
                        markerfacecolor=curve["colour"] if curve["fill"] else "white",
                        markersize=curve["size"],
                        linewidth=0.5,
                        elinewidth=0.5)

        ##########
        # Finish up ...
        plt.xlabel(xlabel)
        plt.ylabel('Denominator (w.r.t. GSF tracks, pT > 0.5 GeV)')
        ax.set_xlim(binning[0], binning[-2])
        plt.ylim([0., None])
        plt.legend(loc='lower right', facecolor='white', frameon=False)
        plt.tight_layout()
        plt.savefig(dir + '/denom_vs_{:s}.pdf'.format(attr))
        plt.clf()
        plt.close()