def _bparking_ratio(numer, denom):
    """Return ``numer / denom`` as a float, or ``0.`` if ``denom`` is not positive."""
    denom = float(denom)
    return float(numer) / denom if denom > 0. else 0.


def _bparking_acceptance(sample, prefix, pt_min=0.5, abs_eta_max=2.5):
    """Mask of entries with ``has_<prefix>``, ``<prefix>_pt > pt_min`` and ``|<prefix>_eta| < abs_eta_max``."""
    return (getattr(sample, 'has_' + prefix)) & (
        getattr(sample, prefix + '_pt') > pt_min) & (
            np.abs(getattr(sample, prefix + '_eta')) < abs_eta_max)


def _bparking_eff_and_mistag(selected, reference, is_signal):
    """Return (efficiency, mistag rate) of ``selected`` relative to ``reference``.

    Efficiency is the selected fraction of ``reference & is_signal``; the
    mistag rate is the selected fraction of ``reference & ~is_signal``.
    """
    signal = reference & is_signal
    background = reference & (~is_signal)
    eff = _bparking_ratio((selected & signal).sum(), signal.sum())
    mistag = _bparking_ratio((selected & background).sum(), background.sum())
    return eff, mistag


def _bparking_auc(truth, score):
    """Return the ROC AUC, or ``0.`` when ``truth`` holds fewer than two classes."""
    return roc_auc_score(truth, score) if len(set(truth)) > 1 else 0.


def _bparking_threshold_at_rate(fpr, thresholds, scale, target_rate):
    """Return the ROC threshold whose scaled false-positive rate is closest to ``target_rate``."""
    return thresholds[np.abs(fpr * scale - target_rate).argmin()]


def _bparking_plot_binned(curves,
                          binning,
                          xlabel,
                          ylabel,
                          label_format,
                          ylim,
                          out_path,
                          log_y=False):
    """Draw one binned pass-fraction plot (one errorbar series per curve) and save it.

    Each entry of ``curves`` is a dict with keys ``label``, ``var``, ``mask``
    (denominator selection), ``condition`` (numerator requirement on top of
    ``mask``), ``colour``, ``fill``, ``size`` and ``marker``.
    """
    plt.figure()
    ax = plt.subplot(111)

    # Points are drawn at the left edge of each bin, which is why the x-range
    # below stops at the second-to-last edge.
    left_edges = binning[:-1]

    for curve in curves:
        values = curve["var"]
        in_denominator = curve["mask"]
        totals, _ = np.histogram(values[in_denominator], bins=binning)
        passes, _ = np.histogram(values[in_denominator & curve["condition"]],
                                 bins=binning)

        rates, err_low, err_high = [], [], []
        for n_pass, n_total in zip(passes, totals):
            if n_total > 0:
                # Explicit float casts, consistent with the integrated rates,
                # so the ratio never depends on integer-division semantics.
                rate = float(n_pass) / float(n_total)
                # Indexed as [0] = lower bound, [1] = upper bound.
                # NOTE(review): evaluated once per bin instead of twice;
                # assumes binomial_hpdr is deterministic -- confirm.
                interval = binomial_hpdr(n_pass, n_total)
                rates.append(rate)
                err_low.append(rate - interval[0])
                err_high.append(interval[1] - rate)
            else:
                # Empty bins are drawn at zero with no error bar.
                rates.append(0.)
                err_low.append(0.)
                err_high.append(0.)

        label = label_format.format(
            curve["label"], _bparking_ratio(passes.sum(), totals.sum()))
        ax.errorbar(
            x=left_edges,
            y=rates,
            yerr=[err_low, err_high],
            label=label,
            marker=curve["marker"],
            color=curve["colour"],
            markerfacecolor=curve["colour"] if curve["fill"] else "white",
            markersize=curve["size"],
            linewidth=0.5,
            elinewidth=0.5)

    plt.title('Low-pT electron performance (BParking)')
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    if log_y:
        ax.set_yscale('log')
    ax.set_xlim(binning[0], binning[-2])
    ax.set_ylim(ylim)
    plt.legend(loc='lower right',
               facecolor='white',
               framealpha=None,
               frameon=False)
    plt.tight_layout()
    plt.savefig(out_path)
    plt.clf()
    plt.close()


def bparking_binned(dir, test, egamma, has_pfgsf_branches=True, AxE=True):
    """Produce the BParking ROC, efficiency and mistag-rate plots.

    Writes ``roc.pdf`` and, for each binned variable (``gen_pt``, ``gsf_pt``,
    ``gsf_mode_pt``, ``gsf_dxy``, ``gsf_dz``, ``rho``), ``eff_vs_<var>.pdf``
    and ``mistag_vs_<var>.pdf`` into ``dir``. Efficiencies and mistag rates
    are measured relative to entries passing the track acceptance
    (``has_trk``, ``trk_pt > 0.5``, ``|trk_eta| < 2.5``).

    Args:
        dir: Output directory; file names are appended after a ``'/'``.
            The name shadows the builtin but is part of the signature.
        test: Low-pT electron sample. Must support attribute access
            (``test.has_trk``), item access by branch name and boolean-mask
            indexing -- presumably a pandas DataFrame; confirm against the
            caller. Reads ``has_trk``, ``trk_pt``, ``trk_eta``, ``has_ele``,
            ``ele_pt``, ``ele_eta``, ``is_e``, ``ele_mva_value_depth15`` and
            the binned variables. ``is_e`` is combined with ``&`` and ``~``,
            so it has to be a boolean mask.
        egamma: PF electron sample with the same access pattern. Reads the
            same kinematic branches and ``is_e``, plus
            ``ele_mva_value_retrained``, ``ele_mva_value`` and the binned
            variables.
        has_pfgsf_branches: Not used by this function.
        AxE: Not used by this function.

    Returns:
        None. The only outputs are the saved figures and progress messages
        printed to stdout.
    """
    print(
        '##### BPARKING BINNED ##########################################################'
    )

    # Acceptance masks are identical for every plot, so build them once.
    trk_ok = _bparking_acceptance(test, 'trk')
    ele_ok = _bparking_acceptance(test, 'ele')
    ele_high_ok = ele_ok & (test.ele_pt > 2.0)
    ele_low_ok = ele_ok & (test.ele_pt < 2.0)
    pf_trk_ok = _bparking_acceptance(egamma, 'trk')
    pf_ele_ok = _bparking_acceptance(egamma, 'ele')

    #############
    # ROC CURVE #
    #############

    plt.figure(figsize=(6, 6))
    ax = plt.subplot(111)
    plt.title('Low-pT electron performance (BParking)')
    plt.xlim(1.e-4, 1.)
    plt.ylim([0., 1.])
    plt.xlabel('Mistag rate (w.r.t. KF tracks, pT > 0.5 GeV)')
    plt.ylabel('Efficiency (w.r.t. KF tracks, pT > 0.5 GeV)')
    ax.tick_params(axis='x', pad=10.)
    plt.gca().set_xscale('log')
    plt.grid(True)

    # "By chance" diagonal, sampled with a step equal to the lower x-limit.
    chance = np.arange(0., 1., plt.xlim()[0])
    plt.plot(chance, chance, ls='dotted', lw=0.5, label="By chance")

    # Low-pT electrons: inclusive, pT > 2.0 GeV and pT < 2.0 GeV. Each gets
    # an operating point plus an ID ROC curve scaled to that point; only the
    # inclusive curve carries a legend entry.
    id_branch = 'ele_mva_value_depth15'
    selections = [
        (ele_ok, 'o', 'Low-pT electron', 'solid', True),
        (ele_high_ok, '^', 'pT > 2.0 GeV', 'dotted', False),
        (ele_low_ok, 'v', 'pT < 2.0 GeV', 'dotted', False),
    ]
    roc_inputs = []  # (fpr, thresholds, mistag) per selection, for the WPs
    for selected, marker, point_label, line_style, show_auc in selections:
        eff, mistag = _bparking_eff_and_mistag(selected, trk_ok, test.is_e)
        plt.plot(
            [mistag],
            [eff],
            marker=marker,
            markerfacecolor='blue',
            markeredgecolor='blue',
            markersize=8,
            linestyle='none',
            label=point_label,
        )

        truth = test.is_e[selected]
        score = test[id_branch][selected]
        fpr, tpr, thresholds = roc_curve(truth, score)
        curve_style = dict(linestyle=line_style, color='blue', linewidth=1.0)
        if show_auc:
            curve_style['label'] = 'ID, 2020Feb24 (AUC={:.3f})'.format(
                _bparking_auc(truth, score))
        plt.plot(fpr * mistag, tpr * eff, **curve_style)
        roc_inputs.append((fpr, thresholds, mistag))

    # EGamma PF GSF electrons: operating point plus two ID ROC curves.
    pf_eff, pf_fr = _bparking_eff_and_mistag(pf_ele_ok, pf_trk_ok,
                                             egamma.is_e)
    plt.plot([pf_fr], [pf_eff],
             marker='o',
             color='purple',
             markersize=8,
             linestyle='none',
             label='PF electron')

    pf_truth = egamma.is_e[pf_ele_ok]
    pf_ids = [
        ('ele_mva_value_retrained', 'solid', 'ID, retrain (AUC={:.3f})'),
        ('ele_mva_value', 'dashed', 'ID, EGamma (AUC={:.3f})'),
    ]
    for pf_branch, line_style, label_format in pf_ids:
        pf_score = egamma[pf_branch][pf_ele_ok]
        pf_fpr, pf_tpr, _ = roc_curve(pf_truth, pf_score)
        pf_auc = _bparking_auc(pf_truth, pf_score)
        plt.plot(pf_fpr * pf_fr,
                 pf_tpr * pf_eff,
                 linestyle=line_style,
                 color='purple',
                 linewidth=1.0,
                 label=label_format.format(pf_auc))

    # Working points: for each low-pT selection, the ID cut whose scaled
    # mistag rate is closest to the PF electron mistag rate.
    same_fr, same_fr_high, same_fr_low = [
        test[id_branch] > _bparking_threshold_at_rate(fpr, thresholds, mistag,
                                                      pf_fr)
        for fpr, thresholds, mistag in roc_inputs
    ]

    plt.legend(loc='upper left',
               facecolor='white',
               framealpha=None,
               frameon=False)
    plt.tight_layout()
    plt.savefig(dir + '/roc.pdf')
    plt.clf()
    plt.close()

    ##########################
    # EFF AND MISTAG CURVES  #
    ##########################

    # Variable-width pT binning: 0.5 steps up to 4, 1.0 steps up to 8, then
    # 2.0 steps up to 12.
    bin_edges = np.concatenate([
        np.linspace(0., 4., 8, endpoint=False),
        np.linspace(4., 8., 4, endpoint=False),
        np.linspace(8., 12., 3, endpoint=True),
    ])

    # (branch name, bin edges, x-axis label)
    plot_specs = [
        ('gen_pt', bin_edges, 'Generator-level transverse momentum (GeV)'),
        ('gsf_pt', bin_edges, 'Transverse momentum (GeV)'),
        ('gsf_mode_pt', bin_edges, 'Mode transverse momentum (GeV)'),
        ('gsf_dxy', np.linspace(0., 3.3, 12),
         'Transverse impact parameter w.r.t. beamspot (cm)'),
        ('gsf_dz', np.linspace(0., 22., 12),
         'Longitudinal impact parameter w.r.t. beamspot (cm)'),
        ('rho', np.linspace(0., 44., 12),
         'Median energy density from UE/pileup (GeV / unit area)'),
    ]

    def binned_curves(attr, want_electrons):
        """Curve definitions for ``attr``; true electrons for efficiency, the rest for mistag."""
        test_truth = test.is_e if want_electrons else ~test.is_e
        egamma_truth = egamma.is_e if want_electrons else ~egamma.is_e
        test_mask = (test_truth) & (trk_ok)
        test_values = test[attr]
        return [
            {
                "label": "Low-pT electron",
                "var": test_values,
                "mask": test_mask,
                "condition": ele_ok,
                "colour": "blue",
                "fill": True,
                "size": 8,
                "marker": "o",
            },
            {
                "label": "Same mistag rate",
                "var": test_values,
                "mask": test_mask,
                "condition": (ele_ok) & (same_fr),
                "colour": "blue",
                "fill": False,
                "size": 8,
                "marker": "o",
            },
            {
                "label": "Same mistag rate",
                "var": test_values,
                "mask": test_mask,
                "condition": (ele_high_ok) & (same_fr_high),
                "colour": "blue",
                "fill": False,
                "size": 8,
                "marker": "^",
            },
            {
                "label": "Same mistag rate",
                "var": test_values,
                "mask": test_mask,
                "condition": (ele_low_ok) & (same_fr_low),
                "colour": "blue",
                "fill": False,
                "size": 8,
                "marker": "v",
            },
            {
                "label": "PF electron",
                "var": egamma[attr],
                "mask": (egamma_truth) & (pf_trk_ok),
                "condition": pf_ele_ok,
                "colour": "purple",
                "fill": True,
                "size": 8,
                "marker": "o",
            },
        ]

    print("Efficiency curves ...")
    for attr, binning, xlabel in plot_specs:
        print(attr)
        _bparking_plot_binned(
            binned_curves(attr, True),
            binning,
            xlabel,
            ylabel='Efficiency (w.r.t. KF tracks, pT > 0.5 GeV)',
            label_format='{:s} (mean={:5.3f})',
            ylim=[0., 1.],
            out_path=dir + '/eff_vs_{:s}.pdf'.format(attr),
            log_y=False)

    print("Mistag curves ...")
    for attr, binning, xlabel in plot_specs:
        print(attr)
        _bparking_plot_binned(
            binned_curves(attr, False),
            binning,
            xlabel,
            ylabel='Mistag rate (w.r.t. KF tracks, pT > 0.5 GeV)',
            label_format='{:s} (mean={:6.4f})',
            ylim=[1.e-4, 1.],
            out_path=dir + '/mistag_vs_{:s}.pdf'.format(attr),
            log_y=True)
# Example #2
# 0
def mauro(dir, test, egamma, has_pfgsf_branches=True, AxE=True):
    print(
        '##### MAURO ##########################################################'
    )

    #############
    # ROC CURVE #
    #############

    plt.figure(figsize=(6, 6))
    ax = plt.subplot(111)
    plt.title('Efficiency and mistag rate w.r.t. GSF tracks')
    plt.xlim(1.e-3, 1.1)
    plt.ylim([0., 0.6]) if AxE is True else plt.ylim([0., 1.03])
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    ax.tick_params(axis='x', pad=10.)
    plt.gca().set_xscale('log')
    plt.grid(True)

    ########################################
    # "by chance" line
    plt.plot(np.arange(0., 1.,
                       plt.xlim()[0]),
             np.arange(0., 1.,
                       plt.xlim()[0]),
             'k--',
             lw=0.5)

    ########################################
    # Low-pT GSF electrons + ROC curves

    has_gsf = (test.has_gsf) & (test.gsf_pt > 0.5) & (np.abs(test.gsf_eta) <
                                                      2.5)
    has_pfgsf = (test.has_pfgsf) & (test.pfgsf_pt > 0.5) & (np.abs(
        test.pfgsf_eta) < 2.5)
    #has_gsf |= has_pfgsf
    has_ele = (test.has_ele) & (test.ele_pt > 0.5) & (np.abs(test.ele_eta) <
                                                      2.5)

    # Eff and FR
    if AxE is True:
        denom = test.is_e
        numer = has_ele & denom
        ele_eff = float(numer.sum()) / float(denom.sum()) if float(
            denom.sum()) > 0. else 0.
    else:
        denom = has_gsf & test.is_e
        numer = has_ele & denom
        ele_eff = float(numer.sum()) / float(denom.sum()) if float(
            denom.sum()) > 0. else 0.
    denom = has_gsf & (~test.is_e)
    numer = has_ele & denom
    ele_fr = float(numer.sum()) / float(denom.sum()) if float(
        denom.sum()) > 0. else 0.
    plt.plot([ele_fr], [ele_eff],
             marker='o',
             color='blue',
             markersize=8,
             linestyle=None)

    id_fpr, id_tpr, id_score = roc_curve(test.is_e[has_ele],
                                         test['training_out'][has_ele])
    id_auc = roc_auc_score(test.is_e[has_ele], test['training_out'][has_ele])
    plt.plot(id_fpr * ele_fr,
             id_tpr * ele_eff,
             linestyle='solid',
             color='black',
             linewidth=1.0,
             label='Low-pT GSF electron + ID, AUC={:.3f}'.format(id_auc))

    # Unbiased seed BDT
    ele_unb_fpr, ele_unb_tpr, ele_unb_score = roc_curve(
        test.is_e[has_ele], test.gsf_bdtout1[has_ele])
    ele_unb_auc = roc_auc_score(test.is_e[has_ele], test.gsf_bdtout1[has_ele])
    plt.plot(
        ele_unb_fpr * ele_fr,
        ele_unb_tpr * ele_eff,
        linestyle='solid',
        color='blue',
        linewidth=1.0,
        label='Low-pT GSF electron + unbiased seed BDT, AUC={:.3f}'.format(
            ele_unb_auc))

    # Biased seed BDT
    ele_b_fpr, ele_b_tpr, ele_b_score = roc_curve(test.is_e[has_ele],
                                                  test.gsf_bdtout2[has_ele])
    ele_b_auc = roc_auc_score(
        test.is_e[has_ele],
        test.gsf_bdtout2[has_ele]) if len(set(test.is_e[has_ele])) > 1 else 0.
    plt.plot(ele_b_fpr * ele_fr,
             ele_b_tpr * ele_eff,
             linestyle='dashed',
             color='blue',
             linewidth=0.5,
             label='Low-pT GSF electron + biased seed BDT, AUC={:.3f}'.format(
                 ele_b_auc))

    ########################################
    # Low-pT GSF tracks + ROC curves

    has_gsf = (test.has_gsf) & (test.gsf_pt > 0.5) & (np.abs(test.gsf_eta) <
                                                      2.5)
    has_pfgsf = (test.has_pfgsf) & (test.pfgsf_pt > 0.5) & (np.abs(
        test.pfgsf_eta) < 2.5)
    #has_gsf |= has_pfgsf
    has_ele = None

    # Eff and FR
    if AxE is True:
        denom = test.is_e
        numer = has_gsf & denom
        gsf_eff = float(numer.sum()) / float(denom.sum()) if float(
            denom.sum()) > 0. else 0.
    else:
        denom = has_gsf & test.is_e
        numer = has_gsf & denom
        gsf_eff = float(numer.sum()) / float(denom.sum()) if float(
            denom.sum()) > 0. else 0.
    denom = has_gsf & (~test.is_e)
    numer = has_gsf & denom
    gsf_fr = float(numer.sum()) / float(denom.sum()) if float(
        denom.sum()) > 0. else 0.
    plt.plot([gsf_fr], [gsf_eff],
             marker='o',
             color='red',
             markersize=8,
             linestyle='None')

    # Unbiased seed BDT
    gsf_unb_fpr, gsf_unb_tpr, gsf_unb_score = roc_curve(
        test.is_e[has_gsf], test.gsf_bdtout1[has_gsf])
    gsf_unb_auc = roc_auc_score(test.is_e[has_gsf], test.gsf_bdtout1[has_gsf])
    plt.plot(gsf_unb_fpr * gsf_fr,
             gsf_unb_tpr * gsf_eff,
             linestyle='solid',
             color='red',
             linewidth=1.0,
             label='Low-pT GSF track + unbiased seed BDT, AUC={:.3f}'.format(
                 gsf_unb_auc))

    # Biased seed BDT
    gsf_b_fpr, gsf_b_tpr, gsf_b_score = roc_curve(test.is_e[has_gsf],
                                                  test.gsf_bdtout2[has_gsf])
    gsf_b_auc = roc_auc_score(
        test.is_e[has_gsf],
        test.gsf_bdtout2[has_gsf]) if len(set(test.is_e[has_gsf])) > 1 else 0.
    plt.plot(gsf_b_fpr * gsf_fr,
             gsf_b_tpr * gsf_eff,
             linestyle='dashed',
             color='red',
             linewidth=0.5,
             label='Low-pT GSF track + biased seed BDT, AUC={:.3f}'.format(
                 gsf_b_auc))

    ########################################
    # EGamma GSF tracks and PF GSF electrons

    has_gsf = (egamma.has_gsf) & (egamma.gsf_pt > 0.5) & (np.abs(
        egamma.gsf_eta) < 2.5)
    has_pfgsf = (egamma.has_pfgsf) & (egamma.pfgsf_pt > 0.5) & (np.abs(
        egamma.pfgsf_eta) < 2.5)
    #has_gsf |= has_pfgsf
    #has_gsf |= egamma.seed_ecal_driven
    has_ele = (egamma.has_ele) & (egamma.ele_pt > 0.5) & (np.abs(
        egamma.ele_eta) < 2.5)

    # Eff and FR (EGamma GSF tracks)
    if AxE is True:
        denom = egamma.is_e
        numer = has_pfgsf & denom
        eg_eff = float(numer.sum()) / float(denom.sum()) if float(
            denom.sum()) > 0. else 0.
    else:
        denom = has_gsf & egamma.is_e
        numer = has_pfgsf & denom
        eg_eff = float(numer.sum()) / float(denom.sum()) if float(
            denom.sum()) > 0. else 0.
    denom = has_gsf & (~egamma.is_e)
    numer = has_pfgsf & denom
    eg_fr = float(numer.sum()) / float(denom.sum()) if float(
        denom.sum()) > 0. else 0.
    plt.plot([eg_fr], [eg_eff],
             marker='o',
             color='green',
             markersize=8,
             linestyle='None',
             label='EGamma GSF track')

    # Eff and FR (EGamma PF GSF electrons)
    if AxE is True:
        denom = egamma.is_e
        numer = has_ele & denom
        pf_eff = float(numer.sum()) / float(denom.sum()) if float(
            denom.sum()) > 0. else 0.
    else:
        denom = has_gsf & egamma.is_e
        numer = has_ele & denom
        pf_eff = float(numer.sum()) / float(denom.sum()) if float(
            denom.sum()) > 0. else 0.
    denom = has_gsf & (~egamma.is_e)
    numer = has_ele & denom
    pf_fr = float(numer.sum()) / float(denom.sum()) if float(
        denom.sum()) > 0. else 0.
    plt.plot([pf_fr], [pf_eff],
             marker='o',
             color='purple',
             markersize=8,
             linestyle='None',
             label='PF GSF electron')

    print('eff: {:.3f}, mistag: {:.4f}'.format(ele_eff, ele_fr),
          'Low-pT GSF electrons + ID')
    print('eff: {:.3f}, mistag: {:.4f}'.format(gsf_eff, gsf_fr),
          'Low-pT GSF electrons + unbiased seed BDT')
    print('eff: {:.3f}, mistag: {:.4f}'.format(eg_eff, eg_fr),
          'EGamma GSF tracks')
    print('eff: {:.3f}, mistag: {:.4f}'.format(pf_eff, pf_fr),
          'EGamma GSF electrons')

    ##########
    # EGamma GSF electrons (ECAL-driven)

    #   has_gsf |= egamma.seed_ecal_driven
    #   has_ele &= egamma.seed_ecal_driven
    #
    #   if AxE is True :
    #      denom = egamma.is_e; numer = has_ele&denom
    #      _eff = float(numer.sum()) / float(denom.sum()) if float(denom.sum()) > 0. else 0.
    #   else :
    #      denom = has_gsf&egamma.is_e; numer = has_ele&denom
    #      _eff = float(numer.sum()) / float(denom.sum()) if float(denom.sum()) > 0. else 0.
    #   denom = has_gsf&(~egamma.is_e); numer = has_ele&denom
    #   _fr = float(numer.sum()) / float(denom.sum()) if float(denom.sum()) > 0. else 0.
    #   plt.plot([_fr], [_eff],
    #            marker='o', markerfacecolor='none', markeredgecolor='purple', markersize=8, linestyle='None',
    #            label='PF GSF electron (ECAL-driven)')
    #   print('eff: {:.3f}, mistag: {:.4f}'.format(_eff,_fr),'EGamma GSF electrons (ECAL-driven)')

    # "New" WPs
    unb_L = np.abs(gsf_unb_fpr * gsf_fr - eg_fr * 10.).argmin()
    unb_M = np.abs(gsf_unb_fpr * gsf_fr - eg_fr * 3.).argmin()
    unb_T = np.abs(gsf_unb_fpr * gsf_fr - eg_fr).argmin()
    unb_VT = np.abs(gsf_unb_tpr * gsf_eff - eg_eff).argmin()
    unb_ELE = np.abs(gsf_unb_fpr * gsf_fr - pf_fr).argmin()  # same FR
    id_ELE = np.abs(id_fpr * ele_fr - pf_fr).argmin()  # same FR

    print("EG GSF track:      FR, Eff, score:",
          "{:.4f}, {:.3f}, {:5.2f} ".format(eg_fr, eg_eff, np.nan))
    print("VLoose   (10% FR): FR, Eff, score:",
          "{:.4f}, {:.3f}, {:5.2f} ".format(gsf_fr, gsf_eff, np.nan))
    print(
        "Loose    (x10 FR): FR, Eff, score:",
        "{:.4f}, {:.3f}, {:5.2f} ".format(gsf_unb_fpr[unb_L] * gsf_fr,
                                          gsf_unb_tpr[unb_L] * gsf_eff,
                                          gsf_unb_score[unb_L]))
    print(
        "Medium    (x3 FR): FR, Eff, score:",
        "{:.4f}, {:.3f}, {:5.2f} ".format(gsf_unb_fpr[unb_M] * gsf_fr,
                                          gsf_unb_tpr[unb_M] * gsf_eff,
                                          gsf_unb_score[unb_M]))
    print(
        "Tight   (same FR): FR, Eff, score:",
        "{:.4f}, {:.3f}, {:5.2f} ".format(gsf_unb_fpr[unb_T] * gsf_fr,
                                          gsf_unb_tpr[unb_T] * gsf_eff,
                                          gsf_unb_score[unb_T]))
    print(
        "VTight (same eff): FR, Eff, score:",
        "{:.4f}, {:.3f}, {:5.2f} ".format(gsf_unb_fpr[unb_VT] * gsf_fr,
                                          gsf_unb_tpr[unb_VT] * gsf_eff,
                                          gsf_unb_score[unb_VT]))
    print("PF GSF electron:   FR, Eff, score:",
          "{:.4f}, {:.3f}, {:5.2f} ".format(pf_fr, pf_eff, np.nan))
    print(
        "Unb/PF  (same FR): FR, Eff, score:",
        "{:.4f}, {:.3f}, {:5.2f} ".format(ele_unb_fpr[unb_ELE] * ele_fr,
                                          ele_unb_tpr[unb_ELE] * ele_eff,
                                          ele_unb_score[unb_ELE]))
    print(
        "ID/PF   (same FR): FR, Eff, score:",
        "{:.4f}, {:.3f}, {:5.2f} ".format(id_fpr[id_ELE] * ele_fr,
                                          id_tpr[id_ELE] * ele_eff,
                                          id_score[id_ELE]))

    x, y = gsf_unb_fpr[unb_L] * gsf_fr, gsf_unb_tpr[unb_L] * gsf_eff
    plt.plot([x], [y],
             marker='o',
             markerfacecolor='none',
             markeredgecolor='green',
             markersize=4)
    plt.text(x,
             y - 0.02,
             "L",
             fontsize=10,
             ha='center',
             va='center',
             color='green')

    x, y = gsf_unb_fpr[unb_M] * gsf_fr, gsf_unb_tpr[unb_M] * gsf_eff
    plt.plot([x], [y],
             marker='o',
             markerfacecolor='none',
             markeredgecolor='green',
             markersize=4)
    plt.text(x,
             y - 0.02,
             "M",
             fontsize=10,
             ha='center',
             va='center',
             color='green')

    x, y = gsf_unb_fpr[unb_T] * gsf_fr, gsf_unb_tpr[unb_T] * gsf_eff
    plt.plot([x], [y],
             marker='o',
             markerfacecolor='none',
             markeredgecolor='green',
             markersize=4)
    plt.text(x,
             y - 0.02,
             "T",
             fontsize=10,
             ha='center',
             va='center',
             color='green')

    x, y = gsf_unb_fpr[unb_VT] * gsf_fr, gsf_unb_tpr[unb_VT] * gsf_eff
    plt.plot([x], [y],
             marker='o',
             markerfacecolor='none',
             markeredgecolor='green',
             markersize=4)
    plt.text(x,
             y - 0.02,
             "VT",
             fontsize=10,
             ha='center',
             va='center',
             color='green')

    x, y = ele_unb_fpr[unb_ELE] * ele_fr, ele_unb_tpr[unb_ELE] * ele_eff
    plt.plot([x], [y],
             marker='o',
             markerfacecolor='none',
             markeredgecolor='purple',
             markersize=4)
    plt.text(x,
             y - 0.02,
             "E",
             fontsize=10,
             ha='center',
             va='center',
             color='purple')

    x, y = id_fpr[id_ELE] * ele_fr, id_tpr[id_ELE] * ele_eff
    plt.plot([x], [y],
             marker='o',
             markerfacecolor='none',
             markeredgecolor='purple',
             markersize=4)
    plt.text(x,
             y - 0.02,
             "E",
             fontsize=10,
             ha='center',
             va='center',
             color='purple')

    # Original WPs
    # https://github.com/cms-sw/cmssw/blob/CMSSW_10_2_X/RecoEgamma/EgammaElectronProducers/python/lowPtGsfElectronSeeds_cfi.py
    #orig_VL  = np.abs(gsf_unb_score-0.19).argmin()
    #orig_L   = np.abs(gsf_unb_score-1.20).argmin()
    #orig_M   = np.abs(gsf_unb_score-2.02).argmin()
    #orig_T   = np.abs(gsf_unb_score-3.05).argmin()
    #orig_ELE = np.abs(gsf_unb_score-5.26).argmin() # same FR?

    #x,y = gsf_unb_fpr[orig_VL]*gsf_fr,gsf_unb_tpr[orig_VL]*gsf_eff
    #plt.plot([x],[y], marker='^', markerfacecolor='none', markeredgecolor='green', markersize=4)
    #plt.text(x, y-0.02, "VL", fontsize=10, ha='center', va='center', color='green' )

    #x,y = gsf_unb_fpr[orig_L]*gsf_fr,gsf_unb_tpr[orig_L]*gsf_eff
    #plt.plot([x],[y], marker='^', markerfacecolor='none', markeredgecolor='green', markersize=4)
    #plt.text(x, y-0.02, "L", fontsize=10, ha='center', va='center', color='green' )

    #x,y = gsf_unb_fpr[orig_M]*gsf_fr,gsf_unb_tpr[orig_M]*gsf_eff
    #plt.plot([x],[y], marker='^', markerfacecolor='none', markeredgecolor='green', markersize=4)
    #plt.text(x, y-0.02, "M", fontsize=10, ha='center', va='center', color='green' )

    #x,y = gsf_unb_fpr[orig_T]*gsf_fr,gsf_unb_tpr[orig_T]*gsf_eff
    #plt.plot([x],[y], marker='^', markerfacecolor='none', markeredgecolor='green', markersize=4)
    #plt.text(x, y-0.02, "T", fontsize=10, ha='center', va='center', color='green' )

    #x,y = gsf_unb_fpr[orig_ELE]*gsf_fr,gsf_unb_tpr[orig_ELE]*gsf_eff
    #plt.plot([x],[y], marker='^', markerfacecolor='none', markeredgecolor='purple', markersize=4)
    #plt.text(x, y+0.02, "E", fontsize=10, ha='center', va='center', color='purple' )

    ##########
    # Finish up ...
    plt.legend(loc='upper left', framealpha=None, frameon=False)
    plt.tight_layout()
    plt.savefig(dir + '/roc.pdf')
    plt.clf()
    plt.close()

    ##############
    # EFF CURVES #
    ##############

    # Binning
    bin_edges = np.linspace(0., 4., 8, endpoint=False)
    bin_edges = np.append(bin_edges, np.linspace(4., 8., 4, endpoint=False))
    bin_edges = np.append(bin_edges, np.linspace(8., 10., 2, endpoint=True))
    bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2.
    bin_widths = (bin_edges[1:] - bin_edges[:-1])
    bin_width = bin_widths[0]
    bin_widths /= bin_width
    #print("bin_edges",bin_edges)
    #print("bin_centres",bin_centres)
    #print("bin_widths",bin_widths)
    #print("bin_width",bin_width)

    plt.figure()
    ax = plt.subplot(111)

    has_gsf = (test.has_gsf) & (test.gsf_pt > 0.5) & (np.abs(test.gsf_eta) <
                                                      2.5)
    has_ele = (test.has_ele) & (test.ele_pt > 0.5) & (np.abs(test.ele_eta) <
                                                      2.5)
    has_gsf_ = (egamma.has_gsf) & (egamma.gsf_pt > 0.5) & (np.abs(
        egamma.gsf_eta) < 2.5)
    has_pfgsf = (egamma.has_pfgsf) & (egamma.pfgsf_pt > 0.5) & (np.abs(
        egamma.pfgsf_eta) < 2.5)
    has_ele_ = (egamma.has_ele) & (egamma.ele_pt > 0.5) & (np.abs(
        egamma.ele_eta) < 2.5)
    curves = [
        {
            "label": "EG GSF",
            "var": egamma.gsf_pt,
            "mask": (egamma.is_e) & (has_gsf_),
            "condition": (has_pfgsf),
            "colour": "green",
            "size": 7,
        },
        {
            "label": "PF ELE",
            "var": egamma.gsf_pt,
            "mask": (egamma.is_e) & (has_gsf_),
            "condition": (has_ele_),
            "colour": "purple",
            "size": 7,
        },
        {
            "label": "Unbiased",
            "var": test.gsf_pt,
            "mask": (test.is_e) & (has_gsf),
            "condition":
            (has_ele) & (test.gsf_bdtout1 > gsf_unb_score[unb_ELE]),
            "colour": "blue",
            "size": 7,
        },
        {
            "label": "ID",
            "var": test.gsf_pt,
            "mask": (test.is_e) & (has_gsf),
            "condition": (has_ele) & (test['training_out'] > id_score[id_ELE]),
            "colour": "black",
            "size": 7,
        },
        #{"label":"Track (VT)","var":test.gsf_pt, "mask":(test.is_e)&(has_gsf), "condition":(has_ele)&(test.gsf_bdtout1>gsf_unb_score[unb_VT]),"colour":"red","size":7,},
        #{"label":"Track (T)", "var":test.gsf_pt, "mask":(test.is_e)&(has_gsf), "condition":(has_ele)&(test.gsf_bdtout1>gsf_unb_score[unb_T]),"colour":"red","size":6,},
        #{"label":"Track (M)", "var":test.gsf_pt, "mask":(test.is_e)&(has_gsf), "condition":(has_ele)&(test.gsf_bdtout1>gsf_unb_score[unb_M]),"colour":"red","size":5,},
        #{"label":"Track (L)", "var":test.gsf_pt, "mask":(test.is_e)&(has_gsf), "condition":(has_ele)&(test.gsf_bdtout1>gsf_unb_score[unb_L]),"colour":"red","size":4,},
        #{"label":"Track (VL)","var":test.gsf_pt, "mask":(test.is_e)&(has_gsf), "condition":(has_ele)&(test.gsf_bdtout1>gsf_unb_score[unb_VL]),"colour":"red","size":3,},
        {
            "label": "Open",
            "var": test.gsf_pt,
            "mask": (test.is_e) & (has_gsf),
            "condition": (has_ele),
            "colour": "red",
            "size": 7,
        },
    ]

    for idx, curve in enumerate(curves):
        #print("label:",curve["label"])
        his_total, _ = np.histogram(curve["var"][curve["mask"]],
                                    bins=bin_edges)
        his_passed, _ = np.histogram(curve["var"][curve["mask"]
                                                  & curve["condition"]],
                                     bins=bin_edges)
        x = bin_edges[:-1]
        y = [x / y if y > 0 else 0. for x, y in zip(his_passed, his_total)]
        yhigh = [
            binomial_hpdr(p, t)[1] - (p / t) if t > 0 else 0.
            for p, t in zip(his_passed, his_total)
        ]
        ylow = [(p / t) - binomial_hpdr(p, t)[0] if t > 0 else 0.
                for p, t in zip(his_passed, his_total)]
        yerr = [ylow, yhigh]
        label='{:s} (mean={:5.3f})'.format(curve["label"],
                                           float(his_passed.sum())/float(his_total.sum()) \
                                              if his_total.sum() > 0 else 0.)
        ax.errorbar(
            x=x,
            y=y,
            yerr=yerr,
            #color=None,
            label=label,
            marker='o',
            color=curve["colour"],
            markerfacecolor="white",
            markersize=curve["size"],
            linewidth=0.5,
            elinewidth=0.5)

    ##########
    # Finish up ...
    plt.title('Efficiency as a function of GSF track pT')
    plt.xlabel('Transverse momentum (GeV)')
    plt.ylabel('Efficiency')
    ax.set_xlim(bin_edges[0], bin_edges[-2])
    plt.ylim([0., 1.])
    plt.legend(loc='best')
    plt.tight_layout()
    plt.savefig(dir + '/eff.pdf')
    plt.clf()
    plt.close()

    #################
    # MISTAG CURVES #
    #################

    plt.figure()
    ax = plt.subplot(111)

    has_gsf = (test.has_gsf) & (test.gsf_pt > 0.5) & (np.abs(test.gsf_eta) <
                                                      2.5)
    has_ele = (test.has_ele) & (test.ele_pt > 0.5) & (np.abs(test.ele_eta) <
                                                      2.5)
    has_gsf_ = (egamma.has_gsf) & (egamma.gsf_pt > 0.5) & (np.abs(
        egamma.gsf_eta) < 2.5)
    has_pfgsf = (egamma.has_pfgsf) & (egamma.pfgsf_pt > 0.5) & (np.abs(
        egamma.pfgsf_eta) < 2.5)
    has_ele_ = (egamma.has_ele) & (egamma.ele_pt > 0.5) & (np.abs(
        egamma.ele_eta) < 2.5)
    curves = [
        {
            "label": "EG GSF",
            "var": egamma.gsf_pt,
            "mask": (~egamma.is_e) & (has_gsf_),
            "condition": (has_pfgsf),
            "colour": "green",
            "size": 7,
        },
        {
            "label": "PF ELE",
            "var": egamma.gsf_pt,
            "mask": (~egamma.is_e) & (has_gsf_),
            "condition": (has_ele_),
            "colour": "purple",
            "size": 7,
        },
        {
            "label": "Unbiased",
            "var": test.gsf_pt,
            "mask": (~test.is_e) & (has_gsf),
            "condition":
            (has_ele) & (test.gsf_bdtout1 > gsf_unb_score[unb_ELE]),
            "colour": "blue",
            "size": 7,
        },
        {
            "label": "ID",
            "var": test.gsf_pt,
            "mask": (~test.is_e) & (has_gsf),
            "condition": (has_ele) & (test['training_out'] > id_score[id_ELE]),
            "colour": "black",
            "size": 7,
        },
        #{"label":"Track (VT)","var":test.gsf_pt, "mask":(~test.is_e)&(has_gsf), "condition":(has_ele)&(test.gsf_bdtout1>gsf_unb_score[unb_VT]),"colour":"red","size":7,},
        #{"label":"Track (T)", "var":test.gsf_pt, "mask":(~test.is_e)&(has_gsf), "condition":(has_ele)&(test.gsf_bdtout1>gsf_unb_score[unb_T]),"colour":"red","size":6,},
        #{"label":"Track (M)", "var":test.gsf_pt, "mask":(~test.is_e)&(has_gsf), "condition":(has_ele)&(test.gsf_bdtout1>gsf_unb_score[unb_M]),"colour":"red","size":5,},
        #{"label":"Track (L)", "var":test.gsf_pt, "mask":(~test.is_e)&(has_gsf), "condition":(has_ele)&(test.gsf_bdtout1>gsf_unb_score[unb_L]),"colour":"red","size":4,},
        #{"label":"Track (VL)","var":test.gsf_pt, "mask":(~test.is_e)&(has_gsf), "condition":(has_ele)&(test.gsf_bdtout1>gsf_unb_score[unb_VL]),"colour":"red","size":3,},
        {
            "label": "Open",
            "var": test.gsf_pt,
            "mask": (~test.is_e) & (has_gsf),
            "condition": (has_ele),
            "colour": "red",
            "size": 7,
        },
    ]

    for idx, curve in enumerate(curves):
        #print("label:",curve["label"])
        his_total, _ = np.histogram(curve["var"][curve["mask"]],
                                    bins=bin_edges)
        his_passed, _ = np.histogram(curve["var"][curve["mask"]
                                                  & curve["condition"]],
                                     bins=bin_edges)
        x = bin_edges[:-1]
        y = [x / y if y > 0 else 0. for x, y in zip(his_passed, his_total)]
        yhigh = [
            binomial_hpdr(p, t)[1] - (p / t) if t > 0 else 0.
            for p, t in zip(his_passed, his_total)
        ]
        ylow = [(p / t) - binomial_hpdr(p, t)[0] if t > 0 else 0.
                for p, t in zip(his_passed, his_total)]
        yerr = [ylow, yhigh]
        label='{:s} (mean={:5.3f})'.format(curve["label"],
                                           float(his_passed.sum())/float(his_total.sum()) \
                                              if his_total.sum() > 0 else 0.)
        ax.errorbar(
            x=x,
            y=y,
            yerr=yerr,
            #color=None,
            label=label,
            marker='o',
            color=curve["colour"],
            markerfacecolor="white",
            markersize=curve["size"],
            linewidth=0.5,
            elinewidth=0.5)

    ##########
    # Finish up ...
    plt.title('Mistag rate as a function of GSF track pT')
    plt.xlabel('Transverse momentum (GeV)')
    plt.ylabel('Mistag rate')
    plt.gca().set_yscale('log')
    ax.set_xlim(bin_edges[0], bin_edges[-2])
    ax.set_ylim([0.0001, 1.])
    plt.legend(loc='best')
    plt.tight_layout()
    plt.savefig(dir + '/mistag.pdf')
    plt.clf()
    plt.close()
Example #3
0
def _miniaod_ratio(numer, denom):
    """Return numer.sum() / denom.sum() as a float, or 0. for an empty denominator."""
    n_denom = float(denom.sum())
    return float(numer.sum()) / n_denom if n_denom > 0. else 0.


def _miniaod_eff_and_fr(passed, baseline, is_e):
    """Return (efficiency, mistag rate) of `passed` relative to `baseline`.

    Efficiency uses the entries of `baseline` flagged by `is_e`, the mistag
    rate uses the entries of `baseline` that are not flagged.
    """
    signal = baseline & is_e
    background = baseline & (~is_e)
    return (_miniaod_ratio(passed & signal, signal),
            _miniaod_ratio(passed & background, background))


def _miniaod_roc(truth, score):
    """Return (fpr, tpr, thresholds, auc); auc is 0. if `truth` has < 2 classes."""
    fpr, tpr, thresholds = roc_curve(truth, score)
    auc = roc_auc_score(truth, score) if len(set(truth)) > 1 else 0.
    return fpr, tpr, thresholds, auc


def _miniaod_mark_working_point(id_fpr, id_tpr, id_score, ele_fr, ele_eff,
                                target_fr, text, facecolor):
    """Draw the ROC point whose scaled FPR is closest to `target_fr`.

    Returns the score threshold associated with that point.
    """
    idx = np.abs(id_fpr * ele_fr - target_fr).argmin()
    x, y = id_fpr[idx] * ele_fr, id_tpr[idx] * ele_eff
    plt.plot([x], [y],
             marker='o',
             markerfacecolor=facecolor,
             markeredgecolor='black',
             markersize=7)
    plt.text(x,
             y + 0.03,
             text,
             fontsize=8,
             ha='center',
             va='center',
             color='black')
    return id_score[idx]


def _miniaod_binned_curves(dir, test, egamma, same_fr, double_fr, variables,
                           signal, ylabel, filename, log_y):
    """Save one binned pass-fraction plot per entry of `variables`.

    `variables` is a sequence of (column name, bin edges, x-axis label).
    `signal` selects the denominator: entries flagged `is_e` when True, the
    complement when False.  `filename` is a format string taking the column
    name; `log_y` switches the y axis to a log scale with range [1e-4, 1].
    """
    # The selections do not depend on the plotted variable: build them once.
    has_gsf = (test.has_gsf) & (test.gsf_pt > 0.5) & (np.abs(test.gsf_eta)
                                                      < 2.5)
    has_ele = (test.has_ele) & (test.ele_pt > 0.5) & (np.abs(test.ele_eta)
                                                      < 2.5)
    has_ele_T = has_ele & (test.gsf_pt > 1.0) & (
        (test.gsf_bdtout1 > 3.05) | (test.gsf_bdtout2 > 2.42))
    has_gsf_ = (egamma.has_gsf) & (egamma.gsf_pt > 0.5) & (np.abs(
        egamma.gsf_eta) < 2.5)
    has_ele_ = (egamma.has_ele) & (egamma.ele_pt > 0.5) & (np.abs(
        egamma.ele_eta) < 2.5)

    denom = (test.is_e if signal else ~test.is_e) & has_gsf
    denom_ = (egamma.is_e if signal else ~egamma.is_e) & has_gsf_

    for attr, binning, xlabel in variables:
        print(attr)

        plt.figure()
        ax = plt.subplot(111)

        # (label, values, denominator mask, pass condition, colour, filled)
        curves = [
            ("Open", test[attr], denom, has_ele, "red", True),
            ("Tight Seed", test[attr], denom, has_ele_T, "blue", True),
            ("PF ELE", egamma[attr], denom_, has_ele_, "purple", True),
            ("ID (Tight)", test[attr], denom, has_ele_T & same_fr, "black",
             True),
            ("ID (Loose)", test[attr], denom, has_ele_T & double_fr, "black",
             False),
        ]

        for name, values, total_mask, condition, colour, filled in curves:
            his_total, _ = np.histogram(values[total_mask], bins=binning)
            his_passed, _ = np.histogram(values[total_mask & condition],
                                         bins=binning)

            ratios, err_low, err_high = [], [], []
            for n_pass, n_tot in zip(his_passed, his_total):
                if n_tot > 0:
                    # Explicit float conversion, as for every other ratio here.
                    ratio = float(n_pass) / float(n_tot)
                    # One call per bin; assumes binomial_hpdr is deterministic
                    # and returns (low, high, ...) -- defined elsewhere.
                    interval = binomial_hpdr(n_pass, n_tot)
                    ratios.append(ratio)
                    err_low.append(ratio - interval[0])
                    err_high.append(interval[1] - ratio)
                else:
                    ratios.append(0.)
                    err_low.append(0.)
                    err_high.append(0.)

            n_total = his_total.sum()
            mean = float(his_passed.sum()) / float(n_total) \
                if n_total > 0 else 0.
            ax.errorbar(
                x=binning[:-1],  # points sit on the lower edge of each bin
                y=ratios,
                yerr=[err_low, err_high],
                label='{:s} (mean={:5.3f})'.format(name, mean),
                marker='o',
                color=colour,
                markerfacecolor=colour if filled else "white",
                markersize=7,
                linewidth=0.5,
                elinewidth=0.5)

        # Finish up ...
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        if log_y:
            ax.set_yscale('log')
        ax.set_xlim(binning[0], binning[-2])
        ax.set_ylim([0.0001, 1.] if log_y else [0., 1.])
        plt.legend(loc='best')
        plt.tight_layout()
        plt.savefig(dir + '/' + filename.format(attr))
        plt.clf()
        plt.close()


def miniaod(dir, test, egamma, has_pfgsf_branches=True, AxE=True):
    """Produce the MINIAOD ROC, efficiency and mistag-rate plots.

    Writes `roc.pdf`, `eff_vs_<var>.pdf` and `mistag_vs_<var>.pdf` into `dir`
    for var in gsf_pt, gsf_mode_pt, gsf_dxy, gsf_dz, rho.

    Args:
        dir: output directory path (string); file names are appended to it.
        test: low-pT electron table supporting both attribute and `[...]`
            column access (presumably a pandas DataFrame -- confirm with
            caller).  Columns read: is_e, has_gsf, gsf_pt, gsf_eta, has_ele,
            ele_pt, ele_eta, gsf_bdtout1, gsf_bdtout2, training_out and the
            plotted variables.
        egamma: same kind of table for the EGamma/PF reference.  Columns
            read: is_e, has_gsf, gsf_pt, gsf_eta, has_pfgsf, pfgsf_pt,
            pfgsf_eta, has_ele, ele_pt, ele_eta, ele_mva_value and the
            plotted variables.
        has_pfgsf_branches: unused; kept for interface compatibility.
        AxE: unused; kept for interface compatibility.
    """
    print(
        '##### MINIAOD ##########################################################'
    )

    #############
    # ROC CURVE #
    #############

    plt.figure(figsize=(6, 6))
    ax = plt.subplot(111)
    plt.title('Efficiency and mistag rate w.r.t. GSF tracks')
    plt.xlim(1.e-3, 1.1)
    plt.ylim([0., 1.02])
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    ax.tick_params(axis='x', pad=10.)
    plt.gca().set_xscale('log')
    plt.grid(True)

    ########################################
    # "by chance" line (step = lower x-axis limit)
    chance = np.arange(0., 1., plt.xlim()[0])
    plt.plot(chance, chance, 'k--', lw=0.5)

    ########################################
    # Low-pT selections (all rates are w.r.t. GSF tracks with pT > 0.5 GeV)

    lowpt_gsf = (test.has_gsf) & (test.gsf_pt > 0.5) & (np.abs(test.gsf_eta) <
                                                        2.5)
    lowpt_ele = (test.has_ele) & (test.ele_pt > 0.5) & (np.abs(test.ele_eta) <
                                                        2.5)
    # pT > 1.0 GeV and either seed BDT output above its hard-coded cut
    tight_seed = (test.gsf_pt > 1.0) & ((test.gsf_bdtout1 > 3.05) |
                                        (test.gsf_bdtout2 > 2.42))
    lowpt_gsf_T = lowpt_gsf & tight_seed
    lowpt_ele_T = lowpt_ele & tight_seed

    ########################################
    # Low-pT GSF tracks

    gsf_eff, gsf_fr = _miniaod_eff_and_fr(lowpt_gsf, lowpt_gsf, test.is_e)
    plt.plot(
        [gsf_fr],
        [gsf_eff],
        marker='o',
        markerfacecolor='none',
        markeredgecolor='red',
        markersize=8,
        linestyle='none',
        label='Low-pT GSF track, pT > 0.5 GeV, VLoose Seed',
    )

    gsf_eff, gsf_fr = _miniaod_eff_and_fr(lowpt_gsf_T, lowpt_gsf, test.is_e)
    plt.plot(
        [gsf_fr],
        [gsf_eff],
        marker='o',
        markerfacecolor='none',
        markeredgecolor='blue',
        markersize=8,
        linestyle='none',
        label='Low-pT GSF track, pT > 1.0 GeV, Tight Seed',
    )

    ########################################
    # Low-pT GSF electrons + ROC curve

    ele_eff, ele_fr = _miniaod_eff_and_fr(lowpt_ele, lowpt_gsf, test.is_e)
    plt.plot(
        [ele_fr],
        [ele_eff],
        marker='o',
        markerfacecolor='red',
        markeredgecolor='red',
        markersize=8,
        linestyle='none',
        label='Low-pT GSF electron, pT > 0.5 GeV, VLoose Seed',
    )

    # The tight-seed values are the ones used to scale the ID ROC curve below.
    ele_eff, ele_fr = _miniaod_eff_and_fr(lowpt_ele_T, lowpt_gsf, test.is_e)
    plt.plot(
        [ele_fr],
        [ele_eff],
        marker='o',
        markerfacecolor='blue',
        markeredgecolor='blue',
        markersize=8,
        linestyle='none',
        label='Low-pT GSF electron, pT > 1.0 GeV, Tight Seed',
    )

    id_fpr, id_tpr, id_score, id_auc = _miniaod_roc(
        test.is_e[lowpt_ele_T], test['training_out'][lowpt_ele_T])
    plt.plot(id_fpr * ele_fr,
             id_tpr * ele_eff,
             linestyle='solid',
             color='black',
             linewidth=1.0,
             label='Low-pT ID, AUC={:.3f}'.format(id_auc))

    ########################################
    # EGamma GSF tracks and PF GSF electrons

    eg_gsf = (egamma.has_gsf) & (egamma.gsf_pt > 0.5) & (np.abs(
        egamma.gsf_eta) < 2.5)
    eg_pfgsf = (egamma.has_pfgsf) & (egamma.pfgsf_pt > 0.5) & (np.abs(
        egamma.pfgsf_eta) < 2.5)
    eg_ele = (egamma.has_ele) & (egamma.ele_pt > 0.5) & (np.abs(
        egamma.ele_eta) < 2.5)

    eg_eff, eg_fr = _miniaod_eff_and_fr(eg_pfgsf, eg_gsf, egamma.is_e)
    plt.plot([eg_fr], [eg_eff],
             marker='o',
             color='green',
             markersize=8,
             linestyle='none',
             label='EGamma GSF track')

    pf_eff, pf_fr = _miniaod_eff_and_fr(eg_ele, eg_gsf, egamma.is_e)
    plt.plot([pf_fr], [pf_eff],
             marker='o',
             color='purple',
             markersize=8,
             linestyle='none',
             label='PF GSF electron')

    pf_id_fpr, pf_id_tpr, _, pf_id_auc = _miniaod_roc(
        egamma.is_e[eg_ele], egamma['ele_mva_value'][eg_ele])
    plt.plot(pf_id_fpr * pf_fr,
             pf_id_tpr * pf_eff,
             linestyle='dashed',
             color='purple',
             linewidth=1.0,
             label='PF ID, AUC={:.3f}'.format(pf_id_auc))

    #################
    # Working points: ID thresholds giving the PF mistag rate ("Tight") and
    # twice the PF mistag rate ("Loose")

    same_fr = test['training_out'] > _miniaod_mark_working_point(
        id_fpr, id_tpr, id_score, ele_fr, ele_eff, pf_fr, "Tight", 'black')
    double_fr = test['training_out'] > _miniaod_mark_working_point(
        id_fpr, id_tpr, id_score, ele_fr, ele_eff, pf_fr * 2., "Loose", 'none')

    ##########
    # Finish up ...
    plt.legend(loc='lower right', framealpha=None, frameon=False)
    plt.tight_layout()
    plt.savefig(dir + '/roc.pdf')
    plt.clf()
    plt.close()

    ###########################
    # EFF AND MISTAG CURVES   #
    ###########################

    # pT binning: 0.5 GeV steps up to 4, 1 GeV steps up to 8, then 8, 10, 12
    bin_edges = np.concatenate([
        np.linspace(0., 4., 8, endpoint=False),
        np.linspace(4., 8., 4, endpoint=False),
        np.linspace(8., 12., 3, endpoint=True),
    ])

    # (column, bin edges, x-axis label)
    variables = [
        ('gsf_pt', bin_edges, 'Transverse momentum (GeV)'),
        ('gsf_mode_pt', bin_edges, 'Mode transverse momentum (GeV)'),
        ('gsf_dxy', np.linspace(0., 3.3, 12),
         'Transverse impact parameter w.r.t. beamspot (cm)'),
        ('gsf_dz', np.linspace(0., 22., 12),
         'Longitudinal impact parameter w.r.t. beamspot (cm)'),
        ('rho', np.linspace(0., 44., 12),
         'Median energy density from UE/pileup (GeV / unit area)'),
    ]

    print("Efficiency curves ...")
    _miniaod_binned_curves(dir, test, egamma, same_fr, double_fr, variables,
                           signal=True,
                           ylabel='Efficiency',
                           filename='eff_vs_{:s}.pdf',
                           log_y=False)

    print("Mistag curves ...")
    _miniaod_binned_curves(dir, test, egamma, same_fr, double_fr, variables,
                           signal=False,
                           ylabel='Mistag rate',
                           filename='mistag_vs_{:s}.pdf',
                           log_y=True)
Example #4
0
def ultralegacy_vloose(dir, test, egamma, has_pfgsf_branches=True, AxE=True):
    """Write the UltraLegacy "VLoose" performance plots as PDF files into ``dir``.

    The following files are produced:

    * ``roc.pdf`` -- efficiency vs. mistag rate for the low-pT electron
      selections and ID scores in ``test`` and for the PF electrons in
      ``egamma``, with two working-point markers.
    * ``eff_vs_<attr>.pdf`` and ``mistag_vs_<attr>.pdf`` -- binned pass
      fractions for true electrons (``is_e``) and for the rest (``~is_e``).
    * ``denom_vs_<attr>.pdf`` -- binned counts of the denominator selections,
      divided by the bin width relative to the first bin.

    Args:
        dir: Path prefix of the output directory; file names are appended
            with a leading ``/``. (The name shadows the builtin but is part
            of the public signature.)
        test: Table of low-pT candidates, read both as ``test.column`` and
            ``test['column']`` (presumably a pandas DataFrame -- confirm
            against the caller). Columns read: ``has_gsf``, ``gsf_pt``,
            ``gsf_eta``, ``has_ele``, ``ele_pt``, ``ele_eta``, ``is_e``,
            ``gsf_bdtout1``, ``gsf_bdtout2``, ``ele_mva_value_depth15``,
            ``training_out``, ``gen_pt``, ``gsf_mode_pt``, ``gsf_dxy``,
            ``gsf_dz`` and ``rho``.
        egamma: Table of PF candidates, accessed the same way. Columns read:
            ``has_gsf``, ``gsf_pt``, ``gsf_eta``, ``has_ele``, ``ele_pt``,
            ``ele_eta``, ``is_e``, ``ele_mva_value_retrained``,
            ``ele_mva_value`` and the binned variables listed above.
        has_pfgsf_branches: Unused; kept for interface compatibility. The
            ``pfgsf_*`` columns of ``egamma`` are no longer read (the mask
            built from them was never used), so tables without them work.
        AxE: Unused; kept for interface compatibility.

    Returns:
        None.
    """
    print('##### ULTRA LEGACY VLOOSE ##########################################################')

    def selection_rate(passed, total):
        """Return passed.sum() / total.sum() as a float, or 0. if total is empty."""
        n_total = float(total.sum())
        return float(passed.sum()) / n_total if n_total > 0. else 0.

    def auc_or_zero(truth, score):
        """Return the ROC AUC, or 0. when only one class is present."""
        return roc_auc_score(truth, score) if len(set(truth)) > 1 else 0.

    ##############
    # SELECTIONS #
    ##############

    # These masks depend only on the inputs, so they are built once and
    # shared by the ROC, efficiency, mistag and histogram sections.

    # Low-pT: pT > 0.5 GeV, VL WP for Seed BDT
    lowpt_gsf = (test.has_gsf) & (test.gsf_pt > 0.5) & (np.abs(test.gsf_eta) < 2.5)
    lowpt_ele = (test.has_ele) & (test.ele_pt > 0.5) & (np.abs(test.ele_eta) < 2.5)
    # Low-pT: pT > 1.0 GeV, Tight WP for Seed BDT
    lowpt_ele_tight = lowpt_ele & (test.gsf_pt > 1.0) & (
        (test.gsf_bdtout1 > 3.05) | (test.gsf_bdtout2 > 2.42))
    lowpt_sig = lowpt_gsf & test.is_e
    lowpt_bkg = lowpt_gsf & (~test.is_e)

    # EGamma PF electrons
    pf_gsf = (egamma.has_gsf) & (egamma.gsf_pt > 0.5) & (np.abs(egamma.gsf_eta) < 2.5)
    pf_ele = (egamma.has_ele) & (egamma.ele_pt > 0.5) & (np.abs(egamma.ele_eta) < 2.5)
    pf_sig = pf_gsf & egamma.is_e
    pf_bkg = pf_gsf & (~egamma.is_e)

    #############
    # ROC CURVE #
    #############

    plt.figure(figsize=(6, 6))
    ax = plt.subplot(111)
    plt.title('Low-pT electron performance (UltraLegacy)')
    plt.xlim(0.8e-3, 1.)
    plt.ylim([0., 1.])
    plt.xlabel('Mistag rate (w.r.t. GSF tracks, pT > 0.5 GeV)')
    plt.ylabel('Efficiency (w.r.t. GSF tracks, pT > 0.5 GeV)')
    ax.tick_params(axis='x', pad=10.)
    plt.gca().set_xscale('log')
    plt.grid(True)

    ########################################
    # "by chance" line (step size = lower x-axis limit)
    chance = np.arange(0., 1., plt.xlim()[0])
    plt.plot(chance, chance, ls='dotted', lw=0.5, label="By chance")

    ########################################
    # Low-pT GSF electrons + ROC curves

    vl_eff = selection_rate(lowpt_ele & lowpt_sig, lowpt_sig)
    vl_fr = selection_rate(lowpt_ele & lowpt_bkg, lowpt_bkg)
    plt.plot([vl_fr], [vl_eff],
             marker='o', markerfacecolor='red', markeredgecolor='red',
             markersize=8, linestyle='none',
             label='BParking MINIAOD')

    # The tight-selection rates also scale the ID ROC curves below.
    ele_eff = selection_rate(lowpt_ele_tight & lowpt_sig, lowpt_sig)
    ele_fr = selection_rate(lowpt_ele_tight & lowpt_bkg, lowpt_bkg)
    plt.plot([ele_fr], [ele_eff],
             marker='o', markerfacecolor='blue', markeredgecolor='blue',
             markersize=8, linestyle='none',
             label='UltraLegacy AOD')

    lowpt_truth = test.is_e[lowpt_ele_tight]

    id_branch = 'ele_mva_value_depth15'
    id_fpr, id_tpr, id_score = roc_curve(lowpt_truth, test[id_branch][lowpt_ele_tight])
    id_auc = auc_or_zero(lowpt_truth, test[id_branch][lowpt_ele_tight])
    plt.plot(id_fpr * ele_fr,
             id_tpr * ele_eff,
             linestyle='solid', color='blue', linewidth=1.0,
             label='ID, 2020Feb24 (AUC={:.3f})'.format(id_auc))

    id2_branch = 'training_out'  # set to None to skip the second ID curve
    if id2_branch is not None:
        id2_fpr, id2_tpr, _ = roc_curve(lowpt_truth, test[id2_branch][lowpt_ele_tight])
        id2_auc = auc_or_zero(lowpt_truth, test[id2_branch][lowpt_ele_tight])
        plt.plot(id2_fpr * ele_fr,
                 id2_tpr * ele_eff,
                 linestyle='dashed', color='blue', linewidth=1.0,
                 label='ID, 2019Aug07 (AUC={:.3f})'.format(id2_auc))

    ########################################
    # EGamma PF GSF electrons

    pf_eff = selection_rate(pf_ele & pf_sig, pf_sig)
    pf_fr = selection_rate(pf_ele & pf_bkg, pf_bkg)
    plt.plot([pf_fr], [pf_eff],
             marker='o', color='purple',
             markersize=8, linestyle='none',
             label='PF electron')

    pf_truth = egamma.is_e[pf_ele]

    pf_id_fpr, pf_id_tpr, _ = roc_curve(pf_truth, egamma['ele_mva_value_retrained'][pf_ele])
    pf_id_auc = auc_or_zero(pf_truth, egamma['ele_mva_value_retrained'][pf_ele])
    plt.plot(pf_id_fpr * pf_fr,
             pf_id_tpr * pf_eff,
             linestyle='solid', color='purple', linewidth=1.0,
             label='ID, retrain (AUC={:.3f})'.format(pf_id_auc))

    pf_id2_fpr, pf_id2_tpr, _ = roc_curve(pf_truth, egamma['ele_mva_value'][pf_ele])
    pf_id2_auc = auc_or_zero(pf_truth, egamma['ele_mva_value'][pf_ele])
    plt.plot(pf_id2_fpr * pf_fr,
             pf_id2_tpr * pf_eff,
             linestyle='dashed', color='purple', linewidth=1.0,
             label='ID, EGamma (AUC={:.3f})'.format(pf_id2_auc))

    #################
    # Working points

    def working_point(target_fr, marker):
        """Mark the ID ROC point closest to ``target_fr``; return its score mask."""
        nearest = np.abs(id_fpr * ele_fr - target_fr).argmin()
        plt.plot([id_fpr[nearest] * ele_fr], [id_tpr[nearest] * ele_eff],
                 marker=marker, markerfacecolor='white',
                 markeredgecolor='blue', markersize=8)
        return test[id_branch] > id_score[nearest]

    same_fr = working_point(pf_fr, 'v')          # same mistag rate as PF
    double_fr = working_point(pf_fr * 2., '^')   # twice the PF mistag rate

    ##########
    # Finish up ...
    plt.legend(loc='lower right', facecolor='white', frameon=False)
    plt.tight_layout()
    plt.savefig(dir + '/roc.pdf')
    plt.clf()
    plt.close()

    ###################
    # BINNED PLOTTING #
    ###################

    # pT binning: 0.5 GeV steps up to 4, 1 GeV steps up to 8, 2 GeV steps up to 12
    bin_edges = np.concatenate((
        np.linspace(0., 4., 8, endpoint=False),
        np.linspace(4., 8., 4, endpoint=False),
        np.linspace(8., 12., 3, endpoint=True),
    ))

    # (column name, bin edges, x-axis label)
    variables = [
        ('gen_pt', bin_edges, 'Generator-level transverse momentum (GeV)'),
        ('gsf_pt', bin_edges, 'Transverse momentum (GeV)'),
        ('gsf_mode_pt', bin_edges, 'Mode transverse momentum (GeV)'),
        ('gsf_dxy', np.linspace(0., 3.3, 12),
         'Transverse impact parameter w.r.t. beamspot (cm)'),
        ('gsf_dz', np.linspace(0., 22., 12),
         'Longitudinal impact parameter w.r.t. beamspot (cm)'),
        ('rho', np.linspace(0., 44., 12),
         'Median energy density from UE/pileup (GeV / unit area)'),
    ]

    def draw_curves(ax, curves, binning, label_fmt, denominator=False):
        """Histogram every curve in ``binning`` and draw it with ``ax.errorbar``.

        With ``denominator=False`` the y value is passed/total per bin, with
        asymmetric errors taken from ``binomial_hpdr``. With
        ``denominator=True`` the y value is the total count per bin divided
        by the bin width relative to the first bin, with zero errors.
        Points are drawn at the left bin edges.
        """
        left_edges = binning[:-1]
        # Relative widths come from *this* variable's binning, so the
        # normalisation is right for every variable and not only for pT.
        rel_widths = np.diff(binning)
        rel_widths = rel_widths / rel_widths[0]
        for curve in curves:
            values = curve["var"]
            his_total, _ = np.histogram(values[curve["mask"]], bins=binning)
            his_passed, _ = np.histogram(
                values[curve["mask"] & curve["condition"]], bins=binning)

            y, ylow, yhigh = [], [], []
            for width, passed, total in zip(rel_widths, his_passed, his_total):
                if not total > 0:
                    y.append(0.)
                    ylow.append(0.)
                    yhigh.append(0.)
                elif denominator:
                    y.append(float(total) / width)
                    ylow.append(0.)
                    yhigh.append(0.)
                else:
                    # Explicit floats: the counts are integers.
                    fraction = float(passed) / float(total)
                    interval = binomial_hpdr(passed, total)
                    y.append(fraction)
                    ylow.append(fraction - interval[0])
                    yhigh.append(interval[1] - fraction)

            label = label_fmt.format(
                curve["label"],
                selection_rate(his_passed, his_total))
            ax.errorbar(x=left_edges,
                        y=y,
                        yerr=[ylow, yhigh],
                        label=label,
                        marker=curve.get("marker", 'o'),
                        color=curve["colour"],
                        markerfacecolor=curve["colour"] if curve["fill"] else "white",
                        markersize=curve["size"],
                        linewidth=0.5,
                        elinewidth=0.5)

    def finish(ax, binning, xlabel, ylabel, ylim, filename, title=None):
        """Apply labels, limits and legend, save to ``dir + filename``, then close."""
        if title is not None:
            plt.title(title)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        # Points sit at the left bin edges, so the last one is at binning[-2].
        ax.set_xlim(binning[0], binning[-2])
        ax.set_ylim(ylim)
        plt.legend(loc='lower right', facecolor='white', frameon=False)
        plt.tight_layout()
        plt.savefig(dir + filename)
        plt.clf()
        plt.close()

    def rate_curves(attr, lowpt_mask, pf_mask):
        """Curve definitions shared by the efficiency and mistag plots."""
        lowpt_var = test[attr]
        return [
            {"label": "BParking MINIAOD", "var": lowpt_var, "mask": lowpt_mask,
             "condition": lowpt_ele,
             "colour": "red", "fill": True, "size": 8},
            {"label": "UltraLegacy AOD", "var": lowpt_var, "mask": lowpt_mask,
             "condition": lowpt_ele_tight,
             "colour": "blue", "fill": True, "size": 8},
            {"label": "UL MINIAOD?", "var": lowpt_var, "mask": lowpt_mask,
             "condition": lowpt_ele_tight & double_fr,
             "colour": "blue", "fill": False, "size": 8, "marker": "^"},
            {"label": "UL MINIAOD?", "var": lowpt_var, "mask": lowpt_mask,
             "condition": lowpt_ele_tight & same_fr,
             "colour": "blue", "fill": False, "size": 8, "marker": "v"},
            {"label": "PF electron", "var": egamma[attr], "mask": pf_mask,
             "condition": pf_ele,
             "colour": "purple", "fill": True, "size": 8},
        ]

    ##############
    # EFF CURVES #
    ##############

    print("Efficiency curves ...")
    for attr, binning, xlabel in variables:
        print(attr)
        plt.figure()
        ax = plt.subplot(111)
        draw_curves(ax, rate_curves(attr, lowpt_sig, pf_sig), binning,
                    '{:s} (mean={:5.3f})')
        finish(ax, binning, xlabel,
               'Efficiency (w.r.t. GSF tracks, pT > 0.5 GeV)',
               [0., 1.],
               '/eff_vs_{:s}.pdf'.format(attr),
               title='Low-pT electron performance (UltraLegacy)')

    #################
    # MISTAG CURVES #
    #################

    print("Mistag curves ...")
    for attr, binning, xlabel in variables:
        print(attr)
        plt.figure()
        ax = plt.subplot(111)
        draw_curves(ax, rate_curves(attr, lowpt_bkg, pf_bkg), binning,
                    '{:s} (mean={:6.4f})')
        finish(ax, binning, xlabel,
               'Mistag rate (w.r.t. GSF tracks, pT > 0.5 GeV)',
               [0., 0.22],
               '/mistag_vs_{:s}.pdf'.format(attr),
               title='Low-pT electron performance (UltraLegacy)')

    #############
    # HISTOGRAM #
    #############

    print("Histograms ...")
    # Same variables as above, minus the generator-level pT.
    for attr, binning, xlabel in variables[1:]:
        print(attr)
        plt.figure()
        ax = plt.subplot(111)
        curves = [
            {"label": "Low-pT (signal)", "var": test[attr], "mask": lowpt_sig,
             "condition": lowpt_ele,
             "colour": "red", "fill": False, "size": 7},
            {"label": "Low-pT (bkgd)", "var": test[attr], "mask": lowpt_bkg,
             "condition": lowpt_ele,
             "colour": "red", "fill": True, "size": 7},
            {"label": "EGamma (signal)", "var": egamma[attr], "mask": pf_sig,
             "condition": pf_ele,
             "colour": "purple", "fill": False, "size": 7},
            {"label": "EGamma (bkgd)", "var": egamma[attr], "mask": pf_bkg,
             "condition": pf_ele,
             "colour": "purple", "fill": True, "size": 7},
        ]
        draw_curves(ax, curves, binning, '{:s} (mean={:5.3f})',
                    denominator=True)
        finish(ax, binning, xlabel,
               'Denominator (w.r.t. GSF tracks, pT > 0.5 GeV)',
               [0., None],
               '/denom_vs_{:s}.pdf'.format(attr))