Example #1
0
def plot(*argv):
    """
    Draw the background rejection against the signal efficiency per tagger.

    Arguments (positional, unpacked from `argv`):
        args: Namespace-like object; `args.show` disables batch mode.
        data: Unpacked for signature compatibility, not used here.
        features: Sequence of tagger names, each a key of `ROCs`.
        ROCs: Mapping from tagger name to an `(eff_sig, eff_bkg)` pair.
        AUCs: Unpacked for signature compatibility, not used here.
        masscut: Truthy if the mass-cut line is to be printed on the canvas.
        pt_range: Pair of pT bounds to print on the canvas, or None.

    Returns:
        The canvas holding the finished figure.
    """

    args, data, features, ROCs, AUCs, masscut, pt_range = argv

    canvas = rp.canvas(batch=not args.show)

    # Random-guessing reference, i.e. 1 / efficiency. One extra, very narrow
    # bin is spliced in right after the lowest edge of the regular grid.
    grid = np.linspace(0.2, 1., 100 + 1, endpoint=True)
    narrow_edge = grid[0] + 0.01 * np.diff(grid[:2])[0]
    edges = np.array([grid[0], narrow_edge] + list(grid[1:]))
    centres = edges[:-1] + 0.5 * np.diff(edges)
    canvas.hist(np.power(centres, -1.),
                bins=edges,
                linecolor=ROOT.kGray + 2,
                fillcolor=ROOT.kBlack,
                alpha=0.05,
                linewidth=1,
                option='HISTC')

    # One pass, and one legend, per tagger class as decided by `signal_low`.
    legend_width = 0.17
    for is_simple, header in ((True, "Analytical:"), (False, "MVA:")):
        for ifeat, feat in enumerate(features):
            if not (is_simple == signal_low(feat)):
                continue
            eff_sig, eff_bkg = ROCs[feat]
            colour = rp.colours[(ifeat // 2) % len(rp.colours)]
            canvas.graph(np.power(eff_bkg, -1.),
                         bins=eff_sig,
                         linestyle=1 + (ifeat % 2),
                         linecolor=colour,
                         linewidth=2,
                         label=latex(feat, ROOT=True),
                         option='L')

        canvas.legend(header=header,
                      width=legend_width,
                      xmin=0.58 + legend_width * is_simple,
                      ymax=0.888)

    # Axis titles and text block
    canvas.xlabel("Signal efficiency #varepsilon_{sig}^{rel}")
    canvas.ylabel("Background rejection 1/#varepsilon_{bkg}^{rel}")
    canvas.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER)

    lines = ["#sqrt{s} = 13 TeV", "#it{W} jet tagging"]
    if pt_range is not None:
        lines.append("p_{{T}} #in  [{:.0f}, {:.0f}] GeV".format(pt_range[0], pt_range[1]))
    if masscut:
        lines.append("Cut: m #in  [60, 100] GeV")
    canvas.text(lines, ATLAS=False)

    # The more selections are applied, the larger the y-axis maximum and the
    # flatter the angle of the "Random guessing" caption.
    ranges = int(pt_range is not None) + int(masscut)
    mult = {2: 10., 1: 2.}.get(ranges, 1.)

    canvas.latex("Random guessing", 0.4, 1./0.4 * 0.9, align=23, angle=-12 + 2 * ranges, textsize=13, textcolor=ROOT.kGray + 2)
    canvas.xlim(0.2, 1.)
    canvas.ylim(1E+00, 5E+02 * mult)
    canvas.logy()
    canvas.legend()

    return canvas
Example #2
0
def plot(*argv):
    """
    Plot the optimisation metric per step together with the running best.

    Arguments (positional, unpacked from `argv`):
        experiment: Name inserted into the output file name.
        means: Per-step metric values; entries above the upper y-axis limit
            are flagged with an out-of-bounds marker.
        graph: Object passed straight to `c.graph` as the evaluations graph.
        idx_improvements: Index into `best_mean` and `bins` selecting the
            points drawn as markers on the best-result line.
        best_mean: Values of the best-result line.
        bins: x-coordinates for `best_mean`; `len(bins)` sets the x-axis range.

    Returns:
        None. The figure is saved as
        `figures/optimisation/optimisation_<experiment>.pdf`.
    """

    # Unpack arguments
    experiment, means, graph, idx_improvements, best_mean, bins = argv

    # Plot results
    c = rp.canvas(batch=True)
    ymax = 1.0  # 1.5
    ymin = 0.3

    # Steps whose value lies above the visible range get a marker close to
    # the top of the frame. A list is built explicitly because `map` is a
    # lazy iterator on Python 3, which `np.ones_like` cannot size.
    oobx = [step for step, mean in enumerate(means) if mean > ymax]
    ooby = np.ones_like(oobx) * 0.96 * (ymax - ymin) + ymin

    # Plots
    c.graph(graph,
            markercolor=rp.colours[1],
            linecolor=rp.colours[1],
            markersize=0.7,
            option='AP',
            label='Evaluations',
            legend_option='PE')
    c.graph(ooby,
            bins=oobx,
            markercolor=rp.colours[1],
            markerstyle=22,
            option='P')
    c.graph(best_mean,
            bins=bins,
            linecolor=rp.colours[5],
            linewidth=2,
            option='L',
            label='Best result')
    c.graph(best_mean[idx_improvements],
            bins=bins[idx_improvements],
            markercolor=rp.colours[5],
            markersize=0.5,
            option='P')

    # Decorations
    c.pad()._yaxis().SetNdivisions(505)
    c.xlabel("Bayesian optimisation step")
    c.ylabel("Cross-validation optimisation metric, L_{clf}^{val}")
    c.xlim(0, len(bins))
    # Same limits as used for placing the out-of-bounds markers above.
    c.ylim(ymin, ymax)
    c.legend(width=0.22, ymax=0.816)
    c.text(["#sqrt{s} = 13 TeV", "Neural network (NN) classifier"],
           qualifier=QUALIFIER)

    # Save
    mkdir('figures/optimisation/')
    c.save('figures/optimisation/optimisation_{}.pdf'.format(experiment))

    return
Example #3
0
def plot2D(*argv):
    """
    Draw the 2D contours with the DDT fit and the LDA boundary on top.

    Arguments (positional, unpacked from `argv`):
        data: Unpacked for signature compatibility, not used here.
        ddt: Fitted object exposing `intercept_`, `offset_` and `coef_`.
        lda: Fitted object exposing `intercept_` and `coef_`.
        contours: Indexable by 0 (background) and 1 (signal); each entry is
            passed to `c.hist2d`.
        binsx: x-axis bin edges; first and last entry span the dummy axis.
        binsy: y-axis bin edges; first and last entry set the y-axis range.
        variable: Compared against the `VAR_*` constants to pick the y-axis
            title, and inserted into the output file name.

    Returns:
        None. The figure is saved as `figures/ddt/ddt_<variable>_2d.pdf`.
    """

    # Unpack arguments
    data, ddt, lda, contours, binsx, binsy, variable = argv

    with TemporaryStyle() as style:

        # Style
        style.SetNumberContours(10)

        # Canvas
        c = rp.canvas(batch=True)

        # Axes: an invisible one-bin histogram that only defines the frame
        c.hist([binsy[0]], bins=[binsx[0], binsx[-1]], linestyle=0, linewidth=0)

        # Plotting contours
        for sig in [0, 1]:
            c.hist2d(contours[sig], linecolor=rp.colours[1 + 3 * sig], label="Signal" if sig else "Background", option='CONT3', legend_option='L')
            pass

        # Linear fit, evaluated at the two end points of the drawn segment
        x1, x2 = 1.5, 5.0
        intercept, coef = ddt.intercept_ + ddt.offset_, ddt.coef_
        y1 = intercept + x1 * coef
        y2 = intercept + x2 * coef
        c.plot([y1, y2], bins=[x1, x2], color=rp.colours[-1], label='DDT transform fit', linewidth=1, linestyle=1, option='L')

        # LDA decision boundary
        y1 = lda.intercept_ + x1 * lda.coef_
        y2 = lda.intercept_ + x2 * lda.coef_
        c.plot([y1, y2], bins=[x1, x2], label='LDA boundary', linewidth=1, linestyle=2, option='L')

        # Decorations
        c.text(["#sqrt{s} = 13 TeV"], qualifier=QUALIFIER, ATLAS=False)
        c.legend()
        c.ylim(binsy[0], binsy[-1])
        c.xlabel("Large-#it{R} jet " + latex('rhoDDT', ROOT=True))

        # The y-axis title depends on the variable; an unrecognised variable
        # leaves the axis untitled. Indentation here is spaces only: the
        # previous tab/space mix is a TabError on Python 3.
        if variable == VAR_TAU21:
            c.ylabel("Large-#it{R} jet " + latex('#tau_{21}', ROOT=True))
        elif variable == VAR_N2:
            c.ylabel("Large-#it{R} jet " + latex('N_{2}', ROOT=True))
        elif variable == VAR_DECDEEP:
            c.ylabel("Large-#it{R} jet " + latex('dec_deepWvsQCD', ROOT=True))
        elif variable == VAR_DEEP:
            c.ylabel("Large-#it{R} jet " + latex('deepWvsQCD', ROOT=True))

        # Save
        mkdir('figures/ddt')
        c.save('figures/ddt/ddt_{}_2d.pdf'.format(variable))
        pass
    return
Example #4
0
def plot(*argv):
    """
    Overlay one background-efficiency profile per working point.

    Arguments (positional, unpacked from `argv`):
        args: Namespace-like object; `args.show` disables batch mode.
        data: Unpacked for signature compatibility, not used here.
        feat: Feature name, shown in the text block via `latex`.
        profiles: Sequence of profiles, each passed to `c.hist`.
        cuts: Sequence iterated in lockstep with `profiles`; the values
            themselves are not used.
        effs: Sequence of integer efficiencies (per cent) used as legend
            labels.

    Returns:
        The canvas holding the finished figure.
    """

    args, data, feat, profiles, cuts, effs = argv

    with TemporaryStyle() as style:

        style.SetTitleOffset(1.6, 'y')

        canvas = rp.canvas(batch=not args.show)

        # Every profile is drawn twice: once as a line and once as a filled
        # error band carrying the legend entry.
        for idx, (profile, _cut, eff) in enumerate(zip(profiles, cuts, effs)):
            colour = rp.colours[idx]

            # Single-digit efficiencies get a leading space in the legend.
            if eff < 10:
                padding = " "
            else:
                padding = ""

            canvas.hist(profile, linecolor=colour, linestyle=1, option='HIST L')
            canvas.hist(profile,
                        linecolor=colour,
                        fillcolor=colour,
                        alpha=0.3,
                        option='E3',
                        label=padding + "{:d}%".format(eff))

        # Axis titles, text block, range and legend
        canvas.xlabel("Large-#it{R} jet mass [GeV]")
        canvas.ylabel("Background efficiency, #varepsilon_{bkg}^{rel}")
        text_lines = [
            "#sqrt{s} = 13 TeV,  Multijets",
            "Cuts on {}".format(latex(feat, ROOT=True)),
        ]
        canvas.text(text_lines, qualifier=QUALIFIER, ATLAS=False)
        canvas.ylim(0, 2.0)
        canvas.legend(reverse=True,
                      width=0.25,
                      ymax=0.87,
                      header="Incl. #bar{#varepsilon}_{bkg}^{rel}:")

    return canvas
Example #5
0
def plot(profile, fit):
    """
    Draw a 2D profile as a colour map and save it as PDF and EPS.

    Arguments:
        profile: ROOT-style object exposing `GetXaxis`, `GetYaxis`,
            `GetZaxis`, `SetContour` and `Draw`.
        fit: Truthy if `profile` is the k-NN fit rather than the measured
            profile; selects the z-axis title prefix and the file name.

    Returns:
        None. Files are written to `figures/knn/`.
    """

    # Canvas with margins adjusted on the underlying pad
    canvas = rp.canvas(batch=True)
    bare_pad = canvas.pads()[0]._bare()
    bare_pad.cd()
    bare_pad.SetRightMargin(0.20)
    bare_pad.SetLeftMargin(0.15)
    bare_pad.SetTopMargin(0.10)

    # Axis titles
    z_prefix = "#it{k}-NN fitted" if fit else "Measured"
    profile.GetXaxis().SetTitle(latex(VARX, ROOT=True) + " [GeV]")
    profile.GetYaxis().SetTitle(latex(VARY, ROOT=True) + " [GeV]")
    profile.GetZaxis().SetTitle("%s %s^{(%s%%)}" % (z_prefix, latex(VAR, ROOT=True), EFF))

    # Axis divisions and title offsets
    profile.GetYaxis().SetNdivisions(505)
    profile.GetZaxis().SetNdivisions(505)
    profile.GetXaxis().SetTitleOffset(1.4)
    profile.GetYaxis().SetTitleOffset(1.8)
    profile.GetZaxis().SetTitleOffset(1.3)

    # Optional fixed z-axis range
    if ZRANGE:
        profile.GetZaxis().SetRangeUser(*ZRANGE)
    profile.SetContour(NB_CONTOUR)

    # Colour map first, then both boundary curves on top
    profile.Draw('COLZ')
    for bound in (BOUNDS[0], BOUNDS[1]):
        bound.DrawCopy("SAME")

    # Text block, in white
    canvas.text(["#sqrt{s} = 13 TeV", "Multijets"],
                ATLAS=False,
                textcolor=ROOT.kWhite)

    # Save in both formats
    mkdir('figures/knn/')
    kind = 'fit' if fit else 'profile'
    templates = (
        'figures/knn/knn_{}_{:s}_{}_{}.pdf',
        'figures/knn/knn_{}_{:s}_{}_{}.eps',
    )
    for template in templates:
        canvas.save(template.format(kind, VAR, EFF, MODEL))
Example #6
0
def plot(*argv):
    """
    Compare the mass spectra of background jets passing and failing a cut.

    Arguments (positional, unpacked from `argv`):
        data: Frame-like object indexed with `.loc[mask, column]`; the
            columns 'm' and 'weight_test' are read.
        args: Namespace-like object; `args.show` disables batch mode.
        feat: Feature name, shown in the text block via `latex`.
        msk_pass: Boolean mask of entries passing the cut.
        msk_bkg: Boolean mask of background entries.
        eff_sig: Signal efficiency (per cent) printed in the text block.

    Returns:
        The two-pad canvas holding the finished figure.

    Note:
        The module-level `HISTSTYLE` is modified in place: both labels are
        overwritten and the shared drawing options are merged in.
    """

    data, args, feat, msk_pass, msk_bkg, eff_sig = argv

    # Global variable override(s)
    HISTSTYLE[True]['label'] = "Passing cut"
    HISTSTYLE[False]['label'] = "Failing cut"

    canvas = rp.canvas(num_pads=2, size=(int(800 * 600 / 857.), 600), batch=not args.show)

    # Upper pad: failing histogram first, then the passing one
    shared = dict(bins=MASSBINS, alpha=0.3, normalise=True, linewidth=3)
    hist = {}
    for name, passing in (('fail', False), ('pass', True)):
        if passing:
            msk = msk_bkg & msk_pass
        else:
            msk = msk_bkg & ~msk_pass
        HISTSTYLE[passing].update(shared)
        masses = data.loc[msk, 'm'].values
        weights = data.loc[msk, 'weight_test'].values
        hist[name] = canvas.hist(masses, weights=weights, **HISTSTYLE[passing])

    # Lower pad: pass/pass reference line, then the pass/fail band
    canvas.ratio_plot((hist['pass'], hist['pass']), option='HIST', fillstyle=0, linecolor=ROOT.kGray + 1, linewidth=1, linestyle=1)
    canvas.ratio_plot((hist['pass'], hist['fail']), option='E2', fillstyle=1001, fillcolor=rp.colours[0], linecolor=rp.colours[0], alpha=0.3)

    # -- Set this before drawing OOB markers
    canvas.pads()[1].logy()
    canvas.pads()[1].ylim(1E-01, 1E+01)

    # Pass/fail central line on top of the band
    canvas.ratio_plot((hist['pass'], hist['fail']), option='HIST', fillstyle=0, linewidth=3, linecolor=rp.colours[0])

    # Axis titles and text block
    canvas.xlabel("Large-#it{R} jet mass [GeV]")
    canvas.ylabel("Fraction of jets")
    cut_line = "#varepsilon_{sig} = %d%% cut on %s" % (eff_sig, latex(feat, ROOT=True))
    canvas.text(["#sqrt{s} = 13 TeV,  Multijets", cut_line], qualifier=QUALIFIER, ATLAS=False)

    canvas.ylim(2E-04, 2E+02)
    canvas.logy()
    canvas.legend()

    canvas.pads()[1].ylabel("Passing / failing")

    return canvas
Example #7
0
File: test.py Project: nethemis/ANN
def plot1D(*argv):
    """
    Draw the original and transformed profiles together with the linear fit.

    Arguments (positional, unpacked from `argv`):
        graphs: Mapping with the keys 'Tau21' and 'Tau21DDT'; each value is
            passed to `c.graph`.
        ddt: Fitted object exposing `intercept_`, `offset_` and `coef_`.
        arr_x: Iterable of x-values; its minimum and maximum are the end
            points of the drawn fit line.

    Returns:
        None. The figure is saved as `figures/ddt/ddt.pdf`.
    """

    graphs, ddt, arr_x = argv

    # Applied to the global style object, so it outlives this call.
    ROOT.gStyle.SetTitleOffset(1.4, 'x')

    canvas = rp.canvas(batch=True)

    bare_pad = canvas.pads()[0]._bare()
    bare_pad.cd()
    # NOTE(review): the top margin is set twice with the same value; possibly
    # another margin was meant for one of the calls -- confirm.
    bare_pad.SetTopMargin(0.10)
    bare_pad.SetTopMargin(0.10)

    # Profiles
    original_colour = rp.colours[4]
    transformed_colour = rp.colours[1]
    canvas.graph(graphs['Tau21'],
                 label="Original, #tau_{21}",
                 linecolor=original_colour,
                 markercolor=original_colour,
                 markerstyle=24,
                 legend_option='PE')
    canvas.graph(graphs['Tau21DDT'],
                 label="Transformed, #tau_{21}^{DDT}",
                 linecolor=transformed_colour,
                 markercolor=transformed_colour,
                 markerstyle=20,
                 legend_option='PE')

    # Fit line between the smallest and the largest x-value
    intercept = ddt.intercept_ + ddt.offset_
    coef = ddt.coef_
    xs = [min(arr_x), max(arr_x)]
    ys = [intercept + x * coef for x in xs]
    canvas.plot(ys, bins=xs, color=rp.colours[-1], label='Linear fit', linewidth=1, linestyle=1, option='L')

    # Axis titles, text block and legend
    canvas.xlabel("Large-#it{R} jet #rho^{DDT} = log(m^{2}/ p_{T} / 1 GeV)")
    canvas.ylabel("#LT#tau_{21}#GT, #LT#tau_{21}^{DDT}#GT")

    canvas.text(["#sqrt{s} = 13 TeV,  Multijets"], qualifier=QUALIFIER)
    if "Internal" in QUALIFIER:
        legend_ymax = None
    else:
        legend_ymax = 0.85
    canvas.legend(width=0.25, xmin=0.57, ymax=legend_ymax)

    # Fit-range caption and the two vertical lines delimiting it
    canvas.ylim(0, 1.4)
    canvas.latex("Fit range", sum(FIT_RANGE) / 2., 0.08, textsize=13, textcolor=ROOT.kGray + 2)
    for edge, text_align in zip((FIT_RANGE[0], FIT_RANGE[1]), ('BR', 'BL')):
        canvas.xline(edge, ymax=0.82, text_align=text_align, linecolor=ROOT.kGray + 2)

    # Save
    mkdir('figures/ddt/')
    canvas.save('figures/ddt/ddt.pdf')
    return
def plot(*argv):
    """
    Draw the normalised distribution of one feature for both classes.

    Arguments (positional, unpacked from `argv`):
        args: Namespace-like object; `args.show` disables batch mode.
        data: Frame-like object indexed with `.loc[mask, column]`; the
            columns 'signal', 'weight_test' and `feat` are read.
        feat: Name of the column to histogram; also used for the x-axis
            title via `latex`.
        bins: Binning passed to `c.hist`.
        pt_range: Pair of pT bounds to print on the canvas, or None.
        mass_range: Pair of mass bounds to print on the canvas, or None.

    Returns:
        The canvas holding the finished figure.
    """

    # Unpack arguments
    args, data, feat, bins, pt_range, mass_range = argv

    # Canvas
    c = rp.canvas(batch=not args.show)

    # Style
    # Copy the per-class dicts too. `dict(**HISTSTYLE)` only copied the outer
    # mapping, so the `update` below wrote this plot's binning into the shared
    # module-level style; it also rejects non-string keys on Python 3.
    histstyle = {key: dict(style) for key, style in HISTSTYLE.items()}
    base = dict(bins=bins, alpha=0.5, normalise=True, linewidth=3)

    # Plots
    for signal in [0, 1]:
        msk = (data['signal'] == signal)
        histstyle[signal].update(base)
        c.hist(data.loc[msk, feat].values,
               weights=data.loc[msk, 'weight_test'].values,
               **histstyle[signal])
        pass

    # Decorations
    c.xlabel("Large-#it{R} jet " + latex(feat, ROOT=True))
    c.ylabel("Fraction of jets")
    c.text(TEXT + ["#it{W} jet tagging"] + ([
        "p_{{T}} #in  [{:.0f}, {:.0f}] GeV".format(pt_range[0], pt_range[1])
    ] if pt_range is not None else []) + ([
        "m #in  [{:.0f}, {:.0f}] GeV".format(mass_range[0], mass_range[1]),
    ] if mass_range is not None else []),
           qualifier=QUALIFIER,
           ATLAS=False)
    c.ylim(4E-03, 4E-01)
    c.logy()
    c.legend()
    return c
Example #9
0
def plot(*argv):
    """
    Draw the jet-mass comparison on a grid of pads.

    The first pad only holds the inclusive legend and the text block. Every
    other pad shows the inclusive background and signal mass spectra, with
    the background spectra passing the cuts of two consecutive features
    drawn on top.

    Arguments (positional, unpacked from `argv`):
        data: Frame-like object indexed with `.loc[mask, column]`; the
            columns 'signal', 'm' and 'weight_test' are read.
        args: Namespace-like object; `args.show` disables batch mode.
        features: Sequence of feature names; feature `i` is drawn on pad
            `1 + i // 2`.
        msks_pass: Mapping from feature name to the mask of entries passing
            that feature's cut.
        eff_sig: Signal efficiency (per cent) printed in the text block.

    Returns:
        The canvas holding the finished figure.
    """

    # Unpack arguments
    data, args, features, msks_pass, eff_sig = argv

    with TemporaryStyle() as style:

        # Style
        ymin, ymax = 5E-05, 5E+00
        scale = 0.8
        for coord in ['x', 'y', 'z']:
            style.SetLabelSize(style.GetLabelSize(coord) * scale, coord)
            style.SetTitleSize(style.GetTitleSize(coord) * scale, coord)
            pass
        style.SetTextSize      (style.GetTextSize()       * scale)
        style.SetLegendTextSize(style.GetLegendTextSize() * scale)
        style.SetTickLength(0.07,                     'x')
        style.SetTickLength(0.07 * (5./6.) * (2./3.), 'y')

        # Local style override(s)
        # Copy the per-class dicts too. `dict(**HISTSTYLE)` only copied the
        # outer mapping, so every assignment below changed the shared
        # module-level style; it also rejects non-string keys on Python 3.
        histstyle = {key: dict(value) for key, value in HISTSTYLE.items()}
        histstyle[True]['fillstyle'] = 3554
        histstyle[True] ['label'] = None
        histstyle[False]['label'] = None
        for v in ['linecolor', 'fillcolor']:
            histstyle[True] [v] = 16
            histstyle[False][v] = ROOT.kBlack
            pass
        style.SetHatchesLineWidth(1)

        # Canvas
        c = rp.canvas(batch=not args.show, num_pads=(2,3))

        # Plots
        # -- Dummy, for proper axes
        for ipad, pad in enumerate(c.pads()[1:], 1):
            pad.hist([ymin], bins=[50, 300], linestyle=0, fillstyle=0, option=('Y+' if ipad % 2 else ''))
            pass

        # -- Inclusive
        base = dict(bins=MASSBINS, normalise=True, linewidth=2)
        for signal, name in zip([False, True], ['bkg', 'sig']):
            msk = data['signal'] == signal
            histstyle[signal].update(base)
            for ipad, pad in enumerate(c.pads()[1:], 1):
                histstyle[signal]['option'] = 'HIST'
                pad.hist(data.loc[msk, 'm'].values, weights=data.loc[msk, 'weight_test'].values, **histstyle[signal])
                pass
            pass

        # The legend categories below are built from the same style dicts, so
        # the draw option is switched only after the histograms are drawn.
        for sig in [True, False]:
            histstyle[sig]['option'] = 'FL'
            pass

        c.pads()[0].legend(header='Inclusive selection:', categories=[
            ("Multijets",   histstyle[False]),
            ("#it{W} jets", histstyle[True])
            ], xmin=0.18, width= 0.60, ymax=0.28 + 0.07, ymin=0.001 + 0.07, columns=2)
        c.pads()[0]._legends[-1].SetTextSize(style.GetLegendTextSize())
        c.pads()[0]._legends[-1].SetMargin(0.35)

        # -- Tagged
        base['linewidth'] = 2
        for ifeat, feat in enumerate(features):
            # Consecutive feature pairs share a colour and a pad; the line
            # style tells the two members of a pair apart.
            opts = dict(
                linecolor = rp.colours[(ifeat // 2)],
                linestyle = 1 + (ifeat % 2),
                linewidth = 2,
                )
            cfg = dict(**base)
            cfg.update(opts)
            msk = (data['signal'] == 0) & msks_pass[feat]
            pad = c.pads()[1 + ifeat//2]
            pad.hist(data.loc[msk, 'm'].values, weights=data.loc[msk, 'weight_test'].values, label=" " + latex(feat, ROOT=True), **cfg)
            pass

        # -- Legend(s)
        for ipad, pad in enumerate(c.pads()[1:], 1):
            offsetx = (0.20 if ipad % 2 else 0.05)
            offsety =  0.20 * ((2 - (ipad // 2)) / float(2.))
            pad.legend(width=0.25, xmin=0.68 - offsetx, ymax=0.80 - offsety)
            pad.latex("Tagged multijets:", NDC=True, x=0.93 - offsetx, y=0.84 - offsety, textcolor=ROOT.kGray + 3, textsize=style.GetLegendTextSize() * 0.8, align=31)
            pad._legends[-1].SetMargin(0.35)
            pad._legends[-1].SetTextSize(style.GetLegendTextSize())
            pass

        # Formatting pads
        margin = 0.2
        for ipad, pad in enumerate(c.pads()):
            tpad = pad._bare()  # ROOT.TPad
            right = ipad % 2
            # Row position from 0 (top row) to 1 (bottom row); it shifts the
            # vertical margin from the top of the pad to its bottom.
            f = (ipad // 2) / float(len(c.pads()) // 2 - 1)
            tpad.SetLeftMargin (0.05 + 0.15 * (1 - right))
            tpad.SetRightMargin(0.05 + 0.15 * right)
            tpad.SetBottomMargin(f * margin)
            tpad.SetTopMargin((1 - f) * margin)
            if ipad == 0: continue
            pad._xaxis().SetNdivisions(505)
            pad._yaxis().SetNdivisions(505)
            if ipad // 2 < len(c.pads()) // 2 - 1:  # Not bottom pad(s)
                pad._xaxis().SetLabelOffset(9999.)
                pad._xaxis().SetTitleOffset(9999.)
            else:
                pad._xaxis().SetTitleOffset(2.7)
                pass
            pass

        # Re-draw axes
        for pad in c.pads()[1:]:
            pad._bare().RedrawAxis()
            pad._bare().Update()
            pad._xaxis().SetAxisColor(ROOT.kWhite)  # Remove "double ticks"
            pad._yaxis().SetAxisColor(ROOT.kWhite)  # Remove "double ticks"
            pass

        # Decorations
        c.pads()[-1].xlabel("Large-#it{R} jet mass [GeV]")
        c.pads()[-2].xlabel("Large-#it{R} jet mass [GeV]")
        # The nested #splitline's position the shared y-axis title.
        c.pads()[1].ylabel("#splitline{#splitline{#splitline{#splitline{}{}}{#splitline{}{}}}{#splitline{}{}}}{#splitline{}{#splitline{}{#splitline{}{Fraction of jets}}}}")
        c.pads()[2].ylabel("#splitline{#splitline{#splitline{#splitline{Fraction of jets}{}}{}}{}}{#splitline{#splitline{}{}}{#splitline{#splitline{}{}}{#splitline{}{}}}}")
        # I have written a _lot_ of ugly code, but this ^ is probably the worst.

        c.pads()[0].text(["#sqrt{s} = 13 TeV,  #it{W} jet tagging",
                    "Cuts at #varepsilon_{sig}^{rel} = %.0f%%" % eff_sig,
                    ], xmin=0.2, ymax=0.72, qualifier=QUALIFIER)

        for pad in c.pads()[1:]:
            pad.ylim(ymin, ymax)
            pad.logy()
            pass

        pass  # end temporary style

    return c
Example #10
0
def jsd(data_, args, feature_dict, pt_range, title=None):
    """
    Compute the Jensen-Shannon divergence between the background jet-mass
    spectra passing and failing a cut, for a series of cuts on each tagger
    feature, and plot the result.

    Arguments:
        data_: Pandas data frame from which to read data. The columns
            'signal', 'm', 'weight_test', every feature column and, if
            `pt_range` is given, 'pt' are read.
        args: Namespace holding command-line arguments; `args.show` disables
            batch mode for the canvases.
        feature_dict: Mapping from base variable name to a sequence of
            suffixes; every `basevar + suffix` is one feature.
        pt_range: Pair of exclusive pT bounds used to select data, or None
            to use all of `data_`.
        title: Optional prefix for the output file name.

    Returns:
        Tuple `(c, args, path)`: the canvas returned by `plot`, the
        unmodified `args`, and the path the canvas was saved to. The path is
        `figures/jsd<pT tag>.pdf`, or `figures/<title>_jsd<pT tag>.pdf` if
        `title` is given, where the pT tag is empty without `pt_range`.
    """

    # Extract features and count appearance of each base variable
    features = []
    appearances = []
    for basevar, suffixes in feature_dict.items():
        features.extend(basevar + suffix for suffix in suffixes)
        appearances.append(len(suffixes))

    # Select data
    if pt_range is not None:
        data = data_[(data_['pt'] > pt_range[0]) & (data_['pt'] < pt_range[1])]
    else:
        data = data_

    # Create local histogram style dict
    # Copy the per-class dicts too. `dict(**HISTSTYLE)` only copied the outer
    # mapping, so the label assignments changed the shared module-level
    # style; it also rejects non-string keys on Python 3.
    histstyle = {key: dict(style) for key, style in HISTSTYLE.items()}
    histstyle[True]['label'] = "Pass"
    histstyle[False]['label'] = "Fail"

    # Define common variables
    msk = data['signal'] == 0
    effs = np.linspace(0, 100, 10 * 2, endpoint=False)[1:].astype(int)

    # Loop tagger features. The result dict has its own name so that it does
    # not shadow this function.
    jsd_values = {feat: [] for feat in features}
    for ifeat, feat in enumerate(features):

        if jsd_values[feat]: continue  # Duplicate feature.

        # Define cuts
        cuts = list()
        for eff in effs:
            cut = wpercentile(data.loc[msk, feat].values,
                              eff if signal_low(feat) else 100 - eff,
                              weights=data.loc[msk, 'weight_test'].values)
            cuts.append(cut)

        # Compute the divergence for successive cuts
        for cut, eff in zip(cuts, effs):

            # Entries passing the cut; the comparison is inverted for
            # features where `signal_low` is true.
            msk_pass = data[feat] > cut
            if signal_low(feat):
                msk_pass = ~msk_pass

            sel_pass = msk_pass & msk
            sel_fail = ~msk_pass & msk

            # Get histograms / plot
            c = rp.canvas(batch=not args.show)
            h_pass = c.hist(data.loc[sel_pass, 'm'].values,
                            bins=MASSBINS,
                            weights=data.loc[sel_pass, 'weight_test'].values,
                            normalise=True,
                            **histstyle[True])  #, display=False)
            h_fail = c.hist(data.loc[sel_fail, 'm'].values,
                            bins=MASSBINS,
                            weights=data.loc[sel_fail, 'weight_test'].values,
                            normalise=True,
                            **histstyle[False])  #, display=False)

            # Convert to numpy arrays
            p = root_numpy.hist2array(h_pass)
            f = root_numpy.hist2array(h_fail)

            # Compute Jensen-Shannon divergence
            jsd_values[feat].append(JSD(p, f, base=2))

            # -- Decorations
            #c.xlabel("Large-#it{R} jet mass [GeV]")
            #c.ylabel("Fraction of jets")
            #c.legend()
            #c.logy()
            #c.text(TEXT + [
            #    "{:s} {} {:.3f}".format(latex(feat, ROOT=True), '<' if signal_low(feat) else '>', cut),
            #    "JSD = {:.4f}".format(jsd_values[feat][-1])] + \
            #    (["p_{{T}} #in  [{:.0f}, {:.0f}] GeV".format(*pt_range)] if pt_range else []),
            #    qualifier=QUALIFIER, ATLAS=False)

            # -- Save
            #if title is None:
            #    c.save('figures/temp_jsd_{:s}_{:.0f}{}.pdf'.format(feat, eff, '' if pt_range is None else '__pT{:.0f}_{:.0f}'.format(*pt_range)))
            #else:
            #    c.save('figures/'+title+'_temp_jsd_{:s}_{:.0f}{}.pdf'.format(feat, eff, '' if pt_range is None else '__pT{:.0f}_{:.0f}'.format(*pt_range)))

    # Compute meaningful limit on JSD, at efficiencies spaced evenly in
    # logit space between sigmoid(-5) and sigmoid(5)
    def sigmoid(x):
        return 1. / (1. + np.exp(-x))

    jsd_limits = list()
    for eff in sigmoid(np.linspace(-5, 5, 20 + 1, endpoint=True)):
        limits = jsd_limit(data[msk], eff, num_bootstrap=5)
        jsd_limits.append((eff, np.mean(limits), np.std(limits)))

    # Perform plotting
    c = plot(args, data, effs, jsd_values, jsd_limits, features, pt_range,
             appearances)

    # Output
    pt_tag = '' if pt_range is None else '__pT{:.0f}_{:.0f}'.format(*pt_range)
    if title is None:
        path = 'figures/jsd{}.pdf'.format(pt_tag)
    else:
        path = 'figures/' + title + '_jsd{}.pdf'.format(pt_tag)
    c.save(path=path)
    return c, args, path
Example #11
0
def plot_individual(*argv):
    """
    Save the jet-mass comparison as one small figure per pair of features,
    plus one figure that holds only the inclusive legend and the text block.

    Arguments (positional, unpacked from `argv`):
        data: Frame-like object indexed with `.loc[mask, column]`; the
            columns 'signal', 'm' and 'weight_test' are read.
        args: Namespace-like object; `args.show` disables batch mode.
        features: Sequence of feature names, paired up as (0, 1), (2, 3), ...
        msks_pass: Mapping from feature name to the mask of entries passing
            that feature's cut.
        eff_sig: Signal efficiency (per cent); printed on the legend figure
            and inserted into every file name.

    Returns:
        None. Files are saved as
        `figures/jetmasscomparison__eff_sig_<eff>__<suffix>.pdf`, where the
        suffix is `legend` or the two feature names joined by `_`.
    """

    # Unpack arguments
    data, args, features, msks_pass, eff_sig = argv

    with TemporaryStyle() as style:

        # Style @TEMP?
        ymin, ymax = 5E-05, 5E+00
        scale = 0.6
        for coord in ['x', 'y', 'z']:
            style.SetLabelSize(style.GetLabelSize(coord) * scale, coord)
            style.SetTitleSize(style.GetTitleSize(coord) * scale, coord)
            pass
        #style.SetTextSize      (style.GetTextSize()       * scale)
        #style.SetLegendTextSize(style.GetLegendTextSize() * (scale + 0.03))
        style.SetTickLength(0.07,                     'x')
        style.SetTickLength(0.07 * (5./6.) * (2./3.), 'y')

        # Local style override(s)
        # Copy the per-class dicts too. `dict(**HISTSTYLE)` only copied the
        # outer mapping, so every assignment below changed the shared
        # module-level style; it also rejects non-string keys on Python 3.
        histstyle = {key: dict(value) for key, value in HISTSTYLE.items()}
        histstyle[True]['fillstyle'] = 3554
        histstyle[True] ['linewidth'] = 4
        histstyle[False]['linewidth'] = 4
        histstyle[True] ['label'] = None
        histstyle[False]['label'] = None
        for v in ['linecolor', 'fillcolor']:
            histstyle[True] [v] = 16
            histstyle[False][v] = ROOT.kBlack
            pass
        style.SetHatchesLineWidth(6)

        # Loop features. Index -1 (entry None) is the legend-only figure;
        # indices 0, 1, ... are the consecutive feature pairs.
        ts  = style.GetTextSize()
        lts = style.GetLegendTextSize()
        for ifeat, feats in enumerate([None] + list(zip(features[::2], features[1::2])), start=-1):
            first = ifeat == -1

            # Style
            style.SetTitleOffset(1.25 if first else 1.2, 'x')
            style.SetTitleOffset(1.7  if first else 1.6, 'y')
            style.SetTextSize(ts * (0.8 if first else scale))
            style.SetLegendTextSize(lts * (0.8 + 0.03 if first else scale + 0.03))

            # Canvas
            c = rp.canvas(batch=not args.show, size=(300, 200))#int(200 * (1.45 if first else 1.))))

            if first:
                opts = dict(xmin=0.185, width=0.60, columns=2)
                c.legend(header=' ', categories=[
                            ("Multijets",   histstyle[False]),
                            ("#it{W} jets", histstyle[True])
                        ], ymax=0.45, **opts)
                c.legend(header='Inclusive selection:',
                         ymax=0.40, **opts)
                #c.pad()._legends[-2].SetTextSize(style.GetLegendTextSize())
                #c.pad()._legends[-1].SetTextSize(style.GetLegendTextSize())
                c.pad()._legends[-2].SetMargin(0.35)
                c.pad()._legends[-1].SetMargin(0.35)

                c.text(["#sqrt{s} = 13 TeV,  #it{W} jet tagging",
                        "Cuts at #varepsilon_{sig}^{rel} = %.0f%%" % eff_sig,
                        ], xmin=0.2, ymax=0.80, qualifier=QUALIFIER)


            else:


                # Plots
                # -- Dummy, for proper axes
                c.hist([ymin], bins=[50, 300], linestyle=0, fillstyle=0)

                # -- Inclusive
                base = dict(bins=MASSBINS, normalise=True)
                for signal, name in zip([False, True], ['bkg', 'sig']):
                    msk = data['signal'] == signal
                    histstyle[signal].update(base)
                    histstyle[signal]['option'] = 'HIST'
                    c.hist(data.loc[msk, 'm'].values, weights=data.loc[msk, 'weight_test'].values, **histstyle[signal])
                    pass

                for sig in [True, False]:
                    histstyle[sig]['option'] = 'FL'
                    pass

                # -- Tagged
                for jfeat, feat in enumerate(feats):
                    opts = dict(
                        linecolor = rp.colours[((2 * ifeat + jfeat) // 2)],
                        linestyle = 1 + 6 * (jfeat % 2),
                        linewidth = 4,
                        )
                    cfg = dict(**base)
                    cfg.update(opts)
                    msk = (data['signal'] == 0) & msks_pass[feat]
                    c.hist(data.loc[msk, 'm'].values, weights=data.loc[msk, 'weight_test'].values, label=" " + latex(feat, ROOT=True), **cfg)
                    pass

                # -- Legend(s)
                # `first` is always False in this branch, so the second
                # alternative of each conditional below is the one in effect.
                y =  0.46  if first else 0.68
                dy = 0.025 if first else 0.04
                c.legend(width=0.25, xmin=0.63, ymax=y)
                c.latex("Tagged multijets:", NDC=True, x=0.87, y=y + dy, textcolor=ROOT.kGray + 3, textsize=style.GetLegendTextSize() * 0.9, align=31)
                c.pad()._legends[-1].SetMargin(0.35)
                c.pad()._legends[-1].SetTextSize(style.GetLegendTextSize())

                # Formatting pads
                tpad = c.pad()._bare()
                tpad.SetLeftMargin  (0.20)
                tpad.SetBottomMargin(0.12 if first else 0.20)
                tpad.SetTopMargin   (0.39 if first else 0.05)

                # Re-draw axes
                tpad.RedrawAxis()
                tpad.Update()
                c.pad()._xaxis().SetAxisColor(ROOT.kWhite)  # Remove "double ticks"
                c.pad()._yaxis().SetAxisColor(ROOT.kWhite)  # Remove "double ticks"

                # Decorations
                c.xlabel("Large-#it{R} jet mass [GeV]")
                c.ylabel("Fraction of jets")

                c.text(qualifier=QUALIFIER, xmin=0.25, ymax=0.82)

                c.ylim(ymin, ymax)
                c.logy()
                pass

            # Save
            c.save(path = 'figures/jetmasscomparison__eff_sig_{:d}__{}.pdf'.format(int(eff_sig), 'legend' if first else '{}_{}'.format(*feats)))
            pass
        pass  # end temporary style

    return
Example #12
0
def main(args):
    """
    Plot the jet-pT spectra of data and MC and print control-region numbers.

    A first canvas overlays three pT spectra (data background, lumi-scaled MC
    background, MC signal) and is saved as PDF and EPS under
    `figures/distributions/`. A second canvas only serves to build the MC
    background and signal histograms restricted to the control region, from
    which the signal contamination and the signal efficiency are printed.

    Arguments:
        args: Command-line arguments, handed to `initialise`.

    Returns:
        None.
    """

    # Definitions
    # Copy the per-class dicts too. `dict(**HISTSTYLE)` only copied the outer
    # mapping, so the label assignments below changed the shared module-level
    # style; it also rejects non-string keys on Python 3.
    histstyle = {key: dict(style) for key, style in HISTSTYLE.items()}

    # Initialise
    args, cfg = initialise(args)

    # Load data
    mc, features, _ = load_data('data/djr_LCTopo_2.h5')  #, test=True) #
    data, features, _ = load_data('data/djr_LCTopo_data.h5')  #, test=True) #

    # NOTE(review): `histstyle` is not used again within this function.
    histstyle[True]['label'] = 'Multijets'
    histstyle[False]['label'] = 'Dark jets, Model A, m = 2 TeV'

    # Add knn variables

    #base_var = ['lead_jet_ungrtrk500', 'sub_jet_ungrtrk500']
    base_var = 'jet_ungrtrk500'
    kNN_var = base_var.replace('jet', 'knn')
    #base_vars = ['lead_'+base_var, 'sub_'+base_var]
    #kNN_vars = ['lead_'+kNN_var, 'sub_'+kNN_var]
    """
    with Profile("Add variables"):
        #for i in range(len(base_var)):
        print "k-NN base variable: {} (cp. {})".format(base_var, kNN_var)
        add_knn(data, newfeat='lead_'+kNN_var, path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel))
        add_knn(data, newfeat='sub_'+kNN_var, path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel))
        add_knn(mc, newfeat='lead_'+kNN_var, path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel))
        add_knn(mc, newfeat='sub_'+kNN_var, path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel))
    """
    #add_knn(data, newfeat=kNN_var, path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel))

    bins_pt = np.linspace(450, 5000, 50)

    # Useful masks
    msk_bkg_data = data['signal'] == 0
    msk_bkg_mc = (mc['signal'] == 0)  #& (mc['weight']<0.0002)
    msk_sig_mc = (mc['signal'] == 1)  #& (mc['weight']<0.0002)

    # Control region: either of the two jets has fewer than 20 tracks
    msk_CR = (mc['lead_jet_ungrtrk500'] < 20) | (mc['sub_jet_ungrtrk500'] < 20)

    scale = 139 * 1000000  # (inverse nanobarn)

    # pT dist
    # NOTE(review): the data histogram reads 'jet_pt' whereas both MC
    # histograms read 'sub_jet_pt' and the axis says sub-leading -- confirm.
    c = rp.canvas(batch=True)
    hist_incl_data = c.hist(data.loc[msk_bkg_data, 'jet_pt'].values,
                            bins=bins_pt,
                            weights=data.loc[msk_bkg_data, 'weight'].values,
                            label="Data, control region",
                            normalise=False,
                            linecolor=ROOT.kGreen + 2)

    hist_incl_mc = c.hist(mc.loc[msk_bkg_mc, 'sub_jet_pt'].values,
                          bins=bins_pt,
                          weights=scale * mc.loc[msk_bkg_mc, 'weight'].values,
                          label="MC, scaled with lumi",
                          normalise=False,
                          linecolor=ROOT.kViolet + 2)

    hist_incl_sig = c.hist(mc.loc[msk_sig_mc, 'sub_jet_pt'].values,
                           bins=bins_pt,
                           weights=mc.loc[msk_sig_mc, 'weight'].values,
                           label="Combined Signal",
                           normalise=False,
                           linecolor=ROOT.kOrange + 2)

    c.legend(width=0.4, xmin=0.5, ymax=0.9)
    c.ylabel("Number of events")
    c.xlabel("Sub-leading jet pT [GeV]")
    c.logy()
    #c.ylim(0.00005, 5)
    #c.save('figures/distributions/mjj_Bkg_CR20.pdf'.format(knnCut))
    #c.save('figures/distributions/mjj_Bkg_CR20.eps'.format(knnCut))
    c.save('figures/distributions/sub_pt_bkg_data_mc.pdf')
    c.save('figures/distributions/sub_pt_bkg_data_mc.eps')

    # Each message is formatted into one string so that `print(...)` emits
    # the same text on Python 2 and Python 3. The double space reproduces the
    # separator the former print statements inserted between their items.
    print("Data bkg effective entries:  {}".format(hist_incl_data.GetEffectiveEntries()))
    print("MC bkg effective entries:  {}".format(hist_incl_mc.GetEffectiveEntries()))

    print("Data bkg integral:  {}".format(hist_incl_data.Integral()))
    print("MC bkg integral:  {}".format(hist_incl_mc.Integral()))

    del c

    # Control-region histograms; this canvas is never saved.
    c = rp.canvas(batch=True)
    hist_bkg_CR = c.hist(mc.loc[(msk_bkg_mc & msk_CR), 'lead_jet_pt'].values,
                         bins=bins_pt,
                         weights=scale *
                         mc.loc[(msk_bkg_mc & msk_CR), 'weight'].values,
                         label="MC, control region",
                         normalise=False,
                         linecolor=ROOT.kGreen + 2)

    hist_sig_CR = c.hist(mc.loc[(msk_sig_mc & msk_CR), 'lead_jet_pt'].values,
                         bins=bins_pt,
                         weights=mc.loc[(msk_sig_mc & msk_CR),
                                        'weight'].values,
                         label="MC, control region",
                         normalise=False,
                         linecolor=ROOT.kGreen + 2)

    # Contamination: signal share of everything in the control region
    print("CR sig contamination (eff. entries):  {}".format(
        hist_sig_CR.GetEffectiveEntries() /
        (hist_bkg_CR.GetEffectiveEntries() + hist_sig_CR.GetEffectiveEntries())))
    print("CR sig contamination (integral):  {}".format(
        hist_sig_CR.Integral() /
        (hist_bkg_CR.Integral() + hist_sig_CR.Integral())))

    # Efficiency: control-region signal relative to the inclusive signal
    print("CR sig efficiency (eff. entries):  {}".format(
        hist_sig_CR.GetEffectiveEntries() / hist_incl_sig.GetEffectiveEntries()))
    print("CR sig efficiency (integral):  {}".format(
        hist_sig_CR.Integral() / hist_incl_sig.Integral()))
Example #13
0
def plot(*argv):
    """
    Method for delegating plotting.

    Draws the JSD ("mass correlation") curve of every feature against the
    background efficiency, on top of a flat reference histogram at 1 and the
    smoothed band built from `jsd_limits`.

    Arguments (all passed positionally through `*argv`):
        args: Object with a `show` attribute; the canvas runs in batch mode
            when it is false.
        data: Not used in this function.
        effs: Background efficiencies; divided by 100 to get the x-values.
        jsd: Mapping from feature name to the values plotted at `effs`.
        jsd_limits: Iterable of (x, y, y-error) triplets for the limit band.
        features: Feature names, ordered group by group.
        pt_range: Optional pair of numbers shown in the text box.
        appearances: List with the number of features in each group.

    Returns:
        The canvas holding the plot.
    """

    # Unpack arguments
    args, data, effs, jsd, jsd_limits, features, pt_range, appearances = argv

    with TemporaryStyle() as style:

        # Style
        style.SetTitleOffset(1.5, 'x')
        style.SetTitleOffset(2.0, 'y')

        # Canvas
        c = rp.canvas(batch=not args.show)

        # Plots
        # -- Flat reference at 1
        ref = ROOT.TH1F('ref', "", 10, 0., 1.)
        for ibin in range(1, ref.GetXaxis().GetNbins() + 1):
            ref.SetBinContent(ibin, 1)
        c.hist(ref, linecolor=ROOT.kGray + 2, linewidth=1)

        linestyles = [1, 3, 5, 7]
        width = 0.15

        # Group boundaries: group `i` is features[indices[i]:indices[i + 1]]
        indices = np.array([0] + appearances).cumsum()

        def draw_curve(feat, igroup, ifeat, linestyle):
            # One JSD curve: colour encodes the group, marker/line style the
            # position of the feature within its group.
            colour = rp.colours[igroup % len(rp.colours)]
            c.plot(jsd[feat],
                   bins=np.array(effs) / 100.,
                   linecolor=colour,
                   markercolor=colour,
                   linestyle=linestyle,
                   markerstyle=20 if ifeat == 0 else 23 + ifeat,
                   label=latex(feat, ROOT=True),
                   option='PL')

        if len(appearances) != 2:
            # -- One legend for the simple taggers, one for the others
            for is_simple in [True, False]:
                for i in range(len(indices) - 1):
                    group = features[indices[i]:indices[i + 1]]
                    for ifeat, feat in enumerate(group):
                        if is_simple != signal_low(feat):
                            continue
                        draw_curve(feat, i, ifeat, 1 + ifeat)

                c.legend(header=("Analytical:" if is_simple else "MVA:"),
                         width=width * (1 + 0.8 * int(is_simple)),
                         xmin=0.42 + (width + 0.05) * (is_simple),
                         ymax=0.888,
                         columns=2 if is_simple else 1,
                         margin=0.35)
        else:
            # -- Exactly two groups: one legend per group
            for first_var in [True, False]:
                i = 0 if first_var else 1
                group = features[indices[i]:indices[i + 1]]
                for ifeat, feat in enumerate(group):
                    # Wrap around so that more than four features per group
                    # do not run off the end of `linestyles`.
                    draw_curve(feat, i, ifeat,
                               linestyles[ifeat % len(linestyles)])

                # The header is named after the first feature of the group,
                # which sits at indices[i] (not at appearances[1]).
                c.legend(header=latex(features[indices[i]], ROOT=True) +
                         "-based:",
                         width=width,
                         xmin=0.45 + (width + 0.06) * (first_var),
                         ymax=0.888)

        # Meaningful limits on JSD
        x, y, ey = map(np.array, zip(*jsd_limits))
        ex = np.zeros_like(ey)
        gr = ROOT.TGraphErrors(len(x), x, y, ex, ey)
        smooth_tgrapherrors(gr, ntimes=2)
        c.graph(gr,
                linestyle=2,
                linecolor=ROOT.kGray + 1,
                fillcolor=ROOT.kBlack,
                alpha=0.03,
                option='L3')

        # Redraw axes
        c.pads()[0]._primitives[0].Draw('AXIS SAME')

        # Decorations
        c.xlabel("Background efficiency #varepsilon_{bkg}^{rel}")
        c.ylabel("Mass correlation, JSD")
        c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER, ATLAS=False)

        lines = ["#sqrt{s} = 13 TeV",  "Multijets"]
        if pt_range:
            lines += ["p_{T} [GeV] #in",
                      "    [{:.0f}, {:.0f}]".format(*pt_range)]
        c.text(lines, ymax=0.85, ATLAS=None)

        c.latex("Maximal sculpting",
                0.065,
                1.2,
                align=11,
                textsize=11,
                textcolor=ROOT.kGray + 2)
        c.xlim(0, 1)
        c.ymin(1E-06)  # chosen for highest pT bin
        c.padding(0.45)
        c.logy()

        for leg in c.pad()._legends:
            leg.SetMargin(0.5)

        # Anchor the "Statistical limit" label to a point near the right-hand
        # end of the band; clamp the index for graphs with few points.
        x_, y_ = ROOT.Double(0), ROOT.Double(0)
        idx = max(gr.GetN() - 7, 0)
        gr.GetPoint(idx, x_, y_)
        ey_ = gr.GetErrorY(idx)
        x_, y_ = map(float, (x_, y_))
        c.latex("Statistical limit",
                x_,
                y_ - ey_ / 2.,
                align=23,
                textsize=11,
                angle=12,
                textcolor=ROOT.kGray + 2)

    return c
Example #14
0
def main (args):
    """
    Build the fitted 1D profile, store it, and plot selection efficiencies.

    Steps, in order:
      1. Load the dataset and split it into signal/background using the
         `signal` column.
      2. Fill the measured profile from background events, add the k-NN
         variable to `data`, and load the stored model.
      3. Evaluate the model at re-binned bin centres of `VARX`, fit a weighted
         second-degree polynomial to the measured points, and write the
         coefficients and the fitted histogram to a ROOT file under `models/`.
      4. Plot the measured against the fitted profile.
      5. For signal and background separately, plot the efficiency (in
         percent) of the `knn > 0` and `VAR > 70` selections in bins of
         `VARX`, saved under `figures/knn/`.

    Arguments:
        args: Passed to `initialise`, which returns the arguments actually
            used; `args.input` names the file loaded from `data/`.
    """

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, _, _ = load_data('data/' + args.input)
    msk_sig = data['signal'] == 1
    msk_bkg = ~msk_sig

    # Fill measured profile
    profile_meas, (x, percs, err) = fill_profile_1D(data[msk_bkg])
    weights = 1 / err  # inverse-error weights for the polynomial fit below

    # Add k-NN variable
    knnfeat = 'knn'
    orgfeat = VAR
    path_knn = 'models/knn/{}_{:s}_{}_{}.pkl.gz'.format(FIT, VAR, EFF, MODEL)
    add_knn(data, newfeat=knnfeat, path=path_knn)

    # Loading KNN classifier
    knn = loadclf(path_knn)

    X = x.reshape(-1, 1)

    # Filling fitted profile
    with Profile("Filling fitted profile"):
        rebin = 8

        # Short-hands
        vbins, vmin, vmax = AXIS[VARX]

        # Re-binned bin edges and centres
        fineBins = np.linspace(vmin, vmax, vbins * rebin + 1, endpoint=True)
        fineCentres = fineBins[:-1] + 0.5 * np.diff(fineBins)

        # Get predictions evaluated at re-binned bin centres
        if 'erf' in FIT:
            # In this mode the loaded object is indexed as three parameters
            fit = func(fineCentres, knn[0], knn[1], knn[2])
            print("Check:  {}".format(func([1500, 2000], knn[0], knn[1], knn[2])))
        else:
            fit = knn.predict(fineCentres.reshape(-1, 1))

        # Fill ROOT "profile"
        profile_fit = ROOT.TH1F('profile_fit', "", len(fineBins) - 1, fineBins.flatten('C'))
        root_numpy.array2hist(fit, profile_fit)

        # Weighted second-degree polynomial fit to the measured points. The
        # constant term comes from the polynomial features, hence no separate
        # intercept.
        poly = PolynomialFeatures(degree=2)
        X_poly = poly.fit_transform(X)
        reg = LinearRegression(fit_intercept=False)
        reg.fit(X_poly, percs, weights)
        coef = reg.coef_
        intercept = reg.intercept_
        print("COEFFICIENTS:  {} {}".format(coef, intercept))

        # Store coefficients and fitted histogram
        TCoef = ROOT.TVector3(coef[0], coef[1], coef[2])
        outFile = ROOT.TFile.Open("models/{}_jet_ungrtrk500_eff{}_stat{}_{}.root".format(FIT, EFF, MIN_STAT, MODEL), "RECREATE")
        outFile.cd()
        TCoef.Write()
        profile_fit.SetName("kNNfit")
        profile_fit.Write()
        outFile.Close()

        profile_meas2 = ROOT.TGraph(len(x), x, percs)

    # Plotting
    with Profile("Plotting"):
        plot(profile_meas2, profile_fit)

    # Plotting local selection efficiencies
    # -- Binning and pass/fail masks are the same for signal and background
    nbins, xmin, xmax = AXIS[VARX]
    bins = np.linspace(xmin, xmax, nbins + 1, endpoint=True)
    msk_pass = data[knnfeat] > 0
    msk_pass_org = data[orgfeat] > 70
    yrange = (0., EFF * 3)

    for sig, msk in zip([True, False], [msk_sig, msk_bkg]):
        tag = 'sig' if sig else 'bkg'

        print("HERE:  {}".format(bins))

        # Create `profile` histograms. Unique names per histogram and per
        # class avoid clashes between same-named ROOT objects.
        profile_knn = ROOT.TH1F('profile_knn_' + tag, "", len(bins) - 1, bins)
        profile_org = ROOT.TH1F('profile_org_' + tag, "", len(bins) - 1, bins)

        # Compute efficiencies in bins of `VARX`; bins without any weight are
        # left empty.
        for i in range(nbins):
            msk_bin = msk & (data[VARX] > bins[i]) & (data[VARX] <= bins[i + 1])
            den = data.loc[msk_bin, 'TotalEventWeight'].values.sum()
            if den > 0:
                num = data.loc[msk_bin & msk_pass, 'TotalEventWeight'].values.sum()
                num_org = data.loc[msk_bin & msk_pass_org, 'TotalEventWeight'].values.sum()
                profile_knn.SetBinContent(i + 1, num / den * 100.)
                profile_org.SetBinContent(i + 1, num_org / den * 100.)

        c = rp.canvas(batch=True)
        leg = ROOT.TLegend(0.2, 0.75, 0.5, 0.85)
        leg.AddEntry(profile_knn, "#it{n}_{trk}^{#varepsilon=%s%%} > 0" % ( EFF), "l")
        leg.AddEntry(profile_org, "#it{n}_{trk} > 70", "l")

        pad = c.pads()[0]._bare()
        pad.cd()
        pad.SetRightMargin(0.10)
        pad.SetLeftMargin(0.15)
        pad.SetTopMargin(0.10)

        # Styling
        profile_knn.SetLineColor(rp.colours[1])
        profile_org.SetLineColor(rp.colours[2])
        profile_knn.SetMarkerStyle(24)

        # The visible frame belongs to the histogram drawn first
        # (`profile_org`), so the axis settings go on both histograms rather
        # than only on the one drawn with "same".
        for hist in (profile_org, profile_knn):
            hist.GetXaxis().SetTitle("#it{m}_{jj} [GeV]")
            hist.GetYaxis().SetTitle("Selection efficiency (%)")
            hist.GetYaxis().SetNdivisions(505)
            hist.GetXaxis().SetTitleOffset(1.4)
            hist.GetYaxis().SetTitleOffset(1.8)
            hist.GetXaxis().SetRangeUser(*XRANGE)
            hist.GetYaxis().SetRangeUser(*yrange)

        # Draw
        profile_org.Draw()
        profile_knn.Draw("same")

        # Drawing a histogram without "same" clears the current pad, so the
        # legend is drawn last to make sure it survives.
        leg.Draw()

        # Save
        mkdir('figures/knn/')
        base = 'figures/knn/{}_eff_{}_{:s}_{}_{}_stat{}'.format(FIT, tag, VAR, EFF, MODEL+INPUT, MIN_STAT)
        c.save(base + '.pdf')
        c.save(base + '.eps')
        del c

    return
Example #15
0
def plot1D (*argv):
    """
    Method for delegating 1D plotting.

    Draws the original and the DDT-transformed profile of `variable`, the
    linear fit as a straight line between the smallest and largest value of
    `arr_x`, and the fit range/parameters, then saves the figure as
    `figures/ddt/ddt_<variable>.pdf`.

    Arguments (all passed positionally through `*argv`):
        graphs: Mapping with the graphs to draw, keyed by `variable` and
            `variable + 'DDT'`.
        ddt: Fitted model exposing `intercept_`, `offset_` and `coef_`; the
            first element of intercept and coefficient is printed.
        arr_x: Values whose minimum and maximum define the drawn fit line.
        variable: One of VAR_TAU21, VAR_N2, VAR_DECDEEP, VAR_DEEP. For any
            other value no profiles and no y-axis label are drawn.
        fit_range: Pair (low, high) marked with vertical lines.
    """

    # Unpack arguments
    graphs, ddt, arr_x, variable, fit_range = argv

    # Style
    ROOT.gStyle.SetTitleOffset(1.4, 'x')

    # Canvas
    c = rp.canvas(batch=True)

    # Setup
    pad = c.pads()[0]._bare()
    pad.cd()
    pad.SetTopMargin(0.10)

    # Symbol used in legend and axis label for the requested variable. The
    # first match wins, as in an if/elif chain.
    symbol = None
    for known, text in ((VAR_TAU21,   "#tau_{21}"),
                        (VAR_N2,      "N_{2}"),
                        (VAR_DECDEEP, "dec_deepWvsQCD"),
                        (VAR_DEEP,    "deepWvsQCD")):
        if variable == known:
            symbol = text
            break

    # Profiles
    if symbol is not None:
        c.graph(graphs[variable],
                label="Original, " + symbol,
                linecolor=rp.colours[4], markercolor=rp.colours[4],
                markerstyle=24, legend_option='PE')
        c.graph(graphs[variable + 'DDT'],
                label="Transformed, " + symbol + "^{DDT}",
                linecolor=rp.colours[1], markercolor=rp.colours[1],
                markerstyle=20, legend_option='PE')

    # Fit
    x1, x2 = min(arr_x), max(arr_x)
    intercept, coef = ddt.intercept_ + ddt.offset_, ddt.coef_
    y1 = intercept + x1 * coef
    y2 = intercept + x2 * coef
    c.plot([y1,y2], bins=[x1,x2], color=rp.colours[-1], label='Linear fit', linewidth=1, linestyle=1, option='L')

    # Decorations
    c.xlabel("jet #rho^{DDT} = log[m^{2} / (p_{T} #times 1 GeV)]")
    if symbol is not None:
        c.ylabel("#LT" + symbol + "#GT, #LT" + symbol + "^{DDT}#GT")

    c.text(["#sqrt{s} = 13 TeV,  Multijets"], qualifier=QUALIFIER, ATLAS=False)
    c.legend(width=0.25, xmin=0.57, ymax=0.86)

    c.xlim(0, 6.0)
    ymax = 0.8 if variable == VAR_N2 else 1.4
    c.ylim(0, ymax)
    c.latex("Fit range", sum(fit_range) / 2., 0.08, textsize=13, textcolor=ROOT.kGray + 2)
    c.latex("Fit parameters:", 0.37, 0.7*ymax, align=11, textsize=14, textcolor=ROOT.kBlack)
    c.latex("  intercept = {:7.4f}".format(intercept[0]), 0.37, 0.65*ymax, align=11, textsize=14, textcolor=ROOT.kBlack)
    c.latex("  coef = {:7.4f}".format(coef[0]), 0.37, 0.6*ymax, align=11, textsize=14, textcolor=ROOT.kBlack)
    c.xline(fit_range[0], ymax=0.82, text_align='BR', linecolor=ROOT.kGray + 2)
    c.xline(fit_range[1], ymax=0.82, text_align='BL', linecolor=ROOT.kGray + 2)

    # Save
    mkdir('figures/ddt/')
    c.save('figures/ddt/ddt_{}.pdf'.format(variable))
    return
Example #16
0
def test(data, variable, bg_eff, signal_above=False):
    """
    Validate the stored k-NN fit for `variable` and plot its efficiencies.

    Fills the measured background profile, adds the k-NN variable to `data`,
    evaluates the stored model on a re-binned (VARX, VARY) grid, plots the
    measured and the fitted profile, and finally draws, for signal and
    background separately, the efficiency of the cut on the k-NN variable in
    bins of (VARX, VARY).

    Arguments:
        data: Table with (at least) the columns `signal`, `weight_test`, VARX
            and VARY; the `knn` column is read after `add_knn` has run.
        variable: Name of the variable the model was fitted for; used in the
            model and figure file names and in the z-axis title.
        bg_eff: Background efficiency in percent (it is divided by 100 for
            the colour stops and formatted into file names).
        signal_above: If true, events pass for `knn > 0`, otherwise for
            `knn < 0`.

    NOTE(review): `args.output` is read from an `args` name that is not a
    parameter of this function, so it has to exist at module level -- confirm.
    """
    # Shout out to Cynthia Brewer and Mark Harrower
    # [http://colorbrewer2.org]. Palette is colorblind-safe.
    blues = [(247 / 255., 251 / 255., 255 / 255.),
             (222 / 255., 235 / 255., 247 / 255.),
             (198 / 255., 219 / 255., 239 / 255.),
             (158 / 255., 202 / 255., 225 / 255.),
             (107 / 255., 174 / 255., 214 / 255.),
             (66 / 255., 146 / 255., 198 / 255.),
             (33 / 255., 113 / 255., 181 / 255.),
             (8 / 255., 81 / 255., 156 / 255.),
             (8 / 255., 48 / 255., 107 / 255.)]

    red, green, blue = map(np.array, zip(*blues))
    ROOT.TColor.CreateGradientColorTable(
        len(blues), np.linspace(0, 1, len(blues), endpoint=True),
        red, green, blue, NB_CONTOUR)

    msk_sig = data['signal'] == 1
    msk_bkg = ~msk_sig

    # Fill measured profile
    with Profile("filling profile"):
        profile_meas, _ = fill_profile(data[msk_bkg],
                                       variable,
                                       bg_eff,
                                       signal_above=signal_above)

    # Add k-NN variable
    knnfeat = 'knn'
    path_knn = args.output + \
        '/models/knn_{:s}_{:.0f}.pkl.gz'.format(variable, bg_eff)
    with Profile("adding variable"):
        add_knn(data, feat=variable, newfeat=knnfeat, path=path_knn)

    # Loading KNN classifier
    with Profile("loading model"):
        knn = loadclf(path_knn)

    # Filling fitted profile
    with Profile("Filling fitted profile"):
        rebin = 8
        edges, centres = dict(), dict()
        for ax, var in zip(['x', 'y'], [VARX, VARY]):

            # Short-hands
            vbins, vmin, vmax = AXIS[var]

            # Re-binned bin edges
            edges[ax] = np.interp(
                np.linspace(0, vbins, vbins * rebin + 1, endpoint=True),
                range(vbins + 1),
                np.linspace(vmin, vmax, vbins + 1, endpoint=True))

            # Re-binned bin centres
            centres[ax] = edges[ax][:-1] + 0.5 * np.diff(edges[ax])

        # Get predictions evaluated at re-binned bin centres
        g = dict()
        g['x'], g['y'] = np.meshgrid(centres['x'], centres['y'])
        g['x'], g['y'] = standardise(g['x'], g['y'])

        X = np.vstack((g['x'].flatten(), g['y'].flatten())).T
        fit_values = knn.predict(X).reshape(g['x'].shape).T

        # Fill ROOT "profile"
        profile_fit = ROOT.TH2F('profile_fit', "",
                                len(edges['x']) - 1, edges['x'].flatten('C'),
                                len(edges['y']) - 1, edges['y'].flatten('C'))
        root_numpy.array2hist(fit_values, profile_fit)

    # Plotting
    for fit in [False, True]:
        # Select correct profile
        profile = profile_fit if fit else profile_meas
        plot(profile, fit, variable, bg_eff)

    # Plotting local selection efficiencies
    # -- Binning and cut are the same for signal and background
    shape = (AXIS[VARX][0], AXIS[VARY][0])
    bins = [
        np.linspace(AXIS[var][1],
                    AXIS[var][2],
                    AXIS[var][0] + 1,
                    endpoint=True) for var in VARS
    ]
    if signal_above:
        msk_pass = data[knnfeat] > 0  # ensure correct cut direction
    else:
        msk_pass = data[knnfeat] < 0

    for sig, msk in zip([True, False], [msk_sig, msk_bkg]):
        tag = 'sig' if sig else 'bkg'
        if sig:
            print("working on signal")
        else:
            print("working on bg")

        # Colour table: plain blue gradient for signal; for background an
        # extra first colour covers z-values below the target efficiency.
        if sig:
            rgbs = blues
            stops = np.linspace(0, 1, len(rgbs), endpoint=True)
        else:
            rgbs = [(255 / 255., 51 / 255., 4 / 255.)] + blues
            stops = np.array([0] + list(
                np.linspace(0, 1, len(rgbs) - 1, endpoint=True) *
                (1. - bg_eff / 100.) + bg_eff / 100.))

        # (Re-)create the table for both classes, so that the signal plot
        # does not depend on whatever palette was active before.
        red, green, blue = map(np.array, zip(*rgbs))
        ROOT.TColor.CreateGradientColorTable(len(rgbs), stops, red, green,
                                             blue, NB_CONTOUR)

        # Create `profile` histogram; the name is unique per class so that
        # the second iteration does not clash with the first.
        profile = ROOT.TH2F('profile_' + tag, "",
                            len(bins[0]) - 1, bins[0].flatten('C'),
                            len(bins[1]) - 1, bins[1].flatten('C'))

        # Compute inclusive efficiency in bins of `VARY`, using the same
        # (low, high] bin convention as the 2D profile below.
        effs = list()
        for low, high in zip(bins[1][:-1], bins[1][1:]):
            msk_bin = msk & (data[VARY] > low) & (data[VARY] <= high)
            num = data.loc[msk_bin & msk_pass, 'weight_test'].values.sum()
            den = data.loc[msk_bin, 'weight_test'].values.sum()
            effs.append(num / den)

        # Fill profile
        with Profile("Fill profile"):
            for i, j in itertools.product(range(shape[0]), range(shape[1])):

                # Events of this class inside bin (i, j)
                msk_bin = msk
                for idx, axis_bins, var in zip((i, j), bins, VARS):
                    msk_bin = msk_bin & (data[var] > axis_bins[idx]) & \
                        (data[var] <= axis_bins[idx + 1])

                # Set non-zero bin content
                if np.sum(msk_bin):
                    num = data.loc[msk_bin & msk_pass,
                                   'weight_test'].values.sum()
                    den = data.loc[msk_bin, 'weight_test'].values.sum()
                    profile.SetBinContent(i + 1, j + 1, num / den)

        c = rp.canvas(batch=True)
        pad = c.pads()[0]._bare()
        pad.cd()
        pad.SetRightMargin(0.20)
        pad.SetLeftMargin(0.15)
        pad.SetTopMargin(0.10)

        # Styling
        profile.GetXaxis().SetTitle("Large-#it{R} jet " +
                                    latex(VARX, ROOT=True) +
                                    " = log(m^{2}/p_{T}^{2})")
        profile.GetYaxis().SetTitle("Large-#it{R} jet " +
                                    latex(VARY, ROOT=True) + " [GeV]")
        profile.GetZaxis().SetTitle("Selection efficiency for %s^{(%s%%)}" %
                                    (latex(variable, ROOT=True), bg_eff))

        profile.GetYaxis().SetNdivisions(505)
        profile.GetZaxis().SetNdivisions(505)
        profile.GetXaxis().SetTitleOffset(1.4)
        profile.GetYaxis().SetTitleOffset(1.8)
        profile.GetZaxis().SetTitleOffset(1.3)
        profile.GetZaxis().SetRangeUser(0., 1.)
        profile.SetContour(NB_CONTOUR)

        # Draw
        profile.Draw('COLZ')

        # Decorations
        c.text(qualifier=QUALIFIER, ymax=0.92, xmin=0.15, ATLAS=False)
        c.text(["#sqrt{s} = 13 TeV", "#it{W} jets" if sig else "Multijets"],
               ATLAS=False)

        # -- Efficiencies: one label per y-bin, right-aligned at the low edge
        #    of the last x-bin; only the first label carries the symbol.
        xaxis = profile.GetXaxis()
        yaxis = profile.GetYaxis()
        tlatex = ROOT.TLatex()
        tlatex.SetTextColor(ROOT.kGray + 2)
        tlatex.SetTextSize(0.023)
        tlatex.SetTextFont(42)
        tlatex.SetTextAlign(32)
        xt = xaxis.GetBinLowEdge(xaxis.GetNbins())
        for ibin, eff in enumerate(effs, start=1):
            yt = yaxis.GetBinCenter(ibin)
            if ibin == 1:
                prefix = "#bar{#varepsilon}^{rel}_{%s} = " % tag
            else:
                prefix = ''
            tlatex.DrawLatex(xt, yt, "%s%.1f%%" % (prefix, eff * 100.))

        # -- Bounds
        BOUNDS[0].DrawCopy("SAME")
        BOUNDS[1].DrawCopy("SAME")
        c.latex("m > 50 GeV",
                -4.5,
                BOUNDS[0].Eval(-4.5) + 30,
                align=21,
                angle=-37,
                textsize=13,
                textcolor=ROOT.kGray + 3)
        c.latex("m < 300 GeV",
                -2.5,
                BOUNDS[1].Eval(-2.5) - 30,
                align=23,
                angle=-57,
                textsize=13,
                textcolor=ROOT.kGray + 3)

        # Save
        figname = 'knn_eff_{}_{:s}_{:.0f}.pdf'.format(tag, variable, bg_eff)
        mkdir('knn_fitter/figures/')
        c.save('knn_fitter/figures/' + figname)
        mkdir(args.output + '/figures/')
        c.save(args.output + '/figures/' + figname)

    return
def plot (*argv):
    """
    Method for delegating plotting.

    Two-pad summary as a function of `var`: the upper pad is labelled
    "1/eps_bkg @ eps_sig = 50%" and shows the values in `rejs`, the lower pad
    is labelled "1/JSD @ eps_sig = 50%" and shows the values in `jsds`
    together with the smoothed band built from `jsd_limits`.

    Arguments (all passed positionally through `*argv`):
        data: Not used in this function.
        args: Object with a `show` attribute; the canvas runs in batch mode
            when it is false.
        features: Feature names; the position in the list fixes colour and
            line/marker style.
        bins: Bin edges in `var`. The last edge is rounded down for drawing;
            this is done on a copy, the caller's sequence is left untouched.
        effs: Not used in this function.
        rejs: Mapping feature -> sequence of (mean, std) pairs, one per bin.
        jsds: Mapping feature -> sequence of (mean, std) pairs, one per bin.
        meanx: x-positions of the markers, one per bin.
        jsd_limits: Iterable of (x, y, stat. error, syst. error) tuples.
        masscut: If true, the mass-window line is added to the text box.
        var: 'pt' or 'npv'; selects the x-axis label.

    Returns:
        The canvas holding the plot.

    Raises:
        NotImplementedError: If `var` is neither 'pt' nor 'npv'.
    """

    # Unpack arguments
    data, args, features, bins, effs, rejs, jsds, meanx, jsd_limits, masscut, var = argv

    # -- x-axis label; checked up front so that an unsupported variable is
    #    reported before anything is drawn
    if var == 'pt':
        xlabel = "Large-#it{R} jet p_{T} [GeV]"
    elif var == 'npv':
        xlabel = "Number of reconstructed vertices N_{PV}"
    else:
        raise NotImplementedError("Variable {} is not supported.".format(var))

    # Work on a copy: the last edge is modified below
    bins = np.array(bins)

    with TemporaryStyle() as style:

        # Set styles
        scale = 0.9
        style.SetTextSize(scale * style.GetTextSize())
        for coord in ['x', 'y', 'z']:
            style.SetLabelSize(scale * style.GetLabelSize(coord), coord)
            style.SetTitleSize(scale * style.GetTitleSize(coord), coord)

        # Canvas
        c = rp.canvas(num_pads=2, fraction=0.55, size=(int(800 * 600 / 857.), 600), batch=not args.show)
        c.pads()[0]._bare().SetTopMargin(0.10)
        c.pads()[0]._bare().SetRightMargin(0.23)
        c.pads()[1]._bare().SetRightMargin(0.23)

        # To fix 30.5 --> 30 for NPV
        bins[-1] = np.floor(bins[-1])

        # Plots
        # -- References
        boxopts  = dict(fillcolor=ROOT.kBlack, alpha=0.05, linecolor=ROOT.kGray + 2, linewidth=1, option='HIST')
        c.pads()[0].hist([2], bins=[bins[0], bins[-1]], **boxopts)
        c.pads()[1].hist([1], bins=[bins[0], bins[-1]], **boxopts)

        # -- Bin centres and half-widths, shared by all features
        centres    = np.array(bins[:-1]) + 0.5 * np.diff(bins)
        halfwidths = 0.5 * np.diff(bins)

        for is_simple in [True, False]:
            for ifeat, feat in enumerate(features):
                if is_simple != signal_low(feat):
                    continue

                # Wrap around the palette instead of indexing past its end
                colour = rp.colours[(ifeat // 2) % len(rp.colours)]
                opts = dict(
                    linecolor   = colour,
                    markercolor = colour,
                    fillcolor   = colour,
                    linestyle   = 1 + (ifeat % 2),
                    alpha       = 0.3,
                    option      = 'E2',
                )

                mean_rej, std_rej = map(np.array, zip(*rejs[feat]))  # @TEMP
                mean_jsd, std_jsd = map(np.array, zip(*jsds[feat]))

                # Error boxes
                graph_rej = ROOT.TGraphErrors(len(centres), centres, mean_rej, halfwidths, std_rej)
                graph_jsd = ROOT.TGraphErrors(len(centres), centres, mean_jsd, halfwidths, std_jsd)

                c.pads()[0].hist(graph_rej, **opts)
                c.pads()[1].hist(graph_jsd, **opts)

                # Markers and lines
                opts['option']      = 'PE2L'
                opts['markerstyle'] = 20 + 4 * (ifeat % 2)

                graph_rej = ROOT.TGraph(len(centres), meanx, mean_rej)
                graph_jsd = ROOT.TGraph(len(centres), meanx, mean_jsd)

                # Labels are split over the pads: the upper legend lists the
                # non-simple features, the lower one the simple ones.
                c.pads()[0].hist(graph_rej, label=latex(feat, ROOT=True) if not is_simple else None, **opts)
                c.pads()[1].hist(graph_jsd, label=latex(feat, ROOT=True) if     is_simple else None, **opts)

        # Draw class-specific legend
        width = 0.20
        c.pads()[0].legend(header='MVA:',    width=width, xmin=0.79, ymax=0.92)
        c.pads()[1].legend(header='Analytical:', width=width, xmin=0.79, ymax=0.975)

        # Meaningful limits on JSD
        x, y, ey_stat, ey_syst  = map(np.array, zip(*jsd_limits))
        ex = np.zeros_like(x)
        # Stretch the band to the full axis range
        x[0] = bins[0]
        x[-1] = bins[-1]

        def as_floats(arr):
            # Flat float array, as passed to the TGraphErrors constructor
            return arr.flatten('C').astype(float)

        ey_comb = np.sqrt(np.square(ey_stat) + np.square(ey_syst))
        gr_stat = ROOT.TGraphErrors(len(x), *[as_floats(arr) for arr in (x, y, ex, ey_stat)])
        gr_comb = ROOT.TGraphErrors(len(x), *[as_floats(arr) for arr in (x, y, ex, ey_comb)])
        smooth_tgrapherrors(gr_stat, ntimes=2)
        smooth_tgrapherrors(gr_comb, ntimes=2)
        c.pads()[1].graph(gr_comb,                                        fillcolor=ROOT.kBlack, alpha=0.03, option='3')
        c.pads()[1].graph(gr_stat, linestyle=2, linecolor=ROOT.kGray + 1, fillcolor=ROOT.kBlack, alpha=0.03, option='L3')

        # Label sits on top of the combined band at its last point
        x_, y_ = ROOT.Double(0), ROOT.Double(0)
        idx = gr_comb.GetN() - 1
        gr_comb.GetPoint(idx, x_,  y_)
        ey_ = gr_comb.GetErrorY(idx)
        x_, y_ = map(float, (x_, y_))
        c.pads()[1].latex("Mean stat. #oplus #varepsilon_{bkg}^{rel} var. limit     ", x_, y_ + ey_, align=31, textsize=11, angle=0, textcolor=ROOT.kGray + 2)

        # Decorations
        for pad in c.pads():
            pad._xaxis().SetNdivisions(504)

        c.xlabel(xlabel)
        c.pads()[0].ylabel("1/#varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%")
        c.pads()[1].ylabel("1/JSD @ #varepsilon_{sig}^{rel} = 50%")

        xmid = (bins[0] + bins[-1]) * 0.5
        c.pads()[0].latex("Random guessing",   xmid, 2 * 0.9, align=23, textsize=11, angle=0, textcolor=ROOT.kGray + 2)
        c.pads()[1].latex("Maximal sculpting", xmid, 1 * 0.8, align=23, textsize=11, angle=0, textcolor=ROOT.kGray + 2)

        c.text([], qualifier=QUALIFIER, xmin=0.15, ymax=0.93)

        c.text(["#sqrt{s} = 13 TeV,  #it{W} jet tagging"] + \
                (['m #in  [60, 100] GeV'] if masscut else []),
                 ATLAS=False, ymax=0.76)

        c.pads()[1].text(["Multijets"], ATLAS=False)

        c.pads()[0].ylim(1, 500)
        c.pads()[1].ylim(0.2, 2E+05)

        c.pads()[0].logy()
        c.pads()[1].logy()

    return c
Example #18
0
def plot_distributions(data, var, bins):
    """
    Plot `var` and its "CSS" counterpart in every jet-mass bin.

    For each pair of consecutive edges in `MASS_BINS`, the jets with `m` in
    [low, high) are histogrammed in `var` and `var + "CSS"` (weighted by
    `weight_test`), passed through `kde`, normalised with `density=True` and
    drawn together with the `var` histogram of the first mass bin. One canvas
    per mass bin is saved under `figures/css/`.

    Arguments:
        data: Frame supporting `data[...]` and `data.loc[mask, column]`, with
            columns `m`, `var`, `var + "CSS"` and `weight_test`.
        var: Name of the variable to plot.
        bins: Bin edges forwarded to `c.hist`.

    Returns:
        None.
    """

    var_css = var + "CSS"
    range_label = "#it{{m}} #in  [{:.1f}, {:.1f}] GeV"

    # `var` histogram of the first mass bin; set on the first iteration
    reference = None

    for ibin in range(len(MASS_BINS) - 1):
        low, high = MASS_BINS[ibin], MASS_BINS[ibin + 1]

        # Canvas
        c = rp.canvas(batch=True)

        # Fill (but do not yet draw) the histograms for this mass bin
        in_bin = (data['m'] >= low) & (data['m'] < high)

        def fill(column):
            return c.hist(data.loc[in_bin, column].values,
                          bins=bins,
                          weights=data.loc[in_bin, 'weight_test'].values,
                          display=False)

        hist_var = fill(var)
        hist_css = fill(var_css)

        # Fit against the first-bin reference, once it exists. Only the third
        # output is kept; it is used solely by the disabled drawing below.
        if reference is None:
            profile_css = None
        else:
            _, _, profile_css, _ = fit(hist_var, 1.0, reference,
                                       "%.2f" % ibin)
            normalise(profile_css, density=True)

        # Smooth, then normalise
        hist_var = kde(hist_var)
        hist_css = kde(hist_css)
        for hist in (hist_var, hist_css):
            normalise(hist, density=True)

        if reference is None:
            reference = hist_var.Clone('h_lowmass')

        # Draw histograms
        first_label = range_label.format(MASS_BINS[0],
                                         MASS_BINS[1]).replace('.0', '')
        this_label = range_label.format(low, high).replace('.0', '')

        c.hist(reference,
               label=latex(var, ROOT=True) + ",    {}".format(first_label),
               linecolor=rp.colours[1],
               fillcolor=rp.colours[1],
               alpha=0.5,
               option='HISTL',
               legend_option='FL')
        c.hist(hist_var,
               label=latex(var, ROOT=True) + ",    {}".format(this_label),
               linecolor=rp.colours[4],
               linestyle=2,
               option='HISTL')
        c.hist(hist_css,
               label=latex(var_css, ROOT=True) + ", {}".format(this_label),
               linecolor=rp.colours[3],
               option='HISTL')

        # Disabled: draw reference histogram from fit.
        # if profile_css is not None:
        #     c.hist(profile_css, linecolor=ROOT.kBlack, linestyle=2, label='Transformed hist (CSS)')

        # Decorations
        c.xlabel(latex(var, ROOT=True) + ", " + latex(var_css, ROOT=True))
        c.ylabel("Number of jets p.d.f.")
        # y-range is tuned for N2; other variables probably need adjusting
        c.ylim(0, 5.2)
        c.legend(xmin=0.45, ymax=0.76, width=0.25)
        c.text(["#sqrt{s} = 13 TeV,  Multijets", "KDE smoothed"],
               qualifier=QUALIFIER,
               ATLAS=False)

        pad = c.pad()
        pad._xaxis().SetTitleOffset(1.3)
        pad._yaxis().SetNdivisions(105)
        c.pad()._primitives[-1].Draw('SAME AXIS')

        # Save
        c.save('figures/css/cssProfile_{}_{}.pdf'.format(var, ibin))

    return
def plot_full (*argv):
    """
    Method for delegating plotting.

    Draws a 2x2 canvas. Columns correspond to the variables `pt` and `npv`;
    the top row shows the background rejection and the bottom row 1/JSD, each
    as error bands plus markers per feature. The JSD limit bands are overlaid
    on the bottom row.

    Arguments (positional, in this order):
        data: Unused here.
        args: Namespace; only `args.show` is read (canvas runs in batch mode
            when it is false).
        features: Sequence of feature names; the position in this list
            determines colour, line style and marker style.
        bins: Dict with bin edges under the keys `pt` and `npv`.
            NOTE: `bins['npv'][-1]` is floored in place, which is visible to
            the caller.
        effs: Unused here.
        rejs, jsds: `rejs[var][feat]` / `jsds[var][feat]` are sequences of
            (mean, std) pairs, one per bin.
        meanx: `meanx[var]` holds the x-coordinates used for the markers.
        jsd_limits: `jsd_limits[var]` is a sequence of
            (x, y, stat. error, syst. error) tuples.
        masscut: If true, a mass-window line is added to the text.

    Returns:
        The canvas.
    """

    # Unpack arguments
    data, args, features, bins, effs, rejs, jsds, meanx, jsd_limits, masscut = argv

    with TemporaryStyle() as style:

        # Set styles
        scale      = 1.0
        scale_axis = 0.7
        margin_squeeze = 0.035
        margin_vert    = 0.20
        margin_hori    = 0.35
        size = (800, 600)

        style.SetTextSize(scale_axis * style.GetTextSize())
        for coord in ['x', 'y', 'z']:
            style.SetLabelSize(scale_axis * style.GetLabelSize(coord), coord)
            style.SetTitleSize(scale_axis * style.GetTitleSize(coord), coord)
        style.SetLegendTextSize(style.GetLegendTextSize() * scale)
        style.SetTickLength(0.05,                                                               'x')
        style.SetTickLength(0.07 * (float(size[0])/float(size[1])) * (margin_hori/margin_vert), 'y')

        # Canvas
        c = rp.canvas(num_pads=(2,2), size=size, batch=not args.show)

        # Pad layout: index = column + row * nb_col
        nb_col = 2

        # Margins
        # -- Outer margins
        c.pads()[0]._bare().SetTopMargin   (margin_vert)
        c.pads()[1]._bare().SetTopMargin   (margin_vert)
        c.pads()[2]._bare().SetBottomMargin(margin_vert)
        c.pads()[3]._bare().SetBottomMargin(margin_vert)

        c.pads()[0]._bare().SetLeftMargin  (margin_hori)
        c.pads()[2]._bare().SetLeftMargin  (margin_hori)
        c.pads()[1]._bare().SetRightMargin (margin_hori)
        c.pads()[3]._bare().SetRightMargin (margin_hori)

        # -- Inner margins, between neighbouring pads
        c.pads()[1]._bare().SetLeftMargin  (margin_squeeze)
        c.pads()[3]._bare().SetLeftMargin  (margin_squeeze)
        c.pads()[0]._bare().SetRightMargin (margin_squeeze)
        c.pads()[2]._bare().SetRightMargin (margin_squeeze)

        c.pads()[0]._bare().SetBottomMargin(margin_squeeze)
        c.pads()[1]._bare().SetBottomMargin(margin_squeeze)
        c.pads()[2]._bare().SetTopMargin   (margin_squeeze)
        c.pads()[3]._bare().SetTopMargin   (margin_squeeze)

        # To fix 30.5 --> 30 for NPV (modifies the caller's `bins` in place)
        bins['npv'][-1] = np.floor(bins['npv'][-1])

        # Plots
        # -- References
        boxopts  = dict(fillcolor=ROOT.kBlack, alpha=0.05, linecolor=ROOT.kGray + 2, linewidth=1, option='HIST')
        c.pads()[0].hist([2], bins=[bins['pt'] [0], bins['pt'] [-1]], **boxopts)
        c.pads()[1].hist([2], bins=[bins['npv'][0], bins['npv'][-1]], **boxopts)
        c.pads()[2].hist([1], bins=[bins['pt'] [0], bins['pt'] [-1]], **boxopts)
        c.pads()[3].hist([1], bins=[bins['npv'][0], bins['npv'][-1]], **boxopts)

        # Convert array-likes to flat float arrays for the TGraphErrors constructor
        flatten = lambda arr: arr.flatten('C').astype(float)

        for col, var in enumerate(['pt', 'npv']):
            pad_rej = c.pads()[col + 0 * nb_col]
            pad_jsd = c.pads()[col + 1 * nb_col]

            for is_simple in [True, False]:
                for ifeat, feat in filter(lambda t: is_simple == signal_low(t[1]), enumerate(features)):

                    # Wrap around the palette, as the other plotting methods
                    # do, so that long feature lists cannot index past its end.
                    colour = rp.colours[(ifeat // 2) % len(rp.colours)]
                    opts = dict(
                        linecolor   = colour,
                        markercolor = colour,
                        fillcolor   = colour,
                        linestyle   = 1 + (ifeat % 2),
                        alpha       = 0.3,
                        option      = 'E2',
                    )

                    mean_rej, std_rej = map(np.array, zip(*rejs[var][feat]))  # @TEMP
                    mean_jsd, std_jsd = map(np.array, zip(*jsds[var][feat]))

                    # Only _show_ mass-decorrelated features for `npv`: the
                    # even-indexed features are scaled by -9999; presumably
                    # this pushes them outside the drawn (log) y-range.
                    if (col == 1) and (ifeat % 2 == 0):
                        mean_rej *= -9999.
                        mean_jsd *= -9999.

                    # Error boxes, centred on the bins
                    x    = np.array(bins[var][:-1]) + 0.5 * np.diff(bins[var])
                    xerr = 0.5 * np.diff(bins[var])
                    graph_rej = ROOT.TGraphErrors(len(x), x, mean_rej, xerr, std_rej)
                    graph_jsd = ROOT.TGraphErrors(len(x), x, mean_jsd, xerr, std_jsd)

                    pad_rej.hist(graph_rej, **opts)
                    pad_jsd.hist(graph_jsd, **opts)

                    # Markers and lines, at the mean x-value within each bin
                    opts['option']      = 'PE2L'
                    opts['markerstyle'] = 20 + 4 * (ifeat % 2)

                    graph_rej = ROOT.TGraph(len(x), meanx[var], mean_rej)
                    graph_jsd = ROOT.TGraph(len(x), meanx[var], mean_jsd)

                    # Legend entries are split between rows: non-simple
                    # features are labelled in the top row, simple ones in
                    # the bottom row.
                    pad_rej.hist(graph_rej, label=latex(feat, ROOT=True) if not is_simple else None, **opts)
                    pad_jsd.hist(graph_jsd, label=latex(feat, ROOT=True) if     is_simple else None, **opts)

            # Meaningful limits on JSD
            x, y, ey_stat, ey_syst  = map(np.array, zip(*jsd_limits[var]))
            ex = np.zeros_like(x)
            # Stretch the first and last points to the axis limits
            x[0]  = bins[var][0]
            x[-1] = bins[var][-1]
            ey_comb = np.sqrt(np.square(ey_stat) + np.square(ey_syst))
            gr_stat = ROOT.TGraphErrors(len(x), *list(map(flatten, [x, y, ex, ey_stat])))
            gr_comb = ROOT.TGraphErrors(len(x), *list(map(flatten, [x, y, ex, ey_comb])))
            smooth_tgrapherrors(gr_stat, ntimes=2)
            smooth_tgrapherrors(gr_comb, ntimes=2)
            pad_jsd.graph(gr_comb,                                        fillcolor=ROOT.kBlack, alpha=0.03, option='3')
            pad_jsd.graph(gr_stat, linestyle=2, linecolor=ROOT.kGray + 1, fillcolor=ROOT.kBlack, alpha=0.03, option='L3')

            if col == 0:
                # Label the limit band at its last point
                x_, y_ = ROOT.Double(0), ROOT.Double(0)
                idx = gr_comb.GetN() - 1
                gr_comb.GetPoint(idx, x_,  y_)
                ey_ = gr_comb.GetErrorY(idx)
                x_, y_ = map(float, (x_, y_))
                pad_jsd.latex("Mean stat. #oplus #varepsilon_{bkg}^{rel} var. limit     ", x_, y_ + 0.75 * ey_, align=31, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)

            # Decorations
            # -- offsets: a very large offset moves labels/titles off the
            #    pads which share an axis with a neighbour
            c.pads()[0]._xaxis().SetLabelOffset(9999.)
            c.pads()[0]._xaxis().SetTitleOffset(9999.)
            c.pads()[1]._xaxis().SetLabelOffset(9999.)
            c.pads()[1]._xaxis().SetTitleOffset(9999.)

            c.pads()[2]._xaxis().SetTitleOffset(2.3)
            c.pads()[3]._xaxis().SetTitleOffset(2.3)

            c.pads()[1]._yaxis().SetLabelOffset(9999.)
            c.pads()[1]._yaxis().SetTitleOffset(9999.)
            c.pads()[3]._yaxis().SetLabelOffset(9999.)
            c.pads()[3]._yaxis().SetTitleOffset(9999.)

            # -- x-axis label
            if   var == 'pt':
                xlabel = "Large-#it{R} jet p_{T} [GeV]"
            elif var == 'npv':
                xlabel = "Number of reconstructed vertices N_{PV}"
            else:
                raise NotImplementedError("Variable {} is not supported.".format(var))

            pad_jsd.xlabel(xlabel)
            if col == 0:
                pattern = "#splitline{#splitline{#splitline{%s}{}}{#splitline{}{}}}{#splitline{#splitline{}{}}{#splitline{}{}}}"
                pad_rej.ylabel(pattern % "1/#varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%")
                pad_jsd.ylabel(pattern % "1/JSD @ #varepsilon_{sig}^{rel} = 50%")

            xmid = (bins[var][0] + bins[var][-1]) * 0.5
            pad_rej.latex("Random guessing",   xmid, 2 * 0.9, align=23, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)
            pad_jsd.latex("Maximal sculpting", xmid, 1 * 0.8, align=23, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)

            pad_rej.ylim(1,   70)  # 500
            pad_jsd.ylim(0.2, 7E+04)  # 2E+05

            pad_rej.logy()
            pad_jsd.logy()

        # Draw class-specific legends in the right-most column. The column is
        # named explicitly instead of relying on the loop variable above.
        legend_col = nb_col - 1
        width = margin_hori - 0.03
        c.pads()[legend_col + 0 * nb_col].legend(header='MVA:',        width=width, xmin=1. - margin_hori + 0.03, ymax=1. - margin_vert    + 0.02)
        c.pads()[legend_col + 1 * nb_col].legend(header='Analytical:', width=width, xmin=1. - margin_hori + 0.03, ymax=1. - margin_squeeze + 0.02)
        c.pads()[legend_col + 0 * nb_col]._legends[-1].SetTextSize(style.GetLegendTextSize())
        c.pads()[legend_col + 1 * nb_col]._legends[-1].SetTextSize(style.GetLegendTextSize())

        # Common decorations
        for pad in c.pads():
            pad._xaxis().SetNdivisions(504)

        c.text([], qualifier=QUALIFIER, xmin=margin_hori, ymax=1. - margin_vert + 0.03)

        c.pads()[1].text(["#sqrt{s} = 13 TeV,  #it{W} jet tagging"] + \
                        (['m #in  [60, 100] GeV'] if masscut else []),
                        ATLAS=False, ymax=1. - margin_vert - 0.10)

        c.pads()[3].text(["Multijets"],
                         ATLAS=False, ymax=1. - margin_squeeze - 0.10)

        # Arrows, drawn vertically on the full canvas between the two columns
        c._bare().cd()
        tlatex = ROOT.TLatex()
        tlatex.SetTextAngle(90)
        tlatex.SetTextAlign(22)
        tlatex.SetTextSize(11)
        tlatex.SetTextColor(ROOT.kGray + 2)
        tlatex.DrawLatexNDC(0.5, 0. + 0.5 * (margin_vert + 0.5 * (1.0 - margin_squeeze - margin_vert)), "    Less sculpting #rightarrow")
        tlatex.DrawLatexNDC(0.5, 1. - 0.5 * (margin_vert + 0.5 * (1.0 - margin_squeeze - margin_vert)), "     Greater separation #rightarrow")

    return c
Example #20
0
def main (args):
    """
    Fit a k-NN regression of a per-bin percentile versus dijet mass.

    In each of 19 equal-width `dijetmass` bins between 100 and 8000, the
    weighted percentile of `jet_ungrtrk500` is computed for background events
    (`signal == 0`). A distance-weighted 5-nearest-neighbour regressor is then
    fitted to these values (the last bin is dropped) as a function of the
    upper bin edge and evaluated on a fine grid. The result is drawn, saved
    as a PDF and written to a ROOT file under the name `kNNfit`.

    Arguments:
        args: Command-line arguments, passed through `initialise`.
    """

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, _, _ = load_data('data/' + args.input)

    # NOTE(review): `knn_eff` is not defined within this function; presumably
    # it is a module-level name -- confirm, otherwise this raises NameError.
    outFile = ROOT.TFile.Open("figures/knn_jet_ungrtrk500_eff{}_data.root".format(knn_eff), "RECREATE")

    EFF = 0.5
    VAR = 'jet_ungrtrk500'
    VARX = 'dijetmass'

    weight = 'weight'  # 'weight_test' / 'weight'
    bins_mjj = np.linspace(100, 8000, 20)
    fineBins = np.linspace(100, 8000, 7900)
    fineBinsRe = fineBins.reshape(-1, 1)

    # Percentile of `VAR` for background events in each `VARX` bin
    percs = []
    for low, high in zip(bins_mjj[:-1], bins_mjj[1:]):

        msk = (data[VARX] > low) & (data[VARX] <= high) & (data['signal'] == 0)

        if np.sum(msk) > 20:  # Ensure sufficient statistics for meaningful percentile
            percs.append(wpercentile(data=data.loc[msk, VAR].values,
                                     percents=100 - EFF,
                                     weights=data.loc[msk, weight].values))
        else:
            percs.append(0)

    print("Length of percs:  {} {}".format(len(percs), percs))

    # Drop the last bin, and use the upper edges of the remaining bins as the
    # regression inputs.
    percs = percs[:-1]
    bins_mjj = bins_mjj[:-1]
    X = bins_mjj[1:].reshape(-1, 1)

    print("{} {}".format(len(X), len(percs)))

    # Fit
    knn = KNeighborsRegressor(n_neighbors=5, weights='distance')
    y_knn = knn.fit(X, percs).predict(fineBinsRe)

    c = rp.canvas(batch=True)
    # NOTE(review): the label says "uniform" but the regressor above uses
    # `weights='distance'` -- confirm which one is intended.
    knnFit = c.plot(y_knn, bins=fineBins, linecolor=ROOT.kRed+2, linewidth=2, linestyle=1, label="knn fit, uniform", option='L')

    c.save('figures/distributions/percentile_test.pdf')

    outFile.cd()
    knnFit.SetName("kNNfit")
    knnFit.Write()
    outFile.Close()

    """
Example #21
0
def plot(*argv):
    """
    Method for delegating plotting.

    Draws one marker per feature in the plane of background rejection (x)
    versus 1/JSD (y), both on logarithmic axes, together with reference
    lines/boxes at x = 2 and y = 1, the smoothed JSD limit band, and dashed
    lines connecting related points.

    Arguments (positional, in this order):
        data: Unused here.
        args: Namespace; only `args.show` is read.
        features: Sequence of feature names; the position in this list
            determines colour and marker style.
        scan_features: Dict mapping a base feature to a sequence of
            (feature, label) pairs.
        points: Sequence of (x, y, label) tuples. It is not modified.
        jsd_limits: Sequence of (x, y, error) tuples.
        masscut: If true, a mass-window line is added to the text.
        pt_range: `None` or a pair of pT bounds, added to the text.

    Returns:
        The canvas.

    Raises:
        ValueError: If a requested feature has no entry in `points`.
    """

    # Unpack arguments
    data, args, features, scan_features, points, jsd_limits, masscut, pt_range = argv

    # Labels of all points, built once rather than on every look-up
    point_labels = [point[2] for point in points]

    def find_point(label):
        """Return the first entry in `points` with the given label."""
        return points[point_labels.index(label)]

    with TemporaryStyle() as style:

        # Compute yaxis range
        ranges = int(pt_range is not None) + int(masscut)
        mult = 10. if ranges == 2 else (5. if ranges == 1 else 1.)

        # Define variable(s)
        axisrangex = (1.4, 100.)
        axisrangey = (0.3, 100000. * mult)
        aminx, amaxx = axisrangex
        aminy, amaxy = axisrangey

        # Styling
        scale = 0.95
        style.SetTitleOffset(1.8, 'x')
        style.SetTitleOffset(1.6, 'y')
        style.SetTextSize(style.GetTextSize() * scale)
        style.SetLegendTextSize(style.GetLegendTextSize() * scale)

        # Canvas
        c = rp.canvas(batch=not args.show, size=(600, 600))

        # Reference lines
        nullopts = dict(linecolor=0,
                        linewidth=0,
                        linestyle=0,
                        markerstyle=0,
                        markersize=0,
                        fillstyle=0)
        lineopts = dict(linecolor=ROOT.kGray + 2, linewidth=1, option='L')
        boxopts = dict(fillcolor=ROOT.kBlack,
                       alpha=0.05,
                       linewidth=0,
                       option='HIST')
        c.hist([aminy], bins=list(axisrangex), **nullopts)
        c.plot([1, amaxy], bins=[2, 2], **lineopts)
        c.plot([1, 1], bins=[2, amaxx], **lineopts)
        c.hist([amaxy], bins=[aminx, 2], **boxopts)
        c.hist([1], bins=[2, amaxx], **boxopts)

        # Meaningful limits on 1/JSD
        x, y, ey = map(np.array, zip(*jsd_limits))
        ex = np.zeros_like(ey)
        gr = ROOT.TGraphErrors(len(x), x, y, ex, ey)
        smooth_tgrapherrors(gr, ntimes=3)
        c.graph(gr,
                linestyle=2,
                linecolor=ROOT.kGray + 1,
                fillcolor=ROOT.kBlack,
                alpha=0.03,
                option='L3')

        # Label the limit band at its fourth point
        x_, y_ = ROOT.Double(0), ROOT.Double(0)
        idx = 3
        gr.GetPoint(idx, x_, y_)
        ey_ = gr.GetErrorY(idx)
        x_, y_ = map(float, (x_, y_))
        c.latex("Statistical limit",
                x_,
                y_ + ey_,
                align=21,
                textsize=11,
                angle=-5,
                textcolor=ROOT.kGray + 2)

        # Markers
        for is_simple in [True, False]:

            # Split the legend into simple- and MVA taggers
            for ifeat, feat in filter(lambda t: is_simple == signal_low(t[1]),
                                      enumerate(features)):

                # Coordinates, label
                x, y, label = find_point(feat)

                # Overwrite default name of parameter-scan classifier
                label = 'ANN' if label.startswith('ANN') else label
                label = 'uBoost' if label.startswith('uBoost') else label

                # Style
                colour = rp.colours[(ifeat // 2) % len(rp.colours)]
                markerstyle = 20 + (ifeat % 2) * 4

                # Draw
                c.graph([y],
                        bins=[x],
                        markercolor=colour,
                        markerstyle=markerstyle,
                        label='#scale[%.1f]{%s}' %
                        (scale, latex(label, ROOT=True)),
                        option='P')

            # Draw class-specific legend
            width = 0.15
            c.legend(header=("Analytical:" if is_simple else "MVA:"),
                     width=width,
                     xmin=0.60 + (width + 0.02) * (is_simple),
                     ymax=0.888)

        # Make legends transparent
        for leg in c.pads()[0]._legends:
            leg.SetFillStyle(0)

        # Markers, parametrised decorrelation
        for base_feat, group in scan_features.items():

            # Get index in list of features
            ifeat = features.index(base_feat)

            # Style
            colour = rp.colours[(ifeat // 2) % len(rp.colours)]
            markerstyle = 24

            for feat, label in group:
                x, y, _ = find_point(feat)

                # Draw
                c.graph([y],
                        bins=[x],
                        markercolor=colour,
                        markerstyle=markerstyle,
                        option='P')

                # Text goes to the right of the marker for `NN`, and to the
                # left of it for everything else
                if base_feat == 'NN':
                    c.latex("   " + label,
                            x,
                            y,
                            textsize=11,
                            align=12,
                            textcolor=ROOT.kGray + 2)
                else:
                    c.latex(label + "   ",
                            x,
                            y,
                            textsize=11,
                            align=32,
                            textcolor=ROOT.kGray + 2)

            # Connecting lines (scan)
            feats = [base_feat] + [entry[0] for entry in group]
            for feat1, feat2 in zip(feats[:-1], feats[1:]):
                x1, y1, _ = find_point(feat1)
                x2, y2, _ = find_point(feat2)

                c.graph([y1, y2],
                        bins=[x1, x2],
                        linecolor=colour,
                        linestyle=2,
                        option='L')

        # Connecting lines (simple)
        # The second point is left out before pairing up consecutive points.
        # This is done on a copy, so that the caller's list is left intact and
        # repeated calls with the same list give the same result.
        print("points: ")
        print(points)
        simple_points = list(points)
        simple_points.pop(1)
        print(simple_points)

        for i in range(2):
            x1, y1, _ = simple_points[2 * i + 0]
            x2, y2, _ = simple_points[2 * i + 1]
            colour = rp.colours[i]
            c.graph([y1, y2],
                    bins=[x1, x2],
                    linecolor=colour,
                    linestyle=2,
                    option='L')

        # Decorations
        c.xlabel(
            "Background rejection, 1 / #varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%"
        )
        c.ylabel("Mass-decorrelation, 1 / JSD @ #varepsilon_{sig}^{rel} = 50%")
        c.xlim(*axisrangex)
        c.ylim(*axisrangey)
        c.logx()
        c.logy()

        # Annotations along the reference lines; the "midpoints" are the
        # geometric means of 1 and the upper axis limits
        opts_text = dict(textsize=11, textcolor=ROOT.kGray + 2)
        midpointx = np.power(10, 0.5 * np.log10(amaxx))
        midpointy = np.power(10, 0.5 * np.log10(amaxy))
        c.latex("No separation",
                1.91,
                midpointy,
                angle=90,
                align=21,
                **opts_text)
        c.latex("Maximal sculpting",
                midpointx,
                0.89,
                angle=0,
                align=23,
                **opts_text)
        c.latex("    Less sculpting #rightarrow",
                2.1,
                midpointy,
                angle=90,
                align=23,
                **opts_text)
        c.latex("     Greater separation #rightarrow",
                midpointx,
                1.1,
                angle=0,
                align=21,
                **opts_text)

        c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER)
        c.text(TEXT + \
               ["#it{W} jet tagging"] + (
                    ["p_{{T}} #in  [{:.0f}, {:.0f}] GeV".format(pt_range[0], pt_range[1])] if pt_range is not None else []
                ) + (
                    ['Cut: m #in  [60, 100] GeV'] if masscut else []
                ),
               xmin=0.26, ATLAS=None)

    return c
def plot(*argv):
    """
    Method for delegating plotting.

    Draws the JSD of each feature as a function of the background efficiency
    on a logarithmic y-axis, together with a flat reference line at 1 and the
    smoothed JSD limit band.

    Arguments (positional, in this order):
        args: Namespace; only `args.show` is read.
        data: Unused here.
        effs: Sequence of efficiencies; divided by 100 to give the
            x-coordinates. The first entry is skipped.
        jsd: Dict mapping feature name to a sequence of JSD values; the first
            entry is skipped.
        jsd_limits: Sequence of (x, y, error) tuples.
        features: Sequence of feature names; the position in this list
            determines colour, line style and marker style.
        pt_range: If truthy, a pair of pT bounds which is added to the text.

    Returns:
        The canvas.
    """

    # Unpack arguments
    args, data, effs, jsd, jsd_limits, features, pt_range = argv

    grey = ROOT.kGray + 2

    with TemporaryStyle() as style:

        # Style
        for axis, offset in (('x', 1.5), ('y', 2.0)):
            style.SetTitleOffset(offset, axis)

        # Canvas
        c = rp.canvas(batch=not args.show)

        # Plots
        # -- Reference: all bins filled with 1
        ref = ROOT.TH1F('ref', "", 10, 0., 1.)
        for ibin in range(1, ref.GetXaxis().GetNbins() + 1):
            ref.SetBinContent(ibin, 1)
        c.hist(ref, linecolor=grey, linewidth=1)

        # -- Features, with one legend for each class of tagger
        width = 0.15
        for is_simple in [True, False]:
            for ifeat, feat in enumerate(features):
                if is_simple == signal_low(feat):
                    parity = ifeat % 2
                    colour = rp.colours[(ifeat // 2) % len(rp.colours)]
                    c.plot(jsd[feat][1:],
                           bins=np.array(effs[1:]) / 100.,
                           linecolor=colour,
                           markercolor=colour,
                           linestyle=1 + parity,
                           markerstyle=20 + parity * 4,
                           label=latex(feat, ROOT=True),
                           option='PL')

            header = "Analytical:" if is_simple else "MVA:"
            c.legend(header=header,
                     width=width * (1 + 0.8 * int(is_simple)),
                     xmin=0.42 + (width + 0.05) * (is_simple),
                     ymax=0.888)

        # Meaningful limits on JSD
        x, y, ey = map(np.array, zip(*jsd_limits))
        ex = np.zeros_like(ey)
        gr = ROOT.TGraphErrors(len(x), x, y, ex, ey)
        smooth_tgrapherrors(gr, ntimes=2)
        bandopts = dict(linestyle=2,
                        linecolor=ROOT.kGray + 1,
                        fillcolor=ROOT.kBlack,
                        alpha=0.03,
                        option='L3')
        c.graph(gr, **bandopts)

        # Redraw axes
        c.pads()[0]._primitives[0].Draw('AXIS SAME')

        # Decorations
        c.xlabel("Background efficiency #varepsilon_{bkg}^{rel}")
        c.ylabel("Mass correlation, JSD")
        c.text([], xmin=0.15, ymax=0.96, qualifier=QUALIFIER)

        lines = ["#sqrt{s} = 13 TeV",  "Dijets"]
        if pt_range:
            lines = lines + ["p_{T} [GeV] #in",
                             "    [{:.0f}, {:.0f}]".format(*pt_range)]
        c.text(lines, ymax=0.85, ATLAS=None)

        c.latex("Maximal sculpting",
                0.065,
                1.2,
                align=11,
                textsize=11,
                textcolor=grey)
        c.xlim(0, 1)
        c.ymin(1E-05)
        c.padding(0.45)
        c.logy()

        for leg in c.pad()._legends:
            leg.SetMargin(0.5)

        # Label the limit band at its seventh-to-last point
        x_, y_ = ROOT.Double(0), ROOT.Double(0)
        idx = gr.GetN() - 7
        gr.GetPoint(idx, x_, y_)
        ey_ = gr.GetErrorY(idx)
        x_, y_ = float(x_), float(y_)
        c.latex("Statistical limit",
                x_,
                y_ - ey_ / 2.,
                align=23,
                textsize=11,
                angle=12,
                textcolor=grey)

    return c
def main(args):
    """
    Plot measured and fitted k-NN profiles, and local selection efficiencies.

    Steps:
      1. Load the dataset, fill the measured profile from the background
         jets, and add the k-NN-transformed variable as column `knn`.
      2. Evaluate the stored k-NN regressor at the centres of a grid which is
         `rebin` times finer than the nominal binning in `AXIS`, and store
         the predictions in a 2D histogram.
      3. Call `plot` for the measured and for the fitted profile.
      4. Separately for signal and background, compute the `weight_test`-
         weighted fraction of jets with `knn` < 0 in each 2D bin of `VARS`,
         draw it as a colour map annotated with the inclusive efficiency per
         bin along the y-axis, and save it under `figures/knn/`.

    `EFF` is treated as a percentage: it is divided by 100 when computing the
    colour stops for the background map.

    Arguments:
        args: Command-line arguments, passed through `initialise`.
    """

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, _, _ = load_data(args.input + 'data.h5', train=True)
    msk_sig = data['signal'] == 1
    msk_bkg = ~msk_sig

    # Fill measured profile
    profile_meas, _ = fill_profile(data[msk_bkg])

    # Add k-NN variable
    # NOTE(review): the path here is formatted with `{}` while the one passed
    # to `loadclf` below uses `{:.0f}`; these differ if `EFF` is a float --
    # confirm that both are meant to refer to the same file.
    knnfeat = 'knn'
    add_knn(data,
            newfeat=knnfeat,
            path='models/knn/knn_{}_{}.pkl.gz'.format(VAR, EFF))

    # Loading KNN classifier
    knn = loadclf('models/knn/knn_{:s}_{:.0f}.pkl.gz'.format(VAR, EFF))

    # Filling fitted profile
    with Profile("Filling fitted profile"):
        rebin = 8
        edges, centres = dict(), dict()
        for ax, var in zip(['x', 'y'], [VARX, VARY]):

            # Short-hands
            vbins, vmin, vmax = AXIS[var]

            # Re-binned bin edges: `rebin` sub-bins within each nominal bin
            edges[ax] = np.interp(
                np.linspace(0, vbins, vbins * rebin + 1, endpoint=True),
                range(vbins + 1),
                np.linspace(vmin, vmax, vbins + 1, endpoint=True))

            # Re-binned bin centres
            centres[ax] = edges[ax][:-1] + 0.5 * np.diff(edges[ax])

        # Get predictions evaluated at re-binned bin centres
        g = dict()
        g['x'], g['y'] = np.meshgrid(centres['x'], centres['y'])
        g['x'], g['y'] = standardise(g['x'], g['y'])

        X = np.vstack((g['x'].flatten(), g['y'].flatten())).T
        fit = knn.predict(X).reshape(g['x'].shape).T

        # Fill ROOT "profile"
        profile_fit = ROOT.TH2F('profile_fit', "",
                                len(edges['x']) - 1, edges['x'].flatten('C'),
                                len(edges['y']) - 1, edges['y'].flatten('C'))
        root_numpy.array2hist(fit, profile_fit)

    # Plotting
    with Profile("Plotting"):
        for use_fit in [False, True]:

            # Select correct profile
            profile = profile_fit if use_fit else profile_meas

            # Plot
            plot(profile, use_fit)

    # Plotting local selection efficiencies for D2-kNN < 0
    # -- Colour gradient shared by both maps. The background map prepends one
    #    extra colour for the range [0, EFF / 100].
    blues = [(247 / 255., 251 / 255., 255 / 255.),
             (222 / 255., 235 / 255., 247 / 255.),
             (198 / 255., 219 / 255., 239 / 255.),
             (158 / 255., 202 / 255., 225 / 255.),
             (107 / 255., 174 / 255., 214 / 255.),
             (66 / 255., 146 / 255., 198 / 255.),
             (33 / 255., 113 / 255., 181 / 255.),
             (8 / 255., 81 / 255., 156 / 255.),
             (8 / 255., 48 / 255., 107 / 255.)]

    # -- Jets passing the selection; the same for all bins and both classes
    msk_pass = data[knnfeat] < 0

    # -- Compute signal efficiency
    for sig, msk in zip([True, False], [msk_sig, msk_bkg]):

        if sig:
            rgbs = list(blues)
            nb_cols = len(rgbs)
            stops = np.linspace(0, 1, nb_cols, endpoint=True)
        else:
            rgbs = [(255 / 255., 51 / 255., 4 / 255.)] + blues
            nb_cols = len(rgbs)
            stops = np.array([0] + list(
                np.linspace(0, 1, nb_cols - 1, endpoint=True) *
                (1. - EFF / 100.) + EFF / 100.))

        red, green, blue = map(np.array, zip(*rgbs))
        ROOT.TColor.CreateGradientColorTable(nb_cols, stops, red, green, blue,
                                             NB_CONTOUR)

        # Define bin edges
        shape = (AXIS[VARX][0], AXIS[VARY][0])
        bins = [
            np.linspace(AXIS[var][1],
                        AXIS[var][2],
                        AXIS[var][0] + 1,
                        endpoint=True) for var in VARS
        ]

        # Create `profile` histogram
        profile = ROOT.TH2F('profile', "",
                            len(bins[0]) - 1, bins[0].flatten('C'),
                            len(bins[1]) - 1, bins[1].flatten('C'))

        # Compute inclusive efficiency in bins of `VARY`. The upper edge is
        # inclusive, consistent with the 2D map below.
        effs = list()
        for low, high in zip(bins[1][:-1], bins[1][1:]):
            msk_bin = msk & (data[VARY] > low) & (data[VARY] <= high)
            num = data.loc[msk_bin & msk_pass, 'weight_test'].values.sum()
            den = data.loc[msk_bin, 'weight_test'].values.sum()
            effs.append(num / den)

        # Fill profile
        # NOTE(review): assumes `VARS` lists `VARX` followed by `VARY`, to
        # match `shape` -- confirm.
        for i, j in itertools.product(*map(range, shape)):

            # Select jets of the current class within bin (i, j)
            msk_bin = msk
            for idx, var, var_bins in zip((i, j), VARS, bins):
                low, high = var_bins[idx], var_bins[idx + 1]
                msk_bin = msk_bin & (data[var] > low) & (data[var] <= high)
            data_ = data[msk_bin]

            # Set non-zero bin content. Everything is computed on the
            # selected subset, so no mask has to be re-aligned to `data`.
            if len(data_):
                weights = data_['weight_test'].values
                passed = (data_[knnfeat] < 0).values
                eff = weights[passed].sum() / weights.sum()
                profile.SetBinContent(i + 1, j + 1, eff)

        c = rp.canvas(batch=True)
        pad = c.pads()[0]._bare()
        pad.cd()
        pad.SetRightMargin(0.20)
        pad.SetLeftMargin(0.15)
        pad.SetTopMargin(0.10)

        # Styling
        profile.GetXaxis().SetTitle("Large-#it{R} jet " +
                                    latex(VARX, ROOT=True) +
                                    " = log(m^{2}/p_{T}^{2})")
        profile.GetYaxis().SetTitle("Large-#it{R} jet " +
                                    latex(VARY, ROOT=True) + " [GeV]")
        profile.GetZaxis().SetTitle("Selection efficiency for %s^{(%s%%)}" %
                                    (latex(VAR, ROOT=True), EFF))

        profile.GetYaxis().SetNdivisions(505)
        profile.GetZaxis().SetNdivisions(505)
        profile.GetXaxis().SetTitleOffset(1.4)
        profile.GetYaxis().SetTitleOffset(1.8)
        profile.GetZaxis().SetTitleOffset(1.3)
        profile.GetZaxis().SetRangeUser(0., 1.)
        profile.SetContour(NB_CONTOUR)

        # Draw
        profile.Draw('COLZ')

        # Decorations
        c.text(qualifier=QUALIFIER, ymax=0.92, xmin=0.15)
        c.text(["#sqrt{s} = 13 TeV", "#it{W} jets" if sig else "Multijets"],
               ATLAS=False)

        # -- Efficiencies, written right-aligned at the low edge of the last
        #    x-axis bin. Only the first one is prefixed with the symbol.
        xaxis = profile.GetXaxis()
        yaxis = profile.GetYaxis()
        tlatex = ROOT.TLatex()
        tlatex.SetTextColor(ROOT.kGray + 2)
        tlatex.SetTextSize(0.023)
        tlatex.SetTextFont(42)
        tlatex.SetTextAlign(32)
        xt = xaxis.GetBinLowEdge(xaxis.GetNbins())
        prefix = "#bar{#varepsilon}^{rel}_{%s} = " % ('sig' if sig else 'bkg')
        for eff, ibin in zip(effs, range(1, yaxis.GetNbins() + 1)):
            yt = yaxis.GetBinCenter(ibin)
            tlatex.DrawLatex(
                xt, yt,
                "%s%.1f%%" % (prefix if ibin == 1 else '', eff * 100.))

        # -- Bounds
        BOUNDS[0].DrawCopy("SAME")
        BOUNDS[1].DrawCopy("SAME")
        c.latex("m > 50 GeV",
                -4.5,
                BOUNDS[0].Eval(-4.5) + 30,
                align=21,
                angle=-37,
                textsize=13,
                textcolor=ROOT.kGray + 3)
        c.latex("m < 300 GeV",
                -2.5,
                BOUNDS[1].Eval(-2.5) - 30,
                align=23,
                angle=-57,
                textsize=13,
                textcolor=ROOT.kGray + 3)

        # Save
        mkdir('figures/knn/')
        c.save('figures/knn/knn_eff_{}_{:s}_{:.0f}.pdf'.format(
            'sig' if sig else 'bkg', VAR, EFF))

    return
Example #24
0
def plot(data, urs, classifiers):
    """
    Plot the per-epoch log-loss of each uBoost classifier next to AdaBoost.

    For every `(title, name)` pair in `classifiers` whose title is not
    'AdaBoost', the weighted log-loss on the training and testing parts of
    `data` is computed for each epoch column `<name>__<epoch>` and drawn
    together with the corresponding curves of the first classifier in the
    list. One PDF per uniforming rate is saved under `figures/`.

    Arguments:
        data: Table supporting `.loc` and `.columns`, with columns 'train',
            'signal', 'weight_adv' and one `<name>__<epoch>` column per
            classifier and epoch.
        urs: Uniforming rates (alpha), one per entry in `classifiers`.
        classifiers: Sequence of `(title, name)` pairs; the first entry is
            assumed to be the AdaBoost reference.
    """

    # Plotting learning process
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    with Profile("Plotting learning process"):

        for alpha, (title, name) in zip(urs, classifiers):
            # Compare by value; identity (`is`) of equal strings is not
            # guaranteed and made this check implementation-dependent.
            if title == 'AdaBoost':
                continue
            print("=== {} {}".format(name, title))

            # Get training/test split masks
            msk_train = data['train'] == 1
            msk_test = data['train'] == 0

            # Get target and weight arrays
            y_train = data.loc[msk_train, 'signal'].values.flatten()
            y_test = data.loc[msk_test, 'signal'].values.flatten()
            w_train = data.loc[msk_train, 'weight_adv'].values.flatten()
            w_test = data.loc[msk_test, 'weight_adv'].values.flatten()

            # Compute log-loss for each epoch
            ll_ab_train, ll_ab_test = list(), list()
            ll_ub_train, ll_ub_test = list(), list()

            # One column per epoch; counted without relying on `filter`
            # returning a list.
            nb_epochs = sum(1 for col in data.columns if col.startswith(name))
            x = np.arange(nb_epochs)

            for epoch in range(nb_epochs):

                # -- Get column names for current epoch
                col_ab = '{:s}__{:d}'.format(
                    classifiers[0][1],
                    epoch)  # Assuming `AdaBoost` is first classifier
                col_ub = '{:s}__{:d}'.format(name, epoch)

                # -- Get classifier variables for current epoch
                p_ab_train = data.loc[msk_train, col_ab]
                p_ab_test = data.loc[msk_test, col_ab]
                p_ub_train = data.loc[msk_train, col_ub]
                p_ub_test = data.loc[msk_test, col_ub]

                # -- Compute log-loss for current epoch
                ll_ab_train.append(
                    log_loss(y_train, p_ab_train, sample_weight=w_train))
                ll_ab_test.append(
                    log_loss(y_test, p_ab_test, sample_weight=w_test))
                ll_ub_train.append(
                    log_loss(y_train, p_ub_train, sample_weight=w_train))
                ll_ub_test.append(
                    log_loss(y_test, p_ub_test, sample_weight=w_test))
                pass

            # Plot log-loss curves
            c = rp.canvas(batch=True)

            # -- Common plotting options
            opts = dict(linewidth=2, legend_option='L')
            c.graph(ll_ab_train,
                    bins=x,
                    linecolor=rp.colours[5],
                    linestyle=1,
                    option='AL',
                    label='AdaBoost',
                    **opts)
            c.graph(ll_ab_test,
                    bins=x,
                    linecolor=rp.colours[5],
                    linestyle=2,
                    option='L',
                    **opts)
            c.graph(ll_ub_train,
                    bins=x,
                    linecolor=rp.colours[1],
                    linestyle=1,
                    option='L',
                    label='uBoost',
                    **opts)
            c.graph(ll_ub_test,
                    bins=x,
                    linecolor=rp.colours[1],
                    linestyle=2,
                    option='L',
                    **opts)

            # -- Decorations
            c.pad()._yaxis().SetNdivisions(505)
            c.xlabel("Training epoch")
            c.ylabel("BDT classifier loss")
            c.xlim(0, len(x))
            c.ylim(0.3, 1.4)
            c.legend(width=0.28)
            c.legend(header='Dataset:',
                     categories=[('Training', {
                         'linestyle': 1
                     }), ('Testing', {
                         'linestyle': 2
                     })],
                     width=0.28,
                     ymax=0.69)

            # Transparent legend backgrounds
            for leg in c.pad()._legends:
                leg.SetFillStyle(0)
                pass

            c.text([
                "#sqrt{s} = 13 TeV", "#it{W} jet tagging",
                "Uniforming rate #alpha = {:3.1f}".format(alpha)
            ],
                   qualifier="Simulation Internal")

            # -- Save ('.' in the rate is replaced by 'p' in the file name)
            c.save('figures/loss_uboost__alpha{:4.2f}'.format(alpha).replace(
                '.', 'p') + '.pdf')

            pass
        pass

    return
def main(args):
    """
    Plot the mass-decorrelation metric (JSD) for a cut on Tau21.

    For each of two jet-pT bins, the Tau21 value at the weighted median of
    the signal distribution is used as a cut on the background sample. The
    jet-mass distributions of background passing (P) and failing (F) the cut
    are drawn together with their average (M), the per-bin KL terms
    KL(P||M) and KL(F||M), the per-bin JSD and its cumulative sum. One PDF
    per pT bin is saved under `figures/`.

    Arguments:
        args: Namespace whose `input` attribute is the path prefix to which
            'data.h5' is appended.

    Returns:
        0 on completion.
    """

    # ...

    # Load data
    data_, features, _ = load_data(args.input + 'data.h5', train=True)

    for pt_bin in [(200., 500.), (500., 1000.)]:

        # Impose pT-cut
        data = data_[(data_['pt'] >= pt_bin[0]) & (data_['pt'] < pt_bin[1])]

        var = 'Tau21'
        msk_sig = (data['signal'] == 1)
        x = data[var].values
        m = data['m'].values
        w = data['weight_test'].values

        # Get cut value. The weights must be restricted to the same (signal)
        # entries as the values; passing the full weight array pairs signal
        # values with weights of unrelated jets.
        cut = wpercentile(x[msk_sig], 50., weights=w[msk_sig])
        print("Cut value: {:.2f}".format(cut))

        # Discard signal
        x = x[~msk_sig]
        m = m[~msk_sig]
        w = w[~msk_sig]

        # Get pass mask
        msk_pass = x < cut
        print("Background efficiency: {:.1f}%".format(
            100. * w[msk_pass].sum() / w.sum()))

        # Canvas
        offset = 0.06
        margin = 0.3
        # @NOTE
        #   A = Height of pad 0
        #   B = Height of pads 1,2
        #   C = Height of pad 3
        # -->
        #   A = 0.5
        #
        #   (1. - 2 * offset) * B = (1. - 2*offset - margin) * C
        #   ==>
        #   B = C * (1. - 2*offset - margin) / (1. - 2 * offset)
        #   ==>
        #   B = C * (1 - margin / (1. - 2 * offset))
        #
        #   A + 2 * B + C = 1
        #   ==>
        #   A + 2 * C * (1 - margin / (1. - 2 * offset)) + C = 1
        #   ==>
        #   C = (1 - A) / (1 + 2 * (1 - margin / (1. - 2 * offset)))

        A = 0.5
        C = (1 - A) / (1 + 2 * (1 - margin / (1. - 2 * offset)))
        B = C * (1 - margin / (1. - 2 * offset))

        c = rp.canvas(batch=True,
                      num_pads=4,
                      fraction=(A, B, B, C),
                      size=(600, 700))

        # Set pad margins
        c.pad(0)._bare().SetBottomMargin(offset)
        c.pad(1)._bare().SetTopMargin(offset)
        c.pad(1)._bare().SetBottomMargin(offset)
        c.pad(2)._bare().SetTopMargin(offset)
        c.pad(2)._bare().SetBottomMargin(offset)
        c.pad(3)._bare().SetTopMargin(offset)
        c.pad(3)._bare().SetBottomMargin(offset + margin)

        # Styling (updates the shared, module-level HISTSTYLE entries)
        HISTSTYLE[True]['label'] = 'Passing cut, #it{P}'
        HISTSTYLE[False]['label'] = 'Failing cut, #it{F}'

        # Histograms
        F = c.hist(m[~msk_pass],
                   bins=MASSBINS,
                   weights=w[~msk_pass],
                   normalise=True,
                   **HISTSTYLE[False])
        P = c.hist(m[msk_pass],
                   bins=MASSBINS,
                   weights=w[msk_pass],
                   normalise=True,
                   **HISTSTYLE[True])

        # Replace the drawn histograms by their bin contents
        P, F = map(root_numpy.hist2array, [P, F])
        M = (P + F) / 2
        c.hist(M,
               bins=MASSBINS,
               normalise=True,
               linewidth=3,
               linecolor=ROOT.kViolet,
               linestyle=2,
               label='Average, #it{M}')

        # Compute divergences. Empty bins contribute zero (0 * log 0 := 0);
        # evaluating the expression there would give NaN and propagate into
        # every later entry of the cumulative sum.
        with np.errstate(divide='ignore', invalid='ignore'):
            KL_PM = np.where(P > 0, -P * np.log2(M / P), 0.)
            KL_FM = np.where(F > 0, -F * np.log2(M / F), 0.)
            pass
        JSD = (KL_PM + KL_FM) / 2.
        JSDsum = np.cumsum(JSD)

        opts = dict(bins=MASSBINS, fillcolor=ROOT.kGray, alpha=0.5)

        # Draw divergences
        c.pad(1).hist(KL_PM, **opts)
        c.pad(1).ylim(-0.12, 0.05)
        c.pad(1).yline(0.)

        c.pad(2).hist(KL_FM, **opts)
        c.pad(2).ylim(-0.05, 0.12)
        c.pad(2).yline(0.)

        c.pad(3).hist(JSD, **opts)
        c.pad(3).ylim(0., 0.03)
        c.pad(3).yline(0.)

        # Cumulative JSD on a secondary axis of the bottom pad
        o = rp.overlay(c.pad(3), color=ROOT.kViolet, ndiv=502)
        o.hist(JSDsum, bins=MASSBINS, linecolor=ROOT.kViolet)
        o.label("#sum_{i #leq n} JSD(P #parallel F)")
        o.lim(0, 0.2)

        # Styling axes: only the bottom pad shows x-axis title and labels
        c.pad(0)._xaxis().SetTitleOffset(999.)
        c.pad(1)._xaxis().SetTitleOffset(999.)
        c.pad(2)._xaxis().SetTitleOffset(999.)
        c.pad(3)._xaxis().SetTitleOffset(5.)
        c.pad(0)._xaxis().SetLabelOffset(999.)
        c.pad(1)._xaxis().SetLabelOffset(999.)
        c.pad(2)._xaxis().SetLabelOffset(999.)

        c.pad(0)._yaxis().SetNdivisions(505)
        c.pad(1)._yaxis().SetNdivisions(502)
        c.pad(2)._yaxis().SetNdivisions(502)
        c.pad(3)._yaxis().SetNdivisions(502)

        c.pad(0).ylim(0, 0.20)
        c.pad(0).cd()
        c.pad(0)._get_first_primitive().Draw('SAME AXIS')

        # Decorations
        c.text(TEXT + [
            "Multijets, training dataset",
            "Cut on {:s} at #varepsilon_{{sig}}^{{rel}} = 50%".format(
                latex(var, ROOT=True)),
            "p_{{T}} #in  [{:.0f}, {:.0f}] GeV".format(*pt_bin)
        ],
               qualifier='Simulation Internal')
        c.legend(width=0.25)
        c.xlabel("Large-#it{R} jet mass [GeV]")
        c.ylabel("Fraction of jets")
        c.pad(1).ylabel('KL(P #parallel M)')
        c.pad(2).ylabel('KL(F #parallel M)')
        c.pad(3).ylabel('JSD(P #parallel F)')

        # Save
        c.save('figures/massdecorrelationmetric_{:s}__pT{:.0f}_{:.0f}GeV.pdf'.
               format(var, *pt_bin))
        pass
    return 0
Example #26
0
File: loss.py Project: nethemis/ANN
def plot_adversarial_training_loss(
        lambda_reg,
        num_folds,
        pretrain_epochs,
        H_prior=None,
        basedir='models/adversarial/combined/crossval/'):
    """
    Plot the classifier, adversary, and combined losses for the adversarial
    training of the jet classifier.

    Training histories are read from JSON files in `basedir`, averaged over
    all files found, and drawn as mean +/- standard deviation per epoch in
    three stacked pads (classifier, adversary, combined). The figure is
    saved under `figures/`.

    Arguments:
        lambda_reg: Factor multiplying the adversary loss in the combined
            loss, `L_clf - lambda_reg * L_adv`.
        num_folds: Number of cross-validation folds used to select the
            history files; if falsy, the single non-cross-validation history
            file is used instead.
        pretrain_epochs: Epoch at which the "Adv. pre-training" line is
            drawn; the region to its left is shaded.
        H_prior: Reference value drawn in the adversary pad and used for the
            "Ideal" line in the combined pad. If None, these reference lines
            are omitted.
        basedir: Directory containing the history files.

    Returns:
        None. Returns early, without plotting, if no history file is found.
    """

    # Check(s)
    if not basedir.endswith('/'):
        basedir += '/'
        pass

    # Define variable(s)
    digits = int(np.ceil(max(-np.log10(lambda_reg), 0)))
    lambda_str = '{l:.{d:d}f}'.format(d=digits, l=lambda_reg).replace('.', 'p')

    # Get paths to all cross-validation adversarially trained classifiers
    if num_folds:
        paths = sorted(
            glob.glob(basedir +
                      'history__combined_lambda{}__*of{}.json'.format(
                          lambda_str, num_folds)))
    else:
        paths = glob.glob(basedir +
                          'history__combined_lambda{}.json'.format(lambda_str))
        pass

    print("Found {} paths.".format(len(paths)))
    if len(paths) == 0:
        return

    # Store losses
    keys = [
        'train_comb', 'train_clf', 'train_adv', 'val_comb', 'val_clf',
        'val_adv'
    ]
    losses = {key: list() for key in keys}
    for path in paths:
        with open(path, 'r') as f:
            d = json.load(f)
            pass

        # Loop loss classes
        for name, prefix in zip(['train', 'val'], ['', 'val_']):
            try:
                # Classifier (values above 7.0 are masked out as NaN)
                loss = np.array(d[prefix + 'classifier_loss'])
                loss[loss > 7.0] = np.nan
                losses[name + '_clf'].append(loss)

                # Adversary
                loss = np.array(d[prefix + 'adversary_loss'])
                losses[name + '_adv'].append(loss)

                # Combined
                losses[name +
                       '_comb'].append(losses[name + '_clf'][-1] -
                                       lambda_reg * losses[name + '_adv'][-1])
            except KeyError:
                pass  # No validation
            pass

    # Plot results
    c = rp.canvas(batch=True, num_pads=3, ratio=False, size=(600, 800))
    # `loss` is the last array read in the loop above; its length sets the
    # number of epochs on the x-axis.
    bins = np.arange(len(loss))
    histbins = np.arange(len(loss) + 1) - 0.5

    # Axes
    for idx in range(3):
        c.pads()[idx].hist([0],
                           bins=[0, len(bins) - 1],
                           linewidth=0,
                           linestyle=0)  # Force correct x-axis
        pass

    # Plots
    categories = list()
    for ityp, typ in enumerate(['val', 'train']):
        for igrp, grp in enumerate(['clf', 'adv', 'comb']):
            key = '{}_{}'.format(typ, grp)
            colour = rp.colours[1 if typ == 'train' else 4]

            # Create histogram
            try:
                loss_mean = np.nanmean(losses[key], axis=0)
                loss_std = np.nanstd(losses[key], axis=0)
                hist = ROOT.TH1F(key, "", len(histbins) - 1, histbins)
                for ibin in range(len(loss_mean)):
                    hist.SetBinContent(ibin + 1, loss_mean[ibin])
                    hist.SetBinError(ibin + 1, loss_std[ibin])
                    pass

                # Uncertainty band, then the mean as a line on top
                c.pads()[igrp].hist(hist,
                                    fillcolor=colour,
                                    linestyle=ityp + 1,
                                    linewidth=0,
                                    alpha=0.3,
                                    option='LE3')
                c.pads()[igrp].hist(hist,
                                    fillcolor=0,
                                    fillstyle=0,
                                    linecolor=colour,
                                    linestyle=ityp + 1,
                                    linewidth=3,
                                    option='HISTL')
            except TypeError:
                pass  # No validation

            if igrp == 0:
                categories += [('Training' if typ == 'train' else 'Validation',
                                {
                                    'linestyle': ityp + 1,
                                    'linewidth': 3,
                                    'fillcolor': colour,
                                    'alpha': 0.3,
                                    'linecolor': colour,
                                    'option': 'FL'
                                })]
                pass
            pass
        pass

    # Formatting pads
    margin = 0.2
    ymins, ymaxs = list(), list()
    clf_opt_val = None
    for ipad, pad in enumerate(c.pads()):
        tpad = pad._bare()  # ROOT.TPad
        f = ipad / float(len(c.pads()) - 1)
        tpad.SetLeftMargin(0.20)
        tpad.SetBottomMargin(f * margin)
        tpad.SetTopMargin((1 - f) * margin)
        pad._xaxis().SetNdivisions(505)
        pad._yaxis().SetNdivisions(505)
        if ipad < len(c.pads()) - 1:  # Not bottom pad
            pad._xaxis().SetLabelOffset(9999.)
            pad._xaxis().SetTitleOffset(9999.)
        else:
            pad._xaxis().SetTitleOffset(3.5)
            pass

        ymin, ymax = list(), list()
        for hist in pad._primitives:
            if not isinstance(hist, ROOT.TGraph):
                ymin.append(get_min(hist))
                ymax.append(get_max(hist))
                pass
            pass

        # Get reference-line value
        clf_opt_val = clf_opt_val or c.pads()[0]._primitives[1].GetBinContent(
            1)
        if ipad == 0:
            ref = clf_opt_val
        elif H_prior is None:
            # No prior given: this pad has no reference line, so it must not
            # enter the axis range (the arithmetic below would fail on None).
            ref = None
        elif ipad == 1:
            ref = H_prior
        else:
            ref = clf_opt_val - lambda_reg * H_prior
            pass

        # Make sure the reference line, if any, is inside the y-axis range
        if ref is not None:
            ymin.append(ref)
            ymax.append(ref)
            pass
        ymin = min(ymin)
        ymax = max(ymax)

        ydiff = ymax - ymin
        ymin -= ydiff * 0.2
        ymax += ydiff * (0.7 if ipad == 0 else (0.7 if ipad == 1 else 0.2))

        if ipad == 0:
            #    ymin = 0.25
            ymax *= 1.2
            pass

        pad.ylim(ymin, ymax)

        ymins.append(ymin)
        ymaxs.append(ymax)
        pass

    c._bare().Update()

    # Pre-training boxes
    boxes = list()  # Keeps references to the drawn boxes
    for ipad, pad in enumerate(c.pads()):
        pad._bare().cd()
        boxes.append(ROOT.TBox(0, ymins[ipad], pretrain_epochs, ymaxs[ipad]))
        boxes[-1].SetFillColorAlpha(ROOT.kBlack, 0.05)
        boxes[-1].Draw("SAME")
        pass

    # Vertical lines
    for ipad in range(len(c.pads())):
        align = 'TR' if ipad < 2 else 'BR'
        c.pads()[ipad].xline(
            pretrain_epochs,
            ymin=ymins[ipad],
            ymax=ymaxs[ipad],
            text='  Adv. pre-training  ' if ipad == 0 else None,
            text_align=align,
            linestyle=1,
            linecolor=ROOT.kGray + 2)
        pass

    # Horizontal lines
    c.pads()[0].yline(clf_opt_val)
    if H_prior is not None:
        c.pads()[1].yline(H_prior)
        c.pads()[2].yline(clf_opt_val - lambda_reg * (H_prior))
        pass

    opts = dict(align=31, textcolor=ROOT.kGray + 2, textsize=14)
    c.pads()[0].latex("Stand-alone NN  ", bins[-1] * 0.98,
                      clf_opt_val + (ymaxs[0] - ymins[0]) * 0.03, **opts)

    if H_prior is not None:
        c.pads()[1].latex("#it{H}(prior)  ", bins[-1] * 0.98,
                          H_prior + (ymaxs[1] - ymins[1]) * 0.03, **opts)
        opts['align'] = 33
        c.pads()[2].latex(
            "Ideal  ", bins[-1] * 0.98, clf_opt_val - lambda_reg * (H_prior) -
            (ymaxs[2] - ymins[2]) * 0.03, **opts)
        pass

    # Decorations
    ROOT.gStyle.SetTitleOffset(2.0, 'y')  # 2.2
    c.xlabel("Training epoch")
    c.pads()[0].ylabel("#it{L}_{clf.}")
    c.pads()[1].ylabel("#it{L}_{adv.}")
    c.pads()[2].ylabel("#it{L}_{clf.} #minus #lambda #it{L}_{adv.}")
    for pad in c.pads():
        pad.xlim(0, max(bins) - 1)
        pass

    c.pads()[0].text([], xmin=0.2, ymax=0.85, qualifier=QUALIFIER)

    c.pads()[1].text([
        "#sqrt{s} = 13 TeV", "#it{W} jet tagging",
        "Adversarial training (#lambda = %s)" % (lambda_str.replace('p', '.'))
    ],
                     ATLAS=False,
                     ymax=0.70,
                     xmin=0.27)
    c.pads()[0].legend(xmin=0.60, ymax=0.70, categories=categories)

    # Save
    mkdir('figures/')
    c.save('figures/loss_adversarial_lambda{}_{}.pdf'.format(
        lambda_str, 'full' if num_folds is None else 'cv'))
    return
Example #27
0
def plot (profile, fit):
    """
    Draw a profile together with its fit and save the figure.

    Axis titles, ranges and legend texts are taken from the module-level
    settings `EFF`, `XRANGE`, `YRANGE`, `INPUT`, `FIT` and `MODEL`. The
    canvas is saved as PDF and EPS under `figures/knn/`, with `FIT`, `VAR`,
    `EFF`, `MODEL + INPUT` and `MIN_STAT` encoded in the file name.

    Arguments:
        profile: Drawable object with ROOT-style axis/marker setters; drawn
            first, with option "AP".
        fit: Drawable object drawn on top of `profile` with option "SAME".
    """

    # rootplotting
    c = rp.canvas(batch=True)
    pad = c.pads()[0]._bare()
    pad.cd()
    pad.SetRightMargin(0.20)
    pad.SetLeftMargin(0.15)
    pad.SetTopMargin(0.10)

    # Styling
    profile.SetMarkerColor(4)
    profile.SetMarkerStyle(20)
    fit.SetLineColor(2)
    fit.SetMarkerColor(4)
    fit.SetMarkerStyle(20)
    profile.GetXaxis().SetTitle("#it{m}_{jj} [GeV]")
    profile.GetYaxis().SetTitle("#it{P}^{#varepsilon=%s%%}" % (EFF))

    profile.GetYaxis().SetNdivisions(505)
    profile.GetXaxis().SetTitleOffset(1.4)
    profile.GetYaxis().SetTitleOffset(1.4)
    profile.GetXaxis().SetRangeUser(*XRANGE)

    if YRANGE:
        profile.GetYaxis().SetRangeUser(*YRANGE)
        pass

    # Draw
    profile.Draw("AP")
    fit.Draw("SAME")

    leg = ROOT.TLegend(0.2, 0.75, 0.5, 0.85)

    # Legend entry for the profile; unknown inputs get no entry
    if INPUT == 'data':
        leg.AddEntry(profile, "CR Data", "p")
    elif INPUT == 'mcCR':
        leg.AddEntry(profile, "CR MC", "p")
    elif INPUT == 'mc':
        leg.AddEntry(profile, "Full MC", "p")
        pass

    # Legend entry for the fit. The first matching key wins. A generic label
    # is used as fallback so that an unrecognised fit type no longer leaves
    # `fitLegend` undefined.
    fitLegend = "Fit "
    for key, label in (('knn', "k-NN fit "),
                       ('poly2', "2. order polynomial fit "),
                       ('poly3', "3. order polynomial fit "),
                       ('erf', "Error function fit ")):
        if key in FIT:
            fitLegend = label
            break
        pass

    if MODEL == 'data':
        fitLegend += "to CR Data"
    elif MODEL == 'mcCR':
        fitLegend += "to CR MC"
    elif MODEL == 'mc':
        fitLegend += "to Full MC"
        pass

    leg.AddEntry(fit, fitLegend, "l")
    leg.Draw()

    # Save
    mkdir('figures/knn/')
    basename = 'figures/knn/{}_profile_{:s}_{}_{}_stat{}'.format(
        FIT, VAR, EFF, MODEL + INPUT, MIN_STAT)
    c.save(basename + '.pdf')
    c.save(basename + '.eps')

    del c
    pass
Example #28
0
File: loss.py Project: nethemis/ANN
def plot_classifier_training_loss(
        num_folds, basedir='models/adversarial/classifier/crossval/'):
    """
    Plot the classifier training loss.

    Reads the 'loss' and 'val_loss' histories from all cross-validation JSON
    files matching `num_folds` in `basedir`, and draws their per-epoch mean
    with a +/- standard deviation band for training and validation. The
    figure is saved as 'figures/loss_classifier.pdf'.

    Arguments:
        num_folds: Number of cross-validation folds, used to select files
            named 'history__crossval_classifier__*of<num_folds>.json'.
        basedir: Directory containing the history files.

    Returns:
        None. Returns early, without plotting, if no history file is found.
    """

    # Check(s)
    if not basedir.endswith('/'):
        basedir += '/'
        pass

    # Get paths to classifier training losses. `basedir` already ends with a
    # separator, so none is added here.
    paths = sorted(
        glob.glob(
            basedir +
            'history__crossval_classifier__*of{}.json'.format(num_folds)))

    if len(paths) == 0:
        print("No models found for classifier CV study.")
        return

    # Read losses from files
    losses = {'train': list(), 'val': list()}
    for path in paths:
        with open(path, 'r') as f:
            d = json.load(f)
            pass

        loss = np.array(d['val_loss'])
        # Validation losses within 0.02 of 0.72 are treated as outliers and
        # excluded from the average.
        # @FIXME: This probably isn't completely kosher
        msk_outlier = np.abs(loss - 0.72) < 0.02
        print("Outliers: {}".format(loss[msk_outlier]))
        loss[msk_outlier] = np.nan
        losses['val'].append(loss)
        loss = np.array(d['loss'])
        losses['train'].append(loss)
        pass

    # Define variable(s); `loss` is the last training history read above
    bins = np.arange(len(loss))
    histbins = np.arange(len(loss) + 1) + 0.5

    # Canvas
    c = rp.canvas(batch=True)

    # Plots
    categories = list()

    for name, key, colour, linestyle in zip(['Validation', 'Training'],
                                            ['val', 'train'],
                                            [rp.colours[4], rp.colours[1]],
                                            [1, 2]):

        # Histograms
        loss_mean = np.nanmean(losses[key], axis=0)
        loss_std = np.nanstd(losses[key], axis=0)
        hist = ROOT.TH1F(key + '_loss', "", len(histbins) - 1, histbins)
        for idx in range(len(loss_mean)):
            hist.SetBinContent(idx + 1, loss_mean[idx])
            hist.SetBinError(idx + 1, loss_std[idx])
            pass

        c.hist([0], bins=[0, max(bins)], linewidth=0,
               linestyle=0)  # Force correct x-axis
        # Uncertainty band, then the mean as a line on top
        c.hist(hist, fillcolor=colour, alpha=0.3, option='LE3')
        c.hist(hist,
               linecolor=colour,
               linewidth=3,
               linestyle=linestyle,
               option='HISTL')

        categories += [(name, {
            'linestyle': linestyle,
            'linewidth': 3,
            'linecolor': colour,
            'fillcolor': colour,
            'alpha': 0.3,
            'option': 'FL'
        })]
        pass

    # Decorations
    c.pads()[0]._yaxis().SetNdivisions(505)
    c.xlabel("Training epoch")
    c.ylabel("Cross-validation classifier loss, L_{clf}")
    c.xlim(0, max(bins))
    c.ylim(0.3, 0.5)
    c.legend(categories=categories, width=0.25)  # ..., xmin=0.475
    c.text(TEXT + ["#it{W} jet tagging", "Neural network (NN) classifier"],
           qualifier=QUALIFIER)
    # Save
    mkdir('figures/')
    c.save('figures/loss_classifier.pdf')
    return
def plot_individual (*argv):
    """
    Draw one small canvas per (variable, metric) combination and save each.

    The positional arguments are unpacked, in order, as
    `data, args, features, bins, effs, rejs, jsds, meanx, jsd_limits,
    masscut`. For `var` in ('pt', 'npv') and `metric` in ('rej', 'jsd') the
    per-bin mean and spread of the chosen metric are drawn for every feature
    on a logarithmic y-axis. For `var = None` a legend-only canvas is
    produced instead (one for each metric value). Each canvas is saved as
    'figures/robustness__<...>.pdf'.

    Notes:
        - `bins['npv'][-1]` is floored in place, i.e. the caller's object is
          modified.
        - `data` and `effs` are unpacked but not used in this function.
        - `args.show` controls whether the canvas is created in batch mode.
    """

    # Unpack arguments
    data, args, features, bins, effs, rejs, jsds, meanx, jsd_limits, masscut = argv

    # To fix 30.5 --> 30 for NPV
    bins['npv'][-1] = np.floor(bins['npv'][-1])

    # Loop combinations
    for var, metric in itertools.product(['pt', 'npv', None], ['rej', 'jsd']):

        # Style changes below are made on the object provided by the context
        # manager.
        with TemporaryStyle() as style:

            # Set styles
            scale      = 1.0
            scale_axis = 0.7
            margin_squeeze = 0.07
            margin_vert    = 0.15
            margin_hori    = 0.17
            size = (350, 300)

            style.SetTextSize(scale_axis * style.GetTextSize())
            for coord in ['x', 'y', 'z']:
                style.SetLabelSize(scale_axis * style.GetLabelSize(coord), coord)
                style.SetTitleSize(scale_axis * style.GetTitleSize(coord), coord)
                pass
            style.SetTitleOffset(1.8, 'y')
            style.SetLegendTextSize(style.GetLegendTextSize() * scale)
            style.SetTickLength(0.05, 'x')
            style.SetTickLength(0.05, 'y')

            # Canvas (narrower when only a legend is drawn)
            c = rp.canvas(size=size if var is not None else (150, 300), batch=not args.show)

            # Margins (0.49 on all sides for the legend-only canvas)
            tpad = c.pad()._bare()
            tpad.SetBottomMargin(margin_vert    if var is not None else 0.49)
            tpad.SetLeftMargin  (margin_hori    if var is not None else 0.49)
            tpad.SetRightMargin (margin_squeeze if var is not None else 0.49)
            tpad.SetTopMargin   (margin_vert    if var is not None else 0.49)

            # Plots
            # -- References
            if var is not None:
                # Shaded reference box spanning the full x-range: height 2 for
                # 'rej' ("Random guessing"), 1 for 'jsd' ("Maximal sculpting")
                boxopts  = dict(fillcolor=ROOT.kBlack, alpha=0.05, linecolor=ROOT.kGray + 2, linewidth=1, option='HIST')
                c.hist([2 if metric == 'rej' else 1], bins=[bins[var] [0], bins[var] [-1]], **boxopts)

                # Features are drawn in two passes, split by `signal_low(feat)`
                for is_simple in [True, False]:
                    for ifeat, feat in filter(lambda t: is_simple == signal_low(t[1]), enumerate(features)):

                        # Consecutive feature pairs share a colour; the second
                        # of each pair gets a different line style
                        opts = dict(
                            linecolor   = rp.colours[(ifeat // 2)],
                            markercolor = rp.colours[(ifeat // 2)],
                            fillcolor   = rp.colours[(ifeat // 2)],
                            linestyle   = 1 + (ifeat % 2),
                            alpha       = 0.3,
                            option      = 'E2',
                        )

                        # Each entry is a (mean, std) pair per bin
                        mean_rej, std_rej = map(np.array, zip(*rejs[var][feat]))  # @TEMP
                        mean_jsd, std_jsd = map(np.array, zip(*jsds[var][feat]))

                        # Only _show_ mass-decorrelated features for `npv`
                        # (even-indexed features are scaled to large negative
                        # values, presumably to move them outside the drawn
                        # y-range)
                        if (var == 'npv') and (ifeat % 2 == 0):
                            mean_rej *= -9999.
                            mean_jsd *= -9999.
                            pass

                        # Error boxes
                        x    = np.array(bins[var][:-1]) + 0.5 * np.diff(bins[var])
                        xerr = 0.5 * np.diff(bins[var])
                        graph_rej = ROOT.TGraphErrors(len(x), x, mean_rej, xerr, std_rej)
                        graph_jsd = ROOT.TGraphErrors(len(x), x, mean_jsd, xerr, std_jsd)

                        if metric == 'rej':
                            c.hist(graph_rej, **opts)
                        else:
                            c.hist(graph_jsd, **opts)
                            pass

                        # Markers and lines
                        opts['option']      = 'PE2L'
                        opts['markerstyle'] = 20 + 4 * (ifeat % 2)

                        # Points are placed at `meanx[var]` rather than at the
                        # bin centres used for the error boxes
                        graph_rej = ROOT.TGraph(len(x), meanx[var], mean_rej)
                        graph_jsd = ROOT.TGraph(len(x), meanx[var], mean_jsd)

                        # Labels are only attached for one of the two feature
                        # classes per metric
                        if metric == 'rej':
                            c.hist(graph_rej, label=latex(feat, ROOT=True) if not is_simple else None, **opts)
                        else:
                            c.hist(graph_jsd, label=latex(feat, ROOT=True) if     is_simple else None, **opts)
                            pass
                        pass
                    pass   # end loop: `is_simple`

                # Meaningful limits on JSD
                if metric == 'jsd':
                    x, y, ey_stat, ey_syst  = map(np.array, zip(*jsd_limits[var]))
                    ex = np.zeros_like(x)
                    # Stretch the first/last point to the edges of the x-range
                    x[0]  = bins[var][0]
                    x[-1] = bins[var][-1]
                    # NOTE(review): this local name shadows the builtin `format`
                    format = lambda arr: arr.flatten('C').astype(float)
                    gr_stat = ROOT.TGraphErrors(len(x), *list(map(format, [x, y, ex, ey_stat])))
                    # Statistical and systematic errors added in quadrature
                    gr_comb = ROOT.TGraphErrors(len(x), *list(map(format, [x, y, ex, np.sqrt(np.square(ey_stat) + np.square(ey_syst))])))
                    smooth_tgrapherrors(gr_stat, ntimes=2)
                    smooth_tgrapherrors(gr_comb, ntimes=2)
                    c.graph(gr_comb,                                        fillcolor=ROOT.kBlack, alpha=0.03, option='3')
                    c.graph(gr_stat, linestyle=2, linecolor=ROOT.kGray + 1, fillcolor=ROOT.kBlack, alpha=0.03, option='L3')

                    # Read back one point of the combined graph to position
                    # the annotation: the last point for 'pt', the middle one
                    # otherwise
                    x_, y_, ex_, ey_ = ROOT.Double(0), ROOT.Double(0), ROOT.Double(0), ROOT.Double(0)
                    idx = (gr_comb.GetN() - 1) if var == 'pt' else (gr_comb.GetN() // 2)
                    gr_comb.GetPoint(idx, x_,  y_)
                    ey_ = gr_comb.GetErrorY(idx)
                    x_, y_ = map(float, (x_, y_))
                    if var == 'pt':
                        c.latex("Mean stat. #oplus #varepsilon_{bkg}^{rel} var. limit     ", x_, y_ - 1.0 * ey_, align=31, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)
                        pass
                    pass

                # Decorations
                # -- offsets
                #c.pads()[2]._xaxis().SetTitleOffset(2.3)

                # -- x-axis label
                if   var == 'pt':
                    xlabel = "Large-#it{R} jet p_{T} [GeV]"
                elif var == 'npv':
                    xlabel = "Number of reconstructed vertices N_{PV}"
                elif var is not None:
                    raise NotImplementedError("Variable {} is not supported.".format(var))

                c.xlabel(xlabel)

                # -- y-axis label
                if   metric == 'rej':
                    ylabel = "1/#varepsilon_{bkg}^{rel} @ #varepsilon_{sig}^{rel} = 50%"
                elif metric == 'jsd':
                    ylabel = "1/JSD @ #varepsilon_{sig}^{rel} = 50%"
                else:
                    raise NotImplementedError("Metric {} is not supported.".format(metric))

                c.ylabel(ylabel)

                # Caption for the reference box, centred in x
                xmid = (bins[var][0] + bins[var][-1]) * 0.5
                if metric == 'rej':
                    c.latex("Random guessing",   xmid, 2 * 0.9, align=23, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)
                    c.ylim(1,   100)  # 500
                else:
                    c.latex("Maximal sculpting", xmid, 1 * 0.8, align=23, textsize=11 * scale, angle=0, textcolor=ROOT.kGray + 2)
                    c.ylim(0.2, 7E+04)  # 2E+05
                    pass

                c.logy()

                # Common decorations
                c.pad()._xaxis().SetNdivisions(504)

                c.text([], qualifier=QUALIFIER, xmin=margin_hori, ymax=1. - margin_vert + 0.03)

                c.text( ["#sqrt{s} = 13 TeV,  #it{W} jet tagging"] + \
                       (['m #in  [60, 100] GeV'] if masscut else []) + \
                       (['Multijets'] if metric == 'jsd' else []),
                       ATLAS=False, ymax=0.40 if (masscut and (var == 'pt') and (metric == 'rej')) else None)
                       #, ymax=1. - margin_vert - 0.10)

            else:

                # Draw dummy histogram (one per feature, only to create the
                # legend entries)
                for is_simple in [True, False]:
                    for ifeat, feat in filter(lambda t: is_simple == signal_low(t[1]), enumerate(features)):

                        opts = dict(
                            linecolor   = rp.colours[(ifeat // 2)],
                            markercolor = rp.colours[(ifeat // 2)],
                            fillcolor   = rp.colours[(ifeat // 2)],
                            linestyle   = 1 + (ifeat % 2),
                            alpha       = 0.3,
                            option      = 'E2',
                        )
                        opts['option']      = 'PE2L'
                        opts['markerstyle'] = 20 + 4 * (ifeat % 2)

                        # Same labelling rule as in the plotting branch above
                        label = latex(feat, ROOT=True) if is_simple == (metric == 'jsd') else None
                        h = c.hist([0.5], bins=[0,1], label=label, **opts)
                        pass
                    pass

                # "Remove" axes
                pad = c.pad()
                tpad = pad._bare()
                white = ROOT.kWhite
                pad._xaxis().SetLabelOffset(9999.)
                pad._xaxis().SetTitleOffset(9999.)
                pad._yaxis().SetLabelOffset(9999.)
                pad._yaxis().SetTitleOffset(9999.)
                pad._xaxis().SetAxisColor  (white)  # Remove "double ticks"
                pad._yaxis().SetAxisColor  (white)  # Remove "double ticks"
                tpad.SetFillColor          (white)
                tpad.SetFrameFillColor     (white)
                c._bare().SetFillColor     (white)
                c._bare().SetFrameFillColor(white)

                # Draw class-specific legend
                width = 0.90 #margin_hori - 0.03
                # (`var` is always None in this branch)
                if var is None:
                    if metric == 'rej':
                        c.legend(header='MVA:',        width=width, xmin=0.05, ymax=1. - margin_vert + 0.02)  # xmin = margin_hori + 0.03
                    else:
                        c.legend(header='Analytical:', width=width, xmin=0.05, ymax=1. - margin_vert + 0.02)
                        pass
                    c.pad()._legends[-1].SetTextSize(style.GetLegendTextSize())
                    pass
                pass
            pass

            # Arrows
            # (disabled: the code below is kept inside a string literal and is
            # not executed)
            '''
            c._bare().cd()
            opts_text = dict(textsize=11, textcolor=ROOT.kGray + 2)
            tlatex = ROOT.TLatex()
            tlatex.SetTextAngle(90)
            tlatex.SetTextAlign(22)
            tlatex.SetTextSize(11)
            tlatex.SetTextColor(ROOT.kGray + 2)
            tlatex.DrawLatexNDC(0.5, 0. + 0.5 * (margin_vert + 0.5 * (1.0 - margin_squeeze - margin_vert)), "    Less sculpting #rightarrow")
            tlatex.DrawLatexNDC(0.5, 1. - 0.5 * (margin_vert + 0.5 * (1.0 - margin_squeeze - margin_vert)), "     Greater separation #rightarrow")
            '''

            # Save (legend-only canvases are named '..._legend_mva' and
            # '..._legend_analytical')
            c.save('figures/robustness__{}_{}{}.pdf'.format(var if var is not None else 'legend', metric if var is not None else ('mva' if metric == 'rej' else 'analytical'), '_masscut' if masscut else ''))

            pass  # Temporary style scope

        pass
    return
Example #30
0
def main(args):

    # Definitions
    histstyle = dict(**HISTSTYLE)

    # Initialise
    args, cfg = initialise(args)

    # Load data
    #data = np.zeros(1, 95213009, 10)
    data, features, _ = load_data(
        'data/djr_LCTopo_2.h5')  # + args.input) #, test=True) #
    #data2, features, _ = load_data('data/djr_LCTopo_2.h5') # + args.input) #, test=True) #
    #data = np.concatenate((data1, data2))

    #f1 = h5py.File('data/djr_LCTopo_1.h5', 'r')
    #f2 = h5py.File('data/djr_LCTopo_2.h5', 'r')

    knnCut = 0
    ntrkCut = 50
    emfracCut = 0.65
    scale = 139 * 1000000  # (inverse nanobarn)
    signal_to_plot = 7

    sigDict = {
        0: 'All Models',
        1: 'Model A, m = 2 TeV',
        2: 'Model A, m = 1 TeV',
        3: 'Model A, m = 1.5 TeV',
        4: 'Model A, m = 2.5 TeV',
        5: 'Model B, m = 1 TeV',
        6: 'Model B, m = 1.5 TeV',
        7: 'Model B, m = 2 TeV',
        8: 'Model B, m = 2.5 TeV',
        9: 'Model C, m = 1 TeV',
        10: 'Model C, m = 1.5 TeV',
        11: 'Model C, m = 2 TeV',
        12: 'Model C, m = 2.5 TeV',
        13: 'Model D, m = 1 TeV',
        14: 'Model D, m = 1.5 TeV',
        15: 'Model D, m = 2 TeV',
        16: 'Model D, m = 2.5 TeV',
    }

    outHistFile = ROOT.TFile.Open(
        "figures/mjjHistograms_kNN{}_eff{}.root".format(knnCut, kNN_eff),
        "RECREATE")

    histstyle[True]['label'] = 'Multijets'
    histstyle[False]['label'] = 'Dark jets, {}'.format(sigDict[signal_to_plot])

    # Add knn variables

    #base_var = ['lead_jet_ungrtrk500', 'sub_jet_ungrtrk500']
    base_var = 'jet_ungrtrk500'
    kNN_var = base_var.replace('jet', 'knn')
    #base_vars = ['lead_'+base_var, 'sub_'+base_var]
    #kNN_vars = ['lead_'+kNN_var, 'sub_'+kNN_var]

    print data.shape

    with Profile("Add variables"):
        #for i in range(len(base_var)):
        print "k-NN base variable: {} (cp. {})".format(base_var, kNN_var)
        add_knn(data,
                newfeat='lead_' + kNN_var,
                path='models/knn/{}_{}_{}_{}.pkl.gz'.format(
                    FIT, base_var, kNN_eff, sigModel))
        add_knn(data,
                newfeat='sub_' + kNN_var,
                path='models/knn/{}_{}_{}_{}.pkl.gz'.format(
                    FIT, base_var, kNN_eff, sigModel))

        #add_knn(data, newfeat=kNN_var, path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel))

        print 'models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff,
                                                      sigModel)
        """
        base_var = ['lead_jet_ungrtrk500', 'sub_jet_ungrtrk500']
        kNN_var = [var.replace('jet', 'knn') for var in base_var]
        
        with Profile("Add variables"):
        from run.knn.common import add_knn, MODEL, VAR as kNN_basevar, EFF as kNN_eff
        print "k-NN base variable: {} (cp. {})".format(kNN_basevar, kNN_var)
        for i in range(len(base_var)):
        add_knn(data, newfeat=kNN_var[i], path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var[i], kNN_eff, MODEL))
        print 'models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var[i], kNN_eff, MODEL)
        """

    weight = 'weight'  # 'weight_test' / 'weight'
    bins_pt = np.linspace(450, 3500, 40)
    bins_mjj = np.linspace(0, 8000, 80)

    # Useful masks
    msk_bkg = data['signal'] == 0
    if signal_to_plot == 0:
        msk_sig = data['signal'] == 1
    else:
        msk_sig = data['sigType'] == signal_to_plot

    #msk_weight = data['weight']<0.2

    msk_knn = (data['lead_knn_ungrtrk500'] >
               knnCut) & (data['sub_knn_ungrtrk500'] > knnCut)
    msk_ungr = (data['lead_jet_ungrtrk500'] >
                ntrkCut) & (data['sub_jet_ungrtrk500'] > ntrkCut)
    msk_emfrac = (data['lead_jet_EMFrac'] <
                  emfracCut) & (data['sub_jet_EMFrac'] < emfracCut)

    msk_knn_1 = (data['lead_knn_ungrtrk500'] > knnCut)
    msk_ungr_1 = (data['lead_jet_ungrtrk500'] > ntrkCut)

    #msk_knn = (data['knn_ungrtrk500']>knnCut)
    #msk_ungr = (data['jet_ungrtrk500']>90.0)

    msk_ntrkBkg = msk_ungr & msk_emfrac & msk_bkg  #& msk_weight #& msk_pt & msk_m & msk_eta
    msk_ntrkSig = msk_ungr & msk_emfrac & msk_sig  #& msk_pt & msk_m & msk_eta

    msk_knnBkg = msk_knn & msk_bkg
    msk_knnSig = msk_knn & msk_sig

    msk_ntrkBkg1 = msk_ungr_1 & msk_bkg  #& msk_weight #& msk_pt & msk_m & msk_eta
    msk_ntrkSig1 = msk_ungr_1 & msk_sig  #& msk_pt & msk_m & msk_eta
    msk_knnBkg1 = msk_knn_1 & msk_bkg  #& msk_weight #& msk_pt & msk_m & msk_eta
    msk_knnSig1 = msk_knn_1 & msk_sig  #& msk_pt & msk_m & msk_eta

    msk_inclBkg = msk_bkg  #& msk_weight #& msk_pt & msk_m & msk_eta
    msk_inclSig = msk_sig  #& msk_pt & msk_m & msk_eta

    # Mjj dist with cut on ntrk, ungrtrk compared to inclusive selection
    c = rp.canvas(batch=True)
    hist_inclBkg = c.hist(data.loc[msk_inclBkg, 'dijetmass'].values,
                          bins=bins_mjj,
                          weights=scale * data.loc[msk_inclBkg, weight].values,
                          label="Multijets, Inclusive",
                          normalise=True,
                          linecolor=ROOT.kGreen + 2,
                          linewidth=3)
    hist_knnBkg = c.hist(
        data.loc[msk_knnBkg, 'dijetmass'].values,
        bins=bins_mjj,
        weights=scale * data.loc[msk_knnBkg, weight].values,
        label="Multijets, n_{{trk}}^{{#epsilon}}>{}".format(knnCut),
        normalise=True,
        linecolor=ROOT.kMagenta + 2,
        linestyle=2,
        linewidth=3)

    hist_ntrkBkg = c.hist(data.loc[msk_ntrkBkg, 'dijetmass'].values,
                          bins=bins_mjj,
                          weights=scale * data.loc[msk_ntrkBkg, weight].values,
                          label="Multijets, n_{{trk}}>{}".format(ntrkCut),
                          normalise=True,
                          linecolor=ROOT.kOrange + 2,
                          linestyle=2,
                          linewidth=3)
    #hist_CRBkg = c.hist(data.loc[msk_CR_bkg, 'dijetmass'].values, bins=bins_mjj, weights=scale*data.loc[msk_CR_bkg, weight].values, label="CR Bkg, C<20", normalise=True, linecolor=ROOT.kGray+2, linestyle=2)

    c.legend(width=0.4, xmin=0.5, ymax=0.9)
    c.ylabel("Fraction of jets")
    c.xlabel("m_{jj} [GeV]")
    c.logy()
    #c.ylim(0.00005, 5)
    #c.save('figures/distributions/mjj_Bkg_CR20.pdf'.format(knnCut))
    #c.save('figures/distributions/mjj_Bkg_CR20.eps'.format(knnCut))
    c.save('figures/distributions/mjj_BkgDist_ntrk{}_knn{}_{}.pdf'.format(
        ntrkCut, knnCut, FIT))
    c.save('figures/distributions/mjj_BkgDist_ntrk{}_knn{}_{}.eps'.format(
        ntrkCut, knnCut, FIT))

    del c

    c = rp.canvas(batch=True)
    hist_Sig = c.hist(data.loc[msk_sig, 'dijetmass'].values,
                      bins=bins_mjj,
                      weights=data.loc[msk_sig, weight].values,
                      label="Model A, m = 2 TeV, inclusive",
                      normalise=True,
                      linecolor=ROOT.kGreen + 2)

    hist_knnSig = c.hist(
        data.loc[msk_knnSig, 'dijetmass'].values,
        bins=bins_mjj,
        weights=data.loc[msk_knnSig, weight].values,
        label="Model A, m = 2 TeV, #it{{n}}_{{trk}}^{{#epsilon}}>{}".format(
            knnCut),
        normalise=True,
        linecolor=ROOT.kMagenta + 2,
        linestyle=2)

    hist_ntrkSig = c.hist(
        data.loc[msk_ntrkSig, 'dijetmass'].values,
        bins=bins_mjj,
        weights=data.loc[msk_ntrkSig, weight].values,
        label="Model A, m = 2 TeV, #it{{n}}_{{trk}}>{}".format(ntrkCut),
        normalise=True,
        linecolor=ROOT.kOrange + 2,
        linestyle=2)

    #hist_CRSig = c.hist(data.loc[msk_CR_sig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_CR_sig, weight].values, label="Sig, CR", normalise=True, linecolor=ROOT.kGray+2, linestyle=2)

    c.legend(width=0.4, xmin=0.5, ymax=0.9)
    c.ylabel("Fraction of jets")
    c.xlabel("m_{jj} [GeV]")
    c.logy()
    #c.ylim(0.00005, 5)
    c.save('figures/distributions/mjj_SigDist_ntrk{}_knn{}_{}.pdf'.format(
        ntrkCut, knnCut, FIT))
    c.save('figures/distributions/mjj_SigDist_ntrk{}_knn{}_{}.eps'.format(
        ntrkCut, knnCut, FIT))

    del c

    c = rp.canvas(batch=True)

    hist_knnSig = c.hist(
        data.loc[msk_knnSig, 'dijetmass'].values,
        bins=bins_mjj,
        weights=data.loc[msk_knnSig, weight].values,
        label="Model A, m = 2 TeV, knn_ntrk>{}".format(knnCut),
        normalise=False,
        linecolor=ROOT.kBlue + 1,
        linestyle=1)

    hist_knnBkg = c.hist(data.loc[msk_knnBkg, 'dijetmass'].values,
                         bins=bins_mjj,
                         weights=scale * data.loc[msk_knnBkg, weight].values,
                         label="Multijets, knn_ntrk>{}".format(knnCut),
                         normalise=False,
                         linecolor=ROOT.kMagenta + 2,
                         linestyle=2)

    hist_ntrkBkg = c.hist(data.loc[msk_ntrkBkg, 'dijetmass'].values,
                          bins=bins_mjj,
                          weights=scale * data.loc[msk_ntrkBkg, weight].values,
                          label="Multijets, ntrk>{}".format(ntrkCut),
                          normalise=False,
                          linecolor=ROOT.kOrange + 2,
                          linestyle=2)

    c.legend(width=0.4, xmin=0.3, ymax=0.9)
    c.ylabel("Number of events")
    c.xlabel("m_{jj} [GeV]")
    c.logy()
    #c.ylim(0.00005, 5)
    c.save('figures/distributions/mjj_Dist_noNorm_knn{}_{}.pdf'.format(
        knnCut, FIT))
    c.save('figures/distributions/mjj_Dist_noNorm_knn{}_{}.eps'.format(
        knnCut, FIT))

    bins_mjj = np.linspace(0, 10000, 50)

    # Unscaled histograms for calculating efficiencies

    hist_inclBkg = c.hist(data.loc[msk_inclBkg, 'dijetmass'].values,
                          bins=bins_mjj,
                          weights=scale * data.loc[msk_inclBkg, weight].values,
                          normalise=False)

    hist_inclSig = c.hist(data.loc[msk_inclSig, 'dijetmass'].values,
                          bins=bins_mjj,
                          weights=data.loc[msk_inclSig, weight].values,
                          normalise=False)

    hist_ntrkSig = c.hist(data.loc[msk_ntrkSig, 'dijetmass'].values,
                          bins=bins_mjj,
                          weights=data.loc[msk_ntrkSig, weight].values,
                          normalise=False)

    hist_knnSig = c.hist(data.loc[msk_knnSig, 'dijetmass'].values,
                         bins=bins_mjj,
                         weights=data.loc[msk_knnSig, weight].values,
                         normalise=False)

    hist_ntrkSig1 = c.hist(data.loc[msk_ntrkSig1, 'dijetmass'].values,
                           bins=bins_mjj,
                           weights=data.loc[msk_ntrkSig1, weight].values,
                           normalise=False)

    hist_ntrkBkg1 = c.hist(data.loc[msk_ntrkBkg1, 'dijetmass'].values,
                           bins=bins_mjj,
                           weights=data.loc[msk_ntrkBkg1, weight].values,
                           normalise=False)

    hist_knnBkg1 = c.hist(data.loc[msk_knnBkg1, 'dijetmass'].values,
                          bins=bins_mjj,
                          weights=data.loc[msk_knnBkg1, weight].values,
                          normalise=False)

    hist_knnSig1 = c.hist(data.loc[msk_knnSig1, 'dijetmass'].values,
                          bins=bins_mjj,
                          weights=data.loc[msk_knnSig1, weight].values,
                          normalise=False)

    print "Bkg inclusive integral: ", hist_inclBkg.GetEffectiveEntries()
    print "Sig inclusive integral: ", hist_inclSig.GetEffectiveEntries()

    print "Bkg pass kNN eff entries / integral: ", hist_knnBkg.GetEffectiveEntries(
    ), hist_knnBkg.Integral()
    print "Sig pass kNN eff entries / integral: ", hist_knnSig.GetEffectiveEntries(
    ), hist_knnSig.Integral()

    print "Bkg pass ntrk eff entries / integral: ", hist_ntrkBkg.GetEffectiveEntries(
    ), hist_ntrkBkg.Integral()
    print "Sig pass ntrk eff entries / integral: ", hist_ntrkSig.GetEffectiveEntries(
    ), hist_ntrkSig.Integral()

    print "Bkg Eff. knn_ntrk> {}, eff. entries: ".format(
        knnCut), 100 * hist_knnBkg.GetEffectiveEntries(
        ) / hist_inclBkg.GetEffectiveEntries()
    print "Sig Eff. knn_ntrk> {}, eff. entries: ".format(
        knnCut), 100 * hist_knnSig.GetEffectiveEntries(
        ) / hist_inclSig.GetEffectiveEntries()

    print "Bkg Eff. knn_ntrk> {}, integral: ".format(
        knnCut), 100 * hist_knnBkg.Integral() / hist_inclBkg.Integral()
    print "Sig Eff. knn_ntrk> {}, integral: ".format(
        knnCut), 100 * hist_knnSig.Integral() / hist_inclSig.Integral()

    print "Bkg Eff. ntrk>{}, eff. entries: ".format(
        ntrkCut), 100 * hist_ntrkBkg.GetEffectiveEntries(
        ) / hist_inclBkg.GetEffectiveEntries()
    print "Sig Eff. ntrk>{}, eff. entries: ".format(
        ntrkCut), 100 * hist_ntrkSig.GetEffectiveEntries(
        ) / hist_inclSig.GetEffectiveEntries(
        )  #, hist_ntrkSig.GetEffectiveEntries()

    print "Bkg Eff. 1 jet knn_ntrk> {}, eff. entries: ".format(
        knnCut), 100 * hist_knnBkg1.GetEffectiveEntries(
        ) / hist_inclBkg.GetEffectiveEntries()
    print "Sig Eff. 1 jet knn_ntrk> {}, eff. entries: ".format(
        knnCut), 100 * hist_knnSig1.GetEffectiveEntries(
        ) / hist_inclSig.GetEffectiveEntries()

    print "Bkg Eff. 1 jet knn_ntrk> {}, integral: ".format(
        knnCut), 100 * hist_knnBkg1.GetEffectiveEntries(
        ) / hist_inclBkg.GetEffectiveEntries()
    print "Sig Eff. 1 jet knn_ntrk> {}, integral: ".format(
        knnCut), 100 * hist_knnSig1.GetEffectiveEntries(
        ) / hist_inclSig.GetEffectiveEntries()

    outHistFile.cd()
    hist_knnBkg.SetName("bkg_knn")
    hist_knnSig.SetName("sig_knn")
    hist_knnBkg.Write()
    hist_knnSig.Write()
    outHistFile.Close()
    # Mjj dist for CR compared to inclusive selection
    """