Esempio n. 1
0
def add_knn (data, feat=VAR_TAU21, newfeat=None, path=None):
    """
    Add kNN-transformed `feat` to `data`. Modifies `data` in-place.

    Arguments:
        data: Pandas DataFrame to which to add the kNN-transformed variable.
        feat: Substructure variable to be decorrelated.
        newfeat: Name of output feature. By default, `{feat}kNN`.
        path: Path to trained kNN transform model.
    """

    # Check(s)
    assert path is not None, "add_knn: Please specify a model path."
    if newfeat is None:
        newfeat = '{}kNN'.format(feat)
        pass

    # Prepare data array
    X = standardise(data)

    # Load model
    knn = loadclf(path)

    # Add new classifier to data array
    data[newfeat] = pd.Series(data[feat] - knn.predict(X).flatten(), index=data.index)
    return
def get_function (jssVar, funcName):
    css_func_all = []
    for m in range(len(MASS_BINS)-1):
        clf = loadclf('models/css/css_%s_%s_%i.pkl.gz' % (jssVar, funcName, m))
        css_func_all.append(clf)
        pass

    return css_func_all
Esempio n. 3
0
def add_ddt (data, feat=VAR_TAU21, newfeat=None, path='models/ddt/ddt_{}.pkl.gz'.format(VAR_TAU21)):
    """
    Add DDT-transformed `feat` to `data`. Modifies `data` in-place.

    Arguments:
        data: Pandas DataFrame to which to add the DDT-transformed variable.
        feat: Substructure variable to be decorrelated.
        newfeat: Name of output feature. By default, `{feat}DDT`.
        path: Path to trained DDT transform model.
    """

    # Check(s)
    if newfeat is None:
        newfeat = feat + 'DDT'
        pass

    # Load model
    ddt = loadclf(path)

    # Add new classifier to data array
    data[newfeat] = pd.Series(data[feat] - ddt.predict(data[[VAR_RHODDT]].values), index=data.index)
    return
def add_bdt(data, var=None, path=None):
    """
    Add BDT-based classifier `feat` to `data`. Modifies `data` in-place.

    Arguments:
        data: Pandas DataFrame to which to add the DDT-transformed variable.
        var: Name of output feature.
        path: Path to trained BDT classifier.
    """

    # Check(s)
    assert var is not None, "add_bdt: Please specify an output variable name."
    assert path is not None, "add_bdt: Please specify a model path."

    # Load model
    clf = loadclf(path)

    # Use parallelisation to speed-up prediction
    result = parallel_predict(clf, data, n_jobs=16, method='predict_proba')

    # Add new classifier to data array
    data[var] = pd.Series(result[:, 1].flatten(), index=data.index)
    return
def main(args):

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, _, _ = load_data(args.input + 'data.h5', train=True)
    msk_sig = data['signal'] == 1
    msk_bkg = ~msk_sig

    # -------------------------------------------------------------------------
    ####
    #### # Initialise Keras backend
    #### initialise_backend(args)
    ####
    #### # Neural network-specific initialisation of the configuration dict
    #### initialise_config(args, cfg)
    ####
    #### # Keras import(s)
    #### from keras.models import load_model
    ####
    #### # NN
    #### from run.adversarial.common import add_nn
    #### with Profile("NN"):
    ####     classifier = load_model('models/adversarial/classifier/full/classifier.h5')
    ####     add_nn(data, classifier, 'NN')
    ####     pass
    # -------------------------------------------------------------------------

    # Fill measured profile
    profile_meas, _ = fill_profile(data[msk_bkg])

    # Add k-NN variable
    knnfeat = 'knn'
    add_knn(data,
            newfeat=knnfeat,
            path='models/knn/knn_{}_{}.pkl.gz'.format(VAR, EFF))

    # Loading KNN classifier
    knn = loadclf('models/knn/knn_{:s}_{:.0f}.pkl.gz'.format(VAR, EFF))

    # Filling fitted profile
    with Profile("Filling fitted profile"):
        rebin = 8
        edges, centres = dict(), dict()
        for ax, var in zip(['x', 'y'], [VARX, VARY]):

            # Short-hands
            vbins, vmin, vmax = AXIS[var]

            # Re-binned bin edges  @TODO: Make standardised right away?
            edges[ax] = np.interp(
                np.linspace(0, vbins, vbins * rebin + 1, endpoint=True),
                range(vbins + 1),
                np.linspace(vmin, vmax, vbins + 1, endpoint=True))

            # Re-binned bin centres
            centres[ax] = edges[ax][:-1] + 0.5 * np.diff(edges[ax])
            pass

        # Get predictions evaluated at re-binned bin centres
        g = dict()
        g['x'], g['y'] = np.meshgrid(centres['x'], centres['y'])
        g['x'], g['y'] = standardise(g['x'], g['y'])

        X = np.vstack((g['x'].flatten(), g['y'].flatten())).T
        fit = knn.predict(X).reshape(g['x'].shape).T

        # Fill ROOT "profile"
        profile_fit = ROOT.TH2F('profile_fit', "",
                                len(edges['x']) - 1, edges['x'].flatten('C'),
                                len(edges['y']) - 1, edges['y'].flatten('C'))
        root_numpy.array2hist(fit, profile_fit)
        pass

    # Plotting
    with Profile("Plotting"):
        for fit in [False, True]:

            # Select correct profile
            profile = profile_fit if fit else profile_meas

            # Plot
            plot(profile, fit)
            pass
        pass

    # Plotting local selection efficiencies for D2-kNN < 0
    # -- Compute signal efficiency
    for sig, msk in zip([True, False], [msk_sig, msk_bkg]):

        if sig:
            rgbs = [(247 / 255., 251 / 255., 255 / 255.),
                    (222 / 255., 235 / 255., 247 / 255.),
                    (198 / 255., 219 / 255., 239 / 255.),
                    (158 / 255., 202 / 255., 225 / 255.),
                    (107 / 255., 174 / 255., 214 / 255.),
                    (66 / 255., 146 / 255., 198 / 255.),
                    (33 / 255., 113 / 255., 181 / 255.),
                    (8 / 255., 81 / 255., 156 / 255.),
                    (8 / 255., 48 / 255., 107 / 255.)]

            red, green, blue = map(np.array, zip(*rgbs))
            nb_cols = len(rgbs)
            stops = np.linspace(0, 1, nb_cols, endpoint=True)
        else:
            rgbs = [(255 / 255., 51 / 255., 4 / 255.),
                    (247 / 255., 251 / 255., 255 / 255.),
                    (222 / 255., 235 / 255., 247 / 255.),
                    (198 / 255., 219 / 255., 239 / 255.),
                    (158 / 255., 202 / 255., 225 / 255.),
                    (107 / 255., 174 / 255., 214 / 255.),
                    (66 / 255., 146 / 255., 198 / 255.),
                    (33 / 255., 113 / 255., 181 / 255.),
                    (8 / 255., 81 / 255., 156 / 255.),
                    (8 / 255., 48 / 255., 107 / 255.)]

            red, green, blue = map(np.array, zip(*rgbs))
            nb_cols = len(rgbs)
            stops = np.array([0] + list(
                np.linspace(0, 1, nb_cols - 1, endpoint=True) *
                (1. - EFF / 100.) + EFF / 100.))
            pass

        ROOT.TColor.CreateGradientColorTable(nb_cols, stops, red, green, blue,
                                             NB_CONTOUR)

        # Define arrays
        shape = (AXIS[VARX][0], AXIS[VARY][0])
        bins = [
            np.linspace(AXIS[var][1],
                        AXIS[var][2],
                        AXIS[var][0] + 1,
                        endpoint=True) for var in VARS
        ]
        x, y, z = (np.zeros(shape) for _ in range(3))

        # Create `profile` histogram
        profile = ROOT.TH2F('profile', "",
                            len(bins[0]) - 1, bins[0].flatten('C'),
                            len(bins[1]) - 1, bins[1].flatten('C'))

        # Compute inclusive efficiency in bins of `VARY`
        effs = list()
        for edges in zip(bins[1][:-1], bins[1][1:]):
            msk_bin = (data[VARY] > edges[0]) & (data[VARY] < edges[1])
            msk_pass = data[knnfeat] < 0
            num = data.loc[msk & msk_bin & msk_pass,
                           'weight_test'].values.sum()
            den = data.loc[msk & msk_bin, 'weight_test'].values.sum()
            effs.append(num / den)
            pass

        # Fill profile
        for i, j in itertools.product(*map(range, shape)):

            # Bin edges in x and y
            edges = [bin[idx:idx + 2] for idx, bin in zip([i, j], bins)]

            # Masks
            msks = [(data[var] > edges[dim][0]) & (data[var] <= edges[dim][1])
                    for dim, var in enumerate(VARS)]
            msk_bin = reduce(lambda x, y: x & y, msks)
            data_ = data[msk & msk_bin]

            # Set non-zero bin content
            if np.sum(msk & msk_bin):
                msk_pass = data_[knnfeat] < 0
                num = data.loc[msk & msk_bin & msk_pass,
                               'weight_test'].values.sum()
                den = data.loc[msk & msk_bin, 'weight_test'].values.sum()
                eff = num / den
                profile.SetBinContent(i + 1, j + 1, eff)
                pass
            pass

        c = rp.canvas(batch=True)
        pad = c.pads()[0]._bare()
        pad.cd()
        pad.SetRightMargin(0.20)
        pad.SetLeftMargin(0.15)
        pad.SetTopMargin(0.10)

        # Styling
        profile.GetXaxis().SetTitle("Large-#it{R} jet " +
                                    latex(VARX, ROOT=True) +
                                    " = log(m^{2}/p_{T}^{2})")
        profile.GetYaxis().SetTitle("Large-#it{R} jet " +
                                    latex(VARY, ROOT=True) + " [GeV]")
        profile.GetZaxis().SetTitle("Selection efficiency for %s^{(%s%%)}" %
                                    (latex(VAR, ROOT=True), EFF))

        profile.GetYaxis().SetNdivisions(505)
        profile.GetZaxis().SetNdivisions(505)
        profile.GetXaxis().SetTitleOffset(1.4)
        profile.GetYaxis().SetTitleOffset(1.8)
        profile.GetZaxis().SetTitleOffset(1.3)
        zrange = (0., 1.)
        if zrange:
            profile.GetZaxis().SetRangeUser(*zrange)
            pass
        profile.SetContour(NB_CONTOUR)

        # Draw
        profile.Draw('COLZ')

        # Decorations
        c.text(qualifier=QUALIFIER, ymax=0.92, xmin=0.15)
        c.text(["#sqrt{s} = 13 TeV", "#it{W} jets" if sig else "Multijets"],
               ATLAS=False)

        # -- Efficiencies
        xaxis = profile.GetXaxis()
        yaxis = profile.GetYaxis()
        tlatex = ROOT.TLatex()
        tlatex.SetTextColor(ROOT.kGray + 2)
        tlatex.SetTextSize(0.023)
        tlatex.SetTextFont(42)
        tlatex.SetTextAlign(32)
        xt = xaxis.GetBinLowEdge(xaxis.GetNbins())
        for eff, ibin in zip(effs, range(1, yaxis.GetNbins() + 1)):
            yt = yaxis.GetBinCenter(ibin)
            tlatex.DrawLatex(
                xt, yt, "%s%.1f%%" %
                ("#bar{#varepsilon}^{rel}_{%s} = " %
                 ('sig' if sig else 'bkg') if ibin == 1 else '', eff * 100.))
            pass

        # -- Bounds
        BOUNDS[0].DrawCopy("SAME")
        BOUNDS[1].DrawCopy("SAME")
        c.latex("m > 50 GeV",
                -4.5,
                BOUNDS[0].Eval(-4.5) + 30,
                align=21,
                angle=-37,
                textsize=13,
                textcolor=ROOT.kGray + 3)
        c.latex("m < 300 GeV",
                -2.5,
                BOUNDS[1].Eval(-2.5) - 30,
                align=23,
                angle=-57,
                textsize=13,
                textcolor=ROOT.kGray + 3)

        # Save
        mkdir('figures/knn/')
        c.save('figures/knn/knn_eff_{}_{:s}_{:.0f}.pdf'.format(
            'sig' if sig else 'bkg', VAR, EFF))
        pass

    return
Esempio n. 6
0
def add_knn(data, feat=VAR, newfeat=None, path=None):
    """
    Add kNN-transformed `feat` to `data`. Modifies `data` in-place.

    Arguments:
        data: Pandas DataFrame to which to add the kNN-transformed variable.
        feat: Substructure variable to be decorrelated.
        newfeat: Name of output feature. By default, `{feat}kNN`.
        path: Path to trained kNN transform model.
    """

    # Check(s)
    assert path is not None, "add_knn: Please specify a model path."

    if newfeat is None:
        newfeat = '{}kNN'.format(feat)
        pass

    # Prepare data array
    if ('1D' in FIT) or ('lin' in FIT) or ('poly' in FIT) or ('erf' in FIT):
        X = data[VARX].values.astype(np.float)
        X = X.reshape(-1, 1)

    else:
        X = standardise(data, rank=newfeat)

    # Load model
    knn = loadclf(path)

    # Add new classifier to data array

    if 'erf' in FIT:
        print "HEJ ", knn[0], knn[1], knn[2]
        if 'lead' in newfeat:
            data[newfeat] = pd.Series(data['lead_' + feat] -
                                      func(X, knn[0], knn[1], knn[2]),
                                      index=data.index)  #predict(X).flatten()
        if 'sub' in newfeat:
            data[newfeat] = pd.Series(data['sub_' + feat] -
                                      func(X, knn[0], knn[1], knn[2]),
                                      index=data.index)  #predict(X).flatten()
        else:
            data[newfeat] = pd.Series(data[feat] -
                                      func(X, knn[0], knn[1], knn[2]),
                                      index=data.index)  #predict(X).flatten()

    else:
        if 'lead' in newfeat:
            data[newfeat] = pd.Series(data['lead_' + feat] - knn.predict(X),
                                      index=data.index)  #predict(X).flatten()
        elif 'sub' in newfeat:
            data[newfeat] = pd.Series(data['sub_' + feat] - knn.predict(X),
                                      index=data.index)  #predict(X).flatten()
        else:
            print "Something wrong with the newfeat name?"
            #print "Check shapes !!: ", data[feat].shape, knn.predict(X).flatten().shape

            data[newfeat] = pd.Series(data[feat] - knn.predict(X).flatten(),
                                      index=data.index)  #

    return
Esempio n. 7
0
def test(data, variable, bg_eff, signal_above=False):
    # Shout out to Cynthia Brewer and Mark Harrower
    # [http://colorbrewer2.org]. Palette is colorblind-safe.
    rgbs = [(247 / 255., 251 / 255., 255 / 255.),
            (222 / 255., 235 / 255., 247 / 255.),
            (198 / 255., 219 / 255., 239 / 255.),
            (158 / 255., 202 / 255., 225 / 255.),
            (107 / 255., 174 / 255., 214 / 255.),
            (66 / 255., 146 / 255., 198 / 255.),
            (33 / 255., 113 / 255., 181 / 255.),
            (8 / 255., 81 / 255., 156 / 255.),
            (8 / 255., 48 / 255., 107 / 255.)]

    red, green, blue = map(np.array, zip(*rgbs))
    nb_cols = len(rgbs)
    stops = np.linspace(0, 1, nb_cols, endpoint=True)
    ROOT.TColor.CreateGradientColorTable(nb_cols, stops, red, green, blue,
                                         NB_CONTOUR)

    msk_sig = data['signal'] == 1
    msk_bkg = ~msk_sig

    # Fill measured profile
    with Profile("filling profile"):
        profile_meas, _ = fill_profile(data[msk_bkg],
                                       variable,
                                       bg_eff,
                                       signal_above=signal_above)

    # Add k-NN variable
    with Profile("adding variable"):
        knnfeat = 'knn'
        #add_knn(data, feat=variable, newfeat=knnfeat, path='knn_fitter/models/knn_{}_{}.pkl.gz'.format(variable, bg_eff))
        add_knn(data,
                feat=variable,
                newfeat=knnfeat,
                path=args.output +
                '/models/knn_{:s}_{:.0f}.pkl.gz'.format(variable, bg_eff))

    # Loading KNN classifier
    with Profile("loading model"):
        #knn = loadclf('knn_fitter/models/knn_{:s}_{:.0f}.pkl.gz'.format(variable, bg_eff))
        knn = loadclf(
            args.output +
            '/models/knn_{:s}_{:.0f}.pkl.gz'.format(variable, bg_eff))

    # Filling fitted profile
    with Profile("Filling fitted profile"):
        rebin = 8
        edges, centres = dict(), dict()
        for ax, var in zip(['x', 'y'], [VARX, VARY]):

            # Short-hands
            vbins, vmin, vmax = AXIS[var]

            # Re-binned bin edges
            edges[ax] = np.interp(
                np.linspace(0, vbins, vbins * rebin + 1, endpoint=True),
                range(vbins + 1),
                np.linspace(vmin, vmax, vbins + 1, endpoint=True))

            # Re-binned bin centres
            centres[ax] = edges[ax][:-1] + 0.5 * np.diff(edges[ax])
            pass

        # Get predictions evaluated at re-binned bin centres
        g = dict()
        g['x'], g['y'] = np.meshgrid(centres['x'], centres['y'])
        g['x'], g['y'] = standardise(g['x'], g['y'])

        X = np.vstack((g['x'].flatten(), g['y'].flatten())).T
        fit = knn.predict(X).reshape(g['x'].shape).T

        # Fill ROOT "profile"
        profile_fit = ROOT.TH2F('profile_fit', "",
                                len(edges['x']) - 1, edges['x'].flatten('C'),
                                len(edges['y']) - 1, edges['y'].flatten('C'))
        root_numpy.array2hist(fit, profile_fit)
        pass

    # Plotting
    for fit in [False, True]:

        # Select correct profile
        profile = profile_fit if fit else profile_meas

        # Plot
        plot(profile, fit, variable, bg_eff)
        pass
    pass

    # Plotting local selection efficiencies for D2-kNN < 0
    # -- Compute signal efficiency
    for sig, msk in zip([True, False], [msk_sig, msk_bkg]):
        if sig:
            print "working on signal"
        else:
            print "working on bg"

        if sig:
            rgbs = [(247 / 255., 251 / 255., 255 / 255.),
                    (222 / 255., 235 / 255., 247 / 255.),
                    (198 / 255., 219 / 255., 239 / 255.),
                    (158 / 255., 202 / 255., 225 / 255.),
                    (107 / 255., 174 / 255., 214 / 255.),
                    (66 / 255., 146 / 255., 198 / 255.),
                    (33 / 255., 113 / 255., 181 / 255.),
                    (8 / 255., 81 / 255., 156 / 255.),
                    (8 / 255., 48 / 255., 107 / 255.)]

            red, green, blue = map(np.array, zip(*rgbs))
            nb_cols = len(rgbs)
            stops = np.linspace(0, 1, nb_cols, endpoint=True)
        else:
            rgbs = [(255 / 255., 51 / 255., 4 / 255.),
                    (247 / 255., 251 / 255., 255 / 255.),
                    (222 / 255., 235 / 255., 247 / 255.),
                    (198 / 255., 219 / 255., 239 / 255.),
                    (158 / 255., 202 / 255., 225 / 255.),
                    (107 / 255., 174 / 255., 214 / 255.),
                    (66 / 255., 146 / 255., 198 / 255.),
                    (33 / 255., 113 / 255., 181 / 255.),
                    (8 / 255., 81 / 255., 156 / 255.),
                    (8 / 255., 48 / 255., 107 / 255.)]

            red, green, blue = map(np.array, zip(*rgbs))
            nb_cols = len(rgbs)
            stops = np.array([0] + list(
                np.linspace(0, 1, nb_cols - 1, endpoint=True) *
                (1. - bg_eff / 100.) + bg_eff / 100.))
            pass

            ROOT.TColor.CreateGradientColorTable(nb_cols, stops, red, green,
                                                 blue, NB_CONTOUR)

        # Define arrays
        shape = (AXIS[VARX][0], AXIS[VARY][0])
        bins = [
            np.linspace(AXIS[var][1],
                        AXIS[var][2],
                        AXIS[var][0] + 1,
                        endpoint=True) for var in VARS
        ]
        x, y, z = (np.zeros(shape) for _ in range(3))

        # Create `profile` histogram
        profile = ROOT.TH2F('profile', "",
                            len(bins[0]) - 1, bins[0].flatten('C'),
                            len(bins[1]) - 1, bins[1].flatten('C'))

        # Compute inclusive efficiency in bins of `VARY`
        effs = list()
        for edges in zip(bins[1][:-1], bins[1][1:]):
            msk_bin = (data[VARY] > edges[0]) & (data[VARY] < edges[1])
            if signal_above:
                msk_pass = data[knnfeat] > 0  # ensure correct cut direction
            else:
                msk_pass = data[knnfeat] < 0
            num_msk = msk * msk_bin * msk_pass
            num = data.loc[num_msk, 'weight_test'].values.sum()
            den = data.loc[msk & msk_bin, 'weight_test'].values.sum()
            effs.append(num / den)
            pass

        # Fill profile
        with Profile("Fill profile"):
            for i, j in itertools.product(*map(range, shape)):
                #print "Fill profile - (i, j) = ({}, {})".format(i,j)
                # Bin edges in x and y
                edges = [bin[idx:idx + 2] for idx, bin in zip([i, j], bins)]

                # Masks
                msks = [
                    (data[var] > edges[dim][0]) & (data[var] <= edges[dim][1])
                    for dim, var in enumerate(VARS)
                ]
                msk_bin = reduce(lambda x, y: x & y, msks)

                # Set non-zero bin content
                if np.sum(msk & msk_bin):
                    if signal_above:
                        msk_pass = data[
                            knnfeat] > 0  # ensure correct cut direction
                    else:
                        msk_pass = data[knnfeat] < 0
                    num_msk = msk * msk_bin * msk_pass
                    num = data.loc[num_msk, 'weight_test'].values.sum()
                    den = data.loc[msk & msk_bin, 'weight_test'].values.sum()
                    eff = num / den
                    profile.SetBinContent(i + 1, j + 1, eff)
                    pass

        c = rp.canvas(batch=True)
        pad = c.pads()[0]._bare()
        pad.cd()
        pad.SetRightMargin(0.20)
        pad.SetLeftMargin(0.15)
        pad.SetTopMargin(0.10)

        # Styling
        profile.GetXaxis().SetTitle("Large-#it{R} jet " +
                                    latex(VARX, ROOT=True) +
                                    " = log(m^{2}/p_{T}^{2})")
        profile.GetYaxis().SetTitle("Large-#it{R} jet " +
                                    latex(VARY, ROOT=True) + " [GeV]")
        profile.GetZaxis().SetTitle("Selection efficiency for %s^{(%s%%)}" %
                                    (latex(variable, ROOT=True), bg_eff))

        profile.GetYaxis().SetNdivisions(505)
        profile.GetZaxis().SetNdivisions(505)
        profile.GetXaxis().SetTitleOffset(1.4)
        profile.GetYaxis().SetTitleOffset(1.8)
        profile.GetZaxis().SetTitleOffset(1.3)
        zrange = (0., 1.)
        if zrange:
            profile.GetZaxis().SetRangeUser(*zrange)
            pass
        profile.SetContour(NB_CONTOUR)

        # Draw
        profile.Draw('COLZ')

        # Decorations
        c.text(qualifier=QUALIFIER, ymax=0.92, xmin=0.15, ATLAS=False)
        c.text(["#sqrt{s} = 13 TeV", "#it{W} jets" if sig else "Multijets"],
               ATLAS=False)

        # -- Efficiencies
        xaxis = profile.GetXaxis()
        yaxis = profile.GetYaxis()
        tlatex = ROOT.TLatex()
        tlatex.SetTextColor(ROOT.kGray + 2)
        tlatex.SetTextSize(0.023)
        tlatex.SetTextFont(42)
        tlatex.SetTextAlign(32)
        xt = xaxis.GetBinLowEdge(xaxis.GetNbins())
        for eff, ibin in zip(effs, range(1, yaxis.GetNbins() + 1)):
            yt = yaxis.GetBinCenter(ibin)
            tlatex.DrawLatex(
                xt, yt, "%s%.1f%%" %
                ("#bar{#varepsilon}^{rel}_{%s} = " %
                 ('sig' if sig else 'bkg') if ibin == 1 else '', eff * 100.))
            pass

        # -- Bounds
        BOUNDS[0].DrawCopy("SAME")
        BOUNDS[1].DrawCopy("SAME")
        c.latex("m > 50 GeV",
                -4.5,
                BOUNDS[0].Eval(-4.5) + 30,
                align=21,
                angle=-37,
                textsize=13,
                textcolor=ROOT.kGray + 3)
        c.latex("m < 300 GeV",
                -2.5,
                BOUNDS[1].Eval(-2.5) - 30,
                align=23,
                angle=-57,
                textsize=13,
                textcolor=ROOT.kGray + 3)

        # Save
        mkdir('knn_fitter/figures/')
        c.save('knn_fitter/figures/knn_eff_{}_{:s}_{:.0f}.pdf'.format(
            'sig' if sig else 'bkg', variable, bg_eff))
        mkdir(args.output + '/figures/')
        c.save(args.output + '/figures/knn_eff_{}_{:s}_{:.0f}.pdf'.format(
            'sig' if sig else 'bkg', variable, bg_eff))
        pass

    return
Esempio n. 8
0
def main (args):

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, _, _ = load_data('data/' + args.input) #, test=True)
    msk_sig = data['signal'] == 1
    msk_bkg = ~msk_sig

    # -------------------------------------------------------------------------
    ####
    #### # Initialise Keras backend
    #### initialise_backend(args)
    ####
    #### # Neural network-specific initialisation of the configuration dict
    #### initialise_config(args, cfg)
    ####
    #### # Keras import(s)
    #### from keras.models import load_model
    ####
    #### # NN
    #### from run.adversarial.common import add_nn
    #### with Profile("NN"):
    ####     classifier = load_model('models/adversarial/classifier/full/classifier.h5')
    ####     add_nn(data, classifier, 'NN')
    ####     pass
    # -------------------------------------------------------------------------

    # Fill measured profile
    profile_meas, (x,percs, err) = fill_profile_1D(data[msk_bkg])
    weights = 1/err

    # Add k-NN variable
    knnfeat = 'knn'
    orgfeat = VAR
    add_knn(data, newfeat=knnfeat, path='models/knn/{}_{}_{}_{}.pkl.gz'.format(FIT, VAR, EFF, MODEL)) 

    # Loading KNN classifier
    knn = loadclf('models/knn/{}_{:s}_{}_{}.pkl.gz'.format(FIT, VAR, EFF, MODEL))
    #knn = loadclf('models/knn/{}_{:s}_{}_{}.pkl.gz'.format(FIT, VAR, EFF, MODEL))

    X = x.reshape(-1,1)

    # Filling fitted profile
    with Profile("Filling fitted profile"):
        rebin = 8

        # Short-hands
        vbins, vmin, vmax = AXIS[VARX]

        # Re-binned bin edges  @TODO: Make standardised right away?
        # edges = np.interp(np.linspace(0, vbins, vbins * rebin + 1, endpoint=True), 
        #                  range(vbins + 1),
        #                  np.linspace(vmin, vmax,  vbins + 1,         endpoint=True))

        fineBins = np.linspace(vmin, vmax,  vbins*rebin + 1,         endpoint=True)
        orgBins = np.linspace(vmin, vmax,  vbins + 1,         endpoint=True)

        # Re-binned bin centres
        fineCentres = fineBins[:-1] + 0.5 * np.diff(fineBins)
        orgCentres = orgBins[:-1] + 0.5 * np.diff(orgBins)
        
        pass

        # Get predictions evaluated at re-binned bin centres
        if 'erf' in FIT:
            fit = func(fineCentres, knn[0], knn[1], knn[2])
            print "Check: ", func([1500, 2000], knn[0], knn[1], knn[2]) 
        else:
            fit = knn.predict(fineCentres.reshape(-1,1)) #centres.reshape(-1,1))

        # Fill ROOT "profile"
        profile_fit = ROOT.TH1F('profile_fit', "", len(fineBins) - 1, fineBins.flatten('C'))
        root_numpy.array2hist(fit, profile_fit)
        
        knn1 = PolynomialFeatures(degree=2)                                           
        X_poly = knn1.fit_transform(X)
        reg = LinearRegression(fit_intercept=False) #fit_intercept=False)
        reg.fit(X_poly, percs, weights)
        score = round(reg.score(X_poly, percs), 4)
        coef = reg.coef_
        intercept = reg.intercept_
        print "COEFFICIENTS: ", coef, intercept
        
        TCoef = ROOT.TVector3(coef[0], coef[1], coef[2]) 
        outFile = ROOT.TFile.Open("models/{}_jet_ungrtrk500_eff{}_stat{}_{}.root".format(FIT, EFF, MIN_STAT, MODEL),"RECREATE")
        outFile.cd()
        TCoef.Write()
        profile_fit.SetName("kNNfit")
        profile_fit.Write()
        outFile.Close()

        # profile_meas2 = ROOT.TH1F('profile_meas', "", len(x) - 1, x.flatten('C'))
        # root_numpy.array2hist(percs, profile_meas2)
        profile_meas2 = ROOT.TGraph(len(x), x, percs) 
        pass


    # Plotting
    with Profile("Plotting"):
        # Plot
        plot(profile_meas2, profile_fit)
        pass

    # Plotting local selection efficiencies for D2-kNN < 0
    # -- Compute signal efficiency

    # MC weights are scaled with lumi. This is just for better comparison
    #if INPUT =="mc": 
    #    data.loc[:,'TotalEventWeight'] /=  139000000. 

    for sig, msk in zip([True, False], [msk_sig, msk_bkg]):

        # Define arrays
        shape   = AXIS[VARX][0]
        bins    = np.linspace(AXIS[VARX][1], AXIS[VARX][2], AXIS[VARX][0]+ 1, endpoint=True)
        #bins = np.linspace(AXIS[VARX][1], 4000, 40, endpoint=True)
        #bins = np.append(bins, [4500, 5000, 5500, 6000, 6500, 7000, 7500, 8000])

        print "HERE: ", bins 
        
        #x, y = (np.zeros(shape) for _ in range(2))

        # Create `profile` histogram
        profile_knn = ROOT.TH1F('profile', "", len(bins) - 1, bins ) #.flatten('C') )
        profile_org = ROOT.TH1F('profile', "", len(bins) - 1, bins ) #.flatten('C') )

        # Compute inclusive efficiency in bins of `VARX`
        effs = list()
        
        for i in range(shape):
            msk_bin  = (data[VARX] > bins[i]) & (data[VARX] <= bins[i+1])
            msk_pass =  data[knnfeat] > 0 # <?
            msk_pass_org =  data[orgfeat] > 70 # <?
            num = data.loc[msk & msk_bin & msk_pass, 'TotalEventWeight'].values.sum()
            num_org = data.loc[msk & msk_bin & msk_pass_org, 'TotalEventWeight'].values.sum()
            den = data.loc[msk & msk_bin,'TotalEventWeight'].values.sum()
            if den > 0:
                eff = num/den *100.
                eff_org = num_org/den *100.
                profile_knn.SetBinContent(i + 1, eff)
                profile_org.SetBinContent(i + 1, eff_org)
                effs.append(eff)
            #else:
            #print i, "Density = 0"
            pass

        c = rp.canvas(batch=True)
        leg = ROOT.TLegend(0.2, 0.75, 0.5, 0.85)
        leg.AddEntry(profile_knn, "#it{n}_{trk}^{#varepsilon=%s%%} > 0" % ( EFF), "l")
        leg.AddEntry(profile_org, "#it{n}_{trk} > 70", "l")
        leg.Draw()

        pad = c.pads()[0]._bare()
        pad.cd()
        pad.SetRightMargin(0.10)
        pad.SetLeftMargin(0.15)
        pad.SetTopMargin(0.10)

        # Styling
        profile_knn.SetLineColor(rp.colours[1])
        profile_org.SetLineColor(rp.colours[2])
        profile_knn.SetMarkerStyle(24)
        profile_knn.GetXaxis().SetTitle( "#it{m}_{jj} [GeV]" ) #latex(VARX, ROOT=True) + "[GeV]") #+ " = log(m^{2}/p_{T}^{2})")
        #profile.GetXaxis().SetTitle("Large-#it{R} jet " + latex(VARX, ROOT=True))# + " = log(m^{2}/p_{T}^{2})")
        profile_org.GetYaxis().SetTitle("Selection efficiency (%)") # for #it{n}_{trk}^{#varepsilon=%s%%}>0" % ( EFF))

        profile_knn.GetYaxis().SetNdivisions(505)
        #profile_knn.GetXaxis().SetNdivisions(505)
        profile_knn.GetXaxis().SetTitleOffset(1.4)
        profile_knn.GetYaxis().SetTitleOffset(1.8)
        profile_knn.GetXaxis().SetRangeUser(*XRANGE)
        profile_org.GetXaxis().SetRangeUser(*XRANGE)

        yrange = (0., EFF*3) #2.0 percent
        if yrange:
            profile_knn.GetYaxis().SetRangeUser(*yrange)
            profile_org.GetYaxis().SetRangeUser(*yrange)
            pass

        # Draw
        profile_org.Draw()
        profile_knn.Draw("same")

        # Save
        mkdir('figures/knn/')
        c.save('figures/knn/{}_eff_{}_{:s}_{}_{}_stat{}.pdf'.format(FIT, 'sig' if sig else 'bkg', VAR, EFF, MODEL+INPUT, MIN_STAT))
        #c.save('figures/knn/{}_eff_{}_{:s}_{}_{}_stat{}.png'.format(FIT, 'sig' if sig else 'bkg', VAR, EFF, MODEL, MIN_STAT))
        c.save('figures/knn/{}_eff_{}_{:s}_{}_{}_stat{}.eps'.format(FIT, 'sig' if sig else 'bkg', VAR, EFF, MODEL+INPUT, MIN_STAT))
        del c
        
        pass

    return
Esempio n. 9
0
def main(args):

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, _, _ = load_data(args.input + 'data.h5', train=True, background=True)

    # -------------------------------------------------------------------------
    ####
    #### # Initialise Keras backend
    #### initialise_backend(args)
    ####
    #### # Neural network-specific initialisation of the configuration dict
    #### initialise_config(args, cfg)
    ####
    #### # Keras import(s)
    #### from keras.models import load_model
    ####
    #### # NN
    #### from run.adversarial.common import add_nn
    #### with Profile("NN"):
    ####     classifier = load_model('models/adversarial/classifier/full/classifier.h5')
    ####     add_nn(data, classifier, 'NN')
    ####     pass
    # -------------------------------------------------------------------------

    # Fill measured profile
    profile_meas, _ = fill_profile(data)

    # Loading KNN classifier
    knn = loadclf('models/knn/knn_{:s}_{:.0f}.pkl.gz'.format(VAR, EFF))

    # Filling fitted profile
    with Profile("Filling fitted profile"):
        rebin = 8
        edges, centres = dict(), dict()
        for ax, var in zip(['x', 'y'], [VARX, VARY]):

            # Short-hands
            vbins, vmin, vmax = AXIS[var]

            # Re-binned bin edges  @TODO: Make standardised right away?
            edges[ax] = np.interp(
                np.linspace(0, vbins, vbins * rebin + 1, endpoint=True),
                range(vbins + 1),
                np.linspace(vmin, vmax, vbins + 1, endpoint=True))

            # Re-binned bin centres
            centres[ax] = edges[ax][:-1] + 0.5 * np.diff(edges[ax])
            pass

        # Get predictions evaluated at re-binned bin centres
        g = dict()
        g['x'], g['y'] = np.meshgrid(centres['x'], centres['y'])
        g['x'], g['y'] = standardise(g['x'], g['y'])

        X = np.vstack((g['x'].flatten(), g['y'].flatten())).T
        fit = knn.predict(X).reshape(g['x'].shape).T

        # Fill ROOT "profile"
        profile_fit = ROOT.TH2F('profile_fit', "",
                                len(edges['x']) - 1, edges['x'].flatten('C'),
                                len(edges['y']) - 1, edges['y'].flatten('C'))
        root_numpy.array2hist(fit, profile_fit)
        pass

    # Plotting
    with Profile("Plotting"):
        for fit in [False, True]:

            # Select correct profile
            profile = profile_fit if fit else profile_meas

            # Plot
            plot(profile, fit)
            pass
        pass

    return