예제 #1
0
def main(args):

    # Definitions
    histstyle = dict(**HISTSTYLE)

    # Initialise
    args, cfg = initialise(args)

    # Load data
    #data = np.zeros(1, 95213009, 10)
    data, features, _ = load_data(
        'data/djr_LCTopo_2.h5')  # + args.input) #, test=True) #
    #data2, features, _ = load_data('data/djr_LCTopo_2.h5') # + args.input) #, test=True) #
    #data = np.concatenate((data1, data2))

    #f1 = h5py.File('data/djr_LCTopo_1.h5', 'r')
    #f2 = h5py.File('data/djr_LCTopo_2.h5', 'r')

    knnCut = 0
    ntrkCut = 50
    emfracCut = 0.65
    scale = 139 * 1000000  # (inverse nanobarn)
    signal_to_plot = 7

    sigDict = {
        0: 'All Models',
        1: 'Model A, m = 2 TeV',
        2: 'Model A, m = 1 TeV',
        3: 'Model A, m = 1.5 TeV',
        4: 'Model A, m = 2.5 TeV',
        5: 'Model B, m = 1 TeV',
        6: 'Model B, m = 1.5 TeV',
        7: 'Model B, m = 2 TeV',
        8: 'Model B, m = 2.5 TeV',
        9: 'Model C, m = 1 TeV',
        10: 'Model C, m = 1.5 TeV',
        11: 'Model C, m = 2 TeV',
        12: 'Model C, m = 2.5 TeV',
        13: 'Model D, m = 1 TeV',
        14: 'Model D, m = 1.5 TeV',
        15: 'Model D, m = 2 TeV',
        16: 'Model D, m = 2.5 TeV',
    }

    outHistFile = ROOT.TFile.Open(
        "figures/mjjHistograms_kNN{}_eff{}.root".format(knnCut, kNN_eff),
        "RECREATE")

    histstyle[True]['label'] = 'Multijets'
    histstyle[False]['label'] = 'Dark jets, {}'.format(sigDict[signal_to_plot])

    # Add knn variables

    #base_var = ['lead_jet_ungrtrk500', 'sub_jet_ungrtrk500']
    base_var = 'jet_ungrtrk500'
    kNN_var = base_var.replace('jet', 'knn')
    #base_vars = ['lead_'+base_var, 'sub_'+base_var]
    #kNN_vars = ['lead_'+kNN_var, 'sub_'+kNN_var]

    print data.shape

    with Profile("Add variables"):
        #for i in range(len(base_var)):
        print "k-NN base variable: {} (cp. {})".format(base_var, kNN_var)
        add_knn(data,
                newfeat='lead_' + kNN_var,
                path='models/knn/{}_{}_{}_{}.pkl.gz'.format(
                    FIT, base_var, kNN_eff, sigModel))
        add_knn(data,
                newfeat='sub_' + kNN_var,
                path='models/knn/{}_{}_{}_{}.pkl.gz'.format(
                    FIT, base_var, kNN_eff, sigModel))

        #add_knn(data, newfeat=kNN_var, path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel))

        print 'models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff,
                                                      sigModel)
        """
        base_var = ['lead_jet_ungrtrk500', 'sub_jet_ungrtrk500']
        kNN_var = [var.replace('jet', 'knn') for var in base_var]
        
        with Profile("Add variables"):
        from run.knn.common import add_knn, MODEL, VAR as kNN_basevar, EFF as kNN_eff
        print "k-NN base variable: {} (cp. {})".format(kNN_basevar, kNN_var)
        for i in range(len(base_var)):
        add_knn(data, newfeat=kNN_var[i], path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var[i], kNN_eff, MODEL))
        print 'models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var[i], kNN_eff, MODEL)
        """

    weight = 'weight'  # 'weight_test' / 'weight'
    bins_pt = np.linspace(450, 3500, 40)
    bins_mjj = np.linspace(0, 8000, 80)

    # Useful masks
    msk_bkg = data['signal'] == 0
    if signal_to_plot == 0:
        msk_sig = data['signal'] == 1
    else:
        msk_sig = data['sigType'] == signal_to_plot

    #msk_weight = data['weight']<0.2

    msk_knn = (data['lead_knn_ungrtrk500'] >
               knnCut) & (data['sub_knn_ungrtrk500'] > knnCut)
    msk_ungr = (data['lead_jet_ungrtrk500'] >
                ntrkCut) & (data['sub_jet_ungrtrk500'] > ntrkCut)
    msk_emfrac = (data['lead_jet_EMFrac'] <
                  emfracCut) & (data['sub_jet_EMFrac'] < emfracCut)

    msk_knn_1 = (data['lead_knn_ungrtrk500'] > knnCut)
    msk_ungr_1 = (data['lead_jet_ungrtrk500'] > ntrkCut)

    #msk_knn = (data['knn_ungrtrk500']>knnCut)
    #msk_ungr = (data['jet_ungrtrk500']>90.0)

    msk_ntrkBkg = msk_ungr & msk_emfrac & msk_bkg  #& msk_weight #& msk_pt & msk_m & msk_eta
    msk_ntrkSig = msk_ungr & msk_emfrac & msk_sig  #& msk_pt & msk_m & msk_eta

    msk_knnBkg = msk_knn & msk_bkg
    msk_knnSig = msk_knn & msk_sig

    msk_ntrkBkg1 = msk_ungr_1 & msk_bkg  #& msk_weight #& msk_pt & msk_m & msk_eta
    msk_ntrkSig1 = msk_ungr_1 & msk_sig  #& msk_pt & msk_m & msk_eta
    msk_knnBkg1 = msk_knn_1 & msk_bkg  #& msk_weight #& msk_pt & msk_m & msk_eta
    msk_knnSig1 = msk_knn_1 & msk_sig  #& msk_pt & msk_m & msk_eta

    msk_inclBkg = msk_bkg  #& msk_weight #& msk_pt & msk_m & msk_eta
    msk_inclSig = msk_sig  #& msk_pt & msk_m & msk_eta

    # Mjj dist with cut on ntrk, ungrtrk compared to inclusive selection
    c = rp.canvas(batch=True)
    hist_inclBkg = c.hist(data.loc[msk_inclBkg, 'dijetmass'].values,
                          bins=bins_mjj,
                          weights=scale * data.loc[msk_inclBkg, weight].values,
                          label="Multijets, Inclusive",
                          normalise=True,
                          linecolor=ROOT.kGreen + 2,
                          linewidth=3)
    hist_knnBkg = c.hist(
        data.loc[msk_knnBkg, 'dijetmass'].values,
        bins=bins_mjj,
        weights=scale * data.loc[msk_knnBkg, weight].values,
        label="Multijets, n_{{trk}}^{{#epsilon}}>{}".format(knnCut),
        normalise=True,
        linecolor=ROOT.kMagenta + 2,
        linestyle=2,
        linewidth=3)

    hist_ntrkBkg = c.hist(data.loc[msk_ntrkBkg, 'dijetmass'].values,
                          bins=bins_mjj,
                          weights=scale * data.loc[msk_ntrkBkg, weight].values,
                          label="Multijets, n_{{trk}}>{}".format(ntrkCut),
                          normalise=True,
                          linecolor=ROOT.kOrange + 2,
                          linestyle=2,
                          linewidth=3)
    #hist_CRBkg = c.hist(data.loc[msk_CR_bkg, 'dijetmass'].values, bins=bins_mjj, weights=scale*data.loc[msk_CR_bkg, weight].values, label="CR Bkg, C<20", normalise=True, linecolor=ROOT.kGray+2, linestyle=2)

    c.legend(width=0.4, xmin=0.5, ymax=0.9)
    c.ylabel("Fraction of jets")
    c.xlabel("m_{jj} [GeV]")
    c.logy()
    #c.ylim(0.00005, 5)
    #c.save('figures/distributions/mjj_Bkg_CR20.pdf'.format(knnCut))
    #c.save('figures/distributions/mjj_Bkg_CR20.eps'.format(knnCut))
    c.save('figures/distributions/mjj_BkgDist_ntrk{}_knn{}_{}.pdf'.format(
        ntrkCut, knnCut, FIT))
    c.save('figures/distributions/mjj_BkgDist_ntrk{}_knn{}_{}.eps'.format(
        ntrkCut, knnCut, FIT))

    del c

    c = rp.canvas(batch=True)
    hist_Sig = c.hist(data.loc[msk_sig, 'dijetmass'].values,
                      bins=bins_mjj,
                      weights=data.loc[msk_sig, weight].values,
                      label="Model A, m = 2 TeV, inclusive",
                      normalise=True,
                      linecolor=ROOT.kGreen + 2)

    hist_knnSig = c.hist(
        data.loc[msk_knnSig, 'dijetmass'].values,
        bins=bins_mjj,
        weights=data.loc[msk_knnSig, weight].values,
        label="Model A, m = 2 TeV, #it{{n}}_{{trk}}^{{#epsilon}}>{}".format(
            knnCut),
        normalise=True,
        linecolor=ROOT.kMagenta + 2,
        linestyle=2)

    hist_ntrkSig = c.hist(
        data.loc[msk_ntrkSig, 'dijetmass'].values,
        bins=bins_mjj,
        weights=data.loc[msk_ntrkSig, weight].values,
        label="Model A, m = 2 TeV, #it{{n}}_{{trk}}>{}".format(ntrkCut),
        normalise=True,
        linecolor=ROOT.kOrange + 2,
        linestyle=2)

    #hist_CRSig = c.hist(data.loc[msk_CR_sig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_CR_sig, weight].values, label="Sig, CR", normalise=True, linecolor=ROOT.kGray+2, linestyle=2)

    c.legend(width=0.4, xmin=0.5, ymax=0.9)
    c.ylabel("Fraction of jets")
    c.xlabel("m_{jj} [GeV]")
    c.logy()
    #c.ylim(0.00005, 5)
    c.save('figures/distributions/mjj_SigDist_ntrk{}_knn{}_{}.pdf'.format(
        ntrkCut, knnCut, FIT))
    c.save('figures/distributions/mjj_SigDist_ntrk{}_knn{}_{}.eps'.format(
        ntrkCut, knnCut, FIT))

    del c

    c = rp.canvas(batch=True)

    hist_knnSig = c.hist(
        data.loc[msk_knnSig, 'dijetmass'].values,
        bins=bins_mjj,
        weights=data.loc[msk_knnSig, weight].values,
        label="Model A, m = 2 TeV, knn_ntrk>{}".format(knnCut),
        normalise=False,
        linecolor=ROOT.kBlue + 1,
        linestyle=1)

    hist_knnBkg = c.hist(data.loc[msk_knnBkg, 'dijetmass'].values,
                         bins=bins_mjj,
                         weights=scale * data.loc[msk_knnBkg, weight].values,
                         label="Multijets, knn_ntrk>{}".format(knnCut),
                         normalise=False,
                         linecolor=ROOT.kMagenta + 2,
                         linestyle=2)

    hist_ntrkBkg = c.hist(data.loc[msk_ntrkBkg, 'dijetmass'].values,
                          bins=bins_mjj,
                          weights=scale * data.loc[msk_ntrkBkg, weight].values,
                          label="Multijets, ntrk>{}".format(ntrkCut),
                          normalise=False,
                          linecolor=ROOT.kOrange + 2,
                          linestyle=2)

    c.legend(width=0.4, xmin=0.3, ymax=0.9)
    c.ylabel("Number of events")
    c.xlabel("m_{jj} [GeV]")
    c.logy()
    #c.ylim(0.00005, 5)
    c.save('figures/distributions/mjj_Dist_noNorm_knn{}_{}.pdf'.format(
        knnCut, FIT))
    c.save('figures/distributions/mjj_Dist_noNorm_knn{}_{}.eps'.format(
        knnCut, FIT))

    bins_mjj = np.linspace(0, 10000, 50)

    # Unscaled histograms for calculating efficiencies

    hist_inclBkg = c.hist(data.loc[msk_inclBkg, 'dijetmass'].values,
                          bins=bins_mjj,
                          weights=scale * data.loc[msk_inclBkg, weight].values,
                          normalise=False)

    hist_inclSig = c.hist(data.loc[msk_inclSig, 'dijetmass'].values,
                          bins=bins_mjj,
                          weights=data.loc[msk_inclSig, weight].values,
                          normalise=False)

    hist_ntrkSig = c.hist(data.loc[msk_ntrkSig, 'dijetmass'].values,
                          bins=bins_mjj,
                          weights=data.loc[msk_ntrkSig, weight].values,
                          normalise=False)

    hist_knnSig = c.hist(data.loc[msk_knnSig, 'dijetmass'].values,
                         bins=bins_mjj,
                         weights=data.loc[msk_knnSig, weight].values,
                         normalise=False)

    hist_ntrkSig1 = c.hist(data.loc[msk_ntrkSig1, 'dijetmass'].values,
                           bins=bins_mjj,
                           weights=data.loc[msk_ntrkSig1, weight].values,
                           normalise=False)

    hist_ntrkBkg1 = c.hist(data.loc[msk_ntrkBkg1, 'dijetmass'].values,
                           bins=bins_mjj,
                           weights=data.loc[msk_ntrkBkg1, weight].values,
                           normalise=False)

    hist_knnBkg1 = c.hist(data.loc[msk_knnBkg1, 'dijetmass'].values,
                          bins=bins_mjj,
                          weights=data.loc[msk_knnBkg1, weight].values,
                          normalise=False)

    hist_knnSig1 = c.hist(data.loc[msk_knnSig1, 'dijetmass'].values,
                          bins=bins_mjj,
                          weights=data.loc[msk_knnSig1, weight].values,
                          normalise=False)

    print "Bkg inclusive integral: ", hist_inclBkg.GetEffectiveEntries()
    print "Sig inclusive integral: ", hist_inclSig.GetEffectiveEntries()

    print "Bkg pass kNN eff entries / integral: ", hist_knnBkg.GetEffectiveEntries(
    ), hist_knnBkg.Integral()
    print "Sig pass kNN eff entries / integral: ", hist_knnSig.GetEffectiveEntries(
    ), hist_knnSig.Integral()

    print "Bkg pass ntrk eff entries / integral: ", hist_ntrkBkg.GetEffectiveEntries(
    ), hist_ntrkBkg.Integral()
    print "Sig pass ntrk eff entries / integral: ", hist_ntrkSig.GetEffectiveEntries(
    ), hist_ntrkSig.Integral()

    print "Bkg Eff. knn_ntrk> {}, eff. entries: ".format(
        knnCut), 100 * hist_knnBkg.GetEffectiveEntries(
        ) / hist_inclBkg.GetEffectiveEntries()
    print "Sig Eff. knn_ntrk> {}, eff. entries: ".format(
        knnCut), 100 * hist_knnSig.GetEffectiveEntries(
        ) / hist_inclSig.GetEffectiveEntries()

    print "Bkg Eff. knn_ntrk> {}, integral: ".format(
        knnCut), 100 * hist_knnBkg.Integral() / hist_inclBkg.Integral()
    print "Sig Eff. knn_ntrk> {}, integral: ".format(
        knnCut), 100 * hist_knnSig.Integral() / hist_inclSig.Integral()

    print "Bkg Eff. ntrk>{}, eff. entries: ".format(
        ntrkCut), 100 * hist_ntrkBkg.GetEffectiveEntries(
        ) / hist_inclBkg.GetEffectiveEntries()
    print "Sig Eff. ntrk>{}, eff. entries: ".format(
        ntrkCut), 100 * hist_ntrkSig.GetEffectiveEntries(
        ) / hist_inclSig.GetEffectiveEntries(
        )  #, hist_ntrkSig.GetEffectiveEntries()

    print "Bkg Eff. 1 jet knn_ntrk> {}, eff. entries: ".format(
        knnCut), 100 * hist_knnBkg1.GetEffectiveEntries(
        ) / hist_inclBkg.GetEffectiveEntries()
    print "Sig Eff. 1 jet knn_ntrk> {}, eff. entries: ".format(
        knnCut), 100 * hist_knnSig1.GetEffectiveEntries(
        ) / hist_inclSig.GetEffectiveEntries()

    print "Bkg Eff. 1 jet knn_ntrk> {}, integral: ".format(
        knnCut), 100 * hist_knnBkg1.GetEffectiveEntries(
        ) / hist_inclBkg.GetEffectiveEntries()
    print "Sig Eff. 1 jet knn_ntrk> {}, integral: ".format(
        knnCut), 100 * hist_knnSig1.GetEffectiveEntries(
        ) / hist_inclSig.GetEffectiveEntries()

    outHistFile.cd()
    hist_knnBkg.SetName("bkg_knn")
    hist_knnSig.SetName("sig_knn")
    hist_knnBkg.Write()
    hist_knnSig.Write()
    outHistFile.Close()
    # Mjj dist for CR compared to inclusive selection
    """
예제 #2
0
파일: comparison.py 프로젝트: nethemis/ANN
def main(args):

    # Initialise
    args, cfg = initialise(args)

    # Initialise Keras backend
    initialise_backend(args)

    # Neural network-specific initialisation of the configuration dict
    initialise_config(args, cfg)

    # Keras import(s)
    import keras.backend as K
    from keras.models import load_model

    # Project import(s)
    from adversarial.models import classifier_model, adversary_model, combined_model, decorrelation_model

    # Load data
    data, features, _ = load_data(args.input + 'data.h5', test=True)

    # Common definitions
    # --------------------------------------------------------------------------
    # -- k-nearest neighbour
    kNN_var = 'D2-k#minusNN'

    def meaningful_digits(number):
        digits = 0
        if number > 0:
            digits = int(np.ceil(max(-np.log10(number), 0)))
            pass
        return '{l:.{d:d}f}'.format(d=digits, l=number)

    # -- Adversarial neural network (ANN) scan
    lambda_reg = 10.
    lambda_regs = sorted([1., 3., 10.])
    ann_vars = list()
    lambda_strs = list()
    for lambda_reg_ in lambda_regs:
        lambda_str = meaningful_digits(lambda_reg_).replace('.', 'p')
        lambda_strs.append(lambda_str)

        ann_var_ = "ANN(#lambda={:s})".format(lambda_str.replace('p', '.'))
        ann_vars.append(ann_var_)
        pass

    ann_var = ann_vars[lambda_regs.index(lambda_reg)]

    # -- uBoost scan
    uboost_eff = 92
    uboost_ur = 0.3
    uboost_urs = sorted([0., 0.01, 0.1, 0.3, 1.0])
    uboost_var = 'uBoost(#alpha={:s})'.format(meaningful_digits(uboost_ur))
    uboost_vars = [
        'uBoost(#alpha={:s})'.format(meaningful_digits(ur))
        for ur in uboost_urs
    ]
    uboost_pattern = 'uboost_ur_{{:4.2f}}_te_{:.0f}_rel21_fixed'.format(
        uboost_eff)

    # Tagger feature collection
    tagger_features = [
        'Tau21', 'Tau21DDT', 'D2', kNN_var, 'D2', 'D2CSS', 'NN', ann_var,
        'Adaboost', uboost_var
    ]

    # Add variables
    # --------------------------------------------------------------------------
    with Profile("Add variables"):

        # Tau21DDT
        from run.ddt.common import add_ddt
        add_ddt(data, path='models/ddt/ddt.pkl.gz')

        # D2-kNN
        from run.knn.common import add_knn, VAR as kNN_basevar, EFF as kNN_eff
        print "k-NN base variable: {} (cp. {})".format(kNN_basevar, kNN_var)
        add_knn(data,
                newfeat=kNN_var,
                path='models/knn/knn_{}_{}.pkl.gz'.format(
                    kNN_basevar, kNN_eff))

        # D2-CSS
        from run.css.common import add_css
        add_css("D2", data)

        # NN
        from run.adversarial.common import add_nn
        with Profile("NN"):
            classifier = load_model(
                'models/adversarial/classifier/full/classifier.h5')
            add_nn(data, classifier, 'NN')
            pass

        # ANN
        with Profile("ANN"):
            from adversarial.utils import DECORRELATION_VARIABLES
            adversary = adversary_model(
                gmm_dimensions=len(DECORRELATION_VARIABLES),
                **cfg['adversary']['model'])

            combined = combined_model(classifier, adversary,
                                      **cfg['combined']['model'])

            for ann_var_, lambda_str_ in zip(ann_vars, lambda_strs):
                print "== Loading model for {}".format(ann_var_)
                combined.load_weights(
                    'models/adversarial/combined/full/combined_lambda{}.h5'.
                    format(lambda_str_))
                add_nn(data, classifier, ann_var_)
                pass
            pass

        # Adaboost/uBoost
        with Profile("Adaboost/uBoost"):
            from run.uboost.common import add_bdt
            for var, ur in zip(uboost_vars, uboost_urs):
                var = ('Adaboost' if ur == 0 else var)
                path = 'models/uboost/' + uboost_pattern.format(ur).replace(
                    '.', 'p') + '.pkl.gz'
                print "== Loading model for {}".format(var)
                add_bdt(data, var, path)
                pass

            # Remove `Adaboost` from scan list
            uboost_vars.pop(0)
            pass

        pass

    # Remove unused variables
    used_variables = set(tagger_features + ann_vars + uboost_vars +
                         ['m', 'pt', 'npv', 'weight_test'])
    unused_variables = [var for var in list(data) if var not in used_variables]
    data.drop(columns=unused_variables)
    gc.collect()

    # Perform performance studies
    perform_studies(data, args, tagger_features, ann_vars, uboost_vars)

    return 0
예제 #3
0
def main (args):

    # Definitions
    histstyle = dict(**HISTSTYLE)

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, features, _ = load_data('data/djr_LCTopo_1.h5') #, test=True)
    #data2, features, _ = load_data('data/djr_LCTopo_2.h5') #, test=True)

    #data = np.concatenate((data1, data2))

    sigNumber = 0

    sigDict = {
        0: 'All Models',
        1: 'Model A, m = 1 TeV',
        2: 'Model A, m = 1.5 TeV',
        3: 'Model A, m = 2 TeV',
        4: 'Model A, m = 2.5 TeV',
        5: 'Model B, m = 1 TeV',
        6: 'Model B, m = 1.5 TeV',
        7: 'Model B, m = 2 TeV',
        8: 'Model B, m = 2.5 TeV',
        9: 'Model C, m = 1 TeV',
        10: 'Model C, m = 1.5 TeV',
        11: 'Model C, m = 2 TeV',
        12: 'Model C, m = 2.5 TeV',
        13: 'Model D, m = 1 TeV',
        14: 'Model D, m = 1.5 TeV',
        15: 'Model D, m = 2 TeV',
        16: 'Model D, m = 2.5 TeV',
        }

    histstyle[True] ['label'] = 'Multijets'
    histstyle[False]['label'] = 'Dark jets, {}'.format(sigDict[sigNumber])

    # Add knn variables

    #base_var = ['lead_jet_ungrtrk500', 'sub_jet_ungrtrk500']
    base_var = 'jet_ungrtrk500'
    kNN_var = base_var.replace('jet', 'knn')
    #base_vars = [base_var]
    #kNN_vars = [kNN_var]
    base_vars = ['lead_'+base_var, 'sub_'+base_var]
    kNN_vars = ['lead_'+kNN_var, 'sub_'+kNN_var]

    
    with Profile("Add variables"):
        from run.knn.common import add_knn, EFF as kNN_eff
        #for i in range(len(base_var)):
        print "k-NN base variable: {} (cp. {})".format(base_var, kNN_var)
        add_knn(data, newfeat='lead_'+kNN_var, path='models/knn/knn1D_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel))
        add_knn(data, newfeat='sub_'+kNN_var, path='models/knn/knn1D_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel))

        #add_knn(data, newfeat=kNN_var, path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel))
        print 'models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel)

    # Check variable distributions
        
    weight = 'weight'  # 'weight_test' / 'weight'
    scale = 139*1000000 # (inverse nanobarn)

    msk_bkg = data['signal'] == 0
    if sigNumber==0:
        msk_sig = data['signal'] == 1 
    else:
        msk_sig = data['sigType'] == sigNumber 


    knnBins = np.linspace(-100, 200, 75, endpoint=True)

    for var in kNN_vars:
        ### Canvas ###
        c = rp.canvas(num_pads=2, batch=True)
        c_tmp = rp.canvas(num_pads=1, batch=True)
        c2 = rp.canvas(batch=True)

        ### Plot ###
        h2 = c.pads()[0].hist(data.loc[msk_sig, var].values, bins=knnBins, weights=data.loc[msk_sig, weight].values, normalise=True, **histstyle[False])
        h1 = c.pads()[0].hist(data.loc[msk_bkg, var].values, bins=knnBins, weights=scale*data.loc[msk_bkg, weight].values, normalise=True, **histstyle[True])

        h1_incl = c_tmp.hist(data.loc[msk_bkg, var].values, bins=knnBins, weights=scale*data.loc[msk_bkg, weight].values, normalise=False)
        h2_incl = c_tmp.hist(data.loc[msk_sig, var].values, bins=knnBins, weights=data.loc[msk_sig, weight].values, normalise=False)

        #h1_CR = c_tmp.hist(data.loc[msk_CR_bkg, var].values, bins=knnBins, weights=scale*data.loc[msk_CR_bkg, weight].values, normalise=False)
        #h2_CR = c_tmp.hist(data.loc[msk_CR_sig, var].values, bins=knnBins, weights=data.loc[msk_CR_sig, weight].values, normalise=False)

        print "bkg. incl integral: ", h1_incl.GetEffectiveEntries()
        print "sig. incl integral: ", h2_incl.GetEffectiveEntries()
        #print "bkg. CR efficiency: ", h1_CR.GetEffectiveEntries()/h1_incl.GetEffectiveEntries()
        #print "sig. CR efficiency: ", h2_CR.GetEffectiveEntries()/h2_incl.GetEffectiveEntries()

        normFactor = 1.0 / (3./2 + np.sqrt(h1_incl.GetEffectiveEntries()) )
        print "Sensitivity with no cut: ", normFactor

        ### sensitivity ###
        sensitivity = []
        bkg_eff_1jet = []
        i = 0
        for cut in knnBins:

            msk_pass = (data[kNN_vars[0]]>cut) & (data[kNN_vars[1]]>cut)
            msk_pass1 = data[kNN_vars[0]>cut)
            #msk_pass = (data[var]>cut)
            msk_bkg_pass = msk_bkg & msk_pass
            msk_sig_pass = msk_sig & msk_pass

            msk_bkg_pass1 = msk_bkg & msk_pass_1jet
            msk_sig_pass1 = msk_sig & msk_pass_1jet

            h1_pass = c_tmp.hist(data.loc[msk_bkg_pass, var].values, bins=knnBins, weights=scale*data.loc[msk_bkg_pass, weight].values, normalise=False)
            h2_pass = c_tmp.hist(data.loc[msk_sig_pass, var].values, bins=knnBins, weights=data.loc[msk_sig_pass, weight].values, normalise=False)

            h1_pass1 = c_tmp.hist(data.loc[msk_bkg_pass1, var].values, bins=knnBins, weights=data.loc[msk_sig_pass, weight].values, normalise=False)

            if ( h2_incl.GetEffectiveEntries()>0 ) : #and h1_pass.GetEffectiveEntries()>0) :
                sensitivity.append( ((h2_pass.GetEffectiveEntries()/h2_incl.GetEffectiveEntries()) / (3./2 + np.sqrt(h1_pass.GetEffectiveEntries()) )) / normFactor )
                #print "bkg. eff. @ " , cut, ": ", h1_pass.GetEffectiveEntries()/h1_incl.GetEffectiveEntries()  
                #print "signal eff. @ ", cut, ": ", h2_pass.GetEffectiveEntries()/h2_incl.GetEffectiveEntries()
                #print "Sensitivity gain@ ", cut, ": ", ((h2_pass.GetEffectiveEntries()/h2_incl.GetEffectiveEntries()) / (3./2 + np.sqrt(h1_pass.GetEffectiveEntries())) ) / normFactor

            else: 
                sensitivity.append(0)

            if (h1_incl.GetEffectiveEntries()>0 ) :
                bkg_eff_1jet.append(h1_pass1.GetEffectiveEntries()/h1_incl.GetEffectiveEntries())
            else:
                bkg_eff_1jet.append(0)
                

            i = i+1

        #c.pads()[0].ylim(0,0.25)
        c.pads()[0].logy()
        c.pads()[0].xlim(-100,200)
        c.pads()[1].ylim(0,30)
        c.pads()[1].xlim(-100,200)
        c.pads()[1].graph( sensitivity, bins=knnBins) #, oob=False )

        ### Decorations ###
        c.legend(width=0.4, xmin=0.3, ymax=0.9)
        #c.xlabel("n_{trk}^{#epsilon={}\%}".format(kNN_eff)) #latex(var, ROOT=True))
        c.xlabel("n_{trk}^{#epsilon}") #latex(var, ROOT=True))
        c.ylabel("Fraction of jets")
        c.pads()[1].ylabel("Sensitivity gain")#"#epsilon_{S}/(#frac{3}{2} + #sqrt{B})/")
        c.pads()[1].text(["Sensitivity = #varepsilon_{S}/(#frac{3}{2} + #sqrt{B})", 
                ], xmin=0.2, ymax=0.80, ATLAS=False)


        c2.graph(sensitivity, bkg_eff_1jet)
        c2.xlabel("Single jet #varepsilon_B")
        c2.ylabel("Sensitivity gain")
        c2.text(["#epsilon=0.5 %",], xmin=0.2, ymax=0.8, ATLAS=False)

        ### Save ###
        #mkdir('figures/distributions')
        c.save('figures/distributions/sensitivity_{}_sig{}_eff{}.pdf'.format(var, sigNumber, kNN_eff))
        c.save('figures/distributions/sensitivity_{}_sig{}_eff{}.eps'.format(var, sigNumber, kNN_eff))

        c2.save('figure/distribution/sensitivity_1jEfficiency.pdf'.format(var,sigNumber,kNN_eff))
        print 'figures/distributions/sensitivity_{}_sig{}_eff{}.pdf'.format(var, sigNumber, kNN_eff)
        pass
    

    # Plot also the normal ntrk distribution for cross check with Roland's result

    msk_bkg = data['signal'] == 0
    if sigNumber==0:
        msk_sig = data['signal'] == 1 # data['sigType'] == sigNumber #                             
    else:
        msk_sig = data['sigType'] == sigNumber # data['sigType'] == sigNumber #                    
    #msk_weight = data['weight']<0.0002
    #msk_bkg = msk_bkg & msk_pt & msk_m & msk_eta 
    #msk_sig = msk_sig & msk_pt & msk_m & msk_eta 


    baseBins = np.linspace(0, 200, 75, endpoint=True) #axes[var][1], axes[var][2], axes[var][0] + 1, endpoint=True)

    for var in base_vars:
        ### Canvas ###
        c = rp.canvas(num_pads=2, batch=True)
        c.pads()[0].logy()

        c_tmp = rp.canvas(batch=True)

        ### Plot ###
        h2 = c.pads()[0].hist(data.loc[msk_sig, var].values, bins=baseBins, weights=data.loc[msk_sig, weight].values, normalise=True, **histstyle[False])
        h1 = c.pads()[0].hist(data.loc[msk_bkg, var].values, bins=baseBins, weights=scale*data.loc[msk_bkg, weight].values, normalise=True, **histstyle[True])

        h1_incl = c_tmp.hist(data.loc[msk_bkg, var].values, bins=baseBins, weights=scale*data.loc[msk_bkg, weight].values, normalise=False)
        h2_incl = c_tmp.hist(data.loc[msk_sig, var].values, bins=baseBins, weights=data.loc[msk_sig, weight].values, normalise=False)


        print "bkg. incl integral: ", h1_incl.GetEffectiveEntries()
        print "sig. incl integral: ", h2_incl.GetEffectiveEntries()

        normFactor = 1.0 / (3./2 + np.sqrt(h1_incl.Integral()) )

        #print "Sensitivity with no cut: ", normFactor


        ### sensitivity ###
        sensitivity = []
        i = 0
        for cut in baseBins:
            #print cut

            msk_pass = (data[base_vars[0]]>cut) & (data[base_vars[1]]>cut) #
            #msk_pass = data[var]>cut

            msk_bkg_pass = msk_bkg & msk_pass
            msk_sig_pass = msk_sig & msk_pass
            
            h1_pass = c_tmp.hist(data.loc[msk_bkg_pass, var].values, bins=baseBins, weights=scale*data.loc[msk_bkg_pass, weight].values, normalise=False)
            h2_pass = c_tmp.hist(data.loc[msk_sig_pass, var].values, bins=baseBins, weights=data.loc[msk_sig_pass, weight].values, normalise=False)


            if ( h2_incl.Integral()>0 ): #and h1_pass.Integral()>0 ):
                sensitivity.append( (h2_pass.Integral()/h2_incl.Integral()) /  (3./2. + np.sqrt(h1_pass.Integral())) / normFactor )

                #print "signal eff.  at ", cut, ": ", (h2_pass.Integral()/h2_incl.Integral()) 
                #print "bkg eff.  at ", cut, ": ", (h1_pass.Integral()/h1_incl.Integral()) 
                #print "sensitivity gain at ", cut, ": ", (h2_pass.Integral()/h2_incl.Integral()) /  (3./2. + np.sqrt(h1_pass.Integral())) / normFactor

            else:
                sensitivity.append(0)

            i = i+1

        c.pads()[1].ylim(0,80)
        c.pads()[1].xlim(0,200)
        c.pads()[1].graph( sensitivity, bins=baseBins) #, oob=False )

        ### Decorations ###
        c.legend(width=0.4, xmin=0.3, ymax=0.9)
        #c.xlabel(latex(var, ROOT=True))
        c.ylabel("Fraction of jets")
        c.xlabel("n_{trk}") #latex(var, ROOT=True))                                             
        c.pads()[1].ylabel("sensitivity gain") #"#epsilon_{S}/(#frac{3}{2} + #sqrt{B})")
        c.pads()[1].text(["sensitivity = #epsilon_{S}/(#frac{3}{2} + #sqrt{B})",
                ], xmin=0.2, ymax=0.80, ATLAS=False)

        ### Save ###
        c.save('figures/distributions/sensitivity_{}_sig{}_eff{}.pdf'.format(var, sigNumber, kNN_eff))
        c.save('figures/distributions/sensitivity_{}_sig{}_eff{}.eps'.format(var, sigNumber, kNN_eff))
        pass
예제 #4
0
def main(args):

    # Initialise
    args, cfg = initialise(args)

    # Initialise Keras backend
    initialise_backend(args)

    # Neural network-specific initialisation of the configuration dict
    initialise_config(args, cfg)

    # Keras import(s)
    import keras.backend as K
    from keras.models import load_model

    # Project import(s)
    from adversarial.models import classifier_model, adversary_model, combined_model, decorrelation_model

    # Load data
    data, features, _ = load_data('data/' + args.input, test=True)

    # Common definitions
    # --------------------------------------------------------------------------
    # -- k-nearest neighbour
    #kNN_var = 'D2-k#minusNN'
    #kNN_var = 'C1_02-knn'
    #base_var = 'sub_jet_ntrk'
    #kNN_var = base_var.replace('sub_jet_', '') + '-knn'

    #base_var = ['lead_jet_ungrtrk500', 'sub_jet_ungrtrk500']
    #kNN_var = [var.replace('jet', 'knn') for var in base_var]

    base_var = 'jet_ungrtrk500'
    kNN_var = base_var.replace('jet', 'knn')

    #base_var = ['jet_ungrtrk500']
    #kNN_var = [var.replace('jet', 'knn') for var in base_var]

    #base_var = ['ntrk_sum']
    #kNN_var = [var + '-knn' for var in base_var]

    def meaningful_digits(number):
        digits = 0
        if number > 0:
            digits = int(np.ceil(max(-np.log10(number), 0)))
            pass
        return '{l:.{d:d}f}'.format(d=digits, l=number)

    """
    # -- Adversarial neural network (ANN) scan
    lambda_reg  = 10.
    lambda_regs = sorted([1., 3., 10.])
å ham har jeg talt med løbende. For mange dage siden har vi talt om, om man kunne bruge grundlovsdag, og hvordan det ville hænge sammen med de frister, der er. In    ann_vars    = list()
    lambda_strs = list()
    for lambda_reg_ in lambda_regs:
        lambda_str = meaningful_digits(lambda_reg_).replace('.', 'p')
        lambda_strs.append(lambda_str)

        ann_var_ = "ANN(#lambda={:s})".format(lambda_str.replace('p', '.'))
        ann_vars.append(ann_var_)
        pass

    ann_var = ann_vars[lambda_regs.index(lambda_reg)]

    # -- uBoost scan
    uboost_eff = 92
    uboost_ur  = 0.3
    uboost_urs = sorted([0., 0.01, 0.1, 0.3, 1.0])
    uboost_var  =  'uBoost(#alpha={:s})'.format(meaningful_digits(uboost_ur))
    uboost_vars = ['uBoost(#alpha={:s})'.format(meaningful_digits(ur)) for ur in uboost_urs]
    uboost_pattern = 'uboost_ur_{{:4.2f}}_te_{:.0f}_rel21_fixed'.format(uboost_eff)
    """
    # Tagger feature collection
    #tagger_features = ['Tau21','Tau21DDT', 'D2', kNN_var, 'D2', 'D2CSS', 'NN', ann_var, 'Adaboost', uboost_var]
    #tagger_features = ['lead_jet_C1_02', kNN_var]
    tagger_features = [
        'lead_' + base_var, 'lead_' + kNN_var, 'sub_' + base_var,
        'sub_' + kNN_var
    ]

    #tagger_features = base_var + kNN_var

    # Add variables
    # --------------------------------------------------------------------------

    with Profile("Add variables"):
        #for i in range(len(base_var)):
        from run.knn.common import add_knn, MODEL as sigModel, VAR as kNN_basevar, EFF as kNN_eff
        print "k-NN base variable: {} (cp. {})".format(base_var, kNN_var)
        add_knn(data,
                newfeat='lead_' + kNN_var,
                path='models/knn/knn_{}_{}_{}.pkl.gz'.format(
                    base_var, kNN_eff, sigModel))
        add_knn(data,
                newfeat='sub_' + kNN_var,
                path='models/knn/knn_{}_{}_{}.pkl.gz'.format(
                    base_var, kNN_eff, sigModel))

    # Remove unused variables
    used_variables = set(tagger_features +
                         ['lead_jet_m', 'lead_jet_pt', 'dijetmass', 'weight'])
    unused_variables = [var for var in list(data) if var not in used_variables]
    data.drop(columns=unused_variables)
    gc.collect()

    # Perform performance studies
    perform_studies(data, args, tagger_features)

    return 0
예제 #5
0
def main(args):
    """Run the decorrelation comparison studies for the DeepWvsQCD tagger.

    Loads ``data.h5`` from ``args.input``, adds the DDT-, k-NN- and
    CSS-transformed versions of ``DeepWvsQCD`` to the dataset, and passes the
    result to ``perform_studies``.

    Args:
        args: Command-line arguments. They are passed through ``initialise``
            and ``args.input`` is used as the prefix of the input file path.

    Returns:
        int: Always 0.
    """

    # Initialise (the returned configuration is not needed in this script)
    args, _ = initialise(args)

    # Load data
    data, _, _ = load_data(args.input + 'data.h5',
                           test_full_signal=True)

    # Common definitions
    # --------------------------------------------------------------------------
    # -- k-nearest neighbour
    # NOTE(review): this is the N2 label, but it is printed below next to the
    # DeepWvsQCD base variable. It looks like a copy-paste leftover; the log
    # text is kept as it was so the output does not change.
    kNN_var_N2 = 'N_{2}-k#minusNN'

    # Tagger feature collection
    # The list alternates the raw tagger with one transformed variant
    # (presumably consumed pairwise by `perform_studies` -- TODO confirm).
    # Other base taggers used with this script (tau21, N2_B1, decDeepWvsQCD)
    # follow the same raw/DDT/kNN/CSS naming; switching to one of them means
    # changing this list, `title` and the add_* calls below together.
    tagger_features = [
        'DeepWvsQCD', 'DeepWvsQCDDDT', 'DeepWvsQCD', 'DeepWvsQCDkNN',
        'DeepWvsQCD', 'DeepWvsQCDCSS'
    ]
    title = "Deep"

    # Add variables
    # --------------------------------------------------------------------------
    with Profile("Add variables"):

        # DeepWvsQCD-DDT
        from run.ddt.common import add_ddt
        add_ddt(data,
                feat='DeepWvsQCD',
                path='models/ddt/ddt_DeepWvsQCD.pkl.gz')

        # DeepWvsQCD-kNN
        from run.knn.common import add_knn, VAR_DEEP as kNN_basevar, DEEP_EFF as kNN_eff
        # Single-argument print(...) gives identical output under Python 2
        # and also parses under Python 3.
        print("k-NN base variable: {} (cp. {})".format(kNN_basevar, kNN_var_N2))
        add_knn(data,
                feat=kNN_basevar,
                path='models/knn/knn_{}_{}.pkl.gz'.format(
                    kNN_basevar, kNN_eff))

        # DeepWvsQCD-CSS
        from run.css.common import add_css
        add_css("DeepWvsQCD", data)

    # Remove unused variables
    # 'npv' is kept for the robustness study.
    used_variables = set(tagger_features +
                         ['m', 'pt', 'weight_test', 'npv'])
    unused_variables = [var for var in list(data) if var not in used_variables]
    # NOTE(review): if `data` is a pandas DataFrame, drop() returns a new
    # frame and the result is discarded here, so no column is actually
    # removed. This is deliberately left unchanged: making it in-place would
    # also remove every column `perform_studies` reads that is not listed in
    # `used_variables` -- confirm that list before adding `inplace=True`.
    data.drop(columns=unused_variables)
    gc.collect()

    # Perform performance studies
    perform_studies(data, args, tagger_features, title=title)

    return 0
예제 #6
0
def main(args):
    """Run the decorrelation comparison studies for tau21 and N2_B1.

    Loads ``data.h5`` from ``args.input``, adds the DDT-, k-NN- and
    CSS-transformed versions of ``tau21`` and ``N2_B1`` to the dataset, and
    passes the result to ``perform_studies``.

    Args:
        args: Command-line arguments. They are passed through ``initialise``
            and ``args.input`` is used as the prefix of the input file path.

    Returns:
        int: Always 0.
    """

    # Initialise (the returned configuration is not needed in this script)
    args, _ = initialise(args)

    # Load data
    # (`train_full_signal=True` was used instead for faster checking; it is
    # not meant for the actual comparison.)
    data, _, _ = load_data(args.input + 'data.h5',
                           test_full_signal=True)

    # Tagger feature collection
    # --------------------------------------------------------------------------
    # Maps each base variable to the suffixes of the variants to study; the
    # empty suffix is the untransformed variable itself.
    tagger_features = {
        'tau21': ['', 'DDT', 'kNN', 'CSS'],
        'N2_B1': ['', 'DDT', 'kNN', 'CSS'],
    }
    title = 'Corrected_Full_Analytic'

    # Flat list of column names, in the dict's own iteration order.
    extracted_features = [
        basevar + suffix
        for basevar, suffixes in tagger_features.items()
        for suffix in suffixes
    ]

    # Add variables
    # --------------------------------------------------------------------------
    # The selection of which variables to add could also be automated from
    # `tagger_features`; for now it has to be kept in sync by hand.
    with Profile("Add variables"):

        # Tau21-DDT and N2-DDT
        from run.ddt.common import add_ddt
        add_ddt(data, feat='tau21', path='models/ddt/ddt_tau21.pkl.gz')
        add_ddt(data, feat='N2_B1', path='models/ddt/ddt_N2_B1.pkl.gz')

        # Tau21-kNN and N2-kNN
        # One import instead of re-importing the module just to rebind the
        # `kNN_basevar` / `kNN_eff` aliases for each variable.
        from run.knn.common import add_knn, VAR_TAU21, TAU21_EFF, VAR_N2, N2_EFF
        knn_inputs = (
            (VAR_TAU21, TAU21_EFF, 'tau_{21}-k#minusNN'),
            (VAR_N2, N2_EFF, 'N_{2}-kNN'),
        )
        for kNN_basevar, kNN_eff, kNN_label in knn_inputs:
            # Single-argument print(...) gives identical output under
            # Python 2 and also parses under Python 3.
            print("k-NN base variable: {} (cp. {})".format(kNN_basevar,
                                                           kNN_label))
            add_knn(data,
                    feat=kNN_basevar,
                    path='models/knn/knn_{}_{}.pkl.gz'.format(
                        kNN_basevar, kNN_eff))

        # Tau21-CSS and N2-CSS
        from run.css.common import add_css
        add_css("tau21", data)
        add_css("N2_B1", data)

    # Remove unused variables
    used_variables = set(extracted_features +
                         ['m', 'pt', 'weight_test', 'npv'])
    unused_variables = [var for var in list(data) if var not in used_variables]
    # NOTE(review): if `data` is a pandas DataFrame, drop() returns a new
    # frame and the result is discarded here, so no column is actually
    # removed. This is deliberately left unchanged: making it in-place would
    # also remove every column `perform_studies` reads that is not listed in
    # `used_variables` -- confirm that list before adding `inplace=True`.
    data.drop(columns=unused_variables)
    gc.collect()

    # Perform performance studies
    perform_studies(data,
                    args,
                    tagger_features,
                    extracted_features,
                    title=title)
    return 0
예제 #7
0
def main (args):

    # Definitions
    histstyle = dict(**HISTSTYLE)

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, features, _ = load_data('data/djr_LCTopo_1.h5') #, test=True)
    #data2, features, _ = load_data('data/djr_LCTopo_2.h5') #, test=True)

    #data = np.concatenate((data1, data2))

    sigNumber = 0

    sigDict = {
        0: 'All Models',
        1: 'Model A, m = 1 TeV',
        2: 'Model A, m = 1.5 TeV',
        3: 'Model A, m = 2 TeV',
        4: 'Model A, m = 2.5 TeV',
        5: 'Model B, m = 1 TeV',
        6: 'Model B, m = 1.5 TeV',
        7: 'Model B, m = 2 TeV',
        8: 'Model B, m = 2.5 TeV',
        9: 'Model C, m = 1 TeV',
        10: 'Model C, m = 1.5 TeV',
        11: 'Model C, m = 2 TeV',
        12: 'Model C, m = 2.5 TeV',
        13: 'Model D, m = 1 TeV',
        14: 'Model D, m = 1.5 TeV',
        15: 'Model D, m = 2 TeV',
        16: 'Model D, m = 2.5 TeV',
        }

    outFile = ROOT.TFile.Open("figures/sensitivity_targetEff{}.root".format(kNN_eff),"RECREATE")

    histstyle[True] ['label'] = 'Multijets'
    histstyle[False]['label'] = 'Dark jets, {}'.format(sigDict[sigNumber])

    # Add knn variables

    #base_var = ['lead_jet_ungrtrk500', 'sub_jet_ungrtrk500']
    base_var = 'jet_ungrtrk500'
    kNN_var = base_var.replace('jet', 'knn')
    #base_vars = [base_var]
    #kNN_vars = [kNN_var]
    base_vars = ['lead_'+base_var, 'sub_'+base_var]
    kNN_vars = ['lead_'+kNN_var, 'sub_'+kNN_var]

    
    with Profile("Add variables"):
        #for i in range(len(base_var)):
        print "k-NN base variable: {} (cp. {})".format(base_var, kNN_var)
        add_knn(data, newfeat='lead_'+kNN_var, path='models/knn/knn1D_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel))
        add_knn(data, newfeat='sub_'+kNN_var, path='models/knn/knn1D_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel))

        #add_knn(data, newfeat=kNN_var, path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel))
        print 'models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel)

    # Check variable distributions
        
    weight = 'weight'  # 'weight_test' / 'weight'
    scale = 139*1000000 # (inverse nanobarn)

    msk_bkg = data['signal'] == 0
    if sigNumber==0:
        msk_sig = data['signal'] == 1 
    else:
        msk_sig = data['sigType'] == sigNumber 


    knnBins = np.linspace(-100, 200, 75, endpoint=True)
    effBins = np.linspace(0,1,100, endpoint=True)

    for var in kNN_vars:
        ### Canvas ###
        c = rp.canvas(num_pads=2, batch=True)
        c_tmp = rp.canvas(num_pads=1, batch=True)

        ### Plot ###
        h2 = c.pads()[0].hist(data.loc[msk_sig, var].values, bins=knnBins, weights=data.loc[msk_sig, weight].values, normalise=True, **histstyle[False])
        h1 = c.pads()[0].hist(data.loc[msk_bkg, var].values, bins=knnBins, weights=scale*data.loc[msk_bkg, weight].values, normalise=True, **histstyle[True])

        h1_incl = c_tmp.hist(data.loc[msk_bkg, var].values, bins=knnBins, weights=scale*data.loc[msk_bkg, weight].values, normalise=False)
        h2_incl = c_tmp.hist(data.loc[msk_sig, var].values, bins=knnBins, weights=data.loc[msk_sig, weight].values, normalise=False)

        #h1_CR = c_tmp.hist(data.loc[msk_CR_bkg, var].values, bins=knnBins, weights=scale*data.loc[msk_CR_bkg, weight].values, normalise=False)
        #h2_CR = c_tmp.hist(data.loc[msk_CR_sig, var].values, bins=knnBins, weights=data.loc[msk_CR_sig, weight].values, normalise=False)

        print "bkg. incl integral: ", h1_incl.GetEffectiveEntries()
        print "sig. incl integral: ", h2_incl.GetEffectiveEntries()
        #print "bkg. CR efficiency: ", h1_CR.GetEffectiveEntries()/h1_incl.GetEffectiveEntries()
        #print "sig. CR efficiency: ", h2_CR.GetEffectiveEntries()/h2_incl.GetEffectiveEntries()

        normFactor = 1.0 / (3./2 + np.sqrt(h1_incl.GetEffectiveEntries()) )
        print "Sensitivity with no cut: ", normFactor

        ### sensitivity ###
        sensitivity, bkg_eff_1jet = array( 'd' ), array( 'd' )
        #sensitivity = []
        #bkg_eff_1jet = []
        i = 0
        for cut in knnBins:

            msk_pass = (data[kNN_vars[0]]>cut) & (data[kNN_vars[1]]>cut)
            msk_pass1 = data[var]>cut
            #msk_pass = (data[var]>cut)
            msk_bkg_pass = msk_bkg & msk_pass
            msk_sig_pass = msk_sig & msk_pass

            msk_bkg_pass1 = msk_bkg & msk_pass1
            msk_sig_pass1 = msk_sig & msk_pass1

            h1_pass = c_tmp.hist(data.loc[msk_bkg_pass, var].values, bins=knnBins, weights=scale*data.loc[msk_bkg_pass, weight].values, normalise=False)
            h2_pass = c_tmp.hist(data.loc[msk_sig_pass, var].values, bins=knnBins, weights=data.loc[msk_sig_pass, weight].values, normalise=False)

            h1_pass1 = c_tmp.hist(data.loc[msk_bkg_pass1, var].values, bins=knnBins, weights=data.loc[msk_bkg_pass1, weight].values, normalise=False)

            if ( h2_incl.GetEffectiveEntries()>0 ) : #and h1_pass.GetEffectiveEntries()>0) :
                sensitivity.append( ((h2_pass.GetEffectiveEntries()/h2_incl.GetEffectiveEntries()) / (3./2 + np.sqrt(h1_pass.GetEffectiveEntries()) )) / normFactor )

                #print "bkg. eff. @ " , cut, ": ", h1_pass.GetEffectiveEntries()/h1_incl.GetEffectiveEntries()  
                #print "signal eff. @ ", cut, ": ", h2_pass.GetEffectiveEntries()/h2_incl.GetEffectiveEntries()
                #print "Sensitivity gain@ ", cut, ": ", ((h2_pass.GetEffectiveEntries()/h2_incl.GetEffectiveEntries()) / (3./2 + np.sqrt(h1_pass.GetEffectiveEntries())) ) / normFactor

            else: 
                sensitivity.append(0)

            if (h1_incl.GetEffectiveEntries()>0 ) :
                bkg_eff_1jet.append(h1_pass1.GetEffectiveEntries()/h1_incl.GetEffectiveEntries())
            else:
                bkg_eff_1jet.append(0)
                
            i = i+1

        #c.pads()[0].ylim(0,0.25)
        c.pads()[0].logy()
        c.pads()[0].xlim(-100,200)
        c.pads()[1].ylim(0,30)
        c.pads()[1].xlim(-100,200)
        c.pads()[1].graph( sensitivity, bins=knnBins) #, oob=False )

        ### Decorations ###
        c.legend(width=0.4, xmin=0.3, ymax=0.9)
        #c.xlabel("n_{trk}^{#epsilon={}\%}".format(kNN_eff)) #latex(var, ROOT=True))
        c.xlabel("n_{trk}^{#epsilon}") #latex(var, ROOT=True))
        c.ylabel("Fraction of jets")
        c.pads()[1].ylabel("Sensitivity gain")#"#epsilon_{S}/(#frac{3}{2} + #sqrt{B})/")
        c.pads()[1].text(["Sensitivity = #varepsilon_{S}/(#frac{3}{2} + #sqrt{B})", 
                ], xmin=0.2, ymax=0.80, ATLAS=False)

        c.save('figures/distributions/sensitivity_{}_sig{}_eff{}.pdf'.format(var, sigNumber, kNN_eff))
        c.save('figures/distributions/sensitivity_{}_sig{}_eff{}.eps'.format(var, sigNumber, kNN_eff))

        del c

        gr_sen = ROOT.TGraph(len(sensitivity), knnBins, sensitivity)
        gr_eff = ROOT.TGraph(len(bkg_eff_1jet), knnBins, bkg_eff_1jet)

        gr_more = ROOT.TGraph(len(sensitivity), bkg_eff_1jet, sensitivity)

        gr_sen.GetXaxis().SetTitle("#it{n}_{trk}^{#epsilon}-cut")
        gr_sen.GetYaxis().SetTitle("Sensitivity gain")
        gr_eff.GetYaxis().SetTitle("Single jet #varepsilon_{B}")
        gr_sen.GetYaxis().SetAxisColor(ROOT.kOrange+2)
        gr_eff.GetYaxis().SetAxisColor(ROOT.kGreen+2)
        gr_sen.SetMarkerColor(ROOT.kOrange+2)
        gr_eff.SetMarkerColor(ROOT.kGreen+2)
        gr_eff.SetDrawOption("Y+")

        c2 = rp.canvas(batch=True)
        c2.pads()[0].logx()
        c2.pads()[0].cd()
        #c2.pads()[0].graph(sensitivity, bkg_eff_1jet)
        gr_more.GetXaxis().SetTitle("Single jet #varepsilon_{B}")
        gr_more.GetYaxis().SetTitle("Sensitivity gain")
        #gr_more.GetXaxis().SetRangeUser(0, 0.02)
        gr_more.Draw("AP")


        #c2 = ROOT.TCanvas("can2", "", 200,10,700,500) #(batch=True)
        #pad1 = ROOT.TPad("pad1", "", 0,0,1,1) #c2.pads()[0]._bare()
        #pad1.Draw()
        #pad1.cd()
        #gr_sen.Draw("AP")
        

        #c2.cd()
        #pad2 = ROOT.TPad("pad2", "", 0,0,1,1) #c2.pads()[0]._bare()
        #pad2.SetFillStyle(4000)
        #pad2.Draw()
        #pad2.cd()
        #gr_eff.Draw("PY+")

        #gr_eff.Draw("APY+")
        #gr_sen.Draw("SAME")

        #gr_sen = c2.graph(sensitivity, bins=knnBins, markercolor=ROOT.kOrange+2)
        #gr_eff = c2.graph(bkg_eff_1jet, bins=knnBins, markercolor=ROOT.kGreen+2, option='Y+' )
        #gr_eff.GetYaxis.SetRange(0,1)
        #gr_eff.Draw("SAME Y+")
        #c2.xlabel("Single jet #varepsilon_{B}")
        #c2.ylabel("Sensitivity gain")
        #c2.text(["#epsilon=0.5 %",], xmin=0.2, ymax=0.8, ATLAS=False)

        ### Save ###
        #mkdir('figures/distributions')

        c2.save('figures/distributions/sensitivity_{}_eff{}_1jet.pdf'.format(var,kNN_eff) )
        del c2

        outFile.cd()
        gr_more.SetName("sensitivity_eff{}".format(kNN_eff))
        gr_more.Write()
        outFile.Close()

        #print 'figures/distributions/sensitivity_{}_sig{}_eff{}.pdf'.format(var, sigNumber, kNN_eff)
        pass
    

    # Plot also the normal ntrk distribution for cross check with Roland's result
    """