def main(args): # Definitions histstyle = dict(**HISTSTYLE) # Initialise args, cfg = initialise(args) # Load data #data = np.zeros(1, 95213009, 10) data, features, _ = load_data( 'data/djr_LCTopo_2.h5') # + args.input) #, test=True) # #data2, features, _ = load_data('data/djr_LCTopo_2.h5') # + args.input) #, test=True) # #data = np.concatenate((data1, data2)) #f1 = h5py.File('data/djr_LCTopo_1.h5', 'r') #f2 = h5py.File('data/djr_LCTopo_2.h5', 'r') knnCut = 0 ntrkCut = 50 emfracCut = 0.65 scale = 139 * 1000000 # (inverse nanobarn) signal_to_plot = 7 sigDict = { 0: 'All Models', 1: 'Model A, m = 2 TeV', 2: 'Model A, m = 1 TeV', 3: 'Model A, m = 1.5 TeV', 4: 'Model A, m = 2.5 TeV', 5: 'Model B, m = 1 TeV', 6: 'Model B, m = 1.5 TeV', 7: 'Model B, m = 2 TeV', 8: 'Model B, m = 2.5 TeV', 9: 'Model C, m = 1 TeV', 10: 'Model C, m = 1.5 TeV', 11: 'Model C, m = 2 TeV', 12: 'Model C, m = 2.5 TeV', 13: 'Model D, m = 1 TeV', 14: 'Model D, m = 1.5 TeV', 15: 'Model D, m = 2 TeV', 16: 'Model D, m = 2.5 TeV', } outHistFile = ROOT.TFile.Open( "figures/mjjHistograms_kNN{}_eff{}.root".format(knnCut, kNN_eff), "RECREATE") histstyle[True]['label'] = 'Multijets' histstyle[False]['label'] = 'Dark jets, {}'.format(sigDict[signal_to_plot]) # Add knn variables #base_var = ['lead_jet_ungrtrk500', 'sub_jet_ungrtrk500'] base_var = 'jet_ungrtrk500' kNN_var = base_var.replace('jet', 'knn') #base_vars = ['lead_'+base_var, 'sub_'+base_var] #kNN_vars = ['lead_'+kNN_var, 'sub_'+kNN_var] print data.shape with Profile("Add variables"): #for i in range(len(base_var)): print "k-NN base variable: {} (cp. 
{})".format(base_var, kNN_var) add_knn(data, newfeat='lead_' + kNN_var, path='models/knn/{}_{}_{}_{}.pkl.gz'.format( FIT, base_var, kNN_eff, sigModel)) add_knn(data, newfeat='sub_' + kNN_var, path='models/knn/{}_{}_{}_{}.pkl.gz'.format( FIT, base_var, kNN_eff, sigModel)) #add_knn(data, newfeat=kNN_var, path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel)) print 'models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel) """ base_var = ['lead_jet_ungrtrk500', 'sub_jet_ungrtrk500'] kNN_var = [var.replace('jet', 'knn') for var in base_var] with Profile("Add variables"): from run.knn.common import add_knn, MODEL, VAR as kNN_basevar, EFF as kNN_eff print "k-NN base variable: {} (cp. {})".format(kNN_basevar, kNN_var) for i in range(len(base_var)): add_knn(data, newfeat=kNN_var[i], path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var[i], kNN_eff, MODEL)) print 'models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var[i], kNN_eff, MODEL) """ weight = 'weight' # 'weight_test' / 'weight' bins_pt = np.linspace(450, 3500, 40) bins_mjj = np.linspace(0, 8000, 80) # Useful masks msk_bkg = data['signal'] == 0 if signal_to_plot == 0: msk_sig = data['signal'] == 1 else: msk_sig = data['sigType'] == signal_to_plot #msk_weight = data['weight']<0.2 msk_knn = (data['lead_knn_ungrtrk500'] > knnCut) & (data['sub_knn_ungrtrk500'] > knnCut) msk_ungr = (data['lead_jet_ungrtrk500'] > ntrkCut) & (data['sub_jet_ungrtrk500'] > ntrkCut) msk_emfrac = (data['lead_jet_EMFrac'] < emfracCut) & (data['sub_jet_EMFrac'] < emfracCut) msk_knn_1 = (data['lead_knn_ungrtrk500'] > knnCut) msk_ungr_1 = (data['lead_jet_ungrtrk500'] > ntrkCut) #msk_knn = (data['knn_ungrtrk500']>knnCut) #msk_ungr = (data['jet_ungrtrk500']>90.0) msk_ntrkBkg = msk_ungr & msk_emfrac & msk_bkg #& msk_weight #& msk_pt & msk_m & msk_eta msk_ntrkSig = msk_ungr & msk_emfrac & msk_sig #& msk_pt & msk_m & msk_eta msk_knnBkg = msk_knn & msk_bkg msk_knnSig = msk_knn & msk_sig msk_ntrkBkg1 = msk_ungr_1 & msk_bkg #& 
msk_weight #& msk_pt & msk_m & msk_eta msk_ntrkSig1 = msk_ungr_1 & msk_sig #& msk_pt & msk_m & msk_eta msk_knnBkg1 = msk_knn_1 & msk_bkg #& msk_weight #& msk_pt & msk_m & msk_eta msk_knnSig1 = msk_knn_1 & msk_sig #& msk_pt & msk_m & msk_eta msk_inclBkg = msk_bkg #& msk_weight #& msk_pt & msk_m & msk_eta msk_inclSig = msk_sig #& msk_pt & msk_m & msk_eta # Mjj dist with cut on ntrk, ungrtrk compared to inclusive selection c = rp.canvas(batch=True) hist_inclBkg = c.hist(data.loc[msk_inclBkg, 'dijetmass'].values, bins=bins_mjj, weights=scale * data.loc[msk_inclBkg, weight].values, label="Multijets, Inclusive", normalise=True, linecolor=ROOT.kGreen + 2, linewidth=3) hist_knnBkg = c.hist( data.loc[msk_knnBkg, 'dijetmass'].values, bins=bins_mjj, weights=scale * data.loc[msk_knnBkg, weight].values, label="Multijets, n_{{trk}}^{{#epsilon}}>{}".format(knnCut), normalise=True, linecolor=ROOT.kMagenta + 2, linestyle=2, linewidth=3) hist_ntrkBkg = c.hist(data.loc[msk_ntrkBkg, 'dijetmass'].values, bins=bins_mjj, weights=scale * data.loc[msk_ntrkBkg, weight].values, label="Multijets, n_{{trk}}>{}".format(ntrkCut), normalise=True, linecolor=ROOT.kOrange + 2, linestyle=2, linewidth=3) #hist_CRBkg = c.hist(data.loc[msk_CR_bkg, 'dijetmass'].values, bins=bins_mjj, weights=scale*data.loc[msk_CR_bkg, weight].values, label="CR Bkg, C<20", normalise=True, linecolor=ROOT.kGray+2, linestyle=2) c.legend(width=0.4, xmin=0.5, ymax=0.9) c.ylabel("Fraction of jets") c.xlabel("m_{jj} [GeV]") c.logy() #c.ylim(0.00005, 5) #c.save('figures/distributions/mjj_Bkg_CR20.pdf'.format(knnCut)) #c.save('figures/distributions/mjj_Bkg_CR20.eps'.format(knnCut)) c.save('figures/distributions/mjj_BkgDist_ntrk{}_knn{}_{}.pdf'.format( ntrkCut, knnCut, FIT)) c.save('figures/distributions/mjj_BkgDist_ntrk{}_knn{}_{}.eps'.format( ntrkCut, knnCut, FIT)) del c c = rp.canvas(batch=True) hist_Sig = c.hist(data.loc[msk_sig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_sig, weight].values, label="Model A, m = 2 
TeV, inclusive", normalise=True, linecolor=ROOT.kGreen + 2) hist_knnSig = c.hist( data.loc[msk_knnSig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_knnSig, weight].values, label="Model A, m = 2 TeV, #it{{n}}_{{trk}}^{{#epsilon}}>{}".format( knnCut), normalise=True, linecolor=ROOT.kMagenta + 2, linestyle=2) hist_ntrkSig = c.hist( data.loc[msk_ntrkSig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_ntrkSig, weight].values, label="Model A, m = 2 TeV, #it{{n}}_{{trk}}>{}".format(ntrkCut), normalise=True, linecolor=ROOT.kOrange + 2, linestyle=2) #hist_CRSig = c.hist(data.loc[msk_CR_sig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_CR_sig, weight].values, label="Sig, CR", normalise=True, linecolor=ROOT.kGray+2, linestyle=2) c.legend(width=0.4, xmin=0.5, ymax=0.9) c.ylabel("Fraction of jets") c.xlabel("m_{jj} [GeV]") c.logy() #c.ylim(0.00005, 5) c.save('figures/distributions/mjj_SigDist_ntrk{}_knn{}_{}.pdf'.format( ntrkCut, knnCut, FIT)) c.save('figures/distributions/mjj_SigDist_ntrk{}_knn{}_{}.eps'.format( ntrkCut, knnCut, FIT)) del c c = rp.canvas(batch=True) hist_knnSig = c.hist( data.loc[msk_knnSig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_knnSig, weight].values, label="Model A, m = 2 TeV, knn_ntrk>{}".format(knnCut), normalise=False, linecolor=ROOT.kBlue + 1, linestyle=1) hist_knnBkg = c.hist(data.loc[msk_knnBkg, 'dijetmass'].values, bins=bins_mjj, weights=scale * data.loc[msk_knnBkg, weight].values, label="Multijets, knn_ntrk>{}".format(knnCut), normalise=False, linecolor=ROOT.kMagenta + 2, linestyle=2) hist_ntrkBkg = c.hist(data.loc[msk_ntrkBkg, 'dijetmass'].values, bins=bins_mjj, weights=scale * data.loc[msk_ntrkBkg, weight].values, label="Multijets, ntrk>{}".format(ntrkCut), normalise=False, linecolor=ROOT.kOrange + 2, linestyle=2) c.legend(width=0.4, xmin=0.3, ymax=0.9) c.ylabel("Number of events") c.xlabel("m_{jj} [GeV]") c.logy() #c.ylim(0.00005, 5) 
c.save('figures/distributions/mjj_Dist_noNorm_knn{}_{}.pdf'.format( knnCut, FIT)) c.save('figures/distributions/mjj_Dist_noNorm_knn{}_{}.eps'.format( knnCut, FIT)) bins_mjj = np.linspace(0, 10000, 50) # Unscaled histograms for calculating efficiencies hist_inclBkg = c.hist(data.loc[msk_inclBkg, 'dijetmass'].values, bins=bins_mjj, weights=scale * data.loc[msk_inclBkg, weight].values, normalise=False) hist_inclSig = c.hist(data.loc[msk_inclSig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_inclSig, weight].values, normalise=False) hist_ntrkSig = c.hist(data.loc[msk_ntrkSig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_ntrkSig, weight].values, normalise=False) hist_knnSig = c.hist(data.loc[msk_knnSig, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_knnSig, weight].values, normalise=False) hist_ntrkSig1 = c.hist(data.loc[msk_ntrkSig1, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_ntrkSig1, weight].values, normalise=False) hist_ntrkBkg1 = c.hist(data.loc[msk_ntrkBkg1, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_ntrkBkg1, weight].values, normalise=False) hist_knnBkg1 = c.hist(data.loc[msk_knnBkg1, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_knnBkg1, weight].values, normalise=False) hist_knnSig1 = c.hist(data.loc[msk_knnSig1, 'dijetmass'].values, bins=bins_mjj, weights=data.loc[msk_knnSig1, weight].values, normalise=False) print "Bkg inclusive integral: ", hist_inclBkg.GetEffectiveEntries() print "Sig inclusive integral: ", hist_inclSig.GetEffectiveEntries() print "Bkg pass kNN eff entries / integral: ", hist_knnBkg.GetEffectiveEntries( ), hist_knnBkg.Integral() print "Sig pass kNN eff entries / integral: ", hist_knnSig.GetEffectiveEntries( ), hist_knnSig.Integral() print "Bkg pass ntrk eff entries / integral: ", hist_ntrkBkg.GetEffectiveEntries( ), hist_ntrkBkg.Integral() print "Sig pass ntrk eff entries / integral: ", hist_ntrkSig.GetEffectiveEntries( ), hist_ntrkSig.Integral() print "Bkg Eff. 
knn_ntrk> {}, eff. entries: ".format( knnCut), 100 * hist_knnBkg.GetEffectiveEntries( ) / hist_inclBkg.GetEffectiveEntries() print "Sig Eff. knn_ntrk> {}, eff. entries: ".format( knnCut), 100 * hist_knnSig.GetEffectiveEntries( ) / hist_inclSig.GetEffectiveEntries() print "Bkg Eff. knn_ntrk> {}, integral: ".format( knnCut), 100 * hist_knnBkg.Integral() / hist_inclBkg.Integral() print "Sig Eff. knn_ntrk> {}, integral: ".format( knnCut), 100 * hist_knnSig.Integral() / hist_inclSig.Integral() print "Bkg Eff. ntrk>{}, eff. entries: ".format( ntrkCut), 100 * hist_ntrkBkg.GetEffectiveEntries( ) / hist_inclBkg.GetEffectiveEntries() print "Sig Eff. ntrk>{}, eff. entries: ".format( ntrkCut), 100 * hist_ntrkSig.GetEffectiveEntries( ) / hist_inclSig.GetEffectiveEntries( ) #, hist_ntrkSig.GetEffectiveEntries() print "Bkg Eff. 1 jet knn_ntrk> {}, eff. entries: ".format( knnCut), 100 * hist_knnBkg1.GetEffectiveEntries( ) / hist_inclBkg.GetEffectiveEntries() print "Sig Eff. 1 jet knn_ntrk> {}, eff. entries: ".format( knnCut), 100 * hist_knnSig1.GetEffectiveEntries( ) / hist_inclSig.GetEffectiveEntries() print "Bkg Eff. 1 jet knn_ntrk> {}, integral: ".format( knnCut), 100 * hist_knnBkg1.GetEffectiveEntries( ) / hist_inclBkg.GetEffectiveEntries() print "Sig Eff. 1 jet knn_ntrk> {}, integral: ".format( knnCut), 100 * hist_knnSig1.GetEffectiveEntries( ) / hist_inclSig.GetEffectiveEntries() outHistFile.cd() hist_knnBkg.SetName("bkg_knn") hist_knnSig.SetName("sig_knn") hist_knnBkg.Write() hist_knnSig.Write() outHistFile.Close() # Mjj dist for CR compared to inclusive selection """
def main(args): # Initialise args, cfg = initialise(args) # Initialise Keras backend initialise_backend(args) # Neural network-specific initialisation of the configuration dict initialise_config(args, cfg) # Keras import(s) import keras.backend as K from keras.models import load_model # Project import(s) from adversarial.models import classifier_model, adversary_model, combined_model, decorrelation_model # Load data data, features, _ = load_data(args.input + 'data.h5', test=True) # Common definitions # -------------------------------------------------------------------------- # -- k-nearest neighbour kNN_var = 'D2-k#minusNN' def meaningful_digits(number): digits = 0 if number > 0: digits = int(np.ceil(max(-np.log10(number), 0))) pass return '{l:.{d:d}f}'.format(d=digits, l=number) # -- Adversarial neural network (ANN) scan lambda_reg = 10. lambda_regs = sorted([1., 3., 10.]) ann_vars = list() lambda_strs = list() for lambda_reg_ in lambda_regs: lambda_str = meaningful_digits(lambda_reg_).replace('.', 'p') lambda_strs.append(lambda_str) ann_var_ = "ANN(#lambda={:s})".format(lambda_str.replace('p', '.')) ann_vars.append(ann_var_) pass ann_var = ann_vars[lambda_regs.index(lambda_reg)] # -- uBoost scan uboost_eff = 92 uboost_ur = 0.3 uboost_urs = sorted([0., 0.01, 0.1, 0.3, 1.0]) uboost_var = 'uBoost(#alpha={:s})'.format(meaningful_digits(uboost_ur)) uboost_vars = [ 'uBoost(#alpha={:s})'.format(meaningful_digits(ur)) for ur in uboost_urs ] uboost_pattern = 'uboost_ur_{{:4.2f}}_te_{:.0f}_rel21_fixed'.format( uboost_eff) # Tagger feature collection tagger_features = [ 'Tau21', 'Tau21DDT', 'D2', kNN_var, 'D2', 'D2CSS', 'NN', ann_var, 'Adaboost', uboost_var ] # Add variables # -------------------------------------------------------------------------- with Profile("Add variables"): # Tau21DDT from run.ddt.common import add_ddt add_ddt(data, path='models/ddt/ddt.pkl.gz') # D2-kNN from run.knn.common import add_knn, VAR as kNN_basevar, EFF as kNN_eff print "k-NN base 
variable: {} (cp. {})".format(kNN_basevar, kNN_var) add_knn(data, newfeat=kNN_var, path='models/knn/knn_{}_{}.pkl.gz'.format( kNN_basevar, kNN_eff)) # D2-CSS from run.css.common import add_css add_css("D2", data) # NN from run.adversarial.common import add_nn with Profile("NN"): classifier = load_model( 'models/adversarial/classifier/full/classifier.h5') add_nn(data, classifier, 'NN') pass # ANN with Profile("ANN"): from adversarial.utils import DECORRELATION_VARIABLES adversary = adversary_model( gmm_dimensions=len(DECORRELATION_VARIABLES), **cfg['adversary']['model']) combined = combined_model(classifier, adversary, **cfg['combined']['model']) for ann_var_, lambda_str_ in zip(ann_vars, lambda_strs): print "== Loading model for {}".format(ann_var_) combined.load_weights( 'models/adversarial/combined/full/combined_lambda{}.h5'. format(lambda_str_)) add_nn(data, classifier, ann_var_) pass pass # Adaboost/uBoost with Profile("Adaboost/uBoost"): from run.uboost.common import add_bdt for var, ur in zip(uboost_vars, uboost_urs): var = ('Adaboost' if ur == 0 else var) path = 'models/uboost/' + uboost_pattern.format(ur).replace( '.', 'p') + '.pkl.gz' print "== Loading model for {}".format(var) add_bdt(data, var, path) pass # Remove `Adaboost` from scan list uboost_vars.pop(0) pass pass # Remove unused variables used_variables = set(tagger_features + ann_vars + uboost_vars + ['m', 'pt', 'npv', 'weight_test']) unused_variables = [var for var in list(data) if var not in used_variables] data.drop(columns=unused_variables) gc.collect() # Perform performance studies perform_studies(data, args, tagger_features, ann_vars, uboost_vars) return 0
def main (args): # Definitions histstyle = dict(**HISTSTYLE) # Initialise args, cfg = initialise(args) # Load data data, features, _ = load_data('data/djr_LCTopo_1.h5') #, test=True) #data2, features, _ = load_data('data/djr_LCTopo_2.h5') #, test=True) #data = np.concatenate((data1, data2)) sigNumber = 0 sigDict = { 0: 'All Models', 1: 'Model A, m = 1 TeV', 2: 'Model A, m = 1.5 TeV', 3: 'Model A, m = 2 TeV', 4: 'Model A, m = 2.5 TeV', 5: 'Model B, m = 1 TeV', 6: 'Model B, m = 1.5 TeV', 7: 'Model B, m = 2 TeV', 8: 'Model B, m = 2.5 TeV', 9: 'Model C, m = 1 TeV', 10: 'Model C, m = 1.5 TeV', 11: 'Model C, m = 2 TeV', 12: 'Model C, m = 2.5 TeV', 13: 'Model D, m = 1 TeV', 14: 'Model D, m = 1.5 TeV', 15: 'Model D, m = 2 TeV', 16: 'Model D, m = 2.5 TeV', } histstyle[True] ['label'] = 'Multijets' histstyle[False]['label'] = 'Dark jets, {}'.format(sigDict[sigNumber]) # Add knn variables #base_var = ['lead_jet_ungrtrk500', 'sub_jet_ungrtrk500'] base_var = 'jet_ungrtrk500' kNN_var = base_var.replace('jet', 'knn') #base_vars = [base_var] #kNN_vars = [kNN_var] base_vars = ['lead_'+base_var, 'sub_'+base_var] kNN_vars = ['lead_'+kNN_var, 'sub_'+kNN_var] with Profile("Add variables"): from run.knn.common import add_knn, EFF as kNN_eff #for i in range(len(base_var)): print "k-NN base variable: {} (cp. 
{})".format(base_var, kNN_var) add_knn(data, newfeat='lead_'+kNN_var, path='models/knn/knn1D_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel)) add_knn(data, newfeat='sub_'+kNN_var, path='models/knn/knn1D_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel)) #add_knn(data, newfeat=kNN_var, path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel)) print 'models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel) # Check variable distributions weight = 'weight' # 'weight_test' / 'weight' scale = 139*1000000 # (inverse nanobarn) msk_bkg = data['signal'] == 0 if sigNumber==0: msk_sig = data['signal'] == 1 else: msk_sig = data['sigType'] == sigNumber knnBins = np.linspace(-100, 200, 75, endpoint=True) for var in kNN_vars: ### Canvas ### c = rp.canvas(num_pads=2, batch=True) c_tmp = rp.canvas(num_pads=1, batch=True) c2 = rp.canvas(batch=True) ### Plot ### h2 = c.pads()[0].hist(data.loc[msk_sig, var].values, bins=knnBins, weights=data.loc[msk_sig, weight].values, normalise=True, **histstyle[False]) h1 = c.pads()[0].hist(data.loc[msk_bkg, var].values, bins=knnBins, weights=scale*data.loc[msk_bkg, weight].values, normalise=True, **histstyle[True]) h1_incl = c_tmp.hist(data.loc[msk_bkg, var].values, bins=knnBins, weights=scale*data.loc[msk_bkg, weight].values, normalise=False) h2_incl = c_tmp.hist(data.loc[msk_sig, var].values, bins=knnBins, weights=data.loc[msk_sig, weight].values, normalise=False) #h1_CR = c_tmp.hist(data.loc[msk_CR_bkg, var].values, bins=knnBins, weights=scale*data.loc[msk_CR_bkg, weight].values, normalise=False) #h2_CR = c_tmp.hist(data.loc[msk_CR_sig, var].values, bins=knnBins, weights=data.loc[msk_CR_sig, weight].values, normalise=False) print "bkg. incl integral: ", h1_incl.GetEffectiveEntries() print "sig. incl integral: ", h2_incl.GetEffectiveEntries() #print "bkg. CR efficiency: ", h1_CR.GetEffectiveEntries()/h1_incl.GetEffectiveEntries() #print "sig. 
CR efficiency: ", h2_CR.GetEffectiveEntries()/h2_incl.GetEffectiveEntries() normFactor = 1.0 / (3./2 + np.sqrt(h1_incl.GetEffectiveEntries()) ) print "Sensitivity with no cut: ", normFactor ### sensitivity ### sensitivity = [] bkg_eff_1jet = [] i = 0 for cut in knnBins: msk_pass = (data[kNN_vars[0]]>cut) & (data[kNN_vars[1]]>cut) msk_pass1 = data[kNN_vars[0]>cut) #msk_pass = (data[var]>cut) msk_bkg_pass = msk_bkg & msk_pass msk_sig_pass = msk_sig & msk_pass msk_bkg_pass1 = msk_bkg & msk_pass_1jet msk_sig_pass1 = msk_sig & msk_pass_1jet h1_pass = c_tmp.hist(data.loc[msk_bkg_pass, var].values, bins=knnBins, weights=scale*data.loc[msk_bkg_pass, weight].values, normalise=False) h2_pass = c_tmp.hist(data.loc[msk_sig_pass, var].values, bins=knnBins, weights=data.loc[msk_sig_pass, weight].values, normalise=False) h1_pass1 = c_tmp.hist(data.loc[msk_bkg_pass1, var].values, bins=knnBins, weights=data.loc[msk_sig_pass, weight].values, normalise=False) if ( h2_incl.GetEffectiveEntries()>0 ) : #and h1_pass.GetEffectiveEntries()>0) : sensitivity.append( ((h2_pass.GetEffectiveEntries()/h2_incl.GetEffectiveEntries()) / (3./2 + np.sqrt(h1_pass.GetEffectiveEntries()) )) / normFactor ) #print "bkg. eff. @ " , cut, ": ", h1_pass.GetEffectiveEntries()/h1_incl.GetEffectiveEntries() #print "signal eff. 
@ ", cut, ": ", h2_pass.GetEffectiveEntries()/h2_incl.GetEffectiveEntries() #print "Sensitivity gain@ ", cut, ": ", ((h2_pass.GetEffectiveEntries()/h2_incl.GetEffectiveEntries()) / (3./2 + np.sqrt(h1_pass.GetEffectiveEntries())) ) / normFactor else: sensitivity.append(0) if (h1_incl.GetEffectiveEntries()>0 ) : bkg_eff_1jet.append(h1_pass1.GetEffectiveEntries()/h1_incl.GetEffectiveEntries()) else: bkg_eff_1jet.append(0) i = i+1 #c.pads()[0].ylim(0,0.25) c.pads()[0].logy() c.pads()[0].xlim(-100,200) c.pads()[1].ylim(0,30) c.pads()[1].xlim(-100,200) c.pads()[1].graph( sensitivity, bins=knnBins) #, oob=False ) ### Decorations ### c.legend(width=0.4, xmin=0.3, ymax=0.9) #c.xlabel("n_{trk}^{#epsilon={}\%}".format(kNN_eff)) #latex(var, ROOT=True)) c.xlabel("n_{trk}^{#epsilon}") #latex(var, ROOT=True)) c.ylabel("Fraction of jets") c.pads()[1].ylabel("Sensitivity gain")#"#epsilon_{S}/(#frac{3}{2} + #sqrt{B})/") c.pads()[1].text(["Sensitivity = #varepsilon_{S}/(#frac{3}{2} + #sqrt{B})", ], xmin=0.2, ymax=0.80, ATLAS=False) c2.graph(sensitivity, bkg_eff_1jet) c2.xlabel("Single jet #varepsilon_B") c2.ylabel("Sensitivity gain") c2.text(["#epsilon=0.5 %",], xmin=0.2, ymax=0.8, ATLAS=False) ### Save ### #mkdir('figures/distributions') c.save('figures/distributions/sensitivity_{}_sig{}_eff{}.pdf'.format(var, sigNumber, kNN_eff)) c.save('figures/distributions/sensitivity_{}_sig{}_eff{}.eps'.format(var, sigNumber, kNN_eff)) c2.save('figure/distribution/sensitivity_1jEfficiency.pdf'.format(var,sigNumber,kNN_eff)) print 'figures/distributions/sensitivity_{}_sig{}_eff{}.pdf'.format(var, sigNumber, kNN_eff) pass # Plot also the normal ntrk distribution for cross check with Roland's result msk_bkg = data['signal'] == 0 if sigNumber==0: msk_sig = data['signal'] == 1 # data['sigType'] == sigNumber # else: msk_sig = data['sigType'] == sigNumber # data['sigType'] == sigNumber # #msk_weight = data['weight']<0.0002 #msk_bkg = msk_bkg & msk_pt & msk_m & msk_eta #msk_sig = msk_sig & msk_pt & 
msk_m & msk_eta baseBins = np.linspace(0, 200, 75, endpoint=True) #axes[var][1], axes[var][2], axes[var][0] + 1, endpoint=True) for var in base_vars: ### Canvas ### c = rp.canvas(num_pads=2, batch=True) c.pads()[0].logy() c_tmp = rp.canvas(batch=True) ### Plot ### h2 = c.pads()[0].hist(data.loc[msk_sig, var].values, bins=baseBins, weights=data.loc[msk_sig, weight].values, normalise=True, **histstyle[False]) h1 = c.pads()[0].hist(data.loc[msk_bkg, var].values, bins=baseBins, weights=scale*data.loc[msk_bkg, weight].values, normalise=True, **histstyle[True]) h1_incl = c_tmp.hist(data.loc[msk_bkg, var].values, bins=baseBins, weights=scale*data.loc[msk_bkg, weight].values, normalise=False) h2_incl = c_tmp.hist(data.loc[msk_sig, var].values, bins=baseBins, weights=data.loc[msk_sig, weight].values, normalise=False) print "bkg. incl integral: ", h1_incl.GetEffectiveEntries() print "sig. incl integral: ", h2_incl.GetEffectiveEntries() normFactor = 1.0 / (3./2 + np.sqrt(h1_incl.Integral()) ) #print "Sensitivity with no cut: ", normFactor ### sensitivity ### sensitivity = [] i = 0 for cut in baseBins: #print cut msk_pass = (data[base_vars[0]]>cut) & (data[base_vars[1]]>cut) # #msk_pass = data[var]>cut msk_bkg_pass = msk_bkg & msk_pass msk_sig_pass = msk_sig & msk_pass h1_pass = c_tmp.hist(data.loc[msk_bkg_pass, var].values, bins=baseBins, weights=scale*data.loc[msk_bkg_pass, weight].values, normalise=False) h2_pass = c_tmp.hist(data.loc[msk_sig_pass, var].values, bins=baseBins, weights=data.loc[msk_sig_pass, weight].values, normalise=False) if ( h2_incl.Integral()>0 ): #and h1_pass.Integral()>0 ): sensitivity.append( (h2_pass.Integral()/h2_incl.Integral()) / (3./2. + np.sqrt(h1_pass.Integral())) / normFactor ) #print "signal eff. at ", cut, ": ", (h2_pass.Integral()/h2_incl.Integral()) #print "bkg eff. at ", cut, ": ", (h1_pass.Integral()/h1_incl.Integral()) #print "sensitivity gain at ", cut, ": ", (h2_pass.Integral()/h2_incl.Integral()) / (3./2. 
+ np.sqrt(h1_pass.Integral())) / normFactor else: sensitivity.append(0) i = i+1 c.pads()[1].ylim(0,80) c.pads()[1].xlim(0,200) c.pads()[1].graph( sensitivity, bins=baseBins) #, oob=False ) ### Decorations ### c.legend(width=0.4, xmin=0.3, ymax=0.9) #c.xlabel(latex(var, ROOT=True)) c.ylabel("Fraction of jets") c.xlabel("n_{trk}") #latex(var, ROOT=True)) c.pads()[1].ylabel("sensitivity gain") #"#epsilon_{S}/(#frac{3}{2} + #sqrt{B})") c.pads()[1].text(["sensitivity = #epsilon_{S}/(#frac{3}{2} + #sqrt{B})", ], xmin=0.2, ymax=0.80, ATLAS=False) ### Save ### c.save('figures/distributions/sensitivity_{}_sig{}_eff{}.pdf'.format(var, sigNumber, kNN_eff)) c.save('figures/distributions/sensitivity_{}_sig{}_eff{}.eps'.format(var, sigNumber, kNN_eff)) pass
def main(args): # Initialise args, cfg = initialise(args) # Initialise Keras backend initialise_backend(args) # Neural network-specific initialisation of the configuration dict initialise_config(args, cfg) # Keras import(s) import keras.backend as K from keras.models import load_model # Project import(s) from adversarial.models import classifier_model, adversary_model, combined_model, decorrelation_model # Load data data, features, _ = load_data('data/' + args.input, test=True) # Common definitions # -------------------------------------------------------------------------- # -- k-nearest neighbour #kNN_var = 'D2-k#minusNN' #kNN_var = 'C1_02-knn' #base_var = 'sub_jet_ntrk' #kNN_var = base_var.replace('sub_jet_', '') + '-knn' #base_var = ['lead_jet_ungrtrk500', 'sub_jet_ungrtrk500'] #kNN_var = [var.replace('jet', 'knn') for var in base_var] base_var = 'jet_ungrtrk500' kNN_var = base_var.replace('jet', 'knn') #base_var = ['jet_ungrtrk500'] #kNN_var = [var.replace('jet', 'knn') for var in base_var] #base_var = ['ntrk_sum'] #kNN_var = [var + '-knn' for var in base_var] def meaningful_digits(number): digits = 0 if number > 0: digits = int(np.ceil(max(-np.log10(number), 0))) pass return '{l:.{d:d}f}'.format(d=digits, l=number) """ # -- Adversarial neural network (ANN) scan lambda_reg = 10. lambda_regs = sorted([1., 3., 10.]) å ham har jeg talt med løbende. For mange dage siden har vi talt om, om man kunne bruge grundlovsdag, og hvordan det ville hænge sammen med de frister, der er. 
In ann_vars = list() lambda_strs = list() for lambda_reg_ in lambda_regs: lambda_str = meaningful_digits(lambda_reg_).replace('.', 'p') lambda_strs.append(lambda_str) ann_var_ = "ANN(#lambda={:s})".format(lambda_str.replace('p', '.')) ann_vars.append(ann_var_) pass ann_var = ann_vars[lambda_regs.index(lambda_reg)] # -- uBoost scan uboost_eff = 92 uboost_ur = 0.3 uboost_urs = sorted([0., 0.01, 0.1, 0.3, 1.0]) uboost_var = 'uBoost(#alpha={:s})'.format(meaningful_digits(uboost_ur)) uboost_vars = ['uBoost(#alpha={:s})'.format(meaningful_digits(ur)) for ur in uboost_urs] uboost_pattern = 'uboost_ur_{{:4.2f}}_te_{:.0f}_rel21_fixed'.format(uboost_eff) """ # Tagger feature collection #tagger_features = ['Tau21','Tau21DDT', 'D2', kNN_var, 'D2', 'D2CSS', 'NN', ann_var, 'Adaboost', uboost_var] #tagger_features = ['lead_jet_C1_02', kNN_var] tagger_features = [ 'lead_' + base_var, 'lead_' + kNN_var, 'sub_' + base_var, 'sub_' + kNN_var ] #tagger_features = base_var + kNN_var # Add variables # -------------------------------------------------------------------------- with Profile("Add variables"): #for i in range(len(base_var)): from run.knn.common import add_knn, MODEL as sigModel, VAR as kNN_basevar, EFF as kNN_eff print "k-NN base variable: {} (cp. {})".format(base_var, kNN_var) add_knn(data, newfeat='lead_' + kNN_var, path='models/knn/knn_{}_{}_{}.pkl.gz'.format( base_var, kNN_eff, sigModel)) add_knn(data, newfeat='sub_' + kNN_var, path='models/knn/knn_{}_{}_{}.pkl.gz'.format( base_var, kNN_eff, sigModel)) # Remove unused variables used_variables = set(tagger_features + ['lead_jet_m', 'lead_jet_pt', 'dijetmass', 'weight']) unused_variables = [var for var in list(data) if var not in used_variables] data.drop(columns=unused_variables) gc.collect() # Perform performance studies perform_studies(data, args, tagger_features) return 0
def main(args): # Initialise args, cfg = initialise(args) # Initialise Keras backend #initialise_backend(args) # Neural network-specific initialisation of the configuration dict #initialise_config(args, cfg) # Keras import(s) #import keras.backend as K #from keras.models import load_model # Project import(s) #from adversarial.models import classifier_model, adversary_model, combined_model, decorrelation_model # Load data #data, features, _ = load_data(args.input + 'data.h5', test=True) data, features, _ = load_data(args.input + 'data.h5', test_full_signal=True) # Common definitions # -------------------------------------------------------------------------- # -- k-nearest neighbour kNN_var_N2 = 'N_{2}-k#minusNN' kNN_var_tau21 = 'tau_{21}-k#minusNN' def meaningful_digits(number): digits = 0 if number > 0: digits = int(np.ceil(max(-np.log10(number), 0))) pass return '{l:.{d:d}f}'.format(d=digits, l=number) # -- Adversarial neural network (ANN) scan #lambda_reg = 10. #lambda_regs = sorted([1., 3., 10.]) #ann_vars = list() #lambda_strs = list() #for lambda_reg_ in lambda_regs: # lambda_str = meaningful_digits(lambda_reg_).replace('.', 'p') # lambda_strs.append(lambda_str) # ann_var_ = "ANN(#lambda={:s})".format(lambda_str.replace('p', '.')) # ann_vars.append(ann_var_) # pass #ann_var = ann_vars[lambda_regs.index(lambda_reg)] # -- uBoost scan #uboost_eff = 92 #uboost_ur = 0.3 #uboost_urs = sorted([0., 0.01, 0.1, 0.3, 1.0]) #uboost_var = 'uBoost(#alpha={:s})'.format(meaningful_digits(uboost_ur)) #uboost_vars = ['uBoost(#alpha={:s})'.format(meaningful_digits(ur)) for ur in uboost_urs] #uboost_pattern = 'uboost_ur_{{:4.2f}}_te_{:.0f}_rel21_fixed'.format(uboost_eff) # Tagger feature collection #tagger_features = ['Tau21','Tau21DDT', 'D2', kNN_var, 'D2', 'D2CSS', 'NN', ann_var, 'Adaboost', uboost_var] #tagger_features = ['tau21', 'tau21DDT', 'tau21', 'tau21kNN', 'tau21', 'tau21CSS', 'N2_B1', 'N2_B1DDT', 'N2_B1', 'N2_B1kNN', 'N2_B1', 'N2_B1CSS']; title="tau21_vs_N2_B1" 
#tagger_features = ['N2_B1', 'N2_B1DDT', 'N2_B1', 'N2_B1kNN', 'N2_B1', 'N2_B1CSS']; title="N2_B1" #tagger_features = ['tau21', 'tau21DDT', 'N2_B1', 'N2_B1kNN', 'N2_B1', 'N2_B1CSS']; title="ATLAS" tagger_features = [ 'decDeepWvsQCD', 'decDeepWvsQCDDDT', 'decDeepWvsQCD', 'decDeepWvsQCDkNN', 'decDeepWvsQCD', 'decDeepWvsQCDCSS' ] title = "decDeep" tagger_features = [ 'DeepWvsQCD', 'DeepWvsQCDDDT', 'DeepWvsQCD', 'DeepWvsQCDkNN', 'DeepWvsQCD', 'DeepWvsQCDCSS' ] title = "Deep" # Add variables # -------------------------------------------------------------------------- with Profile("Add variables"): ## Tau21DDT #from run.ddt.common import add_ddt #add_ddt(data, feat='tau21', path='models/ddt/ddt_tau21.pkl.gz') ## N2DDT #from run.ddt.common import add_ddt #add_ddt(data, feat='N2_B1', path='models/ddt/ddt_N2_B1.pkl.gz') ## decDeepQvsQCDDDT #from run.ddt.common import add_ddt #add_ddt(data, feat='decDeepWvsQCD', path='models/ddt/ddt_decDeepWvsQCD.pkl.gz') # DeepQvsQCDDDT from run.ddt.common import add_ddt add_ddt(data, feat='DeepWvsQCD', path='models/ddt/ddt_DeepWvsQCD.pkl.gz') ## Tau21-kNN #from run.knn.common import add_knn, VAR_TAU21 as kNN_basevar, TAU21_EFF as kNN_eff #print "k-NN base variable: {} (cp. {})".format(kNN_basevar, kNN_var_tau21) #add_knn(data, feat=kNN_basevar, path='models/knn/knn_{}_{}.pkl.gz'.format(kNN_basevar, kNN_eff)) ## N2-kNN #from run.knn.common import add_knn, VAR_N2 as kNN_basevar, N2_EFF as kNN_eff #print "k-NN base variable: {} (cp. {})".format(kNN_basevar, kNN_var_N2) #add_knn(data, feat=kNN_basevar, path='models/knn/knn_{}_{}.pkl.gz'.format(kNN_basevar, kNN_eff)) ## decDeepWvsQCD-kNN #from run.knn.common import add_knn, VAR_DECDEEP as kNN_basevar, DECDEEP_EFF as kNN_eff #print "k-NN base variable: {} (cp. 
{})".format(kNN_basevar, kNN_var_N2) #add_knn(data, feat=kNN_basevar, path='models/knn/knn_{}_{}.pkl.gz'.format(kNN_basevar, kNN_eff)) # DeepWvsQCD-kNN from run.knn.common import add_knn, VAR_DEEP as kNN_basevar, DEEP_EFF as kNN_eff print "k-NN base variable: {} (cp. {})".format(kNN_basevar, kNN_var_N2) add_knn(data, feat=kNN_basevar, path='models/knn/knn_{}_{}.pkl.gz'.format( kNN_basevar, kNN_eff)) ## Tau21-CSS #from run.css.common import add_css #add_css("tau21", data) ## N2-CSS #from run.css.common import add_css #add_css("N2_B1", data) ## decDeepWvsQCD-CSS #from run.css.common import add_css #add_css("decDeepWvsQCD", data) # DeepWvsQCD-CSS from run.css.common import add_css add_css("DeepWvsQCD", data) pass # Remove unused variables #used_variables = set(tagger_features + ann_vars + uboost_vars + ['m', 'pt', 'npv', 'weight_test']) used_variables = set(tagger_features + ['m', 'pt', 'weight_test', 'npv' ]) ## need to put 'npv' back in for robustness study unused_variables = [var for var in list(data) if var not in used_variables] data.drop(columns=unused_variables) gc.collect() # Perform performance studies #perform_studies (data, args, tagger_features, ann_vars, uboost_vars) perform_studies(data, args, tagger_features, title=title) return 0
def main(args):
    """Run the analytic-tagger comparison study (tau21 vs. N2_B1).

    Loads the test dataset, augments it with the DDT-, kNN-, and CSS-
    decorrelated variants of each base substructure variable listed in
    ``tagger_features``, drops every column not needed downstream, and
    hands the slimmed dataset to ``perform_studies``.

    Parameters
    ----------
    args : parsed command-line arguments; ``args.input`` is the directory
        holding ``data.h5``.  (Presumably an argparse.Namespace — confirm
        against ``initialise``.)

    Returns
    -------
    int
        0 on success.
    """

    # Initialise (re-binds args and loads the run configuration)
    args, cfg = initialise(args)

    # Load data.  test_full_signal=True selects the full-signal test split.
    data, features, _ = load_data(args.input + 'data.h5', test_full_signal=True)
    #data, features, _ = load_data(args.input + 'data.h5', train_full_signal=True) #for faster checking, don't use for actual comparison

    # Common definitions
    # --------------------------------------------------------------------------
    def meaningful_digits(number):
        # Format `number` with just enough decimals to show its leading
        # significant digit (e.g. 0.005 -> '0.005', 2.0 -> '2').
        # NOTE(review): not used in the visible part of this function —
        # possibly kept for callers elsewhere; confirm before removing.
        digits = 0
        if number > 0:
            digits = int(np.ceil(max(-np.log10(number), 0)))
            pass
        return '{l:.{d:d}f}'.format(d=digits, l=number)

    # Tagger feature collection
    # Map of base variable -> list of decorrelation suffixes to study.
    # The commented alternatives below are previous study configurations.
    #tagger_features = ['Tau21','Tau21DDT', 'D2', kNN_var, 'D2', 'D2CSS', 'NN', ann_var, 'Adaboost', uboost_var]
    #tagger_features = ['tau21', 'tau21DDT', 'tau21', 'tau21kNN', 'tau21', 'tau21CSS', 'N2_B1', 'N2_B1DDT', 'N2_B1', 'N2_B1kNN', 'N2_B1', 'N2_B1CSS']; title="tau21_vs_N2_B1"
    #tagger_features = ['N2_B1', 'N2_B1DDT', 'N2_B1', 'N2_B1kNN', 'N2_B1', 'N2_B1CSS']; title="N2_B1"
    #tagger_features = ['tau21', 'tau21DDT', 'N2_B1', 'N2_B1kNN', 'N2_B1', 'N2_B1CSS']; title="ATLAS"
    #tagger_features = ['decDeepWvsQCD', 'decDeepWvsQCDDDT', 'decDeepWvsQCD', 'decDeepWvsQCDkNN', 'decDeepWvsQCD', 'decDeepWvsQCDCSS']; title="decDeep"
    #tagger_features = {'tau21':['','DDT'], 'N2_B1':['','kNN','CSS']}; title='ATLAS2'
    #tagger_features = {'tau21':['','DDT'], 'N2_B1':['','kNN',], 'decDeepWvsQCD':['','kNN'], 'DeepWvsQCD':['','kNN']}; title='Deep_vs_Analytic'
    #tagger_features = {'tau21':[''], 'N2_B1':[''], 'decDeepWvsQCD':[''], 'DeepWvsQCD':['']}; title='Deep_Check2'
    tagger_features = {
        'tau21': ['', 'DDT', 'kNN', 'CSS'],
        'N2_B1': ['', 'DDT', 'kNN', 'CSS']
    }
    title = 'Corrected_Full_Analytic'
    #tagger_features = {'tau21':['', 'DDT', 'kNN', 'CSS'], 'N2_B1':['', 'DDT', 'kNN','CSS']}; title='Full_Analytic_vs_Atlas'

    # Flatten the {base: [suffixes]} map into the concrete column names
    # that will exist in `data` after the "Add variables" step below.
    extracted_features = []
    for basevar in tagger_features.keys():
        for suffix in tagger_features[basevar]:
            extracted_features.append(basevar + suffix)

    # Add variables
    # --------------------------------------------------------------------------
    with Profile("Add variables"):
        # the selections of which variables to add could also be automated from the tagger_features list...

        # Tau21DDT: adds the DDT-decorrelated column for tau21 in place.
        from run.ddt.common import add_ddt
        add_ddt(data, feat='tau21', path='models/ddt/ddt_tau21.pkl.gz')

        # N2DDT
        from run.ddt.common import add_ddt
        add_ddt(data, feat='N2_B1', path='models/ddt/ddt_N2_B1.pkl.gz')

        ## decDeepQvsQCDDDT
        #from run.ddt.common import add_ddt
        #add_ddt(data, feat='decDeepWvsQCD', path='models/ddt/ddt_decDeepWvsQCD.pkl.gz')

        ## DeepQvsQCDDDT
        #from run.ddt.common import add_ddt
        #add_ddt(data, feat='DeepWvsQCD', path='models/ddt/ddt_DeepWvsQCD.pkl.gz')

        # Tau21-kNN.  NOTE: kNN_basevar/kNN_eff are re-bound by the N2
        # import below, so statement order matters here.
        from run.knn.common import add_knn, VAR_TAU21 as kNN_basevar, TAU21_EFF as kNN_eff
        print "k-NN base variable: {} (cp. {})".format(kNN_basevar, 'tau_{21}-k#minusNN')
        add_knn(data, feat=kNN_basevar, path='models/knn/knn_{}_{}.pkl.gz'.format(kNN_basevar, kNN_eff))

        # N2-kNN (re-binds kNN_basevar/kNN_eff to the N2 settings)
        from run.knn.common import add_knn, VAR_N2 as kNN_basevar, N2_EFF as kNN_eff
        print "k-NN base variable: {} (cp. {})".format(kNN_basevar, 'N_{2}-kNN')
        add_knn(data, feat=kNN_basevar, path='models/knn/knn_{}_{}.pkl.gz'.format(kNN_basevar, kNN_eff))

        ## decDeepWvsQCD-kNN
        #from run.knn.common import add_knn, VAR_DECDEEP as kNN_basevar, DECDEEP_EFF as kNN_eff
        #print "k-NN base variable: {} (cp. {})".format(kNN_basevar, 'decDeepWvsQCD')
        #add_knn(data, feat=kNN_basevar, path='models/knn/knn_{}_{}.pkl.gz'.format(kNN_basevar, kNN_eff))

        ## DeepWvsQCD-kNN
        #from run.knn.common import add_knn, VAR_DEEP as kNN_basevar, DEEP_EFF as kNN_eff
        #print "k-NN base variable: {} (cp. {})".format(kNN_basevar, 'DeepWvsQCD')
        #add_knn(data, feat=kNN_basevar, path='models/knn/knn_{}_{}.pkl.gz'.format(kNN_basevar, kNN_eff))

        # Tau21-CSS
        from run.css.common import add_css
        add_css("tau21", data)

        # N2-CSS
        from run.css.common import add_css
        add_css("N2_B1", data)

        ## decDeepWvsQCD-CSS
        #from run.css.common import add_css
        #add_css("decDeepWvsQCD", data)

        ## DeepWvsQCD-CSS
        #from run.css.common import add_css
        #add_css("DeepWvsQCD", data)
        pass

    # Remove unused variables to cut memory before the study step.
    # 'm', 'pt', 'weight_test' and 'npv' are kept alongside the tagger
    # columns (presumably needed by perform_studies — confirm).
    #used_variables = set(tagger_features + ['m', 'pt', 'weight_test', 'npv'])
    used_variables = set(extracted_features + ['m', 'pt', 'weight_test', 'npv'])
    unused_variables = [var for var in list(data) if var not in used_variables]
    # NOTE(review): drop() is called without inplace/assignment; whether the
    # columns are actually removed depends on the container's drop semantics
    # — verify against load_data's return type.
    data.drop(columns=unused_variables)
    gc.collect()

    # Perform performance studies
    perform_studies(data, args, tagger_features, extracted_features, title=title)

    return 0
def main (args): # Definitions histstyle = dict(**HISTSTYLE) # Initialise args, cfg = initialise(args) # Load data data, features, _ = load_data('data/djr_LCTopo_1.h5') #, test=True) #data2, features, _ = load_data('data/djr_LCTopo_2.h5') #, test=True) #data = np.concatenate((data1, data2)) sigNumber = 0 sigDict = { 0: 'All Models', 1: 'Model A, m = 1 TeV', 2: 'Model A, m = 1.5 TeV', 3: 'Model A, m = 2 TeV', 4: 'Model A, m = 2.5 TeV', 5: 'Model B, m = 1 TeV', 6: 'Model B, m = 1.5 TeV', 7: 'Model B, m = 2 TeV', 8: 'Model B, m = 2.5 TeV', 9: 'Model C, m = 1 TeV', 10: 'Model C, m = 1.5 TeV', 11: 'Model C, m = 2 TeV', 12: 'Model C, m = 2.5 TeV', 13: 'Model D, m = 1 TeV', 14: 'Model D, m = 1.5 TeV', 15: 'Model D, m = 2 TeV', 16: 'Model D, m = 2.5 TeV', } outFile = ROOT.TFile.Open("figures/sensitivity_targetEff{}.root".format(kNN_eff),"RECREATE") histstyle[True] ['label'] = 'Multijets' histstyle[False]['label'] = 'Dark jets, {}'.format(sigDict[sigNumber]) # Add knn variables #base_var = ['lead_jet_ungrtrk500', 'sub_jet_ungrtrk500'] base_var = 'jet_ungrtrk500' kNN_var = base_var.replace('jet', 'knn') #base_vars = [base_var] #kNN_vars = [kNN_var] base_vars = ['lead_'+base_var, 'sub_'+base_var] kNN_vars = ['lead_'+kNN_var, 'sub_'+kNN_var] with Profile("Add variables"): #for i in range(len(base_var)): print "k-NN base variable: {} (cp. 
{})".format(base_var, kNN_var) add_knn(data, newfeat='lead_'+kNN_var, path='models/knn/knn1D_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel)) add_knn(data, newfeat='sub_'+kNN_var, path='models/knn/knn1D_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel)) #add_knn(data, newfeat=kNN_var, path='models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel)) print 'models/knn/knn_{}_{}_{}.pkl.gz'.format(base_var, kNN_eff, sigModel) # Check variable distributions weight = 'weight' # 'weight_test' / 'weight' scale = 139*1000000 # (inverse nanobarn) msk_bkg = data['signal'] == 0 if sigNumber==0: msk_sig = data['signal'] == 1 else: msk_sig = data['sigType'] == sigNumber knnBins = np.linspace(-100, 200, 75, endpoint=True) effBins = np.linspace(0,1,100, endpoint=True) for var in kNN_vars: ### Canvas ### c = rp.canvas(num_pads=2, batch=True) c_tmp = rp.canvas(num_pads=1, batch=True) ### Plot ### h2 = c.pads()[0].hist(data.loc[msk_sig, var].values, bins=knnBins, weights=data.loc[msk_sig, weight].values, normalise=True, **histstyle[False]) h1 = c.pads()[0].hist(data.loc[msk_bkg, var].values, bins=knnBins, weights=scale*data.loc[msk_bkg, weight].values, normalise=True, **histstyle[True]) h1_incl = c_tmp.hist(data.loc[msk_bkg, var].values, bins=knnBins, weights=scale*data.loc[msk_bkg, weight].values, normalise=False) h2_incl = c_tmp.hist(data.loc[msk_sig, var].values, bins=knnBins, weights=data.loc[msk_sig, weight].values, normalise=False) #h1_CR = c_tmp.hist(data.loc[msk_CR_bkg, var].values, bins=knnBins, weights=scale*data.loc[msk_CR_bkg, weight].values, normalise=False) #h2_CR = c_tmp.hist(data.loc[msk_CR_sig, var].values, bins=knnBins, weights=data.loc[msk_CR_sig, weight].values, normalise=False) print "bkg. incl integral: ", h1_incl.GetEffectiveEntries() print "sig. incl integral: ", h2_incl.GetEffectiveEntries() #print "bkg. CR efficiency: ", h1_CR.GetEffectiveEntries()/h1_incl.GetEffectiveEntries() #print "sig. 
CR efficiency: ", h2_CR.GetEffectiveEntries()/h2_incl.GetEffectiveEntries() normFactor = 1.0 / (3./2 + np.sqrt(h1_incl.GetEffectiveEntries()) ) print "Sensitivity with no cut: ", normFactor ### sensitivity ### sensitivity, bkg_eff_1jet = array( 'd' ), array( 'd' ) #sensitivity = [] #bkg_eff_1jet = [] i = 0 for cut in knnBins: msk_pass = (data[kNN_vars[0]]>cut) & (data[kNN_vars[1]]>cut) msk_pass1 = data[var]>cut #msk_pass = (data[var]>cut) msk_bkg_pass = msk_bkg & msk_pass msk_sig_pass = msk_sig & msk_pass msk_bkg_pass1 = msk_bkg & msk_pass1 msk_sig_pass1 = msk_sig & msk_pass1 h1_pass = c_tmp.hist(data.loc[msk_bkg_pass, var].values, bins=knnBins, weights=scale*data.loc[msk_bkg_pass, weight].values, normalise=False) h2_pass = c_tmp.hist(data.loc[msk_sig_pass, var].values, bins=knnBins, weights=data.loc[msk_sig_pass, weight].values, normalise=False) h1_pass1 = c_tmp.hist(data.loc[msk_bkg_pass1, var].values, bins=knnBins, weights=data.loc[msk_bkg_pass1, weight].values, normalise=False) if ( h2_incl.GetEffectiveEntries()>0 ) : #and h1_pass.GetEffectiveEntries()>0) : sensitivity.append( ((h2_pass.GetEffectiveEntries()/h2_incl.GetEffectiveEntries()) / (3./2 + np.sqrt(h1_pass.GetEffectiveEntries()) )) / normFactor ) #print "bkg. eff. @ " , cut, ": ", h1_pass.GetEffectiveEntries()/h1_incl.GetEffectiveEntries() #print "signal eff. 
@ ", cut, ": ", h2_pass.GetEffectiveEntries()/h2_incl.GetEffectiveEntries() #print "Sensitivity gain@ ", cut, ": ", ((h2_pass.GetEffectiveEntries()/h2_incl.GetEffectiveEntries()) / (3./2 + np.sqrt(h1_pass.GetEffectiveEntries())) ) / normFactor else: sensitivity.append(0) if (h1_incl.GetEffectiveEntries()>0 ) : bkg_eff_1jet.append(h1_pass1.GetEffectiveEntries()/h1_incl.GetEffectiveEntries()) else: bkg_eff_1jet.append(0) i = i+1 #c.pads()[0].ylim(0,0.25) c.pads()[0].logy() c.pads()[0].xlim(-100,200) c.pads()[1].ylim(0,30) c.pads()[1].xlim(-100,200) c.pads()[1].graph( sensitivity, bins=knnBins) #, oob=False ) ### Decorations ### c.legend(width=0.4, xmin=0.3, ymax=0.9) #c.xlabel("n_{trk}^{#epsilon={}\%}".format(kNN_eff)) #latex(var, ROOT=True)) c.xlabel("n_{trk}^{#epsilon}") #latex(var, ROOT=True)) c.ylabel("Fraction of jets") c.pads()[1].ylabel("Sensitivity gain")#"#epsilon_{S}/(#frac{3}{2} + #sqrt{B})/") c.pads()[1].text(["Sensitivity = #varepsilon_{S}/(#frac{3}{2} + #sqrt{B})", ], xmin=0.2, ymax=0.80, ATLAS=False) c.save('figures/distributions/sensitivity_{}_sig{}_eff{}.pdf'.format(var, sigNumber, kNN_eff)) c.save('figures/distributions/sensitivity_{}_sig{}_eff{}.eps'.format(var, sigNumber, kNN_eff)) del c gr_sen = ROOT.TGraph(len(sensitivity), knnBins, sensitivity) gr_eff = ROOT.TGraph(len(bkg_eff_1jet), knnBins, bkg_eff_1jet) gr_more = ROOT.TGraph(len(sensitivity), bkg_eff_1jet, sensitivity) gr_sen.GetXaxis().SetTitle("#it{n}_{trk}^{#epsilon}-cut") gr_sen.GetYaxis().SetTitle("Sensitivity gain") gr_eff.GetYaxis().SetTitle("Single jet #varepsilon_{B}") gr_sen.GetYaxis().SetAxisColor(ROOT.kOrange+2) gr_eff.GetYaxis().SetAxisColor(ROOT.kGreen+2) gr_sen.SetMarkerColor(ROOT.kOrange+2) gr_eff.SetMarkerColor(ROOT.kGreen+2) gr_eff.SetDrawOption("Y+") c2 = rp.canvas(batch=True) c2.pads()[0].logx() c2.pads()[0].cd() #c2.pads()[0].graph(sensitivity, bkg_eff_1jet) gr_more.GetXaxis().SetTitle("Single jet #varepsilon_{B}") gr_more.GetYaxis().SetTitle("Sensitivity gain") 
#gr_more.GetXaxis().SetRangeUser(0, 0.02) gr_more.Draw("AP") #c2 = ROOT.TCanvas("can2", "", 200,10,700,500) #(batch=True) #pad1 = ROOT.TPad("pad1", "", 0,0,1,1) #c2.pads()[0]._bare() #pad1.Draw() #pad1.cd() #gr_sen.Draw("AP") #c2.cd() #pad2 = ROOT.TPad("pad2", "", 0,0,1,1) #c2.pads()[0]._bare() #pad2.SetFillStyle(4000) #pad2.Draw() #pad2.cd() #gr_eff.Draw("PY+") #gr_eff.Draw("APY+") #gr_sen.Draw("SAME") #gr_sen = c2.graph(sensitivity, bins=knnBins, markercolor=ROOT.kOrange+2) #gr_eff = c2.graph(bkg_eff_1jet, bins=knnBins, markercolor=ROOT.kGreen+2, option='Y+' ) #gr_eff.GetYaxis.SetRange(0,1) #gr_eff.Draw("SAME Y+") #c2.xlabel("Single jet #varepsilon_{B}") #c2.ylabel("Sensitivity gain") #c2.text(["#epsilon=0.5 %",], xmin=0.2, ymax=0.8, ATLAS=False) ### Save ### #mkdir('figures/distributions') c2.save('figures/distributions/sensitivity_{}_eff{}_1jet.pdf'.format(var,kNN_eff) ) del c2 outFile.cd() gr_more.SetName("sensitivity_eff{}".format(kNN_eff)) gr_more.Write() outFile.Close() #print 'figures/distributions/sensitivity_{}_sig{}_eff{}.pdf'.format(var, sigNumber, kNN_eff) pass # Plot also the normal ntrk distribution for cross check with Roland's result """