def get_sms_yield(sms_filename, xsec_filename, hist_filename, mct_cut=120., xsec_multiplier=1.):
    """Compute the expected signal yield for every SMS mass point and channel.

    Arguments:
        sms_filename    -- HDF5 store containing the SMS signal dataframe under 'data'
        xsec_filename   -- text file of reference cross-sections, parsed by load_xsec()
        hist_filename   -- ROOT file holding ScanValidator/NEvents_histo (events generated
                           per (mass1, mass2) point, used for normalization)
        mct_cut         -- lower MCT-perp cut defining the signal region (default 120.)
        xsec_multiplier -- scale factor applied to the reference cross-section (default 1.)

    Returns a DataFrame with columns mass1, mass2, channel and 'yield'
    (sum of per-event weights scaled to xsec * lumi / n_generated).

    Raises IOError if hist_filename does not exist (ROOT would otherwise
    silently continue with a non-existent file).
    """
    # Check existence BEFORE opening anything, so we don't leak an open
    # HDFStore handle on the error path (the original raised after opening).
    if not os.path.exists(hist_filename):
        raise IOError(hist_filename+" does not exist.")

    sms_file = pd.HDFStore(sms_filename)
    try:
        sms = sms_file['data']
    finally:
        # The dataframe is fully materialized in memory; release the store handle.
        sms_file.close()

    # Per-event trigger efficiency: exactly one of the four dilepton selections
    # is true per event, so the sum picks the matching efficiency.
    # NOTE(review): the *_trigger_eff values and `selection`/`channels`/`lumi`
    # are module-level names defined elsewhere in this file.
    sel_sms = selection.get_samples(sms)
    mumu_high_eta_sms = sel_sms['opposite_sign_mumu'] & (abs(sms.eta2) > 1.)
    mumu_low_eta_sms = sel_sms['opposite_sign_mumu'] & (abs(sms.eta2) < 1.)
    weight = (sel_sms['opposite_sign_ee'].astype(float)*ee_trigger_eff
              + mumu_high_eta_sms.astype(float)*mumu_high_eta_trigger_eff
              + mumu_low_eta_sms.astype(float)*mumu_low_eta_trigger_eff
              + sel_sms['opposite_sign_emu'].astype(float)*emu_trigger_eff)
    weight.name = "weight"
    sms = sms.join(weight)

    nevents_file = R.TFile(hist_filename)
    try:
        nevents_hist = nevents_file.Get("ScanValidator/NEvents_histo")
        xsec_dict = load_xsec(xsec_filename)

        output = []
        for (m1, m2), point_data in sms.groupby(['mass1', 'mass2']):
            sel = selection.get_samples(point_data)
            events_per_point = nevents_hist.GetBinContent(nevents_hist.FindBin(m1, m2))
            try:
                xsec, xsec_err = xsec_dict[float(m1)]
            except KeyError:
                # No reference cross-section for this mass -> skip the point.
                continue
            xsec *= xsec_multiplier
            scale = xsec*lumi/events_per_point
            for ch in channels:
                mass_point = point_data[sel['sig_'+ch] & (point_data.mctperp > mct_cut)]
                # Scale the summed weight instead of mutating the sliced frame:
                # the original `mass_point.weight *= scale` was a chained
                # assignment on a copy (SettingWithCopy); the result is identical.
                output.append({"mass1": m1, "mass2": m2, "channel": ch,
                               "yield": mass_point.weight.sum()*scale})
    finally:
        # The histogram is owned by the TFile, so only close after the loop.
        nevents_file.Close()
    return pd.DataFrame(output)
def create_data_file_with_signal(sms_filename, xsec_filename, hist_filename, filename="data.root",
                                 bins=19, histrange=(10, 200), mass1=400, mass2=100, xsec_multiplier=1.):
    """Write a ROOT file of pseudo-data: observed data plus injected signal.

    For each channel, fills a 'data_<ch>' histogram from the observed data and
    then injects random draws from the (mass1, mass2) SMS signal template,
    scaled to xsec * lumi / n_generated events.

    Arguments:
        sms_filename    -- HDF5 store with the SMS signal dataframe under 'data'
        xsec_filename   -- cross-section table parsed by load_xsec()
        hist_filename   -- ROOT file with ScanValidator/NEvents_histo
        filename        -- output ROOT file name (default "data.root")
        bins, histrange -- binning of the output histograms
        mass1, mass2    -- SMS mass point to inject
        xsec_multiplier -- scale factor on the reference cross-section

    Raises IOError if hist_filename does not exist.

    NOTE(review): relies on module-level `data` and `sd` (the observed data
    frame and its selections) as well as `selection`, `channels`, `lumi`,
    `rootutils` and the *_trigger_eff constants — confirm these are defined
    before this function is called.
    """
    sms_file = HDFStore(sms_filename)
    sms = sms_file['data']
    # Per-event trigger efficiency, same scheme as get_sms_yield: exactly one
    # dilepton selection is true per event.
    sel_sms = selection.get_samples(sms)
    mumu_high_eta_sms = sel_sms['opposite_sign_mumu'] & (abs(sms.eta2) > 1.)
    mumu_low_eta_sms = sel_sms['opposite_sign_mumu'] & (abs(sms.eta2) < 1.)
    weight = (sel_sms['opposite_sign_ee'].astype(float)*ee_trigger_eff
              + mumu_high_eta_sms.astype(float)*mumu_high_eta_trigger_eff
              + mumu_low_eta_sms.astype(float)*mumu_low_eta_trigger_eff
              + sel_sms['opposite_sign_emu'].astype(float)*emu_trigger_eff)
    weight.name = "weight"
    sms = sms.join(weight)
    # Restrict to the requested mass point.
    sms_data = sms[(sms.mass1 == mass1) & (sms.mass2 == mass2)]
    sel = selection.get_samples(sms_data)
    # check to see if the file exists, since ROOT will happily continue along with a non-existent file
    if not os.path.exists(hist_filename):
        raise IOError(hist_filename+" does not exist.")
    nevents_file = R.TFile(hist_filename)
    nevents_hist = nevents_file.Get("ScanValidator/NEvents_histo")
    xsec_dict = load_xsec(xsec_filename)
    # Number of generated events at this mass point, for normalization.
    events_per_point = nevents_hist.GetBinContent(nevents_hist.FindBin(mass1, mass2))
    xsec, xsec_err = xsec_dict[float(mass1)]
    xsec *= xsec_multiplier
    rfile = R.TFile(filename, "RECREATE")
    rfile.cd()
    for ch in channels:
        d = data[sd['sig_'+ch]]
        s = sms_data[sel['sig_'+ch]]
        template = rootutils.create_TH1(d.mctperp, d.weight, "data_"+ch, bins, histrange)
        sms_template = rootutils.create_TH1(s.mctperp, s.weight, "sms_"+ch, bins, histrange)
        # Scale the signal template to the expected event count, then inject
        # that many random draws from its shape into the data histogram.
        sms_template.Scale(xsec*lumi/events_per_point)
        for i in xrange(int(sms_template.Integral())):
            template.Fill(sms_template.GetRandom())
        template.Write()
    rfile.Close()
return event wz = wz.apply(shuffle_leptons, axis=1) mt = np.sqrt(wz.pt3*wz.metPt*(1-np.cos(wz.phi3-wz.metPhi))) # wz = wz[(mt > 30) & (mt < 80)] # recalculate MCT wz.mctperp = wz.apply(recalc_MCT, axis=1) wz.mctperp *= 80.4/91.2 wz.metPt = wz.apply(recalc_MET, axis=1) wz.ThirdLepton = False swz = selection.get_samples(wz) wz = wz[swz['sig_of']] wz_data = data[data.ThirdLepton & (data.metPt > 30)] wz_data = wz_data.apply(shuffle_leptons, axis=1) mt = np.sqrt(wz_data.pt3*wz_data.metPt*(1-np.cos(wz_data.phi3-wz_data.metPhi))) # wz_data = wz_data[(mt > 30) & (mt < 80)] wz_data.mctperp = wz_data.apply(recalc_MCT, axis=1) wz_data.mctperp *= 80.4/91.2 wz_data.metPt = wz_data.apply(recalc_MET, axis=1) wz_data.ThirdLepton = False swz_data = selection.get_samples(wz_data) wz_data = wz_data[swz_data['sig_of']]
""" This script is meant to test my idea for finding the number of un-tagged top events by comparing the results of two different b-taggers. """ from pandas import * from math import log import selection reload(selection) # load the dataset I made for this occasion s = HDFStore("work/mc/b-tagging.hdf5") mc = s['data'] smc = selection.get_samples(mc) # get the data that passes all cuts but the b-veto sig_mc = mc[smc['sig_no_b_veto']&smc['mct_low']] # now we get the numbers of events with different taggers n_csvl = {} for n_tags, data in sig_mc.groupby("n_jets_csvl"): n_csvl[n_tags] = sum(data.weight) n_csvm = {} for n_tags, data in sig_mc.groupby("n_jets_csvm"): n_csvm[n_tags] = sum(data.weight) n_jpl = {} for n_tags, data in sig_mc.groupby("n_jets_jpl"):
def create_template_file(filename="templates.root", bins=19, histrange=(10, 200)):
    """ Create a ROOT file containing all of the background templates

    Builds MCT-perp templates for top, W+jets, diboson (VV) and Z backgrounds
    in each channel, plus per-bin systematic histograms and TVectorD
    normalization constraints, and writes them all to `filename`.

    NOTE(review): depends on module-level `mc`, `smc`, `data`, `sd`,
    `channels`, `selection`, `rootutils`, `shuffle_leptons`, `recalc_MCT`
    and `recalc_MET` — confirm they are defined before this is called.
    """
    # Diboson and Drell-Yan MC subsets, with selections at a 100 GeV MCT cut.
    mcvv = mc[(mc.mc_cat=='WW') | (mc.mc_cat=='ZZ') | (mc.mc_cat=='WZ') | (mc.mc_cat=='VVV') | (mc.mc_cat=='HWW')]
    mcz = mc[mc.mc_cat=='DY']
    selvv = selection.get_samples( mcvv, 100.)
    selz = selection.get_samples( mcz, 100.)
    rfile = R.TFile(filename, "RECREATE")
    rfile.cd()
    templates = {}
    constraints = {}
    # prep WW systematics:
    # build a WW-like control sample from 3-lepton events ("3-1 region") in
    # both MC and data, treating the third lepton as MET.
    three_m1_mc = mc[mc.ThirdLepton & ~np.isnan(mc.pt3) & (mc.metPt > 30)]
    three_m1_mc = three_m1_mc.apply(shuffle_leptons, axis=1)
    three_m1_mc.mctperp = three_m1_mc.apply(recalc_MCT, axis=1)
    # Rescale MCT by m(W)/m(Z) since the emulated events sit at the Z mass.
    three_m1_mc.mctperp *= 80.4/91.2
    three_m1_mc.metPt = three_m1_mc.apply(recalc_MET, axis=1)
    three_m1_mc.ThirdLepton = False
    sthree_m1_mc = selection.get_samples(three_m1_mc)
    three_m1_mc = three_m1_mc[sthree_m1_mc['sig_of']]
    three_m1_data = data[data.ThirdLepton]
    three_m1_data = three_m1_data.apply(shuffle_leptons, axis=1)
    three_m1_data.mctperp = three_m1_data.apply(recalc_MCT, axis=1)
    three_m1_data.mctperp *= 80.4/91.2
    three_m1_data.metPt = three_m1_data.apply(recalc_MET, axis=1)
    three_m1_data.ThirdLepton = False
    sthree_m1_data = selection.get_samples(three_m1_data)
    three_m1_data = three_m1_data[sthree_m1_data['sig_of']]
    ww = mc[smc['sig_of'] & (mc.mc_cat=="WW")]
    # re-weighting factors from comparing 3-1 lepton region to WW in MC
    ww_hist, ww_bins = np.histogram(ww.mctperp, weights=ww.weight, bins=19, range=(10,200), normed=True)
    three_m1_mc_hist, _ = np.histogram(three_m1_mc.mctperp, weights=three_m1_mc.weight, bins=19, range=(10,200), normed=True)
    three_m1_reweighting = ww_hist.astype(np.float64)/three_m1_mc_hist.astype(np.float64)

    def get_weight(mctperp):
        # Index of the last bin edge below mctperp -> the bin's reweight factor.
        i = np.argmax(np.where(ww_bins<mctperp, ww_bins, 0))
        return three_m1_reweighting[i]

    # re-weight data 3-1 region
    reweighting = three_m1_data.mctperp.apply(get_weight)
    three_m1_data.weight *= reweighting
    # derive systematic from comparison of 3-1 data to MC:
    # systematic is the larger of fractional uncertainty on data and
    # discrepancy between data and MC
    three_m1_data_hist, _ = np.histogram(three_m1_data.mctperp, bins=19, range=(10,200))  # unweighted
    three_m1_stat = 1./np.sqrt(three_m1_data_hist)  # fractional
    three_m1_data_hist_normed, _ = np.histogram(three_m1_data.mctperp, weights=three_m1_data.weight, bins=19, range=(10,200), normed=True)
    ww_discrepancy = abs(three_m1_data_hist_normed.astype(np.float64)-ww_hist.astype(np.float64))/ww_hist.astype(np.float64)
    ww_systematic = np.max((three_m1_stat, ww_discrepancy), axis=0)
    # Empty bins give inf/nan -> assign a 100% systematic there.
    ww_systematic[np.isinf(ww_systematic)] = 1.
    ww_systematic[np.isnan(ww_systematic)] = 1.
    for ch in channels:
        # --- top template: data-driven shape, normalized to MC expectation ---
        top = data[sd['top_ctrl_'+ch]]
        templates['top_'+ch] = rootutils.create_TH1(top.mctperp, top.weight, "top_template_"+ch, bins, histrange, True)
        templates['top_'+ch].Scale(mc[smc['sig_mct_low_'+ch] & (mc.mc_cat=="top") & (mc['gen_neutrinos'] >= 2)].weight.sum())
        # --- W+jets template from its control region ---
        wjets = data[sd['wjets_ctrl_'+ch]]
        templates['wjets_'+ch] = rootutils.create_TH1(wjets.mctperp, wjets.weight, "wjets_template_"+ch, bins, histrange, True)
        # systematic on w+jets template
        rhist = R.TH1D("wjets_syst_"+ch, "wjets_syst_"+ch, bins, histrange[0], histrange[1])
        for i in xrange(bins):
            # if templates['wjets_'+ch].GetBinContent(i+1) > 0: #only do non-zero bins
            rhist.SetBinContent(i+1, 0.3)  # 30% flat systematic (original comment said 50%, but the value is 0.3)
        templates['wjets_syst_'+ch] = rhist
        # Normalize: fakes plus top events with < 2 generator neutrinos.
        templates['wjets_'+ch].Scale(mc[smc['sig_mct_low_'+ch] & ((mc['mc_cat']=='fake') | ((mc['mc_cat']=='top') & (mc['gen_neutrinos'] < 2)))].weight.sum())
        # --- diboson template straight from MC ---
        vv = mcvv[selvv['sig_'+ch]]
        templates['vv_'+ch] = rootutils.create_TH1(vv.mctperp, vv.weight, "vv_template_"+ch, bins, histrange, False)
        # top to vv ratio
        constraints['top_vv_ratio_'+ch] = R.TVectorD(1)
        constraints['top_vv_ratio_'+ch][0] = mc[smc['sig_mct_low_'+ch] & (mc.mc_cat=="top")].weight.sum() / mc[smc['sig_mct_low_'+ch] & ((mc.mc_cat=='WW') | (mc.mc_cat=='ZZ') | (mc.mc_cat=='WZ') | (mc.mc_cat=='VVV') | (mc.mc_cat=='HWW'))].weight.sum()
        # WW fraction of the VV template in the analysis binning
        # (note: ww_hist is re-bound here, unnormalized; the normalized one
        # above was already folded into ww_systematic / three_m1_reweighting).
        ww_hist, template_bins = np.histogram(ww.mctperp, weights=ww.weight, bins=bins, range=histrange)
        vv_hist, template_bins = np.histogram(vv.mctperp, weights=vv.weight, bins=bins, range=histrange)
        ww_frac = ww_hist/vv_hist
        ww_frac[np.isnan(ww_frac)] = 0.
        bin_centers = (template_bins[1:]+template_bins[:-1])/2
        rhist = R.TH1D("ww_syst_"+ch, "ww_syst_"+ch, bins, histrange[0], histrange[1])
        for i, b in enumerate(bin_centers):
            if b > ww_bins[-1]:
                # Outside the range where the 3-1 systematic was derived.
                syst = 0.
            else:
                j = np.argmax(np.where(ww_bins<b, ww_bins, 0))
                syst = ww_systematic[j]*ww_frac[i]
            if np.isnan(syst):
                syst = 0.
            rhist.SetBinContent(i+1, syst)
        templates['ww_syst_'+ch] = rhist
        # shape systematic: +/-10% on each major diboson component,
        # +/-50% on the rarer VVV and HWW components.
        weights = vv.weight*(1+(vv.mc_cat=="WW")*0.10)
        templates['vv_syst_WW_'+ch+'Up'] = rootutils.create_TH1(vv.mctperp, weights, "vv_syst_WW_"+ch+"Up", bins, histrange, False)
        weights = vv.weight*(1-(vv.mc_cat=="WW")*0.10)
        templates['vv_syst_WW_'+ch+'Down'] = rootutils.create_TH1(vv.mctperp, weights, "vv_syst_WW_"+ch+"Down", bins, histrange, False)
        weights = vv.weight*(1+(vv.mc_cat=="WZ")*0.10)
        templates['vv_syst_WZ_'+ch+'Up'] = rootutils.create_TH1(vv.mctperp, weights, "vv_syst_WZ_"+ch+"Up", bins, histrange, False)
        weights = vv.weight*(1-(vv.mc_cat=="WZ")*0.10)
        templates['vv_syst_WZ_'+ch+'Down'] = rootutils.create_TH1(vv.mctperp, weights, "vv_syst_WZ_"+ch+"Down", bins, histrange, False)
        weights = vv.weight*(1+(vv.mc_cat=="ZZ")*0.10)
        templates['vv_syst_ZZ_'+ch+'Up'] = rootutils.create_TH1(vv.mctperp, weights, "vv_syst_ZZ_"+ch+"Up", bins, histrange, False)
        weights = vv.weight*(1-(vv.mc_cat=="ZZ")*0.10)
        templates['vv_syst_ZZ_'+ch+'Down'] = rootutils.create_TH1(vv.mctperp, weights, "vv_syst_ZZ_"+ch+"Down", bins, histrange, False)
        weights = vv.weight*(1+(vv.mc_cat=="VVV")*0.50)
        templates['vv_syst_VVV_'+ch+'Up'] = rootutils.create_TH1(vv.mctperp, weights, "vv_syst_VVV_"+ch+"Up", bins, histrange, False)
        weights = vv.weight*(1-(vv.mc_cat=="VVV")*0.50)
        templates['vv_syst_VVV_'+ch+'Down'] = rootutils.create_TH1(vv.mctperp, weights, "vv_syst_VVV_"+ch+"Down", bins, histrange, False)
        weights = vv.weight*(1+(vv.mc_cat=="HWW")*0.50)
        templates['vv_syst_HWW_'+ch+'Up'] = rootutils.create_TH1(vv.mctperp, weights, "vv_syst_HWW_"+ch+"Up", bins, histrange, False)
        weights = vv.weight*(1-(vv.mc_cat=="HWW")*0.50)
        templates['vv_syst_HWW_'+ch+'Down'] = rootutils.create_TH1(vv.mctperp, weights, "vv_syst_HWW_"+ch+"Down", bins, histrange, False)
        # --- Z template ---
        z = mcz[selz['sig_'+ch]]
        templates['z_'+ch] = rootutils.create_TH1(z.mctperp, z.weight, "z_template_"+ch, bins, histrange, True)
        if ch == 'sf':
            # systematic on Z monte carlo: flat error from the worst data/MC
            # discrepancy in the first 10 bins of the on-Z control region.
            mc_onz = mc[smc['z_ctrl_sf']]
            data_onz = data[sd['z_ctrl_sf']]
            mc_hist, mc_edges = np.histogram(mc_onz.mctperp, weights=mc_onz.weight, bins=bins, range=histrange, normed=True)
            d_hist, d_edges = np.histogram(data_onz.mctperp, weights=data_onz.weight, bins=bins, range=histrange, normed=True)
            err = np.zeros(bins)
            err[:] = np.max(abs(mc_hist[:10]-d_hist[:10])/d_hist[:10])
            # err[10:] = 0.5
            # make a TH1 out of it
            rhist = R.TH1D("z_syst", "z_syst", bins, histrange[0], histrange[1])
            for i, val in enumerate(err):
                rhist.SetBinContent(i+1, val)
            templates['z_syst'] = rhist
    # Channel-independent normalization constraints (computed once, after the
    # per-channel loop — NOTE(review): placement reconstructed from a
    # whitespace-mangled original; confirm against version control).
    constraints['top_ratio'] = R.TVectorD(1)
    constraints['top_ratio'][0] = mc[smc['sig_mct_low_of'] & (mc.mc_cat=="top")].weight.sum()/mc[smc['sig_mct_low_sf'] & (mc.mc_cat=="top")].weight.sum()
    constraints['vv_ratio'] = R.TVectorD(1)
    constraints['vv_ratio'][0] = (mc[smc['sig_mct_low_of'] & (mc.mc_cat=="WW")].weight.sum()/mc[smc['sig_mct_low_sf'] & (mc.mc_cat=="WW")].weight.sum())\
        * mcvv[selvv['sig_mct_low_sf']].weight.sum()/mcvv[selvv['sig_mct_low_of']].weight.sum()
    for k in templates.keys():
        templates[k].Write()
    for k in constraints.keys():
        constraints[k].Write(k)
    rfile.Close()
def create_signal_file(input_file, out_filename, hist_filename, xsec_filename,
                       xsec_multiplier=1., bins=19, histrange=(10,200)):
    """Write per-mass-point SMS signal templates (nominal and JES up/down) to a ROOT file.

    Applies trigger-efficiency weighting, pileup reweighting to the data PU
    profile, and FastSim->FullSim lepton scale factors, then writes one
    MCT-perp template per (mass1, mass2, channel), scaled to
    xsec * lumi / n_generated.

    Arguments:
        input_file      -- HDF5 store with the SMS dataframe under 'data'
        out_filename    -- output ROOT file (RECREATEd)
        hist_filename   -- ROOT file with ScanValidator/NEvents_histo
        xsec_filename   -- cross-section table parsed by load_xsec()
        xsec_multiplier -- scale factor on the reference cross-section
        bins, histrange -- template binning

    Raises IOError if hist_filename does not exist.
    """
    sms_file = HDFStore(input_file)
    sms = sms_file['data']
    # Per-event trigger efficiency, same scheme as get_sms_yield.
    sel_sms = selection.get_samples(sms)
    mumu_high_eta_sms = sel_sms['opposite_sign_mumu'] & (abs(sms.eta2) > 1.)
    mumu_low_eta_sms = sel_sms['opposite_sign_mumu'] & (abs(sms.eta2) < 1.)
    weight = (sel_sms['opposite_sign_ee'].astype(float)*ee_trigger_eff
              + mumu_high_eta_sms.astype(float)*mumu_high_eta_trigger_eff
              + mumu_low_eta_sms.astype(float)*mumu_low_eta_trigger_eff
              + sel_sms['opposite_sign_emu'].astype(float)*emu_trigger_eff)
    weight.name = "weight"
    sms = sms.join(weight)

    # pu reweighting: target profile is the hard-coded generator PU scenario
    # (60 bins). NOTE: the np.histogram of nTruePuVertices computed in the
    # original was immediately overwritten by this array; it is dropped here.
    sms_pu_hist = np.asarray([2.344E-05, 2.344E-05, 2.344E-05, 2.344E-05, 4.687E-04, 4.687E-04,
                              7.032E-04, 9.414E-04, 1.234E-03, 1.603E-03, 2.464E-03, 3.250E-03,
                              5.021E-03, 6.644E-03, 8.502E-03, 1.121E-02, 1.518E-02, 2.033E-02,
                              2.608E-02, 3.171E-02, 3.667E-02, 4.060E-02, 4.338E-02, 4.520E-02,
                              4.641E-02, 4.735E-02, 4.816E-02, 4.881E-02, 4.917E-02, 4.909E-02,
                              4.842E-02, 4.707E-02, 4.501E-02, 4.228E-02, 3.896E-02, 3.521E-02,
                              3.118E-02, 2.702E-02, 2.287E-02, 1.885E-02, 1.508E-02, 1.166E-02,
                              8.673E-03, 6.190E-03, 4.222E-03, 2.746E-03, 1.698E-03, 9.971E-04,
                              5.549E-04, 2.924E-04, 1.457E-04, 6.864E-05, 3.054E-05, 1.282E-05,
                              5.081E-06, 1.898E-06, 6.688E-07, 2.221E-07, 6.947E-08, 2.047E-08])
    pu_file = R.TFile("config/TruePU.root")
    pu_th1 = pu_file.Get("pileup")
    data_pu_hist = np.zeros(sms_pu_hist.shape)
    for i in xrange(len(data_pu_hist)):
        data_pu_hist[i] = pu_th1.GetBinContent(i+1)
    # normalize the histograms
    sms_pu_hist *= 1./np.sum(sms_pu_hist)
    data_pu_hist *= 1./np.sum(data_pu_hist)
    # calculate weights
    pu_weights = data_pu_hist/sms_pu_hist
    # apply the weights. BUGFIX: the weight array has 60 entries (valid
    # indices 0..59); the original clamped to 60, which raised IndexError
    # for events with nTruePuVertices >= 60. Clamp to the last valid index.
    sms.nTruePuVertices[sms.nTruePuVertices > 59] = 59
    event_pu_weights = sms.nTruePuVertices.apply(lambda n: pu_weights.item(int(n)))
    sms.weight *= event_pu_weights

    # FastSim -> FullSim reweighting
    mu_weight_file = R.TFile("config/muon_FastSim_EWKino.root")
    mu_fastsim_weights = mu_weight_file.Get("SF")
    ele_weight_file = R.TFile("config/electron_FastSim_EWKino.root")
    # BUGFIX: originally read "SF" from mu_weight_file (copy-paste error), so
    # electrons silently got the muon scale factors.
    ele_fastsim_weights = ele_weight_file.Get("SF")

    def fastsim_weight(row):
        # Scale factor per lepton, binned in (pt, |eta|); pdg id 13 == muon.
        if abs(row['pdg1']) == 13:
            s1 = mu_fastsim_weights.GetBinContent(mu_fastsim_weights.GetXaxis().FindBin(row['pt1']),
                                                  mu_fastsim_weights.GetYaxis().FindBin(abs(row['eta1'])))
        else:
            s1 = ele_fastsim_weights.GetBinContent(ele_fastsim_weights.GetXaxis().FindBin(row['pt1']),
                                                   ele_fastsim_weights.GetYaxis().FindBin(abs(row['eta1'])))
        if abs(row['pdg2']) == 13:
            s2 = mu_fastsim_weights.GetBinContent(mu_fastsim_weights.GetXaxis().FindBin(row['pt2']),
                                                  mu_fastsim_weights.GetYaxis().FindBin(abs(row['eta2'])))
        else:
            s2 = ele_fastsim_weights.GetBinContent(ele_fastsim_weights.GetXaxis().FindBin(row['pt2']),
                                                   ele_fastsim_weights.GetYaxis().FindBin(abs(row['eta2'])))
        return s1 * s2

    sms.weight *= sms.apply(fastsim_weight, axis=1)

    # check to see if the file exists, since ROOT will happily continue along with a non-existent file
    if not os.path.exists(hist_filename):
        raise IOError(hist_filename+" does not exist.")
    nevents_file = R.TFile(hist_filename)
    nevents_hist = nevents_file.Get("ScanValidator/NEvents_histo")
    xsec_dict = load_xsec(xsec_filename)
    rfile = R.TFile(out_filename, "RECREATE")
    templates = {}
    jes_up_templates = {}
    jes_down_templates = {}
    # create a different template for each mass point
    # templates are scaled to number of expected events given the reference cross-section
    for (m1, m2), point_data in sms.groupby(['mass1', 'mass2']):
        sel = selection.get_samples(point_data)
        events_per_point = nevents_hist.GetBinContent(nevents_hist.FindBin(m1, m2))
        try:
            xsec, xsec_err = xsec_dict[float(m1)]
        except KeyError:
            continue
        xsec *= xsec_multiplier
        for ch in channels:
            mass_point = point_data[sel['sig_'+ch]]
            mass_point_jes_up = point_data[sel['sig_scaleup_'+ch]]
            mass_point_jes_down = point_data[sel['sig_scaledown_'+ch]]
            # Skip points with no events in the histogram range.
            if mass_point[(mass_point.mctperp > histrange[0]) & (mass_point.mctperp < histrange[1])].mctperp.count() == 0:
                continue
            h = rootutils.create_TH1(mass_point.mctperp, mass_point.weight,
                                     "sms_template_{}_{}_{}".format(ch, int(m1), int(m2)), bins, histrange, False)
            hup = rootutils.create_TH1(mass_point_jes_up.mctperp_up, mass_point_jes_up.weight,
                                       "sms_template_jes_up_{}_{}_{}".format(ch, int(m1), int(m2)), bins, histrange, False)
            hdown = rootutils.create_TH1(mass_point_jes_down.mctperp_down, mass_point_jes_down.weight,
                                         "sms_template_jes_down_{}_{}_{}".format(ch, int(m1), int(m2)), bins, histrange, False)
            h.Scale(xsec*lumi/events_per_point)
            hup.Scale(xsec*lumi/events_per_point)
            hdown.Scale(xsec*lumi/events_per_point)
            tname = 'sms_{}_{}_{}'.format(ch, m1, m2)
            templates[tname] = h
            jes_up_templates[tname] = hup
            jes_down_templates[tname] = hdown
    for k in templates.keys():
        templates[k].Write()
        jes_down_templates[k].Write()
        jes_up_templates[k].Write()
    rfile.Close()
"""Collect the set of (mass1, mass2) points present in the low-MCT signal regions.

Usage: script.py <hdf5_file> [<unused>] [<sf_only flag>]
Builds `masses` as the intersection of the mass points seen in the OF and SF
regions, or the SF points alone when the sf_only flag is set.
"""
from pandas import HDFStore
from selection import get_samples
import sys

sf_only = False
if len(sys.argv) >= 4:
    # BUGFIX: the original used bool(sys.argv[3]), which is True for ANY
    # non-empty string — including "False" and "0". Treat explicit falsy
    # spellings as False; any other value keeps the flag-presence semantics.
    sf_only = sys.argv[3].lower() not in ("", "0", "false", "no")

s = HDFStore(sys.argv[1])
d = s['data']
sd = get_samples(d)

# Mass points appearing in the opposite-flavor low-MCT signal region.
dsig_of = d[sd['sig_mct_low_of']]
masses_of = set()
for g, _ in dsig_of.groupby(['mass1', 'mass2']):
    masses_of.add(tuple(map(int, g)))

# Mass points appearing in the same-flavor low-MCT signal region.
dsig_sf = d[sd['sig_mct_low_sf']]
masses_sf = set()
for g, _ in dsig_sf.groupby(['mass1', 'mass2']):
    masses_sf.add(tuple(map(int, g)))

if sf_only:
    masses = masses_sf
else:
    # Only keep points populated in BOTH channels.
    masses = masses_sf.intersection(masses_of)
import numpy as np
import ROOT as R

# background MC: load the simulation dataframe and drop a problematic sample.
s = HDFStore("work/mc/mc_20131227.hdf5")
mc = s['data']
mc = mc[mc.mctype != "WGStarToLNu2E"]
# Per-event weight = cross-section * efficiency * luminosity.
# NOTE(review): `lumi`, `selection` and the *_trigger_eff constants must be
# defined earlier in this file — confirm against the full source.
weights = mc.x_eff*lumi
weights.name = "weight"
mc = mc.join(weights)
# apply trigger efficiencies: exactly one dilepton category is true per
# event, so the sum selects the matching efficiency. Muon efficiency is
# split by the |eta| of the second lepton at 1.0.
smc = selection.get_samples(mc)
mumu_high_eta = smc['mumu'] & (abs(mc.eta2) > 1.)
mumu_low_eta = smc['mumu'] & (abs(mc.eta2) < 1.)
mc.weight *= (smc['ee'].astype(float)*ee_trigger_eff
              + mumu_high_eta.astype(float)*mumu_high_eta_trigger_eff
              + mumu_low_eta.astype(float)*mumu_low_eta_trigger_eff
              + smc['emu'].astype(float)*emu_trigger_eff)
# adjust some MC categories (disabled — kept for reference)
# cat = mc.pop('mc_cat')
# cat[mc.mctype=="WWZNoGstar"] = "VVV"
# cat[mc.mctype=="WWW"] = 'VVV'
# mc = mc.join(cat)
# b-tag reweighting
def __init__(self, data, mc, signal, mct_cut):
    """ Initialize model

    Arguments:
    data - dataframe containing observations
    mc - dataframe containing simulation
    signal - dataframe containing the signal
    mct_cut - float defining MCT cut for signal region

    NOTE(review): `signal` is accepted but not used anywhere in this
    constructor — confirm whether it should be stored on self.
    """
    super(StatModel, self).__init__()
    self.mct_cut = mct_cut
    self.channels = ['_of', '_sf']
    # RooFit category distinguishing the opposite- and same-flavor channels.
    self.roofit_channel = r.RooCategory("channel", "channel")
    for c in self.channels:
        self.roofit_channel.defineType(c)
    self.backgrounds = ['top', 'wjets', 'vv', 'z']
    # set up the data samples
    self.data = data
    # NOTE(review): category 'WV' differs from the 'WW'/'WZ' labels used in
    # other parts of this file — confirm the intended diboson categories.
    self.mcvv = mc[(mc.mc_cat=='WV') | (mc.mc_cat=='ZZ')]
    self.mcz = mc[mc.mc_cat=='DY']
    # set up the selections
    self.sel = selection.get_samples(self.data, mct_cut)
    self.selvv = selection.get_samples(self.mcvv, mct_cut)
    self.selz = selection.get_samples(self.mcz, mct_cut)
    # set up RooFit variables
    self.mct = r.RooRealVar("mct", "mct", 5., 100.)  # TODO: fix upper limit
    self.w = r.RooRealVar("w", "w", 0., 10.)  # per-event weight variable
    self.evt_yield = r.RooRealVar("yield", "yield", 0., 1000.)
    # number of events in the sideband regions
    # one copy for each channel
    self.n_sb = defaultdict(dict)
    self.n_ctrl_sb = defaultdict(dict)
    self.n_ctrl_sig = defaultdict(dict)
    for bkg in self.backgrounds:
        for channel in self.channels:
            # n_sb comes from the normalization fit in the sideband
            self.n_sb[bkg][channel] = r.RooRealVar("n_sb_{0}_{1}".format(bkg, channel),
                                                   "n_sb_{0}_{1}".format(bkg, channel),
                                                   1000, -10000., 10000.)
            # This comes from the sideband in the control sample
            # It's treated as having no uncertainty
            self.n_ctrl_sb[bkg][channel] = r.RooRealVar("n_ctrl_sb_{0}_{1}".format(bkg, channel),
                                                        "n_ctrl_sb_{0}_{1}".format(bkg, channel),
                                                        1000, -10000., 10000.)
            # This comes from the high-mct signal region in the control sample
            # It's treated as a sample from a Poisson-distributed quantity with unknown mean
            self.n_ctrl_sig[bkg][channel] = r.RooRealVar("n_ctrl_sig_{0}_{1}".format(bkg, channel),
                                                         "n_ctrl_sig_{0}_{1}".format(bkg, channel),
                                                         1000, -10000., 10000.)
    # set up dataset: one weighted RooDataSet per channel, then combined
    # into a single dataset indexed by the channel category.
    ds_ch = {}
    for channel in self.channels:
        ds_ch[channel] = rootutils.create_roodataset(self.data[self.sel['sig_mct_low'+channel]].mctperp,
                                                     self.data[self.sel['sig_mct_low'+channel]].weight,
                                                     self.mct, self.w, title="data")
    self.dataset = r.RooDataSet("dataset", "dataset",
                                r.RooArgSet(self.mct, self.w),
                                r.RooFit.Index(self.roofit_channel),
                                r.RooFit.Import("_of", ds_ch['_of']),
                                r.RooFit.Import("_sf", ds_ch['_sf']))