def main():
    """Build the GBS background estimate and write it to a ROOT file.

    The transfer factor (TF) is fitted at each of 35 mass points between 100
    and 270 GeV. For every failing jet, the TF weights from all mass points
    whose +/- 20% window does *not* contain the jet mass are averaged. The
    failing jets, scaled by these averaged weights, are written for the
    nominal, TF-up and TF-down variations to
    ``output/objdef_GBS[MC]_<DSID>.root``. The configuration and the parsed
    command-line arguments are dumped as JSON to ``logs/``.

    Reads the command line through the module-level ``parser``; returns
    ``None`` (early if the requested options are inconsistent or no input
    files are found).
    """

    # Parse command-line arguments
    args = parser.parse_args()

    # Check(s)
    if (not args.data) and args.subtractWZMC:
        warning(
            "Requesting to subtract W/Z MC from MC background which contains no contamination. Exiting."
        )
        return

    if (not args.data) and args.subtractWZdata:
        warning(
            "Requesting to subtract W/Z data from MC background which contains no contamination. Exiting."
        )
        return

    # Setup.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Load data
    base_path = tf.config['base_path']
    if args.data:
        files_data = glob.glob(base_path + 'objdef_data_*.root')
    else:
        files_data = glob.glob(base_path + 'objdef_MC_3610*.root')

    files_WZ = (glob.glob(base_path + 'objdef_MC_30543*.root') +
                glob.glob(base_path + 'objdef_MC_30544*.root'))

    if len(files_data) == 0:
        warning("No files found. Try to run:")
        warning(" $ source getSomeData.sh")
        return

    data = loadData(files_data, tf.config['tree'], prefix=tf.config['prefix'])
    WZ = loadData(files_WZ, tf.config['tree'], prefix=tf.config['prefix'])
    info_data = loadData(files_data, tf.config['outputtree'], stop=1)
    info_WZ = loadData(files_WZ, tf.config['outputtree'], stop=1)

    # Scaling by cross section
    xsec = loadXsec(tf.config['xsec_file'])

    # Append new DSID field
    data = append_fields(data, 'DSID', np.zeros((data.size, )), dtypes=int)
    for idx, file_id in enumerate(info_data['id']):
        # Mask of all 'data' entries with same id, i.e. from same file
        msk = (data['id'] == file_id)
        tmp_DSID = info_data['DSID'][idx]  # DSID for this file
        if not args.data:
            # (MC only) Scale by cross section x filter eff. for this DSID
            data['weight'][msk] *= xsec[tmp_DSID]
            data['DSID'][msk] = tmp_DSID  # Store DSID

    if not args.data:
        data['weight'] *= tf.config['lumi']  # Scale all events (MC) by luminosity

    WZ = append_fields(WZ, 'DSID', np.zeros((WZ.size, )), dtypes=int)
    # Look up the DSID by position in the info array and match events by the
    # id value, exactly as for 'data' above. Using the id value itself as the
    # index is only right when the ids happen to be 0..N-1 in order.
    for idx, file_id in enumerate(info_WZ['id']):
        # Mask of all 'WZ' entries with same id, i.e. from same file
        msk = (WZ['id'] == file_id)
        tmp_DSID = info_WZ['DSID'][idx]  # DSID for this file
        WZ['weight'][msk] *= xsec[tmp_DSID]  # Scale by cross section x filter eff.
        WZ['DSID'][msk] = tmp_DSID  # Store DSID

    WZ['weight'] *= tf.config['lumi']  # Scale all events (MC) by luminosity

    # Compute new variables
    data = append_fields(data, 'logpt', np.log(data['pt']))
    WZ = append_fields(WZ, 'logpt', np.log(WZ['pt']))

    # Transfer factor
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Pass/fail masks
    msk_pass = tf.config['pass'](data)
    msk_fail = ~msk_pass
    msk_WZ_pass = tf.config['pass'](WZ)
    msk_WZ_fail = ~msk_WZ_pass

    # Transfer factor calculator instance
    calc = tf.calculator(data=data,
                         config=tf.config,
                         subtract=WZ if (args.subtractWZMC and args.data) else None)

    # GBS mass bins
    masses = np.linspace(100, 270, 34 + 1, endpoint=True)

    # Weight and counter arrays: one entry per failing jet
    num_fail = np.sum(msk_fail)
    num_WZ_fail = np.sum(msk_WZ_fail)

    weights_bkg_nom = np.zeros((num_fail, ), dtype=float)
    weights_bkg_up = np.zeros((num_fail, ), dtype=float)
    weights_bkg_down = np.zeros((num_fail, ), dtype=float)
    counter_bkg = np.zeros((num_fail, ), dtype=float)

    weights_WZ_nom = np.zeros((num_WZ_fail, ), dtype=float)
    weights_WZ_up = np.zeros((num_WZ_fail, ), dtype=float)
    weights_WZ_down = np.zeros((num_WZ_fail, ), dtype=float)
    counter_WZ = np.zeros((num_WZ_fail, ), dtype=float)

    for mass in masses:
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print(" -- %s" % mass)

        # Fit TF profile
        calc.mass = mass
        calc.fullfit()
        if args.show or args.save:
            calc.plot(show=args.show,
                      save=args.save,
                      prefix='plots/tf_gbs_%s_%dGeV_' % ('data' if args.data else 'MC', mass),
                      MC=not args.data)

        # Get TF weights
        w_nom, w_up, w_down = calc.fullweights(data[msk_fail])
        w_WZ_nom, w_WZ_up, w_WZ_down = calc.fullweights(WZ[msk_WZ_fail])

        # Compute mask for which jets to use in GBS computation: only jets
        # outside the +/- 20% window around the current mass point
        msk_gbs = ~(np.abs(data[msk_fail]['m'] - mass) < 0.2 * mass)
        msk_gbs_WZ = ~(np.abs(WZ[msk_WZ_fail]['m'] - mass) < 0.2 * mass)

        # Store weights and increment counter for masked jets
        weights_bkg_nom[msk_gbs] += w_nom[msk_gbs]
        weights_bkg_up[msk_gbs] += w_up[msk_gbs]
        weights_bkg_down[msk_gbs] += w_down[msk_gbs]
        counter_bkg[msk_gbs] += 1.

        weights_WZ_nom[msk_gbs_WZ] += w_WZ_nom[msk_gbs_WZ]
        weights_WZ_up[msk_gbs_WZ] += w_WZ_up[msk_gbs_WZ]
        weights_WZ_down[msk_gbs_WZ] += w_WZ_down[msk_gbs_WZ]
        counter_WZ[msk_gbs_WZ] += 1.

    # Take average of jets in signal regions (entries never counted keep
    # their initial weight of zero)
    msk = (counter_bkg > 0)
    weights_bkg_nom[msk] /= counter_bkg[msk]
    weights_bkg_up[msk] /= counter_bkg[msk]
    weights_bkg_down[msk] /= counter_bkg[msk]

    msk = (counter_WZ > 0)
    weights_WZ_nom[msk] /= counter_WZ[msk]
    weights_WZ_up[msk] /= counter_WZ[msk]
    weights_WZ_down[msk] /= counter_WZ[msk]

    # Computing data-driven background estimate
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    check_make_dir('output')

    DSID = 400000 if args.data else 400001

    # Write TF-scaled failing data to file
    output = ROOT.TFile(
        'output/objdef_GBS{MC}_{DSID}.root'.format(DSID=DSID, MC='' if args.data else 'MC'),
        'RECREATE')

    # (branch name, background weights, W/Z weights) for each variation
    variations = [
        ('Nominal', weights_bkg_nom, weights_WZ_nom),
        ('TF_UP', weights_bkg_up, weights_WZ_up),
        ('TF_DOWN', weights_bkg_down, weights_WZ_down),
    ]

    for var_name, w, w_WZ in variations:

        # -- Prepare mass- and weight vectors
        vector_m = data['m'][msk_fail]
        vector_w = data['weight'][msk_fail] * w
        if args.subtractWZdata and args.data:
            if WZ is not None and WZ.size > 0:
                print(" Subtracting TF-scaled W/Z MC from background estimate")
                # The W/Z jets are appended with negative weights, so filling
                # a histogram from these vectors performs the subtraction.
                vector_m = np.concatenate((vector_m, WZ['m'][msk_WZ_fail]))
                vector_w = np.concatenate(
                    (vector_w, -WZ['weight'][msk_WZ_fail] * w_WZ))
            else:
                warning(
                    " Could not subtract failed, TF-scale W/Z MC component")

        # Note: Don't subtract the signal component; that's output as a
        # separate histogram to be used in the simultaneous fit

        # -- Prepare DSID and isMC vectors
        vector_DSID = np.ones_like(vector_w) * DSID
        vector_isMC = np.ones_like(vector_w).astype(bool)

        # list(zip(...)) is a plain list of tuples on both Python 2 and 3
        array1 = np.array(list(zip(vector_m, vector_w)),
                          dtype=[(tf.config['prefix'] + 'm', np.float64),
                                 ('weight', np.float64)])

        array2 = np.array(list(zip(vector_DSID, vector_isMC)),
                          dtype=[('DSID', np.uint32),
                                 ('isMC', np.bool_)])

        # Mass and weight branch
        print(" Writing arrays to file: %s" % var_name)
        treename1 = tf.config['tree'].replace('NumLargeRadiusJets', 'Jet_tau21DDT').replace(
            'Nominal', var_name)
        make_directories('/'.join(treename1.split('/')[:-1]), fromDir=output)
        tree1 = ROOT.TTree(treename1.split('/')[-1], "")
        array2tree(array1, tree=tree1)

        # outputTree
        treename2 = tf.config['outputtree'].replace('Nominal', var_name)
        make_directories('/'.join(treename2.split('/')[:-1]), fromDir=output)
        tree2 = ROOT.TTree(treename2.split('/')[-1], "")
        array2tree(array2, tree=tree2)

        output.Write()

    output.Close()

    # Save configuration
    check_make_dir('logs')

    # -- Turn numpy arrays into lists, in order to make them JSON serializable
    cfg = make_serializable(tf.config)

    log_path = 'logs/gbs_config_%s_%d.log' % ('data' if args.data else 'MC', DSID)
    # Context manager guarantees the log is flushed and closed.
    with open(log_path, 'w') as log_file:
        json.dump([cfg, vars(args)], log_file)

    return
def main():
    """Run the signal injection test at the mass point ``args.mass``.

    Loads the background MC (``objdef_MC_3610*``) together with the signal
    sample matching ``args.mass``, scales both by cross section and
    luminosity, and (with ``args.toys``) replaces the data by toys generated
    from smoothed pass/fail histograms. The transfer factor (TF) is fitted and
    then, for a fixed sequence of signal strengths (mu = 0, mu = 1, a fit step
    and finally the best-fit mu), the TF background prediction is plotted
    against the (pseudo-)data. In the fit step the signal strength is fitted
    for the down / nominal / up TF variations; the result is the nominal value
    with the nominal fit error and half the up-down spread added in
    quadrature.

    Reads the command line through the module-level ``parser``; returns
    ``None``.
    """

    # Parse command-line arguments
    args = parser.parse_args()

    # Setup.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Get signal file
    sig_DSID = get_signal_DSID(args.mass, tolerance=10)
    if sig_DSID is None:
        return
    sig_file = 'objdef_MC_{DSID:6d}.root'.format(DSID=sig_DSID)

    # Load data
    files = glob.glob(tf.config['base_path'] + 'objdef_MC_3610*.root') + [
        tf.config['base_path'] + sig_file
    ]

    # NOTE(review): the signal path is always appended above, so this list is
    # never empty and the check below cannot trigger. Kept as in the original.
    if len(files) == 0:
        warning("No files found.")
        return

    data = loadData(files, tf.config['tree'], prefix=tf.config['prefix'])
    info = loadData(files, tf.config['outputtree'], stop=1)

    # Scaling by cross section
    xsec = loadXsec(tf.config['xsec_file'])

    # Append new DSID field
    # @TODO: Make more elegant?
    data = append_fields(data, 'DSID', np.zeros((data.size, )), dtypes=int)
    # Match events by the id *value* but look the DSID up by *position* in the
    # info array; indexing with the id value itself is only right when the ids
    # happen to be 0..N-1 in order.
    for idx, file_id in enumerate(info['id']):
        # Mask of all 'data' entries with same id, i.e. from same file
        msk = (data['id'] == file_id)
        tmp_DSID = info['DSID'][idx]  # DSID for this file
        data['weight'][msk] *= xsec[tmp_DSID]  # Scale by cross section x filter eff.
        data['DSID'][msk] = tmp_DSID  # Store DSID

    data['weight'] *= tf.config['lumi']  # Scale all events (MC) by luminosity

    # Check output.
    if data.size == 0:
        warning("No data was loaded.")
        return

    # Compute new variables
    data = append_fields(data, 'logpt', np.log(data['pt']))

    # Separate out signal MC
    msk_sig = (data['DSID'] == sig_DSID)
    msk_data = ~msk_sig

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("DATA STATISTICS: %s" % np.sum(data[msk_data]['weight']))

    signal = data[msk_sig]
    if not args.inject:
        # If we're not injecting signal, explicitly remove it from the 'data' array
        data = data[~msk_sig]

    # Toys
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    if args.toys:

        # Get masks
        msk_pass = tf.config['pass'](data)
        msk_fail = ~msk_pass

        # Create histograms (background only; when injecting, 'data' still
        # contains the signal, which is masked out here and added back after
        # smoothing)
        if args.inject:
            pdf_pass = get_histogram(data, tf.config['params'], tf.config['axes'], mask=msk_pass & ~msk_sig)
            pdf_fail = get_histogram(data, tf.config['params'], tf.config['axes'], mask=msk_fail & ~msk_sig)
        else:
            pdf_pass = get_histogram(data, tf.config['params'], tf.config['axes'], mask=msk_pass)
            pdf_fail = get_histogram(data, tf.config['params'], tf.config['axes'], mask=msk_fail)

        # Smooth (only leading background)
        for _ in range(2):
            pdf_pass.Smooth()
            pdf_fail.Smooth()

        # Inject afterwards
        if args.inject:
            pdf_pass.Add(
                get_histogram(data, tf.config['params'], tf.config['axes'], mask=msk_pass & msk_sig))
            pdf_fail.Add(
                get_histogram(data, tf.config['params'], tf.config['axes'], mask=msk_fail & msk_sig))

        # Create p.d.f.s
        # -- Define variables
        rhoDDT = ROOT.RooRealVar('rhoDDT', 'rhoDDT', tf.config['axes'][0][0], tf.config['axes'][0][-1])
        logpt = ROOT.RooRealVar('logpt', 'logpt', tf.config['axes'][1][0], tf.config['axes'][1][-1])

        rhoDDT.setBins(len(tf.config['axes'][0]) - 1)
        logpt.setBins(len(tf.config['axes'][1]) - 1)

        # -- Define histograms
        rdh_pass = ROOT.RooDataHist('rdh_pass', 'rdh_pass', ROOT.RooArgList(rhoDDT, logpt), pdf_pass)
        rdh_fail = ROOT.RooDataHist('rdh_fail', 'rdh_fail', ROOT.RooArgList(rhoDDT, logpt), pdf_fail)

        # -- Turn histograms into pdf's
        rhp_pass = ROOT.RooHistPdf('rhp_pass', 'rhp_pass', ROOT.RooArgSet(rhoDDT, logpt), rdh_pass)
        rhp_fail = ROOT.RooHistPdf('rhp_fail', 'rhp_fail', ROOT.RooArgSet(rhoDDT, logpt), rdh_fail)

        # Generate toys
        mult = 1.
        N_pass = int(np.sum(data['weight'][msk_pass]) * mult)
        N_fail = int(np.sum(data['weight'][msk_fail]) * mult)

        toy_dtype = [(var, 'f8') for var in
                     ['rhoDDT', 'logpt', 'tau21DDT', 'pt', 'm', 'weight']]
        toys_pass = np.zeros(N_pass, dtype=toy_dtype)
        toys_fail = np.zeros(N_fail, dtype=toy_dtype)

        print("Generating toys (pass: %d, fail: %d)" % (N_pass, N_fail))
        rds_pass = rhp_pass.generate(ROOT.RooArgSet(rhoDDT, logpt), N_pass, True, False)
        rds_fail = rhp_fail.generate(ROOT.RooArgSet(rhoDDT, logpt), N_fail, True, False)

        # Copy the generated (rhoDDT, logpt) pairs into the toy arrays and
        # derive the remaining fields. tau21DDT is a placeholder, set to 0
        # for the 'pass' toys and 1 for the 'fail' toys.
        for toys, rds, num_toys, tau21DDT in ((toys_pass, rds_pass, N_pass, 0.),
                                              (toys_fail, rds_fail, N_fail, 1.)):
            for idx in range(num_toys):
                entry = rds.get(idx)
                toys['rhoDDT'][idx] = entry.getRealValue('rhoDDT')
                toys['logpt'][idx] = entry.getRealValue('logpt')

            toys['pt'] = np.exp(toys['logpt'])
            toys['m'] = np.sqrt(np.exp(toys['rhoDDT']) * toys['pt'] * 1.)
            toys['weight'] = 1. / float(mult)
            toys['tau21DDT'] = tau21DDT

        data = np.concatenate((toys_pass, toys_fail))  # ???

    # Transfer factor
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    calc = tf.calculator(data=data, config=tf.config)  # Using default configuration
    calc.mass = args.mass
    calc.fullfit()

    # Pass/fail masks
    msk_data_pass = tf.config['pass'](data)
    msk_data_fail = ~msk_data_pass
    msk_sig_pass = tf.config['pass'](signal)
    msk_sig_fail = ~msk_sig_pass

    print(" -- Computing data weights")
    w_nom, w_up, w_down = calc.fullweights(data[msk_data_fail])
    print(" -- Computing signal weights")
    w_sig, _, _ = calc.fullweights(signal[msk_sig_fail])
    print(" -- Final fit done")

    if args.show or args.save:
        calc.plot(show=args.show,
                  save=args.save,
                  prefix='plots/new_signalinjection_%s%s_' % ("toys_" if args.toys else "",
                                                               "injected" if args.inject else "notinjected"))

    # Performing signal injection test
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    if True or args.show or args.save:

        bestfit_mu = None

        # Steps: (signal strength, run the fit?, pre-fit?, subtract?).
        # `subtract` is not read below; the signal subtraction is keyed on
        # `not fit`, which takes the same values in every step.
        for mu, fit, prefit, subtract in zip([0, 1, 1, None],
                                             [False, False, True, False],
                                             [True, True, True, False],
                                             [True, True, False, True]):

            if not prefit:
                # Post-fit step: use the best-fit value from the fit step
                mu = bestfit_mu[0]

            # Plotting
            # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

            c = ap.canvas(num_pads=2, batch=not args.show)
            p0, p1 = c.pads()

            # -- Histograms: Main pad
            bins = tf.config['massbins']

            h_bkg = c.hist(data['m'][msk_data_fail], bins=bins, weights=data['weight'][msk_data_fail] * w_nom, display=False)
            h_bkg_up = c.hist(data['m'][msk_data_fail], bins=bins, weights=data['weight'][msk_data_fail] * w_up, display=False)
            h_bkg_down = c.hist(data['m'][msk_data_fail], bins=bins, weights=data['weight'][msk_data_fail] * w_down, display=False)

            h_sig = c.hist(signal['m'][msk_sig_pass], bins=bins, weights=signal['weight'][msk_sig_pass], scale=mu, display=False)
            h_sfl = c.hist(signal['m'][msk_sig_fail], bins=bins, weights=signal['weight'][msk_sig_fail] * w_sig, scale=mu, display=False)

            h_data = c.plot(data['m'][msk_data_pass], bins=bins, weights=data['weight'][msk_data_pass], display=False)

            # Divide contents and errors by the bin width (widths are taken
            # from h_bkg for all histograms)
            for ibin in range(1, h_bkg.GetXaxis().GetNbins() + 1):
                width = float(h_bkg.GetBinWidth(ibin))
                for hist in (h_bkg, h_bkg_up, h_bkg_down, h_sig, h_sfl, h_data):
                    hist.SetBinContent(ibin, hist.GetBinContent(ibin) / width)
                    hist.SetBinError(ibin, hist.GetBinError(ibin) / width)

            if not fit:
                h_bkg.Add(h_sfl, -1)  # Subtracting signal
                h_bkg_up.Add(h_sfl, -1)  # --
                h_bkg_down.Add(h_sfl, -1)  # --

            # Starting with standard histogram, not THStack, just to get
            # y-axis to cooperate
            c.hist(h_bkg, option='HIST', linestyle=0, fillstyle=0, fillcolor=0)
            h_bkg = c.stack(h_bkg, fillcolor=ROOT.kAzure + 7, label='Background pred.')
            h_sig = c.stack(h_sig, fillcolor=ROOT.kRed - 4,
                            label="Z' (#mu = %s)" % ("%.0f" % mu if prefit else "%.2f #pm %.2f" % (mu, bestfit_mu[1])))

            h_sum = h_bkg
            h_sum = c.hist(h_sum, fillstyle=3245, fillcolor=ROOT.kGray + 3, option='E2', label='Stat. uncert.')

            h_bkg_up = c.hist(h_bkg_up, linecolor=ROOT.kGreen + 1, linestyle=2, option='HIST', label='Syst. uncert.')
            h_bkg_down = c.hist(h_bkg_down, linecolor=ROOT.kGreen + 1, linestyle=2, option='HIST')

            h_data = c.plot(h_data, label='Pseudo-data')

            c.hist(h_bkg, option='AXIS')  # Re-draw axes

            # -- Histograms: Ratio pad
            c.ratio_plot((h_sig, h_sum), option='HIST', offset=1)
            c.ratio_plot((h_sum, h_sum), option='E2')
            c.ratio_plot((h_bkg_up, h_sum), option='HIST')
            c.ratio_plot((h_bkg_down, h_sum), option='HIST')
            c.ratio_plot((h_data, h_sum))

            # -- Axis labels
            c.xlabel('Large-#it{R} jet mass [GeV]')
            c.ylabel('Events / GeV')
            p1.ylabel('Data / Est.')

            # -- Axis limits
            c.ylim(1.0E+00, 1.0E+06)
            p1.ylim(0.80, 1.20)

            # -- Line(s)
            p1.yline(1.0)

            # -- Region(s)
            c.region("SR", 0.8 * args.mass, 1.2 * args.mass)

            # -- Text
            c.text(
                [
                    "#sqrt{s} = 13 TeV, %s fb^{-1}" % tf.config['lumi'],
                    "Incl. #gamma Monte Carlo",
                    "Photon channel",
                    #("Signal" if args.inject else "No signal") + " injected",
                ] + (["Using toys"] if args.toys else []),
                qualifier='Simulation Internal')

            # -- Log
            c.log()

            # -- Legend
            c.legend()

            # The canvas of the fit step is neither saved nor shown
            if args.save and not fit:
                c.save('plots/new_signalinjection_%s%dGeV_pm%d_%s_%s.pdf' %
                       ("toys_" if args.toys else "",
                        args.mass,
                        20.,
                        ('prefit_mu%d' % mu if prefit else 'postfit'),
                        ('injected' if args.inject else 'notinjected')))
            if args.show and not fit:
                c.show()

            # Fitting
            # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

            if fit:

                bestfit_mu = list()

                # Order matters: index 0 = down, 1 = nominal, 2 = up. The
                # combination after the loop relies on it.
                variation_names = ("Down", "Nominal", "Up")
                hs_save = [
                    h_bkg_down.Clone('h_save_down'),
                    h_bkg.Clone('h_save_nom'),
                    h_bkg_up.Clone('h_save_up'),
                ]

                for variation_name, h_bkg_use in zip(variation_names, hs_save):

                    # The label now follows the histogram actually used
                    # (previously index 0, the 'down' clone, was reported as
                    # "Up" and index 2 as "Down").
                    print("Variation: " + variation_name)

                    # -- Define jet mass variable
                    mJ = ROOT.RooRealVar('mJ', 'mJ', 50, 300)
                    #mJ.setBins(50)
                    roobinning = ROOT.RooBinning(len(tf.config['massbins']) - 1, tf.config['massbins'])
                    mJ.setBinning(roobinning)

                    # -- Define histograms
                    rdh_bkg = ROOT.RooDataHist('rdh_bkg', 'rdh_bkg', ROOT.RooArgList(mJ), h_bkg_use)
                    rdh_sig = ROOT.RooDataHist('rdh_sig', 'rdh_sig', ROOT.RooArgList(mJ), h_sig)
                    rdh_sfl = ROOT.RooDataHist('rdh_sfl', 'rdh_sfl', ROOT.RooArgList(mJ), h_sfl)

                    # -- Turn histograms into pdf's
                    rhp_bkg = ROOT.RooHistPdf('rhp_bkg', 'rhp_bkg', ROOT.RooArgSet(mJ), rdh_bkg)
                    rhp_sig = ROOT.RooHistPdf('rhp_sig', 'rhp_sig', ROOT.RooArgSet(mJ), rdh_sig)
                    rhp_sfl = ROOT.RooHistPdf('rhp_sfl', 'rhp_sfl', ROOT.RooArgSet(mJ), rdh_sfl)

                    # -- Define integrals as constants
                    n_bkg = ROOT.RooRealVar('n_bkg', 'n_bkg', h_bkg_use.Integral())
                    n_sig = ROOT.RooRealVar('n_sig', 'n_sig', h_sig.Integral())
                    n_sfl = ROOT.RooRealVar('n_sfl', 'n_sfl', h_sfl.Integral())

                    # -- Define signal strength and constant(s)
                    # A separate Python name keeps the loop variable `mu`
                    # from being rebound; the RooFit name is still 'mu'.
                    mu_var = ROOT.RooRealVar('mu', 'mu', 1, 0, 5)
                    neg1 = ROOT.RooRealVar('neg1', 'neg1', -1)

                    # -- Define fittable normalisation factors
                    c_bkg = ROOT.RooFormulaVar('c_bkg', 'c_bkg', '@0', ROOT.RooArgList(n_bkg))
                    c_sig = ROOT.RooFormulaVar('c_sig', 'c_sig', '@0 * @1', ROOT.RooArgList(mu_var, n_sig))
                    c_sfl = ROOT.RooFormulaVar('c_sfl', 'c_sfl', '@0 * @1 * @2', ROOT.RooArgList(neg1, mu_var, n_sfl))

                    # -- Construct combined pdf
                    pdf = ROOT.RooAddPdf('pdf', 'pdf',
                                         ROOT.RooArgList(rhp_bkg, rhp_sig, rhp_sfl),
                                         ROOT.RooArgList(c_bkg, c_sig, c_sfl))

                    # -- Construct data histogram
                    rdh_data = ROOT.RooDataHist('rdh_data', 'rdh_data', ROOT.RooArgList(mJ), h_data)

                    # -- Fit pdf to data histogram
                    pdf.chi2FitTo(rdh_data, ROOT.RooLinkedList())

                    print("Best fit mu: %.3f +/- %.3f" % (mu_var.getValV(), mu_var.getError()))
                    bestfit_mu.append((mu_var.getValV(), mu_var.getError()))

                # (central value, total error): nominal best-fit value, with
                # half the down/up spread and the nominal fit error added in
                # quadrature
                bestfit_mu = bestfit_mu[1][0], np.sqrt(
                    np.power(abs(bestfit_mu[0][0] - bestfit_mu[2][0]) / 2., 2.) +
                    np.power(bestfit_mu[1][1], 2.))

    return
def main():
    """Compare background estimates at ``args.mass`` and plot per-bin p0 values.

    Loads the background MC, the W/Z MC, the pre-computed GBS estimate
    (``objdef_GBSMC_400001.root``) and, with ``args.inject``, the signal
    sample; scales all MC by cross section and luminosity and merges signal
    (optional) and W/Z into the pseudo-data. The transfer factor is fitted
    twice: once with the calculator's default settings ("full") and once with
    a +/- 20% window around ``args.mass`` ("window"). Two plots are produced:
    the jet mass spectrum with the three background estimates, and the per-bin
    p0 of the pseudo-data with respect to the window and GBS estimates.

    Reads the command line through the module-level ``parser``; returns
    ``None`` (early if the signal sample or any required input file is
    missing).
    """

    # Parse command-line arguments
    args = parser.parse_args()

    # Setup.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Get signal file
    sig_DSID = get_signal_DSID(args.mass, tolerance=10)
    if sig_DSID is None:
        warning("No signal file was found")
        return
    sig_file = 'objdef_MC_{DSID:6d}.root'.format(DSID=sig_DSID)

    # Load data
    files = {
        'data': glob.glob(tf.config['base_path'] + 'objdef_MC_3610*.root'),
        'gbs': glob.glob(tf.config['base_path'] + 'objdef_GBSMC_400001.root'),
        'WZ': glob.glob(tf.config['base_path'] + 'objdef_MC_3054*.root')
    }
    if args.inject:
        files['sig'] = glob.glob(tf.config['base_path'] + sig_file)

    # Every category is used unconditionally below, so bail out if any file
    # list is empty. (Testing len(files) could never trigger: the dict always
    # holds at least the three keys defined above.)
    missing = sorted(key for key, paths in files.items() if len(paths) == 0)
    if missing:
        warning("No files found for: %s. Try to run:" % ", ".join(missing))
        warning(" $ source getSomeData.sh")
        return

    data = loadData(files['data'], tf.config['tree'], prefix=tf.config['prefix'])
    gbs = loadData(files['gbs'], tf.config['finaltree'], prefix=tf.config['prefix'])
    WZ = loadData(files['WZ'], tf.config['tree'], prefix=tf.config['prefix'])
    if args.inject:
        signal = loadData(files['sig'], tf.config['tree'], prefix=tf.config['prefix'])
    else:
        signal = None

    info = {
        key: loadData(files[key], tf.config['outputtree'], stop=1)
        for key in files
    }

    # Scaling by cross section
    xsec = loadXsec(tf.config['xsec_file'])

    # Append new DSID field
    if args.inject:
        signal = append_fields(signal, 'DSID', np.zeros((signal.size, )), dtypes=int)
        for idx, file_id in enumerate(info['sig']['id']):
            # Mask of all 'signal' entries with same id, i.e. from same file
            msk = (signal['id'] == file_id)
            tmp_DSID = info['sig']['DSID'][idx]  # DSID for this file
            signal['weight'][msk] *= xsec[tmp_DSID]  # Scale by cross section x filter eff.
            signal['DSID'][msk] = tmp_DSID  # Store DSID
        signal['weight'] *= tf.config['lumi']

    WZ = append_fields(WZ, 'DSID', np.zeros((WZ.size, )), dtypes=int)
    for idx, file_id in enumerate(info['WZ']['id']):
        # Mask of all 'WZ' entries with same id, i.e. from same file
        msk = (WZ['id'] == file_id)
        tmp_DSID = info['WZ']['DSID'][idx]  # DSID for this file
        WZ['weight'][msk] *= xsec[tmp_DSID]  # Scale by cross section x filter eff.
        WZ['DSID'][msk] = tmp_DSID  # Store DSID
    WZ['weight'] *= tf.config['lumi']

    #if not args.data:
    data = append_fields(data, 'DSID', np.zeros((data.size, )), dtypes=int)
    for idx, file_id in enumerate(info['data']['id']):
        # Mask of all 'data' entries with same id, i.e. from same file
        msk = (data['id'] == file_id)
        tmp_DSID = info['data']['DSID'][idx]  # DSID for this file
        data['weight'][msk] *= xsec[tmp_DSID]  # Scale by cross section x filter eff.
        data['DSID'][msk] = tmp_DSID  # Store DSID
    data['weight'] *= tf.config['lumi']
    #pass

    # Compute new variables
    data = append_fields(data, 'logpt', np.log(data['pt']))
    WZ = append_fields(WZ, 'logpt', np.log(WZ['pt']))
    if signal is not None:
        signal = append_fields(signal, 'logpt', np.log(signal['pt']))

    # Inject signal into data
    if args.inject:
        data = np.array(np.concatenate((data, signal)), dtype=data.dtype)

    #if not args.data:
    data = np.array(np.concatenate((data, WZ)), dtype=data.dtype)
    #pass

    # @TODO: Not sure this script works for data input... But it's not used anyway.

    # Transfer factor
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Pass/fail masks
    # -- Data (incl. signal)
    msk_pass = tf.config['pass'](data)
    msk_fail = ~msk_pass

    # -- W/Z
    msk_WZ_pass = tf.config['pass'](WZ)
    msk_WZ_fail = ~msk_WZ_pass

    # -- Signal
    if args.inject:
        msk_sig_pass = tf.config['pass'](signal)
        msk_sig_fail = ~msk_sig_pass

    # Transfer factor calculator instance
    calc = tf.calculator(data=data, config=tf.config, subtract=WZ)

    # Nominal fit
    calc.fit()
    w_nom = calc.weights(data[msk_fail])
    w_nom_WZ = calc.weights(WZ[msk_WZ_fail])
    if args.show or args.save:
        calc.plot(show=args.show,
                  save=args.save,
                  prefix='plots/globalbackground_%s_%s_' % ('injected' if args.inject else 'notinjected',
                                                             'data' if args.data else 'MC'))

    # mass +/- 20% stripe fit
    calc.mass = args.mass
    calc.window = 0.2
    calc.fit()
    w_stripe = calc.weights(data[msk_fail])
    w_stripe_WZ = calc.weights(WZ[msk_WZ_fail])
    if args.inject:
        w_stripe_sig = calc.weights(signal[msk_sig_fail])

    # Plotting
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    bins = np.linspace(100, 250, 30 + 1, endpoint=True)

    # Setup canvas
    c = ap.canvas(num_pads=2, batch=not args.show)
    p0, p1 = c.pads()

    # Add stacked backgrounds
    h_bkg_nom = c.hist(data['m'][msk_fail], bins=bins, weights=data['weight'][msk_fail] * w_nom, display=False)
    h_bkg_stripe = c.hist(data['m'][msk_fail], bins=bins, weights=data['weight'][msk_fail] * w_stripe, display=False)

    h_WZfl_nom = c.hist(WZ['m'][msk_WZ_fail], bins=bins, weights=WZ['weight'][msk_WZ_fail] * w_nom_WZ, display=False)
    h_WZfl_stripe = c.hist(WZ['m'][msk_WZ_fail], bins=bins, weights=WZ['weight'][msk_WZ_fail] * w_stripe_WZ, display=False)

    if args.inject:
        h_sig = c.hist(signal['m'][msk_sig_pass], bins=bins, weights=signal['weight'][msk_sig_pass], display=False)
        h_sfl = c.hist(signal['m'][msk_sig_fail], bins=bins, weights=signal['weight'][msk_sig_fail] * w_stripe_sig, display=False)

    h_gbs = c.hist(gbs['m'], bins=bins, weights=gbs['weight'], display=False)

    # -- Subtract (opt.)
    if args.inject:
        h_bkg_stripe.Add(h_sfl, -1)
        h_gbs.Add(h_sfl, -1)

    h_bkg_nom.Add(h_WZfl_nom, -1)
    h_bkg_stripe.Add(h_WZfl_stripe, -1)

    # -- Actually draw
    #if not args.data:
    h_WZ = c.stack(WZ['m'][msk_WZ_pass], bins=bins, weights=WZ['weight'][msk_WZ_pass],
                   fillcolor=ROOT.kRed - 4, label='W/Z + #gamma')
    #pass
    h_bkg_nom = c.stack(h_bkg_nom, fillcolor=ROOT.kAzure + 7, label="Bkg. (full)")
    h_sum = c.getStackSum()

    # The window and GBS estimates are drawn as plain histograms, so the
    # passing W/Z component is added to them explicitly.
    h_bkg_stripe.Add(h_WZ)
    h_gbs.Add(h_WZ)

    if args.inject:
        h_sig = c.stack(h_sig, fillcolor=ROOT.kViolet - 4, label="Z' (%d GeV)" % args.mass)

    h_bkg_stripe = c.hist(h_bkg_stripe, linecolor=ROOT.kGreen + 1, label="Bkg. (window)")  # % args.mass)
    h_gbs = c.hist(h_gbs, linecolor=ROOT.kViolet + 1, label="Bkg. (GBS)")

    # Draw stats. error of stacked sum
    h_sum = c.hist(h_sum, fillstyle=3245, fillcolor=ROOT.kGray + 2, linecolor=ROOT.kGray + 3,
                   label='Stats. uncert.', option='E2')

    # Add (pseudo-) data
    h_data = c.plot(data['m'][msk_pass], bins=bins, weights=data['weight'][msk_pass],
                    markersize=0.8, label='Data' if args.data else 'Pseudo-data')

    # Axis limits
    p1.ylim(0.8, 1.2)
    c.padding(0.45)
    c.log(True)

    # Draw error- and ratio plots (the returned histograms are not needed
    # here)
    if args.inject:
        c.ratio_plot((h_sig, h_sum), option='HIST', offset=1)
    c.ratio_plot((h_sum, h_sum), option='E2')
    c.ratio_plot((h_data, h_sum), oob=True)
    c.ratio_plot((h_gbs, h_sum), linecolor=ROOT.kViolet + 1, option='HIST ][')
    c.ratio_plot((h_bkg_stripe, h_sum), linecolor=ROOT.kGreen + 1, option='HIST ][')

    # Add labels and text
    c.xlabel('Signal jet mass [GeV]')
    c.ylabel('Events')
    p1.ylabel('Data / Nom.')

    c.text([
        "#sqrt{s} = 13 TeV, L = 36.1 fb^{-1}",
    ] + ([
        "Sherpa incl. #gamma MC",
    ] if not args.data else []) + [
        "Trimmed anti-k_{t}^{R=1.0} jets",
        "ISR #gamma selection",
    ] + (["Signal injected"] if args.inject else []),
           qualifier='%sInternal' % ("Simulation " if not args.data else ""))

    # Add line(s)
    p1.yline(1.0)

    # Draw legend
    c.legend()
    c.region("SR", 0.8 * args.mass, 1.2 * args.mass)

    # Save and show plot
    if args.save:
        c.save('plots/globalbackground_spectrum_%dGeV_%s_%s.pdf' %
               (args.mass,
                'injected' if args.inject else 'notinjected',
                'data' if args.data else 'MC'))
    if args.show:
        c.show()

    # p0-plot
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Setup canvas
    c2 = ap.canvas(batch=not args.show)

    p_local = h_data.Clone('p_local')
    p_global = h_data.Clone('p_global')

    for ibin in range(1, h_data.GetXaxis().GetNbins() + 1):
        c_data = h_data.GetBinContent(ibin)
        e_data = h_data.GetBinError(ibin)
        c_loc = h_bkg_stripe.GetBinContent(ibin)
        e_loc = h_bkg_stripe.GetBinError(ibin)
        c_glb = h_gbs.GetBinContent(ibin)
        # The window-fit error is used for the GBS estimate as well
        e_glb = e_loc  # h_gbs.GetBinError(ibin)

        # Significance of the data w.r.t. each estimate
        z_loc = (c_data - c_loc) / np.sqrt(np.square(e_data) + np.square(e_loc))
        z_glb = (c_data - c_glb) / np.sqrt(np.square(e_data) + np.square(e_glb)) if c_glb > 0 else 0

        # Erfc(z / sqrt(2)) capped at 1
        p_loc = min(ROOT.TMath.Erfc(z_loc / np.sqrt(2)), 1)
        p_glb = min(ROOT.TMath.Erfc(z_glb / np.sqrt(2)), 1)

        p_local.SetBinContent(ibin, p_loc)
        p_global.SetBinContent(ibin, p_glb)
        p_local.SetBinError(ibin, 0)
        p_global.SetBinError(ibin, 0)

    c2.plot(p_local, markercolor=ROOT.kGreen + 1, linecolor=ROOT.kGreen + 1,
            option='PL', label="Local (20% window)")
    c2.plot(p_global, markercolor=ROOT.kViolet + 1, linecolor=ROOT.kViolet + 1,
            option='PL', label="Global (GBS)")

    c2.xlabel("Signal jet mass [GeV]")
    c2.ylabel("p_{0}")
    c2.log()
    c2.ylim(1E-04, 1E+04)

    # Reference lines at 0, 1, 2 and 3 sigma
    for sigma in range(4):
        c2.yline(ROOT.TMath.Erfc(sigma / np.sqrt(2)))

    c2.text([
        "#sqrt{s} = 13 TeV, L = 36.1 fb^{-1}",
    ] + ([
        "Sherpa incl. #gamma MC",
    ] if not args.data else []) + [
        "Trimmed anti-k_{t}^{R=1.0} jets",
        "ISR #gamma selection",
        ("Signal" if args.inject else "No signal") + " injected" +
        (" at m = %d GeV" % args.mass if args.inject else ""),
    ],
            qualifier='Simulation Internal')

    c2.region("SR", 0.8 * args.mass, 1.2 * args.mass)
    c2.legend()

    if args.save:
        c2.save('plots/globalbackground_p0_%dGeV_%s_%s.pdf' %
                (args.mass,
                 'injected' if args.inject else 'notinjected',
                 'data' if args.data else 'MC'))
    if args.show:
        c2.show()

    return
def _scale_mc_sample(sample, info, xsec):
    """Tag an MC sample with per-file DSIDs and scale its event weights.

    Arguments:
        sample: Structured array with (at least) 'id' and 'weight' fields.
        info: Per-file structured array with 'id' and 'DSID' fields, as read
            from the `tf.config['outputtree']` tree.
        xsec: Mapping from DSID to the scale factor read by `loadXsec`.

    Returns:
        The array returned by `append_fields`, i.e. `sample` with an extra
        integer 'DSID' field and with 'weight' multiplied by `xsec[DSID]`
        and by `tf.config['lumi']`.
    """
    sample = append_fields(sample, 'DSID', np.zeros((sample.size, )),
                           dtypes=int)

    # Pair each file id with the DSID stored in the *same row* of `info`,
    # rather than using the id value itself as a positional index.
    for file_id, dsid in zip(info['id'], info['DSID']):
        msk = (sample['id'] == file_id)  # All entries from this file
        sample['weight'][msk] *= xsec[dsid]  # Cross section x filter eff.
        sample['DSID'][msk] = dsid
        pass

    # @TODO: k-factors?
    sample['weight'] *= tf.config['lumi']  # Scale all events (MC) by luminosity
    return sample


def _variation_name(shift):
    """Return the output tree variation name for a TF shift of 0, +1 or -1."""
    if shift == 0:
        return 'Nominal'
    return 'TF_UP' if shift == 1 else 'TF_DOWN'


def _make_output_arrays(vector_m, vector_w, dsid):
    """Build the (mass, weight) and (DSID, isMC) structured output arrays.

    Arguments:
        vector_m: Array of jet masses.
        vector_w: Array of event weights, same length as `vector_m`.
        dsid: Number stored in the 'DSID' field of every entry.

    Returns:
        Tuple `(array1, array2)` of structured numpy arrays.
    """
    vector_DSID = np.ones_like(vector_w) * dsid
    vector_isMC = np.ones_like(vector_w).astype(bool)

    # `list(zip(...))` rather than a bare `zip(...)`, so that np.array always
    # receives a concrete sequence of tuples.
    array1 = np.array(list(zip(vector_m, vector_w)),
                      dtype=[(tf.config['prefix'] + 'm', np.float64),
                             ('weight', np.float64)])
    array2 = np.array(list(zip(vector_DSID, vector_isMC)),
                      dtype=[('DSID', np.uint32),
                             ('isMC', np.bool_)])
    return array1, array2


def _write_variation(output, var_name, array1, array2):
    """Write the two arrays for one variation as trees in ROOT file `output`.

    The tree paths are derived from `tf.config['tree']` and
    `tf.config['outputtree']` by substituting `var_name` for 'Nominal'.
    """
    print(" Writing arrays to file: %s" % var_name)

    # Mass and weight branch
    treename1 = tf.config['tree'].replace('NumLargeRadiusJets',
                                          'Jet_tau21DDT').replace(
                                              'Nominal', var_name)
    make_directories('/'.join(treename1.split('/')[:-1]), fromDir=output)
    tree1 = ROOT.TTree(treename1.split('/')[-1], "")
    array2tree(array1, tree=tree1)

    # outputTree
    treename2 = tf.config['outputtree'].replace('Nominal', var_name)
    make_directories('/'.join(treename2.split('/')[:-1]), fromDir=output)
    tree2 = ROOT.TTree(treename2.split('/')[-1], "")
    array2tree(array2, tree=tree2)

    output.Write()
    return


def main():
    """Fit the transfer factor (TF) on data and write TF-scaled outputs.

    Steps, all driven by the module-level `parser` and `tf.config`:
      1. Load data, W/Z MC and (if a signal DSID exists for `args.mass`)
         signal MC; scale the MC samples by cross section and luminosity.
      2. Fit the TF, either with `calc.fullfit()` (no `args.window`) or with
         a manually set window re-using the length scale from a 20% fit.
      3. If `args.save`, write the TF-weighted failing data (optionally with
         the W/Z MC component subtracted) and failing signal MC to ROOT
         files under 'output/', for the nominal and up/down TF variations.
      4. Dump the configuration and arguments to 'logs/'.
    """

    # Parse command-line arguments
    args = parser.parse_args()

    DSID = int("1%02d%03d" %
               (0 if args.window is None else args.window * 100, args.mass))

    # Setup.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Get signal file
    sig_DSID = get_signal_DSID(args.mass)

    # Load data
    #files_data = glob.glob(tf.config['base_path'] + 'objdef_MC_3610*.root')
    files_data = glob.glob(tf.config['base_path'] + 'objdef_data_*.root')
    files_WZ = glob.glob(tf.config['base_path'] + 'objdef_MC_30543*.root') + \
               glob.glob(tf.config['base_path'] + 'objdef_MC_30544*.root')

    if sig_DSID is None:
        if args.mass < 100.:
            print("Assuming signal is W/Z")
            files_sig = files_WZ
            files_WZ = []
        else:
            files_sig = []
            pass
    else:
        sig_file = 'objdef_MC_{DSID:6d}.root'.format(DSID=sig_DSID)
        print("Using signal file: %s" % sig_file)
        files_sig = [tf.config['base_path'] + sig_file]
        pass

    if len(files_data) == 0 or (sig_DSID and len(files_sig) == 0):
        warning("No files found.")
        return

    data = loadData(files_data, tf.config['tree'], prefix=tf.config['prefix'])
    signal = loadData(files_sig, tf.config['tree'], prefix=tf.config['prefix'])
    WZ = loadData(files_WZ, tf.config['tree'], prefix=tf.config['prefix'])
    info_data = loadData(files_data, tf.config['outputtree'], stop=1)
    info_sig = loadData(files_sig, tf.config['outputtree'], stop=1)
    info_WZ = loadData(files_WZ, tf.config['outputtree'], stop=1)

    # Scaling by cross section
    xsec = loadXsec(tf.config['xsec_file'])

    # NOTE: `data` (loaded from the objdef_data_* files) is not given a DSID
    # field and is not scaled by cross section or luminosity; only the MC
    # samples below are.
    if signal is not None:
        signal = _scale_mc_sample(signal, info_sig, xsec)
        pass

    if WZ is not None:
        WZ = _scale_mc_sample(WZ, info_WZ, xsec)
        pass

    # Check output.
    if data.size == 0 or ((signal is not None) and signal.size == 0):
        warning("No data was loaded. Exiting.")
        return

    # Compute new variables
    data = append_fields(data, 'logpt', np.log(data['pt']))
    if signal is not None:
        signal = append_fields(signal, 'logpt', np.log(signal['pt']))
        pass
    if WZ is not None:
        WZ = append_fields(WZ, 'logpt', np.log(WZ['pt']))
        pass

    # Transfer factor
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Pass/fail masks
    msk_data_pass = tf.config['pass'](data)
    msk_data_fail = ~msk_data_pass
    if signal is not None:
        msk_sig_pass = tf.config['pass'](signal)
        msk_sig_fail = ~msk_sig_pass
        pass
    if WZ is not None:
        msk_WZ_pass = tf.config['pass'](WZ)
        msk_WZ_fail = ~msk_WZ_pass
        pass

    # Using default configuration
    calc = tf.calculator(data=data,
                         config=tf.config,
                         subtract=WZ if args.subtractWZMC else None)
    calc.mass = args.mass

    # W/Z weights stay None when no W/Z sample is available
    w_WZ_nom, w_WZ_up, w_WZ_down = None, None, None

    if args.window is None:
        # Perform full fit
        calc.fullfit()

        print(" -- Computing data weights")
        w_nom, w_up, w_down = calc.fullweights(data[msk_data_fail])

        if signal is not None:
            print(" -- Computing signal weights")
            w_sig_nom, w_sig_up, w_sig_down = calc.fullweights(
                signal[msk_sig_fail])
            w_sig_pass, _, _ = calc.fullweights(signal[msk_sig_pass])
            print("Scale factors for signal strength:")
            print(" pass / (pass + fail) = %.3e / (%.3e + %.3e) = %.4f" %
                  (np.sum(w_sig_pass), np.sum(w_sig_pass), np.sum(w_sig_nom),
                   np.sum(w_sig_pass) /
                   (np.sum(w_sig_nom) + np.sum(w_sig_pass))))
            pass

        if WZ is not None:
            print(" -- Computing W/Z weights")
            w_WZ_nom, w_WZ_up, w_WZ_down = calc.fullweights(WZ[msk_WZ_fail])
            pass

        print(" -- Final fit done")
    else:
        # Perform fit with manually-set window size
        # @TODO: - Forcing the fit to use same length scale as 20% window fit.
        #          Improve?
        calc.window = 0.2
        calc.fit()
        theta = calc.theta()
        calc.window = args.window
        calc.fit(theta=theta)

        print(" -- Computing data weights")
        w_nom = calc.weights(data[msk_data_fail])
        w_up = calc.weights(data[msk_data_fail], shift=+1)
        w_down = calc.weights(data[msk_data_fail], shift=-1)

        if signal is not None:
            print(" -- Computing signal weights")
            w_sig_nom = calc.weights(signal[msk_sig_fail])
            w_sig_up = calc.weights(signal[msk_sig_fail], shift=+1)
            w_sig_down = calc.weights(signal[msk_sig_fail], shift=-1)
            pass

        if WZ is not None:
            print(" -- Computing W/Z weights")
            # Plain assignment: a trailing comma here would try to unpack the
            # weights as a one-element sequence.
            w_WZ_nom = calc.weights(WZ[msk_WZ_fail])
            w_WZ_up = calc.weights(WZ[msk_WZ_fail], shift=+1)
            w_WZ_down = calc.weights(WZ[msk_WZ_fail], shift=-1)
            pass

        print(" -- Manual fit done")
        pass

    if args.show or args.save:
        calc.plot(show=args.show,
                  save=args.save,
                  prefix='plots/tf_',
                  MC=False)
        pass

    # Computing data-driven background estimate
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    check_make_dir('output')

    # Write TF-scaled failing data to file
    output = None
    if args.save:
        output = ROOT.TFile(
            'output/objdef_TF_{DSID:6d}.root'.format(DSID=DSID), 'RECREATE')
        pass

    try:
        for shift, w, w_WZ in zip([0, 1, -1], [w_nom, w_up, w_down],
                                  [w_WZ_nom, w_WZ_up, w_WZ_down]):

            # -- Get branch name for current variation
            var_name = _variation_name(shift)

            # -- Prepare mass- and weight vectors
            vector_m = data['m'][msk_data_fail]
            vector_w = data['weight'][msk_data_fail] * w
            if args.subtractWZdata:
                if WZ is not None and WZ.size > 0:
                    print(" Subtracting TF-scaled W/Z MC from background estimate")
                    vector_m = np.concatenate((vector_m, WZ['m'][msk_WZ_fail]))
                    vector_w = np.concatenate(
                        (vector_w, -WZ['weight'][msk_WZ_fail] * w_WZ))
                else:
                    warning(
                        " Could not subtract failed, TF-scale W/Z MC component")
                    pass
                pass
            # Note: Don't subtract the signal component; that's output as a
            # separate histogram to be used in the simultaneous fit

            # -- Prepare DSID and isMC vectors, and the output arrays
            array1, array2 = _make_output_arrays(vector_m, vector_w, DSID)

            if output is not None:
                _write_variation(output, var_name, array1, array2)
                pass
            pass
    finally:
        # Close the file even if one of the variations failed
        if output is not None:
            output.Close()
            pass
        pass

    # Write TF-scaled failing signal MC to file
    if signal is not None:
        output = None
        if args.save:
            output = ROOT.TFile(
                'output/objdef_TF_{DSID:6d}_signalfail.root'.format(DSID=DSID),
                'RECREATE')
            pass

        try:
            for shift, w_sig in zip([0, 1, -1],
                                    [w_sig_nom, w_sig_up, w_sig_down]):

                # -- Get branch name for current variation
                var_name = _variation_name(shift)

                # -- Prepare mass- and weight vectors
                vector_m = signal['m'][msk_sig_fail]
                vector_w = signal['weight'][msk_sig_fail] * w_sig

                # -- Prepare DSID and isMC vectors, and the output arrays
                array1, array2 = _make_output_arrays(vector_m, vector_w,
                                                     DSID + 1E+05)

                if output is not None:
                    _write_variation(output, var_name, array1, array2)
                    pass
                pass
        finally:
            if output is not None:
                output.Close()
                pass
            pass
        pass

    # Save configuration
    check_make_dir('logs')

    # -- Turn numpy arrays into lists, in order to make them JSON serializable
    cfg = make_serializable(tf.config)

    # Context manager so the log file is flushed and closed deterministically
    with open('logs/tf_config_%d.log' % DSID, 'w') as logfile:
        json.dump([cfg, vars(args)], logfile)
        pass

    return
def main():
    """Closure test of the transfer factor (TF) on inclusive-photon MC.

    Loads the 'objdef_MC_3610*.root' files, scales them by cross section and
    luminosity, fits the TF for `args.mass` / `args.window`, and (if
    `args.show` or `args.save`) compares the TF-weighted failing jet mass
    spectrum with the passing one, including the up/down TF variations.
    """

    # Parse command-line arguments
    args = parser.parse_args()

    # Setup.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Load data
    files = glob.glob(tf.config['base_path'] + 'objdef_MC_3610*.root')

    if len(files) == 0:
        warning("No files found.")
        return

    data = loadData(files, tf.config['tree'], prefix=tf.config['prefix'])
    info = loadData(files, tf.config['outputtree'], stop=1)

    # Scaling by cross section
    xsec = loadXsec(tf.config['xsec_file'])

    # Append new DSID field
    data = append_fields(data, 'DSID', np.zeros((data.size, )), dtypes=int)

    # Pair each file id with the DSID stored in the *same row* of `info`,
    # rather than using the id value itself as a positional index.
    for file_id, dsid in zip(info['id'], info['DSID']):
        msk = (data['id'] == file_id)  # All entries from this file
        data['weight'][msk] *= xsec[dsid]  # Cross section x filter eff.
        data['DSID'][msk] = dsid  # Store DSID
        pass
    data['weight'] *= tf.config['lumi']  # Scale all events (MC) by luminosity

    # Check output.
    if data.size == 0:
        warning("No data was loaded.")
        return

    # Compute new variables
    data = append_fields(data, 'logpt', np.log(data['pt']))

    # Transfer factor
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Pass/fail masks
    msk_pass = tf.config['pass'](data)
    msk_fail = ~msk_pass

    # Transfer factor calculator instance, using default configuration
    calc = tf.calculator(data=data, config=tf.config)
    calc.mass = args.mass
    calc.window = args.window
    # ... calc.partialbins, calc.emptybins, ...
    calc.fit()  # ...(theta=0.5)

    w_nom = calc.weights(data[msk_fail])
    w_up = calc.weights(data[msk_fail], shift=+1)
    w_down = calc.weights(data[msk_fail], shift=-1)

    if args.show or args.save:
        calc.plot(show=args.show, save=args.save, prefix='plots/new_closure_')
        pass

    # Comparing jet mass distributions (closure)
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    if args.show or args.save:

        c = ap.canvas(num_pads=2, batch=not args.show)
        p0, p1 = c.pads()

        bins = tf.config['massbins']

        # Book (but don't yet draw) the histograms, so that they can be
        # normalised by bin width first.
        h_bkg = c.hist(data['m'][msk_fail],
                       bins=bins,
                       weights=data['weight'][msk_fail] * w_nom,
                       display=False)
        h_up = c.hist(data['m'][msk_fail],
                      bins=bins,
                      weights=data['weight'][msk_fail] * w_up,
                      display=False)
        h_down = c.hist(data['m'][msk_fail],
                        bins=bins,
                        weights=data['weight'][msk_fail] * w_down,
                        display=False)
        h_data = c.plot(data['m'][msk_pass],
                        bins=bins,
                        weights=data['weight'][msk_pass],
                        display=False)

        # Divide contents and errors by the bin width ("Events / GeV"). All
        # four histograms share `bins`; widths are read from `h_bkg`.
        for ibin in range(1, h_bkg.GetXaxis().GetNbins() + 1):
            width = float(h_bkg.GetBinWidth(ibin))
            for hist in (h_bkg, h_up, h_down, h_data):
                hist.SetBinContent(ibin, hist.GetBinContent(ibin) / width)
                hist.SetBinError(ibin, hist.GetBinError(ibin) / width)
                pass
            pass

        # Draw the normalised histograms
        h_bkg = c.hist(h_bkg,
                       fillcolor=ROOT.kAzure + 7,
                       label='Background est.')
        h_err = c.hist(h_bkg,
                       fillstyle=3245,
                       fillcolor=ROOT.kGray + 2,
                       linecolor=ROOT.kGray + 3,
                       label='Stat. uncert.',
                       option='E2')
        h_up = c.hist(h_up,
                      linecolor=ROOT.kGreen + 1,
                      linestyle=2,
                      option='HIST',
                      label='Syst. uncert.')
        h_down = c.hist(h_down,
                        linecolor=ROOT.kGreen + 1,
                        linestyle=2,
                        option='HIST')
        h_data = c.plot(h_data, label='Pseudo-data')

        # Ratio plots
        c.ratio_plot((h_err, h_bkg), option='E2')
        c.ratio_plot((h_up, h_bkg), option='HIST')
        c.ratio_plot((h_down, h_bkg), option='HIST')
        c.ratio_plot((h_data, h_bkg))

        # Decorations
        c.xlabel('Large-#it{R} jet mass [GeV]')
        c.ylabel('Events / GeV')
        p1.ylabel('Data / Est.')

        c.ylim(1E+00, 1E+06)
        p1.ylim(0.80, 1.20)
        p1.yline(1.0)
        c.region("SR", 0.8 * args.mass, 1.2 * args.mass)

        c.text([
            "#sqrt{s} = 13 TeV, %s fb^{-1}" % tf.config['lumi'],
            "Incl. #gamma Monte Carlo",
            "Photon channel",
        ],
               qualifier='Simulation Internal')

        c.log()
        c.legend()
        if args.save:
            c.save('plots/new_closure_%dGeV_pm%d.pdf' %
                   (args.mass, args.window * 100.))
            pass
        if args.show:
            c.show()
            pass
        pass

    return
def main():
    """Validate the transfer factor (TF) fit on data using toy experiments.

    For each mass point the TF is fitted once to obtain the nominal
    best-fit `theta`, then `calc.toysfit` is run `args.N` times. The
    resulting toy background estimates are drawn together with the nominal
    estimate, its up/down variations, and the passing data.
    """

    # Command-line options
    args = parser.parse_args()

    # Input
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    files = glob.glob(tf.config['base_path'] + 'objdef_data_*.root')
    if len(files) == 0:
        warning("No files found.")
        return

    data = loadData(files, tf.config['tree'], prefix=tf.config['prefix'])
    info = loadData(files, tf.config['outputtree'], stop=1)

    if data.size == 0:
        warning("No data was loaded. Exiting.")
        return

    # Derived variable(s)
    data = append_fields(data, 'logpt', np.log(data['pt']))

    # Selection masks and the (loop-invariant) failing sub-sample
    msk_pass = tf.config['pass'](data)
    msk_fail = ~msk_pass

    # Toys validation, one canvas per mass point
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # (An alternative scan, [85] + linspace(100, 250, 16), was disabled in
    # the original.)
    mass_points = list(np.linspace(110, 250, 14 + 1, endpoint=True))

    for mass in mass_points:

        print("-------- MASS: %d GeV" % mass)

        # Calculator with default configuration
        calc = tf.calculator(data=data, config=tf.config, verbose=False)
        calc.mass = mass
        if args.window is None:
            calc.window = 0.2
        else:
            calc.window = args.window
            pass

        # Nominal fit, and the weights for the nominal/up/down variations
        calc.fit()
        theta = calc.theta()
        weights_nominal = calc.weights(data[msk_fail], shift=0)
        weights_up = calc.weights(data[msk_fail], shift=+1)
        weights_down = calc.weights(data[msk_fail], shift=-1)

        # Throw toys from the TF profile and fit each of them
        calc.toysfit(N=args.N, theta=theta)

        # One set of weights per toy fit
        toys_weights = calc.toysweights(data[msk_fail])

        # Drawing
        bins = tf.config['massbins']
        c = ap.canvas(num_pads=2, batch=not args.show)

        # -- Nominal background estimate and its statistical uncertainty
        hist_nom = c.hist(data[msk_fail]['m'],
                          bins=bins,
                          weights=weights_nominal,
                          fillcolor=ROOT.kAzure + 7,
                          label='Nominal bkg.')
        h_sum = c.hist(hist_nom,
                       fillstyle=3245,
                       fillcolor=ROOT.kGray + 2,
                       linecolor=ROOT.kGray + 3,
                       option='E2',
                       label='Stat. uncert.')

        # -- Toy backgrounds; only the first five get a legend entry
        toys_hists = []
        for itoy, toy in enumerate(toys_weights):
            if itoy < 5:
                toy_label = 'Toys %d' % (itoy + 1)
            else:
                toy_label = None
                pass
            toys_hists.append(
                c.hist(data[msk_fail]['m'],
                       bins=bins,
                       weights=toy[0],
                       fillstyle=0,
                       linecolor=ROOT.kRed + itoy % 5,
                       linestyle=1 + itoy // 5,
                       label=toy_label))
            pass

        # -- Up/down variations of the nominal fit
        hist_up = c.hist(data[msk_fail]['m'],
                         bins=bins,
                         weights=weights_up,
                         fillstyle=0,
                         linecolor=ROOT.kGreen,
                         linestyle=2,
                         label='Syst. uncert.')
        hist_down = c.hist(data[msk_fail]['m'],
                           bins=bins,
                           weights=weights_down,
                           fillstyle=0,
                           linecolor=ROOT.kGreen,
                           linestyle=2)

        # -- Passing data
        hist_data = c.plot(data[msk_pass]['m'], bins=bins, label='Data')

        # -- Ratios with respect to the nominal estimate
        c.ratio_plot((h_sum, hist_nom), option='E2')
        for toy_hist in toys_hists:
            c.ratio_plot((toy_hist, hist_nom), option='HIST')
            pass
        c.ratio_plot((hist_up, hist_nom), option='HIST')
        c.ratio_plot((hist_down, hist_nom), option='HIST')
        c.ratio_plot((hist_data, hist_nom), oob=True)

        # -- Axis titles, ranges and text
        c.xlabel('Large-#it{R} jet mass [GeV]')
        c.ylabel('Events / 5 GeV')
        ratio_pad = c.pads()[1]
        ratio_pad.ylabel('Ratio wrt. nominal')
        ratio_pad.ylim(0.8, 1.2)
        ratio_pad.yline(1.)

        c.text(["#sqrt{s} = 13 TeV, L = 36.1 fb^{-1}", "Photon channel"],
               qualifier="Internal")

        c.region("SR", 0.8 * mass, 1.2 * mass)

        c.legend()
        c.log()

        if args.show:
            c.show()
            pass
        if args.save:
            c.save('plots/validation_%dGeV_N%d.pdf' % (mass, args.N))
            pass
        pass

    return