def get_hf_frac(name, cut):
    """
    Read the W+jets flavour classification histogram of one sample.

    name: file name of the sample, relative to the nominal isolated muon MC
        directory that get_paths reports for the "latest" dataset
    cut: the selection to apply; converted with str() before drawing

    *returns*
    a dict that maps every entry of flavour_scenarios[0] to the matching
    value of hi.y() for the "wjets_flavour_classification0" histogram
    (8 bins between 0 and 8, integer dtype)
    """
    logging.info("Getting fractions for %s" % name)

    mc_dir = get_paths(dataset="latest")["mc"]["mu"]["nominal"]["iso"]
    sample = Sample.fromFile("/".join([mc_dir, name]))
    hist = sample.drawHistogram(
        "wjets_flavour_classification0",
        str(cut),
        plot_range=[8, 0, 8],
        dtype="I"
    )

    # Pair the scenario names with the histogram values, in order
    return dict(zip(flavour_scenarios[0], list(hist.y())))
def compare_plot(plot_def):
    """
    Draws a comparison plot based on a plot definition dictionary, which has
    the following keys:

    *mandatory*
    items: a list of tuples, containing the plots to be drawn in the
        following format
        [
            (name_of_plot, pathname_of_file, filename, cut_object, weight_object)
        ]
    var: a string with the TTree branch name / TFormula expression to plot
    range: the plot range definition, passed as plot_range to
        Sample.drawHistogram ('plot_range' is accepted as an alias)

    *optional*
    lumi: the luminosity to normalize to in /pb (default 20000)
    merge_cmds: a merge command for the merge_hists method, which merges
        the histograms according to the names in the items list
    hist_callback: a method that is called on each of the histograms after
        merging, having the form:
        def f(hist_name, hist):
            dostuff
            return hist
    xlab: a string with the label for the x axis ('x_label' is accepted as
        an alias, default 'xlab')

    *returns*
    a tuple (canvas, final_histogram_dict)

    *raises*
    KeyError if 'items' is missing from plot_def
    """
    proc_cuts = plot_def['items']
    var = plot_def.get('var')

    # 'range' and 'xlab' are the keys this function has always read. Earlier
    # documentation advertised 'plot_range' and 'x_label', so those spellings
    # are honoured as fallbacks instead of being silently ignored.
    plot_range = plot_def.get('range', plot_def.get('plot_range'))
    x_label = plot_def.get('xlab', plot_def.get('x_label', 'xlab'))

    lumi = plot_def.get('lumi', 20000)
    merge_cmds = plot_def.get('merge_cmds', None)
    hist_callback = plot_def.get('hist_callback', None)

    hists = {}
    for name, path, fname, cut, weight in proc_cuts:
        sample = Sample.fromFile(join(path, fname))
        hist = sample.drawHistogram(
            var, str(cut), weight=str(weight), plot_range=plot_range
        )
        hist.SetName(name)
        # Work on a copy of the drawn histogram before normalising it
        hist = copy.deepcopy(hist)
        hist.Scale(sample.lumiScaleFactor(lumi))
        hists[name] = hist

    if merge_cmds:
        hists = merge_hists(hists, merge_cmds)

    if hist_callback:
        # Snapshot the items so that replacing values cannot disturb iteration
        for hn, h in list(hists.items()):
            hists[hn] = hist_callback(hn, h)

    canv = plot_hists_dict(hists, x_label=x_label)
    return canv, hists
# Input directory; the default is built from the STPOL_DIR environment
# variable, which therefore has to be set when this line runs.
parser.add_argument(
    "--indir", type=str, required=False, default=(os.environ["STPOL_DIR"] + "/step3_latest"),
    help="the input directory"
)
args = parser.parse_args()

lumi=19700

for proc in ['ele', 'mu']:
    physics_processes = PhysicsProcess.get_proc_dict(lepton_channel=proc)
    merge_cmds = PhysicsProcess.get_merge_dict(physics_processes)
    # Only the files listed under merge_cmds['tchan'] are used, labelled 'signal'
    flist = get_file_list({'signal': merge_cmds['tchan']}, args.indir + "/%s/mc/iso/nominal/Jul15/" % proc)
    samples={}
    for f in flist:
        samples[f] = Sample.fromFile(f)

    # Reference yield: entries passing cut[proc], scaled to lumi, summed over samples
    yld = 0;
    for k,v in samples.items():
        yld+=v.getEntries(cut[proc])*v.lumiScaleFactor(lumi)

    # Scan mva_BDT thresholds 0.00, 0.01, ..., 0.99 and stop at the first one
    # whose yield drops below the reference yield
    found = False
    for b in range(100):
        bc=b*1./100
        mvayld = 0
        # Cuts.met is used unless the channel is 'mu', which uses Cuts.mt_mu
        mcut = Cuts.met
        if proc == 'mu':
            mcut = Cuts.mt_mu
        for k,v in samples.items():
            mvayld+=v.getEntries(str(cutlist['2j1t']*cutlist['presel_'+proc]*mcut*Cut('mva_BDT>'+str(bc))))*v.lumiScaleFactor(lumi)
        if not found and mvayld < yld:
            found = True
            break
"wjets": "W", } pretty_names_weights = { "unw": "unweighted", "nom": "nominal", "bc_up": "SF_{bc} up", "bc_down": "SF_{bc} down", "l_up": "SF_{l} up", "l_down": "SF_{l} down", } recreate = True if recreate: for sn in sampnames: sample = Sample.fromFile(path + "/" + sn + ".root") for wn, w in weights: hi = sample.drawHistogram(var, str(cut), weight=w, plot_range=plot_range) hi.Scale(sample.lumiScaleFactor(lumi)) hists[wn][sample.name] = hi merged = dict() for wn, w in weights: merged[wn] = merge_hists(hists[wn], merge_cmds) hc = HistCollection(merged[wn], name="bweight_%s" % wn) hc.save(".") colls = dict() for wn, w in weights:
# Nominal MC files for the current lepton channel and isolation region
flist = get_file_list(
    merge_cmds,
    args.indir + "/%s/mc/%s/nominal/Jul15/" % (lepton_channel,isoreg)
)
# Add the data files of both isolation regions and both dataset tags
for iso in ['iso', 'antiiso']:
    for ds in ['Jul15', 'Aug1']:
        flist += get_file_list(
            merge_cmds,
            args.indir + "/%s/data/%s/%s/" % (lepton_channel, iso, ds)
        )

# Stop right away when no file at all was found
if len(flist)==0:
    raise Exception("Couldn't open any files. Are you sure that %s exists and contains root files?" % args.indir)

# One Sample per file, keyed by the file name
samples={}
for f in flist:
    samples[f] = Sample.fromFile(f, tree_name=tree)

# samples_syst[name][s][file] -> Sample; the file list of every variation s
# is obtained from the nominal list through change_syst
samples_syst = {}
for name, syst in systs.items():
    samples_syst[name] = {}
    for s in list(syst):
        samples_syst[name][s] = {}
        for f in change_syst(flist, s):
            samples_syst[name][s][f] = Sample.fromFile(f, tree_name=tree)

# Draw every plot requested on the command line
for plotname in args.plots:
    plot_def = plot_defs[plotname]
    canv, merged_hists, htot_mc, htot_data = data_mc_plot(samples, plot_def, plotname, lepton_channel, lumi, weight, physics_processes, use_antiiso)
canv = plot_hists_dict(hc.hists, do_chi2=False, do_ks=True, x_label=varname, legend_pos="top-left") hc.hists.values()[0].SetTitle("shape variation") canv.SaveAs(out_name + ".png") hc.save(out_name) return hc, canv if __name__=="__main__": ROOT.TH1F.AddDirectory(False) import logging #rootpy.log.basic_config_colorized() logging.basicConfig(level=logging.WARNING) logger = rootpy.log["/systematics"] data_repro = "Jul15" path = get_paths()[data_repro]["mc"]["mu"]["nominal"]["iso"] logger.info("Input path %s" % path) samples = [ Sample.fromFile(f) for f in filter(lambda x: re.match(".*/W[1-4]Jets.*", x), glob.glob(path + "/*.root")) ] logger.info("samples %s" % samples) cut = Cuts.final(2,1) recreate = True r1 = shape_variation("cos_theta", [10, -1, 1], "cos #theta", recreate, "variations_cos_theta") r2 = shape_variation("abs(eta_lj)", [10, 2.5, 4.5], "|#eta|_{j'}", recreate, "variations_abs_eta_lj")
# for i in range(1, data_post.nbins()+1):
#     data_post.SetBinError(i, pe_post.GetBinError(i))

# Get the true distribution at generator level,
# requiring the presence of a lepton with the
# correct gen flavour
htrue = None
datadir = "data/37acf5_343e0a9_Aug22"

def gen_hist(sample):
    """
    Draw the "true_cos_theta" histogram of one sample, normalised to lumi.

    sample: the Sample to draw from; it is also the sample whose
        lumiScaleFactor is used for the normalisation

    *returns*
    the scaled histogram
    """
    hi = sample.drawHistogram("true_cos_theta", str(Cuts.true_lepton(lep)), binning=list(binning))
    # Scale with the sample that was passed in. This used to read the outer
    # loop variable `s`, which only worked because every caller passed `s`.
    hi.Scale(sample.lumiScaleFactor(lumi))
    return hi

# Sum the generator-level histograms of the two samples listed below
for fn in ["T_t_ToLeptons", "Tbar_t_ToLeptons"]:
    #s = Sample.fromFile("data/Step3_Jul26/%s/mc/iso/nominal/Jul15/%s.root" % (lep, fn))
    s = Sample.fromFile(datadir + "/%s.root" % (fn))
    hi = gen_hist(s)
    if htrue:
        htrue += hi
    else:
        htrue = hi
htrue.SetTitle("generated (POWHEG)")

#Scale to the final fit
htrue.Scale(fitpars[lep])

# Same sum for the three per-flavour samples stored under datadir/<lep>/
hcomphep = None
for fn in ["TToBENu_t-channel", "TToBMuNu_t-channel", "TToBTauNu_t-channel"]:
    s = Sample.fromFile(datadir + "/{0}/{1}.root".format(lep, fn))
    hi = gen_hist(s)
    if hcomphep:
        hcomphep += hi
def __init__(self, saver, *args, **kwargs):
    """
    Set up the node and open the sample it stands for.

    saver: stored unchanged as self.saver
    *args, **kwargs: forwarded to the parent class constructor

    The sample is opened with Sample.fromFile from self.name, so self.name
    has to be available once the parent constructor has run.
    """
    super(SampleNode, self).__init__(*args, **kwargs)

    sample_file = self.name
    self.sample = Sample.fromFile(sample_file)
    self.saver = saver
proc = args.channel # Which luminosity we use lumi = lumis[args.lumitag]['iso'][proc] flist = [] for i in ['sig','bg']: for j in ['train','eval']: flist += mvaFileList[i][j] logger.debug('Used file list: %s',str(flist)) # Read in the samples samples = {} for f in flist: samples[f] = Sample.fromFile(args.indir+'/%s/%s.root' % (proc,f), tree_name='Events_MVA') # Set the weight expression weightString = str(Weights.total(proc) * Weights.wjets_madgraph_shape_weight() * Weights.wjets_madgraph_flat_weight()) # Which file do we use to write our TMVA trainings ext='' if len(rVar): ext='_sans' for v in rVar: ext+='_'+v if len(cVar): ext='_with'
def get_samples(path, channel, systematic):
    """
    Collect the MC and data samples for one channel and one systematic.

    path: base directory of the step3 output
    channel: lepton channel; "mu" and "ele" additionally get their
        anti-isolated data samples
    systematic: name of the systematic scenario, or "nominal"

    For the energy/resolution systematics listed below the MC is read from
    <path>/<channel>/mc/iso/<systematic>/Jul15. For every other value,
    "nominal" included, the nominal MC and the "SYST" samples (Jul15 and
    Sep4) are read from fixed locations.

    *returns*
    a dict of samples: MC, isolated data (Jul15 and Aug1), the "SYST"
    samples where applicable, and the anti-isolated data under the keys
    "<dataset>_aiso"
    """
    energy_systematics = (
        "EnDown", "EnUp", "ResDown", "ResUp",
        "UnclusteredEnDown", "UnclusteredEnUp",
    )

    syst_samples = None
    if systematic in energy_systematics:
        mc_dir = "/".join((path, channel, "mc", "iso", systematic, "Jul15"))
    else:
        # Both the "nominal" case and all remaining systematics read the
        # nominal MC and add the dedicated systematic samples on top.
        mc_dir = "/".join(("/hdfs/local/stpol/step3/Sep4_syst_a554579", channel, "mc", "iso", "nominal", "Jul15"))
        syst_base = "/home/andres/single_top/stpol/Sep4_syst_a554579/"
        syst_samples = Sample.fromDirectory(
            "/".join((syst_base, channel, "mc_syst", "iso", "SYST", "Jul15")),
            out_type="dict"
        )
        syst_samples.update(Sample.fromDirectory(
            "/".join((syst_base, channel, "mc_syst", "iso", "SYST", "Sep4")),
            out_type="dict"
        ))

    samples = Sample.fromDirectory(mc_dir, out_type="dict")

    # Isolated data of both dataset tags
    for tag in ("Jul15", "Aug1"):
        data_dir = "/".join((path, channel, "data", "iso", tag))
        samples.update(Sample.fromDirectory(data_dir, out_type="dict"))

    if syst_samples is not None:
        samples.update(syst_samples)

    # Anti-isolated data, stored under "<dataset>_aiso"
    if channel == "mu":
        aiso_datasets = ("SingleMu1", "SingleMu2", "SingleMu3", "SingleMu_miss")
    elif channel == "ele":
        aiso_datasets = ("SingleEle1", "SingleEle2", "SingleEle_miss")
    else:
        aiso_datasets = ()
    for dataset in aiso_datasets:
        aiso_file = "/".join((path, channel, "data", "antiiso", "Jul15", dataset + ".root"))
        samples[dataset + "_aiso"] = Sample.fromFile(aiso_file)

    return samples
def load_nominal_mc_samples(path, channel, iso):
    """
    Load all samples found in <path>/<channel>/mc/<iso>/nominal/Jul15.

    *returns*
    the result of Sample.fromDirectory called with out_type="dict"
    """
    return Sample.fromDirectory(
        "/".join([path, channel, "mc", iso, "nominal", "Jul15"]),
        out_type="dict"
    )
import os import copy sys.path.append(os.environ["STPOL_DIR"]) import plots import plots.common from plots.common.odict import OrderedDict as dict from plots.common.sample import Sample from plots.common.cuts import Cuts, Cut from plots.common.utils import merge_cmds, merge_hists, get_hist_int_err lumi_total = 12210 sample_dir = "data/out_step3_05_30_08_20" samples = [] samples.append(Sample.fromFile(sample_dir + "/TTJets_MassiveBinDECAY.root")) samples.append(Sample.fromFile(sample_dir + "/TTJets_FullLept.root")) samples.append(Sample.fromFile(sample_dir + "/TTJets_SemiLept.root")) samples.append(Sample.fromFile(sample_dir + "/T_t.root")) samples.append(Sample.fromFile(sample_dir + "/Tbar_t.root")) samples.append(Sample.fromFile(sample_dir + "/T_t_ToLeptons.root")) samples.append(Sample.fromFile(sample_dir + "/Tbar_t_ToLeptons.root")) samples.append(Sample.fromFile(sample_dir + "/QCDMu.root")) samples.append(Sample.fromFile(sample_dir + "/WJets_inclusive.root")) samples.append(Sample.fromFile(sample_dir + "/W1Jets_exclusive.root")) samples.append(Sample.fromFile(sample_dir + "/W2Jets_exclusive.root")) samples.append(Sample.fromFile(sample_dir + "/W3Jets_exclusive.root")) samples.append(Sample.fromFile(sample_dir + "/W4Jets_exclusive.root")) def mc_amount(cut, weight, lumi=12210, ref=None):
from plots.common.sample import Sample from plots.common.cuts import Cuts import sys if __name__=="__main__": for fi in sys.argv[1:]: print fi samp = Sample.fromFile(fi) print "Lumi scale factor: ", samp.lumiScaleFactor(20000) for lep in ["mu", "ele"]: if "/%s/"%lep not in fi: continue print lep cut = None for cutname, _cut in [ ("hlt", Cuts.hlt(lep)), ("lep", Cuts.single_lepton(lep)), ("2J", Cuts.n_jets(2)), ("1T", Cuts.n_tags(1)), ("MET/MtW", Cuts.metmt(lep)), ("rms", Cuts.rms_lj), ("Mtop", Cuts.top_mass_sig), ("etalj", Cuts.eta_lj) ]: if not cut: cut = _cut else: cut *= _cut try: hi = samp.drawHistogram("eta_lj", str(cut), binning=[50, -5, 5])
description='Caches entries.' ) parser.add_argument('outfile', action='store', help="The output file name." ) parser.add_argument('cut', action='store', help="The cut string." ) parser.add_argument('infiles', nargs='+', help="The input file names" ) args = parser.parse_args() of = ROOT.TFile(args.outfile, "RECREATE") for inf in args.infiles: samp = Sample.fromFile(inf) samp_path = samp.getPath(escape=True) print samp_path cache = samp.cacheEntries(samp_path, args.cut) print "Cached %d entries for sample %s" % (cache.GetN(), samp.tfile.GetPath()) of.cd() cache = cache.Clone() cache.SetDirectory(of) cache.Write() samp.tfile.Close() #of.Write() of.Close()
from plots.common.sample import Sample
from plots.common.cuts import Cuts
import sys

if __name__=="__main__":
    # Print the number of entries of one file that pass Cuts.final(2,1),
    # as returned by TTree.Draw on the "cos_theta" branch.
    if len(sys.argv)!=2:
        # The script name is argv[0]. argv[1] does not exist when the script
        # is started without arguments, so using it here raised IndexError
        # instead of showing the usage text.
        print("Usage: %s /path/to/mu/iso/nominal/T_t_ToLeptons.root" % sys.argv[0])
        sys.exit(0)

    s = Sample.fromFile(sys.argv[1])
    cut = Cuts.final(2,1)
    nentries = s.tree.Draw("cos_theta", str(cut))
    # Single-argument print keeps the output text identical under Python 2
    print("Nentries= %s" % nentries)
physics_processes = PhysicsProcess.get_proc_dict(lepton_channel=proc)
merge_cmds = PhysicsProcess.get_merge_dict(physics_processes)

# Nominal isolated MC files for this channel
flist = get_file_list(
    merge_cmds,
    step3 + "/%s/mc/iso/nominal/Jul15/" % proc
)
# Anti-isolated data files, selected with the 'data' merge entry but
# labelled 'QCD'
flist += get_file_list(
    {'QCD': merge_cmds['data']},
    step3 + "/%s/data/antiiso/Jul15/" % proc
)

# Read in the file list from the output directory
samples = {}
for f in flist:
    samples[f] = Sample.fromFile(f, tree_name="Events_MVAwQCD")

# To compute accurate weight we need to load from the tree also the weights in question
weightString = str(Weights.total(proc) * Weights.wjets_madgraph_shape_weight() * Weights.wjets_madgraph_flat_weight())

# Temporary patch until proper step3 is available
#weightString = "1.0"

# Per-file dictionaries keyed by file name: t holds the tree, w a weight that
# starts at 1. Note that f is rebound here from the loop variable to a dict.
t={}
f={}
w={}
for key in flist:
    w[key]=1.
    t[key]=samples[key].getTree()
from plots.common.sample import Sample
from plots.common.sample_style import ColorStyleGen
from plots.common.hist_plots import plot_hists
from plots.common.cuts import Cuts, Weights
from plots.common.legend import legend
from plots.common.tdrstyle import tdrstyle

if __name__=="__main__":
    # Draw cos_theta for one sample four times: unweighted, and with the
    # "nominal", "wjets_up" and "wjets_down" madgraph W+jets weights.
    samp = Sample.fromFile("~/Documents/stpol/data/out_step3_joosep_11_07_19_44/mu/iso/nominal/W4Jets_exclusive.root")
    # Attach the "trees/WJets_weights" tree, read from the sample's own file,
    # as a friend of the sample tree
    samp.tree.AddFriend("trees/WJets_weights", samp.file_name)

    tdrstyle()

    cut = str(Cuts.final(2,0)*Cuts.Wflavour("W_heavy"))

    # Mean of the nominal weight expression itself, histogrammed on [0, 2]
    mean_weight = samp.drawHistogram(str(Weights.wjets_madgraph_weight("nominal")), cut, weight="1.0", plot_range=[200, 0, 2]).hist.GetMean()
    print "mean weight=%.2f" % mean_weight

    # Every histogram below is scaled to a luminosity of 20000
    hi0 = samp.drawHistogram("cos_theta", cut, weight="1.0", plot_range=[20, -1, 1]).hist
    hi0.Scale(samp.lumiScaleFactor(20000))
    hi1 = samp.drawHistogram("cos_theta", cut, weight=str(Weights.wjets_madgraph_weight("nominal")), plot_range=[20, -1, 1]).hist
    hi1.Scale(samp.lumiScaleFactor(20000))
    hi2 = samp.drawHistogram("cos_theta", cut, weight=str(Weights.wjets_madgraph_weight("wjets_up")), plot_range=[20, -1, 1]).hist
    hi2.Scale(samp.lumiScaleFactor(20000))
    hi3 = samp.drawHistogram("cos_theta", cut, weight=str(Weights.wjets_madgraph_weight("wjets_down")), plot_range=[20, -1, 1]).hist
    hi3.Scale(samp.lumiScaleFactor(20000))

    hists = [hi0, hi1, hi2, hi3]
    #for h in hists:
    #    h.Scale(1.0/h.Integral())

    # The titles name the weight each histogram was drawn with
    hi0.SetTitle("unweighted")
    hi1.SetTitle("weighted")
    hi2.SetTitle("weighted wjets_up")
import os import copy sys.path.append(os.environ["STPOL_DIR"]) import plots import plots.common from plots.common.odict import OrderedDict as dict from plots.common.sample import Sample from plots.common.cuts import Cuts, Cut from plots.common.utils import merge_cmds, merge_hists, get_hist_int_err lumi_total=12210 sample_dir = "data/out_step3_05_30_08_20" samples = [] samples.append(Sample.fromFile(sample_dir + "/TTJets_MassiveBinDECAY.root")) samples.append(Sample.fromFile(sample_dir + "/TTJets_FullLept.root")) samples.append(Sample.fromFile(sample_dir + "/TTJets_SemiLept.root")) samples.append(Sample.fromFile(sample_dir + "/T_t.root")) samples.append(Sample.fromFile(sample_dir + "/Tbar_t.root")) samples.append(Sample.fromFile(sample_dir + "/T_t_ToLeptons.root")) samples.append(Sample.fromFile(sample_dir + "/Tbar_t_ToLeptons.root")) samples.append(Sample.fromFile(sample_dir + "/QCDMu.root")) samples.append(Sample.fromFile(sample_dir + "/WJets_inclusive.root")) samples.append(Sample.fromFile(sample_dir + "/W1Jets_exclusive.root")) samples.append(Sample.fromFile(sample_dir + "/W2Jets_exclusive.root")) samples.append(Sample.fromFile(sample_dir + "/W3Jets_exclusive.root")) samples.append(Sample.fromFile(sample_dir + "/W4Jets_exclusive.root")) def mc_amount(cut, weight, lumi=12210, ref=None): histsD = dict()
if __name__=="__main__":
    #import plots.common.tdrstyle as tdrstyle
    #tdrstyle.tdrstyle()

    # Input directories, keyed by isolation region
    datadirs = dict()
    #This symlink is present in the repo for *.hep.kbfi.ee, if running on other machines, you must make it yourself by doing
    #ln -s /path/to/step3/out $STPOL_DIR/step3_latest
    #isolated files are by default in $STPOL_DIR/step3_latest/mu/iso/nominal/*.root
    datadirs["iso"] = "/".join((os.environ["STPOL_DIR"], "step3_latest", "mu" ,"iso", "nominal"))
    #Use the anti-isolated data for QCD $STPOL_DIR/step3_latest/mu/antiiso/nominal/SingleMu.root
    datadirs["antiiso"] = "/".join((os.environ["STPOL_DIR"], "step3_latest", "mu" ,"antiiso", "nominal"))

    #Load all the samples in the isolated directory
    samples = Sample.fromDirectory(datadirs["iso"], out_type="dict")
    # The anti-isolated data is added under its own key, "SingleMu_aiso"
    samples["SingleMu_aiso"] = Sample.fromFile(datadirs["antiiso"] + "/SingleMu.root")

    hists_mc = dict()
    hist_data = None

    #Define the variable, cut, weight and lumi
    var = "cos_theta"
    cut_name = "2j1t"
    cut_str = str(Cuts.n_jets(2)*Cuts.n_tags(1)*Cuts.lepton_veto*Cuts.one_muon*Cuts.mt_mu*Cuts.top_mass_sig*Cuts.eta_lj)
    weight_str = "1.0"
    lumi = 20000 #FIXME: take from the step2 output filelists/step2/latest/iso/nominal/luminosity.txt

    #nbins, min, max
    plot_range= [20, -1, 1]