def combine_templates(templates, patterns, conf):
    """
    Combine the data, nominal MC and systematically variated templates,
    construct the data-driven QCD templates and save everything to the
    unmerged and merged output files defined in the configuration.

    Args:
        templates: the matched histogram templates, indexable by pattern,
            each yielding (key, histogram) pairs
        patterns: a dict of patterns for "data", "mc_nom", "mc_varsamp",
            "mc_varproc" and "data_antiiso"
        conf: the analysis configuration (channel, variable name, nominal
            weight, output file names)
    """
    hists = {}
    hsources = []
    for k in ["data", "mc_nom", "mc_varsamp", "mc_varproc"]:
        items = templates[patterns[k]]
        if len(items) == 0:
            raise MatchingException("Nothing matched to %s:%s" % (k, patterns[k]))
        hsources += items

    if len(hsources) == 0:
        raise ValueError("No histograms matched")

    #Nested structure for the data-driven QCD templates and their variations
    hqcd = NestedDict()
    hqcd["nominal"][None] = []
    for syst in ["yield", "iso"]:
        for sdir in ["up", "down"]:
            hqcd[syst][sdir] = []

    templates_qcd = templates[patterns["data_antiiso"]]
    if len(templates_qcd) == 0:
        raise MatchingException("Nothing matched to %s:%s" % ("data_antiiso", patterns["data_antiiso"]))

    for keys, hist in templates_qcd:
        if keys[1].startswith("antiiso"):
            #We have isolation variations
            isodir = keys[1].split("_")[1]
            if isodir == "nominal":
                hqcd["nominal"][None].append(hist)
                hup = hist.Clone()
                hdown = hist.Clone()
                hup.Scale(qcd_yield_variations[0])
                hdown.Scale(qcd_yield_variations[1])
                hqcd["yield"]["up"].append(hup)
                hqcd["yield"]["down"].append(hdown)
            elif isodir in ["up", "down"]:
                hqcd["iso"][isodir].append(hist)
            else:
                raise ValueError("Undefined isolation variation direction: %s" % isodir)
        #We only have the nominal QCD shape
        elif keys[1] == "weight__unweighted":
            hqcd["nominal"][None].append(hist)
            hup = hist.Clone()
            hdown = hist.Clone()
            hup.Scale(qcd_yield_variations[0])
            hdown.Scale(qcd_yield_variations[1])
            hqcd["yield"]["up"].append(hup)
            hqcd["yield"]["down"].append(hdown)
            #Placeholders for the isolation variation
            for isodir in ["up", "down"]:
                h = hist.Clone()
                hqcd["iso"][isodir].append(h)
        else:
            raise Exception("Couldn't parse the QCD pattern: %s" % str(keys))

    def map_leaves(di, f, equate=True):
        """
        Recursively apply f to the leaf values of a nested dict, storing the
        result back in place if equate is True.
        (A standalone illustration of this pattern follows after this function.)
        """
        for k, v in di.items():
            if isinstance(v, dict):
                map_leaves(v, f, equate=equate)
            else:
                if equate:
                    di[k] = f(v)
                else:
                    f(v)
        return di

    #Sum the anti-iso data subsamples
    map_leaves(hqcd, lambda li: reduce(lambda x, y: x + y, li))

    #Normalize the isolation variations to the nominal
    map_leaves(hqcd["iso"],
        lambda hi: hi.Scale(hqcd["nominal"][None].Integral() / hi.Integral()) if hi.Integral() > 0 else 0,
        equate=False
    )

    #Add the variated data-driven QCD templates
    hsources += [
        (("data", "qcd", "weight__unweighted"), hqcd["nominal"][None]),
        (("data", "qcd", "weight__qcd_yield_up"), hqcd["yield"]["up"]),
        (("data", "qcd", "weight__qcd_yield_down"), hqcd["yield"]["down"]),
        (("data", "qcd", "weight__qcd_iso_up"), hqcd["iso"]["up"]),
        (("data", "qcd", "weight__qcd_iso_down"), hqcd["iso"]["down"]),
    ]

    #f = open('temp.pickle', 'wb')
    #pickle.dump(hsources, f)
    #f.close()

    #load the histos from the temporary pickle
    #f = open('temp.pickle', 'rb')
    #hsources = pickle.load(f)

    syst_scenarios = NestedDict()
    for (sample_var, sample, weight_var), hist in hsources:
        make_hist(hist)

        # if "__ele" in weight_var:
        #     continue

        if ".root" in sample:
            sample = sample[:sample.index(".root")]
        if "__" in weight_var:
            spl = weight_var.split("__")
            wn = spl[1]
        else:
            wn = weight_var

        sample_var = sample_var.lower()

        wtype = None
        wdir = None
        stype = None
        sdir = None

        syst = None
        #Nominal weight, look for variated samples
        if wn == "nominal":
            syst = sample_var
        elif wn == "unweighted":
            syst = "unweighted"
        else:
            #Variated weight, use only the nominal sample, or data in case of data-driven shapes
            if not (sample_var == "nominal" or sample_var == "data"):
                continue
            syst = wn

        if wn == conf.get_nominal_weight() and sample_var == "nominal":
            logger.info("Using %s:%s as nominal sample for %s" % (wn, sample_var, sample))
            syst_scenarios[sample]["nominal"][None] = hist
        #A systematic scenario which has a separate systematic sample
        elif sample_var == "syst":
            try:
                r = get_syst_from_sample_name(sample)
            except Exception as e:
                logger.warning("Unhandled systematic: %s" % str(e))
                r = None
            if not r:
                continue
            sample, systname, d = r
            #sample = map_syst_sample_to_nominal(sample)
            syst_scenarios[sample][systname][d] = hist
        else:
            logger.debug("Systematically variated weight: %s:%s %s" % (wn, sample_var, sample))
            systname, d = get_updown(syst)
            syst_scenarios[sample][systname][d] = hist

    logger.info("histogram W3Jets_exclusive nominal: " + "%f %d" % (
        syst_scenarios["W3Jets_exclusive"]["nominal"][None].Integral(),
        syst_scenarios["W3Jets_exclusive"]["nominal"][None].GetEntries()
    ))

    ######################################
    ### Save systematics, fill missing ###
    ######################################

    #########
    # tchan #
    #########
    #T_t_ToLeptons mass_up is missing, take the mass down and flip the difference with the nominal
    mnomt = syst_scenarios["T_t_ToLeptons"]["nominal"][None].Clone()
    mdownt = syst_scenarios["T_t_ToLeptons"]["mass"]["down"].Clone()
    mupt = (mnomt + mnomt - mdownt)
    syst_scenarios["T_t_ToLeptons"]["mass"]["up"] = mupt

    #########
    # TTBar #
    #########
    #TTbar variations are provided for the inclusive sample only, fill them for the exclusive samples
    nom_ttbar = syst_scenarios["TTJets_FullLept"]["nominal"][None] + syst_scenarios["TTJets_SemiLept"]["nominal"][None]
    for syst in ["mass", "ttbar_scale", "ttbar_matching"]:
        for sample in ["TTJets_FullLept", "TTJets_SemiLept"]:
            for sd in ["up", "down"]:
                syst_scenarios[sample][syst][sd] = syst_scenarios[sample]["nominal"][None] * syst_scenarios["TTJets"][syst][sd] / nom_ttbar
    syst_scenarios.pop("TTJets")

    syst_scenarios = syst_scenarios.as_dict()

    #Create the output file
    p = os.path.dirname(conf.get_outfile_unmerged())
    if not os.path.exists(p):
        os.makedirs(p)
    of = ROOT.TFile(conf.get_outfile_unmerged(), "RECREATE")
    of.cd()

    #Get the list of all possible systematic scenarios that we have available
    allsysts = get_all_systs(syst_scenarios)

    for sampn, h1 in syst_scenarios.items():
        #Consider all the possible systematic scenarios
        for systname in allsysts:
            #If we have it available, fine, use it
            if systname in h1.keys():
                h2 = h1[systname]
            #If not, in case of MC and a non-trivial variation
            elif not sampn.startswith("Single") and systname not in ["unweighted", "nominal"]:
                #Try to get the unvariated template as a placeholder
                h = h1.get("nominal", None)
                if not h:
                    h = h1.get("unweighted", None)
                if not h:
                    raise Exception("Could not get the nominal template for %s:%s" % (sampn, systname))
                #Our convention is that even the unvariated template is a dict with a single
                #key for the direction of variation, which is 'None'
                h = h[None]

                #Add placeholder templates
                for systdir in ["up", "down"]:
                    h = h.Clone(hname_encode(conf.varname, sampn, systname, systdir))
                    set_missing_hist(h)

                    #Save to file
                    h.SetDirectory(of)
                    h.Write()
                continue
            else:
                continue
            for systdir, h in h2.items():
                if (systdir == None and systname == "nominal") or not sample_types.is_mc(sampn):
                    h = h.Clone(hname_encode(conf.varname, sampn))
                elif systdir == None and systname == "unweighted":
                    h = h.Clone(hname_encode(conf.varname, sampn, "unweighted"))
                else:
                    h = h.Clone(hname_encode(conf.varname, sampn, systname, systdir))
                h.SetDirectory(of)
                h.Write()
    nkeys = len(of.GetListOfKeys())
    logger.info("Saved %d histograms to file %s" % (nkeys, of.GetPath()))
    # of.Close()

    ########################
    ### Load systematics ###
    ########################
    # of_unmerged = File(conf.get_outfile_unmerged())
    hists = dict()
    # ROOT.gROOT.cd()
    for k in of.GetListOfKeys():
        hists[k.GetName()] = of.Get(k.GetName())
        h = hists[k.GetName()]
        #hists[k.GetName()].Rebin(2)
    logger.info("Loaded %d histograms from file %s" % (len(hists), of.GetPath()))
    #of_unmerged.Close()

    ########################
    ### Merge ###
    ########################
    from plots.common.utils import merge_hists, PhysicsProcess
    merge_cmds = PhysicsProcess.get_merge_dict(
        PhysicsProcess.get_proc_dict(conf.channel)
    )
    hsysts = NestedDict()
    for k, v in hists.items():
        spl = split_name(k)
        hsysts[spl["type"]][spl["dir"]][spl["sample"]] = v
    hsysts = hsysts.as_dict()

    p = os.path.dirname(conf.get_outfile_merged())
    if not os.path.exists(p):
        os.makedirs(p)
    of = ROOT.TFile(conf.get_outfile_merged(), "RECREATE")
    of.cd()

    for syst, h1 in hsysts.items():
        if syst in skipped_systs:
            continue
        for sdir, h2 in h1.items():
            hmc = merge_hists(h2, merge_cmds)
            for hn, h in hmc.items():
                #spl["var"] is the variable name, common to all histograms in the file
                if syst == "nominal" or syst == "unweighted":
                    h.SetName("__".join([spl["var"], hn]))
                else:
                    h.SetName("__".join([spl["var"], hn, syst, sdir]))
                h.SetDirectory(of)
                h.Write()
    nkeys = len(of.GetListOfKeys())
    logger.info("Saved %d histograms to file %s" % (nkeys, of.GetPath()))
    of.Close()

    hists = load_theta_format(conf.get_outfile_merged())

    processes = []
    systs = []
    for (variable, sample, syst, systdir), v in hists.items_flat():
        processes.append(sample)
        systs.append(syst)
    processes = set(processes)
    systs = set(systs)
    logger.info("Processes: %s" % processes)
    if processes != set(['diboson', 'schan', 'tWchan', 'TTJets', 'tchan', 'WJets', 'qcd', 'DYJets', 'data']):
        raise Exception("Combined file did not contain the necessary processes: %s" % str(processes))

    logger.info("Systematic scenarios: %s" % systs)
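
#Illustrative, standalone sketch of the map_leaves pattern used in combine_templates
#above; it is not called by the analysis. Plain dicts and integers stand in for the
#NestedDict of ROOT histograms, and the helper name _map_leaves_example is introduced
#here for illustration only.
def _map_leaves_example():
    def map_leaves(di, f, equate=True):
        for k, v in di.items():
            if isinstance(v, dict):
                map_leaves(v, f, equate=equate)
            else:
                if equate:
                    di[k] = f(v)
                else:
                    f(v)
        return di

    #A nested dict with lists at the leaves, analogous to hqcd before summing
    d = {"nominal": {None: [1, 2, 3]}, "yield": {"up": [4, 5], "down": [6]}}

    #Sum each leaf list in place, as is done for the anti-iso data subsamples
    map_leaves(d, lambda li: reduce(lambda x, y: x + y, li))
    assert d == {"nominal": {None: 6}, "yield": {"up": 9, "down": 6}}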
fitpars_process = NestedDict()

#Load all the fit result files in the two formats
for (cut, chan, inf) in fit_files:
    fitpars[cut][chan] = from_file(inf, match_fmt='sample')
    fitpars_process[cut][chan] = from_file(inf, match_fmt='process')

#Separate SF-s for the MET-variated templates, probably no longer needed
names = [
    "t-channel",
    "top+QCD",
    "W, diboson",
]
for met in [30, 50, 70]:
    met = str(met)
    fitpars['final_2j1t_mva_met'+met]['mu'] = from_file(os.environ["STPOL_DIR"]+"/final_fit/results/histos/met%s/mu__mva_BDT_with_top_mass_eta_lj_C_mu_pt_mt_mu_met_mass_bj_pt_bj_mass_lj.txt" % met)
    #FIXME -> Currently no MET cut dependent results for the electron channel
    fitpars['final_2j1t_mva_met'+met]['ele'] = from_file(os.environ["STPOL_DIR"]+"/final_fit/results/ele__mva_BDT_with_top_mass_C_eta_lj_el_pt_mt_el_pt_bj_mass_bj_met_mass_lj.txt")
    for i, name in enumerate(names):
        fpmu = fitpars['final_2j1t_mva_met'+met]['mu'][i]
        fpel = fitpars['final_2j1t_mva_met'+met]['ele'][i]
        #print "%s | %.2f ± %.2f | %.2f ± %.2f |" % (name, fpmu[1], fpmu[2], fpel[1], fpel[2])

#FIXME: PLACEHOLDER
fitpars['final_2j1t'] = None

from pprint import pprint

#Convert to a static dict
fitpars = fitpars.as_dict()

if __name__ == "__main__":
    print "Yield tables: TODO latex format"
#Reads the fitted QCD scale factor (and optionally its uncertainty) from the
#output of the QCD fit. Only the body survives in this fragment; the function
#name and argument list below are assumed for completeness.
def load_qcd_sf(channel, cut, met, do_uncertainty=False):
    base = os.path.join(os.environ['STPOL_DIR'], 'qcd_estimation', 'fitted', channel)
    fn = base + '/%s_no_MC_subtraction_mt_%s_plus.txt' % (cut, met)
    fi = open(fn)
    li = fi.readline().strip().split()
    fi.close()

    #The fitted QCD scale factor
    sf = float(li[0])

    #The yield and its uncertainty
    y = float(li[1])
    uncy = float(li[2])

    #Calculate the relative uncertainty of the scale factor
    unc_sf = uncy / y

    if not do_uncertainty:
        return sf
    else:
        return sf, unc_sf * sf

qcdScale = dict()
qcdScale['mu'] = dict()
qcdScale['ele'] = dict()

#Determined as the ratio of the integral of anti-iso data after the full selection
#to that after the full selection minus the MVA cut.
#See qcd_scale_factors.ipynb for the determination.
qcd_cut_SF = NestedDict()
qcd_cut_SF['mva']['loose']['mu'] = 0.114754098361
qcd_cut_SF['mva']['loose']['ele'] = 0.0734908136483
qcd_cut_SF['cutbased']['final']['mu'] = 0.0983606557377
qcd_cut_SF['cutbased']['final']['ele'] = 0.0629921259843
qcd_cut_SF = qcd_cut_SF.as_dict()
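
#Illustrative sketch, not part of the original module: the cut scale factors above
#are ratios of anti-iso data yields with and without the final (MVA) cut, so a QCD
#yield estimated before that cut can be propagated to the final selection by a
#simple multiplication. The helper name _project_qcd_yield is hypothetical.
def _project_qcd_yield(yield_before_cut, selection='mva', wp='loose', channel='mu'):
    #Scale the pre-cut QCD yield estimate to the final selection
    return yield_before_cut * qcd_cut_SF[selection][wp][channel]

#e.g. _project_qcd_yield(1000.0) -> roughly 115 expected QCD events in the muon MVA selection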