Python diagonalizer Examples

Programming Language: Python

Namespace/Package Name: ROOT

Method/Function: diagonalizer

Examples at hotexamples.com: 5

Python diagonalizer - 5 examples found. These are the top-rated real-world Python examples of ROOT.diagonalizer, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

# Script fragment: builds the combined workspace and instantiates one model
# object per (control region, category) pair.
# NOTE(review): fOutName, fName, controlregions_def, categories, r and
# SafeWorkspaceImporter are defined outside this fragment.

# Run ROOT in batch mode.
r.gROOT.SetBatch(1)
# ".L <file>+" asks ROOT to compile and load the diagonalizer source so the
# class can be imported from the ROOT module on the next line.
r.gROOT.ProcessLine(".L diagonalizer.cc+")
from ROOT import diagonalizer

# Output file (overwritten if present), input file and the shared workspace.
_fOut = r.TFile(fOutName, "RECREATE")
_f = r.TFile.Open(fName)
out_ws = r.RooWorkspace("combinedws")

#out_ws._import = getattr(out_ws,"import")
# The workspace import goes through the SafeWorkspaceImporter wrapper instead
# of the raw "import" method shown in the disabled line above.
out_ws._import = SafeWorkspaceImporter(out_ws)

# Category and observable shared by every channel; they are imported before
# anything else is built on top of the workspace.
sampleType = r.RooCategory("bin_number", "Bin Number")
obs = r.RooRealVar("observed", "Observed Events bin", 1)
out_ws._import(sampleType)  # Global variables for dataset
out_ws._import(obs)
diag_combined = diagonalizer(out_ws)
# Built from the workspace's own objects, looked up by name.
obsargset = r.RooArgSet(out_ws.var("observed"), out_ws.cat("bin_number"))

# One entry per (control region, category) pair, in nested-loop order.
cmb_categories = []

for crd, crn in enumerate(controlregions_def):
    # Each control-region name is also the name of the module that defines
    # its cmodel factory.
    x = __import__(crn)
    for cid, cn in enumerate(categories):
        # Every pair gets its own directory inside the output file.
        _fDir = _fOut.mkdir("%s_category_%s" % (crn, cn))
        cmb_categories.append(
            x.cmodel(cn, crn, _f, _fDir, out_ws, diag_combined))

# Second pass: initialise the channels of every model that was constructed.
for cid, cn in enumerate(cmb_categories):
    print "Run Model: cid, cn", cid, cn
    cn.init_channels()
    channels = cn.ret_channels()

Example #2

Show file

File: runModel.py Project: bu-cms/monox_fit

def main():
    """Build the control-region constraint workspace and write it to disk.

    Reads the command-line arguments via ``cli_args()``, picks the list of
    control-region modules from the category names, instantiates one
    ``cmodel`` per (control region, category) pair, initialises their
    channels, saves a pre-fit snapshot and finally converts and writes the
    workspace to ``args.out``.

    Raises:
        RuntimeError: if no control-region set can be derived from the
            categories, or if a category name contains neither 2017 nor 2018.
    """
    # Commandline arguments
    args = cli_args()

    # Automatically determine CR settings from categories
    if any(re.match('mono(jet|v).*', x) for x in args.categories):
        controlregions_def = ["Z_constraints", "W_constraints"]
    elif any('vbf' in x for x in args.categories):
        controlregions_def = [
            "Z_constraints_qcd_withphoton", "W_constraints_qcd",
            "Z_constraints_ewk_withphoton", "W_constraints_ewk"
        ]
    else:
        # Previously this fell through and failed later with an unbound
        # local; fail early with a message that names the actual problem.
        raise RuntimeError(
            "Cannot determine control regions from categories: %s" %
            (args.categories, ))

    # Create output path. dirname() is empty when args.out is a bare file
    # name, and os.makedirs("") would raise, so only create real directories.
    outdir = os.path.dirname(args.out)
    if outdir and not os.path.exists(outdir):
        os.makedirs(outdir)

    _fOut = r.TFile(args.out, "RECREATE")
    _f = r.TFile.Open(args.file)
    out_ws = r.RooWorkspace("combinedws")

    #out_ws._import = getattr(out_ws,"import")
    out_ws._import = SafeWorkspaceImporter(out_ws)

    sampleType = r.RooCategory("bin_number", "Bin Number")
    obs = r.RooRealVar("observed", "Observed Events bin", 1)
    out_ws._import(sampleType)  # Global variables for dataset
    out_ws._import(obs)
    diag_combined = diagonalizer(out_ws)

    # The "IC" convention is requested only when the rename option mentions
    # MTR; otherwise cmodel is called without the keyword.
    extra_kwargs = {"convention": "IC"} if "MTR" in args.rename else {}

    # Loop over control region definitions, and load their model definitions
    cmb_categories = []
    for crn in controlregions_def:
        x = __import__(crn)
        for cn in args.categories:

            # Derive year name (the pattern has a single group: "7" or "8")
            m = re.match(".*201(7|8).*", cn)
            if not m:
                raise RuntimeError("Cannot derive year from category name: " +
                                   cn)
            year = int("201" + m.group(1))

            _fDir = _fOut.mkdir("%s_category_%s" % (crn, cn))

            cmb_categories.append(
                x.cmodel(cn, crn, _f, _fDir, out_ws, diag_combined, year,
                         **extra_kwargs))

    for cid, cn in enumerate(cmb_categories):
        # Single-argument print: identical output under the Python 2 print
        # statement and the Python 3 print function.
        print("Run Model: cid, cn %s %s" % (cid, cn))
        cn.init_channels()
        # Return value is not needed here; the call itself is kept as-is.
        cn.ret_channels()

    # Save a Pre-fit snapshot
    out_ws.saveSnapshot("PRE_EXT_FIT_Clean", out_ws.allVars())
    # Now convert workspace to combine friendly workspace
    convertToCombineWorkspace(out_ws, _f, args.categories, cmb_categories,
                              controlregions_def, args.rename)
    _fOut.WriteTObject(out_ws)

    print("Produced constraints model in -->  %s" % (_fOut.GetName(), ))

Example #3

Show file

File: runCombinedModel.py Project: blallen/DmsMonoX

# Script fragment: set up the combined workspace and build the Z and W models
# for every category.
# NOTE(review): categories, _fOut, r, cmodel, cmodelW and diagonalizer are
# defined outside this fragment.
#categories = ["boosted"]
#categories = ["inclusive"]
_f = r.TFile.Open("mono-x-vtagged.root")
out_ws = r.RooWorkspace("combinedws", "Combined Workspace")
# "import" is a Python keyword, so the workspace method is fetched by name.
out_ws._import = getattr(out_ws, "import")

# Need to setup the things here for combined dataset, need to add all possible
# sample types first because otherwise RooFit throws a fit!
sampleType = r.RooCategory("bin_number", "Bin Number")
obs = r.RooRealVar("observed", "Observed Events bin", 1)

out_ws._import(sampleType)  # Global variables for dataset
out_ws._import(obs)
obsargset = r.RooArgSet(out_ws.var("observed"), out_ws.cat("bin_number"))

cmb_categories = []
diag_combined = diagonalizer(out_ws)
# Indentation below is spaces only: the original mixed tabs with 8-space
# indents, which Python 3 (and "python -tt") rejects.
for cid, cn in enumerate(categories):
    _fDir = _fOut.mkdir("category_%s" % cn)
    cmb_categories.append(cmodel(cid, cn, _f, _fDir, out_ws, diag_combined))
    _fDirW = _fOut.mkdir("Wcategory_%s" % cn)
    # W models get ids offset by 10 from the matching Z model.
    cmb_categories.append(
        cmodelW(10 + cid, cn, _f, _fDirW, out_ws, diag_combined))
# Had to define the types before adding to the combined dataset
for cid, cn in enumerate(cmb_categories):
    cn.init_channels()
    channels = cn.ret_channels()
    for ch in channels:
        ch.Print()
out_ws.Print('v')
# Next we want to build a list of all of the nuisance parameters which will be
# in the fit :), this is performed with add_nuisance
ext_constraints = r.RooArgSet()
hasSys = False

Example #4

Show file

File: combineControlRegions.py Project: blallen/DmsMonoX

def CombinedControlRegionFit(
  cname,                # name for the parametric variation templates
  _fin,                 # TDirectory (not referenced by the active code below)
  _fout,                # and output file
  _wspace,              # RooWorkspace
  _bins,                # bin edges: bin i spans _bins[i] .. _bins[i+1]
  _varname,             # name of the variable
  _pdfname,             # name of a double exp pdf
  _pdfname_zvv,         # name of a double exp pdf to use as zvv mc fit
  _target_datasetname,  # dataset for the initial fits and the weighted templates
  _control_regions      # CRs constructed
  ):
  """Fit all control regions simultaneously and write the resulting templates.

  Steps, in order:
    1. Load the diagonalizer, fit both pdfs to the target dataset and create
       the "<dataset>_norm" (floating) / "<dataset>_norm_orig" (constant)
       variables in the workspace.
    2. Build one ``Bin`` channel per (control region, bin) and combine the
       per-bin pdfs "pdf_<binid>" in a RooSimultaneous.
    3. Record the pre-fit expectations, run the combined fit and import the
       formula "ratio_correction_<cname>" = norm*pdf / (norm_orig*pdf_orig).
    4. Write the nominal weighted template, the per-control-region
       data/expected/background histograms and their canvases to ``_fout``.
    5. Write +/-1 templates for every variation returned by
       ``diag.generateVariations`` and for every systematic listed in the
       control regions' ``systematics`` (re-fitting for each shift).

  Returns nothing; all results are written to ``_fout`` / ``_wspace``.
  """

  # Make some output directory
  #_fout = _fOut.mkdir("combined_control_fit")

  r.gROOT.ProcessLine(".L diagonalizer.cc+")
  from ROOT import diagonalizer
  diag = diagonalizer(_wspace)

  # Shared by every histogram booked below.
  nbins = len(_bins) - 1
  edges = array.array('d', _bins)

  _var = _wspace.var(_varname)

  _pdf = _wspace.pdf(_pdfname)
  _pdf_orig = _wspace.pdf(_pdfname_zvv)
  _data_mc = _wspace.data(_target_datasetname)

  diag.freezeParameters(_pdf_orig.getParameters(_data_mc), False)
  _pdf_orig.fitTo(_data_mc)  # Just initialises parameters
  _pdf.fitTo(_data_mc)       # Just initialises parameters

  _norm = r.RooRealVar("%s_norm" % _target_datasetname, "Norm",
                       _wspace.data(_target_datasetname).sumEntries())
  _norm.removeRange()
  _norm_orig = r.RooRealVar("%s_norm_orig" % _target_datasetname, "Norm_orig",
                            _wspace.data(_target_datasetname).sumEntries())
  _norm.setConstant(False)
  _norm_orig.setConstant(True)
  _wspace._import(_norm)
  _wspace._import(_norm_orig)
  fr = _var.frame()
  _wspace.data(_target_datasetname).plotOn(fr, r.RooFit.Binning(200))
  # Called without the explicit False used before the initial fits.
  diag.freezeParameters(_pdf_orig.getParameters(_data_mc))
  _pdf_orig.plotOn(fr)
  _pdf.getParameters(_data_mc).Print("v")
  _pdf_orig.getParameters(_data_mc).Print("v")

  def fill_template(hist):
    # Fill `hist` from the target dataset weighted by the ratio correction.
    # Objects are looked up in the workspace on every call, as before.
    diag.generateWeightedTemplate(hist,
                                  _wspace.function(pdf_ratio.GetName()),
                                  _wspace.var(_var.GetName()),
                                  _wspace.data(_target_datasetname))

  # Setup stuff for the simultaneous fitting: one category state per
  # (control region, bin), indexed MAXBINS*j+i.
  sample = r.RooCategory("bin_number", "bin_number")
  for j, cr in enumerate(_control_regions):
    for i in range(nbins):
      sample.defineType("ch_%d_bin_%d" % (j, i), MAXBINS * j + i)

  # Loop again, this time setting up each of the bins and linking the pdf
  # Construct a "channel" (bin) from each bin of the histogram
  channels = []
  combined_obsdata = 0
  for j, cr in enumerate(_control_regions):
    for i in range(nbins):
      xmin, xmax = _bins[i], _bins[i + 1]

      ch = Bin(j, i, _var, cr.ret_dataset(), _pdf, _norm, _wspace, xmin, xmax)
      ch.set_control_region(cr)
      if cr.has_background():
        ch.add_background(cr.ret_background())
      ch.set_label(sample)  # should import the sample category label
      ch.set_sfactor(cr.ret_sfactor(i))
      # This has to be the last thing
      ch.setup_expect_var()

      # Looked up after set_label so the category comes from the workspace.
      obsargset = r.RooArgSet(_wspace.var("observed"),
                              _wspace.cat(sample.GetName()))
      if i == 0 and j == 0:
        combined_obsdata = r.RooDataSet("combinedData", "Data in all Bins",
                                        obsargset)
      ch.add_to_dataset(combined_obsdata)
      channels.append(ch)

  # now we have to build the combined dataset/pdf -> Observation in each bin
  # (var is just obs) and the pdf (already available)
  # -> Make a RooSimultaneous across each channel
  combined_pdf = r.RooSimultaneous("combined_pdf", "combined_pdf",
                                   _wspace.cat(sample.GetName()))
  for ch in channels:
    print(_wspace.pdf("pdf_%s" % ch.ret_binid()))
    combined_pdf.addPdf(_wspace.pdf("pdf_%s" % ch.ret_binid()), ch.ret_binid())

  # Now check systematics, we wont use this right now (kept disabled):
  #   ext_constraints = r.RooArgSet()
  #   hasSys = False
  #   for cr in _control_regions:
  #     nuisances = cr.ret_nuisances()
  #     for nuis in nuisances:
  #       hasSys=True
  #       ext_constraints.add(_wspace.pdf("const_%s"%nuis))

  # Save the pre-fit expectation of every control region.
  # NOTE(review): these use the same name as the post-fit "control_region_*"
  # histograms booked further down - confirm the name clash is intended.
  cr_histos_exp_prefit = []
  for j, cr in enumerate(_control_regions):
    cr_pre_hist = r.TH1F("control_region_%s" % cr.ret_name(),
                         "Expected %s control region" % cr.ret_name(),
                         nbins, edges)
    for bc in range(1, nbins + 1):
      # channels is laid out control region by control region.
      ch = channels[j * nbins + bc - 1]
      cr_pre_hist.SetBinContent(bc, ch.ret_expected())
    cr_pre_hist.SetLineWidth(2)
    cr_pre_hist.SetLineColor(r.kGreen + 1)
    cr_histos_exp_prefit.append(cr_pre_hist.Clone())

  # THE FIT ###########################################################
  # NEED to add constrain terms on top -> Nah, don't bother!
  combined_fit_result = combined_pdf.fitTo(combined_obsdata, r.RooFit.Save())
  # ###################################################################
  # Make the ratio of new/original fits
  ratioargs = r.RooArgList(_norm, _pdf, _norm_orig, _pdf_orig)
  pdf_ratio = r.RooFormulaVar(
      "ratio_correction_%s" % cname,
      "Correction for Zvv from dimuon+photon control regions",
      "@0*@1/(@2*@3)", ratioargs)
  _wspace._import(pdf_ratio)

  # plot on NEW fit ?
  _pdf.plotOn(fr, r.RooFit.LineColor(r.kRed),
              r.RooFit.Normalization(_norm.getVal(), r.RooAbsReal.NumEvent))
  #_pdf.paramOn(fr)
  c = r.TCanvas("zjets_signalregion_mc_fit_before_after")
  fr.GetXaxis().SetTitle("fake MET (GeV)")
  fr.GetYaxis().SetTitle("Events/GeV")
  fr.SetTitle("")
  fr.Draw()
  _fout.WriteTObject(c)

  crat = r.TCanvas("ratio_correction")
  frrat = _var.frame()
  pdf_ratio.plotOn(frrat)
  frrat.Draw()
  _fout.WriteTObject(crat)

  # Having fit, we can spit out every channel expectation, we can correct the
  # MC using it!
  # NOTE(review): nothing is drawn on this canvas in this function.
  c2 = r.TCanvas("compare_models")
  model_hist = r.TH1F("%s_combined_model" % cname, "combined_model",
                      nbins, edges)
  #fillModelHist(model_hist,channels)
  fill_template(model_hist)
  channels[0].Print()
  model_hist.SetLineWidth(2)
  model_hist.SetLineColor(1)
  _fout.WriteTObject(model_hist)

  # Now plot the control Regions too!
  # These lists keep Python references to the drawn objects.
  crhists = []
  canvs = []

  lat = r.TLatex()
  lat.SetNDC()
  lat.SetTextSize(0.04)
  lat.SetTextFont(42)

  for j, cr in enumerate(_control_regions):
    c3 = r.TCanvas("c_%s" % cr.ret_name(), "", 800, 800)
    cr_hist = r.TH1F("control_region_%s" % cr.ret_name(),
                     "Expected %s control region" % cr.ret_name(),
                     nbins, edges)
    da_hist = r.TH1F("data_control_region_%s" % cr.ret_name(),
                     "data %s control region" % cr.ret_name(),
                     nbins, edges)
    mc_hist = r.TH1F("mc_control_region_%s" % cr.ret_name(),
                     "Background %s control region" % cr.ret_name(),
                     nbins, edges)
    da_hist.SetTitle("")
    for bc in range(1, nbins + 1):
      i = j * nbins + bc - 1
      ch = channels[i]
      print("Channel %s Bin  %s %s" % (j, i, ch.ret_expected()))
      cr_hist.SetBinContent(bc, ch.ret_expected())
      da_hist.SetBinContent(bc, ch.ret_observed())
      mc_hist.SetBinContent(bc, ch.ret_background())
      print(ch.ret_background())
      # Square root of the observed count as the data bin error.
      da_hist.SetBinError(bc, (ch.ret_observed())**0.5)
    cr_hist.SetFillColor(r.kBlue - 9)
    mc_hist.SetFillColor(r.kRed + 3)

    cr_hist = getNormalizedHist(cr_hist)
    da_hist = getNormalizedHist(da_hist)
    mc_hist = getNormalizedHist(mc_hist)
    pre_hist = getNormalizedHist(cr_histos_exp_prefit[j])
    cr_hist.SetLineColor(1)
    mc_hist.SetLineColor(1)
    da_hist.SetMarkerColor(1)
    da_hist.SetLineColor(1)
    da_hist.SetMarkerStyle(20)
    crhists.append(da_hist)
    crhists.append(cr_hist)
    crhists.append(mc_hist)
    crhists.append(pre_hist)

    # Upper pad: spectra on a log scale.
    pad1 = r.TPad("p1", "p1", 0, 0.28, 1, 1)
    pad1.SetBottomMargin(0.01)
    pad1.SetCanvas(c3)
    pad1.Draw()
    pad1.cd()
    tlg = r.TLegend(0.6, 0.67, 0.89, 0.89)
    tlg.SetFillColor(0)
    tlg.SetTextFont(42)
    tlg.AddEntry(da_hist, "Data - %s" % cr.ret_title(), "PEL")
    tlg.AddEntry(cr_hist, "Expected (post-fit)", "F")
    tlg.AddEntry(mc_hist, "Backgrounds Component", "F")
    tlg.AddEntry(pre_hist, "Expected (pre-fit)", "L")
    da_hist.GetYaxis().SetTitle("Events/GeV")
    da_hist.GetXaxis().SetTitle("fake MET (GeV)")
    da_hist.Draw("Pe")
    cr_hist.Draw("samehist")
    mc_hist.Draw("samehist")
    pre_hist.Draw("samehist")
    da_hist.Draw("Pesame")
    tlg.Draw()
    lat.DrawLatex(0.1, 0.92, "#bf{CMS} #it{Preliminary}")
    pad1.SetLogy()

    # Ratio plot
    c3.cd()
    pad2 = r.TPad("p2", "p2", 0, 0.068, 1, 0.28)
    pad2.SetTopMargin(0.02)
    pad2.SetCanvas(c3)
    pad2.Draw()
    pad2.cd()
    ratio = da_hist.Clone()
    ratio_pre = da_hist.Clone()
    ratio.GetYaxis().SetRangeUser(0.01, 1.99)
    ratio.Divide(cr_hist)
    ratio_pre.Divide(pre_hist)
    ratio.GetYaxis().SetTitle("Data/Bkg")
    ratio.GetYaxis().SetNdivisions(5)
    ratio.GetYaxis().SetLabelSize(0.1)
    ratio.GetYaxis().SetTitleSize(0.12)
    ratio.GetXaxis().SetTitleSize(0.085)
    ratio.GetXaxis().SetLabelSize(0.12)
    crhists.append(ratio)
    crhists.append(ratio_pre)
    ratio.GetXaxis().SetTitle("")
    ratio.Draw()
    ratio_pre.SetLineColor(pre_hist.GetLineColor())
    ratio_pre.SetMarkerColor(pre_hist.GetLineColor())
    line = r.TLine(da_hist.GetXaxis().GetXmin(), 1,
                   da_hist.GetXaxis().GetXmax(), 1)
    line.SetLineColor(2)
    line.SetLineWidth(3)
    line.Draw()
    ratio.Draw("same")
    ratio_pre.Draw("pelsame")
    ratio.Draw("samepel")

    canvs.append(c3)
    _fout.WriteTObject(cr_hist)
    _fout.WriteTObject(da_hist)
    _fout.WriteTObject(mc_hist)
    _fout.WriteTObject(c3)

  for bl in channels:
    bl.Print()
  print("%s %s" % (_wspace.data(_target_datasetname).sumEntries(),
                   _wspace.var(_norm.GetName()).getVal()))
  # Do we really need to re-get the pdf_ratio?
  # Ok now the task will be to calculate the uncertainties!, simply
  # diagonalize again and re-calculate histograms given +/- 1 sigmas
  # The first kind are rather straightforward and due to statistical
  # uncertainties
  npars = diag.generateVariations(combined_fit_result)
  h2covar = diag.retCovariance()
  _fout.WriteTObject(h2covar)
  leg_var = r.TLegend(0.56, 0.42, 0.89, 0.89)
  leg_var.SetFillColor(0)
  leg_var.SetTextFont(42)

  canv = r.TCanvas("canv_variations")
  canvr = r.TCanvas("canv_variations_ratio")
  model_hist_spectrum = getNormalizedHist(model_hist)
  # A new TCanvas becomes the current pad, so without this cd() the nominal
  # spectrum landed on the ratio canvas (and was wiped by flat.Draw("hist")).
  canv.cd()
  model_hist_spectrum.Draw()
  systs = []
  sys_c = 0
  for par in range(npars):
    hist_up = r.TH1F("%s_combined_model_par_%d_Up" % (cname, par),
                     "combined_model par %d Up 1 sigma" % par, nbins, edges)
    # Title used to say "Up" for the Down histogram as well.
    hist_dn = r.TH1F("%s_combined_model_par_%d_Down" % (cname, par),
                     "combined_model par %d Down 1 sigma" % par, nbins, edges)

    diag.setEigenset(par, 1)  # up variation
    #fillModelHist(hist_up,channels)
    fill_template(hist_up)

    diag.setEigenset(par, -1)  # down variation
    #fillModelHist(hist_dn,channels)
    fill_template(hist_dn)

    # Reset parameter values
    diag.resetPars()
    canv.cd()
    hist_up.SetLineWidth(2)
    hist_dn.SetLineWidth(2)
    # Skip line colour 10 (presumably because it is near-white - confirm).
    if sys_c + 2 == 10:
      sys_c += 1
    hist_up.SetLineColor(sys_c + 2)
    hist_dn.SetLineColor(sys_c + 2)
    hist_dn.SetLineStyle(2)

    # The un-normalised templates are what gets written out.
    _fout.WriteTObject(hist_up)
    _fout.WriteTObject(hist_dn)

    hist_up = getNormalizedHist(hist_up)
    hist_dn = getNormalizedHist(hist_dn)

    systs.append(hist_up)
    systs.append(hist_dn)

    hist_up.Draw("samehist")
    hist_dn.Draw("samehist")

    # Per-parameter ratio canvas: variation / nominal, plus a flat line at 1.
    ct = r.TCanvas("sys_par_%d" % par)
    flat = model_hist.Clone()
    hist_up_cl = hist_up.Clone()
    hist_up_cl.SetName(hist_up_cl.GetName() + "_ratio")
    hist_dn_cl = hist_dn.Clone()
    hist_dn_cl.SetName(hist_dn_cl.GetName() + "_ratio")
    hist_up_cl.Divide(model_hist_spectrum)
    hist_dn_cl.Divide(model_hist_spectrum)
    hist_up_cl.Draw('hist')
    hist_dn_cl.Draw('histsame')
    flat.Divide(model_hist)
    flat.Draw("histsame")
    _fout.WriteTObject(ct)
    canvr.cd()
    if par == 0:
      flat.Draw("hist")
    systs.append(flat)
    systs.append(hist_up_cl)
    systs.append(hist_dn_cl)
    hist_up_cl.Draw('histsame')
    hist_dn_cl.Draw('histsame')
    leg_var.AddEntry(hist_up_cl, "Parameter %d" % par, "L")
    sys_c += 1

  for ch in channels:
    ch.Print()
  # Final step is to produce alternate templates due to systematic shifts.
  # Loop through and re-fit for each change.
  all_systs = set(sysk
                  for cr in _control_regions
                  for sysk in cr.systematics.keys())

  for syst in all_systs:
    # Swap out the scale-factors for the shifted set, simply amounts to
    # resetting the s-factors for each channel.
    for ch in channels:
      cr = _control_regions[ch.chid]
      ch.set_sfactor(cr.ret_sfactor(ch.id, syst, 1))

    combined_pdf.fitTo(combined_obsdata)
    model_hist_sys_up = r.TH1F("combined_model_%sUp" % syst,
                               "combined_model %s Up 1 sigma" % syst,
                               nbins, edges)  # Sys_Up
    #fillModelHist(model_hist_sys_up,channels)
    fill_template(model_hist_sys_up)

    # Now the down-shifted scale factors.
    # NOTE(review): the nominal scale factors are not restored afterwards.
    for ch in channels:
      cr = _control_regions[ch.chid]
      ch.set_sfactor(cr.ret_sfactor(ch.id, syst, -1))

    combined_pdf.fitTo(combined_obsdata)
    # Title typo "Sown" corrected to "Down".
    model_hist_sys_dn = r.TH1F("combined_model_%sDown" % syst,
                               "combined_model %s Down 1 sigma" % syst,
                               nbins, edges)  # Sys_Dn
    #fillModelHist(model_hist_sys_dn,channels)
    fill_template(model_hist_sys_dn)
    # remake combined fit!
    _fout.WriteTObject(model_hist_sys_up)
    _fout.WriteTObject(model_hist_sys_dn)
    model_hist_sys_up = getNormalizedHist(model_hist_sys_up)
    model_hist_sys_dn = getNormalizedHist(model_hist_sys_dn)
    if sys_c + 2 == 10:
      sys_c += 1
    model_hist_sys_up.SetLineColor(sys_c + 2)
    model_hist_sys_dn.SetLineColor(sys_c + 2)
    model_hist_sys_up.SetLineWidth(2)
    model_hist_sys_dn.SetLineWidth(2)
    model_hist_sys_dn.SetLineStyle(2)

    canv.cd()
    model_hist_sys_up.Draw("histsame")
    model_hist_sys_dn.Draw("histsame")
    systs.append(model_hist_sys_up)
    systs.append(model_hist_sys_dn)
    model_hist_sys_up_cl = model_hist_sys_up.Clone()
    model_hist_sys_up_cl.SetName(model_hist_sys_up_cl.GetName() + "_ratio")
    model_hist_sys_dn_cl = model_hist_sys_dn.Clone()
    model_hist_sys_dn_cl.SetName(model_hist_sys_dn_cl.GetName() + "_ratio")
    model_hist_sys_up_cl.Divide(model_hist_spectrum)
    model_hist_sys_dn_cl.Divide(model_hist_spectrum)
    systs.append(model_hist_sys_up_cl)
    systs.append(model_hist_sys_dn_cl)
    canvr.cd()
    model_hist_sys_up_cl.Draw("histsame")
    model_hist_sys_dn_cl.Draw("histsame")

    leg_var.AddEntry(model_hist_sys_up, "%s" % syst, "L")
    sys_c += 1

  _fout.WriteTObject(c)
  canv.cd()
  leg_var.Draw()
  canvr.cd()
  leg_var.Draw()
  _fout.WriteTObject(canv)
  _fout.WriteTObject(canvr)

Example #5

Show file

def CombinedControlRegionFit(
        cname  # name for the parametric variation templates
    ,
        _fin  #TDirectory   
    ,
        _fout  #and output file 
    ,
        _wspace  # RooWorkspace
    ,
        _bins  # just get the bins
    ,
        _varname  # name of the variale
    ,
        _pdfname  # name of a double exp pdf
    ,
        _pdfname_zvv  # name of a double exp pdf to use as zvv mc fit
    ,
        _target_datasetname  # only for initial fit values
    ,
        _control_regions  # CRs constructed
):

    # Make some output directory
    #_fout = _fOut.mkdir("combined_control_fit")

    #th_ex = _fin.Get(_examplehistname)
    #th_ex.SetName(th_ex.GetName()+cname)
    r.gROOT.ProcessLine(".L diagonalizer.cc+")
    from ROOT import diagonalizer
    diag = diagonalizer(_wspace)

    _var = _wspace.var(_varname)

    _pdf = _wspace.pdf(_pdfname)
    _pdf_orig = _wspace.pdf(_pdfname_zvv)
    _data_mc = _wspace.data(_target_datasetname)

    diag.freezeParameters(_pdf_orig.getParameters(_data_mc), False)
    _pdf_orig.fitTo(_data_mc)  # Just initialises parameters
    _pdf.fitTo(_data_mc)  # Just initialises parameters

    _norm = r.RooRealVar("%s_norm" % _target_datasetname, "Norm",
                         _wspace.data(_target_datasetname).sumEntries())
    _norm.removeRange()
    _norm_orig = r.RooRealVar("%s_norm_orig" % _target_datasetname,
                              "Norm_orig",
                              _wspace.data(_target_datasetname).sumEntries())
    _norm.setConstant(False)
    _norm_orig.setConstant(True)
    _wspace._import(_norm)
    _wspace._import(_norm_orig)
    fr = _var.frame()
    _wspace.data(_target_datasetname).plotOn(fr, r.RooFit.Binning(200))
    diag.freezeParameters(_pdf_orig.getParameters(_data_mc))
    _pdf_orig.plotOn(fr)
    _pdf.getParameters(_data_mc).Print("v")
    _pdf_orig.getParameters(_data_mc).Print("v")
    #sys.exit()

    # Setup stuff for the simultaneous fitting, this isn't particularly good since we loop twice without needing to
    sample = r.RooCategory("bin_number", "bin_number")
    for j, cr in enumerate(_control_regions):
        for i, bl in enumerate(_bins):
            if i >= len(_bins) - 1: continue
            sample.defineType("ch_%d_bin_%d" % (j, i), MAXBINS * j + i)

    # Loop again, this time setting up each of the bins and linking the pdf
    # Construct a "channel" (bin) from each bin of the histogram
    channels = []
    combined_obsdata = 0
    for j, cr in enumerate(_control_regions):
        for i, bl in enumerate(_bins):
            if i >= len(_bins) - 1: continue

            xmin, xmax = bl, _bins[i + 1]

            ch = Bin(j, i, _var, cr.ret_dataset(), _pdf, _norm, _wspace, xmin,
                     xmax)
            ch.set_control_region(cr)
            if cr.has_background(): ch.add_background(cr.ret_background())
            ch.set_label(sample)  # should import the sample category label
            ch.set_sfactor(cr.ret_sfactor(i))
            # This has to the the last thing
            ch.setup_expect_var()

            obsargset = r.RooArgSet(_wspace.var("observed"),
                                    _wspace.cat(sample.GetName()))
            if i == 0 and j == 0:
                combined_obsdata = r.RooDataSet("combinedData",
                                                "Data in all Bins", obsargset)
            ch.add_to_dataset(combined_obsdata)
            #ch.Print()
            channels.append(ch)

    # Now we make a roosimultaneous pdf from the product of the bin pdfs!
    binset = r.RooArgList("bins_set")

    # now we have to build the combined dataset/pdf -> Observation in each bin (var is just obs) and the pdf (already availale)
    # -> Make a RooSimultaneous across each channel

    combined_pdf = r.RooSimultaneous("combined_pdf", "combined_pdf",
                                     _wspace.cat(sample.GetName()))
    for ch in channels:
        print _wspace.pdf("pdf_%s" % ch.ret_binid())
        combined_pdf.addPdf(_wspace.pdf("pdf_%s" % ch.ret_binid()),
                            ch.ret_binid())

    # Now check systematics, we wont use this right now
    """
  ext_constraints = r.RooArgSet()
  hasSys = False
  for cr in _control_regions:
    nuisances = cr.ret_nuisances()
    for nuis in nuisances:
      hasSys=True
      ext_constraints.add(_wspace.pdf("const_%s"%nuis))
  """
    cr_histos_exp_prefit = []
    for j, cr in enumerate(_control_regions):
        #save the prefit histos
        cr_pre_hist = r.TH1F("control_region_%s" % cr.ret_name(),
                             "Expected %s control region" % cr.ret_name(),
                             len(_bins) - 1, array.array('d', _bins))
        bc = 1
        for i in range(j * (len(_bins) - 1), (j + 1) * (len(_bins) - 1)):
            ch = channels[i]
            #if i>=len(_bins)-1: break
            cr_pre_hist.SetBinContent(bc, ch.ret_expected())
            bc += 1
        cr_pre_hist.SetLineWidth(2)
        cr_pre_hist.SetLineColor(r.kGreen + 1)
        cr_histos_exp_prefit.append(cr_pre_hist.Clone())
    # THE FIIIIIIIIIIIIIT!!!!!!!!!!!!!!!!!!!!!!!!!!!! ################################
    # NEED to add constrain terms on top -> Nah, don't bother!
    combined_fit_result = combined_pdf.fitTo(combined_obsdata, r.RooFit.Save())
    # #################################################################################
    # Make the ratio of new/original fits
    ratioargs = r.RooArgList(_norm, _pdf, _norm_orig, _pdf_orig)
    pdf_ratio = r.RooFormulaVar(
        "ratio_correction_%s" % cname,
        "Correction for Zvv from dimuon+photon control regions",
        "@0*@1/(@2*@3)", ratioargs)
    _wspace._import(pdf_ratio)
    #

    # plot on NEW fit ?
    _pdf.plotOn(fr, r.RooFit.LineColor(r.kRed),
                r.RooFit.Normalization(_norm.getVal(), r.RooAbsReal.NumEvent))
    #_pdf.paramOn(fr)
    c = r.TCanvas("zjets_signalregion_mc_fit_before_after")
    fr.GetXaxis().SetTitle("fake MET (GeV)")
    fr.GetYaxis().SetTitle("Events/GeV")
    fr.SetTitle("")
    fr.Draw()
    _fout.WriteTObject(c)

    crat = r.TCanvas("ratio_correction")
    frrat = _var.frame()
    pdf_ratio.plotOn(frrat)
    frrat.Draw()
    _fout.WriteTObject(crat)

    # Having fit, we can spit out every channel expectation, we can correct the MC using it!
    c2 = r.TCanvas("compare_models")
    model_hist = r.TH1F("%s_combined_model" % cname, "combined_model",
                        len(_bins) - 1, array.array('d', _bins))
    #fillModelHist(model_hist,channels)
    diag.generateWeightedTemplate(model_hist,
                                  _wspace.function(pdf_ratio.GetName()),
                                  _wspace.var(_var.GetName()),
                                  _wspace.data(_target_datasetname))
    channels[0].Print()
    model_hist.SetLineWidth(2)
    model_hist.SetLineColor(1)
    #_fout = r.TFile("combined_model.root","RECREATE")
    _fout.WriteTObject(model_hist)

    # Now plot the control Regions too!
    crhists = []
    canvs = []

    lat = r.TLatex()
    lat.SetNDC()
    lat.SetTextSize(0.04)
    lat.SetTextFont(42)

    for j, cr in enumerate(_control_regions):
        c3 = r.TCanvas("c_%s" % cr.ret_name(), "", 800, 800)
        cr_hist = r.TH1F("control_region_%s" % cr.ret_name(),
                         "Expected %s control region" % cr.ret_name(),
                         len(_bins) - 1, array.array('d', _bins))
        da_hist = r.TH1F("data_control_region_%s" % cr.ret_name(),
                         "data %s control region" % cr.ret_name(),
                         len(_bins) - 1, array.array('d', _bins))
        mc_hist = r.TH1F("mc_control_region_%s" % cr.ret_name(),
                         "Background %s control region" % cr.ret_name(),
                         len(_bins) - 1, array.array('d', _bins))
        da_hist.SetTitle("")
        bc = 1
        for i in range(j * (len(_bins) - 1), (j + 1) * (len(_bins) - 1)):
            ch = channels[i]
            #if i>=len(_bins)-1: break
            print "Channel", j, "Bin ", i, channels[i].ret_expected()
            cr_hist.SetBinContent(bc, ch.ret_expected())
            da_hist.SetBinContent(bc, ch.ret_observed())
            mc_hist.SetBinContent(bc, ch.ret_background())
            print ch.ret_background()
            da_hist.SetBinError(bc, (ch.ret_observed())**0.5)
            cr_hist.SetFillColor(r.kBlue - 9)
            mc_hist.SetFillColor(r.kRed + 3)
            bc += 1

        cr_hist = getNormalizedHist(cr_hist)
        da_hist = getNormalizedHist(da_hist)
        mc_hist = getNormalizedHist(mc_hist)
        pre_hist = getNormalizedHist(cr_histos_exp_prefit[j])
        cr_hist.SetLineColor(1)
        mc_hist.SetLineColor(1)
        da_hist.SetMarkerColor(1)
        da_hist.SetLineColor(1)
        da_hist.SetMarkerStyle(20)
        crhists.append(da_hist)
        crhists.append(cr_hist)
        crhists.append(mc_hist)
        crhists.append(pre_hist)

        pad1 = r.TPad("p1", "p1", 0, 0.28, 1, 1)
        pad1.SetBottomMargin(0.01)
        pad1.SetCanvas(c3)
        pad1.Draw()
        pad1.cd()
        tlg = r.TLegend(0.6, 0.67, 0.89, 0.89)
        tlg.SetFillColor(0)
        tlg.SetTextFont(42)
        tlg.AddEntry(da_hist, "Data - %s" % cr.ret_title(), "PEL")
        tlg.AddEntry(cr_hist, "Expected (post-fit)", "F")
        tlg.AddEntry(mc_hist, "Backgrounds Component", "F")
        tlg.AddEntry(pre_hist, "Expected (pre-fit)", "L")
        da_hist.GetYaxis().SetTitle("Events/GeV")
        da_hist.GetXaxis().SetTitle("fake MET (GeV)")
        da_hist.Draw("Pe")
        cr_hist.Draw("samehist")
        mc_hist.Draw("samehist")
        pre_hist.Draw("samehist")
        da_hist.Draw("Pesame")
        tlg.Draw()
        lat.DrawLatex(0.1, 0.92, "#bf{CMS} #it{Preliminary}")
        pad1.SetLogy()

        # Ratio plot
        c3.cd()
        pad2 = r.TPad("p2", "p2", 0, 0.068, 1, 0.28)
        pad2.SetTopMargin(0.02)
        pad2.SetCanvas(c3)
        pad2.Draw()
        pad2.cd()
        ratio = da_hist.Clone()
        ratio_pre = da_hist.Clone()
        ratio.GetYaxis().SetRangeUser(0.01, 1.99)
        ratio.Divide(cr_hist)
        ratio_pre.Divide(pre_hist)
        ratio.GetYaxis().SetTitle("Data/Bkg")
        ratio.GetYaxis().SetNdivisions(5)
        ratio.GetYaxis().SetLabelSize(0.1)
        ratio.GetYaxis().SetTitleSize(0.12)
        ratio.GetXaxis().SetTitleSize(0.085)
        ratio.GetXaxis().SetLabelSize(0.12)
        crhists.append(ratio)
        crhists.append(ratio_pre)
        ratio.GetXaxis().SetTitle("")
        ratio.Draw()
        ratio_pre.SetLineColor(pre_hist.GetLineColor())
        ratio_pre.SetMarkerColor(pre_hist.GetLineColor())
        line = r.TLine(da_hist.GetXaxis().GetXmin(), 1,
                       da_hist.GetXaxis().GetXmax(), 1)
        line.SetLineColor(2)
        line.SetLineWidth(3)
        line.Draw()
        ratio.Draw("same")
        ratio_pre.Draw("pelsame")
        ratio.Draw("samepel")

        canvs.append(c3)
        _fout.WriteTObject(cr_hist)
        _fout.WriteTObject(da_hist)
        _fout.WriteTObject(mc_hist)
        _fout.WriteTObject(c3)

    for bl in channels:
        bl.Print()
    print _wspace.data(_target_datasetname).sumEntries(), _wspace.var(
        _norm.GetName()).getVal()
    # Do we really need to re-get the pdf_ratio?
    # OK, now the task is to calculate the uncertainties: simply diagonalize again and re-calculate the histograms given +/- 1 sigma shifts.
    # The first kind is rather straightforward and is due to statistical uncertainties.
    npars = diag.generateVariations(combined_fit_result)
    h2covar = diag.retCovariance()
    _fout.WriteTObject(h2covar)
    leg_var = r.TLegend(0.56, 0.42, 0.89, 0.89)
    leg_var.SetFillColor(0)
    leg_var.SetTextFont(42)

    canv = r.TCanvas("canv_variations")
    canvr = r.TCanvas("canv_variations_ratio")
    model_hist_spectrum = getNormalizedHist(model_hist)
    model_hist_spectrum.Draw()
    systs = []
    sys_c = 0
    for par in range(npars):
        hist_up = r.TH1F("%s_combined_model_par_%d_Up" % (cname, par),
                         "combined_model par %d Up 1 sigma" % par,
                         len(_bins) - 1, array.array('d', _bins))
        hist_dn = r.TH1F("%s_combined_model_par_%d_Down" % (cname, par),
                         "combined_model par %d Up 1 sigma" % par,
                         len(_bins) - 1, array.array('d', _bins))

        diag.setEigenset(par, 1)  # up variation
        #fillModelHist(hist_up,channels)
        diag.generateWeightedTemplate(hist_up,
                                      _wspace.function(pdf_ratio.GetName()),
                                      _wspace.var(_var.GetName()),
                                      _wspace.data(_target_datasetname))

        diag.setEigenset(par, -1)  # down variation
        #fillModelHist(hist_dn,channels)
        diag.generateWeightedTemplate(hist_dn,
                                      _wspace.function(pdf_ratio.GetName()),
                                      _wspace.var(_var.GetName()),
                                      _wspace.data(_target_datasetname))

        # Reset parameter values
        diag.resetPars()
        canv.cd()
        hist_up.SetLineWidth(2)
        hist_dn.SetLineWidth(2)
        if sys_c + 2 == 10: sys_c += 1
        hist_up.SetLineColor(sys_c + 2)
        hist_dn.SetLineColor(sys_c + 2)
        hist_dn.SetLineStyle(2)

        _fout.WriteTObject(hist_up)
        _fout.WriteTObject(hist_dn)

        hist_up = getNormalizedHist(hist_up)
        hist_dn = getNormalizedHist(hist_dn)

        systs.append(hist_up)
        systs.append(hist_dn)

        hist_up.Draw("samehist")
        hist_dn.Draw("samehist")

        ct = r.TCanvas("sys_par_%d" % par)
        flat = model_hist.Clone()
        hist_up_cl = hist_up.Clone()
        hist_up_cl.SetName(hist_up_cl.GetName() + "_ratio")
        hist_dn_cl = hist_dn.Clone()
        hist_dn_cl.SetName(hist_dn_cl.GetName() + "_ratio")
        hist_up_cl.Divide(model_hist_spectrum)
        hist_dn_cl.Divide(model_hist_spectrum)
        hist_up_cl.Draw('hist')
        hist_dn_cl.Draw('histsame')
        flat.Divide(model_hist)
        flat.Draw("histsame")
        _fout.WriteTObject(ct)
        canvr.cd()
        if par == 0: flat.Draw("hist")
        systs.append(flat)
        systs.append(hist_up_cl)
        systs.append(hist_dn_cl)
        hist_up_cl.Draw('histsame')
        hist_dn_cl.Draw('histsame')
        leg_var.AddEntry(hist_up_cl, "Parameter %d" % par, "L")
        sys_c += 1

    for ch in channels:
        ch.Print()
    # Final step is to produce alternate templates due to systematic shifts. Loop through and re-fit for each change.
    all_systs = []
    for cr in _control_regions:
        for sysk in cr.systematics.keys():
            all_systs.append(sysk)
    all_systs = set(all_systs)

    for syst in all_systs:
        #BLEH swap out the scale-factors for new set, simply amounts to resetting the s-factors for each :)
        # need to figure out what cr is and what ch is
        for i, ch in enumerate(channels):
            cr = _control_regions[ch.chid]
            ch.set_sfactor(cr.ret_sfactor(ch.id, syst, 1))

        combined_pdf.fitTo(combined_obsdata)
        model_hist_sys_up = r.TH1F("combined_model_%sUp" % syst,
                                   "combined_model %s Up 1 sigma" % syst,
                                   len(_bins) - 1, array.array('d',
                                                               _bins))  #Sys_Up
        #fillModelHist(model_hist_sys_up,channels)
        diag.generateWeightedTemplate(model_hist_sys_up,
                                      _wspace.function(pdf_ratio.GetName()),
                                      _wspace.var(_var.GetName()),
                                      _wspace.data(_target_datasetname))

        # Switch the scale factors to the down (-1) variation of this systematic
        for i, ch in enumerate(channels):
            cr = _control_regions[ch.chid]
            ch.set_sfactor(cr.ret_sfactor(ch.id, syst, -1))

        combined_pdf.fitTo(combined_obsdata)
        model_hist_sys_dn = r.TH1F("combined_model_%sDown" % syst,
                                   "combined_model %s Sown 1 sigma" % syst,
                                   len(_bins) - 1, array.array('d',
                                                               _bins))  #Sys_Dn
        #fillModelHist(model_hist_sys_dn,channels)
        diag.generateWeightedTemplate(model_hist_sys_dn,
                                      _wspace.function(pdf_ratio.GetName()),
                                      _wspace.var(_var.GetName()),
                                      _wspace.data(_target_datasetname))
        # remake combined fit!
        _fout.WriteTObject(model_hist_sys_up)
        _fout.WriteTObject(model_hist_sys_dn)
        model_hist_sys_up = getNormalizedHist(model_hist_sys_up)
        model_hist_sys_dn = getNormalizedHist(model_hist_sys_dn)
        if sys_c + 2 == 10: sys_c += 1
        model_hist_sys_up.SetLineColor(sys_c + 2)
        model_hist_sys_dn.SetLineColor(sys_c + 2)
        model_hist_sys_up.SetLineWidth(2)
        model_hist_sys_dn.SetLineWidth(2)
        model_hist_sys_dn.SetLineStyle(2)

        canv.cd()
        model_hist_sys_up.Draw("histsame")
        model_hist_sys_dn.Draw("histsame")
        systs.append(model_hist_sys_up)
        systs.append(model_hist_sys_dn)
        model_hist_sys_up_cl = model_hist_sys_up.Clone()
        model_hist_sys_up_cl.SetName(model_hist_sys_up_cl.GetName() + "_ratio")
        model_hist_sys_dn_cl = model_hist_sys_dn.Clone()
        model_hist_sys_dn_cl.SetName(model_hist_sys_dn_cl.GetName() + "_ratio")
        model_hist_sys_up_cl.Divide(model_hist_spectrum)
        model_hist_sys_dn_cl.Divide(model_hist_spectrum)
        systs.append(model_hist_sys_up_cl)
        systs.append(model_hist_sys_dn_cl)
        canvr.cd()
        model_hist_sys_up_cl.Draw("histsame")
        model_hist_sys_dn_cl.Draw("histsame")

        leg_var.AddEntry(model_hist_sys_up, "%s" % syst, "L")
        sys_c += 1

    _fout.WriteTObject(c)
    canv.cd()
    leg_var.Draw()
    canvr.cd()
    leg_var.Draw()
    _fout.WriteTObject(canv)
    _fout.WriteTObject(canvr)