# These snippets assume the analysis-framework modules are importable
# (AnalysisEvent, EventSelection with its categoryNames, configuration,
# createDirectory, getArgSet); the generic imports they need are:
import os
import time
import itertools
import ROOT
from importlib import import_module

def btagEfficiencyTreeProducer(stageName="Z+jet", muChannel=True, path='../testfiles/'):
  #search for category number
  stage=-1
  for cat in categoryNames :
    stage+=1
    if cat==stageName : break
  # optional hard-coded input directory (left commented out so the "path" argument is used)
  # path='/nfs/user/llbb/Pat_8TeV_532p4/DYjets_Summer12_V2/'
  # prepare output
  ROOT.gROOT.ProcessLine(
  "struct MyStruct {\
     Float_t     pt;\
     Float_t     eta;\
     Int_t       flavor;\
     Float_t     ssvhe;\
     Float_t     ssvhp;\
     Float_t     csv;\
     Float_t     eventWeight;\
  };" )
  from ROOT import MyStruct
  mystruct = MyStruct()
  f = ROOT.TFile( 'mybtagEfftree.root', 'RECREATE' )
  tree = ROOT.TTree( 'btagEff', 'btag efficiency' )
  tree.Branch( 'data', mystruct, 'pt/F:eta/F:flavor/I:ssvhe/F:ssvhp/F:csv/F:eventWeight/F' )
  # input
  if os.path.isdir(path):
    dirList=os.listdir(path)
    files=[]
    for fname in dirList:
      files.append(path+fname)
  elif os.path.isfile(path):
    files=[path]
  else:
    files=[]
  events = AnalysisEvent(files)
  EventSelection.prepareAnalysisEvent(events)
  # event loop
  eventCnt = 0
  print "starting loop on events"
  for event in events:
    categoryData = event.catMu if muChannel else event.catEle
    goodJets = event.goodJets_mu if muChannel else event.goodJets_ele
    if EventSelection.isInCategory(stage, categoryData):
      eventCnt = eventCnt +1
      if eventCnt%100==0 : print ".",
      if eventCnt%1000==0 : print ""
      # event weight
      mystruct.eventWeight = event.weight(weightList=["PileUp"])
      # that's where we access the jets
      for index,jet in enumerate(event.jets):
        if not goodJets[index]: continue
        mystruct.pt = jet.pt()
        mystruct.eta = jet.eta()
        mystruct.flavor = jet.partonFlavour()
        mystruct.ssvhe = jet.bDiscriminator("simpleSecondaryVertexHighEffBJetTags")
        mystruct.ssvhp = jet.bDiscriminator("simpleSecondaryVertexHighPurBJetTags")
        mystruct.csv = jet.bDiscriminator("combinedSecondaryVertexBJetTags")
        tree.Fill()
  f.Write()
  f.Close()
  print ""
 def process(self, event):
     """EventSelectionControlPlots"""
     result = {}
     ## event category
     categoryData = event.category
     result["category"] = []
     for category in range(self.eventCategories):
         if EventSelection.isInCategory(category, categoryData):
             result["category"].append(category)
     result["event"] = event.event()
     return result
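
For orientation, a minimal sketch of how the dict returned by process() is consumed (see runAnalysis below); the cp object stands for any control-plot instance and is illustrative only.

# illustrative only: "cp" is one of the control-plot objects built in runAnalysis
data = cp.process(event)                    # e.g. {"category": [0, 2], "event": 12345}
cp.fill(data, event.weight(category=0))     # called once per matching category in runAnalysis
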
Example #4
def DumpEventList(category, path="./", output="eventlist.txt"):
    """Dump a list of events in a given category"""
    # input
    if os.path.isdir(path):
        dirList = os.listdir(path)
        files = []
        for fname in dirList:
            files.append(path + fname)
    elif os.path.isfile(path):
        files = [path]
    else:
        files = []
    # events
    events = AnalysisEvent(files)
    # output
    event_list = open(output, "w")
    # collections and producers used in the analysis
    EventSelection.prepareAnalysisEvent(events)
    for event in events:
        # check category
        if EventSelection.isInCategory(category, event.category):
            # print
            print >> event_list, "Event", event.event()
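
A hedged example call: dump all events of one category from a directory of input files (the category index 3, the directory, and the output name are placeholders).

# hypothetical call; category index and paths are placeholders
DumpEventList(3, path="../testfiles/", output="eventlist_cat3.txt")
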
Example #6
def runAnalysis(path,
                levels,
                outputname="controlPlots.root",
                Njobs=1,
                jobNumber=1):
    """produce all the plots in one go"""

    # inputs
    if os.path.isdir(path):
        dirList = list(
            itertools.islice(os.listdir(path), jobNumber, None, Njobs))
        files = []
        for fname in dirList:
            files.append(path + "/" + fname)
    elif os.path.isfile(path):
        files = [path]
    else:
        files = []

    # output
    output = ROOT.TFile(outputname, "RECREATE")
    if configuration.runningMode == "dataset":
        ROOT.RooAbsData.setDefaultStorageType(ROOT.RooAbsData.Tree)
        rds = ROOT.RooDataSet(configuration.RDSname, configuration.RDSname,
                              ROOT.RooArgSet())

    # events iterator, plus configuration of standard collections and producers
    events = AnalysisEvent(files)
    EventSelection.prepareAnalysisEvent(events)

    # prepare the plots
    controlPlots = []
    if configuration.runningMode == "plots":
        leafList = [None] * EventSelection.eventCategories()
        createDirectory(EventSelection.categoriesHierarchy(), output, leafList)
        for levelDir in leafList:
            levelPlots = []
            for cp in configuration.controlPlots:
                levelPlots.append(
                    getattr(import_module(cp.module),
                            cp.classname)(dir=levelDir.mkdir(cp.label),
                                          mode="plots"))
            controlPlots.append(levelPlots)
    else:
        for cp in configuration.controlPlots:
            controlPlots.append(
                getattr(import_module(cp.module), cp.classname)(dir=None,
                                                                mode="dataset",
                                                                dataset=rds))

    # book histograms (separate iteration for clarity)
    if configuration.runningMode == "plots":
        for level in levels:
            for conf, cp in zip(configuration.controlPlots,
                                controlPlots[level]):
                cp.beginJob(**conf.kwargs)
    else:
        for conf, cp in zip(configuration.controlPlots, controlPlots):
            cp.beginJob(**conf.kwargs)
        for cp in controlPlots[:1]:
            cp.defineCategories(EventSelection.categoryNames)

    # process events
    i = 0
    t0 = time.time()
    for event in events:
        # printout
        if i % 100 == 0:
            print "Processing... event %d. Last batch in %f s." % (i, (
                time.time() - t0))
            t0 = time.time()
        if configuration.runningMode == "plots":
            # loop on channels
            plots = filter(
                lambda x: EventSelection.isInCategory(x, event.category),
                levels)
            # process the event once (for the first level)
            selectionPlotsData = []
            for level in plots[:1]:
                for cp in controlPlots[level]:
                    selectionPlotsData.append(cp.process(event))
            # fill the histograms
            for level in plots:
                for cp, data in zip(controlPlots[level], selectionPlotsData):
                    cp.fill(data, event.weight(category=level))
        else:
            for cp in controlPlots[:1]:
                # set categories (first CP only)
                cp.setCategories(
                    map(
                        lambda c: EventSelection.isInCategory(
                            c, event.category),
                        range(EventSelection.eventCategories())))
            for cp in controlPlots:
                # process event (all CP)
                cp.processEvent(event)
            # add to the dataset
            rds.add(getArgSet(controlPlots))
        i += 1

    # save all
    if configuration.runningMode == "plots":
        for level in levels:
            for cp in controlPlots[level]:
                cp.endJob()
    else:
        for cp in controlPlots:
            cp.endJob()

    # for dataset, write the merged RDS to file
    if configuration.runningMode == "dataset":
        output.cd()
        ws_ras = ROOT.RooWorkspace(configuration.WSname, configuration.WSname)
        getattr(ws_ras, 'import')(rds.get())
        output.Add(ws_ras)
        ws_ras.Write()
        rds.tree().Write()

    # close the file
    output.Close()
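
Two hedged notes on the function above. First, a typical call (the input path and level list are placeholders). Second, the Njobs/jobNumber arguments split the input directory across batch jobs: itertools.islice(listing, jobNumber, None, Njobs) keeps every Njobs-th file starting at index jobNumber, as the tiny demo with made-up file names shows.

# hypothetical call: fill levels 0-2 as job 1 of 3
runAnalysis("/path/to/patTuples", levels=[0, 1, 2],
            outputname="controlPlots_job1.root", Njobs=3, jobNumber=1)

# how the job splitting behaves (illustrative file names)
import itertools
listing = ["f0.root", "f1.root", "f2.root", "f3.root", "f4.root", "f5.root"]
print list(itertools.islice(listing, 1, None, 3))   # ['f1.root', 'f4.root']
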
Example #8
def runAnalysis(path, txt, levels, outputname="controlPlots.root", Njobs=1, jobNumber=1):
  """produce all the plots in one go"""

  # inputs
  # when the path is a dcap:// URL, test the corresponding local mount point
  dcapDir = False
  if "dcap://" in path:
    split = path.split("/")
    dcapDir = os.path.isdir("/"+"/".join(split[3:]))
  if os.path.isdir(path) or dcapDir:
    if txt: # IWN: use txt file with all names
      f = open(txt)
      dirList=list(itertools.islice([line[:-1] for line in f], jobNumber, None, Njobs))
      f.close()
    else:
      dirList=list(itertools.islice(os.listdir(path), jobNumber, None, Njobs))
    files=[]
    for fname in dirList:
      files.append(path+"/"+fname)
  elif os.path.isfile(path):
    files=[path]
  else:
    files=[path]

  # output
  output = ROOT.TFile(outputname, "RECREATE")
  if configuration.runningMode=="dataset":
    ROOT.RooAbsData.setDefaultStorageType(ROOT.RooAbsData.Tree)
    rds = ROOT.RooDataSet(configuration.RDSname, configuration.RDSname, ROOT.RooArgSet())

  # events iterator, plus configuration of standard collections and producers
  events = AnalysisEvent(files)
  EventSelection.prepareAnalysisEvent(events)

  # prepare the plots
  controlPlots=[]
  if configuration.runningMode=="plots":
    leafList = [None]*EventSelection.eventCategories()
    createDirectory(EventSelection.categoriesHierarchy(), output, leafList)
    for levelDir in leafList:
      levelPlots=[]
      for cp in configuration.controlPlots:
        levelPlots.append(getattr(import_module(cp.module),cp.classname)(dir=levelDir.mkdir(cp.label),mode="plots"))
      controlPlots.append(levelPlots)
  else:
    for cp in configuration.controlPlots:
      controlPlots.append(getattr(import_module(cp.module),cp.classname)(dir=None, mode="dataset", dataset=rds))

  # book histograms (separate iteration for clarity)
  if configuration.runningMode=="plots":
    for level in levels:
      for conf,cp in zip(configuration.controlPlots,controlPlots[level]):
        cp.beginJob(**conf.kwargs)
  else:
    for conf,cp in zip(configuration.controlPlots,controlPlots):
      cp.beginJob(**conf.kwargs)
    for cp in controlPlots[:1]:
      cp.defineCategories(EventSelection.categoryNames)

  # process events
  i = 0
  DeltaTb = 0
  t0 = time.time()  # overall start time, used by the running-time printouts below
  tb = time.time()  # start time of the current batch
  n = events.GetEntries()
  ETA = " "
  print "\n\n\t__%s_events_to_process.__" %n
  for event in events:
    # printout
    if i%1000==0 :
      if i%10000==0 :
        if DeltaTb>0:
#           ETA = " ETA: %s" % time.strftime("%H h. %M min. %S s.", time.gmtime( DeltaTb/i*(n-i) ))
#           runtime = "\n    Running for %s" % time.strftime("%H h. %M min. %S s.", time.gmtime(time.time()-t0))
#           print runtime.replace(" 00 h.","").replace("for 0","for ") + ETA.replace(" 00 h.","").replace(": 0",": ")
          (m,s) = divmod(time.time()-t0,60)
          (h,m) = divmod(m,60)
          runtime = "\n    Running for %i h. %i min. %i s." % (h,m,s)
          (m,s) = divmod(DeltaTb/i*(n-i),60)
          (h,m) = divmod(m,60)
          ETA = " ETA: %i h. %i min. %i s." % (h,m,s)
          print runtime.replace(" 0 h.","") + ETA.replace(" 0 h.","")
      DeltaTb += time.time()-tb
      print "%d%%: Processing... event %d. Last batch in %f s." % (i*100/n,i,(time.time()-tb))
      tb = time.time()
    i += 1
    if configuration.runningMode=="plots":
      # loop on channels
      plots = filter(lambda x: EventSelection.isInCategory(x,event.category) ,levels)
      # process the event once (for the first level)
      selectionPlotsData=[]
      for level in plots[:1]:
        for cp in controlPlots[level]:
          selectionPlotsData.append(cp.process(event))
      # fill the histograms
      for level in plots:
        for cp, data in zip(controlPlots[level],selectionPlotsData):
          cp.fill(data, event.weight(category=level))
    else:
      for cp in controlPlots[:1]:
        # set categories (first CP only)
        cp.setCategories(map(lambda c: EventSelection.isInCategory(c, event.category),range(EventSelection.eventCategories())))
      for cp in controlPlots:
        # process event (all CP)
        cp.processEvent(event)
      # add to the dataset
      rds.add(getArgSet(controlPlots))
  
  # save all
  if configuration.runningMode=="plots":
    for level in levels:
      for cp in controlPlots[level]:
        cp.endJob(level)
  else:
    for cp in controlPlots:
      cp.endJob()

  # for dataset, write the merged RDS to file
  if configuration.runningMode=="dataset":
    output.cd()
    ws_ras = ROOT.RooWorkspace(configuration.WSname, configuration.WSname)
    getattr(ws_ras,'import')(rds.get())
    output.Add(ws_ras)
    ws_ras.Write()
    rds.tree().Write()
  
  # close the file
  output.Close()

  print "\nDone. Only took %s!\n" % time.strftime("%H:%M:%S", time.gmtime(time.time()-t0))