def runTest(path='../testfiles/', controlPlots=None):
  """Run one set of control plots over test files (debugging helper).

  Args:
    path: a directory (every entry in it becomes an input file) or a single
      file. Any other value results in an empty input list.
    controlPlots: the BaseControlPlots instance to exercise. It is required
      in practice: the assertion below rejects the None default.
  """
  # these modules are only needed in that function, used for debugging.
  # so we only import them here.
  import CMSSW
  import os
  import AnalysisEvent
  import EventSelection
  assert isinstance(controlPlots, BaseControlPlots)
  if os.path.isdir(path):
    # os.path.join keeps the names valid when path has no trailing separator
    files = [os.path.join(path, fname) for fname in os.listdir(path)]
  elif os.path.isfile(path):
    files = [path]
  else:
    files = []
  events = AnalysisEvent.AnalysisEvent(files)
  EventSelection.prepareAnalysisEvent(events)
  controlPlots.beginJob()
  for i, event in enumerate(events):
    # progress printout every 100 events
    if i % 100 == 0:
      print("Processing... event  %d" % i)
    controlPlots.processEvent(event)
  controlPlots.endJob()
def runTest(path='../testfiles/', controlPlots=None):
    """Debugging helper: feed every event of the test input to controlPlots.

    Args:
        path: directory whose entries are all used as input files, or one
            single file. Anything else gives an empty input list.
        controlPlots: BaseControlPlots instance to run. The None default is
            rejected by the assertion, so a value must be supplied.
    """
    # these modules are only needed in that function, used for debugging.
    # so we only import them here.
    import CMSSW
    import os
    import AnalysisEvent
    import EventSelection
    assert isinstance(controlPlots, BaseControlPlots)
    if os.path.isdir(path):
        # join with the proper separator: a plain concatenation only works
        # when path already ends with "/"
        files = [os.path.join(path, entry) for entry in os.listdir(path)]
    elif os.path.isfile(path):
        files = [path]
    else:
        files = []
    events = AnalysisEvent.AnalysisEvent(files)
    EventSelection.prepareAnalysisEvent(events)
    controlPlots.beginJob()
    for count, event in enumerate(events):
        if count % 100 == 0:
            # progress printout every 100 events
            print("Processing... event  %d" % count)
        controlPlots.processEvent(event)
    controlPlots.endJob()
def btagEfficiencyTreeProducer(stageName="Z+jet", muChannel=True, path='../testfiles/'):
  #search for category number
  stage=-1
  for cat in categoryNames :
    stage+=1
    if cat==stageName : break
  # prepare output
  path='/nfs/user/llbb/Pat_8TeV_532p4/DYjets_Summer12_V2/'
  ROOT.gROOT.ProcessLine(
  "struct MyStruct {\
     Float_t     pt;\
     Float_t     eta;\
     Int_t       flavor;\
     Float_t     ssvhe;\
     Float_t     ssvhp;\
     Float_t     csv;\
     Float_t     eventWeight;\
  };" )
  from ROOT import MyStruct
  mystruct = MyStruct()
  f = ROOT.TFile( 'mybtagEfftree.root', 'RECREATE' )
  tree = ROOT.TTree( 'btagEff', 'btag efficiency' )
  tree.Branch( 'data', mystruct, 'pt/F:eta/F:flavor/I:ssvhe/F:ssvhp/F:csv/F:eventWeight/F' )
  # input
  if os.path.isdir(path):
    dirList=os.listdir(path)
    files=[]
    for fname in dirList:
      files.append(path+fname)
  elif os.path.isfile(path):
    files=[path]
  else:
    files=[]
  events = AnalysisEvent(files)
  EventSelection.prepareAnalysisEvent(events)
  # event loop
  eventCnt = 0
  print "starting loop on events"
  for event in events:
    categoryData = event.catMu if muChannel else event.catEle
    goodJets = event.goodJets_mu if muChannel else event.goodJets_ele
    if EventSelection.isInCategory(stage, categoryData):
      eventCnt = eventCnt +1
      if eventCnt%100==0 : print ".",
      if eventCnt%1000==0 : print ""
      # event weight
      mystruct.eventWeight = event.weight(weightList=["PileUp"])
      # that's where we access the jets
      for index,jet in enumerate(event.jets):
        if not goodJets[index]: continue
        mystruct.pt = jet.pt()
        mystruct.eta = jet.eta()
        mystruct.flavor = jet.partonFlavour()
        mystruct.ssvhe = jet.bDiscriminator("simpleSecondaryVertexHighEffBJetTags")
        mystruct.ssvhp = jet.bDiscriminator("simpleSecondaryVertexHighPurBJetTags")
        mystruct.csv = jet.bDiscriminator("combinedSecondaryVertexBJetTags")
        tree.Fill()
  f.Write()
  f.Close()
  print ""
def main(options):
  """Check the parsed options and, when they are valid, start the analysis.

  Reads path, all, levels, Njobs, jobNumber and outputname from options.
  On the first failed check an error message and the parser help are
  printed and the function returns without running anything.
  """
  # do basic arg checking
  if options.path is None:
    print("Error: no input path specified.")
    parser.print_help()
    return
  # build the sorted list of levels: all of them, or the comma-separated ones
  if options.all:
    levels = sorted(range(EventSelection.eventCategories()))
  elif options.levels is not None:
    levels = sorted(map(int, options.levels.split(',')))
  else:
    levels = []
  # find the first violated constraint, if any
  error = None
  if not levels:
    error = "Error: no level specified for processing."
  elif min(levels) < 0:
    error = "Error: levels must be positive integers."
  elif max(levels) >= EventSelection.eventCategories():
    error = "Error: last level is %s" % (EventSelection.eventCategories() - 1)
  elif options.Njobs < 1:
    error = "Error: Njobs must be strictly positive."
  elif options.jobNumber >= options.Njobs:
    error = "Error: jobNumber must be strictly smaller than Njobs."
  if error is not None:
    print(error)
    parser.print_help()
    return
  # if all ok, run the procedure
  runAnalysis(path=options.path,
              outputname=options.outputname,
              levels=levels,
              Njobs=options.Njobs,
              jobNumber=options.jobNumber)
Exemple #5
0
def DumpEventInfo(event=None, runNumber=None, eventNumber=None, lsNumber=None, path="./"):
  """Dump informations about a given event.

  Args:
    event: the event to print. When None, the event is looked up in the
      input found at path using (runNumber, eventNumber, lsNumber) as key.
    runNumber: run of the event to look up; required when event is None.
    eventNumber: number of the event to look up; required when event is None.
    lsNumber: third element of the lookup key, passed through as given.
    path: directory (all entries are used) or single file to search.
  """
  # in case no event is provided, find it using eventNumber
  if event is None:
    if (eventNumber is None) or (runNumber is None):
      print("DumpEventInfo Error: either pass an event or an event number")
      return
    # find event based on run and event
    if os.path.isdir(path):
      # os.path.join keeps the names valid when path has no trailing separator
      files = [os.path.join(path, fname) for fname in os.listdir(path)]
    elif os.path.isfile(path):
      files = [path]
    else:
      files = []
    events = AnalysisEvent(files)
    EventSelection.prepareAnalysisEvent(events)
    DumpEventInfo(events[(runNumber, eventNumber, lsNumber)])
    return
  # run the producers when we want to print the outcome, and mute unneeded collections
  for product in eventDumpConfig.productsToPrint:
    getattr(event, product)
  for collection in eventDumpConfig.collectionsToHide:
    event.removeCollection(collection)
  # Now, we can go on with the printing.
  print(event)
def DumpEventInfo(event=None, eventNumber=None, path="./"):
  """Dump informations about a given event.

  Args:
    event: the event to print. When None, events[eventNumber] is looked up
      in the input found at path and printed instead.
    eventNumber: key of the event to look up; required when event is None.
    path: directory (all entries are used) or single file to search.
  """
  # in case no event is provided, find it using eventNumber
  if event is None:
    if eventNumber is None:
      print("DumpEventInfo Error: either pass an event or an event number")
      return
    # find event based on run and event
    if os.path.isdir(path):
      # join with the proper separator: plain concatenation only works
      # when path already ends with "/"
      files = [os.path.join(path, fname) for fname in os.listdir(path)]
    elif os.path.isfile(path):
      files = [path]
    else:
      files = []
    events = AnalysisEvent(files)
    EventSelection.prepareAnalysisEvent(events)
    DumpEventInfo(events[eventNumber])
    return
  # run the producers when we want to print the outcome, and mute unneeded collections
  for product in eventDumpConfig.productsToPrint:
    getattr(event, product)
  for collection in eventDumpConfig.collectionsToHide:
    event.removeCollection(collection)
  # Now, we can go on with the printing.
  print(event)
 def __init__(self, dir=None, dataset=None, mode="plots"):
     """Initialise the base control plots for the "eventSelection" purpose.

     dir, dataset and mode are forwarded unchanged to BaseControlPlots.
     The number of event categories is cached in self.eventCategories.
     """
     # create output file if needed. If no file is given, it means it is delegated
     BaseControlPlots.__init__(self, dir=dir, purpose="eventSelection", dataset=dataset, mode=mode)
     self.eventCategories = EventSelection.eventCategories()
 def process(self, event):
   """Return the categories the event belongs to, and its event number.

   The result is a dict with two keys: "category", the list of category
   indices (from 0 to self.eventCategories-1) accepted by
   EventSelection.isInCategory, and "event", the value of event.event().
   """
   ## event category
   categoryData = event.category
   matched = [category
              for category in range(self.eventCategories)
              if EventSelection.isInCategory(category, categoryData)]
   return {"category": matched, "event": event.event()}
 def process(self, event):
     """Collect the event-selection quantities for one event.

     Returns a dict holding, under "category", every category index below
     self.eventCategories for which EventSelection.isInCategory is true,
     and, under "event", the value returned by event.event().
     """
     ## event category
     categoryData = event.category
     accepted = []
     for index in range(self.eventCategories):
         if not EventSelection.isInCategory(index, categoryData):
             continue
         accepted.append(index)
     result = dict(category=accepted)
     result["event"] = event.event()
     return result
Exemple #10
0
def main(options):
    """Validate the command-line options, then hand over to runAnalysis.

    Uses options.path, all, levels, Njobs, jobNumber and outputname. Each
    failed check prints its error message followed by the parser help, and
    stops before the analysis is started.
    """
    def reject(message):
        # every rejection prints its message followed by the usage text
        print(message)
        parser.print_help()

    # do basic arg checking
    if options.path is None:
        return reject("Error: no input path specified.")
    # levels: all the categories, or the comma-separated list that was given
    if options.all:
        levels = list(range(EventSelection.eventCategories()))
    elif options.levels is not None:
        levels = [int(level) for level in options.levels.split(',')]
    else:
        levels = []
    levels.sort()
    if not levels:
        return reject("Error: no level specified for processing.")
    if min(levels) < 0:
        return reject("Error: levels must be positive integers.")
    if max(levels) >= EventSelection.eventCategories():
        return reject("Error: last level is %s" %
                      (EventSelection.eventCategories() - 1))
    if options.Njobs < 1:
        return reject("Error: Njobs must be strictly positive.")
    if options.jobNumber >= options.Njobs:
        return reject("Error: jobNumber must be strictly smaller than Njobs.")
    # if all ok, run the procedure
    runAnalysis(path=options.path, outputname=options.outputname,
                levels=levels, Njobs=options.Njobs,
                jobNumber=options.jobNumber)
Exemple #11
0
def DumpEventList(category, path="./", output="eventlist.txt"):
    """Dump a list of events in a given category.

    One line "Event <number>" is written to the output file for every input
    event that EventSelection.isInCategory accepts for the category.

    Args:
        category: category passed to EventSelection.isInCategory.
        path: directory (all entries are used) or single input file.
        output: name of the text file to (over)write.
    """
    # input
    if os.path.isdir(path):
        # os.path.join keeps the names valid when path has no trailing "/"
        files = [os.path.join(path, fname) for fname in os.listdir(path)]
    elif os.path.isfile(path):
        files = [path]
    else:
        files = []
    # events
    events = AnalysisEvent(files)
    # output: the with-block closes (and flushes) the file even on error
    with open(output, "w") as event_list:
        # collections and producers used in the analysis
        EventSelection.prepareAnalysisEvent(events)
        for event in events:
            # check category
            if EventSelection.isInCategory(category, event.category):
                # print
                event_list.write("Event %s\n" % (event.event(),))
def DumpEventList(category, path="./", output="eventlist.txt"):
  """Dump a list of events in a given category.

  Writes one line "Event <number>" to the output file for each input event
  accepted by EventSelection.isInCategory for the category.

  Args:
    category: category passed to EventSelection.isInCategory.
    path: directory (all entries are used) or single input file.
    output: name of the text file to (over)write.
  """
  # input
  if os.path.isdir(path):
    # join with the proper separator: plain concatenation only works
    # when path already ends with "/"
    files = [os.path.join(path, fname) for fname in os.listdir(path)]
  elif os.path.isfile(path):
    files = [path]
  else:
    files = []
  # events
  events = AnalysisEvent(files)
  # output: closed by the with-block, also when the loop raises
  with open(output, "w") as event_list:
    # collections and producers used in the analysis
    EventSelection.prepareAnalysisEvent(events)
    for event in events:
      # check category
      if EventSelection.isInCategory(category, event.category):
        # print
        event_list.write("Event %s\n" % (event.event(),))
 def __init__(self, dir=None, dataset=None, mode="plots"):
     """Set up the control plots with the "eventSelection" purpose.

     The dir, dataset and mode arguments go to BaseControlPlots as they are.
     self.eventCategories stores the value of EventSelection.eventCategories().
     """
     # create output file if needed. If no file is given, it means it is delegated
     baseOptions = dict(dir=dir,
                        purpose="eventSelection",
                        dataset=dataset,
                        mode=mode)
     BaseControlPlots.__init__(self, **baseOptions)
     self.eventCategories = EventSelection.eventCategories()
Exemple #14
0
def runAnalysis(path,
                levels,
                outputname="controlPlots.root",
                Njobs=1,
                jobNumber=1):
    """Run all the configured control plots over the input and save them.

    Depending on configuration.runningMode, either histograms are filled
    per category ("plots") or one RooDataSet is filled and stored in a
    RooWorkspace (any other mode; the dataset itself is only created for
    "dataset").

    Args:
        path: input directory or single file. For a directory, only the
            entries jobNumber, jobNumber+Njobs, ... of its listing are used.
        levels: category indices to process in "plots" mode.
        outputname: name of the ROOT file to recreate.
        Njobs: stride used to split a directory listing between jobs.
        jobNumber: start offset in the directory listing (0-based).
            NOTE(review): with the defaults (Njobs=1, jobNumber=1) the
            first directory entry is skipped - confirm this is intended.
    """

    # inputs: this job's share of the directory, or the single file
    if os.path.isdir(path):
        share = itertools.islice(os.listdir(path), jobNumber, None, Njobs)
        files = [path + "/" + fname for fname in share]
    elif os.path.isfile(path):
        files = [path]
    else:
        files = []

    # output
    output = ROOT.TFile(outputname, "RECREATE")
    if configuration.runningMode == "dataset":
        ROOT.RooAbsData.setDefaultStorageType(ROOT.RooAbsData.Tree)
        rds = ROOT.RooDataSet(configuration.RDSname, configuration.RDSname,
                              ROOT.RooArgSet())

    # events iterator, plus configuration of standard collections and producers
    events = AnalysisEvent(files)
    EventSelection.prepareAnalysisEvent(events)

    # prepare the plots: one list of control plots per category in "plots"
    # mode, a single flat list sharing the dataset otherwise
    controlPlots = []
    if configuration.runningMode == "plots":
        leafList = [None] * EventSelection.eventCategories()
        createDirectory(EventSelection.categoriesHierarchy(), output, leafList)
        for levelDir in leafList:
            controlPlots.append([
                getattr(import_module(cp.module), cp.classname)(
                    dir=levelDir.mkdir(cp.label), mode="plots")
                for cp in configuration.controlPlots
            ])
    else:
        for cp in configuration.controlPlots:
            plotClass = getattr(import_module(cp.module), cp.classname)
            controlPlots.append(
                plotClass(dir=None, mode="dataset", dataset=rds))

    # book histograms (separate iteration for clarity)
    if configuration.runningMode == "plots":
        for level in levels:
            booked = zip(configuration.controlPlots, controlPlots[level])
            for conf, cp in booked:
                cp.beginJob(**conf.kwargs)
    else:
        for conf, cp in zip(configuration.controlPlots, controlPlots):
            cp.beginJob(**conf.kwargs)
        # the categories are declared on the first control plot only
        if controlPlots:
            controlPlots[0].defineCategories(EventSelection.categoryNames)

    # process events
    t0 = time.time()
    for i, event in enumerate(events):
        # printout every 100 events, with the time spent on the last batch
        if i % 100 == 0:
            print("Processing... event %d. Last batch in %f s." %
                  (i, time.time() - t0))
            t0 = time.time()
        if configuration.runningMode == "plots":
            # requested levels the event belongs to
            plots = [
                level for level in levels
                if EventSelection.isInCategory(level, event.category)
            ]
            # process the event once (for the first level)
            selectionPlotsData = []
            if plots:
                selectionPlotsData = [
                    cp.process(event) for cp in controlPlots[plots[0]]
                ]
            # fill the histograms of every matching level with that data
            for level in plots:
                for cp, data in zip(controlPlots[level], selectionPlotsData):
                    cp.fill(data, event.weight(category=level))
        else:
            # set categories (first CP only)
            if controlPlots:
                controlPlots[0].setCategories([
                    EventSelection.isInCategory(c, event.category)
                    for c in range(EventSelection.eventCategories())
                ])
            # process event (all CP)
            for cp in controlPlots:
                cp.processEvent(event)
            # add to the dataset
            rds.add(getArgSet(controlPlots))

    # save all
    if configuration.runningMode == "plots":
        for level in levels:
            for cp in controlPlots[level]:
                cp.endJob()
    else:
        for cp in controlPlots:
            cp.endJob()

    # for dataset, write the merged RDS to file
    if configuration.runningMode == "dataset":
        output.cd()
        ws_ras = ROOT.RooWorkspace(configuration.WSname, configuration.WSname)
        # "import" is a Python keyword, hence the getattr
        getattr(ws_ras, 'import')(rds.get())
        output.Add(ws_ras)
        ws_ras.Write()
        rds.tree().Write()

    # close the file
    output.Close()
def runAnalysis(path, levels, outputname="controlPlots.root", Njobs=1, jobNumber=1):
  """Produce all the control plots (or the dataset) in one pass over the events.

  In "plots" running mode, histograms are filled for each requested level the
  event belongs to. Otherwise every control plot processes every event and
  the result is added to a RooDataSet, which is created and written to a
  RooWorkspace only in "dataset" mode.

  Args:
    path: input directory or single file. For a directory, the listing is
      sliced with start jobNumber and step Njobs.
    levels: category indices to process in "plots" mode.
    outputname: name of the ROOT file to recreate.
    Njobs: number of jobs sharing a directory (slice step).
    jobNumber: 0-based index of this job (slice start).
      NOTE(review): the defaults (Njobs=1, jobNumber=1) skip the first
      directory entry - confirm this is intended.
  """

  # inputs
  if os.path.isdir(path):
    selected = itertools.islice(os.listdir(path), jobNumber, None, Njobs)
    files = [path+"/"+entry for entry in selected]
  elif os.path.isfile(path):
    files = [path]
  else:
    files = []

  # output
  output = ROOT.TFile(outputname, "RECREATE")
  if configuration.runningMode == "dataset":
    ROOT.RooAbsData.setDefaultStorageType(ROOT.RooAbsData.Tree)
    rds = ROOT.RooDataSet(configuration.RDSname, configuration.RDSname, ROOT.RooArgSet())

  # events iterator, plus configuration of standard collections and producers
  events = AnalysisEvent(files)
  EventSelection.prepareAnalysisEvent(events)

  # prepare the plots
  controlPlots = []
  if configuration.runningMode == "plots":
    # one sub-list of control plots per category directory
    leafList = [None]*EventSelection.eventCategories()
    createDirectory(EventSelection.categoriesHierarchy(), output, leafList)
    for levelDir in leafList:
      levelPlots = []
      for cp in configuration.controlPlots:
        builder = getattr(import_module(cp.module), cp.classname)
        levelPlots.append(builder(dir=levelDir.mkdir(cp.label), mode="plots"))
      controlPlots.append(levelPlots)
  else:
    # flat list, all the control plots share the same dataset
    controlPlots.extend(
      getattr(import_module(cp.module), cp.classname)(dir=None, mode="dataset", dataset=rds)
      for cp in configuration.controlPlots)

  # book histograms (separate iteration for clarity)
  if configuration.runningMode == "plots":
    for level in levels:
      for conf, cp in zip(configuration.controlPlots, controlPlots[level]):
        cp.beginJob(**conf.kwargs)
  else:
    for conf, cp in zip(configuration.controlPlots, controlPlots):
      cp.beginJob(**conf.kwargs)
    # categories are defined on the first control plot only
    for cp in controlPlots[:1]:
      cp.defineCategories(EventSelection.categoryNames)

  # process events
  processed = 0
  batchStart = time.time()
  for event in events:
    # printout
    if processed % 100 == 0:
      print("Processing... event %d. Last batch in %f s." % (processed, time.time()-batchStart))
      batchStart = time.time()
    if configuration.runningMode == "plots":
      # loop on channels: keep the requested levels the event belongs to
      plots = [level for level in levels if EventSelection.isInCategory(level, event.category)]
      # process the event once (for the first level)
      selectionPlotsData = []
      for level in plots[:1]:
        selectionPlotsData.extend(cp.process(event) for cp in controlPlots[level])
      # fill the histograms
      for level in plots:
        for cp, data in zip(controlPlots[level], selectionPlotsData):
          cp.fill(data, event.weight(category=level))
    else:
      for cp in controlPlots[:1]:
        # set categories (first CP only)
        flags = [EventSelection.isInCategory(c, event.category)
                 for c in range(EventSelection.eventCategories())]
        cp.setCategories(flags)
      for cp in controlPlots:
        # process event (all CP)
        cp.processEvent(event)
      # add to the dataset
      rds.add(getArgSet(controlPlots))
    processed += 1

  # save all
  if configuration.runningMode == "plots":
    for level in levels:
      for cp in controlPlots[level]:
        cp.endJob()
  else:
    for cp in controlPlots:
      cp.endJob()

  # for dataset, write the merged RDS to file
  if configuration.runningMode == "dataset":
    output.cd()
    ws_ras = ROOT.RooWorkspace(configuration.WSname, configuration.WSname)
    # "import" is a Python keyword, so the method is fetched with getattr
    importer = getattr(ws_ras, 'import')
    importer(rds.get())
    output.Add(ws_ras)
    ws_ras.Write()
    rds.tree().Write()

  # close the file
  output.Close()
Exemple #16
0
def runAnalysis(path, txt, levels, outputname="controlPlots.root", Njobs=1, jobNumber=1):
  """Produce all the plots (or the dataset) in one go, with progress printouts.

  In "plots" running mode, histograms are filled for each requested level the
  event belongs to. Otherwise every control plot processes every event and
  the result is added to a RooDataSet, which is created and written to a
  RooWorkspace only in "dataset" mode.

  Args:
    path: input directory, single file, or any other string (e.g. a remote
      URL), which is then used as the only input file name. A "dcap://"
      path is treated as a directory when the part after the host exists
      as a local directory.
    txt: optional text file listing one input file name per line, used
      instead of the directory listing. A false value uses os.listdir(path).
    levels: category indices to process in "plots" mode.
    outputname: name of the ROOT file to recreate.
    Njobs: number of jobs sharing the file list (slice step).
    jobNumber: 0-based index of this job (slice start).
      NOTE(review): the defaults (Njobs=1, jobNumber=1) skip the first
      entry of the file list - confirm this is intended.
  """
  # start of the job, for the "Running for" and the final "Done" printouts.
  # NOTE(review): t0 was read but never assigned in this function; defined
  # here so that it does not depend on a module-level variable.
  t0 = time.time()

  # inputs
  dcapDir = False
  if "dcap://" in path:
    # dcap://host/a/b -> /a/b : look for the directory on the local mount
    split = path.split("/")
    dcapDir = os.path.isdir("/"+"/".join(split[3:]))
  if os.path.isdir(path) or dcapDir:
    if txt: # IWN: use txt file with all names
      with open(txt) as f:
        # strip only the line terminator, so that a last line without
        # newline keeps its final character
        names = [line.rstrip("\n") for line in f]
      dirList = list(itertools.islice(names, jobNumber, None, Njobs))
    else:
      dirList = list(itertools.islice(os.listdir(path), jobNumber, None, Njobs))
    files = [path+"/"+fname for fname in dirList]
  else:
    # a single local file, or anything else taken as a file name as it is
    files = [path]

  # output
  output = ROOT.TFile(outputname, "RECREATE")
  if configuration.runningMode=="dataset":
    ROOT.RooAbsData.setDefaultStorageType(ROOT.RooAbsData.Tree)
    rds = ROOT.RooDataSet(configuration.RDSname, configuration.RDSname, ROOT.RooArgSet())

  # events iterator, plus configuration of standard collections and producers
  events = AnalysisEvent(files)
  EventSelection.prepareAnalysisEvent(events)

  # prepare the plots
  controlPlots=[]
  if configuration.runningMode=="plots":
    leafList = [None]*EventSelection.eventCategories()
    createDirectory(EventSelection.categoriesHierarchy(), output, leafList)
    for levelDir in leafList:
      levelPlots=[]
      for cp in configuration.controlPlots:
        levelPlots.append(getattr(import_module(cp.module),cp.classname)(dir=levelDir.mkdir(cp.label),mode="plots"))
      controlPlots.append(levelPlots)
  else:
    for cp in configuration.controlPlots:
      controlPlots.append(getattr(import_module(cp.module),cp.classname)(dir=None, mode="dataset", dataset=rds))

  # book histograms (separate iteration for clarity)
  if configuration.runningMode=="plots":
    for level in levels:
      for conf,cp in zip(configuration.controlPlots,controlPlots[level]):
        cp.beginJob(**conf.kwargs)
  else:
    for conf,cp in zip(configuration.controlPlots,controlPlots):
      cp.beginJob(**conf.kwargs)
    for cp in controlPlots[:1]:
      cp.defineCategories(EventSelection.categoryNames)

  # process events
  i = 0
  DeltaTb = 0           # time accumulated over the batches printed so far
  tb = time.time()      # start of the current batch
  n = events.GetEntries()
  print("\n\n\t__%s_events_to_process.__" % n)
  for event in events:
    # printout every 1000 events; elapsed time and ETA every 10000
    if i%1000==0 :
      if i%10000==0 :
        if DeltaTb>0:
          (m,s) = divmod(time.time()-t0,60)
          (h,m) = divmod(m,60)
          runtime = "\n    Running for %i h. %i min. %i s." % (h,m,s)
          # ETA: average time per event so far, times the events left
          (m,s) = divmod(DeltaTb/i*(n-i),60)
          (h,m) = divmod(m,60)
          ETA = " ETA: %i h. %i min. %i s." % (h,m,s)
          print(runtime.replace(" 0 h.","") + ETA.replace(" 0 h.",""))
      DeltaTb += time.time()-tb
      print("%d%%: Processing... event %d. Last batch in %f s." % (i*100/n,i,(time.time()-tb)))
      tb = time.time()
    i += 1
    if configuration.runningMode=="plots":
      # loop on channels: keep the requested levels the event belongs to
      plots = [level for level in levels if EventSelection.isInCategory(level,event.category)]
      # process the event once (for the first level)
      selectionPlotsData=[]
      for level in plots[:1]:
        for cp in controlPlots[level]:
          selectionPlotsData.append(cp.process(event))
      # fill the histograms
      for level in plots:
        for cp, data in zip(controlPlots[level],selectionPlotsData):
          cp.fill(data, event.weight(category=level))
    else:
      for cp in controlPlots[:1]:
        # set categories (first CP only)
        cp.setCategories([EventSelection.isInCategory(c, event.category) for c in range(EventSelection.eventCategories())])
      for cp in controlPlots:
        # process event (all CP)
        cp.processEvent(event)
      # add to the dataset
      rds.add(getArgSet(controlPlots))

  # save all
  if configuration.runningMode=="plots":
    for level in levels:
      for cp in controlPlots[level]:
        cp.endJob(level)
  else:
    for cp in controlPlots:
      cp.endJob()

  # for dataset, write the merged RDS to file
  if configuration.runningMode=="dataset":
    output.cd()
    ws_ras = ROOT.RooWorkspace(configuration.WSname, configuration.WSname)
    # "import" is a Python keyword, hence the getattr
    getattr(ws_ras,'import')(rds.get())
    output.Add(ws_ras)
    ws_ras.Write()
    rds.tree().Write()

  # close the file
  output.Close()

  print("\nDone. Only took %s!\n" % time.strftime("%H:%M:%S", time.gmtime(time.time()-t0)))