def main(options):
  """simplistic program main

  Validates the command-line options (input path, selection levels,
  job-splitting parameters) and, if everything is consistent, hands
  off to runAnalysis. On any error, prints a message plus the parser
  help and returns without running.
  """
  # do basic arg checking
  if options.path is None:
    print "Error: no input path specified."
    parser.print_help()
    return
  # build the list of selection levels to process
  levels = []
  if options.all:
    levels = range(EventSelection.eventCategories())
  elif options.levels is not None:  # idiomatic "is not None" (was "not ... is None")
    levels = map(int, options.levels.split(','))
  levels.sort()
  if not levels:  # empty list: nothing requested
    print "Error: no level specified for processing."
    parser.print_help()
    return
  if min(levels) < 0:
    print "Error: levels must be positive integers."
    parser.print_help()
    return
  if max(levels) >= EventSelection.eventCategories():
    print "Error: last level is", EventSelection.eventCategories()-1
    parser.print_help()
    return
  if options.Njobs < 1:
    print "Error: Njobs must be strictly positive."
    parser.print_help()
    return
  if options.jobNumber >= options.Njobs:
    print "Error: jobNumber must be strictly smaller than Njobs."
    parser.print_help()
    return
  # if all ok, run the procedure
  runAnalysis(path=options.path, outputname=options.outputname,
              levels=levels, Njobs=options.Njobs, jobNumber=options.jobNumber)
 def __init__(self, dir=None, dataset=None, mode="plots"):
      """Set up the event-selection control plots.

      No output file is opened here: if dir is None, output creation is
      delegated to BaseControlPlots.
      """
      BaseControlPlots.__init__(self, dir=dir, purpose="eventSelection", dataset=dataset, mode=mode)
      # cache the number of event categories for later use
      self.eventCategories = EventSelection.eventCategories()
# Exemple #3
# 0
def main(options):
    """simplistic program main

    Checks the command-line options for consistency (input path present,
    at least one valid selection level, sane Njobs/jobNumber split) and
    runs the analysis. Any problem prints an error and the parser help,
    then returns without running.
    """
    # do basic arg checking
    if options.path is None:
        print "Error: no input path specified."
        parser.print_help()
        return
    # assemble the list of selection levels to process
    levels = []
    if options.all:
        levels = range(EventSelection.eventCategories())
    elif options.levels is not None:  # idiomatic "is not None" (was "not ... is None")
        levels = map(int, options.levels.split(','))
    levels.sort()
    if not levels:  # empty list: nothing requested
        print "Error: no level specified for processing."
        parser.print_help()
        return
    if min(levels) < 0:
        print "Error: levels must be positive integers."
        parser.print_help()
        return
    if max(levels) >= EventSelection.eventCategories():
        print "Error: last level is", EventSelection.eventCategories() - 1
        parser.print_help()
        return
    if options.Njobs < 1:
        print "Error: Njobs must be strictly positive."
        parser.print_help()
        return
    if options.jobNumber >= options.Njobs:
        print "Error: jobNumber must be strictly smaller than Njobs."
        parser.print_help()
        return
    # if all ok, run the procedure
    runAnalysis(path=options.path,
                outputname=options.outputname,
                levels=levels,
                Njobs=options.Njobs,
                jobNumber=options.jobNumber)
 def __init__(self, dir=None, dataset=None, mode="plots"):
      """Initialize the event-selection control plots.

      Output-file handling is delegated to the base class when no
      directory is given.
      """
      BaseControlPlots.__init__(self,
                                dir=dir,
                                purpose="eventSelection",
                                dataset=dataset,
                                mode=mode)
      # remember how many event categories exist
      self.eventCategories = EventSelection.eventCategories()
# Exemple #5
# 0
def runAnalysis(path,
                levels,
                outputname="controlPlots.root",
                Njobs=1,
                jobNumber=1):
    """produce all the plots in one go

    path: input directory or a single input file
    levels: selection levels (category indices) to process in "plots" mode
    outputname: name of the output ROOT file
    Njobs, jobNumber: split the input file list across Njobs jobs; this
        job takes every Njobs-th file starting at index jobNumber.
        NOTE(review): with the defaults (Njobs=1, jobNumber=1) the islice
        below starts at index 1 and skips the first file -- presumably
        callers pass a 0-based jobNumber; confirm the intended default.
    """

    # inputs
    if os.path.isdir(path):
        dirList = list(
            itertools.islice(os.listdir(path), jobNumber, None, Njobs))
        files = []
        for fname in dirList:
            files.append(path + "/" + fname)
    elif os.path.isfile(path):
        files = [path]
    else:
        # neither a directory nor a file: nothing to process
        files = []

    # output
    output = ROOT.TFile(outputname, "RECREATE")
    if configuration.runningMode == "dataset":
        # dataset mode accumulates events into a tree-backed RooDataSet
        # instead of histograms; rds is only bound in this mode
        ROOT.RooAbsData.setDefaultStorageType(ROOT.RooAbsData.Tree)
        rds = ROOT.RooDataSet(configuration.RDSname, configuration.RDSname,
                              ROOT.RooArgSet())

    # events iterator, plus configuration of standard collections and producers
    events = AnalysisEvent(files)
    EventSelection.prepareAnalysisEvent(events)

    # prepare the plots
    controlPlots = []
    if configuration.runningMode == "plots":
        # one output subdirectory (and one list of control plots) per category
        leafList = [None] * EventSelection.eventCategories()
        createDirectory(EventSelection.categoriesHierarchy(), output, leafList)
        for levelDir in leafList:
            levelPlots = []
            for cp in configuration.controlPlots:
                # instantiate the configured control-plot class by module/name
                levelPlots.append(
                    getattr(import_module(cp.module),
                            cp.classname)(dir=levelDir.mkdir(cp.label),
                                          mode="plots"))
            controlPlots.append(levelPlots)
    else:
        # dataset mode: one flat list of control plots, all feeding rds
        for cp in configuration.controlPlots:
            controlPlots.append(
                getattr(import_module(cp.module), cp.classname)(dir=None,
                                                                mode="dataset",
                                                                dataset=rds))

    # book histograms (separate iteration for clarity)
    if configuration.runningMode == "plots":
        for level in levels:
            for conf, cp in zip(configuration.controlPlots,
                                controlPlots[level]):
                cp.beginJob(**conf.kwargs)
    else:
        for conf, cp in zip(configuration.controlPlots, controlPlots):
            cp.beginJob(**conf.kwargs)
        # categories are declared on the first control plot only
        for cp in controlPlots[:1]:
            cp.defineCategories(EventSelection.categoryNames)

    # process events
    i = 0
    t0 = time.time()
    for event in events:
        # printout every 100 events, with the time taken by the last batch
        if i % 100 == 0:
            print "Processing... event %d. Last batch in %f s." % (i, (
                time.time() - t0))
            t0 = time.time()
        if configuration.runningMode == "plots":
            # loop on channels
            plots = filter(
                lambda x: EventSelection.isInCategory(x, event.category),
                levels)
            # process the event once (for the first level)
            selectionPlotsData = []
            for level in plots[:1]:
                for cp in controlPlots[level]:
                    selectionPlotsData.append(cp.process(event))
            # fill the histograms (re-using the data computed above)
            for level in plots:
                for cp, data in zip(controlPlots[level], selectionPlotsData):
                    cp.fill(data, event.weight(category=level))
        else:
            for cp in controlPlots[:1]:
                # set categories (first CP only)
                cp.setCategories(
                    map(
                        lambda c: EventSelection.isInCategory(
                            c, event.category),
                        range(EventSelection.eventCategories())))
            for cp in controlPlots:
                # process event (all CP)
                cp.processEvent(event)
            # add to the dataset
            rds.add(getArgSet(controlPlots))
        i += 1

    # save all
    if configuration.runningMode == "plots":
        for level in levels:
            for cp in controlPlots[level]:
                cp.endJob()
    else:
        for cp in controlPlots:
            cp.endJob()

    # for dataset, write the merged RDS to file
    if configuration.runningMode == "dataset":
        output.cd()
        # 'import' is a Python keyword, hence the getattr call on the workspace
        ws_ras = ROOT.RooWorkspace(configuration.WSname, configuration.WSname)
        getattr(ws_ras, 'import')(rds.get())
        output.Add(ws_ras)
        ws_ras.Write()
        rds.tree().Write()

    # close the file
    output.Close()
def runAnalysis(path, levels, outputname="controlPlots.root", Njobs=1, jobNumber=1):
  """produce all the plots in one go

  path: input directory or a single input file
  levels: selection levels (category indices) to process in "plots" mode
  outputname: name of the output ROOT file
  Njobs, jobNumber: split the input file list across Njobs jobs; this job
      takes every Njobs-th file starting at index jobNumber.
      NOTE(review): with the defaults (Njobs=1, jobNumber=1) islice starts
      at index 1 and skips the first file -- presumably jobNumber is meant
      to be 0-based; confirm.
  """

  # inputs
  if os.path.isdir(path):
    dirList=list(itertools.islice(os.listdir(path), jobNumber, None, Njobs))
    files=[]
    for fname in dirList:
      files.append(path+"/"+fname)
  elif os.path.isfile(path):
    files=[path]
  else:
    # neither a directory nor a file: nothing to process
    files=[]

  # output
  output = ROOT.TFile(outputname, "RECREATE")
  if configuration.runningMode=="dataset":
    # dataset mode accumulates a tree-backed RooDataSet instead of
    # histograms; rds is only bound in this mode
    ROOT.RooAbsData.setDefaultStorageType(ROOT.RooAbsData.Tree)
    rds = ROOT.RooDataSet(configuration.RDSname, configuration.RDSname, ROOT.RooArgSet())

  # events iterator, plus configuration of standard collections and producers
  events = AnalysisEvent(files)
  EventSelection.prepareAnalysisEvent(events)

  # prepare the plots
  controlPlots=[]
  if configuration.runningMode=="plots":
    # one output subdirectory (and one list of control plots) per category
    leafList = [None]*EventSelection.eventCategories()
    createDirectory(EventSelection.categoriesHierarchy(), output, leafList)
    for levelDir in leafList:
      levelPlots=[]
      for cp in configuration.controlPlots:
        # instantiate the configured control-plot class by module/name
        levelPlots.append(getattr(import_module(cp.module),cp.classname)(dir=levelDir.mkdir(cp.label),mode="plots"))
      controlPlots.append(levelPlots)
  else:
    # dataset mode: one flat list of control plots, all feeding rds
    for cp in configuration.controlPlots:
      controlPlots.append(getattr(import_module(cp.module),cp.classname)(dir=None, mode="dataset", dataset=rds))

  # book histograms (separate iteration for clarity)
  if configuration.runningMode=="plots":
    for level in levels:
     for conf,cp in zip(configuration.controlPlots,controlPlots[level]):
       cp.beginJob(**conf.kwargs)
  else:
    for conf,cp in zip(configuration.controlPlots,controlPlots):
      cp.beginJob(**conf.kwargs)
    # categories are declared on the first control plot only
    for cp in controlPlots[:1]:
      cp.defineCategories(EventSelection.categoryNames)

  # process events
  i = 0
  t0 = time.time()
  for event in events:
    # printout every 100 events, with the time taken by the last batch
    if i%100==0 : 
      print "Processing... event %d. Last batch in %f s." % (i,(time.time()-t0))
      t0 = time.time()
    if configuration.runningMode=="plots":
      # loop on channels
      plots = filter(lambda x: EventSelection.isInCategory(x,event.category) ,levels)
      # process the event once (for the first level)
      selectionPlotsData=[]
      for level in plots[:1]:
        for cp in controlPlots[level]:
          selectionPlotsData.append(cp.process(event))
      # fill the histograms (re-using the data computed above)
      for level in plots:
        for cp, data in zip(controlPlots[level],selectionPlotsData):
          cp.fill(data, event.weight(category=level))
    else:
      for cp in controlPlots[:1]:
        # set categories (first CP only)
        cp.setCategories(map(lambda c:EventSelection.isInCategory(c, event.category),range(EventSelection.eventCategories())))
      for cp in controlPlots:
        # process event (all CP)
        cp.processEvent(event)
      # add to the dataset
      rds.add(getArgSet(controlPlots))
    i += 1

  # save all
  if configuration.runningMode=="plots":
    for level in levels:
     for cp in controlPlots[level]: 
       cp.endJob()
  else:
   for cp in controlPlots: 
     cp.endJob()

  # for dataset, write the merged RDS to file
  if configuration.runningMode=="dataset":
    output.cd()
    # 'import' is a Python keyword, hence the getattr call on the workspace
    ws_ras = ROOT.RooWorkspace(configuration.WSname, configuration.WSname)
    getattr(ws_ras,'import')(rds.get())
    output.Add(ws_ras)
    ws_ras.Write()
    rds.tree().Write()
  
  # close the file
  output.Close()
# Exemple #7
# 0
def runAnalysis(path, txt, levels, outputname="controlPlots.root", Njobs=1, jobNumber=1):
  """produce all the plots in one go"""

  # inputs
  dcapDir = False
  if "dcap://" in path:
    split = path.split("/")
    decapDir = os.path.isdir("/"+"/".join(split[3:]))
  if os.path.isdir(path) or decapDir:
    if txt: # IWN: use txt file with all names
      f = open(txt)
      dirList=list(itertools.islice([line[:-1] for line in f], jobNumber, None, Njobs))
      f.close()
    else:
      dirList=list(itertools.islice(os.listdir(path), jobNumber, None, Njobs))
    files=[]
    for fname in dirList:
      files.append(path+"/"+fname)
  elif os.path.isfile(path):
    files=[path]
  else:
    files=[path]

  # output
  output = ROOT.TFile(outputname, "RECREATE")
  if configuration.runningMode=="dataset":
    ROOT.RooAbsData.setDefaultStorageType(ROOT.RooAbsData.Tree)
    rds = ROOT.RooDataSet(configuration.RDSname, configuration.RDSname, ROOT.RooArgSet())

  # events iterator, plus configuration of standard collections and producers
  events = AnalysisEvent(files)
  EventSelection.prepareAnalysisEvent(events)

  # prepare the plots
  controlPlots=[]
  if configuration.runningMode=="plots":
    leafList = [None]*EventSelection.eventCategories()
    createDirectory(EventSelection.categoriesHierarchy(), output, leafList)
    for levelDir in leafList:
      levelPlots=[]
      for cp in configuration.controlPlots:
        levelPlots.append(getattr(import_module(cp.module),cp.classname)(dir=levelDir.mkdir(cp.label),mode="plots"))
      controlPlots.append(levelPlots)
  else:
    for cp in configuration.controlPlots:
      controlPlots.append(getattr(import_module(cp.module),cp.classname)(dir=None, mode="dataset", dataset=rds))

  # book histograms (separate iteration for clarity)
  if configuration.runningMode=="plots":
    for level in levels:
     for conf,cp in zip(configuration.controlPlots,controlPlots[level]):
       cp.beginJob(**conf.kwargs)
  else:
    for conf,cp in zip(configuration.controlPlots,controlPlots):
      cp.beginJob(**conf.kwargs)
    for cp in controlPlots[:1]:
      cp.defineCategories(EventSelection.categoryNames)

  # process events
  i = 0
  DeltaTb = 0
  tb = time.time()
  n = events.GetEntries()
  ETA = " "
  print "\n\n\t__%s_events_to_process.__" %n
  for event in events:
    # printout
    if i%1000==0 :
      if i%10000==0 :
        if DeltaTb>0:
#           ETA = " ETA: %s" % time.strftime("%H h. %M min. %S s.", time.gmtime( DeltaTb/i*(n-i) ))
#           runtime = "\n    Running for %s" % time.strftime("%H h. %M min. %S s.", time.gmtime(time.time()-t0))
#           print runtime.replace(" 00 h.","").replace("for 0","for ") + ETA.replace(" 00 h.","").replace(": 0",": ")
          (m,s) = divmod(time.time()-t0,60)
          (h,m) = divmod(m,60)
          runtime = "\n    Running for %i h. %i min. %i s." % (h,m,s)
          (m,s) = divmod(DeltaTb/i*(n-i),60)
          (h,m) = divmod(m,60)
          ETA = " ETA: %i h. %i min. %i s." % (h,m,s)
          print runtime.replace(" 0 h.","") + ETA.replace(" 0 h.","")
      DeltaTb += time.time()-tb
      print "%d%%: Processing... event %d. Last batch in %f s." % (i*100/n,i,(time.time()-tb))
      tb = time.time()
    i += 1
    if configuration.runningMode=="plots":
      # loop on channels
      plots = filter(lambda x: EventSelection.isInCategory(x,event.category) ,levels)
      # process the event once (for the first level)
      selectionPlotsData=[]
      for level in plots[:1]:
        for cp in controlPlots[level]:
          selectionPlotsData.append(cp.process(event))
      # fill the histograms
      for level in plots:
        for cp, data in zip(controlPlots[level],selectionPlotsData):
          cp.fill(data, event.weight(category=level))
    else:
      for cp in controlPlots[:1]:
        # set categories (first CP only)
        cp.setCategories(map(lambda c: EventSelection.isInCategory(c, event.category),range(EventSelection.eventCategories())))
      for cp in controlPlots:
        # process event (all CP)
        cp.processEvent(event)
      # add to the dataset
      rds.add(getArgSet(controlPlots))
  
  # save all
  if configuration.runningMode=="plots":
    for level in levels:
     for cp in controlPlots[level]: 
       cp.endJob(level)
  else:
   for cp in controlPlots: 
     cp.endJob()

  # for dataset, write the merged RDS to file
  if configuration.runningMode=="dataset":
    output.cd()
    ws_ras = ROOT.RooWorkspace(configuration.WSname, configuration.WSname)
    getattr(ws_ras,'import')(rds.get())
    output.Add(ws_ras)
    ws_ras.Write()
    rds.tree().Write()
  
  # close the file
  output.Close()

  print "\nDone. Only took %s!\n" % time.strftime("%H:%M:%S", time.gmtime(time.time()-t0))