def runTest(path='../testfiles/', controlPlots=None): # these modules are only needed in that function, used for debugging. # so we only import them here. import CMSSW import os import AnalysisEvent import EventSelection assert isinstance(controlPlots, BaseControlPlots) if os.path.isdir(path): dirList=os.listdir(path) files=[] for fname in dirList: files.append(path+fname) elif os.path.isfile(path): files=[path] else: files=[] events = AnalysisEvent.AnalysisEvent(files) EventSelection.prepareAnalysisEvent(events) controlPlots.beginJob() i = 0 for event in events: if i%100==0 : print "Processing... event ", i controlPlots.processEvent(event) i += 1 controlPlots.endJob()
def runTest(path='../testfiles/', controlPlots=None): # these modules are only needed in that function, used for debugging. # so we only import them here. import CMSSW import os import AnalysisEvent import EventSelection assert isinstance(controlPlots, BaseControlPlots) if os.path.isdir(path): dirList = os.listdir(path) files = [] for fname in dirList: files.append(path + fname) elif os.path.isfile(path): files = [path] else: files = [] events = AnalysisEvent.AnalysisEvent(files) EventSelection.prepareAnalysisEvent(events) controlPlots.beginJob() i = 0 for event in events: if i % 100 == 0: print "Processing... event ", i controlPlots.processEvent(event) i += 1 controlPlots.endJob()
def btagEfficiencyTreeProducer(stageName="Z+jet", muChannel=True, path='../testfiles/'): #search for category number stage=-1 for cat in categoryNames : stage+=1 if cat==stageName : break # prepare output path='/nfs/user/llbb/Pat_8TeV_532p4/DYjets_Summer12_V2/' ROOT.gROOT.ProcessLine( "struct MyStruct {\ Float_t pt;\ Float_t eta;\ Int_t flavor;\ Float_t ssvhe;\ Float_t ssvhp;\ Float_t csv;\ Float_t eventWeight;\ };" ) from ROOT import MyStruct mystruct = MyStruct() f = ROOT.TFile( 'mybtagEfftree.root', 'RECREATE' ) tree = ROOT.TTree( 'btagEff', 'btag efficiency' ) tree.Branch( 'data', mystruct, 'pt/F:eta/F:flavor/I:ssvhe/F:ssvhp/F:csv/F:eventWeight/F' ) # input if os.path.isdir(path): dirList=os.listdir(path) files=[] for fname in dirList: files.append(path+fname) elif os.path.isfile(path): files=[path] else: files=[] events = AnalysisEvent(files) EventSelection.prepareAnalysisEvent(events) # event loop eventCnt = 0 print "starting loop on events" for event in events: categoryData = event.catMu if muChannel else event.catEle goodJets = event.goodJets_mu if muChannel else event.goodJets_ele if EventSelection.isInCategory(stage, categoryData): eventCnt = eventCnt +1 if eventCnt%100==0 : print ".", if eventCnt%1000==0 : print "" # event weight mystruct.eventWeight = event.weight(weightList=["PileUp"]) # that's where we access the jets for index,jet in enumerate(event.jets): if not goodJets[index]: continue mystruct.pt = jet.pt() mystruct.eta = jet.eta() mystruct.flavor = jet.partonFlavour() mystruct.ssvhe = jet.bDiscriminator("simpleSecondaryVertexHighEffBJetTags") mystruct.ssvhp = jet.bDiscriminator("simpleSecondaryVertexHighPurBJetTags") mystruct.csv = jet.bDiscriminator("combinedSecondaryVertexBJetTags") tree.Fill() f.Write() f.Close() print ""
def main(options): """simplistic program main""" # do basic arg checking if options.path is None: print "Error: no input path specified." parser.print_help() return levels = [] if options.all: levels = range(EventSelection.eventCategories()) elif not options.levels is None: levels= map(int,options.levels.split(',')) levels.sort() if len(levels)==0: print "Error: no level specified for processing." parser.print_help() return if min(levels)<0: print "Error: levels must be positive integers." parser.print_help() return if max(levels)>=EventSelection.eventCategories(): print "Error: last level is",EventSelection.eventCategories()-1 parser.print_help() return if options.Njobs<1: print "Error: Njobs must be strictly positive." parser.print_help() return if options.jobNumber>=options.Njobs: print "Error: jobNumber must be strictly smaller than Njobs." parser.print_help() return # if all ok, run the procedure runAnalysis(path=options.path,outputname=options.outputname, levels=levels, Njobs=options.Njobs, jobNumber=options.jobNumber)
def DumpEventInfo(event=None, runNumber=None, eventNumber=None, lsNumber=None, path="./"): """Dump informations about a given event""" # in case no event is provided, find it using eventNumber if event is None: if (eventNumber is None) or (runNumber is None): print "DumpEventInfo Error: either pass an event or an event number" return # find event based on run and event if os.path.isdir(path): dirList=os.listdir(path) files=[] for fname in dirList: files.append(path+fname) elif os.path.isfile(path): files=[path] else: files=[] events = AnalysisEvent(files) EventSelection.prepareAnalysisEvent(events) DumpEventInfo(events[(runNumber, eventNumber, lsNumber)]) return # run the producers when we want to print the outcome, and mute unneeded collections for product in eventDumpConfig.productsToPrint: getattr(event,product) for collection in eventDumpConfig.collectionsToHide: event.removeCollection(collection) # Now, we can go on with the printing. print event
def DumpEventInfo(event=None, eventNumber=None, path="./"): """Dump informations about a given event""" # in case no event is provided, find it using eventNumber if event is None: if eventNumber is None: print "DumpEventInfo Error: either pass an event or an event number" return # find event based on run and event if os.path.isdir(path): dirList=os.listdir(path) files=[] for fname in dirList: files.append(path+fname) elif os.path.isfile(path): files=[path] else: files=[] events = AnalysisEvent(files) EventSelection.prepareAnalysisEvent(events) DumpEventInfo(events[eventNumber]) return # run the producers when we want to print the outcome, and mute unneeded collections for product in eventDumpConfig.productsToPrint: getattr(event,product) for collection in eventDumpConfig.collectionsToHide: event.removeCollection(collection) # Now, we can go on with the printing. print event
def __init__(self, dir=None, dataset=None, mode="plots"):
    """Prepare the event-selection control plots.

    Output file/directory handling is delegated to BaseControlPlots when no
    directory is given.
    """
    BaseControlPlots.__init__(self, dir=dir, purpose="eventSelection",
                              dataset=dataset, mode=mode)
    # number of selection categories, cached for the per-event processing
    self.eventCategories = EventSelection.eventCategories()
def process(self, event):
    """EventSelectionControlPlots"""
    result = {}
    ## event category: record every category the event belongs to
    categoryData = event.category
    result["category"] = [category
                          for category in range(self.eventCategories)
                          if EventSelection.isInCategory(category, categoryData)]
    result["event"] = event.event()
    return result
def process(self, event):
    """EventSelectionControlPlots"""
    result = {}
    ## event category: collect the indices of all matching categories
    categoryData = event.category
    result["category"] = []
    for cat in range(self.eventCategories):
        if EventSelection.isInCategory(cat, categoryData):
            result["category"].append(cat)
    result["event"] = event.event()
    return result
def main(options): """simplistic program main""" # do basic arg checking if options.path is None: print "Error: no input path specified." parser.print_help() return levels = [] if options.all: levels = range(EventSelection.eventCategories()) elif not options.levels is None: levels = map(int, options.levels.split(',')) levels.sort() if len(levels) == 0: print "Error: no level specified for processing." parser.print_help() return if min(levels) < 0: print "Error: levels must be positive integers." parser.print_help() return if max(levels) >= EventSelection.eventCategories(): print "Error: last level is", EventSelection.eventCategories() - 1 parser.print_help() return if options.Njobs < 1: print "Error: Njobs must be strictly positive." parser.print_help() return if options.jobNumber >= options.Njobs: print "Error: jobNumber must be strictly smaller than Njobs." parser.print_help() return # if all ok, run the procedure runAnalysis(path=options.path, outputname=options.outputname, levels=levels, Njobs=options.Njobs, jobNumber=options.jobNumber)
def DumpEventList(category, path="./", output="eventlist.txt"):
    """Dump a list of events in a given category to a text file.

    category -- selection category index to test with EventSelection.isInCategory
    path     -- input directory or single input file
    output   -- name of the text file to write
    """
    # input file list
    if os.path.isdir(path):
        files = [path + fname for fname in os.listdir(path)]
    elif os.path.isfile(path):
        files = [path]
    else:
        files = []
    # events iterator
    events = AnalysisEvent(files)
    # collections and producers used in the analysis
    EventSelection.prepareAnalysisEvent(events)
    # FIX: the output file was previously opened and never closed (leaked
    # handle, possibly unflushed buffers); `with` now closes it deterministically.
    with open(output, "w") as event_list:
        for event in events:
            # check category
            if EventSelection.isInCategory(category, event.category):
                # print
                print >> event_list, "Event", event.event()
def DumpEventList(category, path="./", output="eventlist.txt"):
    """Dump a list of events in a given category to a text file.

    category -- selection category index to test with EventSelection.isInCategory
    path     -- input directory or single input file
    output   -- name of the text file to write
    """
    # input file list
    if os.path.isdir(path):
        files = []
        for fname in os.listdir(path):
            files.append(path + fname)
    elif os.path.isfile(path):
        files = [path]
    else:
        files = []
    # events iterator
    events = AnalysisEvent(files)
    # collections and producers used in the analysis
    EventSelection.prepareAnalysisEvent(events)
    # FIX: the output file was previously opened and never closed (leaked
    # handle, possibly unflushed buffers); `with` now closes it deterministically.
    with open(output, "w") as event_list:
        for event in events:
            # check category
            if EventSelection.isInCategory(category, event.category):
                # print
                print >> event_list, "Event", event.event()
def runAnalysis(path, levels, outputname="controlPlots.root", Njobs=1, jobNumber=1): """produce all the plots in one go""" # inputs if os.path.isdir(path): dirList = list( itertools.islice(os.listdir(path), jobNumber, None, Njobs)) files = [] for fname in dirList: files.append(path + "/" + fname) elif os.path.isfile(path): files = [path] else: files = [] # output output = ROOT.TFile(outputname, "RECREATE") if configuration.runningMode == "dataset": ROOT.RooAbsData.setDefaultStorageType(ROOT.RooAbsData.Tree) rds = ROOT.RooDataSet(configuration.RDSname, configuration.RDSname, ROOT.RooArgSet()) # events iterator, plus configuration of standard collections and producers events = AnalysisEvent(files) EventSelection.prepareAnalysisEvent(events) # prepare the plots controlPlots = [] if configuration.runningMode == "plots": leafList = [None] * EventSelection.eventCategories() createDirectory(EventSelection.categoriesHierarchy(), output, leafList) for levelDir in leafList: levelPlots = [] for cp in configuration.controlPlots: levelPlots.append( getattr(import_module(cp.module), cp.classname)(dir=levelDir.mkdir(cp.label), mode="plots")) controlPlots.append(levelPlots) else: for cp in configuration.controlPlots: controlPlots.append( getattr(import_module(cp.module), cp.classname)(dir=None, mode="dataset", dataset=rds)) # book histograms (separate iteration for clarity) if configuration.runningMode == "plots": for level in levels: for conf, cp in zip(configuration.controlPlots, controlPlots[level]): cp.beginJob(**conf.kwargs) else: for conf, cp in zip(configuration.controlPlots, controlPlots): cp.beginJob(**conf.kwargs) for cp in controlPlots[:1]: cp.defineCategories(EventSelection.categoryNames) # process events i = 0 t0 = time.time() for event in events: # printout if i % 100 == 0: print "Processing... event %d. Last batch in %f s." 
% (i, ( time.time() - t0)) t0 = time.time() if configuration.runningMode == "plots": # loop on channels plots = filter( lambda x: EventSelection.isInCategory(x, event.category), levels) # process the event once (for the first level) selectionPlotsData = [] for level in plots[:1]: for cp in controlPlots[level]: selectionPlotsData.append(cp.process(event)) # fill the histograms for level in plots: for cp, data in zip(controlPlots[level], selectionPlotsData): cp.fill(data, event.weight(category=level)) else: for cp in controlPlots[:1]: # set categories (first CP only) cp.setCategories( map( lambda c: EventSelection.isInCategory( c, event.category), range(EventSelection.eventCategories()))) for cp in controlPlots: # process event (all CP) cp.processEvent(event) # add to the dataset rds.add(getArgSet(controlPlots)) i += 1 # save all if configuration.runningMode == "plots": for level in levels: for cp in controlPlots[level]: cp.endJob() else: for cp in controlPlots: cp.endJob() # for dataset, write the merged RDS to file if configuration.runningMode == "dataset": output.cd() ws_ras = ROOT.RooWorkspace(configuration.WSname, configuration.WSname) getattr(ws_ras, 'import')(rds.get()) output.Add(ws_ras) ws_ras.Write() rds.tree().Write() # close the file output.Close()
def runAnalysis(path, levels, outputname="controlPlots.root", Njobs=1, jobNumber=1): """produce all the plots in one go""" # inputs if os.path.isdir(path): dirList=list(itertools.islice(os.listdir(path), jobNumber, None, Njobs)) files=[] for fname in dirList: files.append(path+"/"+fname) elif os.path.isfile(path): files=[path] else: files=[] # output output = ROOT.TFile(outputname, "RECREATE") if configuration.runningMode=="dataset": ROOT.RooAbsData.setDefaultStorageType(ROOT.RooAbsData.Tree) rds = ROOT.RooDataSet(configuration.RDSname, configuration.RDSname, ROOT.RooArgSet()) # events iterator, plus configuration of standard collections and producers events = AnalysisEvent(files) EventSelection.prepareAnalysisEvent(events) # prepare the plots controlPlots=[] if configuration.runningMode=="plots": leafList = [None]*EventSelection.eventCategories() createDirectory(EventSelection.categoriesHierarchy(), output, leafList) for levelDir in leafList: levelPlots=[] for cp in configuration.controlPlots: levelPlots.append(getattr(import_module(cp.module),cp.classname)(dir=levelDir.mkdir(cp.label),mode="plots")) controlPlots.append(levelPlots) else: for cp in configuration.controlPlots: controlPlots.append(getattr(import_module(cp.module),cp.classname)(dir=None, mode="dataset", dataset=rds)) # book histograms (separate iteration for clarity) if configuration.runningMode=="plots": for level in levels: for conf,cp in zip(configuration.controlPlots,controlPlots[level]): cp.beginJob(**conf.kwargs) else: for conf,cp in zip(configuration.controlPlots,controlPlots): cp.beginJob(**conf.kwargs) for cp in controlPlots[:1]: cp.defineCategories(EventSelection.categoryNames) # process events i = 0 t0 = time.time() for event in events: # printout if i%100==0 : print "Processing... event %d. Last batch in %f s." 
% (i,(time.time()-t0)) t0 = time.time() if configuration.runningMode=="plots": # loop on channels plots = filter(lambda x: EventSelection.isInCategory(x,event.category) ,levels) # process the event once (for the first level) selectionPlotsData=[] for level in plots[:1]: for cp in controlPlots[level]: selectionPlotsData.append(cp.process(event)) # fill the histograms for level in plots: for cp, data in zip(controlPlots[level],selectionPlotsData): cp.fill(data, event.weight(category=level)) else: for cp in controlPlots[:1]: # set categories (first CP only) cp.setCategories(map(lambda c:EventSelection.isInCategory(c, event.category),range(EventSelection.eventCategories()))) for cp in controlPlots: # process event (all CP) cp.processEvent(event) # add to the dataset rds.add(getArgSet(controlPlots)) i += 1 # save all if configuration.runningMode=="plots": for level in levels: for cp in controlPlots[level]: cp.endJob() else: for cp in controlPlots: cp.endJob() # for dataset, write the merged RDS to file if configuration.runningMode=="dataset": output.cd() ws_ras = ROOT.RooWorkspace(configuration.WSname, configuration.WSname) getattr(ws_ras,'import')(rds.get()) output.Add(ws_ras) ws_ras.Write() rds.tree().Write() # close the file output.Close()
def runAnalysis(path, txt, levels, outputname="controlPlots.root", Njobs=1, jobNumber=1): """produce all the plots in one go""" # inputs dcapDir = False if "dcap://" in path: split = path.split("/") decapDir = os.path.isdir("/"+"/".join(split[3:])) if os.path.isdir(path) or decapDir: if txt: # IWN: use txt file with all names f = open(txt) dirList=list(itertools.islice([line[:-1] for line in f], jobNumber, None, Njobs)) f.close() else: dirList=list(itertools.islice(os.listdir(path), jobNumber, None, Njobs)) files=[] for fname in dirList: files.append(path+"/"+fname) elif os.path.isfile(path): files=[path] else: files=[path] # output output = ROOT.TFile(outputname, "RECREATE") if configuration.runningMode=="dataset": ROOT.RooAbsData.setDefaultStorageType(ROOT.RooAbsData.Tree) rds = ROOT.RooDataSet(configuration.RDSname, configuration.RDSname, ROOT.RooArgSet()) # events iterator, plus configuration of standard collections and producers events = AnalysisEvent(files) EventSelection.prepareAnalysisEvent(events) # prepare the plots controlPlots=[] if configuration.runningMode=="plots": leafList = [None]*EventSelection.eventCategories() createDirectory(EventSelection.categoriesHierarchy(), output, leafList) for levelDir in leafList: levelPlots=[] for cp in configuration.controlPlots: levelPlots.append(getattr(import_module(cp.module),cp.classname)(dir=levelDir.mkdir(cp.label),mode="plots")) controlPlots.append(levelPlots) else: for cp in configuration.controlPlots: controlPlots.append(getattr(import_module(cp.module),cp.classname)(dir=None, mode="dataset", dataset=rds)) # book histograms (separate iteration for clarity) if configuration.runningMode=="plots": for level in levels: for conf,cp in zip(configuration.controlPlots,controlPlots[level]): cp.beginJob(**conf.kwargs) else: for conf,cp in zip(configuration.controlPlots,controlPlots): cp.beginJob(**conf.kwargs) for cp in controlPlots[:1]: cp.defineCategories(EventSelection.categoryNames) # process events i = 0 DeltaTb 
= 0 tb = time.time() n = events.GetEntries() ETA = " " print "\n\n\t__%s_events_to_process.__" %n for event in events: # printout if i%1000==0 : if i%10000==0 : if DeltaTb>0: # ETA = " ETA: %s" % time.strftime("%H h. %M min. %S s.", time.gmtime( DeltaTb/i*(n-i) )) # runtime = "\n Running for %s" % time.strftime("%H h. %M min. %S s.", time.gmtime(time.time()-t0)) # print runtime.replace(" 00 h.","").replace("for 0","for ") + ETA.replace(" 00 h.","").replace(": 0",": ") (m,s) = divmod(time.time()-t0,60) (h,m) = divmod(m,60) runtime = "\n Running for %i h. %i min. %i s." % (h,m,s) (m,s) = divmod(DeltaTb/i*(n-i),60) (h,m) = divmod(m,60) ETA = " ETA: %i h. %i min. %i s." % (h,m,s) print runtime.replace(" 0 h.","") + ETA.replace(" 0 h.","") DeltaTb += time.time()-tb print "%d%%: Processing... event %d. Last batch in %f s." % (i*100/n,i,(time.time()-tb)) tb = time.time() i += 1 if configuration.runningMode=="plots": # loop on channels plots = filter(lambda x: EventSelection.isInCategory(x,event.category) ,levels) # process the event once (for the first level) selectionPlotsData=[] for level in plots[:1]: for cp in controlPlots[level]: selectionPlotsData.append(cp.process(event)) # fill the histograms for level in plots: for cp, data in zip(controlPlots[level],selectionPlotsData): cp.fill(data, event.weight(category=level)) else: for cp in controlPlots[:1]: # set categories (first CP only) cp.setCategories(map(lambda c: EventSelection.isInCategory(c, event.category),range(EventSelection.eventCategories()))) for cp in controlPlots: # process event (all CP) cp.processEvent(event) # add to the dataset rds.add(getArgSet(controlPlots)) # save all if configuration.runningMode=="plots": for level in levels: for cp in controlPlots[level]: cp.endJob(level) else: for cp in controlPlots: cp.endJob() # for dataset, write the merged RDS to file if configuration.runningMode=="dataset": output.cd() ws_ras = ROOT.RooWorkspace(configuration.WSname, configuration.WSname) 
getattr(ws_ras,'import')(rds.get()) output.Add(ws_ras) ws_ras.Write() rds.tree().Write() # close the file output.Close() print "\nDone. Only took %s!\n" % time.strftime("%H:%M:%S", time.gmtime(time.time()-t0))