def fit(self, input_file, cut, num_events, output, epochs, batch):
    """
    Do the fit

    Reads the ``events`` tree of ``input_file``, evaluates every expression
    in ``self.vars`` (inputs) and ``self.targets`` (targets) for each entry
    that passes ``cut``, trains ``self.model`` on the result and writes the
    frozen graph into the ``weights`` directory under the name ``output``.

    Parameters:
      input_file: Name of the ROOT file holding the ``events`` tree
      cut: Selection expression; entries for which it evaluates to 0 are
           skipped. An empty value disables the selection
      num_events: Maximum number of selected entries to use. Any value <= 0
                  means "use every entry of the tree"
      output: File name handed to ``graph_io.write_graph``
      epochs: Number of epochs handed to ``model.fit``
      batch: Batch size handed to ``model.fit``
    """
    in_file = TFile(input_file)
    tree = in_file.events

    val_forms = [TTreeFormula(v, v, tree) for v in self.vars]
    target_forms = [TTreeFormula(t, t, tree) for t in self.targets]
    cut_form = TTreeFormula(cut, cut, tree) if cut else None

    # Upper bound on the number of rows we can possibly fill
    reserve = num_events if num_events > 0 else tree.GetEntries()

    inputs = numpy.zeros((reserve, len(self.vars)))
    targets = [numpy.zeros((reserve, 1)) for _ in self.targets]

    # Set up the inputs
    filled = 0
    for _ in tree:
        # Compare against ``reserve`` (not ``num_events``) so that
        # num_events <= 0 really means "all entries".
        if filled == reserve:
            break

        if cut_form and not cut_form.EvalInstance():
            continue

        if filled % 10000 == 0:
            print('Filling ' + str(float(filled * 100) / reserve) + '%')

        for jndex, val in enumerate(val_forms):
            inputs[filled][jndex] = val.EvalInstance()
        for jndex, target in enumerate(target_forms):
            targets[jndex][filled][0] = target.EvalInstance()

        filled += 1

    # Entries rejected by the cut leave unused rows at the end of the
    # pre-allocated arrays; drop them so the model is not trained on
    # all-zero padding.
    if filled < reserve:
        inputs = inputs[:filled]
        targets = [column[:filled] for column in targets]

    self.model.fit(inputs, targets, validation_split=0.5,
                   epochs=epochs, batch_size=batch,
                   callbacks=[
                       keras.callbacks.TensorBoard(
                           log_dir='weights/logdir', histogram_freq=1,
                           write_graph=True, write_images=True)
                   ])

    # Freeze the trained variables into constants and write the graph
    sess = keras.backend.get_session()
    output_node = [n.op.name for n in self.model.outputs]
    print('Output node ' + str(output_node))

    graph = graph_util.convert_variables_to_constants(
        sess, sess.graph.as_graph_def(), output_node)
    graph_io.write_graph(graph, 'weights', output, as_text=False)
def getTTreeFormula(self, tree=0):
    """Build a TTreeFormula named ``self.name`` for this object's cut.

    The formula expression is ``self.cut`` when that is set and the
    always-true expression ``'1'`` otherwise. RuntimeWarnings whose message
    starts with 'creating converter' are silenced first.
    """
    import warnings
    from ROOT import TTreeFormula

    warnings.filterwarnings(action='ignore', category=RuntimeWarning,
                            message='creating converter.*')

    expression = self.cut if self.cut else '1'
    return TTreeFormula(self.name, expression, tree)
def _genPlotCuts(self, plots):
    """Return one single-element list holding a cut TTreeFormula per plot.

    A plot whose ``cuts`` string is empty gets the pass-everything
    expression '1.0'. Every formula is bound to ``self._tree`` and named
    with a fresh ``id_gen(10)`` identifier.
    """
    # Lazy generator, so each selection string is resolved right before
    # its formula is built.
    selections = (plot.cuts if plot.cuts != "" else '1.0' for plot in plots)
    return [[TTreeFormula(id_gen(10), selection, self._tree)]
            for selection in selections]
def _genPlotFormulae(self, plots):
    """Return, per plot, the list of TTreeFormula objects for its variables.

    The outer list has one entry per plot. A plot whose ``externalFunction``
    is not None contributes an empty list; otherwise one formula is created
    for every entry of ``plot.treeVariables``, bound to ``self._tree`` and
    named with a fresh ``id_gen(10)`` identifier.
    """
    return [
        [TTreeFormula(id_gen(10), var, self._tree)
         for var in plot.treeVariables
         if plot.externalFunction is None]
        for plot in plots
    ]
def add_var_set_br_addr(varlist, reader_method, intree, allvars):
    """Add variables to TMVA::Reader, and associate to tree branch

    Each entry of ``varlist`` is either a plain name or ``name:=expression``.
    For every entry, ``allvars[name]`` is set to a two-element list: a
    one-element float array and a TTreeFormula (on ``intree``) evaluating
    the expression. ``reader_method`` is then called with the original
    entry string and that float array.
    """
    for var in varlist:
        # 'var:=var1+var2' -> ('var', 'var1+var2'); a bare 'var' is its own
        # expression.
        key, separator, formula = var.partition(':=')
        if not separator:
            formula = key

        slot = array('f', [0.])  # array for TMVA::Reader
        allvars[key] = [slot, TTreeFormula(key, formula, intree)]
        reader_method(var, allvars[key][0])
def fill_dataset(varargset, ftree, wt, wtvar, cut=''):
    """Return a dataset (slow, get_dataset is more efficient).

    Builds a weighted RooDataSet from the ntuple `ftree' by iterating over
    its entries, after registering `cut' as a splice named 'sel' on the
    tree. `wt' is the weight expression in the tree and `wtvar' the
    matching RooRealVar; `varargset' must contain `wtvar'.

    Filling entry by entry lets different datasets share the same weight
    variable name, so that they can be combined later on (also when they
    are combined as different categories).

    """
    from rplot.fixes import ROOT
    from ROOT import RooDataSet, RooFit, TTreeFormula

    from helpers import suppress_warnings
    suppress_warnings()

    from rplot.tselect import Tsplice
    selector = Tsplice(ftree)
    selector.make_splice('sel', cut)

    wtname = wtvar.GetName()

    # One formula per variable; the weight variable evaluates `wt', every
    # other variable evaluates the branch of the same name.
    formulae = {}
    for member in varargset:
        key = member.GetName()
        formulae[key] = TTreeFormula(key, wt if key == wtname else key, ftree)

    dataset = RooDataSet('dataset', 'Dataset', varargset,
                         RooFit.WeightVar(wtvar))

    nentries = ftree.GetEntries()
    for entry in xrange(nentries):
        ftree.GetEntry(entry)
        for key, formula in formulae.iteritems():
            varargset.find(key).setVal(formula.EvalInstance())
        dataset.add(varargset, varargset[wtname].getVal())

    return dataset
def run(self, selections, dv, dv2d, ch='', name='', nevents=-1):
    """Book and fill the 1D/2D histograms of every selection.

    Parameters:
      selections: Selection expressions; each one is compiled as a
                  TTreeFormula on the "events" tree of ``self.rt`` and is
                  also the key into ``self.sv`` / ``self.sv2d``
      dv: Mapping of 1D variable key -> settings with "name", "title",
          "bin", "xmin", "xmax" and optional "divide"
      dv2d: Mapping of 2D variable key -> settings with "namex", "namey",
            "titlex", "titley", "binx", "xmin", "xmax", "biny", "ymin",
            "ymax"
      ch: Unused here
      name: Histogram name prefix; defaults to ``self.name``
      nevents: Number of entries to process, -1 for all of them. Values
               larger than the tree are clamped to the tree size
    """
    # initialize dictionary selection: list of histograms
    if name == '':
        name = self.name

    for nsel, s in enumerate(selections):
        self.sv[s] = collections.OrderedDict()
        self.sv2d[s] = collections.OrderedDict()

        selstr = 'sel{}'.format(int(nsel))

        for v in dv.keys():
            hname = '{}_{}_{}'.format(name, selstr, v)
            self.sv[s][v] = TH1D(hname, hname + ";" + dv[v]["title"] + ";",
                                 dv[v]["bin"], dv[v]["xmin"], dv[v]["xmax"])
            self.sv[s][v].Sumw2()

        for v in dv2d.keys():
            hname = '{}_{}_{}'.format(name, selstr, v)
            self.sv2d[s][v] = TH2D(
                hname,
                hname + ";" + dv2d[v]["titlex"] + ";" + dv2d[v]["titley"] + ";",
                dv2d[v]["binx"], dv2d[v]["xmin"], dv2d[v]["xmax"],
                dv2d[v]["biny"], dv2d[v]["ymin"], dv2d[v]["ymax"],
            )
            self.sv2d[s][v].Sumw2()

    rf = TFile(self.rt)
    t = rf.Get("events")

    # Never loop past the end of the tree: GetEntry on a missing entry
    # leaves the previous event's values in place and would double count.
    if nevents == -1 or t.GetEntries() < nevents:
        numberOfEntries = t.GetEntries()
        print('running over the full entries %i' % numberOfEntries)
    else:
        numberOfEntries = nevents
        print('running over a subset of entries %i' % numberOfEntries)

    for s in selections:
        formula = TTreeFormula("", s, t)

        # loop over events
        print('number of events: ' + str(numberOfEntries))
        for entry in xrange(numberOfEntries):
            if (entry + 1) % 500 == 0:
                sys.stdout.write('... %i events processed ...\r' % (entry + 1))
                sys.stdout.flush()

            t.GetEntry(entry)
            weight = self.w * getattr(t, "weight")

            # apply selection
            result = formula.EvalInstance()

            # fill histos on selected events
            if result > 0.:
                for v in dv.keys():
                    # optional per-variable divisor applied to the value
                    try:
                        divide = dv[v]["divide"]
                    except KeyError:
                        divide = 1
                    self.sv[s][v].Fill(getattr(t, dv[v]["name"]) / divide,
                                       weight)

                for v in dv2d.keys():
                    self.sv2d[s][v].Fill(getattr(t, dv2d[v]["namex"]),
                                         getattr(t, dv2d[v]["namey"]),
                                         weight)
def load(inputfile, target, inputs, adversary, weight, reweight):
    """
    Parameters:
      inputfile: Name of the ROOT file that contains all our data for training
      target: Expression that yields class number
      inputs: List of expressions to input into the classifier
      adversary: Expressions that the adversary should not be able to guess
                 from the classifier output
      weight: Expressions to get the sample weights
      reweight: Bool to decide if should reweight

    Returns:
      Numpy Arrays that can be used in fitting with the following info
      - Labels that are used to classify (one-hot encoded)
      - Raw data to do the classification with
      - Data that should not be predictable based on the predicted label
      - Per-event weights, or None when neither ``weight`` nor ``reweight``
        is set
    """
    in_file = TFile(inputfile)
    tree = in_file.events

    target_form = TTreeFormula(target, target, tree)
    val_forms = [TTreeFormula(v, v, tree) for v in inputs]
    adversary_forms = [TTreeFormula(a, a, tree) for a in adversary]
    weight_form = TTreeFormula(weight, weight, tree) if weight else None

    reserve = tree.GetEntries()

    data = numpy.zeros((reserve, len(inputs)))
    smooths = numpy.zeros((reserve, len(adversary)))
    labels = numpy.zeros((reserve, 1))
    weights = numpy.zeros(reserve) if weight else None

    logging.info('Reading %i events', reserve)

    # Set up the inputs
    for event, _ in enumerate(tree):
        if event == reserve:
            break

        if event % 10000 == 0:
            logging.info('Filling %s', str(float(event * 100) / reserve) + '%')

        labels[event][0] = target_form.EvalInstance()
        if weight:
            weights[event] = weight_form.EvalInstance()
        for jndex, val in enumerate(val_forms):
            data[event][jndex] = val.EvalInstance()
        for jndex, adv in enumerate(adversary_forms):
            smooths[event][jndex] = adv.EvalInstance()

    if reweight:
        if weights is None:
            # No weight expression was given: start from unit weights so
            # the reweighting below has something to scale instead of
            # failing on ``None``.
            weights = numpy.ones(reserve)

        # Want to reweight each class separately
        smooth_dict = collections.defaultdict(list)
        for label, point in zip(labels, smooths):
            smooth_dict[label[0]].append(point)

        reweighters = {
            key: Reweighter(row) for key, row in smooth_dict.items()
        }

        for index, point in enumerate(data):
            weights[index] *= reweighters[labels[index][0]].get_weight(point)

    return keras.utils.to_categorical(labels), data, smooths, weights
mva2_val = array('f', [0.]) tree_out = TTree('tree', 'tree') mva0_name = 'mva0' tree_out.Branch(mva0_name, mva0_val, mva0_name+'/F') mva1_name = 'mva1' tree_out.Branch(mva1_name, mva1_val, mva1_name+'/F') mva2_name = 'mva2' tree_out.Branch(mva2_name, mva2_val, mva2_name+'/F') ave_mva = 0. for var in train_vars: if var.drawname != var.name: var.formula = TTreeFormula('formula'+var.name, var.drawname, tree_in) var.formula.GetNdata() for i_ev, event in enumerate(tree_in): if i_ev % 10000 == 0: print 'Event', i_ev split_var_val = split_var.formula.EvalInstance() if hasattr(split_var, 'formula') else getattr(event, split_var.name) if int(split_var_val * 1000) % 2 == 0: clf = clf0_0jet if event.n_jets > 0.5: if event.vbf_mjj > 500. and event.vbf_deta > 3.5: clf = clf0_vbf else:
def fillIntoTree(out_tree, branches, cfg, hist_cfg, vcfgs, total_scale, plot, verbose, friend_func):
    """Fill ``out_tree`` with the variables and full weight of one sample.

    A ``HistogramCfg`` is treated as a container: its ``total_scale`` (if
    set) is folded into ``total_scale`` and the function recurses into each
    of its ``cfgs``. For a plain sample configuration the input tree is
    read through ``plot.readTree``, every entry with non-zero weight is
    evaluated and copied into the branch buffers, and ``out_tree.Fill()``
    is called.

    ``branches`` holds one buffer per entry of ``vcfgs``; its last element
    is the buffer receiving the full event weight.
    """
    if isinstance(cfg, HistogramCfg):
        # Loop over sub-cfgs and fill them
        if cfg.total_scale:
            total_scale *= cfg.total_scale
        else:
            total_scale *= 1.
        for sub_cfg in cfg.cfgs:
            fillIntoTree(out_tree, branches, sub_cfg, cfg, vcfgs,
                         total_scale, plot, verbose, friend_func)
        return

    path_parts = [cfg.ana_dir, cfg.dir_name, cfg.tree_prod_name, 'tree.root']
    file_name = '/'.join(path_parts)

    # Attaches tree to plot
    ttree = plot.readTree(file_name, cfg.tree_name, verbose=verbose,
                          friend_func=friend_func)

    # Sample-specific cuts take precedence over the histogram-level cut
    norm_cut = cfg.norm_cut if cfg.norm_cut else hist_cfg.cut
    shape_cut = cfg.shape_cut if cfg.shape_cut else hist_cfg.cut

    full_weight = branches[-1]

    weight = hist_cfg.weight
    if cfg.weight_expr:
        weight = '*'.join([weight, cfg.weight_expr])

    if hist_cfg.weight:
        weighted = '({c}) * {we}'
        norm_cut = weighted.format(c=norm_cut, we=weight)
        shape_cut = weighted.format(c=shape_cut, we=weight)

    # and this one too
    sample_weight = cfg.scale * total_scale
    if not cfg.is_data:
        sample_weight *= hist_cfg.lumi * cfg.xsec / cfg.sumweights

    formula = TTreeFormula('weight_formula', norm_cut, ttree)
    formula.GetNdata()

    # Create TTreeFormulas for all vars whose draw expression is not
    # simply the branch name
    for var in vcfgs:
        if var.drawname != var.name:
            var.formula = TTreeFormula('formula' + var.name, var.drawname,
                                       ttree)
            var.formula.GetNdata()

    # Loop over the input tree and save weight and variables to the
    # other tree
    n_entries = ttree.GetEntries()
    for i_entry in xrange(n_entries):
        ttree.GetEntry(i_entry)

        w = formula.EvalInstance()
        if w == 0.:
            continue

        full_weight[0] = w * sample_weight
        if abs(full_weight[0]) > 1000.:
            print(' '.join(["WARNING, unusually large weight",
                            str(w), str(sample_weight)]))
            import pdb
            pdb.set_trace()
            print('\nWeight: ' + str(full_weight[0]))
            print(cfg.name)
            print(norm_cut)

        for branch, var in zip(branches, vcfgs):
            if hasattr(var, 'formula'):
                branch[0] = var.formula.EvalInstance()
            else:
                branch[0] = getattr(ttree, var.name)

        out_tree.Fill()

    if shape_cut != norm_cut:
        print('WARNING: different norm and shape cuts currently not supported in HistCreator.createTrees')
def Count(chan, trigs):
    """Print the raw and weighted event yields passing ``chan`` and ``trigs``.

    Both arguments are selection expressions compiled as TTreeFormula on the
    "nominal" chain built from ``<prepath>tth*.root``. The weighted yield
    normalises every selected event by the summed ``totalEventsWeighted`` of
    the file it comes from (taken from the "sumWeights" chain).

    Raises:
      RuntimeError: a selected event comes from a file whose name contains
                    none of the known sample IDs, so no cross-section
                    weight is available for it.
    """
    # deal with weights first: one summed weight per input file
    sumWeights = TChain("sumWeights")
    sumWeights.Add("%stth*.root" % prepath)
    weights = []
    fCurrent_wt = 0
    sampleNEvt = 0
    nWeightEntries = sumWeights.GetEntries()
    for a in range(nWeightEntries):
        sumWeights.GetEntry(a)
        totalEventsWeighted = getattr(sumWeights, 'totalEventsWeighted')
        if sumWeights.GetTreeNumber() != fCurrent_wt:
            # moved on to the next file: store the finished sum
            fCurrent_wt = sumWeights.GetTreeNumber()
            weights.append(sampleNEvt)
            sampleNEvt = 0
        sampleNEvt = sampleNEvt + totalEventsWeighted
        if a == nWeightEntries - 1:
            weights.append(sampleNEvt)  # last file

    chain = TChain("nominal")
    chain.Add("%stth*.root" % prepath)
    nentries = chain.GetEntries()

    # Only read the branches that may be used by the cuts and the weights
    active_branches = (
        "Mll01", "total_charge",
        "lep_Pt_0", "lep_Pt_1",
        "lep_Eta_0", "lep_Eta_1",
        "lep_ID_0", "lep_ID_1",
        "lep_truthPdgId_0", "lep_truthPdgId_1",
        "lep_truthOrigin_0", "lep_truthOrigin_1",
        "lep_truthType_0", "lep_truthType_1",
        "lep_isQMisID_0", "lep_isQMisID_1",
        "nJets_OR_T_MV2c10_70", "nJets_OR_T",
        "lep_isTightLH_0", "lep_isTightLH_1",
        "lep_isLooseLH_0", "lep_isLooseLH_1",
        "lep_isolationFixedCutTight_0", "lep_isolationFixedCutLoose_0",
        "lep_isolationFixedCutTight_1", "lep_isolationFixedCutLoose_1",
        "lep_isolationFixedCutTightTrackOnly_0",
        "lep_isolationFixedCutTightTrackOnly_1",
        "HLT*", "*type", "RunYear", "passEventCleaning",
        "lep_isTrigMatch_0", "lep_isTrigMatch_1",
        "lep_isTrigMatchDLT_0", "lep_isTrigMatchDLT_1",
        "mcWeightOrg", "pileupEventWeight_090",
        "lepSFObjTight", "lepSFTrigTight",
        "JVT_EventWeight", "SherpaNJetWeight", "MV2c10_70_EventWeight",
        "lep_chargeIDBDT*", "nTaus_OR_Pt25",
        "tau_JetBDTSigTight_0", "tau_JetBDTSigTight_1",
        "tau_tagWeightBin_0", "tau_tagWeightBin_1",
        "tau_passMuonOLR_0", "tau_passMuonOLR_1",
        "tau_passEleBDT_0", "tau_passEleBDT_1",
        "tau_charge_0", "tau_charge_1",
        "lep_ID_2", "Mll02",
        "lep_promptLeptonVeto_TagWeight_0",
        "lep_promptLeptonVeto_TagWeight_1",
        "lep_ambiguityType_0", "lep_ambiguityType_1",
    )
    chain.SetBranchStatus("*", 0)
    for branch_name in active_branches:
        chain.SetBranchStatus(branch_name, 1)

    # Base weight per sample, keyed by the sample ID found in the file name
    sample_weights = (
        ("341177", 0.05343),
        ("341270", 0.22276),
        ("341271", 0.23082),
    )

    # cuts
    fCurrent = -1
    chain.LoadTree(0)
    cuts_sr = TTreeFormula("cuts_sr", chan, chain)
    cuts_trig = TTreeFormula("cuts_trig", trigs, chain)

    raw_evts, numevts = 0, 0
    for evt in range(nentries):
        chain.GetEntry(evt)

        # the formulae must be told when the chain moves to a new file
        if chain.GetTreeNumber() != fCurrent:
            fCurrent = chain.GetTreeNumber()
            cuts_sr.Notify()
            cuts_trig.Notify()

        if not (cuts_sr.EvalInstance() and cuts_trig.EvalInstance()):
            continue

        # get current file
        currentFileName = chain.GetCurrentFile().GetName()

        # Look the base weight up afresh for every event. Falling through
        # without a match used to reuse the previous event's full weight
        # (or fail with an unbound name on the first event).
        weight = None
        for sample_id, sample_weight in sample_weights:
            if sample_id in currentFileName:
                weight = sample_weight  # the last matching ID wins
        if weight is None:
            raise RuntimeError(
                "Count: no sample weight known for file '%s'" % currentFileName)

        RunYear = getattr(chain, "RunYear")
        lumi = 1.0
        if RunYear < 2016.5:
            lumi = 36074.6
        if RunYear > 2016.5:
            lumi = 43813.7

        kfac, filEff = 1, 1
        weight = (weight * kfac * filEff
                  * getattr(chain, "mcWeightOrg")
                  * getattr(chain, "pileupEventWeight_090")
                  * getattr(chain, "lepSFObjTight")
                  * getattr(chain, "lepSFTrigTight")
                  * getattr(chain, "JVT_EventWeight")
                  * getattr(chain, "SherpaNJetWeight")
                  * getattr(chain, "MV2c10_70_EventWeight")
                  * lumi / weights[fCurrent])

        raw_evts = raw_evts + 1
        numevts = numevts + weight

    print("%s(%.2f)" % (raw_evts, numevts))
def MultiDraw(self, Formulae, CommonWeight="1"):
    """Draws many histograms in one loop over a tree.

    Instead of:
    MyTree.Draw( "nlcts >> a(100, -1, 1)", "weightA" )
    MyTree.Draw( "nlcts >> b(100, -1, 1)", "weightB" )

    Do:
    MyTree.MultiDraw( ( "nlcts >> a(100, -1, 1)", "weightA" ),
                      ( "nlcts >> b(100, -1, 1)", "weightB" ) )

    This is significantly faster when there are many histograms to be drawn.

    Parameters:
      Formulae: Sequence of draw specifications. Each one is either a
                string "expr >> name(bins, min, max)" (a new TH1D is
                booked), a string "expr >> name" (an existing histogram is
                looked up in the current directory; "+name" reuses one
                already seen in this call), or a tuple of such a string and
                a weight expression applied to that histogram only.
      CommonWeight: Weight expression given to all histograms. If a tuple
                    is passed here it is treated as one more draw
                    specification and the common weight becomes "1".

    Returns:
      List with the filled histogram for each entry of ``Formulae``.

    Raises:
      RuntimeError: an expression does not compile, a specification cannot
                    be parsed, or a named histogram cannot be found.
    """
    if isinstance(CommonWeight, tuple):
        Formulae = (CommonWeight, ) + Formulae
        CommonWeight = "1"

    results, formulae, weights = [], [], []
    lastFormula, lastWeight = None, None

    # A weight common to everything being drawn
    CommonWeightFormula = TTreeFormula("CommonWeight", CommonWeight, self)
    CommonWeightFormula.SetQuickLoad(True)
    if not CommonWeightFormula.GetTree():
        raise RuntimeError("TTreeFormula didn't compile: " + CommonWeight)

    hists = {}

    for i, origFormula in enumerate(Formulae):
        print("Have an origFormula " + str(origFormula))

        # Expand out origFormula and weight, otherwise just use weight of 1.
        if isinstance(origFormula, tuple):
            origFormula, weight = origFormula
        else:
            origFormula, weight = origFormula, "1"

        # Pluck out histogram name and arguments
        match = re.match(r"^(.*?)\s*>>\s*(.*?)\s*\(\s*(.*?)\s*\)$", origFormula)
        if match:
            formula, name, arguments = match.groups()
            arguments = re.split(r",\s*", arguments)
            bins, minX, maxX = arguments
            bins, minX, maxX = int(bins), float(minX), float(maxX)

            # Create histogram with name and arguments
            hist = TH1D(name, name, bins, minX, maxX)
            hist.Sumw2()
        else:
            # without arguments
            match = re.match(r"^(.*?)\s*>>\s*(.*?)\s*$", origFormula)
            if not match:
                raise RuntimeError("MultiDraw: Couldn't parse formula: '%s'" % origFormula)

            formula, name = match.groups()

            if name.startswith("+") and name[1:] in hists:
                # Drawing additionally into a histogram
                hist = hists[name[1:]]
            else:
                hist = gDirectory.Get(name)
                if not hist:
                    raise RuntimeError("MultiDraw: Couldn't find histogram to fill '%s' in current directory." % name)

        if name not in hists:
            hists[name] = hist

        results.append(hist)

        # The following two 'if' clauses check that the next formula is
        # different to the previous one. If it is not, we add an ordinary
        # TObject. Then, the dynamic cast in MultiDraw.cxx fails, giving
        # 'NULL', and the previous value is used. This saves the
        # recomputing of identical values.
        if formula != lastFormula:
            f = TTreeFormula("formula%i" % i, formula, self)
            if not f.GetTree():
                raise RuntimeError("TTreeFormula didn't compile: " + formula)
            f.SetQuickLoad(True)
            formulae.append(f)
        else:
            formulae.append(TObject())

        if weight != lastWeight:
            f = TTreeFormula("weight%i" % i, weight, self)
            if not f.GetTree():
                # report the expression that actually failed (the weight)
                raise RuntimeError("TTreeFormula didn't compile: " + weight)
            f.SetQuickLoad(True)
            weights.append(f)
        else:
            weights.append(TObject())

        lastFormula, lastWeight = formula, weight

    # Only compile MultiDraw once
    try:
        from ROOT import MultiDraw as _MultiDraw
    except ImportError:
        if "/sMultiDraw_cc.so" not in gSystem.GetLibraries():
            gROOT.ProcessLine(".L %s/../SFrameAnalysis_emu/datacard/MultiDraw.cc+" % os.environ['CMSSW_BASE'])
        from ROOT import MultiDraw as _MultiDraw

    from time import time
    start = time()

    # Ensure that formulae are told when tree changes
    fManager = TTreeFormulaManager()
    for formula in formulae + weights + [CommonWeightFormula, ]:
        # skip the plain TObject placeholders
        if isinstance(formula, TTreeFormula):
            fManager.Add(formula)

    fManager.Sync()
    self.SetNotify(fManager)

    # Draw everything!
    _MultiDraw(self, CommonWeightFormula,
               MakeTObjArray(formulae),
               MakeTObjArray(weights),
               MakeTObjArray(results),
               len(Formulae))

    print("Took %.2fs" % (time() - start) + " " + " " * 20)

    return results
def run(self, selections, dv, dv2d, ch='', name='', nevents=-1):
    """Book and fill the 1D/2D histograms of every selection.

    A selection is either a plain formula or "<weight name> ** <formula>".
    In the second form the part before '**' picks an extra per-event weight:
      - a name containing 'tagin' gives 1 minus the sum of the branches
        weight_0tagex .. weight_(N-1)tagex, N being the digits in the name;
      - a name containing 'tagless' gives the sum of those same branches;
      - any other name is read directly as a branch.
    Histograms are stored in ``self.sv`` / ``self.sv2d`` under the full
    selection string. ``nevents`` limits the number of processed entries
    (-1 for all) and is capped at the size of the tree.
    """
    # initialize dictionary selection: list of histograms
    if name == '':
        name = self.name

    for nsel, s in enumerate(selections):
        hists1d = collections.OrderedDict()
        hists2d = collections.OrderedDict()
        self.sv[s] = hists1d
        self.sv2d[s] = hists2d

        selstr = 'sel{}'.format(int(nsel))

        for v in dv.keys():
            hname = '{}_{}_{}'.format(name, selstr, v)
            cfg1d = dv[v]
            hists1d[v] = TH1D(hname, hname + ";" + cfg1d["title"] + ";",
                              cfg1d["bin"], cfg1d["xmin"], cfg1d["xmax"])
            hists1d[v].Sumw2()

        for v in dv2d.keys():
            hname = '{}_{}_{}'.format(name, selstr, v)
            cfg2d = dv2d[v]
            hists2d[v] = TH2D(
                hname,
                hname + ";" + cfg2d["titlex"] + ";" + cfg2d["titley"] + ";",
                cfg2d["binx"], cfg2d["xmin"], cfg2d["xmax"],
                cfg2d["biny"], cfg2d["ymin"], cfg2d["ymax"],
            )
            hists2d[v].Sumw2()

    rf = TFile(self.rt)
    t = rf.Get("events")

    if nevents == -1:
        numberOfEntries = t.GetEntries()
        print('running over the full entries %i' % numberOfEntries)
    elif t.GetEntries() < nevents:
        numberOfEntries = t.GetEntries()
        print('running over the full entries %i' % numberOfEntries)
    else:
        numberOfEntries = nevents
        print('running over a subset of entries %i' % numberOfEntries)

    for s in selections:
        weighttrf_name = ''
        weighttrfin_name = []
        weighttrfless_name = []
        sformula = s

        if '**' in s:
            pieces = s.split('**')
            sformula = pieces[1]
            weighttrf_name = pieces[0].strip()

            if 'tagin' in weighttrf_name:
                nbtagex = int(filter(str.isdigit, weighttrf_name))
                weighttrfin_name.extend(
                    'weight_%itagex' % (i) for i in range(nbtagex))
            if 'tagless' in weighttrf_name:
                nbtagex = int(filter(str.isdigit, weighttrf_name))
                weighttrfless_name.extend(
                    'weight_%itagex' % (i) for i in range(nbtagex))

        formula = TTreeFormula("", sformula, t)

        # loop over events
        print('number of events: ' + str(numberOfEntries))
        for entry in xrange(numberOfEntries):
            processed = entry + 1
            if processed % 500 == 0:
                sys.stdout.write('... %i events processed ...\r' % processed)
                sys.stdout.flush()

            t.GetEntry(entry)
            weight = self.w * getattr(t, "weight")

            # extra weight selected by the '**' prefix of the selection
            weighttrf = 1.
            if weighttrf_name != '':
                use_in = len(weighttrfin_name) != 0
                use_less = len(weighttrfless_name) != 0
                if not use_in and not use_less:
                    weighttrf = getattr(t, weighttrf_name)
                elif use_in and not use_less:
                    weighttrf = 1.
                    for branch in weighttrfin_name:
                        weighttrf -= getattr(t, branch)
                elif use_less and not use_in:
                    weighttrf = 0.
                    for branch in weighttrfless_name:
                        weighttrf += getattr(t, branch)
            weight = weight * weighttrf

            # apply selection; fill histos on selected events only
            result = formula.EvalInstance()
            if not result > 0.:
                continue

            for v in dv.keys():
                try:
                    divide = dv[v]["divide"]
                except KeyError:
                    divide = 1
                self.sv[s][v].Fill(getattr(t, dv[v]["name"]) / divide, weight)

            for v in dv2d.keys():
                self.sv2d[s][v].Fill(getattr(t, dv2d[v]["namex"]),
                                     getattr(t, dv2d[v]["namey"]),
                                     weight)
def TreeLoopFromFile(self, fname, noCuts=False, cutOverride=None, CPweight=False, interference=0):
    """Generator over the selected events of the tree in ``fname``.

    Yields one tuple ``(row, effWgt, cpw, iwt)`` per selected event, where
    ``row`` holds the values of ``self.pars.var``, ``effWgt`` the event
    weight, ``cpw`` the complex-pole weight (1. unless ``CPweight``) and
    ``iwt`` the interference weight (1. unless ``interference`` is 1, 2
    or 3, selecting the nominal / up / down weight branch).

    Parameters:
      fname: Name of the ROOT file to open
      noCuts: Use an empty selection
      cutOverride: Passed to ``self.fullCuts`` to build the selection;
                   takes precedence over ``noCuts``
      CPweight: Apply the complex-pole weight
      interference: 0 for none, 1/2/3 for nominal/up/down

    If the file or the tree cannot be found a message is printed and
    nothing is yielded. The file is closed when the generator finishes,
    is closed early, or fails.
    """
    # open file and get tree
    treeFile = TFile.Open(fname)
    if not treeFile:
        print('failed to open file %s' % fname)
        return

    try:
        theTree = treeFile.Get(self.pars.treeName)
        if not theTree:
            print('failed to find tree %s in file %s' % (self.pars.treeName, fname))
            return

        # get the right cuts
        if cutOverride:
            theCuts = self.fullCuts(cutOverride)
            print('override cuts: ' + str(theCuts))
        elif noCuts:
            theCuts = ''
        else:
            theCuts = self.fullCuts()

        if gDirectory.Get('cuts_evtList'):
            gDirectory.Delete('cuts_evtList')
        theList = None

        # create formulae for the variables of interest
        rowVs = [TTreeFormula('v%i' % i, v, theTree)
                 for (i, v) in enumerate(self.pars.var)]

        # names of the optional weight branches
        cpName = 'complexpolewtggH%i' % self.pars.mHiggs
        cpAveName = 'avecomplexpolewtggH%i' % self.pars.mHiggs
        hasCPBranch = bool(CPweight) and hasattr(theTree, cpName)
        interfName = {
            1: 'interferencewtggH%i',
            2: 'interferencewt_upggH%i',
            3: 'interferencewt_downggH%i',
        }.get(interference)
        if interfName is not None:
            interfName = interfName % self.pars.mHiggs

        # The Draw-based shortcut below is only used while the variables
        # plus the extra weight columns fit into four expressions.
        extraDraw = ''
        varsRemaining = 4 - len(self.pars.var)
        ExtraDrawCP = False
        ExtraDrawInterf = False
        if hasCPBranch:
            extraDraw += ':(%s/%s)' % (cpName, cpAveName)
            varsRemaining -= 1
            ExtraDrawCP = True
        if interfName is not None:
            extraDraw += ':' + interfName
            varsRemaining -= 1
            ExtraDrawInterf = True

        if varsRemaining >= 0:
            if len(theCuts) > 0:
                theCuts = 'puwt*effwt*' + theCuts
            Nsel = theTree.Draw(':'.join(self.pars.var) + extraDraw, theCuts,
                                'goff')
        else:
            # create an entry list which apply the cuts to the tree
            Nsel = theTree.Draw('>>cuts_evtList', theCuts, 'entrylist')
            theList = gDirectory.Get('cuts_evtList')

        # loop over the selected events calculate their weight and yield
        # the variable values and the weights for each selected event.
        print("selected events: " + str(Nsel))

        if theList:
            while theTree.GetEntry(theList.Next()):
                effWgt = theTree.puwt * theTree.effwt

                if not CPweight:
                    cpw = 1.
                elif hasCPBranch:
                    cpw = getattr(theTree, cpName)
                    cpw /= getattr(theTree, cpAveName)
                else:
                    cpw = HiggsCPWeight(self.pars.mHiggs,
                                        theTree.W_H_mass_gen)

                if interfName is not None:
                    iwt = getattr(theTree, interfName)
                else:
                    iwt = 1.

                row = [v.EvalInstance() for v in rowVs]
                yield (row, effWgt, cpw, iwt)
        else:
            # read the values back from the columns filled by Draw
            nVars = len(self.pars.var)
            for rowi in range(0, theTree.GetSelectedRows()):
                effWgt = theTree.GetW()[rowi]
                row = [getattr(theTree, 'GetV%i' % (vi + 1))()[rowi]
                       for vi in range(0, nVars)]

                vi = nVars
                cpw = 1.
                if ExtraDrawCP:
                    cpw = getattr(theTree, 'GetV%i' % (vi + 1))()[rowi]
                    vi += 1
                iwt = 1.
                if ExtraDrawInterf:
                    iwt = getattr(theTree, 'GetV%i' % (vi + 1))()[rowi]
                    vi += 1

                yield (row, effWgt, cpw, iwt)
    finally:
        # also reached on the early return above and when the consumer
        # stops iterating before the end
        treeFile.Close()
tree_out = tree_in.CopyTree(cut_str) tree_out.Write() # file_out.Close() new_file_out = TFile(file_out_name.replace('.root', '_weight.root'), 'RECREATE') weight_tree = tree_out.CloneTree(0) scale = int_lumi * sample.xsec * sample.scale / sample.sumweights full_weight = array('f', [0.]) new_b = weight_tree.Branch('full_weight', full_weight, 'full_weight/F') formula = TTreeFormula('weight_formula', weight, tree_out) formula.GetNdata() # ATTENTION THIS MAY NOT WORK! for i in xrange(tree_out.GetEntries()): tree_out.GetEntry(i) full_weight[0] = formula.EvalInstance() * scale # print full_weight[0] # new_b.Fill() weight_tree.Fill() # tree_out.Fill() new_file_out.Write() new_file_out.Close() file_out.Close()
def MultiDraw(self, varexps, selection='1', drawoption="", **kwargs):
    """Draws multiple histograms in one loop over a tree (self).

    Instead of:
      tree.Draw( "pt_1 >> a(100, 0, 100)", "weightA" )
      tree.Draw( "pt_2 >> b(100, 0, 100)", "weightB" )
    Do:
      tree.MultiDraw( ( "pt_1 >> a(100, 0, 100)", "weightA" ),
                      ( "pt_2 >> b(100, 0, 100)", "weightB" ) )
    This is significantly faster when there are many histograms to be drawn.

    Parameters:
      varexps: Sequence of variable expressions. Each one is either a string
               with the formula to be drawn, the histogram name and
               optionally the bin configuration ("x >> h(100,0,100)",
               "y:x >> h(100,0,100,100,0,100)", "x >> h", "x >> +h"), or a
               tuple/list of such a string and a weight expression applied
               to that histogram only.
      selection: Expression applied to all histograms.
      drawoption: If set, passed to ``SetDrawOption`` of every histogram.

    Keyword arguments:
      cut: Overrides ``selection``.
      verbosity: Print the formulae before drawing if >= 2.
      poisson: Use kPoisson bin errors (ignored when ``sumw2`` is set).
      sumw2: Call ``Sumw2`` on every histogram.
      hists: Existing histograms, matched by position to ``varexps``, used
             instead of looking names up in ``gDirectory``.

    Returns:
      List with one histogram per variable expression.
    """
    selection = kwargs.get('cut', selection)  # selections cuts
    verbosity = kwargs.get('verbosity', 0)  # verbosity
    poisson = kwargs.get('poisson', False)  # kPoisson errors for data
    sumw2 = kwargs.get('sumw2', False)  # sumw2 for MC
    histlist = kwargs.get('hists', [])  # to not rely on gDirectory.Get(histname)

    hists = {}
    results, xformulae, yformulae, weights = [], [], [], []
    lastXVar, lastYVar, lastWeight = None, None, None

    # A weight common to everything being drawn
    commonFormula = TTreeFormula("commonFormula", selection, self)
    commonFormula.SetQuickLoad(True)
    if not commonFormula.GetTree():
        raise error(
            "MultiDraw: TTreeFormula 'selection' did not compile:\n selection: %r\n varexps: %s"
            % (selection, varexps))

    for i, varexp in enumerate(varexps):
        yvar = None

        # EXPAND varexp
        weight = None
        if isinstance(varexp, (tuple, list)) and len(varexp) == 2:
            varexp, weight = varexp
        elif not isinstance(varexp, str):
            raise IOError(
                "MultiDraw: given varexp is not a string or tuple of length 2! Got varexp=%s (%s)"
                % (varexp, type(varexp)))
        if not varexp:
            varexp = '1'
        if not weight:
            weight = '1'

        # PREPARE histogram
        match = varregex.match(varexp)
        if match:
            # create new histogram: varexp = "x >> h(100,0,100)" or "y:x >> h(100,0,100,100,0,100)"
            xvar, name, binning = match.group(1), match.group(2), match.group(3)

            # CREATE HISTOGRAM
            vmatch = varregex2D.match(xvar)
            # 1D, allow "(x>100 ? 1 : 0) >> h(2,0,2)"
            is1D = (not vmatch
                    or xvar.replace('::', '').count(':') == xvar.count('?'))
            if is1D:
                bmatch = binregex.match(binning)
                if not bmatch:
                    raise error(
                        "MultiDraw: Could not parse formula for %r: %r"
                        % (name, varexp))
                nxbins, xmin, xmax = int(bmatch.group(1)), float(
                    bmatch.group(2)), float(bmatch.group(3))
                hist = TH1D(name, name, nxbins, xmin, xmax)
            else:
                # 2D histogram
                yvar, xvar = vmatch.group(1), vmatch.group(2)
                bmatch = binregex2D.match(binning)
                if not bmatch:
                    raise error(
                        'MultiDraw: Could not parse formula for %r to pattern %r: "%s"'
                        % (name, binregex2D.pattern, varexp))
                nxbins, xmin, xmax = int(bmatch.group(1)), float(
                    bmatch.group(2)), float(bmatch.group(3))
                nybins, ymin, ymax = int(bmatch.group(4)), float(
                    bmatch.group(5)), float(bmatch.group(6))
                hist = TH2D(name, name, nxbins, xmin, xmax, nybins, ymin, ymax)
        else:
            # get existing histogram: varexp = "x >> h" or "y:x >> h"
            match = varregex2.match(varexp)
            if not match:
                raise error(
                    'MultiDraw: Could not parse formula to pattern %r: %r'
                    % (varregex2.pattern, varexp))
            xvar, name = match.groups()
            if name.startswith("+") and name[1:] in hists:
                hist = hists[name[1:]]  # add content to existing histogram
            elif i < len(histlist):
                hist = histlist[i]
                # The given histogram must be the one the expression asks
                # for; compare against the requested name (the check used
                # to compare the histogram with itself).
                expected = name[1:] if name.startswith("+") else name
                if hist.GetName() != expected:
                    raise error(
                        "MultiDraw: Histogram mismatch: looking for %r, but found %r."
                        % (expected, hist.GetName()))
            else:
                hist = gDirectory.Get(name)
                if not hist:
                    raise error(
                        "MultiDraw: Could not find histogram to fill %r in current directory (varexp %r)."
                        % (name, varexp))

            # SANITY CHECKS
            vmatch = varregex2D.match(xvar)
            # 1D, allow "(x>100 ? 1 : 0) >> h(2,0,2)"
            is1D = (not vmatch
                    or xvar.replace('::', '').count(':') == xvar.count('?'))
            if not is1D:
                # 2D histogram
                yvar, xvar = vmatch.group(1), vmatch.group(2)
                if not isinstance(hist, TH2):
                    raise error(
                        "MultiDraw: Existing histogram with name %r is not 2D! Found xvar=%r, yvar=%r..."
                        % (name, xvar, yvar))

        if sumw2:
            hist.Sumw2()
        elif poisson:
            hist.SetBinErrorOption(TH1D.kPoisson)
        if drawoption:
            hist.SetDrawOption(drawoption)
        if name not in hists:
            hists[name] = hist
        results.append(hist)

        # CHECK that the next formula is different to the previous one.
        # If it is not, we add an ordinary TObject. In this way, the
        # dynamic cast in MultiDraw.cxx fails, giving 'NULL', and the previous
        # value is used. This saves the recomputing of identical values
        if xvar != lastXVar:
            formula = TTreeFormula("formula%i" % i, xvar, self)
            if not formula.GetTree():
                raise error(
                    "MultiDraw: TTreeFormula 'xvar' did not compile for %r:\n xvar: %r\n varexp: %r"
                    % (name, xvar, varexp))
            formula.SetQuickLoad(True)
            xformulae.append(formula)
        else:
            xformulae.append(TObject())

        if yvar is not None:
            if yvar != lastYVar:
                formula = TTreeFormula("formula%i" % i, yvar, self)
                if not formula.GetTree():
                    raise error(
                        "MultiDraw: TTreeFormula 'yvar' did not compile for %r:\n yvar: %r\n varexp: %r"
                        % (name, yvar, varexp))
                formula.SetQuickLoad(True)
                yformulae.append(formula)
            else:
                yformulae.append(TObject())

        if weight != lastWeight:
            formula = TTreeFormula("weight%i" % i, weight, self)
            if not formula.GetTree():
                raise error(
                    "MultiDraw: TTreeFormula 'weight' did not compile for %r:\n weight: %r\n varexp: %r"
                    % (name, weight, varexp))
            formula.SetQuickLoad(True)
            weights.append(formula)
        else:
            weights.append(TObject())

        lastXVar, lastYVar, lastWeight = xvar, yvar, weight

    # CHECK that formulae are told when tree changes
    manager = TTreeFormulaManager()
    for formula in xformulae + yformulae + weights + [commonFormula]:
        # skip the plain TObject placeholders
        if isinstance(formula, TTreeFormula):
            manager.Add(formula)
    manager.Sync()
    self.SetNotify(manager)

    # DRAW
    if verbosity >= 2:
        print(">>> MultiDraw: xformulae=%s, yformulae=%s" % (
            [x.GetTitle() for x in xformulae],
            [y.GetTitle() for y in yformulae]))
        print(">>> MultiDraw: weights=%s, results=%s" % (
            [w.GetTitle() for w in weights], results))
    if len(yformulae) == 0:
        _MultiDraw(self, commonFormula, makeTObjArray(xformulae),
                   makeTObjArray(weights), makeTObjArray(results),
                   len(xformulae))
    elif len(xformulae) == len(yformulae):
        _MultiDraw2D(self, commonFormula, makeTObjArray(xformulae),
                     makeTObjArray(yformulae), makeTObjArray(weights),
                     makeTObjArray(results), len(xformulae))
    else:
        raise error(
            "MultiDraw: Given a mix of arguments for 1D (%d) and 2D (%d) histograms!"
            % (len(xformulae), len(yformulae)))

    return results
while filenum1<len(InfileArg): ifilename=InfileArg[filenum1] MeritIn=TChain(options.ttree) MeritIn.Add(ifilename) nEnt=MeritIn.GetEntries() nEnt = 100000 # debug logging.info('found %i events',int(nEnt)) bar_suffix = "%(percent)d%%- %(elapsed)ds" #'%(percent)d%%' bbar = Bar("%s: Progress..."%os.path.basename(ifilename),max=int(nEnt), suffix=bar_suffix) if(nEnt==0): del MeritIn logging.warning("WARNING! EMPTY FILE %s",ifilename) filenum1+=1 continue if not options.cuts is None: CutEval=TTreeFormula("CutEval",options.cuts,MeritIn) #nEntTot+=nEnt tmp_frame = pd.DataFrame(index=np.arange(nEnt),columns=allColumns) tmp_frame = tmp_frame.fillna(0) for i in range(nEnt): #if i<100: MeritIn.GetEntry(i) if not options.cuts is None: if(CutEval.EvalInstance(i)==0): continue # specifically clone branches for j in range(len(IntBrName)): IntBrVal[j][0]=getattr(MeritIn,IntBrName[j]) for j in range(len(DblBrName)): DblBrVal[j][0]=getattr(MeritIn,DblBrName[j]) # this fills the row with all values and appends to existing dataframe if len(IntBrVal) and len(DblBrVal):