def fit(self, input_file, cut, num_events, output, epochs, batch):
    """
    Do the fit

    Reads the ``events`` tree of a ROOT file into numpy arrays, trains
    ``self.model`` on them and writes the frozen graph into ``weights``.

    Parameters:
      input_file: Name of the ROOT file that holds the ``events`` tree
      cut: Selection expression. Entries for which it evaluates to zero
           are skipped. A falsy value disables the selection
      num_events: Maximum number of selected events to train on.
                  Zero or a negative number means every entry of the tree
      output: Name of the graph file written inside ``weights``
      epochs: Number of epochs passed to ``self.model.fit``
      batch: Batch size passed to ``self.model.fit``
    """
    in_file = TFile(input_file)
    tree = in_file.events

    val_forms = [TTreeFormula(v, v, tree) for v in self.vars]
    target_forms = [TTreeFormula(t, t, tree) for t in self.targets]
    cut_form = TTreeFormula(cut, cut, tree) if cut else None

    # Upper bound on the number of rows that can be filled
    reserve = num_events if num_events > 0 else tree.GetEntries()

    inputs = numpy.zeros((reserve, len(self.vars)))
    targets = [numpy.zeros((reserve, 1)) for _ in self.targets]

    # Set up the inputs
    filled = 0
    for _ in tree:
        # Compare against ``reserve`` rather than ``num_events`` so that
        # "zero or negative means everything" really reads everything
        if filled == reserve:
            break
        if cut_form and not cut_form.EvalInstance():
            continue

        if filled % 10000 == 0:
            # ``reserve`` is non-zero whenever the loop body runs
            print('Filling ' + str(float(filled * 100) / reserve) + '%')

        for jndex, val in enumerate(val_forms):
            inputs[filled][jndex] = val.EvalInstance()
        for jndex, target in enumerate(target_forms):
            targets[jndex][filled][0] = target.EvalInstance()
        filled += 1

    # Rows that were reserved but never filled (entries removed by the cut)
    # would otherwise be trained on as all-zero events
    if filled < reserve:
        inputs = inputs[:filled]
        targets = [column[:filled] for column in targets]

    self.model.fit(inputs, targets,
                   validation_split=0.5,
                   epochs=epochs,
                   batch_size=batch,
                   callbacks=[
                       keras.callbacks.TensorBoard(
                           log_dir='weights/logdir',
                           histogram_freq=1,
                           write_graph=True,
                           write_images=True)
                   ])

    # Freeze the trained variables into constants and write the graph
    sess = keras.backend.get_session()
    output_node = [n.op.name for n in self.model.outputs]
    print('Output node %s' % (output_node,))

    graph = graph_util.convert_variables_to_constants(
        sess, sess.graph.as_graph_def(), output_node)
    graph_io.write_graph(graph, 'weights', output, as_text=False)
def fillIntoTree(out_tree, branches, cfg, hist_cfg, vcfgs, total_scale, plot, verbose, friend_func):
    """Fill ``out_tree`` with one row per input entry that has non-zero weight.

    A ``HistogramCfg`` is treated as a container: the function recurses into
    each of its ``cfgs``, with the container acting as ``hist_cfg`` and its
    ``total_scale`` (when set) multiplied into ``total_scale``.

    Parameters:
      out_tree: Tree whose ``Fill()`` is called for every kept entry
      branches: One-element buffers. The last one receives the full event
                weight; the others are paired with ``vcfgs`` in order
      cfg: ``HistogramCfg`` or the configuration of a single sample
      hist_cfg: Configuration providing ``cut``, ``weight`` and ``lumi``
      vcfgs: Variable configurations. Variables whose ``drawname`` differs
             from ``name`` get a ``formula`` attribute attached
      total_scale: Scale factor accumulated over the enclosing configs
      plot: Object whose ``readTree`` returns the input tree
      verbose: Forwarded to ``plot.readTree``
      friend_func: Forwarded to ``plot.readTree``
    """
    if isinstance(cfg, HistogramCfg):
        # Loop over sub-cfgs and fill them
        total_scale *= cfg.total_scale if cfg.total_scale else 1.
        for sub_cfg in cfg.cfgs:
            fillIntoTree(out_tree, branches, sub_cfg, cfg, vcfgs, total_scale, plot, verbose, friend_func)
        return

    file_name = '/'.join([cfg.ana_dir, cfg.dir_name, cfg.tree_prod_name, 'tree.root'])

    # Attaches tree to plot
    ttree = plot.readTree(file_name, cfg.tree_name, verbose=verbose, friend_func=friend_func)

    # Sample-specific cuts take precedence over the common one
    norm_cut = cfg.norm_cut if cfg.norm_cut else hist_cfg.cut
    shape_cut = cfg.shape_cut if cfg.shape_cut else hist_cfg.cut

    full_weight = branches[-1]

    weight = hist_cfg.weight
    if weight:
        # Only build the product when there is a histogram weight to start
        # from; joining with a None weight would raise a TypeError.
        # NOTE(review): as before, cfg.weight_expr is ignored when
        # hist_cfg.weight is empty - confirm this is intended.
        if cfg.weight_expr:
            weight = '*'.join([weight, cfg.weight_expr])
        norm_cut = '({c}) * {we}'.format(c=norm_cut, we=weight)
        shape_cut = '({c}) * {we}'.format(c=shape_cut, we=weight)

    # Per-sample normalisation applied on top of the per-event weight
    sample_weight = cfg.scale * total_scale
    if not cfg.is_data:
        sample_weight *= hist_cfg.lumi*cfg.xsec/cfg.sumweights

    formula = TTreeFormula('weight_formula', norm_cut, ttree)
    formula.GetNdata()

    # Create TTreeFormulas for all vars
    for var in vcfgs:
        if var.drawname != var.name:
            var.formula = TTreeFormula('formula'+var.name, var.drawname, ttree)
            var.formula.GetNdata()

    for i in xrange(ttree.GetEntries()):
        ttree.GetEntry(i)
        w = formula.EvalInstance()
        if w == 0.:
            continue
        full_weight[0] = w * sample_weight
        if abs(full_weight[0]) > 1000.:
            # Report the suspicious weight but keep going: stopping in an
            # interactive debugger here would hang batch jobs.
            print('WARNING, unusually large weight %s %s' % (w, sample_weight))
            print('\nWeight: %s' % full_weight[0])
            print(cfg.name)
            print(norm_cut)
        for branch, var in zip(branches, vcfgs):
            branch[0] = var.formula.EvalInstance() if hasattr(var, 'formula') else getattr(ttree, var.name)
        out_tree.Fill()

    if shape_cut != norm_cut:
        print('WARNING: different norm and shape cuts currently not supported in HistCreator.createTrees')
# Branches switched back on after everything has been disabled.
_COUNT_BRANCHES = (
    "Mll01",
    "total_charge",
    "lep_Pt_0",
    "lep_Pt_1",
    "lep_Eta_0",
    "lep_Eta_1",
    "lep_ID_0",
    "lep_ID_1",
    "lep_truthPdgId_0",
    "lep_truthPdgId_1",
    "lep_truthOrigin_0",
    "lep_truthOrigin_1",
    "lep_truthType_0",
    "lep_truthType_1",
    "lep_isQMisID_0",
    "lep_isQMisID_1",
    "nJets_OR_T_MV2c10_70",
    "nJets_OR_T",
    "lep_isTightLH_0",
    "lep_isTightLH_1",
    "lep_isLooseLH_0",
    "lep_isLooseLH_1",
    "lep_isolationFixedCutTight_0",
    "lep_isolationFixedCutLoose_0",
    "lep_isolationFixedCutTight_1",
    "lep_isolationFixedCutLoose_1",
    "lep_isolationFixedCutTightTrackOnly_0",
    "lep_isolationFixedCutTightTrackOnly_1",
    "HLT*",
    "*type",
    "RunYear",
    "passEventCleaning",
    "lep_isTrigMatch_0",
    "lep_isTrigMatch_1",
    "lep_isTrigMatchDLT_0",
    "lep_isTrigMatchDLT_1",
    "mcWeightOrg",
    "pileupEventWeight_090",
    "lepSFObjTight",
    "lepSFTrigTight",
    "JVT_EventWeight",
    "SherpaNJetWeight",
    "MV2c10_70_EventWeight",
    "lep_chargeIDBDT*",
    "nTaus_OR_Pt25",
    "tau_JetBDTSigTight_0",
    "tau_JetBDTSigTight_1",
    "tau_tagWeightBin_0",
    "tau_tagWeightBin_1",
    "tau_passMuonOLR_0",
    "tau_passMuonOLR_1",
    "tau_passEleBDT_0",
    "tau_passEleBDT_1",
    "tau_charge_0",
    "tau_charge_1",
    "lep_ID_2",
    "Mll02",
    "lep_promptLeptonVeto_TagWeight_0",
    "lep_promptLeptonVeto_TagWeight_1",
    "lep_ambiguityType_0",
    "lep_ambiguityType_1",
)

# Factor that seeds the event weight, keyed by the id searched for in the
# file name. NOTE(review): presumably the sample cross sections (the old
# debug print called this quantity xsec) - confirm the units.
_TTH_SAMPLE_WEIGHTS = (
    ("341177", 0.05343),
    ("341270", 0.22276),
    ("341271", 0.23082),
)


def _tth_sample_weight(file_name):
    """Return the seed weight for ``file_name``; raise ValueError if unknown."""
    matches = [value for key, value in _TTH_SAMPLE_WEIGHTS if key in file_name]
    if not matches:
        raise ValueError("no sample weight known for file %s" % file_name)
    # When several ids match, the last one wins, as in the original if-chain
    return matches[-1]


def Count(chan, trigs):
    """Print the raw and weighted number of events passing two selections.

    Both the ``sumWeights`` and ``nominal`` chains are built from the files
    matching ``"%stth*.root" % prepath``. The result is printed as
    ``raw(weighted)``; nothing is returned.

    Parameters:
      chan: Selection expression compiled into a TTreeFormula on the chain
      trigs: Trigger expression compiled the same way; an event is counted
             only when both expressions evaluate to a true value

    Raises:
      ValueError: a selected event comes from a file whose name contains
                  none of the known sample ids. Previously this reused the
                  weight of the preceding event or failed with a NameError.
    """
    #deal with weights first
    sumWeights = TChain("sumWeights")
    sumWeights.Add("%stth*.root" % prepath)

    # weights[k] is the summed totalEventsWeighted of the k-th tree
    weights = []
    fCurrent_wt = 0
    sampleNEvt = 0
    nWeightEntries = sumWeights.GetEntries()
    for a in range(nWeightEntries):
        sumWeights.GetEntry(a)
        if sumWeights.GetTreeNumber() != fCurrent_wt:
            # Moved on to the next file: store the finished sum
            fCurrent_wt = sumWeights.GetTreeNumber()
            weights.append(sampleNEvt)
            sampleNEvt = 0
        sampleNEvt = sampleNEvt + sumWeights.totalEventsWeighted
    if nWeightEntries > 0:
        weights.append(sampleNEvt)  #last file

    chain = TChain("nominal")
    chain.Add("%stth*.root" % prepath)
    nentries = chain.GetEntries()

    # Read only the branches that are needed
    chain.SetBranchStatus("*", 0)
    for branch_name in _COUNT_BRANCHES:
        chain.SetBranchStatus(branch_name, 1)

    #cuts
    fCurrent = -1
    chain.LoadTree(0)
    cuts_sr = TTreeFormula("cuts_sr", chan, chain)
    cuts_trig = TTreeFormula("cuts_trig", trigs, chain)

    kfac, filEff = 1, 1
    raw_evts, numevts = 0, 0
    for evt in range(nentries):
        chain.GetEntry(evt)

        if chain.GetTreeNumber() != fCurrent:
            # The formulas have to be told when the chain switches file
            fCurrent = chain.GetTreeNumber()
            cuts_sr.Notify()
            cuts_trig.Notify()

        if not (cuts_sr.EvalInstance() and cuts_trig.EvalInstance()):
            continue

        RunYear = chain.RunYear
        if RunYear < 2016.5:
            lumi = 36074.6
        elif RunYear > 2016.5:
            lumi = 43813.7
        else:
            lumi = 1.0

        #get current file
        weight = _tth_sample_weight(chain.GetCurrentFile().GetName())
        weight = (weight * kfac * filEff
                  * chain.mcWeightOrg
                  * chain.pileupEventWeight_090
                  * chain.lepSFObjTight
                  * chain.lepSFTrigTight
                  * chain.JVT_EventWeight
                  * chain.SherpaNJetWeight
                  * chain.MV2c10_70_EventWeight
                  * lumi / weights[fCurrent])

        raw_evts = raw_evts + 1
        numevts = numevts + weight

    print("%s(%.2f)" % (raw_evts, numevts))
def run(self, selections, dv, dv2d, ch='', name='', nevents=-1):
    """Book and fill the 1D and 2D histograms of every selection.

    A selection is either a plain formula or ``"<weight> ** <formula>"``.
    The part before ``**`` picks an extra per-event weight:

    * it contains ``tagin``: one minus the sum of the branches
      ``weight_0tagex`` ... ``weight_(N-1)tagex``;
    * it contains ``tagless``: the sum of those branches;
    * otherwise: the branch of that name.

    N is the integer formed by the digits found in the weight name.

    Parameters:
      selections: Selection strings; also the keys of ``self.sv`` and
                  ``self.sv2d``
      dv: Maps a variable key to a dict with ``name``, ``title``, ``bin``,
          ``xmin``, ``xmax`` and optionally ``divide``
      dv2d: Maps a variable key to a dict with ``namex``, ``namey``,
            ``titlex``, ``titley`` and the x/y binning
      ch: Unused
      name: Prefix of the histogram names; ``self.name`` when empty
      nevents: Number of entries to process; -1 means all. Values larger
               than the tree are clamped to the tree size
    """
    if name == '':
        name = self.name

    # initialize dictionary selection: list of histograms
    for nsel, s in enumerate(selections):
        self.sv[s] = collections.OrderedDict()
        self.sv2d[s] = collections.OrderedDict()
        selstr = 'sel{}'.format(nsel)

        for v in dv:
            hname = '{}_{}_{}'.format(name, selstr, v)
            self.sv[s][v] = TH1D(hname, hname + ";" + dv[v]["title"] + ";",
                                 dv[v]["bin"], dv[v]["xmin"], dv[v]["xmax"])
            self.sv[s][v].Sumw2()

        for v in dv2d:
            hname = '{}_{}_{}'.format(name, selstr, v)
            self.sv2d[s][v] = TH2D(
                hname,
                hname + ";" + dv2d[v]["titlex"] + ";" + dv2d[v]["titley"] + ";",
                dv2d[v]["binx"], dv2d[v]["xmin"], dv2d[v]["xmax"],
                dv2d[v]["biny"], dv2d[v]["ymin"], dv2d[v]["ymax"],
            )
            self.sv2d[s][v].Sumw2()

    rf = TFile(self.rt)
    t = rf.Get("events")

    if nevents == -1:
        numberOfEntries = t.GetEntries()
        print('running over the full entries %i' % numberOfEntries)
    elif t.GetEntries() < nevents:
        numberOfEntries = t.GetEntries()
        print('running over the full entries %i' % numberOfEntries)
    else:
        numberOfEntries = nevents
        print('running over a subset of entries %i' % numberOfEntries)

    for s in selections:
        weighttrf_name = ''
        weighttrfin_name = []
        weighttrfless_name = []
        sformula = s
        if '**' in s:
            s_split = s.split('**')
            sformula = s_split[1]
            weighttrf_name = s_split[0].strip()
            if 'tagin' in weighttrf_name or 'tagless' in weighttrf_name:
                nbtagex = int(''.join(c for c in weighttrf_name if c.isdigit()))
                tagex = ['weight_%itagex' % i for i in range(nbtagex)]
                if 'tagin' in weighttrf_name:
                    weighttrfin_name = tagex
                if 'tagless' in weighttrf_name:
                    weighttrfless_name = list(tagex)

        formula = TTreeFormula("", sformula, t)

        # Everything that does not change from entry to entry is looked up
        # once per selection instead of once per event
        fills_1d = [(self.sv[s][v], dv[v]["name"], dv[v].get("divide", 1))
                    for v in dv]
        fills_2d = [(self.sv2d[s][v], dv2d[v]["namex"], dv2d[v]["namey"])
                    for v in dv2d]

        # loop over events
        print('number of events: %s' % numberOfEntries)
        for entry in xrange(numberOfEntries):
            if (entry + 1) % 500 == 0:
                sys.stdout.write('... %i events processed ...\r' % (entry + 1))
                sys.stdout.flush()

            t.GetEntry(entry)
            weight = self.w * getattr(t, "weight")

            # Stays 1 without a weight name, and also when both tag lists
            # are filled
            weighttrf = 1.
            if weighttrf_name != '':
                if not weighttrfin_name and not weighttrfless_name:
                    weighttrf = getattr(t, weighttrf_name)
                elif weighttrfin_name and not weighttrfless_name:
                    for branch in weighttrfin_name:
                        weighttrf -= getattr(t, branch)
                elif weighttrfless_name and not weighttrfin_name:
                    weighttrf = 0.
                    for branch in weighttrfless_name:
                        weighttrf += getattr(t, branch)
            weight = weight * weighttrf

            # apply selection and fill histos on selected events
            if formula.EvalInstance() > 0.:
                for hist, branch, divide in fills_1d:
                    hist.Fill(getattr(t, branch) / divide, weight)
                for hist, branch_x, branch_y in fills_2d:
                    hist.Fill(getattr(t, branch_x), getattr(t, branch_y), weight)
def run(self, selections, dv, dv2d, ch='', name='', nevents=-1):
    """Book and fill the 1D and 2D histograms of every selection.

    Parameters:
      selections: Selection formulas; also the keys of ``self.sv`` and
                  ``self.sv2d``
      dv: Maps a variable key to a dict with ``name``, ``title``, ``bin``,
          ``xmin``, ``xmax`` and optionally ``divide``
      dv2d: Maps a variable key to a dict with ``namex``, ``namey``,
            ``titlex``, ``titley`` and the x/y binning
      ch: Unused
      name: Prefix of the histogram names; ``self.name`` when empty
      nevents: Number of entries to process; -1 means all. Values larger
               than the tree are clamped to the tree size
    """
    if name == '':
        name = self.name

    # initialize dictionary selection: list of histograms
    for nsel, s in enumerate(selections):
        self.sv[s] = collections.OrderedDict()
        self.sv2d[s] = collections.OrderedDict()
        selstr = 'sel{}'.format(nsel)

        for v in dv:
            hname = '{}_{}_{}'.format(name, selstr, v)
            self.sv[s][v] = TH1D(hname, hname + ";" + dv[v]["title"] + ";",
                                 dv[v]["bin"], dv[v]["xmin"], dv[v]["xmax"])
            self.sv[s][v].Sumw2()

        for v in dv2d:
            hname = '{}_{}_{}'.format(name, selstr, v)
            self.sv2d[s][v] = TH2D(
                hname,
                hname + ";" + dv2d[v]["titlex"] + ";" + dv2d[v]["titley"] + ";",
                dv2d[v]["binx"], dv2d[v]["xmin"], dv2d[v]["xmax"],
                dv2d[v]["biny"], dv2d[v]["ymin"], dv2d[v]["ymax"],
            )
            self.sv2d[s][v].Sumw2()

    rf = TFile(self.rt)
    t = rf.Get("events")

    if nevents == -1:
        numberOfEntries = t.GetEntries()
        print('running over the full entries %i' % numberOfEntries)
    elif t.GetEntries() < nevents:
        # Asking for more than the tree holds must not run past its end:
        # the values of the last entry would be filled again and again
        numberOfEntries = t.GetEntries()
        print('running over the full entries %i' % numberOfEntries)
    else:
        numberOfEntries = nevents
        print('running over a subset of entries %i' % numberOfEntries)

    for s in selections:
        formula = TTreeFormula("", s, t)

        # Everything that does not change from entry to entry is looked up
        # once per selection instead of once per event
        fills_1d = [(self.sv[s][v], dv[v]["name"], dv[v].get("divide", 1))
                    for v in dv]
        fills_2d = [(self.sv2d[s][v], dv2d[v]["namex"], dv2d[v]["namey"])
                    for v in dv2d]

        # loop over events
        print('number of events: %s' % numberOfEntries)
        for entry in xrange(numberOfEntries):
            if (entry + 1) % 500 == 0:
                sys.stdout.write('... %i events processed ...\r' % (entry + 1))
                sys.stdout.flush()

            t.GetEntry(entry)
            weight = self.w * getattr(t, "weight")

            # apply selection and fill histos on selected events
            if formula.EvalInstance() > 0.:
                for hist, branch, divide in fills_1d:
                    hist.Fill(getattr(t, branch) / divide, weight)
                for hist, branch_x, branch_y in fills_2d:
                    hist.Fill(getattr(t, branch_x), getattr(t, branch_y), weight)
# NOTE(review): fragment - the enclosing definition is not visible here, and
# every name read below (file_out_name, tree_out, int_lumi, sample, weight,
# file_out, out_dict, name, cut, cut_str) comes from that surrounding scope.
#
# Writes a copy of tree_out with an extra 'full_weight' branch into a second
# file whose name ends in '_weight.root', then records the scale and the cut.
new_file_out = TFile(file_out_name.replace('.root', '_weight.root'), 'RECREATE')
# CloneTree(0): per the ROOT documentation this copies the branch structure
# without copying any entries; the entries are added by the loop below.
weight_tree = tree_out.CloneTree(0)
# Constant per-sample factor multiplied into every per-event weight
scale = int_lumi * sample.xsec * sample.scale / sample.sumweights
# One-element float buffer backing the new branch
full_weight = array('f', [0.])
new_b = weight_tree.Branch('full_weight', full_weight, 'full_weight/F')
formula = TTreeFormula('weight_formula', weight, tree_out)
formula.GetNdata()
# ATTENTION THIS MAY NOT WORK!
for i in xrange(tree_out.GetEntries()):
    tree_out.GetEntry(i)
    # Evaluate the weight expression on the entry just loaded
    full_weight[0] = formula.EvalInstance() * scale
    # print full_weight[0]
    # new_b.Fill()
    weight_tree.Fill()
# tree_out.Fill()
new_file_out.Write()
new_file_out.Close()
file_out.Close()
# Reports the name of the original output file, not the '_weight.root' one
print 'Writing file', file_out_name
out_dict[name] = {}
out_dict[name]['weight'] = scale
out_dict[name][cut.name] = cut_str
# NOTE(review): fragment of a larger loop body - the `continue` after the
# empty-file check targets an enclosing loop that is not visible here
# (presumably one iteration per input file; confirm in the full function).
#
# Copies the selected branches of the tree MeritIn into a DataFrame with one
# row per tree entry and appends that frame to df_.

# Progress bar sized to the number of entries of the current tree
bbar = Bar("%s: Progress..."%os.path.basename(ifilename),max=int(nEnt), suffix=bar_suffix)
# Nothing to read: drop the tree handle, log it, bump filenum1 and move on
if(nEnt==0):
    del MeritIn
    logging.warning("WARNING! EMPTY FILE %s",ifilename)
    filenum1+=1
    continue
# The optional selection is compiled once against this tree
if not options.cuts is None:
    CutEval=TTreeFormula("CutEval",options.cuts,MeritIn)
#nEntTot+=nEnt
# One row per entry, every cell set to 0 before filling
tmp_frame = pd.DataFrame(index=np.arange(nEnt),columns=allColumns)
tmp_frame = tmp_frame.fillna(0)
for i in range(nEnt):
    #if i<100:
    MeritIn.GetEntry(i)
    if not options.cuts is None:
        # Entries failing the selection are skipped: their row keeps the
        # zero fill and the progress bar is not advanced for them.
        # NOTE(review): the entry index i is passed to EvalInstance, whose
        # argument in ROOT is the instance number within the current entry,
        # not the entry number - confirm this is intended.
        if(CutEval.EvalInstance(i)==0): continue
    # specifically clone branches
    # Copy the current value of every branch into its one-element buffer
    for j in range(len(IntBrName)):
        IntBrVal[j][0]=getattr(MeritIn,IntBrName[j])
    for j in range(len(DblBrName)):
        DblBrVal[j][0]=getattr(MeritIn,DblBrName[j])
    # this fills the row with all values and appends to existing dataframe
    # Integer values come first, then the double values
    # (assumes allColumns lists the columns in that order - TODO confirm)
    if len(IntBrVal) and len(DblBrVal):
        series=np.hstack([np.column_stack(IntBrVal),np.column_stack(DblBrVal)])
    else:
        series = np.array(IntBrVal if len(IntBrVal) else DblBrVal).T
    tmp_frame.loc[i] = series[0]
    del series
    bbar.next()
df_.append(tmp_frame)
del tmp_frame
def load(inputfile, target, inputs, adversary, weight, reweight):
    """
    Read the ``events`` tree of a ROOT file into arrays for fitting.

    Parameters:
      inputfile: Name of the ROOT file that contains all our data for training
      target: Expression that yields class number
      inputs: List of expressions to input into the classifier
      adversary: Expressions that the adversary should not be able to guess
                 from the classifier output
      weight: Expression to get the sample weights. A falsy value means
              no weight expression
      reweight: Bool to decide if should reweight. When set without a
                weight expression, the reweighting starts from unit weights

    Returns:
      Numpy Arrays that can be used in fitting with the following info

      - Labels that are used to classify (one-hot encoded)
      - Raw data to do the classification with
      - Data that should not be predictable based on the predicted label
      - Sample weights, or None when neither ``weight`` nor ``reweight``
        is set
    """
    in_file = TFile(inputfile)
    tree = in_file.events

    target_form = TTreeFormula(target, target, tree)
    val_forms = [TTreeFormula(v, v, tree) for v in inputs]
    adversary_forms = [TTreeFormula(a, a, tree) for a in adversary]
    weight_form = TTreeFormula(weight, weight, tree) if weight else None

    reserve = tree.GetEntries()

    data = numpy.zeros((reserve, len(inputs)))
    smooths = numpy.zeros((reserve, len(adversary)))
    labels = numpy.zeros((reserve, 1))

    if weight:
        weights = numpy.zeros(reserve)
    elif reweight:
        # Reweighting multiplies into the weights, so it needs an array to
        # start from even without a weight expression
        weights = numpy.ones(reserve)
    else:
        weights = None

    logging.info('Reading %i events', reserve)

    # Set up the inputs
    for event, _ in enumerate(tree):
        if event == reserve:
            break

        if event % 10000 == 0:
            logging.info('Filling %s', str(float(event * 100) / reserve) + '%')

        labels[event][0] = target_form.EvalInstance()
        if weight:
            weights[event] = weight_form.EvalInstance()

        for jndex, val in enumerate(val_forms):
            data[event][jndex] = val.EvalInstance()
        for jndex, adv in enumerate(adversary_forms):
            smooths[event][jndex] = adv.EvalInstance()

    if reweight:
        # Want to reweight each class separately
        smooth_dict = collections.defaultdict(list)
        for label, point in zip(labels, smooths):
            smooth_dict[label[0]].append(point)

        reweighters = {
            key: Reweighter(row) for key, row in smooth_dict.items()
        }

        # NOTE(review): the reweighters are built from the adversary values
        # but queried with rows of ``data`` - confirm that get_weight is
        # really meant to receive the classifier inputs.
        for index, point in enumerate(data):
            weights[index] *= reweighters[labels[index][0]].get_weight(point)

    return keras.utils.to_categorical(labels), data, smooths, weights