Esempio n. 1
0
    def fit(self, input_file, cut, num_events, output, epochs, batch):
        """
        Train ``self.model`` on events read from a ROOT file and freeze it.

        Each expression in ``self.vars`` and ``self.targets`` is evaluated
        with a ``TTreeFormula`` on the ``events`` tree of ``input_file``.
        The resulting arrays are passed to ``self.model.fit``; afterwards the
        session graph is converted to constants and written to disk.

        Parameters:
          input_file: Name of the ROOT file holding the ``events`` tree
          cut: Selection expression; events for which it evaluates to zero
            are skipped. A false value disables the selection
          num_events: Maximum number of selected events to use. Any value
            that is not positive means "use every selected event"
          output: File name of the frozen graph, written under ``weights``
          epochs: Number of epochs passed to ``self.model.fit``
          batch: Batch size passed to ``self.model.fit``
        """
        in_file = TFile(input_file)
        tree = in_file.events
        val_forms = [TTreeFormula(v, v, tree) for v in self.vars]
        target_forms = [TTreeFormula(t, t, tree) for t in self.targets]
        cut_form = TTreeFormula(cut, cut, tree) if cut else None

        # Upper bound on the number of rows that can be filled
        reserve = num_events if num_events > 0 else tree.GetEntries()

        inputs = numpy.zeros((reserve, len(self.vars)))
        targets = [numpy.zeros((reserve, 1)) for _ in self.targets]

        # Set up the inputs; ``filled`` counts the events that passed the cut
        filled = 0
        for _ in tree:
            # Compare against ``reserve`` rather than ``num_events`` so that
            # a non-positive ``num_events`` really means "no limit"
            if filled == reserve:
                break
            if cut_form and not cut_form.EvalInstance():
                continue

            if filled % 10000 == 0:
                # ``reserve`` is positive whenever the loop body runs
                print('Filling ' + str(float(filled * 100) / reserve) + '%')

            for jndex, val in enumerate(val_forms):
                inputs[filled][jndex] = val.EvalInstance()

            for jndex, target in enumerate(target_forms):
                targets[jndex][filled][0] = target.EvalInstance()

            filled += 1

        # Rows reserved for events that were cut away were never filled;
        # drop them so all-zero rows are not used for training
        inputs = inputs[:filled]
        targets = [column[:filled] for column in targets]

        self.model.fit(inputs,
                       targets,
                       validation_split=0.5,
                       epochs=epochs,
                       batch_size=batch,
                       callbacks=[
                           keras.callbacks.TensorBoard(
                               log_dir='weights/logdir',
                               histogram_freq=1,
                               write_graph=True,
                               write_images=True)
                       ])

        # Freeze the trained graph: variables become constants so the
        # written file is self-contained
        sess = keras.backend.get_session()
        output_node = [n.op.name for n in self.model.outputs]
        print('Output node ' + str(output_node))
        graph = graph_util.convert_variables_to_constants(
            sess, sess.graph.as_graph_def(), output_node)

        graph_io.write_graph(graph, 'weights', output, as_text=False)
Esempio n. 2
0
def fillIntoTree(out_tree, branches, cfg, hist_cfg, vcfgs, total_scale, plot, verbose, friend_func):
    """Fill ``out_tree`` with the weighted, selected events described by ``cfg``.

    When ``cfg`` is a ``HistogramCfg`` the function recurses into each of its
    ``cfgs``, multiplying ``total_scale`` by ``cfg.total_scale`` when that is
    set. Otherwise ``cfg`` is treated as a sample: its tree is read through
    ``plot.readTree``, the normalisation cut (times the weight expression) is
    evaluated for every entry, and entries with a non-zero result are written
    to ``out_tree``.

    Parameters:
      out_tree: Tree that receives one ``Fill()`` per selected entry
      branches: Sequence of one-element buffers bound to ``out_tree``; they
        are paired in order with ``vcfgs`` and the last one receives the
        full event weight
      cfg: Sample configuration or ``HistogramCfg`` holding sub-configurations
      hist_cfg: Enclosing configuration providing ``cut``, ``weight`` and
        ``lumi``
      vcfgs: Variable configurations with ``name`` and ``drawname``
      total_scale: Scale factor accumulated from enclosing configurations
      plot: Object whose ``readTree`` returns the input tree
      verbose: Forwarded to ``plot.readTree``
      friend_func: Forwarded to ``plot.readTree``
    """
    if isinstance(cfg, HistogramCfg):
        # Loop over sub-cfgs and fill them
        total_scale *= cfg.total_scale if cfg.total_scale else 1.
        for sub_cfg in cfg.cfgs:
            fillIntoTree(out_tree, branches, sub_cfg, cfg, vcfgs, total_scale, plot, verbose, friend_func)
        return

    file_name = '/'.join([cfg.ana_dir, cfg.dir_name, cfg.tree_prod_name, 'tree.root'])

    # Attaches tree to plot
    ttree = plot.readTree(file_name, cfg.tree_name, verbose=verbose, friend_func=friend_func)

    # Sample-specific cuts take precedence over the common one
    norm_cut = cfg.norm_cut if cfg.norm_cut else hist_cfg.cut
    shape_cut = cfg.shape_cut if cfg.shape_cut else hist_cfg.cut

    full_weight = branches[-1]

    # Combine whichever weight expressions are present. Joining only the
    # non-empty ones avoids a malformed '*expr' (or a TypeError on None) and
    # keeps a sample weight from being dropped when there is no common weight.
    weight = '*'.join(w for w in (hist_cfg.weight, cfg.weight_expr) if w)
    if weight:
        norm_cut = '({c}) * {we}'.format(c=norm_cut, we=weight)
        shape_cut = '({c}) * {we}'.format(c=shape_cut, we=weight)

    # Constant per-sample factor applied on top of the per-event weight
    sample_weight = cfg.scale * total_scale
    if not cfg.is_data:
        sample_weight *= hist_cfg.lumi*cfg.xsec/cfg.sumweights

    formula = TTreeFormula('weight_formula', norm_cut, ttree)
    formula.GetNdata()

    # Create TTreeFormulas for all vars whose draw expression is not simply
    # the branch name; plain branches are read with getattr below
    for var in vcfgs:
        if var.drawname != var.name:
            var.formula = TTreeFormula('formula'+var.name, var.drawname, ttree)
            var.formula.GetNdata()

    for i in xrange(ttree.GetEntries()):
        ttree.GetEntry(i)
        w = formula.EvalInstance()
        if w == 0.:
            # Entry fails the cut (or carries zero weight)
            continue
        full_weight[0] = w * sample_weight
        if abs(full_weight[0]) > 1000.:
            # Report and carry on; no interactive debugger in the event loop
            print('WARNING, unusually large weight %s %s' % (w, sample_weight))
            print('\nWeight: %s' % full_weight[0])
            print(cfg.name)
            print(norm_cut)
        # zip stops at the shorter sequence, so a trailing weight buffer in
        # ``branches`` is left untouched here
        for branch, var in zip(branches, vcfgs):
            branch[0] = var.formula.EvalInstance() if hasattr(var, 'formula') else getattr(ttree, var.name)
        out_tree.Fill()

    if shape_cut != norm_cut:
        print('WARNING: different norm and shape cuts currently not supported in HistCreator.createTrees')
Esempio n. 3
0
def Count(chan, trigs):
    """Print the raw and weighted number of events passing a selection.

    Two chains are built from the files matching ``<prepath>tth*.root``:
    ``sumWeights`` provides the per-file sum of ``totalEventsWeighted`` used
    for normalisation, and ``nominal`` provides the events. An event is
    counted when both the ``chan`` and ``trigs`` formulas evaluate to a
    non-zero value. The result is printed as ``raw(weighted)``.

    Parameters:
      chan: TTreeFormula expression for the signal-region selection
      trigs: TTreeFormula expression for the trigger selection

    Raises:
      ValueError: if a selected event comes from a file whose name contains
        none of the known sample identifiers
    """
    # Branches needed by the selection formulas and by the event weight
    active_branches = (
        "Mll01",
        "total_charge",
        "lep_Pt_0",
        "lep_Pt_1",
        "lep_Eta_0",
        "lep_Eta_1",
        "lep_ID_0",
        "lep_ID_1",
        "lep_truthPdgId_0",
        "lep_truthPdgId_1",
        "lep_truthOrigin_0",
        "lep_truthOrigin_1",
        "lep_truthType_0",
        "lep_truthType_1",
        "lep_isQMisID_0",
        "lep_isQMisID_1",
        "nJets_OR_T_MV2c10_70",
        "nJets_OR_T",
        "lep_isTightLH_0",
        "lep_isTightLH_1",
        "lep_isLooseLH_0",
        "lep_isLooseLH_1",
        "lep_isolationFixedCutTight_0",
        "lep_isolationFixedCutLoose_0",
        "lep_isolationFixedCutTight_1",
        "lep_isolationFixedCutLoose_1",
        "lep_isolationFixedCutTightTrackOnly_0",
        "lep_isolationFixedCutTightTrackOnly_1",
        "HLT*",
        "*type",
        "RunYear",
        "passEventCleaning",
        "lep_isTrigMatch_0",
        "lep_isTrigMatch_1",
        "lep_isTrigMatchDLT_0",
        "lep_isTrigMatchDLT_1",
        "mcWeightOrg",
        "pileupEventWeight_090",
        "lepSFObjTight",
        "lepSFTrigTight",
        "JVT_EventWeight",
        "SherpaNJetWeight",
        "MV2c10_70_EventWeight",
        "lep_chargeIDBDT*",
        "nTaus_OR_Pt25",
        "tau_JetBDTSigTight_0",
        "tau_JetBDTSigTight_1",
        "tau_tagWeightBin_0",
        "tau_tagWeightBin_1",
        "tau_passMuonOLR_0",
        "tau_passMuonOLR_1",
        "tau_passEleBDT_0",
        "tau_passEleBDT_1",
        "tau_charge_0",
        "tau_charge_1",
        "lep_ID_2",
        "Mll02",
        "lep_promptLeptonVeto_TagWeight_0",
        "lep_promptLeptonVeto_TagWeight_1",
        "lep_ambiguityType_0",
        "lep_ambiguityType_1",
    )
    # Base weight per sample, keyed by the identifier found in the file name
    # (presumably the sample cross-section -- confirm)
    sample_weights = (
        ("341177", 0.05343),
        ("341270", 0.22276),
        ("341271", 0.23082),
    )

    # deal with weights first: one summed totalEventsWeighted per file
    sumWeights = TChain("sumWeights")
    sumWeights.Add("%stth*.root" % prepath)
    weights = []
    fCurrent_wt = 0
    sampleNEvt = 0
    nWeightEntries = sumWeights.GetEntries()
    for a in range(nWeightEntries):
        sumWeights.GetEntry(a)
        if sumWeights.GetTreeNumber() != fCurrent_wt:
            # Moved on to the next file: store the finished sum
            fCurrent_wt = sumWeights.GetTreeNumber()
            weights.append(sampleNEvt)
            sampleNEvt = 0
        sampleNEvt = sampleNEvt + sumWeights.totalEventsWeighted
    if nWeightEntries:
        weights.append(sampleNEvt)  # last file

    chain = TChain("nominal")
    chain.Add("%stth*.root" % prepath)
    nentries = chain.GetEntries()

    # Disable everything, then switch on only what is read below
    chain.SetBranchStatus("*", 0)
    for branch_name in active_branches:
        chain.SetBranchStatus(branch_name, 1)

    # cuts
    fCurrent = -1
    chain.LoadTree(0)
    cuts_sr = TTreeFormula("cuts_sr", chan, chain)
    cuts_trig = TTreeFormula("cuts_trig", trigs, chain)
    raw_evts, numevts = 0, 0
    for evt in range(nentries):
        chain.GetEntry(evt)
        if chain.GetTreeNumber() != fCurrent:
            # The chain switched files; the formulas must be notified
            fCurrent = chain.GetTreeNumber()
            cuts_sr.Notify()
            cuts_trig.Notify()
        if not (cuts_sr.EvalInstance() and cuts_trig.EvalInstance()):
            continue

        # Resolve the sample weight from the file name. As with the original
        # chain of independent checks, the last matching identifier wins.
        currentFileName = chain.GetCurrentFile().GetName()
        weight = None
        for sample_id, value in sample_weights:
            if sample_id in currentFileName:
                weight = value
        if weight is None:
            # Previously this used an unbound name or the previous event's
            # weight; fail loudly instead of producing a wrong yield
            raise ValueError(
                "no sample weight known for file %s" % currentFileName)

        # Integrated-luminosity constant chosen by RunYear
        RunYear = chain.RunYear
        lumi = 1.0
        if RunYear < 2016.5: lumi = 36074.6
        if RunYear > 2016.5: lumi = 43813.7

        # weights[fCurrent] assumes both chains list the files in the same
        # order and that every file contributes sumWeights entries
        weight = (weight * chain.mcWeightOrg * chain.pileupEventWeight_090 *
                  chain.lepSFObjTight * chain.lepSFTrigTight *
                  chain.JVT_EventWeight * chain.SherpaNJetWeight *
                  chain.MV2c10_70_EventWeight * lumi / weights[fCurrent])
        raw_evts = raw_evts + 1
        numevts = numevts + weight
    print("%s(%.2f)" % (raw_evts, numevts))
Esempio n. 4
0
    def run(self, selections, dv, dv2d, ch='', name='', nevents=-1):
        """Fill 1D and 2D histograms for every selection.

        The ``events`` tree of the file ``self.rt`` is scanned once per
        selection. Events for which the selection formula is positive fill
        ``self.sv[selection][variable]`` and ``self.sv2d[selection][variable]``
        with weight ``self.w * weight * <tag weight>``.

        A selection may be written as ``'<weight spec> ** <formula>'``:
          - a spec containing ``tagin`` and a number N uses
            ``1 - sum(weight_<i>tagex for i < N)``;
          - a spec containing ``tagless`` and a number N uses
            ``sum(weight_<i>tagex for i < N)``;
          - any other spec is read as a branch name holding the weight.

        Parameters:
          selections: Selection strings as described above
          dv: Maps a variable key to a dict with ``name``, ``title``, ``bin``,
            ``xmin``, ``xmax`` and optionally ``divide``
          dv2d: Maps a variable key to a dict with ``namex``, ``namey``,
            ``titlex``, ``titley``, ``binx``, ``xmin``, ``xmax``, ``biny``,
            ``ymin``, ``ymax``
          ch: Not used by this method
          name: Prefix for histogram names; ``''`` means ``self.name``
          nevents: Number of entries to process; ``-1`` means all
        """
        # initialize dictionary selection: list of histograms
        # NOTE(review): histograms are only booked when ``name`` is empty;
        # with an explicit ``name`` they must already exist -- confirm intent
        if name == '':
            name = self.name
            for nsel, s in enumerate(selections):
                self.sv[s] = collections.OrderedDict()
                self.sv2d[s] = collections.OrderedDict()
                selstr = 'sel{}'.format(nsel)

                for v, spec in dv.items():
                    hname = '{}_{}_{}'.format(name, selstr, v)
                    self.sv[s][v] = TH1D(hname, hname + ";" + spec["title"] + ";",
                                         spec["bin"], spec["xmin"], spec["xmax"])
                    self.sv[s][v].Sumw2()

                for v, spec in dv2d.items():
                    hname = '{}_{}_{}'.format(name, selstr, v)
                    self.sv2d[s][v] = TH2D(
                        hname,
                        hname + ";" + spec["titlex"] + ";" + spec["titley"] + ";",
                        spec["binx"], spec["xmin"], spec["xmax"],
                        spec["biny"], spec["ymin"], spec["ymax"])
                    self.sv2d[s][v].Sumw2()

        rf = TFile(self.rt)
        t = rf.Get("events")
        if nevents == -1:
            numberOfEntries = t.GetEntries()
            print('running over the full entries  %i' % numberOfEntries)
        elif t.GetEntries() < nevents:
            # Never ask for more entries than the tree holds
            numberOfEntries = t.GetEntries()
            print('running over the full entries  %i' % numberOfEntries)
        else:
            numberOfEntries = nevents
            print('running over a subset of entries  %i' % numberOfEntries)

        for s in selections:
            weighttrf_name = ''
            weighttrfin_name = []
            weighttrfless_name = []

            sformula = s
            if '**' in s:
                s_split = s.split('**')
                sformula = s_split[1]
                weighttrf_name = s_split[0].strip()
                if 'tagin' in weighttrf_name or 'tagless' in weighttrf_name:
                    # The number embedded in the spec is the tag multiplicity
                    nbtagex = int(''.join(
                        c for c in weighttrf_name if c.isdigit()))
                    exclusive = ['weight_%itagex' % i for i in range(nbtagex)]
                    if 'tagin' in weighttrf_name:
                        weighttrfin_name = list(exclusive)
                    if 'tagless' in weighttrf_name:
                        weighttrfless_name = list(exclusive)

            formula = TTreeFormula("", sformula, t)

            # loop over events
            print('number of events: %s' % numberOfEntries)
            for entry in xrange(numberOfEntries):
                if (entry + 1) % 500 == 0:
                    sys.stdout.write('... %i events processed ...\r' % (entry + 1))
                    sys.stdout.flush()

                t.GetEntry(entry)

                # apply selection first: rejected events need no weight
                if not formula.EvalInstance() > 0.:
                    continue

                weighttrf = 1.
                if weighttrf_name != '':
                    if not weighttrfin_name and not weighttrfless_name:
                        weighttrf = getattr(t, weighttrf_name)
                    elif not weighttrfless_name:
                        # inclusive: 1 minus the exclusive weights below N
                        for branch in weighttrfin_name:
                            weighttrf -= getattr(t, branch)
                    elif not weighttrfin_name:
                        # fewer than N: sum of the exclusive weights below N
                        weighttrf = 0.
                        for branch in weighttrfless_name:
                            weighttrf += getattr(t, branch)

                weight = self.w * getattr(t, "weight") * weighttrf

                # fill histos on selected events
                for v, spec in dv.items():
                    divide = spec.get("divide", 1)
                    self.sv[s][v].Fill(getattr(t, spec["name"]) / divide, weight)
                for v, spec in dv2d.items():
                    self.sv2d[s][v].Fill(getattr(t, spec["namex"]),
                                         getattr(t, spec["namey"]), weight)
Esempio n. 5
0
    def run(self, selections, dv, dv2d, ch='', name='', nevents=-1):
        """Fill 1D and 2D histograms for every selection.

        The ``events`` tree of the file ``self.rt`` is scanned once per
        selection. Events for which the selection formula is positive fill
        ``self.sv[selection][variable]`` and ``self.sv2d[selection][variable]``
        with weight ``self.w * weight``.

        Parameters:
          selections: TTreeFormula expressions, one per selection
          dv: Maps a variable key to a dict with ``name``, ``title``, ``bin``,
            ``xmin``, ``xmax`` and optionally ``divide``
          dv2d: Maps a variable key to a dict with ``namex``, ``namey``,
            ``titlex``, ``titley``, ``binx``, ``xmin``, ``xmax``, ``biny``,
            ``ymin``, ``ymax``
          ch: Not used by this method
          name: Prefix for histogram names; ``''`` means ``self.name``
          nevents: Number of entries to process; ``-1`` means all. Values
            larger than the tree are clamped to the tree size
        """
        # initialize dictionary selection: list of histograms
        # NOTE(review): histograms are only booked when ``name`` is empty;
        # with an explicit ``name`` they must already exist -- confirm intent
        if name == '':
            name = self.name
            for nsel, s in enumerate(selections):
                self.sv[s] = collections.OrderedDict()
                self.sv2d[s] = collections.OrderedDict()
                selstr = 'sel{}'.format(nsel)

                for v, spec in dv.items():
                    hname = '{}_{}_{}'.format(name, selstr, v)
                    self.sv[s][v] = TH1D(hname,
                                         hname + ";" + spec["title"] + ";",
                                         spec["bin"], spec["xmin"],
                                         spec["xmax"])
                    self.sv[s][v].Sumw2()

                for v, spec in dv2d.items():
                    hname = '{}_{}_{}'.format(name, selstr, v)
                    self.sv2d[s][v] = TH2D(
                        hname,
                        hname + ";" + spec["titlex"] + ";" +
                        spec["titley"] + ";",
                        spec["binx"],
                        spec["xmin"],
                        spec["xmax"],
                        spec["biny"],
                        spec["ymin"],
                        spec["ymax"],
                    )
                    self.sv2d[s][v].Sumw2()

        rf = TFile(self.rt)
        t = rf.Get("events")
        if nevents == -1:
            numberOfEntries = t.GetEntries()
            print('running over the full entries  %i' % numberOfEntries)
        elif t.GetEntries() < nevents:
            # Reading past the end of the tree would keep re-filling the
            # values of the last loaded entry, so clamp to the tree size
            numberOfEntries = t.GetEntries()
            print('running over the full entries  %i' % numberOfEntries)
        else:
            numberOfEntries = nevents
            print('running over a subset of entries  %i' % numberOfEntries)

        for s in selections:

            formula = TTreeFormula("", s, t)

            # loop over events
            print('number of events: %s' % numberOfEntries)
            for entry in xrange(numberOfEntries):
                if (entry + 1) % 500 == 0:
                    sys.stdout.write('... %i events processed ...\r' %
                                     (entry + 1))
                    sys.stdout.flush()

                t.GetEntry(entry)

                # apply selection first: rejected events need no weight
                if not formula.EvalInstance() > 0.:
                    continue

                weight = self.w * getattr(t, "weight")

                # fill histos on selected events
                for v, spec in dv.items():
                    divide = spec.get("divide", 1)
                    self.sv[s][v].Fill(
                        getattr(t, spec["name"]) / divide, weight)
                for v, spec in dv2d.items():
                    self.sv2d[s][v].Fill(getattr(t, spec["namex"]),
                                         getattr(t, spec["namey"]),
                                         weight)
Esempio n. 6
0
        # Write a copy of tree_out, with an added per-event 'full_weight'
        # branch, into a sibling file named '<original>_weight.root'.
        new_file_out = TFile(file_out_name.replace('.root', '_weight.root'),
                             'RECREATE')

        # CloneTree(0) copies the branch structure of tree_out without
        # copying any entries; entries are added by the Fill() calls below.
        weight_tree = tree_out.CloneTree(0)

        # Constant per-sample factor applied to every event weight
        scale = int_lumi * sample.xsec * sample.scale / sample.sumweights

        # One-element float buffer backing the new branch
        full_weight = array('f', [0.])
        new_b = weight_tree.Branch('full_weight', full_weight, 'full_weight/F')
        formula = TTreeFormula('weight_formula', weight, tree_out)
        formula.GetNdata()

        # ATTENTION THIS MAY NOT WORK!
        # For each input entry: load it, evaluate the weight expression,
        # store the scaled value in the buffer and append an entry to the
        # clone.
        for i in xrange(tree_out.GetEntries()):
            tree_out.GetEntry(i)
            full_weight[0] = formula.EvalInstance() * scale
            # print full_weight[0]
            # new_b.Fill()
            weight_tree.Fill()
            # tree_out.Fill()

        new_file_out.Write()
        new_file_out.Close()
        file_out.Close()

        # NOTE(review): this reports file_out_name, not the '_weight.root'
        # file written above -- confirm which name is meant.
        print 'Writing file', file_out_name

        # Record the scale factor and the cut string used for this sample
        out_dict[name] = {}
        out_dict[name]['weight'] = scale
        out_dict[name][cut.name] = cut_str
Esempio n. 7
0
 # Copy the selected entries of MeritIn into a DataFrame that is appended to
 # df_. This fragment runs inside an enclosing loop that is not visible here
 # (note the `continue` below).
 bbar = Bar("%s: Progress..."%os.path.basename(ifilename),max=int(nEnt), suffix=bar_suffix)
 if(nEnt==0):
     # Nothing to read: drop the tree, log it and move on to the next input
     del MeritIn
     logging.warning("WARNING! EMPTY FILE %s",ifilename)
     filenum1+=1
     continue
 if not options.cuts is None:
     CutEval=TTreeFormula("CutEval",options.cuts,MeritIn)
 #nEntTot+=nEnt
 # One row per tree entry, pre-filled with 0. Rows of entries that fail the
 # cut are never overwritten below and therefore stay 0.
 tmp_frame = pd.DataFrame(index=np.arange(nEnt),columns=allColumns)
 tmp_frame = tmp_frame.fillna(0)
 for i in range(nEnt):
     #if i<100:
     MeritIn.GetEntry(i)
     if not options.cuts is None:
         # NOTE(review): the entry index i is passed as EvalInstance's
         # argument; ROOT documents that argument as the instance index
         # within the current entry -- confirm this is intended.
         # Skipped entries also do not advance the progress bar.
         if(CutEval.EvalInstance(i)==0): continue
     # specifically clone branches: copy the current values into the
     # one-element buffers IntBrVal / DblBrVal
     for j in range(len(IntBrName)):
         IntBrVal[j][0]=getattr(MeritIn,IntBrName[j])
     for j in range(len(DblBrName)):
         DblBrVal[j][0]=getattr(MeritIn,DblBrName[j])
     # this fills the row with all values and appends to existing dataframe;
     # integer-branch values come first, then double-branch values
     if len(IntBrVal) and len(DblBrVal):
         series=np.hstack([np.column_stack(IntBrVal),np.column_stack(DblBrVal)])
     else:
         series = np.array(IntBrVal if len(IntBrVal) else DblBrVal).T            
     tmp_frame.loc[i] = series[0]
     del series
     bbar.next()
 # presumably df_ is a list of per-file frames combined later -- verify
 df_.append(tmp_frame)
 del tmp_frame
Esempio n. 8
0
def load(inputfile, target, inputs, adversary, weight, reweight):
    """
    Read training data from the ``events`` tree of a ROOT file.

    Parameters:
      inputfile: Name of the ROOT file that contains all our data for training
      target: Expression that yields class number
      inputs: List of expressions to input into the classifier
      adversary: Expressions that the adversary should not be able to guess
        from the classifier output
      weight: Expression to get the sample weights; a false value means the
        events are unweighted
      reweight: Bool to decide if should reweight

    Returns:
      A tuple of four items that can be used in fitting
      - Labels that are used to classify, passed through
        ``keras.utils.to_categorical``
      - Raw data to do the classification with
      - Data that should not be predictable based on the predicted label
      - Per-event weights, or ``None`` when neither ``weight`` nor
        ``reweight`` is set
    """

    in_file = TFile(inputfile)
    tree = in_file.events

    target_form = TTreeFormula(target, target, tree)
    val_forms = [TTreeFormula(v, v, tree) for v in inputs]
    adversary_forms = [TTreeFormula(a, a, tree) for a in adversary]
    weight_form = TTreeFormula(weight, weight, tree) if weight else None

    reserve = tree.GetEntries()

    data = numpy.zeros((reserve, len(inputs)))
    smooths = numpy.zeros((reserve, len(adversary)))
    labels = numpy.zeros((reserve, 1))
    if weight:
        weights = numpy.zeros(reserve)
    elif reweight:
        # No weight expression: start from unit weights so the reweighting
        # below has something to scale instead of failing on None
        weights = numpy.ones(reserve)
    else:
        weights = None

    logging.info('Reading %i events', reserve)

    # Set up the inputs
    for event, _ in enumerate(tree):
        if event == reserve:
            break

        if event % 10000 == 0:
            logging.info('Filling %s', str(float(event * 100) / reserve) + '%')

        labels[event][0] = target_form.EvalInstance()
        if weight:
            weights[event] = weight_form.EvalInstance()

        for jndex, val in enumerate(val_forms):
            data[event][jndex] = val.EvalInstance()

        for jndex, adv in enumerate(adversary_forms):
            smooths[event][jndex] = adv.EvalInstance()

    if reweight:
        # Want to reweight each class separately
        smooth_dict = collections.defaultdict(list)

        for label, point in zip(labels, smooths):
            smooth_dict[label[0]].append(point)

        reweighters = {
            key: Reweighter(row)
            for key, row in smooth_dict.items()
        }

        # NOTE(review): the reweighters are built from the adversary values
        # but queried with the classifier inputs -- confirm this is intended
        for index, point in enumerate(data):
            weights[index] *= reweighters[labels[index][0]].get_weight(point)

    return keras.utils.to_categorical(labels), data, smooths, weights