Esempio n. 1
0
    def fit(self, input_file, cut, num_events, output, epochs, batch):
        """
        Read training data from a ROOT tree, fit the model and write the graph.

        Parameters:
          input_file: Name of the ROOT file; its ``events`` tree is read
          cut: Expression an event has to pass to be used; a false value
            (e.g. an empty string) disables the selection
          num_events: Maximum number of selected events to use; any value
            that is not positive means "all selected events"
          output: File name of the graph written into the ``weights`` directory
          epochs: Number of epochs passed on to ``self.model.fit``
          batch: Batch size passed on to ``self.model.fit``
        """
        in_file = TFile(input_file)
        tree = in_file.events
        val_forms = [TTreeFormula(v, v, tree) for v in self.vars]
        target_forms = [TTreeFormula(t, t, tree) for t in self.targets]
        cut_form = TTreeFormula(cut, cut, tree) if cut else None

        # Upper bound on the number of rows that can be filled
        reserve = num_events if num_events > 0 else tree.GetEntries()

        inputs = numpy.zeros((reserve, len(self.vars)))
        targets = [numpy.zeros((reserve, 1)) for _ in self.targets]

        # Set up the inputs; ``filled`` counts the events that passed the cut
        filled = 0
        for _ in tree:
            # Compare against ``reserve`` (not ``num_events``) so that a
            # non-positive ``num_events`` really means "no limit"
            if filled == reserve:
                break
            if cut_form and not cut_form.EvalInstance():
                continue

            if filled % 10000 == 0:
                print('Filling ' + str(float(filled * 100) / reserve) + '%')

            for jndex, val in enumerate(val_forms):
                inputs[filled][jndex] = val.EvalInstance()

            for jndex, target in enumerate(target_forms):
                targets[jndex][filled][0] = target.EvalInstance()

            filled += 1

        # Rows that were reserved but never filled (events removed by the cut,
        # or fewer tree entries than requested) would otherwise be used as
        # all-zero training samples
        inputs = inputs[:filled]
        targets = [column[:filled] for column in targets]

        self.model.fit(inputs,
                       targets,
                       validation_split=0.5,
                       epochs=epochs,
                       batch_size=batch,
                       callbacks=[
                           keras.callbacks.TensorBoard(
                               log_dir='weights/logdir',
                               histogram_freq=1,
                               write_graph=True,
                               write_images=True)
                       ])

        # Turn the session's variables into constants and store the graph
        # in binary form
        sess = keras.backend.get_session()
        output_node = [n.op.name for n in self.model.outputs]
        print('Output node %s' % (output_node,))
        graph = graph_util.convert_variables_to_constants(
            sess, sess.graph.as_graph_def(), output_node)

        graph_io.write_graph(graph, 'weights', output, as_text=False)
Esempio n. 2
0
 def getTTreeFormula(self, tree=0):
     """Return a ``TTreeFormula`` named ``self.name`` bound to `tree`.

     The formula expression is ``self.cut``; when that is false-valued
     (e.g. an empty string) the constant expression ``'1'`` is used.
     """
     from ROOT import TTreeFormula
     import warnings
     # Ignore RuntimeWarnings whose message starts with "creating converter"
     warnings.filterwarnings(action='ignore',
                             category=RuntimeWarning,
                             message='creating converter.*')
     expression = self.cut if self.cut else '1'
     return TTreeFormula(self.name, expression, tree)
Esempio n. 3
0
 def _genPlotCuts(self, plots):
     cuts = []
     for plot in plots:
         cutstr = '1.0'
         if plot.cuts != "":
             cutstr = plot.cuts
         cuts.append([TTreeFormula(id_gen(10), cutstr, self._tree)])
     return cuts
Esempio n. 4
0
 def _genPlotFormulae(self, plots):
     formulae = []
     for plot in plots:
         formulae.append([])
         for var in plot.treeVariables:
             if plot.externalFunction is None:
                 formulae[-1].append(
                     TTreeFormula(id_gen(10), var, self._tree))
     return formulae
Esempio n. 5
0
def add_var_set_br_addr(varlist, reader_method, intree, allvars):
    """Register each variable with the reader and bind a formula to the tree.

    Every entry of `varlist` is either a plain expression or has the form
    ``name:=expression``.  For each entry, ``allvars[name]`` is set to a
    two-element list: a one-element float array, which is also handed to
    `reader_method` together with the unmodified entry, and a
    ``TTreeFormula`` of the expression on `intree`.
    """
    for var in varlist:
        # "var:=var1+var2" -> ("var", ":=", "var1+var2"); "var" -> ("var", "", "")
        name, separator, formula = var.partition(':=')
        if not separator:
            # plain variable: the expression doubles as the key
            formula = name
        value_holder = array('f', [0.])  # array for TMVA::Reader
        allvars[name] = [value_holder, TTreeFormula(name, formula, intree)]
        reader_method(var, allvars[name][0])
Esempio n. 6
0
def fill_dataset(varargset, ftree, wt, wtvar, cut=''):
    """Build a dataset entry by entry (slow, get_dataset is more efficient).

    The ntuple `ftree' is read with `cut' applied through a splice named
    'sel'.  `wt' is the weight expression inside the tree and `wtvar' the
    matching RooRealVar weight; `varargset' is expected to contain `wtvar'.

    Looping over the tree by hand is what allows different datasets to
    share the same weight variable name, so that they can be combined
    later on (also when they are combined as different categories).

    """

    from rplot.fixes import ROOT
    from ROOT import RooDataSet, RooFit, TTreeFormula
    from helpers import suppress_warnings
    suppress_warnings()
    from rplot.tselect import Tsplice
    # keep the splice object referenced for the whole function
    splice = Tsplice(ftree)
    splice.make_splice('sel', cut)

    weight_name = wtvar.GetName()
    # one formula per variable; the weight variable evaluates `wt' instead
    formulae = {}
    for member in varargset:
        member_name = member.GetName()
        if member_name == weight_name:
            expression = wt
        else:
            expression = member_name
        formulae[member_name] = TTreeFormula(member_name, expression, ftree)

    dataset = RooDataSet('dataset', 'Dataset', varargset,
                         RooFit.WeightVar(wtvar))
    for entry in xrange(ftree.GetEntries()):
        ftree.GetEntry(entry)
        # copy the current entry into the variables, then store the row
        for member_name, formula in formulae.iteritems():
            varargset.find(member_name).setVal(formula.EvalInstance())
        dataset.add(varargset, varargset[weight_name].getVal())
    return dataset
Esempio n. 7
0
    def run(self, selections, dv, dv2d, ch='', name='', nevents=-1):
        """Fill 1D and 2D histograms for every selection from the ``events`` tree.

        Parameters:
          selections: Selection expressions; each one is compiled into a
            ``TTreeFormula`` and an event is kept when it evaluates > 0
          dv: Mapping of 1D variable key to a dict with the entries
            "name", "title", "bin", "xmin", "xmax" and optionally "divide"
          dv2d: Mapping of 2D variable key to a dict with the entries
            "namex", "namey", "titlex", "titley", "binx", "xmin", "xmax",
            "biny", "ymin", "ymax"
          ch: Not used by this method
          name: Histogram name prefix; when empty, ``self.name`` is used and
            the histograms are booked into ``self.sv`` / ``self.sv2d``
          nevents: Number of entries to process; -1 means all entries.  A
            value larger than the tree is reduced to the tree size.
        """
        # initialize dictionary selection: list of histograms
        # NOTE(review): booking only happens when `name` is empty; with an
        # explicit name self.sv / self.sv2d must already be populated - confirm
        if name == '':
            name = self.name
            for nsel, s in enumerate(selections):
                self.sv[s] = collections.OrderedDict()
                self.sv2d[s] = collections.OrderedDict()
                selstr = 'sel{}'.format(nsel)

                for v in dv.keys():
                    hname = '{}_{}_{}'.format(name, selstr, v)
                    self.sv[s][v] = TH1D(hname,
                                         hname + ";" + dv[v]["title"] + ";",
                                         dv[v]["bin"], dv[v]["xmin"],
                                         dv[v]["xmax"])
                    self.sv[s][v].Sumw2()

                for v in dv2d.keys():
                    hname = '{}_{}_{}'.format(name, selstr, v)
                    self.sv2d[s][v] = TH2D(
                        hname,
                        hname + ";" + dv2d[v]["titlex"] + ";" +
                        dv2d[v]["titley"] + ";",
                        dv2d[v]["binx"],
                        dv2d[v]["xmin"],
                        dv2d[v]["xmax"],
                        dv2d[v]["biny"],
                        dv2d[v]["ymin"],
                        dv2d[v]["ymax"],
                    )
                    self.sv2d[s][v].Sumw2()

        rf = TFile(self.rt)
        t = rf.Get("events")
        totalEntries = t.GetEntries()
        # Never loop past the end of the tree when more entries are
        # requested than the tree contains
        if nevents == -1 or totalEntries < nevents:
            numberOfEntries = totalEntries
            print('running over the full entries  %i' % numberOfEntries)
        else:
            numberOfEntries = nevents
            print('running over a subset of entries  %i' % numberOfEntries)

        for s in selections:

            formula = TTreeFormula("", s, t)

            # loop over events
            print('number of events: %s' % numberOfEntries)
            for entry in xrange(numberOfEntries):
                if (entry + 1) % 500 == 0:
                    sys.stdout.write('... %i events processed ...\r' %
                                     (entry + 1))
                    sys.stdout.flush()

                t.GetEntry(entry)
                weight = self.w * getattr(t, "weight")

                # apply selection
                result = formula.EvalInstance()

                # fill histos on selected events
                if result > 0.:
                    for v in dv.keys():
                        # optional rescaling of the plotted quantity
                        divide = dv[v].get("divide", 1)
                        self.sv[s][v].Fill(
                            getattr(t, dv[v]["name"]) / divide, weight)
                    for v in dv2d.keys():
                        self.sv2d[s][v].Fill(getattr(t, dv2d[v]["namex"]),
                                             getattr(t, dv2d[v]["namey"]),
                                             weight)
Esempio n. 8
0
def load(inputfile, target, inputs, adversary, weight, reweight):
    """
    Read the ``events`` tree of a ROOT file into numpy arrays for training.

    Parameters:
      inputfile: Name of the ROOT file that contains all our data for training
      target: Expression that yields class number
      inputs: List of expressions to input into the classifier
      adversary: Expressions that the adversary should not be able to guess
        from the classifier output
      weight: Expression to get the sample weights; a false value (e.g. an
        empty string) means that no weights are read
      reweight: Bool to decide if should reweight

    Returns:
      Tuple ``(labels, data, smooths, weights)`` with
      - Labels that are used to classify, passed through
        ``keras.utils.to_categorical``
      - Raw data to do the classification with
      - Data that should not be predictable based on the predicted label
      - Per-event weights, or None when neither `weight` nor `reweight`
        is set
    """

    in_file = TFile(inputfile)
    tree = in_file.events

    target_form = TTreeFormula(target, target, tree)
    val_forms = [TTreeFormula(v, v, tree) for v in inputs]
    adversary_forms = [TTreeFormula(a, a, tree) for a in adversary]
    weight_form = TTreeFormula(weight, weight, tree) if weight else None

    reserve = tree.GetEntries()

    data = numpy.zeros((reserve, len(inputs)))
    smooths = numpy.zeros((reserve, len(adversary)))
    labels = numpy.zeros((reserve, 1))
    weights = numpy.zeros(reserve) if weight else None

    logging.info('Reading %i events', reserve)

    # Set up the inputs
    for event, _ in enumerate(tree):
        if event == reserve:
            break

        if event % 10000 == 0:
            logging.info('Filling %s%%', float(event * 100) / reserve)

        labels[event][0] = target_form.EvalInstance()
        if weight_form is not None:
            weights[event] = weight_form.EvalInstance()

        for jndex, val in enumerate(val_forms):
            data[event][jndex] = val.EvalInstance()

        for jndex, adv in enumerate(adversary_forms):
            smooths[event][jndex] = adv.EvalInstance()

    if reweight:
        # Without a weight expression there is nothing to scale yet, so
        # start every event from a weight of one
        if weights is None:
            weights = numpy.ones(reserve)

        # Want to reweight each class separately
        smooth_dict = collections.defaultdict(list)

        for label, point in zip(labels, smooths):
            smooth_dict[label[0]].append(point)

        reweighters = {
            key: Reweighter(row)
            for key, row in smooth_dict.iteritems()
        }

        # NOTE(review): the reweighters are built from the adversary values
        # but queried with rows of `data` - confirm this is intended
        for index, point in enumerate(data):
            weights[index] *= reweighters[labels[index][0]].get_weight(point)

    return keras.utils.to_categorical(labels), data, smooths, weights
    mva2_val = array('f', [0.])

    tree_out = TTree('tree', 'tree')

    mva0_name = 'mva0'
    tree_out.Branch(mva0_name, mva0_val, mva0_name+'/F')
    mva1_name = 'mva1'
    tree_out.Branch(mva1_name, mva1_val, mva1_name+'/F')
    mva2_name = 'mva2'
    tree_out.Branch(mva2_name, mva2_val, mva2_name+'/F')

    ave_mva = 0.

    for var in train_vars:
        if var.drawname != var.name:
            var.formula = TTreeFormula('formula'+var.name, var.drawname, tree_in)
            var.formula.GetNdata()

    for i_ev, event in enumerate(tree_in):

        if i_ev % 10000 == 0:
            print 'Event', i_ev

        split_var_val = split_var.formula.EvalInstance() if hasattr(split_var, 'formula') else getattr(event, split_var.name)

        if int(split_var_val * 1000) % 2 == 0:
            clf = clf0_0jet
            if event.n_jets > 0.5:
                if event.vbf_mjj > 500. and event.vbf_deta > 3.5:
                    clf = clf0_vbf
                else:
Esempio n. 10
0
def fillIntoTree(out_tree, branches, cfg, hist_cfg, vcfgs, total_scale, plot, verbose, friend_func):
    """Copy the weighted events of one sample configuration into `out_tree`.

    A ``HistogramCfg`` is expanded recursively: its ``total_scale`` (when
    set) multiplies `total_scale` and every sub-configuration is filled in
    turn.  For any other configuration the sample tree is read through
    ``plot.readTree`` and each entry with a non-zero weight is written out.

    Parameters:
      out_tree: Tree whose ``Fill`` is called once per kept entry
      branches: Buffers written before every ``Fill``; they are paired with
        `vcfgs` in order and the last one receives the full event weight
      cfg: Sample configuration, or a ``HistogramCfg`` grouping several
      hist_cfg: Configuration providing ``cut``, ``weight`` and ``lumi``
      vcfgs: Variable configurations with ``name`` and ``drawname``
      total_scale: Scale factor accumulated from enclosing configurations
      plot: Object whose ``readTree`` returns the sample tree
      verbose: Passed through to ``plot.readTree``
      friend_func: Passed through to ``plot.readTree``
    """

    if isinstance(cfg, HistogramCfg):
        # Loop over sub-cfgs and fill them
        total_scale *= cfg.total_scale if cfg.total_scale else 1.
        for sub_cfg in cfg.cfgs:
            fillIntoTree(out_tree, branches, sub_cfg, cfg, vcfgs, total_scale, plot, verbose, friend_func)
        return

    file_name = '/'.join([cfg.ana_dir, cfg.dir_name, cfg.tree_prod_name, 'tree.root'])

    # Attaches tree to plot
    ttree = plot.readTree(file_name, cfg.tree_name, verbose=verbose, friend_func=friend_func)

    # Sample-specific cuts take precedence over the common one
    norm_cut = cfg.norm_cut if cfg.norm_cut else hist_cfg.cut
    shape_cut = cfg.shape_cut if cfg.shape_cut else hist_cfg.cut

    full_weight = branches[-1]

    # NOTE(review): cfg.weight_expr only takes effect when hist_cfg.weight is
    # set, because the weight is applied under that condition - confirm
    weight = hist_cfg.weight
    if cfg.weight_expr:
        weight = '*'.join([weight, cfg.weight_expr])

    if hist_cfg.weight:
        norm_cut = '({c}) * {we}'.format(c=norm_cut, we=weight)
        shape_cut = '({c}) * {we}'.format(c=shape_cut, we=weight)

    # Overall normalisation of the sample
    sample_weight = cfg.scale * total_scale
    if not cfg.is_data:
        sample_weight *= hist_cfg.lumi*cfg.xsec/cfg.sumweights

    formula = TTreeFormula('weight_formula', norm_cut, ttree)
    formula.GetNdata()

    # Create TTreeFormulas for all vars whose draw expression is not simply
    # the branch name
    for var in vcfgs:
        if var.drawname != var.name:
            var.formula = TTreeFormula('formula'+var.name, var.drawname, ttree)
            var.formula.GetNdata()

    # Loop over the sample tree, compute the weight and save the entry to
    # the other tree
    for i in xrange(ttree.GetEntries()):
        ttree.GetEntry(i)
        w = formula.EvalInstance()
        if w == 0.:
            continue
        full_weight[0] = w * sample_weight
        if abs(full_weight[0]) > 1000.:
            # Report and carry on; no interactive debugger is started so
            # that non-interactive jobs are not blocked
            print("WARNING, unusually large weight %s %s" % (w, sample_weight))
            print('\nWeight: %s' % full_weight[0])
            print(cfg.name)
            print(norm_cut)
        for branch, var in zip(branches, vcfgs):
            branch[0] = var.formula.EvalInstance() if hasattr(var, 'formula') else getattr(ttree, var.name)
        out_tree.Fill()

    if shape_cut != norm_cut:
        print('WARNING: different norm and shape cuts currently not supported in HistCreator.createTrees')
Esempio n. 11
0
def Count(chan, trigs):
    """Print the raw and the weighted number of events passing a selection.

    The ``sumWeights`` and ``nominal`` chains are built from the files
    matching ``<prepath>tth*.root``.  An event is counted when both the
    `chan` and the `trigs` expression evaluate to true; the result is
    printed as ``raw(weighted)``.

    Parameters:
      chan: Selection expression compiled into a ``TTreeFormula``
      trigs: Trigger expression compiled into a ``TTreeFormula``

    Raises:
      RuntimeError: A selected event comes from a file whose name contains
        none of the known sample identifiers.
    """
    # Starting weight per sample, looked up by an identifier in the file name
    sample_weights = (
        ("341177", 0.05343),
        ("341270", 0.22276),
        ("341271", 0.23082),
    )
    # Branches (or wildcard patterns) that stay enabled on the event chain
    enabled_branches = (
        "Mll01",
        "total_charge",
        "lep_Pt_0",
        "lep_Pt_1",
        "lep_Eta_0",
        "lep_Eta_1",
        "lep_ID_0",
        "lep_ID_1",
        "lep_truthPdgId_0",
        "lep_truthPdgId_1",
        "lep_truthOrigin_0",
        "lep_truthOrigin_1",
        "lep_truthType_0",
        "lep_truthType_1",
        "lep_isQMisID_0",
        "lep_isQMisID_1",
        "nJets_OR_T_MV2c10_70",
        "nJets_OR_T",
        "lep_isTightLH_0",
        "lep_isTightLH_1",
        "lep_isLooseLH_0",
        "lep_isLooseLH_1",
        "lep_isolationFixedCutTight_0",
        "lep_isolationFixedCutLoose_0",
        "lep_isolationFixedCutTight_1",
        "lep_isolationFixedCutLoose_1",
        "lep_isolationFixedCutTightTrackOnly_0",
        "lep_isolationFixedCutTightTrackOnly_1",
        "HLT*",
        "*type",
        "RunYear",
        "passEventCleaning",
        "lep_isTrigMatch_0",
        "lep_isTrigMatch_1",
        "lep_isTrigMatchDLT_0",
        "lep_isTrigMatchDLT_1",
        "mcWeightOrg",
        "pileupEventWeight_090",
        "lepSFObjTight",
        "lepSFTrigTight",
        "JVT_EventWeight",
        "SherpaNJetWeight",
        "MV2c10_70_EventWeight",
        "lep_chargeIDBDT*",
        "nTaus_OR_Pt25",
        "tau_JetBDTSigTight_0",
        "tau_JetBDTSigTight_1",
        "tau_tagWeightBin_0",
        "tau_tagWeightBin_1",
        "tau_passMuonOLR_0",
        "tau_passMuonOLR_1",
        "tau_passEleBDT_0",
        "tau_passEleBDT_1",
        "tau_charge_0",
        "tau_charge_1",
        "lep_ID_2",
        "Mll02",
        "lep_promptLeptonVeto_TagWeight_0",
        "lep_promptLeptonVeto_TagWeight_1",
        "lep_ambiguityType_0",
        "lep_ambiguityType_1",
    )

    # deal with weights first: sum totalEventsWeighted separately for every
    # tree of the sumWeights chain
    sumWeights = TChain("sumWeights")
    sumWeights.Add("%stth*.root" % prepath)
    weights = []
    fCurrent_wt = 0
    sampleNEvt = 0
    nWeightEntries = sumWeights.GetEntries()
    for a in range(nWeightEntries):
        sumWeights.GetEntry(a)
        totalEventsWeighted = sumWeights.totalEventsWeighted
        if sumWeights.GetTreeNumber() != fCurrent_wt:
            # a new tree started: store the finished sum
            fCurrent_wt = sumWeights.GetTreeNumber()
            weights.append(sampleNEvt)
            sampleNEvt = 0
        sampleNEvt = sampleNEvt + totalEventsWeighted
    if nWeightEntries > 0:
        weights.append(sampleNEvt)  # last file

    chain = TChain("nominal")
    chain.Add("%stth*.root" % prepath)
    nentries = chain.GetEntries()

    chain.SetBranchStatus("*", 0)
    for branch in enabled_branches:
        chain.SetBranchStatus(branch, 1)

    # cuts
    fCurrent = -1
    chain.LoadTree(0)
    cuts_sr = TTreeFormula("cuts_sr", chan, chain)
    cuts_trig = TTreeFormula("cuts_trig", trigs, chain)
    raw_evts, numevts = 0, 0
    currentFileName = None
    sample_weight = None
    for evt in range(nentries):
        chain.GetEntry(evt)
        if chain.GetTreeNumber() != fCurrent:
            # the chain moved on to another tree: tell the formulae and
            # look up the starting weight of the new file
            fCurrent = chain.GetTreeNumber()
            cuts_sr.Notify()
            cuts_trig.Notify()
            currentFileName = chain.GetCurrentFile().GetName()
            sample_weight = None
            for sample_id, value in sample_weights:
                if sample_id in currentFileName:
                    sample_weight = value
        if not (cuts_sr.EvalInstance() and cuts_trig.EvalInstance()):
            continue
        if sample_weight is None:
            # never fall back to a weight left over from an earlier event
            raise RuntimeError(
                "Count: no sample weight known for file '%s'" % currentFileName)

        RunYear = chain.RunYear
        lumi = 1.0
        if RunYear < 2016.5:
            lumi = 36074.6
        elif RunYear > 2016.5:
            lumi = 43813.7

        kfac, filEff = 1, 1
        weight = (sample_weight * kfac * filEff * chain.mcWeightOrg *
                  chain.pileupEventWeight_090 * chain.lepSFObjTight *
                  chain.lepSFTrigTight * chain.JVT_EventWeight *
                  chain.SherpaNJetWeight * chain.MV2c10_70_EventWeight *
                  lumi / weights[fCurrent])
        raw_evts = raw_evts + 1
        numevts = numevts + weight
    print("%s(%.2f)" % (raw_evts, numevts))
Esempio n. 12
0
def MultiDraw(self, Formulae, CommonWeight="1"):
    """Draws many histograms in one loop over a tree.

        Instead of:
        MyTree.Draw( "nlcts >> a(100, -1, 1)", "weightA" )
        MyTree.Draw( "nlcts >> b(100, -1, 1)", "weightB" )

        Do:
        MyTree.MultiDraw( ( ( "nlcts >> a(100, -1, 1)", "weightA" ),
                            ( "nlcts >> b(100, -1, 1)", "weightB" ) ) )

        This is significantly faster when there are many histograms to be drawn.

        Parameters:
          Formulae: Sequence of histograms to draw.  Each entry is either a
            string "expression >> name(bins, min, max)", which creates the
            histogram, or "expression >> name", which fills an existing
            histogram (one created earlier in the same call when the name
            is written as "+name", otherwise one found in the current
            directory).  An entry can also be a tuple of such a string and
            a weight expression applied to that histogram only.
          CommonWeight: Weight expression given to all histograms.  If a
            tuple is passed here it is treated as one more entry of
            `Formulae` and the common weight becomes "1".

        Returns:
          List with the filled histogram of every entry, in order.

        Raises:
          RuntimeError: An entry cannot be parsed, an expression does not
            compile, or a histogram to fill cannot be found.
    """

    if isinstance(CommonWeight, tuple):
        # tuple() so that a list of formulae works here as well
        Formulae = (CommonWeight, ) + tuple(Formulae)
        CommonWeight = "1"

    results, formulae, weights = [], [], []

    lastFormula, lastWeight = None, None

    # A weight common to everything being drawn
    CommonWeightFormula = TTreeFormula("CommonWeight", CommonWeight, self)
    CommonWeightFormula.SetQuickLoad(True)
    if not CommonWeightFormula.GetTree():
        raise RuntimeError("TTreeFormula didn't compile: " + CommonWeight)

    hists = {}

    for i, origFormula in enumerate(Formulae):
        print("Have an origFormula %s" % (origFormula,))

        # Expand out origFormula and weight, otherwise just use weight of 1.
        if isinstance(origFormula, tuple):
            origFormula, weight = origFormula
        else:
            origFormula, weight = origFormula, "1"

        # Pluck out histogram name and arguments
        match = re.match(r"^(.*?)\s*>>\s*(.*?)\s*\(\s*(.*?)\s*\)$", origFormula)
        if match:

            formula, name, arguments = match.groups()
            arguments = re.split(r",\s*", arguments)

            bins, minX, maxX = arguments
            bins, minX, maxX = int(bins), float(minX), float(maxX)

            # Create histogram with name and arguments
            hist = TH1D(name, name, bins, minX, maxX)
            hist.Sumw2()
        else:
            # without arguments
            match = re.match(r"^(.*?)\s*>>\s*(.*?)\s*$", origFormula)
            if not match:
                raise RuntimeError("MultiDraw: Couldn't parse formula: '%s'" % origFormula)

            formula, name = match.groups()

            if name.startswith("+") and name[1:] in hists:
                # Drawing additionally into a histogram
                hist = hists[name[1:]]
            else:
                hist = gDirectory.Get(name)
                if not hist:
                    raise RuntimeError("MultiDraw: Couldn't find histogram to fill '%s' in current directory." % name)

        if name not in hists:
            hists[name] = hist

        results.append(hist)

        # The following two 'if' clauses check that the next formula is different
        # to the previous one. If it is not, we add an ordinary TObject.
        # Then, the dynamic cast in MultiDraw.cxx fails, giving 'NULL', and
        # The previous value is used. This saves the recomputing of identical values

        if formula != lastFormula:
            f = TTreeFormula("formula%i" % i, formula, self)
            if not f.GetTree():
                raise RuntimeError("TTreeFormula didn't compile: " + formula)
            f.SetQuickLoad(True)
            formulae.append(f)
        else:
            formulae.append(TObject())

        if weight != lastWeight:
            f = TTreeFormula("weight%i" % i, weight, self)
            if not f.GetTree():
                # report the weight expression, which is what failed here
                raise RuntimeError("TTreeFormula didn't compile: " + weight)
            f.SetQuickLoad(True)
            weights.append(f)
        else:
            weights.append(TObject())

        lastFormula, lastWeight = formula, weight

    # Only compile MultiDraw once
    try:
        from ROOT import MultiDraw as _MultiDraw
    except ImportError:
        # NOTE(review): "/sMultiDraw_cc.so" looks like a leftover of a
        # "%sMultiDraw_cc.so" pattern - confirm the intended library name
        if "/sMultiDraw_cc.so" not in gSystem.GetLibraries():
            gROOT.ProcessLine(".L %s/../SFrameAnalysis_emu/datacard/MultiDraw.cc+" % os.environ['CMSSW_BASE'])
        from ROOT import MultiDraw as _MultiDraw

    from time import time
    start = time()

    # Ensure that formulae are told when tree changes; the TObject
    # placeholders are skipped
    fManager = TTreeFormulaManager()
    for f in formulae + weights + [CommonWeightFormula, ]:
        if isinstance(f, TTreeFormula):
            fManager.Add(f)

    fManager.Sync()
    self.SetNotify(fManager)

    # Draw everything!
    _MultiDraw(self, CommonWeightFormula,
               MakeTObjArray(formulae),
               MakeTObjArray(weights),
               MakeTObjArray(results),
               len(Formulae))

    print("Took %.2fs" % (time() - start) + " " * 21)

    return results
Esempio n. 13
0
    def run(self, selections, dv, dv2d, ch='', name='', nevents=-1):
        """Fill 1D and 2D histograms for every selection from the ``events`` tree.

        A selection is either a plain formula or ``"<weight> ** <formula>"``.
        In the second form the part before ``**`` names an extra per-event
        weight:
          - a name containing 'tagin' with N digits-as-number gives
            1 - sum(weight_0tagex .. weight_(N-1)tagex),
          - a name containing 'tagless' gives the sum of those branches,
          - any other name is read directly as a branch of the tree.

        `dv` / `dv2d` map a variable key to the dict describing the 1D / 2D
        histogram; `name` is the histogram name prefix (``self.name`` when
        empty, which is also the only case in which histograms are booked);
        `nevents` limits the number of entries, -1 meaning all; `ch` is not
        used here.
        """
        # book the histograms of every selection
        if name == '':
            name = self.name
            for nsel, s in enumerate(selections):
                self.sv[s] = collections.OrderedDict()
                self.sv2d[s] = collections.OrderedDict()
                selstr = 'sel{}'.format(nsel)

                for v, cfg in dv.items():
                    hname = '{}_{}_{}'.format(name, selstr, v)
                    title = hname + ";" + cfg["title"] + ";"
                    self.sv[s][v] = TH1D(hname, title, cfg["bin"],
                                         cfg["xmin"], cfg["xmax"])
                    self.sv[s][v].Sumw2()

                for v, cfg in dv2d.items():
                    hname = '{}_{}_{}'.format(name, selstr, v)
                    title = hname + ";" + cfg["titlex"] + ";" + cfg["titley"] + ";"
                    self.sv2d[s][v] = TH2D(hname, title,
                                           cfg["binx"], cfg["xmin"], cfg["xmax"],
                                           cfg["biny"], cfg["ymin"], cfg["ymax"])
                    self.sv2d[s][v].Sumw2()

        rf = TFile(self.rt)
        t = rf.Get("events")

        # decide how many entries to process, never more than the tree holds
        available = t.GetEntries()
        if nevents == -1 or available < nevents:
            numberOfEntries = available
            print('running over the full entries  %i' % numberOfEntries)
        else:
            numberOfEntries = nevents
            print('running over a subset of entries  %i' % numberOfEntries)

        for s in selections:
            # split "<weight> ** <formula>" into its two parts
            weighttrf_name = ''
            tagin_branches = []
            tagless_branches = []
            sformula = s
            if '**' in s:
                pieces = s.split('**')
                weighttrf_name = pieces[0].strip()
                sformula = pieces[1]
                if 'tagin' in weighttrf_name:
                    nbtagex = int(filter(str.isdigit, weighttrf_name))
                    tagin_branches = ['weight_%itagex' % (i) for i in range(nbtagex)]
                if 'tagless' in weighttrf_name:
                    nbtagex = int(filter(str.isdigit, weighttrf_name))
                    tagless_branches = ['weight_%itagex' % (i) for i in range(nbtagex)]

            formula = TTreeFormula("", sformula, t)

            # loop over events
            print('number of events: %s' % numberOfEntries)
            for entry in xrange(numberOfEntries):
                processed = entry + 1
                if processed % 500 == 0:
                    sys.stdout.write('... %i events processed ...\r' % processed)
                    sys.stdout.flush()

                t.GetEntry(entry)
                weight = self.w * getattr(t, "weight")

                # extra weight requested through the selection prefix
                weighttrf = 1.
                if weighttrf_name != '':
                    if not tagin_branches and not tagless_branches:
                        weighttrf = getattr(t, weighttrf_name)
                    elif tagin_branches and not tagless_branches:
                        for branch in tagin_branches:
                            weighttrf -= getattr(t, branch)
                    elif tagless_branches and not tagin_branches:
                        weighttrf = 0.
                        for branch in tagless_branches:
                            weighttrf += getattr(t, branch)

                weight = weight * weighttrf

                # apply selection and skip events failing it
                if not formula.EvalInstance() > 0.:
                    continue

                # fill histos on selected events
                for v in dv:
                    try:
                        divide = dv[v]["divide"]
                    except KeyError:
                        divide = 1
                    self.sv[s][v].Fill(getattr(t, dv[v]["name"]) / divide, weight)
                for v in dv2d:
                    self.sv2d[s][v].Fill(getattr(t, dv2d[v]["namex"]),
                                         getattr(t, dv2d[v]["namey"]),
                                         weight)
    def TreeLoopFromFile(self,
                         fname,
                         noCuts=False,
                         cutOverride=None,
                         CPweight=False,
                         interference=0):

        # open file and get tree
        treeFile = TFile.Open(fname)
        theTree = treeFile.Get(self.pars.treeName)

        if not theTree:
            print 'failed to find tree %s in file %s' % (self.pars.treeName,
                                                         fname)
            return

        # get the right cuts
        if cutOverride:
            theCuts = self.fullCuts(cutOverride)
            print 'override cuts:', theCuts
        elif noCuts:
            theCuts = ''
        else:
            theCuts = self.fullCuts()

        if gDirectory.Get('cuts_evtList'):
            gDirectory.Delete('cuts_evtList')
        theList = None

        # create fomulae for the variables of interest
        rowVs = []
        for (i, v) in enumerate(self.pars.var):
            rowVs.append(TTreeFormula('v%i' % i, v, theTree))

        extraDraw = ''
        varsRemaining = 4 - len(self.pars.var)
        ExtraDrawCP = False
        ExtraDrawInterf = False
        if CPweight:
            if hasattr(theTree, 'complexpolewtggH%i' % self.pars.mHiggs):
                extraDraw += ':(complexpolewtggH%i/avecomplexpolewtggH%i)' % \
                             (self.pars.mHiggs, self.pars.mHiggs)
                varsRemaining -= 1
                ExtraDrawCP = True
        if interference == 1:
            extraDraw += ':interferencewtggH%i' % self.pars.mHiggs
            varsRemaining -= 1
            ExtraDrawInterf = True
        elif interference == 2:
            extraDraw += ':interferencewt_upggH%i' % self.pars.mHiggs
            varsRemaining -= 1
            ExtraDrawInterf = True
        elif interference == 3:
            extraDraw += ':interferencewt_downggH%i' % self.pars.mHiggs
            varsRemaining -= 1
            ExtraDrawInterf = True

        if varsRemaining >= 0:
            if len(theCuts) > 0:
                theCuts = 'puwt*effwt*' + theCuts
            # print ':'.join(self.pars.var) + extraDraw
            # print 'weighted cuts:',theCuts
            Nsel = theTree.Draw(':'.join(self.pars.var) + extraDraw, theCuts,
                                'goff')
        else:
            # create an entry list which apply the cuts to the tree
            Nsel = theTree.Draw('>>cuts_evtList', theCuts, 'entrylist')
            theList = gDirectory.Get('cuts_evtList')

        # loop over the selected events calculate their weight and yield
        # the two variable values and the weight for each selected event.
        print "selected events:", Nsel

        if theList:
            while theTree.GetEntry(theList.Next()):
                # if self.pars.isElectron:
                #     lep_pt = theTree.W_electron_pt
                #     lep_eta = theTree.W_electron_eta
                # else:
                #     lep_pt = theTree.W_muon_pt
                #     lep_eta = theTree.W_muon_eta
                # jet_pt = []
                # jet_eta = []
                # for (idx, pt) in enumerate(theTree.JetPFCor_Pt):
                #     if pt > 0:
                #         jet_pt.append(pt)
                #         jet_eta.append(theTree.JetPFCor_Eta[idx])

                # effWgt = self.effWeight(lepton_pt = lep_pt, lepton_eta = lep_eta,
                #                         #jet_pt = jet_pt, jet_eta,
                #                         mt_pt = theTree.W_mt, mt_eta = lep_eta,
                #                         met_pt = theTree.event_met_pfmet,
                #                         met_eta = 0.)
                # if (hasattr(self.pars, 'btagVeto')) and (self.pars.btagVeto) and \
                #         self.btagVeto(theTree):
                #     continue

                effWgt = theTree.puwt * theTree.effwt
                if CPweight:
                    if hasattr(theTree,
                               'complexpolewtggH%i' % self.pars.mHiggs):
                        cpw = getattr(theTree,
                                      'complexpolewtggH%i' % self.pars.mHiggs)
                        cpw /= getattr(
                            theTree,
                            'avecomplexpolewtggH%i' % self.pars.mHiggs)
                    else:
                        cpw = HiggsCPWeight(self.pars.mHiggs,
                                            theTree.W_H_mass_gen)
                else:
                    cpw = 1.
                if interference == 1:
                    iwt = getattr(theTree,
                                  'interferencewtggH%i' % self.pars.mHiggs)
                elif interference == 2:
                    iwt = getattr(theTree,
                                  'interferencewt_upggH%i' % self.pars.mHiggs)
                elif interference == 3:
                    iwt = getattr(
                        theTree, 'interferencewt_downggH%i' % self.pars.mHiggs)
                else:
                    iwt = 1.
                row = [v.EvalInstance() for v in rowVs]
                yield (row, effWgt, cpw, iwt)
        else:
            for rowi in range(0, theTree.GetSelectedRows()):
                effWgt = theTree.GetW()[rowi]
                row = []
                for vi in range(0, len(self.pars.var)):
                    row.append(getattr(theTree, 'GetV%i' % (vi + 1))()[rowi])
                cpw = 1.
                vi = len(self.pars.var)
                if ExtraDrawCP:
                    cpw = getattr(theTree, 'GetV%i' % (vi + 1))()[rowi]
                    vi += 1
                iwt = 1.
                if ExtraDrawInterf:
                    iwt = getattr(theTree, 'GetV%i' % (vi + 1))()[rowi]
                    vi += 1
                yield (row, effWgt, cpw, iwt)

        treeFile.Close()
        return
Esempio n. 15
0
        # Fragment: skim the input tree with a cut, then write a second file
        # whose tree carries an extra per-event 'full_weight' branch.
        # NOTE(review): tree_in, cut_str, file_out, file_out_name, int_lumi,
        # sample and weight are defined outside this fragment.

        # Keep only the entries of tree_in that pass cut_str.
        tree_out = tree_in.CopyTree(cut_str)

        tree_out.Write()
        # file_out.Close()

        # Second output file: same name with '.root' replaced by '_weight.root'.
        new_file_out = TFile(file_out_name.replace('.root', '_weight.root'),
                             'RECREATE')

        # CloneTree(0) copies the branch structure of tree_out without copying
        # any entries; the entries are filled one by one in the loop below.
        weight_tree = tree_out.CloneTree(0)

        # Global normalisation factor applied to every event weight.
        scale = int_lumi * sample.xsec * sample.scale / sample.sumweights

        # One-element float buffer backing the new 'full_weight' branch.
        full_weight = array('f', [0.])
        new_b = weight_tree.Branch('full_weight', full_weight, 'full_weight/F')
        # Formula evaluating the `weight` expression on the skimmed tree.
        formula = TTreeFormula('weight_formula', weight, tree_out)
        # NOTE(review): presumably called so the formula loads its data before
        # EvalInstance() is used -- confirm against the TTreeFormula docs.
        formula.GetNdata()

        # ATTENTION THIS MAY NOT WORK!
        for i in xrange(tree_out.GetEntries()):
            # Load entry i, then store its scaled weight in the branch buffer.
            tree_out.GetEntry(i)
            full_weight[0] = formula.EvalInstance() * scale
            # print full_weight[0]
            # new_b.Fill()
            weight_tree.Fill()
            # tree_out.Fill()

        new_file_out.Write()
        new_file_out.Close()
        file_out.Close()
Esempio n. 16
0
def MultiDraw(self, varexps, selection='1', drawoption="", **kwargs):
    """Draw multiple histograms in one loop over a tree (self).

    Instead of:
      tree.Draw( "pt_1 >> a(100, 0, 100)", "weightA" )
      tree.Draw( "pt_2 >> b(100, 0, 100)", "weightB" )
    Do:
      tree.MultiDraw( [ ( "pt_1 >> a(100, 0, 100)", "weightA" ),
                        ( "pt_2 >> b(100, 0, 100)", "weightB" ) ] )
    This is significantly faster when there are many histograms to be drawn.

    Args:
      varexps:    iterable of draw expressions. Each item is either a string
                  holding the formula to be drawn and the target histogram
                  ("x >> h(100,0,100)", "y:x >> h(100,0,100,100,0,100)", or
                  "x >> h" / "y:x >> h" to fill an existing histogram), or a
                  sequence of length 2 holding that string plus a weight
                  formula applied to that histogram only. A name written as
                  "+h" adds to a histogram "h" already used earlier in the
                  same call.
      selection:  formula common to everything being drawn (default '1').
      drawoption: if non-empty, set on every histogram via SetDrawOption.
      **kwargs:   cut       -- overrides `selection` when given
                  verbosity -- level >= 2 prints the compiled formulae
                  poisson   -- use TH1D.kPoisson bin errors (ignored if sumw2)
                  sumw2     -- call Sumw2() on every histogram
                  hists     -- list of existing histograms, matched to varexps
                               by index, so that gDirectory.Get(name) is not
                               needed

    Returns:
      List of the filled histograms, one per item of `varexps`, in order.

    Raises:
      IOError: an item of `varexps` is neither a string nor a length-2
               sequence.
      error:   an expression cannot be parsed, a formula does not compile, a
               histogram cannot be found or does not match, or 1D and 2D
               expressions are mixed.
    """

    selection = kwargs.get('cut', selection)  # selections cuts
    verbosity = kwargs.get('verbosity', 0)  # verbosity
    poisson = kwargs.get('poisson', False)  # kPoisson errors for data
    sumw2 = kwargs.get('sumw2', False)  # sumw2 for MC
    histlist = kwargs.get('hists',
                          [])  # to not rely on gDirectory.Get(histname)

    hists = {}
    results, xformulae, yformulae, weights = [], [], [], []
    lastXVar, lastYVar, lastWeight = None, None, None

    # A weight common to everything being drawn
    commonFormula = TTreeFormula("commonFormula", selection, self)
    commonFormula.SetQuickLoad(True)

    if not commonFormula.GetTree():
        raise error(
            "MultiDraw: TTreeFormula 'selection' did not compile:\n  selection:  %r\n  varexps:    %s"
            % (selection, varexps))

    for i, varexp in enumerate(varexps):
        yvar = None

        # EXPAND varexp into the draw expression and its optional weight
        weight = None
        if isinstance(varexp, (tuple, list)) and len(varexp) == 2:
            varexp, weight = varexp
        elif not isinstance(varexp, str):
            raise IOError(
                "MultiDraw: given varexp is not a string or tuple of length 2! Got varexp=%s (%s)"
                % (varexp, type(varexp)))
        if not varexp:
            varexp = '1'
        if not weight:
            weight = '1'

        # PREPARE histogram
        match = varregex.match(varexp)
        if match:  # create new histogram: varexp = "x >> h(100,0,100)" or "y:x >> h(100,0,100,100,0,100)"
            xvar, name, binning = match.group(1), match.group(2), match.group(
                3)

            # CREATE HISTOGRAM
            # A ':' that belongs to a ternary does not separate y from x,
            # so "(x>100 ? 1 : 0) >> h(2,0,2)" is still 1D.
            vmatch = varregex2D.match(xvar)
            is1D = (not vmatch or
                    xvar.replace('::', '').count(':') == xvar.count('?'))
            if is1D:
                bmatch = binregex.match(binning)
                if not bmatch:
                    raise error(
                        "MultiDraw: Could not parse formula for %r: %r" %
                        (name, varexp))
                nxbins, xmin, xmax = int(bmatch.group(1)), float(
                    bmatch.group(2)), float(bmatch.group(3))
                hist = TH1D(name, name, nxbins, xmin, xmax)
            else:  # 2D histogram
                yvar, xvar = vmatch.group(1), vmatch.group(2)
                bmatch = binregex2D.match(binning)
                if not bmatch:
                    raise error(
                        'MultiDraw: Could not parse formula for %r to pattern %r: "%s"'
                        % (name, binregex2D.pattern, varexp))
                nxbins, xmin, xmax = int(bmatch.group(1)), float(
                    bmatch.group(2)), float(bmatch.group(3))
                nybins, ymin, ymax = int(bmatch.group(4)), float(
                    bmatch.group(5)), float(bmatch.group(6))
                hist = TH2D(name, name, nxbins, xmin, xmax, nybins, ymin, ymax)

        else:  # get existing histogram: varexp = "x >> h" or "y:x >> h"
            match = varregex2.match(varexp)
            if not match:
                raise error(
                    'MultiDraw: Could not parse formula to pattern %r: %r' %
                    (varregex2.pattern, varexp))
            xvar, name = match.groups()
            if name.startswith("+") and name[1:] in hists:
                hist = hists[name[1:]]  # add content to existing histogram
            elif i < len(histlist):
                hist = histlist[i]
                # Compare the supplied histogram against the name requested in
                # the draw expression; a leading '+' only asks for
                # accumulation and is not part of the histogram name.
                expected = name[1:] if name.startswith("+") else name
                if hist.GetName() != expected:
                    raise error(
                        "MultiDraw: Hisogram mismatch: looking for %r, but found %r."
                        % (expected, hist.GetName()))
            else:
                hist = gDirectory.Get(name)
                if not hist:
                    raise error(
                        "MultiDraw: Could not find histogram to fill %r in current directory (varexp %r)."
                        % (name, varexp))

            # SANITY CHECKS: a 2D expression needs a 2D histogram
            vmatch = varregex2D.match(xvar)
            is1D = (not vmatch or
                    xvar.replace('::', '').count(':') == xvar.count('?'))
            if not is1D:
                yvar, xvar = vmatch.group(1), vmatch.group(2)
                if not isinstance(hist, TH2):
                    raise error(
                        "MultiDraw: Existing histogram with name %r is not 2D! Found xvar=%r, yvar=%r..."
                        % (name, xvar, yvar))

        if sumw2:
            hist.Sumw2()
        elif poisson:
            hist.SetBinErrorOption(TH1D.kPoisson)
        if drawoption:
            hist.SetDrawOption(drawoption)
        if name not in hists:
            hists[name] = hist
        results.append(hist)

        # CHECK that the next formula is different to the previous one.
        # If it is not, we add an ordinary TObject. In this way, the
        # dynamic cast in MultiDraw.cxx fails, giving 'NULL', and the previous
        # value is used. This saves the recomputing of identical values
        if xvar != lastXVar:
            formula = TTreeFormula("formula%i" % i, xvar, self)
            if not formula.GetTree():
                raise error(
                    "MultiDraw: TTreeFormula 'xvar' did not compile for %r:\n  xvar:    %r\n  varexp:  %r"
                    % (name, xvar, varexp))
            formula.SetQuickLoad(True)
            xformulae.append(formula)
        else:
            xformulae.append(TObject())

        # yformulae only grows for 2D expressions, which is what the
        # 1D/2D dispatch below relies on.
        if yvar is not None:
            if yvar != lastYVar:
                formula = TTreeFormula("formula%i" % i, yvar, self)
                if not formula.GetTree():
                    raise error(
                        "MultiDraw: TTreeFormula 'yvar' did not compile for %r:\n  yvar:    %r\n  varexp:  %r"
                        % (name, yvar, varexp))
                formula.SetQuickLoad(True)
                yformulae.append(formula)
            else:
                yformulae.append(TObject())

        if weight != lastWeight:
            formula = TTreeFormula("weight%i" % i, weight, self)
            if not formula.GetTree():
                raise error(
                    "MultiDraw: TTreeFormula 'weight' did not compile for %r:\n  weight:  %r\n  varexp:  %r"
                    % (name, weight, varexp))
            formula.SetQuickLoad(True)
            weights.append(formula)
        else:
            weights.append(TObject())

        lastXVar, lastYVar, lastWeight = xvar, yvar, weight

    # CHECK that formulae are told when tree changes
    manager = TTreeFormulaManager()
    for formula in xformulae + yformulae + weights + [commonFormula]:
        # Skip the TObject placeholders inserted for repeated formulae.
        if isinstance(formula, TTreeFormula):
            manager.Add(formula)

    manager.Sync()
    self.SetNotify(manager)

    # DRAW
    if verbosity >= 2:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(">>> MultiDraw: xformulae=%s, yformulae=%s" % (
            [x.GetTitle() for x in xformulae],
            [y.GetTitle() for y in yformulae]))
        print(">>> MultiDraw: weights=%s, results=%s" % (
            [w.GetTitle() for w in weights], results))
    if len(yformulae) == 0:
        _MultiDraw(self, commonFormula, makeTObjArray(xformulae),
                   makeTObjArray(weights), makeTObjArray(results),
                   len(xformulae))
    elif len(xformulae) == len(yformulae):
        _MultiDraw2D(self, commonFormula, makeTObjArray(xformulae),
                     makeTObjArray(yformulae), makeTObjArray(weights),
                     makeTObjArray(results), len(xformulae))
    else:
        raise error(
            "MultiDraw: Given a mix of arguments for 1D (%d) and 2D (%d) histograms!"
            % (len(xformulae), len(yformulae)))

    return results
Esempio n. 17
0
 while filenum1<len(InfileArg):
     ifilename=InfileArg[filenum1]
     MeritIn=TChain(options.ttree)
     MeritIn.Add(ifilename)
     nEnt=MeritIn.GetEntries()
     nEnt = 100000 # debug
     logging.info('found %i events',int(nEnt))
     bar_suffix = "%(percent)d%%- %(elapsed)ds" #'%(percent)d%%'
     bbar = Bar("%s: Progress..."%os.path.basename(ifilename),max=int(nEnt), suffix=bar_suffix)
     if(nEnt==0):
         del MeritIn
         logging.warning("WARNING! EMPTY FILE %s",ifilename)
         filenum1+=1
         continue
     if not options.cuts is None:
         CutEval=TTreeFormula("CutEval",options.cuts,MeritIn)
     #nEntTot+=nEnt
     tmp_frame = pd.DataFrame(index=np.arange(nEnt),columns=allColumns)
     tmp_frame = tmp_frame.fillna(0)
     for i in range(nEnt):
         #if i<100:
         MeritIn.GetEntry(i)
         if not options.cuts is None:
             if(CutEval.EvalInstance(i)==0): continue
     # specifically clone branches
         for j in range(len(IntBrName)):
             IntBrVal[j][0]=getattr(MeritIn,IntBrName[j])
         for j in range(len(DblBrName)):
             DblBrVal[j][0]=getattr(MeritIn,DblBrName[j])
         # this fills the row with all values and appends to existing dataframe
         if len(IntBrVal) and len(DblBrVal):