Ejemplo n.º 1
0
 def merge(inputs, output, metadata):
     # merge output trees
     root_output = output + '.root'
     log.info("merging output trees")
     subprocess.call(['hadd', root_output] + inputs)
     if metadata.datatype == datasets.DATA:
         # merge GRLs
         log.info("merging GRL fragments")
         grl = goodruns.GRL()
         for input in inputs:
             grl |= goodruns.GRL('%s:/lumi' % input)
         grl.save('%s:/lumi' % root_output)
Ejemplo n.º 2
0
 def merge(inputs, output, metadata):
     # merge output trees
     root_output = output + '.root'
     log.info("merging output trees")
     subprocess.call(['hadd', root_output] + inputs)
     if metadata.datatype == datasets.DATA:
         # merge GRLs
         log.info("merging GRL fragments")
         grl = goodruns.GRL()
         for input in inputs:
             grl |= goodruns.GRL('%s:/lumi' % input)
         grl.save('%s:/lumi' % root_output)
Ejemplo n.º 3
0
    def work(self):
        # get argument values
        local = self.args.local
        syst_terms = self.args.syst_terms
        datatype = self.metadata.datatype
        year = self.metadata.year
        verbose = self.args.student_verbose
        very_verbose = self.args.student_very_verbose
        redo_selection = self.args.redo_selection
        nominal_values = self.args.nominal_values

        # get the dataset name
        dsname = os.getenv('INPUT_DATASET_NAME', None)
        if dsname is None:
            # attempt to guess dsname from dirname
            if self.files:
                dsname = os.path.basename(os.path.dirname(self.files[0]))

        # is this a signal sample?
        # if so we will also keep some truth information in the output below
        is_signal = datatype == datasets.MC and (
            '_VBFH' in dsname or '_ggH' in dsname or '_ZH' in dsname
            or '_WH' in dsname or '_ttH' in dsname)
        log.info("DATASET: {0}".format(dsname))
        log.info("IS SIGNAL: {0}".format(is_signal))

        # is this an inclusive signal sample for overlap studies?
        is_inclusive_signal = is_signal and '_inclusive' in dsname

        # is this a BCH-fixed sample? (temporary)
        is_bch_sample = 'r5470_r4540_p1344' in dsname
        if is_bch_sample:
            log.warning("this is a BCH-fixed r5470 sample")

        # onfilechange will contain a list of functions to be called as the
        # chain rolls over to each new file
        onfilechange = []
        count_funcs = {}

        if datatype != datasets.DATA:
            # count the weighted number of events
            if local:

                def mc_weight_count(event):
                    return event.hh_mc_weight
            else:

                def mc_weight_count(event):
                    return event.mc_event_weight

            count_funcs = {
                'mc_weight': mc_weight_count,
            }

        # three instances of the pileup reweighting tool are created to write
        # out the nominal, high and low pileup weights
        pileup_tool = None
        pileup_tool_high = None
        pileup_tool_low = None

        if local:
            # local means running on the skims, the output of this script
            # running on the grid
            if datatype == datasets.DATA:
                # merge the GRL fragments
                merged_grl = goodruns.GRL()

                def update_grl(student, grl, name, file, tree):
                    grl |= str(
                        file.Get('Lumi/%s' %
                                 student.metadata.treename).GetString())

                onfilechange.append((update_grl, (
                    self,
                    merged_grl,
                )))

            if datatype == datasets.DATA:
                merged_cutflow = Hist(1, 0, 1, name='cutflow', type='D')
            else:
                merged_cutflow = Hist(2, 0, 2, name='cutflow', type='D')

            def update_cutflow(student, cutflow, name, file, tree):
                # record a cut-flow
                year = student.metadata.year
                datatype = student.metadata.datatype
                cutflow[1].value += file.cutflow_event[1].value
                if datatype != datasets.DATA:
                    cutflow[2].value += file.cutflow_event_mc_weight[1].value

            onfilechange.append((update_cutflow, (
                self,
                merged_cutflow,
            )))

        else:
            # get pileup reweighting tool
            pileup_tool = get_pileup_reweighting_tool(year=year,
                                                      use_defaults=True)
            pileup_tool_high = get_pileup_reweighting_tool(year=year,
                                                           use_defaults=True,
                                                           systematic='high')
            pileup_tool_low = get_pileup_reweighting_tool(year=year,
                                                          use_defaults=True,
                                                          systematic='low')

            if datatype not in (datasets.EMBED, datasets.MCEMBED):
                # merge TrigConfTrees
                metadirname = '%sMeta' % self.metadata.treename
                trigconfchain = ROOT.TChain('%s/TrigConfTree' % metadirname)
                map(trigconfchain.Add, self.files)
                metadir = self.output.mkdir(metadirname)
                metadir.cd()
                trigconfchain.Merge(self.output, -1, 'fast keep')
                self.output.cd()

            if datatype == datasets.DATA:
                # merge GRL XML strings
                merged_grl = goodruns.GRL()
                for fname in self.files:
                    with root_open(fname) as f:
                        for key in f.Lumi.keys():
                            merged_grl |= goodruns.GRL(str(
                                key.ReadObj().GetString()),
                                                       from_string=True)
                lumi_dir = self.output.mkdir('Lumi')
                lumi_dir.cd()
                xml_string = ROOT.TObjString(merged_grl.str())
                xml_string.Write(self.metadata.treename)
                self.output.cd()

        self.output.cd()

        # create the output tree
        model = get_model(datatype,
                          dsname,
                          prefix=None if local else 'hh_',
                          is_inclusive_signal=is_inclusive_signal)
        log.info("Output Model:\n\n{0}\n\n".format(model))
        outtree = Tree(name=self.metadata.treename, model=model)

        if local:
            tree = outtree
        else:
            tree = outtree.define_object(name='tree', prefix='hh_')

        tree.define_object(name='tau', prefix='tau_')
        tree.define_object(name='tau1', prefix='tau1_')
        tree.define_object(name='tau2', prefix='tau2_')
        tree.define_object(name='truetau1', prefix='truetau1_')
        tree.define_object(name='truetau2', prefix='truetau2_')
        tree.define_object(name='jet1', prefix='jet1_')
        tree.define_object(name='jet2', prefix='jet2_')
        tree.define_object(name='jet3', prefix='jet3_')

        mmc_objects = [
            tree.define_object(name='mmc0', prefix='mmc0_'),
            tree.define_object(name='mmc1', prefix='mmc1_'),
            tree.define_object(name='mmc2', prefix='mmc2_'),
        ]

        for mmc_obj in mmc_objects:
            mmc_obj.define_object(name='resonance', prefix='resonance_')

        trigger_emulation = TauTriggerEmulation(year=year,
                                                passthrough=local
                                                or datatype != datasets.MC
                                                or year > 2011,
                                                count_funcs=count_funcs)

        if not trigger_emulation.passthrough:
            onfilechange.append((update_trigger_trees, (
                self,
                trigger_emulation,
            )))

        trigger_config = None

        if datatype not in (datasets.EMBED, datasets.MCEMBED):
            # trigger config tool to read trigger info in the ntuples
            trigger_config = get_trigger_config()
            # update the trigger config maps on every file change
            onfilechange.append((update_trigger_config, (trigger_config, )))

        # define the list of event filters
        if local and syst_terms is None and not redo_selection:
            event_filters = None
        else:
            tau_ntrack_recounted_use_ntup = False
            if year > 2011:
                # peek at first tree to determine if the extended number of
                # tracks is already stored
                with root_open(self.files[0]) as test_file:
                    test_tree = test_file.Get(self.metadata.treename)
                    tau_ntrack_recounted_use_ntup = ('tau_out_track_n_extended'
                                                     in test_tree)

            event_filters = EventFilterList([
                averageIntPerXingPatch(
                    passthrough=(local or year < 2012
                                 or datatype != datasets.MC),
                    count_funcs=count_funcs),
                PileupTemplates(
                    year=year,
                    passthrough=(local or is_bch_sample or datatype
                                 not in (datasets.MC, datasets.MCEMBED)),
                    count_funcs=count_funcs),
                RandomSeed(datatype=datatype, count_funcs=count_funcs),
                RandomRunNumber(tree=tree,
                                datatype=datatype,
                                pileup_tool=pileup_tool,
                                passthrough=local,
                                count_funcs=count_funcs),
                PileupReweight(
                    year=year,
                    tool=pileup_tool,
                    tool_high=pileup_tool_high,
                    tool_low=pileup_tool_low,
                    tree=tree,
                    passthrough=(local
                                 or (datatype
                                     not in (datasets.MC, datasets.MCEMBED))),
                    count_funcs=count_funcs),
                TruthMatching(passthrough=datatype == datasets.DATA,
                              count_funcs=count_funcs),
                JetIsPileup(
                    passthrough=(local or year < 2012 or datatype
                                 not in (datasets.MC, datasets.MCEMBED)),
                    count_funcs=count_funcs),
                HiggsPT(year=year,
                        tree=tree,
                        passthrough=not is_signal or local,
                        count_funcs=count_funcs),
                MCWeight(datatype=datatype,
                         tree=tree,
                         passthrough=local or datatype == datasets.DATA,
                         count_funcs=count_funcs),
                ClassifyInclusiveHiggsSample(
                    tree=tree,
                    passthrough=not is_inclusive_signal,
                    count_funcs=count_funcs),
            ])

            # set the event filters
            self.filters['event'] = event_filters

        # peek at first tree to determine which branches to exclude
        with root_open(self.files[0]) as test_file:
            test_tree = test_file.Get(self.metadata.treename)
            ignore_branches = test_tree.glob(hhbranches.REMOVE,
                                             exclude=hhbranches.KEEP)
            ignore_branches_output = test_tree.glob(
                hhbranches.REMOVE_OUTPUT, exclude=hhbranches.KEEP_OUTPUT)

        # initialize the TreeChain of all input files
        chain = TreeChain(self.metadata.treename,
                          files=self.files,
                          ignore_branches=ignore_branches,
                          events=self.events,
                          onfilechange=onfilechange,
                          filters=event_filters,
                          cache=True,
                          cache_size=50000000,
                          learn_entries=100)

        if local:
            copied = [
                'EventNumber',
            ]

            hh_buffer = TreeBuffer()
            buffer = TreeBuffer()
            for name, value in chain._buffer.items():
                if name.startswith('hh_'):
                    hh_buffer[name[3:]] = value
                elif name in copied:
                    buffer[name] = value
            outtree.set_buffer(hh_buffer, create_branches=False, visible=True)
            outtree.set_buffer(buffer, create_branches=True, visible=False)

        else:
            # additional decorations on existing objects
            if year > 2011 and datatype in (datasets.MC, datasets.MCEMBED):

                class Decorations(TreeModel):
                    jet_ispileup = stl.vector('bool')

                chain.set_buffer(Decorations(), create_branches=True)

            # include the branches in the input chain in the output tree
            # set branches to be removed in ignore_branches
            outtree.set_buffer(chain._buffer,
                               ignore_branches=ignore_branches +
                               ignore_branches_output,
                               create_branches=True,
                               ignore_duplicates=True,
                               transfer_objects=True,
                               visible=False)

        # define tree objects
        define_objects(chain, year)

        # create the MMC
        mmc = mass.MMC(year=year)

        # report which packages have been loaded
        externaltools.report()

        self.output.cd()

        # The main event loop
        # the event filters above are automatically run for each event and only
        # the surviving events are looped on
        for event in chain:

            if local and syst_terms is None and not redo_selection:
                outtree.Fill()
                continue

            # sort taus and jets in decreasing order by pT
            event.taus.sort(key=lambda tau: tau.pt, reverse=True)
            event.jets.sort(key=lambda jet: jet.pt, reverse=True)

            # tau1 is the leading tau
            # tau2 is the subleading tau
            taus = list(event.taus)
            if len(taus) >= 2:
                tau1, tau2 = taus[0], taus[1]
                jets = list(event.jets)
                jet1, jet2, jet3 = None, None, None
                beta = None

                if len(jets) >= 2:
                    jet1, jet2 = jets[:2]

                    # determine boost of system
                    # determine jet CoM frame
                    beta = (jet1.fourvect + jet2.fourvect).BoostVector()
                    tree.jet_beta.copy_from(beta)

                    jet1.fourvect_boosted.copy_from(jet1.fourvect)
                    jet2.fourvect_boosted.copy_from(jet2.fourvect)
                    jet1.fourvect_boosted.Boost(beta * -1)
                    jet2.fourvect_boosted.Boost(beta * -1)

                    tau1.fourvect_boosted.copy_from(tau1.fourvect)
                    tau2.fourvect_boosted.copy_from(tau2.fourvect)
                    tau1.fourvect_boosted.Boost(beta * -1)
                    tau2.fourvect_boosted.Boost(beta * -1)

                    tau1.min_dr_jet = min(tau1.fourvect.DeltaR(jet1.fourvect),
                                          tau1.fourvect.DeltaR(jet2.fourvect))
                    tau2.min_dr_jet = min(tau2.fourvect.DeltaR(jet1.fourvect),
                                          tau2.fourvect.DeltaR(jet2.fourvect))

                    # sphericity, aplanarity = eventshapes.sphericity_aplanarity(
                    #    [tau1.fourvect,
                    #     tau2.fourvect,
                    #     jet1.fourvect,
                    #     jet2.fourvect])

                    # sphericity
                    # tree.sphericity = sphericity
                    # aplanarity
                    # tree.aplanarity = aplanarity

                    # sphericity_boosted, aplanarity_boosted = eventshapes.sphericity_aplanarity(
                    #    [tau1.fourvect_boosted,
                    #     tau2.fourvect_boosted,
                    #     jet1.fourvect_boosted,
                    #     jet2.fourvect_boosted])

                    # sphericity
                    # tree.sphericity_boosted = sphericity_boosted
                    # aplanarity
                    # tree.aplanarity_boosted = aplanarity_boosted

                    # tau centrality (degree to which they are between the two jets)
                    tau1.centrality = eventshapes.eta_centrality(
                        tau1.fourvect.Eta(), jet1.fourvect.Eta(),
                        jet2.fourvect.Eta())

                    tau2.centrality = eventshapes.eta_centrality(
                        tau2.fourvect.Eta(), jet1.fourvect.Eta(),
                        jet2.fourvect.Eta())

                    # boosted tau centrality
                    tau1.centrality_boosted = eventshapes.eta_centrality(
                        tau1.fourvect_boosted.Eta(),
                        jet1.fourvect_boosted.Eta(),
                        jet2.fourvect_boosted.Eta())

                    tau2.centrality_boosted = eventshapes.eta_centrality(
                        tau2.fourvect_boosted.Eta(),
                        jet1.fourvect_boosted.Eta(),
                        jet2.fourvect_boosted.Eta())

                    # 3rd leading jet
                    if len(jets) >= 3:
                        jet3 = jets[2]
                        jet3.fourvect_boosted.copy_from(jet3.fourvect)
                        jet3.fourvect_boosted.Boost(beta * -1)

                elif len(jets) == 1:
                    jet1 = jets[0]

                    tau1.min_dr_jet = tau1.fourvect.DeltaR(jet1.fourvect)
                    tau2.min_dr_jet = tau2.fourvect.DeltaR(jet1.fourvect)

                    # sphericity, aplanarity = eventshapes.sphericity_aplanarity(
                    #    [tau1.fourvect,
                    #     tau2.fourvect,
                    #     jet1.fourvect])

                    # sphericity
                    # tree.sphericity = sphericity
                    # aplanarity
                    #tree.aplanarity = aplanarity

                    RecoJetBlock.set(tree, jet1, jet2, jet3, local=local)

                # mass of ditau + leading jet system
                if jet1 is not None:
                    tree.mass_tau1_tau2_jet1 = (tau1.fourvect + tau2.fourvect +
                                                jet1.fourvect).M()

                # full sphericity and aplanarity
                # sphericity_full, aplanarity_full = eventshapes.sphericity_aplanarity(
                #    [tau1.fourvect, tau2.fourvect] + [jet.fourvect for jet in jets])

                # tree.sphericity_full = sphericity_full
                # tree.aplanarity_full = aplanarity_full

                # ####################################
                # number of tracks from PV minus taus
                # ####################################
                ntrack_pv = 0
                ntrack_nontau_pv = 0
                for vxp in event.vertices:
                    # primary vertex
                    if vxp.type == 1:
                        ntrack_pv = vxp.nTracks
                        ntrack_nontau_pv = ntrack_pv - tau1.numTrack - tau2.numTrack
                        break
                tree.ntrack_pv = ntrack_pv
                tree.ntrack_nontau_pv = ntrack_nontau_pv

                # ########################
                # MET variables
                # ########################
                METx = event.MET.etx
                METy = event.MET.ety
                MET = event.MET.et
                MET_vect = Vector2(METx, METy)
                MET_4vect = LorentzVector()
                MET_4vect.SetPxPyPzE(METx, METy, 0., MET)
                MET_4vect_boosted = LorentzVector()
                MET_4vect_boosted.copy_from(MET_4vect)
                if beta is not None:
                    MET_4vect_boosted.Boost(beta * -1)

                tree.MET_et = MET
                tree.MET_etx = METx
                tree.MET_ety = METy
                tree.MET_phi = event.MET.phi
                dPhi_tau1_tau2 = abs(tau1.fourvect.DeltaPhi(tau2.fourvect))
                dPhi_tau1_MET = abs(tau1.fourvect.DeltaPhi(MET_4vect))
                dPhi_tau2_MET = abs(tau2.fourvect.DeltaPhi(MET_4vect))
                tree.dPhi_tau1_tau2 = dPhi_tau1_tau2
                tree.dPhi_tau1_MET = dPhi_tau1_MET
                tree.dPhi_tau2_MET = dPhi_tau2_MET
                tree.dPhi_min_tau_MET = min(dPhi_tau1_MET, dPhi_tau2_MET)
                tree.MET_bisecting = is_MET_bisecting(dPhi_tau1_tau2,
                                                      dPhi_tau1_MET,
                                                      dPhi_tau2_MET)

                sumET = event.MET.sumet
                tree.MET_sumet = sumET
                if sumET != 0:
                    tree.MET_sig = (
                        (2. * MET / GeV) /
                        (utils.sign(sumET) * sqrt(abs(sumET / GeV))))
                else:
                    tree.MET_sig = -1.

                tree.MET_centrality = eventshapes.phi_centrality(
                    tau1.fourvect, tau2.fourvect, MET_vect)
                tree.MET_centrality_boosted = eventshapes.phi_centrality(
                    tau1.fourvect_boosted, tau2.fourvect_boosted,
                    MET_4vect_boosted)

                tree.number_of_good_vertices = len(event.vertices)

                # #########################
                # Jet and sum pt variables
                # #########################
                tree.numJets = len(event.jets)

                # sum pT with only the two leading jets
                tree.sum_pt = sum([tau1.pt, tau2.pt] +
                                  [jet.pt for jet in jets[:2]])

                # sum pT with all selected jets
                tree.sum_pt_full = sum([tau1.pt, tau2.pt] +
                                       [jet.pt for jet in jets])

                # vector sum pT with two leading jets and MET
                tree.vector_sum_pt = sum([tau1.fourvect, tau2.fourvect] +
                                         [jet.fourvect for jet in jets[:2]] +
                                         [MET_4vect]).Pt()

                # vector sum pT with all selected jets and MET
                tree.vector_sum_pt_full = sum([tau1.fourvect, tau2.fourvect] +
                                              [jet.fourvect for jet in jets] +
                                              [MET_4vect]).Pt()

                # resonance pT
                tree.resonance_pt = sum(
                    [tau1.fourvect, tau2.fourvect, MET_4vect]).Pt()

                # ############################
                # tau <-> vertex association
                # ############################
                tree.tau_same_vertex = (tau1.privtx_x == tau2.privtx_x
                                        and tau1.privtx_y == tau2.privtx_y
                                        and tau1.privtx_z == tau2.privtx_z)

                tau1.vertex_prob = ROOT.TMath.Prob(tau1.privtx_chiSquared,
                                                   int(tau1.privtx_numberDoF))

                tau2.vertex_prob = ROOT.TMath.Prob(tau2.privtx_chiSquared,
                                                   int(tau2.privtx_numberDoF))

                # #########################
                # MMC Mass
                # #########################
                mmc_result = mmc.mass(tau1,
                                      tau2,
                                      METx,
                                      METy,
                                      sumET,
                                      njets=len(event.jets))

                for mmc_method, mmc_object in enumerate(mmc_objects):
                    mmc_mass, mmc_resonance, mmc_met = mmc_result[mmc_method]
                    if verbose:
                        log.info("MMC (method %d): %f" %
                                 (mmc_method, mmc_mass))

                    mmc_object.mass = mmc_mass
                    mmc_object.MET_et = mmc_met.Mod()
                    mmc_object.MET_etx = mmc_met.X()
                    mmc_object.MET_ety = mmc_met.Y()
                    mmc_object.MET_phi = math.pi - mmc_met.Phi()
                    if mmc_mass > 0:
                        FourMomentum.set(mmc_object.resonance, mmc_resonance)

                # ###########################
                # collinear and visible mass
                # ###########################
                vis_mass, collin_mass, tau1_x, tau2_x = mass.collinearmass(
                    tau1, tau2, METx, METy)

                tree.mass_vis_tau1_tau2 = vis_mass
                tree.mass_collinear_tau1_tau2 = collin_mass
                tau1.collinear_momentum_fraction = tau1_x
                tau2.collinear_momentum_fraction = tau2_x

                ###########################
                # Match jets to VBF partons
                ###########################
                #if datatype == datasets.MC and 'VBF' in dsname and year == 2011:
                #    # get partons (already sorted by eta in hepmc) FIXME!!!
                #    parton1, parton2 = hepmc.get_VBF_partons(event)
                #    tree.mass_true_quark1_quark2 = (parton1.fourvect + parton2.fourvect).M()
                #    # order here needs to be revised since jets are no longer
                #    # sorted by eta but instead by pT
                #    PartonBlock.set(tree, parton1, parton2)
                #    if len(jets) >= 2:
                #        jet1, jet2 = jets[:2]
                #        for i, jet in zip((1, 2), (jet1, jet2)):
                #            for parton in (parton1, parton2):
                #                if utils.dR(jet.eta, jet.phi, parton.eta, parton.phi) < .8:
                #                    setattr(tree, 'jet%i_matched' % i, True)

                # Fill the tau block
                # This must come after the RecoJetBlock is filled since
                # that sets the jet_beta for boosting the taus
                RecoTauBlock.set(event,
                                 tree,
                                 datatype,
                                 tau1,
                                 tau2,
                                 local=local)
                if datatype != datasets.DATA:
                    TrueTauBlock.set(tree, tau1, tau2)

            # fill the output tree
            outtree.Fill(reset=True)

        externaltools.report()

        # flush any baskets remaining in memory to disk
        self.output.cd()
        outtree.FlushBaskets()
        outtree.Write()

        if local:
            if datatype == datasets.DATA:
                xml_string = ROOT.TObjString(merged_grl.str())
                xml_string.Write('lumi')
            merged_cutflow.Write()
Ejemplo n.º 4
0
    def work(self):
        # get argument values
        local = self.args.local
        syst_terms = self.args.syst_terms
        datatype = self.metadata.datatype
        year = self.metadata.year
        verbose = self.args.student_verbose
        very_verbose = self.args.student_very_verbose
        redo_selection = self.args.redo_selection
        nominal_values = self.args.nominal_values

        # get the dataset name
        dsname = os.getenv('INPUT_DATASET_NAME', None)
        if dsname is None:
            # attempt to guess dsname from dirname
            if self.files:
                dsname = os.path.basename(os.path.dirname(self.files[0]))

        # is this a signal sample?
        # if so we will also keep some truth information in the output below
        is_signal = datatype == datasets.MC and (
            '_VBFH' in dsname or
            '_ggH' in dsname or
            '_ZH' in dsname or
            '_WH' in dsname or
            '_ttH' in dsname)
        log.info("DATASET: {0}".format(dsname))
        log.info("IS SIGNAL: {0}".format(is_signal))

        # is this an inclusive signal sample for overlap studies?
        is_inclusive_signal = is_signal and '_inclusive' in dsname

        # is this a BCH-fixed sample? (temporary)
        is_bch_sample = 'r5470_r4540_p1344' in dsname
        if is_bch_sample:
            log.warning("this is a BCH-fixed r5470 sample")

        # onfilechange will contain a list of functions to be called as the
        # chain rolls over to each new file
        onfilechange = []
        count_funcs = {}

        if datatype != datasets.DATA:
            # count the weighted number of events
            if local:
                def mc_weight_count(event):
                    return event.hh_mc_weight
            else:
                def mc_weight_count(event):
                    return event.TruthEvent[0].weights()[0]

            count_funcs = {
                'mc_weight': mc_weight_count,
            }

        if local:
            # local means running on the skims, the output of this script
            # running on the grid
            if datatype == datasets.DATA:
                # merge the GRL fragments
                merged_grl = goodruns.GRL()

                def update_grl(student, grl, name, file, tree):
                    grl |= str(file.Get('Lumi/%s' % student.metadata.treename).GetString())

                onfilechange.append((update_grl, (self, merged_grl,)))

            if datatype == datasets.DATA:
                merged_cutflow = Hist(1, 0, 1, name='cutflow', type='D')
            else:
                merged_cutflow = Hist(2, 0, 2, name='cutflow', type='D')

            def update_cutflow(student, cutflow, name, file, tree):
                # record a cut-flow
                year = student.metadata.year
                datatype = student.metadata.datatype
                cutflow[1].value += file.cutflow_event[1].value
                if datatype != datasets.DATA:
                    cutflow[2].value += file.cutflow_event_mc_weight[1].value

            onfilechange.append((update_cutflow, (self, merged_cutflow,)))

        else:

            # NEED TO BE CONVERTED TO XAOD
            # if datatype not in (datasets.EMBED, datasets.MCEMBED):
            #     # merge TrigConfTrees
            #     metadirname = '%sMeta' % self.metadata.treename
            #     trigconfchain = ROOT.TChain('%s/TrigConfTree' % metadirname)
            #     map(trigconfchain.Add, self.files)
            #     metadir = self.output.mkdir(metadirname)
            #     metadir.cd()
            #     trigconfchain.Merge(self.output, -1, 'fast keep')
            #     self.output.cd()

            if datatype == datasets.DATA:
                # merge GRL XML strings
                merged_grl = goodruns.GRL()
            #     for fname in self.files:
            #         with root_open(fname) as f:
            #             for key in f.Lumi.keys():
            #                 merged_grl |= goodruns.GRL(
            #                     str(key.ReadObj().GetString()),
            #                     from_string=True)
            #     lumi_dir = self.output.mkdir('Lumi')
            #     lumi_dir.cd()
            #     xml_string= ROOT.TObjString(merged_grl.str())
            #     xml_string.Write(self.metadata.treename)
            #     self.output.cd()

        self.output.cd()

        # create the output tree
        model = get_model(datatype, dsname,
                          prefix=None if local else 'hh_',
                          is_inclusive_signal=is_inclusive_signal)
        log.info("Output Model:\n\n{0}\n\n".format(model))
        outtree = Tree(name=self.metadata.treename,
                       model=model)

        if local:
            tree = outtree
        else:
            tree = outtree.define_object(name='tree', prefix='hh_')

        #tree.define_object(name='tau', prefix='tau_')
        tree.define_object(name='tau1', prefix='tau1_')
        tree.define_object(name='tau2', prefix='tau2_')
        tree.define_object(name='truetau1', prefix='truetau1_')
        tree.define_object(name='truetau2', prefix='truetau2_')
        tree.define_object(name='jet1', prefix='jet1_')
        tree.define_object(name='jet2', prefix='jet2_')
        tree.define_object(name='jet3', prefix='jet3_')

        mmc_objects = [
            tree.define_object(name='mmc0', prefix='mmc0_'),
            tree.define_object(name='mmc1', prefix='mmc1_'),
            tree.define_object(name='mmc2', prefix='mmc2_'),
        ]

        for mmc_obj in mmc_objects:
            mmc_obj.define_object(name='resonance', prefix='resonance_')

        # NEED TO BE CONVERTED TO XAOD
        # trigger_emulation = TauTriggerEmulation(
        #     year=year,
        #     passthrough=local or datatype != datasets.MC or year > 2011,
        #     count_funcs=count_funcs)

        # if not trigger_emulation.passthrough:
        #     onfilechange.append(
        #         (update_trigger_trees, (self, trigger_emulation,)))

        # trigger_config = None

        # if datatype not in (datasets.EMBED, datasets.MCEMBED):
        #     # trigger config tool to read trigger info in the ntuples
        #     trigger_config = get_trigger_config()
        #     # update the trigger config maps on every file change
        #     onfilechange.append((update_trigger_config, (trigger_config,)))

        # define the list of event filters
        if local and syst_terms is None and not redo_selection:
            event_filters = None
        else:
            tau_ntrack_recounted_use_ntup = False
            if year > 2011:
                # peek at first tree to determine if the extended number of
                # tracks is already stored
                with root_open(self.files[0]) as test_file:
                    test_tree = test_file.Get(self.metadata.treename)
                    tau_ntrack_recounted_use_ntup = (
                        'tau_out_track_n_extended' in test_tree)

            log.info(self.grl)
            event_filters = EventFilterList([
                GRLFilter(
                    self.grl,
                    passthrough=(
                        local or (
                            datatype not in (datasets.DATA, datasets.EMBED))),
                    count_funcs=count_funcs),
                CoreFlags(
                    passthrough=local,
                    count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # EmbeddingPileupPatch(
                #     passthrough=(
                #         local or year > 2011 or datatype != datasets.EMBED),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD (not a priority)
                # PileupTemplates(
                #     year=year,
                #     passthrough=(
                #         local or is_bch_sample or datatype not in (
                #             datasets.MC, datasets.MCEMBED)),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # RandomSeed(
                #     datatype=datatype,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # BCHSampleRunNumber(
                #     passthrough=not is_bch_sample,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # RandomRunNumber(
                #     tree=tree,
                #     datatype=datatype,
                #     pileup_tool=pileup_tool,
                #     passthrough=local,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # trigger_emulation,
                # NEED TO BE CONVERTED TO XAOD
                # Triggers(
                #     year=year,
                #     tree=tree,
                #     datatype=datatype,
                #     passthrough=datatype in (datasets.EMBED, datasets.MCEMBED),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                PileupReweight_xAOD(
                        tree=tree,
                        passthrough=(local or (
                            datatype not in (datasets.MC, datasets.MCEMBED))),
                        count_funcs=count_funcs),
                PriVertex(
                    passthrough=local,
                    count_funcs=count_funcs),
                LArError(
                    passthrough=local,
                    count_funcs=count_funcs),
                TileError(
                    passthrough=local,
                    count_funcs=count_funcs),
                TileTrips(
                    passthrough=(
                        local or datatype in (datasets.MC, datasets.MCEMBED)),
                    count_funcs=count_funcs),
                JetCalibration(
                        datatype=datatype,
                        passthrough=local,
                        count_funcs=count_funcs),
                JetResolution(
                        passthrough=(local or (
                                datatype not in (datasets.MC, datasets.MCEMBED))),
                        count_funcs=count_funcs),
                TauCalibration(
                        datatype,
                        passthrough=local,
                        count_funcs=count_funcs),
                # # truth matching must come before systematics due to
                # # TES_TRUE/FAKE
                # NEED TO BE CONVERTED TO XAOD
                TrueTauSelection(
                        passthrough=datatype == datasets.DATA,
                        count_funcs=count_funcs),
                TruthMatching(
                    passthrough=datatype == datasets.DATA,
                    count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                NvtxJets(
                    tree=tree,
                    count_funcs=count_funcs),
                # # PUT THE SYSTEMATICS "FILTER" BEFORE
                # # ANY FILTERS THAT REFER TO OBJECTS
                # # BUT AFTER CALIBRATIONS
                # # Systematics must also come before anything that refers to
                # # thing.fourvect since fourvect is cached!
                # NEED TO BE CONVERTED TO XAOD
                # Systematics(
                #     terms=syst_terms,
                #     year=year,
                #     datatype=datatype,
                #     tree=tree,
                #     verbose=verbose,
                #     passthrough=not syst_terms,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # JetIsPileup(
                #     passthrough=(
                #         local or year < 2012 or
                #         datatype not in (datasets.MC, datasets.MCEMBED)),
                #     count_funcs=count_funcs),
                JetCleaning(
                    datatype=datatype,
                    year=year,
                    count_funcs=count_funcs),
                ElectronVeto(
                        el_sel='Medium',
                        count_funcs=count_funcs),
                MuonVeto(
                    count_funcs=count_funcs),
                TauPT(2,
                    thresh=20 * GeV,
                    count_funcs=count_funcs),
                TauHasTrack(2,
                    count_funcs=count_funcs),
                TauEta(2,
                    count_funcs=count_funcs),
                TauElectronVeto(2,
                    count_funcs=count_funcs),
                TauMuonVeto(2,
                    count_funcs=count_funcs),
                TauCrack(2,
                    count_funcs=count_funcs),
                # # before selecting the leading and subleading taus
                # # be sure to only consider good candidates
                TauIDMedium(2,
                    count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # but not used by default
                # #TauTriggerMatchIndex(
                # #    config=trigger_config,
                # #    year=year,
                # #    datatype=datatype,
                # #    passthrough=datatype == datasets.EMBED,
                # #    count_funcs=count_funcs),
                # Select two leading taus at this point
                # 25 and 35 for data
                # 20 and 30 for MC to leave room for TES uncertainty
                TauLeadSublead(
                    lead=(
                        35 * GeV if datatype == datasets.DATA or local
                        else 30 * GeV),
                    sublead=(
                        25 * GeV if datatype == datasets.DATA or local
                        else 20 * GeV),
                    count_funcs=count_funcs),
                # taus are sorted (in decreasing order) by pT from here on
                TauIDSelection(
                    count_funcs=count_funcs),
                TaudR(3.2,
                    count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # but not used by default
                # #TauTriggerMatchThreshold(
                # #    datatype=datatype,
                # #    tree=tree,
                # #    count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # TauTriggerEfficiency(
                #     year=year,
                #     datatype=datatype,
                #     tree=tree,
                #     tes_systematic=self.args.syst_terms and (
                #         Systematics.TES_TERMS & self.args.syst_terms),
                #     passthrough=datatype == datasets.DATA,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                PileupScale(
                    tree=tree,
                    year=year,
                    datatype=datatype,
                    passthrough=local,
                    count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                TauIDScaleFactors(
                    year=year,
                    passthrough=datatype == datasets.DATA,
                    count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # TauFakeRateScaleFactors(
                #     year=year,
                #     datatype=datatype,
                #     tree=tree,
                #     tes_up=(self.args.syst_terms is not None and
                #         (Systematics.TES_FAKE_TOTAL_UP in self.args.syst_terms or
                #          Systematics.TES_FAKE_FINAL_UP in self.args.syst_terms)),
                #     tes_down=(self.args.syst_terms is not None and
                #         (Systematics.TES_FAKE_TOTAL_DOWN in self.args.syst_terms or
                #          Systematics.TES_FAKE_FINAL_DOWN in self.args.syst_terms)),
                #     passthrough=datatype in (datasets.DATA, datasets.EMBED),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # HiggsPT(
                #     year=year,
                #     tree=tree,
                #     passthrough=not is_signal or local,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # TauTrackRecounting(
                #     year=year,
                #     use_ntup_value=tau_ntrack_recounted_use_ntup,
                #     passthrough=local,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # MCWeight(
                #     datatype=datatype,
                #     tree=tree,
                #     passthrough=local or datatype == datasets.DATA,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # EmbeddingIsolation(
                #     tree=tree,
                #     passthrough=(
                #         local or year < 2012 or
                #         datatype not in (datasets.EMBED, datasets.MCEMBED)),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # EmbeddingCorrections(
                #     tree=tree,
                #     year=year,
                #     passthrough=(
                #         local or
                #         datatype not in (datasets.EMBED, datasets.MCEMBED)),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # EmbeddingTauSpinner(
                #     year=year,
                #     tree=tree,
                #     passthrough=(
                #         local or datatype not in (
                #             datasets.EMBED, datasets.MCEMBED)),
                #     count_funcs=count_funcs),
                # # put MET recalculation after tau selection but before tau-jet
                # # overlap removal and jet selection because of the RefAntiTau
                # # MET correction
                # NEED TO BE CONVERTED TO XAOD
                # METRecalculation(
                #     terms=syst_terms,
                #     year=year,
                #     tree=tree,
                #     refantitau=not nominal_values,
                #     verbose=verbose,
                #     very_verbose=very_verbose,
                #     count_funcs=count_funcs),
                TauJetOverlapRemoval(
                    count_funcs=count_funcs),
                JetPreselection(
                    count_funcs=count_funcs),
                NonIsolatedJet(
                    tree=tree,
                    count_funcs=count_funcs),
                JetSelection(
                    year=year,
                    count_funcs=count_funcs),
                RecoJetTrueTauMatching(
                    passthrough=datatype == datasets.DATA or local,
                    count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # BCHCleaning(
                #     tree=tree,
                #     passthrough=year == 2011 or local,
                #     datatype=datatype,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                ClassifyInclusiveHiggsSample(
                    tree=tree,
                    passthrough=not is_inclusive_signal,
                    count_funcs=count_funcs),
            ])

            # set the event filters
            self.filters['event'] = event_filters

        hh_buffer = TreeBuffer()
        if local:
            chain = TreeChain(
                self.metadata.treename,
                files=self.files,
                # ignore_branches=ignore_branches,
                events=self.events,
                onfilechange=onfilechange,
                filters=event_filters,
                cache=True,
                cache_size=50000000,
                learn_entries=100)
            buffer = TreeBuffer()
            for name, value in chain._buffer.items():
                if name.startswith('hh_'):
                    hh_buffer[name[3:]] = value
                elif name in copied:
                    buffer[name] = value
            outtree.set_buffer(
                hh_buffer,
                create_branches=False,
                visible=True)
            outtree.set_buffer(
                buffer,
                create_branches=True,
                visible=False)


        else:

            root_chain = ROOT.TChain(self.metadata.treename)
            for f in self.files:
                log.info(f)
                root_chain.Add(f)
            
            # if len(self.files) != 1:
            #     raise RuntimeError('lenght of files has to be 1 for now (no xAOD chaining available)')
            # self.files = self.files[0]
            # root_chain = ROOT.TFile(self.files)

            chain = xAODTree(root_chain, filters=event_filters, events=self.events)
            define_objects(chain, datatype=datatype)
            outtree.set_buffer(
                hh_buffer,
                create_branches=True,
                visible=False)

            # # create the MMC
            # mmc = mass.MMC(year=year)
            from ROOT import MissingMassTool
            mass_tool = MissingMassTool('mass_tool')
            mass_tool.initialize()


        # report which packages have been loaded
        # externaltools.report()

        self.output.cd()

        # The main event loop
        # the event filters above are automatically run for each event and only
        # the surviving events are looped on
        for event in chain:
            if local and syst_terms is None and not redo_selection:
                outtree.Fill()
                continue
            
            # sort taus and jets in decreasing order by pT
            event.taus.sort(key=lambda tau: tau.pt(), reverse=True)
            event.jets.sort(key=lambda jet: jet.pt(), reverse=True)

            # tau1 is the leading tau
            # tau2 is the subleading tau
            tau1, tau2 = event.taus
            tau1.fourvect = asrootpy(tau1.p4())
            tau2.fourvect = asrootpy(tau2.p4())

            beta_taus = (tau1.fourvect + tau2.fourvect).BoostVector()
            tau1.fourvect_boosted = LorentzVector()
            tau1.fourvect_boosted.copy_from(tau1.fourvect)
            tau1.fourvect_boosted.Boost(beta_taus * -1)
            
            tau2.fourvect_boosted = LorentzVector()
            tau2.fourvect_boosted.copy_from(tau2.fourvect)
            tau2.fourvect_boosted.Boost(beta_taus * -1)

            jets = list(event.jets)
            for jet in jets:
                jet.fourvect = asrootpy(jet.p4())

            jet1, jet2, jet3 = None, None, None
            beta = None
            if len(jets) >= 2:
                jet1, jet2 = jets[:2]

                # determine boost of system
                # determine jet CoM frame
                beta = (jet1.fourvect + jet2.fourvect).BoostVector()
                tree.jet_beta.copy_from(beta)

                jet1.fourvect_boosted = LorentzVector()
                jet1.fourvect_boosted.copy_from(jet1.fourvect)
                jet1.fourvect_boosted.Boost(beta * -1)

                jet2.fourvect_boosted = LorentzVector()
                jet2.fourvect_boosted.copy_from(jet2.fourvect)
                jet2.fourvect_boosted.Boost(beta * -1)

                tau1.min_dr_jet = min(
                    tau1.fourvect.DeltaR(jet1.fourvect),
                    tau1.fourvect.DeltaR(jet2.fourvect))
                tau2.min_dr_jet = min(
                    tau2.fourvect.DeltaR(jet1.fourvect),
                    tau2.fourvect.DeltaR(jet2.fourvect))

                # tau centrality (degree to which they are between the two jets)
                tau1.centrality = eventshapes.eta_centrality(
                    tau1.fourvect.Eta(),
                    jet1.fourvect.Eta(),
                    jet2.fourvect.Eta())

                tau2.centrality = eventshapes.eta_centrality(
                    tau2.fourvect.Eta(),
                    jet1.fourvect.Eta(),
                    jet2.fourvect.Eta())

                # boosted tau centrality
                tau1.centrality_boosted = eventshapes.eta_centrality(
                    tau1.fourvect_boosted.Eta(),
                    jet1.fourvect_boosted.Eta(),
                    jet2.fourvect_boosted.Eta())

                tau2.centrality_boosted = eventshapes.eta_centrality(
                    tau2.fourvect_boosted.Eta(),
                    jet1.fourvect_boosted.Eta(),
                    jet2.fourvect_boosted.Eta())

                # 3rd leading jet
                if len(jets) >= 3:
                    jet3 = jets[2]
                    jet3.fourvect_boosted = LorentzVector()
                    jet3.fourvect_boosted.copy_from(jet3.fourvect)
                    jet3.fourvect_boosted.Boost(beta * -1)

            elif len(jets) == 1:
                jet1 = jets[0]

                tau1.min_dr_jet = tau1.fourvect.DeltaR(jet1.fourvect)
                tau2.min_dr_jet = tau2.fourvect.DeltaR(jet1.fourvect)

            RecoJetBlock.set(tree, jet1, jet2, jet3, local=local)

            # mass of ditau + leading jet system
            if jet1 is not None:
                tree.mass_tau1_tau2_jet1 = (
                    tau1.fourvect + tau2.fourvect + jet1.fourvect).M()

            #####################################
            # number of tracks from PV minus taus
            #####################################
            ntrack_pv = 0
            ntrack_nontau_pv = 0
            for vxp in event.vertices:
                # primary vertex
                if vxp.vertexType() == 1:
                    ntrack_pv = vxp.nTrackParticles()
                    ntrack_nontau_pv = ntrack_pv - tau1.nTracks() - tau2.nTracks()
                    break
            tree.ntrack_pv = ntrack_pv
            tree.ntrack_nontau_pv = ntrack_nontau_pv

            #########################
            # MET variables
            #########################
            MET = event.MET.collection['Final']
            METx = MET.mpx()
            METy = MET.mpy()
            METet = MET.met()
            MET_vect = Vector2(METx, METy)
            MET_4vect = LorentzVector()
            MET_4vect.SetPxPyPzE(METx, METy, 0., METet)
            MET_4vect_boosted = LorentzVector()
            MET_4vect_boosted.copy_from(MET_4vect)
            if beta is not None:
                MET_4vect_boosted.Boost(beta * -1)

            tree.MET_et = METet
            tree.MET_etx = METx
            tree.MET_ety = METy
            tree.MET_phi = MET.phi()
            dPhi_tau1_tau2 = abs(tau1.fourvect.DeltaPhi(tau2.fourvect))
            dPhi_tau1_MET = abs(tau1.fourvect.DeltaPhi(MET_4vect))
            dPhi_tau2_MET = abs(tau2.fourvect.DeltaPhi(MET_4vect))
            tree.dPhi_tau1_tau2 = dPhi_tau1_tau2
            tree.dPhi_tau1_MET = dPhi_tau1_MET
            tree.dPhi_tau2_MET = dPhi_tau2_MET
            tree.dPhi_min_tau_MET = min(dPhi_tau1_MET, dPhi_tau2_MET)
            tree.MET_bisecting = is_MET_bisecting(
                dPhi_tau1_tau2,
                dPhi_tau1_MET,
                dPhi_tau2_MET)

            sumET = MET.sumet()
            tree.MET_sumet = sumET
            if sumET != 0:
                tree.MET_sig = ((2. * METet / GeV) /
                    (utils.sign(sumET) * sqrt(abs(sumET / GeV))))
            else:
                tree.MET_sig = -1.

            tree.MET_centrality = eventshapes.phi_centrality(
                tau1.fourvect,
                tau2.fourvect,
                MET_vect)
            tree.MET_centrality_boosted = eventshapes.phi_centrality(
                tau1.fourvect_boosted,
                tau2.fourvect_boosted,
                MET_4vect_boosted)

            tree.number_of_good_vertices = len(event.vertices)

            ##########################
            # Jet and sum pt variables
            ##########################
            tree.numJets = len(event.jets)

            # sum pT with only the two leading jets
            tree.sum_pt = sum(
                [tau1.pt(), tau2.pt()] +
                [jet.pt() for jet in jets[:2]])

            # sum pT with all selected jets
            tree.sum_pt_full = sum(
                [tau1.pt(), tau2.pt()] +
                [jet.pt() for jet in jets])

            # vector sum pT with two leading jets and MET
            tree.vector_sum_pt = sum(
                [tau1.fourvect, tau2.fourvect] +
                [jet.fourvect for jet in jets[:2]] +
                [MET_4vect]).Pt()

            # vector sum pT with all selected jets and MET
            tree.vector_sum_pt_full = sum(
                [tau1.fourvect, tau2.fourvect] +
                [jet.fourvect for jet in jets] +
                [MET_4vect]).Pt()

            # resonance pT
            tree.resonance_pt = sum(
                [tau1.fourvect, tau2.fourvect, MET_4vect]).Pt()

            # #############################
            # # tau <-> vertex association
            # #############################
            tree.tau_same_vertex = (
                tau1.vertex() == tau2.vertex())

            tau1.vertex_prob = ROOT.TMath.Prob(
                tau1.vertex().chiSquared(),
                int(tau1.vertex().numberDoF()))

            tau2.vertex_prob = ROOT.TMath.Prob(
                tau2.vertex().chiSquared(),
                int(tau2.vertex().numberDoF()))

            # ##########################
            # # MMC Mass
            # ##########################
            # OLD USAGE
            # mmc_result = mmc.mass(
            #     tau1, tau2,
            #     METx, METy, sumET,
            #     njets=len(event.jets))
            # for mmc_method, mmc_object in enumerate(mmc_objects):
            #     mmc_mass, mmc_resonance, mmc_met = mmc_result[mmc_method]
            #     if verbose:
            #         log.info("MMC (method %d): %f" % (mmc_method, mmc_mass))

            #     mmc_object.mass = mmc_mass
            #     mmc_object.MET_et = mmc_met.Mod()
            #     mmc_object.MET_etx = mmc_met.X()
            #     mmc_object.MET_ety = mmc_met.Y()
            #     mmc_object.MET_phi = math.pi - mmc_met.Phi()
            #     if mmc_mass > 0:
            #         FourMomentum.set(mmc_object.resonance, mmc_resonance)

            mass_tool.apply(event.EventInfo, tau1, tau2, MET, len(event.jets))
            for i, mmc_object in enumerate(mmc_objects):
                mmc_object.mass = event.EventInfo.auxdataConst('double')('mmc%s_mass' % i)
                mmc_object.MET_et = mass_tool.GetFittedMetVec(i).Mod()
                mmc_object.MET_etx = mass_tool.GetFittedMetVec(i).X()
                mmc_object.MET_ety = mass_tool.GetFittedMetVec(i).Y()
                mmc_object.MET_phi = math.pi - mass_tool.GetFittedMetVec(i).Phi()
                if mmc_object.mass > 0:
                    FourMomentum.set(mmc_object.resonance, mass_tool.GetResonanceVec(i))


            # ############################
            # # collinear and visible mass
            # ############################
            # vis_mass, collin_mass, tau1_x, tau2_x = mass.collinearmass(
            #     tau1, tau2, METx, METy)

            # tree.mass_vis_tau1_tau2 = vis_mass
            # tree.mass_collinear_tau1_tau2 = collin_mass
            # tau1.collinear_momentum_fraction = tau1_x
            # tau2.collinear_momentum_fraction = tau2_x

            # # Fill the tau block
            # # This must come after the RecoJetBlock is filled since
            # # that sets the jet_beta for boosting the taus
            RecoTauBlock.set(event, tree, datatype, tau1, tau2, local=local)

            # if datatype != datasets.DATA:
            #     TrueTauBlock.set(tree, tau1, tau2)

            # fill the output tree
            outtree.Fill(reset=True)

        # externaltools.report()

        # flush any baskets remaining in memory to disk
        self.output.cd()
        outtree.FlushBaskets()
        outtree.Write()

        if local:
            if datatype == datasets.DATA:
                xml_string = ROOT.TObjString(merged_grl.str())
                xml_string.Write('lumi')
            merged_cutflow.Write()
Ejemplo n.º 5
0
    def work(self):
        # get argument values
        local = self.args.local
        syst_terms = self.args.syst_terms
        datatype = self.metadata.datatype
        year = self.metadata.year
        verbose = self.args.student_verbose
        very_verbose = self.args.student_very_verbose
        redo_selection = self.args.redo_selection
        nominal_values = self.args.nominal_values

        # get the dataset name
        dsname = os.getenv('INPUT_DATASET_NAME', None)
        if dsname is None:
            # attempt to guess dsname from dirname
            if self.files:
                dsname = os.path.basename(os.path.dirname(self.files[0]))

        # is this a signal sample?
        # if so we will also keep some truth information in the output below
        is_signal = datatype == datasets.MC and (
            '_VBFH' in dsname or
            '_ggH' in dsname or
            '_ZH' in dsname or
            '_WH' in dsname or
            '_ttH' in dsname)
        log.info("DATASET: {0}".format(dsname))
        log.info("IS SIGNAL: {0}".format(is_signal))

        # is this an inclusive signal sample for overlap studies?
        is_inclusive_signal = is_signal and '_inclusive' in dsname

        # is this a BCH-fixed sample? (temporary)
        is_bch_sample = 'r5470_r4540_p1344' in dsname
        if is_bch_sample:
            log.warning("this is a BCH-fixed r5470 sample")

        # onfilechange will contain a list of functions to be called as the
        # chain rolls over to each new file
        onfilechange = []
        count_funcs = {}

        if datatype != datasets.DATA:
            # count the weighted number of events
            if local:
                def mc_weight_count(event):
                    return event.hh_mc_weight
            else:
                def mc_weight_count(event):
                    return event.mc_event_weight

            count_funcs = {
                'mc_weight': mc_weight_count,
            }

        # three instances of the pileup reweighting tool are created to write
        # out the nominal, high and low pileup weights
        pileup_tool = None
        pileup_tool_high = None
        pileup_tool_low = None

        if local:
            # local means running on the skims, the output of this script
            # running on the grid
            if datatype == datasets.DATA:
                # merge the GRL fragments
                merged_grl = goodruns.GRL()

                def update_grl(student, grl, name, file, tree):
                    grl |= str(file.Get('Lumi/%s' % student.metadata.treename).GetString())

                onfilechange.append((update_grl, (self, merged_grl,)))

            if datatype == datasets.DATA:
                merged_cutflow = Hist(1, 0, 1, name='cutflow', type='D')
            else:
                merged_cutflow = Hist(2, 0, 2, name='cutflow', type='D')

            def update_cutflow(student, cutflow, name, file, tree):
                # record a cut-flow
                year = student.metadata.year
                datatype = student.metadata.datatype
                cutflow[1].value += file.cutflow_event[1].value
                if datatype != datasets.DATA:
                    cutflow[2].value += file.cutflow_event_mc_weight[1].value

            onfilechange.append((update_cutflow, (self, merged_cutflow,)))

        else:
            # get pileup reweighting tool
            pileup_tool = get_pileup_reweighting_tool(
                year=year,
                use_defaults=True)
            pileup_tool_high = get_pileup_reweighting_tool(
                year=year,
                use_defaults=True,
                systematic='high')
            pileup_tool_low = get_pileup_reweighting_tool(
                year=year,
                use_defaults=True,
                systematic='low')

            if datatype not in (datasets.EMBED, datasets.MCEMBED):
                # merge TrigConfTrees
                metadirname = '%sMeta' % self.metadata.treename
                trigconfchain = ROOT.TChain('%s/TrigConfTree' % metadirname)
                map(trigconfchain.Add, self.files)
                metadir = self.output.mkdir(metadirname)
                metadir.cd()
                trigconfchain.Merge(self.output, -1, 'fast keep')
                self.output.cd()

            if datatype == datasets.DATA:
                # merge GRL XML strings
                merged_grl = goodruns.GRL()
                for fname in self.files:
                    with root_open(fname) as f:
                        for key in f.Lumi.keys():
                            merged_grl |= goodruns.GRL(
                                str(key.ReadObj().GetString()),
                                from_string=True)
                lumi_dir = self.output.mkdir('Lumi')
                lumi_dir.cd()
                xml_string= ROOT.TObjString(merged_grl.str())
                xml_string.Write(self.metadata.treename)
                self.output.cd()

        self.output.cd()

        # create the output tree
        model = get_model(datatype, dsname,
                          prefix=None if local else 'hh_',
                          is_inclusive_signal=is_inclusive_signal)
        log.info("Output Model:\n\n{0}\n\n".format(model))
        outtree = Tree(name=self.metadata.treename,
                       model=model)

        if local:
            tree = outtree
        else:
            tree = outtree.define_object(name='tree', prefix='hh_')

        tree.define_object(name='tau', prefix='tau_')
        tree.define_object(name='tau1', prefix='tau1_')
        tree.define_object(name='tau2', prefix='tau2_')
        tree.define_object(name='truetau1', prefix='truetau1_')
        tree.define_object(name='truetau2', prefix='truetau2_')
        tree.define_object(name='jet1', prefix='jet1_')
        tree.define_object(name='jet2', prefix='jet2_')
        tree.define_object(name='jet3', prefix='jet3_')

        mmc_objects = [
            tree.define_object(name='mmc0', prefix='mmc0_'),
            tree.define_object(name='mmc1', prefix='mmc1_'),
            tree.define_object(name='mmc2', prefix='mmc2_'),
        ]

        for mmc_obj in mmc_objects:
            mmc_obj.define_object(name='resonance', prefix='resonance_')

        trigger_emulation = TauTriggerEmulation(
            year=year,
            passthrough=local or datatype != datasets.MC or year > 2011,
            count_funcs=count_funcs)

        if not trigger_emulation.passthrough:
            onfilechange.append(
                (update_trigger_trees, (self, trigger_emulation,)))

        trigger_config = None

        if datatype not in (datasets.EMBED, datasets.MCEMBED):
            # trigger config tool to read trigger info in the ntuples
            trigger_config = get_trigger_config()
            # update the trigger config maps on every file change
            onfilechange.append((update_trigger_config, (trigger_config,)))

        # define the list of event filters
        if local and syst_terms is None and not redo_selection:
            event_filters = None
        else:
            tau_ntrack_recounted_use_ntup = False
            if year > 2011:
                # peek at first tree to determine if the extended number of
                # tracks is already stored
                with root_open(self.files[0]) as test_file:
                    test_tree = test_file.Get(self.metadata.treename)
                    tau_ntrack_recounted_use_ntup = (
                        'tau_out_track_n_extended' in test_tree)

            event_filters = EventFilterList([
                GRLFilter(
                    self.grl,
                    passthrough=(
                        local or (
                            datatype not in (datasets.DATA, datasets.EMBED))),
                    count_funcs=count_funcs),
                CoreFlags(
                    passthrough=local,
                    count_funcs=count_funcs),
                EmbeddingPileupPatch(
                    passthrough=(
                        local or year > 2011 or datatype != datasets.EMBED),
                    count_funcs=count_funcs),
                averageIntPerXingPatch(
                    passthrough=(
                        local or year < 2012 or datatype != datasets.MC),
                    count_funcs=count_funcs),
                PileupTemplates(
                    year=year,
                    passthrough=(
                        local or is_bch_sample or datatype not in (
                            datasets.MC, datasets.MCEMBED)),
                    count_funcs=count_funcs),
                RandomSeed(
                    datatype=datatype,
                    count_funcs=count_funcs),
                BCHSampleRunNumber(
                    passthrough=not is_bch_sample,
                    count_funcs=count_funcs),
                RandomRunNumber(
                    tree=tree,
                    datatype=datatype,
                    pileup_tool=pileup_tool,
                    passthrough=local,
                    count_funcs=count_funcs),
                trigger_emulation,
                Triggers(
                    year=year,
                    tree=tree,
                    datatype=datatype,
                    passthrough=datatype in (datasets.EMBED, datasets.MCEMBED),
                    count_funcs=count_funcs),
                PileupReweight(
                    year=year,
                    tool=pileup_tool,
                    tool_high=pileup_tool_high,
                    tool_low=pileup_tool_low,
                    tree=tree,
                    passthrough=(
                        local or (
                            datatype not in (datasets.MC, datasets.MCEMBED))),
                    count_funcs=count_funcs),
                PriVertex(
                    passthrough=local,
                    count_funcs=count_funcs),
                LArError(
                    passthrough=local,
                    count_funcs=count_funcs),
                TileError(
                    passthrough=local,
                    count_funcs=count_funcs),
                TileTrips(
                    passthrough=(
                        local or datatype in (datasets.MC, datasets.MCEMBED)),
                    count_funcs=count_funcs),
                JetCopy(
                    tree=tree,
                    passthrough=local,
                    count_funcs=count_funcs),
                # IMPORTANT!
                # JetCalibration MUST COME BEFORE ANYTHING THAT REFERS TO
                # jet.fourvect since jet.fourvect IS CACHED!
                JetCalibration(
                    datatype=datatype,
                    year=year,
                    verbose=very_verbose,
                    passthrough=local or nominal_values,
                    count_funcs=count_funcs),
                # in situ TES shift for 2012 data
                TauEnergyShift(
                    passthrough=(
                        local or datatype != datasets.DATA
                        or year < 2012 or nominal_values),
                    count_funcs=count_funcs),
                # truth matching must come before systematics due to
                # TES_TRUE/FAKE
                TruthMatching(
                    passthrough=datatype == datasets.DATA,
                    count_funcs=count_funcs),
                NvtxJets(
                    tree=tree,
                    count_funcs=count_funcs),
                # PUT THE SYSTEMATICS "FILTER" BEFORE
                # ANY FILTERS THAT REFER TO OBJECTS
                # BUT AFTER CALIBRATIONS
                # Systematics must also come before anything that refers to
                # thing.fourvect since fourvect is cached!
                Systematics(
                    terms=syst_terms,
                    year=year,
                    datatype=datatype,
                    tree=tree,
                    verbose=verbose,
                    passthrough=not syst_terms,
                    count_funcs=count_funcs),
                JetIsPileup(
                    passthrough=(
                        local or year < 2012 or
                        datatype not in (datasets.MC, datasets.MCEMBED)),
                    count_funcs=count_funcs),
                LArHole(
                    tree=tree,
                    passthrough=year > 2011,
                    count_funcs=count_funcs),
                JetCleaning(
                    datatype=datatype,
                    year=year,
                    count_funcs=count_funcs),
                ElectronVeto(
                    count_funcs=count_funcs),
                MuonVeto(
                    year=year,
                    count_funcs=count_funcs),
                TauPT(2,
                    thresh=20 * GeV,
                    count_funcs=count_funcs),
                TauHasTrack(2,
                    count_funcs=count_funcs),
                TauEta(2,
                    count_funcs=count_funcs),
                TauElectronVeto(2,
                    count_funcs=count_funcs),
                TauMuonVeto(2,
                    count_funcs=count_funcs),
                TauAuthor(2,
                    count_funcs=count_funcs),
                TauCrack(2,
                    count_funcs=count_funcs),
                TauLArHole(2,
                    tree=tree,
                    passthrough=year > 2011,
                    count_funcs=count_funcs),
                # before selecting the leading and subleading taus
                # be sure to only consider good candidates
                TauIDMedium(2,
                    count_funcs=count_funcs),
                #TauTriggerMatchIndex(
                #    config=trigger_config,
                #    year=year,
                #    datatype=datatype,
                #    passthrough=datatype == datasets.EMBED,
                #    count_funcs=count_funcs),
                # Select two leading taus at this point
                # 25 and 35 for data
                # 20 and 30 for MC to leave room for TES uncertainty
                TauLeadSublead(
                    lead=(
                        35 * GeV if datatype == datasets.DATA or local
                        else 30 * GeV),
                    sublead=(
                        25 * GeV if datatype == datasets.DATA or local
                        else 20 * GeV),
                    count_funcs=count_funcs),
                # taus are sorted (in decreasing order) by pT from here on
                TauIDSelection(
                    tree=tree,
                    count_funcs=count_funcs),
                TaudR(3.2,
                    count_funcs=count_funcs),
                #TauTriggerMatchThreshold(
                #    datatype=datatype,
                #    tree=tree,
                #    count_funcs=count_funcs),
                TauTriggerEfficiency(
                    year=year,
                    datatype=datatype,
                    tree=tree,
                    tes_systematic=self.args.syst_terms and (
                        Systematics.TES_TERMS & self.args.syst_terms),
                    passthrough=datatype == datasets.DATA,
                    count_funcs=count_funcs),
                PileupScale(
                    tree=tree,
                    year=year,
                    datatype=datatype,
                    passthrough=local,
                    count_funcs=count_funcs),
                TauIDScaleFactors(
                    year=year,
                    passthrough=datatype == datasets.DATA,
                    count_funcs=count_funcs),
                TauFakeRateScaleFactors(
                    year=year,
                    datatype=datatype,
                    tree=tree,
                    tes_up=(self.args.syst_terms is not None and
                        (Systematics.TES_FAKE_TOTAL_UP in self.args.syst_terms or
                         Systematics.TES_FAKE_FINAL_UP in self.args.syst_terms)),
                    tes_down=(self.args.syst_terms is not None and
                        (Systematics.TES_FAKE_TOTAL_DOWN in self.args.syst_terms or
                         Systematics.TES_FAKE_FINAL_DOWN in self.args.syst_terms)),
                    passthrough=datatype in (datasets.DATA, datasets.EMBED),
                    count_funcs=count_funcs),
                HiggsPT(
                    year=year,
                    tree=tree,
                    passthrough=not is_signal or local,
                    count_funcs=count_funcs),
                TauTrackRecounting(
                    year=year,
                    use_ntup_value=tau_ntrack_recounted_use_ntup,
                    passthrough=local,
                    count_funcs=count_funcs),
                MCWeight(
                    datatype=datatype,
                    tree=tree,
                    passthrough=local or datatype == datasets.DATA,
                    count_funcs=count_funcs),
                EmbeddingIsolation(
                    tree=tree,
                    passthrough=(
                        local or year < 2012 or
                        datatype not in (datasets.EMBED, datasets.MCEMBED)),
                    count_funcs=count_funcs),
                EmbeddingCorrections(
                    tree=tree,
                    year=year,
                    passthrough=(
                        local or
                        datatype not in (datasets.EMBED, datasets.MCEMBED)),
                    count_funcs=count_funcs),
                EmbeddingTauSpinner(
                    year=year,
                    tree=tree,
                    passthrough=(
                        local or datatype not in (
                            datasets.EMBED, datasets.MCEMBED)),
                    count_funcs=count_funcs),
                # put MET recalculation after tau selection but before tau-jet
                # overlap removal and jet selection because of the RefAntiTau
                # MET correction
                METRecalculation(
                    terms=syst_terms,
                    year=year,
                    tree=tree,
                    refantitau=not nominal_values,
                    verbose=verbose,
                    very_verbose=very_verbose,
                    count_funcs=count_funcs),
                TauJetOverlapRemoval(
                    count_funcs=count_funcs),
                JetPreselection(
                    count_funcs=count_funcs),
                NonIsolatedJet(
                    tree=tree,
                    count_funcs=count_funcs),
                JetSelection(
                    year=year,
                    count_funcs=count_funcs),
                RecoJetTrueTauMatching(
                    passthrough=datatype == datasets.DATA or local,
                    count_funcs=count_funcs),
                BCHCleaning(
                    tree=tree,
                    passthrough=year == 2011 or local,
                    datatype=datatype,
                    count_funcs=count_funcs),
                ClassifyInclusiveHiggsSample(
                    tree=tree,
                    passthrough=not is_inclusive_signal,
                    count_funcs=count_funcs),
            ])

            # set the event filters
            self.filters['event'] = event_filters

        # peek at first tree to determine which branches to exclude
        with root_open(self.files[0]) as test_file:
            test_tree = test_file.Get(self.metadata.treename)
            ignore_branches = test_tree.glob(
                hhbranches.REMOVE,
                exclude=hhbranches.KEEP)
            ignore_branches_output = test_tree.glob(
                hhbranches.REMOVE_OUTPUT,
                exclude=hhbranches.KEEP_OUTPUT)

        # initialize the TreeChain of all input files
        chain = TreeChain(
            self.metadata.treename,
            files=self.files,
            ignore_branches=ignore_branches,
            events=self.events,
            onfilechange=onfilechange,
            filters=event_filters,
            cache=True,
            cache_size=50000000,
            learn_entries=100)

        if local:
            copied = [
                'EventNumber',
            ]

            hh_buffer = TreeBuffer()
            buffer = TreeBuffer()
            for name, value in chain._buffer.items():
                if name.startswith('hh_'):
                    hh_buffer[name[3:]] = value
                elif name in copied:
                    buffer[name] = value
            outtree.set_buffer(
                hh_buffer,
                create_branches=False,
                visible=True)
            outtree.set_buffer(
                buffer,
                create_branches=True,
                visible=False)

        else:
            # additional decorations on existing objects
            if year > 2011 and datatype in (datasets.MC, datasets.MCEMBED):
                class Decorations(TreeModel):
                    jet_ispileup = stl.vector('bool')

                chain.set_buffer(Decorations(), create_branches=True)

            # include the branches in the input chain in the output tree
            # set branches to be removed in ignore_branches
            outtree.set_buffer(
                chain._buffer,
                ignore_branches=ignore_branches + ignore_branches_output,
                create_branches=True,
                ignore_duplicates=True,
                transfer_objects=True,
                visible=False)

        # define tree objects
        define_objects(chain, year)

        # create the MMC
        mmc = mass.MMC(year=year)

        # report which packages have been loaded
        externaltools.report()

        self.output.cd()

        # The main event loop
        # the event filters above are automatically run for each event and only
        # the surviving events are looped on
        for event in chain:

            if local and syst_terms is None and not redo_selection:
                outtree.Fill()
                continue

            # sort taus and jets in decreasing order by pT
            event.taus.sort(key=lambda tau: tau.pt, reverse=True)
            event.jets.sort(key=lambda jet: jet.pt, reverse=True)

            # tau1 is the leading tau
            # tau2 is the subleading tau
            tau1, tau2 = event.taus
            jets = list(event.jets)
            jet1, jet2, jet3 = None, None, None
            beta = None

            if len(jets) >= 2:
                jet1, jet2 = jets[:2]

                # determine boost of system
                # determine jet CoM frame
                beta = (jet1.fourvect + jet2.fourvect).BoostVector()
                tree.jet_beta.copy_from(beta)

                jet1.fourvect_boosted.copy_from(jet1.fourvect)
                jet2.fourvect_boosted.copy_from(jet2.fourvect)
                jet1.fourvect_boosted.Boost(beta * -1)
                jet2.fourvect_boosted.Boost(beta * -1)

                tau1.fourvect_boosted.copy_from(tau1.fourvect)
                tau2.fourvect_boosted.copy_from(tau2.fourvect)
                tau1.fourvect_boosted.Boost(beta * -1)
                tau2.fourvect_boosted.Boost(beta * -1)

                tau1.min_dr_jet = min(
                    tau1.fourvect.DeltaR(jet1.fourvect),
                    tau1.fourvect.DeltaR(jet2.fourvect))
                tau2.min_dr_jet = min(
                    tau2.fourvect.DeltaR(jet1.fourvect),
                    tau2.fourvect.DeltaR(jet2.fourvect))

                #sphericity, aplanarity = eventshapes.sphericity_aplanarity(
                #    [tau1.fourvect,
                #     tau2.fourvect,
                #     jet1.fourvect,
                #     jet2.fourvect])

                # sphericity
                #tree.sphericity = sphericity
                # aplanarity
                #tree.aplanarity = aplanarity

                #sphericity_boosted, aplanarity_boosted = eventshapes.sphericity_aplanarity(
                #    [tau1.fourvect_boosted,
                #     tau2.fourvect_boosted,
                #     jet1.fourvect_boosted,
                #     jet2.fourvect_boosted])

                # sphericity
                #tree.sphericity_boosted = sphericity_boosted
                # aplanarity
                #tree.aplanarity_boosted = aplanarity_boosted

                # tau centrality (degree to which they are between the two jets)
                tau1.centrality = eventshapes.eta_centrality(
                    tau1.fourvect.Eta(),
                    jet1.fourvect.Eta(),
                    jet2.fourvect.Eta())

                tau2.centrality = eventshapes.eta_centrality(
                    tau2.fourvect.Eta(),
                    jet1.fourvect.Eta(),
                    jet2.fourvect.Eta())

                # boosted tau centrality
                tau1.centrality_boosted = eventshapes.eta_centrality(
                    tau1.fourvect_boosted.Eta(),
                    jet1.fourvect_boosted.Eta(),
                    jet2.fourvect_boosted.Eta())

                tau2.centrality_boosted = eventshapes.eta_centrality(
                    tau2.fourvect_boosted.Eta(),
                    jet1.fourvect_boosted.Eta(),
                    jet2.fourvect_boosted.Eta())

                # 3rd leading jet
                if len(jets) >= 3:
                    jet3 = jets[2]
                    jet3.fourvect_boosted.copy_from(jet3.fourvect)
                    jet3.fourvect_boosted.Boost(beta * -1)

            elif len(jets) == 1:
                jet1 = jets[0]

                tau1.min_dr_jet = tau1.fourvect.DeltaR(jet1.fourvect)
                tau2.min_dr_jet = tau2.fourvect.DeltaR(jet1.fourvect)

                #sphericity, aplanarity = eventshapes.sphericity_aplanarity(
                #    [tau1.fourvect,
                #     tau2.fourvect,
                #     jet1.fourvect])

                # sphericity
                #tree.sphericity = sphericity
                # aplanarity
                #tree.aplanarity = aplanarity

            RecoJetBlock.set(tree, jet1, jet2, jet3, local=local)

            # mass of ditau + leading jet system
            if jet1 is not None:
                tree.mass_tau1_tau2_jet1 = (
                    tau1.fourvect + tau2.fourvect + jet1.fourvect).M()

            # full sphericity and aplanarity
            #sphericity_full, aplanarity_full = eventshapes.sphericity_aplanarity(
            #    [tau1.fourvect, tau2.fourvect] + [jet.fourvect for jet in jets])

            #tree.sphericity_full = sphericity_full
            #tree.aplanarity_full = aplanarity_full

            #####################################
            # number of tracks from PV minus taus
            #####################################
            ntrack_pv = 0
            ntrack_nontau_pv = 0
            for vxp in event.vertices:
                # primary vertex
                if vxp.type == 1:
                    ntrack_pv = vxp.nTracks
                    ntrack_nontau_pv = ntrack_pv - tau1.numTrack - tau2.numTrack
                    break
            tree.ntrack_pv = ntrack_pv
            tree.ntrack_nontau_pv = ntrack_nontau_pv

            #########################
            # MET variables
            #########################
            METx = event.MET.etx
            METy = event.MET.ety
            MET = event.MET.et
            MET_vect = Vector2(METx, METy)
            MET_4vect = LorentzVector()
            MET_4vect.SetPxPyPzE(METx, METy, 0., MET)
            MET_4vect_boosted = LorentzVector()
            MET_4vect_boosted.copy_from(MET_4vect)
            if beta is not None:
                MET_4vect_boosted.Boost(beta * -1)

            tree.MET_et = MET
            tree.MET_etx = METx
            tree.MET_ety = METy
            tree.MET_phi = event.MET.phi
            dPhi_tau1_tau2 = abs(tau1.fourvect.DeltaPhi(tau2.fourvect))
            dPhi_tau1_MET = abs(tau1.fourvect.DeltaPhi(MET_4vect))
            dPhi_tau2_MET = abs(tau2.fourvect.DeltaPhi(MET_4vect))
            tree.dPhi_tau1_tau2 = dPhi_tau1_tau2
            tree.dPhi_tau1_MET = dPhi_tau1_MET
            tree.dPhi_tau2_MET = dPhi_tau2_MET
            tree.dPhi_min_tau_MET = min(dPhi_tau1_MET, dPhi_tau2_MET)
            tree.MET_bisecting = is_MET_bisecting(
                dPhi_tau1_tau2,
                dPhi_tau1_MET,
                dPhi_tau2_MET)

            sumET = event.MET.sumet
            tree.MET_sumet = sumET
            if sumET != 0:
                tree.MET_sig = ((2. * MET / GeV) /
                    (utils.sign(sumET) * sqrt(abs(sumET / GeV))))
            else:
                tree.MET_sig = -1.

            tree.MET_centrality = eventshapes.phi_centrality(
                tau1.fourvect,
                tau2.fourvect,
                MET_vect)
            tree.MET_centrality_boosted = eventshapes.phi_centrality(
                tau1.fourvect_boosted,
                tau2.fourvect_boosted,
                MET_4vect_boosted)

            tree.number_of_good_vertices = len(event.vertices)

            ##########################
            # Jet and sum pt variables
            ##########################
            tree.numJets = len(event.jets)

            # sum pT with only the two leading jets
            tree.sum_pt = sum(
                [tau1.pt, tau2.pt] +
                [jet.pt for jet in jets[:2]])

            # sum pT with all selected jets
            tree.sum_pt_full = sum(
                [tau1.pt, tau2.pt] +
                [jet.pt for jet in jets])

            # vector sum pT with two leading jets and MET
            tree.vector_sum_pt = sum(
                [tau1.fourvect, tau2.fourvect] +
                [jet.fourvect for jet in jets[:2]] +
                [MET_4vect]).Pt()

            # vector sum pT with all selected jets and MET
            tree.vector_sum_pt_full = sum(
                [tau1.fourvect, tau2.fourvect] +
                [jet.fourvect for jet in jets] +
                [MET_4vect]).Pt()

            # resonance pT
            tree.resonance_pt = sum(
                [tau1.fourvect, tau2.fourvect, MET_4vect]).Pt()

            #############################
            # tau <-> vertex association
            #############################
            tree.tau_same_vertex = (
                tau1.privtx_x == tau2.privtx_x and
                tau1.privtx_y == tau2.privtx_y and
                tau1.privtx_z == tau2.privtx_z)

            tau1.vertex_prob = ROOT.TMath.Prob(
                tau1.privtx_chiSquared,
                int(tau1.privtx_numberDoF))

            tau2.vertex_prob = ROOT.TMath.Prob(
                tau2.privtx_chiSquared,
                int(tau2.privtx_numberDoF))

            ##########################
            # MMC Mass
            ##########################
            mmc_result = mmc.mass(
                tau1, tau2,
                METx, METy, sumET,
                njets=len(event.jets))

            for mmc_method, mmc_object in enumerate(mmc_objects):
                mmc_mass, mmc_resonance, mmc_met = mmc_result[mmc_method]
                if verbose:
                    log.info("MMC (method %d): %f" % (mmc_method, mmc_mass))

                mmc_object.mass = mmc_mass
                mmc_object.MET_et = mmc_met.Mod()
                mmc_object.MET_etx = mmc_met.X()
                mmc_object.MET_ety = mmc_met.Y()
                mmc_object.MET_phi = math.pi - mmc_met.Phi()
                if mmc_mass > 0:
                    FourMomentum.set(mmc_object.resonance, mmc_resonance)

            ############################
            # collinear and visible mass
            ############################
            vis_mass, collin_mass, tau1_x, tau2_x = mass.collinearmass(
                tau1, tau2, METx, METy)

            tree.mass_vis_tau1_tau2 = vis_mass
            tree.mass_collinear_tau1_tau2 = collin_mass
            tau1.collinear_momentum_fraction = tau1_x
            tau2.collinear_momentum_fraction = tau2_x

            ###########################
            # Match jets to VBF partons
            ###########################
            #if datatype == datasets.MC and 'VBF' in dsname and year == 2011:
            #    # get partons (already sorted by eta in hepmc) FIXME!!!
            #    parton1, parton2 = hepmc.get_VBF_partons(event)
            #    tree.mass_true_quark1_quark2 = (parton1.fourvect + parton2.fourvect).M()
            #    # order here needs to be revised since jets are no longer
            #    # sorted by eta but instead by pT
            #    PartonBlock.set(tree, parton1, parton2)
            #    if len(jets) >= 2:
            #        jet1, jet2 = jets[:2]
            #        for i, jet in zip((1, 2), (jet1, jet2)):
            #            for parton in (parton1, parton2):
            #                if utils.dR(jet.eta, jet.phi, parton.eta, parton.phi) < .8:
            #                    setattr(tree, 'jet%i_matched' % i, True)

            # Fill the tau block
            # This must come after the RecoJetBlock is filled since
            # that sets the jet_beta for boosting the taus
            RecoTauBlock.set(event, tree, datatype, tau1, tau2, local=local)
            if datatype != datasets.DATA:
                TrueTauBlock.set(tree, tau1, tau2)

            # fill the output tree
            outtree.Fill(reset=True)

        externaltools.report()

        # flush any baskets remaining in memory to disk
        self.output.cd()
        outtree.FlushBaskets()
        outtree.Write()

        if local:
            if datatype == datasets.DATA:
                xml_string = ROOT.TObjString(merged_grl.str())
                xml_string.Write('lumi')
            merged_cutflow.Write()
Ejemplo n.º 6
0
    def work(self):
        # get argument values
        local = self.args.local
        syst_terms = self.args.syst_terms
        datatype = self.metadata.datatype
        year = self.metadata.year
        verbose = self.args.student_verbose
        very_verbose = self.args.student_very_verbose
        redo_selection = self.args.redo_selection
        nominal_values = self.args.nominal_values

        # get the dataset name
        dsname = os.getenv('INPUT_DATASET_NAME', None)
        if dsname is None:
            # attempt to guess dsname from dirname
            if self.files:
                dsname = os.path.basename(os.path.dirname(self.files[0]))

        # is this a signal sample?
        # if so we will also keep some truth information in the output below
        is_signal = datatype == datasets.MC and (
            '_VBFH' in dsname or '_ggH' in dsname or '_ZH' in dsname
            or '_WH' in dsname or '_ttH' in dsname)
        log.info("DATASET: {0}".format(dsname))
        log.info("IS SIGNAL: {0}".format(is_signal))

        # is this an inclusive signal sample for overlap studies?
        is_inclusive_signal = is_signal and '_inclusive' in dsname

        # is this a BCH-fixed sample? (temporary)
        is_bch_sample = 'r5470_r4540_p1344' in dsname
        if is_bch_sample:
            log.warning("this is a BCH-fixed r5470 sample")

        # onfilechange will contain a list of functions to be called as the
        # chain rolls over to each new file
        onfilechange = []
        count_funcs = {}

        if datatype != datasets.DATA:
            # count the weighted number of events
            if local:

                def mc_weight_count(event):
                    return event.hh_mc_weight
            else:

                def mc_weight_count(event):
                    return event.TruthEvent[0].weights()[0]

            count_funcs = {
                'mc_weight': mc_weight_count,
            }

        if local:
            # local means running on the skims, the output of this script
            # running on the grid
            if datatype == datasets.DATA:
                # merge the GRL fragments
                merged_grl = goodruns.GRL()

                def update_grl(student, grl, name, file, tree):
                    grl |= str(
                        file.Get('Lumi/%s' %
                                 student.metadata.treename).GetString())

                onfilechange.append((update_grl, (
                    self,
                    merged_grl,
                )))

            if datatype == datasets.DATA:
                merged_cutflow = Hist(1, 0, 1, name='cutflow', type='D')
            else:
                merged_cutflow = Hist(2, 0, 2, name='cutflow', type='D')

            def update_cutflow(student, cutflow, name, file, tree):
                # record a cut-flow
                year = student.metadata.year
                datatype = student.metadata.datatype
                cutflow[1].value += file.cutflow_event[1].value
                if datatype != datasets.DATA:
                    cutflow[2].value += file.cutflow_event_mc_weight[1].value

            onfilechange.append((update_cutflow, (
                self,
                merged_cutflow,
            )))

        else:

            # NEED TO BE CONVERTED TO XAOD
            # if datatype not in (datasets.EMBED, datasets.MCEMBED):
            #     # merge TrigConfTrees
            #     metadirname = '%sMeta' % self.metadata.treename
            #     trigconfchain = ROOT.TChain('%s/TrigConfTree' % metadirname)
            #     map(trigconfchain.Add, self.files)
            #     metadir = self.output.mkdir(metadirname)
            #     metadir.cd()
            #     trigconfchain.Merge(self.output, -1, 'fast keep')
            #     self.output.cd()

            if datatype == datasets.DATA:
                # merge GRL XML strings
                merged_grl = goodruns.GRL()
            #     for fname in self.files:
            #         with root_open(fname) as f:
            #             for key in f.Lumi.keys():
            #                 merged_grl |= goodruns.GRL(
            #                     str(key.ReadObj().GetString()),
            #                     from_string=True)
            #     lumi_dir = self.output.mkdir('Lumi')
            #     lumi_dir.cd()
            #     xml_string= ROOT.TObjString(merged_grl.str())
            #     xml_string.Write(self.metadata.treename)
            #     self.output.cd()

        self.output.cd()

        # create the output tree
        model = get_model(datatype,
                          dsname,
                          prefix=None if local else 'hh_',
                          is_inclusive_signal=is_inclusive_signal)
        log.info("Output Model:\n\n{0}\n\n".format(model))
        outtree = Tree(name=self.metadata.treename, model=model)

        if local:
            tree = outtree
        else:
            tree = outtree.define_object(name='tree', prefix='hh_')

        #tree.define_object(name='tau', prefix='tau_')
        tree.define_object(name='tau1', prefix='tau1_')
        tree.define_object(name='tau2', prefix='tau2_')
        tree.define_object(name='truetau1', prefix='truetau1_')
        tree.define_object(name='truetau2', prefix='truetau2_')
        tree.define_object(name='jet1', prefix='jet1_')
        tree.define_object(name='jet2', prefix='jet2_')
        tree.define_object(name='jet3', prefix='jet3_')

        mmc_objects = [
            tree.define_object(name='mmc0', prefix='mmc0_'),
            tree.define_object(name='mmc1', prefix='mmc1_'),
            tree.define_object(name='mmc2', prefix='mmc2_'),
        ]

        for mmc_obj in mmc_objects:
            mmc_obj.define_object(name='resonance', prefix='resonance_')

        # NEED TO BE CONVERTED TO XAOD
        # trigger_emulation = TauTriggerEmulation(
        #     year=year,
        #     passthrough=local or datatype != datasets.MC or year > 2011,
        #     count_funcs=count_funcs)

        # if not trigger_emulation.passthrough:
        #     onfilechange.append(
        #         (update_trigger_trees, (self, trigger_emulation,)))

        # trigger_config = None

        # if datatype not in (datasets.EMBED, datasets.MCEMBED):
        #     # trigger config tool to read trigger info in the ntuples
        #     trigger_config = get_trigger_config()
        #     # update the trigger config maps on every file change
        #     onfilechange.append((update_trigger_config, (trigger_config,)))

        # define the list of event filters
        if local and syst_terms is None and not redo_selection:
            event_filters = None
        else:
            tau_ntrack_recounted_use_ntup = False
            if year > 2011:
                # peek at first tree to determine if the extended number of
                # tracks is already stored
                with root_open(self.files[0]) as test_file:
                    test_tree = test_file.Get(self.metadata.treename)
                    tau_ntrack_recounted_use_ntup = ('tau_out_track_n_extended'
                                                     in test_tree)

            log.info(self.grl)
            event_filters = EventFilterList([
                GRLFilter(self.grl,
                          passthrough=(local
                                       or (datatype not in (datasets.DATA,
                                                            datasets.EMBED))),
                          count_funcs=count_funcs),
                CoreFlags(passthrough=local, count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # EmbeddingPileupPatch(
                #     passthrough=(
                #         local or year > 2011 or datatype != datasets.EMBED),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD (not a priority)
                # PileupTemplates(
                #     year=year,
                #     passthrough=(
                #         local or is_bch_sample or datatype not in (
                #             datasets.MC, datasets.MCEMBED)),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # RandomSeed(
                #     datatype=datatype,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # BCHSampleRunNumber(
                #     passthrough=not is_bch_sample,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # RandomRunNumber(
                #     tree=tree,
                #     datatype=datatype,
                #     pileup_tool=pileup_tool,
                #     passthrough=local,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # trigger_emulation,
                # NEED TO BE CONVERTED TO XAOD
                # Triggers(
                #     year=year,
                #     tree=tree,
                #     datatype=datatype,
                #     passthrough=datatype in (datasets.EMBED, datasets.MCEMBED),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                PileupReweight_xAOD(
                    tree=tree,
                    passthrough=(local
                                 or (datatype
                                     not in (datasets.MC, datasets.MCEMBED))),
                    count_funcs=count_funcs),
                PriVertex(passthrough=local, count_funcs=count_funcs),
                LArError(passthrough=local, count_funcs=count_funcs),
                TileError(passthrough=local, count_funcs=count_funcs),
                TileTrips(passthrough=(local or datatype
                                       in (datasets.MC, datasets.MCEMBED)),
                          count_funcs=count_funcs),
                JetCalibration(datatype=datatype,
                               passthrough=local,
                               count_funcs=count_funcs),
                JetResolution(
                    passthrough=(local
                                 or (datatype
                                     not in (datasets.MC, datasets.MCEMBED))),
                    count_funcs=count_funcs),
                TauCalibration(datatype,
                               passthrough=local,
                               count_funcs=count_funcs),
                # # truth matching must come before systematics due to
                # # TES_TRUE/FAKE
                # NEED TO BE CONVERTED TO XAOD
                TrueTauSelection(passthrough=datatype == datasets.DATA,
                                 count_funcs=count_funcs),
                TruthMatching(passthrough=datatype == datasets.DATA,
                              count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                NvtxJets(tree=tree, count_funcs=count_funcs),
                # # PUT THE SYSTEMATICS "FILTER" BEFORE
                # # ANY FILTERS THAT REFER TO OBJECTS
                # # BUT AFTER CALIBRATIONS
                # # Systematics must also come before anything that refers to
                # # thing.fourvect since fourvect is cached!
                # NEED TO BE CONVERTED TO XAOD
                # Systematics(
                #     terms=syst_terms,
                #     year=year,
                #     datatype=datatype,
                #     tree=tree,
                #     verbose=verbose,
                #     passthrough=not syst_terms,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # JetIsPileup(
                #     passthrough=(
                #         local or year < 2012 or
                #         datatype not in (datasets.MC, datasets.MCEMBED)),
                #     count_funcs=count_funcs),
                JetCleaning(datatype=datatype,
                            year=year,
                            count_funcs=count_funcs),
                ElectronVeto(el_sel='Medium', count_funcs=count_funcs),
                MuonVeto(count_funcs=count_funcs),
                TauPT(2, thresh=20 * GeV, count_funcs=count_funcs),
                TauHasTrack(2, count_funcs=count_funcs),
                TauEta(2, count_funcs=count_funcs),
                TauElectronVeto(2, count_funcs=count_funcs),
                TauMuonVeto(2, count_funcs=count_funcs),
                TauCrack(2, count_funcs=count_funcs),
                # # before selecting the leading and subleading taus
                # # be sure to only consider good candidates
                TauIDMedium(2, count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # but not used by default
                # #TauTriggerMatchIndex(
                # #    config=trigger_config,
                # #    year=year,
                # #    datatype=datatype,
                # #    passthrough=datatype == datasets.EMBED,
                # #    count_funcs=count_funcs),
                # Select two leading taus at this point
                # 25 and 35 for data
                # 20 and 30 for MC to leave room for TES uncertainty
                TauLeadSublead(lead=(35 * GeV if datatype == datasets.DATA
                                     or local else 30 * GeV),
                               sublead=(25 * GeV if datatype == datasets.DATA
                                        or local else 20 * GeV),
                               count_funcs=count_funcs),
                # taus are sorted (in decreasing order) by pT from here on
                TauIDSelection(count_funcs=count_funcs),
                TaudR(3.2, count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # but not used by default
                # #TauTriggerMatchThreshold(
                # #    datatype=datatype,
                # #    tree=tree,
                # #    count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # TauTriggerEfficiency(
                #     year=year,
                #     datatype=datatype,
                #     tree=tree,
                #     tes_systematic=self.args.syst_terms and (
                #         Systematics.TES_TERMS & self.args.syst_terms),
                #     passthrough=datatype == datasets.DATA,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                PileupScale(tree=tree,
                            year=year,
                            datatype=datatype,
                            passthrough=local,
                            count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                TauIDScaleFactors(year=year,
                                  passthrough=datatype == datasets.DATA,
                                  count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # TauFakeRateScaleFactors(
                #     year=year,
                #     datatype=datatype,
                #     tree=tree,
                #     tes_up=(self.args.syst_terms is not None and
                #         (Systematics.TES_FAKE_TOTAL_UP in self.args.syst_terms or
                #          Systematics.TES_FAKE_FINAL_UP in self.args.syst_terms)),
                #     tes_down=(self.args.syst_terms is not None and
                #         (Systematics.TES_FAKE_TOTAL_DOWN in self.args.syst_terms or
                #          Systematics.TES_FAKE_FINAL_DOWN in self.args.syst_terms)),
                #     passthrough=datatype in (datasets.DATA, datasets.EMBED),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                HiggsPT(year=year,
                        tree=tree,
                        passthrough=not is_signal or local,
                        count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # TauTrackRecounting(
                #     year=year,
                #     use_ntup_value=tau_ntrack_recounted_use_ntup,
                #     passthrough=local,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # MCWeight(
                #     datatype=datatype,
                #     tree=tree,
                #     passthrough=local or datatype == datasets.DATA,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # EmbeddingIsolation(
                #     tree=tree,
                #     passthrough=(
                #         local or year < 2012 or
                #         datatype not in (datasets.EMBED, datasets.MCEMBED)),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # EmbeddingCorrections(
                #     tree=tree,
                #     year=year,
                #     passthrough=(
                #         local or
                #         datatype not in (datasets.EMBED, datasets.MCEMBED)),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # EmbeddingTauSpinner(
                #     year=year,
                #     tree=tree,
                #     passthrough=(
                #         local or datatype not in (
                #             datasets.EMBED, datasets.MCEMBED)),
                #     count_funcs=count_funcs),
                # # put MET recalculation after tau selection but before tau-jet
                # # overlap removal and jet selection because of the RefAntiTau
                # # MET correction
                # NEED TO BE CONVERTED TO XAOD
                # METRecalculation(
                #     terms=syst_terms,
                #     year=year,
                #     tree=tree,
                #     refantitau=not nominal_values,
                #     verbose=verbose,
                #     very_verbose=very_verbose,
                #     count_funcs=count_funcs),
                TauJetOverlapRemoval(count_funcs=count_funcs),
                JetPreselection(count_funcs=count_funcs),
                NonIsolatedJet(tree=tree, count_funcs=count_funcs),
                JetSelection(year=year, count_funcs=count_funcs),
                RecoJetTrueTauMatching(passthrough=datatype == datasets.DATA
                                       or local,
                                       count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # BCHCleaning(
                #     tree=tree,
                #     passthrough=year == 2011 or local,
                #     datatype=datatype,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                ClassifyInclusiveHiggsSample(
                    tree=tree,
                    passthrough=not is_inclusive_signal,
                    count_funcs=count_funcs),
            ])

            # set the event filters
            self.filters['event'] = event_filters

        hh_buffer = TreeBuffer()
        if local:
            chain = TreeChain(
                self.metadata.treename,
                files=self.files,
                # ignore_branches=ignore_branches,
                events=self.events,
                onfilechange=onfilechange,
                filters=event_filters,
                cache=True,
                cache_size=50000000,
                learn_entries=100)
            buffer = TreeBuffer()
            for name, value in chain._buffer.items():
                if name.startswith('hh_'):
                    hh_buffer[name[3:]] = value
                elif name in copied:
                    buffer[name] = value
            outtree.set_buffer(hh_buffer, create_branches=False, visible=True)
            outtree.set_buffer(buffer, create_branches=True, visible=False)

        else:

            root_chain = ROOT.TChain(self.metadata.treename)
            for f in self.files:
                log.info(f)
                root_chain.Add(f)

            # if len(self.files) != 1:
            #     raise RuntimeError('lenght of files has to be 1 for now (no xAOD chaining available)')
            # self.files = self.files[0]
            # root_chain = ROOT.TFile(self.files)

            chain = xAODTree(root_chain,
                             filters=event_filters,
                             events=self.events)
            define_objects(chain, datatype=datatype)
            outtree.set_buffer(hh_buffer, create_branches=True, visible=False)

            # create the MMC
            mmc = mass.MMC(year=year)

        # report which packages have been loaded
        # externaltools.report()

        self.output.cd()

        # The main event loop
        # the event filters above are automatically run for each event and only
        # the surviving events are looped on
        for event in chain:

            if local and syst_terms is None and not redo_selection:
                outtree.Fill()
                continue

            # sort taus and jets in decreasing order by pT
            event.taus.sort(key=lambda tau: tau.pt(), reverse=True)
            event.jets.sort(key=lambda jet: jet.pt(), reverse=True)

            # tau1 is the leading tau
            # tau2 is the subleading tau
            tau1, tau2 = event.taus
            tau1.fourvect = asrootpy(tau1.p4())
            tau2.fourvect = asrootpy(tau2.p4())

            beta_taus = (tau1.fourvect + tau2.fourvect).BoostVector()
            tau1.fourvect_boosted = LorentzVector()
            tau1.fourvect_boosted.copy_from(tau1.fourvect)
            tau1.fourvect_boosted.Boost(beta_taus * -1)

            tau2.fourvect_boosted = LorentzVector()
            tau2.fourvect_boosted.copy_from(tau2.fourvect)
            tau2.fourvect_boosted.Boost(beta_taus * -1)

            jets = list(event.jets)
            for jet in jets:
                jet.fourvect = asrootpy(jet.p4())

            jet1, jet2, jet3 = None, None, None
            beta = None
            if len(jets) >= 2:
                jet1, jet2 = jets[:2]

                # determine boost of system
                # determine jet CoM frame
                beta = (jet1.fourvect + jet2.fourvect).BoostVector()
                tree.jet_beta.copy_from(beta)

                jet1.fourvect_boosted = LorentzVector()
                jet1.fourvect_boosted.copy_from(jet1.fourvect)
                jet1.fourvect_boosted.Boost(beta * -1)

                jet2.fourvect_boosted = LorentzVector()
                jet2.fourvect_boosted.copy_from(jet2.fourvect)
                jet2.fourvect_boosted.Boost(beta * -1)

                tau1.min_dr_jet = min(tau1.fourvect.DeltaR(jet1.fourvect),
                                      tau1.fourvect.DeltaR(jet2.fourvect))
                tau2.min_dr_jet = min(tau2.fourvect.DeltaR(jet1.fourvect),
                                      tau2.fourvect.DeltaR(jet2.fourvect))

                # tau centrality (degree to which they are between the two jets)
                tau1.centrality = eventshapes.eta_centrality(
                    tau1.fourvect.Eta(), jet1.fourvect.Eta(),
                    jet2.fourvect.Eta())

                tau2.centrality = eventshapes.eta_centrality(
                    tau2.fourvect.Eta(), jet1.fourvect.Eta(),
                    jet2.fourvect.Eta())

                # boosted tau centrality
                tau1.centrality_boosted = eventshapes.eta_centrality(
                    tau1.fourvect_boosted.Eta(), jet1.fourvect_boosted.Eta(),
                    jet2.fourvect_boosted.Eta())

                tau2.centrality_boosted = eventshapes.eta_centrality(
                    tau2.fourvect_boosted.Eta(), jet1.fourvect_boosted.Eta(),
                    jet2.fourvect_boosted.Eta())

                # 3rd leading jet
                if len(jets) >= 3:
                    jet3 = jets[2]
                    jet3.fourvect_boosted = LorentzVector()
                    jet3.fourvect_boosted.copy_from(jet3.fourvect)
                    jet3.fourvect_boosted.Boost(beta * -1)

            elif len(jets) == 1:
                jet1 = jets[0]

                tau1.min_dr_jet = tau1.fourvect.DeltaR(jet1.fourvect)
                tau2.min_dr_jet = tau2.fourvect.DeltaR(jet1.fourvect)

            RecoJetBlock.set(tree, jet1, jet2, jet3, local=local)

            # mass of ditau + leading jet system
            if jet1 is not None:
                tree.mass_tau1_tau2_jet1 = (tau1.fourvect + tau2.fourvect +
                                            jet1.fourvect).M()

            #####################################
            # number of tracks from PV minus taus
            #####################################
            ntrack_pv = 0
            ntrack_nontau_pv = 0
            for vxp in event.vertices:
                # primary vertex
                if vxp.vertexType() == 1:
                    ntrack_pv = vxp.nTrackParticles()
                    ntrack_nontau_pv = ntrack_pv - tau1.nTracks(
                    ) - tau2.nTracks()
                    break
            tree.ntrack_pv = ntrack_pv
            tree.ntrack_nontau_pv = ntrack_nontau_pv

            #########################
            # MET variables
            #########################
            MET = event.MET[0]
            METx = MET.mpx()
            METy = MET.mpy()
            METet = MET.met()
            MET_vect = Vector2(METx, METy)
            MET_4vect = LorentzVector()
            MET_4vect.SetPxPyPzE(METx, METy, 0., METet)
            MET_4vect_boosted = LorentzVector()
            MET_4vect_boosted.copy_from(MET_4vect)
            if beta is not None:
                MET_4vect_boosted.Boost(beta * -1)

            tree.MET_et = METet
            tree.MET_etx = METx
            tree.MET_ety = METy
            tree.MET_phi = MET.phi()
            dPhi_tau1_tau2 = abs(tau1.fourvect.DeltaPhi(tau2.fourvect))
            dPhi_tau1_MET = abs(tau1.fourvect.DeltaPhi(MET_4vect))
            dPhi_tau2_MET = abs(tau2.fourvect.DeltaPhi(MET_4vect))
            tree.dPhi_tau1_tau2 = dPhi_tau1_tau2
            tree.dPhi_tau1_MET = dPhi_tau1_MET
            tree.dPhi_tau2_MET = dPhi_tau2_MET
            tree.dPhi_min_tau_MET = min(dPhi_tau1_MET, dPhi_tau2_MET)
            tree.MET_bisecting = is_MET_bisecting(dPhi_tau1_tau2,
                                                  dPhi_tau1_MET, dPhi_tau2_MET)

            sumET = MET.sumet()
            tree.MET_sumet = sumET
            if sumET != 0:
                tree.MET_sig = ((2. * METet / GeV) /
                                (utils.sign(sumET) * sqrt(abs(sumET / GeV))))
            else:
                tree.MET_sig = -1.

            tree.MET_centrality = eventshapes.phi_centrality(
                tau1.fourvect, tau2.fourvect, MET_vect)
            tree.MET_centrality_boosted = eventshapes.phi_centrality(
                tau1.fourvect_boosted, tau2.fourvect_boosted,
                MET_4vect_boosted)

            tree.number_of_good_vertices = len(event.vertices)

            ##########################
            # Jet and sum pt variables
            ##########################
            tree.numJets = len(event.jets)

            # sum pT with only the two leading jets
            tree.sum_pt = sum([tau1.pt(), tau2.pt()] +
                              [jet.pt() for jet in jets[:2]])

            # sum pT with all selected jets
            tree.sum_pt_full = sum([tau1.pt(), tau2.pt()] +
                                   [jet.pt() for jet in jets])

            # vector sum pT with two leading jets and MET
            tree.vector_sum_pt = sum([tau1.fourvect, tau2.fourvect] +
                                     [jet.fourvect
                                      for jet in jets[:2]] + [MET_4vect]).Pt()

            # vector sum pT with all selected jets and MET
            tree.vector_sum_pt_full = sum([tau1.fourvect, tau2.fourvect] +
                                          [jet.fourvect for jet in jets] +
                                          [MET_4vect]).Pt()

            # resonance pT
            tree.resonance_pt = sum([tau1.fourvect, tau2.fourvect,
                                     MET_4vect]).Pt()

            # #############################
            # # tau <-> vertex association
            # #############################
            tree.tau_same_vertex = (tau1.vertex() == tau2.vertex())

            tau1.vertex_prob = ROOT.TMath.Prob(tau1.vertex().chiSquared(),
                                               int(tau1.vertex().numberDoF()))

            tau2.vertex_prob = ROOT.TMath.Prob(tau2.vertex().chiSquared(),
                                               int(tau2.vertex().numberDoF()))

            # ##########################
            # # MMC Mass
            # ##########################
            mmc_result = mmc.mass(tau1,
                                  tau2,
                                  METx,
                                  METy,
                                  sumET,
                                  njets=len(event.jets))

            for mmc_method, mmc_object in enumerate(mmc_objects):
                mmc_mass, mmc_resonance, mmc_met = mmc_result[mmc_method]
                if verbose:
                    log.info("MMC (method %d): %f" % (mmc_method, mmc_mass))

                mmc_object.mass = mmc_mass
                mmc_object.MET_et = mmc_met.Mod()
                mmc_object.MET_etx = mmc_met.X()
                mmc_object.MET_ety = mmc_met.Y()
                mmc_object.MET_phi = math.pi - mmc_met.Phi()
                if mmc_mass > 0:
                    FourMomentum.set(mmc_object.resonance, mmc_resonance)

            # ############################
            # # collinear and visible mass
            # ############################
            vis_mass, collin_mass, tau1_x, tau2_x = mass.collinearmass(
                tau1, tau2, METx, METy)

            tree.mass_vis_tau1_tau2 = vis_mass
            tree.mass_collinear_tau1_tau2 = collin_mass
            tau1.collinear_momentum_fraction = tau1_x
            tau2.collinear_momentum_fraction = tau2_x

            # # Fill the tau block
            # # This must come after the RecoJetBlock is filled since
            # # that sets the jet_beta for boosting the taus
            RecoTauBlock.set(event, tree, datatype, tau1, tau2, local=local)

            # NEED TO BE CONVERTED TO XAOD
            if datatype != datasets.DATA:
                TrueTauBlock.set(tree, tau1, tau2)
            # fill the output tree
            outtree.Fill(reset=True)

        # externaltools.report()

        # flush any baskets remaining in memory to disk
        self.output.cd()
        outtree.FlushBaskets()
        outtree.Write()

        if local:
            if datatype == datasets.DATA:
                xml_string = ROOT.TObjString(merged_grl.str())
                xml_string.Write('lumi')
            merged_cutflow.Write()