コード例 #1
0
def get_phasespace_info():
    """Return the list of ``(name, selection)`` pairs for the phase spaces.

    Two phase spaces are defined, in this order: ``"measure"`` (exactly two
    jets) and ``"closure"`` (at least two jets). Both share the same ``mll``
    and ``dr_ll`` cuts. The selection strings are built with
    ``join_root_selection`` and still contain the ``{jec_identifier}``
    placeholder.
    """
    # cuts applied in every phase space, appended after the jet requirement
    shared_cuts = ["mll > 12", "dr_ll > 0.2"]
    jet_requirements = (
        ("measure", "n_jets{jec_identifier} == 2"),
        ("closure", "n_jets{jec_identifier} >= 2"),
    )
    return [(ps_label, join_root_selection([jet_cut] + shared_cuts))
            for ps_label, jet_cut in jet_requirements]
コード例 #2
0
def get_region_info(cfg,
                    idx,
                    channel,
                    et_miss=30.,
                    z_window=10.,
                    add_btag_cut=True,
                    b_tagger="deepcsv"):
    """Return the ``(name, selection)`` pairs of the "hf" and "lf" regions.

    Args:
        cfg: Config object, forwarded to ``get_btag_info`` and
            ``get_z_window_info``.
        idx: Jet index forwarded to ``get_btag_info``.
        channel: Channel; the Z-window cuts are added unless it compares
            equal to ``"emu"``.
        et_miss: Forwarded to ``get_z_window_info``.
        z_window: Forwarded to ``get_z_window_info``.
        add_btag_cut: When true, a "medium" ``>`` b-tag cut is added to the hf
            region and a "loose" ``<`` b-tag cut to the lf region.
        b_tagger: Name of the b-tagging algorithm, forwarded to
            ``get_btag_info``.

    Returns:
        A two-element list ``[("hf", <selection>), ("lf", <selection>)]``
        whose selections are produced by ``join_root_selection``.
    """
    region_cuts = {"hf": [], "lf": []}

    if add_btag_cut:
        region_cuts["hf"].append(
            get_btag_info(cfg, idx, "medium", b_tagger, ">"))
        region_cuts["lf"].append(
            get_btag_info(cfg, idx, "loose", b_tagger, "<"))

    if channel != "emu":
        # same helper call order as before: lf first, then hf
        for region in ("lf", "hf"):
            region_cuts[region].extend(
                get_z_window_info(
                    cfg, region, et_miss=et_miss, z_window=z_window))

    return [(region, join_root_selection(region_cuts[region]))
            for region in ("hf", "lf")]
コード例 #3
0
def get_contamination_region_info(cfg,
                                  channel,
                                  et_miss=30.0,
                                  z_window=10.0,
                                  b_tagger="deepcsv"):
    """Return the ``(name, selection)`` pair of the contamination region.

    The selection requires a "tight" ``>`` b-tag cut (from ``get_btag_info``)
    on jets 1 and 2. Unless ``channel`` compares equal to ``"emu"``, the "lf"
    cuts returned by ``get_z_window_info`` are appended as well.

    Args:
        cfg: Config object, forwarded to the helper functions.
        channel: Channel used to decide whether the Z-window cuts apply.
        et_miss: Forwarded to ``get_z_window_info``.
        z_window: Forwarded to ``get_z_window_info``.
        b_tagger: Name of the b-tagging algorithm.

    Returns:
        A single-element list ``[("cont", <selection>)]``.
    """
    tight_cuts = [
        get_btag_info(cfg, jet_idx, "tight", b_tagger, ">")
        for jet_idx in (1, 2)
    ]

    if channel != "emu":
        tight_cuts += get_z_window_info(
            cfg, "lf", et_miss=et_miss, z_window=z_window)

    return [("cont", join_root_selection(tight_cuts))]
コード例 #4
0
def binning_to_selection(binning, variable):
    """Translate bin edges into named selection strings.

    For every pair of neighbouring edges in ``binning`` a tuple
    ``(name, selection, [left_edge, right_edge])`` is produced:

    * ``name`` is ``"<left>To<right>"``; infinite edges are written as
      ``"Inf"``, zero as ``"0"`` and in all other values the ``.`` is
      replaced by ``p``.
    * ``selection`` joins ``variable > left_edge`` (omitted when the left edge
      is 0) and ``variable <= right_edge`` (omitted when the right edge is
      infinite) via ``join_root_selection``.

    Args:
        binning: Sequence of bin edges.
        variable: Expression string the cuts are applied to.

    Returns:
        List of ``(name, selection, edges)`` tuples, one per bin.
    """
    def edge_label(edge):
        if np.isinf(edge):
            return "Inf"
        if edge == 0:
            return "0"
        return str(edge).replace(".", "p")

    result = []
    for lower, upper in zip(binning[:-1], binning[1:]):
        bin_cuts = []
        if lower != 0:
            bin_cuts.append("{} > {}".format(variable, lower))
        if not np.isinf(upper):
            bin_cuts.append("{} <= {}".format(variable, upper))

        bin_name = "{}To{}".format(edge_label(lower), edge_label(upper))
        result.append(
            (bin_name, join_root_selection(bin_cuts), [lower, upper]))
    return result
コード例 #5
0
def add_categories(cfg, b_tagger):
    """Register all categories for *b_tagger* on the channels and on *cfg*.

    The function has two parts.

    First part, for each of the channels ``ch_ee``, ``ch_emu`` and ``ch_mumu``
    (module-level objects defined outside this block) and each phase space
    returned by ``get_phasespace_info()``:

    * Per-channel inclusive region categories (tagged ``"scales"``) are
      created without a b-tag cut and attached to cfg-level categories tagged
      ``"inclusive"``.
    * For both (tag jet, probe jet) permutations, a nested tree
      region -> flavor -> pt -> eta is created on the channel. Each eta leaf is
      attached to a cfg-level ``"merged"`` category and to a cfg-level
      ``"combined"`` category, and gets a ``"binning_category"`` aux entry
      pointing to the merged category. When a merged "hf" category is created,
      a matching cfg-level ``"c"`` category is created as well.
    * Contamination categories are created for all channels except ``ch_emu``
      and attached to cfg-level categories tagged ``"contamination"``.

    The "lf" region is skipped for ``ch_emu`` throughout.

    Second part, for the channels ``ch_e`` and ``ch_mu``: for three phase
    spaces ("closure", "high_multiplicity", "high_pt"), jets 1 to 4 and each
    flavor from ``get_flavor_info``, a per-channel category is created and
    attached to a cfg-level category tagged ``"sl"``.

    Args:
        cfg: Config object. This function uses its ``name``,
            ``has_category``, ``add_category`` and ``get_category``.
        b_tagger: Name of the b-tagging algorithm. It is used as a category
            tag, as part of the category names and inside the
            ``n_tags_<b_tagger>`` selection expressions.

    Returns:
        None. The categories are registered as a side effect.
    """
    # categories
    for ch in [ch_ee, ch_emu, ch_mumu]:
        # phase space region loop (measurement, closure, ...)
        for ps_name, ps_sel in get_phasespace_info():
            # inclusive region categories to measure rates
            for rg_name, rg_sel in get_region_info(cfg,
                                                   1,
                                                   ch,
                                                   add_btag_cut=False,
                                                   b_tagger=b_tagger):
                # we skip the emu channel in the lf region because the DY (the main contribution)
                # should have same-flavored leptons
                if rg_name == "lf" and ch == ch_emu:
                    continue

                # categories to perform overall normalization of each channel
                rg_cat_combined = ch.add_category(
                    name="{}__{}__{}__{}__{}".format(ch.name, ps_name, rg_name,
                                                     b_tagger, cfg.name),
                    label="{}, {}, {}".format(ch.name, ps_name, rg_name),
                    selection=join_root_selection(
                        "channel == {}".format(ch.id), ps_sel, rg_sel),
                    tags={"scales", b_tagger},
                    aux={
                        "channel": ch,
                        "phase_space": ps_name,
                        "region": rg_name,
                        "config": cfg.name,
                    },
                )
                # combine region categories to create inclusive control regions for plotting
                # (the cfg-level category is created on first use and shared afterwards)
                rg_merged_name = "{}__{}__{}".format(ps_name, rg_name,
                                                     b_tagger)
                if not cfg.has_category(rg_merged_name):
                    rg_merged_cat = cfg.add_category(
                        name=rg_merged_name,
                        label="{}, {}".format(ps_name, rg_name),
                        tags={"inclusive", b_tagger},
                        aux={
                            "phase_space": ps_name,
                            "region": rg_name,
                        },
                        context=cfg.name,
                    )
                else:
                    rg_merged_cat = cfg.get_category(rg_merged_name)
                rg_merged_cat.add_category(rg_cat_combined)

            # loop over both jet1 jet2 permutations
            for i_tag_jet, i_probe_jet in [(1, 2), (2, 1)]:
                # region loop (hf, lf, ...)
                # the region cuts (including the b-tag cut) use the tag jet index
                for rg_name, rg_sel in get_region_info(cfg,
                                                       i_tag_jet,
                                                       ch,
                                                       b_tagger=b_tagger):
                    if rg_name == "lf" and ch == ch_emu:
                        continue

                    rg_cat = ch.add_category(
                        name="{}__{}__{}__j{}__{}__{}".format(
                            ch.name, ps_name, rg_name, i_tag_jet, b_tagger,
                            cfg.name),
                        label="{}, {}, {} region (j{} tagged)".format(
                            ch.name, ps_name, rg_name, i_tag_jet),
                        selection=join_root_selection(
                            "channel == {}".format(ch.id), ps_sel, rg_sel),
                        tags={b_tagger},
                    )

                    # combined region categories, with tag jet cut applied
                    # used to determine e.g. sample composition in measurement regions
                    rg_btag_merged_name = "{}__{}__{}__{}__btag".format(
                        ps_name, rg_name, b_tagger, cfg.name)
                    if not cfg.has_category(rg_btag_merged_name):
                        rg_btag_merged_cat = cfg.add_category(
                            name=rg_btag_merged_name,
                            # NOTE(review): b_tagger is passed to format() but the template
                            # has only two placeholders, so it does not appear in the label
                            # - confirm whether the label was meant to include the tagger
                            label="{}, {}".format(ps_name, rg_name, b_tagger),
                            tags={"combined", b_tagger},
                            aux={
                                "phase_space": ps_name,
                                "region": rg_name,
                            },
                            context=cfg.name,
                        )
                    else:
                        rg_btag_merged_cat = cfg.get_category(
                            rg_btag_merged_name)

                    # flavor loop (b, c, udsg, ...)
                    # the flavor, pt and eta cuts below all use the probe jet index
                    for fl_name, fl_sel in get_flavor_info(i_probe_jet):
                        fl_cat = rg_cat.add_category(
                            name="{}__f{}".format(rg_cat.name, fl_name),
                            label="{}, {} flavor".format(
                                rg_cat.label, fl_name),
                            selection=join_root_selection(
                                rg_cat.selection, fl_sel),
                            tags={b_tagger},
                        )

                        # pt loop
                        # get_axis_info returns a mapping indexed by region name
                        for pt_idx, (pt_name, pt_sel, pt_range) in enumerate(
                                get_axis_info(
                                    cfg, i_probe_jet, "pt",
                                    "jet{}_pt{{jec_identifier}}")[rg_name]):

                            pt_cat = fl_cat.add_category(
                                name="{}__pt{}".format(fl_cat.name, pt_name),
                                label="{}, pt {}".format(
                                    fl_cat.label, pt_name),
                                selection=join_root_selection(
                                    fl_cat.selection, pt_sel),
                                tags={b_tagger},
                            )

                            # eta loop
                            for eta_idx, (
                                    eta_name, eta_sel, eta_range
                            ) in enumerate(
                                    get_axis_info(
                                        cfg,
                                        i_probe_jet,
                                        "abs(eta)",
                                        fmt="abs(jet{}_eta{{jec_identifier}})")
                                [rg_name]):

                                # leaf category: carries the full aux information
                                eta_cat = pt_cat.add_category(
                                    name="{}__eta{}".format(
                                        pt_cat.name, eta_name),
                                    label="{}, eta {}".format(
                                        pt_cat.label, eta_name),
                                    selection=join_root_selection(
                                        pt_cat.selection, eta_sel),
                                    aux={
                                        "channel": ch,
                                        "i_probe_jet": i_probe_jet,
                                        "i_tag_jet": i_tag_jet,
                                        "phase_space": ps_name,
                                        "region": rg_name,
                                        "flavor": fl_name,
                                        "config": cfg.name,
                                    },
                                    tags={b_tagger},
                                )

                                # merged category for both jets and all flavors
                                merged_vars = (ps_name, rg_name, pt_name,
                                               eta_name, b_tagger)
                                merged_name = "{}__{}__pt{}__eta{}__{}".format(
                                    *merged_vars)

                                # define categories for testing
                                # (the tags are only used when the merged category is
                                # created below, not when it already exists)
                                merged_tags = {"merged", b_tagger}
                                if rg_name == "hf" and (pt_idx == 1
                                                        and eta_idx == 0):
                                    merged_tags = merged_tags | {"test"}
                                if rg_name == "lf" and (pt_idx == 2
                                                        and eta_idx == 0):
                                    merged_tags = merged_tags | {"test"}

                                if not cfg.has_category(merged_name):
                                    # the label template has four placeholders, so the
                                    # trailing b_tagger entry of merged_vars is not shown
                                    label = "{}, {} region, pt {}, eta {}".format(
                                        *merged_vars)
                                    merged_cat = cfg.add_category(
                                        name=merged_name,
                                        label=label,
                                        tags=merged_tags,
                                        aux={
                                            "phase_space": ps_name,
                                            "region": rg_name,
                                            "eta": eta_range,
                                            "pt": pt_range,
                                        },
                                        context=cfg.name,
                                    )
                                    if rg_name == "hf":
                                        # add c categories (not written to histograms)
                                        c_vars = (ps_name, "c", pt_name,
                                                  eta_name, b_tagger)
                                        c_name = "{}__{}__pt{}__eta{}__{}".format(
                                            *c_vars)
                                        label = "{}, {} region, pt {}, eta {}".format(
                                            *c_vars)
                                        c_cat = cfg.add_category(
                                            name=c_name,
                                            label=label,
                                            tags={"c", b_tagger},
                                            aux={
                                                "phase_space": ps_name,
                                                "region": "c",
                                                "eta": eta_range,
                                                "pt": pt_range,
                                            },
                                            context=cfg.name,
                                        )
                                        # link the c category to the merged hf category
                                        c_cat.set_aux("binning_category",
                                                      merged_cat)

                                else:
                                    merged_cat = cfg.get_category(merged_name)
                                # attach the leaf to both cfg-level parent categories
                                merged_cat.add_category(eta_cat)
                                rg_btag_merged_cat.add_category(eta_cat)

                                # Specialized b-tag discriminant binnings are defined on
                                # the merged categories, but needed when writing leaf categories
                                eta_cat.set_aux("binning_category", merged_cat)

            # add categories to measure light flavour contamination uncertainty
            for rg_name, rg_sel in get_contamination_region_info(
                    cfg, ch, b_tagger=b_tagger):
                # no contamination categories are created for the emu channel
                if ch == ch_emu:
                    continue

                contamination_cat = ch.add_category(
                    name="{}__{}__{}__{}__{}".format(ch.name, ps_name, rg_name,
                                                     b_tagger, cfg.name),
                    label="{}, {}, {}".format(ch.name, ps_name, rg_name),
                    selection=join_root_selection(
                        "channel == {}".format(ch.id), ps_sel, rg_sel),
                    tags={b_tagger},
                    aux={
                        "channel": ch,
                        "phase_space": ps_name,
                        "region": rg_name,
                        "config": cfg.name,
                    },
                )
                # combine contamination regions over all channels
                cont_merged_name = "{}__{}__{}".format(ps_name, rg_name,
                                                       b_tagger)
                if not cfg.has_category(cont_merged_name):
                    cont_merged_cat = cfg.add_category(
                        name=cont_merged_name,
                        label="{}, {}".format(ps_name, rg_name),
                        tags={"contamination", b_tagger},
                        aux={
                            "phase_space": ps_name,
                            "region": rg_name,
                        },
                        context=cfg.name,
                    )
                else:
                    cont_merged_cat = cfg.get_category(cont_merged_name)
                cont_merged_cat.add_category(contamination_cat)

    # sl categories
    # each phase space combines a jet multiplicity cut with a cut on the number
    # of jets tagged by the chosen b-tagger ("high_pt" adds a jet1 pt cut)
    sl_phasespaces = [
        ("closure",
         join_root_selection([
             "n_jets{jec_identifier} == 4",
             "n_tags_{}{{jec_identifier}} == 2".format(b_tagger)
         ])),
        ("high_multiplicity",
         join_root_selection([
             "n_jets{jec_identifier} >= 6",
             "n_tags_{}{{jec_identifier}} >= 4".format(b_tagger)
         ])),
        ("high_pt",
         join_root_selection([
             "n_jets{jec_identifier} == 4",
             "n_tags_{}{{jec_identifier}} == 2".format(b_tagger),
             "jet1_pt{jec_identifier} > 100."
         ])),
    ]
    for ch in [ch_e, ch_mu]:
        # phase space region loop (measurement, closure, ...)
        for ps_name, ps_sel in sl_phasespaces:
            # one category per jet (1 to 4) and flavor
            for jet_idx in range(1, 5):
                for fl_name, fl_sel in get_flavor_info(jet_idx):
                    # categories per channel
                    rg_cat = ch.add_category(
                        name="{}__{}__j{}__{}__{}__{}".format(
                            ch.name, ps_name, str(jet_idx), fl_name, b_tagger,
                            cfg.name),
                        label="{}, {}, jet{}, {}".format(
                            ch.name, ps_name, str(jet_idx), fl_name),
                        selection=join_root_selection(
                            "channel == {}".format(ch.id), ps_sel, fl_sel),
                        tags={b_tagger},
                        aux={
                            "channel": ch,
                            "phase_space": ps_name,
                            "config": cfg.name,
                            "flavor": fl_name,
                            "i_flavor_jet": jet_idx,
                        },
                    )
                    # combine region categories to create inclusive control regions for plotting
                    rg_merged_name = "sl__{}__{}".format(ps_name, b_tagger)
                    if not cfg.has_category(rg_merged_name):
                        rg_merged_cat = cfg.add_category(
                            name=rg_merged_name,
                            label="sl, {}".format(ps_name),
                            tags={"sl", b_tagger},
                            aux={
                                "phase_space": ps_name,
                            },
                            context=cfg.name,
                        )
                    else:
                        rg_merged_cat = cfg.get_category(rg_merged_name)
                    rg_merged_cat.add_category(rg_cat)
コード例 #6
0
ファイル: hists.py プロジェクト: cms-btv-pog/jet-tagging-sf
    def run(self):
        """Write one histogram per leaf category, process, shift and variable.

        Steps, in order:

        1. Collect the leaf categories of the config that match the requested
           category tags, belong to this config (aux ``"config"``) and carry
           the tag of the chosen b-tagger. For non-final iterations, merged
           categories outside the "measure" phase space are skipped.
        2. Create a ``<category>/<process>`` directory structure in the
           output ROOT file.
        3. Open the input tree in ``UPDATE`` mode and define aliases for the
           jet pt and b-tag variables (per shift) and for the lepton pt.
        4. For MC datasets, extend the tree with pileup weights and, for
           iterations > 0, scale factor weights, and read the sum of event
           weights from the meta input.
        5. For every category, process, shift and selected variable, define
           the ``totalWeight`` alias, project the variable into a ``TH1F``
           and write it to the matching output directory.

        Raises:
            NotImplementedError: If the dataset is not linked to exactly one
                process.
            ValueError: If, for MC in a non-final iteration > 0, a category in
                the "measure" phase space has a region other than "hf", "lf"
                or "cont".
        """
        import ROOT

        inp = self.input()
        outp = self.output()
        outp.parent.touch(0o0770)

        self.category_getter = CategoryGetter(self.config_inst, self.b_tagger)

        # get child categories
        categories = []

        for category in self.config_inst.categories:
            # only consider top-level categories with at least one given tag if specified
            if len(self.category_tags) > 0 and not category.has_tag(
                    self.category_tags, mode=any):
                continue
            # for intermediate iterations, skip merged categories not used for measurement
            # (to improve performance)
            if not self.final_it:
                if category.has_tag("merged") and not category.get_aux(
                        "phase_space") == "measure":
                    continue
            # recurse through all children of category, add leaf categories
            for cat, children in walk_categories(category):
                if not children:
                    # only use categories matching the task config
                    if cat.get_aux("config", None) != self.config_inst.name:
                        continue
                    # only use categories for the chosen b-tag algorithm
                    if cat.has_tag(self.b_tagger):
                        channel = cat.get_aux("channel")
                        categories.append((channel, cat))

        # a leaf can be reached through several parent categories, drop duplicates
        categories = list(set(categories))

        # get processes
        if len(self.dataset_inst.processes) != 1:
            raise NotImplementedError(
                "only datasets with exactly one linked process can be"
                " handled, got {}".format(len(self.dataset_inst.processes)))
        processes = list(self.dataset_inst.processes.values())

        # build a progress callback
        progress = self.create_progress_callback(len(categories))

        # open the output file
        with outp.localize("w") as tmp:
            with tmp.dump("RECREATE") as output_file:
                with self.publish_step(
                        "creating root output file directories ..."):
                    process_dirs = {}
                    for _, category in categories:
                        output_file.cd()
                        category_dir = output_file.mkdir(category.name)
                        for process in processes:
                            category_dir.cd()
                            process_dir = category_dir.mkdir(process.name)
                            process_dir.Write()
                            process_dirs[(category.name,
                                          process.name)] = process_dir

                # open the input file and get the tree
                # as we need to extend the tree with custom weights, we do not cache the file
                with inp["tree"].load("UPDATE", cache=False) as input_file:
                    tree = input_file.Get("tree")
                    self.publish_message("{} events in tree".format(
                        tree.GetEntries()))

                    # identifier for jec shifted variables
                    for shift in self.shifts:
                        jec_identifier = self.get_jec_identifier(shift)

                        # pt aliases for jets
                        for obj in ["jet1", "jet2", "jet3", "jet4"]:
                            tree.SetAlias(
                                "{0}_pt{1}".format(obj, jec_identifier),
                                "({0}_px{1}**2 + {0}_py{1}**2)**0.5".format(
                                    obj, jec_identifier))
                        # b-tagging alias
                        btag_var = self.config_inst.get_aux("btaggers")[
                            self.b_tagger]["variable"]
                        for obj in ["jet1", "jet2", "jet3", "jet4"]:
                            variable = self.config_inst.get_variable(
                                "{0}_{1}".format(obj, btag_var))
                            tree.SetAlias(
                                variable.name + jec_identifier,
                                variable.expression.format(
                                    jec_identifier=jec_identifier))
                    # pt aliases for leptons
                    for obj in ["lep1", "lep2"]:
                        tree.SetAlias(
                            "{0}_pt".format(obj),
                            "({0}_px**2 + {0}_py**2)**0.5".format(obj))

                    # extend the tree
                    if self.dataset_inst.is_mc:
                        with self.publish_step(
                                "extending the input tree with weights ..."):
                            # each weighter is an (add_branch, add_value) pair of callables
                            weighters = []

                            # pileup weight
                            weighters.append(
                                self.get_pileup_weighter(inp["pu"]))

                            # weights from previous iterations
                            if self.iteration > 0:
                                # b-tagging scale factors
                                for shift in self.shifts:
                                    # only c_stat shifts are given the nominal scale factors
                                    if shift.startswith("c_stat"):
                                        nominal_sfs = inp["sf"]["nominal"]["sf"]
                                    else:
                                        nominal_sfs = None
                                    weighters.append(
                                        self.get_scale_factor_weighter(
                                            inp["sf"],
                                            shift,
                                            nominal_sfs=nominal_sfs))

                            input_file.cd()
                            with TreeExtender(tree) as te:
                                for add_branch, _ in weighters:
                                    add_branch(te)
                                for i, entry in enumerate(te):
                                    if (i % 1000) == 0:
                                        # call form prints the same text under Python 2
                                        # and is valid syntax under Python 3
                                        print("event {}".format(i))
                                    for _, add_value in weighters:
                                        add_value(entry)

                        # read in total number of events
                        sum_weights = inp["meta"].load(
                        )["event_weights"]["sum"]

                    # get category-dependent binning if optimized binning is used
                    # only for b-tagging discriminants
                    if self.optimize_binning:
                        category_binnings = inp["binning"].load()

                    for i, (channel, category) in enumerate(categories):
                        self.publish_message(
                            "writing histograms in category {} ({}/{})".format(
                                category.name, i + 1, len(categories)))

                        # get the region (HF / LF)
                        # not all child categories have regions associated, e.g. the phase space
                        # inclusive regions ("measure", "closure")
                        region = category.get_aux("region", None)

                        # set weights that are common for all shifts
                        base_weights = []
                        if self.dataset_inst.is_mc:
                            base_weights.append("gen_weight")
                            # lumi weight
                            lumi = self.config_inst.get_aux("lumi")[channel]
                            # exactly one process is guaranteed by the check above; take it
                            # explicitly instead of relying on a loop variable leaked from
                            # the directory-creation loop
                            x_sec = processes[0].get_xsec(
                                self.config_inst.campaign.ecm).nominal
                            lumi_weight = lumi * x_sec / sum_weights
                            base_weights.append(str(lumi_weight))

                            # pu weight
                            base_weights.append("pu_weight")

                        for process in processes:
                            # change into the correct directory
                            process_dirs[(category.name, process.name)].cd()
                            for shift in self.shifts:
                                jec_identifier = self.get_jec_identifier(shift)

                                # weights
                                weights = base_weights[:]
                                if self.dataset_inst.is_mc:
                                    # channel scale weight
                                    if self.iteration > 0:
                                        # b-tag scale factor weights
                                        phase_space = category.get_aux(
                                            "phase_space", None)
                                        # In measurement categories,
                                        # apply scale factors only for contamination
                                        if phase_space == "measure" and not self.final_it:
                                            weights.append(
                                                "scale_factor_c_{}".format(
                                                    shift))
                                            if region == "hf":
                                                weights.append(
                                                    "scale_factor_lf_{}".
                                                    format(shift))
                                            elif region == "lf":
                                                weights.append(
                                                    "scale_factor_hf_{}".
                                                    format(shift))
                                            elif region == "cont":
                                                weights.append(
                                                    "scale_factor_lf_{}".
                                                    format(shift))
                                                weights.append(
                                                    "scale_factor_hf_{}".
                                                    format(shift))
                                            else:
                                                raise ValueError(
                                                    "Unexpected region {}".
                                                    format(region))
                                        else:
                                            weights.append(
                                                "scale_factor_lf_{}".format(
                                                    shift))
                                            weights.append(
                                                "scale_factor_c_{}".format(
                                                    shift))
                                            weights.append(
                                                "scale_factor_hf_{}".format(
                                                    shift))

                                # totalWeight alias
                                # pad with "1" so that at least two factors are joined
                                while len(weights) < 2:
                                    weights.insert(0, "1")
                                tree.SetAlias(
                                    "totalWeight",
                                    join_root_selection(weights, op="*"))

                                # actual projecting
                                for variable in self.config_inst.variables:
                                    # save variable binning to reset at end of loop
                                    base_variable_binning = variable.binning

                                    if variable.has_tag("skip_all"):
                                        continue
                                    if region and variable.has_tag(
                                            "skip_{}".format(region)):
                                        continue
                                    # if variable tags is given, require at least one
                                    if len(self.variable_tags
                                           ) > 0 and not variable.has_tag(
                                               self.variable_tags, mode=any):
                                        continue
                                    # do not write one b-tag discriminant in the category of another
                                    if variable.get_aux(
                                            "b_tagger",
                                            self.b_tagger) != self.b_tagger:
                                        continue

                                    # if number of bins is specified, overwrite variable binning
                                    if self.binning:
                                        self.binning = list(self.binning)
                                        # if a tuple of (n_bins, x_min, x_max) is given, ensure that n_bins is an integer
                                        if len(self.binning) == 3:
                                            self.binning[0] = int(
                                                self.binning[0])
                                            self.binning = tuple(self.binning)

                                        variable.binning = self.binning

                                    # use optimized binning for b-tag discriminants if provided
                                    if self.optimize_binning and variable.get_aux(
                                            "can_optimize_bins", False):
                                        binning_category = category.get_aux(
                                            "binning_category", category)
                                        # overwrite binning if specialized binning is defined for this category
                                        variable.binning = category_binnings.get(
                                            binning_category.name,
                                            variable.binning)

                                    # the histogram name doubles as the projection target
                                    hist_name = "{}_{}".format(
                                        variable.name, shift)
                                    hist = ROOT.TH1F(
                                        hist_name,
                                        variable.full_title(root=True),
                                        variable.n_bins,
                                        array.array("d", variable.bin_edges))
                                    hist.Sumw2()

                                    # build the full selection string, including the total event weight
                                    selection = [
                                        category.selection,
                                        "jetmet_pass{jec_identifier} == 1",
                                        "{} != -10000".format(
                                            variable.expression),
                                    ]
                                    if variable.selection:
                                        selection.append(variable.selection)
                                    selection = join_root_selection(
                                        selection).format(
                                            jec_identifier=jec_identifier)
                                    selection = join_root_selection(
                                        selection, "totalWeight", op="*")

                                    # project and write the histogram
                                    tree.Project(
                                        hist_name,
                                        variable.expression.format(
                                            jec_identifier=jec_identifier),
                                        selection)
                                    hist.Write()
                                    # restore the binning that was overwritten above
                                    variable.binning = base_variable_binning

                        progress(i)