def get_phasespace_info():
    """
    Return the phase space definitions as a list of ``(name, selection)`` tuples.

    Two phase spaces are defined, "measure" (exactly two jets) and "closure" (two or
    more jets). Both additionally require ``mll > 12`` and ``dr_ll > 0.2``. The cut
    strings are combined with *join_root_selection*; the ``{jec_identifier}``
    placeholder is passed on unformatted.
    """
    # cuts shared by all phase spaces, appended after the jet multiplicity cut
    common_cuts = ["mll > 12", "dr_ll > 0.2"]

    # phase space name -> jet multiplicity requirement (order is preserved)
    jet_multiplicity_cuts = (
        ("measure", "n_jets{jec_identifier} == 2"),
        ("closure", "n_jets{jec_identifier} >= 2"),
    )

    return [
        (phasespace, join_root_selection([n_jets_cut] + common_cuts))
        for phasespace, n_jets_cut in jet_multiplicity_cuts
    ]
def get_region_info(cfg, idx, channel, et_miss=30., z_window=10., add_btag_cut=True,
        b_tagger="deepcsv"):
    """
    Return the "hf" and "lf" region definitions as a list of ``(name, selection)`` tuples.

    When *add_btag_cut* is set, the "hf" region gets the ``"medium"`` / ``">"`` cut and
    the "lf" region the ``"loose"`` / ``"<"`` cut from *get_btag_info* for the jet with
    index *idx* and the tagger *b_tagger*. Unless *channel* equals ``"emu"``, the cuts
    returned by *get_z_window_info* (called with *et_miss* and *z_window*) are appended
    to both regions. The cut lists are combined with *join_root_selection*.
    """
    heavy_cuts = []
    light_cuts = []

    # optional b-tag requirement on the jet with index idx
    if add_btag_cut:
        heavy_cuts.append(get_btag_info(cfg, idx, "medium", b_tagger, ">"))
        light_cuts.append(get_btag_info(cfg, idx, "loose", b_tagger, "<"))

    # z window cuts are skipped for the emu channel
    if channel != "emu":
        light_cuts.extend(
            get_z_window_info(cfg, "lf", et_miss=et_miss, z_window=z_window))
        heavy_cuts.extend(
            get_z_window_info(cfg, "hf", et_miss=et_miss, z_window=z_window))

    return [
        (region, join_root_selection(region_cuts))
        for region, region_cuts in (("hf", heavy_cuts), ("lf", light_cuts))
    ]
def get_contamination_region_info(cfg, channel, et_miss=30.0, z_window=10.0, b_tagger="deepcsv"):
    """
    Return the contamination region as a one-element list of ``(name, selection)`` tuples.

    The region is named "cont". It requires the ``"tight"`` / ``">"`` cut from
    *get_btag_info* on the jets with index 1 and 2 for the tagger *b_tagger*. Unless
    *channel* equals ``"emu"``, the ``"lf"`` cuts from *get_z_window_info* (called with
    *et_miss* and *z_window*) are added as well.
    """
    # tight b-tag requirement on jet 1 and jet 2, in that order
    selection_parts = [
        get_btag_info(cfg, jet_index, "tight", b_tagger, ">")
        for jet_index in (1, 2)
    ]

    # z window cuts are skipped for the emu channel
    if channel != "emu":
        z_window_cuts = get_z_window_info(cfg, "lf", et_miss=et_miss, z_window=z_window)
        selection_parts.extend(z_window_cuts)

    contamination_selection = join_root_selection(selection_parts)
    return [("cont", contamination_selection)]
def binning_to_selection(binning, variable):
    """
    Translate bin edges into a list of ``(name, selection, edges)`` tuples.

    Each pair of neighbouring values in *binning* yields one entry:

    - *name* is ``"<low>To<high>"``. Infinite edges are written as "Inf", zero as "0",
      and for all other values the "." in their string form is replaced by "p".
    - *selection* requires ``variable > low`` (omitted when the lower edge is 0) and
      ``variable <= high`` (omitted when the upper edge is infinite), combined with
      *join_root_selection*.
    - *edges* is the list ``[low, high]``.
    """
    def edge_label(edge):
        # guard clauses for the two special spellings, generic case last
        if np.isinf(edge):
            return "Inf"
        if edge == 0:
            return "0"
        return str(edge).replace(".", "p")

    bins = []
    for i_bin in range(len(binning) - 1):
        low, high = binning[i_bin], binning[i_bin + 1]

        bin_cuts = []
        # no lower cut for bins starting at zero
        if left_edge_is_set(low):
            bin_cuts.append("{} > {}".format(variable, low))
        # no upper cut for the overflow bin
        if not np.isinf(high):
            bin_cuts.append("{} <= {}".format(variable, high))

        bin_name = "{}To{}".format(edge_label(low), edge_label(high))
        bins.append((bin_name, join_root_selection(bin_cuts), [low, high]))

    return bins


def left_edge_is_set(edge):
    """Return whether a lower bin *edge* differs from 0 and hence requires a cut."""
    return edge != 0
def add_categories(cfg, b_tagger):
    """
    Register all categories for the tagger *b_tagger* on the channels and on *cfg*.

    The channel objects (``ch_ee``, ``ch_emu``, ``ch_mumu``, ``ch_e``, ``ch_mu``) and the
    helper functions are taken from the enclosing module scope. Two groups are created:

    1. For the channels ``ch_ee``, ``ch_emu`` and ``ch_mumu`` and every phase space from
       *get_phasespace_info*:

       - inclusive region categories without b-tag cut (tag "scales"), merged per
         phase space and region into categories tagged "inclusive",
       - for both tag / probe jet permutations a nested hierarchy
         region -> flavor -> pt -> eta on the channel, whose leaves (eta level) are
         added to "merged" categories (per phase space, region, pt and eta bin) and
         to "combined" categories (per phase space and region),
       - contamination region categories, merged into categories tagged "contamination".

    2. For the channels ``ch_e`` and ``ch_mu``, per-jet (1 to 4) and per-flavor
       categories in three phase spaces ("closure", "high_multiplicity", "high_pt"),
       merged into categories tagged "sl".

    Categories that are shared between loop iterations are created on *cfg* on first use
    and looked up via ``cfg.get_category`` afterwards. Nothing is returned.
    """
    # categories
    for ch in [ch_ee, ch_emu, ch_mumu]:
        # phase space region loop (measurement, closure, ...)
        for ps_name, ps_sel in get_phasespace_info():
            # inclusive region categories to measure rates
            for rg_name, rg_sel in get_region_info(cfg, 1, ch, add_btag_cut=False,
                    b_tagger=b_tagger):
                # we skip the emu channel in the lf region because the DY (the main contribution)
                # should have same-flavored leptons
                if rg_name == "lf" and ch == ch_emu:
                    continue

                # categories to perform overall normalization of each channel
                rg_cat_combined = ch.add_category(
                    name="{}__{}__{}__{}__{}".format(ch.name, ps_name, rg_name, b_tagger,
                        cfg.name),
                    label="{}, {}, {}".format(ch.name, ps_name, rg_name),
                    selection=join_root_selection(
                        "channel == {}".format(ch.id), ps_sel, rg_sel),
                    tags={"scales", b_tagger},
                    aux={
                        "channel": ch,
                        "phase_space": ps_name,
                        "region": rg_name,
                        "config": cfg.name,
                    },
                )

                # combine region categories to create inclusive control regions for plotting
                # (the merged category is shared by all channels, so it is created only once)
                rg_merged_name = "{}__{}__{}".format(ps_name, rg_name, b_tagger)
                if not cfg.has_category(rg_merged_name):
                    rg_merged_cat = cfg.add_category(
                        name=rg_merged_name,
                        label="{}, {}".format(ps_name, rg_name),
                        tags={"inclusive", b_tagger},
                        aux={
                            "phase_space": ps_name,
                            "region": rg_name,
                        },
                        context=cfg.name,
                    )
                else:
                    rg_merged_cat = cfg.get_category(rg_merged_name)
                rg_merged_cat.add_category(rg_cat_combined)

            # loop over both jet1 jet2 permutations
            for i_tag_jet, i_probe_jet in [(1, 2), (2, 1)]:
                # region loop (hf, lf, ...)
                for rg_name, rg_sel in get_region_info(cfg, i_tag_jet, ch, b_tagger=b_tagger):
                    # same emu / lf exclusion as for the inclusive categories above
                    if rg_name == "lf" and ch == ch_emu:
                        continue

                    rg_cat = ch.add_category(
                        name="{}__{}__{}__j{}__{}__{}".format(
                            ch.name, ps_name, rg_name, i_tag_jet, b_tagger, cfg.name),
                        label="{}, {}, {} region (j{} tagged)".format(
                            ch.name, ps_name, rg_name, i_tag_jet),
                        selection=join_root_selection(
                            "channel == {}".format(ch.id), ps_sel, rg_sel),
                        tags={b_tagger},
                    )

                    # combined region categories, with tag jet cut applied
                    # used to determine e.g. sample composition in measurement regions
                    rg_btag_merged_name = "{}__{}__{}__{}__btag".format(
                        ps_name, rg_name, b_tagger, cfg.name)
                    if not cfg.has_category(rg_btag_merged_name):
                        # note: the label template has two fields, the third format
                        # argument (b_tagger) is ignored
                        rg_btag_merged_cat = cfg.add_category(
                            name=rg_btag_merged_name,
                            label="{}, {}".format(ps_name, rg_name, b_tagger),
                            tags={"combined", b_tagger},
                            aux={
                                "phase_space": ps_name,
                                "region": rg_name,
                            },
                            context=cfg.name,
                        )
                    else:
                        rg_btag_merged_cat = cfg.get_category(
                            rg_btag_merged_name)

                    # flavor loop (b, c, udsg, ...)
                    for fl_name, fl_sel in get_flavor_info(i_probe_jet):
                        fl_cat = rg_cat.add_category(
                            name="{}__f{}".format(rg_cat.name, fl_name),
                            label="{}, {} flavor".format(
                                rg_cat.label, fl_name),
                            selection=join_root_selection(
                                rg_cat.selection, fl_sel),
                            tags={b_tagger},
                        )

                        # pt loop
                        # (the axis info is indexed by region name; each entry unpacks
                        # into bin name, selection string and bin range)
                        for pt_idx, (pt_name, pt_sel, pt_range) in enumerate(
                                get_axis_info(
                                    cfg, i_probe_jet, "pt",
                                    "jet{}_pt{{jec_identifier}}")[rg_name]):
                            pt_cat = fl_cat.add_category(
                                name="{}__pt{}".format(fl_cat.name, pt_name),
                                label="{}, pt {}".format(
                                    fl_cat.label, pt_name),
                                selection=join_root_selection(
                                    fl_cat.selection, pt_sel),
                                tags={b_tagger},
                            )

                            # eta loop
                            for eta_idx, (
                                    eta_name, eta_sel, eta_range
                            ) in enumerate(
                                    get_axis_info(
                                        cfg, i_probe_jet, "abs(eta)",
                                        fmt="abs(jet{}_eta{{jec_identifier}})")
                                    [rg_name]):
                                # leaf category, the only level that carries the full aux data
                                eta_cat = pt_cat.add_category(
                                    name="{}__eta{}".format(
                                        pt_cat.name, eta_name),
                                    label="{}, eta {}".format(
                                        pt_cat.label, eta_name),
                                    selection=join_root_selection(
                                        pt_cat.selection, eta_sel),
                                    aux={
                                        "channel": ch,
                                        "i_probe_jet": i_probe_jet,
                                        "i_tag_jet": i_tag_jet,
                                        "phase_space": ps_name,
                                        "region": rg_name,
                                        "flavor": fl_name,
                                        "config": cfg.name,
                                    },
                                    tags={b_tagger},
                                )

                                # merged category for both jets and all flavors
                                merged_vars = (ps_name, rg_name, pt_name,
                                    eta_name, b_tagger)
                                merged_name = "{}__{}__pt{}__eta{}__{}".format(
                                    *merged_vars)

                                # define categories for testing
                                # (the "test" tag only takes effect when the merged
                                # category is created below, not when it already exists)
                                merged_tags = {"merged", b_tagger}
                                if rg_name == "hf" and (pt_idx == 1 and eta_idx == 0):
                                    merged_tags = merged_tags | {"test"}
                                if rg_name == "lf" and (pt_idx == 2 and eta_idx == 0):
                                    merged_tags = merged_tags | {"test"}

                                if not cfg.has_category(merged_name):
                                    # the label template has four fields, the trailing
                                    # b_tagger entry of merged_vars is ignored
                                    label = "{}, {} region, pt {}, eta {}".format(
                                        *merged_vars)
                                    merged_cat = cfg.add_category(
                                        name=merged_name,
                                        label=label,
                                        tags=merged_tags,
                                        aux={
                                            "phase_space": ps_name,
                                            "region": rg_name,
                                            "eta": eta_range,
                                            "pt": pt_range,
                                        },
                                        context=cfg.name,
                                    )

                                    if rg_name == "hf":
                                        # add c categories (not written to histograms)
                                        # they reuse the pt / eta bins of the hf region and
                                        # are created together with the hf merged category
                                        c_vars = (ps_name, "c", pt_name,
                                            eta_name, b_tagger)
                                        c_name = "{}__{}__pt{}__eta{}__{}".format(
                                            *c_vars)
                                        label = "{}, {} region, pt {}, eta {}".format(
                                            *c_vars)
                                        c_cat = cfg.add_category(
                                            name=c_name,
                                            label=label,
                                            tags={"c", b_tagger},
                                            aux={
                                                "phase_space": ps_name,
                                                "region": "c",
                                                "eta": eta_range,
                                                "pt": pt_range,
                                            },
                                            context=cfg.name,
                                        )
                                        c_cat.set_aux("binning_category", merged_cat)
                                else:
                                    merged_cat = cfg.get_category(merged_name)

                                merged_cat.add_category(eta_cat)
                                rg_btag_merged_cat.add_category(eta_cat)

                                # Specialized b-tag discriminant binnings are defined on
                                # the merged categories, but needed when writing leaf categories
                                eta_cat.set_aux("binning_category", merged_cat)

            # add categories to measure light flavour contamination uncertainty
            for rg_name, rg_sel in get_contamination_region_info(
                    cfg, ch, b_tagger=b_tagger):
                # no contamination categories are created for the emu channel
                if ch == ch_emu:
                    continue

                contamination_cat = ch.add_category(
                    name="{}__{}__{}__{}__{}".format(ch.name, ps_name, rg_name, b_tagger,
                        cfg.name),
                    label="{}, {}, {}".format(ch.name, ps_name, rg_name),
                    selection=join_root_selection(
                        "channel == {}".format(ch.id), ps_sel, rg_sel),
                    tags={b_tagger},
                    aux={
                        "channel": ch,
                        "phase_space": ps_name,
                        "region": rg_name,
                        "config": cfg.name,
                    },
                )

                # combine contamination regions over all channels
                cont_merged_name = "{}__{}__{}".format(ps_name, rg_name, b_tagger)
                if not cfg.has_category(cont_merged_name):
                    cont_merged_cat = cfg.add_category(
                        name=cont_merged_name,
                        label="{}, {}".format(ps_name, rg_name),
                        tags={"contamination", b_tagger},
                        aux={
                            "phase_space": ps_name,
                            "region": rg_name,
                        },
                        context=cfg.name,
                    )
                else:
                    cont_merged_cat = cfg.get_category(cont_merged_name)
                cont_merged_cat.add_category(contamination_cat)

    # sl categories
    # only the b_tagger field is filled in here, the {jec_identifier} placeholder is kept
    # in all selection strings
    sl_phasespaces = [
        ("closure", join_root_selection([
            "n_jets{jec_identifier} == 4",
            "n_tags_{}{{jec_identifier}} == 2".format(b_tagger)
        ])),
        ("high_multiplicity", join_root_selection([
            "n_jets{jec_identifier} >= 6",
            "n_tags_{}{{jec_identifier}} >= 4".format(b_tagger)
        ])),
        ("high_pt", join_root_selection([
            "n_jets{jec_identifier} == 4",
            "n_tags_{}{{jec_identifier}} == 2".format(b_tagger),
            "jet1_pt{jec_identifier} > 100."
        ])),
    ]
    for ch in [ch_e, ch_mu]:
        # phase space region loop (measurement, closure, ...)
        for ps_name, ps_sel in sl_phasespaces:
            # jets are numbered from 1 to 4
            for jet_idx in range(1, 5):
                for fl_name, fl_sel in get_flavor_info(jet_idx):
                    # categories per channel
                    rg_cat = ch.add_category(
                        name="{}__{}__j{}__{}__{}__{}".format(
                            ch.name, ps_name, str(jet_idx), fl_name, b_tagger, cfg.name),
                        label="{}, {}, jet{}, {}".format(
                            ch.name, ps_name, str(jet_idx), fl_name),
                        selection=join_root_selection(
                            "channel == {}".format(ch.id), ps_sel, fl_sel),
                        tags={b_tagger},
                        aux={
                            "channel": ch,
                            "phase_space": ps_name,
                            "config": cfg.name,
                            "flavor": fl_name,
                            "i_flavor_jet": jet_idx,
                        },
                    )

                    # combine region categories to create inclusive control regions for plotting
                    rg_merged_name = "sl__{}__{}".format(ps_name, b_tagger)
                    if not cfg.has_category(rg_merged_name):
                        rg_merged_cat = cfg.add_category(
                            name=rg_merged_name,
                            label="sl, {}".format(ps_name),
                            tags={"sl", b_tagger},
                            aux={
                                "phase_space": ps_name,
                            },
                            context=cfg.name,
                        )
                    else:
                        rg_merged_cat = cfg.get_category(rg_merged_name)
                    rg_merged_cat.add_category(rg_cat)
def run(self):
    """
    Project the input tree into histograms and write them to the output ROOT file.

    Steps, in order:

    1. Collect the leaf categories of the config that carry the configured b-tagger
       tag and whose "config" aux entry matches the task config, optionally filtered
       by ``self.category_tags``. For intermediate iterations, "merged" categories
       outside the "measure" phase space are skipped.
    2. Create one ``<category>/<process>`` directory per category in the output file.
    3. Open the input tree, define pt and b-tag aliases for all shifts and, for MC,
       extend the tree with pileup weights and (for iterations > 0) scale factor
       weights.
    4. For each category, shift and variable, book a ``TH1F``, fill it via
       ``tree.Project`` with the category selection times the ``totalWeight`` alias,
       and write it.

    Raises:
        NotImplementedError: if the dataset is not linked to exactly one process.
        ValueError: if, for MC in an intermediate iteration > 0, a category in the
            "measure" phase space has a region other than "hf", "lf" or "cont".
    """
    import ROOT

    inp = self.input()
    outp = self.output()
    outp.parent.touch(0o0770)

    self.category_getter = CategoryGetter(self.config_inst, self.b_tagger)

    # get child categories
    categories = []
    for category in self.config_inst.categories:
        # only consider top-level categories with at least one given tag if specified
        if len(self.category_tags) > 0 and not category.has_tag(
                self.category_tags, mode=any):
            continue
        # for intermediate iterations, skip merged categories not used for measurement
        # (to improve performance)
        if not self.final_it:
            if category.has_tag("merged") and not category.get_aux(
                    "phase_space") == "measure":
                continue
        # recurse through all children of category, add leaf categories
        for cat, children in walk_categories(category):
            if not children:
                # only use categories matching the task config
                if cat.get_aux("config", None) != self.config_inst.name:
                    continue
                # only use categories for the chosen b-tag algorithm
                if cat.has_tag(self.b_tagger):
                    channel = cat.get_aux("channel")
                    categories.append((channel, cat))

    # a leaf can be reached through several parent categories, so remove duplicates
    categories = list(set(categories))

    # get processes
    if len(self.dataset_inst.processes) != 1:
        raise NotImplementedError(
            "only datasets with exactly one linked process can be"
            " handled, got {}".format(len(self.dataset_inst.processes)))
    processes = list(self.dataset_inst.processes.values())
    # the single process of the dataset, used for the cross section of the lumi weight
    # (previously read from a loop variable leaked by the directory creation loop)
    xsec_process = processes[0]

    # build a progress callback
    progress = self.create_progress_callback(len(categories))

    # open the output file
    with outp.localize("w") as tmp:
        with tmp.dump("RECREATE") as output_file:
            with self.publish_step(
                    "creating root output file directories ..."):
                process_dirs = {}
                for _, category in categories:
                    output_file.cd()
                    category_dir = output_file.mkdir(category.name)
                    for process in processes:
                        category_dir.cd()
                        process_dir = category_dir.mkdir(process.name)
                        process_dir.Write()
                        process_dirs[(category.name, process.name)] = process_dir

            # open the input file and get the tree
            # as we need to extend the tree with custom weights, we do not cache the file
            with inp["tree"].load("UPDATE", cache=False) as input_file:
                tree = input_file.Get("tree")
                self.publish_message("{} events in tree".format(
                    tree.GetEntries()))

                # identifier for jec shifted variables
                for shift in self.shifts:
                    jec_identifier = self.get_jec_identifier(shift)

                    # pt aliases for jets
                    for obj in ["jet1", "jet2", "jet3", "jet4"]:
                        tree.SetAlias(
                            "{0}_pt{1}".format(obj, jec_identifier),
                            "({0}_px{1}**2 + {0}_py{1}**2)**0.5".format(
                                obj, jec_identifier))

                    # b-tagging alias
                    btag_var = self.config_inst.get_aux("btaggers")[
                        self.b_tagger]["variable"]
                    for obj in ["jet1", "jet2", "jet3", "jet4"]:
                        variable = self.config_inst.get_variable(
                            "{0}_{1}".format(obj, btag_var))
                        tree.SetAlias(
                            variable.name + jec_identifier,
                            variable.expression.format(
                                **{"jec_identifier": jec_identifier}))

                # pt aliases for leptons
                # (independent of the shift, so they are defined only once)
                for obj in ["lep1", "lep2"]:
                    tree.SetAlias(
                        "{0}_pt".format(obj),
                        "({0}_px**2 + {0}_py**2)**0.5".format(obj))

                # extend the tree
                if self.dataset_inst.is_mc:
                    with self.publish_step(
                            "extending the input tree with weights ..."):
                        # each weighter is a pair (add_branch, add_value) of callables
                        weighters = []

                        # pileup weight
                        weighters.append(
                            self.get_pileup_weighter(inp["pu"]))

                        # weights from previous iterations
                        if self.iteration > 0:
                            # b-tagging scale factors
                            for shift in self.shifts:
                                nominal_sfs = inp["sf"]["nominal"]["sf"] \
                                    if shift.startswith("c_stat") else None
                                weighters.append(
                                    self.get_scale_factor_weighter(
                                        inp["sf"], shift,
                                        nominal_sfs=nominal_sfs))

                        input_file.cd()
                        with TreeExtender(tree) as te:
                            for add_branch, _ in weighters:
                                add_branch(te)

                            for i, entry in enumerate(te):
                                if (i % 1000) == 0:
                                    # call form of print, valid on python 2 and 3
                                    print("event {}".format(i))
                                for _, add_value in weighters:
                                    add_value(entry)

                    # read in total number of events
                    # (only used for the lumi weight of MC below)
                    sum_weights = inp["meta"].load(
                    )["event_weights"]["sum"]

                # get category-dependent binning if optimized binning is used
                # only for b-tagging discriminants
                if self.optimize_binning:
                    category_binnings = inp["binning"].load()

                for i, (channel, category) in enumerate(categories):
                    self.publish_message(
                        "writing histograms in category {} ({}/{})".format(
                            category.name, i + 1, len(categories)))

                    # get the region (HF / LF)
                    # not all child categories have regions associated, e.g. the phase space
                    # inclusive regions ("measure", "closure")
                    region = category.get_aux("region", None)

                    # set weights that are common for all shifts
                    base_weights = []
                    if self.dataset_inst.is_mc:
                        base_weights.append("gen_weight")
                        # lumi weight
                        lumi = self.config_inst.get_aux("lumi")[channel]
                        x_sec = xsec_process.get_xsec(
                            self.config_inst.campaign.ecm).nominal
                        lumi_weight = lumi * x_sec / sum_weights
                        base_weights.append(str(lumi_weight))

                        # pu weight
                        base_weights.append("pu_weight")

                    for process in processes:
                        # change into the correct directory
                        process_dirs[(category.name, process.name)].cd()

                        for shift in self.shifts:
                            jec_identifier = self.get_jec_identifier(shift)

                            # weights
                            weights = base_weights[:]
                            if self.dataset_inst.is_mc:
                                # channel scale weight
                                if self.iteration > 0:
                                    # b-tag scale factor weights
                                    phase_space = category.get_aux(
                                        "phase_space", None)
                                    # In measurement categories,
                                    # apply scale factors only for contamination
                                    if phase_space == "measure" and not self.final_it:
                                        weights.append(
                                            "scale_factor_c_{}".format(shift))
                                        if region == "hf":
                                            weights.append(
                                                "scale_factor_lf_{}".format(shift))
                                        elif region == "lf":
                                            weights.append(
                                                "scale_factor_hf_{}".format(shift))
                                        elif region == "cont":
                                            weights.append(
                                                "scale_factor_lf_{}".format(shift))
                                            weights.append(
                                                "scale_factor_hf_{}".format(shift))
                                        else:
                                            raise ValueError(
                                                "Unexpected region {}".format(region))
                                    else:
                                        weights.append(
                                            "scale_factor_lf_{}".format(shift))
                                        weights.append(
                                            "scale_factor_c_{}".format(shift))
                                        weights.append(
                                            "scale_factor_hf_{}".format(shift))

                            # totalWeight alias
                            # pad with "1" so that at least two factors are joined
                            while len(weights) < 2:
                                weights.insert(0, "1")
                            tree.SetAlias(
                                "totalWeight",
                                join_root_selection(weights, op="*"))

                            # actual projecting
                            for variable in self.config_inst.variables:
                                # save variable binning to reset at end of loop
                                base_variable_binning = variable.binning

                                if variable.has_tag("skip_all"):
                                    continue
                                if region and variable.has_tag(
                                        "skip_{}".format(region)):
                                    continue
                                # if variable tags is given, require at least one
                                if len(self.variable_tags) > 0 and not variable.has_tag(
                                        self.variable_tags, mode=any):
                                    continue
                                # do not write one b-tag discriminant in the category of another
                                if variable.get_aux(
                                        "b_tagger", self.b_tagger) != self.b_tagger:
                                    continue

                                # if number of bins is specified, overwrite variable binning
                                if self.binning:
                                    self.binning = list(self.binning)
                                    # if a tuple of (n_bins, x_min, x_max) is given, ensure that n_bins is an integer
                                    # NOTE(review): only the 3-element form is turned back
                                    # into a tuple here, other lengths stay lists - confirm
                                    # against the types accepted by variable.binning
                                    if len(self.binning) == 3:
                                        self.binning[0] = int(self.binning[0])
                                        self.binning = tuple(self.binning)
                                    variable.binning = self.binning

                                # use optimized binning for b-tag discriminants if provided
                                if self.optimize_binning and variable.get_aux(
                                        "can_optimize_bins", False):
                                    binning_category = category.get_aux(
                                        "binning_category", category)
                                    # overwrite binning if specialized binning is defined for this category
                                    variable.binning = category_binnings.get(
                                        binning_category.name, variable.binning)

                                hist = ROOT.TH1F(
                                    "{}_{}".format(variable.name, shift),
                                    variable.full_title(root=True),
                                    variable.n_bins,
                                    array.array("d", variable.bin_edges))
                                hist.Sumw2()

                                # build the full selection string, including the total event weight
                                selection = [
                                    category.selection,
                                    "jetmet_pass{jec_identifier} == 1",
                                    "{} != -10000".format(variable.expression),
                                ]
                                if variable.selection:
                                    selection.append(variable.selection)
                                # the jec identifier is filled in after joining, so it also
                                # reaches placeholders inside the individual cut strings
                                selection = join_root_selection(selection).format(
                                    **{"jec_identifier": jec_identifier})
                                selection = join_root_selection(
                                    selection, "totalWeight", op="*")

                                # project and write the histogram
                                tree.Project(
                                    "{}_{}".format(variable.name, shift),
                                    variable.expression.format(
                                        **{"jec_identifier": jec_identifier}),
                                    selection)
                                hist.Write()

                                # reset the binning changed above
                                variable.binning = base_variable_binning

                    progress(i)