def get_properties(dict_, era, channel, directory, additional_cuts):
    # Get data estimation method
    if "2016" in era.name:
        from shape_producer.estimation_methods_2016 import DataEstimation
    elif "2017" in era.name:
        from shape_producer.estimation_methods_2017 import DataEstimation
    else:
        logger.fatal(
            "Cannot import data estimation because era {} is not implemented.".
            format(era.name))
        raise Exception
    estimation = DataEstimation(era, directory, channel)

    # Extract weight string, which should be equal to (1.0)
    weight_string = estimation.get_weights().extract()
    logger.debug("Data weight string: %s", weight_string)
    if weight_string != "(1.0)":
        logger.fatal("Weight string is not equal to (1.0).")
        raise Exception

    # Extract cut string
    cut_string = (estimation.get_cuts() + channel.cuts +
                  additional_cuts).expand()
    logger.debug("Data cut string: %s", cut_string)
    dict_["cut_string"] = str(cut_string)

    # Get files
    files = [str(f) for f in estimation.get_files()]
    for i, f in enumerate(files):
        logger.debug("File %d: %s", i + 1, str(f).replace(directory + "/", ""))
    dict_["files"] = files
    dict_["directory"] = directory
    return dict_
def get_properties(dict_, era, channel, directory, additional_cuts):
    # Get data estimation method
    estimation = DataEstimation(era, directory, channel)

    # Extract weight string, which should be equal to (1.0)
    weight_string = estimation.get_weights().extract()
    logger.debug("Data weight string: %s", weight_string)
    if weight_string != "(1.0)":
        logger.fatal("Weight string is not equal to (1.0).")
        raise Exception

    # Extract cut string
    cut_string = (estimation.get_cuts() + channel.cuts +
                  additional_cuts).expand()
    logger.debug("Data cut string: %s", cut_string)
    dict_["cut_string"] = str(cut_string)

    # Get files
    files = [str(f) for f in estimation.get_files()]
    for i, f in enumerate(files):
        logger.debug("File %d: %s", i + 1, str(f).replace(directory + "/", ""))
    dict_["files"] = files
    return dict_
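# Hedged usage sketch (assumption, not part of the original script): shows how
# get_properties could be driven for the mt channel, assuming the usual
# shape_producer module layout for the channel and cutstring classes. The
# database and Artus directory paths are hypothetical placeholders.
def _example_get_properties(database_path, artus_directory):
    from shape_producer.era import Run2016
    from shape_producer.channel import MTSM
    from shape_producer.cutstring import Cuts
    era = Run2016(database_path)
    channel = MTSM()
    # No additional cuts beyond the channel's baseline selection.
    return get_properties({}, era, channel, artus_directory, Cuts())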
def main(args):
    # Write argparse arguments to YAML config
    logger.debug("Write argparse arguments to YAML config.")
    output_config = {}
    output_config["base_path"] = args.base_path
    output_config["output_path"] = args.output_path
    output_config["output_filename"] = args.output_filename
    output_config["tree_path"] = args.tree_path
    output_config["event_branch"] = args.event_branch
    output_config["training_weight_branch"] = args.training_weight_branch

    # Define era
    if "2016" in args.era:
        from shape_producer.estimation_methods_2016 import DataEstimation, HTTEstimation, ggHEstimation, qqHEstimation, VHEstimation, \
            ZTTEstimation, ZTTEstimationTT, ZLEstimationMTSM, ZLEstimationETSM, ZLEstimationTT, \
            ZJEstimationMT, ZJEstimationET, ZJEstimationTT, WEstimationRaw, \
            TTTEstimationMT, TTTEstimationET, TTTEstimationTT, TTJEstimationMT, TTJEstimationET, TTJEstimationTT, \
            VVEstimation, QCDEstimationMT, QCDEstimationET, QCDEstimationTT, ZTTEmbeddedEstimation, \
            TTLEstimationMT, TTLEstimationET, TTLEstimationTT, TTTTEstimationMT, TTTTEstimationET, \
            EWKWpEstimation, EWKWmEstimation, EWKZllEstimation, EWKZnnEstimation
        from shape_producer.era import Run2016
        era = Run2016(args.database)
    else:
        logger.fatal("Era {} is not implemented.".format(args.era))
        raise Exception

    ############################################################################

    # Channel: mt
    if args.channel == "mt":
        channel = MTSM()

        # Set up `processes` part of config
        output_config["processes"] = {}

        # Additional cuts
        additional_cuts = Cuts()
        logger.warning("Use additional cuts for mt: %s",
                       additional_cuts.expand())

        # MC-driven processes
        # NOTE: Define here the mapping of the process estimations to the
        # training classes
        classes_map = {
            "ggH": "ggh",
            "qqH": "qqh",
            "ZTT": "ztt",
            "EMB": "ztt",
            "ZL": "zll",
            "ZJ": "zll",
            "TTT": "tt",
            "TTL": "tt",
            "TTJ": "tt",
            "W": "w",
            "EWKWp": "w",
            "EWKWm": "w",
            "VV": "misc",
            "EWKZll": "misc",
            "EWKZnn": "misc"
        }
        for estimation in [
                ggHEstimation(era, args.base_path, channel),
                qqHEstimation(era, args.base_path, channel),
                ZTTEstimation(era, args.base_path, channel),
                #ZTTEmbeddedEstimation(era, args.base_path, channel),
                ZLEstimationMTSM(era, args.base_path, channel),
                ZJEstimationMT(era, args.base_path, channel),
                TTTEstimationMT(era, args.base_path, channel),
                #TTLEstimationMT(era, args.base_path, channel),
                TTJEstimationMT(era, args.base_path, channel),
                WEstimationRaw(era, args.base_path, channel),
                EWKWpEstimation(era, args.base_path, channel),
                EWKWmEstimation(era, args.base_path, channel),
                VVEstimation(era, args.base_path, channel),
                EWKZllEstimation(era, args.base_path, channel),
                #EWKZnnEstimation(era, args.base_path, channel)
        ]:
            output_config["processes"][estimation.name] = {
                "files": [
                    str(f).replace(args.base_path + "/", "")
                    for f in estimation.get_files()
                ],
                "cut_string": (estimation.get_cuts() + channel.cuts +
                               additional_cuts).expand(),
                "weight_string": estimation.get_weights().extract(),
                "class": classes_map[estimation.name]
            }

        # Same sign selection for data-driven QCD
        estimation = DataEstimation(era, args.base_path, channel)
        estimation.name = "QCD"
        channel_ss = copy.deepcopy(channel)
        channel_ss.cuts.get("os").invert()
        output_config["processes"][estimation.name] = {
            "files": [
                str(f).replace(args.base_path + "/", "")
                for f in estimation.get_files()
            ],
            "cut_string": (estimation.get_cuts() + channel_ss.cuts +
                           additional_cuts).expand(),
            "weight_string": estimation.get_weights().extract(),
            "class": "ss"
        }

    ############################################################################

    # Channel: et
    if args.channel == "et":
        channel = ETSM()

        # Set up `processes` part of config
        output_config["processes"] = {}

        # Additional cuts
        additional_cuts = Cuts()
        logger.warning("Use additional cuts for et: %s",
                       additional_cuts.expand())

        # MC-driven processes
        # NOTE: Define here the mapping of the process estimations to the
        # training classes
        classes_map = {
            "ggH": "ggh",
            "qqH": "qqh",
            "ZTT": "ztt",
            "EMB": "ztt",
            "ZL": "zll",
            "ZJ": "zll",
            "TTT": "tt",
            "TTL": "tt",
            "TTJ": "tt",
            "W": "w",
            "EWKWp": "w",
            "EWKWm": "w",
            "VV": "misc",
            "EWKZll": "misc",
            "EWKZnn": "misc"
        }
        for estimation in [
                ggHEstimation(era, args.base_path, channel),
                qqHEstimation(era, args.base_path, channel),
                ZTTEstimation(era, args.base_path, channel),
                #ZTTEmbeddedEstimation(era, args.base_path, channel),
                ZLEstimationETSM(era, args.base_path, channel),
                ZJEstimationET(era, args.base_path, channel),
                TTTEstimationET(era, args.base_path, channel),
                #TTLEstimationET(era, args.base_path, channel),
                TTJEstimationET(era, args.base_path, channel),
                WEstimationRaw(era, args.base_path, channel),
                EWKWpEstimation(era, args.base_path, channel),
                EWKWmEstimation(era, args.base_path, channel),
                VVEstimation(era, args.base_path, channel),
                EWKZllEstimation(era, args.base_path, channel),
                #EWKZnnEstimation(era, args.base_path, channel)
        ]:
            output_config["processes"][estimation.name] = {
                "files": [
                    str(f).replace(args.base_path + "/", "")
                    for f in estimation.get_files()
                ],
                "cut_string": (estimation.get_cuts() + channel.cuts +
                               additional_cuts).expand(),
                "weight_string": estimation.get_weights().extract(),
                "class": classes_map[estimation.name]
            }

        # Same sign selection for data-driven QCD
        estimation = DataEstimation(era, args.base_path, channel)
        estimation.name = "QCD"
        channel_ss = copy.deepcopy(channel)
        channel_ss.cuts.get("os").invert()
        output_config["processes"][estimation.name] = {
            "files": [
                str(f).replace(args.base_path + "/", "")
                for f in estimation.get_files()
            ],
            "cut_string": (estimation.get_cuts() + channel_ss.cuts +
                           additional_cuts).expand(),
            "weight_string": estimation.get_weights().extract(),
            "class": "ss"
        }

    ############################################################################

    # Channel: tt
    if args.channel == "tt":
        channel = TTSM()

        # Set up `processes` part of config
        output_config["processes"] = {}

        # Additional cuts
        additional_cuts = Cuts()
        logger.warning("Use additional cuts for tt: %s",
                       additional_cuts.expand())

        # MC-driven processes
        # NOTE: Define here the mapping of the process estimations to the
        # training classes
        classes_map = {
            "ggH": "ggh",
            "qqH": "qqh",
            "ZTT": "ztt",
            "EMB": "ztt",
            "ZL": "misc",
            "ZJ": "misc",
            "TTT": "misc",
            "TTL": "misc",
            "TTJ": "misc",
            "W": "misc",
            "EWKWp": "misc",
            "EWKWm": "misc",
            "VV": "misc",
            "EWKZll": "misc",
            "EWKZnn": "misc"
        }
        for estimation in [
                ggHEstimation(era, args.base_path, channel),
                qqHEstimation(era, args.base_path, channel),
                ZTTEstimationTT(era, args.base_path, channel),
                #ZTTEmbeddedEstimation(era, args.base_path, channel),
                ZLEstimationTT(era, args.base_path, channel),
                ZJEstimationTT(era, args.base_path, channel),
                TTTEstimationTT(era, args.base_path, channel),
                #TTLEstimationTT(era, args.base_path, channel),
                TTJEstimationTT(era, args.base_path, channel),
                WEstimationRaw(era, args.base_path, channel),
                EWKWpEstimation(era, args.base_path, channel),
                EWKWmEstimation(era, args.base_path, channel),
                VVEstimation(era, args.base_path, channel),
                EWKZllEstimation(era, args.base_path, channel),
                #EWKZnnEstimation(era, args.base_path, channel)
        ]:
            output_config["processes"][estimation.name] = {
                "files": [
                    str(f).replace(args.base_path + "/", "")
                    for f in estimation.get_files()
                ],
                "cut_string": (estimation.get_cuts() + channel.cuts +
                               additional_cuts).expand(),
                "weight_string": estimation.get_weights().extract(),
                "class": classes_map[estimation.name]
            }

        # Anti-isolated selection on the second tau for data-driven QCD
        estimation = DataEstimation(era, args.base_path, channel)
        estimation.name = "QCD"
        channel_iso = copy.deepcopy(channel)
        channel_iso.cuts.remove("tau_2_iso")
        channel_iso.cuts.add(
            Cut("byTightIsolationMVArun2v1DBoldDMwLT_2<0.5", "tau_2_iso"))
        channel_iso.cuts.add(
            Cut("byLooseIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_2_iso_loose"))
        output_config["processes"][estimation.name] = {
            "files": [
                str(f).replace(args.base_path + "/", "")
                for f in estimation.get_files()
            ],
            "cut_string": (estimation.get_cuts() + channel_iso.cuts +
                           additional_cuts).expand(),
            "weight_string": estimation.get_weights().extract(),
            "class": "noniso"
        }

    ############################################################################

    # Write output config
    logger.info("Write config to file: {}".format(args.output_config))
    yaml.dump(output_config,
              open(args.output_config, 'w'),
              default_flow_style=False)
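# Hedged consumer sketch (assumption, not part of the original script): shows
# how a downstream step could read back the config written above. The file
# name "training_config.yaml" is a hypothetical placeholder for
# args.output_config.
import yaml

def _print_training_classes(config_path="training_config.yaml"):
    config = yaml.safe_load(open(config_path))
    for name, process in config["processes"].items():
        # Each process entry carries its files, cut string, weight string
        # and training class.
        print("{} -> {} ({} files)".format(name, process["class"],
                                           len(process["files"])))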
def main(args):
    # Use 2016 dataset
    era = Run2016(args.datasets)

    # Channel
    if args.channel == "et":
        channel = ETSM2016()
        friend_directory = args.et_friend_directory
    elif args.channel == "mt":
        channel = MTSM2016()
        friend_directory = args.mt_friend_directory
    elif args.channel == "tt":
        channel = TTSM2016()
        friend_directory = args.tt_friend_directory
    else:
        raise Exception

    # Data estimation
    data = DataEstimation(era,
                          args.directory,
                          channel,
                          friend_directory=friend_directory)
    files = data.get_files()
    cuts = (data.get_cuts() + channel.cuts).expand()
    weights = data.get_weights().extract()

    # Combine all files
    tree = ROOT.TChain()
    for f in files:
        tree.Add(f + "/{}_nominal/ntuple".format(args.channel))
        #print("Add file to tree: {}".format(f))

    friend = ROOT.TChain()
    for f in files:
        friendname = os.path.basename(f).replace(".root", "")
        friendpath = os.path.join(friend_directory, friendname,
                                  friendname + ".root")
        friend.Add(friendpath + "/{}_nominal/ntuple".format(args.channel))
        #print("Add file to friend: {}".format(friendpath))
    tree.AddFriend(friend)

    # All events after baseline selection
    tree.Draw("m_sv>>all_events", cuts + "*({})".format(weights), "goff")
    all_events = ROOT.gDirectory.Get("all_events").Integral(-1000, 1000)

    # Only 16043
    tree.Draw(
        "m_sv>>only_16043", cuts + "*(({})==0)*(({})==1)*({})".format(
            args.cut18032, args.cut16043, weights), "goff")
    only_16043 = ROOT.gDirectory.Get("only_16043").Integral(-1000, 1000)

    # All 16043
    tree.Draw("m_sv>>all_16043",
              cuts + "*(({})==1)*({})".format(args.cut16043, weights), "goff")
    all_16043 = ROOT.gDirectory.Get("all_16043").Integral(-1000, 1000)

    # Only 18032
    tree.Draw(
        "m_sv>>only_18032", cuts + "*(({})==1)*(({})==0)*({})".format(
            args.cut18032, args.cut16043, weights), "goff")
    only_18032 = ROOT.gDirectory.Get("only_18032").Integral(-1000, 1000)

    # All 18032
    tree.Draw("m_sv>>all_18032",
              cuts + "*(({})==1)*({})".format(args.cut18032, weights), "goff")
    all_18032 = ROOT.gDirectory.Get("all_18032").Integral(-1000, 1000)

    # Both
    tree.Draw(
        "m_sv>>both", cuts + "*(({})==1)*(({})==1)*({})".format(
            args.cut18032, args.cut16043, weights), "goff")
    both = ROOT.gDirectory.Get("both").Integral(-1000, 1000)

    # None
    tree.Draw(
        "m_sv>>none", cuts + "*(({})==0)*(({})==0)*({})".format(
            args.cut18032, args.cut16043, weights), "goff")
    none = ROOT.gDirectory.Get("none").Integral(-1000, 1000)

    # Print
    print("Cross-check: {}, {}".format(both + only_18032 + only_16043 + none,
                                       all_events))
    print("Cross-check: {}, {}".format(all_18032 + only_16043 + none,
                                       all_events))
    print("Cross-check: {}, {}".format(only_18032 + all_16043 + none,
                                       all_events))
    print("Cross-check: {}, {}".format(all_16043, only_16043 + both))
    print("Cross-check: {}, {}".format(all_18032, only_18032 + both))
    print("Cross-check: {}, {}".format(
        all_events - both - only_18032 - only_16043, none))
    print("All events: {}".format(all_events))
    print("In neither of the two selections: {}".format(none))
    print("In both selections together: {}".format(both))
    print("In at least one selection: {}".format(both + only_18032 +
                                                 only_16043))
    print("Only 16043: {}".format(only_16043))
    print("All 16043: {}".format(all_16043))
    print("Only 18032: {}".format(only_18032))
    print("All 18032: {}".format(all_18032))
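# Hedged illustration (assumption, not part of the original script): the
# printed cross-checks verify plain inclusion-exclusion bookkeeping between
# the two selections ("16043" and "18032"). With toy counts:
def _overlap_crosscheck_example():
    only_16043, only_18032, both, none = 10.0, 20.0, 5.0, 65.0
    all_events = only_16043 + only_18032 + both + none
    all_16043 = only_16043 + both
    all_18032 = only_18032 + both
    # The same identities that the script checks on data:
    assert all_events == all_18032 + only_16043 + none
    assert all_events == only_18032 + all_16043 + none
    assert all_events - both - only_18032 - only_16043 == none
    return all_events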
def main(args):
    # Write argparse arguments to YAML config
    logger.debug("Write argparse arguments to YAML config.")
    output_config = {}
    output_config["base_path"] = args.base_path
    output_config["friend_paths"] = args.friend_paths
    output_config["output_path"] = args.output_path
    output_config["output_filename"] = args.output_filename
    output_config["tree_path"] = args.tree_path
    output_config["event_branch"] = args.event_branch
    output_config["training_weight_branch"] = args.training_weight_branch
    logger.debug("Channel %s, era %s", args.channel, args.era)

    # Define era
    if "2016" in args.era:
        from shape_producer.estimation_methods_2016 import DataEstimation, ggHEstimation, qqHEstimation, \
            ZTTEstimation, ZLEstimation, ZJEstimation, TTTEstimation, TTJEstimation, \
            ZTTEmbeddedEstimation, TTLEstimation, \
            EWKZEstimation, VVLEstimation, VVTEstimation, VVJEstimation, WEstimation
        from shape_producer.era import Run2016
        era = Run2016(args.database)
    elif "2017" in args.era:
        from shape_producer.estimation_methods_2017 import DataEstimation, ZTTEstimation, ZJEstimation, ZLEstimation, \
            TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation, VVJEstimation, VVLEstimation, WEstimation, \
            ggHEstimation, qqHEstimation, EWKZEstimation, ZTTEmbeddedEstimation
        from shape_producer.era import Run2017
        era = Run2017(args.database)
    elif "2018" in args.era:
        from shape_producer.estimation_methods_2018 import DataEstimation, ZTTEstimation, ZJEstimation, ZLEstimation, \
            TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation, VVJEstimation, VVLEstimation, WEstimation, \
            ggHEstimation, qqHEstimation, EWKZEstimation, ZTTEmbeddedEstimation
        from shape_producer.era import Run2018
        era = Run2018(args.database)
    else:
        logger.fatal("Era {} is not implemented.".format(args.era))
        raise Exception

    def estimationMethodAndClassMapGenerator():
        ###### Common processes
        if args.training_stxs1p1:
            classes_map = {
                # class 1
                "ggH_GG2H_PTH_GT200125": "ggh_PTHGT200",
                # class 2
                "ggH_GG2H_0J_PTH_0_10125": "ggh_0J",
                "ggH_GG2H_0J_PTH_GT10125": "ggh_0J",
                # class 3
                "ggH_GG2H_1J_PTH_0_60125": "ggh_1J_PTH0to120",
                "ggH_GG2H_1J_PTH_60_120125": "ggh_1J_PTH0to120",
                # class 4
                "ggH_GG2H_1J_PTH_120_200125": "ggh_1J_PTH120to200",
                # class 5
                "ggH_GG2H_GE2J_MJJ_0_350_PTH_0_60125": "ggh_2J",
                "ggH_GG2H_GE2J_MJJ_0_350_PTH_60_120125": "ggh_2J",
                "ggH_GG2H_GE2J_MJJ_0_350_PTH_120_200125": "ggh_2J",
                # class 6
                "ggH_GG2H_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_0_25125": "vbftopo_lowmjj",
                "ggH_GG2H_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_GT25125": "vbftopo_lowmjj",
                "qqH_QQ2HQQ_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_0_25125": "vbftopo_lowmjj",
                "qqH_QQ2HQQ_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_GT25125": "vbftopo_lowmjj",
                # class 7
                "ggH_GG2H_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_0_25125": "vbftopo_highmjj",
                "ggH_GG2H_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_GT25125": "vbftopo_highmjj",
                "qqH_QQ2HQQ_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_0_25125": "vbftopo_highmjj",
                "qqH_QQ2HQQ_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_GT25125": "vbftopo_highmjj",
                # class 8
                "qqH_QQ2HQQ_GE2J_MJJ_0_60125": "qqh_2J",
                "qqH_QQ2HQQ_GE2J_MJJ_60_120125": "qqh_2J",
                "qqH_QQ2HQQ_GE2J_MJJ_120_350125": "qqh_2J",
                # class 9
                "qqH_QQ2HQQ_GE2J_MJJ_GT350_PTH_GT200125": "qqh_PTHGT200",
            }
            estimationMethodList = [
                ggHEstimation("ggH_GG2H_PTH_GT200125", era, args.base_path, channel),
                ggHEstimation("ggH_GG2H_0J_PTH_0_10125", era, args.base_path, channel),
                ggHEstimation("ggH_GG2H_0J_PTH_GT10125", era, args.base_path, channel),
                ggHEstimation("ggH_GG2H_1J_PTH_0_60125", era, args.base_path, channel),
                ggHEstimation("ggH_GG2H_1J_PTH_60_120125", era, args.base_path, channel),
                ggHEstimation("ggH_GG2H_1J_PTH_120_200125", era, args.base_path, channel),
                ggHEstimation("ggH_GG2H_GE2J_MJJ_0_350_PTH_0_60125", era, args.base_path, channel),
                ggHEstimation("ggH_GG2H_GE2J_MJJ_0_350_PTH_60_120125", era, args.base_path, channel),
                ggHEstimation("ggH_GG2H_GE2J_MJJ_0_350_PTH_120_200125", era, args.base_path, channel),
                ggHEstimation("ggH_GG2H_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_0_25125", era, args.base_path, channel),
                ggHEstimation("ggH_GG2H_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_GT25125", era, args.base_path, channel),
                qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_0_25125", era, args.base_path, channel),
                qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_GT25125", era, args.base_path, channel),
                ggHEstimation("ggH_GG2H_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_0_25125", era, args.base_path, channel),
                ggHEstimation("ggH_GG2H_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_GT25125", era, args.base_path, channel),
                qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_0_25125", era, args.base_path, channel),
                qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_GT25125", era, args.base_path, channel),
                qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_0_60125", era, args.base_path, channel),
                qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_60_120125", era, args.base_path, channel),
                qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_120_350125", era, args.base_path, channel),
                qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_GT350_PTH_GT200125", era, args.base_path, channel),
            ]
        elif args.training_inclusive:
            classes_map = {
                "ggH125": "xxh",
                "qqH125": "xxh",
            }
            estimationMethodList = [
                ggHEstimation("ggH125", era, args.base_path, channel),
                qqHEstimation("qqH125", era, args.base_path, channel),
            ]
        else:
            classes_map = {
                "ggH125": "ggh",
                "qqH125": "qqh",
            }
            estimationMethodList = [
                ggHEstimation("ggH125", era, args.base_path, channel),
                qqHEstimation("qqH125", era, args.base_path, channel),
            ]
        estimationMethodList.extend([
            EWKZEstimation(era, args.base_path, channel),
            VVLEstimation(era, args.base_path, channel)
        ])
        classes_map["EWKZ"] = "misc"

        ##### TT*, ZL, ZJ processes
        estimationMethodList.extend([
            TTLEstimation(era, args.base_path, channel),
            ZLEstimation(era, args.base_path, channel)
        ])
        if args.channel == "tt":
            classes_map.update({"TTL": "misc", "ZL": "misc", "VVL": "misc"})
        ## no TTJ, ZJ for em
        elif args.channel == "em":
            classes_map.update({"TTL": "tt", "ZL": "misc", "VVL": "db"})
        else:
            classes_map.update({"TTL": "tt", "ZL": "zll", "VVL": "misc"})

        ######## Check for embedding vs. MC
        if args.training_z_estimation_method == "emb":
            classes_map["EMB"] = "emb"
            estimationMethodList.extend(
                [ZTTEmbeddedEstimation(era, args.base_path, channel)])
        elif args.training_z_estimation_method == "mc":
            classes_map["ZTT"] = "ztt"
            estimationMethodList.extend([
                ZTTEstimation(era, args.base_path, channel),
                TTTEstimation(era, args.base_path, channel),
                VVTEstimation(era, args.base_path, channel)
            ])
            if args.channel == "tt":
                classes_map.update({"TTT": "misc", "VVT": "misc"})
            ## no TTJ, ZJ for em
            elif args.channel == "em":
                classes_map.update({"TTT": "tt", "VVT": "db"})
            else:
                classes_map.update({"TTT": "tt", "VVT": "misc"})
        else:
            logger.fatal(
                "No valid training-z-estimation-method! Options are emb, mc. "
                "Argument was {}".format(args.training_z_estimation_method))
            raise Exception

        if args.training_jetfakes_estimation_method == "ff" and args.channel != "em":
            classes_map.update({"ff": "ff"})
        elif args.training_jetfakes_estimation_method == "mc" or args.channel == "em":
            # Less data -> fewer categories for tt
            if args.channel == "tt":
                classes_map.update({"TTJ": "misc", "ZJ": "misc"})
            ## no TTJ, ZJ for em
            elif args.channel != "em":
                classes_map.update({"TTJ": "tt", "ZJ": "zll"})
            if args.channel != "em":
                classes_map.update({"VVJ": "misc"})
                estimationMethodList.extend([
                    VVJEstimation(era, args.base_path, channel),
                    ZJEstimation(era, args.base_path, channel),
                    TTJEstimation(era, args.base_path, channel)
                ])
            ### W:
            estimationMethodList.extend(
                [WEstimation(era, args.base_path, channel)])
            if args.channel in ["et", "mt"]:
                classes_map["W"] = "w"
            else:
                classes_map["W"] = "misc"
            ### QCD class
            if args.channel == "tt":
                classes_map["QCD"] = "noniso"
            else:
                classes_map["QCD"] = "ss"
        else:
            logger.fatal(
                "No valid training-jetfakes-estimation-method! Options are ff, mc. "
                "Argument was {}".format(
                    args.training_jetfakes_estimation_method))
            raise Exception
        return ([classes_map, estimationMethodList])

    channelDict = {}
    channelDict["2016"] = {
        "mt": MTSM2016(),
        "et": ETSM2016(),
        "tt": TTSM2016(),
        "em": EMSM2016()
    }
    channelDict["2017"] = {
        "mt": MTSM2017(),
        "et": ETSM2017(),
        "tt": TTSM2017(),
        "em": EMSM2017()
    }
    channelDict["2018"] = {
        "mt": MTSM2018(),
        "et": ETSM2018(),
        "tt": TTSM2018(),
        "em": EMSM2018()
    }
    channel = channelDict[args.era][args.channel]

    # Set up `processes` part of config
    output_config["processes"] = {}

    # Additional cuts
    additional_cuts = Cuts()
    logger.warning("Use additional cuts for %s: %s", args.channel,
                   additional_cuts.expand())

    classes_map, estimationMethodList = estimationMethodAndClassMapGenerator()

    ### Disables all other estimation methods:
    # classes_map = {"ff": "ff"}
    # estimationMethodList = []

    for estimation in estimationMethodList:
        output_config["processes"][estimation.name] = {
            "files": [
                str(f).replace(args.base_path.rstrip("/") + "/", "")
                for f in estimation.get_files()
            ],
            "cut_string": (estimation.get_cuts() + channel.cuts +
                           additional_cuts).expand(),
            "weight_string": estimation.get_weights().extract(),
            "class": classes_map[estimation.name]
        }

    if args.training_jetfakes_estimation_method == "mc" or args.channel == "em":
        if args.training_jetfakes_estimation_method == "ff":
            logger.warning("ff+em: using mc for em channel")
        # Data-driven QCD
        estimation = DataEstimation(era, args.base_path, channel)
        estimation.name = "QCD"
        channel_qcd = copy.deepcopy(channel)
        if args.channel != "tt":
            # Same-sign selection: invert the opposite-sign ("os") cut
            channel_qcd.cuts.get("os").invert()
        else:
            # tt: anti-isolated selection on the second tau instead
            channel_qcd.cuts.remove("tau_2_iso")
            channel_qcd.cuts.add(
                Cut("byTightDeepTau2017v2p1VSjet_2<0.5", "tau_2_iso"))
            channel_qcd.cuts.add(
                Cut("byMediumDeepTau2017v2p1VSjet_2>0.5", "tau_2_iso_loose"))
        output_config["processes"][estimation.name] = {
            "files": [
                str(f).replace(args.base_path.rstrip("/") + "/", "")
                for f in estimation.get_files()
            ],
            "cut_string": (estimation.get_cuts() + channel_qcd.cuts +
                           additional_cuts).expand(),
            "weight_string": estimation.get_weights().extract(),
            "class": classes_map[estimation.name]
        }
    else:  ## ff and not em
        estimation = DataEstimation(era, args.base_path, channel)
        estimation.name = "ff"
        aiso = copy.deepcopy(channel)
        if args.channel in ["et", "mt"]:
            aisoCut = Cut(
                "byTightDeepTau2017v2p1VSjet_2<0.5&&byVLooseDeepTau2017v2p1VSjet_2>0.5",
                "tau_aiso")
            fakeWeightstring = "ff2_nom"
            aiso.cuts.remove("tau_iso")
        elif args.channel == "tt":
            aisoCut = Cut(
                "(byTightDeepTau2017v2p1VSjet_2>0.5&&byTightDeepTau2017v2p1VSjet_1<0.5&&byVLooseDeepTau2017v2p1VSjet_1>0.5)||(byTightDeepTau2017v2p1VSjet_1>0.5&&byTightDeepTau2017v2p1VSjet_2<0.5&&byVLooseDeepTau2017v2p1VSjet_2>0.5)",
                "tau_aiso")
            fakeWeightstring = "(0.5*ff1_nom*(byTightDeepTau2017v2p1VSjet_1<0.5)+0.5*ff2_nom*(byTightDeepTau2017v2p1VSjet_2<0.5))"
            aiso.cuts.remove("tau_1_iso")
            aiso.cuts.remove("tau_2_iso")
        # self._nofake_processes = [copy.deepcopy(p) for p in nofake_processes]
        aiso.cuts.add(aisoCut)
        additionalWeights = Weights(Weight(fakeWeightstring, "fake_factor"))

        output_config["processes"][estimation.name] = {
            "files": [
                str(f).replace(args.base_path.rstrip("/") + "/", "")
                for f in estimation.get_files()
            ],
            "cut_string": (estimation.get_cuts() + aiso.cuts).expand(),
            "weight_string":
            (estimation.get_weights() + additionalWeights).extract(),
            "class": classes_map[estimation.name]
        }

    output_config["datasets"] = [
        args.output_path + "/fold" + fold + "_training_dataset.root"
        for fold in ["0", "1"]
    ]

    #####################################
    # Write output config
    logger.info("Write config to file: {}".format(args.output_config))
    yaml.dump(output_config,
              open(args.output_config, 'w'),
              default_flow_style=False)
def main(args):
    # Write argparse arguments to YAML config
    logger.debug("Write argparse arguments to YAML config.")
    output_config = {}
    output_config["base_path"] = args.base_path
    output_config["friend_paths"] = args.friend_paths
    output_config["output_path"] = args.output_path
    output_config["output_filename"] = args.output_filename
    output_config["tree_path"] = args.tree_path
    output_config["event_branch"] = args.event_branch
    output_config["training_weight_branch"] = args.training_weight_branch

    # Define era
    if "2016" in args.era:
        from shape_producer.estimation_methods_2016 import DataEstimation, ggHEstimation, qqHEstimation, \
            ZTTEstimation, ZLEstimation, ZJEstimation, WEstimation, TTTEstimation, TTJEstimation, \
            ZTTEmbeddedEstimation, TTLEstimation, EWKZEstimation, \
            VVLEstimation, VVJEstimation, VVEstimation, VVTEstimation
        #QCDEstimation_SStoOS_MTETEM, QCDEstimationTT, EWKWpEstimation, EWKWmEstimation, VHEstimation, HTTEstimation
        from shape_producer.era import Run2016
        era = Run2016(args.database)
    elif "2017" in args.era:
        from shape_producer.estimation_methods_2017 import DataEstimation, ZTTEstimation, ZJEstimation, ZLEstimation, \
            TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation, VVJEstimation, VVLEstimation, WEstimation, \
            ggHEstimation, qqHEstimation, EWKZEstimation, ZTTEmbeddedEstimation
        from shape_producer.era import Run2017
        era = Run2017(args.database)
    elif "2018" in args.era:
        from shape_producer.estimation_methods_2018 import DataEstimation, ZTTEstimation, ZJEstimation, ZLEstimation, \
            TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation, VVJEstimation, VVLEstimation, WEstimation, \
            ggHEstimation, qqHEstimation, EWKZEstimation, ZTTEmbeddedEstimation
        from shape_producer.era import Run2018
        era = Run2018(args.database)
    else:
        logger.fatal("Era {} is not implemented.".format(args.era))
        raise Exception

    def estimationMethodAndClassMapGenerator():
        ###### Common processes
        classes_map = {"ggH": "ggh", "qqH": "qqh", "EWKZ": "misc"}
        estimationMethodList = [
            ggHEstimation("ggH", era, args.base_path, channel),
            qqHEstimation("qqH", era, args.base_path, channel),
            EWKZEstimation(era, args.base_path, channel),
            VVLEstimation(era, args.base_path, channel),
            WEstimation(era, args.base_path, channel)
        ]

        ######## Check for embedding vs. MC
        if args.training_z_estimation_method == "emb":
            classes_map["EMB"] = "ztt"
            estimationMethodList.extend(
                [ZTTEmbeddedEstimation(era, args.base_path, channel)])
        elif args.training_z_estimation_method == "mc":
            classes_map["ZTT"] = "ztt"
            estimationMethodList.extend([
                ZTTEstimation(era, args.base_path, channel),
                TTTEstimation(era, args.base_path, channel),
                VVTEstimation(era, args.base_path, channel)
            ])
        else:
            logger.fatal(
                "No valid training-z-estimation-method! Options are emb, mc. "
                "Argument was {}".format(args.training_z_estimation_method))
            raise Exception

        ##### TT*, ZL, ZJ processes
        estimationMethodList.extend([
            TTLEstimation(era, args.base_path, channel),
            ZLEstimation(era, args.base_path, channel)
        ])
        # Less data -> fewer categories for tt
        if args.channel == "tt":
            classes_map.update({
                "TTT": "misc",
                "TTL": "misc",
                "TTJ": "misc",
                "ZL": "misc",
                "ZJ": "misc"
            })
            estimationMethodList.extend([
                ZJEstimation(era, args.base_path, channel),
                TTJEstimation(era, args.base_path, channel)
            ])
        ## no TTJ, ZJ for em
        elif args.channel == "em":
            classes_map.update({"TTT": "tt", "TTL": "tt", "ZL": "misc"})
        else:
            classes_map.update({
                "TTT": "tt",
                "TTL": "tt",
                "TTJ": "tt",
                "ZL": "zll",
                "ZJ": "zll"
            })
            estimationMethodList.extend([
                ZJEstimation(era, args.base_path, channel),
                TTJEstimation(era, args.base_path, channel)
            ])

        ### W: estimation method already included above, just a different
        ### class mapping for et and mt
        if args.channel in ["et", "mt"]:
            classes_map["W"] = "w"
        else:
            classes_map["W"] = "misc"

        ##### VV/[VVT, VVL, VVJ] split:
        ##### VVL is in the common list, VVT in the "EMB vs MC" block above
        if args.channel == "em":
            classes_map.update({"VVT": "db", "VVL": "db"})
        else:
            classes_map.update({"VVT": "misc", "VVL": "misc", "VVJ": "misc"})
            estimationMethodList.extend([
                VVJEstimation(era, args.base_path, channel),
            ])

        ### QCD class
        if args.channel == "tt":
            classes_map["QCD"] = "noniso"
        else:
            classes_map["QCD"] = "ss"
        return ([classes_map, estimationMethodList])

    channelDict = {}
    channelDict["2016"] = {
        "mt": MTSM2016(),
        "et": ETSM2016(),
        "tt": TTSM2016(),
        "em": EMSM2016()
    }
    channelDict["2017"] = {
        "mt": MTSM2017(),
        "et": ETSM2017(),
        "tt": TTSM2017(),
        "em": EMSM2017()
    }
    channelDict["2018"] = {
        "mt": MTSM2018(),
        "et": ETSM2018(),
        "tt": TTSM2018(),
        "em": EMSM2018()
    }
    channel = channelDict[args.era][args.channel]

    # Set up `processes` part of config
    output_config["processes"] = {}

    # Additional cuts
    additional_cuts = Cuts()
    logger.warning("Use additional cuts for %s: %s", args.channel,
                   additional_cuts.expand())

    classes_map, estimationMethodList = estimationMethodAndClassMapGenerator()

    ## MC and embedding processes
    for estimation in estimationMethodList:
        output_config["processes"][estimation.name] = {
            "files": [
                str(f).replace(args.base_path.rstrip("/") + "/", "")
                for f in estimation.get_files()
            ],
            "cut_string": (estimation.get_cuts() + channel.cuts +
                           additional_cuts).expand(),
            "weight_string": estimation.get_weights().extract(),
            "class": classes_map[estimation.name]
        }

    ###
    # Data-driven QCD
    estimation = DataEstimation(era, args.base_path, channel)
    estimation.name = "QCD"
    channel_qcd = copy.deepcopy(channel)
    if args.channel != "tt":
        # Same-sign selection: invert the opposite-sign ("os") cut
        channel_qcd.cuts.get("os").invert()
    else:
        # tt: anti-isolated selection on the second tau instead
        channel_qcd.cuts.remove("tau_2_iso")
        channel_qcd.cuts.add(
            Cut("byTightIsolationMVArun2017v2DBoldDMwLT2017_2<0.5",
                "tau_2_iso"))
        channel_qcd.cuts.add(
            Cut("byLooseIsolationMVArun2017v2DBoldDMwLT2017_2>0.5",
                "tau_2_iso_loose"))
    output_config["processes"][estimation.name] = {
        "files": [
            str(f).replace(args.base_path.rstrip("/") + "/", "")
            for f in estimation.get_files()
        ],
        "cut_string": (estimation.get_cuts() + channel_qcd.cuts +
                       additional_cuts).expand(),
        "weight_string": estimation.get_weights().extract(),
        "class": classes_map[estimation.name]
    }

    #####################################
    # Write output config
    logger.info("Write config to file: {}".format(args.output_config))
    yaml.dump(output_config,
              open(args.output_config, 'w'),
              default_flow_style=False)
def main(args):
    # Define era and channel
    era = Run2016(args.datasets)
    if "et" in args.channel:
        channel = ETSM()
    elif "mt" in args.channel:
        channel = MTSM()
    elif "tt" in args.channel:
        channel = TTSM()
    else:
        logger.fatal("Channel %s not known.", args.channel)
        raise Exception
    logger.debug("Use channel %s.", args.channel)

    # Get cut string
    estimation = DataEstimation(era, args.directory, channel)
    cut_string = (estimation.get_cuts() + channel.cuts).expand()
    logger.debug("Data cut string: %s", cut_string)

    # Get chain
    tree_path = "{}_nominal/ntuple".format(args.channel)
    logger.debug("Use tree path %s to get tree.", tree_path)
    files = [str(f) for f in estimation.get_files()]
    chain = ROOT.TChain()
    for i, f in enumerate(files):
        base = os.path.basename(f).replace(".root", "")
        f_friend = os.path.join(args.artus_friends, base,
                                base + ".root") + "/" + tree_path
        logger.debug("Add file with scores %d: %s", i, f_friend)
        chain.Add(f_friend)
        logger.debug("Add friend with ntuple %d: %s", i, f)
        chain.AddFriend(tree_path, f)

    chain_numentries = chain.GetEntries()
    if not chain_numentries > 0:
        logger.fatal("Chain (before skimming) does not contain any events.")
        raise Exception
    logger.debug("Found %s events before skimming with cut string.",
                 chain_numentries)

    # Skim chain
    chain_skimmed = chain.CopyTree(cut_string)
    chain_skimmed_numentries = chain_skimmed.GetEntries()
    if not chain_skimmed_numentries > 0:
        logger.fatal("Chain (after skimming) does not contain any events.")
        raise Exception
    logger.debug("Found %s events after skimming with cut string.",
                 chain_skimmed_numentries)

    # Calculate binning
    logger.debug("Load classes from config %s.", args.training_config)
    classes = yaml.load(open(args.training_config))["classes"]
    logger.debug("Use classes %s.", classes)

    scores = [[] for c in classes]
    for event in chain_skimmed:
        max_score = float(getattr(event, args.channel + "_max_score"))
        max_index = int(getattr(event, args.channel + "_max_index"))
        scores[max_index].append(max_score)

    binning = {}
    percentiles = range(0, 105, 5)
    logger.debug("Use percentiles %s for binning.", percentiles)
    for i, name in enumerate(classes):
        logger.debug("Process class %s.", name)
        x = scores[i] + [1.0 / float(len(classes)), 1.0]
        logger.debug("Found %s events in class %s.", len(x), name)
        binning[name] = [float(q) for q in np.percentile(x, percentiles)]

    # Write binning to output
    config = yaml.load(open(args.output))
    config["analysis"][args.channel] = binning
    logger.info("Write binning to %s.", args.output)
    yaml.dump(config, open(args.output, "w"))
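# Hedged illustration (assumption, not part of the original script): the
# binning above places bin edges at every 5th percentile of the per-class
# maximum NN score, so each class gets 20 bins with roughly equal event
# counts. A minimal, self-contained sketch of that idea:
import numpy as np

def _equal_population_binning(scores, n_classes=5):
    # Guard values as in the script: the smallest possible max-score
    # (1/n_classes) and the largest (1.0) ensure the edges span the full range.
    x = list(scores) + [1.0 / float(n_classes), 1.0]
    percentiles = list(range(0, 105, 5))  # 0, 5, ..., 100 -> 21 edges
    return [float(q) for q in np.percentile(x, percentiles)]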