def __init__(self, base_values, sample_values=None):
    self._config = {}

    ### Config which is initialized with information for each sample from samples_run2_201X.py if wished
    if sample_values is not None:
        sample_settings = samples.Samples()
        sample_list, channel, category, estimationMethod, cut_type, nick_suffix, no_plot, weight = sample_values
        self._config = sample_settings.get_config(
            samples=[getattr(samples.Samples, sample) for sample in sample_list],
            channel=channel,
            category=category,
            no_plot=no_plot,
            nick_suffix=nick_suffix,
            estimationMethod=estimationMethod,
            cut_type=cut_type,
            weight=weight)

    ### Fill config with basic information
    for key, value in self.__base__(*base_values).iteritems():
        self._config[key] = value

    ### Dictionary with all possible plotting/analysis modules
    self._modules_dict = {
        Plotmodule.control_plot: self.__controlplot__,
        Plotmodule.sum_of_hists: self.__sumofhists__,
        Plotmodule.efficiency_plot: self.__efficiencyplot__,
        Plotmodule.shape_plot: self.__shapeplot__,
        Plotmodule.datacard: self.__datacard__,
        Plotmodule.cutflow_plot: self.__cutflowplot__,
        Plotmodule.ratio: self.__ratio__,
        Plotmodule.limit: self.__limitplot__,
        Plotmodule.blind: self.__blind__,
        Plotmodule.errorband: self.__errorband__,
    }
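
# A minimal, self-contained sketch of the dispatch-table technique behind
# _modules_dict above. The class and keys here are illustrative stand-ins,
# not the real Plotmodule API: mapping keys to bound methods lets callers
# select a module without if/elif chains.
class PlotterSketch(object):
    CONTROL_PLOT = "control_plot"
    RATIO = "ratio"

    def __init__(self):
        # Dispatch table: module key -> bound method.
        self._modules_dict = {
            PlotterSketch.CONTROL_PLOT: self.__controlplot__,
            PlotterSketch.RATIO: self.__ratio__,
        }

    def run(self, module):
        # Look up and call the registered module.
        return self._modules_dict[module]()

    def __controlplot__(self):
        return "control plot config"

    def __ratio__(self):
        return "ratio config"

print(PlotterSketch().run(PlotterSketch.CONTROL_PLOT))  # -> control plot config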
def get_input_files(channel):
    sample_settings = samples.Samples()
    mc_list = []
    for sample_list in [sig_list, bkg_list]:
        mc_set = set()
        config = sample_settings.get_config(
            samples=[getattr(samples.Samples, sample) for sample in sample_list],
            channel=channel,
            category=None,
            cut_type="lfv")
        mc_set.update([
            glob.glob(sample_dir + mc_name)[0]
            for mc_name in [
                file_name
                for file_list in [config_string.split(" ") for config_string in config["files"]]
                for file_name in file_list
            ]
        ])
        mc_list.append(list(mc_set))
    return mc_list
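
# The nested comprehension above flattens the space-separated entries of
# config["files"] and resolves each pattern with glob, taking the first match.
# Below is a self-contained sketch of the same pattern (sample_dir and the
# patterns are placeholders): itertools.chain makes the flattening explicit,
# and the filter guards against patterns with no match, which would otherwise
# raise an IndexError.
import glob
import itertools

config_files = ["a_*.root b_*.root", "c_*.root"]  # space-separated, as in config["files"]
sample_dir = "/path/to/samples/"  # placeholder
flat_names = itertools.chain.from_iterable(entry.split(" ") for entry in config_files)
resolved = [matches[0] for matches in (glob.glob(sample_dir + name) for name in flat_names) if matches]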
def create_input_root_files(datacards, args):
    '''Configure HarryPlotter according to the samples and create input ROOT files according to the args.'''
    plot_configs = []
    output_files = []
    merged_output_files = []
    hadd_commands = []

    sample_settings = samples.Samples()
    expression_settings = expressions.ExpressionsDict()
    binnings_settings = binnings.BinningsDict()
    systematics_factory = systematics.SystematicsFactory()

    datacards.configs._mapping_process2sample = {
        "data_obs": "data",
        "EWKZ": "ewkz",
        "QCD": "qcd",
        "TT": "ttj",
        "TTT": "ttt",
        "TTJ": "ttjj",
        "VV": "vv",
        "VVT": "vvt",
        "VVJ": "vvj",
        "W": "wj",
        "ZJ": "zj",
        "ZL": "zl",
        "ZLL": "zll",
        "ZTTPOSPOL": "zttpospol",
        "ZTTNEGPOL": "zttnegpol",
        "ZTT_GEN_DM_ZERO": "ztt_gen_dm_zero",
        "ZTT_GEN_DM_ONE": "ztt_gen_dm_one",
        "ZTT_GEN_DM_TWO": "ztt_gen_dm_two",
        "ZTT_GEN_DM_TEN": "ztt_gen_dm_ten",
        "ZTT_GEN_DM_ELEVEN": "ztt_gen_dm_eleven",
    }

    for index, (channel, categories) in enumerate(zip(args.channel, args.categories)):
        for category in categories:
            datacards_per_channel_category = datacardsbase.Datacards(
                cb=datacards.cb.cp().channel([channel]).bin([category]))
            higgs_masses = [mass for mass in datacards_per_channel_category.cb.mass_set() if mass != "*"]

            output_file = os.path.join(
                args.output_dir,
                "input/{ANALYSIS}_{CHANNEL}_{BIN}_{ERA}.root".format(
                    ANALYSIS="ztt", CHANNEL=channel, BIN=category, ERA="13TeV"))
            output_files.append(output_file)
            tmp_output_files = []

            for shape_systematic, list_of_samples in datacards_per_channel_category.get_samples_per_shape_systematic().iteritems():
                nominal = (shape_systematic == "nominal")
                list_of_samples = [datacards.configs.process2sample(process) for process in list_of_samples]
                if ("wj" in list_of_samples) and not ("qcd" in list_of_samples):
                    list_of_samples.append("qcd")
                elif ("qcd" in list_of_samples) and not ("wj" in list_of_samples):
                    list_of_samples.append("wj")

                asimov_nicks = []
                if args.use_asimov_dataset:
                    asimov_nicks = [
                        nick.replace("zttpospol", "zttpospol_noplot").replace("zttnegpol", "zttnegpol_noplot")
                        for nick in list_of_samples
                    ]
                    if "data" in asimov_nicks:
                        asimov_nicks.remove("data")

                for shift_up in ([True] if nominal else [True, False]):
                    systematic = "nominal" if nominal else (shape_systematic + ("Up" if shift_up else "Down"))
                    log.debug(
                        "Create inputs for (samples, systematic) = ([\"{samples}\"], {systematic}), (channel, category) = ({channel}, {category}).".format(
                            samples="\", \"".join(list_of_samples),
                            channel=channel,
                            category=category,
                            systematic=systematic))

                    tmp_quantity = args.quantity
                    tmp_omega_version = args.omega_version
                    if args.fixed_variables == "best_choice":
                        if channel in ["tt"]:
                            if category in [channel + "_" + cat for cat in ["combined_rho_oneprong", "combined_oneprong_oneprong"]]:
                                tmp_quantity = "m_vis"
                                tmp_omega_version = None
                            elif category in [channel + "_" + cat for cat in ["combined_a1_rho"]]:
                                tmp_quantity = None
                                tmp_omega_version = None
                            elif category in [channel + "_" + cat for cat in ["combined_a1_a1", "combined_a1_oneprong"]]:
                                tmp_quantity = None
                                tmp_omega_version = "BarSvfitM91"
                            elif category in [channel + "_" + cat for cat in ["combined_rho_rho", "rho"]]:
                                tmp_quantity = None
                                tmp_omega_version = "VisibleSvfit"
                        elif channel in ["mt", "et"]:
                            if category in [channel + "_" + cat for cat in ["combined_a1_oneprong"]]:
                                tmp_quantity = "m_vis"
                                tmp_omega_version = None
                            elif category in [channel + "_" + cat for cat in ["combined_rho_oneprong"]]:
                                tmp_quantity = None
                                tmp_omega_version = None
                            elif category in [channel + "_" + cat for cat in ["combined_oneprong_oneprong", "a1", "oneprong"]]:
                                tmp_quantity = None
                                tmp_omega_version = "BarSvfitM91"
                            elif category in [channel + "_" + cat for cat in ["rho"]]:
                                tmp_quantity = None
                                tmp_omega_version = "VisibleSvfit"
                        elif channel in ["em"]:
                            if category in [channel + "_" + cat for cat in ["combined_oneprong_oneprong"]]:
                                tmp_quantity = "m_vis"
                                tmp_omega_version = None
                    elif args.fixed_variables == "best_choice_no_svfit":
                        tmp_quantity = "m_vis"
                        tmp_omega_version = None
                        if channel in ["tt", "mt", "et"]:
                            if category in [channel + "_" + cat for cat in ["combined_rho_rho", "rho"]]:
                                tmp_quantity = None
                                tmp_omega_version = "VisibleSvfit"

                    x_expression = None
                    if tmp_quantity:
                        x_expression = tmp_quantity
                    else:
                        x_expression = "testZttPol13TeV_" + category
                        if tmp_omega_version:
                            x_expression = expression_settings.expressions_dict[x_expression].replace("BarSvfit", tmp_omega_version)
                    x_expression = expression_settings.expressions_dict.get(x_expression, x_expression)

                    # prepare plotting configs for retrieving the input histograms
                    config = sample_settings.get_config(
                        samples=[getattr(samples.Samples, sample) for sample in list_of_samples],
                        channel=channel,
                        category="catZttPol13TeV_" + category,
                        weight=args.weight,
                        lumi=args.lumi * 1000,
                        higgs_masses=higgs_masses,
                        estimationMethod="new",
                        polarisation_bias_correction=True,
                        cut_type="low_mvis_smhtt2016",
                        exclude_cuts=(["m_vis"] if x_expression == "m_vis" else []),
                        no_ewk_samples=args.no_ewk_samples,
                        no_ewkz_as_dy=True,
                        asimov_nicks=asimov_nicks)

                    systematics_settings = systematics_factory.get(shape_systematic)(config)
                    # TODO: evaluate shift from datacards_per_channel_category.cb
                    config = systematics_settings.get_config(shift=(0.0 if nominal else (1.0 if shift_up else -1.0)))

                    #config["qcd_subtract_shape"] = [args.qcd_subtract_shapes]
                    config["x_expressions"] = [
                        ("0" if (("gen_zttpospol" in nick) or ("gen_zttnegpol" in nick)) else x_expression)
                        for nick in config["nicks"]
                    ]

                    binnings_key = "binningZttPol13TeV_" + category + "_" + x_expression
                    if not (binnings_key in binnings_settings.binnings_dict):
                        binnings_key = "binningZttPol13TeV_" + category + (("_" + tmp_quantity) if tmp_quantity else "")
                    if binnings_key in binnings_settings.binnings_dict:
                        config["x_bins"] = [
                            ("1,-1,1" if (("gen_zttpospol" in nick) or ("gen_zttnegpol" in nick)) else binnings_key)
                            for nick in config["nicks"]
                        ]

                    if args.fixed_binning:
                        if args.fixed_variables:
                            if channel == "tt":
                                config["x_bins"] = [args.fixed_binning.split(",")[0] + ",-1.0001,1.0001" for nick in config["nicks"]]
                            else:
                                config["x_bins"] = [args.fixed_binning for nick in config["nicks"]]
                        else:
                            config["x_bins"] = [args.fixed_binning for nick in config["nicks"]]

                    config["directories"] = [args.input_dir]

                    histogram_name_template = "${BIN}/${PROCESS}" if nominal else "${BIN}/${PROCESS}_${SYSTEMATIC}"
                    config["labels"] = [
                        histogram_name_template.replace("$", "").format(
                            PROCESS=datacards.configs.sample2process(sample.replace("asimov", "data")),
                            BIN=category,
                            SYSTEMATIC=systematic)
                        for sample in config["labels"]
                    ]

                    tmp_output_file = os.path.join(
                        args.output_dir,
                        "input/{ANALYSIS}_{CHANNEL}_{BIN}_{SYSTEMATIC}_{ERA}.root".format(
                            ANALYSIS="ztt", CHANNEL=channel, BIN=category, SYSTEMATIC=systematic, ERA="13TeV"))
                    tmp_output_files.append(tmp_output_file)
                    config["output_dir"] = os.path.dirname(tmp_output_file)
                    config["filename"] = os.path.splitext(os.path.basename(tmp_output_file))[0]

                    config["plot_modules"] = ["ExportRoot"]
                    config["file_mode"] = "UPDATE"
                    if "legend_markers" in config:
                        config.pop("legend_markers")

                    plot_configs.append(config)

            hadd_commands.append("hadd -f {DST} {SRC} && rm {SRC}".format(
                DST=output_file, SRC=" ".join(tmp_output_files)))

    tmp_output_files = list(set([
        os.path.join(config["output_dir"], config["filename"] + ".root")
        for config in plot_configs[:args.n_plots[0]]
    ]))
    for output_file in tmp_output_files:
        if os.path.exists(output_file):
            os.remove(output_file)
            log.debug("Removed file \"" + output_file + "\" before it is recreated again.")
    output_files = list(set(output_files))

    higgsplot.HiggsPlotter(
        list_of_config_dicts=plot_configs,
        list_of_args_strings=[args.args],
        n_processes=args.n_processes,
        n_plots=args.n_plots[0],
        batch=args.batch)
    if args.n_plots[0] != 0:
        tools.parallelize(_call_command, hadd_commands, n_processes=args.n_processes)

    debug_plot_configs = []
    for output_file in output_files:
        debug_plot_configs.extend(plotconfigs.PlotConfigs().all_histograms(
            output_file, plot_config_template={"markers": ["E"], "colors": ["#FF0000"]}))
    if args.www:
        for debug_plot_config in debug_plot_configs:
            debug_plot_config["www"] = debug_plot_config["output_dir"].replace(args.output_dir, args.www)
    #higgsplot.HiggsPlotter(list_of_config_dicts=debug_plot_configs, list_of_args_strings=[args.args], n_processes=args.n_processes, n_plots=args.n_plots[0])

    return None
args.lumi = samples.default_lumi / 1000.0  # convert default_lumi from pb^-1 to fb^-1

# Clean the output directory
args.output_dir = os.path.abspath(os.path.expandvars(args.output_dir))
if args.clear_output_dir:
    clear_output_dir = raw_input("Do you really want to clear the output directory? [yes]").lower() == "yes"
    if not clear_output_dir:
        log.info("Terminating. Remove the clear_output_dir option and run the program again.")
        sys.exit(1)
    logger.subprocessCall("rm -r " + args.output_dir, shell=True)

sample_settings = samples.Samples()
binnings_settings = binnings.BinningsDict()
systematics_factory = systematics.SystematicsFactory()

www_output_dirs_postfit = []
www_output_dirs_weightbin = []
www_output_dirs_parabola = []

plot_configs = []
output_files = []
merged_output_files = []
hadd_commands = []

# Initialise directory and naming scheme templates for datacards
tmp_input_root_filename_template = "input/${ANALYSIS}_${CHANNEL}_${BIN}_${SYSTEMATIC}_${ERA}.root"
input_root_filename_template = "input/${ANALYSIS}_${CHANNEL}_${ERA}.root"
bkg_histogram_name_template = "${BIN}/${PROCESS}"
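
# Clearing the directory with "rm -r" through a subprocess works, but a
# pure-Python alternative avoids shell quoting issues. A sketch using shutil,
# offered as a deliberate substitution rather than what the script above calls:
import os
import shutil

def clear_directory(path):
    """Remove a directory tree if it exists and recreate it empty."""
    if os.path.isdir(path):
        shutil.rmtree(path)
    os.makedirs(path)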
def application(channelselection):
    import HiggsAnalysis.KITHiggsToTauTau.plotting.configs.samples_run2_2016 as samples
    import glob
    import ConfigParser
    import os
    from random import uniform

    sample_settings = samples.Samples()
    mc_list = []
    mc_set = set()

    ## Get file names using samples_run2_2016
    for sample_list in [["data"], bkg_list, sig_list]:
        for channel in ["em", "et", "mt"]:
            config = sample_settings.get_config(
                samples=[getattr(samples.Samples, sample) for sample in sample_list],
                channel=channel,
                category=None,
                cut_type="lfv")
            mc_set.update([
                glob.glob(sample_dir + mc_name)[0]
                for mc_name in [
                    file_name
                    for file_list in [config_string.split(" ") for config_string in config["files"]]
                    for file_name in file_list
                ]
            ])
    for file_name in list(mc_set):
        mc_list.append(file_name)
    #print mc_list

    ## Write config for grid-control
    local = False
    if not local:
        seed = str(int(uniform(1, 10000)))
        Config = ConfigParser.ConfigParser()
        Config.optionxform = str
        config_name = "config" + seed + ".cfg"
        config = open(config_name, "w")
        sections = ["global", "jobs", "UserTask", "parameters"]
        entries = [
            [("task", "UserTask"), ("backend", "condor"), ("cmdargs", "-c -G"),
             ("workdir create", True), ("workdir", "/tmp/Flavio_work" + seed)],
            [("wall time", "1:00"), ("max retry", 5)],
            [("executable", "kappa.sh")],
            [("parameters", "FILE_NAME CHANNEL"), ("FILE_NAME", " ".join(mc_list)), ("CHANNEL", "em")],
        ]
        for section in sections:
            Config.add_section(section)
        for section, entry in zip(sections, entries):
            for key, value in entry:
                Config.set(section, key, value)
        Config.write(config)
        config.close()

        # Send jobs to the batch system
        os.system("go.py {config_name}".format(config_name=config_name))
        os.system("rm config*.cfg")
    else:
        pool = Pool(cpu_count())
        for filename in mc_list:
            pool.apply_async(attach, args=(filename,))
        pool.close()
        pool.join()
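
# A self-contained version of the grid-control config writing above. Section
# and key names mirror the snippet; the values are illustrative. ConfigParser
# stringifies values on write, but converting explicitly (as with "5" below)
# keeps the intent obvious.
import ConfigParser  # Python 2; use configparser in Python 3

cfg = ConfigParser.ConfigParser()
cfg.optionxform = str  # keep option keys case-sensitive, as grid-control expects
example_entries = {
    "global": [("task", "UserTask"), ("backend", "condor")],
    "jobs": [("wall time", "1:00"), ("max retry", "5")],
    "UserTask": [("executable", "kappa.sh")],
    "parameters": [("parameters", "FILE_NAME CHANNEL"), ("CHANNEL", "em")],
}
for section, pairs in example_entries.items():
    cfg.add_section(section)
    for key, value in pairs:
        cfg.set(section, key, value)
with open("example.cfg", "w") as config_file:
    cfg.write(config_file)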
def create_input_trees(channelselection):
    import HiggsAnalysis.KITHiggsToTauTau.plotting.configs.samples_run2_2016 as samples
    import glob

    sample_settings = samples.Samples()
    mc_list = []

    ## Get file names using samples_run2_2016
    for sample_list in [sig_list, bkg_list]:
        mc_set = set()
        config = sample_settings.get_config(
            samples=[getattr(samples.Samples, sample) for sample in sample_list],
            channel="em",
            category=None,
            cut_type="lfv")
        mc_set.update([
            glob.glob(sample_dir + mc_name)[0]
            for mc_name in [
                file_name
                for file_list in [config_string.split(" ") for config_string in config["files"]]
                for file_name in file_list
            ]
        ])
        mc_list.append(list(mc_set))

    check_em = False
    for channel in channelselection:
        if channel == "em" or channel == "etm" or channel == "mte":
            if check_em:
                continue
            else:
                check_em = True
                channel = "em"

        ## Create signal/background chains and read in all files
        sig_chain = ROOT.TChain(tree_name.format(channel=channel))
        bkg_chain = ROOT.TChain(tree_name.format(channel=channel))
        for sig_file in mc_list[0]:
            sig_chain.Add(sig_file)
        for bkg_file in mc_list[1]:
            bkg_chain.Add(bkg_file)

        ## Deactivate branches which are not available in all files
        invalid_branch = [
            "genBosonLV", "genBosonLep1LV", "genBosonLep2LV",
            "genBosonTau1LV", "genBosonTau2LV",
            "genBosonTau1VisibleLV", "genBosonTau2VisibleLV",
        ]
        for branch in invalid_branch:
            sig_chain.SetBranchStatus(branch, 0)
            bkg_chain.SetBranchStatus(branch, 0)

        ## Create input ROOT files in parallel
        pool = Pool(4)
        pool.map(write_root_file, [
            [sig_chain, "event%2==0", sig_name, channel],
            [sig_chain, "event%2==1", sig_name, channel],
            [bkg_chain, "event%2==0", bkg_name, channel],
            [bkg_chain, "event%2==1", bkg_name, channel],
        ])
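
# The even/odd split on the event number above is a standard way to obtain
# statistically independent training and testing samples. A sketch of the same
# split on a single tree, assuming PyROOT is available; "input.root" and
# "ntuple" are placeholder names:
import ROOT

input_file = ROOT.TFile("input.root")          # placeholder input file
tree = input_file.Get("ntuple")                # placeholder tree name
output_file = ROOT.TFile("split.root", "RECREATE")
train_tree = tree.CopyTree("event%2==0")       # even event numbers -> training
test_tree = tree.CopyTree("event%2==1")        # odd event numbers -> testing
output_file.Write()
output_file.Close()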
def main():
    ## Parse arguments
    args = parser()

    ## For Z->lt (leptonic) only the em channel is available, with the additional
    ## distinction of which lepton comes first (etm/mte).
    functions = [get_hists] + ([shape_plot] if args.shape else [plot])
    for func in functions:
        pool = Pool(cpu_count())
        tasks = []
        for channel in args.channel:
            if not os.path.exists(plot_dir + channel):
                os.system("mkdir -p " + plot_dir + channel)

            ## Process names
            processes = ["qcd", "ttj", "vv", "wj", "ztt", "zll"] + ["z{}".format(channel)] + ["data"]

            for param in args.parameter:
                ## Check if the parameter's plotting configuration is stored in parameter.yaml
                param_config = yaml.load(open(parameter_yaml, "r"))
                if param in param_config.keys():
                    parameter, binning, plotname, x_label = param_config[param][:4]
                    if param in args.weight:
                        args.weight = args.weight.replace(param, parameter)
                else:
                    parameter, binning, plotname, x_label = (param, ["30"], param, param)
                if isinstance(binning, dict):
                    binning = binning[channel]

                ## Get histograms from the ntuple with harry.py
                if func == get_hists:
                    sample_settings = samples.Samples()
                    base_config = sample_settings.get_config(
                        [getattr(samples.Samples, process) for process in processes],
                        "em" if channel == "etm" or channel == "mte" else channel,
                        category=None,
                        estimationMethod="new",
                        weight=args.weight)
                    task = pool.apply_async(get_hists, args=(parameter, base_config, binning, channel))
                    tasks.append(task)

                ## Plot everything
                if func == plot:
                    task = pool.apply_async(plot, args=(parameter, channel, processes, plotname, x_label, args.data, args.name_suffix, args.www))
                    tasks.append(task)

                ## Plot shape histograms
                if func == shape_plot:
                    task = pool.apply_async(shape_plot, args=(parameter, channel, processes, plotname, x_label, args.name_suffix, args.www))
                    tasks.append(task)

        pool.close()
        pool.join()
        [task.get() for task in tasks]

    ## Clean up
    for channel in ["em", "et", "mt", "etm", "mte"]:
        if os.path.exists(plot_dir + channel):
            os.system("rm --force " + plot_dir + channel + "/*.root")
            os.system("rm --force " + plot_dir + channel + "/*.json")

    ## Web plotting
    if args.www != "":
        for channel in args.channel:
            webplot(args.www, channel)
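
# The apply_async/close/join/get pattern used above, reduced to a
# self-contained sketch. Calling task.get() after join() also re-raises any
# exception a worker hit, which pool.apply_async would otherwise swallow.
from multiprocessing import Pool, cpu_count

def square(value):
    return value * value

if __name__ == "__main__":
    pool = Pool(cpu_count())
    tasks = [pool.apply_async(square, args=(value,)) for value in range(4)]
    pool.close()
    pool.join()
    print([task.get() for task in tasks])  # -> [0, 1, 4, 9]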
def create_datacards(channel, method):
    backgrounds = {"ZTT": "ztt", "VV": "vv", "W": "wj", "QCD": "qcd"}
    backgrounds.update(
        {"TT": "ttj", "ZLL": "zll"} if channel == "em"
        else {"TTT": "ttt", "TTJJ": "ttjj", "ZL": "zl", "ZJ": "zj"})

    ## CombineHarvester instance
    cb = ch.CombineHarvester()

    # Instance for extracting histograms
    sample_settings = samples.Samples()
    config_list = []

    ## Weights
    cut_info = yaml.load(open(os.environ["CMSSW_BASE"] + "/src/FlavioOutput/Configs/cuts.yaml", "r"))
    parameter_info = yaml.load(open(os.environ["CMSSW_BASE"] + "/src/FlavioOutput/Configs/parameter.yaml", "r"))

    weights = []
    for index, category in enumerate(["(njetspt30==0)", "(njetspt30==1)", "(njetspt30>1)"]):  #, "(nbtag==2)"]):
        #cut_strings = [parameter_info[param][4] for param in cut_info[index][channel].keys()]
        #cut_values, cut_side = [[entry[index2] for entry in cut_info[index][channel].values()] for index2 in [0, 1]]
        weights.append({
            #"cut_based": "*".join([cut_strings[index2].format(side=side, cut=value) for index2, (side, value) in enumerate(zip(cut_side, cut_values))] + [category]),
            "cut_BDT": "(BDT_forcut_score>0.7)*" + category,
            "cut_Ada_BDT": "(BDT_Ada_forcut_score>0.0)*" + category,
            "BDT": category,
            "Ada_BDT": category,
        })

    ## Fill CombineHarvester with categories/processes
    for category in categories + controlregions:
        ## Add data/signal
        cb.AddObservations(["*"], ["lfv"], ["13TeV"], [channel], [category])
        if not "CR" in category[1]:
            cb.AddProcesses(["*"], ["lfv"], ["13TeV"], [channel], ["Z" + channel.upper()], [category], True)

        ## Config for each category
        config = sample_settings.get_config(
            [getattr(samples.Samples, sample) for sample in data.values() + (["z" + channel] if "CR" not in category[1] else []) + backgrounds.values()],
            channel,
            None,
            estimationMethod="new",
            weight=weights[category[0]][method])
        config.pop("legend_markers", None)  # default avoids a KeyError if the key is absent
        config.update({
            "filename": "input_" + method + "_nominal_" + category[1],
            "plot_modules": ["ExportRoot"],
            "file_mode": "UPDATE",
            "directories": os.environ["MCPATH"],
            "x_expressions": x[method],
            "x_bins": x_bins[method],
            "output_dir": output_dir + channel,
            "no_cache": True,
        })
        config["labels"] = [
            category[1] + "/" + process
            for process in data.keys() + (["Z" + channel.upper()] if "CR" not in category[1] else []) + backgrounds.keys()
        ]
        config_list.append(config)

        for process in backgrounds.keys():
            ## Add background
            cb.AddProcesses(["*"], ["lfv"], ["13TeV"], [channel], [process], [category], False)

    ## Fill CombineHarvester with control regions
    for CR in controlregions:
        # strip the "_CR" suffix for the rate parameter name (strings have no remove() method)
        cb.cp().channel([channel]).bin([CR[1]]).AddSyst(cb, "scale_" + CR[1].replace("_CR", ""), "rateParam", ch.SystMap())
    for category in categories:
        cb.cp().bin([category[1]]).AddSyst(cb, "scale_" + category[1].replace("_CR", ""), "rateParam", ch.SystMapFunc())

    ## Fill CombineHarvester with systematics
    systematics_list = SystLib.SystematicLibary()
    systematics_factory = systematics.SystematicsFactory()
    for (systematic, process, category) in systematics_list.get_LFV_systs(channel, lnN=True) + systematics_list.get_LFV_systs(channel, shape=True):
        cb.cp().channel([channel]).process(process).AddSyst(cb, *systematic)

        if "W" in process and "QCD" not in process:
            process.append("QCD")
        if "QCD" in process and "W" not in process:
            process.append("W")

        if systematic[1] == "shape":
            ## Config for each systematic shift:
            for category in categories + controlregions:
                if "CR" in category[1] and "Z" + channel.upper() in process:
                    process.remove("Z" + channel.upper())
                for shift in ["Down", "Up"]:
                    config = sample_settings.get_config(
                        [getattr(samples.Samples, dict(signals, **backgrounds)[sample]) for sample in process],
                        channel,
                        None,
                        estimationMethod="new",
                        weight=weights[category[0]][method])
                    config.pop("legend_markers", None)
                    config.update({
                        "filename": "input_" + method + "_" + systematic[0].replace("$ERA", "13TeV").replace("$CHANNEL", channel) + shift + "_" + category[1],
                        "plot_modules": ["ExportRoot"],
                        "file_mode": "UPDATE",
                        "directories": os.environ["MCPATH"],
                        "x_expressions": x[method],
                        "x_bins": x_bins[method],
                        "output_dir": output_dir + channel,
                        "no_cache": True,
                    })
                    config["labels"] = [
                        category[1] + "/" + proc + "_" + systematic[0].replace("$ERA", "13TeV").replace("$CHANNEL", channel) + shift
                        for proc in process
                    ]

                    if systematic[0].replace("$ERA", "13TeV").replace("$CHANNEL", channel) == "CMS_scale_j_13TeV":
                        systematics_settings = systematics_factory.get(systematic[0].replace("$ERA", "13TeV").replace("$CHANNEL", channel))(config, "Total")
                    else:
                        systematics_settings = systematics_factory.get(systematic[0].replace("$ERA", "13TeV").replace("$CHANNEL", channel))(config)
                    config = systematics_settings.get_config(1 if shift == "Up" else -1)
                    config_list.append(config)

    pool = Pool(cpu_count())
    for config in config_list:
        pool.apply_async(harry_do_your_job, args=(config,))
    pool.close()
    pool.join()

    os.system("hadd {target}.root {root_files}*.root".format(
        target=output_dir + channel + "/input_" + method,
        root_files=output_dir + channel + "/input_" + method))

    ## Fill CombineHarvester with the shapes which were extracted before with harry.py
    cb.cp().backgrounds().ExtractShapes(output_dir + channel + "/input_" + method + ".root", "$BIN/$PROCESS", "$BIN/$PROCESS_$SYSTEMATIC")
    cb.cp().signals().ExtractShapes(output_dir + channel + "/input_" + method + ".root", "$BIN/$PROCESS", "$BIN/$PROCESS_$SYSTEMATIC")

    ## Write datacards (combined and per category)
    cb.WriteDatacard(
        output_dir + channel + "/combined_" + method + ".txt",
        output_dir + channel + "/combined_datacard_" + method + ".root")
    for category in categories:
        cb_copy = cb.cp()
        cb_copy.FilterAll(lambda obj: obj.bin() != category[1])
        cb_copy.WriteDatacard(
            output_dir + channel + "/" + category[1] + "_" + method + ".txt",
            output_dir + channel + "/" + category[1] + "_datacard_" + method + ".root")
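
# The original sample lists above used {True: [...], False: [...]}[condition]
# lookups; the conditional expressions now in place are equivalent and avoid
# building the unused branch. A minimal demonstration with illustrative values:
category_name = "signal_region"
channel_name = "em"
signal = (["z" + channel_name] if "CR" not in category_name else [])
signal_dict_form = {True: ["z" + channel_name], False: []}["CR" not in category_name]
assert signal == signal_dict_form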