def create_input_root_files(datacards, args):
    '''Configure HarryPlotter according to the samples and create the input ROOT files according to the args.'''
    plot_configs = []
    output_files = []
    merged_output_files = []
    hadd_commands = []

    sample_settings = samples.Samples()
    expression_settings = expressions.ExpressionsDict()
    binnings_settings = binnings.BinningsDict()
    systematics_factory = systematics.SystematicsFactory()

    # mapping between datacard process names and the sample nicks used by the plotting configs
    datacards.configs._mapping_process2sample = {
        "data_obs": "data",
        "EWKZ": "ewkz",
        "QCD": "qcd",
        "TT": "ttj",
        "TTT": "ttt",
        "TTJ": "ttjj",
        "VV": "vv",
        "VVT": "vvt",
        "VVJ": "vvj",
        "W": "wj",
        "ZJ": "zj",
        "ZL": "zl",
        "ZLL": "zll",
        "ZTTPOSPOL": "zttpospol",
        "ZTTNEGPOL": "zttnegpol",
        "ZTT_GEN_DM_ZERO": "ztt_gen_dm_zero",
        "ZTT_GEN_DM_ONE": "ztt_gen_dm_one",
        "ZTT_GEN_DM_TWO": "ztt_gen_dm_two",
        "ZTT_GEN_DM_TEN": "ztt_gen_dm_ten",
        "ZTT_GEN_DM_ELEVEN": "ztt_gen_dm_eleven",
    }

    for index, (channel, categories) in enumerate(zip(args.channel, args.categories)):
        for category in categories:
            datacards_per_channel_category = datacardsbase.Datacards(cb=datacards.cb.cp().channel([channel]).bin([category]))

            higgs_masses = [mass for mass in datacards_per_channel_category.cb.mass_set() if mass != "*"]

            output_file = os.path.join(args.output_dir, "input/{ANALYSIS}_{CHANNEL}_{BIN}_{ERA}.root".format(
                    ANALYSIS="ztt",
                    CHANNEL=channel,
                    BIN=category,
                    ERA="13TeV"
            ))
            output_files.append(output_file)
            tmp_output_files = []

            # one plotting config per (shape systematic, shift direction)
            for shape_systematic, list_of_samples in datacards_per_channel_category.get_samples_per_shape_systematic().iteritems():
                nominal = (shape_systematic == "nominal")
                list_of_samples = [datacards.configs.process2sample(process) for process in list_of_samples]

                # the QCD and W+jets estimations depend on each other, so always process both together
                if ("wj" in list_of_samples) and not ("qcd" in list_of_samples):
                    list_of_samples.append("qcd")
                elif ("qcd" in list_of_samples) and not ("wj" in list_of_samples):
                    list_of_samples.append("wj")

                # nicks used to build an Asimov dataset instead of using real data
                asimov_nicks = []
                if args.use_asimov_dataset:
                    asimov_nicks = [nick.replace("zttpospol", "zttpospol_noplot").replace("zttnegpol", "zttnegpol_noplot") for nick in list_of_samples]
                    if "data" in asimov_nicks:
                        asimov_nicks.remove("data")

                for shift_up in ([True] if nominal else [True, False]):
                    systematic = "nominal" if nominal else (shape_systematic + ("Up" if shift_up else "Down"))

                    log.debug("Create inputs for (samples, systematic) = ([\"{samples}\"], {systematic}), (channel, category) = ({channel}, {category}).".format(
                            samples="\", \"".join(list_of_samples),
                            channel=channel,
                            category=category,
                            systematic=systematic
                    ))

                    # determine the quantity/expression to be plotted for this channel/category
                    tmp_quantity = args.quantity
                    tmp_omega_version = args.omega_version
                    if args.fixed_variables == "best_choice":
                        if channel in ["tt"]:
                            if category in [channel + "_" + cat for cat in ["combined_rho_oneprong", "combined_oneprong_oneprong"]]:
                                tmp_quantity = "m_vis"
                                tmp_omega_version = None
                            elif category in [channel + "_" + cat for cat in ["combined_a1_rho"]]:
                                tmp_quantity = None
                                tmp_omega_version = None
                            elif category in [channel + "_" + cat for cat in ["combined_a1_a1", "combined_a1_oneprong"]]:
                                tmp_quantity = None
                                tmp_omega_version = "BarSvfitM91"
                            elif category in [channel + "_" + cat for cat in ["combined_rho_rho", "rho"]]:
                                tmp_quantity = None
                                tmp_omega_version = "VisibleSvfit"
                        elif channel in ["mt", "et"]:
                            if category in [channel + "_" + cat for cat in ["combined_a1_oneprong"]]:
                                tmp_quantity = "m_vis"
                                tmp_omega_version = None
                            elif category in [channel + "_" + cat for cat in ["combined_rho_oneprong"]]:
                                tmp_quantity = None
                                tmp_omega_version = None
                            elif category in [channel + "_" + cat for cat in ["combined_oneprong_oneprong", "a1", "oneprong"]]:
                                tmp_quantity = None
                                tmp_omega_version = "BarSvfitM91"
                            elif category in [channel + "_" + cat for cat in ["rho"]]:
                                tmp_quantity = None
                                tmp_omega_version = "VisibleSvfit"
                        elif channel in ["em"]:
                            if category in [channel + "_" + cat for cat in ["combined_oneprong_oneprong"]]:
                                tmp_quantity = "m_vis"
                                tmp_omega_version = None
                    elif args.fixed_variables == "best_choice_no_svfit":
                        tmp_quantity = "m_vis"
                        tmp_omega_version = None
                        if channel in ["tt", "mt", "et"]:
                            if category in [channel + "_" + cat for cat in ["combined_rho_rho", "rho"]]:
                                tmp_quantity = None
                                tmp_omega_version = "VisibleSvfit"

                    x_expression = None
                    if tmp_quantity:
                        x_expression = tmp_quantity
                    else:
                        x_expression = "testZttPol13TeV_" + category
                        if tmp_omega_version:
                            x_expression = expression_settings.expressions_dict[x_expression].replace("BarSvfit", tmp_omega_version)
                    x_expression = expression_settings.expressions_dict.get(x_expression, x_expression)

                    # prepare plotting configs for retrieving the input histograms
                    config = sample_settings.get_config(
                            samples=[getattr(samples.Samples, sample) for sample in list_of_samples],
                            channel=channel,
                            category="catZttPol13TeV_" + category,
                            weight=args.weight,
                            lumi=args.lumi * 1000,
                            higgs_masses=higgs_masses,
                            estimationMethod="new",
                            polarisation_bias_correction=True,
                            cut_type="low_mvis_smhtt2016",
                            exclude_cuts=(["m_vis"] if x_expression == "m_vis" else []),
                            no_ewk_samples=args.no_ewk_samples,
                            no_ewkz_as_dy=True,
                            asimov_nicks=asimov_nicks
                    )

                    systematics_settings = systematics_factory.get(shape_systematic)(config)
                    # TODO: evaluate shift from datacards_per_channel_category.cb
                    config = systematics_settings.get_config(shift=(0.0 if nominal else (1.0 if shift_up else -1.0)))
                    #config["qcd_subtract_shape"] = [args.qcd_subtract_shapes]

                    config["x_expressions"] = [("0" if (("gen_zttpospol" in nick) or ("gen_zttnegpol" in nick)) else x_expression) for nick in config["nicks"]]

                    binnings_key = "binningZttPol13TeV_" + category + "_" + x_expression
                    if binnings_key not in binnings_settings.binnings_dict:
                        binnings_key = "binningZttPol13TeV_" + category + (("_" + tmp_quantity) if tmp_quantity else "")
                    if binnings_key in binnings_settings.binnings_dict:
                        config["x_bins"] = [("1,-1,1" if (("gen_zttpospol" in nick) or ("gen_zttnegpol" in nick)) else binnings_key) for nick in config["nicks"]]

                    if args.fixed_binning:
                        if args.fixed_variables:
                            if channel == "tt":
                                config["x_bins"] = [args.fixed_binning.split(",")[0] + ",-1.0001,1.0001" for nick in config["nicks"]]
                            else:
                                config["x_bins"] = [args.fixed_binning for nick in config["nicks"]]
                        else:
                            config["x_bins"] = [args.fixed_binning for nick in config["nicks"]]

                    config["directories"] = [args.input_dir]

                    histogram_name_template = "${BIN}/${PROCESS}" if nominal else "${BIN}/${PROCESS}_${SYSTEMATIC}"
                    config["labels"] = [histogram_name_template.replace("$", "").format(
                            PROCESS=datacards.configs.sample2process(sample.replace("asimov", "data")),
                            BIN=category,
                            SYSTEMATIC=systematic
                    ) for sample in config["labels"]]

                    tmp_output_file = os.path.join(args.output_dir, "input/{ANALYSIS}_{CHANNEL}_{BIN}_{SYSTEMATIC}_{ERA}.root".format(
                            ANALYSIS="ztt",
                            CHANNEL=channel,
                            BIN=category,
                            SYSTEMATIC=systematic,
                            ERA="13TeV"
                    ))
                    tmp_output_files.append(tmp_output_file)
                    config["output_dir"] = os.path.dirname(tmp_output_file)
                    config["filename"] = os.path.splitext(os.path.basename(tmp_output_file))[0]

                    config["plot_modules"] = ["ExportRoot"]
                    config["file_mode"] = "UPDATE"

                    if "legend_markers" in config:
                        config.pop("legend_markers")

                    plot_configs.append(config)

            # merge the per-systematic files into one input file per channel/category
            hadd_commands.append("hadd -f {DST} {SRC} && rm {SRC}".format(
                    DST=output_file,
                    SRC=" ".join(tmp_output_files)
            ))

    # remove files left over from previous runs before they are recreated in UPDATE mode
    tmp_output_files = list(set([os.path.join(config["output_dir"], config["filename"] + ".root") for config in plot_configs[:args.n_plots[0]]]))
    for output_file in tmp_output_files:
        if os.path.exists(output_file):
            os.remove(output_file)
            log.debug("Removed file \"" + output_file + "\" before it is recreated.")
    output_files = list(set(output_files))

    higgsplot.HiggsPlotter(
            list_of_config_dicts=plot_configs,
            list_of_args_strings=[args.args],
            n_processes=args.n_processes,
            n_plots=args.n_plots[0],
            batch=args.batch
    )
    if args.n_plots[0] != 0:
        tools.parallelize(_call_command, hadd_commands, n_processes=args.n_processes)

    debug_plot_configs = []
    for output_file in output_files:
        debug_plot_configs.extend(plotconfigs.PlotConfigs().all_histograms(output_file, plot_config_template={"markers": ["E"], "colors": ["#FF0000"]}))
    if args.www:
        for debug_plot_config in debug_plot_configs:
            debug_plot_config["www"] = debug_plot_config["output_dir"].replace(args.output_dir, args.www)
    #higgsplot.HiggsPlotter(list_of_config_dicts=debug_plot_configs, list_of_args_strings=[args.args], n_processes=args.n_processes, n_plots=args.n_plots[0])

    return None
parser.add_argument("-n", "--n-processes", type=int, default=1, help="Number of (parallel) processes. [Default: %(default)s]") parser.add_argument("-p", "--n-plots", type=int, help="Number of plots. [Default: all]") #parser.add_argument("--calculate-separation", action="store_true", default =False, #help="calculate separation using TestSample") #parser.add_argument("--no-plot", action="store_true", default =False, #help="skip plotting") args = parser.parse_args() logger.initLogger(args) nice_channel = { "em": "#scale[1.5]{e#mu ", "mt": "#scale[1.5]{#mu#tau_{h} ", "et": "#scale[1.5]{e#tau_{h} ", "tt": "#scale[1.5]{#tau_{h}#tau_{h} "} binnings_dict = import_binnings.BinningsDict() input_bdts = {} for i,in_dir in enumerate(args.input_dirs): input_files = [(dirpath, f) for dirpath, dirnames, files in os.walk(in_dir) for f in fnmatch.filter(files, args.matching)] for path, file_name in input_files: bdt_name = file_name[:-7] #cut away TX.root if not bdt_name in input_bdts.keys(): input_bdts[bdt_name] = {in_dir: [os.path.join(path, file_name)]} elif in_dir not in input_bdts[bdt_name].keys(): input_bdts[bdt_name][in_dir] = [os.path.join(path, file_name)] else: input_bdts[bdt_name][in_dir].append(os.path.join(path, file_name)) configs_list = [] jsonTools.JsonDict(input_bdts).save("testPlot.json", indent=4) for name, plot in input_bdts.iteritems(): bdt_var = "BDT_"+name
# Clean the output dir
args.output_dir = os.path.abspath(os.path.expandvars(args.output_dir))
if args.clear_output_dir:
    clear_output_dir = raw_input("Do you really want to clear the output directory? [yes]").lower() == "yes"
    if not clear_output_dir:
        log.info("Terminating. Remove the clear_output_dir option and run the program again.")
        sys.exit(1)
    logger.subprocessCall("rm -r " + args.output_dir, shell=True)

sample_settings = samples.Samples()
binnings_settings = binnings.BinningsDict()
systematics_factory = systematics.SystematicsFactory()

www_output_dirs_postfit = []
www_output_dirs_weightbin = []
www_output_dirs_parabola = []

plot_configs = []
output_files = []
merged_output_files = []
hadd_commands = []

# Initialise directory and naming scheme templates for datacards
tmp_input_root_filename_template = "input/${ANALYSIS}_${CHANNEL}_${BIN}_${SYSTEMATIC}_${ERA}.root"
input_root_filename_template = "input/${ANALYSIS}_${CHANNEL}_${ERA}.root"
bkg_histogram_name_template = "${BIN}/${PROCESS}"
sig_histogram_name_template = "${BIN}/${PROCESS}"
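# Minimal sketch of how the ${...} placeholders in the templates above can be resolved, assuming
# standard-library string.Template semantics (the actual substitution happens later in the
# datacard-writing machinery of this script):
#
#     import string
#     string.Template(input_root_filename_template).safe_substitute(ANALYSIS="ztt", CHANNEL="mt", ERA="13TeV")
#     # -> "input/ztt_mt_13TeV.root"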
def calculate_partial_correlation(config):
    channel = config["channel"]
    category = config["category"]
    requested_sample = config["request_nick"]

    # construct the list of ROOT files; if prepare_samples is enabled, the merged sample file is produced
    c_tree = ""
    c_tree_list = ROOT.TList()
    root_file_name_list = []
    root_storage_file = "%s/%s.root" % (config["storage_name_extension"], "NTuples")
    config["storage_file"] = root_storage_file
    config["storage_ntuple"] = config["folders"][0].replace("/ntuple", "")
    cuts = ""

    # find all physical files and store them in root_file_name_list
    for i, nick in enumerate(config["nicks"]):
        # skip noplot nicks unless they belong to the wmh/wph/zh signal samples
        if not bool(sum([x in nick for x in ["wmh", "wph", "zh"]])) and "noplot" in nick:
            continue
        log.debug("search files:\n" + str(config["files"][i]))
        # split the file string into file names; those can contain "*", which glob.glob expands
        # to real paths before they are appended to root_file_name_list
        map(root_file_name_list.__iadd__, map(glob.glob, map(config["root_input_dir"].__add__, config["files"][i].split(" "))))
        if (not cuts == "") and (not cuts == config["weights"][i]):
            log.error("cannot decide which weight to use for sample %s nick %s" % (config["request_nick"], nick))
            log.error(config)
            #sys.exit()
        cuts = config["weights"][i]
        config["lumi"] = config["scale_factors"][i]

    if config["prepare_samples"] and not (os.path.exists(root_storage_file) and not config["overwrite_samples"]):
        log.info("Recreate Sample %s" % root_storage_file)
        for root_file_name in root_file_name_list:
            log.debug("Prepare Rootfile %s as Sample %s" % (root_file_name, config["request_nick"]))
            c_tree_list.append(ROOT.TChain())
            root_file_name = root_file_name + '/' + config["folders"][0]
            c_tree_list[-1].Add(root_file_name)
            c_tree_list[-1].SetName("list_tree")

        store_file = ROOT.TFile(root_storage_file, "RECREATE")
        selection_string = cuts.replace("eventWeight*", "")
        if config["request_nick"] in ["ztt", "zll"]:
            selection_string = cuts.replace("eventWeight*", "").replace("*stitchWeight%s" % (config["request_nick"].upper()), "")
        for index in range(len(c_tree_list)):
            log.debug("Cut Tree %s for Sample %s " % (root_file_name_list[index], root_storage_file))
            c_tree_list[index] = c_tree_list[index].CopyTree(selection_string)
        log.debug("Merge Trees for Sample %s " % root_storage_file)
        if len(c_tree_list) > 1:
            c_tree = ROOT.TTree.MergeTrees(c_tree_list)
        elif len(c_tree_list) == 1:
            c_tree = c_tree_list[0]
        else:
            c_tree = ROOT.TChain()
        log.debug("Prepare Sample %s from %i files" % (root_storage_file, len(c_tree_list)))
        c_tree.SetName(config["folders"][0].replace("/ntuple", ""))
        c_tree.Write()
        for i in range(len(c_tree_list)):
            del c_tree_list[0]
        del c_tree_list
        #store_file.Write()
        store_file.Close()
    #log.error("Reached Breakpoint, would start calculations next!")
    #sys.exit()

    root_histograms = {}
    corr_vars = {}
    binnings_dict = import_binnings.BinningsDict()
    nick_path = os.path.join(config["output_dir_path"], channel, category, requested_sample)
    log.debug("save output to folder %s" % nick_path)
    if not os.path.exists(nick_path):
        os.makedirs(nick_path)

    root_inf = ROOT.TFile(config["storage_file"], "read")
    root_inst = root_inf.Get(config["storage_ntuple"])

    log.debug("===============================X-Y Correlations==============================")
    for variables in itertools.combinations(config["parameters_list"], 2):
        # translate the binning strings into [nbins, low, high]; space-separated strings list bin
        # edges, comma-separated strings are "nbins,low,high", anything else falls back to a default
        bins_raw = [binnings_dict.get_binning("%s_" % config["channel"] + variables[0]).strip(),
                    binnings_dict.get_binning("%s_" % config["channel"] + variables[1]).strip()]
        for i, bins in enumerate(bins_raw):
            if " " in bins:
                tmp_bins = bins.split(" ")
                bins_raw[i] = [10, float(tmp_bins[0]), float(tmp_bins[-1])]
            elif "," in bins:
                tmp_bins = bins.split(",")
                bins_raw[i] = [10, float(tmp_bins[1]), float(tmp_bins[-1])]
            else:
                bins_raw[i] = [10, 0., 250.]
        [xbins, ybins] = bins_raw
        root_histograms["+-+".join(variables)] = ROOT.TProfile("+-+".join(variables), "correlation: %s vs %s" % variables, *(xbins + ybins[1:] + ['s']))
        log.debug("Generate ROOT TProfile: %s" % ("+-+".join(variables)))
        log.debug("options: " + str(xbins + ybins[1:] + ['s']))
        root_histograms["+-+".join(variables[-1::-1])] = ROOT.TProfile("+-+".join(variables[-1::-1]), "correlation: %s vs %s" % (variables[-1::-1]), *(ybins + xbins[1:] + ['s']))
        log.debug("Generate ROOT TProfile: %s" % ("+-+".join(variables[-1::-1])))
        log.debug("options: " + str(ybins + xbins[1:] + ['s']))
        corr_vars["+-+".join(variables)] = 0
        root_histograms["+-+".join(variables)].SetDirectory(0)
        ROOT.SetOwnership(root_histograms["+-+".join(variables)], False)
        root_histograms["+-+".join(variables[-1::-1])].SetDirectory(0)
        ROOT.SetOwnership(root_histograms["+-+".join(variables[-1::-1])], False)

    log.debug("===============================X-X Correlations==============================")
    for variable in config["parameters_list"]:
        bins = binnings_dict.get_binning("%s_" % config["channel"] + variable).strip()
        bins_raw = []
        if " " in bins:
            tmp_bins = bins.split(" ")
            bins_raw = [10, float(tmp_bins[0]), float(tmp_bins[-1])]
        elif "," in bins:
            tmp_bins = bins.split(",")
            bins_raw = [10, float(tmp_bins[1]), float(tmp_bins[-1])]
        else:
            bins_raw = [10, 0., 250.]
        log.debug("Generate ROOT TProfile: %s" % ("+-+".join([variable, variable])))
        log.debug(bins_raw * 2)
        root_histograms["+-+".join([variable, variable])] = ROOT.TProfile("+-+".join([variable, variable]), "correlation: %s vs %s" % (variable, variable), *(bins_raw + bins_raw[1:] + ['s']))
        corr_vars["+-+".join([variable, variable])] = 0
        corr_vars[variable] = 0
        corr_vars["var_%s" % variable] = 0
        root_histograms["+-+".join([variable, variable])].SetDirectory(0)
        ROOT.SetOwnership(root_histograms["+-+".join([variable, variable])], False)

    log.info("=================================================================================")
    log.info("Calculate correlations in sample %s and make scatter plots for %i variable pairs." % (config["request_nick"], len(root_histograms)))
    log.info("=================================================================================")
    #sys.exit()

    # event loop: fill the profile histograms and accumulate the weighted sums
    # needed later for the correlation coefficients
    i = 0.
    n = 0
    zero_vals = {}
    lumi_val = config["lumi"]
    for event in root_inst:
        calced_means = []
        w = event.__getattr__(config["weight_variable"]) * lumi_val
        if config["request_nick"] in ["ztt", "zll", "wj"]:
            w *= event.__getattr__("stitchWeight%s" % (config["request_nick"].upper()))
        for varxy in corr_vars.iterkeys():
            if "+-+" not in varxy:
                continue
            varx, vary = map(str, varxy.split("+-+"))
            x, y = map(event.__getattr__, map(str, varxy.split("+-+")))
            root_histograms["+-+".join([varx, vary])].Fill(x, y, w)
            root_histograms["+-+".join([vary, varx])].Fill(y, x, w)
            if varx not in zero_vals:
                zero_vals[varx] = 0
            if vary not in zero_vals:
                zero_vals[vary] = 0
            if varx not in calced_means:
                #log.info("calculate mean for %s" % varx)
                corr_vars[varx] += w * (x - zero_vals[varx])
                corr_vars["var_%s" % varx] += w * (x - zero_vals[varx]) ** 2
                calced_means.append(varx)
            if vary not in calced_means:
                #log.info("calculate mean for %s" % vary)
                corr_vars[vary] += w * (y - zero_vals[vary])
                corr_vars["var_%s" % vary] += w * (y - zero_vals[vary]) ** 2
                calced_means.append(vary)
            corr_vars[varxy] += w * (x - zero_vals[varx]) * (y - zero_vals[vary])
        i += w
        n += 1
        if n % 1000 == 0:
            log.info("processed: %i events" % n)

    # write the profile histograms and the accumulated sums to the sample's output folder
    hist_file = ROOT.TFile(os.path.join(nick_path, "Histograms.root"), "RECREATE")
    for varxy in root_histograms.iterkeys():
        root_histograms[varxy].Write()
    corr_vars["weight_sum"] = i
    corr_vars["n"] = n
    hist_file.Close()
    root_inf.Close()
    config["correlations"] = corr_vars
    jsonTools.JsonDict(config).save(os.path.join(nick_path, "Correlations.json"), indent=4)
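# Illustrative helper (a sketch, not part of the original workflow): turn the sums accumulated by
# calculate_partial_correlation into weighted Pearson correlation coefficients. For each pair key
# "x+-+y", config["correlations"] holds sum(w*x*y); per variable it holds sum(w*x) under the
# variable name and sum(w*x^2) under "var_<name>"; "weight_sum" is the total weight. With weighted
# means E[.] this gives
#     rho = (E[xy] - E[x]*E[y]) / sqrt((E[x^2] - E[x]^2) * (E[y^2] - E[y]^2))
# The function name and the usage line below are hypothetical.
def pearson_correlations_from_sums(corr_vars):
    import math
    weight_sum = corr_vars["weight_sum"]
    correlations = {}
    if weight_sum <= 0.0:
        return correlations
    for key, sum_wxy in corr_vars.iteritems():
        if "+-+" not in key:
            continue
        varx, vary = key.split("+-+")
        mean_x = corr_vars[varx] / weight_sum
        mean_y = corr_vars[vary] / weight_sum
        var_x = corr_vars["var_%s" % varx] / weight_sum - mean_x ** 2
        var_y = corr_vars["var_%s" % vary] / weight_sum - mean_y ** 2
        cov_xy = sum_wxy / weight_sum - mean_x * mean_y
        if var_x > 0.0 and var_y > 0.0:
            correlations[key] = cov_xy / math.sqrt(var_x * var_y)
    return correlations

# Example (hypothetical): correlations = pearson_correlations_from_sums(config["correlations"])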