def __init__(self):
        """Initialise the base class and set every attribute to its default.

        After construction the instance carries an empty ``JsonDict``
        configuration, no post-fit scales, a fresh ``ExpressionsDict``,
        an empty list of excluded cuts and the period ``"run"``.
        """
        super(SamplesBase, self).__init__()

        # Configuration container, starting out empty.
        self.config = jsonTools.JsonDict(dict())
        # Stays None until a caller assigns post-fit scales.
        self.postfit_scales = None
        self.expressions = expressions.ExpressionsDict()

        # Nothing is excluded by default.
        self.exclude_cuts = list()
        self.period = "run"
Esempio n. 2
0
# "best_choice" variable selection for the tt channel, keyed by the category
# name without its "<channel>_" prefix. Values are (quantity, omega_version).
_BEST_CHOICE_TT = {
    "combined_rho_oneprong": ("m_vis", None),
    "combined_oneprong_oneprong": ("m_vis", None),
    "combined_a1_rho": (None, None),
    "combined_a1_a1": (None, "BarSvfitM91"),
    "combined_a1_oneprong": (None, "BarSvfitM91"),
    "combined_rho_rho": (None, "VisibleSvfit"),
    "rho": (None, "VisibleSvfit"),
}

# Same as above, shared by the mt and et channels.
_BEST_CHOICE_MT_ET = {
    "combined_a1_oneprong": ("m_vis", None),
    "combined_rho_oneprong": (None, None),
    "combined_oneprong_oneprong": (None, "BarSvfitM91"),
    "a1": (None, "BarSvfitM91"),
    "oneprong": (None, "BarSvfitM91"),
    "rho": (None, "VisibleSvfit"),
}

# channel -> {category suffix -> (quantity, omega_version)}
_BEST_CHOICE_VARIABLES = {
    "tt": _BEST_CHOICE_TT,
    "mt": _BEST_CHOICE_MT_ET,
    "et": _BEST_CHOICE_MT_ET,
    "em": {
        "combined_oneprong_oneprong": ("m_vis", None),
    },
}

# Channels / category suffixes that keep an omega variable in the
# "best_choice_no_svfit" mode.
_NO_SVFIT_OMEGA_CHANNELS = ("tt", "mt", "et")
_NO_SVFIT_OMEGA_CATEGORIES = ("combined_rho_rho", "rho")


def _select_fit_variables(fixed_variables, channel, category, quantity,
                          omega_version):
    """Return the ``(quantity, omega_version)`` pair to use for one category.

    ``fixed_variables`` selects the mode:

    * ``"best_choice"``: look up the pair for ``(channel, category)`` in the
      tables above; combinations not listed keep the given defaults.
    * ``"best_choice_no_svfit"``: use ``("m_vis", None)``, except for the rho
      categories of the tt/mt/et channels which use
      ``(None, "VisibleSvfit")``.
    * anything else: return the given ``quantity`` and ``omega_version``
      unchanged.

    ``category`` is expected to be spelled ``"<channel>_<suffix>"``; a
    category without that prefix never matches a table entry.
    """
    prefix = channel + "_"
    suffix = category[len(prefix):] if category.startswith(prefix) else None
    defaults = (quantity, omega_version)

    if fixed_variables == "best_choice":
        return _BEST_CHOICE_VARIABLES.get(channel, {}).get(suffix, defaults)

    if fixed_variables == "best_choice_no_svfit":
        if (channel in _NO_SVFIT_OMEGA_CHANNELS
                and suffix in _NO_SVFIT_OMEGA_CATEGORIES):
            return (None, "VisibleSvfit")
        return ("m_vis", None)

    return defaults


def _is_gen_polarisation_nick(nick):
    """Tell whether ``nick`` names one of the gen-level polarisation samples."""
    return ("gen_zttpospol" in nick) or ("gen_zttnegpol" in nick)


def create_input_root_files(datacards, args):
    """Configure HarryPlotter for the datacard samples and create the input ROOT files.

    For every (channel, category) pair taken from ``args.channel`` and
    ``args.categories`` and for every shape systematic reported by the
    datacards (nominal once, all others shifted up and down), one plot
    configuration exporting the histograms to a temporary ROOT file is built.
    The configurations are handed to ``higgsplot.HiggsPlotter`` and the
    temporary files of each category are afterwards merged with ``hadd`` into
    ``<args.output_dir>/input/ztt_<channel>_<category>_13TeV.root``.

    Side effects visible in this function:

    * ``datacards.configs._mapping_process2sample`` is overwritten.
    * Temporary ROOT files that already exist are deleted before plotting,
      because the export runs in ``UPDATE`` file mode.
    * The ``hadd`` commands remove the temporary files after merging.

    Args:
        datacards: datacards object providing ``cb`` and ``configs``.
        args: parsed command line arguments. The attributes read here are
            ``channel``, ``categories``, ``output_dir``, ``input_dir``,
            ``use_asimov_dataset``, ``quantity``, ``omega_version``,
            ``fixed_variables``, ``fixed_binning``, ``weight``, ``lumi``,
            ``no_ewk_samples``, ``n_plots``, ``n_processes``, ``batch``,
            ``args`` and ``www``.

    Returns:
        None.
    """
    plot_configs = []
    output_files = []
    hadd_commands = []

    sample_settings = samples.Samples()
    expression_settings = expressions.ExpressionsDict()
    binnings_settings = binnings.BinningsDict()
    systematics_factory = systematics.SystematicsFactory()

    # Translation from datacard process names to sample names.
    datacards.configs._mapping_process2sample = {
        "data_obs": "data",
        "EWKZ": "ewkz",
        "QCD": "qcd",
        "TT": "ttj",
        "TTT": "ttt",
        "TTJ": "ttjj",
        "VV": "vv",
        "VVT": "vvt",
        "VVJ": "vvj",
        "W": "wj",
        "ZJ": "zj",
        "ZL": "zl",
        "ZLL": "zll",
        "ZTTPOSPOL": "zttpospol",
        "ZTTNEGPOL": "zttnegpol",
        "ZTT_GEN_DM_ZERO": "ztt_gen_dm_zero",
        "ZTT_GEN_DM_ONE": "ztt_gen_dm_one",
        "ZTT_GEN_DM_TWO": "ztt_gen_dm_two",
        "ZTT_GEN_DM_TEN": "ztt_gen_dm_ten",
        "ZTT_GEN_DM_ELEVEN": "ztt_gen_dm_eleven",
    }

    for channel, categories in zip(args.channel, args.categories):

        for category in categories:
            datacards_per_channel_category = datacardsbase.Datacards(
                cb=datacards.cb.cp().channel([channel]).bin([category]))

            higgs_masses = [
                mass for mass in datacards_per_channel_category.cb.mass_set()
                if mass != "*"
            ]

            output_file = os.path.join(
                args.output_dir,
                "input/{ANALYSIS}_{CHANNEL}_{BIN}_{ERA}.root".format(
                    ANALYSIS="ztt", CHANNEL=channel, BIN=category,
                    ERA="13TeV"))
            output_files.append(output_file)
            tmp_output_files = []

            # The variable choice depends only on the arguments, the channel
            # and the category, so it is resolved once per category.
            tmp_quantity, tmp_omega_version = _select_fit_variables(
                args.fixed_variables, channel, category, args.quantity,
                args.omega_version)

            samples_per_shape_systematic = (
                datacards_per_channel_category
                .get_samples_per_shape_systematic())

            # .items() instead of .iteritems(): works on Python 2 and 3.
            for shape_systematic, list_of_samples in samples_per_shape_systematic.items():
                nominal = (shape_systematic == "nominal")
                list_of_samples = [
                    datacards.configs.process2sample(process)
                    for process in list_of_samples
                ]
                # wj and qcd are always requested together: whenever only one
                # of the two is present, the other one is added.
                if ("wj" in list_of_samples) and ("qcd" not in list_of_samples):
                    list_of_samples.append("qcd")
                elif ("qcd" in list_of_samples) and ("wj" not in list_of_samples):
                    list_of_samples.append("wj")

                asimov_nicks = []
                if args.use_asimov_dataset:
                    asimov_nicks = [
                        nick.replace("zttpospol", "zttpospol_noplot").replace(
                            "zttnegpol", "zttnegpol_noplot")
                        for nick in list_of_samples
                    ]
                    if "data" in asimov_nicks:
                        asimov_nicks.remove("data")

                # The nominal shape is produced once, every other systematic
                # once shifted up and once shifted down.
                for shift_up in ([True] if nominal else [True, False]):
                    systematic = "nominal" if nominal else (
                        shape_systematic + ("Up" if shift_up else "Down"))

                    log.debug(
                        "Create inputs for (samples, systematic) = ([\"{samples}\"], {systematic}), (channel, category) = ({channel}, {category})."
                        .format(samples="\", \"".join(list_of_samples),
                                channel=channel,
                                category=category,
                                systematic=systematic))

                    if tmp_quantity:
                        x_expression = tmp_quantity
                    else:
                        x_expression = "testZttPol13TeV_" + category
                        if tmp_omega_version:
                            x_expression = expression_settings.expressions_dict[
                                x_expression].replace("BarSvfit",
                                                      tmp_omega_version)
                    # Resolve a named expression; anything that is not a key
                    # of the expressions dictionary is used verbatim.
                    x_expression = expression_settings.expressions_dict.get(
                        x_expression, x_expression)

                    # prepare plotting configs for retrieving the input histograms
                    config = sample_settings.get_config(
                        samples=[
                            getattr(samples.Samples, sample)
                            for sample in list_of_samples
                        ],
                        channel=channel,
                        category="catZttPol13TeV_" + category,
                        weight=args.weight,
                        lumi=args.lumi * 1000,
                        higgs_masses=higgs_masses,
                        estimationMethod="new",
                        polarisation_bias_correction=True,
                        cut_type="low_mvis_smhtt2016",
                        exclude_cuts=(["m_vis"]
                                      if x_expression == "m_vis" else []),
                        no_ewk_samples=args.no_ewk_samples,
                        no_ewkz_as_dy=True,
                        asimov_nicks=asimov_nicks)

                    systematics_settings = systematics_factory.get(
                        shape_systematic)(config)
                    # TODO: evaluate shift from datacards_per_channel_category.cb

                    config = systematics_settings.get_config(shift=(
                        0.0 if nominal else (1.0 if shift_up else -1.0)))

                    #config["qcd_subtract_shape"] =[args.qcd_subtract_shapes]

                    # Gen-level polarisation nicks get the constant expression
                    # "0" (and a single bin below) instead of the fit variable.
                    config["x_expressions"] = [
                        ("0" if _is_gen_polarisation_nick(nick)
                         else x_expression)
                        for nick in config["nicks"]
                    ]

                    # Prefer a binning defined for the resolved expression and
                    # fall back to the one defined for the plain quantity.
                    binnings_key = "binningZttPol13TeV_" + category + "_" + x_expression
                    if binnings_key not in binnings_settings.binnings_dict:
                        binnings_key = "binningZttPol13TeV_" + category + (
                            ("_" + tmp_quantity) if tmp_quantity else "")
                    if binnings_key in binnings_settings.binnings_dict:
                        config["x_bins"] = [
                            ("1,-1,1" if _is_gen_polarisation_nick(nick)
                             else binnings_key)
                            for nick in config["nicks"]
                        ]

                    if args.fixed_binning:
                        if args.fixed_variables and channel == "tt":
                            # Keep only the number of bins and force the
                            # range to [-1.0001, 1.0001].
                            fixed_bins = (args.fixed_binning.split(",")[0] +
                                          ",-1.0001,1.0001")
                        else:
                            fixed_bins = args.fixed_binning
                        config["x_bins"] = [
                            fixed_bins for nick in config["nicks"]
                        ]

                    config["directories"] = [args.input_dir]

                    histogram_name_template = "${BIN}/${PROCESS}" if nominal else "${BIN}/${PROCESS}_${SYSTEMATIC}"
                    config["labels"] = [
                        histogram_name_template.replace("$", "").format(
                            PROCESS=datacards.configs.sample2process(
                                sample.replace("asimov", "data")),
                            BIN=category,
                            SYSTEMATIC=systematic)
                        for sample in config["labels"]
                    ]

                    tmp_output_file = os.path.join(
                        args.output_dir,
                        "input/{ANALYSIS}_{CHANNEL}_{BIN}_{SYSTEMATIC}_{ERA}.root"
                        .format(ANALYSIS="ztt",
                                CHANNEL=channel,
                                BIN=category,
                                SYSTEMATIC=systematic,
                                ERA="13TeV"))
                    tmp_output_files.append(tmp_output_file)
                    config["output_dir"] = os.path.dirname(tmp_output_file)
                    config["filename"] = os.path.splitext(
                        os.path.basename(tmp_output_file))[0]

                    config["plot_modules"] = ["ExportRoot"]
                    config["file_mode"] = "UPDATE"

                    if "legend_markers" in config:
                        config.pop("legend_markers")

                    plot_configs.append(config)

            # Merge all per-systematic files of this category into one file
            # and delete the temporary ones afterwards.
            hadd_commands.append("hadd -f {DST} {SRC} && rm {SRC}".format(
                DST=output_file, SRC=" ".join(tmp_output_files)))

    # The export runs in UPDATE mode, so files left over from a previous run
    # have to be removed before they are written again.
    tmp_output_files = list(
        set(
            os.path.join(config["output_dir"], config["filename"] + ".root")
            for config in plot_configs[:args.n_plots[0]]))
    for output_file in tmp_output_files:
        if os.path.exists(output_file):
            os.remove(output_file)
            log.debug("Removed file \"" + output_file +
                      "\" before it is recreated again.")
    output_files = list(set(output_files))

    higgsplot.HiggsPlotter(list_of_config_dicts=plot_configs,
                           list_of_args_strings=[args.args],
                           n_processes=args.n_processes,
                           n_plots=args.n_plots[0],
                           batch=args.batch)

    if args.n_plots[0] != 0:
        tools.parallelize(_call_command,
                          hadd_commands,
                          n_processes=args.n_processes)

    # NOTE: the debug plot configurations are still assembled, but the
    # plotter call that would consume them is disabled (see below).
    debug_plot_configs = []
    for output_file in output_files:
        debug_plot_configs.extend(plotconfigs.PlotConfigs().all_histograms(
            output_file,
            plot_config_template={
                "markers": ["E"],
                "colors": ["#FF0000"]
            }))
    if args.www:
        for debug_plot_config in debug_plot_configs:
            debug_plot_config["www"] = debug_plot_config["output_dir"].replace(
                args.output_dir, args.www)
    #higgsplot.HiggsPlotter(list_of_config_dicts=debug_plot_configs, list_of_args_strings=[args.args], n_processes=args.n_processes, n_plots=args.n_plots[0])

    return None
Esempio n. 3
0
						help="Channel. [Default: %(default)s]")
	parser.add_argument("--title",
						default="Category Overlap",
						help="Title [Default: %(default)s]")
	parser.add_argument("-e", "--exclude-cuts", nargs="+", default=[],
						help="Exclude (default) selection cuts. [Default: %(default)s]")
	parser.add_argument("-r", "--ratio", nargs="+", default=["qqh"],
						help="List of Numerator samples. [Default: %(default)s]")
	parser.add_argument("--ratio-title",
						default="VBF/ggh",
						help="Title for ratio [Default: %(default)s]")

	args = parser.parse_args()
	logger.initLogger(args)

	exp_dict = expressions.ExpressionsDict()
	plot_configs = []
	list_of_samples = [getattr(samples.Samples, sample) for sample in args.Samples]
	sample_settings = samples.Samples()

	config = sample_settings.get_config(
	samples=list_of_samples,
	channel=args.channel,
	category="1.0",
	weight="1.0",
	higgs_masses=args.higgs_masses,
	normalise_signal_to_one_pb=False,
	exclude_cuts=args.exclude_cuts
	)
	firsts = [exp_dict.replace_expressions(s) for s in args.first_category]
	seconds = [exp_dict.replace_expressions(s) for s in args.second_category]