Example #1
    # Clean the output dir
    args.output_dir = os.path.abspath(os.path.expandvars(args.output_dir))
    if args.clear_output_dir:
        clear_output_dir = raw_input(
            "Do you really want to clear the output directory? [yes] "
        ).lower() == "yes"
        if not clear_output_dir:
            log.info(
                "Terminating. Remove the clear_output_dir option and run the program again."
            )
            sys.exit(1)
        logger.subprocessCall("rm -r " + args.output_dir, shell=True)

    sample_settings = samples.Samples()
    binnings_settings = binnings.BinningsDict()
    systematics_factory = systematics.SystematicsFactory()
    www_output_dirs_postfit = []
    www_output_dirs_weightbin = []
    www_output_dirs_parabola = []

    plot_configs = []
    output_files = []
    merged_output_files = []
    hadd_commands = []

    # Initialise directory and naming scheme templates for datacards
    tmp_input_root_filename_template = "input/${ANALYSIS}_${CHANNEL}_${BIN}_${SYSTEMATIC}_${ERA}.root"
    input_root_filename_template = "input/${ANALYSIS}_${CHANNEL}_${ERA}.root"
    bkg_histogram_name_template = "${BIN}/${PROCESS}"
    sig_histogram_name_template = "${BIN}/${PROCESS}"
    bkg_syst_histogram_name_template = "${BIN}/${PROCESS}_${SYSTEMATIC}"
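
The snippet above asks for an explicit "yes" before wiping the output directory with an external rm -r call. A minimal standalone sketch of the same confirm-and-clear pattern, using only the standard library (clear_output_directory is an illustrative name, not part of the original script, and shutil.rmtree stands in for the rm -r subprocess call):

import os
import shutil
import sys


def clear_output_directory(path):
    # Normalize the path as above: expand environment variables, make it absolute.
    path = os.path.abspath(os.path.expandvars(path))
    # Require an explicit "yes" before deleting anything.
    answer = raw_input("Do you really want to clear the output directory? [yes] ")  # input() on Python 3
    if answer.lower() != "yes":
        print("Terminating without touching " + path)
        sys.exit(1)
    # Delete the directory tree in-process instead of shelling out to rm -r.
    shutil.rmtree(path)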
Example #2
def create_input_root_files(datacards, args):
    '''Configure Harry Plotter according to the samples and create the input ROOT files according to the args.'''
    plot_configs = []
    output_files = []
    merged_output_files = []
    hadd_commands = []

    sample_settings = samples.Samples()
    expression_settings = expressions.ExpressionsDict()
    binnings_settings = binnings.BinningsDict()
    systematics_factory = systematics.SystematicsFactory()

    datacards.configs._mapping_process2sample = {
        "data_obs": "data",
        "EWKZ": "ewkz",
        "QCD": "qcd",
        "TT": "ttj",
        "TTT": "ttt",
        "TTJ": "ttjj",
        "VV": "vv",
        "VVT": "vvt",
        "VVJ": "vvj",
        "W": "wj",
        "ZJ": "zj",
        "ZL": "zl",
        "ZLL": "zll",
        "ZTTPOSPOL": "zttpospol",
        "ZTTNEGPOL": "zttnegpol",
        "ZTT_GEN_DM_ZERO": "ztt_gen_dm_zero",
        "ZTT_GEN_DM_ONE": "ztt_gen_dm_one",
        "ZTT_GEN_DM_TWO": "ztt_gen_dm_two",
        "ZTT_GEN_DM_TEN": "ztt_gen_dm_ten",
        "ZTT_GEN_DM_ELEVEN": "ztt_gen_dm_eleven",
    }

    for index, (channel,
                categories) in enumerate(zip(args.channel, args.categories)):

        for category in categories:
            datacards_per_channel_category = datacardsbase.Datacards(
                cb=datacards.cb.cp().channel([channel]).bin([category]))

            higgs_masses = [
                mass for mass in datacards_per_channel_category.cb.mass_set()
                if mass != "*"
            ]

            output_file = os.path.join(
                args.output_dir,
                "input/{ANALYSIS}_{CHANNEL}_{BIN}_{ERA}.root".format(
                    ANALYSIS="ztt", CHANNEL=channel, BIN=category,
                    ERA="13TeV"))
            output_files.append(output_file)
            tmp_output_files = []

            for shape_systematic, list_of_samples in datacards_per_channel_category.get_samples_per_shape_systematic(
            ).iteritems():
                nominal = (shape_systematic == "nominal")
                list_of_samples = [
                    datacards.configs.process2sample(process)
                    for process in list_of_samples
                ]
                if ("wj"
                        in list_of_samples) and not ("qcd" in list_of_samples):
                    list_of_samples.append("qcd")
                elif ("qcd"
                      in list_of_samples) and not ("wj" in list_of_samples):
                    list_of_samples.append("wj")
                asimov_nicks = []
                if args.use_asimov_dataset:
                    asimov_nicks = [
                        nick.replace("zttpospol", "zttpospol_noplot").replace(
                            "zttnegpol", "zttnegpol_noplot")
                        for nick in list_of_samples
                    ]
                    if "data" in asimov_nicks:
                        asimov_nicks.remove("data")

                for shift_up in ([True] if nominal else [True, False]):
                    systematic = "nominal" if nominal else (
                        shape_systematic + ("Up" if shift_up else "Down"))

                    log.debug(
                        "Create inputs for (samples, systematic) = ([\"{samples}\"], {systematic}), (channel, category) = ({channel}, {category})."
                        .format(samples="\", \"".join(list_of_samples),
                                channel=channel,
                                category=category,
                                systematic=systematic))

                    tmp_quantity = args.quantity
                    tmp_omega_version = args.omega_version
                    if args.fixed_variables == "best_choice":
                        if channel in ["tt"]:
                            if category in [
                                    channel + "_" + cat for cat in [
                                        "combined_rho_oneprong",
                                        "combined_oneprong_oneprong"
                                    ]
                            ]:
                                tmp_quantity = "m_vis"
                                tmp_omega_version = None
                            elif category in [
                                    channel + "_" + cat
                                    for cat in ["combined_a1_rho"]
                            ]:
                                tmp_quantity = None
                                tmp_omega_version = None
                            elif category in [
                                    channel + "_" + cat for cat in
                                ["combined_a1_a1", "combined_a1_oneprong"]
                            ]:
                                tmp_quantity = None
                                tmp_omega_version = "BarSvfitM91"
                            elif category in [
                                    channel + "_" + cat
                                    for cat in ["combined_rho_rho", "rho"]
                            ]:
                                tmp_quantity = None
                                tmp_omega_version = "VisibleSvfit"
                        elif channel in ["mt", "et"]:
                            if category in [
                                    channel + "_" + cat
                                    for cat in ["combined_a1_oneprong"]
                            ]:
                                tmp_quantity = "m_vis"
                                tmp_omega_version = None
                            elif category in [
                                    channel + "_" + cat
                                    for cat in ["combined_rho_oneprong"]
                            ]:
                                tmp_quantity = None
                                tmp_omega_version = None
                            elif category in [
                                    channel + "_" + cat for cat in [
                                        "combined_oneprong_oneprong", "a1",
                                        "oneprong"
                                    ]
                            ]:
                                tmp_quantity = None
                                tmp_omega_version = "BarSvfitM91"
                            elif category in [
                                    channel + "_" + cat for cat in ["rho"]
                            ]:
                                tmp_quantity = None
                                tmp_omega_version = "VisibleSvfit"
                        elif channel in ["em"]:
                            if category in [
                                    channel + "_" + cat
                                    for cat in ["combined_oneprong_oneprong"]
                            ]:
                                tmp_quantity = "m_vis"
                                tmp_omega_version = None
                    elif args.fixed_variables == "best_choice_no_svfit":
                        tmp_quantity = "m_vis"
                        tmp_omega_version = None
                        if channel in ["tt", "mt", "et"]:
                            if category in [
                                    channel + "_" + cat
                                    for cat in ["combined_rho_rho", "rho"]
                            ]:
                                tmp_quantity = None
                                tmp_omega_version = "VisibleSvfit"

                    x_expression = None
                    if tmp_quantity:
                        x_expression = tmp_quantity
                    else:
                        x_expression = "testZttPol13TeV_" + category
                        if tmp_omega_version:
                            x_expression = expression_settings.expressions_dict[
                                x_expression].replace("BarSvfit",
                                                      tmp_omega_version)
                    x_expression = expression_settings.expressions_dict.get(
                        x_expression, x_expression)

                    # prepare plotting configs for retrieving the input histograms
                    config = sample_settings.get_config(
                        samples=[
                            getattr(samples.Samples, sample)
                            for sample in list_of_samples
                        ],
                        channel=channel,
                        category="catZttPol13TeV_" + category,
                        weight=args.weight,
                        lumi=args.lumi * 1000,
                        higgs_masses=higgs_masses,
                        estimationMethod="new",
                        polarisation_bias_correction=True,
                        cut_type="low_mvis_smhtt2016",
                        exclude_cuts=(["m_vis"]
                                      if x_expression == "m_vis" else []),
                        no_ewk_samples=args.no_ewk_samples,
                        no_ewkz_as_dy=True,
                        asimov_nicks=asimov_nicks)

                    systematics_settings = systematics_factory.get(
                        shape_systematic)(config)
                    # TODO: evaluate shift from datacards_per_channel_category.cb

                    config = systematics_settings.get_config(shift=(
                        0.0 if nominal else (1.0 if shift_up else -1.0)))

                    #config["qcd_subtract_shape"] =[args.qcd_subtract_shapes]

                    config["x_expressions"] = [
                        ("0" if (("gen_zttpospol" in nick) or
                                 ("gen_zttnegpol" in nick)) else x_expression)
                        for nick in config["nicks"]
                    ]

                    binnings_key = "binningZttPol13TeV_" + category + "_" + x_expression
                    if binnings_key not in binnings_settings.binnings_dict:
                        binnings_key = "binningZttPol13TeV_" + category + (
                            ("_" + tmp_quantity) if tmp_quantity else "")
                    if binnings_key in binnings_settings.binnings_dict:
                        config["x_bins"] = [
                            ("1,-1,1" if
                             (("gen_zttpospol" in nick) or
                              ("gen_zttnegpol" in nick)) else binnings_key)
                            for nick in config["nicks"]
                        ]

                    if args.fixed_binning:
                        if args.fixed_variables:
                            if channel == "tt":
                                config["x_bins"] = [
                                    args.fixed_binning.split(",")[0] +
                                    ",-1.0001,1.0001"
                                    for nick in config["nicks"]
                                ]
                            else:
                                config["x_bins"] = [
                                    args.fixed_binning
                                    for nick in config["nicks"]
                                ]
                        else:
                            config["x_bins"] = [
                                args.fixed_binning for nick in config["nicks"]
                            ]

                    config["directories"] = [args.input_dir]

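                    # Histogram names follow the datacard convention ${BIN}/${PROCESS}, with a _${SYSTEMATIC} suffix for shifted shapes.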
                    histogram_name_template = "${BIN}/${PROCESS}" if nominal else "${BIN}/${PROCESS}_${SYSTEMATIC}"
                    config["labels"] = [
                        histogram_name_template.replace("$", "").format(
                            PROCESS=datacards.configs.sample2process(
                                sample.replace("asimov", "data")),
                            BIN=category,
                            SYSTEMATIC=systematic)
                        for sample in config["labels"]
                    ]

                    tmp_output_file = os.path.join(
                        args.output_dir,
                        "input/{ANALYSIS}_{CHANNEL}_{BIN}_{SYSTEMATIC}_{ERA}.root"
                        .format(ANALYSIS="ztt",
                                CHANNEL=channel,
                                BIN=category,
                                SYSTEMATIC=systematic,
                                ERA="13TeV"))
                    tmp_output_files.append(tmp_output_file)
                    config["output_dir"] = os.path.dirname(tmp_output_file)
                    config["filename"] = os.path.splitext(
                        os.path.basename(tmp_output_file))[0]

                    config["plot_modules"] = ["ExportRoot"]
                    config["file_mode"] = "UPDATE"

                    if "legend_markers" in config:
                        config.pop("legend_markers")

                    plot_configs.append(config)

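            # Merge the per-systematic temporary files into the per-category input file and remove them afterwards.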
            hadd_commands.append("hadd -f {DST} {SRC} && rm {SRC}".format(
                DST=output_file, SRC=" ".join(tmp_output_files)))

    tmp_output_files = list(
        set([
            os.path.join(config["output_dir"], config["filename"] + ".root")
            for config in plot_configs[:args.n_plots[0]]
        ]))
    for output_file in tmp_output_files:
        if os.path.exists(output_file):
            os.remove(output_file)
            log.debug("Removed file \"" + output_file +
                      "\" before it is recreated again.")
    output_files = list(set(output_files))

    higgsplot.HiggsPlotter(list_of_config_dicts=plot_configs,
                           list_of_args_strings=[args.args],
                           n_processes=args.n_processes,
                           n_plots=args.n_plots[0],
                           batch=args.batch)

    if args.n_plots[0] != 0:
        tools.parallelize(_call_command,
                          hadd_commands,
                          n_processes=args.n_processes)

    debug_plot_configs = []
    for output_file in output_files:
        debug_plot_configs.extend(plotconfigs.PlotConfigs().all_histograms(
            output_file,
            plot_config_template={
                "markers": ["E"],
                "colors": ["#FF0000"]
            }))
    if args.www:
        for debug_plot_config in debug_plot_configs:
            debug_plot_config["www"] = debug_plot_config["output_dir"].replace(
                args.output_dir, args.www)
    #higgsplot.HiggsPlotter(list_of_config_dicts=debug_plot_configs, list_of_args_strings=[args.args], n_processes=args.n_processes, n_plots=args.n_plots[0])

    return None
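
A detail worth noting in the function above is how the ${...} naming templates are turned into concrete histogram names: stripping the "$" characters converts the shell-style placeholders into str.format fields. A small self-contained illustration (the BIN/PROCESS/SYSTEMATIC values below are made up for the example):

histogram_name_template = "${BIN}/${PROCESS}_${SYSTEMATIC}"

# "${BIN}/${PROCESS}_${SYSTEMATIC}" -> "{BIN}/{PROCESS}_{SYSTEMATIC}"
histogram_name = histogram_name_template.replace("$", "").format(
    PROCESS="ZTT", BIN="mt_rho", SYSTEMATIC="CMS_scale_t_13TeVUp")

print(histogram_name)  # mt_rho/ZTT_CMS_scale_t_13TeVUp

string.Template would work as well, but since removing the dollar sign turns ${NAME} into exactly a format field, str.format is sufficient here.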
Example #3
def create_datacards(channel, method):
    backgrounds = {"ZTT": "ztt", "VV": "vv", "W": "wj", "QCD": "qcd"}
    backgrounds.update({
        "TT": "ttj",
        "ZLL": "zll"
    } if channel == "em" else {
        "TTT": "ttt",
        "TTJJ": "ttjj",
        "ZL": "zl",
        "ZJ": "zj"
    })

    ##Combine harvester instance
    cb = ch.CombineHarvester()

    #Instance for extracting histograms
    sample_settings = samples.Samples()
    config_list = []

    ##weights
    with open(os.environ["CMSSW_BASE"] +
              "/src/FlavioOutput/Configs/cuts.yaml", "r") as cuts_file:
        cut_info = yaml.safe_load(cuts_file)
    with open(os.environ["CMSSW_BASE"] +
              "/src/FlavioOutput/Configs/parameter.yaml", "r") as parameter_file:
        parameter_info = yaml.safe_load(parameter_file)

    weights = []

    for index, category in enumerate(
        ["(njetspt30==0)", "(njetspt30==1)",
         "(njetspt30>1)"]):  #, "(nbtag==2)"]):
        #cut_strings = [parameter_info[param][4] for param in cut_info[index][channel].keys()]
        #cut_values, cut_side = [[entry[index2] for entry in cut_info[index][channel].values()] for index2 in [0,1]]

        weights.append(
            {  #"cut_based":	"*".join([cut_strings[index2].format(side = side, cut = value) for index2, (side, value) in enumerate(zip(cut_side, cut_values))] + [category]),
                "cut_BDT": "(BDT_forcut_score>0.7)*" + category,
                "cut_Ada_BDT": "(BDT_Ada_forcut_score>0.0)*" + category,
                "BDT": category,
                "Ada_BDT": category
            })

    ##Fill combine harvester with categories/processes
    for category in categories + controlregions:
        ##Add data/signal
        cb.AddObservations(["*"], ["lfv"], ["13TeV"], [channel], [category])

        if not "CR" in category[1]:
            cb.AddProcesses(["*"], ["lfv"], ["13TeV"], [channel],
                            ["Z" + channel.upper()], [category], True)

        ##Config for each category
        config = sample_settings.get_config(
            [
                getattr(samples.Samples, sample)
                for sample in data.values() +
                (["z" + channel] if "CR" not in category[1] else []) +
                backgrounds.values()
            ],
            channel,
            None,
            estimationMethod="new",
            weight=weights[category[0]][method])
        config.pop("legend_markers")
        config.update({
            "filename": "input_" + method + "_nominal_" + category[1],
            "plot_modules": ["ExportRoot"],
            "file_mode": "UPDATE",
            "directories": os.environ["MCPATH"],
            "x_expressions": x[method],
            "x_bins": x_bins[method],
            "output_dir": output_dir + channel,
            "no_cache": True
        })
        config["labels"] = [
            category[1] + "/" + process for process in data.keys() + {
                True: ["Z" + channel.upper()],
                False: []
            }["CR" not in category[1]] + backgrounds.keys()
        ]
        config_list.append(config)

        for process in backgrounds.keys():
            ##Add background
            cb.AddProcesses(["*"], ["lfv"], ["13TeV"], [channel], [process],
                            [category], False)

    ##Fill combine harvester with control regions
    for CR in controlregions:
        cb.cp().channel([channel]).bin([CR[1]]).AddSyst(
            cb, "scale_" + CR[1].replace("_CR", ""), "rateParam",
            ch.SystMap())

        for category in categories:
            cb.cp().bin([category[1]]).AddSyst(
                cb, "scale_" + CR[1].replace("_CR", ""), "rateParam",
                ch.SystMapFunc())

    ##Fill combine harvester with systematics
    systematics_list = SystLib.SystematicLibary()
    systematics_factory = systematics.SystematicsFactory()

    for (systematic, process, category) in systematics_list.get_LFV_systs(
            channel, lnN=True) + systematics_list.get_LFV_systs(channel,
                                                                shape=True):
        cb.cp().channel([channel]).process(process).AddSyst(cb, *systematic)

        if "W" in process and "QCD" not in process:
            process.append("QCD")

        if "QCD" in process and "W" not in process:
            process.append("W")

        if systematic[1] == "shape":
            ##Config for each systematic shift:
            for category in categories + controlregions:

                if "CR" in category[1] and "Z" + channel.upper() in process:
                    process.remove("Z" + channel.upper())

                for shift in ["Down", "Up"]:
                    config = sample_settings.get_config(
                        [
                            getattr(samples.Samples,
                                    dict(signals, **backgrounds)[sample])
                            for sample in process
                        ],
                        channel,
                        None,
                        estimationMethod="new",
                        weight=weights[category[0]][method])
                    config.pop("legend_markers")
                    config.update({
                        "filename":
                        "input_" + method + "_" + systematic[0].replace(
                            "$ERA", "13TeV").replace("$CHANNEL", channel) +
                        shift + "_" + category[1],
                        "plot_modules": ["ExportRoot"],
                        "file_mode": "UPDATE",
                        "directories": os.environ["MCPATH"],
                        "x_expressions": x[method],
                        "x_bins": x_bins[method],
                        "output_dir": output_dir + channel,
                        "no_cache": True
                    })
                    config["labels"] = [
                        category[1] + "/" + proc + "_" + systematic[0].replace(
                            "$ERA", "13TeV").replace("$CHANNEL", channel) +
                        shift for proc in process
                    ]

                    if systematic[0].replace("$ERA", "13TeV").replace(
                            "$CHANNEL", channel) == "CMS_scale_j_13TeV":
                        systematics_settings = systematics_factory.get(
                            systematic[0].replace("$ERA", "13TeV").replace(
                                "$CHANNEL", channel))(config, "Total")

                    else:
                        systematics_settings = systematics_factory.get(
                            systematic[0].replace("$ERA", "13TeV").replace(
                                "$CHANNEL", channel))(config)

                    config = systematics_settings.get_config(
                        1 if shift == "Up" else -1)
                    config_list.append(config)

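    ##Run all Harry plotter configs in parallel; harry_do_your_job is defined elsewhere in the script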
    pool = Pool(cpu_count())
    for config in config_list:
        pool.apply_async(harry_do_your_job, args=(config, ))

    pool.close()
    pool.join()

    os.system("hadd {target}.root {root_files}*.root".format(
        target=output_dir + channel + "/input_" + method,
        root_files=output_dir + channel + "/input_" + method))

    ##Fill combine harvester with the shapes which were extracted before from harry.py
    cb.cp().backgrounds().ExtractShapes(
        output_dir + channel + "/input_" + method + ".root", "$BIN/$PROCESS",
        "$BIN/$PROCESS_$SYSTEMATIC")
    cb.cp().signals().ExtractShapes(
        output_dir + channel + "/input_" + method + ".root", "$BIN/$PROCESS",
        "$BIN/$PROCESS_$SYSTEMATIC")

    #Write datacard and call combine
    cb.WriteDatacard(
        output_dir + channel + "/combined_" + method + ".txt",
        output_dir + channel + "/combined_datacard_" + method + ".root")

    for category in categories:
        cb_copy = cb.cp()
        cb_copy.FilterAll(lambda obj: obj.bin() != category[1])
        cb_copy.WriteDatacard(
            output_dir + channel + "/" + category[1] + "_" + method + ".txt",
            output_dir + channel + "/" + category[1] + "_datacard_" + method +
            ".root")