def get_properties(dict_, era, channel, directory, additional_cuts):
    # Get data estimation method
    if "2016" in era.name:
        from shape_producer.estimation_methods_2016 import DataEstimation
    elif "2017" in era.name:
        from shape_producer.estimation_methods_2017 import DataEstimation
    else:
        logger.fatal(
            "Can not import data estimation because era {} is not implemented."
            .format(era.name))
        raise Exception
    estimation = DataEstimation(era, directory, channel)

    # Extract weight string, which should be equal to (1.0)
    weight_string = estimation.get_weights().extract()
    logger.debug("Data weight string: %s", weight_string)
    if weight_string != "(1.0)":
        logger.fatal("Weight string is not equal to (1.0).")
        raise Exception

    # Extract cut string
    cut_string = (estimation.get_cuts() + channel.cuts +
                  additional_cuts).expand()
    logger.debug("Data cut string: %s", cut_string)
    dict_["cut_string"] = str(cut_string)

    # Get files
    files = [str(f) for f in estimation.get_files()]
    for i, f in enumerate(files):
        logger.debug("File %d: %s", i + 1, str(f).replace(directory + "/", ""))
    dict_["files"] = files
    dict_["directory"] = directory

    return dict_
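
A minimal, self-contained sketch (not part of the original scripts) of the path stripping done in the debug loop above; `os.path.relpath` produces the same log output more robustly than `str.replace`, and the example paths below are assumptions:

import os

directory = "/path/to/ntuples"  # assumed example value
files = ["/path/to/ntuples/SingleMuon_Run2016B/SingleMuon_Run2016B.root"]  # assumed example value
for i, f in enumerate(files):
    # Equivalent to str(f).replace(directory + "/", ""), but also tolerant of trailing slashes
    print("File {}: {}".format(i + 1, os.path.relpath(f, directory)))
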
Example #2
def get_properties(dict_, era, channel, directory, additional_cuts):
    # Get data estimation method
    estimation = DataEstimation(era, directory, channel)

    # Extract weight string, which should be equal to (1.0)
    weight_string = estimation.get_weights().extract()
    logger.debug("Data weight string: %s", weight_string)
    if weight_string != "(1.0)":
        logger.fatal("Weight string is not equal to (1.0).")
        raise Exception

    # Extract cut string
    cut_string = (estimation.get_cuts() + channel.cuts +
                  additional_cuts).expand()
    logger.debug("Data cut string: %s", cut_string)
    dict_["cut_string"] = str(cut_string)

    # Get files
    files = [str(f) for f in estimation.get_files()]
    for i, f in enumerate(files):
        logger.debug("File %d: %s", i + 1, str(f).replace(directory + "/", ""))
    dict_["files"] = files

    return dict_
Example #3
def main(args):
    # Write argparse arguments to YAML config
    logger.debug("Write argparse arguments to YAML config.")
    output_config = {}
    output_config["base_path"] = args.base_path
    output_config["output_path"] = args.output_path
    output_config["output_filename"] = args.output_filename
    output_config["tree_path"] = args.tree_path
    output_config["event_branch"] = args.event_branch
    output_config["training_weight_branch"] = args.training_weight_branch

    # Define era
    if "2016" in args.era:
        from shape_producer.estimation_methods_2016 import DataEstimation, HTTEstimation, ggHEstimation, qqHEstimation, \
            VHEstimation, ZTTEstimation, ZTTEstimationTT, ZLEstimationMTSM, ZLEstimationETSM, ZLEstimationTT, \
            ZJEstimationMT, ZJEstimationET, ZJEstimationTT, WEstimationRaw, TTTEstimationMT, TTTEstimationET, \
            TTTEstimationTT, TTJEstimationMT, TTJEstimationET, TTJEstimationTT, VVEstimation, QCDEstimationMT, \
            QCDEstimationET, QCDEstimationTT, ZTTEmbeddedEstimation, TTLEstimationMT, TTLEstimationET, \
            TTLEstimationTT, TTTTEstimationMT, TTTTEstimationET, EWKWpEstimation, EWKWmEstimation, \
            EWKZllEstimation, EWKZnnEstimation
        from shape_producer.era import Run2016
        era = Run2016(args.database)
    else:
        logger.fatal("Era {} is not implemented.".format(args.era))
        raise Exception

    ############################################################################

    # Channel: mt
    if args.channel == "mt":
        channel = MTSM()

        # Set up `processes` part of config
        output_config["processes"] = {}

        # Additional cuts
        additional_cuts = Cuts()
        logger.warning("Use additional cuts for mt: %s",
                       additional_cuts.expand())

        # MC-driven processes
        # NOTE: Define the mapping of the process estimations to the training classes here
        classes_map = {
            "ggH": "ggh",
            "qqH": "qqh",
            "ZTT": "ztt",
            "EMB": "ztt",
            "ZL": "zll",
            "ZJ": "zll",
            "TTT": "tt",
            "TTL": "tt",
            "TTJ": "tt",
            "W": "w",
            "EWKWp": "w",
            "EWKWm": "w",
            "VV": "misc",
            "EWKZll": "misc",
            "EWKZnn": "misc"
        }
        for estimation in [
                ggHEstimation(era, args.base_path, channel),
                qqHEstimation(era, args.base_path, channel),
                ZTTEstimation(era, args.base_path, channel),
                #ZTTEmbeddedEstimation(era, args.base_path, channel),
                ZLEstimationMTSM(era, args.base_path, channel),
                ZJEstimationMT(era, args.base_path, channel),
                TTTEstimationMT(era, args.base_path, channel),
                #TTLEstimationMT(era, args.base_path, channel),
                TTJEstimationMT(era, args.base_path, channel),
                WEstimationRaw(era, args.base_path, channel),
                EWKWpEstimation(era, args.base_path, channel),
                EWKWmEstimation(era, args.base_path, channel),
                VVEstimation(era, args.base_path, channel),
                EWKZllEstimation(era, args.base_path, channel),
                #EWKZnnEstimation(era, args.base_path, channel)
        ]:
            output_config["processes"][estimation.name] = {
                "files": [
                    str(f).replace(args.base_path + "/", "")
                    for f in estimation.get_files()
                ],
                "cut_string": (estimation.get_cuts() + channel.cuts +
                               additional_cuts).expand(),
                "weight_string":
                estimation.get_weights().extract(),
                "class":
                classes_map[estimation.name]
            }

        # Same sign selection for data-driven QCD
        estimation = DataEstimation(era, args.base_path, channel)
        estimation.name = "QCD"
        channel_ss = copy.deepcopy(channel)
        channel_ss.cuts.get("os").invert()
        output_config["processes"][estimation.name] = {
            "files": [
                str(f).replace(args.base_path + "/", "")
                for f in estimation.get_files()
            ],
            "cut_string": (estimation.get_cuts() + channel_ss.cuts +
                           additional_cuts).expand(),
            "weight_string":
            estimation.get_weights().extract(),
            "class":
            "ss"
        }

    ############################################################################

    # Channel: et
    if args.channel == "et":
        channel = ETSM()

        # Set up `processes` part of config
        output_config["processes"] = {}

        # Additional cuts
        additional_cuts = Cuts()
        logger.warning("Use additional cuts for et: %s",
                       additional_cuts.expand())

        # MC-driven processes
        # NOTE: Define the mapping of the process estimations to the training classes here
        classes_map = {
            "ggH": "ggh",
            "qqH": "qqh",
            "ZTT": "ztt",
            "EMB": "ztt",
            "ZL": "zll",
            "ZJ": "zll",
            "TTT": "tt",
            "TTL": "tt",
            "TTJ": "tt",
            "W": "w",
            "EWKWp": "w",
            "EWKWm": "w",
            "VV": "misc",
            "EWKZll": "misc",
            "EWKZnn": "misc"
        }
        for estimation in [
                ggHEstimation(era, args.base_path, channel),
                qqHEstimation(era, args.base_path, channel),
                ZTTEstimation(era, args.base_path, channel),
                #ZTTEmbeddedEstimation(era, args.base_path, channel),
                ZLEstimationETSM(era, args.base_path, channel),
                ZJEstimationET(era, args.base_path, channel),
                TTTEstimationET(era, args.base_path, channel),
                #TTLEstimationET(era, args.base_path, channel),
                TTJEstimationET(era, args.base_path, channel),
                WEstimationRaw(era, args.base_path, channel),
                EWKWpEstimation(era, args.base_path, channel),
                EWKWmEstimation(era, args.base_path, channel),
                VVEstimation(era, args.base_path, channel),
                EWKZllEstimation(era, args.base_path, channel),
                #EWKZnnEstimation(era, args.base_path, channel)
        ]:
            output_config["processes"][estimation.name] = {
                "files": [
                    str(f).replace(args.base_path + "/", "")
                    for f in estimation.get_files()
                ],
                "cut_string": (estimation.get_cuts() + channel.cuts +
                               additional_cuts).expand(),
                "weight_string":
                estimation.get_weights().extract(),
                "class":
                classes_map[estimation.name]
            }

        # Same sign selection for data-driven QCD
        estimation = DataEstimation(era, args.base_path, channel)
        estimation.name = "QCD"
        channel_ss = copy.deepcopy(channel)
        channel_ss.cuts.get("os").invert()
        output_config["processes"][estimation.name] = {
            "files": [
                str(f).replace(args.base_path + "/", "")
                for f in estimation.get_files()
            ],
            "cut_string": (estimation.get_cuts() + channel_ss.cuts +
                           additional_cuts).expand(),
            "weight_string":
            estimation.get_weights().extract(),
            "class":
            "ss"
        }

    ############################################################################

    # Channel: tt
    if args.channel == "tt":
        channel = TTSM()

        # Set up `processes` part of config
        output_config["processes"] = {}

        # Additional cuts
        additional_cuts = Cuts()
        logger.warning("Use additional cuts for tt: %s",
                       additional_cuts.expand())

        # MC-driven processes
        # NOTE: Define the mapping of the process estimations to the training classes here
        classes_map = {
            "ggH": "ggh",
            "qqH": "qqh",
            "ZTT": "ztt",
            "EMB": "ztt",
            "ZL": "misc",
            "ZJ": "misc",
            "TTT": "misc",
            "TTL": "misc",
            "TTJ": "misc",
            "W": "misc",
            "EWKWp": "misc",
            "EWKWm": "misc",
            "VV": "misc",
            "EWKZll": "misc",
            "EWKZnn": "misc"
        }
        for estimation in [
                ggHEstimation(era, args.base_path, channel),
                qqHEstimation(era, args.base_path, channel),
                ZTTEstimationTT(era, args.base_path, channel),
                #ZTTEmbeddedEstimation(era, args.base_path, channel),
                ZLEstimationTT(era, args.base_path, channel),
                ZJEstimationTT(era, args.base_path, channel),
                TTTEstimationTT(era, args.base_path, channel),
                #TTLEstimationTT(era, args.base_path, channel),
                TTJEstimationTT(era, args.base_path, channel),
                WEstimationRaw(era, args.base_path, channel),
                EWKWpEstimation(era, args.base_path, channel),
                EWKWmEstimation(era, args.base_path, channel),
                VVEstimation(era, args.base_path, channel),
                EWKZllEstimation(era, args.base_path, channel),
                #EWKZnnEstimation(era, args.base_path, channel)
        ]:
            output_config["processes"][estimation.name] = {
                "files": [
                    str(f).replace(args.base_path + "/", "")
                    for f in estimation.get_files()
                ],
                "cut_string": (estimation.get_cuts() + channel.cuts +
                               additional_cuts).expand(),
                "weight_string":
                estimation.get_weights().extract(),
                "class":
                classes_map[estimation.name]
            }

        # Same sign selection for data-driven QCD
        estimation = DataEstimation(era, args.base_path, channel)
        estimation.name = "QCD"
        channel_iso = copy.deepcopy(channel)
        channel_iso.cuts.remove("tau_2_iso")
        channel_iso.cuts.add(
            Cut("byTightIsolationMVArun2v1DBoldDMwLT_2<0.5", "tau_2_iso"))
        channel_iso.cuts.add(
            Cut("byLooseIsolationMVArun2v1DBoldDMwLT_2>0.5",
                "tau_2_iso_loose"))
        output_config["processes"][estimation.name] = {
            "files": [
                str(f).replace(args.base_path + "/", "")
                for f in estimation.get_files()
            ],
            "cut_string": (estimation.get_cuts() + channel_iso.cuts +
                           additional_cuts).expand(),
            "weight_string":
            estimation.get_weights().extract(),
            "class":
            "noniso"
        }

    ############################################################################

    # Write output config
    logger.info("Write config to file: {}".format(args.output_config))
    yaml.dump(output_config,
              open(args.output_config, 'w'),
              default_flow_style=False)
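
The argument parser is not shown in this example; the following is a sketch of the argparse interface that main(args) above appears to expect, with option names inferred from the attributes accessed on `args` (the spellings, defaults, and help texts are assumptions):

import argparse

def parse_arguments():
    parser = argparse.ArgumentParser(description="Write training dataset config to YAML.")
    parser.add_argument("--base-path", dest="base_path", required=True, help="Path to Artus output files.")
    parser.add_argument("--output-path", dest="output_path", required=True, help="Directory for the training dataset.")
    parser.add_argument("--output-filename", dest="output_filename", required=True, help="Filename of the training dataset.")
    parser.add_argument("--tree-path", dest="tree_path", required=True, help="Path to the tree inside the ROOT files.")
    parser.add_argument("--event-branch", dest="event_branch", required=True, help="Branch with the event number.")
    parser.add_argument("--training-weight-branch", dest="training_weight_branch", required=True, help="Branch with the training weight.")
    parser.add_argument("--era", required=True, help="Experiment era, e.g. 2016.")
    parser.add_argument("--channel", required=True, choices=["et", "mt", "tt"], help="Analysis channel.")
    parser.add_argument("--database", required=True, help="Kappa datasets database.")
    parser.add_argument("--output-config", dest="output_config", required=True, help="Output YAML config file.")
    return parser.parse_args()
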
Example #4
def main(args):
    # Use 2016 dataset
    era = Run2016(args.datasets)

    # Channel
    if args.channel == "et":
        channel = ETSM2016()
        friend_directory = args.et_friend_directory
    elif args.channel == "mt":
        channel = MTSM2016()
        friend_directory = args.mt_friend_directory
    elif args.channel == "tt":
        channel = TTSM2016()
        friend_directory = args.tt_friend_directory
    else:
        raise Exception

    # Data estimation
    data = DataEstimation(era,
                          args.directory,
                          channel,
                          friend_directory=friend_directory)
    files = data.get_files()
    cuts = (data.get_cuts() + channel.cuts).expand()
    weights = data.get_weights().extract()

    # Combine all files
    tree = ROOT.TChain()
    for f in files:
        tree.Add(f + "/{}_nominal/ntuple".format(args.channel))
        #print("Add file to tree: {}".format(f))

    friend = ROOT.TChain()
    for f in files:
        friendname = os.path.basename(f).replace(".root", "")
        friendpath = os.path.join(friend_directory, friendname,
                                  friendname + ".root")
        friend.Add(friendpath + "/{}_nominal/ntuple".format(args.channel))
        #print("Add file to friend: {}".format(friendpath))

    tree.AddFriend(friend)

    # All events after baseline selection
    tree.Draw("m_sv>>all_events", cuts + "*({})".format(weights), "goff")
    all_events = ROOT.gDirectory.Get("all_events").Integral(-1000, 1000)

    # Only 16043
    tree.Draw(
        "m_sv>>only_16043", cuts + "*(({})==0)*(({})==1)*({})".format(
            args.cut18032, args.cut16043, weights), "goff")
    only_16043 = ROOT.gDirectory.Get("only_16043").Integral(-1000, 1000)

    # All 16043
    tree.Draw("m_sv>>all_16043",
              cuts + "*(({})==1)*({})".format(args.cut16043, weights), "goff")
    all_16043 = ROOT.gDirectory.Get("all_16043").Integral(-1000, 1000)

    # Only 18032
    tree.Draw(
        "m_sv>>only_18032", cuts + "*(({})==1)*(({})==0)*({})".format(
            args.cut18032, args.cut16043, weights), "goff")
    only_18032 = ROOT.gDirectory.Get("only_18032").Integral(-1000, 1000)

    # All 18032
    tree.Draw("m_sv>>all_18032",
              cuts + "*(({})==1)*({})".format(args.cut18032, weights), "goff")
    all_18032 = ROOT.gDirectory.Get("all_18032").Integral(-1000, 1000)

    # Both
    tree.Draw(
        "m_sv>>both", cuts + "*(({})==1)*(({})==1)*({})".format(
            args.cut18032, args.cut16043, weights), "goff")
    both = ROOT.gDirectory.Get("both").Integral(-1000, 1000)

    # None
    tree.Draw(
        "m_sv>>none", cuts + "*(({})==0)*(({})==0)*({})".format(
            args.cut18032, args.cut16043, weights), "goff")
    none = ROOT.gDirectory.Get("none").Integral(-1000, 1000)

    # Print
    print("Cross-check: {}, {}".format(both + only_18032 + only_16043 + none,
                                       all_events))
    print("Cross-check: {}, {}".format(all_18032 + only_16043 + none,
                                       all_events))
    print("Cross-check: {}, {}".format(only_18032 + all_16043 + none,
                                       all_events))
    print("Cross-check: {}, {}".format(all_16043, only_16043 + both))
    print("Cross-check: {}, {}".format(all_18032, only_18032 + both))
    print("Cross-check: {}, {}".format(
        all_events - both - only_18032 - only_16043, none))
    print("All events: {}".format(all_events))
    print("In none of both selection: {}".format(none))
    print("In both selections together: {}".format(both))
    print("In at least one selection: {}".format(both + only_18032 +
                                                 only_16043))
    print("Only 16043: {}".format(only_16043))
    print("All 16043: {}".format(all_16043))
    print("Only 18032: {}".format(only_18032))
    print("All 18032: {}".format(all_18032))
Example #5
def main(args):
    # Write argparse arguments to YAML config
    logger.debug("Write argparse arguments to YAML config.")
    output_config = {}
    output_config["base_path"] = args.base_path
    output_config["friend_paths"] = args.friend_paths
    output_config["output_path"] = args.output_path
    output_config["output_filename"] = args.output_filename
    output_config["tree_path"] = args.tree_path
    output_config["event_branch"] = args.event_branch
    output_config["training_weight_branch"] = args.training_weight_branch
    logger.debug("Channel" + args.channel + " Era " + args.era)

    # Define era
    if "2016" in args.era:
        from shape_producer.estimation_methods_2016 import DataEstimation, ggHEstimation, qqHEstimation, \
            ZTTEstimation, ZLEstimation, ZJEstimation, TTTEstimation, TTJEstimation, \
            ZTTEmbeddedEstimation, TTLEstimation, \
            EWKZEstimation, VVLEstimation, VVTEstimation, VVJEstimation, WEstimation

        from shape_producer.era import Run2016
        era = Run2016(args.database)

    elif "2017" in args.era:
        from shape_producer.estimation_methods_2017 import DataEstimation, ZTTEstimation, ZJEstimation, ZLEstimation, \
            TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation, VVJEstimation, VVLEstimation, WEstimation, \
            ggHEstimation, qqHEstimation, EWKZEstimation, ZTTEmbeddedEstimation

        from shape_producer.era import Run2017
        era = Run2017(args.database)

    elif "2018" in args.era:
        from shape_producer.estimation_methods_2018 import DataEstimation, ZTTEstimation, ZJEstimation, ZLEstimation, \
            TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation, VVJEstimation, VVLEstimation, WEstimation, \
            ggHEstimation, qqHEstimation, EWKZEstimation, ZTTEmbeddedEstimation

        from shape_producer.era import Run2018
        era = Run2018(args.database)
    else:
        logger.fatal("Era {} is not implemented.".format(args.era))
        raise Exception

    def estimationMethodAndClassMapGenerator():
        ###### common processes
        if args.training_stxs1p1:
            classes_map = {
                # class1
                "ggH_GG2H_PTH_GT200125": "ggh_PTHGT200",
                # class2
                "ggH_GG2H_0J_PTH_0_10125": "ggh_0J",
                "ggH_GG2H_0J_PTH_GT10125": "ggh_0J",
                # class3
                "ggH_GG2H_1J_PTH_0_60125": "ggh_1J_PTH0to120",
                "ggH_GG2H_1J_PTH_60_120125": "ggh_1J_PTH0to120",
                # class4
                "ggH_GG2H_1J_PTH_120_200125": "ggh_1J_PTH120to200",
                # class5
                "ggH_GG2H_GE2J_MJJ_0_350_PTH_0_60125": "ggh_2J",
                "ggH_GG2H_GE2J_MJJ_0_350_PTH_60_120125": "ggh_2J",
                "ggH_GG2H_GE2J_MJJ_0_350_PTH_120_200125": "ggh_2J",
                # class6
                "ggH_GG2H_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_0_25125": "vbftopo_lowmjj",
                "ggH_GG2H_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_GT25125": "vbftopo_lowmjj",
                "qqH_QQ2HQQ_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_0_25125": "vbftopo_lowmjj",
                "qqH_QQ2HQQ_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_GT25125": "vbftopo_lowmjj",
                # class7
                "ggH_GG2H_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_0_25125": "vbftopo_highmjj",
                "ggH_GG2H_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_GT25125": "vbftopo_highmjj",
                "qqH_QQ2HQQ_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_0_25125": "vbftopo_highmjj",
                "qqH_QQ2HQQ_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_GT25125": "vbftopo_highmjj",
                # class8
                "qqH_QQ2HQQ_GE2J_MJJ_0_60125": "qqh_2J",
                "qqH_QQ2HQQ_GE2J_MJJ_60_120125": "qqh_2J",
                "qqH_QQ2HQQ_GE2J_MJJ_120_350125": "qqh_2J",
                # class9
                "qqH_QQ2HQQ_GE2J_MJJ_GT350_PTH_GT200125": "qqh_PTHGT200",
            }
            estimationMethodList = [
ggHEstimation("ggH_GG2H_PTH_GT200125", era, args.base_path, channel),
ggHEstimation("ggH_GG2H_0J_PTH_0_10125", era, args.base_path, channel),
ggHEstimation("ggH_GG2H_0J_PTH_GT10125", era, args.base_path, channel),
ggHEstimation("ggH_GG2H_1J_PTH_0_60125", era, args.base_path, channel),
ggHEstimation("ggH_GG2H_1J_PTH_60_120125", era, args.base_path, channel),
ggHEstimation("ggH_GG2H_1J_PTH_120_200125", era, args.base_path, channel),
ggHEstimation("ggH_GG2H_GE2J_MJJ_0_350_PTH_0_60125", era, args.base_path, channel),
ggHEstimation("ggH_GG2H_GE2J_MJJ_0_350_PTH_60_120125", era, args.base_path, channel),
ggHEstimation("ggH_GG2H_GE2J_MJJ_0_350_PTH_120_200125", era, args.base_path, channel),
ggHEstimation("ggH_GG2H_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_0_25125", era, args.base_path, channel),
ggHEstimation("ggH_GG2H_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_GT25125", era, args.base_path, channel),
qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_0_25125", era, args.base_path, channel),
qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_GT25125", era, args.base_path, channel),
ggHEstimation("ggH_GG2H_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_0_25125", era, args.base_path, channel),
ggHEstimation("ggH_GG2H_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_GT25125", era, args.base_path, channel),
qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_0_25125", era, args.base_path, channel),
qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_GT25125", era, args.base_path, channel),
qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_0_60125", era, args.base_path, channel),
qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_60_120125", era, args.base_path, channel),
qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_120_350125", era, args.base_path, channel),
qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_GT350_PTH_GT200125", era, args.base_path, channel),
            ]
        elif args.training_inclusive:
            classes_map = {
                "ggH125": "xxh",
                "qqH125": "xxh",
            }
            estimationMethodList = [
                ggHEstimation("ggH125", era, args.base_path, channel),
                qqHEstimation("qqH125", era, args.base_path, channel),

            ]
        else:
            classes_map = {
                "ggH125": "ggh",
                "qqH125": "qqh",
            }
            estimationMethodList = [
                ggHEstimation("ggH125", era, args.base_path, channel),
                qqHEstimation("qqH125", era, args.base_path, channel),

            ]
        estimationMethodList.extend([
            EWKZEstimation(era, args.base_path, channel),
            VVLEstimation(era, args.base_path, channel)
        ])
        classes_map["EWKZ"]="misc"
        ##### TT* zl,zj processes
        estimationMethodList.extend([
            TTLEstimation(era, args.base_path, channel),
            ZLEstimation(era, args.base_path, channel)
        ])
        if args.channel == "tt":
            classes_map.update({
                "TTL": "misc",
                "ZL": "misc",
                "VVL": "misc"
            })
        ## not TTJ,ZJ for em
        elif args.channel == "em":
            classes_map.update({
                "TTL": "tt",
                "ZL": "misc",
                "VVL": "db"
            })
        else:
            classes_map.update({
                "TTL": "tt",
                "ZL": "zll",
                "VVL": "misc"
            })
        ######## Check for emb vs MC
        if args.training_z_estimation_method == "emb":
            classes_map["EMB"] = "emb"
            estimationMethodList.extend([
                ZTTEmbeddedEstimation(era, args.base_path, channel)])
        elif args.training_z_estimation_method == "mc":
            classes_map["ZTT"] = "ztt"
            estimationMethodList.extend([
                ZTTEstimation(era, args.base_path, channel),
                TTTEstimation(era, args.base_path, channel),
                VVTEstimation(era, args.base_path, channel)
            ])
            if args.channel == "tt":
                classes_map.update({
                    "TTT": "misc",
                    "VVT": "misc"
                })
            ## not TTJ,ZJ for em
            elif args.channel == "em":
                classes_map.update({
                    "TTT": "tt",
                    "VVT": "db"
                })
            else:
                classes_map.update({
                    "TTT": "tt",
                    "VVT": "misc"
                })

        else:
            logger.fatal("No valid training-z-estimation-method! Options are emb, mc. Argument was {}".format(
                args.training_z_estimation_method))
            raise Exception

        if args.training_jetfakes_estimation_method == "ff" and args.channel != "em":
            classes_map.update({
                "ff": "ff"
            })
        elif args.training_jetfakes_estimation_method == "mc" or args.channel == "em":
            # less data -> fewer categories for tt
            if args.channel == "tt":
                classes_map.update({
                    "TTJ": "misc",
                    "ZJ": "misc"
                })
            ## not TTJ,ZJ for em
            elif args.channel != "em":
                classes_map.update({
                    "TTJ": "tt",
                    "ZJ": "zll"
                })
            if args.channel != "em":
                classes_map.update({
                    "VVJ": "misc"
                })
                estimationMethodList.extend([
                    VVJEstimation(era, args.base_path, channel),
                    ZJEstimation(era, args.base_path, channel),
                    TTJEstimation(era, args.base_path, channel)
                ])
            ###w:
            estimationMethodList.extend([WEstimation(era, args.base_path, channel)])
            if args.channel in ["et", "mt"]:
                classes_map["W"] = "w"
            else:
                classes_map["W"] = "misc"
            ### QCD class
            if args.channel == "tt":
                classes_map["QCD"] = "noniso"
            else:
                classes_map["QCD"] = "ss"

        else:
            logger.fatal("No valid training-jetfakes-estimation-method! Options are ff, mc. Argument was {}".format(
                args.training_jetfakes_estimation_method))
            raise Exception
        return ([classes_map, estimationMethodList])

    channelDict = {}
    channelDict["2016"] = {"mt": MTSM2016(), "et": ETSM2016(), "tt": TTSM2016(), "em": EMSM2016()}
    channelDict["2017"] = {"mt": MTSM2017(), "et": ETSM2017(), "tt": TTSM2017(), "em": EMSM2017()}
    channelDict["2018"] = {"mt": MTSM2018(), "et": ETSM2018(), "tt": TTSM2018(), "em": EMSM2018()}

    channel = channelDict[args.era][args.channel]

    # Set up `processes` part of config
    output_config["processes"] = {}

    # Additional cuts
    additional_cuts = Cuts()
    logger.warning("Use additional cuts for mt: %s", additional_cuts.expand())

    classes_map, estimationMethodList = estimationMethodAndClassMapGenerator()

    ### disables all other estimation methods
    # classes_map={"ff":"ff"}
    # estimationMethodList=[]

    for estimation in estimationMethodList:
        output_config["processes"][estimation.name] = {
            "files": [
                str(f).replace(args.base_path.rstrip("/") + "/", "")
                for f in estimation.get_files()
            ],
            "cut_string": (estimation.get_cuts() + channel.cuts +
                           additional_cuts).expand(),
            "weight_string":
                estimation.get_weights().extract(),
            "class":
                classes_map[estimation.name]
        }

    if args.training_jetfakes_estimation_method == "mc" or args.channel == "em":
        if args.training_jetfakes_estimation_method == "ff":
            logger.warn("ff+em: using mc for em channel")
        # Same sign selection for data-driven QCD
        estimation = DataEstimation(era, args.base_path, channel)
        estimation.name = "QCD"
        channel_qcd = copy.deepcopy(channel)

        if args.channel != "tt":
            ## os= opposite sign
            channel_qcd.cuts.get("os").invert()
        # Same sign selection for data-driven QCD
        else:
            channel_qcd.cuts.remove("tau_2_iso")
            channel_qcd.cuts.add(
                Cut("byTightDeepTau2017v2p1VSjet_2<0.5", "tau_2_iso"))
            channel_qcd.cuts.add(
                Cut("byMediumDeepTau2017v2p1VSjet_2>0.5", "tau_2_iso_loose"))

        output_config["processes"][estimation.name] = {
            "files": [
                str(f).replace(args.base_path.rstrip("/") + "/", "")
                for f in estimation.get_files()
            ],
            "cut_string": (estimation.get_cuts() + channel_qcd.cuts + additional_cuts).expand(),
            "weight_string": estimation.get_weights().extract(),
            "class": classes_map[estimation.name]
        }
    else:  ## ff and not em
        estimation = DataEstimation(era, args.base_path, channel)
        estimation.name = "ff"
        aiso = copy.deepcopy(channel)
        if args.channel in ["et", "mt"]:
            aisoCut = Cut(
                "byTightDeepTau2017v2p1VSjet_2<0.5&&byVLooseDeepTau2017v2p1VSjet_2>0.5",
                "tau_aiso")
            fakeWeightstring = "ff2_nom"
            aiso.cuts.remove("tau_iso")
        elif args.channel == "tt":
            aisoCut = Cut(
                "(byTightDeepTau2017v2p1VSjet_2>0.5&&byTightDeepTau2017v2p1VSjet_1<0.5&&byVLooseDeepTau2017v2p1VSjet_1>0.5)||(byTightDeepTau2017v2p1VSjet_1>0.5&&byTightDeepTau2017v2p1VSjet_2<0.5&&byVLooseDeepTau2017v2p1VSjet_2>0.5)",
                "tau_aiso")
            fakeWeightstring = "(0.5*ff1_nom*(byTightDeepTau2017v2p1VSjet_1<0.5)+0.5*ff2_nom*(byTightDeepTau2017v2p1VSjet_2<0.5))"
            aiso.cuts.remove("tau_1_iso")
            aiso.cuts.remove("tau_2_iso")
        # self._nofake_processes = [copy.deepcopy(p) for p in nofake_processes]

        aiso.cuts.add(aisoCut)
        additionalWeights = Weights(Weight(fakeWeightstring, "fake_factor"))

        output_config["processes"][estimation.name] = {
            "files": [
                str(f).replace(args.base_path.rstrip("/") + "/", "")
                for f in estimation.get_files()
            ],
            "cut_string": (estimation.get_cuts() + aiso.cuts).expand(),
            "weight_string": (estimation.get_weights() + additionalWeights).extract(),
            "class": classes_map[estimation.name]
        }

    output_config["datasets"] = [args.output_path + "/fold" + fold + "_training_dataset.root" for fold in ["0", "1"]]
    #####################################
    # Write output config
    logger.info("Write config to file: {}".format(args.output_config))
    yaml.dump(output_config, open(args.output_config, 'w'), default_flow_style=False)
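
A minimal sketch of reading back a config written by the function above and listing the training class assigned to each process; the filename is an assumption:

import yaml

with open("training_config.yaml") as f:  # assumed filename
    config = yaml.safe_load(f)

for name, process in config["processes"].items():
    print("{} -> class {} ({} files)".format(name, process["class"], len(process["files"])))
print("Datasets:", config["datasets"])
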
Example #6
def main(args):
    # Write argparse arguments to YAML config
    logger.debug("Write argparse arguments to YAML config.")
    output_config = {}
    output_config["base_path"] = args.base_path
    output_config["friend_paths"] = args.friend_paths
    output_config["output_path"] = args.output_path
    output_config["output_filename"] = args.output_filename
    output_config["tree_path"] = args.tree_path
    output_config["event_branch"] = args.event_branch
    output_config["training_weight_branch"] = args.training_weight_branch

    # Define era
    if "2016" in args.era:
        from shape_producer.estimation_methods_2016 import DataEstimation, ggHEstimation, qqHEstimation, \
            ZTTEstimation, ZLEstimation, ZJEstimation, WEstimation, TTTEstimation, TTJEstimation, \
            ZTTEmbeddedEstimation, TTLEstimation, EWKZEstimation, VVLEstimation, VVJEstimation, \
            VVEstimation, VVTEstimation
        #QCDEstimation_SStoOS_MTETEM, QCDEstimationTT, EWKWpEstimation, EWKWmEstimation, , VHEstimation, HTTEstimation,
        from shape_producer.era import Run2016
        era = Run2016(args.database)

    elif "2017" in args.era:
        from shape_producer.estimation_methods_2017 import DataEstimation, ZTTEstimation, ZJEstimation, ZLEstimation, \
            TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation, VVJEstimation, VVLEstimation, WEstimation, \
            ggHEstimation, qqHEstimation, EWKZEstimation, ZTTEmbeddedEstimation

        from shape_producer.era import Run2017
        era = Run2017(args.database)

    elif "2018" in args.era:
        from shape_producer.estimation_methods_2018 import DataEstimation, ZTTEstimation, ZJEstimation, ZLEstimation, \
            TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation, VVJEstimation, VVLEstimation, WEstimation, \
            ggHEstimation, qqHEstimation, EWKZEstimation, ZTTEmbeddedEstimation

        from shape_producer.era import Run2018
        era = Run2018(args.database)
    else:
        logger.fatal("Era {} is not implemented.".format(args.era))
        raise Exception

    def estimationMethodAndClassMapGenerator():
        ###### common processes
        classes_map = {"ggH": "ggh", "qqH": "qqh", "EWKZ": "misc"}
        estimationMethodList = [
            ggHEstimation("ggH", era, args.base_path, channel),
            qqHEstimation("qqH", era, args.base_path, channel),
            EWKZEstimation(era, args.base_path, channel),
            VVLEstimation(era, args.base_path, channel),
            WEstimation(era, args.base_path, channel)
        ]
        ######## Check for emb vs MC
        if args.training_z_estimation_method == "emb":
            classes_map["EMB"] = "ztt"
            estimationMethodList.extend(
                [ZTTEmbeddedEstimation(era, args.base_path, channel)])

        elif args.training_z_estimation_method == "mc":
            classes_map["ZTT"] = "ztt"
            estimationMethodList.extend([
                ZTTEstimation(era, args.base_path, channel),
                TTTEstimation(era, args.base_path, channel),
                VVTEstimation(era, args.base_path, channel)
            ])
        else:
            logger.fatal(
                "No valid training-z-estimation-method! Options are emb, mc. Argument was {}"
                .format(args.training_z_estimation_method))
            raise Exception

        ##### TT* zl,zj processes
        estimationMethodList.extend([
            TTLEstimation(era, args.base_path, channel),
            ZLEstimation(era, args.base_path, channel)
        ])
        # less data -> fewer categories for tt
        if args.channel == "tt":
            classes_map.update({
                "TTT": "misc",
                "TTL": "misc",
                "TTJ": "misc",
                "ZL": "misc",
                "ZJ": "misc"
            })
            estimationMethodList.extend([
                ZJEstimation(era, args.base_path, channel),
                TTJEstimation(era, args.base_path, channel)
            ])
        ## not TTJ,ZJ for em
        elif args.channel == "em":
            classes_map.update({"TTT": "tt", "TTL": "tt", "ZL": "misc"})
        else:
            classes_map.update({
                "TTT": "tt",
                "TTL": "tt",
                "TTJ": "tt",
                "ZL": "zll",
                "ZJ": "zll"
            })
            estimationMethodList.extend([
                ZJEstimation(era, args.base_path, channel),
                TTJEstimation(era, args.base_path, channel)
            ])
        ###w:
        # estimation method already included, just different mapping for et and mt
        if args.channel in ["et", "mt"]:
            classes_map["W"] = "w"
        else:
            classes_map["W"] = "misc"

        #####  VV/[VVT,VVL,VVJ] split
        # VVL in common, VVT in "EMBvsMC"
        if args.channel == "em":
            classes_map.update({"VVT": "db", "VVL": "db"})
        else:
            classes_map.update({"VVT": "misc", "VVL": "misc", "VVJ": "misc"})
            estimationMethodList.extend([
                VVJEstimation(era, args.base_path, channel),
            ])
        ### QCD class

        if args.channel == "tt":
            classes_map["QCD"] = "noniso"
        else:
            classes_map["QCD"] = "ss"
        return ([classes_map, estimationMethodList])

    channelDict = {}
    channelDict["2016"] = {
        "mt": MTSM2016(),
        "et": ETSM2016(),
        "tt": TTSM2016(),
        "em": EMSM2016()
    }
    channelDict["2017"] = {
        "mt": MTSM2017(),
        "et": ETSM2017(),
        "tt": TTSM2017(),
        "em": EMSM2017()
    }
    channelDict["2018"] = {
        "mt": MTSM2018(),
        "et": ETSM2018(),
        "tt": TTSM2018(),
        "em": EMSM2018()
    }

    channel = channelDict[args.era][args.channel]

    # Set up `processes` part of config
    output_config["processes"] = {}

    # Additional cuts
    additional_cuts = Cuts()
    logger.warning("Use additional cuts for mt: %s", additional_cuts.expand())

    classes_map, estimationMethodList = estimationMethodAndClassMapGenerator()

    ##MC+/Embedding Processes
    for estimation in estimationMethodList:
        output_config["processes"][estimation.name] = {
            "files": [
                str(f).replace(args.base_path.rstrip("/") + "/", "")
                for f in estimation.get_files()
            ],
            "cut_string":
            (estimation.get_cuts() + channel.cuts + additional_cuts).expand(),
            "weight_string":
            estimation.get_weights().extract(),
            "class":
            classes_map[estimation.name]
        }
    ###
    # Same sign selection for data-driven QCD
    estimation = DataEstimation(era, args.base_path, channel)
    estimation.name = "QCD"
    channel_qcd = copy.deepcopy(channel)

    if args.channel != "tt":
        ## os= opposite sign
        channel_qcd.cuts.get("os").invert()
    # Same sign selection for data-driven QCD
    else:
        channel_qcd.cuts.remove("tau_2_iso")
        channel_qcd.cuts.add(
            Cut("byTightIsolationMVArun2017v2DBoldDMwLT2017_2<0.5",
                "tau_2_iso"))
        channel_qcd.cuts.add(
            Cut("byLooseIsolationMVArun2017v2DBoldDMwLT2017_2>0.5",
                "tau_2_iso_loose"))

    output_config["processes"][estimation.name] = {
        "files": [
            str(f).replace(args.base_path.rstrip("/") + "/", "")
            for f in estimation.get_files()
        ],
        "cut_string":
        (estimation.get_cuts() + channel_qcd.cuts + additional_cuts).expand(),
        "weight_string":
        estimation.get_weights().extract(),
        "class":
        classes_map[estimation.name]
    }

    #####################################
    # Write output config
    logger.info("Write config to file: {}".format(args.output_config))
    yaml.dump(output_config,
              open(args.output_config, 'w'),
              default_flow_style=False)
Example #7
def main(args):
    # Define era and channel
    era = Run2016(args.datasets)

    if "et" in args.channel:
        channel = ETSM()
    elif "mt" in args.channel:
        channel = MTSM()
    elif "tt" in args.channel:
        channel = TTSM()
    else:
        logger.fatal("Channel %s not known.", args.channel)
        raise Exception
    logger.debug("Use channel %s.", args.channel)

    # Get cut string
    estimation = DataEstimation(era, args.directory, channel)
    cut_string = (estimation.get_cuts() + channel.cuts).expand()
    logger.debug("Data cut string: %s", cut_string)

    # Get chain
    tree_path = "{}_nominal/ntuple".format(args.channel)
    logger.debug("Use tree path %s to get tree.", tree_path)

    files = [str(f) for f in estimation.get_files()]
    chain = ROOT.TChain()
    for i, f in enumerate(files):
        base = os.path.basename(f).replace(".root", "")
        f_friend = os.path.join(args.artus_friends, base,
                                base + ".root") + "/" + tree_path
        logger.debug("Add file with scores %d: %s", i, f_friend)
        chain.Add(f_friend)
        logger.debug("Add friend with ntuple %d: %s", i, f)
        chain.AddFriend(tree_path, f)

    chain_numentries = chain.GetEntries()
    if not chain_numentries > 0:
        logger.fatal("Chain (before skimming) does not contain any events.")
        raise Exception
    logger.debug("Found %s events before skimming with cut string.",
                 chain_numentries)

    # Skim chain
    chain_skimmed = chain.CopyTree(cut_string)
    chain_skimmed_numentries = chain_skimmed.GetEntries()

    if not chain_skimmed_numentries > 0:
        logger.fatal("Chain (after skimming) does not contain any events.")
        raise Exception
    logger.debug("Found %s events after skimming with cut string.",
                 chain_skimmed_numentries)

    # Calculate binning
    logger.debug("Load classes from config %s.", args.training_config)
    classes = yaml.load(open(args.training_config))["classes"]
    logger.debug("Use classes %s.", classes)
    scores = [[] for c in classes]
    for event in chain_skimmed:
        max_score = float(getattr(event, args.channel + "_max_score"))
        max_index = int(getattr(event, args.channel + "_max_index"))
        scores[max_index].append(max_score)

    binning = {}
    percentiles = range(0, 105, 5)
    logger.debug("Use percentiles %s for binning.", percentiles)
    for i, name in enumerate(classes):
        logger.debug("Process class %s.", name)
        x = scores[i] + [1.0 / float(len(classes)), 1.0]
        logger.debug("Found %s events in class %s.", len(x), name)
        binning[name] = [float(x) for x in np.percentile(x, percentiles)]

    # Write binning to output
    config = yaml.load(open(args.output))
    config["analysis"][args.channel] = binning
    logger.info("Write binning to %s.", args.output)
    yaml.dump(config, open(args.output, "w"))