def GenerateSimpleModelCollections(MC_path, weight_path=None, input_config_file=None):
    """Build one model collection per pair of the hard-coded event categories.

    Nine categories (VBF, ggH, WHh, ZHh, ZHl, WHl, ZHMET, ttHh, ttHl) are
    created from the ``*125/ZZ4lAnalysis.root`` files below ``MC_path``.  Every
    category gets an endpiece "210j" holding a single component; VBF and ggH
    additionally get an endpiece "2|1|0j" with one component each for >= 2,
    exactly 1 and exactly 0 in the ``nCleanedJetsPt30`` column.  All components
    use ``SimpleModel`` with the same hyperparameter dictionary and
    ``FlexiblePCAWhiteningPreprocessor``.

    Args:
        MC_path: Prefix that is string-concatenated with the relative file
            names, so it has to end with a path separator.
        weight_path: Unused; kept so that existing callers keep working.
        input_config_file: Passed on unchanged to
            ``ModelCollection.from_discriminant_endpieces``.

    Returns:
        A list with one ``ModelCollection.from_discriminant_endpieces(...)``
        result for each of the 36 category pairs, in a fixed order.
    """
    # The very same dict object is handed to every component.
    global_hyperparams = {'number_layers': 2, 'number_neurons': 128}

    # some shortcuts for the column lists
    nonperiodic_variables_default_0j = [
        "PFMET", "Z1Pt", "Z2Pt", "Z1Mass", "Z2Mass", "ZZPt", "ZZEta",
        "ZZMass", "ZZMassErr"
    ]
    nonperiodic_variables_default_1j = nonperiodic_variables_default_0j + [
        "nCleanedJetsPt30BTagged_bTagSF"
    ]
    nonperiodic_variables_default = nonperiodic_variables_default_0j + [
        "nCleanedJetsPt30", "nCleanedJetsPt30BTagged_bTagSF"
    ]
    periodic_variables_default = ["ZZPhi"]

    MELA_2j_variables = [
        "D_VBF2j_ggH_ME", "D_ZHh_ggH_ME", "D_WHh_ZHh_ME", "D_WHh_ggH_ME",
        "D_VBF2j_WHh_ME", "D_VBF2j_ZHh_ME"
    ]
    MELA_1j_variables = ["D_VBF1j_ggH_ME"]

    def indexed_columns(templates, num):
        """Fill every template with 0 .. num-1; entries stay grouped per index."""
        return [
            template.format(i) for i in range(num) for template in templates
        ]

    def nonperiodic_variables_jet(num):
        return indexed_columns(
            ("JetPt[JetPt|{:d}]", "JetEta[JetPt|{:d}]"), num)

    def periodic_variables_jet(num):
        return indexed_columns(("JetPhi[JetPt|{:d}]", ), num)

    def nonperiodic_variables_extra_lep(num):
        return indexed_columns(
            ("ExtraLepPt[ExtraLepPt|{:d}]", "ExtraLepEta[ExtraLepPt|{:d}]"),
            num)

    def periodic_variables_extra_lep(num):
        return indexed_columns(("ExtraLepPhi[ExtraLepPt|{:d}]", ), num)

    # Column layouts of the inclusive "210j" components.  Each call returns a
    # fresh (nonperiodic, periodic) pair.  The order of the groups differs
    # between categories and is kept exactly as it was; presumably it fixes
    # the order of the model inputs -- TODO confirm.
    def jet_columns():
        return (nonperiodic_variables_default + nonperiodic_variables_jet(2),
                periodic_variables_default + periodic_variables_jet(2))

    def lepton_jet_columns():
        return (nonperiodic_variables_default +
                nonperiodic_variables_extra_lep(2) +
                nonperiodic_variables_jet(2),
                periodic_variables_default + periodic_variables_extra_lep(2) +
                periodic_variables_jet(2))

    def jet_lepton_columns():
        return (nonperiodic_variables_default + nonperiodic_variables_jet(2) +
                nonperiodic_variables_extra_lep(2),
                periodic_variables_default + periodic_variables_jet(2) +
                periodic_variables_extra_lep(2))

    def positive_mass_cut():
        """Return a fresh cut accepting rows with ZZMass > 0."""
        return lambda row: row["ZZMass"] > 0

    def make_component(name, public_name, cut, nonperiodic, periodic):
        """Create a component with the settings common to all of them."""
        return DiscriminantEndpieceComponent(
            name=name,
            public_name=public_name,
            component_cut=cut,
            nonperiodic_columns=nonperiodic,
            periodic_columns=periodic,
            model_basetype=SimpleModel,
            model_hyperparams=global_hyperparams,
            preprocessor_basetype=FlexiblePCAWhiteningPreprocessor)

    def add_differential_endpiece(category, nonperiodic_base_1j):
        """Attach the "2|1|0j" endpiece (split by jet count) to category."""
        ep = DiscriminantEndpiece("2|1|0j")
        ep.add_component(
            make_component(
                "2j", "2j", lambda row: row["nCleanedJetsPt30"] >= 2,
                nonperiodic_variables_default + MELA_2j_variables +
                nonperiodic_variables_jet(2),
                periodic_variables_default + periodic_variables_jet(2)))
        ep.add_component(
            make_component(
                "1j", "1j", lambda row: row["nCleanedJetsPt30"] == 1,
                nonperiodic_base_1j + MELA_1j_variables +
                nonperiodic_variables_jet(1),
                periodic_variables_default + periodic_variables_jet(1)))
        ep.add_component(
            make_component("0j", "0j",
                           lambda row: row["nCleanedJetsPt30"] == 0,
                           nonperiodic_variables_default_0j, []))
        category.add_endpiece(ep)

    def add_inclusive_endpiece(category, cut, columns):
        """Attach the jet-inclusive "210j" endpiece to category."""
        nonperiodic, periodic = columns
        ep = DiscriminantEndpiece("210j")
        ep.add_component(
            make_component("210j", "", cut, nonperiodic, periodic))
        category.add_endpiece(ep)

    # ------------------------
    # define the categories for the classifier
    VBF_cat = Category(
        "VBF", {MC_path + "VBFH125/ZZ4lAnalysis.root": cuts.no_cut})
    # NOTE(review): the VBF "1j" component starts from the full default list
    # (which contains "nCleanedJetsPt30"), whereas ggH below starts from the
    # dedicated 1j list.  Kept as is -- confirm whether this is intended.
    add_differential_endpiece(VBF_cat, nonperiodic_variables_default)
    # NOTE(review): VBF is the only category whose inclusive cut accepts every
    # row; all others require ZZMass > 0.  Kept as is.
    add_inclusive_endpiece(VBF_cat, lambda row: True, jet_columns())

    ggH_cat = Category(
        "ggH", {MC_path + "ggH125/ZZ4lAnalysis.root": cuts.no_cut})
    add_differential_endpiece(ggH_cat, nonperiodic_variables_default_1j)
    add_inclusive_endpiece(ggH_cat, positive_mass_cut(), jet_columns())

    WHh_cat = Category(
        "WHh", {
            MC_path + "WplusH125/ZZ4lAnalysis.root": cuts.WHhadr_cut,
            MC_path + "WminusH125/ZZ4lAnalysis.root": cuts.WHhadr_cut
        })
    add_inclusive_endpiece(WHh_cat, positive_mass_cut(), jet_columns())

    ZHh_cat = Category(
        "ZHh", {MC_path + "ZH125/ZZ4lAnalysis.root": cuts.ZHhadr_cut})
    add_inclusive_endpiece(ZHh_cat, positive_mass_cut(), jet_columns())

    ZHl_cat = Category(
        "ZHl", {MC_path + "ZH125/ZZ4lAnalysis.root": cuts.ZHlept_cut})
    add_inclusive_endpiece(ZHl_cat, positive_mass_cut(), lepton_jet_columns())

    WHl_cat = Category(
        "WHl", {
            MC_path + "WplusH125/ZZ4lAnalysis.root": cuts.WHlept_cut,
            MC_path + "WminusH125/ZZ4lAnalysis.root": cuts.WHlept_cut
        })
    add_inclusive_endpiece(WHl_cat, positive_mass_cut(), lepton_jet_columns())

    ZHMET_cat = Category(
        "ZHMET", {MC_path + "ZH125/ZZ4lAnalysis.root": cuts.ZHMET_cut})
    add_inclusive_endpiece(ZHMET_cat, positive_mass_cut(), jet_columns())

    ttHh_cat = Category(
        "ttHh", {MC_path + "ttH125/ZZ4lAnalysis.root": cuts.ttHhadr_cut})
    # ttHh lists the jet columns before the extra-lepton columns, unlike the
    # other lepton-aware categories.
    add_inclusive_endpiece(ttHh_cat, positive_mass_cut(), jet_lepton_columns())

    ttHl_cat = Category(
        "ttHl", {MC_path + "ttH125/ZZ4lAnalysis.root": cuts.ttHlept_cut})
    add_inclusive_endpiece(ttHl_cat, positive_mass_cut(), lepton_jet_columns())

    # All 36 pairings of the nine categories.  The sequence and the order
    # within each pair are those of the former hand-written append calls.
    category_pairs = [
        (VBF_cat, ggH_cat),
        (WHh_cat, ggH_cat),
        (ZHh_cat, ggH_cat),
        (WHh_cat, ZHh_cat),
        (VBF_cat, WHh_cat),
        (VBF_cat, ZHh_cat),
        (WHl_cat, ggH_cat),
        (WHl_cat, VBF_cat),
        (WHl_cat, WHh_cat),
        (WHl_cat, ZHh_cat),
        (WHl_cat, ZHl_cat),
        (WHl_cat, ZHMET_cat),
        (WHl_cat, ttHh_cat),
        (WHl_cat, ttHl_cat),
        (ZHh_cat, ZHl_cat),
        (ZHh_cat, ZHMET_cat),
        (ZHh_cat, ttHh_cat),
        (ZHh_cat, ttHl_cat),
        (ZHl_cat, ggH_cat),
        (ZHl_cat, VBF_cat),
        (ZHl_cat, WHh_cat),
        (ZHl_cat, ZHMET_cat),
        (ZHl_cat, ttHh_cat),
        (ZHl_cat, ttHl_cat),
        (ZHMET_cat, ggH_cat),
        (ZHMET_cat, VBF_cat),
        (ZHMET_cat, WHh_cat),
        (ZHMET_cat, ttHh_cat),
        (ZHMET_cat, ttHl_cat),
        (ttHh_cat, ggH_cat),
        (ttHh_cat, VBF_cat),
        (ttHh_cat, WHh_cat),
        (ttHh_cat, ttHl_cat),
        (ttHl_cat, ggH_cat),
        (ttHl_cat, VBF_cat),
        (ttHl_cat, WHh_cat),
    ]

    return [
        ModelCollection.from_discriminant_endpieces(
            *Category.match(first_cat, second_cat),
            input_config_file=input_config_file)
        for first_cat, second_cat in category_pairs
    ]
def GenerateSimpleModelCollections(MC_path, weight_path=None, input_config_file=None, hyperparam_config_file=None, mass_point=125.0):
    """Build one model collection per pair of categories listed in categories.conf.

    The configuration is read from
    ``$CMSSW_BASE/src/ZZAnalysis/AnalysisStep/test/Python/trainlib/categories.conf``.
    Field ``category_names`` of section ``categories`` lists the categories;
    each category section lists its ``endpieces`` and each endpiece section
    lists its ``components``.  Components are created with ``SimpleModel`` and
    ``FlexiblePCAWhiteningPreprocessor``, using section ``default`` as their
    variable section.

    Args:
        MC_path: Handed to ``Category.from_config`` for every category.
        weight_path: Unused.
        input_config_file: Forwarded to
            ``ModelCollection.from_discriminant_endpieces``.
        hyperparam_config_file: Forwarded to
            ``ModelCollection.from_discriminant_endpieces``.
        mass_point: Unused.

    Returns:
        A list with one ``ModelCollection.from_discriminant_endpieces(...)``
        result per 2-combination of the categories, in the order produced by
        ``itertools.combinations``.

    Raises:
        KeyError: If the environment variable ``CMSSW_BASE`` is not set.
    """

    def as_is(entry):
        # list entries are used verbatim, no conversion
        return entry

    handler = ConfigFileHandler()
    conf_path = os.path.join(
        os.environ["CMSSW_BASE"],
        "src/ZZAnalysis/AnalysisStep/test/Python/trainlib/categories.conf")
    handler.load_configuration(conf_path)
    variable_defaults = handler.get_section("default")

    category_names = ConfigFileUtils.parse_list(
        handler.get_field("categories", "category_names"), as_is)

    # A single parenthesised argument prints the same text under the
    # Python 2 print statement as under the print function.
    print("found definitions for the following event categories: " +
          str(category_names))

    built_categories = []
    for category_name in category_names:
        print("------------------------------------------------")
        print("now building category " + category_name)

        category_section = handler.get_section(category_name)
        category = Category.from_config(category_section, category_name,
                                        MC_path)

        # attach every endpiece that is declared for this category
        endpiece_names = ConfigFileUtils.parse_list(
            category_section["endpieces"], as_is)
        for endpiece_name in endpiece_names:
            endpiece_section = handler.get_section(endpiece_name)
            endpiece = DiscriminantEndpiece.from_config(endpiece_section)

            component_names = ConfigFileUtils.parse_list(
                endpiece_section["components"], as_is)
            for component_name in component_names:
                component_section = handler.get_section(component_name)
                endpiece.add_component(
                    DiscriminantEndpieceComponent.from_config(
                        component_section,
                        variable_section=variable_defaults,
                        model_basetype=SimpleModel,
                        preprocessor_basetype=FlexiblePCAWhiteningPreprocessor))

            category.add_endpiece(endpiece)

        built_categories.append(category)

    # every category is complete now; pair each one with each other one
    return [
        ModelCollection.from_discriminant_endpieces(
            *Category.match(first_cat, second_cat),
            input_config_file=input_config_file,
            hyperparam_config_file=hyperparam_config_file)
        for first_cat, second_cat in itertools.combinations(
            built_categories, 2)
    ]
def GenerateSimpleModelCollections(MC_path, weight_path=None, input_config_file=None, hyperparam_config_file=None, mass_point=125.0): mcolls = [] global_max_epochs = 100 global_hyperparams = {'number_layers': 2, 'number_neurons': 128} # some shortcuts concerning the default variables that are used for category pairings in case nothing else is specified nonperiodic_variables_default_0j = [ "PFMET", "Z1Pt", "Z2Pt", "Z1Mass", "Z2Mass", "ZZPt", "ZZEta", "ZZMass", "ZZMassErr" ] nonperiodic_variables_default_1j = nonperiodic_variables_default_0j + [ "nCleanedJetsPt30BTagged_bTagSF" ] nonperiodic_variables_default = nonperiodic_variables_default_0j + [ "nCleanedJetsPt30", "nCleanedJetsPt30BTagged_bTagSF" ] periodic_variables_default = ["ZZPhi"] def nonperiodic_variables_jet(num): retval = [] for i in range(num): retval.append("JetPt[JetPt|{:d}]".format(i)) retval.append("JetEta[JetPt|{:d}]".format(i)) return retval def periodic_variables_jet(num): retval = [] for i in range(num): retval.append("JetPhi[JetPt|{:d}]".format(i)) return retval def nonperiodic_variables_extra_lep(num): retval = [] for i in range(num): retval.append("ExtraLepPt[ExtraLepPt|{:d}]".format(i)) retval.append("ExtraLepEta[ExtraLepPt|{:d}]".format(i)) return retval def periodic_variables_extra_lep(num): retval = [] for i in range(num): retval.append("ExtraLepPhi[ExtraLepPt|{:d}]".format(i)) return retval MELA_2j_variables = [ "D_VBF2j_ggH_ME", "D_ZHh_ggH_ME", "D_WHh_ZHh_ME", "D_WHh_ggH_ME", "D_VBF2j_WHh_ME", "D_VBF2j_ZHh_ME" ] MELA_1j_variables = ["D_VBF1j_ggH_ME"] # translation dictionaries that link Higgs mass points to the corresponding training files mass_point_translation = { 120.0: "120", 124.0: "124", 125.0: "125", 126.0: "126", 130.0: "130" } mass_point_suffix = mass_point_translation[mass_point] # ------------------------ # define the categories for the classifier VBF_cat = Category( "VBF", { MC_path + "VBFH" + mass_point_suffix + "/ZZ4lAnalysis.root": cuts.no_cut }) # mode that is fully 
differential in the number of jets ep = DiscriminantEndpiece("2|1|0j") ep_comp = DiscriminantEndpieceComponent( name="2j", public_name="2j", component_cut=lambda row: row["nCleanedJetsPt30"] >= 2, nonperiodic_columns=nonperiodic_variables_default + MELA_2j_variables + nonperiodic_variables_jet(2), periodic_columns=periodic_variables_default + periodic_variables_jet(2), model_basetype=SimpleModel, model_hyperparams=global_hyperparams, preprocessor_basetype=FlexiblePCAWhiteningPreprocessor) ep.add_component(ep_comp) ep_comp = DiscriminantEndpieceComponent( name="1j", public_name="1j", component_cut=lambda row: row["nCleanedJetsPt30"] == 1, nonperiodic_columns=nonperiodic_variables_default + MELA_1j_variables + nonperiodic_variables_jet(1), periodic_columns=periodic_variables_default + periodic_variables_jet(1), model_basetype=SimpleModel, model_hyperparams=global_hyperparams, preprocessor_basetype=FlexiblePCAWhiteningPreprocessor) ep.add_component(ep_comp) ep_comp = DiscriminantEndpieceComponent( name="0j", public_name="0j", component_cut=lambda row: row["nCleanedJetsPt30"] == 0, nonperiodic_columns=nonperiodic_variables_default_0j, periodic_columns=[], model_basetype=SimpleModel, model_hyperparams=global_hyperparams, preprocessor_basetype=FlexiblePCAWhiteningPreprocessor) ep.add_component(ep_comp) VBF_cat.add_endpiece(ep) # mode that is inclusive in the number of jets ep = DiscriminantEndpiece("210j") ep_comp = DiscriminantEndpieceComponent( name="210j", public_name="", component_cut=lambda row: True, nonperiodic_columns=nonperiodic_variables_default + nonperiodic_variables_jet(2), periodic_columns=periodic_variables_default + periodic_variables_jet(2), model_basetype=SimpleModel, model_hyperparams=global_hyperparams, preprocessor_basetype=FlexiblePCAWhiteningPreprocessor) ep.add_component(ep_comp) VBF_cat.add_endpiece(ep) # ------------------------------------ # define the categories for the classifier ggH_cat = Category( "ggH", { MC_path + "ggH" + 
mass_point_suffix + "/ZZ4lAnalysis.root": cuts.no_cut }) # mode that is fully differential in the number of jets ep = DiscriminantEndpiece("2|1|0j") ep_comp = DiscriminantEndpieceComponent( name="2j", public_name="2j", component_cut=lambda row: row["nCleanedJetsPt30"] >= 2, nonperiodic_columns=nonperiodic_variables_default + MELA_2j_variables + nonperiodic_variables_jet(2), periodic_columns=periodic_variables_default + periodic_variables_jet(2), model_basetype=SimpleModel, model_hyperparams=global_hyperparams, preprocessor_basetype=FlexiblePCAWhiteningPreprocessor) ep.add_component(ep_comp) ep_comp = DiscriminantEndpieceComponent( name="1j", public_name="1j", component_cut=lambda row: row["nCleanedJetsPt30"] == 1, nonperiodic_columns=nonperiodic_variables_default_1j + MELA_1j_variables + nonperiodic_variables_jet(1), periodic_columns=periodic_variables_default + periodic_variables_jet(1), model_basetype=SimpleModel, model_hyperparams=global_hyperparams, preprocessor_basetype=FlexiblePCAWhiteningPreprocessor) ep.add_component(ep_comp) ep_comp = DiscriminantEndpieceComponent( name="0j", public_name="0j", component_cut=lambda row: row["nCleanedJetsPt30"] == 0, nonperiodic_columns=nonperiodic_variables_default_0j, periodic_columns=[], model_basetype=SimpleModel, model_hyperparams=global_hyperparams, preprocessor_basetype=FlexiblePCAWhiteningPreprocessor) ep.add_component(ep_comp) ggH_cat.add_endpiece(ep) # mode that is inclusive in the number of jets ep = DiscriminantEndpiece("210j") ep_comp = DiscriminantEndpieceComponent( name="210j", public_name="", component_cut=lambda row: True, nonperiodic_columns=nonperiodic_variables_default + nonperiodic_variables_jet(2), periodic_columns=periodic_variables_default + periodic_variables_jet(2), model_basetype=SimpleModel, model_hyperparams=global_hyperparams, preprocessor_basetype=FlexiblePCAWhiteningPreprocessor) ep.add_component(ep_comp) ggH_cat.add_endpiece(ep) # ------------------------------------ # define the categories 
for the classifier WHh_cat = Category( "WHh", { MC_path + "WplusH" + mass_point_suffix + "/ZZ4lAnalysis.root": cuts.WHhadr_cut, MC_path + "WminusH" + mass_point_suffix + "/ZZ4lAnalysis.root": cuts.WHhadr_cut }) # mode that is inclusive in the number of jets ep = DiscriminantEndpiece("210j") ep_comp = DiscriminantEndpieceComponent( name="210j", public_name="", component_cut=lambda row: True, nonperiodic_columns=nonperiodic_variables_default + nonperiodic_variables_jet(2), periodic_columns=periodic_variables_default + periodic_variables_jet(2), model_basetype=SimpleModel, model_hyperparams=global_hyperparams, preprocessor_basetype=FlexiblePCAWhiteningPreprocessor) ep.add_component(ep_comp) WHh_cat.add_endpiece(ep) # ------------------------------------ # define the categories for the classifier ZHh_cat = Category( "ZHh", { MC_path + "ZH" + mass_point_suffix + "/ZZ4lAnalysis.root": cuts.ZHhadr_cut }) # mode that is inclusive in the number of jets ep = DiscriminantEndpiece("210j") ep_comp = DiscriminantEndpieceComponent( name="210j", public_name="", component_cut=lambda row: True, nonperiodic_columns=nonperiodic_variables_default + nonperiodic_variables_jet(2), periodic_columns=periodic_variables_default + periodic_variables_jet(2), model_basetype=SimpleModel, model_hyperparams=global_hyperparams, preprocessor_basetype=FlexiblePCAWhiteningPreprocessor) ep.add_component(ep_comp) ZHh_cat.add_endpiece(ep) # ------------------------------------ # define the categories for the classifier ZHl_cat = Category( "ZHl", { MC_path + "ZH" + mass_point_suffix + "/ZZ4lAnalysis.root": cuts.ZHlept_cut }) # mode that is inclusive in the number of jets ep = DiscriminantEndpiece("210j") ep_comp = DiscriminantEndpieceComponent( name="210j", public_name="", component_cut=lambda row: True, nonperiodic_columns=nonperiodic_variables_default + nonperiodic_variables_extra_lep(2) + nonperiodic_variables_jet(2), periodic_columns=periodic_variables_default + periodic_variables_extra_lep(2) + 
periodic_variables_jet(2), model_basetype=SimpleModel, model_hyperparams=global_hyperparams, preprocessor_basetype=FlexiblePCAWhiteningPreprocessor) ep.add_component(ep_comp) ZHl_cat.add_endpiece(ep) # ------------------------------------ # define the categories for the classifier WHl_cat = Category( "WHl", { MC_path + "WplusH" + mass_point_suffix + "/ZZ4lAnalysis.root": cuts.WHlept_cut, MC_path + "WminusH" + mass_point_suffix + "/ZZ4lAnalysis.root": cuts.WHlept_cut }) # mode that is inclusive in the number of jets ep = DiscriminantEndpiece("210j") ep_comp = DiscriminantEndpieceComponent( name="210j", public_name="", component_cut=lambda row: True, nonperiodic_columns=nonperiodic_variables_default + nonperiodic_variables_extra_lep(2) + nonperiodic_variables_jet(2), periodic_columns=periodic_variables_default + periodic_variables_extra_lep(2) + periodic_variables_jet(2), model_basetype=SimpleModel, model_hyperparams=global_hyperparams, preprocessor_basetype=FlexiblePCAWhiteningPreprocessor) ep.add_component(ep_comp) WHl_cat.add_endpiece(ep) # ------------------------------------ # define the categories for the classifier ZHMET_cat = Category( "ZHMET", { MC_path + "ZH" + mass_point_suffix + "/ZZ4lAnalysis.root": cuts.ZHMET_cut }) # mode that is inclusive in the number of jets ep = DiscriminantEndpiece("210j") ep_comp = DiscriminantEndpieceComponent( name="210j", public_name="", component_cut=lambda row: True, nonperiodic_columns=nonperiodic_variables_default + nonperiodic_variables_jet(2), periodic_columns=periodic_variables_default + periodic_variables_jet(2), model_basetype=SimpleModel, model_hyperparams=global_hyperparams, preprocessor_basetype=FlexiblePCAWhiteningPreprocessor) ep.add_component(ep_comp) ZHMET_cat.add_endpiece(ep) # ------------------------------------ # define the categories for the classifier ttHh_cat = Category( "ttHh", { MC_path + "ttH" + mass_point_suffix + "/ZZ4lAnalysis.root": cuts.ttHhadr_cut }) # mode that is inclusive in the number of 
jets ep = DiscriminantEndpiece("210j") ep_comp = DiscriminantEndpieceComponent( name="210j", public_name="", component_cut=lambda row: True, nonperiodic_columns=nonperiodic_variables_default + nonperiodic_variables_jet(2) + nonperiodic_variables_extra_lep(2), periodic_columns=periodic_variables_default + periodic_variables_jet(2) + periodic_variables_extra_lep(2), model_basetype=SimpleModel, model_hyperparams=global_hyperparams, preprocessor_basetype=FlexiblePCAWhiteningPreprocessor) ep.add_component(ep_comp) ttHh_cat.add_endpiece(ep) # ------------------------------------ # define the categories for the classifier ttHl_cat = Category( "ttHl", { MC_path + "ttH" + mass_point_suffix + "/ZZ4lAnalysis.root": cuts.ttHlept_cut }) # mode that is inclusive in the number of jets ep = DiscriminantEndpiece("210j") ep_comp = DiscriminantEndpieceComponent( name="210j", public_name="", component_cut=lambda row: True, nonperiodic_columns=nonperiodic_variables_default + nonperiodic_variables_extra_lep(2) + nonperiodic_variables_jet(2), periodic_columns=periodic_variables_default + periodic_variables_extra_lep(2) + periodic_variables_jet(2), model_basetype=SimpleModel, model_hyperparams=global_hyperparams, preprocessor_basetype=FlexiblePCAWhiteningPreprocessor) ep.add_component(ep_comp) ttHl_cat.add_endpiece(ep) # ------------------------------------ # for training on Z+X only ZX_datastream = {MC_path + "AllData/ZZ4lAnalysis.root": cuts.no_cut} ZX_cat = Category("ZX", {MC_path + "ZX/ZZ4lAnalysis.root": cuts.no_cut}, datastream_unmixed=ZX_datastream) # mode that is inclusive in the number of jets ep = DiscriminantEndpiece("210j") ep_comp = DiscriminantEndpieceComponent( name="210j", public_name="", component_cut=lambda row: True, nonperiodic_columns=nonperiodic_variables_default + nonperiodic_variables_extra_lep(2) + nonperiodic_variables_jet(2), periodic_columns=periodic_variables_default + periodic_variables_extra_lep(2) + periodic_variables_jet(2), model_basetype=SimpleModel, 
model_hyperparams=global_hyperparams, preprocessor_basetype=FlexiblePCAWhiteningPreprocessor) ep.add_component(ep_comp) ZX_cat.add_endpiece(ep) # ------------------------------------ # for training on Z+X only qq_cat = Category("qq", {MC_path + "ZZTo4l/ZZ4lAnalysis.root": cuts.no_cut}) # mode that is inclusive in the number of jets ep = DiscriminantEndpiece("210j") ep_comp = DiscriminantEndpieceComponent( name="210j", public_name="", component_cut=lambda row: True, nonperiodic_columns=nonperiodic_variables_default + nonperiodic_variables_extra_lep(2) + nonperiodic_variables_jet(2), periodic_columns=periodic_variables_default + periodic_variables_extra_lep(2) + periodic_variables_jet(2), model_basetype=SimpleModel, model_hyperparams=global_hyperparams, preprocessor_basetype=FlexiblePCAWhiteningPreprocessor) ep.add_component(ep_comp) qq_cat.add_endpiece(ep) # now make all combinations between those categories and add them to the model collections (do it manually now, automatized later) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(VBF_cat, ggH_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(WHh_cat, ggH_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHh_cat, ggH_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(WHh_cat, ZHh_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(VBF_cat, WHh_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(VBF_cat, ZHh_cat), 
input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(WHl_cat, ggH_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(WHl_cat, VBF_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(WHl_cat, WHh_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(WHl_cat, ZHh_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(WHl_cat, ZHl_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(WHl_cat, ZHMET_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(WHl_cat, ttHh_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(WHl_cat, ttHl_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHh_cat, ZHl_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHh_cat, ZHMET_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHh_cat, ttHh_cat), 
input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHh_cat, ttHl_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHl_cat, ggH_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHl_cat, VBF_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHl_cat, WHh_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHl_cat, ZHMET_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHl_cat, ttHh_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHl_cat, ttHl_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHMET_cat, ggH_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHMET_cat, VBF_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHMET_cat, WHh_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHMET_cat, ttHh_cat), 
input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHMET_cat, ttHl_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ttHh_cat, ggH_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ttHh_cat, VBF_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ttHh_cat, WHh_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ttHh_cat, ttHl_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ttHl_cat, ggH_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ttHl_cat, VBF_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ttHl_cat, WHh_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) # pairings of Z+X category with the signals mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ggH_cat, ZX_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(VBF_cat, ZX_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( 
*Category.match(ZHh_cat, ZX_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHl_cat, ZX_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(WHh_cat, ZX_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(WHl_cat, ZX_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHMET_cat, ZX_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ttHh_cat, ZX_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ttHl_cat, ZX_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) # pairings of qq category with the signals mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ggH_cat, qq_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(VBF_cat, qq_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHh_cat, qq_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHl_cat, qq_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( 
ModelCollection.from_discriminant_endpieces( *Category.match(WHh_cat, qq_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(WHl_cat, qq_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZHMET_cat, qq_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ttHh_cat, qq_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ttHl_cat, qq_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) # the two background categories among themselves mcolls.append( ModelCollection.from_discriminant_endpieces( *Category.match(ZX_cat, qq_cat), input_config_file=input_config_file, hyperparam_config_file=hyperparam_config_file)) return mcolls