def main(args):
    """Produce nominal and systematic shape histograms for the tau-ID measurement.

    Builds the mt (muon-tau) analysis channel and the mm (di-muon) control
    channel for the 2016 era, registers the nominal shapes for every
    process/category combination plus a long list of systematic shape
    variations, and finally triggers histogram production.

    Args:
        args: Parsed command-line arguments. Attributes read here: ``tag``,
            ``num_threads``, ``skip_systematic_variations``, ``era``,
            ``datasets``, ``working_point``, ``directory``,
            ``fake_factor_friend_directory``, ``binning``, ``gof_channel``,
            ``gof_variable`` and ``channels``.

    Raises:
        Exception: If ``args.era`` does not contain a supported era (only
            2016 is implemented in this function).
    """
    # Container for all distributions to be drawn.
    logger.info("Set up shape variations.")
    systematics = Systematics(
        "{}_shapes.root".format(args.tag),
        num_threads=args.num_threads,
        skip_systematic_variations=args.skip_systematic_variations)

    # Era selection.
    if "2016" in args.era:
        from shape_producer.estimation_methods_2016 import DataEstimation, HTTEstimation, ggHEstimation, qqHEstimation, VHEstimation, WHEstimation, ZHEstimation, ttHEstimation, ZTTEstimation, ZLEstimation, ZJEstimation, WEstimation, VVLEstimation, VVTEstimation, VVJEstimation, TTLEstimation, TTTEstimation, TTJEstimation, QCDEstimation_SStoOS_MTETEM, QCDEstimationTT, ZTTEmbeddedEstimation, FakeEstimationLT, NewFakeEstimationLT, FakeEstimationTT, NewFakeEstimationTT, DYJetsToLLEstimation, TTEstimation, VVEstimation
        from shape_producer.era import Run2016
        era = Run2016(args.datasets)
    else:
        logger.critical("Era {} is not implemented.".format(args.era))
        # FIX: raise an instance carrying the message instead of the bare
        # Exception class, so the reason survives into the traceback.
        raise Exception("Era {} is not implemented.".format(args.era))

    # Tau-ID working-point cut expressions, MVA-based isolation variant.
    # Currently unused (DeepTau is selected below) but kept as the drop-in
    # alternative; "mm" maps to a trivially-true cut since the di-muon
    # control region has no hadronic tau.
    wp_dict_mva = {
        "vvloose": "byVVLooseIsolationMVArun2017v2DBoldDMwLT2017_2",
        "vloose": "byVLooseIsolationMVArun2017v2DBoldDMwLT2017_2",
        "loose": "byLooseIsolationMVArun2017v2DBoldDMwLT2017_2",
        "medium": "byMediumIsolationMVArun2017v2DBoldDMwLT2017_2",
        "tight": "byTightIsolationMVArun2017v2DBoldDMwLT2017_2",
        "vtight": "byVTightIsolationMVArun2017v2DBoldDMwLT2017_2",
        "vvtight": "byVVTightIsolationMVArun2017v2DBoldDMwLT2017_2",
        "mm": "0<1",
    }

    # Tau-ID working-point cut expressions, DeepTau-based variant.
    wp_dict_deeptau = {
        "vvvloose": "byVVVLooseDeepTau2017v2p1VSjet_2",
        "vvloose": "byVVLooseDeepTau2017v2p1VSjet_2",
        "vloose": "byVLooseDeepTau2017v2p1VSjet_2",
        "loose": "byLooseDeepTau2017v2p1VSjet_2",
        "medium": "byMediumDeepTau2017v2p1VSjet_2",
        "tight": "byTightDeepTau2017v2p1VSjet_2",
        "vtight": "byVTightDeepTau2017v2p1VSjet_2",
        "vvtight": "byVVTightDeepTau2017v2p1VSjet_2",
        "mm": "0<1",
    }

    wp_dict = wp_dict_deeptau
    # FIX: the message claimed "MVA Tau ID" while the DeepTau dictionary is
    # selected above; keep the wording ID-agnostic.
    logger.info("Produce shapes for the %s working point of the tau ID",
                args.working_point)

    # Channels and processes.
    directory = args.directory
    # Only used by the (currently commented-out) fake-factor estimations.
    ff_friend_directory = args.fake_factor_friend_directory

    # mt channel: apply the chosen tau-ID working point on top of the
    # baseline mt selection.
    mt = MTTauID2016()
    mt.cuts.add(Cut(wp_dict[args.working_point] + ">0.5", "tau_iso"))
    mt_processes = {
        "data": Process("data_obs", DataEstimation(era, directory, mt, friend_directory=[])),
        "ZTT": Process("ZTT", ZTTEstimation(era, directory, mt, friend_directory=[])),
        "EMB": Process("EMB", ZTTEmbeddedEstimation(era, directory, mt, friend_directory=[])),
        "ZJ": Process("ZJ", ZJEstimation(era, directory, mt, friend_directory=[])),
        "ZL": Process("ZL", ZLEstimation(era, directory, mt, friend_directory=[])),
        "TTT": Process("TTT", TTTEstimation(era, directory, mt, friend_directory=[])),
        "TTJ": Process("TTJ", TTJEstimation(era, directory, mt, friend_directory=[])),
        "TTL": Process("TTL", TTLEstimation(era, directory, mt, friend_directory=[])),
        "VVT": Process("VVT", VVTEstimation(era, directory, mt, friend_directory=[])),
        "VVJ": Process("VVJ", VVJEstimation(era, directory, mt, friend_directory=[])),
        "VVL": Process("VVL", VVLEstimation(era, directory, mt, friend_directory=[])),
        "W": Process("W", WEstimation(era, directory, mt, friend_directory=[])),
    }
    # TODO: Include alternative jet fake estimation.
    # mt_processes["FAKES"] = Process("jetFakes", NewFakeEstimationLT(era, directory, mt, [mt_processes[process] for process in ["EMB", "ZL", "TTL", "VVL"]], mt_processes["data"], friend_directory=mt_friend_directory+[ff_friend_directory]))
    # mt_processes["FAKES"] = Process("jetFakes", NewFakeEstimationLT(era, directory, mt, [mt_processes[process] for process in ["ZTT", "ZL", "TTL", "TTT", "VVL", "VVT"]], mt_processes["data"], friend_directory=mt_friend_directory+[ff_friend_directory]))
    # QCD via same-sign -> opposite-sign extrapolation, once with the
    # simulated ZTT/TTT/VVT components and once with the embedded sample.
    mt_processes["QCD"] = Process(
        "QCD",
        QCDEstimation_SStoOS_MTETEM(
            era, directory, mt,
            [mt_processes[process] for process in
             ["ZTT", "ZL", "ZJ", "TTT", "TTJ", "TTL", "VVT", "VVJ", "VVL", "W"]],
            mt_processes["data"],
            friend_directory=[],
            extrapolation_factor=1.17))
    mt_processes["QCDEMB"] = Process(
        "QCDEMB",
        QCDEstimation_SStoOS_MTETEM(
            era, directory, mt,
            [mt_processes[process] for process in
             ["EMB", "ZL", "ZJ", "TTJ", "TTL", "VVJ", "VVL", "W"]],
            mt_processes["data"],
            friend_directory=[],
            extrapolation_factor=1.17))

    # TODO: Include Z-> mumu control region.
    mm = MMTauID2016()
    mm_processes = {
        "data": Process("data_obs", DataEstimation(era, directory, mm, friend_directory=[])),
        "ZLL": Process("ZLL", DYJetsToLLEstimation(era, directory, mm, friend_directory=[])),
        "MMEMB": Process("MMEMB", ZTTEmbeddedEstimation(era, directory, mm, friend_directory=[])),
        "TT": Process("TT", TTEstimation(era, directory, mm, friend_directory=[])),
        "VV": Process("VV", VVEstimation(era, directory, mm, friend_directory=[])),
        "W": Process("W", WEstimation(era, directory, mm, friend_directory=[])),
    }
    # mm_processes["FAKES"] = None TODO: Add fake factors or alternative fake rate estimation here
    mm_processes["QCD"] = Process(
        "QCD",
        QCDEstimation_SStoOS_MTETEM(
            era, directory, mm,
            [mm_processes[process] for process in ["ZLL", "W", "TT", "VV"]],
            mm_processes["data"],
            friend_directory=[],
            extrapolation_factor=1.17))
    mm_processes["QCDEMB"] = Process(
        "QCDEMB",
        QCDEstimation_SStoOS_MTETEM(
            era, directory, mm,
            [mm_processes[process] for process in ["MMEMB", "W"]],
            mm_processes["data"],
            friend_directory=[],
            extrapolation_factor=1.17))

    # Stage 0 and 1.1 signals for ggH & qqH
    # mt_processes["FAKES"] = Process("jetFakes", NewFakeEstimationLT(era, directory, mt, [mt_processes[process] for process in ["ZTT", "TTT", "VVT", "ZL", "TTL", "VVL"]], mt_processes["data"], friend_directory=mt_friend_directory+[ff_friend_directory]))
    # mt_processes["FAKESEMB"] = Process("jetFakesEMB", NewFakeEstimationLT(era, directory, mt, [mt_processes[process] for process in ["EMB", "ZL", "TTL", "VVL"]], mt_processes["data"], friend_directory=mt_friend_directory+[ff_friend_directory]))

    # Variables and categories.
    # FIX: close the binning file deterministically and use safe_load;
    # yaml.load without an explicit Loader is deprecated and allows
    # arbitrary object construction, which a plain config file never needs.
    with open(args.binning) as binning_file:
        binning = yaml.safe_load(binning_file)

    mt_categories = []
    # Goodness-of-fit shapes: a single category binned in the GoF variable.
    if args.gof_channel == "mt":
        score = Variable(
            args.gof_variable,
            VariableBinning(binning["control"]["mt"][args.gof_variable]["bins"]),
            expression=binning["control"]["mt"][args.gof_variable]["expression"])
        if "cut" in binning["control"]["mt"][args.gof_variable].keys():
            cuts = Cuts(
                Cut(binning["control"]["mt"][args.gof_variable]["cut"],
                    "binning"))
        else:
            cuts = Cuts()
        mt_categories.append(
            Category(args.gof_variable, mt, cuts, variable=score))
    # Otherwise use the analysis categories defined in the binning config.
    elif "mt" in args.channels:
        for cat in binning["categories"]["mt"]:
            category = Category(
                cat,
                mt,
                Cuts(Cut(binning["categories"]["mt"][cat]["cut"], "category")),
                variable=Variable(
                    binning["categories"]["mt"][cat]["var"],
                    VariableBinning(binning["categories"]["mt"][cat]["bins"]),
                    expression=binning["categories"]["mt"][cat]["expression"]))
            mt_categories.append(category)

    # Nominal mt shapes for every process/category combination.
    if "mt" in [args.gof_channel] + args.channels:
        for process, category in product(mt_processes.values(), mt_categories):
            systematics.add(
                Systematic(
                    category=category,
                    process=process,
                    analysis="smhtt",
                    era=era,
                    variation=Nominal(),
                    mass="125"))

    mm_categories = []
    # FIX: merged the two identical `if "mm" in args.channels` guards.
    if "mm" in args.channels:
        # Single control category in the visible di-muon mass.
        mm_categories.append(
            Category(
                "control",
                mm,
                Cuts(),
                variable=Variable("m_vis", ConstantBinning(1, 50, 150),
                                  "m_vis")))
        for process, category in product(mm_processes.values(), mm_categories):
            systematics.add(
                Systematic(
                    category=category,
                    process=process,
                    analysis="smhtt",
                    era=era,
                    variation=Nominal(),
                    mass="125"))

    # Shapes variations.

    # MC tau energy scale.
    tau_es_3prong_variations = create_systematic_variations(
        "CMS_scale_mc_t_3prong_Run2016", "tauEsThreeProng", DifferentPipeline)
    tau_es_1prong_variations = create_systematic_variations(
        "CMS_scale_mc_t_1prong_Run2016", "tauEsOneProng", DifferentPipeline)
    tau_es_1prong1pizero_variations = create_systematic_variations(
        "CMS_scale_mc_t_1prong1pizero_Run2016", "tauEsOneProngOnePiZero",
        DifferentPipeline)
    for variation in tau_es_3prong_variations + tau_es_1prong_variations + tau_es_1prong1pizero_variations:
        for process_nick in ["ZTT", "TTT", "TTL", "VVL", "VVT"]:  # "FAKES"
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # Tau energy scale (applied to simulation and embedding).
    tau_es_3prong_variations = create_systematic_variations(
        "CMS_scale_t_3prong_Run2016", "tauEsThreeProng", DifferentPipeline)
    tau_es_1prong_variations = create_systematic_variations(
        "CMS_scale_t_1prong_Run2016", "tauEsOneProng", DifferentPipeline)
    tau_es_1prong1pizero_variations = create_systematic_variations(
        "CMS_scale_t_1prong1pizero_Run2016", "tauEsOneProngOnePiZero",
        DifferentPipeline)
    for variation in tau_es_3prong_variations + tau_es_1prong_variations + tau_es_1prong1pizero_variations:
        for process_nick in ["ZTT", "TTT", "TTL", "VVL", "VVT", "EMB"]:  # "FAKES"
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # Jet energy scale.
    jet_es_variations = []
    # Inclusive JES shapes (disabled in favour of the split sources below):
    # jet_es_variations += create_systematic_variations(
    #     "CMS_scale_j_Run2016", "jecUnc", DifferentPipeline)
    # Splitted JES shapes.
    jet_es_variations += create_systematic_variations(
        "CMS_scale_j_eta0to3_Run2016", "jecUncEta0to3", DifferentPipeline)
    jet_es_variations += create_systematic_variations(
        "CMS_scale_j_eta0to5_Run2016", "jecUncEta0to5", DifferentPipeline)
    jet_es_variations += create_systematic_variations(
        "CMS_scale_j_eta3to5_Run2016", "jecUncEta3to5", DifferentPipeline)
    jet_es_variations += create_systematic_variations(
        "CMS_scale_j_RelativeBal_Run2016", "jecUncRelativeBal",
        DifferentPipeline)
    for variation in jet_es_variations:
        for process_nick in [
                "ZTT", "ZL", "ZJ", "W", "TTT", "TTL", "TTJ", "VVL", "VVT",
                "VVJ"
        ]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # MET energy scale.
    met_unclustered_variations = create_systematic_variations(
        "CMS_scale_met_unclustered", "metUnclusteredEn", DifferentPipeline)
    # NOTE: Clustered MET not used anymore in the uncertainty model
    # met_clustered_variations = create_systematic_variations(
    #     "CMS_scale_met_clustered_Run2016", "metJetEn", DifferentPipeline)
    for variation in met_unclustered_variations:  # + met_clustered_variations:
        for process_nick in [
                "ZTT", "ZL", "ZJ", "W", "TTT", "TTL", "TTJ", "VVL", "VVT",
                "VVJ"
        ]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # Recoil correction uncertainties (boson processes only).
    recoil_resolution_variations = create_systematic_variations(
        "CMS_htt_boson_reso_met_Run2016", "metRecoilResolution",
        DifferentPipeline)
    recoil_response_variations = create_systematic_variations(
        "CMS_htt_boson_scale_met_Run2016", "metRecoilResponse",
        DifferentPipeline)
    for variation in recoil_resolution_variations + recoil_response_variations:
        for process_nick in ["ZTT", "ZL", "ZJ", "W"]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # Z pt reweighting.
    zpt_variations = create_systematic_variations(
        "CMS_htt_dyShape_Run2016", "zPtReweightWeight", SquareAndRemoveWeight)
    for variation in zpt_variations:
        for process_nick in ["ZTT", "ZL", "ZJ"]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # Top pt reweighting.
    top_pt_variations = create_systematic_variations(
        "CMS_htt_ttbarShape", "topPtReweightWeight", SquareAndRemoveWeight)
    for variation in top_pt_variations:
        for process_nick in ["TTT", "TTL", "TTJ"]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # Jet-to-tau fake efficiency: pt-dependent weight, clipped at +-40%.
    jet_to_tau_fake_variations = []
    jet_to_tau_fake_variations.append(
        AddWeight("CMS_htt_jetToTauFake_Run2016", "jetToTauFake_weight",
                  Weight("max(1.0-pt_2*0.002, 0.6)", "jetToTauFake_weight"),
                  "Up"))
    jet_to_tau_fake_variations.append(
        AddWeight("CMS_htt_jetToTauFake_Run2016", "jetToTauFake_weight",
                  Weight("min(1.0+pt_2*0.002, 1.4)", "jetToTauFake_weight"),
                  "Down"))
    for variation in jet_to_tau_fake_variations:
        for process_nick in ["ZJ", "TTJ", "W", "VVJ"]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # ZL fakes energy scale (muons misidentified as taus).
    mu_fake_es_1prong_variations = create_systematic_variations(
        "CMS_ZLShape_mt_1prong_Run2016", "tauMuFakeEsOneProng",
        DifferentPipeline)
    mu_fake_es_1prong1pizero_variations = create_systematic_variations(
        "CMS_ZLShape_mt_1prong1pizero_Run2016", "tauMuFakeEsOneProngPiZeros",
        DifferentPipeline)
    if "mt" in [args.gof_channel] + args.channels:
        for process_nick in ["ZL"]:
            for variation in mu_fake_es_1prong_variations + mu_fake_es_1prong1pizero_variations:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # Lepton trigger efficiency (simulation): +-2% above the 23 GeV
    # cross-trigger threshold.
    lep_trigger_eff_variations = []
    lep_trigger_eff_variations.append(
        AddWeight("CMS_eff_trigger_mt_Run2016", "trg_mt_eff_weight",
                  Weight("(1.0*(pt_1<=23)+1.02*(pt_1>23))",
                         "trg_mt_eff_weight"), "Up"))
    lep_trigger_eff_variations.append(
        AddWeight("CMS_eff_trigger_mt_Run2016", "trg_mt_eff_weight",
                  Weight("(1.0*(pt_1<=23)+0.98*(pt_1>23))",
                         "trg_mt_eff_weight"), "Down"))
    for variation in lep_trigger_eff_variations:
        for process_nick in [
                "ZTT", "ZL", "ZJ", "W", "TTT", "TTL", "TTJ", "VVL", "VVT",
                "VVJ"
        ]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)
        for process_nick in ["ZLL", "TT", "VV", "W"]:
            if "mm" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mm_processes[process_nick],
                    channel=mm,
                    era=era)

    # Lepton trigger efficiency (embedding), same parameterization.
    lep_trigger_eff_variations = []
    lep_trigger_eff_variations.append(
        AddWeight("CMS_eff_trigger_emb_mt_Run2016", "trg_mt_eff_weight",
                  Weight("(1.0*(pt_1<=23)+1.02*(pt_1>23))",
                         "trg_mt_eff_weight"), "Up"))
    lep_trigger_eff_variations.append(
        AddWeight("CMS_eff_trigger_emb_mt_Run2016", "trg_mt_eff_weight",
                  Weight("(1.0*(pt_1<=23)+0.98*(pt_1>23))",
                         "trg_mt_eff_weight"), "Down"))
    for variation in lep_trigger_eff_variations:
        for process_nick in ["EMB"]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)
        for process_nick in ["MMEMB"]:
            if "mm" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mm_processes[process_nick],
                    channel=mm,
                    era=era)

    # Zll reweighting !!! replaced by log normal uncertainties:
    # CMS_eFakeTau_Run2016 15.5%; CMS_mFakeTau_Run2016 27.2%
    # zll_et_weight_variations = []
    # zll_mt_weight_variations = []
    # zll_mt_weight_variations.append(
    #     AddWeight(
    #         "CMS_mFakeTau_Run2016", "mFakeTau_reweight",
    #         Weight(
    #             "(((abs(eta_1) < 0.4)*1.63/1.47) + ((abs(eta_1) >= 0.4 && abs(eta_1) < 0.8)*1.85/1.55) + ((abs(eta_1) >= 0.8 && abs(eta_1) < 1.2)*1.38/1.33) + ((abs(eta_1) >= 1.2 && abs(eta_1) < 1.7)*2.26/1.72) + ((abs(eta_1) >= 1.7 && abs(eta_1) < 2.3)*3.13/2.5) + (abs(eta_1) >= 2.3))",
    #             "mFakeTau_reweight"), "Up"))
    # zll_mt_weight_variations.append(
    #     AddWeight(
    #         "CMS_mFakeTau_Run2016", "mFakeTau_reweight",
    #         Weight(
    #             "(((abs(eta_1) < 0.4)*1.31/1.47) + ((abs(eta_1) >= 0.4 && abs(eta_1) < 0.8)*1.25/1.55) + ((abs(eta_1) >= 0.8 && abs(eta_1) < 1.2)*1.28/1.33) + ((abs(eta_1) >= 1.2 && abs(eta_1) < 1.7)*1.18/1.72) + ((abs(eta_1) >= 1.7 && abs(eta_1) < 2.3)*1.87/2.5) + (abs(eta_1) >= 2.3))",
    #             "mFakeTau_reweight"), "Down"))
    # for variation in zll_mt_weight_variations:
    #     for process_nick in ["ZL"]:
    #         if "mt" in [args.gof_channel] + args.channels:
    #             systematics.add_systematic_variation(
    #                 variation=variation,
    #                 process=mt_processes[process_nick],
    #                 channel=mt,
    #                 era=era)

    # Embedded event specifics.

    # Tau energy scale (embedding-only variant).
    tau_es_3prong_variations = create_systematic_variations(
        "CMS_scale_emb_t_3prong_Run2016", "tauEsThreeProng", DifferentPipeline)
    tau_es_1prong_variations = create_systematic_variations(
        "CMS_scale_emb_t_1prong_Run2016", "tauEsOneProng", DifferentPipeline)
    tau_es_1prong1pizero_variations = create_systematic_variations(
        "CMS_scale_emb_t_1prong1pizero_Run2016", "tauEsOneProngOnePiZero",
        DifferentPipeline)
    for variation in tau_es_3prong_variations + tau_es_1prong_variations + tau_es_1prong1pizero_variations:
        for process_nick in ["EMB"]:  # , "FAKES"]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # Embedded decay-mode efficiency scale-factor uncertainties.
    mt_decayMode_variations = []
    mt_decayMode_variations.append(
        ReplaceWeight(
            "CMS_3ProngEff_Run2016", "decayMode_SF",
            Weight("embeddedDecayModeWeight_effUp_pi0Nom", "decayMode_SF"),
            "Up"))
    mt_decayMode_variations.append(
        ReplaceWeight(
            "CMS_3ProngEff_Run2016", "decayMode_SF",
            Weight("embeddedDecayModeWeight_effDown_pi0Nom", "decayMode_SF"),
            "Down"))
    mt_decayMode_variations.append(
        ReplaceWeight(
            "CMS_1ProngPi0Eff_Run2016", "decayMode_SF",
            Weight("embeddedDecayModeWeight_effNom_pi0Up", "decayMode_SF"),
            "Up"))
    mt_decayMode_variations.append(
        ReplaceWeight(
            "CMS_1ProngPi0Eff_Run2016", "decayMode_SF",
            Weight("embeddedDecayModeWeight_effNom_pi0Down", "decayMode_SF"),
            "Down"))
    for variation in mt_decayMode_variations:
        for process_nick in ["EMB"]:
            if "mt" in [args.gof_channel] + args.channels:
                systematics.add_systematic_variation(
                    variation=variation,
                    process=mt_processes[process_nick],
                    channel=mt,
                    era=era)

    # 10% removed events in ttbar simulation (ttbar -> real tau tau events)
    # will be added/subtracted to ZTT shape to use as systematic.
    tttautau_process_mt = Process(
        "TTT", TTTEstimation(era, directory, mt, friend_directory=[]))
    if "mt" in [args.gof_channel] + args.channels:
        for category in mt_categories:
            mt_processes['ZTTpTTTauTauDown'] = Process(
                "ZTTpTTTauTauDown",
                AddHistogramEstimationMethod(
                    "AddHistogram", "nominal", era, directory, mt,
                    [mt_processes["EMB"], tttautau_process_mt], [1.0, -0.1]))
            systematics.add(
                Systematic(
                    category=category,
                    process=mt_processes['ZTTpTTTauTauDown'],
                    analysis="smhtt",
                    era=era,
                    variation=Relabel("CMS_htt_emb_ttbar_Run2016", "Down"),
                    mass="125"))

            mt_processes['ZTTpTTTauTauUp'] = Process(
                "ZTTpTTTauTauUp",
                AddHistogramEstimationMethod(
                    "AddHistogram", "nominal", era, directory, mt,
                    [mt_processes["EMB"], tttautau_process_mt], [1.0, 0.1]))
            systematics.add(
                Systematic(
                    category=category,
                    process=mt_processes['ZTTpTTTauTauUp'],
                    analysis="smhtt",
                    era=era,
                    variation=Relabel("CMS_htt_emb_ttbar_Run2016", "Up"),
                    mass="125"))

    # Produce histograms.
    logger.info("Start producing shapes.")
    systematics.produce()
    logger.info("Done producing shapes.")
def main(args): # Write arparse arguments to YAML config filelist = {} # Define era if "2016" in args.era: from shape_producer.estimation_methods_2016 import DataEstimation, ggHEstimation, qqHEstimation, ZTTEstimation, ZLEstimation, ZJEstimation, WEstimation, TTTEstimation, TTJEstimation, ZTTEmbeddedEstimation, TTLEstimation, EWKZEstimation, VVLEstimation, VVJEstimation, VVEstimation, VVTEstimation, VHEstimation, EWKWpEstimation, EWKWmEstimation, ttHEstimation, ggHWWEstimation, qqHWWEstimation #QCDEstimation_SStoOS_MTETEM, QCDEstimationTT, HTTEstimation, from shape_producer.era import Run2016 era = Run2016(args.database) elif "2017" in args.era: from shape_producer.estimation_methods_2017 import DataEstimation, ZTTEstimation, ZLEstimation, ZJEstimation, TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation, VVJEstimation, VVLEstimation, WEstimation, ggHEstimation, qqHEstimation, VHEstimation, EWKZEstimation, ZTTEmbeddedEstimation, ttHEstimation from shape_producer.era import Run2017 era = Run2017(args.database) elif "2018" in args.era: from shape_producer.estimation_methods_2018 import DataEstimation, ZTTEstimation, ZLEstimation, ZJEstimation, TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation, VVJEstimation, VVLEstimation, WEstimation, ggHEstimation, qqHEstimation, VHEstimation, EWKZEstimation, ZTTEmbeddedEstimation, ttHEstimation from shape_producer.era import Run2018 era = Run2018(args.database) else: logger.fatal("Era {} is not implemented.".format(args.era)) raise Exception logger.debug("Write filelist for channel %s in era %s.", args.channel, args.era) ############################################################################ # Era: 2016, Channel: mt if "2016" in args.era and args.channel == "mt": channel = MTSM2016() for estimation in [ ggHEstimation("ggH", era, args.directory, channel), qqHEstimation("qqH", era, args.directory, channel), ttHEstimation(era, args.directory, channel), VHEstimation(era, args.directory, channel), 
ZTTEstimation(era, args.directory, channel), #ZTTEmbeddedEstimation(era, args.directory, channel), #TODO include EMB again once samples are there ZLEstimation(era, args.directory, channel), ZJEstimation(era, args.directory, channel), TTTEstimation(era, args.directory, channel), TTLEstimation(era, args.directory, channel), TTJEstimation(era, args.directory, channel), WEstimation(era, args.directory, channel), VVEstimation(era, args.directory, channel), EWKZEstimation(era, args.directory, channel), EWKWpEstimation(era, args.directory, channel), EWKWmEstimation(era, args.directory, channel), DataEstimation(era, args.directory, channel), ggHWWEstimation(era, args.directory, channel), qqHWWEstimation(era, args.directory, channel) ]: # Get files for estimation method logger.debug("Get files for estimation method %s.", estimation.name) files = [str(f) for f in estimation.get_files()] # Go through files and get folders for channel for f in files: if not os.path.exists(f): logger.fatal("File does not exist: %s", f) raise Exception folders = [] f_ = ROOT.TFile(f) for k in f_.GetListOfKeys(): if "{}_".format(args.channel) in k.GetName(): folders.append(k.GetName()) f_.Close() filelist[f] = folders ############################################################################ # Era: 2017, Channel: mt if "2017" in args.era and args.channel == "mt": channel = MTSM2017() for estimation in [ ggHEstimation("ggH", era, args.directory, channel), qqHEstimation("qqH", era, args.directory, channel), ttHEstimation(era, args.directory, channel), VHEstimation(era, args.directory, channel), ZTTEstimation(era, args.directory, channel), ZTTEmbeddedEstimation(era, args.directory, channel), ZLEstimation(era, args.directory, channel), ZJEstimation(era, args.directory, channel), TTTEstimation(era, args.directory, channel), TTJEstimation(era, args.directory, channel), TTLEstimation(era, args.directory, channel), WEstimation(era, args.directory, channel), VVJEstimation(era, args.directory, channel), 
VVTEstimation(era, args.directory, channel), VVLEstimation(era, args.directory, channel), EWKZEstimation(era, args.directory, channel), DataEstimation(era, args.directory, channel) ]: # Get files for estimation method logger.debug("Get files for estimation method %s.", estimation.name) files = [str(f) for f in estimation.get_files()] # Go through files and get folders for channel for f in files: if not os.path.exists(f): logger.fatal("File does not exist: %s", f) raise Exception folders = [] f_ = ROOT.TFile(f) for k in f_.GetListOfKeys(): if "{}_".format(args.channel) in k.GetName(): folders.append(k.GetName()) f_.Close() filelist[f] = folders ############################################################################ # Era: 2018, Channel: mt if "2018" in args.era and args.channel == "mt": channel = MTSM2018() for estimation in [ ggHEstimation("ggH", era, args.directory, channel), qqHEstimation("qqH", era, args.directory, channel), ttHEstimation(era, args.directory, channel), VHEstimation(era, args.directory, channel), ZTTEstimation(era, args.directory, channel), ZTTEmbeddedEstimation(era, args.directory, channel), ZLEstimation(era, args.directory, channel), ZJEstimation(era, args.directory, channel), TTTEstimation(era, args.directory, channel), TTJEstimation(era, args.directory, channel), TTLEstimation(era, args.directory, channel), WEstimation(era, args.directory, channel), VVJEstimation(era, args.directory, channel), VVTEstimation(era, args.directory, channel), VVLEstimation(era, args.directory, channel), EWKZEstimation(era, args.directory, channel), DataEstimation(era, args.directory, channel) ]: # Get files for estimation method logger.debug("Get files for estimation method %s.", estimation.name) files = [str(f) for f in estimation.get_files()] # Go through files and get folders for channel for f in files: if not os.path.exists(f): logger.fatal("File does not exist: %s", f) raise Exception folders = [] f_ = ROOT.TFile(f) for k in f_.GetListOfKeys(): if 
"{}_".format(args.channel) in k.GetName(): folders.append(k.GetName()) f_.Close() filelist[f] = folders ############################################################################ # Era: 2016, Channel: et if "2016" in args.era and args.channel == "et": channel = ETSM2016() for estimation in [ ggHEstimation("ggH", era, args.directory, channel), qqHEstimation("qqH", era, args.directory, channel), ttHEstimation(era, args.directory, channel), VHEstimation(era, args.directory, channel), ZTTEstimation(era, args.directory, channel), #ZTTEmbeddedEstimation(era, args.directory, channel), #TODO include EMB again once samples are there ZLEstimation(era, args.directory, channel), ZJEstimation(era, args.directory, channel), TTTEstimation(era, args.directory, channel), TTLEstimation(era, args.directory, channel), TTJEstimation(era, args.directory, channel), WEstimation(era, args.directory, channel), VVEstimation(era, args.directory, channel), EWKZEstimation(era, args.directory, channel), EWKWpEstimation(era, args.directory, channel), EWKWmEstimation(era, args.directory, channel), DataEstimation(era, args.directory, channel), ggHWWEstimation(era, args.directory, channel), qqHWWEstimation(era, args.directory, channel) ]: # Get files for estimation method logger.debug("Get files for estimation method %s.", estimation.name) files = [str(f) for f in estimation.get_files()] # Go through files and get folders for channel for f in files: if not os.path.exists(f): logger.fatal("File does not exist: %s", f) raise Exception folders = [] f_ = ROOT.TFile(f) for k in f_.GetListOfKeys(): if "{}_".format(args.channel) in k.GetName(): folders.append(k.GetName()) f_.Close() filelist[f] = folders ############################################################################ # Era: 2017, Channel: et if "2017" in args.era and args.channel == "et": channel = ETSM2017() for estimation in [ ggHEstimation("ggH", era, args.directory, channel), qqHEstimation("qqH", era, args.directory, channel), 
ttHEstimation(era, args.directory, channel), VHEstimation(era, args.directory, channel), ZTTEstimation(era, args.directory, channel), ZTTEmbeddedEstimation(era, args.directory, channel), ZLEstimation(era, args.directory, channel), ZJEstimation(era, args.directory, channel), TTTEstimation(era, args.directory, channel), TTJEstimation(era, args.directory, channel), TTLEstimation(era, args.directory, channel), WEstimation(era, args.directory, channel), VVJEstimation(era, args.directory, channel), VVTEstimation(era, args.directory, channel), VVLEstimation(era, args.directory, channel), EWKZEstimation(era, args.directory, channel), DataEstimation(era, args.directory, channel) ]: # Get files for estimation method logger.debug("Get files for estimation method %s.", estimation.name) files = [str(f) for f in estimation.get_files()] # Go through files and get folders for channel for f in files: if not os.path.exists(f): logger.fatal("File does not exist: %s", f) raise Exception folders = [] f_ = ROOT.TFile(f) for k in f_.GetListOfKeys(): if "{}_".format(args.channel) in k.GetName(): folders.append(k.GetName()) f_.Close() filelist[f] = folders ############################################################################ # Era: 2018, Channel: et if "2018" in args.era and args.channel == "et": channel = ETSM2018() for estimation in [ ggHEstimation("ggH", era, args.directory, channel), qqHEstimation("qqH", era, args.directory, channel), ttHEstimation(era, args.directory, channel), VHEstimation(era, args.directory, channel), ZTTEstimation(era, args.directory, channel), ZTTEmbeddedEstimation(era, args.directory, channel), ZLEstimation(era, args.directory, channel), ZJEstimation(era, args.directory, channel), TTTEstimation(era, args.directory, channel), TTJEstimation(era, args.directory, channel), TTLEstimation(era, args.directory, channel), WEstimation(era, args.directory, channel), VVJEstimation(era, args.directory, channel), VVTEstimation(era, args.directory, channel), 
VVLEstimation(era, args.directory, channel), EWKZEstimation(era, args.directory, channel), DataEstimation(era, args.directory, channel) ]: # Get files for estimation method logger.debug("Get files for estimation method %s.", estimation.name) files = [str(f) for f in estimation.get_files()] # Go through files and get folders for channel for f in files: if not os.path.exists(f): logger.fatal("File does not exist: %s", f) raise Exception folders = [] f_ = ROOT.TFile(f) for k in f_.GetListOfKeys(): if "{}_".format(args.channel) in k.GetName(): folders.append(k.GetName()) f_.Close() filelist[f] = folders ############################################################################ # Era: 2016, Channel: tt if "2016" in args.era and args.channel == "tt": channel = TTSM2016() for estimation in [ ggHEstimation("ggH", era, args.directory, channel), qqHEstimation("qqH", era, args.directory, channel), ttHEstimation(era, args.directory, channel), VHEstimation(era, args.directory, channel), ZTTEstimation(era, args.directory, channel), #ZTTEmbeddedEstimation(era, args.directory, channel), #TODO include EMB again once samples are there ZLEstimation(era, args.directory, channel), ZJEstimation(era, args.directory, channel), TTTEstimation(era, args.directory, channel), TTLEstimation(era, args.directory, channel), TTJEstimation(era, args.directory, channel), WEstimation(era, args.directory, channel), VVEstimation(era, args.directory, channel), EWKZEstimation(era, args.directory, channel), EWKWpEstimation(era, args.directory, channel), EWKWmEstimation(era, args.directory, channel), DataEstimation(era, args.directory, channel), ggHWWEstimation(era, args.directory, channel), qqHWWEstimation(era, args.directory, channel) ]: # Get files for estimation method logger.debug("Get files for estimation method %s.", estimation.name) files = [str(f) for f in estimation.get_files()] # Go through files and get folders for channel for f in files: if not os.path.exists(f): logger.fatal("File does not 
exist: %s", f) raise Exception folders = [] f_ = ROOT.TFile(f) for k in f_.GetListOfKeys(): if "{}_".format(args.channel) in k.GetName(): folders.append(k.GetName()) f_.Close() filelist[f] = folders ############################################################################ # Era 2017, Channel: tt if "2017" in args.era and args.channel == "tt": channel = TTSM2017() for estimation in [ ggHEstimation("ggH", era, args.directory, channel), qqHEstimation("qqH", era, args.directory, channel), ttHEstimation(era, args.directory, channel), VHEstimation(era, args.directory, channel), ZTTEstimation(era, args.directory, channel), ZTTEmbeddedEstimation(era, args.directory, channel), ZLEstimation(era, args.directory, channel), ZJEstimation(era, args.directory, channel), TTTEstimation(era, args.directory, channel), TTJEstimation(era, args.directory, channel), TTLEstimation(era, args.directory, channel), WEstimation(era, args.directory, channel), VVJEstimation(era, args.directory, channel), VVTEstimation(era, args.directory, channel), VVLEstimation(era, args.directory, channel), EWKZEstimation(era, args.directory, channel), DataEstimation(era, args.directory, channel) ]: # Get files for estimation method logger.debug("Get files for estimation method %s.", estimation.name) files = [str(f) for f in estimation.get_files()] # Go through files and get folders for channel for f in files: if not os.path.exists(f): logger.fatal("File does not exist: %s", f) raise Exception folders = [] f_ = ROOT.TFile(f) for k in f_.GetListOfKeys(): if "{}_".format(args.channel) in k.GetName(): folders.append(k.GetName()) f_.Close() filelist[f] = folders ############################################################################ # Era 2018, Channel: tt if "2018" in args.era and args.channel == "tt": channel = TTSM2018() for estimation in [ ggHEstimation("ggH", era, args.directory, channel), qqHEstimation("qqH", era, args.directory, channel), ttHEstimation(era, args.directory, channel), VHEstimation(era, 
args.directory, channel), ZTTEstimation(era, args.directory, channel), ZTTEmbeddedEstimation(era, args.directory, channel), ZLEstimation(era, args.directory, channel), ZJEstimation(era, args.directory, channel), TTTEstimation(era, args.directory, channel), TTJEstimation(era, args.directory, channel), TTLEstimation(era, args.directory, channel), WEstimation(era, args.directory, channel), VVJEstimation(era, args.directory, channel), VVTEstimation(era, args.directory, channel), VVLEstimation(era, args.directory, channel), EWKZEstimation(era, args.directory, channel), DataEstimation(era, args.directory, channel) ]: # Get files for estimation method logger.debug("Get files for estimation method %s.", estimation.name) files = [str(f) for f in estimation.get_files()] # Go through files and get folders for channel for f in files: if not os.path.exists(f): logger.fatal("File does not exist: %s", f) raise Exception folders = [] f_ = ROOT.TFile(f) for k in f_.GetListOfKeys(): if "{}_".format(args.channel) in k.GetName(): folders.append(k.GetName()) f_.Close() filelist[f] = folders ############################################################################ # Era: 2016, Channel: em if "2016" in args.era and args.channel == "em": channel = EMSM2016() for estimation in [ ggHEstimation("ggH", era, args.directory, channel), qqHEstimation("qqH", era, args.directory, channel), ttHEstimation(era, args.directory, channel), VHEstimation(era, args.directory, channel), ZTTEstimation(era, args.directory, channel), #ZTTEmbeddedEstimation(era, args.directory, channel), #TODO include EMB again once samples are there ZLEstimation(era, args.directory, channel), TTTEstimation(era, args.directory, channel), TTLEstimation(era, args.directory, channel), WEstimation(era, args.directory, channel), VVEstimation(era, args.directory, channel), EWKZEstimation(era, args.directory, channel), EWKWpEstimation(era, args.directory, channel), EWKWmEstimation(era, args.directory, channel), DataEstimation(era, 
args.directory, channel), ggHWWEstimation(era, args.directory, channel), qqHWWEstimation(era, args.directory, channel) ]: # Get files for estimation method logger.debug("Get files for estimation method %s.", estimation.name) files = [str(f) for f in estimation.get_files()] # Go through files and get folders for channel for f in files: if not os.path.exists(f): logger.fatal("File does not exist: %s", f) raise Exception folders = [] f_ = ROOT.TFile(f) for k in f_.GetListOfKeys(): if "{}_".format(args.channel) in k.GetName(): folders.append(k.GetName()) f_.Close() filelist[f] = folders ############################################################################ # Era: 2017, Channel: em if "2017" in args.era and args.channel == "em": channel = EMSM2017() for estimation in [ ggHEstimation("ggH", era, args.directory, channel), qqHEstimation("qqH", era, args.directory, channel), ttHEstimation(era, args.directory, channel), VHEstimation(era, args.directory, channel), ZTTEstimation(era, args.directory, channel), ZTTEmbeddedEstimation(era, args.directory, channel), ZLEstimation(era, args.directory, channel), TTTEstimation(era, args.directory, channel), TTLEstimation(era, args.directory, channel), WEstimation(era, args.directory, channel), VVTEstimation(era, args.directory, channel), VVLEstimation(era, args.directory, channel), EWKZEstimation(era, args.directory, channel), DataEstimation(era, args.directory, channel) ]: # Get files for estimation method logger.debug("Get files for estimation method %s.", estimation.name) files = [str(f) for f in estimation.get_files()] # Go through files and get folders for channel for f in files: if not os.path.exists(f): logger.fatal("File does not exist: %s", f) raise Exception folders = [] f_ = ROOT.TFile(f) for k in f_.GetListOfKeys(): if "{}_".format(args.channel) in k.GetName(): folders.append(k.GetName()) f_.Close() filelist[f] = folders ############################################################################ # Era: 2018, Channel: 
em if "2018" in args.era and args.channel == "em": channel = EMSM2018() for estimation in [ ggHEstimation("ggH", era, args.directory, channel), qqHEstimation("qqH", era, args.directory, channel), ttHEstimation(era, args.directory, channel), VHEstimation(era, args.directory, channel), ZTTEstimation(era, args.directory, channel), ZTTEmbeddedEstimation(era, args.directory, channel), ZLEstimation(era, args.directory, channel), TTTEstimation(era, args.directory, channel), TTLEstimation(era, args.directory, channel), WEstimation(era, args.directory, channel), VVTEstimation(era, args.directory, channel), VVLEstimation(era, args.directory, channel), EWKZEstimation(era, args.directory, channel), DataEstimation(era, args.directory, channel) ]: # Get files for estimation method logger.debug("Get files for estimation method %s.", estimation.name) files = [str(f) for f in estimation.get_files()] # Go through files and get folders for channel for f in files: if not os.path.exists(f): logger.fatal("File does not exist: %s", f) raise Exception folders = [] f_ = ROOT.TFile(f) for k in f_.GetListOfKeys(): if "{}_".format(args.channel) in k.GetName(): folders.append(k.GetName()) f_.Close() filelist[f] = folders ############################################################################ # Write output filelist logger.info("Write filelist to file: {}".format(args.output)) yaml.dump(filelist, open(args.output, 'w'), default_flow_style=False)
def estimationMethodAndClassMapGenerator():
    """Assemble the estimation methods and their NN training-class mapping.

    Returns ``[classes_map, estimationMethodList]`` where ``classes_map`` maps
    a process name to its multi-class training label and
    ``estimationMethodList`` holds the matching estimation-method instances.

    Relies on the enclosing scope for ``args``, ``era``, ``channel``,
    ``logger`` and the ``*Estimation`` classes.
    """
    ###### common processes
    if args.training_stxs1p1:
        # STXS stage-1.1 signal splitting: fine-grained ggH/qqH sub-processes
        # are merged into one training class per kinematic topology.
        classes_map = {
            # class1
            "ggH_GG2H_PTH_GT200125": "ggh_PTHGT200",
            # class2
            "ggH_GG2H_0J_PTH_0_10125": "ggh_0J",
            "ggH_GG2H_0J_PTH_GT10125": "ggh_0J",
            # class3
            "ggH_GG2H_1J_PTH_0_60125": "ggh_1J_PTH0to120",
            "ggH_GG2H_1J_PTH_60_120125": "ggh_1J_PTH0to120",
            # class4
            "ggH_GG2H_1J_PTH_120_200125": "ggh_1J_PTH120to200",
            # class5
            "ggH_GG2H_GE2J_MJJ_0_350_PTH_0_60125": "ggh_2J",
            "ggH_GG2H_GE2J_MJJ_0_350_PTH_60_120125": "ggh_2J",
            "ggH_GG2H_GE2J_MJJ_0_350_PTH_120_200125": "ggh_2J",
            # class6
            "ggH_GG2H_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_0_25125": "vbftopo_lowmjj",
            "ggH_GG2H_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_GT25125": "vbftopo_lowmjj",
            "qqH_QQ2HQQ_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_0_25125": "vbftopo_lowmjj",
            "qqH_QQ2HQQ_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_GT25125": "vbftopo_lowmjj",
            # class7
            "ggH_GG2H_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_0_25125": "vbftopo_highmjj",
            "ggH_GG2H_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_GT25125": "vbftopo_highmjj",
            "qqH_QQ2HQQ_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_0_25125": "vbftopo_highmjj",
            "qqH_QQ2HQQ_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_GT25125": "vbftopo_highmjj",
            # class8
            "qqH_QQ2HQQ_GE2J_MJJ_0_60125": "qqh_2J",
            "qqH_QQ2HQQ_GE2J_MJJ_60_120125": "qqh_2J",
            "qqH_QQ2HQQ_GE2J_MJJ_120_350125": "qqh_2J",
            # class9
            "qqH_QQ2HQQ_GE2J_MJJ_GT350_PTH_GT200125": "qqh_PTHGT200",
        }
        estimationMethodList = [
            ggHEstimation("ggH_GG2H_PTH_GT200125", era, args.base_path, channel),
            ggHEstimation("ggH_GG2H_0J_PTH_0_10125", era, args.base_path, channel),
            ggHEstimation("ggH_GG2H_0J_PTH_GT10125", era, args.base_path, channel),
            ggHEstimation("ggH_GG2H_1J_PTH_0_60125", era, args.base_path, channel),
            ggHEstimation("ggH_GG2H_1J_PTH_60_120125", era, args.base_path, channel),
            ggHEstimation("ggH_GG2H_1J_PTH_120_200125", era, args.base_path, channel),
            ggHEstimation("ggH_GG2H_GE2J_MJJ_0_350_PTH_0_60125", era, args.base_path, channel),
            ggHEstimation("ggH_GG2H_GE2J_MJJ_0_350_PTH_60_120125", era, args.base_path, channel),
            ggHEstimation("ggH_GG2H_GE2J_MJJ_0_350_PTH_120_200125", era, args.base_path, channel),
            ggHEstimation("ggH_GG2H_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_0_25125", era, args.base_path, channel),
            ggHEstimation("ggH_GG2H_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_GT25125", era, args.base_path, channel),
            qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_0_25125", era, args.base_path, channel),
            qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_350_700_PTH_0_200_PTHJJ_GT25125", era, args.base_path, channel),
            ggHEstimation("ggH_GG2H_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_0_25125", era, args.base_path, channel),
            ggHEstimation("ggH_GG2H_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_GT25125", era, args.base_path, channel),
            qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_0_25125", era, args.base_path, channel),
            qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_GT700_PTH_0_200_PTHJJ_GT25125", era, args.base_path, channel),
            qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_0_60125", era, args.base_path, channel),
            qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_60_120125", era, args.base_path, channel),
            qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_120_350125", era, args.base_path, channel),
            qqHEstimation("qqH_QQ2HQQ_GE2J_MJJ_GT350_PTH_GT200125", era, args.base_path, channel),
        ]
    elif args.training_inclusive:
        # Single combined signal class for ggH and qqH.
        classes_map = {
            "ggH125": "xxh",
            "qqH125": "xxh",
        }
        estimationMethodList = [
            ggHEstimation("ggH125", era, args.base_path, channel),
            qqHEstimation("qqH125", era, args.base_path, channel),
        ]
    else:
        # Default: one training class per production mode.
        classes_map = {
            "ggH125": "ggh",
            "qqH125": "qqh",
        }
        estimationMethodList = [
            ggHEstimation("ggH125", era, args.base_path, channel),
            qqHEstimation("qqH125", era, args.base_path, channel),
        ]
    estimationMethodList.extend([
        EWKZEstimation(era, args.base_path, channel),
        VVLEstimation(era, args.base_path, channel)
    ])
    classes_map["EWKZ"] = "misc"
    ##### TT* zl,zj processes
    estimationMethodList.extend([
        TTLEstimation(era, args.base_path, channel),
        ZLEstimation(era, args.base_path, channel)
    ])
    # Channel-dependent class labels for the lepton-faking-tau processes.
    if args.channel == "tt":
        classes_map.update({
            "TTL": "misc",
            "ZL": "misc",
            "VVL": "misc"
        })
    ## not TTJ,ZJ for em
    elif args.channel == "em":
        classes_map.update({
            "TTL": "tt",
            "ZL": "misc",
            "VVL": "db"
        })
    else:
        classes_map.update({
            "TTL": "tt",
            "ZL": "zll",
            "VVL": "misc"
        })
    ######## Check for emb vs MC
    if args.training_z_estimation_method == "emb":
        # Genuine-tau contributions taken from tau-embedded data.
        classes_map["EMB"] = "emb"
        estimationMethodList.extend([
            ZTTEmbeddedEstimation(era, args.base_path, channel)])
    elif args.training_z_estimation_method == "mc":
        # Genuine-tau contributions taken from simulation.
        classes_map["ZTT"] = "ztt"
        estimationMethodList.extend([
            ZTTEstimation(era, args.base_path, channel),
            TTTEstimation(era, args.base_path, channel),
            VVTEstimation(era, args.base_path, channel)
        ])
        if args.channel == "tt":
            classes_map.update({
                "TTT": "misc",
                "VVT": "misc"
            })
        ## not TTJ,ZJ for em
        elif args.channel == "em":
            classes_map.update({
                "TTT": "tt",
                "VVT": "db"
            })
        else:
            classes_map.update({
                "TTT": "tt",
                "VVT": "misc"
            })
    else:
        logger.fatal("No valid training-z-estimation-method! Options are emb, mc. Argument was {}".format(
            args.training_z_estimation_method))
        raise Exception
    # Jet-fakes treatment: either the fake-factor method ("ff", not available
    # in em) or simulated jet-fake processes ("mc").
    if args.training_jetfakes_estimation_method == "ff" and args.channel != "em":
        classes_map.update({
            "ff": "ff"
        })
    elif args.training_jetfakes_estimation_method == "mc" or args.channel == "em":
        # less data -> less categories for tt
        if args.channel == "tt":
            classes_map.update({
                "TTJ": "misc",
                "ZJ": "misc"
            })
        ## not TTJ,ZJ for em
        elif args.channel != "em":
            classes_map.update({
                "TTJ": "tt",
                "ZJ": "zll"
            })
        # NOTE(review): the jet-fake estimations are only added outside em;
        # reconstructed nesting from collapsed source — confirm against VCS.
        if args.channel != "em":
            classes_map.update({
                "VVJ": "misc"
            })
            estimationMethodList.extend([
                VVJEstimation(era, args.base_path, channel),
                ZJEstimation(era, args.base_path, channel),
                TTJEstimation(era, args.base_path, channel)
            ])
        ###w:
        estimationMethodList.extend([WEstimation(era, args.base_path, channel)])
        if args.channel in ["et", "mt"]:
            classes_map["W"] = "w"
        else:
            classes_map["W"] = "misc"
        ### QCD class
        if args.channel == "tt":
            classes_map["QCD"] = "noniso"
        else:
            classes_map["QCD"] = "ss"
    else:
        logger.fatal("No valid training-jetfakes-estimation-method! Options are ff, mc. Argument was {}".format(
            args.training_jetfakes_estimation_method))
        raise Exception
    return ([classes_map, estimationMethodList])
def estimationMethodAndClassMapGenerator():
    """Assemble the estimation methods and their NN training-class mapping.

    Simpler variant without STXS signal splitting.  Returns
    ``[classes_map, estimationMethodList]`` where ``classes_map`` maps a
    process name to its training class label and ``estimationMethodList``
    holds the matching estimation-method instances.

    Relies on the enclosing scope for ``args``, ``era``, ``channel``,
    ``logger`` and the ``*Estimation`` classes.
    """
    ###### common processes
    classes_map = {"ggH": "ggh", "qqH": "qqh", "EWKZ": "misc"}
    estimationMethodList = [
        ggHEstimation("ggH", era, args.base_path, channel),
        qqHEstimation("qqH", era, args.base_path, channel),
        EWKZEstimation(era, args.base_path, channel),
        VVLEstimation(era, args.base_path, channel),
        WEstimation(era, args.base_path, channel)
    ]
    ######## Check for emb vs MC
    if args.training_z_estimation_method == "emb":
        # Genuine-tau contributions taken from tau-embedded data.
        classes_map["EMB"] = "ztt"
        estimationMethodList.extend(
            [ZTTEmbeddedEstimation(era, args.base_path, channel)])
    elif args.training_z_estimation_method == "mc":
        # Genuine-tau contributions taken from simulation.
        classes_map["ZTT"] = "ztt"
        estimationMethodList.extend([
            ZTTEstimation(era, args.base_path, channel),
            TTTEstimation(era, args.base_path, channel),
            VVTEstimation(era, args.base_path, channel)
        ])
    else:
        logger.fatal(
            "No valid training-z-estimation-method! Options are emb, mc. Argument was {}"
            .format(args.training_z_estimation_method))
        raise Exception
    ##### TT* zl,zj processes
    estimationMethodList.extend([
        TTLEstimation(era, args.base_path, channel),
        ZLEstimation(era, args.base_path, channel)
    ])
    # less data -> less categories for tt
    if args.channel == "tt":
        classes_map.update({
            "TTT": "misc",
            "TTL": "misc",
            "TTJ": "misc",
            "ZL": "misc",
            "ZJ": "misc"
        })
        estimationMethodList.extend([
            ZJEstimation(era, args.base_path, channel),
            TTJEstimation(era, args.base_path, channel)
        ])
    ## not TTJ,ZJ for em
    elif args.channel == "em":
        classes_map.update({"TTT": "tt", "TTL": "tt", "ZL": "misc"})
    else:
        classes_map.update({
            "TTT": "tt",
            "TTL": "tt",
            "TTJ": "tt",
            "ZL": "zll",
            "ZJ": "zll"
        })
        estimationMethodList.extend([
            ZJEstimation(era, args.base_path, channel),
            TTJEstimation(era, args.base_path, channel)
        ])
    ###w:
    # estimation method already included above, just a different mapping for et and mt
    if args.channel in ["et", "mt"]:
        classes_map["W"] = "w"
    else:
        classes_map["W"] = "misc"
    ##### VV/[VVT,VVL,VVJ] split
    # VVL in common, VVT in "EMBvsMC"
    # NOTE(review): VVJ is only added outside em; reconstructed nesting from
    # collapsed source — confirm against VCS.
    if args.channel == "em":
        classes_map.update({"VVT": "db", "VVL": "db"})
    else:
        classes_map.update({"VVT": "misc", "VVL": "misc", "VVJ": "misc"})
        estimationMethodList.extend([
            VVJEstimation(era, args.base_path, channel),
        ])
    ### QCD class
    if args.channel == "tt":
        classes_map["QCD"] = "noniso"
    else:
        classes_map["QCD"] = "ss"
    return ([classes_map, estimationMethodList])
def main(args):
    """Produce the fake-factor yield histograms for the 2016 analysis.

    Builds anti-isolated variants of the mt, et and tt channels (two for tt,
    one per anti-isolated tau), attaches the standard process estimations and
    NN-score categories to each, and books/produces the nominal shapes into
    ``fake-factors/<tag>_ff_yields.root``.

    Args:
        args: parsed argparse namespace; reads tag, num_threads, era,
            datasets, directory, additional_cuts, additional_friends,
            {et,mt,tt}_friend_directory and config.

    Raises:
        Exception: for an unsupported era or an unknown config key.
    """
    # Container for all distributions to be drawn
    logger.info("Set up shape variations.")
    systematics = Systematics("fake-factors/{}_ff_yields.root".format(args.tag),
                              num_threads=args.num_threads)

    # Era selection
    if "2016" in args.era:
        from shape_producer.estimation_methods_2016 import DataEstimation, HTTEstimation, ggHEstimation, qqHEstimation, VHEstimation, ZTTEstimation, ZLEstimation, ZJEstimation, WEstimation, TTTEstimation, TTJEstimation, TTLEstimation, VVTEstimation, VVJEstimation, VVLEstimation, ZTTEmbeddedEstimation
        from shape_producer.era import Run2016
        era = Run2016(args.datasets)
    else:
        logger.critical("Era {} is not implemented.".format(args.era))
        raise Exception

    # Per-channel additional cuts and friend-tree directories from YAML.
    # NOTE(review): yaml.load without an explicit Loader is deprecated and
    # unsafe on untrusted input; these are trusted local configs, but consider
    # yaml.safe_load.
    channels = ["et", "mt", "tt"]
    additional_cuts = dict()
    additional_friends = dict()
    for channel in channels:
        with open(args.additional_cuts.format(channel), "r") as stream:
            config = yaml.load(stream)
            additional_cuts[channel] = config["cutstrings"]
        with open(args.additional_friends.format(channel), "r") as stream:
            config = yaml.load(stream)
            additional_friends[channel] = {
                key: value
                for key, value in zip(config["friend_dirs"],
                                      config["friend_aliases"])
            }

    directory = args.directory
    et_friend_directory = {args.et_friend_directory: ""}
    et_friend_directory.update(additional_friends["et"])
    mt_friend_directory = {args.mt_friend_directory: ""}
    mt_friend_directory.update(additional_friends["mt"])
    tt_friend_directory = {args.tt_friend_directory: ""}
    tt_friend_directory.update(additional_friends["tt"])

    def _prepare_channel(channel_obj, cut_key, iso_cut_name, anti_iso_expression,
                         anti_iso_name):
        # Apply the YAML cuts and replace the tau isolation requirement by the
        # corresponding anti-isolation selection (fake-factor application region).
        for cutstring in additional_cuts[cut_key]:
            channel_obj.cuts.add(Cut(cutstring))
        channel_obj.cuts.remove(iso_cut_name)
        channel_obj.cuts.add(Cut(anti_iso_expression, anti_iso_name))
        return channel_obj

    def _build_processes(channel_obj, friend_directory):
        # Standard process set, identical for all four channel variants.
        # EWKZ is deliberately left out (was commented in the original).
        return {
            "data": Process("data_obs", DataEstimation(era, directory, channel_obj, friend_directory=friend_directory)),
            "EMB": Process("EMB", ZTTEmbeddedEstimation(era, directory, channel_obj, friend_directory=friend_directory)),
            "ZTT": Process("ZTT", ZTTEstimation(era, directory, channel_obj, friend_directory=friend_directory)),
            "ZL": Process("ZL", ZLEstimation(era, directory, channel_obj, friend_directory=friend_directory)),
            "ZJ": Process("ZJ", ZJEstimation(era, directory, channel_obj, friend_directory=friend_directory)),
            "W": Process("W", WEstimation(era, directory, channel_obj, friend_directory=friend_directory)),
            "TTT": Process("TTT", TTTEstimation(era, directory, channel_obj, friend_directory=friend_directory)),
            "TTL": Process("TTL", TTLEstimation(era, directory, channel_obj, friend_directory=friend_directory)),
            "TTJ": Process("TTJ", TTJEstimation(era, directory, channel_obj, friend_directory=friend_directory)),
            "VVT": Process("VVT", VVTEstimation(era, directory, channel_obj, friend_directory=friend_directory)),
            "VVL": Process("VVL", VVLEstimation(era, directory, channel_obj, friend_directory=friend_directory)),
            "VVJ": Process("VVJ", VVJEstimation(era, directory, channel_obj, friend_directory=friend_directory)),
        }

    mt = _prepare_channel(
        MTSM2016(), "mt", "tau_iso",
        "(byTightIsolationMVArun2v1DBoldDMwLT_2<0.5&&byVLooseIsolationMVArun2v1DBoldDMwLT_2>0.5)",
        "tau_anti_iso")
    mt_processes = _build_processes(mt, mt_friend_directory)

    et = _prepare_channel(
        ETSM2016(), "et", "tau_iso",
        "(byTightIsolationMVArun2v1DBoldDMwLT_2<0.5&&byVLooseIsolationMVArun2v1DBoldDMwLT_2>0.5)",
        "tau_anti_iso")
    et_processes = _build_processes(et, et_friend_directory)

    # in tt two 'channels' are needed: antiisolated region for each tau respectively
    tt1 = _prepare_channel(
        TTSM2016(), "tt", "tau_1_iso",
        "(byTightIsolationMVArun2v1DBoldDMwLT_1<0.5&&byVLooseIsolationMVArun2v1DBoldDMwLT_1>0.5)",
        "tau_1_anti_iso")
    tt1_processes = _build_processes(tt1, tt_friend_directory)

    tt2 = _prepare_channel(
        TTSM2016(), "tt", "tau_2_iso",
        "(byTightIsolationMVArun2v1DBoldDMwLT_2<0.5&&byVLooseIsolationMVArun2v1DBoldDMwLT_2>0.5)",
        "tau_2_anti_iso")
    tt2_processes = _build_processes(tt2, tt_friend_directory)

    # Variables and categories.
    # Fix: close the config file handle (was yaml.load(open(...))).
    with open("fake-factors/config.yaml") as stream:
        config = yaml.load(stream)
    if not args.config in config.keys():
        logger.critical("Requested config key %s not available in fake-factors/config.yaml!" % args.config)
        raise Exception
    config = config[args.config]

    def _build_categories(channel_obj, score_channel, labels, name_prefix=""):
        # Inclusive category plus one category per exclusive NN-score class.
        # ``score_channel`` selects the binning/expression block of the config
        # and the name of the max-index discriminator branch.
        channel_config = config[score_channel]

        def _variable():
            return Variable(args.config,
                            VariableBinning(channel_config["binning"]),
                            channel_config["expression"])

        categories = [
            Category(name_prefix + "inclusive", channel_obj, Cuts(),
                     variable=_variable())
        ]
        for i, label in enumerate(labels):
            categories.append(
                Category(
                    name_prefix + label, channel_obj,
                    Cuts(
                        Cut("{channel}_max_index=={index}".format(
                            channel=score_channel, index=i),
                            "exclusive_score")),
                    variable=_variable()))
        return categories

    sm_labels = ["ggh", "qqh", "ztt", "zll", "w", "tt", "ss", "misc"]
    tt_labels = ["ggh", "qqh", "ztt", "noniso", "misc"]
    et_categories = _build_categories(et, "et", sm_labels)
    mt_categories = _build_categories(mt, "mt", sm_labels)
    tt1_categories = _build_categories(tt1, "tt", tt_labels, name_prefix="tt1_")
    tt2_categories = _build_categories(tt2, "tt", tt_labels, name_prefix="tt2_")

    # Nominal histograms: book every process x category combination, in the
    # same order as before (et, mt, tt1, tt2).
    for processes, categories in [(et_processes, et_categories),
                                  (mt_processes, mt_categories),
                                  (tt1_processes, tt1_categories),
                                  (tt2_processes, tt2_categories)]:
        for process, category in product(processes.values(), categories):
            systematics.add(
                Systematic(category=category,
                           process=process,
                           analysis="smhtt",
                           era=era,
                           variation=Nominal(),
                           mass="125"))

    # Produce histograms
    logger.info("Start producing shapes.")
    systematics.produce()
    logger.info("Done producing shapes.")
def main(args): # Write arparse arguments to YAML config logger.debug("Write argparse arguments to YAML config.") output_config = {} output_config["base_path"] = args.base_path output_config["output_path"] = args.output_path output_config["output_filename"] = args.output_filename output_config["tree_path"] = args.tree_path output_config["event_branch"] = args.event_branch output_config["training_weight_branch"] = args.training_weight_branch # Add friends and global cuts global_cuts = Cuts() friend_dirs = [] friend_alias = [] logger.debug("Add global cuts specified by YAML selection file") with open(args.additional_cuts, "r") as stream: selection_config = yaml.load(stream) with open(args.friends_config, "r") as stream: friends_config = yaml.load(stream) for cutstring in selection_config["cutstrings"]: global_cuts.add(Cut(cutstring)) friend_dirs = friends_config["friend_dirs"] friend_aliases = friends_config["friend_aliases"] output_config["friend_dirs"] = friend_dirs output_config["friend_aliases"] = friend_aliases logger.warning("Added friend dirs: %s", repr(friend_dirs)) logger.warning("Globals cuts: %s", global_cuts.expand()) # Define era if "2016" in args.era: from shape_producer.estimation_methods_2016 import DataEstimation, HTTEstimation, ggHEstimation, qqHEstimation, VHEstimation, ZTTEstimation, ZLEstimation, ZJEstimation, WEstimationRaw, TTTEstimation, TTJEstimation, VVEstimation, QCDEstimationMT, QCDEstimationET, QCDEstimationTT, ZTTEmbeddedEstimation, TTLEstimation, EWKWpEstimation, EWKWmEstimation, EWKZEstimation from shape_producer.era import Run2016 era = Run2016(args.database) elif "2017" in args.era: from shape_producer.estimation_methods_Fall17 import DataEstimation, ZTTEstimation, ZJEstimation, ZLEstimation, TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation, VVJEstimation, VVLEstimation, WEstimation, ggHEstimation, qqHEstimation, EWKZEstimation from shape_producer.era import Run2017 era = Run2017(args.database) else: logger.fatal("Era {} is 
not implemented.".format(args.era)) raise Exception ############################################################################ # Era: 2016, Channel: mt if "2016" in args.era and args.channel == "mt": channel = MTSM2016() # Set up `processes` part of config output_config["processes"] = {} # Additional cuts additional_cuts = Cuts() logger.warning("Use additional cuts for mt: %s", additional_cuts.expand()) # MC-driven processes # NOTE: Define here the mappig of the process estimations to the training classes classes_map = { "ggH": "ggh", "qqH": "qqh", "ZTT": "ztt", "EMB": "ztt", "ZL": "zll", "ZJ": "zll", "TTT": "tt", "TTL": "tt", "TTJ": "tt", "W": "w", "EWKWp": "w", "EWKWm": "w", "VV": "misc", "EWKZ": "misc", } for estimation in [ ggHEstimation(era, args.base_path, channel), qqHEstimation(era, args.base_path, channel), ZTTEstimation(era, args.base_path, channel), ZLEstimation(era, args.base_path, channel), ZJEstimation(era, args.base_path, channel), TTTEstimation(era, args.base_path, channel), #TTLEstimation(era, args.base_path, channel), TTJEstimation(era, args.base_path, channel), WEstimationRaw(era, args.base_path, channel), EWKWpEstimation(era, args.base_path, channel), EWKWmEstimation(era, args.base_path, channel), VVEstimation(era, args.base_path, channel), EWKZEstimation(era, args.base_path, channel), ]: output_config["processes"][estimation.name] = { "files": [ str(f).replace(args.base_path + "/", "") for f in estimation.get_files() ], "cut_string": (estimation.get_cuts() + channel.cuts + additional_cuts + global_cuts).expand(), "weight_string": estimation.get_weights().extract(), "class": classes_map[estimation.name] } # Same sign selection for data-driven QCD estimation = DataEstimation(era, args.base_path, channel) estimation.name = "QCD" channel_ss = copy.deepcopy(channel) channel_ss.cuts.get("os").invert() output_config["processes"][estimation.name] = { "files": [ str(f).replace(args.base_path + "/", "") for f in estimation.get_files() ], "cut_string": 
(estimation.get_cuts() + channel_ss.cuts + additional_cuts + global_cuts).expand(), "weight_string": estimation.get_weights().extract(), "class": "ss" } ############################################################################ # Era: 2017, Channel: mt if "2017" in args.era and args.channel == "mt": channel = MTSM2017() # Set up `processes` part of config output_config["processes"] = {} # Additional cuts additional_cuts = Cuts() logger.warning("Use additional cuts for mt: %s", additional_cuts.expand()) # MC-driven processes # NOTE: Define here the mappig of the process estimations to the training classes classes_map = { "ggH": "ggh", "qqH": "qqh", "ZTT": "ztt", "ZL": "zll", "ZJ": "zll", "TTT": "tt", "TTL": "tt", "TTJ": "tt", "W": "w", "VVJ": "misc", "VVT": "misc", "VVL": "misc", "EWKZ": "misc" } for estimation in [ ggHEstimation(era, args.base_path, channel), qqHEstimation(era, args.base_path, channel), ZTTEstimation(era, args.base_path, channel), ZLEstimation(era, args.base_path, channel), ZJEstimation(era, args.base_path, channel), TTTEstimation(era, args.base_path, channel), TTLEstimation(era, args.base_path, channel), TTJEstimation(era, args.base_path, channel), WEstimation(era, args.base_path, channel), VVJEstimation(era, args.base_path, channel), VVTEstimation(era, args.base_path, channel), VVLEstimation(era, args.base_path, channel), EWKZEstimation(era, args.base_path, channel), ]: output_config["processes"][estimation.name] = { "files": [ str(f).replace(args.base_path + "/", "") for f in estimation.get_files() ], "cut_string": (estimation.get_cuts() + channel.cuts + additional_cuts + global_cuts).expand(), "weight_string": estimation.get_weights().extract(), "class": classes_map[estimation.name] } # Same sign selection for data-driven QCD estimation = DataEstimation(era, args.base_path, channel) estimation.name = "QCD" channel_ss = copy.deepcopy(channel) channel_ss.cuts.get("os").invert() output_config["processes"][estimation.name] = { "files": [ 
str(f).replace(args.base_path + "/", "") for f in estimation.get_files() ], "cut_string": (estimation.get_cuts() + channel_ss.cuts + additional_cuts + global_cuts).expand(), "weight_string": estimation.get_weights().extract(), "class": "ss" } ############################################################################ # Era: 2016, Channel: et if "2016" in args.era and args.channel == "et": channel = ETSM2016() # Set up `processes` part of config output_config["processes"] = {} # Additional cuts additional_cuts = Cuts() logger.warning("Use additional cuts for et: %s", additional_cuts.expand()) # MC-driven processes # NOTE: Define here the mappig of the process estimations to the training classes classes_map = { "ggH": "ggh", "qqH": "qqh", "ZTT": "ztt", "EMB": "ztt", "ZL": "zll", "ZJ": "zll", "TTT": "tt", "TTL": "tt", "TTJ": "tt", "W": "w", "EWKWp": "w", "EWKWm": "w", "VV": "misc", "EWKZ": "misc" } for estimation in [ ggHEstimation(era, args.base_path, channel), qqHEstimation(era, args.base_path, channel), ZTTEstimation(era, args.base_path, channel), ZLEstimation(era, args.base_path, channel), ZJEstimation(era, args.base_path, channel), TTTEstimation(era, args.base_path, channel), #TTLEstimation(era, args.base_path, channel), TTJEstimation(era, args.base_path, channel), WEstimationRaw(era, args.base_path, channel), EWKWpEstimation(era, args.base_path, channel), EWKWmEstimation(era, args.base_path, channel), VVEstimation(era, args.base_path, channel), EWKZEstimation(era, args.base_path, channel), ]: output_config["processes"][estimation.name] = { "files": [ str(f).replace(args.base_path + "/", "") for f in estimation.get_files() ], "cut_string": (estimation.get_cuts() + channel.cuts + additional_cuts + global_cuts).expand(), "weight_string": estimation.get_weights().extract(), "class": classes_map[estimation.name] } # Same sign selection for data-driven QCD estimation = DataEstimation(era, args.base_path, channel) estimation.name = "QCD" channel_ss = 
copy.deepcopy(channel) channel_ss.cuts.get("os").invert() output_config["processes"][estimation.name] = { "files": [ str(f).replace(args.base_path + "/", "") for f in estimation.get_files() ], "cut_string": (estimation.get_cuts() + channel_ss.cuts + additional_cuts + global_cuts).expand(), "weight_string": estimation.get_weights().extract(), "class": "ss" } ############################################################################ # Era: 2017, Channel: et if "2017" in args.era and args.channel == "et": channel = ETSM2017() # Set up `processes` part of config output_config["processes"] = {} # Additional cuts additional_cuts = Cuts() logger.warning("Use additional cuts for et: %s", additional_cuts.expand()) # MC-driven processes # NOTE: Define here the mappig of the process estimations to the training classes classes_map = { "ggH": "ggh", "qqH": "qqh", "ZTT": "ztt", "ZL": "zll", "ZJ": "zll", "TTT": "tt", "TTL": "tt", "TTJ": "tt", "W": "w", "VVJ": "misc", "VVT": "misc", "VVL": "misc", "EWKZ": "misc" } for estimation in [ ggHEstimation(era, args.base_path, channel), qqHEstimation(era, args.base_path, channel), ZTTEstimation(era, args.base_path, channel), ZLEstimation(era, args.base_path, channel), ZJEstimation(era, args.base_path, channel), TTTEstimation(era, args.base_path, channel), TTLEstimation(era, args.base_path, channel), TTJEstimation(era, args.base_path, channel), WEstimation(era, args.base_path, channel), VVJEstimation(era, args.base_path, channel), VVTEstimation(era, args.base_path, channel), VVLEstimation(era, args.base_path, channel), EWKZEstimation(era, args.base_path, channel), ]: output_config["processes"][estimation.name] = { "files": [ str(f).replace(args.base_path + "/", "") for f in estimation.get_files() ], "cut_string": (estimation.get_cuts() + channel.cuts + additional_cuts + global_cuts).expand(), "weight_string": estimation.get_weights().extract(), "class": classes_map[estimation.name] } # Same sign selection for data-driven QCD estimation = 
DataEstimation(era, args.base_path, channel) estimation.name = "QCD" channel_ss = copy.deepcopy(channel) channel_ss.cuts.get("os").invert() output_config["processes"][estimation.name] = { "files": [ str(f).replace(args.base_path + "/", "") for f in estimation.get_files() ], "cut_string": (estimation.get_cuts() + channel_ss.cuts + additional_cuts + global_cuts).expand(), "weight_string": estimation.get_weights().extract(), "class": "ss" } ############################################################################ # Era: 2016, Channel: tt if "2016" in args.era and args.channel == "tt": channel = TTSM2016() # Set up `processes` part of config output_config["processes"] = {} # Additional cuts additional_cuts = Cuts() logger.warning("Use additional cuts for tt: %s", additional_cuts.expand()) # MC-driven processes # NOTE: Define here the mappig of the process estimations to the training classes classes_map = { "ggH": "ggh", "qqH": "qqh", "ZTT": "ztt", "EMB": "ztt", "ZL": "misc", "ZJ": "misc", "TTT": "misc", "TTL": "misc", "TTJ": "misc", "W": "misc", "EWKWp": "misc", "EWKWm": "misc", "VV": "misc", "EWKZ": "misc" } for estimation in [ ggHEstimation(era, args.base_path, channel), qqHEstimation(era, args.base_path, channel), ZTTEstimation(era, args.base_path, channel), ZLEstimation(era, args.base_path, channel), ZJEstimation(era, args.base_path, channel), TTTEstimation(era, args.base_path, channel), #TTLEstimationTT(era, args.base_path, channel), TTJEstimation(era, args.base_path, channel), WEstimationRaw(era, args.base_path, channel), EWKWpEstimation(era, args.base_path, channel), EWKWmEstimation(era, args.base_path, channel), VVEstimation(era, args.base_path, channel), EWKZEstimation(era, args.base_path, channel), ]: output_config["processes"][estimation.name] = { "files": [ str(f).replace(args.base_path + "/", "") for f in estimation.get_files() ], "cut_string": (estimation.get_cuts() + channel.cuts + additional_cuts + global_cuts).expand(), "weight_string": 
estimation.get_weights().extract(), "class": classes_map[estimation.name] } # Same sign selection for data-driven QCD estimation = DataEstimation(era, args.base_path, channel) estimation.name = "QCD" channel_iso = copy.deepcopy(channel) channel_iso.cuts.remove("tau_2_iso") channel_iso.cuts.add( Cut("byTightIsolationMVArun2v1DBoldDMwLT_2<0.5", "tau_2_iso")) channel_iso.cuts.add( Cut("byLooseIsolationMVArun2v1DBoldDMwLT_2>0.5", "tau_2_iso_loose")) output_config["processes"][estimation.name] = { "files": [ str(f).replace(args.base_path + "/", "") for f in estimation.get_files() ], "cut_string": (estimation.get_cuts() + channel_iso.cuts + additional_cuts + global_cuts).expand(), "weight_string": estimation.get_weights().extract(), "class": "noniso" } ############################################################################ # Era: 2017, Channel: tt if "2017" in args.era and args.channel == "tt": channel = TTSM2017() # Set up `processes` part of config output_config["processes"] = {} # Additional cuts additional_cuts = Cuts() logger.warning("Use additional cuts for tt: %s", additional_cuts.expand()) # MC-driven processes # NOTE: Define here the mappig of the process estimations to the training classes classes_map = { "ggH": "ggh", "qqH": "qqh", "ZTT": "ztt", "ZJ": "misc", "ZL": "misc", "TTT": "misc", "TTL": "misc", "TTJ": "misc", "W": "misc", "VVT": "misc", "VVJ": "misc", "VVL": "misc", "EWKZ": "misc" } for estimation in [ ggHEstimation(era, args.base_path, channel), qqHEstimation(era, args.base_path, channel), ZTTEstimation(era, args.base_path, channel), ZLEstimation(era, args.base_path, channel), ZJEstimation(era, args.base_path, channel), TTTEstimation(era, args.base_path, channel), TTJEstimation(era, args.base_path, channel), TTLEstimation(era, args.base_path, channel), WEstimation(era, args.base_path, channel), VVJEstimation(era, args.base_path, channel), VVTEstimation(era, args.base_path, channel), VVLEstimation(era, args.base_path, channel), EWKZEstimation(era, 
args.base_path, channel), ]: output_config["processes"][estimation.name] = { "files": [ str(f).replace(args.base_path + "/", "") for f in estimation.get_files() ], "cut_string": (estimation.get_cuts() + channel.cuts + additional_cuts + global_cuts).expand(), "weight_string": estimation.get_weights().extract(), "class": classes_map[estimation.name] } # Same sign selection for data-driven QCD estimation = DataEstimation(era, args.base_path, channel) estimation.name = "QCD" channel_iso = copy.deepcopy(channel) channel_iso.cuts.remove("tau_2_iso") channel_iso.cuts.add( Cut("byTightIsolationMVArun2v1DBoldDMwLT_2<0.5", "tau_2_iso")) channel_iso.cuts.add( Cut("byLooseIsolationMVArun2017v2DBoldDMwLT2017_2>0.5", "tau_2_iso_loose")) output_config["processes"][estimation.name] = { "files": [ str(f).replace(args.base_path + "/", "") for f in estimation.get_files() ], "cut_string": (estimation.get_cuts() + channel_iso.cuts + additional_cuts + global_cuts).expand(), "weight_string": estimation.get_weights().extract(), "class": "noniso" } ############################################################################ # Write output config logger.info("Write config to file: {}".format(args.output_config)) yaml.dump(output_config, open(args.output_config, 'w'), default_flow_style=False)