def __init__(self, datap, run_param, mcordata, p_maxfiles,
             d_root, d_pkl, d_pklsk, d_pkl_ml, p_period,
             p_chunksizeunp, p_chunksizeskim, p_maxprocess,
             p_frac_merge, p_rd_merge, d_pkl_dec, d_pkl_decmerged,
             d_results):
    """Store the configuration and derive every input/output file name.

    Args:
        datap: nested configuration mapping; the keys read are visible below.
        run_param: mapping indexed with ``p_period`` to obtain the run list.
        mcordata: sample key; the value ``"mc"`` additionally enables the
            generated-level file lists. Also used to index per-sample
            configuration entries.
        p_maxfiles: forwarded to ``list_folders``.
        d_root: directory scanned by ``list_folders`` when it exists.
        d_pkl: directory of the per-folder pickle files; scanned instead of
            ``d_root`` when ``d_root`` is not a directory.
        d_pklsk: directory used for the per-bin skimmed file lists.
        d_pkl_ml: directory used for the per-bin ``*_ml`` file names.
        p_period: key into ``run_param``.
        p_chunksizeunp, p_chunksizeskim, p_maxprocess, p_frac_merge,
        p_rd_merge: stored unchanged on the instance.
        d_pkl_dec: directory used for the per-folder "dec" file lists.
        d_pkl_decmerged: directory used for the merged "dec" file names.
        d_results: directory prepended to the mass/efficiency file names.
    """
    files_names = datap["files_names"]
    bitmap_sel = datap["bitmap_sel"]
    variables = datap["variables"]

    #directories
    self.d_root = d_root
    self.d_pkl = d_pkl
    self.d_pklsk = d_pklsk
    self.d_pkl_ml = d_pkl_ml
    self.d_results = d_results
    self.datap = datap
    self.mcordata = mcordata
    self.p_frac_merge = p_frac_merge
    self.p_rd_merge = p_rd_merge
    self.period = p_period
    self.runlist = run_param[self.period]
    self.p_maxfiles = p_maxfiles
    self.p_chunksizeunp = p_chunksizeunp
    self.p_chunksizeskim = p_chunksizeskim

    #parameter names
    self.p_maxprocess = p_maxprocess
    self.indexsample = None

    #namefile root
    self.n_root = files_names["namefile_unmerged_tree"]
    #troot trees names
    self.n_treereco = files_names["treeoriginreco"]
    self.n_treegen = files_names["treeorigingen"]
    self.n_treeevt = files_names["treeoriginevt"]
    #namefiles pkl
    self.n_reco = files_names["namefile_reco"]
    self.n_evt = files_names["namefile_evt"]
    self.n_evtorig = files_names["namefile_evtorig"]
    self.n_gen = files_names["namefile_gen"]
    self.n_filemass = files_names["histofilename"]
    self.n_fileeff = files_names["efffilename"]

    #selections
    self.s_reco_unp = datap["sel_reco_unp"]
    self.s_good_evt_unp = datap["sel_good_evt_unp"]
    self.s_cen_unp = datap["sel_cen_unp"]
    self.s_gen_unp = datap["sel_gen_unp"]
    self.s_reco_skim = datap["sel_reco_skim"]
    self.s_gen_skim = datap["sel_gen_skim"]

    #bitmap
    self.b_trackcuts = datap["sel_reco_singletrac_unp"]
    self.b_std = bitmap_sel["isstd"]
    self.b_mcsig = bitmap_sel["ismcsignal"]
    self.b_mcsigprompt = bitmap_sel["ismcprompt"]
    self.b_mcsigfd = bitmap_sel["ismcfd"]
    self.b_mcbkg = bitmap_sel["ismcbkg"]

    #variables name
    self.v_all = variables["var_all"]
    self.v_train = variables["var_training"]
    self.v_evt = variables["var_evt"][self.mcordata]
    self.v_gen = variables["var_gen"]
    self.v_evtmatch = variables["var_evt_match"]
    self.v_bitvar = bitmap_sel["var_name"]
    self.v_isstd = bitmap_sel["var_isstd"]
    self.v_ismcsignal = bitmap_sel["var_ismcsignal"]
    self.v_ismcprompt = bitmap_sel["var_ismcprompt"]
    self.v_ismcfd = bitmap_sel["var_ismcfd"]
    self.v_ismcbkg = bitmap_sel["var_ismcbkg"]
    self.v_var_binning = datap["var_binning"]

    #list of files names
    # Discover the folders from the ROOT directory when it exists, otherwise
    # from the directory holding the already produced reco pickles.
    if os.path.isdir(self.d_root):
        self.l_path = list_folders(self.d_root, self.n_root, self.p_maxfiles)
    else:
        self.l_path = list_folders(self.d_pkl, self.n_reco, self.p_maxfiles)

    self.l_root = createlist(self.d_root, self.l_path, self.n_root)
    self.l_reco = createlist(self.d_pkl, self.l_path, self.n_reco)
    self.l_evt = createlist(self.d_pkl, self.l_path, self.n_evt)
    self.l_evtorig = createlist(self.d_pkl, self.l_path, self.n_evtorig)
    if self.mcordata == "mc":
        self.l_gen = createlist(self.d_pkl, self.l_path, self.n_gen)

    self.f_totevt = os.path.join(self.d_pkl, self.n_evt)
    self.f_totevtorig = os.path.join(self.d_pkl, self.n_evtorig)

    analysis = datap["analysis"]
    self.p_modelname = analysis["modelname"]
    self.lpt_anbinmin = datap["sel_skim_binmin"]
    self.lpt_anbinmax = datap["sel_skim_binmax"]
    self.p_nptbins = len(datap["sel_skim_binmax"])
    self.lpt_model = analysis["modelsperptbin"]
    self.dirmodel = datap["ml"]["mlout"]
    self.lpt_model = appendmainfoldertolist(self.dirmodel, self.lpt_model)
    self.lpt_probcutpre = analysis["probcutpresel"][self.mcordata]
    self.lpt_probcutfin = analysis["probcutoptimal"]
    # The cuts are per-bin sequences. ``list < list`` is a lexicographic
    # comparison that stops at the first differing bin, so every bin has to
    # be checked on its own.
    if any(fin < pre for fin, pre in zip(self.lpt_probcutfin, self.lpt_probcutpre)):
        print("FATAL error: probability cut final must be tighter!")

    self.d_pkl_dec = d_pkl_dec
    self.d_pkl_decmerged = d_pkl_decmerged
    self.n_filemass = os.path.join(self.d_results, self.n_filemass)
    self.n_fileeff = os.path.join(self.d_results, self.n_fileeff)

    bin_indices = range(self.p_nptbins)
    # One "_<binning variable><min>_<max>.pkl" suffix per skimming bin.
    skim_suffixes = ["_%s%d_%d.pkl" % (self.v_var_binning,
                                       self.lpt_anbinmin[i], self.lpt_anbinmax[i])
                     for i in bin_indices]
    self.lpt_recosk = [self.n_reco.replace(".pkl", sfx) for sfx in skim_suffixes]
    self.lpt_gensk = [self.n_gen.replace(".pkl", sfx) for sfx in skim_suffixes]
    self.lpt_reco_ml = [os.path.join(self.d_pkl_ml, name) for name in self.lpt_recosk]
    self.lpt_gen_ml = [os.path.join(self.d_pkl_ml, name) for name in self.lpt_gensk]
    self.f_evt_ml = os.path.join(self.d_pkl_ml, self.n_evt)
    self.f_evtorig_ml = os.path.join(self.d_pkl_ml, self.n_evtorig)

    # File names carrying the bin edges and the preselection probability cut.
    self.lpt_recodec = [
        self.n_reco.replace(".pkl", "%d_%d_%.2f.pkl" % (self.lpt_anbinmin[i],
                                                        self.lpt_anbinmax[i],
                                                        self.lpt_probcutpre[i]))
        for i in bin_indices]

    self.mptfiles_recosk = [createlist(self.d_pklsk, self.l_path, name)
                            for name in self.lpt_recosk]
    self.mptfiles_recoskmldec = [createlist(self.d_pkl_dec, self.l_path, name)
                                 for name in self.lpt_recodec]
    self.lpt_recodecmerged = [os.path.join(self.d_pkl_decmerged, name)
                              for name in self.lpt_recodec]

    # Generated-level lists only exist for the "mc" sample; the attribute
    # mptfiles_gensk stays an empty list otherwise.
    self.mptfiles_gensk = []
    if self.mcordata == "mc":
        self.mptfiles_gensk = [createlist(self.d_pklsk, self.l_path, name)
                               for name in self.lpt_gensk]
        self.lpt_gendecmerged = [os.path.join(self.d_pkl_decmerged, name)
                                 for name in self.lpt_gensk]

    self.lpt_filemass = [
        self.n_filemass.replace(".root", "%d_%d_%.2f.root" % (self.lpt_anbinmin[ipt],
                                                              self.lpt_anbinmax[ipt],
                                                              self.lpt_probcutfin[ipt]))
        for ipt in bin_indices]

    self.p_mass_fit_lim = analysis['mass_fit_lim']
    self.p_bin_width = analysis['bin_width']
    mass_range = self.p_mass_fit_lim[1] - self.p_mass_fit_lim[0]
    self.p_num_bins = int(round(mass_range / self.p_bin_width))
    self.l_selml = ["y_test_prob%s>%s" % (self.p_modelname, self.lpt_probcutfin[ipt])
                    for ipt in bin_indices]
    self.s_presel_gen_eff = analysis['presel_gen_eff']
class Processer: # pylint: disable=too-many-instance-attributes
    """Hold the configuration and the derived file lists of one period.

    The constructor only stores parameters and builds file names/lists; it
    reads the nested configuration mapping ``datap`` and calls the helpers
    ``list_folders``, ``createlist`` and ``appendmainfoldertolist``.
    """
    # Class Attribute
    species = 'processer'

    @staticmethod
    def _bins_with_loose_final_cut(probcut_final, probcut_presel):
        """Return the indices of bins whose final cut is below the preselection cut.

        The comparison is done bin by bin; comparing the two lists directly
        would be lexicographic and stop at the first differing bin.
        """
        return [ibin for ibin, (fin, pre) in enumerate(zip(probcut_final, probcut_presel))
                if fin < pre]

    # Initializer / Instance Attributes
    # pylint: disable=too-many-statements, too-many-arguments, too-many-locals
    def __init__(self, case, datap, run_param, mcordata, p_maxfiles,
                 d_root, d_pkl, d_pklsk, d_pkl_ml, p_period,
                 p_chunksizeunp, p_chunksizeskim, p_maxprocess,
                 p_frac_merge, p_rd_merge, d_pkl_dec, d_pkl_decmerged,
                 d_results, d_val, typean, runlisttrigger, d_mcreweights):
        """Store the configuration and derive every input/output file name.

        Args:
            case: stored unchanged as ``self.case``.
            datap: nested configuration mapping; the keys read are visible below.
            run_param: mapping indexed with ``p_period`` to obtain the run list.
            mcordata: sample key; the value ``"mc"`` additionally enables the
                generated-level file lists.
            p_maxfiles: forwarded to ``list_folders``.
            d_root: directory scanned by ``list_folders`` when it exists.
            d_pkl: directory of the per-folder pickle files; scanned instead
                of ``d_root`` when ``d_root`` is not a directory.
            d_pklsk: directory used for the per-bin skimmed file lists.
            d_pkl_ml: directory used for the per-bin ``*_ml`` file names.
            p_period: key into ``run_param``.
            p_chunksizeunp, p_chunksizeskim, p_maxprocess, p_frac_merge,
            p_rd_merge: stored unchanged on the instance.
            d_pkl_dec: directory used for the per-folder "dec" file lists.
            d_pkl_decmerged: directory used for the merged "dec" file names.
            d_results: directory used for the histogram/efficiency files.
            d_val: directory used for the ``namefile_evtvalroot`` files.
            typean: key selecting the sub-section of ``datap["analysis"]``.
            runlisttrigger: stored as ``self.runlistrigger`` (attribute name
                kept as in the original for compatibility).
            d_mcreweights: stored unchanged on the instance.
        """
        files_names = datap["files_names"]
        bitmap_sel = datap["bitmap_sel"]
        variables = datap["variables"]

        self.nprongs = datap["nprongs"]
        self.doml = datap["doml"]
        self.case = case
        self.typean = typean

        #directories
        self.d_root = d_root
        self.d_pkl = d_pkl
        self.d_pklsk = d_pklsk
        self.d_pkl_ml = d_pkl_ml
        self.d_results = d_results
        self.d_val = d_val
        self.d_mcreweights = d_mcreweights
        self.datap = datap
        self.mcordata = mcordata
        self.p_frac_merge = p_frac_merge
        self.p_rd_merge = p_rd_merge
        self.period = p_period
        self.runlist = run_param[self.period]
        self.run_param = run_param
        self.p_maxfiles = p_maxfiles
        self.p_chunksizeunp = p_chunksizeunp
        self.p_chunksizeskim = p_chunksizeskim

        #parameter names
        self.p_maxprocess = p_maxprocess
        self.indexsample = None
        self.p_dofullevtmerge = datap["dofullevtmerge"]

        #namefile root
        self.n_root = files_names["namefile_unmerged_tree"]
        #troot trees names
        self.n_treereco = files_names["treeoriginreco"]
        self.n_treegen = files_names["treeorigingen"]
        self.n_treeevt = files_names["treeoriginevt"]
        #namefiles pkl
        self.n_reco = files_names["namefile_reco"]
        self.n_evt = files_names["namefile_evt"]
        self.n_evtorig = files_names["namefile_evtorig"]
        self.n_evtvalroot = files_names["namefile_evtvalroot"]
        self.n_gen = files_names["namefile_gen"]
        self.n_filemass = files_names["histofilename"]
        self.n_fileeff = files_names["efffilename"]
        self.n_mcreweights = files_names["namefile_mcweights"]

        #selections
        self.s_reco_unp = datap["sel_reco_unp"]
        self.s_good_evt_unp = datap["sel_good_evt_unp"]
        self.s_cen_unp = datap["sel_cen_unp"]
        self.s_gen_unp = datap["sel_gen_unp"]
        self.s_reco_skim = datap["sel_reco_skim"]
        self.s_gen_skim = datap["sel_gen_skim"]

        #bitmap
        self.b_trackcuts = datap["sel_reco_singletrac_unp"]
        self.b_std = bitmap_sel["isstd"]
        self.b_mcsig = bitmap_sel["ismcsignal"]
        self.b_mcsigprompt = bitmap_sel["ismcprompt"]
        self.b_mcsigfd = bitmap_sel["ismcfd"]
        self.b_mcbkg = bitmap_sel["ismcbkg"]
        self.b_mcrefl = bitmap_sel["ismcrefl"]

        #variables name
        self.v_all = variables["var_all"]
        self.v_train = variables["var_training"]
        self.v_evt = variables["var_evt"][self.mcordata]
        self.v_gen = variables["var_gen"]
        self.v_evtmatch = variables["var_evt_match"]
        self.v_bitvar = bitmap_sel["var_name"]
        self.v_isstd = bitmap_sel["var_isstd"]
        self.v_ismcsignal = bitmap_sel["var_ismcsignal"]
        self.v_ismcprompt = bitmap_sel["var_ismcprompt"]
        self.v_ismcfd = bitmap_sel["var_ismcfd"]
        self.v_ismcbkg = bitmap_sel["var_ismcbkg"]
        self.v_ismcrefl = bitmap_sel["var_ismcrefl"]
        self.v_var_binning = datap["var_binning"]

        #list of files names
        # Discover the folders from the ROOT directory when it exists,
        # otherwise from the directory holding the reco pickles.
        if os.path.isdir(self.d_root):
            self.l_path = list_folders(self.d_root, self.n_root, self.p_maxfiles)
        else:
            self.l_path = list_folders(self.d_pkl, self.n_reco, self.p_maxfiles)

        self.l_root = createlist(self.d_root, self.l_path, self.n_root)
        self.l_reco = createlist(self.d_pkl, self.l_path, self.n_reco)
        self.l_evt = createlist(self.d_pkl, self.l_path, self.n_evt)
        self.l_evtorig = createlist(self.d_pkl, self.l_path, self.n_evtorig)
        self.l_evtvalroot = createlist(self.d_val, self.l_path, self.n_evtvalroot)
        # These two lists use the bare file names: n_filemass / n_fileeff are
        # only prefixed with d_results further down.
        self.l_histomass = createlist(self.d_results, self.l_path, self.n_filemass)
        self.l_histoeff = createlist(self.d_results, self.l_path, self.n_fileeff)
        if self.mcordata == "mc":
            self.l_gen = createlist(self.d_pkl, self.l_path, self.n_gen)

        self.f_totevt = os.path.join(self.d_pkl, self.n_evt)
        self.f_totevtorig = os.path.join(self.d_pkl, self.n_evtorig)
        self.f_totevtvalroot = os.path.join(self.d_val, self.n_evtvalroot)

        mlapplication = datap["mlapplication"]
        self.p_modelname = mlapplication["modelname"]
        self.lpt_anbinmin = datap["sel_skim_binmin"]
        self.lpt_anbinmax = datap["sel_skim_binmax"]
        self.p_nptbins = len(datap["sel_skim_binmax"])
        self.lpt_model = mlapplication["modelsperptbin"]
        self.dirmodel = datap["ml"]["mlout"]
        self.lpt_model = appendmainfoldertolist(self.dirmodel, self.lpt_model)
        self.lpt_probcutpre = mlapplication["probcutpresel"][self.mcordata]
        self.lpt_probcutfin = mlapplication["probcutoptimal"]
        # Per-bin check: the final cut must not be looser than the preselection.
        if self._bins_with_loose_final_cut(self.lpt_probcutfin, self.lpt_probcutpre):
            print("FATAL error: probability cut final must be tighter!")

        self.d_pkl_dec = d_pkl_dec
        self.d_pkl_decmerged = d_pkl_decmerged
        self.n_filemass = os.path.join(self.d_results, self.n_filemass)
        self.n_fileeff = os.path.join(self.d_results, self.n_fileeff)

        bin_indices = range(self.p_nptbins)
        # One "_<binning variable><min>_<max>.pkl" suffix per skimming bin.
        skim_suffixes = ["_%s%d_%d.pkl" % (self.v_var_binning,
                                           self.lpt_anbinmin[i], self.lpt_anbinmax[i])
                         for i in bin_indices]
        self.lpt_recosk = [self.n_reco.replace(".pkl", sfx) for sfx in skim_suffixes]
        self.lpt_gensk = [self.n_gen.replace(".pkl", sfx) for sfx in skim_suffixes]
        self.lpt_reco_ml = [os.path.join(self.d_pkl_ml, name) for name in self.lpt_recosk]
        self.lpt_gen_ml = [os.path.join(self.d_pkl_ml, name) for name in self.lpt_gensk]
        self.f_evt_ml = os.path.join(self.d_pkl_ml, self.n_evt)
        self.f_evtorig_ml = os.path.join(self.d_pkl_ml, self.n_evtorig)

        # With ML the file name carries the preselection probability cut,
        # otherwise the literal tag "std".
        if self.doml is True:
            self.lpt_recodec = [
                self.n_reco.replace(".pkl", "%d_%d_%.2f.pkl" % (self.lpt_anbinmin[i],
                                                                self.lpt_anbinmax[i],
                                                                self.lpt_probcutpre[i]))
                for i in bin_indices]
        else:
            self.lpt_recodec = [
                self.n_reco.replace(".pkl", "%d_%d_std.pkl" % (self.lpt_anbinmin[i],
                                                               self.lpt_anbinmax[i]))
                for i in bin_indices]

        self.mptfiles_recosk = [createlist(self.d_pklsk, self.l_path, name)
                                for name in self.lpt_recosk]
        self.mptfiles_recoskmldec = [createlist(self.d_pkl_dec, self.l_path, name)
                                     for name in self.lpt_recodec]
        self.lpt_recodecmerged = [os.path.join(self.d_pkl_decmerged, name)
                                  for name in self.lpt_recodec]

        # Generated-level lists only exist for the "mc" sample; the attribute
        # mptfiles_gensk stays an empty list otherwise.
        self.mptfiles_gensk = []
        if self.mcordata == "mc":
            self.mptfiles_gensk = [createlist(self.d_pklsk, self.l_path, name)
                                   for name in self.lpt_gensk]
            self.lpt_gendecmerged = [os.path.join(self.d_pkl_decmerged, name)
                                     for name in self.lpt_gensk]

        # Settings of the analysis type selected with ``typean``.
        analysis = datap["analysis"][self.typean]
        self.p_mass_fit_lim = analysis['mass_fit_lim']
        self.p_bin_width = analysis['bin_width']
        mass_range = self.p_mass_fit_lim[1] - self.p_mass_fit_lim[0]
        self.p_num_bins = int(round(mass_range / self.p_bin_width))
        self.l_selml = ["y_test_prob%s>%s" % (self.p_modelname, self.lpt_probcutfin[ipt])
                        for ipt in bin_indices]
        self.s_presel_gen_eff = analysis['presel_gen_eff']

        self.lvar2_binmin = analysis["sel_binmin2"]
        self.lvar2_binmax = analysis["sel_binmax2"]
        self.v_var2_binning = analysis["var_binning2"]
        self.v_var2_binning_gen = analysis["var_binning2_gen"]
        self.corr_eff_mult = analysis["corrEffMult"]

        self.lpt_finbinmin = analysis["sel_an_binmin"]
        self.lpt_finbinmax = analysis["sel_an_binmax"]
        self.p_nptfinbins = len(self.lpt_finbinmin)
        self.bin_matching = analysis["binning_matching"]
        self.s_evtsel = analysis["evtsel"]
        self.s_trigger = analysis["triggersel"][self.mcordata]
        self.triggerbit = analysis["triggerbit"]
        self.runlistrigger = runlisttrigger
class Processer: # pylint: disable=too-many-instance-attributes
    """Hold the configuration and the derived file lists of one period.

    The constructor only stores parameters and builds file names/lists; it
    reads the nested configuration mapping ``datap`` and calls the helpers
    ``list_folders``, ``createlist`` and ``appendmainfoldertolist``.
    """
    # Class Attribute
    species = 'processer'

    @staticmethod
    def _bins_with_loose_final_cut(probcut_final, probcut_presel):
        """Return the indices of bins whose final cut is below the preselection cut.

        The comparison is done bin by bin; comparing the two lists directly
        would be lexicographic and stop at the first differing bin.
        """
        return [ibin for ibin, (fin, pre) in enumerate(zip(probcut_final, probcut_presel))
                if fin < pre]

    # Initializer / Instance Attributes
    # pylint: disable=too-many-statements, too-many-arguments, too-many-locals
    def __init__(self, case, datap, run_param, mcordata, p_maxfiles,
                 d_root, d_pkl, d_pklsk, d_pkl_ml, p_period,
                 p_chunksizeunp, p_chunksizeskim, p_maxprocess,
                 p_frac_merge, p_rd_merge, d_pkl_dec, d_pkl_decmerged,
                 d_results, typean, runlisttrigger, d_mcreweights):
        """Store the configuration and derive every input/output file name.

        Args:
            case: stored unchanged as ``self.case``.
            datap: nested configuration mapping; the keys read are visible below.
            run_param: stored unchanged as ``self.run_param``.
            mcordata: sample key; the value ``"mc"`` additionally enables the
                generated-level file lists.
            p_maxfiles: forwarded to ``list_folders``.
            d_root: directory scanned by ``list_folders`` when it exists.
            d_pkl: directory of the per-folder pickle files; scanned instead
                of ``d_root`` when ``d_root`` is not a directory.
            d_pklsk: directory used for the per-bin skimmed file lists.
            d_pkl_ml: directory used for the per-bin ``*_ml`` file names.
            p_period: stored as ``self.period``.
            p_chunksizeunp, p_chunksizeskim, p_maxprocess, p_frac_merge,
            p_rd_merge: stored unchanged on the instance.
            d_pkl_dec: directory used for the per-folder "dec" file lists.
            d_pkl_decmerged: directory used for the merged "dec" file names.
            d_results: directory used for the histogram/efficiency/response files.
            typean: key selecting the sub-section of ``datap["analysis"]``.
            runlisttrigger: stored as ``self.runlistrigger`` (attribute name
                kept as in the original for compatibility).
            d_mcreweights: stored unchanged on the instance.
        """
        files_names = datap["files_names"]
        bitmap_sel = datap["bitmap_sel"]
        variables = datap["variables"]

        self.nprongs = datap["nprongs"]
        self.prongformultsub = datap["prongformultsub"]
        self.doml = datap["doml"]
        self.case = case
        self.typean = typean

        #directories
        self.d_root = d_root
        self.d_pkl = d_pkl
        self.d_pklsk = d_pklsk
        self.d_pkl_ml = d_pkl_ml
        self.d_results = d_results
        self.d_mcreweights = d_mcreweights
        self.datap = datap
        self.mcordata = mcordata
        self.p_frac_merge = p_frac_merge
        self.p_rd_merge = p_rd_merge
        self.period = p_period
        self.run_param = run_param
        self.p_maxfiles = p_maxfiles
        self.p_chunksizeunp = p_chunksizeunp
        self.p_chunksizeskim = p_chunksizeskim

        #parameter names
        self.p_maxprocess = p_maxprocess
        self.indexsample = None
        self.p_dofullevtmerge = datap["dofullevtmerge"]

        #namefile root
        self.n_root = files_names["namefile_unmerged_tree"]
        #troot trees names
        self.n_treereco = files_names["treeoriginreco"]
        self.n_treegen = files_names["treeorigingen"]
        self.n_treeevt = files_names["treeoriginevt"]
        #namefiles pkl
        self.n_reco = files_names["namefile_reco"]
        self.n_evt = files_names["namefile_evt"]
        self.n_evtorig = files_names["namefile_evtorig"]
        self.n_gen = files_names["namefile_gen"]
        self.n_filemass = files_names["histofilename"]
        self.n_fileeff = files_names["efffilename"]
        self.n_fileresp = files_names["respfilename"]
        self.n_mcreweights = files_names["namefile_mcweights"]

        #selections
        self.s_reco_unp = datap["sel_reco_unp"]
        self.s_good_evt_unp = datap["sel_good_evt_unp"]
        self.s_cen_unp = datap["sel_cen_unp"]
        self.s_gen_unp = datap["sel_gen_unp"]
        self.s_reco_skim = datap["sel_reco_skim"]
        self.s_gen_skim = datap["sel_gen_skim"]

        #bitmap
        self.b_trackcuts = datap["sel_reco_singletrac_unp"]
        self.b_std = bitmap_sel["isstd"]
        self.b_mcsig = bitmap_sel["ismcsignal"]
        self.b_mcsigprompt = bitmap_sel["ismcprompt"]
        self.b_mcsigfd = bitmap_sel["ismcfd"]
        self.b_mcbkg = bitmap_sel["ismcbkg"]
        self.b_mcrefl = bitmap_sel["ismcrefl"]

        #variables name
        self.v_all = variables["var_all"]
        self.v_train = variables["var_training"]
        self.v_evt = variables["var_evt"][self.mcordata]
        self.v_gen = variables["var_gen"]
        self.v_evtmatch = variables["var_evt_match"]
        self.v_bitvar = bitmap_sel["var_name"]
        self.v_isstd = bitmap_sel["var_isstd"]
        self.v_ismcsignal = bitmap_sel["var_ismcsignal"]
        self.v_ismcprompt = bitmap_sel["var_ismcprompt"]
        self.v_ismcfd = bitmap_sel["var_ismcfd"]
        self.v_ismcbkg = bitmap_sel["var_ismcbkg"]
        self.v_ismcrefl = bitmap_sel["var_ismcrefl"]
        self.v_var_binning = datap["var_binning"]

        #list of files names
        # Discover the folders from the ROOT directory when it exists,
        # otherwise from the directory holding the reco pickles.
        if os.path.isdir(self.d_root):
            self.l_path = list_folders(self.d_root, self.n_root, self.p_maxfiles)
        else:
            self.l_path = list_folders(self.d_pkl, self.n_reco, self.p_maxfiles)

        self.l_root = createlist(self.d_root, self.l_path, self.n_root)
        self.l_reco = createlist(self.d_pkl, self.l_path, self.n_reco)
        self.l_evt = createlist(self.d_pkl, self.l_path, self.n_evt)
        self.l_evtorig = createlist(self.d_pkl, self.l_path, self.n_evtorig)
        # These lists use the bare file names: n_filemass / n_fileeff /
        # n_fileresp are only prefixed with d_results further down.
        self.l_histomass = createlist(self.d_results, self.l_path, self.n_filemass)
        self.l_histoeff = createlist(self.d_results, self.l_path, self.n_fileeff)
        self.l_historesp = createlist(self.d_results, self.l_path, self.n_fileresp)
        if self.mcordata == "mc":
            self.l_gen = createlist(self.d_pkl, self.l_path, self.n_gen)

        self.f_totevt = os.path.join(self.d_pkl, self.n_evt)
        self.f_totevtorig = os.path.join(self.d_pkl, self.n_evtorig)

        mlapplication = datap["mlapplication"]
        self.p_modelname = mlapplication["modelname"]
        self.lpt_anbinmin = datap["sel_skim_binmin"]
        self.lpt_anbinmax = datap["sel_skim_binmax"]
        self.p_nptbins = len(self.lpt_anbinmin)
        # Analysis pT bins
        self.lpt_finbinmin = datap["analysis"][self.typean]["sel_an_binmin"]
        self.lpt_finbinmax = datap["analysis"][self.typean]["sel_an_binmax"]
        self.p_nptfinbins = len(self.lpt_finbinmin)

        self.lpt_model = mlapplication["modelsperptbin"]
        self.dirmodel = datap["ml"]["mlout"]
        self.lpt_model = appendmainfoldertolist(self.dirmodel, self.lpt_model)
        self.lpt_probcutpre = mlapplication["probcutpresel"][self.mcordata]
        self.lpt_probcutfin = mlapplication["probcutoptimal"]
        # Per-bin check: the final cut must not be looser than the preselection.
        if self._bins_with_loose_final_cut(self.lpt_probcutfin, self.lpt_probcutpre):
            print("FATAL error: probability cut final must be tighter!")

        self.d_pkl_dec = d_pkl_dec
        self.d_pkl_decmerged = d_pkl_decmerged
        self.n_filemass = os.path.join(self.d_results, self.n_filemass)
        self.n_fileeff = os.path.join(self.d_results, self.n_fileeff)
        self.n_fileresp = os.path.join(self.d_results, self.n_fileresp)

        bin_indices = range(self.p_nptbins)
        # One "_<binning variable><min>_<max>.pkl" suffix per skimming bin.
        skim_suffixes = ["_%s%d_%d.pkl" % (self.v_var_binning,
                                           self.lpt_anbinmin[i], self.lpt_anbinmax[i])
                         for i in bin_indices]
        self.lpt_recosk = [self.n_reco.replace(".pkl", sfx) for sfx in skim_suffixes]
        self.lpt_gensk = [self.n_gen.replace(".pkl", sfx) for sfx in skim_suffixes]
        self.lpt_reco_ml = [os.path.join(self.d_pkl_ml, name) for name in self.lpt_recosk]
        self.lpt_gen_ml = [os.path.join(self.d_pkl_ml, name) for name in self.lpt_gensk]
        self.f_evt_ml = os.path.join(self.d_pkl_ml, self.n_evt)
        self.f_evtorig_ml = os.path.join(self.d_pkl_ml, self.n_evtorig)

        # With ML the file name carries the preselection probability cut,
        # otherwise the literal tag "std".
        if self.doml is True:
            self.lpt_recodec = [
                self.n_reco.replace(".pkl", "%d_%d_%.2f.pkl" % (self.lpt_anbinmin[i],
                                                                self.lpt_anbinmax[i],
                                                                self.lpt_probcutpre[i]))
                for i in bin_indices]
        else:
            self.lpt_recodec = [
                self.n_reco.replace(".pkl", "%d_%d_std.pkl" % (self.lpt_anbinmin[i],
                                                               self.lpt_anbinmax[i]))
                for i in bin_indices]

        self.mptfiles_recosk = [createlist(self.d_pklsk, self.l_path, name)
                                for name in self.lpt_recosk]
        self.mptfiles_recoskmldec = [createlist(self.d_pkl_dec, self.l_path, name)
                                     for name in self.lpt_recodec]
        self.lpt_recodecmerged = [os.path.join(self.d_pkl_decmerged, name)
                                  for name in self.lpt_recodec]

        # Generated-level lists only exist for the "mc" sample; the attribute
        # mptfiles_gensk stays an empty list otherwise.
        self.mptfiles_gensk = []
        if self.mcordata == "mc":
            self.mptfiles_gensk = [createlist(self.d_pklsk, self.l_path, name)
                                   for name in self.lpt_gensk]
            self.lpt_gendecmerged = [os.path.join(self.d_pkl_decmerged, name)
                                     for name in self.lpt_gensk]

        self.triggerbit = datap["analysis"][self.typean]["triggerbit"]
        self.runlistrigger = runlisttrigger

        # Analysis cuts (loaded in self.process_histomass)
        self.analysis_cuts = None
        # Flag if they should be used
        self.do_custom_analysis_cuts = datap["analysis"][self.typean].get("use_cuts", False)
class Processer: # pylint: disable=too-many-instance-attributes
    """Hold the configuration and the derived file lists of one period.

    The constructor only stores parameters and builds file names/lists; it
    reads the nested configuration mapping ``datap`` and calls the helpers
    ``list_folders``, ``createlist`` and ``appendmainfoldertolist``.
    """
    # Class Attribute
    species = 'processer'

    @staticmethod
    def _bins_with_loose_final_cut(probcut_final, probcut_presel):
        """Return the indices of bins whose final cut is below the preselection cut.

        The comparison is done bin by bin; comparing the two lists directly
        would be lexicographic and stop at the first differing bin.
        """
        return [ibin for ibin, (fin, pre) in enumerate(zip(probcut_final, probcut_presel))
                if fin < pre]

    # Initializer / Instance Attributes
    # pylint: disable=too-many-statements, too-many-arguments, too-many-locals, too-many-branches
    def __init__(self, case, datap, run_param, mcordata, p_maxfiles,
                 d_root, d_pkl, d_pklsk, d_pkl_ml, p_period, i_period,
                 p_chunksizeunp, p_chunksizeskim, p_maxprocess,
                 p_frac_merge, p_rd_merge, d_pkl_dec, d_pkl_decmerged,
                 checkiffileexist, doapply):
        """Store the configuration and derive every input/output file name.

        Note that ``datap["mlapplication"]`` is modified in place when
        ``datap["doml"]`` is falsy: the probability cuts are replaced by zeros.

        Args:
            case: stored unchanged as ``self.case``.
            datap: nested configuration mapping; the keys read are visible below.
            run_param: mapping queried with ``p_period`` for the run list
                (``None`` when the period is missing).
            mcordata: sample key; the value ``"mc"`` additionally enables the
                generated-level file lists.
            p_maxfiles: forwarded to ``list_folders``.
            d_root: directory scanned by ``list_folders`` when it exists.
            d_pkl: directory of the per-folder pickle files; scanned instead
                of ``d_root`` when ``d_root`` is not a directory.
            d_pklsk: directory used for the per-bin skimmed file lists.
            d_pkl_ml: directory used for the per-bin ``*_ml`` file names.
            p_period: key into ``run_param``.
            i_period: index into the configured ``select_children`` lists.
            p_chunksizeunp, p_chunksizeskim, p_maxprocess, p_frac_merge,
            p_rd_merge: stored unchanged on the instance.
            d_pkl_dec: directory used for the per-folder "dec" file lists.
            d_pkl_decmerged: directory used for the merged "dec" file names.
            checkiffileexist: stored as ``self.first_check_if_file_exists``.
            doapply: when truthy, allows switching to the ``binminsk`` /
                ``binmaxsk`` bin edges (see below).
        """
        files_names = datap["files_names"]
        bitmap_sel = datap["bitmap_sel"]
        variables = datap["variables"]

        self.datap = datap
        self.case = case
        self.first_check_if_file_exists = checkiffileexist
        self.doapply = doapply

        #directories
        self.d_root = d_root
        self.d_pkl = d_pkl
        self.d_pklsk = d_pklsk
        self.d_pkl_ml = d_pkl_ml

        #processing variables
        self.mcordata = mcordata
        self.p_frac_merge = p_frac_merge
        self.p_rd_merge = p_rd_merge
        self.period = p_period
        self.select_children = datap["multi"][mcordata].get("select_children", None)
        if self.select_children:
            # Use "<child>/" instead of "<child>" only: without the trailing
            # slash "child_1" might also select children like "child_11".
            self.select_children = [f"{child}/" for child in self.select_children[i_period]]
        self.runlist = run_param.get(self.period, None)
        self.p_maxfiles = p_maxfiles
        self.p_chunksizeunp = p_chunksizeunp
        self.p_chunksizeskim = p_chunksizeskim
        self.p_maxprocess = p_maxprocess
        self.p_dofullevtmerge = datap["dofullevtmerge"]
        self.v_max_ncand_merge = datap["multi"]["max_ncand_merge"]
        self.p_max_frac_merge = datap["multi"][self.mcordata]["max_frac_merge"]

        #namefile root
        self.n_root = files_names["namefile_unmerged_tree"]
        #troot trees names
        # The tree names are configured as pairs: index 0 is used for any
        # sample other than "mc", index 1 for "mc".
        mcordata_int = 1 if self.mcordata == "mc" else 0
        self.n_treereco = files_names["treeoriginreco"][mcordata_int]
        self.n_treegen = files_names["treeorigingen"][mcordata_int]
        self.n_treeevt = files_names["treeoriginevt"][mcordata_int]
        #namefiles pkl
        self.n_reco = files_names["namefile_reco"]
        self.n_evt = files_names["namefile_evt"]
        self.n_evtorig = files_names["namefile_evtorig"]
        self.n_gen = files_names["namefile_gen"]

        #selections
        self.s_reco_unp = datap["sel_reco_unp"]
        self.s_good_evt_unp = datap["sel_good_evt_unp"]
        self.s_cen_unp = datap["sel_cen_unp"]
        if isinstance(self.s_cen_unp, list):
            # A list holds one selection per sample: [data, everything else].
            if self.mcordata == "data":
                self.s_cen_unp = datap["sel_cen_unp"][0]
            else:
                self.s_cen_unp = datap["sel_cen_unp"][1]
        self.s_gen_unp = datap["sel_gen_unp"]
        self.s_reco_skim = datap["sel_reco_skim"]
        self.s_gen_skim = datap["sel_gen_skim"]

        # mask missing values
        mask_cfg = datap.get("maskmissingvalues", None)
        if mask_cfg is not None:
            self.b_maskmissing = mask_cfg.get("activate", False)
            self.v_varstomask = mask_cfg.get("tomask", None)
        else:
            self.b_maskmissing = False
            self.v_varstomask = None

        #bitmap
        self.b_trackcuts = datap["sel_reco_singletrac_unp"]
        self.b_std = bitmap_sel["isstd"]
        self.b_mcsig = bitmap_sel["ismcsignal"]
        self.b_mcsigprompt = bitmap_sel["ismcprompt"]
        self.b_mcsigfd = bitmap_sel["ismcfd"]
        self.b_mcbkg = bitmap_sel["ismcbkg"]
        self.b_mcrefl = bitmap_sel["ismcrefl"]
        self.b_dsprompt = bitmap_sel.get("isdsprompt", None)
        self.b_dsfdbplus = bitmap_sel.get("isdsfdbplus", None)
        self.b_dsfdbzero = bitmap_sel.get("isdsfdbzero", None)
        self.b_dsfdlambdab = bitmap_sel.get("isdsfdlambdab", None)
        self.b_dsfdbs = bitmap_sel.get("isdsfdbs", None)

        #variables name
        self.lpt_anbinmin = datap["sel_skim_binmin"]
        self.lpt_anbinmax = datap["sel_skim_binmax"]
        self.lpt_anbinmintr = datap["ml"].get("binmin", self.lpt_anbinmin)
        self.lpt_anbinmaxtr = datap["ml"].get("binmax", self.lpt_anbinmax)
        # When there are more "ml" bins than skimming bins and doapply is
        # set, the bin edges are taken from binminsk / binmaxsk instead.
        if len(self.lpt_anbinmintr) > len(self.lpt_anbinmin) and self.doapply:
            self.lpt_anbinmin = datap["ml"]["binminsk"]
            self.lpt_anbinmax = datap["ml"]["binmaxsk"]
        self.p_nptbins = len(self.lpt_anbinmin)

        self.v_all = variables["var_all"]
        self.v_train = variables["var_training"]
        if not isinstance(self.v_train[0], list):
            # A flat variable list is shared by all bins.
            self.v_train = [self.v_train for _ in range(self.p_nptbins)]
        self.v_evt = variables["var_evt"][self.mcordata]
        self.v_gen = variables["var_gen"]
        self.v_evtmatch = variables["var_evt_match"]
        self.v_bitvar = bitmap_sel["var_name"]
        self.v_isstd = bitmap_sel["var_isstd"]
        self.v_ismcsignal = bitmap_sel["var_ismcsignal"]
        self.v_ismcprompt = bitmap_sel["var_ismcprompt"]
        self.v_ismcfd = bitmap_sel["var_ismcfd"]
        self.v_ismcbkg = bitmap_sel["var_ismcbkg"]
        self.v_ismcrefl = bitmap_sel["var_ismcrefl"]
        self.v_dsprompt = bitmap_sel.get("var_isdsprompt", None)
        self.v_dsfdbplus = bitmap_sel.get("var_isdsfdbplus", None)
        self.v_dsfdbzero = bitmap_sel.get("var_isdsfdbzero", None)
        self.v_dsfdlambdab = bitmap_sel.get("var_isdsfdlambdab", None)
        self.v_dsfdbs = bitmap_sel.get("var_isdsfdbs", None)
        self.v_var_binning = datap["var_binning"]
        self.nprongs = datap["nprongs"]

        #list of files names
        # Discover the folders from the ROOT directory when it exists,
        # otherwise from the directory holding the reco pickles.
        if os.path.isdir(self.d_root):
            self.l_path = list_folders(self.d_root, self.n_root, self.p_maxfiles,
                                       self.select_children)
        else:
            self.l_path = list_folders(self.d_pkl, self.n_reco, self.p_maxfiles,
                                       self.select_children)

        self.l_root = createlist(self.d_root, self.l_path, self.n_root)
        self.l_reco = createlist(self.d_pkl, self.l_path, self.n_reco)
        self.l_evt = createlist(self.d_pkl, self.l_path, self.n_evt)
        self.l_evtorig = createlist(self.d_pkl, self.l_path, self.n_evtorig)
        if self.mcordata == "mc":
            self.l_gen = createlist(self.d_pkl, self.l_path, self.n_gen)

        self.f_totevt = os.path.join(self.d_pkl, self.n_evt)
        self.f_evt_ml = os.path.join(self.d_pkl_ml, self.n_evt)
        self.f_totevtorig = os.path.join(self.d_pkl, self.n_evtorig)
        self.f_evtorig_ml = os.path.join(self.d_pkl_ml, self.n_evtorig)

        bin_indices = range(self.p_nptbins)
        # One "_<binning variable><min>_<max>.pkl" suffix per skimming bin.
        skim_suffixes = ["_%s%d_%d.pkl" % (self.v_var_binning,
                                           self.lpt_anbinmin[i], self.lpt_anbinmax[i])
                         for i in bin_indices]
        self.lpt_recosk = [self.n_reco.replace(".pkl", sfx) for sfx in skim_suffixes]
        self.lpt_gensk = [self.n_gen.replace(".pkl", sfx) for sfx in skim_suffixes]
        self.lpt_reco_ml = [os.path.join(self.d_pkl_ml, name) for name in self.lpt_recosk]
        self.lpt_gen_ml = [os.path.join(self.d_pkl_ml, name) for name in self.lpt_gensk]
        self.lpt_reco_mlmax = [os.path.join(self.d_pkl_ml + '_max', name)
                               for name in self.lpt_recosk]

        self.mptfiles_recosk = [createlist(self.d_pklsk, self.l_path, name)
                                for name in self.lpt_recosk]
        # Generated-level lists only exist for the "mc" sample; the attribute
        # mptfiles_gensk stays an empty list otherwise.
        self.mptfiles_gensk = []
        if self.mcordata == "mc":
            self.mptfiles_gensk = [createlist(self.d_pklsk, self.l_path, name)
                                   for name in self.lpt_gensk]

        #Variables for ML applying
        mlapplication = datap["mlapplication"]
        self.do_mlprefilter = datap.get("doml_asprefilter", None)
        self.apply_w_pkl_layout = datap.get("apply_with_pkl_layout", None)
        self.overwrite_mlprob_mc = None
        if self.mcordata == "mc":
            self.overwrite_mlprob_mc = datap.get("overwrite_mlprob_mc", None)
        self.p_modelname = mlapplication["modelname"]
        self.mltype = datap["ml"]["mltype"]
        self.lpt_model = mlapplication["modelsperptbin"]
        self.lpt_modhandler_hipe4ml = mlapplication["modelsperptbin_hipe4ml"]
        self.dirmodel = datap["ml"]["mlout"]
        self.lpt_model = appendmainfoldertolist(self.dirmodel, self.lpt_model)
        self.lpt_modhandler_hipe4ml = appendmainfoldertolist(self.dirmodel,
                                                             self.lpt_modhandler_hipe4ml)

        self.doml = datap["doml"]
        if self.do_mlprefilter is not None and self.doml is False:
            print("FATAL error: The ML prefilter feature cannot combine with rectangular cuts")

        if not self.doml:
            # Without ML all probability cuts are forced to zero; this
            # overwrites the entries of the caller's configuration mapping.
            mlapplication["probcutpresel"][self.mcordata] = [0 for _ in self.lpt_anbinmin]
            mlapplication["probcutoptimal"] = [0 for _ in self.lpt_anbinmin]
            mlapplication["ml_prefilter_probcut"] = [0 for _ in self.lpt_anbinmin]

        self.lpt_probcutfin = mlapplication["probcutoptimal"]
        if self.do_mlprefilter is True or self.overwrite_mlprob_mc is True:
            self.lpt_probcutpre = mlapplication["ml_prefilter_probcut"]
        else:
            self.lpt_probcutpre = mlapplication["probcutpresel"][self.mcordata]

        # Entries that are lists (two cuts per bin) are not checked here.
        presel_is_scalar = not isinstance(self.lpt_probcutpre[0], list)
        if presel_is_scalar:
            # Per-bin check: the final cut must not be looser than the preselection.
            if self._bins_with_loose_final_cut(self.lpt_probcutfin, self.lpt_probcutpre):
                print("FATAL error: probability cut final must be tighter!")

        self.d_pkl_dec = d_pkl_dec
        self.d_pkl_decmerged = d_pkl_decmerged

        # NOTE(review): the prefilter/analysis sub-directory handling is
        # disabled (it was commented out), so the input directory for the
        # "do_mlprefilter is False" case is never set and None is handed to
        # createlist - confirm that this branch is still meant to be used.
        inputdir_forapply = None
        if self.do_mlprefilter is False:
            self.mptfiles_recosk_forapply = [
                createlist(inputdir_forapply, self.l_path, name) for name in self.lpt_recosk]
        else:
            self.mptfiles_recosk_forapply = [
                createlist(self.d_pklsk, self.l_path, name) for name in self.lpt_recosk]

        if self.doml is True:
            if (self.do_mlprefilter is True or self.overwrite_mlprob_mc is True
                    or self.apply_w_pkl_layout is True):
                # Same list object as the skimmed names, not a copy.
                self.lpt_recodec = self.lpt_recosk
            elif presel_is_scalar:
                self.lpt_recodec = [
                    self.n_reco.replace(".pkl", "%d_%d_%.2f.pkl" % (self.lpt_anbinmintr[i],
                                                                    self.lpt_anbinmaxtr[i],
                                                                    self.lpt_probcutpre[i]))
                    for i in bin_indices]
            else:
                self.lpt_recodec = [
                    self.n_reco.replace(".pkl",
                                        "%d_%d_%.2f%.2f.pkl" % (self.lpt_anbinmintr[i],
                                                                self.lpt_anbinmaxtr[i],
                                                                self.lpt_probcutpre[i][0],
                                                                self.lpt_probcutpre[i][1]))
                    for i in bin_indices]
        else:
            self.lpt_recodec = [
                self.n_reco.replace(".pkl", "%d_%d_std.pkl" % (self.lpt_anbinmintr[i],
                                                               self.lpt_anbinmaxtr[i]))
                for i in bin_indices]

        self.mptfiles_recoskmldec = [createlist(self.d_pkl_dec, self.l_path,
                                                self.lpt_recodec[ipt])
                                     for ipt in bin_indices]
        self.lpt_recodecmerged = [os.path.join(self.d_pkl_decmerged, self.lpt_recodec[ipt])
                                  for ipt in bin_indices]
        if self.mcordata == "mc":
            self.mptfiles_genskmldec = [createlist(self.d_pkl_dec, self.l_path, name)
                                        for name in self.lpt_gensk]
            self.lpt_gendecmerged = [os.path.join(self.d_pkl_decmerged, name)
                                     for name in self.lpt_gensk]

        self.multiclass_labels = datap["ml"].get("multiclass_labels", None)
class Processer: # pylint: disable=too-many-instance-attributes
    """Configuration holder for processing one period of a data or MC sample.

    The constructor copies its arguments onto the instance, reads file names,
    selections and variable names from the ``datap`` database and pre-computes
    the per-pT-bin file names and paths.  Depending on whether ``'Jet' in case``
    holds, either the jet settings or the ML / invariant-mass settings are
    filled; the attributes belonging to the other group are left at ``None``.
    """
    # Class Attribute
    species = 'processer'

    # Initializer / Instance Attributes
    # pylint: disable=too-many-statements, too-many-arguments
    def __init__(self, case, datap, run_param, mcordata, p_maxfiles,
                 d_root, d_pkl, d_pklsk, d_pkl_ml, p_period,
                 p_chunksizeunp, p_chunksizeskim, p_chunksizejet, p_maxprocess,
                 p_frac_merge, p_rd_merge, d_pkl_dec, d_pkl_decmerged, d_results):
        """Store the configuration and build all file-name lists.

        Args:
            case: analysis case; only tested with ``'Jet' in case`` to choose
                between the jet and the non-jet set-up.
            datap: nested database (mapping) with file names, selections,
                variables and analysis/ML settings.
            run_param: mapping indexed by ``p_period`` that yields the run list.
            mcordata: key into ``datap["variables"]["var_evt"]``; the value
                ``"mc"`` additionally enables the generator-level file lists.
            p_maxfiles: forwarded to ``list_folders``.
            d_root: directory of the ROOT input; if it is not an existing
                directory the folder list is built from ``d_pkl`` instead.
            d_pkl: directory of the unskimmed pickle files.
            d_pklsk: directory of the skimmed pickle files.
            d_pkl_ml: directory of the ML pickle files.
            p_period: key of the period in ``run_param``.
            p_chunksizeunp: stored as ``self.p_chunksizeunp``.
            p_chunksizeskim: stored as ``self.p_chunksizeskim``.
            p_chunksizejet: stored as ``self.p_chunksizejet``.
            p_maxprocess: stored as ``self.p_maxprocess``.
            p_frac_merge: stored as ``self.p_frac_merge``.
            p_rd_merge: stored as ``self.p_rd_merge``.
            d_pkl_dec: directory of the ML-applied pickle files.
            d_pkl_decmerged: directory of the merged ML-applied pickle files
                (non-jet cases only).
            d_results: directory the histogram and efficiency file names are
                joined with (non-jet cases only).
        """
        is_jet = 'Jet' in case
        files_names = datap["files_names"]
        variables = datap["variables"]

        #directories
        self.case = case
        self.d_root = d_root
        self.d_pkl = d_pkl
        self.d_pklsk = d_pklsk
        self.d_pkl_ml = d_pkl_ml
        self.datap = datap
        self.mcordata = mcordata
        self.p_frac_merge = p_frac_merge
        self.p_rd_merge = p_rd_merge
        self.period = p_period
        self.runlist = run_param[self.period]

        self.p_maxfiles = p_maxfiles
        self.p_chunksizeunp = p_chunksizeunp
        self.p_chunksizeskim = p_chunksizeskim
        self.p_chunksizejet = p_chunksizejet

        #parameter names
        self.p_maxprocess = p_maxprocess
        self.indexsample = None
        #namefile root
        self.n_root = files_names["namefile_unmerged_tree"]
        #troot trees names
        self.n_treereco = files_names["treeoriginreco"]
        self.n_treegen = files_names["treeorigingen"]
        self.n_treeevt = files_names["treeoriginevt"]
        #namefiles pkl
        self.n_reco = files_names["namefile_reco"]
        self.n_evt = files_names["namefile_evt"]
        self.n_evtorig = files_names["namefile_evtorig"]
        self.n_gen = files_names["namefile_gen"]
        self.n_jet = files_names["namefile_jets"]
        # Histogram / efficiency file names are only read for non-jet cases;
        # they are turned into full paths under d_results further down.
        self.n_filemass = self.n_fileeff = None
        if not is_jet:
            self.n_filemass = files_names["histofilename"]
            self.n_fileeff = files_names["efffilename"]

        #selections
        self.s_reco_unp = datap["sel_reco_unp"]
        self.s_good_evt_unp = datap["sel_good_evt_unp"]
        self.s_cen_unp = datap["sel_cen_unp"]
        self.s_gen_unp = datap["sel_gen_unp"]
        self.s_reco_skim = datap["sel_reco_skim"]
        self.s_gen_skim = datap["sel_gen_skim"]

        #bitmap
        self.b_trackcuts = self.b_std = self.b_mcsig = self.b_mcsigprompt = \
            self.b_mcsigfd = self.b_mcbkg = None
        if not is_jet:
            bitmap_sel = datap["bitmap_sel"]
            self.b_trackcuts = datap["sel_reco_singletrac_unp"]
            self.b_std = bitmap_sel["isstd"]
            self.b_mcsig = bitmap_sel["ismcsignal"]
            self.b_mcsigprompt = bitmap_sel["ismcprompt"]
            self.b_mcsigfd = bitmap_sel["ismcfd"]
            self.b_mcbkg = bitmap_sel["ismcbkg"]

        #variables name
        self.v_all = variables["var_all"]
        self.v_evt = variables["var_evt"][self.mcordata]
        self.v_gen = variables["var_gen"]
        self.v_evtmatch = variables["var_evt_match"]
        self.v_train = self.v_bitvar = self.v_isstd = self.v_ismcsignal = \
            self.v_ismcprompt = self.v_ismcfd = self.v_ismcbkg = self.v_var_binning = None
        if not is_jet:
            bitmap_sel = datap["bitmap_sel"]
            self.v_train = variables["var_training"]
            self.v_bitvar = bitmap_sel["var_name"]
            self.v_isstd = bitmap_sel["var_isstd"]
            self.v_ismcsignal = bitmap_sel["var_ismcsignal"]
            self.v_ismcprompt = bitmap_sel["var_ismcprompt"]
            self.v_ismcfd = bitmap_sel["var_ismcfd"]
            self.v_ismcbkg = bitmap_sel["var_ismcbkg"]
            self.v_var_binning = variables["var_binning"]

        #list of files names
        # Prefer the ROOT directory; fall back to the pickle directory when the
        # ROOT one is not present.
        self.l_path = None
        if os.path.isdir(self.d_root):
            self.l_path = list_folders(self.d_root, self.n_root, self.p_maxfiles)
        else:
            self.l_path = list_folders(self.d_pkl, self.n_reco, self.p_maxfiles)

        self.ignore_prev_jet_calc = datap["ignore_prev_jet_calc"] if is_jet else None

        self.l_root = createlist(self.d_root, self.l_path, self.n_root)
        self.l_reco = createlist(self.d_pkl, self.l_path, self.n_reco)
        self.l_evt = createlist(self.d_pkl, self.l_path, self.n_evt)
        self.l_jet = createlist(self.d_pkl, self.l_path, self.n_jet) if is_jet else None
        self.l_evtorig = createlist(self.d_pkl, self.l_path, self.n_evtorig)

        # Generator-level files exist for MC only; keep the attribute defined
        # (as None) for data, like the other optional attributes of this class.
        self.l_gen = None
        if self.mcordata == "mc":
            self.l_gen = createlist(self.d_pkl, self.l_path, self.n_gen)

        self.f_totevt = os.path.join(self.d_pkl, self.n_evt)
        self.f_totevtorig = os.path.join(self.d_pkl, self.n_evtorig)

        self.lpt_anbinmin = datap["sel_skim_binmin"]
        self.lpt_anbinmax = datap["sel_skim_binmax"]
        self.p_nptbins = len(datap["sel_skim_binmax"])
        # (min, max) edge of every skimming pT bin, used for all file names below.
        pt_edges = [(self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
                    for ipt in range(self.p_nptbins)]

        # Defaults of the ML-related attributes for the jet case.
        # n_filemass / n_fileeff must NOT be reset here: they were read from
        # the database above and are joined with d_results in the non-jet
        # branch (os.path.join raises TypeError on None).
        self.p_modelname = self.lpt_model = self.dirmodel = \
            self.lpt_probcutpre = self.lpt_probcutfin = self.d_pkl_decmerged = None
        if is_jet:
            self.jetRadii = variables['jetRadii']
            self.pTbins = variables['pTbins']
            self.betas = variables['betas']
            #self.jets = None    # Will fill this in if needed using findJets()

            # 4D list of binned lambda values. Filled during jet-finding.
            # [ (pTbin1:) [ (jetR1:) [ (beta1:) [lambdabin1, lambdabin2, ... ],
            #                          (beta2:) [ ... ],
            #                          ... ],
            #               (jetR2:) [ ... ],
            #               ... ],
            #   (pTbin2:) [ ... ],
            #   ... ]
            self.jet_lambda = None
            self.n_lambda_bins = variables['N_lambda_bins']
            self.lambda_max = variables['lambda_max']

            # 2D list of dictionaries of N_jet_constits. Filled during jet-finding.
            # [ (pTbin1:) [ (jetR1:) { "2": N_2, "3": N_3, ... },
            #               (jetR2:) [ ... ],
            #               ... ],
            #   (pTbin2:) [ ... ],
            #   ... ]
            self.N_constits = None
        else:
            analysis = datap["analysis"]
            self.p_modelname = analysis["modelname"]
            self.lpt_model = analysis["modelsperptbin"]
            self.dirmodel = datap["ml"]["mlout"]
            self.lpt_model = appendmainfoldertolist(self.dirmodel, self.lpt_model)
            self.lpt_probcutpre = analysis["probcutpresel"]
            self.lpt_probcutfin = analysis["probcutoptimal"]
            self.d_pkl_decmerged = d_pkl_decmerged
            self.n_filemass = os.path.join(d_results, self.n_filemass)
            self.n_fileeff = os.path.join(d_results, self.n_fileeff)

        self.d_pkl_dec = d_pkl_dec
        self.mptfiles_recosk = []
        self.mptfiles_gensk = []

        # Skimmed file names: "<name><ptmin>_<ptmax>.pkl" per pT bin.
        self.lpt_recosk = [self.n_reco.replace(".pkl", "%d_%d.pkl" % (ptmin, ptmax))
                           for ptmin, ptmax in pt_edges]
        self.lpt_gensk = [self.n_gen.replace(".pkl", "%d_%d.pkl" % (ptmin, ptmax))
                          for ptmin, ptmax in pt_edges]

        self.lpt_reco_ml = self.lpt_gen_ml = self.f_evt_ml = self.f_evtorig_ml = \
            self.lpt_recodec = self.mptfiles_recoskmldec = self.lpt_recodecmerged = None
        if not is_jet:
            self.lpt_reco_ml = [os.path.join(self.d_pkl_ml, name) for name in self.lpt_recosk]
            self.lpt_gen_ml = [os.path.join(self.d_pkl_ml, name) for name in self.lpt_gensk]
            self.f_evt_ml = os.path.join(self.d_pkl_ml, self.n_evt)
            self.f_evtorig_ml = os.path.join(self.d_pkl_ml, self.n_evtorig)
            # ML-applied file names additionally carry the preselection cut.
            self.lpt_recodec = [self.n_reco.replace(".pkl", "%d_%d_%.2f.pkl" % \
                                (ptmin, ptmax, self.lpt_probcutpre[ipt]))
                                for ipt, (ptmin, ptmax) in enumerate(pt_edges)]
            self.mptfiles_recoskmldec = [createlist(self.d_pkl_dec, self.l_path, name)
                                         for name in self.lpt_recodec]
            self.lpt_recodecmerged = [os.path.join(self.d_pkl_decmerged, name)
                                      for name in self.lpt_recodec]

        self.mptfiles_recosk = [createlist(self.d_pklsk, self.l_path, name)
                                for name in self.lpt_recosk]
        if self.mcordata == "mc":
            self.mptfiles_gensk = [createlist(self.d_pklsk, self.l_path, name)
                                   for name in self.lpt_gensk]

        self.lpt_gendecmerged = None
        self.lpt_filemass = self.p_mass_fit_lim = self.p_bin_width = self.p_num_bins = \
            self.l_selml = self.s_presel_gen_eff = None
        if not is_jet:
            analysis = datap["analysis"]
            self.lpt_gendecmerged = [os.path.join(self.d_pkl_decmerged, name)
                                     for name in self.lpt_gensk]
            # One mass-histogram file per pT bin, tagged with the final cut.
            self.lpt_filemass = [self.n_filemass.replace(".root", "%d_%d_%.2f.root" % \
                                 (ptmin, ptmax, self.lpt_probcutfin[ipt]))
                                 for ipt, (ptmin, ptmax) in enumerate(pt_edges)]
            self.p_mass_fit_lim = analysis['mass_fit_lim']
            self.p_bin_width = analysis['bin_width']
            self.p_num_bins = int(round((self.p_mass_fit_lim[1] - self.p_mass_fit_lim[0]) / \
                                        self.p_bin_width))
            # Selection strings applying the final probability cut per pT bin.
            self.l_selml = ["y_test_prob%s>%s" % (self.p_modelname, self.lpt_probcutfin[ipt])
                            for ipt in range(self.p_nptbins)]
            self.s_presel_gen_eff = analysis['presel_gen_eff']
class Processer: # pylint: disable=too-many-instance-attributes
    """Configuration holder for processing one period of a data or MC sample.

    The constructor copies its arguments onto the instance, reads file names,
    selections, variable names and ML/analysis settings from the ``datap``
    database and pre-computes the per-pT-bin file names, paths and ML
    selection strings.
    """
    # Class Attribute
    species = 'processer'

    @staticmethod
    def _loose_final_cut_bins(cuts_fin, cuts_pre, bin_matching=None):
        """Return the analysis bins whose final cut is below the preselection cut.

        Args:
            cuts_fin: final probability cut of every analysis pT bin.
            cuts_pre: preselection probability cut of every skimming pT bin.
            bin_matching: for every analysis bin, the index of the skimming bin
                it belongs to; ``None`` means bin ``i`` is matched to bin ``i``.

        Returns:
            Indices into ``cuts_fin`` for which ``cuts_fin[i]`` is strictly
            smaller than the matched entry of ``cuts_pre``.  Analysis bins
            whose matched index does not exist in ``cuts_pre`` are skipped.
        """
        if bin_matching is None:
            bin_matching = range(len(cuts_fin))
        npre = len(cuts_pre)
        return [ifin for ifin, (cut_fin, ipre) in enumerate(zip(cuts_fin, bin_matching))
                if -npre <= ipre < npre and cut_fin < cuts_pre[ipre]]

    # Initializer / Instance Attributes
    # pylint: disable=too-many-statements, too-many-arguments
    def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disable=too-many-branches
                 d_root, d_pkl, d_pklsk, d_pkl_ml, p_period, i_period,
                 p_chunksizeunp, p_chunksizeskim, p_maxprocess,
                 p_frac_merge, p_rd_merge, d_pkl_dec, d_pkl_decmerged,
                 d_results, typean, runlisttrigger, d_mcreweights):
        """Store the configuration and build all file-name lists.

        Args:
            case: stored as ``self.case``.
            datap: nested database (mapping) with file names, selections,
                variables and analysis/ML settings.
            run_param: stored as ``self.run_param``.
            mcordata: key into the ``var_evt``, ``multi`` and ``probcutpresel``
                entries of ``datap``; the value ``"mc"`` additionally enables
                the generator-level file lists.
            p_maxfiles: forwarded to ``list_folders``.
            d_root: directory of the ROOT input; if it is not an existing
                directory the folder list is built from ``d_pkl`` instead.
            d_pkl: directory of the unskimmed pickle files.
            d_pklsk: directory of the skimmed pickle files.
            d_pkl_ml: directory of the ML pickle files.
            p_period: stored as ``self.period``.
            i_period: index of the period; selects the entry of the optional
                ``select_children`` list.
            p_chunksizeunp: stored as ``self.p_chunksizeunp``.
            p_chunksizeskim: stored as ``self.p_chunksizeskim``.
            p_maxprocess: stored as ``self.p_maxprocess``.
            p_frac_merge: either one value, which is replicated for every
                skimming pT bin, or an iterable with one entry per bin.
            p_rd_merge: stored as ``self.p_rd_merge``.
            d_pkl_dec: directory of the ML-applied pickle files.
            d_pkl_decmerged: directory of the merged ML-applied pickle files.
            d_results: directory of the histogram/efficiency/response files.
            typean: key of the analysis section inside ``datap["analysis"]``.
            runlisttrigger: stored as ``self.runlistrigger`` (attribute name
                kept as is, without the double "t").
            d_mcreweights: stored as ``self.d_mcreweights``.

        Raises:
            SystemExit: if ``p_frac_merge`` is iterable and its length differs
                from the number of skimming pT bins.
        """
        files_names = datap["files_names"]
        bitmap_sel = datap["bitmap_sel"]
        variables = datap["variables"]

        #self.logger = get_logger()
        self.nprongs = datap["nprongs"]
        self.prongformultsub = datap["prongformultsub"]
        self.doml = datap["doml"]
        self.case = case
        self.typean = typean
        #directories
        self.d_root = d_root
        self.d_pkl = d_pkl
        self.d_pklsk = d_pklsk
        self.d_pkl_ml = d_pkl_ml
        self.d_results = d_results
        self.d_mcreweights = d_mcreweights
        self.datap = datap
        self.mcordata = mcordata

        self.lpt_anbinmin = datap["sel_skim_binmin"]
        self.lpt_anbinmax = datap["sel_skim_binmax"]
        self.p_nptbins = len(self.lpt_anbinmin)
        # (min, max) edge of every skimming pT bin, used for all file names below.
        pt_edges = [(self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
                    for ipt in range(self.p_nptbins)]

        # A single merge fraction is broadcast to all skimming pT bins.
        self.p_frac_merge = p_frac_merge
        try:
            iter(p_frac_merge)
        except TypeError:
            self.p_frac_merge = [p_frac_merge] * self.p_nptbins
        if len(self.p_frac_merge) != self.p_nptbins:
            print(f"Length of merge-fraction list != number of pT bins \n" \
                  f"{len(self.p_frac_merge)} != {self.p_nptbins}")
            sys.exit(1)

        self.p_rd_merge = p_rd_merge
        self.period = p_period
        self.i_period = i_period
        self.select_children = datap["multi"][mcordata].get("select_children", None)
        if self.select_children:
            # Make sure we have "<child>/" instead of "<child>" only, because in
            # the latter case "child_1" might also select children like "child_11".
            self.select_children = [f"{child}/" for child in self.select_children[i_period]]

        self.run_param = run_param
        self.p_maxfiles = p_maxfiles
        self.p_chunksizeunp = p_chunksizeunp
        self.p_chunksizeskim = p_chunksizeskim

        #parameter names
        self.p_maxprocess = p_maxprocess
        self.indexsample = None
        self.p_dofullevtmerge = datap["dofullevtmerge"]
        #namefile root
        self.n_root = files_names["namefile_unmerged_tree"]
        #troot trees names
        self.n_treereco = files_names["treeoriginreco"]
        self.n_treegen = files_names["treeorigingen"]
        self.n_treeevt = files_names["treeoriginevt"]
        #namefiles pkl
        self.n_reco = files_names["namefile_reco"]
        self.n_evt = files_names["namefile_evt"]
        self.n_evtorig = files_names["namefile_evtorig"]
        self.n_evt_count_ml = files_names.get("namefile_evt_count", "evtcount.yaml")
        self.n_gen = files_names["namefile_gen"]
        self.n_filemass = files_names["histofilename"]
        self.n_fileeff = files_names["efffilename"]
        self.n_fileresp = files_names["respfilename"]
        self.n_mcreweights = files_names["namefile_mcweights"]

        #selections
        self.s_reco_unp = datap["sel_reco_unp"]
        self.s_good_evt_unp = datap["sel_good_evt_unp"]
        self.s_cen_unp = datap["sel_cen_unp"]
        self.s_gen_unp = datap["sel_gen_unp"]
        self.s_reco_skim = datap["sel_reco_skim"]
        self.s_gen_skim = datap["sel_gen_skim"]
        self.s_apply_yptacccut = datap.get("apply_yptacccut", True)

        #bitmap
        self.b_trackcuts = datap["sel_reco_singletrac_unp"]
        self.b_std = bitmap_sel["isstd"]
        self.b_mcsig = bitmap_sel["ismcsignal"]
        self.b_mcsigprompt = bitmap_sel["ismcprompt"]
        self.b_mcsigfd = bitmap_sel["ismcfd"]
        self.b_mcbkg = bitmap_sel["ismcbkg"]
        self.b_mcrefl = bitmap_sel["ismcrefl"]

        #variables name
        self.v_all = variables["var_all"]
        self.v_train = variables["var_training"]
        self.v_evt = variables["var_evt"][self.mcordata]
        self.v_gen = variables["var_gen"]
        self.v_evtmatch = variables["var_evt_match"]
        self.v_bitvar = bitmap_sel["var_name"]
        self.v_isstd = bitmap_sel["var_isstd"]
        self.v_ismcsignal = bitmap_sel["var_ismcsignal"]
        self.v_ismcprompt = bitmap_sel["var_ismcprompt"]
        self.v_ismcfd = bitmap_sel["var_ismcfd"]
        self.v_ismcbkg = bitmap_sel["var_ismcbkg"]
        self.v_ismcrefl = bitmap_sel["var_ismcrefl"]
        self.v_var_binning = datap["var_binning"]
        self.v_invmass = variables.get("var_inv_mass", "inv_mass")
        self.v_rapy = variables.get("var_y", "y_cand")
        self.s_var_evt_sel = variables.get("var_evt_sel", "is_ev_rej")

        #list of files names
        # Prefer the ROOT directory; fall back to the pickle directory when the
        # ROOT one is not present.
        if os.path.isdir(self.d_root):
            self.l_path = list_folders(self.d_root, self.n_root, self.p_maxfiles,
                                       self.select_children)
        else:
            self.l_path = list_folders(self.d_pkl, self.n_reco, self.p_maxfiles,
                                       self.select_children)

        self.l_root = createlist(self.d_root, self.l_path, self.n_root)
        self.l_reco = createlist(self.d_pkl, self.l_path, self.n_reco)
        self.l_evt = createlist(self.d_pkl, self.l_path, self.n_evt)
        self.l_evtorig = createlist(self.d_pkl, self.l_path, self.n_evtorig)
        self.l_histomass = createlist(self.d_results, self.l_path, self.n_filemass)
        self.l_histoeff = createlist(self.d_results, self.l_path, self.n_fileeff)
        self.l_historesp = createlist(self.d_results, self.l_path, self.n_fileresp)

        if self.mcordata == "mc":
            self.l_gen = createlist(self.d_pkl, self.l_path, self.n_gen)

        self.f_totevt = os.path.join(self.d_pkl, self.n_evt)
        self.f_totevtorig = os.path.join(self.d_pkl, self.n_evtorig)

        mlapplication = datap["mlapplication"]
        an_cfg = datap["analysis"][self.typean]
        ml_cfg = datap["ml"]

        self.p_modelname = mlapplication["modelname"]
        # Analysis pT bins
        self.lpt_finbinmin = an_cfg["sel_an_binmin"]
        self.lpt_finbinmax = an_cfg["sel_an_binmax"]
        self.p_nptfinbins = len(self.lpt_finbinmin)
        self.lpt_model = mlapplication["modelsperptbin"]
        self.dirmodel = ml_cfg["mlout"]
        self.mltype = ml_cfg["mltype"]
        self.multiclass_labels = ml_cfg.get("multiclass_labels", None)
        self.lpt_model = appendmainfoldertolist(self.dirmodel, self.lpt_model)
        # Potentially mask certain values (e.g. nsigma TOF of -999)
        self.p_mask_values = ml_cfg.get("mask_values", None)

        self.lpt_probcutpre = mlapplication["probcutpresel"][self.mcordata]
        self.lpt_probcutfin = an_cfg.get("probcuts", None)

        # Make it backwards-compatible: without explicit per-analysis-bin cuts,
        # take the optimal cut of the skimming bin matched to each analysis bin.
        if not self.lpt_probcutfin:
            bin_matching = an_cfg["binning_matching"]
            lpt_probcutfin_tmp = mlapplication["probcutoptimal"]
            self.lpt_probcutfin = [lpt_probcutfin_tmp[bin_matching[ifin]]
                                   for ifin in range(self.p_nptfinbins)]

        is_multiclass = self.mltype == "MultiClassification"

        if not is_multiclass:
            # Compare bin by bin.  A plain "list < list" is lexicographic and,
            # on top, the two lists are indexed by different binnings, so a
            # too-loose cut in a later bin would go unnoticed.
            loose_bins = self._loose_final_cut_bins(self.lpt_probcutfin,
                                                    self.lpt_probcutpre,
                                                    an_cfg.get("binning_matching"))
            if loose_bins:
                print("FATAL error: probability cut final must be tighter!")

        # ML selection string per analysis pT bin.
        if is_multiclass:
            self.l_selml = []
            for ipt in range(self.p_nptfinbins):
                mlsel_multi0 = "y_test_prob" + self.p_modelname + self.multiclass_labels[0] + \
                               " <= " + str(self.lpt_probcutfin[ipt][0])
                mlsel_multi1 = "y_test_prob" + self.p_modelname + self.multiclass_labels[1] + \
                               " >= " + str(self.lpt_probcutfin[ipt][1])
                self.l_selml.append(mlsel_multi0 + " and " + mlsel_multi1)
        else:
            self.l_selml = ["y_test_prob%s>%s" % (self.p_modelname, self.lpt_probcutfin[ipt])
                            for ipt in range(self.p_nptfinbins)]

        self.d_pkl_dec = d_pkl_dec
        self.mptfiles_recosk = []
        self.mptfiles_gensk = []

        self.d_pkl_decmerged = d_pkl_decmerged
        # From here on the histogram file names are full paths under d_results
        # (the per-folder lists above were built from the bare names).
        self.n_filemass = os.path.join(self.d_results, self.n_filemass)
        self.n_fileeff = os.path.join(self.d_results, self.n_fileeff)
        self.n_fileresp = os.path.join(self.d_results, self.n_fileresp)

        # Skimmed file names: "<name>_<binning var><ptmin>_<ptmax>.pkl" per pT bin.
        self.lpt_recosk = [self.n_reco.replace(".pkl", "_%s%d_%d.pkl" % \
                           (self.v_var_binning, ptmin, ptmax))
                           for ptmin, ptmax in pt_edges]
        self.lpt_gensk = [self.n_gen.replace(".pkl", "_%s%d_%d.pkl" % \
                          (self.v_var_binning, ptmin, ptmax))
                          for ptmin, ptmax in pt_edges]
        self.lpt_reco_ml = [os.path.join(self.d_pkl_ml, name) for name in self.lpt_recosk]
        self.lpt_gen_ml = [os.path.join(self.d_pkl_ml, name) for name in self.lpt_gensk]
        self.f_evt_count_ml = os.path.join(self.d_pkl_ml, self.n_evt_count_ml)

        # ML-applied file names carry the preselection cut(s); without ML the
        # suffix is "std".
        self.lpt_recodec = None
        if self.doml is True:
            if is_multiclass:
                self.lpt_recodec = [self.n_reco.replace(".pkl", "%d_%d_%.2f%.2f.pkl" % \
                                    (ptmin, ptmax, self.lpt_probcutpre[ipt][0],
                                     self.lpt_probcutpre[ipt][1]))
                                    for ipt, (ptmin, ptmax) in enumerate(pt_edges)]
            else:
                self.lpt_recodec = [self.n_reco.replace(".pkl", "%d_%d_%.2f.pkl" % \
                                    (ptmin, ptmax, self.lpt_probcutpre[ipt]))
                                    for ipt, (ptmin, ptmax) in enumerate(pt_edges)]
        else:
            self.lpt_recodec = [self.n_reco.replace(".pkl", "%d_%d_std.pkl" % (ptmin, ptmax))
                                for ptmin, ptmax in pt_edges]

        self.mptfiles_recosk = [createlist(self.d_pklsk, self.l_path, name)
                                for name in self.lpt_recosk]
        self.mptfiles_recoskmldec = [createlist(self.d_pkl_dec, self.l_path, name)
                                     for name in self.lpt_recodec]
        self.lpt_recodecmerged = [os.path.join(self.d_pkl_decmerged, name)
                                  for name in self.lpt_recodec]
        if self.mcordata == "mc":
            self.mptfiles_gensk = [createlist(self.d_pklsk, self.l_path, name)
                                   for name in self.lpt_gensk]
        # Pure path construction, so it is built regardless of mcordata.
        self.lpt_gendecmerged = [os.path.join(self.d_pkl_decmerged, name)
                                 for name in self.lpt_gensk]

        self.triggerbit = an_cfg["triggerbit"]
        self.runlistrigger = runlisttrigger

        # if os.path.exists(self.d_root) is False:
        #     self.logger.warning("ROOT tree folder is not there. Is it intentional?")

        # Analysis cuts (loaded in self.process_histomass)
        self.analysis_cuts = None
        # Flag if they should be used
        self.do_custom_analysis_cuts = an_cfg.get("use_cuts", False)