def multi_skim_allperiods(self):
    """Skim every production period, then combine the per-period event outputs.

    Calls ``process_skim_par()`` on the first ``self.prodnumber`` entries of
    ``self.process_listsample``. Afterwards ``self.lper_evtorigroot`` is handed
    to ``mergerootfiles`` with ``self.f_evtorigroot_mergedallp`` as target.
    The ``lper_evt`` / ``lper_evtorig`` lists are passed to ``merge_method``
    only when ``self.p_dofullevtmerge`` is exactly ``True``.
    """
    for period_index in range(self.prodnumber):
        period_processor = self.process_listsample[period_index]
        period_processor.process_skim_par()

    mergerootfiles(self.lper_evtorigroot, self.f_evtorigroot_mergedallp)

    # Identity comparison on purpose: a merely truthy value does not
    # enable the full event merge.
    if self.p_dofullevtmerge is not True:
        return

    merge_method(self.lper_evt, self.f_evt_mergedallp)
    merge_method(self.lper_evtorig, self.f_evtorig_mergedallp)
def process_mergedec(self):
    """Merge the per-pT-bin decision files.

    For every bin index below ``self.p_nptbins`` the entry of
    ``self.mptfiles_recoskmldec`` is passed to ``merge_method`` with the
    matching entry of ``self.lpt_recodecmerged`` as target. When
    ``self.mcordata`` equals ``"mc"`` the same is done for
    ``self.mptfiles_gensk`` -> ``self.lpt_gendecmerged``.
    """
    for pt_bin in range(self.p_nptbins):
        merge_method(self.mptfiles_recoskmldec[pt_bin],
                     self.lpt_recodecmerged[pt_bin])

        # Generated-level files are only handled for the "mc" setting.
        if self.mcordata != "mc":
            continue
        merge_method(self.mptfiles_gensk[pt_bin],
                     self.lpt_gendecmerged[pt_bin])
def process_skim_par(self):
    """Run ``self.skim`` over all reco inputs and merge the event outputs.

    Steps, in order:
      1. print a progress line with ``self.mcordata`` and ``self.period``;
      2. call ``create_folder_struc(self.d_pklsk, self.l_path)``;
      3. dispatch ``self.skim`` through ``self.parallelizer`` with one
         single-element argument tuple per entry of ``self.l_reco`` and
         ``self.p_chunksizeskim`` as third argument;
      4. pass ``self.l_evt`` and ``self.l_evtorig`` to ``merge_method`` with
         ``self.f_totevt`` and ``self.f_totevtorig`` as targets.
    """
    print("doing skimming", self.mcordata, self.period)
    create_folder_struc(self.d_pklsk, self.l_path)

    # zip over a single range yields the 1-tuples (0,), (1,), ... that the
    # parallelizer receives as per-call argument tuples.
    n_inputs = len(self.l_reco)
    skim_args = list(zip(range(n_inputs)))
    self.parallelizer(self.skim, skim_args, self.p_chunksizeskim)

    merge_method(self.l_evt, self.f_totevt)
    merge_method(self.l_evtorig, self.f_totevtorig)
def multi_mergeml_allinone(self):
    """Combine the per-period ML inputs into all-period files.

    Call order:
      * reco: for each bin below ``self.p_nptbins``,
        ``lptper_recoml[bin]`` -> ``lpt_recoml_mergedallp[bin]``;
      * gen (only when ``self.mcordata == "mc"``): for each bin,
        ``lptper_genml[bin]`` -> ``lpt_genml_mergedallp[bin]``;
      * events: ``lper_evtml`` -> ``f_evtml_mergedallp`` and
        ``lper_evtorigml`` -> ``f_evtorigml_mergedallp``;
      * if ``self.v_max_ncand_merge > 0``: for each bin,
        ``lptper_recomlmax[bin]`` -> ``lpt_recoml_mergedallpmax[bin]``.

    Every step is a ``merge_method(inputs, target)`` call.
    """
    # All reco bins are processed before any gen bin.
    for pt_bin in range(self.p_nptbins):
        merge_method(self.lptper_recoml[pt_bin],
                     self.lpt_recoml_mergedallp[pt_bin])

    for pt_bin in range(self.p_nptbins):
        if self.mcordata != "mc":
            continue
        merge_method(self.lptper_genml[pt_bin],
                     self.lpt_genml_mergedallp[pt_bin])

    merge_method(self.lper_evtml, self.f_evtml_mergedallp)
    merge_method(self.lper_evtorigml, self.f_evtorigml_mergedallp)

    # The "max" files are only handled for a positive v_max_ncand_merge.
    if not self.v_max_ncand_merge > 0:
        return
    for pt_bin in range(self.p_nptbins):
        merge_method(self.lptper_recomlmax[pt_bin],
                     self.lpt_recoml_mergedallpmax[pt_bin])
def multi_mergeml_allinone(self):
    """Combine per-period ML inputs and sum the per-period event counts.

    For each bin below ``self.p_nptbins`` the reco inputs
    (``lptper_recoml[bin]`` -> ``lpt_recoml_mergedallp[bin]``) are passed to
    ``merge_method``, followed, when ``self.mcordata == "mc"``, by the gen
    inputs (``lptper_genml[bin]`` -> ``lpt_genml_mergedallp[bin]``).

    Then every file in ``self.lper_evt_count_ml`` is read with ``parse_yaml``,
    its ``"evt"`` and ``"evtorig"`` entries are added up, and the totals are
    written with ``dump_yaml_from_dict`` to ``self.f_evtml_count``.
    """
    for pt_bin in range(self.p_nptbins):
        merge_method(self.lptper_recoml[pt_bin],
                     self.lpt_recoml_mergedallp[pt_bin])
        if self.mcordata != "mc":
            continue
        merge_method(self.lptper_genml[pt_bin],
                     self.lpt_genml_mergedallp[pt_bin])

    # Running totals; key order matches the dictionary that gets dumped.
    totals = {"evt": 0, "evtorig": 0}
    for count_file in self.lper_evt_count_ml:
        period_counts = parse_yaml(count_file)
        for key in totals:
            totals[key] += period_counts[key]

    dump_yaml_from_dict(totals, self.f_evtml_count)
def process_mergeforml(self):
    """Merge a seeded random subset of skimmed files per pT bin and count events.

    For each bin below ``self.p_nptbins``:
      * exit with status 1 if ``self.mptfiles_recosk[bin]`` is empty;
      * draw ``int(nfiles * self.p_frac_merge[bin])`` file indices with
        ``rd.sample`` after re-seeding with ``self.p_rd_merge``;
      * pass the selected reco files (and, for ``self.mcordata == "mc"``, the
        selected gen files) to ``merge_method`` with ``lpt_reco_ml[bin]`` /
        ``lpt_gen_ml[bin]`` as targets.

    The union of all selected indices picks entries of ``self.l_evt`` and
    ``self.l_evtorig``; their ``count_df_length_pkl`` results are stored under
    ``"evt"`` / ``"evtorig"`` and dumped to ``self.f_evt_count_ml``.
    """
    indices_for_evt = []

    for ipt in range(self.p_nptbins):
        reco_files = self.mptfiles_recosk[ipt]
        n_available = len(reco_files)
        if n_available == 0:
            print("There are no files to be merged")
            sys.exit(1)

        print(f"Use merge fraction {self.p_frac_merge[ipt]} for pT bin {ipt}")
        n_selected = int(n_available * self.p_frac_merge[ipt])

        # Re-seed on every bin so each draw starts from the same RNG state.
        rd.seed(self.p_rd_merge)
        chosen = rd.sample(range(n_available), n_selected)

        # Accumulate the union of indices selected across all bins.
        indices_for_evt = list(set(indices_for_evt) | set(chosen))

        merge_method([reco_files[idx] for idx in chosen], self.lpt_reco_ml[ipt])

        if self.mcordata != "mc":
            continue
        gen_files = self.mptfiles_gensk[ipt]
        merge_method([gen_files[idx] for idx in chosen], self.lpt_gen_ml[ipt])

    print("Count events...")
    selected_evt = [self.l_evt[idx] for idx in indices_for_evt]
    selected_evtorig = [self.l_evtorig[idx] for idx in indices_for_evt]

    n_evt = count_df_length_pkl(*selected_evt)
    n_evtorig = count_df_length_pkl(*selected_evtorig)
    dump_yaml_from_dict({"evt": n_evt, "evtorig": n_evtorig},
                        self.f_evt_count_ml)
def process_mergeforml(self):
    """Merge one seeded random subset of skimmed files across all pT bins.

    The number of available files is taken from ``self.mptfiles_recosk[0]``.
    ``int(nfiles * self.p_frac_merge)`` indices are drawn once with
    ``rd.sample`` after seeding with ``self.p_rd_merge``; the same indices
    are then used for every bin below ``self.p_nptbins`` (reco files always,
    gen files when ``self.mcordata == "mc"``) and finally for ``self.l_evt``
    and ``self.l_evtorig``. All merging goes through ``merge_method``.

    If no files are available only a hint is printed; execution continues.
    """
    n_available = len(self.mptfiles_recosk[0])
    if not n_available:
        print("increase the fraction of merged files or the total number")
        print(" of files you process")

    n_selected = int(n_available * self.p_frac_merge)
    rd.seed(self.p_rd_merge)
    chosen = rd.sample(range(n_available), n_selected)

    for pt_bin in range(self.p_nptbins):
        reco_subset = [self.mptfiles_recosk[pt_bin][idx] for idx in chosen]
        merge_method(reco_subset, self.lpt_reco_ml[pt_bin])

        if self.mcordata != "mc":
            continue
        gen_subset = [self.mptfiles_gensk[pt_bin][idx] for idx in chosen]
        merge_method(gen_subset, self.lpt_gen_ml[pt_bin])

    # Event-level files follow the same index selection as the candidates.
    evt_subset = [self.l_evt[idx] for idx in chosen]
    evtorig_subset = [self.l_evtorig[idx] for idx in chosen]
    merge_method(evt_subset, self.f_evt_ml)
    merge_method(evtorig_subset, self.f_evtorig_ml)
def process_mergeforml(self):
    """Merge a seeded random subset of skimmed files for each pT bin.

    The number of available files is taken from ``self.mptfiles_recosk[0]``.
    For each bin below ``self.p_nptbins``,
    ``int(nfiles * self.p_frac_merge[bin])`` indices are drawn with
    ``rd.sample`` after re-seeding with ``self.p_rd_merge``. The selected
    reco files (and gen files when ``self.mcordata == "mc"``) are passed to
    ``merge_method``. The event-level lists ``self.l_evt`` / ``self.l_evtorig``
    are merged only with the selection drawn for bin 0.

    If no files are available only a hint is printed; execution continues.
    """
    print("doing merging", self.mcordata, self.period)

    n_available = len(self.mptfiles_recosk[0])
    if not n_available:
        print("increase the fraction of merged files or the total number")
        print(" of files you process")

    for pt_bin in range(self.p_nptbins):
        n_selected = int(n_available * self.p_frac_merge[pt_bin])

        # Re-seed for every pT bin so each draw starts from the same state.
        rd.seed(self.p_rd_merge)
        chosen = rd.sample(range(n_available), n_selected)

        reco_subset = [self.mptfiles_recosk[pt_bin][idx] for idx in chosen]
        merge_method(reco_subset, self.lpt_reco_ml[pt_bin])

        if self.mcordata == "mc":
            gen_subset = [self.mptfiles_gensk[pt_bin][idx] for idx in chosen]
            merge_method(gen_subset, self.lpt_gen_ml[pt_bin])

        # Event files are merged once, using the first pT bin's selection.
        if pt_bin != 0:
            continue
        evt_subset = [self.l_evt[idx] for idx in chosen]
        evtorig_subset = [self.l_evtorig[idx] for idx in chosen]
        merge_method(evt_subset, self.f_evt_ml)
        merge_method(evtorig_subset, self.f_evtorig_ml)
def multi_skim_allperiods(self):
    """Skim every production period and merge the per-period event outputs.

    Calls ``process_skim_par()`` on the first ``self.prodnumber`` entries of
    ``self.process_listsample``, then passes ``self.lper_evt`` and
    ``self.lper_evtorig`` to ``merge_method`` with ``self.f_evt_mergedallp``
    and ``self.f_evtorig_mergedallp`` as the respective targets.
    """
    for period_index in range(self.prodnumber):
        period_processor = self.process_listsample[period_index]
        period_processor.process_skim_par()

    merge_method(self.lper_evt, self.f_evt_mergedallp)
    merge_method(self.lper_evtorig, self.f_evtorig_mergedallp)