def wfunc(self, fq_list, first_check=1, callback=False, callback_kwargs={}, max_workers=10, pass_if_exists=[], clean_before=[], **kwargs):
    """
    Run the wrapped ``func`` once per entry of ``fq_list`` on a thread pool.

    ``fq_list`` is cut into batches by ``split_list(fq_list, max_workers)``.
    Without a callback, one pool serves every batch and is joined once at
    the end. With a callback, every batch gets its own pool, which is
    joined before ``callback(**callback_kwargs)`` is invoked.

    fq_list: entries handed to ``func`` one at a time as ``fq_list=``.
    first_check: not used by this wrapper.
    callback: optional callable invoked after each batch has finished.
    callback_kwargs: keyword arguments for ``callback``.
    max_workers: thread-pool size, also passed to ``split_list``.
    pass_if_exists: list of path templates; when all of the formatted
        paths exist, the sample is skipped. Templates are formatted with
        the fields returned by ``self.parse_fq_list(fq)`` plus
        ``self.context`` (original note: ``{sample}`` and the
        MetagenomePipline context are substituted).
    clean_before: list of path templates, formatted the same way; every
        formatted path that exists is removed with ``rm -r`` before the
        sample is submitted.
    kwargs: forwarded unchanged to ``func``.

    An exception raised inside ``func`` does not abort the run; it is
    printed once the pool that ran it has been joined.
    """
    # NOTE: the mutable defaults in the signature are only read here,
    # never modified, so sharing them between calls is harmless.

    def _submit_batch(pool, fqs):
        # Submit one batch to ``pool``; return (fq, future) pairs for the
        # samples that were actually submitted (skipped ones are left out).
        submitted = []
        for fq in fqs:
            parsed_fqs = self.parse_fq_list(fq)
            if pass_if_exists:
                paths = [p.format(**parsed_fqs, **self.context) for p in pass_if_exists]
                if all_path_exists(paths):
                    continue
            if clean_before:
                for path in clean_before:
                    path = path.format(**parsed_fqs, **self.context)
                    if os.path.exists(path):
                        self.system("rm -r {}".format(path))
            submitted.append((fq, pool.submit(func, self, fq_list=fq, **kwargs)))
        return submitted

    def _report_failures(submitted):
        # Only called after the owning pool is joined, so every future is
        # done and ``exception()`` returns immediately.
        for fq, future in submitted:
            error = future.exception()
            if error is not None:
                print("######################" + str(func) + " failed for {}: {!r}".format(fq, error))

    batches = split_list(fq_list, max_workers)
    print("######################Running " + str(func))
    start_time = time.time()
    if callback:
        for fqs in batches:
            # Leaving the ``with`` block joins the pool, even on error.
            with ThreadPoolExecutor(max_workers=max_workers) as pool:
                submitted = _submit_batch(pool, fqs)
            _report_failures(submitted)
            print("This run done! checking callback...")
            callback(**callback_kwargs)
    else:
        submitted = []
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            for fqs in batches:
                submitted.extend(_submit_batch(pool, fqs))
        _report_failures(submitted)
    end_time = time.time()
    time_used = (end_time - start_time) / 60
    print("######################" + str(func) + " done; time used: {} min".format(time_used))
def wfunc(self, fq_list, first_check=10, callback=False, callback_kwargs={}, max_workers=10, pass_if_exists=[], clean_before=[], **kwargs):
    """
    Call the wrapped ``func`` once per entry of ``fq_list``, batch by batch.

    ``fq_list`` is cut into batches by ``split_list(fq_list, max_workers)``
    and ``func(self, fq_list=entry, **kwargs)`` is called directly for each
    entry that is not skipped.

    first_check: passed to ``wait_sge`` (presumably a delay before the
        first queue check; confirm against ``wait_sge``).
    callback: when truthy, ``wait_sge(first_check)`` runs after every
        batch and is followed by ``callback(**callback_kwargs)``. When
        falsy, ``wait_sge(first_check)`` runs once after all batches.
    pass_if_exists: path templates, formatted with the fields from
        ``self.parse_fq_list(entry)`` plus ``self.context``; the entry is
        skipped when ``all_path_exists`` accepts the formatted paths.
    clean_before: path templates formatted the same way; each formatted
        path that exists is removed with ``rm -r`` before ``func`` runs.
    """
    batches = split_list(fq_list, max_workers)
    print("######################Running " + str(func))
    began = time.time()

    for batch in batches:
        for entry in batch:
            fields = self.parse_fq_list(entry)

            # Skip entries whose expected outputs are already present.
            if pass_if_exists and all_path_exists(
                    [template.format(**fields, **self.context)
                     for template in pass_if_exists]):
                continue

            if clean_before:
                for template in clean_before:
                    stale = template.format(**fields, **self.context)
                    if os.path.exists(stale):
                        self.system("rm -r {}".format(stale))

            func(self, fq_list=entry, **kwargs)

        if not callback:
            continue
        wait_sge(first_check)
        print("This run done! checking callback...")
        callback(**callback_kwargs)

    if not callback:
        wait_sge(first_check)

    finished = time.time()
    minutes = (finished - began) / 60
    print("######################" + str(func) + " done; time used: {} min".format(minutes))
def paired_data(self, pattern, each=False):
    """
    Pair the R1 and R2 entries produced by ``self.map_list``.

    pattern: forwarded to ``self.map_list`` for both read directions.
    each: when truthy, the pairs are returned as
        ``split_list(pairs, each)``; otherwise the flat list of pairs
        is returned.

    Returns a list of ``[r1_entry, r2_entry]`` lists, matched by position
    in the two ``map_list`` results.

    Raises RawDataNotPairedError when the R1 and R2 lists differ in length.
    """
    r1 = self.map_list(pattern=pattern, each=False, use_direction="R1")
    r2 = self.map_list(pattern=pattern, each=False, use_direction="R2")
    if len(r1) != len(r2):
        raise RawDataNotPairedError(
            "The file number of Reads1 and Reads2 is not equal")
    # Pair by position with zip instead of a NumPy transpose. The transpose
    # only produced pairs when the entries were scalars: for list entries
    # (falsy pattern) the array is 3-D and ``.T`` reverses every axis, and
    # NumPy coerces mixed-type entries to strings.
    out = [[first, second] for first, second in zip(r1, r2)]
    return split_list(out, each) if each else out
def map_list(self, pattern=False, each=False, use_direction="both"):
    """
    Build a sorted list with one entry per selected record of ``self.fq_info``.

    ``self.fq_info`` is iterated as ``(fq_path, new_id, direction)`` triples.
    Only records whose direction is ``"R1"`` or ``"R2"`` are ever selected.

    pattern: when truthy, each entry is
        ``pattern.format(sample_id=..., direction=..., direction_num=...)``
        where ``direction_num`` is the trailing digit of the direction.
        When falsy, each entry is ``[new_id, direction]``.
    each: when truthy, the sorted list is returned as
        ``split_list(entries, each)``; otherwise it is returned as is.
    use_direction: ``"both"``, ``"R1"`` or ``"R2"``; any other value
        selects nothing.
    """
    out = []
    for _fq_path, new_id, direction in self.fq_info:
        # Filter before formatting: a record that is going to be dropped
        # must not be able to fail in the regex lookup or in format().
        if direction not in ("R1", "R2"):
            continue
        if use_direction not in ("both", direction):
            continue
        if pattern:
            # The direction is "R1" or "R2" here, so the search always matches.
            direction_num = re.search(r"\d$", direction).group()
            ele = pattern.format(sample_id=new_id, direction=direction,
                                 direction_num=direction_num)
        else:
            ele = [new_id, direction]
        out.append(ele)
    out.sort()
    return split_list(out, each) if each else out