def main1(prog_name, input_config_fn, logger_config_fn=None):
    global fc_run_logger
    fc_run_logger = support.setup_logger(logger_config_fn)

    fc_run_logger.info("fc_run started with configuration %s", input_config_fn)
    config = support.get_dict_from_old_falcon_cfg(
        support.parse_config(input_config_fn))

    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        support.make_dirs(d)

    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    input_fofn_plf = makePypeLocalFile(os.path.basename(config["input_fofn_fn"]))
    rawread_fofn_plf = makePypeLocalFile(
        os.path.join(rawread_dir, os.path.basename(config["input_fofn_fn"])))
    make_fofn_abs_task = PypeTask(inputs={"i_fofn": input_fofn_plf},
                                  outputs={"o_fofn": rawread_fofn_plf},
                                  parameters={},
                                  TaskType=PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_raw)
    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile(os.path.join(rawread_dir, "sleep_done"))
        rdb_build_done = makePypeLocalFile(os.path.join(rawread_dir, "rdb_build_done"))
        run_jobs = makePypeLocalFile(os.path.join(rawread_dir, "run_jobs.sh"))
        parameters = {"work_dir": rawread_dir, "config": config}
        make_build_rdb_task = PypeTask(inputs={"input_fofn": rawread_fofn_plf},
                                       outputs={"rdb_build_done": rdb_build_done,
                                                "run_jobs": run_jobs},
                                       parameters=parameters,
                                       TaskType=PypeThreadTaskBase)
        build_rdb_task = make_build_rdb_task(task_build_rdb)
        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done])

        db_file = makePypeLocalFile(os.path.join(rawread_dir, "%s.db" % "raw_reads"))

        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(
            fn(run_jobs), rawread_dir, "raw_reads", db_file, rdb_build_done, config)
        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq=60)  # larger number better for more jobs

        r_da_done = makePypeLocalFile(os.path.join(rawread_dir, "da_done"))

        @PypeTask(inputs=daligner_out,
                  outputs={"da_done": r_da_done},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/rda_check")
        def check_r_da_task(self):
            system("touch %s" % fn(self.da_done))

        wf.addTask(check_r_da_task)
        # refreshTargets() must be called here to run the jobs before the
        # concurrency limit is changed below; a larger updateFreq is better
        # when there are more jobs.
        wf.refreshTargets(updateFreq=wait_time)

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
            fn(run_jobs), rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks(merge_tasks)
        if config["target"] == "overlapping":
            wf.refreshTargets(updateFreq=wait_time)
            sys.exit(0)
        wf.addTasks(consensus_tasks)

        r_cns_done = makePypeLocalFile(os.path.join(rawread_dir, "cns_done"))
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, "input_preads.fofn"))

        @PypeTask(inputs=consensus_out,
                  outputs={"cns_done": r_cns_done,
                           "pread_fofn": pread_fofn},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/cns_check")
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn), "w") as f:
                fn_list = glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >> f, fa_fn
            system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(updateFreq=wait_time)  # larger number better for more jobs

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(
            os.path.join(pread_dir, os.path.basename(config["input_fofn_fn"])))
        make_fofn_abs_task = PypeTask(inputs={"i_fofn": rawread_fofn_plf},
                                      outputs={"o_fofn": pread_fofn},
                                      parameters={},
                                      TaskType=PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile(os.path.join(pread_dir, "pdb_build_done"))
    parameters = {"work_dir": pread_dir, "config": config}
    run_jobs = makePypeLocalFile(os.path.join(pread_dir, 'run_jobs.sh'))
    make_build_pdb_task = PypeTask(inputs={"pread_fofn": pread_fofn},
                                   outputs={"pdb_build_done": pdb_build_done,
                                            "run_jobs": run_jobs},
                                   parameters=parameters,
                                   TaskType=PypeThreadTaskBase,
                                   URL="task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(task_build_pdb)
    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done])

    db_file = makePypeLocalFile(os.path.join(pread_dir, "%s.db" % "preads"))

    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(
        fn(run_jobs), pread_dir, "preads", db_file, pdb_build_done, config,
        pread_aln=True)
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq=30)  # larger number better for more jobs

    p_da_done = makePypeLocalFile(os.path.join(pread_dir, "da_done"))

    @PypeTask(inputs=daligner_out,
              outputs={"da_done": p_da_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pda_check")
    def check_p_da_task(self):
        system("touch %s" % fn(self.da_done))

    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
        fn(run_jobs), pread_dir, "preads", p_da_done, config)
    wf.addTasks(merge_tasks)
    #wf.refreshTargets(updateFreq=30)  # all

    p_merge_done = makePypeLocalFile(os.path.join(pread_dir, "p_merge_done"))

    @PypeTask(inputs=merge_out,
              outputs={"p_merge_done": p_merge_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pmerge_check")
    def check_p_merge_check_task(self):
        system("touch %s" % fn(self.p_merge_done))

    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq=wait_time)  # all

    falcon_asm_done = makePypeLocalFile(os.path.join(falcon_asm_dir, "falcon_asm_done"))
    make_run_falcon_asm = PypeTask(inputs={"p_merge_done": p_merge_done,
                                           "db_file": db_file},
                                   outputs={"falcon_asm_done": falcon_asm_done},
                                   parameters={"wd": falcon_asm_dir,
                                               "config": config,
                                               "pread_dir": pread_dir},
                                   TaskType=PypeThreadTaskBase,
                                   URL="task://localhost/falcon")
    wf.addTask(make_run_falcon_asm(task_run_falcon_asm))
    wf.refreshTargets(updateFreq=wait_time)  # all
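# A minimal, hypothetical entry point for main1() above -- not from the
# original source, where fc_run is normally installed as a console script.
# main1 takes the program name, the config path, and an optional logger
# config path, which is exactly what sys.argv provides.
if __name__ == "__main__":
    main1(*sys.argv)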
ch.setFormatter(formatter)
logger.addHandler(ch)

inputs = {"input": makePypeLocalFile("/tmp/test1_input")}
outputs = {"output": makePypeLocalFile("/tmp/test1_output")}
os.system("touch /tmp/test1_input")

@PypeTask(inputs=inputs, outputs=outputs, TaskType=PypeThreadTaskBase)
def f(self):
    # Poll for about a second, breaking early if a shutdown is requested.
    i = 0
    while 1:
        time.sleep(0.1)
        if self.shutdown_event is not None and self.shutdown_event.is_set():
            break
        if i > 10:
            break
        i += 1
    # Only produce the output if we were not asked to shut down.
    if self.shutdown_event is None or not self.shutdown_event.is_set():
        os.system("touch %s" % fn(self.output))

wf = PypeThreadWorkflow()
wf.addTasks([f])
wf.refreshTargets()
def main(*argv):
    setup_logger()
    if len(argv) < 2:
        print "you need to specify a configuration file"
        print "example: HGAP.py HGAP_run.cfg"
        sys.exit(1)

    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        try:
            os.makedirs(d)
        except OSError:
            pass

    config = get_config(argv[1])
    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        input_h5_fofn = makePypeLocalFile(os.path.abspath(config["input_fofn_fn"]))
        rdb_build_done = makePypeLocalFile(os.path.join(rawread_dir, "rdb_build_done"))
        parameters = {"work_dir": rawread_dir, "config": config}
        make_build_rdb_task = PypeTask(inputs={"input_fofn": input_h5_fofn},
                                       outputs={"rdb_build_done": rdb_build_done},
                                       parameters=parameters,
                                       TaskType=PypeThreadTaskBase)
        build_rdb_task = make_build_rdb_task(build_rdb)
        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done])

        db_file = makePypeLocalFile(os.path.join(rawread_dir, "%s.db" % "raw_reads"))

        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(
            rawread_dir, "raw_reads", db_file, rdb_build_done, config)
        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq=60)  # larger number better for more jobs

        r_da_done = makePypeLocalFile(os.path.join(rawread_dir, "da_done"))

        @PypeTask(inputs=daligner_out,
                  outputs={"da_done": r_da_done},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/rda_check")
        def check_r_da_task(self):
            os.system("touch %s" % fn(self.da_done))

        wf.addTask(check_r_da_task)
        # refreshTargets() must be called here to run the jobs before the
        # concurrency limit is changed below; a larger updateFreq is better
        # when there are more jobs.
        wf.refreshTargets(updateFreq=wait_time)

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
            rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks(merge_tasks)
        if config["target"] == "overlapping":
            wf.refreshTargets(updateFreq=wait_time)
            sys.exit(0)
        wf.addTasks(consensus_tasks)

        r_cns_done = makePypeLocalFile(os.path.join(rawread_dir, "cns_done"))
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, "input_preads.fofn"))

        @PypeTask(inputs=consensus_out,
                  outputs={"cns_done": r_cns_done,
                           "pread_fofn": pread_fofn},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/cns_check")
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn), "w") as f:
                fn_list = glob.glob("%s/preads/out*.fa" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >> f, fa_fn
            os.system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(updateFreq=wait_time)  # larger number better for more jobs

    if config["target"] == "pre-assembly":
        sys.exit(0)

    if config["input_type"] == "preads":
        if not os.path.exists("%s/input_preads.fofn" % pread_dir):
            os.system("cp %s %s/input_preads.fofn" %
                      (os.path.abspath(config["input_fofn_fn"]), pread_dir))
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, "input_preads.fofn"))
        rdb_build_done = makePypeLocalFile(os.path.join(pread_dir, "rdb_build_done"))

        @PypeTask(inputs={"pread_fofn": pread_fofn},
                  outputs={"rdb_build_done": rdb_build_done},
                  parameters={"config": config,
                              "pread_dir": pread_dir},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/build_p_rdb")
        def build_p_rdb_task(self):
            config = self.parameters["config"]
            pread_dir = self.parameters["pread_dir"]
            fa_serial = 0
            for fa_fn in open(fn(self.pread_fofn)).readlines():
                fa_fn = fa_fn.strip()
                c = 0
                fa_serial += 1
                with open("%s/preads_norm_%05d.fasta" % (pread_dir, fa_serial), "w") as p_norm:
                    f = FastaReader(fa_fn)
                    for r in f:
                        if len(r.sequence) < config["length_cutoff_pr"]:
                            continue
                        name = r.name
                        name = name.replace("_", "")
                        # skip reads containing anything besides A, C, G, T
                        ignore_read = False
                        for cc in r.sequence:
                            if cc not in ["A", "C", "G", "T"]:
                                ignore_read = True
                                break
                        if ignore_read:
                            continue
                        print >> p_norm, ">prolog_%05d/%d/%d_%d" % (
                            fa_serial, c, 0, len(r.sequence))
                        # write the sequence wrapped to 80 columns
                        for i in range(0, len(r.sequence), 80):
                            print >> p_norm, r.sequence[i:i + 80]
                        c += 1
                os.system("cd %s; fasta2DB preads preads_norm_%05d.fasta" %
                          (pread_dir, fa_serial))
            os.system("cd %s; DBsplit %s preads" %
                      (pread_dir, config["ovlp_DBsplit_option"]))
            os.system("cd %s; HPCdaligner %s preads > run_jobs.sh" %
                      (pread_dir, config["ovlp_HPCdaligner_option"]))
            os.system("cd %s; touch rdb_build_done" % pread_dir)

        wf.addTask(build_p_rdb_task)
        wf.refreshTargets(updateFreq=wait_time)  # larger number better for more jobs

    db_file = makePypeLocalFile(os.path.join(pread_dir, "%s.db" % "preads"))

    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(
        pread_dir, "preads", db_file, rdb_build_done, config, pread_aln=True)
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq=30)  # larger number better for more jobs

    p_da_done = makePypeLocalFile(os.path.join(pread_dir, "da_done"))

    @PypeTask(inputs=daligner_out,
              outputs={"da_done": p_da_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pda_check")
    def check_p_da_task(self):
        os.system("touch %s" % fn(self.da_done))

    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
        pread_dir, "preads", p_da_done, config)
    wf.addTasks(merge_tasks)
    #wf.refreshTargets(updateFreq=30)  # all

    p_merge_done = makePypeLocalFile(os.path.join(pread_dir, "p_merge_done"))

    @PypeTask(inputs=merge_out,
              outputs={"p_merge_done": p_merge_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pmerge_check")
    def check_p_merge_check_task(self):
        os.system("touch %s" % fn(self.p_merge_done))

    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq=wait_time)  # all

    falcon_asm_done = makePypeLocalFile(os.path.join(falcon_asm_dir, "falcon_asm_done"))

    @PypeTask(inputs={"p_merge_done": p_merge_done},
              outputs={"falcon_asm_done": falcon_asm_done},
              parameters={"wd": falcon_asm_dir,
                          "config": config,
                          "pread_dir": pread_dir},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/falcon")
    def run_falcon_asm_task(self):
        wd = self.parameters["wd"]
        config = self.parameters["config"]
        install_prefix = config["install_prefix"]
        pread_dir = self.parameters["pread_dir"]
        script_dir = os.path.join(wd)
        script_fn = os.path.join(script_dir, "run_falcon_asm.sh")

        script = []
        script.append("source {install_prefix}/bin/activate".format(
            install_prefix=install_prefix))
        script.append("cd %s" % pread_dir)
        script.append("DB2Falcon preads")
        script.append("cd %s" % wd)
        script.append("""find %s/las_files -name "*.las" > las.fofn """ % pread_dir)
        overlap_filtering_setting = config["overlap_filtering_setting"]
        length_cutoff_pr = config["length_cutoff_pr"]
        script.append("""fc_ovlp_filter.py --fofn las.fofn %s \
--n_core 24 --min_len %d > preads.ovl""" % (overlap_filtering_setting, length_cutoff_pr))
        script.append("ln -sf %s/preads4falcon.fasta ." % pread_dir)
        script.append("""fc_ovlp_to_graph.py preads.ovl > fc.log""")
        script.append("""fc_graph_to_contig.py""")
        script.append("""touch %s\n""" % fn(self.falcon_asm_done))

        with open(script_fn, "w") as script_file:
            script_file.write("\n".join(script))

        job_name = self.URL.split("/")[-1]
        job_name += "-" + str(uuid.uuid1())[:8]
        job_data = {"job_name": job_name,
                    "cwd": wd,
                    "sge_option": config["sge_option_fc"],
                    "script_fn": script_fn}
        run_script(job_data, job_type="SGE")
        wait_for_file(fn(self.falcon_asm_done), task=self, job_name=job_name)

    wf.addTask(run_falcon_asm_task)
    wf.refreshTargets(updateFreq=wait_time)  # all
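# The submission pattern above depends on two helpers defined outside this
# excerpt, run_script() and wait_for_file(). A minimal sketch of the latter,
# under the simplest assumption: block until the job touches its "done"
# sentinel file. The real helper in the source may also watch job state or a
# shutdown event, so treat this only as an illustration of the sentinel idea.
import os
import time

def wait_for_file(fname, task=None, job_name="", interval=30):
    # Poll until the submitted job creates its sentinel file.
    while not os.path.exists(fname):
        time.sleep(interval)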
smrt_bin = config.get('Unzip', 'smrt_bin')

quiver_concurrent_jobs = 8
if config.has_option('Unzip', 'quiver_concurrent_jobs'):
    quiver_concurrent_jobs = config.getint('Unzip', 'quiver_concurrent_jobs')

config = {"job_type": job_type,
          "sge_quiver": sge_quiver,
          "smrt_bin": smrt_bin}

support.job_type = "SGE"  # tmp hack until we have a configuration parser

ctg_ids = []

PypeThreadWorkflow.setNumThreadAllowed(quiver_concurrent_jobs, quiver_concurrent_jobs)
wf = PypeThreadWorkflow()

ref_seq_data = {}
p_ctg_fa = FastaReader("./3-unzip/all_p_ctg.fa")
ctg_types = {}
for r in p_ctg_fa:
    rid = r.name.split()[0]
    ref_seq_data[rid] = r.sequence
    ctg_types[rid] = "p"

h_ctg_fa = FastaReader("./3-unzip/all_h_ctg.fa")
for r in h_ctg_fa:
    rid = r.name.split()[0]
    ref_seq_data[rid] = r.sequence
fasta_dir = os.path.abspath("./0-fasta_files")
dist_map_dir = os.path.abspath("./1-dist_map")
pa_dir = os.path.abspath("./2-preads")
celera_asm_dir = os.path.abspath("./3-CA")
script_dir = os.path.abspath("./scripts")
sge_log_dir = os.path.abspath("./sge_log")

for d in (dist_map_dir, fasta_dir, pa_dir, script_dir, celera_asm_dir, sge_log_dir):
    try:
        os.makedirs(d)
    except OSError:
        pass

config = get_config(sys.argv[1])
concurrent_jobs = config["concurrent_jobs"]
PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
wf = PypeThreadWorkflow()

#### Task to convert bas.h5 and bax.h5 to fasta files; it generates two fofn
#### files, one for the queries and one for the targets
input_h5_fofn = makePypeLocalFile(os.path.abspath(config["input_fofn_fn"]))
query_fa_fofn = makePypeLocalFile(os.path.join(fasta_dir, "queries.fofn"))
target_fa_fofn = makePypeLocalFile(os.path.join(fasta_dir, "targets.fofn"))
fasta_dump_done = makePypeLocalFile(
    os.path.abspath(os.path.join(fasta_dir, "fasta_dump_done")))
parameters = {"fasta_dir": fasta_dir,
              "min_length": config["length_cutoff"],
              "min_read_score": config["RQ_threshold"]}

@PypeTask(inputs={"input_fofn": input_h5_fofn},
          outputs={"fasta_dump_done": fasta_dump_done,
                   "target_fa_fofn": target_fa_fofn,
fc_run_logger.info("fc_run started with configuration %s", sys.argv[1])

rawread_dir = os.path.abspath("./0-rawreads")
pread_dir = os.path.abspath("./1-preads_ovl")
falcon_asm_dir = os.path.abspath("./2-asm-falcon")
script_dir = os.path.abspath("./scripts")
sge_log_dir = os.path.abspath("./sge_log")

for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
    try:
        os.makedirs(d)
    except OSError:
        pass

config = get_config(sys.argv[1])
concurrent_jobs = config["pa_concurrent_jobs"]
PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
wf = PypeThreadWorkflow()

if config["input_type"] == "raw":
    #### import sequences into daligner DB
    input_h5_fofn = makePypeLocalFile(os.path.abspath(config["input_fofn_fn"]))
    rdb_build_done = makePypeLocalFile(os.path.join(rawread_dir, "rdb_build_done"))
    parameters = {"work_dir": rawread_dir, "config": config}
    make_build_rdb_task = PypeTask(inputs={"input_fofn": input_h5_fofn},
                                   outputs={"rdb_build_done": rdb_build_done},
                                   parameters=parameters,
                                   TaskType=PypeThreadTaskBase)
    build_rdb_task = make_build_rdb_task(build_rdb)
rawread_dir = os.path.abspath("./0-rawreads")
pread_dir = os.path.abspath("./1-preads_ovl")
falcon_asm_dir = os.path.abspath("./2-asm-falcon")
script_dir = os.path.abspath("./scripts")
sge_log_dir = os.path.abspath("./sge_log")

for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
    try:
        os.makedirs(d)
    except OSError:
        pass

config = get_config(sys.argv[1])
concurrent_jobs = config["pa_concurrent_jobs"]
PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
wf = PypeThreadWorkflow()

if config["input_type"] == "raw":
    #### import sequences into daligner DB
    input_h5_fofn = makePypeLocalFile(os.path.abspath(config["input_fofn_fn"]))
    rdb_build_done = makePypeLocalFile(os.path.join(rawread_dir, "rdb_build_done"))
    parameters = {"work_dir": rawread_dir, "config": config}
    make_build_rdb_task = PypeTask(inputs={"input_fofn": input_h5_fofn},
                                   outputs={"rdb_build_done": rdb_build_done},
                                   parameters=parameters,
                                   TaskType=PypeThreadTaskBase)
dist_map_dir = os.path.abspath("./1-dist_map")
pa_dir = os.path.abspath("./2-preads")
celera_asm_dir = os.path.abspath("./3-CA")
script_dir = os.path.abspath("./scripts")
sge_log_dir = os.path.abspath("./sge_log")

for d in (dist_map_dir, fasta_dir, pa_dir, script_dir, celera_asm_dir, sge_log_dir):
    try:
        os.makedirs(d)
    except OSError:
        pass

config = get_config(sys.argv[1])
concurrent_jobs = config["concurrent_jobs"]
PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
wf = PypeThreadWorkflow()

#### Task to convert bas.h5 and bax.h5 to fasta files; it generates two fofn
#### files, one for the queries and one for the targets
input_h5_fofn = makePypeLocalFile(os.path.abspath(config["input_fofn_fn"]))
query_fa_fofn = makePypeLocalFile(os.path.join(fasta_dir, "queries.fofn"))
target_fa_fofn = makePypeLocalFile(os.path.join(fasta_dir, "targets.fofn"))
fasta_dump_done = makePypeLocalFile(
    os.path.abspath(os.path.join(fasta_dir, "fasta_dump_done")))
parameters = {"fasta_dir": fasta_dir,
              "min_length": config["length_cutoff"],
              "min_read_score": config["RQ_threshold"]}

@PypeTask(inputs={"input_fofn": input_h5_fofn},
quiver_concurrent_jobs = 8
if config.has_option('Unzip', 'quiver_concurrent_jobs'):
    quiver_concurrent_jobs = config.getint('Unzip', 'quiver_concurrent_jobs')

config = {"job_type": job_type,
          "sge_quiver": sge_quiver,
          "smrt_bin": smrt_bin}

support.job_type = "SGE"  # tmp hack until we have a configuration parser

ctg_ids = []

PypeThreadWorkflow.setNumThreadAllowed(quiver_concurrent_jobs, quiver_concurrent_jobs)
wf = PypeThreadWorkflow()

ref_seq_data = {}
p_ctg_fa = FastaReader("./3-unzip/all_p_ctg.fa")
ctg_types = {}
for r in p_ctg_fa:
    rid = r.name.split()[0]
    ref_seq_data[rid] = r.sequence
    ctg_types[rid] = "p"

h_ctg_fa = FastaReader("./3-unzip/all_h_ctg.fa")
for r in h_ctg_fa:
    rid = r.name.split()[0]
    ref_seq_data[rid] = r.sequence
    ctg_types[rid] = "h"
parser.add_argument('--base_dir', type=str, default="./",
                    help='the output base_dir, defaults to the current working directory')
args = parser.parse_args()

bam_fn = args.bam
fasta_fn = args.fasta
ctg_id = args.ctg_id
base_dir = args.base_dir

ref_seq = ""
for r in FastaReader(fasta_fn):
    rid = r.name.split()[0]
    if rid != ctg_id:
        continue
    ref_seq = r.sequence.upper()

PypeThreadWorkflow.setNumThreadAllowed(1, 1)
wf = PypeThreadWorkflow()

bam_file = makePypeLocalFile(bam_fn)
vmap_file = makePypeLocalFile(os.path.join(base_dir, ctg_id, "variant_map"))
vpos_file = makePypeLocalFile(os.path.join(base_dir, ctg_id, "variant_pos"))
q_id_map_file = makePypeLocalFile(os.path.join(base_dir, ctg_id, "q_id_map"))

parameters = {}
parameters["ctg_id"] = ctg_id
parameters["ref_seq"] = ref_seq
parameters["base_dir"] = base_dir

make_het_call_task = PypeTask(inputs={"bam_file": bam_file},
                              outputs={"vmap_file": vmap_file,
                                       "vpos_file": vpos_file,
                                       "q_id_map_file": q_id_map_file},
def phasing(args):
    bam_fn = args.bam
    fasta_fn = args.fasta
    ctg_id = args.ctg_id
    base_dir = args.base_dir

    ref_seq = ""
    for r in FastaReader(fasta_fn):
        rid = r.name.split()[0]
        if rid != ctg_id:
            continue
        ref_seq = r.sequence.upper()

    PypeThreadWorkflow.setNumThreadAllowed(1, 1)
    wf = PypeThreadWorkflow()

    bam_file = makePypeLocalFile(bam_fn)
    vmap_file = makePypeLocalFile(os.path.join(base_dir, ctg_id, "variant_map"))
    vpos_file = makePypeLocalFile(os.path.join(base_dir, ctg_id, "variant_pos"))
    q_id_map_file = makePypeLocalFile(os.path.join(base_dir, ctg_id, "q_id_map"))

    parameters = {}
    parameters["ctg_id"] = ctg_id
    parameters["ref_seq"] = ref_seq
    parameters["base_dir"] = base_dir

    make_het_call_task = PypeTask(
        inputs={"bam_file": bam_file},
        outputs={"vmap_file": vmap_file,
                 "vpos_file": vpos_file,
                 "q_id_map_file": q_id_map_file},
        parameters=parameters,
        TaskType=PypeThreadTaskBase,
        URL="task://localhost/het_call")(make_het_call)
    wf.addTasks([make_het_call_task])

    atable_file = makePypeLocalFile(os.path.join(base_dir, ctg_id, "atable"))
    parameters = {}
    parameters["ctg_id"] = ctg_id
    parameters["base_dir"] = base_dir
    generate_association_table_task = PypeTask(
        inputs={"vmap_file": vmap_file},
        outputs={"atable_file": atable_file},
        parameters=parameters,
        TaskType=PypeThreadTaskBase,
        URL="task://localhost/g_atable")(generate_association_table)
    wf.addTasks([generate_association_table_task])

    phased_variant_file = makePypeLocalFile(
        os.path.join(base_dir, ctg_id, "phased_variants"))
    get_phased_blocks_task = PypeTask(
        inputs={"vmap_file": vmap_file,
                "atable_file": atable_file},
        outputs={"phased_variant_file": phased_variant_file},
        TaskType=PypeThreadTaskBase,
        URL="task://localhost/get_phased_blocks")(get_phased_blocks)
    wf.addTasks([get_phased_blocks_task])

    phased_read_file = makePypeLocalFile(
        os.path.join(base_dir, ctg_id, "phased_reads"))
    get_phased_reads_task = PypeTask(
        inputs={"vmap_file": vmap_file,
                "q_id_map_file": q_id_map_file,
                "phased_variant_file": phased_variant_file},
        outputs={"phased_read_file": phased_read_file},
        parameters={"ctg_id": ctg_id},
        TaskType=PypeThreadTaskBase,
        URL="task://localhost/get_phased_reads")(get_phased_reads)
    wf.addTasks([get_phased_reads_task])

    wf.refreshTargets()
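# Hypothetical command-line wiring for phasing() -- not part of the excerpt.
# The option names (--bam, --fasta, --ctg_id, --base_dir) follow the parser
# fragment shown earlier; the required/help details are assumptions.
import argparse
import sys

def parse_phasing_args(argv):
    parser = argparse.ArgumentParser(description='phase reads for one contig')
    parser.add_argument('--bam', required=True, help='aligned reads (BAM)')
    parser.add_argument('--fasta', required=True, help='contigs (FASTA)')
    parser.add_argument('--ctg_id', required=True, help='contig to phase')
    parser.add_argument('--base_dir', type=str, default="./",
                        help='the output base_dir, defaults to the current working directory')
    return parser.parse_args(argv[1:])

if __name__ == "__main__":
    phasing(parse_phasing_args(sys.argv))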
def main1(prog_name, input_config_fn, logger_config_fn=None):
    global fc_run_logger
    fc_run_logger = support.setup_logger(logger_config_fn)

    fc_run_logger.info("fc_run started with configuration %s", input_config_fn)
    try:
        config = support.get_dict_from_old_falcon_cfg(
            support.parse_config(input_config_fn))
    except Exception:
        fc_run_logger.exception('Failed to parse config "{}".'.format(input_config_fn))
        raise

    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        support.make_dirs(d)

    exitOnFailure = config['stop_all_jobs_on_failure']  # only matters for parallel jobs
    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    input_fofn_plf = makePypeLocalFile(config["input_fofn"])
    rawread_fofn_plf = makePypeLocalFile(
        os.path.join(rawread_dir, os.path.basename(config["input_fofn"])))
    make_fofn_abs_task = PypeTask(inputs={"i_fofn": input_fofn_plf},
                                  outputs={"o_fofn": rawread_fofn_plf},
                                  parameters={},
                                  TaskType=PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_raw)
    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile(os.path.join(rawread_dir, "sleep_done"))
        rdb_build_done = makePypeLocalFile(os.path.join(rawread_dir, "rdb_build_done"))
        run_jobs = makePypeLocalFile(os.path.join(rawread_dir, "run_jobs.sh"))
        parameters = {"work_dir": rawread_dir, "config": config}

        raw_reads_db_plf = makePypeLocalFile(
            os.path.join(rawread_dir, "%s.db" % "raw_reads"))
        make_build_rdb_task = PypeTask(inputs={"input_fofn": rawread_fofn_plf},
                                       outputs={"rdb_build_done": rdb_build_done,
                                                "raw_reads_db": raw_reads_db_plf,
                                                "run_jobs": run_jobs,
                                                },
                                       parameters=parameters,
                                       TaskType=PypeThreadTaskBase)
        build_rdb_task = make_build_rdb_task(task_build_rdb)
        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done])
        raw_reads_nblock = support.get_nblock(fn(raw_reads_db_plf))

        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(
            fn(run_jobs), rawread_dir, "raw_reads", rdb_build_done,
            nblock=raw_reads_nblock, config=config)
        wf.addTasks(daligner_tasks)

        r_da_done = makePypeLocalFile(os.path.join(rawread_dir, "da_done"))
        parameters = {
            "nblock": raw_reads_nblock,
        }
        make_daligner_gather = PypeTask(inputs=daligner_out,
                                        outputs={"da_done": r_da_done},
                                        parameters=parameters,
                                        TaskType=PypeThreadTaskBase,
                                        URL="task://localhost/rda_check")
        check_r_da_task = make_daligner_gather(task_daligner_gather)
        wf.addTask(check_r_da_task)
        wf.refreshTargets(exitOnFailure=exitOnFailure)

        merge_tasks, merge_out, p_ids_merge_job_done = create_merge_tasks(
            fn(run_jobs), rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks(merge_tasks)
        wf.refreshTargets(exitOnFailure=exitOnFailure)

        if config["target"] == "overlapping":
            sys.exit(0)

        consensus_tasks, consensus_out = create_consensus_tasks(
            rawread_dir, "raw_reads", config, p_ids_merge_job_done)
        wf.addTasks(consensus_tasks)

        r_cns_done = makePypeLocalFile(os.path.join(rawread_dir, "cns_done"))
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, "input_preads.fofn"))

        @PypeTask(inputs=consensus_out,
                  outputs={"cns_done": r_cns_done,
                           "pread_fofn": pread_fofn},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/cns_check")
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn), "w") as f:
                fn_list = glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >> f, fa_fn
            system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)

        length_cutoff_plf = makePypeLocalFile(os.path.join(rawread_dir, "length_cutoff"))
        pre_assembly_report_plf = makePypeLocalFile(
            os.path.join(rawread_dir, "pre_assembly_stats.json"))  # though technically it needs pread_fofn
        make_task = PypeTask(inputs={"length_cutoff_fn": length_cutoff_plf,
                                     "raw_reads_db": raw_reads_db_plf,
                                     "preads_fofn": pread_fofn,
                                     },
                             outputs={"pre_assembly_report": pre_assembly_report_plf,
                                      },
                             parameters=config,
                             TaskType=PypeThreadTaskBase,
                             URL="task://localhost/report_pre_assembly")
        task = make_task(task_report_pre_assembly)
        wf.addTask(task)

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        wf.refreshTargets(exitOnFailure=exitOnFailure)

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(
            os.path.join(pread_dir, os.path.basename(config["input_fofn"])))
        make_fofn_abs_task = PypeTask(inputs={"i_fofn": rawread_fofn_plf},
                                      outputs={"o_fofn": pread_fofn},
                                      parameters={},
                                      TaskType=PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile(os.path.join(pread_dir, "pdb_build_done"))
    parameters = {"work_dir": pread_dir, "config": config}
    run_jobs = makePypeLocalFile(os.path.join(pread_dir, 'run_jobs.sh'))
    preads_db = makePypeLocalFile(os.path.join(pread_dir, 'preads.db'))  # Also .preads.*, of course.
    make_build_pdb_task = PypeTask(inputs={"pread_fofn": pread_fofn},
                                   outputs={"pdb_build_done": pdb_build_done,
                                            "preads_db": preads_db,
                                            "run_jobs": run_jobs},
                                   parameters=parameters,
                                   TaskType=PypeThreadTaskBase,
                                   URL="task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(task_build_pdb)
    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done])

    preads_nblock = support.get_nblock(fn(preads_db))

    #### run daligner
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(
        fn(run_jobs), pread_dir, "preads", pdb_build_done,
        nblock=preads_nblock, config=config, pread_aln=True)
    wf.addTasks(daligner_tasks)

    p_da_done = makePypeLocalFile(os.path.join(pread_dir, "da_done"))
    parameters = {
        "nblock": preads_nblock,
    }
    make_daligner_gather = PypeTask(inputs=daligner_out,
                                    outputs={"da_done": p_da_done},
                                    parameters=parameters,
                                    TaskType=PypeThreadTaskBase,
                                    URL="task://localhost/pda_check")
    check_p_da_task = make_daligner_gather(task_daligner_gather)
    wf.addTask(check_p_da_task)
    wf.refreshTargets(exitOnFailure=exitOnFailure)

    merge_tasks, merge_out, _ = create_merge_tasks(
        fn(run_jobs), pread_dir, "preads", p_da_done, config)
    wf.addTasks(merge_tasks)

    p_merge_done = makePypeLocalFile(os.path.join(pread_dir, "p_merge_done"))

    @PypeTask(inputs=merge_out,
              outputs={"p_merge_done": p_merge_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pmerge_check")
    def check_p_merge_check_task(self):
        system("touch %s" % fn(self.p_merge_done))

    wf.addTask(check_p_merge_check_task)

    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf.refreshTargets(exitOnFailure=exitOnFailure)

    db2falcon_done = makePypeLocalFile(os.path.join(pread_dir, "db2falcon_done"))
    make_run_db2falcon = PypeTask(inputs={"p_merge_done": p_merge_done,
                                          },
                                  outputs={"db2falcon_done": db2falcon_done},
                                  parameters={"wd": pread_dir,
                                              "config": config,
                                              },
                                  TaskType=PypeThreadTaskBase,
                                  URL="task://localhost/db2falcon")
    wf.addTask(make_run_db2falcon(task_run_db2falcon))

    falcon_asm_done = makePypeLocalFile(os.path.join(falcon_asm_dir, "falcon_asm_done"))
    make_run_falcon_asm = PypeTask(inputs={"db2falcon_done": db2falcon_done,
                                           "db_file": preads_db},
                                   outputs={"falcon_asm_done": falcon_asm_done},
                                   parameters={"wd": falcon_asm_dir,
                                               "config": config,
                                               "pread_dir": pread_dir},
                                   TaskType=PypeThreadTaskBase,
                                   URL="task://localhost/falcon")
    wf.addTask(make_run_falcon_asm(task_run_falcon_asm))
    wf.refreshTargets()
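# support.get_nblock() is used above but defined elsewhere. A minimal sketch,
# under the assumption that it simply reads the block count that DBsplit
# records in the text .db file on a "blocks = N" line; the real helper may be
# more defensive (e.g. tolerating a missing file).
def get_nblock(db_fn):
    # Return the number of blocks in a DAZZ_DB database (1 if unsplit).
    nblock = 1
    with open(db_fn) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) >= 3 and parts[0] == "blocks" and parts[1] == "=":
                nblock = int(parts[2])
                break
    return nblock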
                    help='the output base_dir, defaults to the current working directory')
args = parser.parse_args()

bam_fn = args.bam
fasta_fn = args.fasta
ctg_id = args.ctg_id
base_dir = args.base_dir

ref_seq = ""
for r in FastaReader(fasta_fn):
    rid = r.name.split()[0]
    if rid != ctg_id:
        continue
    ref_seq = r.sequence.upper()

PypeThreadWorkflow.setNumThreadAllowed(1, 1)
wf = PypeThreadWorkflow()

bam_file = makePypeLocalFile(bam_fn)
vmap_file = makePypeLocalFile(os.path.join(base_dir, ctg_id, "variant_map"))
vpos_file = makePypeLocalFile(os.path.join(base_dir, ctg_id, "variant_pos"))
q_id_map_file = makePypeLocalFile(os.path.join(base_dir, ctg_id, "q_id_map"))

parameters = {}
parameters["ctg_id"] = ctg_id
parameters["ref_seq"] = ref_seq
parameters["base_dir"] = base_dir

make_het_call_task = PypeTask(
def main(argv=sys.argv):
    global fc_run_logger
    fc_run_logger = support.setup_logger(None)

    if len(sys.argv) < 2:
        print "you need to provide a configuration file to specify the cluster running environment"
        sys.exit(1)

    config_fn = sys.argv[1]
    config = ConfigParser.ConfigParser()
    config.read(config_fn)

    job_type = "SGE"
    if config.has_option('General', 'job_type'):
        job_type = config.get('General', 'job_type')

    sge_track_reads = " -pe smp 12 -q bigmem"
    if config.has_option('Unzip', 'sge_track_reads'):
        sge_track_reads = config.get('Unzip', 'sge_track_reads')

    sge_quiver = " -pe smp 24 -q bigmem "
    if config.has_option('Unzip', 'sge_quiver'):
        sge_quiver = config.get('Unzip', 'sge_quiver')

    smrt_bin = "/mnt/secondary/builds/full/3.0.0/prod/smrtanalysis_3.0.0.153854/smrtcmds/bin/"
    if config.has_option('Unzip', 'smrt_bin'):
        smrt_bin = config.get('Unzip', 'smrt_bin')

    input_bam_fofn = "input_bam.fofn"
    if config.has_option('Unzip', 'input_bam_fofn'):
        input_bam_fofn = config.get('Unzip', 'input_bam_fofn')

    quiver_concurrent_jobs = 8
    if config.has_option('Unzip', 'quiver_concurrent_jobs'):
        quiver_concurrent_jobs = config.getint('Unzip', 'quiver_concurrent_jobs')

    config = {"job_type": job_type,
              "sge_quiver": sge_quiver,
              "sge_track_reads": sge_track_reads,
              "input_bam_fofn": input_bam_fofn,
              "smrt_bin": smrt_bin}

    support.job_type = "SGE"  # tmp hack until we have a configuration parser

    ctg_ids = []

    PypeThreadWorkflow.setNumThreadAllowed(quiver_concurrent_jobs, quiver_concurrent_jobs)
    wf = PypeThreadWorkflow()

    parameters = {"wd": os.path.abspath("."), "config": config}
    hasm_done = makePypeLocalFile("./3-unzip/1-hasm/hasm_done")
    job_done = makePypeLocalFile(os.path.join(parameters["wd"], "track_reads_h_done"))
    make_track_reads_task = PypeTask(inputs={"hasm_done": hasm_done},
                                     outputs={"job_done": job_done},
                                     parameters=parameters,
                                     TaskType=PypeThreadTaskBase,
                                     URL="task://localhost/track_reads_h")
    track_reads_task = make_track_reads_task(task_track_reads)
    wf.addTask(track_reads_task)
    wf.refreshTargets()  # force refresh now; will put proper dependence later

    ref_seq_data = {}
    p_ctg_fa = FastaReader("./3-unzip/all_p_ctg.fa")
    ctg_types = {}
    for r in p_ctg_fa:
        rid = r.name.split()[0]
        ref_seq_data[rid] = r.sequence
        ctg_types[rid] = "p"

    h_ctg_fa = FastaReader("./3-unzip/all_h_ctg.fa")
    for r in h_ctg_fa:
        rid = r.name.split()[0]
        ref_seq_data[rid] = r.sequence
        ctg_types[rid] = "h"

    ctg_ids = sorted(ref_seq_data.keys())
    p_ctg_out = []
    h_ctg_out = []
    for ctg_id in ctg_ids:
        sequence = ref_seq_data[ctg_id]
        m_ctg_id = ctg_id.split("-")[0]
        wd = os.path.join(os.getcwd(), "./4-quiver/", m_ctg_id)
        mkdir(wd)
        ref_fasta = makePypeLocalFile(
            os.path.join(wd, "{ctg_id}_ref.fa".format(ctg_id=ctg_id)))
        read_sam = makePypeLocalFile(
            os.path.join(os.getcwd(),
                         "./4-quiver/reads/"
                         "{ctg_id}.sam".format(ctg_id=ctg_id)))
        cns_fasta = makePypeLocalFile(
            os.path.join(wd, "cns-{ctg_id}.fasta.gz".format(ctg_id=ctg_id)))
        cns_fastq = makePypeLocalFile(
            os.path.join(wd, "cns-{ctg_id}.fastq.gz".format(ctg_id=ctg_id)))
        job_done = makePypeLocalFile(
            os.path.join(wd, "{ctg_id}_quiver_done".format(ctg_id=ctg_id)))

        if os.path.exists(fn(read_sam)):
            if ctg_types[ctg_id] == "p":
                p_ctg_out.append((cns_fasta, cns_fastq))
            if ctg_types[ctg_id] == "h":
                h_ctg_out.append((cns_fasta, cns_fastq))
            if not os.path.exists(fn(ref_fasta)):
                with open(fn(ref_fasta), "w") as f:
                    print >> f, ">" + ctg_id
                    print >> f, sequence
            parameters = {"job_uid": "q-" + ctg_id,
                          "wd": wd,
                          "config": config,
                          "ctg_id": ctg_id}
            make_quiver_task = PypeTask(inputs={"ref_fasta": ref_fasta,
                                                "read_sam": read_sam},
                                        outputs={"cns_fasta": cns_fasta,
                                                 "cns_fastq": cns_fastq,
                                                 "job_done": job_done},
                                        parameters=parameters,
                                        TaskType=PypeThreadTaskBase,
                                        URL="task://localhost/q_{ctg_id}".format(ctg_id=ctg_id))
            quiver_task = make_quiver_task(task_run_quiver)
            wf.addTask(quiver_task)

    wf.refreshTargets()
    os.system("sleep 30")

    mkdir("./4-quiver/cns_output")
    os.system("rm -f ./4-quiver/cns_output/cns_p_ctg.fasta")
    os.system("rm -f ./4-quiver/cns_output/cns_p_ctg.fastq")
    for cns_fasta, cns_fastq in sorted(p_ctg_out):
        os.system("zcat {cns_fasta} >> ./4-quiver/cns_output/cns_p_ctg.fasta".format(
            cns_fasta=fn(cns_fasta)))
        os.system("zcat {cns_fastq} >> ./4-quiver/cns_output/cns_p_ctg.fastq".format(
            cns_fastq=fn(cns_fastq)))

    os.system("rm -f ./4-quiver/cns_output/cns_h_ctg.fasta")
    os.system("rm -f ./4-quiver/cns_output/cns_h_ctg.fastq")
    for cns_fasta, cns_fastq in sorted(h_ctg_out):
        os.system("zcat {cns_fasta} >> ./4-quiver/cns_output/cns_h_ctg.fasta".format(
            cns_fasta=fn(cns_fasta)))
        os.system("zcat {cns_fastq} >> ./4-quiver/cns_output/cns_h_ctg.fastq".format(
            cns_fastq=fn(cns_fastq)))
def main1(prog_name, input_config_fn, logger_config_fn=None):
    setup_logger(logger_config_fn)

    fc_run_logger.info("fc_run started with configuration %s", input_config_fn)
    config = get_config(parse_config(input_config_fn))

    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        make_dirs(d)

    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    input_fofn_plf = makePypeLocalFile(os.path.basename(config["input_fofn_fn"]))
    rawread_fofn_plf = makePypeLocalFile(
        os.path.join(rawread_dir, os.path.basename(config["input_fofn_fn"])))
    make_fofn_abs_task = PypeTask(inputs={"i_fofn": input_fofn_plf},
                                  outputs={"o_fofn": rawread_fofn_plf},
                                  parameters={},
                                  TaskType=PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(make_fofn_abs_raw)
    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile(os.path.join(rawread_dir, "sleep_done"))
        rdb_build_done = makePypeLocalFile(os.path.join(rawread_dir, "rdb_build_done"))
        parameters = {"work_dir": rawread_dir, "config": config}
        make_build_rdb_task = PypeTask(inputs={"input_fofn": rawread_fofn_plf},
                                       outputs={"rdb_build_done": rdb_build_done},
                                       parameters=parameters,
                                       TaskType=PypeThreadTaskBase)
        build_rdb_task = make_build_rdb_task(build_rdb)
        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done])

        db_file = makePypeLocalFile(os.path.join(rawread_dir, "%s.db" % "raw_reads"))

        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(
            rawread_dir, "raw_reads", db_file, rdb_build_done, config)
        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq=60)  # larger number better for more jobs

        r_da_done = makePypeLocalFile(os.path.join(rawread_dir, "da_done"))

        @PypeTask(inputs=daligner_out,
                  outputs={"da_done": r_da_done},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/rda_check")
        def check_r_da_task(self):
            os.system("touch %s" % fn(self.da_done))

        wf.addTask(check_r_da_task)
        # refreshTargets() must be called here to run the jobs before the
        # concurrency limit is changed below; a larger updateFreq is better
        # when there are more jobs.
        wf.refreshTargets(updateFreq=wait_time)

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
            rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks(merge_tasks)
        if config["target"] == "overlapping":
            wf.refreshTargets(updateFreq=wait_time)
            sys.exit(0)
        wf.addTasks(consensus_tasks)

        r_cns_done = makePypeLocalFile(os.path.join(rawread_dir, "cns_done"))
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, "input_preads.fofn"))

        @PypeTask(inputs=consensus_out,
                  outputs={"cns_done": r_cns_done,
                           "pread_fofn": pread_fofn},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/cns_check")
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn), "w") as f:
                fn_list = glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >> f, fa_fn
            os.system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(updateFreq=wait_time)  # larger number better for more jobs

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(
            os.path.join(pread_dir, os.path.basename(config["input_fofn_fn"])))
        make_fofn_abs_task = PypeTask(inputs={"i_fofn": rawread_fofn_plf},
                                      outputs={"o_fofn": pread_fofn},
                                      parameters={},
                                      TaskType=PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile(os.path.join(pread_dir, "pdb_build_done"))
    parameters = {"work_dir": pread_dir, "config": config}
    make_build_pdb_task = PypeTask(inputs={"pread_fofn": pread_fofn},
                                   outputs={"pdb_build_done": pdb_build_done},
                                   parameters=parameters,
                                   TaskType=PypeThreadTaskBase,
                                   URL="task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(build_pdb)
    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done])

    db_file = makePypeLocalFile(os.path.join(pread_dir, "%s.db" % "preads"))

    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(
        pread_dir, "preads", db_file, pdb_build_done, config, pread_aln=True)
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq=30)  # larger number better for more jobs

    p_da_done = makePypeLocalFile(os.path.join(pread_dir, "da_done"))

    @PypeTask(inputs=daligner_out,
              outputs={"da_done": p_da_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pda_check")
    def check_p_da_task(self):
        os.system("touch %s" % fn(self.da_done))

    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
        pread_dir, "preads", p_da_done, config)
    wf.addTasks(merge_tasks)
    #wf.refreshTargets(updateFreq=30)  # all

    p_merge_done = makePypeLocalFile(os.path.join(pread_dir, "p_merge_done"))

    @PypeTask(inputs=merge_out,
              outputs={"p_merge_done": p_merge_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pmerge_check")
    def check_p_merge_check_task(self):
        os.system("touch %s" % fn(self.p_merge_done))

    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq=wait_time)  # all

    falcon_asm_done = makePypeLocalFile(os.path.join(falcon_asm_dir, "falcon_asm_done"))

    @PypeTask(inputs={"p_merge_done": p_merge_done,
                      "db_file": db_file},
              outputs={"falcon_asm_done": falcon_asm_done},
              parameters={"wd": falcon_asm_dir,
                          "config": config,
                          "pread_dir": pread_dir},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/falcon")
    def run_falcon_asm_task(self):
        wd = self.parameters["wd"]
        config = self.parameters["config"]
        install_prefix = config["install_prefix"]
        pread_dir = self.parameters["pread_dir"]
        script_dir = os.path.join(wd)
        script_fn = os.path.join(script_dir, "run_falcon_asm.sh")

        script = []
        script.append("set -vex")
        script.append("trap 'touch %s.exit' EXIT" % fn(self.falcon_asm_done))
        script.append("source {install_prefix}/bin/activate".format(
            install_prefix=install_prefix))
        script.append("cd %s" % pread_dir)
        # Write preads4falcon.fasta, in 1-preads_ovl:
        script.append("DB2Falcon -U preads")
        script.append("cd %s" % wd)
        script.append("""find %s/las_files -name "*.las" > las.fofn """ % pread_dir)
        overlap_filtering_setting = config["overlap_filtering_setting"]
        length_cutoff_pr = config["length_cutoff_pr"]
        script.append("""fc_ovlp_filter.py --db %s --fofn las.fofn %s --min_len %d > preads.ovl""" %
                      (fn(db_file), overlap_filtering_setting, length_cutoff_pr))
        script.append("ln -sf %s/preads4falcon.fasta ." % pread_dir)
        script.append("""fc_ovlp_to_graph.py preads.ovl --min_len %d > fc_ovlp_to_graph.log""" %
                      length_cutoff_pr)  # TODO: drop this logfile
        # Write 'p_ctg.fa' and 'a_ctg.fa':
        script.append("""fc_graph_to_contig.py""")
        script.append("""touch %s""" % fn(self.falcon_asm_done))

        with open(script_fn, "w") as script_file:
            script_file.write("\n".join(script))

        job_name = self.URL.split("/")[-1]
        job_name += "-" + str(uuid.uuid4())[:8]
        job_data = {"job_name": job_name,
                    "cwd": wd,
                    "sge_option": config["sge_option_fc"],
                    "script_fn": script_fn}
        run_script(job_data, job_type=config["job_type"])
        wait_for_file(fn(self.falcon_asm_done), task=self, job_name=job_name)

    wf.addTask(run_falcon_asm_task)
    wf.refreshTargets(updateFreq=wait_time)  # all
def main1(prog_name, input_config_fn, logger_config_fn=None):
    setup_logger(logger_config_fn)

    fc_run_logger.info("fc_run started with configuration %s", input_config_fn)
    config = get_config(parse_config(input_config_fn))

    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        make_dirs(d)

    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    input_fofn_plf = makePypeLocalFile(os.path.basename(config["input_fofn_fn"]))
    rawread_fofn_plf = makePypeLocalFile(
        os.path.join(rawread_dir, os.path.basename(config["input_fofn_fn"])))
    make_fofn_abs_task = PypeTask(inputs={"i_fofn": input_fofn_plf},
                                  outputs={"o_fofn": rawread_fofn_plf},
                                  parameters={},
                                  TaskType=PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(make_fofn_abs_raw)
    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile(os.path.join(rawread_dir, "sleep_done"))
        rdb_build_done = makePypeLocalFile(os.path.join(rawread_dir, "rdb_build_done"))
        parameters = {"work_dir": rawread_dir, "config": config}
        make_build_rdb_task = PypeTask(inputs={"input_fofn": rawread_fofn_plf},
                                       outputs={"rdb_build_done": rdb_build_done},
                                       parameters=parameters,
                                       TaskType=PypeThreadTaskBase)
        build_rdb_task = make_build_rdb_task(build_rdb)
        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done])

        db_file = makePypeLocalFile(os.path.join(rawread_dir, "%s.db" % "raw_reads"))

        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(
            rawread_dir, "raw_reads", db_file, rdb_build_done, config)
        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq=60)  # larger number better for more jobs

        r_da_done = makePypeLocalFile(os.path.join(rawread_dir, "da_done"))

        @PypeTask(inputs=daligner_out,
                  outputs={"da_done": r_da_done},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/rda_check")
        def check_r_da_task(self):
            os.system("touch %s" % fn(self.da_done))

        wf.addTask(check_r_da_task)
        # refreshTargets() must be called here to run the jobs before the
        # concurrency limit is changed below; a larger updateFreq is better
        # when there are more jobs.
        wf.refreshTargets(updateFreq=wait_time)

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
            rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks(merge_tasks)
        if config["target"] == "overlapping":
            wf.refreshTargets(updateFreq=wait_time)
            sys.exit(0)
        wf.addTasks(consensus_tasks)

        r_cns_done = makePypeLocalFile(os.path.join(rawread_dir, "cns_done"))
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, "input_preads.fofn"))

        @PypeTask(inputs=consensus_out,
                  outputs={"cns_done": r_cns_done,
                           "pread_fofn": pread_fofn},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/cns_check")
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn), "w") as f:
                fn_list = glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >> f, fa_fn
            os.system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(updateFreq=wait_time)  # larger number better for more jobs

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(
            os.path.join(pread_dir, os.path.basename(config["input_fofn_fn"])))
        make_fofn_abs_task = PypeTask(inputs={"i_fofn": rawread_fofn_plf},
                                      outputs={"o_fofn": pread_fofn},
                                      parameters={},
                                      TaskType=PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile(os.path.join(pread_dir, "pdb_build_done"))
    parameters = {"work_dir": pread_dir, "config": config}
    make_build_pdb_task = PypeTask(inputs={"pread_fofn": pread_fofn},
                                   outputs={"pdb_build_done": pdb_build_done},
                                   parameters=parameters,
                                   TaskType=PypeThreadTaskBase,
                                   URL="task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(build_pdb)
    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done])

    db_file = makePypeLocalFile(os.path.join(pread_dir, "%s.db" % "preads"))

    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(
        pread_dir, "preads", db_file, pdb_build_done, config, pread_aln=True)
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq=30)  # larger number better for more jobs

    p_da_done = makePypeLocalFile(os.path.join(pread_dir, "da_done"))

    @PypeTask(inputs=daligner_out,
              outputs={"da_done": p_da_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pda_check")
    def check_p_da_task(self):
        os.system("touch %s" % fn(self.da_done))

    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
        pread_dir, "preads", p_da_done, config)
    wf.addTasks(merge_tasks)
    #wf.refreshTargets(updateFreq=30)  # all

    p_merge_done = makePypeLocalFile(os.path.join(pread_dir, "p_merge_done"))

    @PypeTask(inputs=merge_out,
              outputs={"p_merge_done": p_merge_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pmerge_check")
    def check_p_merge_check_task(self):
        os.system("touch %s" % fn(self.p_merge_done))

    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq=wait_time)  # all

    falcon_asm_done = makePypeLocalFile(os.path.join(falcon_asm_dir, "falcon_asm_done"))

    @PypeTask(inputs={"p_merge_done": p_merge_done,
                      "db_file": db_file},
              outputs={"falcon_asm_done": falcon_asm_done},
              parameters={"wd": falcon_asm_dir,
                          "config": config,
                          "pread_dir": pread_dir},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/falcon")
    def run_falcon_asm_task(self):
        wd = self.parameters["wd"]
        config = self.parameters["config"]
        install_prefix = config["install_prefix"]
        pread_dir = self.parameters["pread_dir"]
        script_dir = os.path.join(wd)
        script_fn = os.path.join(script_dir, "run_falcon_asm.sh")

        script = []
        script.append("set -vex")
        script.append("trap 'touch %s.exit' EXIT" % fn(self.falcon_asm_done))
        script.append("source {install_prefix}/bin/activate".format(
            install_prefix=install_prefix))
        script.append("cd %s" % pread_dir)
        # Write preads4falcon.fasta, in 1-preads_ovl:
        script.append("DB2Falcon -U preads")
        script.append("cd %s" % wd)
        script.append("""find %s/las_files -name "*.las" > las.fofn """ % pread_dir)
        overlap_filtering_setting = config["overlap_filtering_setting"]
        length_cutoff_pr = config["length_cutoff_pr"]
        script.append("""fc_ovlp_filter.py --db %s --fofn las.fofn %s --min_len %d > preads.ovl""" %
                      (fn(db_file), overlap_filtering_setting, length_cutoff_pr))
        script.append("ln -sf %s/preads4falcon.fasta ." % pread_dir)
        script.append("""fc_ovlp_to_graph.py preads.ovl --min_len %d > fc_ovlp_to_graph.log""" %
                      length_cutoff_pr)  # TODO: drop this logfile
        # Write 'p_ctg.fa' and 'a_ctg.fa':
        script.append("""fc_graph_to_contig.py""")
        script.append("""touch %s""" % fn(self.falcon_asm_done))

        with open(script_fn, "w") as script_file:
            script_file.write("\n".join(script))

        job_data = make_job_data(self.URL, script_fn)
        job_data["sge_option"] = config["sge_option_fc"]
        run_script(job_data, job_type=config["job_type"])
        wait_for_file(fn(self.falcon_asm_done), task=self, job_name=job_data['job_name'])

    wf.addTask(run_falcon_asm_task)
    wf.refreshTargets(updateFreq=wait_time)  # all
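# make_job_data() is defined outside this excerpt. A hypothetical sketch,
# inferred from the inline job_data dicts used in the older version above
# (job name derived from the task URL plus a short uuid; "sge_option" is
# added by the caller); the real helper may differ.
import os
import uuid

def make_job_data(url, script_fn):
    job_name = url.split("/")[-1] + "-" + str(uuid.uuid4())[:8]
    return {"job_name": job_name,
            "cwd": os.path.dirname(script_fn),
            "script_fn": script_fn}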
rawread_dir = os.path.abspath("./0-rawreads")
pread_dir = os.path.abspath("./1-preads_ovl")
falcon_asm_dir = os.path.abspath("./2-asm-falcon")
script_dir = os.path.abspath("./scripts")
sge_log_dir = os.path.abspath("./sge_log")

for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
    try:
        os.makedirs(d)
    except OSError:
        pass

config = get_config(sys.argv[1])
concurrent_jobs = config["pa_concurrent_jobs"]
PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
wf = PypeThreadWorkflow()

if config["input_type"] == "raw":
    #### import sequences into daligner DB
    input_h5_fofn = makePypeLocalFile(os.path.abspath(config["input_fofn_fn"]))
    rdb_build_done = makePypeLocalFile(os.path.join(rawread_dir, "rdb_build_done"))
    parameters = {"work_dir": rawread_dir, "config": config}
    make_build_rdb_task = PypeTask(inputs={"input_fofn": input_h5_fofn},
                                   outputs={"rdb_build_done": rdb_build_done},
                                   parameters=parameters,
                                   TaskType=PypeThreadTaskBase)
    build_rdb_task = make_build_rdb_task(build_rdb)
def main1(prog_name, input_config_fn, logger_config_fn=None):
    global fc_run_logger
    fc_run_logger = support.setup_logger(logger_config_fn)

    fc_run_logger.info("fc_run started with configuration %s", input_config_fn)
    config = support.get_config(support.parse_config(input_config_fn))

    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        support.make_dirs(d)

    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    input_fofn_plf = makePypeLocalFile(os.path.basename(config["input_fofn_fn"]))
    rawread_fofn_plf = makePypeLocalFile(
        os.path.join(rawread_dir, os.path.basename(config["input_fofn_fn"])))
    make_fofn_abs_task = PypeTask(inputs={"i_fofn": input_fofn_plf},
                                  outputs={"o_fofn": rawread_fofn_plf},
                                  parameters={},
                                  TaskType=PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_raw)
    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile(os.path.join(rawread_dir, "sleep_done"))
        rdb_build_done = makePypeLocalFile(os.path.join(rawread_dir, "rdb_build_done"))
        run_jobs = makePypeLocalFile(os.path.join(rawread_dir, "run_jobs.sh"))
        parameters = {"work_dir": rawread_dir, "config": config}
        make_build_rdb_task = PypeTask(inputs={"input_fofn": rawread_fofn_plf},
                                       outputs={"rdb_build_done": rdb_build_done,
                                                "run_jobs": run_jobs},
                                       parameters=parameters,
                                       TaskType=PypeThreadTaskBase)
        build_rdb_task = make_build_rdb_task(task_build_rdb)
        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done])

        db_file = makePypeLocalFile(os.path.join(rawread_dir, "%s.db" % "raw_reads"))

        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(
            fn(run_jobs), rawread_dir, "raw_reads", db_file, rdb_build_done, config)
        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq=60)  # larger number better for more jobs

        r_da_done = makePypeLocalFile(os.path.join(rawread_dir, "da_done"))

        @PypeTask(inputs=daligner_out,
                  outputs={"da_done": r_da_done},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/rda_check")
        def check_r_da_task(self):
            os.system("touch %s" % fn(self.da_done))

        wf.addTask(check_r_da_task)
        # refreshTargets() must be called here to run the jobs before the
        # concurrency limit is changed below; a larger updateFreq is better
        # when there are more jobs.
        wf.refreshTargets(updateFreq=wait_time)

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
            fn(run_jobs), rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks(merge_tasks)
        if config["target"] == "overlapping":
            wf.refreshTargets(updateFreq=wait_time)
            sys.exit(0)
        wf.addTasks(consensus_tasks)

        r_cns_done = makePypeLocalFile(os.path.join(rawread_dir, "cns_done"))
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, "input_preads.fofn"))

        @PypeTask(inputs=consensus_out,
                  outputs={"cns_done": r_cns_done,
                           "pread_fofn": pread_fofn},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/cns_check")
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn), "w") as f:
                fn_list = glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >> f, fa_fn
            os.system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(updateFreq=wait_time)  # larger number better for more jobs

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(
            os.path.join(pread_dir, os.path.basename(config["input_fofn_fn"])))
        make_fofn_abs_task = PypeTask(inputs={"i_fofn": rawread_fofn_plf},
                                      outputs={"o_fofn": pread_fofn},
                                      parameters={},
                                      TaskType=PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile(os.path.join(pread_dir, "pdb_build_done"))
    parameters = {"work_dir": pread_dir, "config": config}
    run_jobs = makePypeLocalFile(os.path.join(pread_dir, 'run_jobs.sh'))
    make_build_pdb_task = PypeTask(inputs={"pread_fofn": pread_fofn},
                                   outputs={"pdb_build_done": pdb_build_done,
                                            "run_jobs": run_jobs},
                                   parameters=parameters,
                                   TaskType=PypeThreadTaskBase,
                                   URL="task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(task_build_pdb)
    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done])

    db_file = makePypeLocalFile(os.path.join(pread_dir, "%s.db" % "preads"))

    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(
        fn(run_jobs), pread_dir, "preads", db_file, pdb_build_done, config,
        pread_aln=True)
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq=30)  # larger number better for more jobs

    p_da_done = makePypeLocalFile(os.path.join(pread_dir, "da_done"))

    @PypeTask(inputs=daligner_out,
              outputs={"da_done": p_da_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pda_check")
    def check_p_da_task(self):
        os.system("touch %s" % fn(self.da_done))

    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
        fn(run_jobs), pread_dir, "preads", p_da_done, config)
    wf.addTasks(merge_tasks)
    #wf.refreshTargets(updateFreq=30)  # all

    p_merge_done = makePypeLocalFile(os.path.join(pread_dir, "p_merge_done"))

    @PypeTask(inputs=merge_out,
              outputs={"p_merge_done": p_merge_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pmerge_check")
    def check_p_merge_check_task(self):
        os.system("touch %s" % fn(self.p_merge_done))

    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq=wait_time)  # all

    falcon_asm_done = makePypeLocalFile(os.path.join(falcon_asm_dir, "falcon_asm_done"))
    make_run_falcon_asm = PypeTask(inputs={"p_merge_done": p_merge_done,
                                           "db_file": db_file},
                                   outputs={"falcon_asm_done": falcon_asm_done},
                                   parameters={"wd": falcon_asm_dir,
                                               "config": config,
                                               "pread_dir": pread_dir},
                                   TaskType=PypeThreadTaskBase,
                                   URL="task://localhost/falcon")
    wf.addTask(make_run_falcon_asm(task_run_falcon_asm))
    wf.refreshTargets(updateFreq=wait_time)  # all
def main(*argv):
    setup_logger()
    if len(argv) < 2:
        print "you need to specify a configuration file"
        print "example: HGAP.py HGAP_run.cfg"
        sys.exit(1)

    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")
    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        try:
            os.makedirs(d)
        except OSError:  # directory already exists
            pass

    config = get_config(argv[1])
    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        input_h5_fofn = makePypeLocalFile(os.path.abspath(config["input_fofn_fn"]))
        rdb_build_done = makePypeLocalFile(os.path.join(rawread_dir, "rdb_build_done"))
        parameters = {"work_dir": rawread_dir, "config": config}
        make_build_rdb_task = PypeTask(inputs={"input_fofn": input_h5_fofn},
                                       outputs={"rdb_build_done": rdb_build_done},
                                       parameters=parameters,
                                       TaskType=PypeThreadTaskBase)
        build_rdb_task = make_build_rdb_task(build_rdb)
        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done])

        db_file = makePypeLocalFile(os.path.join(rawread_dir, "%s.db" % "raw_reads"))
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(
                rawread_dir, "raw_reads", db_file, rdb_build_done, config)
        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq=60)  # larger number is better for more jobs

        r_da_done = makePypeLocalFile(os.path.join(rawread_dir, "da_done"))

        @PypeTask(inputs=daligner_out,
                  outputs={"da_done": r_da_done},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/rda_check")
        def check_r_da_task(self):
            os.system("touch %s" % fn(self.da_done))

        wf.addTask(check_r_da_task)
        # refresh here to run the daligner jobs before the concurrency changes
        wf.refreshTargets(updateFreq=wait_time)  # larger number is better for more jobs

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
                rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks(merge_tasks)
        if config["target"] == "overlapping":
            wf.refreshTargets(updateFreq=wait_time)  # run the merge jobs, then stop
            sys.exit(0)
        wf.addTasks(consensus_tasks)

        r_cns_done = makePypeLocalFile(os.path.join(rawread_dir, "cns_done"))
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, "input_preads.fofn"))

        @PypeTask(inputs=consensus_out,
                  outputs={"cns_done": r_cns_done,
                           "pread_fofn": pread_fofn},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/cns_check")
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn), "w") as f:
                fn_list = glob.glob("%s/preads/out*.fa" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >>f, fa_fn
            os.system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(updateFreq=wait_time)  # larger number is better for more jobs

    if config["target"] == "pre-assembly":
        sys.exit(0)

    if config["input_type"] == "preads":
        if not os.path.exists("%s/input_preads.fofn" % pread_dir):
            os.system("cp %s %s/input_preads.fofn" % (
                    os.path.abspath(config["input_fofn_fn"]), pread_dir))

    pread_fofn = makePypeLocalFile(os.path.join(pread_dir, "input_preads.fofn"))
    rdb_build_done = makePypeLocalFile(os.path.join(pread_dir, "rdb_build_done"))

    @PypeTask(inputs={"pread_fofn": pread_fofn},
              outputs={"rdb_build_done": rdb_build_done},
              parameters={"config": config, "pread_dir": pread_dir},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/build_p_rdb")
    def build_p_rdb_task(self):
        config = self.parameters["config"]
        pread_dir = self.parameters["pread_dir"]
        fa_serial = 0
        for fa_fn in open(fn(self.pread_fofn)):
            fa_fn = fa_fn.strip()
            c = 0
            fa_serial += 1
            with open("%s/preads_norm_%05d.fasta" % (pread_dir, fa_serial), "w") as p_norm:
                f = FastaReader(fa_fn)
                for r in f:
                    if len(r.sequence) < config["length_cutoff_pr"]:
                        continue
                    name = r.name.replace("_", "")  # computed but unused in this version
                    ignore_read = False
                    for cc in r.sequence:
                        if cc not in ("A", "C", "G", "T"):
                            ignore_read = True
                            break
                    if ignore_read:
                        continue
                    print >>p_norm, ">prolog_%05d/%d/%d_%d" % (fa_serial, c, 0, len(r.sequence))
                    # wrap the sequence at 80 columns; the original printed the
                    # tail using the loop variable after the loop, which raises
                    # NameError for reads shorter than 80 bases
                    for i in range(0, len(r.sequence), 80):
                        print >>p_norm, r.sequence[i:i + 80]
                    c += 1
            os.system("cd %s; fasta2DB preads preads_norm_%05d.fasta" % (pread_dir, fa_serial))
        os.system("cd %s; DBsplit %s preads" % (pread_dir, config["ovlp_DBsplit_option"]))
        os.system("cd %s; HPCdaligner %s preads > run_jobs.sh" % (pread_dir, config["ovlp_HPCdaligner_option"]))
        os.system("cd %s; touch rdb_build_done" % pread_dir)

    wf.addTask(build_p_rdb_task)
    wf.refreshTargets(updateFreq=wait_time)  # larger number is better for more jobs

    db_file = makePypeLocalFile(os.path.join(pread_dir, "%s.db" % "preads"))
    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(
            pread_dir, "preads", db_file, rdb_build_done, config, pread_aln=True)
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq=30)  # larger number is better for more jobs

    p_da_done = makePypeLocalFile(os.path.join(pread_dir, "da_done"))

    @PypeTask(inputs=daligner_out,
              outputs={"da_done": p_da_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pda_check")
    def check_p_da_task(self):
        os.system("touch %s" % fn(self.da_done))

    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
            pread_dir, "preads", p_da_done, config)
    wf.addTasks(merge_tasks)
    #wf.refreshTargets(updateFreq=30)  # all

    p_merge_done = makePypeLocalFile(os.path.join(pread_dir, "p_merge_done"))

    @PypeTask(inputs=merge_out,
              outputs={"p_merge_done": p_merge_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pmerge_check")
    def check_p_merge_check_task(self):
        os.system("touch %s" % fn(self.p_merge_done))

    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq=wait_time)  # all

    falcon_asm_done = makePypeLocalFile(os.path.join(falcon_asm_dir, "falcon_asm_done"))

    @PypeTask(inputs={"p_merge_done": p_merge_done},
              outputs={"falcon_asm_done": falcon_asm_done},
              parameters={"wd": falcon_asm_dir,
                          "config": config,
                          "pread_dir": pread_dir},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/falcon")
    def run_falcon_asm_task(self):
        wd = self.parameters["wd"]
        config = self.parameters["config"]
        install_prefix = config["install_prefix"]
        pread_dir = self.parameters["pread_dir"]
        script_dir = os.path.join(wd)
        script_fn = os.path.join(script_dir, "run_falcon_asm.sh")
        script = []
        script.append("source {install_prefix}/bin/activate".format(install_prefix=install_prefix))
        script.append("cd %s" % pread_dir)
        script.append("DB2Falcon preads")
        script.append("cd %s" % wd)
        script.append("""find %s/las_files -name "*.las" > las.fofn """ % pread_dir)
        overlap_filtering_setting = config["overlap_filtering_setting"]
        length_cutoff_pr = config["length_cutoff_pr"]
        script.append("""fc_ovlp_filter.py --fofn las.fofn %s \
--n_core 24 --min_len %d > preads.ovl""" % (overlap_filtering_setting, length_cutoff_pr))
        script.append("ln -sf %s/preads4falcon.fasta ." % pread_dir)
        script.append("""fc_ovlp_to_graph.py preads.ovl > fc.log""")
        script.append("""fc_graph_to_contig.py""")
        script.append("""touch %s\n""" % fn(self.falcon_asm_done))
        # write the script, submit it, then block on the sentinel file
        # (a local-execution sketch of this idiom follows main)
        with open(script_fn, "w") as script_file:
            script_file.write("\n".join(script))
        job_name = self.URL.split("/")[-1]
        job_name += "-" + str(uuid.uuid1())[:8]
        job_data = {"job_name": job_name,
                    "cwd": wd,
                    "sge_option": config["sge_option_fc"],
                    "script_fn": script_fn}
        run_script(job_data, job_type="SGE")
        wait_for_file(fn(self.falcon_asm_done), task=self, job_name=job_name)

    wf.addTask(run_falcon_asm_task)
    wf.refreshTargets(updateFreq=wait_time)  # all
def main1(prog_name, input_config_fn, logger_config_fn=None):
    global fc_run_logger
    fc_run_logger = support.setup_logger(logger_config_fn)
    fc_run_logger.info("fc_run started with configuration %s", input_config_fn)
    config = support.get_dict_from_old_falcon_cfg(support.parse_config(input_config_fn))
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")
    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        support.make_dirs(d)

    exitOnFailure = config['stop_all_jobs_on_failure']  # only matters for parallel jobs
    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    input_fofn_plf = makePypeLocalFile(config["input_fofn"])
    rawread_fofn_plf = makePypeLocalFile(os.path.join(rawread_dir, os.path.basename(config["input_fofn"])))
    make_fofn_abs_task = PypeTask(inputs={"i_fofn": input_fofn_plf},
                                  outputs={"o_fofn": rawread_fofn_plf},
                                  parameters={},
                                  TaskType=PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_raw)
    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile(os.path.join(rawread_dir, "sleep_done"))  # unused sentinel
        rdb_build_done = makePypeLocalFile(os.path.join(rawread_dir, "rdb_build_done"))
        run_jobs = makePypeLocalFile(os.path.join(rawread_dir, "run_jobs.sh"))
        parameters = {"work_dir": rawread_dir, "config": config}

        raw_reads_db = makePypeLocalFile(os.path.join(rawread_dir, "%s.db" % "raw_reads"))
        make_build_rdb_task = PypeTask(inputs={"input_fofn": rawread_fofn_plf},
                                       outputs={"rdb_build_done": rdb_build_done,
                                                "raw_reads.db": raw_reads_db,
                                                "run_jobs": run_jobs},
                                       parameters=parameters,
                                       TaskType=PypeThreadTaskBase)
        build_rdb_task = make_build_rdb_task(task_build_rdb)
        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done])

        raw_reads_nblock = support.get_nblock(fn(raw_reads_db))
        # (see the sketch after this function for what reading "nblock"
        # from a DAZZ_DB stub may involve)
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(
                fn(run_jobs), rawread_dir, "raw_reads", rdb_build_done, config)
        wf.addTasks(daligner_tasks)

        r_da_done = makePypeLocalFile(os.path.join(rawread_dir, "da_done"))
        parameters = {
            "nblock": raw_reads_nblock,
        }
        make_daligner_gather = PypeTask(inputs=daligner_out,
                                        outputs={"da_done": r_da_done},
                                        parameters=parameters,
                                        TaskType=PypeThreadTaskBase,
                                        URL="task://localhost/rda_check")
        check_r_da_task = make_daligner_gather(task_daligner_gather)
        wf.addTask(check_r_da_task)
        wf.refreshTargets(exitOnFailure=exitOnFailure)

        merge_tasks, merge_out, p_ids_merge_job_done = create_merge_tasks(
                fn(run_jobs), rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks(merge_tasks)
        wf.refreshTargets(exitOnFailure=exitOnFailure)

        if config["target"] == "overlapping":
            sys.exit(0)

        consensus_tasks, consensus_out = create_consensus_tasks(
                rawread_dir, "raw_reads", config, p_ids_merge_job_done)
        wf.addTasks(consensus_tasks)

        r_cns_done = makePypeLocalFile(os.path.join(rawread_dir, "cns_done"))
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, "input_preads.fofn"))

        @PypeTask(inputs=consensus_out,
                  outputs={"cns_done": r_cns_done,
                           "pread_fofn": pread_fofn},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/cns_check")
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn), "w") as f:
                fn_list = glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >>f, fa_fn
            system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        wf.refreshTargets(exitOnFailure=exitOnFailure)

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, os.path.basename(config["input_fofn"])))
        make_fofn_abs_task = PypeTask(inputs={"i_fofn": rawread_fofn_plf},
                                      outputs={"o_fofn": pread_fofn},
                                      parameters={},
                                      TaskType=PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile(os.path.join(pread_dir, "pdb_build_done"))
    parameters = {"work_dir": pread_dir, "config": config}
    run_jobs = makePypeLocalFile(os.path.join(pread_dir, 'run_jobs.sh'))
    preads_db = makePypeLocalFile(os.path.join(pread_dir, 'preads.db'))  # Also .preads.*, of course.
    make_build_pdb_task = PypeTask(inputs={"pread_fofn": pread_fofn},
                                   outputs={"pdb_build_done": pdb_build_done,
                                            "preads_db": preads_db,
                                            "run_jobs": run_jobs},
                                   parameters=parameters,
                                   TaskType=PypeThreadTaskBase,
                                   URL="task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(task_build_pdb)
    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done])

    preads_nblock = support.get_nblock(fn(preads_db))
    #### run daligner
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(
            fn(run_jobs), pread_dir, "preads", pdb_build_done, config, pread_aln=True)
    wf.addTasks(daligner_tasks)

    p_da_done = makePypeLocalFile(os.path.join(pread_dir, "da_done"))
    parameters = {
        "nblock": preads_nblock,
    }
    make_daligner_gather = PypeTask(inputs=daligner_out,
                                    outputs={"da_done": p_da_done},
                                    parameters=parameters,
                                    TaskType=PypeThreadTaskBase,
                                    URL="task://localhost/pda_check")
    check_p_da_task = make_daligner_gather(task_daligner_gather)
    wf.addTask(check_p_da_task)
    wf.refreshTargets(exitOnFailure=exitOnFailure)

    merge_tasks, merge_out, _ = create_merge_tasks(
            fn(run_jobs), pread_dir, "preads", p_da_done, config)
    wf.addTasks(merge_tasks)

    p_merge_done = makePypeLocalFile(os.path.join(pread_dir, "p_merge_done"))

    @PypeTask(inputs=merge_out,
              outputs={"p_merge_done": p_merge_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pmerge_check")
    def check_p_merge_check_task(self):
        system("touch %s" % fn(self.p_merge_done))

    wf.addTask(check_p_merge_check_task)

    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf.refreshTargets(exitOnFailure=exitOnFailure)

    db2falcon_done = makePypeLocalFile(os.path.join(pread_dir, "db2falcon_done"))
    make_run_db2falcon = PypeTask(inputs={"p_merge_done": p_merge_done},
                                  outputs={"db2falcon_done": db2falcon_done},
                                  parameters={"wd": pread_dir,
                                              "config": config},
                                  TaskType=PypeThreadTaskBase,
                                  URL="task://localhost/db2falcon")
    wf.addTask(make_run_db2falcon(task_run_db2falcon))

    falcon_asm_done = makePypeLocalFile(os.path.join(falcon_asm_dir, "falcon_asm_done"))
    make_run_falcon_asm = PypeTask(inputs={"db2falcon_done": db2falcon_done,
                                           "db_file": preads_db},
                                   outputs={"falcon_asm_done": falcon_asm_done},
                                   parameters={"wd": falcon_asm_dir,
                                               "config": config,
                                               "pread_dir": pread_dir},
                                   TaskType=PypeThreadTaskBase,
                                   URL="task://localhost/falcon")
    wf.addTask(make_run_falcon_asm(task_run_falcon_asm))
    wf.refreshTargets()
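# task_daligner_gather above is parameterized by "nblock", obtained via
# support.get_nblock() from the DAZZ_DB .db stub file. A plausible standalone
# sketch of that lookup (illustration only): it assumes, as DBsplit's ASCII
# output suggests, a line of the form "blocks = N" in the .db file, and the
# helper name read_nblock is hypothetical, not necessarily how
# support.get_nblock() is implemented:
def read_nblock(db_fn):
    nblock = 1  # a DB that was never split behaves as a single block
    with open(db_fn) as f:
        for line in f:
            if line.strip().startswith("blocks"):
                # e.g. "blocks =          16"
                nblock = int(line.split("=")[1].strip())
                break
    return nblock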
def unzip_all(config):
    unzip_concurrent_jobs = config["unzip_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(unzip_concurrent_jobs, unzip_concurrent_jobs)
    wf = PypeThreadWorkflow()

    ctg_list_file = makePypeLocalFile("./3-unzip/reads/ctg_list")
    falcon_asm_done = makePypeLocalFile("./2-asm-falcon/falcon_asm_done")
    parameters = {"wd": os.path.abspath("."), "config": config}
    job_done = makePypeLocalFile(os.path.join(parameters["wd"], "track_reads_done"))
    make_track_reads_task = PypeTask(inputs={"falcon_asm_done": falcon_asm_done},
                                     outputs={"job_done": job_done,
                                              "ctg_list_file": ctg_list_file},
                                     parameters=parameters,
                                     TaskType=PypeThreadTaskBase,
                                     URL="task://localhost/track_reads")
    track_reads_task = make_track_reads_task(task_track_reads)
    wf.addTask(track_reads_task)
    wf.refreshTargets()  # force refresh now; proper dependence will come later

    ctg_ids = []
    with open("./3-unzip/reads/ctg_list") as f:
        for row in f:
            row = row.strip()
            ctg_ids.append(row)

    aln1_outs = {}
    all_ctg_out = {}
    for ctg_id in ctg_ids:
        # inputs
        ref_fasta = makePypeLocalFile("./3-unzip/reads/{ctg_id}_ref.fa".format(ctg_id=ctg_id))
        read_fasta = makePypeLocalFile("./3-unzip/reads/{ctg_id}_reads.fa".format(ctg_id=ctg_id))
        # outputs
        wd = os.path.join(os.getcwd(), "./3-unzip/0-phasing/{ctg_id}/".format(ctg_id=ctg_id))
        mkdir(wd)
        ctg_aln_out = makePypeLocalFile(os.path.join(wd, "{ctg_id}_sorted.bam".format(ctg_id=ctg_id)))
        job_done = makePypeLocalFile(os.path.join(wd, "aln_{ctg_id}_done".format(ctg_id=ctg_id)))
        parameters = {"job_uid": "aln-" + ctg_id,
                      "wd": wd,
                      "config": config,
                      "ctg_id": ctg_id}
        make_blasr_task = PypeTask(inputs={"ref_fasta": ref_fasta,
                                           "read_fasta": read_fasta},
                                   outputs={"ctg_aln_out": ctg_aln_out,
                                            "job_done": job_done},
                                   parameters=parameters,
                                   TaskType=PypeThreadTaskBase,
                                   URL="task://localhost/aln_{ctg_id}".format(ctg_id=ctg_id))
        blasr_task = make_blasr_task(task_run_blasr)
        aln1_outs[ctg_id] = (ctg_aln_out, job_done)
        wf.addTask(blasr_task)

        job_done = makePypeLocalFile(os.path.join(wd, "p_{ctg_id}_done".format(ctg_id=ctg_id)))
        rid_to_phase_out = makePypeLocalFile(os.path.join(wd, "rid_to_phase.{ctg_id}".format(ctg_id=ctg_id)))
        all_ctg_out["r2p.{ctg_id}".format(ctg_id=ctg_id)] = rid_to_phase_out
        parameters = {"job_uid": "ha-" + ctg_id,
                      "wd": wd,
                      "config": config,
                      "ctg_id": ctg_id}
        make_phasing_task = PypeTask(inputs={"ref_fasta": ref_fasta,
                                             "aln_bam": ctg_aln_out},
                                     outputs={"job_done": job_done},
                                     parameters=parameters,
                                     TaskType=PypeThreadTaskBase,
                                     URL="task://localhost/p_{ctg_id}".format(ctg_id=ctg_id))
        phasing_task = make_phasing_task(task_phasing)
        wf.addTask(phasing_task)

    wf.refreshTargets()

    hasm_wd = os.path.abspath("./3-unzip/1-hasm/")
    mkdir(hasm_wd)
    rid_to_phase_all = makePypeLocalFile(os.path.join(hasm_wd, "rid_to_phase.all"))

    @PypeTask(inputs=all_ctg_out,
              outputs={"rid_to_phase_all": rid_to_phase_all},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/rid_to_phase_all")
    def get_rid_to_phase_all(self):
        # concatenate every per-contig rid_to_phase file into one
        # (a standalone sketch of this merge follows unzip_all)
        rid_to_phase_all_fn = fn(self.rid_to_phase_all)
        inputs_fn = [fn(f) for f in self.inputs.values()]
        inputs_fn.sort()
        output = []
        for fname in inputs_fn:
            with open(fname) as f:
                output.append(f.read())
        with open(rid_to_phase_all_fn, "w") as out:
            out.write("".join(output))

    wf.addTask(get_rid_to_phase_all)

    parameters["wd"] = hasm_wd
    job_done = makePypeLocalFile(os.path.join(hasm_wd, "hasm_done"))
    make_hasm_task = PypeTask(inputs={"rid_to_phase_all": rid_to_phase_all},
                              outputs={"job_done": job_done},
                              parameters=parameters,
                              TaskType=PypeThreadTaskBase,
                              URL="task://localhost/hasm")
    hasm_task = make_hasm_task(task_hasm)
    wf.addTask(hasm_task)

    wf.refreshTargets()
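# The merge inside get_rid_to_phase_all is just ordered file concatenation. A
# minimal standalone sketch (illustration only; concat_files is a hypothetical
# name, not part of this module):
def concat_files(input_fns, out_fn):
    # deterministic order; whole-file reads are fine for these small text files
    with open(out_fn, "w") as out:
        for fname in sorted(input_fns):
            with open(fname) as f:
                out.write(f.read())

# e.g.:
#   concat_files(glob.glob("3-unzip/0-phasing/*/rid_to_phase.*"),
#                "3-unzip/1-hasm/rid_to_phase.all")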
        job_name = self.URL.split("/")[-1]
        job_name += "-" + str(uuid.uuid1())[:8]
        job_data = {"job_name": job_name,
                    "cwd": cwd,
                    "sge_option": " -pe smp 6 -q huasm ",
                    "script_fn": script_fn}
        run_script(job_data, job_type="SGE")
        wait_for_file(os.path.join(cwd, "c_%05d_done" % job_id), task=self, job_name=job_name)


if __name__ == "__main__":
    prefix = sys.argv[1]
    concurrent_jobs = 16
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    # group the LAsort/LAmerge commands from run_jobs.sh by partition id
    # (a completed standalone sketch of this scan follows below)
    mjob_data = {}
    with open("run_jobs.sh") as f:
        for l in f:
            l = l.strip().split()
            if not l:  # guard added: a blank line would raise IndexError below
                continue
            if l[0] not in ("LAsort", "LAmerge"):
                continue
            if l[0] == "LAsort":
                p_id = int(l[2].split(".")[1])
                mjob_data.setdefault(p_id, [])
                mjob_data[p_id].append(" ".join(l))
            if l[0] == "LAmerge":
                l2 = l[2].split(".")
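# A self-contained version of the scan above, for the LAsort half only (the
# LAmerge branch is truncated in this fragment, so it is not reproduced here).
# Illustration only; group_sort_jobs is a hypothetical name. It groups each
# LAsort command line under the integer partition id that the code above
# extracts from the second dot-separated field of the command's third token:
def group_sort_jobs(run_jobs_fn):
    mjob_data = {}
    with open(run_jobs_fn) as f:
        for l in f:
            l = l.strip().split()
            if not l or l[0] != "LAsort":
                continue
            p_id = int(l[2].split(".")[1])
            mjob_data.setdefault(p_id, []).append(" ".join(l))
    return mjob_data

# e.g. group_sort_jobs("run_jobs.sh") -> {1: ["LAsort ..."], 2: ["LAsort ..."]}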
    unzip_concurrent_jobs = config.getint('Unzip', 'unzip_concurrent_jobs')
    config = {"job_type": job_type,
              "sge_blasr_aln": sge_blasr_aln,
              "smrt_bin": smrt_bin,
              "sge_phasing": sge_phasing}
    support.job_type = "SGE"  # tmp hack until we have a configuration parser

    ctg_ids = []
    with open("./3-unzip/reads/ctg_list") as f:
        for row in f:
            row = row.strip()
            ctg_ids.append(row)

    PypeThreadWorkflow.setNumThreadAllowed(unzip_concurrent_jobs, unzip_concurrent_jobs)
    wf = PypeThreadWorkflow()

    ctg_list_file = makePypeLocalFile("./3-unzip/reads/ctg_list")
    aln1_outs = {}
    for ctg_id in ctg_ids:
        # inputs
        ref_fasta = makePypeLocalFile("./3-unzip/reads/{ctg_id}_ref.fa".format(ctg_id=ctg_id))
        read_fasta = makePypeLocalFile("./3-unzip/reads/{ctg_id}_reads.fa".format(ctg_id=ctg_id))
        # outputs
        wd = os.path.join(os.getcwd(), "./3-unzip/0-phasing/{ctg_id}/".format(ctg_id=ctg_id))
        mkdir(wd)
        ctg_aln_out = makePypeLocalFile(os.path.join(wd, "{ctg_id}_sorted.bam".format(ctg_id=ctg_id)))
        job_done = makePypeLocalFile(os.path.join(wd, "aln_{ctg_id}_done".format(ctg_id=ctg_id)))