def task_track_reads(self):
    """Generate and submit the haplotig read-tracking shell script.

    The script is written to ``track_reads_h.sh`` inside the task working
    directory, handed to ``run_script`` and then ``wait_for_file`` is called
    on the ``job_done`` path.

    Values read from ``self``:
        job_done: resolved with ``fn``; the script touches this path as its
            final step and touches ``<job_done>.exit`` from an EXIT trap.
        parameters["wd"]: directory the script is written to and cds into.
        parameters["config"]: must provide ``input_bam_fofn``,
            ``sge_track_reads`` and ``job_type``.
    """
    done_path = fn(self.job_done)
    work_dir = self.parameters["wd"]
    cfg = self.parameters["config"]
    bam_fofn = cfg["input_bam_fofn"]
    sge_opts = cfg["sge_track_reads"]

    script_path = os.path.join(work_dir, "track_reads_h.sh")

    # One template per script line; all are expanded with the same mapping.
    values = {
        "job_done": done_path,
        "wd": work_dir,
        "input_bam_fofn": bam_fofn,
    }
    templates = (
        "set -vex",
        "trap 'touch {job_done}.exit' EXIT",
        "cd {wd}",
        "hostname",
        "date",
        "cd {wd}",
        "fc_get_read_hctg_map.py",
        "fc_rr_hctg_track.py",
        "mkdir -p 4-quiver/reads/",
        "fc_select_reads_from_bam.py {input_bam_fofn}",
        "date",
        "touch {job_done}",
    )
    script_text = "".join(tpl.format(**values) + "\n" for tpl in templates)

    with open(script_path, "w") as out:
        out.write(script_text)

    job = support.make_job_data(self.URL, script_path)
    job["sge_option"] = sge_opts
    run_script(job, job_type=cfg["job_type"])
    wait_for_file(done_path, task=self, job_name=job["job_name"])
def task_track_reads(self):
    """Generate and submit the read-tracking shell script for unzip.

    Writes ``track_reads.sh`` into the task working directory. The script
    runs the ``falcon_kit.mains`` modules ``get_read_ctg_map``,
    ``rr_ctg_track``, ``pr_ctg_track`` and ``fetch_reads`` (creating
    ``3-unzip/reads/`` before the last one). It is then passed to
    ``run_script``, and ``wait_for_file`` is called on the ``job_done`` path.

    Values read from ``self``:
        job_done: resolved with ``fn``; touched by the script on completion,
            with ``<job_done>.exit`` touched from an EXIT trap.
        parameters["wd"]: directory the script is written to and cds into.
        parameters["config"]: must provide ``sge_track_reads`` and
            ``job_type``.
    """
    done_path = fn(self.job_done)
    work_dir = self.parameters["wd"]
    cfg = self.parameters["config"]
    sge_opts = cfg["sge_track_reads"]

    script_path = os.path.join(work_dir, "track_reads.sh")

    enter_wd = "cd {wd}".format(wd=work_dir)
    commands = [
        "set -vex",
        "trap 'touch {job_done}.exit' EXIT".format(job_done=done_path),
        enter_wd,
        "hostname",
        "date",
        enter_wd,
    ]
    commands.extend(
        [
            "python -m falcon_kit.mains.get_read_ctg_map",
            "python -m falcon_kit.mains.rr_ctg_track",
            "python -m falcon_kit.mains.pr_ctg_track",
            "mkdir -p 3-unzip/reads/",
            "python -m falcon_kit.mains.fetch_reads",
        ]
    )
    commands.append("date")
    commands.append("touch {job_done}".format(job_done=done_path))

    with open(script_path, "w") as out:
        out.write("".join(cmd + "\n" for cmd in commands))

    job = support.make_job_data(self.URL, script_path)
    job["sge_option"] = sge_opts
    run_script(job, job_type=cfg["job_type"])
    wait_for_file(done_path, task=self, job_name=job["job_name"])
def task_hasm(self):
    """Generate and submit the haplotype-assembly (``hasm.sh``) script.

    The generated script:

    1. filters overlaps with ``fc_ovlp_filter_with_phase.py`` and builds the
       phased graph with ``fc_phased_ovlp_to_graph.py``;
    2. runs ``fc_graphs_to_h_tigs.py`` for all contigs;
    3. runs ``fc_dedup_h_tigs.py`` once per contig listed in
       ``../reads/ctg_list``, each in its own sub-directory;
    4. concatenates the per-contig outputs into the ``all_*`` files one
       directory above the working directory;
    5. touches the ``job_done`` path.

    The script is written into the working directory, submitted through
    ``run_script``, and ``wait_for_file`` is then called on ``job_done``.

    Values read from ``self``:
        job_done: resolved with ``fn``; ``<job_done>.exit`` is touched by an
            EXIT trap in the script.
        parameters["wd"]: directory the script is written to and cds into.
        parameters["config"]: must provide ``sge_hasm`` and ``job_type``.
    """
    job_done = fn(self.job_done)
    config = self.parameters["config"]
    sge_hasm = config["sge_hasm"]
    wd = self.parameters["wd"]
    job_type = config["job_type"]

    script_fn = os.path.join(wd, "hasm.sh")

    # Only this first part goes through str.format; the placeholders are
    # named explicitly instead of exposing every local via **locals().
    script = """\
set -vex
trap 'touch {job_done}.exit' EXIT
hostname
date
cd {wd}

fc_ovlp_filter_with_phase.py --fofn ../../2-asm-falcon/las.fofn --max_diff 120 --max_cov 120 --min_cov 1 --n_core 12 --min_len 2500 --db ../../1-preads_ovl/preads.db --rid_phase_map ./rid_to_phase.all > preads.p_ovl
fc_phased_ovlp_to_graph.py preads.p_ovl --min_len 2500 > fc.log
ln -sf db2falcon/preads4falcon.fasta ../../1-preads_ovl/
fc_graphs_to_h_tigs.py --fc_asm_path ../../2-asm-falcon/ --fc_hasm_path ./ --ctg_id all --rid_phase_map ./rid_to_phase.all --fasta ../../1-preads_ovl/preads4falcon.fasta
""".format(job_done=job_done, wd=wd)

    # This part is deliberately NOT formatted: it is plain shell text.
    #
    # Fix 1: `mkdir -p` used to sit between the word list and `do`
    #        (`for f in ...; mkdir ...; do ...`), which bash rejects as a
    #        syntax error. It now runs inside the loop body.
    # Fix 2: all_h_ctg_edges is appended to with `>>` like the other
    #        aggregate files, so it is now removed first as well; otherwise
    #        a re-run would duplicate its content.
    more_script = """
WD=$PWD
for f in `cat ../reads/ctg_list `; do mkdir -p $WD/$f; cd $WD/$f; fc_dedup_h_tigs.py $f; done

## prepare for quviering the haplotig
cd $WD/..
if [ -e "all_phased_reads" ]; then rm all_phased_reads; fi
if [ -e "all_h_ctg_ids" ]; then rm all_h_ctg_ids; fi
if [ -e "all_p_ctg_edges" ]; then rm all_p_ctg_edges; fi
if [ -e "all_h_ctg_edges" ]; then rm all_h_ctg_edges; fi
if [ -e "all_p_ctg.fa" ]; then rm all_p_ctg.fa; fi
if [ -e "all_h_ctg.fa" ]; then rm all_h_ctg.fa; fi

find 0-phasing -name "phased_reads" | sort | xargs cat >> all_phased_reads
find 1-hasm -name "h_ctg_ids.*" | sort | xargs cat >> all_h_ctg_ids
find 1-hasm -name "p_ctg_edges.*" | sort | xargs cat >> all_p_ctg_edges
find 1-hasm -name "h_ctg_edges.*" | sort | xargs cat >> all_h_ctg_edges
find 1-hasm -name "p_ctg.*.fa" | sort | xargs cat >> all_p_ctg.fa
find 1-hasm -name "h_ctg.*.fa" | sort | xargs cat >> all_h_ctg.fa
cd ../
date
"""
    script += more_script
    script += "touch {job_done}\n".format(job_done=job_done)

    with open(script_fn, "w") as script_file:
        script_file.write(script)

    job_data = support.make_job_data(self.URL, script_fn)
    job_data["sge_option"] = sge_hasm
    run_script(job_data, job_type=job_type)
    wait_for_file(job_done, task=self, job_name=job_data['job_name'])
def task_run_quiver(self):
    """Generate and submit the per-contig consensus (quiver) script.

    The script, written to ``cns_<ctg_id>.sh`` in the working directory,
    indexes the reference with ``samtools faidx``, converts the read SAM to
    BAM, aligns it with ``pbalign`` and runs ``variantCaller`` to produce the
    consensus FASTA and FASTQ. A ``variantCaller`` failure is only echoed, so
    the script still goes on to touch ``job_done``.

    Values read from ``self``:
        ref_fasta, read_sam, cns_fasta, cns_fastq, job_done: resolved to
            paths with ``fn``.
        parameters: ``job_uid``, ``wd``, ``ctg_id`` and ``config``.
        parameters["config"]: must provide ``smrt_bin``, ``sge_quiver`` and
            ``job_type``.
    """
    ref_path = fn(self.ref_fasta)
    sam_path = fn(self.read_sam)
    fasta_out = fn(self.cns_fasta)
    fastq_out = fn(self.cns_fastq)
    done_path = fn(self.job_done)

    # Looked up but not used in the script; kept so a missing key still
    # raises at this point.
    _job_uid = self.parameters["job_uid"]
    work_dir = self.parameters["wd"]
    cfg = self.parameters["config"]
    contig = self.parameters["ctg_id"]
    tool_dir = cfg["smrt_bin"]
    sge_opts = cfg["sge_quiver"]
    job_kind = cfg["job_type"]

    values = {
        "samtools": os.path.join(tool_dir, "samtools"),
        "pbalign": os.path.join(tool_dir, "pbalign"),
        "makePbi": os.path.join(tool_dir, "makePbi"),
        "variantCaller": os.path.join(tool_dir, "variantCaller"),
        "job_done": done_path,
        "wd": work_dir,
        "ctg_id": contig,
        "ref_fasta": ref_path,
        "read_sam": sam_path,
        "cns_fasta": fasta_out,
        "cns_fastq": fastq_out,
    }
    script_path = os.path.join(work_dir, "cns_{0}.sh".format(contig))

    templates = (
        "set -vex",
        "trap 'touch {job_done}.exit' EXIT",
        "cd {wd}",
        "hostname",
        "date",
        "cd {wd}",
        "{samtools} faidx {ref_fasta}",
        "{samtools} view -b -S {read_sam} > {ctg_id}.bam",
        (
            "{pbalign} --tmpDir=/localdisk/scratch/ --nproc=24"
            " --minAccuracy=0.75 --minLength=50"
            " --minAnchorSize=12 --maxDivergence=30 --concordant"
            " --algorithm=blasr"
            " --algorithmOptions=-useQuality --maxHits=1 --hitPolicy=random"
            " --seed=1"
            " {ctg_id}.bam {ref_fasta} aln-{ctg_id}.bam"
        ),
        # Emitted as a shell comment: makePbi is not actually run.
        "#{makePbi} --referenceFasta {ref_fasta} aln-{ctg_id}.bam",
        (
            "({variantCaller} -x 5 -X 120 -q 20 -j 24 -r {ref_fasta}"
            " aln-{ctg_id}.bam"
            " -o {cns_fasta} -o {cns_fastq}) || echo quvier failed"
        ),
        "date",
        "touch {job_done}",
    )
    script_text = "".join(tpl.format(**values) + "\n" for tpl in templates)

    with open(script_path, "w") as out:
        out.write(script_text)

    job = support.make_job_data(self.URL, script_path)
    job["sge_option"] = sge_opts
    run_script(job, job_type=job_kind)
    wait_for_file(done_path, task=self, job_name=job["job_name"])
def task_run_blasr(self):
    """Generate and submit the per-contig blasr alignment script.

    The script, written to ``aln_<ctg_id>.sh`` in the working directory,
    aligns the reads to the reference with ``blasr`` into ``tmp_aln.sam``,
    converts/sorts it to ``<ctg_id>_sorted.bam`` with ``samtools``, indexes
    the BAM, removes the temporary SAM and touches ``job_done``.

    Values read from ``self``:
        job_done, ref_fasta, read_fasta: resolved to paths with ``fn``.
        parameters: ``job_uid``, ``wd``, ``ctg_id`` and ``config``.
        parameters["config"]: must provide ``smrt_bin``, ``sge_blasr_aln``
            and ``job_type``.
    """
    done_path = fn(self.job_done)
    ref_path = fn(self.ref_fasta)
    reads_path = fn(self.read_fasta)

    # Looked up but not used in the script; kept so a missing key still
    # raises at this point.
    _job_uid = self.parameters["job_uid"]
    work_dir = self.parameters["wd"]
    contig = self.parameters["ctg_id"]
    cfg = self.parameters["config"]
    tool_dir = cfg["smrt_bin"]
    sge_opts = cfg["sge_blasr_aln"]
    job_kind = cfg["job_type"]

    values = {
        "blasr": os.path.join(tool_dir, "blasr"),
        "samtools": os.path.join(tool_dir, "samtools"),
        "job_done": done_path,
        "wd": work_dir,
        "ctg_id": contig,
        "ref_fasta": ref_path,
        "read_fasta": reads_path,
    }
    script_path = os.path.join(work_dir, "aln_{ctg_id}.sh".format(**values))

    templates = (
        "set -vex",
        "trap 'touch {job_done}.exit' EXIT",
        "cd {wd}",
        "hostname",
        "date",
        "cd {wd}",
        (
            "time {blasr} {read_fasta} {ref_fasta} -noSplitSubreads"
            " -clipping subread"
            " -hitPolicy randombest -randomSeed 42 -bestn 1"
            " -minPctIdentity 70.0"
            " -minMatch 12 -nproc 24 -sam -out tmp_aln.sam"
        ),
        "{samtools} view -bS tmp_aln.sam | {samtools} sort - {ctg_id}_sorted",
        "{samtools} index {ctg_id}_sorted.bam",
        "rm tmp_aln.sam",
        "date",
        "touch {job_done}",
    )
    script_text = "".join(tpl.format(**values) + "\n" for tpl in templates)

    with open(script_path, "w") as out:
        out.write(script_text)

    job = support.make_job_data(self.URL, script_path)
    job["sge_option"] = sge_opts
    run_script(job, job_type=job_kind)
    wait_for_file(done_path, task=self, job_name=job["job_name"])
def task_phasing(self):
    """Generate and submit the per-contig phasing script.

    The script, written to ``p_<ctg_id>.sh`` in the working directory, runs
    ``fc_phasing.py`` on the aligned BAM and reference, then
    ``fc_phasing_readmap.py``, and finally touches ``job_done``.

    Values read from ``self``:
        ref_fasta, aln_bam, job_done: resolved to paths with ``fn``.
        parameters: ``job_uid``, ``wd``, ``ctg_id`` and ``config``.
        parameters["config"]: must provide ``sge_phasing`` and ``job_type``.
    """
    ref_path = fn(self.ref_fasta)
    bam_path = fn(self.aln_bam)
    done_path = fn(self.job_done)

    # Looked up but not used in the script; kept so a missing key still
    # raises at this point.
    _job_uid = self.parameters["job_uid"]
    work_dir = self.parameters["wd"]
    contig = self.parameters["ctg_id"]
    cfg = self.parameters["config"]
    sge_opts = cfg["sge_phasing"]
    job_kind = cfg["job_type"]

    script_path = os.path.join(work_dir, "p_{0}.sh".format(contig))

    values = {
        "job_done": done_path,
        "wd": work_dir,
        "ctg_id": contig,
        "aln_bam": bam_path,
        "ref_fasta": ref_path,
    }
    # NOTE: overlap filtering, phased-graph construction and haplotig
    # generation/dedup used to be sketched here as commented-out commands;
    # they are not part of this script.
    templates = (
        "set -vex",
        "trap 'touch {job_done}.exit' EXIT",
        "cd {wd}",
        "hostname",
        "date",
        "cd {wd}",
        (
            "fc_phasing.py --bam {aln_bam} --fasta {ref_fasta}"
            " --ctg_id {ctg_id} --base_dir ../"
        ),
        (
            "fc_phasing_readmap.py --ctg_id {ctg_id}"
            " --read_map_dir ../../../2-asm-falcon/read_maps"
            " --phased_reads phased_reads"
        ),
        "date",
        "touch {job_done}",
    )
    script_text = "".join(tpl.format(**values) + "\n" for tpl in templates)

    with open(script_path, "w") as out:
        out.write(script_text)

    job = support.make_job_data(self.URL, script_path)
    job["sge_option"] = sge_opts
    run_script(job, job_type=job_kind)
    wait_for_file(done_path, task=self, job_name=job["job_name"])
def task_run_blasr(self):
    """Write and submit the blasr alignment script for one contig.

    Produces ``aln_<ctg_id>.sh`` in the working directory. The script runs
    ``blasr`` (output ``tmp_aln.sam``), pipes ``samtools view`` into
    ``samtools sort`` to create ``<ctg_id>_sorted.bam``, indexes it, deletes
    the temporary SAM and touches ``job_done``. The script is submitted with
    ``run_script`` and ``wait_for_file`` is then called on ``job_done``.

    Values read from ``self``:
        job_done, ref_fasta, read_fasta: resolved to paths with ``fn``.
        parameters: ``job_uid``, ``wd``, ``ctg_id`` and ``config``.
        parameters["config"]: must provide ``smrt_bin``, ``sge_blasr_aln``
            and ``job_type``.
    """
    done_path = fn(self.job_done)
    ref_path = fn(self.ref_fasta)
    reads_path = fn(self.read_fasta)

    # Looked up but not used in the script; kept so a missing key still
    # raises at this point.
    _job_uid = self.parameters["job_uid"]
    work_dir = self.parameters["wd"]
    contig = self.parameters["ctg_id"]
    cfg = self.parameters["config"]
    tool_dir = cfg["smrt_bin"]
    sge_opts = cfg["sge_blasr_aln"]
    job_kind = cfg["job_type"]

    blasr_bin = os.path.join(tool_dir, "blasr")
    samtools_bin = os.path.join(tool_dir, "samtools")
    script_path = os.path.join(work_dir, "aln_{0}.sh".format(contig))

    align_cmd = (
        "time {blasr} {read_fasta} {ref_fasta} -noSplitSubreads"
        " -clipping subread"
        " -hitPolicy randombest -randomSeed 42 -bestn 1"
        " -minPctIdentity 70.0"
        " -minMatch 12 -nproc 24 -sam -out tmp_aln.sam"
    ).format(blasr=blasr_bin, read_fasta=reads_path, ref_fasta=ref_path)
    sort_cmd = (
        "{samtools} view -bS tmp_aln.sam | {samtools} sort - {ctg_id}_sorted"
    ).format(samtools=samtools_bin, ctg_id=contig)
    index_cmd = "{samtools} index {ctg_id}_sorted.bam".format(
        samtools=samtools_bin, ctg_id=contig)
    enter_wd = "cd {wd}".format(wd=work_dir)

    commands = [
        "set -vex",
        "trap 'touch {job_done}.exit' EXIT".format(job_done=done_path),
        enter_wd,
        "hostname",
        "date",
        enter_wd,
        align_cmd,
        sort_cmd,
        index_cmd,
        "rm tmp_aln.sam",
        "date",
        "touch {job_done}".format(job_done=done_path),
    ]

    with open(script_path, "w") as out:
        out.write("".join(cmd + "\n" for cmd in commands))

    job = support.make_job_data(self.URL, script_path)
    job["sge_option"] = sge_opts
    run_script(job, job_type=job_kind)
    wait_for_file(done_path, task=self, job_name=job["job_name"])
def task_phasing(self):
    """Write and submit the phasing script for one contig.

    Produces ``p_<ctg_id>.sh`` in the working directory. The script runs
    ``fc_phasing.py`` followed by ``fc_phasing_readmap.py`` and then touches
    ``job_done``. It is submitted with ``run_script`` and ``wait_for_file``
    is then called on ``job_done``.

    Values read from ``self``:
        ref_fasta, aln_bam, job_done: resolved to paths with ``fn``.
        parameters: ``job_uid``, ``wd``, ``ctg_id`` and ``config``.
        parameters["config"]: must provide ``sge_phasing`` and ``job_type``.
    """
    ref_path = fn(self.ref_fasta)
    bam_path = fn(self.aln_bam)
    done_path = fn(self.job_done)

    # Looked up but not used in the script; kept so a missing key still
    # raises at this point.
    _job_uid = self.parameters["job_uid"]
    work_dir = self.parameters["wd"]
    contig = self.parameters["ctg_id"]
    cfg = self.parameters["config"]
    sge_opts = cfg["sge_phasing"]
    job_kind = cfg["job_type"]

    script_path = os.path.join(work_dir, "p_{0}.sh".format(contig))

    phasing_cmd = (
        "fc_phasing.py --bam {aln_bam} --fasta {ref_fasta}"
        " --ctg_id {ctg_id} --base_dir ../"
    ).format(aln_bam=bam_path, ref_fasta=ref_path, ctg_id=contig)
    readmap_cmd = (
        "fc_phasing_readmap.py --ctg_id {ctg_id}"
        " --read_map_dir ../../../2-asm-falcon/read_maps"
        " --phased_reads phased_reads"
    ).format(ctg_id=contig)
    enter_wd = "cd {wd}".format(wd=work_dir)

    commands = [
        "set -vex",
        "trap 'touch {job_done}.exit' EXIT".format(job_done=done_path),
        enter_wd,
        "hostname",
        "date",
        enter_wd,
        phasing_cmd,
        readmap_cmd,
        "date",
        "touch {job_done}".format(job_done=done_path),
    ]

    with open(script_path, "w") as out:
        out.write("".join(cmd + "\n" for cmd in commands))

    job = support.make_job_data(self.URL, script_path)
    job["sge_option"] = sge_opts
    run_script(job, job_type=job_kind)
    wait_for_file(done_path, task=self, job_name=job["job_name"])
def task_run_quiver(self):
    """Write and submit the consensus (quiver) script for one contig.

    Produces ``cns_<ctg_id>.sh`` in the working directory. The script runs
    ``samtools faidx`` on the reference, converts the read SAM to
    ``<ctg_id>.bam``, aligns it with ``pbalign`` into ``aln-<ctg_id>.bam``
    and calls ``variantCaller`` to write the consensus FASTA and FASTQ. If
    ``variantCaller`` fails the script only echoes a message and continues
    to touch ``job_done``.

    Values read from ``self``:
        ref_fasta, read_sam, cns_fasta, cns_fastq, job_done: resolved to
            paths with ``fn``.
        parameters: ``job_uid``, ``wd``, ``ctg_id`` and ``config``.
        parameters["config"]: must provide ``smrt_bin``, ``sge_quiver`` and
            ``job_type``.
    """
    ref_path = fn(self.ref_fasta)
    sam_path = fn(self.read_sam)
    fasta_out = fn(self.cns_fasta)
    fastq_out = fn(self.cns_fastq)
    done_path = fn(self.job_done)

    # Looked up but not used in the script; kept so a missing key still
    # raises at this point.
    _job_uid = self.parameters["job_uid"]
    work_dir = self.parameters["wd"]
    cfg = self.parameters["config"]
    contig = self.parameters["ctg_id"]
    tool_dir = cfg["smrt_bin"]
    sge_opts = cfg["sge_quiver"]
    job_kind = cfg["job_type"]

    samtools_bin = os.path.join(tool_dir, "samtools")
    pbalign_bin = os.path.join(tool_dir, "pbalign")
    makepbi_bin = os.path.join(tool_dir, "makePbi")
    caller_bin = os.path.join(tool_dir, "variantCaller")

    script_path = os.path.join(work_dir, "cns_{0}.sh".format(contig))

    faidx_cmd = "{samtools} faidx {ref_fasta}".format(
        samtools=samtools_bin, ref_fasta=ref_path)
    to_bam_cmd = "{samtools} view -b -S {read_sam} > {ctg_id}.bam".format(
        samtools=samtools_bin, read_sam=sam_path, ctg_id=contig)
    align_cmd = (
        "{pbalign} --tmpDir=/localdisk/scratch/ --nproc=24"
        " --minAccuracy=0.75 --minLength=50"
        " --minAnchorSize=12 --maxDivergence=30 --concordant"
        " --algorithm=blasr"
        " --algorithmOptions=-useQuality --maxHits=1 --hitPolicy=random"
        " --seed=1"
        " {ctg_id}.bam {ref_fasta} aln-{ctg_id}.bam"
    ).format(pbalign=pbalign_bin, ctg_id=contig, ref_fasta=ref_path)
    # Emitted as a shell comment: makePbi is not actually run.
    makepbi_note = (
        "#{makePbi} --referenceFasta {ref_fasta} aln-{ctg_id}.bam"
    ).format(makePbi=makepbi_bin, ref_fasta=ref_path, ctg_id=contig)
    consensus_cmd = (
        "({variantCaller} -x 5 -X 120 -q 20 -j 24 -r {ref_fasta}"
        " aln-{ctg_id}.bam"
        " -o {cns_fasta} -o {cns_fastq}) || echo quvier failed"
    ).format(
        variantCaller=caller_bin,
        ctg_id=contig,
        ref_fasta=ref_path,
        cns_fasta=fasta_out,
        cns_fastq=fastq_out,
    )
    enter_wd = "cd {wd}".format(wd=work_dir)

    commands = [
        "set -vex",
        "trap 'touch {job_done}.exit' EXIT".format(job_done=done_path),
        enter_wd,
        "hostname",
        "date",
        enter_wd,
        faidx_cmd,
        to_bam_cmd,
        align_cmd,
        makepbi_note,
        consensus_cmd,
        "date",
        "touch {job_done}".format(job_done=done_path),
    ]

    with open(script_path, "w") as out:
        out.write("".join(cmd + "\n" for cmd in commands))

    job = support.make_job_data(self.URL, script_path)
    job["sge_option"] = sge_opts
    run_script(job, job_type=job_kind)
    wait_for_file(done_path, task=self, job_name=job["job_name"])