def run(self, outfile, params):
    """Run canu on ``params.fasta`` and move the contigs to *outfile*.

    The command line is assembled from ``self.path`` (presumably the
    canu executable), the task parameters in *params* and the ``cluster``
    section of the global pipeline parameters. canu works inside the
    directory containing *outfile*; its stdout/stderr are redirected to
    ``<outfile>.log`` and ``canu.contigs.fasta`` is moved to *outfile*
    once the command has finished.

    The grid options are handed to canu itself, so the statement is
    passed to ``P.run`` with ``without_cluster=True``.

    Returns:
        Whatever ``P.run`` returns for the statement.
    """
    env_path = os.environ["PATH"]
    cluster = P.get_parameters_as_namedtuple().cluster
    workdir = os.path.dirname(outfile)

    # -sync y forces qsub to wait until job completes before
    # continuing.
    template = (
        "{self.path} "
        "-p canu "
        "-d {outdir} "
        "-genomeSize={params.genome_size} "
        "gridOptionsJobName={outname} "
        "java={params.path_java} "
        "gridOptions=\"-q {cluster_queue} -v PATH={path} -sync y \" "
        "gridEngineMemoryOption=\"-l {cluster_memory_resource}=MEMORY\" "
        "gridEngineThreadsOption=\"-pe {cluster_parallel_environment} THREADS\" "
        "{params.options} "
        "{params.assembly_mode} "
        "{params.fasta} "
        ">& {outfile}.log; "
        "mv {outdir}/canu.contigs.fasta {outfile}")

    # Explicit keyword arguments instead of **locals(): only the fields
    # referenced by the template are supplied.
    statement = template.format(
        self=self,
        params=params,
        outfile=outfile,
        outdir=workdir,
        outname=os.path.basename(workdir),
        path=env_path,
        cluster_queue=cluster["queue"],
        cluster_memory_resource=cluster["memory_resource"],
        cluster_parallel_environment=cluster["parallel_environment"])

    return P.run(statement, without_cluster=True)
def pre_process(self, infile, outfile, params):
    """Build the shell commands that pre-process a BAM file.

    Each enabled option in *params* contributes one shell command that
    reads ``@IN@.bam`` and writes ``@OUT@.bam``. The commands are chained
    with ``P.join_statements``, which is expected to replace the
    ``@IN@``/``@OUT@`` placeholders with *infile* (without its ``.bam``
    suffix) and successive temporary file prefixes.

    Steps, applied in this order when the corresponding option is set:

    * ``params.copy_bam``: copy the BAM file and its ``.bai`` index.
    * ``params.split_bam``: run ``daisy bam2bam-split-reads`` with the
      options given in ``params.split_bam``.
    * ``params.bam2bam``: run ``daisy bam2bam`` with the options given
      in ``params.bam2bam``.
    * ``params.region``: restrict to ``params.region`` using
      ``samtools view``.
    * ``params.shift_quality``: add ``params.shift_quality`` to the
      character code of every symbol in the quality column.
    * ``params.remove_chr``: remove the ``chr`` prefix from reference
      names, renaming ``chrM`` to ``MT``.

    Args:
        infile: path of the input BAM file.
        outfile: path of the task output; used as prefix for log files.
        params: task parameters providing the options listed above.

    Returns:
        A tuple ``(filename, build_statement, cleanup_statement)``. When
        no pre-processing is requested this is the original BAM path
        and two empty strings. Otherwise ``filename`` is the
        pre-processed BAM file, ``build_statement`` the shell command
        creating and indexing it, and ``cleanup_statement`` whatever
        ``P.join_statements`` returned for cleaning up.
    """
    statements = []
    # Placeholders carry the ".bam" suffix themselves, so strip it here.
    infile = IOTools.snip(infile, ".bam")

    if params.copy_bam:
        statements.append(
            "cp @IN@.bam @OUT@.bam; "
            "cp @IN@.bam.bai @OUT@.bam.bai")

    if params.split_bam:
        statements.append(
            "daisy bam2bam-split-reads "
            "-i @IN@.bam "
            "-o - "
            "{params.split_bam} "
            "--log={outfile}_split_bam.log "
            "2> {outfile}_split_bam.err "
            "> @OUT@.bam; ".format(params=params, outfile=outfile))

    if params.bam2bam:
        statements.append(
            "daisy bam2bam "
            "--stdin=@IN@.bam "
            "{params.bam2bam} "
            "--log={outfile}_bam2bam.log "
            "2> {outfile}_bam2bam.err "
            "> @OUT@.bam; ".format(params=params, outfile=outfile))

    if params.region:
        statements.append(
            "samtools view -b @IN@.bam {} > @OUT@.bam".format(
                params.region))

    if params.shift_quality:
        # Doubled braces are literal braces of the perl program; the
        # single {} receives the quality offset.
        statements.append(
            "samtools view -h @IN@.bam "
            "| perl -lane "
            "'if(/^@/) {{print; next;}} "
            "@qual=split(//, $F[10]); "
            "$_=chr(ord($_)+{}) for (@qual); "
            "$F[10]=join(\"\",@qual); "
            "print join(\"\\t\", @F)' "
            "| samtools view -bS > @OUT@.bam".format(
                params.shift_quality))

    if is_true(params.remove_chr):
        # also substitute chrM to MT.
        # NOTE(review): the second awk rule runs for every line that is
        # not an @SQ line, so it also edits the third field of other
        # header lines (e.g. @RG, @PG) - confirm this is intended.
        statements.append(
            "samtools view -h @IN@.bam "
            "| awk -v OFS='\\t' '"
            "$1 == \"@SQ\" "
            "{{ gsub(\"chrM\", \"chrMT\", $2); "
            " gsub(\"chr\", \"\", $2); print; next }} "
            "{{ gsub(\"chrM\", \"chrMT\", $3); "
            " gsub(\"chr\", \"\", $3); print; next}} '"
            "| samtools view -bS - "
            "2> {outfile}_remove_chr.log "
            "> @OUT@.bam; ".format(outfile=outfile))

    if not statements:
        # Nothing to do: hand back the untouched input file.
        return infile + ".bam", "", ""

    filename, build_statement, cleanup_statement = P.join_statements(
        statements, infile)
    # The name returned by join_statements lacks the ".bam" suffix that
    # the command templates append to the placeholders.
    filename += ".bam"
    build_statement += (
        "; samtools index {filename} >& {outfile}.index.log".format(
            filename=filename, outfile=outfile))

    return filename, build_statement, cleanup_statement