def bwa_align_unpaired(ref_fasta, read_fastq, out_name, algorithm='ALN', max_hits=None,
                       read_group_header=None, num_threads=24):
    """Align reads with bwa as single-end reads, ignoring pairing information.

    The alignments are written as SAM to ``out_name + '.sam'``, converted to
    ``out_name`` with ``tk_bam.convert_to_bam``, and the intermediate files
    are then removed.  Intermediate files are only removed when every step
    succeeds; after a failure they are left on disk.

    Args:
        ref_fasta: Reference handed to bwa.
        read_fastq: Path of a single reads file, handed to bwa unchanged.
            A list is rejected.
        out_name: Path of the final output file; also used as the prefix of
            the intermediate ``.sam`` / ``.sai`` files.
        algorithm: ``'MEM'`` (``bwa mem``) or ``'ALN'`` (``bwa aln`` followed
            by ``bwa samse``).
        max_hits: ALN only.  When truthy it is passed to ``bwa samse -n``.
        read_group_header: Value for ``bwa mem -R``.  Defaults to
            ``tk_bam.make_rg_header()``.
            NOTE(review): the ALN path does not pass this to ``bwa samse`` -
            confirm whether the read group is expected in ALN output.
        num_threads: Value for the ``-t`` option of ``bwa mem`` / ``bwa aln``.

    Raises:
        AssertionError: If ``read_fastq`` is a list.
        NotSupportedException: If ``algorithm`` is neither 'MEM' nor 'ALN'.
    """
    assert not isinstance(read_fastq, list), \
        'read_fastq must be a single file path, not a list'

    if read_group_header is None:
        read_group_header = tk_bam.make_rg_header()

    sam_name = out_name + '.sam'

    if algorithm == 'MEM':
        # 'with' guarantees the handle is closed even when bwa fails.
        with open(sam_name, 'w') as sam_out_file:
            log_subprocess.check_call([
                'bwa', 'mem', '-t', str(num_threads), '-M', '-R',
                read_group_header, ref_fasta, read_fastq
            ], stdout=sam_out_file)
        temp_names = [sam_name]

    elif algorithm == 'ALN':
        index_name = out_name + '.sai'

        # Step 1: compute the suffix-array coordinates (.sai).
        with open(index_name, 'w') as index_file:
            log_subprocess.check_call(
                ['bwa', 'aln', '-t', str(num_threads), ref_fasta, read_fastq],
                stdout=index_file)

        # Step 2: turn the .sai file into SAM, optionally capping the hits.
        samse_cmd = ['bwa', 'samse']
        if max_hits:
            samse_cmd += ['-n', str(max_hits)]
        samse_cmd += [ref_fasta, index_name, read_fastq]
        with open(sam_name, 'w') as sam_out_file:
            log_subprocess.check_call(samse_cmd, stdout=sam_out_file)
        temp_names = [index_name, sam_name]

    else:
        raise NotSupportedException('Unsupported bwa algorithm: ' + algorithm)

    # Create final bam file from the sam file
    tk_bam.convert_to_bam(sam_name, out_name)

    # Remove temp files
    for temp_name in temp_names:
        subprocess.check_call(['rm', temp_name])
def bwa_align_paired(ref_fasta, read_fastq, out_name, algorithm='ALN', max_hits=None,
                     read_group_header=None, num_threads=24):
    """Align read pairs with bwa using paired-end information.

    Algorithm choices are currently
        MEM: Maximal Exact Matching (better for longer reads)
        ALN: backtrack algorithm (better for shorter reads)
    Haven't yet implemented BWA-SW.

    Args:
        ref_fasta: Reference handed to bwa.
        read_fastq: For ALN, the path of one interleaved FASTQ file (the
            reads of a pair alternate).  For MEM, either such a path (bwa is
            then run with ``-p``) or a list of exactly two file paths, one
            per read of the pair.
        out_name: Path of the output file.  ALN also uses it as the prefix
            of its intermediate files; MEM additionally writes samtools'
            stderr to ``out_name + '_ERRORS'``.
        algorithm: ``'MEM'`` or ``'ALN'``.
        max_hits: ALN only.  When truthy it is passed to ``bwa sampe -n``.
        read_group_header: Value for ``bwa mem -R``.  Defaults to
            ``tk_bam.make_rg_header()``.
            NOTE(review): the ALN path does not pass this to ``bwa sampe`` -
            confirm whether the read group is expected in ALN output.
        num_threads: Value for the ``-t`` option of ``bwa mem`` / ``bwa aln``.

    Raises:
        AssertionError: MEM with a list whose length is not 2.
        subprocess.CalledProcessError: MEM, when ``bwa mem`` exits with a
            non-zero status.
        NotSupportedException: If ``algorithm`` is neither 'MEM' nor 'ALN'.
    """
    if read_group_header is None:
        read_group_header = tk_bam.make_rg_header()

    if algorithm == 'MEM':
        mem_cmd = ['bwa', 'mem']
        if isinstance(read_fastq, list):
            assert len(read_fastq) == 2
            fastq_args = [read_fastq[0], read_fastq[1]]
        else:
            # A single file is treated as interleaved pairs.
            mem_cmd.append('-p')
            fastq_args = [read_fastq]
        # -M marks shorter split hits as secondary.
        mem_cmd += ['-t', str(num_threads), '-M', '-R', read_group_header,
                    ref_fasta] + fastq_args

        # samtools' stderr is diverted to a side file; the original code
        # flagged this as a workaround to drop "once bug fixed".
        with open(out_name, 'w') as out_file, \
                open(out_name + '_ERRORS', 'w') as errors_file:
            ps = log_subprocess.Popen(mem_cmd, stdout=subprocess.PIPE)
            try:
                log_subprocess.check_call(
                    ['samtools', 'view', '-bSh', '-'],
                    stdin=ps.stdout, stdout=out_file, stderr=errors_file)
            finally:
                # Close our end of the pipe and reap bwa so that it neither
                # lingers as a zombie nor blocks on a full pipe.
                # Assumes log_subprocess.Popen returns a subprocess.Popen-like
                # object - TODO confirm.
                ps.stdout.close()
                bwa_returncode = ps.wait()

        # A failing bwa would otherwise leave a truncated BAM unnoticed.
        if bwa_returncode != 0:
            raise subprocess.CalledProcessError(bwa_returncode, mem_cmd)

    elif algorithm == 'ALN':
        # Temp file names
        temp_fastq_name1 = out_name + '1.fastq'
        temp_fastq_name2 = out_name + '2.fastq'
        index_name_1 = out_name + '1.sai'
        index_name_2 = out_name + '2.sai'
        sam_name = out_name + '.sam'

        # Create the temp non-interleaved files
        with open(read_fastq, 'r') as in_fastq, \
                open(temp_fastq_name1, 'w') as temp_fastq1, \
                open(temp_fastq_name2, 'w') as temp_fastq2:
            tk_fasta.uninterleave_fastq(in_fastq, temp_fastq1, temp_fastq2)

        # Create the bwa index (.sai) files, one per read of the pair
        for temp_fastq_name, index_name in ((temp_fastq_name1, index_name_1),
                                            (temp_fastq_name2, index_name_2)):
            with open(index_name, 'w') as index_file:
                log_subprocess.check_call([
                    'bwa', 'aln', '-t', str(num_threads), ref_fasta,
                    temp_fastq_name
                ], stdout=index_file)

        # Pair the two sets of alignments and write them as SAM
        sampe_cmd = ['bwa', 'sampe']
        if max_hits:
            sampe_cmd += ['-n', str(max_hits)]
        sampe_cmd += [ref_fasta, index_name_1, index_name_2,
                      temp_fastq_name1, temp_fastq_name2]
        with open(sam_name, 'w') as sam_out_file:
            log_subprocess.check_call(sampe_cmd, stdout=sam_out_file)

        # Create final bam file from the sam file
        tk_bam.convert_to_bam(sam_name, out_name)

        # Clean up temporary files
        for temp_name in (temp_fastq_name1, temp_fastq_name2,
                          index_name_1, index_name_2, sam_name):
            subprocess.check_call(['rm', temp_name])

    else:
        raise NotSupportedException('Unsupported bwa algorithm: ' + algorithm)