def main():
    """Run bwa on the input FASTQ(s) and write the BAM into ``args.out_dir``.

    Steps, in order:
      1. Parse command-line arguments and create the output directory.
      2. If ``args.bwa_index_prefix_or_tar`` is a ``.tar``/``.tar.gz`` file,
         unpack it into the output directory and locate the index prefix
         there; otherwise use the argument as the index prefix directly.
      3. Validate the index with ``chk_bwa_index``.
      4. Align with ``bwa_pe`` (paired-end) or ``bwa_se`` (single-ended).
      5. Remove temporary files (the unpacked index, if any).
      6. Fail if the resulting BAM is empty, then list the output directory.

    Raises:
        ValueError: if the BAM produced by bwa contains no reads.
    """
    # NOTE(review): `log` is assumed to be the module-level logger used by
    # this script; the `log.info(` prefixes were missing from the collapsed
    # source and have been restored -- confirm the logger name.
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    # Files (or glob patterns) to be deleted at the end of the run.
    temp_files = []

    # If the bwa index is a tarball, unpack it into the output directory.
    if args.bwa_index_prefix_or_tar.endswith(('.tar', '.tar.gz')):
        log.info('Unpacking bwa index tar...')
        untar(args.bwa_index_prefix_or_tar, args.out_dir)
        bwa_index_prefix = find_bwa_index_prefix(args.out_dir)
        # The unpacked index files are only needed for this run.
        temp_files.append('{}*'.format(bwa_index_prefix))
    else:
        bwa_index_prefix = args.bwa_index_prefix_or_tar

    # Check that the bwa index files are available under the chosen prefix.
    chk_bwa_index(bwa_index_prefix)

    log.info('Running bwa...')
    if args.paired_end:
        bam = bwa_pe(
            args.fastqs[0],
            args.fastqs[1],
            bwa_index_prefix,
            args.nth,
            args.mem_gb,
            args.use_bwa_mem_for_pe,
            args.bwa_mem_read_len_limit,
            args.rescue_reads_for_bwa_mem,
            args.out_dir,
        )
    else:
        bam = bwa_se(
            args.fastqs[0],
            bwa_index_prefix,
            args.nth,
            args.mem_gb,
            args.out_dir,
        )

    log.info('Removing temporary files...')
    rm_f(temp_files)

    log.info('Checking if BAM file is empty...')
    if bam_is_empty(bam, args.nth):
        raise ValueError('BAM file is empty, no reads found.')

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
def main():
    """Filter a raw BAM, mark/remove duplicates and produce QC outputs.

    Data flow (files):

        filt_bam - dupmark_bam - nodup_bam
                 \\ dup_qc      \\ pbc_qc

    Steps, in order:
      1. Parse arguments and create the output directory.
      2. Remove unmapped/low-quality reads (PE or SE variant).
      3. Mark duplicates with picard or sambamba.
      4. Unless ``args.no_dup_removal`` is set, remove duplicates.
      5. Optionally drop the chromosomes listed in ``args.filter_chrs``.
      6. Index the final BAM, run samstat and generate the PBC QC log.
      7. Index a copy of the raw BAM, then delete all temporary files.

    Raises:
        ValueError: if the filtered BAM or the final BAM contains no reads.
        argparse.ArgumentTypeError: if ``args.dup_marker`` is neither
            ``'picard'`` nor ``'sambamba'``.
    """
    # NOTE(review): `log` is assumed to be the module-level logger used by
    # this script; the `log.info(` prefixes were missing from the collapsed
    # source and have been restored -- confirm the logger name.
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    # Files to be deleted at the end of the run.
    temp_files = []

    log.info('Removing unmapped/low-quality reads...')
    if args.paired_end:
        filt_bam = rm_unmapped_lowq_reads_pe(
            args.bam, args.multimapping, args.mapq_thresh,
            args.nth, args.mem_gb, args.out_dir)
    else:
        filt_bam = rm_unmapped_lowq_reads_se(
            args.bam, args.multimapping, args.mapq_thresh,
            args.nth, args.mem_gb, args.out_dir)

    log.info('Checking if filtered BAM file is empty...')
    if bam_is_empty(filt_bam, args.nth):
        # NOTE(review): the URL below was missing from the source (the
        # message ended with "Check samtools flags at " and nothing after
        # it); restored to the Picard flag explainer -- confirm.
        help_msg = (
            'No reads found in filtered BAM. '
            'Low quality sample? '
            'Or no reads passing criteria "samtools view -F 1804"? '
            'Check samtools flags at '
            'https://broadinstitute.github.io/picard/explain-flags.html. '
        )
        if args.paired_end:
            help_msg += (
                'Or is this truely PE BAM? '
                'All unpaired SE reads could be removed by "samtools view -f 2". '
            )
        raise ValueError(help_msg)

    log.info('Marking dupes with {}...'.format(args.dup_marker))
    if args.dup_marker == 'picard':
        dupmark_bam, dup_qc = mark_dup_picard(
            filt_bam, args.out_dir, args.picard_java_heap)
    elif args.dup_marker == 'sambamba':
        dupmark_bam, dup_qc = mark_dup_sambamba(
            filt_bam, args.nth, args.out_dir)
    else:
        raise argparse.ArgumentTypeError(
            'Unsupported --dup-marker {}'.format(args.dup_marker))

    if args.no_dup_removal:
        # The filtered BAM is passed on as-is, so it must not be deleted.
        nodup_bam = filt_bam
    else:
        temp_files.append(filt_bam)
        log.info('Removing dupes...')
        if args.paired_end:
            nodup_bam = rm_dup_pe(dupmark_bam, args.nth, args.out_dir)
        else:
            nodup_bam = rm_dup_se(dupmark_bam, args.nth, args.out_dir)
        # NOTE(review): nesting reconstructed from a collapsed source; the
        # index is registered as temporary in the same branch that creates
        # it -- confirm against the original layout.
        samtools_index(dupmark_bam)
        temp_files.append(dupmark_bam + '.bai')
    # The dup-marked BAM is still read by the PBC QC step below; it is only
    # removed at the very end together with the other temporary files.
    temp_files.append(dupmark_bam)

    # Truthiness test: also tolerates filter_chrs being None.
    if args.filter_chrs:
        final_bam = remove_chrs_from_bam(
            nodup_bam, args.filter_chrs, args.chrsz, args.nth, args.out_dir)
        temp_files.append(nodup_bam)
    else:
        final_bam = nodup_bam

    log.info('Checking if final BAM file is empty...')
    if bam_is_empty(final_bam, args.nth):
        raise ValueError(
            'No reads found in final (filtered/deduped) BAM. '
            'Low quality sample? '
            'Or BAM with duplicates only? '
        )

    log.info('samtools index (final_bam)...')
    samtools_index(final_bam, args.nth, args.out_dir)

    log.info('samstat...')
    samstat(final_bam, args.nth, args.mem_gb, args.out_dir)

    log.info('Generating PBC QC log...')
    if args.paired_end:
        pbc_qc_pe(dupmark_bam, args.mito_chr_name, args.nth, args.out_dir)
    else:
        pbc_qc_se(dupmark_bam, args.mito_chr_name, args.out_dir)

    log.info('samtools index (raw bam)...')
    # Index a copy of the raw BAM inside out_dir; both the copy and its
    # index are temporary.
    bam = copy_f_to_dir(args.bam, args.out_dir)
    bai = samtools_index(bam, args.nth, args.out_dir)
    temp_files.extend([bam, bai])

    log.info('Removing temporary files...')
    rm_f(temp_files)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
def rm_unmapped_lowq_reads_pe(bam, multimapping, mapq_thresh, nth, mem_gb,
                              out_dir):
    """Remove unmapped/low-quality reads from a paired-end BAM.

    There are pipes with multiple samtools commands.
    For such pipes, use multiple threads (-@) for only one of them.
    Priority is on sort > index > fixmate > view.

    The work is done in three shell stages:
      1. ``samtools view`` filter piped into a name sort (``sort -n``),
         written to a temporary ``*.tmp_filt.bam``.
      2. ``samtools fixmate -r`` on the name-sorted reads (with the
         multimapper assignment script in front of it when ``multimapping``
         is truthy), written to a temporary ``*.fixmate.bam``.
      3. ``samtools view -F 1804 -f 2`` piped into a final sort, written to
         ``*.filt.bam``.
    Both temporary files are removed once they have been consumed.

    Args:
        bam: Path to the input BAM.
        multimapping: Value passed as ``-k`` to the multimapper assignment
            script; when falsy, a MAPQ filter is applied instead.
        mapq_thresh: MAPQ threshold (``samtools view -q``), used only when
            ``multimapping`` is falsy.
        nth: Thread count forwarded to ``get_samtools_res_param`` and
            ``bam_is_empty``.
        mem_gb: Memory forwarded to ``get_samtools_res_param`` for the sort
            commands.
        out_dir: Directory in which all outputs are written.

    Returns:
        Path to the filtered BAM (``<out_dir>/<basename>.filt.bam``).

    Raises:
        ValueError: if the filtered BAM contains no reads.
    """
    prefix = os.path.join(out_dir, os.path.basename(strip_ext_bam(bam)))
    filt_bam = '{}.filt.bam'.format(prefix)
    tmp_filt_bam = '{}.tmp_filt.bam'.format(prefix)
    fixmate_bam = '{}.fixmate.bam'.format(prefix)

    # Resource flags for the sort commands. Per the priority rule in the
    # docstring, the sort (not the view) side of each pipe gets the threads.
    sort_res_param = get_samtools_res_param('sort', nth=nth, mem_gb=mem_gb)
    fixmate_res_param = get_samtools_res_param('fixmate', nth=nth)

    if multimapping:
        # Fixed: the resource flags here were built for 'view' (and without
        # mem_gb) although they are attached to the sort command.
        run_shell_cmd(
            'samtools view -F 524 -f 2 -u {bam} | '
            'samtools sort -n /dev/stdin -o {tmp_filt_bam} -T {prefix} {res_param} '
            .format(
                bam=bam,
                tmp_filt_bam=tmp_filt_bam,
                prefix=prefix,
                res_param=sort_res_param,
            ))

        # NOTE(review): the script name inside `$(which ...)` was missing
        # from the source, leaving an unterminated command substitution;
        # restored as assign_multimappers.py -- confirm.
        run_shell_cmd(
            'samtools view -h {tmp_filt_bam} | '
            '$(which assign_multimappers.py) -k {multimapping} --paired-end | '
            'samtools fixmate -r /dev/stdin {fixmate_bam} {res_param}'.format(
                tmp_filt_bam=tmp_filt_bam,
                multimapping=multimapping,
                fixmate_bam=fixmate_bam,
                res_param=fixmate_res_param,
            ))
    else:
        run_shell_cmd(
            'samtools view -F 1804 -f 2 -q {mapq_thresh} -u {bam} | '
            'samtools sort -n /dev/stdin -o {tmp_filt_bam} -T {prefix} {res_param}'
            .format(
                mapq_thresh=mapq_thresh,
                bam=bam,
                tmp_filt_bam=tmp_filt_bam,
                prefix=prefix,
                res_param=sort_res_param,
            ))

        run_shell_cmd(
            'samtools fixmate -r {tmp_filt_bam} {fixmate_bam} {res_param}'.format(
                tmp_filt_bam=tmp_filt_bam,
                fixmate_bam=fixmate_bam,
                res_param=fixmate_res_param,
            ))

    # The name-sorted intermediate is no longer needed after fixmate.
    rm_f(tmp_filt_bam)

    run_shell_cmd(
        'samtools view -F 1804 -f 2 -u {fixmate_bam} | '
        'samtools sort /dev/stdin -o {filt_bam} -T {prefix} {res_param}'.format(
            fixmate_bam=fixmate_bam,
            filt_bam=filt_bam,
            prefix=prefix,
            res_param=sort_res_param,
        ))

    rm_f(fixmate_bam)

    # NOTE(review): `log` is assumed to be the module-level logger; the
    # `log.info(` prefix was missing from the collapsed source -- confirm.
    log.info(
        'Checking if filtered (but not deduped) BAM is empty '
        'after filtering with "samtools view -F 1804 -f 2".')
    if bam_is_empty(filt_bam, nth):
        raise ValueError(
            'No reads found aftering filtering "samtools fixmate"d PE BAM with '
            '"samtools view -F 1804 -f 2". '
            'Reads are not properly paired even though mapping rate is good? ')

    return filt_bam