def main():
    """Entry point: align FASTQ(s) with bwa and write the BAM to the output dir.

    Steps, in order:
      1. Parse command-line arguments and create the output directory.
      2. If the bwa index argument ends with ``.tar`` or ``.tar.gz``, unpack
         it into the output directory and register the unpacked index files
         for later cleanup; otherwise use the argument as the index prefix.
      3. Validate the index prefix, then run the paired-end or single-ended
         aligner depending on ``args.paired_end``.
      4. Remove temporary files, verify the BAM is not empty and list the
         output directory.

    Raises:
        ValueError: if the resulting BAM is reported as empty.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    # Paths (glob patterns allowed by the '{}*' entry below) that are handed
    # to rm_f() once the alignment has finished.
    cleanup_targets = []

    index_arg = args.bwa_index_prefix_or_tar
    if index_arg.endswith(('.tar', '.tar.gz')):
        # The index was shipped as a tarball: unpack it next to the outputs
        # and discover the prefix from what was extracted.
        log.info('Unpacking bwa index tar...')
        untar(index_arg, args.out_dir)
        bwa_index_prefix = find_bwa_index_prefix(args.out_dir)
        cleanup_targets.append('{}*'.format(bwa_index_prefix))
    else:
        bwa_index_prefix = index_arg

    # Validate the bwa index before starting the alignment.
    chk_bwa_index(bwa_index_prefix)

    log.info('Running bwa...')
    if not args.paired_end:
        bam = bwa_se(
            args.fastqs[0],
            bwa_index_prefix,
            args.nth,
            args.mem_gb,
            args.out_dir)
    else:
        bam = bwa_pe(
            args.fastqs[0],
            args.fastqs[1],
            bwa_index_prefix,
            args.nth,
            args.mem_gb,
            args.use_bwa_mem_for_pe,
            args.bwa_mem_read_len_limit,
            args.rescue_reads_for_bwa_mem,
            args.out_dir)

    log.info('Removing temporary files...')
    rm_f(cleanup_targets)

    log.info('Checking if BAM file is empty...')
    bam_has_no_reads = bam_is_empty(bam, args.nth)
    if bam_has_no_reads:
        raise ValueError('BAM file is empty, no reads found.')

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
def main():
    """Entry point: filter a BAM, mark/remove duplicates and produce QC logs.

    Data flow between intermediate files:

        filt_bam - dupmark_bam - nodup_bam - final_bam
                      \\ dup_qc    \\ pbc_qc

    Steps, in order:
      1. Parse arguments and create the output directory.
      2. Remove unmapped/low-quality reads (PE or SE variant).
      3. Mark duplicates with picard or sambamba.
      4. Unless ``args.no_dup_removal`` is set, remove duplicates and index
         the dup-marked BAM.
      5. Optionally drop the chromosomes listed in ``args.filter_chrs``.
      6. Index the final BAM, run samstat and the PBC QC step.
      7. Copy and index the raw BAM, then delete all temporary files.

    Raises:
        ValueError: if the filtered BAM or the final BAM is reported empty.
        argparse.ArgumentTypeError: if ``args.dup_marker`` is neither
            ``'picard'`` nor ``'sambamba'``.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    # Intermediate files that are deleted in one go at the very end.
    cleanup_targets = []

    log.info('Removing unmapped/low-quality reads...')
    # Both variants take the same positional arguments.
    filter_reads = (
        rm_unmapped_lowq_reads_pe if args.paired_end
        else rm_unmapped_lowq_reads_se
    )
    filt_bam = filter_reads(
        args.bam, args.multimapping, args.mapq_thresh,
        args.nth, args.mem_gb, args.out_dir)

    log.info('Checking if filtered BAM file is empty...')
    if bam_is_empty(filt_bam, args.nth):
        msg_parts = [
            'No reads found in filtered BAM. ',
            'Low quality sample? ',
            'Or no reads passing criteria "samtools view -F 1804"? ',
            'Check samtools flags at ',
            'https://broadinstitute.github.io/picard/explain-flags.html. ',
        ]
        if args.paired_end:
            # Extra hint that only makes sense for paired-end input.
            msg_parts.append('Or is this truely PE BAM? ')
            msg_parts.append(
                'All unpaired SE reads could be removed by '
                '"samtools view -f 2". ')
        raise ValueError(''.join(msg_parts))

    log.info('Marking dupes with {}...'.format(args.dup_marker))
    if args.dup_marker not in ('picard', 'sambamba'):
        raise argparse.ArgumentTypeError('Unsupported --dup-marker {}'.format(
            args.dup_marker))
    if args.dup_marker == 'picard':
        dupmark_bam, dup_qc = mark_dup_picard(
            filt_bam, args.out_dir, args.picard_java_heap)
    else:
        dupmark_bam, dup_qc = mark_dup_sambamba(
            filt_bam, args.nth, args.out_dir)

    if not args.no_dup_removal:
        # filt_bam is superseded by the deduplicated BAM, so it is temporary.
        cleanup_targets.append(filt_bam)
        log.info('Removing dupes...')
        remove_dups = rm_dup_pe if args.paired_end else rm_dup_se
        nodup_bam = remove_dups(dupmark_bam, args.nth, args.out_dir)
        samtools_index(dupmark_bam)
        cleanup_targets.append(dupmark_bam + '.bai')
    else:
        # Duplicates are kept: the filtered BAM is carried forward as-is.
        nodup_bam = filt_bam
    cleanup_targets.append(dupmark_bam)

    if len(args.filter_chrs) == 0:
        final_bam = nodup_bam
    else:
        final_bam = remove_chrs_from_bam(
            nodup_bam, args.filter_chrs, args.chrsz,
            args.nth, args.out_dir)
        cleanup_targets.append(nodup_bam)

    log.info('Checking if final BAM file is empty...')
    if bam_is_empty(final_bam, args.nth):
        raise ValueError('No reads found in final (filtered/deduped) BAM. '
                         'Low quality sample? '
                         'Or BAM with duplicates only? ')

    log.info('samtools index (final_bam)...')
    samtools_index(final_bam, args.nth, args.out_dir)

    log.info('samstat...')
    samstat(final_bam, args.nth, args.mem_gb, args.out_dir)

    log.info('Generating PBC QC log...')
    # The PBC QC step runs on the dup-marked BAM, i.e. before dup removal.
    if args.paired_end:
        pbc_qc_pe(dupmark_bam, args.mito_chr_name, args.nth, args.out_dir)
    else:
        pbc_qc_se(dupmark_bam, args.mito_chr_name, args.out_dir)

    log.info('samtools index (raw bam)...')
    raw_bam_copy = copy_f_to_dir(args.bam, args.out_dir)
    raw_bai = samtools_index(raw_bam_copy, args.nth, args.out_dir)
    # Both the copy and its index are scratch files.
    cleanup_targets.extend([raw_bam_copy, raw_bai])

    log.info('Removing temporary files...')
    rm_f(cleanup_targets)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
def rm_unmapped_lowq_reads_pe(bam, multimapping, mapq_thresh, nth, mem_gb,
                              out_dir):
    """Filter a paired-end BAM and return the path of the filtered BAM.

    There are pipes with multiple samtools commands.
    For such pipes, use multiple threads (-@) for only one of them.
    Priority is on sort > index > fixmate > view.

    Pipeline:
      1. ``samtools view`` pre-filter piped into a name sort
         (``samtools sort -n``), writing ``<prefix>.tmp_filt.bam``.
         With ``multimapping`` the pre-filter is ``-F 524 -f 2``; otherwise
         it is ``-F 1804 -f 2 -q <mapq_thresh>``.
      2. ``samtools fixmate -r`` on the name-sorted reads, writing
         ``<prefix>.fixmate.bam``. With ``multimapping`` the reads are first
         passed through ``assign_multimappers.py -k <multimapping>
         --paired-end``.
      3. ``samtools view -F 1804 -f 2`` piped into a sort, writing
         ``<prefix>.filt.bam``.
    Both intermediate BAMs are removed as soon as they are consumed.

    Args:
        bam: Path of the input BAM.
        multimapping: Value passed to ``assign_multimappers.py -k``; a falsy
            value selects the MAPQ-threshold branch instead.
        mapq_thresh: MAPQ threshold for ``samtools view -q`` (only used when
            ``multimapping`` is falsy).
        nth: Thread count forwarded to ``get_samtools_res_param``.
        mem_gb: Memory budget forwarded to ``get_samtools_res_param`` for the
            sort commands.
        out_dir: Directory in which all output files are written.

    Returns:
        Path of the filtered BAM (``<prefix>.filt.bam``).

    Raises:
        ValueError: if the filtered BAM is reported as empty.
    """
    prefix = os.path.join(out_dir, os.path.basename(strip_ext_bam(bam)))
    filt_bam = '{}.filt.bam'.format(prefix)
    tmp_filt_bam = '{}.tmp_filt.bam'.format(prefix)
    fixmate_bam = '{}.fixmate.bam'.format(prefix)

    if multimapping:
        # FIX: the resource parameters are appended to the `samtools sort`
        # command, so they must be built for 'sort' (threads + memory), in
        # line with the non-multimapping branch and the final sort below.
        # Previously they were built for 'view', which silently ignored
        # mem_gb for this sort.
        run_shell_cmd(
            'samtools view -F 524 -f 2 -u {bam} | '
            'samtools sort -n /dev/stdin -o {tmp_filt_bam} -T {prefix} {res_param} '
            .format(
                bam=bam,
                tmp_filt_bam=tmp_filt_bam,
                prefix=prefix,
                res_param=get_samtools_res_param(
                    'sort', nth=nth, mem_gb=mem_gb),
            ))
        # fixmate is the multi-threaded command in this pipe
        # (priority: fixmate > view).
        run_shell_cmd(
            'samtools view -h {tmp_filt_bam} | '
            '$(which assign_multimappers.py) -k {multimapping} --paired-end | '
            'samtools fixmate -r /dev/stdin {fixmate_bam} {res_param}'.format(
                tmp_filt_bam=tmp_filt_bam,
                multimapping=multimapping,
                fixmate_bam=fixmate_bam,
                res_param=get_samtools_res_param('fixmate', nth=nth),
            ))
    else:
        run_shell_cmd(
            'samtools view -F 1804 -f 2 -q {mapq_thresh} -u {bam} | '
            'samtools sort -n /dev/stdin -o {tmp_filt_bam} -T {prefix} {res_param}'
            .format(
                mapq_thresh=mapq_thresh,
                bam=bam,
                tmp_filt_bam=tmp_filt_bam,
                prefix=prefix,
                res_param=get_samtools_res_param(
                    'sort', nth=nth, mem_gb=mem_gb),
            ))
        run_shell_cmd(
            'samtools fixmate -r {tmp_filt_bam} {fixmate_bam} {res_param}'
            .format(
                tmp_filt_bam=tmp_filt_bam,
                fixmate_bam=fixmate_bam,
                res_param=get_samtools_res_param('fixmate', nth=nth),
            ))
    # The name-sorted intermediate is no longer needed after fixmate.
    rm_f(tmp_filt_bam)

    # Re-apply the flag filter after fixmate, then write the final sorted
    # output (no -n here).
    run_shell_cmd(
        'samtools view -F 1804 -f 2 -u {fixmate_bam} | '
        'samtools sort /dev/stdin -o {filt_bam} -T {prefix} {res_param}'
        .format(
            fixmate_bam=fixmate_bam,
            filt_bam=filt_bam,
            prefix=prefix,
            res_param=get_samtools_res_param('sort', nth=nth, mem_gb=mem_gb),
        ))
    rm_f(fixmate_bam)

    log.info('Checking if filtered (but not deduped) BAM is empty '
             'after filtering with "samtools view -F 1804 -f 2".')
    if bam_is_empty(filt_bam, nth):
        raise ValueError(
            'No reads found aftering filtering "samtools fixmate"d PE BAM with '
            '"samtools view -F 1804 -f 2". '
            'Reads are not properly paired even though mapping rate is good? ')

    return filt_bam