def main():
    """Run the SAMstat QC task on the BAM given on the command line.

    Steps, in order:
      1. Parse arguments and create ``args.out_dir``.
      2. Write a read length file from ``args.fastq``.
      3. Index ``args.bam`` and run SAMstat on it.
      4. Only when ``args.chrsz`` is set: build a BAM with
         ``args.mito_chr_name`` removed in a ``non_mito`` subdirectory,
         run SAMstat on it and delete that intermediate BAM.
      5. List the contents of the output directory.
    """
    args = parse_arguments()
    out_dir = args.out_dir

    log.info('Initializing and making output directory...')
    mkdir_p(out_dir)

    log.info('Generating read length file...')
    make_read_length_file(args.fastq, out_dir)

    log.info('Running samtools index...')
    samtools_index(args.bam, args.nth, out_dir)

    log.info('SAMstat on raw BAM...')
    samstat(args.bam, args.nth, out_dir)

    if args.chrsz:
        log.info('SAMstat on non-mito BAM...')
        # Keep the non-mito intermediates in their own subdirectory.
        mito_free_dir = os.path.join(out_dir, 'non_mito')
        mkdir_p(mito_free_dir)
        mito_free_bam = remove_chrs_from_bam(
            args.bam,
            [args.mito_chr_name],
            args.chrsz,
            args.nth,
            mito_free_dir,
        )
        samstat(mito_free_bam, args.nth, mito_free_dir)
        # The filtered BAM is only needed as SAMstat input.
        rm_f(mito_free_bam)

    log.info('List all files in output directory...')
    ls_l(out_dir)

    log.info('All done.')
def main():
    """Filter a BAM, mark/remove duplicates and write QC outputs.

    Steps, in order:
      1. Parse arguments and create ``args.out_dir``.
      2. Remove unmapped/low-quality reads (PE or SE helper, chosen by
         ``args.paired_end``); fail if the result is empty.
      3. Mark duplicates with the tool named by ``args.dup_marker``.
      4. Unless ``args.no_dup_removal`` is set, remove duplicates and
         index the dup-marked BAM.
      5. Optionally drop the chromosomes in ``args.filter_chrs``; fail
         if the final BAM is empty.
      6. Index the final BAM, run SAMstat and the PBC QC helper.
      7. Copy and index the raw BAM, then delete all temporary files.

    Raises:
        ValueError: if the filtered or the final BAM contains no reads.
        argparse.ArgumentTypeError: if ``args.dup_marker`` is neither
            ``'picard'`` nor ``'sambamba'``.
    """
    # Intermediate file flow:
    #   filt_bam - dupmark_bam - nodup_bam
    #            \ dup_qc      \ pbc_qc
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    # Paths collected here are removed in one go at the very end.
    to_delete = []

    log.info('Removing unmapped/low-quality reads...')
    filter_reads = (
        rm_unmapped_lowq_reads_pe if args.paired_end
        else rm_unmapped_lowq_reads_se
    )
    filt_bam = filter_reads(
        args.bam,
        args.multimapping,
        args.mapq_thresh,
        args.nth,
        args.mem_gb,
        args.out_dir,
    )

    log.info('Checking if filtered BAM file is empty...')
    if bam_is_empty(filt_bam, args.nth):
        hints = [
            'No reads found in filtered BAM. ',
            'Low quality sample? ',
            'Or no reads passing criteria "samtools view -F 1804"? ',
            'Check samtools flags at ',
            'https://broadinstitute.github.io/picard/explain-flags.html. ',
        ]
        if args.paired_end:
            hints.append('Or is this truely PE BAM? ')
            hints.append(
                'All unpaired SE reads could be removed by '
                '"samtools view -f 2". '
            )
        raise ValueError(''.join(hints))

    marker = args.dup_marker
    log.info('Marking dupes with {}...'.format(marker))
    if marker == 'sambamba':
        dupmark_bam, _dup_qc = mark_dup_sambamba(
            filt_bam, args.nth, args.out_dir)
    elif marker == 'picard':
        dupmark_bam, _dup_qc = mark_dup_picard(
            filt_bam, args.out_dir, args.picard_java_heap)
    else:
        raise argparse.ArgumentTypeError('Unsupported --dup-marker {}'.format(
            marker))

    if not args.no_dup_removal:
        # filt_bam is only an intermediate once duplicates are removed.
        to_delete.append(filt_bam)
        log.info('Removing dupes...')
        dedup = rm_dup_pe if args.paired_end else rm_dup_se
        nodup_bam = dedup(dupmark_bam, args.nth, args.out_dir)
        samtools_index(dupmark_bam)
        to_delete.append(dupmark_bam + '.bai')
    else:
        # Duplicates are kept: the filtered BAM is carried forward as-is.
        nodup_bam = filt_bam
    # The dup-marked BAM is still read by the PBC QC step below and is
    # only deleted at the end.
    to_delete.append(dupmark_bam)

    if len(args.filter_chrs) == 0:
        final_bam = nodup_bam
    else:
        final_bam = remove_chrs_from_bam(
            nodup_bam, args.filter_chrs, args.chrsz, args.nth, args.out_dir)
        to_delete.append(nodup_bam)

    log.info('Checking if final BAM file is empty...')
    if bam_is_empty(final_bam, args.nth):
        raise ValueError(
            'No reads found in final (filtered/deduped) BAM. '
            'Low quality sample? '
            'Or BAM with duplicates only? '
        )

    log.info('samtools index (final_bam)...')
    samtools_index(final_bam, args.nth, args.out_dir)

    log.info('samstat...')
    samstat(final_bam, args.nth, args.mem_gb, args.out_dir)

    log.info('Generating PBC QC log...')
    if not args.paired_end:
        pbc_qc_se(dupmark_bam, args.mito_chr_name, args.out_dir)
    else:
        pbc_qc_pe(dupmark_bam, args.mito_chr_name, args.nth, args.out_dir)

    log.info('samtools index (raw bam)...')
    raw_bam_copy = copy_f_to_dir(args.bam, args.out_dir)
    raw_bai = samtools_index(raw_bam_copy, args.nth, args.out_dir)
    to_delete.extend([raw_bam_copy, raw_bai])

    log.info('Removing temporary files...')
    rm_f(to_delete)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
def main():
    """Filter a BAM, mark/remove duplicates and write QC outputs.

    Steps, in order:
      1. Parse arguments and create ``args.out_dir``.
      2. Remove unmapped/low-quality reads (PE or SE helper, chosen by
         ``args.paired_end``).
      3. Mark duplicates with the tool named by ``args.dup_marker``.
      4. Unless ``args.no_dup_removal`` is set, remove duplicates and
         index the dup-marked BAM.
      5. Optionally drop the chromosomes in ``args.filter_chrs``.
      6. Index the final BAM, run SAMstat and the PBC QC helper.
      7. Copy and index the raw BAM, then delete all temporary files.

    Raises:
        argparse.ArgumentTypeError: if ``args.dup_marker`` is neither
            ``'picard'`` nor ``'sambamba'``.
    """
    # Intermediate file flow:
    #   filt_bam - dupmark_bam - nodup_bam
    #            \ dup_qc      \ pbc_qc
    args = parse_arguments()
    is_paired = args.paired_end

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    # Everything appended here is deleted together at the end.
    scratch = []

    log.info('Removing unmapped/low-quality reads...')
    drop_bad_reads = (
        rm_unmapped_lowq_reads_pe if is_paired
        else rm_unmapped_lowq_reads_se
    )
    filt_bam = drop_bad_reads(
        args.bam,
        args.multimapping,
        args.mapq_thresh,
        args.nth,
        args.out_dir,
    )

    log.info('Marking dupes with {}...'.format(args.dup_marker))
    if args.dup_marker == 'sambamba':
        dupmark_bam, _dup_qc = mark_dup_sambamba(
            filt_bam, args.nth, args.out_dir)
    elif args.dup_marker == 'picard':
        dupmark_bam, _dup_qc = mark_dup_picard(filt_bam, args.out_dir)
    else:
        raise argparse.ArgumentTypeError(
            'Unsupported --dup-marker {}'.format(args.dup_marker))

    if not args.no_dup_removal:
        # filt_bam is only an intermediate once duplicates are removed.
        scratch.append(filt_bam)
        log.info('Removing dupes...')
        strip_dups = rm_dup_pe if is_paired else rm_dup_se
        nodup_bam = strip_dups(dupmark_bam, args.nth, args.out_dir)
        samtools_index(dupmark_bam)
        scratch.append(dupmark_bam+'.bai')
    else:
        # Duplicates are kept: the filtered BAM is carried forward as-is.
        nodup_bam = filt_bam
    # The dup-marked BAM is still read by the PBC QC step below and is
    # only deleted at the end.
    scratch.append(dupmark_bam)

    if len(args.filter_chrs) == 0:
        final_bam = nodup_bam
    else:
        final_bam = remove_chrs_from_bam(
            nodup_bam, args.filter_chrs, args.chrsz, args.nth, args.out_dir)
        scratch.append(nodup_bam)

    log.info('samtools index (final_bam)...')
    samtools_index(final_bam, args.nth, args.out_dir)

    log.info('samstat...')
    samstat(final_bam, args.nth, args.out_dir)

    log.info('Generating PBC QC log...')
    if not is_paired:
        pbc_qc_se(dupmark_bam, args.mito_chr_name, args.out_dir)
    else:
        pbc_qc_pe(dupmark_bam, args.mito_chr_name, args.nth, args.out_dir)

    log.info('samtools index (raw bam)...')
    raw_copy = copy_f_to_dir(args.bam, args.out_dir)
    raw_index = samtools_index(raw_copy, args.nth, args.out_dir)
    scratch.extend([raw_copy, raw_index])

    log.info('Removing temporary files...')
    rm_f(scratch)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')