Example #1
0
def main():
    """Align FASTQ reads with bwa and verify the resulting BAM has reads.

    Steps, in order: parse arguments, create the output directory, unpack
    the bwa index if it was supplied as a tarball, validate the index, run
    paired-end or single-end bwa, delete temporary files, check that the
    BAM is not empty, and list the output directory.

    Raises:
        ValueError: If the BAM produced by bwa is reported as empty.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    # Paths (or glob patterns) to delete once alignment has finished.
    temp_files = []

    # The index is given either as a ready-to-use prefix or as a tarball
    # that must be unpacked into the output directory first.
    index_arg = args.bwa_index_prefix_or_tar
    if index_arg.endswith(('.tar', '.tar.gz')):
        log.info('Unpacking bwa index tar...')
        untar(index_arg, args.out_dir)
        bwa_index_prefix = find_bwa_index_prefix(args.out_dir)
        # Unpacked index files are only needed for the duration of this run.
        unpacked_index_glob = '{}*'.format(bwa_index_prefix)
        temp_files.append(unpacked_index_glob)
    else:
        bwa_index_prefix = index_arg

    # Validate the bwa index before starting the alignment.
    chk_bwa_index(bwa_index_prefix)

    log.info('Running bwa...')
    if not args.paired_end:
        bam = bwa_se(args.fastqs[0], bwa_index_prefix, args.nth,
                     args.mem_gb, args.out_dir)
    else:
        bam = bwa_pe(args.fastqs[0], args.fastqs[1], bwa_index_prefix,
                     args.nth, args.mem_gb, args.use_bwa_mem_for_pe,
                     args.bwa_mem_read_len_limit,
                     args.rescue_reads_for_bwa_mem, args.out_dir)

    # Clean up before the emptiness check so a failing run does not leave
    # the temporary files behind.
    log.info('Removing temporary files...')
    rm_f(temp_files)

    log.info('Checking if BAM file is empty...')
    if bam_is_empty(bam, args.nth):
        raise ValueError('BAM file is empty, no reads found.')

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
def main():
    """Filter, duplicate-mark and de-duplicate a BAM, then write QC outputs.

    Data flow:
        filt_bam -> dupmark_bam -> nodup_bam -> final_bam
                    (+ dup_qc)
        The PBC QC log is generated from dupmark_bam.

    Raises:
        ValueError: If the filtered BAM or the final BAM is reported empty.
        argparse.ArgumentTypeError: If ``args.dup_marker`` is neither
            'picard' nor 'sambamba'.
    """
    args = parse_arguments()

    log.info('Initializing and making output directory...')
    mkdir_p(args.out_dir)

    # Intermediate files, deleted in one go near the end of the run.
    temp_files = []

    log.info('Removing unmapped/low-quality reads...')
    rm_lowq_reads = (rm_unmapped_lowq_reads_pe if args.paired_end
                     else rm_unmapped_lowq_reads_se)
    filt_bam = rm_lowq_reads(args.bam, args.multimapping, args.mapq_thresh,
                             args.nth, args.mem_gb, args.out_dir)

    log.info('Checking if filtered BAM file is empty...')

    if bam_is_empty(filt_bam, args.nth):
        hints = [
            'No reads found in filtered BAM. ',
            'Low quality sample? ',
            'Or no reads passing criteria "samtools view -F 1804"? ',
            'Check samtools flags at ',
            'https://broadinstitute.github.io/picard/explain-flags.html. ',
        ]
        if args.paired_end:
            # Extra hint that only makes sense for paired-end input.
            hints.append('Or is this truely PE BAM? ')
            hints.append(
                'All unpaired SE reads could be removed by '
                '"samtools view -f 2". ')
        raise ValueError(''.join(hints))

    log.info('Marking dupes with {}...'.format(args.dup_marker))
    if args.dup_marker == 'picard':
        dupmark_bam, dup_qc = mark_dup_picard(
            filt_bam, args.out_dir, args.picard_java_heap)
    elif args.dup_marker == 'sambamba':
        dupmark_bam, dup_qc = mark_dup_sambamba(
            filt_bam, args.nth, args.out_dir)
    else:
        raise argparse.ArgumentTypeError('Unsupported --dup-marker {}'.format(
            args.dup_marker))

    if not args.no_dup_removal:
        # filt_bam is superseded by the de-duplicated BAM, so it becomes
        # a temporary file.
        temp_files.append(filt_bam)
        log.info('Removing dupes...')
        rm_dup = rm_dup_pe if args.paired_end else rm_dup_se
        nodup_bam = rm_dup(dupmark_bam, args.nth, args.out_dir)
        samtools_index(dupmark_bam)
        temp_files.append(dupmark_bam + '.bai')
    else:
        # Duplicates are kept: the filtered BAM is carried forward as-is.
        nodup_bam = filt_bam
    temp_files.append(dupmark_bam)

    # Optionally drop the requested chromosomes from the BAM.
    final_bam = nodup_bam
    if len(args.filter_chrs) > 0:
        final_bam = remove_chrs_from_bam(nodup_bam, args.filter_chrs,
                                         args.chrsz, args.nth, args.out_dir)
        temp_files.append(nodup_bam)

    log.info('Checking if final BAM file is empty...')
    if bam_is_empty(final_bam, args.nth):
        raise ValueError(
            'No reads found in final (filtered/deduped) BAM. '
            'Low quality sample? '
            'Or BAM with duplicates only? ')

    log.info('samtools index (final_bam)...')
    samtools_index(final_bam, args.nth, args.out_dir)

    log.info('samstat...')
    samstat(final_bam, args.nth, args.mem_gb, args.out_dir)

    log.info('Generating PBC QC log...')
    if args.paired_end:
        pbc_qc_pe(dupmark_bam, args.mito_chr_name, args.nth, args.out_dir)
    else:
        pbc_qc_se(dupmark_bam, args.mito_chr_name, args.out_dir)

    # Index a copy of the raw input BAM; the copy and its index are both
    # registered as temporary files.
    log.info('samtools index (raw bam)...')
    raw_bam = copy_f_to_dir(args.bam, args.out_dir)
    raw_bai = samtools_index(raw_bam, args.nth, args.out_dir)
    temp_files += [raw_bam, raw_bai]

    log.info('Removing temporary files...')
    rm_f(temp_files)

    log.info('List all files in output directory...')
    ls_l(args.out_dir)

    log.info('All done.')
Example #3
0
def rm_unmapped_lowq_reads_pe(bam, multimapping, mapq_thresh, nth, mem_gb,
                              out_dir):
    """Filter a paired-end BAM down to properly paired, usable reads.

    There are pipes with multiple samtools commands.
    For such pipes, use multiple threads (-@) for only one of them.
    Priority is on sort > index > fixmate > view.

    Pipeline:
        1. Pre-filter ``bam`` with ``samtools view`` and name-sort it into
           a temporary BAM. With ``multimapping`` set, the filter is
           ``-F 524 -f 2`` and ``mapq_thresh`` is not used; otherwise it
           is ``-F 1804 -f 2 -q mapq_thresh``.
        2. Run ``samtools fixmate -r``. With ``multimapping`` set, reads
           are first piped through ``assign_multimappers.py``.
        3. Filter the fixmate output with ``-F 1804 -f 2`` and sort it
           into the final ``<prefix>.filt.bam``.

    The temporary and fixmate BAMs are deleted as soon as they have been
    consumed.

    Args:
        bam: Path of the input BAM.
        multimapping: Value passed to ``assign_multimappers.py -k``; a
            falsy value selects the MAPQ-threshold branch instead.
        mapq_thresh: Value for ``samtools view -q`` (non-multimapping
            branch only).
        nth: Thread count passed to ``get_samtools_res_param`` and
            ``bam_is_empty``.
        mem_gb: Memory budget passed to ``get_samtools_res_param`` for
            the sort commands.
        out_dir: Directory in which all output files are written.

    Returns:
        Path of the filtered BAM (``<out_dir>/<basename>.filt.bam``).

    Raises:
        ValueError: If the filtered BAM is reported empty.
    """
    prefix = os.path.join(out_dir, os.path.basename(strip_ext_bam(bam)))
    filt_bam = '{}.filt.bam'.format(prefix)
    tmp_filt_bam = '{}.tmp_filt.bam'.format(prefix)
    fixmate_bam = '{}.fixmate.bam'.format(prefix)

    # Both branches and the final step attach resource flags to
    # `samtools sort`, so they all use the same 'sort' parameters.
    sort_res_param = get_samtools_res_param('sort', nth=nth, mem_gb=mem_gb)
    fixmate_res_param = get_samtools_res_param('fixmate', nth=nth)

    if multimapping:
        # Fix: the flags here are attached to `samtools sort`, so request
        # 'sort' parameters with mem_gb (previously 'view' without it),
        # matching the non-multimapping branch and the stated priority.
        run_shell_cmd(
            'samtools view -F 524 -f 2 -u {bam} | '
            'samtools sort -n /dev/stdin -o {tmp_filt_bam} -T {prefix} {res_param} '
            .format(
                bam=bam,
                tmp_filt_bam=tmp_filt_bam,
                prefix=prefix,
                res_param=sort_res_param,
            ))

        run_shell_cmd(
            'samtools view -h {tmp_filt_bam} | '
            '$(which assign_multimappers.py) -k {multimapping} --paired-end | '
            'samtools fixmate -r /dev/stdin {fixmate_bam} {res_param}'.format(
                tmp_filt_bam=tmp_filt_bam,
                multimapping=multimapping,
                fixmate_bam=fixmate_bam,
                res_param=fixmate_res_param,
            ))
    else:
        run_shell_cmd(
            'samtools view -F 1804 -f 2 -q {mapq_thresh} -u {bam} | '
            'samtools sort -n /dev/stdin -o {tmp_filt_bam} -T {prefix} {res_param}'
            .format(
                mapq_thresh=mapq_thresh,
                bam=bam,
                tmp_filt_bam=tmp_filt_bam,
                prefix=prefix,
                res_param=sort_res_param,
            ))
        run_shell_cmd(
            'samtools fixmate -r {tmp_filt_bam} {fixmate_bam} {res_param}'.
            format(
                tmp_filt_bam=tmp_filt_bam,
                fixmate_bam=fixmate_bam,
                res_param=fixmate_res_param,
            ))

    # The name-sorted intermediate is no longer needed after fixmate.
    rm_f(tmp_filt_bam)

    run_shell_cmd(
        'samtools view -F 1804 -f 2 -u {fixmate_bam} | '
        'samtools sort /dev/stdin -o {filt_bam} -T {prefix} {res_param}'.
        format(
            fixmate_bam=fixmate_bam,
            filt_bam=filt_bam,
            prefix=prefix,
            res_param=sort_res_param,
        ))

    rm_f(fixmate_bam)

    log.info('Checking if filtered (but not deduped) BAM is empty '
             'after filtering with "samtools view -F 1804 -f 2".')
    if bam_is_empty(filt_bam, nth):
        raise ValueError(
            'No reads found aftering filtering "samtools fixmate"d PE BAM with '
            '"samtools view -F 1804 -f 2". '
            'Reads are not properly paired even though mapping rate is good? ')

    return filt_bam