Ejemplo n.º 1
0
def main_deplete(args):
    '''Run the entire depletion pipeline: bwa, bmtagger, mvicuna, blastn.

    Each stage reads the file written by the previous one:

      1. args.inBam is handed to read_utils.revert_bam_if_aligned and the
         resulting BAM is depleted against args.bwaDbs  -> args.bwaBam
      2. args.bwaBam is depleted against args.bmtaggerDbs -> args.bmtaggerBam
      3. args.bmtaggerBam is de-duplicated with
         read_utils.rmdup_mvicuna_bam                    -> args.rmdupBam
      4. args.rmdupBam is depleted against args.blastDbs -> args.blastnBam

    Args:
        args: parsed command-line namespace. Attributes read here: inBam,
            revertBam, clear_tags, tags_to_clear, do_not_sanitize,
            JVMmemory, threads, srprism_memory, chunkSize, the three
            database lists (bwaDbs, bmtaggerDbs, blastDbs) and the four
            output paths (bwaBam, bmtaggerBam, rmdupBam, blastnBam).

    Returns:
        0 once all stages have run.

    Raises:
        AssertionError: if all three database lists are empty.
    '''

    assert len(args.bmtaggerDbs) + len(args.blastDbs) + len(args.bwaDbs) > 0, \
        'at least one bwa, bmtagger or blast database is required'

    # Only RevertSam if inBam is already aligned.
    # Most of the time the input will be unaligned, so we can save time
    # if we can skip RevertSam in the unaligned case.
    #
    # Via the SAM/BAM spec, if the file is aligned, an SQ line should be
    # present in the header. Using pysam, we can check this with
    # len(header['SQ']) > 0
    #   https://samtools.github.io/hts-specs/SAMv1.pdf
    with read_utils.revert_bam_if_aligned(
            args.inBam,
            revert_bam=args.revertBam,
            clear_tags=args.clear_tags,
            tags_to_clear=args.tags_to_clear,
            picardOptions=['MAX_DISCARD_FRACTION=0.5'],
            JVMmemory=args.JVMmemory,
            sanitize=not args.do_not_sanitize) as bamToDeplete:
        multi_db_deplete_bam(bamToDeplete,
                             args.bwaDbs,
                             deplete_bwa_bam,
                             args.bwaBam,
                             threads=args.threads)

    # NOTE: there is deliberately no explicit temp-file removal here. An
    # earlier revision called os.unlink(revertBamOut), but that name is never
    # bound in this function, so the call raised NameError whenever
    # args.revertBam was unset. Any intermediate file is presumably owned and
    # cleaned up by the revert_bam_if_aligned context manager on exit --
    # TODO confirm against read_utils.

    def bmtagger_wrapper(inBam, db, outBam, JVMmemory=None):
        # Adapter that injects the user's srprism memory setting, which
        # multi_db_deplete_bam is not given directly.
        return deplete_bmtagger_bam(inBam,
                                    db,
                                    outBam,
                                    srprism_memory=args.srprism_memory,
                                    JVMmemory=JVMmemory)

    multi_db_deplete_bam(args.bwaBam,
                         args.bmtaggerDbs,
                         bmtagger_wrapper,
                         args.bmtaggerBam,
                         JVMmemory=args.JVMmemory)

    read_utils.rmdup_mvicuna_bam(args.bmtaggerBam,
                                 args.rmdupBam,
                                 JVMmemory=args.JVMmemory)
    multi_db_deplete_bam(args.rmdupBam,
                         args.blastDbs,
                         deplete_blastn_bam,
                         args.blastnBam,
                         chunkSize=args.chunkSize,
                         threads=args.threads,
                         JVMmemory=args.JVMmemory)
    return 0
Ejemplo n.º 2
0
def main_deplete(args):
    '''Run the entire depletion pipeline: bwa, bmtagger, mvicuna, blastn.

    The stages are chained through files on disk:

      * bwa:      args.inBam (via read_utils.revert_bam_if_aligned) is
                  depleted against args.bwaDbs, writing args.bwaBam
      * bmtagger: args.bwaBam is depleted against args.bmtaggerDbs, writing
                  args.bmtaggerBam
      * mvicuna:  args.bmtaggerBam is de-duplicated into args.rmdupBam
      * blastn:   args.rmdupBam is depleted against args.blastDbs, writing
                  args.blastnBam

    Args:
        args: parsed command-line namespace supplying the input BAM, the
            database lists, the per-stage output paths and the tuning
            options (threads, JVMmemory, srprism_memory, chunkSize,
            revertBam, clear_tags, tags_to_clear, do_not_sanitize).

    Returns:
        0 after the final stage completes.

    Raises:
        AssertionError: if no bwa, bmtagger or blast database was given.
    '''

    assert len(args.bmtaggerDbs) + len(args.blastDbs) + len(args.bwaDbs) > 0, \
        'at least one bwa, bmtagger or blast database is required'

    # Only RevertSam if inBam is already aligned.
    # Most of the time the input will be unaligned, so we can save time
    # if we can skip RevertSam in the unaligned case.
    #
    # Via the SAM/BAM spec, if the file is aligned, an SQ line should be
    # present in the header. Using pysam, we can check this with
    # len(header['SQ']) > 0
    #   https://samtools.github.io/hts-specs/SAMv1.pdf
    with read_utils.revert_bam_if_aligned(              args.inBam,
                                        revert_bam    = args.revertBam,
                                        clear_tags    = args.clear_tags,
                                        tags_to_clear = args.tags_to_clear,
                                        picardOptions = ['MAX_DISCARD_FRACTION=0.5'],
                                        JVMmemory     = args.JVMmemory,
                                        sanitize      = not args.do_not_sanitize) as bamToDeplete:
        multi_db_deplete_bam(
            bamToDeplete,
            args.bwaDbs,
            deplete_bwa_bam,
            args.bwaBam,
            threads=args.threads
        )

    def bmtagger_wrapper(inBam, db, outBam, JVMmemory=None):
        # Adapter so that srprism_memory reaches deplete_bmtagger_bam even
        # though multi_db_deplete_bam is only given JVMmemory.
        return deplete_bmtagger_bam(inBam, db, outBam, srprism_memory=args.srprism_memory, JVMmemory=JVMmemory)

    multi_db_deplete_bam(
        args.bwaBam,
        args.bmtaggerDbs,
        bmtagger_wrapper,
        args.bmtaggerBam,
        JVMmemory=args.JVMmemory
    )

    # NOTE: the former `os.unlink(revertBamOut)` cleanup was removed. The name
    # revertBamOut is not defined anywhere in this function, so the call
    # raised NameError whenever args.revertBam was unset and stopped the
    # pipeline before the rmdup and blastn stages. Cleanup of any intermediate
    # BAM is presumably handled by the revert_bam_if_aligned context manager
    # -- TODO confirm against read_utils.

    read_utils.rmdup_mvicuna_bam(args.bmtaggerBam, args.rmdupBam, JVMmemory=args.JVMmemory)
    multi_db_deplete_bam(
        args.rmdupBam,
        args.blastDbs,
        deplete_blastn_bam,
        args.blastnBam,
        chunkSize=args.chunkSize,
        threads=args.threads,
        JVMmemory=args.JVMmemory
    )
    return 0
Ejemplo n.º 3
0
def main_deplete_bwa_bam(args):
    '''Use BWA to remove reads that match at least one of the specified databases.

    args.inBam is first passed through read_utils.revert_bam_if_aligned; the
    BAM it yields is then depleted against every database in args.refDbs and
    the result is written to args.outBam.

    Args:
        args: parsed command-line namespace. Attributes read here: inBam,
            refDbs, outBam, threads, clear_tags, tags_to_clear, JVMmemory
            and do_not_sanitize.

    Returns:
        0 once depletion has finished.
    '''
    # Options for the revert step.
    revert_options = dict(
        clear_tags=args.clear_tags,
        tags_to_clear=args.tags_to_clear,
        picardOptions=['MAX_DISCARD_FRACTION=0.5'],
        JVMmemory=args.JVMmemory,
        sanitize=not args.do_not_sanitize,
    )

    with read_utils.revert_bam_if_aligned(args.inBam, **revert_options) as prepared_bam:
        # Extra keyword arguments handed to multi_db_deplete_bam alongside
        # the deplete_bwa_bam callable.
        deplete_options = dict(
            threads=args.threads,
            clear_tags=args.clear_tags,
            tags_to_clear=args.tags_to_clear,
            JVMmemory=args.JVMmemory,
        )
        multi_db_deplete_bam(
            prepared_bam,
            args.refDbs,
            deplete_bwa_bam,
            args.outBam,
            **deplete_options
        )

    return 0
Ejemplo n.º 4
0
def main_deplete_bam_bmtagger(args):
    '''Use bmtagger to deplete input reads against several databases.

    args.inBam is first passed through read_utils.revert_bam_if_aligned; the
    BAM it yields is then depleted against every database in args.refDbs and
    the result is written to args.outBam.

    Args:
        args: parsed command-line namespace. Attributes read here: inBam,
            refDbs, outBam, srprism_memory, clear_tags, tags_to_clear,
            JVMmemory and do_not_sanitize.
    '''

    def bmtagger_wrapper(inBam, db, outBam, JVMmemory=None):
        # Adapter so that srprism_memory reaches deplete_bmtagger_bam even
        # though multi_db_deplete_bam is only given JVMmemory.
        return deplete_bmtagger_bam(inBam, db, outBam, srprism_memory=args.srprism_memory, JVMmemory=JVMmemory)

    with read_utils.revert_bam_if_aligned(              args.inBam,
                                        clear_tags    = args.clear_tags,
                                        tags_to_clear = args.tags_to_clear,
                                        picardOptions = ['MAX_DISCARD_FRACTION=0.5'],
                                        JVMmemory     = args.JVMmemory,
                                        sanitize      = not args.do_not_sanitize) as bamToDeplete:
        # Deplete the BAM yielded by the context manager, not the raw
        # args.inBam: passing args.inBam here discarded the result of the
        # revert/sanitize step entirely.
        multi_db_deplete_bam(
            bamToDeplete,
            args.refDbs,
            bmtagger_wrapper,
            args.outBam,
            JVMmemory=args.JVMmemory
        )