def main_deplete_human(args):
    ''' Run the entire depletion pipeline: bmtagger, mvicuna, blastn.
        Optionally, use lastal to select a specific taxon of interest.

        Every stage reads the BAM written by the stage before it; all
        input, intermediate and output paths are taken from ``args``.
        Always returns 0.
    '''
    # Stage 1: RevertSam, requesting queryname sort order and sanitizing.
    reverter = tools.picard.RevertSamTool()
    reverter.execute(
        args.inBam,
        args.revertBam,
        picardOptions=['SORT_ORDER=queryname', 'SANITIZE=true'],
    )

    # Stage 2: run deplete_bmtagger_bam over the bmtagger databases.
    multi_db_deplete_bam(
        args.revertBam,
        args.bmtaggerDbs,
        deplete_bmtagger_bam,
        args.bmtaggerBam,
        threads=args.threads,
        JVMmemory=args.JVMmemory,
    )

    # Stage 3: mvicuna duplicate removal on the bmtagger output.
    read_utils.rmdup_mvicuna_bam(
        args.bmtaggerBam,
        args.rmdupBam,
        JVMmemory=args.JVMmemory,
    )

    # Stage 4: run deplete_blastn_bam over the blast databases.
    multi_db_deplete_bam(
        args.rmdupBam,
        args.blastDbs,
        deplete_blastn_bam,
        args.blastnBam,
        threads=args.threads,
        JVMmemory=args.JVMmemory,
    )

    # Stage 5 is optional: it needs both an output path and a lastal database.
    if not (args.taxfiltBam and args.lastDb):
        return 0
    filter_lastal_bam(
        args.blastnBam,
        args.lastDb,
        args.taxfiltBam,
        JVMmemory=args.JVMmemory,
    )
    return 0
def main_deplete_human(args):
    ''' Run the entire depletion pipeline: bmtagger, mvicuna, blastn.
        Optionally, use lastal to select a specific taxon of interest.

        The stages are chained through the BAM paths carried on ``args``
        (revertBam -> bmtaggerBam -> rmdupBam -> blastnBam -> taxfiltBam).
        Always returns 0.
    '''
    revert_options = ['SORT_ORDER=queryname', 'SANITIZE=true']
    tools.picard.RevertSamTool().execute(args.inBam, args.revertBam, picardOptions=revert_options)

    # bmtagger depletion across args.bmtaggerDbs
    multi_db_deplete_bam(
        args.revertBam, args.bmtaggerDbs, deplete_bmtagger_bam, args.bmtaggerBam,
        threads=args.threads, JVMmemory=args.JVMmemory,
    )

    # mvicuna duplicate removal
    read_utils.rmdup_mvicuna_bam(args.bmtaggerBam, args.rmdupBam, JVMmemory=args.JVMmemory)

    # blastn depletion across args.blastDbs
    multi_db_deplete_bam(
        args.rmdupBam, args.blastDbs, deplete_blastn_bam, args.blastnBam,
        threads=args.threads, JVMmemory=args.JVMmemory,
    )

    # The lastal filter only runs when both its output path and its
    # database were supplied.
    wants_taxfilt = args.taxfiltBam and args.lastDb
    if wants_taxfilt:
        filter_lastal_bam(args.blastnBam, args.lastDb, args.taxfiltBam, JVMmemory=args.JVMmemory)

    return 0
def main_deplete(args):
    ''' Run the entire depletion pipeline: bwa, bmtagger, mvicuna, blastn.

        Stages, in order (all paths and options are read from ``args``):
          1. bwa depletion of the (possibly reverted) input into args.bwaBam
          2. bmtagger depletion into args.bmtaggerBam
          3. mvicuna duplicate removal into args.rmdupBam
          4. blastn depletion into args.blastnBam

        Returns 0. Raises AssertionError when no bwa, bmtagger or blast
        database was supplied at all.
    '''

    assert len(args.bmtaggerDbs) + len(args.blastDbs) + len(args.bwaDbs) > 0

    # Only RevertSam if inBam is already aligned. Most of the time the input
    # will be unaligned, so time is saved by skipping RevertSam in that case.
    # The decision is delegated to read_utils.revert_bam_if_aligned, which
    # yields the BAM that the first depletion stage should read.
    with read_utils.revert_bam_if_aligned(
        args.inBam,
        revert_bam=args.revertBam,
        clear_tags=args.clear_tags,
        tags_to_clear=args.tags_to_clear,
        picardOptions=['MAX_DISCARD_FRACTION=0.5'],
        JVMmemory=args.JVMmemory,
        sanitize=not args.do_not_sanitize
    ) as bamToDeplete:
        multi_db_deplete_bam(
            bamToDeplete,
            args.bwaDbs,
            deplete_bwa_bam,
            args.bwaBam,
            threads=args.threads
        )

    # NOTE: there is deliberately no explicit os.unlink() of a temporary
    # reverted BAM here. This function never learns the path of any such
    # temp file (no local name holds it), so an unlink would raise NameError
    # whenever args.revertBam is not set. Cleanup is presumably handled by
    # the revert_bam_if_aligned context manager -- TODO confirm.

    def bmtagger_wrapper(inBam, db, outBam, JVMmemory=None):
        # Adapter that binds the srprism memory setting from args.
        return deplete_bmtagger_bam(inBam, db, outBam, srprism_memory=args.srprism_memory, JVMmemory=JVMmemory)

    multi_db_deplete_bam(
        args.bwaBam,
        args.bmtaggerDbs,
        bmtagger_wrapper,
        args.bmtaggerBam,
        JVMmemory=args.JVMmemory
    )

    read_utils.rmdup_mvicuna_bam(args.bmtaggerBam, args.rmdupBam, JVMmemory=args.JVMmemory)

    multi_db_deplete_bam(
        args.rmdupBam,
        args.blastDbs,
        deplete_blastn_bam,
        args.blastnBam,
        chunkSize=args.chunkSize,
        threads=args.threads,
        JVMmemory=args.JVMmemory
    )
    return 0
def main_deplete_human(args):
    ''' Run the entire depletion pipeline: bmtagger, mvicuna, blastn.
        Optionally, use lastal to select a specific taxon of interest.

        RevertSam is only run when the input BAM header carries at least
        one SQ entry, i.e. when the input looks aligned; otherwise the
        input is fed to the first depletion stage as-is. Always returns 0.
    '''
    # Per the SAM/BAM spec (https://samtools.github.io/hts-specs/SAMv1.pdf)
    # an aligned file should have SQ lines in its header, so their presence
    # is used as the "needs reverting" signal. Most inputs are unaligned,
    # which lets the RevertSam step be skipped.
    revert_target = args.revertBam or mkstempfname('.bam')
    depletion_input = args.inBam

    with pysam.AlignmentFile(args.inBam, 'rb', check_sq=False) as bam:
        header = bam.header
        looks_aligned = 'SQ' in header and len(header['SQ']) > 0
        if looks_aligned:
            tools.picard.RevertSamTool().execute(
                args.inBam,
                revert_target,
                picardOptions=['SORT_ORDER=queryname', 'SANITIZE=true'],
            )
            depletion_input = revert_target
        elif args.revertBam:
            # Nothing to revert, but the caller asked for a revertBam
            # output anyway: warn and touch the requested path.
            log.warning("An output was specified for 'revertBam', but the input is unaligned, so RevertSam was not needed. Touching the output.")
            util.file.touch(revert_target)
            # TODO: error out? run RevertSam anyway?

    multi_db_deplete_bam(
        depletion_input,
        args.bmtaggerDbs,
        deplete_bmtagger_bam,
        args.bmtaggerBam,
        threads=args.threads,
        JVMmemory=args.JVMmemory,
    )

    # Without a user-requested revertBam, revert_target is a temp file
    # that is no longer needed once bmtagger depletion has consumed it.
    if not args.revertBam:
        os.unlink(revert_target)

    read_utils.rmdup_mvicuna_bam(args.bmtaggerBam, args.rmdupBam, JVMmemory=args.JVMmemory)

    multi_db_deplete_bam(
        args.rmdupBam,
        args.blastDbs,
        deplete_blastn_bam,
        args.blastnBam,
        threads=args.threads,
        JVMmemory=args.JVMmemory,
    )

    # Optional lastal filtering needs both an output path and a database.
    if args.taxfiltBam and args.lastDb:
        filter_lastal_bam(args.blastnBam, args.lastDb, args.taxfiltBam, JVMmemory=args.JVMmemory)

    return 0
def test_mvicuna_empty_input(self):
    ''' rmdup_mvicuna_bam on the shared empty.bam fixture should produce
        an output BAM for which samtools reports a count of 0.
    '''
    sam_tool = tools.samtools.SamtoolsTool()
    # empty.bam is looked up in the path returned when no test-class
    # instance is passed to get_test_input_path.
    fixture_dir = util.file.get_test_input_path()
    source_bam = os.path.join(fixture_dir, 'empty.bam')
    deduped_bam = util.file.mkstempfname("output.bam")

    read_utils.rmdup_mvicuna_bam(source_bam, deduped_bam)

    observed_count = sam_tool.count(deduped_bam)
    self.assertEqual(observed_count, 0)
def main_deplete(args):
    ''' Run the entire depletion pipeline: bwa, bmtagger, mvicuna, blastn.

        All file paths, database lists and tuning options come from ``args``.
        The stages run in this order, each reading the previous stage's BAM:
        bwa depletion (args.bwaBam), bmtagger depletion (args.bmtaggerBam),
        mvicuna duplicate removal (args.rmdupBam), blastn depletion
        (args.blastnBam).

        Returns 0. Raises AssertionError if the bwa, bmtagger and blast
        database lists are all empty.
    '''

    assert len(args.bmtaggerDbs) + len(args.blastDbs) + len(args.bwaDbs) > 0

    # RevertSam is only wanted when inBam is already aligned; the usual
    # unaligned input can skip it and save time. That decision lives in
    # read_utils.revert_bam_if_aligned, whose context value is the BAM to
    # hand to the first depletion stage.
    with read_utils.revert_bam_if_aligned(
        args.inBam,
        revert_bam=args.revertBam,
        clear_tags=args.clear_tags,
        tags_to_clear=args.tags_to_clear,
        picardOptions=['MAX_DISCARD_FRACTION=0.5'],
        JVMmemory=args.JVMmemory,
        sanitize=not args.do_not_sanitize
    ) as bamToDeplete:
        multi_db_deplete_bam(
            bamToDeplete,
            args.bwaDbs,
            deplete_bwa_bam,
            args.bwaBam,
            threads=args.threads
        )

    def bmtagger_wrapper(inBam, db, outBam, JVMmemory=None):
        # Forwards to deplete_bmtagger_bam with args.srprism_memory bound.
        return deplete_bmtagger_bam(inBam, db, outBam, srprism_memory=args.srprism_memory, JVMmemory=JVMmemory)

    multi_db_deplete_bam(
        args.bwaBam,
        args.bmtaggerDbs,
        bmtagger_wrapper,
        args.bmtaggerBam,
        JVMmemory=args.JVMmemory
    )

    # NOTE: no manual removal of a temporary reverted BAM is attempted.
    # No name in this function refers to such a file, so trying to unlink
    # one would fail with NameError whenever args.revertBam is unset.
    # Temp-file lifetime is presumably owned by revert_bam_if_aligned
    # -- TODO confirm.

    read_utils.rmdup_mvicuna_bam(args.bmtaggerBam, args.rmdupBam, JVMmemory=args.JVMmemory)

    multi_db_deplete_bam(
        args.rmdupBam,
        args.blastDbs,
        deplete_blastn_bam,
        args.blastnBam,
        chunkSize=args.chunkSize,
        threads=args.threads,
        JVMmemory=args.JVMmemory
    )
    return 0
def test_mvicuna_canned_input(self):
    ''' rmdup_mvicuna_bam on this test class's input.bam should give an
        output whose samtools count equals that of expected.bam.
    '''
    sam_tool = tools.samtools.SamtoolsTool()
    # Both fixtures come from the input path resolved for this test instance.
    fixture_dir = util.file.get_test_input_path(self)
    source_bam = os.path.join(fixture_dir, 'input.bam')
    reference_bam = os.path.join(fixture_dir, 'expected.bam')
    deduped_bam = util.file.mkstempfname("output.bam")

    read_utils.rmdup_mvicuna_bam(source_bam, deduped_bam)

    observed_count = sam_tool.count(deduped_bam)
    expected_count = sam_tool.count(reference_bam)
    self.assertEqual(observed_count, expected_count)
def main_deplete_human(args):
    ''' Run the depletion pipeline: bmtagger, mvicuna, blastn.

        RevertSam is only run when the input BAM header carries at least
        one SQ entry, i.e. when the input looks aligned; otherwise the
        input goes straight to bmtagger depletion. No lastal filtering
        step is performed by this function. Always returns 0.
    '''
    # Per the SAM/BAM spec (https://samtools.github.io/hts-specs/SAMv1.pdf)
    # an aligned file should have SQ lines in its header. Most inputs are
    # unaligned, so checking for them lets the RevertSam step be skipped.
    revert_target = args.revertBam or mkstempfname('.bam')
    depletion_input = args.inBam

    with pysam.AlignmentFile(args.inBam, 'rb', check_sq=False) as bam:
        header = bam.header
        if 'SQ' in header and len(header['SQ']) > 0:
            # Looks aligned: revert it and deplete the reverted copy.
            revert_tool = tools.picard.RevertSamTool()
            revert_tool.execute(
                args.inBam,
                revert_target,
                picardOptions=['SORT_ORDER=queryname', 'SANITIZE=true'],
            )
            depletion_input = revert_target
        elif args.revertBam:
            # Nothing to revert, but a revertBam output was requested
            # anyway: warn and touch the requested path.
            log.warning(
                "An output was specified for 'revertBam', but the input is unaligned, so RevertSam was not needed. Touching the output."
            )
            util.file.touch(revert_target)
            # TODO: error out? run RevertSam anyway?

    def bmtagger_wrapper(inBam, db, outBam, threads, JVMmemory=None):
        # Adapter that binds args.srprism_memory for deplete_bmtagger_bam.
        return deplete_bmtagger_bam(
            inBam,
            db,
            outBam,
            threads=threads,
            srprism_memory=args.srprism_memory,
            JVMmemory=JVMmemory,
        )

    multi_db_deplete_bam(
        depletion_input,
        args.bmtaggerDbs,
        bmtagger_wrapper,
        args.bmtaggerBam,
        threads=args.threads,
        JVMmemory=args.JVMmemory,
    )

    # Without a user-requested revertBam, revert_target is a temp file
    # that is no longer needed once bmtagger depletion has consumed it.
    if not args.revertBam:
        os.unlink(revert_target)

    read_utils.rmdup_mvicuna_bam(args.bmtaggerBam, args.rmdupBam, JVMmemory=args.JVMmemory)

    multi_db_deplete_bam(
        args.rmdupBam,
        args.blastDbs,
        deplete_blastn_bam,
        args.blastnBam,
        chunkSize=args.chunkSize,
        threads=args.threads,
        JVMmemory=args.JVMmemory,
    )
    return 0
def test_mvicuna_empty_input(self):
    ''' Smoke test: rmdup_mvicuna_bam is run on the shared empty.bam
        fixture; the test only checks that the call completes, the
        output is not inspected.
    '''
    fixture_dir = util.file.get_test_input_path()
    source_bam = os.path.join(fixture_dir, 'empty.bam')
    scratch_output = util.file.mkstempfname()
    read_utils.rmdup_mvicuna_bam(source_bam, scratch_output)