def filter_bam(bamfile, bedfile):
    """Copy records lying in or near the regions of 'bedfile' to a BAM stream.

    The regions are obtained from collect_regions(bedfile, handle) and are
    consumed as a stack, i.e. the list is reversed and the next region to be
    processed is always the last element. Each region is unpacked as a
    (contig, start, end) triple. Records are written to "-" using the mode
    returned by samtools_compatible_wbu_mode(), with the input BAM as the
    header template.

    NOTE(review): assumes collect_regions returns the regions sorted by
    contig and position; verify against its definition.

    Always returns 0.
    """
    with pysam.Samfile(bamfile) as bam_handle_in:
        pending = collect_regions(bedfile, bam_handle_in)
        # Reversed so that the next region can be taken cheaply with pop()
        pending.reverse()

        out_mode = samtools_compatible_wbu_mode()
        with pysam.Samfile("-", out_mode,
                           template=bam_handle_in) as bam_handle_out:
            while pending:
                # Largest alignment end seen since the current fetch started
                furthest_aend = 0
                contig, start, end = pending[-1]
                for read in bam_handle_in.fetch(contig, start):
                    read_aend = read.aend
                    furthest_aend = max(furthest_aend, read_aend)

                    if read.pos > end:
                        # This record starts past the current region, so
                        # the region is done; move on to the next one.
                        previous_contig = pending.pop()[0]
                        if not pending:
                            break

                        contig, start, end = pending[-1]
                        # If the next region starts more than
                        # _BAM_BLOCK_SIZE beyond anything read so far, or is
                        # on another contig, stop iterating and let the
                        # outer loop issue a new fetch() for that region.
                        beyond_block = \
                            furthest_aend + _BAM_BLOCK_SIZE < start
                        if beyond_block or (contig != previous_contig):
                            break

                    if read_aend >= start:
                        bam_handle_out.write(read)
                else:
                    # fetch() ran out of records without a break, i.e. the
                    # end of this contig was reached; drop every remaining
                    # region located on the same contig.
                    while pending and (pending[-1][0] == contig):
                        pending.pop()

    return 0
def setup_basic_batch(args, regions, prefix, func):
    """Prepare the files, processes and BAM handles used by a single batch.

    Returns a dict with four sub-dicts:
      "files"      -- "bed" (result of write_bed_file) and "pipe" (result of
                      make_bam_pipe),
      "temp_files" -- the same two entries as "files",
      "procs"      -- "gzip", the process started via popen(["bgzip"], ...),
      "handles"    -- "outfile" (the file 'prefix' opened for writing),
                      "bam_in" (args.bamfile opened with pysam) and
                      "bam_out" (a pysam handle writing to the pipe, using
                      "bam_in" as the header template).

    'func' is called with the partially filled dict (bed, pipe and outfile
    are set at that point), and its return value is used as STDIN of the
    bgzip process, whose STDOUT is the "outfile" handle.

    If anything fails, the traceback is printed, cleanup_batch() is called
    with whatever has been set up so far, and the exception is re-raised.
    """
    setup = {section: {}
             for section in ("files", "temp_files", "procs", "handles")}

    try:
        bed_path = write_bed_file(prefix, regions)
        setup["files"]["bed"] = bed_path
        setup["temp_files"]["bed"] = bed_path

        pipe_path = make_bam_pipe(prefix)
        setup["files"]["pipe"] = pipe_path
        setup["temp_files"]["pipe"] = pipe_path

        setup["handles"]["outfile"] = open(prefix, "w")
        # func() must be invoked before the outfile handle is looked up
        # again, matching the argument evaluation order of a single call.
        bgzip_stdin = func(setup)
        setup["procs"]["gzip"] = popen(["bgzip"],
                                       stdin=bgzip_stdin,
                                       stdout=setup["handles"]["outfile"],
                                       close_fds=True)

        bam_mode = samtools_compatible_wbu_mode()
        bam_in = pysam.Samfile(args.bamfile)
        setup["handles"]["bam_in"] = bam_in
        setup["handles"]["bam_out"] = pysam.Samfile(setup["files"]["pipe"],
                                                    bam_mode,
                                                    template=bam_in)

        return setup
    except:
        # Deliberately catches everything (including KeyboardInterrupt) so
        # that partially created resources are cleaned up; the exception is
        # always re-raised.
        traceback.print_exc()
        cleanup_batch(setup)
        raise
def filter_bam(bamfile, bedfile):
    """Copy records lying in or near the regions of 'bedfile' to a BAM stream.

    The regions are obtained from collect_regions(bedfile, handle) and are
    consumed as a stack, i.e. the list is reversed and the next region to be
    processed is always the last element. Each region is unpacked as a
    (contig, start, end) triple. Records are written to "-" using the mode
    returned by samtools_compatible_wbu_mode(), with the input BAM as the
    header template.

    NOTE(review): assumes collect_regions returns the regions sorted by
    contig and position; verify against its definition.

    Always returns 0.
    """
    with pysam.Samfile(bamfile) as bam_handle_in:
        pending = collect_regions(bedfile, bam_handle_in)
        # Reversed so that the next region can be taken cheaply with pop()
        pending.reverse()

        out_mode = samtools_compatible_wbu_mode()
        with pysam.Samfile("-", out_mode,
                           template=bam_handle_in) as bam_handle_out:
            while pending:
                # Largest alignment end seen since the current fetch started
                furthest_aend = 0
                contig, start, end = pending[-1]
                for read in bam_handle_in.fetch(contig, start):
                    read_aend = read.aend
                    furthest_aend = max(furthest_aend, read_aend)

                    if read.pos > end:
                        # This record starts past the current region, so
                        # the region is done; move on to the next one.
                        previous_contig = pending.pop()[0]
                        if not pending:
                            break

                        contig, start, end = pending[-1]
                        # If the next region starts more than
                        # _BAM_BLOCK_SIZE beyond anything read so far, or is
                        # on another contig, stop iterating and let the
                        # outer loop issue a new fetch() for that region.
                        beyond_block = \
                            furthest_aend + _BAM_BLOCK_SIZE < start
                        if beyond_block or (contig != previous_contig):
                            break

                    if read_aend >= start:
                        bam_handle_out.write(read)
                else:
                    # fetch() ran out of records without a break, i.e. the
                    # end of this contig was reached; drop every remaining
                    # region located on the same contig.
                    while pending and (pending[-1][0] == contig):
                        pending.pop()

    return 0
def _pipe_to_bam():
    """Copy every record of a SAM/BAM stream read from "-" to a BAM stream
    written to "-", using the input as header template.

    This pass-through exists because SAM files that contain only a header
    (i.e. no records) are not handled properly by "samtools view -S -",
    which fails to parse them.

    Always returns 0.
    """
    with pysam.Samfile("-", "r") as source:
        out_mode = samtools_compatible_wbu_mode()
        with pysam.Samfile("-", out_mode, template=source) as sink:
            for read in source:
                sink.write(read)

    return 0
def _cleanup_unmapped(args, cleanup_sam):
    """Filter and tidy records read from STDIN, writing them to STDOUT.

    The input is read as SAM if 'cleanup_sam' is true, otherwise as BAM.
    A copy of the input header is updated via _set_sort_order and
    _set_pg_tags (and via _set_rg_tags when args.rg_id is given) and used
    as the header of the output, which is opened with the mode returned by
    samtools_compatible_wbu_mode().

    For every record:
      - it is skipped if its mapq is below args.min_quality or if any of
        the bits in args.exclude_flags is set in its flag;
      - if the read itself is unmapped it is replaced by the result of
        _cleanup_record;
      - otherwise, if only the mate is unmapped, rnext / pnext are pointed
        at the read's own tid / pos and tlen is reset to 0;
      - if args.rg_id is given, any existing RG tags are removed and a
        single RG tag with that ID is appended.

    Always returns 0.
    """
    read_mode = "rb"
    if cleanup_sam:
        read_mode = "r"

    with pysam.Samfile("-", read_mode) as input_handle:
        header = copy.deepcopy(input_handle.header)
        _set_sort_order(header)
        _set_pg_tags(header, args.update_pg_tag)

        rg_id = args.rg_id
        replace_rg = rg_id is not None
        if replace_rg:
            _set_rg_tags(header, rg_id, args.rg)

        out_mode = samtools_compatible_wbu_mode()
        with pysam.Samfile("-", out_mode, header=header) as output_handle:
            for read in input_handle:
                # Drop low quality records and records with excluded flags
                if read.mapq < args.min_quality:
                    continue
                if read.flag & args.exclude_flags:
                    continue

                if read.is_unmapped:
                    # Read is unmapped; clear all non-required fields
                    read = _cleanup_record(read)
                elif read.mate_is_unmapped:
                    # Mate is unmapped; point the mate fields at the read
                    read.rnext = read.tid
                    read.pnext = read.pos
                    read.tlen = 0

                if replace_rg:
                    # Make sure that exactly one RG tag is present
                    kept = [(name, value)
                            for (name, value) in read.tags
                            if name != "RG"]
                    read.tags = kept + [("RG", rg_id)]

                output_handle.write(read)

    return 0