Esempio n. 1
0
def filter_bam(bamfile, bedfile):
    """Copy records from 'bamfile' that lie at the regions of 'bedfile' to a
    Samfile opened on "-" (stdout).

    The regions are obtained from collect_regions() and are processed one at
    a time as (contig, start, end) tuples. Records are fetched starting at the
    start of the current region, and a record is written when its alignment
    end ('aend') is at or past the start of the region that is current at
    that point. The output handle uses the input BAM as its template and the
    write mode returned by samtools_compatible_wbu_mode().

    NOTE(review): the region-advancing logic below presumably relies on
    collect_regions() returning the regions sorted by contig and position;
    verify against that function.

    Always returns 0.
    """
    with pysam.Samfile(bamfile) as bam_handle_in:
        regions = collect_regions(bedfile, bam_handle_in)
        # Reversed so the list can be used as a stack: the region to process
        # next is always regions[-1], and finished regions are pop()'ed.
        regions.reverse()

        write_mode = samtools_compatible_wbu_mode()
        with pysam.Samfile("-", write_mode,
                           template=bam_handle_in) as bam_handle_out:
            # Each iteration of this loop starts a new fetch at regions[-1]
            while regions:
                # Furthest alignment end observed during the current fetch
                region_aend = 0
                contig, start, end = regions[-1]
                for record in bam_handle_in.fetch(contig, start):
                    current_aend = record.aend
                    region_aend = max(region_aend, current_aend)
                    if record.pos > end:
                        # This record starts past the end of the current
                        # region, so that region is done; advance to the next
                        last_contig, _, _ = regions.pop()
                        if not regions:
                            break

                        contig, start, end = regions[-1]
                        # Abandon the current fetch (the outer loop starts a
                        # new one at the next region) if that region is on a
                        # different contig, or starts more than
                        # _BAM_BLOCK_SIZE past the furthest alignment end seen
                        # so far. Otherwise keep reading from the same fetch.
                        # NOTE(review): presumably re-fetching is only
                        # worthwhile for gaps larger than this size; confirm.
                        if (region_aend + _BAM_BLOCK_SIZE < start) \
                                or (contig != last_contig):
                            break

                    # 'start' may have been updated to the next region above
                    if current_aend >= start:
                        bam_handle_out.write(record)
                else:  # Reached the end of this contig
                    # The fetch ran to completion without a 'break'; drop
                    # every remaining region located on this contig.
                    while regions and (regions[-1][0] == contig):
                        regions.pop()

    return 0
Esempio n. 2
0
def setup_basic_batch(args, regions, prefix, func):
    """Prepare the files, handles and processes used by a batch run.

    A BED file and a BAM pipe are created from 'prefix' (both are recorded
    under "files" and "temp_files"), 'prefix' itself is opened as the output
    file, and a "bgzip" process is started which reads from whatever
    'func(setup)' returns and writes to that output file. Finally, the input
    BAM ('args.bamfile') is opened, together with an output BAM handle that
    writes to the pipe and uses the input BAM as its template.

    Returns the 'setup' dictionary, containing the sub-dictionaries "files",
    "temp_files", "procs" and "handles". If any step fails, the traceback is
    printed, cleanup_batch() is called with the partially filled dictionary,
    and the exception is re-raised.
    """
    setup = {
        "files": {},
        "temp_files": {},
        "procs": {},
        "handles": {},
    }

    try:
        bed_path = write_bed_file(prefix, regions)
        setup["files"]["bed"] = bed_path
        setup["temp_files"]["bed"] = bed_path

        pipe_path = make_bam_pipe(prefix)
        setup["files"]["pipe"] = pipe_path
        setup["temp_files"]["pipe"] = pipe_path

        setup["handles"]["outfile"] = open(prefix, "w")

        # 'func' is handed the partially built setup and supplies the input
        # stream of the bgzip process.
        bgzip_stdin = func(setup)
        setup["procs"]["gzip"] = popen(["bgzip"],
                                       stdin=bgzip_stdin,
                                       stdout=setup["handles"]["outfile"],
                                       close_fds=True)

        write_mode = samtools_compatible_wbu_mode()
        bam_in = pysam.Samfile(args.bamfile)
        setup["handles"]["bam_in"] = bam_in
        setup["handles"]["bam_out"] = pysam.Samfile(setup["files"]["pipe"],
                                                    write_mode,
                                                    template=bam_in)

        return setup
    except:
        # Deliberately catches everything: whatever was set up so far must be
        # cleaned up, and the exception is re-raised unchanged afterwards.
        traceback.print_exc()
        cleanup_batch(setup)
        raise
Esempio n. 3
0
def setup_basic_batch(args, regions, prefix, func):
    """Prepare the files, handles and processes used by a batch run.

    A BED file and a BAM pipe are created from 'prefix' (both are recorded
    under "files" and "temp_files"), 'prefix' itself is opened as the output
    file, and a "bgzip" process is started which reads from whatever
    'func(setup)' returns and writes to that output file. Finally, the input
    BAM ('args.bamfile') is opened, together with an output BAM handle that
    writes to the pipe and uses the input BAM as its template.

    Returns the 'setup' dictionary, containing the sub-dictionaries "files",
    "temp_files", "procs" and "handles". If any step fails, the traceback is
    printed, cleanup_batch() is called with the partially filled dictionary,
    and the exception is re-raised.
    """
    setup = {
        "files": {},
        "temp_files": {},
        "procs": {},
        "handles": {},
    }

    try:
        bed_path = write_bed_file(prefix, regions)
        setup["files"]["bed"] = bed_path
        setup["temp_files"]["bed"] = bed_path

        pipe_path = make_bam_pipe(prefix)
        setup["files"]["pipe"] = pipe_path
        setup["temp_files"]["pipe"] = pipe_path

        setup["handles"]["outfile"] = open(prefix, "w")

        # 'func' is handed the partially built setup and supplies the input
        # stream of the bgzip process.
        bgzip_stdin = func(setup)
        setup["procs"]["gzip"] = popen(["bgzip"],
                                       stdin=bgzip_stdin,
                                       stdout=setup["handles"]["outfile"],
                                       close_fds=True)

        write_mode = samtools_compatible_wbu_mode()
        bam_in = pysam.Samfile(args.bamfile)
        setup["handles"]["bam_in"] = bam_in
        setup["handles"]["bam_out"] = pysam.Samfile(setup["files"]["pipe"],
                                                    write_mode,
                                                    template=bam_in)

        return setup
    except:
        # Deliberately catches everything: whatever was set up so far must be
        # cleaned up, and the exception is re-raised unchanged afterwards.
        traceback.print_exc()
        cleanup_batch(setup)
        raise
Esempio n. 4
0
def filter_bam(bamfile, bedfile):
    """Copy records from 'bamfile' that lie at the regions of 'bedfile' to a
    Samfile opened on "-" (stdout).

    The regions are obtained from collect_regions() and are processed one at
    a time as (contig, start, end) tuples. Records are fetched starting at the
    start of the current region, and a record is written when its alignment
    end ('aend') is at or past the start of the region that is current at
    that point. The output handle uses the input BAM as its template and the
    write mode returned by samtools_compatible_wbu_mode().

    NOTE(review): the region-advancing logic below presumably relies on
    collect_regions() returning the regions sorted by contig and position;
    verify against that function.

    Always returns 0.
    """
    with pysam.Samfile(bamfile) as bam_handle_in:
        regions = collect_regions(bedfile, bam_handle_in)
        # Reversed so the list can be used as a stack: the region to process
        # next is always regions[-1], and finished regions are pop()'ed.
        regions.reverse()

        write_mode = samtools_compatible_wbu_mode()
        with pysam.Samfile("-", write_mode, template=bam_handle_in) as bam_handle_out:
            # Each iteration of this loop starts a new fetch at regions[-1]
            while regions:
                # Furthest alignment end observed during the current fetch
                region_aend = 0
                contig, start, end = regions[-1]
                for record in bam_handle_in.fetch(contig, start):
                    current_aend = record.aend
                    region_aend = max(region_aend, current_aend)
                    if record.pos > end:
                        # This record starts past the end of the current
                        # region, so that region is done; advance to the next
                        last_contig, _, _ = regions.pop()
                        if not regions:
                            break

                        contig, start, end = regions[-1]
                        # Abandon the current fetch (the outer loop starts a
                        # new one at the next region) if that region is on a
                        # different contig, or starts more than
                        # _BAM_BLOCK_SIZE past the furthest alignment end seen
                        # so far. Otherwise keep reading from the same fetch.
                        # NOTE(review): presumably re-fetching is only
                        # worthwhile for gaps larger than this size; confirm.
                        if (region_aend + _BAM_BLOCK_SIZE < start) or (contig != last_contig):
                            break

                    # 'start' may have been updated to the next region above
                    if current_aend >= start:
                        bam_handle_out.write(record)
                else:  # Reached the end of this contig
                    # The fetch ran to completion without a 'break'; drop
                    # every remaining region located on this contig.
                    while regions and (regions[-1][0] == contig):
                        regions.pop()

    return 0
Esempio n. 5
0
def _pipe_to_bam():
    """Copy a BAM/SAM stream read from "-" to stdout, record by record.

    This exists as a workaround for SAM files that consist of a header only
    (i.e. contain no records); such input is not handled properly by
    "samtools view -S -", which fails to parse it.

    Always returns 0.
    """
    with pysam.Samfile("-", "r") as source:
        mode = samtools_compatible_wbu_mode()
        with pysam.Samfile("-", mode, template=source) as sink:
            emit = sink.write
            for record in source:
                emit(record)

    return 0
Esempio n. 6
0
def _pipe_to_bam():
    """Copy a BAM/SAM stream read from "-" to stdout, record by record.

    This exists as a workaround for SAM files that consist of a header only
    (i.e. contain no records); such input is not handled properly by
    "samtools view -S -", which fails to parse it.

    Always returns 0.
    """
    with pysam.Samfile("-", "r") as source:
        mode = samtools_compatible_wbu_mode()
        with pysam.Samfile("-", mode, template=source) as sink:
            emit = sink.write
            for record in source:
                emit(record)

    return 0
Esempio n. 7
0
def _cleanup_unmapped(args, cleanup_sam):
    """Filter and tidy up records read from STDIN, writing them to STDOUT.

    The input is read as SAM if 'cleanup_sam' is true, and as BAM otherwise;
    the output is written as uncompressed BAM. A copy of the input header is
    used for the output, with the sort order set (on the assumption that the
    output is subsequently passed to 'samtools sort'), the PG tags updated
    according to 'args.update_pg_tag', and, if 'args.rg_id' is given, the RG
    tags set from 'args.rg_id' and 'args.rg'.

    Records with a mapping quality below 'args.min_quality', or with any of
    the flags in 'args.exclude_flags' set, are dropped. Unmapped records are
    passed through _cleanup_record(); mapped records with an unmapped mate
    have their mate fields pointed at the record itself. If 'args.rg_id' is
    given, every written record carries exactly one RG tag with that ID.

    Always returns 0.
    """
    if cleanup_sam:
        read_mode = "r"
    else:
        read_mode = "rb"

    with pysam.Samfile("-", read_mode) as source:
        header = copy.deepcopy(source.header)
        _set_sort_order(header)
        _set_pg_tags(header, args.update_pg_tag)

        rg_id = args.rg_id
        assign_rg = rg_id is not None
        if assign_rg:
            _set_rg_tags(header, rg_id, args.rg)

        write_mode = samtools_compatible_wbu_mode()
        with pysam.Samfile("-", write_mode, header=header) as sink:
            for record in source:
                low_quality = record.mapq < args.min_quality
                if low_quality or (record.flag & args.exclude_flags):
                    continue

                if record.is_unmapped:
                    # Unmapped read; all non-required fields are cleared
                    record = _cleanup_record(record)
                elif record.mate_is_unmapped:
                    # Mate is unmapped; place it at the position of this read
                    record.rnext = record.tid
                    record.pnext = record.pos
                    record.tlen = 0

                if assign_rg:
                    # Drop any existing RG tags so that only one remains
                    kept_tags = []
                    for key, value in record.tags:
                        if key != "RG":
                            kept_tags.append((key, value))

                    kept_tags.append(("RG", rg_id))
                    record.tags = kept_tags

                sink.write(record)

    return 0
Esempio n. 8
0
def _cleanup_unmapped(args, cleanup_sam):
    """Filter and tidy up records read from STDIN, writing them to STDOUT.

    The input is read as SAM if 'cleanup_sam' is true, and as BAM otherwise;
    the output is written as uncompressed BAM. A copy of the input header is
    used for the output, with the sort order set (on the assumption that the
    output is subsequently passed to 'samtools sort'), the PG tags updated
    according to 'args.update_pg_tag', and, if 'args.rg_id' is given, the RG
    tags set from 'args.rg_id' and 'args.rg'.

    Records with a mapping quality below 'args.min_quality', or with any of
    the flags in 'args.exclude_flags' set, are dropped. Unmapped records are
    passed through _cleanup_record(); mapped records with an unmapped mate
    have their mate fields pointed at the record itself. If 'args.rg_id' is
    given, every written record carries exactly one RG tag with that ID.

    Always returns 0.
    """
    if cleanup_sam:
        read_mode = "r"
    else:
        read_mode = "rb"

    with pysam.Samfile("-", read_mode) as source:
        header = copy.deepcopy(source.header)
        _set_sort_order(header)
        _set_pg_tags(header, args.update_pg_tag)

        rg_id = args.rg_id
        assign_rg = rg_id is not None
        if assign_rg:
            _set_rg_tags(header, rg_id, args.rg)

        write_mode = samtools_compatible_wbu_mode()
        with pysam.Samfile("-", write_mode, header=header) as sink:
            for record in source:
                low_quality = record.mapq < args.min_quality
                if low_quality or (record.flag & args.exclude_flags):
                    continue

                if record.is_unmapped:
                    # Unmapped read; all non-required fields are cleared
                    record = _cleanup_record(record)
                elif record.mate_is_unmapped:
                    # Mate is unmapped; place it at the position of this read
                    record.rnext = record.tid
                    record.pnext = record.pos
                    record.tlen = 0

                if assign_rg:
                    # Drop any existing RG tags so that only one remains
                    kept_tags = []
                    for key, value in record.tags:
                        if key != "RG":
                            kept_tags.append((key, value))

                    kept_tags.append(("RG", rg_id))
                    record.tags = kept_tags

                sink.write(record)

    return 0