Example #1
0
def _write_if_expressed(region, min_read_count, out):
    """Write region to out when it exists and has at least min_read_count reads."""
    if region and region.read_count >= min_read_count:
        region.write(out)


def bam_find_regions(bam_name, merge_distance=10, min_read_count=2, only_uniq_starts=False, nostrand=False, out=sys.stdout):
    """Walk a BAM file column by column and write merged covered regions.

    One open region is tracked per strand.  Each non-deleted read in a pileup
    column is added to the open region for its strand; a new region is started
    (and the previous one written, if it passes the read-count threshold)
    whenever there is no open region, the chromosome changes, or the column
    lies more than ``merge_distance`` past the open region's ``end``.

    Args:
        bam_name: Passed to ``bam_open`` to obtain the BAM handle.
        merge_distance: Maximum gap between a region's ``end`` and the next
            column position for the column to still extend that region.
        min_read_count: Regions whose ``read_count`` is below this value are
            dropped instead of written.
        only_uniq_starts: Forwarded unchanged to ``ExpressedRegion``.
        nostrand: When true, reverse-strand reads are accumulated together
            with forward-strand reads in a single region track.
        out: Destination handed to ``region.write``.
    """
    bamfile = bam_open(bam_name)
    try:
        # Open region per strand; with nostrand only the '+' slot is ever used.
        open_regions = {'+': None, '-': None}

        # 1540 == 0x4 | 0x200 | 0x400 (unmapped, QC-fail, duplicate in SAM
        # flag terms); presumably bam_pileup_iter skips reads with any of
        # these flags set -- confirm against that helper.
        for pileup in bam_pileup_iter(bamfile, mask=1540):
            chrom = bamfile.getrname(pileup.tid)
            pos = pileup.pos

            for read in pileup.pileups:
                if read.is_del:
                    continue

                key = '+' if (nostrand or not read.alignment.is_reverse) else '-'
                region = open_regions[key]

                if not region or region.chrom != chrom or (region.end + merge_distance) < pos:
                    _write_if_expressed(region, min_read_count, out)
                    region = ExpressedRegion(chrom, only_uniq_starts)
                    open_regions[key] = region

                region.add_column(read, pos)

        # Flush whatever is still open, forward track first.
        _write_if_expressed(open_regions['+'], min_read_count, out)
        _write_if_expressed(open_regions['-'], min_read_count, out)
    finally:
        # Release the BAM handle even if iteration or writing fails.
        bamfile.close()
Example #2
0
def bam_cims_finder(bam_fnames, output='bed', ref_fname=None, flanking=12, cutoff=0.1, stranded=True, window_size=20):
    """Report pileup columns whose deletion fraction exceeds ``cutoff``.

    For every BAM file, and for every strand considered, each pileup column is
    scanned: reads that match at the column (``is_read_match_at_pos``) make up
    the total, reads with a deletion there (``is_read_del_at_pos``) make up the
    deletion count.  Columns where ``deletions / total > cutoff`` are handed to
    a ``RegionManager``, which reports through the chosen emitter.

    Note that the BAM file is traversed once per strand, i.e. twice when
    ``stranded`` is true.

    Args:
        bam_fnames: Iterable of BAM file names; each name is echoed to stderr.
        output: ``'fasta'`` selects ``FASTAEmitter``; anything else selects
            ``BEDEmitter``.
        ref_fname: Forwarded to ``FASTAEmitter`` (only used for FASTA output).
        flanking: Forwarded to ``FASTAEmitter`` (only used for FASTA output).
        cutoff: Deletion fraction a column must exceed to be reported.
        stranded: When true, '+' and '-' reads are analysed separately;
            otherwise all reads are pooled under the empty strand ''.
        window_size: Forwarded to ``RegionManager``.
    """
    strands = ['+', '-'] if stranded else ['']

    for bam_fname in bam_fnames:
        sys.stderr.write('%s\n' % bam_fname)
        bam = pysam.Samfile(bam_fname, "rb")
        try:
            if output == 'fasta':
                emitter = FASTAEmitter(ref_fname, flanking)
            else:
                emitter = BEDEmitter()

            try:
                for strand in strands:
                    manager = RegionManager(emitter, strand, window_size)

                    # 1540 == 0x4 | 0x200 | 0x400 in SAM flag terms;
                    # presumably the helper skips reads carrying those flags.
                    for pileup in bam_pileup_iter(bam, mask=1540):
                        chrom = bam.getrname(pileup.tid)

                        # Floats so the ratio below is a true division on
                        # Python 2 as well.
                        deletions = 0.0
                        total = 0.0

                        del_reads = set()
                        total_reads = set()

                        for pileupread in pileup.pileups:
                            alignment = pileupread.alignment
                            if not (not strand
                                    or (strand == '+' and not alignment.is_reverse)
                                    or (strand == '-' and alignment.is_reverse)):
                                continue

                            if is_read_match_at_pos(alignment, pileupread.qpos):
                                total += 1
                                total_reads.add(alignment.qname)

                            if is_read_del_at_pos(alignment, pileupread.qpos):
                                deletions += 1
                                del_reads.add(alignment.qname)

                        if total > 0 and (deletions / total) > cutoff:
                            manager.add(chrom, pileup.pos, strand, del_reads, total_reads)

                    manager.close()
            finally:
                # Close the emitter even if the scan fails part-way.
                emitter.close()
        finally:
            # Close the BAM handle even if emitter setup or the scan fails.
            bam.close()
Example #3
0
def bam_find_regions(bam_name,
                     merge_distance=10,
                     min_read_count=2,
                     only_uniq_starts=False,
                     nostrand=False,
                     out=sys.stdout):
    """Find covered regions in a BAM file and write them to ``out``.

    Pileup columns are visited in the order ``bam_pileup_iter`` yields them.
    Every read that is not a deletion at the column is added to the open
    region of its strand (forward or reverse).  The open region is closed and
    a new ``ExpressedRegion`` started when none exists yet, when the
    chromosome differs, or when the column position is more than
    ``merge_distance`` beyond the region's ``end``.  Closed regions are only
    written if their ``read_count`` reaches ``min_read_count``.

    Args:
        bam_name: Passed to ``bam_open`` to obtain the BAM handle.
        merge_distance: Largest gap that still merges a column into the
            currently open region.
        min_read_count: Minimum ``read_count`` a region needs to be written.
        only_uniq_starts: Forwarded unchanged to ``ExpressedRegion``.
        nostrand: When true, all reads go to the forward-strand track
            regardless of their orientation.
        out: Destination handed to ``region.write``.
    """

    def emit(region):
        # Regions below the read-count threshold are silently discarded.
        if region and region.read_count >= min_read_count:
            region.write(out)

    def extend(region, chrom, read, pos):
        # Roll over to a fresh region when there is none yet, the chromosome
        # changed, or this column lies beyond the merge window.
        if (not region or region.chrom != chrom
                or (region.end + merge_distance) < pos):
            emit(region)
            region = ExpressedRegion(chrom, only_uniq_starts)
        region.add_column(read, pos)
        return region

    bamfile = bam_open(bam_name)
    try:
        region_plus = None
        region_minus = None

        # 1540 == 0x4 | 0x200 | 0x400 in SAM flag terms; presumably the
        # helper skips reads carrying any of those flags -- confirm there.
        for pileup in bam_pileup_iter(bamfile, mask=1540):
            chrom = bamfile.getrname(pileup.tid)

            for read in pileup.pileups:
                if read.is_del:
                    continue
                if nostrand or not read.alignment.is_reverse:
                    region_plus = extend(region_plus, chrom, read,
                                         pileup.pos)
                else:
                    region_minus = extend(region_minus, chrom, read,
                                          pileup.pos)

        # Flush the regions still open at end of input, forward track first.
        emit(region_plus)
        emit(region_minus)
    finally:
        # Release the BAM handle even if iteration or writing fails.
        bamfile.close()
Example #4
0
def bam_minorallele(bam_fname, ref_fname, min_qual=0, min_count=0, num_alleles=0, name=None, min_ci_low=None):
    """Print per-position reference / minor-allele counts as a tab-delimited table.

    For every pileup column, A/C/G/T calls are tallied from reads that are not
    deleted at the column and have no adjacent indel (``indel == 0``).  The
    most frequent non-reference base is the "alt" allele and the second most
    frequent one is taken as background, which is subtracted from both the
    reference and alt counts before the alt frequency is computed.

    Output columns: chrom, pos (``pileup.pos + 1``), refbase, altbase, total,
    refcount, altcount, background, refback, altback, altfreq.  When
    ``num_alleles`` is given and the externally defined ``rscript`` value is
    truthy, ci_low, ci_high, allele_low and allele_high (from ``calc_cp_ci``)
    are appended.

    Args:
        bam_fname: BAM file to read.
        ref_fname: Indexed FASTA file used to look up the reference base.
        min_qual: When non-zero, calls with a base quality below this value
            are ignored.  Qualities are decoded as phred+33 characters, which
            is how pysam exposes ``qual`` -- TODO confirm for the pysam
            version in use.
        min_count: A column is reported only if more than this many calls
            were tallied.
        num_alleles: When non-zero, written as a ``# <n>`` preamble line and
            used for the confidence-interval columns.
        name: Accepted for backward compatibility; not used.
        min_ci_low: When not None, rows whose ci_low is not greater than this
            value are suppressed.
    """
    # Short-circuits exactly like the original checks: `rscript` is only
    # looked up when num_alleles is truthy.
    with_ci = num_alleles and rscript

    # The header now names every column that is written, including altfreq.
    header = "chrom pos refbase altbase total refcount altcount background refback altback altfreq".split()
    if with_ci:
        header.extend(["ci_low", "ci_high", "allele_low", "allele_high"])

    bam = pysam.Samfile(bam_fname, "rb")
    try:
        ref = pysam.Fastafile(ref_fname)
        try:
            if num_alleles:
                sys.stdout.write("# %s\n" % num_alleles)
            sys.stdout.write('\t'.join(header) + '\n')

            # 1540 == 0x4 | 0x200 | 0x400 in SAM flag terms; presumably the
            # helper skips reads carrying any of those flags.
            for pileup in bam_pileup_iter(bam, mask=1540):
                chrom = bam.getrname(pileup.tid)

                counts = {'A': 0, 'C': 0, 'G': 0, 'T': 0}
                total = 0

                for pileupread in pileup.pileups:
                    if pileupread.is_del or pileupread.indel != 0:
                        continue

                    alignment = pileupread.alignment
                    qpos = pileupread.qpos

                    if min_qual:
                        quals = alignment.qual
                        if quals is not None:
                            qual = quals[qpos]
                            # A str element needs decoding; a bytes element
                            # is already an int.
                            if not isinstance(qual, int):
                                qual = ord(qual)
                            if qual - 33 < min_qual:
                                continue

                    base = alignment.seq[qpos].upper()
                    # Skips N as before, and also any other non-ACGT code
                    # that used to raise KeyError.
                    if base in counts:
                        counts[base] += 1
                        total += 1

                if total <= min_count:
                    continue

                refbase = ref.fetch(chrom, pileup.pos, pileup.pos + 1).upper()
                if refbase not in counts:
                    continue

                refcount = counts[refbase]

                # Non-reference bases, most frequent first: first is alt,
                # next is background.
                ranked = sorted(((counts[b], b) for b in counts if b != refbase), reverse=True)
                altcount, altbase = ranked[0]
                background = ranked[1][0]

                refback = refcount - background
                altback = altcount - background

                if (altback + refback) == 0:
                    altfreq = 0
                else:
                    altfreq = float(altback) / (altback + refback)

                cols = [chrom, pileup.pos + 1, refbase, altbase, total, refcount, altcount, background, refback, altback, altfreq]
                if with_ci:
                    ci_low, ci_high = calc_cp_ci(refback + altback, altback, num_alleles)
                    cols.extend([ci_low, ci_high, ci_low * num_alleles, ci_high * num_alleles])
                else:
                    ci_low = 0

                if not math.isnan(ci_low) and (min_ci_low is None or ci_low > min_ci_low):
                    sys.stdout.write('\t'.join([str(x) for x in cols]) + '\n')
        finally:
            ref.close()
    finally:
        bam.close()
Example #5
0
def _read_on_strand(alignment, strand):
    """Tell whether alignment belongs to strand; an empty strand accepts every read."""
    if not strand:
        return True
    if strand == '+':
        return not alignment.is_reverse
    if strand == '-':
        return alignment.is_reverse
    return False


def bam_cims_finder(bam_fnames,
                    output='bed',
                    ref_fname=None,
                    flanking=12,
                    cutoff=0.1,
                    stranded=True,
                    window_size=20):
    """Scan BAM files for columns with a high fraction of deleted reads.

    Each BAM file is processed independently with its own emitter.  For each
    strand under consideration the whole file is piled up once; within a
    column, reads passing ``is_read_match_at_pos`` count towards the total and
    reads passing ``is_read_del_at_pos`` count as deletions.  When the ratio of
    deletions to total is strictly greater than ``cutoff`` the column, together
    with the names of the contributing reads, is passed to
    ``RegionManager.add``.

    Args:
        bam_fnames: Iterable of BAM file names; each name is echoed to stderr.
        output: ``'fasta'`` selects ``FASTAEmitter``; any other value selects
            ``BEDEmitter``.
        ref_fname: Forwarded to ``FASTAEmitter`` (FASTA output only).
        flanking: Forwarded to ``FASTAEmitter`` (FASTA output only).
        cutoff: Deletion fraction a column must exceed to be reported.
        stranded: When true, '+' and '-' are processed as separate passes;
            otherwise a single pass with strand '' pools all reads.
        window_size: Forwarded to ``RegionManager``.
    """
    if stranded:
        strands = ['+', '-']
    else:
        strands = ['']

    for bam_fname in bam_fnames:
        sys.stderr.write('%s\n' % bam_fname)
        bam = pysam.Samfile(bam_fname, "rb")
        try:
            if output == 'fasta':
                emitter = FASTAEmitter(ref_fname, flanking)
            else:
                emitter = BEDEmitter()

            try:
                for strand in strands:
                    manager = RegionManager(emitter, strand, window_size)

                    # 1540 == 0x4 | 0x200 | 0x400 in SAM flag terms;
                    # presumably the helper skips reads with those flags.
                    for pileup in bam_pileup_iter(bam, mask=1540):
                        chrom = bam.getrname(pileup.tid)

                        # Kept as floats so the ratio is a true division
                        # under Python 2 as well.
                        deletions = 0.0
                        total = 0.0

                        del_reads = set()
                        total_reads = set()

                        for pileupread in pileup.pileups:
                            alignment = pileupread.alignment
                            if not _read_on_strand(alignment, strand):
                                continue

                            if is_read_match_at_pos(alignment,
                                                    pileupread.qpos):
                                total += 1
                                total_reads.add(alignment.qname)

                            if is_read_del_at_pos(alignment,
                                                  pileupread.qpos):
                                deletions += 1
                                del_reads.add(alignment.qname)

                        if total > 0 and (deletions / total) > cutoff:
                            manager.add(chrom, pileup.pos, strand, del_reads,
                                        total_reads)

                    manager.close()
            finally:
                # Close the emitter even when a pass fails.
                emitter.close()
        finally:
            # Close the BAM handle even when emitter setup or a pass fails.
            bam.close()
Example #6
0
def bam_tobedgraph(bamfile, strand=None, normalize=None, nogaps=False, out=sys.stdout):
    """Convert the pileup of an open BAM file into bedGraph intervals.

    Consecutive columns on the same chromosome with the same read count are
    collapsed into one interval, which is passed to ``write_bedgraph`` as
    ``(chrom, start, last_pos + 1, count, normalize, out)`` once the count
    changes, a gap of more than one position appears, the chromosome changes,
    or the input ends.  Intervals with a count of zero are never emitted from
    here.

    Args:
        bamfile: Open BAM handle; iterated via ``bam_pileup_iter`` and queried
            with ``getrname``.  It is not closed by this function.
        strand: None counts every read; '+' counts only forward reads and '-'
            only reverse reads.  Any other value matches no read.
        normalize: Forwarded unchanged to ``write_bedgraph``.
        nogaps: When true, a read is not counted at columns where
            ``read_cigar_at_pos`` returns 3 (presumably the CIGAR "N" /
            reference-skip operation -- confirm against that helper).
        out: Forwarded unchanged to ``write_bedgraph``.
    """
    last_chrom = None
    last_count = 0
    last_start = None
    last_end = None

    for pileup in bam_pileup_iter(bamfile):
        chrom = bamfile.getrname(pileup.tid)
        pos = pileup.pos

        # Close the open interval before moving to another chromosome.
        # `is not None` (rather than truthiness) keeps position 0 valid.
        if chrom != last_chrom and last_count > 0 and last_end is not None:
            write_bedgraph(last_chrom, last_start, last_end + 1, last_count, normalize, out)
            last_count = 0
            last_start = None
            last_end = None

        if strand is None and not nogaps:
            # Fast path: every read in the column counts.
            count = pileup.n
        else:
            #
            #  TODO - add rev_read2 option
            #
            count = 0
            for read in pileup.pileups:
                if strand is not None:
                    is_reverse = read.alignment.is_reverse
                    on_strand = (strand == '+' and not is_reverse) or (strand == '-' and is_reverse)
                    if not on_strand:
                        continue
                if nogaps and read_cigar_at_pos(read.alignment.cigar, read.qpos, read.is_del) == 3:
                    continue
                count += 1

        # Start a new interval when the depth changes, when there is no
        # previous column, or when the previous column is not adjacent.
        if count != last_count or last_end is None or (pos - last_end) > 1:
            if last_count > 0:
                write_bedgraph(last_chrom, last_start, last_end + 1, last_count, normalize, out)

            if count == 0:
                last_start = None
            else:
                last_start = pos

        last_end = pos
        last_count = count
        last_chrom = chrom

    # Flush the trailing interval.  The guards avoid `None + 1` when the
    # iterator yielded nothing and match the in-loop rule of not emitting
    # zero-count intervals.
    if last_end is not None and last_count > 0:
        write_bedgraph(last_chrom, last_start, last_end + 1, last_count, normalize, out)