def bam_find_regions(bam_name, merge_distance=10, min_read_count=2, only_uniq_starts=False, nostrand=False, out=sys.stdout):
    """Write merged regions of read coverage found in a BAM file to *out*.

    Pileup columns come from ``bam_pileup_iter(bamfile, mask=1540)``.  Every
    read at a column that is not a deletion there is added to the running
    region for its strand (forward reads to the "plus" region, reverse reads
    to the "minus" region; with *nostrand* every read goes to the plus
    region).  A running region is finished, and a new one started, when the
    chromosome changes or when the column lies more than *merge_distance*
    past ``region.end``.  A finished region is written with
    ``region.write(out)`` only if its ``read_count`` is at least
    *min_read_count*.

    Args:
        bam_name: passed to ``bam_open`` to obtain the BAM handle.
        merge_distance: largest gap (in column positions) that still extends
            the current region.
        min_read_count: minimum ``read_count`` a region needs to be written.
        only_uniq_starts: forwarded unchanged to ``ExpressedRegion``.
        nostrand: if true, ignore read strand and build a single region track.
        out: destination handed to ``region.write``.

    The BAM handle is closed even if iteration or writing raises.
    """

    def _flush(region):
        # Emit a finished region only when it gathered enough reads.
        if region and region.read_count >= min_read_count:
            region.write(out)

    def _extend(region, chrom, read, pos):
        # Start a fresh region when there is none yet, the chromosome changed,
        # or the gap since the region's end exceeds merge_distance; then add
        # this read/column to whichever region is current.
        if not region or region.chrom != chrom or (region.end + merge_distance) < pos:
            _flush(region)
            region = ExpressedRegion(chrom, only_uniq_starts)
        region.add_column(read, pos)
        return region

    bamfile = bam_open(bam_name)
    try:
        region_plus = None
        region_minus = None

        # NOTE(review): 1540 == 0x4 | 0x200 | 0x400, which in SAM flag terms is
        # unmapped / QC-fail / duplicate -- presumably reads with any of these
        # bits are skipped; confirm against bam_pileup_iter.
        for pileup in bam_pileup_iter(bamfile, mask=1540):
            chrom = bamfile.getrname(pileup.tid)

            for read in pileup.pileups:
                if read.is_del:
                    continue

                if nostrand or not read.alignment.is_reverse:
                    region_plus = _extend(region_plus, chrom, read, pileup.pos)
                else:
                    region_minus = _extend(region_minus, chrom, read, pileup.pos)

        # Regions still open when the pileup ends have not been written yet.
        _flush(region_plus)
        _flush(region_minus)
    finally:
        bamfile.close()
def bam_cims_finder(bam_fnames, output='bed', ref_fname=None, flanking=12, cutoff=0.1, stranded=True, window_size=20):
    """Report pileup positions where the deletion rate exceeds *cutoff*.

    For each BAM file name (echoed to stderr) the file is opened and an
    emitter is created: ``FASTAEmitter(ref_fname, flanking)`` when *output*
    is ``'fasta'``, otherwise ``BEDEmitter()``.  The pileup is then walked
    once per strand (``'+'`` and ``'-'`` when *stranded*, otherwise a single
    pass with the empty strand ``''`` that accepts every read).  At each
    column, reads on the current strand are tallied:

    * ``is_read_match_at_pos`` true  -> counted in ``total``
    * ``is_read_del_at_pos`` true    -> counted in ``deletions``

    When ``total > 0`` and ``deletions / total > cutoff`` the column is
    handed to a ``RegionManager(emitter, strand, window_size)`` together with
    the sets of read names behind each tally.

    Args:
        bam_fnames: iterable of BAM file names.
        output: ``'fasta'`` selects the FASTA emitter; anything else, BED.
        ref_fname: forwarded to ``FASTAEmitter`` (FASTA output only).
        flanking: forwarded to ``FASTAEmitter`` (FASTA output only).
        cutoff: deletion fraction that must be exceeded (strictly).
        stranded: analyse each strand separately when true.
        window_size: forwarded to ``RegionManager``.

    The BAM handle and the emitter are closed even if processing raises.
    """
    strands = ['+', '-'] if stranded else ['']

    for bam_fname in bam_fnames:
        sys.stderr.write('%s\n' % bam_fname)

        bam = pysam.Samfile(bam_fname, "rb")
        emitter = None
        try:
            if output == 'fasta':
                emitter = FASTAEmitter(ref_fname, flanking)
            else:
                emitter = BEDEmitter()

            for strand in strands:
                manager = RegionManager(emitter, strand, window_size)

                # NOTE(review): the same handle is piled up once per strand;
                # this assumes bam_pileup_iter restarts from the beginning on
                # each call -- confirm.
                for pileup in bam_pileup_iter(bam, mask=1540):
                    chrom = bam.getrname(pileup.tid)

                    # Floats so the ratio below is true division on Python 2.
                    deletions = 0.0
                    total = 0.0
                    del_reads = set()
                    total_reads = set()

                    for pileupread in pileup.pileups:
                        alignment = pileupread.alignment
                        on_strand = (
                            not strand
                            or (strand == '+' and not alignment.is_reverse)
                            or (strand == '-' and alignment.is_reverse)
                        )
                        if not on_strand:
                            continue

                        if is_read_match_at_pos(alignment, pileupread.qpos):
                            total += 1
                            total_reads.add(alignment.qname)
                        if is_read_del_at_pos(alignment, pileupread.qpos):
                            deletions += 1
                            del_reads.add(alignment.qname)

                    if total > 0 and deletions / total > cutoff:
                        manager.add(chrom, pileup.pos, strand, del_reads, total_reads)

                manager.close()
        finally:
            # Same close order as on the success path: BAM first, then emitter.
            try:
                bam.close()
            finally:
                if emitter is not None:
                    emitter.close()
def bam_find_regions(bam_name, merge_distance=10, min_read_count=2, only_uniq_starts=False, nostrand=False, out=sys.stdout):
    """Write merged regions of read coverage found in a BAM file to *out*.

    Pileup columns come from ``bam_pileup_iter(bamfile, mask=1540)``.  Every
    read at a column that is not a deletion there is added to the running
    region for its strand (forward reads to the "plus" region, reverse reads
    to the "minus" region; with *nostrand* every read goes to the plus
    region).  A running region is finished, and a new one started, when the
    chromosome changes or when the column lies more than *merge_distance*
    past ``region.end``.  A finished region is written with
    ``region.write(out)`` only if its ``read_count`` is at least
    *min_read_count*.

    Args:
        bam_name: passed to ``bam_open`` to obtain the BAM handle.
        merge_distance: largest gap (in column positions) that still extends
            the current region.
        min_read_count: minimum ``read_count`` a region needs to be written.
        only_uniq_starts: forwarded unchanged to ``ExpressedRegion``.
        nostrand: if true, ignore read strand and build a single region track.
        out: destination handed to ``region.write``.

    The BAM handle is closed even if iteration or writing raises.
    """

    def _flush(region):
        # Emit a finished region only when it gathered enough reads.
        if region and region.read_count >= min_read_count:
            region.write(out)

    def _extend(region, chrom, read, pos):
        # Start a fresh region when there is none yet, the chromosome changed,
        # or the gap since the region's end exceeds merge_distance; then add
        # this read/column to whichever region is current.
        if not region or region.chrom != chrom or (region.end + merge_distance) < pos:
            _flush(region)
            region = ExpressedRegion(chrom, only_uniq_starts)
        region.add_column(read, pos)
        return region

    bamfile = bam_open(bam_name)
    try:
        region_plus = None
        region_minus = None

        # NOTE(review): 1540 == 0x4 | 0x200 | 0x400, which in SAM flag terms is
        # unmapped / QC-fail / duplicate -- presumably reads with any of these
        # bits are skipped; confirm against bam_pileup_iter.
        for pileup in bam_pileup_iter(bamfile, mask=1540):
            chrom = bamfile.getrname(pileup.tid)

            for read in pileup.pileups:
                if read.is_del:
                    continue

                if nostrand or not read.alignment.is_reverse:
                    region_plus = _extend(region_plus, chrom, read, pileup.pos)
                else:
                    region_minus = _extend(region_minus, chrom, read, pileup.pos)

        # Regions still open when the pileup ends have not been written yet.
        _flush(region_plus)
        _flush(region_minus)
    finally:
        bamfile.close()
def bam_minorallele(bam_fname, ref_fname, min_qual=0, min_count=0, num_alleles=0, name=None, min_ci_low=None):
    """Print a tab-delimited per-position table of reference vs. minor-allele counts.

    For every pileup column, A/C/G/T calls are tallied over reads that are not
    deleted at the column, pass the optional base-quality threshold and have
    ``indel == 0``.  Columns whose tally exceeds *min_count* and whose
    reference base is one of A/C/G/T produce one output row:

        chrom, pos (1-based), refbase, altbase, total, refcount, altcount,
        background, refback, altback, altfreq

    where ``altbase`` is the most frequent non-reference base, ``background``
    is the count of the second most frequent non-reference base, ``refback``
    and ``altback`` are the reference/alt counts minus that background, and
    ``altfreq = altback / (altback + refback)`` (0 when the denominator is 0).

    When *num_alleles* is set and the module-level ``rscript`` is truthy, a
    confidence interval from ``calc_cp_ci`` plus the interval scaled by
    *num_alleles* is appended to each row.

    Args:
        bam_fname: BAM file opened with ``pysam.Samfile``.
        ref_fname: FASTA file opened with ``pysam.Fastafile``.
        min_qual: minimum phred base quality; 0 disables the filter.
        min_count: a column is reported only if its tally is greater than this.
        num_alleles: if non-zero, written as a ``# <n>`` first line and used
            for the confidence-interval columns.
        name: accepted for backward compatibility; not used by this function.
        min_ci_low: if given, rows are printed only when ``ci_low`` is
            strictly greater than it.  Without the CI columns ``ci_low`` is 0.

    Both files are closed even if processing raises.
    """
    bam = pysam.Samfile(bam_fname, "rb")
    ref = None
    try:
        ref = pysam.Fastafile(ref_fname)

        # `rscript` is defined outside this function; presumably it signals
        # that R is available for calc_cp_ci -- confirm.  Short-circuiting
        # keeps it from being looked up when num_alleles is unset.
        with_ci = num_alleles and rscript

        if num_alleles:
            sys.stdout.write("# %s\n" % num_alleles)

        # One header name per emitted column (altfreq included).
        header = "chrom pos refbase altbase total refcount altcount background refback altback altfreq".split()
        if with_ci:
            header.extend(["ci_low", "ci_high", "allele_low", "allele_high"])
        sys.stdout.write('\t'.join(header))
        sys.stdout.write('\n')

        for pileup in bam_pileup_iter(bam, mask=1540):
            chrom = bam.getrname(pileup.tid)

            counts = {'A': 0, 'C': 0, 'G': 0, 'T': 0}
            total = 0

            for pileupread in pileup.pileups:
                if pileupread.is_del:
                    continue

                alignment = pileupread.alignment
                qpos = pileupread.qpos

                if min_qual:
                    # pysam's `qual` is a phred+33 ASCII string, so decode the
                    # character before comparing; comparing the raw character
                    # with an int is always False on Python 2 and would make
                    # the filter a no-op.
                    if ord(alignment.qual[qpos]) - 33 < min_qual:
                        continue

                if pileupread.indel == 0:
                    base = alignment.seq[qpos].upper()
                    # Skips N and any other non-ACGT (IUPAC) code instead of
                    # raising KeyError on it.
                    if base in counts:
                        counts[base] += 1
                        total += 1

            if total <= min_count:
                continue

            refbase = ref.fetch(chrom, pileup.pos, pileup.pos + 1).upper()
            if refbase not in counts:
                continue

            refcount = counts[refbase]

            # Non-reference bases, most frequent first (ties broken by base
            # letter, descending): first is the alt allele, second is treated
            # as background noise.
            ranked = sorted(((counts[b], b) for b in counts if b != refbase), reverse=True)
            altcount, altbase = ranked[0]
            background = ranked[1][0]

            refback = refcount - background
            altback = altcount - background

            if (altback + refback) == 0:
                altfreq = 0
            else:
                altfreq = float(altback) / (altback + refback)

            cols = [chrom, pileup.pos + 1, refbase, altbase, total, refcount,
                    altcount, background, refback, altback, altfreq]

            if with_ci:
                ci_low, ci_high = calc_cp_ci(refback + altback, altback, num_alleles)
                cols.extend([ci_low, ci_high, ci_low * num_alleles, ci_high * num_alleles])
            else:
                ci_low = 0

            if not math.isnan(ci_low) and (min_ci_low is None or ci_low > min_ci_low):
                sys.stdout.write('\t'.join([str(x) for x in cols]))
                sys.stdout.write('\n')
    finally:
        # Same close order as on the success path: BAM first, then reference.
        try:
            bam.close()
        finally:
            if ref is not None:
                ref.close()
def bam_cims_finder(bam_fnames, output='bed', ref_fname=None, flanking=12, cutoff=0.1, stranded=True, window_size=20):
    """Report pileup positions where the deletion rate exceeds *cutoff*.

    For each BAM file name (echoed to stderr) the file is opened and an
    emitter is created: ``FASTAEmitter(ref_fname, flanking)`` when *output*
    is ``'fasta'``, otherwise ``BEDEmitter()``.  The pileup is then walked
    once per strand (``'+'`` and ``'-'`` when *stranded*, otherwise a single
    pass with the empty strand ``''`` that accepts every read).  At each
    column, reads on the current strand are tallied:

    * ``is_read_match_at_pos`` true  -> counted in ``total``
    * ``is_read_del_at_pos`` true    -> counted in ``deletions``

    When ``total > 0`` and ``deletions / total > cutoff`` the column is
    handed to a ``RegionManager(emitter, strand, window_size)`` together with
    the sets of read names behind each tally.

    Args:
        bam_fnames: iterable of BAM file names.
        output: ``'fasta'`` selects the FASTA emitter; anything else, BED.
        ref_fname: forwarded to ``FASTAEmitter`` (FASTA output only).
        flanking: forwarded to ``FASTAEmitter`` (FASTA output only).
        cutoff: deletion fraction that must be exceeded (strictly).
        stranded: analyse each strand separately when true.
        window_size: forwarded to ``RegionManager``.

    The BAM handle and the emitter are closed even if processing raises.
    """
    strands = ['+', '-'] if stranded else ['']

    for bam_fname in bam_fnames:
        sys.stderr.write('%s\n' % bam_fname)

        bam = pysam.Samfile(bam_fname, "rb")
        emitter = None
        try:
            if output == 'fasta':
                emitter = FASTAEmitter(ref_fname, flanking)
            else:
                emitter = BEDEmitter()

            for strand in strands:
                manager = RegionManager(emitter, strand, window_size)

                # NOTE(review): the same handle is piled up once per strand;
                # this assumes bam_pileup_iter restarts from the beginning on
                # each call -- confirm.
                for pileup in bam_pileup_iter(bam, mask=1540):
                    chrom = bam.getrname(pileup.tid)

                    # Floats so the ratio below is true division on Python 2.
                    deletions = 0.0
                    total = 0.0
                    del_reads = set()
                    total_reads = set()

                    for pileupread in pileup.pileups:
                        alignment = pileupread.alignment
                        on_strand = (
                            not strand
                            or (strand == '+' and not alignment.is_reverse)
                            or (strand == '-' and alignment.is_reverse)
                        )
                        if not on_strand:
                            continue

                        if is_read_match_at_pos(alignment, pileupread.qpos):
                            total += 1
                            total_reads.add(alignment.qname)
                        if is_read_del_at_pos(alignment, pileupread.qpos):
                            deletions += 1
                            del_reads.add(alignment.qname)

                    if total > 0 and deletions / total > cutoff:
                        manager.add(chrom, pileup.pos, strand, del_reads, total_reads)

                manager.close()
        finally:
            # Same close order as on the success path: BAM first, then emitter.
            try:
                bam.close()
            finally:
                if emitter is not None:
                    emitter.close()
def bam_tobedgraph(bamfile, strand=None, normalize=None, nogaps=False, out=sys.stdout):
    """Convert pileup depth into runs of equal coverage and write them as bedGraph.

    Walks ``bam_pileup_iter(bamfile)`` column by column, computes a per-column
    count, and merges adjacent columns with the same count into one interval.
    Each finished interval with a positive count is passed to
    ``write_bedgraph(chrom, start, end + 1, count, normalize, out)``.

    Per-column count:
        * ``strand is None`` and not *nogaps*: ``pileup.n``.
        * otherwise: the number of reads at the column that are on the
          requested strand (``'+'`` = not reverse, ``'-'`` = reverse; any
          other non-None value matches no read) and, when *nogaps* is set,
          whose ``read_cigar_at_pos`` result is not 3.

    Args:
        bamfile: open BAM handle (must provide ``getrname``).
        strand: ``None`` for both strands, or ``'+'`` / ``'-'``.
        normalize: forwarded unchanged to ``write_bedgraph``.
        nogaps: exclude reads whose CIGAR operation at the column is 3.
        out: forwarded unchanged to ``write_bedgraph``.

    An empty pileup writes nothing (previously the final flush raised
    ``TypeError`` on ``None + 1``).
    """

    def _covers(read):
        # With nogaps, a read is not counted where its CIGAR op is 3.
        # NOTE(review): 3 is presumably the BAM code for 'N' (reference skip,
        # e.g. a spliced intron) -- confirm against read_cigar_at_pos.
        if not nogaps:
            return True
        return read_cigar_at_pos(read.alignment.cigar, read.qpos, read.is_del) != 3

    last_chrom = None
    last_count = 0
    last_start = None
    last_end = None

    for pileup in bam_pileup_iter(bamfile):
        chrom = bamfile.getrname(pileup.tid)

        # Chromosome boundary: close the open run and forget its coordinates.
        if chrom != last_chrom and last_count > 0 and last_end:
            write_bedgraph(last_chrom, last_start, last_end + 1, last_count, normalize, out)
            last_count = 0
            last_start = None
            last_end = None

        if strand is None:
            if not nogaps:
                count = pileup.n
            else:
                count = sum(1 for read in pileup.pileups if _covers(read))
        else:
            #
            # TODO - add rev_read2 option
            #
            count = 0
            for read in pileup.pileups:
                is_reverse = read.alignment.is_reverse
                on_strand = (strand == '+' and not is_reverse) or (strand == '-' and is_reverse)
                if on_strand and _covers(read):
                    count += 1

        # A run ends when the depth changes, when no run is open, or when the
        # columns are not contiguous.
        # NOTE(review): `not last_end` is also true for last_end == 0, so a
        # run that ends at position 0 is always flushed on its own -- confirm
        # whether that is intended before changing it.
        if count != last_count or not last_end or (pileup.pos - last_end) > 1:
            if last_count > 0:
                write_bedgraph(last_chrom, last_start, last_end + 1, last_count, normalize, out)

            if count == 0:
                last_start = None
            else:
                last_start = pileup.pos

        last_end = pileup.pos
        last_count = count
        last_chrom = chrom

    # Flush the trailing run; with no pileup columns at all there is none.
    if last_end is not None:
        write_bedgraph(last_chrom, last_start, last_end + 1, last_count, normalize, out)