def BamToBed12Iterator(handle, **kwargs):
    '''
    Yield one Bed12 record for every usable read in ``handle``.

    handle: either a BAM file name (opened here with pysam.Samfile in "rb"
        mode and closed again when the iteration ends) or any iterable of
        aligned reads.
    references (keyword, optional): either one chromosome name (str) that is
        used for every read, or a sequence indexed by ``read.tid``. When the
        keyword is absent the name is taken from ``handle.references``; if
        that lookup fails the literal "chr" is used.
    strand (keyword, optional): "read1" or "firstMate" (also the default when
        the keyword is absent) keeps the strand of read1 and flips the strand
        of read2; any other value flips read1 instead.

    Reads with ``tid < 0`` and reads whose cigar is None are skipped.
    '''
    opened_here = isinstance(handle, str)
    if opened_here:
        handle = pysam.Samfile(handle, "rb")

    # Everything derived from kwargs is loop-invariant, so work it out once.
    has_references = "references" in kwargs
    references = kwargs.get("references")
    references_is_name = isinstance(references, str)
    read1 = kwargs.get("strand", "read1") in ("read1", "firstMate")

    try:
        for read in handle:
            if read.tid < 0:
                continue
            if read.cigar is None:
                # No alignment blocks can be derived for this read.
                continue

            if has_references:
                chrom = references if references_is_name else references[read.tid]
            else:
                try:
                    chrom = handle.references[read.tid]
                except Exception:
                    # Best effort: a plain iterator may carry no reference
                    # table at all, so fall back to a placeholder name.
                    chrom = "chr"

            strand = "-" if read.is_reverse else "+"
            # Report the strand relative to the mate selected by `strand`.
            if read.is_read1 and not read1:
                strand = Tools.reverse_strand(strand)
            elif read.is_read2 and read1:
                strand = Tools.reverse_strand(strand)

            start = read.pos
            (block_starts, block_sizes) = Tools.cigar_to_coordinates(read.cigar)
            yield Bed12([
                chrom,
                start,
                read.aend,
                read.qname,
                read.mapq,
                strand,
                start,  # cds_start
                start,  # cds_end (same as cds_start, as in the original)
                "0,0,0",
                len(block_sizes),
                block_sizes,
                block_starts,
            ])
    finally:
        if opened_here:
            handle.close()
def BamToBed12Iterator(handle, **kwargs):
    '''
    Yield one Bed12 record for every usable read in ``handle``.

    handle: either a BAM file name (opened here with pysam.Samfile in "rb"
        mode and closed again when the iteration ends) or any iterable of
        aligned reads.
    references (keyword, optional): a sequence indexed by ``read.tid``, or a
        single chromosome name given as a str. A str is used as-is for every
        read rather than being indexed character by character. When the
        keyword is absent the name is taken from ``handle.references``; if
        that lookup fails the literal "chr" is used.
    strand (keyword, optional): "read1" or "firstMate" (also the default when
        the keyword is absent) keeps the strand of read1 and flips the strand
        of read2; any other value flips read1 instead.

    Reads with ``tid < 0`` and reads whose cigar is None are skipped.
    '''
    opened_here = isinstance(handle, str)
    if opened_here:
        handle = pysam.Samfile(handle, "rb")

    # Everything derived from kwargs is loop-invariant, so work it out once.
    has_references = "references" in kwargs
    references = kwargs.get("references")
    references_is_name = isinstance(references, str)
    read1 = kwargs.get("strand", "read1") in ("read1", "firstMate")

    try:
        for read in handle:
            if read.tid < 0:
                continue
            if read.cigar is None:
                # No alignment blocks can be derived for this read.
                continue

            if has_references:
                chrom = references if references_is_name else references[read.tid]
            else:
                try:
                    chrom = handle.references[read.tid]
                except Exception:
                    # Best effort: a plain iterator may carry no reference
                    # table at all, so fall back to a placeholder name.
                    chrom = "chr"

            strand = "-" if read.is_reverse else "+"
            # Report the strand relative to the mate selected by `strand`.
            if read.is_read1 and not read1:
                strand = Tools.reverse_strand(strand)
            elif read.is_read2 and read1:
                strand = Tools.reverse_strand(strand)

            start = read.pos
            (block_starts, block_sizes) = Tools.cigar_to_coordinates(read.cigar)
            yield Bed12([
                chrom,
                start,
                read.aend,
                read.qname,
                read.mapq,
                strand,
                start,  # cds_start
                start,  # cds_end (same as cds_start, as in the original)
                "0,0,0",
                len(block_sizes),
                block_sizes,
                block_starts,
            ])
    finally:
        if opened_here:
            handle.close()
def query(self, x=None, method="pileup", **dict):
    """
    Query every BAM file in ``self.bamfiles`` and yield the results lazily.

    x: the region to query. Accepted forms:
        * a str "chrom" or "chrom:start-end"; start is reduced by one and end
          is used as given. Without a well-formed "start-end" part both stay
          None;
        * a tuple, read through the module constants CHROM, CHROMSTART and
          CHROMEND;
        * any other object exposing ``chr``, ``start`` and ``stop``;
        * None, which leaves chrom, start and end all None.
    method: selects what is produced.
        "fetch"          Bed per read (reads with tid < 0 or mapq == 0 skipped)
        "fetch12"        Bed12 per read, blocks from Tools.cigar_to_coordinates
        "bam1"           records from TableIO.parse(..., "bam2bed12")
        "bam1tuple"      records from TableIO.parse(..., "bam2bed12tuple")
        "paired_end"     fragments from TableIO.parse(..., "bam2fragment")
        "bam2"           fragment.toBed12(...) for every fragment
        "bam2tuple"      fragment.toBed12Tuple(...) for every fragment
        "bam2tuple_fast" as "bam2tuple" but parsed without the ``bam`` keyword
        "pileup"         one 4-element count list per position in [start, end),
                         summed over all BAM files, columns chosen through
                         BamI.hNtToNum
        "count"          a single total of bamfile.count(...) over all files
        "count_fragment" a single total number of parsed fragments
        "references"     the reference names of the first BAM file
        "lengths"        the reference lengths of the first BAM file
      Any other value yields nothing.
    **dict: only the "strand" key is read; it is forwarded to the bam1* parsers
        (default "read1") and, when present, to toBed12 / toBed12Tuple.

    Raises ValueError for "pileup" when the region has no start or end.
    """
    # The var-keyword parameter shadows the builtin; use a neutral alias.
    opts = dict

    # Defaults so that region-free calls (e.g. "references") never hit an
    # unbound local.
    chrom = start = end = None
    if isinstance(x, str):
        parts = x.split(":")
        chrom = parts[0]
        if len(parts) > 1:
            bounds = parts[1].split("-")
            if len(bounds) == 2:
                start = int(bounds[0]) - 1
                end = int(bounds[1])
    elif isinstance(x, tuple):
        chrom = x[CHROM]
        start = x[CHROMSTART]
        end = x[CHROMEND]
    elif x is not None:
        chrom = x.chr
        start = x.start
        end = x.stop

    # Optional strand argument forwarded to the fragment converters.
    strand_opt = {"strand": opts["strand"]} if "strand" in opts else {}

    if method == "fetch":
        for bamfile in self.bamfiles:
            for read in bamfile.fetch(chrom, start, end):
                if read.tid < 0 or read.mapq == 0:
                    continue
                strand = "-" if read.is_reverse else "+"
                yield Bed([bamfile.references[read.tid], read.pos, read.aend,
                           read.qname, read.mapq, strand])
    elif method == "fetch12":
        for bamfile in self.bamfiles:
            for read in bamfile.fetch(chrom, start, end):
                if read.tid < 0 or read.mapq == 0:
                    continue
                strand = "-" if read.is_reverse else "+"
                # Keep the read coordinates in their own names: reusing
                # start/end here would change the region fetched from the
                # next BAM file.
                read_start = read.pos
                (block_starts, block_sizes) = Tools.cigar_to_coordinates(read.cigar)
                yield Bed12([
                    bamfile.references[read.tid],
                    read_start,
                    read.aend,
                    read.qname,
                    read.mapq,
                    strand,
                    read_start,  # cds_start
                    read_start,  # cds_end (same as cds_start, as in the original)
                    "0,0,0",
                    len(block_sizes),
                    block_sizes,
                    block_starts,
                ])
    elif method == "bam1":
        # Reads from paired-end data with strand information.
        # TODO: what if the bamfiles disagree on read1 / read2?
        strand = opts.get("strand", "read1")
        for bamfile in self.bamfiles:
            for bed in TableIO.parse(bamfile.fetch(chrom, start, end), "bam2bed12",
                                     references=chrom, strand=strand):
                yield bed
    elif method == "paired_end":
        for bamfile in self.bamfiles:
            for fragment in TableIO.parse(bamfile.fetch(chrom, start, end),
                                          "bam2fragment", bam=bamfile):
                yield fragment
    elif method == "bam2":
        for bamfile in self.bamfiles:
            for fragment in TableIO.parse(bamfile.fetch(chrom, start, end),
                                          "bam2fragment", bam=bamfile):
                yield fragment.toBed12(chr=chrom, **strand_opt)
    elif method == "bam1tuple":
        # TODO: what if the bamfiles disagree on read1 / read2?
        strand = opts.get("strand", "read1")
        for bamfile in self.bamfiles:
            for bed in TableIO.parse(bamfile.fetch(chrom, start, end), "bam2bed12tuple",
                                     references=chrom, strand=strand):
                yield bed
    elif method == "bam2tuple":
        for bamfile in self.bamfiles:
            for fragment in TableIO.parse(bamfile.fetch(chrom, start, end),
                                          "bam2fragment", bam=bamfile):
                yield fragment.toBed12Tuple(chr=chrom, **strand_opt)
    elif method == "bam2tuple_fast":
        for bamfile in self.bamfiles:
            for fragment in TableIO.parse(bamfile.fetch(chrom, start, end), "bam2fragment"):
                yield fragment.toBed12Tuple(chr=chrom, **strand_opt)
    elif method == "pileup":
        if start is None or end is None:
            raise ValueError("pileup needs a region with both start and end")
        width = end - start
        counts = [[0, 0, 0, 0] for _ in range(width)]
        for bamfile in self.bamfiles:
            try:
                columns = bamfile.pileup(chrom, start, end)
            except Exception:
                sys.stderr.write("Can't pile up %s %s %s\n" % (chrom, start, end))
                # End the generator without yielding anything. A plain return
                # is required here: raising StopIteration inside a generator
                # is turned into RuntimeError by PEP 479.
                return
            for column in columns:
                offset = column.pos - start
                if offset < 0:
                    continue
                if offset >= width:
                    break
                for pileupread in column.pileups:
                    try:
                        if pileupread.is_del or pileupread.indel != 0:
                            continue
                        nt = pileupread.alignment.seq[pileupread.qpos]
                        if nt in BamI.hNtToNum:
                            counts[offset][BamI.hNtToNum[nt]] += 1
                    except Exception:
                        # Deliberately best effort: a read that cannot be
                        # inspected is left out of the counts.
                        pass
        for row in counts:
            yield row
    elif method == "count":
        total = 0
        for bamfile in self.bamfiles:
            total += bamfile.count(chrom, start, end)
        yield total
    elif method == "count_fragment":
        total = 0
        for bamfile in self.bamfiles:
            for _fragment in TableIO.parse(bamfile.fetch(chrom, start, end),
                                           "bam2fragment", bam=bamfile):
                total += 1
        yield total
    elif method == "references":
        for name in self.bamfiles[0].references:
            yield name
    elif method == "lengths":
        for length in self.bamfiles[0].lengths:
            yield length
def query(self, x, method='pileup'):
    '''
    Query every BAM file in ``self.bamfiles`` for the region ``x`` and yield
    the results lazily.

    x: an object exposing ``chr``, ``start`` and ``stop``.
    method: selects what is produced.
        'fetch'      Bed per read (reads with tid < 0 or mapq == 0 skipped)
        'fetch12'    Bed12 per read, blocks from Tools.cigar_to_coordinates
        'paired_end' fragments from TableIO.parse(..., "bam2fragment")
        'pileup'     one 4-element count list per position in
                     [x.start, x.stop), summed over all BAM files, columns
                     chosen through BamI.hNtToNum
      Any other value yields nothing.
    '''
    chrom, start, stop = x.chr, x.start, x.stop

    if method == 'fetch':
        for bamfile in self.bamfiles:
            for read in bamfile.fetch(chrom, start, stop):
                if read.tid < 0 or read.mapq == 0:
                    continue
                strand = '-' if read.is_reverse else '+'
                yield Bed([bamfile.references[read.tid], read.pos, read.aend,
                           read.qname, read.mapq, strand])
    elif method == 'fetch12':
        for bamfile in self.bamfiles:
            for read in bamfile.fetch(chrom, start, stop):
                if read.tid < 0 or read.mapq == 0:
                    continue
                strand = '-' if read.is_reverse else '+'
                read_start = read.pos
                (block_starts, block_sizes) = Tools.cigar_to_coordinates(read.cigar)
                yield Bed12([
                    bamfile.references[read.tid],
                    read_start,
                    read.aend,
                    read.qname,
                    read.mapq,
                    strand,
                    read_start,  # cds_start
                    read_start,  # cds_end (same as cds_start, as in the original)
                    "0,0,0",
                    len(block_sizes),
                    block_sizes,
                    block_starts,
                ])
    elif method == "paired_end":
        for bamfile in self.bamfiles:
            for fragment in TableIO.parse(bamfile.fetch(chrom, start, stop),
                                          "bam2fragment", bam=bamfile):
                yield fragment
    elif method == 'pileup':
        width = stop - start
        counts = [[0, 0, 0, 0] for _ in range(width)]
        for bamfile in self.bamfiles:
            try:
                columns = bamfile.pileup(chrom, start, stop)
            except Exception:
                sys.stderr.write("Can't pile up %s %s %s\n" % (chrom, start, stop))
                # End the generator without yielding anything. A plain return
                # is required here: raising StopIteration inside a generator
                # is turned into RuntimeError by PEP 479.
                return
            for column in columns:
                offset = column.pos - start
                if offset < 0:
                    continue
                if offset >= width:
                    break
                for pileupread in column.pileups:
                    try:
                        if pileupread.is_del or pileupread.indel != 0:
                            continue
                        nt = pileupread.alignment.seq[pileupread.qpos]
                        if nt in BamI.hNtToNum:
                            counts[offset][BamI.hNtToNum[nt]] += 1
                    except Exception:
                        # Deliberately best effort: a read that cannot be
                        # inspected is left out of the counts.
                        pass
        for row in counts:
            yield row