Ejemplo n.º 1
0
def BamToBed12Iterator(handle, **kwargs):
    '''
    Turn aligned reads into Bed12 records, one record per read.

    handle: either a BAM file name (opened here with pysam and closed
        again once the generator finishes or is discarded) or any iterable
        of pysam aligned reads.
    kwargs:
        references: chromosome name(s) for the output. A str is used as-is
            for every read; any other value is indexed with the read's
            tid. When omitted, handle.references[tid] is tried and the
            literal "chr" is the fallback.
        strand: "read1" or "firstMate" (also the default when omitted)
            keeps the strand of read1 and flips the strand of read2; any
            other value flips read1 and keeps read2.

    Reads with tid < 0 or without a cigar are skipped.
    Yields Bed12 objects whose thick start and thick end both equal the
    read start.
    '''
    opened_here = isinstance(handle, str)
    if opened_here:
        handle = pysam.Samfile(handle, "rb")

    # Both options are constant for the whole call, so resolve them once
    # instead of once per read.
    has_references = "references" in kwargs
    references = kwargs.get("references")
    single_name = isinstance(references, str)
    read1 = kwargs.get("strand", "read1") in ("read1", "firstMate")

    try:
        for i in handle:
            if i.tid < 0:
                continue
            cigar = i.cigar
            if cigar is None:
                # No alignment blocks can be derived without a cigar.
                continue

            if single_name:
                chrom = references
            elif has_references:
                chrom = references[i.tid]
            else:
                try:
                    chrom = handle.references[i.tid]
                except Exception:
                    # e.g. handle is a plain iterator without .references
                    chrom = "chr"

            strand = "-" if i.is_reverse else "+"
            # Report the strand of the mate selected by the "strand" option.
            if (i.is_read1 and not read1) or (i.is_read2 and read1):
                strand = Tools.reverse_strand(strand)

            start = i.pos
            (block_starts, block_sizes) = Tools.cigar_to_coordinates(cigar)
            yield Bed12([chrom, start, i.aend, i.qname, i.mapq, strand,
                         start, start, "0,0,0",
                         len(block_sizes), block_sizes, block_starts])
    finally:
        # Only close what this function opened; caller-owned handles stay open.
        if opened_here:
            handle.close()
Ejemplo n.º 2
0
def BamToBed12Iterator(handle, **kwargs):
    '''
    Convert aligned reads into Bed12 records, one record per read.

    handle: a BAM file name (opened here with pysam and closed when the
        generator finishes or is discarded) or any iterable of pysam
        aligned reads.
    kwargs:
        references: chromosome name(s) for the output. A str is taken as
            the chromosome name of every read (a str used to be indexed
            by tid, which produced a single character); any other value
            is indexed with the read's tid. When omitted,
            handle.references[tid] is tried and "chr" is the fallback.
        strand: "read1" or "firstMate" (also the default when omitted)
            keeps the strand of read1 and flips the strand of read2; any
            other value flips read1 and keeps read2.

    Reads with tid < 0 or without a cigar are skipped.
    Yields Bed12 objects whose thick start and thick end both equal the
    read start.
    '''
    opened_here = isinstance(handle, str)
    if opened_here:
        handle = pysam.Samfile(handle, "rb")

    # Per-call constants: work them out once, not for every read.
    has_references = "references" in kwargs
    references = kwargs.get("references")
    single_name = isinstance(references, str)
    read1 = kwargs.get("strand", "read1") in ("read1", "firstMate")

    try:
        for i in handle:
            if i.tid < 0:
                continue
            cigar = i.cigar
            if cigar is None:
                # Without a cigar there are no blocks to report.
                continue

            if single_name:
                chrom = references
            elif has_references:
                chrom = references[i.tid]
            else:
                try:
                    chrom = handle.references[i.tid]
                except Exception:
                    # e.g. handle is a plain iterator without .references
                    chrom = "chr"

            strand = "-" if i.is_reverse else "+"
            # Report the strand of the mate selected by the "strand" option.
            if (i.is_read1 and not read1) or (i.is_read2 and read1):
                strand = Tools.reverse_strand(strand)

            start = i.pos
            (block_starts, block_sizes) = Tools.cigar_to_coordinates(cigar)
            yield Bed12([
                chrom, start, i.aend, i.qname, i.mapq, strand, start, start,
                "0,0,0", len(block_sizes), block_sizes, block_starts
            ])
    finally:
        # Close only the file this function opened itself.
        if opened_here:
            handle.close()
Ejemplo n.º 3
0
Archivo: DB.py Proyecto: nimezhu/xplib
    def query(self, x=None, method="pileup", **kwargs):
        """
        Query every BAM file in self.bamfiles and lazily yield the results.

        x selects the region and may be:
          - a str "chrom" or "chrom:start-end"; start is shifted to
            0-based (start - 1) and end is used as given. Without a
            "start-end" part start and end are None;
          - a tuple, indexed with CHROM, CHROMSTART and CHROMEND;
          - any other object exposing .chr, .start and .stop.
        x may only be left out for the methods that need no region
        ("references" and "lengths").

        method selects what is yielded:
          "fetch"           a Bed per read (tid < 0 or mapq == 0 skipped)
          "fetch12"         a Bed12 per read (same filter)
          "bam1"            items of TableIO.parse(..., "bam2bed12")
          "bam1tuple"       items of TableIO.parse(..., "bam2bed12tuple")
          "paired_end"      items of TableIO.parse(..., "bam2fragment")
          "bam2"            fragment.toBed12(...) per fragment
          "bam2tuple"       fragment.toBed12Tuple(...) per fragment
          "bam2tuple_fast"  like "bam2tuple" but the parser is not given
                            the bam file
          "pileup"          one 4-element count list per position of
                            [start, end); the column of each base comes
                            from BamI.hNtToNum
          "count"           a single value: sum of bamfile.count(...)
          "count_fragment"  a single value: number of parsed fragments
          "references" / "lengths"  values of the first BAM file only
        Any other method yields nothing.

        kwargs: "strand" is forwarded to the bam1* parsers (default
        "read1") and, when given, to toBed12 / toBed12Tuple.
        """
        if isinstance(x, str):
            parts = x.split(":")
            chrom = parts[0]
            start = None
            end = None
            if len(parts) > 1:
                bounds = parts[1].split("-")
                if len(bounds) == 2:
                    start = int(bounds[0]) - 1
                    end = int(bounds[1])
        elif isinstance(x, tuple):
            chrom = x[CHROM]
            start = x[CHROMSTART]
            end = x[CHROMEND]
        elif x is not None:
            chrom = x.chr
            start = x.start
            end = x.stop

        if method == "fetch":
            for bamfile in self.bamfiles:
                for read in bamfile.fetch(chrom, start, end):
                    if read.tid < 0 or read.mapq == 0:
                        continue
                    strand = "-" if read.is_reverse else "+"
                    yield Bed([bamfile.references[read.tid], read.pos, read.aend, read.qname, read.mapq, strand])
        elif method == "fetch12":
            # test version, still exercising Tools.cigar_to_coordinates
            for bamfile in self.bamfiles:
                for read in bamfile.fetch(chrom, start, end):
                    if read.tid < 0 or read.mapq == 0:
                        continue
                    strand = "-" if read.is_reverse else "+"
                    # Keep the read coordinates in their own name: reusing
                    # `start`/`end` would change the region fetched from
                    # the next BAM file.
                    read_start = read.pos
                    (block_starts, block_sizes) = Tools.cigar_to_coordinates(read.cigar)
                    yield Bed12(
                        [
                            bamfile.references[read.tid],
                            read_start,
                            read.aend,
                            read.qname,
                            read.mapq,
                            strand,
                            read_start,
                            read_start,
                            "0,0,0",
                            len(block_sizes),
                            block_sizes,
                            block_starts,
                        ]
                    )
        elif method in ("bam1", "bam1tuple"):
            # fetch reads from paired-end data with strand information
            parser = "bam2bed12" if method == "bam1" else "bam2bed12tuple"
            # TODO: if bamfiles have different read1 or read2 ?
            strand = kwargs.get("strand", "read1")
            for bamfile in self.bamfiles:
                for bed in TableIO.parse(
                    bamfile.fetch(chrom, start, end), parser, references=chrom, strand=strand
                ):
                    yield bed
        elif method == "paired_end":
            for bamfile in self.bamfiles:
                for fragment in TableIO.parse(bamfile.fetch(chrom, start, end), "bam2fragment", bam=bamfile):
                    yield fragment
        elif method in ("bam2", "bam2tuple", "bam2tuple_fast"):
            # "strand" is only forwarded when the caller supplied it.
            convert_args = {"chr": chrom}
            if "strand" in kwargs:
                convert_args["strand"] = kwargs["strand"]
            for bamfile in self.bamfiles:
                # The "fast" variant parses without handing over the bam file.
                parse_args = {} if method == "bam2tuple_fast" else {"bam": bamfile}
                for fragment in TableIO.parse(bamfile.fetch(chrom, start, end), "bam2fragment", **parse_args):
                    if method == "bam2":
                        yield fragment.toBed12(**convert_args)
                    else:
                        yield fragment.toBed12Tuple(**convert_args)
        elif method == "pileup":
            width = end - start
            counts = [[0, 0, 0, 0] for _ in range(width)]
            for bamfile in self.bamfiles:
                try:
                    columns = bamfile.pileup(chrom, start, end)
                except Exception:
                    sys.stderr.write("Can't pile up %s %s %s\n" % (chrom, start, end))
                    # A plain return ends the generator; raising
                    # StopIteration here is a RuntimeError under PEP 479.
                    return
                for pileupcolumn in columns:
                    j = pileupcolumn.pos - start
                    if j < 0:
                        continue
                    if j >= width:
                        # counts has exactly `width` rows, so j == width
                        # is already outside the requested region.
                        break
                    for pileupread in pileupcolumn.pileups:
                        try:
                            if pileupread.is_del:
                                continue
                            if pileupread.indel != 0:
                                continue
                            nt = pileupread.alignment.seq[pileupread.qpos]
                            if nt in BamI.hNtToNum:
                                counts[j][BamI.hNtToNum[nt]] += 1
                        except Exception:
                            # Deliberately best-effort: a read that cannot
                            # be inspected is left out of the counts.
                            pass
            for row in counts:
                yield row
        elif method == "count":
            total = 0
            for bamfile in self.bamfiles:
                total += bamfile.count(chrom, start, end)
            yield total
        elif method == "count_fragment":
            total = 0
            for bamfile in self.bamfiles:
                total += sum(
                    1 for _ in TableIO.parse(bamfile.fetch(chrom, start, end), "bam2fragment", bam=bamfile)
                )
            yield total
        elif method == "references":
            for name in self.bamfiles[0].references:
                yield name
        elif method == "lengths":
            for length in self.bamfiles[0].lengths:
                yield length
Ejemplo n.º 4
0
    def query(self,x,method='pileup'):
        '''
        Query every BAM file in self.bamfiles over the region described by
        x (an object exposing .chr, .start and .stop) and lazily yield the
        results.

        method selects what is yielded:
          'fetch'       a Bed per read (tid < 0 or mapq == 0 skipped)
          'fetch12'     a Bed12 per read (same filter)
          'paired_end'  items of TableIO.parse(..., "bam2fragment")
          'pileup'      one 4-element count list per position of
                        [x.start, x.stop); the column of each base comes
                        from BamI.hNtToNum. Nothing is yielded when a
                        pileup cannot be started.
        Any other method yields nothing.
        '''
        if method == 'fetch':
            for bamfile in self.bamfiles:
                for read in bamfile.fetch(x.chr, x.start, x.stop):
                    if read.tid < 0 or read.mapq == 0:
                        continue
                    strand = '-' if read.is_reverse else '+'
                    yield Bed([bamfile.references[read.tid], read.pos, read.aend, read.qname, read.mapq, strand])
        elif method == 'fetch12':
            # test version, still exercising Tools.cigar_to_coordinates
            for bamfile in self.bamfiles:
                for read in bamfile.fetch(x.chr, x.start, x.stop):
                    if read.tid < 0 or read.mapq == 0:
                        continue
                    strand = '-' if read.is_reverse else '+'
                    read_start = read.pos
                    (block_starts, block_sizes) = Tools.cigar_to_coordinates(read.cigar)
                    yield Bed12([bamfile.references[read.tid], read_start, read.aend, read.qname,
                                 read.mapq, strand, read_start, read_start, "0,0,0",
                                 len(block_sizes), block_sizes, block_starts])
        elif method == "paired_end":
            for bamfile in self.bamfiles:
                for fragment in TableIO.parse(bamfile.fetch(x.chr, x.start, x.stop), "bam2fragment", bam=bamfile):
                    yield fragment
        elif method == 'pileup':
            width = x.stop - x.start
            counts = [[0, 0, 0, 0] for _ in range(width)]
            for bamfile in self.bamfiles:
                try:
                    columns = bamfile.pileup(x.chr, x.start, x.stop)
                except Exception:
                    sys.stderr.write("Can't pile up %s %s %s\n" % (x.chr, x.start, x.stop))
                    # A plain return ends the generator; raising
                    # StopIteration here is a RuntimeError under PEP 479.
                    return
                for pileupcolumn in columns:
                    j = pileupcolumn.pos - x.start
                    if j < 0:
                        continue
                    if j >= width:
                        # counts has exactly `width` rows, so j == width
                        # is already outside the requested region.
                        break
                    for pileupread in pileupcolumn.pileups:
                        try:
                            if pileupread.is_del:
                                continue
                            if pileupread.indel != 0:
                                continue
                            nt = pileupread.alignment.seq[pileupread.qpos]
                            if nt in BamI.hNtToNum:
                                counts[j][BamI.hNtToNum[nt]] += 1
                        except Exception:
                            # Deliberately best-effort: a read that cannot
                            # be inspected is left out of the counts.
                            pass
            for row in counts:
                yield row