예제 #1
0
파일: bam2dis.py 프로젝트: HaoKuo/bam2x
def run(args):
    """Print per-position read coverage for every gene of every BED cluster.

    The BED12 records read from ``args.input`` are sorted and grouped with
    ``iter_cluster``.  Each gene of a cluster is extended by ``args.bp`` on
    both sides via ``up_down_coordinate``; reads from ``args.bam`` that
    overlap and are compatible with an extended gene add a weight of
    ``1.0 / NC / NM`` to every position they cover, where ``NC`` is the
    number of compatible genes in the cluster and ``NM`` is the value
    returned by ``getNM`` (described as "number of hits" by the original
    author).  The extended coordinate and its coverage vector are printed
    to standard output for each gene.

    Args:
        args: parsed command-line options; the attributes read here are
            ``input``, ``output``, ``bam``, ``bp`` and ``strand``.
    """
    logging.basicConfig(level=logging.DEBUG)
    fin = IO.fopen(args.input, "r")
    # NOTE(review): `out` is opened but never written to in the visible
    # part of this function; all results go to stdout.
    out = IO.fopen(args.output, "w")
    bam = DBI.init(args.bam, "bam")
    beds = [i for i in TableIO.parse(fin, "bed12")]
    beds.sort()
    bp = args.bp
    print("mapped:{}".format(bam.mapped))
    print("unmapped:{}".format(bam.unmapped))
    data = {}
    for i, x in enumerate(iter_cluster(beds)):
        print("{}\t{}:{}-{}".format(i+1, x["chr"], x["start"]+1, x["stop"]))
        # Splitting each gene into cds / utr3 / utr5 was already disabled
        # in the original code and is intentionally not done here.

        coords = [up_down_coordinate(gene, bp, bp) for gene in x["beds"]]
        for y in coords:
            data[y.id] = {"coord": y, "values": [0.0] * y.cdna_length()}
        # NOTE(review): not used in the visible code; kept because the end
        # of the function is not visible here.
        coord_beds = [_translate(coord, bed) for coord, bed in itertools.izip(coords, x["beds"])]
        for read in bam.query(method="bam1", chr=x["chr"], start=x["start"]-bp, stop=x["stop"]+bp, strand=args.strand):
            NM = getNM(read)  # number of hits
            # Extended genes of this cluster the read is compatible with.
            hits = [coord for coord in coords
                    if overlap(read, coord) and compatible(read, coord)]
            if not hits:
                continue
            # The read is shared equally between its compatible genes (NC)
            # and its reported hits (NM).
            weight = 1.0 / len(hits) / NM
            for coord in hits:
                # Clip into a local: the original rebound `read` here, so
                # every later gene saw a read already truncated to an
                # earlier gene's extent and was under-counted.
                clipped = read
                if read.start < coord.start or read.stop > coord.stop:
                    clipped = read._slice(max(read.start, coord.start),
                                          min(read.stop, coord.stop))
                read_in_coord = _translate(coord, clipped)
                values = data[coord.id]["values"]
                for l in xrange(read_in_coord.start, read_in_coord.stop):
                    values[l] += weight
        for y in coords:
            print(data[y.id]["coord"])
            print(data[y.id]["values"])

    '''
예제 #2
0
def _add_read_to_bins(bins, gene_bed, bin_num, cdna_length):
    """Add the coverage of one translated read to *bins*, in bin-width units.

    ``gene_bed`` spans ``[start, stop)`` in transcript coordinates.  The
    first and last bins it touches receive the covered fraction of the bin;
    every bin strictly in between receives 1.0.
    """
    bin_step = float(cdna_length) / bin_num
    # Explicit floor division keeps the indices integral on Python 2 and 3.
    first = gene_bed.start * bin_num // cdna_length
    last = (gene_bed.stop - 1) * bin_num // cdna_length
    if first == last:
        bins[first] += float(gene_bed.stop - gene_bed.start) / bin_step
        return
    bins[first] += first + 1 - float(gene_bed.start) / bin_step
    bins[last] += float(gene_bed.stop) / bin_step - last
    for i in range(first + 1, last):
        bins[i] += 1.0


def count_list(beds, bin_num, strand):
    """Bin read coverage along each transcript, for sense and antisense reads.

    For every transcript in ``beds`` the reads returned by
    ``bam.query(bed, "bam1", strand=strand)`` are tested with
    ``compatible_with_transcript`` against the transcript and, failing that,
    against a copy of it on the reversed strand.  Compatible reads are
    translated into transcript coordinates and spread over ``bin_num``
    equal-width bins.  ``bam`` is not a parameter; it is taken from the
    enclosing scope.

    Args:
        beds: iterable of transcript records.
        bin_num: number of bins each transcript is divided into.
        strand: forwarded unchanged to ``bam.query``.

    Returns:
        A tuple ``(bin_sum, bin_dis, neg_bin_sum, neg_bin_dis)``.
        ``bin_sum[i]`` / ``neg_bin_sum[i]`` is the coverage of bin ``i``
        summed over all transcripts; ``bin_dis[i]`` / ``neg_bin_dis[i]``
        lists, per transcript, that bin's coverage truncated to ``int``.
    """
    bin_sum = [0.0] * bin_num
    neg_bin_sum = [0.0] * bin_num
    bin_dis = [[] for _ in range(bin_num)]
    neg_bin_dis = [[] for _ in range(bin_num)]
    for bed in beds:
        neg_bed = bed._replace(strand=reverse_strand(bed.strand))
        cdna_length = bed.cdna_length()  # invariant for all reads of this bed
        bed_bin = [0.0] * bin_num
        neg_bed_bin = [0.0] * bin_num
        for read in bam.query(bed, "bam1", strand=strand):
            if compatible_with_transcript(read, bed):
                target = bed_bin
            elif compatible_with_transcript(read, neg_bed):
                target = neg_bed_bin
            else:
                continue
            # NOTE(review): antisense reads are also translated against
            # `bed` (not `neg_bed`), exactly as the original did - confirm
            # this is the intended orientation.
            _add_read_to_bins(target, _translate(bed, read), bin_num, cdna_length)
        for i in range(bin_num):
            bin_sum[i] += bed_bin[i]
            neg_bin_sum[i] += neg_bed_bin[i]
            bin_dis[i].append(int(bed_bin[i]))
            neg_bin_dis[i].append(int(neg_bed_bin[i]))
    return bin_sum, bin_dis, neg_bin_sum, neg_bin_dis