def run(args): logging.basicConfig(level=logging.DEBUG) fin=IO.fopen(args.input,"r") out=IO.fopen(args.output,"w") bam=DBI.init(args.bam,"bam"); beds=[i for i in TableIO.parse(fin,"bed12")] beds.sort() bp=args.bp print("mapped:{}".format(bam.mapped)) print("unmapped:{}".format(bam.unmapped)) data={} for i,x in enumerate(iter_cluster(beds)): print("{}\t{}:{}-{}".format(i+1,x["chr"],x["start"]+1,x["stop"])) ''' cds=[z.cds() for z in x["beds"] if z.cds()] utr3=[z.utr3() for z in x["beds"] if z.utr3()] utr5=[z.utr5() for z in x["beds"] if z.utr5()] ''' coords = [ up_down_coordinate(gene,args.bp,args.bp) for gene in x["beds"] ] for j,y in enumerate(coords): data[y.id]={} data[y.id]["coord"]=y data[y.id]["values"]=[0.0 for l in range(y.cdna_length())]; coord_beds = [ _translate(coord,bed) for coord,bed in itertools.izip(coords,x["beds"])] for j,read in enumerate(bam.query(method="bam1",chr=x["chr"],start=x["start"]-args.bp,stop=x["stop"]+args.bp,strand=args.strand)): NM=getNM(read) # number of hits NC=0 # number of compatible c_coords=[] for k,coord in enumerate(coords): if overlap(read,coord) and compatible(read,coord): # don't consider the reads extend out of coords. NC+=1 c_coords.append(k) for k,c in enumerate(c_coords): coord=coords[c] if read.start < coord.start or read.stop > coord.stop: start=max(read.start,coord.start) stop=min(read.stop,coord.stop) read=read._slice(start,stop) read_in_coord = _translate(coord,read) for l in xrange(read_in_coord.start,read_in_coord.stop): data[coord.id]["values"][l]+=1.0/NC/NM for j,y in enumerate(coords): print(data[y.id]["coord"]) print(data[y.id]["values"]) '''
def count_list(beds,bin_num,strand): bin_sum=[0.0 for i in xrange(bin_num)] neg_bin_sum=[0.0 for i in xrange(bin_num)] bin_dis=[[] for i in xrange(bin_num)] neg_bin_dis=[[] for i in xrange(bin_num)] for bed in beds: neg_bed=bed._replace(strand=reverse_strand(bed.strand)) bed_bin=[0.0 for i in xrange(bin_num)] neg_bed_bin=[0.0 for i in xrange(bin_num)] for read in bam.query(bed,"bam1",strand=strand): cdna_length=bed.cdna_length() bin_step=float(cdna_length)/bin_num if compatible_with_transcript(read,bed): gene_bed=_translate(bed,read) bin_start=gene_bed.start*bin_num/cdna_length bin_stop=(gene_bed.stop-1)*bin_num/cdna_length if bin_stop==bin_start: bed_bin[bin_start]+=float(gene_bed.stop-gene_bed.start)/bin_step else: bed_bin[bin_start]+=bin_start+1-float(gene_bed.start)/bin_step bed_bin[bin_stop]+=float(gene_bed.stop)/bin_step-bin_stop for i in xrange(bin_start+1,bin_stop): bed_bin[i]+=1.0 elif compatible_with_transcript(read,neg_bed): gene_bed=_translate(bed,read) bin_start=gene_bed.start*bin_num/cdna_length bin_stop=(gene_bed.stop-1)*bin_num/cdna_length if bin_stop==bin_start: neg_bed_bin[bin_start]+=float(gene_bed.stop-gene_bed.start)/bin_step else: neg_bed_bin[bin_start]+=bin_start+1-float(gene_bed.start)/bin_step neg_bed_bin[bin_stop]+=float(gene_bed.stop)/bin_step-bin_stop for i in xrange(bin_start+1,bin_stop): neg_bed_bin[i]+=1.0 for i in xrange(bin_num): bin_sum[i]+=bed_bin[i] neg_bin_sum[i]+=neg_bed_bin[i] bin_dis[i].append(int(bed_bin[i])) neg_bin_dis[i].append(int(neg_bed_bin[i])) return bin_sum,bin_dis,neg_bin_sum,neg_bin_dis