def count_flank(beds, bp, strand, upstream=True):
    '''
    flank seq aggregation ( no splicing)
    if upstream is False , count downstream

    For every record in `beds` the `bp`-long flank is fetched with
    bed.upstream(bp) or bed.downstream(bp), reads overlapping it are
    queried, and a per-base coverage vector is accumulated separately for
    reads whose translated strand is "+" or "." and for all other reads.

    beds     : iterable of records offering upstream(bp) / downstream(bp)
    bp       : flank length, i.e. the length of every returned vector
    strand   : forwarded unchanged as the `strand` keyword of bam.query
    upstream : count the upstream flank when True, the downstream flank
               otherwise

    Returns (pos_sum, pos_dis, neg_sum, neg_dis):
      *_sum[i] is the coverage at flank position i summed over all records,
      *_dis[i] is the list of per-record coverages at position i, in the
               order the records were iterated.

    NOTE(review): `bam` is a free variable here -- it has to exist at module
    level for this function to work; confirm against the rest of the file.
    NOTE(review): block starts are used as offsets from the beginning of the
    flank; confirm that translate_coordinates returns them in that form.
    '''
    pos_sum = [0] * bp
    neg_sum = [0] * bp
    pos_dis = [[] for _ in range(bp)]
    neg_dis = [[] for _ in range(bp)]
    for bed in beds:
        if upstream:
            flank = bed.upstream(bp)
            # in case upstream is less than bp: shift so that the flank's
            # last base still lands on index bp-1
            offset = bp - flank.cdna_length()
        else:
            flank = bed.downstream(bp)
            offset = 0
        pos = [0] * bp
        neg = [0] * bp
        for read in bam.query(flank, "bam1", strand=strand):
            translated_read = translate_coordinates(flank, read)
            # "+" and "." are counted together; anything else is negative.
            counts = pos if translated_read.strand in ("+", ".") else neg
            for start, size in zip(translated_read.blockStarts,
                                   translated_read.blockSizes):
                # Clip the block to [0, bp) once instead of testing every base.
                first = max(start + offset, 0)
                last = min(start + size + offset, bp)
                for j in range(first, last):
                    counts[j] += 1
        for i in range(bp):
            pos_sum[i] += pos[i]
            neg_sum[i] += neg[i]
            pos_dis[i].append(pos[i])
            neg_dis[i].append(neg[i])
    return pos_sum, pos_dis, neg_sum, neg_dis
def _save_aggregation_plot(output, up, down, bed_num, bin_sum, bin_e, tts):
    '''
    Save a two-panel figure to output + ".png": mean coverage on top and
    the per-position Gini coefficient in a thin strip below.

    Plotting is best effort: a missing matplotlib or any drawing error is
    logged and otherwise ignored, so the text output is never affected.
    '''
    try:
        import matplotlib
        matplotlib.use('Agg')  # select the Agg backend before pyplot is imported
        import matplotlib.pyplot as plt
        matplotlib.rcParams.update({'font.size': 9})
        positions = list(range(-up, down))
        # Bottom strip: Gini coefficient per position.
        ax1 = plt.subplot2grid((7, 1), (6, 0))
        plt.ylabel('gini coeffecient')
        plt.fill_between(positions, bin_e, color="r", alpha=0.2, y2=0)
        ax1.set_ylim(0, 1)
        ax1.set_xlim(-up, down)
        ax1.axes.get_xaxis().set_visible(False)
        plt.axvline(x=0, linewidth=1, color='y')
        # Main panel: mean coverage per position.
        ax2 = plt.subplot2grid((7, 1), (0, 0), rowspan=5)
        ax2.set_xlim(-up, down)
        plt.plot(positions, [float(total) / bed_num for total in bin_sum])
        plt.ylabel('mean coverage')
        if tts:
            plt.xlabel('pos to tts (bp)')
        else:
            plt.xlabel('pos to tss (bp)')
        plt.axvline(x=0, linewidth=1, color='y')
        plt.grid(True)
        plt.savefig(output + ".png")
    except ImportError:
        logging.warning("matplotlib is not available; no plot written for %s",
                        output)
    except Exception:
        logging.exception("could not draw the aggregation plot for %s", output)


def run(args):
    '''
    Aggregate read coverage in a window around the TSS (or the TTS when
    args.tts is set) of every record in the input table.

    Reads from args: up, down (window is [-up, down) around the site),
    bam, input, format, output and tts.

    Writes a tab-separated table to args.output with one row per window
    position: the position relative to the site, the coverage averaged
    over all records, and gini_coefficient() of the per-record coverages
    at that position. A plot is then attempted at args.output + ".png".

    When the input holds no records only the header line is written and a
    warning is logged, since neither column can be computed.
    '''
    logging.basicConfig(level=logging.INFO)
    up = args.up
    down = args.down
    bp_num = up + down
    offset = -up  # window index 0 corresponds to position -up
    bam = DBI.init(args.bam, "bam")
    # NOTE(review): fin/out are not closed here; whether closing is safe
    # depends on what IO.fopen hands back -- confirm before adding it.
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    bin_sum = [0] * bp_num
    bin_dis = [[] for _ in range(bp_num)]
    # Counted explicitly so that an empty input cannot leave it undefined.
    bed_num = 0
    for bed in TableIO.parse(fin, args.format):
        bed_num += 1
        bed_bin = [0] * bp_num
        if args.tts:
            pos = bed.tts()
        else:
            pos = bed.tss()
        pos_flank = get_flank_region(pos, up, down)
        for read in bam.query(pos_flank, "bam1", strand="read1"):
            a = translate_coordinates(pos, read)
            for e in a.Exons():
                # Shift into window indices and clip to [0, bp_num).
                start = max(e.start - offset, 0)
                end = min(e.stop - offset, bp_num)
                for j in range(start, end):
                    bed_bin[j] += 1
        for i in range(bp_num):
            bin_sum[i] += bed_bin[i]
            bin_dis[i].append(bed_bin[i])
    if bed_num:
        bin_e = [gini_coefficient(counts) for counts in bin_dis]
    else:
        bin_e = []
    if args.tts:
        print("pos_to_tts\taggregation_mean\tgini_coefficient", file=out)
    else:
        print("pos_to_tss\taggregation_mean\tgini_coefficient", file=out)
    if not bed_num:
        logging.warning("no records read from %s; only the header was written",
                        args.input)
        return
    for i in range(bp_num):
        print("{bin}\t{aggregation}\t{E}".format(bin=i + offset,
                                                  aggregation=float(bin_sum[i]) / bed_num,
                                                  E=bin_e[i]),
              file=out)
    _save_aggregation_plot(args.output, up, down, bed_num, bin_sum, bin_e,
                           args.tts)