def compute_overlaps(x, y):
    """Count how many intervals in each list get a hit in the other list.

    A ``node.tree`` is built from each list. Every ``(start, stop)`` pair of
    one list is then looked up in the tree of the other list with
    ``searchInterval``; a pair is counted when that lookup returns something
    truthy (presumably a non-empty collection of overlapping intervals --
    confirm against the ``node`` module).

    NOTE(review): a second function named ``compute_overlaps`` is defined
    later in this module and rebinds the name, so this two-argument version
    is shadowed once the module has finished loading.

    Args:
        x: iterable of ``(start, stop)`` pairs.
        y: iterable of ``(start, stop)`` pairs.

    Returns:
        Tuple ``(hits_in_x, hits_in_y)``: the number of pairs of ``x`` with a
        hit in the tree of ``y``, and the number of pairs of ``y`` with a hit
        in the tree of ``x``.
    """
    tree_of_y = node.tree(y)
    tree_of_x = node.tree(x)

    hits_in_x = sum(
        1 for start, stop in x if tree_of_y.searchInterval((start, stop))
    )
    hits_in_y = sum(
        1 for start, stop in y if tree_of_x.searchInterval((start, stop))
    )
    return hits_in_x, hits_in_y
def compute_overlaps(x, y, OUT=""):
    """Collect center-to-center offsets between intervals of ``x`` and their hits in ``y``.

    ``y`` is sorted and turned into a ``node.tree``. For every ``(start, stop)``
    pair of ``x`` the tree is queried with ``searchInterval``; for each
    ``(st, sp)`` pair returned, the difference between the midpoint of the
    query and the midpoint of the hit is recorded.

    Args:
        x: iterable of ``(start, stop)`` pairs used as queries.
        y: list of ``(start, stop)`` pairs. It is sorted IN PLACE, so the
            caller's list is reordered as a side effect; this is kept because
            existing callers may depend on it.
        OUT: unused. Kept only so existing call sites that pass it keep
            working.

    Returns:
        List of floats, one per (query, hit) pair, each equal to
        ``midpoint(query) - midpoint(hit)``. The list is ordered by query and,
        within a query, by the order ``searchInterval`` yields its hits.
    """
    y.sort()
    tree = node.tree(y)

    offsets = []
    for start, stop in x:
        hits = tree.searchInterval((start, stop))
        query_center = (stop + start) / 2.
        for hit_start, hit_stop in hits:
            hit_center = (hit_stop + hit_start) / 2.
            offsets.append(query_center - hit_center)
    return offsets
def get_TSS_tree(FILE):
    """Load intervals from a tab-separated file into one ``node.tree`` per chromosome.

    Each non-blank line must have at least three tab-separated columns:
    chromosome name, start and stop. Start and stop are parsed as integers;
    any further columns are ignored. The intervals of each chromosome are
    sorted before being handed to ``node.tree``.

    Args:
        FILE: path of the file to read.

    Returns:
        Dict mapping chromosome name to the ``node.tree`` built from that
        chromosome's sorted ``(start, stop)`` tuples. Empty when the file
        holds no data lines.

    Raises:
        ValueError: if a non-blank line has fewer than three columns or a
            start/stop value that is not an integer.
    """
    intervals_by_chrom = {}
    with open(FILE) as FH:
        for line in FH:
            # A blank or whitespace-only line (e.g. a trailing newline) carries
            # no record; skip it instead of failing on the unpack below.
            if not line.strip():
                continue
            chrom, start, stop = line.strip("\n").split("\t")[:3]
            intervals_by_chrom.setdefault(chrom, []).append(
                (int(start), int(stop))
            )

    return {
        chrom: node.tree(sorted(intervals))
        for chrom, intervals in intervals_by_chrom.items()
    }
def load_RNA_seq(FILE):
    """Load per-gene records from a whitespace-delimited file.

    Each non-blank line must have at least five whitespace-separated fields:
    gene, chromosome, start, stop and coverage. Further fields are ignored.

    Args:
        FILE: path of the file to read.

    Returns:
        Tuple ``(G, R)`` where

        * ``G`` maps gene -> ``(chrom, start, stop, coverage)``. ``start`` and
          ``stop`` are kept as the strings read from the file; ``coverage`` is
          a float. A gene that appears more than once keeps its last record.
        * ``R`` maps chromosome -> ``node.tree`` built from that chromosome's
          ``(int(start), int(stop), gene)`` tuples, in file order.

    Raises:
        ValueError: if a non-blank line has fewer than five fields, or a
            start/stop/coverage value that cannot be converted.
    """
    G = {}
    R = {}
    with open(FILE) as FH:
        for line in FH:
            # Blank or whitespace-only lines hold no record; skip them instead
            # of failing on the five-way unpack below.
            if not line.strip():
                continue
            # Raw string: "\s" in a plain literal is an invalid escape sequence.
            gene, chrom, start, stop, cov = re.split(
                r"\s+", line.strip("\n")
            )[:5]
            G[gene] = (chrom, start, stop, float(cov))
            R.setdefault(chrom, []).append((int(start), int(stop), gene))

    # NOTE(review): the per-chromosome lists are handed to node.tree unsorted,
    # as before; confirm whether node.tree needs sorted input.
    for chrom in R:
        R[chrom] = node.tree(R[chrom])
    return G, R
def _tally_overlaps(queries, tree, stats):
    """Query ``tree`` with every interval, histogram hit counts into ``stats``, return the total."""
    total = 0
    for start, stop in queries:
        n_hits = len(tree.searchInterval((start, stop)))
        stats[n_hits] = stats.get(n_hits, 0) + 1
        total += n_hits
    return total


def _plot_total_overlaps(ax, title, totals, alpha, beta, l, n_pairs):
    """Draw the normalized histogram of per-trial totals plus the two model overlays on ``ax``."""
    ax.set_title(title)

    # np.histogram rejects bins=0, which is what max-min gives when every
    # trial produced the same total (always the case for a single trial).
    n_bins = max(max(totals) - min(totals), 1)
    counts, edges = np.histogram(totals, bins=n_bins)
    n_trials = float(sum(counts))  # computed once rather than per bar
    ax.bar(edges[:-1], [float(ct) / n_trials for ct in counts], alpha=0.3)

    xs = np.linspace(min(totals), max(totals))
    mu = np.mean(totals)
    std = np.std(totals)
    ax.scatter(totals, [prob_single(a, alpha + beta, l, n_pairs) for a in totals])
    ax.plot(xs, [normal(x, mu, std) for x in xs])


def run(N1, N2, T=100, l=100, alpha=1, beta=1):
    """Simulate random interval lists, count their mutual hits, and plot the results.

    In each of ``T`` trials, list A gets ``N1`` intervals ``(x, x + alpha)``
    with ``x`` drawn by ``np.random.uniform(0, l - alpha, N1)``, and list B
    gets ``N2`` intervals ``(x, x + beta)`` drawn the same way with ``beta``.
    Both lists are sorted and turned into a ``node.tree``; every interval of A
    is looked up in B's tree and vice versa with ``searchInterval``.

    A 2x2 figure is then shown with ``plt.show()``:

    * top row: for A and for B, the distribution over all trials of the
      number of hits per interval, overlaid with ``prob_single`` values;
    * bottom row: for A and for B, the distribution of the per-trial total
      number of hits, overlaid with ``prob_single`` values and a ``normal``
      curve using the sample mean and standard deviation.

    ``prob_single`` and ``normal`` are defined outside this block; what they
    model is not visible here.

    Args:
        N1: number of intervals in list A.
        N2: number of intervals in list B.
        T: number of trials; must be at least 1.
        l: upper bound of the range the intervals are placed in.
        alpha: length of each interval in list A.
        beta: length of each interval in list B.

    Returns:
        None. The figure is displayed as a side effect.

    Raises:
        ValueError: if ``T`` is smaller than 1 (there would be nothing to plot).
    """
    if T < 1:
        raise ValueError("T must be at least 1 to have any trials to plot")

    A_stats = {}
    B_stats = {}
    AOs, BOs = [], []
    for _ in range(T):
        # A is drawn before B so the random stream is consumed in a fixed order.
        A = [(x, x + alpha) for x in np.random.uniform(0, l - alpha, N1)]
        B = [(x, x + beta) for x in np.random.uniform(0, l - beta, N2)]
        A.sort()
        B.sort()
        TA = node.tree(A)
        TB = node.tree(B)
        AOs.append(_tally_overlaps(A, TB, A_stats))
        BOs.append(_tally_overlaps(B, TA, B_stats))

    F = plt.figure(figsize=(15, 10))

    ax1 = F.add_subplot(2, 2, 1)
    ax1.set_title("List A; N: " + str(N1))
    a_keys = list(A_stats)
    a_weights = np.array([A_stats[b] for b in a_keys]) / float(sum(A_stats.values()))
    ax1.hist(a_keys, weights=a_weights, alpha=0.3)
    ax1.scatter(a_keys, [prob_single(b, alpha + beta, l, N2) for b in a_keys])
    ax1.set_xticks(range(0, max(A_stats.keys()) + 1))

    ax2 = F.add_subplot(2, 2, 2)
    ax2.set_title("List B; N: " + str(N2))
    b_keys = list(B_stats)
    b_weights = np.array([B_stats[b] for b in b_keys]) / float(sum(B_stats.values()))
    ax2.hist(b_keys, weights=b_weights, alpha=0.3)
    b_ticks = range(0, max(B_stats.keys()) + 1)
    ax2.set_xticks(b_ticks)
    ax2.set_xticklabels([str(i) for i in b_ticks])
    ax2.scatter(b_keys, [prob_single(b, alpha + beta, l, N1) for b in b_keys])

    ax3 = F.add_subplot(2, 2, 3)
    _plot_total_overlaps(
        ax3, "Total Number of Overlapping Events on A", AOs, alpha, beta, l, N2 * N1
    )

    ax4 = F.add_subplot(2, 2, 4)
    _plot_total_overlaps(
        ax4, "Total Number of Overlapping Events on B", BOs, alpha, beta, l, N2 * N1
    )

    plt.show()