コード例 #1
0
ファイル: make_counts.py プロジェクト: dmalmer/EMG
def compute_overlaps(x,y):
    """Count the intervals of each list that get a hit in the other list.

    An interval tree is built over each input with ``node.tree``.  Every
    ``(start, stop)`` pair of one list is then queried against the tree of
    the other list, and it is counted when ``searchInterval`` returns a
    truthy result.

    Args:
        x: iterable of ``(start, stop)`` pairs.
        y: iterable of ``(start, stop)`` pairs.

    Returns:
        ``(hits_x, hits_y)``: the number of intervals in ``x`` with a hit
        in ``y``, and the number of intervals in ``y`` with a hit in ``x``.
    """
    # Build order (y first, then x) is kept as in the original code.
    tree_of_y = node.tree(y)
    tree_of_x = node.tree(x)
    hits_x = sum(
        1 for start, stop in x if tree_of_y.searchInterval((start, stop))
    )
    hits_y = sum(
        1 for start, stop in y if tree_of_x.searchInterval((start, stop))
    )
    return hits_x, hits_y
コード例 #2
0
ファイル: co_ocurrence_simulator.py プロジェクト: dmalmer/EMG
def compute_overlaps(x,y,OUT=""):
    """Return midpoint offsets between intervals of ``x`` and their hits in ``y``.

    ``y`` is sorted in place and indexed with ``node.tree``.  For every
    ``(start, stop)`` in ``x`` the tree is queried with ``searchInterval``;
    for each interval it returns (presumably the overlapping ones; confirm
    against ``node.tree``) the difference between the two midpoints is
    recorded.

    Args:
        x: iterable of ``(start, stop)`` pairs used as queries.
        y: list of ``(start, stop)`` pairs.  NOTE: sorted in place, so the
            caller's list is reordered.
        OUT: unused; kept only so existing callers keep working.

    Returns:
        A list of ``centre(x_interval) - centre(y_interval)`` floats, one
        per reported hit, in query order.
    """
    y.sort()
    tree = node.tree(y)
    offsets = []
    for start, stop in x:
        hits = tree.searchInterval((start, stop))
        query_centre = (stop + start) / 2.
        for hit_start, hit_stop in hits:
            hit_centre = (hit_stop + hit_start) / 2.
            offsets.append(query_centre - hit_centre)
    return offsets
コード例 #3
0
ファイル: linkage.py プロジェクト: dmalmer/EMG
def get_TSS_tree(FILE):
    """Load a tab-separated interval file into one interval tree per chromosome.

    Only the first three tab-separated columns of each line are used:
    chromosome name, start and stop (the latter two parsed as ints).

    Args:
        FILE: path of the file to read.

    Returns:
        A dict mapping each chromosome name to ``node.tree`` built from that
        chromosome's sorted ``(start, stop)`` tuples.
    """
    by_chrom = {}
    with open(FILE) as handle:
        for record in handle:
            fields = record.strip("\n").split("\t")
            chrom, start, stop = fields[:3]
            interval = (int(start), int(stop))
            by_chrom.setdefault(chrom, list()).append(interval)
    # Replace each raw interval list by a tree built from its sorted form.
    for chrom in by_chrom:
        intervals = by_chrom[chrom]
        intervals.sort()
        by_chrom[chrom] = node.tree(intervals)
    return by_chrom
コード例 #4
0
ファイル: motif_significance.py プロジェクト: dmalmer/EMG
def load_RNA_seq(FILE):
    """Load a whitespace-separated gene coverage table.

    Each line must provide at least five whitespace-separated fields:
    gene, chromosome, start, stop and coverage.

    Args:
        FILE: path of the file to read.

    Returns:
        A tuple ``(G, R)`` where

        * ``G`` maps gene -> ``(chrom, start, stop, coverage)``; ``start``
          and ``stop`` are kept as the strings read from the file and
          ``coverage`` is a float.
        * ``R`` maps chrom -> ``node.tree`` built from the
          ``(int(start), int(stop), gene)`` tuples of that chromosome.
    """
    G = {}
    R = {}
    with open(FILE) as FH:
        for line in FH:
            # Raw string: "\s" in a plain literal is an invalid escape
            # sequence and triggers a warning on modern Python versions.
            fields = re.split(r"\s+", line.strip("\n"))
            gene, chrom, start, stop, cov = fields[:5]
            G[gene] = (chrom, start, stop, float(cov))
            R.setdefault(chrom, list()).append((int(start), int(stop), gene))
    # NOTE(review): intervals are handed to node.tree in file order; confirm
    # whether node.tree requires sorted input.
    for chrom in R:
        R[chrom] = node.tree(R[chrom])
    return G, R
コード例 #5
0
ファイル: overlap_model.py プロジェクト: dmalmer/EMG
def _tally_overlaps(queries, tree, stats):
    """Query every interval against ``tree``; update ``stats``, return total hits."""
    total = 0
    for start, stop in queries:
        hits = len(tree.searchInterval((start, stop)))
        # stats maps "hits for one interval" -> "number of intervals seen".
        stats[hits] = stats.get(hits, 0) + 1
        total += hits
    return total


def _plot_total_overlaps(ax, totals, title, width, l, n_pairs):
    """Draw the normalised histogram of per-trial totals plus model curves on ``ax``."""
    ax.set_title(title)

    # np.histogram rejects bins=0, which is what max-min gives when every
    # trial produced the same total (always the case for a single trial).
    n_bins = max(1, max(totals) - min(totals))
    counts, edges = np.histogram(totals, bins=n_bins)
    edges = edges[:-1]
    norm = float(sum(counts))
    fractions = [float(ct) / norm for ct in counts]

    ax.bar(edges, fractions, alpha=0.3)
    xs = np.linspace(min(totals), max(totals))
    mu = np.mean(totals)
    # NOTE(review): std is 0 when all totals are equal; confirm that
    # normal() copes with a zero standard deviation.
    std = np.std(totals)
    ax.scatter(totals, [prob_single(a, width, l, n_pairs) for a in totals])
    ax.plot(xs, [normal(x, mu, std) for x in xs])


def run(N1,N2,T=100,l=100,alpha=1,beta=1):
    """Simulate overlaps between two random interval lists and plot the results.

    In each of ``T`` trials, ``N1`` intervals of length ``alpha`` (list A)
    and ``N2`` intervals of length ``beta`` (list B) are drawn with start
    points uniform on ``[0, l - length)``.  Each list is queried against an
    interval tree of the other one.  A 2x2 figure is then shown:

    * top row: distribution of hits per interval for A and for B, with
      ``prob_single`` values scattered on top;
    * bottom row: distribution of the total hits per trial for A and for
      B, with ``prob_single`` values and a ``normal`` curve fitted to the
      sample mean and standard deviation.

    Args:
        N1: number of intervals in list A per trial.
        N2: number of intervals in list B per trial.
        T: number of trials.
        l: length of the region the intervals are placed on.
        alpha: length of every interval in list A.
        beta: length of every interval in list B.

    Returns:
        None.  The figure is displayed with ``plt.show()``.

    Raises:
        ValueError: if no statistics were collected (for example ``T``,
            ``N1`` or ``N2`` is 0), because ``max()`` is applied to an
            empty collection.
    """
    A_stats = {}
    B_stats = {}
    AOs, BOs = [], []
    for _ in range(T):
        # Draw A before B so the random stream is consumed in a fixed order.
        A = [(x, x + alpha) for x in np.random.uniform(0, l - alpha, N1)]
        B = [(x, x + beta) for x in np.random.uniform(0, l - beta, N2)]
        A.sort()
        B.sort()
        TA = node.tree(A)
        TB = node.tree(B)
        AOs.append(_tally_overlaps(A, TB, A_stats))
        BOs.append(_tally_overlaps(B, TA, B_stats))

    F = plt.figure(figsize=(15,10))

    ax1 = F.add_subplot(2,2,1)
    ax1.set_title("List A; N: " + str(N1))
    a_keys = list(A_stats)
    a_weights = np.array([A_stats[b] for b in a_keys]) / float(sum(A_stats.values()))
    ax1.hist(a_keys, weights=a_weights, alpha=0.3)
    ax1.scatter(a_keys, [prob_single(b, alpha + beta, l, N2) for b in a_keys])
    ax1.set_xticks(range(0, max(a_keys) + 1))

    ax2 = F.add_subplot(2,2,2)
    ax2.set_title("List B; N: " + str(N2))
    b_keys = list(B_stats)
    b_weights = np.array([B_stats[b] for b in b_keys]) / float(sum(B_stats.values()))
    ax2.hist(b_keys, weights=b_weights, alpha=0.3)
    ax2.set_xticks(range(0, max(b_keys) + 1))
    ax2.set_xticklabels([str(i) for i in range(0, max(b_keys) + 1)])
    ax2.scatter(b_keys, [prob_single(b, alpha + beta, l, N1) for b in b_keys])

    ax3 = F.add_subplot(2,2,3)
    _plot_total_overlaps(
        ax3, AOs, "Total Number of Overlapping Events on A",
        alpha + beta, l, N2 * N1,
    )

    ax4 = F.add_subplot(2,2,4)
    _plot_total_overlaps(
        ax4, BOs, "Total Number of Overlapping Events on B",
        alpha + beta, l, N2 * N1,
    )

    plt.show()