def _write_strands(handle, X):
    """Write the ``~forward`` and ``~reverse`` sections of ``X`` to ``handle``.

    Every record is ``<column 0>,<value>``; the forward section takes the
    value from column 1 of ``X`` and the reverse section from column 2.
    """
    for label, column in (("~forward", 1), ("~reverse", 2)):
        handle.write(label + "\n")
        for i in range(X.shape[0]):
            handle.write(str(X[i, 0]) + "," + str(X[i, column]) + "\n")


def run(FILE):
    """Write one loaded region and one simulated region to ``FILE``.

    The file receives two blocks.  Each block is a ``#``-prefixed header
    line followed by a ``~forward`` and a ``~reverse`` section (see
    ``_write_strands``):

    * ``#chr1,6229860,6303055`` -- the array returned by
      ``load.grab_specific_region`` for that region;
    * ``#chrN,<st>,<sp>`` -- the array returned by ``simulate.runOne``,
      where ``st``/``sp`` are the first and last values of column 0 after
      the coordinate transformation below.

    The output file is opened through a context manager so the handle is
    released even when loading, simulation or a write raises.
    """
    with open(FILE, "w") as FHW:
        X = load.grab_specific_region("chr1", 6229860, 6303055,
                                      SHOW=False, bins=1000)
        # "%s %s" reproduces the output of the Python 2 statement
        # ``print a, b`` and also runs unchanged under Python 3.
        print("%s %s" % (min(X[:, 0]), max(X[:, 0])))
        FHW.write("#chr1,6229860,6303055\n")
        _write_strands(FHW, X)

        X = simulate.runOne(mu=0, s=0.1, l=3, lr=100, ll=-50,
                            we=0.5, wl=0.25, wr=0.25,
                            pie=0.5, pil=0.1, pir=0.9,
                            N=1000, SHOW=False, bins=1000, noise=True)
        # NOTE(review): the shift is applied before the scaling, so the
        # offset 6303055 is multiplied by 100 as well -- confirm intended.
        X[:, 0] += 6303055
        X[:, 0] *= 100.
        st, sp = X[0, 0], X[-1, 0]
        print("%s %s" % (st, sp))
        FHW.write("#chrN," + str(st) + "," + str(sp) + "\n")
        _write_strands(FHW, X)
coverage_scores = window(X,std=std,lam=lam,step_size=1) bayes_ks = bayes_factor(X,std=std,lam=lam,step_size=1) hybrid = center(coverage_scores, bayes_ks) starts = find_peaks([(x,y) for x,y in zip(np.linspace(X[0,0], X[-1,0], len(hybrid)), hybrid)]) return coverage_scores, bayes_ks, hybrid, starts def sample(X, k, std=1, lam=0.1): coverage_scores, bayes_ks, hybrid, starts = compute_possible_EM_starts(X,std=std, lam=lam) keeps = list() for i in range(k): j = np.random.geometric(0.8)-1 keeps.append(starts[j][0]) starts = starts[:j] + starts[j+1:] return keeps if __name__=="__main__": X = load.grab_specific_region("chr1",6229860,6303055, SHOW=True, bins=300 ) X[:,0]/=100. X[:,0]-=X[0,0] coverage_scores, bayes_ks, hybrid, starts = compute_possible_EM_starts(X,std=1,lam=0.1) draw(X, coverage_scores, bayes_ks, hybrid,starts) clf = model.EMGU(noise=True, K=3,noise_max=0.01, moveUniformSupport=5, max_it=50,seed=True) clf.fit(X) clf.draw(X)
ax.set_xlabel("Relative Genomic Position") plt.savefig("/Users/joazofeifa/Lab/Article_drafts/EMG_paper/images/example_gene_fig.svg") plt.show() def write_out(X, OUT=""): FHW = open(OUT, "w") for i in range(X.shape[0]): FHW.write(str(X[i,0])+","+str(X[i,1])+","+str(X[i,2])+"\n") FHW.close() def load_IN(FILE): L = list() with open(FILE) as FH: for line in FH: x,y,r = line.strip("\n").split(",") L.append((float(x), float(y), float(r))) return np.array(L) if __name__=="__main__": WRITE = False OUT = "/Users/joazofeifa/Lab/Article_drafts/EMG_paper/files/Example_Gene.csv" if WRITE: X = load.grab_specific_region("chr1",8012007, 8033978, pos_file="/Users/joazofeifa//Lab/gro_seq_files/HCT116/bed_graph_files/DMSO2_3.pos.BedGraph", neg_file="/Users/joazofeifa//Lab/gro_seq_files/HCT116/bed_graph_files/DMSO2_3.neg.BedGraph", SHOW =False, bins=300) X[:,0]-=X[0,0] X[:,0]/=100. write_out(X, OUT=OUT) X = load_IN(OUT) draw(X)
if __name__ == "__main__":
    # Directory that holds the strand-specific BedGraph files.
    IN = "/Users/joazofeifa/Lab/gro_seq_files/HCT116/bed_graph_files/"

    # Region coordinates kept as bookmarks; the last one is the region
    # loaded below.
    # chr1:87,691,254-87,695,004
    # 88,319,575-88320266
    # 92,308,146-92,315,100
    # 62,182,362-62,198,443
    # 8,246,915-8,255,824
    # chr1:3,233,790-3,239,961
    # chr1:1,163,801-1,175,755
    # chr1:1,240,585-1,248,496
    # 1,243,262-1,251,173
    # chr1:1,090,956-1,114,133
    # chr1:1,087,608-1,108,057

    # Settings forwarded to run_MM; scale also divides column 0 below.
    scale = 100
    window = 1000

    X = load.grab_specific_region(
        "chr1", 1087608, 1108057,
        SHOW=False,
        bins=100,
        pos_file=IN + "DMSO2_3.pos.BedGraph",
        neg_file=IN + "DMSO2_3.neg.BedGraph",
    )
    # Make column 0 relative to its smallest value, then rescale it.
    X[:, 0] -= min(X[:, 0])
    X[:, 0] /= scale
    run_MM(X, window=window, scale=scale)
# NOTE(review): the six statements below are the tail of a definition whose
# header lies above this chunk; they are reproduced unchanged and the
# 4-space indent is an assumption.  They compute the Y-weighted mean and
# variance of X and print the variance next to its algebraically expanded
# form (sum(x^2*y) - 2*mean*sum(x*y) + mean^2*N) / N for comparison.
    N = sum(Y)
    XS = sum([ X[i]*Y[i] for i in range(len(X))])
    mean = XS/N
    var = sum([pow(X[i] - mean,2)*Y[i] for i in range(len(X))]) / N
    X2 = sum([pow(X[i],2)*Y[i] for i in range(len(X))])
    print var, (X2 - 2*mean*XS + pow(mean,2)*N) /N


if __name__ == "__main__":
    # Directory that holds the strand-specific BedGraph files.
    IN = "/Users/joazofeifa/Lab/gro_seq_files/HCT116/bed_graph_files/"

    # Region coordinates kept as bookmarks; the region loaded below is
    # chr1:162,105,107-162,113,041.
    # chr1:87,691,254-87,695,004
    # 88,319,575-88320266
    # 92,308,146-92,315,100
    # 62,182,362-62,198,443
    # 8,246,915-8,255,824
    # chr1:3,233,790-3,239,961
    # chr1:1,013,872-1,017,272
    # 934,235-937,997
    # 1,206,352-1,213,240
    # 836,632-843,542
    # 1,091,333-1,096,157
    # chr1:162,105,107-162,113,041
    # 25,681-33,615

    # Settings forwarded to run_MM; scale also divides column 0 below.
    scale = 100
    window = 500

    X = load.grab_specific_region(
        "chr1", 162105107, 162113041,
        SHOW=False,
        bins=500,
        pos_file=IN + "DMSO2_3.pos.BedGraph",
        neg_file=IN + "DMSO2_3.neg.BedGraph",
    )
    # Make column 0 relative to its smallest value, then rescale it.
    X[:, 0] -= min(X[:, 0])
    X[:, 0] /= scale
    run_MM(X, window=window, scale=scale)
if __name__ == "__main__":
    #==================================
    # testing MAP-EM procedure
    # X = simulate.runOne(mu=0, s=1, l=10, lr=100, ll=-50, we=0.5,wl=0.25, wr=0.25, pie=0.5, pil=0.1, pir=0.9,
    #                     N=1000, SHOW=False, bins=300, noise=False, foot_print=10 )

    # Region coordinates kept as bookmarks.
    # NOTE(review): the chr2 region loaded below starts at the same
    # coordinate as the last bookmark but ends at 10433237 -- confirm.
    # chr1:20,984,647-20,991,448
    # chr1:836,835-843,549
    # chr1:539,399-542,484
    # chr3:15,684,556-15,692,636
    # chr2:10,420,826-10,462,048

    # Set WRITE to True to regenerate the cached tab-separated file from the
    # BedGraph inputs; otherwise the existing cache is read as is.
    WRITE = False
    if WRITE:
        X = load.grab_specific_region(
            "chr2", 10420826, 10433237,
            pos_file="/Users/joazofeifa//Lab/gro_seq_files/HCT116/bed_graph_files/DMSO2_3.pos.BedGraph",
            neg_file="/Users/joazofeifa//Lab/gro_seq_files/HCT116/bed_graph_files/DMSO2_3.neg.BedGraph",
            SHOW=False, bins=1000)
        # Shift column 0 to start at 0, then rescale by 1/100.
        X[:, 0] -= X[0, 0]
        X[:, 0] /= 100.
        # The context manager closes the cache file even if a write raises.
        with open("/Users/joazofeifa/test.bed", "w") as FHW:
            for i in range(X.shape[0]):
                FHW.write(str(X[i, 0]) + "\t" + str(X[i, 1]) + "\t" + str(X[i, 2]) + "\n")

    # Read the cached rows back: three tab-separated floats per line.
    X = list()
    with open("/Users/joazofeifa/test.bed") as FH:
        for line in FH:
            x, y, z = [float(x) for x in line.strip("\n").split("\t")]
            X.append([x, y, z])
    X = np.array(X)
def sample(X, k, std=1, lam=0.1):
    """Draw up to ``k`` candidate start positions without replacement.

    Candidates come from ``compute_possible_EM_starts(X, std=std, lam=lam)``.
    On every draw an index is taken from a geometric distribution
    (``p = 0.8``, shifted to start at 0), so index 0 is chosen most of the
    time; the first element of the chosen candidate is kept and the
    candidate is removed from the pool.

    Args:
        X: data forwarded to ``compute_possible_EM_starts``.
        k: number of positions requested.
        std, lam: forwarded to ``compute_possible_EM_starts``.

    Returns:
        A list with ``min(k, number of candidates)`` positions.
    """
    coverage_scores, bayes_ks, hybrid, starts = compute_possible_EM_starts(
        X, std=std, lam=lam)
    keeps = list()
    for i in range(k):
        if len(starts) == 0:
            # Fewer candidates than requested: return what was collected
            # instead of indexing into an empty pool.
            break
        # The geometric draw has no upper bound, so clamp it to the last
        # remaining candidate rather than raising IndexError.
        j = min(np.random.geometric(0.8) - 1, len(starts) - 1)
        keeps.append(starts[j][0])
        starts = starts[:j] + starts[j + 1:]
    return keeps


if __name__ == "__main__":
    # Load the region, rescale column 0 by 1/100 and shift it to start at 0.
    X = load.grab_specific_region("chr1", 6229860, 6303055, SHOW=True, bins=300)
    X[:, 0] /= 100.
    X[:, 0] -= X[0, 0]
    coverage_scores, bayes_ks, hybrid, starts = compute_possible_EM_starts(
        X, std=1, lam=0.1)
    draw(X, coverage_scores, bayes_ks, hybrid, starts)
    clf = model.EMGU(noise=True, K=3, noise_max=0.01, moveUniformSupport=5,
                     max_it=50, seed=True)
    clf.fit(X)
# NOTE(review): this fragment uses ``IN`` without defining it, so it is the
# interior of a script block whose beginning lies above this chunk.  It is
# laid out flat here; re-indent it if it belongs under a guard such as
# ``if __name__ == "__main__":`` -- confirm.
#
# Region coordinates kept as bookmarks; the region loaded below is
# 1,246,727-1,252,981.
# chr1:87,691,254-87,695,004
# 88,319,575-88320266
# 92,308,146-92,315,100
# 62,182,362-62,198,443
# 8,246,915-8,255,824
# chr1:3,233,790-3,239,961
# chr1:1,013,872-1,017,272
# 934,235-937,997
# 1,206,352-1,213,240
# 836,632-843,542
# 1,091,333-1,096,157
# chr1:162,105,107-162,113,041
# 25,681-33,615
# chr1:760,940-764,973
# 899,808-905,675
# chr1:4,763,739-4,766,290
# chr1:1,140,801-1,143,549
# :1,200,396-1,202,629
# :936,603-947,066
# 1,246,727-1,252,981
# 1,137,847-1,145,798

# Length of the loaded region divided by 500.0 (the same number as ``bins``
# below).  The parenthesised form prints the same value under Python 2.
print((1252981 - 1246727) / 500.0)

# Settings forwarded to run_MM; scale also divides column 0 below.
scale = 100
window = 500

X = load.grab_specific_region(
    "chr1", 1246727, 1252981,
    SHOW=False,
    bins=500,
    pos_file=IN + "DMSO2_3.pos.BedGraph",
    neg_file=IN + "DMSO2_3.neg.BedGraph",
)
# Make column 0 relative to its smallest value, then rescale it.
X[:, 0] -= min(X[:, 0])
X[:, 0] /= scale
run_MM(X, window=window, scale=scale)