def compute_coefficients_ref(ks):
    """Expand the monic polynomial whose roots are ``ks``.

    Starts from the constant polynomial 1 and multiplies by (x - k) for
    each root k in turn.  Returns the coefficient list
    [a_n, ..., a_1, a_0], highest degree first; an empty ``ks`` gives [1].
    """
    poly = [1]
    for root in ks:
        # (x - root) * P(x) = x*P(x) - root*P(x).  Appending a zero shifts
        # P up one degree; the scaled copy is padded on the left so the two
        # lists line up term by term.
        times_x = poly + [0]
        times_root = [0] + [-root * coeff for coeff in poly]
        poly = zipWith(lambda a, b: a + b, times_x, times_root)
    return poly
def sample_path_ref2(qf, koffs, t_final, chromosome=None, verbose=False):
    """Simulate one sample path up to ``t_final``; return marginal occupancies.

    Variant of ``sample_path_ref`` that performs each reaction inline
    instead of delegating to ``update``.

    Args:
        qf: value used as the binding rate of every unoccupied site; it is
            decremented on each binding event and incremented on each
            unbinding event (presumably the number of free copies).
        koffs: per-site unbinding rates; ``koffs[i]`` is the rate used for
            site ``i`` while it is occupied.
        t_final: time at which occupancy tracking stops.
        chromosome: initial occupancy list (truthy entry = bound), indexed
            like ``koffs``.  Defaults to an all-empty chromosome.  The list
            is copied, so the caller's object is never modified.
        verbose: if true, print every reaction and a running summary.

    Returns:
        A list holding, for each site, the accumulated bound time divided
        by ``t_final``.
    """
    if chromosome is None:
        # Start from an empty chromosome.
        chrom = [0] * len(koffs)
    else:
        # Simulate on a copy, as sample_path_ref does, so the caller's
        # initial state survives the run.
        chrom = list(chromosome)
    # Candidate reaction sites: one per chromosome position, so the sampled
    # index always lines up with ``rates`` regardless of module globals.
    sites = list(range(len(chrom)))
    t = 0
    dt = 0
    occs = [0 for _ in chrom]
    while t < t_final:
        # An occupied site can only unbind (rate koff); an empty one can
        # only bind (rate qf).
        rates = [koffs[i] if bound else qf for i, bound in enumerate(chrom)]
        dt = random.expovariate(sum(rates))
        t += dt
        if t > t_final:
            # The last waiting time overshoots t_final: only credit the
            # part of the interval that lies before t_final.
            dt = t_final - t + dt
        # Credit occupancy with the state that held *during* the interval,
        # i.e. before the reaction below is applied.
        occs = zipWith(lambda occ, ch: occ + ch * dt, occs, chrom)
        idx = inverse_cdf_sample(sites, normalize(rates))
        if chrom[idx]:
            # Unbinding reaction: the site empties and a copy is freed.
            if verbose:
                print("unbinding at:  %s" % (idx,))
            chrom[idx] = 0
            qf += 1
        else:
            # Binding reaction: the site fills and a free copy is consumed.
            if verbose:
                print("binding at:  %s" % (idx,))
            chrom[idx] = 1
            qf -= 1
        if verbose:
            print("t: %s dt: %s q: %s qbound: %s mean occ: %s"
                  % (t, dt, qf, sum(chrom), sum([occ / t for occ in occs])))
    return [occ / t_final for occ in occs]
def sample_path_ref(qf,koffs,t_final,chromosome=None,verbose=False): """Simulate a sample path until time t_final and return the marginal occupancies""" if chromosome is None: # then start from empty chromosome chromosome = [0] * len(koffs) chrom = chromosome[:] new_chrom = chrom[:] occupancies = [0 for c in chromosome] t = 0 dt = 0 while t < t_final: new_chrom,qf,dt = update(chrom,qf,koffs,verbose=verbose) t += dt # This ugly bit of code ensures that we only track the # occupancies until exactly time t_final. if t > t_final: dt = t_final - t + dt occupancies = zipWith(lambda occ,ch:occ + ch*dt,occupancies,chrom) chrom = new_chrom[:] if verbose: print "t:",t,"dt:",dt,"q:",qf,"qbound:",sum(chrom),"mean occ:",sum([occ/t for occ in occupancies]) return [occ/t_final for occ in occupancies]
def mutation_matrix_ref(mu, w, mode="continuous", stochastic=False):
    """Build the mutation matrix over all k-mers of width ``w`` at rate ``mu``.

    Entry (i, j) of the matrix built here is ``mu`` when k-mers i and j
    differ at exactly one position, and ``res - 3*w*mu`` on the diagonal;
    the transpose of that matrix is returned.

    Args:
        mu: per-position mutation rate (also used as the per-row fill
            probability when ``stochastic`` is true).
        w: k-mer width; the matrix is ``4**w`` by ``4**w``.
        mode: ``"continuous"`` gives a transition-rate matrix (``res`` = 0);
            any other value gives a stochastic matrix (``res`` = 1).
        stochastic: if true, each k-mer's row is filled only with
            probability ``mu``; otherwise its diagonal entry is set to
            ``res`` and the rest of the row stays zero.

    Returns:
        The transposed ``4**w`` x ``4**w`` numpy array.
    """
    K = int(4**w)
    M = np.zeros((K, K))
    # Diagonal baseline: 0 for a transition-rate matrix, 1 for a stochastic
    # matrix.
    res = 0 if mode == "continuous" else 1
    # Enumerate the k-mers once rather than once per row.
    # NOTE(review): assumes make_kmers(w) is deterministic and yields the
    # same 4**w k-mers on every call -- confirm.
    kmers = list(make_kmers(w))
    for i, kmer_i in enumerate(kmers):
        if not stochastic or random.random() < mu:
            for j, kmer_j in enumerate(kmers):
                # Hamming distance: width minus the number of matching
                # positions.
                distance = w - sum(zipWith(lambda x, y: x == y,
                                           kmer_i, kmer_j))
                if distance == 0:
                    M[i][j] = res - 3 * w * mu
                elif distance == 1:
                    M[i][j] = mu
        else:
            # Row not selected in stochastic mode: only the diagonal is set.
            M[i][i] = res
    return np.transpose(M)
def update(self, final_time):
    """Attempt a single reaction step, without passing ``final_time``.

    Evaluates every propensity at the current state and time, draws a
    waiting time from ``rexp`` using their sum, and either applies one
    sampled stoichiometry vector to the state (when the new time is still
    before ``final_time``) or sets ``finished_run``.

    Raises:
        Exception: if the propensities sum to zero.
    """
    rates = [fn(self.state, self.time) for fn in self.propensities]
    self.logging(rates)
    total_rate = sum(rates)
    if total_rate == 0:
        raise Exception("No possible reactions")

    # Waiting time until the next reaction (presumably exponential with
    # rate total_rate -- see rexp).
    waiting_time = rexp(total_rate)
    candidate_time = self.time + waiting_time
    if not candidate_time < final_time:
        # The next reaction would fall at or after final_time: leave state
        # and time untouched and flag the run as complete.
        self.finished_run = True
        return

    # Choose which reaction fires from the normalized propensities, then
    # add its stoichiometry vector to the state component-wise.
    delta = inverse_cdf_sample(self.stoich_vectors, normalize(rates))
    self.state = zipWith(lambda x, y: x + y, self.state, delta)
    self.time = candidate_time
    self.logging(str(self.time) + " " + str(self.state))
    self.history.append((self.time, self.state))
    self.reactions_performed += 1
def equilibrate(qtot, koffs, dt=0.01, iterations=1000): q = qtot ss = [0 for k in koffs] def dqdt(): ans = -q * sum((1 - s) for s in ss) + sum(k * s for s, k in zip(ss, koffs)) # print "dqdt:",ans return ans def dsidt(i): si = ss[i] ki = koffs[i] ans = q * (1 - si) - ki * si # print "dsdt ",i,":",ans return ans for iteration in xrange(iterations): dq = dqdt() dssdt = [dsidt(i) for i, k in enumerate(koffs)] q = q + dq * dt ss = zipWith(lambda s, ds: s + ds * dt, ss, dssdt) if iteration % 1000 == 0: print q # ,ss,q + sum(ss) return ss
# Per-organism datasets stored as "<organism>_<name>.pkl".
_SPOOF_DATASETS = ("chains", "bayes_spoofs", "maxent_spoofs", "psfm_spoofs",
                   "apw_spoofs")


def _dump_pickle(obj, stem):
    """Pickle ``obj`` to ``<stem>.pkl`` in the current directory."""
    # NOTE(review): text mode only round-trips the default ASCII pickle
    # protocol; switch this and _load_pickle to 'wb'/'rb' together if a
    # binary protocol is ever requested.
    with open(stem + ".pkl", 'w') as f:
        cPickle.dump(obj, f)


def _load_pickle(stem):
    """Return the object pickled in ``<stem>.pkl``."""
    # Only meant for files written by _dump_pickle; unpickling untrusted
    # data is unsafe.
    with open(stem + ".pkl") as f:
        return cPickle.load(f)


def _generate_spoofs(motifs, iterations):
    """Sample the posterior chain and all four spoof families for ``motifs``.

    Returns a dict keyed by the names in ``_SPOOF_DATASETS``.
    """
    chains = [posterior_chain2(motif, iterations=iterations)
              for motif in tqdm(motifs)]
    # Use only the second half of each chain, thinned to every 500th sample.
    bayes_spoofs = [[motif_from_theta(theta, len(motif))
                     for theta in tqdm(chain[iterations // 2::500])]
                    for chain, motif in tqdm(zip(chains, motifs))]
    psfms = [psfm_from_motif(motif, pc=1 / 4.0) for motif in motifs]
    psfm_spoofs = [[[sample_from_psfm(psfm) for _ in range(len(motif))]
                    for _ in range(10)]
                   for psfm, motif in zip(psfms, motifs)]
    maxent_spoofs = [spoof_maxent_motifs(motif, 10) for motif in tqdm(motifs)]
    apws = [code_from_motif(motif, pc=1 / 16.0) for motif in tqdm(motifs)]
    apw_spoofs = [[[sample_site(apw) for _ in range(len(motif))]
                   for __ in range(10)]
                  for apw, motif in tqdm(zip(apws, motifs))]
    return {"chains": chains,
            "bayes_spoofs": bayes_spoofs,
            "maxent_spoofs": maxent_spoofs,
            "psfm_spoofs": psfm_spoofs,
            "apw_spoofs": apw_spoofs}


def _mean_spoof_stat(stat, spoof_sets):
    """Return, for each motif's set of spoofs, the mean of ``stat`` over it."""
    return [mean([stat(spoof) for spoof in spoofs])
            for spoofs in tqdm(spoof_sets)]


def _plot_spoof_panel(index, real, spoofed, lims, text_pos, ticks,
                      show_xticks, show_yticks, xlabel=None, ylabel=None):
    """Draw one real-vs-spoof scatter panel of the 4x4 grid.

    ``real`` and ``spoofed`` hold one value per motif.  The panel is
    annotated with r^2 (from ``scatter``) and the RMSD between the two
    lists; both axes share ``lims``, and tick labels are drawn only where
    requested.
    """
    plt.subplot(4, 4, index)
    if show_xticks:
        plt.xticks(ticks, ticks)
    else:
        plt.xticks([])
    if show_yticks:
        plt.yticks(ticks, ticks)
    else:
        plt.yticks([])
    lo, hi = lims
    plt.xlim(lo, hi)
    plt.ylim(lo, hi)
    # Spoof statistic goes first, real statistic second, as in the
    # original argument-swapping wrapper around scatter.
    r, _ = scatter(spoofed, real)
    rmsd = sqrt(mean(zipWith(lambda x, y: (x - y)**2, real, spoofed)))
    xtext, ytext = text_pos
    plt.text(xtext, ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(xtext, ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    if xlabel is not None:
        plt.xlabel(xlabel, fontsize='large')
    if ylabel is not None:
        plt.ylabel(ylabel, fontsize='large')


def main_experiment(generate_data=False):
    """Compare real motifs with spoofed motifs and save the summary figure.

    With ``generate_data`` true, the spoofs for the module-level
    ``prok_motifs`` and for subsampled ``euk_motifs`` are generated and
    pickled to the current directory; otherwise the previously pickled
    results are loaded.  A 4x4 grid is then drawn: one row per spoofing
    method (MaxEnt, PSFM, APW, Bayes), with columns Prok IC, Euk IC,
    Prok MI and Euk MI.  Each panel plots the real per-column statistic
    against the mean over spoofs and is annotated with r^2 and RMSD.  The
    figure is passed to ``maybesave`` as "spoof-statistics-rmsd.pdf".
    """
    if generate_data:
        iterations = 10000
        prok = _generate_spoofs(prok_motifs, iterations)
        euk_submotifs = [subsample(motif) for motif in euk_motifs]
        euk = _generate_spoofs(euk_submotifs, iterations)
        for key in _SPOOF_DATASETS:
            _dump_pickle(prok[key], "prok_" + key)
        _dump_pickle(euk_submotifs, "euk_submotifs")
        for key in _SPOOF_DATASETS:
            _dump_pickle(euk[key], "euk_" + key)
    else:
        prok = dict((key, _load_pickle("prok_" + key))
                    for key in _SPOOF_DATASETS)
        euk_submotifs = _load_pickle("euk_submotifs")
        euk = dict((key, _load_pickle("euk_" + key))
                   for key in _SPOOF_DATASETS)

    ic_lims, mi_lims = (-.1, 2.6), (-0.05, 1)
    ic_text, mi_text = (-0.05, 2.2), (-0.05, 0.85)
    ic_ticks = [0, 0.5, 1, 1.5, 2]
    mi_ticks = [0, 0.25, 0.5, 0.75, 1]
    methods = [("maxent", "MaxEnt"), ("psfm", "PSFM"),
               ("apw", "APW"), ("bayes", "Bayes")]
    # The eukaryotic spoofs were built from the subsampled motifs, so the
    # real statistics they are compared against must come from those same
    # submotifs, for IC as well as MI.
    organisms = [("Prok", prok_motifs, prok), ("Euk", euk_submotifs, euk)]

    sns.set_style('dark')
    for column, (organism, motifs, spoofs) in enumerate(organisms):
        real_ics = [motif_ic_per_col(motif) for motif in motifs]
        real_mis = [mi_per_col(motif) for motif in motifs]
        # y tick labels (and method names) only on the prokaryote panels;
        # x tick labels and axis titles only on the bottom row.
        show_yticks = column == 0
        for row, (method, label) in enumerate(methods):
            spoof_sets = spoofs[method + "_spoofs"]
            bottom = row == len(methods) - 1
            _plot_spoof_panel(
                4 * row + column + 1, real_ics,
                _mean_spoof_stat(motif_ic_per_col, spoof_sets),
                ic_lims, ic_text, ic_ticks, bottom, show_yticks,
                xlabel=organism + " IC" if bottom else None,
                ylabel=label if show_yticks else None)
            _plot_spoof_panel(
                4 * row + column + 3, real_mis,
                _mean_spoof_stat(mi_per_col, spoof_sets),
                mi_lims, mi_text, mi_ticks, bottom, show_yticks,
                xlabel=organism + " MI" if bottom else None)
    plt.tight_layout()
    maybesave("spoof-statistics-rmsd.pdf")
# Per-organism datasets stored as "<organism>_<name>.pkl".
_SPOOF_DATASETS = ("chains", "bayes_spoofs", "maxent_spoofs", "psfm_spoofs",
                   "apw_spoofs")


def _dump_pickle(obj, stem):
    """Pickle ``obj`` to ``<stem>.pkl`` in the current directory."""
    # NOTE(review): text mode only round-trips the default ASCII pickle
    # protocol; switch this and _load_pickle to 'wb'/'rb' together if a
    # binary protocol is ever requested.
    with open(stem + ".pkl", 'w') as f:
        cPickle.dump(obj, f)


def _load_pickle(stem):
    """Return the object pickled in ``<stem>.pkl``."""
    # Only meant for files written by _dump_pickle; unpickling untrusted
    # data is unsafe.
    with open(stem + ".pkl") as f:
        return cPickle.load(f)


def _generate_spoofs(motifs, iterations):
    """Sample the posterior chain and all four spoof families for ``motifs``.

    Returns a dict keyed by the names in ``_SPOOF_DATASETS``.
    """
    chains = [posterior_chain2(motif, iterations=iterations)
              for motif in tqdm(motifs)]
    # Use only the second half of each chain, thinned to every 500th sample.
    bayes_spoofs = [[motif_from_theta(theta, len(motif))
                     for theta in tqdm(chain[iterations // 2::500])]
                    for chain, motif in tqdm(zip(chains, motifs))]
    psfms = [psfm_from_motif(motif, pc=1 / 4.0) for motif in motifs]
    psfm_spoofs = [[[sample_from_psfm(psfm) for _ in range(len(motif))]
                    for _ in range(10)]
                   for psfm, motif in zip(psfms, motifs)]
    maxent_spoofs = [spoof_maxent_motifs(motif, 10) for motif in tqdm(motifs)]
    apws = [code_from_motif(motif, pc=1 / 16.0) for motif in tqdm(motifs)]
    apw_spoofs = [[[sample_site(apw) for _ in range(len(motif))]
                   for __ in range(10)]
                  for apw, motif in tqdm(zip(apws, motifs))]
    return {"chains": chains,
            "bayes_spoofs": bayes_spoofs,
            "maxent_spoofs": maxent_spoofs,
            "psfm_spoofs": psfm_spoofs,
            "apw_spoofs": apw_spoofs}


def _mean_spoof_stat(stat, spoof_sets):
    """Return, for each motif's set of spoofs, the mean of ``stat`` over it."""
    return [mean([stat(spoof) for spoof in spoofs])
            for spoofs in tqdm(spoof_sets)]


def _plot_spoof_panel(index, real, spoofed, lims, text_pos, ticks,
                      show_xticks, show_yticks, xlabel=None, ylabel=None):
    """Draw one real-vs-spoof scatter panel of the 4x4 grid.

    ``real`` and ``spoofed`` hold one value per motif.  The panel is
    annotated with r^2 (from ``scatter``) and the RMSD between the two
    lists; both axes share ``lims``, and tick labels are drawn only where
    requested.
    """
    plt.subplot(4, 4, index)
    if show_xticks:
        plt.xticks(ticks, ticks)
    else:
        plt.xticks([])
    if show_yticks:
        plt.yticks(ticks, ticks)
    else:
        plt.yticks([])
    lo, hi = lims
    plt.xlim(lo, hi)
    plt.ylim(lo, hi)
    # Spoof statistic goes first, real statistic second, as in the
    # original argument-swapping wrapper around scatter.
    r, _ = scatter(spoofed, real)
    rmsd = sqrt(mean(zipWith(lambda x, y: (x - y)**2, real, spoofed)))
    xtext, ytext = text_pos
    plt.text(xtext, ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(xtext, ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    if xlabel is not None:
        plt.xlabel(xlabel, fontsize='large')
    if ylabel is not None:
        plt.ylabel(ylabel, fontsize='large')


def main_experiment(generate_data=False):
    """Compare real motifs with spoofed motifs and save the summary figure.

    With ``generate_data`` true, the spoofs for the module-level
    ``prok_motifs`` and for subsampled ``euk_motifs`` are generated and
    pickled to the current directory; otherwise the previously pickled
    results are loaded.  A 4x4 grid is then drawn: one row per spoofing
    method (MaxEnt, PSFM, APW, Bayes), with columns Prok IC, Euk IC,
    Prok MI and Euk MI.  Each panel plots the real per-column statistic
    against the mean over spoofs and is annotated with r^2 and RMSD.  The
    figure is passed to ``maybesave`` as "spoof-statistics-rmsd.pdf".
    """
    if generate_data:
        iterations = 10000
        prok = _generate_spoofs(prok_motifs, iterations)
        euk_submotifs = [subsample(motif) for motif in euk_motifs]
        euk = _generate_spoofs(euk_submotifs, iterations)
        for key in _SPOOF_DATASETS:
            _dump_pickle(prok[key], "prok_" + key)
        _dump_pickle(euk_submotifs, "euk_submotifs")
        for key in _SPOOF_DATASETS:
            _dump_pickle(euk[key], "euk_" + key)
    else:
        prok = dict((key, _load_pickle("prok_" + key))
                    for key in _SPOOF_DATASETS)
        euk_submotifs = _load_pickle("euk_submotifs")
        euk = dict((key, _load_pickle("euk_" + key))
                   for key in _SPOOF_DATASETS)

    ic_lims, mi_lims = (-.1, 2.6), (-0.05, 1)
    ic_text, mi_text = (-0.05, 2.2), (-0.05, 0.85)
    ic_ticks = [0, 0.5, 1, 1.5, 2]
    mi_ticks = [0, 0.25, 0.5, 0.75, 1]
    methods = [("maxent", "MaxEnt"), ("psfm", "PSFM"),
               ("apw", "APW"), ("bayes", "Bayes")]
    # The eukaryotic spoofs were built from the subsampled motifs, so the
    # real statistics they are compared against must come from those same
    # submotifs, for IC as well as MI.
    organisms = [("Prok", prok_motifs, prok), ("Euk", euk_submotifs, euk)]

    sns.set_style('dark')
    for column, (organism, motifs, spoofs) in enumerate(organisms):
        real_ics = [motif_ic_per_col(motif) for motif in motifs]
        real_mis = [mi_per_col(motif) for motif in motifs]
        # y tick labels (and method names) only on the prokaryote panels;
        # x tick labels and axis titles only on the bottom row.
        show_yticks = column == 0
        for row, (method, label) in enumerate(methods):
            spoof_sets = spoofs[method + "_spoofs"]
            bottom = row == len(methods) - 1
            _plot_spoof_panel(
                4 * row + column + 1, real_ics,
                _mean_spoof_stat(motif_ic_per_col, spoof_sets),
                ic_lims, ic_text, ic_ticks, bottom, show_yticks,
                xlabel=organism + " IC" if bottom else None,
                ylabel=label if show_yticks else None)
            _plot_spoof_panel(
                4 * row + column + 3, real_mis,
                _mean_spoof_stat(mi_per_col, spoof_sets),
                mi_lims, mi_text, mi_ticks, bottom, show_yticks,
                xlabel=organism + " MI" if bottom else None)
    plt.tight_layout()
    maybesave("spoof-statistics-rmsd.pdf")