Example #1
0
def compute_coefficients_ref(ks):
    """Turn the roots ks of a polynomial into its coefficient sequence.

    Starting from the constant polynomial [1], one factor (x - k) is
    multiplied in per root.  The result is ordered a_n ... a_0 (highest
    power first); with no roots the result is [1].
    """
    poly = [1]
    for root in ks:
        # (x - root) * P(x): the "x * P" part is P with a trailing zero,
        # the "-root * P" part is P scaled and given a leading zero.
        times_x = poly + [0]
        times_minus_root = [0] + [-root * coef for coef in poly]
        poly = zipWith(lambda a, b: a + b, times_x, times_minus_root)
    return poly
Example #2
0
File: ssa.py Project: poneill/amic
def sample_path_ref2(qf,koffs,t_final,chromosome=None,verbose=False):
    """Simulate a sample path until time t_final and return the marginal occupancies.
    Integrates update, sample_path_ref framework.
    """
    if chromosome is None: # then start from empty chromosome
        chromosome = [0] * len(koffs)
    t = 0
    dt = 0
    occs = [0 for c in chromosome]
    while t < t_final:
        rates = [koffs[i] if bs else qf for i,bs in enumerate(chromosome)]
        sum_rate = sum(rates)
        dt = random.expovariate(sum_rate)
        t += dt
        if t > t_final:
            dt = t_final - t + dt
        # update occupancies after deciding dt, before updating chromosome
        occs = zipWith(lambda occ,ch:occ + ch*dt,occs,chromosome)
        idx = inverse_cdf_sample(range(G),normalize(rates))
        if chromosome[idx]: # if reaction is an unbinding reaction...
            if verbose:
                print "unbinding at: ",idx
            chromosome[idx] = 0
            qf += 1
        else: # a binding reaction...
            if verbose:
                print "binding at: ",idx
            chromosome[idx] = 1
            qf -= 1
        if verbose:
            print "t:",t,"dt:",dt,"q:",qf,"qbound:",sum(chromosome),"mean occ:",sum([occ/t for occ in occs])
    return [occ/t_final for occ in occs]
Example #3
0
File: ssa.py Project: poneill/amic
def sample_path_ref(qf,koffs,t_final,chromosome=None,verbose=False):
    """Simulate a sample path until time t_final and return the marginal occupancies"""
    if chromosome is None: # then start from empty chromosome
        chromosome = [0] * len(koffs)
    chrom = chromosome[:]
    new_chrom = chrom[:]
    occupancies = [0 for c in chromosome]
    t = 0
    dt = 0
    while t < t_final:
        new_chrom,qf,dt = update(chrom,qf,koffs,verbose=verbose)
        t += dt
        # This ugly bit of code ensures that we only track the
        # occupancies until exactly time t_final.
        if t > t_final:
            dt = t_final - t + dt
        occupancies = zipWith(lambda occ,ch:occ + ch*dt,occupancies,chrom)
        chrom = new_chrom[:]
        if verbose:
            print "t:",t,"dt:",dt,"q:",qf,"qbound:",sum(chrom),"mean occ:",sum([occ/t for occ in occupancies])
    return [occ/t_final for occ in occupancies]
Example #4
0
def mutation_matrix_ref(mu,w,mode="continuous",stochastic=False):
    """Build the mutation matrix for w columns at rate mu.

    The matrix is K x K with K = 4**w, indexed by the order in which
    make_kmers(w) produces its kmers.

    mode       -- "continuous": M is a transition rate matrix (diagonal is
                  -3*w*mu); any other value: M is a stochastic matrix
                  (diagonal is 1 - 3*w*mu).
    stochastic -- when true, each row is filled in only with probability mu
                  (one random.random() draw per row); rows that are skipped
                  get just the bare diagonal term (0 or 1 depending on mode).

    Entries for kmer pairs that differ in exactly one position are mu; all
    other off-diagonal entries stay 0.  The transpose of the row-wise
    construction is returned.
    """
    K = int(4**w)
    M = np.zeros((K,K))
    # diagonal offset: 0 for a rate matrix, 1 for a stochastic matrix
    res = 0 if mode == "continuous" else 1
    # materialise the kmers once instead of regenerating them for every row
    kmers = list(make_kmers(w))
    for i,kmer_i in enumerate(kmers):
        if not stochastic or random.random() < mu:
            for j,kmer_j in enumerate(kmers):
                # w minus the number of matching positions
                distance = w - sum(zipWith(lambda x,y:x==y,kmer_i,kmer_j))
                if distance == 0:
                    M[i][j] = res - 3*w*mu
                elif distance == 1:
                    M[i][j] = mu
        else:
            M[i][i] = res
    return np.transpose(M)
Example #5
0
    def update(self, final_time):
        """Perform at most one reaction, or flag the run as finished.

        Propensities are evaluated at the current state and time, a waiting
        time is drawn with rexp from their sum, and the reaction is applied
        only if it would occur strictly before final_time.  Otherwise
        self.finished_run is set and state and time are left alone.

        Raises Exception when the propensities sum to zero.
        """
        # Evaluate every propensity function on the current state
        rates = [fn(self.state, self.time) for fn in self.propensities]
        self.logging(rates)
        total_rate = sum(rates)  # rate of sum of random variables
        if total_rate == 0:
            raise Exception("No possible reactions")

        # Waiting time until the next reaction
        waiting = rexp(total_rate)
        candidate_time = self.time + waiting
        if not candidate_time < final_time:
            # the next reaction would fall at or after the horizon
            self.finished_run = True
            return

        # Pick the reaction and apply its stoichiometry vector to the state
        delta = inverse_cdf_sample(self.stoich_vectors, normalize(rates))
        self.state = zipWith(lambda cur, change: cur + change, self.state, delta)
        self.time = candidate_time
        self.logging(str(self.time) + " " + str(self.state))

        self.history.append((self.time, self.state))
        self.reactions_performed += 1
Example #6
0
def equilibrate(qtot, koffs, dt=0.01, iterations=1000):
    q = qtot
    ss = [0 for k in koffs]

    def dqdt():
        ans = -q * sum((1 - s) for s in ss) + sum(k * s for s, k in zip(ss, koffs))
        # print "dqdt:",ans
        return ans

    def dsidt(i):
        si = ss[i]
        ki = koffs[i]
        ans = q * (1 - si) - ki * si
        # print "dsdt ",i,":",ans
        return ans

    for iteration in xrange(iterations):
        dq = dqdt()
        dssdt = [dsidt(i) for i, k in enumerate(koffs)]
        q = q + dq * dt
        ss = zipWith(lambda s, ds: s + ds * dt, ss, dssdt)
        if iteration % 1000 == 0:
            print q  # ,ss,q + sum(ss)
    return ss
Example #7
0
def main_experiment(generate_data=False):
    """Compare statistics of real motifs against four kinds of spoofed motifs.

    With generate_data true, the spoofs (Bayes, PSFM, MaxEnt, APW) are built
    for prok_motifs and for subsampled euk_motifs and every intermediate
    result is pickled into the current working directory.  Otherwise the
    same pickles are loaded.  prok_motifs and euk_motifs are not parameters;
    they are read from the enclosing scope.

    The second half computes, per motif, the mean of motif_ic, mi_per_col
    and motif_ic_per_col over its spoofs, then draws a 4x4 grid of scatter
    panels: rows are MaxEnt, PSFM, APW, Bayes; columns are Prok IC, Euk IC,
    Prok MI, Euk MI.  Each panel is annotated with r^2 and RMSD, and the
    file name "spoof-statistics-rmsd.pdf" is handed to maybesave.

    NOTE(review): another definition of main_experiment follows later in
    this source; if both live in the same module, the later one wins.
    """
    if generate_data:
        iterations = 10000
        prok_chains = [
            posterior_chain2(motif, iterations=iterations)
            for motif in tqdm(prok_motifs)
        ]
        # from each chain: skip the first iterations/2 entries, then keep
        # every 500th one
        prok_bayes_spoofs = [[
            motif_from_theta(theta, len(motif))
            for theta in tqdm(chain[iterations / 2::500])
        ] for chain, motif in tqdm(zip(prok_chains, prok_motifs))]
        prok_psfms = [
            psfm_from_motif(motif, pc=1 / 4.0) for motif in prok_motifs
        ]
        # 10 spoofs per motif, each with as many sampled sites as the motif
        prok_psfm_spoofs = [[[
            sample_from_psfm(psfm) for _ in range(len(motif))
        ] for _ in range(10)] for psfm, motif in zip(prok_psfms, prok_motifs)]
        prok_maxent_spoofs = [
            spoof_maxent_motifs(motif, 10) for motif in tqdm(prok_motifs)
        ]
        prok_apws = map(lambda m: code_from_motif(m, pc=1 / 16.0),
                        tqdm(prok_motifs))
        prok_apw_spoofs = [[[sample_site(apw) for _ in range(len(motif))]
                            for __ in range(10)]
                           for apw, motif in tqdm(zip(prok_apws, prok_motifs))]
        # the euk pipeline mirrors the prok one, but runs on subsampled motifs
        euk_submotifs = map(subsample, euk_motifs)
        euk_chains = [
            posterior_chain2(motif, iterations=iterations)
            for motif in tqdm(euk_submotifs)
        ]
        euk_bayes_spoofs = [[
            motif_from_theta(theta, len(motif))
            for theta in tqdm(chain[iterations / 2::500])
        ] for chain, motif in tqdm(zip(euk_chains, euk_submotifs))]
        euk_psfms = [
            psfm_from_motif(motif, pc=1 / 4.0) for motif in euk_submotifs
        ]
        euk_psfm_spoofs = [[[
            sample_from_psfm(psfm) for _ in range(len(motif))
        ] for _ in range(10)] for psfm, motif in zip(euk_psfms, euk_submotifs)]
        euk_maxent_spoofs = [
            spoof_maxent_motifs(motif, 10) for motif in tqdm(euk_submotifs)
        ]
        euk_apws = map(lambda m: code_from_motif(m, pc=1 / 16.0),
                       tqdm(euk_submotifs))
        euk_apw_spoofs = [[[sample_site(apw) for _ in range(len(motif))]
                           for __ in range(10)]
                          for apw, motif in tqdm(zip(euk_apws, euk_submotifs))]
        # cache everything so later runs can skip the generation step
        with open("prok_chains.pkl", 'w') as f:
            cPickle.dump(prok_chains, f)
        with open("prok_bayes_spoofs.pkl", 'w') as f:
            cPickle.dump(prok_bayes_spoofs, f)
        with open("prok_maxent_spoofs.pkl", 'w') as f:
            cPickle.dump(prok_maxent_spoofs, f)
        with open("prok_psfm_spoofs.pkl", 'w') as f:
            cPickle.dump(prok_psfm_spoofs, f)
        with open("prok_apw_spoofs.pkl", 'w') as f:
            cPickle.dump(prok_apw_spoofs, f)

        with open("euk_submotifs.pkl", 'w') as f:
            cPickle.dump(euk_submotifs, f)
        with open("euk_chains.pkl", 'w') as f:
            cPickle.dump(euk_chains, f)
        with open("euk_bayes_spoofs.pkl", 'w') as f:
            cPickle.dump(euk_bayes_spoofs, f)
        with open("euk_maxent_spoofs.pkl", 'w') as f:
            cPickle.dump(euk_maxent_spoofs, f)
        with open("euk_psfm_spoofs.pkl", 'w') as f:
            cPickle.dump(euk_psfm_spoofs, f)
        with open("euk_apw_spoofs.pkl", 'w') as f:
            cPickle.dump(euk_apw_spoofs, f)

    else:
        # reload the cached results written by a previous generate_data run
        with open("prok_chains.pkl") as f:
            prok_chains = cPickle.load(f)
        with open("prok_bayes_spoofs.pkl") as f:
            prok_bayes_spoofs = cPickle.load(f)
        with open("prok_maxent_spoofs.pkl") as f:
            prok_maxent_spoofs = cPickle.load(f)
        with open("prok_psfm_spoofs.pkl") as f:
            prok_psfm_spoofs = cPickle.load(f)
        with open("prok_apw_spoofs.pkl") as f:
            prok_apw_spoofs = cPickle.load(f)

        with open("euk_submotifs.pkl") as f:
            euk_submotifs = cPickle.load(f)
        with open("euk_chains.pkl") as f:
            euk_chains = cPickle.load(f)
        with open("euk_bayes_spoofs.pkl") as f:
            euk_bayes_spoofs = cPickle.load(f)
        with open("euk_maxent_spoofs.pkl") as f:
            euk_maxent_spoofs = cPickle.load(f)
        with open("euk_apw_spoofs.pkl") as f:
            euk_apw_spoofs = cPickle.load(f)
        with open("euk_psfm_spoofs.pkl") as f:
            euk_psfm_spoofs = cPickle.load(f)

    #--------
    # prok statistics: one value per real motif, and the mean over its
    # spoofs for each spoofing method.  The plain *_ics lists are not used
    # by the plotting code below; only the *_ics_pp and *_mis lists are.
    prok_ics = map(motif_ic, prok_motifs)
    prok_mis = map(mi_per_col, prok_motifs)
    prok_maxent_ics = [mean(map(motif_ic, xs)) for xs in prok_maxent_spoofs]
    prok_maxent_mis = [mean(map(mi_per_col, xs)) for xs in prok_maxent_spoofs]
    prok_psfm_ics = [mean(map(motif_ic, xs)) for xs in prok_psfm_spoofs]
    prok_psfm_mis = [
        mean(map(mi_per_col, xs)) for xs in tqdm(prok_psfm_spoofs)
    ]
    prok_bayes_ics = [mean(map(motif_ic, xs)) for xs in prok_bayes_spoofs]
    prok_bayes_mis = [
        mean(map(mi_per_col, xs)) for xs in tqdm(prok_bayes_spoofs)
    ]
    prok_apw_ics = [mean(map(motif_ic, xs)) for xs in prok_apw_spoofs]
    prok_apw_mis = [mean(map(mi_per_col, xs)) for xs in prok_apw_spoofs]

    prok_ics_pp = map(motif_ic_per_col, prok_motifs)
    prok_maxent_ics_pp = [
        mean(map(motif_ic_per_col, xs)) for xs in prok_maxent_spoofs
    ]
    prok_psfm_ics_pp = [
        mean(map(motif_ic_per_col, xs)) for xs in prok_psfm_spoofs
    ]
    prok_bayes_ics_pp = [
        mean(map(motif_ic_per_col, xs)) for xs in prok_bayes_spoofs
    ]
    prok_apw_ics_pp = [
        mean(map(motif_ic_per_col, xs)) for xs in prok_apw_spoofs
    ]

    #--------
    # euk statistics, same layout as the prok ones
    euk_ics = map(motif_ic, tqdm(euk_submotifs))
    euk_mis = map(mi_per_col, tqdm(euk_submotifs))
    euk_maxent_ics = [
        mean(map(motif_ic, xs)) for xs in tqdm(euk_maxent_spoofs)
    ]
    euk_maxent_mis = [
        mean(map(mi_per_col, xs)) for xs in tqdm(euk_maxent_spoofs)
    ]
    euk_psfm_ics = [mean(map(motif_ic, xs)) for xs in tqdm(euk_psfm_spoofs)]
    euk_psfm_mis = [mean(map(mi_per_col, xs)) for xs in tqdm(euk_psfm_spoofs)]
    euk_bayes_ics = [mean(map(motif_ic, xs)) for xs in tqdm(euk_bayes_spoofs)]
    euk_bayes_mis = [
        mean(map(mi_per_col, xs)) for xs in tqdm(euk_bayes_spoofs)
    ]
    euk_apw_ics = [mean(map(motif_ic, xs)) for xs in tqdm(euk_apw_spoofs)]
    euk_apw_mis = [mean(map(mi_per_col, xs)) for xs in tqdm(euk_apw_spoofs)]

    # NOTE(review): this uses euk_motifs, while every other euk statistic
    # and all euk spoofs are derived from euk_submotifs -- confirm whether
    # euk_submotifs was intended here.
    euk_ics_pp = map(motif_ic_per_col, euk_motifs)
    euk_maxent_ics_pp = [
        mean(map(motif_ic_per_col, xs)) for xs in euk_maxent_spoofs
    ]
    euk_psfm_ics_pp = [
        mean(map(motif_ic_per_col, xs)) for xs in euk_psfm_spoofs
    ]
    euk_bayes_ics_pp = [
        mean(map(motif_ic_per_col, xs)) for xs in euk_bayes_spoofs
    ]
    euk_apw_ics_pp = [mean(map(motif_ic_per_col, xs)) for xs in euk_apw_spoofs]

    # shared axis limits, annotation positions and tick locations for the
    # IC panels and the MI panels
    #ic_min, ic_max, mi_min, mi_max = 4.5, 25, -0.1, 0.7
    ic_min, ic_max, mi_min, mi_max = -.1, 2.6, -0.05, 1
    #ic_xtext, ic_ytext, mi_xtext, mi_ytext = 5, 20, -0.05, 0.5
    ic_xtext, ic_ytext, mi_xtext, mi_ytext = -0.05, 2.2, -0.05, 0.85
    mi_xticks = [0, 0.25, 0.5, 0.75, 1]
    ic_yticks = [0, 0.5, 1, 1.5, 2]
    # scatter with its two arguments swapped: the first argument given to
    # revscatter is passed to scatter second
    revscatter = lambda xs, ys: scatter(ys, xs)
    sns.set_style('dark')
    #--- prok plots: odd subplot indices (grid columns 1 and 3) ---#
    # row 1: MaxEnt
    plt.subplot(4, 4, 1)
    plt.xticks([])
    #plt.yticks([])
    plt.yticks(ic_yticks, ic_yticks)
    r, p = revscatter(prok_ics_pp, prok_maxent_ics_pp)
    # root-mean-square deviation between the real and the spoofed statistic
    rmsd = sqrt(
        mean(zipWith(lambda x, y: (x - y)**2, prok_ics_pp,
                     prok_maxent_ics_pp)))
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    plt.text(ic_xtext, ic_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    plt.ylabel("MaxEnt", fontsize='large')
    plt.subplot(4, 4, 3)
    plt.xticks([])
    plt.yticks(mi_xticks, mi_xticks)
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(prok_mis, prok_maxent_mis)
    rmsd = sqrt(
        mean(zipWith(lambda x, y: (x - y)**2, prok_mis, prok_maxent_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    # row 2: PSFM
    plt.subplot(4, 4, 5)
    plt.xticks([])
    #plt.yticks([])
    plt.yticks(ic_yticks, ic_yticks)
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(prok_ics_pp, prok_psfm_ics_pp)
    rmsd = sqrt(
        mean(zipWith(lambda x, y: (x - y)**2, prok_ics_pp, prok_psfm_ics_pp)))
    plt.text(ic_xtext, ic_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    plt.ylabel("PSFM", fontsize='large')
    plt.subplot(4, 4, 7)
    plt.xticks([])
    plt.yticks(mi_xticks, mi_xticks)
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(prok_mis, prok_psfm_mis)
    rmsd = sqrt(mean(zipWith(lambda x, y: (x - y)**2, prok_mis,
                             prok_psfm_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    # row 3: APW
    plt.subplot(4, 4, 9)
    plt.xticks([])
    #plt.yticks([])
    plt.yticks(ic_yticks, ic_yticks)
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(prok_ics_pp, prok_apw_ics_pp)
    rmsd = sqrt(
        mean(zipWith(lambda x, y: (x - y)**2, prok_ics_pp, prok_apw_ics_pp)))
    plt.text(ic_xtext, ic_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    plt.ylabel("APW", fontsize='large')
    #plt.xlabel("IC (bits)",fontsize='large')
    plt.subplot(4, 4, 11)
    plt.xticks([])
    plt.yticks(mi_xticks, mi_xticks)
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(prok_mis, prok_apw_mis)
    rmsd = sqrt(mean(zipWith(lambda x, y: (x - y)**2, prok_mis, prok_apw_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    # row 4: Bayes (bottom row, so x ticks and x labels are shown)
    plt.subplot(4, 4, 13)
    #plt.xticks([])
    plt.yticks(ic_yticks, ic_yticks)
    plt.xticks(ic_yticks, ic_yticks)
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(prok_ics_pp, prok_bayes_ics_pp)
    rmsd = sqrt(
        mean(zipWith(lambda x, y: (x - y)**2, prok_ics_pp, prok_bayes_ics_pp)))
    plt.text(ic_xtext, ic_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    plt.xlabel("Prok IC", fontsize='large')
    plt.ylabel("Bayes", fontsize='large')
    plt.subplot(4, 4, 15)
    #plt.xticks([])
    plt.xticks(mi_xticks, mi_xticks)
    plt.yticks(mi_xticks, mi_xticks)
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(prok_mis, prok_bayes_mis)
    rmsd = sqrt(
        mean(zipWith(lambda x, y: (x - y)**2, prok_mis, prok_bayes_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    plt.xlabel("Prok MI", fontsize='large')

    #--- euk plots ---#
    # even subplot indices (grid columns 2 and 4); y ticks are hidden on
    # all of them
    plt.subplot(4, 4, 2)
    plt.xticks([])
    plt.yticks([])
    r, p = revscatter(euk_ics_pp, euk_maxent_ics_pp)
    rmsd = sqrt(
        mean(zipWith(lambda x, y: (x - y)**2, euk_ics_pp, euk_maxent_ics_pp)))
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    plt.text(ic_xtext, ic_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    #plt.ylabel("MaxEnt",fontsize='large')
    plt.subplot(4, 4, 4)
    plt.xticks([])
    plt.yticks([])
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(euk_mis, euk_maxent_mis)
    rmsd = sqrt(mean(zipWith(lambda x, y: (x - y)**2, euk_mis,
                             euk_maxent_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    plt.subplot(4, 4, 6)
    plt.xticks([])
    plt.yticks([])
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(euk_ics_pp, euk_psfm_ics_pp)
    rmsd = sqrt(
        mean(zipWith(lambda x, y: (x - y)**2, euk_ics_pp, euk_psfm_ics_pp)))
    plt.text(ic_xtext, ic_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    #plt.ylabel("PSFM",fontsize='large')
    plt.subplot(4, 4, 8)
    plt.xticks([])
    plt.yticks([])
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(euk_mis, euk_psfm_mis)
    rmsd = sqrt(mean(zipWith(lambda x, y: (x - y)**2, euk_mis, euk_psfm_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    plt.subplot(4, 4, 10)
    plt.xticks([])
    plt.yticks([])
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(euk_ics_pp, euk_apw_ics_pp)
    rmsd = sqrt(
        mean(zipWith(lambda x, y: (x - y)**2, euk_ics_pp, euk_apw_ics_pp)))
    plt.text(ic_xtext, ic_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    #plt.ylabel("APW",fontsize='large')
    #plt.xlabel("IC (bits)",fontsize='large')
    plt.subplot(4, 4, 12)
    plt.xticks([])
    plt.yticks([])
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(euk_mis, euk_apw_mis)
    rmsd = sqrt(mean(zipWith(lambda x, y: (x - y)**2, euk_mis, euk_apw_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    plt.subplot(4, 4, 14)
    #plt.xticks([])
    #
    plt.yticks([])
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(euk_ics_pp, euk_bayes_ics_pp)
    rmsd = sqrt(
        mean(zipWith(lambda x, y: (x - y)**2, euk_ics_pp, euk_bayes_ics_pp)))
    plt.text(ic_xtext, ic_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    #plt.ylabel("Bayes",fontsize='large')
    plt.xlabel("Euk IC", fontsize='large')
    plt.subplot(4, 4, 16)
    #plt.xticks([])
    plt.xticks(mi_xticks, mi_xticks)
    plt.yticks([])
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(euk_mis, euk_bayes_mis)
    rmsd = sqrt(mean(zipWith(lambda x, y: (x - y)**2, euk_mis, euk_bayes_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    #plt.axis('off')
    #plt.xlabel("MI (bits/column pair)",fontsize='large')
    plt.xlabel("Euk MI", fontsize='large')
    plt.tight_layout()
    maybesave("spoof-statistics-rmsd.pdf")
def main_experiment(generate_data=False):
    """Compare statistics of real motifs against four kinds of spoofed motifs.

    With generate_data true, the spoofs (Bayes, PSFM, MaxEnt, APW) are built
    for prok_motifs and for subsampled euk_motifs and every intermediate
    result is pickled into the current working directory.  Otherwise the
    same pickles are loaded.  prok_motifs and euk_motifs are not parameters;
    they are read from the enclosing scope.

    The second half computes, per motif, the mean of motif_ic, mi_per_col
    and motif_ic_per_col over its spoofs, then draws a 4x4 grid of scatter
    panels: rows are MaxEnt, PSFM, APW, Bayes; columns are Prok IC, Euk IC,
    Prok MI, Euk MI.  Each panel is annotated with r^2 and RMSD, and the
    file name "spoof-statistics-rmsd.pdf" is handed to maybesave.

    NOTE(review): an earlier definition of main_experiment precedes this one
    in this source; if both live in the same module, this one replaces it.
    """
    if generate_data:
        iterations = 10000
        prok_chains = [posterior_chain2(motif,iterations=iterations) for motif in tqdm(prok_motifs)]
        # from each chain: skip the first iterations/2 entries, then keep
        # every 500th one
        prok_bayes_spoofs = [[motif_from_theta(theta, len(motif)) for theta in tqdm(chain[iterations/2::500])]
                       for chain, motif in tqdm(zip(prok_chains, prok_motifs))]
        prok_psfms = [psfm_from_motif(motif, pc=1/4.0) for motif in prok_motifs]
        # 10 spoofs per motif, each with as many sampled sites as the motif
        prok_psfm_spoofs = [[[sample_from_psfm(psfm) for _ in range(len(motif))] for _ in range(10)]
                            for psfm, motif in zip(prok_psfms, prok_motifs)]
        prok_maxent_spoofs = [spoof_maxent_motifs(motif, 10) for motif in tqdm(prok_motifs)]
        prok_apws = map(lambda m:code_from_motif(m, pc=1/16.0),tqdm(prok_motifs))
        prok_apw_spoofs = [[[sample_site(apw) for _ in range(len(motif))] for __ in range(10)]
                             for apw, motif in tqdm(zip(prok_apws,prok_motifs))]
        # the euk pipeline mirrors the prok one, but runs on subsampled motifs
        euk_submotifs = map(subsample, euk_motifs)
        euk_chains = [posterior_chain2(motif,iterations=iterations) for motif in tqdm(euk_submotifs)]
        euk_bayes_spoofs = [[motif_from_theta(theta, len(motif)) for theta in tqdm(chain[iterations/2::500])]
                            for chain, motif in tqdm(zip(euk_chains, euk_submotifs))]
        euk_psfms = [psfm_from_motif(motif, pc=1/4.0) for motif in euk_submotifs]
        euk_psfm_spoofs = [[[sample_from_psfm(psfm) for _ in range(len(motif))] for _ in range(10)]
                           for psfm, motif in zip(euk_psfms, euk_submotifs)]
        euk_maxent_spoofs = [spoof_maxent_motifs(motif, 10) for motif in tqdm(euk_submotifs)]
        euk_apws = map(lambda m:code_from_motif(m, pc=1/16.0),tqdm(euk_submotifs))
        euk_apw_spoofs = [[[sample_site(apw) for _ in range(len(motif))] for __ in range(10)]
                          for apw, motif in tqdm(zip(euk_apws,euk_submotifs))]
        # cache everything so later runs can skip the generation step
        with open("prok_chains.pkl",'w') as f:
            cPickle.dump(prok_chains,f)
        with open("prok_bayes_spoofs.pkl",'w') as f:
            cPickle.dump(prok_bayes_spoofs,f)
        with open("prok_maxent_spoofs.pkl",'w') as f:
            cPickle.dump(prok_maxent_spoofs,f)
        with open("prok_psfm_spoofs.pkl",'w') as f:
            cPickle.dump(prok_psfm_spoofs,f)
        with open("prok_apw_spoofs.pkl",'w') as f:
            cPickle.dump(prok_apw_spoofs,f)

        with open("euk_submotifs.pkl",'w') as f:
            cPickle.dump(euk_submotifs,f)
        with open("euk_chains.pkl",'w') as f:
            cPickle.dump(euk_chains,f)
        with open("euk_bayes_spoofs.pkl",'w') as f:
            cPickle.dump(euk_bayes_spoofs,f)
        with open("euk_maxent_spoofs.pkl",'w') as f:
            cPickle.dump(euk_maxent_spoofs,f)
        with open("euk_psfm_spoofs.pkl",'w') as f:
            cPickle.dump(euk_psfm_spoofs,f)
        with open("euk_apw_spoofs.pkl",'w') as f:
            cPickle.dump(euk_apw_spoofs,f)

    else:
        # reload the cached results written by a previous generate_data run
        with open("prok_chains.pkl") as f:
            prok_chains = cPickle.load(f)
        with open("prok_bayes_spoofs.pkl") as f:
            prok_bayes_spoofs = cPickle.load(f)
        with open("prok_maxent_spoofs.pkl") as f:
            prok_maxent_spoofs = cPickle.load(f)
        with open("prok_psfm_spoofs.pkl") as f:
            prok_psfm_spoofs = cPickle.load(f)
        with open("prok_apw_spoofs.pkl") as f:
            prok_apw_spoofs = cPickle.load(f)

        with open("euk_submotifs.pkl") as f:
            euk_submotifs = cPickle.load(f)
        with open("euk_chains.pkl") as f:
            euk_chains = cPickle.load(f)
        with open("euk_bayes_spoofs.pkl") as f:
            euk_bayes_spoofs = cPickle.load(f)
        with open("euk_maxent_spoofs.pkl") as f:
            euk_maxent_spoofs = cPickle.load(f)
        with open("euk_apw_spoofs.pkl") as f:
            euk_apw_spoofs = cPickle.load(f)
        with open("euk_psfm_spoofs.pkl") as f:
            euk_psfm_spoofs = cPickle.load(f)

    #--------
    # prok statistics: one value per real motif, and the mean over its
    # spoofs for each spoofing method.  The plain *_ics lists are not used
    # by the plotting code below; only the *_ics_pp and *_mis lists are.
    prok_ics = map(motif_ic, prok_motifs)
    prok_mis = map(mi_per_col, prok_motifs)
    prok_maxent_ics = [mean(map(motif_ic,xs)) for xs in prok_maxent_spoofs]
    prok_maxent_mis = [mean(map(mi_per_col,xs)) for xs in prok_maxent_spoofs]
    prok_psfm_ics = [mean(map(motif_ic,xs)) for xs in prok_psfm_spoofs]
    prok_psfm_mis = [mean(map(mi_per_col,xs)) for xs in tqdm(prok_psfm_spoofs)]
    prok_bayes_ics = [mean(map(motif_ic,xs)) for xs in prok_bayes_spoofs]
    prok_bayes_mis = [mean(map(mi_per_col,xs)) for xs in tqdm(prok_bayes_spoofs)]
    prok_apw_ics = [mean(map(motif_ic,xs)) for xs in prok_apw_spoofs]
    prok_apw_mis = [mean(map(mi_per_col,xs)) for xs in prok_apw_spoofs]

    prok_ics_pp = map(motif_ic_per_col, prok_motifs)
    prok_maxent_ics_pp = [mean(map(motif_ic_per_col,xs)) for xs in prok_maxent_spoofs]
    prok_psfm_ics_pp = [mean(map(motif_ic_per_col,xs)) for xs in prok_psfm_spoofs]
    prok_bayes_ics_pp = [mean(map(motif_ic_per_col,xs)) for xs in prok_bayes_spoofs]
    prok_apw_ics_pp = [mean(map(motif_ic_per_col,xs)) for xs in prok_apw_spoofs]
    

    #--------
    # euk statistics, same layout as the prok ones
    euk_ics = map(motif_ic, tqdm(euk_submotifs))
    euk_mis = map(mi_per_col, tqdm(euk_submotifs))
    euk_maxent_ics = [mean(map(motif_ic,xs)) for xs in tqdm(euk_maxent_spoofs)]
    euk_maxent_mis = [mean(map(mi_per_col,xs)) for xs in tqdm(euk_maxent_spoofs)]
    euk_psfm_ics = [mean(map(motif_ic,xs)) for xs in tqdm(euk_psfm_spoofs)]
    euk_psfm_mis = [mean(map(mi_per_col,xs)) for xs in tqdm(euk_psfm_spoofs)]
    euk_bayes_ics = [mean(map(motif_ic,xs)) for xs in tqdm(euk_bayes_spoofs)]
    euk_bayes_mis = [mean(map(mi_per_col,xs)) for xs in tqdm(euk_bayes_spoofs)]
    euk_apw_ics = [mean(map(motif_ic,xs)) for xs in tqdm(euk_apw_spoofs)]
    euk_apw_mis = [mean(map(mi_per_col,xs)) for xs in tqdm(euk_apw_spoofs)]

    # NOTE(review): this uses euk_motifs, while every other euk statistic
    # and all euk spoofs are derived from euk_submotifs -- confirm whether
    # euk_submotifs was intended here.
    euk_ics_pp = map(motif_ic_per_col, euk_motifs)
    euk_maxent_ics_pp = [mean(map(motif_ic_per_col,xs)) for xs in euk_maxent_spoofs]
    euk_psfm_ics_pp = [mean(map(motif_ic_per_col,xs)) for xs in euk_psfm_spoofs]
    euk_bayes_ics_pp = [mean(map(motif_ic_per_col,xs)) for xs in euk_bayes_spoofs]
    euk_apw_ics_pp = [mean(map(motif_ic_per_col,xs)) for xs in euk_apw_spoofs]



    # shared axis limits, annotation positions and tick locations for the
    # IC panels and the MI panels
    #ic_min, ic_max, mi_min, mi_max = 4.5, 25, -0.1, 0.7
    ic_min, ic_max, mi_min, mi_max = -.1, 2.6, -0.05, 1
    #ic_xtext, ic_ytext, mi_xtext, mi_ytext = 5, 20, -0.05, 0.5
    ic_xtext, ic_ytext, mi_xtext, mi_ytext = -0.05, 2.2, -0.05, 0.85
    mi_xticks = [0, 0.25, 0.5, 0.75, 1]
    ic_yticks = [0, 0.5, 1, 1.5, 2]
    # scatter with its two arguments swapped: the first argument given to
    # revscatter is passed to scatter second
    revscatter = lambda xs, ys:scatter(ys, xs)
    sns.set_style('dark')
    #--- prok plots: odd subplot indices (grid columns 1 and 3) ---#
    # row 1: MaxEnt
    plt.subplot(4,4,1)
    plt.xticks([])
    #plt.yticks([])
    plt.yticks(ic_yticks, ic_yticks)
    r, p = revscatter(prok_ics_pp, prok_maxent_ics_pp)
    # root-mean-square deviation between the real and the spoofed statistic
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, prok_ics_pp, prok_maxent_ics_pp)))
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    plt.text(ic_xtext, ic_ytext,s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.ylabel("MaxEnt",fontsize='large')
    plt.subplot(4,4,3)
    plt.xticks([])
    plt.yticks(mi_xticks, mi_xticks)
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(prok_mis, prok_maxent_mis)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, prok_mis, prok_maxent_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    # row 2: PSFM
    plt.subplot(4,4,5)
    plt.xticks([])
    #plt.yticks([])
    plt.yticks(ic_yticks, ic_yticks)
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(prok_ics_pp, prok_psfm_ics_pp)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, prok_ics_pp, prok_psfm_ics_pp)))
    plt.text(ic_xtext, ic_ytext,s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.ylabel("PSFM",fontsize='large')
    plt.subplot(4,4,7)
    plt.xticks([])
    plt.yticks(mi_xticks, mi_xticks)
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(prok_mis, prok_psfm_mis)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, prok_mis, prok_psfm_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    # row 3: APW
    plt.subplot(4,4,9)
    plt.xticks([])
    #plt.yticks([])
    plt.yticks(ic_yticks, ic_yticks)
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(prok_ics_pp, prok_apw_ics_pp)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, prok_ics_pp, prok_apw_ics_pp)))
    plt.text(ic_xtext, ic_ytext,s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.ylabel("APW",fontsize='large')
    #plt.xlabel("IC (bits)",fontsize='large')
    plt.subplot(4,4,11)
    plt.xticks([])
    plt.yticks(mi_xticks, mi_xticks)
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(prok_mis, prok_apw_mis)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, prok_mis, prok_apw_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    # row 4: Bayes (bottom row, so x ticks and x labels are shown)
    plt.subplot(4,4,13)
    #plt.xticks([])
    plt.yticks(ic_yticks, ic_yticks)
    plt.xticks(ic_yticks, ic_yticks)
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(prok_ics_pp, prok_bayes_ics_pp)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, prok_ics_pp, prok_bayes_ics_pp)))
    plt.text(ic_xtext, ic_ytext,s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.xlabel("Prok IC",fontsize='large')
    plt.ylabel("Bayes",fontsize='large')
    plt.subplot(4,4,15)
    #plt.xticks([])
    plt.xticks(mi_xticks, mi_xticks)
    plt.yticks(mi_xticks, mi_xticks)
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(prok_mis, prok_bayes_mis)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, prok_mis, prok_bayes_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.xlabel("Prok MI",fontsize='large')

    #--- euk plots ---#
    # even subplot indices (grid columns 2 and 4); y ticks are hidden on
    # all of them
    plt.subplot(4,4,2)
    plt.xticks([])
    plt.yticks([])
    r, p = revscatter(euk_ics_pp, euk_maxent_ics_pp)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, euk_ics_pp, euk_maxent_ics_pp)))
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    plt.text(ic_xtext, ic_ytext,s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    #plt.ylabel("MaxEnt",fontsize='large')
    plt.subplot(4,4,4)
    plt.xticks([])
    plt.yticks([])
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(euk_mis, euk_maxent_mis)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, euk_mis, euk_maxent_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.subplot(4,4,6)
    plt.xticks([])
    plt.yticks([])
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(euk_ics_pp, euk_psfm_ics_pp)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, euk_ics_pp, euk_psfm_ics_pp)))
    plt.text(ic_xtext, ic_ytext,s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    #plt.ylabel("PSFM",fontsize='large')
    plt.subplot(4,4,8)
    plt.xticks([])
    plt.yticks([])
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(euk_mis, euk_psfm_mis)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, euk_mis, euk_psfm_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.subplot(4,4,10)
    plt.xticks([])
    plt.yticks([])
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(euk_ics_pp, euk_apw_ics_pp)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, euk_ics_pp, euk_apw_ics_pp)))
    plt.text(ic_xtext, ic_ytext,s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    #plt.ylabel("APW",fontsize='large')
    #plt.xlabel("IC (bits)",fontsize='large')
    plt.subplot(4,4,12)
    plt.xticks([])
    plt.yticks([])
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(euk_mis, euk_apw_mis)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, euk_mis, euk_apw_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.subplot(4,4,14)
    #plt.xticks([])
    #
    plt.yticks([])
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(euk_ics_pp, euk_bayes_ics_pp)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, euk_ics_pp, euk_bayes_ics_pp)))
    plt.text(ic_xtext, ic_ytext,s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    #plt.ylabel("Bayes",fontsize='large')
    plt.xlabel("Euk IC",fontsize='large')
    plt.subplot(4,4,16)
    #plt.xticks([])
    plt.xticks(mi_xticks, mi_xticks)
    plt.yticks([])
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(euk_mis, euk_bayes_mis)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, euk_mis, euk_bayes_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    #plt.axis('off')
    #plt.xlabel("MI (bits/column pair)",fontsize='large')
    plt.xlabel("Euk MI",fontsize='large')
    plt.tight_layout()
    maybesave("spoof-statistics-rmsd.pdf")