Ejemplo n.º 1
0
def _show2d(self,
            bmin=None,
            bmax=None,
            margin=0.2,
            N_grid=20,
            levels=None,
            scatter_rule=None):
    """
    Draw the model's predicted mean with utils.show2d and overlay the
    stored samples (self.X, self.Y) with utils.scatter.

    bmin, bmax: bounds passed to utils.show2d; if either is None, both are
        replaced by self.get_boundary(margin=margin).
    margin: forwarded to self.get_boundary (only used when bounds are missing).
    N_grid: forwarded to utils.show2d as N.
    levels: forwarded to utils.show2d.
    scatter_rule: Y -> [(idxes_1, "blue"), ..., (idxes_n, "red")];
        when None, every index of Y is paired with "red".
    """
    def mean_twice(x):
        # NOTE(review): the second value returned by self.predict is
        # discarded and the first is returned in both slots -- confirm this
        # is what utils.show2d expects.
        mu, _var = self.predict(x)
        return mu, mu

    def all_red(Y):
        # Default rule: Y -> [(all indices, "red")]
        return [(range(len(Y)), "red")]

    bounds_missing = bmin is None or bmax is None
    if bounds_missing:
        bmin, bmax = self.get_boundary(margin=margin)

    figure, axes = plt.subplots()
    fax = (figure, axes)
    utils.show2d(mean_twice, bmin, bmax, fax=fax, levels=levels, N=N_grid)

    rule = all_red if scatter_rule is None else scatter_rule
    utils.scatter(rule, self.X, self.Y, fax)
Ejemplo n.º 2
0
 def show_best_x2(self, img1, img2, x1, x2, l):
     """Show img1 and img2 side by side with x1 and x2 scattered on top.

     The left panel is img1 drawn with plt.imshow; the right panel is drawn
     by self.plot_epipolar_line(img2, l, show=False). The figure is then
     displayed with plt.show().
     """
     fig = plt.figure()

     # Left panel: first image plus its points.
     fig.add_subplot(1, 2, 1)
     plt.imshow(img1)
     scatter(x1)

     # Right panel: second image with the line `l`, plus its points.
     fig.add_subplot(1, 2, 2)
     self.plot_epipolar_line(img2, l, show=False)
     scatter(x2)

     plt.show()
def prok_model_comparison():
    """Scatter linear BIC against pairwise BIC for every motif in tfdf.tfs.

    Runs model_comparison on each motif, splits the results into pairwise
    and linear BICs with transpose, and plots linear (x) versus pairwise (y)
    on log-log axes.
    """
    # NOTE(review): machine-specific absolute path, presumably where
    # parse_tfbs_data lives -- confirm before running elsewhere.
    sys.path.append("/home/pat/motifs")
    from parse_tfbs_data import tfdf

    prok_motifs = [getattr(tfdf, name) for name in tfdf.tfs]
    comparisons = []
    for motif in tqdm(prok_motifs):
        comparisons.append(model_comparison(motif))

    pairwise_bics, linear_bics = transpose(comparisons)
    scatter(linear_bics, pairwise_bics)
    plt.xlabel("Linear BIC")
    plt.ylabel("Pairwise BIC")
    plt.loglog()
Ejemplo n.º 4
0
def misclassification_polytope(a, c, ls):
    """Create misclassification constraints.

    a: 2-D array with exactly one row (only batch size 1 is supported).
    c: column index (or indices) subtracted from in `u` and added in `Av`.
    ls: column index (or indices) of the other classes.

    Returns (Av, vA, u):
        Av(Vv) computes Vv[:, c] - Vv[:, ls].
        vA(v) combines two `scatter` calls over a.shape[1] classes: the
            row-sums of v placed at c plus -v placed at ls.
        u is a[:, ls] - a[:, c], computed from the arguments as passed in.
    """
    assert a.ndim == 2
    assert a.shape[0] == 1  # only batch size 1 is supported
    n_classes = a.shape[1]

    # Computed before c and ls are normalised to 1-D index arrays below.
    u = a[:, ls] - a[:, c]

    c_idx = np.atleast_1d(np.asarray([c]).squeeze())
    ls_idx = np.atleast_1d(np.asarray([ls]).squeeze())

    def Av(Vv):
        return Vv[:, c_idx] - Vv[:, ls_idx]

    def vA(v):
        v2d = np.atleast_2d(v)
        row_sums = np.sum(v2d, axis=-1, keepdims=True)
        return (scatter(c_idx, row_sums, n_classes)
                + scatter(ls_idx, -v2d, n_classes))

    return Av, vA, u
Ejemplo n.º 5
0
def test_predict_ic(trials=100):
    """Scatter predicted IC against observed IC over random parameter draws.

    Each trial draws sigma in [0.1, 5.1), L in 5..14, mu in (-20, 0] and
    Ne in [1, 6), samples a matrix with sample_matrix(L, sigma), and records
    predict_ic(matrix, mu, Ne) alongside the motif_ic of a 100-site motif
    from sample_motif_cftp. The pair returned by scatter is printed.

    trials: number of random parameter draws.
    """
    pred_ics = []
    obs_ics = []
    for _ in trange(trials):
        sigma = random.random() * 5 + 0.1
        L = random.randrange(5, 15)
        matrix = sample_matrix(L, sigma)
        mu = random.random() * (-20)
        Ne = random.random() * 5 + 1
        pred_ic = predict_ic(matrix, mu, Ne)
        obs_ic = motif_ic(sample_motif_cftp(matrix, mu, Ne, n=100))
        pred_ics.append(pred_ic)
        obs_ics.append(obs_ic)
    r, p = scatter(pred_ics, obs_ics)
    # One pre-formatted argument: valid on Python 3 and prints the same
    # "r p" line as the old print statement on Python 2.
    print("%s %s" % (r, p))
Ejemplo n.º 6
0
def test_predict_ic(trials=100):
    """Scatter predicted IC against observed IC over random parameter draws.

    Each trial draws sigma in [0.1, 5.1), L in 5..14, mu in (-20, 0] and
    Ne in [1, 6), samples a matrix with sample_matrix(L, sigma), and records
    predict_ic(matrix, mu, Ne) alongside the motif_ic of a 100-site motif
    from sample_motif_cftp. The pair returned by scatter is printed.

    trials: number of random parameter draws.
    """
    pred_ics = []
    obs_ics = []
    for _ in trange(trials):
        sigma = random.random() * 5 + 0.1
        L = random.randrange(5, 15)
        matrix = sample_matrix(L, sigma)
        mu = random.random() * (-20)
        Ne = random.random() * 5 + 1
        pred_ic = predict_ic(matrix, mu, Ne)
        obs_ic = motif_ic(sample_motif_cftp(matrix, mu, Ne, n=100))
        pred_ics.append(pred_ic)
        obs_ics.append(obs_ic)
    r, p = scatter(pred_ics, obs_ics)
    # One pre-formatted argument: valid on Python 3 and prints the same
    # "r p" line as the old print statement on Python 2.
    print("%s %s" % (r, p))
Ejemplo n.º 7
0
def test_predict_ic_from_theta(trials=100, num_matrices=10):
    """Scatter predicted against observed IC for random (sigma, mu, Ne) draws.

    Each trial draws sigma in [0.1, 5.1), L in 5..14, mu in (-20, 0] and
    Ne in [1, 6), then records predict_ic_from_theta and
    observe_ic_from_theta for theta = (sigma, mu, Ne). The running list
    lengths are printed after every trial and the pair returned by scatter
    is printed at the end.

    trials: number of random parameter draws.
    num_matrices: forwarded to both *_ic_from_theta helpers.
    """
    pred_ics = []
    obs_ics = []
    for _ in trange(trials):
        # Draw order matters for reproducibility with a seeded generator.
        sigma = random.random() * 5 + 0.1
        L = random.randrange(5, 15)
        mu = random.random() * (-20)
        Ne = random.random() * 5 + 1
        theta = sigma, mu, Ne
        pred_ic = predict_ic_from_theta(theta, L, num_matrices=num_matrices)
        obs_ic = observe_ic_from_theta(theta, L, num_matrices=num_matrices)
        pred_ics.append(pred_ic)
        obs_ics.append(obs_ic)
        # Pre-formatted so the line is valid on Python 3 and prints the same
        # text as the old print statement on Python 2.
        print("%s %s" % (len(pred_ics), len(obs_ics)))
    r, p = scatter(pred_ics, obs_ics)
    print("%s %s" % (r, p))
Ejemplo n.º 8
0
def test_predict_ic_from_theta(trials=100, num_matrices=10):
    """Scatter predicted against observed IC for random (sigma, mu, Ne) draws.

    Each trial draws sigma in [0.1, 5.1), L in 5..14, mu in (-20, 0] and
    Ne in [1, 6), then records predict_ic_from_theta and
    observe_ic_from_theta for theta = (sigma, mu, Ne). The running list
    lengths are printed after every trial and the pair returned by scatter
    is printed at the end.

    trials: number of random parameter draws.
    num_matrices: forwarded to both *_ic_from_theta helpers.
    """
    pred_ics = []
    obs_ics = []
    for _ in trange(trials):
        # Draw order matters for reproducibility with a seeded generator.
        sigma = random.random() * 5 + 0.1
        L = random.randrange(5, 15)
        mu = random.random() * (-20)
        Ne = random.random() * 5 + 1
        theta = sigma, mu, Ne
        pred_ic = predict_ic_from_theta(theta, L, num_matrices=num_matrices)
        obs_ic = observe_ic_from_theta(theta, L, num_matrices=num_matrices)
        pred_ics.append(pred_ic)
        obs_ics.append(obs_ic)
        # Pre-formatted so the line is valid on Python 3 and prints the same
        # text as the old print statement on Python 2.
        print("%s %s" % (len(pred_ics), len(obs_ics)))
    r, p = scatter(pred_ics, obs_ics)
    print("%s %s" % (r, p))
Ejemplo n.º 9
0
def experiment2(trials=100):
    """Scatter the mean log of each series returned by experiment2_().

    experiment2_() is run `trials` times; every series it returns is reduced
    to the mean of its logs, and the per-trial rows are transposed into the
    positional arguments of scatter.

    Observation from the original author: APW models win, presumably because
    they have larger sigmas.
    """
    def mean_log(xs):
        return mean(map(log, xs))

    rows = []
    for _ in trange(trials):
        rows.append(map(mean_log, experiment2_()))
    scatter(*transpose(rows))
Ejemplo n.º 10
0
def _load_pickle(path):
    """Return the object unpickled from the file at *path*."""
    # Intended only for cache files written by main_experiment itself;
    # unpickling untrusted files can execute arbitrary code.
    with open(path, 'rb') as f:
        return cPickle.load(f)


def _generate_spoofs(motifs, iterations):
    """Sample one posterior chain per motif and build the spoof collections.

    Returns (chains, spoofs) where spoofs maps "bayes", "psfm", "maxent" and
    "apw" to a list holding one group of spoof motifs per input motif.
    """
    chains = [
        posterior_chain2(motif, iterations=iterations)
        for motif in tqdm(motifs)
    ]
    # Every 500th chain entry from index iterations // 2 onward.
    bayes = [[
        motif_from_theta(theta, len(motif))
        for theta in tqdm(chain[iterations // 2::500])
    ] for chain, motif in tqdm(zip(chains, motifs))]
    psfms = [psfm_from_motif(motif, pc=1 / 4.0) for motif in motifs]
    psfm = [[[sample_from_psfm(table) for _ in range(len(motif))]
             for _ in range(10)] for table, motif in zip(psfms, motifs)]
    maxent = [spoof_maxent_motifs(motif, 10) for motif in tqdm(motifs)]
    apws = [code_from_motif(motif, pc=1 / 16.0) for motif in tqdm(motifs)]
    apw = [[[sample_site(code) for _ in range(len(motif))]
            for __ in range(10)] for code, motif in tqdm(zip(apws, motifs))]
    return chains, {"bayes": bayes, "psfm": psfm, "maxent": maxent, "apw": apw}


def _mean_spoof_stat(stat, spoof_groups):
    """Return, for each group of spoof motifs, the mean of *stat* over it."""
    return [
        mean([stat(spoof) for spoof in group]) for group in tqdm(spoof_groups)
    ]


def _spoof_stat_panel(index, observed, predicted, lims, text_xy, xticks,
                      yticks, xlabel=None, ylabel=None):
    """Draw one panel of the 4x4 grid, annotated with r^2 and RMSD.

    index: subplot position (1..16).
    observed, predicted: equally long sequences of per-motif statistics.
    lims: (min, max) applied to both axes.
    text_xy: position of the r^2 label; the RMSD label sits at 0.8 * y.
    xticks, yticks: tick values (also used as labels); empty hides the ticks.
    xlabel, ylabel: optional axis labels.
    """
    plt.subplot(4, 4, index)
    for set_ticks, ticks in ((plt.xticks, xticks), (plt.yticks, yticks)):
        if ticks:
            set_ticks(ticks, ticks)
        else:
            set_ticks([])
    # Limits are fixed before plotting for every panel.
    plt.xlim(*lims)
    plt.ylim(*lims)
    # NOTE(review): scatter receives (predicted, observed), i.e. the reverse
    # of the argument order used for labelling -- confirm which axis each
    # ends up on against the scatter helper.
    r, _p = scatter(predicted, observed)
    rmsd = sqrt(
        mean(zipWith(lambda x, y: (x - y)**2, observed, predicted)))
    text_x, text_y = text_xy
    plt.text(text_x, text_y, s='$r^2$ = %1.3f' % (r**2))
    plt.text(text_x, text_y * 0.8, s='$RMSD$ = %1.3f' % rmsd)
    if xlabel is not None:
        plt.xlabel(xlabel, fontsize='large')
    if ylabel is not None:
        plt.ylabel(ylabel, fontsize='large')


def main_experiment(generate_data=False):
    """Plot observed versus spoofed per-column IC and MI for prok and euk motifs.

    generate_data: when true, spoof motifs are generated from the module-level
        prok_motifs and from subsampled euk_motifs (MaxEnt, PSFM, APW and
        Bayes spoofs) and cached as "<domain>_<name>.pkl" files in the
        working directory; otherwise the spoofs and euk submotifs are read
        back from those files.

    The figure is a 4x4 grid: one row per spoof model, columns Prok IC,
    Euk IC, Prok MI, Euk MI. It is handed to maybesave as
    "spoof-statistics-rmsd.pdf".
    """
    kinds = [("maxent", "MaxEnt"), ("psfm", "PSFM"), ("apw", "APW"),
             ("bayes", "Bayes")]

    if generate_data:
        iterations = 10000
        prok_chains, prok_spoofs = _generate_spoofs(prok_motifs, iterations)
        euk_submotifs = [subsample(motif) for motif in euk_motifs]
        euk_chains, euk_spoofs = _generate_spoofs(euk_submotifs, iterations)
        cache = {
            "prok_chains": prok_chains,
            "euk_submotifs": euk_submotifs,
            "euk_chains": euk_chains,
        }
        for kind, _label in kinds:
            cache["prok_%s_spoofs" % kind] = prok_spoofs[kind]
            cache["euk_%s_spoofs" % kind] = euk_spoofs[kind]
        for stem, obj in cache.items():
            with open(stem + ".pkl", 'wb') as f:
                cPickle.dump(obj, f)
    else:
        # The chain pickles are not needed for plotting and are not loaded.
        prok_spoofs = dict(
            (kind, _load_pickle("prok_%s_spoofs.pkl" % kind))
            for kind, _label in kinds)
        euk_submotifs = _load_pickle("euk_submotifs.pkl")
        euk_spoofs = dict(
            (kind, _load_pickle("euk_%s_spoofs.pkl" % kind))
            for kind, _label in kinds)

    spoofs = {"Prok": prok_spoofs, "Euk": euk_spoofs}
    observed = {
        "Prok": ([motif_ic_per_col(motif) for motif in prok_motifs],
                 [mi_per_col(motif) for motif in prok_motifs]),
        # NOTE(review): observed Euk IC uses the full euk_motifs while the MI
        # and every spoof use euk_submotifs -- confirm this is intended.
        "Euk": ([motif_ic_per_col(motif) for motif in euk_motifs],
                [mi_per_col(motif) for motif in tqdm(euk_submotifs)]),
    }

    ic_lims, mi_lims = (-.1, 2.6), (-0.05, 1)
    ic_text, mi_text = (-0.05, 2.2), (-0.05, 0.85)
    ic_ticks = [0, 0.5, 1, 1.5, 2]
    mi_ticks = [0, 0.25, 0.5, 0.75, 1]

    sns.set_style('dark')
    for column, domain in enumerate(("Prok", "Euk")):
        obs_ics, obs_mis = observed[domain]
        # Only the Prok columns carry y ticks and the model name.
        labelled = domain == "Prok"
        for row, (kind, label) in enumerate(kinds):
            # Only the bottom row carries x ticks and x labels.
            bottom = row == len(kinds) - 1
            groups = spoofs[domain][kind]
            _spoof_stat_panel(
                4 * row + column + 1,
                obs_ics,
                _mean_spoof_stat(motif_ic_per_col, groups),
                ic_lims,
                ic_text,
                xticks=ic_ticks if bottom else [],
                yticks=ic_ticks if labelled else [],
                xlabel="%s IC" % domain if bottom else None,
                ylabel=label if labelled else None)
            _spoof_stat_panel(
                4 * row + column + 3,
                obs_mis,
                _mean_spoof_stat(mi_per_col, groups),
                mi_lims,
                mi_text,
                xticks=mi_ticks if bottom else [],
                yticks=mi_ticks if labelled else [],
                xlabel="%s MI" % domain if bottom else None)
    plt.tight_layout()
    maybesave("spoof-statistics-rmsd.pdf")
Ejemplo n.º 11
0
def grand_spoofing_experiment(prok_motifs, euk_motifs):
    """Plot predicted versus observed MI for three spoofing models.

    For each model (MaxEnt, Gaussian Linear Ensemble, Match-Mismatch) ten
    spoofs are generated per prok motif and per subsampled euk motif. Each
    collection is pickled to "<domain>_<model>_spoofs" in the working
    directory, and the mean mi_per_col of the spoofs is scattered against
    the mi_per_col of the real motifs (euk in green). The three-panel
    figure is handed to maybesave as "mi-spoof-plot.eps".

    The plot always uses the spoofs generated in this call; nothing is read
    back from disk.

    prok_motifs, euk_motifs: sequences of motifs.
    """
    def mean_mi(spoof_groups):
        return [
            mean([mi_per_col(spoof) for spoof in group])
            for group in tqdm(spoof_groups)
        ]

    models = [
        ("maxent", spoof_maxent_motifs, "MaxEnt"),
        ("cftp", spoof_motif_cftp_occ, "Gaussian Linear Ensemble"),
        ("oo", spoof_oo_motifs, "Match-Mismatch"),
    ]

    # should we subsample once or each time??
    spoofs = {}
    for kind, spoof, _title in models:
        spoofs["prok", kind] = [
            spoof(motif, 10) for motif in tqdm(prok_motifs)
        ]
        spoofs["euk", kind] = [
            spoof(subsample(motif), 10) for motif in tqdm(euk_motifs)
        ]

    for (domain, kind), groups in spoofs.items():
        with open("%s_%s_spoofs" % (domain, kind), 'wb') as f:
            cPickle.dump(groups, f)

    prok_mis = [mi_per_col(motif) for motif in prok_motifs]
    # Observed euk MI is measured on a fresh subsample of every motif.
    euk_submotifs = [subsample(motif) for motif in euk_motifs]
    euk_mis = [mi_per_col(motif) for motif in euk_submotifs]

    for position, (kind, _spoof, title) in enumerate(models, 1):
        plt.subplot(1, 3, position)
        scatter(mean_mi(spoofs["prok", kind]), prok_mis)
        scatter(mean_mi(spoofs["euk", kind]), euk_mis, color='g')
        plt.xlabel("Predicted MI", fontsize='large')
        plt.ylabel("Observed MI", fontsize='large')
        plt.title(title, fontsize='large')
    plt.tight_layout()
    maybesave("mi-spoof-plot.eps")
Ejemplo n.º 12
0
def mi_sampling_experiment():
    """Scatter the motif_mi of each prok motif against the mean over its spoofs.

    Two scatters are drawn from the module-level prok_motifs: one against
    100 spoofs per motif from spoof_maxent_motifs, one against 10 spoofs per
    motif from spoof_motif_cftp_occ.
    """
    prok_maxent_motifs = [
        spoof_maxent_motifs(motif, 100) for motif in tqdm(prok_motifs)
    ]
    prok_cftp_motifs = [
        spoof_motif_cftp_occ(motif, 10) for motif in tqdm(prok_motifs)
    ]
    for spoof_groups in (prok_maxent_motifs, prok_cftp_motifs):
        # Observed MI is recomputed for each scatter, as before.
        observed = [motif_mi(motif) for motif in prok_motifs]
        spoofed = [
            mean([motif_mi(spoof) for spoof in spoofs])
            for spoofs in tqdm(spoof_groups)
        ]
        scatter(observed, spoofed)
Ejemplo n.º 13
0
# --- Sidebar, step 2: choose a strain -------------------------------------
# Streamlit renders widgets in call order, so statement order here is the
# on-screen order.
# NOTE(review): the spaces just inside `** ... **` may stop Markdown from
# rendering these headings as bold -- confirm in the running app.
st.sidebar.markdown("② ** Strain Selection **")
strains, strain_formats = strain_counts(strain_category_data)
# Each option is displayed as whatever strain_formats.get returns for it.
strain_value = st.sidebar.selectbox(label="Strain (Test Count)",
                                    options=strains,
                                    format_func=strain_formats.get)
# Keep only the rows whose test_strain equals the selected value
# (presumably a pandas DataFrame query; `@` refers to the local variable).
strain_data = strain_category_data.query("test_strain == @strain_value")

# --- Sidebar: optional dataset documentation ------------------------------
st.sidebar.markdown("---")
st.sidebar.markdown("ℹ️ ** Details **")
desc_check = st.sidebar.checkbox("📃 Dataset Description")

# Both markdown files are read on every run, whether or not the matching
# checkbox is ticked.
desc_markdown = read_markdown_file("data_description.md")
dict_check = st.sidebar.checkbox("📕 Data Dictionary")
dict_markdown = read_markdown_file("data_dictionary.md")

if desc_check:
    st.sidebar.markdown(desc_markdown, unsafe_allow_html=True)
if dict_check:
    st.sidebar.markdown(dict_markdown, unsafe_allow_html=True)
    # The column names are shown as a pretty-printed code block.
    st.sidebar.code(pformat(colnames, indent=2))

# --- Main page: charts and table for the selected strain ------------------
st.markdown("---")
st.markdown("## Strain Testing Data")
st.altair_chart(scatter(strain_data))

st.altair_chart(line(strain_data))

st.markdown("### Top 10 Highest THC Measurements")
styled_test_table = get_top_test_table(strain_data)
st.table(styled_test_table)
Ejemplo n.º 14
0
def main_experiment(generate_data=False):
    if generate_data:
        iterations = 10000
        prok_chains = [posterior_chain2(motif,iterations=iterations) for motif in tqdm(prok_motifs)]
        prok_bayes_spoofs = [[motif_from_theta(theta, len(motif)) for theta in tqdm(chain[iterations/2::500])]
                       for chain, motif in tqdm(zip(prok_chains, prok_motifs))]
        prok_psfms = [psfm_from_motif(motif, pc=1/4.0) for motif in prok_motifs]
        prok_psfm_spoofs = [[[sample_from_psfm(psfm) for _ in range(len(motif))] for _ in range(10)]
                            for psfm, motif in zip(prok_psfms, prok_motifs)]
        prok_maxent_spoofs = [spoof_maxent_motifs(motif, 10) for motif in tqdm(prok_motifs)]
        prok_apws = map(lambda m:code_from_motif(m, pc=1/16.0),tqdm(prok_motifs))
        prok_apw_spoofs = [[[sample_site(apw) for _ in range(len(motif))] for __ in range(10)]
                             for apw, motif in tqdm(zip(prok_apws,prok_motifs))]
        euk_submotifs = map(subsample, euk_motifs)
        euk_chains = [posterior_chain2(motif,iterations=iterations) for motif in tqdm(euk_submotifs)]
        euk_bayes_spoofs = [[motif_from_theta(theta, len(motif)) for theta in tqdm(chain[iterations/2::500])]
                            for chain, motif in tqdm(zip(euk_chains, euk_submotifs))]
        euk_psfms = [psfm_from_motif(motif, pc=1/4.0) for motif in euk_submotifs]
        euk_psfm_spoofs = [[[sample_from_psfm(psfm) for _ in range(len(motif))] for _ in range(10)]
                           for psfm, motif in zip(euk_psfms, euk_submotifs)]
        euk_maxent_spoofs = [spoof_maxent_motifs(motif, 10) for motif in tqdm(euk_submotifs)]
        euk_apws = map(lambda m:code_from_motif(m, pc=1/16.0),tqdm(euk_submotifs))
        euk_apw_spoofs = [[[sample_site(apw) for _ in range(len(motif))] for __ in range(10)]
                          for apw, motif in tqdm(zip(euk_apws,euk_submotifs))]
        with open("prok_chains.pkl",'w') as f:
            cPickle.dump(prok_chains,f)
        with open("prok_bayes_spoofs.pkl",'w') as f:
            cPickle.dump(prok_bayes_spoofs,f)
        with open("prok_maxent_spoofs.pkl",'w') as f:
            cPickle.dump(prok_maxent_spoofs,f)
        with open("prok_psfm_spoofs.pkl",'w') as f:
            cPickle.dump(prok_psfm_spoofs,f)
        with open("prok_apw_spoofs.pkl",'w') as f:
            cPickle.dump(prok_apw_spoofs,f)

        with open("euk_submotifs.pkl",'w') as f:
            cPickle.dump(euk_submotifs,f)
        with open("euk_chains.pkl",'w') as f:
            cPickle.dump(euk_chains,f)
        with open("euk_bayes_spoofs.pkl",'w') as f:
            cPickle.dump(euk_bayes_spoofs,f)
        with open("euk_maxent_spoofs.pkl",'w') as f:
            cPickle.dump(euk_maxent_spoofs,f)
        with open("euk_psfm_spoofs.pkl",'w') as f:
            cPickle.dump(euk_psfm_spoofs,f)
        with open("euk_apw_spoofs.pkl",'w') as f:
            cPickle.dump(euk_apw_spoofs,f)

    else:
        with open("prok_chains.pkl") as f:
            prok_chains = cPickle.load(f)
        with open("prok_bayes_spoofs.pkl") as f:
            prok_bayes_spoofs = cPickle.load(f)
        with open("prok_maxent_spoofs.pkl") as f:
            prok_maxent_spoofs = cPickle.load(f)
        with open("prok_psfm_spoofs.pkl") as f:
            prok_psfm_spoofs = cPickle.load(f)
        with open("prok_apw_spoofs.pkl") as f:
            prok_apw_spoofs = cPickle.load(f)

        with open("euk_submotifs.pkl") as f:
            euk_submotifs = cPickle.load(f)
        with open("euk_chains.pkl") as f:
            euk_chains = cPickle.load(f)
        with open("euk_bayes_spoofs.pkl") as f:
            euk_bayes_spoofs = cPickle.load(f)
        with open("euk_maxent_spoofs.pkl") as f:
            euk_maxent_spoofs = cPickle.load(f)
        with open("euk_apw_spoofs.pkl") as f:
            euk_apw_spoofs = cPickle.load(f)
        with open("euk_psfm_spoofs.pkl") as f:
            euk_psfm_spoofs = cPickle.load(f)

    #--------
    prok_ics = map(motif_ic, prok_motifs)
    prok_mis = map(mi_per_col, prok_motifs)
    prok_maxent_ics = [mean(map(motif_ic,xs)) for xs in prok_maxent_spoofs]
    prok_maxent_mis = [mean(map(mi_per_col,xs)) for xs in prok_maxent_spoofs]
    prok_psfm_ics = [mean(map(motif_ic,xs)) for xs in prok_psfm_spoofs]
    prok_psfm_mis = [mean(map(mi_per_col,xs)) for xs in tqdm(prok_psfm_spoofs)]
    prok_bayes_ics = [mean(map(motif_ic,xs)) for xs in prok_bayes_spoofs]
    prok_bayes_mis = [mean(map(mi_per_col,xs)) for xs in tqdm(prok_bayes_spoofs)]
    prok_apw_ics = [mean(map(motif_ic,xs)) for xs in prok_apw_spoofs]
    prok_apw_mis = [mean(map(mi_per_col,xs)) for xs in prok_apw_spoofs]

    prok_ics_pp = map(motif_ic_per_col, prok_motifs)
    prok_maxent_ics_pp = [mean(map(motif_ic_per_col,xs)) for xs in prok_maxent_spoofs]
    prok_psfm_ics_pp = [mean(map(motif_ic_per_col,xs)) for xs in prok_psfm_spoofs]
    prok_bayes_ics_pp = [mean(map(motif_ic_per_col,xs)) for xs in prok_bayes_spoofs]
    prok_apw_ics_pp = [mean(map(motif_ic_per_col,xs)) for xs in prok_apw_spoofs]
    

    #--------
    euk_ics = map(motif_ic, tqdm(euk_submotifs))
    euk_mis = map(mi_per_col, tqdm(euk_submotifs))
    euk_maxent_ics = [mean(map(motif_ic,xs)) for xs in tqdm(euk_maxent_spoofs)]
    euk_maxent_mis = [mean(map(mi_per_col,xs)) for xs in tqdm(euk_maxent_spoofs)]
    euk_psfm_ics = [mean(map(motif_ic,xs)) for xs in tqdm(euk_psfm_spoofs)]
    euk_psfm_mis = [mean(map(mi_per_col,xs)) for xs in tqdm(euk_psfm_spoofs)]
    euk_bayes_ics = [mean(map(motif_ic,xs)) for xs in tqdm(euk_bayes_spoofs)]
    euk_bayes_mis = [mean(map(mi_per_col,xs)) for xs in tqdm(euk_bayes_spoofs)]
    euk_apw_ics = [mean(map(motif_ic,xs)) for xs in tqdm(euk_apw_spoofs)]
    euk_apw_mis = [mean(map(mi_per_col,xs)) for xs in tqdm(euk_apw_spoofs)]

    euk_ics_pp = map(motif_ic_per_col, euk_motifs)
    euk_maxent_ics_pp = [mean(map(motif_ic_per_col,xs)) for xs in euk_maxent_spoofs]
    euk_psfm_ics_pp = [mean(map(motif_ic_per_col,xs)) for xs in euk_psfm_spoofs]
    euk_bayes_ics_pp = [mean(map(motif_ic_per_col,xs)) for xs in euk_bayes_spoofs]
    euk_apw_ics_pp = [mean(map(motif_ic_per_col,xs)) for xs in euk_apw_spoofs]



    #ic_min, ic_max, mi_min, mi_max = 4.5, 25, -0.1, 0.7
    ic_min, ic_max, mi_min, mi_max = -.1, 2.6, -0.05, 1
    #ic_xtext, ic_ytext, mi_xtext, mi_ytext = 5, 20, -0.05, 0.5
    ic_xtext, ic_ytext, mi_xtext, mi_ytext = -0.05, 2.2, -0.05, 0.85
    mi_xticks = [0, 0.25, 0.5, 0.75, 1]
    ic_yticks = [0, 0.5, 1, 1.5, 2]
    revscatter = lambda xs, ys:scatter(ys, xs)
    sns.set_style('dark')
    plt.subplot(4,4,1)
    plt.xticks([])
    #plt.yticks([])
    plt.yticks(ic_yticks, ic_yticks)
    r, p = revscatter(prok_ics_pp, prok_maxent_ics_pp)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, prok_ics_pp, prok_maxent_ics_pp)))
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    plt.text(ic_xtext, ic_ytext,s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.ylabel("MaxEnt",fontsize='large')
    plt.subplot(4,4,3)
    plt.xticks([])
    plt.yticks(mi_xticks, mi_xticks)
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(prok_mis, prok_maxent_mis)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, prok_mis, prok_maxent_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.subplot(4,4,5)
    plt.xticks([])
    #plt.yticks([])
    plt.yticks(ic_yticks, ic_yticks)
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(prok_ics_pp, prok_psfm_ics_pp)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, prok_ics_pp, prok_psfm_ics_pp)))
    plt.text(ic_xtext, ic_ytext,s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.ylabel("PSFM",fontsize='large')
    plt.subplot(4,4,7)
    plt.xticks([])
    plt.yticks(mi_xticks, mi_xticks)
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(prok_mis, prok_psfm_mis)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, prok_mis, prok_psfm_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.subplot(4,4,9)
    plt.xticks([])
    #plt.yticks([])
    plt.yticks(ic_yticks, ic_yticks)
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(prok_ics_pp, prok_apw_ics_pp)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, prok_ics_pp, prok_apw_ics_pp)))
    plt.text(ic_xtext, ic_ytext,s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.ylabel("APW",fontsize='large')
    #plt.xlabel("IC (bits)",fontsize='large')
    plt.subplot(4,4,11)
    plt.xticks([])
    plt.yticks(mi_xticks, mi_xticks)
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(prok_mis, prok_apw_mis)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, prok_mis, prok_apw_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.subplot(4,4,13)
    #plt.xticks([])
    plt.yticks(ic_yticks, ic_yticks)
    plt.xticks(ic_yticks, ic_yticks)
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(prok_ics_pp, prok_bayes_ics_pp)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, prok_ics_pp, prok_bayes_ics_pp)))
    plt.text(ic_xtext, ic_ytext,s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.xlabel("Prok IC",fontsize='large')
    plt.ylabel("Bayes",fontsize='large')
    plt.subplot(4,4,15)
    #plt.xticks([])
    plt.xticks(mi_xticks, mi_xticks)
    plt.yticks(mi_xticks, mi_xticks)
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(prok_mis, prok_bayes_mis)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, prok_mis, prok_bayes_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.xlabel("Prok MI",fontsize='large')

    #--- euk plots ---#
    plt.subplot(4,4,2)
    plt.xticks([])
    plt.yticks([])
    r, p = revscatter(euk_ics_pp, euk_maxent_ics_pp)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, euk_ics_pp, euk_maxent_ics_pp)))
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    plt.text(ic_xtext, ic_ytext,s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    #plt.ylabel("MaxEnt",fontsize='large')
    plt.subplot(4,4,4)
    plt.xticks([])
    plt.yticks([])
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(euk_mis, euk_maxent_mis)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, euk_mis, euk_maxent_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.subplot(4,4,6)
    plt.xticks([])
    plt.yticks([])
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(euk_ics_pp, euk_psfm_ics_pp)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, euk_ics_pp, euk_psfm_ics_pp)))
    plt.text(ic_xtext, ic_ytext,s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    #plt.ylabel("PSFM",fontsize='large')
    plt.subplot(4,4,8)
    plt.xticks([])
    plt.yticks([])
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(euk_mis, euk_psfm_mis)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, euk_mis, euk_psfm_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.subplot(4,4,10)
    plt.xticks([])
    plt.yticks([])
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(euk_ics_pp, euk_apw_ics_pp)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, euk_ics_pp, euk_apw_ics_pp)))
    plt.text(ic_xtext, ic_ytext,s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    #plt.ylabel("APW",fontsize='large')
    #plt.xlabel("IC (bits)",fontsize='large')
    plt.subplot(4,4,12)
    plt.xticks([])
    plt.yticks([])
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(euk_mis, euk_apw_mis)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, euk_mis, euk_apw_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    plt.subplot(4,4,14)
    #plt.xticks([])
    #
    plt.yticks([])
    plt.xlim(ic_min, ic_max)
    plt.ylim(ic_min, ic_max)
    r, p = revscatter(euk_ics_pp, euk_bayes_ics_pp)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, euk_ics_pp, euk_bayes_ics_pp)))
    plt.text(ic_xtext, ic_ytext,s='$r^2$ = %1.3f' % (r**2))
    plt.text(ic_xtext, ic_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    #plt.ylabel("Bayes",fontsize='large')
    plt.xlabel("Euk IC",fontsize='large')
    plt.subplot(4,4,16)
    #plt.xticks([])
    plt.xticks(mi_xticks, mi_xticks)
    plt.yticks([])
    plt.xlim(mi_min, mi_max)
    plt.ylim(mi_min, mi_max)
    r, p = revscatter(euk_mis, euk_bayes_mis)
    rmsd = sqrt(mean(zipWith(lambda x,y:(x-y)**2, euk_mis, euk_bayes_mis)))
    plt.text(mi_xtext, mi_ytext, s='$r^2$ = %1.3f' % (r**2))
    plt.text(mi_xtext, mi_ytext*0.8,s='$RMSD$ = %1.3f' % rmsd)
    #plt.axis('off')
    #plt.xlabel("MI (bits/column pair)",fontsize='large')
    plt.xlabel("Euk MI",fontsize='large')
    plt.tight_layout()
    maybesave("spoof-statistics-rmsd.pdf")
def experiment2(trials=100):
    """Scatter-plot the per-trial mean log values produced by experiment2_().

    Each of the `trials` iterations calls experiment2_() once and reduces
    every sequence it yields to the mean of the logs of its elements.  The
    resulting per-trial rows are transposed and unpacked as the positional
    arguments of scatter().

    Original author's note: APW models win, presumably because they have
    larger sigmas.
    """
    def mean_log(values):
        # Average of the element-wise logs of one sequence.
        return mean([log(v) for v in values])

    rows = []
    for _ in trange(trials):
        # One row per trial: one mean-log entry per sequence from experiment2_().
        rows.append([mean_log(group) for group in experiment2_()])
    scatter(*transpose(rows))