Esempio n. 1
0
def cumsum_test():
    """Plot cumulative ArcA read density with positions ranked by PSSM score.

    Genome positions are ordered from highest to lowest by (a) the combined
    forward/reverse-strand score and (b) the Boltzmann probability derived
    from that score.  The cumulative sum of the combined read-density map is
    plotted in each ordering, together with five shuffled copies of the
    density map (drawn in red) as controls.

    Takes no arguments and returns nothing; it reads the module-level names
    ``G``, ``genome``, ``Escherichia_coli``, ``np``, ``plt`` and ``random``
    plus the scoring/density helpers, prints progress, and opens a plot
    window.
    """
    arca_reads = get_arca_reads(1000000)
    true_rdm = density_from_reads(arca_reads, G)
    pssm = make_pssm(Escherichia_coli.ArcA)
    # presumably the two entries are per-strand density maps -- TODO confirm
    comb_rdm = true_rdm[0] + true_rdm[1]
    print("fwd_scores")
    fwd_scores = score_genome_np(pssm, genome)
    print("rev_scores")
    rev_scores = score_genome_np(pssm, wc(genome))
    # log(exp(fwd) + exp(rev)), evaluated without overflowing/underflowing
    # the intermediate exponentials.
    scores = np.logaddexp(fwd_scores, rev_scores)
    # Softmax of the scores; subtracting the maximum first leaves the result
    # unchanged mathematically but keeps exp() inside floating-point range.
    boltzmann_weights = np.exp(scores - np.max(scores))
    probs = boltzmann_weights / np.sum(boltzmann_weights)
    print("sorting scores")
    score_js = sorted_indices(scores)[::-1]  # order scores from greatest to least
    print("sorting probs")
    prob_js = sorted_indices(probs)[::-1]  # ditto
    plt.plot(cumsum(rslice(comb_rdm, score_js)), label="scores")
    plt.plot(cumsum(rslice(comb_rdm, prob_js)), label="boltzmann probs")
    # Shuffle a copy so the original density map is left untouched.
    comb_rdm_copy = list(comb_rdm)
    controls = 5
    for i in range(controls):
        print(i)
        random.shuffle(comb_rdm_copy)
        plt.plot(cumsum(comb_rdm_copy), color='r')
    plt.legend(loc=0)
    # NOTE(review): the axes are clamped to the unit square, which only shows
    # the curves if cumsum() yields normalised values -- confirm.
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.show()
Esempio n. 2
0
def make_correlation_structure_by_length():
    """Run analyze_mi_tests2 on eukaryotic motifs grouped by size decade.

    Motifs are binned by ``len(motif)`` into [10, 100), [100, 1000),
    [1000, 10000) and [10000, 100000); each bin is analysed separately with
    the threshold ``q`` computed by ``fdr`` over all eukaryotic tests
    concatenated together.
    """
    q = fdr(concat(euk_tests))
    plt.close()  # get rid of output from cluster_motif
    motif_sizes = [len(motif) for motif in euk_motifs]
    index_groups = []
    for exponent in range(1, 5):
        lower = 10 ** exponent
        upper = 10 ** (exponent + 1)
        in_decade = lambda size: lower <= size < upper
        index_groups.append(indices_where(motif_sizes, in_decade))
    for position, group in tqdm(enumerate(index_groups)):
        # position is zero-based, so position + 1 is the decade's lower exponent
        decade_label = "10**%s" % (position + 1)
        analyze_mi_tests2(
            rslice(euk_tests, group),
            rslice(euk_motifs, group),
            label=decade_label,
            q=q,
        )
Esempio n. 3
0
def make_correlation_structure_by_cluster_figure():
    """Run analyze_mi_tests2 on eukaryotic motifs grouped by cluster count.

    Every motif is passed to ``cluster_motif`` three times; the length of
    each result is averaged over the three runs and rounded.  Motifs whose
    rounded mean is 1, 2, 3, 4 or 5 are analysed as separate groups, using
    the threshold ``q`` computed by ``fdr`` over all eukaryotic tests.
    """
    from motif_clustering import cluster_motif
    q = fdr(concat(euk_tests))
    clustering_runs = []
    for _ in range(3):
        clustering_runs.append(
            [cluster_motif(motif) for motif in tqdm(euk_motifs)]
        )
    plt.close()  # get rid of output from cluster_motif
    sizes_per_run = [
        [len(clustering) for clustering in run] for run in clustering_runs
    ]
    # transpose: one row per motif holding its size in each of the runs
    mean_sizes = [round(mean(sizes)) for sizes in transpose(sizes_per_run)]
    index_groups = []
    for target in range(1, 6):
        index_groups.append(
            indices_where(mean_sizes, lambda size: size == target)
        )
    for position, group in tqdm(enumerate(index_groups)):
        analyze_mi_tests2(
            rslice(euk_tests, group),
            rslice(euk_motifs, group),
            label=str(position + 1),
            q=q,
        )
def main():
    """Plot observed motif MI against averaged control MI, sorted by observed MI.

    Prokaryotic motifs with more than 200 sites are subsampled to 200 sites
    without replacement.  For each motif the observed ``motif_mi`` is
    compared with the mean ``motif_mi`` of 1000 maxent-spoofed, 1000
    uniform-spoofed and 1000 permuted motifs.
    """
    def mean_mi(motifs):
        # Average motif_mi over an already-built collection of motifs.
        return mean([motif_mi(candidate) for candidate in motifs])

    prok_motifs, _euk_motifs = get_motifs()
    capped_motifs = []
    for motif in tqdm(prok_motifs):
        if len(motif) > 200:
            motif = sample(200, motif, replace=False)
        capped_motifs.append(motif)
    prok_motifs = capped_motifs

    observed_mis = [motif_mi(motif) for motif in prok_motifs]
    order = sorted_indices(observed_mis)

    maxent_mis = []
    for motif in tqdm(prok_motifs):
        maxent_mis.append(mean_mi(spoof_maxent_motifs(motif, 1000)))

    # NOTE(review): computed but never plotted below -- confirm whether a
    # plot call is missing or this control can be dropped.
    uniform_mis = []
    for motif in tqdm(prok_motifs):
        uniform_mis.append(mean_mi(spoof_uniform_motifs(motif, 1000)))

    perm_mis = []
    for motif in tqdm(prok_motifs):
        permuted = [perm_motif(motif) for _ in range(1000)]
        perm_mis.append(mean_mi(permuted))

    for series in (observed_mis, maxent_mis, perm_mis):
        plt.plot(rslice(series, order))
def sample_site_cftp(matrix, mu, Ne):
    """Set up state for sampling a site from ``matrix`` (CFTP-style, per the name).

    matrix -- sequence of rows, one per site position; each row is indexed
              in "ACGT" order (see the argmin/argmax lookups below).
    mu     -- offset subtracted from a site's score inside log_phat.
    Ne     -- log_phat is scaled by -(Ne - 1).

    NOTE(review): the visible body only builds the starting trajectories, the
    per-position base orderings and the nested helpers; it has no sampling
    loop and no return statement, so the function looks truncated -- confirm
    against the original source.  The tuple parameter in mutate_site is
    Python 2-only syntax.
    """
    L = len(matrix)
    f = seq_scorer(matrix)
    def log_phat(s):
        # Returns -(Ne - 1) * log(1 + exp(f(s) - mu)).
        ep = f(s)
        nu = Ne - 1
        return -nu*log(1 + exp(ep - mu))
    # first_site / last_site are not used in the visible part of the body.
    first_site = "A"*L
    last_site = "T"*L
    # Lowest-valued base per row is treated as "best", highest as "worst".
    best_site = "".join(["ACGT"[argmin(row)] for row in matrix])
    worst_site = "".join(["ACGT"[argmax(row)] for row in matrix])
    #middle_sites  = [[random_site(L)] for i in range(10)]
    #trajs = [[best_site]] + middle_sites + [[worst_site]]
    # Two trajectories, started from the two extreme sites.
    trajs = [[best_site],[worst_site]]
    # Per position: "ACGT" reordered by sorted_indices(row) -- presumably
    # ascending row value; verify against sorted_indices/rslice.
    ords = [rslice("ACGT",sorted_indices(row)) for row in matrix]
    def mutate_site(site,(ri,direction)):
        # Move the base at position ri by `direction` places within that
        # position's ordering, clamping to the ends (indices 0..3), and
        # return subst(site, new_base, ri).
        b = (site[ri])
        idx = ords[ri].index(b)
        idxp = min(max(idx + direction,0),3)
        bp = ords[ri][idxp]
        return subst(site,bp,ri)
Esempio n. 6
0
def esp_ref(ks,j):
    """Return the j-th elementary symmetric polynomial evaluated on ks.

    Brute-force reference: enumerates every j-element combination of index
    positions into ks and adds up product(rslice(ks, combination)) over all
    of them.
    """
    positions = range(len(ks))
    total = 0
    for chosen in itertools.combinations(positions, j):
        total = total + product(rslice(ks, chosen))
    return total
Esempio n. 7
0
def linear_interpolate(xs, ys):
    js = sorted_indices(xs)
    xs = sorted(xs)
    ys = rslice(ys, js)