def cumsum_test():
    """Plot cumulative ArcA read density with positions ranked by PSSM score.

    Two rankings are drawn: one by the combined log score of the two
    scoring passes (genome and ``wc(genome)``), one by the normalised
    exponential of that score.  Five cumulative curves of a repeatedly
    shuffled copy of the density are drawn in red as controls.
    Progress messages are printed along the way and the figure is shown
    at the end; nothing is returned.
    """
    reads = get_arca_reads(1000000)
    densities = density_from_reads(reads, G)
    arca_pssm = make_pssm(Escherichia_coli.ArcA)
    # Add the first two entries of the density (presumably one per strand).
    combined_density = densities[0] + densities[1]

    print("fwd_scores")
    forward = score_genome_np(arca_pssm, genome)
    print("rev_scores")
    backward = score_genome_np(arca_pssm, wc(genome))

    # Log of the summed exponentials of the two score tracks.
    scores = np.log(np.exp(forward) + np.exp(backward))
    weights = np.exp(scores)
    probs = weights / np.sum(weights)

    print("sorting scores")
    by_score = sorted_indices(scores)[::-1]  # greatest to least
    print("sorting probs")
    by_prob = sorted_indices(probs)[::-1]  # greatest to least

    rankings = ((by_score, "scores"), (by_prob, "boltzmann probs"))
    for ranking, curve_label in rankings:
        plt.plot(cumsum(rslice(combined_density, ranking)), label=curve_label)

    # Controls: the same list is reshuffled in place before each curve.
    shuffled = list(combined_density)
    n_controls = 5
    for control in range(n_controls):
        print(control)
        random.shuffle(shuffled)
        plt.plot(cumsum(shuffled), color='r')

    plt.legend(loc=0)
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.show()
def make_correlation_structure_by_length():
    """Run ``analyze_mi_tests2`` on ``euk_motifs`` binned by decade of size.

    Motifs are grouped by ``len(motif)`` into the half-open ranges
    [10, 100), [100, 1000), [1000, 10000) and [10000, 100000).  Each group
    is analysed together with its matching ``euk_tests`` entries, labelled
    "10**1" through "10**4", using one ``q`` computed by ``fdr`` over all
    of the concatenated tests.
    """
    def in_decade(exponent):
        # Predicate for 10**exponent <= size < 10**(exponent + 1).
        return lambda size: 10**exponent <= size < 10**(exponent + 1)

    q = fdr(concat(euk_tests))
    plt.close()  # close the current figure before the analysis plots
    motif_sizes = [len(motif) for motif in euk_motifs]
    decade_bins = [indices_where(motif_sizes, in_decade(exponent))
                   for exponent in range(1, 5)]
    for bin_number, members in tqdm(enumerate(decade_bins, 1)):
        analyze_mi_tests2(
            rslice(euk_tests, members),
            rslice(euk_motifs, members),
            label="10**%s" % bin_number,
            q=q,
        )
def make_correlation_structure_by_cluster_figure():
    """Run ``analyze_mi_tests2`` on ``euk_motifs`` grouped by cluster count.

    Every motif is passed through ``cluster_motif`` three times.  The
    length of each result (presumably the number of clusters found) is
    averaged over the three passes and rounded, and motifs whose rounded
    mean equals 1, 2, 3, 4 or 5 form the groups.  Each group is analysed
    with its matching ``euk_tests`` entries under the label "1".."5",
    using one ``q`` computed by ``fdr`` over all concatenated tests.
    """
    from motif_clustering import cluster_motif

    def equals(target):
        # Predicate selecting values equal to ``target``.
        return lambda value: value == target

    q = fdr(concat(euk_tests))

    clustering_runs = []
    for _ in range(3):
        clustering_runs.append(
            [cluster_motif(motif) for motif in tqdm(euk_motifs)])
    plt.close()  # get rid of output from cluster_motif

    # One row of result lengths per run, transposed to one tuple per motif.
    counts_per_run = [[len(result) for result in run]
                      for run in clustering_runs]
    rounded_means = [round(mean(counts))
                     for counts in transpose(counts_per_run)]

    groups = [indices_where(rounded_means, equals(count))
              for count in range(1, 6)]
    for group_number, members in tqdm(enumerate(groups, 1)):
        analyze_mi_tests2(
            rslice(euk_tests, members),
            rslice(euk_motifs, members),
            label=str(group_number),
            q=q,
        )
def main():
    """Plot observed motif MI against spoofed and permuted controls.

    Prokaryotic motifs with more than 200 sites are subsampled to 200
    without replacement.  For every motif the observed ``motif_mi`` is
    compared with the mean ``motif_mi`` over 1000 maxent spoofs, 1000
    uniform spoofs and 1000 permuted copies.  The observed, maxent and
    permutation series are plotted in order of increasing observed MI
    (assuming ``sorted_indices`` sorts ascending).
    """
    def mean_mi(motifs):
        # Average motif_mi over a collection of motifs.
        return mean([motif_mi(m) for m in motifs])

    prok_motifs, euk_motifs = get_motifs()

    capped_motifs = []
    for motif in tqdm(prok_motifs):
        if len(motif) > 200:
            motif = sample(200, motif, replace=False)
        capped_motifs.append(motif)
    prok_motifs = capped_motifs

    mis = [motif_mi(motif) for motif in prok_motifs]
    order = sorted_indices(mis)

    maxent_mis = [mean_mi(spoof_maxent_motifs(motif, 1000))
                  for motif in tqdm(prok_motifs)]
    # NOTE(review): uniform_mis is computed but not plotted in the visible
    # body -- confirm whether a plot call is missing.
    uniform_mis = [mean_mi(spoof_uniform_motifs(motif, 1000))
                   for motif in tqdm(prok_motifs)]
    perm_mis = [mean_mi([perm_motif(motif) for _ in range(1000)])
                for motif in tqdm(prok_motifs)]

    for series in (mis, maxent_mis, perm_mis):
        plt.plot(rslice(series, order))
def sample_site_cftp(matrix, mu, Ne):
    """Set up site sampling for ``matrix`` (coupling from the past, judging
    by the name -- TODO confirm).

    The visible body only prepares state: a scorer for ``matrix``, an
    unnormalised log weight ``log_phat``, the two extreme starting sites,
    per-position base orderings and a single-position mutation helper.
    NOTE(review): no sampling loop and no ``return`` for the outer function
    appear in the visible body -- confirm against the full source.

    Args:
        matrix: sequence of rows, one per site position; the entries of
            each row are addressed in "ACGT" order.
        mu: value subtracted from the site score inside ``log_phat``
            (presumably a chemical potential -- confirm).
        Ne: ``Ne - 1`` is used as the multiplier ``nu`` in ``log_phat``
            (presumably an effective population size -- confirm).
    """
    L = len(matrix)  # number of positions in a site
    f = seq_scorer(matrix)
    def log_phat(s):
        # -nu * log(1 + exp(score(s) - mu)), with nu = Ne - 1.
        ep = f(s)
        nu = Ne - 1
        return -nu*log(1 + exp(ep - mu))
    first_site = "A"*L
    last_site = "T"*L
    # Sites built from the argmin / argmax entry of every row.
    best_site = "".join(["ACGT"[argmin(row)] for row in matrix])
    worst_site = "".join(["ACGT"[argmax(row)] for row in matrix])
    #middle_sites = [[random_site(L)] for i in range(10)]
    #trajs = [[best_site]] + middle_sites + [[worst_site]]
    # Two trajectories, started from the two extreme sites.
    trajs = [[best_site],[worst_site]]
    # Per position: the bases reordered by sorted_indices(row)
    # (presumably ascending matrix value -- confirm).
    ords = [rslice("ACGT",sorted_indices(row)) for row in matrix]
    def mutate_site(site,(ri,direction)):
        # Shift the base at position ri by `direction` steps within that
        # position's ordering, clamped to the valid index range 0..3.
        b = (site[ri])
        idx = ords[ri].index(b)
        idxp = min(max(idx + direction,0),3)
        bp = ords[ri][idxp]
        # presumably returns `site` with bp placed at index ri -- confirm
        return subst(site,bp,ri)
def esp_ref(ks, j):
    """Compute the jth elementary symmetric polynomial on ks by brute force.

    Adds up the product of the members of every size-``j`` combination of
    ``ks``.  The number of terms is C(len(ks), j), so this is only suitable
    for small inputs.

    Args:
        ks: iterable of values supporting ``*`` and ``+``; any iterable is
            accepted, not only indexable sequences.
        j: non-negative degree of the polynomial.

    Returns:
        The sum of products over all size-``j`` combinations: 1 when
        ``j == 0`` (the empty product) and 0 when ``j`` exceeds the number
        of values.

    Raises:
        ValueError: if ``j`` is negative (from ``itertools.combinations``).
    """
    total = 0
    # Combine the values directly instead of building index tuples and
    # slicing ks with them.
    for combo in itertools.combinations(ks, j):
        term = 1  # empty product, so degree 0 evaluates to 1
        for k in combo:
            term = term * k
        total = total + term
    return total
def linear_interpolate(xs, ys): js = sorted_indices(xs) xs = sorted(xs) ys = rslice(ys, js)