def spoof_motif_cftp(motif, num_motifs=10, trials=1, sigma=None, Ne_tol=10**-2, verbose=False):
    """Sample motifs with sample_motif_cftp, tuned against the IC of `motif`.

    A matrix is drawn with `sample_matrix`, `mu` is obtained from
    `approx_mu`, and `Ne` is then fitted by handing `log_regress_spec2`
    the objective "mean IC of sampled motifs minus IC of `motif`"
    (presumably it searches for the zero of that objective -- confirm
    against `log_regress_spec2`).

    Parameters:
        motif: indexable collection of sites; `len(motif)` and
            `len(motif[0])` are used as the motif size and site length.
        num_motifs: number of motifs in the returned list.
        trials: number of motifs sampled per objective evaluation.
        sigma: passed to `sample_matrix`; when None it is derived from
            `motif` via `sigma_from_matrix(pssm_from_motif(motif, pc=1))`.
        Ne_tol: forwarded to `log_regress_spec2` as `tol`.
        verbose: forwarded to `sample_motif_cftp` while fitting `Ne` only;
            the final sampling calls do not receive it.

    Returns:
        A list of `num_motifs` motifs sampled with the fitted `Ne`.
    """
    n = len(motif)
    L = len(motif[0])
    copies = 10 * n
    if sigma is None:
        sigma = sigma_from_matrix(pssm_from_motif(motif, pc=1))
    # Single-argument print(...) yields the same output as the Python 2
    # print statement and is also valid Python 3 syntax.
    print("sigma: %s" % (sigma,))
    bio_ic = motif_ic(motif)
    matrix = sample_matrix(L, sigma)
    mu = approx_mu(matrix, copies=copies, G=5 * 10**6)
    print("mu: %s" % (mu,))

    def f(Ne):
        # Signed gap between the mean sampled IC at this Ne and the target IC.
        motifs = [sample_motif_cftp(matrix, mu, Ne, n, verbose=verbose)
                  for _ in trange(trials)]
        return mean([motif_ic(sampled) for sampled in motifs]) - bio_ic

    # Starting values for Ne handed to the solver.
    x0s = [2, 10]
    Ne = log_regress_spec2(f, x0s, tol=Ne_tol)
    print("Ne: %s" % (Ne,))
    return [sample_motif_cftp(matrix, mu, Ne, n) for _ in trange(num_motifs)]
def sample_motif_with_ic(n, L):
    """Sample Metropolis-Hastings motifs over a range of Ne values.

    Draws a matrix with `sample_matrix(L, sigma=1)`, obtains `mu` from
    `approximate_mu`, and for each Ne in 2..9 samples 10 motifs with
    `sample_motif_mh`.

    NOTE(review): nothing is returned and `ringer_site` is never read, so
    the function looks unfinished -- confirm the intended return value.
    """
    trials = 10
    Nes = range(2, 10)
    matrix = sample_matrix(L, sigma=1)
    # One letter per matrix column, chosen at the column's argmin.
    ringer_site = "".join("ACGT"[argmin(column)] for column in matrix)
    mu = approximate_mu(matrix, 10 * n, G=5 * 10**6)
    motifs = []
    for Ne in tqdm(Nes):
        per_ne = [sample_motif_mh(matrix, mu, Ne, n) for _ in range(trials)]
        motifs.append(per_ne)
def sample_site_study(trials=1000):
    """Time `sample_site_bf` at randomly chosen (sigma, Ne, mu) settings.

    Each trial picks sigma, Ne and mu from fixed 100-point grids, draws a
    fresh matrix of length 10, and measures the wall-clock time needed to
    sample 10 sites.

    Returns:
        dict mapping (sigma, Ne, mu) to the elapsed seconds for that trial;
        a repeated parameter triple overwrites the earlier entry.
    """
    site_length = 10
    sigma_grid = np.linspace(0, 10, 100)
    ne_grid = np.linspace(2, 10, 100)
    mu_grid = np.linspace(-10, 10, 100)
    timings = {}
    for _ in trange(trials):
        # Draw order (sigma, Ne, mu) is kept so the random stream is unchanged.
        sigma, Ne, mu = [random.choice(grid)
                         for grid in (sigma_grid, ne_grid, mu_grid)]
        matrix = sample_matrix(site_length, sigma)
        ringer_site = ringer_motif(matrix, 1)[0]
        started = time.time()
        # The sampled sites are discarded; only the time to draw them matters.
        sites = [sample_site_bf(matrix, mu, Ne, ringer_site)
                 for _draw in range(10)]
        elapsed = time.time() - started
        timings[(sigma, Ne, mu)] = elapsed
    return timings
def spoof_motif_ar(motif, num_motifs=10, trials=1, sigma=None, Ne_tol=10**-4):
    """Sample motifs with sample_motif_ar, tuned against the IC of `motif`.

    A matrix is drawn with `sample_matrix`, `mu` is obtained from
    `approx_mu`, and `Ne` is fitted by passing `bisect_interval_noisy` the
    objective "mean IC of sampled motifs minus IC of `motif`" (presumably
    it searches for the zero of that objective -- confirm against
    `bisect_interval_noisy`).

    Parameters:
        motif: indexable collection of sites; `len(motif)` and
            `len(motif[0])` are used as the motif size and site length.
        num_motifs: number of motifs in the returned list.
        trials: number of motifs sampled per objective evaluation.
        sigma: passed to `sample_matrix`; when None it is derived from
            `motif` via `sigma_from_matrix(pssm_from_motif(motif, pc=1))`.
        Ne_tol: currently not used by this function.

    Returns:
        A list of `num_motifs` motifs sampled with the fitted `Ne`.
    """
    n = len(motif)
    L = len(motif[0])
    copies = 10 * n
    if sigma is None:
        sigma = sigma_from_matrix(pssm_from_motif(motif, pc=1))
    # Single-argument print(...) yields the same output as the Python 2
    # print statement and is also valid Python 3 syntax.
    print("sigma: %s" % (sigma,))
    bio_ic = motif_ic(motif)
    matrix = sample_matrix(L, sigma)
    mu = approx_mu(matrix, copies=copies, G=5 * 10**6)
    print("mu: %s" % (mu,))

    def f(Ne):
        # Signed gap between the mean sampled IC at this Ne and the target IC.
        motifs = [sample_motif_ar(matrix, mu, Ne, n) for _ in trange(trials)]
        return mean([motif_ic(sampled) for sampled in motifs]) - bio_ic

    x0 = 2
    print("Ne guess: %s" % (x0,))
    Ne = bisect_interval_noisy(f, x0=x0, iterations=100, lb=1,
                               verbose=False, w=0.5)
    print("Ne: %s" % (Ne,))
    return [sample_motif_ar(matrix, mu, Ne, n) for _ in trange(num_motifs)]
def site_sampling_methods_study(n=50, num_motifs=10, plot=True):
    """Check that three motif samplers draw from the same distribution.

    The samplers compared are:
        * brute force          (`sample_motif_bf`)
        * rejection sampling   (`sample_motif_ar`)
        * Metropolis-Hastings  (`sample_motif_mh`)

    One matrix (L=10, sigma=1) is shared by all samplers, with Ne=5 and
    mu=-10. For each sampler `num_motifs` motifs of size `n` are drawn and
    timed. The per-sampler IC summaries and time per motif are printed,
    followed by a Mann-Whitney U test for each pair of samplers.

    Parameters:
        n: passed to each sampler as the motif size.
        num_motifs: number of motifs drawn per sampler.
        plot: when true, draw a box plot of the IC values per sampler.

    Returns:
        None; results are printed (and optionally plotted).
    """
    L = 10
    sigma = 1
    matrix = sample_matrix(L, sigma)
    Ne = 5
    mu = -10

    # Single-argument print(...) yields the same output as the Python 2
    # print statement and is also valid Python 3 syntax.
    print("bf")
    t0 = time.time()
    bf_motifs = [sample_motif_bf(matrix, mu, Ne, n, verbose=True)
                 for _ in trange(num_motifs)]
    bf_time = time.time() - t0

    print("ar")
    t0 = time.time()
    ar_motifs = [sample_motif_ar(matrix, mu, Ne, n)
                 for _ in range(num_motifs)]
    ar_time = time.time() - t0

    print("mh")
    t0 = time.time()
    mh_motifs = [sample_motif_mh(matrix, mu, Ne, n)
                 for _ in range(num_motifs)]
    mh_time = time.time() - t0

    icss = mmap(motif_ic, [bf_motifs, ar_motifs, mh_motifs])
    print("ics: %s" % ([mean_ci(ics) for ics in icss],))
    per_motif = [t / num_motifs for t in [bf_time, ar_time, mh_time]]
    print("time per motif: %s" % (per_motif,))
    if plot:
        plt.boxplot(icss)
    # Pairwise comparison of the IC samples from each sampler.
    for xs, ys in choose2(icss):
        print(mannwhitneyu(xs, ys))
def mh_cftp_comparison():
    """Collect motif ICs from the MH and CFTP samplers on one shared matrix.

    Draws a matrix with `sample_matrix(10, 1)` and, with mu=-10 and Ne=5,
    samples 100 motifs of size 50 from each of `sample_motif_mh` and
    `sample_motif_cftp`, recording `motif_ic` of every motif.

    NOTE(review): both IC lists are built but nothing is returned or
    printed -- confirm what the caller is meant to receive.
    """
    mu = -10
    Ne = 5
    matrix = sample_matrix(10, 1)

    mh_ics = []
    for _ in trange(100):
        mh_motif = sample_motif_mh(matrix, mu, Ne, 50)
        mh_ics.append(motif_ic(mh_motif))

    cftp_ics = []
    for _ in trange(100):
        cftp_motif = sample_motif_cftp(matrix, mu, Ne, 50)
        cftp_ics.append(motif_ic(cftp_motif))
def f(sigma, Ne):
    """Return the IC of one CFTP-sampled motif for the given sigma and Ne.

    `L` and `n` are not parameters; they are looked up in the surrounding
    scope at call time.
    """
    sampled_matrix = sample_matrix(L, sigma)
    chem_pot = approx_mu(sampled_matrix, 10 * n)
    sampled_motif = sample_motif_cftp(sampled_matrix, chem_pot, Ne, n)
    return motif_ic(sampled_motif)
def f(sigma, Ne):
    """Return the IC of one motif from sample_motif_ar for the given sigma and Ne.

    `L` and `n` are not parameters; they are looked up in the surrounding
    scope at call time. The sampler is called with `modulus=10**5`.
    """
    sampled_matrix = sample_matrix(L, sigma)
    chem_pot = approx_mu(sampled_matrix, 10 * n)
    sampled_motif = sample_motif_ar(sampled_matrix, chem_pot, Ne, n,
                                    modulus=10**5)
    return motif_ic(sampled_motif)