def test_spoof_motif_cftp_occ():
    """Check that spoofed motifs bracket the reference motif's mean occupancy.

    Draws 100 motifs with ``spoof_motif_cftp_occ`` and asserts that the
    reference motif's mean occupancy lies inside the ``(lb, ub)`` interval
    that ``mean_ci`` returns for the spoofed motifs' mean occupancies.
    """
    motif = [
        'CGGTGAACTA',
        'CGGTGTGCGA',
        'CGCTGTGCTG',
        'CGGGATGCAA',
        'CACGCTACGA',
        'CGCTATGCTA',
        'CGGTTGGCTA',
        'CGGCGTGCTA',
        'CGGTATATTG',
        'CGGGTTGCGA',
    ]
    mean_bio_occ = mean(occupancies(motif))
    motifs = spoof_motif_cftp_occ(motif, num_motifs=100)
    # A list comprehension yields a real list under both Python 2 and 3.
    spoof_occs = [mean(occupancies(m)) for m in motifs]
    lb, ub = mean_ci(spoof_occs)
    # One pre-formatted argument prints the same text whether ``print`` is
    # the Python 2 statement or the Python 3 function.
    print("%s %s %s" % (lb, mean_bio_occ, ub))
    assert_less_equal(lb, mean_bio_occ)
    assert_less_equal(mean_bio_occ, ub)
def spoof_motif_cftp_occ(motif, num_motifs=10, trials=1, sigma=None,
                         Ne_tol=10**-2, verbose=False):
    """Spoof motifs based on occupancy rather than motif IC.

    A matrix is drawn with ``sample_matrix(L, sigma)``, ``mu`` is obtained
    from ``approx_mu``, and ``Ne`` is then fitted with ``log_regress_spec2``
    so that the mean occupancy of sampled motifs matches the mean occupancy
    of ``motif``.

    Args:
        motif: sequence of equal-length sites; ``len(motif)`` sites are
            sampled per spoofed motif and ``len(motif[0])`` sets the width.
        num_motifs: number of spoofed motifs to return.
        trials: number of motifs sampled per evaluation of the objective
            while fitting ``Ne``.
        sigma: passed to ``sample_matrix``; when ``None`` it is derived via
            ``sigma_from_matrix(pssm_from_motif(motif, pc=1))``.
        Ne_tol: forwarded to ``log_regress_spec2`` as ``tol``.
        verbose: forwarded to ``sample_motif_cftp`` while fitting ``Ne``.

    Returns:
        A list of ``num_motifs`` motifs drawn with ``sample_motif_cftp``.
    """
    N = len(motif)
    L = len(motif[0])
    copies = 10 * N
    if sigma is None:
        # The PSSM is only needed to estimate sigma, so build it on demand.
        sigma = sigma_from_matrix(pssm_from_motif(motif, pc=1))
    # Single pre-formatted arguments print identically under Python 2 and 3.
    print("sigma: %s" % (sigma,))
    matrix = sample_matrix(L, sigma)
    mu = approx_mu(matrix, copies=copies, G=5 * 10**6)
    mean_bio_occ = mean(occupancies(motif))
    print("mu: %s" % (mu,))

    def f(Ne):
        """Mean sampled occupancy at ``Ne`` minus the reference occupancy."""
        motifs = [sample_motif_cftp(matrix, mu, Ne, N, verbose=verbose)
                  for _ in trange(trials)]
        return mean([mean(occupancies(m)) for m in motifs]) - mean_bio_occ

    # Starting points handed to log_regress_spec2.  An earlier, disabled
    # variant bracketed Ne by doubling and used bisect_interval_noisy_ref.
    x0s = [2, 10]
    Ne = log_regress_spec2(f, x0s, tol=Ne_tol)
    print("Ne: %s" % (Ne,))
    # NOTE(review): the final draws do not forward ``verbose`` -- confirm
    # that this is intentional.
    return [sample_motif_cftp(matrix, mu, Ne, N) for _ in trange(num_motifs)]
def f(Ne):
    """Return the mean occupancy of motifs sampled at ``Ne`` minus ``mean_bio_occ``.

    ``matrix``, ``mu``, ``N``, ``trials``, ``verbose`` and ``mean_bio_occ``
    are free names resolved from the surrounding scope.
    """
    # Draw every motif first, then score them, as the original did.
    sampled = []
    for _ in trange(trials):
        sampled.append(sample_motif_cftp(matrix, mu, Ne, N, verbose=verbose))
    occupancy_means = [mean(occupancies(candidate)) for candidate in sampled]
    return mean(occupancy_means) - mean_bio_occ
def test_spoof_motif_cftp_occ():
    """Spoofed motifs should reproduce the reference motif's mean occupancy.

    The test passes when the mean occupancy of the reference motif falls
    between the two bounds returned by ``mean_ci`` over the mean occupancies
    of 100 motifs produced by ``spoof_motif_cftp_occ``.
    """
    motif = [
        'CGGTGAACTA', 'CGGTGTGCGA', 'CGCTGTGCTG', 'CGGGATGCAA', 'CACGCTACGA',
        'CGCTATGCTA', 'CGGTTGGCTA', 'CGGCGTGCTA', 'CGGTATATTG', 'CGGGTTGCGA',
    ]
    mean_bio_occ = mean(occupancies(motif))
    motifs = spoof_motif_cftp_occ(motif, num_motifs=100)
    # Build an explicit list so mean_ci receives the same type on Python 2
    # and Python 3.
    spoof_occs = [mean(occupancies(m)) for m in motifs]
    lb, ub = mean_ci(spoof_occs)
    # Formatting into one string keeps the output identical for the
    # Python 2 print statement and the Python 3 print function.
    print("%s %s %s" % (lb, mean_bio_occ, ub))
    assert_less_equal(lb, mean_bio_occ)
    assert_less_equal(mean_bio_occ, ub)
def spoof_motif_cftp_occ(motif,
                         num_motifs=10,
                         trials=1,
                         sigma=None,
                         Ne_tol=10**-2,
                         verbose=False):
    """Spoof motifs based on occupancy rather than motif IC.

    Steps, as performed below: sample a matrix of width ``len(motif[0])``
    with ``sample_matrix``, compute ``mu`` with ``approx_mu``, fit ``Ne``
    with ``log_regress_spec2`` so that the sampled mean occupancy minus the
    reference mean occupancy is driven toward zero, then draw the requested
    number of motifs at that ``Ne``.

    Args:
        motif: sequence of equal-length sites used as the reference.
        num_motifs: how many spoofed motifs to return.
        trials: motifs sampled per objective evaluation during the fit.
        sigma: value given to ``sample_matrix``; if ``None`` it is computed
            as ``sigma_from_matrix(pssm_from_motif(motif, pc=1))``.
        Ne_tol: ``tol`` argument for ``log_regress_spec2``.
        verbose: passed through to ``sample_motif_cftp`` during the fit.

    Returns:
        List of ``num_motifs`` results of ``sample_motif_cftp``.
    """
    N = len(motif)
    L = len(motif[0])
    copies = 10 * N
    if sigma is None:
        # Only estimate sigma (and hence build the PSSM) when the caller
        # did not supply one.
        pssm = pssm_from_motif(motif, pc=1)
        sigma = sigma_from_matrix(pssm)
    # "%s" formatting of a 1-tuple gives the same text as the Python 2
    # statement ``print "sigma:", sigma`` and also runs on Python 3.
    print("sigma: %s" % (sigma,))
    matrix = sample_matrix(L, sigma)
    mu = approx_mu(matrix, copies=copies, G=5 * 10**6)
    mean_bio_occ = mean(occupancies(motif))
    print("mu: %s" % (mu,))

    def f(Ne):
        """Signed gap between sampled and reference mean occupancy at ``Ne``."""
        motifs = [
            sample_motif_cftp(matrix, mu, Ne, N, verbose=verbose)
            for _ in trange(trials)
        ]
        return mean([mean(occupancies(m)) for m in motifs]) - mean_bio_occ

    # Seeds for log_regress_spec2.  A previous, commented-out approach
    # searched for an upper bound by doubling and then called
    # bisect_interval_noisy_ref; it is no longer used.
    x0s = [2, 10]
    Ne = log_regress_spec2(f, x0s, tol=Ne_tol)
    print("Ne: %s" % (Ne,))
    # NOTE(review): ``verbose`` is not forwarded for these final draws --
    # confirm that is deliberate.
    return [sample_motif_cftp(matrix, mu, Ne, N) for _ in trange(num_motifs)]
def f(Ne):
    """Difference between the sampled mean occupancy at ``Ne`` and ``mean_bio_occ``.

    Uses the free names ``matrix``, ``mu``, ``N``, ``trials``, ``verbose``
    and ``mean_bio_occ``, which must be supplied by the surrounding scope.
    """
    draws = [
        sample_motif_cftp(matrix, mu, Ne, N, verbose=verbose)
        for _ in trange(trials)
    ]
    # Score after all draws are complete, matching the original ordering.
    scores = []
    for draw in draws:
        scores.append(mean(occupancies(draw)))
    sampled_mean = mean(scores)
    return sampled_mean - mean_bio_occ
def mean_occupancy(motif):
    """Return ``mean`` applied to the result of ``occupancies(motif)``."""
    occs = occupancies(motif)
    return mean(occs)