Example #1
0
def infer_synthetic_energy_model(num_reads=100000):
    """the whole show: infer the energy model from true reads"""
    G = len(genome)
    w = 10
    true_matrix = [[-2, 0, 0, 0] for _ in range(w)]
    true_mu = -20
    true_eps = score_genome_np(true_matrix, genome)
    true_ps = fd_solve_np(true_eps, true_mu)
    MFL = 250 #mean frag length = 250bp
    lamb = 1/250.0
    true_reads = reads_from_ps(true_ps, MFL, min_seq_len=75, num_reads=num_reads)
    true_rdm = density_from_reads(true_reads, G)
    init_matrix = random_energy_matrix(w)
    init_mu = -20
    init_scores = score_genome_np(init_matrix, genome)
    init_state = ((init_matrix, init_mu), init_scores)
    logf = lambda state:timestamp(complete_log_likelihood(state, true_rdm, lamb, num_reads=num_reads))
    rprop = lambda state:complete_rprop(state, genome)
    verbose = True
    iterations = 50000
    print "true_ll:", logf(((true_matrix, true_mu), true_eps))
    matrix_chain = mh(logf, proposal=rprop, x0=init_state, dprop=log_dprop, 
                      capture_state=capture_state, verbose=verbose, 
                      use_log=True, iterations=iterations, modulus=100)
    return matrix_chain
Example #2
0
def complete_log_likelihood(state, true_rdm, lamb, num_reads=100000):
    """Compute log likelihood of true_rdm given energy model (state).
    
    (1) Simulate reads from energy model.
    (2) compare simulated reads to true reads with read_log_likelihood."""
    print "num_reads:", num_reads, "%e" % num_reads
    (matrix, mu), all_eps = state
    ps = fd_solve_np(all_eps, mu)
    print "copy number:", np.sum(ps)
    G = len(ps)
    MFL = 1/lamb
    print "generating reads"
    proposed_reads = reads_from_ps(ps, MFL, min_seq_len=75, num_reads=num_reads)
    print "mapping reads"
    proposed_rdm = density_from_reads(proposed_reads, G)
    #proposed_rdm = density_from_ps(ps, MFL, min_seq_len=75, num_reads=num_reads)
    return rdm_log_likelihood(true_rdm, proposed_rdm)
Example #3
0
def gradient_descent_experiment(true_rdm=None, num_reads=100000):
    #genome = get_ecoli_genome(at_lab=False)
    G = len(genome)
    w = 10
    mfl = 250
    lamb = 1.0/mfl
    simulating_data = False
    if true_rdm is None:
        simulating_data = True
        true_matrix = [[-2, 0, 0, 0] for i in range(w)]
        true_mu = -20
        true_eps = score_genome_np(true_matrix, genome)
        true_ps = fd_solve_np(true_eps, true_mu)
        true_reads = reads_from_ps(true_ps, mfl, min_seq_len=75, num_reads=num_reads)
        true_rdm = density_from_reads(true_reads, G)
        true_state = ((true_matrix, true_mu), true_eps)
    true_ll = logf(true_state) if simulating_data else None
    matrix = random_energy_matrix(w)
    mu = -20
    eps = score_genome_np(matrix, genome)
    init_state = ((matrix, mu), eps)
    logf = lambda state:timestamp(complete_log_likelihood(state, true_rdm, lamb, num_reads=num_reads))
    dw = 0.1
    dmu = 0.1
    old_ll = 0
    print "true_ll:", true_ll
    cur_ll = logf(init_state)
    eta = 10**-7 # learning rate
    iterations = 0
    while cur_ll > old_ll or iterations == 0:
        old_ll = cur_ll
        dmat = [[0]*4 for i in range(w)]
        for i in range(w):
            for j in range(4):
                print "i, j:", i, j
                new_mat = [row[:] for row in matrix]
                new_mat[i][j] += dw
                fwd_eps, rev_eps = eps
                new_eps = update_scores_np(fwd_eps, rev_eps, i, j, dw, w, genome)
                new_state = ((new_mat, mu), new_eps)
                new_ll = logf(new_state)
                print "cur ll, new_ll:",  cur_ll, new_ll, "(improvement)" if new_ll > cur_ll else "(worsening)"
                delta_w = (new_ll - cur_ll)/dw * eta
                print "delta_w:", delta_w
                dmat[i][j] = delta_w
        new_mu = mu + dmu
        new_state = ((matrix, new_mu), eps)
        new_ll = logf(new_state)
        print "mu:"
        print "cur ll, new_ll:",  cur_ll, new_ll, "(improvement)" if new_ll > cur_ll else "(worsening)"
        delta_mu = (new_ll - cur_ll)/dmu * eta
        print "delta_mu:", delta_mu
        old_matrix = [row[:] for row in matrix]
        for i in range(w):
            for j in range(4):
                matrix[i][j] += dmat[i][j]
        old_eps = np.array(eps)
        eps = score_genome_np(matrix, genome)
        old_mu = mu
        mu += delta_mu
        cur_state = ((matrix, mu), eps)
        cur_ll = logf(cur_state)
        print "\nresults of iteration %s:" % iterations
        pprint(matrix)
        print mu
        print "likelihood:", old_ll, "->", cur_ll
        iterations += 1
    return ((old_matrix, old_mu), old_eps)
Example #4
0
    MFL = 1/lamb
    print "generating reads"
    proposed_reads = reads_from_ps(ps, MFL, min_seq_len=75, num_reads=num_reads)
    print "mapping reads"
    proposed_rdm = density_from_reads(proposed_reads, G)
    #proposed_rdm = density_from_ps(ps, MFL, min_seq_len=75, num_reads=num_reads)
    return rdm_log_likelihood(true_rdm, proposed_rdm)

def rdm_from_state((matrix, mu), num_reads=100000, eps=None, mfl=250):
    if eps is None:
        print "scoring genome"
        eps = score_genome_np(matrix, genome)
    print "solving ps"
    ps = fd_solve_np(eps, mu)
    print "generating reads"
    reads = reads_from_ps(ps, mfl, min_seq_len=75, num_reads=num_reads)
    print "mapping reads"
    rdm = density_from_reads(reads, G)
    return rdm

def plot_state(state, num_reads=100000):
    """Plot the first component of the read density map simulated for ``state``.

    ``state`` may be either ``((matrix, mu), eps)`` or a bare
    ``(matrix, mu)`` pair; in the latter case the scores are computed with
    ``score_genome_np`` on the module-level ``genome``.
    """
    first, second = state
    if type(first) is tuple:
        # Full state: the model pair comes first, precomputed scores second.
        matrix, mu = first
        eps = second
    else:
        # Bare model: scores have to be computed here.
        matrix, mu = first, second
        eps = score_genome_np(matrix, genome)
    rdm = rdm_from_state((matrix, mu), num_reads, eps)
    plt.plot(rdm[0])