Beispiel #1
0
def write_chip_seq_data(chip_seq_data, filename):
    """Write one ``index,base,value`` line per genome position to *filename*.

    The genome returned by ``get_ecoli_genome()`` is paired positionally
    (via ``zip``) with *chip_seq_data*, so output stops at the shorter of
    the two.  The file is opened in ``'w'`` mode, so any existing content is
    overwritten.
    """
    ecoli_genome = get_ecoli_genome()
    with open(filename, 'w') as out_file:
        indexed_pairs = enumerate(zip(ecoli_genome, chip_seq_data))
        # verbose_gen wraps the iterable; presumably it reports progress
        # every 10**5 items -- confirm against its definition.
        for position, pair in verbose_gen(indexed_pairs, 10**5):
            nucleotide, signal = pair
            out_file.write("%s,%s,%s\n" % (position, nucleotide, signal))
Beispiel #2
0
def chip_ps_const_frag_length_ref(ps,mean_frag_length,cells=10000,verbose=False):
    """Compute ``cells * alo(window)`` for a window around every index of *ps*.

    For each index ``i`` the window is the slice of *ps* from
    ``i - mean_frag_length/2`` (clamped at 0) up to ``i + mean_frag_length``
    (clamped at ``len(ps)``).  Returns a numpy array of length ``len(ps)``.

    When *verbose* is true the indices are iterated through
    ``verbose_gen(..., modulus=1000)``; otherwise a plain ``xrange`` is used.
    """
    n_positions = len(ps)
    window_scores = np.zeros(n_positions)
    positions = xrange(n_positions)
    if verbose:
        positions = verbose_gen(positions,modulus=1000)
    for pos in positions:
        # NOTE(review): the window reaches half a fragment length to the
        # left but a full fragment length to the right -- confirm that this
        # asymmetry is intended.
        lo = max(pos-mean_frag_length/2,0)
        hi = min(pos+mean_frag_length,n_positions)
        window_scores[pos] = alo(ps[lo:hi])
    return cells*window_scores
Beispiel #3
0
def compute_coefficients(ks):
    """Return the coefficients of ``prod(x - k for k in ks)``, highest power first.

    The working array starts as ``len(ks)`` zeros followed by a single 1
    (the constant polynomial 1).  For every ``k`` it is replaced by
    ``np.roll(arr, -1) - k*arr``: the left roll multiplies the polynomial by
    ``x`` and the subtraction adds the ``-k`` term.  The result has
    ``len(ks) + 1`` entries.
    """
    coeffs = np.array([0 for _ in ks] + [1])
    for root in verbose_gen(ks,modulus=10000):
        times_x = np.roll(coeffs,-1)
        coeffs = times_x - root*coeffs
    return coeffs
Beispiel #4
0
def test_inverse_cdf_sampler():
    """Plot a histogram of draws from ``inverse_cdf_sampler`` on a uniform input.

    Builds a list of 5*10**6 equal probabilities, draws the same number of
    samples from the callable returned by ``inverse_cdf_sampler`` and shows
    them in a 1000-bin histogram for visual inspection.  Nothing is asserted
    or returned.
    """
    num_outcomes = int(5*10**6)
    uniform_ps = [1.0/num_outcomes]*num_outcomes
    draw = inverse_cdf_sampler(uniform_ps)
    draws = []
    for _ in verbose_gen(xrange(num_outcomes),modulus=100000):
        draws.append(draw())
    plt.hist(draws,bins=1000)
    plt.show()
Beispiel #5
0
def test_frags_from_splits(G,mean_frag_length,trials):
    """Search for a random input on which ``frags_from_splits`` and its reference disagree.

    Each trial draws 100 random positions below *G* as the configuration and
    a fresh ``make_splits(G, 1.0/mean_frag_length)``, then compares the two
    implementations' outputs as sets.

    Returns the ``(config, splits)`` pair of the first mismatch, or ``None``
    when all *trials* agree.
    """
    lamb = 1.0/mean_frag_length
    for attempt in verbose_gen(range(trials)):
        config = [random.randrange(G) for site in range(100)]
        splits = make_splits(G,lamb)
        candidate = set(frags_from_splits(config,splits))
        reference = set(frags_from_splits_ref(config,splits))
        if candidate != reference:
            return config,splits
Beispiel #6
0
def dummify_genome(genome, w):
    """One-hot encode every circular window of width *w* of *genome*.

    Returns an array of shape ``(len(genome), 4*w)``.  In row ``i``, columns
    ``4*j .. 4*j+3`` encode the base at position ``(i+j) % len(genome)`` in
    the order A, C, G, T.  The array shape is printed before filling.

    Raises KeyError for any base other than "A", "C", "G" or "T".
    """
    column_of = {"A":0, "C":1, "G":2, "T":3}
    genome_len = len(genome)
    encoded = np.zeros((genome_len, 4*w))
    print(encoded.shape)
    for row in verbose_gen(xrange(genome_len), modulus=1000):
        for offset in range(w):
            base = genome[(row+offset)%genome_len]
            encoded[row, 4*offset+column_of[base]] = 1
    return encoded
Beispiel #7
0
def chip_ps_const_frag_length_ref2(ps,mean_frag_length,cells=10000,verbose=False):
    """Count, per position, how many randomly placed windows were accepted.

    Each of the *cells* iterations draws a random index below
    ``len(ps) - mean_frag_length``, forms the window
    ``[index - cutoff, index + cutoff)`` clamped to ``[0, len(ps))`` and,
    when a uniform random draw falls below ``alo`` of that slice of *ps*,
    increments every position in the window.  Returns the array of counts.

    NOTE(review): ``cutoff`` is not defined inside this function; it must
    exist at module scope or the call raises NameError -- confirm.
    """
    genome_len = len(ps)
    counts = np.zeros(genome_len)
    draws = xrange(cells)
    if verbose:
        draws = verbose_gen(draws,modulus=10000)
    for _ in draws:
        center = random.randrange(genome_len-mean_frag_length)
        lo = max(center-cutoff,0)
        hi = min(center+cutoff,genome_len)
        window = ps[lo:hi]
        if random.random() < alo(window):
            counts[lo:hi] += 1
    return counts
Beispiel #8
0
def max_in_window_ref(scores,k):
    """Return max in window of radius k over circular array of scores.

    Reference implementation: entry ``i`` of the result is the largest of
    ``scores[(i+j) % G]`` for ``j`` in ``-k .. k``, where ``G = len(scores)``.
    Returns a numpy array of length ``G``.

    Raises ValueError when *k* is negative and *scores* is non-empty, since
    the window is then empty.
    """
    G = len(scores)
    max_scores = np.empty(G)
    for i in verbose_gen(xrange(G),10000):
        # Use the builtin max rather than a running maximum seeded with
        # None: ordering a number against None only works through Python 2's
        # arbitrary cross-type ordering and is a TypeError on Python 3.
        max_scores[i] = max(scores[(i+j) % G] for j in xrange(-k,k+1))
    return max_scores
Beispiel #9
0
def chip_seq_log_likelihood_ref(ps,mapped_reads,N):
    """Reference log likelihood of a chip-seq dataset under hypothesis *ps*.

    *mapped_reads* and *ps* are paired positionally with ``zip``.  A position
    with read count ``m`` and probability ``p`` contributes
    ``log_choose(N, m) + m*log(p) + (N-m)*log(1-p)``, i.e. a binomial
    log-probability for *N* cells; the contributions are summed.

    Note that ``ps[i]`` is a hypothesis about the probability that a fragment
    covers base i, not a hypothesis that base i is occupied.
    """
    total = 0
    for reads_here,p in verbose_gen(zip(mapped_reads,ps),modulus=1000):
        total += log_choose(N,reads_here) + reads_here*log(p) + (N-reads_here)*log(1-p)
    return total
Beispiel #10
0
def exp_reconstruction(reads,lamb,G):
    """Reconstruct fragment density map by assuming exponential extension of each read.

    reads: iterable of ``(strand, start, stop)``; every read must span
        exactly 75 positions (asserted).
    lamb: per-base decay rate.  A position at distance ``d`` from the read's
        extending end receives weight ``(1-lamb)**d``; the extension covers
        ``10*int(1/lamb)`` positions downstream of ``stop`` for "+" reads and
        upstream of ``start`` for any other strand.
    G: length of the map; all positions are taken modulo G.

    Returns a list of length G.  Positions covered by the read itself get
    weight 1.
    """
    frag_map = [0]*G
    mfl = int(1/lamb)
    reach = 10*mfl
    for (strand,start,stop) in verbose_gen(reads,modulus=10000):
        assert(stop - start == 75)
        for i in range(start,stop):
            # Wrap exactly like the extension below, so a read that crosses
            # the origin does not raise IndexError.
            frag_map[i%G] += 1
        if strand == "+":
            endpoint = stop
            ext_positions = xrange(stop,stop+reach)
        else:
            endpoint = start
            ext_positions = xrange(start-reach,start)
        # NOTE(review): "+" extensions start at distance 0 (weight 1 at
        # ``stop``) while the other strand starts at distance 1 -- confirm
        # that this asymmetry is intended.
        for i in ext_positions:
            frag_map[i%G] += (1-lamb)**abs(i-endpoint)
    return frag_map
Beispiel #11
0
def predict_chip_ps5(ps,mean_frag_length,cells=100):
    """Predict a per-position signal from *ps*, returned as a list of length ``len(ps)``.

    For position ``i`` two sums are formed over positions ``j`` closer than
    ``cutoff = min(5*mean_frag_length, len(ps))``:

    * ``left(i)``  over ``j < i``:  ``ps[j] * product(1-ps[k] for j < k <= i)``
    * ``right(i)`` over ``j >= i``: ``ps[j] * product(1-ps[k] for i <= k < j)``

    each term additionally weighted by ``(1-lamb)**abs(j-i)`` with
    ``lamb = 1.0/mean_frag_length``.  The value at ``i`` is
    ``cells * (1 - (1-left(i)) * (1-right(i)))``.

    ``product`` is expected to multiply the items of an iterable (defined
    elsewhere in the project) -- confirm.
    """
    G = len(ps)
    lamb = 1.0/mean_frag_length
    cutoff = min(5*mean_frag_length,G) # ignore contributions outside 5 times expected fragment length
    def left(i):
        # Only j with i-j < cutoff contribute, so start the range there
        # instead of scanning and filtering all of range(i).
        return sum(ps[j]*product(1-ps[k] for k in range(j+1,i+1))*(1-lamb)**abs(j-i)
                   for j in range(max(i-cutoff+1,0),i))
    def right(i):
        # Likewise only j with j-i < cutoff contribute.
        return sum(ps[j]*product(1-ps[k] for k in range(i,j))*(1-lamb)**abs(j-i)
                   for j in range(i,min(i+cutoff,G)))
    return [cells*(1-(1-left(i))*(1-right(i))) for i in verbose_gen(range(G))]
Beispiel #12
0
def esp_spec(ps,k,powsums=None):
    """Return the k-th elementary symmetric polynomial of *ps* from power sums.

    Applies the recurrence
    ``e_m = (1/m) * sum_{i=1..m} (-1)**(i-1) * e_{m-i} * powsums[i]``
    with ``e_0 = 1``, for ``m = 1 .. k``.

    powsums: optional precomputed sequence indexed by power (at least
        ``k+1`` entries).  When None it is computed as ``powsum(ps, i)`` for
        ``i`` in ``0 .. k`` and progress messages are printed.

    Returns 1 for ``k == 0`` without touching *powsums*.
    """
    if k == 0:
        return 1
    if powsums is None:
        print("computing powersums...")
        powsums = [powsum(ps,power) for power in verbose_gen(range(k+1))]
        print("finished with powersums")
    esps = [None]*(k+1)
    esps[0] = 1
    for order in range(1,k+1):
        signed_terms = ((-1)**(i-1)*esps[order-i]*powsums[i]
                        for i in range(1,order+1))
        esps[order] = sum(signed_terms)/float(order)
    return esps[k]
Beispiel #13
0
def ising(hs, J, iterations=50000, boundary="periodic", spins=None, burn_in=0):
    """Estimate per-site spin-up frequencies of a 1-D Ising chain by sampling.

    Each of the *iterations* sweeps visits the sites in order.  With
    ``E = s_i * (h_i + J*(s_{i-1} + s_{i+1}))`` (neighbour indices taken
    modulo ``len(hs)``) and ``E' = -E``, site ``i`` is flipped with
    probability ``exp(-E') / (exp(-E) + exp(-E'))``.

    hs: per-site fields; its length sets the number of sites.
    J: coupling between neighbouring sites.
    iterations: number of sweeps.
    boundary: currently unused; neighbours always wrap around.
    spins: optional initial configuration, modified in place.  When None a
        random array of -1/+1 values is created.
    burn_in: number of initial sweeps excluded from the estimate.

    Returns an array of length ``len(hs)``: the fraction of post-burn-in
    sweeps after which each site equalled +1.  ``sum(spins)`` is printed
    every 1000 sweeps.
    """
    N = len(hs)
    if spins is None:
        spins = np.array([random.choice([-1, 1]) for i in range(N)])
    occupancies = np.zeros(N)
    for t in verbose_gen(xrange(iterations), modulus=1000):
        for i in range(N):
            current_energy = spins[i] * (hs[i] + J * (spins[(i - 1) % N] + spins[(i + 1) % N]))
            prop_energy = -current_energy
            p_prop = exp(-prop_energy) / (exp(-current_energy) + exp(-prop_energy))
            if random.random() < p_prop:
                spins[i] *= -1
        if t % 1000 == 0:
            print(sum(spins))
        # Sample sweeps t = burn_in .. iterations-1, i.e. exactly
        # iterations - burn_in of them, so the count matches the normaliser
        # used in the return statement (a strict ``>`` dropped one sample).
        if t >= burn_in:
            occupancies += spins == 1
    return occupancies / (iterations - burn_in)
Beispiel #14
0
def cftp_ising(hs, J, replicas):
    """Average *replicas* independent ``cftp(hs, J)`` samples column-wise.

    The samples are transposed so that each column holds one coordinate
    across all replicas; every value ``c`` is mapped to ``(c + 1) / 2.0``
    before the column mean is taken.  Returns a list with one mean per
    column.
    """
    draws = []
    for _ in verbose_gen(xrange(replicas)):
        draws.append(cftp(hs, J))
    per_site = transpose(draws)
    return [mean([(spin + 1) / 2.0 for spin in site]) for site in per_site]
Beispiel #15
0
def gibbs_sample_many(ks,q,t,n):
    """Sample system (ks,q) by gibbs sampling at time t, for n trials.

    Each trial runs ``gibbs_sample_iterate(ks, [G]*q, t)`` with
    ``G = len(ks)`` and converts the result with ``ss_from_xs(..., G)``.
    Returns a list holding the mean of each column across the n trials.
    """
    G = len(ks)
    trial_results = []
    for _ in verbose_gen(xrange(n)):
        xs = gibbs_sample_iterate(ks,[G]*q,t)
        trial_results.append(ss_from_xs(xs,G))
    return [mean(column) for column in transpose(trial_results)]
Beispiel #16
0
def sequential_sample_many(ks,q,n):
    """Average n independent ``sequential_sample_ref(ks, q)`` draws column-wise.

    Returns a list holding the mean of each column of the transposed draws.
    """
    draws = []
    for _ in verbose_gen(xrange(n)):
        draws.append(sequential_sample_ref(ks,q))
    return [mean(column) for column in transpose(draws)]
Beispiel #17
0
def write_chip_seq_data(chip_seq_data,filename):
    """Dump *chip_seq_data* next to the E. coli genome as comma-separated text.

    Line ``i`` of the output is ``i,base,value`` where ``base`` is the i-th
    element of ``get_ecoli_genome()`` and ``value`` the i-th element of
    *chip_seq_data*.  The two are combined with ``zip``, so the shorter one
    determines the number of lines.  *filename* is overwritten.
    """
    genome = get_ecoli_genome()
    with open(filename,'w') as handle:
        rows = enumerate(zip(genome,chip_seq_data))
        for row in verbose_gen(rows,10**5):
            idx,(base,val) = row
            handle.write("%s,%s,%s\n"%(idx,base,val))
Beispiel #18
0
def max_in_window(scores,k):
    """Return max in window of radius k over circular array of scores.

    Vectorised form: starting from a copy of *scores*, takes the elementwise
    maximum with ``np.roll(scores, shift)`` for every shift in ``-k .. k``.
    """
    running_max = np.copy(scores)
    for shift in verbose_gen(xrange(-k,k+1)):
        rolled = np.roll(scores,shift)
        running_max = np.maximum(running_max,rolled)
    return running_max
Beispiel #19
0
def make_chip_dataset(num_cells):
    """Concatenate the results of *num_cells* ``chip(...)`` calls.

    Every call receives the module-level ``genome``, a fresh ``rfd_xs(ps)``
    (``ps`` is also module-level) and ``MEAN_FRAGMENT_LENGTH``.
    """
    per_cell = []
    for _ in verbose_gen(xrange(num_cells)):
        per_cell.append(chip(genome,rfd_xs(ps),MEAN_FRAGMENT_LENGTH))
    return concat(per_cell)
Beispiel #20
0
def initialize_array():
    """Call ``Z(i, Q)`` for every ``i`` in ``0 .. N-1``, in increasing order.

    ``N``, ``Z`` and ``Q`` are module-level names; the return values of ``Z``
    are discarded, so this is presumably run for ``Z``'s side effects
    (e.g. filling a cache) -- confirm against ``Z``.
    """
    indices = verbose_gen(xrange(N), modulus=1000)
    for index in indices:
        Z(index, Q)
Beispiel #21
0
def chip_ps_ref(ps,mean_frag_length,cells=10000):
    """Do a chip seq experiment given the distribution ps.

    For each of *cells* iterations a fresh ``rfd_xs(ps)`` is drawn and passed
    to ``chip_ps`` together with *mean_frag_length*; the per-cell results are
    handed to ``concat`` as a generator.
    """
    # NOTE(review): G is computed but never used, whereas chip_ps_np passes
    # G to chip(...) -- confirm whether the call below is the intended one.
    G = len(ps)
    per_cell = (chip_ps(rfd_xs(ps),mean_frag_length)
                for _ in verbose_gen(xrange(cells)))
    return concat(per_cell)
Beispiel #22
0
def chip_ps_np(ps,mean_frag_length,cells=10000,verbose=False):
    """Do a chip seq experiment given the distribution ps.

    For each of *cells* iterations a fresh ``rfd_xs_np(ps)`` is drawn and
    passed to ``chip`` along with ``len(ps)`` and *mean_frag_length*; the
    per-cell results are handed to ``concat`` as a generator.  When *verbose*
    is true the cell loop runs through ``verbose_gen(..., modulus=1000)``.
    """
    genome_len = len(ps)
    cell_indices = xrange(cells)
    if verbose:
        cell_indices = verbose_gen(cell_indices,modulus=1000)
    return concat(chip(genome_len,rfd_xs_np(ps),mean_frag_length) for _ in cell_indices)
Beispiel #23
0
def chip_ps_spec(ps,mean_frag_length,cells=10000):
    """Concatenate *cells* independent ``chip_ps_spec_single_cell`` results.

    Every call receives *ps* and *mean_frag_length* unchanged; the results
    are handed to ``concat`` as a generator.
    """
    single_cell_runs = (chip_ps_spec_single_cell(ps,mean_frag_length)
                        for _ in verbose_gen(xrange(cells)))
    return concat(single_cell_runs)
def kmers(L):
    """Return ``product`` over *L* copies of the module-level ``bases``, wrapped in ``verbose_gen``.

    ``product`` is called with ``bases`` repeated *L* times as positional
    arguments (presumably ``itertools.product``, yielding every length-L
    tuple of bases -- confirm against the file's imports).
    """
    factors = [bases]*L
    return verbose_gen(product(*factors),modulus=10000)
Beispiel #25
0
def predict_chip_ps2(ps,mean_frag_length,cells=10000):
    """Predict a per-position signal as ``cells * alo2`` of tent-weighted *ps*.

    For every position ``x`` each ``ps[i]`` is multiplied by
    ``tent(x, i, mean_frag_length)``; the resulting list is passed to
    ``alo2`` and scaled by *cells*.  Returns a list of length ``len(ps)``.

    Makes ``len(ps)**2`` calls to ``tent``.
    """
    # The previously computed ``eff_lamb`` was never used and has been
    # removed; the weighting depends on mean_frag_length only through tent.
    G = len(ps)
    return [cells*alo2([p*tent(x,i,mean_frag_length) for i,p in enumerate(ps)])
            for x in verbose_gen(range(G))]
Beispiel #26
0
def show_chip_shadow(G,endpoints,mean_frag_length,cells=10000,trials=10):
    """Plot *trials* simulated read maps in blue via ``plt.plot``.

    Each trial concatenates the results of *cells* calls to
    ``chip(G, endpoints, mean_frag_length)``, converts them with
    ``map_reads(..., G)`` and plots the result.  ``plt.show()`` is not
    called here.  Returns None.
    """
    # A plain loop replaces the former list comprehension, which was used
    # only for its plotting side effect and built a throwaway list.
    for trial in verbose_gen(range(trials)):
        fragments = concat([chip(G,endpoints,mean_frag_length) for cell in range(cells)])
        plt.plot(map_reads(fragments,G),color='b')