# 20130106x.py
"""
Write equilibrium distributions for four related processes.
Each process is a Wright-Fisher variant.
"""
import argparse
import math
import numpy as np
import scipy.linalg
import omnomnomial
# labels for the two ways main picks the mutation rate;
# each label also becomes part of an output file name
RAW_RATE = 'raw-rate'
ADJUSTED_RATE = 'adjusted-rate'
# labels for the two ways main builds the transition matrix;
# each label also becomes part of an output file name
DETERMINISTIC_MUT = 'deterministic-mut'
STOCHASTIC_MUT = 'stochastic-mut'
def get_inverse_dict(M):
    """
    Build the reverse lookup for a multinomial state map.
    Row i of M holds the allele counts of pop state index i,
    so the result maps each allele count tuple back to its row index.
    If a count tuple occurs more than once, the last index wins.
    @param M: multinomial state map
    @return: T, a dict from allele count tuple to pop state index
    """
    T = {}
    for index, counts in enumerate(M):
        T[tuple(counts)] = index
    return T
def get_macro_mut_rate_matrix(M, T):
    """
    Build the rate matrix of single-mutation moves between pop states.
    A move shifts one individual from an allele with a positive count
    to an allele that differs from it at exactly one site.
    The rate of the move is the count of the allele that loses a copy.
    @param M: multinomial state map whose rows are (AB, Ab, aB, ab) counts
    @param T: map from allele count tuple to pop state index
    @return: Q, a square rate matrix whose rows each sum to zero
    """
    # column order is (AB, Ab, aB, ab);
    # entry d lists the columns that allele d can mutate into
    receivers_of = ((1, 2), (0, 3), (0, 3), (1, 2))
    nstates = M.shape[0]
    rates = np.zeros((nstates, nstates))
    for source, (AB, Ab, aB, ab) in enumerate(M):
        counts = (AB, Ab, aB, ab)
        for donor, receivers in enumerate(receivers_of):
            ncopies = counts[donor]
            if ncopies > 0:
                for receiver in receivers:
                    moved = list(counts)
                    moved[donor] -= 1
                    moved[receiver] += 1
                    rates[source, T[tuple(moved)]] = ncopies
    # the diagonal cancels the total rate of leaving each state
    exit_rates = rates.sum(axis=1)
    return rates - np.diag(exit_rates)
def get_adjusted_rate(mu):
    """
    Get a rate that gives an expm matrix with the same entries as Jeff has.
    The value is -log(1 - 4*mu*(1-mu)) / 4,
    which is algebraically the same as -log(abs(1 - 2*mu)) / 2.
    @param mu: mutation probability per site per generation
    @return: the adjusted rate
    @raise ValueError: raised by math.log1p when mu is exactly 0.5
    """
    # log1p avoids forming 1 - 4*mu*(1-mu) explicitly; for tiny mu that
    # difference rounds to 1.0 and a plain log would return a zero rate
    return -0.25 * math.log1p(-4 * mu * (1 - mu))
def get_stationary_distribution(P):
    """
    Solve a linear system for the stationary distribution of P.
    The system is (P^T - I) v = 0 with its first equation replaced
    by the constraint that the entries of v sum to one.
    @param P: square transition matrix as a numpy array
    @return: vector of absolute values rescaled to sum to one
    """
    n = len(P)
    system = P.T - np.eye(n)
    # trade the first balance equation for the normalization constraint
    system[0] = np.ones(n)
    rhs = np.zeros(n)
    rhs[0] = 1
    solution = scipy.linalg.solve(
            system, rhs, overwrite_a=True, overwrite_b=True)
    # absolute values remove any small negative entries before rescaling
    weights = np.abs(solution)
    return weights / weights.sum()
def gen_states(N, k):
    """
    Generate every way to split N among k ordered nonnegative counts.
    States come out in increasing order of their leading counts.
    @param N: total to be split
    @param k: number of counts per state, at least 1
    @return: generator of lists of length k that each sum to N
    """
    if k == 1:
        # a single remaining slot must take everything that is left
        yield [N]
        return
    for head in range(N + 1):
        for tail in gen_states(N - head, k - 1):
            yield [head] + tail
def hamdist(a, b):
    """
    Count the positions at which two sequences differ.
    Positions beyond the end of the shorter sequence are not compared.
    @param a: first sequence
    @param b: second sequence
    @return: number of mismatched positions
    """
    mismatches = 0
    for x, y in zip(a, b):
        if x != y:
            mismatches += 1
    return mismatches
def get_jeff_mut_trans(ascii_states, mu):
    """
    Build a mutation transition matrix among the alleles.
    Entry (i, j) is mu**h * (1-mu)**(2-h), where h is the hamming
    distance between allele i and allele j.
    The fixed exponent 2 presumably reflects two sites per allele.
    @param ascii_states: sequence of allele names
    @param mu: mutation probability, presumably per site
    @return: square matrix indexed by allele
    """
    #FIXME: unused here, maybe add a test or something
    nalleles = len(ascii_states)
    trans = np.zeros((nalleles, nalleles))
    for row, source in enumerate(ascii_states):
        distances = [hamdist(source, target) for target in ascii_states]
        trans[row] = [(mu ** h) * ((1 - mu) ** (2 - h)) for h in distances]
    return trans
def get_micro_mut_rate_matrix(ascii_states):
    """
    Build the unit-rate mutation rate matrix among the alleles.
    Two alleles are connected with rate 1 when their names are at
    hamming distance exactly 1; each diagonal entry is minus the
    sum of the other entries in its row.
    @param ascii_states: sequence of allele names
    @return: Q, a square rate matrix whose rows each sum to zero
    """
    nalleles = len(ascii_states)
    rates = np.zeros((nalleles, nalleles))
    for row, source in enumerate(ascii_states):
        for col, target in enumerate(ascii_states):
            if hamdist(source, target) != 1:
                continue
            rates[row, col] = 1
    exit_rates = rates.sum(axis=1)
    return rates - np.diag(exit_rates)
def _write_equilibrium_table(filename, ascii_states, M, v):
    """
    Write a tab-separated table with one row per pop state.
    Each row has the allele counts followed by the state probability.
    """
    with open(filename, 'w') as fout:
        # explicit write calls work under both python 2 and python 3,
        # unlike the python 2 only 'print >> fout' statement
        fout.write('\t'.join(ascii_states + ['probability']) + '\n')
        for (AB, Ab, aB, ab), prob in zip(M, v):
            row = (AB, Ab, aB, ab, prob)
            fout.write('\t'.join(str(x) for x in row) + '\n')


def main(args):
    """
    Write the equilibrium distribution of each of the four processes.
    One file named '<rate adjustment>_<mutation style>.txt' is written
    to the current directory for each of the 2x2 option combos.
    @param args: parsed command line arguments with attributes N and mu
    @raise ValueError: if an option label is not recognized
    """
    # do some initialization
    ascii_states = ['AB', 'Ab', 'aB', 'ab']
    k = len(ascii_states)
    N = args.N
    M = np.array(list(gen_states(N, k)))
    T = get_inverse_dict(M)
    lmcs = omnomnomial.get_lmcs(M)
    # zero allele counts give log(0) = -inf in this array
    lps_neutral = np.log(M / float(N))
    # neither rate matrix depends on mu, so build each of them only once
    Q_micro = get_micro_mut_rate_matrix(ascii_states)
    Q_macro = get_macro_mut_rate_matrix(M, T)
    # write a file for each of the 2x2 option combos
    for rate_adjustment in (RAW_RATE, ADJUSTED_RATE):
        # define the mutation rate between adjacent states
        if rate_adjustment == RAW_RATE:
            mu = args.mu
        elif rate_adjustment == ADJUSTED_RATE:
            mu = get_adjusted_rate(args.mu)
        else:
            raise ValueError(
                    'unknown rate adjustment: %r' % (rate_adjustment,))
        for mut_style in (DETERMINISTIC_MUT, STOCHASTIC_MUT):
            # construct population genetic transition matrix P
            if mut_style == DETERMINISTIC_MUT:
                # mutation acts on the expected allele frequencies
                mmu = scipy.linalg.expm(mu*Q_micro)
                lps = np.log(np.dot(M, mmu) / N)
                P = np.exp(omnomnomial.get_log_transition_matrix(
                    M, lmcs, lps))
            elif mut_style == STOCHASTIC_MUT:
                # mutation is its own step on pop states, then drift
                P_mut = scipy.linalg.expm(mu*Q_macro)
                # NOTE(review): P_dft does not depend on mu; it could be
                # hoisted if get_log_transition_matrix leaves its inputs
                # untouched -- confirm before moving it
                P_dft = np.exp(omnomnomial.get_log_transition_matrix(
                    M, lmcs, lps_neutral))
                P = np.dot(P_mut, P_dft)
            else:
                raise ValueError(
                        'unknown mutation style: %r' % (mut_style,))
            # compute the equilibrium distribution
            v = get_stationary_distribution(P)
            # write the equilibrium distribution to the file
            file_description = '_'.join((rate_adjustment, mut_style))
            filename = file_description + '.txt'
            _write_equilibrium_table(filename, ascii_states, M, v)
if __name__ == '__main__':
    # command line entry point; the module docstring is the description
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
            '-N',
            default=20,
            type=int,
            help='haploid population size')
    parser.add_argument(
            '-mu',
            default=0.025,
            type=float,
            help='mutation probability per site per generation')
    main(parser.parse_args())