# 20110531a.py
"""
Attempt to find branch lengths to cause a 2D MDS collision.
All vertices must have integer labels.
"""
from StringIO import StringIO
import math
import time
import numpy as np
import scipy
from scipy import linalg
from scipy import optimize
import Form
import FormOut
import Ftree
import FtreeIO
def get_form():
    """
    Define the two text inputs of this script.
    Both inputs are multi-line fields with a newick string as the default.
    @return: a list of form objects
    """
    # default newick strings;
    # the true tree has branch lengths, the test topology has none
    true_default = '((1:3, 2:6)5:1, 3:9, 4:12)6;'
    test_default = '((1, 3)5, 2, 4)6;'
    # build the fields in the order in which they are returned
    true_field = Form.MultiLine('true_tree', 'true tree', true_default)
    test_field = Form.MultiLine('test_tree', 'test topology', test_default)
    return [true_field, test_field]
def get_form_out():
    """
    @return: a newly constructed FormOut.Report object
    """
    report = FormOut.Report()
    return report
class Functor:
    """
    Objective function for the branch length search.
    An instance is called with a flat parameter array and returns
    a scalar error, so it can be handed directly to a scalar minimizer.
    """

    def __init__(self, T_test, Vp, C, w):
        """
        @param T_test: test topology
        @param Vp: matrix of column eigenvectors of Schur complement in L
        @param C: target matrix
        @param w: Fiedler and Fiedler+1 eigenvalues Schur complement in L
        """
        self.T_test = T_test
        self.Vp = Vp
        self.C = C
        self.w = w
        # define an edge order;
        # this fixes which entry of X belongs to which edge
        self.u_edges = sorted(self.T_test)
        # precompute vertex lists
        self.leaves = Ftree.T_to_leaves(T_test)
        self.internal = Ftree.T_to_internal_vertices(T_test)
        # leaves come first so that the rows of the stacked matrix V
        # (Vp on top of Vr) line up with this vertex order
        self.vertices = self.leaves + self.internal

    def X_to_B_Vr(self, X):
        """
        Unpack purely from X.
        Only the number of edges and the number of internal vertices
        are taken from the instance, to know where each segment ends.
        @param X: a 1D numpy array of floats
        @return: (B, Vr) where B maps each edge to a positive branch length
        and Vr has one row per internal vertex and two columns
        """
        start = 0
        # extract log branch lengths
        n = len(self.u_edges)
        log_branch_lengths = X[start:start+n]
        start += n
        # extract the internal vertex entries of the first vector
        n = len(self.internal)
        vr1 = X[start:start+n]
        start += n
        # extract the internal vertex entries of the second vector
        vr2 = X[start:start+n]
        start += n
        # exponentiate so that every branch length is positive
        B = dict((u_edge, math.exp(logb)) for u_edge, logb in zip(
            self.u_edges, log_branch_lengths))
        # put the two vectors side by side as columns
        Vr = np.vstack([vr1, vr2]).T
        # return the unpacked values
        return B, Vr

    def _B_Vr_to_L_V(self, B, Vr):
        """
        Build the matrices that depend on already unpacked parameters.
        @param B: map from edge to branch length
        @param Vr: internal vertex rows of the candidate vectors
        @return: (L, V) as described in X_to_L_V
        """
        # get the laplacian matrix
        L = Ftree.TB_to_L_principal(self.T_test, B, self.vertices)
        # get the augmented vectors, fixed leaf rows above internal rows
        V = np.vstack([self.Vp, Vr])
        return L, V

    def X_to_L_V(self, X):
        """
        Unpack in a way that uses initialized state.
        @param X: a 1D numpy array of floats
        @return: (L, V) where L is the matrix from Ftree.TB_to_L_principal
        for the test topology and V stacks Vp on top of Vr
        """
        B, Vr = self.X_to_B_Vr(X)
        return self._B_Vr_to_L_V(B, Vr)

    def __call__(self, X):
        """
        First few entries of X are logs of branch lengths.
        Next few entries are vr1 entries.
        Next few entries are vr2 entries.
        @param X: a 1D numpy array of floats
        @return: squared Frobenius error plus squared eigenvalue error
        """
        # unpack the parameter array only once;
        # B is also needed below for the Schur complement
        B, Vr = self.X_to_B_Vr(X)
        L, V = self._B_Vr_to_L_V(B, Vr)
        # get the error matrix
        E = np.dot(L, V) - self.C
        # compute the squared frobenius norm of the error
        frob_norm_err = np.sum(E*E)
        # use a hack to make sure we are really using the first two eigenvalues
        L_schur = Ftree.TB_to_L_schur(self.T_test, B, self.leaves)
        # Compute the whole ascending spectrum and slice it instead of
        # passing the 'eigvals' keyword, which newer SciPy no longer accepts.
        # Positions 1 and 2 skip the smallest eigenvalue.
        w_observed = scipy.linalg.eigvalsh(L_schur)[1:3]
        w_error = w_observed - self.w
        eigenvalue_err = np.sum(w_error*w_error)
        # return the total error
        return frob_norm_err + eigenvalue_err
def get_response_content(fs):
    """
    Search for branch lengths on the test topology and report the result.
    The target is built from the second and third eigenpairs of the
    Schur complement matrix of the true tree.
    The search is restarted with doubled evaluation limits while less
    than half of the time budget has elapsed, and only the last run
    is reported.
    @param fs: an object with true_tree and test_tree newick attributes
    @return: the text of the report
    @raise ValueError: when the two trees do not have the same vertices,
    the same leaves, and the same internal vertices
    """
    time_budget = 5.0
    # parse the newick strings;
    # the int argument presumably converts the vertex labels -- TODO confirm
    R_true, B_true = FtreeIO.newick_to_RB(fs.true_tree, int)
    R_test = FtreeIO.newick_to_R(fs.test_tree, int)
    # get the unrooted tree topologies
    T_true = Ftree.R_to_T(R_true)
    T_test = Ftree.R_to_T(R_test)
    # check the trees for vertex compatibility, in this order
    compatibility_checks = (
            (Ftree.T_to_order, 'vertex sets are not equal'),
            (Ftree.T_to_leaves, 'leaf vertex sets are not equal'),
            (Ftree.T_to_internal_vertices,
                'internal vertex sets are not equal'),
            )
    for get_vertices, msg in compatibility_checks:
        if set(get_vertices(T_true)) != set(get_vertices(T_test)):
            raise ValueError(msg)
    # get the 2D MDS for the true tree
    leaves = Ftree.T_to_leaves(T_true)
    internal = Ftree.T_to_internal_vertices(T_true)
    vertices = leaves + internal
    L_schur = Ftree.TB_to_L_schur(T_true, B_true, leaves)
    w_all, Vp_all = scipy.linalg.eigh(L_schur)
    # keep the eigenpairs at positions 1 and 2
    w = w_all[1:3]
    Vp = Vp_all[:, 1:3]
    # make the constant matrix for Frobenius norm comparison;
    # leaf rows hold the scaled eigenvectors and the other rows stay zero
    C = np.zeros((len(vertices), 2))
    C[:len(leaves)] = w*Vp
    # one log branch length per edge plus two entries per internal vertex
    nparams = len(T_test) + 2*len(internal)
    # keep doing iterations until we run out of time
    eval_limit = 256
    t_start = time.time()
    while time.time() - t_start < time_budget / 2:
        eval_limit *= 2
        f = Functor(T_test.copy(), Vp.copy(), C.copy(), w.copy())
        x0 = np.ones(nparams)
        xopt, fopt, itr, funcalls, warnflag = scipy.optimize.fmin(
                f, x0, ftol=1e-8, xtol=1e-8, full_output=True,
                maxfun=eval_limit, maxiter=eval_limit)
    # look at the values from the longest running iteration
    B, Vr = f.X_to_B_Vr(xopt)
    L, V = f.X_to_L_V(xopt)
    Lrr = Ftree.TB_to_L_block(T_test, B, internal, internal)
    Lrp = Ftree.TB_to_L_block(T_test, B, internal, leaves)
    # the pseudoinverse stands in for the inverse of Lrr
    H_ext = -np.dot(np.linalg.pinv(Lrr), Lrp)
    # name each vertex by its own label
    N = dict((v, str(v)) for v in vertices)
    # start writing the response
    out = StringIO()
    labeled_values = (
            ('xopt:', xopt),
            ('fopt:', fopt),
            ('number of iterations:', itr),
            ('number of function calls:', funcalls),
            ('warning flags:', warnflag),
            ('first four eigenvalues:', w_all[:4]),
            )
    for label, value in labeled_values:
        print >> out, label, value
    print >> out, 'Vr:'
    print >> out, Vr
    print >> out, '-Lrr^-1 Lrp Vp:'
    print >> out, np.dot(H_ext, Vp)
    # the target matrix and the product fitted to it, written unlabeled
    print >> out, C
    print >> out, np.dot(L, V)
    # the test tree with the branch lengths that were found
    print >> out, FtreeIO.RBN_to_newick(R_test, B, N)
    return out.getvalue()