forked from argriffing/xgcode
-
Notifications
You must be signed in to change notification settings - Fork 0
/
20091103a.py
291 lines (271 loc) · 11 KB
/
20091103a.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
"""
Create a tree MDS animation showing downweighting of internal nodes.
Create a tree MDS animation
showing progressive downweighting of internal nodes.
A sequence of .png files should be written
to some existing specified output directory.
If a web interface is used, maybe show one frame
at some stage between 0 and 1.
Input for web usage:
the tree, a progress fraction, a scaling factor,
image format, and image delivery.
Input for command line usage:
the path to the output directory for the images, a scaling factor, a tree,
and a physical width and height.
To convert a sequence of png images to an mpeg video:
ffmpeg -i frames/frame-%04d.png test.mpg
Resolutions preferred by YouTube are 640x360 and 480x360.
"""
from StringIO import StringIO
import random
import os
import math
from itertools import product
import argparse
import numpy as np
import cairo
from SnippetUtil import HandlingError
import SnippetUtil
import Form
import FormOut
import NewickIO
import Euclid
import FelTree
import CairoUtil
import Progress
import const
# Newick tree string used as the default for both the web form
# and the --tree command line option.
# It is loaded through const.read('20100730g') and right-stripped.
g_tree_string = const.read('20100730g').rstrip()
def get_form():
    """
    Describe the inputs accepted by the web interface.
    @return: a list of form objects
    """
    # the tree to draw, defaulting to the module level newick string
    tree_field = Form.MultiLine(
            'tree_string', 'newick tree', g_tree_string)
    # the drawing scale must be strictly positive
    scale_field = Form.Float(
            'scale', 'scale the image of the tree by this factor',
            200.0, low_exclusive=0.0)
    # the animation progress is restricted to the closed unit interval
    progress_field = Form.Float(
            'progress', 'animation progress between 0.0 and 1.0',
            0.5, low_inclusive=0.0, high_inclusive=1.0)
    format_field = Form.ImageFormat()
    return [tree_field, scale_field, progress_field, format_field]
def get_form_out():
    """
    Describe the output produced by the web interface.
    @return: a FormOut.Image object constructed with the string 'frame'
    """
    image_output = FormOut.Image('frame')
    return image_output
def get_response_content(fs):
    """
    Draw a single animation frame for the web interface.
    @param fs: provides tree_string, scale, progress, and imageformat
    @return: the frame returned by get_animation_frame
    """
    # the web frame is always drawn at this (width, height) in pixels
    frame_size = (640, 480)
    # parse the newick string and count the vertices and the leaves
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = sum(1 for _ in tree.preorder())
    nleaves = sum(1 for _ in tree.gen_tips())
    # Order the vertex ids so that the leaves come first,
    # then build the distance matrix and the edges in that same order.
    ordered_ids = get_ordered_ids(tree)
    distance_matrix = np.array(tree.get_partial_distance_matrix(ordered_ids))
    index_edges = get_index_edges(tree, ordered_ids)
    # The reference points pin down the axis reflections
    # so that frames are not reflected arbitrarily.
    reference_points = Euclid.edm_to_points(distance_matrix).T[:2].T
    # look up the image extension for the requested image format
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    # compute the vertex weights and the weighted 2D embedding
    mass_vector = get_mass_vector(nvertices, nleaves, fs.progress)
    points = get_canonical_2d_mds(
            distance_matrix, mass_vector, reference_points)
    # draw the frame
    return get_animation_frame(
            ext, frame_size, fs.scale, mass_vector, index_edges, points)
def reflect_to_reference(points, reference_points):
    """
    Reflect 2D points across the axes to best match some reference points.
    Each of the four combinations of reflections across the two axes is tried.
    The combination that minimizes the norm of the difference
    between the reflected points and the reference points is used.
    When several combinations are equally good,
    the first one in the order (-1,-1), (-1,1), (1,-1), (1,1) is used.
    @param points: rows are 2D points
    @param reference_points: rows are 2D points
    @return: the reflected points as an array with the shape of the input
    @raise ValueError: if the shapes differ or the rows are not 2D points
    """
    if points.shape != reference_points.shape:
        msg_a = 'the point array and the reference point array '
        msg_b = 'should have the same shape'
        raise ValueError(msg_a + msg_b)
    if len(points.shape) != 2:
        msg = 'the points argument should be a matrix-like numpy array'
        raise ValueError(msg)
    if points.shape[1] != 2:
        raise ValueError('the points should be in 2D space')
    # each row is a pair of signs, one sign per axis
    reflectors = np.array(list(product((-1, 1), repeat=2)))

    def reflection_error(reflector):
        return np.linalg.norm(points * reflector - reference_points)

    # Select by key so that only the scalar errors are compared.
    # Taking the min of (error, reflector) pairs would compare
    # the reflector arrays whenever two errors tie,
    # and comparing numpy arrays that way raises a ValueError.
    best_reflector = min(reflectors, key=reflection_error)
    return points * best_reflector
def get_index_edges(tree, ordered_ids):
    """
    Translate the edges of a tree into pairs of positions in an id sequence.
    @param tree: the tree object
    @param ordered_ids: the returned index pairs refer to this sequence
    @return: a set of frozensets, each holding the two indices of an edge
    """
    # look up the position of each id in the given sequence
    position_of = {}
    for position, node_id in enumerate(ordered_ids):
        position_of[node_id] = position
    # Every edge is seen once from each endpoint,
    # but the set of frozensets keeps only one copy.
    edges = set()
    for node in tree.preorder():
        here = position_of[id(node)]
        edges.update(
                frozenset([here, position_of[id(neighbor)]])
                for neighbor in node.gen_neighbors())
    return edges
def get_mass_vector(nvertices, nleaves, progress):
    """
    Compute normalized vertex weights for the weighted MDS.
    At progress 0.0 every vertex has the same weight,
    and at progress 1.0 only the tips have any weight.
    The first nleaves entries are the tips.
    @param nvertices: the number of vertices in the full tree
    @param nleaves: the number of leaves in the tree
    @param progress: progress in [0.0, 1.0]
    @return: a nonnegative mass vector that sums to 1.0 for weighting the MDS
    """
    # relative to a tip weight of 1.0 each internal vertex gets this weight
    internal_weight = 1 - progress
    weights = np.ones(nvertices, dtype=float)
    weights[nleaves:nvertices] = internal_weight
    # normalize so that the weights sum to one
    total = sum(weights)
    return weights / total
def get_canonical_2d_mds(D, m, reference_points):
    """
    Project the vertices onto the first two weighted MDS axes.
    Unlike plain MDS the reflections across the axes are not arbitrary,
    because the projection is reflected to agree with the reference points.
    @param D: the full distance matrix
    @param m: the mass vector
    @param reference_points: a 2D reference projection of vertices of the tree
    @return: the weighted MDS points as a numpy matrix
    """
    weighted_points = Euclid.edm_to_weighted_points(D, m)
    # keep only the first two coordinates of each point
    planar_points = weighted_points.T[:2].T
    return reflect_to_reference(planar_points, reference_points)
def get_animation_frame(
        image_format, physical_size, scale, mass_vector, index_edges, points):
    """
    Draw the tree as a single animation frame.
    The layers are painted in this order:
    background, axes, edges, and finally the vertices.
    @param image_format: the image extension
    @param physical_size: the width and height of the image in pixels
    @param scale: a scaling factor
    @param mass_vector: use this for visualizing the weights of the vertices
    @param index_edges: defines the connectivity of the tree
    @param points: an array of 2D points, the first few of which are leaves
    @return: the animation frame as an image as a string
    """
    # set up the cairo surface and the drawing context
    helper = CairoUtil.CairoHelper(image_format)
    width = physical_size[0]
    height = physical_size[1]
    surface = helper.create_surface(width, height)
    context = cairo.Context(surface)
    # the origin of the embedding is drawn at the center of the image
    center_x = width / 2.0
    center_y = height / 2.0
    vertex_count = len(points)
    # paint the off-white background
    context.save()
    context.set_source_rgb(.9, .9, .9)
    context.paint()
    context.restore()
    # draw the vertical axis and then the horizontal axis through the center
    context.save()
    context.set_source_rgb(.9, .7, .7)
    context.move_to(center_x, 0)
    context.line_to(center_x, height)
    context.stroke()
    context.move_to(0, center_y)
    context.line_to(width, center_y)
    context.stroke()
    context.restore()
    # draw each edge as a line segment between its two endpoints
    context.save()
    context.set_source_rgb(.8, .8, .8)
    for edge in index_edges:
        first, second = tuple(edge)
        first_x, first_y = points[first].tolist()
        second_x, second_y = points[second].tolist()
        context.move_to(center_x + first_x*scale, center_y + first_y*scale)
        context.line_to(center_x + second_x*scale, center_y + second_y*scale)
        context.stroke()
    context.restore()
    # Draw the vertices as translucent discs.
    # The radius of each disc is proportional to the mass of the vertex.
    context.save()
    context.set_source_rgba(0.2, 0.2, 1.0, 0.5)
    for point, mass in zip(points, mass_vector):
        # a vertex without any mass is not drawn
        if not mass:
            continue
        x, y = point.tolist()
        disc_x = center_x + x*scale
        disc_y = center_y + y*scale
        disc_radius = 3*mass*vertex_count
        context.arc(disc_x, disc_y, disc_radius, 0, 2*math.pi)
        context.fill()
    context.restore()
    # hand back the finished image
    return helper.get_image_string()
def get_ordered_ids(tree):
    """
    List the ids of the tree nodes with the tips ahead of the internal nodes.
    Maybe postorder could be used here instead.
    @param tree: a tree
    @return: a list of ids beginning with the leaves
    """
    tip_ids = [id(node) for node in tree.gen_tips()]
    internal_ids = [id(node) for node in tree.gen_internal_nodes()]
    return tip_ids + internal_ids
def sigmoid(x):
    """
    Evaluate a logistic curve centered at 0.5 with a steepness of 12.
    @param x: a number, which main passes as linear progress in [0.0, 1.0]
    @return: a float strictly between 0.0 and 1.0
    """
    steepness = 12
    shifted = (x - .5) * steepness
    return 1.0 / (1.0 + math.exp(-shifted))
def main(args):
    """
    Write each animation frame as an image file in the output directory.
    @param args: the parsed command line arguments
    @raise ValueError: if fewer than two frames are requested
    """
    # at least two frames are needed to go from progress 0.0 to 1.0
    if args.nframes < 2:
        raise ValueError('nframes should be at least 2')
    # the requested (width, height) of the images in pixels
    frame_size = (args.physical_width, args.physical_height)
    # parse the newick string and count the vertices and the leaves
    tree = NewickIO.parse(args.tree, FelTree.NewickTree)
    nvertices = sum(1 for _ in tree.preorder())
    nleaves = sum(1 for _ in tree.gen_tips())
    # Order the vertex ids so that the leaves come first,
    # then build the distance matrix and the edges in that same order.
    ordered_ids = get_ordered_ids(tree)
    distance_matrix = np.array(tree.get_partial_distance_matrix(ordered_ids))
    index_edges = get_index_edges(tree, ordered_ids)
    # The reference points pin down the axis reflections
    # so that the video frames are not reflected arbitrarily.
    reference_points = Euclid.edm_to_points(distance_matrix).T[:2].T
    # render the frames one at a time and write each one to its own file
    last_frame_index = float(args.nframes - 1)
    pbar = Progress.Bar(args.nframes)
    for frame_index in range(args.nframes):
        # the linear progress runs from 0.0 at the first frame to 1.0 at the last
        linear_progress = frame_index / last_frame_index
        progress = (
                sigmoid(linear_progress)
                if args.interpolation == 'sigmoid' else linear_progress)
        mass_vector = get_mass_vector(nvertices, nleaves, progress)
        points = get_canonical_2d_mds(
                distance_matrix, mass_vector, reference_points)
        image_string = get_animation_frame(
                args.image_format, frame_size, args.scale,
                mass_vector, index_edges, points)
        # the file names are numbered so that ffmpeg can read them in order
        image_filename = 'frame-%04d.%s' % (frame_index, args.image_format)
        image_pathname = os.path.join(args.output_directory, image_filename)
        with open(image_pathname, 'wb') as fout:
            fout.write(image_string)
        pbar.update(frame_index+1)
    pbar.finish()
if __name__ == '__main__':
    # the module docstring doubles as the command line description
    cli = argparse.ArgumentParser(description=__doc__)
    # options controlling the geometry of each frame
    cli.add_argument(
            '--scale', type=float, default=1.0,
            help='define the drawing scale')
    cli.add_argument(
            '--physical_width', type=int, default=480,
            help='width (pixels)')
    cli.add_argument(
            '--physical_height', type=int, default=360,
            help='height (pixels)')
    # options controlling what is drawn and how it is written
    cli.add_argument(
            '--tree', default=g_tree_string,
            help='newick tree with branch lengths')
    cli.add_argument(
            '--image_format', default='png',
            choices=('png', 'svg', 'ps', 'pdf'),
            help='image format')
    # options controlling the animation itself
    cli.add_argument(
            '--nframes', type=int, default=100,
            help='number of animation frames (image files) to create')
    cli.add_argument(
            '--interpolation', default='sigmoid',
            choices=('sigmoid', 'linear'),
            help='weights change according to this function')
    # the only positional argument is where the frames are written
    cli.add_argument(
            'output_directory',
            help='path to the output directory for .png frames')
    parsed_args = cli.parse_args()
    main(parsed_args)