-
Notifications
You must be signed in to change notification settings - Fork 4
/
utils.py
306 lines (279 loc) · 11.5 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
""" This file contains different utility functions that are not connected
in anyway to the networks presented in the tutorials, but rather help in
processing the outputs into a more understandable way.
For example ``tile_raster_images`` helps in generating a easy to grasp
image from a set of samples or weights.
"""
import numpy as np
import PIL as PIL
# Stuff for visualizing diagnostics
from sklearn.neighbors import KernelDensity
import matplotlib as mpl
# Select the 'Agg' backend at import time, before any plotting helper below
# imports matplotlib.pyplot. NOTE(review): presumably chosen so figures can be
# saved on machines without a display -- confirm.
mpl.use('Agg')
class batch(object):
    """
    Decorator that evaluates a function over its input in fixed-size chunks.

    The decorated callable must have the signature ``f(t, Z)``. The wrapper
    has the signature ``wrapper(t, X)``: ``X`` is converted to an array, cut
    into consecutive chunks of ``batch_size`` rows along axis 0, and ``f`` is
    called once per chunk with ``t`` passed through unchanged. A final short
    chunk is padded with zero rows up to ``batch_size`` (so ``f`` always sees
    exactly ``batch_size`` rows) and the outputs belonging to the padding are
    discarded. All per-row outputs are collected and returned as one
    ``float32`` array.

    The padding matches whatever trailing shape ``X`` has, so 1-D, 2-D and
    higher-dimensional inputs are all supported.
    """

    def __init__(self, batch_size):
        # Number of rows handed to the wrapped function on every call.
        self.batch_size = batch_size

    def __call__(self, f):
        def wrapper(t, X):
            X = np.array(X)
            results = []
            for start in range(0, len(X), self.batch_size):
                chunk = X[start:start + self.batch_size]
                n_valid = len(chunk)
                padded = n_valid != self.batch_size
                if padded:
                    # Pad with zero rows shaped like the rows of X (any rank).
                    pad_shape = (self.batch_size - n_valid,) + X.shape[1:]
                    pad = np.zeros(pad_shape, dtype=X.dtype)
                    chunk = np.concatenate((chunk, pad), axis=0)
                chunk_results = f(t, chunk)
                if padded:
                    # Drop the outputs computed for the zero padding.
                    chunk_results = chunk_results[:n_valid]
                results.extend(chunk_results)
            return np.array(results, dtype='float32')
        return wrapper
def scale_to_unit_interval(ndar, eps=1e-8):
    """
    Scale all values in the ndarray ``ndar`` to lie between 0 and 1.

    The minimum is shifted to 0 and the result is divided by
    ``(max + eps)``; ``eps`` keeps the division finite when every value is
    identical. The input is never modified; a new array is returned.

    The arithmetic is done out-of-place so that integer arrays are promoted
    to floating point instead of failing on an in-place float multiply.
    """
    ndar = np.asarray(ndar)
    shifted = ndar - ndar.min()
    return shifted * (1.0 / (shifted.max() + eps))
def tile_raster_images(X, img_shape=None, tile_shape=None, tile_spacing=(0, 0),
                       scale=True, colorImg=False):
    """
    Transform an array with one flattened image per row, into an array in
    which images are reshaped and laid out like tiles on a floor.
    This function is useful for visualizing datasets whose rows are images,
    and also columns of matrices for transforming those rows
    (such as the first layer of a neural net).

    Parameters
    ----------
    X : 2-D array, one flattened image per row.
    img_shape : (height, width) of a single image.
    tile_shape : (rows, cols) of the tile grid.
    tile_spacing : (vertical, horizontal) gap in pixels between tiles.
    scale : if True, rescale values to [0, 1] before converting to bytes.
        For single-channel input every image is rescaled on its own via
        ``scale_to_unit_interval``; for colour input each channel is rescaled
        over the whole channel.
    colorImg : if True, each row is split into three equal-length channel
        blocks stored back to back, and an RGBA image is produced.

    Returns
    -------
    uint8 array of shape ``out_shape`` (single channel) or
    ``out_shape + (4,)`` (colour, with a fully opaque alpha plane).
    """
    X = X * 1.0  # converts ints to floats
    if colorImg:
        # Integer division: the result is used as a slice bound.
        channel_size = X.shape[1] // 3
        X = (X[:, 0:channel_size],
             X[:, channel_size:2 * channel_size],
             X[:, 2 * channel_size:3 * channel_size],
             None)

    assert len(img_shape) == 2
    assert len(tile_shape) == 2
    assert len(tile_spacing) == 2

    # Per axis: (image size + gap) * tile count, minus the trailing gap.
    out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp
                 in zip(img_shape, tile_shape, tile_spacing)]

    if isinstance(X, tuple):
        assert len(X) == 4
        # Create an output np ndarray to store the image
        out_array = np.zeros((out_shape[0], out_shape[1], 4), dtype='uint8')
        # colors default to 0, alpha defaults to 255 (opaque)
        channel_defaults = [0, 0, 0, 255]
        for i in range(4):
            if X[i] is None:
                # if channel is None, fill it with its default value
                out_array[:, :, i] = np.zeros(out_shape,
                                              dtype='uint8') + channel_defaults[i]
                if i < 3:
                    print('WHY AM I HERE (utils.py line 101)?')
            else:
                xi = X[i]
                if scale:
                    # shift and scale this channel to be in [0...1]; a
                    # constant channel maps to 0 instead of dividing by zero
                    lo = xi.min()
                    span = xi.max() - lo
                    if span > 0:
                        xi = (xi - lo) / span
                    else:
                        xi = np.zeros_like(xi)
                # recursive call tiles this single channel; scaling has
                # already been applied above, so it is disabled here
                out_array[:, :, i] = tile_raster_images(
                    xi, img_shape=img_shape, tile_shape=tile_shape,
                    tile_spacing=tile_spacing, scale=False)
        return out_array

    # if we are dealing with only one channel
    H, W = img_shape
    Hs, Ws = tile_spacing
    # Background value for gaps and unused tiles: 128 * max(X) cast to uint8.
    # NOTE(review): looks intended as mid-grey for data in [0, 1]; the cast
    # is not meaningful for larger maxima -- confirm.
    mean_pix_val = np.zeros((1,)) + (128 * np.max(X.astype(np.float32)))
    mean_pix_val = mean_pix_val.astype(np.uint8)
    # generate a matrix to store the output
    out_array = np.zeros(out_shape, dtype='uint8') + mean_pix_val[0]
    for tile_row in range(tile_shape[0]):
        for tile_col in range(tile_shape[1]):
            idx = tile_row * tile_shape[1] + tile_col
            if idx >= X.shape[0]:
                # grid has more slots than images: leave background
                continue
            this_img = X[idx].reshape(img_shape)
            if scale:
                # rescale this image on its own to [0, 1]
                this_img = scale_to_unit_interval(this_img)
            # add the slice to the corresponding position in the
            # output array
            top = tile_row * (H + Hs)
            left = tile_col * (W + Ws)
            out_array[top:top + H, left:left + W] = this_img * 255
    return out_array
def visualize(EN, proto_key, layer_num, file_name):
    """
    Save a tiled image of one layer's weight matrix to ``file_name``.

    The weights are read from ``EN.proto_nets[proto_key][layer_num].W`` and
    transposed, so each row of the transposed matrix becomes one square tile
    of side ``int(sqrt(row length))``. Tiles are arranged in 10 rows with as
    many columns as needed to show every row, separated by 1-pixel gaps.

    NOTE(review): ``W`` is presumably a Theano shared variable (it is read
    through ``get_value(borrow=True)``) -- confirm against the caller.
    """
    # ``import PIL`` alone does not load the Image submodule.
    from PIL import Image

    W = EN.proto_nets[proto_key][layer_num].W.get_value(borrow=True).T
    size = int(np.sqrt(W.shape[1]))
    num_rows = 10
    # Integer ceiling division: tile counts must be ints, and rounding up
    # keeps the last partial column of filters instead of dropping it.
    num_cols = -(-W.shape[0] // num_rows)
    tiles = tile_raster_images(X=W, img_shape=(size, size),
                               tile_shape=(num_rows, num_cols),
                               tile_spacing=(1, 1))
    image = Image.fromarray(tiles)
    image.save(file_name)
    return
def visualize_net_layer(net_layer, file_name, colorImg=False,
                        use_transpose=False, transform=None):
    """
    Save a tiled image of ``net_layer.W`` to ``file_name``.

    Parameters
    ----------
    net_layer : object whose ``W`` attribute exposes ``get_value``.
        NOTE(review): presumably a Theano shared variable -- confirm.
    file_name : path passed to the image's ``save`` method.
    colorImg : if True, each row is treated as three colour channels, so the
        tile side is ``int(sqrt(row length / 3))``.
    use_transpose : if False (default) the weight matrix is transposed before
        tiling; if True it is used as stored.
    transform : optional callable applied to the matrix before tiling.

    Tiles are arranged in 10 rows with enough columns to show every row of
    the matrix, separated by 1-pixel gaps, and rescaled via ``scale=True``.
    """
    # ``import PIL`` alone does not load the Image submodule.
    from PIL import Image

    W = net_layer.W.get_value(borrow=False)
    if not use_transpose:
        W = W.T
    if transform is not None:
        W = transform(W)
    pixels_per_channel = W.shape[1] / 3.0 if colorImg else W.shape[1]
    size = int(np.sqrt(pixels_per_channel))
    num_rows = 10
    # Exact integer ceiling; the previous ``+ 0.999`` trick floors under
    # integer division and misrounds very small fractional parts.
    num_cols = -(-W.shape[0] // num_rows)
    img_shape = (size, size)
    tile_shape = (num_rows, num_cols)
    image = tile_raster_images(X=W, img_shape=img_shape, tile_shape=tile_shape,
                               tile_spacing=(1, 1), scale=True,
                               colorImg=colorImg)
    image = Image.fromarray(image)
    image.save(file_name)
    return
def visualize_samples(X_samp, file_name, num_rows=10):
    """
    Save the rows of ``X_samp`` as a grid of square tiles in ``file_name``.

    Each row is reshaped to ``(d, d)`` with ``d = int(sqrt(row length))``.
    The grid has ``num_rows`` rows and as many columns as needed to show
    every sample, with 1-pixel gaps between tiles.
    """
    # ``import PIL`` alone does not load the Image submodule.
    from PIL import Image

    d = int(np.sqrt(X_samp.shape[1]))
    num_rows = int(num_rows)
    # Integer ceiling division: tile counts must be ints, and rounding up
    # keeps the samples in a final partial column.
    num_cols = -(-X_samp.shape[0] // num_rows)
    tiles = tile_raster_images(X=X_samp, img_shape=(d, d),
                               tile_shape=(num_rows, num_cols),
                               tile_spacing=(1, 1))
    image = Image.fromarray(tiles)
    image.save(file_name)
    return
# Matrix to image
def mat_to_img(X, file_name, img_shape, num_rows=10,
               scale=True, colorImg=False, tile_spacing=(1, 1)):
    """
    Tile the rows of matrix ``X`` into one image and save it to ``file_name``.

    Parameters
    ----------
    X : 2-D array, one flattened image per row.
    file_name : path passed to the image's ``save`` method.
    img_shape : (height, width) of each tile.
    num_rows : number of tile rows; the column count is derived so that
        every row of ``X`` gets a tile.
    scale, colorImg, tile_spacing : forwarded to ``tile_raster_images``.
    """
    # ``import PIL`` alone does not load the Image submodule.
    from PIL import Image

    num_rows = int(num_rows)
    # Exact integer ceiling; the previous ``+ 0.999`` trick floors under
    # integer division and misrounds very small fractional parts.
    num_cols = -(-X.shape[0] // num_rows)
    tile_shape = (num_rows, num_cols)
    # make a tiled image from the given matrix's rows
    image = tile_raster_images(X=X, img_shape=img_shape,
                               tile_shape=tile_shape,
                               tile_spacing=tile_spacing,
                               scale=scale, colorImg=colorImg)
    # convert to a standard image format and save to disk
    image = Image.fromarray(image)
    image.save(file_name)
    return
def plot_kde_histogram(X, f_name, bins=25):
    """
    Plot KDE-smoothed histogram of the data in X. Assume data is univariate.

    ``X`` is flattened, randomly subsampled to at most 1,000,000 values and
    fitted with a Gaussian kernel density estimator whose bandwidth is the
    data range divided by ``bins``. The density is drawn over the data range
    extended by a third of the range on each side and saved to ``f_name``.
    The caller's array is not modified.
    """
    import matplotlib.pyplot as plt
    # permutation() returns a shuffled copy; shuffling X.ravel() in place
    # would reorder the caller's array whenever ravel() returns a view.
    X = np.random.permutation(X.ravel())
    X = X[0:min(X.shape[0], 1000000)]
    X_samp = X[:, np.newaxis]
    X_min = np.min(X_samp)
    X_max = np.max(X_samp)
    X_range = X_max - X_min
    sigma = X_range / float(bins)
    plot_min = X_min - (X_range / 3.0)
    plot_max = X_max + (X_range / 3.0)
    plot_X = np.linspace(plot_min, plot_max, 1000)[:, np.newaxis]
    # make a kernel density estimator for the data in X
    kde = KernelDensity(kernel='gaussian', bandwidth=sigma).fit(X_samp)
    # make a figure
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # score_samples returns log-density, hence the exp
    ax.plot(plot_X, np.exp(kde.score_samples(plot_X)))
    # papertype/frameon were only ever passed as None and are no longer
    # accepted by recent matplotlib releases, so they are omitted.
    fig.savefig(f_name, dpi=None, facecolor='w', edgecolor='w',
                orientation='portrait', format=None,
                transparent=False, bbox_inches=None, pad_inches=0.1)
    plt.close(fig)
    return
def plot_kde_histogram2(X1, X2, f_name, bins=25):
    """
    Plot KDE-smoothed histogram of the data in X1/X2. Assume data is 1D.

    Both inputs are flattened and fitted separately with a Gaussian kernel
    density estimator (bandwidth = data range / ``bins``). The two densities
    are drawn on the same axes, ``X1`` with a solid line and ``X2`` with a
    dashed line, and the figure is saved to ``f_name``.
    """
    import matplotlib.pyplot as plt
    # make a figure and configure an axis
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # No ax.hold(True): it was removed from matplotlib, and successive plot
    # calls on the same axes overlay by default.
    for (X, style) in [(X1, '-'), (X2, '--')]:
        X_samp = X.ravel()[:, np.newaxis]
        X_min = np.min(X_samp)
        X_max = np.max(X_samp)
        X_range = X_max - X_min
        sigma = X_range / float(bins)
        plot_min = X_min - (X_range / 3.0)
        plot_max = X_max + (X_range / 3.0)
        plot_X = np.linspace(plot_min, plot_max, 1000)[:, np.newaxis]
        # make a kernel density estimator for the data in X
        kde = KernelDensity(kernel='gaussian', bandwidth=sigma).fit(X_samp)
        # score_samples returns log-density, hence the exp
        ax.plot(plot_X, np.exp(kde.score_samples(plot_X)), linestyle=style)
    # papertype/frameon were only ever passed as None and are no longer
    # accepted by recent matplotlib releases, so they are omitted.
    fig.savefig(f_name, dpi=None, facecolor='w', edgecolor='w',
                orientation='portrait', format=None,
                transparent=False, bbox_inches=None, pad_inches=0.1)
    plt.close(fig)
    return
def plot_stem(x, y, f_name):
    """
    Plot a stem plot of ``y`` against ``x`` and save it to ``f_name``.

    Stems are blue lines with blue circle markers on a red baseline.
    """
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.stem(x, y, linefmt='b-', markerfmt='bo', basefmt='r-')
    # papertype/frameon were only ever passed as None and are no longer
    # accepted by recent matplotlib releases, so they are omitted.
    fig.savefig(f_name, dpi=None, facecolor='w', edgecolor='w',
                orientation='portrait', format=None,
                transparent=False, bbox_inches=None, pad_inches=0.1)
    plt.close(fig)
    return
def plot_line(x, y, f_name):
    """
    Plot a line plot of ``y`` against ``x`` and save it to ``f_name``.
    """
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(x, y)
    # papertype/frameon were only ever passed as None and are no longer
    # accepted by recent matplotlib releases, so they are omitted.
    fig.savefig(f_name, dpi=None, facecolor='w', edgecolor='w',
                orientation='portrait', format=None,
                transparent=False, bbox_inches=None, pad_inches=0.1)
    plt.close(fig)
    return
def plot_scatter(x, y, f_name, x_label=None, y_label=None):
    """
    Plot a scatter plot of ``y`` against ``x`` and save it as PNG to ``f_name``.

    ``x_label`` / ``y_label`` default to 'Posterior KLd' and
    'Expected Log-likelihood' when not given. Points are drawn as
    semi-transparent blue circles; axis labels use font size 22 and tick
    labels font size 18.
    """
    import matplotlib.pyplot as plt
    if x_label is None:
        x_label = 'Posterior KLd'
    if y_label is None:
        y_label = 'Expected Log-likelihood'
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Shift the axes slightly up/right and shrink them a little.
    # NOTE(review): presumably to make room for the large labels -- confirm.
    box = ax.get_position()
    ax.set_position([box.x0 + (0.05 * box.width),
                     box.y0 + (0.05 * box.height),
                     0.96 * box.width, 0.96 * box.height])
    ax.set_xlabel(x_label, fontsize=22)
    ax.set_ylabel(y_label, fontsize=22)
    # No ax.hold(True): it was removed from matplotlib and is not needed for
    # a single scatter call.
    ax.scatter(x, y, s=24, alpha=0.5, c=u'b', marker=u'o')
    # Re-apply the current tick locations just to set the tick font size.
    plt.sca(ax)
    x_locs, x_labels = plt.xticks()
    plt.xticks(x_locs, fontsize=18)
    y_locs, y_labels = plt.yticks()
    plt.yticks(y_locs, fontsize=18)
    # papertype/frameon were only ever passed as None and are no longer
    # accepted by recent matplotlib releases, so they are omitted.
    fig.savefig(f_name, dpi=None, facecolor='w', edgecolor='w',
                orientation='portrait', format='png',
                transparent=False, bbox_inches=None, pad_inches=0.1)
    plt.close(fig)
    return