from __future__ import division, print_function

import numpy as np
import pandas as pd
import tensorflow as tf

import paired_data  # project-local module providing patchwork(), used in fingerprint()


class siamese_network():

    def __init__(self, input_shape=(512, 12)):
        """Initialize a siamese network of two identical subnets.

        Args:
            input_shape (tuple): shape (n_frames, n_bins) of a single
                input example.
        """
        n_frames, n_bins = input_shape
        self.x_A = tf.placeholder('float', shape=[None, n_frames, n_bins], name='x_A')
        self.x_B = tf.placeholder('float', shape=[None, n_frames, n_bins], name='x_B')
        self.subnet_A = [tf.reshape(self.x_A, [-1, n_frames, n_bins, 1])]
        self.subnet_B = [tf.reshape(self.x_B, [-1, n_frames, n_bins, 1])]
        self.is_cover = tf.placeholder('float', shape=[None], name='is_cover')
        self.train_log = None
        self.log_count = 0

    def add_conv_layer(self, shape=(4, 1),
                       n_filters=None,
                       strides=[1, 1, 1, 1],
                       padding='SAME',
                       sigma=tf.nn.relu):
        """Add a simple 2D convolutional layer to each subnet.

        Args:
            shape (tuple): filter shape (height, width).
            n_filters (int): number of filters; defaults to twice the
                number of input channels.
            strides: strides for the convolution (see tf.nn.conv2d).
            padding (str): padding mode, 'SAME' or 'VALID'.
            sigma (tf.op): non-linearity, e.g., tf.nn.relu.
        """
        x_A = self.subnet_A[-1]
        x_B = self.subnet_B[-1]
        assert np.all(x_A.get_shape() == x_B.get_shape())
        n_channels = int(x_A.get_shape()[-1])
        # default n_filters is 2 * previous n_filters
        if n_filters is None:
            n_filters = n_channels * 2
        # weights and biases are shared between the two subnets
        W = self.weight_variable([shape[0], shape[1], n_channels, n_filters])
        b = self.bias_variable([n_filters])
        for subnet in [self.subnet_A, self.subnet_B]:
            x = subnet[-1]
            h = sigma(b + tf.nn.conv2d(x, W, strides=strides, padding=padding))
            subnet.append(h)
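
    # Illustrative example: with the initial input layer of shape
    # (None, 512, 12, 1), add_conv_layer() with the default shape=(4, 1),
    # 'SAME' padding and unit strides keeps the spatial dimensions and
    # doubles the channels, giving an output of shape (None, 512, 12, 2).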

    # NOTE: unfinished attention-layer sketch, kept for reference:
    # def add_attention(self):
    #     x_A = self.subnet_A[-1]
    #     shape_x = x_A.get_shape()
    #     batch_size, n_frames, n_bins, n_channels = [int(dim) for dim in shape_x]
    #     W = self.weight_variable([1, n_bins, n_channels, 1])
    #     b = self.bias_variable([1])
    #     for subnet in [self.subnet_A, self.subnet_B]:
    #         x = subnet[-1]
    #         att = tf.nn.softmax(tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID'))
    #         h =  # TODO: apply the attention weights to x
    #         subnet.append(h)

    def add_max_pool_layer(self, shape=(4, 1),
                           strides=None,
                           padding='SAME'):
        """Add a 2D max pool layer to each subnet."""
        ksize = [1, shape[0], shape[1], 1]
        if strides is None:
            strides = ksize
        for subnet in [self.subnet_A, self.subnet_B]:
            x = subnet[-1]
            h = tf.nn.max_pool(x, ksize=ksize, strides=strides, padding=padding)
            subnet.append(h)

    def add_fully_connected_layer(self, n_nodes, sigma=tf.nn.tanh):
        """Add a fully-connected layer to each subnet.

        Args:
            n_nodes (int): number of nodes.
            sigma (tf.op): non-linearity as a Tensorflow function,
                e.g., tf.nn.tanh or tf.nn.relu.
        """
        x_A = self.subnet_A[-1]
        x_B = self.subnet_B[-1]
        assert np.all(x_A.get_shape() == x_B.get_shape())
        n_nodes_in = int(np.prod([int(dim) for dim in x_A.get_shape()[1:]]))
        W = self.weight_variable([n_nodes_in, n_nodes])
        b = self.bias_variable([n_nodes])
        for subnet in [self.subnet_A, self.subnet_B]:
            x = subnet[-1]
            x_flat = tf.reshape(x, [-1, n_nodes_in])
            h = sigma(tf.matmul(x_flat, W) + b)
            subnet.append(h)

    def add_matmul_layer(self,
                         filter_len=4,
                         n_filters=8,
                         strides=[1, 1, 1, 1],
                         sigma=tf.nn.tanh):
        """MATMUL layer.

        For each subnet:
        - makes 2 copies of the last layer
        - performs a convolution with shape (filter_len, 1) on one of the
          copies, using n_filters different filters
        - transposes the other copy
        - for each filter, matrix-multiplies the convolved copy with the
          transposed copy (summing over the 'height' dimension shape[1])
        - flattens the result and applies the nonlinearity sigma

        Requires that the last layer has shape
        [batch_size, n_frames, 1, n_channels].
        Output has shape [batch_size, n_filters * n_channels**2].

        Args:
            filter_len (int): length of the filters in frames
            n_filters (int): number of filters
            strides: strides for the convolution operation (see tf.nn.conv2d)
            sigma: non-linearity to apply to the result (e.g., tf.nn.relu
                or tf.nn.tanh).
        """
        x_A = self.subnet_A[-1]
        x_B = self.subnet_B[-1]
        assert np.all(x_A.get_shape() == x_B.get_shape())
        shape_x = x_A.get_shape()
        n_frames, n_bins, n_channels = [int(dim) for dim in shape_x[1:]]
        if not n_bins == 1:
            raise ValueError('dimension 2 should be 1 (in current implementation)')
        W = self.weight_variable([filter_len, 1, 1, n_filters])
        for subnet in [self.subnet_A, self.subnet_B]:
            x = subnet[-1]
            # transpose (None, n_frames, 1, N)
            #        -> (None, n_frames, N, 1)
            x_T = tf.transpose(x, perm=[0, 1, 3, 2])
            # conv      (None, n_frames, N, 1)
            #        -> (None, n_frames, N, n_filters)
            x_conv = tf.nn.conv2d(x_T, W, strides=strides, padding='SAME')
            # tile      (None, n_frames, N, 1)
            #        -> (None, n_frames, N, n_filters)
            x_tile = tf.tile(x_T, [1, 1, 1, n_filters])
            # transpose (None, n_frames, N, n_filters)
            #        -> (None, n_filters, n_frames, N)
            x_conv_T = tf.transpose(x_conv, perm=[0, 3, 1, 2])
            x_tile_T = tf.transpose(x_tile, perm=[0, 3, 1, 2])
            # matmul    (None, n_filters, n_frames, N) x idem
            #        -> (None, n_filters, N, N)
            # (tf.batch_matmul with adj_x=True in pre-1.0 Tensorflow)
            matmul = tf.matmul(x_conv_T, x_tile_T, adjoint_a=True)
            # flatten   (None, n_filters, N, N)
            #        -> (None, n_filters * N**2)
            matmul_flat = tf.reshape(matmul, [-1, n_filters * n_channels**2])
            subnet.append(sigma(matmul_flat))
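
    # Illustrative shape walk-through (assumed values): if the last layer
    # has shape (None, 64, 1, 8), i.e. n_frames=64 and n_channels=8, then
    # add_matmul_layer(filter_len=4, n_filters=8) produces per-filter
    # products of shape (None, 8, 8, 8), flattened to an output of shape
    # (None, 8 * 8**2) = (None, 512).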

    def weight_variable(self, shape, weight_scale=0.1):
        """Return a Tensorflow variable with the given dimensions,
        initialized with tf.truncated_normal with standard
        deviation weight_scale.

        Args:
            shape (list): the variable's dimensions as a list
            weight_scale (float): standard deviation of the initial
                values

        Returns:
            tf.Variable: the variable
        """
        initial = tf.truncated_normal(shape, stddev=weight_scale)
        return tf.Variable(initial, name='a_weight')

    def bias_variable(self, shape, weight_scale=0.1):
        """Return a Tensorflow variable with the given dimensions,
        initialized as a tf.constant equal to the
        parameter weight_scale.

        Args:
            shape (list): the variable's dimensions as a list
            weight_scale (float): initial value

        Returns:
            tf.Variable: the variable
        """
        initial = tf.constant(weight_scale, shape=shape)
        return tf.Variable(initial, name='a_bias')

    # def n_layers(self):
    #     assert len(self.subnet_A) == len(self.subnet_B)
    #     return len(self.subnet_A)

    # def remove_layers(self, n_keep):
    #     self.subnet_A = self.subnet_A[:n_keep+1]
    #     self.subnet_B = self.subnet_B[:n_keep+1]

    def loss(self, m=10, alpha=1):
        """Return the loss function for training butterfly networks as a tensor.

        Minimizes pair distances while maximizing non-pair distances
        smaller than the margin `m`.

        Args:
            m (float): margin; non-cover pairs further apart than `m`
                contribute no loss.
            alpha (float): weight of the non-pair loss term.

        Returns:
            tf.Tensor: total butterfly loss as a tensor.
            tf.Tensor: cover-pair loss term.
            tf.Tensor: non-cover-pair loss term.
        """
        y_A, y_B = self.subnet_A[-1], self.subnet_B[-1]
        squared_dists = tf.reduce_sum(tf.square(y_A - y_B),
                                      reduction_indices=1)
        pair_errors = squared_dists
        non_pair_errors = tf.square(tf.maximum(0.0, m - tf.sqrt(squared_dists)))
        pair_loss = tf.reduce_mean(self.is_cover * pair_errors, name='pair_loss')
        non_pair_loss = tf.reduce_mean((1 - self.is_cover) * non_pair_errors,
                                       name='non_pair_loss')
        total_loss = tf.add(pair_loss, alpha * non_pair_loss, name='loss')
        return total_loss, pair_loss, non_pair_loss
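
    # The loss above is the standard contrastive loss: for embeddings
    # y_A, y_B with squared distance d^2 = ||y_A - y_B||^2 and label y
    # (1 for cover pairs, 0 otherwise),
    #     L = mean(y * d^2) + alpha * mean((1 - y) * max(0, m - d)^2),
    # so cover pairs are pulled together while non-cover pairs are pushed
    # apart until they are at least m away from each other.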

    def bhattacharyya(self):
        """Approximate Bhattacharyya distance between the distributions of
        cover and non-cover pair distances.

        Similar to the Mahalanobis distance, but defined for distributions
        with different variances. Assumes normality, hence approximate.

        Returns:
            tf.Tensor: Bhattacharyya distance between the distributions of
                the cover and non-cover pairs' distances.
            tf.Tensor: mean cover pair distance
            tf.Tensor: mean non-cover pair distance
        """
        y_A, y_B = self.subnet_A[-1], self.subnet_B[-1]
        squared_dists = tf.reduce_sum(tf.square(y_A - y_B),
                                      reduction_indices=1)
        cover_pairs = tf.where(tf.equal(self.is_cover, tf.ones_like(self.is_cover)))
        non_cover_pairs = tf.where(tf.equal(self.is_cover, tf.zeros_like(self.is_cover)))
        pair_dists = tf.sqrt(tf.gather(squared_dists, tf.reshape(cover_pairs, [-1])))
        non_pair_dists = tf.sqrt(tf.gather(squared_dists, tf.reshape(non_cover_pairs, [-1])))
        mu_pairs, sigma2_pairs = tf.nn.moments(pair_dists, axes=[0], name='d_pairs')
        mu_non_pairs, sigma2_non_pairs = tf.nn.moments(non_pair_dists, axes=[0], name='d_non_pairs')
        bhatt = tf.add(0.25 * tf.log(0.25 * (sigma2_pairs / sigma2_non_pairs +
                                             sigma2_non_pairs / sigma2_pairs + 2)),
                       0.25 * (mu_pairs - mu_non_pairs)**2 / (sigma2_pairs + sigma2_non_pairs),
                       name='bhatt')
        return bhatt, mu_pairs, mu_non_pairs
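
    # For two normal distributions N(mu_1, sigma_1^2) and N(mu_2, sigma_2^2),
    # the Bhattacharyya distance computed above is
    #     D_B = 1/4 * ln(1/4 * (sigma_1^2/sigma_2^2 + sigma_2^2/sigma_1^2 + 2))
    #         + 1/4 * (mu_1 - mu_2)^2 / (sigma_1^2 + sigma_2^2),
    # which grows as the two distance distributions become more separable.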

    def train_step(self, loss, learning_rate=3e-4):
        """Return an Adam optimizer train step that minimizes `loss`.

        Args:
            loss (tf.Tensor): loss to minimize, or None to use the default
                loss returned by `self.loss()`.
            learning_rate (float): Adam learning rate.
        """
        if loss is None:
            # compute the loss here only if it is not needed later for logging
            loss, _, _ = self.loss()
        adam = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        return adam

    def log_errors(self, session, train_batch, test_batch, metrics,
                   log_every=1, verbose=True):
        """Compute train and test metrics and add them to the training
        log `train_log`.

        Args:
            session (tf.Session): session in which to run the metric
                evaluation
            train_batch (tuple): batch of training data
                (x_A, x_B, is_cover)
            test_batch (tuple): batch of test data
                (x_A, x_B, is_cover)
            metrics (list): metric tensors to evaluate
            log_every (int): log only one in every `log_every` calls
            verbose (bool): if True, print the newly added log row
        """
        def _strip(metric_name, strip_slash=True, strip_colon=True):
            # strip everything after '/' and/or ':' from var names
            stripped = metric_name
            if strip_slash:
                stripped = metric_name.split('/')[0]
            if strip_colon:
                stripped = stripped.split(':')[0]
            return stripped

        if self.log_count % log_every == 0:
            train_metric_names = ['TR.' + _strip(metric.name) for metric in metrics]
            test_metric_names = ['TE.' + _strip(metric.name) for metric in metrics]
            if self.train_log is None:
                col_names = train_metric_names + test_metric_names
                self.train_log = pd.DataFrame(columns=col_names)
            # train and test feeds
            train_feed = {self.x_A: train_batch[0], self.x_B: train_batch[1],
                          self.is_cover: train_batch[2]}
            test_feed = {self.x_A: test_batch[0], self.x_B: test_batch[1],
                         self.is_cover: test_batch[2]}
            # compute and log metrics
            train_metrics = session.run(metrics, feed_dict=train_feed)
            self.train_log.loc[self.log_count, train_metric_names] = train_metrics
            test_metrics = session.run(metrics, feed_dict=test_feed)
            self.train_log.loc[self.log_count, test_metric_names] = test_metrics
            # optionally print the last row
            if verbose:
                print(self.train_log[-1:], '\n')
        self.log_count += 1

    def fingerprint(self, chroma, n_patches=8, patch_len=64):
        """Compute fingerprints for a chroma array, one for each of the
        12 circular pitch shifts (musical transpositions).

        Args:
            chroma (np.ndarray): chroma features of shape (n_frames, 12)
            n_patches (int): number of patches in the patchwork
            patch_len (int): patch length in frames

        Returns:
            list: 12 flattened subnet outputs, one per transposition.
        """
        n_frames, n_bins = chroma.shape
        if not n_frames == n_patches * patch_len:
            chroma = paired_data.patchwork(chroma, n_patches=n_patches,
                                           patch_len=patch_len)
        fps = []
        for i in range(12):
            # roll along the chroma axis, i.e., transpose by i semitones
            chroma_transposed = np.roll(chroma, -i, axis=1)
            chroma_tensorshaped = chroma_transposed.reshape((1, n_patches * patch_len, 12))
            network_out = self.subnet_A[-1]
            fp = network_out.eval(feed_dict={self.x_A: chroma_tensorshaped})
            fps.append(fp.flatten())
        return fps


def get_batches(arrays, batch_size=50):
    """Batch generator, no shuffling.

    Args:
        arrays (list): list of arrays. Arrays should have equal length.
        batch_size (int): number of examples per batch.

    Yields:
        list: one slice per array, each of length `batch_size`,
            wrapping around the end of the data.

    Usage:
        >>> batches = get_batches([X, Y], batch_size=50)
        >>> x, y = next(batches)
    """
    array_lengths = [len(array) for array in arrays]
    n_examples = array_lengths[0]
    if not np.all(np.array(array_lengths) == n_examples):
        raise ValueError('Arrays must have the same length.')
    start = 0
    while True:
        start = np.mod(start, n_examples)
        stop = start + batch_size
        batch = [np.take(array, range(start, stop), axis=0, mode='wrap')
                 for array in arrays]
        start = stop
        yield batch
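

# Minimal usage sketch (not part of the library): builds a small butterfly
# network and runs a few training steps on random stand-in data. The
# architecture and hyperparameters here are illustrative assumptions, not a
# recommended setup.
if __name__ == '__main__':
    net = siamese_network(input_shape=(512, 12))
    net.add_conv_layer(shape=(4, 1), n_filters=2)
    net.add_max_pool_layer(shape=(4, 1))
    net.add_fully_connected_layer(n_nodes=128)
    total_loss, pair_loss, non_pair_loss = net.loss(m=10, alpha=1)
    step = net.train_step(total_loss)

    # random stand-in data: 100 example pairs with binary cover labels
    x_A = np.random.rand(100, 512, 12).astype('float32')
    x_B = np.random.rand(100, 512, 12).astype('float32')
    labels = np.random.randint(0, 2, size=100).astype('float32')
    batches = get_batches([x_A, x_B, labels], batch_size=50)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for _ in range(10):
            batch = next(batches)
            sess.run(step, feed_dict={net.x_A: batch[0],
                                      net.x_B: batch[1],
                                      net.is_cover: batch[2]})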