# vgg_network.py
"""
Loads vgg16 from disk as a tensorflow model with batching to process style, content, and synthesized images
simultaneously (while abstracting the accessors to compute style/content loss based on that representation).
"""
from style_helpers import gramian
import tensorflow as tf
class VGGNetwork(object):
    """
    A VGG16 graph imported from ``models/vgg16.tfmodel`` and fed one batch that
    stacks style, content and synthesized images along the first axis.

    The batch layout is fixed at construction time:
        [0:i]           -> i style images
        [i:i+j]         -> j content images
        [i+j:i+j+k]     -> k synthesized images

    The loss helpers slice layer outputs along that axis to compare the
    synthesized images against the style / content references.
    """

    def __init__(self, name, input, i, j, k):
        """
        Import the frozen VGG16 graph into the default graph under ``name``.

        :param name: Name scope the imported ops are placed under; also used to
            look tensors up again later ("<name>/convX_Y/Relu:0").
        :param input: A 4D-tensor of shape [batchSize, 224, 224, 3]
            [0:i, :, :, :] holds i style images,
            [i:i+j, :, :, :] holds j content images,
            [i+j:i+j+k, :, :, :] holds k synthesized images
        :param i: Number of style images in the batch.
        :param j: Number of content images in the batch.
        :param k: Number of synthesized images in the batch.
        """
        self.name = name
        self.num_style = i
        self.num_content = j
        self.num_synthesized = k

        with open("models/vgg16.tfmodel", mode='rb') as f:
            file_content = f.read()

        graph_def = tf.GraphDef()
        graph_def.ParseFromString(file_content)
        # Rewire the model's "images" input to the caller's batch tensor.
        tf.import_graph_def(graph_def, input_map={"images": input}, name=self.name)

    def print_op_names(self):
        """
        Utility for inspecting graph layers since this model is a bit big for Tensorboard.

        Prints the names of every operation in the default graph (not only the
        ones that belong to this network).
        """
        print([op.name for op in tf.get_default_graph().get_operations()])

    def channels_for_layer(self, layer):
        """
        Returns the static channel count of the first convolution of a block.

        :param layer: An integer block number. NOTE: unlike the other accessors
            this is NOT a tuple; the lookup always uses "conv<layer>_1".
        """
        activations = tf.get_default_graph().get_tensor_by_name(
            "%s/conv%d_1/Relu:0" % (self.name, layer))
        return activations.get_shape().as_list()[3]

    def gramian_for_layer(self, layer):
        """
        Returns a matrix of cross-correlations between the activations of convolutional channels in a given layer.

        :param layer: A tuple that indexes into the convolutional blocks of the VGG Net
        """
        activations = self.activations_for_layer(layer)
        # Transpose (not reshape) from channels-last to channels-first: the two
        # spatial axes keep their relative order, channels move to axis 1.
        shuffled_activations = tf.transpose(activations, perm=[0, 3, 1, 2])
        # gramian() is imported from style_helpers; style_loss treats its
        # result as a rank-3 tensor with the batch on axis 0.
        return gramian(shuffled_activations)

    def activations_for_layer(self, layer):
        """
        Returns the ReLU output tensor "<name>/conv<a>_<b>/Relu:0".

        :param layer: A tuple that indexes into the convolutional blocks of the VGG Net
        """
        tensor_name = "{0}/conv{1}_{2}/Relu:0".format(self.name, layer[0], layer[1])
        return tf.get_default_graph().get_tensor_by_name(tensor_name)

    def _reference_minus_synthesized(self, tensor, ref_start, ref_count):
        """
        Subtracts the synthesized rows of ``tensor`` from its reference rows.

        The reference rows [ref_start:ref_start+ref_count] are tiled
        (num_synthesized - ref_count + 1) times along the batch axis, so the
        shapes line up when ref_count is 1 or equals num_synthesized.

        :param tensor: A tensor of statically known rank, batch on axis 0.
        :param ref_start: First batch index of the reference images.
        :param ref_count: Number of reference images.
        """
        trailing = len(tensor.get_shape().as_list()) - 1
        reference = tf.slice(
            tensor,
            [ref_start] + [0] * trailing,
            [ref_count] + [-1] * trailing)
        # Always take every synthesized image, whatever the reference count is.
        synthesized = tf.slice(
            tensor,
            [self.num_style + self.num_content] + [0] * trailing,
            [self.num_synthesized] + [-1] * trailing)
        repeats = self.num_synthesized - ref_count + 1
        return tf.sub(tf.tile(reference, [repeats] + [1] * trailing), synthesized)

    def style_loss(self, layers):
        """
        Returns a scalar tensor: the mean over ``layers`` of the summed squared
        gramian differences between style and synthesized images, each divided
        by 4 * N^2 * M^2.

        :param layers: A sequence of (block, index) tuples.
        """
        activations = [self.activations_for_layer(layer) for layer in layers]
        gramians = [self.gramian_for_layer(layer) for layer in layers]

        # Style rows start at batch index 0.
        gramian_diffs = [
            self._reference_minus_synthesized(g, 0, self.num_style)
            for g in gramians]

        # N: size of the last gramian axis; M: product of the two spatial dims.
        Ns = [g.get_shape().as_list()[2] for g in gramians]
        Ms = [a.get_shape().as_list()[1] * a.get_shape().as_list()[2] for a in activations]

        squared_diffs = [tf.square(g) for g in gramian_diffs]
        per_layer = [
            tf.div(tf.reduce_sum(x), 4 * (N ** 2) * (M ** 2))
            for x, N, M in zip(squared_diffs, Ns, Ms)]
        style_loss = tf.div(tf.add_n(per_layer), len(layers))
        return style_loss

    def content_loss(self, layers):
        """
        Returns a scalar tensor: half the sum over ``layers`` of the squared
        activation differences between content and synthesized images, each
        layer normalized by its height * width * channels.

        :param layers: A sequence of (block, index) tuples.
        """
        activations = [self.activations_for_layer(layer) for layer in layers]

        # Content rows start right after the style rows. The synthesized slice
        # covers all num_synthesized images (it used to take only num_content
        # of them, silently ignoring the rest when num_content was smaller).
        activation_diffs = [
            self._reference_minus_synthesized(a, self.num_style, self.num_content)
            for a in activations]

        # This normalizer is in JCJohnson's paper, but not Gatys' I think?
        Ns = [a.get_shape().as_list()[1] * a.get_shape().as_list()[2] * a.get_shape().as_list()[3]
              for a in activations]

        per_layer = [
            tf.div(tf.reduce_sum(tf.square(a)), n)
            for a, n in zip(activation_diffs, Ns)]
        content_loss = tf.div(tf.add_n(per_layer), 2.0)
        return content_loss

    def combined_loss(self, style_layers, content_layers, alpha=0.001, beta=1.0):
        """
        Returns beta * style_loss(style_layers) + alpha * content_loss(content_layers).

        :param style_layers: Layers passed to style_loss.
        :param content_layers: Layers passed to content_loss.
        :param alpha: Weight of the content term.
        :param beta: Weight of the style term.
        """
        style_loss = self.style_loss(style_layers)
        content_loss = self.content_loss(content_layers)
        combined_loss = tf.add(tf.mul(beta, style_loss), tf.mul(alpha, content_loss))
        return combined_loss