def feature_decoder(self, inputs, reuse=False):
    """feature decoder of the inputs"""
    with tf.variable_scope('feature_decoder', reuse=reuse):
        with slim.arg_scope(self.msg_arg_scope()):
            # upsample back toward the input resolution with residual blocks
            net = custom_ops.residual_block_upsample(inputs, 128, 2, scope='dec_2')
            net = custom_ops.residual_block_upsample(net, 64, 2, scope='dec_1')
            # output processing: pre-activation, then a tanh projection to RGB
            outputs = slim.layer_norm(net,
                                      activation_fn=tf.nn.relu,
                                      scope='dec_0_preact')
            with slim.arg_scope([slim.conv2d],
                                normalizer_fn=None,
                                activation_fn=tf.tanh):
                outputs = custom_ops.conv2d_same(outputs, 3, 7, 1, scope='dec_0')
            return outputs
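# custom_ops is project-specific and not shown here. For reference, a minimal
# sketch of the conv2d_same pattern the calls above assume (the same trick as
# slim's resnet_utils: pad explicitly so the output size is independent of the
# input size) -- a sketch under those assumptions, not the project's own code:
import tensorflow as tf
slim = tf.contrib.slim  # TF 1.x, as used throughout these snippets

def conv2d_same(inputs, num_outputs, kernel_size, stride, scope=None):
    """2-D convolution with 'SAME'-style padding done explicitly."""
    if stride == 1:
        return slim.conv2d(inputs, num_outputs, kernel_size, stride=1,
                           padding='SAME', scope=scope)
    # pad so that output size == ceil(input size / stride) for any input size
    pad_total = kernel_size - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    inputs = tf.pad(inputs,
                    [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
    return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride,
                       padding='VALID', scope=scope)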
def style_transfer(self, inputs, styles, reuse=False):
    """style transfer with the pipeline: Encode -> AdaIN -> Decode"""
    # image encoder: run VGG over the content images
    with slim.arg_scope(vgg.vgg_arg_scope(self.weight_decay)):
        _, content_points = network_map[self.network_name](
            inputs, spatial_squeeze=False, is_training=False, reuse=reuse)
        # list(...) keeps this working under Python 3, where dict.keys()
        # returns a view that cannot be indexed directly
        content_label = list(content_points.keys())[0].split('/')[0]
        content_net = content_points[content_label + '/' + self.content_loss_layers[-1]]

    # run the same VGG (reused weights) over the style images
    with slim.arg_scope(vgg.vgg_arg_scope(self.weight_decay)):
        _, style_points = network_map[self.network_name](
            styles, spatial_squeeze=False, is_training=False, reuse=True)
        style_label = list(style_points.keys())[0].split('/')[0]
        style_net = style_points[style_label + '/' + self.content_loss_layers[-1]]

    # adaptive instance normalization
    content_feature = adaptive_instance_normalization(content_net, style_net)
    content_features = {self.content_loss_layers[-1]: content_feature}

    # image decoder: mirror the VGG encoder back up to the input resolution
    with tf.variable_scope('image_decoder', values=[content_feature], reuse=reuse):
        with slim.arg_scope(
                [slim.conv2d],
                weights_regularizer=slim.l2_regularizer(self.weight_decay),
                normalizer_fn=slim.layer_norm,
                activation_fn=tf.nn.relu):
            # mimic conv4_1
            net = custom_ops.conv2d_same(content_feature, 512, 3, 1, scope='conv1')
            # mimic conv3_4 + max_pool2d
            net = custom_ops.conv2d_resize(net, 256, 3, 2, scope='deconv1')
            # mimic conv3_3 + conv3_2 + conv3_1
            net = slim.repeat(
                net, 3, custom_ops.conv2d_same, 256, 3, 1, scope='conv2')
            # mimic conv2_2 + max_pool2d
            net = custom_ops.conv2d_resize(net, 128, 3, 2, scope='deconv2')
            # mimic conv2_1
            net = custom_ops.conv2d_same(net, 128, 3, 1, scope='conv3')
            # mimic conv1_2 + max_pool2d
            net = custom_ops.conv2d_resize(net, 64, 3, 2, scope='deconv3')
            # mimic conv1_1
            net = custom_ops.conv2d_same(net, 64, 3, 1, scope='conv4')
            # project to RGB with tanh, rescaled below to roughly [0, 255]
            with slim.arg_scope([slim.conv2d], activation_fn=tf.tanh, normalizer_fn=None):
                outputs = custom_ops.conv2d_same(net, 3, 3, 1, scope='output')

    # output the image and hidden variables
    return outputs * 150.0 + 127.5, content_features
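# adaptive_instance_normalization is defined elsewhere in the project. A
# minimal sketch of the standard AdaIN operation it presumably implements
# (align the per-channel mean/variance of the content features to those of
# the style features), assuming NHWC tensors -- an illustration only:
def adaptive_instance_normalization(content, style, epsilon=1e-5):
    """Shift content statistics to match the style statistics per channel."""
    c_mean, c_var = tf.nn.moments(content, axes=[1, 2], keep_dims=True)
    s_mean, s_var = tf.nn.moments(style, axes=[1, 2], keep_dims=True)
    normalized = (content - c_mean) * tf.rsqrt(c_var + epsilon)
    return normalized * tf.sqrt(s_var + epsilon) + s_mean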
def vgg_decoder(inputs,
                network_name='vgg_16',
                starting_layer='conv1/conv1_1',
                reuse=False,
                scope=None):
    """construct the decoder network for the vgg model

    Args:
      inputs: input features [batch_size, height, width, channel]
      network_name: the type of the network, default is vgg_16
      starting_layer: the encoder layer from which decoding starts (the
        decoder mirrors the encoder from this layer back to the image),
        default is 'conv1/conv1_1'
      reuse: (optional) whether to reuse the network
      scope: (optional) the scope of the network

    Returns:
      outputs: the decoded feature maps
    """
    with tf.variable_scope(scope, 'image_decoder', reuse=reuse):
        # start from the input features via an identity mapping
        net = tf.identity(inputs)

        # walk the layer table and begin decoding at the starting layer
        is_active = False
        for layer, layer_struct in network_map[network_name]:
            if layer == starting_layer:
                is_active = True
            if is_active:
                conv_type, num_outputs, kernel_size = layer_struct
                if conv_type == 'c':
                    # 'c': stride-1 convolution that keeps the resolution
                    net = custom_ops.conv2d_same(net,
                                                 num_outputs,
                                                 kernel_size,
                                                 1,
                                                 scope=layer)
                elif conv_type == 'uc':
                    # 'uc': resize-convolution that upsamples by a factor of 2
                    net = custom_ops.conv2d_resize(net,
                                                   num_outputs,
                                                   kernel_size,
                                                   2,
                                                   scope=layer)
        with slim.arg_scope([slim.conv2d],
                            normalizer_fn=None,
                            activation_fn=tf.tanh):
            outputs = custom_ops.conv2d_same(net, 3, 7, 1, scope='output')
        return outputs * 150.0 + 127.5
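# Note that vgg_decoder expects network_map[network_name] to be an ordered
# list of (layer_name, (conv_type, num_outputs, kernel_size)) entries, unlike
# the encoder-side network_map in style_transfer above, which maps names to
# network constructors. A hypothetical table consistent with the loop (the
# names and values are illustrative, not the project's actual map):
network_map = {
    'vgg_16': [
        ('conv4/conv4_1', ('c', 512, 3)),   # mimic conv4_1
        ('conv3/conv3_4', ('uc', 256, 3)),  # upsample past pool3
        ('conv3/conv3_1', ('c', 256, 3)),
        ('conv2/conv2_2', ('uc', 128, 3)),  # upsample past pool2
        ('conv1/conv1_2', ('uc', 64, 3)),   # upsample past pool1
        ('conv1/conv1_1', ('c', 64, 3)),
    ],
}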
def feature_extractor(self, inputs, reuse=False):
    """feature extractor for the inputs"""
    with tf.variable_scope('feature_extractor', reuse=reuse):
        with slim.arg_scope(self.msg_arg_scope()):
            # scale the input pixels (assumed in [0, 255]) down by 127.5
            net = tf.div(inputs, 127.5)
            net = custom_ops.conv2d_same(net, 64, 7, 1, scope='enc_0')
            # two residual downsampling stages, each halving the resolution
            net = custom_ops.residual_block_downsample(net, 128, 2, scope='enc_1')
            net = custom_ops.residual_block_downsample(net, 256, 2, scope='enc_2')
            return net
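# A minimal usage sketch of the encoder/decoder pair above, assuming `model`
# is an instance of the class these methods belong to and that inputs are
# pixel values in [0, 255] (hypothetical names, for illustration only):
images = tf.placeholder(tf.float32, [None, 256, 256, 3])
features = model.feature_extractor(images)   # two /2 stages: [N, 64, 64, 256]
outputs = model.feature_decoder(features)    # tanh output, roughly [-1, 1]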
def inspiration(inputs_features, style_features, kernel_size, reuse=False):
    """inspiration layer for the MSG-Net

    Here `kernel_size` is the channel count C of the features: the layer
    learns a C x C affine matrix (as a 1x1 convolution) and multiplies it
    with the C x C Gram matrix of the style features.
    """
    with tf.variable_scope('inspiration', reuse=reuse):
        # affine transform of the input content feature (learned 1x1 conv)
        with slim.arg_scope([slim.conv2d],
                            activation_fn=None,
                            normalizer_fn=None):
            outputs = custom_ops.conv2d_same(inputs_features,
                                             kernel_size,
                                             1,
                                             1,
                                             scope='affine_matrix')

        # multiply with the style statistics: the Gram matrix is expanded to
        # a 4-D [1, 1, C, C] filter and applied as a 1x1 convolution
        style_feature = losses.compute_gram_matrix(style_features)
        style_feature = tf.expand_dims(style_feature, axis=0)
        outputs = tf.nn.conv2d(outputs,
                               style_feature, [1, 1, 1, 1],
                               padding='SAME')
        outputs.set_shape(inputs_features.get_shape())
        return outputs
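# losses.compute_gram_matrix is defined elsewhere. A minimal sketch of the
# usual definition (channel-wise Gram matrix of NHWC features, normalized by
# the number of spatial locations) -- an illustration, not the project code:
def compute_gram_matrix(features):
    """Return the [N, C, C] Gram matrices of NHWC feature maps."""
    shape = tf.shape(features)
    num_locations = tf.cast(shape[1] * shape[2], tf.float32)
    flat = tf.reshape(features, [shape[0], -1, shape[3]])   # [N, H*W, C]
    return tf.matmul(flat, flat, transpose_a=True) / num_locations

# With a single style image (N = 1) this yields a [1, C, C] tensor, and the
# expand_dims in `inspiration` above turns it into the 4-D [1, 1, C, C]
# filter that tf.nn.conv2d requires.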