def _mobilenet_v2(net,
                  depth_multiplier,
                  output_stride,
                  reuse=None,
                  scope=None,
                  final_endpoint=None):
  """Builds the MobileNetV2 base network inside a reusable variable scope.

  Wraps `mobilenet_lib.mobilenet_base` in a `tf.variable_scope` so that the
  caller can request variable reuse.

  Args:
    net: Input tensor of shape [batch_size, height, width, channels].
    depth_multiplier: Float multiplier for the depth (number of channels) of
      all convolution ops. Must be greater than zero. Values in (0, 1) are
      typically used to cut the parameter count or computation cost.
    output_stride: Integer giving the requested ratio of input to output
      spatial resolution. If not None, atrous convolution is used where
      needed so the activation maps are not reduced further. Allowed values
      are 8 (accurate fully convolutional mode), 16 (fast fully convolutional
      mode) and 32 (classification mode).
    reuse: Reuse model variables.
    scope: Optional variable scope; 'MobilenetV2' is the default name.
    final_endpoint: The endpoint to construct the network up to. A falsy
      value selects `_MOBILENET_V2_FINAL_ENDPOINT`.

  Returns:
    Whatever `mobilenet_lib.mobilenet_base` returns for these arguments
    (the features extracted by MobileNetV2).
  """
  with tf.variable_scope(
      scope, 'MobilenetV2', [net], reuse=reuse) as mobilenet_scope:
    base_kwargs = dict(
        conv_defs=mobilenet_v2.V2_DEF,
        multiplier=depth_multiplier,
        final_endpoint=final_endpoint or _MOBILENET_V2_FINAL_ENDPOINT,
        output_stride=output_stride,
        scope=mobilenet_scope)
    return mobilenet_lib.mobilenet_base(net, **base_kwargs)
def testWithOutputStride8(self):
  """Checks that output_stride=8 maps a 224x224 input to a 28x28 output."""
  images = tf.placeholder(tf.float32, (10, 224, 224, 16))
  features, _ = mobilenet.mobilenet_base(
      images,
      conv_defs=mobilenet_v2.V2_DEF,
      output_stride=8,
      scope='MobilenetV2')
  # Only the two spatial dimensions (height, width) are compared.
  spatial_dims = features.get_shape().as_list()[1:3]
  self.assertEqual(spatial_dims, [28, 28])
def testWithOutputStride16(self):
  """Checks that output_stride=16 maps a 224x224 input to a 14x14 output."""
  tf.reset_default_graph()
  images = tf.placeholder(tf.float32, (10, 224, 224, 16))
  features, _ = mobilenet.mobilenet_base(
      images,
      conv_defs=mobilenet_v2.V2_DEF,
      output_stride=16)
  # Only the two spatial dimensions (height, width) are compared.
  spatial_dims = features.get_shape().as_list()[1:3]
  self.assertEqual(spatial_dims, [14, 14])
def _mobilenet_v2(net,
                  depth_multiplier,
                  output_stride,
                  reuse=None,
                  scope=None,
                  final_endpoint=None):
  """Runs `mobilenet_base` for MobileNetV2 under a variable scope with reuse.

  Exists so that callers can pass `reuse` when building MobileNetV2.

  Args:
    net: Input tensor of shape [batch_size, height, width, channels].
    depth_multiplier: Float multiplier applied to the depth (number of
      channels) of every convolution op. Must be greater than zero; values in
      (0, 1) reduce the number of parameters or the computation cost.
    output_stride: Integer giving the requested ratio of input to output
      spatial resolution. When not None, atrous convolution is applied where
      necessary to keep the activation maps from shrinking further. Allowed
      values are 8 (accurate fully convolutional mode), 16 (fast fully
      convolutional mode) and 32 (classification mode).
    reuse: Reuse model variables.
    scope: Optional variable scope; defaults to the name 'MobilenetV2'.
    final_endpoint: The endpoint to construct the network up to. When falsy,
      `_MOBILENET_V2_FINAL_ENDPOINT` is used instead.

  Returns:
    The result of `mobilenet_lib.mobilenet_base` (the features extracted by
    MobileNetV2).
  """
  with tf.variable_scope(
      scope, 'MobilenetV2', [net], reuse=reuse) as entered_scope:
    extracted = mobilenet_lib.mobilenet_base(
        net,
        conv_defs=mobilenet_v2.V2_DEF,
        multiplier=depth_multiplier,
        final_endpoint=final_endpoint or _MOBILENET_V2_FINAL_ENDPOINT,
        output_stride=output_stride,
        scope=entered_scope)
    return extracted
def testWithOutputStride16AndExplicitPadding(self):
  """Checks output_stride=16 with explicit padding gives a 14x14 output."""
  tf.compat.v1.reset_default_graph()
  images = tf.compat.v1.placeholder(tf.float32, (10, 224, 224, 16))
  features, _ = mobilenet.mobilenet_base(
      images,
      conv_defs=mobilenet_v2.V2_DEF,
      output_stride=16,
      use_explicit_padding=True)
  # Only the two spatial dimensions (height, width) are compared.
  spatial_dims = features.get_shape().as_list()[1:3]
  self.assertEqual(spatial_dims, [14, 14])
def testWithOutputStride8AndExplicitPadding(self):
  """Checks output_stride=8 with explicit padding gives a 28x28 output."""
  tf.reset_default_graph()
  images = tf.placeholder(tf.float32, (10, 224, 224, 16))
  features, _ = mobilenet.mobilenet_base(
      images,
      conv_defs=mobilenet_v2.V2_DEF,
      output_stride=8,
      use_explicit_padding=True,
      scope='MobilenetV2')
  # Only the two spatial dimensions (height, width) are compared.
  spatial_dims = features.get_shape().as_list()[1:3]
  self.assertEqual(spatial_dims, [28, 28])
def style_prediction_mobilenet(style_input_,
                               activation_names,
                               activation_depths,
                               mobilenet_end_point='layer_19',
                               mobilenet_trainable=True,
                               style_params_trainable=False,
                               style_prediction_bottleneck=100,
                               reuse=None):
  """Maps style images to style embeddings using MobileNetV2.

  Args:
    style_input_: Tensor. Batch of style input images.
    activation_names: sequence of strings. Scope names of the activations of
      the transformer network which are used to apply style normalization.
      Needs at least as many entries as `activation_depths`.
    activation_depths: sequence of ints. For each activation, the number of
      output channels of the 1x1 convolutions that produce its beta and gamma.
    mobilenet_end_point: string. Specifies the endpoint to construct the
      MobileNetV2 network up to. This network is part of the style prediction
      network.
    mobilenet_trainable: bool. Passed as `is_training` to the MobileNetV2
      training scope and as `trainable` to the bottleneck convolution.
    style_params_trainable: bool. Should the mapping from bottleneck to beta
      and gamma parameters be marked as trainable?
    style_prediction_bottleneck: int. Number of output channels of the
      bottleneck convolution. If it is not greater than zero, the bottleneck
      convolution is skipped and the spatially averaged MobileNetV2 features
      are used directly.
    reuse: Forwarded as `reuse` to the enclosing variable scope and to each
      per-activation variable scope. Defaults to None.

  Returns:
    A tuple `(style_params, bottleneck_feat)`. `style_params` is a dict with
    the keys '<activation_name>/beta' and '<activation_name>/gamma' for every
    activation, each mapping to a tensor whose two spatial axes have been
    squeezed out. `bottleneck_feat` is the tensor all of them are computed
    from.
  """
  # The two context managers are joined with a comma: `a and b` evaluates to
  # `b` alone, so the name scope would never be entered. The name scope only
  # prefixes op names; variable names come from the variable scopes.
  with tf.name_scope('style_prediction_mobilenet'), tf.variable_scope(
      tf.get_variable_scope(), reuse=reuse):
    with slim.arg_scope(
        mobilenet_v2.training_scope(is_training=mobilenet_trainable)):
      _, end_points = mobilenet.mobilenet_base(
          style_input_,
          conv_defs=mobilenet_v2.V2_DEF,
          final_endpoint=mobilenet_end_point,
          scope='MobilenetV2')

    feat_convlayer = end_points[mobilenet_end_point]
    with tf.name_scope('bottleneck'):
      # Average over the spatial axes: (batch_size, 1, 1, depth).
      # `keepdims` is the current spelling of the deprecated `keep_dims`.
      bottleneck_feat = tf.reduce_mean(
          feat_convlayer, axis=[1, 2], keepdims=True)

    if style_prediction_bottleneck > 0:
      with tf.variable_scope('mobilenet_conv'):
        with slim.arg_scope(
            [slim.conv2d],
            activation_fn=None,
            normalizer_fn=None,
            trainable=mobilenet_trainable):
          # (batch_size, 1, 1, style_prediction_bottleneck).
          bottleneck_feat = slim.conv2d(
              bottleneck_feat, style_prediction_bottleneck, [1, 1])

    style_params = {}
    with tf.variable_scope('style_params'):
      # `activation_names` is indexed rather than zipped so that a name list
      # that is too short still raises IndexError instead of being silently
      # truncated.
      for i, depth in enumerate(activation_depths):
        name = activation_names[i]
        with tf.variable_scope(name, reuse=reuse):
          with slim.arg_scope(
              [slim.conv2d],
              activation_fn=None,
              normalizer_fn=None,
              trainable=style_params_trainable):
            # Beta and gamma of the style normalization for this activation
            # of the style transformer network. Beta must stay first: both
            # convolutions share one variable scope, so creation order
            # presumably decides which default-named variables each one gets.
            for param in ('beta', 'gamma'):
              # (batch_size, 1, 1, depth)
              projected = slim.conv2d(bottleneck_feat, depth, [1, 1])
              # (batch_size, depth)
              style_params['{}/{}'.format(name, param)] = tf.squeeze(
                  projected, [1, 2], name='SpatialSqueeze')

  return style_params, bottleneck_feat
def style_prediction_mobilenet(style_input_,
                               activation_names,
                               activation_depths,
                               mobilenet_end_point='layer_19',
                               mobilenet_trainable=True,
                               style_params_trainable=False,
                               style_prediction_bottleneck=100,
                               reuse=None):
  """Maps style images to style embeddings using MobileNetV2.

  Args:
    style_input_: Tensor. Batch of style input images.
    activation_names: sequence of strings. Scope names of the activations of
      the transformer network which are used to apply style normalization.
      Needs at least as many entries as `activation_depths`.
    activation_depths: sequence of ints. For each activation, the number of
      output channels of the 1x1 convolutions that produce its beta and gamma.
    mobilenet_end_point: string. Specifies the endpoint to construct the
      MobileNetV2 network up to. This network is part of the style prediction
      network.
    mobilenet_trainable: bool. Passed as `is_training` to the MobileNetV2
      training scope and as `trainable` to the bottleneck convolution.
    style_params_trainable: bool. Should the mapping from bottleneck to beta
      and gamma parameters be marked as trainable?
    style_prediction_bottleneck: int. Number of output channels of the
      bottleneck convolution. If it is not greater than zero, the bottleneck
      convolution is skipped and the spatially averaged MobileNetV2 features
      are used directly.
    reuse: Forwarded as `reuse` to the enclosing variable scope and to each
      per-activation variable scope. Defaults to None.

  Returns:
    A tuple `(style_params, bottleneck_feat)`. `style_params` is a dict with
    the keys '<activation_name>/beta' and '<activation_name>/gamma' for every
    activation, each mapping to a tensor whose two spatial axes have been
    squeezed out. `bottleneck_feat` is the tensor all of them are computed
    from.
  """
  # The two context managers are joined with a comma: `a and b` evaluates to
  # `b` alone, so the name scope would never be entered. The name scope only
  # prefixes op names; variable names come from the variable scopes.
  with tf.name_scope('style_prediction_mobilenet'), tf.variable_scope(
      tf.get_variable_scope(), reuse=reuse):
    with slim.arg_scope(
        mobilenet_v2.training_scope(is_training=mobilenet_trainable)):
      _, end_points = mobilenet.mobilenet_base(
          style_input_,
          conv_defs=mobilenet_v2.V2_DEF,
          final_endpoint=mobilenet_end_point,
          scope='MobilenetV2')

    feat_convlayer = end_points[mobilenet_end_point]
    with tf.name_scope('bottleneck'):
      # Average over the spatial axes: (batch_size, 1, 1, depth).
      # `keepdims` is the current spelling of the deprecated `keep_dims`.
      bottleneck_feat = tf.reduce_mean(
          feat_convlayer, axis=[1, 2], keepdims=True)

    if style_prediction_bottleneck > 0:
      with tf.variable_scope('mobilenet_conv'):
        with slim.arg_scope(
            [slim.conv2d],
            activation_fn=None,
            normalizer_fn=None,
            trainable=mobilenet_trainable):
          # (batch_size, 1, 1, style_prediction_bottleneck).
          bottleneck_feat = slim.conv2d(
              bottleneck_feat, style_prediction_bottleneck, [1, 1])

    style_params = {}
    with tf.variable_scope('style_params'):
      # `activation_names` is indexed rather than zipped so that a name list
      # that is too short still raises IndexError instead of being silently
      # truncated.
      for i, depth in enumerate(activation_depths):
        name = activation_names[i]
        with tf.variable_scope(name, reuse=reuse):
          with slim.arg_scope(
              [slim.conv2d],
              activation_fn=None,
              normalizer_fn=None,
              trainable=style_params_trainable):
            # Beta and gamma of the style normalization for this activation
            # of the style transformer network. Beta must stay first: both
            # convolutions share one variable scope, so creation order
            # presumably decides which default-named variables each one gets.
            for param in ('beta', 'gamma'):
              # (batch_size, 1, 1, depth)
              projected = slim.conv2d(bottleneck_feat, depth, [1, 1])
              # (batch_size, depth)
              style_params['{}/{}'.format(name, param)] = tf.squeeze(
                  projected, [1, 2], name='SpatialSqueeze')

  return style_params, bottleneck_feat
def mobilenet_base(*args, **kwargs):
  """Forwards every argument unchanged to `mobilenet.mobilenet_base`.

  Args:
    *args: Positional arguments passed straight through.
    **kwargs: Keyword arguments passed straight through.

  Returns:
    Whatever `mobilenet.mobilenet_base` returns for the given arguments.
  """
  delegate = mobilenet.mobilenet_base
  return delegate(*args, **kwargs)