def test_convert_collection_to_dict_clear_collection(self):
    """Converting with `clear_collection=True` must empty the collection."""
    collection = 'end_points'
    t1 = constant_op.constant(1.0, name='t1')
    t2 = constant_op.constant(2.0, name='t2')
    # Register three aliases; the last two point at the same tensor.
    for alias, tensor in (('a1', t1), ('a21', t2), ('a22', t2)):
        utils.collect_named_outputs(collection, alias, tensor)
    utils.convert_collection_to_dict(collection, clear_collection=True)
    self.assertEqual(ops.get_collection(collection), [])
def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
    """Builds only the stacked ResNet blocks, with no layers around them.

    Returns:
      A `(net, end_points)` tuple: the output of
      `resnet_utils.stack_blocks_dense` and the dict converted from the
      'end_points' collection.
    """
    collection = 'end_points'
    # Both context managers are entered left to right, exactly as nested
    # `with` statements would be.
    with variable_scope.variable_scope(scope, values=[inputs]), \
            arg_scope([layers.conv2d], outputs_collections=collection):
        stacked = resnet_utils.stack_blocks_dense(
            inputs, blocks, output_stride)
        collected = utils.convert_collection_to_dict(collection)
        return stacked, collected
def test_convert_collection_to_dict(self):
    """Every alias registered in the collection maps back to its tensor."""
    collection = 'end_points'
    t1 = constant_op.constant(1.0, name='t1')
    t2 = constant_op.constant(2.0, name='t2')
    # One table drives both the registration and the checks below.
    expected = (('a1', t1), ('a21', t2), ('a22', t2))
    for alias, tensor in expected:
        utils.collect_named_outputs(collection, alias, tensor)
    end_points = utils.convert_collection_to_dict(collection)
    for alias, tensor in expected:
        self.assertEqual(end_points[alias], tensor)
def truncated_vgg_16(inputs, is_training=True, scope='vgg_16'):
    """Oxford Net VGG 16-Layers version D, truncated after the conv5 group.

    For use in the SSD object detection network, which has this particular
    truncated version of VGG16 detailed in its paper. The stack built here is
    conv1..conv5 with a 2x2 max-pool after each of the first four groups;
    nothing follows conv5.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      is_training: accepted for interface compatibility; it is not referenced
        anywhere in this function body.
      scope: Optional scope for the variables.

    Returns:
      A `(net, end_points)` tuple: the output of the conv5 group and the dict
      converted from the end-points collection.
    """
    # (repeat count, output depth, conv scope, scope of the pool that follows
    # the group, or None when the group is not followed by a pool).
    stages = (
        (2, 64, 'conv1', 'pool1'),
        (2, 128, 'conv2', 'pool2'),
        (3, 256, 'conv3', 'pool3'),
        (3, 512, 'conv4', 'pool4'),
        (3, 512, 'conv5', None),
    )
    with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as vs:
        collection_name = vs.original_name_scope + '_end_points'
        # Outputs of these layer types are recorded in the collection.
        collected_layers = [
            layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d
        ]
        with arg_scope(collected_layers, outputs_collections=collection_name):
            net = inputs
            for repeats, depth, conv_scope, pool_scope in stages:
                net = layers_lib.repeat(
                    net, repeats, layers.conv2d, depth, [3, 3],
                    scope=conv_scope)
                if pool_scope is not None:
                    net = layers_lib.max_pool2d(
                        net, [2, 2], scope=pool_scope)
            # Turn the collected outputs into an alias -> tensor dict.
            end_points = utils.convert_collection_to_dict(collection_name)
            return net, end_points
def resnet_v2(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              reuse=None,
              scope=None):
    """Generator for v2 (preactivation) ResNet models.

    This function generates a family of ResNet v2 models. See the
    resnet_v2_*() methods for specific model instantiations, obtained by
    selecting different block instantiations that produce ResNets of various
    depths.

    Training for image classification on Imagenet is usually done with
    [224, 224] inputs, resulting in [7, 7] feature maps at the output of the
    last ResNet block for the ResNets defined in [1] that have nominal stride
    equal to 32. However, for dense prediction tasks we advise that one uses
    inputs with spatial dimensions that are multiples of 32 plus 1, e.g.,
    [321, 321]. In this case the feature maps at the ResNet output will have
    spatial shape
    [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
    and corners exactly aligned with the input image corners, which greatly
    facilitates alignment of the features to the image. Using as input
    [225, 225] images results in [8, 8] feature maps at the output of the
    last ResNet block.

    For dense prediction tasks, the ResNet needs to run in
    fully-convolutional (FCN) mode and global_pool needs to be set to False.
    The ResNets in [1, 2] all have nominal stride equal to 32 and a good
    choice in FCN mode is to use output_stride=16 in order to increase the
    density of the computed features at small computational and memory
    overhead, cf. http://arxiv.org/abs/1606.00915.

    Args:
      inputs: A tensor of size [batch, height_in, width_in, channels].
      blocks: A list of length equal to the number of ResNet blocks. Each
        element is a resnet_utils.Block object describing the units in the
        block.
      num_classes: Number of predicted classes for classification tasks. If
        None we return the features before the logit layer.
      is_training: whether batch_norm layers are in training mode.
      global_pool: If True, we perform global average pooling before
        computing the logits. Set to True for image classification, False
        for dense prediction.
      output_stride: If None, then the output will be computed at the nominal
        network stride. If output_stride is not None, it specifies the
        requested ratio of input to output spatial resolution.
      include_root_block: If True, include the initial convolution followed
        by max-pooling, if False excludes it. If excluded, `inputs` should be
        the results of an activation-less convolution.
      reuse: whether or not the network and its variables should be reused.
        To be able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: A rank-4 tensor of size
        [batch, height_out, width_out, channels_out]. If global_pool is
        False, then height_out and width_out are reduced by a factor of
        output_stride compared to the respective height_in and width_in,
        else both height_out and width_out equal one. If num_classes is
        None, then net is the output of the last ResNet block, potentially
        after global average pooling. If num_classes is not None, net
        contains the pre-softmax activations.
      end_points: A dictionary from components of the network to the
        corresponding activation.

    Raises:
      ValueError: If the target output_stride is not valid.
    """
    with variable_scope.variable_scope(
            scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with arg_scope(
                [layers_lib.conv2d, bottleneck,
                 resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with arg_scope([layers.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple '
                                'of 4.')
                        # The root block below (stride-2 conv1 followed by
                        # stride-2 pool1) already accounts for a factor of 4.
                        # Floor division is exact here because of the check
                        # above, and unlike true division it does not turn
                        # the stride into a float on Python 3.
                        output_stride //= 4
                    # We do not include batch normalization or activation
                    # functions in conv1 because the first ResNet unit will
                    # perform these. Cf. Appendix of [2].
                    with arg_scope(
                            [layers_lib.conv2d],
                            activation_fn=None,
                            normalizer_fn=None):
                        net = resnet_utils.conv2d_same(
                            net, 64, 7, stride=2, scope='conv1')
                    net = layers.max_pool2d(
                        net, [3, 3], stride=2, scope='pool1')
                net = resnet_utils.stack_blocks_dense(
                    net, blocks, output_stride)
                # This is needed because the pre-activation variant does not
                # have batch normalization or activation functions in the
                # residual unit output. See Appendix of [2].
                net = layers.batch_norm(
                    net, activation_fn=nn_ops.relu, scope='postnorm')
                if global_pool:
                    # Global average pooling.
                    net = math_ops.reduce_mean(
                        net, [1, 2], name='pool5', keepdims=True)
                if num_classes is not None:
                    net = layers_lib.conv2d(
                        net,
                        num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        scope='logits')
                # Convert end_points_collection into a dictionary of
                # end_points.
                end_points = utils.convert_collection_to_dict(
                    end_points_collection)
                if num_classes is not None:
                    end_points['predictions'] = layers.softmax(
                        net, scope='predictions')
                return net, end_points
def vgg_a(inputs,
          num_classes=1000,
          is_training=True,
          dropout_keep_prob=0.5,
          spatial_squeeze=True,
          scope='vgg_a'):
    """Oxford Net VGG 11-Layers version A Example.

    Note: All the fully_connected layers have been transformed to conv2d
    layers. To use in classification mode, resize input to 224x224.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the
        dropout layers during training.
      spatial_squeeze: whether or not should squeeze the spatial dimensions
        of the outputs. Useful to remove unnecessary dimensions for
        classification.
      scope: Optional scope for the variables.

    Returns:
      A `(net, end_points)` tuple: the output of the final `fc8` layer
      (squeezed over axes 1 and 2 when `spatial_squeeze` is true) and the
      dict converted from the end-points collection.
    """
    # (repeat count, output depth, conv scope, scope of the following pool).
    conv_stages = (
        (1, 64, 'conv1', 'pool1'),
        (1, 128, 'conv2', 'pool2'),
        (2, 256, 'conv3', 'pool3'),
        (2, 512, 'conv4', 'pool4'),
        (2, 512, 'conv5', 'pool5'),
    )
    with variable_scope.variable_scope(scope, 'vgg_a', [inputs]) as sc:
        collection_name = sc.original_name_scope + '_end_points'
        # Outputs of conv2d and max_pool2d are recorded in the collection.
        with arg_scope(
                [layers.conv2d, layers_lib.max_pool2d],
                outputs_collections=collection_name):
            net = inputs
            for repeats, depth, conv_scope, pool_scope in conv_stages:
                net = layers_lib.repeat(
                    net, repeats, layers.conv2d, depth, [3, 3],
                    scope=conv_scope)
                net = layers_lib.max_pool2d(net, [2, 2], scope=pool_scope)

            # The classifier head is expressed with conv2d layers rather
            # than fully_connected ones.
            net = layers.conv2d(
                net, 4096, [7, 7], padding='VALID', scope='fc6')
            net = layers_lib.dropout(
                net, dropout_keep_prob, is_training=is_training,
                scope='dropout6')
            net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
            net = layers_lib.dropout(
                net, dropout_keep_prob, is_training=is_training,
                scope='dropout7')
            net = layers.conv2d(
                net,
                num_classes, [1, 1],
                activation_fn=None,
                normalizer_fn=None,
                scope='fc8')

            # Turn the collected outputs into an alias -> tensor dict.
            end_points = utils.convert_collection_to_dict(collection_name)
            if spatial_squeeze:
                net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
                # Expose the squeezed tensor under the fc8 key.
                end_points[sc.name + '/fc8'] = net
            return net, end_points
def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.5,
               spatial_squeeze=True,
               scope='alexnet_v2'):
    """AlexNet version 2.

    Described in: http://arxiv.org/pdf/1404.5997v2.pdf
    Parameters from:
    github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
    layers-imagenet-1gpu.cfg

    Note: All the fully_connected layers have been transformed to conv2d
    layers. To use in classification mode, resize input to 224x224. To use
    in fully convolutional mode, set spatial_squeeze to false.
    The LRN layers have been removed and change the initializers from
    random_normal_initializer to xavier_initializer.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the
        dropout layers during training.
      spatial_squeeze: whether or not should squeeze the spatial dimensions
        of the outputs. Useful to remove unnecessary dimensions for
        classification.
      scope: Optional scope for the variables.

    Returns:
      A `(net, end_points)` tuple: the output of the final `fc8` layer
      (squeezed over axes 1 and 2 when `spatial_squeeze` is true) and the
      dict converted from the end-points collection.
    """
    with variable_scope.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
        collection_name = sc.original_name_scope + '_end_points'
        # Outputs of these layer types are recorded in the collection.
        collected_layers = [
            layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d
        ]
        with arg_scope(
                collected_layers, outputs_collections=[collection_name]):
            net = layers.conv2d(
                inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool1')
            net = layers.conv2d(net, 192, [5, 5], scope='conv2')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool2')
            # conv3..conv5 differ only in depth and scope.
            for depth, conv_scope in (
                    (384, 'conv3'), (384, 'conv4'), (256, 'conv5')):
                net = layers.conv2d(net, depth, [3, 3], scope=conv_scope)
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool5')

            # The classifier head is expressed with conv2d layers rather
            # than fully_connected ones, with its own initializer defaults.
            head_defaults = {
                'weights_initializer': trunc_normal(0.005),
                'biases_initializer': init_ops.constant_initializer(0.1),
            }
            with arg_scope([layers.conv2d], **head_defaults):
                net = layers.conv2d(
                    net, 4096, [5, 5], padding='VALID', scope='fc6')
                net = layers_lib.dropout(
                    net, dropout_keep_prob, is_training=is_training,
                    scope='dropout6')
                net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
                net = layers_lib.dropout(
                    net, dropout_keep_prob, is_training=is_training,
                    scope='dropout7')
                net = layers.conv2d(
                    net,
                    num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    biases_initializer=init_ops.zeros_initializer(),
                    scope='fc8')

            # Turn the collected outputs into an alias -> tensor dict.
            end_points = utils.convert_collection_to_dict(collection_name)
            if spatial_squeeze:
                net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
                # Expose the squeezed tensor under the fc8 key.
                end_points[sc.name + '/fc8'] = net
            return net, end_points
def overfeat(inputs,
             num_classes=1000,
             is_training=True,
             dropout_keep_prob=0.5,
             spatial_squeeze=True,
             scope='overfeat'):
    """Contains the model definition for the OverFeat network.

    The definition for the network was obtained from:
      OverFeat: Integrated Recognition, Localization and Detection using
      Convolutional Networks
      Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus
      and Yann LeCun, 2014
      http://arxiv.org/abs/1312.6229

    Note: All the fully_connected layers have been transformed to conv2d
    layers. To use in classification mode, resize input to 231x231. To use
    in fully convolutional mode, set spatial_squeeze to false.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the
        dropout layers during training.
      spatial_squeeze: whether or not should squeeze the spatial dimensions
        of the outputs. Useful to remove unnecessary dimensions for
        classification.
      scope: Optional scope for the variables.

    Returns:
      A `(net, end_points)` tuple: the output of the final `fc8` layer
      (squeezed over axes 1 and 2 when `spatial_squeeze` is true) and the
      dict converted from the end-points collection.
    """
    with variable_scope.variable_scope(scope, 'overfeat', [inputs]) as sc:
        collection_name = sc.name + '_end_points'
        # Outputs of these layer types are recorded in the collection.
        collected_layers = [
            layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d
        ]
        with arg_scope(collected_layers, outputs_collections=collection_name):
            net = layers.conv2d(
                inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
            net = layers.conv2d(
                net, 256, [5, 5], padding='VALID', scope='conv2')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
            # conv3..conv5 differ only in depth and scope.
            for depth, conv_scope in (
                    (512, 'conv3'), (1024, 'conv4'), (1024, 'conv5')):
                net = layers.conv2d(net, depth, [3, 3], scope=conv_scope)
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')

            # The classifier head is expressed with conv2d layers rather
            # than fully_connected ones, with its own initializer defaults.
            head_defaults = {
                'weights_initializer': trunc_normal(0.005),
                'biases_initializer': init_ops.constant_initializer(0.1),
            }
            with arg_scope([layers.conv2d], **head_defaults):
                net = layers.conv2d(
                    net, 3072, [6, 6], padding='VALID', scope='fc6')
                net = layers_lib.dropout(
                    net, dropout_keep_prob, is_training=is_training,
                    scope='dropout6')
                net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
                net = layers_lib.dropout(
                    net, dropout_keep_prob, is_training=is_training,
                    scope='dropout7')
                net = layers.conv2d(
                    net,
                    num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    biases_initializer=init_ops.zeros_initializer(),
                    scope='fc8')

            # Turn the collected outputs into an alias -> tensor dict.
            end_points = utils.convert_collection_to_dict(collection_name)
            if spatial_squeeze:
                net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
                # Expose the squeezed tensor under the fc8 key.
                end_points[sc.name + '/fc8'] = net
            return net, end_points
def xfcn(inputs, dropout_rate, scope='xfcn'):
    """Defines the xfcn network.

    The graph is built in this order: an entry flow, two
    `middle_flow_block` calls and an exit flow, all using variable scopes
    under `xception_65/...`. Five intermediate tensors are tapped through
    dropout and turned into side outputs, which are upsampled with
    transposed convolutions, cropped with `crop_features` to the shape of
    `inputs`, and finally concatenated and fused by a 1x1 convolution.

    Args:
      inputs: Tensorflow placeholder that contains the input image
      dropout_rate: Value forwarded as `keep_prob` to every `slim.dropout`
        call in this function. NOTE(review): despite its name this is used
        as a keep probability -- confirm that callers pass it as such.
      scope: Scope name for the network

    Returns:
      net: Output Tensor of the network (the `upscore-fuse` convolution
        applied to the concatenated, cropped side features)
      end_points: Dictionary built from the end-points collection by
        `utils.convert_collection_to_dict`
    """
    # Dynamic shape of the input; every upsampled map is cropped against it.
    im_size = tf.shape(inputs)
    with tf.variable_scope(scope, 'xfcn', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs of all intermediate layers.
        with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                            outputs_collections=end_points_collection):
            # Entry flow
            # Block 1
            net = slim.conv2d(inputs, 32, [3, 3], stride=2, padding='VALID',
                              scope='xception_65/entry_flow/conv1_1')
            net = slim.batch_norm(
                net, scope='xception_65/entry_flow/conv1_1/BatchNorm')
            net = tf.nn.relu(net)
            net = slim.conv2d(net, 64, [3, 3],
                              scope='xception_65/entry_flow/conv1_2')
            net = slim.batch_norm(
                net, scope='xception_65/entry_flow/conv1_2/BatchNorm')
            net = tf.nn.relu(net)
            # Strided 1x1 shortcut branch, added back after the pooled
            # separable-conv branch below.
            residual_1 = slim.conv2d(
                net, 128, [1, 1], stride=2,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/shortcut'
            )
            residual_1 = slim.batch_norm(
                residual_1,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/shortcut/BatchNorm'
            )

            # block 2
            # Note: this is the only separable_conv2d call in this function
            # that passes activation_fn=None.
            net = slim.separable_conv2d(
                net, 128, [3, 3], activation_fn=None,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv1_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv1_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 128, [3, 3],
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv2_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv2_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 128, [3, 3],
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv3_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv3_pointwise/BatchNorm'
            )
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='SAME')
            net_2 = tf.math.add(residual_1, net)
            # The dropped copy only feeds the side output; the main path
            # continues from the un-dropped net_2.
            net_2_drop = slim.dropout(net_2, keep_prob=dropout_rate)
            residual_2 = slim.conv2d(
                net_2, 256, [1, 1], stride=2,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/shortcut'
            )
            residual_2 = slim.batch_norm(
                residual_2,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/shortcut/BatchNorm'
            )

            # block 3
            net = tf.nn.relu(net_2)
            net = slim.separable_conv2d(
                net, 256, [3, 3],
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv1_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv1_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 256, [3, 3],
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv2_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv2_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 256, [3, 3],
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv3_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv3_pointwise/BatchNorm'
            )
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='SAME')
            net_3 = tf.math.add(net, residual_2)
            # Dropped copy feeds the side output only.
            net_3_drop = slim.dropout(net_3, keep_prob=dropout_rate)
            residual_3 = slim.conv2d(
                net_3, 728, [1, 1], stride=2,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/shortcut'
            )
            residual_3 = slim.batch_norm(
                residual_3,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/shortcut/BatchNorm'
            )

            # block 4
            net = tf.nn.relu(net_3)
            net = slim.separable_conv2d(
                net, 728, [3, 3],
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv1_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv1_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 728, [3, 3],
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv2_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv2_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 728, [3, 3],
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv3_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv3_pointwise/BatchNorm'
            )
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='SAME')
            net_4 = tf.math.add(net, residual_3)
            # Dropped copy feeds the side output only.
            net_4_drop = slim.dropout(net_4, keep_prob=dropout_rate)

            # middle flow
            # block 5
            net = middle_flow_block(net_4, unit_num=1)
            # block 6 - 20
            # NOTE(review): only a single call with unit_num=2 is made here;
            # middle_flow_block is defined elsewhere, so whether it covers
            # the "6 - 20" range named above cannot be confirmed from here.
            net = middle_flow_block(net, unit_num=2)
            # Dropped copy feeds the side output only.
            net_5_drop = slim.dropout(net, keep_prob=dropout_rate)

            # Exit flow
            residual_20 = slim.conv2d(
                net, 1024, [1, 1], stride=2,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/shortcut')
            residual_20 = slim.batch_norm(
                residual_20,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/shortcut/BatchNorm'
            )

            # block 21
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 728, [3, 3],
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv1_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv1_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 1024, [3, 3],
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv2_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv2_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 1024, [3, 3],
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv3_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv3_pointwise/BatchNorm'
            )
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='SAME')
            net_6 = tf.math.add(net, residual_20)
            # Dropped copy feeds the side output only.
            net_6_drop = slim.dropout(net_6, keep_prob=dropout_rate)

            # Get side outputs of the network
            # NOTE(review): the nesting of the `with` blocks from here on was
            # reconstructed; confirm that the side and fuse convolutions are
            # meant to sit inside the outer arg_scope (and therefore be
            # recorded in the end-points collection).
            with slim.arg_scope([slim.conv2d],
                                biases_initializer=tf.zeros_initializer()):
                # One 16-channel 3x3 convolution per tapped tensor, each
                # with its own dilation `rate`.
                side_2 = slim.conv2d(net_2_drop, 16, [3, 3], rate=1,
                                     scope='conv2_2_16')
                side_3 = slim.conv2d(net_3_drop, 16, [3, 3], rate=2,
                                     scope='conv3_3_16')
                side_4 = slim.conv2d(net_4_drop, 16, [3, 3], rate=4,
                                     scope='conv4_3_16')
                side_5 = slim.conv2d(net_5_drop, 16, [3, 3], rate=4,
                                     scope='conv5_3_16')
                side_6 = slim.conv2d(net_6_drop, 16, [3, 3], rate=8,
                                     scope='conv6_3_16')

                # Supervise side outputs
                side_2_s = slim.conv2d(side_2, 1, [1, 1], scope='score-dsn_2')
                side_3_s = slim.conv2d(side_3, 1, [1, 1], scope='score-dsn_3')
                side_4_s = slim.conv2d(side_4, 1, [1, 1], scope='score-dsn_4')
                side_5_s = slim.conv2d(side_5, 1, [1, 1], scope='score-dsn_5')
                side_6_s = slim.conv2d(side_6, 1, [1, 1], scope='score-dsn_6')
                with slim.arg_scope([slim.convolution2d_transpose],
                                    outputs_collections=end_points_collection):
                    # Side outputs
                    # Each score map is upsampled, cropped against im_size,
                    # and the cropped tensor is registered explicitly under
                    # an 'xfcn/...-cr' alias.
                    side_2_s = slim.convolution2d_transpose(
                        side_2_s, 1, 8, 4, scope='score-dsn_2-up')
                    side_2_s = crop_features(side_2_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_2-cr',
                                                side_2_s)
                    side_3_s = slim.convolution2d_transpose(
                        side_3_s, 1, 16, 8, scope='score-dsn_3-up')
                    side_3_s = crop_features(side_3_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_3-cr',
                                                side_3_s)
                    side_4_s = slim.convolution2d_transpose(
                        side_4_s, 1, 32, 16, scope='score-dsn_4-up')
                    side_4_s = crop_features(side_4_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_4-cr',
                                                side_4_s)
                    side_5_s = slim.convolution2d_transpose(
                        side_5_s, 1, 32, 16, scope='score-dsn_5-up')
                    side_5_s = crop_features(side_5_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_5-cr',
                                                side_5_s)
                    side_6_s = slim.convolution2d_transpose(
                        side_6_s, 1, 64, 32, scope='score-dsn_6-up')
                    side_6_s = crop_features(side_6_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_6-cr',
                                                side_6_s)

                    # Main output
                    # The 16-channel side features get the same
                    # upsample/crop/register treatment before being fused.
                    side_2_f = slim.convolution2d_transpose(
                        side_2, 16, 8, 4, scope='score-multi2-up')
                    side_2_f = crop_features(side_2_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi2-cr',
                                                side_2_f)
                    side_3_f = slim.convolution2d_transpose(
                        side_3, 16, 16, 8, scope='score-multi3-up')
                    side_3_f = crop_features(side_3_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi3-cr',
                                                side_3_f)
                    side_4_f = slim.convolution2d_transpose(
                        side_4, 16, 32, 16, scope='score-multi4-up')
                    side_4_f = crop_features(side_4_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi4-cr',
                                                side_4_f)
                    side_5_f = slim.convolution2d_transpose(
                        side_5, 16, 32, 16, scope='score-multi5-up')
                    side_5_f = crop_features(side_5_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi5-cr',
                                                side_5_f)
                    side_6_f = slim.convolution2d_transpose(
                        side_6, 16, 64, 32, scope='score-multi6-up')
                    side_6_f = crop_features(side_6_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi6-cr',
                                                side_6_f)
                # Concatenate the cropped side features along axis 3 and
                # fuse them with a 1x1 convolution.
                concat_side = tf.concat(
                    [side_2_f, side_3_f, side_4_f, side_5_f, side_6_f],
                    axis=3)
                net = slim.conv2d(concat_side, 1, [1, 1],
                                  scope='upscore-fuse')
        end_points = utils.convert_collection_to_dict(end_points_collection)
        return net, end_points