Example #1
def test_convert_collection_to_dict_clear_collection(self):
  t1 = constant_op.constant(1.0, name='t1')
  t2 = constant_op.constant(2.0, name='t2')
  utils.collect_named_outputs('end_points', 'a1', t1)
  utils.collect_named_outputs('end_points', 'a21', t2)
  utils.collect_named_outputs('end_points', 'a22', t2)
  utils.convert_collection_to_dict('end_points', clear_collection=True)
  self.assertEqual(ops.get_collection('end_points'), [])
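This test exercises the clear_collection path: collect_named_outputs appends aliased tensors to a graph collection, and convert_collection_to_dict turns that collection into an alias-to-tensor mapping, optionally emptying it. A minimal standalone sketch of the same round trip, assuming TF 1.x graph mode with tf.contrib available (the module these tests exercise):

import tensorflow as tf
from tensorflow.contrib.layers.python.layers import utils

t = tf.constant(1.0, name='t')
utils.collect_named_outputs('end_points', 'alias/t', t)
end_points = utils.convert_collection_to_dict('end_points', clear_collection=True)
assert list(end_points.keys()) == ['alias/t']   # keyed by alias, not op name
assert tf.get_collection('end_points') == []    # the collection was emptied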
Example #2
def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
    """A plain ResNet without extra layers before or after the ResNet blocks."""
    with variable_scope.variable_scope(scope, values=[inputs]):
        with arg_scope([layers.conv2d], outputs_collections='end_points'):
            net = resnet_utils.stack_blocks_dense(inputs, blocks,
                                                  output_stride)
            end_points = utils.convert_collection_to_dict('end_points')
            return net, end_points
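Here the dictionary is filled indirectly: arg_scope's outputs_collections argument makes every conv2d created under it append its output to the 'end_points' collection. A minimal sketch of just that wiring, assuming TF 1.x with tf.contrib (shapes and scope names are arbitrary):

import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.contrib.framework import arg_scope
from tensorflow.contrib.layers.python.layers import utils

inputs = tf.placeholder(tf.float32, [1, 32, 32, 3])
with arg_scope([layers.conv2d], outputs_collections='end_points'):
    net = layers.conv2d(inputs, 8, [3, 3], scope='conv1')
    net = layers.conv2d(net, 8, [3, 3], scope='conv2')
end_points = utils.convert_collection_to_dict('end_points')
# Keys are the layers' scope-derived aliases, e.g. 'conv1' and 'conv2'.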
Example #3
def test_convert_collection_to_dict(self):
  t1 = constant_op.constant(1.0, name='t1')
  t2 = constant_op.constant(2.0, name='t2')
  utils.collect_named_outputs('end_points', 'a1', t1)
  utils.collect_named_outputs('end_points', 'a21', t2)
  utils.collect_named_outputs('end_points', 'a22', t2)
  end_points = utils.convert_collection_to_dict('end_points')
  self.assertEqual(end_points['a1'], t1)
  self.assertEqual(end_points['a21'], t2)
  self.assertEqual(end_points['a22'], t2)
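As the last two assertions show, distinct aliases ('a21', 'a22') may map to the same tensor. Assuming the tf.contrib implementation, the result is an OrderedDict keyed by alias in collection order, so the test could equivalently check:

assert list(end_points.keys()) == ['a1', 'a21', 'a22']
assert end_points['a21'] is end_points['a22']   # two aliases, one tensor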
Example #4
def truncated_vgg_16(inputs, is_training=True, scope='vgg_16'):
    """Oxford Net VGG 16-Layers version D Example.

    For use in the SSD object detection network, whose paper details this
    particular truncated version of VGG-16.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      is_training: whether or not the model is being trained (not used by
        this truncated variant).
      scope: Optional scope for the variables.

    Returns:
      the last op, containing the conv5 tensor, and the end_points dict.
    """
    with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with arg_scope(
            [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
                outputs_collections=end_points_collection):
            net = layers_lib.repeat(inputs,
                                    2,
                                    layers.conv2d,
                                    64, [3, 3],
                                    scope='conv1')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
            net = layers_lib.repeat(net,
                                    2,
                                    layers.conv2d,
                                    128, [3, 3],
                                    scope='conv2')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
            net = layers_lib.repeat(net,
                                    3,
                                    layers.conv2d,
                                    256, [3, 3],
                                    scope='conv3')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
            net = layers_lib.repeat(net,
                                    3,
                                    layers.conv2d,
                                    512, [3, 3],
                                    scope='conv4')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
            net = layers_lib.repeat(net,
                                    3,
                                    layers.conv2d,
                                    512, [3, 3],
                                    scope='conv5')
            # Convert end_points_collection into an end_points dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            return net, end_points
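A hypothetical usage sketch (TF 1.x graph mode; the 300x300 input size is an assumption borrowed from common SSD configurations):

import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 300, 300, 3], name='images')
net, end_points = truncated_vgg_16(images, is_training=False)
# net is the conv5 feature map; end_points maps scope-qualified aliases
# such as 'vgg_16/conv1/conv1_1' through 'vgg_16/conv5/conv5_3' to tensors.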
Example #5
def resnet_v2(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              reuse=None,
              scope=None):
    """Generator for v2 (preactivation) ResNet models.

  This function generates a family of ResNet v2 models. See the resnet_v2_*()
  methods for specific model instantiations, obtained by selecting different
  block instantiations that produce ResNets of various depths.

  Training for image classification on Imagenet is usually done with [224, 224]
  inputs, resulting in [7, 7] feature maps at the output of the last ResNet
  block for the ResNets defined in [1] that have nominal stride equal to 32.
  However, for dense prediction tasks we advise using inputs with
  spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
  this case the feature maps at the ResNet output will have spatial shape
  [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
  and corners exactly aligned with the input image corners, which greatly
  facilitates alignment of the features to the image. Using as input [225, 225]
  images results in [8, 8] feature maps at the output of the last ResNet block.

  For dense prediction tasks, the ResNet needs to run in fully-convolutional
  (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
  have nominal stride equal to 32 and a good choice in FCN mode is to use
  output_stride=16 in order to increase the density of the computed features at
  small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    blocks: A list of length equal to the number of ResNet blocks. Each element
      is a resnet_utils.Block object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    is_training: whether batch_norm layers are in training mode.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution.
    include_root_block: If True, include the initial convolution followed by
      max-pooling; if False, exclude it. If excluded, `inputs` should be the
      result of an activation-less convolution.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse, 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is None, then
      net is the output of the last ResNet block, potentially after global
      average pooling. If num_classes is not None, net contains the pre-softmax
      activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
    with variable_scope.variable_scope(scope,
                                       'resnet_v2', [inputs],
                                       reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with arg_scope(
            [layers_lib.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with arg_scope([layers.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        output_stride /= 4
                    # We do not include batch normalization or activation functions in
                    # conv1 because the first ResNet unit will perform these. Cf.
                    # Appendix of [2].
                    with arg_scope([layers_lib.conv2d],
                                   activation_fn=None,
                                   normalizer_fn=None):
                        net = resnet_utils.conv2d_same(net,
                                                       64,
                                                       7,
                                                       stride=2,
                                                       scope='conv1')
                    net = layers.max_pool2d(net, [3, 3],
                                            stride=2,
                                            scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)
                # This is needed because the pre-activation variant does not have batch
                # normalization or activation functions in the residual unit output. See
                # Appendix of [2].
                net = layers.batch_norm(net,
                                        activation_fn=nn_ops.relu,
                                        scope='postnorm')
                if global_pool:
                    # Global average pooling.
                    net = math_ops.reduce_mean(net, [1, 2],
                                               name='pool5',
                                               keepdims=True)
                if num_classes is not None:
                    net = layers_lib.conv2d(net,
                                            num_classes, [1, 1],
                                            activation_fn=None,
                                            normalizer_fn=None,
                                            scope='logits')
                # Convert end_points_collection into a dictionary of end_points.
                end_points = utils.convert_collection_to_dict(
                    end_points_collection)
                if num_classes is not None:
                    end_points['predictions'] = layers.softmax(
                        net, scope='predictions')
                return net, end_points
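A hedged usage sketch of the FCN mode the docstring recommends. `resnet_utils` and `bottleneck` are the module-level names the generator itself uses above; the (depth, depth_bottleneck, stride) tuple convention for block args is an assumption matching that era of tf.contrib.slim:

import tensorflow as tf

blocks = [
    resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
    resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
]
images = tf.placeholder(tf.float32, [None, 321, 321, 3])
net, end_points = resnet_v2(images, blocks,
                            num_classes=None,
                            global_pool=False,
                            output_stride=16)
# Per the docstring, [321, 321] inputs at output_stride=16 give feature maps
# of spatial shape [(321 - 1) / 16 + 1, (321 - 1) / 16 + 1] = [21, 21].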
Example #6
def vgg_a(inputs,
          num_classes=1000,
          is_training=True,
          dropout_keep_prob=0.5,
          spatial_squeeze=True,
          scope='vgg_a'):
    """Oxford Net VGG 11-Layers version A Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not to squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions, and the end_points dict.
  """
    with variable_scope.variable_scope(scope, 'vgg_a', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d and max_pool2d.
        with arg_scope([layers.conv2d, layers_lib.max_pool2d],
                       outputs_collections=end_points_collection):
            net = layers_lib.repeat(inputs,
                                    1,
                                    layers.conv2d,
                                    64, [3, 3],
                                    scope='conv1')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
            net = layers_lib.repeat(net,
                                    1,
                                    layers.conv2d,
                                    128, [3, 3],
                                    scope='conv2')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
            net = layers_lib.repeat(net,
                                    2,
                                    layers.conv2d,
                                    256, [3, 3],
                                    scope='conv3')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
            net = layers_lib.repeat(net,
                                    2,
                                    layers.conv2d,
                                    512, [3, 3],
                                    scope='conv4')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
            net = layers_lib.repeat(net,
                                    2,
                                    layers.conv2d,
                                    512, [3, 3],
                                    scope='conv5')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
            # Use conv2d instead of fully_connected layers.
            net = layers.conv2d(net,
                                4096, [7, 7],
                                padding='VALID',
                                scope='fc6')
            net = layers_lib.dropout(net,
                                     dropout_keep_prob,
                                     is_training=is_training,
                                     scope='dropout6')
            net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
            net = layers_lib.dropout(net,
                                     dropout_keep_prob,
                                     is_training=is_training,
                                     scope='dropout7')
            net = layers.conv2d(net,
                                num_classes, [1, 1],
                                activation_fn=None,
                                normalizer_fn=None,
                                scope='fc8')
            # Convert end_points_collection into an end_points dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            if spatial_squeeze:
                net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
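The effect of spatial_squeeze is easy to see in isolation: with a 224x224 classification input, fc8 emits a [batch, 1, 1, num_classes] tensor and the squeeze drops the two unit spatial axes. A minimal illustration (the shapes are assumed for the classification case):

import tensorflow as tf

logits = tf.zeros([8, 1, 1, 1000])     # stand-in for the fc8 output
squeezed = tf.squeeze(logits, [1, 2])  # shape becomes [8, 1000]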
Example #7
def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.5,
               spatial_squeeze=True,
               scope='alexnet_v2'):
    """AlexNet version 2.

  Described in: http://arxiv.org/pdf/1404.5997v2.pdf
  Parameters from:
  github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
  layers-imagenet-1gpu.cfg

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224. To use in fully
        convolutional mode, set spatial_squeeze to false.
        The LRN layers have been removed and the initializers changed from
        random_normal_initializer to xavier_initializer.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not to squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions, and the end_points dict.
  """
    with variable_scope.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with arg_scope(
            [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
                outputs_collections=[end_points_collection]):
            net = layers.conv2d(inputs,
                                64, [11, 11],
                                4,
                                padding='VALID',
                                scope='conv1')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool1')
            net = layers.conv2d(net, 192, [5, 5], scope='conv2')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool2')
            net = layers.conv2d(net, 384, [3, 3], scope='conv3')
            net = layers.conv2d(net, 384, [3, 3], scope='conv4')
            net = layers.conv2d(net, 256, [3, 3], scope='conv5')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool5')

            # Use conv2d instead of fully_connected layers.
            with arg_scope(
                [layers.conv2d],
                    weights_initializer=trunc_normal(0.005),
                    biases_initializer=init_ops.constant_initializer(0.1)):
                net = layers.conv2d(net,
                                    4096, [5, 5],
                                    padding='VALID',
                                    scope='fc6')
                net = layers_lib.dropout(net,
                                         dropout_keep_prob,
                                         is_training=is_training,
                                         scope='dropout6')
                net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
                net = layers_lib.dropout(net,
                                         dropout_keep_prob,
                                         is_training=is_training,
                                         scope='dropout7')
                net = layers.conv2d(
                    net,
                    num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    biases_initializer=init_ops.zeros_initializer(),
                    scope='fc8')

            # Convert end_points_collection into an end_points dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            if spatial_squeeze:
                net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
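The trunc_normal helper used above is not defined in this snippet; in the TF-Slim model files it is conventionally a one-line wrapper, along these lines:

from tensorflow.python.ops import init_ops

# Assumed definition, following the convention in the TF-Slim nets modules.
trunc_normal = lambda stddev: init_ops.truncated_normal_initializer(0.0, stddev)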
Example #8
def overfeat(inputs,
             num_classes=1000,
             is_training=True,
             dropout_keep_prob=0.5,
             spatial_squeeze=True,
             scope='overfeat'):
  """Contains the model definition for the OverFeat network.

  The definition for the network was obtained from:
    OverFeat: Integrated Recognition, Localization and Detection using
    Convolutional Networks
    Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and
    Yann LeCun, 2014
    http://arxiv.org/abs/1312.6229

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 231x231. To use in fully
        convolutional mode, set spatial_squeeze to false.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not to squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions, and the end_points dict.

  """
  with variable_scope.variable_scope(scope, 'overfeat', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers.conv2d(
          inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers.conv2d(net, 512, [3, 3], scope='conv3')
      net = layers.conv2d(net, 1024, [3, 3], scope='conv4')
      net = layers.conv2d(net, 1024, [3, 3], scope='conv5')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
      with arg_scope(
          [layers.conv2d],
          weights_initializer=trunc_normal(0.005),
          biases_initializer=init_ops.constant_initializer(0.1)):
        # Use conv2d instead of fully_connected layers.
        net = layers.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6')
        net = layers_lib.dropout(
            net, dropout_keep_prob, is_training=is_training, scope='dropout6')
        net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
        net = layers_lib.dropout(
            net, dropout_keep_prob, is_training=is_training, scope='dropout7')
        net = layers.conv2d(
            net,
            num_classes, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            biases_initializer=init_ops.zeros_initializer(),
            scope='fc8')
      # Convert end_points_collection into an end_points dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
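A hypothetical usage sketch (231x231 is the classification input size the docstring names; TF 1.x graph mode assumed):

import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 231, 231, 3])
logits, end_points = overfeat(images, num_classes=1000, is_training=False)
# With spatial_squeeze=True (the default), logits has shape [batch, 1000];
# the squeezed tensor is also recorded in end_points under 'overfeat/fc8'.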
Example #9
def xfcn(inputs, dropout_rate, scope='xfcn'):
    """Defines the xfcn network
    Args:
    inputs: Tensorflow placeholder that contains the input image
    scope: Scope name for the network
    Returns:
    net: Output Tensor of the network
    end_points: Dictionary with all Tensors of the network
    """
    im_size = tf.shape(inputs)

    with tf.variable_scope(scope, 'xfcn', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs of all intermediate layers.
        with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                            outputs_collections=end_points_collection):
            # Entry flow
            # Block 1
            net = slim.conv2d(inputs,
                              32, [3, 3],
                              stride=2,
                              padding='VALID',
                              scope='xception_65/entry_flow/conv1_1')
            net = slim.batch_norm(
                net, scope='xception_65/entry_flow/conv1_1/BatchNorm')
            net = tf.nn.relu(net)
            net = slim.conv2d(net,
                              64, [3, 3],
                              scope='xception_65/entry_flow/conv1_2')
            net = slim.batch_norm(
                net, scope='xception_65/entry_flow/conv1_2/BatchNorm')
            net = tf.nn.relu(net)
            residual_1 = slim.conv2d(
                net,
                128, [1, 1],
                stride=2,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/shortcut'
            )
            residual_1 = slim.batch_norm(
                residual_1,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/shortcut/BatchNorm'
            )

            # block 2
            net = slim.separable_conv2d(
                net,
                128, [3, 3],
                activation_fn=None,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv1_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv1_pointwise/BatchNorm'
            )

            net = tf.nn.relu(net)

            net = slim.separable_conv2d(
                net,
                128, [3, 3],
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv2_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv2_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)

            net = slim.separable_conv2d(
                net,
                128, [3, 3],
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv3_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv3_pointwise/BatchNorm'
            )

            net = slim.max_pool2d(net, [3, 3], stride=2, padding='SAME')

            net_2 = tf.math.add(residual_1, net)

            net_2_drop = slim.dropout(net_2, keep_prob=dropout_rate)

            residual_2 = slim.conv2d(
                net_2,
                256, [1, 1],
                stride=2,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/shortcut'
            )
            residual_2 = slim.batch_norm(
                residual_2,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/shortcut/BatchNorm'
            )

            # block 3
            net = tf.nn.relu(net_2)
            net = slim.separable_conv2d(
                net,
                256, [3, 3],
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv1_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv1_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)

            net = slim.separable_conv2d(
                net,
                256, [3, 3],
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv2_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv2_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)

            net = slim.separable_conv2d(
                net,
                256, [3, 3],
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv3_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv3_pointwise/BatchNorm'
            )

            net = slim.max_pool2d(net, [3, 3], stride=2, padding='SAME')
            net_3 = tf.math.add(net, residual_2)

            net_3_drop = slim.dropout(net_3, keep_prob=dropout_rate)

            residual_3 = slim.conv2d(
                net_3,
                728, [1, 1],
                stride=2,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/shortcut'
            )
            residual_3 = slim.batch_norm(
                residual_3,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/shortcut/BatchNorm'
            )

            # block 4
            net = tf.nn.relu(net_3)
            net = slim.separable_conv2d(
                net,
                728, [3, 3],
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv1_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv1_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)

            net = slim.separable_conv2d(
                net,
                728, [3, 3],
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv2_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv2_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)

            net = slim.separable_conv2d(
                net,
                728, [3, 3],
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv3_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv3_pointwise/BatchNorm'
            )

            net = slim.max_pool2d(net, [3, 3], stride=2, padding='SAME')
            net_4 = tf.math.add(net, residual_3)

            net_4_drop = slim.dropout(net_4, keep_prob=dropout_rate)

            # middle flow
            # block 5
            net = middle_flow_block(net_4, unit_num=1)
            # block 6 - 20
            net = middle_flow_block(net, unit_num=2)
            net_5_drop = slim.dropout(net, keep_prob=dropout_rate)

            # Exit flow
            residual_20 = slim.conv2d(
                net,
                1024, [1, 1],
                stride=2,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/shortcut')
            residual_20 = slim.batch_norm(
                residual_20,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/shortcut/BatchNorm'
            )
            # block 21
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net,
                728, [3, 3],
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv1_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv1_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)

            net = slim.separable_conv2d(
                net,
                1024, [3, 3],
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv2_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv2_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)

            net = slim.separable_conv2d(
                net,
                1024, [3, 3],
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv3_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv3_pointwise/BatchNorm'
            )
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='SAME')
            net_6 = tf.math.add(net, residual_20)

            net_6_drop = slim.dropout(net_6, keep_prob=dropout_rate)

            # Get side outputs of the network
            with slim.arg_scope([slim.conv2d],
                                biases_initializer=tf.zeros_initializer()):
                side_2 = slim.conv2d(net_2_drop,
                                     16, [3, 3],
                                     rate=1,
                                     scope='conv2_2_16')

                side_3 = slim.conv2d(net_3_drop,
                                     16, [3, 3],
                                     rate=2,
                                     scope='conv3_3_16')

                side_4 = slim.conv2d(net_4_drop,
                                     16, [3, 3],
                                     rate=4,
                                     scope='conv4_3_16')

                side_5 = slim.conv2d(net_5_drop,
                                     16, [3, 3],
                                     rate=4,
                                     scope='conv5_3_16')

                side_6 = slim.conv2d(net_6_drop,
                                     16, [3, 3],
                                     rate=8,
                                     scope='conv6_3_16')

                # Supervise side outputs
                side_2_s = slim.conv2d(side_2, 1, [1, 1], scope='score-dsn_2')
                side_3_s = slim.conv2d(side_3, 1, [1, 1], scope='score-dsn_3')
                side_4_s = slim.conv2d(side_4, 1, [1, 1], scope='score-dsn_4')
                side_5_s = slim.conv2d(side_5, 1, [1, 1], scope='score-dsn_5')
                side_6_s = slim.conv2d(side_6, 1, [1, 1], scope='score-dsn_6')
                with slim.arg_scope([slim.convolution2d_transpose],
                                    outputs_collections=end_points_collection):
                    # Side outputs
                    side_2_s = slim.convolution2d_transpose(
                        side_2_s, 1, 8, 4, scope='score-dsn_2-up')
                    side_2_s = crop_features(side_2_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_2-cr',
                                                side_2_s)

                    side_3_s = slim.convolution2d_transpose(
                        side_3_s, 1, 16, 8, scope='score-dsn_3-up')
                    side_3_s = crop_features(side_3_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_3-cr',
                                                side_3_s)

                    side_4_s = slim.convolution2d_transpose(
                        side_4_s, 1, 32, 16, scope='score-dsn_4-up')
                    side_4_s = crop_features(side_4_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_4-cr',
                                                side_4_s)

                    side_5_s = slim.convolution2d_transpose(
                        side_5_s, 1, 32, 16, scope='score-dsn_5-up')
                    side_5_s = crop_features(side_5_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_5-cr',
                                                side_5_s)

                    side_6_s = slim.convolution2d_transpose(
                        side_6_s, 1, 64, 32, scope='score-dsn_6-up')
                    side_6_s = crop_features(side_6_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_6-cr',
                                                side_6_s)

                    # Main output
                    side_2_f = slim.convolution2d_transpose(
                        side_2, 16, 8, 4, scope='score-multi2-up')
                    side_2_f = crop_features(side_2_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi2-cr',
                                                side_2_f)

                    side_3_f = slim.convolution2d_transpose(
                        side_3, 16, 16, 8, scope='score-multi3-up')
                    side_3_f = crop_features(side_3_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi3-cr',
                                                side_3_f)

                    side_4_f = slim.convolution2d_transpose(
                        side_4, 16, 32, 16, scope='score-multi4-up')
                    side_4_f = crop_features(side_4_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi4-cr',
                                                side_4_f)

                    side_5_f = slim.convolution2d_transpose(
                        side_5, 16, 32, 16, scope='score-multi5-up')
                    side_5_f = crop_features(side_5_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi5-cr',
                                                side_5_f)

                    side_6_f = slim.convolution2d_transpose(
                        side_6, 16, 64, 32, scope='score-multi6-up')
                    side_6_f = crop_features(side_6_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi6-cr',
                                                side_6_f)

                concat_side = tf.concat(
                    [side_2_f, side_3_f, side_4_f, side_5_f, side_6_f], axis=3)
                net = slim.conv2d(concat_side, 1, [1, 1], scope='upscore-fuse')

        end_points = utils.convert_collection_to_dict(end_points_collection)

        return net, end_points
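A hypothetical usage sketch: because each side output was registered with collect_named_outputs under an explicit alias, it can be fetched back out of end_points by that alias (TF 1.x graph mode assumed; note dropout_rate is passed through as a keep probability):

import tensorflow as tf

image = tf.placeholder(tf.float32, [1, None, None, 3])
net, end_points = xfcn(image, dropout_rate=1.0)   # keep_prob=1.0, i.e. dropout off
side_2 = end_points['xfcn/score-dsn_2-cr']        # a cropped, supervised side output
fused = net                                       # output of the 'upscore-fuse' conv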