Example #1
def resnet_faster_rcnn_head(input, params):
    """
  Derived from https://github.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow

  Args:
    input:
    params:

  Returns:

  """

    with tf.variable_scope('resnet_head', reuse=tf.AUTO_REUSE):
        block4 = [
            resnet_v1_block('block4', base_depth=256, num_units=3, stride=1)
        ]

        with slim.arg_scope(norm_arg_scope(params)):
            C5, _ = resnet_v1.resnet_v1(input,
                                        block4,
                                        global_pool=False,
                                        include_root_block=False,
                                        scope='resnet_v1_50',
                                        reuse=tf.AUTO_REUSE)

            return C5
Example #2
def resnet_v1_slim_fc(inputs, scope):
    """ slim version of resnet, can be replaced with

        resnet_v1.resnet_v1_50(inputs, num_classes=None, global_pool=True,
                               output_stride=None, reuse=False, scope=scope)

        or any of the other resnets
    """
    blocks = [
      resnet_utils.Block('block1', resnet_v1.bottleneck,
                         [(64, 32, 1)] * 2 + [(64, 32, 2)]),
      resnet_utils.Block('block2', resnet_v1.bottleneck,
                         [(128, 64, 1)] * 3 + [(128, 64, 2)]),
      resnet_utils.Block('block3', resnet_v1.bottleneck,
                         [(256, 64, 1)] * 3 + [(256, 64, 2)]),
      resnet_utils.Block('block4', resnet_v1.bottleneck, [(128, 64, 1)] * 1)
    ]
    return resnet_v1.resnet_v1(
      inputs, blocks,
      # all parameters below can be passed to resnet_v1.resnet_v1_??
      num_classes = None,       # don't produce final prediction
      global_pool = True,       # produce 1x1 output, equivalent to input of a FC layer
      output_stride = None,
      include_root_block=True,
      reuse=False,              # do not re-use network
                                # my understanding
                                # task1      image -> resnet1 -> output
                                # task2      image -> resnet2 -> output
                                # if both resnets are defined under the same scope,
                                # with reuse set to True, then some of the parameters
                                # will be shared between two tasks
      scope=scope)
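A minimal usage sketch for resnet_v1_slim_fc (not part of the original example); it assumes the example's own imports (slim, resnet_v1, resnet_utils) are already in scope and uses an illustrative TF1.x placeholder shape.

# Hypothetical usage sketch only; input shape and scope name are assumptions.
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 224, 224, 3], name='images')
net, end_points = resnet_v1_slim_fc(images, scope='resnet_v1_slim')
# resnet_v1.resnet_v1 returns (net, end_points); with global_pool=True the net
# tensor has shape [batch, 1, 1, channels], ready to feed a fully connected layer.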
Example #3
def resnet_v1_50(inputs,
                 config,
                 is_training=True,
                 scope='resnet_v1_50'):
    """Modified ResNet-50 model."""
    # Note : The base_depth was reduced to be able to fit into GPU memory
    blocks = [
        resnet_v1.resnet_v1_block('block1',
                                  base_depth=config.block1_depth,
                                  num_units=config.block1_units,
                                  stride=config.block1_stride),
        resnet_v1.resnet_v1_block('block2',
                                  base_depth=config.block2_depth,
                                  num_units=config.block2_units,
                                  stride=config.block2_stride),
        resnet_v1.resnet_v1_block('block3',
                                  base_depth=config.block3_depth,
                                  num_units=config.block3_units,
                                  stride=config.block3_stride),
        resnet_v1.resnet_v1_block('block4',
                                  base_depth=config.block4_depth,
                                  num_units=config.block4_units,
                                  stride=config.block4_stride),
    ]
    return resnet_v1.resnet_v1(inputs,
                               blocks,
                               is_training=is_training,
                               global_pool=False,
                               include_root_block=True,
                               scope=scope)
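The config argument above is only read for its blockN_depth, blockN_units and blockN_stride attributes; the sketch below shows one hypothetical way to supply it (field values are illustrative assumptions, not the original settings).

# Hypothetical config sketch; any object exposing these twelve attributes would do.
import collections
import tensorflow as tf

ResNetConfig = collections.namedtuple('ResNetConfig', [
    'block1_depth', 'block1_units', 'block1_stride',
    'block2_depth', 'block2_units', 'block2_stride',
    'block3_depth', 'block3_units', 'block3_stride',
    'block4_depth', 'block4_units', 'block4_stride',
])
# Example values only (the note above says base_depth was reduced to fit GPU memory).
config = ResNetConfig(32, 3, 2, 64, 4, 2, 128, 6, 2, 256, 3, 1)

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
net, end_points = resnet_v1_50(images, config, is_training=True)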
Example #4
def resnet_v1_slim_fc(inputs, scope):
    """ slim version of resnet, can be replaced with
        
        resnet_v1.resnet_v1_50(inputs, num_classes=None, global_pool=True,
                               output_stride=None, reuse=False, scope=scope)

        or any of the other resnets
    """
    blocks = [
        resnet_utils.Block('block1', resnet_v1.bottleneck,
                           [(64, 32, 1)] * 2 + [(64, 32, 2)]),
        resnet_utils.Block('block2', resnet_v1.bottleneck,
                           [(128, 64, 1)] * 3 + [(128, 64, 2)]),
        resnet_utils.Block('block3', resnet_v1.bottleneck,
                           [(256, 64, 1)] * 3 + [(256, 64, 2)]),
        resnet_utils.Block('block4', resnet_v1.bottleneck, [(128, 64, 1)] * 1)
    ]
    return resnet_v1.resnet_v1(
        inputs,
        blocks,
        # all parameters below can be passed to resnet_v1.resnet_v1_??
        num_classes=None,  # don't produce final prediction
        global_pool=True,  # produce 1x1 output, equivalent to input of a FC layer
        output_stride=None,
        include_root_block=True,
        reuse=False,  # do not re-use network
        # my understanding
        # task1      image -> resnet1 -> output
        # task2      image -> resnet2 -> output
        # if both resnets are defined under the same scope,
        # with reuse set to True, then some of the parameters
        # will be shared between two tasks
        scope=scope)
Example #5
def restnet_head(input, is_training, scope_name, stage):
    if stage == 'stage1':
        block4 = [
            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
        ]

        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            C5, _ = resnet_v1.resnet_v1(input,
                                        block4,
                                        global_pool=False,
                                        include_root_block=False,
                                        scope=scope_name)
            # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
            flatten = tf.reduce_mean(C5,
                                     axis=[1, 2],
                                     keep_dims=False,
                                     name='global_average_pooling')
            # C5_flatten = tf.Print(C5_flatten, [tf.shape(C5_flatten)], summarize=10, message='C5_flatten_shape')

        # global average pooling C5 to obtain fc layers
    else:
        fc_flatten = slim.flatten(input)
        net = slim.fully_connected(fc_flatten,
                                   1024,
                                   scope='fc_1_{}'.format(stage))
        net = slim.dropout(net,
                           keep_prob=0.5,
                           is_training=is_training,
                           scope='dropout_{}'.format(stage))
        flatten = slim.fully_connected(net,
                                       1024,
                                       scope='fc_2_{}'.format(stage))
    return flatten
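For reference, the tf.reduce_mean over axes [1, 2] in the stage1 branch above is the global average pooling the comment refers to; a minimal shape sketch with assumed dimensions:

# Shape sketch only: a [N, H, W, C] feature map collapses to [N, C].
import tensorflow as tf

feat = tf.zeros([2, 7, 7, 2048])             # stand-in for a C5 feature map
pooled = tf.reduce_mean(feat, axis=[1, 2])   # static shape: [2, 2048]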
Example #6
def image_to_head(input_tensor, is_training, reuse=None):
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net_conv = build_base(input_tensor)
    if fix_blocks > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net_conv, _ = resnet_v1.resnet_v1(net_conv, blocks[0:fix_blocks],
                                              global_pool=False,
                                              include_root_block=False,
                                              reuse=reuse, scope=scope)
    if fix_blocks < 4:
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv, _ = resnet_v1.resnet_v1(net_conv,
                                              blocks[fix_blocks:],
                                              global_pool=True,
                                              include_root_block=False,
                                              reuse=reuse,
                                              scope=scope)
    return net_conv
Example #7
def restnet_head(input, is_training, scope_name):
    block4 = [resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5, _ = resnet_v1.resnet_v1(input,
                                    block4,
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)
        # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
        # C5_flatten = tf.reduce_mean(C5, axis=[1, 2], keep_dims=False, name='global_average_pooling')
        # C5_flatten = tf.Print(C5_flatten, [tf.shape(C5_flatten)], summarize=10, message='C5_flatten_shape')

    # global average pooling C5 to obtain fc layers
    return C5
Example #8
def resnet_tiny2(inputs, num_classes=2, scope='resnet_tiny'):
    # separate cls and fcn, add stop_gradient after fcn output
    blocks = [
        resnet_utils.Block('block1', resnet_v1.bottleneck,
                           [(64, 32, 1)] + [(64, 32, 2)]),
        resnet_utils.Block('block2', resnet_v1.bottleneck,
                           [(128, 64, 1)] + [(128, 64, 2)]),
        resnet_utils.Block('block3', resnet_v1.bottleneck,
                           [(256, 64, 1)] + [(128, 64, 2)]),
        resnet_utils.Block('block4', resnet_v1.bottleneck, [(128, 64, 1)])
    ]
    net, _ = resnet_v1.resnet_v1(
        inputs,
        blocks,
        # all parameters below can be passed to resnet_v1.resnet_v1_??
        num_classes=None,  # don't produce final prediction
        global_pool=False,  # keep the spatial feature map (no global pooling)
        output_stride=16,
        include_root_block=True,
        reuse=False,  # do not re-use network
        scope=scope)
    res_out = net  # keep this for later CLS usage
    net = slim.batch_norm(slim.conv2d_transpose(net, 64, 5, 2))
    net = slim.batch_norm(slim.conv2d_transpose(net, 32, 5, 2))
    net = slim.batch_norm(slim.conv2d_transpose(net, 16, 5, 2))
    net = slim.batch_norm(slim.conv2d_transpose(net, 8, 5, 2))
    net = slim.conv2d(net, num_classes, 5, 1, activation_fn=None)
    logits_fcn = tf.identity(net, 'logits_fcn')

    net = res_out
    net = tf.stop_gradient(net)
    # add a few layers to make the image size even smaller
    net = slim.conv2d(net, 128, 3, 1)
    net = slim.max_pool2d(net, 2, 2)
    net = slim.conv2d(net, 128, 3, 1)
    net = slim.max_pool2d(net, 2, 2)
    net = tf.reduce_mean(net, [1, 2], keep_dims=True)
    # add an extra layer
    net = slim.conv2d(net, 64, [1, 1])
    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      normalizer_fn=None)
    logits_cls = tf.identity(net, 'logits_cls')

    return logits_fcn, logits_cls, 16
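A hypothetical usage sketch for resnet_tiny2 (the input shape is an assumption), just to spell out what the three returned values are:

# Hypothetical usage sketch; assumes the example's imports (tf, slim, resnet_v1,
# resnet_utils) are in scope.
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 256, 256, 3])
logits_fcn, logits_cls, stride = resnet_tiny2(images, num_classes=2)
# logits_fcn: dense per-pixel logits, upsampled by the four stride-2 deconvolutions
# logits_cls: [N, 1, 1, num_classes] logits from the stop-gradient classification branch
# stride:     the backbone output stride (16)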
Example #9
    def restnet_head(self, inputs, scope_name, is_training):
        block4 = [
            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
        ]

        with slim.arg_scope(self.resnet_arg_scope(is_training=is_training)):
            net, _ = resnet_v1.resnet_v1(inputs,
                                         block4,
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=scope_name)
            net_flatten = tf.reduce_mean(net,
                                         axis=[1, 2],
                                         keep_dims=False,
                                         name='global_average_pooling')
        # global average pooling C5 to obtain fc layers
        return net_flatten
Example #10
 def _resnet_small(self,
                   inputs,
                   num_classes=None,
                   global_pool=True,
                   output_stride=None,
                   include_root_block=True,
                   reuse=None,
                   scope='resnet_v1_small'):
   """A shallow and thin ResNet v1 for faster tests."""
   bottleneck = resnet_v1.bottleneck
   blocks = [
       resnet_utils.Block(
           'block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]),
       resnet_utils.Block(
           'block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]),
       resnet_utils.Block(
           'block3', bottleneck, [(16, 4, 1)] * 2 + [(16, 4, 2)]),
       resnet_utils.Block(
           'block4', bottleneck, [(32, 8, 1)] * 2)]
   return resnet_v1.resnet_v1(inputs, blocks, num_classes, global_pool,
                              output_stride, include_root_block, reuse, scope)
Example #12
def resnet_tiny(X, scope=None, reuse=True):
    blocks = [ 
        resnet_utils.Block('block1', resnet_v1.bottleneck,
                           [(64, 32, 1)] + [(64, 32, 2)]),
        resnet_utils.Block('block2', resnet_v1.bottleneck,
                           [(128, 64, 1)] + [(128, 64, 2)]),
        resnet_utils.Block('block3', resnet_v1.bottleneck,
                           [(256, 64, 1)]  + [(128, 64, 2)]),
        resnet_utils.Block('block4', resnet_v1.bottleneck,
                           [(256, 64, 1)]  + [(128, 64, 2)]),
        resnet_utils.Block('block5', resnet_v1.bottleneck,
                           [(256, 64, 1)]  + [(128, 64, 2)]),
        resnet_utils.Block('block6', resnet_v1.bottleneck, [(128, 64, 1)])
    ]
    net, _ = resnet_v1.resnet_v1(
        X, blocks,
        # all parameters below can be passed to resnet_v1.resnet_v1_??
        num_classes=2,      # produce a final two-class prediction layer
        global_pool=True,   # produce 1x1 output, equivalent to input of a FC layer
        reuse=reuse,        # reuse variables within the scope when reuse=True
        scope=scope)
    return net
def resnet_v2_50(inputs,
                 num_classes=None,
                 is_training=True,
                 global_pool=True,
                 output_stride=None,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='resnet_v1_50'):
    """ResNet-50 model of [1]. See resnet_v2() for arg and return description."""
    blocks = [
        rv2.resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        rv2.resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        rv2.resnet_v1_block('block3', base_depth=256, num_units=6, stride=2),
    ]
    return rv2.resnet_v1(inputs,
                         blocks,
                         None,
                         is_training=False,
                         global_pool=False,
                         output_stride=16,
                         include_root_block=True,
                         reuse=False,
                         scope=scope)
  def build_network(self, sess, is_training=True):
    # select initializers
    if cfg.TRAIN.TRUNCATED:
      initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
      initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    # choose different blocks for different number of layers
    if self._num_layers == 50:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    elif self._num_layers == 101:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    elif self._num_layers == 152:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 7 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    else:
      # other numbers are not supported
      raise NotImplementedError
    with tf.variable_scope('noise'):
      conv = slim.conv2d(self.noise, num_outputs=3, kernel_size=[5, 5],
                         stride=1, padding='SAME', activation_fn=None,
                         trainable=is_training, scope='constrained_conv')
    self._layers['noise'] = conv
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        # assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
        C_1 = self.build_base(conv)

        C_2, _ = resnet_v1.resnet_v1(C_1,
                                     blocks[0:1],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)

    #net=self.cbam_module(inputs=net,name="cbam_layer_1")
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C_3, _ = resnet_v1.resnet_v1(C_2,
                                     blocks[1:2],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)

    #net = self.cbam_module(inputs=net, name="cbam_layer_2")
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C_4, end_point = resnet_v1.resnet_v1(C_3,
                                     blocks[2:3],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)

    # mask_C_4 = self.cbam_module(inputs=C_4, name="C_4")
    self.end_point = end_point
    self._act_summaries.append(C_4)
    self._layers['head'] = C_4
    self._layers['C1'] = C_1
    self._layers['C2'] = C_2
    self._layers['C3'] = C_3

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
      # build the anchors for the image
      self._anchor_component()

      # rpn
      rpn1 = slim.conv2d(C_4, 512, [3, 3], trainable=is_training, weights_initializer=initializer,
                        scope="rpn_conv/3x3")
      self._layers['rpn1'] = rpn1
      rpn = self.cbam_module(inputs=rpn1, name="rpn_conv1")
      self._layers['rpn'] = rpn

      self._act_summaries.append(rpn)
      rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training,
                                  weights_initializer=initializer,
                                  padding='VALID', activation_fn=None, scope='rpn_cls_score')
      self._layers['rpn_cls_score'] = rpn_cls_score

      # change it so that the score has 2 as its channel size
      rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
      rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
      rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")

      rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training,
                                  weights_initializer=initializer,
                                  padding='VALID', activation_fn=None, scope='rpn_bbox_pred')

      if is_training:
        rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
        # Try to have a deterministic order for the computing graph, for reproducibility
        with tf.control_dependencies([rpn_labels]):
          rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
      else:
        if cfg.TEST.MODE == 'nms':
          rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        elif cfg.TEST.MODE == 'top':
          rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        else:
          raise NotImplementedError
      # rcnn
      if cfg.POOLING_MODE == 'crop':
        pool5 = self._crop_pool_layer(C_4, rois, "pool5")
        #pool5 = self._crop_pool_layer(net_sum, rois, "pool5")
      else:
        raise NotImplementedError


    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
      fc7, end_point1 = resnet_v1.resnet_v1(pool5,
                                   blocks[-1:],
                                   global_pool=False,
                                   include_root_block=False,
                                   scope=self._resnet_scope)

    self._layers['fc7'] = fc7
    # self._layers['pool5'] = pool5
    self.end_point1 = end_point1


    with tf.variable_scope(self._resnet_scope, self._resnet_scope):

      cls_fc7 = tf.reduce_mean(fc7, axis=[1, 2])

      cls_score = slim.fully_connected(cls_fc7, self._num_classes, weights_initializer=initializer,
                                       trainable=is_training, activation_fn=None, scope='cls_score')


      cls_prob = self._softmax_layer(cls_score, "cls_prob")

      box_fc7 = tf.reduce_mean(fc7, axis=[1, 2])
      bbox_pred = slim.fully_connected(box_fc7, self._num_classes * 4, weights_initializer=initializer_bbox,
                                     trainable=is_training,
                                     activation_fn=None, scope='bbox_pred')

    if cfg.USE_MASK is True:
      with tf.variable_scope('feature_fuse', 'feature_fuse'):
            mask_fuse = C_3 * 0.5 + rpn * 0.5

            feature_fuse = slim.conv2d(mask_fuse, 1024, [1, 1], padding='VALID', trainable=is_training,
                                       weights_initializer=initializer, scope='mask_fuse')
      mask_box, indices = self._proposal_mask_layer(cls_prob, bbox_pred, rois, 'mask_proposal')
      mask_pool5 = self._crop_pool_layer(feature_fuse, mask_box, "mask_pool5")

      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
          mask_fc7, _ = resnet_v1.resnet_v1(mask_pool5,
                                            blocks[-1:],
                                            global_pool=False,
                                            include_root_block=False,
                                            scope='mask_conv')

      self._act_summaries.append(mask_fc7)

      with tf.variable_scope('mask_predict', 'mask_predict'):

          upsampled_features = slim.conv2d_transpose(mask_fc7, 256, 2, 2, activation_fn=None)
          self._act_summaries.append(upsampled_features)
          upsampled_features = slim.conv2d(upsampled_features, 64, [1, 1],
                                           normalizer_fn=slim.batch_norm,
                                           activation_fn=None, padding='VALID')
          self._act_summaries.append(upsampled_features)
          upsampled_features = slim.batch_norm(upsampled_features, activation_fn=None)
          self._act_summaries.append(upsampled_features)
          upsampled_features = tf.nn.relu(upsampled_features)
          self._act_summaries.append(upsampled_features)

          mask_predictions = slim.conv2d(upsampled_features, num_outputs=2, activation_fn=None,
                                         kernel_size=[1, 1], padding='VALID')
          self._act_summaries.append(mask_predictions)

      self._predictions["mask_out"] = tf.expand_dims(mask_predictions[:, :, :, 1], 3)
      mask_softmax = tf.nn.softmax(mask_predictions)


      self._predictions["mask_softmaxbg"] = tf.expand_dims(mask_softmax[:, :, :, 0], 3)
      self._predictions["mask_softmaxfg"] = tf.expand_dims(mask_softmax[:, :, :, 1], 3)

      self._predictions["rpn_cls_score"] = rpn_cls_score
      self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
      self._predictions["rpn_cls_prob"] = rpn_cls_prob
      self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
      self._predictions["cls_score"] = cls_score
      self._predictions["cls_prob"] = cls_prob
      self._predictions["bbox_pred"] = bbox_pred
      self._predictions["rois"] = rois
      self._predictions["mask_pred"] = mask_predictions

      self._score_summaries.update(self._predictions)

      return rois, cls_prob, bbox_pred, mask_predictions
    else:
      self._predictions["rpn_cls_score"] = rpn_cls_score
      self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
      self._predictions["rpn_cls_prob"] = rpn_cls_prob
      self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
      self._predictions["cls_score"] = cls_score
      self._predictions["cls_prob"] = cls_prob
      self._predictions["bbox_pred"] = bbox_pred
      self._predictions["rois"] = rois

      self._score_summaries.update(self._predictions)

      return rois, cls_prob, bbox_pred
Example #15
def resnet_base(img_batch, scope_name, is_training):
    '''
    this code is derived from light-head rcnn.
    https://github.com/zengarden/light_head_rcnn

    It is convenient to freeze blocks, so we adopt this approach.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23  # ResNet-101's third block has 23 units
    elif scope_name == 'resnet_v1_152':
        middle_num_units = 36
    else:
        raise NotImplementedError(
            'We only support resnet_v1_50, resnet_v1_101 and resnet_v1_152. Check your network name....yjr'
        )

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=1),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3',
                        base_depth=256,
                        num_units=middle_num_units,
                        stride=2),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=2)
    ]
    with slim.arg_scope(
            resnet_arg_scope(is_training=is_training)):  # resnet_arg_scope sets the shared layer defaults
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(
                img_batch, 64, 7, stride=2,
                scope='conv1')  # ResNet's first conv layer: 7x7, 64 channels, stride 2

            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])  # zero padding, similar to the 'SAME' padding used later?
            net = slim.max_pool2d(net, [3, 3],
                                  stride=2,
                                  padding='VALID',
                                  scope='pool1')  # 3x3 max pooling
    # not_freezed = [False] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]  # the blocks that are not frozen
    #net = tf.Print(net, [tf.shape(net)], summarize=10, message='net')
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C2, end_points_C2 = resnet_v1.resnet_v1(
            net,
            blocks[0:1],  # this passes a single resnet_utils.Block, i.e. one whole ResNet block
            global_pool=False,
            include_root_block=False,
            scope=scope_name
        )  # returns the output of the block just built (C2); end_points_C2 holds the feature maps collected so far, growing with each later block

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C3, end_points_C3 = resnet_v1.resnet_v1(
            C2,
            blocks[1:2],
            global_pool=False,
            include_root_block=False,
            scope=scope_name)  # build the second block

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C4, end_points_C4 = resnet_v1.resnet_v1(C3,
                                                blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5, end_points_C5 = resnet_v1.resnet_v1(C4,
                                                blocks[3:4],
                                                num_classes=cfgs.num_classes,
                                                global_pool=True,
                                                include_root_block=False,
                                                scope=scope_name)
        C5 = tf.reshape(C5, [-1, cfgs.num_classes])
    return C5
Example #16
  def build_network(self, sess, is_training=True):
    # select initializers
    if cfg.TRAIN.TRUNCATED:
      initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
      #initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
      initializer = tf.contrib.layers.xavier_initializer()
      initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    # choose different blocks for different number of layers
    if self._num_layers == 50:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    elif self._num_layers == 101:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    elif self._num_layers == 152:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 7 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    else:
      # other numbers are not supported
      raise NotImplementedError

    assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
    if cfg.RESNET.FIXED_BLOCKS == 3:
      with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net = self.build_base()
        net_conv4, _ = resnet_v1.resnet_v1(net,
                                           blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope)
    elif cfg.RESNET.FIXED_BLOCKS > 0:
      with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net = self.build_base()
        net, _ = resnet_v1.resnet_v1(net,
                                     blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)

      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv4, _ = resnet_v1.resnet_v1(net,
                                           blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope)
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net = self.build_base()
        net_conv4, _ = resnet_v1.resnet_v1(net,
                                           blocks[0:-1],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope)
    self._act_summaries.append(net_conv4)
    self._layers['head'] = net_conv4
    # build three 5x5 high-pass (SRM-style) filter kernels for the noise stream
    c = np.zeros((3, 5, 5))
    c[0]=[[-1,2,-2,2,-1],[2,-6,8,-6,2],[-2,8,-12,8,-2],[2,-6,8,-6,2],[-1,2,-2,2,-1]]
    c[0]=c[0]/12

    c[1][1][1]=-1
    c[1][1][2]=2
    c[1][1][3]=-1
    c[1][2][1]=2
    c[1][2][2]=-4
    c[1][2][3]=2
    c[1][3][1]=-1
    c[1][3][2]=2
    c[1][3][3]=-1
    c[1]=c[1]/4

    c[2][2][1]=1
    c[2][2][2]=-2
    c[2][2][3]=1
    c[2]=c[2]/2

    Wcnn=np.zeros((5,5,3,3))
    for i in range(3):
      #k=i%10+1
      #Wcnn[i]=[c[3*k-3],c[3*k-2],c[3*k-1]]
      Wcnn[:,:,0,i]=c[i]
      Wcnn[:,:,1,i]=c[i]
      Wcnn[:,:,2,i]=c[i]
    if True:
      with tf.variable_scope('noise'):
        #kernel = tf.get_variable('weights',
                              #shape=[5, 5, 3, 3],
                              #initializer=tf.constant_initializer(c))
        conv = tf.nn.conv2d(self.noise, Wcnn, [1, 1, 1, 1], padding='SAME',name='srm')
      self._layers['noise']=conv
      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        #srm_conv = tf.nn.tanh(conv, name='tanh')
        noise_net = resnet_utils.conv2d_same(conv, 64, 7, stride=2, scope='conv1')
        noise_net = tf.pad(noise_net, [[0, 0], [1, 1], [1, 1], [0, 0]])
        noise_net = slim.max_pool2d(noise_net, [3, 3], stride=2, padding='VALID', scope='pool1')
        #net_sum=tf.concat(3,[net_conv4,noise_net])
        noise_conv4, _ = resnet_v1.resnet_v1(noise_net,
                                           blocks[0:-1],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope='noise')
    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
      # build the anchors for the image
      self._anchor_component()

      # rpn
      rpn = slim.conv2d(net_conv4, 512, [3, 3], trainable=is_training, weights_initializer=initializer,
                        scope="rpn_conv/3x3")
      self._act_summaries.append(rpn)
      rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training,
                                  weights_initializer=initializer,
                                  padding='VALID', activation_fn=None, scope='rpn_cls_score')
      # change it so that the score has 2 as its channel size
      rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
      rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
      rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
      rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training,
                                  weights_initializer=initializer,
                                  padding='VALID', activation_fn=None, scope='rpn_bbox_pred')
      if is_training:
        rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
        # Try to have a deterministic order for the computing graph, for reproducibility
        with tf.control_dependencies([rpn_labels]):
          rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
      else:
        if cfg.TEST.MODE == 'nms':
          rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        elif cfg.TEST.MODE == 'top':
          rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        else:
          raise NotImplementedError
      # rcnn
      if cfg.POOLING_MODE == 'crop':
        pool5 = self._crop_pool_layer(net_conv4, rois, "pool5")
        self._layers['pool5']=pool5
        #pool5 = self._crop_pool_layer(net_sum, rois, "pool5")
      else:
        raise NotImplementedError
    if True:
      noise_pool5 = self._crop_pool_layer(noise_conv4, rois, "noise_pool5")
      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        noise_fc7, _ = resnet_v1.resnet_v1(noise_pool5,
                                   blocks[-1:],
                                   global_pool=False,
                                   include_root_block=False,
                                   scope='noise')
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
      fc7, _ = resnet_v1.resnet_v1(pool5,
                                   blocks[-1:],
                                   global_pool=False,
                                   include_root_block=False,
                                   scope=self._resnet_scope)
    self._layers['fc7']=fc7
    with tf.variable_scope('noise_pred'):

      bilinear_pool = compact_bilinear_pooling_layer(fc7, noise_fc7, 2048 * 8,
                                                     compute_size=16, sequential=False)
      fc7 = tf.Print(fc7, [tf.shape(fc7)], message='Value of %s' % 'fc', summarize=4, first_n=1)
      bilinear_pool = tf.reshape(bilinear_pool, [-1, 2048 * 8])
      bilinear_pool = tf.Print(bilinear_pool, [tf.shape(bilinear_pool)],
                               message='Value of %s' % 'Blinear', summarize=4, first_n=1)
      bilinear_pool = tf.multiply(tf.sign(bilinear_pool), tf.sqrt(tf.abs(bilinear_pool) + 1e-12))
      bilinear_pool = tf.nn.l2_normalize(bilinear_pool, dim=1)
      noise_cls_score = slim.fully_connected(bilinear_pool, self._num_classes, weights_initializer=tf.contrib.layers.xavier_initializer(),
                                       trainable=is_training, activation_fn=None, scope='cls_score')
      cls_prob = self._softmax_layer(noise_cls_score, "cls_prob")
      fc7 = tf.reduce_mean(fc7, axis=[1, 2])

      bbox_pred = slim.fully_connected(fc7, self._num_classes * 4, weights_initializer=initializer_bbox,
                                     trainable=is_training,
                                     activation_fn=None, scope='bbox_pred')

    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["cls_score"] = noise_cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = rois

    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
Example #17
  def build_network(self, sess, is_training=True):
    # select initializers
    if cfg.TRAIN.TRUNCATED:
      initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
      initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    blocks = [
      resnet_utils.Block('block1', bottleneck,
                         [(256, 64, 1)] * 2 + [(256, 64, 2)]),
      resnet_utils.Block('block2', bottleneck,
                         [(512, 128, 1)] * 3 + [(512, 128, 2)]),
      resnet_utils.Block('block3', bottleneck,
                         [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
      resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]
    if cfg.RESNET.FIXED_BLOCKS > 0:
      with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net, _ = resnet_v1.resnet_v1(self._image,
                                              blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                              global_pool=False,
                                              include_root_block=True,
                                              scope='resnet_v1_101')
      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv5, _ = resnet_v1.resnet_v1(net,
                                              blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                              global_pool=False,
                                              include_root_block=False,
                                              scope='resnet_v1_101')
    else:
      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv5, _ = resnet_v1.resnet_v1(self._image,
                                              blocks[0:-1],
                                              global_pool=False,
                                              include_root_block=True,
                                              scope='resnet_v1_101')

    self._act_summaries.append(net_conv5)
    self._layers['conv5_3'] = net_conv5
    with tf.variable_scope('resnet_v1_101', 'resnet_v1_101',
                           regularizer=tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)):
      # build the anchors for the image
      self._anchor_component()

      # rpn
      rpn = slim.conv2d(net_conv5, 512, [3, 3], trainable=is_training, weights_initializer=initializer,
                        scope="rpn_conv/3x3")
      self._act_summaries.append(rpn)
      rpn_cls_score = slim.conv2d(rpn, self._num_scales * 6, [1, 1], trainable=is_training,
                                  weights_initializer=initializer,
                                  padding='VALID', activation_fn=None, scope='rpn_cls_score')
      # change it so that the score has 2 as its channel size
      rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
      rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
      rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_scales * 6, "rpn_cls_prob")
      rpn_bbox_pred = slim.conv2d(rpn, self._num_scales * 12, [1, 1], trainable=is_training,
                                  weights_initializer=initializer,
                                  padding='VALID', activation_fn=None, scope='rpn_bbox_pred')
      if is_training:
        rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
        # Try to have a deterministic order for the computing graph, for reproducibility
        with tf.control_dependencies([rpn_labels]):
          rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
      else:
        if cfg.TEST.MODE == 'nms':
          rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        elif cfg.TEST.MODE == 'top':
          rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        else:
          raise NotImplementedError

      # rcnn
      if cfg.POOLING_MODE == 'crop':
        pool5 = self._crop_pool_layer(net_conv5, rois, "pool5")
      else:
        raise NotImplementedError

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
      fc7, _ = resnet_v1.resnet_v1(pool5,
                                    blocks[-1:],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope='resnet_v1_101')
    with tf.variable_scope('resnet_v1_101', 'resnet_v1_101',
                           regularizer=tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)):
      # Average pooling done by reduce_mean
      fc7 = tf.reduce_mean(fc7, axis=[1,2])
      cls_score = slim.fully_connected(fc7, self._num_classes, weights_initializer=initializer, trainable=is_training,
                              activation_fn=None, scope='cls_score')
      cls_prob = self._softmax_layer(cls_score, "cls_prob")
      bbox_pred = slim.fully_connected(fc7, self._num_classes * 4, weights_initializer=initializer_bbox,
                              trainable=is_training, 
                              activation_fn=None, scope='bbox_pred')
    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["cls_score"] = cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = rois

    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
Example #18
def resnet_base(img_batch, scope_name, is_training=False):
    '''
    this code is derived from light-head rcnn.
    https://github.com/zengarden/light_head_rcnn

    It is convenient to freeze blocks, so we adopt this approach.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101. Check your network name....yjr'
        )

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3', base_depth=256, num_units=9, stride=2),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
    ]
    # when using FPN, the stride list is [1, 2, 2]

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(img_batch,
                                           64,
                                           7,
                                           stride=2,
                                           scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3],
                                  stride=2,
                                  padding='VALID',
                                  scope='pool1')

    not_freezed = [False] * 0 + (4 - 0) * [True]
    # Fixed_Blocks can be 1~3

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, end_points_C2 = resnet_v1.resnet_v1(net,
                                                blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
        #C2=tf.layers.average_pooling2d(inputs=C2, pool_size=3, strides=2,padding="valid")
        #C2=tf.reduce_mean(C2, axis=[1, 2], keep_dims=False, name='global_average_pooling')
    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, end_points_C3 = resnet_v1.resnet_v1(C2,
                                                blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
        C3 = slim.avg_pool2d(C3, 2)
        #C3 = tf.reduce_mean(C3, axis=[1, 2], keep_dims=False, name='global_average_pooling')
    #return C3
    '''with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, end_points_C4 = resnet_v1.resnet_v1(C3,
                                                blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)'''
    return C3
Example #19
    def resnet_base(self, inputs, is_training):
        if self.scope_name == 'resnet_v1_50':
            middle_num_units = 6
        elif self.scope_name == 'resnet_v1_101':
            middle_num_units = 23
        else:
            raise NotImplementedError(
                'We only support resnet_v1_50 or resnet_v1_101. Check your network name....'
            )

        blocks = [
            resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
            resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
            # use stride 1 for the last conv4 layer.
            resnet_v1_block('block3',
                            base_depth=256,
                            num_units=middle_num_units,
                            stride=1)
        ]
        # when using FPN, the stride list is [1, 2, 2]

        with slim.arg_scope(self.resnet_arg_scope(is_training=False)):
            with tf.variable_scope(self.scope_name, 'resnet_v1_101'):
                # Do the first few layers manually, because 'SAME' padding can behave inconsistently
                # for images of different sizes: sometimes 0, sometimes 1
                net = resnet_utils.conv2d_same(inputs,
                                               num_outputs=64,
                                               kernel_size=7,
                                               stride=2,
                                               scope='conv1')
                net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
                net = slim.max_pool2d(net,
                                      kernel_size=[3, 3],
                                      stride=2,
                                      padding='VALID',
                                      scope='pool1')

        # generate freeze flag
        block_freeze = [False] * self.fixed_block + (4 - self.fixed_block) * [True]

        with slim.arg_scope(
                self.resnet_arg_scope(
                    is_training=(is_training and block_freeze[0]))):
            net, _ = resnet_v1.resnet_v1(net,
                                         blocks[0:1],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self.scope_name)

        with slim.arg_scope(
                self.resnet_arg_scope(
                    is_training=(is_training and block_freeze[1]))):
            net, _ = resnet_v1.resnet_v1(net,
                                         blocks[1:2],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self.scope_name)
        # add_heatmap(C3, name='Layer/C3')
        # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')

        with slim.arg_scope(
                self.resnet_arg_scope(
                    is_training=(is_training and block_freeze[2]))):
            net, _ = resnet_v1.resnet_v1(net,
                                         blocks[2:3],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self.scope_name)
        return net
Example #20
def resnet_base(img_batch, scope_name, is_training=True):
    '''
    this code is derived from light-head rcnn.
    https://github.com/zengarden/light_head_rcnn

    It is convenient to freeze blocks, so we adopt this approach.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101. Check your network name....yjr'
        )

    # clw note: this calls slim's resnet_v1_block interface; the settings below can be
    #           found in the ResNet paper's configurations for the different depths.
    #           For ResNet_v1_50: 1 (conv1) + 3*3 (conv2) + 4*3 (conv3) + 6*3 (conv4)
    #           + 3*3 (conv5) + 1 fc = 1 + 9 + 12 + 18 + 9 + 1 = 50
    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        # use stride 1 for the last conv4 layer.

        # Note that block3 uses stride=1 here, while a standard ResNet would use stride=2;
        # the author explains that tf.slim implements ResNet slightly differently from the paper.
        resnet_v1_block('block3',
                        base_depth=256,
                        num_units=middle_num_units,
                        stride=1)
    ]
    # when using FPN, the stride list is [1, 2, 2]   clw note: TODO

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(img_batch,
                                           64,
                                           7,
                                           stride=2,
                                           scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3],
                                  stride=2,
                                  padding='VALID',
                                  scope='pool1')

    # clw note: resnet.py defines both the resnet_base network and the resnet_head network;
    # one serves as the base feature extractor, the other as the top detection/classification
    # network applied after RoI Pooling. When building the base network, not_freezed decides
    # whether each stage of the feature extractor is trained further.
    # For example, ResNet50's conv2~conv5 stages contain 3, 4, 6 and 3 units respectively.
    # With the default FIXED_BLOCKS=1, not_freezed is [False, True, True, True], so conv2
    # (block0) is not trained while conv3 is trained;
    # with FIXED_BLOCKS=2, conv2 and conv3 are frozen and conv4 is trained;
    # with FIXED_BLOCKS=3, conv2, conv3 and conv4 are all frozen.
    #
    not_freezed = [False] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]
    # Fixed_Blocks can be 1~3

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, _ = resnet_v1.resnet_v1(net,
                                    blocks[0:1],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)

    # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')
    # add_heatmap(C2, 'Layer/C2')

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, _ = resnet_v1.resnet_v1(C2,
                                    blocks[1:2],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)
    # add_heatmap(C3, name='Layer/C3')
    # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, _ = resnet_v1.resnet_v1(C3,
                                    blocks[2:3],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)
    # add_heatmap(C4, name='Layer/C4')
    # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')

    # Reader question: in your code the feature map that enters RoI pooling is the one after
    # conv_4; why not feed the final conv_5 feature map into RoI pooling? Also, is this the
    # NoC approach from "Object Detection Networks on Convolutional Feature Maps"?
    # Author: correct, it is the NoC approach. With a ResNet backbone, conv_5 is generally
    # used as the head that classifies and regresses each RoI.
    # Own note: indeed, the paper feeds the conv_4 output into the RPN, then after RoI Pooling
    # passes it through the fully-convolutional conv_5 stage, which acts like the fc layers,
    # for classification and regression.
    return C4
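
The freezing scheme described in the notes above reduces to a boolean mask over the four residual stages. A minimal standalone sketch follows (freeze_mask is a hypothetical helper, not part of the example):

def freeze_mask(fixed_blocks, num_blocks=4):
    """Per-stage trainable flags: the first `fixed_blocks` stages stay frozen."""
    assert 0 <= fixed_blocks < num_blocks
    return [False] * fixed_blocks + [True] * (num_blocks - fixed_blocks)

for fixed in range(4):
    print(fixed, freeze_mask(fixed))
# 0 -> [True, True, True, True]     conv2~conv5 all fine-tuned
# 1 -> [False, True, True, True]    conv2 frozen
# 2 -> [False, False, True, True]   conv2 and conv3 frozen
# 3 -> [False, False, False, True]  conv2, conv3 and conv4 frozen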
Exemple #21
0
def resnet_v1_slim (inputs,
                  num_classes=None,
                  global_pool=True,
                  output_stride=None,
                  reuse=None,   # the above parameters will be directly passed to
                                # resnet.resnet_v1
                  scope='resnet_v1_slim'):
  blocks = [
      resnet_utils.Block('block1', resnet_v1.bottleneck,
                         [(64, 32, 1)] * 2 + [(64, 32, 2)]),
      # the last argument of Block is a list of "bottleneck" unit
      # configurations. Each entry is of the form  [depth, in-depth, stride]
      # Each "bottleneck" unit consists 3 layers:
      #    convolution from depth channels to in-depth channels
      #    convolution from in-depth channels to in-depth channels
      #    convolution from in-depth channels to depth channels
      # It's called "bottleneck" because the overall input and output
      # depth (# channels) are the same, while the in-depth in the 
      # middle is smaller.

      # Because each bottleneck has 3 layers, the above chain has
      # 3 * (2 + 1) = 9 layers.

      # By convention all bottleneck units have stride = 1 except for the last one,
      # which has a stride of 2. The overall effect is that after the whole chain
      # the feature-map size is reduced by a factor of 2.

      # The original resnet implementation has:
      #   -- very long chains
      #   -- very large depth and in-depth values.
      # This is necessary for very big datasets like ImageNet, but for
      # smaller and simpler datasets we should be able to substantially
      # reduce these, which is what we do in this resnet_v1_slim
      # 
      resnet_utils.Block('block2', resnet_v1.bottleneck,
                         [(128, 64, 1)] * 4 + [(128, 64, 2)]),
      # 3 * (4+1) = 15 layers
      resnet_utils.Block('block3', resnet_v1.bottleneck,
                         [(256, 64, 1)] * 4 + [(256, 64, 2)]),
      # 3 * (4+1) = 15 layers
      resnet_utils.Block('block4', resnet_v1.bottleneck, [(256, 64, 1)] * 2)
      # 3 * 2 = 6 layers
      # so we have 9 + 15 + 15 + 6 = 45 layers
      # there are two extra layers added by the system, so
      # by the resnet nomenclature this network could be called a resnet_v1_47
      
      # The first 3 Blocks each have stride = 2 and the last Block has stride = 1,
      # so the overall stride of the block chain is 8 (the root block adds
      # another factor of 4 on top of that).

      # If "output_stride" is specified and smaller than the nominal stride,
      # resnet_v1.resnet_v1 keeps the effective stride at the requested value by
      # switching the remaining units to atrous (dilated) convolution instead of
      # adding further downsampling.
  ]
  return resnet_v1.resnet_v1(
      inputs,
      blocks,
      num_classes,
      global_pool,
      output_stride,
      include_root_block=True,
      reuse=reuse,
      scope=scope)
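
A tiny standalone helper (a sketch, not part of slim or of the example above) that re-derives the layer count stated in the comments: each bottleneck unit contributes three convolutions, so the four chains above contain 9 + 15 + 15 + 6 = 45 layers.

def count_bottleneck_layers(block_specs):
    """block_specs: one list of (depth, in_depth, stride) unit tuples per block."""
    return sum(3 * len(units) for units in block_specs)

slim_spec = [
    [(64, 32, 1)] * 2 + [(64, 32, 2)],
    [(128, 64, 1)] * 4 + [(128, 64, 2)],
    [(256, 64, 1)] * 4 + [(256, 64, 2)],
    [(256, 64, 1)] * 2,
]
print(count_bottleneck_layers(slim_spec))  # 45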
Exemple #22
0
    def build_network(self, sess, is_training=True):
        # select initializers
        if cfg.TRAIN.TRUNCATED:
            initializer = tf.truncated_normal_initializer(mean=0.0,
                                                          stddev=0.01)
            initializer_bbox = tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.001)
        else:
            initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = tf.random_normal_initializer(mean=0.0,
                                                            stddev=0.001)
        bottleneck = resnet_v1.bottleneck
        # choose different blocks for different number of layers
        if self._num_layers == 50:
            blocks = [
                resnet_utils.Block('block1', bottleneck,
                                   [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                resnet_utils.Block('block2', bottleneck,
                                   [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                # Use stride-1 for the last conv4 layer
                resnet_utils.Block('block3', bottleneck,
                                   [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
            ]
        elif self._num_layers == 101:
            blocks = [
                resnet_utils.Block('block1', bottleneck,
                                   [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                resnet_utils.Block('block2', bottleneck,
                                   [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                # Use stride-1 for the last conv4 layer
                resnet_utils.Block('block3', bottleneck,
                                   [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
                resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
            ]
        elif self._num_layers == 152:
            blocks = [
                resnet_utils.Block('block1', bottleneck,
                                   [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                resnet_utils.Block('block2', bottleneck,
                                   [(512, 128, 1)] * 7 + [(512, 128, 2)]),
                # Use stride-1 for the last conv4 layer
                resnet_utils.Block('block3', bottleneck,
                                   [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
                resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
            ]
        else:
            # other numbers are not supported
            raise NotImplementedError

        assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
        if cfg.RESNET.FIXED_BLOCKS == 3:
            with slim.arg_scope(resnet_arg_scope(is_training=False)):
                net = self.build_base()
                net_conv4, _ = resnet_v1.resnet_v1(
                    net,
                    blocks[0:cfg.RESNET.FIXED_BLOCKS],
                    global_pool=False,
                    include_root_block=False,
                    scope=self._resnet_scope)
        elif cfg.RESNET.FIXED_BLOCKS > 0:
            with slim.arg_scope(resnet_arg_scope(is_training=False)):
                net = self.build_base()
                net, _ = resnet_v1.resnet_v1(net,
                                             blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                             global_pool=False,
                                             include_root_block=False,
                                             scope=self._resnet_scope)

            with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
                net_conv4, _ = resnet_v1.resnet_v1(
                    net,
                    blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                    global_pool=False,
                    include_root_block=False,
                    scope=self._resnet_scope)
        else:  # cfg.RESNET.FIXED_BLOCKS == 0
            with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
                net = self.build_base()
                net_conv4, _ = resnet_v1.resnet_v1(net,
                                                   blocks[0:-1],
                                                   global_pool=False,
                                                   include_root_block=False,
                                                   scope=self._resnet_scope)
        self._act_summaries.append(net_conv4)
        self._layers['head'] = net_conv4

        if False:
            with tf.variable_scope('noise'):
                #kernel = tf.get_variable('weights',
                #shape=[5, 5, 3, 3],
                #initializer=tf.constant_initializer(c))
                conv = tf.nn.conv2d(self.noise,
                                    Wcnn, [1, 1, 1, 1],
                                    padding='SAME',
                                    name='srm')
            self._layers['noise'] = conv
            with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
                #srm_conv = tf.nn.tanh(conv, name='tanh')
                noise_net = resnet_utils.conv2d_same(conv,
                                                     64,
                                                     7,
                                                     stride=2,
                                                     scope='conv1')
                noise_net = tf.pad(noise_net, [[0, 0], [1, 1], [1, 1], [0, 0]])
                noise_net = slim.max_pool2d(noise_net, [3, 3],
                                            stride=2,
                                            padding='VALID',
                                            scope='pool1')
                #net_sum=tf.concat(3,[net_conv4,noise_net])
                noise_conv4, _ = resnet_v1.resnet_v1(noise_net,
                                                     blocks[0:-1],
                                                     global_pool=False,
                                                     include_root_block=False,
                                                     scope='noise')
        with tf.variable_scope(self._resnet_scope, self._resnet_scope):
            # build the anchors for the image
            self._anchor_component()

            # rpn
            rpn = slim.conv2d(net_conv4,
                              512, [3, 3],
                              trainable=is_training,
                              weights_initializer=initializer,
                              scope="rpn_conv/3x3")
            self._act_summaries.append(rpn)
            rpn_cls_score = slim.conv2d(rpn,
                                        self._num_anchors * 2, [1, 1],
                                        trainable=is_training,
                                        weights_initializer=initializer,
                                        padding='VALID',
                                        activation_fn=None,
                                        scope='rpn_cls_score')
            # change it so that the score has 2 as its channel size
            rpn_cls_score_reshape = self._reshape_layer(
                rpn_cls_score, 2, 'rpn_cls_score_reshape')
            rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape,
                                                       "rpn_cls_prob_reshape")
            rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape,
                                               self._num_anchors * 2,
                                               "rpn_cls_prob")
            rpn_bbox_pred = slim.conv2d(rpn,
                                        self._num_anchors * 4, [1, 1],
                                        trainable=is_training,
                                        weights_initializer=initializer,
                                        padding='VALID',
                                        activation_fn=None,
                                        scope='rpn_bbox_pred')
            if is_training:
                rois, roi_scores = self._proposal_layer(
                    rpn_cls_prob, rpn_bbox_pred, "rois")
                rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
                # Try to have a deterministic order for the computing graph, for reproducibility
                with tf.control_dependencies([rpn_labels]):
                    rois, _ = self._proposal_target_layer(
                        rois, roi_scores, "rpn_rois")
            else:
                if cfg.TEST.MODE == 'nms':
                    rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred,
                                                   "rois")
                elif cfg.TEST.MODE == 'top':
                    rois, _ = self._proposal_top_layer(rpn_cls_prob,
                                                       rpn_bbox_pred, "rois")
                else:
                    raise NotImplementedError
            # rcnn
            if cfg.POOLING_MODE == 'crop':
                pool5 = self._crop_pool_layer(net_conv4, rois, "pool5")
                #pool5 = self._crop_pool_layer(net_sum, rois, "pool5")
            else:
                raise NotImplementedError
        if False:
            noise_pool5 = self._crop_pool_layer(noise_conv4, rois,
                                                "noise_pool5")
            with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
                noise_fc7, _ = resnet_v1.resnet_v1(noise_pool5,
                                                   blocks[-1:],
                                                   global_pool=False,
                                                   include_root_block=False,
                                                   scope='noise')
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            fc7, _ = resnet_v1.resnet_v1(pool5,
                                         blocks[-1:],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self._resnet_scope)
        self._layers['fc7'] = fc7
        with tf.variable_scope(self._resnet_scope, self._resnet_scope):
            #pdb.set_trace()
            #noise_fc7 = tf.reduce_mean(noise_fc7, axis=[1, 2])
            #bilinear_pool=compact_bilinear_pooling_layer(fc7,noise_fc7,2048*4,compute_size=16,sequential=False)
            #bilinear_pool=tf.reshape(bilinear_pool, [-1,2048*4])
            fc7 = tf.reduce_mean(fc7, axis=[1, 2])
            cls_score = slim.fully_connected(fc7,
                                             self._num_classes,
                                             weights_initializer=initializer,
                                             trainable=is_training,
                                             activation_fn=None,
                                             scope='cls_score')
            #pdb.set_trace()
            #noise_cls_score = slim.fully_connected(bilinear_pool, self._num_classes, weights_initializer=initializer,
            #trainable=is_training, activation_fn=None, scope='noise_cls_score')
            cls_prob = self._softmax_layer(cls_score, "cls_prob")
            bbox_pred = slim.fully_connected(
                fc7,
                self._num_classes * 4,
                weights_initializer=initializer_bbox,
                trainable=is_training,
                activation_fn=None,
                scope='bbox_pred')
        #with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # Average pooling done by reduce_mean
        #fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        #fc_con=tf.concat(1,[fc7,noise_fc])
        #cls_score = slim.fully_connected(fc7, self._num_classes, weights_initializer=initializer,
        #trainable=False, activation_fn=None, scope='cls_score')
        #cls_score1=cls_score+10*noise_cls_score
        #cls_prob = self._softmax_layer(noise_cls_score, "cls_prob")
        #bbox_pred = slim.fully_connected(fc7, self._num_classes * 4, weights_initializer=initializer_bbox,
        #trainable=False,
        #activation_fn=None, scope='bbox_pred')
        self._predictions["rpn_cls_score"] = rpn_cls_score
        self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
        self._predictions["rpn_cls_prob"] = rpn_cls_prob
        self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
        self._predictions["cls_score"] = cls_score
        self._predictions["cls_prob"] = cls_prob
        self._predictions["bbox_pred"] = bbox_pred
        self._predictions["rois"] = rois

        self._score_summaries.update(self._predictions)

        return rois, cls_prob, bbox_pred
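
The RPN branch above reshapes the class scores so that the softmax runs over the background/foreground pair of each anchor. The numpy sketch below illustrates that idea only; it assumes an NHWC layout in which every anchor's two scores occupy adjacent channels, which may differ from the exact transpose/reshape performed by _reshape_layer, so treat it as a conceptual sketch rather than the example's implementation.

import numpy as np

def rpn_softmax(rpn_cls_score, num_anchors):
    n, h, w, _ = rpn_cls_score.shape
    # (N, H, W, 2A) -> (N, H, W*A, 2): one (bg, fg) pair per anchor
    scores = rpn_cls_score.reshape(n, h, w * num_anchors, 2)
    e = np.exp(scores - scores.max(axis=-1, keepdims=True))
    probs = e / e.sum(axis=-1, keepdims=True)
    # back to (N, H, W, 2A), analogous to rpn_cls_prob in the example
    return probs.reshape(n, h, w, 2 * num_anchors)

x = np.random.randn(1, 4, 4, 2 * 9).astype(np.float32)
p = rpn_softmax(x, num_anchors=9)
assert np.allclose(p.reshape(1, 4, 4, 9, 2).sum(-1), 1.0, atol=1e-5)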
Exemple #23
0
    def resnet_base(self, img_batch, scope_name, is_training=True):

        if scope_name == 'resnet_v1_50':
            middle_num_units = 6
        elif scope_name == 'resnet_v1_101':
            middle_num_units = 23
        else:
            raise NotImplementedError(
                'We only support resnet_v1_50 or resnet_v1_101. ')

        blocks = [
            resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
            resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
            resnet_v1_block('block3',
                            base_depth=256,
                            num_units=middle_num_units,
                            stride=2),
            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
        ]
        # when using FPN, the stride list is [1, 2, 2]

        with slim.arg_scope(self.resnet_arg_scope(is_training=False)):
            with tf.variable_scope(scope_name, scope_name):
                # Do the first few layers manually, because 'SAME' padding can behave inconsistently
                # for images of different sizes: sometimes 0, sometimes 1
                net = resnet_utils.conv2d_same(img_batch,
                                               64,
                                               7,
                                               stride=2,
                                               scope='conv1')
                net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
                net = slim.max_pool2d(net, [3, 3],
                                      stride=2,
                                      padding='VALID',
                                      scope='pool1')

        not_freezed = [False] * self.cfgs.FIXED_BLOCKS + (
            4 - self.cfgs.FIXED_BLOCKS) * [True]
        # Fixed_Blocks can be 1~3

        with slim.arg_scope(
                self.resnet_arg_scope(
                    is_training=(is_training and not_freezed[0]))):
            C2, end_points_C2 = resnet_v1.resnet_v1(net,
                                                    blocks[0:1],
                                                    global_pool=False,
                                                    include_root_block=False,
                                                    scope=scope_name)

        # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')
        # self.add_heatmap(C2, name='Layer2/C2_heat')

        with slim.arg_scope(
                self.resnet_arg_scope(
                    is_training=(is_training and not_freezed[1]))):
            C3, end_points_C3 = resnet_v1.resnet_v1(C2,
                                                    blocks[1:2],
                                                    global_pool=False,
                                                    include_root_block=False,
                                                    scope=scope_name)

        # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')
        # self.add_heatmap(C3, name='Layer3/C3_heat')
        with slim.arg_scope(
                self.resnet_arg_scope(
                    is_training=(is_training and not_freezed[2]))):
            C4, end_points_C4 = resnet_v1.resnet_v1(C3,
                                                    blocks[2:3],
                                                    global_pool=False,
                                                    include_root_block=False,
                                                    scope=scope_name)

        # self.add_heatmap(C4, name='Layer4/C4_heat')

        # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')
        with slim.arg_scope(self.resnet_arg_scope(is_training=is_training)):
            C5, end_points_C5 = resnet_v1.resnet_v1(C4,
                                                    blocks[3:4],
                                                    global_pool=False,
                                                    include_root_block=False,
                                                    scope=scope_name)
        # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
        # self.add_heatmap(C5, name='Layer5/C5_heat')

        feature_dict = {
            'C2':
            end_points_C2['{}/block1/unit_2/bottleneck_v1'.format(scope_name)],
            'C3':
            end_points_C3['{}/block2/unit_3/bottleneck_v1'.format(scope_name)],
            'C4':
            end_points_C4['{}/block3/unit_{}/bottleneck_v1'.format(
                scope_name, middle_num_units - 1)],
            'C5':
            end_points_C5['{}/block4/unit_3/bottleneck_v1'.format(scope_name)],
            # 'C5': end_points_C5['{}/block4'.format(scope_name)],
        }

        return feature_dict
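
The feature_dict above pulls named end points out of the dictionaries returned by resnet_v1. A short sketch of how those keys are formed for resnet_v1_50 (middle_num_units = 6): slim applies each stride-2 block's stride in its last unit, so the second-to-last unit is read to get C2~C4 before the downsampling, while C5 is simply the last unit of the stride-1 block4.

scope_name = 'resnet_v1_50'
middle_num_units = 6

keys = {
    'C2': '{}/block1/unit_2/bottleneck_v1'.format(scope_name),
    'C3': '{}/block2/unit_3/bottleneck_v1'.format(scope_name),
    'C4': '{}/block3/unit_{}/bottleneck_v1'.format(scope_name,
                                                   middle_num_units - 1),
    'C5': '{}/block4/unit_3/bottleneck_v1'.format(scope_name),
}
for name in sorted(keys):
    print(name, '->', keys[name])
# C4 -> resnet_v1_50/block3/unit_5/bottleneck_v1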
Exemple #24
0
def resnet_base_balance(img_batch, scope_name, is_training=True):
    '''
    This code is derived from light-head rcnn:
    https://github.com/zengarden/light_head_rcnn

    It makes freezing individual blocks convenient, so we adopt this approach.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101. Check your network name....yjr'
        )

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        # use stride 1 for the last conv4 layer.
        resnet_v1_block('block3',
                        base_depth=256,
                        num_units=middle_num_units,
                        stride=1),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
    ]
    # when using FPN, the stride list is [1, 2, 2]

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(img_batch,
                                           64,
                                           7,
                                           stride=2,
                                           scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3],
                                  stride=2,
                                  padding='VALID',
                                  scope='pool1')

    not_freezed = [False
                   ] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]
    # Fixed_Blocks can be 1~3

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, end_points_C2 = resnet_v1.resnet_v1(net,
                                                blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, end_points_C3 = resnet_v1.resnet_v1(C2,
                                                blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, end_points_C4 = resnet_v1.resnet_v1(C3,
                                                blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        # print('c5 input shape', input.shape)
        C5, end_points_C5 = resnet_v1.resnet_v1(C4,
                                                blocks[3:4],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    add_heatmap(C3, 'img/feature_map_C3')
    add_heatmap(C4, 'img/feature_map_C4')
    add_heatmap(C5, 'img/feature_map_C5')
    C4_shape = tf.shape(C4)
    C4_resize = C4

    # align C3 to C4's spatial size before the 1x1 projection below
    C3_resize = tf.image.resize_bilinear(C3, (C4_shape[1], C4_shape[2]))
    C3_resize = slim.conv2d(C3_resize,
                            1024, [1, 1],
                            trainable=is_training,
                            weights_initializer=cfgs.INITIALIZER,
                            activation_fn=tf.nn.relu,
                            scope='C3_conv1x1')
    C5_resize = tf.image.resize_bilinear(C5, (C4_shape[1], C4_shape[2]))
    C5_resize = slim.conv2d(C5_resize,
                            1024, [1, 1],
                            trainable=is_training,
                            weights_initializer=cfgs.INITIALIZER,
                            activation_fn=tf.nn.relu,
                            scope='C5_conv1x1')

    C_integrate = (C4_resize + C3_resize + C5_resize) / 3
    # # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')
    return C_integrate
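
# A standalone numpy sketch (not part of the function above) of the balancing
# step: C3 and C5 are brought to C4's spatial size and channel width and the
# three maps are averaged. The resize and 1x1 convolutions are replaced here by
# random tensors of the right shape, since only the averaging is illustrated.
import numpy as np

_h, _w = 32, 32                                  # C4's spatial size (illustrative)
_C4 = np.random.randn(1, _h, _w, 1024)
_C3_resize = np.random.randn(1, _h, _w, 1024)    # stands in for resize + C3_conv1x1
_C5_resize = np.random.randn(1, _h, _w, 1024)    # stands in for resize + C5_conv1x1

_C_integrate = (_C4 + _C3_resize + _C5_resize) / 3
print(_C_integrate.shape)                        # (1, 32, 32, 1024)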
def resnet_base(img_batch, scope_name, is_training=True):

    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101. ')

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3',
                        base_depth=256,
                        num_units=middle_num_units,
                        stride=2),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
    ]
    # when using FPN, the stride list is [1, 2, 2]

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(img_batch,
                                           64,
                                           7,
                                           stride=2,
                                           scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3],
                                  stride=2,
                                  padding='VALID',
                                  scope='pool1')

    not_freezed = [False
                   ] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]
    # Fixed_Blocks can be 1~3

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, end_points_C2 = resnet_v1.resnet_v1(net,
                                                blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')
    # add_heatmap(C2, name='Layer2/C2_heat')

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, end_points_C3 = resnet_v1.resnet_v1(C2,
                                                blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')
    # add_heatmap(C3, name='Layer3/C3_heat')
    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, end_points_C4 = resnet_v1.resnet_v1(C3,
                                                blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # add_heatmap(C4, name='Layer4/C4_heat')

    # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5, end_points_C5 = resnet_v1.resnet_v1(C4,
                                                blocks[3:4],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
    # add_heatmap(C5, name='Layer5/C5_heat')

    feature_dict = {
        'C2':
        end_points_C2['{}/block1/unit_2/bottleneck_v1'.format(scope_name)],
        'C3':
        end_points_C3['{}/block2/unit_3/bottleneck_v1'.format(scope_name)],
        'C4':
        end_points_C4['{}/block3/unit_{}/bottleneck_v1'.format(
            scope_name, middle_num_units - 1)],
        'C5':
        end_points_C5['{}/block4/unit_3/bottleneck_v1'.format(scope_name)],
        # 'C5': end_points_C5['{}/block4'.format(scope_name)],
    }

    pyramid_dict = {}
    with tf.variable_scope('build_pyramid'):
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(
                                cfgs.WEIGHT_DECAY),
                            activation_fn=None,
                            normalizer_fn=None):

            P5 = slim.conv2d(feature_dict['C5'],
                             num_outputs=256,
                             kernel_size=[1, 1],
                             stride=1,
                             scope='build_P5')

            pyramid_dict['P5'] = P5

            for level in range(4, 2, -1):  # build [P4, P3]

                pyramid_dict['P%d' % level] = fusion_two_layer(
                    C_i=feature_dict["C%d" % level],
                    P_j=pyramid_dict["P%d" % (level + 1)],
                    scope='build_P%d' % level)
            for level in range(5, 2, -1):
                pyramid_dict['P%d' % level] = slim.conv2d(
                    pyramid_dict['P%d' % level],
                    num_outputs=256,
                    kernel_size=[3, 3],
                    padding="SAME",
                    stride=1,
                    scope="fuse_P%d" % level)

            p6 = slim.conv2d(
                pyramid_dict['P5'] if cfgs.USE_P5 else feature_dict['C5'],
                num_outputs=256,
                kernel_size=[3, 3],
                padding="SAME",
                stride=2,
                scope='p6_conv')
            pyramid_dict['P6'] = p6

            p7 = tf.nn.relu(p6, name='p6_relu')

            p7 = slim.conv2d(p7,
                             num_outputs=256,
                             kernel_size=[3, 3],
                             padding="SAME",
                             stride=2,
                             scope='p7_conv')

            pyramid_dict['P7'] = p7

    # for level in range(7, 1, -1):
    #     add_heatmap(pyramid_dict['P%d' % level], name='Layer%d/P%d_heat' % (level, level))

    return pyramid_dict
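
The build_pyramid block above relies on fusion_two_layer, whose implementation is not shown; the sketch below assumes it follows the standard FPN recipe (1x1-convolve the lateral C_i, upsample the coarser level by 2x, add), with numpy stand-ins, nearest-neighbour upsampling and a random channel-mixing matrix in place of the real ops.

import numpy as np

def fuse(c_i, p_coarser, out_channels=256, seed=0):
    rng = np.random.default_rng(seed)
    # lateral "1x1 conv": a random channel-mixing matrix, purely illustrative
    w = rng.standard_normal((c_i.shape[-1], out_channels)) * 0.01
    lateral = c_i @ w                                 # (N, H, W, out_channels)
    # upsample the coarser pyramid level by 2x with nearest neighbour
    upsampled = p_coarser.repeat(2, axis=1).repeat(2, axis=2)
    return lateral + upsampled

C4 = np.random.randn(1, 32, 32, 1024)
P5 = np.random.randn(1, 16, 16, 256)
P4 = fuse(C4, P5)
print(P4.shape)  # (1, 32, 32, 256), ready for the 3x3 'fuse_P4' smoothing conv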
Exemple #26
0
def resnet_base(img_batch, scope_name, is_training=True):
    '''
    This code is derived from light-head rcnn:
    https://github.com/zengarden/light_head_rcnn

    It makes freezing individual blocks convenient, so we adopt this approach.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101 or mobilenetv2. '
            'Check your network name.')

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        # use stride 1 for the last conv4 layer.
        resnet_v1_block('block3',
                        base_depth=256,
                        num_units=middle_num_units,
                        stride=1)
    ]
    # when using FPN, the stride list is [1, 2, 2]

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(img_batch,
                                           64,
                                           7,
                                           stride=2,
                                           scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3],
                                  stride=2,
                                  padding='VALID',
                                  scope='pool1')

    not_freezed = [False
                   ] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]
    # Fixed_Blocks can be 1~3

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, end_points_C2 = resnet_v1.resnet_v1(net,
                                                blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, end_points_C3 = resnet_v1.resnet_v1(C2,
                                                blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, _ = resnet_v1.resnet_v1(C3,
                                    blocks[2:3],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)

        if cfgs.ADD_FUSION:

            # C3_ = end_points_C3['{}/block2/unit_3/bottleneck_v1'.format(scope_name)]
            # # channels = C3_.get_shape().as_list()
            # filters1 = tf.random_normal([3, 3, 512, 1024], mean=0.0, stddev=0.01)
            # C3_atrous_conv2d = tf.nn.atrous_conv2d(C3_, filters=filters1, rate=2, padding='SAME')
            # C3_shape = tf.shape(C3_atrous_conv2d)
            #
            # C2_ = end_points_C2['{}/block1/unit_2/bottleneck_v1'.format(scope_name)]
            # filters2 = tf.random_normal([3, 3, 256, 512], mean=0.0, stddev=0.01)
            # filters3 = tf.random_normal([3, 3, 512, 1024], mean=0.0, stddev=0.01)
            # C2_atrous_conv2d = tf.nn.atrous_conv2d(C2_, filters=filters2, rate=2, padding='SAME')
            # C2_atrous_conv2d = tf.nn.atrous_conv2d(C2_atrous_conv2d, filters=filters3, rate=2, padding='SAME')
            # C2_downsampling = tf.image.resize_bilinear(C2_atrous_conv2d, (C3_shape[1], C3_shape[2]))
            #
            # C4_upsampling = tf.image.resize_bilinear(C4, (C3_shape[1], C3_shape[2]))
            # C4 = C3_atrous_conv2d + C4_upsampling + C2_downsampling

            # C4 = slim.conv2d(C4,
            #                  1024, [5, 5],
            #                  trainable=is_training,
            #                  weights_initializer=cfgs.INITIALIZER,
            #                  activation_fn=None,
            #                  scope='C4_conv5x5')

            C3_shape = tf.shape(end_points_C3[
                '{}/block2/unit_3/bottleneck_v1'.format(scope_name)])
            C4 = tf.image.resize_bilinear(C4, (C3_shape[1], C3_shape[2]))
            _C3 = slim.conv2d(end_points_C3[
                '{}/block2/unit_3/bottleneck_v1'.format(scope_name)],
                              1024, [3, 3],
                              trainable=is_training,
                              weights_initializer=cfgs.INITIALIZER,
                              activation_fn=tf.nn.relu,
                              scope='C3_conv3x3')
            # _C3 = build_inception(end_points_C3['resnet_v1_101/block2/unit_3/bottleneck_v1'], is_training)

            C4 += _C3

        if cfgs.ADD_ATTENTION:
            with tf.variable_scope('build_C4_attention',
                                   regularizer=slim.l2_regularizer(
                                       cfgs.WEIGHT_DECAY)):
                # tf.summary.image('add_attention_before',
                #                  tf.expand_dims(tf.reduce_mean(C4, axis=-1), axis=-1))

                # SE_C4 = squeeze_excitation_layer(C4, 1024, 16, 'SE_C4', is_training)

                add_heatmap(
                    tf.expand_dims(tf.reduce_mean(C4, axis=-1), axis=-1),
                    'add_attention_before')
                C4_attention_layer = build_attention(C4, is_training)
                # C4_attention_layer = build_inception_attention(C4, is_training)

                C4_attention = tf.nn.softmax(C4_attention_layer)
                # C4_attention = C4_attention[:, :, :, 1]
                C4_attention = C4_attention[:, :, :, 0]
                C4_attention = tf.expand_dims(C4_attention, axis=-1)
                # tf.summary.image('C3_attention', C4_attention)
                add_heatmap(C4_attention, 'C4_attention')

                C4 = tf.multiply(C4_attention, C4)

                # C4 = SE_C4 * C4
                # tf.summary.image('add_attention_after', tf.expand_dims(tf.reduce_mean(C4, axis=-1), axis=-1))
                add_heatmap(
                    tf.expand_dims(tf.reduce_mean(C4, axis=-1), axis=-1),
                    'add_attention_after')

    # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')
    if cfgs.ADD_ATTENTION:
        return C4, C4_attention_layer
    else:
        return C4
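
# A standalone numpy sketch (not part of the function above) of the attention
# gating used when cfgs.ADD_ATTENTION is set: build_attention (not shown) is
# assumed to produce a 2-channel map; a softmax across those two channels gives
# a per-pixel weight, and channel 0 of that weight is broadcast onto C4.
import numpy as np

_C4 = np.random.randn(1, 32, 32, 1024)
_att_logits = np.random.randn(1, 32, 32, 2)        # stand-in for build_attention(C4)

_e = np.exp(_att_logits - _att_logits.max(axis=-1, keepdims=True))
_att = _e / _e.sum(axis=-1, keepdims=True)          # softmax over the 2 channels
_gate = _att[..., 0:1]                              # keep channel 0: (1, 32, 32, 1)

print((_gate * _C4).shape)                          # (1, 32, 32, 1024)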
def resnet_base(img_batch, scope_name, is_training=True):
    '''
    This code is derived from light-head rcnn:
    https://github.com/zengarden/light_head_rcnn

    It makes freezing individual blocks convenient, so we adopt this approach.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101. Check your network name....'
        )

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3',
                        base_depth=256,
                        num_units=middle_num_units,
                        stride=2),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
    ]
    # when using FPN, the stride list is [1, 2, 2]

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(img_batch,
                                           64,
                                           7,
                                           stride=2,
                                           scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3],
                                  stride=2,
                                  padding='VALID',
                                  scope='pool1')

    not_freezed = [False
                   ] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]
    # Fixed_Blocks can be 1~3

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, end_points_C2 = resnet_v1.resnet_v1(net,
                                                blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')
    # add_heatmap(C2, name='Layer2/C2_heat')

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, end_points_C3 = resnet_v1.resnet_v1(C2,
                                                blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')
    # add_heatmap(C3, name='Layer3/C3_heat')
    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, end_points_C4 = resnet_v1.resnet_v1(C3,
                                                blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # add_heatmap(C4, name='Layer4/C4_heat')

    # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5, end_points_C5 = resnet_v1.resnet_v1(C4,
                                                blocks[3:4],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
    # add_heatmap(C5, name='Layer5/C5_heat')

    feature_dict = {
        'C2':
        end_points_C2['{}/block1/unit_2/bottleneck_v1'.format(scope_name)],
        'C3':
        end_points_C3['{}/block2/unit_3/bottleneck_v1'.format(scope_name)],
        'C4':
        end_points_C4['{}/block3/unit_{}/bottleneck_v1'.format(
            scope_name, middle_num_units - 1)],
        'C5':
        end_points_C5['{}/block4/unit_3/bottleneck_v1'.format(scope_name)],
        # 'C5': end_points_C5['{}/block4'.format(scope_name)],
    }

    pyramid_dict = {}
    with tf.variable_scope('build_pyramid'):
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(
                                cfgs.WEIGHT_DECAY),
                            activation_fn=None,
                            normalizer_fn=None):

            P5 = slim.conv2d(feature_dict['C5'],
                             num_outputs=256,
                             kernel_size=[1, 1],
                             stride=1,
                             scope='build_P5')

            pyramid_dict['P5'] = P5

            for level in range(4, 1, -1):  # build [P4, P3, P2]

                pyramid_dict['P%d' % level] = fusion_two_layer(
                    C_i=feature_dict["C%d" % level],
                    P_j=pyramid_dict["P%d" % (level + 1)],
                    scope='build_P%d' % level)
            for level in range(5, 1, -1):
                pyramid_dict['P%d' % level] = slim.conv2d(
                    pyramid_dict['P%d' % level],
                    num_outputs=256,
                    kernel_size=[3, 3],
                    padding="SAME",
                    stride=1,
                    scope="fuse_P%d" % level)

            if "P6" in cfgs.LEVLES:
                P6 = slim.avg_pool2d(pyramid_dict['P5'],
                                     kernel_size=[1, 1],
                                     stride=2,
                                     scope='build_P6')
                pyramid_dict['P6'] = P6

    # for level in range(5, 1, -1):
    #     add_heatmap(feature_dict['C%d' % level], name='Layer%d/C%d_heat' % (level, level))
    #     add_heatmap(pyramid_dict['P%d' % level], name='Layer%d/P%d_heat' % (level, level))

    # return [P2, P3, P4, P5, P6]
    print("we are in Pyramid::-======>>>>")
    print(cfgs.LEVLES)
    print("base_anchor_size are: ", cfgs.BASE_ANCHOR_SIZE_LIST)
    print(20 * "__")
    return [pyramid_dict[level_name] for level_name in cfgs.LEVLES]
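
# A standalone sketch (not part of the function above) of the level selection
# done by the return statement: only the levels named in cfgs.LEVLES are kept,
# and P6 is a stride-2, kernel-1 average pooling of P5, i.e. every other pixel.
# The LEVLES list and shapes below are assumed values for illustration only.
import numpy as np

_pyramid = {
    'P2': np.zeros((1, 128, 128, 256)),
    'P3': np.zeros((1, 64, 64, 256)),
    'P4': np.zeros((1, 32, 32, 256)),
    'P5': np.zeros((1, 16, 16, 256)),
}
_LEVLES = ['P2', 'P3', 'P4', 'P5', 'P6']
if 'P6' in _LEVLES:
    _pyramid['P6'] = _pyramid['P5'][:, ::2, ::2, :]  # (1, 8, 8, 256)
print([_pyramid[name].shape for name in _LEVLES])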
def resnet_base(rgb_img_batch, ir_img_batch, scope_name, is_training=True):

    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError('We only support resnet_v1_50 or resnet_v1_101. ')
    org_scope_name = scope_name
    blocks = [resnet_v1_block('RGB/resnet_v1_50/block1', base_depth=64, num_units=3, stride=2),
              resnet_v1_block('RGB/resnet_v1_50/block2', base_depth=128, num_units=4, stride=2),
              resnet_v1_block('RGB/resnet_v1_50/block3', base_depth=256, num_units=middle_num_units, stride=2),
              resnet_v1_block('RGB/resnet_v1_50/block4', base_depth=512, num_units=3, stride=1)]
    # when using FPN, the stride list is [1, 2, 2]

    scope_name = "RGB/"+org_scope_name

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net_rgb = resnet_utils.conv2d_same(
                rgb_img_batch, 64, 7, stride=2, scope='conv1')
            net_rgb = tf.pad(net_rgb, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net_rgb = slim.max_pool2d(
                net_rgb, [3, 3], stride=2, padding='VALID', scope='pool1')

    not_freezed = [False] * cfgs.RGB_FIXED_BLOCKS + (4-cfgs.RGB_FIXED_BLOCKS)*[True]
    # Fixed_Blocks can be 1~3

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2_rgb, end_points_C2_rgb = resnet_v1.resnet_v1(net_rgb,
                                                blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')
    # add_heatmap(C2, name='Layer2/C2_heat')

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3_rgb, end_points_C3_rgb = resnet_v1.resnet_v1(C2_rgb,
                                                blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')
    # add_heatmap(C3, name='Layer3/C3_heat')
    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4_rgb, end_points_C4_rgb = resnet_v1.resnet_v1(C3_rgb,
                                                blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # add_heatmap(C4, name='Layer4/C4_heat')

    # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5_rgb, end_points_C5_rgb = resnet_v1.resnet_v1(C4_rgb,
                                                blocks[3:4],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
    # add_heatmap(C5, name='Layer5/C5_heat')
    blocks = [resnet_v1_block('IR/resnet_v1_50/block1', base_depth=64, num_units=3, stride=2),
              resnet_v1_block('IR/resnet_v1_50/block2', base_depth=128, num_units=4, stride=2),
              resnet_v1_block('IR/resnet_v1_50/block3', base_depth=256, num_units=middle_num_units, stride=2),
              resnet_v1_block('IR/resnet_v1_50/block4', base_depth=512, num_units=3, stride=1)]

    scope_name = "IR/"+org_scope_name

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net_ir = resnet_utils.conv2d_same(
                ir_img_batch, 64, 7, stride=2, scope='conv1')
            net_ir = tf.pad(net_ir, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net_ir = slim.max_pool2d(
                net_ir, [3, 3], stride=2, padding='VALID', scope='pool1')

    not_freezed = [False] * cfgs.IR_FIXED_BLOCKS + (4-cfgs.IR_FIXED_BLOCKS)*[True]
    # Fixed_Blocks can be 1~3

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2_ir, end_points_C2_ir = resnet_v1.resnet_v1(net_ir,
                                                blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')
    # add_heatmap(C2, name='Layer2/C2_heat')

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3_ir, end_points_C3_ir = resnet_v1.resnet_v1(C2_ir,
                                                blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')
    # add_heatmap(C3, name='Layer3/C3_heat')
    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4_ir, end_points_C4_ir = resnet_v1.resnet_v1(C3_ir,
                                                blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # add_heatmap(C4, name='Layer4/C4_heat')

    # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5_ir, end_points_C5_ir = resnet_v1.resnet_v1(C4_ir,
                                                blocks[3:4],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
    # add_heatmap(C5, name='Layer5/C5_heat')

    multi_end_points_C2 = tf.concat(
        axis=3,
        values=[
            end_points_C2_rgb['{}/block1/unit_2/bottleneck_v1'.format(
                "RGB/resnet_v1_50/RGB/" + org_scope_name)],
            end_points_C2_ir['{}/block1/unit_2/bottleneck_v1'.format(
                "IR/resnet_v1_50/IR/" + org_scope_name)]
        ])

    multi_end_points_C3 = tf.concat(
        axis=3,
        values=[
            end_points_C3_rgb['{}/block2/unit_3/bottleneck_v1'.format(
                "RGB/resnet_v1_50/RGB/" + org_scope_name)],
            end_points_C3_ir['{}/block2/unit_3/bottleneck_v1'.format(
                "IR/resnet_v1_50/IR/" + org_scope_name)]
        ])

    multi_end_points_C4 = tf.concat(
        axis=3,
        values=[
            end_points_C4_rgb['{}/block3/unit_{}/bottleneck_v1'.format(
                "RGB/resnet_v1_50/RGB/" + org_scope_name, middle_num_units - 1)],
            end_points_C4_ir['{}/block3/unit_{}/bottleneck_v1'.format(
                "IR/resnet_v1_50/IR/" + org_scope_name, middle_num_units - 1)]
        ])

    multi_end_points_C5 = tf.concat(
        axis=3,
        values=[
            end_points_C5_rgb['{}/block4/unit_3/bottleneck_v1'.format(
                "RGB/resnet_v1_50/RGB/" + org_scope_name)],
            end_points_C5_ir['{}/block4/unit_3/bottleneck_v1'.format(
                "IR/resnet_v1_50/IR/" + org_scope_name)]
        ])


    feature_dict = {'C2': multi_end_points_C2,
                    'C3': multi_end_points_C3,
                    'C4': multi_end_points_C4,
                    'C5': multi_end_points_C5,
                    # 'C5': end_points_C5['{}/block4'.format(scope_name)],
                    }

    scope_name = org_scope_name

    pyramid_dict = {}
    with tf.variable_scope('build_pyramid'):
        with slim.arg_scope([slim.conv2d], weights_regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY),
                            activation_fn=None, normalizer_fn=None):

            conv_channels = 256
            last_fm = None
            for i in range(3):
                fm = feature_dict['C{}'.format(5-i)]
                fm_1x1_conv = slim.conv2d(fm,  num_outputs=conv_channels, kernel_size=[1, 1],
                                          stride=1, scope='p{}_1x1_conv'.format(5-i))
                if last_fm is not None:
                    h, w = tf.shape(fm_1x1_conv)[1], tf.shape(fm_1x1_conv)[2]
                    last_resize = tf.image.resize_bilinear(last_fm,
                                                           size=[h, w],
                                                           name='p{}_up2x'.format(5-i))

                    fm_1x1_conv = fm_1x1_conv + last_resize

                last_fm = fm_1x1_conv

                fm_3x3_conv = slim.conv2d(fm_1x1_conv,
                                          num_outputs=conv_channels, kernel_size=[3, 3], padding="SAME",
                                          stride=1, scope='p{}_3x3_conv'.format(5 - i))
                pyramid_dict['P{}'.format(5-i)] = fm_3x3_conv

            p6 = slim.conv2d(pyramid_dict['P5'],
                             num_outputs=conv_channels, kernel_size=[3, 3], padding="SAME",
                             stride=2, scope='p6_conv')
            pyramid_dict['P6'] = p6

            p7 = tf.nn.relu(p6)

            p7 = slim.conv2d(p7,
                             num_outputs=conv_channels, kernel_size=[3, 3], padding="SAME",
                             stride=2, scope='p7_conv')

            pyramid_dict['P7'] = p7

    # for level in range(7, 1, -1):
    #     add_heatmap(pyramid_dict['P%d' % level], name='Layer%d/P%d_heat' % (level, level))

    return pyramid_dict
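
The dual-stream variant above runs separate RGB and IR backbones and concatenates the corresponding end points along the channel axis before building the pyramid. A minimal numpy sketch of that fusion (shapes are illustrative only):

import numpy as np

c4_rgb = np.random.randn(1, 32, 32, 1024)
c4_ir = np.random.randn(1, 32, 32, 1024)
multi_c4 = np.concatenate([c4_rgb, c4_ir], axis=3)
print(multi_c4.shape)  # (1, 32, 32, 2048): channels double, spatial size unchanged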
Exemple #29
0
def resnet_base(img_batch, scope_name, is_training=True):
    '''
    This code is derived from light-head rcnn:
    https://github.com/zengarden/light_head_rcnn

    It makes freezing individual blocks convenient, so we adopt this approach.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError('We only support resnet_v1_50 or resnet_v1_101. Check your network name....yjr')

    blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
              resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
              # use stride 1 for the last conv4 layer.

              resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=1)]
              # when using FPN, the stride list is [1, 2, 2]

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(
                img_batch, 64, 7, stride=2, scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(
                net, [3, 3], stride=2, padding='VALID', scope='pool1')

    # The first cfgs.FIXED_BLOCKS entries are False, i.e. those blocks are
    # kept frozen (is_training forced to False in the arg_scopes below).
    not_freezed = [False] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]
    # cfgs.FIXED_BLOCKS can be 1~3

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, _ = resnet_v1.resnet_v1(net,
                                    blocks[0:1],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)

    # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, _ = resnet_v1.resnet_v1(C2,
                                    blocks[1:2],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)

    # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, _ = resnet_v1.resnet_v1(C3,
                                    blocks[2:3],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)

    # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')
    return C2, C4
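
A tiny illustration (not from the original code) of the freezing logic above; FIXED_BLOCKS = 2 is an assumed value standing in for cfgs.FIXED_BLOCKS, purely for demonstration.

FIXED_BLOCKS = 2   # assumed value, stands in for cfgs.FIXED_BLOCKS
is_training = True

not_freezed = [False] * FIXED_BLOCKS + (4 - FIXED_BLOCKS) * [True]
print(not_freezed)  # [False, False, True, True]

for idx, trainable in enumerate(not_freezed[:3], start=1):
    # Matches the arg_scope flags used for blocks[0:1], blocks[1:2], blocks[2:3].
    print('block{}: is_training = {}'.format(idx, is_training and trainable))
# block1: is_training = False
# block2: is_training = False
# block3: is_training = True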
Exemple #30
0
def resnet_v1_slim(
    inputs,
    num_classes=None,
    global_pool=True,
    output_stride=None,
    reuse=None,  # the parameters above are passed straight through to
    # resnet_v1.resnet_v1
    scope='resnet_v1_slim'):
    blocks = [
        resnet_utils.Block('block1', resnet_v1.bottleneck,
                           [(64, 32, 1)] * 2 + [(64, 32, 2)]),
        # the last argument of Block is a list of "bottleneck" unit
        # configurations. Each entry is of the form  [depth, in-depth, stride]
        # Each "bottleneck" unit consists 3 layers:
        #    convolution from depth channels to in-depth channels
        #    convolution from in-depth channels to in-depth channels
        #    convolution from in-depth channels to depth channels
        # It's called "bottleneck" because the overall input and output
        # depth (# channels) are the same, while the in-depth in the
        # middle is smaller.

        # Because each bottleneck has 3 layers, the above chain has
        # 3 * (2 + 1) = 9 layers.

        # By convention, all bottleneck units have stride = 1 except the last
        # one, which has stride = 2. The overall effect is that, after the
        # whole chain, the spatial size is reduced by a factor of 2.

        # The original resnet implementation has:
        #   -- very long chains
        #   -- very large depth and in-depth values.
        # This is necessary for very big datasets like ImageNet, but for
        # smaller and simpler datasets we should be able to substantially
        # reduce these, which is what we do in this resnet_v1_slim.
        #
        resnet_utils.Block('block2', resnet_v1.bottleneck,
                           [(128, 64, 1)] * 4 + [(128, 64, 2)]),
        # 3 * (4+1) = 15 layers
        resnet_utils.Block('block3', resnet_v1.bottleneck,
                           [(256, 64, 1)] * 4 + [(256, 64, 2)]),
        # 3 * (4+1) = 15 layers
        resnet_utils.Block('block4', resnet_v1.bottleneck, [(256, 64, 1)] * 2)
        # 3 * 2 = 6 layers
        # so we have  9 + 15 + 15 + 6 = 45 layers
        # there are two extra layers added by the system, so
        # by the resnet nomenclature this network could be called resnet_v1_47

        # The first 3 blocks each have stride = 2 and the last block has
        # stride = 1, so the blocks alone reduce the spatial size by a
        # factor of 8 (see the worked size example after this list).

        # If "output_stride" is smaller than 8, resnet_v1.resnet_v1
        # will add extra down-sizing layers to meet the requirement.
    ]
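    # Worked size example (illustrative assumption: a 224x224 input):
    #   root block (include_root_block=True): conv1 stride 2 + max-pool stride 2 -> 56x56
    #   block1 (stride 2) -> 28x28
    #   block2 (stride 2) -> 14x14
    #   block3 (stride 2) -> 7x7
    #   block4 (stride 1) -> 7x7
    # With global_pool=True the 7x7 map is then averaged down to 1x1.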
    return resnet_v1.resnet_v1(inputs,
                               blocks,
                               num_classes,
                               global_pool,
                               output_stride,
                               include_root_block=True,
                               reuse=reuse,
                               scope=scope)
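
A minimal usage sketch, assuming TF 1.x with tf.contrib.slim available and the resnet_v1_slim function above in scope; the 224x224 input shape and the 10-class head are arbitrary assumptions for illustration.

import tensorflow as tf
slim = tf.contrib.slim

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
net, end_points = resnet_v1_slim(images, scope='resnet_v1_slim')
# global_pool=True collapses the spatial dimensions; with block4 depth 256 the
# output is [batch, 1, 1, 256], so squeeze it before a classification layer.
features = tf.squeeze(net, axis=[1, 2])
logits = slim.fully_connected(features, 10, activation_fn=None, scope='logits')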