Example #1
 def testEndPointsV1(self):
     """Test the end points of a tiny v1 bottleneck network."""
     blocks = [
         resnet_v1.resnet_v1_block('block1',
                                   base_depth=1,
                                   num_units=2,
                                   stride=2),
         resnet_v1.resnet_v1_block('block2',
                                   base_depth=2,
                                   num_units=2,
                                   stride=1),
     ]
     inputs = create_test_input(2, 32, 16, 3)
     with arg_scope(resnet_utils.resnet_arg_scope()):
         _, end_points = self._resnet_plain(inputs, blocks, scope='tiny')
     expected = [
         'tiny/block1/unit_1/bottleneck_v1/shortcut',
         'tiny/block1/unit_1/bottleneck_v1/conv1',
         'tiny/block1/unit_1/bottleneck_v1/conv2',
         'tiny/block1/unit_1/bottleneck_v1/conv3',
         'tiny/block1/unit_2/bottleneck_v1/conv1',
         'tiny/block1/unit_2/bottleneck_v1/conv2',
         'tiny/block1/unit_2/bottleneck_v1/conv3',
         'tiny/block2/unit_1/bottleneck_v1/shortcut',
         'tiny/block2/unit_1/bottleneck_v1/conv1',
         'tiny/block2/unit_1/bottleneck_v1/conv2',
         'tiny/block2/unit_1/bottleneck_v1/conv3',
         'tiny/block2/unit_2/bottleneck_v1/conv1',
         'tiny/block2/unit_2/bottleneck_v1/conv2',
         'tiny/block2/unit_2/bottleneck_v1/conv3'
     ]
     self.assertItemsEqual(expected, end_points)
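For reference, a minimal pure-Python sketch (mirroring the slim source, no TensorFlow required) of how resnet_v1_block expands into per-unit arguments. It explains the end point names in the test: each block contributes num_units bottleneck units, only the last unit carries the block's stride, and a 1x1 'shortcut' conv appears only in units whose output depth differs from their input (here, unit_1 of each block).

# Sketch of slim's resnet_v1_block expansion (pure Python).
import collections

Block = collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])

def resnet_v1_block_sketch(scope, base_depth, num_units, stride):
    # Each bottleneck unit outputs 4 * base_depth channels; the stride
    # sits on the last unit of the block.
    unit = {'depth': base_depth * 4, 'depth_bottleneck': base_depth, 'stride': 1}
    return Block(scope, 'bottleneck',
                 [dict(unit)] * (num_units - 1) + [dict(unit, stride=stride)])

print(resnet_v1_block_sketch('block1', base_depth=1, num_units=2, stride=2).args)
# [{'depth': 4, 'depth_bottleneck': 1, 'stride': 1},
#  {'depth': 4, 'depth_bottleneck': 1, 'stride': 2}]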
Example #3
def resnet_v1_50(inputs,
                 num_classes=None,
                 is_training=True,
                 global_pool=True,
                 output_stride=None,
                 include_root_block=True,
                 reuse=None,
                 scope='resnet_v1_50'):
    """
        ResNet-50 model of [1]. See resnet_v1() for arg and return description.
        (same as what's in slim library now but reversing the 1 stride to accommodate the unet model)
    """
    blocks = [
        resnet_v1.resnet_v1_block('block1', base_depth=64, num_units=3, stride=1),
        resnet_v1.resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1.resnet_v1_block('block3', base_depth=256, num_units=6, stride=2),
        resnet_v1.resnet_v1_block('block4', base_depth=512, num_units=3, stride=2),
    ]

    return resnet_v1.resnet_v1(
        inputs,
        blocks,
        num_classes,
        is_training=is_training,
        global_pool=global_pool,
        output_stride=output_stride,
        include_root_block=include_root_block,
        reuse=reuse,
        scope=scope)
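A hedged usage sketch for the variant above, assuming TensorFlow 1.x with tf.contrib.slim available and the same imports as the surrounding examples; the input shape is illustrative.

# Usage sketch (assumes TF 1.x; not part of the original example).
import tensorflow as tf
from tensorflow.contrib.slim.python.slim.nets import resnet_utils

slim = tf.contrib.slim

inputs = tf.placeholder(tf.float32, shape=[1, 224, 224, 3], name='inputs')
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
    net, end_points = resnet_v1_50(inputs, is_training=False, global_pool=False)
# With block1 at stride 1 instead of block4, each intermediate block output
# stays one octave finer than in the stock resnet_v1_50, which is what the
# U-Net style decoder wants for its skip connections.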
Example #4
 def _decide_blocks(self):
     # configuration of blocks for 41 layers - a shallower ResNet
     # tested in Experiment 4 in project report
     # units in blocks: 3 x 64 + 3 x 128 + 4 x 256 + 3 x 512
     if self._num_layers == 41:
         self._blocks = [
             resnet_v1_block('block1', base_depth=64, num_units=3,
                             stride=2),
             resnet_v1_block('block2',
                             base_depth=128,
                             num_units=3,
                             stride=2),
             # use stride 1 for the last conv4 layer
             resnet_v1_block('block3',
                             base_depth=256,
                             num_units=4,
                             stride=1),
             resnet_v1_block('block4',
                             base_depth=512,
                             num_units=3,
                             stride=1)
         ]
     else:
         # other numbers are not supported
         raise NotImplementedError
Example #5
 def _decide_blocks(self):
      # a single, narrower block configuration (base depths 16-128)
     self._blocks = [
         resnet_v1_block('block1', base_depth=16, num_units=3, stride=2),
         resnet_v1_block('block2', base_depth=32, num_units=4, stride=2),
         # use stride 1 for the last conv4 layer
         resnet_v1_block('block3', base_depth=64, num_units=6, stride=1),
         resnet_v1_block('block4', base_depth=128, num_units=3, stride=1)
     ]
Example #6
    def __init__(self, is_training):
        self.name = self.__class__.__name__.lower()
        self.vars = {}      
        self.layers = []
        self.activations = []  
        self.visualize = {}
        self.intermediate = {}
        self.predictions = {}
        self.losses = {}
        self.score_summaries = {}
        self.event_summaries = {}
        
        
        self.image       = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
        self.H_boxes     = tf.placeholder(tf.float32, shape=[None , 5], name = 'H_boxes')
        self.O_boxes     = tf.placeholder(tf.float32, shape=[None , 5], name = 'O_boxes')
        self.H_boxes_enc = tf.placeholder(tf.float32, shape=[None , 4], name = 'H_boxes_enc')
        self.O_boxes_enc = tf.placeholder(tf.float32, shape=[None , 4], name = 'O_boxes_enc')
        self.HO_boxes_enc= tf.placeholder(tf.float32, shape=[None , 4], name = 'HO_boxes_enc')
        self.gt_class_H  = tf.placeholder(tf.float32, shape=[None, 26], name = 'gt_class_H')
        self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 26], name = 'gt_class_HO')
        self.Mask_HO     = tf.placeholder(tf.float32, shape=[None, 26], name = 'HO_mask')
        self.Mask_H      = tf.placeholder(tf.float32, shape=[None, 26], name = 'H_mask')
        self.H_num       = tf.placeholder(tf.int32) # pos
        self.ivs         = tf.placeholder(tf.int32, shape=[26], name = 'idx_GT_verbs')
        self.inputs      = tf.placeholder(tf.float32, shape=[None, 300], name = 'embedding')         
        self.support     = [tf.sparse_placeholder(tf.float32) for _ in range(1)]
        self.num_nonzero = tf.placeholder(tf.int32)
        self.in_dim      = 300
        self.hidden_dim  = 512
        self.out_dim     = 512
        self.num_classes = 26
        self.scope       = 'resnet_v1_50'
        self.stride      = [16, ]
        self.train       = is_training 
        
        self.now_lr      = None
        self.optimizer   = None
        self.opt_op      = None        
        if tf.__version__ == '1.1.0':
            self.blocks     = [resnet_utils.Block('block1', resnet_v1.bottleneck,[(256,   64, 1)] * 2 + [(256,   64, 2)]),
                               resnet_utils.Block('block2', resnet_v1.bottleneck,[(512,  128, 1)] * 3 + [(512,  128, 2)]),
                               resnet_utils.Block('block3', resnet_v1.bottleneck,[(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                               resnet_utils.Block('block4', resnet_v1.bottleneck,[(2048, 512, 1)] * 3),
                               resnet_utils.Block('block5', resnet_v1.bottleneck,[(2048, 512, 1)] * 3)]
        else:
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self.blocks = [resnet_v1_block('block1', base_depth=64,  num_units=3, stride=2),
                           resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                           resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                           resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                           resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]

        self.build_all()
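One note on the version branch above: under TF 1.1.0 the blocks are spelled out as raw (depth, depth_bottleneck, stride) tuples, while newer versions call resnet_v1_block. A pure-Python check that both spell the same configuration:

# The hand-written args match resnet_v1_block('block2', base_depth=128,
# num_units=4, stride=2): stride on the last unit, depth = 4 * base_depth.
args_block2 = [(512, 128, 1)] * 3 + [(512, 128, 2)]
print(args_block2)
# [(512, 128, 1), (512, 128, 1), (512, 128, 1), (512, 128, 2)]
# Blocks 3-5 are all stride 1, so the feature map stays at 1/16 of the
# input resolution, consistent with self.stride = [16, ] above.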
Example #7
    def build(self,
              inputs,
              input_pixel_size,
              is_training,
              scope='resnet_v1_101'):  # scope is an important variable to set
        """ resnet
        args:
            inputs: a tensor of size [batch_size, height, width, channels].
            input_pixel_size: size of the input (H x W)
            is_training: True for training, False for validation/testing.
            scope: Optional scope for the variables.

        Returns:
            The last op containing the log predictions and end_points dict.
        """
        res_config = self.config
        fixed_block = res_config.fixed_block
        self._scope = scope

        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net_base = self._build_base(inputs)

        blocks = [
            resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
            resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
            resnet_v1_block('block3', base_depth=256, num_units=23, stride=1),
            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
        ]

        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net_conv1, net_dict1 = resnet_v1.resnet_v1(
                net_base,
                blocks[0:fixed_block],
                global_pool=False,
                include_root_block=False,  # no reuse
                scope=self._scope)

        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv2, net_dict2 = resnet_v1.resnet_v1(
                net_conv1,
                blocks[fixed_block:],
                global_pool=False,
                include_root_block=False,
                scope=self._scope)

        feature_maps_out = net_conv2  # pyramid_fusion3 #pyramid_fusion1

        #end_points = slim.utils.convert_collection_to_dict(
        #            end_points_collection)

        return feature_maps_out, net_dict2  #end_points
Example #8
    def __init__(self):
        self.visualize = {}
        self.intermediate = {}
        self.predictions = {}
        self.score_summaries = {}
        self.event_summaries = {}
        self.train_summaries = []
        self.losses = {}

        self.image       = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
        self.spatial     = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name = 'sp') # Pattern.reshape( num_pos_neg, 64, 64, 3) 
        self.Hsp_boxes   = tf.placeholder(tf.float32, shape=[None, 5], name = 'Hsp_boxes')
        self.O_boxes     = tf.placeholder(tf.float32, shape=[None, 5], name = 'O_boxes') #Object_augmented[:num_pos].reshape(num_pos, 5)
        self.gt_class_H  = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_H')
        self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_HO')
        self.gt_class_sp = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_sp')
        self.Mask_HO     = tf.placeholder(tf.float32, shape=[None, 29], name = 'HO_mask')
        self.Mask_H      = tf.placeholder(tf.float32, shape=[None, 29], name = 'H_mask')
        self.Mask_sp     = tf.placeholder(tf.float32, shape=[None, 29], name = 'sp_mask')
        self.gt_binary_label = tf.placeholder(tf.float32, shape=[None, 2], name = 'gt_binary_label')
        self.H_num       = tf.placeholder(tf.int32)
        self.HO_weight   = np.array([3.3510249, 3.4552405, 4.0257854, 0.0, 4.088436, 
                                    3.4370995, 3.85842, 4.637334, 3.5487218, 3.536237, 
                                    2.5578923, 3.342811, 3.8897269, 4.70686, 3.3952892, 
                                    3.9706533, 4.504736, 0.0, 1.4873443, 3.700363, 
                                    4.1058283, 3.6298118, 0.0, 6.490651, 5.0808263, 
                                    1.520838, 3.3888445, 0.0, 3.9899964], dtype = 'float32').reshape(1,29)
        self.H_weight   = np.array([4.0984106, 4.102459, 4.0414762, 4.060745, 4.0414762, 
                                    3.9768186, 4.23686, 5.3542085, 3.723717, 3.4699364, 
                                    2.4587274, 3.7167964, 4.08836, 5.050695, 3.9077065, 
                                    4.534647, 3.4699364, 2.9466882, 1.8585607, 3.9433942, 
                                    3.9433942, 4.3523254, 3.8368235, 6.4963055, 5.138182, 
                                    1.7807873, 4.080392, 1.9544303, 4.5761204],dtype = 'float32').reshape(1,29)
        self.binary_weight = np.array([1.0986122886681098, 0.4054651081081644], dtype = 'float32').reshape(1,2)
        self.num_classes = 29
        self.num_binary  = 2 # existence (0 or 1) of HOI
        self.num_fc      = 1024
        self.scope       = 'resnet_v1_50'
        self.stride      = [16, ]
        self.lr          = tf.placeholder(tf.float32)
        if tf.__version__ == '1.1.0':
            self.blocks     = [resnet_utils.Block('block1', resnet_v1.bottleneck,[(256,   64, 1)] * 2 + [(256,   64, 2)]),
                               resnet_utils.Block('block2', resnet_v1.bottleneck,[(512,  128, 1)] * 3 + [(512,  128, 2)]),
                               resnet_utils.Block('block3', resnet_v1.bottleneck,[(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                               resnet_utils.Block('block4', resnet_v1.bottleneck,[(2048, 512, 1)] * 3),
                               resnet_utils.Block('block5', resnet_v1.bottleneck,[(2048, 512, 1)] * 3)]
        else:
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self.blocks = [resnet_v1_block('block1', base_depth=64,  num_units=3, stride=2),
                           resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                           resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                           resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                           resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
Example #9
 def _decide_block(self):
      # choose different blocks for different numbers of layers
     base_depths = list()
     strides = list()
     self.blocks = list()
     scopes = ["block1", "block2", "block3", "block4"]
     for i in range(2):
         base_depths.append(int(math.pow(2, i+6)))
         strides.append(2)
     for i in range(2):
         base_depths.append(int(math.pow(2, i + 8)))
         strides.append(1)
     if self.num_layers == 50:
         num_units = [3, 4, 6, 3]
     elif self.num_layers == 101:
         num_units = [3, 4, 23, 3]
     elif self.num_layers == 152:
         num_units = [3, 8, 36, 3]
     else:
         # other numbers are not supported
         raise NotImplementedError
     for i in range(4):
         self.blocks.append(resnet_v1_block(
             scope=scopes[i], base_depth=base_depths[i],
             num_units=num_units[i], stride=strides[i]))
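A worked check of the two loops above (pure Python): the generated widths and strides match the hand-written configurations elsewhere on this page.

import math

base_depths, strides = [], []
for i in range(2):
    base_depths.append(int(math.pow(2, i + 6)))  # 2**6 = 64, 2**7 = 128
    strides.append(2)
for i in range(2):
    base_depths.append(int(math.pow(2, i + 8)))  # 2**8 = 256, 2**9 = 512
    strides.append(1)
print(base_depths, strides)  # [64, 128, 256, 512] [2, 2, 1, 1]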
Example #10
def resnet_faster_rcnn_head(input, params):
    """
  Derived from https://github.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow

  Args:
    input:
    params:

  Returns:

  """

    with tf.variable_scope('resnet_head', reuse=tf.AUTO_REUSE):
        block4 = [
            resnet_v1_block('block4', base_depth=256, num_units=3, stride=1)
        ]

        with slim.arg_scope(norm_arg_scope(params)):
            C5, _ = resnet_v1.resnet_v1(input,
                                        block4,
                                        global_pool=False,
                                        include_root_block=False,
                                        scope='resnet_v1_50',
                                        reuse=tf.AUTO_REUSE)

            return C5
Example #11
def restnet_head(input, is_training, scope_name, stage):
    if stage == 'stage1':
        block4 = [
            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
        ]

        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            C5, _ = resnet_v1.resnet_v1(input,
                                        block4,
                                        global_pool=False,
                                        include_root_block=False,
                                        scope=scope_name)
            # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
            flatten = tf.reduce_mean(C5,
                                     axis=[1, 2],
                                     keep_dims=False,
                                     name='global_average_pooling')
            # C5_flatten = tf.Print(C5_flatten, [tf.shape(C5_flatten)], summarize=10, message='C5_flatten_shape')

        # global average pooling C5 to obtain fc layers
    else:
        fc_flatten = slim.flatten(input)
        net = slim.fully_connected(fc_flatten,
                                   1024,
                                   scope='fc_1_{}'.format(stage))
        net = slim.dropout(net,
                           keep_prob=0.5,
                           is_training=is_training,
                           scope='dropout_{}'.format(stage))
        flatten = slim.fully_connected(net,
                                       1024,
                                       scope='fc_2_{}'.format(stage))
    return flatten
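A hedged usage sketch for the two-stage head above (same TF 1.x / slim assumptions as the rest of this page; the 7x7x1024 ROI crop shape is hypothetical).

import tensorflow as tf

pool5 = tf.placeholder(tf.float32, [None, 7, 7, 1024])  # hypothetical pooled ROIs
feat1 = restnet_head(pool5, is_training=False,
                     scope_name='resnet_v1_101', stage='stage1')  # [N, 2048] after GAP
feat2 = restnet_head(feat1, is_training=False,
                     scope_name='resnet_v1_101', stage='stage2')  # [N, 1024] from the fc stack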
Example #12
  def _decide_blocks(self):
    self._blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                    resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                    resnet_v1_block('block3', base_depth=256, num_units=23, stride=1),
                    resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]

    self._blocks2 = [resnet_v1_block('block1_prev', base_depth=64, num_units=3, stride=2),
                     resnet_v1_block('block2_prev', base_depth=128, num_units=4, stride=2),
                     resnet_v1_block('block3_prev', base_depth=256, num_units=23, stride=1),
                     resnet_v1_block('block4_prev', base_depth=512, num_units=3, stride=1)]
Example #13
def resnet_v1_101_c4(inputs,
                     num_classes=None,
                     is_training=True,
                     global_pool=True,
                     output_stride=None,
                     spatial_squeeze=True,
                     reuse=None,
                     scope='resnet_v1_101'):
  """ResNet-101 model of [1]. See resnet_v1() for arg and return description."""
  blocks = [
      resnet_v1_block('block1', base_depth=64, num_units=3, stride=1),
      resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
      resnet_v1_block('block3', base_depth=256, num_units=23, stride=2)
  ]
  return resnet_v1.resnet_v1(inputs, blocks, num_classes, is_training,
                             global_pool=global_pool, output_stride=output_stride,
                             include_root_block=True,
                             reuse=reuse, scope=scope)
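The _c4 suffix reflects that this network stops after block3 (the conv4 stage). A quick check of the resulting output stride, using the fact that slim's root block (stride-2 conv1 plus stride-2 max pool) contributes a factor of 4:

output_stride = 2 * 2  # root block: conv1 stride 2, max pool stride 2
for block_stride in [1, 2, 2]:  # block1..block3 as defined above
    output_stride *= block_stride
print(output_stride)  # 16, the usual conv4 stride for detection backbones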
Example #14
    def __init__(self):
        self.visualize = {}
        self.intermediate = {}
        self.predictions = {}
        self.score_summaries = {}
        self.event_summaries = {}
        self.train_summaries = []
        self.losses = {}

        self.image       = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
        self.spatial     = tf.placeholder(tf.float32, shape=[None, 64, 64, 2], name = 'sp')
        self.Hsp_boxes   = tf.placeholder(tf.float32, shape=[None, 5], name = 'Hsp_boxes')
        self.O_boxes     = tf.placeholder(tf.float32, shape=[None, 5], name = 'O_boxes')
        self.gt_class_H  = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_H')
        self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_HO')
        self.gt_class_sp = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_sp')
        self.Mask_HO     = tf.placeholder(tf.float32, shape=[None, 29], name = 'HO_mask')
        self.Mask_H      = tf.placeholder(tf.float32, shape=[None, 29], name = 'H_mask')
        self.Mask_sp     = tf.placeholder(tf.float32, shape=[None, 29], name = 'sp_mask')
        self.H_num       = tf.placeholder(tf.int32)
        self.num_classes = 29
        self.num_fc      = 1024
        self.scope       = 'resnet_v1_50'
        self.stride      = [16, ]
        self.lr          = tf.placeholder(tf.float32)
        if tf.__version__ == '1.1.0':
            self.blocks     = [resnet_utils.Block('block1', resnet_v1.bottleneck,[(256,   64, 1)] * 2 + [(256,   64, 2)]),
                               resnet_utils.Block('block2', resnet_v1.bottleneck,[(512,  128, 1)] * 3 + [(512,  128, 2)]),
                               resnet_utils.Block('block3', resnet_v1.bottleneck,[(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                               resnet_utils.Block('block4', resnet_v1.bottleneck,[(2048, 512, 1)] * 3),
                               resnet_utils.Block('block5', resnet_v1.bottleneck,[(2048, 512, 1)] * 3)]
        else:
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self.blocks = [resnet_v1_block('block1', base_depth=64,  num_units=3, stride=2),
                           resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                           resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                           resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                           resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
Example #15
def restnet_head(input, is_training, scope_name):
    block4 = [resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5, _ = resnet_v1.resnet_v1(input,
                                    block4,
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)
        # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
        # C5_flatten = tf.reduce_mean(C5, axis=[1, 2], keep_dims=False, name='global_average_pooling')
        # C5_flatten = tf.Print(C5_flatten, [tf.shape(C5_flatten)], summarize=10, message='C5_flatten_shape')

    # global average pooling C5 to obtain fc layers
    return C5
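A hedged usage sketch (same TF 1.x / slim assumptions; the ROI crop shape is hypothetical): unlike the staged variant above, this head returns the block4 feature map itself and leaves pooling to the caller.

import tensorflow as tf

pool5 = tf.placeholder(tf.float32, [None, 7, 7, 1024])  # hypothetical pooled ROIs
C5 = restnet_head(pool5, is_training=False, scope_name='resnet_v1_50')
# C5: [num_rois, 7, 7, 2048]; callers typically apply
# tf.reduce_mean(C5, axis=[1, 2]) to get per-ROI feature vectors.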
Example #16
    def _decide_blocks(self):
        # choose different blocks for different numbers of layers
        if self._num_layers == 50:
            self._blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                            resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                            # use stride 1 for the last conv4 layer
                            resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]

        elif self._num_layers == 101:
            self._blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                            resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                            # use stride 1 for the last conv4 layer
                            resnet_v1_block('block3', base_depth=256, num_units=23, stride=1),
                            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]
        else:
            # other numbers are not supported
            raise NotImplementedError
Example #17
    def restnet_head(self, inputs, scope_name, is_training):
        block4 = [
            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
        ]

        with slim.arg_scope(self.resnet_arg_scope(is_training=is_training)):
            net, _ = resnet_v1.resnet_v1(inputs,
                                         block4,
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=scope_name)
            net_flatten = tf.reduce_mean(net,
                                         axis=[1, 2],
                                         keep_dims=False,
                                         name='global_average_pooling')
        # global average pooling C5 to obtain fc layers
        return net_flatten
Example #18
def resnet_base(img_batch, scope_name, is_training=True):

    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101. ')

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3',
                        base_depth=256,
                        num_units=middle_num_units,
                        stride=2),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
    ]
    # when using FPN, the stride list is [1, 2, 2]

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(img_batch,
                                           64,
                                           7,
                                           stride=2,
                                           scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3],
                                  stride=2,
                                  padding='VALID',
                                  scope='pool1')

    not_freezed = [False] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]
    # Fixed_Blocks can be 1~3

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, end_points_C2 = resnet_v1.resnet_v1(net,
                                                blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')
    # add_heatmap(C2, name='Layer2/C2_heat')

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, end_points_C3 = resnet_v1.resnet_v1(C2,
                                                blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')
    # add_heatmap(C3, name='Layer3/C3_heat')
    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, end_points_C4 = resnet_v1.resnet_v1(C3,
                                                blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # add_heatmap(C4, name='Layer4/C4_heat')

    # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5, end_points_C5 = resnet_v1.resnet_v1(C4,
                                                blocks[3:4],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
    # add_heatmap(C5, name='Layer5/C5_heat')

    feature_dict = {
        'C2':
        end_points_C2['{}/block1/unit_2/bottleneck_v1'.format(scope_name)],
        'C3':
        end_points_C3['{}/block2/unit_3/bottleneck_v1'.format(scope_name)],
        'C4':
        end_points_C4['{}/block3/unit_{}/bottleneck_v1'.format(
            scope_name, middle_num_units - 1)],
        'C5':
        end_points_C5['{}/block4/unit_3/bottleneck_v1'.format(scope_name)],
        # 'C5': end_points_C5['{}/block4'.format(scope_name)],
    }

    pyramid_dict = {}
    with tf.variable_scope('build_pyramid'):
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(
                                cfgs.WEIGHT_DECAY),
                            activation_fn=None,
                            normalizer_fn=None):

            P5 = slim.conv2d(feature_dict['C5'],
                             num_outputs=256,
                             kernel_size=[1, 1],
                             stride=1,
                             scope='build_P5')

            pyramid_dict['P5'] = P5

            for level in range(4, 2, -1):  # build [P4, P3]

                pyramid_dict['P%d' % level] = fusion_two_layer(
                    C_i=feature_dict["C%d" % level],
                    P_j=pyramid_dict["P%d" % (level + 1)],
                    scope='build_P%d' % level)
            for level in range(5, 2, -1):
                pyramid_dict['P%d' % level] = slim.conv2d(
                    pyramid_dict['P%d' % level],
                    num_outputs=256,
                    kernel_size=[3, 3],
                    padding="SAME",
                    stride=1,
                    scope="fuse_P%d" % level)

            p6 = slim.conv2d(
                pyramid_dict['P5'] if cfgs.USE_P5 else feature_dict['C5'],
                num_outputs=256,
                kernel_size=[3, 3],
                padding="SAME",
                stride=2,
                scope='p6_conv')
            pyramid_dict['P6'] = p6

            p7 = tf.nn.relu(p6, name='p6_relu')

            p7 = slim.conv2d(p7,
                             num_outputs=256,
                             kernel_size=[3, 3],
                             padding="SAME",
                             stride=2,
                             scope='p7_conv')

            pyramid_dict['P7'] = p7

    # for level in range(7, 1, -1):
    #     add_heatmap(pyramid_dict['P%d' % level], name='Layer%d/P%d_heat' % (level, level))

    return pyramid_dict
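For reference, the feature strides implied by the block definitions above, derived by hand: slim applies each block's stride at its last unit, and the unit_{k-1} end points in feature_dict are taken just before that strided unit, so each C level is one octave finer than its block's output (block4 has stride 1, so C5 matches its output).

# Sketch: strides of the taps and pyramid levels built above.
feature_strides = {
    'C2': 4, 'C3': 8, 'C4': 16, 'C5': 32,  # backbone taps
    'P3': 8, 'P4': 16, 'P5': 32,           # fused pyramid levels
    'P6': 64, 'P7': 128,                   # extra stride-2 convs
}
print(feature_strides)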
Example #19
 def _decide_blocks(self):
      # choose different blocks for different numbers of layers
     if self._num_layers == 50:
         self._blocks = [
             resnet_v1_block('block1', base_depth=64, num_units=3,
                             stride=2),
             resnet_v1_block('block2',
                             base_depth=128,
                             num_units=4,
                             stride=2),
             resnet_v1_block('block3',
                             base_depth=256,
                             num_units=6,
                             stride=1),
             resnet_v1_block('block4',
                             base_depth=512,
                             num_units=3,
                             stride=1)
         ]
     elif self._num_layers == 101:
         self._blocks = [
             resnet_v1_block('block1', base_depth=64, num_units=3,
                             stride=2),
             resnet_v1_block('block2',
                             base_depth=128,
                             num_units=4,
                             stride=2),
             resnet_v1_block('block3',
                             base_depth=256,
                             num_units=23,
                             stride=1),
             resnet_v1_block('block4',
                             base_depth=512,
                             num_units=3,
                             stride=1)
         ]
     elif self._num_layers == 152:
         self._blocks = [
             resnet_v1_block('block1', base_depth=64, num_units=3,
                             stride=2),
             resnet_v1_block('block2',
                             base_depth=128,
                             num_units=8,
                             stride=2),
             resnet_v1_block('block3',
                             base_depth=256,
                             num_units=36,
                             stride=1),
             resnet_v1_block('block4',
                             base_depth=512,
                             num_units=3,
                             stride=1)
         ]
     else:
         raise NotImplementedError
Example #20
  def _decide_blocks(self):
    # choose different blocks for different numbers of layers
    if self._num_layers == 50:
      self._blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                      resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                      # use stride 1 for the last conv4 layer
                      resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                      resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]

    elif self._num_layers == 101:
      self._blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                      resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                      # use stride 1 for the last conv4 layer
                      resnet_v1_block('block3', base_depth=256, num_units=23, stride=1),
                      resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]

    elif self._num_layers == 152:
      self._blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                      resnet_v1_block('block2', base_depth=128, num_units=8, stride=2),
                      # use stride 1 for the last conv4 layer
                      resnet_v1_block('block3', base_depth=256, num_units=36, stride=1),
                      resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]

    else:
      # other numbers are not supported
      raise NotImplementedError
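The unit counts above are where the ResNet depth names come from; a quick pure-Python arithmetic check:

# Three convs per bottleneck unit, plus conv1 and the final logits layer.
RESNET_UNITS = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}
for depth, units in RESNET_UNITS.items():
    assert sum(units) * 3 + 2 == depth
print('unit counts consistent with ResNet-50/101/152 naming')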
Example #21
    def build_network(self, sess, is_training=True):
        # select initializers
        # if cfg.TRAIN.TRUNCATED:
        if cfg.FLAGS.initializer == "truncated":
            initializer = tf.truncated_normal_initializer(mean=0.0,
                                                          stddev=0.01)
            initializer_bbox = tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.001)
        else:
            initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = tf.random_normal_initializer(mean=0.0,
                                                            stddev=0.001)
        bottleneck = resnet_v1.bottleneck
        # choose different blocks for different numbers of layers
        if self._num_layers == 50:
            # blocks = [
            #   resnet_utils.Block('block1', bottleneck,
            #                      [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            #   resnet_utils.Block('block2', bottleneck,
            #                      [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            #   # Use stride-1 for the last conv4 layer
            #   resnet_utils.Block('block3', bottleneck,
            #                      [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
            #   resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
            # ]
            blocks = [
                resnet_v1.resnet_v1_block('block1',
                                          base_depth=64,
                                          num_units=3,
                                          stride=2),
                resnet_v1.resnet_v1_block('block2',
                                          base_depth=128,
                                          num_units=4,
                                          stride=2),
                resnet_v1.resnet_v1_block('block3',
                                          base_depth=256,
                                          num_units=6,
                                          stride=1),
                resnet_v1.resnet_v1_block('block4',
                                          base_depth=512,
                                          num_units=3,
                                          stride=1),
            ]
        elif self._num_layers == 101:
            # blocks = [
            #   resnet_utils.Block('block1', bottleneck,
            #                      [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            #   resnet_utils.Block('block2', bottleneck,
            #                      [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            #   # Use stride-1 for the last conv4 layer
            #   resnet_utils.Block('block3', bottleneck,
            #                      [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
            #   resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
            # ]
            blocks = [
                resnet_v1.resnet_v1_block('block1',
                                          base_depth=64,
                                          num_units=3,
                                          stride=2),
                resnet_v1.resnet_v1_block('block2',
                                          base_depth=128,
                                          num_units=4,
                                          stride=2),
                resnet_v1.resnet_v1_block('block3',
                                          base_depth=256,
                                          num_units=23,
                                          stride=1),
                resnet_v1.resnet_v1_block('block4',
                                          base_depth=512,
                                          num_units=3,
                                          stride=1),
            ]
        elif self._num_layers == 152:
            # blocks = [
            #   resnet_utils.Block('block1', bottleneck,
            #                      [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            #   resnet_utils.Block('block2', bottleneck,
            #                      [(512, 128, 1)] * 7 + [(512, 128, 2)]),
            #   # Use stride-1 for the last conv4 layer
            #   resnet_utils.Block('block3', bottleneck,
            #                      [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
            #   resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
            # ]
            blocks = [
                resnet_v1.resnet_v1_block('block1',
                                          base_depth=64,
                                          num_units=3,
                                          stride=2),
                resnet_v1.resnet_v1_block('block2',
                                          base_depth=128,
                                          num_units=8,
                                          stride=2),
                resnet_v1.resnet_v1_block('block3',
                                          base_depth=256,
                                          num_units=36,
                                          stride=1),
                resnet_v1.resnet_v1_block('block4',
                                          base_depth=512,
                                          num_units=3,
                                          stride=1),
            ]
        else:
            # other numbers are not supported
            raise NotImplementedError

        assert (0 <= cfg.FLAGS.fixed_blocks < 4)
        if cfg.FLAGS.fixed_blocks == 3:
            with slim.arg_scope(resnet_arg_scope(is_training=False)):
                net = self.build_base()
                net_conv4, _ = resnet_v1.resnet_v1(
                    net,
                    blocks[0:cfg.FLAGS.fixed_blocks],
                    global_pool=False,
                    include_root_block=False,
                    scope=self._resnet_scope)
        elif cfg.FLAGS.fixed_blocks > 0:
            with slim.arg_scope(resnet_arg_scope(is_training=False)):
                net = self.build_base()
                net, _ = resnet_v1.resnet_v1(net,
                                             blocks[0:cfg.FLAGS.fixed_blocks],
                                             global_pool=False,
                                             include_root_block=False,
                                             scope=self._resnet_scope)

            with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
                net_conv4, _ = resnet_v1.resnet_v1(
                    net,
                    blocks[cfg.FLAGS.fixed_blocks:-1],
                    global_pool=False,
                    include_root_block=False,
                    scope=self._resnet_scope)
        else:  # cfg.FLAGS.fixed_blocks == 0
            with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
                net = self.build_base()
                net_conv4, _ = resnet_v1.resnet_v1(net,
                                                   blocks[0:-1],
                                                   global_pool=False,
                                                   include_root_block=False,
                                                   scope=self._resnet_scope)

        self._act_summaries.append(net_conv4)
        self._layers['head'] = net_conv4
        with tf.variable_scope(self._resnet_scope, self._resnet_scope):
            # build the anchors for the image
            self._anchor_component()

            # rpn
            rpn = slim.conv2d(net_conv4,
                              512, [3, 3],
                              trainable=is_training,
                              weights_initializer=initializer,
                              scope="rpn_conv/3x3")
            self._act_summaries.append(rpn)
            rpn_cls_score = slim.conv2d(rpn,
                                        self._num_anchors * 2, [1, 1],
                                        trainable=is_training,
                                        weights_initializer=initializer,
                                        padding='VALID',
                                        activation_fn=None,
                                        scope='rpn_cls_score')
            # change it so that the score has 2 as its channel size
            rpn_cls_score_reshape = self._reshape_layer(
                rpn_cls_score, 2, 'rpn_cls_score_reshape')
            rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape,
                                                       "rpn_cls_prob_reshape")
            rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape,
                                               self._num_anchors * 2,
                                               "rpn_cls_prob")
            rpn_bbox_pred = slim.conv2d(rpn,
                                        self._num_anchors * 4, [1, 1],
                                        trainable=is_training,
                                        weights_initializer=initializer,
                                        padding='VALID',
                                        activation_fn=None,
                                        scope='rpn_bbox_pred')
            if is_training:
                rois, roi_scores = self._proposal_layer(
                    rpn_cls_prob, rpn_bbox_pred, "rois")
                rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
                # Try to have a deterministic order for the computing graph, for reproducibility
                with tf.control_dependencies([rpn_labels]):
                    rois, _ = self._proposal_target_layer(
                        rois, roi_scores, "rpn_rois")
            else:
                if cfg.FLAGS.test_mode == 'nms':
                    rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred,
                                                   "rois")
                elif cfg.FLAGS.test_mode == 'top':
                    rois, _ = self._proposal_top_layer(rpn_cls_prob,
                                                       rpn_bbox_pred, "rois")
                else:
                    raise NotImplementedError

            # rcnn
            if cfg.FLAGS.pooling_mode == 'crop':
                pool5 = self._crop_pool_layer(net_conv4, rois, "pool5")
            else:
                raise NotImplementedError

        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            fc7, _ = resnet_v1.resnet_v1(pool5,
                                         blocks[-1:],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self._resnet_scope)

        with tf.variable_scope(self._resnet_scope, self._resnet_scope):
            # Average pooling done by reduce_mean
            fc7 = tf.reduce_mean(fc7, axis=[1, 2])
            cls_score = slim.fully_connected(fc7,
                                             self._num_classes,
                                             weights_initializer=initializer,
                                             trainable=is_training,
                                             activation_fn=None,
                                             scope='cls_score')
            cls_prob = self._softmax_layer(cls_score, "cls_prob")
            bbox_pred = slim.fully_connected(
                fc7,
                self._num_classes * 4,
                weights_initializer=initializer_bbox,
                trainable=is_training,
                activation_fn=None,
                scope='bbox_pred')
        self._predictions["rpn_cls_score"] = rpn_cls_score
        self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
        self._predictions["rpn_cls_prob"] = rpn_cls_prob
        self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
        self._predictions["cls_score"] = cls_score
        self._predictions["cls_prob"] = cls_prob
        self._predictions["bbox_pred"] = bbox_pred
        self._predictions["rois"] = rois

        self._score_summaries.update(self._predictions)

        return rois, cls_prob, bbox_pred
Example #22
def resnet_base(img_batch, scope_name, is_training=True):
    '''
    This code is derived from light-head RCNN:
    https://github.com/zengarden/light_head_rcnn

    It makes it convenient to freeze blocks, so we adopt this approach.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError('We only support resnet_v1_50 or resnet_v1_101. Check your network name.')

    blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
              resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
              # use stride 1 for the last conv4 layer.

              resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=1)]
              # when using FPN, the stride list is [1, 2, 2]

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(
                img_batch, 64, 7, stride=2, scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(
                net, [3, 3], stride=2, padding='VALID', scope='pool1')

    not_freezed = [False] * cfgs.FIXED_BLOCKS + (4-cfgs.FIXED_BLOCKS)*[True]
    # Fixed_Blocks can be 1~3

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, _ = resnet_v1.resnet_v1(net,
                                    blocks[0:1],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)

    # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, _ = resnet_v1.resnet_v1(C2,
                                    blocks[1:2],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)

    # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, _ = resnet_v1.resnet_v1(C3,
                                    blocks[2:3],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)

    # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')
    return C2, C4
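The freeze logic hinges on the not_freezed mask. A pure-Python sketch of how cfgs.FIXED_BLOCKS maps to per-block is_training flags (the first FIXED_BLOCKS blocks run with batch norm in inference mode):

FIXED_BLOCKS = 1  # cfgs.FIXED_BLOCKS, here assumed 1 for illustration
is_training = True
not_freezed = [False] * FIXED_BLOCKS + (4 - FIXED_BLOCKS) * [True]
per_block = [is_training and flag for flag in not_freezed]
print(not_freezed)  # [False, True, True, True]
print(per_block)    # [False, True, True, True]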
Example #23
  def _build_network(self, sess, is_training=True):
    # select initializers
    if cfg.TRAIN.TRUNCATED:
      initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
      initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

    # choose different blocks for different numbers of layers
    if self._num_layers == 50:
      blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                # use stride 1 for the last conv4 layer
                resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]

    elif self._num_layers == 101:
      blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                # use stride 1 for the last conv4 layer
                resnet_v1_block('block3', base_depth=256, num_units=23, stride=1),
                resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]

    elif self._num_layers == 152:
      blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                resnet_v1_block('block2', base_depth=128, num_units=8, stride=2),
                # use stride 1 for the last conv4 layer
                resnet_v1_block('block3', base_depth=256, num_units=36, stride=1),
                resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]

    else:
      # other numbers are not supported
      raise NotImplementedError

    assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 3)
    # Now the base is always fixed during training
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
      net_conv = self._build_base()
    if cfg.RESNET.FIXED_BLOCKS > 0:
      with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net_conv, _ = resnet_v1.resnet_v1(net_conv,
                                     blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)
    if cfg.RESNET.FIXED_BLOCKS < 3:
      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv, _ = resnet_v1.resnet_v1(net_conv,
                                           blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope)

    self._act_summaries.append(net_conv)
    self._layers['head'] = net_conv
    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
      # build the anchors for the image
      self._anchor_component()
      # region proposal network
      rois = self._region_proposal(net_conv, is_training, initializer)
      # region of interest pooling
      if cfg.POOLING_MODE == 'crop':
        pool5 = self._crop_pool_layer(net_conv, rois, "pool5")
      else:
        raise NotImplementedError

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
      fc7, _ = resnet_v1.resnet_v1(pool5,
                                   blocks[-1:],
                                   global_pool=False,
                                   include_root_block=False,
                                   scope=self._resnet_scope)

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
      # average pooling done by reduce_mean
      fc7 = tf.reduce_mean(fc7, axis=[1, 2])
      # region classification
      cls_prob, bbox_pred = self._region_classification(fc7, is_training, 
                                                        initializer, initializer_bbox)

    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
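The block slicing above is the crux of this network: a pure-Python sketch of how cfg.RESNET.FIXED_BLOCKS splits the four blocks into a frozen prefix, a shared trainable conv head, and a per-ROI tail.

blocks = ['block1', 'block2', 'block3', 'block4']
FIXED_BLOCKS = 1  # cfg.RESNET.FIXED_BLOCKS, assumed 1 for illustration
frozen = blocks[0:FIXED_BLOCKS]   # ['block1'], run with frozen batch norm
head   = blocks[FIXED_BLOCKS:-1]  # ['block2', 'block3'], shared conv head
tail   = blocks[-1:]              # ['block4'], applied to each pooled ROI
print(frozen, head, tail)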
Example #24
def resnet_base(img_batch, scope_name, is_training):
    '''
    This code is derived from light-head RCNN:
    https://github.com/zengarden/light_head_rcnn

    It makes it convenient to freeze blocks, so we adopt this approach.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23  # the third block of ResNet-101 has 23 units
    elif scope_name == 'resnet_v1_152':
        middle_num_units = 36
    else:
        raise NotImplementedError(
            'We only support resnet_v1_50, resnet_v1_101 or resnet_v1_152. '
            'Check your network name.')

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=1),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3',
                        base_depth=256,
                        num_units=middle_num_units,
                        stride=2),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=2)
    ]
    with slim.arg_scope(
            resnet_arg_scope(is_training=is_training)):  # resnet_arg_scope sets the shared layer defaults
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(
                img_batch, 64, 7, stride=2,
                scope='conv1')  # first ResNet conv layer: 7x7, 64 filters, stride 2

            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])  # explicit zero padding, similar to SAME padding for the pooling below
            net = slim.max_pool2d(net, [3, 3],
                                  stride=2,
                                  padding='VALID',
                                  scope='pool1')  # 3x3 max pooling
    # not_freezed = [False] * cfgs.FIXED_BLOCKS + (4-cfgs.FIXED_BLOCKS)*[True]  # mask of blocks left unfrozen
    #net = tf.Print(net, [tf.shape(net)], summarize=10, message='net')
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C2, end_points_C2 = resnet_v1.resnet_v1(
            net,
            blocks[0:1],  # pass a single resnet_utils.Block, i.e. one whole ResNet block
            global_pool=False,
            include_root_block=False,
            scope=scope_name
        )  # returns the block output C2 plus end_points_C2, the feature maps collected so far (the collection grows with each block)

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C3, end_points_C3 = resnet_v1.resnet_v1(
            C2,
            blocks[1:2],
            global_pool=False,
            include_root_block=False,
            scope=scope_name)  # build the second block

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C4, end_points_C4 = resnet_v1.resnet_v1(C3,
                                                blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5, end_points_C5 = resnet_v1.resnet_v1(C4,
                                                blocks[3:4],
                                                num_classes=cfgs.num_classes,
                                                global_pool=True,
                                                include_root_block=False,
                                                scope=scope_name)
        C5 = tf.reshape(C5, [-1, cfgs.num_classes])
    return C5
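The per-block structure above is what makes freezing convenient: each call to resnet_v1.resnet_v1 sits under its own resnet_arg_scope, so every block can receive its own is_training flag and frozen blocks keep their batch-norm statistics fixed. A minimal sketch of the pattern, assuming the cfgs.FIXED_BLOCKS config that the commented-out line references (the loop is illustrative, not the original code):

not_freezed = [False] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]
feat = net
for i in range(4):
    # frozen blocks run with is_training=False, keeping their weights and
    # batch-norm statistics fixed during fine-tuning
    with slim.arg_scope(resnet_arg_scope(
            is_training=(is_training and not_freezed[i]))):
        feat, _ = resnet_v1.resnet_v1(feat,
                                      blocks[i:i + 1],
                                      global_pool=False,
                                      include_root_block=False,
                                      scope=scope_name)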
Example #25
0
    def __init__(self,
                 in_channels=3,
                 num_classes=0,
                 num_nodes=17 + 2,
                 edge_importance_weighting=True,
                 is_training=True,
                 num_fc=1024,
                 posetype=1,
                 bi_posegraph=False,
                 bodypart=False,
                 binary=False,
                 posemap=False,
                 posegraph=False,
                 semantic=False,
                 data_bn=True):
        self.predictions = {}
        self.train_summaries = []
        self.losses = {}
        self.lr = tf.placeholder(tf.float32)
        self.num_binary = 1  # existence of HOI (0 or 1)
        self.num_classes = 600
        self.gt_binary_label = tf.placeholder(tf.float32,
                                              shape=[None, 1],
                                              name='gt_binary_label')
        self.gt_class_HO = tf.placeholder(tf.float32,
                                          shape=[None, 600],
                                          name='gt_class_HO')
        self.is_training = is_training
        if self.is_training:
            self.keep_prob = cfg.TRAIN_DROP_OUT_BINARY
            self.keep_prob_tail = .5
        else:
            self.keep_prob = 1
            self.keep_prob_tail = 1

        self.image = tf.placeholder(tf.float32,
                                    shape=[1, None, None, 3],
                                    name='image')
        self.head = tf.placeholder(tf.float32,
                                   shape=[1, None, None, 1024],
                                   name='head')
        self.H_boxes = tf.placeholder(tf.float32,
                                      shape=[None, 5],
                                      name='H_boxes')
        self.O_boxes = tf.placeholder(tf.float32,
                                      shape=[None, 5],
                                      name='O_boxes')
        self.partboxes = tf.placeholder(tf.float32,
                                        shape=[None, 17, 5],
                                        name='part_boxes')
        self.semantic = tf.placeholder(tf.float32,
                                       shape=[None, 1024],
                                       name='semantic_feat')
        self.H_num = tf.placeholder(tf.int32)

        # Control the network architecture
        self.bodypart = bodypart
        self.binary = binary
        self.posemap = posemap
        self.posegraph = posegraph
        self.bi_posegraph = bi_posegraph
        self.posetype = posetype
        self.semantic_flag = semantic
        if self.posetype == 1:
            self.spatial = tf.placeholder(tf.float32,
                                          shape=[None, 64, 64, 2],
                                          name='sp')
        else:
            self.spatial = tf.placeholder(tf.float32,
                                          shape=[None, 64, 64, 3],
                                          name='sp')

        # ResNet 50 Network
        self.scope = 'resnet_v1_50'
        self.num_fc = 1024
        self.num_fc2 = num_fc
        self.stride = [
            16,
        ]
        if tf.__version__ == '1.1.0':
            self.blocks = [
                resnet_utils.Block('block1', resnet_v1.bottleneck,
                                   [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                resnet_utils.Block('block2', resnet_v1.bottleneck,
                                   [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                resnet_utils.Block('block3', resnet_v1.bottleneck,
                                   [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                resnet_utils.Block('block4', resnet_v1.bottleneck,
                                   [(2048, 512, 1)] * 3),
                resnet_utils.Block('block5', resnet_v1.bottleneck,
                                   [(2048, 512, 1)] * 3)
            ]
        else:
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self.blocks = [
                resnet_v1_block('block1', base_depth=64, num_units=3,
                                stride=2),
                resnet_v1_block('block2',
                                base_depth=128,
                                num_units=4,
                                stride=2),
                resnet_v1_block('block3',
                                base_depth=256,
                                num_units=6,
                                stride=1),
                resnet_v1_block('block4',
                                base_depth=512,
                                num_units=3,
                                stride=1),
                resnet_v1_block('block5',
                                base_depth=512,
                                num_units=3,
                                stride=1)
            ]

        # GCN setting
        self.num_nodes = num_nodes
        self.c = in_channels
        self.data_bn = data_bn
        self.strategy = 'spatial'
        self.graph = Graph(strategy=self.strategy)
        self.A = tf.convert_to_tensor(self.graph.A.astype(
            np.float32))  # [None, num_nodes, num_nodes]
        self.spatial_kernel_size = self.A.shape[0]
        # [N, C, T, V, M] = [N, 3, 1, 19, 1]
        self.Gnodes = tf.placeholder(
            tf.float32,
            shape=[None, self.c, 1, self.num_nodes, 1],
            name='Gnodes')

        # ST_GCN
        self.depth_st_gcn_networks = 10
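For orientation, Gnodes follows the ST-GCN tensor layout named in the comment above; a minimal, hypothetical feed for one sample would be:

import numpy as np

# one sample, 3 channels, 1 frame, 19 graph nodes (17 keypoints + 2 box
# centers), 1 person -- matching [N, C, T, V, M] = [1, 3, 1, 19, 1]
dummy_nodes = np.zeros((1, 3, 1, 19, 1), dtype=np.float32)
# feed_dict = {model.Gnodes: dummy_nodes, ...}  # 'model' is this class instance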
Example #26
0
    def __init__(self, model_name):
        self.model_name = model_name
        self.visualize = {}
        self.test_visualize = {}
        self.intermediate = {}
        self.predictions = {}
        self.score_summaries = {}
        self.event_summaries = {}
        self.train_summaries = []
        self.losses = {}

        self.image = tf.placeholder(tf.float32,
                                    shape=[1, None, None, 3],
                                    name='image')
        self.spatial = tf.placeholder(tf.float32,
                                      shape=[None, 64, 64, 3],
                                      name='sp')
        self.H_boxes = tf.placeholder(tf.float32,
                                      shape=[None, 5],
                                      name='H_boxes')
        self.O_boxes = tf.placeholder(tf.float32,
                                      shape=[None, 5],
                                      name='O_boxes')
        self.gt_class_HO = tf.placeholder(tf.float32,
                                          shape=[None, 600],
                                          name='gt_class_HO')
        self.H_num = tf.placeholder(tf.int32)  # positive nums
        self.image_id = tf.placeholder(tf.int32)
        self.num_classes = 600
        self.compose_num_classes = 600
        self.num_fc = 1024
        self.verb_num_classes = 117
        self.obj_num_classes = 80
        self.scope = 'resnet_v1_101'
        self.stride = [
            16,
        ]
        self.lr = tf.placeholder(tf.float32)
        if tf.__version__ == '1.1.0':
            raise Exception('wrong tensorflow version 1.1.0')
        else:
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self.blocks = [
                resnet_v1_block('block1', base_depth=64, num_units=3,
                                stride=2),
                resnet_v1_block('block2',
                                base_depth=128,
                                num_units=4,
                                stride=2),
                resnet_v1_block('block3',
                                base_depth=256,
                                num_units=23,
                                stride=1),
                resnet_v1_block('block4',
                                base_depth=512,
                                num_units=3,
                                stride=1),
                resnet_v1_block('block5',
                                base_depth=512,
                                num_units=3,
                                stride=1)
            ]
            if ('unique_weights' in self.model_name or '_pa3' in self.model_name
                    or '_pa4' in self.model_name):
                print("add block6 unique_weights2")
                self.blocks.append(
                    resnet_v1_block('block6',
                                    base_depth=512,
                                    num_units=3,
                                    stride=1))
        """We copy from TIN. calculated by log(1/(n_c/sum(n_c)) c is the category and n_c is 
        the number of positive samples"""
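        # For example, a category with n_c = 100 positives out of
        # sum(n_c) = 1e6 would get log(1e6 / 100) = log(1e4) ~= 9.21, the
        # scale of the entries below.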
        self.HO_weight = np.array([
            9.192927, 9.778443, 10.338059, 9.164914, 9.075144, 10.045923,
            8.714437, 8.59822, 12.977117, 6.2745423, 11.227917, 6.765012,
            9.436157, 9.56762, 11.0675745, 11.530198, 9.609821, 9.897503,
            6.664475, 6.811699, 6.644726, 9.170454, 13.670264, 3.903943,
            10.556748, 8.814335, 9.519224, 12.753973, 11.590822, 8.278912,
            5.5245695, 9.7286825, 8.997436, 10.699849, 9.601237, 11.965516,
            9.192927, 10.220277, 6.056692, 7.734048, 8.42324, 6.586457,
            6.969533, 10.579222, 13.670264, 4.4531965, 9.326459, 9.288238,
            8.071842, 10.431585, 12.417501, 11.530198, 11.227917, 4.0678477,
            8.854023, 12.571651, 8.225684, 10.996116, 11.0675745, 10.100731,
            7.0376034, 7.463688, 12.571651, 14.363411, 5.4902234, 11.0675745,
            14.363411, 8.45805, 10.269067, 9.820116, 14.363411, 11.272368,
            11.105314, 7.981595, 9.198626, 3.3284247, 14.363411, 12.977117,
            9.300817, 10.032678, 12.571651, 10.114916, 10.471591, 13.264799,
            14.363411, 8.01953, 10.412168, 9.644913, 9.981384, 7.2197933,
            14.363411, 3.1178555, 11.031207, 8.934066, 7.546675, 6.386472,
            12.060826, 8.862153, 9.799063, 12.753973, 12.753973, 10.412168,
            10.8976755, 10.471591, 12.571651, 9.519224, 6.207762, 12.753973,
            6.60636, 6.2896967, 4.5198326, 9.7887, 13.670264, 11.878505,
            11.965516, 8.576513, 11.105314, 9.192927, 11.47304, 11.367679,
            9.275815, 11.367679, 9.944571, 11.590822, 10.451388, 9.511381,
            11.144535, 13.264799, 5.888291, 11.227917, 10.779892, 7.643191,
            11.105314, 9.414651, 11.965516, 14.363411, 12.28397, 9.909063,
            8.94731, 7.0330057, 8.129001, 7.2817025, 9.874775, 9.758241,
            11.105314, 5.0690055, 7.4768796, 10.129305, 9.54313, 13.264799,
            9.699972, 11.878505, 8.260853, 7.1437693, 6.9321113, 6.990665,
            8.8104515, 11.655361, 13.264799, 4.515912, 9.897503, 11.418972,
            8.113436, 8.795067, 10.236277, 12.753973, 14.363411, 9.352776,
            12.417501, 0.6271591, 12.060826, 12.060826, 12.166186, 5.2946343,
            11.318889, 9.8308115, 8.016022, 9.198626, 10.8976755, 13.670264,
            11.105314, 14.363411, 9.653881, 9.503599, 12.753973, 5.80546,
            9.653881, 9.592727, 12.977117, 13.670264, 7.995224, 8.639826,
            12.28397, 6.586876, 10.929424, 13.264799, 8.94731, 6.1026597,
            12.417501, 11.47304, 10.451388, 8.95624, 10.996116, 11.144535,
            11.031207, 13.670264, 13.670264, 6.397866, 7.513285, 9.981384,
            11.367679, 11.590822, 7.4348736, 4.415428, 12.166186, 8.573451,
            12.977117, 9.609821, 8.601359, 9.055143, 11.965516, 11.105314,
            13.264799, 5.8201604, 10.451388, 9.944571, 7.7855496, 14.363411,
            8.5463, 13.670264, 7.9288645, 5.7561946, 9.075144, 9.0701065,
            5.6871653, 11.318889, 10.252538, 9.758241, 9.407584, 13.670264,
            8.570397, 9.326459, 7.488179, 11.798462, 9.897503, 6.7530537,
            4.7828183, 9.519224, 7.6492405, 8.031909, 7.8180614, 4.451856,
            10.045923, 10.83705, 13.264799, 13.670264, 4.5245686, 14.363411,
            10.556748, 10.556748, 14.363411, 13.670264, 14.363411, 8.037262,
            8.59197, 9.738439, 8.652985, 10.045923, 9.400566, 10.9622135,
            11.965516, 10.032678, 5.9017305, 9.738439, 12.977117, 11.105314,
            10.725825, 9.080208, 11.272368, 14.363411, 14.363411, 13.264799,
            6.9279733, 9.153925, 8.075553, 9.126969, 14.363411, 8.903826,
            9.488214, 5.4571533, 10.129305, 10.579222, 12.571651, 11.965516,
            6.237189, 9.428937, 9.618479, 8.620408, 11.590822, 11.655361,
            9.968962, 10.8080635, 10.431585, 14.363411, 3.796231, 12.060826,
            10.302968, 9.551227, 8.75394, 10.579222, 9.944571, 14.363411,
            6.272396, 10.625742, 9.690582, 13.670264, 11.798462, 13.670264,
            11.724354, 9.993963, 8.230013, 9.100721, 10.374427, 7.865129,
            6.514087, 14.363411, 11.031207, 11.655361, 12.166186, 7.419324,
            9.421769, 9.653881, 10.996116, 12.571651, 13.670264, 5.912144,
            9.7887, 8.585759, 8.272101, 11.530198, 8.886948, 5.9870906,
            9.269661, 11.878505, 11.227917, 13.670264, 8.339964, 7.6763024,
            10.471591, 10.451388, 13.670264, 11.185357, 10.032678, 9.313555,
            12.571651, 3.993144, 9.379805, 9.609821, 14.363411, 9.709451,
            8.965248, 10.451388, 7.0609145, 10.579222, 13.264799, 10.49221,
            8.978916, 7.124196, 10.602211, 8.9743395, 7.77862, 8.073695,
            9.644913, 9.339531, 8.272101, 4.794418, 9.016304, 8.012526,
            10.674532, 14.363411, 7.995224, 12.753973, 5.5157638, 8.934066,
            10.779892, 7.930471, 11.724354, 8.85808, 5.9025764, 14.363411,
            12.753973, 12.417501, 8.59197, 10.513264, 10.338059, 14.363411,
            7.7079706, 14.363411, 13.264799, 13.264799, 10.752493, 14.363411,
            14.363411, 13.264799, 12.417501, 13.670264, 6.5661197, 12.977117,
            11.798462, 9.968962, 12.753973, 11.47304, 11.227917, 7.6763024,
            10.779892, 11.185357, 14.363411, 7.369478, 14.363411, 9.944571,
            10.779892, 10.471591, 9.54313, 9.148476, 10.285873, 10.412168,
            12.753973, 14.363411, 6.0308623, 13.670264, 10.725825, 12.977117,
            11.272368, 7.663911, 9.137665, 10.236277, 13.264799, 6.715625,
            10.9622135, 14.363411, 13.264799, 9.575919, 9.080208, 11.878505,
            7.1863923, 9.366199, 8.854023, 9.874775, 8.2857685, 13.670264,
            11.878505, 12.166186, 7.616999, 9.44343, 8.288065, 8.8104515,
            8.347254, 7.4738197, 10.302968, 6.936267, 11.272368, 7.058223,
            5.0138307, 12.753973, 10.173757, 9.863602, 11.318889, 9.54313,
            10.996116, 12.753973, 7.8339925, 7.569945, 7.4427395, 5.560738,
            12.753973, 10.725825, 10.252538, 9.307165, 8.491293, 7.9161053,
            7.8849015, 7.782772, 6.3088884, 8.866243, 9.8308115, 14.363411,
            10.8976755, 5.908519, 10.269067, 9.176025, 9.852551, 9.488214,
            8.90809, 8.537411, 9.653881, 8.662968, 11.965516, 10.143904,
            14.363411, 14.363411, 9.407584, 5.281472, 11.272368, 12.060826,
            14.363411, 7.4135547, 8.920994, 9.618479, 8.891141, 14.363411,
            12.060826, 11.965516, 10.9622135, 10.9622135, 14.363411, 5.658909,
            8.934066, 12.571651, 8.614018, 11.655361, 13.264799, 10.996116,
            13.670264, 8.965248, 9.326459, 11.144535, 14.363411, 6.0517673,
            10.513264, 8.7430105, 10.338059, 13.264799, 6.878481, 9.065094,
            8.87035, 14.363411, 9.92076, 6.5872955, 10.32036, 14.363411,
            9.944571, 11.798462, 10.9622135, 11.031207, 7.652888, 4.334878,
            13.670264, 13.670264, 14.363411, 10.725825, 12.417501, 14.363411,
            13.264799, 11.655361, 10.338059, 13.264799, 12.753973, 8.206432,
            8.916674, 8.59509, 14.363411, 7.376845, 11.798462, 11.530198,
            11.318889, 11.185357, 5.0664344, 11.185357, 9.372978, 10.471591,
            9.6629305, 11.367679, 8.73579, 9.080208, 11.724354, 5.04781,
            7.3777695, 7.065643, 12.571651, 11.724354, 12.166186, 12.166186,
            7.215852, 4.374113, 11.655361, 11.530198, 14.363411, 6.4993753,
            11.031207, 8.344818, 10.513264, 10.032678, 14.363411, 14.363411,
            4.5873594, 12.28397, 13.670264, 12.977117, 10.032678, 9.609821
        ],
                                  dtype='float32').reshape(1, 600)
        num_inst_path = cfg.ROOT_DIR + '/Data/num_inst.npy'
        num_inst = np.load(num_inst_path)
        self.num_inst = num_inst

        verb_to_HO_matrix, obj_to_HO_matrix = get_convert_matrix(
            self.verb_num_classes, self.obj_num_classes)

        self.obj_to_HO_matrix = tf.constant(obj_to_HO_matrix, tf.float32)
        self.verb_to_HO_matrix = tf.constant(verb_to_HO_matrix, tf.float32)
        self.gt_obj_class = tf.cast(
            tf.matmul(
                self.gt_class_HO, self.obj_to_HO_matrix, transpose_b=True) > 0,
            tf.float32)
        self.gt_verb_class = tf.cast(
            tf.matmul(self.gt_class_HO,
                      self.verb_to_HO_matrix,
                      transpose_b=True) > 0, tf.float32)
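The two conversions above assume get_convert_matrix returns binary [verb x HOI] and [object x HOI] incidence matrices (an assumption consistent with their use here): multiplying a multi-hot HOI label by the transposed matrix and thresholding at zero marks every verb/object taking part in an active HOI. A toy numpy sketch:

import numpy as np

# hypothetical toy setup: 3 HOI classes, 2 verbs
verb_to_HO = np.array([[1, 0, 1],    # verb 0 occurs in HOI 0 and HOI 2
                       [0, 1, 0]],   # verb 1 occurs in HOI 1
                      dtype=np.float32)
gt_HO = np.array([[0, 1, 1]], dtype=np.float32)   # sample labelled with HOI 1, 2
gt_verb = np.dot(gt_HO, verb_to_HO.T) > 0         # -> array([[ True,  True]])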
Example #27
0
def resnet_base(img_batch, scope_name, is_training=False):
    '''
    This code is derived from light-head RCNN:
    https://github.com/zengarden/light_head_rcnn

    It makes freezing blocks convenient, so we adopt this structure.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101. Check your network name.')

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3', base_depth=256, num_units=9, stride=2),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
    ]
    # when using FPN, the stride list is [1, 2, 2]

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(img_batch,
                                           64,
                                           7,
                                           stride=2,
                                           scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3],
                                  stride=2,
                                  padding='VALID',
                                  scope='pool1')
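            # the explicit tf.pad above adds one pixel of zero padding so the
            # 3x3/stride-2 'VALID' max pool behaves like a deterministic 'SAME'
            # pool for any input size; conv2d_same applies the analogous fixed
            # padding before a 'VALID' convolution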

    not_freezed = [False] * 0 + (4 - 0) * [True]
    # FIXED_BLOCKS is hard-coded to 0 here (no frozen blocks); it can normally be 1~3

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, end_points_C2 = resnet_v1.resnet_v1(net,
                                                blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
        #C2=tf.layers.average_pooling2d(inputs=C2, pool_size=3, strides=2,padding="valid")
        #C2=tf.reduce_mean(C2, axis=[1, 2], keep_dims=False, name='global_average_pooling')
    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, end_points_C3 = resnet_v1.resnet_v1(C2,
                                                blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
        C3 = slim.avg_pool2d(C3, 2)
        #C3 = tf.reduce_mean(C3, axis=[1, 2], keep_dims=False, name='global_average_pooling')
    #return C3
    '''with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, end_points_C4 = resnet_v1.resnet_v1(C3,
                                                blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)'''
    return C3
Example #28
0
def resnet_base(img_batch, scope_name, is_training=True):
    '''
    This code is derived from light-head RCNN:
    https://github.com/zengarden/light_head_rcnn

    It makes freezing blocks convenient, so we adopt this structure.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101. Check your network name.')

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3',
                        base_depth=256,
                        num_units=middle_num_units,
                        stride=2),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
    ]
    # when using FPN, the stride list is [1, 2, 2]

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(img_batch,
                                           64,
                                           7,
                                           stride=2,
                                           scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3],
                                  stride=2,
                                  padding='VALID',
                                  scope='pool1')

    not_freezed = [False] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]
    # Fixed_Blocks can be 1~3

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, end_points_C2 = resnet_v1.resnet_v1(net,
                                                blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')
    # add_heatmap(C2, name='Layer2/C2_heat')

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, end_points_C3 = resnet_v1.resnet_v1(C2,
                                                blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')
    # add_heatmap(C3, name='Layer3/C3_heat')
    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, end_points_C4 = resnet_v1.resnet_v1(C3,
                                                blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # add_heatmap(C4, name='Layer4/C4_heat')

    # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5, end_points_C5 = resnet_v1.resnet_v1(C4,
                                                blocks[3:4],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
    # add_heatmap(C5, name='Layer5/C5_heat')

    feature_dict = {
        'C2':
        end_points_C2['{}/block1/unit_2/bottleneck_v1'.format(scope_name)],
        'C3':
        end_points_C3['{}/block2/unit_3/bottleneck_v1'.format(scope_name)],
        'C4':
        end_points_C4['{}/block3/unit_{}/bottleneck_v1'.format(
            scope_name, middle_num_units - 1)],
        'C5':
        end_points_C5['{}/block4/unit_3/bottleneck_v1'.format(scope_name)],
        # 'C5': end_points_C5['{}/block4'.format(scope_name)],
    }

    pyramid_dict = {}
    with tf.variable_scope('build_pyramid'):
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(
                                cfgs.WEIGHT_DECAY),
                            activation_fn=None,
                            normalizer_fn=None):

            P5 = slim.conv2d(feature_dict['C5'],
                             num_outputs=256,
                             kernel_size=[1, 1],
                             stride=1,
                             scope='build_P5')

            pyramid_dict['P5'] = P5

            for level in range(4, 1, -1):  # build [P4, P3, P2]

                pyramid_dict['P%d' % level] = fusion_two_layer(
                    C_i=feature_dict["C%d" % level],
                    P_j=pyramid_dict["P%d" % (level + 1)],
                    scope='build_P%d' % level)
            for level in range(5, 1, -1):
                pyramid_dict['P%d' % level] = slim.conv2d(
                    pyramid_dict['P%d' % level],
                    num_outputs=256,
                    kernel_size=[3, 3],
                    padding="SAME",
                    stride=1,
                    scope="fuse_P%d" % level)

            if "P6" in cfgs.LEVLES:
                P6 = slim.avg_pool2d(pyramid_dict['P5'],
                                     kernel_size=[1, 1],
                                     stride=2,
                                     scope='build_P6')
                pyramid_dict['P6'] = P6

    # for level in range(5, 1, -1):
    #     add_heatmap(feature_dict['C%d' % level], name='Layer%d/C%d_heat' % (level, level))
    #     add_heatmap(pyramid_dict['P%d' % level], name='Layer%d/P%d_heat' % (level, level))

    # return [P2, P3, P4, P5, P6]
    print("we are in Pyramid::-======>>>>")
    print(cfgs.LEVLES)
    print("base_anchor_size are: ", cfgs.BASE_ANCHOR_SIZE_LIST)
    print(20 * "__")
    return [pyramid_dict[level_name] for level_name in cfgs.LEVLES]
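fusion_two_layer comes from the surrounding repo and is not shown here; a common FPN-style implementation (a sketch under that assumption, not the repo's verbatim code) upsamples the coarser pyramid level and adds a 1x1-reduced lateral connection:

def fusion_two_layer(C_i, P_j, scope):
    # nearest-neighbour upsample P_j to C_i's spatial size, project C_i to
    # 256 channels with a 1x1 conv, and sum the two maps
    with tf.variable_scope(scope):
        target_h, target_w = tf.shape(C_i)[1], tf.shape(C_i)[2]
        upsampled_p = tf.image.resize_nearest_neighbor(
            P_j, [target_h, target_w], name='upsample_P')
        reduced_c = slim.conv2d(C_i, num_outputs=256, kernel_size=[1, 1],
                                stride=1, scope='reduce_dim_C')
        return upsampled_p + reduced_c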
Example #29
0
    def __init__(self):
        self.visualize = {}
        self.intermediate = {}
        self.predictions = {}
        self.score_summaries = {}
        self.event_summaries = {}
        self.train_summaries = []
        self.losses = {}

        self.image       = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
        self.spatial     = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name = 'sp')
        self.H_boxes     = tf.placeholder(tf.float32, shape=[None, 5], name = 'H_boxes')
        self.O_boxes     = tf.placeholder(tf.float32, shape=[None, 5], name = 'O_boxes')
        self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 600], name = 'gt_class_HO')
        self.gt_binary_label = tf.placeholder(tf.float32, shape=[None, 2], name = 'gt_binary_label')
        self.H_num       = tf.placeholder(tf.int32)
        self.HO_weight   = np.array([
                9.192927, 9.778443, 10.338059, 9.164914, 9.075144, 10.045923, 8.714437, 8.59822, 12.977117, 6.2745423, 
                11.227917, 6.765012, 9.436157, 9.56762, 11.0675745, 11.530198, 9.609821, 9.897503, 6.664475, 6.811699, 
                6.644726, 9.170454, 13.670264, 3.903943, 10.556748, 8.814335, 9.519224, 12.753973, 11.590822, 8.278912, 
                5.5245695, 9.7286825, 8.997436, 10.699849, 9.601237, 11.965516, 9.192927, 10.220277, 6.056692, 7.734048, 
                8.42324, 6.586457, 6.969533, 10.579222, 13.670264, 4.4531965, 9.326459, 9.288238, 8.071842, 10.431585, 
                12.417501, 11.530198, 11.227917, 4.0678477, 8.854023, 12.571651, 8.225684, 10.996116, 11.0675745, 10.100731, 
                7.0376034, 7.463688, 12.571651, 14.363411, 5.4902234, 11.0675745, 14.363411, 8.45805, 10.269067, 9.820116, 
                14.363411, 11.272368, 11.105314, 7.981595, 9.198626, 3.3284247, 14.363411, 12.977117, 9.300817, 10.032678, 
                12.571651, 10.114916, 10.471591, 13.264799, 14.363411, 8.01953, 10.412168, 9.644913, 9.981384, 7.2197933, 
                14.363411, 3.1178555, 11.031207, 8.934066, 7.546675, 6.386472, 12.060826, 8.862153, 9.799063, 12.753973, 
                12.753973, 10.412168, 10.8976755, 10.471591, 12.571651, 9.519224, 6.207762, 12.753973, 6.60636, 6.2896967, 
                4.5198326, 9.7887, 13.670264, 11.878505, 11.965516, 8.576513, 11.105314, 9.192927, 11.47304, 11.367679, 
                9.275815, 11.367679, 9.944571, 11.590822, 10.451388, 9.511381, 11.144535, 13.264799, 5.888291, 11.227917, 
                10.779892, 7.643191, 11.105314, 9.414651, 11.965516, 14.363411, 12.28397, 9.909063, 8.94731, 7.0330057, 
                8.129001, 7.2817025, 9.874775, 9.758241, 11.105314, 5.0690055, 7.4768796, 10.129305, 9.54313, 13.264799, 
                9.699972, 11.878505, 8.260853, 7.1437693, 6.9321113, 6.990665, 8.8104515, 11.655361, 13.264799, 4.515912, 
                9.897503, 11.418972, 8.113436, 8.795067, 10.236277, 12.753973, 14.363411, 9.352776, 12.417501, 0.6271591, 
                12.060826, 12.060826, 12.166186, 5.2946343, 11.318889, 9.8308115, 8.016022, 9.198626, 10.8976755, 13.670264, 
                11.105314, 14.363411, 9.653881, 9.503599, 12.753973, 5.80546, 9.653881, 9.592727, 12.977117, 13.670264, 
                7.995224, 8.639826, 12.28397, 6.586876, 10.929424, 13.264799, 8.94731, 6.1026597, 12.417501, 11.47304, 
                10.451388, 8.95624, 10.996116, 11.144535, 11.031207, 13.670264, 13.670264, 6.397866, 7.513285, 9.981384, 
                11.367679, 11.590822, 7.4348736, 4.415428, 12.166186, 8.573451, 12.977117, 9.609821, 8.601359, 9.055143, 
                11.965516, 11.105314, 13.264799, 5.8201604, 10.451388, 9.944571, 7.7855496, 14.363411, 8.5463, 13.670264, 
                7.9288645, 5.7561946, 9.075144, 9.0701065, 5.6871653, 11.318889, 10.252538, 9.758241, 9.407584, 13.670264, 
                8.570397, 9.326459, 7.488179, 11.798462, 9.897503, 6.7530537, 4.7828183, 9.519224, 7.6492405, 8.031909, 
                7.8180614, 4.451856, 10.045923, 10.83705, 13.264799, 13.670264, 4.5245686, 14.363411, 10.556748, 10.556748, 
                14.363411, 13.670264, 14.363411, 8.037262, 8.59197, 9.738439, 8.652985, 10.045923, 9.400566, 10.9622135, 
                11.965516, 10.032678, 5.9017305, 9.738439, 12.977117, 11.105314, 10.725825, 9.080208, 11.272368, 14.363411, 
                14.363411, 13.264799, 6.9279733, 9.153925, 8.075553, 9.126969, 14.363411, 8.903826, 9.488214, 5.4571533, 
                10.129305, 10.579222, 12.571651, 11.965516, 6.237189, 9.428937, 9.618479, 8.620408, 11.590822, 11.655361, 
                9.968962, 10.8080635, 10.431585, 14.363411, 3.796231, 12.060826, 10.302968, 9.551227, 8.75394, 10.579222, 
                9.944571, 14.363411, 6.272396, 10.625742, 9.690582, 13.670264, 11.798462, 13.670264, 11.724354, 9.993963, 
                8.230013, 9.100721, 10.374427, 7.865129, 6.514087, 14.363411, 11.031207, 11.655361, 12.166186, 7.419324, 
                9.421769, 9.653881, 10.996116, 12.571651, 13.670264, 5.912144, 9.7887, 8.585759, 8.272101, 11.530198, 8.886948, 
                5.9870906, 9.269661, 11.878505, 11.227917, 13.670264, 8.339964, 7.6763024, 10.471591, 10.451388, 13.670264, 
                11.185357, 10.032678, 9.313555, 12.571651, 3.993144, 9.379805, 9.609821, 14.363411, 9.709451, 8.965248, 
                10.451388, 7.0609145, 10.579222, 13.264799, 10.49221, 8.978916, 7.124196, 10.602211, 8.9743395, 7.77862, 
                8.073695, 9.644913, 9.339531, 8.272101, 4.794418, 9.016304, 8.012526, 10.674532, 14.363411, 7.995224, 
                12.753973, 5.5157638, 8.934066, 10.779892, 7.930471, 11.724354, 8.85808, 5.9025764, 14.363411, 12.753973, 
                12.417501, 8.59197, 10.513264, 10.338059, 14.363411, 7.7079706, 14.363411, 13.264799, 13.264799, 10.752493, 
                14.363411, 14.363411, 13.264799, 12.417501, 13.670264, 6.5661197, 12.977117, 11.798462, 9.968962, 12.753973, 
                11.47304, 11.227917, 7.6763024, 10.779892, 11.185357, 14.363411, 7.369478, 14.363411, 9.944571, 10.779892, 
                10.471591, 9.54313, 9.148476, 10.285873, 10.412168, 12.753973, 14.363411, 6.0308623, 13.670264, 10.725825, 
                12.977117, 11.272368, 7.663911, 9.137665, 10.236277, 13.264799, 6.715625, 10.9622135, 14.363411, 13.264799, 
                9.575919, 9.080208, 11.878505, 7.1863923, 9.366199, 8.854023, 9.874775, 8.2857685, 13.670264, 11.878505, 
                12.166186, 7.616999, 9.44343, 8.288065, 8.8104515, 8.347254, 7.4738197, 10.302968, 6.936267, 11.272368, 
                7.058223, 5.0138307, 12.753973, 10.173757, 9.863602, 11.318889, 9.54313, 10.996116, 12.753973, 7.8339925, 
                7.569945, 7.4427395, 5.560738, 12.753973, 10.725825, 10.252538, 9.307165, 8.491293, 7.9161053, 7.8849015, 
                7.782772, 6.3088884, 8.866243, 9.8308115, 14.363411, 10.8976755, 5.908519, 10.269067, 9.176025, 9.852551, 
                9.488214, 8.90809, 8.537411, 9.653881, 8.662968, 11.965516, 10.143904, 14.363411, 14.363411, 9.407584, 
                5.281472, 11.272368, 12.060826, 14.363411, 7.4135547, 8.920994, 9.618479, 8.891141, 14.363411, 12.060826, 
                11.965516, 10.9622135, 10.9622135, 14.363411, 5.658909, 8.934066, 12.571651, 8.614018, 11.655361, 13.264799, 
                10.996116, 13.670264, 8.965248, 9.326459, 11.144535, 14.363411, 6.0517673, 10.513264, 8.7430105, 10.338059, 
                13.264799, 6.878481, 9.065094, 8.87035, 14.363411, 9.92076, 6.5872955, 10.32036, 14.363411, 9.944571, 
                11.798462, 10.9622135, 11.031207, 7.652888, 4.334878, 13.670264, 13.670264, 14.363411, 10.725825, 12.417501, 
                14.363411, 13.264799, 11.655361, 10.338059, 13.264799, 12.753973, 8.206432, 8.916674, 8.59509, 14.363411, 
                7.376845, 11.798462, 11.530198, 11.318889, 11.185357, 5.0664344, 11.185357, 9.372978, 10.471591, 9.6629305, 
                11.367679, 8.73579, 9.080208, 11.724354, 5.04781, 7.3777695, 7.065643, 12.571651, 11.724354, 12.166186, 
                12.166186, 7.215852, 4.374113, 11.655361, 11.530198, 14.363411, 6.4993753, 11.031207, 8.344818, 10.513264, 
                10.032678, 14.363411, 14.363411, 4.5873594, 12.28397, 13.670264, 12.977117, 10.032678, 9.609821
            ], dtype = 'float32').reshape(1,600)
        self.binary_weight = np.array([1.6094379124341003, 0.22314355131420976], dtype = 'float32').reshape(1,2)
        self.num_classes = 600 # HOI
        self.num_binary  = 2 # existence (0 or 1) of HOI
        self.num_fc      = 1024
        self.scope       = 'resnet_v1_50'
        self.stride      = [16, ]
        self.lr          = tf.placeholder(tf.float32)
        if tf.__version__ == '1.1.0':
            self.blocks     = [resnet_utils.Block('block1', resnet_v1.bottleneck,[(256,   64, 1)] * 2 + [(256,   64, 2)]),
                               resnet_utils.Block('block2', resnet_v1.bottleneck,[(512,  128, 1)] * 3 + [(512,  128, 2)]),
                               resnet_utils.Block('block3', resnet_v1.bottleneck,[(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                               resnet_utils.Block('block4', resnet_v1.bottleneck,[(2048, 512, 1)] * 3),
                               resnet_utils.Block('block5', resnet_v1.bottleneck,[(2048, 512, 1)] * 3)]
        else: # we use tf 1.2.0 here, Resnet-50
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self.blocks = [resnet_v1_block('block1', base_depth=64,  num_units=3, stride=2), # a resnet_v1 bottleneck block
                           resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                           resnet_v1_block('block3', base_depth=256, num_units=6, stride=1), # feature former
                           resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                           resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
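Note that the two branches build the same topology: slim's resnet_v1_block(scope, base_depth, num_units, stride) expands to a Block whose units have depth 4 * base_depth, with the stride carried by the last unit. For instance:

resnet_v1_block('block1', base_depth=64, num_units=3, stride=2)
# == resnet_utils.Block('block1', resnet_v1.bottleneck,
#                       [(256, 64, 1)] * 2 + [(256, 64, 2)])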
Example #30
0
    def __init__(self, model_name):
        self.model_name = model_name
        self.visualize = {}
        self.test_visualize = {}
        self.intermediate = {}
        self.predictions = {}
        self.score_summaries = {}
        self.event_summaries = {}
        self.train_summaries = []
        self.losses = {}

        self.image       = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
        self.spatial     = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name = 'sp')
        # self.Hsp_boxes   = tf.placeholder(tf.float32, shape=[None, 5], name = 'Hsp_boxes')
        self.H_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='H_boxes')
        self.O_boxes     = tf.placeholder(tf.float32, shape=[None, 5], name = 'O_boxes')
        self.gt_class_H  = tf.placeholder(tf.float32, shape=[None, 24], name = 'gt_class_H')
        self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 24], name = 'gt_class_HO')
        self.gt_class_sp = tf.placeholder(tf.float32, shape=[None, 24], name = 'gt_class_sp')
        self.Mask_HO     = tf.placeholder(tf.float32, shape=[None, 24], name = 'HO_mask')
        self.Mask_H      = tf.placeholder(tf.float32, shape=[None, 24], name = 'H_mask')
        self.Mask_sp     = tf.placeholder(tf.float32, shape=[None, 24], name = 'sp_mask')
        self.gt_compose  = tf.placeholder(tf.float32, shape=[None, 222], name='gt_compose')
        self.gt_obj = tf.placeholder(tf.float32, shape=[None, 80], name='gt_obj')
        self.H_num       = tf.placeholder(tf.int32)
        self.image_id = tf.placeholder(tf.int32)
        self.num_classes = 24
        if '_t4_' in self.model_name:
            self.num_classes = 222
        if '_t5_' in self.model_name:
            self.verb_num_classes = 21
            self.num_classes = 222
        self.num_fc      = 1024
        self.verb_num_classes = 24
        self.obj_num_classes = 80
        self.scope       = 'resnet_v1_50'
        self.stride      = [16, ]
        # self.lr          = tf.placeholder(tf.float32)
        if tf.__version__ == '1.1.0':
            self.blocks     = [resnet_utils.Block('block1', resnet_v1.bottleneck,[(256,   64, 1)] * 2 + [(256,   64, 2)]),
                               resnet_utils.Block('block2', resnet_v1.bottleneck,[(512,  128, 1)] * 3 + [(512,  128, 2)]),
                               resnet_utils.Block('block3', resnet_v1.bottleneck,[(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                               resnet_utils.Block('block4', resnet_v1.bottleneck,[(2048, 512, 1)] * 3),
                               resnet_utils.Block('block5', resnet_v1.bottleneck,[(2048, 512, 1)] * 3)]
        else:
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self.blocks = [resnet_v1_block('block1', base_depth=64,  num_units=3, stride=2),
                           resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                           resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                           resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                           resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
            if 'unique_weights' in self.model_name:
                print("unique_weights2")
                self.blocks.append(resnet_v1_block('block6', base_depth=512, num_units=3, stride=1))

        # remove 3, 17, 22, 23, 27
        self.HO_weight = np.array([3.3510249, 3.4552405, 4.0257854, 4.088436,
                                   3.4370995, 3.85842, 4.637334, 3.5487218, 3.536237,
                                   2.5578923, 3.342811, 3.8897269, 4.70686, 3.3952892,
                                   3.9706533, 4.504736, 1.4873443, 3.700363,
                                   4.1058283, 3.6298118, 5.0808263,
                                   1.520838, 3.3888445, 3.9899964], dtype='float32').reshape(1, 24)
        self.H_weight = np.array([4.0984106, 4.102459, 4.0414762, 4.0414762,
                                  3.9768186, 4.23686, 5.3542085, 3.723717, 3.4699364,
                                  2.4587274, 3.7167964, 4.08836, 5.050695, 3.9077065,
                                  4.534647, 3.4699364, 1.8585607, 3.9433942,
                                  3.9433942, 4.3523254, 5.138182,
                                  1.7807873, 4.080392, 4.5761204], dtype='float32').reshape(1, 24)
        self.reset_classes()
Example #31
0
 def __init__(self):
     self.visualize = {}
     self.intermediate = {}
     self.predictions = {}
     self.score_summaries = {}
     self.event_summaries = {}
     self.train_summaries = []
     self.losses = {}
     self.image = tf.placeholder(tf.float32,
                                 shape=[1, None, None, 3],
                                 name='image')  #scene stream
     self.H_boxes = tf.placeholder(tf.float32,
                                   shape=[None, 5],
                                   name='H_boxes')  # Human stream
     self.P_boxes = tf.placeholder(tf.float32,
                                   shape=[None, 10, 5],
                                   name='P_boxes')  # PaSta stream
     self.gt_verb = tf.placeholder(tf.float32,
                                   shape=[None, 80],
                                   name='gt_class_verb')  # target verb
     self.H_num = tf.placeholder(tf.int32)
     self.verb_weight = np.array(verb80, dtype='float32').reshape(1, -1)
     self.num_classes = 80  # HOI
     self.num_pasta0 = 12  # pasta0 ankle
     self.num_pasta1 = 10  # pasta1 knee
     self.num_pasta2 = 5  # pasta2 hip
     self.num_pasta3 = 31  # pasta3 hand
     self.num_pasta4 = 5  # pasta4 shoulder
     self.num_pasta5 = 13  # pasta5 head
     self.num_fc = 1024
     self.scope = 'resnet_v1_50'
     self.stride = [
         16,
     ]
     self.lr = tf.placeholder(tf.float32)
     if tf.__version__ == '1.1.0':
         self.blocks = [
             resnet_utils.Block('block1', resnet_v1.bottleneck,
                                [(256, 64, 1)] * 2 + [(256, 64, 2)]),
             resnet_utils.Block('block2', resnet_v1.bottleneck,
                                [(512, 128, 1)] * 3 + [(512, 128, 2)]),
             resnet_utils.Block('block3', resnet_v1.bottleneck,
                                [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
             resnet_utils.Block('block4', resnet_v1.bottleneck,
                                [(2048, 512, 1)] * 3),
             resnet_utils.Block('block5', resnet_v1.bottleneck,
                                [(2048, 512, 1)] * 3)
         ]
     else:  # we use tf 1.2.0 here, Resnet-50
         from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
         self.blocks = [
             resnet_v1_block('block1', base_depth=64, num_units=3,
                             stride=2),  # a resnet_v1 bottleneck block
             resnet_v1_block('block2',
                             base_depth=128,
                             num_units=4,
                             stride=2),
             resnet_v1_block('block3',
                             base_depth=256,
                             num_units=6,
                             stride=1),  # feature former
             resnet_v1_block('block4',
                             base_depth=512,
                             num_units=3,
                             stride=1),
             resnet_v1_block('block5',
                             base_depth=512,
                             num_units=3,
                             stride=1)
         ]
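As elsewhere in these snippets, the 5-wide box placeholders appear to follow the tf-faster-rcnn ROI convention of [batch_index, x1, y1, x2, y2] in pixel coordinates (an assumption based on how the crop-and-pool layers consume them), so a hypothetical single-human feed would be:

import numpy as np

h_box = np.array([[0, 50, 80, 200, 300]], dtype=np.float32)  # one box on image 0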
Example #32
0
    def _build_network(self, sess, is_training=True):
        # select initializers
        if cfg.TRAIN.TRUNCATED:
            initializer = tf.truncated_normal_initializer(mean=0.0,
                                                          stddev=0.01)
            initializer_bbox = tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.001)
        else:
            initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = tf.random_normal_initializer(mean=0.0,
                                                            stddev=0.001)

        # choose different blocks for different number of layers
        if self._num_layers == 50:
            blocks = [
                resnet_v1_block('block1', base_depth=64, num_units=3,
                                stride=2),
                resnet_v1_block('block2',
                                base_depth=128,
                                num_units=4,
                                stride=2),
                # use stride 1 for the last conv4 layer
                resnet_v1_block('block3',
                                base_depth=256,
                                num_units=6,
                                stride=1),
                resnet_v1_block('block4',
                                base_depth=512,
                                num_units=3,
                                stride=1)
            ]

        elif self._num_layers == 101:
            blocks = [
                resnet_v1_block('block1', base_depth=64, num_units=3,
                                stride=2),
                resnet_v1_block('block2',
                                base_depth=128,
                                num_units=4,
                                stride=2),
                # use stride 1 for the last conv4 layer
                resnet_v1_block('block3',
                                base_depth=256,
                                num_units=23,
                                stride=1),
                resnet_v1_block('block4',
                                base_depth=512,
                                num_units=3,
                                stride=1)
            ]

        elif self._num_layers == 152:
            blocks = [
                resnet_v1_block('block1', base_depth=64, num_units=3,
                                stride=2),
                resnet_v1_block('block2',
                                base_depth=128,
                                num_units=8,
                                stride=2),
                # use stride 1 for the last conv4 layer
                resnet_v1_block('block3',
                                base_depth=256,
                                num_units=36,
                                stride=1),
                resnet_v1_block('block4',
                                base_depth=512,
                                num_units=3,
                                stride=1)
            ]

        else:
            # other numbers are not supported
            raise NotImplementedError

        assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 3)
        # Now the base is always fixed during training
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net_conv = self._build_base()
            base_output = net_conv
        if cfg.RESNET.FIXED_BLOCKS > 0:
            with slim.arg_scope(resnet_arg_scope(is_training=False)):
                net_conv, end_points_initial = resnet_v1.resnet_v1(
                    net_conv,
                    blocks[0:cfg.RESNET.FIXED_BLOCKS],
                    global_pool=False,
                    include_root_block=False,
                    scope=self._resnet_scope)
        if cfg.RESNET.FIXED_BLOCKS < 3:
            with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
                net_conv, end_points_initial = resnet_v1.resnet_v1(
                    net_conv,
                    blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                    global_pool=False,
                    include_root_block=False,
                    scope=self._resnet_scope)

        self._act_summaries.append(net_conv)
        self._layers['head'] = net_conv
        with tf.variable_scope(self._resnet_scope, self._resnet_scope):
            # build the anchors for the image
            self._anchor_component()
            # region proposal network
            rois = self._region_proposal(net_conv, is_training, initializer)
            # region of interest pooling
            if cfg.POOLING_MODE == 'crop':
                pool5 = self._crop_pool_layer(net_conv, rois, "pool5")
            else:
                raise NotImplementedError

        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            fc7, end_points_last = resnet_v1.resnet_v1(
                pool5,
                blocks[-1:],
                global_pool=False,
                include_root_block=False,
                scope=self._resnet_scope)
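            # NOTE: the end-point keys below hard-code the 'resnet_v1_152'
            # scope, so this feature-concatenation branch only matches when
            # self._num_layers == 152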

            print("Layer dimensions")
            print(base_output.get_shape())
            print(end_points_initial['resnet_v1_152/block2'].get_shape())
            print(end_points_initial['resnet_v1_152/block3'].get_shape())
            concatenated_features_lower = tf.concat(
                values=[
                    tf.reduce_mean(base_output, axis=[1, 2]),
                    tf.reduce_mean(end_points_initial['resnet_v1_152/block2'],
                                   axis=[1, 2]),
                    tf.reduce_mean(end_points_initial['resnet_v1_152/block3'],
                                   axis=[1, 2])
                ],
                axis=1)
            print("Concatenated lower features dimensions")
            feature_size = concatenated_features_lower.get_shape().dims[1].value
            print(feature_size)
            # concatenated_features = tf.reshape(tf.tile(concatenated_features_lower, [1, 256]), [-1, tf.shape(concatenated_features_lower)[1]])
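            # (assumption) tiling the [1, feature_size] image-level vector 256
            # times and reshaping yields one identical copy per ROI -- the
            # proposal count is fixed at 256 here -- so it can be concatenated
            # with each ROI's pooled feature below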
            concatenated_features = tf.reshape(
                tf.tile(concatenated_features_lower, [1, 256]),
                [256, feature_size])
            print(concatenated_features.get_shape())
            concatenated_features = tf.concat(values=[
                concatenated_features,
                tf.reduce_mean(end_points_last['resnet_v1_152/block4'],
                               axis=[1, 2])
            ],
                                              axis=1)
            print(concatenated_features.get_shape())

        with tf.variable_scope(self._resnet_scope, self._resnet_scope):
            # average pooling done by reduce_mean
            fc7 = tf.reduce_mean(fc7, axis=[1, 2])
            print(fc7.get_shape())
            final_feature_vec = tf.concat(values=[concatenated_features, fc7],
                                          axis=1)
            print("Final feature vector dimensions")
            print(final_feature_vec.get_shape())
            # region classification
            cls_prob, bbox_pred = self._region_classification(
                fc7, is_training, initializer, initializer_bbox)

        self._score_summaries.update(self._predictions)

        return rois, cls_prob, bbox_pred
Example #33
0
def resnet_base(rgb_img_batch, ir_img_batch, scope_name, is_training=True):

    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError('We only support resnet_v1_50 or resnet_v1_101.')
    org_scope_name = scope_name
    blocks = [resnet_v1_block('RGB/resnet_v1_50/block1', base_depth=64, num_units=3, stride=2),
              resnet_v1_block('RGB/resnet_v1_50/block2', base_depth=128, num_units=4, stride=2),
              resnet_v1_block('RGB/resnet_v1_50/block3', base_depth=256, num_units=middle_num_units, stride=2),
              resnet_v1_block('RGB/resnet_v1_50/block4', base_depth=512, num_units=3, stride=1)]
    # when using FPN, the stride list is [1, 2, 2]

    scope_name = "RGB/"+org_scope_name

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net_rgb = resnet_utils.conv2d_same(
                rgb_img_batch, 64, 7, stride=2, scope='conv1')
            net_rgb = tf.pad(net_rgb, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net_rgb = slim.max_pool2d(
                net_rgb, [3, 3], stride=2, padding='VALID', scope='pool1')

    not_freezed = [False] * cfgs.RGB_FIXED_BLOCKS + (4-cfgs.RGB_FIXED_BLOCKS)*[True]
    # Fixed_Blocks can be 1~3

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2_rgb, end_points_C2_rgb = resnet_v1.resnet_v1(net_rgb,
                                                blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')
    # add_heatmap(C2, name='Layer2/C2_heat')

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3_rgb, end_points_C3_rgb = resnet_v1.resnet_v1(C2_rgb,
                                                blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')
    # add_heatmap(C3, name='Layer3/C3_heat')
    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4_rgb, end_points_C4_rgb = resnet_v1.resnet_v1(C3_rgb,
                                                blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # add_heatmap(C4, name='Layer4/C4_heat')

    # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5_rgb, end_points_C5_rgb = resnet_v1.resnet_v1(C4_rgb,
                                                blocks[3:4],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
    # add_heatmap(C5, name='Layer5/C5_heat')
    blocks = [resnet_v1_block('IR/resnet_v1_50/block1', base_depth=64, num_units=3, stride=2),
              resnet_v1_block('IR/resnet_v1_50/block2', base_depth=128, num_units=4, stride=2),
              resnet_v1_block('IR/resnet_v1_50/block3', base_depth=256, num_units=middle_num_units, stride=2),
              resnet_v1_block('IR/resnet_v1_50/block4', base_depth=512, num_units=3, stride=1)]

    scope_name = "IR/"+org_scope_name

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Build the first few layers manually, because 'SAME' padding can behave
            # inconsistently for images of different sizes: it may pad by 0 or by 1.
            net_ir = resnet_utils.conv2d_same(
                ir_img_batch, 64, 7, stride=2, scope='conv1')
            net_ir = tf.pad(net_ir, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net_ir = slim.max_pool2d(
                net_ir, [3, 3], stride=2, padding='VALID', scope='pool1')

    not_freezed = [False] * cfgs.IR_FIXED_BLOCKS + [True] * (4 - cfgs.IR_FIXED_BLOCKS)
    # cfgs.IR_FIXED_BLOCKS can range from 1 to 3.

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2_ir, end_points_C2_ir = resnet_v1.resnet_v1(net_ir,
                                                blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')
    # add_heatmap(C2, name='Layer2/C2_heat')

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3_ir, end_points_C3_ir = resnet_v1.resnet_v1(C2_ir,
                                                blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')
    # add_heatmap(C3, name='Layer3/C3_heat')
    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4_ir, end_points_C4_ir = resnet_v1.resnet_v1(C3_ir,
                                                blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # add_heatmap(C4, name='Layer4/C4_heat')

    # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5_ir, end_points_C5_ir = resnet_v1.resnet_v1(C4_ir,
                                                blocks[3:4],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
    # add_heatmap(C5, name='Layer5/C5_heat')
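
    # Channel-wise fusion of the two streams: for each level, the RGB and IR
    # endpoint maps below are concatenated along the channel axis. The doubled
    # "RGB/resnet_v1_50" prefix in the endpoint keys comes from the block names
    # ('RGB/resnet_v1_50/blockN') combined with the enclosing variable scope.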

    rgb_prefix = "RGB/resnet_v1_50/RGB/" + org_scope_name
    ir_prefix = "IR/resnet_v1_50/IR/" + org_scope_name

    multi_end_points_C2 = tf.concat(axis=3, values=[
        end_points_C2_rgb['{}/block1/unit_2/bottleneck_v1'.format(rgb_prefix)],
        end_points_C2_ir['{}/block1/unit_2/bottleneck_v1'.format(ir_prefix)]])

    multi_end_points_C3 = tf.concat(axis=3, values=[
        end_points_C3_rgb['{}/block2/unit_3/bottleneck_v1'.format(rgb_prefix)],
        end_points_C3_ir['{}/block2/unit_3/bottleneck_v1'.format(ir_prefix)]])

    multi_end_points_C4 = tf.concat(axis=3, values=[
        end_points_C4_rgb['{}/block3/unit_{}/bottleneck_v1'.format(rgb_prefix, middle_num_units - 1)],
        end_points_C4_ir['{}/block3/unit_{}/bottleneck_v1'.format(ir_prefix, middle_num_units - 1)]])

    multi_end_points_C5 = tf.concat(axis=3, values=[
        end_points_C5_rgb['{}/block4/unit_3/bottleneck_v1'.format(rgb_prefix)],
        end_points_C5_ir['{}/block4/unit_3/bottleneck_v1'.format(ir_prefix)]])


    feature_dict = {'C2': multi_end_points_C2,
                    'C3': multi_end_points_C3,
                    'C4': multi_end_points_C4,
                    'C5': multi_end_points_C5,
                    # 'C5': end_points_C5['{}/block4'.format(scope_name)],
                    }
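    # Each fused map carries twice the single-stream channel count (bottleneck
    # output depth is 4 * base_depth per stream): C2/C3/C4/C5 have
    # 512/1024/2048/4096 channels.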

    scope_name = org_scope_name

    pyramid_dict = {}
    with tf.variable_scope('build_pyramid'):
        with slim.arg_scope([slim.conv2d], weights_regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY),
                            activation_fn=None, normalizer_fn=None):

            conv_channels = 256
            last_fm = None
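            # Top-down FPN pathway: i = 0, 1, 2 builds P5, P4, P3. Each level gets
            # a 1x1 lateral conv to 256 channels; from P4 down, the coarser map is
            # bilinearly upsampled and added, and a 3x3 conv smooths the result.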
            for i in range(3):
                fm = feature_dict['C{}'.format(5-i)]
                fm_1x1_conv = slim.conv2d(fm, num_outputs=conv_channels, kernel_size=[1, 1],
                                          stride=1, scope='p{}_1x1_conv'.format(5 - i))
                if last_fm is not None:
                    h, w = tf.shape(fm_1x1_conv)[1], tf.shape(fm_1x1_conv)[2]
                    last_resize = tf.image.resize_bilinear(last_fm,
                                                           size=[h, w],
                                                           name='p{}_up2x'.format(5-i))

                    fm_1x1_conv = fm_1x1_conv + last_resize

                last_fm = fm_1x1_conv

                fm_3x3_conv = slim.conv2d(fm_1x1_conv,
                                          num_outputs=conv_channels, kernel_size=[3, 3], padding="SAME",
                                          stride=1, scope='p{}_3x3_conv'.format(5 - i))
                pyramid_dict['P{}'.format(5-i)] = fm_3x3_conv

            # Extra pyramid levels beyond the backbone, RetinaNet-style: P6 is a
            # stride-2 3x3 conv on P5, and P7 adds a ReLU before another stride-2 conv.
            p6 = slim.conv2d(pyramid_dict['P5'],
                             num_outputs=conv_channels, kernel_size=[3, 3], padding="SAME",
                             stride=2, scope='p6_conv')
            pyramid_dict['P6'] = p6

            p7 = tf.nn.relu(p6)

            p7 = slim.conv2d(p7,
                             num_outputs=conv_channels, kernel_size=[3, 3], padding="SAME",
                             stride=2, scope='p7_conv')

            pyramid_dict['P7'] = p7

    # for level in range(7, 1, -1):
    #     add_heatmap(pyramid_dict['P%d' % level], name='Layer%d/P%d_heat' % (level, level))

    return pyramid_dict
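
For orientation, a minimal call sketch follows. The enclosing function's signature is defined earlier in the file and is not visible here, so the function name build_dual_resnet50_fpn and its parameter list are assumptions for illustration only; cfgs is expected to provide RGB_FIXED_BLOCKS, IR_FIXED_BLOCKS and WEIGHT_DECAY.

import tensorflow as tf

# Hypothetical TF1-style inputs: one RGB batch and one aligned infrared batch.
rgb_batch = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='rgb_input')
ir_batch = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='ir_input')

# Hypothetical call -- the real name and signature live earlier in this file:
# pyramid = build_dual_resnet50_fpn(rgb_batch, ir_batch, is_training=True)
# 'pyramid' maps 'P3'..'P7' to 256-channel feature maps for the detection heads.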