def resnet_faster_rcnn_head(input, params):
    """
    Derived from https://github.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow

    Args:
        input: feature map fed into the block4 (conv5) head.
        params: configuration object consumed by norm_arg_scope.

    Returns:
        C5: the block4 feature map, without global pooling.
    """
    with tf.variable_scope('resnet_head', reuse=tf.AUTO_REUSE):
        block4 = [resnet_v1_block('block4', base_depth=256, num_units=3, stride=1)]
        with slim.arg_scope(norm_arg_scope(params)):
            C5, _ = resnet_v1.resnet_v1(input,
                                        block4,
                                        global_pool=False,
                                        include_root_block=False,
                                        scope='resnet_v1_50',
                                        reuse=tf.AUTO_REUSE)
    return C5
def resnet_v1_slim_fc(inputs, scope):
    """
    Slim version of resnet; can be replaced with
    resnet_v1.resnet_v1_50(inputs, num_classes=None, global_pool=True,
                           output_stride=None, reuse=False, scope=scope)
    or any of the other resnets.
    """
    blocks = [
        resnet_utils.Block('block1', resnet_v1.bottleneck,
                           [(64, 32, 1)] * 2 + [(64, 32, 2)]),
        resnet_utils.Block('block2', resnet_v1.bottleneck,
                           [(128, 64, 1)] * 3 + [(128, 64, 2)]),
        resnet_utils.Block('block3', resnet_v1.bottleneck,
                           [(256, 64, 1)] * 3 + [(256, 64, 2)]),
        resnet_utils.Block('block4', resnet_v1.bottleneck,
                           [(128, 64, 1)] * 1)
    ]
    return resnet_v1.resnet_v1(
        inputs,
        blocks,
        # all parameters below can be passed to resnet_v1.resnet_v1_??
        num_classes=None,    # don't produce a final prediction
        global_pool=True,    # produce a 1x1 output, equivalent to the input of an FC layer
        output_stride=None,
        include_root_block=True,
        reuse=False,         # do not re-use the network
        # my understanding:
        #   task1 image -> resnet1 -> output
        #   task2 image -> resnet2 -> output
        # if both resnets are defined under the same scope with reuse set to
        # True, then some of the parameters will be shared between the two tasks
        scope=scope)
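# Usage sketch for the snippet above (assumptions: a TF1.x environment where
# resnet_v1 comes from the slim model zoo, e.g.
# `from tensorflow.contrib.slim.nets import resnet_v1`; the placeholder shape
# and scope names are hypothetical). It shows the drop-in equivalence the
# docstring claims: both calls return a (net, end_points) pair.
import tensorflow as tf

def _demo_slim_fc_usage():
    images = tf.placeholder(tf.float32, [None, 224, 224, 3])
    net_slim, _ = resnet_v1_slim_fc(images, scope='my_backbone')   # slim variant above
    net_full, _ = resnet_v1.resnet_v1_50(images, num_classes=None,
                                         global_pool=True, output_stride=None,
                                         reuse=False, scope='my_backbone_50')
    return net_slim, net_full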
def resnet_v1_50(inputs, config, is_training=True, scope='resnet_v1_50'):
    """Modified ResNet-50 model."""
    # Note: the base_depth was reduced to be able to fit into GPU memory
    blocks = [
        resnet_v1.resnet_v1_block('block1', base_depth=config.block1_depth,
                                  num_units=config.block1_units, stride=config.block1_stride),
        resnet_v1.resnet_v1_block('block2', base_depth=config.block2_depth,
                                  num_units=config.block2_units, stride=config.block2_stride),
        resnet_v1.resnet_v1_block('block3', base_depth=config.block3_depth,
                                  num_units=config.block3_units, stride=config.block3_stride),
        resnet_v1.resnet_v1_block('block4', base_depth=config.block4_depth,
                                  num_units=config.block4_units, stride=config.block4_stride),
    ]
    return resnet_v1.resnet_v1(inputs,
                               blocks,
                               is_training=is_training,
                               global_pool=False,
                               include_root_block=True,
                               scope=scope)
def restnet_head(input, is_training, scope_name, stage):
    if stage == 'stage1':
        block4 = [resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            C5, _ = resnet_v1.resnet_v1(input,
                                        block4,
                                        global_pool=False,
                                        include_root_block=False,
                                        scope=scope_name)
            # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
            # global average pooling over C5 to obtain the fc-layer input
            flatten = tf.reduce_mean(C5, axis=[1, 2], keep_dims=False,
                                     name='global_average_pooling')
            # C5_flatten = tf.Print(C5_flatten, [tf.shape(C5_flatten)], summarize=10, message='C5_flatten_shape')
    else:
        fc_flatten = slim.flatten(input)
        net = slim.fully_connected(fc_flatten, 1024, scope='fc_1_{}'.format(stage))
        net = slim.dropout(net, keep_prob=0.5, is_training=is_training,
                           scope='dropout_{}'.format(stage))
        flatten = slim.fully_connected(net, 1024, scope='fc_2_{}'.format(stage))
    return flatten
def image_to_head(input_tensor, is_training, reuse=None):
    # fix_blocks, blocks and scope are assumed to be defined in the enclosing
    # module: the first fix_blocks stages run frozen (is_training=False), the
    # remaining stages follow the caller's is_training flag.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net_conv = build_base(input_tensor)
    if fix_blocks > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net_conv, _ = resnet_v1.resnet_v1(net_conv,
                                              blocks[0:fix_blocks],
                                              global_pool=False,
                                              include_root_block=False,
                                              reuse=reuse,
                                              scope=scope)
    if fix_blocks < 4:
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv, _ = resnet_v1.resnet_v1(net_conv,
                                              blocks[fix_blocks:],
                                              global_pool=True,
                                              include_root_block=False,
                                              reuse=reuse,
                                              scope=scope)
    return net_conv
def restnet_head(input, is_training, scope_name):
    block4 = [resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5, _ = resnet_v1.resnet_v1(input,
                                    block4,
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)
        # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
        # global average pooling C5 to obtain fc layers:
        # C5_flatten = tf.reduce_mean(C5, axis=[1, 2], keep_dims=False, name='global_average_pooling')
        # C5_flatten = tf.Print(C5_flatten, [tf.shape(C5_flatten)], summarize=10, message='C5_flatten_shape')
    return C5
def resnet_tiny2(inputs, num_classes=2, scope='resnet_tiny'):
    # separate cls and fcn, add stop_gradient after fcn output
    blocks = [
        resnet_utils.Block('block1', resnet_v1.bottleneck, [(64, 32, 1)] + [(64, 32, 2)]),
        resnet_utils.Block('block2', resnet_v1.bottleneck, [(128, 64, 1)] + [(128, 64, 2)]),
        resnet_utils.Block('block3', resnet_v1.bottleneck, [(256, 64, 1)] + [(128, 64, 2)]),
        resnet_utils.Block('block4', resnet_v1.bottleneck, [(128, 64, 1)])
    ]
    net, _ = resnet_v1.resnet_v1(
        inputs,
        blocks,
        # all parameters below can be passed to resnet_v1.resnet_v1_??
        num_classes=None,    # don't produce a final prediction
        global_pool=False,   # keep the spatial map for the FCN branch
        output_stride=16,
        include_root_block=True,
        reuse=False,         # do not re-use the network
        scope=scope)
    res_out = net  # keep this for later CLS usage

    # FCN branch: upsample back with transposed convolutions
    net = slim.batch_norm(slim.conv2d_transpose(net, 64, 5, 2))
    net = slim.batch_norm(slim.conv2d_transpose(net, 32, 5, 2))
    net = slim.batch_norm(slim.conv2d_transpose(net, 16, 5, 2))
    net = slim.batch_norm(slim.conv2d_transpose(net, 8, 5, 2))
    net = slim.conv2d(net, num_classes, 5, 1, activation_fn=None)
    logits_fcn = tf.identity(net, 'logits_fcn')

    # CLS branch: block gradients so it does not update the shared backbone
    net = res_out
    net = tf.stop_gradient(net)
    # add a few layers to make the image size even smaller
    net = slim.conv2d(net, 128, 3, 1)
    net = slim.max_pool2d(net, 2, 2)
    net = slim.conv2d(net, 128, 3, 1)
    net = slim.max_pool2d(net, 2, 2)
    net = tf.reduce_mean(net, [1, 2], keep_dims=True)
    # add an extra layer
    net = slim.conv2d(net, 64, [1, 1])
    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None)
    logits_cls = tf.identity(net, 'logits_cls')
    return logits_fcn, logits_cls, 16
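# Why the tf.stop_gradient above decouples the CLS branch: gradients from the
# classification loss never reach the shared backbone, so only the FCN branch
# updates it. A minimal, self-contained sketch (TF1.x graph mode assumed; the
# variable and values are purely illustrative):
import tensorflow as tf

def _demo_stop_gradient():
    x = tf.Variable(3.0)               # stands in for backbone weights
    feat = x * 2.0                     # shared feature
    cls_in = tf.stop_gradient(feat)    # CLS branch input, as in resnet_tiny2
    loss = cls_in * 5.0
    grad = tf.gradients(loss, [x])[0]  # None: nothing flows back through cls_in
    return grad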
def restnet_head(self, inputs, scope_name, is_training):
    block4 = [resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]
    with slim.arg_scope(self.resnet_arg_scope(is_training=is_training)):
        net, _ = resnet_v1.resnet_v1(inputs,
                                     block4,
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=scope_name)
        # global average pooling over C5 to obtain the fc-layer input
        net_flatten = tf.reduce_mean(net, axis=[1, 2], keep_dims=False,
                                     name='global_average_pooling')
    return net_flatten
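# Shape sketch of the global-average-pooling step above (the feature map shape
# is an assumption): reducing over the spatial axes [1, 2] turns an NHWC map
# into one feature vector per image, the usual input to fully connected heads.
import tensorflow as tf

def _demo_gap():
    c5 = tf.placeholder(tf.float32, [None, 7, 7, 2048])  # e.g. a ResNet C5 map
    gap = tf.reduce_mean(c5, axis=[1, 2])                # shape: [None, 2048]
    return gap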
def _resnet_small(self, inputs, num_classes=None, global_pool=True,
                  output_stride=None, include_root_block=True,
                  reuse=None, scope='resnet_v1_small'):
    """A shallow and thin ResNet v1 for faster tests."""
    bottleneck = resnet_v1.bottleneck
    blocks = [
        resnet_utils.Block('block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]),
        resnet_utils.Block('block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]),
        resnet_utils.Block('block3', bottleneck, [(16, 4, 1)] * 2 + [(16, 4, 2)]),
        resnet_utils.Block('block4', bottleneck, [(32, 8, 1)] * 2)
    ]
    return resnet_v1.resnet_v1(inputs, blocks, num_classes, global_pool,
                               output_stride, include_root_block, reuse, scope)
def resnet_tiny(X, scope=None, reuse=True):
    blocks = [
        resnet_utils.Block('block1', resnet_v1.bottleneck, [(64, 32, 1)] + [(64, 32, 2)]),
        resnet_utils.Block('block2', resnet_v1.bottleneck, [(128, 64, 1)] + [(128, 64, 2)]),
        resnet_utils.Block('block3', resnet_v1.bottleneck, [(256, 64, 1)] + [(128, 64, 2)]),
        resnet_utils.Block('block4', resnet_v1.bottleneck, [(256, 64, 1)] + [(128, 64, 2)]),
        resnet_utils.Block('block5', resnet_v1.bottleneck, [(256, 64, 1)] + [(128, 64, 2)]),
        resnet_utils.Block('block6', resnet_v1.bottleneck, [(128, 64, 1)])
    ]
    net, _ = resnet_v1.resnet_v1(
        X,
        blocks,
        # all parameters below can be passed to resnet_v1.resnet_v1_??
        num_classes=2,     # produce a final 2-class prediction
        global_pool=True,  # produce a 1x1 output, equivalent to the input of an FC layer
        reuse=reuse,       # re-use the network variables when reuse=True
        scope=scope)
    return net
def resnet_v2_50(inputs, num_classes=None, is_training=True, global_pool=True,
                 output_stride=None, spatial_squeeze=True, reuse=None,
                 scope='resnet_v1_50'):
    """ResNet-50 model of [1]. See resnet_v2() for arg and return description."""
    # Note: only the first three blocks are built (block4 is omitted), so this
    # acts as a truncated backbone. The original snippet hard-coded
    # num_classes=None, is_training=False, global_pool=False, output_stride=16
    # and reuse=False instead of forwarding its arguments; they are forwarded
    # here. spatial_squeeze is accepted for interface compatibility but unused.
    blocks = [
        rv2.resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        rv2.resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        rv2.resnet_v1_block('block3', base_depth=256, num_units=6, stride=2),
    ]
    return rv2.resnet_v1(inputs,
                         blocks,
                         num_classes,
                         is_training=is_training,
                         global_pool=global_pool,
                         output_stride=output_stride,
                         include_root_block=True,
                         reuse=reuse,
                         scope=scope)
def build_network(self, sess, is_training=True):
    # select initializers
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    # choose different blocks for different numbers of layers
    if self._num_layers == 50:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # use stride 1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 101:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # use stride 1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 152:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
            # use stride 1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    else:
        # other numbers are not supported
        raise NotImplementedError

    with tf.variable_scope('noise'):
        conv = slim.conv2d(self.noise, num_outputs=3, kernel_size=[5, 5], stride=1,
                           padding='SAME', activation_fn=None,
                           trainable=is_training, scope='constrained_conv')
        self._layers['noise'] = conv

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        # assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
        C_1 = self.build_base(conv)
        C_2, _ = resnet_v1.resnet_v1(C_1, blocks[0:1],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)
        # net = self.cbam_module(inputs=net, name="cbam_layer_1")
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C_3, _ = resnet_v1.resnet_v1(C_2, blocks[1:2],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)
        # net = self.cbam_module(inputs=net, name="cbam_layer_2")
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C_4, end_point = resnet_v1.resnet_v1(C_3, blocks[2:3],
                                             global_pool=False,
                                             include_root_block=False,
                                             scope=self._resnet_scope)
        # mask_C_4 = self.cbam_module(inputs=C_4, name="C_4")
    self.end_point = end_point
    self._act_summaries.append(C_4)
    self._layers['head'] = C_4
    self._layers['C1'] = C_1
    self._layers['C2'] = C_2
    self._layers['C3'] = C_3

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # build the anchors for the image
        self._anchor_component()
        # rpn
        rpn1 = slim.conv2d(C_4, 512, [3, 3], trainable=is_training,
                           weights_initializer=initializer, scope="rpn_conv/3x3")
        self._layers['rpn1'] = rpn1
        rpn = self.cbam_module(inputs=rpn1, name="rpn_conv1")
        self._layers['rpn'] = rpn
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID', activation_fn=None,
                                    scope='rpn_cls_score')
        self._layers['rpn_cls_score'] = rpn_cls_score
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID', activation_fn=None,
                                    scope='rpn_bbox_pred')
        if is_training:
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a deterministic order for the computing graph, for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
        else:
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError

    # rcnn
    if cfg.POOLING_MODE == 'crop':
        pool5 = self._crop_pool_layer(C_4, rois, "pool5")
        # pool5 = self._crop_pool_layer(net_sum, rois, "pool5")
    else:
        raise NotImplementedError

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7, end_point1 = resnet_v1.resnet_v1(pool5, blocks[-1:],
                                              global_pool=False,
                                              include_root_block=False,
                                              scope=self._resnet_scope)
    self._layers['fc7'] = fc7
    # self._layers['pool5'] = pool5
    self.end_point1 = end_point1

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        cls_fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        cls_score = slim.fully_connected(cls_fc7, self._num_classes,
                                         weights_initializer=initializer,
                                         trainable=is_training,
                                         activation_fn=None, scope='cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        box_fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        bbox_pred = slim.fully_connected(box_fc7, self._num_classes * 4,
                                         weights_initializer=initializer_bbox,
                                         trainable=is_training,
                                         activation_fn=None, scope='bbox_pred')

    if cfg.USE_MASK is True:
        with tf.variable_scope('feature_fuse', 'feature_fuse'):
            mask_fuse = C_3 * 0.5 + rpn * 0.5
            feature_fuse = slim.conv2d(mask_fuse, 1024, [1, 1], padding='VALID',
                                       trainable=is_training,
                                       weights_initializer=initializer,
                                       scope='mask_fuse')
        mask_box, indices = self._proposal_mask_layer(cls_prob, bbox_pred, rois, 'mask_proposal')
        mask_pool5 = self._crop_pool_layer(feature_fuse, mask_box, "mask_pool5")
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            mask_fc7, _ = resnet_v1.resnet_v1(mask_pool5, blocks[-1:],
                                              global_pool=False,
                                              include_root_block=False,
                                              scope='mask_conv')
        self._act_summaries.append(mask_fc7)
        with tf.variable_scope('mask_predict', 'mask_predict'):
            upsampled_features = slim.conv2d_transpose(mask_fc7, 256, 2, 2, activation_fn=None)
            self._act_summaries.append(upsampled_features)
            upsampled_features = slim.conv2d(upsampled_features, 64, [1, 1],
                                             normalizer_fn=slim.batch_norm,
                                             activation_fn=None, padding='VALID')
            self._act_summaries.append(upsampled_features)
            upsampled_features = slim.batch_norm(upsampled_features, activation_fn=None)
            self._act_summaries.append(upsampled_features)
            upsampled_features = tf.nn.relu(upsampled_features)
            self._act_summaries.append(upsampled_features)
            mask_predictions = slim.conv2d(upsampled_features, num_outputs=2,
                                           activation_fn=None, kernel_size=[1, 1],
                                           padding='VALID')
            self._act_summaries.append(mask_predictions)
            self._predictions["mask_out"] = tf.expand_dims(mask_predictions[:, :, :, 1], 3)
            mask_softmax = tf.nn.softmax(mask_predictions)
            self._predictions["mask_softmaxbg"] = tf.expand_dims(mask_softmax[:, :, :, 0], 3)
            self._predictions["mask_softmaxfg"] = tf.expand_dims(mask_softmax[:, :, :, 1], 3)

        self._predictions["rpn_cls_score"] = rpn_cls_score
        self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
        self._predictions["rpn_cls_prob"] = rpn_cls_prob
        self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
        self._predictions["cls_score"] = cls_score
        self._predictions["cls_prob"] = cls_prob
        self._predictions["bbox_pred"] = bbox_pred
        self._predictions["rois"] = rois
        self._predictions["mask_pred"] = mask_predictions
        self._score_summaries.update(self._predictions)
        return rois, cls_prob, bbox_pred, mask_predictions
    else:
        self._predictions["rpn_cls_score"] = rpn_cls_score
        self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
        self._predictions["rpn_cls_prob"] = rpn_cls_prob
        self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
        self._predictions["cls_score"] = cls_score
        self._predictions["cls_prob"] = cls_prob
        self._predictions["bbox_pred"] = bbox_pred
        self._predictions["rois"] = rois
        self._score_summaries.update(self._predictions)
        return rois, cls_prob, bbox_pred
def resnet_base(img_batch, scope_name, is_training):
    '''
    this code is derived from light-head rcnn.
    https://github.com/zengarden/light_head_rcnn

    It is convenient to freeze blocks. So we adapt this mode.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23  # resnet_v1_101 uses 23 units in its third block
    elif scope_name == 'resnet_v1_152':
        middle_num_units = 36
    else:
        raise NotImplementedError('We only support resnet_v1_50, resnet_v1_101, resnet152. '
                                  'Check your network name....yjr')

    blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=1),
              resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
              resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=2),
              resnet_v1_block('block4', base_depth=512, num_units=3, stride=2)]

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):  # resnet_arg_scope sets the layer defaults
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2,
                                           scope='conv1')  # first ResNet conv layer: 7x7, 64 channels, stride 2
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])  # explicit zero padding, similar to SAME padding for the pool below
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1')  # 3x3 max pooling

    # not_freezed = [False] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]  # blocks that are not frozen
    # net = tf.Print(net, [tf.shape(net)], summarize=10, message='net')

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        # each call consumes one resnet_utils.Block (a whole ResNet stage); it returns
        # the stage output (C2) and end_points_C2, the feature maps collected so far
        # (the collection grows as later stages are built)
        C2, end_points_C2 = resnet_v1.resnet_v1(net, blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        # build the second block
        C3, end_points_C3 = resnet_v1.resnet_v1(C2, blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C4, end_points_C4 = resnet_v1.resnet_v1(C3, blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5, end_points_C5 = resnet_v1.resnet_v1(C4, blocks[3:4],
                                                num_classes=cfgs.num_classes,
                                                global_pool=True,
                                                include_root_block=False,
                                                scope=scope_name)
    C5 = tf.reshape(C5, [-1, cfgs.num_classes])
    return C5
def build_network(self, sess, is_training=True):
    # select initializers
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        # initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer = tf.contrib.layers.xavier_initializer()
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    # choose different blocks for different numbers of layers
    if self._num_layers == 50:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # use stride 1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 101:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # use stride 1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 152:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
            # use stride 1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    else:
        # other numbers are not supported
        raise NotImplementedError

    assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
    if cfg.RESNET.FIXED_BLOCKS == 3:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net, blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    elif cfg.RESNET.FIXED_BLOCKS > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net, _ = resnet_v1.resnet_v1(net, blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self._resnet_scope)
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv4, _ = resnet_v1.resnet_v1(net, blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net, blocks[0:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    self._act_summaries.append(net_conv4)
    self._layers['head'] = net_conv4

    # SRM filter bank: three fixed 5x5 high-pass kernels that expose local
    # noise residuals; each kernel is replicated across the 3 input channels
    c = np.zeros((3, 5, 5))
    c[0] = [[-1, 2, -2, 2, -1],
            [2, -6, 8, -6, 2],
            [-2, 8, -12, 8, -2],
            [2, -6, 8, -6, 2],
            [-1, 2, -2, 2, -1]]
    c[0] = c[0] / 12
    c[1][1][1] = -1
    c[1][1][2] = 2
    c[1][1][3] = -1
    c[1][2][1] = 2
    c[1][2][2] = -4
    c[1][2][3] = 2
    c[1][3][1] = -1
    c[1][3][2] = 2
    c[1][3][3] = -1
    c[1] = c[1] / 4
    c[2][2][1] = 1
    c[2][2][2] = -2
    c[2][2][3] = 1
    c[2] = c[2] / 2
    Wcnn = np.zeros((5, 5, 3, 3))
    for i in range(3):
        # k = i % 10 + 1
        # Wcnn[i] = [c[3*k-3], c[3*k-2], c[3*k-1]]
        Wcnn[:, :, 0, i] = c[i]
        Wcnn[:, :, 1, i] = c[i]
        Wcnn[:, :, 2, i] = c[i]

    if True:
        with tf.variable_scope('noise'):
            # kernel = tf.get_variable('weights', shape=[5, 5, 3, 3],
            #                          initializer=tf.constant_initializer(c))
            conv = tf.nn.conv2d(self.noise, Wcnn, [1, 1, 1, 1], padding='SAME', name='srm')
            self._layers['noise'] = conv
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            # srm_conv = tf.nn.tanh(conv, name='tanh')
            noise_net = resnet_utils.conv2d_same(conv, 64, 7, stride=2, scope='conv1')
            noise_net = tf.pad(noise_net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            noise_net = slim.max_pool2d(noise_net, [3, 3], stride=2, padding='VALID', scope='pool1')
            # net_sum = tf.concat(3, [net_conv4, noise_net])
            noise_conv4, _ = resnet_v1.resnet_v1(noise_net, blocks[0:-1],
                                                 global_pool=False,
                                                 include_root_block=False,
                                                 scope='noise')

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # build the anchors for the image
        self._anchor_component()
        # rpn
        rpn = slim.conv2d(net_conv4, 512, [3, 3], trainable=is_training,
                          weights_initializer=initializer, scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1],
                                    trainable=is_training, weights_initializer=initializer,
                                    padding='VALID', activation_fn=None, scope='rpn_cls_score')
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1],
                                    trainable=is_training, weights_initializer=initializer,
                                    padding='VALID', activation_fn=None, scope='rpn_bbox_pred')
        if is_training:
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a deterministic order for the computing graph, for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
        else:
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError

    # rcnn
    if cfg.POOLING_MODE == 'crop':
        pool5 = self._crop_pool_layer(net_conv4, rois, "pool5")
        self._layers['pool5'] = pool5
        # pool5 = self._crop_pool_layer(net_sum, rois, "pool5")
    else:
        raise NotImplementedError

    if True:
        noise_pool5 = self._crop_pool_layer(noise_conv4, rois, "noise_pool5")
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            noise_fc7, _ = resnet_v1.resnet_v1(noise_pool5, blocks[-1:],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope='noise')
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7, _ = resnet_v1.resnet_v1(pool5, blocks[-1:],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)
    self._layers['fc7'] = fc7

    with tf.variable_scope('noise_pred'):
        bilinear_pool = compact_bilinear_pooling_layer(fc7, noise_fc7, 2048 * 8,
                                                       compute_size=16, sequential=False)
        fc7 = tf.Print(fc7, [tf.shape(fc7)],
                       message='Value of %s' % 'fc', summarize=4, first_n=1)
        bilinear_pool = tf.reshape(bilinear_pool, [-1, 2048 * 8])
        bilinear_pool = tf.Print(bilinear_pool, [tf.shape(bilinear_pool)],
                                 message='Value of %s' % 'Blinear', summarize=4, first_n=1)
        # signed square root followed by L2 normalization of the pooled feature
        bilinear_pool = tf.multiply(tf.sign(bilinear_pool),
                                    tf.sqrt(tf.abs(bilinear_pool) + 1e-12))
        bilinear_pool = tf.nn.l2_normalize(bilinear_pool, dim=1)
        noise_cls_score = slim.fully_connected(bilinear_pool, self._num_classes,
                                               weights_initializer=tf.contrib.layers.xavier_initializer(),
                                               trainable=is_training,
                                               activation_fn=None, scope='cls_score')
        cls_prob = self._softmax_layer(noise_cls_score, "cls_prob")
        fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        bbox_pred = slim.fully_connected(fc7, self._num_classes * 4,
                                         weights_initializer=initializer_bbox,
                                         trainable=is_training,
                                         activation_fn=None, scope='bbox_pred')

    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["cls_score"] = noise_cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = rois
    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
def build_network(self, sess, is_training=True):
    # select initializers
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    blocks = [
        resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]
    if cfg.RESNET.FIXED_BLOCKS > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net, _ = resnet_v1.resnet_v1(self._image, blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                         global_pool=False,
                                         include_root_block=True,
                                         scope='resnet_v1_101')
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv5, _ = resnet_v1.resnet_v1(net, blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope='resnet_v1_101')
    else:
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv5, _ = resnet_v1.resnet_v1(self._image, blocks[0:-1],
                                               global_pool=False,
                                               include_root_block=True,
                                               scope='resnet_v1_101')
    self._act_summaries.append(net_conv5)
    self._layers['conv5_3'] = net_conv5

    with tf.variable_scope('resnet_v1_101', 'resnet_v1_101',
                           regularizer=tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)):
        # build the anchors for the image
        self._anchor_component()
        # rpn
        rpn = slim.conv2d(net_conv5, 512, [3, 3], trainable=is_training,
                          weights_initializer=initializer, scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_scales * 6, [1, 1],
                                    trainable=is_training, weights_initializer=initializer,
                                    padding='VALID', activation_fn=None, scope='rpn_cls_score')
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_scales * 6, "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_scales * 12, [1, 1],
                                    trainable=is_training, weights_initializer=initializer,
                                    padding='VALID', activation_fn=None, scope='rpn_bbox_pred')
        if is_training:
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a deterministic order for the computing graph, for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
        else:
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError

    # rcnn
    if cfg.POOLING_MODE == 'crop':
        pool5 = self._crop_pool_layer(net_conv5, rois, "pool5")
    else:
        raise NotImplementedError

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7, _ = resnet_v1.resnet_v1(pool5, blocks[-1:],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope='resnet_v1_101')

    with tf.variable_scope('resnet_v1_101', 'resnet_v1_101',
                           regularizer=tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)):
        # Average pooling done by reduce_mean
        fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        cls_score = slim.fully_connected(fc7, self._num_classes,
                                         weights_initializer=initializer,
                                         trainable=is_training,
                                         activation_fn=None, scope='cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_pred = slim.fully_connected(fc7, self._num_classes * 4,
                                         weights_initializer=initializer_bbox,
                                         trainable=is_training,
                                         activation_fn=None, scope='bbox_pred')

    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["cls_score"] = cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = rois
    self._score_summaries.update(self._predictions)
    return rois, cls_prob, bbox_pred
def resnet_base(img_batch, scope_name, is_training=False):
    '''
    this code is derived from light-head rcnn.
    https://github.com/zengarden/light_head_rcnn

    It is convenient to freeze blocks. So we adapt this mode.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError('We only support resnet_v1_50 or resnet_v1_101. '
                                  'Check your network name....yjr')

    # NOTE: middle_num_units is computed above but unused; block3 is hard-coded to 9 units here
    blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
              resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
              resnet_v1_block('block3', base_depth=256, num_units=9, stride=2),
              resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]
    # when using fpn, the stride list is [1, 2, 2]
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2, scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1')

    not_freezed = [False] * 0 + (4 - 0) * [True]  # Fixed_Blocks can be 1~3

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, end_points_C2 = resnet_v1.resnet_v1(net, blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
        # C2 = tf.layers.average_pooling2d(inputs=C2, pool_size=3, strides=2, padding="valid")
        # C2 = tf.reduce_mean(C2, axis=[1, 2], keep_dims=False, name='global_average_pooling')
    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, end_points_C3 = resnet_v1.resnet_v1(C2, blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    C3 = slim.avg_pool2d(C3, 2)
    # C3 = tf.reduce_mean(C3, axis=[1, 2], keep_dims=False, name='global_average_pooling')
    # return C3
    # with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
    #     C4, end_points_C4 = resnet_v1.resnet_v1(C3, blocks[2:3],
    #                                             global_pool=False,
    #                                             include_root_block=False,
    #                                             scope=scope_name)
    return C3
def resnet_base(self, inputs, is_training):
    if self.scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif self.scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError('We only support resnet_v1_50 or resnet_v1_101. '
                                  'Check your network name....')

    blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
              resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
              # use stride 1 for the last conv4 layer.
              resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=1)]
    # when using fpn, the stride list is [1, 2, 2]

    with slim.arg_scope(self.resnet_arg_scope(is_training=False)):
        with tf.variable_scope(self.scope_name, 'resnet_v1_101'):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(inputs, num_outputs=64, kernel_size=7,
                                           stride=2, scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2,
                                  padding='VALID', scope='pool1')

    # generate freeze flags: the first fixed_block stages run with is_training=False
    block_freeze = [False] * self.fixed_block + (4 - self.fixed_block) * [True]

    with slim.arg_scope(self.resnet_arg_scope(is_training=(is_training and block_freeze[0]))):
        net, _ = resnet_v1.resnet_v1(net, blocks[0:1],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self.scope_name)
    with slim.arg_scope(self.resnet_arg_scope(is_training=(is_training and block_freeze[1]))):
        net, _ = resnet_v1.resnet_v1(net, blocks[1:2],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self.scope_name)
        # add_heatmap(C3, name='Layer/C3')
        # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')
    with slim.arg_scope(self.resnet_arg_scope(is_training=(is_training and block_freeze[2]))):
        net, _ = resnet_v1.resnet_v1(net, blocks[2:3],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self.scope_name)
    return net
def resnet_base(img_batch, scope_name, is_training=True):
    '''
    this code is derived from light-head rcnn.
    https://github.com/zengarden/light_head_rcnn

    It is convenient to freeze blocks. So we adapt this mode.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError('We only support resnet_v1_50 or resnet_v1_101. '
                                  'Check your network name....yjr')

    # clw note: this calls slim's resnet_v1_block interface; the settings below can be
    # looked up in the per-depth network configurations of the ResNet paper.
    # For ResNet_v1_50: 1 (conv1) + 3*3 (conv2) + 4*3 (conv3) + 6*3 (conv4) +
    # 3*3 (conv5) = 1+9+12+18+9, plus 1 fc = 50
    blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
              resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
              # use stride 1 for the last conv4 layer.
              # Note that block3 uses stride=1 here, while a standard resnet would use
              # stride=2; per the author, tf.slim implements resnet slightly differently
              # from the paper.
              resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=1)]
    # when using fpn, the stride list is [1, 2, 2]  # clw note: TODO
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2, scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1')

    # clw note: resnet.py defines the resnet_base network and the resnet_head network:
    # one is the base feature extractor, the other the detection/classification head
    # applied after RoI Pooling. When building the base network, not_freezed decides
    # whether each stage of the feature extractor is fine-tuned.
    # For example, ResNet50's conv2~conv5 contain 3, 4, 6 and 3 conv groups respectively.
    # With the default FIXED_BLOCKS=1, not_freezed is [False, True, True, True], so
    # conv2 (block0) is not trained while conv3 is;
    # with FIXED_BLOCKS=2, conv2 and conv3 are frozen and conv4 is trained;
    # with FIXED_BLOCKS=3, conv2, conv3 and conv4 are all frozen.
    not_freezed = [False] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]  # Fixed_Blocks can be 1~3

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, _ = resnet_v1.resnet_v1(net, blocks[0:1],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)
        # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')
        # add_heatmap(C2, 'Layer/C2')
    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, _ = resnet_v1.resnet_v1(C2, blocks[1:2],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)
        # add_heatmap(C3, name='Layer/C3')
        # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')
    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, _ = resnet_v1.resnet_v1(C3, blocks[2:3],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)
        # add_heatmap(C4, name='Layer/C4')
        # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')

    # Q (a reader): the feature map entering roi pooling comes from conv_4 rather than
    # the final conv_5; is this the NoC method from "Object Detection Networks on
    # Convolutional Feature Maps"?
    # A (the author): correct, it is the NoC method. With a resNet backbone, conv_5 is
    # generally used as the head that classifies and regresses each roi.
    # (own note): indeed, the paper feeds conv_4's output into the RPN, then after the
    # shared RoI Pooling feeds the fully convolutional conv_5, which acts like an fc
    # layer, for classification and regression.
    return C4
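# Worked example of the freezing flags described in the comments above (plain
# Python; the FIXED_BLOCKS value is chosen for illustration): the first
# FIXED_BLOCKS stages get is_training=False, the rest follow the caller's flag.
FIXED_BLOCKS = 1
not_freezed = [False] * FIXED_BLOCKS + (4 - FIXED_BLOCKS) * [True]
print(not_freezed)  # [False, True, True, True] -> conv2 frozen, conv3..conv5 trainable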
def resnet_v1_slim(inputs,
                   num_classes=None,
                   global_pool=True,
                   output_stride=None,
                   reuse=None,
                   # the above parameters will be directly passed to resnet.resnet_v1
                   scope='resnet_v1_slim'):
    blocks = [
        resnet_utils.Block('block1', resnet_v1.bottleneck,
                           [(64, 32, 1)] * 2 + [(64, 32, 2)]),
        # The last argument of Block is a list of "bottleneck" unit
        # configurations. Each entry is of the form (depth, in-depth, stride).
        # Each "bottleneck" unit consists of 3 layers:
        #   convolution from depth channels to in-depth channels
        #   convolution from in-depth channels to in-depth channels
        #   convolution from in-depth channels to depth channels
        # It's called a "bottleneck" because the overall input and output
        # depth (# of channels) are the same, while the in-depth in the middle
        # is smaller.
        # Because each bottleneck has 3 layers, the chain above has
        # 3 * (2 + 1) = 9 layers.
        # By convention all bottleneck units have stride = 1 except for the
        # last, which has stride 2. The overall effect is that after the whole
        # chain, the image size is reduced by 2.
        # The original resnet implementation has:
        #   -- very long chains
        #   -- very large depth and in-depth values.
        # This is necessary for very big datasets like ImageNet, but for
        # smaller and simpler datasets we should be able to substantially
        # reduce these, as is what we do in this resnet_slim.
        resnet_utils.Block('block2', resnet_v1.bottleneck,
                           [(128, 64, 1)] * 4 + [(128, 64, 2)]),  # 3 * (4+1) = 15 layers
        resnet_utils.Block('block3', resnet_v1.bottleneck,
                           [(256, 64, 1)] * 4 + [(256, 64, 2)]),  # 3 * (4+1) = 15 layers
        resnet_utils.Block('block4', resnet_v1.bottleneck,
                           [(256, 64, 1)] * 2)                    # 3 * 2 = 6 layers
        # So we have 9 + 15 + 15 + 6 = 45 layers; there are two extra layers
        # added by the system, so by the resnet nomenclature this network can
        # be called a resnet_v1_47.
        # The first 3 Blocks each have stride = 2, and the last Block has
        # stride 1, so the overall stride of this architecture is 8.
        # If "output_stride" is smaller than 8, resnet_v1.resnet_v1 will add
        # extra down-sizing layers to meet the requirement.
    ]
    return resnet_v1.resnet_v1(inputs,
                               blocks,
                               num_classes,
                               global_pool,
                               output_stride,
                               include_root_block=True,
                               reuse=reuse,
                               scope=scope)
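# Worked check of the layer counting in the comments above (plain Python):
# each (depth, in-depth, stride) tuple is one 3-layer bottleneck unit, so the
# four chains of resnet_v1_slim contribute 3 * (3 + 5 + 5 + 2) = 45 conv
# layers, plus the two extra layers noted above, hence "resnet_v1_47".
units_per_block = [2 + 1, 4 + 1, 4 + 1, 2]   # units per Block in resnet_v1_slim
total_layers = sum(3 * n for n in units_per_block)
print(total_layers)  # 45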
def build_network(self, sess, is_training=True):
    # select initializers
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    # choose different blocks for different numbers of layers
    if self._num_layers == 50:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # use stride 1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 101:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # use stride 1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 152:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
            # use stride 1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    else:
        # other numbers are not supported
        raise NotImplementedError

    assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
    if cfg.RESNET.FIXED_BLOCKS == 3:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net, blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    elif cfg.RESNET.FIXED_BLOCKS > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net, _ = resnet_v1.resnet_v1(net, blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self._resnet_scope)
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv4, _ = resnet_v1.resnet_v1(net, blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net, blocks[0:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    self._act_summaries.append(net_conv4)
    self._layers['head'] = net_conv4

    if False:  # disabled SRM/noise branch (Wcnn is not defined in this version)
        with tf.variable_scope('noise'):
            # kernel = tf.get_variable('weights', shape=[5, 5, 3, 3],
            #                          initializer=tf.constant_initializer(c))
            conv = tf.nn.conv2d(self.noise, Wcnn, [1, 1, 1, 1], padding='SAME', name='srm')
            self._layers['noise'] = conv
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            # srm_conv = tf.nn.tanh(conv, name='tanh')
            noise_net = resnet_utils.conv2d_same(conv, 64, 7, stride=2, scope='conv1')
            noise_net = tf.pad(noise_net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            noise_net = slim.max_pool2d(noise_net, [3, 3], stride=2, padding='VALID', scope='pool1')
            # net_sum = tf.concat(3, [net_conv4, noise_net])
            noise_conv4, _ = resnet_v1.resnet_v1(noise_net, blocks[0:-1],
                                                 global_pool=False,
                                                 include_root_block=False,
                                                 scope='noise')

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # build the anchors for the image
        self._anchor_component()
        # rpn
        rpn = slim.conv2d(net_conv4, 512, [3, 3], trainable=is_training,
                          weights_initializer=initializer, scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1],
                                    trainable=is_training, weights_initializer=initializer,
                                    padding='VALID', activation_fn=None, scope='rpn_cls_score')
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1],
                                    trainable=is_training, weights_initializer=initializer,
                                    padding='VALID', activation_fn=None, scope='rpn_bbox_pred')
        if is_training:
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a deterministic order for the computing graph, for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
        else:
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError

    # rcnn
    if cfg.POOLING_MODE == 'crop':
        pool5 = self._crop_pool_layer(net_conv4, rois, "pool5")
        # pool5 = self._crop_pool_layer(net_sum, rois, "pool5")
    else:
        raise NotImplementedError

    if False:  # disabled noise RoI branch
        noise_pool5 = self._crop_pool_layer(noise_conv4, rois, "noise_pool5")
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            noise_fc7, _ = resnet_v1.resnet_v1(noise_pool5, blocks[-1:],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope='noise')
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7, _ = resnet_v1.resnet_v1(pool5, blocks[-1:],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)
    self._layers['fc7'] = fc7

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # pdb.set_trace()
        # noise_fc7 = tf.reduce_mean(noise_fc7, axis=[1, 2])
        # bilinear_pool = compact_bilinear_pooling_layer(fc7, noise_fc7, 2048*4, compute_size=16, sequential=False)
        # bilinear_pool = tf.reshape(bilinear_pool, [-1, 2048*4])
        fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        cls_score = slim.fully_connected(fc7, self._num_classes,
                                         weights_initializer=initializer,
                                         trainable=is_training,
                                         activation_fn=None, scope='cls_score')
        # pdb.set_trace()
        # noise_cls_score = slim.fully_connected(bilinear_pool, self._num_classes,
        #                                        weights_initializer=initializer,
        #                                        trainable=is_training, activation_fn=None,
        #                                        scope='noise_cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_pred = slim.fully_connected(fc7, self._num_classes * 4,
                                         weights_initializer=initializer_bbox,
                                         trainable=is_training,
                                         activation_fn=None, scope='bbox_pred')
        # with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        #     # Average pooling done by reduce_mean
        #     fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        #     fc_con = tf.concat(1, [fc7, noise_fc])
        #     cls_score = slim.fully_connected(fc7, self._num_classes, weights_initializer=initializer,
        #                                      trainable=False, activation_fn=None, scope='cls_score')
        #     cls_score1 = cls_score + 10 * noise_cls_score
        #     cls_prob = self._softmax_layer(noise_cls_score, "cls_prob")
        #     bbox_pred = slim.fully_connected(fc7, self._num_classes * 4,
        #                                      weights_initializer=initializer_bbox, trainable=False,
        #                                      activation_fn=None, scope='bbox_pred')

    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["cls_score"] = cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = rois
    self._score_summaries.update(self._predictions)
    return rois, cls_prob, bbox_pred
def resnet_base(self, img_batch, scope_name, is_training=True):
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError('We only support resnet_v1_50 or resnet_v1_101. ')

    blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
              resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
              resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=2),
              resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]
    # when using fpn, the stride list is [1, 2, 2]

    with slim.arg_scope(self.resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2, scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1')

    not_freezed = [False] * self.cfgs.FIXED_BLOCKS + (4 - self.cfgs.FIXED_BLOCKS) * [True]  # Fixed_Blocks can be 1~3

    with slim.arg_scope(self.resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, end_points_C2 = resnet_v1.resnet_v1(net, blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
        # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')
        # self.add_heatmap(C2, name='Layer2/C2_heat')
    with slim.arg_scope(self.resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, end_points_C3 = resnet_v1.resnet_v1(C2, blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
        # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')
        # self.add_heatmap(C3, name='Layer3/C3_heat')
    with slim.arg_scope(self.resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, end_points_C4 = resnet_v1.resnet_v1(C3, blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
        # self.add_heatmap(C4, name='Layer4/C4_heat')
        # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')
    with slim.arg_scope(self.resnet_arg_scope(is_training=is_training)):
        C5, end_points_C5 = resnet_v1.resnet_v1(C4, blocks[3:4],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
        # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
        # self.add_heatmap(C5, name='Layer5/C5_heat')

    feature_dict = {
        'C2': end_points_C2['{}/block1/unit_2/bottleneck_v1'.format(scope_name)],
        'C3': end_points_C3['{}/block2/unit_3/bottleneck_v1'.format(scope_name)],
        'C4': end_points_C4['{}/block3/unit_{}/bottleneck_v1'.format(scope_name, middle_num_units - 1)],
        'C5': end_points_C5['{}/block4/unit_3/bottleneck_v1'.format(scope_name)],
        # 'C5': end_points_C5['{}/block4'.format(scope_name)],
    }
    return feature_dict
def resnet_base_balance(img_batch, scope_name, is_training=True):
    '''
    this code is derived from light-head rcnn.
    https://github.com/zengarden/light_head_rcnn

    It is convenient to freeze blocks. So we adapt this mode.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError('We only support resnet_v1_50 or resnet_v1_101. '
                                  'Check your network name....yjr')

    blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
              resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
              # use stride 1 for the last conv4 layer.
              resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=1),
              resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)]
    # when using fpn, the stride list is [1, 2, 2]
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2, scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1')

    not_freezed = [False] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]  # Fixed_Blocks can be 1~3

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, end_points_C2 = resnet_v1.resnet_v1(net, blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
        # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')
    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, end_points_C3 = resnet_v1.resnet_v1(C2, blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
        # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')
    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, end_points_C4 = resnet_v1.resnet_v1(C3, blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        # print('c5 input shape', input.shape)
        C5, end_points_C5 = resnet_v1.resnet_v1(C4, blocks[3:4],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    add_heatmap(C3, 'img/feature_map_C3')
    add_heatmap(C4, 'img/feature_map_C4')
    add_heatmap(C5, 'img/feature_map_C5')

    # resize C3 and C5 to C4's spatial size, project both to 1024 channels,
    # then average the three maps into one integrated feature
    C4_shape = tf.shape(C4)
    C4_resize = C4
    C3_resize = tf.image.resize_bilinear(C3, (C4_shape[1], C4_shape[2]))
    C3_resize = slim.conv2d(C3_resize, 1024, [1, 1],
                            trainable=is_training,
                            weights_initializer=cfgs.INITIALIZER,
                            activation_fn=tf.nn.relu,
                            scope='C3_conv1x1')
    C5_resize = tf.image.resize_bilinear(C5, (C4_shape[1], C4_shape[2]))
    C5_resize = slim.conv2d(C5_resize, 1024, [1, 1],
                            trainable=is_training,
                            weights_initializer=cfgs.INITIALIZER,
                            activation_fn=tf.nn.relu,
                            scope='C5_conv1x1')
    C_integrate = (C4_resize + C3_resize + C5_resize) / 3
    # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')
    return C_integrate
def resnet_base(img_batch, scope_name, is_training=True):
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError(
            'Only resnet_v1_50 and resnet_v1_101 are supported.')

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=2),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
    ]  # When using FPN, the stride list is [1, 2, 2].

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave
            # inconsistently for images of different sizes: sometimes 0, sometimes 1.
            net = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2, scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1')

    not_freezed = [False] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]
    # FIXED_BLOCKS can be 1~3.

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, end_points_C2 = resnet_v1.resnet_v1(net, blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')
    # add_heatmap(C2, name='Layer2/C2_heat')

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, end_points_C3 = resnet_v1.resnet_v1(C2, blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')
    # add_heatmap(C3, name='Layer3/C3_heat')

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, end_points_C4 = resnet_v1.resnet_v1(C3, blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # add_heatmap(C4, name='Layer4/C4_heat')
    # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5, end_points_C5 = resnet_v1.resnet_v1(C4, blocks[3:4],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
    # add_heatmap(C5, name='Layer5/C5_heat')

    # C2-C4 are taken one unit before each block's final stride-2 unit; block4
    # has stride 1, so C5 is simply its last unit. The endpoints therefore sit
    # at strides 4, 8, 16, 32.
    feature_dict = {
        'C2': end_points_C2['{}/block1/unit_2/bottleneck_v1'.format(scope_name)],
        'C3': end_points_C3['{}/block2/unit_3/bottleneck_v1'.format(scope_name)],
        'C4': end_points_C4['{}/block3/unit_{}/bottleneck_v1'.format(
            scope_name, middle_num_units - 1)],
        'C5': end_points_C5['{}/block4/unit_3/bottleneck_v1'.format(scope_name)],
        # 'C5': end_points_C5['{}/block4'.format(scope_name)],
    }

    pyramid_dict = {}
    with tf.variable_scope('build_pyramid'):
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY),
                            activation_fn=None,
                            normalizer_fn=None):
            P5 = slim.conv2d(feature_dict['C5'],
                             num_outputs=256, kernel_size=[1, 1], stride=1,
                             scope='build_P5')
            pyramid_dict['P5'] = P5

            for level in range(4, 2, -1):  # build [P4, P3]
                pyramid_dict['P%d' % level] = fusion_two_layer(
                    C_i=feature_dict['C%d' % level],
                    P_j=pyramid_dict['P%d' % (level + 1)],
                    scope='build_P%d' % level)

            for level in range(5, 2, -1):
                pyramid_dict['P%d' % level] = slim.conv2d(
                    pyramid_dict['P%d' % level],
                    num_outputs=256, kernel_size=[3, 3], padding='SAME',
                    stride=1, scope='fuse_P%d' % level)

            p6 = slim.conv2d(pyramid_dict['P5'] if cfgs.USE_P5 else feature_dict['C5'],
                             num_outputs=256, kernel_size=[3, 3], padding='SAME',
                             stride=2, scope='p6_conv')
            pyramid_dict['P6'] = p6

            p7 = tf.nn.relu(p6, name='p6_relu')
            p7 = slim.conv2d(p7,
                             num_outputs=256, kernel_size=[3, 3], padding='SAME',
                             stride=2, scope='p7_conv')
            pyramid_dict['P7'] = p7

    # for level in range(7, 1, -1):
    #     add_heatmap(pyramid_dict['P%d' % level], name='Layer%d/P%d_heat' % (level, level))

    return pyramid_dict
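# `fusion_two_layer` is not defined in this excerpt. Given how it is called
# (a finer backbone map C_i plus the coarser pyramid map P_{j}), it is
# presumably the standard FPN top-down merge; the sketch below is a plausible
# implementation under that assumption, not necessarily the repo's exact code.
import tensorflow as tf
import tensorflow.contrib.slim as slim

def fusion_two_layer(C_i, P_j, scope):
    """Sketch: upsample P_j to C_i's spatial size, project C_i to the same
    depth with a 1x1 lateral conv, and add the two maps."""
    with tf.variable_scope(scope):
        h, w = tf.shape(C_i)[1], tf.shape(C_i)[2]
        up_p = tf.image.resize_bilinear(P_j, size=[h, w], name='upsample')
        lateral = slim.conv2d(C_i, num_outputs=256, kernel_size=[1, 1],
                              stride=1, activation_fn=None, scope='lateral_1x1')
        return up_p + lateral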
def resnet_base(img_batch, scope_name, is_training=True):
    '''
    This code is derived from light-head R-CNN
    (https://github.com/zengarden/light_head_rcnn).
    It makes it convenient to freeze blocks, so we adopt this approach.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        # Note: the original message also claimed mobilenetv2 support, but only
        # the two ResNets are handled here.
        raise NotImplementedError(
            'Only resnet_v1_50 and resnet_v1_101 are supported. '
            'Check your network name.')

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        # Use stride 1 for the last conv4 layer.
        resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=1)
    ]  # When using FPN, the stride list is [1, 2, 2].

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave
            # inconsistently for images of different sizes: sometimes 0, sometimes 1.
            net = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2, scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1')

    not_freezed = [False] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]
    # FIXED_BLOCKS can be 1~3.

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, end_points_C2 = resnet_v1.resnet_v1(net, blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, end_points_C3 = resnet_v1.resnet_v1(C2, blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, _ = resnet_v1.resnet_v1(C3, blocks[2:3],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)

    if cfgs.ADD_FUSION:
        # An earlier experiment (kept for reference) fused C2/C3/C4 with atrous
        # convolutions:
        # C3_ = end_points_C3['{}/block2/unit_3/bottleneck_v1'.format(scope_name)]
        # filters1 = tf.random_normal([3, 3, 512, 1024], mean=0.0, stddev=0.01)
        # C3_atrous_conv2d = tf.nn.atrous_conv2d(C3_, filters=filters1, rate=2, padding='SAME')
        # C3_shape = tf.shape(C3_atrous_conv2d)
        # C2_ = end_points_C2['{}/block1/unit_2/bottleneck_v1'.format(scope_name)]
        # filters2 = tf.random_normal([3, 3, 256, 512], mean=0.0, stddev=0.01)
        # filters3 = tf.random_normal([3, 3, 512, 1024], mean=0.0, stddev=0.01)
        # C2_atrous_conv2d = tf.nn.atrous_conv2d(C2_, filters=filters2, rate=2, padding='SAME')
        # C2_atrous_conv2d = tf.nn.atrous_conv2d(C2_atrous_conv2d, filters=filters3, rate=2, padding='SAME')
        # C2_downsampling = tf.image.resize_bilinear(C2_atrous_conv2d, (C3_shape[1], C3_shape[2]))
        # C4_upsampling = tf.image.resize_bilinear(C4, (C3_shape[1], C3_shape[2]))
        # C4 = C3_atrous_conv2d + C4_upsampling + C2_downsampling
        # C4 = slim.conv2d(C4, 1024, [5, 5], trainable=is_training,
        #                  weights_initializer=cfgs.INITIALIZER,
        #                  activation_fn=None, scope='C4_conv5x5')

        # Current fusion: resize C4 to C3's spatial size and add a
        # 3x3-projected C3.
        C3_shape = tf.shape(
            end_points_C3['{}/block2/unit_3/bottleneck_v1'.format(scope_name)])
        C4 = tf.image.resize_bilinear(C4, (C3_shape[1], C3_shape[2]))
        _C3 = slim.conv2d(
            end_points_C3['{}/block2/unit_3/bottleneck_v1'.format(scope_name)],
            1024, [3, 3],
            trainable=is_training,
            weights_initializer=cfgs.INITIALIZER,
            activation_fn=tf.nn.relu,
            scope='C3_conv3x3')
        # _C3 = build_inception(end_points_C3['resnet_v1_101/block2/unit_3/bottleneck_v1'], is_training)
        C4 += _C3

    if cfgs.ADD_ATTENTION:
        with tf.variable_scope('build_C4_attention',
                               regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY)):
            # SE_C4 = squeeze_excitation_layer(C4, 1024, 16, 'SE_C4', is_training)
            add_heatmap(tf.expand_dims(tf.reduce_mean(C4, axis=-1), axis=-1),
                        'add_attention_before')
            C4_attention_layer = build_attention(C4, is_training)
            # C4_attention_layer = build_inception_attention(C4, is_training)
            C4_attention = tf.nn.softmax(C4_attention_layer)
            # Take the foreground channel as a spatial attention mask.
            # C4_attention = C4_attention[:, :, :, 1]
            C4_attention = C4_attention[:, :, :, 0]
            C4_attention = tf.expand_dims(C4_attention, axis=-1)
            add_heatmap(C4_attention, 'C4_attention')

            C4 = tf.multiply(C4_attention, C4)
            # C4 = SE_C4 * C4
            add_heatmap(tf.expand_dims(tf.reduce_mean(C4, axis=-1), axis=-1),
                        'add_attention_after')
    # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')

    if cfgs.ADD_ATTENTION:
        return C4, C4_attention_layer
    else:
        return C4
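# `build_attention` is not shown in this excerpt. Judging from the caller
# (softmax over the last axis, then channel 0 used as a spatial mask), it
# returns a 2-channel foreground/background map. The sketch below is a
# hypothetical branch consistent with that usage, not the repo's exact code.
import tensorflow as tf
import tensorflow.contrib.slim as slim

def build_attention(inputs, is_training):
    """Sketch: a light conv branch predicting a 2-channel map; the caller's
    softmax turns one channel into an attention mask in [0, 1]."""
    net = slim.conv2d(inputs, 256, [3, 3],
                      trainable=is_training,
                      activation_fn=tf.nn.relu,
                      scope='attention_conv3x3')
    net = slim.conv2d(net, 2, [1, 1],
                      trainable=is_training,
                      activation_fn=None,
                      scope='attention_logits')
    return net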
def resnet_base(img_batch, scope_name, is_training=True):
    '''
    This code is derived from light-head R-CNN
    (https://github.com/zengarden/light_head_rcnn).
    It makes it convenient to freeze blocks, so we adopt this approach.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError(
            'Only resnet_v1_50 and resnet_v1_101 are supported. '
            'Check your network name.')

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=2),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
    ]  # When using FPN, the stride list is [1, 2, 2].

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave
            # inconsistently for images of different sizes: sometimes 0, sometimes 1.
            net = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2, scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1')

    not_freezed = [False] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]
    # FIXED_BLOCKS can be 1~3.

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, end_points_C2 = resnet_v1.resnet_v1(net, blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')
    # add_heatmap(C2, name='Layer2/C2_heat')

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, end_points_C3 = resnet_v1.resnet_v1(C2, blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')
    # add_heatmap(C3, name='Layer3/C3_heat')

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, end_points_C4 = resnet_v1.resnet_v1(C3, blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # add_heatmap(C4, name='Layer4/C4_heat')
    # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5, end_points_C5 = resnet_v1.resnet_v1(C4, blocks[3:4],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    # C5 = tf.Print(C5, [tf.shape(C5)], summarize=10, message='C5_shape')
    # add_heatmap(C5, name='Layer5/C5_heat')

    feature_dict = {
        'C2': end_points_C2['{}/block1/unit_2/bottleneck_v1'.format(scope_name)],
        'C3': end_points_C3['{}/block2/unit_3/bottleneck_v1'.format(scope_name)],
        'C4': end_points_C4['{}/block3/unit_{}/bottleneck_v1'.format(
            scope_name, middle_num_units - 1)],
        'C5': end_points_C5['{}/block4/unit_3/bottleneck_v1'.format(scope_name)],
        # 'C5': end_points_C5['{}/block4'.format(scope_name)],
    }

    pyramid_dict = {}
    with tf.variable_scope('build_pyramid'):
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY),
                            activation_fn=None,
                            normalizer_fn=None):
            P5 = slim.conv2d(feature_dict['C5'],
                             num_outputs=256, kernel_size=[1, 1], stride=1,
                             scope='build_P5')
            pyramid_dict['P5'] = P5

            for level in range(4, 1, -1):  # build [P4, P3, P2]
                pyramid_dict['P%d' % level] = fusion_two_layer(
                    C_i=feature_dict['C%d' % level],
                    P_j=pyramid_dict['P%d' % (level + 1)],
                    scope='build_P%d' % level)

            for level in range(5, 1, -1):
                pyramid_dict['P%d' % level] = slim.conv2d(
                    pyramid_dict['P%d' % level],
                    num_outputs=256, kernel_size=[3, 3], padding='SAME',
                    stride=1, scope='fuse_P%d' % level)

    if 'P6' in cfgs.LEVLES:  # 'LEVLES' is the key name as spelled in cfgs
        P6 = slim.avg_pool2d(pyramid_dict['P5'],
                             kernel_size=[1, 1], stride=2,
                             scope='build_P6')
        pyramid_dict['P6'] = P6

    # for level in range(5, 1, -1):
    #     add_heatmap(feature_dict['C%d' % level], name='Layer%d/C%d_heat' % (level, level))
    #     add_heatmap(pyramid_dict['P%d' % level], name='Layer%d/P%d_heat' % (level, level))

    print('FPN levels: {}'.format(cfgs.LEVLES))
    print('base anchor sizes: {}'.format(cfgs.BASE_ANCHOR_SIZE_LIST))
    # Return the levels in config order, e.g. [P2, P3, P4, P5, P6].
    return [pyramid_dict[level_name] for level_name in cfgs.LEVLES]
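# A hypothetical usage sketch for the FPN variant above; the cfgs values shown
# are assumptions for illustration, not the repo's actual config.
# cfgs.LEVLES = ['P2', 'P3', 'P4', 'P5', 'P6']
img = tf.placeholder(tf.float32, [1, None, None, 3])
pyramid = resnet_base(img, scope_name='resnet_v1_50', is_training=True)
# pyramid[i] corresponds to cfgs.LEVLES[i]. With the endpoints chosen above,
# P2..P5 sit at strides 4, 8, 16, 32, and the avg-pooled P6 at stride 64.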
def resnet_base(rgb_img_batch, ir_img_batch, scope_name, is_training=True):
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError(
            'Only resnet_v1_50 and resnet_v1_101 are supported.')

    org_scope_name = scope_name

    # Note: the block names hard-code 'RGB/resnet_v1_50' (and 'IR/resnet_v1_50'
    # below) even when scope_name is resnet_v1_101; only middle_num_units
    # changes with the network choice.
    blocks = [resnet_v1_block('RGB/resnet_v1_50/block1', base_depth=64, num_units=3, stride=2),
              resnet_v1_block('RGB/resnet_v1_50/block2', base_depth=128, num_units=4, stride=2),
              resnet_v1_block('RGB/resnet_v1_50/block3', base_depth=256, num_units=middle_num_units, stride=2),
              resnet_v1_block('RGB/resnet_v1_50/block4', base_depth=512, num_units=3, stride=1)]
    # When using FPN, the stride list is [1, 2, 2].

    scope_name = 'RGB/' + org_scope_name
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave
            # inconsistently for images of different sizes: sometimes 0, sometimes 1.
            net_rgb = resnet_utils.conv2d_same(rgb_img_batch, 64, 7, stride=2, scope='conv1')
            net_rgb = tf.pad(net_rgb, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net_rgb = slim.max_pool2d(net_rgb, [3, 3], stride=2, padding='VALID', scope='pool1')

    not_freezed = [False] * cfgs.RGB_FIXED_BLOCKS + (4 - cfgs.RGB_FIXED_BLOCKS) * [True]
    # RGB_FIXED_BLOCKS can be 1~3.

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2_rgb, end_points_C2_rgb = resnet_v1.resnet_v1(net_rgb, blocks[0:1],
                                                        global_pool=False,
                                                        include_root_block=False,
                                                        scope=scope_name)
    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3_rgb, end_points_C3_rgb = resnet_v1.resnet_v1(C2_rgb, blocks[1:2],
                                                        global_pool=False,
                                                        include_root_block=False,
                                                        scope=scope_name)
    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4_rgb, end_points_C4_rgb = resnet_v1.resnet_v1(C3_rgb, blocks[2:3],
                                                        global_pool=False,
                                                        include_root_block=False,
                                                        scope=scope_name)
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5_rgb, end_points_C5_rgb = resnet_v1.resnet_v1(C4_rgb, blocks[3:4],
                                                        global_pool=False,
                                                        include_root_block=False,
                                                        scope=scope_name)

    # The IR stream mirrors the RGB stream with its own variables.
    blocks = [resnet_v1_block('IR/resnet_v1_50/block1', base_depth=64, num_units=3, stride=2),
              resnet_v1_block('IR/resnet_v1_50/block2', base_depth=128, num_units=4, stride=2),
              resnet_v1_block('IR/resnet_v1_50/block3', base_depth=256, num_units=middle_num_units, stride=2),
              resnet_v1_block('IR/resnet_v1_50/block4', base_depth=512, num_units=3, stride=1)]

    scope_name = 'IR/' + org_scope_name
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave
            # inconsistently for images of different sizes: sometimes 0, sometimes 1.
            net_ir = resnet_utils.conv2d_same(ir_img_batch, 64, 7, stride=2, scope='conv1')
            net_ir = tf.pad(net_ir, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net_ir = slim.max_pool2d(net_ir, [3, 3], stride=2, padding='VALID', scope='pool1')

    not_freezed = [False] * cfgs.IR_FIXED_BLOCKS + (4 - cfgs.IR_FIXED_BLOCKS) * [True]
    # IR_FIXED_BLOCKS can be 1~3.

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2_ir, end_points_C2_ir = resnet_v1.resnet_v1(net_ir, blocks[0:1],
                                                      global_pool=False,
                                                      include_root_block=False,
                                                      scope=scope_name)
    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3_ir, end_points_C3_ir = resnet_v1.resnet_v1(C2_ir, blocks[1:2],
                                                      global_pool=False,
                                                      include_root_block=False,
                                                      scope=scope_name)
    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4_ir, end_points_C4_ir = resnet_v1.resnet_v1(C3_ir, blocks[2:3],
                                                      global_pool=False,
                                                      include_root_block=False,
                                                      scope=scope_name)
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5_ir, end_points_C5_ir = resnet_v1.resnet_v1(C4_ir, blocks[3:4],
                                                      global_pool=False,
                                                      include_root_block=False,
                                                      scope=scope_name)

    # The endpoint keys carry a doubled prefix (e.g.
    # 'RGB/resnet_v1_50/RGB/resnet_v1_50/block1/...') because the block names
    # above already embed the scope and resnet_v1 nests them under the variable
    # scope again. Concatenating RGB and IR endpoints along the channel axis
    # doubles the depth at each level.
    multi_end_points_C2 = tf.concat(axis=3, values=[
        end_points_C2_rgb['{}/block1/unit_2/bottleneck_v1'.format('RGB/resnet_v1_50/RGB/' + org_scope_name)],
        end_points_C2_ir['{}/block1/unit_2/bottleneck_v1'.format('IR/resnet_v1_50/IR/' + org_scope_name)]])
    multi_end_points_C3 = tf.concat(axis=3, values=[
        end_points_C3_rgb['{}/block2/unit_3/bottleneck_v1'.format('RGB/resnet_v1_50/RGB/' + org_scope_name)],
        end_points_C3_ir['{}/block2/unit_3/bottleneck_v1'.format('IR/resnet_v1_50/IR/' + org_scope_name)]])
    multi_end_points_C4 = tf.concat(axis=3, values=[
        end_points_C4_rgb['{}/block3/unit_{}/bottleneck_v1'.format('RGB/resnet_v1_50/RGB/' + org_scope_name, middle_num_units - 1)],
        end_points_C4_ir['{}/block3/unit_{}/bottleneck_v1'.format('IR/resnet_v1_50/IR/' + org_scope_name, middle_num_units - 1)]])
    multi_end_points_C5 = tf.concat(axis=3, values=[
        end_points_C5_rgb['{}/block4/unit_3/bottleneck_v1'.format('RGB/resnet_v1_50/RGB/' + org_scope_name)],
        end_points_C5_ir['{}/block4/unit_3/bottleneck_v1'.format('IR/resnet_v1_50/IR/' + org_scope_name)]])

    feature_dict = {'C2': multi_end_points_C2,
                    'C3': multi_end_points_C3,
                    'C4': multi_end_points_C4,
                    'C5': multi_end_points_C5}

    scope_name = org_scope_name
    pyramid_dict = {}
    with tf.variable_scope('build_pyramid'):
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY),
                            activation_fn=None,
                            normalizer_fn=None):
            conv_channels = 256
            last_fm = None
            for i in range(3):  # C5 -> P5, C4 -> P4, C3 -> P3
                fm = feature_dict['C{}'.format(5 - i)]
                fm_1x1_conv = slim.conv2d(fm, num_outputs=conv_channels,
                                          kernel_size=[1, 1], stride=1,
                                          scope='p{}_1x1_conv'.format(5 - i))
                if last_fm is not None:
                    h, w = tf.shape(fm_1x1_conv)[1], tf.shape(fm_1x1_conv)[2]
                    last_resize = tf.image.resize_bilinear(last_fm, size=[h, w],
                                                           name='p{}_up2x'.format(5 - i))
                    fm_1x1_conv = fm_1x1_conv + last_resize
                last_fm = fm_1x1_conv
                fm_3x3_conv = slim.conv2d(fm_1x1_conv, num_outputs=conv_channels,
                                          kernel_size=[3, 3], padding='SAME',
                                          stride=1, scope='p{}_3x3_conv'.format(5 - i))
                pyramid_dict['P{}'.format(5 - i)] = fm_3x3_conv

            p6 = slim.conv2d(pyramid_dict['P5'], num_outputs=conv_channels,
                             kernel_size=[3, 3], padding='SAME', stride=2,
                             scope='p6_conv')
            pyramid_dict['P6'] = p6

            p7 = tf.nn.relu(p6)
            p7 = slim.conv2d(p7, num_outputs=conv_channels,
                             kernel_size=[3, 3], padding='SAME', stride=2,
                             scope='p7_conv')
            pyramid_dict['P7'] = p7

    # for level in range(7, 1, -1):
    #     add_heatmap(pyramid_dict['P%d' % level], name='Layer%d/P%d_heat' % (level, level))

    return pyramid_dict
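# A hypothetical usage sketch for the two-stream backbone above. The IR batch
# is assumed here to be 3-channel (e.g. a single IR channel replicated) so
# both stems share the same conv1 input shape; that is an assumption, not
# something this excerpt states.
rgb_batch = tf.placeholder(tf.float32, [1, None, None, 3])
ir_batch = tf.placeholder(tf.float32, [1, None, None, 3])
pyramid = resnet_base(rgb_batch, ir_batch, scope_name='resnet_v1_50', is_training=True)
# pyramid maps 'P3'..'P7' to 256-channel maps; the RGB/IR fusion happened
# earlier, when the per-level endpoints were concatenated along channels.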
def resnet_base(img_batch, scope_name, is_training=True):
    '''
    This code is derived from light-head R-CNN
    (https://github.com/zengarden/light_head_rcnn).
    It makes it convenient to freeze blocks, so we adopt this approach.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError(
            'Only resnet_v1_50 and resnet_v1_101 are supported. '
            'Check your network name.')

    blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
              resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
              # Use stride 1 for the last conv4 layer.
              resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=1)]
    # When using FPN, the stride list is [1, 2, 2].

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave
            # inconsistently for images of different sizes: sometimes 0, sometimes 1.
            net = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2, scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1')

    not_freezed = [False] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]
    # FIXED_BLOCKS can be 1~3.

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, _ = resnet_v1.resnet_v1(net, blocks[0:1],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)
    # C2 = tf.Print(C2, [tf.shape(C2)], summarize=10, message='C2_shape')

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, _ = resnet_v1.resnet_v1(C2, blocks[1:2],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)
    # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape')

    with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, _ = resnet_v1.resnet_v1(C3, blocks[2:3],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)
    # C4 = tf.Print(C4, [tf.shape(C4)], summarize=10, message='C4_shape')

    return C2, C4
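# A quick shape check for the layout above (a sketch; the numbers follow from
# the stride configuration shown: root block stride 4, block1 and block2
# stride 2, block3 stride 1, and bottleneck output depth = 4 * base_depth):
img = tf.placeholder(tf.float32, [1, 512, 512, 3])
C2, C4 = resnet_base(img, scope_name='resnet_v1_50', is_training=False)
# C2: stride 8,  depth 4 * 64  -> [1, 64, 64, 256]
# C4: stride 16, depth 4 * 256 -> [1, 32, 32, 1024]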
def resnet_v1_slim(inputs,
                   num_classes=None,
                   global_pool=True,
                   output_stride=None,
                   reuse=None,
                   # the above parameters are passed straight through to
                   # resnet_v1.resnet_v1
                   scope='resnet_v1_slim'):
    blocks = [
        resnet_utils.Block('block1', resnet_v1.bottleneck,
                           [(64, 32, 1)] * 2 + [(64, 32, 2)]),
        # The last argument of Block is a list of "bottleneck" unit
        # configurations. Each entry is of the form (depth, in_depth, stride).
        # Each bottleneck unit consists of 3 layers:
        #   convolution from depth channels to in_depth channels
        #   convolution from in_depth channels to in_depth channels
        #   convolution from in_depth channels to depth channels
        # It is called a "bottleneck" because the overall input and output
        # depths (# channels) are the same, while the in_depth in the middle
        # is smaller.
        # Because each bottleneck has 3 layers, the above chain has
        # 3 * (2 + 1) = 9 layers.
        # By convention, all bottleneck units have stride 1 except the last,
        # which has stride 2, so the whole chain reduces the image size by 2.
        # The original ResNet implementation has very long chains and very
        # large depth / in_depth values. That is necessary for very big
        # datasets like ImageNet, but for smaller and simpler datasets we can
        # substantially reduce both, which is what this slim variant does.
        resnet_utils.Block('block2', resnet_v1.bottleneck,
                           [(128, 64, 1)] * 4 + [(128, 64, 2)]),
        # 3 * (4 + 1) = 15 layers
        resnet_utils.Block('block3', resnet_v1.bottleneck,
                           [(256, 64, 1)] * 4 + [(256, 64, 2)]),
        # 3 * (4 + 1) = 15 layers
        resnet_utils.Block('block4', resnet_v1.bottleneck,
                           [(256, 64, 1)] * 2)
        # 3 * 2 = 6 layers
        # So we have 9 + 15 + 15 + 6 = 45 layers. With the two extra layers
        # the framework adds, this network could be called resnet_v1_47 by
        # the ResNet nomenclature.
        # The first 3 blocks each have stride 2 and the last has stride 1,
        # so the blocks alone reduce resolution by a factor of 8 (the root
        # block contributes another factor of 4, for an overall output stride
        # of 32). If output_stride requests less downsampling than that,
        # resnet_v1.resnet_v1 switches to atrous (dilated) convolutions to
        # meet the requested stride.
    ]
    return resnet_v1.resnet_v1(inputs,
                               blocks,
                               num_classes,
                               global_pool,
                               output_stride,
                               include_root_block=True,
                               reuse=reuse,
                               scope=scope)
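# A hypothetical usage sketch: with num_classes=None and global_pool=True the
# network ends in a 1x1 spatial map whose depth equals block4's output depth
# (256 here). The dense head below is illustrative, not part of the snippet.
inputs = tf.placeholder(tf.float32, [8, 64, 64, 3])
net, end_points = resnet_v1_slim(inputs, scope='resnet_v1_slim')
# net has shape [8, 1, 1, 256]; squeeze it to feed a small FC head.
logits = tf.layers.dense(tf.squeeze(net, axis=[1, 2]), units=10)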