def rpn(self, image, features, inputs):
    """
    Attach the RPN head to the feature map and generate region proposals.

    1. Run ``rpn_head`` on ``features[0]`` to obtain label logits and box
       logits. In training these are compared against the anchor targets
       supplied in ``inputs`` to compute the RPN losses.
    2. Decode the box logits against the anchors and hand them, together
       with the label logits and the 2D image shape, to
       ``generate_rpn_proposals`` to obtain the proposal boxes
       (absolute coordinates, according to the original notes).

    Args:
        image: image tensor; ``tf.shape(image)[2:]`` is taken as (h, w).
            NOTE(review): the original notes gave the shape as
            (1, None, None, 3), which does not agree with that slicing --
            confirm the actual layout.
        features: list of feature maps; only ``features[0]`` is used.
            The original notes give [(1, None, None, 1024)] for ResNet-C4.
        inputs: mapping with the keys 'anchor_labels' and 'anchor_boxes'.
            The original notes give shapes (None, None, A, 2) and
            (None, None, A, 4) -- TODO confirm.

    Returns:
        ``(BoxProposals(proposal_boxes), losses)``. ``losses`` is the result
        of ``rpn_losses`` when ``self.training`` is true (described in the
        original notes as [label_loss, box_loss]) and ``[]`` otherwise.
    """
    featuremap = features[0]
    rpn_label_logits, rpn_box_logits = rpn_head(
        'rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

    all_anchors = RPNAnchors(
        get_all_anchors(), inputs['anchor_labels'], inputs['anchor_boxes'])
    anchors = all_anchors.narrow_to(featuremap)

    image_shape2d = tf.shape(image)[2:]     # h,w
    pred_boxes_decoded = anchors.decode_logits(rpn_box_logits)  # fHxfWxNAx4, floatbox

    # Training and testing use different top-k budgets around NMS.
    if self.training:
        pre_nms_topk = cfg.RPN.TRAIN_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TRAIN_POST_NMS_TOPK
    else:
        pre_nms_topk = cfg.RPN.TEST_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TEST_POST_NMS_TOPK

    flat_boxes = tf.reshape(pred_boxes_decoded, [-1, 4])
    flat_scores = tf.reshape(rpn_label_logits, [-1])
    proposal_boxes, _proposal_scores = generate_rpn_proposals(
        flat_boxes, flat_scores, image_shape2d, pre_nms_topk, post_nms_topk)

    losses = []
    if self.training:
        losses = rpn_losses(
            anchors.gt_labels, anchors.encoded_gt_boxes(),
            rpn_label_logits, rpn_box_logits)

    return BoxProposals(proposal_boxes), losses
def rpn(self, image, features, inputs):
    """
    Run the RPN head on the first feature map and generate proposals.

    The anchors are built by ``get_all_anchors`` from the stride, sizes,
    ratios and maximum image size found in ``cfg``, then narrowed to the
    feature map.

    Args:
        image: image tensor; ``tf.shape(image)[2:]`` is taken as (h, w).
        features: list of feature maps; only ``features[0]`` is used.
        inputs: mapping with the keys 'anchor_labels' and 'anchor_boxes'.

    Returns:
        ``(BoxProposals(proposal_boxes), losses)``. ``losses`` is the result
        of ``rpn_losses`` when ``self.training`` is true, else ``[]``.
    """
    featuremap = features[0]
    rpn_label_logits, rpn_box_logits = rpn_head(
        'rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

    anchor_grid = get_all_anchors(
        stride=cfg.RPN.ANCHOR_STRIDE,
        sizes=cfg.RPN.ANCHOR_SIZES,
        ratios=cfg.RPN.ANCHOR_RATIOS,
        max_size=cfg.PREPROC.MAX_SIZE)
    anchors = RPNAnchors(
        anchor_grid, inputs['anchor_labels'], inputs['anchor_boxes'])
    anchors = anchors.narrow_to(featuremap)

    image_shape2d = tf.shape(image)[2:]     # h,w
    pred_boxes_decoded = anchors.decode_logits(rpn_box_logits)  # fHxfWxNAx4, floatbox

    # Training and testing use different top-k budgets around NMS.
    if self.training:
        pre_nms_topk = cfg.RPN.TRAIN_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TRAIN_POST_NMS_TOPK
    else:
        pre_nms_topk = cfg.RPN.TEST_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TEST_POST_NMS_TOPK

    flat_boxes = tf.reshape(pred_boxes_decoded, [-1, 4])
    flat_scores = tf.reshape(rpn_label_logits, [-1])
    proposal_boxes, _proposal_scores = generate_rpn_proposals(
        flat_boxes, flat_scores, image_shape2d, pre_nms_topk, post_nms_topk)

    losses = []
    if self.training:
        losses = rpn_losses(
            anchors.gt_labels, anchors.encoded_gt_boxes(),
            rpn_label_logits, rpn_box_logits)

    return BoxProposals(proposal_boxes), losses
def _get_anchors(self, shape2d):
    """
    Slice the full anchor constant down to the given 2D extent.

    Args:
        shape2d: indexable whose entries 0 and 1 give the slice sizes along
            the first two axes of the anchor constant.

    Returns:
        FSxFSxNAx4 anchors (per the original note), sliced from the origin
        with all of the last two axes kept.
    """
    # FSxFSxNAx4 (FS=MAX_SIZE//ANCHOR_STRIDE)
    with tf.name_scope('anchors'):
        all_anchors = tf.constant(
            get_all_anchors(), name='all_anchors', dtype=tf.float32)
        slice_begin = [0, 0, 0, 0]
        # -1 keeps everything that remains along the last two axes.
        slice_size = tf.stack([shape2d[0], shape2d[1], -1, -1])
        fm_anchors = tf.slice(
            all_anchors, slice_begin, slice_size, name='fm_anchors')
        return fm_anchors
def _get_anchors(self, image):
    """
    Slice the full anchor constant down to the extent implied by ``image``.

    The slice sizes along the first two axes are the first two entries of
    ``tf.shape(image)``, each floor-divided by ``config.ANCHOR_STRIDE``.

    Args:
        image: tensor whose first two dimensions are used as the spatial
            size (presumably an HWC image -- TODO confirm against caller).

    Returns:
        FSxFSxNAx4 anchors (per the original note), sliced from the origin
        with all of the last two axes kept.
    """
    # FSxFSxNAx4 (FS=MAX_SIZE//ANCHOR_STRIDE)
    with tf.name_scope('anchors'):
        all_anchors = tf.constant(
            get_all_anchors(), name='all_anchors', dtype=tf.float32)
        fm_dim0 = tf.shape(image)[0] // config.ANCHOR_STRIDE
        fm_dim1 = tf.shape(image)[1] // config.ANCHOR_STRIDE
        # -1 keeps everything that remains along the last two axes.
        slice_size = tf.stack([fm_dim0, fm_dim1, -1, -1])
        fm_anchors = tf.slice(
            all_anchors, [0, 0, 0, 0], slice_size, name='fm_anchors')
        return fm_anchors
def rpn(self, image, features, inputs):
    """
    Run the RPN head, generate proposals and compute the RPN losses.

    This variant always uses the training top-k settings
    (``cfg.RPN.TRAIN_PRE_NMS_TOPK`` / ``cfg.RPN.TRAIN_POST_NMS_TOPK``) and
    always computes the losses; it does not check a training flag.

    Args:
        image: image tensor; ``tf.shape(image)[2:]`` is taken as (h, w).
        features: list of feature maps; only ``features[0]`` is used.
        inputs: mapping with the keys 'anchor_labels' and 'anchor_boxes'.

    Returns:
        ``(BoxProposals(proposal_boxes), losses)`` where ``losses`` is the
        result of ``rpn_losses``.
    """
    featuremap = features[0]
    rpn_label_logits, rpn_box_logits = rpn_head(
        'rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

    all_anchors = RPNAnchors(
        get_all_anchors(), inputs['anchor_labels'], inputs['anchor_boxes'])
    anchors = all_anchors.narrow_to(featuremap)

    image_shape2d = tf.shape(image)[2:]     # h,w
    pred_boxes_decoded = anchors.decode_logits(rpn_box_logits)  # fHxfWxNAx4, floatbox

    flat_boxes = tf.reshape(pred_boxes_decoded, [-1, 4])
    flat_scores = tf.reshape(rpn_label_logits, [-1])
    proposal_boxes, _proposal_scores = generate_rpn_proposals(
        flat_boxes, flat_scores, image_shape2d,
        cfg.RPN.TRAIN_PRE_NMS_TOPK,
        cfg.RPN.TRAIN_POST_NMS_TOPK)

    losses = rpn_losses(
        anchors.gt_labels, anchors.encoded_gt_boxes(),
        rpn_label_logits, rpn_box_logits)

    return BoxProposals(proposal_boxes), losses
def build_graph(self, *inputs):
    """
    Build the detection graph: C4 backbone, RPN, then conv5 followed by two
    1x1 convolutions ('rfcn_cls', 'rfcn_reg') that are pooled per box with
    ``VotePooling`` to obtain classification and box-regression outputs.

    Args:
        *inputs: unpacked as (image, anchor_labels, anchor_boxes, gt_boxes,
            gt_labels), plus a trailing gt_masks when ``cfg.MODE_MASK``
            is set.

    Returns:
        The 'total_cost' tensor in training. In inference nothing is
        returned; the outputs are created as graph tensors (e.g.
        'final_masks' when ``cfg.MODE_MASK`` is set).
    """
    is_training = get_current_tower_context().is_training
    if cfg.MODE_MASK:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
    else:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
    image = self.preprocess(image)     # 1CHW

    # The backbone is trained here; a variable-freezing / stop_gradient
    # variant was left disabled in the original.
    featuremap = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
    rpn_label_logits, rpn_box_logits = rpn_head(
        'rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

    anchors = RPNAnchors(get_all_anchors(), anchor_labels, anchor_boxes)
    anchors = anchors.narrow_to(featuremap)

    image_shape2d = tf.shape(image)[2:]     # h,w
    pred_boxes_decoded = anchors.decode_logits(rpn_box_logits)  # fHxfWxNAx4, floatbox

    # Training and testing use different top-k budgets around NMS.
    if is_training:
        pre_nms_topk = cfg.RPN.TRAIN_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TRAIN_POST_NMS_TOPK
    else:
        pre_nms_topk = cfg.RPN.TEST_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TEST_POST_NMS_TOPK

    flat_boxes = tf.reshape(pred_boxes_decoded, [-1, 4])
    flat_scores = tf.reshape(rpn_label_logits, [-1])
    proposal_boxes, proposal_scores = generate_rpn_proposals(
        flat_boxes, flat_scores, image_shape2d, pre_nms_topk, post_nms_topk)

    if is_training:
        # sample proposal boxes in training
        rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        # The boxes to be used to crop RoIs.
        # Use all proposal boxes in inference
        rcnn_boxes = proposal_boxes

    # NOTE(review): `featuremap` is rebound to the conv5 output here, so every
    # later use (including the inference mask branch) sees conv5 features.
    featuremap = resnet_conv5(featuremap, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
    num_class = cfg.DATA.NUM_CLASS
    rfcn_cls = Conv2D(
        'rfcn_cls', featuremap, num_class*3*3, (1, 1),
        data_format='channels_first')
    rfcn_reg = Conv2D(
        'rfcn_reg', featuremap, num_class*4*3*3, (1, 1),
        data_format='channels_first')

    boxes_on_featuremap = rcnn_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
    classify_vote = VotePooling(
        'votepooling_cls', rfcn_cls, boxes_on_featuremap, 3, 3)
    classify_regr = VotePooling(
        'votepooling_regr', rfcn_reg, boxes_on_featuremap, 3, 3, isCls=False)
    classify_regr = tf.reshape(classify_regr, [-1, cfg.DATA.NUM_CLASS, 4])

    if is_training:
        # rpn loss
        rpn_label_loss, rpn_box_loss = rpn_losses(
            anchors.gt_labels, anchors.encoded_gt_boxes(),
            rpn_label_logits, rpn_box_logits)

        # fastrcnn loss
        matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

        fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0), [-1])   # fg inds w.r.t all samples
        fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
        fg_fastrcnn_box_logits = tf.gather(classify_regr, fg_inds_wrt_sample)

        fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
            image, rcnn_labels, fg_sampled_boxes,
            matched_gt_boxes, classify_vote, fg_fastrcnn_box_logits)

        if cfg.MODE_MASK:
            # maskrcnn loss
            fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
            # In training, mask branch shares the same C5 feature.
            # NOTE(review): `feature_fastrcnn` is never assigned in this
            # function, so this line raises NameError whenever MODE_MASK is
            # enabled in training -- needs a per-RoI feature to gather from.
            fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY, num_convs=0)   # #fg x #cat x 14x14

            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(gt_masks, 1),
                fg_sampled_boxes,
                fg_inds_wrt_gt, 14,
                pad_border=False)  # nfg x 1x14x14
            target_masks_for_fg = tf.squeeze(
                target_masks_for_fg, 1, 'sampled_fg_mask_targets')
            mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels, target_masks_for_fg)
        else:
            mrcnn_loss = 0.0

        wd_cost = regularize_cost(
            '.*/W',
            l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')

        loss_terms = [
            rpn_label_loss, rpn_box_loss,
            fastrcnn_label_loss, fastrcnn_box_loss,
            mrcnn_loss,
            wd_cost]
        total_cost = tf.add_n(loss_terms, 'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost

    # Inference path.
    final_boxes, final_labels = self.fastrcnn_inference(
        image_shape2d, rcnn_boxes, classify_vote, classify_regr)

    if cfg.MODE_MASK:
        # NOTE(review): `featuremap` already holds conv5 output at this point
        # and resnet_conv5 is applied to crops of it again -- confirm intent.
        roi_resized = roi_align(
            featuremap, final_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE), 14)
        feature_maskrcnn = resnet_conv5(
            roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
        mask_logits = maskrcnn_upXconv_head(
            'maskrcnn', feature_maskrcnn, cfg.DATA.NUM_CATEGORY, 0)   # #result x #cat x 14x14
        # Pick, for each result, the mask channel of its predicted label.
        row_ids = tf.range(tf.size(final_labels))
        col_ids = tf.to_int32(final_labels) - 1
        indices = tf.stack([row_ids, col_ids], axis=1)
        final_mask_logits = tf.gather_nd(mask_logits, indices)   # #resultx14x14
        tf.sigmoid(final_mask_logits, name='final_masks')
def _build_graph(self, inputs):
    """
    Build the RPN + Fast R-CNN graph for a single image.

    In training, the total cost is stored on ``self.cost``. In inference,
    the outputs are exposed as the named tensors 'fastrcnn_all_probs',
    'fastrcnn_fg_probs' and 'fastrcnn_fg_boxes'.

    Args:
        inputs: unpacked as (image, anchor_labels, anchor_boxes, gt_boxes,
            gt_labels). The image gets a leading batch axis added and is
            later transposed with [0, 3, 1, 2].

    Returns:
        None.
    """
    is_training = get_current_tower_context().is_training
    image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
    image = tf.expand_dims(image, 0)

    # FSxFSxNAx4 (FS=MAX_SIZE//ANCHOR_STRIDE)
    with tf.name_scope('anchors'):
        all_anchors = tf.constant(
            get_all_anchors(), name='all_anchors', dtype=tf.float32)
        # The image still has its batch axis first here, so dims 1 and 2
        # are used as the spatial size.
        fm_dim0 = tf.shape(image)[1] // config.ANCHOR_STRIDE
        fm_dim1 = tf.shape(image)[2] // config.ANCHOR_STRIDE
        slice_size = tf.stack([fm_dim0, fm_dim1, -1, -1])
        fm_anchors = tf.slice(
            all_anchors, [0, 0, 0, 0], slice_size, name='fm_anchors')
        anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

    image = image_preprocess(image, bgr=True)
    image = tf.transpose(image, [0, 3, 1, 2])

    # resnet50
    featuremap = pretrained_resnet_conv4(image, [3, 4, 6])
    rpn_label_logits, rpn_box_logits = rpn_head(featuremap)
    rpn_label_loss, rpn_box_loss = rpn_losses(
        anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)

    decoded_boxes = decode_bbox_target(rpn_box_logits, fm_anchors)  # (fHxfWxNA)x4, floatbox
    flat_scores = tf.reshape(rpn_label_logits, [-1])
    proposal_boxes, proposal_scores = generate_rpn_proposals(
        decoded_boxes, flat_scores, tf.shape(image)[2:])

    if is_training:
        rcnn_sampled_boxes, rcnn_encoded_boxes, rcnn_labels = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
        boxes_on_featuremap = rcnn_sampled_boxes * (1.0 / config.ANCHOR_STRIDE)
        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        feature_fastrcnn = resnet_conv5(roi_resized)    # nxc
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_head(
            feature_fastrcnn, config.NUM_CLASS)

        fastrcnn_label_loss, fastrcnn_box_loss = fastrcnn_losses(
            rcnn_labels, rcnn_encoded_boxes,
            fastrcnn_label_logits, fastrcnn_box_logits)

        wd_cost = regularize_cost(
            '(?:group1|group2|group3|rpn|fastrcnn)/.*W',
            l2_regularizer(1e-4), name='wd_cost')

        loss_terms = [
            rpn_label_loss, rpn_box_loss,
            fastrcnn_label_loss, fastrcnn_box_loss,
            wd_cost]
        self.cost = tf.add_n(loss_terms, 'total_cost')

        add_moving_summary(self.cost)
        add_moving_summary(wd_cost)
        return

    # Inference path: all proposals are classified.
    roi_resized = roi_align(
        featuremap, proposal_boxes * (1.0 / config.ANCHOR_STRIDE), 14)
    feature_fastrcnn = resnet_conv5(roi_resized)    # nxc
    label_logits, fastrcnn_box_logits = fastrcnn_head(
        feature_fastrcnn, config.NUM_CLASS)
    label_probs = tf.nn.softmax(label_logits, name='fastrcnn_all_probs')  # NP,
    labels = tf.argmax(label_logits, axis=1)
    fg_ind, fg_box_logits = fastrcnn_predict_boxes(labels, fastrcnn_box_logits)
    fg_label_probs = tf.gather(label_probs, fg_ind, name='fastrcnn_fg_probs')
    fg_boxes = tf.gather(proposal_boxes, fg_ind)

    # Undo the regression-weight scaling before decoding.
    reg_weights = tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS)
    fg_box_logits = fg_box_logits / reg_weights
    decoded_boxes = decode_bbox_target(fg_box_logits, fg_boxes)  # Nfx4, floatbox
    decoded_boxes = tf.identity(decoded_boxes, name='fastrcnn_fg_boxes')
'anchor_boxes') gt_boxes = tf.placeholder(tf.float32, (None, 4), 'gt_boxes') gt_labels = tf.placeholder(tf.int64, (None, ), 'gt_labels') image = preprocess(image_P) Load_Weights = [] with TowerContext('', is_training=Trainining_is): featuremap = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3]) rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR) anchors = RPNAnchors(get_all_anchors(), anchor_labels, anchor_boxes) anchors = anchors.narrow_to(featuremap) image_shape2d = tf.shape(image)[2:] # h,w pred_boxes_decoded = anchors.decode_logits( rpn_box_logits) # fHxfWxNAx4, floatbox proposal_boxes, proposal_scores = generate_rpn_proposals( tf.reshape(pred_boxes_decoded, [-1, 4]), tf.reshape(rpn_label_logits, [-1]), image_shape2d, cfg.RPN.TRAIN_PRE_NMS_TOPK if is_training else cfg.RPN.TEST_PRE_NMS_TOPK, cfg.RPN.TRAIN_POST_NMS_TOPK if is_training else cfg.RPN.TEST_POST_NMS_TOPK) if is_training: # sample proposal boxes in training rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
def build_graph(self, *inputs):
    """
    Build the C4 Faster R-CNN graph, with an optional mask branch.

    Args:
        *inputs: unpacked as (image, anchor_labels, anchor_boxes, gt_boxes,
            gt_labels), plus a trailing gt_masks when ``config.MODE_MASK``
            is set.

    Returns:
        The 'total_cost' tensor in training. In inference nothing is
        returned; with ``config.MODE_MASK`` the masks are exposed as the
        named tensor 'final_masks'.
    """
    is_training = get_current_tower_context().is_training
    if config.MODE_MASK:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
    else:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
    image = self.preprocess(image)     # 1CHW

    featuremap = resnet_c4_backbone(image, config.RESNET_NUM_BLOCK[:3])
    rpn_label_logits, rpn_box_logits = rpn_head(
        'rpn', featuremap, 1024, config.NUM_ANCHOR)

    fm_anchors, anchor_labels, anchor_boxes = self.narrow_to_featuremap(
        featuremap, get_all_anchors(), anchor_labels, anchor_boxes)
    anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

    image_shape2d = tf.shape(image)[2:]     # h,w
    pred_boxes_decoded = decode_bbox_target(rpn_box_logits, fm_anchors)  # fHxfWxNAx4, floatbox

    # Training and testing use different top-k budgets around NMS.
    if is_training:
        pre_nms_topk = config.TRAIN_PRE_NMS_TOPK
        post_nms_topk = config.TRAIN_POST_NMS_TOPK
    else:
        pre_nms_topk = config.TEST_PRE_NMS_TOPK
        post_nms_topk = config.TEST_POST_NMS_TOPK

    flat_boxes = tf.reshape(pred_boxes_decoded, [-1, 4])
    flat_scores = tf.reshape(rpn_label_logits, [-1])
    proposal_boxes, proposal_scores = generate_rpn_proposals(
        flat_boxes, flat_scores, image_shape2d, pre_nms_topk, post_nms_topk)

    if is_training:
        # sample proposal boxes in training
        rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        # The boxes to be used to crop RoIs.
        # Use all proposal boxes in inference
        rcnn_boxes = proposal_boxes

    boxes_on_featuremap = rcnn_boxes * (1.0 / config.ANCHOR_STRIDE)
    roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

    # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
    # which was fixed in TF 1.6
    def ff_true():
        feature_fastrcnn = resnet_conv5(roi_resized, config.RESNET_NUM_BLOCK[-1])    # nxcx7x7
        feature_gap = GlobalAvgPooling(
            'gap', feature_fastrcnn, data_format='channels_first')
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
            'fastrcnn', feature_gap, config.NUM_CLASS)
        # Return C5 feature to be shared with mask branch
        return feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits

    def ff_false():
        # Zero-row placeholders used when there are no boxes at all.
        ncls = config.NUM_CLASS
        return tf.zeros([0, 2048, 7, 7]), tf.zeros([0, ncls]), tf.zeros([0, ncls - 1, 4])

    # NOTE(review): this compares the version as a number; if the helper
    # returns a float built from "major.minor", TF 1.10+ may compare lower
    # than 1.6 -- confirm against get_tf_version_number.
    if get_tf_version_number() >= 1.6:
        head_outputs = ff_true()
    else:
        logger.warn("This example may drop support for TF < 1.6 soon.")
        head_outputs = tf.cond(
            tf.size(boxes_on_featuremap) > 0, ff_true, ff_false)
    feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = head_outputs

    if is_training:
        # rpn loss
        rpn_label_loss, rpn_box_loss = rpn_losses(
            anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)

        # fastrcnn loss
        matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

        fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0), [-1])   # fg inds w.r.t all samples
        fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
        fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample)

        fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
            image, rcnn_labels, fg_sampled_boxes,
            matched_gt_boxes, fastrcnn_label_logits, fg_fastrcnn_box_logits)

        if config.MODE_MASK:
            # maskrcnn loss
            fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
            # In training, mask branch shares the same C5 feature.
            fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', fg_feature, config.NUM_CLASS, num_convs=0)   # #fg x #cat x 14x14

            matched_gt_masks = tf.gather(gt_masks, fg_inds_wrt_gt)  # nfg x H x W
            # One mask per fg box, so box i crops from mask i.
            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(matched_gt_masks, 1),
                fg_sampled_boxes,
                tf.range(tf.size(fg_inds_wrt_gt)), 14,
                pad_border=False)  # nfg x 1x14x14
            target_masks_for_fg = tf.squeeze(
                target_masks_for_fg, 1, 'sampled_fg_mask_targets')
            mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels, target_masks_for_fg)
        else:
            mrcnn_loss = 0.0

        wd_cost = regularize_cost(
            '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
            l2_regularizer(1e-4), name='wd_cost')

        loss_terms = [
            rpn_label_loss, rpn_box_loss,
            fastrcnn_label_loss, fastrcnn_box_loss,
            mrcnn_loss,
            wd_cost]
        total_cost = tf.add_n(loss_terms, 'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost

    # Inference path.
    final_boxes, final_labels = self.fastrcnn_inference(
        image_shape2d, rcnn_boxes, fastrcnn_label_logits, fastrcnn_box_logits)

    if config.MODE_MASK:
        # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
        def f1():
            roi_resized = roi_align(
                featuremap, final_boxes * (1.0 / config.ANCHOR_STRIDE), 14)
            feature_maskrcnn = resnet_conv5(roi_resized, config.RESNET_NUM_BLOCK[-1])
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', feature_maskrcnn, config.NUM_CLASS, 0)   # #result x #cat x 14x14
            # Pick, for each result, the mask channel of its predicted label.
            row_ids = tf.range(tf.size(final_labels))
            col_ids = tf.to_int32(final_labels) - 1
            indices = tf.stack([row_ids, col_ids], axis=1)
            final_mask_logits = tf.gather_nd(mask_logits, indices)   # #resultx14x14
            return tf.sigmoid(final_mask_logits)

        def no_masks():
            return tf.zeros([0, 14, 14])

        final_masks = tf.cond(tf.size(final_labels) > 0, f1, no_masks)
        tf.identity(final_masks, name='final_masks')
def build_graph(self, *inputs):
    """
    Build the C4 Faster R-CNN graph, with an optional mask branch.

    The Fast R-CNN head is always wrapped in ``tf.cond`` so that an empty
    set of boxes yields zero-row outputs instead of running the head.

    Args:
        *inputs: unpacked as (image, anchor_labels, anchor_boxes, gt_boxes,
            gt_labels), plus a trailing gt_masks when ``config.MODE_MASK``
            is set.

    Returns:
        The 'total_cost' tensor in training. In inference nothing is
        returned; outputs are exposed as the named tensors
        'fastrcnn_all_probs', 'fastrcnn_all_boxes', 'final_probs',
        'final_boxes', 'final_labels' and, with ``config.MODE_MASK``,
        'final_masks'.
    """
    is_training = get_current_tower_context().is_training
    if config.MODE_MASK:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
    else:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
    image = self._preprocess(image)     # 1CHW

    featuremap = pretrained_resnet_c4_backbone(image, config.RESNET_NUM_BLOCK[:3])
    rpn_label_logits, rpn_box_logits = rpn_head(
        'rpn', featuremap, 1024, config.NUM_ANCHOR)

    fm_anchors, anchor_labels, anchor_boxes = self.slice_to_featuremap(
        featuremap, get_all_anchors(), anchor_labels, anchor_boxes)
    anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

    image_shape2d = tf.shape(image)[2:]     # h,w
    decoded_boxes = decode_bbox_target(rpn_box_logits, fm_anchors)  # fHxfWxNAx4, floatbox
    flat_boxes = tf.reshape(decoded_boxes, [-1, 4])
    flat_scores = tf.reshape(rpn_label_logits, [-1])
    proposal_boxes, proposal_scores = generate_rpn_proposals(
        flat_boxes, flat_scores, image_shape2d)

    if is_training:
        # sample proposal boxes in training
        rcnn_sampled_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
        boxes_on_featuremap = rcnn_sampled_boxes * (1.0 / config.ANCHOR_STRIDE)
    else:
        # use all proposal boxes in inference
        boxes_on_featuremap = proposal_boxes * (1.0 / config.ANCHOR_STRIDE)

    roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

    # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
    def ff_true():
        feature_fastrcnn = resnet_conv5(roi_resized, config.RESNET_NUM_BLOCK[-1])    # nxcx7x7
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_head(
            'fastrcnn', feature_fastrcnn, config.NUM_CLASS)
        return feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits

    def ff_false():
        # Zero-row placeholders used when there are no boxes at all.
        ncls = config.NUM_CLASS
        return tf.zeros([0, 2048, 7, 7]), tf.zeros([0, ncls]), tf.zeros([0, ncls - 1, 4])

    has_boxes = tf.size(boxes_on_featuremap) > 0
    feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = tf.cond(
        has_boxes, ff_true, ff_false)

    if is_training:
        # rpn loss
        rpn_label_loss, rpn_box_loss = rpn_losses(
            anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)

        # fastrcnn loss
        fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0), [-1])   # fg inds w.r.t all samples
        fg_sampled_boxes = tf.gather(rcnn_sampled_boxes, fg_inds_wrt_sample)

        # Image summary of the sampled foreground boxes, for visualization.
        with tf.name_scope('fg_sample_patch_viz'):
            fg_sampled_patches = crop_and_resize(
                image, fg_sampled_boxes,
                tf.zeros_like(fg_inds_wrt_sample, dtype=tf.int32), 300)
            fg_sampled_patches = tf.transpose(fg_sampled_patches, [0, 2, 3, 1])
            fg_sampled_patches = tf.reverse(fg_sampled_patches, axis=[-1])  # BGR->RGB
            tf.summary.image('viz', fg_sampled_patches, max_outputs=30)

        matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)
        encoded_boxes = encode_bbox_target(
            matched_gt_boxes,
            fg_sampled_boxes) * tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS)
        fastrcnn_label_loss, fastrcnn_box_loss = fastrcnn_losses(
            rcnn_labels, fastrcnn_label_logits,
            encoded_boxes,
            tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample))

        if config.MODE_MASK:
            # maskrcnn loss
            fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
            fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
            mask_logits = maskrcnn_head(
                'maskrcnn', fg_feature, config.NUM_CLASS)   # #fg x #cat x 14x14

            gt_masks_for_fg = tf.gather(gt_masks, fg_inds_wrt_gt)  # nfg x H x W
            # One mask per fg box, so box i crops from mask i.
            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(gt_masks_for_fg, 1),
                fg_sampled_boxes,
                tf.range(tf.size(fg_inds_wrt_gt)), 14,
                pad_border=False)  # nfg x 1x14x14
            target_masks_for_fg = tf.squeeze(
                target_masks_for_fg, 1, 'sampled_fg_mask_targets')
            mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels, target_masks_for_fg)
        else:
            mrcnn_loss = 0.0

        wd_cost = regularize_cost(
            '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
            l2_regularizer(1e-4), name='wd_cost')

        loss_terms = [
            rpn_label_loss, rpn_box_loss,
            fastrcnn_label_loss, fastrcnn_box_loss,
            mrcnn_loss,
            wd_cost]
        total_cost = tf.add_n(loss_terms, 'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost

    # Inference path.
    label_probs = tf.nn.softmax(
        fastrcnn_label_logits, name='fastrcnn_all_probs')  # #proposal x #Class
    anchors = tf.tile(
        tf.expand_dims(proposal_boxes, 1),
        [1, config.NUM_CLASS - 1, 1])   # #proposal x #Cat x 4
    # Undo the regression-weight scaling before decoding.
    decoded_boxes = decode_bbox_target(
        fastrcnn_box_logits / tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS),
        anchors)
    decoded_boxes = clip_boxes(
        decoded_boxes, image_shape2d, name='fastrcnn_all_boxes')

    # indices: Nx2. Each index into (#proposal, #category)
    pred_indices, final_probs = fastrcnn_predictions(decoded_boxes, label_probs)
    final_probs = tf.identity(final_probs, 'final_probs')
    final_boxes = tf.gather_nd(decoded_boxes, pred_indices, name='final_boxes')
    final_labels = tf.add(pred_indices[:, 1], 1, name='final_labels')

    if config.MODE_MASK:
        # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
        def f1():
            roi_resized = roi_align(
                featuremap, final_boxes * (1.0 / config.ANCHOR_STRIDE), 14)
            feature_maskrcnn = resnet_conv5(roi_resized, config.RESNET_NUM_BLOCK[-1])
            mask_logits = maskrcnn_head(
                'maskrcnn', feature_maskrcnn, config.NUM_CLASS)   # #result x #cat x 14x14
            # Pick, for each result, the mask channel of its predicted label.
            row_ids = tf.range(tf.size(final_labels))
            col_ids = tf.to_int32(final_labels) - 1
            indices = tf.stack([row_ids, col_ids], axis=1)
            final_mask_logits = tf.gather_nd(mask_logits, indices)   # #resultx14x14
            return tf.sigmoid(final_mask_logits)

        def no_masks():
            return tf.zeros([0, 14, 14])

        final_masks = tf.cond(tf.size(final_probs) > 0, f1, no_masks)
        tf.identity(final_masks, name='final_masks')
def build_graph(self, *inputs):
    """
    Build a C4 Faster R-CNN graph whose head predicts labels, boxes and a
    third "mask" output (``fastrcnn_outputs`` returns three tensors here).

    Args:
        *inputs: positional tensors, zipped with ``self.input_names`` into a
            dict. The keys 'image', 'anchor_labels', 'anchor_boxes',
            'gt_boxes', 'gt_labels' and 'gt_masks' are read in both
            training and inference.

    Returns:
        The 'total_cost' tensor in training. In inference nothing is
        returned; the selected mask logits are exposed as the named tensor
        'final_masks'.
    """
    inputs = dict(zip(self.input_names, inputs))
    is_training = get_current_tower_context().is_training
    image = self.preprocess(inputs['image'])     # 1CHW

    featuremap = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
    rpn_label_logits, rpn_box_logits = rpn_head(
        'rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

    anchors = RPNAnchors(
        get_all_anchors(), inputs['anchor_labels'], inputs['anchor_boxes'])
    anchors = anchors.narrow_to(featuremap)

    image_shape2d = tf.shape(image)[2:]     # h,w
    pred_boxes_decoded = anchors.decode_logits(rpn_box_logits)  # fHxfWxNAx4, floatbox

    # Training and testing use different top-k budgets around NMS.
    if is_training:
        pre_nms_topk = cfg.RPN.TRAIN_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TRAIN_POST_NMS_TOPK
    else:
        pre_nms_topk = cfg.RPN.TEST_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TEST_POST_NMS_TOPK

    flat_boxes = tf.reshape(pred_boxes_decoded, [-1, 4])
    flat_scores = tf.reshape(rpn_label_logits, [-1])
    proposal_boxes, proposal_scores = generate_rpn_proposals(
        flat_boxes, flat_scores, image_shape2d, pre_nms_topk, post_nms_topk)

    gt_boxes = inputs['gt_boxes']
    gt_labels = inputs['gt_labels']
    gt_masks = inputs['gt_masks']

    if is_training:
        # sample proposal boxes in training
        rcnn_boxes, rcnn_labels, fg_inds_wrt_gt, rcnn_masks = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels, gt_masks)
        matched_gt_boxes = tf.gather(
            gt_boxes, fg_inds_wrt_gt, name='gt_boxes_per_fg_proposal')
        matched_gt_masks = tf.gather(
            gt_masks, fg_inds_wrt_gt, name='gt_masks_per_fg_proposal')
    else:
        # The boxes to be used to crop RoIs.
        # Use all proposal boxes in inference
        rcnn_boxes = proposal_boxes
        # Every proposal gets a constant angle of -45 in its own column.
        angles = tf.ones((tf.shape(proposal_boxes)[0], 1)) * (-45.)
        # Re-express each proposal as (center, size, angle): columns 0:2 and
        # 2:4 are treated as the two box corners.
        x1y1 = proposal_boxes[:, 0:2]
        x2y2 = proposal_boxes[:, 2:4]
        wh = x2y2 - x1y1
        xy = (x2y2 + x1y1) * 0.5
        rcnn_masks = tf.concat([xy, wh, angles], axis=1)
        # TODO (left open in the original): no targets exist in inference.
        rcnn_labels = None
        matched_gt_boxes = None
        matched_gt_masks = None

    boxes_on_featuremap = rcnn_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
    roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

    feature_fastrcnn = resnet_conv5(
        roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])    # nxcx7x7
    # Keep C5 feature to be shared with mask branch
    feature_gap = GlobalAvgPooling(
        'gap', feature_fastrcnn, data_format='channels_first')
    fastrcnn_label_logits, fastrcnn_box_logits, fastrcnn_mask_logits = fastrcnn_outputs(
        'fastrcnn', feature_gap, cfg.DATA.NUM_CLASS)

    fastrcnn_head = FastRCNNHead(
        rcnn_boxes, rcnn_masks,
        fastrcnn_box_logits, fastrcnn_mask_logits, fastrcnn_label_logits,
        rcnn_labels, matched_gt_boxes, matched_gt_masks)

    if is_training:
        # rpn loss
        rpn_label_loss, rpn_box_loss = rpn_losses(
            anchors.gt_labels, anchors.encoded_gt_boxes(),
            rpn_label_logits, rpn_box_logits)

        # fastrcnn loss
        fastrcnn_label_loss, fastrcnn_box_loss, mask_loss = fastrcnn_head.losses()

        wd_cost = regularize_cost(
            '.*/W', l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')

        loss_terms = [
            rpn_label_loss, rpn_box_loss,
            fastrcnn_label_loss, fastrcnn_box_loss,
            mask_loss,
            wd_cost]
        total_cost = tf.add_n(loss_terms, 'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost

    # Inference path. TODO (left open in the original).
    final_boxes, final_labels = self.fastrcnn_inference(
        image_shape2d, fastrcnn_head)
    # Pick, for each result, the mask entry of its predicted label.
    row_ids = tf.range(tf.size(final_labels))
    col_ids = tf.to_int32(final_labels) - 1
    indices = tf.stack([row_ids, col_ids], axis=1)
    final_mask_logits = tf.gather_nd(
        fastrcnn_mask_logits, indices, name='final_masks')
def build_graph(self, *inputs):
    """
    Build the C4 Faster R-CNN graph, with an optional mask branch.

    Args:
        *inputs: unpacked as (image, anchor_labels, anchor_boxes, gt_boxes,
            gt_labels), plus a trailing gt_masks when ``cfg.MODE_MASK``
            is set.

    Returns:
        The 'total_cost' tensor in training. In inference nothing is
        returned; with ``cfg.MODE_MASK`` the masks are exposed as the named
        tensor 'final_masks'.
    """
    is_training = get_current_tower_context().is_training
    if cfg.MODE_MASK:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
    else:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
    image = self.preprocess(image)     # 1CHW

    featuremap = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
    rpn_label_logits, rpn_box_logits = rpn_head(
        'rpn', featuremap, 1024, cfg.RPN.NUM_ANCHOR)

    fm_anchors, anchor_labels, anchor_boxes = self.narrow_to_featuremap(
        featuremap, get_all_anchors(), anchor_labels, anchor_boxes)
    anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

    image_shape2d = tf.shape(image)[2:]     # h,w
    pred_boxes_decoded = decode_bbox_target(rpn_box_logits, fm_anchors)  # fHxfWxNAx4, floatbox

    # Training and testing use different top-k budgets around NMS.
    if is_training:
        pre_nms_topk = cfg.RPN.TRAIN_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TRAIN_POST_NMS_TOPK
    else:
        pre_nms_topk = cfg.RPN.TEST_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TEST_POST_NMS_TOPK

    flat_boxes = tf.reshape(pred_boxes_decoded, [-1, 4])
    flat_scores = tf.reshape(rpn_label_logits, [-1])
    proposal_boxes, proposal_scores = generate_rpn_proposals(
        flat_boxes, flat_scores, image_shape2d, pre_nms_topk, post_nms_topk)

    if is_training:
        # sample proposal boxes in training
        rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        # The boxes to be used to crop RoIs.
        # Use all proposal boxes in inference
        rcnn_boxes = proposal_boxes

    boxes_on_featuremap = rcnn_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
    roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

    feature_fastrcnn = resnet_conv5(
        roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])    # nxcx7x7
    # Keep C5 feature to be shared with mask branch
    feature_gap = GlobalAvgPooling(
        'gap', feature_fastrcnn, data_format='channels_first')
    fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
        'fastrcnn', feature_gap, cfg.DATA.NUM_CLASS)

    if is_training:
        # rpn loss
        rpn_label_loss, rpn_box_loss = rpn_losses(
            anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)

        # fastrcnn loss
        matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

        fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0), [-1])   # fg inds w.r.t all samples
        fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
        fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample)

        fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
            image, rcnn_labels, fg_sampled_boxes,
            matched_gt_boxes, fastrcnn_label_logits, fg_fastrcnn_box_logits)

        if cfg.MODE_MASK:
            # maskrcnn loss
            fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
            # In training, mask branch shares the same C5 feature.
            fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY, num_convs=0)   # #fg x #cat x 14x14

            # All gt masks are passed in; fg_inds_wrt_gt selects which mask
            # each fg box is cropped from.
            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(gt_masks, 1),
                fg_sampled_boxes,
                fg_inds_wrt_gt, 14,
                pad_border=False)  # nfg x 1x14x14
            target_masks_for_fg = tf.squeeze(
                target_masks_for_fg, 1, 'sampled_fg_mask_targets')
            mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels, target_masks_for_fg)
        else:
            mrcnn_loss = 0.0

        wd_cost = regularize_cost(
            '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
            l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')

        loss_terms = [
            rpn_label_loss, rpn_box_loss,
            fastrcnn_label_loss, fastrcnn_box_loss,
            mrcnn_loss,
            wd_cost]
        total_cost = tf.add_n(loss_terms, 'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost

    # Inference path.
    final_boxes, final_labels = self.fastrcnn_inference(
        image_shape2d, rcnn_boxes, fastrcnn_label_logits, fastrcnn_box_logits)

    if cfg.MODE_MASK:
        roi_resized = roi_align(
            featuremap, final_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE), 14)
        feature_maskrcnn = resnet_conv5(
            roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
        mask_logits = maskrcnn_upXconv_head(
            'maskrcnn', feature_maskrcnn, cfg.DATA.NUM_CATEGORY, 0)   # #result x #cat x 14x14
        # Pick, for each result, the mask channel of its predicted label.
        row_ids = tf.range(tf.size(final_labels))
        col_ids = tf.to_int32(final_labels) - 1
        indices = tf.stack([row_ids, col_ids], axis=1)
        final_mask_logits = tf.gather_nd(mask_logits, indices)   # #resultx14x14
        tf.sigmoid(final_mask_logits, name='final_masks')
def build_graph(self, *inputs):
    """
    Build the C4 Faster R-CNN graph, with an optional mask branch.

    Args:
        *inputs: unpacked as (image, anchor_labels, anchor_boxes, gt_boxes,
            gt_labels), plus a trailing gt_masks when ``config.MODE_MASK``
            is set.

    Returns:
        The 'total_cost' tensor in training. In inference nothing is
        returned; with ``config.MODE_MASK`` the masks are exposed as the
        named tensor 'final_masks'.
    """
    is_training = get_current_tower_context().is_training
    if config.MODE_MASK:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
    else:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
    image = self.preprocess(image)     # 1CHW

    featuremap = resnet_c4_backbone(image, config.RESNET_NUM_BLOCK[:3])
    rpn_label_logits, rpn_box_logits = rpn_head(
        'rpn', featuremap, 1024, config.NUM_ANCHOR)

    fm_anchors, anchor_labels, anchor_boxes = self.narrow_to_featuremap(
        featuremap, get_all_anchors(), anchor_labels, anchor_boxes)
    anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

    image_shape2d = tf.shape(image)[2:]     # h,w
    pred_boxes_decoded = decode_bbox_target(rpn_box_logits, fm_anchors)  # fHxfWxNAx4, floatbox

    # Training and testing use different top-k budgets around NMS.
    if is_training:
        pre_nms_topk = config.TRAIN_PRE_NMS_TOPK
        post_nms_topk = config.TRAIN_POST_NMS_TOPK
    else:
        pre_nms_topk = config.TEST_PRE_NMS_TOPK
        post_nms_topk = config.TEST_POST_NMS_TOPK

    flat_boxes = tf.reshape(pred_boxes_decoded, [-1, 4])
    flat_scores = tf.reshape(rpn_label_logits, [-1])
    proposal_boxes, proposal_scores = generate_rpn_proposals(
        flat_boxes, flat_scores, image_shape2d, pre_nms_topk, post_nms_topk)

    if is_training:
        # sample proposal boxes in training
        rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        # The boxes to be used to crop RoIs.
        # Use all proposal boxes in inference
        rcnn_boxes = proposal_boxes

    boxes_on_featuremap = rcnn_boxes * (1.0 / config.ANCHOR_STRIDE)
    roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

    # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
    # which was fixed in TF 1.6
    def ff_true():
        feature_fastrcnn = resnet_conv5(roi_resized, config.RESNET_NUM_BLOCK[-1])    # nxcx7x7
        feature_gap = GlobalAvgPooling(
            'gap', feature_fastrcnn, data_format='channels_first')
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
            'fastrcnn', feature_gap, config.NUM_CLASS)
        # Return C5 feature to be shared with mask branch
        return feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits

    def ff_false():
        # Zero-row placeholders used when there are no boxes at all.
        ncls = config.NUM_CLASS
        return tf.zeros([0, 2048, 7, 7]), tf.zeros([0, ncls]), tf.zeros([0, ncls - 1, 4])

    # NOTE(review): this compares the version as a number; if the helper
    # returns a float built from "major.minor", TF 1.10+ may compare lower
    # than 1.6 -- confirm against get_tf_version_number.
    if get_tf_version_number() >= 1.6:
        head_outputs = ff_true()
    else:
        logger.warn("This example may drop support for TF < 1.6 soon.")
        head_outputs = tf.cond(
            tf.size(boxes_on_featuremap) > 0, ff_true, ff_false)
    feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = head_outputs

    if is_training:
        # rpn loss
        rpn_label_loss, rpn_box_loss = rpn_losses(
            anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)

        # fastrcnn loss
        matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

        fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0), [-1])   # fg inds w.r.t all samples
        fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
        fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample)

        fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
            image, rcnn_labels, fg_sampled_boxes,
            matched_gt_boxes, fastrcnn_label_logits, fg_fastrcnn_box_logits)

        if config.MODE_MASK:
            # maskrcnn loss
            fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
            # In training, mask branch shares the same C5 feature.
            fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', fg_feature, config.NUM_CLASS, num_convs=0)   # #fg x #cat x 14x14

            # All gt masks are passed in; fg_inds_wrt_gt selects which mask
            # each fg box is cropped from.
            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(gt_masks, 1),
                fg_sampled_boxes,
                fg_inds_wrt_gt, 14,
                pad_border=False)  # nfg x 1x14x14
            target_masks_for_fg = tf.squeeze(
                target_masks_for_fg, 1, 'sampled_fg_mask_targets')
            mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels, target_masks_for_fg)
        else:
            mrcnn_loss = 0.0

        wd_cost = regularize_cost(
            '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
            l2_regularizer(1e-4), name='wd_cost')

        loss_terms = [
            rpn_label_loss, rpn_box_loss,
            fastrcnn_label_loss, fastrcnn_box_loss,
            mrcnn_loss,
            wd_cost]
        total_cost = tf.add_n(loss_terms, 'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost

    # Inference path.
    final_boxes, final_labels = self.fastrcnn_inference(
        image_shape2d, rcnn_boxes, fastrcnn_label_logits, fastrcnn_box_logits)

    if config.MODE_MASK:
        # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
        def f1():
            roi_resized = roi_align(
                featuremap, final_boxes * (1.0 / config.ANCHOR_STRIDE), 14)
            feature_maskrcnn = resnet_conv5(roi_resized, config.RESNET_NUM_BLOCK[-1])
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', feature_maskrcnn, config.NUM_CLASS, 0)   # #result x #cat x 14x14
            # Pick, for each result, the mask channel of its predicted label.
            row_ids = tf.range(tf.size(final_labels))
            col_ids = tf.to_int32(final_labels) - 1
            indices = tf.stack([row_ids, col_ids], axis=1)
            final_mask_logits = tf.gather_nd(mask_logits, indices)   # #resultx14x14
            return tf.sigmoid(final_mask_logits)

        def no_masks():
            return tf.zeros([0, 14, 14])

        final_masks = tf.cond(tf.size(final_labels) > 0, f1, no_masks)
        tf.identity(final_masks, name='final_masks')
def build_graph(self, *inputs):
    """Assemble the C4 Faster R-CNN graph, plus a mask branch if ``cfg.MODE_MASK``.

    Args:
        *inputs: positional input tensors, in the order ``image,
            anchor_labels, anchor_boxes, gt_boxes, gt_labels``, followed by
            ``gt_masks`` when ``cfg.MODE_MASK`` is enabled.

    Returns:
        In training, the ``total_cost`` tensor: RPN losses, Fast R-CNN
        losses, the mask loss (``0.0`` when masks are disabled) and the
        weight-decay cost added together. In inference nothing is returned;
        detections come from ``self.fastrcnn_inference`` and, with
        ``cfg.MODE_MASK``, a sigmoid tensor named ``final_masks`` is created.
    """
    training = get_current_tower_context().is_training
    if cfg.MODE_MASK:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
    else:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
    image = self.preprocess(image)  # 1CHW

    # ---- backbone + RPN head ----
    backbone_blocks = cfg.BACKBONE.RESNET_NUM_BLOCK[:3]
    featuremap = resnet_c4_backbone(image, backbone_blocks)
    rpn_label_logits, rpn_box_logits = rpn_head(
        'rpn', featuremap, 1024, cfg.RPN.NUM_ANCHOR)

    # Restrict the anchors (and their labels/boxes) to this feature map.
    fm_anchors, anchor_labels, anchor_boxes = self.narrow_to_featuremap(
        featuremap, get_all_anchors(), anchor_labels, anchor_boxes)
    anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

    # ---- proposals ----
    image_shape2d = tf.shape(image)[2:]  # h,w
    pred_boxes_decoded = decode_bbox_target(
        rpn_box_logits, fm_anchors)  # fHxfWxNAx4, floatbox
    flat_boxes = tf.reshape(pred_boxes_decoded, [-1, 4])
    flat_scores = tf.reshape(rpn_label_logits, [-1])
    if training:
        pre_nms_topk = cfg.RPN.TRAIN_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TRAIN_POST_NMS_TOPK
    else:
        pre_nms_topk = cfg.RPN.TEST_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TEST_POST_NMS_TOPK
    proposal_boxes, _ = generate_rpn_proposals(
        flat_boxes, flat_scores, image_shape2d, pre_nms_topk, post_nms_topk)

    if training:
        # Training crops a sampled subset of the proposals.
        rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        # Inference crops RoIs from every proposal box.
        rcnn_boxes = proposal_boxes

    # ---- Fast R-CNN head ----
    inv_stride = 1.0 / cfg.RPN.ANCHOR_STRIDE
    rois = roi_align(featuremap, rcnn_boxes * inv_stride, 14)

    # The C5 feature is kept around: the mask branch reuses it in training.
    feature_fastrcnn = resnet_conv5(
        rois, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
    pooled = GlobalAvgPooling(
        'gap', feature_fastrcnn, data_format='channels_first')
    fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
        'fastrcnn', pooled, cfg.DATA.NUM_CLASS)

    if training:
        # RPN losses
        rpn_label_loss, rpn_box_loss = rpn_losses(
            anchor_labels, anchor_boxes_encoded,
            rpn_label_logits, rpn_box_logits)

        # Fast R-CNN losses: box regression only looks at foreground samples.
        matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)
        fg_positions = tf.where(rcnn_labels > 0)
        fg_inds_wrt_sample = tf.reshape(fg_positions, [-1])  # fg inds w.r.t all samples
        fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
        fg_box_logits = tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample)

        fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
            image, rcnn_labels, fg_sampled_boxes,
            matched_gt_boxes, fastrcnn_label_logits, fg_box_logits)

        # Mask R-CNN loss; stays a plain 0.0 when the mask branch is off.
        mrcnn_loss = 0.0
        if cfg.MODE_MASK:
            fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
            fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
            train_mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY,
                num_convs=0)  # #fg x #cat x 14x14

            gt_masks_4d = tf.expand_dims(gt_masks, 1)
            mask_targets = crop_and_resize(
                gt_masks_4d, fg_sampled_boxes, fg_inds_wrt_gt, 14,
                pad_border=False)  # nfg x 1x14x14
            mask_targets = tf.squeeze(
                mask_targets, 1, 'sampled_fg_mask_targets')
            mrcnn_loss = maskrcnn_loss(
                train_mask_logits, fg_labels, mask_targets)

        wd_cost = regularize_cost(
            '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
            l2_regularizer(cfg.TRAIN.WEIGHT_DECAY),
            name='wd_cost')

        loss_terms = [
            rpn_label_loss, rpn_box_loss,
            fastrcnn_label_loss, fastrcnn_box_loss,
            mrcnn_loss, wd_cost,
        ]
        total_cost = tf.add_n(loss_terms, 'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost

    # ---- inference ----
    final_boxes, final_labels = self.fastrcnn_inference(
        image_shape2d, rcnn_boxes, fastrcnn_label_logits, fastrcnn_box_logits)

    if cfg.MODE_MASK:
        # Re-crop from the final detections and run the mask head on them.
        mask_rois = roi_align(featuremap, final_boxes * inv_stride, 14)
        feature_maskrcnn = resnet_conv5(
            mask_rois, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
        all_mask_logits = maskrcnn_upXconv_head(
            'maskrcnn', feature_maskrcnn,
            cfg.DATA.NUM_CATEGORY, 0)  # #result x #cat x 14x14
        # For each detection keep only the mask of its predicted class.
        row_ids = tf.range(tf.size(final_labels))
        class_ids = tf.to_int32(final_labels) - 1
        indices = tf.stack([row_ids, class_ids], axis=1)
        final_mask_logits = tf.gather_nd(all_mask_logits, indices)  # #resultx14x14
        tf.sigmoid(final_mask_logits, name='final_masks')
def build_graph(self, *inputs):
    """Build the C4 detection graph with an extra re-identification branch.

    Args:
        *inputs: positional input tensors, in the order ``image,
            anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_ids,
            orig_shape``.

    Returns:
        In training, the ``total_cost`` tensor: RPN losses, Fast R-CNN
        losses, the normalized re-id loss and the weight-decay cost added
        together. In inference nothing is returned; the tensors
        ``rescaled_final_boxes``, ``feature_vector`` and ``re_id_probs``
        are created by name instead.
    """
    training = get_current_tower_context().is_training
    (image, anchor_labels, anchor_boxes,
     gt_boxes, gt_labels, gt_ids, orig_shape) = inputs
    image = self.preprocess(image)  # 1CHW

    # ---- backbone + RPN head ----
    featuremap = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
    rpn_label_logits, rpn_box_logits = rpn_head(
        'rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

    anchors = RPNAnchors(get_all_anchors(), anchor_labels, anchor_boxes)
    anchors = anchors.narrow_to(featuremap)

    # ---- proposals ----
    image_shape2d = tf.shape(image)[2:]  # h,w
    # Decode the RPN box logits into actual image coordinates.
    pred_boxes_decoded = anchors.decode_logits(
        rpn_box_logits)  # fHxfWxNAx4, floatbox
    flat_boxes = tf.reshape(pred_boxes_decoded, [-1, 4])
    flat_scores = tf.reshape(rpn_label_logits, [-1])
    if training:
        pre_nms_topk = cfg.RPN.TRAIN_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TRAIN_POST_NMS_TOPK
    else:
        pre_nms_topk = cfg.RPN.TEST_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TEST_POST_NMS_TOPK
    proposal_boxes, _ = generate_rpn_proposals(
        flat_boxes, flat_scores, image_shape2d, pre_nms_topk, post_nms_topk)

    if training:
        # Training crops a sampled subset of the proposals.
        rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        # Inference crops RoIs from every proposal box.
        rcnn_boxes = proposal_boxes

    # ---- Fast R-CNN head ----
    rois = roi_align(
        featuremap, rcnn_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE), 14)
    feature_fastrcnn = resnet_conv5(
        rois, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
    pooled = GlobalAvgPooling(
        'gap', feature_fastrcnn, data_format='channels_first')
    fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
        'fastrcnn', pooled, cfg.DATA.NUM_CLASS)

    if training:
        # RPN losses
        rpn_label_loss, rpn_box_loss = rpn_losses(
            anchors.gt_labels, anchors.encoded_gt_boxes(),
            rpn_label_logits, rpn_box_logits)

        # Fast R-CNN losses: box regression only looks at foreground samples.
        matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)
        fg_positions = tf.where(rcnn_labels > 0)
        fg_inds_wrt_sample = tf.reshape(fg_positions, [-1])  # fg inds w.r.t all samples
        fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
        fg_box_logits = tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample)

        fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
            image, rcnn_labels, fg_sampled_boxes,
            matched_gt_boxes, fastrcnn_label_logits, fg_box_logits)

        # Detections that feed the re-id branch. Per the original author's
        # note, turning NMS off gives the branch more training samples.
        if cfg.RE_ID.NMS:
            detect = self.fastrcnn_inference
        else:
            detect = self.fastrcnn_inference_id
        boxes, _, _ = detect(
            image_shape2d, rcnn_boxes,
            fastrcnn_label_logits, fastrcnn_box_logits)

        # Keep only detections whose best IoU with any gt box reaches the
        # configured threshold.
        iou = pairwise_iou(boxes, gt_boxes)
        tp_mask = tf.reduce_max(iou, axis=1) >= cfg.RE_ID.IOU_THRESH
        iou = tf.boolean_mask(iou, tp_mask)

        def _stable_fallback():
            # Constant loss and a zero sample count, used when there is
            # nothing to train the re-id head on.
            return tf.constant(cfg.RE_ID.STABLE_LOSS), tf.constant(0)

        def _id_head_loss(pred_boxes, pred_ids, fmap):
            # Run the re-id head on `pred_boxes` and return its mean
            # cross-entropy loss plus the updated sample counter.
            # NOTE(review): the extent of this scope was reconstructed from
            # a whitespace-collapsed source -- confirm.
            with tf.variable_scope('id_head'):
                counter = tf.get_variable(
                    'num_of_samples_used', initializer=0, trainable=False)
                counter = counter.assign_add(tf.shape(pred_boxes)[0])

                id_rois = roi_align(
                    fmap, pred_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE), 14)
                id_feature = resnet_conv5(
                    id_rois, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
                hidden = GlobalAvgPooling(
                    'gap', id_feature, data_format='channels_first')

                fc_init = tf.variance_scaling_initializer()
                for layer, width in (('fc6', 1024), ('fc7', 1024), ('fc8', 256)):
                    hidden = FullyConnected(
                        layer, hidden, width,
                        kernel_initializer=fc_init, activation=tf.nn.relu)
                id_logits = FullyConnected(
                    'class', hidden, cfg.DATA.NUM_ID,
                    kernel_initializer=tf.random_normal_initializer(
                        stddev=0.01))

                per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=pred_ids, logits=id_logits)
                mean_loss = tf.reduce_mean(per_sample, name='label_loss')
            return mean_loss, counter

        def _loss_for_identified(matched_iou, ids, det_boxes, matched, fmap):
            # Give each kept detection the id of its best-overlapping gt box.
            best_gt = tf.argmax(matched_iou, axis=1)
            det_ids = tf.gather(ids, best_gt)
            kept_boxes = tf.boolean_mask(det_boxes, matched)
            # Per the original author's note, id 1 marks unidentified
            # pedestrians; those detections are dropped here.
            has_identity = tf.not_equal(det_ids, 1)
            det_ids = tf.boolean_mask(det_ids, has_identity)
            kept_boxes = tf.boolean_mask(kept_boxes, has_identity)
            return tf.cond(
                tf.equal(tf.size(kept_boxes), 0),
                _stable_fallback,
                lambda: _id_head_loss(kept_boxes, det_ids, fmap))

        # NOTE(review): the extent of this name scope was reconstructed from
        # a whitespace-collapsed source -- confirm.
        with tf.name_scope('id_head'):
            # No detection passed the IoU threshold: use the constant
            # fallback instead of building a loss on empty tensors.
            re_id_loss, num_of_samples_used = tf.cond(
                tf.equal(tf.size(iou), 0),
                _stable_fallback,
                lambda: _loss_for_identified(
                    iou, gt_ids, boxes, tp_mask, featuremap))
            add_tensor_summary(
                num_of_samples_used, ['scalar'], name='num_of_samples_used')

        unnormed_id_loss = tf.identity(re_id_loss, name='unnormed_id_loss')
        re_id_loss = tf.divide(
            re_id_loss, cfg.RE_ID.LOSS_NORMALIZATION, 're_id_loss')
        add_moving_summary(unnormed_id_loss)
        add_moving_summary(re_id_loss)

        wd_cost = regularize_cost(
            '.*/W', l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')

        loss_terms = [
            rpn_label_loss, rpn_box_loss,
            fastrcnn_label_loss, fastrcnn_box_loss,
            re_id_loss, wd_cost,
        ]
        total_cost = tf.add_n(loss_terms, 'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost

    # ---- inference ----
    if cfg.RE_ID.QUERY_EVAL:
        # Query evaluation uses the gt boxes directly (the original note
        # says they are resized in the dataflow).
        final_boxes = gt_boxes
    else:
        final_boxes, _, _ = self.fastrcnn_inference(
            image_shape2d, rcnn_boxes,
            fastrcnn_label_logits, fastrcnn_box_logits)

    # NOTE(review): the extent of this scope was reconstructed from a
    # whitespace-collapsed source; it decides whether the named output
    # tensors below carry an 'id_head/' prefix -- confirm.
    with tf.variable_scope('id_head'):
        id_rois = roi_align(
            featuremap, final_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE), 14)
        id_feature = resnet_conv5(
            id_rois, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
        hidden = GlobalAvgPooling(
            'gap', id_feature, data_format='channels_first')
        for layer in ('fc6', 'fc7'):
            hidden = FullyConnected(layer, hidden, 1024, activation=tf.nn.relu)
        fv = FullyConnected('fc8', hidden, 256, activation=tf.nn.relu)
        id_logits = FullyConnected(
            'class', fv, cfg.DATA.NUM_ID,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))

    # Map the boxes back to the original image size.
    scale = tf.sqrt(
        tf.cast(image_shape2d[0], tf.float32) /
        tf.cast(orig_shape[0], tf.float32) *
        tf.cast(image_shape2d[1], tf.float32) /
        tf.cast(orig_shape[1], tf.float32))
    rescaled = final_boxes / scale
    # The boxes were clipped inside the graph already, but floating-point
    # scaling can push them out again, so clip once more.
    rescaled = tf_clip_boxes(rescaled, orig_shape)
    tf.identity(rescaled, 'rescaled_final_boxes')
    tf.identity(fv, name='feature_vector')
    tf.nn.softmax(id_logits, name='re_id_probs')
def build_graph(self, *inputs):
    """Build the C4 Faster R-CNN graph (plus mask branch if ``cfg.MODE_MASK``).

    Args:
        *inputs: positional input tensors; they are paired with
            ``self.input_names`` and looked up by name afterwards
            (``image``, ``anchor_labels``, ``anchor_boxes``, ``gt_boxes``,
            ``gt_labels`` and, for the mask loss, ``gt_masks``).

    Returns:
        In training, the ``total_cost`` tensor summing every collected loss
        and the weight-decay cost. In inference nothing is returned;
        predictions come from ``fastrcnn_predictions`` (name scope
        ``output``) and, with ``cfg.MODE_MASK``, a sigmoid tensor named
        ``output/masks`` is created.
    """
    # TODO need to make tensorpack handles dict better
    inputs = dict(zip(self.input_names, inputs))
    training = get_current_tower_context().is_training
    image = self.preprocess(inputs['image'])  # 1CHW

    # ---- backbone + RPN head ----
    featuremap = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
    rpn_label_logits, rpn_box_logits = rpn_head(
        'rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

    anchors = RPNAnchors(
        get_all_anchors(), inputs['anchor_labels'], inputs['anchor_boxes'])
    anchors = anchors.narrow_to(featuremap)

    # ---- proposals ----
    image_shape2d = tf.shape(image)[2:]  # h,w
    pred_boxes_decoded = anchors.decode_logits(
        rpn_box_logits)  # fHxfWxNAx4, floatbox
    flat_boxes = tf.reshape(pred_boxes_decoded, [-1, 4])
    flat_scores = tf.reshape(rpn_label_logits, [-1])
    if training:
        pre_nms_topk = cfg.RPN.TRAIN_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TRAIN_POST_NMS_TOPK
    else:
        pre_nms_topk = cfg.RPN.TEST_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TEST_POST_NMS_TOPK
    proposal_boxes, _ = generate_rpn_proposals(
        flat_boxes, flat_scores, image_shape2d, pre_nms_topk, post_nms_topk)

    gt_boxes = inputs['gt_boxes']
    gt_labels = inputs['gt_labels']
    if training:
        # Training crops a sampled subset of the proposals.
        proposals = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        # Inference crops RoIs from every proposal box.
        proposals = BoxProposals(proposal_boxes)

    # ---- Fast R-CNN head ----
    inv_stride = 1.0 / cfg.RPN.ANCHOR_STRIDE
    rois = roi_align(featuremap, proposals.boxes * inv_stride, 14)

    # The C5 feature is kept around: the mask branch reuses it in training.
    feature_fastrcnn = resnet_conv5(
        rois, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
    pooled = GlobalAvgPooling(
        'gap', feature_fastrcnn, data_format='channels_first')
    fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
        'fastrcnn', pooled, cfg.DATA.NUM_CLASS)

    reg_weights = tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)
    fastrcnn_head = FastRCNNHead(
        proposals, fastrcnn_box_logits, fastrcnn_label_logits, reg_weights)

    if training:
        # Collect RPN losses first, then the Fast R-CNN losses.
        all_losses = list(rpn_losses(
            anchors.gt_labels, anchors.encoded_gt_boxes(),
            rpn_label_logits, rpn_box_logits))
        all_losses.extend(fastrcnn_head.losses())

        if cfg.MODE_MASK:
            # Mask loss, computed on the foreground rows of the shared
            # C5 feature.
            fg_feature = tf.gather(feature_fastrcnn, proposals.fg_inds())
            train_mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY,
                num_convs=0)  # #fg x #cat x 14x14

            gt_masks_4d = tf.expand_dims(inputs['gt_masks'], 1)
            mask_targets = crop_and_resize(
                gt_masks_4d,
                proposals.fg_boxes(),
                proposals.fg_inds_wrt_gt, 14,
                pad_border=False)  # nfg x 1x14x14
            mask_targets = tf.squeeze(
                mask_targets, 1, 'sampled_fg_mask_targets')
            all_losses.append(maskrcnn_loss(
                train_mask_logits, proposals.fg_labels(), mask_targets))

        wd_cost = regularize_cost(
            '.*/W', l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')
        all_losses.append(wd_cost)

        total_cost = tf.add_n(all_losses, 'total_cost')
        add_moving_summary(total_cost, wd_cost)
        return total_cost

    # ---- inference ----
    clipped_boxes = clip_boxes(
        fastrcnn_head.decoded_output_boxes(), image_shape2d,
        name='fastrcnn_all_boxes')
    label_scores = fastrcnn_head.output_scores(name='fastrcnn_all_scores')
    final_boxes, _, final_labels = fastrcnn_predictions(
        clipped_boxes, label_scores, name_scope='output')

    if cfg.MODE_MASK:
        # Re-crop from the final detections and run the mask head on them.
        mask_rois = roi_align(featuremap, final_boxes * inv_stride, 14)
        feature_maskrcnn = resnet_conv5(
            mask_rois, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
        all_mask_logits = maskrcnn_upXconv_head(
            'maskrcnn', feature_maskrcnn,
            cfg.DATA.NUM_CATEGORY, 0)  # #result x #cat x 14x14
        # For each detection keep only the mask of its predicted class.
        row_ids = tf.range(tf.size(final_labels))
        class_ids = tf.to_int32(final_labels) - 1
        indices = tf.stack([row_ids, class_ids], axis=1)
        final_mask_logits = tf.gather_nd(all_mask_logits, indices)  # #resultx14x14
        tf.sigmoid(final_mask_logits, name='output/masks')
def build_graph(self, *inputs):
    """Wire up RPN, Fast R-CNN head and optional mask head on a C4 backbone.

    Args:
        *inputs: positional input tensors; zipped with ``self.input_names``
            into a dict and read by key (``image``, ``anchor_labels``,
            ``anchor_boxes``, ``gt_boxes``, ``gt_labels``, and ``gt_masks``
            for the mask loss).

    Returns:
        The ``total_cost`` tensor in training. ``None`` in inference, where
        the graph instead ends in ``fastrcnn_predictions`` (name scope
        ``output``) and, with ``cfg.MODE_MASK``, a sigmoid tensor named
        ``output/masks``.
    """
    inputs = dict(zip(self.input_names, inputs))
    is_training = get_current_tower_context().is_training

    def _by_mode(train_value, test_value):
        # Pick the training or the testing flavour of a config value.
        return train_value if is_training else test_value

    image = self.preprocess(inputs['image'])  # 1CHW

    # Backbone feature map and RPN head on top of it.
    c4_blocks = cfg.BACKBONE.RESNET_NUM_BLOCK[:3]
    featuremap = resnet_c4_backbone(image, c4_blocks)
    rpn_label_logits, rpn_box_logits = rpn_head(
        'rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

    all_anchors = RPNAnchors(
        get_all_anchors(), inputs['anchor_labels'], inputs['anchor_boxes'])
    anchors = all_anchors.narrow_to(featuremap)

    # RPN proposals.
    image_shape2d = tf.shape(image)[2:]  # h,w
    decoded = anchors.decode_logits(rpn_box_logits)  # fHxfWxNAx4, floatbox
    proposal_boxes, _ = generate_rpn_proposals(
        tf.reshape(decoded, [-1, 4]),
        tf.reshape(rpn_label_logits, [-1]),
        image_shape2d,
        _by_mode(cfg.RPN.TRAIN_PRE_NMS_TOPK, cfg.RPN.TEST_PRE_NMS_TOPK),
        _by_mode(cfg.RPN.TRAIN_POST_NMS_TOPK, cfg.RPN.TEST_POST_NMS_TOPK))

    gt_boxes, gt_labels = inputs['gt_boxes'], inputs['gt_labels']
    if is_training:
        # Sampled proposals (with their targets) are used for training.
        proposals = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        # All proposal boxes are used to crop RoIs in inference.
        proposals = BoxProposals(proposal_boxes)

    # Fast R-CNN head.
    stride_scale = 1.0 / cfg.RPN.ANCHOR_STRIDE
    boxes_on_featuremap = proposals.boxes * stride_scale
    cropped = roi_align(featuremap, boxes_on_featuremap, 14)

    c5_blocks = cfg.BACKBONE.RESNET_NUM_BLOCK[-1]
    # This C5 feature is shared with the mask branch during training.
    feature_fastrcnn = resnet_conv5(cropped, c5_blocks)  # nxcx7x7
    feature_gap = GlobalAvgPooling(
        'gap', feature_fastrcnn, data_format='channels_first')
    label_logits, box_logits = fastrcnn_outputs(
        'fastrcnn', feature_gap, cfg.DATA.NUM_CLASS)

    fastrcnn_head = FastRCNNHead(
        proposals, box_logits, label_logits,
        tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32))

    if is_training:
        losses = []

        # RPN loss terms.
        losses.extend(rpn_losses(
            anchors.gt_labels, anchors.encoded_gt_boxes(),
            rpn_label_logits, rpn_box_logits))

        # Fast R-CNN loss terms.
        losses.extend(fastrcnn_head.losses())

        if cfg.MODE_MASK:
            # Mask loss on the foreground proposals only.
            fg_indices = proposals.fg_inds()
            fg_feature = tf.gather(feature_fastrcnn, fg_indices)
            fg_mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY,
                num_convs=0)  # #fg x #cat x 14x14

            masks_with_channel = tf.expand_dims(inputs['gt_masks'], 1)
            fg_boxes = proposals.fg_boxes()
            target_masks = crop_and_resize(
                masks_with_channel, fg_boxes,
                proposals.fg_inds_wrt_gt, 14,
                pad_border=False)  # nfg x 1x14x14
            target_masks = tf.squeeze(
                target_masks, 1, 'sampled_fg_mask_targets')
            fg_labels = proposals.fg_labels()
            losses.append(
                maskrcnn_loss(fg_mask_logits, fg_labels, target_masks))

        wd_cost = regularize_cost(
            '.*/W',
            l2_regularizer(cfg.TRAIN.WEIGHT_DECAY),
            name='wd_cost')
        losses.append(wd_cost)

        total_cost = tf.add_n(losses, 'total_cost')
        add_moving_summary(total_cost, wd_cost)
        return total_cost

    # Inference: decode, clip and post-process the head's predictions.
    raw_boxes = fastrcnn_head.decoded_output_boxes()
    all_boxes = clip_boxes(raw_boxes, image_shape2d, name='fastrcnn_all_boxes')
    all_scores = fastrcnn_head.output_scores(name='fastrcnn_all_scores')
    final_boxes, _, final_labels = fastrcnn_predictions(
        all_boxes, all_scores, name_scope='output')

    if not cfg.MODE_MASK:
        return None

    # Mask prediction for the final detections.
    det_rois = roi_align(featuremap, final_boxes * stride_scale, 14)
    feature_maskrcnn = resnet_conv5(det_rois, c5_blocks)
    det_mask_logits = maskrcnn_upXconv_head(
        'maskrcnn', feature_maskrcnn,
        cfg.DATA.NUM_CATEGORY, 0)  # #result x #cat x 14x14
    # Select, per detection, the mask channel of its predicted class.
    det_index = tf.range(tf.size(final_labels))
    cls_index = tf.to_int32(final_labels) - 1
    gather_idx = tf.stack([det_index, cls_index], axis=1)
    final_mask_logits = tf.gather_nd(det_mask_logits, gather_idx)  # #resultx14x14
    tf.sigmoid(final_mask_logits, name='output/masks')