def rpn(self, image, features, inputs):
    """Build the multi-level RPN and return its proposals and losses.

    Args:
        image: image tensor; ``tf.shape(image)[2:]`` is used as its (h, w).
        features: list of feature maps, one per level, each fed to
            ``rpn_head``.
        inputs: mapping providing ``anchor_labels_lvl{k}`` and
            ``anchor_boxes_lvl{k}`` for k = 2, 3, ... (one pair per entry
            returned by ``get_all_anchors_fpn()``).

    Returns:
        A ``(BoxProposals, losses)`` pair. ``losses`` is the result of
        ``multilevel_rpn_losses`` when ``self.training`` is truthy and an
        empty list otherwise.
    """
    assert len(cfg.RPN.ANCHOR_SIZES) == len(cfg.FPN.ANCHOR_STRIDES)

    hw = tf.shape(image)[2:]  # h,w

    # Pair each level's anchors with its label/box targets. The input keys
    # number the levels starting from 2.
    multilevel_anchors = []
    for idx, level_anchors in enumerate(get_all_anchors_fpn()):
        level = idx + 2
        multilevel_anchors.append(RPNAnchors(
            level_anchors,
            inputs['anchor_labels_lvl{}'.format(level)],
            inputs['anchor_boxes_lvl{}'.format(level)]))

    # The return value is ignored, so the call is relied on for its effect on
    # the two lists -- NOTE(review): presumably narrows them in place; confirm.
    self.slice_feature_and_anchors(features, multilevel_anchors)

    # Multi-Level RPN Proposals
    head_outputs = [
        rpn_head('rpn', fmap, cfg.FPN.NUM_CHANNEL, len(cfg.RPN.ANCHOR_RATIOS))
        for fmap in features
    ]
    multilevel_label_logits = [out[0] for out in head_outputs]
    multilevel_box_logits = [out[1] for out in head_outputs]
    multilevel_pred_boxes = [
        lvl_anchors.decode_logits(lvl_logits)
        for lvl_anchors, lvl_logits in zip(multilevel_anchors,
                                           multilevel_box_logits)
    ]

    # The proposal scores are not used by this method.
    proposal_boxes, _ = generate_fpn_proposals(
        multilevel_pred_boxes, multilevel_label_logits, hw)

    if not self.training:
        return BoxProposals(proposal_boxes), []

    losses = multilevel_rpn_losses(
        multilevel_anchors, multilevel_label_logits, multilevel_box_logits)
    return BoxProposals(proposal_boxes), losses
def rpn_house(self, image, features, inputs):
    """Build the 'rpn_house' RPN branch and return its proposals and losses.

    Args:
        image: image tensor; ``tf.shape(image)[2:]`` is used as its (h, w).
        features: list of feature maps, one per level, each fed to
            ``rpn_head`` under the name 'rpn_house'.
        inputs: mapping providing ``anchor_labels_lvl{k}_house`` and
            ``anchor_boxes_lvl{k}_house`` for k = 2, 3, ...

    Returns:
        A ``(BoxProposals, losses)`` pair. ``losses`` is the result of
        ``multilevel_rpn_losses_ori`` when ``self.training`` is truthy and an
        empty list otherwise.

    The proposal boxes and scores are also exposed as identity ops named
    'output/boxes_house' and 'output/scores_house'.
    """
    assert len(cfg.RPN.ANCHOR_SIZES) == len(cfg.FPN.ANCHOR_STRIDES)

    hw = tf.shape(image)[2:]  # h,w
    fpn_anchors = get_all_anchors_fpn(
        strides=cfg.FPN.ANCHOR_STRIDES,
        sizes=cfg.RPN.ANCHOR_SIZES,
        ratios=cfg.RPN.ANCHOR_RATIOS,
        max_size=cfg.PREPROC.MAX_SIZE)

    # Pair each level's anchors with its house-specific targets. The input
    # keys number the levels starting from 2.
    multilevel_anchors = []
    for idx, level_anchors in enumerate(fpn_anchors):
        level = idx + 2
        multilevel_anchors.append(RPNAnchors(
            level_anchors,
            inputs['anchor_labels_lvl{}_house'.format(level)],
            inputs['anchor_boxes_lvl{}_house'.format(level)]))

    # The return value is ignored, so the call is relied on for its effect on
    # the two lists -- NOTE(review): presumably narrows them in place; confirm.
    self.slice_feature_and_anchors(features, multilevel_anchors)

    # Multi-Level RPN Proposals
    head_outputs = [
        rpn_head('rpn_house', fmap, cfg.FPN.NUM_CHANNEL,
                 len(cfg.RPN.ANCHOR_RATIOS))
        for fmap in features
    ]
    multilevel_label_logits = [out[0] for out in head_outputs]
    multilevel_box_logits = [out[1] for out in head_outputs]
    multilevel_pred_boxes = [
        lvl_anchors.decode_logits(lvl_logits)
        for lvl_anchors, lvl_logits in zip(multilevel_anchors,
                                           multilevel_box_logits)
    ]

    # Open question from the original author: should multilevel_pred_boxes
    # be filtered before generating proposals?
    proposal_boxes, proposal_scores = generate_fpn_proposals_ori(
        multilevel_pred_boxes, multilevel_label_logits, hw)

    # NOTE(review): the values returned by tf.print are discarded; in graph
    # mode such an op has to be run explicitly before anything is printed.
    # Confirm whether these calls are leftover debugging.
    house_boxes_out = tf.identity(proposal_boxes, name='output/boxes_house')
    tf.print("output_house_bbox = ", house_boxes_out)
    house_scores_out = tf.identity(proposal_scores,
                                   name='output/scores_house')
    tf.print("output_house_score = ", house_scores_out)

    if not self.training:
        return BoxProposals(proposal_boxes), []

    losses = multilevel_rpn_losses_ori(
        multilevel_anchors, multilevel_label_logits, multilevel_box_logits)
    return BoxProposals(proposal_boxes), losses
def rpn_damage(self, image, features, inputs, house_bboxes):
    """Build the 'rpn_damage' RPN branch, masked by the house boxes.

    Args:
        image: image tensor; ``tf.shape(image)[2:]`` is used as its (h, w).
        features: list of feature maps, one per level, each fed to
            ``rpn_head`` under the name 'rpn_damage'.
        inputs: mapping providing ``anchor_labels_lvl{k}_damage`` and
            ``anchor_boxes_lvl{k}_damage`` for k = 2, 3, ...
        house_bboxes: house boxes handed to ``filter_anchors_inner`` to build
            the anchor masks.

    Returns:
        A ``(BoxProposals, losses)`` pair. ``losses`` is the result of
        ``multilevel_rpn_losses`` when ``self.training`` is truthy and an
        empty list otherwise.
    """
    assert len(cfg.RPN.ANCHOR_SIZES) == len(cfg.FPN.ANCHOR_STRIDES)

    hw = tf.shape(image)[2:]  # h,w
    fpn_anchors = get_all_anchors_fpn(
        strides=cfg.FPN.ANCHOR_STRIDES,
        sizes=cfg.RPN.ANCHOR_SIZES,
        ratios=cfg.RPN.ANCHOR_RATIOS,
        max_size=cfg.PREPROC.MAX_SIZE)

    # Filter out the anchors that are not within a house bbox. The masks are
    # passed to both proposal generation and the loss.
    # TODO: IOU filter.
    # NOTE(review): the meaning of the 0.3 argument is defined by
    # filter_anchors_inner -- presumably an overlap threshold; confirm.
    masks = filter_anchors_inner(house_bboxes, fpn_anchors, 0.3)

    # Pair each level's anchors with its damage-specific targets. The input
    # keys number the levels starting from 2.
    multilevel_anchors = []
    for idx, level_anchors in enumerate(fpn_anchors):
        level = idx + 2
        multilevel_anchors.append(RPNAnchors(
            level_anchors,
            inputs['anchor_labels_lvl{}_damage'.format(level)],
            inputs['anchor_boxes_lvl{}_damage'.format(level)]))

    # The return value is ignored, so the call is relied on for its effect on
    # the two lists -- NOTE(review): presumably narrows them in place; confirm.
    self.slice_feature_and_anchors(features, multilevel_anchors)

    # Multi-Level RPN Proposals
    head_outputs = [
        rpn_head('rpn_damage', fmap, cfg.FPN.NUM_CHANNEL,
                 len(cfg.RPN.ANCHOR_RATIOS))
        for fmap in features
    ]
    multilevel_label_logits = [out[0] for out in head_outputs]
    multilevel_box_logits = [out[1] for out in head_outputs]

    # Decode the predicted box logits against each level's anchors.
    multilevel_pred_boxes = [
        lvl_anchors.decode_logits(lvl_logits)
        for lvl_anchors, lvl_logits in zip(multilevel_anchors,
                                           multilevel_box_logits)
    ]

    # The proposal scores are not used by this method.
    proposal_boxes, _ = generate_fpn_proposals(
        multilevel_pred_boxes, multilevel_label_logits, hw, masks)

    if not self.training:
        return BoxProposals(proposal_boxes), []

    losses = multilevel_rpn_losses(
        multilevel_anchors, multilevel_label_logits, multilevel_box_logits,
        masks)
    return BoxProposals(proposal_boxes), losses
def rpn(self, image, features, inputs):
    """Build the multi-level RPN and return its proposals and losses.

    Args:
        image: image tensor; ``tf.shape(image)[2:]`` is used as its (h, w).
        features: list of feature maps, one per level, each fed to
            ``rpn_head``.
        inputs: mapping providing ``anchor_labels_lvl{k}`` and
            ``anchor_boxes_lvl{k}`` for k = 2, 3, ...

    Returns:
        A ``(BoxProposals, losses)`` pair. ``losses`` is the result of
        ``multilevel_rpn_losses`` when ``self.training`` is truthy and an
        empty list otherwise.
    """
    # Debug trace emitted while the graph is being built. A sample recorded
    # for a prediction tower: image shape (1, 3, ?, ?); five feature maps of
    # shape (1, 256, ?, ?) named fpn/posthoc_3x3_p2 .. p5 and fpn/maxpool_p6;
    # 10 inputs.
    print(f"rpn({image.shape} {features} {len(inputs)}")

    assert len(cfg.RPN.ANCHOR_SIZES) == len(cfg.FPN.ANCHOR_STRIDES)

    hw = tf.shape(image)[2:]  # h,w
    fpn_anchors = get_all_anchors_fpn(
        strides=cfg.FPN.ANCHOR_STRIDES,
        sizes=cfg.RPN.ANCHOR_SIZES,
        ratios=cfg.RPN.ANCHOR_RATIOS,
        max_size=cfg.PREPROC.MAX_SIZE)

    # Pair each level's anchors with its label/box targets. The input keys
    # number the levels starting from 2.
    multilevel_anchors = []
    for idx, level_anchors in enumerate(fpn_anchors):
        level = idx + 2
        multilevel_anchors.append(RPNAnchors(
            level_anchors,
            inputs['anchor_labels_lvl{}'.format(level)],
            inputs['anchor_boxes_lvl{}'.format(level)]))

    # The return value is ignored, so the call is relied on for its effect on
    # the two lists -- NOTE(review): presumably narrows them in place; confirm.
    self.slice_feature_and_anchors(features, multilevel_anchors)

    # Multi-Level RPN Proposals
    head_outputs = [
        rpn_head('rpn', fmap, cfg.FPN.NUM_CHANNEL, len(cfg.RPN.ANCHOR_RATIOS))
        for fmap in features
    ]
    multilevel_label_logits = [out[0] for out in head_outputs]
    multilevel_box_logits = [out[1] for out in head_outputs]
    multilevel_pred_boxes = [
        lvl_anchors.decode_logits(lvl_logits)
        for lvl_anchors, lvl_logits in zip(multilevel_anchors,
                                           multilevel_box_logits)
    ]

    # The proposal scores are not used by this method.
    proposal_boxes, _ = generate_fpn_proposals(
        multilevel_pred_boxes, multilevel_label_logits, hw)

    if not self.training:
        return BoxProposals(proposal_boxes), []

    losses = multilevel_rpn_losses(
        multilevel_anchors, multilevel_label_logits, multilevel_box_logits)
    return BoxProposals(proposal_boxes), losses
def build_graph(self, *inputs):
    """Assemble the FPN detection graph (RPN, Fast R-CNN head, optional masks).

    Args:
        *inputs: positional tensors, matched by position to
            ``self.input_names``. Keys read here: 'image',
            'anchor_labels_lvl{k}', 'anchor_boxes_lvl{k}', 'gt_boxes',
            'gt_labels' and, for the training mask loss, 'gt_masks'.

    Returns:
        The 'total_cost' tensor when the current tower is training. In
        inference nothing is returned; results are exposed through named ops
        ('fastrcnn_all_boxes', 'fastrcnn_all_scores', the 'output' scope and,
        with masks enabled, 'output/masks').
    """
    tensors = dict(zip(self.input_names, inputs))
    num_fpn_level = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(cfg.RPN.ANCHOR_SIZES) == num_fpn_level
    is_training = get_current_tower_context().is_training

    # Pair each level's anchors with its label/box targets. The input keys
    # number the levels starting from 2.
    multilevel_anchors = []
    for idx, level_anchors in enumerate(get_all_anchors_fpn()):
        level = idx + 2
        multilevel_anchors.append(RPNAnchors(
            level_anchors,
            tensors['anchor_labels_lvl{}'.format(level)],
            tensors['anchor_boxes_lvl{}'.format(level)]))

    image = self.preprocess(tensors['image'])  # 1CHW
    image_shape2d = tf.shape(image)[2:]  # h,w

    c2345 = resnet_fpn_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK)
    p23456 = fpn_model('fpn', c2345)
    # The return value is ignored, so the call is relied on for its effect on
    # the two lists -- NOTE(review): presumably narrows them in place; confirm.
    self.slice_feature_and_anchors(image_shape2d, p23456, multilevel_anchors)

    # Multi-Level RPN Proposals
    head_outputs = [
        rpn_head('rpn', fmap, cfg.FPN.NUM_CHANNEL, len(cfg.RPN.ANCHOR_RATIOS))
        for fmap in p23456
    ]
    multilevel_label_logits = [out[0] for out in head_outputs]
    multilevel_box_logits = [out[1] for out in head_outputs]

    # The proposal scores are not used below.
    proposal_boxes, _ = generate_fpn_proposals(
        multilevel_anchors, multilevel_label_logits, multilevel_box_logits,
        image_shape2d)

    gt_boxes = tensors['gt_boxes']
    gt_labels = tensors['gt_labels']
    if is_training:
        proposals = sample_fast_rcnn_targets(proposal_boxes, gt_boxes,
                                             gt_labels)
    else:
        proposals = BoxProposals(proposal_boxes)

    def align_7x7(boxes):
        # RoIAlign over the first four pyramid levels, 7x7 output.
        return multilevel_roi_align(p23456[:4], boxes, 7)

    fastrcnn_head_func = getattr(model_frcnn, cfg.FPN.FRCNN_HEAD_FUNC)
    if cfg.FPN.CASCADE:
        fastrcnn_head = CascadeRCNNHead(proposals, align_7x7,
                                        fastrcnn_head_func, image_shape2d,
                                        cfg.DATA.NUM_CLASS)
    else:
        pooled = align_7x7(proposals.boxes)
        head_feature = fastrcnn_head_func('fastrcnn', pooled)
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
            'fastrcnn/outputs', head_feature, cfg.DATA.NUM_CLASS)
        fastrcnn_head = FastRCNNHead(
            proposals, fastrcnn_box_logits, fastrcnn_label_logits,
            tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32))

    if not is_training:
        decoded_boxes = clip_boxes(fastrcnn_head.decoded_output_boxes(),
                                   image_shape2d, name='fastrcnn_all_boxes')
        label_scores = fastrcnn_head.output_scores(name='fastrcnn_all_scores')
        final_boxes, final_scores, final_labels = fastrcnn_predictions(
            decoded_boxes, label_scores, name_scope='output')

        if cfg.MODE_MASK:
            # Cascade inference needs roi transform with refined boxes.
            mask_features = multilevel_roi_align(p23456[:4], final_boxes, 14)
            mask_head = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
            mask_logits = mask_head(
                'maskrcnn', mask_features,
                cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28
            # Select, for every result, the mask of its predicted label
            # (labels are shifted by one to index the category axis).
            row_ids = tf.range(tf.size(final_labels))
            col_ids = tf.to_int32(final_labels) - 1
            picked = tf.gather_nd(
                mask_logits,
                tf.stack([row_ids, col_ids], axis=1))  # #resultx28x28
            tf.sigmoid(picked, name='output/masks')
        return

    all_losses = list(multilevel_rpn_losses(
        multilevel_anchors, multilevel_label_logits, multilevel_box_logits))
    all_losses.extend(fastrcnn_head.losses())

    if cfg.MODE_MASK:
        # maskrcnn loss
        mask_features = multilevel_roi_align(
            p23456[:4], proposals.fg_boxes(), 14,
            name_scope='multilevel_roi_align_mask')
        mask_head = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
        mask_logits = mask_head(
            'maskrcnn', mask_features,
            cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28

        gt_masks_4d = tf.expand_dims(tensors['gt_masks'], 1)
        mask_targets = crop_and_resize(
            gt_masks_4d, proposals.fg_boxes(), proposals.fg_inds_wrt_gt,
            28, pad_border=False)  # fg x 1x28x28
        mask_targets = tf.squeeze(mask_targets, 1, 'sampled_fg_mask_targets')
        all_losses.append(
            maskrcnn_loss(mask_logits, proposals.fg_labels(), mask_targets))

    wd_cost = regularize_cost('.*/W',
                              l2_regularizer(cfg.TRAIN.WEIGHT_DECAY),
                              name='wd_cost')
    all_losses.append(wd_cost)

    total_cost = tf.add_n(all_losses, 'total_cost')
    add_moving_summary(total_cost, wd_cost)
    return total_cost
def rpn(self, image, features, inputs, orig_image_dims, seed_gen):
    """
    The RPN part of the graph that generates the RPN proposals and losses.

    Args:
        image: BS x NumChannel x H_image x W_image (not read by this method)
        features: ([tf.Tensor]): A list of 5 FPN feature maps, i.e. level P23456,
            each with BS x NumChannel x H_feature x W_feature
        inputs: dict, contains all input information; the keys read here are
            'anchor_labels_lvl{k}' and 'anchor_boxes_lvl{k}' (training only)
        orig_image_dims: BS x 3; only the first two columns are used
        seed_gen: forwarded unchanged to ``rpn_head`` as ``seed_gen``
    Returns:
        proposal_boxes: top K region proposals, K x 5
        losses: ``[label_loss, box_loss]`` (each averaged over
            ``cfg.TRAIN.BATCH_SIZE_PER_GPU``) when ``self.training`` is
            truthy, otherwise an empty list
    """
    assert len(cfg.RPN.ANCHOR_SIZES) == len(cfg.FPN.ANCHOR_STRIDES)

    image_shape2d = orig_image_dims[:, :2]
    all_anchors_fpn = get_all_anchors_fpn()

    if not cfg.TRAIN.RPN_NCHW:
        features = [nchw_to_nhwc_transform(c) for c in features]

    multilevel_label_logits = []  # Num_level * [BS x H_feature x W_feature x NA]
    multilevel_box_logits = []    # Num_level * [BS x (NA * 4) x H_feature x W_feature]
    for pi in features:
        # label_logits: BS x H_feature x W_feature x NA
        # box_logits: BS x (NA * 4) x H_feature x W_feature
        label_logits, box_logits = rpn_head(
            'rpn', pi, cfg.FPN.NUM_CHANNEL, len(cfg.RPN.ANCHOR_RATIOS),
            seed_gen=seed_gen, fp16=self.fp16)
        multilevel_label_logits.append(label_logits)
        multilevel_box_logits.append(box_logits)

    # proposal_boxes: K x 5, proposal_scores: 1-D K
    if cfg.RPN.TOPK_PER_IMAGE:
        propose = generate_fpn_proposals_topk_per_image
    else:
        propose = generate_fpn_proposals
    proposal_boxes, proposal_scores = propose(
        all_anchors_fpn, multilevel_box_logits, multilevel_label_logits,
        image_shape2d, cfg.TRAIN.BATCH_SIZE_PER_GPU)

    if not self.training:
        return proposal_boxes, []

    num_anchor_levels = len(all_anchors_fpn)
    multilevel_anchor_labels = [inputs['anchor_labels_lvl{}'.format(i + 2)]
                                for i in range(num_anchor_levels)]
    multilevel_anchor_boxes = [inputs['anchor_boxes_lvl{}'.format(i + 2)]
                               for i in range(num_anchor_levels)]

    multilevel_box_logits_reshaped = []
    for box_logits in multilevel_box_logits:
        shp = tf.shape(box_logits)  # BS x (NA * 4) x H_feature x W_feature
        box_logits_t = tf.transpose(box_logits, [0, 2, 3, 1])  # BS x H_feature x W_feature x (NA * 4)
        box_logits_t = tf.reshape(
            box_logits_t,
            tf.stack([shp[0], shp[2], shp[3], -1, 4]))  # BS x H_feature x W_feature x NA x 4
        multilevel_box_logits_reshaped.append(box_logits_t)

    # Loop invariants. The image is padded so that it is a multiple of
    # `mult` (32 with default config). The anchors do not depend on the image
    # index, so the list fetched above is reused for every image.
    mult = float(cfg.FPN.RESOLUTION_REQUIREMENT)
    num_levels = len(features)

    rpn_losses = []
    for i in range(cfg.TRAIN.BATCH_SIZE_PER_GPU):
        orig_image_hw = orig_image_dims[i, :2]

        # Single-image ("si") slices of every per-level tensor.
        si_multilevel_box_logits = [
            box_logits[i] for box_logits in multilevel_box_logits_reshaped
        ]  # [H_feature x W_feature x NA x 4] * Num_levels
        si_multilevel_label_logits = [
            label_logits[i] for label_logits in multilevel_label_logits
        ]  # [H_feature x W_feature x NA] * Num_levels
        si_multilevel_anchor_labels = [
            anchor_labels[i] for anchor_labels in multilevel_anchor_labels]
        si_multilevel_anchors_boxes = [
            anchor_boxes[i] for anchor_boxes in multilevel_anchor_boxes]

        si_multilevel_anchors = [
            RPNAnchors(all_anchors_fpn[j],
                       si_multilevel_anchor_labels[j],
                       si_multilevel_anchors_boxes[j])
            for j in range(num_levels)
        ]

        # Given the original image dims, find what size each layer of the
        # FPN feature map would be (follow FPN padding logic).
        orig_image_hw_after_fpn_padding = tf.ceil(
            tf.cast(orig_image_hw, tf.float32) / mult) * mult
        featuremap_dims_per_level = [
            # floor(x + 0.5) instead of round() to avoid bankers rounding
            tf.cast(
                tf.math.floor(
                    orig_image_hw_after_fpn_padding / float(stride) + 0.5),
                tf.int32)
            for stride in cfg.FPN.ANCHOR_STRIDES
        ]

        # Crop anchors and logits to the feature-map extent of this image so
        # the padded area does not contribute to the loss.
        si_multilevel_anchors_narrowed = [
            anchors.narrow_to_featuremap_dims(dims)
            for anchors, dims in zip(si_multilevel_anchors,
                                     featuremap_dims_per_level)
        ]
        si_multilevel_box_logits_narrowed = [
            box_logits[:dims[0], :dims[1], :, :]
            for box_logits, dims in zip(si_multilevel_box_logits,
                                        featuremap_dims_per_level)
        ]
        si_multilevel_label_logits_narrowed = [
            label_logits[:dims[0], :dims[1], :]
            for label_logits, dims in zip(si_multilevel_label_logits,
                                          featuremap_dims_per_level)
        ]

        si_losses = multilevel_rpn_losses(
            si_multilevel_anchors_narrowed,
            si_multilevel_label_logits_narrowed,
            si_multilevel_box_logits_narrowed)
        rpn_losses.extend(si_losses)

    # rpn_losses is sliced as alternating entries: even positions are summed
    # into the label loss, odd positions into the box loss.
    with tf.name_scope('rpn_losses'):
        total_label_loss = tf.truediv(
            tf.add_n(rpn_losses[::2]),
            tf.cast(cfg.TRAIN.BATCH_SIZE_PER_GPU, dtype=tf.float32),
            name='label_loss')
        total_box_loss = tf.truediv(
            tf.add_n(rpn_losses[1::2]),
            tf.cast(cfg.TRAIN.BATCH_SIZE_PER_GPU, dtype=tf.float32),
            name='box_loss')
        add_moving_summary(total_label_loss, total_box_loss)

    return proposal_boxes, [total_label_loss, total_box_loss]
def build_graph(self, *inputs):
    """Assemble the FPN detection graph (RPN, Fast R-CNN head, optional masks).

    Args:
        *inputs: positional tensors. ``inputs[0]`` is the image, followed by
            one (anchor labels, anchor boxes) pair per FPN level, then
            gt_boxes and gt_labels. With ``cfg.MODE_MASK`` the last entry is
            read as gt_masks.

    Returns:
        The 'total_cost' tensor when the current tower is training. In
        inference nothing is returned; with masks enabled the result is
        exposed through the op named 'final_masks'.
    """
    num_fpn_level = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(cfg.RPN.ANCHOR_SIZES) == num_fpn_level
    is_training = get_current_tower_context().is_training

    image = inputs[0]
    anchors_end = 1 + 2 * num_fpn_level
    input_anchors = inputs[1:anchors_end]
    multilevel_anchors = [
        RPNAnchors(*args)
        for args in zip(get_all_anchors_fpn(),
                        input_anchors[0::2], input_anchors[1::2])
    ]
    # The ground-truth tensors follow the anchor inputs. Their position is
    # derived from num_fpn_level instead of being fixed at 11/12, which is
    # only right for five levels.
    # NOTE(review): assumes the layout [image, anchors..., gt_boxes,
    # gt_labels, ...] -- confirm against the input definition.
    gt_boxes, gt_labels = inputs[anchors_end], inputs[anchors_end + 1]
    if cfg.MODE_MASK:
        gt_masks = inputs[-1]

    image = self.preprocess(image)  # 1CHW
    image_shape2d = tf.shape(image)[2:]  # h,w

    c2345 = resnet_fpn_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK)
    p23456 = fpn_model('fpn', c2345)
    # The return value is ignored, so the call is relied on for its effect on
    # the two lists -- NOTE(review): presumably narrows them in place; confirm.
    self.slice_feature_and_anchors(image_shape2d, p23456, multilevel_anchors)

    # Multi-Level RPN Proposals
    rpn_outputs = [
        rpn_head('rpn', pi, cfg.FPN.NUM_CHANNEL, len(cfg.RPN.ANCHOR_RATIOS))
        for pi in p23456
    ]
    multilevel_label_logits = [k[0] for k in rpn_outputs]
    multilevel_box_logits = [k[1] for k in rpn_outputs]

    # The proposal scores are not used below.
    proposal_boxes, _ = generate_fpn_proposals(
        multilevel_anchors, multilevel_label_logits, multilevel_box_logits,
        image_shape2d)

    if is_training:
        rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        # The boxes to be used to crop RoIs.
        rcnn_boxes = proposal_boxes

    roi_feature_fastrcnn = multilevel_roi_align(p23456[:4], rcnn_boxes, 7)

    fastrcnn_head_func = getattr(model_frcnn, cfg.FPN.FRCNN_HEAD_FUNC)
    fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_head_func(
        'fastrcnn', roi_feature_fastrcnn, cfg.DATA.NUM_CLASS)

    if is_training:
        # rpn loss:
        rpn_label_loss, rpn_box_loss = multilevel_rpn_losses(
            multilevel_anchors, multilevel_label_logits,
            multilevel_box_logits)

        # fastrcnn loss:
        matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

        fg_inds_wrt_sample = tf.reshape(
            tf.where(rcnn_labels > 0), [-1])  # fg inds w.r.t all samples
        fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
        fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits,
                                           fg_inds_wrt_sample)

        fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
            image, rcnn_labels, fg_sampled_boxes,
            matched_gt_boxes, fastrcnn_label_logits, fg_fastrcnn_box_logits)

        if cfg.MODE_MASK:
            # maskrcnn loss
            fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
            roi_feature_maskrcnn = multilevel_roi_align(
                p23456[:4], fg_sampled_boxes, 14,
                name_scope='multilevel_roi_align_mask')
            maskrcnn_head_func = getattr(model_mrcnn,
                                         cfg.FPN.MRCNN_HEAD_FUNC)
            mask_logits = maskrcnn_head_func(
                'maskrcnn', roi_feature_maskrcnn,
                cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28

            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(gt_masks, 1),
                fg_sampled_boxes,
                fg_inds_wrt_gt, 28,
                pad_border=False)  # fg x 1x28x28
            target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1,
                                             'sampled_fg_mask_targets')
            mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels,
                                       target_masks_for_fg)
        else:
            mrcnn_loss = 0.0

        # Weight decay covers only variables matching 'fastrcnn/.*/W'; an
        # earlier revision used the broader pattern '.*/W'.
        wd_cost = regularize_cost('fastrcnn/.*/W',
                                  l2_regularizer(cfg.TRAIN.WEIGHT_DECAY),
                                  name='wd_cost')

        total_cost = tf.add_n([rpn_label_loss, rpn_box_loss,
                               fastrcnn_label_loss, fastrcnn_box_loss,
                               mrcnn_loss, wd_cost], 'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost
    else:
        final_boxes, final_labels = self.fastrcnn_inference(
            image_shape2d, rcnn_boxes, fastrcnn_label_logits,
            fastrcnn_box_logits)
        if cfg.MODE_MASK:
            # Cascade inference needs roi transform with refined boxes.
            roi_feature_maskrcnn = multilevel_roi_align(p23456[:4],
                                                        final_boxes, 14)
            maskrcnn_head_func = getattr(model_mrcnn,
                                         cfg.FPN.MRCNN_HEAD_FUNC)
            mask_logits = maskrcnn_head_func(
                'maskrcnn', roi_feature_maskrcnn,
                cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28
            # Select, for every result, the mask of its predicted label
            # (labels are shifted by one to index the category axis).
            indices = tf.stack([tf.range(tf.size(final_labels)),
                                tf.to_int32(final_labels) - 1], axis=1)
            final_mask_logits = tf.gather_nd(mask_logits,
                                             indices)  # #resultx28x28
            tf.sigmoid(final_mask_logits, name='final_masks')
def build_graph(self, *inputs):
    """Assemble the FPN detection graph with a per-level RPN loop.

    Args:
        *inputs: positional tensors. ``inputs[0]`` is the image, followed by
            one (anchor labels, anchor boxes) pair per FPN level, then
            gt_boxes and gt_labels. With ``config.MODE_MASK`` the last entry
            is read as gt_masks.

    Returns:
        The 'total_cost' tensor when the current tower is training. In
        inference nothing is returned; with masks enabled the result is
        exposed through the op named 'final_masks'.
    """
    num_fpn_level = len(config.ANCHOR_STRIDES_FPN)
    assert len(config.ANCHOR_SIZES) == num_fpn_level
    is_training = get_current_tower_context().is_training

    image = inputs[0]
    anchors_end = 1 + 2 * num_fpn_level
    input_anchors = inputs[1:anchors_end]
    multilevel_anchor_labels = input_anchors[0::2]
    multilevel_anchor_boxes = input_anchors[1::2]
    # The ground-truth tensors follow the anchor inputs. Their position is
    # derived from num_fpn_level instead of being fixed at 11/12, which is
    # only right for five levels.
    # NOTE(review): assumes the layout [image, anchors..., gt_boxes,
    # gt_labels, ...] -- confirm against the input definition.
    gt_boxes, gt_labels = inputs[anchors_end], inputs[anchors_end + 1]
    if config.MODE_MASK:
        gt_masks = inputs[-1]

    image = self.preprocess(image)  # 1CHW
    image_shape2d = tf.shape(image)[2:]  # h,w

    c2345 = resnet_fpn_backbone(image, config.RESNET_NUM_BLOCK)
    p23456 = fpn_model('fpn', c2345)

    # Loop invariants: the top-k budget for the current mode and the
    # per-level anchors.
    if is_training:
        fpn_nms_topk = config.TRAIN_FPN_NMS_TOPK
    else:
        fpn_nms_topk = config.TEST_FPN_NMS_TOPK
    all_anchors_fpn = get_all_anchors_fpn()

    # Multi-Level RPN Proposals
    multilevel_proposals = []
    rpn_loss_collection = []
    for lvl in range(num_fpn_level):
        rpn_label_logits, rpn_box_logits = rpn_head(
            'rpn', p23456[lvl], config.FPN_NUM_CHANNEL,
            len(config.ANCHOR_RATIOS))
        with tf.name_scope('FPN_lvl{}'.format(lvl + 2)):
            anchors = tf.constant(all_anchors_fpn[lvl],
                                  name='rpn_anchor_lvl{}'.format(lvl + 2))
            anchors, anchor_labels, anchor_boxes = \
                self.narrow_to_featuremap(p23456[lvl], anchors,
                                          multilevel_anchor_labels[lvl],
                                          multilevel_anchor_boxes[lvl])
            anchor_boxes_encoded = encode_bbox_target(anchor_boxes, anchors)
            pred_boxes_decoded = decode_bbox_target(rpn_box_logits, anchors)
            proposal_boxes, proposal_scores = generate_rpn_proposals(
                tf.reshape(pred_boxes_decoded, [-1, 4]),
                tf.reshape(rpn_label_logits, [-1]),
                image_shape2d,
                fpn_nms_topk)
            multilevel_proposals.append((proposal_boxes, proposal_scores))
            if is_training:
                label_loss, box_loss = rpn_losses(
                    anchor_labels, anchor_boxes_encoded,
                    rpn_label_logits, rpn_box_logits)
                # Stored as alternating (label, box) entries; the slices
                # [::2] and [1::2] further down rely on this order.
                rpn_loss_collection.extend([label_loss, box_loss])

    # Merge proposals from multi levels, pick top K
    proposal_boxes = tf.concat(
        [x[0] for x in multilevel_proposals], axis=0)  # nx4
    proposal_scores = tf.concat(
        [x[1] for x in multilevel_proposals], axis=0)  # n
    proposal_topk = tf.minimum(tf.size(proposal_scores), fpn_nms_topk)
    proposal_scores, topk_indices = tf.nn.top_k(
        proposal_scores, k=proposal_topk, sorted=False)
    proposal_boxes = tf.gather(proposal_boxes, topk_indices)

    if is_training:
        rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        # The boxes to be used to crop RoIs.
        rcnn_boxes = proposal_boxes

    roi_feature_fastrcnn = multilevel_roi_align(p23456[:4], rcnn_boxes, 7)

    fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_2fc_head(
        'fastrcnn', roi_feature_fastrcnn, config.NUM_CLASS)

    if is_training:
        # rpn loss is already defined above
        with tf.name_scope('rpn_losses'):
            rpn_total_label_loss = tf.add_n(rpn_loss_collection[::2],
                                            name='label_loss')
            rpn_total_box_loss = tf.add_n(rpn_loss_collection[1::2],
                                          name='box_loss')
            add_moving_summary(rpn_total_box_loss, rpn_total_label_loss)

        # fastrcnn loss:
        matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

        fg_inds_wrt_sample = tf.reshape(
            tf.where(rcnn_labels > 0), [-1])  # fg inds w.r.t all samples
        fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
        fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits,
                                           fg_inds_wrt_sample)

        fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
            image, rcnn_labels, fg_sampled_boxes,
            matched_gt_boxes, fastrcnn_label_logits, fg_fastrcnn_box_logits)

        if config.MODE_MASK:
            # maskrcnn loss
            fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
            roi_feature_maskrcnn = multilevel_roi_align(
                p23456[:4], fg_sampled_boxes, 14)
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', roi_feature_maskrcnn,
                config.NUM_CLASS, 4)  # #fg x #cat x 28 x 28

            # Gather the matched masks first so that each foreground box
            # crops from its own row of the gathered tensor.
            matched_gt_masks = tf.gather(gt_masks, fg_inds_wrt_gt)  # fg x H x W
            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(matched_gt_masks, 1),
                fg_sampled_boxes,
                tf.range(tf.size(fg_inds_wrt_gt)), 28,
                pad_border=False)  # fg x 1x28x28
            target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1,
                                             'sampled_fg_mask_targets')
            mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels,
                                       target_masks_for_fg)
        else:
            mrcnn_loss = 0.0

        wd_cost = regularize_cost(
            '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
            l2_regularizer(1e-4), name='wd_cost')

        total_cost = tf.add_n(
            rpn_loss_collection + [
                fastrcnn_label_loss, fastrcnn_box_loss,
                mrcnn_loss, wd_cost],
            'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost
    else:
        final_boxes, final_labels = self.fastrcnn_inference(
            image_shape2d, rcnn_boxes, fastrcnn_label_logits,
            fastrcnn_box_logits)
        if config.MODE_MASK:
            # Cascade inference needs roi transform with refined boxes.
            roi_feature_maskrcnn = multilevel_roi_align(
                p23456[:4], final_boxes, 14)
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', roi_feature_maskrcnn,
                config.NUM_CLASS, 4)  # #fg x #cat x 28 x 28
            # Select, for every result, the mask of its predicted label
            # (labels are shifted by one to index the category axis).
            indices = tf.stack([
                tf.range(tf.size(final_labels)),
                tf.to_int32(final_labels) - 1
            ], axis=1)
            final_mask_logits = tf.gather_nd(mask_logits,
                                             indices)  # #resultx28x28
            tf.sigmoid(final_mask_logits, name='final_masks')
def build_graph(self, *inputs):
    """Assemble the FPN detection graph with a per-level RPN loop.

    Args:
        *inputs: positional tensors. ``inputs[0]`` is the image, followed by
            one (anchor labels, anchor boxes) pair per FPN level, then
            gt_boxes and gt_labels. With ``config.MODE_MASK`` the last entry
            is read as gt_masks.

    Returns:
        The 'total_cost' tensor when the current tower is training. In
        inference nothing is returned; with masks enabled the result is
        exposed through the op named 'final_masks'.
    """
    num_fpn_level = len(config.ANCHOR_STRIDES_FPN)
    assert len(config.ANCHOR_SIZES) == num_fpn_level
    is_training = get_current_tower_context().is_training

    image = inputs[0]
    anchors_end = 1 + 2 * num_fpn_level
    input_anchors = inputs[1:anchors_end]
    multilevel_anchor_labels = input_anchors[0::2]
    multilevel_anchor_boxes = input_anchors[1::2]
    # The ground-truth tensors follow the anchor inputs. Their position is
    # derived from num_fpn_level instead of being fixed at 11/12, which is
    # only right for five levels.
    # NOTE(review): assumes the layout [image, anchors..., gt_boxes,
    # gt_labels, ...] -- confirm against the input definition.
    gt_boxes, gt_labels = inputs[anchors_end], inputs[anchors_end + 1]
    if config.MODE_MASK:
        gt_masks = inputs[-1]

    image = self.preprocess(image)  # 1CHW
    image_shape2d = tf.shape(image)[2:]  # h,w

    c2345 = resnet_fpn_backbone(image, config.RESNET_NUM_BLOCK)
    p23456 = fpn_model('fpn', c2345)

    # Loop invariants: the top-k budget for the current mode and the
    # per-level anchors.
    if is_training:
        fpn_nms_topk = config.TRAIN_FPN_NMS_TOPK
    else:
        fpn_nms_topk = config.TEST_FPN_NMS_TOPK
    all_anchors_fpn = get_all_anchors_fpn()

    # Multi-Level RPN Proposals
    multilevel_proposals = []
    rpn_loss_collection = []
    for lvl in range(num_fpn_level):
        rpn_label_logits, rpn_box_logits = rpn_head(
            'rpn', p23456[lvl], config.FPN_NUM_CHANNEL,
            len(config.ANCHOR_RATIOS))
        with tf.name_scope('FPN_lvl{}'.format(lvl + 2)):
            anchors = tf.constant(all_anchors_fpn[lvl],
                                  name='rpn_anchor_lvl{}'.format(lvl + 2))
            anchors, anchor_labels, anchor_boxes = \
                self.narrow_to_featuremap(p23456[lvl], anchors,
                                          multilevel_anchor_labels[lvl],
                                          multilevel_anchor_boxes[lvl])
            anchor_boxes_encoded = encode_bbox_target(anchor_boxes, anchors)
            pred_boxes_decoded = decode_bbox_target(rpn_box_logits, anchors)
            proposal_boxes, proposal_scores = generate_rpn_proposals(
                tf.reshape(pred_boxes_decoded, [-1, 4]),
                tf.reshape(rpn_label_logits, [-1]),
                image_shape2d,
                fpn_nms_topk)
            multilevel_proposals.append((proposal_boxes, proposal_scores))
            if is_training:
                label_loss, box_loss = rpn_losses(
                    anchor_labels, anchor_boxes_encoded,
                    rpn_label_logits, rpn_box_logits)
                # Stored as alternating (label, box) entries; the slices
                # [::2] and [1::2] further down rely on this order.
                rpn_loss_collection.extend([label_loss, box_loss])

    # Merge proposals from multi levels, pick top K
    proposal_boxes = tf.concat(
        [x[0] for x in multilevel_proposals], axis=0)  # nx4
    proposal_scores = tf.concat(
        [x[1] for x in multilevel_proposals], axis=0)  # n
    proposal_topk = tf.minimum(tf.size(proposal_scores), fpn_nms_topk)
    proposal_scores, topk_indices = tf.nn.top_k(
        proposal_scores, k=proposal_topk, sorted=False)
    proposal_boxes = tf.gather(proposal_boxes, topk_indices)

    if is_training:
        rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        # The boxes to be used to crop RoIs.
        rcnn_boxes = proposal_boxes

    roi_feature_fastrcnn = multilevel_roi_align(p23456[:4], rcnn_boxes, 7)

    fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_2fc_head(
        'fastrcnn', roi_feature_fastrcnn, config.NUM_CLASS)

    if is_training:
        # rpn loss is already defined above
        with tf.name_scope('rpn_losses'):
            rpn_total_label_loss = tf.add_n(rpn_loss_collection[::2],
                                            name='label_loss')
            rpn_total_box_loss = tf.add_n(rpn_loss_collection[1::2],
                                          name='box_loss')
            add_moving_summary(rpn_total_box_loss, rpn_total_label_loss)

        # fastrcnn loss:
        matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

        fg_inds_wrt_sample = tf.reshape(
            tf.where(rcnn_labels > 0), [-1])  # fg inds w.r.t all samples
        fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
        fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits,
                                           fg_inds_wrt_sample)

        fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
            image, rcnn_labels, fg_sampled_boxes,
            matched_gt_boxes, fastrcnn_label_logits, fg_fastrcnn_box_logits)

        if config.MODE_MASK:
            # maskrcnn loss
            fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
            roi_feature_maskrcnn = multilevel_roi_align(
                p23456[:4], fg_sampled_boxes, 14)
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', roi_feature_maskrcnn,
                config.NUM_CLASS, 4)  # #fg x #cat x 28 x 28

            # Each foreground box crops from the ground-truth mask selected
            # by its entry in fg_inds_wrt_gt.
            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(gt_masks, 1),
                fg_sampled_boxes,
                fg_inds_wrt_gt, 28,
                pad_border=False)  # fg x 1x28x28
            target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1,
                                             'sampled_fg_mask_targets')
            mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels,
                                       target_masks_for_fg)
        else:
            mrcnn_loss = 0.0

        wd_cost = regularize_cost(
            '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
            l2_regularizer(1e-4), name='wd_cost')

        total_cost = tf.add_n(rpn_loss_collection + [
            fastrcnn_label_loss, fastrcnn_box_loss,
            mrcnn_loss, wd_cost], 'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost
    else:
        final_boxes, final_labels = self.fastrcnn_inference(
            image_shape2d, rcnn_boxes, fastrcnn_label_logits,
            fastrcnn_box_logits)
        if config.MODE_MASK:
            # Cascade inference needs roi transform with refined boxes.
            roi_feature_maskrcnn = multilevel_roi_align(
                p23456[:4], final_boxes, 14)
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', roi_feature_maskrcnn,
                config.NUM_CLASS, 4)  # #fg x #cat x 28 x 28
            # Select, for every result, the mask of its predicted label
            # (labels are shifted by one to index the category axis).
            indices = tf.stack([tf.range(tf.size(final_labels)),
                                tf.to_int32(final_labels) - 1], axis=1)
            final_mask_logits = tf.gather_nd(mask_logits,
                                             indices)  # #resultx28x28
            tf.sigmoid(final_mask_logits, name='final_masks')
def build_graph(self, *inputs):
    """Build the full FPN detector graph for the current tower.

    The positional ``inputs`` are matched by position to
    ``self.input_names``. Keys read here are 'image',
    'anchor_labels_lvl{k}', 'anchor_boxes_lvl{k}', 'gt_boxes', 'gt_labels'
    and, for the training mask loss, 'gt_masks'.

    Returns:
        The 'total_cost' tensor in training. In inference nothing is
        returned; results are exposed through named ops
        ('fastrcnn_all_boxes', 'fastrcnn_all_scores', the 'output' scope and,
        with masks enabled, 'output/masks').
    """
    by_name = dict(zip(self.input_names, inputs))
    num_fpn_level = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(cfg.RPN.ANCHOR_SIZES) == num_fpn_level
    is_training = get_current_tower_context().is_training

    # One RPNAnchors per level. The input keys number the levels from 2.
    multilevel_anchors = []
    for offset, anchors_at_level in enumerate(get_all_anchors_fpn()):
        lvl = offset + 2
        labels_at_level = by_name['anchor_labels_lvl{}'.format(lvl)]
        boxes_at_level = by_name['anchor_boxes_lvl{}'.format(lvl)]
        multilevel_anchors.append(
            RPNAnchors(anchors_at_level, labels_at_level, boxes_at_level))

    image = self.preprocess(by_name['image'])  # 1CHW
    image_shape2d = tf.shape(image)[2:]  # h,w

    c2345 = resnet_fpn_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK)
    p23456 = fpn_model('fpn', c2345)
    # The return value is ignored, so the call is relied on for its effect on
    # the two lists -- NOTE(review): presumably narrows them in place; confirm.
    self.slice_feature_and_anchors(image_shape2d, p23456, multilevel_anchors)

    # Multi-Level RPN Proposals
    per_level_heads = [
        rpn_head('rpn', level_feature, cfg.FPN.NUM_CHANNEL,
                 len(cfg.RPN.ANCHOR_RATIOS))
        for level_feature in p23456
    ]
    multilevel_label_logits = [head[0] for head in per_level_heads]
    multilevel_box_logits = [head[1] for head in per_level_heads]

    # The proposal scores are not used below.
    proposal_boxes, _ = generate_fpn_proposals(
        multilevel_anchors, multilevel_label_logits, multilevel_box_logits,
        image_shape2d)

    gt_boxes = by_name['gt_boxes']
    gt_labels = by_name['gt_labels']
    if is_training:
        proposals = sample_fast_rcnn_targets(proposal_boxes, gt_boxes,
                                             gt_labels)
    else:
        proposals = BoxProposals(proposal_boxes)

    def pool_rois(boxes):
        # RoIAlign over the first four pyramid levels, 7x7 output.
        return multilevel_roi_align(p23456[:4], boxes, 7)

    fastrcnn_head_func = getattr(model_frcnn, cfg.FPN.FRCNN_HEAD_FUNC)
    if cfg.FPN.CASCADE:
        fastrcnn_head = CascadeRCNNHead(
            proposals, pool_rois, fastrcnn_head_func, image_shape2d,
            cfg.DATA.NUM_CLASS)
    else:
        head_feature = fastrcnn_head_func('fastrcnn',
                                          pool_rois(proposals.boxes))
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
            'fastrcnn/outputs', head_feature, cfg.DATA.NUM_CLASS)
        bbox_reg_weights = tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS,
                                       dtype=tf.float32)
        fastrcnn_head = FastRCNNHead(proposals, fastrcnn_box_logits,
                                     fastrcnn_label_logits, bbox_reg_weights)

    if is_training:
        loss_terms = list(multilevel_rpn_losses(
            multilevel_anchors, multilevel_label_logits,
            multilevel_box_logits))
        loss_terms.extend(fastrcnn_head.losses())

        if cfg.MODE_MASK:
            # maskrcnn loss
            fg_roi_features = multilevel_roi_align(
                p23456[:4], proposals.fg_boxes(), 14,
                name_scope='multilevel_roi_align_mask')
            mask_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
            mask_logits = mask_head_func(
                'maskrcnn', fg_roi_features,
                cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28

            expanded_gt_masks = tf.expand_dims(by_name['gt_masks'], 1)
            fg_mask_targets = crop_and_resize(
                expanded_gt_masks,
                proposals.fg_boxes(),
                proposals.fg_inds_wrt_gt, 28,
                pad_border=False)  # fg x 1x28x28
            fg_mask_targets = tf.squeeze(fg_mask_targets, 1,
                                         'sampled_fg_mask_targets')
            loss_terms.append(maskrcnn_loss(
                mask_logits, proposals.fg_labels(), fg_mask_targets))

        wd_cost = regularize_cost(
            '.*/W', l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')
        loss_terms.append(wd_cost)

        total_cost = tf.add_n(loss_terms, 'total_cost')
        add_moving_summary(total_cost, wd_cost)
        return total_cost

    # Inference path.
    decoded_boxes = fastrcnn_head.decoded_output_boxes()
    decoded_boxes = clip_boxes(decoded_boxes, image_shape2d,
                               name='fastrcnn_all_boxes')
    label_scores = fastrcnn_head.output_scores(name='fastrcnn_all_scores')
    final_boxes, final_scores, final_labels = fastrcnn_predictions(
        decoded_boxes, label_scores, name_scope='output')

    if not cfg.MODE_MASK:
        return

    # Cascade inference needs roi transform with refined boxes.
    result_roi_features = multilevel_roi_align(p23456[:4], final_boxes, 14)
    mask_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
    mask_logits = mask_head_func(
        'maskrcnn', result_roi_features,
        cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28
    # Select, for every result, the mask of its predicted label (labels are
    # shifted by one to index the category axis).
    result_ids = tf.range(tf.size(final_labels))
    category_ids = tf.to_int32(final_labels) - 1
    gather_index = tf.stack([result_ids, category_ids], axis=1)
    final_mask_logits = tf.gather_nd(mask_logits,
                                     gather_index)  # #resultx28x28
    tf.sigmoid(final_mask_logits, name='output/masks')