def _model_outputs(): """Generates outputs from the model.""" fpn_feats, rpn_fn, faster_rcnn_fn, mask_rcnn_fn = model( features, labels, all_anchors, mode, params) rpn_score_outputs, rpn_box_outputs = rpn_fn(fpn_feats) (class_outputs, box_outputs, class_targets, box_targets, box_rois, proposal_to_label_map) = faster_rcnn_fn(fpn_feats, rpn_score_outputs, rpn_box_outputs) encoded_box_targets = mask_rcnn_architecture.encode_box_targets( box_rois, box_targets, class_targets, params['bbox_reg_weights']) if mode != tf.estimator.ModeKeys.TRAIN: # Use TEST.NMS in the reference for this value. Reference: https://github.com/ddkang/Detectron/blob/80f329530843e66d07ca39e19901d5f3e5daf009/lib/core/config.py#L227 # pylint: disable=line-too-long # The mask branch takes inputs from different places in training vs in # eval/predict. In training, the mask branch uses proposals combined with # labels to produce both mask outputs and targets. At test time, it uses # the post-processed predictions to generate masks. # Generate detections one image at a time. batch_size, _, _ = class_outputs.get_shape().as_list() detections = [] softmax_class_outputs = tf.nn.softmax(class_outputs) for i in range(batch_size): detections.append( anchors.generate_detections_per_image_op( softmax_class_outputs[i], box_outputs[i], box_rois[i], labels['source_ids'][i], labels['image_info'][i], params['test_detections_per_image'], params['test_rpn_post_nms_topn'], params['test_nms'], params['bbox_reg_weights'])) detections = tf.stack(detections, axis=0) mask_outputs = mask_rcnn_fn(fpn_feats, detections=detections) else: (mask_outputs, select_class_targets, select_box_targets, select_box_rois, select_proposal_to_label_map, mask_targets) = mask_rcnn_fn(fpn_feats, class_targets, box_targets, box_rois, proposal_to_label_map) model_outputs = { 'rpn_score_outputs': rpn_score_outputs, 'rpn_box_outputs': rpn_box_outputs, 'class_outputs': class_outputs, 'box_outputs': box_outputs, 'class_targets': class_targets, 'box_targets': encoded_box_targets, 'box_rois': box_rois, 'mask_outputs': mask_outputs, } if mode == tf.estimator.ModeKeys.TRAIN: model_outputs.update({ 'select_class_targets': select_class_targets, 'select_box_targets': select_box_targets, 'select_box_rois': select_box_rois, 'select_proposal_to_label_map': select_proposal_to_label_map, 'mask_targets': mask_targets, }) else: model_outputs.update({'detections': detections}) return model_outputs
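# A minimal, self-contained sketch of the per-image detection pattern used
# above (assuming TensorFlow 1.x with `import tensorflow as tf` at file top):
# the batched class logits are softmaxed once, then a per-image op runs in a
# Python loop and the results are re-batched with tf.stack.
# `_toy_per_image_op` is a hypothetical stand-in for
# anchors.generate_detections_per_image_op, which additionally applies
# per-class NMS and top-k selection.
def _toy_per_image_op(scores, boxes):
  """scores: [rois, classes]; boxes: float [rois, 4]."""
  # Keep only the best-scoring class per RoI; the real op runs NMS here.
  best_class = tf.argmax(scores, axis=-1, output_type=tf.int32)
  best_score = tf.reduce_max(scores, axis=-1)
  return tf.concat(
      [boxes, best_score[:, None], tf.to_float(best_class)[:, None]], axis=-1)


def _batch_detections_sketch(class_outputs, box_rois):
  """class_outputs: [batch, rois, classes]; box_rois: [batch, rois, 4]."""
  batch_size, _, _ = class_outputs.get_shape().as_list()
  softmax_class_outputs = tf.nn.softmax(class_outputs)
  detections = []
  for i in range(batch_size):
    detections.append(_toy_per_image_op(softmax_class_outputs[i],
                                        box_rois[i]))
  # Re-batch the per-image results into [batch, rois, 6].
  return tf.stack(detections, axis=0)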
def _model_outputs():
  """Generates outputs from the model."""
  model_outputs = {}
  with tf.variable_scope('resnet%s' % params['resnet_depth']):
    resnet_fn = resnet.resnet_v1(
        params['resnet_depth'],
        num_batch_norm_group=params['num_batch_norm_group'])
    backbone_feats = resnet_fn(features['images'], params['is_training_bn'])

  fpn_feats = fpn.fpn(backbone_feats, params['min_level'],
                      params['max_level'])

  rpn_score_outputs, rpn_box_outputs = heads.rpn_head(
      fpn_feats, params['min_level'], params['max_level'],
      len(params['aspect_ratios']) * params['num_scales'])

  if mode == tf.estimator.ModeKeys.TRAIN:
    rpn_pre_nms_topn = params['rpn_pre_nms_topn']
    rpn_post_nms_topn = params['rpn_post_nms_topn']
  else:
    rpn_pre_nms_topn = params['test_rpn_pre_nms_topn']
    rpn_post_nms_topn = params['test_rpn_post_nms_topn']

  _, rpn_box_rois = mask_rcnn_architecture.proposal_op(
      rpn_score_outputs, rpn_box_outputs, all_anchors,
      features['image_info'], rpn_pre_nms_topn, rpn_post_nms_topn,
      params['rpn_nms_threshold'], params['rpn_min_size'])
  rpn_box_rois = tf.to_float(rpn_box_rois)

  if mode == tf.estimator.ModeKeys.TRAIN:
    # Sampling
    box_targets, class_targets, rpn_box_rois, proposal_to_label_map = (
        mask_rcnn_architecture.proposal_label_op(
            rpn_box_rois, labels['gt_boxes'], labels['gt_classes'],
            features['image_info'],
            batch_size_per_im=params['batch_size_per_im'],
            fg_fraction=params['fg_fraction'],
            fg_thresh=params['fg_thresh'],
            bg_thresh_hi=params['bg_thresh_hi'],
            bg_thresh_lo=params['bg_thresh_lo']))

  # Performs multi-level RoIAlign.
  box_roi_features = ops.multilevel_crop_and_resize(
      fpn_feats, rpn_box_rois, output_size=7)

  class_outputs, box_outputs = heads.box_head(
      box_roi_features, num_classes=params['num_classes'],
      mlp_head_dim=params['fast_rcnn_mlp_head_dim'])

  if mode != tf.estimator.ModeKeys.TRAIN:
    batch_size, _, _ = class_outputs.get_shape().as_list()
    detections = []
    softmax_class_outputs = tf.nn.softmax(class_outputs)
    for i in range(batch_size):
      detections.append(
          anchors.generate_detections_per_image_op(
              softmax_class_outputs[i], box_outputs[i], rpn_box_rois[i],
              features['source_ids'][i], features['image_info'][i],
              params['test_detections_per_image'],
              params['test_rpn_post_nms_topn'], params['test_nms'],
              params['bbox_reg_weights']))
    detections = tf.stack(detections, axis=0)
    model_outputs.update({
        'detections': detections,
    })
  else:
    encoded_box_targets = mask_rcnn_architecture.encode_box_targets(
        rpn_box_rois, box_targets, class_targets,
        params['bbox_reg_weights'])
    model_outputs.update({
        'rpn_score_outputs': rpn_score_outputs,
        'rpn_box_outputs': rpn_box_outputs,
        'class_outputs': class_outputs,
        'box_outputs': box_outputs,
        'class_targets': class_targets,
        'box_targets': encoded_box_targets,
        'box_rois': rpn_box_rois,
    })

  # Faster-RCNN mode.
  if not params['include_mask']:
    return model_outputs

  # Mask sampling
  if mode != tf.estimator.ModeKeys.TRAIN:
    # Each detection row is [image_id, y1, x1, y2, x2, score, class], so
    # columns 1:5 are the box coordinates and column 6 is the class.
    selected_box_rois = detections[:, :, 1:5]
    class_indices = tf.to_int32(detections[:, :, 6])
  else:
    (selected_class_targets, selected_box_targets, selected_box_rois,
     proposal_to_label_map) = (
         mask_rcnn_architecture.select_fg_for_masks(
             class_targets, box_targets, rpn_box_rois,
             proposal_to_label_map,
             max_num_fg=int(params['batch_size_per_im'] *
                            params['fg_fraction'])))
    class_indices = tf.to_int32(selected_class_targets)

  mask_roi_features = ops.multilevel_crop_and_resize(
      fpn_feats, selected_box_rois, output_size=14)
  mask_outputs = heads.mask_head(
      mask_roi_features, class_indices, num_classes=params['num_classes'],
      mrcnn_resolution=params['mrcnn_resolution'])

  model_outputs.update({
      'mask_outputs': mask_outputs,
  })

  if mode == tf.estimator.ModeKeys.TRAIN:
    mask_targets = mask_rcnn_architecture.get_mask_targets(
        selected_box_rois, proposal_to_label_map, selected_box_targets,
        labels['cropped_gt_masks'], params['mrcnn_resolution'])
    model_outputs.update({
        'mask_targets': mask_targets,
        'selected_class_targets': selected_class_targets,
    })
  return model_outputs
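# A single-level, self-contained sketch of the RoIAlign-style cropping that
# ops.multilevel_crop_and_resize performs above (output_size=7 feeds the box
# head, 14 the mask head), assuming TensorFlow 1.x. The real op first assigns
# each RoI to an FPN level by scale; this sketch crops from one feature map
# only, and `_single_level_crop_sketch` is an illustrative helper, not part
# of the codebase.
def _single_level_crop_sketch(feature_map, rois, output_size):
  """feature_map: [batch, h, w, c]; rois: float [batch, n, 4] boxes in
  feature-map pixel coordinates, ordered [y1, x1, y2, x2]."""
  batch, h, w, _ = feature_map.get_shape().as_list()
  num_rois = rois.get_shape().as_list()[1]
  # tf.image.crop_and_resize wants normalized boxes plus a flat
  # box-to-image index.
  scale = tf.constant([h - 1, w - 1, h - 1, w - 1], dtype=tf.float32)
  boxes = tf.reshape(rois, [-1, 4]) / scale
  box_indices = tf.reshape(
      tf.tile(tf.expand_dims(tf.range(batch), 1), [1, num_rois]), [-1])
  crops = tf.image.crop_and_resize(
      feature_map, boxes, box_indices, [output_size, output_size])
  return tf.reshape(crops, [batch, num_rois, output_size, output_size, -1])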
def _model_outputs():
  """Generates outputs from the model."""
  fpn_feats = mask_rcnn_architecture.resnet_fpn(
      features['images'], params['min_level'], params['max_level'],
      params['resnet_depth'], params['is_training_bn'])

  rpn_score_outputs, rpn_box_outputs = mask_rcnn_architecture.rpn_net(
      fpn_feats, params['min_level'], params['max_level'],
      len(params['aspect_ratios']) * params['num_scales'])

  if mode != tf.estimator.ModeKeys.TRAIN:
    # The mask branch takes inputs from different places in training vs in
    # eval/predict. In training, the mask branch uses proposals combined with
    # labels to produce both mask outputs and targets. At test time, it uses
    # the post-processed predictions to generate masks.
    # Generate detections one image at a time.
    class_outputs, box_outputs, box_rois = (
        mask_rcnn_architecture.faster_rcnn_fn(
            fpn_feats, rpn_score_outputs, rpn_box_outputs, all_anchors,
            features['image_info'], params, is_training=False))
    batch_size, _, _ = class_outputs.get_shape().as_list()
    detections = []
    softmax_class_outputs = tf.nn.softmax(class_outputs)
    for i in range(batch_size):
      detections.append(
          anchors.generate_detections_per_image_op(
              softmax_class_outputs[i], box_outputs[i], box_rois[i],
              features['source_ids'][i], features['image_info'][i],
              params['test_detections_per_image'],
              params['test_rpn_post_nms_topn'], params['test_nms'],
              params['bbox_reg_weights']))
    detections = tf.stack(detections, axis=0)
    if params['include_mask']:
      mask_outputs = mask_rcnn_architecture.mask_rcnn_fn(
          fpn_feats, params, is_training=False, detections=detections)
  else:
    (class_outputs, box_outputs, box_rois, class_targets, box_targets,
     proposal_to_label_map) = mask_rcnn_architecture.faster_rcnn_fn(
         fpn_feats, rpn_score_outputs, rpn_box_outputs, all_anchors,
         features['image_info'], params, is_training=True, labels=labels)
    encoded_box_targets = mask_rcnn_architecture.encode_box_targets(
        box_rois, box_targets, class_targets, params['bbox_reg_weights'])
    if params['include_mask']:
      mask_outputs, select_class_targets, mask_targets = (
          mask_rcnn_architecture.mask_rcnn_fn(
              fpn_feats, params, is_training=True, detections=None,
              labels=labels, class_targets=class_targets,
              box_targets=box_targets, box_rois=box_rois,
              proposal_to_label_map=proposal_to_label_map))

  if mode == tf.estimator.ModeKeys.TRAIN:
    model_outputs = {
        'rpn_score_outputs': rpn_score_outputs,
        'rpn_box_outputs': rpn_box_outputs,
        'class_outputs': class_outputs,
        'box_outputs': box_outputs,
        'class_targets': class_targets,
        'box_targets': encoded_box_targets,
        'box_rois': box_rois,
    }
    if params['include_mask']:
      model_outputs.update({
          'mask_outputs': mask_outputs,
          'mask_targets': mask_targets,
          'select_class_targets': select_class_targets,
      })
  else:
    model_outputs = {
        'detections': detections,
    }
    if params['include_mask']:
      model_outputs.update({
          'mask_outputs': mask_outputs,
      })
  return model_outputs
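# A hedged sketch of the standard Faster R-CNN box-delta encoding that
# mask_rcnn_architecture.encode_box_targets applies above, assuming
# TensorFlow 1.x and the y-first [y1, x1, y2, x2] box convention used in
# this codebase. `bbox_reg_weights` (e.g. (10., 10., 5., 5.)) scale the four
# deltas; the real helper also handles class-aware masking and padded
# (degenerate) boxes, which this simplification ignores.
def _encode_box_targets_sketch(gt_boxes, rois, weights):
  """gt_boxes, rois: float [..., 4] boxes; weights: four scalars."""
  wy, wx, wh, ww = weights
  roi_h = rois[..., 2] - rois[..., 0]
  roi_w = rois[..., 3] - rois[..., 1]
  roi_cy = rois[..., 0] + 0.5 * roi_h
  roi_cx = rois[..., 1] + 0.5 * roi_w
  gt_h = gt_boxes[..., 2] - gt_boxes[..., 0]
  gt_w = gt_boxes[..., 3] - gt_boxes[..., 1]
  gt_cy = gt_boxes[..., 0] + 0.5 * gt_h
  gt_cx = gt_boxes[..., 1] + 0.5 * gt_w
  # Targets are scaled center offsets plus log-scale size ratios.
  return tf.stack([
      wy * (gt_cy - roi_cy) / roi_h,
      wx * (gt_cx - roi_cx) / roi_w,
      wh * tf.log(gt_h / roi_h),
      ww * tf.log(gt_w / roi_w),
  ], axis=-1)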
def _model_outputs(self, features, labels, image_size, mode, params):
  """Generates outputs from the model."""
  all_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                params['num_scales'],
                                params['aspect_ratios'],
                                params['anchor_scale'], image_size)

  if params['conv0_space_to_depth_block_size'] != 0:
    image_size = tuple(x // params['conv0_space_to_depth_block_size']
                       for x in image_size)

  if params['transpose_input']:
    images = tf.reshape(
        features['images'],
        [image_size[0], image_size[1], params['batch_size'], -1])
    images = tf.transpose(images, [2, 0, 1, 3])
  else:
    images = tf.reshape(
        features['images'],
        [params['batch_size'], image_size[0], image_size[1], -1])

  fpn_feats = fpn.resnet_fpn(images, params['min_level'],
                             params['max_level'], params['resnet_depth'],
                             params['conv0_kernel_size'],
                             params['conv0_space_to_depth_block_size'],
                             params['is_training_bn'])

  rpn_score_outputs, rpn_box_outputs = mask_rcnn_architecture.rpn_net(
      fpn_feats, params['min_level'], params['max_level'],
      len(params['aspect_ratios']) * params['num_scales'])

  if mode == tf.estimator.ModeKeys.PREDICT:
    # Use TEST.NMS in the reference for this value. Reference: https://github.com/ddkang/Detectron/blob/80f329530843e66d07ca39e19901d5f3e5daf009/lib/core/config.py#L227  # pylint: disable=line-too-long
    # The mask branch takes inputs from different places in training vs in
    # eval/predict. In training, the mask branch uses proposals combined
    # with labels to produce both mask outputs and targets. At test time,
    # it uses the post-processed predictions to generate masks.
    # Generate detections one image at a time.
    class_outputs, box_outputs, box_rois = (
        mask_rcnn_architecture.faster_rcnn(
            fpn_feats, rpn_score_outputs, rpn_box_outputs, all_anchors,
            features['image_info'], mode, params))
    batch_size, _, _ = class_outputs.get_shape().as_list()
    detections = []
    softmax_class_outputs = tf.nn.softmax(class_outputs)
    for i in range(batch_size):
      device = core_assignment_utils.get_core_assignment(
          i, params['num_cores_per_replica'])
      with tf.device(device):
        detections.append(
            post_processing.generate_detections_per_image_op(
                softmax_class_outputs[i], box_outputs[i], box_rois[i],
                features['source_ids'][i], features['image_info'][i],
                params['test_detections_per_image'],
                params['test_rpn_post_nms_topn'], params['test_nms'],
                params['bbox_reg_weights']))
    device = core_assignment_utils.get_core_assignment(
        core_assignment_utils.CORE_1, params['num_cores_per_replica'])
    with tf.device(device):
      detections = tf.stack(detections, axis=0)
      mask_outputs = mask_rcnn_architecture.mask_rcnn(
          fpn_feats, mode, params, detections=detections)
    return {'detections': detections, 'mask_outputs': mask_outputs}
  else:
    (class_outputs, box_outputs, box_rois, class_targets, box_targets,
     proposal_to_label_map) = mask_rcnn_architecture.faster_rcnn(
         fpn_feats, rpn_score_outputs, rpn_box_outputs, all_anchors,
         features['image_info'], mode, params, labels)
    encoded_box_targets = mask_rcnn_architecture.encode_box_targets(
        box_rois, box_targets, class_targets, params['bbox_reg_weights'])
    (mask_outputs, select_class_targets,
     mask_targets) = mask_rcnn_architecture.mask_rcnn(
         fpn_feats, mode, params, labels, class_targets, box_targets,
         box_rois, proposal_to_label_map)
    return {
        'rpn_score_outputs': rpn_score_outputs,
        'rpn_box_outputs': rpn_box_outputs,
        'class_outputs': class_outputs,
        'box_outputs': box_outputs,
        'class_targets': class_targets,
        'box_targets': encoded_box_targets,
        'box_rois': box_rois,
        'select_class_targets': select_class_targets,
        'mask_outputs': mask_outputs,
        'mask_targets': mask_targets,
    }
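# A small sketch of the `transpose_input` path above, assuming TensorFlow
# 1.x. For TPU input-pipeline efficiency the images may arrive flattened in
# a host-transposed [h, w, batch, c] layout; the model reshapes and
# transposes them back to the NHWC layout the backbone expects. The helper
# name is illustrative, not part of the codebase.
def _untranspose_images_sketch(flat_images, image_size, batch_size):
  h, w = image_size
  images = tf.reshape(flat_images, [h, w, batch_size, -1])  # [h, w, n, c]
  return tf.transpose(images, [2, 0, 1, 3])  # -> [n, h, w, c]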