Code example #1: building the Mask R-CNN forward graph (backbone, FPN, RPN, box head, optional mask head)
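# NOTE: excerpted from a larger codebase. The snippet assumes
# `import tensorflow as tf` plus project-local modules (anchors, resnet, fpn,
# heads, roi_ops, training_ops, spatial_transform_ops, postprocess_ops), a
# module-level MODELS dict, and the helpers MPI_local_rank() and
# compute_model_statistics().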
def build_model_graph(features, labels, is_training, params):
    """Builds the forward model graph."""
    model_outputs = {}
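    # True only when running inference with batched NMS enabled, i.e. the GPU
    # inference path.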
    is_gpu_inference = not is_training and params['use_batched_nms']

    batch_size, image_height, image_width, _ = features['images'].get_shape(
    ).as_list()

    if 'source_ids' not in features:
        features['source_ids'] = -1 * tf.ones([batch_size], dtype=tf.float32)

    all_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                  params['num_scales'],
                                  params['aspect_ratios'],
                                  params['anchor_scale'],
                                  (image_height, image_width))

    MODELS["backbone"] = resnet.Resnet_Model("resnet50",
                                             data_format='channels_last',
                                             trainable=is_training,
                                             finetune_bn=params['finetune_bn'])

    backbone_feats = MODELS["backbone"](
        features['images'],
        training=is_training,
    )

    MODELS["FPN"] = fpn.FPNNetwork(params['min_level'],
                                   params['max_level'],
                                   trainable=is_training)
    fpn_feats = MODELS["FPN"](backbone_feats, training=is_training)

    model_outputs.update({'fpn_features': fpn_feats})

    def rpn_head_fn(features, min_level=2, max_level=6, num_anchors=3):
        """Region Proposal Network (RPN) for Mask-RCNN."""
        scores_outputs = dict()
        box_outputs = dict()

        MODELS["RPN_Heads"] = heads.RPN_Head_Model(name="rpn_head",
                                                   num_anchors=num_anchors,
                                                   trainable=is_training)
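        # The same head instance is applied to every pyramid level below,
        # sharing its weights across levels.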

        for level in range(min_level, max_level + 1):
            scores_outputs[level], box_outputs[level] = MODELS["RPN_Heads"](
                features[level], training=is_training)

        return scores_outputs, box_outputs

    rpn_score_outputs, rpn_box_outputs = rpn_head_fn(
        features=fpn_feats,
        min_level=params['min_level'],
        max_level=params['max_level'],
        num_anchors=len(params['aspect_ratios']) * params['num_scales'])

    if is_training:
        rpn_pre_nms_topn = params['train_rpn_pre_nms_topn']
        rpn_post_nms_topn = params['train_rpn_post_nms_topn']
        rpn_nms_threshold = params['train_rpn_nms_threshold']

    else:
        rpn_pre_nms_topn = params['test_rpn_pre_nms_topn']
        rpn_post_nms_topn = params['test_rpn_post_nms_topn']
        rpn_nms_threshold = params['test_rpn_nms_thresh']

    if params['use_custom_box_proposals_op']:
        rpn_box_scores, rpn_box_rois = roi_ops.custom_multilevel_propose_rois(
            scores_outputs=rpn_score_outputs,
            box_outputs=rpn_box_outputs,
            all_anchors=all_anchors,
            image_info=features['image_info'],
            rpn_pre_nms_topn=rpn_pre_nms_topn,
            rpn_post_nms_topn=rpn_post_nms_topn,
            rpn_nms_threshold=rpn_nms_threshold,
            rpn_min_size=params['rpn_min_size'])

    else:
        rpn_box_scores, rpn_box_rois = roi_ops.multilevel_propose_rois(
            scores_outputs=rpn_score_outputs,
            box_outputs=rpn_box_outputs,
            all_anchors=all_anchors,
            image_info=features['image_info'],
            rpn_pre_nms_topn=rpn_pre_nms_topn,
            rpn_post_nms_topn=rpn_post_nms_topn,
            rpn_nms_threshold=rpn_nms_threshold,
            rpn_min_size=params['rpn_min_size'],
            bbox_reg_weights=None,
            use_batched_nms=params['use_batched_nms'])

    rpn_box_rois = tf.cast(rpn_box_rois, dtype=tf.float32)

    if is_training:
        rpn_box_rois = tf.stop_gradient(rpn_box_rois)
        # TODO(jonathan): rpn_box_scores appears unused downstream; keep or drop?
        rpn_box_scores = tf.stop_gradient(rpn_box_scores)

        # Sampling
        (box_targets, class_targets, rpn_box_rois,
         proposal_to_label_map) = training_ops.proposal_label_op(
            rpn_box_rois,
            labels['gt_boxes'],
            labels['gt_classes'],
            batch_size_per_im=params['batch_size_per_im'],
            fg_fraction=params['fg_fraction'],
            fg_thresh=params['fg_thresh'],
            bg_thresh_hi=params['bg_thresh_hi'],
            bg_thresh_lo=params['bg_thresh_lo'])

    # Performs multi-level RoIAlign.
    box_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
        features=fpn_feats,
        boxes=rpn_box_rois,
        output_size=7,
        is_gpu_inference=is_gpu_inference)

    MODELS["Box_Head"] = heads.Box_Head_Model(
        num_classes=params['num_classes'],
        mlp_head_dim=params['fast_rcnn_mlp_head_dim'],
        trainable=is_training)

    class_outputs, box_outputs, _ = MODELS["Box_Head"](inputs=box_roi_features)

    if not is_training:
        if params['use_batched_nms']:
            generate_detections_fn = postprocess_ops.generate_detections_gpu

        else:
            generate_detections_fn = postprocess_ops.generate_detections_tpu

        detections = generate_detections_fn(
            class_outputs=class_outputs,
            box_outputs=box_outputs,
            anchor_boxes=rpn_box_rois,
            image_info=features['image_info'],
            pre_nms_num_detections=params['test_rpn_post_nms_topn'],
            post_nms_num_detections=params['test_detections_per_image'],
            nms_threshold=params['test_nms'],
            bbox_reg_weights=params['bbox_reg_weights'])

        model_outputs.update({
            'num_detections': detections[0],
            'detection_boxes': detections[1],
            'detection_classes': detections[2],
            'detection_scores': detections[3],
        })

    else:  # is training
        encoded_box_targets = training_ops.encode_box_targets(
            boxes=rpn_box_rois,
            gt_boxes=box_targets,
            gt_labels=class_targets,
            bbox_reg_weights=params['bbox_reg_weights'])

        model_outputs.update({
            'rpn_score_outputs': rpn_score_outputs,
            'rpn_box_outputs': rpn_box_outputs,
            'class_outputs': class_outputs,
            'box_outputs': box_outputs,
            'class_targets': class_targets,
            'box_targets': encoded_box_targets,
            'box_rois': rpn_box_rois,
        })

    # Faster-RCNN mode.
    if not params['include_mask']:
        return model_outputs

    # Mask sampling
    if not is_training:
        selected_box_rois = model_outputs['detection_boxes']
        class_indices = model_outputs['detection_classes']

        # When using the GPU for inference, delay the cast until the Gather ops
        # show up, since GPU inference handles floating point better.
        # TODO(laigd): revisit this when newer versions of the GPU libraries
        # are released.
        if not params['use_batched_nms']:
            class_indices = tf.cast(class_indices, dtype=tf.int32)

    else:
        (selected_class_targets, selected_box_targets, selected_box_rois,
         proposal_to_label_map) = training_ops.select_fg_for_masks(
            class_targets=class_targets,
            box_targets=box_targets,
            boxes=rpn_box_rois,
            proposal_to_label_map=proposal_to_label_map,
            max_num_fg=int(params['batch_size_per_im'] * params['fg_fraction'])
        )

        class_indices = tf.cast(selected_class_targets, dtype=tf.int32)

    mask_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
        features=fpn_feats,
        boxes=selected_box_rois,
        output_size=14,
        is_gpu_inference=is_gpu_inference)

    MODELS["Mask_Head"] = heads.Mask_Head_Model(
        class_indices,
        num_classes=params['num_classes'],
        mrcnn_resolution=params['mrcnn_resolution'],
        is_gpu_inference=is_gpu_inference,
        trainable=is_training,
        name="mask_head")

    mask_outputs = MODELS["Mask_Head"](inputs=mask_roi_features)

    if MPI_local_rank() == 0:
        # Print #FLOPs in model.
        compute_model_statistics(batch_size, is_training=is_training)

    if is_training:
        mask_targets = training_ops.get_mask_targets(
            fg_boxes=selected_box_rois,
            fg_proposal_to_label_map=proposal_to_label_map,
            fg_box_targets=selected_box_targets,
            mask_gt_labels=labels['cropped_gt_masks'],
            output_size=params['mrcnn_resolution'])

        model_outputs.update({
            'mask_outputs': mask_outputs,
            'mask_targets': mask_targets,
            'selected_class_targets': selected_class_targets,
        })

    else:
        model_outputs.update({
            'detection_masks': tf.nn.sigmoid(mask_outputs),
        })

    return model_outputs
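
The multilevel_crop_and_resize call above implements multi-level RoIAlign: each RoI is assigned to an FPN level and a fixed-size feature patch is cropped from it. As a minimal, single-level sketch of that crop step, the stock tf.image.crop_and_resize op does the same thing on one feature map; the shapes below are illustrative assumptions, not values taken from the model:

import tensorflow as tf

# One FPN level: [batch, height, width, channels].
feature_map = tf.random.normal([2, 64, 64, 256])

# RoIs in normalized [y1, x1, y2, x2] coordinates, plus the index of the
# batch image each RoI belongs to.
boxes = tf.constant([[0.1, 0.1, 0.5, 0.5],
                     [0.2, 0.3, 0.9, 0.8]], dtype=tf.float32)
box_indices = tf.constant([0, 1], dtype=tf.int32)

# Bilinearly sample a fixed 7x7 patch per RoI -> [num_boxes, 7, 7, 256],
# mirroring the output_size=7 used for the box head above.
roi_features = tf.image.crop_and_resize(
    feature_map, boxes, box_indices, crop_size=[7, 7])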
Code example #2: building the session ConfigProto (grappler rewriter options, AMP, XLA, thread settings)
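  # NOTE: excerpted from a larger codebase. The snippet assumes
  # `import tensorflow as tf`, `from tensorflow.core.protobuf import
  # rewriter_config_pb2`, `import logging`, `import multiprocessing`,
  # Horovod (`import horovod.tensorflow as hvd`), and the project helpers
  # MPI_is_distributed() and MPI_local_rank().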
  def _get_session_config(mode, use_xla, use_amp, use_tf_distributed=False, allow_xla_at_inference=False):

      assert mode in ('train', 'eval')

      rewrite_options = rewriter_config_pb2.RewriterConfig(
          # arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
          # arithmetic_optimization=rewriter_config_pb2.RewriterConfig.ON,

          # constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
          # constant_folding=rewriter_config_pb2.RewriterConfig.ON,           # TO TEST

          # debug_stripper=rewriter_config_pb2.RewriterConfig.OFF,
          # debug_stripper=rewriter_config_pb2.RewriterConfig.ON,           # TO TEST

          # dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF,
          # dependency_optimization=rewriter_config_pb2.RewriterConfig.ON,           # TO TEST

          # disable_model_pruning=False,           # INCOMPATIBLE with AMP
          # function_optimization=True,
          # implementation_selector=True,

          # loop_optimization=rewriter_config_pb2.RewriterConfig.OFF,
          # loop_optimization=rewriter_config_pb2.RewriterConfig.ON,           # TO TEST

          # The default setting (SCHEDULING and SWAPPING HEURISTICS only)
          # memory_optimization=rewriter_config_pb2.RewriterConfig.DEFAULT_MEM_OPT,

          # Disabled in the meta-optimizer.
          # memory_optimization=rewriter_config_pb2.RewriterConfig.NO_MEM_OPT,

          # Driven by manual op-level annotations.
          # memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL,

          # Swapping heuristic will move a tensor from the GPU to the CPU and move it
          # back when needed to reduce peak memory usage.
          # memory_optimization=rewriter_config_pb2.RewriterConfig.SWAPPING_HEURISTICS,

          # Recomputation heuristics will recompute ops (such as Relu activation)
          # during backprop instead of storing them, reducing peak memory usage.
          # memory_optimization=rewriter_config_pb2.RewriterConfig.RECOMPUTATION_HEURISTICS,

          # Scheduling will split big ops such as AddN and try to enforce a schedule of
          # the new computations that decreases peak memory usage.
          # memory_optimization=rewriter_config_pb2.RewriterConfig.SCHEDULING_HEURISTICS,

          # Use any combination of swapping and recomputation heuristics.
          # memory_optimization=rewriter_config_pb2.RewriterConfig.HEURISTICS,

          meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.TWO,
          # meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.ONE,
          # meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.DEFAULT_NUM_ITERS,

          # pin_to_host_optimization=rewriter_config_pb2.RewriterConfig.OFF,
          # pin_to_host_optimization=rewriter_config_pb2.RewriterConfig.ON,         # TO TEST
          #
          # remapping=rewriter_config_pb2.RewriterConfig.OFF,
          # remapping=rewriter_config_pb2.RewriterConfig.ON,                   # TO TEST

          # scoped_allocator_optimization=rewriter_config_pb2.RewriterConfig.OFF,
          # scoped_allocator_optimization=rewriter_config_pb2.RewriterConfig.ON,  # TO TEST

          # shape_optimization=rewriter_config_pb2.RewriterConfig.OFF,
          # shape_optimization=rewriter_config_pb2.RewriterConfig.ON,           # TO TEST
      )

      if use_amp:
          logging.info("[%s] AMP is activated - Experiment Feature" % mode)
          rewrite_options.auto_mixed_precision = True

      config = tf.compat.v1.ConfigProto(
          allow_soft_placement=True,
          log_device_placement=False,
          graph_options=tf.compat.v1.GraphOptions(
              rewrite_options=rewrite_options,
              # infer_shapes=True  # Drops throughput by ~30%
          )
      )

      if use_tf_distributed:
        config.gpu_options.force_gpu_compatible = False

      else:
        config.gpu_options.force_gpu_compatible = True  # Force pinned memory

        if MPI_is_distributed():
            config.gpu_options.visible_device_list = str(MPI_local_rank())

      if use_xla and (mode == "train" or allow_xla_at_inference):
          logging.info("[%s] XLA is activated - Experiment Feature" % mode)
          config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
          # config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_2

      if mode == 'train':
          config.intra_op_parallelism_threads = 1  # Avoid pool of Eigen threads

          if MPI_is_distributed():
              config.inter_op_parallelism_threads = max(2, multiprocessing.cpu_count() // hvd.local_size())

          elif not use_tf_distributed:
              config.inter_op_parallelism_threads = 4

      return config
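
For context, a hypothetical usage sketch: it assumes graph-mode execution through the tf.compat.v1 APIs the function already relies on, with the actual training loop not shown here.

# Build a training config with AMP enabled and XLA disabled, then open a
# session with it.
config = _get_session_config(mode='train', use_xla=False, use_amp=True)
with tf.compat.v1.Session(config=config) as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    # ... sess.run(train_op) steps go here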