Example #1
    def test_get_correct_box_encoding_and_class_prediction_shapes(self):
        rfcn_box_predictor = box_predictor.RfcnKerasBoxPredictor(
            is_training=False,
            num_classes=2,
            conv_hyperparams=self._build_conv_hyperparams(),
            freeze_batchnorm=False,
            num_spatial_bins=[3, 3],
            depth=4,
            crop_size=[12, 12],
            box_code_size=4)

        def graph_fn(image_features, proposal_boxes):
            box_predictions = rfcn_box_predictor([image_features],
                                                 proposal_boxes=proposal_boxes)
            box_encodings = tf.concat(
                box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
            class_predictions_with_background = tf.concat(
                box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
                axis=1)
            return (box_encodings, class_predictions_with_background)

        image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
        proposal_boxes = np.random.rand(4, 2, 4).astype(np.float32)
        (box_encodings, class_predictions_with_background) = self.execute(
            graph_fn, [image_features, proposal_boxes])

        # Box encodings are flattened to batch (4) * proposals per image (2) = 8
        # boxes, with per-class (2) encodings of box_code_size 4.
        self.assertAllEqual(box_encodings.shape, [8, 1, 2, 4])
        # Class predictions include an implicit background class: 2 classes + 1.
        self.assertAllEqual(class_predictions_with_background.shape, [8, 1, 3])
Example #2
def graph_fn(image_features, proposal_boxes):
    rfcn_box_predictor = box_predictor.RfcnKerasBoxPredictor(
        is_training=False,
        num_classes=2,
        conv_hyperparams=self._build_conv_hyperparams(),
        freeze_batchnorm=False,
        num_spatial_bins=[3, 3],
        depth=4,
        crop_size=[12, 12],
        box_code_size=4)
    box_predictions = rfcn_box_predictor([image_features],
                                         proposal_boxes=proposal_boxes)
    box_encodings = tf.concat(
        box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
    class_predictions_with_background = tf.concat(
        box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
        axis=1)
    return (box_encodings, class_predictions_with_background)
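
As in Example #1, a graph_fn like this is typically exercised through the test
harness's execute helper with random NumPy inputs (this sketch assumes the same
test-case context, i.e. self.execute and np are available as above):

image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
proposal_boxes = np.random.rand(4, 2, 4).astype(np.float32)
box_encodings, class_predictions_with_background = self.execute(
    graph_fn, [image_features, proposal_boxes])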
Example #3
def build_keras(hyperparams_fn,
                freeze_batchnorm,
                inplace_batchnorm_update,
                num_predictions_per_location_list,
                box_predictor_config,
                is_training,
                num_classes,
                add_background_class=True):
    """Builds a Keras-based box predictor based on the configuration.

  Builds Keras-based box predictor based on the configuration.
  See box_predictor.proto for configurable options. Also, see box_predictor.py
  for more details.

  Args:
    hyperparams_fn: A function that takes a hyperparams_pb2.Hyperparams
      proto and returns a `hyperparams_builder.KerasLayerHyperparams`
      for Conv or FC hyperparameters.
    freeze_batchnorm: Whether to freeze batch norm parameters during
      training or not. When training with a small batch size (e.g. 1), it is
      desirable to freeze batch norm update and use pretrained batch norm
      params.
    inplace_batchnorm_update: Whether to update batch norm moving average
      values inplace. When this is false train op must add a control
      dependency on tf.graphkeys.UPDATE_OPS collection in order to update
      batch norm statistics.
    num_predictions_per_location_list: A list of integers representing the
      number of box predictions to be made per spatial location for each
      feature map.
    box_predictor_config: box_predictor_pb2.BoxPredictor proto containing
      configuration.
    is_training: Whether the models is in training mode.
    num_classes: Number of classes to predict.
    add_background_class: Whether to add an implicit background class.

  Returns:
    box_predictor: box_predictor.KerasBoxPredictor object.

  Raises:
    ValueError: On unknown box predictor, or one with no Keras box predictor.
  """
    if not isinstance(box_predictor_config, box_predictor_pb2.BoxPredictor):
        raise ValueError('box_predictor_config not of type '
                         'box_predictor_pb2.BoxPredictor.')

    box_predictor_oneof = box_predictor_config.WhichOneof(
        'box_predictor_oneof')

    if box_predictor_oneof == 'convolutional_box_predictor':
        config_box_predictor = box_predictor_config.convolutional_box_predictor
        conv_hyperparams = hyperparams_fn(
            config_box_predictor.conv_hyperparams)
        # Optionally apply clipping to box encodings, when box_encodings_clip_range
        # is set.
        box_encodings_clip_range = None
        if config_box_predictor.HasField('box_encodings_clip_range'):
            box_encodings_clip_range = BoxEncodingsClipRange(
                min=config_box_predictor.box_encodings_clip_range.min,
                max=config_box_predictor.box_encodings_clip_range.max)

        return build_convolutional_keras_box_predictor(
            is_training=is_training,
            num_classes=num_classes,
            add_background_class=add_background_class,
            conv_hyperparams=conv_hyperparams,
            freeze_batchnorm=freeze_batchnorm,
            inplace_batchnorm_update=inplace_batchnorm_update,
            num_predictions_per_location_list=num_predictions_per_location_list,
            use_dropout=config_box_predictor.use_dropout,
            dropout_keep_prob=config_box_predictor.dropout_keep_probability,
            box_code_size=config_box_predictor.box_code_size,
            kernel_size=config_box_predictor.kernel_size,
            num_layers_before_predictor=(
                config_box_predictor.num_layers_before_predictor),
            min_depth=config_box_predictor.min_depth,
            max_depth=config_box_predictor.max_depth,
            class_prediction_bias_init=(
                config_box_predictor.class_prediction_bias_init),
            use_depthwise=config_box_predictor.use_depthwise,
            box_encodings_clip_range=box_encodings_clip_range)

    if box_predictor_oneof == 'weight_shared_convolutional_box_predictor':
        config_box_predictor = (
            box_predictor_config.weight_shared_convolutional_box_predictor)
        conv_hyperparams = hyperparams_fn(
            config_box_predictor.conv_hyperparams)
        apply_batch_norm = config_box_predictor.conv_hyperparams.HasField(
            'batch_norm')
        # During training phase, logits are used to compute the loss. Only apply
        # sigmoid at inference to make the inference graph TPU friendly. This is
        # required because during TPU inference, model.postprocess is not called.
        score_converter_fn = build_score_converter(
            config_box_predictor.score_converter, is_training)
        # Optionally apply clipping to box encodings, when box_encodings_clip_range
        # is set.
        box_encodings_clip_range = None
        if config_box_predictor.HasField('box_encodings_clip_range'):
            box_encodings_clip_range = BoxEncodingsClipRange(
                min=config_box_predictor.box_encodings_clip_range.min,
                max=config_box_predictor.box_encodings_clip_range.max)

        return build_weight_shared_convolutional_keras_box_predictor(
            is_training=is_training,
            num_classes=num_classes,
            conv_hyperparams=conv_hyperparams,
            freeze_batchnorm=freeze_batchnorm,
            inplace_batchnorm_update=inplace_batchnorm_update,
            num_predictions_per_location_list=num_predictions_per_location_list,
            depth=config_box_predictor.depth,
            num_layers_before_predictor=(
                config_box_predictor.num_layers_before_predictor),
            box_code_size=config_box_predictor.box_code_size,
            kernel_size=config_box_predictor.kernel_size,
            add_background_class=add_background_class,
            class_prediction_bias_init=(
                config_box_predictor.class_prediction_bias_init),
            use_dropout=config_box_predictor.use_dropout,
            dropout_keep_prob=config_box_predictor.dropout_keep_probability,
            share_prediction_tower=config_box_predictor.share_prediction_tower,
            apply_batch_norm=apply_batch_norm,
            use_depthwise=config_box_predictor.use_depthwise,
            score_converter_fn=score_converter_fn,
            box_encodings_clip_range=box_encodings_clip_range)

    if box_predictor_oneof == 'mask_rcnn_box_predictor':
        config_box_predictor = box_predictor_config.mask_rcnn_box_predictor
        fc_hyperparams = hyperparams_fn(config_box_predictor.fc_hyperparams)
        conv_hyperparams = None
        if config_box_predictor.HasField('conv_hyperparams'):
            conv_hyperparams = hyperparams_fn(
                config_box_predictor.conv_hyperparams)
        return build_mask_rcnn_keras_box_predictor(
            is_training=is_training,
            num_classes=num_classes,
            add_background_class=add_background_class,
            fc_hyperparams=fc_hyperparams,
            freeze_batchnorm=freeze_batchnorm,
            use_dropout=config_box_predictor.use_dropout,
            dropout_keep_prob=config_box_predictor.dropout_keep_probability,
            box_code_size=config_box_predictor.box_code_size,
            share_box_across_classes=(
                config_box_predictor.share_box_across_classes),
            predict_instance_masks=config_box_predictor.predict_instance_masks,
            conv_hyperparams=conv_hyperparams,
            mask_height=config_box_predictor.mask_height,
            mask_width=config_box_predictor.mask_width,
            mask_prediction_num_conv_layers=(
                config_box_predictor.mask_prediction_num_conv_layers),
            mask_prediction_conv_depth=(
                config_box_predictor.mask_prediction_conv_depth),
            masks_are_class_agnostic=(
                config_box_predictor.masks_are_class_agnostic),
            convolve_then_upsample_masks=(
                config_box_predictor.convolve_then_upsample_masks))

    if box_predictor_oneof == 'rfcn_box_predictor':
        config_box_predictor = box_predictor_config.rfcn_box_predictor
        conv_hyperparams = hyperparams_fn(
            config_box_predictor.conv_hyperparams)
        box_predictor_object = rfcn_keras_box_predictor.RfcnKerasBoxPredictor(
            is_training=is_training,
            num_classes=num_classes,
            conv_hyperparams=conv_hyperparams,
            freeze_batchnorm=freeze_batchnorm,
            crop_size=[
                config_box_predictor.crop_height,
                config_box_predictor.crop_width
            ],
            num_spatial_bins=[
                config_box_predictor.num_spatial_bins_height,
                config_box_predictor.num_spatial_bins_width
            ],
            depth=config_box_predictor.depth,
            box_code_size=config_box_predictor.box_code_size)
        return box_predictor_object

    raise ValueError(
        'Unknown box predictor for Keras: {}'.format(box_predictor_oneof))
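
A minimal usage sketch of build_keras, assuming the TF Object Detection API
module layout (this builder in object_detection.builders.box_predictor_builder,
plus object_detection.builders.hyperparams_builder and
object_detection.protos.box_predictor_pb2). The text proto is an illustrative
RFCN configuration, not taken from the examples above:

from google.protobuf import text_format

from object_detection.builders import box_predictor_builder
from object_detection.builders import hyperparams_builder
from object_detection.protos import box_predictor_pb2

# Illustrative RFCN config; fields left unset fall back to their proto defaults.
box_predictor_text_proto = """
  rfcn_box_predictor {
    conv_hyperparams {
      regularizer { l2_regularizer { weight: 0.0005 } }
      initializer { truncated_normal_initializer { stddev: 0.01 } }
    }
  }
"""
box_predictor_proto = box_predictor_pb2.BoxPredictor()
text_format.Merge(box_predictor_text_proto, box_predictor_proto)

# hyperparams_fn converts the conv_hyperparams proto into Keras layer
# hyperparameters; num_predictions_per_location_list is unused by the RFCN
# branch but is a required argument.
rfcn_predictor = box_predictor_builder.build_keras(
    hyperparams_fn=hyperparams_builder.KerasLayerHyperparams,
    freeze_batchnorm=False,
    inplace_batchnorm_update=False,
    num_predictions_per_location_list=[1],
    box_predictor_config=box_predictor_proto,
    is_training=False,
    num_classes=2)
# rfcn_predictor is an RfcnKerasBoxPredictor, as used in Examples #1 and #2.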