Example No. 1
 def test_return_batch_norm_params_with_notrain_when_train_is_false(self):
   conv_hyperparams_text_proto = """
     regularizer {
       l2_regularizer {
       }
     }
     initializer {
       truncated_normal_initializer {
       }
     }
     batch_norm {
       decay: 0.7
       center: false
       scale: true
       epsilon: 0.03
       train: false
     }
   """
   conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
   text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
   scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
                                        is_training=True)
   scope = scope_fn()
   conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
   self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
   batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
   self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
   self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
   self.assertFalse(batch_norm_params['center'])
   self.assertTrue(batch_norm_params['scale'])
   self.assertFalse(batch_norm_params['is_training'])
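
The pattern above — hyperparams_builder.build returning a no-argument scope function — is typically consumed by re-entering the captured arg scope. A minimal usage sketch, assuming TF 1.x with tf.contrib.slim available (apply_hyperparams and its arguments are illustrative names, not part of the test):

import tensorflow as tf

slim = tf.contrib.slim

def apply_hyperparams(images, scope_fn):
  # scope_fn() yields the arg-scope dict built from the proto; slim.arg_scope
  # can re-enter a previously captured scope dict, so the conv below picks up
  # the configured regularizer, initializer, activation and normalizer.
  with slim.arg_scope(scope_fn()):
    return slim.conv2d(images, 32, [3, 3], scope='Conv')
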
Example No. 2
def _build_ssd_feature_extractor(feature_extractor_config, is_training,
                                 reuse_weights=None):
  """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.

  Args:
    feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto.
    is_training: True if this feature extractor is being built for training.
    reuse_weights: Whether the feature extractor should reuse weights.

  Returns:
    ssd_meta_arch.SSDFeatureExtractor based on config.

  Raises:
    ValueError: On invalid feature extractor type.
  """
  feature_type = feature_extractor_config.type
  depth_multiplier = feature_extractor_config.depth_multiplier
  min_depth = feature_extractor_config.min_depth
  pad_to_multiple = feature_extractor_config.pad_to_multiple
  batch_norm_trainable = feature_extractor_config.batch_norm_trainable
  use_explicit_padding = feature_extractor_config.use_explicit_padding
  use_depthwise = feature_extractor_config.use_depthwise
  conv_hyperparams = hyperparams_builder.build(
      feature_extractor_config.conv_hyperparams, is_training)

  if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
    raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))

  feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
  return feature_extractor_class(is_training, depth_multiplier, min_depth,
                                 pad_to_multiple, conv_hyperparams,
                                 batch_norm_trainable, reuse_weights,
                                 use_explicit_padding, use_depthwise)
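
For reference, every field read by the builder above maps onto the feature_extractor block of ssd.proto. A hypothetical text-format config (the type string and numeric values are assumptions; valid types are the keys of SSD_FEATURE_EXTRACTOR_CLASS_MAP):

feature_extractor {
  type: 'ssd_mobilenet_v2'
  depth_multiplier: 1.0
  min_depth: 16
  pad_to_multiple: 1
  batch_norm_trainable: true
  conv_hyperparams {
    regularizer { l2_regularizer { weight: 0.00004 } }
    initializer { truncated_normal_initializer { stddev: 0.03 } }
  }
}
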
 def test_return_non_default_batch_norm_params_with_train_during_train(self):
   conv_hyperparams_text_proto = """
     regularizer {
       l2_regularizer {
       }
     }
     initializer {
       truncated_normal_initializer {
       }
     }
     batch_norm {
       decay: 0.7
       center: false
       scale: true
       epsilon: 0.03
       train: true
     }
   """
   conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
   text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
   scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
    conv_scope_arguments = list(scope.values())[0]  # list() for Python 3
   self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
   batch_norm_params = conv_scope_arguments['normalizer_params']
   self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
   self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
   self.assertFalse(batch_norm_params['center'])
   self.assertTrue(batch_norm_params['scale'])
   self.assertTrue(batch_norm_params['is_training'])
Example No. 4
def _build_ssd_feature_extractor(feature_extractor_config, is_training,
                                 reuse_weights=None):
  """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.

  Args:
    feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto.
    is_training: True if this feature extractor is being built for training.
    reuse_weights: Whether the feature extractor should reuse weights.

  Returns:
    ssd_meta_arch.SSDFeatureExtractor based on config.

  Raises:
    ValueError: On invalid feature extractor type.
  """
  feature_type = feature_extractor_config.type
  depth_multiplier = feature_extractor_config.depth_multiplier
  min_depth = feature_extractor_config.min_depth
  pad_to_multiple = feature_extractor_config.pad_to_multiple
  use_explicit_padding = feature_extractor_config.use_explicit_padding
  use_depthwise = feature_extractor_config.use_depthwise
  conv_hyperparams = hyperparams_builder.build(
      feature_extractor_config.conv_hyperparams, is_training)
  override_base_feature_extractor_hyperparams = (
      feature_extractor_config.override_base_feature_extractor_hyperparams)

  if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
    raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))

  feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
  kwargs = {
      'is_training':
          is_training,
      'depth_multiplier':
          depth_multiplier,
      'min_depth':
          min_depth,
      'pad_to_multiple':
          pad_to_multiple,
      'conv_hyperparams_fn':
          conv_hyperparams,
      'reuse_weights':
          reuse_weights,
      'use_explicit_padding':
          use_explicit_padding,
      'use_depthwise':
          use_depthwise,
      'override_base_feature_extractor_hyperparams':
          override_base_feature_extractor_hyperparams
  }

  if feature_extractor_config.HasField('fpn'):
    kwargs.update({
        'fpn_min_level': feature_extractor_config.fpn.min_level,
        'fpn_max_level': feature_extractor_config.fpn.max_level,
    })

  return feature_extractor_class(**kwargs)
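
The HasField('fpn') check above means the two FPN levels are forwarded only when the config actually carries an fpn block, e.g. (a hypothetical fragment; the type string is an assumption):

feature_extractor {
  type: 'ssd_resnet50_v1_fpn'
  fpn {
    min_level: 3
    max_level: 7
  }
}
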
Example No. 5
 def _build_arg_scope_with_conv_hyperparams(self):
   conv_hyperparams = hyperparams_pb2.Hyperparams()
   conv_hyperparams_text_proto = """
     regularizer {
       l2_regularizer {
       }
     }
     initializer {
       truncated_normal_initializer {
       }
     }
   """
   text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
   return hyperparams_builder.build(conv_hyperparams, is_training=True)
 def test_default_arg_scope_has_conv2d_transpose_op(self):
   conv_hyperparams_text_proto = """
     regularizer {
       l1_regularizer {
       }
     }
     initializer {
       truncated_normal_initializer {
       }
     }
   """
   conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
   text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
   scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
   self.assertTrue(self._get_scope_key(slim.conv2d_transpose) in scope)
 def test_explicit_fc_op_arg_scope_has_fully_connected_op(self):
   conv_hyperparams_text_proto = """
     op: FC
     regularizer {
       l1_regularizer {
       }
     }
     initializer {
       truncated_normal_initializer {
       }
     }
   """
   conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
   text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
   scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
   self.assertTrue(self._get_scope_key(slim.fully_connected) in scope)
Example No. 8
 def _build_arg_scope_with_hyperparams(
     self, op_type=hyperparams_pb2.Hyperparams.CONV):
   hyperparams = hyperparams_pb2.Hyperparams()
   hyperparams_text_proto = """
     activation: NONE
     regularizer {
       l2_regularizer {
       }
     }
     initializer {
       truncated_normal_initializer {
       }
     }
   """
   text_format.Merge(hyperparams_text_proto, hyperparams)
   hyperparams.op = op_type
   return hyperparams_builder.build(hyperparams, is_training=True)
 def test_use_relu_6_activation(self):
   conv_hyperparams_text_proto = """
     regularizer {
       l2_regularizer {
       }
     }
     initializer {
       truncated_normal_initializer {
       }
     }
     activation: RELU_6
   """
   conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
   text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
   scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
    conv_scope_arguments = list(scope.values())[0]
   self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6)
 def test_do_not_use_batch_norm_if_default(self):
   conv_hyperparams_text_proto = """
     regularizer {
       l2_regularizer {
       }
     }
     initializer {
       truncated_normal_initializer {
       }
     }
   """
   conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
   text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
   scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
    conv_scope_arguments = list(scope.values())[0]
   self.assertEqual(conv_scope_arguments['normalizer_fn'], None)
   self.assertEqual(conv_scope_arguments['normalizer_params'], None)
Example No. 11
 def _build_conv_arg_scope_no_batch_norm(self):
   conv_hyperparams = hyperparams_pb2.Hyperparams()
   conv_hyperparams_text_proto = """
     activation: RELU_6
     regularizer {
       l2_regularizer {
       }
     }
     initializer {
       random_normal_initializer {
         stddev: 0.01
         mean: 0.0
       }
     }
   """
   text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
   return hyperparams_builder.build(conv_hyperparams, is_training=True)
 def test_separable_conv2d_and_conv2d_and_transpose_have_same_parameters(self):
   conv_hyperparams_text_proto = """
     regularizer {
       l1_regularizer {
       }
     }
     initializer {
       truncated_normal_initializer {
       }
     }
   """
   conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
   text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
   scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
   kwargs_1, kwargs_2, kwargs_3 = scope.values()
   self.assertDictEqual(kwargs_1, kwargs_2)
   self.assertDictEqual(kwargs_1, kwargs_3)
  def test_use_relu_6_activation(self):
    conv_hyperparams_text_proto = """
      regularizer {
        l2_regularizer {
        }
      }
      initializer {
        truncated_normal_initializer {
        }
      }
      activation: RELU_6
    """
    conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
    scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
                                         is_training=True)
    scope = scope_fn()
    conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
    self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6)
Example No. 14
 def test_separable_conv2d_and_conv2d_and_transpose_have_same_parameters(self):
   conv_hyperparams_text_proto = """
     regularizer {
       l1_regularizer {
       }
     }
     initializer {
       truncated_normal_initializer {
       }
     }
   """
   conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
   text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
   scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
                                        is_training=True)
   scope = scope_fn()
   kwargs_1, kwargs_2, kwargs_3 = scope.values()
   self.assertDictEqual(kwargs_1, kwargs_2)
   self.assertDictEqual(kwargs_1, kwargs_3)
Example No. 15
 def test_use_relu_activation(self):
   conv_hyperparams_text_proto = """
     regularizer {
       l2_regularizer {
       }
     }
     initializer {
       truncated_normal_initializer {
       }
     }
     activation: RELU
   """
   conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
   text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
   scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
                                        is_training=True)
   scope = scope_fn()
   conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
   self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu)
Example No. 16
def _build_ssd_feature_extractor(feature_extractor_config,
                                 is_training,
                                 reuse_weights=None,
                                 inplace_batchnorm_update=False):
  """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.

  Args:
    feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto.
    is_training: True if this feature extractor is being built for training.
    reuse_weights: Whether the feature extractor should reuse weights.
    inplace_batchnorm_update: Whether to update batch norm in place during
      training. This is required for batch norm to work correctly on TPUs. When
      this is false, the user must add a control dependency on
      tf.GraphKeys.UPDATE_OPS for the train/loss op in order to update the
      batch norm moving average parameters.

  Returns:
    ssd_meta_arch.SSDFeatureExtractor based on config.

  Raises:
    ValueError: On invalid feature extractor type.
  """
  feature_type = feature_extractor_config.type
  depth_multiplier = feature_extractor_config.depth_multiplier
  min_depth = feature_extractor_config.min_depth
  pad_to_multiple = feature_extractor_config.pad_to_multiple
  batch_norm_trainable = feature_extractor_config.batch_norm_trainable
  use_explicit_padding = feature_extractor_config.use_explicit_padding
  use_depthwise = feature_extractor_config.use_depthwise
  conv_hyperparams = hyperparams_builder.build(
      feature_extractor_config.conv_hyperparams, is_training)

  if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
    raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))

  feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
  return feature_extractor_class(is_training, depth_multiplier, min_depth,
                                 pad_to_multiple, conv_hyperparams,
                                 batch_norm_trainable, reuse_weights,
                                 use_explicit_padding, use_depthwise,
                                 inplace_batchnorm_update)
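
The inplace_batchnorm_update docstring above points at a dependency that is easy to miss. A minimal sketch of the required control dependency, assuming TF 1.x graph mode (optimizer and total_loss are placeholders):

# When inplace_batchnorm_update is False, the batch norm moving averages are
# refreshed by ops registered under tf.GraphKeys.UPDATE_OPS; the train op
# must depend on them or the averages never update.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
  train_op = optimizer.minimize(total_loss)
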
 def test_variance_in_range_with_truncated_normal_initializer(self):
   conv_hyperparams_text_proto = """
     regularizer {
       l2_regularizer {
       }
     }
     initializer {
       truncated_normal_initializer {
         mean: 0.0
         stddev: 0.8
       }
     }
   """
   conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
   text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
   scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
    conv_scope_arguments = list(scope.values())[0]
   initializer = conv_scope_arguments['weights_initializer']
   self._assert_variance_in_range(initializer, shape=[100, 40],
                                  variance=0.49, tol=1e-1)
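
The expected variance of 0.49 rather than 0.8² = 0.64 is deliberate: a truncated normal initializer resamples draws beyond two standard deviations, and a normal distribution truncated to ±2σ keeps only about 77% of its variance, so 0.64 × 0.774 ≈ 0.50, within the 1e-1 tolerance.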
  def _build_arg_scope_with_conv_hyperparams(self):
    conv_hyperparams = hyperparams_pb2.Hyperparams()
    conv_hyperparams_text_proto = """
      activation: RELU_6
      regularizer {
        l2_regularizer {
        }
      }
      initializer {
        random_normal_initializer {
          stddev: 0.01
          mean: 0.0
        }
      }
      batch_norm {
        train: true
      }
    """
    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
    return hyperparams_builder.build(conv_hyperparams, is_training=True)
 def test_variance_in_range_with_variance_scaling_initializer_uniform(self):
   conv_hyperparams_text_proto = """
     regularizer {
       l2_regularizer {
       }
     }
     initializer {
       variance_scaling_initializer {
         factor: 2.0
         mode: FAN_IN
         uniform: true
       }
     }
   """
   conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
   text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
   scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
    conv_scope_arguments = list(scope.values())[0]
   initializer = conv_scope_arguments['weights_initializer']
   self._assert_variance_in_range(initializer, shape=[100, 40],
                                  variance=2. / 100.)
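
With factor: 2.0 and mode FAN_IN, the variance_scaling initializer targets a variance of factor / fan_in; for the [100, 40] shape above fan_in is 100, hence the expected variance of 2 / 100.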
 def test_return_l1_regularized_weights(self):
   conv_hyperparams_text_proto = """
     regularizer {
       l1_regularizer {
         weight: 0.5
       }
     }
     initializer {
       truncated_normal_initializer {
       }
     }
   """
   conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
   text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
   scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
    conv_scope_arguments = list(scope.values())[0]
   regularizer = conv_scope_arguments['weights_regularizer']
   weights = np.array([1., -1, 4., 2.])
   with self.test_session() as sess:
     result = sess.run(regularizer(tf.constant(weights)))
   self.assertAllClose(np.abs(weights).sum() * 0.5, result)
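
Sanity check on the expected value: for weights [1, -1, 4, 2], the l1 regularizer returns weight × Σ|w| = 0.5 × (1 + 1 + 4 + 2) = 4.0.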
Example No. 24
def _build_ssd_feature_extractor(feature_extractor_config, is_training,
                                 reuse_weights=None,
                                 inplace_batchnorm_update=False):
  """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.

  Args:
    feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto.
    is_training: True if this feature extractor is being built for training.
    reuse_weights: Whether the feature extractor should reuse weights.
    inplace_batchnorm_update: Whether to update batch_norm inplace during
      training. This is required for batch norm to work correctly on TPUs. When
      this is false, user must add a control dependency on
      tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
      norm moving average parameters.

  Returns:
    ssd_meta_arch.SSDFeatureExtractor based on config.

  Raises:
    ValueError: On invalid feature extractor type.
  """
  feature_type = feature_extractor_config.type
  depth_multiplier = feature_extractor_config.depth_multiplier
  min_depth = feature_extractor_config.min_depth
  pad_to_multiple = feature_extractor_config.pad_to_multiple
  batch_norm_trainable = feature_extractor_config.batch_norm_trainable
  use_explicit_padding = feature_extractor_config.use_explicit_padding
  use_depthwise = feature_extractor_config.use_depthwise
  conv_hyperparams = hyperparams_builder.build(
      feature_extractor_config.conv_hyperparams, is_training)

  if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
    raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))

  feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
  return feature_extractor_class(is_training, depth_multiplier, min_depth,
                                 pad_to_multiple, conv_hyperparams,
                                 batch_norm_trainable, reuse_weights,
                                 use_explicit_padding, use_depthwise,
                                 inplace_batchnorm_update)
Example No. 25
 def test_variance_in_range_with_random_normal_initializer(self):
   conv_hyperparams_text_proto = """
     regularizer {
       l2_regularizer {
       }
     }
     initializer {
       random_normal_initializer {
         mean: 0.0
         stddev: 0.8
       }
     }
   """
   conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
   text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
   scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
                                        is_training=True)
   scope = scope_fn()
   conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
   initializer = conv_scope_arguments['weights_initializer']
   self._assert_variance_in_range(initializer, shape=[100, 40],
                                  variance=0.64, tol=1e-1)
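
Unlike the truncated normal case, random_normal_initializer applies no clipping, so the expected variance here is simply stddev² = 0.8² = 0.64.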
Example No. 27
def _build_lstm_feature_extractor(feature_extractor_config,
                                  is_training,
                                  lstm_state_depth,
                                  reuse_weights=None):
  """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.

  Args:
    feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto.
    is_training: True if this feature extractor is being built for training.
    lstm_state_depth: An integer of the depth of the lstm state.
    reuse_weights: If the feature extractor should reuse weights.

  Returns:
    ssd_meta_arch.SSDFeatureExtractor based on config.

  Raises:
    ValueError: On invalid feature extractor type.
  """
  feature_type = feature_extractor_config.type
  depth_multiplier = feature_extractor_config.depth_multiplier
  min_depth = feature_extractor_config.min_depth
  pad_to_multiple = feature_extractor_config.pad_to_multiple
  use_explicit_padding = feature_extractor_config.use_explicit_padding
  use_depthwise = feature_extractor_config.use_depthwise
  conv_hyperparams = hyperparams_builder.build(
      feature_extractor_config.conv_hyperparams, is_training)
  override_base_feature_extractor_hyperparams = (
      feature_extractor_config.override_base_feature_extractor_hyperparams)

  if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
    raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))

  feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
  return feature_extractor_class(
      is_training, depth_multiplier, min_depth, pad_to_multiple,
      conv_hyperparams, reuse_weights, use_explicit_padding, use_depthwise,
      override_base_feature_extractor_hyperparams, lstm_state_depth)
Example No. 28
 def test_variance_in_range_with_variance_scaling_initializer_fan_avg(self):
   conv_hyperparams_text_proto = """
     regularizer {
       l2_regularizer {
       }
     }
     initializer {
       variance_scaling_initializer {
         factor: 2.0
         mode: FAN_AVG
         uniform: false
       }
     }
   """
   conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
   text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
   scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
                                        is_training=True)
   scope = scope_fn()
   conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
   initializer = conv_scope_arguments['weights_initializer']
   self._assert_variance_in_range(initializer, shape=[100, 40],
                                  variance=4. / (100. + 40.))
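
In FAN_AVG mode the initializer divides the factor by the average of fan_in and fan_out, so the expected variance for the [100, 40] shape is 2.0 / ((100 + 40) / 2) = 4 / 140.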
Example No. 30
  def test_return_l2_regularizer_weights(self):
    conv_hyperparams_text_proto = """
      regularizer {
        l2_regularizer {
          weight: 0.42
        }
      }
      initializer {
        truncated_normal_initializer {
        }
      }
    """
    conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
    scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
                                         is_training=True)
    scope = scope_fn()
    conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]

    regularizer = conv_scope_arguments['weights_regularizer']
    weights = np.array([1., -1, 4., 2.])
    with self.test_session() as sess:
      result = sess.run(regularizer(tf.constant(weights)))
    self.assertAllClose(np.power(weights, 2).sum() / 2.0 * 0.42, result)
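
Sanity check on the expected value: slim's l2 regularizer computes weight × Σw² / 2, so for weights [1, -1, 4, 2] the result is 0.42 × (1 + 1 + 16 + 4) / 2 = 0.42 × 11 = 4.62.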
Example No. 32
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
    """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of type
  `rfcn_box_predictor` else builds a Faster R-CNN model.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    FasterRCNNMetaArch based on the config.

  Raises:
    ValueError: If frcnn_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
    num_classes = frcnn_config.num_classes
    image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

    feature_extractor = _build_faster_rcnn_feature_extractor(
        frcnn_config.feature_extractor, is_training,
        frcnn_config.inplace_batchnorm_update)

    number_of_stages = frcnn_config.number_of_stages
    first_stage_anchor_generator = anchor_generator_builder.build(
        frcnn_config.first_stage_anchor_generator)

    first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
    first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
        frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
    first_stage_box_predictor_kernel_size = (
        frcnn_config.first_stage_box_predictor_kernel_size)
    first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
    first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
    first_stage_positive_balance_fraction = (
        frcnn_config.first_stage_positive_balance_fraction)
    first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold
    first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
    first_stage_max_proposals = frcnn_config.first_stage_max_proposals
    first_stage_loc_loss_weight = (
        frcnn_config.first_stage_localization_loss_weight)
    first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

    initial_crop_size = frcnn_config.initial_crop_size
    maxpool_kernel_size = frcnn_config.maxpool_kernel_size
    maxpool_stride = frcnn_config.maxpool_stride

    second_stage_box_predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        frcnn_config.second_stage_box_predictor,
        is_training=is_training,
        num_classes=num_classes)
    second_stage_batch_size = frcnn_config.second_stage_batch_size
    second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction
    (second_stage_non_max_suppression_fn,
     second_stage_score_conversion_fn) = post_processing_builder.build(
         frcnn_config.second_stage_post_processing)
    second_stage_localization_loss_weight = (
        frcnn_config.second_stage_localization_loss_weight)
    second_stage_classification_loss = (
        losses_builder.build_faster_rcnn_classification_loss(
            frcnn_config.second_stage_classification_loss))
    second_stage_classification_loss_weight = (
        frcnn_config.second_stage_classification_loss_weight)
    second_stage_mask_prediction_loss_weight = (
        frcnn_config.second_stage_mask_prediction_loss_weight)

    hard_example_miner = None
    if frcnn_config.HasField('hard_example_miner'):
        hard_example_miner = losses_builder.build_hard_example_miner(
            frcnn_config.hard_example_miner,
            second_stage_classification_loss_weight,
            second_stage_localization_loss_weight)

    common_kwargs = {
        'is_training': is_training,
        'num_classes': num_classes,
        'image_resizer_fn': image_resizer_fn,
        'feature_extractor': feature_extractor,
        'number_of_stages': number_of_stages,
        'first_stage_anchor_generator': first_stage_anchor_generator,
        'first_stage_atrous_rate': first_stage_atrous_rate,
        'first_stage_box_predictor_arg_scope_fn':
        first_stage_box_predictor_arg_scope_fn,
        'first_stage_box_predictor_kernel_size':
        first_stage_box_predictor_kernel_size,
        'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
        'first_stage_minibatch_size': first_stage_minibatch_size,
        'first_stage_positive_balance_fraction':
        first_stage_positive_balance_fraction,
        'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
        'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
        'first_stage_max_proposals': first_stage_max_proposals,
        'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
        'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
        'second_stage_batch_size': second_stage_batch_size,
        'second_stage_balance_fraction': second_stage_balance_fraction,
        'second_stage_non_max_suppression_fn':
        second_stage_non_max_suppression_fn,
        'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
        'second_stage_localization_loss_weight':
        second_stage_localization_loss_weight,
        'second_stage_classification_loss': second_stage_classification_loss,
        'second_stage_classification_loss_weight':
        second_stage_classification_loss_weight,
        'hard_example_miner': hard_example_miner,
        'add_summaries': add_summaries
    }

    if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor):
        return rfcn_meta_arch.RFCNMetaArch(
            second_stage_rfcn_box_predictor=second_stage_box_predictor,
            **common_kwargs)
    else:
        return faster_rcnn_meta_arch.FasterRCNNMetaArch(
            initial_crop_size=initial_crop_size,
            maxpool_kernel_size=maxpool_kernel_size,
            maxpool_stride=maxpool_stride,
            second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
            second_stage_mask_prediction_loss_weight=(
                second_stage_mask_prediction_loss_weight),
            **common_kwargs)
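
The isinstance dispatch at the end means the meta-architecture is selected purely by the second-stage box predictor in the config; a hypothetical fragment that would take the R-FCN branch:

second_stage_box_predictor {
  rfcn_box_predictor {
    # rfcn-specific fields go here
  }
}
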
Example No. 33
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
  """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of type
  `rfcn_box_predictor` else builds a Faster R-CNN model.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    FasterRCNNMetaArch based on the config.

  Raises:
    ValueError: If frcnn_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
  num_classes = frcnn_config.num_classes
  image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

  feature_extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor, is_training,
      frcnn_config.inplace_batchnorm_update)

  number_of_stages = frcnn_config.number_of_stages
  first_stage_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)

  first_stage_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'proposal',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
  first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
  first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
  first_stage_box_predictor_kernel_size = (
      frcnn_config.first_stage_box_predictor_kernel_size)
  first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
  first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
  use_static_shapes = frcnn_config.use_static_shapes
  first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
      is_static=(frcnn_config.use_static_balanced_label_sampler and
                 use_static_shapes))
  first_stage_max_proposals = frcnn_config.first_stage_max_proposals
  if (frcnn_config.first_stage_nms_iou_threshold < 0 or
      frcnn_config.first_stage_nms_iou_threshold > 1.0):
    raise ValueError('iou_threshold not in [0, 1.0].')
  if (is_training and frcnn_config.second_stage_batch_size >
      first_stage_max_proposals):
    raise ValueError('second_stage_batch_size should be no greater than '
                     'first_stage_max_proposals.')
  first_stage_non_max_suppression_fn = functools.partial(
      post_processing.batch_multiclass_non_max_suppression,
      score_thresh=frcnn_config.first_stage_nms_score_threshold,
      iou_thresh=frcnn_config.first_stage_nms_iou_threshold,
      max_size_per_class=frcnn_config.first_stage_max_proposals,
      max_total_size=frcnn_config.first_stage_max_proposals,
      use_static_shapes=use_static_shapes)
  first_stage_loc_loss_weight = (
      frcnn_config.first_stage_localization_loss_weight)
  first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

  initial_crop_size = frcnn_config.initial_crop_size
  maxpool_kernel_size = frcnn_config.maxpool_kernel_size
  maxpool_stride = frcnn_config.maxpool_stride

  second_stage_target_assigner = target_assigner.create_target_assigner(
      'FasterRCNN',
      'detection',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
  second_stage_box_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  second_stage_batch_size = frcnn_config.second_stage_batch_size
  second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.second_stage_balance_fraction,
      is_static=(frcnn_config.use_static_balanced_label_sampler and
                 use_static_shapes))
  (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn
  ) = post_processing_builder.build(frcnn_config.second_stage_post_processing)
  second_stage_localization_loss_weight = (
      frcnn_config.second_stage_localization_loss_weight)
  second_stage_classification_loss = (
      losses_builder.build_faster_rcnn_classification_loss(
          frcnn_config.second_stage_classification_loss))
  second_stage_classification_loss_weight = (
      frcnn_config.second_stage_classification_loss_weight)
  second_stage_mask_prediction_loss_weight = (
      frcnn_config.second_stage_mask_prediction_loss_weight)

  hard_example_miner = None
  if frcnn_config.HasField('hard_example_miner'):
    hard_example_miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner,
        second_stage_classification_loss_weight,
        second_stage_localization_loss_weight)

  crop_and_resize_fn = (
      ops.matmul_crop_and_resize if frcnn_config.use_matmul_crop_and_resize
      else ops.native_crop_and_resize)
  clip_anchors_to_image = (
      frcnn_config.clip_anchors_to_image)

  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': image_resizer_fn,
      'feature_extractor': feature_extractor,
      'number_of_stages': number_of_stages,
      'first_stage_anchor_generator': first_stage_anchor_generator,
      'first_stage_target_assigner': first_stage_target_assigner,
      'first_stage_atrous_rate': first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope_fn':
      first_stage_box_predictor_arg_scope_fn,
      'first_stage_box_predictor_kernel_size':
      first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
      'first_stage_minibatch_size': first_stage_minibatch_size,
      'first_stage_sampler': first_stage_sampler,
      'first_stage_non_max_suppression_fn': first_stage_non_max_suppression_fn,
      'first_stage_max_proposals': first_stage_max_proposals,
      'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
      'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
      'second_stage_target_assigner': second_stage_target_assigner,
      'second_stage_batch_size': second_stage_batch_size,
      'second_stage_sampler': second_stage_sampler,
      'second_stage_non_max_suppression_fn':
      second_stage_non_max_suppression_fn,
      'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
      'second_stage_localization_loss_weight':
      second_stage_localization_loss_weight,
      'second_stage_classification_loss':
      second_stage_classification_loss,
      'second_stage_classification_loss_weight':
      second_stage_classification_loss_weight,
      'hard_example_miner': hard_example_miner,
      'add_summaries': add_summaries,
      'crop_and_resize_fn': crop_and_resize_fn,
      'clip_anchors_to_image': clip_anchors_to_image,
      'use_static_shapes': use_static_shapes,
      'resize_masks': frcnn_config.resize_masks
  }

  if isinstance(second_stage_box_predictor,
                rfcn_box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=second_stage_box_predictor,
        **common_kwargs)
  else:
    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        second_stage_mask_prediction_loss_weight=(
            second_stage_mask_prediction_loss_weight),
        **common_kwargs)
Example No. 34
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries, **kwargs):
    """Builds a Faster R-CNN or R-FCN detection model based on the model config.

    Builds R-FCN model if the second_stage_box_predictor in the config is of type
    `rfcn_box_predictor` else builds a Faster R-CNN model.

    Args:
      frcnn_config: A faster_rcnn.proto object containing the config for the
        desired FasterRCNNMetaArch or RFCNMetaArch.
      is_training: True if this model is being built for training purposes.
      add_summaries: Whether to add tf summaries in the model.
      kwargs: Optional keyword arguments:
        'rpn_type': the type of RPN to use, one of 'cascade_rpn', 'orign_rpn'
          or 'without_rpn'; the 'without_rpn' variant needs external boxes to
          replace the proposals generated by the RPN.
        'filter_fn_arg': the keyword arguments of the filter function, which
          needs the boxes used to filter the proposals.
        'replace_rpn_arg': a dictionary, used only if rpn_type ==
          'without_rpn' and it is not None, to replace the proposals
          generated by the RPN with the (possibly adjusted) ground truth:
          'type': a string, either 'gt' or 'others'.
          'scale': a float used to scale the boxes (e.g. the ground truth).

    Returns:
      FasterRCNNMetaArch based on the config.

    Raises:
      ValueError: If frcnn_config.type is not recognized (i.e. not registered in
        model_class_map).
    """
    num_classes = frcnn_config.num_classes
    image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

    feature_extractor = _build_faster_rcnn_feature_extractor(
        frcnn_config.feature_extractor, is_training,
        inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update)

    number_of_stages = frcnn_config.number_of_stages
    first_stage_anchor_generator = anchor_generator_builder.build(
        frcnn_config.first_stage_anchor_generator)

    first_stage_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'proposal',
        use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
    first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
    first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
        frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
    first_stage_box_predictor_kernel_size = (
        frcnn_config.first_stage_box_predictor_kernel_size)
    first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
    first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
    use_static_shapes = frcnn_config.use_static_shapes and (
            frcnn_config.use_static_shapes_for_eval or is_training)
    first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
        is_static=(frcnn_config.use_static_balanced_label_sampler and
                   use_static_shapes))
    first_stage_max_proposals = frcnn_config.first_stage_max_proposals
    if (frcnn_config.first_stage_nms_iou_threshold < 0 or
            frcnn_config.first_stage_nms_iou_threshold > 1.0):
        raise ValueError('iou_threshold not in [0, 1.0].')
    if (is_training and frcnn_config.second_stage_batch_size >
            first_stage_max_proposals):
        raise ValueError('second_stage_batch_size should be no greater than '
                         'first_stage_max_proposals.')
    first_stage_non_max_suppression_fn = functools.partial(
        post_processing.batch_multiclass_non_max_suppression,
        score_thresh=frcnn_config.first_stage_nms_score_threshold,
        iou_thresh=frcnn_config.first_stage_nms_iou_threshold,
        max_size_per_class=frcnn_config.first_stage_max_proposals,
        max_total_size=frcnn_config.first_stage_max_proposals,
        use_static_shapes=use_static_shapes)
    first_stage_loc_loss_weight = (
        frcnn_config.first_stage_localization_loss_weight)
    first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

    initial_crop_size = frcnn_config.initial_crop_size
    maxpool_kernel_size = frcnn_config.maxpool_kernel_size
    maxpool_stride = frcnn_config.maxpool_stride

    second_stage_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'detection',
        use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
    second_stage_box_predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        frcnn_config.second_stage_box_predictor,
        is_training=is_training,
        num_classes=num_classes)
    second_stage_batch_size = frcnn_config.second_stage_batch_size
    second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=frcnn_config.second_stage_balance_fraction,
        is_static=(frcnn_config.use_static_balanced_label_sampler and
                   use_static_shapes))
    (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn
     ) = post_processing_builder.build(frcnn_config.second_stage_post_processing)
    second_stage_localization_loss_weight = (
        frcnn_config.second_stage_localization_loss_weight)
    second_stage_classification_loss = (
        losses_builder.build_faster_rcnn_classification_loss(
            frcnn_config.second_stage_classification_loss))
    second_stage_classification_loss_weight = (
        frcnn_config.second_stage_classification_loss_weight)
    second_stage_mask_prediction_loss_weight = (
        frcnn_config.second_stage_mask_prediction_loss_weight)

    hard_example_miner = None
    if frcnn_config.HasField('hard_example_miner'):
        hard_example_miner = losses_builder.build_hard_example_miner(
            frcnn_config.hard_example_miner,
            second_stage_classification_loss_weight,
            second_stage_localization_loss_weight)

    crop_and_resize_fn = (
        ops.matmul_crop_and_resize if frcnn_config.use_matmul_crop_and_resize
        else ops.native_crop_and_resize)
    clip_anchors_to_image = (
        frcnn_config.clip_anchors_to_image)

    common_kwargs = {
        'is_training': is_training,
        'num_classes': num_classes,
        'image_resizer_fn': image_resizer_fn,
        'feature_extractor': feature_extractor,
        'number_of_stages': number_of_stages,
        'first_stage_anchor_generator': first_stage_anchor_generator,
        'first_stage_target_assigner': first_stage_target_assigner,
        'first_stage_atrous_rate': first_stage_atrous_rate,
        'first_stage_box_predictor_arg_scope_fn':
            first_stage_box_predictor_arg_scope_fn,
        'first_stage_box_predictor_kernel_size':
            first_stage_box_predictor_kernel_size,
        'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
        'first_stage_minibatch_size': first_stage_minibatch_size,
        'first_stage_sampler': first_stage_sampler,
        'first_stage_non_max_suppression_fn': first_stage_non_max_suppression_fn,
        'first_stage_max_proposals': first_stage_max_proposals,
        'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
        'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
        'second_stage_target_assigner': second_stage_target_assigner,
        'second_stage_batch_size': second_stage_batch_size,
        'second_stage_sampler': second_stage_sampler,
        'second_stage_non_max_suppression_fn':
            second_stage_non_max_suppression_fn,
        'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
        'second_stage_localization_loss_weight':
            second_stage_localization_loss_weight,
        'second_stage_classification_loss':
            second_stage_classification_loss,
        'second_stage_classification_loss_weight':
            second_stage_classification_loss_weight,
        'hard_example_miner': hard_example_miner,
        'add_summaries': add_summaries,
        'crop_and_resize_fn': crop_and_resize_fn,
        'clip_anchors_to_image': clip_anchors_to_image,
        'use_static_shapes': use_static_shapes,
        'resize_masks': frcnn_config.resize_masks
    }

    filter_fn_arg = kwargs.get('filter_fn_arg')
    if filter_fn_arg:
        filter_fn = functools.partial(filter_bbox, **filter_fn_arg)
        common_kwargs['filter_fn'] = filter_fn
    rpn_type = kwargs.get('rpn_type')
    if rpn_type:
        common_kwargs['rpn_type'] = rpn_type
    replace_rpn_arg = kwargs.get('replace_rpn_arg')
    if replace_rpn_arg:
        common_kwargs['replace_rpn_arg'] = replace_rpn_arg

    if isinstance(second_stage_box_predictor,
                  rfcn_box_predictor.RfcnBoxPredictor):
        return rfcn_meta_arch.RFCNMetaArch(
            second_stage_rfcn_box_predictor=second_stage_box_predictor,
            **common_kwargs)
    else:
        return faster_rcnn_meta_arch.FasterRCNNMetaArch(
            initial_crop_size=initial_crop_size,
            maxpool_kernel_size=maxpool_kernel_size,
            maxpool_stride=maxpool_stride,
            second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
            second_stage_mask_prediction_loss_weight=(
                second_stage_mask_prediction_loss_weight),
            **common_kwargs)
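
A hypothetical call showing the extra kwargs this variant reads (the values are placeholders chosen to match the docstring, not defaults from the original repo):

model = _build_faster_rcnn_model(
    frcnn_config, is_training=True, add_summaries=True,
    rpn_type='without_rpn',
    replace_rpn_arg={'type': 'gt', 'scale': 1.0})
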
Example No. 35
def _build_rcnn_attention_model(rcnna_config, is_training, is_calibration):
    """Builds a R-CNN attention model based on the model config.

  Args:
    rcnna_config: A rcnn_attention.proto object containing the config for the
    desired RCNNAttention model.
    is_training: True if this model is being built for training purposes.

  Returns:
    RCNNAttentionMetaArch based on the config.
  Raises:
    ValueError: If rcnna_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
    num_classes = rcnna_config.num_classes
    k_shot = rcnna_config.k_shot
    image_resizer_fn = image_resizer_builder.build(rcnna_config.image_resizer)

    feature_extractor = _build_faster_rcnn_feature_extractor(
        rcnna_config.feature_extractor, is_training)

    first_stage_only = rcnna_config.first_stage_only
    first_stage_anchor_generator = anchor_generator_builder.build(
        rcnna_config.first_stage_anchor_generator)

    first_stage_atrous_rate = rcnna_config.first_stage_atrous_rate
    first_stage_box_predictor_arg_scope = hyperparams_builder.build(
        rcnna_config.first_stage_box_predictor_conv_hyperparams, is_training)
    first_stage_box_predictor_kernel_size = (
        rcnna_config.first_stage_box_predictor_kernel_size)
    first_stage_box_predictor_depth = rcnna_config.first_stage_box_predictor_depth
    first_stage_minibatch_size = rcnna_config.first_stage_minibatch_size
    first_stage_positive_balance_fraction = (
        rcnna_config.first_stage_positive_balance_fraction)
    first_stage_nms_score_threshold = rcnna_config.first_stage_nms_score_threshold
    first_stage_nms_iou_threshold = rcnna_config.first_stage_nms_iou_threshold
    first_stage_max_proposals = rcnna_config.first_stage_max_proposals
    first_stage_loc_loss_weight = (
        rcnna_config.first_stage_localization_loss_weight)
    first_stage_obj_loss_weight = rcnna_config.first_stage_objectness_loss_weight

    initial_crop_size = rcnna_config.initial_crop_size
    maxpool_kernel_size = rcnna_config.maxpool_kernel_size
    maxpool_stride = rcnna_config.maxpool_stride

    second_stage_box_predictor = build_box_predictor(
        hyperparams_builder.build,
        rcnna_config.second_stage_box_predictor,
        is_training=is_training,
        num_classes=num_classes)
    second_stage_batch_size = rcnna_config.second_stage_batch_size
    second_stage_balance_fraction = rcnna_config.second_stage_balance_fraction
    (second_stage_non_max_suppression_fn,
     second_stage_score_conversion_fn) = post_processing_builder.build(
         rcnna_config.second_stage_post_processing)
    second_stage_localization_loss_weight = (
        rcnna_config.second_stage_localization_loss_weight)
    second_stage_classification_loss_weight = (
        rcnna_config.second_stage_classification_loss_weight)

    hard_example_miner = None
    if rcnna_config.HasField('hard_example_miner'):
        hard_example_miner = losses_builder.build_hard_example_miner(
            rcnna_config.hard_example_miner,
            second_stage_classification_loss_weight,
            second_stage_localization_loss_weight)

    attention_tree = None
    if rcnna_config.HasField('attention_tree'):
        attention_tree = attention_tree_builder.build(
            hyperparams_builder.build, rcnna_config.attention_tree,
            rcnna_config.k_shot, num_classes, rcnna_config.num_negative_bags,
            is_training, is_calibration)

    second_stage_convline = None
    if rcnna_config.HasField('second_stage_convline'):
        second_stage_convline = convline_builder.build(
            hyperparams_builder.build, None,
            rcnna_config.second_stage_convline, is_training)

    common_kwargs = {
        'is_training': is_training,
        'image_resizer_fn': image_resizer_fn,
        'feature_extractor': feature_extractor,
        'first_stage_only': first_stage_only,
        'first_stage_anchor_generator': first_stage_anchor_generator,
        'first_stage_atrous_rate': first_stage_atrous_rate,
        'first_stage_box_predictor_arg_scope':
        first_stage_box_predictor_arg_scope,
        'first_stage_box_predictor_kernel_size':
        first_stage_box_predictor_kernel_size,
        'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
        'first_stage_minibatch_size': first_stage_minibatch_size,
        'first_stage_positive_balance_fraction':
        first_stage_positive_balance_fraction,
        'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
        'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
        'first_stage_max_proposals': first_stage_max_proposals,
        'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
        'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
        'second_stage_batch_size': second_stage_batch_size,
        'second_stage_balance_fraction': second_stage_balance_fraction,
        'second_stage_non_max_suppression_fn':
        second_stage_non_max_suppression_fn,
        'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
        'second_stage_localization_loss_weight':
        second_stage_localization_loss_weight,
        'second_stage_classification_loss_weight':
        second_stage_classification_loss_weight,
        'hard_example_miner': hard_example_miner,
        'initial_crop_size': initial_crop_size,
        'maxpool_kernel_size': maxpool_kernel_size,
        'maxpool_stride': maxpool_stride,
        'second_stage_mask_rcnn_box_predictor': second_stage_box_predictor,
        'num_classes': num_classes
    }

    if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor):
        raise ValueError('RfcnBoxPredictor is not supported.')
    elif rcnna_config.build_faster_rcnn_arch:
        model = faster_rcnn_meta_arch.FasterRCNNMetaArch(**common_kwargs)
        model._k_shot = k_shot
        model._tree_debug_tensors = lambda: {}
        return model
    else:
        return rcnn_attention_meta_arch.RCNNAttentionMetaArch(
            k_shot=k_shot,
            attention_tree=attention_tree,
            second_stage_convline=second_stage_convline,
            attention_tree_only=rcnna_config.attention_tree_only,
            add_gt_boxes_to_rpn=rcnna_config.add_gt_boxes_to_rpn,
            **common_kwargs)
Example #36
def _build_faster_rcnn_model(frcnn_config, is_training, mtl=None):
    """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of type
  `rfcn_box_predictor` else builds a Faster R-CNN model.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    mtl: Optional multi-task learning config that enables the auxiliary
      window, closeness and edgemask predictors built below.

  Returns:
    FasterRCNNMetaArch based on the config.
  Raises:
    ValueError: If frcnn_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
    num_classes = frcnn_config.num_classes
    image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

    feature_extractor_kwargs = {}
    feature_extractor_kwargs[
        'freeze_layer'] = frcnn_config.feature_extractor.freeze_layer
    feature_extractor_kwargs[
        'batch_norm_trainable'] = frcnn_config.feature_extractor.batch_norm_trainable

    if frcnn_config.feature_extractor.HasField('weight_decay'):
        feature_extractor_kwargs['weight_decay'] = \
            frcnn_config.feature_extractor.weight_decay
    feature_extractor = _build_faster_rcnn_feature_extractor(
        frcnn_config.feature_extractor,
        is_training and frcnn_config.feature_extractor.trainable,
        reuse_weights=tf.AUTO_REUSE,
        **feature_extractor_kwargs)

    first_stage_only = frcnn_config.first_stage_only
    first_stage_anchor_generator = anchor_generator_builder.build(
        frcnn_config.first_stage_anchor_generator)

    first_stage_clip_window = frcnn_config.first_stage_clip_window
    first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
    first_stage_box_predictor_trainable = \
        frcnn_config.first_stage_box_predictor_trainable
    first_stage_box_predictor_arg_scope = hyperparams_builder.build(
        frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
    first_stage_box_predictor_kernel_size = (
        frcnn_config.first_stage_box_predictor_kernel_size)
    first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
    first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
    first_stage_positive_balance_fraction = (
        frcnn_config.first_stage_positive_balance_fraction)
    first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold
    first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
    first_stage_max_proposals = frcnn_config.first_stage_max_proposals
    first_stage_loc_loss_weight = (
        frcnn_config.first_stage_localization_loss_weight)
    first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

    initial_crop_size = frcnn_config.initial_crop_size
    maxpool_kernel_size = frcnn_config.maxpool_kernel_size
    maxpool_stride = frcnn_config.maxpool_stride

    second_stage_box_predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        frcnn_config.second_stage_box_predictor,
        is_training=is_training
        and frcnn_config.second_stage_box_predictor.trainable,
        num_classes=num_classes,
        reuse_weights=tf.AUTO_REUSE)
    second_stage_batch_size = frcnn_config.second_stage_batch_size
    second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction
    (second_stage_non_max_suppression_fn,
     second_stage_score_conversion_fn) = post_processing_builder.build(
         frcnn_config.second_stage_post_processing)
    second_stage_localization_loss_weight = (
        frcnn_config.second_stage_localization_loss_weight)
    second_stage_classification_loss_weight = (
        frcnn_config.second_stage_classification_loss_weight)

    if mtl is not None and mtl.window:
        window_box_predictor = box_predictor_builder.build(
            hyperparams_builder.build,
            mtl.window_box_predictor,
            is_training=is_training and mtl.window_box_predictor.trainable,
            num_classes=num_classes + 1,
            reuse_weights=tf.AUTO_REUSE)
    else:
        window_box_predictor = second_stage_box_predictor

    if mtl is not None and mtl.closeness:
        closeness_box_predictor = box_predictor_builder.build(
            hyperparams_builder.build,
            mtl.closeness_box_predictor,
            is_training=is_training and mtl.closeness_box_predictor.trainable,
            num_classes=num_classes + 1,
            reuse_weights=tf.AUTO_REUSE)
    else:
        closeness_box_predictor = second_stage_box_predictor

    if mtl is not None and mtl.edgemask:
        edgemask_predictor = mask_predictor_builder.build(
            hyperparams_builder.build,
            mtl.edgemask_predictor,
            is_training=is_training and mtl.edgemask_predictor.trainable,
            num_classes=2,
            reuse_weights=tf.AUTO_REUSE,
            channels=1)
    else:
        edgemask_predictor = None

    mtl_refiner_arg_scope = None
    if mtl is not None and mtl.refine:
        mtl_refiner_arg_scope = hyperparams_builder.build(
            mtl.refiner_fc_hyperparams, is_training)

    hard_example_miner = None
    if frcnn_config.HasField('hard_example_miner'):
        hard_example_miner = losses_builder.build_hard_example_miner(
            frcnn_config.hard_example_miner,
            second_stage_classification_loss_weight,
            second_stage_localization_loss_weight)

    common_kwargs = {
        'is_training': is_training,
        'num_classes': num_classes,
        'image_resizer_fn': image_resizer_fn,
        'feature_extractor': feature_extractor,
        'first_stage_only': first_stage_only,
        'first_stage_anchor_generator': first_stage_anchor_generator,
        'first_stage_clip_window': first_stage_clip_window,
        'first_stage_atrous_rate': first_stage_atrous_rate,
        'first_stage_box_predictor_trainable':
        first_stage_box_predictor_trainable,
        'first_stage_box_predictor_arg_scope':
        first_stage_box_predictor_arg_scope,
        'first_stage_box_predictor_kernel_size':
        first_stage_box_predictor_kernel_size,
        'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
        'first_stage_minibatch_size': first_stage_minibatch_size,
        'first_stage_positive_balance_fraction':
        first_stage_positive_balance_fraction,
        'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
        'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
        'first_stage_max_proposals': first_stage_max_proposals,
        'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
        'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
        'second_stage_batch_size': second_stage_batch_size,
        'second_stage_balance_fraction': second_stage_balance_fraction,
        'second_stage_non_max_suppression_fn':
        second_stage_non_max_suppression_fn,
        'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
        'second_stage_localization_loss_weight':
        second_stage_localization_loss_weight,
        'second_stage_classification_loss_weight':
        second_stage_classification_loss_weight,
        'hard_example_miner': hard_example_miner,
        'mtl': mtl,
        'mtl_refiner_arg_scope': mtl_refiner_arg_scope,
        'window_box_predictor': window_box_predictor,
        'closeness_box_predictor': closeness_box_predictor,
        'edgemask_predictor': edgemask_predictor
    }

    if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor):
        return rfcn_meta_arch.RFCNMetaArch(
            second_stage_rfcn_box_predictor=second_stage_box_predictor,
            **common_kwargs)
    else:
        return faster_rcnn_meta_arch.FasterRCNNMetaArch(
            initial_crop_size=initial_crop_size,
            maxpool_kernel_size=maxpool_kernel_size,
            maxpool_stride=maxpool_stride,
            second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
            **common_kwargs)
  def _build_arg_scope_with_hyperparams(self,
                                        hyperparams_text_proto,
                                        is_training):
    hyperparams = hyperparams_pb2.Hyperparams()
    text_format.Merge(hyperparams_text_proto, hyperparams)
    return hyperparams_builder.build(hyperparams, is_training=is_training)
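For reference, a minimal sketch of how the helper above is typically exercised, assuming the TF Object Detection API imports used throughout these examples. The field values are illustrative only, and the return value of hyperparams_builder.build differs across releases (an arg scope in older ones, a no-argument function producing the scope in newer ones), so the last line hedges between the two.

from google.protobuf import text_format

from object_detection.builders import hyperparams_builder
from object_detection.protos import hyperparams_pb2

# Any valid Hyperparams text proto works here; values are placeholders.
hyperparams_text_proto = """
  regularizer { l2_regularizer { weight: 0.0004 } }
  initializer { truncated_normal_initializer { stddev: 0.03 } }
"""
hyperparams = hyperparams_pb2.Hyperparams()
text_format.Merge(hyperparams_text_proto, hyperparams)
built = hyperparams_builder.build(hyperparams, is_training=True)
# Newer releases return a function that creates the slim arg scope; older
# releases return the scope itself.
scope = built() if callable(built) else built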
Example #38
def _build_ssd_feature_extractor(feature_extractor_config,
                                 is_training,
                                 freeze_batchnorm,
                                 reuse_weights=None):
  """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.

  Args:
    feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto.
    is_training: True if this feature extractor is being built for training.
    freeze_batchnorm: Whether to freeze batch norm parameters during
      training or not. When training with a small batch size (e.g. 1), it is
      desirable to freeze batch norm update and use pretrained batch norm
      params.
    reuse_weights: if the feature extractor should reuse weights.

  Returns:
    ssd_meta_arch.SSDFeatureExtractor based on config.

  Raises:
    ValueError: On invalid feature extractor type.
  """
  feature_type = feature_extractor_config.type
  is_keras_extractor = feature_type in SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP
  depth_multiplier = feature_extractor_config.depth_multiplier
  min_depth = feature_extractor_config.min_depth
  pad_to_multiple = feature_extractor_config.pad_to_multiple
  use_explicit_padding = feature_extractor_config.use_explicit_padding
  use_depthwise = feature_extractor_config.use_depthwise

  if is_keras_extractor:
    conv_hyperparams = hyperparams_builder.KerasLayerHyperparams(
        feature_extractor_config.conv_hyperparams)
  else:
    conv_hyperparams = hyperparams_builder.build(
        feature_extractor_config.conv_hyperparams, is_training)
  override_base_feature_extractor_hyperparams = (
      feature_extractor_config.override_base_feature_extractor_hyperparams)

  if (feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP) and (
      not is_keras_extractor):
    raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))

  if is_keras_extractor:
    feature_extractor_class = SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP[
        feature_type]
  else:
    feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
  kwargs = {
      'is_training':
          is_training,
      'depth_multiplier':
          depth_multiplier,
      'min_depth':
          min_depth,
      'pad_to_multiple':
          pad_to_multiple,
      'use_explicit_padding':
          use_explicit_padding,
      'use_depthwise':
          use_depthwise,
      'override_base_feature_extractor_hyperparams':
          override_base_feature_extractor_hyperparams
  }

  if is_keras_extractor:
    kwargs.update({
        'conv_hyperparams': conv_hyperparams,
        'inplace_batchnorm_update': False,
        'freeze_batchnorm': freeze_batchnorm
    })
  else:
    kwargs.update({
        'conv_hyperparams_fn': conv_hyperparams,
        'reuse_weights': reuse_weights,
    })

  if feature_extractor_config.HasField('fpn'):
    kwargs.update({
        'fpn_min_level':
            feature_extractor_config.fpn.min_level,
        'fpn_max_level':
            feature_extractor_config.fpn.max_level,
        'additional_layer_depth':
            feature_extractor_config.fpn.additional_layer_depth,
    })

  return feature_extractor_class(**kwargs)
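A hedged usage sketch for the builder above: parse the feature_extractor block of an ssd.proto config and pass it in. The extractor type and hyperparameter values are placeholders, not recommendations; the type must be a key of SSD_FEATURE_EXTRACTOR_CLASS_MAP (or the Keras map) in your checkout.

from google.protobuf import text_format

from object_detection.protos import ssd_pb2

feature_extractor_text_proto = """
  type: 'ssd_mobilenet_v2'
  depth_multiplier: 1.0
  min_depth: 16
  conv_hyperparams {
    regularizer { l2_regularizer { } }
    initializer { truncated_normal_initializer { } }
  }
"""
feature_extractor_config = ssd_pb2.SsdFeatureExtractor()
text_format.Merge(feature_extractor_text_proto, feature_extractor_config)
feature_extractor = _build_ssd_feature_extractor(
    feature_extractor_config, is_training=True, freeze_batchnorm=False)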
Example #39
def _build_ssd_feature_extractor(feature_extractor_config,
                                 is_training,
                                 freeze_batchnorm,
                                 reuse_weights=None):
  """Builds a ssd_meta_arch.SSDFeatureExtractor based on config."""
  feature_type = feature_extractor_config.type
  is_keras_extractor = feature_type in SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP
  depth_multiplier = feature_extractor_config.depth_multiplier
  min_depth = feature_extractor_config.min_depth
  pad_to_multiple = feature_extractor_config.pad_to_multiple
  use_explicit_padding = feature_extractor_config.use_explicit_padding
  use_depthwise = feature_extractor_config.use_depthwise

  if is_keras_extractor:
    conv_hyperparams = hyperparams_builder.KerasLayerHyperparams(
        feature_extractor_config.conv_hyperparams)
  else:
    conv_hyperparams = hyperparams_builder.build(
        feature_extractor_config.conv_hyperparams, is_training)
  override_base_feature_extractor_hyperparams = (
      feature_extractor_config.override_base_feature_extractor_hyperparams)

  if (feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP) and (
      not is_keras_extractor):
    raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))

  if is_keras_extractor:
    feature_extractor_class = SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP[
        feature_type]
  else:
    feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
  kwargs = {
      'is_training':
          is_training,
      'depth_multiplier':
          depth_multiplier,
      'min_depth':
          min_depth,
      'pad_to_multiple':
          pad_to_multiple,
      'use_explicit_padding':
          use_explicit_padding,
      'use_depthwise':
          use_depthwise,
      'override_base_feature_extractor_hyperparams':
          override_base_feature_extractor_hyperparams
  }

  if feature_extractor_config.HasField('replace_preprocessor_with_placeholder'):
    kwargs.update({
        'replace_preprocessor_with_placeholder':
            feature_extractor_config.replace_preprocessor_with_placeholder
    })

  if feature_extractor_config.HasField('num_layers'):
    kwargs.update({'num_layers': feature_extractor_config.num_layers})

  if is_keras_extractor:
    kwargs.update({
        'conv_hyperparams': conv_hyperparams,
        'inplace_batchnorm_update': False,
        'freeze_batchnorm': freeze_batchnorm
    })
  else:
    kwargs.update({
        'conv_hyperparams_fn': conv_hyperparams,
        'reuse_weights': reuse_weights,
    })

  if feature_extractor_config.HasField('fpn'):
    kwargs.update({
        'fpn_min_level':
            feature_extractor_config.fpn.min_level,
        'fpn_max_level':
            feature_extractor_config.fpn.max_level,
        'additional_layer_depth':
            feature_extractor_config.fpn.additional_layer_depth,
    })

  return feature_extractor_class(**kwargs)
Example #40
def _build_ssd_feature_extractor(feature_extractor_config,
                                 is_training,
                                 freeze_batchnorm,
                                 reuse_weights=None):
    """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.

  Args:
    feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto.
    is_training: True if this feature extractor is being built for training.
    freeze_batchnorm: Whether to freeze batch norm parameters during
      training or not. When training with a small batch size (e.g. 1), it is
      desirable to freeze batch norm update and use pretrained batch norm
      params.
    reuse_weights: if the feature extractor should reuse weights.

  Returns:
    ssd_meta_arch.SSDFeatureExtractor based on config.

  Raises:
    ValueError: On invalid feature extractor type.
  """
    feature_type = feature_extractor_config.type
    is_keras_extractor = feature_type in SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP
    depth_multiplier = feature_extractor_config.depth_multiplier
    min_depth = feature_extractor_config.min_depth
    pad_to_multiple = feature_extractor_config.pad_to_multiple
    use_explicit_padding = feature_extractor_config.use_explicit_padding
    use_depthwise = feature_extractor_config.use_depthwise

    if is_keras_extractor:
        conv_hyperparams = hyperparams_builder.KerasLayerHyperparams(
            feature_extractor_config.conv_hyperparams)
    else:
        conv_hyperparams = hyperparams_builder.build(
            feature_extractor_config.conv_hyperparams, is_training)
    override_base_feature_extractor_hyperparams = (
        feature_extractor_config.override_base_feature_extractor_hyperparams)

    if (feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP) and (
            not is_keras_extractor):
        raise ValueError(
            'Unknown ssd feature_extractor: {}'.format(feature_type))

    if is_keras_extractor:
        feature_extractor_class = SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP[
            feature_type]
    else:
        feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
    kwargs = {
        'is_training':
        is_training,
        'depth_multiplier':
        depth_multiplier,
        'min_depth':
        min_depth,
        'pad_to_multiple':
        pad_to_multiple,
        'use_explicit_padding':
        use_explicit_padding,
        'use_depthwise':
        use_depthwise,
        'override_base_feature_extractor_hyperparams':
        override_base_feature_extractor_hyperparams
    }

    if feature_extractor_config.HasField(
            'replace_preprocessor_with_placeholder'):
        kwargs.update({
            'replace_preprocessor_with_placeholder':
            feature_extractor_config.replace_preprocessor_with_placeholder
        })

    if feature_extractor_config.HasField('num_layers'):
        kwargs.update({'num_layers': feature_extractor_config.num_layers})

    if is_keras_extractor:
        kwargs.update({
            'conv_hyperparams': conv_hyperparams,
            'inplace_batchnorm_update': False,
            'freeze_batchnorm': freeze_batchnorm
        })
    else:
        kwargs.update({
            'conv_hyperparams_fn': conv_hyperparams,
            'reuse_weights': reuse_weights,
        })

    if feature_extractor_config.HasField('fpn'):
        kwargs.update({
            'fpn_min_level':
            feature_extractor_config.fpn.min_level,
            'fpn_max_level':
            feature_extractor_config.fpn.max_level,
            'additional_layer_depth':
            feature_extractor_config.fpn.additional_layer_depth,
        })

    return feature_extractor_class(**kwargs)
Example #41
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
  """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of type
  `rfcn_box_predictor` else builds a Faster R-CNN model.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    FasterRCNNMetaArch based on the config.
  Raises:
    ValueError: If frcnn_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
  num_classes = frcnn_config.num_classes
  image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

  feature_extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor, is_training)

  number_of_stages = frcnn_config.number_of_stages
  first_stage_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)

  first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
  first_stage_box_predictor_arg_scope = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
  first_stage_box_predictor_kernel_size = (
      frcnn_config.first_stage_box_predictor_kernel_size)
  first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
  first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
  first_stage_positive_balance_fraction = (
      frcnn_config.first_stage_positive_balance_fraction)
  first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold
  first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
  first_stage_max_proposals = frcnn_config.first_stage_max_proposals
  first_stage_loc_loss_weight = (
      frcnn_config.first_stage_localization_loss_weight)
  first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

  initial_crop_size = frcnn_config.initial_crop_size
  maxpool_kernel_size = frcnn_config.maxpool_kernel_size
  maxpool_stride = frcnn_config.maxpool_stride

  second_stage_box_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  second_stage_batch_size = frcnn_config.second_stage_batch_size
  second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction
  (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn
  ) = post_processing_builder.build(frcnn_config.second_stage_post_processing)
  second_stage_localization_loss_weight = (
      frcnn_config.second_stage_localization_loss_weight)
  second_stage_classification_loss = (
      losses_builder.build_faster_rcnn_classification_loss(
          frcnn_config.second_stage_classification_loss))
  second_stage_classification_loss_weight = (
      frcnn_config.second_stage_classification_loss_weight)
  second_stage_mask_prediction_loss_weight = (
      frcnn_config.second_stage_mask_prediction_loss_weight)

  hard_example_miner = None
  if frcnn_config.HasField('hard_example_miner'):
    hard_example_miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner,
        second_stage_classification_loss_weight,
        second_stage_localization_loss_weight)

  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': image_resizer_fn,
      'feature_extractor': feature_extractor,
      'number_of_stages': number_of_stages,
      'first_stage_anchor_generator': first_stage_anchor_generator,
      'first_stage_atrous_rate': first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope':
      first_stage_box_predictor_arg_scope,
      'first_stage_box_predictor_kernel_size':
      first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
      'first_stage_minibatch_size': first_stage_minibatch_size,
      'first_stage_positive_balance_fraction':
      first_stage_positive_balance_fraction,
      'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
      'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
      'first_stage_max_proposals': first_stage_max_proposals,
      'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
      'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
      'second_stage_batch_size': second_stage_batch_size,
      'second_stage_balance_fraction': second_stage_balance_fraction,
      'second_stage_non_max_suppression_fn':
      second_stage_non_max_suppression_fn,
      'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
      'second_stage_localization_loss_weight':
      second_stage_localization_loss_weight,
      'second_stage_classification_loss':
      second_stage_classification_loss,
      'second_stage_classification_loss_weight':
      second_stage_classification_loss_weight,
      'hard_example_miner': hard_example_miner,
      'add_summaries': add_summaries}

  if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=second_stage_box_predictor,
        **common_kwargs)
  else:
    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        second_stage_mask_prediction_loss_weight=(
            second_stage_mask_prediction_loss_weight),
        **common_kwargs)
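For context, a private builder like this is normally reached through a top-level build() that switches on the model oneof of a model.proto config. A minimal dispatch sketch follows, assuming the standard TF Object Detection API layout; the oneof name 'model' and the _build_ssd_model helper are assumptions if your fork diverges.

def build(model_config, is_training, add_summaries=True):
  """Dispatches to the builder for the meta architecture named in config."""
  meta_architecture = model_config.WhichOneof('model')
  if meta_architecture == 'ssd':
    return _build_ssd_model(model_config.ssd, is_training, add_summaries)
  if meta_architecture == 'faster_rcnn':
    return _build_faster_rcnn_model(model_config.faster_rcnn, is_training,
                                    add_summaries)
  raise ValueError('Unknown meta architecture: {}'.format(meta_architecture))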
Example #42
def _build_faster_rcnn_model(frcnn_config,
                             is_training,
                             add_summaries,
                             meta_architecture='faster_rcnn'):
    """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of type
  `rfcn_box_predictor` else builds a Faster R-CNN model.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.
    meta_architecture: Which meta-architecture variant to build: one of
      'faster_rcnn' (the default), 'faster_rcnn_override_RPN' or
      'faster_rcnn_rpn_blend'.

  Returns:
    FasterRCNNMetaArch based on the config.

  Raises:
    ValueError: If frcnn_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
    num_classes = frcnn_config.num_classes
    image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

    feature_extractor = _build_faster_rcnn_feature_extractor(
        frcnn_config.feature_extractor, is_training,
        frcnn_config.inplace_batchnorm_update)

    number_of_stages = frcnn_config.number_of_stages
    first_stage_anchor_generator = anchor_generator_builder.build(
        frcnn_config.first_stage_anchor_generator)

    first_stage_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'proposal',
        use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
    first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
    first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
        frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
    first_stage_box_predictor_kernel_size = (
        frcnn_config.first_stage_box_predictor_kernel_size)
    first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
    first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
    # TODO(bhattad): When eval is supported using static shapes, add separate
    # use_static_shapes_for_training and use_static_shapes_for_evaluation.
    use_static_shapes = frcnn_config.use_static_shapes and is_training
    first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
        is_static=frcnn_config.use_static_balanced_label_sampler
        and is_training)
    first_stage_max_proposals = frcnn_config.first_stage_max_proposals
    first_stage_proposals_path = frcnn_config.first_stage_proposals_path
    if (frcnn_config.first_stage_nms_iou_threshold < 0
            or frcnn_config.first_stage_nms_iou_threshold > 1.0):
        raise ValueError('iou_threshold not in [0, 1.0].')
    if (is_training and
            frcnn_config.second_stage_batch_size > first_stage_max_proposals):
        raise ValueError('second_stage_batch_size should be no greater than '
                         'first_stage_max_proposals.')
    first_stage_non_max_suppression_fn = functools.partial(
        post_processing.batch_multiclass_non_max_suppression,
        score_thresh=frcnn_config.first_stage_nms_score_threshold,
        iou_thresh=frcnn_config.first_stage_nms_iou_threshold,
        max_size_per_class=frcnn_config.first_stage_max_proposals,
        max_total_size=frcnn_config.first_stage_max_proposals,
        use_static_shapes=use_static_shapes and is_training)
    first_stage_loc_loss_weight = (
        frcnn_config.first_stage_localization_loss_weight)
    first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

    initial_crop_size = frcnn_config.initial_crop_size
    maxpool_kernel_size = frcnn_config.maxpool_kernel_size
    maxpool_stride = frcnn_config.maxpool_stride

    second_stage_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'detection',
        use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher,
        iou_threshold=frcnn_config.second_stage_target_iou_threshold)
    second_stage_box_predictor = box_predictor_builder.build(
        hyperparams_builder.build,
        frcnn_config.second_stage_box_predictor,
        is_training=is_training,
        num_classes=num_classes)
    second_stage_batch_size = frcnn_config.second_stage_batch_size
    second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=frcnn_config.second_stage_balance_fraction,
        is_static=frcnn_config.use_static_balanced_label_sampler
        and is_training)
    (second_stage_non_max_suppression_fn,
     second_stage_score_conversion_fn) = post_processing_builder.build(
         frcnn_config.second_stage_post_processing)
    second_stage_localization_loss_weight = (
        frcnn_config.second_stage_localization_loss_weight)
    second_stage_classification_loss = (
        losses_builder.build_faster_rcnn_classification_loss(
            frcnn_config.second_stage_classification_loss))
    second_stage_classification_loss_weight = (
        frcnn_config.second_stage_classification_loss_weight)
    second_stage_mask_prediction_loss_weight = (
        frcnn_config.second_stage_mask_prediction_loss_weight)

    hard_example_miner = None
    if frcnn_config.HasField('hard_example_miner'):
        hard_example_miner = losses_builder.build_hard_example_miner(
            frcnn_config.hard_example_miner,
            second_stage_classification_loss_weight,
            second_stage_localization_loss_weight)

    crop_and_resize_fn = (ops.matmul_crop_and_resize
                          if frcnn_config.use_matmul_crop_and_resize else
                          ops.native_crop_and_resize)
    clip_anchors_to_image = frcnn_config.clip_anchors_to_image

    common_kwargs = {
        'is_training': is_training,
        'num_classes': num_classes,
        'image_resizer_fn': image_resizer_fn,
        'feature_extractor': feature_extractor,
        'number_of_stages': number_of_stages,
        'first_stage_anchor_generator': first_stage_anchor_generator,
        'first_stage_target_assigner': first_stage_target_assigner,
        'first_stage_atrous_rate': first_stage_atrous_rate,
        'first_stage_box_predictor_arg_scope_fn':
        first_stage_box_predictor_arg_scope_fn,
        'first_stage_box_predictor_kernel_size':
        first_stage_box_predictor_kernel_size,
        'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
        'first_stage_minibatch_size': first_stage_minibatch_size,
        'first_stage_sampler': first_stage_sampler,
        'first_stage_non_max_suppression_fn':
        first_stage_non_max_suppression_fn,
        'first_stage_max_proposals': first_stage_max_proposals,
        'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
        'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
        'second_stage_target_assigner': second_stage_target_assigner,
        'second_stage_batch_size': second_stage_batch_size,
        'second_stage_sampler': second_stage_sampler,
        'second_stage_non_max_suppression_fn':
        second_stage_non_max_suppression_fn,
        'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
        'second_stage_localization_loss_weight':
        second_stage_localization_loss_weight,
        'second_stage_classification_loss': second_stage_classification_loss,
        'second_stage_classification_loss_weight':
        second_stage_classification_loss_weight,
        'hard_example_miner': hard_example_miner,
        'add_summaries': add_summaries,
        'crop_and_resize_fn': crop_and_resize_fn,
        'clip_anchors_to_image': clip_anchors_to_image,
        'use_static_shapes': use_static_shapes,
        'resize_masks': frcnn_config.resize_masks
    }

    if isinstance(second_stage_box_predictor,
                  rfcn_box_predictor.RfcnBoxPredictor):
        return rfcn_meta_arch.RFCNMetaArch(
            second_stage_rfcn_box_predictor=second_stage_box_predictor,
            **common_kwargs)
    elif meta_architecture == 'faster_rcnn':
        return faster_rcnn_meta_arch.FasterRCNNMetaArch(
            initial_crop_size=initial_crop_size,
            maxpool_kernel_size=maxpool_kernel_size,
            maxpool_stride=maxpool_stride,
            second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
            second_stage_mask_prediction_loss_weight=(
                second_stage_mask_prediction_loss_weight),
            **common_kwargs)
    elif meta_architecture == 'faster_rcnn_override_RPN':
        return faster_rcnn_meta_arch_override_RPN.FasterRCNNMetaArchOverrideRPN(
            initial_crop_size=initial_crop_size,
            maxpool_kernel_size=maxpool_kernel_size,
            maxpool_stride=maxpool_stride,
            first_stage_proposals_path=first_stage_proposals_path,
            second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
            second_stage_mask_prediction_loss_weight=(
                second_stage_mask_prediction_loss_weight),
            **common_kwargs)
    elif meta_architecture == 'faster_rcnn_rpn_blend':
        common_kwargs['use_matmul_crop_and_resize'] = False
        common_kwargs[
            'first_stage_nms_iou_threshold'] = frcnn_config.first_stage_nms_iou_threshold
        common_kwargs[
            'first_stage_nms_score_threshold'] = frcnn_config.first_stage_nms_score_threshold
        common_kwargs.pop('crop_and_resize_fn')
        common_kwargs.pop('first_stage_non_max_suppression_fn')
        common_kwargs.pop('resize_masks')
        common_kwargs.pop('use_static_shapes')
        return faster_rcnn_meta_arch_rpn_blend.FasterRCNNMetaArchRPNBlend(
            initial_crop_size=initial_crop_size,
            maxpool_kernel_size=maxpool_kernel_size,
            maxpool_stride=maxpool_stride,
            first_stage_proposals_path=first_stage_proposals_path,
            second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
            second_stage_mask_prediction_loss_weight=(
                second_stage_mask_prediction_loss_weight),
            **common_kwargs)
  def _build_arg_scope_with_hyperparams(self,
                                        hyperparams_text_proto,
                                        is_training):
    hyperparams = hyperparams_pb2.Hyperparams()
    text_format.Merge(hyperparams_text_proto, hyperparams)
    return hyperparams_builder.build(hyperparams, is_training=is_training)
Example #44
def _build_ssd_feature_extractor(feature_extractor_config,
                                 is_training,
                                 reuse_weights=None):
    """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.

  Args:
    feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto.
    is_training: True if this feature extractor is being built for training.
    reuse_weights: if the feature extractor should reuse weights.

  Returns:
    ssd_meta_arch.SSDFeatureExtractor based on config.

  Raises:
    ValueError: On invalid feature extractor type.
  """
    feature_type = feature_extractor_config.type
    depth_multiplier = feature_extractor_config.depth_multiplier
    min_depth = feature_extractor_config.min_depth
    pad_to_multiple = feature_extractor_config.pad_to_multiple
    use_explicit_padding = feature_extractor_config.use_explicit_padding
    use_depthwise = feature_extractor_config.use_depthwise
    conv_hyperparams = hyperparams_builder.build(
        feature_extractor_config.conv_hyperparams, is_training)
    override_base_feature_extractor_hyperparams = (
        feature_extractor_config.override_base_feature_extractor_hyperparams)

    if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
        raise ValueError(
            'Unknown ssd feature_extractor: {}'.format(feature_type))

    feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
    kwargs = {
        'is_training':
        is_training,
        'depth_multiplier':
        depth_multiplier,
        'min_depth':
        min_depth,
        'pad_to_multiple':
        pad_to_multiple,
        'conv_hyperparams_fn':
        conv_hyperparams,
        'reuse_weights':
        reuse_weights,
        'use_explicit_padding':
        use_explicit_padding,
        'use_depthwise':
        use_depthwise,
        'override_base_feature_extractor_hyperparams':
        override_base_feature_extractor_hyperparams
    }

    if feature_extractor_config.HasField('fpn'):
        kwargs.update({
            'fpn_min_level':
            feature_extractor_config.fpn.min_level,
            'fpn_max_level':
            feature_extractor_config.fpn.max_level,
            'additional_layer_depth':
            feature_extractor_config.fpn.additional_layer_depth,
        })

    return feature_extractor_class(**kwargs)
Example #45
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
    """Builds a Faster R-CNN or R-FCN detection model based on the model config.

  Builds R-FCN model if the second_stage_box_predictor in the config is of type
  `rfcn_box_predictor` else builds a Faster R-CNN model.

  Args:
    frcnn_config: A faster_rcnn.proto object containing the config for the
      desired FasterRCNNMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    FasterRCNNMetaArch based on the config.

  Raises:
    ValueError: If frcnn_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
    num_classes = frcnn_config.num_classes
    image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)

    is_keras = (frcnn_config.feature_extractor.type
                in FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP)

    if is_keras:
        feature_extractor = _build_faster_rcnn_keras_feature_extractor(
            frcnn_config.feature_extractor,
            is_training,
            inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update)
    else:
        feature_extractor = _build_faster_rcnn_feature_extractor(
            frcnn_config.feature_extractor,
            is_training,
            inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update)

    number_of_stages = frcnn_config.number_of_stages
    first_stage_anchor_generator = anchor_generator_builder.build(
        frcnn_config.first_stage_anchor_generator)

    first_stage_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'proposal',
        use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
    first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
    if is_keras:
        first_stage_box_predictor_arg_scope_fn = (
            hyperparams_builder.KerasLayerHyperparams(
                frcnn_config.first_stage_box_predictor_conv_hyperparams))
    else:
        first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
            frcnn_config.first_stage_box_predictor_conv_hyperparams,
            is_training)
    first_stage_box_predictor_kernel_size = (
        frcnn_config.first_stage_box_predictor_kernel_size)
    first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
    first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
    use_static_shapes = frcnn_config.use_static_shapes and (
        frcnn_config.use_static_shapes_for_eval or is_training)
    first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
        is_static=(frcnn_config.use_static_balanced_label_sampler
                   and use_static_shapes))
    first_stage_max_proposals = frcnn_config.first_stage_max_proposals
    if (frcnn_config.first_stage_nms_iou_threshold < 0
            or frcnn_config.first_stage_nms_iou_threshold > 1.0):
        raise ValueError('iou_threshold not in [0, 1.0].')
    if (is_training and
            frcnn_config.second_stage_batch_size > first_stage_max_proposals):
        raise ValueError('second_stage_batch_size should be no greater than '
                         'first_stage_max_proposals.')
    first_stage_non_max_suppression_fn = functools.partial(
        post_processing.batch_multiclass_non_max_suppression,
        score_thresh=frcnn_config.first_stage_nms_score_threshold,
        iou_thresh=frcnn_config.first_stage_nms_iou_threshold,
        max_size_per_class=frcnn_config.first_stage_max_proposals,
        max_total_size=frcnn_config.first_stage_max_proposals,
        use_static_shapes=use_static_shapes,
        use_partitioned_nms=frcnn_config.use_partitioned_nms_in_first_stage,
        use_combined_nms=frcnn_config.use_combined_nms_in_first_stage)
    first_stage_loc_loss_weight = (
        frcnn_config.first_stage_localization_loss_weight)
    first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight

    initial_crop_size = frcnn_config.initial_crop_size
    maxpool_kernel_size = frcnn_config.maxpool_kernel_size
    maxpool_stride = frcnn_config.maxpool_stride

    second_stage_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN',
        'detection',
        use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
    if is_keras:
        second_stage_box_predictor = box_predictor_builder.build_keras(
            hyperparams_builder.KerasLayerHyperparams,
            freeze_batchnorm=False,
            inplace_batchnorm_update=False,
            num_predictions_per_location_list=[1],
            box_predictor_config=frcnn_config.second_stage_box_predictor,
            is_training=is_training,
            num_classes=num_classes)
    else:
        second_stage_box_predictor = box_predictor_builder.build(
            hyperparams_builder.build,
            frcnn_config.second_stage_box_predictor,
            is_training=is_training,
            num_classes=num_classes)
    second_stage_batch_size = frcnn_config.second_stage_batch_size
    second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=frcnn_config.second_stage_balance_fraction,
        is_static=(frcnn_config.use_static_balanced_label_sampler
                   and use_static_shapes))
    (second_stage_non_max_suppression_fn,
     second_stage_score_conversion_fn) = post_processing_builder.build(
         frcnn_config.second_stage_post_processing)
    second_stage_localization_loss_weight = (
        frcnn_config.second_stage_localization_loss_weight)
    second_stage_classification_loss = (
        losses_builder.build_faster_rcnn_classification_loss(
            frcnn_config.second_stage_classification_loss))
    second_stage_classification_loss_weight = (
        frcnn_config.second_stage_classification_loss_weight)
    second_stage_mask_prediction_loss_weight = (
        frcnn_config.second_stage_mask_prediction_loss_weight)

    hard_example_miner = None
    if frcnn_config.HasField('hard_example_miner'):
        hard_example_miner = losses_builder.build_hard_example_miner(
            frcnn_config.hard_example_miner,
            second_stage_classification_loss_weight,
            second_stage_localization_loss_weight)

    crop_and_resize_fn = (ops.matmul_crop_and_resize
                          if frcnn_config.use_matmul_crop_and_resize else
                          ops.native_crop_and_resize)
    clip_anchors_to_image = frcnn_config.clip_anchors_to_image

    common_kwargs = {
        'is_training':
        is_training,
        'num_classes':
        num_classes,
        'image_resizer_fn':
        image_resizer_fn,
        'feature_extractor':
        feature_extractor,
        'number_of_stages':
        number_of_stages,
        'first_stage_anchor_generator':
        first_stage_anchor_generator,
        'first_stage_target_assigner':
        first_stage_target_assigner,
        'first_stage_atrous_rate':
        first_stage_atrous_rate,
        'first_stage_box_predictor_arg_scope_fn':
        first_stage_box_predictor_arg_scope_fn,
        'first_stage_box_predictor_kernel_size':
        first_stage_box_predictor_kernel_size,
        'first_stage_box_predictor_depth':
        first_stage_box_predictor_depth,
        'first_stage_minibatch_size':
        first_stage_minibatch_size,
        'first_stage_sampler':
        first_stage_sampler,
        'first_stage_non_max_suppression_fn':
        first_stage_non_max_suppression_fn,
        'first_stage_max_proposals':
        first_stage_max_proposals,
        'first_stage_localization_loss_weight':
        first_stage_loc_loss_weight,
        'first_stage_objectness_loss_weight':
        first_stage_obj_loss_weight,
        'second_stage_target_assigner':
        second_stage_target_assigner,
        'second_stage_batch_size':
        second_stage_batch_size,
        'second_stage_sampler':
        second_stage_sampler,
        'second_stage_non_max_suppression_fn':
        second_stage_non_max_suppression_fn,
        'second_stage_score_conversion_fn':
        second_stage_score_conversion_fn,
        'second_stage_localization_loss_weight':
        second_stage_localization_loss_weight,
        'second_stage_classification_loss':
        second_stage_classification_loss,
        'second_stage_classification_loss_weight':
        second_stage_classification_loss_weight,
        'hard_example_miner':
        hard_example_miner,
        'add_summaries':
        add_summaries,
        'crop_and_resize_fn':
        crop_and_resize_fn,
        'clip_anchors_to_image':
        clip_anchors_to_image,
        'use_static_shapes':
        use_static_shapes,
        'resize_masks':
        frcnn_config.resize_masks,
        'return_raw_detections_during_predict':
        (frcnn_config.return_raw_detections_during_predict)
    }

    if isinstance(second_stage_box_predictor,
                  (rfcn_box_predictor.RfcnBoxPredictor,
                   rfcn_keras_box_predictor.RfcnKerasBoxPredictor)):
        return rfcn_meta_arch.RFCNMetaArch(
            second_stage_rfcn_box_predictor=second_stage_box_predictor,
            **common_kwargs)
    else:
        return faster_rcnn_meta_arch.FasterRCNNMetaArch(
            initial_crop_size=initial_crop_size,
            maxpool_kernel_size=maxpool_kernel_size,
            maxpool_stride=maxpool_stride,
            second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
            second_stage_mask_prediction_loss_weight=(
                second_stage_mask_prediction_loss_weight),
            **common_kwargs)
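The is_keras split above hinges on two different hyperparameter objects, and the contrast is easy to miss. A short sketch follows, with method names taken from the TF Object Detection API's KerasLayerHyperparams; treat them as assumptions if your fork has diverged.

# Slim path: build() yields a callable producing an arg scope that
# implicitly configures slim.conv2d and friends.
arg_scope_fn = hyperparams_builder.build(
    frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)

# Keras path: KerasLayerHyperparams exposes explicit per-layer kwargs
# instead of an implicit scope.
keras_hyperparams = hyperparams_builder.KerasLayerHyperparams(
    frcnn_config.first_stage_box_predictor_conv_hyperparams)
conv_kwargs = keras_hyperparams.params()  # kwargs for e.g. tf.keras Conv2D
if keras_hyperparams.use_batch_norm():
  batch_norm_layer = keras_hyperparams.build_batch_norm(training=is_training)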
Example #46
def _build_sin_model(sin_config, is_training):
  """Builds a SIN detection model based on the model config.

  Args:
    sin_config: A faster_rcnn.proto object containing the config for the
      desired SINMetaArch or RFCNMetaArch.
    is_training: True if this model is being built for training purposes.

  Returns:
    SINMetaArch based on the config.
  Raises:
    ValueError: If sin_config.type is not recognized (i.e. not registered in
      model_class_map).
  """
  num_classes = sin_config.num_classes
  image_resizer_fn = image_resizer_builder.build(sin_config.image_resizer)

  feature_extractor = _build_faster_rcnn_feature_extractor(
      sin_config.feature_extractor, is_training, reuse_weights=tf.AUTO_REUSE)

  fc_hyperparams = hyperparams_builder.build(
      sin_config.second_stage_box_predictor.sin_box_predictor.fc_hyperparams,
      is_training)

  first_stage_only = sin_config.first_stage_only
  first_stage_anchor_generator = anchor_generator_builder.build(
      sin_config.first_stage_anchor_generator)

  first_stage_atrous_rate = sin_config.first_stage_atrous_rate
  first_stage_box_predictor_arg_scope = hyperparams_builder.build(
      sin_config.first_stage_box_predictor_conv_hyperparams, is_training)
  first_stage_box_predictor_kernel_size = (
      sin_config.first_stage_box_predictor_kernel_size)
  first_stage_box_predictor_depth = sin_config.first_stage_box_predictor_depth
  first_stage_minibatch_size = sin_config.first_stage_minibatch_size
  first_stage_positive_balance_fraction = (
      sin_config.first_stage_positive_balance_fraction)
  first_stage_nms_score_threshold = sin_config.first_stage_nms_score_threshold
  first_stage_nms_iou_threshold = sin_config.first_stage_nms_iou_threshold
  first_stage_max_proposals = sin_config.first_stage_max_proposals
  first_stage_loc_loss_weight = (
      sin_config.first_stage_localization_loss_weight)
  first_stage_obj_loss_weight = sin_config.first_stage_objectness_loss_weight

  initial_crop_size = sin_config.initial_crop_size
  maxpool_kernel_size = sin_config.maxpool_kernel_size
  maxpool_stride = sin_config.maxpool_stride

  second_stage_box_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      sin_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  second_stage_batch_size = sin_config.second_stage_batch_size
  second_stage_balance_fraction = sin_config.second_stage_balance_fraction
  (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn
  ) = post_processing_builder.build(sin_config.second_stage_post_processing)
  second_stage_localization_loss_weight = (
      sin_config.second_stage_localization_loss_weight)
  second_stage_classification_loss = (
      losses_builder.build_faster_rcnn_classification_loss(
          sin_config.second_stage_classification_loss))
  second_stage_classification_loss_weight = (
      sin_config.second_stage_classification_loss_weight)
  second_stage_mask_prediction_loss_weight = (
      sin_config.second_stage_mask_prediction_loss_weight)

  hard_example_miner = None
  if sin_config.HasField('hard_example_miner'):
    hard_example_miner = losses_builder.build_hard_example_miner(
        sin_config.hard_example_miner,
        second_stage_classification_loss_weight,
        second_stage_localization_loss_weight)

  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': image_resizer_fn,
      'feature_extractor': feature_extractor,
      'fc_hyperparams': fc_hyperparams,
      'first_stage_only': first_stage_only,
      'first_stage_anchor_generator': first_stage_anchor_generator,
      'first_stage_atrous_rate': first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope':
      first_stage_box_predictor_arg_scope,
      'first_stage_box_predictor_kernel_size':
      first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
      'first_stage_minibatch_size': first_stage_minibatch_size,
      'first_stage_positive_balance_fraction':
      first_stage_positive_balance_fraction,
      'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
      'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
      'first_stage_max_proposals': first_stage_max_proposals,
      'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
      'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
      'second_stage_batch_size': second_stage_batch_size,
      'second_stage_balance_fraction': second_stage_balance_fraction,
      'second_stage_non_max_suppression_fn':
      second_stage_non_max_suppression_fn,
      'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
      'second_stage_localization_loss_weight':
      second_stage_localization_loss_weight,
      'second_stage_classification_loss':
      second_stage_classification_loss,
      'second_stage_classification_loss_weight':
      second_stage_classification_loss_weight,
      'hard_example_miner': hard_example_miner}

  if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=second_stage_box_predictor,
        **common_kwargs)
  else:
    return sin_meta_arch.SINMetaArch(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        second_stage_box_predictor=second_stage_box_predictor,
        second_stage_mask_prediction_loss_weight=(
            second_stage_mask_prediction_loss_weight),
        **common_kwargs)