Code example #1
 # Excerpted from a hyperparams builder: `batch_norm_params`, `affected_ops`,
 # `hyperparams_config` and `normalizer_fn` are variables supplied by the
 # enclosing function (not shown).
 def scope_fn():
     with (slim.arg_scope([slim.batch_norm], **batch_norm_params)
           if batch_norm_params is not None else
           context_manager.IdentityContextManager()):
         with slim.arg_scope(affected_ops,
                             weights_regularizer=_build_slim_regularizer(
                                 hyperparams_config.regularizer),
                             weights_initializer=_build_initializer(
                                 hyperparams_config.initializer),
                             activation_fn=_build_activation_fn(
                                 hyperparams_config.activation),
                             normalizer_fn=normalizer_fn) as sc:
             return sc
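Every example on this page relies on the same trick: a parenthesized conditional expression chooses between a real slim.arg_scope and a do-nothing IdentityContextManager. A minimal, self-contained sketch of the idiom (illustrative names; contextlib.nullcontext from Python 3.7+ stands in for IdentityContextManager):

import contextlib

@contextlib.contextmanager
def verbose_scope():
    print('entering scope')
    yield
    print('leaving scope')

def run(use_scope):
    # `with` evaluates the parenthesized conditional at runtime, so the body
    # executes inside the scope only when use_scope is true.
    with (verbose_scope() if use_scope else contextlib.nullcontext()):
        print('body runs either way')

run(True)   # entering scope / body runs either way / leaving scope
run(False)  # body runs either way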
Code example #2
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    feature_map_layout = {
        'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
                       '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with tf.variable_scope('MobilenetV1',
                           reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v1.mobilenet_v1_arg_scope(
              is_training=None, regularize_depthwise=True)):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams
              else context_manager.IdentityContextManager()):
          # TODO(skligys): Enable fused batch norm once quantization supports it.
          with slim.arg_scope([slim.batch_norm], fused=False):
            _, image_features = mobilenet_v1.mobilenet_v1_base(
                ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
                final_endpoint='Conv2d_13_pointwise',
                min_depth=self._min_depth,
                depth_multiplier=self._depth_multiplier,
                use_explicit_padding=self._use_explicit_padding,
                scope=scope)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        # TODO(skligys): Enable fused batch norm once quantization supports it.
        with slim.arg_scope([slim.batch_norm], fused=False):
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return list(feature_maps.values())
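A note on the layout dict used above, as I read the multi_resolution_feature_maps contract (a sketch, not authoritative): named 'from_layer' entries reuse backbone endpoints as-is, '' entries append new stride-2 convolutions on top of the previous map, and a 'layer_depth' of -1 keeps the source endpoint's own depth.

feature_map_layout = {
    'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', ''],
    'layer_depth': [-1, -1, 512, 256],
}
# Produces, in order: the two named endpoints unchanged (depth -1 means
# "inherit"), then two new stride-2 maps of depth 512 and 256 built on top
# of Conv2d_13_pointwise.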
Code example #3
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer': ['layer_15/expansion_output', 'layer_19',
                           '', '', '', ''][:self._num_layers],
            'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers],
            'use_depthwise': self._use_depthwise,
            'use_explicit_padding': self._use_explicit_padding,
        }

        with tf.variable_scope('MobilenetV2',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                mobilenet_v2.training_scope(is_training=True, bn_decay=0.9997)), \
                slim.arg_scope(
                    [mobilenet.depth_multiplier], min_depth=self._min_depth):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v2.mobilenet_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='layer_19',
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
                with slim.arg_scope(self._conv_hyperparams_fn()):
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=feature_map_layout,
                        depth_multiplier=self._depth_multiplier,
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)

        return list(feature_maps.values())
Code example #4
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
        if self._depth_multiplier != 1.0:
            raise ValueError('Depth multiplier not supported.')

        preprocessed_inputs = shape_utils.check_min_image_dim(
            129, preprocessed_inputs)

        with tf.variable_scope(self._resnet_scope_name,
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    with slim.arg_scope(
                            [resnet_v1.bottleneck],
                            use_bounded_activations=self._use_bounded_activations):
                        _, activations = self._resnet_base_fn(
                            inputs=ops.pad_to_multiple(preprocessed_inputs,
                                                       self._pad_to_multiple),
                            num_classes=None,
                            is_training=None,
                            global_pool=False,
                            output_stride=None,
                            store_non_strided_activations=True,
                            scope=scope)

            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
                    base_feature_map_depth=self._base_feature_map_depth,
                    num_layers=self._num_layers,
                    image_features={
                        'image_features':
                        self._filter_features(activations)['block3']
                    })
        return list(feature_maps.values())
Code example #5
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """

        feature_map_layout = {
            'from_layer': ['Cell_7', 'Cell_11', '', '', '',
                           ''][:self._num_layers],
            'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers],
            'use_explicit_padding': self._use_explicit_padding,
            'use_depthwise': self._use_depthwise,
        }

        with slim.arg_scope(
                pnasnet_large_arg_scope_for_detection(
                    is_batch_norm_training=self._is_training)):
            with slim.arg_scope(
                [slim.conv2d, slim.batch_norm, slim.separable_conv2d],
                    reuse=self._reuse_weights):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = pnasnet.build_pnasnet_large(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        num_classes=None,
                        is_training=self._is_training,
                        final_endpoint='Cell_11')
        with tf.compat.v1.variable_scope('SSD_feature_maps',
                                         reuse=self._reuse_weights):
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return list(feature_maps.values())
Code example #6
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise',
                           '', '', '', ''][:self._num_layers],
            'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers],
            'use_explicit_padding': self._use_explicit_padding,
            'use_depthwise': self._use_depthwise,
        }

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(
                        is_training=None, regularize_depthwise=True)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return list(feature_maps.values())
Code example #7
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(
                        is_training=None, regularize_depthwise=True)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
                    base_feature_map_depth=0,
                    num_layers=6,
                    image_features={
                        'image_features': image_features['Conv2d_11_pointwise']
                    })
        return list(feature_maps.values())
Code example #8
def scope_fn():
    # `batch_norm` must be a batch-norm *config* (e.g. the BatchNorm message
    # on hyperparams_config, as in example #1), not the slim op itself: the
    # op function has no `decay`/`center`/`scale`/`epsilon` attributes to read.
    batch_norm = hyperparams_config.batch_norm
    affected_ops = [slim.conv2d, slim.separable_conv2d, slim.fully_connected]
    batch_norm_params = {
        'decay': batch_norm.decay,
        'center': batch_norm.center,
        'scale': batch_norm.scale,
        'epsilon': batch_norm.epsilon,
        # Remove is_training parameter from here and deprecate it in the proto
        # once we refactor Faster RCNN models to set is_training through an
        # outer arg_scope in the meta architecture.
        'is_training': batch_norm.train,
    }
    with (slim.arg_scope([slim.batch_norm], **batch_norm_params)
          if batch_norm_params is not None else
          context_manager.IdentityContextManager()):
        with slim.arg_scope(
                affected_ops,
                weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                weights_regularizer=slim.l2_regularizer(0.0005),
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm) as sc:
            return sc
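A hypothetical caller, to show why scope_fn returns the scope instead of entering it: the `sc` captured by `as sc` is an arg_scope dict that can be re-entered later, so the hyperparameters apply wherever the layers are finally built.

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
with slim.arg_scope(scope_fn()):
    # conv1 picks up the truncated-normal initializer, L2 regularizer,
    # ReLU activation and batch norm configured inside scope_fn().
    net = slim.conv2d(images, 64, [3, 3], scope='conv1')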
Code example #9
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        with tf.variable_scope('MobilenetV2',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
                slim.arg_scope(
                    [mobilenet.depth_multiplier], min_depth=self._min_depth):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v2.mobilenet_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='layer_19',
                        depth_multiplier=self._depth_multiplier,
                        conv_defs=self._conv_defs,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            depth_fn = lambda d: max(int(d * self._depth_multiplier),
                                     self._min_depth)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                with tf.variable_scope('fpn', reuse=self._reuse_weights):
                    feature_blocks = [
                        'layer_4', 'layer_7', 'layer_14', 'layer_19'
                    ]
                    base_fpn_max_level = min(self._fpn_max_level, 5)
                    feature_block_list = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_block_list.append(feature_blocks[level - 2])
                    fpn_features = feature_map_generators.fpn_top_down_feature_maps(
                        [(key, image_features[key])
                         for key in feature_block_list],
                        depth=depth_fn(self._additional_layer_depth),
                        use_depthwise=self._use_depthwise,
                        use_explicit_padding=self._use_explicit_padding)
                    feature_maps = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_maps.append(fpn_features['top_down_{}'.format(
                            feature_blocks[level - 2])])
                    last_feature_map = fpn_features['top_down_{}'.format(
                        feature_blocks[base_fpn_max_level - 2])]
                    # Construct coarse features
                    padding = 'VALID' if self._use_explicit_padding else 'SAME'
                    kernel_size = 3
                    for i in range(base_fpn_max_level + 1,
                                   self._fpn_max_level + 1):
                        if self._use_depthwise:
                            conv_op = functools.partial(slim.separable_conv2d,
                                                        depth_multiplier=1)
                        else:
                            conv_op = slim.conv2d
                        if self._use_explicit_padding:
                            last_feature_map = ops.fixed_padding(
                                last_feature_map, kernel_size)
                        last_feature_map = conv_op(
                            last_feature_map,
                            num_outputs=depth_fn(self._additional_layer_depth),
                            kernel_size=[kernel_size, kernel_size],
                            stride=2,
                            padding=padding,
                            scope='bottom_up_Conv2d_{}'.format(
                                i - base_fpn_max_level + 19))
                        feature_maps.append(last_feature_map)
        return feature_maps
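A worked example of the level-to-endpoint mapping above, assuming the hypothetical config values fpn_min_level=3 and fpn_max_level=7:

feature_blocks = ['layer_4', 'layer_7', 'layer_14', 'layer_19']
fpn_min_level, fpn_max_level = 3, 7
base_fpn_max_level = min(fpn_max_level, 5)  # -> 5
selected = [feature_blocks[level - 2]
            for level in range(fpn_min_level, base_fpn_max_level + 1)]
print(selected)  # ['layer_7', 'layer_14', 'layer_19']
# Levels 6 and 7 are then synthesized from the level-5 map by the stride-2
# 'bottom_up_Conv2d_*' convolutions in the loop above.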
Code example #10
 def test_identity_context_manager(self):
     with context_manager.IdentityContextManager() as identity_context:
         self.assertIsNone(identity_context)
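Consistent with this test, a minimal sketch of what object_detection.utils.context_manager.IdentityContextManager looks like: a context manager that does nothing and yields None, so it can stand in for slim.arg_scope(...) when no extra scope should be applied.

class IdentityContextManager(object):
    """Returns an identity context manager that does nothing."""

    def __enter__(self):
        return None

    def __exit__(self, exec_type, exec_value, traceback):
        # Returning False re-raises any exception from the with-block.
        return False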
Code example #11
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
        if self._depth_multiplier != 1.0:
            raise ValueError('Depth multiplier not supported.')

        preprocessed_inputs = shape_utils.check_min_image_dim(
            129, preprocessed_inputs)

        with tf.variable_scope(self._resnet_scope_name,
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(resnet_v2.resnet_arg_scope()):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = self._resnet_base_fn(
                        inputs=ops.pad_to_multiple(preprocessed_inputs,
                                                   self._pad_to_multiple),
                        num_classes=None,
                        is_training=None,
                        global_pool=False,
                        output_stride=None,
                        # store_non_strided_activations=True,
                        scope=scope)
                    image_features = self._filter_features(image_features)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                with tf.variable_scope(self._fpn_scope_name,
                                       reuse=self._reuse_weights):
                    base_fpn_max_level = min(self._fpn_max_level, 5)
                    feature_block_list = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_block_list.append('block{}'.format(level - 1))
                    fpn_features = feature_map_generators.fpn_top_down_feature_maps(
                        [(key, image_features[key])
                         for key in feature_block_list],
                        depth=self._additional_layer_depth,
                        use_explicit_padding=True)
                    feature_maps = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_maps.append(
                            fpn_features['top_down_block{}'.format(level - 1)])
                    last_feature_map = fpn_features['top_down_block{}'.format(
                        base_fpn_max_level - 1)]
                    # Construct coarse features
                    for i in range(base_fpn_max_level, self._fpn_max_level):
                        last_feature_map = slim.conv2d(
                            last_feature_map,
                            num_outputs=self._additional_layer_depth,
                            kernel_size=[3, 3],
                            stride=2,
                            padding='SAME',
                            scope='bottom_up_block{}'.format(i))
                        feature_maps.append(last_feature_map)
        return feature_maps
Code example #12
    def extract_features(self,
                         preprocessed_inputs,
                         state_saver=None,
                         state_name='lstm_state',
                         unroll_length=5,
                         scope=None):
        """Extracts features from preprocessed inputs.

    The features include the base network features, lstm features and SSD
    features, organized in the following name scope:

    <parent scope>/MobilenetV1/...
    <parent scope>/LSTM/...
    <parent scope>/FeatureMaps/...

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float tensor
        representing a batch of consecutive frames from video clips.
      state_saver: A state saver object with methods `state` and `save_state`.
      state_name: A python string for the name to use with the state_saver.
      unroll_length: The number of steps to unroll the lstm.
      scope: The scope for the base network of the feature extractor.

    Returns:
      A list of tensors where the ith tensor has shape [batch, height_i,
      width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)
        with slim.arg_scope(
                mobilenet_v1.mobilenet_v1_arg_scope(
                    is_training=self._is_training)):
            with (slim.arg_scope(self._conv_hyperparams_fn())
                  if self._override_base_feature_extractor_hyperparams else
                  context_manager.IdentityContextManager()):
                with slim.arg_scope([slim.batch_norm], fused=False):
                    # Base network.
                    with tf.variable_scope(scope,
                                           self._base_network_scope,
                                           reuse=self._reuse_weights) as scope:
                        net, image_features = mobilenet_v1.mobilenet_v1_base(
                            ops.pad_to_multiple(preprocessed_inputs,
                                                self._pad_to_multiple),
                            final_endpoint='Conv2d_13_pointwise',
                            min_depth=self._min_depth,
                            depth_multiplier=self._depth_multiplier,
                            scope=scope)

        with slim.arg_scope(self._conv_hyperparams_fn()):
            with slim.arg_scope([slim.batch_norm],
                                fused=False,
                                is_training=self._is_training):
                # ConvLSTM layers.
                with tf.variable_scope(
                        'LSTM', reuse=self._reuse_weights) as lstm_scope:
                    lstm_cell = lstm_cells.BottleneckConvLSTMCell(
                        filter_size=(3, 3),
                        output_size=(net.shape[1].value, net.shape[2].value),
                        num_units=max(self._min_depth, self._lstm_state_depth),
                        activation=tf.nn.relu6,
                        visualize_gates=True)

                    net_seq = list(tf.split(net, unroll_length))
                    if state_saver is None:
                        init_state = lstm_cell.init_state(
                            state_name, net.shape[0].value // unroll_length,
                            tf.float32)
                    else:
                        c = state_saver.state('%s_c' % state_name)
                        h = state_saver.state('%s_h' % state_name)
                        init_state = (c, h)

                    # Identities added for inputting state tensors externally.
                    c_ident = tf.identity(init_state[0],
                                          name='lstm_state_in_c')
                    h_ident = tf.identity(init_state[1],
                                          name='lstm_state_in_h')
                    init_state = (c_ident, h_ident)

                    net_seq, states_out = rnn_decoder.rnn_decoder(
                        net_seq, init_state, lstm_cell, scope=lstm_scope)
                    batcher_ops = None
                    self._states_out = states_out
                    if state_saver is not None:
                        self._step = state_saver.state('%s_step' % state_name)
                        batcher_ops = [
                            state_saver.save_state('%s_c' % state_name,
                                                   states_out[-1][0]),
                            state_saver.save_state('%s_h' % state_name,
                                                   states_out[-1][1]),
                            state_saver.save_state('%s_step' % state_name,
                                                   self._step - 1)
                        ]
                    with tf_ops.control_dependencies(batcher_ops):
                        image_features['Conv2d_13_pointwise_lstm'] = tf.concat(
                            net_seq, 0)

                    # Identities added for reading output states, to be reused externally.
                    tf.identity(states_out[-1][0], name='lstm_state_out_c')
                    tf.identity(states_out[-1][1], name='lstm_state_out_h')

                # SSD layers.
                with tf.variable_scope('FeatureMaps',
                                       reuse=self._reuse_weights):
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=self._feature_map_layout,
                        depth_multiplier=(self._depth_multiplier),
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)

        return list(feature_maps.values())
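My reading of the shapes in the unroll above, with illustrative numbers: the batch axis of `net` carries unroll_length consecutive frames per clip, so tf.split recovers the time dimension, and the LSTM state is sized per clip.

import tensorflow as tf

unroll_length = 5
clips = 4  # video clips per batch
net = tf.zeros([clips * unroll_length, 8, 8, 256])
net_seq = tf.split(net, unroll_length)  # 5 tensors, each [4, 8, 8, 256]
state_rows = net.shape[0].value // unroll_length  # LSTM state rows -> 4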
Code example #13
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: if image height or width are not 256 pixels.
    """
        image_shape = preprocessed_inputs.get_shape()
        image_shape.assert_has_rank(4)
        image_height = image_shape[1].value
        image_width = image_shape[2].value

        if image_height is None or image_width is None:
            shape_assert = tf.Assert(
                tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256),
                               tf.equal(tf.shape(preprocessed_inputs)[2],
                                        256)),
                ['image size must be 256 in both height and width.'])
            with tf.control_dependencies([shape_assert]):
                preprocessed_inputs = tf.identity(preprocessed_inputs)
        elif image_height != 256 or image_width != 256:
            raise ValueError(
                'image size must be 256 in both height and width;'
                ' image dim = %d,%d' % (image_height, image_width))

        feature_map_layout = {
            'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise',
                           '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256],
            'conv_kernel_size': [-1, -1, 3, 3, 2],
            'use_explicit_padding': self._use_explicit_padding,
            'use_depthwise': self._use_depthwise,
        }

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return list(feature_maps.values())
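The static/dynamic size check above generalizes to any required size; a sketch with illustrative names: if the height and width are known at graph-construction time they are validated immediately, otherwise a runtime tf.Assert is wired in via control_dependencies.

def require_height_width(images, size):
    static = images.get_shape()  # dims may be None at graph-build time
    h, w = static[1].value, static[2].value
    if h is None or w is None:
        assert_op = tf.Assert(
            tf.logical_and(tf.equal(tf.shape(images)[1], size),
                           tf.equal(tf.shape(images)[2], size)),
            ['image size must be %d in both height and width.' % size])
        with tf.control_dependencies([assert_op]):
            images = tf.identity(images)
    elif h != size or w != size:
        raise ValueError('image size must be %d; got %d,%d' % (size, h, w))
    return images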
Code example #14
def backbone_net(inputs, image_size, is_training=True, depth_multiplier=0.5):
    
    pad_to_multiple = 14 if image_size == 112 else (10 if image_size == 80 else 8)
    use_explicit_padding = False

    print('construct backbone_net for image_size', image_size, 'depth_multiplier = ', depth_multiplier)
    use_depthwise = True
    override_base_feature_extractor_hyperparams = False
    reuse_weights = None
    min_depth = 16

    specs = [
            op(slim.conv2d, stride=2, num_outputs=64, kernel_size=[3, 3]),
            # TODO: depthwise conv 3x3
            op(slim.separable_conv2d, stride=1, kernel_size=[3, 3],
               num_outputs=None, multiplier_func=dummy_depth_multiplier),
            # 56^2 x 64 in -> Bottleneck, t=2, c=64, n=5, s=2
            op(ops.expanded_conv, stride=2, num_outputs=64),
        ]
    for _ in range(0, 4):
        specs.append(op(ops.expanded_conv, stride=1, num_outputs=64))

    # 28^2 x 64 in -> Bottleneck, t=2, c=128, n=1, s=2
    specs.append(op(ops.expanded_conv, stride=2, num_outputs=128))

    # 14^2 x 128 in -> Bottleneck, t=4, c=128, n=6, s=1
    for _ in range(0, 6):            
        specs.append(op(ops.expanded_conv, 
            expansion_size=expand_input(4), 
            num_outputs=128,
            stride=1))

    kernel_size = [7, 7] if image_size == 112 else ([5, 5] if image_size == 80 else [4, 4])
    specs.append(op(ops.expanded_conv, stride=1, num_outputs=16, scope='S1'))
    specs.append(op(slim.conv2d, stride=2, kernel_size=[3, 3], num_outputs=32, scope='S2'))
    specs.append(op(slim.conv2d, stride=1, kernel_size=kernel_size, 
        num_outputs=128, scope='S3', padding='VALID'))

    # print('specs = ', specs, ' len = ', len(specs))

    arch = dict(
        defaults={
            # Note: these parameters of batch norm affect the architecture
            # that's why they are here and not in training_scope.
            (slim.batch_norm,): {'center': True, 'scale': True},
            (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
                'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6
            },
            (ops.expanded_conv,): {
                'expansion_size': expand_input(2),
                'split_expansion': 1,
                'normalizer_fn': slim.batch_norm,
                'residual': True,
            },
            (slim.conv2d, slim.separable_conv2d): {
                'padding': 'SAME',
                'weights_initializer': slim.xavier_initializer()
            }
        },

        spec=specs
    )

    print('input to backbone_net', inputs)
    with tf.variable_scope('Backbone', reuse=reuse_weights) as scope:
        with slim.arg_scope(
            mobilenet_v2.training_scope(is_training=is_training, bn_decay=0.9997)), \
            slim.arg_scope(
              [mobilenet.depth_multiplier], min_depth=min_depth):
            with (slim.arg_scope(conv_hyperparams_fn(is_training=is_training))
                if override_base_feature_extractor_hyperparams else
                context_manager.IdentityContextManager()):
                _, image_features = mobilenet_v2.mobilenet_base(
                  od_ops.pad_to_multiple(inputs, pad_to_multiple),                  
                  depth_multiplier=depth_multiplier,
                  is_training=is_training,
                  use_explicit_padding=use_explicit_padding,
                  conv_defs=arch,
                  scope=scope)
                # do a fully connected layer here
                # TODO
                layer_15 = image_features['layer_15']
                layer_16 = image_features['layer_16']
                layer_17 = image_features['layer_17']
                # batch_size = tf.shape(S1)[0]                

                S1 = slim.flatten(layer_15, scope='S1flatten') # tf.reshape(S1, [batch_size, -1])
                S2 = slim.flatten(layer_16, scope='S2flatten') # [batch_size, -1])
                S3 = slim.flatten(layer_17, scope='S3flatten') # [batch_size, -1])
                before_dense = tf.concat([S1, S2, S3], 1)
                
                for i in range(1, 18):
                    print('layer_' + str(i), image_features['layer_' + str(i)])
                # print('layer_17', layer_17)
                print('S1', S1)
                print('S2', S2)
                print('S3', S3)

                # to_test = slim.conv2d(image_features['layer_19'])
                print('image image_features', image_features.keys())
                with slim.arg_scope([slim.batch_norm],
                                    is_training=is_training,
                                    center=True, scale=True):
                    landmarks = slim.fully_connected(
                        before_dense, 136,
                        activation_fn=tf.nn.relu6,
                        normalizer_fn=slim.batch_norm,
                        weights_initializer=slim.xavier_initializer())
                    return image_features, landmarks, (
                        image_features['layer_1'], inputs,
                        image_features['layer_2'])
Code example #15
def backbone_net(inputs,
                 image_size,
                 is_training=True,
                 depth_multiplier=0.5,
                 **kwargs):

    pad_to_multiple = 10
    use_explicit_padding = False

    print('construct backbone_net for image_size', image_size,
          'depth_multiplier = ', depth_multiplier)
    use_depthwise = True
    override_base_feature_extractor_hyperparams = False
    reuse_weights = None
    min_depth = 16

    specs = [
        op(slim.conv2d,
           stride=2,
           num_outputs=64,
           kernel_size=[3, 3],
           activation_fn=tf.nn.elu),
        # TODO: depthwise conv 3x3
        op(ops.expanded_conv, stride=1, kernel_size=[3, 3], num_outputs=64),
        # 56^2 x 64 in -> Bottleneck, t=2, c=64, n=5, s=2
        op(slim.max_pool2d, kernel_size=[3, 3], padding='SAME', stride=1),
        op(ops.expanded_conv, stride=2, num_outputs=64, kernel_size=[3, 3]),
    ]
    for _ in range(0, 4):
        specs.append(
            op(ops.expanded_conv, stride=1, num_outputs=64,
               kernel_size=[3, 3]))

    # 28^2 x 64 in -> Bottleneck, t=2, c=128, n=1, s=2
    specs.append(
        op(ops.expanded_conv, stride=2, num_outputs=128, kernel_size=[3, 3]))

    # 14^2 x 128 in -> Bottleneck, t=4, c=128, n=6, s=1
    mid_conv_n = kwargs.get('mid_conv_n', 4)
    for _ in range(0, mid_conv_n):
        specs.append(
            op(ops.expanded_conv,
               expansion_size=expand_input(4),
               num_outputs=128,
               stride=1))

    kernel_size = [5, 5]
    specs.append(op(ops.expanded_conv, stride=1, num_outputs=16, scope='S1'))
    specs.append(
        op(slim.conv2d,
           stride=2,
           kernel_size=[3, 3],
           num_outputs=32,
           scope='S2',
           activation_fn=tf.nn.elu))
    specs.append(
        op(slim.conv2d,
           stride=1,
           kernel_size=kernel_size,
           num_outputs=128,
           scope='S3',
           padding='VALID',
           activation_fn=tf.nn.elu))

    # print('specs = ', specs, ' len = ', len(specs))

    arch = dict(
        defaults={
            # Note: these parameters of batch norm affect the architecture
            # that's why they are here and not in training_scope.
            (slim.batch_norm,): {
                'center': True,
                'scale': True
            },
            (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
                'normalizer_fn': slim.batch_norm,
                'activation_fn': tf.nn.relu6
            },
            (ops.expanded_conv, ): {
                'expansion_size': expand_input(2),
                'split_expansion': 1,
                'normalizer_fn': slim.batch_norm,
                'residual': True,
            },
            (slim.conv2d, slim.separable_conv2d): {
                'padding': 'SAME',
                'weights_initializer': slim.xavier_initializer()
            }
        },
        spec=specs)

    print('input to backbone_net ', inputs)
    with tf.variable_scope('Backbone', reuse=reuse_weights) as scope:
        with slim.arg_scope(
            mobilenet_v2.training_scope(is_training=is_training, bn_decay=0.9997)), \
            slim.arg_scope(
              [mobilenet.depth_multiplier], min_depth=min_depth):
            with (slim.arg_scope(conv_hyperparams_fn(is_training=is_training))
                  if override_base_feature_extractor_hyperparams else
                  context_manager.IdentityContextManager()):
                _, image_features = mobilenet_v2.mobilenet_base(
                    od_ops.pad_to_multiple(inputs, pad_to_multiple),
                    depth_multiplier=depth_multiplier,
                    is_training=is_training,
                    use_explicit_padding=use_explicit_padding,
                    conv_defs=arch,
                    scope=scope)
                # do a fully connected layer here
                # TODO

                print('image image_features', image_features.keys())
                all_layers = []
                for layer_name in image_features.keys():
                    if re.match(r'^layer_\d+$', layer_name) is not None:
                        all_layers.append(layer_name)

                def layer_key(val):
                    return int(val.split('_')[1])

                all_layers.sort(key=layer_key)
                print('all_layers', all_layers)
                layer_15 = image_features[all_layers[-3]]
                layer_16 = image_features[all_layers[-2]]
                layer_17 = image_features[all_layers[-1]]
                # batch_size = tf.shape(S1)[0]

                S1 = slim.flatten(
                    layer_15,
                    scope='S1flatten')  # tf.reshape(S1, [batch_size, -1])
                S2 = slim.flatten(layer_16,
                                  scope='S2flatten')  # [batch_size, -1])
                S3 = slim.flatten(layer_17,
                                  scope='S3flatten')  # [batch_size, -1])
                before_dense = tf.concat([S1, S2, S3], 1)

                for l in all_layers:
                    print(l, image_features[l])
                # print('layer_17', layer_17)
                print('S1', S1)
                print('S2', S2)
                print('S3', S3)

                # to_test = slim.conv2d(image_features['layer_19'])
                print('before fully_connected', before_dense)
                with slim.arg_scope(
                    [slim.fully_connected],
                        weights_initializer=slim.xavier_initializer(),
                        normalizer_fn=None,
                        activation_fn=tf.nn.tanh):
                    fc_x = kwargs.get('fc_x_n', 2)
                    print('fully_connected before last x ', fc_x)
                    pre_chin = slim.fully_connected(before_dense, 34 * fc_x)
                    pre_left_eye_brow = slim.fully_connected(
                        before_dense, 10 * fc_x)
                    pre_right_eye_brow = slim.fully_connected(
                        before_dense, 10 * fc_x)
                    pre_nose = slim.fully_connected(before_dense, 18 * fc_x)
                    pre_left_eye = slim.fully_connected(
                        before_dense, 12 * fc_x)
                    pre_right_eye = slim.fully_connected(
                        before_dense, 12 * fc_x)
                    pre_mouth = slim.fully_connected(before_dense, 40 * fc_x)

                    chin = slim.fully_connected(pre_chin, 34)
                    left_eye_brow = slim.fully_connected(pre_left_eye_brow, 10)
                    right_eye_brow = slim.fully_connected(
                        pre_right_eye_brow, 10)
                    nose = slim.fully_connected(pre_nose, 18)
                    left_eye = slim.fully_connected(pre_left_eye, 12)
                    right_eye = slim.fully_connected(pre_right_eye, 12)
                    mouth = slim.fully_connected(pre_mouth, 40)

                    landmarks = tf.concat([
                        chin, left_eye_brow, right_eye_brow, nose, left_eye,
                        right_eye, mouth
                    ], -1)
                    return image_features, landmarks, None
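A hypothetical driver for the backbone above, matching its fixed pad_to_multiple=10 and [5, 5] final kernel (i.e. 80x80 inputs); the returned landmarks tensor is [batch, 136], i.e. 68 (x, y) facial-landmark coordinates: 34 chin, 10 + 10 eyebrow, 18 nose, 12 + 12 eye and 40 mouth values.

inputs = tf.placeholder(tf.float32, [None, 80, 80, 3])
image_features, landmarks, _ = backbone_net(
    inputs, image_size=80, is_training=True, depth_multiplier=0.5,
    mid_conv_n=4, fc_x_n=2)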
Code example #16
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(
                        is_training=None, regularize_depthwise=True)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)

            depth_fn = lambda d: max(int(d * self._depth_multiplier),
                                     self._min_depth)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                with tf.variable_scope('fpn', reuse=self._reuse_weights):
                    feature_blocks = [
                        'Conv2d_3_pointwise', 'Conv2d_5_pointwise',
                        'Conv2d_11_pointwise', 'Conv2d_13_pointwise'
                    ]
                    base_fpn_max_level = min(self._fpn_max_level, 5)
                    feature_block_list = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_block_list.append(feature_blocks[level - 2])
                    fpn_features = feature_map_generators.fpn_top_down_feature_maps(
                        [(key, image_features[key])
                         for key in feature_block_list],
                        depth=depth_fn(256))
                    feature_maps = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_maps.append(fpn_features['top_down_{}'.format(
                            feature_blocks[level - 2])])
                    last_feature_map = fpn_features['top_down_{}'.format(
                        feature_blocks[base_fpn_max_level - 2])]
                    # Construct coarse features
                    for i in range(base_fpn_max_level + 1,
                                   self._fpn_max_level + 1):
                        last_feature_map = slim.conv2d(
                            last_feature_map,
                            num_outputs=depth_fn(256),
                            kernel_size=[3, 3],
                            stride=2,
                            padding='SAME',
                            scope='bottom_up_Conv2d_{}'.format(
                                i - base_fpn_max_level + 13))
                        feature_maps.append(last_feature_map)
        return feature_maps
Code example #17
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
        if self._depth_multiplier != 1.0:
            raise ValueError('Depth multiplier not supported.')

        preprocessed_inputs = shape_utils.check_min_image_dim(
            129, preprocessed_inputs)

        with tf.variable_scope(self._resnet_scope_name,
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = self._resnet_base_fn(
                        inputs=ops.pad_to_multiple(preprocessed_inputs,
                                                   self._pad_to_multiple),
                        num_classes=None,
                        is_training=None,
                        global_pool=False,
                        output_stride=None,
                        store_non_strided_activations=True,
                        scope=scope)
                    image_features = self._filter_features(image_features)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                with tf.variable_scope(self._fpn_scope_name,
                                       reuse=self._reuse_weights):
                    fpn_features = feature_map_generators.fpn_top_down_feature_maps(
                        [(key, image_features[key])
                         for key in ['block2', 'block3', 'block4']],
                        depth=256)
                    last_feature_map = fpn_features['top_down_block4']
                    coarse_features = {}
                    for i in range(5, 7):
                        last_feature_map = slim.conv2d(
                            last_feature_map,
                            num_outputs=256,
                            kernel_size=[3, 3],
                            stride=2,
                            padding='SAME',
                            scope='bottom_up_block{}'.format(i))
                        coarse_features['bottom_up_block{}'.format(
                            i)] = last_feature_map
        return [
            fpn_features['top_down_block2'], fpn_features['top_down_block3'],
            fpn_features['top_down_block4'],
            coarse_features['bottom_up_block5'],
            coarse_features['bottom_up_block6']
        ]
Code example #18
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: if conv_defs is not provided or from_layer does not meet the
        size requirement.
    """

        if not self._conv_defs:
            raise ValueError('Must provide backbone conv defs.')

        if len(self._from_layer) != 2:
            raise ValueError('SSD input feature names are not provided.')

        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer': [self._from_layer[0], self._from_layer[1],
                           '', '', '', ''],
            'layer_depth': [-1, -1, 128, 128, 128,
                            128],  # [-1, -1, 512, 256, 256, 128]
            'use_depthwise': self._use_depthwise,
            'use_explicit_padding': self._use_explicit_padding,
        }

        with tf.variable_scope(self._scope_name,
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                mobilenet_v3.training_scope(is_training=None, bn_decay=0.9997)), \
                slim.arg_scope(
                    [mobilenet.depth_multiplier], min_depth=self._min_depth):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v3.mobilenet_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        conv_defs=self._conv_defs,
                        final_endpoint=self._from_layer[1],
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
                with slim.arg_scope(self._conv_hyperparams_fn()):
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=feature_map_layout,
                        depth_multiplier=self._depth_multiplier,
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)

        return list(feature_maps.values())