Example #1
  def test_rewrite_nn_resize_op_quantized(self):
    g = tf.Graph()
    with g.as_default():
      x = array_ops.placeholder(dtypes.float32, shape=(8, 10, 10, 8))
      x_conv = tf.contrib.slim.conv2d(x, 8, 1)
      y = array_ops.placeholder(dtypes.float32, shape=(8, 20, 20, 8))
      s = ops.nearest_neighbor_upsampling(x_conv, 2)
      t = s + y

      graph_rewriter_config = graph_rewriter_pb2.GraphRewriter()
      graph_rewriter_config.quantization.delay = 500000
      graph_rewriter_fn = graph_rewriter_builder.build(
          graph_rewriter_config, is_training=False)
      graph_rewriter_fn()

      exporter.rewrite_nn_resize_op(is_quantized=True)

    resize_op_found = False
    for op in g.get_operations():
      if op.type == 'ResizeNearestNeighbor':
        resize_op_found = True
        self.assertEqual(op.inputs[0].op.type, 'FakeQuantWithMinMaxVars')
        self.assertEqual(op.outputs[0].consumers()[0], t.op)
        break

    self.assertTrue(resize_op_found)
Example #2
File: mask_head.py Project: pcm17/models
  def predict(self, features, num_predictions_per_location=1):
    """Performs mask prediction.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing features for a batch of images.
      num_predictions_per_location: Int containing number of predictions per
        location.

    Returns:
      instance_masks: A float tensor of shape
          [batch_size, 1, num_classes, mask_height, mask_width].

    Raises:
      ValueError: If num_predictions_per_location is not 1.
    """
    if num_predictions_per_location != 1:
      raise ValueError('Only num_predictions_per_location=1 is supported')
    num_conv_channels = self._mask_prediction_conv_depth
    if num_conv_channels == 0:
      num_feature_channels = features.get_shape().as_list()[3]
      num_conv_channels = self._get_mask_predictor_conv_depth(
          num_feature_channels, self._num_classes)
    with slim.arg_scope(self._conv_hyperparams_fn()):
      if not self._convolve_then_upsample:
        features = tf.image.resize_bilinear(
            features, [self._mask_height, self._mask_width],
            align_corners=True)
      for _ in range(self._mask_prediction_num_conv_layers - 1):
        features = slim.conv2d(
            features,
            num_outputs=num_conv_channels,
            kernel_size=[3, 3])
      if self._convolve_then_upsample:
        # Replace Transposed Convolution with a Nearest Neighbor upsampling step
        # followed by 3x3 convolution.
        height_scale = self._mask_height / features.shape[1].value
        width_scale = self._mask_width / features.shape[2].value
        features = ops.nearest_neighbor_upsampling(
            features, height_scale=height_scale, width_scale=width_scale)
        features = slim.conv2d(
            features,
            num_outputs=num_conv_channels,
            kernel_size=[3, 3])

      num_masks = 1 if self._masks_are_class_agnostic else self._num_classes
      mask_predictions = slim.conv2d(
          features,
          num_outputs=num_masks,
          activation_fn=None,
          normalizer_fn=None,
          kernel_size=[3, 3])
      return tf.expand_dims(
          tf.transpose(mask_predictions, perm=[0, 3, 1, 2]),
          axis=1,
          name='MaskPredictor')
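
The convolve_then_upsample branch derives its scale factors from the ratio of the target mask size to the incoming feature map size. A worked instance of that arithmetic, using hypothetical values that do not come from the code above:

# Hypothetical sizes illustrating the scale computation in the
# convolve_then_upsample branch above.
mask_height, mask_width = 28, 28        # target mask resolution
feature_height, feature_width = 14, 14  # spatial size of `features`
height_scale = mask_height / feature_height  # 2: must be an exact factor
width_scale = mask_width / feature_width     # 2: must be an exact factor
# ops.nearest_neighbor_upsampling then yields 28x28 features, which the
# following 3x3 convolution smooths.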
Example #3
def fpn_top_down_feature_maps(image_features,
                              depth,
                              use_depthwise=False,
                              scope=None):
  """Generates `top-down` feature maps for Feature Pyramid Networks.

  See https://arxiv.org/abs/1612.03144 for details.

  Args:
    image_features: list of tuples of (tensor_name, image_feature_tensor).
      Spatial resolutions of successive tensors must reduce exactly by a factor
      of 2.
    depth: depth of output feature maps.
    use_depthwise: use depthwise separable conv instead of regular conv.
    scope: A scope name to wrap this op under.

  Returns:
    feature_maps: an OrderedDict mapping keys (feature map names) to
      tensors where each tensor has shape [batch, height_i, width_i, depth_i].
  """
  with tf.name_scope(scope, 'top_down'):
    num_levels = len(image_features)
    output_feature_maps_list = []
    output_feature_map_keys = []
    with slim.arg_scope(
        [slim.conv2d, slim.separable_conv2d], padding='SAME', stride=1):
      top_down = slim.conv2d(
          image_features[-1][1],
          depth, [1, 1], activation_fn=None, normalizer_fn=None,
          scope='projection_%d' % num_levels)
      output_feature_maps_list.append(top_down)
      output_feature_map_keys.append(
          'top_down_%s' % image_features[-1][0])

      for level in reversed(range(num_levels - 1)):
        top_down = ops.nearest_neighbor_upsampling(top_down, 2)
        residual = slim.conv2d(
            image_features[level][1], depth, [1, 1],
            activation_fn=None, normalizer_fn=None,
            scope='projection_%d' % (level + 1))
        top_down += residual
        if use_depthwise:
          conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1)
        else:
          conv_op = slim.conv2d
        output_feature_maps_list.append(conv_op(
            top_down,
            depth, [3, 3],
            scope='smoothing_%d' % (level + 1)))
        output_feature_map_keys.append('top_down_%s' % image_features[level][0])
      return collections.OrderedDict(reversed(
          list(zip(output_feature_map_keys, output_feature_maps_list))))
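
A hypothetical invocation of the function above; the tensor names, shapes, and depth are illustrative, and the module is assumed to already import tf, slim, ops, functools, and collections as the code implies:

import tensorflow as tf

# Three backbone feature maps whose resolutions halve from level to level,
# as the docstring requires (all shapes are hypothetical).
c3 = tf.placeholder(tf.float32, [1, 32, 32, 256])
c4 = tf.placeholder(tf.float32, [1, 16, 16, 512])
c5 = tf.placeholder(tf.float32, [1, 8, 8, 1024])

feature_maps = fpn_top_down_feature_maps(
    [('C3', c3), ('C4', c4), ('C5', c5)], depth=256)
# -> OrderedDict with keys 'top_down_C3', 'top_down_C4', 'top_down_C5',
#    each mapped to a 256-channel tensor at that level's resolution.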
Example #4
  def test_rewrite_nn_resize_op(self):
    g = tf.Graph()
    with g.as_default():
      x = array_ops.placeholder(dtypes.float32, shape=(8, 10, 10, 8))
      y = array_ops.placeholder(dtypes.float32, shape=(8, 20, 20, 8))
      s = ops.nearest_neighbor_upsampling(x, 2)
      t = s + y
      exporter.rewrite_nn_resize_op()

    resize_op_found = False
    for op in g.get_operations():
      if op.type == 'ResizeNearestNeighbor':
        resize_op_found = True
        self.assertEqual(op.inputs[0], x)
        self.assertEqual(op.outputs[0].consumers()[0], t.op)
        break

    self.assertTrue(resize_op_found)
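
Outside the test harness, the rewrite these two tests exercise might be applied as in the sketch below. The import paths are assumptions based on the TensorFlow Object Detection API layout; what is grounded in the tests is that rewrite_nn_resize_op walks the default graph and replaces the pattern emitted by ops.nearest_neighbor_upsampling with a single ResizeNearestNeighbor op:

import tensorflow as tf
from object_detection import exporter   # assumed import path
from object_detection.utils import ops  # assumed import path

g = tf.Graph()
with g.as_default():
  x = tf.placeholder(tf.float32, shape=(1, 10, 10, 8))
  y = tf.placeholder(tf.float32, shape=(1, 20, 20, 8))
  out = ops.nearest_neighbor_upsampling(x, 2) + y
  # Collapse the reshape-and-broadcast upsampling pattern into a single
  # ResizeNearestNeighbor op feeding the add.
  exporter.rewrite_nn_resize_op()

print([op.type for op in g.get_operations()
       if op.type == 'ResizeNearestNeighbor'])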
Example #5
def fpn_top_down_feature_maps(image_features,
                              depth,
                              use_depthwise=False,
                              use_explicit_padding=False,
                              use_bounded_activations=False,
                              scope=None,
                              use_native_resize_op=False):
  """Generates `top-down` feature maps for Feature Pyramid Networks.

  See https://arxiv.org/abs/1612.03144 for details.

  Args:
    image_features: list of tuples of (tensor_name, image_feature_tensor).
      Spatial resolutions of successive tensors must reduce exactly by a factor
      of 2.
    depth: depth of output feature maps.
    use_depthwise: whether to use depthwise separable conv instead of regular
      conv.
    use_explicit_padding: whether to use explicit padding.
    use_bounded_activations: Whether or not to clip activations to range
      [-ACTIVATION_BOUND, ACTIVATION_BOUND]. Bounded activations better lend
      themselves to quantized inference.
    scope: A scope name to wrap this op under.
    use_native_resize_op: If True, uses the tf.image.resize_nearest_neighbor op
      for the upsampling process instead of the reshape-and-broadcast
      implementation.

  Returns:
    feature_maps: an OrderedDict mapping keys (feature map names) to
      tensors where each tensor has shape [batch, height_i, width_i, depth_i].
  """
  with tf.name_scope(scope, 'top_down'):
    num_levels = len(image_features)
    output_feature_maps_list = []
    output_feature_map_keys = []
    padding = 'VALID' if use_explicit_padding else 'SAME'
    kernel_size = 3
    with slim.arg_scope(
        [slim.conv2d, slim.separable_conv2d], padding=padding, stride=1):
      top_down = slim.conv2d(
          image_features[-1][1],
          depth, [1, 1], activation_fn=None, normalizer_fn=None,
          scope='projection_%d' % num_levels)
      if use_bounded_activations:
        top_down = tf.clip_by_value(top_down, -ACTIVATION_BOUND,
                                    ACTIVATION_BOUND)
      output_feature_maps_list.append(top_down)
      output_feature_map_keys.append(
          'top_down_%s' % image_features[-1][0])

      for level in reversed(range(num_levels - 1)):
        if use_native_resize_op:
          with tf.name_scope('nearest_neighbor_upsampling'):
            top_down_shape = top_down.shape.as_list()
            top_down = tf.image.resize_nearest_neighbor(
                top_down, [top_down_shape[1] * 2, top_down_shape[2] * 2])
        else:
          top_down = ops.nearest_neighbor_upsampling(top_down, scale=2)
        residual = slim.conv2d(
            image_features[level][1], depth, [1, 1],
            activation_fn=None, normalizer_fn=None,
            scope='projection_%d' % (level + 1))
        if use_bounded_activations:
          residual = tf.clip_by_value(residual, -ACTIVATION_BOUND,
                                      ACTIVATION_BOUND)
        if use_explicit_padding:
          # slice top_down to the same shape as residual
          residual_shape = tf.shape(residual)
          top_down = top_down[:, :residual_shape[1], :residual_shape[2], :]
        top_down += residual
        if use_bounded_activations:
          top_down = tf.clip_by_value(top_down, -ACTIVATION_BOUND,
                                      ACTIVATION_BOUND)
        if use_depthwise:
          conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1)
        else:
          conv_op = slim.conv2d
        if use_explicit_padding:
          top_down = ops.fixed_padding(top_down, kernel_size)
        output_feature_maps_list.append(conv_op(
            top_down,
            depth, [kernel_size, kernel_size],
            scope='smoothing_%d' % (level + 1)))
        output_feature_map_keys.append('top_down_%s' % image_features[level][0])
      return collections.OrderedDict(reversed(
          list(zip(output_feature_map_keys, output_feature_maps_list))))
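
The docstring above contrasts tf.image.resize_nearest_neighbor with the reshape-and-broadcast implementation used when use_native_resize_op=False. A minimal sketch of that technique, written from scratch here rather than copied from object_detection.utils.ops, so details may differ from the real op:

import tensorflow as tf

def nn_upsample_sketch(x, scale):
  """Nearest-neighbor upsampling via reshape and broadcasting (sketch).

  Args:
    x: [batch, height, width, channels] tensor with a static shape.
    scale: integer upsampling factor.

  Returns:
    [batch, height * scale, width * scale, channels] tensor.
  """
  b, h, w, c = x.shape.as_list()
  # Insert a singleton axis after each spatial dimension, broadcast every
  # pixel against a [1, 1, scale, 1, scale, 1] ones tensor, then fold the
  # copies back into the spatial dimensions.
  x = tf.reshape(x, [b, h, 1, w, 1, c])
  x = x * tf.ones([1, 1, scale, 1, scale, 1], dtype=x.dtype)
  return tf.reshape(x, [b, h * scale, w * scale, c])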
Example #6
def nearest_neighbor_upsampling(image):
  return ops.nearest_neighbor_upsampling(image, scale=2)
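
Usage sketch for the wrapper above, with hypothetical shapes; the fixed scale=2 doubles both spatial dimensions:

import tensorflow as tf

image = tf.zeros([2, 3, 3, 4])
upsampled = nearest_neighbor_upsampling(image)  # shape: [2, 6, 6, 4]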
Example #7
    def build(self, input_shapes):
        num_conv_channels = self._mask_prediction_conv_depth
        if num_conv_channels == 0:
            num_feature_channels = input_shapes.as_list()[3]
            num_conv_channels = self._get_mask_predictor_conv_depth(
                num_feature_channels, self._num_classes)

        for i in range(self._mask_prediction_num_conv_layers - 1):
            self._mask_predictor_layers.append(
                tf.keras.layers.Conv2D(
                    num_conv_channels, [3, 3],
                    padding='SAME',
                    name='MaskPredictor_conv2d_{}'.format(i),
                    **self._conv_hyperparams.params()))
            self._mask_predictor_layers.append(
                self._conv_hyperparams.build_batch_norm(
                    training=(self._is_training
                              and not self._freeze_batchnorm),
                    name='MaskPredictor_batchnorm_{}'.format(i)))
            self._mask_predictor_layers.append(
                self._conv_hyperparams.build_activation_layer(
                    name='MaskPredictor_activation_{}'.format(i)))

        if self._convolve_then_upsample:
            # Replace Transposed Convolution with a Nearest Neighbor upsampling step
            # followed by 3x3 convolution.
            height_scale = self._mask_height / shape_utils.get_dim_as_int(
                input_shapes[1])
            width_scale = self._mask_width / shape_utils.get_dim_as_int(
                input_shapes[2])
            # pylint: disable=g-long-lambda
            self._mask_predictor_layers.append(
                tf.keras.layers.Lambda(
                    lambda features: ops.nearest_neighbor_upsampling(
                        features,
                        height_scale=height_scale,
                        width_scale=width_scale)))
            # pylint: enable=g-long-lambda
            self._mask_predictor_layers.append(
                tf.keras.layers.Conv2D(num_conv_channels, [3, 3],
                                       padding='SAME',
                                       name='MaskPredictor_upsample_conv2d',
                                       **self._conv_hyperparams.params()))
            self._mask_predictor_layers.append(
                self._conv_hyperparams.build_batch_norm(
                    training=(self._is_training
                              and not self._freeze_batchnorm),
                    name='MaskPredictor_upsample_batchnorm'))
            self._mask_predictor_layers.append(
                self._conv_hyperparams.build_activation_layer(
                    name='MaskPredictor_upsample_activation'))

        num_masks = 1 if self._masks_are_class_agnostic else self._num_classes
        self._mask_predictor_layers.append(
            tf.keras.layers.Conv2D(
                num_masks, [3, 3],
                padding='SAME',
                name='MaskPredictor_last_conv2d',
                **self._conv_hyperparams.params(use_bias=True)))

        self.built = True
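
build only assembles self._mask_predictor_layers; a forward pass would apply them in order. The method below is a hypothetical sketch of that loop, not the class's actual prediction code, which is not shown here:

    def _apply_mask_predictor_layers(self, features):
        # Run the features through every layer built above, in order.
        for layer in self._mask_predictor_layers:
            features = layer(features)
        return features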
Example #8
def graph_fn(inputs):
  custom_op_output = ops.nearest_neighbor_upsampling(inputs, scale=2)
  tf_op_output = tf.image.resize_images(
      inputs, [4, 4], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
  return (custom_op_output, tf_op_output)
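
graph_fn pairs the custom op with the native resize so their outputs can be compared. A hypothetical TF 1.x driver, assuming graph_fn and its ops import are in scope, and using a 2x2 input so that [4, 4] is exactly a 2x upsampling:

import numpy as np
import tensorflow as tf

with tf.Graph().as_default(), tf.Session() as sess:
  inputs = tf.constant(
      np.arange(4, dtype=np.float32).reshape(1, 2, 2, 1))
  custom_out, native_out = graph_fn(inputs)
  # Both paths should produce identical nearest-neighbor results.
  custom_np, native_np = sess.run([custom_out, native_out])
  np.testing.assert_allclose(custom_np, native_np)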