def test_rewrite_nn_resize_op_quantized(self): g = tf.Graph() with g.as_default(): x = array_ops.placeholder(dtypes.float32, shape=(8, 10, 10, 8)) x_conv = tf.contrib.slim.conv2d(x, 8, 1) y = array_ops.placeholder(dtypes.float32, shape=(8, 20, 20, 8)) s = ops.nearest_neighbor_upsampling(x_conv, 2) t = s + y graph_rewriter_config = graph_rewriter_pb2.GraphRewriter() graph_rewriter_config.quantization.delay = 500000 graph_rewriter_fn = graph_rewriter_builder.build( graph_rewriter_config, is_training=False) graph_rewriter_fn() exporter.rewrite_nn_resize_op(is_quantized=True) resize_op_found = False for op in g.get_operations(): if op.type == 'ResizeNearestNeighbor': resize_op_found = True self.assertEqual(op.inputs[0].op.type, 'FakeQuantWithMinMaxVars') self.assertEqual(op.outputs[0].consumers()[0], t.op) break self.assertTrue(resize_op_found)
def predict(self, features, num_predictions_per_location=1): """Performs mask prediction. Args: features: A float tensor of shape [batch_size, height, width, channels] containing features for a batch of images. num_predictions_per_location: Int containing number of predictions per location. Returns: instance_masks: A float tensor of shape [batch_size, 1, num_classes, mask_height, mask_width]. Raises: ValueError: If num_predictions_per_location is not 1. """ if num_predictions_per_location != 1: raise ValueError('Only num_predictions_per_location=1 is supported') num_conv_channels = self._mask_prediction_conv_depth if num_conv_channels == 0: num_feature_channels = features.get_shape().as_list()[3] num_conv_channels = self._get_mask_predictor_conv_depth( num_feature_channels, self._num_classes) with slim.arg_scope(self._conv_hyperparams_fn()): if not self._convolve_then_upsample: features = tf.image.resize_bilinear( features, [self._mask_height, self._mask_width], align_corners=True) for _ in range(self._mask_prediction_num_conv_layers - 1): features = slim.conv2d( features, num_outputs=num_conv_channels, kernel_size=[3, 3]) if self._convolve_then_upsample: # Replace Transposed Convolution with a Nearest Neighbor upsampling step # followed by 3x3 convolution. height_scale = self._mask_height / features.shape[1].value width_scale = self._mask_width / features.shape[2].value features = ops.nearest_neighbor_upsampling( features, height_scale=height_scale, width_scale=width_scale) features = slim.conv2d( features, num_outputs=num_conv_channels, kernel_size=[3, 3]) num_masks = 1 if self._masks_are_class_agnostic else self._num_classes mask_predictions = slim.conv2d( features, num_outputs=num_masks, activation_fn=None, normalizer_fn=None, kernel_size=[3, 3]) return tf.expand_dims( tf.transpose(mask_predictions, perm=[0, 3, 1, 2]), axis=1, name='MaskPredictor')
def fpn_top_down_feature_maps(image_features, depth, use_depthwise=False, scope=None): """Generates `top-down` feature maps for Feature Pyramid Networks. See https://arxiv.org/abs/1612.03144 for details. Args: image_features: list of tuples of (tensor_name, image_feature_tensor). Spatial resolutions of succesive tensors must reduce exactly by a factor of 2. depth: depth of output feature maps. use_depthwise: use depthwise separable conv instead of regular conv. scope: A scope name to wrap this op under. Returns: feature_maps: an OrderedDict mapping keys (feature map names) to tensors where each tensor has shape [batch, height_i, width_i, depth_i]. """ with tf.name_scope(scope, 'top_down'): num_levels = len(image_features) output_feature_maps_list = [] output_feature_map_keys = [] with slim.arg_scope( [slim.conv2d, slim.separable_conv2d], padding='SAME', stride=1): top_down = slim.conv2d( image_features[-1][1], depth, [1, 1], activation_fn=None, normalizer_fn=None, scope='projection_%d' % num_levels) output_feature_maps_list.append(top_down) output_feature_map_keys.append( 'top_down_%s' % image_features[-1][0]) for level in reversed(range(num_levels - 1)): top_down = ops.nearest_neighbor_upsampling(top_down, 2) residual = slim.conv2d( image_features[level][1], depth, [1, 1], activation_fn=None, normalizer_fn=None, scope='projection_%d' % (level + 1)) top_down += residual if use_depthwise: conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1) else: conv_op = slim.conv2d output_feature_maps_list.append(conv_op( top_down, depth, [3, 3], scope='smoothing_%d' % (level + 1))) output_feature_map_keys.append('top_down_%s' % image_features[level][0]) return collections.OrderedDict(reversed( list(zip(output_feature_map_keys, output_feature_maps_list))))
def test_rewrite_nn_resize_op(self): g = tf.Graph() with g.as_default(): x = array_ops.placeholder(dtypes.float32, shape=(8, 10, 10, 8)) y = array_ops.placeholder(dtypes.float32, shape=(8, 20, 20, 8)) s = ops.nearest_neighbor_upsampling(x, 2) t = s + y exporter.rewrite_nn_resize_op() resize_op_found = False for op in g.get_operations(): if op.type == 'ResizeNearestNeighbor': resize_op_found = True self.assertEqual(op.inputs[0], x) self.assertEqual(op.outputs[0].consumers()[0], t.op) break self.assertTrue(resize_op_found)
def fpn_top_down_feature_maps(image_features, depth, use_depthwise=False, use_explicit_padding=False, use_bounded_activations=False, scope=None, use_native_resize_op=False): """Generates `top-down` feature maps for Feature Pyramid Networks. See https://arxiv.org/abs/1612.03144 for details. Args: image_features: list of tuples of (tensor_name, image_feature_tensor). Spatial resolutions of succesive tensors must reduce exactly by a factor of 2. depth: depth of output feature maps. use_depthwise: whether to use depthwise separable conv instead of regular conv. use_explicit_padding: whether to use explicit padding. use_bounded_activations: Whether or not to clip activations to range [-ACTIVATION_BOUND, ACTIVATION_BOUND]. Bounded activations better lend themselves to quantized inference. scope: A scope name to wrap this op under. use_native_resize_op: If True, uses tf.image.resize_nearest_neighbor op for the upsampling process instead of reshape and broadcasting implementation. Returns: feature_maps: an OrderedDict mapping keys (feature map names) to tensors where each tensor has shape [batch, height_i, width_i, depth_i]. """ with tf.name_scope(scope, 'top_down'): num_levels = len(image_features) output_feature_maps_list = [] output_feature_map_keys = [] padding = 'VALID' if use_explicit_padding else 'SAME' kernel_size = 3 with slim.arg_scope( [slim.conv2d, slim.separable_conv2d], padding=padding, stride=1): top_down = slim.conv2d( image_features[-1][1], depth, [1, 1], activation_fn=None, normalizer_fn=None, scope='projection_%d' % num_levels) if use_bounded_activations: top_down = tf.clip_by_value(top_down, -ACTIVATION_BOUND, ACTIVATION_BOUND) output_feature_maps_list.append(top_down) output_feature_map_keys.append( 'top_down_%s' % image_features[-1][0]) for level in reversed(range(num_levels - 1)): if use_native_resize_op: with tf.name_scope('nearest_neighbor_upsampling'): top_down_shape = top_down.shape.as_list() top_down = tf.image.resize_nearest_neighbor( top_down, [top_down_shape[1] * 2, top_down_shape[2] * 2]) else: top_down = ops.nearest_neighbor_upsampling(top_down, scale=2) residual = slim.conv2d( image_features[level][1], depth, [1, 1], activation_fn=None, normalizer_fn=None, scope='projection_%d' % (level + 1)) if use_bounded_activations: residual = tf.clip_by_value(residual, -ACTIVATION_BOUND, ACTIVATION_BOUND) if use_explicit_padding: # slice top_down to the same shape as residual residual_shape = tf.shape(residual) top_down = top_down[:, :residual_shape[1], :residual_shape[2], :] top_down += residual if use_bounded_activations: top_down = tf.clip_by_value(top_down, -ACTIVATION_BOUND, ACTIVATION_BOUND) if use_depthwise: conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1) else: conv_op = slim.conv2d if use_explicit_padding: top_down = ops.fixed_padding(top_down, kernel_size) output_feature_maps_list.append(conv_op( top_down, depth, [kernel_size, kernel_size], scope='smoothing_%d' % (level + 1))) output_feature_map_keys.append('top_down_%s' % image_features[level][0]) return collections.OrderedDict(reversed( list(zip(output_feature_map_keys, output_feature_maps_list))))
def fpn_top_down_feature_maps(image_features, depth, use_depthwise=False, use_explicit_padding=False, use_bounded_activations=False, scope=None, use_native_resize_op=False): """Generates `top-down` feature maps for Feature Pyramid Networks. See https://arxiv.org/abs/1612.03144 for details. Args: image_features: list of tuples of (tensor_name, image_feature_tensor). Spatial resolutions of succesive tensors must reduce exactly by a factor of 2. depth: depth of output feature maps. use_depthwise: whether to use depthwise separable conv instead of regular conv. use_explicit_padding: whether to use explicit padding. use_bounded_activations: Whether or not to clip activations to range [-ACTIVATION_BOUND, ACTIVATION_BOUND]. Bounded activations better lend themselves to quantized inference. scope: A scope name to wrap this op under. use_native_resize_op: If True, uses tf.image.resize_nearest_neighbor op for the upsampling process instead of reshape and broadcasting implementation. Returns: feature_maps: an OrderedDict mapping keys (feature map names) to tensors where each tensor has shape [batch, height_i, width_i, depth_i]. """ with tf.name_scope(scope, 'top_down'): num_levels = len(image_features) output_feature_maps_list = [] output_feature_map_keys = [] padding = 'VALID' if use_explicit_padding else 'SAME' kernel_size = 3 with slim.arg_scope([slim.conv2d, slim.separable_conv2d], padding=padding, stride=1): top_down = slim.conv2d(image_features[-1][1], depth, [1, 1], activation_fn=None, normalizer_fn=None, scope='projection_%d' % num_levels) if use_bounded_activations: top_down = tf.clip_by_value(top_down, -ACTIVATION_BOUND, ACTIVATION_BOUND) output_feature_maps_list.append(top_down) output_feature_map_keys.append('top_down_%s' % image_features[-1][0]) for level in reversed(range(num_levels - 1)): if use_native_resize_op: with tf.name_scope('nearest_neighbor_upsampling'): top_down_shape = top_down.shape.as_list() top_down = tf.image.resize_nearest_neighbor( top_down, [top_down_shape[1] * 2, top_down_shape[2] * 2]) else: top_down = ops.nearest_neighbor_upsampling(top_down, scale=2) residual = slim.conv2d(image_features[level][1], depth, [1, 1], activation_fn=None, normalizer_fn=None, scope='projection_%d' % (level + 1)) if use_bounded_activations: residual = tf.clip_by_value(residual, -ACTIVATION_BOUND, ACTIVATION_BOUND) if use_explicit_padding: # slice top_down to the same shape as residual residual_shape = tf.shape(residual) top_down = top_down[:, :residual_shape[1], : residual_shape[2], :] top_down += residual if use_bounded_activations: top_down = tf.clip_by_value(top_down, -ACTIVATION_BOUND, ACTIVATION_BOUND) if use_depthwise: conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1) else: conv_op = slim.conv2d if use_explicit_padding: top_down = ops.fixed_padding(top_down, kernel_size) output_feature_maps_list.append( conv_op(top_down, depth, [kernel_size, kernel_size], scope='smoothing_%d' % (level + 1))) output_feature_map_keys.append('top_down_%s' % image_features[level][0]) return collections.OrderedDict( reversed( list(zip(output_feature_map_keys, output_feature_maps_list))))
def nearest_neighbor_upsampling(image): return ops.nearest_neighbor_upsampling(image, scale=2)
def build(self, input_shapes): num_conv_channels = self._mask_prediction_conv_depth if num_conv_channels == 0: num_feature_channels = input_shapes.as_list()[3] num_conv_channels = self._get_mask_predictor_conv_depth( num_feature_channels, self._num_classes) for i in range(self._mask_prediction_num_conv_layers - 1): self._mask_predictor_layers.append( tf.keras.layers.Conv2D( num_conv_channels, [3, 3], padding='SAME', name='MaskPredictor_conv2d_{}'.format(i), **self._conv_hyperparams.params())) self._mask_predictor_layers.append( self._conv_hyperparams.build_batch_norm( training=(self._is_training and not self._freeze_batchnorm), name='MaskPredictor_batchnorm_{}'.format(i))) self._mask_predictor_layers.append( self._conv_hyperparams.build_activation_layer( name='MaskPredictor_activation_{}'.format(i))) if self._convolve_then_upsample: # Replace Transposed Convolution with a Nearest Neighbor upsampling step # followed by 3x3 convolution. height_scale = self._mask_height / shape_utils.get_dim_as_int( input_shapes[1]) width_scale = self._mask_width / shape_utils.get_dim_as_int( input_shapes[2]) # pylint: disable=g-long-lambda self._mask_predictor_layers.append( tf.keras.layers.Lambda( lambda features: ops.nearest_neighbor_upsampling( features, height_scale=height_scale, width_scale=width_scale))) # pylint: enable=g-long-lambda self._mask_predictor_layers.append( tf.keras.layers.Conv2D(num_conv_channels, [3, 3], padding='SAME', name='MaskPredictor_upsample_conv2d', **self._conv_hyperparams.params())) self._mask_predictor_layers.append( self._conv_hyperparams.build_batch_norm( training=(self._is_training and not self._freeze_batchnorm), name='MaskPredictor_upsample_batchnorm')) self._mask_predictor_layers.append( self._conv_hyperparams.build_activation_layer( name='MaskPredictor_upsample_activation')) num_masks = 1 if self._masks_are_class_agnostic else self._num_classes self._mask_predictor_layers.append( tf.keras.layers.Conv2D( num_masks, [3, 3], padding='SAME', name='MaskPredictor_last_conv2d', **self._conv_hyperparams.params(use_bias=True))) self.built = True
def graph_fn(inputs): custom_op_output = ops.nearest_neighbor_upsampling(inputs, scale=2) tf_op_output = tf.image.resize_images( inputs, [4, 4], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) return (custom_op_output, tf_op_output)