def _extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) image_features = self.mobilenet_v2( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple)) feature_maps = self.feature_map_generator({ 'layer_15/expansion_output': image_features[0], 'layer_19': image_features[1] }) return feature_maps.values()
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) feature_map_layout = { 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '', ''][:self._num_layers], 'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers], 'use_explicit_padding': self._use_explicit_padding, 'use_depthwise': self._use_depthwise, } with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope: with slim.arg_scope( mobilenet_v1.mobilenet_v1_arg_scope( is_training=None, regularize_depthwise=True)): with (slim.arg_scope(self._conv_hyperparams_fn()) if self._override_base_feature_extractor_hyperparams else context_manager.IdentityContextManager()): _, image_features = mobilenet_v1.mobilenet_v1_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Conv2d_13_pointwise', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, use_explicit_padding=self._use_explicit_padding, scope=scope) with slim.arg_scope(self._conv_hyperparams_fn()): feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier, min_depth=self._min_depth, insert_1x1_conv=True, image_features=image_features) return feature_maps.values()
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] Raises: ValueError: depth multiplier is not supported. """ if self._depth_multiplier != 1.0: raise ValueError('Depth multiplier not supported.') preprocessed_inputs = shape_utils.check_min_image_dim( 129, preprocessed_inputs) with tf.variable_scope(self._resnet_scope_name, reuse=self._reuse_weights) as scope: with slim.arg_scope(resnet_v1.resnet_arg_scope()): with (slim.arg_scope(self._conv_hyperparams_fn()) if self._override_base_feature_extractor_hyperparams else context_manager.IdentityContextManager()): with slim.arg_scope([resnet_v1.bottleneck], use_bounded_activations=self. _use_bounded_activations): _, activations = self._resnet_base_fn( inputs=ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), num_classes=None, is_training=None, global_pool=False, output_stride=None, store_non_strided_activations=True, scope=scope) with slim.arg_scope(self._conv_hyperparams_fn()): feature_maps = feature_map_generators.pooling_pyramid_feature_maps( base_feature_map_depth=self._base_feature_map_depth, num_layers=self._num_layers, image_features={ 'image_features': self._filter_features(activations)['block3'] }) return feature_maps.values()
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ feature_map_layout = { 'from_layer': ['Cell_7', 'Cell_11', '', '', '', ''][:self._num_layers], 'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers], 'use_explicit_padding': self._use_explicit_padding, 'use_depthwise': self._use_depthwise, } with slim.arg_scope( pnasnet_large_arg_scope_for_detection( is_batch_norm_training=self._is_training)): with slim.arg_scope( [slim.conv2d, slim.batch_norm, slim.separable_conv2d], reuse=self._reuse_weights): with (slim.arg_scope(self._conv_hyperparams_fn()) if self._override_base_feature_extractor_hyperparams else context_manager.IdentityContextManager()): _, image_features = pnasnet.build_pnasnet_large( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), num_classes=None, is_training=self._is_training, final_endpoint='Cell_11') with tf.variable_scope('SSD_feature_maps', reuse=self._reuse_weights): with slim.arg_scope(self._conv_hyperparams_fn()): feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier, min_depth=self._min_depth, insert_1x1_conv=True, image_features=image_features) return feature_maps.values()
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) feature_map_layout = { 'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''][:self._num_layers], 'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers], 'use_explicit_padding': self._use_explicit_padding, 'use_depthwise': self._use_depthwise, } with slim.arg_scope(self._conv_hyperparams_fn()): with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope: _, image_features = inception_v2.inception_v2_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Mixed_5c', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, scope=scope) feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier, min_depth=self._min_depth, insert_1x1_conv=True, image_features=image_features) return feature_maps.values()
def _extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) image_features = self._mobilenet_v1( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple)) feature_block_list = [] for level in range(self._fpn_min_level, self._base_fpn_max_level + 1): feature_block_list.append(self._feature_blocks[level - 2]) feature_start_index = len(self._feature_blocks) - self._num_levels fpn_input_image_features = [ (key, image_features[feature_start_index + index]) for index, key in enumerate(feature_block_list) ] fpn_features = self._fpn_features_generator(fpn_input_image_features) feature_maps = [] for level in range(self._fpn_min_level, self._base_fpn_max_level + 1): feature_maps.append(fpn_features['top_down_{}'.format( self._feature_blocks[level - 2])]) last_feature_map = fpn_features['top_down_{}'.format( self._feature_blocks[self._base_fpn_max_level - 2])] for coarse_feature_layers in self._coarse_feature_layers: for layer in coarse_feature_layers: last_feature_map = layer(last_feature_map) feature_maps.append(last_feature_map) return feature_maps
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope: with slim.arg_scope( mobilenet_v1.mobilenet_v1_arg_scope( is_training=None, regularize_depthwise=True)): with (slim.arg_scope(self._conv_hyperparams_fn()) if self._override_base_feature_extractor_hyperparams else context_manager.IdentityContextManager()): _, image_features = mobilenet_v1.mobilenet_v1_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Conv2d_13_pointwise', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, use_explicit_padding=self._use_explicit_padding, scope=scope) with slim.arg_scope(self._conv_hyperparams_fn()): feature_maps = feature_map_generators.pooling_pyramid_feature_maps( base_feature_map_depth=0, num_layers=6, image_features={ 'image_features': image_features['Conv2d_11_pointwise'] }) return feature_maps.values()
def _extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 129, preprocessed_inputs) image_features = self._resnet_v1( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple)) feature_block_list = [] for level in range(self._fpn_min_level, self._base_fpn_max_level + 1): feature_block_list.append('block{}'.format(level - 1)) feature_block_map = dict(zip(self._resnet_block_names, image_features)) fpn_input_image_features = [(feature_block, feature_block_map[feature_block]) for feature_block in feature_block_list] fpn_features = self._fpn_features_generator(fpn_input_image_features) feature_maps = [] for level in range(self._fpn_min_level, self._base_fpn_max_level + 1): feature_maps.append(fpn_features['top_down_block{}'.format(level - 1)]) last_feature_map = fpn_features['top_down_block{}'.format( self._base_fpn_max_level - 1)] for coarse_feature_layers in self._coarse_feature_layers: for layer in coarse_feature_layers: last_feature_map = layer(last_feature_map) feature_maps.append(last_feature_map) return feature_maps
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope: with slim.arg_scope( mobilenet_v1.mobilenet_v1_arg_scope( is_training=None, regularize_depthwise=True)): with (slim.arg_scope(self._conv_hyperparams_fn()) if self._override_base_feature_extractor_hyperparams else context_manager.IdentityContextManager()): _, image_features = mobilenet_v1.mobilenet_v1_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Conv2d_13_pointwise', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, conv_defs=self._conv_defs, use_explicit_padding=self._use_explicit_padding, scope=scope) depth_fn = lambda d: max(int(d * self._depth_multiplier), self. _min_depth) with slim.arg_scope(self._conv_hyperparams_fn()): with tf.variable_scope('fpn', reuse=self._reuse_weights): feature_blocks = [ 'Conv2d_3_pointwise', 'Conv2d_5_pointwise', 'Conv2d_11_pointwise', 'Conv2d_13_pointwise' ] base_fpn_max_level = min(self._fpn_max_level, 5) feature_block_list = [] for level in range(self._fpn_min_level, base_fpn_max_level + 1): feature_block_list.append(feature_blocks[level - 2]) fpn_features = feature_map_generators.fpn_top_down_feature_maps( [(key, image_features[key]) for key in feature_block_list], depth=depth_fn(self._additional_layer_depth), use_depthwise=self._use_depthwise, use_explicit_padding=self._use_explicit_padding) feature_maps = [] for level in range(self._fpn_min_level, base_fpn_max_level + 1): feature_maps.append(fpn_features['top_down_{}'.format( feature_blocks[level - 2])]) last_feature_map = fpn_features['top_down_{}'.format( feature_blocks[base_fpn_max_level - 2])] # Construct coarse features padding = 'VALID' if self._use_explicit_padding else 'SAME' kernel_size = 3 for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1): if self._use_depthwise: conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1) else: conv_op = slim.conv2d if self._use_explicit_padding: last_feature_map = ops.fixed_padding( last_feature_map, kernel_size) last_feature_map = conv_op( last_feature_map, num_outputs=depth_fn(self._additional_layer_depth), kernel_size=[kernel_size, kernel_size], stride=2, padding=padding, scope='bottom_up_Conv2d_{}'.format( i - base_fpn_max_level + 13)) feature_maps.append(last_feature_map) return feature_maps
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 129, preprocessed_inputs) with tf.variable_scope(self._resnet_scope_name, reuse=self._reuse_weights) as scope: with slim.arg_scope(resnet_v1.resnet_arg_scope()): with (slim.arg_scope(self._conv_hyperparams_fn()) if self._override_base_feature_extractor_hyperparams else context_manager.IdentityContextManager()): _, image_features = self._resnet_base_fn( inputs=ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), num_classes=None, is_training=None, global_pool=False, output_stride=None, store_non_strided_activations=True, min_base_depth=self._min_depth, depth_multiplier=self._depth_multiplier, scope=scope) image_features = self._filter_features(image_features) depth_fn = lambda d: max(int(d * self._depth_multiplier), self. _min_depth) with slim.arg_scope(self._conv_hyperparams_fn()): with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights): base_fpn_max_level = min(self._fpn_max_level, 5) feature_block_list = [] for level in range(self._fpn_min_level, base_fpn_max_level + 1): feature_block_list.append('block{}'.format(level - 1)) fpn_features = feature_map_generators.fpn_top_down_feature_maps( [(key, image_features[key]) for key in feature_block_list], depth=depth_fn(self._additional_layer_depth)) feature_maps = [] for level in range(self._fpn_min_level, base_fpn_max_level + 1): feature_maps.append( fpn_features['top_down_block{}'.format(level - 1)]) last_feature_map = fpn_features['top_down_block{}'.format( base_fpn_max_level - 1)] # Construct coarse features for i in range(base_fpn_max_level, self._fpn_max_level): last_feature_map = slim.conv2d( last_feature_map, num_outputs=depth_fn(self._additional_layer_depth), kernel_size=[3, 3], stride=2, padding='SAME', scope='bottom_up_block{}'.format(i)) feature_maps.append(last_feature_map) return feature_maps
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] Raises: ValueError: if image height or width are not 256 pixels. """ image_shape = preprocessed_inputs.get_shape() image_shape.assert_has_rank(4) image_height = image_shape[1].value image_width = image_shape[2].value if image_height is None or image_width is None: shape_assert = tf.Assert( tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256), tf.equal(tf.shape(preprocessed_inputs)[2], 256)), ['image size must be 256 in both height and width.']) with tf.control_dependencies([shape_assert]): preprocessed_inputs = tf.identity(preprocessed_inputs) elif image_height != 256 or image_width != 256: raise ValueError( 'image size must be = 256 in both height and width;' ' image dim = %d,%d' % (image_height, image_width)) feature_map_layout = { 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''], 'layer_depth': [-1, -1, 512, 256, 256], 'conv_kernel_size': [-1, -1, 3, 3, 2], 'use_explicit_padding': self._use_explicit_padding, 'use_depthwise': self._use_depthwise, } with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope: with slim.arg_scope( mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)): with (slim.arg_scope(self._conv_hyperparams_fn()) if self._override_base_feature_extractor_hyperparams else context_manager.IdentityContextManager()): _, image_features = mobilenet_v1.mobilenet_v1_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Conv2d_13_pointwise', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, use_explicit_padding=self._use_explicit_padding, scope=scope) with slim.arg_scope(self._conv_hyperparams_fn()): feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier, min_depth=self._min_depth, insert_1x1_conv=True, image_features=image_features) return feature_maps.values()