def extract_features(images, model_options, weight_decay=0.0001, reuse=None, is_training=False, fine_tune_batch_norm=False, nas_training_hyper_parameters=None): """Extracts features by the particular model_variant. Args: images: A tensor of size [batch, height, width, channels]. model_options: A ModelOptions instance to configure models. weight_decay: The weight decay for model variables. reuse: Reuse the model variables or not. is_training: Is training or not. fine_tune_batch_norm: Fine-tune the batch norm parameters or not. nas_training_hyper_parameters: A dictionary storing hyper-parameters for training nas models. Its keys are: - `drop_path_keep_prob`: Probability to keep each path in the cell when training. - `total_training_steps`: Total training steps to help drop path probability calculation. Returns: concat_logits: A tensor of size [batch, feature_height, feature_width, feature_channels], where feature_height/feature_width are determined by the images height/width and output_stride. end_points: A dictionary from components of the network to the corresponding activation. """ features, end_points = feature_extractor.extract_features( images, output_stride=model_options.output_stride, multi_grid=model_options.multi_grid, model_variant=model_options.model_variant, depth_multiplier=model_options.depth_multiplier, divisible_by=model_options.divisible_by, weight_decay=weight_decay, reuse=reuse, is_training=is_training, preprocessed_images_dtype=model_options.preprocessed_images_dtype, fine_tune_batch_norm=fine_tune_batch_norm, nas_architecture_options=model_options.nas_architecture_options, nas_training_hyper_parameters=nas_training_hyper_parameters, use_bounded_activation=model_options.use_bounded_activation) if not model_options.aspp_with_batch_norm: return features, end_points else: if model_options.dense_prediction_cell_config is not None: tf.compat.v1.logging.info('Using dense prediction cell config.') dense_prediction_layer = dense_prediction_cell.DensePredictionCell( config=model_options.dense_prediction_cell_config, hparams={ 'conv_rate_multiplier': 16 // model_options.output_stride, }) concat_logits = dense_prediction_layer.build_cell( features, output_stride=model_options.output_stride, crop_size=model_options.crop_size, image_pooling_crop_size=model_options.image_pooling_crop_size, weight_decay=weight_decay, reuse=reuse, is_training=is_training, fine_tune_batch_norm=fine_tune_batch_norm) return concat_logits, end_points else: # The following codes employ the DeepLabv3 ASPP module. Note that we # could express the ASPP module as one particular dense prediction # cell architecture. We do not do so but leave the following codes # for backward compatibility. batch_norm_params = utils.get_batch_norm_params( decay=0.9997, epsilon=1e-5, scale=True, is_training=(is_training and fine_tune_batch_norm), sync_batch_norm_method=model_options.sync_batch_norm_method) batch_norm = utils.get_batch_norm_fn( model_options.sync_batch_norm_method) activation_fn = (tf.nn.relu6 if model_options.use_bounded_activation else tf.nn.relu) with slim.arg_scope( [slim.conv2d, slim.separable_conv2d], weights_regularizer=slim.l2_regularizer(weight_decay), activation_fn=activation_fn, normalizer_fn=batch_norm, padding='SAME', stride=1, reuse=reuse): with slim.arg_scope([batch_norm], **batch_norm_params): depth = model_options.aspp_convs_filters branch_logits = [] if model_options.add_image_level_feature: if model_options.crop_size is not None: image_pooling_crop_size = model_options.image_pooling_crop_size # If image_pooling_crop_size is not specified, use crop_size. if image_pooling_crop_size is None: image_pooling_crop_size = model_options.crop_size pool_height = scale_dimension( image_pooling_crop_size[0], 1. / model_options.output_stride) pool_width = scale_dimension( image_pooling_crop_size[1], 1. / model_options.output_stride) image_feature = slim.avg_pool2d( features, [pool_height, pool_width], model_options.image_pooling_stride, padding='VALID') resize_height = scale_dimension( model_options.crop_size[0], 1. / model_options.output_stride) resize_width = scale_dimension( model_options.crop_size[1], 1. / model_options.output_stride) else: # If crop_size is None, we simply do global pooling. pool_height = tf.shape(features)[1] pool_width = tf.shape(features)[2] image_feature = tf.reduce_mean(features, axis=[1, 2], keepdims=True) resize_height = pool_height resize_width = pool_width image_feature_activation_fn = tf.nn.relu image_feature_normalizer_fn = batch_norm if model_options.aspp_with_squeeze_and_excitation: image_feature_activation_fn = tf.nn.sigmoid if model_options.image_se_uses_qsigmoid: image_feature_activation_fn = utils.q_sigmoid image_feature_normalizer_fn = None image_feature = slim.conv2d( image_feature, depth, 1, activation_fn=image_feature_activation_fn, normalizer_fn=image_feature_normalizer_fn, scope=IMAGE_POOLING_SCOPE) image_feature = _resize_bilinear( image_feature, [resize_height, resize_width], image_feature.dtype) # Set shape for resize_height/resize_width if they are not Tensor. if isinstance(resize_height, tf.Tensor): resize_height = None if isinstance(resize_width, tf.Tensor): resize_width = None image_feature.set_shape( [None, resize_height, resize_width, depth]) if not model_options.aspp_with_squeeze_and_excitation: branch_logits.append(image_feature) # Employ a 1x1 convolution. branch_logits.append( slim.conv2d(features, depth, 1, scope=ASPP_SCOPE + str(0))) if model_options.atrous_rates: # Employ 3x3 convolutions with different atrous rates. for i, rate in enumerate(model_options.atrous_rates, 1): scope = ASPP_SCOPE + str(i) if model_options.aspp_with_separable_conv: aspp_features = split_separable_conv2d( features, filters=depth, rate=rate, weight_decay=weight_decay, scope=scope) else: aspp_features = slim.conv2d(features, depth, 3, rate=rate, scope=scope) branch_logits.append(aspp_features) # Merge branch logits. concat_logits = tf.concat(branch_logits, 3) if model_options.aspp_with_concat_projection: concat_logits = slim.conv2d( concat_logits, depth, 1, scope=CONCAT_PROJECTION_SCOPE) concat_logits = slim.dropout( concat_logits, keep_prob=0.9, is_training=is_training, scope=CONCAT_PROJECTION_SCOPE + '_dropout') if (model_options.add_image_level_feature and model_options.aspp_with_squeeze_and_excitation): concat_logits *= image_feature return concat_logits, end_points
def refine_by_decoder(features, end_points, crop_size=None, decoder_output_stride=None, decoder_use_separable_conv=False, decoder_use_sum_merge=False, decoder_filters=256, decoder_output_is_logits=False, model_variant=None, weight_decay=0.0001, reuse=None, is_training=False, fine_tune_batch_norm=False, use_bounded_activation=False, sync_batch_norm_method='None'): """Adds the decoder to obtain sharper segmentation results. Args: features: A tensor of size [batch, features_height, features_width, features_channels]. end_points: A dictionary from components of the network to the corresponding activation. crop_size: A tuple [crop_height, crop_width] specifying whole patch crop size. decoder_output_stride: A list of integers specifying the output stride of low-level features used in the decoder module. decoder_use_separable_conv: Employ separable convolution for decoder or not. decoder_use_sum_merge: Boolean, decoder uses simple sum merge or not. decoder_filters: Integer, decoder filter size. decoder_output_is_logits: Boolean, using decoder output as logits or not. model_variant: Model variant for feature extraction. weight_decay: The weight decay for model variables. reuse: Reuse the model variables or not. is_training: Is training or not. fine_tune_batch_norm: Fine-tune the batch norm parameters or not. use_bounded_activation: Whether or not to use bounded activations. Bounded activations better lend themselves to quantized inference. sync_batch_norm_method: String, method used to sync batch norm. Currently only support `None` (no sync batch norm) and `tpu` (use tpu code to sync batch norm). Returns: Decoder output with size [batch, decoder_height, decoder_width, decoder_channels]. Raises: ValueError: If crop_size is None. """ if crop_size is None: raise ValueError('crop_size must be provided when using decoder.') batch_norm_params = utils.get_batch_norm_params( decay=0.9997, epsilon=1e-5, scale=True, is_training=(is_training and fine_tune_batch_norm), sync_batch_norm_method=sync_batch_norm_method) batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method) decoder_depth = decoder_filters projected_filters = 48 if decoder_use_sum_merge: # When using sum merge, the projected filters must be equal to decoder # filters. projected_filters = decoder_filters if decoder_output_is_logits: # Overwrite the setting when decoder output is logits. activation_fn = None normalizer_fn = None conv2d_kernel = 1 # Use original conv instead of separable conv. decoder_use_separable_conv = False else: # Default setting when decoder output is not logits. activation_fn = tf.nn.relu6 if use_bounded_activation else tf.nn.relu normalizer_fn = batch_norm conv2d_kernel = 3 with slim.arg_scope([slim.conv2d, slim.separable_conv2d], weights_regularizer=slim.l2_regularizer(weight_decay), activation_fn=activation_fn, normalizer_fn=normalizer_fn, padding='SAME', stride=1, reuse=reuse): with slim.arg_scope([batch_norm], **batch_norm_params): with tf.variable_scope(DECODER_SCOPE, DECODER_SCOPE, [features]): decoder_features = features decoder_stage = 0 scope_suffix = '' for output_stride in decoder_output_stride: feature_list = feature_extractor.networks_to_feature_maps[ model_variant][feature_extractor. DECODER_END_POINTS][output_stride] # If only one decoder stage, we do not change the scope name in # order for backward compactibility. if decoder_stage: scope_suffix = '_{}'.format(decoder_stage) for i, name in enumerate(feature_list): decoder_features_list = [decoder_features] # MobileNet and NAS variants use different naming convention. if ('mobilenet' in model_variant or model_variant.startswith('mnas') or model_variant.startswith('nas')): feature_name = name else: feature_name = '{}/{}'.format( feature_extractor.name_scope[model_variant], name) decoder_features_list.append( slim.conv2d(end_points[feature_name], projected_filters, 1, scope='feature_projection' + str(i) + scope_suffix)) # Determine the output size. decoder_height = scale_dimension( crop_size[0], 1.0 / output_stride) decoder_width = scale_dimension( crop_size[1], 1.0 / output_stride) # Resize to decoder_height/decoder_width. for j, feature in enumerate(decoder_features_list): decoder_features_list[j] = _resize_bilinear( feature, [decoder_height, decoder_width], feature.dtype) h = (None if isinstance(decoder_height, tf.Tensor) else decoder_height) w = (None if isinstance(decoder_width, tf.Tensor) else decoder_width) decoder_features_list[j].set_shape( [None, h, w, None]) if decoder_use_sum_merge: decoder_features = _decoder_with_sum_merge( decoder_features_list, decoder_depth, conv2d_kernel=conv2d_kernel, decoder_use_separable_conv= decoder_use_separable_conv, weight_decay=weight_decay, scope_suffix=scope_suffix) else: if not decoder_use_separable_conv: scope_suffix = str(i) + scope_suffix decoder_features = _decoder_with_concat_merge( decoder_features_list, decoder_depth, decoder_use_separable_conv= decoder_use_separable_conv, weight_decay=weight_decay, scope_suffix=scope_suffix) decoder_stage += 1 return decoder_features
def extract_features( images, # 提取经过主干网络和ASPP后的特征 再最后经过1x1卷积之后加上一层dropout层 model_options, weight_decay=0.0001, reuse=None, is_training=False, fine_tune_batch_norm=False, nas_training_hyper_parameters=None): """Extracts features by the particular model_variant. Args: images: A tensor of size [batch, height, width, channels]. model_options: A ModelOptions instance to configure models. weight_decay: The weight decay for model variables. reuse: Reuse the model variables or not. is_training: Is training or not. fine_tune_batch_norm: Fine-tune the batch norm parameters or not. nas_training_hyper_parameters: A dictionary storing hyper-parameters for training nas models. Its keys are: - `drop_path_keep_prob`: Probability to keep each path in the cell when training. - `total_training_steps`: Total training steps to help drop path probability calculation. Returns: concat_logits: A tensor of size [batch, feature_height, feature_width, feature_channels], where feature_height/feature_width are determined by the images height/width and output_stride. end_points: A dictionary from components of the network to the corresponding activation. """ features, end_points = feature_extractor.extract_features( # 经过主干网络得到的特征图 images, output_stride=model_options.output_stride, # 默认为 16 multi_grid=model_options.multi_grid, # 默认为None 使用resnet时为[1,2,4] model_variant=model_options.model_variant, # xception_65 模型名称 depth_multiplier=model_options. depth_multiplier, # 深度乘子 默认为1.0 mobilenet中使用 divisible_by=model_options.divisible_by, # mobilenet中使用 默认为None weight_decay=weight_decay, # 权重衰退 0.0004 reuse=reuse, is_training=is_training, preprocessed_images_dtype=model_options. preprocessed_images_dtype, # 预处理图像类型 fine_tune_batch_norm=fine_tune_batch_norm, # 微调BN层 nas_architecture_options=model_options.nas_architecture_options, nas_training_hyper_parameters=nas_training_hyper_parameters, use_bounded_activation=model_options.use_bounded_activation ) # 使用边界激活函数 False if not model_options.aspp_with_batch_norm: # mobileNet中设置 若不需要ASPP,直接返回主干网络提取的特征图 return features, end_points else: if model_options.dense_prediction_cell_config is not None: tf.logging.info('Using dense prediction cell config.') dense_prediction_layer = dense_prediction_cell.DensePredictionCell( config=model_options.dense_prediction_cell_config, hparams={ 'conv_rate_multiplier': 16 // model_options.output_stride, }) concat_logits = dense_prediction_layer.build_cell( features, output_stride=model_options.output_stride, crop_size=model_options.crop_size, image_pooling_crop_size=model_options.image_pooling_crop_size, weight_decay=weight_decay, reuse=reuse, is_training=is_training, fine_tune_batch_norm=fine_tune_batch_norm) return concat_logits, end_points else: # The following codes employ the DeepLabv3 ASPP module. Note that we # could express the ASPP module as one particular dense prediction # cell architecture. We do not do so but leave the following codes # for backward compatibility.# 空洞空间金字塔池化 ASPP batch_norm_params = utils.get_batch_norm_params( # 定义BN层参数 decay=0.9997, epsilon=1e-5, scale=True, is_training=(is_training and fine_tune_batch_norm), sync_batch_norm_method=model_options.sync_batch_norm_method) batch_norm = utils.get_batch_norm_fn( # BN层 model_options.sync_batch_norm_method) activation_fn = ( # 激活函数:有指定边界激活函数就用relu6否则用relu tf.nn.relu6 if model_options.use_bounded_activation else tf.nn.relu) with slim.arg_scope( [slim.conv2d, slim.separable_conv2d], weights_regularizer=slim.l2_regularizer(weight_decay), activation_fn=activation_fn, normalizer_fn=batch_norm, padding='SAME', stride=1, reuse=reuse): with slim.arg_scope([batch_norm], **batch_norm_params): depth = model_options.aspp_convs_filters # ASPP卷积过滤器的数量 256 branch_logits = [] # 存储ASPP中并行的特征 # 添加image_pooling层 if model_options.add_image_level_feature: # 添加图像水平特征 默认为True if model_options.crop_size is not None: image_pooling_crop_size = model_options.image_pooling_crop_size # If image_pooling_crop_size is not specified, use crop_size. if image_pooling_crop_size is None: #若image_pooling_crop_size未指定,用crop_size image_pooling_crop_size = model_options.crop_size # image_pooling池化输出的高度,宽度进行尺度变化 pool_height = scale_dimension( image_pooling_crop_size[0], 1. / model_options.output_stride) pool_width = scale_dimension( image_pooling_crop_size[1], 1. / model_options.output_stride) image_feature = slim.avg_pool2d( # image pooling采用平均池化 features, [pool_height, pool_width], model_options.image_pooling_stride, padding='VALID') resize_height = scale_dimension( # 高度映射 保证固定维度的输出 model_options.crop_size[0], 1. / model_options.output_stride) resize_width = scale_dimension( # 宽度映射 保证固定维度的输出 model_options.crop_size[1], 1. / model_options.output_stride) else: # If crop_size is None, we simply do global pooling. 如果crop_size为空,我们用全局池化 pool_height = tf.shape(features)[1] pool_width = tf.shape(features)[2] image_feature = tf.reduce_mean( # 若crop_size为空,采用全局池化 features, axis=[1, 2], keepdims=True) resize_height = pool_height resize_width = pool_width image_feature_activation_fn = tf.nn.relu image_feature_normalizer_fn = batch_norm if model_options.aspp_with_squeeze_and_excitation: # 一般为False,暂不考虑 image_feature_activation_fn = tf.nn.sigmoid if model_options.image_se_uses_qsigmoid: image_feature_activation_fn = utils.q_sigmoid image_feature_normalizer_fn = None image_feature = slim.conv2d( image_feature, depth, 1, # image_pooling出来的特征进行1x1卷积 activation_fn=image_feature_activation_fn, normalizer_fn=image_feature_normalizer_fn, scope=IMAGE_POOLING_SCOPE) image_feature = _resize_bilinear( # 上采样 image_feature, [resize_height, resize_width], image_feature.dtype) # Set shape for resize_height/resize_width if they are not Tensor. if isinstance(resize_height, tf.Tensor): resize_height = None if isinstance(resize_width, tf.Tensor): resize_width = None image_feature.set_shape( [None, resize_height, resize_width, depth]) if not model_options.aspp_with_squeeze_and_excitation: branch_logits.append(image_feature) # Employ a 1x1 convolution. 添加使用1x1卷积 branch_logits.append( slim.conv2d(features, depth, 1, scope=ASPP_SCOPE + str(0))) if model_options.atrous_rates: # 空洞卷积 # Employ 3x3 convolutions with different atrous rates. 为每个3X3的卷积使用指定的空洞率 for i, rate in enumerate(model_options.atrous_rates, 1): # rate 为空洞率 scope = ASPP_SCOPE + str(i) if model_options.aspp_with_separable_conv: # 若使用空洞的深度可分离卷积,可大大减少计算量 默认为True aspp_features = split_separable_conv2d( features, filters=depth, rate=rate, weight_decay=weight_decay, scope=scope) else: aspp_features = slim.conv2d(features, depth, 3, rate=rate, scope=scope) branch_logits.append( aspp_features) # 将空洞卷积提取的特征加入列表 # Merge branch logits. concat_logits = tf.concat(branch_logits, 3) # 将这些特征图进行按照通道的那个维度进行级联组合 if model_options.aspp_with_concat_projection: # 级联之后添加1x1卷积 默认为True concat_logits = slim.conv2d( concat_logits, depth, 1, scope=CONCAT_PROJECTION_SCOPE) concat_logits = slim.dropout( # 再加dropout层 防止过拟合 concat_logits, keep_prob=0.9, is_training=is_training, scope=CONCAT_PROJECTION_SCOPE + '_dropout') if (model_options.add_image_level_feature and model_options.aspp_with_squeeze_and_excitation): concat_logits *= image_feature return concat_logits, end_points