def xception_arg_scope(weight_decay=0.00004,
                       batch_norm_decay=0.9997,
                       batch_norm_epsilon=0.001,
                       batch_norm_scale=True,
                       weights_initializer_stddev=0.09,
                       regularize_depthwise=False,
                       use_batch_norm=True,
                       use_bounded_activation=False,
                       sync_batch_norm_method='None'):
    """Builds the default arg scope used by the Xception models.

    Args:
      weight_decay: Weight decay used for L2 regularization of the model.
      batch_norm_decay: Moving-average decay used when estimating layer
        activation statistics in batch normalization.
      batch_norm_epsilon: Small constant that prevents division by zero when
        activations are normalized by their variance in batch normalization.
      batch_norm_scale: If True, the batch normalization layer uses an
        explicit `gamma` multiplier to scale the activations.
      weights_initializer_stddev: Standard deviation of the truncated normal
        weight initializer.
      regularize_depthwise: Whether to apply L2 regularization to the
        separable convolution weights as well.
      use_batch_norm: Whether to use batch normalization.
      use_bounded_activation: Whether to use bounded activations (relu6
        instead of relu). Bounded activations better lend themselves to
        quantized inference.
      sync_batch_norm_method: String, sync batchnorm method. Currently only
        support `None`. Also, it is only effective for Xception.

    Returns:
      An `arg_scope` to use for the Xception models.
    """
    batch_norm_params = dict(
        decay=batch_norm_decay,
        epsilon=batch_norm_epsilon,
        scale=batch_norm_scale)

    # Separable convolutions are only regularized on request.
    depthwise_regularizer = (
        slim.l2_regularizer(weight_decay) if regularize_depthwise else None)

    if use_bounded_activation:
        activation_fn = tf.nn.relu6
    else:
        activation_fn = tf.nn.relu

    batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)
    normalizer_fn = batch_norm if use_batch_norm else None
    weights_initializer = tf.truncated_normal_initializer(
        stddev=weights_initializer_stddev)
    conv_ops = [slim.conv2d, slim.separable_conv2d]

    with slim.arg_scope(conv_ops,
                        weights_initializer=weights_initializer,
                        activation_fn=activation_fn,
                        normalizer_fn=normalizer_fn):
        with slim.arg_scope([batch_norm], **batch_norm_params):
            with slim.arg_scope(
                    [slim.conv2d],
                    weights_regularizer=slim.l2_regularizer(weight_decay)):
                with slim.arg_scope(
                        [slim.separable_conv2d],
                        weights_regularizer=depthwise_regularizer):
                    # Explicit padding is switched off exactly when bounded
                    # activations are switched on.
                    with slim.arg_scope(
                            [xception_module],
                            use_bounded_activation=use_bounded_activation,
                            use_explicit_padding=not use_bounded_activation
                    ) as arg_sc:
                        return arg_sc
def nas_arg_scope(weight_decay=4e-5,
                  batch_norm_decay=0.9997,
                  batch_norm_epsilon=0.001,
                  sync_batch_norm_method='None'):
    """Builds the default arg scope for the NAS models.

    Args:
      weight_decay: Scale of the L2 regularizer applied to fully connected,
        convolution and separable convolution weights.
      batch_norm_decay: Decay for the batch norm moving averages.
      batch_norm_epsilon: Epsilon that prevents zeros in the variance.
      sync_batch_norm_method: String passed to `utils.get_batch_norm_fn` to
        select the batch norm function that is configured here.

    Returns:
      The innermost `arg_scope`, carrying all defaults set by this function.
    """
    batch_norm_params = dict(
        decay=batch_norm_decay,
        epsilon=batch_norm_epsilon,
        scale=True)
    batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)

    l2_penalty = contrib_layers.l2_regularizer(weight_decay)
    scaled_uniform_init = contrib_layers.variance_scaling_initializer(
        factor=1 / 3.0, mode='FAN_IN', uniform=True)

    conv_ops = [slim.conv2d, slim.separable_conv2d]
    weighted_ops = [slim.fully_connected] + conv_ops

    with arg_scope(weighted_ops,
                   weights_regularizer=l2_penalty,
                   weights_initializer=scaled_uniform_init):
        with arg_scope([slim.fully_connected],
                       activation_fn=None,
                       scope='FC'):
            # Convolutions get neither an activation nor a bias by default.
            with arg_scope(conv_ops,
                           activation_fn=None,
                           biases_initializer=None):
                with arg_scope([batch_norm], **batch_norm_params) as sc:
                    return sc
def hnasnet(images,
            num_classes,
            is_training=True,
            global_pool=False,
            output_stride=8,
            nas_architecture_options=None,
            nas_training_hyper_parameters=None,
            reuse=None,
            scope='hnasnet',
            final_endpoint=None,
            sync_batch_norm_method='None'):
    """Builds the hierarchical NAS model.

    Args:
      images: Input passed through to `_build_nas_base`.
      num_classes: Passed through to `_build_nas_base`.
      is_training: Whether the model is built for training. When False, the
        `drop_path_keep_prob` hparam is forced to 1.0.
      global_pool: Passed through to `_build_nas_base`.
      output_stride: Passed through to `_build_nas_base`.
      nas_architecture_options: Mapping that must provide the keys
        `nas_stem_output_num_conv_filters`, `nas_use_classification_head` and
        `nas_remove_os32_stride`. Must not be None.
      nas_training_hyper_parameters: Optional mapping providing
        `drop_path_keep_prob` and `total_training_steps`; when given (and
        non-empty) both values override the defaults from `config`.
      reuse: Passed through to `_build_nas_base`.
      scope: Passed through to `_build_nas_base`.
      final_endpoint: Passed through to `_build_nas_base`.
      sync_batch_norm_method: String passed to `utils.get_batch_norm_fn`.

    Returns:
      Whatever `_build_nas_base` returns for the configured cell and backbone.

    Raises:
      ValueError: If nas_architecture_options is None.
    """
    if nas_architecture_options is None:
        raise ValueError(
            'Using NAS model variants. nas_architecture_options cannot be None.')

    stem_filters = nas_architecture_options['nas_stem_output_num_conv_filters']
    hparams = config(num_conv_filters=stem_filters)
    if nas_training_hyper_parameters:
        for hparam_name in ('drop_path_keep_prob', 'total_training_steps'):
            hparams.set_hparam(hparam_name,
                               nas_training_hyper_parameters[hparam_name])
    if not is_training:
        tf.logging.info('During inference, setting drop_path_keep_prob = 1.0.')
        hparams.set_hparam('drop_path_keep_prob', 1.0)
    tf.logging.info(hparams)

    # Fixed cell description and backbone layout of this architecture.
    operations = [
        'atrous_5x5', 'separable_3x3_2',
        'separable_3x3_2', 'atrous_3x3',
        'separable_3x3_2', 'separable_3x3_2',
        'separable_5x5_2', 'separable_5x5_2',
        'separable_5x5_2', 'atrous_5x5',
    ]
    used_hiddenstates = [1, 1, 0, 0, 0, 0, 0]
    hiddenstate_indices = [1, 0, 1, 0, 3, 1, 4, 2, 3, 5]
    backbone = [0, 0, 0, 1, 2, 1, 2, 2, 3, 3, 2, 1]

    batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)
    cell = NASBaseCell(hparams.num_conv_filters,
                       operations,
                       used_hiddenstates,
                       hiddenstate_indices,
                       hparams.drop_path_keep_prob,
                       len(backbone),
                       hparams.total_training_steps,
                       batch_norm_fn=batch_norm)

    with arg_scope([slim.dropout, batch_norm], is_training=is_training):
        use_classification_head = nas_architecture_options[
            'nas_use_classification_head']
        remove_os32_stride = nas_architecture_options['nas_remove_os32_stride']
        return _build_nas_base(
            images,
            cell=cell,
            backbone=backbone,
            num_classes=num_classes,
            hparams=hparams,
            global_pool=global_pool,
            output_stride=output_stride,
            nas_use_classification_head=use_classification_head,
            reuse=reuse,
            scope=scope,
            final_endpoint=final_endpoint,
            batch_norm_fn=batch_norm,
            nas_remove_os32_stride=remove_os32_stride)
def pnasnet(images,
            num_classes,
            is_training=True,
            global_pool=False,
            output_stride=16,
            nas_architecture_options=None,
            nas_training_hyper_parameters=None,
            reuse=None,
            scope='pnasnet',
            final_endpoint=None,
            sync_batch_norm_method='None'):
    """Builds the PNASNet model.

    Args:
      images: Input passed through to `_build_nas_base`.
      num_classes: Passed through to `_build_nas_base`.
      is_training: Whether the model is built for training. When False, the
        `drop_path_keep_prob` hparam is forced to 1.0.
      global_pool: Passed through to `_build_nas_base`.
      output_stride: One of 8, 16 or 32; selects the backbone layout and is
        also passed through to `_build_nas_base`.
      nas_architecture_options: Mapping that must provide the keys
        `nas_stem_output_num_conv_filters`, `nas_use_classification_head` and
        `nas_remove_os32_stride`. Must not be None.
      nas_training_hyper_parameters: Optional mapping providing
        `drop_path_keep_prob` and `total_training_steps`; when given (and
        non-empty) both values override the defaults from `config`.
      reuse: Passed through to `_build_nas_base`.
      scope: Passed through to `_build_nas_base`.
      final_endpoint: Passed through to `_build_nas_base`.
      sync_batch_norm_method: String passed to `utils.get_batch_norm_fn`.

    Returns:
      Whatever `_build_nas_base` returns for the configured cell and backbone.

    Raises:
      ValueError: If nas_architecture_options is None, or if output_stride is
        not one of 8, 16, 32.
    """
    if nas_architecture_options is None:
        raise ValueError(
            'Using NAS model variants. nas_architecture_options cannot be None.'
        )

    # Pick the backbone first so an unsupported stride is rejected before any
    # hparams are built or logged.
    if output_stride == 8:
        backbone = [1] * 12
    elif output_stride == 16:
        backbone = [1] * 4 + [2] * 8
    elif output_stride == 32:
        backbone = [1] * 4 + [2] * 4 + [3] * 4
    else:
        # Format the value into the message; passing it as a second exception
        # argument would render the message as a tuple.
        raise ValueError('Unsupported output_stride %s' % (output_stride,))

    hparams = config(num_conv_filters=nas_architecture_options[
        'nas_stem_output_num_conv_filters'])
    if nas_training_hyper_parameters:
        hparams.set_hparam(
            'drop_path_keep_prob',
            nas_training_hyper_parameters['drop_path_keep_prob'])
        hparams.set_hparam(
            'total_training_steps',
            nas_training_hyper_parameters['total_training_steps'])
    if not is_training:
        tf.logging.info('During inference, setting drop_path_keep_prob = 1.0.')
        hparams.set_hparam('drop_path_keep_prob', 1.0)
    tf.logging.info(hparams)

    batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)
    cell = nas_genotypes.PNASCell(hparams.num_conv_filters,
                                  hparams.drop_path_keep_prob,
                                  len(backbone),
                                  hparams.total_training_steps,
                                  batch_norm_fn=batch_norm)
    with arg_scope([slim.dropout, batch_norm], is_training=is_training):
        return _build_nas_base(
            images,
            cell=cell,
            backbone=backbone,
            num_classes=num_classes,
            hparams=hparams,
            global_pool=global_pool,
            output_stride=output_stride,
            nas_use_classification_head=nas_architecture_options[
                'nas_use_classification_head'],
            reuse=reuse,
            scope=scope,
            final_endpoint=final_endpoint,
            batch_norm_fn=batch_norm,
            nas_remove_os32_stride=nas_architecture_options[
                'nas_remove_os32_stride'])
def refine_by_decoder(features,
                      end_points,
                      crop_size=None,
                      decoder_output_stride=None,
                      decoder_use_separable_conv=False,
                      decoder_use_sum_merge=False,
                      decoder_filters=256,
                      decoder_output_is_logits=False,
                      model_variant=None,
                      weight_decay=0.0001,
                      reuse=None,
                      is_training=False,
                      fine_tune_batch_norm=False,
                      use_bounded_activation=False,
                      sync_batch_norm_method='None'):
    """Refines features with the decoder to get sharper segmentation results.

    Args:
      features: A tensor of size [batch, features_height, features_width,
        features_channels].
      end_points: A dictionary from components of the network to the
        corresponding activation.
      crop_size: A tuple [crop_height, crop_width] specifying whole patch crop
        size.
      decoder_output_stride: A list of integers specifying the output stride
        of low-level features used in the decoder module. One decoder stage is
        built per entry.
      decoder_use_separable_conv: Employ separable convolution for decoder or
        not. Forced to False when decoder_output_is_logits is set.
      decoder_use_sum_merge: Boolean, decoder uses simple sum merge or not.
      decoder_filters: Integer, decoder filter size.
      decoder_output_is_logits: Boolean, using decoder output as logits or
        not.
      model_variant: Model variant for feature extraction.
      weight_decay: The weight decay for model variables.
      reuse: Reuse the model variables or not.
      is_training: Is training or not.
      fine_tune_batch_norm: Fine-tune the batch norm parameters or not.
      use_bounded_activation: Whether or not to use bounded activations.
        Bounded activations better lend themselves to quantized inference.
      sync_batch_norm_method: String, method used to sync batch norm.
        Currently only support `None` (no sync batch norm) and `tpu` (use tpu
        code to sync batch norm).

    Returns:
      Decoder output with size [batch, decoder_height, decoder_width,
        decoder_channels].

    Raises:
      ValueError: If crop_size is None.
    """
    if crop_size is None:
        raise ValueError('crop_size must be provided when using decoder.')

    batch_norm_params = utils.get_batch_norm_params(
        decay=0.9997,
        epsilon=1e-5,
        scale=True,
        is_training=(is_training and fine_tune_batch_norm),
        sync_batch_norm_method=sync_batch_norm_method)
    batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)

    # With sum merge the projected low-level features must have as many
    # channels as the decoder itself; otherwise 48 filters are used.
    projected_filters = decoder_filters if decoder_use_sum_merge else 48

    if decoder_output_is_logits:
        # Logits output: no activation, no normalization, plain 1x1 conv
        # instead of a separable one.
        activation_fn = None
        normalizer_fn = None
        conv2d_kernel = 1
        decoder_use_separable_conv = False
    else:
        if use_bounded_activation:
            activation_fn = tf.nn.relu6
        else:
            activation_fn = tf.nn.relu
        normalizer_fn = batch_norm
        conv2d_kernel = 3

    with slim.arg_scope(
            [slim.conv2d, slim.separable_conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            activation_fn=activation_fn,
            normalizer_fn=normalizer_fn,
            padding='SAME',
            stride=1,
            reuse=reuse):
        with slim.arg_scope([batch_norm], **batch_norm_params):
            with tf.variable_scope(DECODER_SCOPE, DECODER_SCOPE, [features]):
                decoder_features = features
                scope_suffix = ''
                for decoder_stage, output_stride in enumerate(
                        decoder_output_stride):
                    variant_feature_maps = (
                        feature_extractor.networks_to_feature_maps[
                            model_variant])
                    feature_list = variant_feature_maps[
                        feature_extractor.DECODER_END_POINTS][output_stride]
                    # The first stage keeps the unsuffixed scope names for
                    # backward compatibility; later stages get '_<stage>'.
                    if decoder_stage:
                        scope_suffix = '_{}'.format(decoder_stage)
                    for i, name in enumerate(feature_list):
                        # MobileNet and NAS variants use different naming
                        # convention: their end point names are used as is.
                        if ('mobilenet' in model_variant or
                                model_variant.startswith(('mnas', 'nas'))):
                            feature_name = name
                        else:
                            feature_name = '{}/{}'.format(
                                feature_extractor.name_scope[model_variant],
                                name)
                        projection = slim.conv2d(
                            end_points[feature_name],
                            projected_filters,
                            1,
                            scope='feature_projection' + str(i) + scope_suffix)

                        # Determine the output size.
                        decoder_height = scale_dimension(
                            crop_size[0], 1.0 / output_stride)
                        decoder_width = scale_dimension(
                            crop_size[1], 1.0 / output_stride)
                        # Only statically known sizes can be recorded in the
                        # static shape.
                        static_height = (
                            None if isinstance(decoder_height, tf.Tensor)
                            else decoder_height)
                        static_width = (
                            None if isinstance(decoder_width, tf.Tensor)
                            else decoder_width)

                        # Resize both inputs to decoder_height/decoder_width.
                        merge_inputs = []
                        for feature in (decoder_features, projection):
                            resized = _resize_bilinear(
                                feature,
                                [decoder_height, decoder_width],
                                feature.dtype)
                            resized.set_shape(
                                [None, static_height, static_width, None])
                            merge_inputs.append(resized)

                        if decoder_use_sum_merge:
                            decoder_features = _decoder_with_sum_merge(
                                merge_inputs,
                                decoder_filters,
                                conv2d_kernel=conv2d_kernel,
                                decoder_use_separable_conv=(
                                    decoder_use_separable_conv),
                                weight_decay=weight_decay,
                                scope_suffix=scope_suffix)
                        else:
                            # The feature index is prepended to the running
                            # suffix (and kept for later iterations) when
                            # plain convolutions are used.
                            if not decoder_use_separable_conv:
                                scope_suffix = str(i) + scope_suffix
                            decoder_features = _decoder_with_concat_merge(
                                merge_inputs,
                                decoder_filters,
                                decoder_use_separable_conv=(
                                    decoder_use_separable_conv),
                                weight_decay=weight_decay,
                                scope_suffix=scope_suffix)
                return decoder_features
def extract_features(images,
                     model_options,
                     weight_decay=0.0001,
                     reuse=None,
                     is_training=False,
                     fine_tune_batch_norm=False,
                     nas_training_hyper_parameters=None):
    """Extracts features with the configured model variant, then applies ASPP.

    Args:
      images: A tensor of size [batch, height, width, channels].
      model_options: A ModelOptions instance to configure models.
      weight_decay: The weight decay for model variables.
      reuse: Reuse the model variables or not.
      is_training: Is training or not.
      fine_tune_batch_norm: Fine-tune the batch norm parameters or not.
      nas_training_hyper_parameters: A dictionary storing hyper-parameters for
        training nas models. Its keys are:
        - `drop_path_keep_prob`: Probability to keep each path in the cell
          when training.
        - `total_training_steps`: Total training steps to help drop path
          probability calculation.

    Returns:
      concat_logits: A tensor of size [batch, feature_height, feature_width,
        feature_channels], where feature_height/feature_width are determined
        by the images height/width and output_stride. When
        `model_options.aspp_with_batch_norm` is false, this is the backbone
        feature map itself.
      end_points: A dictionary from components of the network to the
        corresponding activation.
    """
    features, end_points = feature_extractor.extract_features(
        images,
        output_stride=model_options.output_stride,
        multi_grid=model_options.multi_grid,
        model_variant=model_options.model_variant,
        depth_multiplier=model_options.depth_multiplier,
        divisible_by=model_options.divisible_by,
        weight_decay=weight_decay,
        reuse=reuse,
        is_training=is_training,
        preprocessed_images_dtype=model_options.preprocessed_images_dtype,
        fine_tune_batch_norm=fine_tune_batch_norm,
        nas_architecture_options=model_options.nas_architecture_options,
        nas_training_hyper_parameters=nas_training_hyper_parameters,
        use_bounded_activation=model_options.use_bounded_activation)

    # Without ASPP the backbone features are returned untouched.
    if not model_options.aspp_with_batch_norm:
        return features, end_points

    if model_options.dense_prediction_cell_config is not None:
        tf.compat.v1.logging.info('Using dense prediction cell config.')
        dense_prediction_layer = dense_prediction_cell.DensePredictionCell(
            config=model_options.dense_prediction_cell_config,
            hparams={
                'conv_rate_multiplier': 16 // model_options.output_stride,
            })
        concat_logits = dense_prediction_layer.build_cell(
            features,
            output_stride=model_options.output_stride,
            crop_size=model_options.crop_size,
            image_pooling_crop_size=model_options.image_pooling_crop_size,
            weight_decay=weight_decay,
            reuse=reuse,
            is_training=is_training,
            fine_tune_batch_norm=fine_tune_batch_norm)
        return concat_logits, end_points

    # The following codes employ the DeepLabv3 ASPP module. Note that the
    # ASPP module could be expressed as one particular dense prediction cell
    # architecture; it is kept separate for backward compatibility.
    batch_norm_params = utils.get_batch_norm_params(
        decay=0.9997,
        epsilon=1e-5,
        scale=True,
        is_training=(is_training and fine_tune_batch_norm),
        sync_batch_norm_method=model_options.sync_batch_norm_method)
    batch_norm = utils.get_batch_norm_fn(model_options.sync_batch_norm_method)
    if model_options.use_bounded_activation:
        activation_fn = tf.nn.relu6
    else:
        activation_fn = tf.nn.relu

    with slim.arg_scope(
            [slim.conv2d, slim.separable_conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            activation_fn=activation_fn,
            normalizer_fn=batch_norm,
            padding='SAME',
            stride=1,
            reuse=reuse):
        with slim.arg_scope([batch_norm], **batch_norm_params):
            depth = model_options.aspp_convs_filters
            branch_logits = []

            if model_options.add_image_level_feature:
                if model_options.crop_size is not None:
                    # If image_pooling_crop_size is not specified, use
                    # crop_size.
                    image_pooling_crop_size = (
                        model_options.image_pooling_crop_size)
                    if image_pooling_crop_size is None:
                        image_pooling_crop_size = model_options.crop_size
                    pool_height = scale_dimension(
                        image_pooling_crop_size[0],
                        1. / model_options.output_stride)
                    pool_width = scale_dimension(
                        image_pooling_crop_size[1],
                        1. / model_options.output_stride)
                    image_feature = slim.avg_pool2d(
                        features,
                        [pool_height, pool_width],
                        model_options.image_pooling_stride,
                        padding='VALID')
                    resize_height = scale_dimension(
                        model_options.crop_size[0],
                        1. / model_options.output_stride)
                    resize_width = scale_dimension(
                        model_options.crop_size[1],
                        1. / model_options.output_stride)
                else:
                    # If crop_size is None, we simply do global pooling.
                    pool_height = tf.shape(features)[1]
                    pool_width = tf.shape(features)[2]
                    image_feature = tf.reduce_mean(
                        features, axis=[1, 2], keepdims=True)
                    resize_height = pool_height
                    resize_width = pool_width

                if model_options.aspp_with_squeeze_and_excitation:
                    # Squeeze-and-excitation gate: sigmoid-like activation
                    # and no normalization.
                    if model_options.image_se_uses_qsigmoid:
                        image_feature_activation_fn = utils.q_sigmoid
                    else:
                        image_feature_activation_fn = tf.nn.sigmoid
                    image_feature_normalizer_fn = None
                else:
                    image_feature_activation_fn = tf.nn.relu
                    image_feature_normalizer_fn = batch_norm

                image_feature = slim.conv2d(
                    image_feature,
                    depth,
                    1,
                    activation_fn=image_feature_activation_fn,
                    normalizer_fn=image_feature_normalizer_fn,
                    scope=IMAGE_POOLING_SCOPE)
                image_feature = _resize_bilinear(
                    image_feature,
                    [resize_height, resize_width],
                    image_feature.dtype)
                # Only sizes that are not Tensors can go into the static
                # shape.
                static_height = (
                    None if isinstance(resize_height, tf.Tensor)
                    else resize_height)
                static_width = (
                    None if isinstance(resize_width, tf.Tensor)
                    else resize_width)
                image_feature.set_shape(
                    [None, static_height, static_width, depth])
                # With squeeze-and-excitation the image feature is used as a
                # multiplicative gate at the end, not as a branch.
                if not model_options.aspp_with_squeeze_and_excitation:
                    branch_logits.append(image_feature)

            # Employ a 1x1 convolution.
            branch_logits.append(
                slim.conv2d(features, depth, 1, scope=ASPP_SCOPE + str(0)))

            if model_options.atrous_rates:
                # Employ 3x3 convolutions with different atrous rates.
                for i, rate in enumerate(model_options.atrous_rates, 1):
                    scope = ASPP_SCOPE + str(i)
                    if model_options.aspp_with_separable_conv:
                        aspp_features = split_separable_conv2d(
                            features,
                            filters=depth,
                            rate=rate,
                            weight_decay=weight_decay,
                            scope=scope)
                    else:
                        aspp_features = slim.conv2d(
                            features, depth, 3, rate=rate, scope=scope)
                    branch_logits.append(aspp_features)

            # Merge branch logits.
            concat_logits = tf.concat(branch_logits, 3)
            if model_options.aspp_with_concat_projection:
                concat_logits = slim.conv2d(
                    concat_logits, depth, 1, scope=CONCAT_PROJECTION_SCOPE)
                concat_logits = slim.dropout(
                    concat_logits,
                    keep_prob=0.9,
                    is_training=is_training,
                    scope=CONCAT_PROJECTION_SCOPE + '_dropout')
            if (model_options.add_image_level_feature and
                    model_options.aspp_with_squeeze_and_excitation):
                concat_logits *= image_feature

            return concat_logits, end_points
def resnet_arg_scope(weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True,
                     activation_fn=tf.nn.relu,
                     use_batch_norm=True,
                     sync_batch_norm_method='None',
                     normalization_method='unspecified',
                     use_weight_standardization=False):
    """Builds the default arg scope for the ResNet models.

    Args:
      weight_decay: The weight decay to use for regularizing the model.
      batch_norm_decay: The moving average decay when estimating layer
        activation statistics in batch normalization.
      batch_norm_epsilon: Small constant to prevent division by zero when
        normalizing activations by their variance in batch normalization.
      batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale
        the activations in the batch normalization layer.
      activation_fn: The activation function which is used in ResNet.
      use_batch_norm: Deprecated in favor of normalization_method. Only
        consulted when normalization_method is `unspecified`.
      sync_batch_norm_method: String, sync batchnorm method.
      normalization_method: String, one of `batch`, `none`, or `group`, to use
        batch normalization, no normalization, or group normalization. The
        default `unspecified` falls back to use_batch_norm.
      use_weight_standardization: Boolean, whether to use weight
        standardization.

    Returns:
      An `arg_scope` to use for the resnet models.

    Raises:
      ValueError: If normalization_method is not recognized.
    """
    batch_norm_params = dict(
        decay=batch_norm_decay,
        epsilon=batch_norm_epsilon,
        scale=batch_norm_scale)
    batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)

    # Resolve the legacy flag first, then map the method to a normalizer.
    if normalization_method == 'unspecified':
        normalization_method = 'batch' if use_batch_norm else 'none'
    if normalization_method == 'batch':
        normalizer_fn = batch_norm
    elif normalization_method == 'none':
        normalizer_fn = None
    elif normalization_method == 'group':
        normalizer_fn = slim.group_norm
    else:
        raise ValueError('Unrecognized normalization_method %s' %
                         normalization_method)

    conv_defaults = dict(
        weights_regularizer=slim.l2_regularizer(weight_decay),
        weights_initializer=slim.variance_scaling_initializer(),
        activation_fn=activation_fn,
        normalizer_fn=normalizer_fn,
        use_weight_standardization=use_weight_standardization)
    with slim.arg_scope([conv2d_ws.conv2d], **conv_defaults):
        with slim.arg_scope([batch_norm], **batch_norm_params):
            # padding='SAME' for pool1 makes feature alignment easier for
            # dense prediction tasks; it is also used in
            # https://github.com/facebook/fb.resnet.torch. The accompanying
            # code of 'Deep Residual Learning for Image Recognition' uses
            # padding='VALID' for pool1 instead, which can be selected with
            # slim.arg_scope([slim.max_pool2d], padding='VALID').
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc
def resnet_v1_beta(inputs,
                   blocks,
                   num_classes=None,
                   is_training=None,
                   global_pool=True,
                   output_stride=None,
                   root_block_fn=None,
                   reuse=None,
                   scope=None,
                   sync_batch_norm_method='None'):
    """Generator for v1 ResNet models (beta variant).

    This function generates a family of modified ResNet v1 models. In
    particular, the first original 7x7 convolution is replaced with three 3x3
    convolutions. See the resnet_v1_*() methods for specific model
    instantiations, obtained by selecting different block instantiations that
    produce ResNets of various depths.

    The code is modified from slim/nets/resnet_v1.py, and please refer to it
    for more details.

    Args:
      inputs: A tensor of size [batch, height_in, width_in, channels].
      blocks: A list of length equal to the number of ResNet blocks. Each
        element is a resnet_utils.Block object describing the units in the
        block.
      num_classes: Number of predicted classes for classification tasks. If
        None we return the features before the logit layer.
      is_training: Enable/disable is_training for batch normalization. If
        None, no is_training default is set for batch normalization.
      global_pool: If True, we perform global average pooling before computing
        the logits. Set to True for image classification, False for dense
        prediction.
      output_stride: If None, then the output will be computed at the nominal
        network stride. If output_stride is not None, it specifies the
        requested ratio of input to output spatial resolution and must be a
        multiple of 4.
      root_block_fn: The function consisting of convolution operations applied
        to the root input. If root_block_fn is None, use the original setting
        of ResNet-v1, which is simply one convolution with 7x7 kernel and
        stride=2.
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse 'scope' must be given.
      scope: Optional variable_scope.
      sync_batch_norm_method: String, sync batchnorm method.

    Returns:
      net: A rank-4 tensor of size [batch, height_out, width_out,
        channels_out]. If global_pool is False, then height_out and width_out
        are reduced by a factor of output_stride compared to the respective
        height_in and width_in, else both height_out and width_out equal one.
        If num_classes is None, then net is the output of the last ResNet
        block, potentially after global average pooling. If num_classes is not
        None, net contains the pre-softmax activations.
      end_points: A dictionary from components of the network to the
        corresponding activation.

    Raises:
      ValueError: If the target output_stride is not valid.
    """
    # Validate the stride before anything is added to the graph, so a bad
    # argument does not leave a half-built variable scope behind.
    if output_stride is not None:
        if output_stride % 4 != 0:
            raise ValueError('The output_stride needs to be a multiple of 4.')
        # What remains for the blocks after the root block and pool1 (stride 2
        # each for the default root block).
        output_stride //= 4

    if root_block_fn is None:
        root_block_fn = functools.partial(conv2d_ws.conv2d_same,
                                          num_outputs=64,
                                          kernel_size=7,
                                          stride=2,
                                          scope='conv1')
    batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)
    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope([
                conv2d_ws.conv2d, bottleneck, lite_bottleneck,
                resnet_utils.stack_blocks_dense
        ], outputs_collections=end_points_collection):
            if is_training is not None:
                arg_scope = slim.arg_scope([batch_norm],
                                           is_training=is_training)
            else:
                # An arg_scope over no ops serves as a do-nothing context.
                arg_scope = slim.arg_scope([])
            with arg_scope:
                net = inputs
                net = root_block_fn(net)
                net = slim.max_pool2d(net, 3, stride=2, padding='SAME',
                                      scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)

                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(net, [1, 2], name='pool5',
                                         keepdims=True)
                if num_classes is not None:
                    net = conv2d_ws.conv2d(net, num_classes, [1, 1],
                                           activation_fn=None,
                                           normalizer_fn=None,
                                           scope='logits',
                                           use_weight_standardization=False)
                # Convert end_points_collection into a dictionary of
                # end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)
                if num_classes is not None:
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')
                return net, end_points
def resnet_mod(inputs,
               num_classes=None,
               is_training=None,
               global_pool=False,
               output_stride=None,
               multi_grid=None,
               root_depth_multiplier=0.25,
               reuse=None,
               scope='resnet_v1_18',
               sync_batch_norm_method='None'):
    """A custom ResNet variant based on the v2 pre-activation architecture.

    The network is a single stride-2 3x3 root convolution, three
    `resnet_v2_small_beta_block` blocks (one unit each, stride 2), a fourth
    block of two `lite_bottleneck_v2` units whose atrous rates come from
    `multi_grid`, and a final batch norm + ReLU.

    Args:
      inputs: Input tensor fed to the root convolution.
      num_classes: Unused by this function; no logits layer is built.
      is_training: Enable/disable is_training for batch normalization. If
        None, no is_training default is set for batch normalization.
      global_pool: Unused by this function; no global pooling is applied.
      output_stride: If None, the blocks are stacked without a stride limit.
        Otherwise it must be a multiple of 4; half of it is passed on to
        `resnet_utils.stack_blocks_dense`.
      multi_grid: Optional sequence of exactly two unit rates for block4.
        Defaults to [1, 1].
      root_depth_multiplier: Unused by this function.
      reuse: Whether or not the network and its variables should be reused.
      scope: Optional variable_scope.
      sync_batch_norm_method: String passed to `utils.get_batch_norm_fn`.

    Returns:
      net: The output of the final batch norm + ReLU.
      end_points: A dictionary from components of the network to the
        corresponding activation.

    Raises:
      ValueError: If multi_grid does not have length 2, or if output_stride is
        not a multiple of 4.
    """
    # Validate all arguments before anything is added to the graph, so a bad
    # argument does not leave a half-built variable scope behind.
    if multi_grid is None:
        multi_grid = [1, 1]
    elif len(multi_grid) != 2:
        raise ValueError('Expect multi_grid to have length 2.')
    if output_stride is not None:
        if output_stride % 4 != 0:
            raise ValueError('The output_stride needs to be a multiple of 4.')
        # Only the stride-2 root convolution precedes the blocks (there is no
        # pool1 in this variant), so a factor of 2 is consumed here.
        output_stride //= 2

    # Define the multi_grid/atrous units of the last block.
    block4_args = [{'depth': 512, 'stride': 1, 'unit_rate': rate}
                   for rate in multi_grid]
    blocks = [
        resnet_v2_small_beta_block('block1', base_depth=64, num_units=1,
                                   stride=2),
        resnet_v2_small_beta_block('block2', base_depth=128, num_units=1,
                                   stride=2),
        resnet_v2_small_beta_block('block3', base_depth=256, num_units=1,
                                   stride=2),
        resnet_utils.Block('block4', lite_bottleneck_v2, block4_args),
    ]

    root_block_fn = functools.partial(conv2d_ws.conv2d_same,
                                      num_outputs=64,
                                      kernel_size=3,
                                      stride=2,
                                      scope='root_conv1')
    batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)
    with tf.variable_scope(scope, 'resnet_mod', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope([
                slim.conv2d, conv2d_ws.conv2d, lite_bottleneck_v2,
                resnet_utils.stack_blocks_dense
        ], outputs_collections=end_points_collection):
            if is_training is not None:
                arg_scope = slim.arg_scope([batch_norm],
                                           is_training=is_training)
            else:
                # An arg_scope over no ops serves as a do-nothing context.
                arg_scope = slim.arg_scope([])
            with arg_scope:
                net = inputs
                net = root_block_fn(net)
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)

                # Add a batchnorm and relu layer, since the last conv output
                # doesn't have them in v2.
                # NOTE(review): this calls slim.batch_norm directly rather
                # than `batch_norm`; if utils.get_batch_norm_fn returns a
                # different function, the is_training scope above will not
                # apply to this layer. Confirm this is intended.
                net = slim.batch_norm(net, activation_fn=tf.nn.relu,
                                      scope='postnorm')
                # Convert end_points_collection into a dictionary of
                # end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)
                return net, end_points
def xception(inputs,
             blocks,
             num_classes=None,
             is_training=True,
             global_pool=True,
             keep_prob=0.5,
             output_stride=None,
             reuse=None,
             scope=None,
             sync_batch_norm_method='None'):
    """Generator for Xception models; the entry point of the Xception network.

    This function generates a family of Xception models. See the xception_*()
    methods for specific model instantiations, obtained by selecting different
    block instantiations that produce Xception of various depths.

    Args:
      inputs: A tensor of size [batch, height_in, width_in, channels]. Must be
        floating point. If a pretrained checkpoint is used, pixel values
        should be the same as during training (see
        go/slim-classification-models for specifics).
      blocks: A list of length equal to the number of Xception blocks. Each
        element is an Xception Block object describing the units in the block.
      num_classes: Number of predicted classes for classification tasks. If 0
        or None, we return the features before the logit layer.
      is_training: whether batch_norm layers are in training mode.
      global_pool: If True, we perform global average pooling before computing
        the logits. Set to True for image classification, False for dense
        prediction.
      keep_prob: Keep probability used in the pre-logits dropout layer.
      output_stride: If None, then the output will be computed at the nominal
        network stride. If output_stride is not None, it specifies the
        requested ratio of input to output spatial resolution and must be a
        multiple of 2.
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse 'scope' must be given.
      scope: Optional variable_scope.
      sync_batch_norm_method: String, sync batchnorm method. Currently only
        support `None`.

    Returns:
      net: A rank-4 tensor of size [batch, height_out, width_out,
        channels_out]. If global_pool is False, then height_out and width_out
        are reduced by a factor of output_stride compared to the respective
        height_in and width_in, else both height_out and width_out equal one.
        If num_classes is 0 or None, then net is the output of the last
        Xception block, potentially after global average pooling. If
        num_classes is a non-zero integer, net contains the pre-softmax
        activations.
      end_points: A dictionary from components of the network to the
        corresponding activation.

    Raises:
      ValueError: If the target output_stride is not valid.
    """
    # Validate the stride before anything is added to the graph, so a bad
    # argument does not leave a half-built variable scope behind.
    if output_stride is not None:
        if output_stride % 2 != 0:
            raise ValueError('The output_stride needs to be a multiple of 2.')
        # The first root convolution below already downsamples by 2.
        output_stride //= 2

    with tf.variable_scope(scope, 'xception', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + 'end_points'
        batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)
        with slim.arg_scope([
                slim.conv2d, slim.separable_conv2d, xception_module,
                stack_blocks_dense
        ], outputs_collections=end_points_collection):
            with slim.arg_scope([batch_norm], is_training=is_training):
                net = inputs
                # Root block function operated on inputs: the first
                # convolutions at the entry of the network.
                net = resnet_utils.conv2d_same(net, 32, 3, stride=2,
                                               scope='entry_flow/conv1_1')
                net = resnet_utils.conv2d_same(net, 64, 3, stride=1,
                                               scope='entry_flow/conv1_2')

                # Extract features for entry_flow, middle_flow, and exit_flow
                # by chaining all of the Xception blocks.
                net = stack_blocks_dense(net, blocks, output_stride)

                # Convert end_points_collection into a dictionary of
                # end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection, clear_collection=True)

                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(net, [1, 2], name='global_pool',
                                         keepdims=True)
                    end_points['global_pool'] = net
                if num_classes:
                    net = slim.dropout(net, keep_prob=keep_prob,
                                       is_training=is_training,
                                       scope='prelogits_dropout')
                    net = slim.conv2d(net, num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None,
                                      scope='logits')
                    end_points[sc.name + '/logits'] = net
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')
                return net, end_points
def extract_features(images,
                     model_options,
                     weight_decay=0.0001,
                     reuse=None,
                     is_training=False,
                     fine_tune_batch_norm=False,
                     nas_training_hyper_parameters=None):
    """Extracts features with the backbone selected by `model_options`.

    The images are first passed through `feature_extractor.extract_features`.
    Depending on `model_options`, the result is then either returned as is
    (when `aspp_with_batch_norm` is false), refined by a dense prediction cell
    (when `dense_prediction_cell_config` is set), or refined by the DeepLabv3
    ASPP module built inline below.

    Args:
      images: A tensor of size [batch, height, width, channels].
      model_options: A ModelOptions instance to configure models.
      weight_decay: The weight decay for model variables.
      reuse: Reuse the model variables or not.
      is_training: Is training or not.
      fine_tune_batch_norm: Fine-tune the batch norm parameters or not.
      nas_training_hyper_parameters: A dictionary storing hyper-parameters for
        training nas models. Its keys are:
        - `drop_path_keep_prob`: Probability to keep each path in the cell
          when training.
        - `total_training_steps`: Total training steps to help drop path
          probability calculation.

    Returns:
      concat_logits: A tensor of size [batch, feature_height, feature_width,
        feature_channels], where feature_height/feature_width are determined
        by the images height/width and output_stride. When
        `model_options.aspp_with_batch_norm` is false this is the unmodified
        backbone feature map.
      end_points: A dictionary from components of the network to the
        corresponding activation.
    """
    # Feature map produced by the backbone network.
    features, end_points = feature_extractor.extract_features(
        images,
        output_stride=model_options.output_stride,
        multi_grid=model_options.multi_grid,
        model_variant=model_options.model_variant,
        depth_multiplier=model_options.depth_multiplier,
        divisible_by=model_options.divisible_by,
        weight_decay=weight_decay,
        reuse=reuse,
        is_training=is_training,
        preprocessed_images_dtype=model_options.preprocessed_images_dtype,
        fine_tune_batch_norm=fine_tune_batch_norm,
        nas_architecture_options=model_options.nas_architecture_options,
        nas_training_hyper_parameters=nas_training_hyper_parameters,
        use_bounded_activation=model_options.use_bounded_activation)

    # Without batch-normalized ASPP, return the backbone features directly.
    if not model_options.aspp_with_batch_norm:
        return features, end_points

    if model_options.dense_prediction_cell_config is not None:
        tf.logging.info('Using dense prediction cell config.')
        prediction_cell = dense_prediction_cell.DensePredictionCell(
            config=model_options.dense_prediction_cell_config,
            hparams={
                'conv_rate_multiplier': 16 // model_options.output_stride,
            })
        cell_logits = prediction_cell.build_cell(
            features,
            output_stride=model_options.output_stride,
            crop_size=model_options.crop_size,
            image_pooling_crop_size=model_options.image_pooling_crop_size,
            weight_decay=weight_decay,
            reuse=reuse,
            is_training=is_training,
            fine_tune_batch_norm=fine_tune_batch_norm)
        return cell_logits, end_points

    # The remaining code is the DeepLabv3 ASPP (atrous spatial pyramid
    # pooling) module. It could be expressed as one particular dense
    # prediction cell architecture, but is kept inline for backward
    # compatibility.
    batch_norm_params = utils.get_batch_norm_params(
        decay=0.9997,
        epsilon=1e-5,
        scale=True,
        is_training=(is_training and fine_tune_batch_norm),
        sync_batch_norm_method=model_options.sync_batch_norm_method)
    batch_norm = utils.get_batch_norm_fn(model_options.sync_batch_norm_method)
    # Bounded activation selects relu6, otherwise plain relu.
    if model_options.use_bounded_activation:
        activation_fn = tf.nn.relu6
    else:
        activation_fn = tf.nn.relu

    with slim.arg_scope(
            [slim.conv2d, slim.separable_conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            activation_fn=activation_fn,
            normalizer_fn=batch_norm,
            padding='SAME',
            stride=1,
            reuse=reuse), \
            slim.arg_scope([batch_norm], **batch_norm_params):
        # Number of filters used by every ASPP convolution.
        aspp_depth = model_options.aspp_convs_filters
        # Parallel ASPP branches, concatenated along channels further down.
        branches = []

        # Image-level (image pooling) feature branch.
        if model_options.add_image_level_feature:
            if model_options.crop_size is None:
                # No crop size available: fall back to global pooling and
                # resize back to the dynamic spatial shape of the features.
                resize_height = tf.shape(features)[1]
                resize_width = tf.shape(features)[2]
                image_feature = tf.reduce_mean(
                    features, axis=[1, 2], keepdims=True)
            else:
                pooling_crop_size = model_options.image_pooling_crop_size
                # If image_pooling_crop_size is not specified, use crop_size.
                if pooling_crop_size is None:
                    pooling_crop_size = model_options.crop_size
                # Pooling window: the pooling crop size scaled down by the
                # output stride.
                pool_height = scale_dimension(
                    pooling_crop_size[0], 1. / model_options.output_stride)
                pool_width = scale_dimension(
                    pooling_crop_size[1], 1. / model_options.output_stride)
                image_feature = slim.avg_pool2d(
                    features, [pool_height, pool_width],
                    model_options.image_pooling_stride,
                    padding='VALID')
                # Target size for resizing the pooled feature afterwards.
                resize_height = scale_dimension(
                    model_options.crop_size[0],
                    1. / model_options.output_stride)
                resize_width = scale_dimension(
                    model_options.crop_size[1],
                    1. / model_options.output_stride)

            # With squeeze-and-excitation the pooled feature acts as a gate:
            # sigmoid-style activation and no normalizer. Otherwise it is a
            # regular relu + batch-norm branch.
            if model_options.aspp_with_squeeze_and_excitation:
                pooling_normalizer_fn = None
                if model_options.image_se_uses_qsigmoid:
                    pooling_activation_fn = utils.q_sigmoid
                else:
                    pooling_activation_fn = tf.nn.sigmoid
            else:
                pooling_activation_fn = tf.nn.relu
                pooling_normalizer_fn = batch_norm

            # 1x1 convolution on the pooled feature, then bilinear resize.
            image_feature = slim.conv2d(
                image_feature,
                aspp_depth,
                1,
                activation_fn=pooling_activation_fn,
                normalizer_fn=pooling_normalizer_fn,
                scope=IMAGE_POOLING_SCOPE)
            image_feature = _resize_bilinear(
                image_feature, [resize_height, resize_width],
                image_feature.dtype)

            # Only sizes that are not Tensors can be recorded in the static
            # shape; Tensor-valued sizes are left unknown.
            static_height = (
                None if isinstance(resize_height, tf.Tensor)
                else resize_height)
            static_width = (
                None if isinstance(resize_width, tf.Tensor)
                else resize_width)
            image_feature.set_shape(
                [None, static_height, static_width, aspp_depth])

            if not model_options.aspp_with_squeeze_and_excitation:
                branches.append(image_feature)

        # Employ a 1x1 convolution.
        branches.append(
            slim.conv2d(features, aspp_depth, 1, scope=ASPP_SCOPE + str(0)))

        if model_options.atrous_rates:
            # Employ 3x3 convolutions with different atrous rates.
            for branch_index, atrous_rate in enumerate(
                    model_options.atrous_rates, 1):
                branch_scope = ASPP_SCOPE + str(branch_index)
                if model_options.aspp_with_separable_conv:
                    # Atrous depthwise-separable variant of the 3x3 branch.
                    branch = split_separable_conv2d(
                        features,
                        filters=aspp_depth,
                        rate=atrous_rate,
                        weight_decay=weight_decay,
                        scope=branch_scope)
                else:
                    branch = slim.conv2d(
                        features,
                        aspp_depth,
                        3,
                        rate=atrous_rate,
                        scope=branch_scope)
                branches.append(branch)

        # Merge branch logits along the channel dimension.
        concat_logits = tf.concat(branches, 3)
        if model_options.aspp_with_concat_projection:
            # Project the concatenation with a 1x1 convolution, then dropout.
            concat_logits = slim.conv2d(
                concat_logits, aspp_depth, 1, scope=CONCAT_PROJECTION_SCOPE)
            concat_logits = slim.dropout(
                concat_logits,
                keep_prob=0.9,
                is_training=is_training,
                scope=CONCAT_PROJECTION_SCOPE + '_dropout')
        if (model_options.add_image_level_feature
                and model_options.aspp_with_squeeze_and_excitation):
            # Squeeze-and-excitation: scale the merged features by the
            # image-level gate instead of concatenating it as a branch.
            concat_logits *= image_feature

        return concat_logits, end_points