def _stacked_separable_conv(net, stride, operation, filter_size):
    """Runs the stack of separable convolutions encoded by `operation`.

    Every layer is ReLU -> separable conv -> batch norm. Only the first layer
    uses the requested `stride`; all following layers use stride 1. A float16
    input is computed in float32 and cast back to its original dtype.

    Args:
      net: input tensor.
      stride: stride of the first separable convolution.
      operation: operation name, parsed by `_operation_to_info` into a layer
        count and a kernel size.
      filter_size: number of output filters of every layer.

    Returns:
      The output tensor, cast to the dtype of the input.
    """
    num_layers, kernel_size = _operation_to_info(operation)
    input_dtype = net.dtype
    if input_dtype == tf.float16:
        net = tf.cast(net, tf.float32)

    def _relu_sep_bn(tensor, layer_index, layer_stride):
        # One ReLU -> separable conv -> batch norm unit with 1-based naming.
        suffix = '{0}x{0}_{1}'.format(kernel_size, layer_index)
        tensor = tf.nn.relu(tensor)
        tensor = slim.separable_conv2d(
            tensor,
            filter_size,
            kernel_size,
            depth_multiplier=1,
            scope='separable_' + suffix,
            stride=layer_stride)
        return slim.batch_norm(tensor, scope='bn_sep_' + suffix)

    layer_stride = stride
    for layer_index in range(1, num_layers):
        net = _relu_sep_bn(net, layer_index, layer_stride)
        layer_stride = 1  # Only the very first layer may be strided.
    net = _relu_sep_bn(net, num_layers, layer_stride)
    return tf.cast(net, input_dtype)
def _stacked_separable_conv(net, stride, operation, filter_size,
                            use_bounded_activation):
    """Runs the stack of separable convolutions encoded by `operation`.

    Every layer is activation -> separable conv -> batch norm. Only the first
    layer uses the requested `stride`; all following layers use stride 1.

    Args:
      net: input tensor.
      stride: stride of the first separable convolution.
      operation: operation name, parsed by `_operation_to_info` into a layer
        count and a kernel size.
      filter_size: number of output filters of every layer.
      use_bounded_activation: if truthy, `tf.nn.relu6` is used as the
        activation, otherwise `tf.nn.relu`.

    Returns:
      The output tensor of the last batch norm.
    """
    num_layers, kernel_size = _operation_to_info(operation)
    if use_bounded_activation:
        activation_fn = tf.nn.relu6
    else:
        activation_fn = tf.nn.relu

    def _apply_layer(tensor, layer_index, layer_stride):
        # One activation -> separable conv -> batch norm unit (1-based name).
        suffix = '{0}x{0}_{1}'.format(kernel_size, layer_index)
        tensor = activation_fn(tensor)
        tensor = slim.separable_conv2d(
            tensor,
            filter_size,
            kernel_size,
            depth_multiplier=1,
            scope='separable_' + suffix,
            stride=layer_stride)
        return slim.batch_norm(tensor, scope='bn_sep_' + suffix)

    layer_stride = stride
    for layer_index in range(1, num_layers):
        net = _apply_layer(net, layer_index, layer_stride)
        layer_stride = 1  # Only the very first layer may be strided.
    return _apply_layer(net, num_layers, layer_stride)
def middle_flow_block(inpt, num_outputs=728, kernel_size=None, unit_num=None):
    """Builds one residual unit of the Xception-65 middle flow.

    The unit is three repetitions of ReLU -> separable conv -> batch norm,
    followed by an element-wise addition with the unit's input.

    Args:
      inpt: input tensor; also used as the residual branch.
      num_outputs: number of output channels of each separable convolution.
      kernel_size: kernel size of the separable convolutions; defaults to
        [3, 3] when None.
      unit_num: unit index, formatted into the variable scope names.

    Returns:
      The sum of the convolution branch and `inpt`.
    """
    if kernel_size is None:
        kernel_size = [3, 3]
    module_scope = (
        'xception_65/middle_flow/block1/unit_{}/xception_module/'.format(
            str(unit_num)))

    net = inpt
    for conv_index in (1, 2, 3):
        conv_name = 'separable_conv{}'.format(conv_index)
        net = tf.nn.relu(net)
        net = slim.separable_conv2d(
            net,
            num_outputs,
            kernel_size,
            scope=module_scope + conv_name + '_depthwise')
        net = slim.batch_norm(
            net, scope=module_scope + conv_name + '_pointwise/BatchNorm')
    return tf.math.add(net, inpt)
def _expanded_conv(self, net, num_filters, expansion_rates, kernel_size,
                   stride, scope):
    """Expanded convolution: 1x1 expansion, depthwise conv, 1x1 projection.

    Args:
      net: input tensor.
      num_filters: number of output channels of the projection.
      expansion_rates: multiplier applied to `num_filters` to get the width
        of the expansion layer.
      kernel_size: kernel size of the depthwise convolution.
      stride: stride of the depthwise convolution.
      scope: scope prefix; '/expansion', '/depthwise' and '/projection' are
        appended for the three layers.

    Returns:
      The projected tensor (the projection uses `tf.identity` as activation).
    """
    expanded_num_filters = num_filters * expansion_rates
    # Explicit padding is only applied to strided depthwise convolutions.
    add_fixed_padding = self._use_explicit_padding and stride > 1
    if add_fixed_padding:
        padding = 'VALID'
    else:
        padding = 'SAME'

    expanded = slim.conv2d(
        net,
        expanded_num_filters,
        [1, 1],
        activation_fn=self._activation_fn,
        normalizer_fn=self._normalization_fn,
        padding=padding,
        scope=scope + '/expansion')
    if add_fixed_padding:
        expanded = ops.fixed_padding(expanded, kernel_size)
    depthwise = slim.separable_conv2d(
        expanded,
        num_outputs=None,
        kernel_size=kernel_size,
        activation_fn=self._activation_fn,
        normalizer_fn=self._normalization_fn,
        stride=stride,
        padding=padding,
        scope=scope + '/depthwise')
    return slim.conv2d(
        depthwise,
        num_filters,
        [1, 1],
        activation_fn=tf.identity,
        normalizer_fn=self._normalization_fn,
        padding=padding,
        scope=scope + '/projection')
def split_separable_conv2d(input_tensor,
                           num_outputs,
                           scope=None,
                           normalizer_fn=None,
                           stride=1,
                           rate=1,
                           endpoints=None,
                           use_explicit_padding=False):
    """Separable MobileNet V1 style convolution.

    A 3x3 depthwise convolution followed by a 1x1 (pointwise) convolution,
    built as two separate slim layers so that `normalizer_fn` and the default
    non-linearity are applied to the depthwise output as well. This matches
    the basic building block of the MobileNet paper
    (https://arxiv.org/abs/1704.04861).

    Args:
      input_tensor: input tensor.
      num_outputs: number of output channels of the pointwise convolution.
      scope: optional scope name. The depthwise layer uses the scope prefix
        plus 'depthwise', the pointwise layer the prefix plus 'pointwise'.
      normalizer_fn: normalizer function used for both layers.
      stride: stride of the depthwise convolution.
      rate: dilation rate of the depthwise convolution.
      endpoints: optional dict; the output of each layer is stored in it under
        the layer's scope name.
      use_explicit_padding: use 'VALID' padding for the depthwise convolution
        and pre-pad the input with `_fixed_padding` instead.

    Returns:
      The output tensor of the pointwise convolution.
    """
    with _v1_compatible_scope_naming(scope) as scope_prefix:
        depthwise_scope = scope_prefix + 'depthwise'
        pointwise_scope = scope_prefix + 'pointwise'
        if endpoints is None:
            endpoints = {}
        kernel_size = [3, 3]

        if use_explicit_padding:
            padding = 'VALID'
            input_tensor = _fixed_padding(input_tensor, kernel_size, rate)
        else:
            padding = 'SAME'

        depthwise = slim.separable_conv2d(
            input_tensor,
            None,
            kernel_size,
            depth_multiplier=1,
            stride=stride,
            rate=rate,
            normalizer_fn=normalizer_fn,
            padding=padding,
            scope=depthwise_scope)
        endpoints[depthwise_scope] = depthwise

        pointwise = slim.conv2d(
            depthwise,
            num_outputs,
            [1, 1],
            stride=1,
            normalizer_fn=normalizer_fn,
            scope=pointwise_scope)
        endpoints[pointwise_scope] = pointwise
        return pointwise
def predict(self, features, num_predictions_per_location):
    """Predicts masks.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.
      num_predictions_per_location: Number of predictions to be made per
        spatial location.

    Returns:
      mask_predictions: A float tensor of shape
        [batch_size, num_anchors, num_masks, mask_height, mask_width]
        representing the mask predictions for the proposals. `num_masks` is 1
        when masks are class agnostic, otherwise the number of classes.
    """
    num_masks = 1 if self._masks_are_class_agnostic else self._num_classes
    num_mask_channels = num_masks * self._mask_height * self._mask_width
    output_depth = num_predictions_per_location * num_mask_channels
    kernel = [self._kernel_size, self._kernel_size]

    net = features
    if self._use_dropout:
        net = slim.dropout(net, keep_prob=self._dropout_keep_prob)

    if not self._use_depthwise:
        mask_predictions = slim.conv2d(
            net,
            output_depth,
            kernel,
            activation_fn=None,
            normalizer_fn=None,
            normalizer_params=None,
            scope='MaskPredictor')
    else:
        # Depthwise spatial filtering followed by a 1x1 prediction layer.
        depthwise = slim.separable_conv2d(
            net,
            None,
            kernel,
            padding='SAME',
            depth_multiplier=1,
            stride=1,
            rate=1,
            scope='MaskPredictor_depthwise')
        mask_predictions = slim.conv2d(
            depthwise,
            output_depth,
            [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            normalizer_params=None,
            scope='MaskPredictor')

    # Fall back to the dynamic batch size when the static one is unknown.
    batch_size = features.get_shape().as_list()[0]
    if batch_size is None:
        batch_size = tf.shape(features)[0]
    return tf.reshape(
        mask_predictions,
        [batch_size, -1, num_masks, self._mask_height, self._mask_width])
def ghost_conv(x, ch_num, k_s=3, bn=slim.batch_norm, act=tf.nn.relu,
               name='ghost_conv'):
    """Ghost convolution: a regular conv plus a depthwise conv on its output.

    A `k_s` x `k_s` convolution produces `ch_num // 2` feature maps; a 3x3
    depthwise convolution of those maps is concatenated to them along the
    channel axis, so the result has `2 * (ch_num // 2)` channels.

    Args:
      x: input tensor; channels are expected on axis 3.
      ch_num: nominal number of output channels.
      k_s: kernel size of the primary convolution.
      bn: optional normalizer applied to the concatenated output; skipped
        when falsy.
      act: optional activation applied after `bn`; skipped when falsy.
      name: variable scope name.

    Returns:
      The concatenated (and optionally normalized / activated) tensor.
    """
    with tf.variable_scope(name_or_scope=name):
        primary = slim.conv2d(
            x,
            ch_num // 2,
            kernel_size=[k_s, k_s],
            activation_fn=None,
            normalizer_fn=None)
        cheap = slim.separable_conv2d(
            primary, None, [3, 3], activation_fn=None, normalizer_fn=None)
        out = tf.concat([primary, cheap], axis=3)
        for post_op in (bn, act):
            if post_op:
                out = post_op(out)
        return out
def predict(self, features, num_predictions_per_location):
    """Predicts class scores (including background).

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.
      num_predictions_per_location: Number of predictions to be made per
        spatial location.

    Returns:
      class_predictions_with_background: A float tensor of shape
        [batch_size, num_anchors, num_class_slots] representing the class
        predictions for the proposals.
    """
    net = features
    if self._use_dropout:
        net = slim.dropout(net, keep_prob=self._dropout_keep_prob)

    if self._use_depthwise:
        # Depthwise spatial filtering; the prediction layer becomes a 1x1 conv.
        net = slim.separable_conv2d(
            net,
            None,
            [self._kernel_size, self._kernel_size],
            padding='SAME',
            depth_multiplier=1,
            stride=1,
            rate=1,
            scope=self._scope + '_depthwise')
        prediction_kernel = [1, 1]
    else:
        prediction_kernel = [self._kernel_size, self._kernel_size]

    # The configured class-prediction bias is applied to the final prediction
    # layer in both modes. Previously it was silently ignored when
    # `use_depthwise` was set.
    class_predictions_with_background = slim.conv2d(
        net,
        num_predictions_per_location * self._num_class_slots,
        prediction_kernel,
        activation_fn=None,
        normalizer_fn=None,
        normalizer_params=None,
        scope=self._scope,
        biases_initializer=tf.constant_initializer(
            self._class_prediction_bias_init))

    if self._apply_sigmoid_to_scores:
        class_predictions_with_background = tf.sigmoid(
            class_predictions_with_background)

    # Fall back to the dynamic batch size when the static one is unknown.
    batch_size = features.get_shape().as_list()[0]
    if batch_size is None:
        batch_size = tf.shape(features)[0]
    class_predictions_with_background = tf.reshape(
        class_predictions_with_background,
        [batch_size, -1, self._num_class_slots])
    return class_predictions_with_background
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
    """Extracts second stage box classifier features.

    Applies two 3x3 separable convolutions (stride 2, then stride 1) inside
    the 'MobilenetV1' variable scope.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name (unused).

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
    conv_depth = 1024
    if self._skip_last_stride:
        # Scale the layer width by the configured percentage.
        conv_depth_ratio = float(self._conv_depth_ratio_in_percentage) / 100.0
        conv_depth = int(float(conv_depth) * conv_depth_ratio)
    # Never go below 16 channels.
    num_filters = max(int(conv_depth * 1.0), 16)

    with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights), \
            slim.arg_scope(
                mobilenet_v1.mobilenet_v1_arg_scope(
                    is_training=self._train_batch_norm,
                    weight_decay=self._weight_decay)), \
            slim.arg_scope(
                [slim.conv2d, slim.separable_conv2d], padding='SAME'):
        net = slim.separable_conv2d(
            proposal_feature_maps,
            num_filters,
            [3, 3],
            depth_multiplier=1,
            stride=2,
            scope='Conv2d_12_pointwise')
        return slim.separable_conv2d(
            net,
            num_filters,
            [3, 3],
            depth_multiplier=1,
            stride=1,
            scope='Conv2d_13_pointwise')
def pre_bottleneck(self, inputs, state, input_index):
    """Apply pre-bottleneck projection to inputs.

    The pre-bottleneck operation maps features with different channel counts
    into the same dimension, so that features from both large and small
    models can share the same LSTM cell.

    Args:
      inputs: 4D Tensor with shape [batch_size x width x height x input_size].
      state: 4D Tensor with shape [batch_size x width x height x state_size],
        or a rank 2 tensor when the cell flattens its state.
      input_index: integer index indicating which base features the inputs
        correspond to.

    Returns:
      inputs: pre-bottlenecked inputs.

    Raises:
      ValueError: If `inputs` is not rank 4, or if `state` does not have the
        rank expected for the cell's flatten_state setting (2 when flattened,
        4 otherwise).
    """
    # Sometimes state is a tuple, in which case it cannot be modified, e.g.
    # during training, tf.contrib.training.SequenceQueueingStateSaver
    # returns the state as a tuple. This should not be an issue since we
    # only need to modify state[1] during export, when state should be a
    # list.
    if len(inputs.shape) != 4:
        raise ValueError('Expect rank 4 feature tensor.')
    state_rank = len(state.shape)
    if self._flatten_state:
        if state_rank != 2:
            raise ValueError(
                'Expect rank 2 state tensor when flatten_state is set.')
    elif state_rank != 4:
        raise ValueError('Expect rank 4 state tensor.')

    # name_scope(None) resets the name scope so the identity op gets exactly
    # this name in the graph.
    with tf.name_scope(None):
        state = tf.identity(state, name='raw_inputs/init_lstm_h')
    if self._flatten_state:
        # Restore the spatial layout of the flattened state.
        batch_size = inputs.shape[0]
        height = inputs.shape[1]
        width = inputs.shape[2]
        state = tf.reshape(state, [batch_size, height, width, -1])

    with tf.variable_scope('conv_lstm_cell', reuse=tf.AUTO_REUSE):
        stacked = tf.concat([inputs, state], 3)
        inputs = slim.separable_conv2d(
            stacked,
            self.output_size[-1],
            self._filter_size,
            depth_multiplier=1,
            activation_fn=tf.nn.relu6,
            normalizer_fn=None,
            scope='bottleneck_%d' % input_index)
        # For exporting inference graph, we only mark the first timestep.
        with tf.name_scope(None):
            inputs = tf.identity(
                inputs, name='raw_outputs/base_endpoint_%d' % (input_index + 1))
    return inputs
def ghost_bottleneck(x, ch_exp, ch_out, s_s, name):
    """Ghost bottleneck: expand, optional strided depthwise conv, project.

    Args:
      x: input tensor; its static channel count is read from axis 3.
      ch_exp: channel count passed to the expanding `ghost_conv`.
      ch_out: channel count passed to the projecting `ghost_conv`.
      s_s: stride; a 3x3 depthwise convolution with batch norm is inserted
        only when it equals 2.
      name: variable scope name.

    Returns:
      The projected tensor, with `x` added as a residual when the stride is
      not 2 and the input channel count equals `ch_out`.
    """
    with tf.variable_scope(name_or_scope=name):
        downsample = (2 == s_s)
        net = ghost_conv(x, ch_exp, k_s=1, name='expand')
        if downsample:
            net = slim.separable_conv2d(
                net,
                None,
                kernel_size=3,
                stride=s_s,
                normalizer_fn=slim.batch_norm,
                activation_fn=None,
                scope='depthwise')
        net = ghost_conv(net, ch_out, k_s=1, act=None, name='project')

        ch_in = int(x.get_shape().as_list()[3])
        if downsample or ch_in != ch_out:
            # Shapes differ from the input, so no identity shortcut.
            return net
        return tf.add(net, x)
def predict(self, features, num_predictions_per_location):
    """Predicts boxes.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.
      num_predictions_per_location: Int specifying the number of box
        predictions to be made per spatial location.

    Returns:
      box_encodings: A float tensor of shape
        [batch_size, num_anchors, 1, code_size] representing the location of
        the objects.
    """
    output_depth = num_predictions_per_location * self._box_code_size
    kernel = [self._kernel_size, self._kernel_size]

    if not self._use_depthwise:
        box_encodings = slim.conv2d(
            features,
            output_depth,
            kernel,
            activation_fn=None,
            normalizer_fn=None,
            normalizer_params=None,
            scope='BoxEncodingPredictor')
    else:
        # Depthwise spatial filtering followed by a 1x1 prediction layer.
        depthwise = slim.separable_conv2d(
            features,
            None,
            kernel,
            padding='SAME',
            depth_multiplier=1,
            stride=1,
            rate=1,
            scope='BoxEncodingPredictor_depthwise')
        box_encodings = slim.conv2d(
            depthwise,
            output_depth,
            [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            normalizer_params=None,
            scope='BoxEncodingPredictor')

    # Fall back to the dynamic batch size when the static one is unknown.
    batch_size = features.get_shape().as_list()[0]
    if batch_size is None:
        batch_size = tf.shape(features)[0]

    # Clipping the box encodings to make the inference graph TPU friendly.
    clip_range = self._box_encodings_clip_range
    if clip_range is not None:
        box_encodings = tf.clip_by_value(
            box_encodings, clip_range.min, clip_range.max)

    return tf.reshape(box_encodings,
                      [batch_size, -1, 1, self._box_code_size])
def _sep_conv(self, net, num_filters, kernel_size, stride, scope):
    """Depthwise separable convolution.

    A depthwise convolution without activation or normalization, followed by
    a 1x1 convolution that uses the instance's activation and normalizer.

    Args:
      net: input tensor.
      num_filters: number of output channels of the pointwise convolution.
      kernel_size: kernel size of the depthwise convolution.
      stride: stride of the depthwise convolution.
      scope: scope prefix; '/depthwise' and '/pointwise' are appended.

    Returns:
      The output tensor of the pointwise convolution.
    """
    # Explicit padding is only applied to strided depthwise convolutions.
    add_fixed_padding = self._use_explicit_padding and stride > 1
    if add_fixed_padding:
        padding = 'VALID'
        net = ops.fixed_padding(net, kernel_size)
    else:
        padding = 'SAME'

    depthwise = slim.separable_conv2d(
        net,
        num_outputs=None,
        kernel_size=kernel_size,
        activation_fn=None,
        normalizer_fn=None,
        stride=stride,
        padding=padding,
        scope=scope + '/depthwise')
    return slim.conv2d(
        depthwise,
        num_filters,
        [1, 1],
        activation_fn=self._activation_fn,
        normalizer_fn=self._normalization_fn,
        padding=padding,
        scope=scope + '/pointwise')
def _separable_conv(
        h, filters, kernel_size, strides=1, activation_fn=tf.nn.relu6):
    """Separable convolution layer with batch norm and L2 regularization.

    Args:
      h: input tensor.
      filters: number of output filters.
      kernel_size: integer kernel size; it is squared to obtain the fan-in of
        the depthwise kernel.
      strides: stride of the convolution.
      activation_fn: activation function; must not be None.

    Returns:
      The output tensor of `slim.separable_conv2d`.

    Raises:
      ValueError: if `activation_fn` is None.
    """
    if activation_fn is None:
        raise ValueError('Activation function cannot be None. Use tf.identity '
                         'instead to better support quantized training.')

    # Depthwise variant of He initialization derived under the principle
    # proposed in the original paper. The original He normalization was
    # designed for full convolutions; tf.initializers.he_normal() can
    # over-estimate the fan-in of a depthwise kernel by orders of magnitude.
    # NOTE(review): the divisor looks like the std of a standard normal
    # truncated at +/-2 (truncation correction) -- confirm.
    truncation_correction = .87962566103423978
    stddev = (2.0 / kernel_size**2)**0.5 / truncation_correction
    conv_kwargs = dict(
        stride=strides,
        activation_fn=activation_fn,
        normalizer_fn=slim.batch_norm,
        weights_initializer=tf.initializers.truncated_normal(stddev=stddev),
        pointwise_initializer=tf.initializers.he_normal(),
        weights_regularizer=slim.l2_regularizer(BACKBONE_WEIGHT_DECAY),
        padding='SAME')
    return slim.separable_conv2d(h, filters, kernel_size, **conv_kwargs)
def simple_generator(z,
                     image_size,
                     num_interpolate=2,
                     channels=None,
                     depthwise_separate=None,
                     output_bn=True,
                     is_training=True,
                     reuse=None,
                     scope=None):
    """A simple generator model used in the paper.

    A dense layer is reshaped to an `init_size` x `init_size` feature map,
    which is then upsampled `num_interpolate` times by nearest-neighbour
    resizing (x2) followed by a convolution, and finally mapped to 3 channels
    with a tanh convolution.

    Args:
      z: input (latent) tensor fed to the dense layer.
      image_size: output image side; `init_size` is
        `image_size // 2**num_interpolate`.
      num_interpolate: number of resize + convolution stages.
      channels: channel count per stage; defaults to
        `128 // (i + 1)` for stage i (i.e. [128, 64] for two stages).
      depthwise_separate: per-stage flags; a truthy entry replaces the 3x3
        convolution by a 3x3 depthwise plus a 1x1 convolution. Defaults to
        all False.
      output_bn: whether to apply a final batch norm (scale only, no center).
      is_training: passed to the batch norm layers.
      reuse: passed to `tf.variable_scope`.
      scope: optional variable scope name; defaults to 'generator'.

    Returns:
      The generated image tensor.
    """
    # The generator structure is originally defined in "XNOR-Net: ImageNet
    # Classification Using Binary Convolutional Neural Networks"
    # https://arxiv.org/pdf/1603.05279.pdf
    if not channels:
        channels = [128 // divisor for divisor in range(1, num_interpolate + 1)]
    if not depthwise_separate:
        depthwise_separate = [False] * num_interpolate
    # noinspection PyTypeChecker
    assert len(channels) == len(depthwise_separate) == num_interpolate

    init_size = image_size // (2**num_interpolate)
    resize = functools.partial(
        tf.image.resize, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    # NOTE(review): the scope nesting below (everything, including the output
    # batch norm, inside both arg_scopes) is assumed -- confirm.
    with tf.variable_scope(scope, 'generator', [z], reuse=reuse):
        # noinspection PyCallingNonCallable
        with slim.arg_scope([slim.batch_norm],
                            decay=0.9,
                            center=True,
                            scale=True,
                            epsilon=0.8,
                            is_training=is_training):
            # noinspection PyCallingNonCallable
            with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                                activation_fn=tf.nn.leaky_relu,
                                normalizer_fn=slim.batch_norm):
                first_channels = channels[0]
                x = slim.fully_connected(
                    z,
                    init_size * init_size * first_channels,
                    activation_fn=None,
                    biases_initializer=None,
                    scope='dense')
                x = tf.reshape(x, [-1, init_size, init_size, first_channels])
                # The code of the DAFL paper uses different epsilon values
                # for batch normalization layers. We keep these settings for
                # reproducibility. See
                # https://github.com/huawei-noah/Data-Efficient-Model-Compression/blob/master/DAFL/DAFL-train.py#L54  pylint: disable=line-too-long
                # for details.
                x = slim.batch_norm(x, epsilon=1e-5, scope='bn_0')
                x = tf.nn.leaky_relu(x)

                # Interpolate layers: each stage doubles the spatial size.
                stages = zip(channels, depthwise_separate)
                for i, (n_channels, ds) in enumerate(stages):
                    size = init_size * 2**(i + 1)
                    x = resize(x, [size, size], name='interpolate_{}'.format(i))
                    if ds:
                        x = slim.separable_conv2d(
                            x, None, [3, 3],
                            scope='conv_{}_depthwise'.format(i))
                        x = slim.conv2d(
                            x, n_channels, [1, 1],
                            scope='conv_{}_pointwise'.format(i))
                    else:
                        x = slim.conv2d(
                            x, n_channels, [3, 3], scope='conv_{}'.format(i))

                # Output layer
                x = slim.conv2d(
                    x,
                    3,
                    [3, 3],
                    activation_fn=tf.nn.tanh,
                    normalizer_fn=None,
                    scope='conv_{}'.format(num_interpolate))
                if output_bn:
                    x = slim.batch_norm(
                        x,
                        center=False,
                        scale=True,
                        scope='bn_output',
                        is_training=is_training)
                return x
def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
                                  min_depth, insert_1x1_conv, image_features,
                                  pool_residual=False):
    """Generates multi resolution feature maps from input image features.

    Generates multi-scale feature maps for detection as in the SSD papers by
    Liu et al: https://arxiv.org/pdf/1512.02325v2.pdf, See Sec 2.1.

    More specifically, it performs the following two tasks:
    1) If a layer name is provided in the configuration, returns that layer as
       a feature map.
    2) If a layer name is left as an empty string, constructs a new feature
       map from the previous one using a stride 2 convolution (spatial
       resolution reduced by a factor of 2). The kernel size is 3 by default
       and can be customized by the caller.

    An example of the configuration for Inception V3:
    {
      'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
      'layer_depth': [-1, -1, -1, 512, 256, 128]
    }

    Args:
      feature_map_layout: Dictionary of specifications for the feature map
        layouts. Required keys:
          'from_layer': list of layer names; an empty string means "build a
            new layer from the previous feature map".
          'layer_depth': list of depths, one per 'from_layer' entry. By
            convention -1 for entries taken directly from `image_features`
            (their depth is not used).
        Optional keys:
          'conv_kernel_size': list of kernel sizes, indexed like
            'from_layer' (default kernel size is 3).
          'use_explicit_padding': if truthy, new layers use 'VALID' padding
            on an input pre-padded with `ops.fixed_padding`.
          'use_depthwise': if truthy, new layers are a depthwise convolution
            followed by a 1x1 convolution instead of a regular convolution.
      depth_multiplier: Depth multiplier for convolutional layers.
      min_depth: Minimum depth for convolutional layers.
      insert_1x1_conv: A boolean indicating whether an additional 1x1
        convolution (with half the layer depth) should be inserted before
        shrinking the feature map.
      image_features: A dictionary of handles to activation tensors from the
        base feature extractor.
      pool_residual: Whether to add an average pooling layer followed by a
        residual connection between subsequent feature maps when the channel
        depths match. Only takes effect on the 'use_depthwise' path.

    Returns:
      feature_maps: an OrderedDict mapping keys (feature map names) to
        tensors where each tensor has shape [batch, height_i, width_i,
        depth_i].

    Raises:
      ValueError: if the number of entries in 'from_layer' and 'layer_depth'
        do not match.
    """
    from_layers = feature_map_layout['from_layer']
    layer_depths = feature_map_layout['layer_depth']
    # Enforce the documented contract up front instead of failing later with
    # an IndexError (or silently ignoring surplus depths).
    if len(from_layers) != len(layer_depths):
        raise ValueError(
            "'from_layer' and 'layer_depth' must have the same number of "
            'entries, got {} and {}.'.format(
                len(from_layers), len(layer_depths)))

    depth_fn = get_depth_fn(depth_multiplier, min_depth)
    use_explicit_padding = feature_map_layout.get('use_explicit_padding',
                                                  False)
    use_depthwise = feature_map_layout.get('use_depthwise', False)
    has_kernel_sizes = 'conv_kernel_size' in feature_map_layout

    feature_map_keys = []
    feature_maps = []
    base_from_layer = ''
    for index, from_layer in enumerate(from_layers):
        layer_depth = layer_depths[index]
        conv_kernel_size = 3
        if has_kernel_sizes:
            conv_kernel_size = feature_map_layout['conv_kernel_size'][index]

        if from_layer:
            # Reuse an existing backbone activation as-is.
            feature_map = image_features[from_layer]
            base_from_layer = from_layer
            feature_map_keys.append(from_layer)
        else:
            # Build a new, coarser feature map on top of the previous one.
            pre_layer = feature_maps[-1]
            pre_layer_depth = pre_layer.get_shape().as_list()[3]
            out_depth = depth_fn(layer_depth)
            intermediate_layer = pre_layer
            if insert_1x1_conv:
                squeeze_depth = depth_fn(layer_depth // 2)
                layer_name = '{}_1_Conv2d_{}_1x1_{}'.format(
                    base_from_layer, index, squeeze_depth)
                intermediate_layer = slim.conv2d(
                    pre_layer,
                    squeeze_depth,
                    [1, 1],
                    padding='SAME',
                    stride=1,
                    scope=layer_name)
            layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format(
                base_from_layer, index, conv_kernel_size, conv_kernel_size,
                out_depth)
            stride = 2
            padding = 'SAME'
            if use_explicit_padding:
                padding = 'VALID'
                intermediate_layer = ops.fixed_padding(
                    intermediate_layer, conv_kernel_size)
            if use_depthwise:
                feature_map = slim.separable_conv2d(
                    intermediate_layer,
                    None,
                    [conv_kernel_size, conv_kernel_size],
                    depth_multiplier=1,
                    padding=padding,
                    stride=stride,
                    scope=layer_name + '_depthwise')
                feature_map = slim.conv2d(
                    feature_map,
                    out_depth,
                    [1, 1],
                    padding='SAME',
                    stride=1,
                    scope=layer_name)
                if pool_residual and pre_layer_depth == out_depth:
                    feature_map += slim.avg_pool2d(
                        pre_layer,
                        [3, 3],
                        padding='SAME',
                        stride=2,
                        scope=layer_name + '_pool')
            else:
                feature_map = slim.conv2d(
                    intermediate_layer,
                    out_depth,
                    [conv_kernel_size, conv_kernel_size],
                    padding=padding,
                    stride=stride,
                    scope=layer_name)
            feature_map_keys.append(layer_name)
        feature_maps.append(feature_map)
    return collections.OrderedDict(zip(feature_map_keys, feature_maps))
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM) with bottlenecking.

    Args:
      inputs: Input tensor at the current timestep.
      state: Tuple of tensors, the state and output at the previous timestep.
      scope: Optional scope; defaults to 'conv_lstm_cell'.

    Returns:
      A tuple where the first element is the LSTM output and the second is
      a LSTMStateTuple of the state at the current timestep.
    """
    with tf.variable_scope(scope or 'conv_lstm_cell', reuse=tf.AUTO_REUSE):
        prev_c, prev_h = state

        # Unflatten the state if it is stored flattened.
        if self._flatten_state:
            spatial_shape = [-1] + self.output_size
            prev_c = tf.reshape(prev_c, spatial_shape)
            prev_h = tf.reshape(prev_h, [-1] + self.output_size)

        # Summary of the input passed into the cell.
        if self._viz_gates:
            slim.summaries.add_histogram_summary(inputs, 'cell_input')

        if not self._pre_bottleneck:
            bottleneck = slim.separable_conv2d(
                tf.concat([inputs, prev_h], 3),
                self._num_units,
                self._filter_size,
                depth_multiplier=1,
                activation_fn=self._activation,
                normalizer_fn=None,
                scope='bottleneck')
            if self._viz_gates:
                slim.summaries.add_histogram_summary(bottleneck, 'bottleneck')
        else:
            # The caller already applied the bottleneck to the inputs.
            bottleneck = inputs

        # A single convolution produces all four gate pre-activations.
        gates = slim.separable_conv2d(
            bottleneck,
            4 * self._num_units,
            self._filter_size,
            depth_multiplier=1,
            activation_fn=None,
            normalizer_fn=None,
            scope='gates')
        i, j, f, o = tf.split(gates, 4, 3)

        forget_gate = tf.sigmoid(f + self._forget_bias)
        retained = prev_c * forget_gate
        input_gate = tf.sigmoid(i)
        candidate = self._activation(j)
        new_c = retained + input_gate * candidate
        if self._clip_state:
            new_c = tf.clip_by_value(new_c, -6, 6)
        new_h = self._activation(new_c) * tf.sigmoid(o)

        # Summary of the cell output and the new state.
        if self._viz_gates:
            slim.summaries.add_histogram_summary(new_h, 'cell_output')
            slim.summaries.add_histogram_summary(new_c, 'cell_state')

        if self._output_bottleneck:
            output = tf.concat([new_h, bottleneck], axis=3)
        else:
            output = new_h

        # Reflatten the state to store it.
        if self._flatten_state:
            new_c = tf.reshape(new_c, [-1, self._param_count])
            new_h = tf.reshape(new_h, [-1, self._param_count])

        return output, contrib_rnn.LSTMStateTuple(new_c, new_h)
def xfcn(inputs, dropout_rate, scope='xfcn'):
    """Defines the xfcn network.

    Builds an Xception-65 style backbone (entry flow, two middle-flow units,
    exit flow), takes five side outputs from it, upsamples each of them with
    transposed convolutions, crops them to the input size and fuses them with
    a 1x1 convolution.

    Args:
      inputs: Tensorflow placeholder that contains the input image
      dropout_rate: value passed as `keep_prob` to every `slim.dropout` call,
        i.e. it is used as the keep probability of the side-output dropouts.
      scope: Scope name for the network

    Returns:
      net: Output Tensor of the network
      end_points: Dictionary with all Tensors of the network
    """
    # Dynamic input shape; used to crop the upsampled side outputs.
    im_size = tf.shape(inputs)
    with tf.variable_scope(scope, 'xfcn', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs of all intermediate layers.
        with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                            outputs_collections=end_points_collection):
            # Entry flow
            # Block 1
            net = slim.conv2d(inputs, 32, [3, 3], stride=2, padding='VALID',
                              scope='xception_65/entry_flow/conv1_1')
            net = slim.batch_norm(
                net, scope='xception_65/entry_flow/conv1_1/BatchNorm')
            net = tf.nn.relu(net)
            net = slim.conv2d(net, 64, [3, 3],
                              scope='xception_65/entry_flow/conv1_2')
            net = slim.batch_norm(
                net, scope='xception_65/entry_flow/conv1_2/BatchNorm')
            net = tf.nn.relu(net)
            # Strided 1x1 shortcut, added to the main branch after pooling.
            residual_1 = slim.conv2d(
                net, 128, [1, 1], stride=2,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/shortcut'
            )
            residual_1 = slim.batch_norm(
                residual_1,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/shortcut/BatchNorm'
            )
            # block 2
            # Only this first separable conv disables the layer's activation.
            net = slim.separable_conv2d(
                net, 128, [3, 3], activation_fn=None,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv1_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv1_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 128, [3, 3],
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv2_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv2_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 128, [3, 3],
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv3_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv3_pointwise/BatchNorm'
            )
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='SAME')
            net_2 = tf.math.add(residual_1, net)
            # Dropout copy feeds the side output only; the backbone continues
            # from the un-dropped tensor.
            net_2_drop = slim.dropout(net_2, keep_prob=dropout_rate)
            residual_2 = slim.conv2d(
                net_2, 256, [1, 1], stride=2,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/shortcut'
            )
            residual_2 = slim.batch_norm(
                residual_2,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/shortcut/BatchNorm'
            )
            # block 3
            net = tf.nn.relu(net_2)
            net = slim.separable_conv2d(
                net, 256, [3, 3],
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv1_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv1_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 256, [3, 3],
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv2_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv2_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 256, [3, 3],
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv3_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv3_pointwise/BatchNorm'
            )
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='SAME')
            net_3 = tf.math.add(net, residual_2)
            net_3_drop = slim.dropout(net_3, keep_prob=dropout_rate)
            residual_3 = slim.conv2d(
                net_3, 728, [1, 1], stride=2,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/shortcut'
            )
            residual_3 = slim.batch_norm(
                residual_3,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/shortcut/BatchNorm'
            )
            # block 4
            net = tf.nn.relu(net_3)
            net = slim.separable_conv2d(
                net, 728, [3, 3],
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv1_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv1_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 728, [3, 3],
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv2_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv2_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 728, [3, 3],
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv3_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv3_pointwise/BatchNorm'
            )
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='SAME')
            net_4 = tf.math.add(net, residual_3)
            net_4_drop = slim.dropout(net_4, keep_prob=dropout_rate)
            # middle flow
            # block 5
            net = middle_flow_block(net_4, unit_num=1)
            # block 6 - 20
            # NOTE(review): despite the label above, only one further unit
            # (unit_num=2) is built here -- confirm this is intended.
            net = middle_flow_block(net, unit_num=2)
            net_5_drop = slim.dropout(net, keep_prob=dropout_rate)
            # Exit flow
            residual_20 = slim.conv2d(
                net, 1024, [1, 1], stride=2,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/shortcut')
            residual_20 = slim.batch_norm(
                residual_20,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/shortcut/BatchNorm'
            )
            # block 21
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 728, [3, 3],
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv1_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv1_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 1024, [3, 3],
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv2_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv2_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net, 1024, [3, 3],
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv3_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv3_pointwise/BatchNorm'
            )
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='SAME')
            net_6 = tf.math.add(net, residual_20)
            net_6_drop = slim.dropout(net_6, keep_prob=dropout_rate)
            # Get side outputs of the network
            # Each side branch maps a dropped-out backbone tensor to 16
            # channels with a 3x3 convolution at the given dilation rate.
            with slim.arg_scope([slim.conv2d],
                                biases_initializer=tf.zeros_initializer()):
                side_2 = slim.conv2d(net_2_drop, 16, [3, 3], rate=1,
                                     scope='conv2_2_16')
                side_3 = slim.conv2d(net_3_drop, 16, [3, 3], rate=2,
                                     scope='conv3_3_16')
                side_4 = slim.conv2d(net_4_drop, 16, [3, 3], rate=4,
                                     scope='conv4_3_16')
                side_5 = slim.conv2d(net_5_drop, 16, [3, 3], rate=4,
                                     scope='conv5_3_16')
                side_6 = slim.conv2d(net_6_drop, 16, [3, 3], rate=8,
                                     scope='conv6_3_16')
                # Supervise side outputs
                # 1x1 convolutions reduce each side branch to a single
                # channel score map.
                side_2_s = slim.conv2d(side_2, 1, [1, 1], scope='score-dsn_2')
                side_3_s = slim.conv2d(side_3, 1, [1, 1], scope='score-dsn_3')
                side_4_s = slim.conv2d(side_4, 1, [1, 1], scope='score-dsn_4')
                side_5_s = slim.conv2d(side_5, 1, [1, 1], scope='score-dsn_5')
                side_6_s = slim.conv2d(side_6, 1, [1, 1], scope='score-dsn_6')
                with slim.arg_scope([slim.convolution2d_transpose],
                                    outputs_collections=end_points_collection):
                    # Side outputs
                    # Upsample every score map with a transposed convolution
                    # (kernel, stride given positionally), crop it with
                    # crop_features using the input shape, and register the
                    # cropped tensor as an end point.
                    side_2_s = slim.convolution2d_transpose(
                        side_2_s, 1, 8, 4, scope='score-dsn_2-up')
                    side_2_s = crop_features(side_2_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_2-cr',
                                                side_2_s)
                    side_3_s = slim.convolution2d_transpose(
                        side_3_s, 1, 16, 8, scope='score-dsn_3-up')
                    side_3_s = crop_features(side_3_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_3-cr',
                                                side_3_s)
                    side_4_s = slim.convolution2d_transpose(
                        side_4_s, 1, 32, 16, scope='score-dsn_4-up')
                    side_4_s = crop_features(side_4_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_4-cr',
                                                side_4_s)
                    side_5_s = slim.convolution2d_transpose(
                        side_5_s, 1, 32, 16,
                        scope='score-dsn_5-up')
                    side_5_s = crop_features(side_5_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_5-cr',
                                                side_5_s)
                    side_6_s = slim.convolution2d_transpose(
                        side_6_s, 1, 64, 32, scope='score-dsn_6-up')
                    side_6_s = crop_features(side_6_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_6-cr',
                                                side_6_s)
                    # Main output
                    # The 16-channel side features are upsampled and cropped
                    # the same way; they are fused below.
                    side_2_f = slim.convolution2d_transpose(
                        side_2, 16, 8, 4, scope='score-multi2-up')
                    side_2_f = crop_features(side_2_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi2-cr',
                                                side_2_f)
                    side_3_f = slim.convolution2d_transpose(
                        side_3, 16, 16, 8, scope='score-multi3-up')
                    side_3_f = crop_features(side_3_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi3-cr',
                                                side_3_f)
                    side_4_f = slim.convolution2d_transpose(
                        side_4, 16, 32, 16, scope='score-multi4-up')
                    side_4_f = crop_features(side_4_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi4-cr',
                                                side_4_f)
                    side_5_f = slim.convolution2d_transpose(
                        side_5, 16, 32, 16, scope='score-multi5-up')
                    side_5_f = crop_features(side_5_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi5-cr',
                                                side_5_f)
                    side_6_f = slim.convolution2d_transpose(
                        side_6, 16, 64, 32, scope='score-multi6-up')
                    side_6_f = crop_features(side_6_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi6-cr',
                                                side_6_f)
                # Fuse all upsampled side features along the channel axis
                # into a single-channel output.
                concat_side = tf.concat(
                    [side_2_f, side_3_f, side_4_f, side_5_f, side_6_f],
                    axis=3)
                net = slim.conv2d(concat_side, 1, [1, 1],
                                  scope='upscore-fuse')
        end_points = utils.convert_collection_to_dict(end_points_collection)
        return net, end_points
def _mnasfpn_cell(feature_maps,
                  feature_levels,
                  cell_spec,
                  output_channel=48,
                  use_explicit_padding=False,
                  use_native_resize_op=False,
                  multiplier_func=None):
    """Builds a single MnasFPN cell on top of the given feature maps.

    Every block in `cell_spec` merges one or more already-available features
    (cell inputs or outputs of earlier blocks) and appends one new feature.
    The last `len(feature_maps)` features form the cell outputs.

    Args:
      feature_maps: input feature maps.
      feature_levels: levels of the feature maps.
      cell_spec: A list of Block configs. Each block is read for its
        `output_level`, `inputs`, `expansion_size` and `kernel_size`.
      output_channel: Number of features for the input, output and
        intermediate feature maps.
      use_explicit_padding: Whether to use explicit padding.
      use_native_resize_op: Whether to use native resize op.
      multiplier_func: Depth-multiplier function. If None, use identity
        function.

    Returns:
      A transformed list of feature maps at the same resolutions as the
      inputs.
    """
    # This is the level where multipliers are realized.
    scale_depth = (
        multiplier_func if multiplier_func is not None else (lambda x: x))

    num_outputs = len(feature_maps)
    features = list(feature_maps)
    levels = list(feature_levels)
    conv_padding = 'VALID' if use_explicit_padding else 'SAME'

    for block_index, block in enumerate(cell_spec):
        with tf.variable_scope('block_{}'.format(block_index)):
            target_level = block.output_level
            merged = None
            for input_index, source in enumerate(block.inputs):
                with tf.variable_scope('input_{}'.format(input_index)):
                    source_level = levels[source]
                    resized = _apply_size_dependent_ordering(
                        features[source],
                        source_level,
                        target_level,
                        scale_depth(block.expansion_size),
                        use_explicit_padding,
                        use_native_resize_op)
                # Accumulate pairwise instead of summing all inputs at once:
                # a single AddN op doesn't play well with TfLite.
                if merged is None:
                    merged = resized
                else:
                    merged += resized
            activated = tf.nn.relu6(merged)
            padded = _maybe_pad(
                activated, use_explicit_padding, block.kernel_size)
            block_output = slim.separable_conv2d(
                padded,
                scale_depth(output_channel),
                block.kernel_size,
                activation_fn=None,
                normalizer_fn=slim.batch_norm,
                padding=conv_padding,
                scope='SepConv')
        features.append(block_output)
        levels.append(target_level)

    # Cell-wide residuals: the k-th output receives the k-th feature of the
    # cell whenever their static shapes agree.
    first_output = len(features) - num_outputs
    for in_i in range(num_outputs):
        out_i = first_output + in_i
        out_shape = features[out_i].shape.as_list()
        if out_shape == features[in_i].shape.as_list():
            features[out_i] += features[in_i]

    return features[-num_outputs:]
def mobilenet_v1_base(inputs,
                      final_endpoint='Conv2d_13_pointwise',
                      min_depth=8,
                      depth_multiplier=1.0,
                      conv_defs=None,
                      output_stride=None,
                      scope=None):
    """Mobilenet v1.

    Constructs a Mobilenet v1 network from inputs to the given final endpoint.

    Args:
      inputs: a tensor of shape [batch_size, height, width, channels].
      final_endpoint: specifies the endpoint to construct the network up to.
        It can be one of ['Conv2d_0', 'Conv2d_1_pointwise',
        'Conv2d_2_pointwise', 'Conv2d_3_pointwise', 'Conv2d_4_pointwise',
        'Conv2d_5_pointwise', 'Conv2d_6_pointwise', 'Conv2d_7_pointwise',
        'Conv2d_8_pointwise', 'Conv2d_9_pointwise', 'Conv2d_10_pointwise',
        'Conv2d_11_pointwise', 'Conv2d_12_pointwise', 'Conv2d_13_pointwise'].
        The '<name>_depthwise' endpoint of every depthwise-separable layer is
        recorded as well and may also be requested.
      min_depth: Minimum depth value (number of channels) for all convolution
        ops. Enforced when depth_multiplier < 1, and not an active constraint
        when depth_multiplier >= 1.
      depth_multiplier: Float multiplier for the depth (number of channels)
        for all convolution ops. The value must be greater than zero. Typical
        usage will be to set this value in (0, 1) to reduce the number of
        parameters or computation cost of the model.
      conv_defs: A list of ConvDef namedtuples specifying the net
        architecture. Defaults to the module-level `_CONV_DEFS`.
      output_stride: An integer that specifies the requested ratio of input
        to output spatial resolution. If not None, then we invoke atrous
        convolution if necessary to prevent the network from reducing the
        spatial resolution of the activation maps. Allowed values are 8
        (accurate fully convolutional mode), 16 (fast fully convolutional
        mode), 32 (classification mode).
      scope: Optional variable_scope.

    Returns:
      tensor_out: output tensor corresponding to the final_endpoint.
      end_points: a dict mapping endpoint names to activations for external
        use, for example summaries or losses.

    Raises:
      ValueError: if final_endpoint is not set to one of the predefined
        values, or depth_multiplier <= 0, or the target output_stride is not
        allowed, or conv_defs contains an entry that is neither a Conv nor a
        DepthSepConv.
    """
    if depth_multiplier <= 0:
        raise ValueError('depth_multiplier is not greater than zero.')

    if conv_defs is None:
        conv_defs = _CONV_DEFS

    if output_stride is not None and output_stride not in [8, 16, 32]:
        raise ValueError('Only allowed output_stride values are 8, 16, 32.')

    def depth(d):
        """Returns the thinned channel count for the nominal depth `d`."""
        return max(int(d * depth_multiplier), min_depth)

    end_points = {}

    with tf.compat.v1.variable_scope(scope, 'MobilenetV1', [inputs]):
        with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                            padding='SAME'):
            # The current_stride variable keeps track of the output stride of
            # the activations, i.e., the running product of convolution
            # strides up to the current network layer. This allows us to
            # invoke atrous convolution whenever applying the next
            # convolution would result in the activations having output
            # stride larger than the target output_stride.
            current_stride = 1

            # The atrous convolution rate parameter.
            rate = 1

            net = inputs
            for i, conv_def in enumerate(conv_defs):
                # Reject unknown layer definitions before reading any of
                # their fields, so the documented ValueError is raised even
                # when the object lacks `stride` or `ltype`.
                if not isinstance(conv_def, (Conv, DepthSepConv)):
                    raise ValueError(
                        'Unknown convolution type %s for layer %d' %
                        (getattr(conv_def, 'ltype',
                                 type(conv_def).__name__), i))

                end_point_base = 'Conv2d_%d' % i

                if (output_stride is not None and
                        current_stride == output_stride):
                    # If we have reached the target output_stride, then we
                    # need to employ atrous convolution with stride=1 and
                    # multiply the atrous rate by the current unit's stride
                    # for use in subsequent layers.
                    layer_stride = 1
                    layer_rate = rate
                    rate *= conv_def.stride
                else:
                    layer_stride = conv_def.stride
                    layer_rate = 1
                    current_stride *= conv_def.stride

                if isinstance(conv_def, Conv):
                    end_point = end_point_base
                    # Use the computed stride/rate (not conv_def.stride) so a
                    # strided Conv placed after the target output_stride does
                    # not downsample further. Before the target is reached
                    # these equal conv_def.stride and 1.
                    net = slim.conv2d(net, depth(conv_def.depth),
                                      conv_def.kernel,
                                      stride=layer_stride,
                                      rate=layer_rate,
                                      normalizer_fn=slim.batch_norm,
                                      scope=end_point)
                    end_points[end_point] = net
                    if end_point == final_endpoint:
                        return net, end_points
                else:
                    end_point = end_point_base + '_depthwise'
                    # By passing filters=None separable_conv2d produces only
                    # a depthwise convolution layer.
                    net = slim.separable_conv2d(net, None, conv_def.kernel,
                                                depth_multiplier=1,
                                                stride=layer_stride,
                                                rate=layer_rate,
                                                normalizer_fn=slim.batch_norm,
                                                scope=end_point)
                    end_points[end_point] = net
                    if end_point == final_endpoint:
                        return net, end_points

                    end_point = end_point_base + '_pointwise'
                    net = slim.conv2d(net, depth(conv_def.depth), [1, 1],
                                      stride=1,
                                      normalizer_fn=slim.batch_norm,
                                      scope=end_point)
                    end_points[end_point] = net
                    if end_point == final_endpoint:
                        return net, end_points

    # Every layer was built without hitting the requested endpoint.
    raise ValueError('Unknown final endpoint %s' % final_endpoint)
def _inception_v2_mixed_block(net, depth, concat_dim, scope, branch_0_depth,
                              branch_1_depths, branch_2_depths,
                              branch_3_depth, pool_fn=None,
                              pool_scope='AvgPool_0a_3x3'):
    """Builds one four-branch Inception v2 module and concatenates it.

    Args:
      net: input tensor of the module.
      depth: callable mapping a nominal channel count to the thinned one.
      concat_dim: axis along which the branch outputs are concatenated.
      scope: variable scope (endpoint name) of the module.
      branch_0_depth: nominal depth of the single 1x1 conv in Branch_0.
      branch_1_depths: nominal depths (1x1, 3x3) of Branch_1.
      branch_2_depths: nominal depths (1x1, 3x3, 3x3) of Branch_2.
      branch_3_depth: nominal depth of the 1x1 conv following the pooling in
        Branch_3.
      pool_fn: pooling layer used in Branch_3; slim.avg_pool2d when None.
      pool_scope: scope name of the Branch_3 pooling layer.

    Returns:
      The concatenation of the four branch outputs.
    """
    if pool_fn is None:
        pool_fn = slim.avg_pool2d
    with tf.compat.v1.variable_scope(scope):
        with tf.compat.v1.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, depth(branch_0_depth), [1, 1],
                                   scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(
                net, depth(branch_1_depths[0]), [1, 1],
                weights_initializer=trunc_normal(0.09),
                scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, depth(branch_1_depths[1]),
                                   [3, 3], scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(
                net, depth(branch_2_depths[0]), [1, 1],
                weights_initializer=trunc_normal(0.09),
                scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, depth(branch_2_depths[1]),
                                   [3, 3], scope='Conv2d_0b_3x3')
            branch_2 = slim.conv2d(branch_2, depth(branch_2_depths[2]),
                                   [3, 3], scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
            branch_3 = pool_fn(net, [3, 3], scope=pool_scope)
            branch_3 = slim.conv2d(
                branch_3, depth(branch_3_depth), [1, 1],
                weights_initializer=trunc_normal(0.1),
                scope='Conv2d_0b_1x1')
        return tf.concat(
            axis=concat_dim,
            values=[branch_0, branch_1, branch_2, branch_3])


def _inception_v2_reduction_block(net, depth, concat_dim, scope,
                                  branch_0_depths, branch_1_depths):
    """Builds one stride-2 (three-branch) Inception v2 reduction module.

    Args:
      net: input tensor of the module.
      depth: callable mapping a nominal channel count to the thinned one.
      concat_dim: axis along which the branch outputs are concatenated.
      scope: variable scope (endpoint name) of the module.
      branch_0_depths: nominal depths (1x1, strided 3x3) of Branch_0.
      branch_1_depths: nominal depths (1x1, 3x3, strided 3x3) of Branch_1.

    Returns:
      The concatenation of the two conv branches and the max-pool branch.
    """
    with tf.compat.v1.variable_scope(scope):
        with tf.compat.v1.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(
                net, depth(branch_0_depths[0]), [1, 1],
                weights_initializer=trunc_normal(0.09),
                scope='Conv2d_0a_1x1')
            branch_0 = slim.conv2d(branch_0, depth(branch_0_depths[1]),
                                   [3, 3], stride=2, scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(
                net, depth(branch_1_depths[0]), [1, 1],
                weights_initializer=trunc_normal(0.09),
                scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, depth(branch_1_depths[1]),
                                   [3, 3], scope='Conv2d_0b_3x3')
            branch_1 = slim.conv2d(branch_1, depth(branch_1_depths[2]),
                                   [3, 3], stride=2, scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
            branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
                                       scope='MaxPool_1a_3x3')
        return tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2])


def inception_v2_base(inputs,
                      final_endpoint='Mixed_5c',
                      min_depth=16,
                      depth_multiplier=1.0,
                      use_separable_conv=True,
                      data_format='NHWC',
                      scope=None):
    """Inception v2 (6a2).

    Constructs an Inception v2 network from inputs to the given final
    endpoint. This method can construct the network up to the layer
    inception(5b) as described in http://arxiv.org/abs/1502.03167.

    Args:
      inputs: a tensor of shape [batch_size, height, width, channels].
      final_endpoint: specifies the endpoint to construct the network up to.
        It can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
        'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4a',
        'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a',
        'Mixed_5b', 'Mixed_5c'].
      min_depth: Minimum depth value (number of channels) for all convolution
        ops. Enforced when depth_multiplier < 1, and not an active constraint
        when depth_multiplier >= 1.
      depth_multiplier: Float multiplier for the depth (number of channels)
        for all convolution ops. The value must be greater than zero. Typical
        usage will be to set this value in (0, 1) to reduce the number of
        parameters or computation cost of the model.
      use_separable_conv: Use a separable convolution for the first layer
        Conv2d_1a_7x7. If this is False, use a normal convolution instead.
      data_format: Data format of the activations ('NHWC' or 'NCHW').
      scope: Optional variable_scope.

    Returns:
      tensor_out: output tensor corresponding to the final_endpoint.
      end_points: a dict mapping endpoint names to activations for external
        use, for example summaries or losses.

    Raises:
      ValueError: if final_endpoint is not set to one of the predefined
        values, or depth_multiplier <= 0, or data_format is not supported.
    """
    if depth_multiplier <= 0:
        raise ValueError('depth_multiplier is not greater than zero.')

    def depth(d):
        """Returns the thinned channel count for the nominal depth `d`."""
        return max(int(d * depth_multiplier), min_depth)

    if data_format != 'NHWC' and data_format != 'NCHW':
        raise ValueError('data_format must be either NHWC or NCHW.')
    if data_format == 'NCHW' and use_separable_conv:
        raise ValueError(
            'separable convolution only supports NHWC layout. NCHW data format can'
            ' only be used when use_separable_conv is False.'
        )

    concat_dim = 3 if data_format == 'NHWC' else 1

    # end_points collects relevant activations for external use, for example
    # summaries or losses.
    end_points = {}

    with tf.compat.v1.variable_scope(scope, 'InceptionV2', [inputs]):
        with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                stride=1,
                padding='SAME',
                data_format=data_format):
            # Note that sizes in the comments below assume an input spatial
            # size of 224x224, however, the inputs can be of any size greater
            # than 32x32.

            # 224 x 224 x 3
            end_point = 'Conv2d_1a_7x7'
            if use_separable_conv:
                # depthwise_multiplier here is different from
                # depth_multiplier. depthwise_multiplier determines the
                # output channels of the initial depthwise conv (see docs for
                # tf.nn.separable_conv2d), while depth_multiplier controls
                # the number of channels of the subsequent 1x1 convolution.
                # Must have
                #   in_channels * depthwise_multiplier <= out_channels
                # so that the separable convolution is not
                # overparameterized.
                depthwise_multiplier = min(int(depth(64) / 3), 8)
                net = slim.separable_conv2d(
                    inputs, depth(64), [7, 7],
                    depth_multiplier=depthwise_multiplier,
                    stride=2,
                    padding='SAME',
                    weights_initializer=trunc_normal(1.0),
                    scope=end_point)
            else:
                # Use a normal convolution instead of a separable
                # convolution.
                net = slim.conv2d(
                    inputs, depth(64), [7, 7],
                    stride=2,
                    weights_initializer=trunc_normal(1.0),
                    scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint:
                return net, end_points

            # 112 x 112 x 64
            end_point = 'MaxPool_2a_3x3'
            net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)
            end_points[end_point] = net
            if end_point == final_endpoint:
                return net, end_points

            # 56 x 56 x 64
            end_point = 'Conv2d_2b_1x1'
            net = slim.conv2d(net, depth(64), [1, 1], scope=end_point,
                              weights_initializer=trunc_normal(0.1))
            end_points[end_point] = net
            if end_point == final_endpoint:
                return net, end_points

            # 56 x 56 x 64
            end_point = 'Conv2d_2c_3x3'
            net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint:
                return net, end_points

            # 56 x 56 x 192
            end_point = 'MaxPool_3a_3x3'
            net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)
            end_points[end_point] = net
            if end_point == final_endpoint:
                return net, end_points

            # Inception modules, in construction order. The size comment in
            # front of each entry is the module's input size for a 224x224
            # network input.
            mixed_blocks = [
                # 28 x 28 x 192
                ('Mixed_3b', _inception_v2_mixed_block,
                 dict(branch_0_depth=64, branch_1_depths=(64, 64),
                      branch_2_depths=(64, 96, 96), branch_3_depth=32)),
                # 28 x 28 x 256
                ('Mixed_3c', _inception_v2_mixed_block,
                 dict(branch_0_depth=64, branch_1_depths=(64, 96),
                      branch_2_depths=(64, 96, 96), branch_3_depth=64)),
                # 28 x 28 x 320
                ('Mixed_4a', _inception_v2_reduction_block,
                 dict(branch_0_depths=(128, 160),
                      branch_1_depths=(64, 96, 96))),
                # 14 x 14 x 576
                ('Mixed_4b', _inception_v2_mixed_block,
                 dict(branch_0_depth=224, branch_1_depths=(64, 96),
                      branch_2_depths=(96, 128, 128), branch_3_depth=128)),
                # 14 x 14 x 576
                ('Mixed_4c', _inception_v2_mixed_block,
                 dict(branch_0_depth=192, branch_1_depths=(96, 128),
                      branch_2_depths=(96, 128, 128), branch_3_depth=128)),
                # 14 x 14 x 576
                ('Mixed_4d', _inception_v2_mixed_block,
                 dict(branch_0_depth=160, branch_1_depths=(128, 160),
                      branch_2_depths=(128, 160, 160), branch_3_depth=96)),
                # 14 x 14 x 576
                ('Mixed_4e', _inception_v2_mixed_block,
                 dict(branch_0_depth=96, branch_1_depths=(128, 192),
                      branch_2_depths=(160, 192, 192), branch_3_depth=96)),
                # 14 x 14 x 576
                ('Mixed_5a', _inception_v2_reduction_block,
                 dict(branch_0_depths=(128, 192),
                      branch_1_depths=(192, 256, 256))),
                # 7 x 7 x 1024
                ('Mixed_5b', _inception_v2_mixed_block,
                 dict(branch_0_depth=352, branch_1_depths=(192, 320),
                      branch_2_depths=(160, 224, 224), branch_3_depth=128)),
                # 7 x 7 x 1024
                # The last module pools with max instead of average pooling.
                ('Mixed_5c', _inception_v2_mixed_block,
                 dict(branch_0_depth=352, branch_1_depths=(192, 320),
                      branch_2_depths=(192, 224, 224), branch_3_depth=128,
                      pool_fn=slim.max_pool2d,
                      pool_scope='MaxPool_0a_3x3')),
            ]
            for end_point, build_block, block_kwargs in mixed_blocks:
                net = build_block(net, depth, concat_dim, end_point,
                                  **block_kwargs)
                end_points[end_point] = net
                if end_point == final_endpoint:
                    return net, end_points

    # Every layer was built without hitting the requested endpoint.
    raise ValueError('Unknown final endpoint %s' % final_endpoint)