def vgg_dual_16(inputs1, inputs2,
                num_classes=1000,
                is_training=True,
                dropout_keep_prob=0.5,
                spatial_squeeze=True,
                scope='vgg_16',
                update_top_only = False,
                fc_conv_padding='VALID',
                reuse = False):
    """Builds a two-input VGG-16 style network with a shared conv trunk.

    Each input is run through the same conv1..conv5 stack (the second pass
    always reuses the variables of the first). The two conv5 outputs are
    concatenated along axis 3 and fed to a head made of 1x1 convolutions,
    max-pools and convolutional "fc" layers.

    Args:
      inputs1: First input tensor (presumably NHWC images -- TODO confirm).
      inputs2: Second input tensor, fed through the same trunk.
      num_classes: Output depth of the final `fc8` layer. If None, `fc8` is
        not built and the `fc7_`/dropout output is returned.
      is_training: Forwarded to the dropout layers.
      dropout_keep_prob: Keep probability of the dropout layers.
      spatial_squeeze: If true, axes 1 and 2 of the output are squeezed and
        the result is stored in `end_points` under `<scope>/fc8`.
      scope: Name of the enclosing variable scope.
      update_top_only: Currently unused. The `tf.stop_gradient` it used to
        gate (before conv5) is disabled, so the flag has no effect.
      fc_conv_padding: Padding used by the 7x7 `fc6_` convolution.
      reuse: Forwarded to `vgg_arg_scope`; set it when the variables of this
        scope already exist.

    Returns:
      A `(net, end_points)` tuple: the output tensor and a dict built from
      the outputs collection of the conv/fc/pool layers.
    """
    with tf.compat.v1.variable_scope(scope, 'vgg_16', [inputs1]) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                outputs_collections=end_points_collection):
            nets = []
            for i, inputs in enumerate([inputs1, inputs2]):
                # The second branch shares the trunk weights created by the
                # first one, so it must always run with reuse enabled.
                with slim.arg_scope(vgg_arg_scope(reuse = reuse or (i > 0))):
                    net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3],
                                      scope='conv1')
                    net = slim.max_pool2d(net, [2, 2], scope='pool1')
                    net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                                      scope='conv2')
                    net = slim.max_pool2d(net, [2, 2], scope='pool2')
                    net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                                      scope='conv3')
                    net = slim.max_pool2d(net, [2, 2], scope='pool3')
                    net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                                      scope='conv4')
                    net = slim.max_pool2d(net, [2, 2], scope='pool4')
                    # NOTE: `update_top_only` used to insert a
                    # tf.stop_gradient here; that is intentionally disabled.
                    net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                                      scope='conv5')
                    nets.append(net)

            with slim.arg_scope(vgg_arg_scope(reuse = reuse)):
                # Fuse the two branches along axis 3.
                net = tf.concat(nets, 3)
                net = slim.conv2d(net, 512, [1, 1], scope='conv6')
                net = slim.max_pool2d(net, [2, 2], stride = 2, scope='pool6')
                net = slim.conv2d(net, 512, [1, 1], scope='conv7')
                net = slim.max_pool2d(net, [2, 2], stride = 2, scope='pool7')
                # Use conv2d instead of fully_connected layers.
                net = slim.conv2d(net, 2048, [7, 7], padding=fc_conv_padding,
                                  scope = 'fc6_')
                net = slim.dropout(net, dropout_keep_prob,
                                   is_training=is_training,
                                   scope = 'dropout6')
                net = slim.conv2d(net, 2048, [1, 1], scope='fc7_')
                net = slim.dropout(net, dropout_keep_prob,
                                   is_training=is_training,
                                   scope = 'dropout7_')
                if num_classes is not None:
                    net = slim.conv2d(net, num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn = None,
                                      scope = 'fc8')
            # Convert end_points_collection into a end_point dict.
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)
            if spatial_squeeze:
                net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
def create_test_network_6():
    """Aligned two-branch test network whose right branch ends in dropout.

    The left branch is a single 1x1 stride-4 convolution. The right branch
    pads the input, applies a 3x3 stride-2 and a 1x1 stride-2 convolution and
    then dropout. Both branches are summed and passed through a ReLU named
    'output'.

    Returns:
      g: Tensorflow graph object (Graph proto).
    """
    graph = tf.Graph()
    with graph.as_default():
        # An input test image with unknown spatial resolution.
        image = tf.placeholder(
            tf.float32, (None, None, None, 1), name='input_image')

        # Left branch.
        left = slim.conv2d(
            image, 1, [1, 1], stride=4, scope='L1', padding='VALID')

        # Right branch: pad one row/column at the start of axes 1 and 2.
        padded = tf.pad(image, [[0, 0], [1, 0], [1, 0], [0, 0]])
        right = slim.conv2d(
            padded, 1, [3, 3], stride=2, scope='L2', padding='VALID')
        right = slim.conv2d(
            right, 1, [1, 1], stride=2, scope='L3', padding='VALID')
        right = slim.dropout(right)

        # Addition.
        tf.nn.relu(left + right, name='output')
    return graph
def _predict_object_embeddings(object_features,
                               output_dims,
                               slim_fc_scope,
                               keep_prob=1.0,
                               is_training=False):
    """Projects object features to `output_dims` dimensions.

    Applies a fully connected layer, dropout, and a second fully connected
    layer without activation, all under the 'object_projection' variable
    scope and the supplied slim arg scope.

    Args:
      object_features: A [batch, max_num_objects, feature_dims] float tensor.
      output_dims: Dimensions of the object embeddings.
      slim_fc_scope: Slim FC scope.
      keep_prob: Keep probability of the dropout layer.
      is_training: If true, build a training graph.

    Returns:
      A [batch, max_num_objects, output_dims] float tensor.
    """
    with slim.arg_scope(slim_fc_scope):
        with tf.variable_scope('object_projection'):
            hidden = slim.fully_connected(
                object_features, num_outputs=output_dims)
            hidden = slim.dropout(hidden, keep_prob, is_training=is_training)
            embeddings = slim.fully_connected(
                hidden, num_outputs=output_dims, activation_fn=None)
    return embeddings
def _build(self,
           input_graph,
           hidden_size=50,
           attn_scale=1.0,
           attn_dropout_keep_prob=1.0,
           regularizer=None,
           is_training=False):
    """Runs one attention-weighted message passing step over `input_graph`.

    Edge values and scalar attention logits are each produced by a linear
    `EdgeBlock` over edges, receiver nodes and sender nodes. The logits are
    scaled, normalized per receiver with `self._normalizer`, passed through
    dropout, and used to weight the edge values, which are then summed into
    their receiving nodes.

    Args:
      input_graph: Graph whose `nodes` and `edges` share the same last
        dimension (asserted).
      hidden_size: Unused by the current linear logits model.
      attn_scale: Multiplier applied to the attention logits.
      attn_dropout_keep_prob: Keep probability for dropout on the normalized
        attention weights.
      regularizer: Regularizer applied to the 'w' of both linear models.
      is_training: Forwarded to the dropout layer.

    Returns:
      `input_graph` with nodes replaced by the aggregated attended values and
      edges replaced by the updated edge values.
    """
    value_dims = input_graph.nodes.shape[-1].value
    assert value_dims == input_graph.edges.shape[-1].value

    def linear_fn(output_size):
        # Factory of the zero-argument model constructors EdgeBlock expects.
        return lambda: snt.Linear(output_size=output_size,
                                  regularizers={'w': regularizer})

    # Both edge blocks look at edges, receivers and senders, never globals.
    block_inputs = dict(use_edges=True,
                        use_receiver_nodes=True,
                        use_sender_nodes=True,
                        use_globals=False)

    # Compute edge values, sender feature + edge feature.
    # - edge_values = [total_num_edges, value_dims]
    value_block = blocks.EdgeBlock(edge_model_fn=linear_fn(value_dims),
                                   name='update_edge_values',
                                   **block_inputs)
    edge_values = value_block(input_graph).edges
    tf.summary.histogram('mpnn/edge_values', edge_values)

    # One scalar attention logit per edge.
    logits_block = blocks.EdgeBlock(edge_model_fn=linear_fn(1),
                                    name='update_attention_logits',
                                    **block_inputs)
    logits = attn_scale * logits_block(input_graph).edges
    tf.summary.histogram('mpnn/logits', logits)

    attention = modules._received_edges_normalizer(
        input_graph.replace(edges=logits), normalizer=self._normalizer)
    attention = slim.dropout(attention,
                             attn_dropout_keep_prob,
                             is_training=is_training)

    # Attending to sender values according to the weights.
    # - attended_edges = [total_num_edges, value_dims]
    attended_edges = edge_values * attention

    # Summing all of the attended values from each node.
    # aggregated_attended_values = [total_num_nodes, embedding_size]
    aggregator = blocks.ReceivedEdgesToNodesAggregator(
        reducer=tf.math.unsorted_segment_sum)
    aggregated = aggregator(input_graph.replace(edges=attended_edges))

    return input_graph.replace(nodes=aggregated, edges=edge_values)
def adapt_detection_features(self, detection_features):
    """Projects detection features to embedding space.

    A ReLU hidden layer, dropout and a linear output layer are built under
    the 'detection' variable scope. The output width is the BERT hidden size.

    Args:
      detection_features: Detection features.

    Returns:
      embeddings: Projected detection features.
    """
    is_training = self._is_training
    options = self._model_proto

    with tf.variable_scope('detection'):
        hidden = slim.fully_connected(detection_features,
                                      options.detection_mlp_hidden_units,
                                      activation_fn=tf.nn.relu,
                                      scope='hidden')
        hidden = slim.dropout(hidden,
                              keep_prob=options.dropout_keep_prob,
                              is_training=is_training)
        embeddings = slim.fully_connected(hidden,
                                          self._bert_config.hidden_size,
                                          activation_fn=None,
                                          scope='output')
    return embeddings
def model(
    inputs,
    is_training = True,
    dropout_keep_prob = 0.8,
    reuse = None,
    scope = 'InceptionV4',
    bottleneck_dim = 512,
):
    """Builds an Inception-V4 trunk with a bottleneck and a 2-way logits head.

    The trunk output is globally average-pooled (with a fixed-size pooling
    when the spatial shape is static, otherwise with a mean over axes 1 and
    2), passed through dropout, flattened, projected to `bottleneck_dim` and
    finally to 2 logits.

    Args:
      inputs: Input tensor handed to `inception_v4_base`.
      is_training: Applied to batch norm and dropout through the arg scope.
      dropout_keep_prob: Keep probability of the pre-logits dropout.
      reuse: Forwarded to the variable scope.
      scope: Variable scope name.
      bottleneck_dim: Width of the 'bottleneck' fully connected layer.

    Returns:
      The output of the 'Logits_vad' layer (2 units, no activation).
    """
    with tf.variable_scope(
        scope, 'InceptionV4', [inputs], reuse = reuse
    ) as sc:
        with slim.arg_scope(
            [slim.batch_norm, slim.dropout], is_training = is_training
        ):
            net, end_points = inception_v4_base(inputs, scope = sc)
            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                stride = 1,
                padding = 'SAME',
            ):
                with tf.variable_scope('Logits'):
                    # 8 x 8 x 1536
                    kernel_size = net.get_shape()[1:3]
                    if kernel_size.is_fully_defined():
                        net = slim.avg_pool2d(
                            net,
                            kernel_size,
                            padding = 'VALID',
                            scope = 'AvgPool_1a',
                        )
                    else:
                        # Spatial size unknown at graph-build time: fall
                        # back to a mean over the spatial axes.
                        net = tf.reduce_mean(
                            input_tensor = net,
                            axis = [1, 2],
                            keepdims = True,
                            name = 'global_pool',
                        )
                    end_points['global_pool'] = net
                    # 1 x 1 x 1536
                    net = slim.dropout(
                        net, dropout_keep_prob, scope = 'Dropout_1b'
                    )
                    net = slim.flatten(net, scope = 'PreLogitsFlatten')
                    end_points['PreLogitsFlatten'] = net
                    bottleneck = slim.fully_connected(
                        net, bottleneck_dim, scope = 'bottleneck'
                    )
                    logits = slim.fully_connected(
                        bottleneck,
                        2,
                        activation_fn = None,
                        scope = 'Logits_vad',
                    )
    return logits
def i3d(inputs,
        num_classes=1000,
        dropout_keep_prob=0.8,
        is_training=True,
        prediction_fn=slim.softmax,
        spatial_squeeze=True,
        reuse=None,
        scope='InceptionV1'):
    """Defines the I3D architecture.

    The default image size used to train this network is 224x224.

    Args:
      inputs: A 5-D float tensor of size [batch_size, num_frames, height,
        width, channels].
      num_classes: number of predicted classes.
      dropout_keep_prob: the percentage of activation values that are
        retained.
      is_training: whether is training or not.
      prediction_fn: a function to get predictions out of logits.
      spatial_squeeze: if True, logits is of shape is [B, C], if false logits
        is of shape [B, 1, 1, C], where B is batch_size and C is number of
        classes.
      reuse: whether or not the network and its variables should be reused.
        To be able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      logits: the pre-softmax activations, a tensor of size
        [batch_size, num_classes]
      end_points: a dictionary from components of the network to the
        corresponding activation.
    """
    scope_values = [inputs, num_classes]
    with tf.variable_scope(
            scope, 'InceptionV1', scope_values, reuse=reuse) as sc:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = i3d_base(inputs, scope=sc)

            # Final pooling and prediction.
            with tf.variable_scope('Logits'):
                pool_size = i3d_utils.reduced_kernel_size_3d(net, [2, 7, 7])
                pooled = slim.avg_pool3d(
                    net, pool_size, stride=1, scope='AvgPool_0a_7x7')
                dropped = slim.dropout(
                    pooled, dropout_keep_prob, scope='Dropout_0b')
                logits = slim.conv3d(dropped,
                                     num_classes,
                                     [1, 1, 1],
                                     activation_fn=None,
                                     normalizer_fn=None,
                                     scope='Conv2d_0c_1x1')
                # Temporal average pooling.
                logits = tf.reduce_mean(input_tensor=logits, axis=1)
                if spatial_squeeze:
                    logits = tf.squeeze(
                        logits, [1, 2], name='SpatialSqueeze')

            end_points['Logits'] = logits
            end_points['Predictions'] = prediction_fn(
                logits, scope='Predictions')
    return logits, end_points
def inference(images,
              keep_probability,
              phase_train=True,
              bottleneck_layer_size=128,
              weight_decay=0.0,
              reuse=None):
    """Builds a SqueezeNet-style embedding network.

    A stride-2 7x7 convolution is followed by eight fire modules interleaved
    with max-pools, dropout, a 1000-channel 1x1 convolution, average pooling
    over the remaining spatial extent and a linear 'Bottleneck' layer.

    Args:
      images: Input image tensor.
      keep_probability: Keep probability of the dropout before 'conv10'.
      phase_train: Applied to batch norm and dropout through the arg scope.
      bottleneck_layer_size: Number of units of the 'Bottleneck' layer.
      weight_decay: L2 weight decay; the regularizer uses half this value.
      reuse: Forwarded to the 'squeezenet' variable scope.

    Returns:
      A `(net, None)` tuple, where `net` is the 'Bottleneck' output.
    """
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.995,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # force in-place updates of mean and variance estimates
        'updates_collections': None,
        # Moving averages ends up in the trainable variables collection
        'variables_collections': [
            tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES
        ],
    }
    initializer = tf.compat.v1.keras.initializers.VarianceScaling(
        scale=1.0, mode="fan_avg", distribution="uniform")
    regularizer = tf.keras.regularizers.l2(0.5 * (weight_decay))

    # (squeeze depth, expand depth, scope) of the fire modules, grouped by
    # the max-pool that precedes each group.
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=initializer,
                        weights_regularizer=regularizer,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        with tf.compat.v1.variable_scope(
                'squeezenet', [images], reuse=reuse):
            with slim.arg_scope([slim.batch_norm, slim.dropout],
                                is_training=phase_train):
                net = slim.conv2d(images, 96, [7, 7], stride=2,
                                  scope='conv1')
                net = slim.max_pool2d(net, [3, 3], stride=2,
                                      scope='maxpool1')
                for squeeze, expand, name in ((16, 64, 'fire2'),
                                              (16, 64, 'fire3'),
                                              (32, 128, 'fire4')):
                    net = fire_module(net, squeeze, expand, scope=name)
                net = slim.max_pool2d(net, [2, 2], stride=2,
                                      scope='maxpool4')
                for squeeze, expand, name in ((32, 128, 'fire5'),
                                              (48, 192, 'fire6'),
                                              (48, 192, 'fire7'),
                                              (64, 256, 'fire8')):
                    net = fire_module(net, squeeze, expand, scope=name)
                net = slim.max_pool2d(net, [3, 3], stride=2,
                                      scope='maxpool8')
                net = fire_module(net, 64, 256, scope='fire9')
                net = slim.dropout(net, keep_probability)
                net = slim.conv2d(net, 1000, [1, 1],
                                  activation_fn=None,
                                  normalizer_fn=None,
                                  scope='conv10')
                # Average over whatever spatial extent is left.
                net = slim.avg_pool2d(net, net.get_shape()[1:3],
                                      scope='avgpool10')
                net = tf.squeeze(net, [1, 2], name='logits')
                net = slim.fully_connected(net,
                                           bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck',
                                           reuse=False)
    return net, None
def model(
    inputs,
    is_training=True,
    dropout_keep_prob=0.8,
    reuse=None,
    scope='InceptionV4',
    create_aux_logits=True,
    num_classes=2,
):
    """Builds an Inception-V4 trunk with a `num_classes`-way logits head.

    The trunk output is globally average-pooled (with a fixed-size pooling
    when the spatial shape is static, otherwise with a mean over axes 1 and
    2), passed through dropout, flattened and projected to the logits.

    Args:
      inputs: Input tensor handed to `inception_v4_base`.
      is_training: Applied to batch norm and dropout through the arg scope.
      dropout_keep_prob: Keep probability of the pre-logits dropout.
      reuse: Forwarded to the variable scope.
      scope: Variable scope name.
      create_aux_logits: Accepted for interface compatibility; no auxiliary
        head is built by this function.
      num_classes: Number of units of the final 'Logits' layer.

    Returns:
      The output of the 'Logits' fully connected layer (no activation).
    """
    with tf.variable_scope(
            scope, 'InceptionV4', [inputs], reuse=reuse) as sc:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = inception_v4_base(inputs, scope=sc)
            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                stride=1,
                padding='SAME',
            ):
                # Final pooling and prediction
                # TODO(sguada,arnoegw): Consider adding a parameter
                # global_pool which can be set to False to disable pooling
                # here (as in resnet_*()).
                with tf.variable_scope('Logits'):
                    # 8 x 8 x 1536
                    kernel_size = net.get_shape()[1:3]
                    if kernel_size.is_fully_defined():
                        net = slim.avg_pool2d(
                            net,
                            kernel_size,
                            padding='VALID',
                            scope='AvgPool_1a',
                        )
                    else:
                        # Spatial size unknown at graph-build time: fall
                        # back to a mean over the spatial axes.
                        net = tf.reduce_mean(
                            input_tensor=net,
                            axis=[1, 2],
                            keepdims=True,
                            name='global_pool',
                        )
                    end_points['global_pool'] = net
                    # 1 x 1 x 1536
                    net = slim.dropout(
                        net, dropout_keep_prob, scope='Dropout_1b')
                    net = slim.flatten(net, scope='PreLogitsFlatten')
                    end_points['PreLogitsFlatten'] = net
                    # 1536
                    logits = slim.fully_connected(
                        net, num_classes, activation_fn=None, scope='Logits')
    return logits
def predict(self, features, num_predictions_per_location):
    """Predicts masks from image features.

    Optionally applies dropout, then either a depthwise separable
    convolution followed by a 1x1 convolution, or a single full convolution,
    and reshapes the result into per-anchor masks.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.
      num_predictions_per_location: Number of box predictions to be made per
        spatial location.

    Returns:
      mask_predictions: A float tensors of shape
        [batch_size, num_anchors, num_masks, mask_height, mask_width]
        representing the mask predictions for the proposals.
    """
    # Class-agnostic heads predict a single mask per anchor.
    num_masks = 1 if self._masks_are_class_agnostic else self._num_classes
    num_mask_channels = num_masks * self._mask_height * self._mask_width
    num_outputs = num_predictions_per_location * num_mask_channels
    kernel = [self._kernel_size, self._kernel_size]

    net = features
    if self._use_dropout:
        net = slim.dropout(net, keep_prob=self._dropout_keep_prob)

    if self._use_depthwise:
        depthwise = slim.separable_conv2d(net,
                                          None,
                                          kernel,
                                          padding='SAME',
                                          depth_multiplier=1,
                                          stride=1,
                                          rate=1,
                                          scope='MaskPredictor_depthwise')
        mask_predictions = slim.conv2d(depthwise,
                                       num_outputs,
                                       [1, 1],
                                       activation_fn=None,
                                       normalizer_fn=None,
                                       normalizer_params=None,
                                       scope='MaskPredictor')
    else:
        mask_predictions = slim.conv2d(net,
                                       num_outputs,
                                       kernel,
                                       activation_fn=None,
                                       normalizer_fn=None,
                                       normalizer_params=None,
                                       scope='MaskPredictor')

    # Prefer the static batch size; fall back to the dynamic one.
    batch_size = features.get_shape().as_list()[0]
    if batch_size is None:
        batch_size = tf.shape(features)[0]
    return tf.reshape(
        mask_predictions,
        [batch_size, -1, num_masks, self._mask_height, self._mask_width])
def predict(self, features, num_predictions_per_location):
    """Predicts class scores (including background) from image features.

    Optionally applies dropout, then either a depthwise separable
    convolution followed by a 1x1 convolution, or a single full convolution
    with a constant bias initializer. Scores are optionally passed through a
    sigmoid and reshaped to one row per anchor.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.
      num_predictions_per_location: Number of box predictions to be made per
        spatial location.

    Returns:
      class_predictions_with_background: A float tensors of shape
        [batch_size, num_anchors, num_class_slots] representing the class
        predictions for the proposals.
    """
    num_outputs = num_predictions_per_location * self._num_class_slots
    kernel = [self._kernel_size, self._kernel_size]

    net = features
    if self._use_dropout:
        net = slim.dropout(net, keep_prob=self._dropout_keep_prob)

    if self._use_depthwise:
        depthwise_scope = self._scope + '_depthwise'
        depthwise = slim.separable_conv2d(net,
                                          None,
                                          kernel,
                                          padding='SAME',
                                          depth_multiplier=1,
                                          stride=1,
                                          rate=1,
                                          scope=depthwise_scope)
        scores = slim.conv2d(depthwise,
                             num_outputs,
                             [1, 1],
                             activation_fn=None,
                             normalizer_fn=None,
                             normalizer_params=None,
                             scope=self._scope)
    else:
        scores = slim.conv2d(net,
                             num_outputs,
                             kernel,
                             activation_fn=None,
                             normalizer_fn=None,
                             normalizer_params=None,
                             scope=self._scope,
                             biases_initializer=tf.constant_initializer(
                                 self._class_prediction_bias_init))

    if self._apply_sigmoid_to_scores:
        scores = tf.sigmoid(scores)

    # Prefer the static batch size; fall back to the dynamic one.
    batch_size = features.get_shape().as_list()[0]
    if batch_size is None:
        batch_size = tf.shape(features)[0]
    return tf.reshape(scores, [batch_size, -1, self._num_class_slots])
def predict(self, features, num_predictions_per_location):
    """Predicts class scores (including background) from image features.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.
      num_predictions_per_location: Number of box predictions to be made per
        spatial location.

    Returns:
      class_predictions_with_background: A tensor of shape
        [batch_size, num_anchors, num_class_slots] representing the class
        predictions for the proposals, or a tensor of shape [batch, height,
        width, num_predictions_per_location * num_class_slots] representing
        class predictions before reshaping if self._return_flat_predictions
        is False.
    """
    net = features
    if self._use_dropout:
        net = slim.dropout(net, keep_prob=self._dropout_keep_prob)

    conv_op = (functools.partial(slim.separable_conv2d, depth_multiplier=1)
               if self._use_depthwise else slim.conv2d)
    num_outputs = num_predictions_per_location * self._num_class_slots
    scores = conv_op(net,
                     num_outputs,
                     [self._kernel_size, self._kernel_size],
                     activation_fn=None,
                     stride=1,
                     padding='SAME',
                     normalizer_fn=None,
                     biases_initializer=tf.constant_initializer(
                         self._class_prediction_bias_init),
                     scope=self._scope)

    batch_size, height, width = (
        shape_utils.combined_static_and_dynamic_shape(features)[0:3])

    # The score converter is applied with the class slots on their own
    # trailing axis, one row per prediction.
    scores = tf.reshape(scores, [
        batch_size, height, width, num_predictions_per_location,
        self._num_class_slots
    ])
    scores = self._score_converter_fn(scores)

    if self._return_flat_predictions:
        output_shape = [batch_size, -1, self._num_class_slots]
    else:
        output_shape = [batch_size, height, width, num_outputs]
    return tf.reshape(scores, output_shape)
def conv_block(inputs, n_filters, filter_size=[3, 3], dropout_p=0.0):
    """
    Basic conv block for Encoder-Decoder.

    Applies, in order: convolution (no activation, no normalizer), fused
    batch normalization, ReLU, and dropout with keep probability
    `1.0 - dropout_p`. Dropout is skipped when `dropout_p` equals 0.0.
    """
    features = slim.conv2d(inputs, n_filters, filter_size,
                           activation_fn=None, normalizer_fn=None)
    normalized = slim.batch_norm(features, fused=True)
    activated = tf.nn.relu(normalized)
    if dropout_p == 0.0:
        return activated
    return slim.dropout(activated, keep_prob=(1.0-dropout_p))
def conv_transpose_block(inputs, n_filters, strides=2, filter_size=[3, 3],
                         dropout_p=0.0):
    """
    Basic conv transpose block for Encoder-Decoder upsampling.

    Applies, in order: transposed convolution, fused batch normalization,
    ReLU, and dropout with keep probability `1.0 - dropout_p`. Dropout is
    skipped when `dropout_p` equals 0.0.

    Args:
      inputs: Input tensor.
      n_filters: Number of output filters of the transposed convolution.
      strides: Stride used for both spatial dimensions.
      filter_size: Kernel size of the transposed convolution. This used to
        be ignored in favour of a hard-coded [3, 3]; the default keeps that
        behaviour.
      dropout_p: Drop probability.

    Returns:
      The output tensor of the block.
    """
    conv = slim.conv2d_transpose(inputs, n_filters,
                                 kernel_size=filter_size,
                                 stride=[strides, strides])
    out = tf.nn.relu(slim.batch_norm(conv, fused=True))
    if dropout_p != 0.0:
        out = slim.dropout(out, keep_prob=(1.0-dropout_p))
    return out
def build_predictions(self, net, rois, is_training, initializer,
                      initializer_bbox):
    """Builds the classification and box-regression head on top of ROIs.

    ROI features are cropped from `net`, flattened and passed through two
    4096-unit fully connected layers (each followed by dropout only when
    training), then through separate linear layers for class scores and box
    predictions.

    Args:
      net: Feature map to crop the ROIs from.
      rois: Regions of interest handed to `self._crop_pool_layer`.
      is_training: Python bool; enables dropout and makes the two output
        layers trainable.
      initializer: Weights initializer of the 'cls_score' layer.
      initializer_bbox: Weights initializer of the 'bbox_pred' layer.

    Returns:
      A `(cls_score, cls_prob, bbox_prediction)` tuple.
    """
    # Crop image ROIs
    roi_features = self._crop_pool_layer(net, rois, "pool5")
    hidden = slim.flatten(roi_features, scope='flatten')

    # Fully connected layers
    for index in ('6', '7'):
        hidden = slim.fully_connected(hidden, 4096, scope='fc' + index)
        if is_training:
            hidden = slim.dropout(hidden, keep_prob=0.5, is_training=True,
                                  scope='dropout' + index)

    # Scores and predictions
    cls_score = slim.fully_connected(hidden,
                                     self._num_classes,
                                     weights_initializer=initializer,
                                     trainable=is_training,
                                     activation_fn=None,
                                     scope='cls_score')
    cls_prob = self._softmax_layer(cls_score, "cls_prob")
    bbox_prediction = slim.fully_connected(
        hidden,
        self._num_classes * 4,
        weights_initializer=initializer_bbox,
        trainable=is_training,
        activation_fn=None,
        scope='bbox_pred')
    return cls_score, cls_prob, bbox_prediction
def regressor(x, num_output=85, is_training=True, reuse=False,
              name="3D_module"):
    """Three-layer MLP regressor with dropout after the first two layers.

    Args:
      x: Input tensor.
      num_output: Number of units of the final linear layer.
      is_training: Forwarded to the dropout layers.
      reuse: Forwarded to the variable scope.
      name: Variable scope name.

    Returns:
      The output of the final 'fc3' layer (no activation).
    """
    with tf.variable_scope(name, reuse=reuse):
        hidden = x
        for index in ('1', '2'):
            hidden = slim.fully_connected(hidden, 1024, scope='fc' + index)
            hidden = slim.dropout(hidden, 0.5, is_training=is_training,
                                  scope='dropout' + index)
        # Small-scale uniform variance-scaling init for the output layer.
        small_xavier = tf.keras.initializers.VarianceScaling(
            scale=.01, mode='fan_avg', distribution='uniform')
        output = slim.fully_connected(hidden,
                                      num_output,
                                      activation_fn=None,
                                      weights_initializer=small_xavier,
                                      scope='fc3')
    return output
def Encoder_fc3_dropout(x, num_output=85, is_training=True, reuse=False,
                        name="3D_module"):
    """
    3D inference module. 3 MLP layers (last is the output)
    With dropout on first 2.

    Input:
    - x: N x [|img_feat|, |3D_param|]
    - reuse: bool

    Outputs:
    - 3D params: N x num_output
      if orthogonal:
           either 85: (3 + 24*3 + 10) or 109 (3 + 24*4 + 10) for factored
           axis-angle representation
      if perspective:
          86: (f, tx, ty, tz) + 24*3 + 10, or 110 for factored axis-angle.
    - variables: tf variables
    """
    if reuse:
        print('Reuse is on!')

    with tf.variable_scope(name, reuse=reuse) as module_scope:
        hidden = x
        for index in ('1', '2'):
            hidden = slim.fully_connected(hidden, 1024, scope='fc' + index)
            hidden = slim.dropout(hidden, 0.5, is_training=is_training,
                                  scope='dropout' + index)
        # Small-scale uniform variance-scaling init for the output layer.
        small_xavier = variance_scaling_initializer(
            factor=.01, mode='FAN_AVG', uniform=True)
        output = slim.fully_connected(hidden,
                                      num_output,
                                      activation_fn=None,
                                      weights_initializer=small_xavier,
                                      scope='fc3')

    # Global variables whose names fall under this module's scope.
    variables = tf.global_variables(module_scope.name)
    return output, variables
def conv_net(inputs, hparams):
    """Builds the ConvNet from Kelz 2016.

    One batch-normalized convolution is built per entry of the parallel
    `hparams` lists, each optionally followed by a max-pool along axis 2 and
    dropout. Axes 2 and 3 are then merged and a fully connected layer with
    dropout is applied.

    Args:
      inputs: Input tensor (rank 4; presumably [batch, time, freq, channels]
        -- TODO confirm).
      hparams: Hyperparameters providing `temporal_sizes`, `freq_sizes`,
        `num_filters`, `pool_sizes`, `dropout_keep_amts`, `fc_size` and
        `fc_dropout_keep_amt`.

    Returns:
      The output of the final dropout layer.
    """
    initializer = slim.variance_scaling_initializer(
        factor=2.0, mode='FAN_AVG', uniform=True)
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=initializer):
        layer_specs = zip(hparams.temporal_sizes,
                          hparams.freq_sizes,
                          hparams.num_filters,
                          hparams.pool_sizes,
                          hparams.dropout_keep_amts)
        net = inputs
        for index, spec in enumerate(layer_specs):
            temporal_size, freq_size, filters, pool_size, keep_amt = spec
            suffix = str(index)
            net = slim.conv2d(net,
                              filters,
                              [temporal_size, freq_size],
                              scope='conv' + suffix,
                              normalizer_fn=slim.batch_norm)
            if pool_size > 1:
                # Pool along axis 2 only; axis 1 is left untouched.
                net = slim.max_pool2d(net,
                                      [1, pool_size],
                                      stride=[1, pool_size],
                                      scope='pool' + suffix)
            if keep_amt < 1:
                net = slim.dropout(net, keep_amt, scope='dropout' + suffix)

        # Flatten while preserving batch and time dimensions.
        dynamic_shape = tf.shape(net)
        flat_width = net.shape[2] * net.shape[3]
        net = tf.reshape(net,
                         (dynamic_shape[0], dynamic_shape[1], flat_width),
                         'flatten_end')

        net = slim.fully_connected(net, hparams.fc_size, scope='fc_end')
        net = slim.dropout(net, hparams.fc_dropout_keep_amt,
                           scope='dropout_end')
    return net
def vgg_gel2(gel0_pre, gel0_post, gel1_pre, gel1_post,
             num_classes = 2,
             is_training = True,
             update_top_only = False,
             fc_conv_padding='VALID',
             dropout_keep_prob = 0.5,
             diff = True,
             reuse = False,
             scope = 'vgg_16'):
    """Builds a VGG-based network over up to two (pre, post) input pairs.

    With `diff=True`, each available pair is fed to `vgg_dual_16` as
    `(post - pre, post)` with `num_classes=None`, and the per-pair outputs
    are concatenated along axis 1. The second pair reuses the variables of
    the first. With `diff=False`, each pair goes through `pair_vgg`, the two
    results are concatenated along axis 3 and a convolutional head
    ('fc6_', 'fc7_', optional 'fc8_') is applied.

    Args:
      gel0_pre: "Pre" input of the first pair; the pair is skipped in diff
        mode when this is None.
      gel0_post: "Post" input of the first pair.
      gel1_pre: "Pre" input of the second pair; the pair is skipped in diff
        mode when this is None.
      gel1_post: "Post" input of the second pair.
      num_classes: Output depth of 'fc8_' in non-diff mode (None skips the
        layer). Not used in diff mode.
      is_training: Forwarded to the sub-networks and dropout layers.
      update_top_only: Forwarded to the sub-networks.
      fc_conv_padding: Padding of the 7x7 fc6 convolution.
      dropout_keep_prob: Keep probability of the dropout layers.
      diff: Selects between the two modes described above.
      reuse: Whether the variables of `scope` already exist.
      scope: Variable scope name.

    Returns:
      The output tensor of the selected mode.

    Raises:
      ValueError: In diff mode, if both `gel0_pre` and `gel1_pre` are None.
    """
    if diff:
        nets = []
        r = reuse
        for pre, post in ((gel0_pre, gel0_post), (gel1_pre, gel1_post)):
            if pre is None:
                continue
            # dropout_keep_prob and fc_conv_padding are forwarded so that
            # non-default values are no longer silently ignored.
            nets.append(vgg_dual_16(post - pre, post,
                                    reuse = r,
                                    is_training = is_training,
                                    num_classes = None,
                                    dropout_keep_prob = dropout_keep_prob,
                                    fc_conv_padding = fc_conv_padding,
                                    update_top_only = update_top_only,
                                    scope = scope)[0])
            # Every later pair shares the variables created by the first.
            r = True
        if not nets:
            raise ValueError(
                'At least one of gel0_pre / gel1_pre must be provided.')
        return tf.concat(nets, 1)

    net0 = pair_vgg(gel0_post, gel0_pre,
                    is_training = is_training,
                    update_top_only = update_top_only,
                    scope = scope)
    net1 = pair_vgg(gel1_post, gel1_pre,
                    reuse = True,
                    is_training = is_training,
                    update_top_only = update_top_only,
                    scope = scope)
    with tf.variable_scope(scope, scope), \
         slim.arg_scope(vgg_arg_scope(reuse)):
        net = tf.concat([net0, net1], 3)
        net = slim.conv2d(net, 2048, [7, 7], padding = fc_conv_padding,
                          scope = 'fc6_')
        net = slim.dropout(net, dropout_keep_prob, is_training = is_training,
                           scope = 'dropout6')
        net = slim.conv2d(net, 2048, [1, 1], scope = 'fc7_')
        net = slim.dropout(net, dropout_keep_prob, is_training = is_training,
                           scope = 'dropout7')
        if num_classes is not None:
            net = slim.conv2d(net, num_classes, [1, 1],
                              activation_fn = None,
                              normalizer_fn = None,
                              scope = 'fc8_')
        # Keep only spatial position (0, 0) of axes 1 and 2.
        net = net[:, 0, 0, :]
    return net
def build(self, inputs):
    """Returns an InceptionV3FCN model with configurable conv2d normalization.

    Args:
      inputs: a map from input string names to tensors. Required:
        * IMAGES: a tensor of shape [batch, height, width, channels]

    Returns:
      A dictionary from network layer names to the corresponding layer
      activation Tensors. Includes:
        * PRE_LOGITS: activation layer preceding LOGITS
        * LOGITS: the pre-softmax activations, size [batch, num_classes]
        * PROBABILITIES: softmax probs, size [batch, num_classes]
    """
    images = self._get_tensor(inputs, self.IMAGES, expected_rank=4)
    conv_scope = scope_utils.get_conv_scope(self._conv_scope_params,
                                            self._is_training)

    with slim.arg_scope(conv_scope):
        net, end_points = self._network_base(
            images,
            min_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier)

        # Final pooling and prediction
        with tf.variable_scope('Logits'):
            # 1 x 1 x 768
            net = slim.dropout(net,
                               keep_prob=self._prelogit_dropout_keep_prob,
                               is_training=self._is_training,
                               scope='Dropout_1b')
            end_points[self.PRE_LOGITS] = net

            # 1 x 1 x num_classes
            logits = slim.conv2d(net,
                                 self._num_classes,
                                 [1, 1],
                                 activation_fn=None,
                                 normalizer_fn=None,
                                 stride=self._logits_stride,
                                 scope='Conv2d_1c_1x1')
            probabilities = tf.nn.softmax(logits)
            end_points[self.PROBABILITIES_TENSOR] = probabilities

            if self._logits_stride != 1:
                # Strided logits keep their spatial layout.
                end_points[self.LOGITS] = logits
                end_points[self.PROBABILITIES] = probabilities
            else:
                # Reshape to remove height and width
                end_points[self.LOGITS] = tf.squeeze(
                    logits, [1, 2], name='SpatialSqueeze')
                end_points[self.PROBABILITIES] = tf.squeeze(
                    probabilities, [1, 2], name='SpatialSqueeze')
    return end_points
def preact_conv(inputs, n_filters, kernel_size=[3, 3], dropout_p=0.2):
    """
    Basic pre-activation layer for DenseNets.

    Applies, in order: fused batch normalization, ReLU, convolution (no
    activation, no normalizer), and dropout with keep probability
    `1.0 - dropout_p`. Dropout is skipped when `dropout_p` equals 0.0.
    """
    normalized = slim.batch_norm(inputs, fused=True)
    activated = tf.nn.relu(normalized)
    features = slim.conv2d(activated, n_filters, kernel_size,
                           activation_fn=None, normalizer_fn=None)
    if dropout_p == 0.0:
        return features
    return slim.dropout(features, keep_prob=(1.0 - dropout_p))
def predict(self, features, num_predictions_per_location=1):
    """Predicts boxes.

    Features are averaged over axes 1 and 2, flattened, optionally passed
    through dropout and projected by a fully connected layer to the box
    encodings.

    Args:
      features: A float tensor of shape [batch_size, height, width,
        channels] containing features for a batch of images.
      num_predictions_per_location: Int containing number of predictions
        per location.

    Returns:
      box_encodings: A float tensor of shape
        [batch_size, 1, num_classes, code_size] representing the location
        of the objects.

    Raises:
      ValueError: If num_predictions_per_location is not 1.
    """
    if num_predictions_per_location != 1:
        raise ValueError('Only num_predictions_per_location=1 is supported')
    # `keepdims` replaces the deprecated `keep_dims` argument.
    spatial_averaged_roi_pooled_features = tf.reduce_mean(
        features, [1, 2], keepdims=True, name='AvgPool')
    flattened_roi_pooled_features = slim.flatten(
        spatial_averaged_roi_pooled_features)
    if self._use_dropout:
        flattened_roi_pooled_features = slim.dropout(
            flattened_roi_pooled_features,
            keep_prob=self._dropout_keep_prob,
            is_training=self._is_training)
    # A single box is shared by all classes, or one box is predicted per
    # class.
    number_of_boxes = 1
    if not self._share_box_across_classes:
        number_of_boxes = self._num_classes

    with slim.arg_scope(self._fc_hyperparams_fn()):
        box_encodings = slim.fully_connected(
            flattened_roi_pooled_features,
            number_of_boxes * self._box_code_size,
            reuse=tf.AUTO_REUSE,
            activation_fn=None,
            scope='BoxEncodingPredictor')
    box_encodings = tf.reshape(
        box_encodings, [-1, 1, number_of_boxes, self._box_code_size])
    return box_encodings
def predict(self, features, num_predictions_per_location=1):
    """Predicts boxes and class scores.

    Features are averaged over axes 1 and 2, flattened, optionally passed
    through dropout and projected by a fully connected layer to the class
    scores.

    Args:
      features: A float tensor of shape [batch_size, height, width,
        channels] containing features for a batch of images.
      num_predictions_per_location: Int containing number of predictions
        per location.

    Returns:
      class_predictions_with_background: A float tensor of shape
        [batch_size, 1, num_class_slots] representing the class predictions
        for the proposals.

    Raises:
      ValueError: If num_predictions_per_location is not 1.
    """
    if num_predictions_per_location != 1:
        raise ValueError('Only num_predictions_per_location=1 is supported')
    # `keepdims` replaces the deprecated `keep_dims` argument.
    spatial_averaged_roi_pooled_features = tf.reduce_mean(
        features, [1, 2], keepdims=True, name='AvgPool')
    flattened_roi_pooled_features = slim.flatten(
        spatial_averaged_roi_pooled_features)
    if self._use_dropout:
        flattened_roi_pooled_features = slim.dropout(
            flattened_roi_pooled_features,
            keep_prob=self._dropout_keep_prob,
            is_training=self._is_training)

    with slim.arg_scope(self._fc_hyperparams_fn()):
        class_predictions_with_background = slim.fully_connected(
            flattened_roi_pooled_features,
            self._num_class_slots,
            reuse=tf.AUTO_REUSE,
            activation_fn=None,
            scope=self._scope)
    class_predictions_with_background = tf.reshape(
        class_predictions_with_background,
        [-1, 1, self._num_class_slots])
    return class_predictions_with_background
def project_detection_features(self, detection_features):
    """Projects detection features to embedding space.

    With the MLP adaptation, a ReLU hidden layer and dropout precede the
    linear output layer; with the LINEAR adaptation only the linear output
    layer is built. The output width is the BERT hidden size.

    Args:
      detection_features: Detection features.

    Returns:
      embeddings: Projected detection features.

    Raises:
      ValueError: If the configured adaptation method is neither MLP nor
        LINEAR.
    """
    is_training = self._is_training
    options = self._model_proto

    if options.detection_adaptation == model_pb2.MLP:
        detection_features = slim.fully_connected(
            detection_features,
            options.detection_mlp_hidden_units,
            activation_fn=tf.nn.relu,
            scope='detection/project')
        detection_features = slim.dropout(
            detection_features,
            keep_prob=options.dropout_keep_prob,
            is_training=is_training)
    elif options.detection_adaptation != model_pb2.LINEAR:
        raise ValueError('Invalid detection adaptation method.')

    # Both supported methods end with the same linear adaptation layer.
    return slim.fully_connected(detection_features,
                                self._bert_config.hidden_size,
                                activation_fn=None,
                                scope='detection/adaptation')
def predict(self, features, num_predictions_per_location):
    """Predicts masks from image features.

    Optionally applies dropout, then a single convolution, and reshapes the
    result into per-anchor masks.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.
      num_predictions_per_location: Number of box predictions to be made per
        spatial location.

    Returns:
      mask_predictions: A tensor of shape
        [batch_size, num_anchors, num_classes, mask_height, mask_width]
        representing the mask predictions for the proposals.
    """
    # Class-agnostic heads predict a single mask per anchor.
    num_masks = 1 if self._masks_are_class_agnostic else self._num_classes
    num_mask_channels = num_masks * self._mask_height * self._mask_width

    net = features
    if self._use_dropout:
        net = slim.dropout(net, keep_prob=self._dropout_keep_prob)

    mask_predictions = slim.conv2d(
        net,
        num_predictions_per_location * num_mask_channels,
        [self._kernel_size, self._kernel_size],
        activation_fn=None,
        stride=1,
        padding='SAME',
        normalizer_fn=None,
        scope='MaskPredictor')

    # Prefer the static batch size; fall back to the dynamic one.
    batch_size = features.get_shape().as_list()[0]
    if batch_size is None:
        batch_size = tf.shape(features)[0]
    return tf.reshape(
        mask_predictions,
        [batch_size, -1, num_masks, self._mask_height, self._mask_width])
def attention_inception_v3(inputs,
                           num_classes=1000,
                           is_training=True,
                           dropout_keep_prob=0.8,
                           min_depth=16,
                           depth_multiplier=1.0,
                           prediction_fn=slim.softmax,
                           spatial_squeeze=True,
                           reuse=None,
                           create_aux_logits=True,
                           scope='InceptionV3',
                           global_pool=False,
                           attention_module='',
                           attention_position='all'):
    """Inception V3 classifier with an optional attention module.

    Based on "Rethinking the Inception Architecture for Computer Vision"
    (http://arxiv.org/abs/1512.00567) by Christian Szegedy, Vincent Vanhoucke,
    Sergey Ioffe, Jonathon Shlens and Zbigniew Wojna.

    The feature extractor is built by `attention_inception_v3_base`; this
    function adds the optional auxiliary head, the final pooling and the
    logits layer. The default image size used to train this network is
    299x299.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes. If 0 or None, the logits layer
        and the auxiliary head are omitted and the pooled features (before
        dropout) are returned instead.
      is_training: whether is training or not; applied to batch norm and
        dropout layers.
      dropout_keep_prob: the percentage of activation values that are
        retained.
      min_depth: Minimum depth value (number of channels) for all convolution
        ops. Enforced when depth_multiplier < 1, and not an active constraint
        when depth_multiplier >= 1.
      depth_multiplier: Float multiplier for the depth (number of channels)
        for all convolution ops. The value must be greater than zero.
      prediction_fn: a function to get predictions out of logits. If None (or
        otherwise falsy) no 'Predictions' end point is added.
      spatial_squeeze: if True, logits is of shape [B, C], if false logits is
        of shape [B, 1, 1, C], where B is batch_size and C is number of
        classes.
      reuse: whether or not the network and its variables should be reused.
        To be able to reuse 'scope' must be given.
      create_aux_logits: Whether to create the auxiliary logits.
      scope: Optional variable_scope.
      global_pool: Optional boolean flag to control the avgpooling before the
        logits layer. If false or unset, pooling is done with a fixed window
        that reduces default-sized inputs to 1x1, while larger inputs lead to
        larger outputs. If true, any input size is pooled down to 1x1.
      attention_module: Optional attention module name, forwarded unchanged
        to `attention_inception_v3_base`. Accepted values are '' or
        'se_block'.
      attention_position: Optional attention position, forwarded unchanged to
        `attention_inception_v3_base`. Accepted values are 'head',
        'extractor', and 'all'.

    Returns:
      net: a Tensor with the logits (pre-softmax activations) if num_classes
        is a non-zero integer, or the non-dropped-out input to the logits
        layer if num_classes is 0 or None.
      end_points: a dictionary from components of the network to the
        corresponding activation.

    Raises:
      ValueError: if 'depth_multiplier' is less than or equal to zero.
    """
    if depth_multiplier <= 0:
        raise ValueError('depth_multiplier is not greater than zero.')

    def depth(d):
        # Scale a nominal channel count, never going below min_depth.
        return max(int(d * depth_multiplier), min_depth)

    with tf.variable_scope(
            scope, 'InceptionV3', [inputs], reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = attention_inception_v3_base(
                inputs,
                scope=scope,
                min_depth=min_depth,
                depth_multiplier=depth_multiplier,
                attention_module=attention_module,
                attention_position=attention_position)

            # Auxiliary Head logits
            if create_aux_logits and num_classes:
                with slim.arg_scope(
                        [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1, padding='SAME'):
                    aux_logits = end_points['Mixed_6e']
                    with tf.variable_scope('AuxLogits'):
                        aux_logits = slim.avg_pool2d(
                            aux_logits, [5, 5], stride=3, padding='VALID',
                            scope='AvgPool_1a_5x5')
                        aux_logits = slim.conv2d(
                            aux_logits, depth(128), [1, 1],
                            scope='Conv2d_1b_1x1')

                        # Shape of feature map before the final layer.
                        # NOTE(review): the helper presumably shrinks the
                        # 5x5 kernel for small feature maps -- confirm
                        # against its definition.
                        kernel_size = _reduced_kernel_size_for_small_input(
                            aux_logits, [5, 5])
                        aux_logits = slim.conv2d(
                            aux_logits, depth(768), kernel_size,
                            weights_initializer=trunc_normal(0.01),
                            padding='VALID',
                            scope='Conv2d_2a_{}x{}'.format(*kernel_size))
                        aux_logits = slim.conv2d(
                            aux_logits, num_classes, [1, 1],
                            activation_fn=None,
                            normalizer_fn=None,
                            weights_initializer=trunc_normal(0.001),
                            scope='Conv2d_2b_1x1')
                        if spatial_squeeze:
                            aux_logits = tf.squeeze(
                                aux_logits, [1, 2], name='SpatialSqueeze')
                        end_points['AuxLogits'] = aux_logits

            # Final pooling and prediction
            with tf.variable_scope('Logits'):
                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(
                        input_tensor=net, axis=[1, 2], keepdims=True,
                        name='GlobalPool')
                    end_points['global_pool'] = net
                else:
                    # Pooling with a fixed kernel size.
                    kernel_size = _reduced_kernel_size_for_small_input(
                        net, [8, 8])
                    net = slim.avg_pool2d(
                        net, kernel_size, padding='VALID',
                        scope='AvgPool_1a_{}x{}'.format(*kernel_size))
                    end_points['AvgPool_1a'] = net
                if not num_classes:
                    # Feature-extractor mode: hand back the pooled features.
                    return net, end_points
                # 1 x 1 x 2048
                net = slim.dropout(
                    net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
                end_points['PreLogits'] = net
                # 2048
                logits = slim.conv2d(
                    net, num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    scope='Conv2d_1c_1x1')
                if spatial_squeeze:
                    logits = tf.squeeze(
                        logits, [1, 2], name='SpatialSqueeze')
                # 1000
            end_points['Logits'] = logits
            # Only compute predictions when a prediction function is given,
            # so that prediction_fn=None yields logits without failing.
            if prediction_fn:
                end_points['Predictions'] = prediction_fn(
                    logits, scope='Predictions')
    return logits, end_points
def vgg_a(inputs,
          num_classes=1000,
          is_training=True,
          dropout_keep_prob=0.5,
          spatial_squeeze=True,
          scope='vgg_a',
          fc_conv_padding='VALID',
          global_pool=False):
    """Builds the 11-layer Oxford VGG network (configuration A).

    Note: All the fully_connected layers have been transformed to conv2d
    layers. To use in classification mode, resize input to 224x224.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes. If 0 or None, the logits layer
        is omitted and the input features to the logits layer are returned
        instead.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the
        dropout layers during training.
      spatial_squeeze: whether or not should squeeze the spatial dimensions of
        the outputs. Useful to remove unnecessary dimensions for
        classification.
      scope: Optional scope for the variables.
      fc_conv_padding: the type of padding to use for the fully connected
        layer that is implemented as a convolutional layer. Use 'SAME' padding
        if you are applying the network in a fully convolutional manner and
        want to get a prediction map downsampled by a factor of 32 as an
        output. Otherwise, the output prediction map will be
        (input / 32) - 6 in case of 'VALID' padding.
      global_pool: Optional boolean flag. If True, the input to the
        classification layer is avgpooled to size 1x1, for any input size.
        (This is not part of the original VGG architecture.)

    Returns:
      net: the output of the logits layer (if num_classes is a non-zero
        integer), or the input to the logits layer (if num_classes is 0 or
        None).
      end_points: a dict of tensors with intermediate activations.
    """
    # One entry per stage: (conv repetitions, conv depth, conv scope,
    # pool scope).
    conv_stages = (
        (1, 64, 'conv1', 'pool1'),
        (1, 128, 'conv2', 'pool2'),
        (2, 256, 'conv3', 'pool3'),
        (2, 512, 'conv4', 'pool4'),
        (2, 512, 'conv5', 'pool5'),
    )

    with tf.compat.v1.variable_scope(scope, 'vgg_a', [inputs]) as sc:
        collection_name = sc.original_name_scope + '_end_points'
        # Collect outputs of conv2d and max_pool2d layers.
        with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                            outputs_collections=collection_name):
            net = inputs
            for repetitions, num_filters, conv_scope, pool_scope in conv_stages:
                net = slim.repeat(net, repetitions, slim.conv2d, num_filters,
                                  [3, 3], scope=conv_scope)
                net = slim.max_pool2d(net, [2, 2], scope=pool_scope)

            # Use conv2d instead of fully_connected layers.
            net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding,
                              scope='fc6')
            net = slim.dropout(net, dropout_keep_prob,
                               is_training=is_training, scope='dropout6')
            net = slim.conv2d(net, 4096, [1, 1], scope='fc7')

            # Turn the collected layer outputs into a dictionary.
            end_points = slim.utils.convert_collection_to_dict(
                collection_name)

            if global_pool:
                net = tf.reduce_mean(input_tensor=net, axis=[1, 2],
                                     keepdims=True, name='global_pool')
                end_points['global_pool'] = net

            if num_classes:
                net = slim.dropout(net, dropout_keep_prob,
                                   is_training=is_training, scope='dropout7')
                net = slim.conv2d(net, num_classes, [1, 1],
                                  activation_fn=None,
                                  normalizer_fn=None,
                                  scope='fc8')
                if spatial_squeeze:
                    net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
def mobilenet_v1(inputs,
                 num_classes=1000,
                 dropout_keep_prob=0.999,
                 is_training=True,
                 min_depth=8,
                 depth_multiplier=1.0,
                 conv_defs=None,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='MobilenetV1',
                 global_pool=False):
    """Builds the Mobilenet v1 classification model.

    The feature extractor comes from `mobilenet_v1_base`; this function adds
    the pooling, dropout and 1x1 convolutional logits layer on top of it.

    Args:
      inputs: a tensor of shape [batch_size, height, width, channels].
      num_classes: number of predicted classes. If 0 or None, the logits layer
        is omitted and the input features to the logits layer (before dropout)
        are returned instead.
      dropout_keep_prob: the percentage of activation values that are
        retained.
      is_training: whether is training or not.
      min_depth: Minimum depth value (number of channels) for all convolution
        ops. Enforced when depth_multiplier < 1, and not an active constraint
        when depth_multiplier >= 1.
      depth_multiplier: Float multiplier for the depth (number of channels)
        for all convolution ops. The value must be greater than zero. Typical
        usage will be to set this value in (0, 1) to reduce the number of
        parameters or computation cost of the model.
      conv_defs: A list of ConvDef namedtuples specifying the net
        architecture.
      prediction_fn: a function to get predictions out of logits. When falsy,
        no 'Predictions' end point is added.
      spatial_squeeze: if True, logits is of shape [B, C], if false logits is
        of shape [B, 1, 1, C], where B is batch_size and C is number of
        classes.
      reuse: whether or not the network and its variables should be reused.
        To be able to reuse 'scope' must be given.
      scope: Optional variable_scope.
      global_pool: Optional boolean flag to control the avgpooling before the
        logits layer. If false or unset, pooling is done with a fixed window
        that reduces default-sized inputs to 1x1, while larger inputs lead to
        larger outputs. If true, any input size is pooled down to 1x1.

    Returns:
      net: a Tensor with the logits (pre-softmax activations) if num_classes
        is a non-zero integer, or the non-dropped-out input to the logits
        layer if num_classes is 0 or None.
      end_points: a dictionary from components of the network to the
        corresponding activation.

    Raises:
      ValueError: Input rank is invalid.
    """
    rank = len(inputs.get_shape().as_list())
    if rank != 4:
        raise ValueError(
            'Invalid input tensor rank, expected 4, was: %d' % rank)

    with tf.compat.v1.variable_scope(
            scope, 'MobilenetV1', [inputs], reuse=reuse) as net_scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            features, end_points = mobilenet_v1_base(
                inputs,
                scope=net_scope,
                min_depth=min_depth,
                depth_multiplier=depth_multiplier,
                conv_defs=conv_defs)

            with tf.compat.v1.variable_scope('Logits'):
                if global_pool:
                    # Average over both spatial axes, whatever their size.
                    pooled = tf.reduce_mean(
                        input_tensor=features, axis=[1, 2], keepdims=True,
                        name='global_pool')
                    end_points['global_pool'] = pooled
                else:
                    # Fixed pooling window, as returned by the helper.
                    window = _reduced_kernel_size_for_small_input(
                        features, [7, 7])
                    pooled = slim.avg_pool2d(
                        features, window, padding='VALID',
                        scope='AvgPool_1a')
                    end_points['AvgPool_1a'] = pooled

                if not num_classes:
                    return pooled, end_points

                # 1 x 1 x 1024
                dropped = slim.dropout(
                    pooled, keep_prob=dropout_keep_prob, scope='Dropout_1b')
                logits = slim.conv2d(
                    dropped, num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    scope='Conv2d_1c_1x1')
                if spatial_squeeze:
                    logits = tf.squeeze(
                        logits, [1, 2], name='SpatialSqueeze')

            end_points['Logits'] = logits
            if prediction_fn:
                end_points['Predictions'] = prediction_fn(
                    logits, scope='Predictions')
    return logits, end_points
def inception_resnet_v1(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV1'):
    """Creates the Inception Resnet V1 model with a bottleneck output layer.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      bottleneck_layer_size: number of output units of the final
        'Bottleneck' fully connected layer.
      reuse: whether or not the network and its variables should be reused.
        To be able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: the output of the activation-free 'Bottleneck' fully connected
        layer.
      end_points: a dictionary of intermediate activations recorded while
        building the model.
    """
    end_points = {}

    def _record(name, tensor):
        # Store an intermediate activation and pass it through.
        end_points[name] = tensor
        return tensor

    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope(
                    [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1, padding='SAME'):
                # Shape comments are carried over from the original source.
                # 149 x 149 x 32
                net = _record('Conv2d_1a_3x3', slim.conv2d(
                    inputs, 32, 3, stride=2, padding='VALID',
                    scope='Conv2d_1a_3x3'))
                # 147 x 147 x 32
                net = _record('Conv2d_2a_3x3', slim.conv2d(
                    net, 32, 3, padding='VALID', scope='Conv2d_2a_3x3'))
                # 147 x 147 x 64
                net = _record('Conv2d_2b_3x3', slim.conv2d(
                    net, 64, 3, scope='Conv2d_2b_3x3'))
                # 73 x 73 x 64
                net = _record('MaxPool_3a_3x3', slim.max_pool2d(
                    net, 3, stride=2, padding='VALID',
                    scope='MaxPool_3a_3x3'))
                # 73 x 73 x 80
                net = _record('Conv2d_3b_1x1', slim.conv2d(
                    net, 80, 1, padding='VALID', scope='Conv2d_3b_1x1'))
                # 71 x 71 x 192
                net = _record('Conv2d_4a_3x3', slim.conv2d(
                    net, 192, 3, padding='VALID', scope='Conv2d_4a_3x3'))
                # 35 x 35 x 256
                net = _record('Conv2d_4b_3x3', slim.conv2d(
                    net, 256, 3, stride=2, padding='VALID',
                    scope='Conv2d_4b_3x3'))

                # 5 x Inception-resnet-A
                net = slim.repeat(net, 5, block35, scale=0.17)

                # Reduction-A
                with tf.variable_scope('Mixed_6a'):
                    net = reduction_a(net, 192, 192, 256, 384)
                end_points['Mixed_6a'] = net

                # 10 x Inception-Resnet-B
                net = slim.repeat(net, 10, block17, scale=0.10)

                # Reduction-B
                with tf.variable_scope('Mixed_7a'):
                    net = reduction_b(net)
                end_points['Mixed_7a'] = net

                # 5 x Inception-Resnet-C, then one more block without
                # activation.
                net = slim.repeat(net, 5, block8, scale=0.20)
                net = block8(net, activation_fn=None)

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    # Pool over the full spatial extent of the feature map.
                    # pylint: disable=no-member
                    spatial_dims = net.get_shape()[1:3]
                    net = slim.avg_pool2d(net, spatial_dims, padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)
                    net = slim.dropout(net, dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')
                    end_points['PreLogitsFlatten'] = net

                net = slim.fully_connected(
                    net, bottleneck_layer_size, activation_fn=None,
                    scope='Bottleneck', reuse=False)

    return net, end_points
def inception_resnet_v2(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV2'):
    """Creates the Inception Resnet V2 model with a bottleneck output layer.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      bottleneck_layer_size: number of output units of the final
        'Bottleneck' fully connected layer.
      reuse: whether or not the network and its variables should be reused.
        To be able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: the output of the activation-free 'Bottleneck' fully connected
        layer.
      end_points: a dictionary of intermediate activations recorded while
        building the model.
    """
    end_points = {}

    def _record(name, tensor):
        # Store an intermediate activation and pass it through.
        end_points[name] = tensor
        return tensor

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope(
                    [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1, padding='SAME'):
                # Shape comments are carried over from the original source.
                # 149 x 149 x 32
                net = _record('Conv2d_1a_3x3', slim.conv2d(
                    inputs, 32, 3, stride=2, padding='VALID',
                    scope='Conv2d_1a_3x3'))
                # 147 x 147 x 32
                net = _record('Conv2d_2a_3x3', slim.conv2d(
                    net, 32, 3, padding='VALID', scope='Conv2d_2a_3x3'))
                # 147 x 147 x 64
                net = _record('Conv2d_2b_3x3', slim.conv2d(
                    net, 64, 3, scope='Conv2d_2b_3x3'))
                # 73 x 73 x 64
                net = _record('MaxPool_3a_3x3', slim.max_pool2d(
                    net, 3, stride=2, padding='VALID',
                    scope='MaxPool_3a_3x3'))
                # 73 x 73 x 80
                net = _record('Conv2d_3b_1x1', slim.conv2d(
                    net, 80, 1, padding='VALID', scope='Conv2d_3b_1x1'))
                # 71 x 71 x 192
                net = _record('Conv2d_4a_3x3', slim.conv2d(
                    net, 192, 3, padding='VALID', scope='Conv2d_4a_3x3'))
                # 35 x 35 x 192
                net = _record('MaxPool_5a_3x3', slim.max_pool2d(
                    net, 3, stride=2, padding='VALID',
                    scope='MaxPool_5a_3x3'))

                # 35 x 35 x 320
                with tf.variable_scope('Mixed_5b'):
                    with tf.variable_scope('Branch_0'):
                        branch_0 = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
                    with tf.variable_scope('Branch_1'):
                        branch_1 = slim.conv2d(
                            net, 48, 1, scope='Conv2d_0a_1x1')
                        branch_1 = slim.conv2d(
                            branch_1, 64, 5, scope='Conv2d_0b_5x5')
                    with tf.variable_scope('Branch_2'):
                        branch_2 = slim.conv2d(
                            net, 64, 1, scope='Conv2d_0a_1x1')
                        branch_2 = slim.conv2d(
                            branch_2, 96, 3, scope='Conv2d_0b_3x3')
                        branch_2 = slim.conv2d(
                            branch_2, 96, 3, scope='Conv2d_0c_3x3')
                    with tf.variable_scope('Branch_3'):
                        branch_3 = slim.avg_pool2d(
                            net, 3, stride=1, padding='SAME',
                            scope='AvgPool_0a_3x3')
                        branch_3 = slim.conv2d(
                            branch_3, 64, 1, scope='Conv2d_0b_1x1')
                    net = tf.concat(
                        [branch_0, branch_1, branch_2, branch_3], 3)
                end_points['Mixed_5b'] = net

                net = slim.repeat(net, 10, block35, scale=0.17)

                # 17 x 17 x 1024
                with tf.variable_scope('Mixed_6a'):
                    with tf.variable_scope('Branch_0'):
                        branch_0 = slim.conv2d(
                            net, 384, 3, stride=2, padding='VALID',
                            scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        branch_1 = slim.conv2d(
                            net, 256, 1, scope='Conv2d_0a_1x1')
                        branch_1 = slim.conv2d(
                            branch_1, 256, 3, scope='Conv2d_0b_3x3')
                        branch_1 = slim.conv2d(
                            branch_1, 384, 3, stride=2, padding='VALID',
                            scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        branch_2 = slim.max_pool2d(
                            net, 3, stride=2, padding='VALID',
                            scope='MaxPool_1a_3x3')
                    net = tf.concat([branch_0, branch_1, branch_2], 3)
                end_points['Mixed_6a'] = net

                net = slim.repeat(net, 20, block17, scale=0.10)

                with tf.variable_scope('Mixed_7a'):
                    with tf.variable_scope('Branch_0'):
                        branch_0 = slim.conv2d(
                            net, 256, 1, scope='Conv2d_0a_1x1')
                        branch_0 = slim.conv2d(
                            branch_0, 384, 3, stride=2, padding='VALID',
                            scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        branch_1 = slim.conv2d(
                            net, 256, 1, scope='Conv2d_0a_1x1')
                        branch_1 = slim.conv2d(
                            branch_1, 288, 3, stride=2, padding='VALID',
                            scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        branch_2 = slim.conv2d(
                            net, 256, 1, scope='Conv2d_0a_1x1')
                        branch_2 = slim.conv2d(
                            branch_2, 288, 3, scope='Conv2d_0b_3x3')
                        branch_2 = slim.conv2d(
                            branch_2, 320, 3, stride=2, padding='VALID',
                            scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_3'):
                        branch_3 = slim.max_pool2d(
                            net, 3, stride=2, padding='VALID',
                            scope='MaxPool_1a_3x3')
                    net = tf.concat(
                        [branch_0, branch_1, branch_2, branch_3], 3)
                end_points['Mixed_7a'] = net

                # Nine scaled blocks, then one more without activation.
                net = slim.repeat(net, 9, block8, scale=0.20)
                net = block8(net, activation_fn=None)

                net = _record('Conv2d_7b_1x1', slim.conv2d(
                    net, 1536, 1, scope='Conv2d_7b_1x1'))

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    # Pool over the full spatial extent of the feature map.
                    # pylint: disable=no-member
                    spatial_dims = net.get_shape()[1:3]
                    net = slim.avg_pool2d(net, spatial_dims, padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)
                    net = slim.dropout(net, dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')
                    end_points['PreLogitsFlatten'] = net

                net = slim.fully_connected(
                    net, bottleneck_layer_size, activation_fn=None,
                    scope='Bottleneck', reuse=False)

    return net, end_points