def inception_v2_ssd(img):
    with slim.arg_scope(inception_v2.inception_v2_arg_scope()):
        logits, end_point = inception_v2.inception_v2_base(img)
        c1 = end_point['Mixed_3c']
        c2 = end_point['Mixed_4e']
        c3 = end_point['Mixed_5c']
    return c1, c2, c3
def inception_v2_ssd(img):
    with slim.arg_scope(inception_v2.inception_v2_arg_scope()):
        logits, end_point = inception_v2.inception_v2_base(img)
        vbs = slim.get_variables_to_restore()
        c1 = end_point['Mixed_3c']
        c2 = end_point['Mixed_4e']
        c3 = end_point['Mixed_5c']
    return c1, c2, c3, vbs
def inception_v2_ssd(img, cfg):
    """Builds SSD-style localization/confidence heads on Inception V2 features."""
    with slim.arg_scope(inception_v2.inception_v2_arg_scope()):
        logits, end_point = inception_v2.inception_v2_base(img)
        Mixed_3c = end_point['Mixed_3c']
        Mixed_4e = end_point['Mixed_4e']
        cell_11 = end_point['Mixed_5c']
        vbs = slim.get_trainable_variables()

        # Upsample deeper endpoints and fuse them with shallower ones.
        cell_11 = tf.image.resize_bilinear(cell_11, size=[32, 32])
        cell_11 = tf.concat([cell_11, Mixed_4e], axis=3)
        cell_7 = tf.image.resize_bilinear(Mixed_4e, size=[64, 64])
        cell_7 = tf.concat([cell_7, Mixed_3c], axis=3)

        cell_11 = slim.conv2d(cell_11, 1024, kernel_size=1, activation_fn=slim.nn.relu)
        cell_7 = slim.conv2d(cell_7, 512, kernel_size=3, activation_fn=slim.nn.relu)
        cell_7 = slim.conv2d(cell_7, 256, kernel_size=1, activation_fn=slim.nn.relu)

        cv6 = slim.conv2d(cell_11, 1024, kernel_size=3, rate=6,
                          activation_fn=slim.nn.relu, scope='conv6')
        cv7 = slim.conv2d(cv6, 1024, kernel_size=1,
                          activation_fn=slim.nn.relu, scope='conv7')
        s = utils.normalize_to_target(cell_7, target_norm_value=12.0, dim=1)

        # Extra SSD feature maps with decreasing spatial resolution.
        cv8 = slim.conv2d(cv7, 256, kernel_size=1, stride=1, scope='conv8_0')
        cv8 = slim.conv2d(cv8, 512, kernel_size=3, stride=2, scope='conv8_1')
        cv9 = slim.conv2d(cv8, 128, kernel_size=1, stride=1, scope='conv9_0')
        cv9 = slim.conv2d(cv9, 256, kernel_size=3, stride=2, scope='conv9_1')
        cv10 = slim.conv2d(cv9, 128, kernel_size=1, stride=1, scope='conv10_0')
        cv10 = slim.conv2d(cv10, 256, kernel_size=3, stride=2, scope='conv10_1')
        cv11 = slim.conv2d(cv10, 128, kernel_size=1, stride=1, scope='conv11_0')
        cv11 = slim.conv2d(cv11, 256, kernel_size=3, stride=2, scope='conv11_1')

        source = [s, cv7, cv8, cv9, cv10, cv11]
        conf = []
        loc = []
        for cv, num in zip(source, cfg.Config['aspect_num']):
            print(num)
            loc.append(slim.conv2d(cv, num * 4, kernel_size=3, stride=1,
                                   activation_fn=None))
            conf.append(slim.conv2d(cv, num * cfg.Config['num_classes'],
                                    kernel_size=3, stride=1, activation_fn=None))
        print(loc)
        loc = tf.concat(
            [tf.reshape(o, shape=(cfg.batch_size, -1, 4)) for o in loc], axis=1)
        conf = tf.concat(
            [tf.reshape(o, shape=(cfg.batch_size, -1, cfg.Config['num_classes']))
             for o in conf], axis=1)
        return loc, conf, vbs
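A minimal sketch (NumPy only, with made-up feature-map sizes and anchor counts) of the reshape-and-concat step at the end of inception_v2_ssd: each source map of height H, width W with `num` anchors per cell contributes H*W*num boxes to the concatenated loc tensor.

import numpy as np

batch = 2
# (H, W, anchors per cell) for two illustrative source maps; values are made up.
sources = [(64, 64, 4), (32, 32, 6)]
loc = [np.zeros((batch, h, w, a * 4), dtype=np.float32) for h, w, a in sources]
loc = np.concatenate([o.reshape(batch, -1, 4) for o in loc], axis=1)
print(loc.shape)  # (2, 64*64*4 + 32*32*6, 4) == (2, 22528, 4)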
def get_feature_map(self):
    input_image = self._processor()
    bn_params = {
        'is_training': False,
        'scale': False,
        'decay': 0.9997,
        'epsilon': 0.001
    }
    with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=bn_params):
        _, activations = inception_v2.inception_v2_base(
            input_image, final_endpoint='Mixed_4e')
        feature_map = activations['Mixed_4e']
    return feature_map
def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(
            tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
            tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    with tf.control_dependencies([shape_assert]):
        with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope:
            with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
                                       batch_norm_scale=True,
                                       train_batch_norm=self._train_batch_norm):
                _, activations = inception_v2.inception_v2_base(
                    preprocessed_inputs,
                    final_endpoint='Mixed_4e',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope)
    return tf.concat([
        slim.avg_pool2d(activations['Mixed_3c'], [2, 2], stride=2,
                        scope='ex_pool3c', padding='SAME'),
        activations['Mixed_4e']
    ], 3, name='ex_concat'), 0
def inception_v2_ssd(img, cfg):
    """Builds SSD-style heads plus a mask feature pyramid on Inception V2 features."""
    with slim.arg_scope(inception_v2.inception_v2_arg_scope()):
        logits, end_point = inception_v2.inception_v2_base(img)
        Mixed_3c = end_point['Mixed_3c']
        Mixed_4e = end_point['Mixed_4e']
        Mixed_5c = end_point['Mixed_5c']
        vbs = slim.get_trainable_variables()
        # vbs = None

        cell_11 = tf.image.resize_bilinear(
            Mixed_5c,
            size=[int(32 * (cfg.image_size / 512)), int(32 * (cfg.image_size / 512))])
        cell_11 = tf.concat([cell_11, Mixed_4e], axis=3)
        cell_7 = tf.image.resize_bilinear(
            Mixed_4e,
            size=[int(64 * (cfg.image_size / 512)), int(64 * (cfg.image_size / 512))])
        cell_7 = tf.concat([cell_7, Mixed_3c], axis=3)

        mask_fp = get_mask_fp(Mixed_3c, Mixed_4e, Mixed_5c)

        cell_11 = slim.conv2d(cell_11, 1024, kernel_size=1, activation_fn=slim.nn.relu)
        cell_7 = slim.conv2d(cell_7, 512, kernel_size=3, activation_fn=slim.nn.relu)
        cell_7 = slim.conv2d(cell_7, 256, kernel_size=1, activation_fn=slim.nn.relu)

        cv6 = slim.conv2d(cell_11, 1024, kernel_size=3, rate=6,
                          activation_fn=slim.nn.relu, scope='conv6')
        cv7 = slim.conv2d(cv6, 1024, kernel_size=1,
                          activation_fn=slim.nn.relu, scope='conv7')
        s = utils.normalize_to_target(cell_7, target_norm_value=cfg.norm_value, dim=1)

        cv8 = inception(cv7, out_put=512, name='cv8', stride=2)
        cv9 = inception(cv8, out_put=256, name='cv9', stride=2)
        cv10 = inception(cv9, out_put=256, name='cv10', stride=2)
        cv11 = inception(cv10, out_put=256, name='cv11', stride=2)

        source = [s, cv7, cv8, cv9, cv10, cv11]
        conf = []
        loc = []
        for cv, num in zip(source, cfg.Config['aspect_num']):
            loc.append(slim.conv2d(cv, num * 4, kernel_size=3, stride=1,
                                   activation_fn=None))
            conf.append(slim.conv2d(cv, num * cfg.Config['num_classes'],
                                    kernel_size=3, stride=1, activation_fn=None))
        loc = tf.concat(
            [tf.reshape(o, shape=(cfg.batch_size, -1, 4)) for o in loc], axis=1)
        conf = tf.concat(
            [tf.reshape(o, shape=(cfg.batch_size, -1, cfg.Config['num_classes']))
             for o in conf], axis=1)
        return loc, conf, mask_fp, vbs
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(
            tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
            tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    feature_map_layout = {
        'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
    }

    with tf.control_dependencies([shape_assert]):
        with slim.arg_scope(self._conv_hyperparams):
            with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope:
                _, image_features = inception_v2.inception_v2_base(
                    preprocessed_inputs,
                    final_endpoint='Mixed_5c',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope)
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

    ret = []
    for endpoint_indicator, feature_map in zip(self._endpoints_indicator,
                                               feature_maps.values()):
        if endpoint_indicator == '1':
            ret.append(feature_map)
    return ret
def extract_features(self, inputs):
    """Extracts features from inputs.

    This function adds 4 additional feature maps on top of 'Mixed_4c' and
    'Mixed_5c' in the base Inception v2 network. For example, if `inputs`
    has shape [1, 300, 300, 3], the generated feature maps have the
    following shapes:
    [
      (1, 19, 19, 576),   # Mixed_4c
      (1, 10, 10, 1024),  # Mixed_5c
      (1, 5, 5, 512),
      (1, 3, 3, 256),
      (1, 2, 2, 256),
      (1, 1, 1, 128)
    ]

    Args:
      inputs: a tensor of shape [batch_size, height, width, channels],
        holding the input images.

    Returns:
      a list of 6 float tensors of shape [batch_size, height, width,
        channels], holding feature map tensors to be fed to box predictor.
    """
    feature_map_specs_dict = {
        'layer_name': ['Mixed_4c', 'Mixed_5c', None, None, None, None],
        'layer_depth': [None, None, 512, 256, 256, 128]
    }
    with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope:
            _, end_points = inception_v2.inception_v2_base(
                inputs,
                final_endpoint='Mixed_5c',
                min_depth=16,
                depth_multiplier=self._depth_multiplier,
                scope=scope)
            feature_maps = feature_map_generators.ssd_feature_maps(
                feature_map_tensor_dict=end_points,
                feature_map_specs_dict=feature_map_specs_dict,
                depth_multiplier=1,  # for depthwise conv in separable_conv2d
                use_depthwise=self._use_depthwise,
                insert_1x1_conv=True)
            feature_map_list = list(feature_maps.values())
    return feature_map_list
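A minimal sketch, assuming TF 1.x and the tensorflow/models slim `nets` package on the import path, that checks the two base endpoint shapes quoted in the docstring above for a 300x300 input (Mixed_4c -> 19x19x576, Mixed_5c -> 10x10x1024).

import tensorflow as tf
from nets import inception_v2  # slim model zoo; assumed to be importable

images = tf.placeholder(tf.float32, shape=[1, 300, 300, 3])
# Build only the Inception V2 trunk up to Mixed_5c and inspect the two
# endpoints the SSD head above builds on.
_, end_points = inception_v2.inception_v2_base(images, final_endpoint='Mixed_5c')
for name in ('Mixed_4c', 'Mixed_5c'):
    print(name, end_points[name].get_shape().as_list())
# Expected: Mixed_4c [1, 19, 19, 576], Mixed_5c [1, 10, 10, 1024]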
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    feature_map_layout = {
        'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''][:self._num_layers],
        'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope:
            _, image_features = inception_v2.inception_v2_base(
                ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
                final_endpoint='Mixed_5c',
                min_depth=self._min_depth,
                depth_multiplier=self._depth_multiplier,
                scope=scope)
            feature_maps = feature_map_generators.multi_resolution_feature_maps(
                feature_map_layout=feature_map_layout,
                depth_multiplier=self._depth_multiplier,
                min_depth=self._min_depth,
                insert_1x1_conv=True,
                image_features=image_features)

    return feature_maps.values()
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(
            tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
            tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    feature_map_layout = {
        'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
    }

    with tf.control_dependencies([shape_assert]):
        with slim.arg_scope(self._conv_hyperparams):
            with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope:
                _, image_features = inception_v2.inception_v2_base(
                    ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
                    final_endpoint='Mixed_5c',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope)
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

    return feature_maps.values()
def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(
            tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
            tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    with tf.control_dependencies([shape_assert]):
        with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope:
            with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
                                       batch_norm_scale=True,
                                       train_batch_norm=self._train_batch_norm):
                _, activations = inception_v2.inception_v2_base(
                    preprocessed_inputs,
                    final_endpoint='Mixed_4e',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope)
    return activations['Mixed_4e'], activations
def model(img):
    """Builds a simple top-down (FPN-style) feature pyramid on Inception V2 endpoints."""
    with slim.arg_scope(inception_v2.inception_v2_arg_scope()):
        with slim.arg_scope([slim.batch_norm], is_training=True):
            with slim.arg_scope([slim.conv2d], trainable=True):
                logits, end_point = inception_v2.inception_v2_base(img)
                c1 = end_point['Mixed_3c']
                c2 = end_point['Mixed_4e']
                c3 = end_point['Mixed_5c']
                vbs = slim.get_variables_to_restore()

                # Project each endpoint to 256 channels and merge top-down
                # by bilinear upsampling and elementwise addition.
                c3 = slim.conv2d(c3, 256, 1, 1, activation_fn=None)
                c2 = slim.conv2d(c2, 256, 1, 1, activation_fn=None) + \
                    tf.image.resize_bilinear(c3, size=tf.shape(c2)[1:3])
                c1 = slim.conv2d(c1, 256, 1, 1, activation_fn=None) + \
                    tf.image.resize_bilinear(c2, size=tf.shape(c1)[1:3])
    return c1, c2, c3, vbs
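The top-down merge in `model` follows the usual FPN pattern; below is a hedged sketch of that pattern as a standalone helper (`merge_top_down` is a hypothetical name, not from the original code).

import tensorflow as tf
slim = tf.contrib.slim  # TF 1.x slim, as used throughout these snippets

def merge_top_down(fine, coarse, depth=256):
    # Project both endpoints to a common channel depth with 1x1 convs,
    # bilinearly upsample the coarser map to the finer map's spatial size,
    # and fuse by elementwise addition.
    fine = slim.conv2d(fine, depth, 1, activation_fn=None)
    coarse = slim.conv2d(coarse, depth, 1, activation_fn=None)
    return fine + tf.image.resize_bilinear(coarse, size=tf.shape(fine)[1:3])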
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    feature_map_layout = {
        'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope:
            _, image_features = inception_v2.inception_v2_base(
                ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
                final_endpoint='Mixed_5c',
                min_depth=self._min_depth,
                depth_multiplier=self._depth_multiplier,
                scope=scope)
            feature_maps = feature_map_generators.multi_resolution_feature_maps(
                feature_map_layout=feature_map_layout,
                depth_multiplier=self._depth_multiplier,
                min_depth=self._min_depth,
                insert_1x1_conv=True,
                image_features=image_features)

    return feature_maps.values()
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    # Make sure that input is in correct format with rank 4.
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(
            tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
            tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    with tf.control_dependencies([shape_assert]):
        with slim.arg_scope(self._conv_hyperparams):
            with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope:
                _, image_features = inception_v2.inception_v2_base(
                    ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
                    final_endpoint='Mixed_5c',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    use_separable_conv=False,
                    scope=scope)

                # 2. STDN version + combine mode
                image_features = combine_and_scale_transfer_module_v1(
                    image_features, combine_mode=0)

    # return a list of feature maps
    return image_features.values()
def _extract_first_stage_features(self, inputs):
    """Extracts first stage features for RPN proposal prediction and ROI pooling.

    Args:
      inputs: float tensor of shape [batch_size, height, width, depth].

    Returns:
      shared_feature_map: float tensor of shape
        [batch_size, height_out, width_out, depth_out].
    """
    with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope:
        with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                            # is_training
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=self._batch_norm_params):
            _, end_points = inception_v2.inception_v2_base(
                inputs,
                final_endpoint='Mixed_4e',
                min_depth=16,
                depth_multiplier=self._depth_multiplier,
                scope=scope)
    return end_points['Mixed_4e']
def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(
            tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
            tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
    trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)

    # add convolution autoencoder
    encoder_1_conv = slim.conv2d(preprocessed_inputs, depth(64), [3, 3],
                                 weights_initializer=trunc_normal(0.09),
                                 scope='encoder_c1_conv')
    # encoder_1_pool = slim.max_pool2d(encoder_1_conv, [2, 2], stride=2,
    #                                  scope='encoder_c1_pool')
    # encoder_1_dropout1 = slim.dropout(encoder_1_pool, 0.7, scope='encoder_c1_dropout1')
    encoder_2_conv = slim.conv2d(encoder_1_conv, depth(128), [5, 5],
                                 weights_initializer=trunc_normal(0.09),
                                 scope='encoder_c2_conv')
    encoder_3_conv = slim.conv2d(encoder_2_conv, depth(128), [5, 5],
                                 weights_initializer=trunc_normal(0.09),
                                 scope='encoder_c3_conv')

    # decoder
    decoder_3_deconv = slim.conv2d_transpose(
        encoder_3_conv, depth(128), [5, 5],
        weights_initializer=trunc_normal(0.09), scope='decoder_c3_deconv')
    decoder_2_deconv = slim.conv2d_transpose(
        decoder_3_deconv, depth(128), [5, 5],
        weights_initializer=trunc_normal(0.09), scope='decoder_c2_deconv')
    decoder_1_deconv = slim.conv2d_transpose(
        decoder_2_deconv, depth(1), [3, 3],
        weights_initializer=trunc_normal(0.09), scope='decoder_c1_deconv')

    # visualize decoder output
    # vis_decoder = tf.slice(decoder_1_deconv, (0, 0, 0, 0), (1, -1, -1, -1))
    # print("vis_decoder:", vis_decoder)
    # vis_decoder = tf.reshape(vis_decoder, (256, 256, 1))
    # print("vis_decoder2:", vis_decoder)
    # vis_decoder = tf.transpose(vis_decoder, (2, 0, 3, 1))
    # vis_decoder = tf.reshape(vis_decoder, (1, 256, 256, 1))
    # image_matrix = vis_decoder.eval()
    # print("image_matrix:", image_matrix)
    # tf.summary.image('decoder_visualized', tf.expand_dims(vis_decoder, 0))

    with tf.control_dependencies([shape_assert]):
        with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope:
            with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
                                       batch_norm_scale=True,
                                       train_batch_norm=self._train_batch_norm):
                _, activations = inception_v2.inception_v2_base(
                    decoder_1_deconv,
                    final_endpoint='Mixed_4e',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope)
    return activations['Mixed_4e'], activations
def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(
            tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
            tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
    trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)

    # Add a multi branch encoder with different dilation rates.
    # Block 1
    encoder_1_conv = slim.conv2d(preprocessed_inputs, depth(32), [3, 3],
                                 weights_initializer=trunc_normal(0.09),
                                 scope='encoder_c1_conv')
    encoder_2_conv = slim.conv2d(encoder_1_conv, depth(64), [5, 5],
                                 weights_initializer=trunc_normal(0.09),
                                 scope='encoder_c2_conv')
    encoder_3_conv = slim.conv2d(encoder_2_conv, depth(128), [5, 5],
                                 weights_initializer=trunc_normal(0.09),
                                 scope='encoder_c3_conv')

    # Block 2
    encoder_1_conv_dilated_1 = slim.conv2d(
        preprocessed_inputs, depth(32), [3, 3], rate=3,
        weights_initializer=trunc_normal(0.09), scope='encoder_1_conv_dilated_1')
    encoder_2_conv_dilated_1 = slim.conv2d(
        encoder_1_conv_dilated_1, depth(64), [5, 5], rate=3,
        weights_initializer=trunc_normal(0.09), scope='encoder_2_conv_dilated_1')
    encoder_3_conv_dilated_1 = slim.conv2d(
        encoder_2_conv_dilated_1, depth(128), [5, 5], rate=3,
        weights_initializer=trunc_normal(0.09), scope='encoder_3_conv_dilated_1')

    # Block 3
    encoder_1_conv_dilated_2 = slim.conv2d(
        preprocessed_inputs, depth(32), [3, 3], rate=5,
        weights_initializer=trunc_normal(0.09), scope='encoder_1_conv_dilated_2')
    encoder_2_conv_dilated_2 = slim.conv2d(
        encoder_1_conv_dilated_2, depth(64), [5, 5], rate=5,
        weights_initializer=trunc_normal(0.09), scope='encoder_2_conv_dilated_2')
    encoder_3_conv_dilated_2 = slim.conv2d(
        encoder_2_conv_dilated_2, depth(128), [5, 5], rate=5,
        weights_initializer=trunc_normal(0.09), scope='encoder_3_conv_dilated_2')

    encoder_concat = tf.concat(
        [encoder_3_conv, encoder_3_conv_dilated_1, encoder_3_conv_dilated_2],
        axis=3)

    # decoder
    decoder_3_deconv = slim.conv2d_transpose(
        encoder_concat, depth(64), [5, 5],
        weights_initializer=trunc_normal(0.09), scope='decoder_c3_deconv')
    decoder_2_deconv = slim.conv2d_transpose(
        decoder_3_deconv, depth(32), [5, 5],
        weights_initializer=trunc_normal(0.09), scope='decoder_c2_deconv')
    decoder_1_deconv = slim.conv2d_transpose(
        decoder_2_deconv, depth(1), [3, 3],
        weights_initializer=trunc_normal(0.09), scope='decoder_c1_deconv')

    with tf.control_dependencies([shape_assert]):
        with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope:
            with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
                                       batch_norm_scale=True,
                                       train_batch_norm=self._train_batch_norm):
                _, activations = inception_v2.inception_v2_base(
                    decoder_1_deconv,
                    final_endpoint='Mixed_4e',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope)
    return activations['Mixed_4e'], activations
def network_fn(inputs):
    # return transformer_factory.transform(inputs, BATCH_PER_GPU, NUM_STN, (224, 224), NUM_CLASSES, FLAGS.weight_decay, True)
    end_points = {}
    # with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=True):
    #     with slim.arg_scope(inception_v3_arg_scope(weight_decay=FLAGS.weight_decay)):
    with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training):
        with slim.arg_scope(inception_v3_arg_scope(weight_decay=weight_decay)):
            # Localization branch: predicts the transform parameters for each STN.
            with tf.variable_scope("loc") as scope:
                with tf.variable_scope("net") as scope2:
                    # _, _end_points = inception_resnet_v2.inception_resnet_v2(inputs, num_classes=2, is_training=True, scope=scope2)
                    loc_net, _ = inception_v2.inception_v2_base(inputs, scope=scope2)
                # loc_net = _end_points['Conv2d_7b_1x1']
                loc_net = slim.conv2d(loc_net, 128, [1, 1], scope='Loc_1x1')
                default_kernel_size = [14, 14]
                # kernel_size = _reduced_kernel_size_for_small_input(loc_net, default_kernel_size)
                loc_net = slim.conv2d(loc_net, 128, loc_net.get_shape()[1:3],
                                      padding='VALID', activation_fn=tf.nn.tanh,
                                      scope='Loc_fc1')
                loc_net = slim.flatten(loc_net)
                iv = 4.
                initial = np.array([iv, 0, iv, 0] * NUM_STN, dtype=np.float32)
                b_fc_loc = tf.get_variable(
                    "Loc_fc_b", shape=[4 * NUM_STN],
                    initializer=init_ops.constant_initializer(initial),
                    dtype=dtypes.float32)
                W_fc_loc = tf.get_variable(
                    "Loc_fc_W", shape=[128, 4 * NUM_STN],
                    initializer=init_ops.constant_initializer(np.zeros((128, 4 * NUM_STN))),
                    dtype=dtypes.float32)
                theta = tf.nn.tanh(tf.matmul(loc_net, W_fc_loc) + b_fc_loc)

            _finals = []
            for i in range(NUM_STN):  # `range` instead of `xrange` for Python 3 compatibility
                scope_name = "stn%d" % i
                with tf.variable_scope(scope_name) as scope1:
                    # Take the 4 transform parameters belonging to STN i
                    # (the original sliced a size of 4 * (i + 1), which overruns
                    # the theta tensor for i > 0).
                    _theta = tf.slice(theta, [0, 4 * i], [-1, 4])
                    # loc_net = slim.conv2d(loc_net, 6, [1, 1], activation_fn=tf.nn.tanh, scope='Loc_fc', biases_initializer=init_ops.constant_initializer([4.0, 0.0, 0.0, 0.0, 4.0, 0.0] * 128, dtype=dtypes.float32))
                    # loc_net = slim.conv2d(loc_net, 6, [1, 1], activation_fn=tf.nn.tanh, scope='Loc_fc', biases_initializer=init_ops.constant_initializer([4.0], dtype=dtypes.float32))
                    # loc_net = slim.flatten(loc_net)
                    stn_output_size = (STN_OUT_SIZE, STN_OUT_SIZE)
                    x = transformer(inputs, _theta, stn_output_size)
                    x.set_shape([BATCH_PER_GPU, stn_output_size[0],
                                 stn_output_size[1], 3])
                    # x.set_shape(tf.shape(inputs))
                    # tf.reshape(x, tf.shape(inputs))
                    end_points['x'] = x
                    # with tf.variable_scope("net") as scope2:
                    #     return inception_resnet_v2.inception_resnet_v2(x, num_classes=NUM_CLASSES, is_training=True, scope=scope2)
                    with tf.variable_scope("net") as scope2:
                        net, _ = inception_v2.inception_v2_base(x, scope=scope2)
                        kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7])
                        net = slim.avg_pool2d(net, kernel_size, padding='VALID',
                                              scope='AvgPool_1a')
                        net = slim.dropout(net, keep_prob=0.7, scope='Dropout_1b')
                        _finals.append(net)

            with tf.variable_scope('Logits'):
                net = tf.concat(axis=3, values=_finals)
                logits = slim.conv2d(net, NUM_CLASSES, [1, 1], activation_fn=None,
                                     normalizer_fn=None, scope='Conv2d_1c_1x1')
                logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
                predictions = slim.softmax(logits, scope='Predictions')
                end_points['Predictions'] = predictions

                logits_a = slim.conv2d(net, NUM_ATTRIBS, [1, 1], activation_fn=None,
                                       normalizer_fn=None, scope='Conv2d_1c_1x1_a')
                logits_a = tf.squeeze(logits_a, [1, 2], name='SpatialSqueeze_a')
                predictions_a = slim.sigmoid(logits_a, scope='Predictions_a')
                end_points['Predictions_a'] = predictions_a

    return logits, logits_a, end_points
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
    trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)

    # add convolution autoencoder
    encoder_1_conv = slim.conv2d(preprocessed_inputs, depth(64), [3, 3],
                                 weights_initializer=trunc_normal(0.09),
                                 scope='encoder_c1_conv')
    # encoder_1_pool = slim.max_pool2d(encoder_1_conv, [2, 2], stride=2,
    #                                  scope='encoder_c1_pool')
    # encoder_1_dropout1 = slim.dropout(encoder_1_pool, 0.7, scope='encoder_c1_dropout1')
    encoder_2_conv = slim.conv2d(encoder_1_conv, depth(128), [5, 5],
                                 weights_initializer=trunc_normal(0.09),
                                 scope='encoder_c2_conv')
    encoder_3_conv = slim.conv2d(encoder_2_conv, depth(128), [5, 5],
                                 weights_initializer=trunc_normal(0.09),
                                 scope='encoder_c3_conv')

    # decoder
    decoder_3_deconv = slim.conv2d_transpose(
        encoder_3_conv, depth(128), [5, 5],
        weights_initializer=trunc_normal(0.09), scope='decoder_c3_deconv')
    decoder_2_deconv = slim.conv2d_transpose(
        decoder_3_deconv, depth(128), [5, 5],
        weights_initializer=trunc_normal(0.09), scope='decoder_c2_deconv')
    decoder_1_deconv = slim.conv2d_transpose(
        decoder_2_deconv, depth(1), [3, 3],
        weights_initializer=trunc_normal(0.09), scope='decoder_c1_deconv')

    feature_map_layout = {
        'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope:
            _, image_features = inception_v2.inception_v2_base(
                ops.pad_to_multiple(decoder_1_deconv, self._pad_to_multiple),
                final_endpoint='Mixed_5c',
                min_depth=self._min_depth,
                depth_multiplier=self._depth_multiplier,
                scope=scope)
            feature_maps = feature_map_generators.multi_resolution_feature_maps(
                feature_map_layout=feature_map_layout,
                depth_multiplier=self._depth_multiplier,
                min_depth=self._min_depth,
                insert_1x1_conv=True,
                image_features=image_features)

    return feature_maps.values()