def testBatchNormScopeHasIsTrainingWhenItsNotNone(self):
  sc = mobilenet_v1.mobilenet_v1_arg_scope(is_training=True)
  self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)])
  sc = mobilenet_v1.mobilenet_v1_arg_scope(is_training=False)
  self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)])
  sc = mobilenet_v1.mobilenet_v1_arg_scope()
  self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)])
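# All of the snippets in this section lean on the same pattern:
# mobilenet_v1_arg_scope() returns a scope dict whose slim.batch_norm entry
# carries is_training, so layers built inside the scope pick it up
# implicitly. A minimal sketch of the pattern (import paths and the `images`
# placeholder are assumptions, not taken from any snippet here):
import tensorflow as tf
from nets import mobilenet_v1

slim = tf.contrib.slim

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_training=True)):
  # Every slim.batch_norm created in this block sees is_training=True
  # without it being passed explicitly at each call site.
  logits, end_points = mobilenet_v1.mobilenet_v1(images, num_classes=1001)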
def build(self, cost, model, train):
  if model == "MobilenetV1":
    with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
      self.logits, self.end_points = mobilenet_v1.mobilenet_v1(
          self.input, num_classes=self.numb_logits,
          dropout_keep_prob=self.kp, is_training=train)
  elif model == "vgg_16":
    with slim.arg_scope(vgg.vgg_arg_scope()):
      self.logits, self.end_points = vgg.vgg_16(
          self.input, num_classes=self.numb_logits,
          dropout_keep_prob=self.kp, is_training=True)
  self.prob = tf.nn.softmax(self.logits, name="prob")
  self.loss = tf.reduce_mean(
      tf.reduce_sum(tf.pow(self.prob - self.target, 2), axis=1))
  tf.summary.scalar('loss', self.loss)
  if cost == "mse":
    self.cost = self.loss
  else:
    # Clamp targets and probabilities away from zero so the KL term stays
    # finite.
    self.xtarget = self.target * (1 - 1e-11) + 1e-12
    assert self.xtarget.get_shape().as_list()[1] == self.numb_logits
    self.xprob = self.prob * (1 - 1e-11) + 1e-12
    assert self.xprob.get_shape().as_list()[1] == self.numb_logits
    # Use the clamped probabilities here; dividing by the raw softmax output
    # (as the original code did) can produce log(x / 0).
    self.cost = tf.reduce_mean(
        tf.reduce_sum(self.xtarget * tf.log(self.xtarget / self.xprob),
                      axis=1))
    tf.summary.scalar('cost_kl', self.cost)
def main(_):
  if not FLAGS.output_file:
    raise ValueError('You must supply the path to save to with --output_file')
  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default() as graph:
    image_size = FLAGS.image_size
    input_shape = [FLAGS.batch_size, image_size, image_size, 3]
    placeholder = tf.placeholder(name='input', dtype=tf.float32,
                                 shape=input_shape)
    scope = mobilenet_v1.mobilenet_v1_arg_scope(is_training=False,
                                                weight_decay=0.0)
    with slim.arg_scope(scope):
      mobilenet_v1_l2norm.mobilenet_v1_l2norm(
          placeholder, is_training=False, num_classes=FLAGS.num_classes)
    if FLAGS.quantize:
      tf.contrib.quantize.create_eval_graph()
    graph_def = graph.as_graph_def()
    if FLAGS.write_text_graphdef:
      tf.io.write_graph(graph_def,
                        os.path.dirname(FLAGS.output_file),
                        os.path.basename(FLAGS.output_file),
                        as_text=True)
    else:
      with tf.gfile.GFile(FLAGS.output_file, 'wb') as f:
        f.write(graph_def.SerializeToString())
def head_net(self, blocks, is_training, trainable=True):
  normal_initializer = tf.truncated_normal_initializer(0, 0.01)
  msra_initializer = tf.contrib.layers.variance_scaling_initializer()
  xavier_initializer = tf.contrib.layers.xavier_initializer()

  with slim.arg_scope(mobilenet_v1_arg_scope(is_training=is_training)):
    out = slim.conv2d_transpose(blocks, 256, [4, 4], stride=2,
                                trainable=trainable,
                                weights_initializer=normal_initializer,
                                padding='SAME', activation_fn=tf.nn.relu,
                                scope='up1')
    out = slim.conv2d_transpose(out, 256, [4, 4], stride=2,
                                trainable=trainable,
                                weights_initializer=normal_initializer,
                                padding='SAME', activation_fn=tf.nn.relu,
                                scope='up2')
    out = slim.conv2d_transpose(out, 256, [4, 4], stride=2,
                                trainable=trainable,
                                weights_initializer=normal_initializer,
                                padding='SAME', activation_fn=tf.nn.relu,
                                scope='up3')
    out = slim.conv2d(out, cfg.nr_skeleton, [1, 1],
                      trainable=trainable,
                      weights_initializer=msra_initializer,
                      padding='SAME', normalizer_fn=None,
                      activation_fn=None, scope='out')
  return out
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
  """Extracts second stage box classifier features.

  Args:
    proposal_feature_maps: A 4-D float tensor with shape
      [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
      representing the feature map cropped to each proposal.
    scope: A scope name (unused).

  Returns:
    proposal_classifier_features: A 4-D float tensor with shape
      [batch_size * self.max_num_proposals, height, width, depth]
      representing box classifier features for each proposal.
  """
  net = proposal_feature_maps

  depth = lambda d: max(int(d * 1.0), 16)

  with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights):
    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(
            is_training=self._train_batch_norm,
            weight_decay=self._weight_decay)):
      with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                          padding='SAME'):
        net = slim.separable_conv2d(
            net, depth(1024), [3, 3], depth_multiplier=1, stride=2,
            scope='Conv2d_12_pointwise')
        return slim.separable_conv2d(
            net, depth(1024), [3, 3], depth_multiplier=1, stride=1,
            scope='Conv2d_13_pointwise')
def generator(images, n_filter=4, train=True, reuse=False):
  """Defines the generator model.

  Args:
    images: input images for the generator.
    n_filter: number of filters to learn from each image.
    train: boolean specifying whether the network is in training mode or
      inference mode.
    reuse: whether to reuse network variables or not.

  Returns:
    output_2: enhanced version of the input images.
  """
  with tf.variable_scope("generator", reuse=reuse):
    # First generator network.
    with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
      logits, end_points = mobilenet_v1.mobilenet_v1(
          images, activation_fn=leakyRely, dropout_keep_prob=1,
          is_training=train)
    net = end_points['Conv2d_13_pointwise']
    filters_1 = slim.conv2d(net, 256, [3, 3], stride=2,
                            activation_fn=tf.nn.relu, padding='VALID',
                            normalizer_fn=None, scope='filters_1')
    filters_2_1 = slim.conv2d(filters_1, n_filter, [1, 1], stride=1,
                              activation_fn=None, padding='SAME',
                              normalizer_fn=None, scope='filters_2_1')
    filters_2_2 = tf.expand_dims(filters_2_1, axis=4, name='filters_2_2')
    # Only the middle (saturation) channel is adapted; the other two channels
    # pass through unchanged and are re-concatenated below.
    output_1 = adapt_filter(images[:, :, :, 1:2],
                            filters_2_2[:, :, :, 0:4, :],
                            name="sat_adapt", train=train)
    print("output_1", output_1.get_shape().as_list())
    output_2 = tf.concat(
        [images[:, :, :, 0:1], output_1, images[:, :, :, 2:3]], axis=3)
    print("output_2", output_2.get_shape().as_list())
    return output_2
def create(self, images, num_classes, is_training):
  """See baseclass."""
  with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
    _, endpoints = mobilenet_v1.mobilenet_v1(
        inputs=images, num_classes=num_classes, is_training=is_training)
  return endpoints
def build_model():
  """Builds the mobilenet_v1 model for evaluation.

  Returns:
    g: graph with rewrites after insertion of quantization ops and batch norm
       folding.
    eval_ops: eval ops for inference.
  """
  g = tf.Graph()
  with g.as_default():
    inputs, labels = imagenet_input(is_training=False)

    scope = mobilenet_v1.mobilenet_v1_arg_scope(
        is_training=False, weight_decay=0.0)
    with slim.arg_scope(scope):
      logits, _ = mobilenet_v1.mobilenet_v1(
          inputs,
          is_training=False,
          depth_multiplier=FLAGS.depth_multiplier,
          num_classes=FLAGS.num_classes)

    if FLAGS.quantize:
      tf.contrib.quantize.create_eval_graph()

    eval_ops = metrics(logits, labels)

  return g, eval_ops
def build_model():
  """Builds the mobilenet_v1 model for evaluation.

  Returns:
    g: graph with rewrites after insertion of quantization ops and batch norm
       folding.
    eval_ops: eval ops for inference.
  """
  g = tf.Graph()
  with g.as_default():
    inputs, labels = imagenet_input(is_training=False)

    scope = mobilenet_v1.mobilenet_v1_arg_scope(
        is_training=False, weight_decay=0.0)
    with slim.arg_scope(scope):
      logits, _ = mobilenet_v1.mobilenet_v1(
          inputs,
          is_training=False,
          depth_multiplier=FLAGS.depth_multiplier,
          num_classes=FLAGS.num_classes,
          final_endpoint=FLAGS.final_endpoint)

    if FLAGS.quantize:
      contrib_quantize.create_eval_graph()

    eval_ops = metrics(logits, labels)

  return g, eval_ops
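# A sketch of how a build_model() like the two above is typically driven,
# modeled on the upstream mobilenet_v1_eval.py (the FLAGS names are
# assumptions):
import math

def eval_model():
  g, eval_ops = build_model()
  with g.as_default():
    num_batches = int(math.ceil(FLAGS.num_examples / float(FLAGS.batch_size)))
    slim.evaluation.evaluate_once(FLAGS.master,
                                  FLAGS.checkpoint_dir,
                                  logdir=FLAGS.eval_dir,
                                  num_evals=num_batches,
                                  eval_op=eval_ops)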
def embedding_fn(images, reuse=False):
  with tf.variable_scope('MobilenetV1', reuse=reuse) as scope:
    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(is_training=True)):
      return mobilenet_v1.mobilenet_v1_base(
          images,
          final_endpoint=mobilenet_config['final_endpoint'],
          conv_defs=mobilenet.CONV_DEFS,
          depth_multiplier=mobilenet_config['depth_multiplier'],
          scope=scope)
def _extract_proposal_features(self, preprocessed_inputs, scope):
  """Extracts first stage RPN features.

  Extracts features using the MobileNet v1 base network.

  Args:
    preprocessed_inputs: A [batch, height, width, channels] float32 tensor
      representing a batch of images.
    scope: A scope name.

  Returns:
    rpn_feature_map: A tensor with shape [batch, height, width, depth]

  Raises:
    InvalidArgumentError: If the spatial size of `preprocessed_inputs`
      (height or width) is less than 33.
    ValueError: If the created network is missing the required activation.
  """
  preprocessed_inputs.get_shape().assert_has_rank(4)
  shape_assert = tf.Assert(
      tf.logical_and(
          tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
          tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
      ['image size must at least be 33 in both height and width.'])

  with tf.control_dependencies([shape_assert]):
    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(
            is_training=self._is_training,
            weight_decay=self._weight_decay)):
      # Force is_training to False to disable batch norm updates.
      with slim.arg_scope([slim.batch_norm], is_training=False):
        with tf.variable_scope('MobileNetV1',
                               reuse=self._reuse_weights) as scope:
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              preprocessed_inputs, scope=scope)
          rpn_feature_map = image_features['Conv2d_13_pointwise']
  return rpn_feature_map
def inference(hypes, images, train=True, reuse=None,
              num_classes=1000,
              num_blocks=[3, 4, 6, 3],  # defaults to 50-layer network
              preprocess=True,
              bottleneck=True):
  # If preprocess is True, input should be RGB [0,1]; otherwise BGR with the
  # mean subtracted.
  if preprocess:
    x = _inception_preprocess(images)

  is_train = tf.convert_to_tensor(train, dtype='bool', name='is_training')

  if reuse is None:
    with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_train)):
      net, end_points = mobilenet_v1.mobilenet_v1_base(x)
  else:
    with tf.variable_scope("MobilenetV1", [x], reuse=reuse) as scope:
      with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_train)):
        net, end_points = mobilenet_v1.mobilenet_v1_base(x, scope=scope)

  logits = {}
  logits['images'] = images
  logits['fcn_in'] = end_points['Conv2d_13_pointwise']
  logits['feed2'] = end_points['Conv2d_11_pointwise']
  logits['feed4'] = end_points['Conv2d_5_pointwise']
  logits['early_feat'] = logits['feed2']
  logits['deep_feat'] = logits['fcn_in']

  if train:
    restore = tf.global_variables()
    hypes['init_function'] = _initalize_variables
    hypes['restore'] = restore

  return logits
def build_model():
  """Builds graph for model to train with rewrites for quantization.

  Returns:
    g: Graph with fake quantization ops and batch norm folding suitable for
       training quantized weights.
    train_tensor: Train op for execution during training.
  """
  g = tf.Graph()
  with g.as_default(), tf.device(
      tf.train.replica_device_setter(FLAGS.ps_tasks)):
    # Reads in data and/or performs pre-processing on the images.
    inputs, labels, _ = tf_input.input(is_random=True, is_training=True)
    labels = labels - 1
    labels = slim.one_hot_encoding(labels, FLAGS.num_classes)
    with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_training=True)):
      logits, _ = mobilenet_v1.mobilenet_v1(
          inputs,
          is_training=True,
          depth_multiplier=FLAGS.depth_multiplier,
          num_classes=FLAGS.num_classes)

    tf.losses.softmax_cross_entropy(labels, logits)

    # Call rewriter to produce graph with fake quant ops and folded batch
    # norms. quant_delay delays the start of quantization until quant_delay
    # steps, allowing for better model accuracy.
    if FLAGS.quantize:
      tf.contrib.quantize.create_training_graph(quant_delay=get_quant_delay())

    total_loss = tf.losses.get_total_loss(name='total_loss')
    # Configure the learning rate using an exponential decay.
    num_epochs_per_decay = 2.5
    data_size = tf_input.TRAINING_SET_SIZE
    decay_steps = int(data_size / FLAGS.batch_size * num_epochs_per_decay)

    learning_rate = tf.train.exponential_decay(
        get_learning_rate(),
        tf.train.get_or_create_global_step(),
        decay_steps,
        _LEARNING_RATE_DECAY_FACTOR,
        staircase=True)
    opt = tf.train.GradientDescentOptimizer(learning_rate)

    train_tensor = slim.learning.create_train_op(total_loss, optimizer=opt)

    slim.summaries.add_scalar_summary(total_loss, 'total_loss', 'losses')
    slim.summaries.add_scalar_summary(learning_rate, 'learning_rate',
                                      'training')
  return g, train_tensor
def extract_features(self, preprocessed_inputs):
  """Extract features from preprocessed inputs.

  Args:
    preprocessed_inputs: a [batch, height, width, channels] float tensor
      representing a batch of images.

  Returns:
    feature_maps: a list of tensors where the ith tensor has shape
      [batch, height_i, width_i, depth_i]
  """
  preprocessed_inputs = shape_utils.check_min_image_dim(
      33, preprocessed_inputs)

  feature_map_layout = {
      'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise',
                     '', '', '', ''],
      'layer_depth': [-1, -1, 512, 256, 256, 128],
      'use_explicit_padding': self._use_explicit_padding,
      'use_depthwise': self._use_depthwise,
  }

  with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope:
    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(
            is_training=None, regularize_depthwise=True)):
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams
            else context_manager.IdentityContextManager()):
        # TODO(skligys): Enable fused batch norm once quantization supports it.
        with slim.arg_scope([slim.batch_norm], fused=False):
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)
    with slim.arg_scope(self._conv_hyperparams_fn()):
      # TODO(skligys): Enable fused batch norm once quantization supports it.
      with slim.arg_scope([slim.batch_norm], fused=False):
        feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=feature_map_layout,
            depth_multiplier=self._depth_multiplier,
            min_depth=self._min_depth,
            insert_1x1_conv=True,
            image_features=image_features)

  return feature_maps.values()
def fcn_mobv1(images, num_classes, is_training=True):
  with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
    _, end_points = mobilenet_v1.mobilenet_v1(
        images, num_classes, is_training=False, spatial_squeeze=False)

  # Skip connections at strides 32, 16 and 8.
  net_7 = end_points['Conv2d_13_pointwise']
  net_14 = end_points['Conv2d_11_pointwise']
  net_28 = end_points['Conv2d_5_pointwise']

  up1 = slim.conv2d_transpose(net_7, 512, [4, 4], stride=2, scope='deconv32')
  fu1 = tf.add(up1, net_14, name='fu1')

  up2 = slim.conv2d_transpose(fu1, 256, [4, 4], stride=2, scope='deconv16')
  fu2 = tf.add(up2, net_28, name='fu2')

  logit = slim.conv2d_transpose(fu2, num_classes, [16, 16], stride=8,
                                scope='deconv8')
  prediction = tf.argmax(logit, axis=3)
  print('logit', logit)
  return logit, tf.expand_dims(prediction, axis=3)
def MOBILENET(image_batch_tensor, is_training):
  '''Returns the MobileNet model definition for use within the MobileSeg
  model.

  Parameters
  ----------
  image_batch_tensor : [batch_size, height, width, channels] Tensor
      Tensor containing a batch of input images.
  is_training : bool
      True if the network is being trained, False otherwise. This controls
      whether dropout layers should be enabled, and the behaviour of the
      batchnorm layers.

  Returns
  -------
  conv13_features:
      Features with a stride length of 32. The layer is referred to as
      'MobilenetV1/Conv2d_13_pointwise/Conv2D' in the MobileNet Tensorflow
      implementation. These features feed into the average pooling layer in
      the original network; however the pooling layer and subsequent fc and
      softmax layers have been removed in this implementation.
  conv11_features:
      Features with a stride length of 16. (Output of the
      'MobilenetV1/Conv2d_11_pointwise/Conv2D' layer.)
  conv5_features:
      Features with a stride length of 8. (Output of the
      'MobilenetV1/Conv2d_5_pointwise/Conv2D' layer.)
  '''
  # Convert image to float32 before subtracting the mean pixel values.
  image_batch_float = tf.to_float(image_batch_tensor)

  # Subtract the mean pixel value from each pixel.
  mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN]

  with slim.arg_scope(
      mobilenet.mobilenet_v1_arg_scope(is_training=is_training)):
    # Feed the mean-centered batch into the network; the original code passed
    # the raw input tensor here, leaving the centered batch unused.
    conv13_features, end_points = mobilenet.mobilenet_v1_base(
        mean_centered_image_batch,
        final_endpoint='Conv2d_13_pointwise',
        min_depth=8,
        depth_multiplier=1.0,
        conv_defs=None,
        output_stride=None,
        scope=None)

  return (conv13_features, end_points['Conv2d_11_pointwise'],
          end_points['Conv2d_5_pointwise'])
def _build_mobilenet_model(is_training, images, params):
  with slim.arg_scope(
      mobilenet_v1.mobilenet_v1_arg_scope(is_training=is_training)):
    out, _ = mobilenet_v1.mobilenet_v1(
        images,
        is_training=is_training,
        depth_multiplier=params.depth_multiplier,
        num_classes=None)
  tf.logging.info("mobilenet preembedding shape {}".format(
      out.get_shape().as_list()))
  # The hard-coded 256 assumes the final feature depth is 256 (e.g. a
  # depth_multiplier of 0.25 on the 1024-deep final layer).
  out = tf.reshape(out, [-1, 256])
  out = tf.layers.dense(out, params.embedding_size, name="embeddings")
  return out
def perceptual_params(images, reuse=False):
  """Gets semantic parameters of images.

  Args:
    images: input images.
    reuse: whether to reuse network variables or not.

  Returns:
    semantic parameters of the images.
  """
  with tf.variable_scope("semantic", reuse=reuse):
    with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
      logits, end_points = mobilenet_v1.mobilenet_v1(
          images, num_classes=1001, dropout_keep_prob=1, is_training=False)
    return tf.squeeze(end_points['AvgPool_1a'], [1, 2])
def extract_features(self, preprocessed_inputs):
  """Extract features from preprocessed inputs.

  Args:
    preprocessed_inputs: a [batch, height, width, channels] float tensor
      representing a batch of images.

  Returns:
    feature_maps: a list of tensors where the ith tensor has shape
      [batch, height_i, width_i, depth_i]
  """
  preprocessed_inputs = shape_utils.check_min_image_dim(
      33, preprocessed_inputs)

  feature_map_layout = {
      'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise',
                     '', '', '', ''],
      'layer_depth': [-1, -1, 512, 256, 256, 128],
      'use_explicit_padding': self._use_explicit_padding,
      'use_depthwise': self._use_depthwise,
  }

  with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope:
    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(
            is_training=True, regularize_depthwise=True)):
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams
            else context_manager.IdentityContextManager()):
        # TODO(skligys): Enable fused batch norm once quantization supports it.
        with slim.arg_scope([slim.batch_norm], fused=False):
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)
    with slim.arg_scope(self._conv_hyperparams_fn()):
      # TODO(skligys): Enable fused batch norm once quantization supports it.
      with slim.arg_scope([slim.batch_norm], fused=False):
        feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=feature_map_layout,
            depth_multiplier=self._depth_multiplier,
            min_depth=self._min_depth,
            insert_1x1_conv=True,
            image_features=image_features)

  return feature_maps.values()
def build_model():
  """Builds graph for model to train with rewrites for quantization.

  Returns:
    g: Graph with fake quantization ops and batch norm folding suitable for
       training quantized weights.
    train_tensor: Train op for execution during training.
  """
  g = tf.Graph()
  with g.as_default(), tf.device(
      tf.train.replica_device_setter(FLAGS.ps_tasks)):
    inputs, labels = imagenet_input(is_training=True)
    with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_training=True)):
      logits, _ = mobilenet_v1.mobilenet_v1(
          inputs,
          is_training=True,
          depth_multiplier=FLAGS.depth_multiplier,
          num_classes=FLAGS.num_classes)

    tf.losses.softmax_cross_entropy(labels, logits)

    # Call rewriter to produce graph with fake quant ops and folded batch
    # norms. quant_delay delays the start of quantization until quant_delay
    # steps, allowing for better model accuracy.
    if FLAGS.quantize:
      tf.contrib.quantize.create_training_graph(quant_delay=get_quant_delay())

    total_loss = tf.losses.get_total_loss(name='total_loss')
    # Configure the learning rate using an exponential decay.
    num_epochs_per_decay = 2.5
    imagenet_size = 1271167
    decay_steps = int(imagenet_size / FLAGS.batch_size * num_epochs_per_decay)

    learning_rate = tf.train.exponential_decay(
        get_learning_rate(),
        tf.train.get_or_create_global_step(),
        decay_steps,
        _LEARNING_RATE_DECAY_FACTOR,
        staircase=True)
    opt = tf.train.GradientDescentOptimizer(learning_rate)

    train_tensor = slim.learning.create_train_op(total_loss, optimizer=opt)

    slim.summaries.add_scalar_summary(total_loss, 'total_loss', 'losses')
    slim.summaries.add_scalar_summary(learning_rate, 'learning_rate',
                                      'training')
  return g, train_tensor
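# The matching training driver, sketched after the upstream
# mobilenet_v1_train.py (the FLAGS names are assumptions):
def train_model():
  g, train_tensor = build_model()
  with g.as_default():
    slim.learning.train(
        train_tensor,
        FLAGS.checkpoint_dir,
        graph=g,
        number_of_steps=FLAGS.number_of_steps,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs)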
def create_model(self, inputs, num_classes, is_training):
  with slim.arg_scope(
      mobilenet_v1_arg_scope(
          is_training,
          FLAGS.weight_decay,
          regularize_depthwise=FLAGS.regularize_depthwise)):
    self.logits, self.end_points = mobilenet_v1(
        inputs,
        num_classes,
        FLAGS.dropout_keep_prob,
        is_training,
        depth_multiplier=FLAGS.depth_multiplier)
  for var in tf.model_variables():
    if 'weights' in var.op.name:
      tf.add_to_collection(tf.GraphKeys.WEIGHTS, var)
def mobilenet_v1_050(inputs, is_training, opts):
  with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(
      is_training=is_training,
      weight_decay=opts.weight_decay,
      stddev=0.09,
      regularize_depthwise=False,
      batch_norm_decay=opts.batch_norm_decay,
      batch_norm_epsilon=opts.batch_norm_epsilon)):
    return mobilenet_v1.mobilenet_v1_050(
        inputs,
        num_classes=opts.num_classes,
        dropout_keep_prob=opts.dropout_keep_prob,
        is_training=is_training,
        min_depth=8,
        global_pool=opts.global_pool,
        spatial_squeeze=opts.spatial_squeeze,
        reuse=None)
def freeze_mobilenet(meta_file, img_size=224, factor=1.0, num_classes=1001):
  tf.reset_default_graph()

  inp = tf.placeholder(tf.float32,
                       shape=(None, img_size, img_size, 3),
                       name="input")
  is_training = False
  weight_decay = 0.0
  arg_scope = mobilenet_v1.mobilenet_v1_arg_scope(weight_decay=weight_decay)
  with slim.arg_scope(arg_scope):
    logits, _ = mobilenet_v1.mobilenet_v1(inp,
                                          num_classes=num_classes,
                                          is_training=is_training,
                                          depth_multiplier=factor)
  predictions = tf.contrib.layers.softmax(logits)
  output = tf.identity(predictions, name='output')

  ckpt_file = meta_file.replace('.meta', '')
  output_graph_fn = ckpt_file.replace('.ckpt', '.pb')
  output_node_names = "output"

  rest_var = slim.get_variables_to_restore()

  with tf.Session() as sess:
    graph = tf.get_default_graph()
    input_graph_def = graph.as_graph_def()

    saver = tf.train.Saver(rest_var)
    saver.restore(sess, ckpt_file)

    # We use a built-in TF helper to export variables to constants.
    output_graph_def = graph_util.convert_variables_to_constants(
        sess,  # The session is used to retrieve the weights.
        input_graph_def,  # The graph_def is used to retrieve the nodes.
        # The output node names are used to select the useful nodes.
        output_node_names.split(","))

    # Finally we serialize and dump the output graph to the filesystem.
    with tf.gfile.GFile(output_graph_fn, "wb") as f:
      f.write(output_graph_def.SerializeToString())
    print("{} ops in the final graph.".format(len(output_graph_def.node)))
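# Hypothetical invocation of freeze_mobilenet() above; the checkpoint path is
# illustrative only. The frozen .pb is written next to the checkpoint.
freeze_mobilenet('mobilenet_v1_1.0_224.ckpt.meta',
                 img_size=224, factor=1.0, num_classes=1001)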
def _extract_proposal_features(self, preprocessed_inputs, scope):
  """Extracts first stage RPN features.

  Args:
    preprocessed_inputs: A [batch, height, width, channels] float32 tensor
      representing a batch of images.
    scope: A scope name.

  Returns:
    rpn_feature_map: A tensor with shape [batch, height, width, depth]

  Raises:
    InvalidArgumentError: If the spatial size of `preprocessed_inputs`
      (height or width) is less than 33.
    ValueError: If the created network is missing the required activation.
  """
  preprocessed_inputs.get_shape().assert_has_rank(4)
  shape_assert = tf.Assert(
      tf.logical_and(
          tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
          tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
      ['image size must at least be 33 in both height and width.'])

  with tf.control_dependencies([shape_assert]):
    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(
            is_training=False, weight_decay=self._weight_decay)):
      with tf.variable_scope('MobilenetV1',
                             reuse=self._reuse_weights) as scope:
        params = {}
        if self._skip_last_stride:
          params['conv_defs'] = _get_mobilenet_conv_no_last_stride_defs(
              conv_depth_ratio_in_percentage=self
              ._conv_depth_ratio_in_percentage)
        _, activations = mobilenet_v1.mobilenet_v1_base(
            preprocessed_inputs,
            final_endpoint='Conv2d_11_pointwise',
            min_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier,
            scope=scope,
            **params)
  return activations['Conv2d_11_pointwise']
def discriminator(images, kp, n_output=10, reuse=False, train=True):
  """Defines the discriminator model.

  Args:
    images: input images for the discriminator.
    kp: keep probability for the dropout layer of the discriminator.
    n_output: discriminator output size.
    reuse: whether to reuse variables or not.
    train: training mode or inference mode.

  Returns:
    preds: discriminator output containing the image aesthetic rating and
      other variables.
  """
  with tf.variable_scope("discriminator", reuse=reuse):
    with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
      logits, end_points = mobilenet_v1.mobilenet_v1(
          images, num_classes=n_output, dropout_keep_prob=kp,
          is_training=train)
    preds = tf.nn.softmax(logits)
    print("preds: ", preds.get_shape().as_list())
    return preds
def _extract_proposal_features(self, preprocessed_inputs, scope):
  """Extracts first stage RPN features.

  Args:
    preprocessed_inputs: A [batch, height, width, channels] float32 tensor
      representing a batch of images.
    scope: A scope name.

  Returns:
    rpn_feature_map: A tensor with shape [batch, height, width, depth]
    activations: A dictionary mapping feature extractor tensor names to
      tensors

  Raises:
    InvalidArgumentError: If the spatial size of `preprocessed_inputs`
      (height or width) is less than 33.
    ValueError: If the created network is missing the required activation.
  """
  preprocessed_inputs.get_shape().assert_has_rank(4)
  preprocessed_inputs = shape_utils.check_min_image_dim(
      min_dim=33, image_tensor=preprocessed_inputs)

  with slim.arg_scope(
      mobilenet_v1.mobilenet_v1_arg_scope(
          is_training=self._train_batch_norm,
          weight_decay=self._weight_decay)):
    with tf.variable_scope('MobilenetV1',
                           reuse=self._reuse_weights) as scope:
      params = {}
      if self._skip_last_stride:
        params['conv_defs'] = _get_mobilenet_conv_no_last_stride_defs(
            conv_depth_ratio_in_percentage=self
            ._conv_depth_ratio_in_percentage)
      _, activations = mobilenet_v1.mobilenet_v1_base(
          preprocessed_inputs,
          final_endpoint='Conv2d_11_pointwise',
          min_depth=self._min_depth,
          depth_multiplier=self._depth_multiplier,
          scope=scope,
          **params)
  return activations['Conv2d_11_pointwise'], activations
def export_eval_pbtxt():
  """Export eval.pbtxt."""
  g = tf.Graph()
  with g.as_default():
    inputs = tf.placeholder(
        dtype=tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, CHANNEL])
    scope = mobilenet_v1.mobilenet_v1_arg_scope(
        is_training=False, weight_decay=0.0)
    with slim.arg_scope(scope):
      _, _ = mobilenet_v1.mobilenet_v1(
          inputs,
          is_training=False,
          depth_multiplier=FLAGS.depth_multiplier,
          num_classes=FLAGS.num_classes)
    if FLAGS.quantize:
      tf.contrib.quantize.create_eval_graph()
    with tf.Session() as sess:
      with open(FLAGS.eval_graph_file, 'w') as f:
        f.write(str(g.as_graph_def()))
def __call__(self, x_input, batch_size=None, is_training=False):
  """Constructs the model and returns probabilities for the given input."""
  reuse = True if self.built else None
  preproc = tf.map_fn(
      lambda img: inception_preprocess(
          img,
          mobilenet_v1.mobilenet_v1.default_image_size,
          mobilenet_v1.mobilenet_v1.default_image_size),
      x_input)
  with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
    with tf.variable_scope(self.ckpt):
      logits, end_points = mobilenet_v1.mobilenet_v1(
          preproc,
          num_classes=self.num_classes,
          is_training=is_training,
          reuse=reuse)
    preds = tf.argmax(logits, axis=1)
  self.built = True
  self.logits = logits
  self.preds = preds
  return logits
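# Hypothetical usage of the callable wrapper above (the wrapper class and its
# constructor are not shown in the source; the names here are illustrative):
#
#   model = MobilenetV1Model(ckpt='mobilenet_v1', num_classes=1001)
#   x = tf.placeholder(tf.float32, [None, 224, 224, 3])
#   logits = model(x)    # first call builds variables under the ckpt scope
#   logits2 = model(x)   # later calls reuse them (reuse=True via self.built)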
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
  """Extracts second stage box classifier features.

  Args:
    proposal_feature_maps: A 4-D float tensor with shape
      [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
      representing the feature map cropped to each proposal.
    scope: A scope name (unused).

  Returns:
    proposal_classifier_features: A 4-D float tensor with shape
      [batch_size * self.max_num_proposals, height, width, depth]
      representing box classifier features for each proposal.
  """
  net = proposal_feature_maps

  conv_depth = 1024
  if self._skip_last_stride:
    conv_depth_ratio = float(self._conv_depth_ratio_in_percentage) / 100.0
    conv_depth = int(float(conv_depth) * conv_depth_ratio)

  depth = lambda d: max(int(d * 1.0), 16)
  with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights):
    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(
            is_training=self._train_batch_norm,
            weight_decay=self._weight_decay)):
      with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                          padding='SAME'):
        net = slim.separable_conv2d(
            net, depth(conv_depth), [3, 3], depth_multiplier=1, stride=2,
            scope='Conv2d_12_pointwise')
        return slim.separable_conv2d(
            net, depth(conv_depth), [3, 3], depth_multiplier=1, stride=1,
            scope='Conv2d_13_pointwise')
def extract_features(self, preprocessed_inputs):
  preprocessed_inputs = shape_utils.check_min_image_dim(
      33, preprocessed_inputs)

  feature_map_layout = {
      'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise',
                     '', '', '', ''][:self._num_layers],
      'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers],
      'use_explicit_padding': self._use_explicit_padding,
      'use_depthwise': self._use_depthwise,
  }

  with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope:
    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(
            is_training=None, regularize_depthwise=True)):
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams
            else context_manager.IdentityContextManager()):
        _, image_features = mobilenet_v1.mobilenet_v1_base(
            ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
            final_endpoint='Conv2d_13_pointwise',
            min_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier,
            use_explicit_padding=self._use_explicit_padding,
            scope=scope)
    with slim.arg_scope(self._conv_hyperparams_fn()):
      feature_maps = feature_map_generators.multi_resolution_feature_maps(
          feature_map_layout=feature_map_layout,
          depth_multiplier=self._depth_multiplier,
          min_depth=self._min_depth,
          insert_1x1_conv=True,
          image_features=image_features)

  return feature_maps.values()
def build_model():
  """Builds the mobilenet_v1 model for evaluation.

  Returns:
    g: graph with rewrites after insertion of quantization ops and batch norm
       folding.
    eval_ops: eval ops for inference.
  """
  g = tf.Graph()
  with g.as_default():
    inputs, labels = dataset_input(is_training=False)
    # inputs, labels = merge_dataset(is_training=False)
    scope = mobilenet_v1.mobilenet_v1_arg_scope(
        is_training=False, weight_decay=0.0)
    with slim.arg_scope(scope):
      dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                            FLAGS.dataset_split_name,
                                            FLAGS.dataset_dir)
      logits, _ = mobilenet_v1.mobilenet_v1(
          inputs,
          is_training=False,
          depth_multiplier=FLAGS.depth_multiplier,
          num_classes=dataset.num_classes,
          model_scope=FLAGS.model_scope,
          logits_scope=FLAGS.logits_scope,
          conv2d_0_scope=FLAGS.conv2d_0_scope,
          depthwise_scope=FLAGS.depthwise_scope,
          pointwise_scope=FLAGS.pointwise_scope,
          pointwise_merged_mask=FLAGS.pointwise_merged_mask)

    if FLAGS.quantize:
      tf.contrib.quantize.create_eval_graph()

    eval_ops = metrics(logits, labels)
    tf.logging.info('Evaluating %s' % FLAGS.checkpoint_path)

  return g, eval_ops
def _extract_proposal_features(self, preprocessed_inputs, scope):
  """Extracts first stage RPN features.

  Args:
    preprocessed_inputs: A [batch, height, width, channels] float32 tensor
      representing a batch of images.
    scope: A scope name.

  Returns:
    rpn_feature_map: A tensor with shape [batch, height, width, depth]
    activations: A dictionary mapping feature extractor tensor names to
      tensors

  Raises:
    InvalidArgumentError: If the spatial size of `preprocessed_inputs`
      (height or width) is less than 33.
    ValueError: If the created network is missing the required activation.
  """
  preprocessed_inputs.get_shape().assert_has_rank(4)
  shape_assert = tf.Assert(
      tf.logical_and(
          tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
          tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
      ['image size must at least be 33 in both height and width.'])

  with tf.control_dependencies([shape_assert]):
    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(
            is_training=self._train_batch_norm,
            weight_decay=self._weight_decay)):
      with tf.variable_scope('MobilenetV1',
                             reuse=self._reuse_weights) as scope:
        _, activations = mobilenet_v1.mobilenet_v1_base(
            preprocessed_inputs,
            final_endpoint='Conv2d_11_pointwise',
            min_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier,
            scope=scope)
  return activations['Conv2d_11_pointwise'], activations
def extract_features(self, preprocessed_inputs):
  """Extract features from preprocessed inputs.

  Args:
    preprocessed_inputs: a [batch, height, width, channels] float tensor
      representing a batch of images.

  Returns:
    feature_maps: a list of tensors where the ith tensor has shape
      [batch, height_i, width_i, depth_i]
  """
  preprocessed_inputs = shape_utils.check_min_image_dim(
      33, preprocessed_inputs)

  with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope:
    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(
            is_training=None, regularize_depthwise=True)):
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams
            else context_manager.IdentityContextManager()):
        _, image_features = mobilenet_v1.mobilenet_v1_base(
            ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
            final_endpoint='Conv2d_13_pointwise',
            min_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier,
            use_explicit_padding=self._use_explicit_padding,
            scope=scope)
    with slim.arg_scope(self._conv_hyperparams_fn()):
      feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
          base_feature_map_depth=0,
          num_layers=6,
          image_features={
              'image_features': image_features['Conv2d_11_pointwise']
          })

  return feature_maps.values()
def extract_features(self,
                     preprocessed_inputs,
                     state_saver=None,
                     state_name='lstm_state',
                     unroll_length=5,
                     scope=None):
  """Extracts features from preprocessed inputs.

  The features include the base network features, lstm features and SSD
  features, organized in the following name scope:

  <parent scope>/MobilenetV1/...
  <parent scope>/LSTM/...
  <parent scope>/FeatureMaps/...

  Args:
    preprocessed_inputs: A [batch, height, width, channels] float tensor
      representing a batch of consecutive frames from video clips.
    state_saver: A state saver object with methods `state` and `save_state`.
    state_name: A python string for the name to use with the state_saver.
    unroll_length: The number of steps to unroll the lstm.
    scope: The scope for the base network of the feature extractor.

  Returns:
    A list of tensors where the ith tensor has shape [batch, height_i,
    width_i, depth_i]
  """
  preprocessed_inputs = shape_utils.check_min_image_dim(
      33, preprocessed_inputs)
  with slim.arg_scope(
      mobilenet_v1.mobilenet_v1_arg_scope(is_training=self._is_training)):
    with (slim.arg_scope(self._conv_hyperparams_fn())
          if self._override_base_feature_extractor_hyperparams else
          context_manager.IdentityContextManager()):
      with slim.arg_scope([slim.batch_norm], fused=False):
        # Base network.
        with tf.variable_scope(
            scope, self._base_network_scope,
            reuse=self._reuse_weights) as scope:
          net, image_features = mobilenet_v1.mobilenet_v1_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)

  with slim.arg_scope(self._conv_hyperparams_fn()):
    with slim.arg_scope(
        [slim.batch_norm], fused=False, is_training=self._is_training):
      # ConvLSTM layers.
      with tf.variable_scope('LSTM', reuse=self._reuse_weights) as lstm_scope:
        lstm_cell = lstm_cells.BottleneckConvLSTMCell(
            filter_size=(3, 3),
            output_size=(net.shape[1].value, net.shape[2].value),
            num_units=max(self._min_depth, self._lstm_state_depth),
            activation=tf.nn.relu6,
            visualize_gates=True)

        net_seq = list(tf.split(net, unroll_length))
        if state_saver is None:
          init_state = lstm_cell.init_state(
              state_name, net.shape[0].value / unroll_length, tf.float32)
        else:
          c = state_saver.state('%s_c' % state_name)
          h = state_saver.state('%s_h' % state_name)
          init_state = (c, h)

        # Identities added for inputing state tensors externally.
        c_ident = tf.identity(init_state[0], name='lstm_state_in_c')
        h_ident = tf.identity(init_state[1], name='lstm_state_in_h')
        init_state = (c_ident, h_ident)

        net_seq, states_out = rnn_decoder.rnn_decoder(
            net_seq, init_state, lstm_cell, scope=lstm_scope)
        batcher_ops = None
        self._states_out = states_out
        if state_saver is not None:
          self._step = state_saver.state('%s_step' % state_name)
          batcher_ops = [
              state_saver.save_state('%s_c' % state_name, states_out[-1][0]),
              state_saver.save_state('%s_h' % state_name, states_out[-1][1]),
              state_saver.save_state('%s_step' % state_name, self._step - 1)
          ]
        with tf_ops.control_dependencies(batcher_ops):
          image_features['Conv2d_13_pointwise_lstm'] = tf.concat(net_seq, 0)

        # Identities added for reading output states, to be reused externally.
        tf.identity(states_out[-1][0], name='lstm_state_out_c')
        tf.identity(states_out[-1][1], name='lstm_state_out_h')

      # SSD layers.
      with tf.variable_scope('FeatureMaps', reuse=self._reuse_weights):
        feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=self._feature_map_layout,
            depth_multiplier=self._depth_multiplier,
            min_depth=self._min_depth,
            insert_1x1_conv=True,
            image_features=image_features)

  return feature_maps.values()
def extract_features(self, preprocessed_inputs):
  """Extract features from preprocessed inputs.

  Args:
    preprocessed_inputs: a [batch, height, width, channels] float tensor
      representing a batch of images.

  Returns:
    feature_maps: a list of tensors where the ith tensor has shape
      [batch, height_i, width_i, depth_i]

  Raises:
    ValueError: if image height or width are not 256 pixels.
  """
  image_shape = preprocessed_inputs.get_shape()
  image_shape.assert_has_rank(4)
  image_height = image_shape[1].value
  image_width = image_shape[2].value

  if image_height is None or image_width is None:
    shape_assert = tf.Assert(
        tf.logical_and(
            tf.equal(tf.shape(preprocessed_inputs)[1], 256),
            tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
        ['image size must be 256 in both height and width.'])
    with tf.control_dependencies([shape_assert]):
      preprocessed_inputs = tf.identity(preprocessed_inputs)
  elif image_height != 256 or image_width != 256:
    raise ValueError('image size must be = 256 in both height and width;'
                     ' image dim = %d,%d' % (image_height, image_width))

  feature_map_layout = {
      'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise',
                     '', '', ''],
      'layer_depth': [-1, -1, 512, 256, 256],
      'conv_kernel_size': [-1, -1, 3, 3, 2],
      'use_explicit_padding': self._use_explicit_padding,
      'use_depthwise': self._use_depthwise,
  }

  with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope:
    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams
            else context_manager.IdentityContextManager()):
        _, image_features = mobilenet_v1.mobilenet_v1_base(
            ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
            final_endpoint='Conv2d_13_pointwise',
            min_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier,
            use_explicit_padding=self._use_explicit_padding,
            scope=scope)
    with slim.arg_scope(self._conv_hyperparams_fn()):
      feature_maps = feature_map_generators.multi_resolution_feature_maps(
          feature_map_layout=feature_map_layout,
          depth_multiplier=self._depth_multiplier,
          min_depth=self._min_depth,
          insert_1x1_conv=True,
          image_features=image_features)

  return feature_maps.values()
def testBatchNormScopeDoesNotHaveIsTrainingWhenItsSetToNone(self):
  sc = mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)
  self.assertNotIn('is_training',
                   sc[slim.arg_scope_func_key(slim.batch_norm)])
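# is_training=None is the counterpart used by several extract_features
# snippets above: with the key absent from the scope, an enclosing arg_scope
# is free to pin batch-norm mode itself. A minimal sketch (the `images`
# tensor is hypothetical):
with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
  with slim.arg_scope([slim.batch_norm], is_training=False):
    # Batch norm runs in inference mode here because mobilenet_v1_arg_scope
    # did not set is_training, leaving the inner arg_scope in control.
    _, end_points = mobilenet_v1.mobilenet_v1_base(images)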
def extract_features(self, preprocessed_inputs):
  """Extract features from preprocessed inputs.

  Args:
    preprocessed_inputs: a [batch, height, width, channels] float tensor
      representing a batch of images.

  Returns:
    feature_maps: a list of tensors where the ith tensor has shape
      [batch, height_i, width_i, depth_i]
  """
  preprocessed_inputs = shape_utils.check_min_image_dim(
      33, preprocessed_inputs)
  with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope:
    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(
            is_training=None, regularize_depthwise=True)):
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams
            else context_manager.IdentityContextManager()):
        _, image_features = mobilenet_v1.mobilenet_v1_base(
            ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
            final_endpoint='Conv2d_13_pointwise',
            min_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier,
            use_explicit_padding=self._use_explicit_padding,
            scope=scope)

    depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
    with slim.arg_scope(self._conv_hyperparams_fn()):
      with tf.variable_scope('fpn', reuse=self._reuse_weights):
        feature_blocks = [
            'Conv2d_3_pointwise', 'Conv2d_5_pointwise',
            'Conv2d_11_pointwise', 'Conv2d_13_pointwise'
        ]
        base_fpn_max_level = min(self._fpn_max_level, 5)
        feature_block_list = []
        for level in range(self._fpn_min_level, base_fpn_max_level + 1):
          feature_block_list.append(feature_blocks[level - 2])
        fpn_features = feature_map_generators.fpn_top_down_feature_maps(
            [(key, image_features[key]) for key in feature_block_list],
            depth=depth_fn(256))
        feature_maps = []
        for level in range(self._fpn_min_level, base_fpn_max_level + 1):
          feature_maps.append(
              fpn_features['top_down_{}'.format(feature_blocks[level - 2])])
        last_feature_map = fpn_features['top_down_{}'.format(
            feature_blocks[base_fpn_max_level - 2])]
        # Construct coarse features by striding down the last FPN map.
        for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
          last_feature_map = slim.conv2d(
              last_feature_map,
              num_outputs=depth_fn(256),
              kernel_size=[3, 3],
              stride=2,
              padding='SAME',
              scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 13))
          feature_maps.append(last_feature_map)
  return feature_maps
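# For reference, the level -> endpoint mapping the FPN code above relies on
# (strides are the standard MobileNet v1 ones, so level n has stride 2**n):
#   level 2 -> Conv2d_3_pointwise  (stride 4)
#   level 3 -> Conv2d_5_pointwise  (stride 8)
#   level 4 -> Conv2d_11_pointwise (stride 16)
#   level 5 -> Conv2d_13_pointwise (stride 32)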