def _build_model(self, inputs, is_training=True):
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=is_training)):
        logits, endpoints = mobilenet_v2.mobilenet(
            inputs, num_classes=self.config.num_outputs)
    ema = tf.train.ExponentialMovingAverage(0.999)
    self.mobile_net_vars = [
        var for var in tf.trainable_variables()
        if var.name.startswith("Mobilenet") and "Logits" not in var.name
    ]
    return logits, endpoints
def net(image, classes):
    # encoding - convolution/pooling
    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=True)):
        logits, endpoints = mobilenet_v2.mobilenet(image, num_classes=None)
    logits = endpoints["layer_10/output"]
    print(logits.get_shape())

    # new_size = (16,32)
    # resize = tf.image.resize(logits, new_size, align_corners=True)
    # conv = util.conv(resize, [3,3,512,320], "up_1", pad="SAME")

    # new_size = (64,128)
    # resize = tf.image.resize(logits, new_size, align_corners=True)
    # conv = util.conv(resize, [3,3,256,512], "up_2", pad="SAME")

    new_size = (192, 256)
    # align_corners only exists on the TF1 resize op, so use
    # tf.image.resize_images rather than tf.image.resize here.
    resize = tf.image.resize_images(logits, new_size, align_corners=True)
    conv = util.conv(resize, [3, 3, 128, 256], "up_3", pad="SAME")
    conv6 = util.conv(conv, [1, 1, 128, classes], "c6", pad="SAME")
    softmax = tf.nn.softmax(conv6)
    return conv6, tf.argmax(softmax, axis=3), softmax
def load_mobilenet_v2(model_dir, sess):
    model_url = "https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz"
    filename = model_url.split("/")[-1]
    filepath = os.path.join(model_dir, filename.split(".tgz")[0])
    try:
        utils.download_pretrained_model_weights(model_url, filepath, unzip=True)
    except Exception:
        print("Pretrained weights download failed!")

    model_file_name = "mobilenet_v2_1.4_224.ckpt"
    model_path = os.path.join(filepath, model_file_name)
    resized_input_tensor = tf.placeholder(tf.float32, shape=[None, None, None, 3])
    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
        bottleneck_tensor, _ = mobilenet_v2.mobilenet(
            resized_input_tensor, num_classes=None, depth_multiplier=1.4)
    variable_restore_op = tf.contrib.slim.assign_from_checkpoint_fn(
        model_path,
        tf.contrib.slim.get_trainable_variables(),
        ignore_missing_vars=True)
    variable_restore_op(sess)
    # bottleneck_tensor = tf.squeeze(bottleneck_tensor, axis=[1, 2])
    bottleneck_tensor_size = 1792
    return bottleneck_tensor, resized_input_tensor, bottleneck_tensor_size
def __init__(self, checkpoint='../mobilenet_v2_1.0_224.ckpt'):
    # save the checkpoint
    self.checkpoint = checkpoint
    tf.reset_default_graph()
    # placeholder for the image input; the file still needs to be decoded
    self.file_in = tf.placeholder(tf.string, ())
    image = tf.image.decode_jpeg(tf.read_file(self.file_in))
    # expand to a batch of one, then scale values to [-1, 1]
    inputs = tf.expand_dims(image, 0)
    inputs = (tf.cast(inputs, tf.float32) / 128) - 1
    # ensure the input has three channels and resize to 224x224
    inputs.set_shape((None, None, None, 3))
    inputs = tf.image.resize_images(inputs, (224, 224))
    # get the endpoints of the network
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=False)):
        _, self.endpoints = mobilenet_v2.mobilenet(inputs)
    # Restore using the exponential moving average since it produces
    # (1.5-2%) higher accuracy
    ema = tf.train.ExponentialMovingAverage(0.999)
    vars = ema.variables_to_restore()
    saver = tf.train.Saver(vars)
    # create the label map from ImageNet
    self.label_map = imagenet.create_readable_names_for_imagenet_labels()
    # create the session and restore the downloaded checkpoint
    self.sess = tf.Session()
    saver.restore(self.sess, self.checkpoint)
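# A minimal usage sketch for the wrapper above, assuming the class also exposes
# a `classify` method (hypothetical; not part of the original snippet). It runs
# the 'Predictions' endpoint and maps the top-k indices to readable labels.
def classify(self, image_path, top_k=5):
    probs = self.sess.run(self.endpoints['Predictions'],
                          feed_dict={self.file_in: image_path})[0]
    top = probs.argsort()[-top_k:][::-1]
    return [(self.label_map[i], float(probs[i])) for i in top]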
def create_inference_graph(self, input_image, base_graph):
    util.download(self.params.CHECKPOINT_TARBALL_URI, self.params.MODEL_BASEDIR)

    self.graph = base_graph
    with self.graph.as_default():
        input_image = tf.cast(input_image, tf.float32) / 128. - 1
        input_image.set_shape(self.params.INPUT_TENSOR_SHAPE)

        from nets.mobilenet import mobilenet_v2
        with tf.contrib.slim.arg_scope(
                mobilenet_v2.training_scope(is_training=False)):
            # See also e.g. mobilenet_v2_035
            self.logits, self.endpoints = mobilenet_v2.mobilenet(
                input_image,
                is_training=False,
                depth_multiplier=self.params.DEPTH_MULTIPLIER,
                finegrain_classification_mode=self.params.FINE)

        # Per the authors: restore using the exponential moving average since
        # it produces (1.5-2%) higher accuracy
        ema = tf.train.ExponentialMovingAverage(0.999)
        vs = ema.variables_to_restore()
        saver = tf.train.Saver(vs)

    checkpoint = os.path.join(
        self.params.MODEL_BASEDIR, self.params.CHECKPOINT + '.ckpt')
    nodes = list(self.output_names) + [input_image]
    self.graph = util.give_me_frozen_graph(
        checkpoint,
        nodes=self.output_names,
        base_graph=self.graph,
        saver=saver)
    return self.graph
def __call__(self, inputs, castFromUint8=True):
    pr_shape = lambda var: print(var.shape)

    if castFromUint8:
        inputs = tf.cast(inputs, self.dtype)
    # print(inputs.dtype)

    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(
            is_training=self.is_training)):
        # print(inputs.dtype)
        global_pool, endpoints = mobilenet_v2.mobilenet(inputs, num_classes=None)
    self.variables_to_restore = slim.get_variables()  # len 260

    # Append two fully-connected layers on top of the backbone.
    dropout_keep_prob = 0.5
    weight_decay = 0.05
    with tf.variable_scope('additional', 'fc'):
        # flatten = tf.flatten(endpoints['global_pool'])
        flatten = slim.flatten(global_pool)
        with slim.arg_scope(
                [slim.fully_connected],
                weights_regularizer=slim.l2_regularizer(weight_decay),
                weights_initializer=tc.layers.xavier_initializer(tf.float32),
                # weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                activation_fn=None) as sc:
            net = slim.fully_connected(flatten, 128, activation_fn=None,
                                       scope='fc1')
            net = slim.dropout(net, dropout_keep_prob,
                               is_training=self.is_training, scope='dropout')
            logits = slim.fully_connected(net, self.n_classes,
                                          activation_fn=None, scope='fc2')

    # Also save the four extra FC variables (264 variables in total).
    self.variables_to_save = slim.get_variables()
    for var in self.variables_to_save:
        if var in self.variables_to_restore:
            continue
        self.variables_to_train.append(var)

    # pr_shape(out)
    return logits
def training_scope(weight_decay, is_training, stddev, dropout_keep_prob, bn_decay):
    return mobilenet_v2_builder.training_scope(
        weight_decay=weight_decay,
        is_training=is_training,
        stddev=stddev,
        dropout_keep_prob=dropout_keep_prob,
        bn_decay=bn_decay)
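# Hypothetical call site for the thin wrapper above; the hyperparameter values
# are illustrative only and not from the original snippet.
inputs = tf.placeholder(tf.float32, (None, 224, 224, 3))
with tf.contrib.slim.arg_scope(
        training_scope(weight_decay=4e-5, is_training=True, stddev=0.09,
                       dropout_keep_prob=0.8, bn_decay=0.997)):
    logits, endpoints = mobilenet_v2_builder.mobilenet(inputs, num_classes=1001)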
def Encoder_mobilenet(x, is_training=True, weight_decay=0.001, reuse=False):
    from nets.mobilenet import mobilenet_v2
    with slim.arg_scope(mobilenet_v2.training_scope()):
        net, endpoints = mobilenet_v2.mobilenet(x)
    variables = tf.contrib.framework.get_variables('mobilenet_v2')
    return net, variables
def train_kfold(record_file, train_log_step, train_param, val_log_step,
                num_classes, data_shape, snapshot, snapshot_prefix):
    [base_lr, max_steps] = train_param
    [batch_size, resize_height, resize_width, depths] = data_shape

    # ============================================================================================================
    # Define the model: [core]
    with slim.arg_scope(
            mobilenet_v2.training_scope(dropout_keep_prob=R.dropout)):
        out, end_points = mobilenet_v2.mobilenet(
            input_tensor=input_images,
            num_classes=num_classes,
            depth_multiplier=R.depth_multiplier,
            is_training=is_training)

    # Specify the loss function. Losses defined through tf.losses are added to
    # the total loss automatically, so no explicit
    # slim.losses.add_loss(my_loss) is needed.
    tf.losses.softmax_cross_entropy(
        onehot_labels=input_labels, logits=out)  # cross-entropy loss (~1.6)
    loss = tf.losses.get_total_loss(
        add_regularization_losses=True)  # plus regularization losses (~2.2)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(out, 1), tf.argmax(input_labels, 1)),
                tf.float32))

    # Specify the optimization scheme:
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=base_lr)
    '''
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(0.05, global_step, 150, 0.9)
    optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
    train_tensor = optimizer.minimize(loss, global_step)
    train_op = slim.learning.create_train_op(loss, optimizer, global_step=global_step)
    '''

    # When the model contains `batch_norm` layers, the moving `average` and
    # `variance` of every such layer must be updated. These updates are not
    # part of the regular training step, so they have to be run by hand:
    # fetch all pending update ops via `tf.get_collection`, then use
    # TensorFlow control flow to run them before the training op.
    # update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # with tf.control_dependencies(update_ops):
    # create_train_op ensures that when we evaluate it to get the loss,
    # the update_ops are done and the gradient updates are computed.
    train_op = slim.learning.create_train_op(total_loss=loss, optimizer=optimizer)

    # ================================================================================================================
    # Read images and labels from the TFRecord file.
    all_nums = get_example_nums(record_file)
    all_images, all_labels = read_records(record_file, resize_height,
                                          resize_width, type='normalization',
                                          is_train=None)
    all_images_batch, all_labels_batch = get_batch_images(
        all_images, all_labels,
        batch_size=batch_size, labels_nums=num_classes,
        one_hot=True, shuffle=True)
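# A short sketch of the manual update-op pattern described in the comments
# above, for the case where slim.learning.create_train_op is not used
# (variable names follow the snippet; illustrative only).
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    # The batch-norm moving averages are now updated before each train step.
    train_op = optimizer.minimize(loss)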
def _image_to_head(self, is_training, reuse=None):
    with slim.arg_scope(mobilenet_v2.training_scope(is_training=is_training)):
        net, endpoints = mobilenet_v2.mobilenet_base(self._image,
                                                     conv_defs=CTPN_DEF)
    self.variables_to_restore = slim.get_variables_to_restore()
    self._act_summaries.append(net)
    self._layers['head'] = net
    return net
def inspect_module():
    features = tf.zeros([8, 224, 224, 3], name='input')
    with tf.variable_scope('TestSSD', default_name=None, values=[features],
                           reuse=tf.AUTO_REUSE):
        with tf.contrib.slim.arg_scope(
                mobilenet_v2.training_scope(is_training=False)):
            logits, endpoints = mobilenet_v2.mobilenet(features)
    for key in endpoints:
        print(key, endpoints[key])
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    feature_map_layout = {
        'from_layer': ['layer_15/expansion_output', 'layer_19', '', '', '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_depthwise': self._use_depthwise,
        'use_explicit_padding': self._use_explicit_padding,
    }

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
        with slim.arg_scope(
            mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
            slim.arg_scope(
                [mobilenet.depth_multiplier], min_depth=self._min_depth):
            with (slim.arg_scope(self._conv_hyperparams_fn())
                  if self._override_base_feature_extractor_hyperparams
                  else context_manager.IdentityContextManager()):
                # TODO(b/68150321): Enable fused batch norm once quantization
                # supports it.
                with slim.arg_scope([slim.batch_norm], fused=False):
                    _, image_features = mobilenet_v2.mobilenet_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='layer_19',
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
        with slim.arg_scope(self._conv_hyperparams_fn()):
            # TODO(b/68150321): Enable fused batch norm once quantization
            # supports it.
            with slim.arg_scope([slim.batch_norm], fused=False):
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)
    return feature_maps.values()
def MobileNet(depth_multiplier, imgs_in, weight_decay, batch_norm_momentum,
              is_training):
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=is_training,
                                        weight_decay=weight_decay,
                                        bn_decay=batch_norm_momentum)):
        features, _ = mobilenet_v2.mobilenet_base(
            imgs_in,
            depth_multiplier=depth_multiplier,
            finegrain_classification_mode=depth_multiplier < 1,
            output_stride=16)
    return features
def encode(self, input_tensor, name):
    """
    Encode the input tensor with the MobileNet backbone.
    :param input_tensor:
    :param name:
    :param flags:
    :return: the MobileNet-encoded features
    """
    ret = OrderedDict()

    with tf.variable_scope(name):
        with tf.contrib.slim.arg_scope(
                mobilenet_v2.training_scope(is_training=True)):
            net, end_points = mobilenet_v2.mobilenet(input_tensor,
                                                     base_only=True)

            # # Version B
            # ret['layer_5'] = dict()
            # ret['layer_5']['data'] = end_points['layer_5']
            # ret['layer_5']['shape'] = end_points['layer_5'].get_shape().as_list()
            #
            # ret['layer_8'] = dict()
            # ret['layer_8']['data'] = end_points['layer_8']
            # ret['layer_8']['shape'] = end_points['layer_8'].get_shape().as_list()
            #
            # ret['layer_18'] = dict()
            # ret['layer_18']['data'] = end_points['layer_18']
            # ret['layer_18']['shape'] = end_points['layer_18'].get_shape().as_list()

            # Version A
            ret['layer_7'] = dict()
            ret['layer_7']['data'] = end_points['layer_7']
            ret['layer_7']['shape'] = end_points['layer_7'].get_shape().as_list()

            ret['layer_14'] = dict()
            ret['layer_14']['data'] = end_points['layer_14']
            ret['layer_14']['shape'] = end_points['layer_14'].get_shape().as_list()

            ret['layer_19'] = dict()
            ret['layer_19']['data'] = end_points['layer_19']
            ret['layer_19']['shape'] = end_points['layer_19'].get_shape().as_list()

            # ret['end_points'] = end_points
    return ret
def mobilenet_v2_140(inputs, is_training, opts):
    if is_training:
        with slim.arg_scope(mobilenet_v2.training_scope(
                weight_decay=opts.weight_decay,
                stddev=0.09,
                bn_decay=opts.batch_norm_decay)):
            return mobilenet_v2.mobilenet_v2_140(
                inputs,
                num_classes=opts.num_classes,
                reuse=None)
    else:
        return mobilenet_v2.mobilenet_v2_140(
            inputs,
            num_classes=opts.num_classes,
            reuse=None)
def mobilenet(images, depth_multiplier=1.0, is_training=True, verbose=False,
              **kwargs):
    """Base MobileNet architecture.

    Based on https://github.com/tensorflow/models/tree/master/research/slim/nets/mobilenet

    Args:
        images: input images in [0., 1.]
        depth_multiplier: MobileNet depth multiplier.
        is_training: training bool for batch norm
        verbose: verbosity level

    Kwargs:
        weight_decay: Regularization constant. Defaults to 0.
        normalizer_decay: Batch norm decay. Defaults to 0.9
    """
    del kwargs
    base_scope = tf.get_variable_scope().name

    # Input in [0., 1.] -> [-1., 1.]
    with tf.control_dependencies([tf.assert_greater_equal(images, 0.)]):
        with tf.control_dependencies([tf.assert_less_equal(images, 1.)]):
            net = (images - 0.5) * 2.

    # MobileNet
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=is_training)):
        if depth_multiplier == 1.0:
            net, _ = mobilenet_v2.mobilenet(net, base_only=True)
        elif depth_multiplier == 0.5:
            net, _ = mobilenet_v2.mobilenet_v2_050(net, base_only=True)
        elif depth_multiplier == 0.35:
            net, _ = mobilenet_v2.mobilenet_v2_035(net, base_only=True)

    # Add a saver to restore Imagenet-pretrained weights
    saver_collection = '%s_mobilenet_%s_saver' % (base_scope, depth_multiplier)
    savers = tf.get_collection(saver_collection)
    if len(savers) == 0:
        var_list = {
            x.op.name.replace('%s/' % base_scope, ''): x
            for x in tf.global_variables(scope=base_scope)
        }
        saver = tf.train.Saver(var_list=var_list)
        tf.add_to_collection(saver_collection, saver)
    return net
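# Sketch of restoring ImageNet weights through the saver that the function
# above stashes in a collection. The checkpoint path is a placeholder, and the
# collection name assumes the function is called at the top level of the graph
# (empty base_scope).
images = tf.placeholder(tf.float32, (None, 224, 224, 3))  # values in [0., 1.]
features = mobilenet(images, depth_multiplier=1.0, is_training=False)
saver = tf.get_collection('_mobilenet_1.0_saver')[0]
with tf.Session() as sess:
    saver.restore(sess, '/tmp/mobilenet_v2_1.0_224.ckpt')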
def compare_layer_output(net, layer_name, checkpoint, tensor_name, image_file):
    ### Compare outputs from the same layer (tensor)
    ### from the caffe net and the tensorflow graph.
    ### matching name examples:
    ##  tf: MobilenetV2/Conv/Conv2D:0, MobilenetV2/Conv/Relu6:0, MobilenetV2/Conv/BatchNorm/FusedBatchNorm:0
    ##  caffe: conv1, conv1/relu, conv1/scale

    def square_error(x, x_):
        return np.sum(np.square(x - x_))

    image = tf_preprocess(image_file)

    ## caffe inference
    net.blobs['data'].data[...] = image[...]
    net.forward()
    caffe_output = net.blobs[layer_name].data
    caffe_output = caffe_output.transpose(0, 2, 3, 1)  # channel first to last

    ## tf inference
    tf.reset_default_graph()
    images = tf.placeholder(tf.float32,
                            shape=(None, image_scale, image_scale, 3))
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=False)):
        logits, endpoints = mobilenet_v2.mobilenet(images, num_classes=1001)
    ema = tf.train.ExponentialMovingAverage(0.999)
    vars = ema.variables_to_restore()
    saver = tf.train.Saver(vars)
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        tensor = sess.graph.get_tensor_by_name(tensor_name)
        tf_output = sess.run(tensor, feed_dict={images: image})

    ### compare the tf and caffe results of a specific layer;
    ### needs the graphs and the layer (tensor) name in caffe and tf
    print('...................................')
    error = 0
    for i in range(32):
        err = square_error(tf_output[0, :, :, i], caffe_output[0, :, :, i])
        print('channel', i, err)
        error += err
    print('total error:', error)
    print('...................................')
    return
def getMobileNet(checkpoint):
    graph = tf.Graph()
    sess = tf.Session(graph=graph)
    with graph.as_default():
        file_input = tf.placeholder(tf.string, ())
        image = tf.image.decode_image(tf.read_file(file_input))
        images = tf.expand_dims(image, 0)
        images = tf.cast(images, tf.float32) / 128. - 1
        images.set_shape((None, None, None, 3))
        images = tf.image.resize_images(images, (224, 224))
        with tf.contrib.slim.arg_scope(
                mobilenet_v2.training_scope(is_training=False)):
            logits, endpoints = mobilenet_v2.mobilenet(images)
        ema = tf.train.ExponentialMovingAverage(0.999)
        vars = ema.variables_to_restore()
        saver = tf.train.Saver(vars)
        saver.restore(sess, checkpoint)
    return sess, graph, endpoints, file_input
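# Hypothetical usage of the helper above; the checkpoint and image paths are
# placeholders.
sess, graph, endpoints, file_input = getMobileNet('mobilenet_v2_1.0_224.ckpt')
probs = sess.run(endpoints['Predictions'],
                 feed_dict={file_input: 'imgs/dog.jpg'})
print('top-1 class id:', probs[0].argmax())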
def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """
    # print('###faster_rcnn_mobilenet_v2_feature_extractor.py### - extract_proposal_features')
    preprocessed_inputs.get_shape().assert_has_rank(4)
    preprocessed_inputs = shape_utils.check_min_image_dim(
        min_dim=33, image_tensor=preprocessed_inputs)

    with slim.arg_scope(
            mobilenet_v2.training_scope(is_training=self._train_batch_norm,
                                        weight_decay=self._weight_decay)):
        with tf.variable_scope('MobilenetV2',
                               reuse=self._reuse_weights) as scope:
            params = {}
            if self._skip_last_stride:
                # Not used by default; otherwise the conv_defs from
                # slim.nets.mobilenet.mobilenet_v2 apply.
                params['conv_defs'] = _get_mobilenet_conv_no_last_stride_defs(
                    conv_depth_ratio_in_percentage=self.
                    _conv_depth_ratio_in_percentage)
            _, endpoints = mobilenet_v2.mobilenet_base(
                preprocessed_inputs,
                final_endpoint='layer_19',  # actually 'MobilenetV2/Conv_1'
                min_depth=self._min_depth,
                depth_multiplier=self._depth_multiplier,
                scope=scope,
                **params)
    return endpoints['layer_19'], endpoints
def load_mobilenet_v2(model_dir, sess):
    model_file_name = "mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.ckpt"
    model_path = os.path.join(model_dir, model_file_name)
    resized_input_tensor = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 3])
    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
        bottleneck_tensor, _ = mobilenet_v2.mobilenet(
            resized_input_tensor, num_classes=None, depth_multiplier=1.4)
    variable_restore_op = tf.contrib.slim.assign_from_checkpoint_fn(
        model_path,
        tf.contrib.slim.get_trainable_variables(),
        ignore_missing_vars=True)
    variable_restore_op(sess)
    # bottleneck_tensor = tf.squeeze(bottleneck_tensor, axis=[1, 2])
    bottleneck_tensor_size = 1792
    return bottleneck_tensor, resized_input_tensor, bottleneck_tensor_size
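# Minimal usage sketch for the loader above, assuming `/tmp/models` contains
# the extracted mobilenet_v2_1.4_224 checkpoint (path is a placeholder). The
# zero batch only illustrates the expected feed shape; real inputs should be
# resized images preprocessed the way the rest of the pipeline expects.
import numpy as np

with tf.Session() as sess:
    bottleneck, input_ph, size = load_mobilenet_v2('/tmp/models', sess)
    feats = sess.run(bottleneck,
                     feed_dict={input_ph: np.zeros((1, 224, 224, 3))})
    print(feats.shape)  # 1792 channels with depth_multiplier=1.4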
def fcn_mobv2(images, num_classes, is_training=True):
    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
        _, end_points = mobilenet_v2.mobilenet(images, num_classes)

    for v, k in end_points.items():
        print('{v}:{k}'.format(v=v, k=k))

    # pool4 = end_points['resnet_v1_101/pool4']
    # dconv1_out = pool4.get_shape().as_list()
    # deconv1 = slim.conv2d_transpose(net, dconv1_out[3], [4, 4], stride=2, scope='deconv1')
    # fu1 = tf.add(deconv1, pool4)
    #
    # pool3 = end_points['resnet_v1_101/pool3']
    # dconv2_out = pool3.get_shape().as_list()
    # deconv2 = slim.conv2d_transpose(fu1, dconv2_out[3], [4, 4], stride=2, scope='deconv2')
    # fu2 = tf.add(deconv2, pool3)

    net = end_points['layer_18']

    # net_14 = end_points['Conv2d_11_pointwise']
    # net_28 = end_points['Conv2d_5_pointwise']
    # up1 = slim.conv2d_transpose(net_7, 2, [4, 4], stride=2, scope='deconv32')
    # fu1 = tf.add(up1, net_14, name='fu1')
    #
    # up2 = slim.conv2d_transpose(fu1, 2, [4, 4], stride=2, scope='deconv16')
    # fu2 = tf.add(up2, net_28, name='fu2')

    logit = slim.conv2d_transpose(net, 2, [64, 64], stride=32, scope='deconv8')
    # `dimension` is the deprecated name of this argument; use `axis`.
    prediction = tf.argmax(logit, axis=3)  # , name="prediction")
    print('logit', logit)
    return logit, tf.expand_dims(prediction, axis=3)
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
    """Extracts second stage box classifier features.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name (unused).

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
    net = proposal_feature_maps

    conv_depth = 1024
    if self._skip_last_stride:
        conv_depth_ratio = float(self._conv_depth_ratio_in_percentage) / 100.0
        conv_depth = int(float(conv_depth) * conv_depth_ratio)

    depth = lambda d: max(int(d * 1.0), 16)
    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights):
        with slim.arg_scope(
                mobilenet_v2.training_scope(
                    is_training=self._train_batch_norm,
                    weight_decay=self._weight_decay)):
            with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                                padding='SAME'):
                net = slim.separable_conv2d(
                    net,
                    depth(conv_depth), [3, 3],
                    depth_multiplier=1,
                    stride=2,
                    scope='Conv_2')  # or 'layer_20'
                return slim.separable_conv2d(
                    net,
                    depth(conv_depth), [3, 3],
                    depth_multiplier=1,
                    stride=1,
                    scope='Conv_3')  # or 'layer_21'
def _get_endpoints(model_name, img_tensor):
    if model_name == "res50":
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            _, end_points = resnet_v1.resnet_v1_50(img_tensor, 1000,
                                                   is_training=False)
            return end_points["predictions"]
    elif model_name == "res152":
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            _, end_points = resnet_v1.resnet_v1_152(img_tensor, 1000,
                                                    is_training=False)
            return end_points["predictions"]
    elif model_name.startswith("mobilenet"):
        with tf.contrib.slim.arg_scope(
                mobilenet_v2.training_scope(is_training=False)):
            _, endpoints = mobilenet_v2.mobilenet(img_tensor)
            return endpoints["Predictions"]
def extract_features(self, inputs):
    """Extracts features from inputs.

    This function adds 4 additional feature maps on top of
    'layer_15/expansion_output' and 'layer_19' in the base Mobilenet v2
    network.

    Args:
      inputs: a tensor of shape [batch_size, height, width, channels],
        holding the input images.

    Returns:
      a list of 6 float tensors of shape [batch_size, height, width,
      channels], holding feature map tensors to be fed to box predictor.
    """
    feature_map_specs_dict = {
        'layer_name': ['layer_15/expansion_output', 'layer_19',
                       None, None, None, None],
        'layer_depth': [None, None, 512, 256, 256, 128]
    }

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
        with slim.arg_scope(
                mobilenet_v2.training_scope(is_training=None,
                                            bn_decay=0.9997)):
            _, end_points = mobilenet_v2.mobilenet_base(
                inputs,
                final_endpoint='layer_19',
                depth_multiplier=self._depth_multiplier,
                scope=scope)
        with slim.arg_scope(self._conv_hyperparams_fn()):
            feature_maps = feature_map_generators.ssd_feature_maps(
                feature_map_tensor_dict=end_points,
                feature_map_specs_dict=feature_map_specs_dict,
                depth_multiplier=1,
                use_depthwise=self._use_depthwise,
                insert_1x1_conv=True)
    feature_map_list = list(feature_maps.values())
    return feature_map_list
def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
        with slim.arg_scope(
            mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
            slim.arg_scope(
                [mobilenet.depth_multiplier], min_depth=self._min_depth):
            _, activations = mobilenet_v2.mobilenet_base(
                preprocessed_inputs,
                final_endpoint='layer_19',
                min_depth=self._min_depth,
                depth_multiplier=self._depth_multiplier,
                scope=scope)
    return activations['layer_19'], activations
# 1.2 Build the graph structure first, then load the weights.
# Temporarily add slim to the Python search path.
import sys
sys.path.append('./models/research/slim')

# Import mobilenet_v2.
from nets.mobilenet import mobilenet_v2

# Reset the default graph.
tf.reset_default_graph()

# Build the MobileNet graph. Once built, tf.get_default_graph() contains the
# MobileNet structure; comparing tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
# before and after the reset shows the difference.
images = tf.placeholder(tf.float32, (None, 224, 224, 3))
with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
    logits, endpoints = mobilenet_v2.mobilenet(images, depth_multiplier=1.4)

# Create a Saver to restore the graph weights.
saver = tf.train.Saver()
with tf.Session() as sess:
    # latest_checkpoint inspects the checkpoint file and finds the most recent
    # model; restore then loads the weights.
    saver.restore(sess, tf.train.latest_checkpoint('./model_ckpt/moilenet_v2'))
    # get_tensor_by_name fetches a tensor by name (the variable scope created
    # by the slim builder is 'MobilenetV2').
    print(sess.run(tf.get_default_graph()
                   .get_tensor_by_name('MobilenetV2/Conv/weights:0')).shape)

# 1.3 Frozen inference
# A .pb file stores both the variable values and the whole graph structure in
# a single file, via convert_variables_to_constants ...
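# A short sketch of the freezing step that section 1.3 describes, reusing the
# saver from above. The output node name assumes the usual slim MobilenetV2
# layout and should be verified against the actual graph.
from tensorflow.python.framework import graph_util

with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint('./model_ckpt/moilenet_v2'))
    frozen_def = graph_util.convert_variables_to_constants(
        sess, sess.graph_def, ['MobilenetV2/Predictions/Reshape_1'])
with tf.gfile.GFile('./model_ckpt/mobilenet_v2_frozen.pb', 'wb') as f:
    f.write(frozen_def.SerializeToString())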
tf.reset_default_graph()

# For simplicity we just decode the jpeg inside tensorflow,
# but one can provide any input obviously.
file_input = tf.placeholder(tf.string, ())
image = tf.image.decode_jpeg(tf.read_file(file_input))
images = tf.expand_dims(image, 0)
images = tf.cast(images, tf.float32) / 128. - 1
images.set_shape((None, None, None, 3))
images = tf.image.resize_images(images, (224, 224))
# images = tf.placeholder(tf.float32, (None, 224, 224, 3))

# Note: arg_scope is optional for inference.
with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
    logits, endpoints = mobilenet_v2.mobilenet(images)

# Restore using the exponential moving average since it produces (1.5-2%)
# higher accuracy.
ema = tf.train.ExponentialMovingAverage(0.999)
vars = ema.variables_to_restore()
saver = tf.train.Saver(vars)

from datasets import imagenet

with tf.Session() as sess:
    saver.restore(sess, checkpoint)
    x = endpoints['Predictions'].eval(feed_dict={file_input: 'imgs/dog.jpg'})
    # writer = tf.summary.FileWriter("TensorBoard/", graph=sess.graph)
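# A short continuation sketch: map the probabilities computed above to
# readable ImageNet labels using the slim helper already imported as
# `imagenet` (illustrative only).
label_map = imagenet.create_readable_names_for_imagenet_labels()
top5 = x[0].argsort()[-5:][::-1]
for idx in top5:
    print(label_map[idx], x[0][idx])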
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
        with slim.arg_scope(
            mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
            slim.arg_scope(
                [mobilenet.depth_multiplier], min_depth=self._min_depth):
            with (slim.arg_scope(self._conv_hyperparams_fn())
                  if self._override_base_feature_extractor_hyperparams
                  else context_manager.IdentityContextManager()):
                _, image_features = mobilenet_v2.mobilenet_base(
                    ops.pad_to_multiple(preprocessed_inputs,
                                        self._pad_to_multiple),
                    final_endpoint='layer_19',
                    depth_multiplier=self._depth_multiplier,
                    conv_defs=self._conv_defs,
                    use_explicit_padding=self._use_explicit_padding,
                    scope=scope)

        depth_fn = lambda d: max(int(d * self._depth_multiplier),
                                 self._min_depth)
        with slim.arg_scope(self._conv_hyperparams_fn()):
            with tf.variable_scope('fpn', reuse=self._reuse_weights):
                feature_blocks = ['layer_4', 'layer_7', 'layer_14', 'layer_19']
                base_fpn_max_level = min(self._fpn_max_level, 5)
                feature_block_list = []
                for level in range(self._fpn_min_level, base_fpn_max_level + 1):
                    feature_block_list.append(feature_blocks[level - 2])
                fpn_features = feature_map_generators.fpn_top_down_feature_maps(
                    [(key, image_features[key]) for key in feature_block_list],
                    depth=depth_fn(self._additional_layer_depth),
                    use_depthwise=self._use_depthwise,
                    use_explicit_padding=self._use_explicit_padding)

                feature_maps = []
                for level in range(self._fpn_min_level, base_fpn_max_level + 1):
                    feature_maps.append(fpn_features['top_down_{}'.format(
                        feature_blocks[level - 2])])
                last_feature_map = fpn_features['top_down_{}'.format(
                    feature_blocks[base_fpn_max_level - 2])]

                # Construct coarse features
                padding = 'VALID' if self._use_explicit_padding else 'SAME'
                kernel_size = 3
                for i in range(base_fpn_max_level + 1,
                               self._fpn_max_level + 1):
                    if self._use_depthwise:
                        conv_op = functools.partial(slim.separable_conv2d,
                                                    depth_multiplier=1)
                    else:
                        conv_op = slim.conv2d
                    if self._use_explicit_padding:
                        last_feature_map = ops.fixed_padding(
                            last_feature_map, kernel_size)
                    last_feature_map = conv_op(
                        last_feature_map,
                        num_outputs=depth_fn(self._additional_layer_depth),
                        kernel_size=[kernel_size, kernel_size],
                        stride=2,
                        padding=padding,
                        scope='bottom_up_Conv2d_{}'.format(
                            i - base_fpn_max_level + 19))
                    feature_maps.append(last_feature_map)
    return feature_maps
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
        with slim.arg_scope(
            mobilenet_v2.training_scope(is_training=None, bn_decay=0.99)), \
            slim.arg_scope(
                [mobilenet.depth_multiplier], min_depth=self._min_depth):
            with slim.arg_scope(
                    training_scope(l2_weight_decay=4e-5,
                                   is_training=self._is_training)):
                _, image_features = mobilenet_v2.mobilenet_base(
                    ops.pad_to_multiple(preprocessed_inputs,
                                        self._pad_to_multiple),
                    final_endpoint='layer_18',
                    depth_multiplier=self._depth_multiplier,
                    use_explicit_padding=self._use_explicit_padding,
                    scope=scope)

    multiplier_func = functools.partial(
        _apply_multiplier,
        multiplier=self._depth_multiplier,
        min_depth=self._min_depth)
    with tf.variable_scope('MnasFPN', reuse=self._reuse_weights):
        with slim.arg_scope(
                training_scope(l2_weight_decay=1e-4,
                               is_training=self._is_training)):
            # Create C6 by downsampling C5.
            c6 = slim.max_pool2d(
                _maybe_pad(image_features['layer_18'],
                           self._use_explicit_padding), [3, 3],
                stride=[2, 2],
                padding='VALID' if self._use_explicit_padding else 'SAME',
                scope='C6_downsample')
            c6 = slim.conv2d(
                c6,
                multiplier_func(self._fpn_layer_depth),
                [1, 1],
                activation_fn=tf.identity,
                normalizer_fn=slim.batch_norm,
                weights_regularizer=None,  # this 1x1 has no kernel regularizer.
                padding='VALID',
                scope='C6_Conv1x1')
            image_features['C6'] = tf.identity(c6)  # Needed for quantization.
            for k in sorted(image_features.keys()):
                tf.logging.error('{}: {}'.format(k, image_features[k]))

            mnasfpn_inputs = [
                image_features['layer_7'],   # C3
                image_features['layer_14'],  # C4
                image_features['layer_18'],  # C5
                image_features['C6'],        # C6
            ]
            self._verify_config(mnasfpn_inputs)
            feature_maps = mnasfpn(
                mnasfpn_inputs,
                head_def=self._head_def,
                output_channel=self._fpn_layer_depth,
                use_explicit_padding=self._use_explicit_padding,
                use_native_resize_op=self._use_native_resize_op,
                multiplier_func=multiplier_func)
    return feature_maps
def extract_features(self, preprocessed_inputs, state_saver=None,
                     state_name='lstm_state', unroll_length=10, scope=None):
    """Extract features from preprocessed inputs.

    The features include the base network features, lstm features and SSD
    features, organized in the following name scope:

    <scope>/MobilenetV2_1/...
    <scope>/MobilenetV2_2/...
    <scope>/LSTM/...
    <scope>/FeatureMap/...

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of consecutive frames from video clips.
      state_saver: A state saver object with methods `state` and `save_state`.
      state_name: Python string, the name to use with the state_saver.
      unroll_length: number of steps to unroll the lstm.
      scope: Scope for the base network of the feature extractor.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: if interleave_method not recognized or large and small base
        network output feature maps of different sizes.
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)
    preprocessed_inputs = ops.pad_to_multiple(
        preprocessed_inputs, self._pad_to_multiple)
    batch_size = preprocessed_inputs.shape[0].value / unroll_length
    batch_axis = 0
    nets = []

    # Batch processing of mobilenet features.
    with slim.arg_scope(mobilenet_v2.training_scope(
            is_training=self._is_training, bn_decay=0.9997)), \
        slim.arg_scope([mobilenet.depth_multiplier],
                       min_depth=self._min_depth, divisible_by=8):
        # Big model.
        net, _ = self.extract_base_features_large(preprocessed_inputs)
        nets.append(net)
        large_base_feature_shape = net.shape

        # Small models
        net, _ = self.extract_base_features_small(preprocessed_inputs)
        nets.append(net)
        small_base_feature_shape = net.shape
        if not (large_base_feature_shape[1] == small_base_feature_shape[1]
                and large_base_feature_shape[2] == small_base_feature_shape[2]):
            raise ValueError('Large and Small base network feature map '
                             'dimension not equal!')

    with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope('LSTM', reuse=self._reuse_weights):
            output_size = (large_base_feature_shape[1],
                           large_base_feature_shape[2])
            lstm_cell, init_state, step = self.create_lstm_cell(
                batch_size, output_size, state_saver, state_name)

            nets_seq = [
                tf.split(net, unroll_length, axis=batch_axis) for net in nets
            ]

            net_seq, states_out = rnn_decoder.multi_input_rnn_decoder(
                nets_seq,
                init_state,
                lstm_cell,
                step,
                selection_strategy=self._interleave_method,
                is_training=self._is_training,
                is_quantized=self._is_quantized,
                pre_bottleneck=self._pre_bottleneck,
                flatten_state=self._flatten_state,
                scope=None)
            self._states_out = states_out

        batcher_ops = None
        if state_saver is not None:
            self._step = state_saver.state(state_name + '_step')
            batcher_ops = [
                state_saver.save_state(state_name + '_c', states_out[-1][0]),
                state_saver.save_state(state_name + '_h', states_out[-1][1]),
                state_saver.save_state(state_name + '_step', self._step + 1)
            ]
        image_features = {}
        with tf_ops.control_dependencies(batcher_ops):
            image_features['layer_19'] = tf.concat(net_seq, 0)

        # SSD layers.
        with tf.variable_scope('FeatureMap'):
            feature_maps = feature_map_generators.multi_resolution_feature_maps(
                feature_map_layout=self._feature_map_layout,
                depth_multiplier=self._depth_multiplier,
                min_depth=self._min_depth,
                insert_1x1_conv=True,
                image_features=image_features,
                pool_residual=True)
    return feature_maps.values()
def backbone_net(inputs, image_size, is_training=True, depth_multiplier=0.5):
    pad_to_multiple = 14 if image_size == 112 else (10 if image_size == 80 else 8)
    use_explicit_padding = False
    depth_multiplier = depth_multiplier
    print('construct backbone_net for image_size', image_size,
          'depth_multiplier = ', depth_multiplier)
    use_depthwise = True
    override_base_feature_extractor_hyperparams = False
    reuse_weights = None
    min_depth = 16

    # Architecture-table rows are noted below as
    # input-size, operator, (expansion t, channels c, repeats n, stride s).
    specs = [
        op(slim.conv2d, stride=2, num_outputs=64, kernel_size=[3, 3]),
        # todo: Depthwise Conv3x3
        op(slim.separable_conv2d, stride=1, kernel_size=[3, 3],
           num_outputs=None, multiplier_func=dummy_depth_multiplier),
        # 56²×64  Bottleneck (t=2, c=64, n=5, s=2)
        op(ops.expanded_conv, stride=2, num_outputs=64),
    ]
    for _ in range(0, 4):
        specs.append(op(ops.expanded_conv, stride=1, num_outputs=64))

    # 28²×64  Bottleneck (t=2, c=128, n=1, s=2)
    specs.append(op(ops.expanded_conv, stride=2, num_outputs=128))

    # 14²×128  Bottleneck (t=4, c=128, n=6, s=1)
    for _ in range(0, 6):
        specs.append(op(ops.expanded_conv, expansion_size=expand_input(4),
                        num_outputs=128, stride=1))

    kernel_size = [7, 7] if image_size == 112 else ([5, 5] if image_size == 80
                                                    else [4, 4])
    specs.append(op(ops.expanded_conv, stride=1, num_outputs=16, scope='S1'))
    specs.append(op(slim.conv2d, stride=2, kernel_size=[3, 3], num_outputs=32,
                    scope='S2'))
    specs.append(op(slim.conv2d, stride=1, kernel_size=kernel_size,
                    num_outputs=128, scope='S3', padding='VALID'))
    # print('specs = ', specs, ' len = ', len(specs))

    arch = dict(
        defaults={
            # Note: these parameters of batch norm affect the architecture,
            # which is why they are here and not in training_scope.
            (slim.batch_norm,): {'center': True, 'scale': True},
            (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
                'normalizer_fn': slim.batch_norm,
                'activation_fn': tf.nn.relu6
            },
            (ops.expanded_conv,): {
                'expansion_size': expand_input(2),
                'split_expansion': 1,
                'normalizer_fn': slim.batch_norm,
                'residual': True,
            },
            (slim.conv2d, slim.separable_conv2d): {
                'padding': 'SAME',
                'weights_initializer': slim.xavier_initializer()
            }
        },
        spec=specs)

    print('input to backbone_net ', inputs)
    with tf.variable_scope('Backbone', reuse=reuse_weights) as scope:
        with slim.arg_scope(
            mobilenet_v2.training_scope(is_training=is_training,
                                        bn_decay=0.9997)), \
            slim.arg_scope(
                [mobilenet.depth_multiplier], min_depth=min_depth):
            with (slim.arg_scope(conv_hyperparams_fn(is_training=is_training))
                  if override_base_feature_extractor_hyperparams
                  else context_manager.IdentityContextManager()):
                _, image_features = mobilenet_v2.mobilenet_base(
                    od_ops.pad_to_multiple(inputs, pad_to_multiple),
                    depth_multiplier=depth_multiplier,
                    is_training=is_training,
                    use_explicit_padding=use_explicit_padding,
                    conv_defs=arch,
                    scope=scope)

        # do a fully connected layer here
        # TODO
        layer_15 = image_features['layer_15']
        layer_16 = image_features['layer_16']
        layer_17 = image_features['layer_17']
        # batch_size = tf.shape(S1)[0]
        S1 = slim.flatten(layer_15, scope='S1flatten')  # tf.reshape(S1, [batch_size, -1])
        S2 = slim.flatten(layer_16, scope='S2flatten')  # [batch_size, -1])
        S3 = slim.flatten(layer_17, scope='S3flatten')  # [batch_size, -1])
        before_dense = tf.concat([S1, S2, S3], 1)

        for i in range(1, 18):
            print('layer_' + str(i), image_features['layer_' + str(i)])
        # print('layer_17', layer_17)
        print('S1', S1)
        print('S2', S2)
        print('S3', S3)
        # to_test = slim.conv2d(image_features['layer_19'])
        print('image image_features', image_features.keys())

        with slim.arg_scope([slim.batch_norm], is_training=is_training,
                            center=True, scale=True):
            return image_features, slim.fully_connected(
                before_dense, 136,
                activation_fn=tf.nn.relu6,
                normalizer_fn=slim.batch_norm,
                weights_initializer=slim.xavier_initializer()), (
                    image_features['layer_1'], inputs,
                    image_features['layer_2'])
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
        with slim.arg_scope(
            mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
            slim.arg_scope(
                [mobilenet.depth_multiplier], min_depth=self._min_depth):
            with (slim.arg_scope(self._conv_hyperparams_fn())
                  if self._override_base_feature_extractor_hyperparams
                  else context_manager.IdentityContextManager()):
                _, image_features = mobilenet_v2.mobilenet_base(
                    ops.pad_to_multiple(preprocessed_inputs,
                                        self._pad_to_multiple),
                    final_endpoint='layer_19',
                    depth_multiplier=self._depth_multiplier,
                    conv_defs=_CONV_DEFS if self._use_depthwise else None,
                    use_explicit_padding=self._use_explicit_padding,
                    scope=scope)

        depth_fn = lambda d: max(int(d * self._depth_multiplier),
                                 self._min_depth)
        with slim.arg_scope(self._conv_hyperparams_fn()):
            with tf.variable_scope('fpn', reuse=self._reuse_weights):
                feature_blocks = ['layer_4', 'layer_7', 'layer_14', 'layer_19']
                base_fpn_max_level = min(self._fpn_max_level, 5)
                feature_block_list = []
                for level in range(self._fpn_min_level, base_fpn_max_level + 1):
                    feature_block_list.append(feature_blocks[level - 2])
                fpn_features = feature_map_generators.fpn_top_down_feature_maps(
                    [(key, image_features[key]) for key in feature_block_list],
                    depth=depth_fn(self._additional_layer_depth),
                    use_depthwise=self._use_depthwise)

                feature_maps = []
                for level in range(self._fpn_min_level, base_fpn_max_level + 1):
                    feature_maps.append(fpn_features['top_down_{}'.format(
                        feature_blocks[level - 2])])
                last_feature_map = fpn_features['top_down_{}'.format(
                    feature_blocks[base_fpn_max_level - 2])]

                # Construct coarse features
                for i in range(base_fpn_max_level + 1,
                               self._fpn_max_level + 1):
                    if self._use_depthwise:
                        conv_op = functools.partial(slim.separable_conv2d,
                                                    depth_multiplier=1)
                    else:
                        conv_op = slim.conv2d
                    last_feature_map = conv_op(
                        last_feature_map,
                        num_outputs=depth_fn(self._additional_layer_depth),
                        kernel_size=[3, 3],
                        stride=2,
                        padding='SAME',
                        scope='bottom_up_Conv2d_{}'.format(
                            i - base_fpn_max_level + 19))
                    feature_maps.append(last_feature_map)
    return feature_maps
def style_prediction_mobilenet(style_input_,
                               activation_names,
                               activation_depths,
                               mobilenet_end_point='layer_19',
                               mobilenet_trainable=True,
                               style_params_trainable=False,
                               style_prediction_bottleneck=100,
                               reuse=None):
    """Maps style images to the style embeddings using MobileNetV2.

    Args:
      style_input_: Tensor. Batch of style input images.
      activation_names: string. Scope names of the activations of the
        transformer network which are used to apply style normalization.
      activation_depths: Shapes of the activations of the transformer network
        which are used to apply style normalization.
      mobilenet_end_point: string. Specifies the endpoint to construct the
        MobileNetV2 network up to. This network is part of the style
        prediction network.
      mobilenet_trainable: bool. Should the MobileNetV2 parameters be marked
        as trainable?
      style_params_trainable: bool. Should the mapping from bottleneck to
        beta and gamma parameters be marked as trainable?
      style_prediction_bottleneck: int. Specifies the bottleneck size in the
        number of parameters of the style embedding.
      reuse: bool. Whether to reuse model parameters. Defaults to False.

    Returns:
      Tensor for the output of the style prediction network, Tensor for the
      bottleneck of style parameters of the style prediction network.
    """
    # `with A and B:` would enter only B, so enter both context managers
    # explicitly.
    with tf.name_scope('style_prediction_mobilenet'), tf.variable_scope(
            tf.get_variable_scope(), reuse=reuse):
        with slim.arg_scope(mobilenet_v2.training_scope(
                is_training=mobilenet_trainable)):
            _, end_points = mobilenet.mobilenet_base(
                style_input_,
                conv_defs=mobilenet_v2.V2_DEF,
                final_endpoint=mobilenet_end_point,
                scope='MobilenetV2')

        feat_convlayer = end_points[mobilenet_end_point]
        with tf.name_scope('bottleneck'):
            # (batch_size, 1, 1, depth).
            bottleneck_feat = tf.reduce_mean(
                feat_convlayer, axis=[1, 2], keep_dims=True)

        if style_prediction_bottleneck > 0:
            with tf.variable_scope('mobilenet_conv'):
                with slim.arg_scope(
                        [slim.conv2d],
                        activation_fn=None,
                        normalizer_fn=None,
                        trainable=mobilenet_trainable):
                    # (batch_size, 1, 1, style_prediction_bottleneck).
                    bottleneck_feat = slim.conv2d(
                        bottleneck_feat, style_prediction_bottleneck, [1, 1])

        style_params = {}
        with tf.variable_scope('style_params'):
            for i in range(len(activation_depths)):
                with tf.variable_scope(activation_names[i], reuse=reuse):
                    with slim.arg_scope(
                            [slim.conv2d],
                            activation_fn=None,
                            normalizer_fn=None,
                            trainable=style_params_trainable):
                        # Computing beta parameter of the style normalization
                        # for the activation_names[i] layer of the style
                        # transformer network.
                        # (batch_size, 1, 1, activation_depths[i])
                        beta = slim.conv2d(bottleneck_feat,
                                           activation_depths[i], [1, 1])
                        # (batch_size, activation_depths[i])
                        beta = tf.squeeze(beta, [1, 2], name='SpatialSqueeze')
                        style_params['{}/beta'.format(activation_names[i])] = beta

                        # Computing gamma parameter of the style normalization
                        # for the activation_names[i] layer of the style
                        # transformer network.
                        # (batch_size, 1, 1, activation_depths[i])
                        gamma = slim.conv2d(bottleneck_feat,
                                            activation_depths[i], [1, 1])
                        # (batch_size, activation_depths[i])
                        gamma = tf.squeeze(gamma, [1, 2], name='SpatialSqueeze')
                        style_params['{}/gamma'.format(activation_names[i])] = gamma

    return style_params, bottleneck_feat