def forward(self, inputs, is_training=False, reuse=False):
    """Build the YOLOv3 graph with fault-injection-aware convolutions.

    Every convolution goes through the injection helpers; the running counter
    ``self.num_layer`` tracks the absolute layer index so a single target
    layer (``self.inj_layer``) can be selected for injection.

    NOTE(review): ``self.num_layer`` is mutated in place, so a second call to
    ``forward`` on the same instance keeps counting from the previous call —
    confirm the caller resets it between graph builds.

    Returns the three detection feature maps (13x13, 26x26, 52x52 scales for
    a 416x416 input).
    """
    # Input spatial size, form [height, width]; used later for box decoding.
    self.img_size = tf.shape(inputs)[1:3]

    # Batch-norm defaults shared by every conv in the network.
    batch_norm_params = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        with slim.arg_scope(
                [slim.conv2d],
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                biases_initializer=None,
                activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1),
                weights_regularizer=slim.l2_regularizer(self.weight_decay)):

            # Keyword arguments shared by every injection-aware call.
            inj_kwargs = dict(inj_type=self.inj_type,
                              quant_min_max=self.quant_min_max,
                              inj_layer=self.inj_layer,
                              delta_4d=self.delta_4d,
                              batch_norm_params=batch_norm_params)
            # 3 anchors x (4 box coords + 1 objectness + class scores).
            pred_channels = 3 * (5 + self.class_num)

            with tf.variable_scope('darknet53_body'):
                route_1, route_2, route_3 = darknet53_body(inputs, **inj_kwargs)

            with tf.variable_scope('yolov3_head'):
                # ----- scale 1 -----
                inter1, net = yolo_block(route_3, 512,
                                         num_layer=self.num_layer, **inj_kwargs)
                self.num_layer += 6  # yolo_block contains 6 convolutions

                feature_map_1 = inj_conv2d(net, pred_channels, 1, strides=1,
                                           normalizer_fn=None,
                                           activation_fn=None,
                                           biases_initializer=tf.zeros_initializer(),
                                           fixed_padding=False,
                                           num_layer=self.num_layer, **inj_kwargs)
                self.num_layer += 1
                feature_map_1 = tf.identity(feature_map_1, name='feature_map_1')

                inter1 = inj_conv2d(inter1, 256, 1,
                                    num_layer=self.num_layer, **inj_kwargs)
                self.num_layer += 1
                inter1 = upsample_layer(
                    inter1,
                    route_2.get_shape().as_list() if self.use_static_shape
                    else tf.shape(route_2))
                concat1 = tf.concat([inter1, route_2], axis=3)

                # ----- scale 2 -----
                inter2, net = yolo_block(concat1, 256,
                                         num_layer=self.num_layer, **inj_kwargs)
                self.num_layer += 6
                feature_map_2 = inj_conv2d(net, pred_channels, 1, strides=1,
                                           normalizer_fn=None,
                                           activation_fn=None,
                                           biases_initializer=tf.zeros_initializer(),
                                           fixed_padding=False,
                                           num_layer=self.num_layer, **inj_kwargs)
                self.num_layer += 1
                feature_map_2 = tf.identity(feature_map_2, name='feature_map_2')

                inter2 = inj_conv2d(inter2, 128, 1,
                                    num_layer=self.num_layer, **inj_kwargs)
                self.num_layer += 1
                inter2 = upsample_layer(
                    inter2,
                    route_1.get_shape().as_list() if self.use_static_shape
                    else tf.shape(route_1))
                concat2 = tf.concat([inter2, route_1], axis=3)

                # ----- scale 3 -----
                _, feature_map_3 = yolo_block(concat2, 128,
                                              num_layer=self.num_layer, **inj_kwargs)
                self.num_layer += 6
                feature_map_3 = inj_conv2d(feature_map_3, pred_channels, 1, strides=1,
                                           normalizer_fn=None,
                                           activation_fn=None,
                                           biases_initializer=tf.zeros_initializer(),
                                           fixed_padding=False,
                                           num_layer=self.num_layer, **inj_kwargs)
                self.num_layer += 1
                feature_map_3 = tf.identity(feature_map_3, name='feature_map_3')

    return feature_map_1, feature_map_2, feature_map_3
def forward(self, inputs, is_training=False, reuse=False):
    """Single-output variant: builds the darknet53 backbone plus the full
    YOLOv3 head path, but only the finest-scale detection map is produced
    and returned as a one-element list.
    """
    # Input spatial size, form [height, weight]; it will be used later.
    self.img_size = tf.shape(inputs)[1:3]

    # Batch-norm defaults for every convolution.
    batch_norm_params = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        with slim.arg_scope(
                [slim.conv2d],
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                biases_initializer=None,
                activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1),
                weights_initializer=tf.contrib.layers.
                variance_scaling_initializer(dtype=tf.float32)
                # weights_regularizer=slim.l2_regularizer(self.weight_decay)
        ):
            with tf.variable_scope('darknet53_body'):
                route_1, route_2, route_3 = darknet53_body(inputs)

            with tf.variable_scope('yolov3_head'):
                # First head block; its detection conv is intentionally skipped
                # in this variant — only the upsample path is kept.
                inter1, net = yolo_block(route_3, 512)
                inter1 = conv2d(inter1, 256, 1)
                inter1 = upsample_layer(
                    inter1,
                    route_2.get_shape().as_list() if self.use_static_shape
                    else tf.shape(route_2))
                concat1 = tf.concat([inter1, route_2], axis=3)

                inter2, net = yolo_block(concat1, 256)
                inter2 = conv2d(inter2, 128, 1)
                inter2 = upsample_layer(
                    inter2,
                    route_1.get_shape().as_list() if self.use_static_shape
                    else tf.shape(route_1))
                concat2 = tf.concat([inter2, route_1], axis=3)

                _, feature_map_3 = yolo_block(concat2, 128)
                # NOTE(review): other variants of this head use
                # 3 * (5 + class_num) output channels; confirm that
                # (3 + class_num) is intentional here.
                feature_map_3 = slim.conv2d(
                    feature_map_3, (3 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                feature_map_3 = tf.identity(feature_map_3,
                                            name='feature_map_3')

    return [feature_map_3]
def forward(self, inputs, is_training=False, reuse=False):
    """Build YOLOv3 split into two named subgraphs.

    The backbone lives under 'inference_1' and the head under 'inference_2';
    tf.identity markers ('feature_output_*' / 'feature_input_*') tag the
    tensors at the boundary so the graph can be cut between the two parts.
    """
    # Input spatial size, form [height, weight]; it will be used later.
    self.img_size = tf.shape(inputs)[1:3]

    # Batch-norm defaults for every convolution.
    batch_norm_params = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        with slim.arg_scope(
                [slim.conv2d],
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                biases_initializer=None,
                activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1)):

            with tf.variable_scope('inference_1'):
                route_1, route_2, route_3 = darknet53_body(inputs)
                # Tag the backbone outputs at the subgraph boundary.
                route_1 = tf.identity(route_1, 'feature_output_1')
                route_2 = tf.identity(route_2, 'feature_output_2')
                route_3 = tf.identity(route_3, 'feature_output_3')

            with tf.variable_scope('inference_2'):
                # Tag the head inputs at the subgraph boundary.
                route_1 = tf.identity(route_1, 'feature_input_1')
                route_2 = tf.identity(route_2, 'feature_input_2')
                route_3 = tf.identity(route_3, 'feature_input_3')

                # ----- scale 1 -----
                inter1, net = yolo_block(route_3, 512)
                feature_map_1 = slim.conv2d(
                    net, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                feature_map_1 = tf.identity(feature_map_1,
                                            name='feature_output_1')

                # ----- scale 2 -----
                inter1 = conv2d(inter1, 256, 1)
                inter1 = upsample_layer(inter1,
                                        route_2.get_shape().as_list())
                concat1 = tf.concat([inter1, route_2], axis=3)
                inter2, net = yolo_block(concat1, 256)
                feature_map_2 = slim.conv2d(
                    net, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                feature_map_2 = tf.identity(feature_map_2,
                                            name='feature_output_2')

                # ----- scale 3 -----
                inter2 = conv2d(inter2, 128, 1)
                inter2 = upsample_layer(inter2,
                                        route_1.get_shape().as_list())
                concat2 = tf.concat([inter2, route_1], axis=3)
                _, feature_map_3 = yolo_block(concat2, 128)
                feature_map_3 = slim.conv2d(
                    feature_map_3, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                feature_map_3 = tf.identity(feature_map_3,
                                            name='feature_output_3')

    return feature_map_1, feature_map_2, feature_map_3
def forward_head(self, route_1, route_2, route_3, is_training=False):
    """Build only the YOLOv3 detection head from pre-computed backbone
    features (route_1/2/3 at increasing stride) and return the three
    detection feature maps.
    """
    # Batch-norm defaults for every convolution in the head.
    batch_norm_params = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    with slim.arg_scope(
            [slim.conv2d],
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params,
            biases_initializer=None,
            activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1),
            weights_regularizer=slim.l2_regularizer(self.weight_decay)):

        with tf.variable_scope('yolov3_head'):

            def detect(net, name):
                # 1x1 linear projection to the per-anchor prediction
                # channels; the detection layer has no BN and no activation.
                fmap = slim.conv2d(
                    net, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                return tf.identity(fmap, name=name)

            def target_shape(route):
                # Static shape when available, else dynamic.
                return (route.get_shape().as_list()
                        if self.use_static_shape else tf.shape(route))

            # ----- scale 1 -----
            inter1, net = yolo_block(route_3, 512)
            feature_map_1 = detect(net, 'feature_map_1')

            # ----- scale 2 -----
            inter1 = conv2d(inter1, 256, 1)
            inter1 = upsample_layer(inter1, target_shape(route_2))
            concat1 = tf.concat([inter1, route_2], axis=3)
            inter2, net = yolo_block(concat1, 256)
            feature_map_2 = detect(net, 'feature_map_2')

            # ----- scale 3 -----
            inter2 = conv2d(inter2, 128, 1)
            inter2 = upsample_layer(inter2, target_shape(route_1))
            concat2 = tf.concat([inter2, route_1], axis=3)
            _, feature_map_3 = yolo_block(concat2, 128)
            feature_map_3 = detect(feature_map_3, 'feature_map_3')

    return feature_map_1, feature_map_2, feature_map_3
def forward(self, inputs, is_training=False, reuse=False):
    """Standard YOLOv3 forward pass: darknet53 backbone + three-scale head.

    Returns feature_map_1/2/3 (13x13, 26x26, 52x52 for a 416x416 input),
    each with 3 * (5 + class_num) channels.
    """
    # Input spatial size, form [height, weight]; it will be used later.
    self.img_size = tf.shape(inputs)[1:3]

    # Batch-norm defaults. conv2d in utils.layer_utils is built on
    # slim.conv2d, so these arg_scope defaults apply there as well.
    batch_norm_params = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    # Allow variable reuse; every conv gets BN, leaky-relu and L2 weight
    # regularization by default.
    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        with slim.arg_scope(
                [slim.conv2d],
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                biases_initializer=None,
                activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1),
                weights_regularizer=slim.l2_regularizer(
                    self.weight_decay)):

            with tf.variable_scope('darknet53_body'):
                route_1, route_2, route_3 = darknet53_body(inputs)

            with tf.variable_scope('yolov3_head'):
                inter1, net = yolo_block(route_3, 512)

                # feature_map_1 is the coarsest output (y1, 13x13x255 for
                # the standard 80-class setup).
                feature_map_1 = slim.conv2d(
                    net, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                feature_map_1 = tf.identity(feature_map_1,
                                            name='feature_map_1')

                # Upsample inter1 to route_2's spatial size, then concat
                # along the channel axis (axis=3).
                inter1 = conv2d(inter1, 256, 1)
                inter1 = upsample_layer(
                    inter1,
                    route_2.get_shape().as_list() if self.use_static_shape
                    else tf.shape(route_2))
                concat1 = tf.concat([inter1, route_2], axis=3)

                inter2, net = yolo_block(concat1, 256)

                # feature_map_2 is the middle output (y2, 26x26x255).
                feature_map_2 = slim.conv2d(
                    net, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                feature_map_2 = tf.identity(feature_map_2,
                                            name='feature_map_2')

                # Upsample inter2 to route_1's spatial size and concat.
                inter2 = conv2d(inter2, 128, 1)
                inter2 = upsample_layer(
                    inter2,
                    route_1.get_shape().as_list() if self.use_static_shape
                    else tf.shape(route_1))
                concat2 = tf.concat([inter2, route_1], axis=3)

                _, feature_map_3 = yolo_block(concat2, 128)

                # feature_map_3 is the finest output (y3, 52x52x255).
                feature_map_3 = slim.conv2d(
                    feature_map_3, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                feature_map_3 = tf.identity(feature_map_3,
                                            name='feature_map_3')

    return feature_map_1, feature_map_2, feature_map_3
def forward(self, inputs, is_training=False, train_with_gray=True, reuse=False):
    """Forward pass with optional grayscale conversion and a selectable
    backbone.

    Args:
        inputs: input image batch, NHWC.
        is_training: batch-norm (and mobilenet backbone) training flag.
        train_with_gray: if True, collapse RGB to a single luminance channel
            (weighted by ``self.rgb_factor``) and tile it back to 3 channels.
        reuse: variable-reuse flag for the slim arg scopes.

    Returns:
        Three detection maps when ``self.train_with_two_feature_map`` is
        False, otherwise two (the coarsest scale is skipped).

    Raises:
        ValueError: if ``self.backbone_name`` is not a supported backbone.
    """
    # Input spatial size, form [height, weight]; it will be used later.
    self.img_size = tf.shape(inputs)[1:3]

    # Batch-norm defaults for every convolution.
    batch_norm_params = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        with slim.arg_scope([slim.conv2d],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            biases_initializer=None,
                            activation_fn=lambda x: tf.nn.relu6(x)):
            with tf.variable_scope('darknet53_body'):
                # Convert to grayscale: weighted channel sum, then tile the
                # single luminance channel back to 3 so backbone input
                # shapes are unchanged.
                if train_with_gray:
                    inputs = inputs * self.rgb_factor
                    inputs = tf.reduce_sum(inputs, axis=-1)
                    inputs = tf.expand_dims(inputs, -1)
                    inputs = tf.tile(inputs, [1, 1, 1, 3])
                if self.backbone_name == "darknet53":
                    routes = darknet53_body(
                        inputs, self.train_with_two_feature_map)
                elif self.backbone_name == "darknet53_prun":
                    routes = darknet53_body_prun(
                        inputs, self.train_with_two_feature_map)
                elif self.backbone_name == "mobilenetv2":
                    routes = mobilenetv2(inputs,
                                         self.train_with_two_feature_map,
                                         is_training)
                elif self.backbone_name == "mobilenetv3":
                    routes = mobilenetv3(inputs,
                                         self.train_with_two_feature_map,
                                         is_training)
                elif self.backbone_name == "mobilenetv3_add_zoom_factor":
                    routes = mobilenetv3_add_zoom_factor(
                        inputs, self.train_with_two_feature_map, is_training)
                else:
                    # Raise instead of print + sys.exit so callers can
                    # handle the configuration error.
                    raise ValueError(
                        "backbone name is not right, it must be in "
                        "[darknet53, darknet53_prun, mobilenetv2, "
                        "mobilenetv3, mobilenetv3_add_zoom_factor], got "
                        "{!r}".format(self.backbone_name))

            with tf.variable_scope('yolov3_head'):
                if not self.train_with_two_feature_map:
                    route_1, route_2, route_3 = routes

                    # ----- scale 1 -----
                    inter1, net = yolo_block(route_3, 512)
                    feature_map_1 = slim.conv2d(
                        net, 3 * (5 + self.class_num), 1,
                        stride=1, normalizer_fn=None, activation_fn=None,
                        biases_initializer=tf.zeros_initializer())
                    feature_map_1 = tf.identity(feature_map_1,
                                                name='feature_map_1')

                    # ----- scale 2 -----
                    inter1 = conv2d(inter1, 256, 1)
                    inter1 = upsample_layer(inter1,
                                            route_2.get_shape().as_list())
                    concat1 = tf.concat([inter1, route_2], axis=3)
                    inter2, net = yolo_block(concat1, 256)
                    feature_map_2 = slim.conv2d(
                        net, 3 * (5 + self.class_num), 1,
                        stride=1, normalizer_fn=None, activation_fn=None,
                        biases_initializer=tf.zeros_initializer())
                    feature_map_2 = tf.identity(feature_map_2,
                                                name='feature_map_2')

                    # ----- scale 3 -----
                    inter2 = conv2d(inter2, 128, 1)
                    inter2 = upsample_layer(inter2,
                                            route_1.get_shape().as_list())
                    concat2 = tf.concat([inter2, route_1], axis=3)
                    _, feature_map_3 = yolo_block(concat2, 128)
                    feature_map_3 = slim.conv2d(
                        feature_map_3, 3 * (5 + self.class_num), 1,
                        stride=1, normalizer_fn=None, activation_fn=None,
                        biases_initializer=tf.zeros_initializer())
                    feature_map_3 = tf.identity(feature_map_3,
                                                name='feature_map_3')
                    return feature_map_1, feature_map_2, feature_map_3
                else:
                    # Two-feature-map mode: backbone produced only two
                    # routes; the coarsest scale is skipped.
                    route_1, route_2 = routes

                    inter2, net = yolo_block(route_2, 256)
                    feature_map_2 = slim.conv2d(
                        net, 3 * (5 + self.class_num), 1,
                        stride=1, normalizer_fn=None, activation_fn=None,
                        biases_initializer=tf.zeros_initializer())
                    feature_map_2 = tf.identity(feature_map_2,
                                                name='feature_map_2')

                    inter2 = conv2d(inter2, 128, 1)
                    inter2 = upsample_layer(inter2,
                                            route_1.get_shape().as_list())
                    concat2 = tf.concat([inter2, route_1], axis=3)
                    _, feature_map_3 = yolo_block(concat2, 128)
                    feature_map_3 = slim.conv2d(
                        feature_map_3, 3 * (5 + self.class_num), 1,
                        stride=1, normalizer_fn=None, activation_fn=None,
                        biases_initializer=tf.zeros_initializer())
                    feature_map_3 = tf.identity(feature_map_3,
                                                name='feature_map_3')
                    return feature_map_2, feature_map_3
def forward(self, inputs, is_training=False, reuse=False):
    """YOLOv3 forward pass using relu6 activations and mobilenet-style
    BN / initializer hyper-parameters.

    Fixes over the previous revision:
      * ``normalizer_params['is_training']`` now follows the ``is_training``
        argument instead of being hard-coded True (BN was stuck in training
        mode at inference).
      * the scale-2 detection conv is now linear (``activation_fn=None``),
        matching the scale-3 head in this same function; the previous
        ``activation_fn=activation_fn`` looked like a copy-paste slip.
    """
    # Input spatial size, form [height, weight]; it will be used later.
    self.img_size = tf.shape(inputs)[1:3]

    # NOTE(review): this dict is currently unused — normalizer_params below
    # (different decay/epsilon) is what actually reaches the convs. Kept for
    # reference; confirm which set of BN hyper-parameters is intended.
    batch_norm_params = {
        'is_training': is_training,
        'center': True,
        'scale': True,
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'updates_collections': tf.GraphKeys.UPDATE_OPS
        # 'fused': None,  # Use fused batch norm if possible.
    }

    # Hyper-parameters applied to every conv via arg_scope.
    activation_fn = tf.nn.relu6
    normalizer_fn = tf.contrib.slim.batch_norm
    normalizer_params = {
        'is_training': is_training,  # was hard-coded True — bug fix
        'center': True,
        'scale': True,
        'decay': 0.9997,
        'epsilon': 0.001,
        'updates_collections': tf.GraphKeys.UPDATE_OPS
    }
    weights_initializer = tf.truncated_normal_initializer(stddev=0.09)
    weights_regularizer = tf.contrib.layers.l2_regularizer(0.00004)

    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        with slim.arg_scope([slim.conv2d],
                            activation_fn=activation_fn,
                            normalizer_fn=normalizer_fn,
                            normalizer_params=normalizer_params,
                            weights_initializer=weights_initializer,
                            weights_regularizer=weights_regularizer,
                            ):
            with tf.variable_scope('darknet53_body'):
                route_1, route_2, route_3 = darknet53_body(inputs)

            with tf.variable_scope('yolov3_head'):
                # ----- scale 1 -----
                inter1, net = yolo_block(route_3, 512)
                # NOTE(review): unlike the other two heads, this detection
                # conv keeps BN + relu6 (scope 'feature_map_00'). Left
                # untouched because changing it would alter the checkpoint
                # variable set — confirm it is intentional.
                feature_map_1 = slim.conv2d(net, 3 * (5 + self.class_num), 1,
                                            stride=1,
                                            activation_fn=activation_fn,
                                            normalizer_fn=normalizer_fn,
                                            normalizer_params=normalizer_params,
                                            weights_initializer=weights_initializer,
                                            weights_regularizer=weights_regularizer,
                                            scope='feature_map_00')
                feature_map_1 = tf.identity(feature_map_1,
                                            name='feature_map_1')

                # ----- scale 2 -----
                inter1 = conv2d(inter1, 256, 1)
                inter1 = upsample_layer(
                    inter1,
                    route_2.get_shape().as_list() if self.use_static_shape
                    else tf.shape(route_2))
                concat1 = tf.concat([inter1, route_2], axis=3)
                inter2, net = yolo_block(concat1, 256)
                # Linear detection head (activation_fn=None) — consistent
                # with the scale-3 head below.
                feature_map_2 = slim.conv2d(
                    net, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                feature_map_2 = tf.identity(feature_map_2,
                                            name='feature_map_2')

                # ----- scale 3 -----
                inter2 = conv2d(inter2, 128, 1)
                inter2 = upsample_layer(
                    inter2,
                    route_1.get_shape().as_list() if self.use_static_shape
                    else tf.shape(route_1))
                concat2 = tf.concat([inter2, route_1], axis=3)
                _, feature_map_3 = yolo_block(concat2, 128)
                feature_map_3 = slim.conv2d(
                    feature_map_3, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                feature_map_3 = tf.identity(feature_map_3,
                                            name='feature_map_3')

    return feature_map_1, feature_map_2, feature_map_3
def forward(self, inputs, is_training=False, reuse=False):
    """Build the YOLOv3 graph; the returned feature maps are the grid-sized
    prediction tensors mapped back onto the original image.

    :param inputs: input image batch, NHWC (e.g. 416x416).
    :param is_training: batch-norm training flag.
    :param reuse: variable-reuse flag for the slim arg scopes.
    :return: (feature_map_1, feature_map_2, feature_map_3)
    """
    # Input spatial size, form [height, weight] (e.g. [416, 416]);
    # it will be used later.
    self.img_size = tf.shape(inputs)[1:3]

    # Batch-norm defaults for every convolution.
    batch_norm_params = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        # The lambda takes x and applies leaky-relu with slope 0.1.
        with slim.arg_scope(
                [slim.conv2d],
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                biases_initializer=None,
                activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1)):

            with tf.variable_scope('darknet53_body'):
                # Backbone outputs:
                # (-1, 52, 52, 256), (-1, 26, 26, 512), (-1, 13, 13, 1024)
                route_1, route_2, route_3 = darknet53_body(inputs)

            with tf.variable_scope('yolov3_head'):
                # yolo_block runs 5 DBL units (-> inter1) plus one more
                # (-> net): (-1, 13, 13, 512) and (-1, 13, 13, 1024).
                inter1, net = yolo_block(route_3, 512)

                # --- scale 1 prediction: BN/activation must be None ---
                # (-1, 13, 13, 1024) -> (-1, 13, 13, 3 * (5 + class_num));
                # 3 anchors per cell, each with 5 box values + class scores
                # (255 channels for 80 classes).
                feature_map_1 = slim.conv2d(
                    net, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                feature_map_1 = tf.identity(feature_map_1,
                                            name='feature_map_1')

                # --- scale 2 path ---
                # (-1, 13, 13, 512) -> 256 channels, then upsample to
                # route_2's spatial size (depth unchanged).
                inter1 = conv2d(inter1, 256, 1)
                inter1 = upsample_layer(inter1,
                                        route_2.get_shape().as_list())
                # (-1, 26, 26, 512) ++ (-1, 26, 26, 256) -> (-1, 26, 26, 768)
                concat1 = tf.concat([inter1, route_2], axis=3)
                # Again 5 DBL (-> inter2) plus one more (-> net).
                inter2, net = yolo_block(concat1, 256)

                # --- scale 2 prediction: BN/activation must be None ---
                # Output (-1, 26, 26, 255).
                feature_map_2 = slim.conv2d(
                    net, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                feature_map_2 = tf.identity(feature_map_2,
                                            name='feature_map_2')

                # --- scale 3 path ---
                inter2 = conv2d(inter2, 128, 1)
                inter2 = upsample_layer(inter2,
                                        route_1.get_shape().as_list())
                concat2 = tf.concat([inter2, route_1], axis=3)
                _, feature_map_3 = yolo_block(concat2, 128)
                # Output (-1, 52, 52, 255).
                feature_map_3 = slim.conv2d(
                    feature_map_3, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                feature_map_3 = tf.identity(feature_map_3,
                                            name='feature_map_3')

    return feature_map_1, feature_map_2, feature_map_3
def forward(self, inputs, is_training=False, reuse=False):
    """YOLOv3 head on top of an Inception-ResNet-v2 backbone.

    BN/regularization hyper-parameters follow the inception defaults
    (decay 0.9997, epsilon 0.001, relu activation, 4e-5 L2) rather than the
    darknet defaults used by the other variants.
    """
    # Input spatial size, form [height, weight]; it will be used later.
    self.img_size = tf.shape(inputs)[1:3]

    # Inception-style batch-norm parameters (the darknet defaults —
    # self.batch_norm_decay / 1e-5 / scale=True — are intentionally not
    # used here).
    batch_norm_params = {
        'decay': 0.9997,
        'epsilon': 0.001,
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
        'fused': None,  # Use fused batch norm if possible.
        'scale': False,
        'is_training': is_training
    }

    with slim.arg_scope([slim.conv2d],
                        weights_regularizer=slim.l2_regularizer(0.00004),
                        biases_regularizer=slim.l2_regularizer(0.00004),
                        activation_fn=tf.nn.relu,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params) as scope:

        with tf.variable_scope('InceptionResnetV2', [inputs],
                               reuse=reuse) as scope:
            route_1, route_2, route_3 = inception_resnet_v2_base(
                inputs, scope=scope)

        with tf.variable_scope('yolov3_head'):
            # ----- scale 1 -----
            inter1, net = yolo_block(route_3, 512)
            feature_map_1 = slim.conv2d(
                net, 3 * (5 + self.class_num), 1,
                stride=1, normalizer_fn=None, activation_fn=None,
                biases_initializer=tf.zeros_initializer())
            feature_map_1 = tf.identity(feature_map_1,
                                        name='feature_map_1')

            # ----- scale 2 -----
            inter1 = conv2d(inter1, 256, 1)
            inter1 = upsample_layer(
                inter1,
                route_2.get_shape().as_list() if self.use_static_shape
                else tf.shape(route_2))
            concat1 = tf.concat([inter1, route_2], axis=3)
            inter2, net = yolo_block(concat1, 256)
            feature_map_2 = slim.conv2d(
                net, 3 * (5 + self.class_num), 1,
                stride=1, normalizer_fn=None, activation_fn=None,
                biases_initializer=tf.zeros_initializer())
            feature_map_2 = tf.identity(feature_map_2,
                                        name='feature_map_2')

            # ----- scale 3 -----
            inter2 = conv2d(inter2, 128, 1)
            inter2 = upsample_layer(
                inter2,
                route_1.get_shape().as_list() if self.use_static_shape
                else tf.shape(route_1))
            concat2 = tf.concat([inter2, route_1], axis=3)
            _, feature_map_3 = yolo_block(concat2, 128)
            feature_map_3 = slim.conv2d(
                feature_map_3, 3 * (5 + self.class_num), 1,
                stride=1, normalizer_fn=None, activation_fn=None,
                biases_initializer=tf.zeros_initializer())
            feature_map_3 = tf.identity(feature_map_3,
                                        name='feature_map_3')

    return feature_map_1, feature_map_2, feature_map_3
def forward(self, inputs, is_training=False, reuse=False):
    """Standard YOLOv3 forward pass (darknet53 backbone + three-scale head).

    Fix: the outer arg_scope previously hard-coded ``reuse=False``, silently
    ignoring the ``reuse`` argument; it now honors the caller's flag.

    Args:
        inputs: input image batch, NHWC.
        is_training: batch-norm training flag.
        reuse: variable-reuse flag for the slim arg scopes.

    Returns:
        (feature_map_1, feature_map_2, feature_map_3) — 13x13, 26x26 and
        52x52 grids for a 416x416 input, each with 3*(5+class_num) channels.
    """
    # Input spatial size, form [height, weight]; it will be used later.
    self.img_size = tf.shape(inputs)[1:3]

    # Batch-norm defaults for every convolution.
    batch_norm_params = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    # Was reuse=False — pass the caller's flag through.
    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        with slim.arg_scope(
                [slim.conv2d],
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                biases_initializer=None,
                activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1)):

            with tf.variable_scope('darknet53_body'):
                # Three outputs at 3 different scales:
                # route_1: 52x52x256, route_2: 26x26x512, route_3: 13x13x1024.
                route_1, route_2, route_3 = darknet53_body(inputs)

            with tf.variable_scope('yolov3_head'):
                # --- first-scale prediction ---
                # inter1 feeds the upsample path toward the next scale.
                inter1, net = yolo_block(route_3, 512)
                # For a 416x416 input this is 13x13x255: a 3-D tensor
                # encoding bounding box, objectness and class predictions.
                # A single 1x1 linear conv reduces to the prediction
                # channels.
                feature_map_1 = slim.conv2d(
                    net, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                # Named identity so the tensor can be fetched by name.
                feature_map_1 = tf.identity(feature_map_1,
                                            name='feature_map_1')

                # --- second-scale prediction ---
                inter1 = conv2d(inter1, 256, 1)
                inter1 = upsample_layer(inter1,
                                        route_2.get_shape().as_list())
                concat1 = tf.concat([inter1, route_2], axis=3)
                inter2, net = yolo_block(concat1, 256)
                feature_map_2 = slim.conv2d(
                    net, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                feature_map_2 = tf.identity(feature_map_2,
                                            name='feature_map_2')

                # --- third-scale prediction ---
                inter2 = conv2d(inter2, 128, 1)
                inter2 = upsample_layer(inter2,
                                        route_1.get_shape().as_list())
                concat2 = tf.concat([inter2, route_1], axis=3)
                _, feature_map_3 = yolo_block(concat2, 128)
                feature_map_3 = slim.conv2d(
                    feature_map_3, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    biases_initializer=tf.zeros_initializer())
                feature_map_3 = tf.identity(feature_map_3,
                                            name='feature_map_3')

    return feature_map_1, feature_map_2, feature_map_3