def _yolo_block(self, inputs, filters):
    """Apply four alternating 1x1 / 3x3 convolutions, then a 1x1 ``slim.conv2d``.

    The first four layers go through ``common._conv2d_fixed_padding`` and
    alternate between ``filters`` output channels with kernel size 1 and
    ``filters * 2`` output channels with kernel size 3.  The last layer is a
    plain ``slim.conv2d`` with ``filters`` output channels, kernel size 1,
    stride 1 and ``'SAME'`` padding; it is normalised with ``slim.batch_norm``
    using ``self.batch_norm_params`` and is created with
    ``biases_initializer=None`` and ``activation_fn=None``.

    :param inputs: tensor fed into the first convolution.
    :param filters: base number of output channels.
    :return: the output of the final ``slim.conv2d`` layer.
    """
    net = inputs
    # (channel multiplier, kernel size) for each fixed-padding convolution.
    for multiplier, kernel_size in ((1, 1), (2, 3), (1, 1), (2, 3)):
        net = common._conv2d_fixed_padding(net, filters * multiplier, kernel_size)

    projection_options = {
        'stride': 1,
        'padding': 'SAME',
        'normalizer_fn': slim.batch_norm,
        'normalizer_params': self.batch_norm_params,
        'biases_initializer': None,
        'activation_fn': None,
    }
    return slim.conv2d(net, filters, 1, **projection_options)
def _darknet53_block(self, inputs, filters):
    """Residual block used by darknet53.

    Runs a kernel-size-1 convolution with ``filters`` channels followed by a
    kernel-size-3 convolution with ``filters * 2`` channels, then adds the
    block's own input to the result.

    :param inputs: tensor entering the block; also used as the skip connection.
    :param filters: channel count of the first convolution.
    :return: ``conv_output + inputs``.
    """
    skip = inputs
    squeezed = common._conv2d_fixed_padding(inputs, filters * 1, 1)
    expanded = common._conv2d_fixed_padding(squeezed, filters * 2, 3)
    return expanded + skip
def forward(self, inputs, is_training=False, reuse=False):
    """Create the YOLO v3 model and return its three detection feature maps.

    :param inputs: a 4-D tensor of size [batch_size, height, width, channels].
        Dimension batch_size may be undefined. The channel order is RGB.
    :param is_training: whether is training or not; forwarded to batch norm.
    :param reuse: whether or not the network and its variables should be reused.
    :return: tuple ``(feature_map_1, feature_map_2, feature_map_3)``; each is
        the output of ``self._detection_layer`` wrapped in ``tf.identity``
        under the matching name.
    """
    # Kept on the instance because it is needed later on.
    self.img_size = tf.shape(inputs)[1:3]

    bn_settings = {
        'decay': self._BATCH_NORM_DECAY,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    # Outer scope: share the ``reuse`` flag between conv, batch norm and padding.
    with slim.arg_scope(
            [slim.conv2d, slim.batch_norm, common._fixed_padding],
            reuse=reuse):
        # Inner scope: default normaliser / activation for every slim.conv2d.
        with slim.arg_scope(
                [slim.conv2d],
                normalizer_fn=slim.batch_norm,
                normalizer_params=bn_settings,
                biases_initializer=None,
                activation_fn=lambda x: tf.nn.leaky_relu(
                    x, alpha=self._LEAKY_RELU)):
            with tf.variable_scope('darknet-53'):
                route_1, route_2, net = darknet53(inputs).outputs

            with tf.variable_scope('yolo-v3'):
                # First detection output, using anchors 6..8.
                route, net = self._yolo_block(net, 512)
                feature_map_1 = tf.identity(
                    self._detection_layer(net, self._ANCHORS[6:9]),
                    name='feature_map_1')

                # Second output: upsample the route to route_2's static shape
                # and concatenate along axis 3.
                net = common._conv2d_fixed_padding(route, 256, 1)
                net = self._upsample(net, route_2.get_shape().as_list())
                net = tf.concat([net, route_2], axis=3)
                route, net = self._yolo_block(net, 256)
                feature_map_2 = tf.identity(
                    self._detection_layer(net, self._ANCHORS[3:6]),
                    name='feature_map_2')

                # Third output: same pattern against route_1.
                net = common._conv2d_fixed_padding(route, 128, 1)
                net = self._upsample(net, route_1.get_shape().as_list())
                net = tf.concat([net, route_1], axis=3)
                _, net = self._yolo_block(net, 128)
                feature_map_3 = tf.identity(
                    self._detection_layer(net, self._ANCHORS[0:3]),
                    name='feature_map_3')

    return feature_map_1, feature_map_2, feature_map_3
def _darknet53_block(self, inputs, filters):
    """Darknet53 residual block.

    Applies two fixed-padding convolutions (kernel size 1 with ``filters``
    channels, then kernel size 3 with ``filters * 2`` channels) and adds the
    original input back onto the result.

    NOTE(review): the original notes described the 3x3 convolution as
    downsampling and the output as half the input size.  No stride is passed
    here and the result is added element-wise to the input, so that looks
    inaccurate -- confirm against ``common._conv2d_fixed_padding``.

    :param inputs: input data; also used as the shortcut branch.
    :param filters: number of convolution kernels for the first convolution.
    :return: the sum of the convolved tensor and ``inputs``.
    """
    identity_branch = inputs
    conv_branch = common._conv2d_fixed_padding(inputs, filters * 1, 1)
    conv_branch = common._conv2d_fixed_padding(conv_branch, filters * 2, 3)
    return conv_branch + identity_branch
def forward(self, inputs, n_filters, n_strides, n_ksizes):
    """Chain fixed-padding convolutions described by three parallel lists.

    Layer ``i`` is built with ``n_filters[i]`` output channels, kernel size
    ``n_ksizes[i]`` and stride ``n_strides[i]``.  The lists are walked in
    lockstep and iteration stops at the shortest one.

    The previous implementation passed ``min(len(n_filters), len(n_strides))``
    as the *start* of ``range`` and ``len(n_ksizes)`` as its *stop*.  With
    equal-length lists no layer was ever built, and a longer ``n_ksizes``
    indexed past the end of the other lists.

    :param inputs: tensor fed into the first convolution.
    :param n_filters: per-layer output channel counts.
    :param n_strides: per-layer strides.
    :param n_ksizes: per-layer kernel sizes.
    :return: output of the last convolution, or ``inputs`` unchanged when any
        of the lists is empty.
    """
    for filters, strides, kernel_size in zip(n_filters, n_strides, n_ksizes):
        inputs = common._conv2d_fixed_padding(
            inputs, filters=filters, kernel_size=kernel_size, strides=strides)
    return inputs
def forward(self, inputs):
    """Build the darknet53 convolution stack and return three of its tensors.

    After an initial 3x3 convolution (32 channels, ``strides=1``) the network
    is five repetitions of the same pattern: one 3x3 convolution with
    ``strides=2`` followed by a number of ``self._darknet53_block`` calls.

    :param inputs: tensor fed into the first convolution.
    :return: ``(route_1, route_2, inputs)`` -- the outputs of the third,
        fourth and fifth stage respectively.
    """

    def strided_stage(tensor, conv_filters, block_filters, repeats):
        # One strides=2 convolution, then `repeats` residual blocks.
        tensor = common._conv2d_fixed_padding(tensor, conv_filters, 3, strides=2)
        for _ in range(repeats):
            tensor = self._darknet53_block(tensor, block_filters)
        return tensor

    net = common._conv2d_fixed_padding(inputs, 32, 3, strides=1)
    net = strided_stage(net, 64, 32, 1)
    net = strided_stage(net, 128, 64, 2)
    route_1 = strided_stage(net, 256, 128, 8)
    route_2 = strided_stage(route_1, 512, 256, 8)
    net = strided_stage(route_2, 1024, 512, 4)
    return route_1, route_2, net
def _yolo_block(self, inputs, filters):
    """Apply three (1x1, 3x3) fixed-padding convolution pairs in sequence.

    Each pair is a kernel-size-1 convolution with ``filters`` channels
    followed by a kernel-size-3 convolution with ``filters * 2`` channels.

    :param inputs: tensor fed into the first convolution.
    :param filters: base number of output channels.
    :return: output of the sixth convolution.
    """
    net = inputs
    # (channel multiplier, kernel size), repeated for the three pairs.
    for multiplier, kernel_size in ((1, 1), (2, 3)) * 3:
        net = common._conv2d_fixed_padding(net, filters * multiplier, kernel_size)
    return net
def _yolo_block(self, inputs, filters):
    """Run the six-convolution YOLO block and expose the fifth layer's output.

    Convolutions alternate between kernel size 1 with ``filters`` channels
    and kernel size 3 with ``filters * 2`` channels.  The output of the fifth
    convolution is returned as ``route`` next to the final output.

    :param inputs: tensor fed into the first convolution.
    :param filters: base number of output channels.
    :return: ``(route, inputs)`` -- fifth-layer output and sixth-layer output.
    """
    net = inputs
    # First five layers as (channel multiplier, kernel size).
    for multiplier, kernel_size in ((1, 1), (2, 3), (1, 1), (2, 3), (1, 1)):
        net = common._conv2d_fixed_padding(net, filters * multiplier, kernel_size)
    route = net
    return route, common._conv2d_fixed_padding(route, filters * 2, 3)
def _yolo_block(self, inputs, filters):
    """YOLO convolution block.

    Six fixed-padding convolutions are chained, alternating kernel size 1
    (``filters`` channels) and kernel size 3 (``filters * 2`` channels).  The
    tensor produced by the fifth convolution is kept as ``route``.

    :param inputs: input data.
    :param filters: number of convolution kernels.
    :return: route, inputs
    """
    conv = common._conv2d_fixed_padding

    hidden = conv(inputs, filters * 1, 1)
    hidden = conv(hidden, filters * 2, 3)
    hidden = conv(hidden, filters * 1, 1)
    hidden = conv(hidden, filters * 2, 3)
    route = conv(hidden, filters * 1, 1)
    return route, conv(route, filters * 2, 3)
def forward(self, inputs, is_training=False, reuse=False):
    """Creates YOLO v3 model.

    :param inputs: a 4-D tensor of size [batch_size, height, width, channels].
        Dimension batch_size may be undefined. The channel order is RGB.
    :param is_training: whether is training or not.
    :param reuse: whether or not the network and its variables should be reused.
    :return: ``(feature_map_1, feature_map_2, feature_map_3)``.
    """
    # It will be needed later on.
    self.img_size = tf.shape(inputs)[1:3]

    # Batch norm parameters.
    # Background on `decay`: https://www.cnblogs.com/hellcat/p/8058092.html
    norm_params = {
        'decay': self._BATCH_NORM_DECAY,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    def upsample_and_concat(branch, skip, channels):
        # 1x1 convolution, upsample to the static shape of `skip`, then join
        # both tensors along axis 3.
        branch = common._conv2d_fixed_padding(branch, channels, 1)
        target_shape = skip.get_shape().as_list()
        branch = self._upsample(branch, target_shape)  # upsample by direct enlargement
        return tf.concat([branch, skip], axis=3)

    # Set activation_fn and parameters for conv2d, batch_norm.
    with slim.arg_scope(
            [slim.conv2d, slim.batch_norm, common._fixed_padding],
            reuse=reuse):
        # Set default values (normalizer, biases, ...) for the ops in the
        # given list (slim.conv2d).
        with slim.arg_scope(
                [slim.conv2d],
                normalizer_fn=slim.batch_norm,
                normalizer_params=norm_params,
                biases_initializer=None,
                activation_fn=lambda x: tf.nn.leaky_relu(
                    x, alpha=self._LEAKY_RELU)):
            with tf.variable_scope('darknet-53'):
                # Get the backbone tensors.  Shapes noted by the original
                # author (they hold for one particular input size -- TODO
                # confirm which):
                #   route_1 : 52x52x256
                #   route_2 : 26x26x512
                #   inputs  : 13x13x1024
                route_1, route_2, inputs = darknet53(inputs).outputs

            with tf.variable_scope('yolo-v3'):
                # Architecture diagrams:
                # https://github.com/YunYang1994/tensorflow-yolov3/raw/master/docs/images/levio.jpeg
                # https://images2018.cnblogs.com/blog/606386/201803/606386-20180327004340505-1572852891.png

                # feature_map1 13x13x1024 --> 13x13x[3x(5+class_num)]
                route, inputs = self._yolo_block(inputs, 512)
                feature_map_1 = self._detection_layer(
                    inputs, self._ANCHORS[6:9])
                feature_map_1 = tf.identity(feature_map_1, name='feature_map_1')

                # feature_map2 26x26x512 --> 26x26x[3x(5+class_num)]
                # NOTE(review): the original note on this upsample read
                # "52x52 --> 26x26"; since the target is route_2's shape it is
                # presumably 13x13 --> 26x26 -- confirm.
                inputs = upsample_and_concat(route, route_2, 256)
                route, inputs = self._yolo_block(inputs, 256)
                feature_map_2 = self._detection_layer(
                    inputs, self._ANCHORS[3:6])
                feature_map_2 = tf.identity(feature_map_2, name='feature_map_2')

                # feature_map3 52x52x256 --> 52x52x[3x(5+class_num)]
                # Upsample 26x26 --> 52x52 (per the original note).
                inputs = upsample_and_concat(route, route_1, 128)
                route, inputs = self._yolo_block(inputs, 128)
                feature_map_3 = self._detection_layer(
                    inputs, self._ANCHORS[0:3])
                feature_map_3 = tf.identity(feature_map_3, name='feature_map_3')

    return feature_map_1, feature_map_2, feature_map_3