def _residual(self, inputs, in_filters, out_filters, strides, is_training):
    """Pre-activation residual unit: two (BN -> ReLU -> 3x3 conv) stages plus a shortcut.

    Args:
        inputs: Input feature map tensor (channels-last; the shortcut pads the last axis).
        in_filters: Number of channels of `inputs`.
        out_filters: Number of output channels of both convolutions.
        strides: Spatial stride of the first convolution (and of the shortcut's
            average pooling when the channel count changes).
        is_training: Bool tensor, forwarded to batch normalization.

    Returns:
        Output tensor of the residual unit (conv branch + shortcut).
    """
    use_bias = False
    with tf.variable_scope('sub1'):
        bn1 = batch_norm("bn1", inputs, is_training=is_training)
        with tf.variable_scope('relu1'):
            relu1 = tf.nn.relu(bn1)
        # First conv carries the unit's stride (spatial downsampling happens here).
        conv1 = conv2d(
            "conv1", relu1,
            filters=out_filters,
            kernel_size=3,
            activation=None,
            use_bias=use_bias,
            strides=strides,
            is_debug=self.is_debug,
        )
    with tf.variable_scope('sub2'):
        bn2 = batch_norm("bn2", conv1, is_training=is_training)
        with tf.variable_scope('relu2'):
            relu2 = tf.nn.relu(bn2)
        # Second conv always has stride 1.
        conv2 = conv2d(
            "conv2", relu2,
            filters=out_filters,
            kernel_size=3,
            activation=None,
            use_bias=use_bias,
            strides=1,
            is_debug=self.is_debug,
        )
    with tf.variable_scope('sub_add'):
        if in_filters != out_filters:
            # Parameter-free shortcut (ResNet "option A"): average-pool to match
            # the spatial size, then zero-pad the channel (last) axis to match
            # the channel count.
            inputs = tf.nn.avg_pool(
                inputs,
                ksize=[1, strides, strides, 1],
                strides=[1, strides, strides, 1],
                padding='VALID'
            )
            # NOTE(review): if (out_filters - in_filters) is odd, the two half-pads
            # sum to one channel short and the add below would fail — presumably
            # callers only ever double the channel count; confirm.
            inputs = tf.pad(
                inputs,
                [[0, 0], [0, 0], [0, 0],
                 [(out_filters - in_filters)//2, (out_filters - in_filters)//2]]
            )
        output = conv2 + inputs
    return output
def darknet(name, inputs, filters, kernel_size, is_training=tf.constant(False), activation=None, data_format="NHWC"):
    """Darknet19 building block: bias-free convolution, batch norm, then activation.

    When `activation` is None, leaky ReLU with alpha=0.1 is used.

    Ref:
        https://arxiv.org/pdf/1612.08242.pdf
        https://github.com/pjreddie/darknet/blob/3bf2f342c03b0ad22efd799d5be9990c9d792354/cfg/darknet19.cfg
        https://github.com/pjreddie/darknet/blob/8215a8864d4ad07e058acafd75b2c6ff6600b9e8/cfg/yolo.2.0.cfg
    """
    if data_format == "NCHW":
        channels_layout = "channels_first"
    elif data_format == "NHWC":
        channels_layout = "channels_last"
    else:
        raise ValueError("data format must be 'NCHW' or 'NHWC'. got {}.".format(data_format))

    # Default activation: darknet-style leaky ReLU.
    if activation is None:
        def activation(x):
            return tf.nn.leaky_relu(x, alpha=0.1, name="leaky_relu")

    with tf.variable_scope(name):
        convolved = conv2d(
            "conv", inputs,
            filters=filters,
            kernel_size=kernel_size,
            activation=None,
            use_bias=False,
            data_format=channels_layout,
            kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),  # He initializer
        )
        # TODO(wakisaka): Should be the same as darknet batch norm.
        # https://github.com/tensorflow/tensorflow/blob/r1.1/tensorflow/contrib/layers/python/layers/layers.py
        # https://github.com/pjreddie/darknet/blob/8215a8864d4ad07e058acafd75b2c6ff6600b9e8/src/batchnorm_layer.c#L135
        normed = batch_norm(
            "bn", convolved,
            is_training=is_training, decay=0.99, scale=True, center=True,
            data_format=data_format,
        )
        tf.summary.histogram("batch_normed", normed)

        activated = activation(normed)
        tf.summary.histogram("output", activated)
        return activated
def base(self, images, is_training):
    """ResNet-style backbone: init conv/BN/ReLU, three residual stages,
    a final BN+ReLU, global average pooling, and a fully connected classifier.

    Args:
        images: Input image batch.
        is_training: Bool tensor, forwarded to batch normalization.

    Returns:
        Logits tensor with `self.num_classes` outputs.
    """
    use_bias = False
    self.images = self.input = images

    with tf.variable_scope("init"):
        self.conv1 = conv2d(
            "conv1", self.images,
            filters=16,
            kernel_size=3,
            activation=None,
            use_bias=use_bias,
            is_debug=self.is_debug,
        )
        self.bn1 = batch_norm("bn1", self.conv1, is_training=is_training)
        with tf.variable_scope("relu1"):
            self.relu1 = tf.nn.relu(self.bn1)

    # Three residual stages. Only the first unit of each stage changes the
    # channel count and (for stages 2 and 3) downsamples spatially.
    stage_specs = [
        # (stage_id, in_filters, out_filters, first_stride)
        (1, 16, 16, 1),
        (2, 16, 32, 2),
        (3, 32, 64, 2),
    ]
    out = self.relu1
    for stage_id, c_in, c_out, first_stride in stage_specs:
        for i in range(self.num_residual):
            with tf.variable_scope("unit{}_{}".format(stage_id, i)):
                out = self._residual(
                    out,
                    in_filters=c_in if i == 0 else c_out,
                    out_filters=c_out,
                    strides=first_stride if i == 0 else 1,
                    is_training=is_training,
                )

    with tf.variable_scope("unit4_0"):
        self.bn4 = batch_norm("bn4", out, is_training=is_training, activation=tf.nn.relu)

    # Global average pooling over the whole remaining spatial extent.
    h = self.bn4.get_shape()[1].value
    w = self.bn4.get_shape()[2].value
    self.global_average_pool = average_pooling2d(
        "global_average_pool", self.bn4,
        pool_size=[h, w],
        padding="VALID",
        is_debug=self.is_debug,)
    self._heatmap_layer = None

    self.fc = fully_connected("fc", self.global_average_pool, filters=self.num_classes, activation=None)
    return self.fc
def base(self, images, is_training):
    """Base network.

    Returns:
        Output. output shape depends on parameter.
        When `data_format` is `NHWC`
        shape is [
            batch_size,
            num_cell[0],
            num_cell[1],
            (num_classes + 5(x, y ,w, h, confidence)) * boxes_per_cell(length of anchors),
        ]
        When `data_format` is `NCHW`
        shape is [
            batch_size,
            (num_classes + 5(x, y ,w, h, confidence)) * boxes_per_cell(length of anchors),
            num_cell[0],
            num_cell[1],
        ]
    """
    self.inputs = self.images = images

    if self.data_format == "NCHW":
        channel_data_format = "channels_first"
    elif self.data_format == "NHWC":
        channel_data_format = "channels_last"
    else:
        # Fixed malformed message (bracket was inside the quote: 'NHWC]').
        raise RuntimeError(
            "data format {} should be in ['NCHW', 'NHWC'].".format(
                self.data_format))

    darknet_block = partial(darknet, is_training=is_training, activation=self.activation,
                            data_format=self.data_format)

    # Backbone: darknet blocks with space-to-depth ("reorg") downsampling.
    x = darknet_block("block_1", self.inputs, filters=32, kernel_size=1)
    x = darknet_block("block_2", x, filters=8, kernel_size=3)
    x = self._reorg("pool_1", x, stride=2, data_format=self.data_format)

    x = darknet_block("block_3", x, filters=16, kernel_size=3)
    x = self._reorg("pool_2", x, stride=2, data_format=self.data_format)

    # x4/x5/x6 are kept for the multi-scale skip connections below.
    x4 = darknet_block("block_4", x, filters=32, kernel_size=3)
    x = self._reorg("pool_3", x4, stride=2, data_format=self.data_format)

    x5 = darknet_block("block_5", x, filters=64, kernel_size=3)
    x = self._reorg("pool_4", x5, stride=2, data_format=self.data_format)

    x6 = darknet_block("block_6", x, filters=128, kernel_size=3)
    x = self._reorg("pool_5", x6, stride=2, data_format=self.data_format)

    # 1x1 channel reductions of the skip features, then space-to-depth so all
    # skips match the final grid resolution (strides 8/4/2 for x4/x5/x6).
    x4_1 = darknet_block("block_4_1x1", x4, filters=4, kernel_size=1)
    x5_1 = darknet_block("block_5_1x1", x5, filters=16, kernel_size=1)
    x6_1 = darknet_block("block_6_1x1", x6, filters=32, kernel_size=1)

    x4_s2d = self._reorg("block_4_s2d", x4_1, stride=8, data_format=self.data_format)
    x5_s2d = self._reorg("block_5_s2d", x5_1, stride=4, data_format=self.data_format)
    x6_s2d = self._reorg("block_6_s2d", x6_1, stride=2, data_format=self.data_format)

    x7 = darknet_block("block_7", x, filters=128, kernel_size=3)
    x8 = darknet_block("block_8", x7, filters=256, kernel_size=3)

    # Channel axis depends on the layout (data_format was validated above).
    if self.data_format == "NHWC":
        concat_axis = -1
    elif self.data_format == "NCHW":
        concat_axis = 1

    merged = tf.concat([x8, x7, x6_s2d, x5_s2d, x4_s2d], axis=concat_axis, name="block_merged")

    # Detection head.
    x = darknet_block("block_9", merged, filters=256, kernel_size=3)
    x = darknet_block("block_10", x, filters=128, kernel_size=3)
    x = darknet_block("block_11", x, filters=256, kernel_size=3)
    x = darknet_block("block_12", x, filters=128, kernel_size=3)
    x = darknet_block("block_13", x, filters=256, kernel_size=3,
                      activation=self.before_last_activation)

    # Per cell: classes + (x, y, w, h, confidence) for each anchor box.
    output_filters = (self.num_classes + 5) * self.boxes_per_cell
    self.block_last = conv2d("block_last", x, filters=output_filters, kernel_size=1,
                             activation=None, use_bias=True, is_debug=self.is_debug,
                             data_format=channel_data_format)

    if self.change_base_output:
        predict_classes, predict_confidence, predict_boxes = self._predictions(self.block_last)

        with tf.name_scope("convert_boxes_space_from_yolo_to_real"):
            predict_boxes = self.convert_boxes_space_from_yolo_to_real(predict_boxes)

        output = self._concat_predictions(predict_classes, predict_confidence, predict_boxes)
    else:
        output = self.block_last

    return output
def base(self, images, is_training):
    """YOLO(v1)-style backbone: 24 convolutions interleaved with max pooling,
    followed by fully connected layers with dropout.

    Args:
        images: Input image batch.
        is_training: Bool tensor; enables dropout (keep_prob 0.5) during training.

    Returns:
        Flat logits tensor of size
        cell_size * cell_size * (num_classes + boxes_per_cell * 5).
    """
    self.images = images
    # Dropout is active only in training; at inference keep_prob is 1.0.
    keep_prob = tf.cond(is_training, lambda: tf.constant(0.5), lambda: tf.constant(1.0))

    # Stem: 7x7/2 conv then pooling.
    self.conv_1 = conv2d("conv_1", images, filters=64, kernel_size=7,
                         strides=2, activation=self.leaky_relu)
    self.pool_2 = max_pooling2d("pool_2", self.conv_1, pool_size=2, strides=2)
    self.conv_3 = conv2d("conv_3", self.pool_2, filters=192, kernel_size=3,
                         activation=self.leaky_relu)
    self.pool_4 = max_pooling2d("pool_4", self.conv_3, pool_size=2, strides=2)

    # Alternating 1x1 bottleneck / 3x3 expansion convolutions.
    self.conv_5 = conv2d("conv_5", self.pool_4, filters=128, kernel_size=1,
                         activation=self.leaky_relu)
    self.conv_6 = conv2d("conv_6", self.conv_5, filters=256, kernel_size=3,
                         activation=self.leaky_relu)
    self.conv_7 = conv2d("conv_7", self.conv_6, filters=256, kernel_size=1,
                         activation=self.leaky_relu)
    self.conv_8 = conv2d("conv_8", self.conv_7, filters=512, kernel_size=3,
                         activation=self.leaky_relu)
    self.pool_9 = max_pooling2d("pool_9", self.conv_8, pool_size=2, strides=2)

    # Four repeated 1x1(256)/3x3(512) pairs.
    self.conv_10 = conv2d("conv_10", self.pool_9, filters=256, kernel_size=1,
                          activation=self.leaky_relu)
    self.conv_11 = conv2d("conv_11", self.conv_10, filters=512, kernel_size=3,
                          activation=self.leaky_relu)
    self.conv_12 = conv2d("conv_12", self.conv_11, filters=256, kernel_size=1,
                          activation=self.leaky_relu)
    self.conv_13 = conv2d("conv_13", self.conv_12, filters=512, kernel_size=3,
                          activation=self.leaky_relu)
    self.conv_14 = conv2d("conv_14", self.conv_13, filters=256, kernel_size=1,
                          activation=self.leaky_relu)
    self.conv_15 = conv2d("conv_15", self.conv_14, filters=512, kernel_size=3,
                          activation=self.leaky_relu)
    self.conv_16 = conv2d("conv_16", self.conv_15, filters=256, kernel_size=1,
                          activation=self.leaky_relu)
    self.conv_17 = conv2d("conv_17", self.conv_16, filters=512, kernel_size=3,
                          activation=self.leaky_relu)
    self.conv_18 = conv2d("conv_18", self.conv_17, filters=512, kernel_size=1,
                          activation=self.leaky_relu)
    self.conv_19 = conv2d("conv_19", self.conv_18, filters=1024, kernel_size=3,
                          activation=self.leaky_relu)
    self.pool_20 = max_pooling2d("pool_20", self.conv_19, pool_size=2, strides=2)

    # Final conv stack; conv_26 downsamples with stride 2.
    self.conv_21 = conv2d("conv_21", self.pool_20, filters=512, kernel_size=1,
                          activation=self.leaky_relu)
    self.conv_22 = conv2d("conv_22", self.conv_21, filters=1024, kernel_size=3,
                          activation=self.leaky_relu)
    self.conv_23 = conv2d("conv_23", self.conv_22, filters=512, kernel_size=1,
                          activation=self.leaky_relu)
    self.conv_24 = conv2d("conv_24", self.conv_23, filters=1024, kernel_size=3,
                          activation=self.leaky_relu)
    self.conv_25 = conv2d("conv_25", self.conv_24, filters=1024, kernel_size=3,
                          activation=self.leaky_relu)
    self.conv_26 = conv2d("conv_26", self.conv_25, filters=1024, kernel_size=3,
                          strides=2, activation=self.leaky_relu)
    self.conv_27 = conv2d("conv_27", self.conv_26, filters=1024, kernel_size=3,
                          activation=self.leaky_relu)
    self.conv_28 = conv2d("conv_28", self.conv_27, filters=1024, kernel_size=3,
                          activation=self.leaky_relu)

    # Detection head: FC layers with dropout before the final projection.
    self.fc_29 = fully_connected("fc_29", self.conv_28, filters=512,
                                 activation=self.leaky_relu)
    self.fc_30 = fully_connected("fc_30", self.fc_29, filters=4096,
                                 activation=self.leaky_relu)
    self.dropout_31 = tf.nn.dropout(self.fc_30, keep_prob)

    # Per cell: class scores plus 5 values (x, y, w, h, confidence) per box.
    output_size = (self.cell_size * self.cell_size) * (self.num_classes + self.boxes_per_cell * 5)
    self.fc_32 = fully_connected("fc_32", self.dropout_31, filters=output_size,
                                 activation=None)

    return self.fc_32
def base(self, images, is_training):
    """Darknet19 classification backbone.

    Eighteen darknet blocks (conv + BN + activation) interleaved with max
    pooling, a 1x1 conv projecting to `num_classes` channels, then global
    average pooling.

    Args:
        images: Input image batch.
        is_training: Bool tensor, forwarded to batch normalization.

    Returns:
        Logits tensor of shape [batch_size, num_classes].
    """
    if self.data_format == "NCHW":
        channel_data_format = "channels_first"
    elif self.data_format == "NHWC":
        channel_data_format = "channels_last"
    else:
        # Fixed typo ("shodul") and malformed bracket ('NHWC]') in the message.
        raise RuntimeError(
            "data format {} should be in ['NCHW', 'NHWC'].".format(
                self.data_format))

    self.inputs = self.images = images

    self.block_1 = darknet_block(
        "block_1", self.inputs, filters=32, kernel_size=3,
        is_training=is_training, activation=self.activation, data_format=self.data_format,
    )
    self.pool_1 = max_pooling2d("pool_1", self.block_1, pool_size=2, strides=2,
                                data_format=channel_data_format)

    self.block_2 = darknet_block(
        "block_2", self.pool_1, filters=64, kernel_size=3,
        is_training=is_training, activation=self.activation, data_format=self.data_format,
    )
    self.pool_2 = max_pooling2d("pool_2", self.block_2, pool_size=2, strides=2,
                                data_format=channel_data_format)

    # 3x3 / 1x1 / 3x3 bottleneck group at 128 channels.
    self.block_3 = darknet_block(
        "block_3", self.pool_2, filters=128, kernel_size=3,
        is_training=is_training, activation=self.activation, data_format=self.data_format,
    )
    self.block_4 = darknet_block(
        "block_4", self.block_3, filters=64, kernel_size=1,
        is_training=is_training, activation=self.activation, data_format=self.data_format,
    )
    self.block_5 = darknet_block(
        "block_5", self.block_4, filters=128, kernel_size=3,
        is_training=is_training, activation=self.activation, data_format=self.data_format,
    )
    self.pool_3 = max_pooling2d("pool_3", self.block_5, pool_size=2, strides=2,
                                data_format=channel_data_format)

    # Bottleneck group at 256 channels.
    self.block_6 = darknet_block(
        "block_6", self.pool_3, filters=256, kernel_size=3,
        is_training=is_training, activation=self.activation, data_format=self.data_format,
    )
    self.block_7 = darknet_block(
        "block_7", self.block_6, filters=128, kernel_size=1,
        is_training=is_training, activation=self.activation, data_format=self.data_format,
    )
    self.block_8 = darknet_block(
        "block_8", self.block_7, filters=256, kernel_size=3,
        is_training=is_training, activation=self.activation, data_format=self.data_format,
    )
    self.pool_4 = max_pooling2d("pool_4", self.block_8, pool_size=2, strides=2,
                                data_format=channel_data_format)

    # Bottleneck group at 512 channels.
    self.block_9 = darknet_block(
        "block_9", self.pool_4, filters=512, kernel_size=3,
        is_training=is_training, activation=self.activation, data_format=self.data_format,
    )
    self.block_10 = darknet_block(
        "block_10", self.block_9, filters=256, kernel_size=1,
        is_training=is_training, activation=self.activation, data_format=self.data_format,
    )
    self.block_11 = darknet_block(
        "block_11", self.block_10, filters=512, kernel_size=3,
        is_training=is_training, activation=self.activation, data_format=self.data_format,
    )
    self.block_12 = darknet_block(
        "block_12", self.block_11, filters=256, kernel_size=1,
        is_training=is_training, activation=self.activation, data_format=self.data_format,
    )
    self.block_13 = darknet_block(
        "block_13", self.block_12, filters=512, kernel_size=3,
        is_training=is_training, activation=self.activation, data_format=self.data_format,
    )
    self.pool_5 = max_pooling2d("pool_5", self.block_13, pool_size=2, strides=2,
                                data_format=channel_data_format)

    # Bottleneck group at 1024 channels; the last block uses the
    # before-last activation configured on the model.
    self.block_14 = darknet_block(
        "block_14", self.pool_5, filters=1024, kernel_size=3,
        is_training=is_training, activation=self.activation, data_format=self.data_format,
    )
    self.block_15 = darknet_block(
        "block_15", self.block_14, filters=512, kernel_size=1,
        is_training=is_training, activation=self.activation, data_format=self.data_format,
    )
    self.block_16 = darknet_block(
        "block_16", self.block_15, filters=1024, kernel_size=3,
        is_training=is_training, activation=self.activation, data_format=self.data_format,
    )
    self.block_17 = darknet_block(
        "block_17", self.block_16, filters=512, kernel_size=1,
        is_training=is_training, activation=self.activation, data_format=self.data_format,
    )
    self.block_18 = darknet_block(
        "block_18", self.block_17, filters=1024, kernel_size=3,
        is_training=is_training, activation=self.before_last_activation,
        data_format=self.data_format,
    )

    # Classification head: 1x1 conv to num_classes channels.
    kernel_initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
    self.conv_19 = conv2d(
        "conv_19", self.block_18, filters=self.num_classes, kernel_size=1,
        activation=None, use_bias=True, is_debug=self.is_debug,
        kernel_initializer=kernel_initializer, data_format=channel_data_format,
    )

    if self.is_debug:
        self._heatmap_layer = self.conv_19

    # Spatial axes for the layout (data_format was validated above).
    if self.data_format == "NCHW":
        axis = [2, 3]
    elif self.data_format == "NHWC":
        axis = [1, 2]

    # Global average pooling over the spatial axes.
    self.pool_6 = tf.reduce_mean(self.conv_19, axis=axis, name="global_average_pool_6")

    self.base_output = tf.reshape(self.pool_6, [-1, self.num_classes], name="pool6_reshape")
    return self.base_output