Beispiel #1
0
    def base(self, images, is_training):
        """Build the single-block classification network and return its output.

        The graph is: 3x3 convolution (32 filters, no bias) -> batch norm ->
        ``self.activation`` -> 1x1 convolution to ``self.num_classes``
        channels -> average pooling over the whole spatial extent -> reshape
        to ``[-1, self.num_classes]``.

        Intermediate tensors are stored on the instance as ``inputs``,
        ``images``, ``block_1``, ``block_last``, ``pool`` and ``base_output``.

        Args:
            images: Input image tensor. Only the ``NHWC`` layout is supported
                (``self.data_format`` is asserted to be ``"NHWC"``).
            is_training: Forwarded to ``batch_norm``.

        Returns:
            The pooled output reshaped to ``[-1, self.num_classes]``.
        """
        assert self.data_format == "NHWC"
        channel_data_format = "channels_last"

        self.inputs = self.images = images

        with tf.compat.v1.variable_scope("block_1"):
            conv = conv2d("conv", self.inputs, filters=32, kernel_size=3,
                          activation=None, use_bias=False, data_format=channel_data_format,
                          kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=2.0))
            batch_normed = batch_norm("bn", conv, is_training=is_training, decay=0.99, scale=True, center=True,
                                      data_format=self.data_format)
            self.block_1 = self.activation(batch_normed)

        self.block_last = conv2d("block_last", self.block_1, filters=self.num_classes, kernel_size=1,
                                 activation=None, use_bias=True, is_debug=self.is_debug,
                                 kernel_initializer=tf.compat.v1.random_normal_initializer(mean=0.0, stddev=0.01),
                                 data_format=channel_data_format)

        # `as_list()` returns plain ints (or None) under both TF1 and TF2
        # tensor-shape semantics; `Dimension.value` is not available when
        # TF2 shape behavior is enabled because dimensions are already ints.
        static_shape = self.block_last.get_shape().as_list()
        h = static_shape[1]
        w = static_shape[2]

        # Pooling window equal to the feature map size == global average pooling.
        self.pool = tf.compat.v1.layers.average_pooling2d(name='global_average_pool', inputs=self.block_last,
                                                          pool_size=[h, w], padding='VALID', strides=1,
                                                          data_format=channel_data_format)
        self.base_output = tf.reshape(self.pool, [-1, self.num_classes], name="pool_reshape")

        return self.base_output
Beispiel #2
0
    def _residual(self, inputs, in_filters, out_filters, strides, is_training):
        """Build one residual unit and return ``conv2 + shortcut``.

        The main path is batch norm -> ReLU -> 3x3 conv (``strides``) followed
        by batch norm -> ReLU -> 3x3 conv (stride 1), both without bias.
        When ``in_filters != out_filters`` the shortcut is average-pooled with
        a ``strides`` x ``strides`` window and zero-padded along the last axis
        so that its channel count becomes ``out_filters``.

        Args:
            inputs: Input tensor of the unit.
                NOTE(review): the pooling window ``[1, s, s, 1]`` and the
                padding of the last axis look like they assume NHWC — confirm.
            in_filters: Number of channels of ``inputs``.
            out_filters: Number of channels produced by both convolutions.
            strides: Stride of the first convolution and of the shortcut
                pooling.
            is_training: Forwarded to ``batch_norm``.

        Returns:
            The sum of the second convolution output and the shortcut.
        """
        use_bias = False

        with tf.compat.v1.variable_scope('sub1'):
            bn1 = batch_norm("bn1", inputs, is_training=is_training)

            with tf.compat.v1.variable_scope('relu1'):
                relu1 = tf.nn.relu(bn1)
            conv1 = conv2d(
                "conv1",
                relu1,
                filters=out_filters,
                kernel_size=3,
                activation=None,
                use_bias=use_bias,
                strides=strides,
                is_debug=self.is_debug,
            )

        with tf.compat.v1.variable_scope('sub2'):
            bn2 = batch_norm("bn2", conv1, is_training=is_training)

            with tf.compat.v1.variable_scope('relu2'):
                relu2 = tf.nn.relu(bn2)

            conv2 = conv2d(
                "conv2",
                relu2,
                filters=out_filters,
                kernel_size=3,
                activation=None,
                use_bias=use_bias,
                strides=1,
                is_debug=self.is_debug,
            )

        with tf.compat.v1.variable_scope('sub_add'):
            if in_filters != out_filters:
                inputs = tf.nn.avg_pool(
                    inputs,
                    ksize=[1, strides, strides, 1],
                    strides=[1, strides, strides, 1],
                    padding='VALID'
                )
                # Split the missing channels between both sides. Padding
                # `extra // 2` on each side loses one channel whenever the
                # difference is odd, which makes the addition below fail; the
                # remainder therefore goes to the trailing side. For an even
                # difference this is identical to symmetric padding.
                extra_channels = out_filters - in_filters
                pad_before = extra_channels // 2
                pad_after = extra_channels - pad_before
                inputs = tf.pad(
                    inputs,
                    [[0, 0], [0, 0], [0, 0], [pad_before, pad_after]]
                )

        output = conv2 + inputs

        return output
Beispiel #3
0
def darknet(name,
            inputs,
            filters,
            kernel_size,
            is_training=tf.constant(False),
            activation=None,
            data_format="NHWC"):
    """Darknet19 block.

    Applies a convolution without bias, batch normalization and an
    activation, all inside the variable scope ``name``. Histogram summaries
    are recorded for the batch-normalized tensor and for the output.
    Ref: https://arxiv.org/pdf/1612.08242.pdf
         https://github.com/pjreddie/darknet/blob/3bf2f342c03b0ad22efd799d5be9990c9d792354/cfg/darknet19.cfg
         https://github.com/pjreddie/darknet/blob/8215a8864d4ad07e058acafd75b2c6ff6600b9e8/cfg/yolo.2.0.cfg

    Args:
        name: Variable scope name of the block.
        inputs: Input tensor.
        filters: Number of convolution filters.
        kernel_size: Convolution kernel size.
        is_training: Forwarded to ``batch_norm``.
            NOTE(review): the default tensor is created once, when this
            function is defined, so it lives in whatever graph is the default
            at import time — callers building other graphs should pass
            ``is_training`` explicitly.
        activation: Callable applied after batch normalization. When ``None``
            a leaky ReLU with ``alpha=0.1`` is used.
        data_format: ``"NHWC"`` or ``"NCHW"``.

    Returns:
        The activated output tensor.

    Raises:
        ValueError: If ``data_format`` is neither ``"NCHW"`` nor ``"NHWC"``.
    """
    if data_format == "NCHW":
        channel_data_format = "channels_first"
    elif data_format == "NHWC":
        channel_data_format = "channels_last"
    else:
        raise ValueError(
            "data format must be 'NCHW' or 'NHWC'. got {}.".format(
                data_format))

    with tf.compat.v1.variable_scope(name):
        if activation is None:

            def activation(x):
                return tf.nn.leaky_relu(x, alpha=0.1, name="leaky_relu")

        # He initializer. `tf.contrib` no longer exists in TensorFlow 2, so
        # use the compat.v1 Keras initializer that the official upgrade tool
        # maps `tf.contrib.layers.variance_scaling_initializer()` to
        # (scale=2.0, fan-in, truncated normal). It is a near-equivalent
        # replacement, not a bit-identical one.
        conv = conv2d(
            "conv",
            inputs,
            filters=filters,
            kernel_size=kernel_size,
            activation=None,
            use_bias=False,
            data_format=channel_data_format,
            kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=2.0),
        )

        # TODO(wakisaka): Should be the same as darknet batch norm.
        # https://github.com/tensorflow/tensorflow/blob/r1.1/tensorflow/contrib/layers/python/layers/layers.py
        # https://github.com/pjreddie/darknet/blob/8215a8864d4ad07e058acafd75b2c6ff6600b9e8/src/batchnorm_layer.c#L135
        batch_normed = batch_norm("bn",
                                  conv,
                                  is_training=is_training,
                                  decay=0.99,
                                  scale=True,
                                  center=True,
                                  data_format=data_format)
        tf.compat.v1.summary.histogram("batch_normed", batch_normed)

        output = activation(batch_normed)
        tf.compat.v1.summary.histogram("output", output)

        return output
Beispiel #4
0
    def base(self, images, is_training):
        """Build the residual classification network and return its logits.

        The graph is: 3x3 conv (16 filters) -> batch norm -> ReLU, then three
        stages of ``self.num_residual`` residual units each (16, 32 and 64
        filters; the first unit of stages 2 and 3 uses stride 2), a final
        batch norm with ReLU, global average pooling and a fully connected
        layer with ``self.num_classes`` outputs.

        Intermediate tensors are stored on the instance (``conv1``, ``bn1``,
        ``relu1``, ``bn4``, ``global_average_pool``, ``fc``) and
        ``self._heatmap_layer`` is reset to ``None``.

        Args:
            images: Input image tensor.
            is_training: Forwarded to every ``batch_norm`` call.

        Returns:
            Output of the final fully connected layer.
        """
        use_bias = False

        self.images = self.input = images

        with tf.compat.v1.variable_scope("init"):
            self.conv1 = conv2d(
                "conv1",
                self.images,
                filters=16,
                kernel_size=3,
                activation=None,
                use_bias=use_bias,
                is_debug=self.is_debug,
            )

            self.bn1 = batch_norm("bn1", self.conv1, is_training=is_training)
            with tf.compat.v1.variable_scope("relu1"):
                self.relu1 = tf.nn.relu(self.bn1)

        # Start from the stem output so that `out` is always bound, even when
        # `num_residual` is 0 and no residual unit is created.
        out = self.relu1

        # (stage number, input filters, output filters, stride of first unit)
        stages = (
            (1, 16, 16, 1),
            (2, 16, 32, 2),
            (3, 32, 64, 2),
        )
        for stage, stage_in_filters, stage_out_filters, first_strides in stages:
            for i in range(self.num_residual):
                with tf.compat.v1.variable_scope("unit{}_{}".format(stage, i)):
                    # Only the first unit of a stage changes the channel
                    # count and may downsample; the others keep the shape.
                    is_first = i == 0
                    out = self._residual(
                        out,
                        in_filters=stage_in_filters if is_first else stage_out_filters,
                        out_filters=stage_out_filters,
                        strides=first_strides if is_first else 1,
                        is_training=is_training,
                    )

        with tf.compat.v1.variable_scope("unit4_0"):
            self.bn4 = batch_norm("bn4", out, is_training=is_training, activation=tf.nn.relu)

        # global average pooling
        # `as_list()` returns plain ints (or None) under both TF1 and TF2
        # tensor-shape semantics, unlike `Dimension.value`.
        static_shape = self.bn4.get_shape().as_list()
        h = static_shape[1]
        w = static_shape[2]
        self.global_average_pool = average_pooling2d(
            "global_average_pool", self.bn4, pool_size=[h, w], padding="VALID", is_debug=self.is_debug,)

        self._heatmap_layer = None
        self.fc = fully_connected("fc", self.global_average_pool, filters=self.num_classes, activation=None)

        return self.fc
Beispiel #5
0
    def base(self, images, is_training):
        """Build the Darknet19-style classification network.

        Eighteen ``darknet_block`` layers interleaved with five 2x2/stride-2
        max-pooling layers are followed by a 1x1 convolution to
        ``self.num_classes`` channels, a mean over the spatial axes and a
        reshape to ``[-1, self.num_classes]``. Every intermediate tensor is
        stored on the instance (``block_1`` .. ``block_18``, ``pool_1`` ..
        ``pool_6``, ``conv_19``, ``base_output``).

        Args:
            images: Input image tensor laid out according to
                ``self.data_format``.
            is_training: Forwarded to every ``darknet_block``.

        Returns:
            Tensor reshaped to ``[-1, self.num_classes]``.

        Raises:
            RuntimeError: If ``self.data_format`` is neither ``"NCHW"`` nor
                ``"NHWC"``.
        """
        channels_first = self.data_format == "NCHW"
        if not channels_first and self.data_format != "NHWC":
            raise RuntimeError(
                "data format {} should be in ['NCHW', 'NHWC]'.".format(
                    self.data_format))
        channel_data_format = "channels_first" if channels_first else "channels_last"

        def block(name, inputs, filters, kernel_size, last=False):
            # One darknet block; the final block uses its own activation.
            activation = self.before_last_activation if last else self.activation
            return darknet_block(
                name,
                inputs,
                filters=filters,
                kernel_size=kernel_size,
                is_training=is_training,
                activation=activation,
                data_format=self.data_format,
            )

        def downsample(name, inputs):
            # 2x2 max pooling with stride 2.
            return max_pooling2d(name,
                                 inputs,
                                 pool_size=2,
                                 strides=2,
                                 data_format=channel_data_format)

        self.inputs = self.images = images

        self.block_1 = block("block_1", self.inputs, 32, 3)
        self.pool_1 = downsample("pool_1", self.block_1)

        self.block_2 = block("block_2", self.pool_1, 64, 3)
        self.pool_2 = downsample("pool_2", self.block_2)

        self.block_3 = block("block_3", self.pool_2, 128, 3)
        self.block_4 = block("block_4", self.block_3, 64, 1)
        self.block_5 = block("block_5", self.block_4, 128, 3)
        self.pool_3 = downsample("pool_3", self.block_5)

        self.block_6 = block("block_6", self.pool_3, 256, 3)
        self.block_7 = block("block_7", self.block_6, 128, 1)
        self.block_8 = block("block_8", self.block_7, 256, 3)
        self.pool_4 = downsample("pool_4", self.block_8)

        self.block_9 = block("block_9", self.pool_4, 512, 3)
        self.block_10 = block("block_10", self.block_9, 256, 1)
        self.block_11 = block("block_11", self.block_10, 512, 3)
        self.block_12 = block("block_12", self.block_11, 256, 1)
        self.block_13 = block("block_13", self.block_12, 512, 3)
        self.pool_5 = downsample("pool_5", self.block_13)

        self.block_14 = block("block_14", self.pool_5, 1024, 3)
        self.block_15 = block("block_15", self.block_14, 512, 1)
        self.block_16 = block("block_16", self.block_15, 1024, 3)
        self.block_17 = block("block_17", self.block_16, 512, 1)
        self.block_18 = block("block_18", self.block_17, 1024, 3, last=True)

        self.conv_19 = conv2d(
            "conv_19",
            self.block_18,
            filters=self.num_classes,
            kernel_size=1,
            activation=None,
            use_bias=True,
            is_debug=self.is_debug,
            kernel_initializer=tf.random_normal_initializer(mean=0.0,
                                                            stddev=0.01),
            data_format=channel_data_format,
        )

        if self.is_debug:
            self._heatmap_layer = self.conv_19

        # Global average pooling, expressed as a mean over the two spatial
        # axes of the configured layout.
        spatial_axes = [2, 3] if channels_first else [1, 2]
        self.pool_6 = tf.reduce_mean(self.conv_19,
                                     axis=spatial_axes,
                                     name="global_average_pool_6")
        self.base_output = tf.reshape(self.pool_6, [-1, self.num_classes],
                                      name="pool6_reshape")

        return self.base_output
Beispiel #6
0
    def base(self, images, is_training):
        """Build the YOLO (v1 style) detection network.

        Twenty-four convolutions with ``self.leaky_relu`` activation and four
        2x2/stride-2 max-pooling layers are followed by two fully connected
        layers, dropout and a final linear fully connected layer with
        ``cell_size * cell_size * (num_classes + boxes_per_cell * 5)`` outputs.
        Every layer output is stored on the instance (``conv_1`` ..
        ``fc_32``).

        Args:
            images: Input image tensor.
            is_training: Boolean tensor used as the ``tf.cond`` predicate that
                selects the dropout keep probability (0.5 when true, 1.0
                otherwise).

        Returns:
            Output of the last fully connected layer (``self.fc_32``).
        """
        def conv(name, inputs, filters, kernel_size, **extra):
            # Convolution followed by the network's leaky ReLU.
            return conv2d(name, inputs, filters=filters, kernel_size=kernel_size,
                          activation=self.leaky_relu, **extra)

        def pool(name, inputs):
            # 2x2 max pooling with stride 2.
            return max_pooling2d(name, inputs, pool_size=2, strides=2)

        def dense(name, inputs, filters):
            # Fully connected layer followed by the network's leaky ReLU.
            return fully_connected(name, inputs, filters=filters,
                                   activation=self.leaky_relu)

        self.images = images
        keep_prob = tf.cond(is_training, lambda: tf.constant(0.5), lambda: tf.constant(1.0))

        self.conv_1 = conv("conv_1", images, 64, 7, strides=2)
        self.pool_2 = pool("pool_2", self.conv_1)
        self.conv_3 = conv("conv_3", self.pool_2, 192, 3)
        self.pool_4 = pool("pool_4", self.conv_3)

        self.conv_5 = conv("conv_5", self.pool_4, 128, 1)
        self.conv_6 = conv("conv_6", self.conv_5, 256, 3)
        self.conv_7 = conv("conv_7", self.conv_6, 256, 1)
        self.conv_8 = conv("conv_8", self.conv_7, 512, 3)
        self.pool_9 = pool("pool_9", self.conv_8)

        self.conv_10 = conv("conv_10", self.pool_9, 256, 1)
        self.conv_11 = conv("conv_11", self.conv_10, 512, 3)
        self.conv_12 = conv("conv_12", self.conv_11, 256, 1)
        self.conv_13 = conv("conv_13", self.conv_12, 512, 3)
        self.conv_14 = conv("conv_14", self.conv_13, 256, 1)
        self.conv_15 = conv("conv_15", self.conv_14, 512, 3)
        self.conv_16 = conv("conv_16", self.conv_15, 256, 1)
        self.conv_17 = conv("conv_17", self.conv_16, 512, 3)
        self.conv_18 = conv("conv_18", self.conv_17, 512, 1)
        self.conv_19 = conv("conv_19", self.conv_18, 1024, 3)
        self.pool_20 = pool("pool_20", self.conv_19)

        self.conv_21 = conv("conv_21", self.pool_20, 512, 1)
        self.conv_22 = conv("conv_22", self.conv_21, 1024, 3)
        self.conv_23 = conv("conv_23", self.conv_22, 512, 1)
        self.conv_24 = conv("conv_24", self.conv_23, 1024, 3)
        self.conv_25 = conv("conv_25", self.conv_24, 1024, 3)
        self.conv_26 = conv("conv_26", self.conv_25, 1024, 3, strides=2)
        self.conv_27 = conv("conv_27", self.conv_26, 1024, 3)
        self.conv_28 = conv("conv_28", self.conv_27, 1024, 3)

        self.fc_29 = dense("fc_29", self.conv_28, 512)
        self.fc_30 = dense("fc_30", self.fc_29, 4096)

        # NOTE(review): the second positional argument of `tf.nn.dropout` is
        # `keep_prob` in TensorFlow 1.x but `rate` in TensorFlow 2.x — confirm
        # the targeted TensorFlow version before upgrading.
        self.dropout_31 = tf.nn.dropout(self.fc_30, keep_prob)

        # One prediction vector per grid cell: class scores plus five values
        # for each box.
        num_cells = self.cell_size * self.cell_size
        values_per_cell = self.num_classes + self.boxes_per_cell * 5
        output_size = num_cells * values_per_cell
        self.fc_32 = fully_connected("fc_32", self.dropout_31, filters=output_size, activation=None)

        return self.fc_32
Beispiel #7
0
    def base(self, images, is_training):
        """Base network.

        Stacks ``darknet`` blocks and ``self._reorg`` layers, concatenates the
        deepest features with reorganized copies of three earlier feature
        maps, and finishes with a 1x1 convolution (``self.block_last``).

        Args:
            images: Input image tensor laid out according to
                ``self.data_format``.
            is_training: Forwarded to every ``darknet`` block.

        Returns: Output. output shape depends on parameter.
            When `data_format` is `NHWC`
            shape is [
                batch_size,
                num_cell[0],
                num_cell[1],
                (num_classes + 5(x, y ,w, h, confidence)) * boxes_per_cell(length of anchors),
            ]
            When `data_format` is `NCHW`
            shape is [
                batch_size,
                (num_classes + 5(x, y ,w, h, confidence)) * boxes_per_cell(length of anchors),
                num_cell[0],
                num_cell[1],
            ]
            When ``self.change_base_output`` is true, the predictions are
            split, the boxes are converted by
            ``convert_boxes_space_from_yolo_to_real`` and the parts are
            concatenated again before being returned.

        Raises:
            RuntimeError: If ``self.data_format`` is neither ``"NCHW"`` nor
                ``"NHWC"``.
        """
        self.inputs = images
        self.images = images

        channels_first = self.data_format == "NCHW"
        if not channels_first and self.data_format != "NHWC":
            raise RuntimeError(
                "data format {} should be in ['NCHW', 'NHWC]'.".format(
                    self.data_format))
        channel_data_format = "channels_first" if channels_first else "channels_last"

        block = partial(darknet,
                        is_training=is_training,
                        activation=self.activation,
                        data_format=self.data_format)
        reorg = partial(self._reorg, data_format=self.data_format)

        # Backbone: each `_reorg` with stride 2 follows one block.
        net = block("block_1", self.inputs, filters=32, kernel_size=1)
        net = block("block_2", net, filters=8, kernel_size=3)
        net = reorg("pool_1", net, stride=2)

        net = block("block_3", net, filters=16, kernel_size=3)
        net = reorg("pool_2", net, stride=2)

        feat_4 = block("block_4", net, filters=32, kernel_size=3)
        net = reorg("pool_3", feat_4, stride=2)

        feat_5 = block("block_5", net, filters=64, kernel_size=3)
        net = reorg("pool_4", feat_5, stride=2)

        feat_6 = block("block_6", net, filters=128, kernel_size=3)
        net = reorg("pool_5", feat_6, stride=2)

        # Side branches: 1x1 block on each kept feature map, then a `_reorg`
        # with the stride that matches the number of later stride-2 steps.
        squeezed_4 = block("block_4_1x1", feat_4, filters=4, kernel_size=1)
        squeezed_5 = block("block_5_1x1", feat_5, filters=16, kernel_size=1)
        squeezed_6 = block("block_6_1x1", feat_6, filters=32, kernel_size=1)

        branch_4 = reorg("block_4_s2d", squeezed_4, stride=8)
        branch_5 = reorg("block_5_s2d", squeezed_5, stride=4)
        branch_6 = reorg("block_6_s2d", squeezed_6, stride=2)

        feat_7 = block("block_7", net, filters=128, kernel_size=3)
        feat_8 = block("block_8", feat_7, filters=256, kernel_size=3)

        # Concatenate along the channel axis of the configured layout.
        concat_axis = 1 if channels_first else -1
        merged = tf.concat([feat_8, feat_7, branch_6, branch_5, branch_4],
                           axis=concat_axis,
                           name="block_merged")

        net = block("block_9", merged, filters=256, kernel_size=3)
        net = block("block_10", net, filters=128, kernel_size=3)

        net = block("block_11", net, filters=256, kernel_size=3)
        net = block("block_12", net, filters=128, kernel_size=3)
        net = block("block_13",
                    net,
                    filters=256,
                    kernel_size=3,
                    activation=self.before_last_activation)

        output_filters = (self.num_classes + 5) * self.boxes_per_cell
        self.block_last = conv2d("block_last",
                                 net,
                                 filters=output_filters,
                                 kernel_size=1,
                                 activation=None,
                                 use_bias=True,
                                 is_debug=self.is_debug,
                                 data_format=channel_data_format)

        if not self.change_base_output:
            return self.block_last

        predict_classes, predict_confidence, predict_boxes = self._predictions(
            self.block_last)

        with tf.name_scope("convert_boxes_space_from_yolo_to_real"):
            predict_boxes = self.convert_boxes_space_from_yolo_to_real(
                predict_boxes)

        return self._concat_predictions(predict_classes,
                                        predict_confidence,
                                        predict_boxes)