def build_network(input, trainable):
    """Build the shared MobileNetV2-style stem and stacked hourglass modules.

    Args:
        input: input image tensor (NHWC).
        trainable: flag forwarded to ``is_trainable`` controlling whether
            layers are trainable.

    Returns:
        Tuple of (hourglass output tensor, ``l2s`` list of intermediate
        loss layers, each upsampled to the stem's spatial resolution).
    """
    is_trainable(trainable)

    # Stem: 3x3 conv, stride 2.
    net = convb(input, 3, 3, out_channel_ratio(16), 2, name="Conv2d_0")

    # 128, 112
    stem_specs = [(1, out_channel_ratio(16), 0, 3)] * 2
    net = slim.stack(net, inverted_bottleneck, stem_specs, scope="Conv2d_1")

    # 64, 56 — first bottleneck downsamples (stride flag 1), the rest keep
    # the resolution.
    down_specs = [(up_channel_ratio(6), out_channel_ratio(24), 1, 3)]
    down_specs += [(up_channel_ratio(6), out_channel_ratio(24), 0, 3)] * 4
    net = slim.stack(net, inverted_bottleneck, down_specs, scope="Conv2d_2")

    net_h_w = int(net.shape[1])
    # build network recursively
    hg_out = hourglass_module(net, STAGE_NUM)

    # NOTE(review): `l2s` is not defined anywhere in this function —
    # presumably a module-level list populated by hourglass_module; verify.
    for idx, layer in enumerate(l2s):
        layer_h_w = int(layer.shape[1])
        if layer_h_w != net_h_w:
            # Upsample intermediate outputs to the trunk resolution so the
            # losses are computed at a common size.
            l2s[idx] = upsample(layer, net_h_w // layer_h_w,
                                name="upsample_for_loss_%d" % idx)

    return hg_out, l2s
    def build_network(self, input, trainable):
        """Build the stem and hourglass stack; return the final output plus
        the element-wise average of all intermediate heatmap layers.

        Args:
            input: input image tensor (NHWC).
            trainable: flag forwarded to ``is_trainable``.

        Returns:
            Tuple of (hourglass output, averaged intermediate heatmaps).
        """
        is_trainable(trainable)

        intermediate_heatmap_layers = []

        # Stem: 3x3 conv, stride 2.
        tower = convb(input, 3, 3, out_channel_ratio(16), 2, name="Conv2d_0")

        # 128, 112 — two plain bottlenecks with expansion factor 1.
        for _ in range(2):
            tower = inverted_bottleneck(tower, 1, out_channel_ratio(16), 0, 3)

        # 64, 56 — one downsampling bottleneck followed by four that keep
        # the resolution (the third argument is the stride flag).
        for stride_flag in (1, 0, 0, 0, 0):
            tower = inverted_bottleneck(tower, up_channel_ratio(6),
                                        out_channel_ratio(24), stride_flag, 3)

        net_h_w = int(tower.shape[1])
        # build network recursively
        hg_out = self.hourglass_module(tower, STAGE_NUM,
                                       intermediate_heatmap_layers)

        # Bring every intermediate heatmap up to the trunk resolution.
        for idx, heatmap in enumerate(intermediate_heatmap_layers):
            hm_size = int(heatmap.shape[1])
            if hm_size != net_h_w:
                intermediate_heatmap_layers[idx] = upsample(
                    heatmap, net_h_w // hm_size,
                    name="upsample_for_loss_%d" % idx)

        merged_layer = tf.keras.layers.Average()(intermediate_heatmap_layers)
        return hg_out, merged_layer
# --- Example no. 3 --- (scrape artifact: original text "Ejemplo n.º 3"
# followed by a vote count "0"; kept as a comment so the file stays parseable)
def build_network(input, trainable):
    """Build a MobileNetV2 feature pyramid feeding a Convolutional Pose
    Machine (CPM).

    Five MobileNetV2 branches at decreasing resolutions are resized to the
    resolution of branch 2 and concatenated; the CPM then refines heatmaps
    over ``STAGE_NUM`` stages, each stage re-consuming the fused features
    plus the previous stage's output.

    Args:
        input: input image tensor (NHWC).
        trainable: flag forwarded to ``is_trainable``.

    Returns:
        Tuple of (last stage's upsampled heatmap, list of all per-stage
        heatmaps for intermediate supervision).
    """
    is_trainable(trainable)

    net = convb(input, 3, 3, out_channel_ratio(32), 2, name="Conv2d_0")

    with tf.variable_scope('MobilenetV2'):

        def _branch_specs(channels):
            # One downsampling bottleneck (stride flag 1) followed by four
            # resolution-preserving ones, all with expansion factor 6.
            specs = [(up_channel_ratio(6), out_channel_ratio(channels), 1, 3)]
            specs += [(up_channel_ratio(6), out_channel_ratio(channels), 0, 3)] * 4
            return specs

        # 128, 112
        mv2_branch_0 = slim.stack(net, inverted_bottleneck,
                                  [(1, out_channel_ratio(16), 0, 3)] * 2,
                                  scope="MobilenetV2_part_0")
        # 64, 56
        mv2_branch_1 = slim.stack(mv2_branch_0, inverted_bottleneck,
                                  _branch_specs(24),
                                  scope="MobilenetV2_part_1")
        # 32, 28
        mv2_branch_2 = slim.stack(mv2_branch_1, inverted_bottleneck,
                                  _branch_specs(32),
                                  scope="MobilenetV2_part_2")
        # 16, 14
        mv2_branch_3 = slim.stack(mv2_branch_2, inverted_bottleneck,
                                  _branch_specs(64),
                                  scope="MobilenetV2_part_3")
        # 8, 7
        mv2_branch_4 = slim.stack(mv2_branch_3, inverted_bottleneck,
                                  _branch_specs(96),
                                  scope="MobilenetV2_part_4")

        # Pool the high-resolution branches down and upsample the
        # low-resolution ones so every tensor matches branch 2, then fuse
        # along the channel axis.
        fused_features = tf.concat([
            max_pool(mv2_branch_0, 4, 4, 4, 4, name="mv2_0_max_pool"),
            max_pool(mv2_branch_1, 2, 2, 2, 2, name="mv2_1_max_pool"),
            mv2_branch_2,
            upsample(mv2_branch_3, 2, name="mv2_3_upsample"),
            upsample(mv2_branch_4, 4, name="mv2_4_upsample"),
        ], axis=3)

    with tf.variable_scope("Convolutional_Pose_Machine"):
        l2s = []
        prev = None
        for stage_number in range(STAGE_NUM):
            if prev is None:
                inputs = fused_features
            else:
                # Later stages also see the previous stage's heatmaps.
                inputs = tf.concat([fused_features, prev], axis=3)

            # Stage 0 uses small kernels with a wide head; refinement
            # stages use wide kernels with a slimmer head.
            if stage_number == 0:
                kernel_size, head_channels = 3, 512
            else:
                kernel_size, head_channels = 7, 128

            stage_net = slim.stack(
                inputs, inverted_bottleneck,
                [(2, out_channel_cpm(32), 0, kernel_size),
                 (up_channel_ratio(4), out_channel_cpm(32), 0, kernel_size),
                 (up_channel_ratio(4), out_channel_cpm(32), 0, kernel_size)],
                scope="stage_%d_mv2" % stage_number)

            stage_net = slim.stack(
                stage_net, separable_conv,
                [(out_channel_ratio(head_channels), 1, 1),
                 (N_KPOINTS, 1, 1)],
                scope="stage_%d_mv1" % stage_number)

            prev = stage_net
            cpm_out = upsample(stage_net, 4, "stage_%d_out" % stage_number)
            l2s.append(cpm_out)

    return cpm_out, l2s